API Client Snippets
Python Client
Copy
import requests
import json
class BudRuntimeClient:
    """Small HTTP client for the Bud Runtime completion and image endpoints.

    Every request is a JSON POST carrying a Bearer token. Responses are
    returned as decoded JSON; the HTTP status code is not inspected, so an
    error payload from the server is returned to the caller as-is.
    """

    def __init__(self, base_url: str, api_key: str, timeout=60):
        """Store connection settings.

        Args:
            base_url: Root URL of the API, with or without a trailing slash.
            api_key: Token sent in the ``Authorization: Bearer`` header.
            timeout: Seconds passed to ``requests`` for each call. Pass
                ``None`` to wait indefinitely (the behaviour when no
                timeout is given to ``requests``).
        """
        # Drop trailing slashes so "/v1/..." paths never produce "//v1/...".
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

    def generate_text(self, prompt, model="llama2-7b", max_tokens: int = 100):
        """POST a completion request to ``/v1/completions``.

        Args:
            prompt: Value sent as the ``prompt`` field.
            model: Value sent as the ``model`` field.
            max_tokens: Value sent as the ``max_tokens`` field.

        Returns:
            The response body decoded from JSON.
        """
        response = requests.post(
            f"{self.base_url}/v1/completions",
            headers=self.headers,
            json={
                "model": model,
                "prompt": prompt,
                "max_tokens": max_tokens
            },
            # Without a timeout, requests blocks forever on a stalled server.
            timeout=self.timeout,
        )
        return response.json()

    def generate_image(self, prompt, size="1024x1024", n: int = 1):
        """POST an image request to ``/v1/images/generations``.

        Args:
            prompt: Value sent as the ``prompt`` field.
            size: Value sent as the ``size`` field.
            n: Value sent as the ``n`` field.

        Returns:
            The response body decoded from JSON.
        """
        response = requests.post(
            f"{self.base_url}/v1/images/generations",
            headers=self.headers,
            json={
                "prompt": prompt,
                "size": size,
                "n": n
            },
            timeout=self.timeout,
        )
        return response.json()
# Usage: build a client for the API host, then request a text completion.
client = BudRuntimeClient(
    base_url="https://api.bud.example.com",
    api_key="your-api-key",
)
result = client.generate_text(prompt="Hello, world!")
JavaScript/TypeScript Client
Copy
/** Connection settings consumed by BudRuntimeClient. */
interface BudRuntimeConfig {
  /** Root URL; endpoint paths such as `/v1/completions` are appended to it. */
  baseUrl: string;
  /** Token sent as `Authorization: Bearer <apiKey>`. */
  apiKey: string;
}
/**
 * Small fetch-based client for the Bud Runtime completion and image endpoints.
 *
 * Responses are returned as decoded JSON. The HTTP status is not inspected,
 * so an error payload from the server is handed back to the caller as-is.
 */
class BudRuntimeClient {
  private config: BudRuntimeConfig;

  constructor(config: BudRuntimeConfig) {
    this.config = config;
  }

  /**
   * Send `payload` as a JSON POST to `path` (relative to `baseUrl`) with the
   * Bearer token attached, and resolve with the decoded JSON body.
   */
  private async post(path: string, payload: Record<string, unknown>) {
    const response = await fetch(`${this.config.baseUrl}${path}`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${this.config.apiKey}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(payload)
    });
    return response.json();
  }

  /**
   * Request a completion from `/v1/completions`.
   *
   * @param prompt    Sent as the `prompt` field.
   * @param model     Sent as the `model` field.
   * @param maxTokens Sent as the `max_tokens` field.
   * @returns The decoded JSON response body.
   */
  async generateText(
    prompt: string,
    model: string = 'llama2-7b',
    maxTokens: number = 100
  ) {
    return this.post('/v1/completions', {
      model,
      prompt,
      max_tokens: maxTokens
    });
  }

  /**
   * Request an image from `/v1/images/generations`.
   *
   * @param prompt Sent as the `prompt` field.
   * @param size   Sent as the `size` field.
   * @param n      Sent as the `n` field.
   * @returns The decoded JSON response body.
   */
  async generateImage(
    prompt: string,
    size: string = '1024x1024',
    n: number = 1
  ) {
    return this.post('/v1/images/generations', {
      prompt,
      size,
      n
    });
  }
}
// Usage: construct a client from a config object.
const client = new BudRuntimeClient({
  baseUrl: 'https://api.bud.example.com',
  apiKey: 'your-api-key',
});
Deployment Snippets
Kubernetes Deployment
Copy
# Deployment running two replicas of a GPU-backed model server in the
# bud-system namespace, with model files mounted from a PVC at /models.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: custom-model
  namespace: bud-system
spec:
  replicas: 2
  selector:
    matchLabels:
      app: custom-model   # must match template.metadata.labels below
  template:
    metadata:
      labels:
        app: custom-model
    spec:
      containers:
        - name: model-server
          # NOTE(review): ":latest" is mutable; pin a version tag or digest
          # for reproducible rollouts once the release tag is known.
          image: budstudio/model-server:latest
          env:
            - name: MODEL_NAME
              value: "custom-llm"
            - name: MODEL_PATH
              value: "/models/custom-llm"   # lives under the /models mount
          resources:
            limits:
              nvidia.com/gpu: 1
              memory: 16Gi
            requests:
              # GPU request is kept equal to the GPU limit.
              nvidia.com/gpu: 1
              memory: 8Gi
          volumeMounts:
            - name: model-storage
              mountPath: /models
      volumes:
        - name: model-storage
          persistentVolumeClaim:
            claimName: model-storage-pvc
Helm Values Override
Copy
# custom-values.yaml
# Overrides for the chart defaults: image registry/tag, GPU node placement
# for budserve, and a TLS-enabled ingress for budgateway.
global:
  imageRegistry: my-registry.example.com
budserve:
  image:
    tag: v1.2.3
  # Schedule only onto nodes labelled gpu-type=nvidia-a100 ...
  nodeSelector:
    gpu-type: "nvidia-a100"
  # ... and tolerate the gpu=true:NoSchedule taint.
  tolerations:
    - key: "gpu"
      operator: "Equal"
      value: "true"
      effect: "NoSchedule"
budgateway:
  ingress:
    enabled: true
    annotations:
      kubernetes.io/ingress.class: nginx
      cert-manager.io/cluster-issuer: letsencrypt-prod
    hosts:
      - host: api.example.com
        paths:
          - path: /
            pathType: Prefix
    tls:
      - secretName: api-tls
        hosts:
          - api.example.com   # same host as the ingress rule above
Monitoring Snippets
Prometheus Query Examples
Copy
# GPU Utilization: mean of gpu_utilization in bud-system, grouped by pod
avg by (pod) (gpu_utilization{namespace="bud-system"})
# Request Rate: per-second rate of http_requests_total over a 5m window
rate(http_requests_total{namespace="bud-system"}[5m])
# Model Inference Latency: 95th percentile from the duration histogram buckets
histogram_quantile(
  0.95,
  rate(model_inference_duration_seconds_bucket{namespace="bud-system"}[5m])
)
# Memory Usage: container memory in use as a fraction of its configured limit
container_memory_usage_bytes{namespace="bud-system"}
  / container_spec_memory_limit_bytes{namespace="bud-system"}
Grafana Dashboard JSON
Copy
{
  "dashboard": {
    "title": "Bud Runtime Monitoring",
    "panels": [
      {
        "title": "GPU Utilization",
        "targets": [
          {
            "expr": "avg(gpu_utilization{namespace=\"bud-system\"}) by (pod)"
          }
        ],
        "type": "graph"
      },
      {
        "title": "Request Rate",
        "targets": [
          {
            "expr": "rate(http_requests_total{namespace=\"bud-system\"}[5m])"
          }
        ],
        "type": "graph"
      }
    ]
  }
}
Utility Scripts
Model Download Script
Copy
#!/bin/bash
# download-model.sh
#
# Download a model file into /models/<model-name>/model.bin.
#
# Usage: ./download-model.sh <model-name> <model-url>
# Exits 0 on success, 1 on bad arguments or any failed step.

MODEL_NAME=$1
MODEL_URL=$2
CACHE_DIR="/models"

if [ -z "$MODEL_NAME" ] || [ -z "$MODEL_URL" ]; then
    echo "Usage: ./download-model.sh <model-name> <model-url>" >&2
    exit 1
fi

# Stop if the target directory cannot be created or entered; otherwise wget
# would write model.bin into whatever directory the script was started from.
mkdir -p "$CACHE_DIR/$MODEL_NAME" || exit 1
cd "$CACHE_DIR/$MODEL_NAME" || exit 1

echo "Downloading $MODEL_NAME..."
# -c resumes a partial download. Check wget's exit status directly: with -O
# the output file is created even when the transfer fails, so the file's
# existence alone does not prove success.
if ! wget -c "$MODEL_URL" -O model.bin; then
    echo "Download failed" >&2
    exit 1
fi

echo "Verifying download..."
# -s requires the file to exist and be non-empty.
if [ -s "model.bin" ]; then
    echo "Model downloaded successfully"
else
    echo "Download failed" >&2
    exit 1
fi
Health Check Script
Copy
#!/usr/bin/env python3
# health-check.py
import requests
import sys
def check_health(base_url, endpoints=("/health", "/v1/models", "/metrics"), timeout=5):
    """Probe each endpoint with a GET and report whether all returned 200.

    One line is printed per endpoint: a check mark for HTTP 200, a cross
    with the status code or error message otherwise.

    Args:
        base_url: Root URL of the service, with or without a trailing slash.
        endpoints: Paths (each starting with "/") appended to ``base_url``.
        timeout: Seconds passed to ``requests.get`` for each probe.

    Returns:
        True if every endpoint answered with status 200 (also True when
        ``endpoints`` is empty), False otherwise.
    """
    # Drop trailing slashes so "/health" never becomes "//health".
    root = base_url.rstrip("/")
    all_healthy = True
    for endpoint in endpoints:
        try:
            response = requests.get(f"{root}{endpoint}", timeout=timeout)
        except Exception as e:
            # Deliberately broad: any failure to get a response counts as
            # unhealthy, and the remaining endpoints are still probed.
            print(f"✗ {endpoint} - Error: {e}")
            all_healthy = False
            continue
        if response.status_code == 200:
            print(f"✓ {endpoint} - OK")
        else:
            print(f"✗ {endpoint} - Status: {response.status_code}")
            all_healthy = False
    return all_healthy
if __name__ == "__main__":
    # The first CLI argument, when given, replaces the default local address.
    cli_args = sys.argv[1:]
    base_url = cli_args[0] if cli_args else "http://localhost:8080"
    healthy = check_health(base_url)
    print("\nAll endpoints healthy!" if healthy else "\nSome endpoints are unhealthy!")
    # Exit status mirrors the result: 0 when all healthy, 1 otherwise.
    sys.exit(0 if healthy else 1)