Skip to main content

API Client Snippets

Python Client

import requests
import json

class BudRuntimeClient:
    """Minimal HTTP client for the completion and image-generation endpoints.

    Args:
        base_url: Root URL of the API (e.g. "https://api.bud.example.com").
            Trailing slashes are stripped so that joining it with an
            endpoint path never produces a double slash.
        api_key: Sent as a bearer token in the ``Authorization`` header.
        timeout: Value passed to ``requests`` as the per-request timeout, in
            seconds. ``requests`` applies no timeout by default, which lets a
            stalled server block the caller forever; pass ``None`` to opt
            back into that behavior.
    """

    def __init__(self, base_url, api_key, timeout=60):
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

    def _post(self, path, payload):
        """POST ``payload`` as JSON to ``path`` and return the decoded JSON body."""
        response = requests.post(
            f"{self.base_url}{path}",
            headers=self.headers,
            json=payload,
            timeout=self.timeout,
        )
        # The body is returned as-is, whatever the HTTP status code was.
        return response.json()

    def generate_text(self, prompt, model="llama2-7b", max_tokens=100):
        """Request a text completion from ``/v1/completions``.

        Args:
            prompt: Text sent as the ``prompt`` field.
            model: Sent as the ``model`` field.
            max_tokens: Sent as the ``max_tokens`` field.

        Returns:
            The decoded JSON response body.
        """
        return self._post(
            "/v1/completions",
            {
                "model": model,
                "prompt": prompt,
                "max_tokens": max_tokens,
            },
        )

    def generate_image(self, prompt, size="1024x1024", n=1):
        """Request image generation from ``/v1/images/generations``.

        Args:
            prompt: Text sent as the ``prompt`` field.
            size: Sent as the ``size`` field.
            n: Sent as the ``n`` field.

        Returns:
            The decoded JSON response body.
        """
        return self._post(
            "/v1/images/generations",
            {
                "prompt": prompt,
                "size": size,
                "n": n,
            },
        )

# Usage: build a client, then request a completion for a short prompt.
client = BudRuntimeClient(
    base_url="https://api.bud.example.com",
    api_key="your-api-key",
)
result = client.generate_text(prompt="Hello, world!")

JavaScript/TypeScript Client

/** Connection settings consumed by BudRuntimeClient. */
interface BudRuntimeConfig {
  /** Root URL that endpoint paths such as `/v1/completions` are appended to. */
  baseUrl: string;
  /** Key sent as a bearer token in the `Authorization` header. */
  apiKey: string;
}

/**
 * Minimal fetch-based client for the completion and image-generation
 * endpoints.
 */
class BudRuntimeClient {
  private config: BudRuntimeConfig;

  /**
   * @param config Base URL and API key. Trailing slashes on `baseUrl` are
   *   stripped so that joining it with an endpoint path never yields "//".
   */
  constructor(config: BudRuntimeConfig) {
    this.config = { ...config, baseUrl: config.baseUrl.replace(/\/+$/, '') };
  }

  /**
   * POST `payload` as JSON to `path` and resolve with the decoded JSON body.
   * The body is returned as-is, whatever the HTTP status code was.
   */
  private async post(path: string, payload: Record<string, unknown>) {
    const response = await fetch(`${this.config.baseUrl}${path}`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${this.config.apiKey}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(payload)
    });
    return response.json();
  }

  /**
   * Request a text completion from `/v1/completions`.
   *
   * @param prompt Text sent as the `prompt` field.
   * @param model Sent as the `model` field.
   * @param maxTokens Sent as the `max_tokens` field.
   * @returns The decoded JSON response body.
   */
  async generateText(prompt: string, model: string = 'llama2-7b', maxTokens: number = 100) {
    return this.post('/v1/completions', {
      model,
      prompt,
      max_tokens: maxTokens
    });
  }

  /**
   * Request image generation from `/v1/images/generations`.
   *
   * @param prompt Text sent as the `prompt` field.
   * @param size Sent as the `size` field.
   * @param n Sent as the `n` field.
   * @returns The decoded JSON response body.
   */
  async generateImage(prompt: string, size: string = '1024x1024', n: number = 1) {
    return this.post('/v1/images/generations', {
      prompt,
      size,
      n
    });
  }
}

// Usage: construct a client from a base URL and an API key.
const client = new BudRuntimeClient({ baseUrl: 'https://api.bud.example.com', apiKey: 'your-api-key' });

Deployment Snippets

Kubernetes Deployment

# Deployment that runs two replicas of a model-server container in the
# bud-system namespace, each with one GPU and a shared model volume.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: custom-model
  namespace: bud-system
spec:
  # Two pods, each requesting one GPU (see resources below).
  replicas: 2
  selector:
    matchLabels:
      # Must match the pod template labels below.
      app: custom-model
  template:
    metadata:
      labels:
        app: custom-model
    spec:
      containers:
      - name: model-server
        # NOTE(review): `latest` is a mutable tag; consider pinning a
        # specific version so rollouts are reproducible.
        image: budstudio/model-server:latest
        env:
        - name: MODEL_NAME
          value: "custom-llm"
        # Points inside the /models volume mounted below.
        - name: MODEL_PATH
          value: "/models/custom-llm"
        resources:
          limits:
            nvidia.com/gpu: 1
            memory: 16Gi
          # GPU request equals the limit; memory request is half the limit.
          requests:
            nvidia.com/gpu: 1
            memory: 8Gi
        volumeMounts:
        - name: model-storage
          mountPath: /models
      volumes:
      # NOTE(review): both replicas mount this same claim; confirm its access
      # mode allows that if the pods can land on different nodes.
      - name: model-storage
        persistentVolumeClaim:
          claimName: model-storage-pvc

Helm Values Override

# custom-values.yaml
# Helm values override: custom image registry, GPU scheduling constraints
# for budserve, and a TLS-enabled ingress for budgateway.
global:
  imageRegistry: my-registry.example.com

budserve:
  image:
    tag: v1.2.3
  # Restrict scheduling to nodes labelled gpu-type=nvidia-a100.
  nodeSelector:
    gpu-type: "nvidia-a100"
  # Tolerate the gpu=true:NoSchedule taint.
  tolerations:
  - key: "gpu"
    operator: "Equal"
    value: "true"
    effect: "NoSchedule"

budgateway:
  ingress:
    enabled: true
    annotations:
      # NOTE(review): newer Kubernetes versions prefer spec.ingressClassName
      # over this annotation; check whether the chart exposes that field.
      kubernetes.io/ingress.class: nginx
      cert-manager.io/cluster-issuer: letsencrypt-prod
    hosts:
    - host: api.example.com
      paths:
      - path: /
        pathType: Prefix
    # The TLS host list matches the ingress host above.
    tls:
    - secretName: api-tls
      hosts:
      - api.example.com

Monitoring Snippets

Prometheus Query Examples

# GPU Utilization: average of gpu_utilization per pod in bud-system
avg(gpu_utilization{namespace="bud-system"}) by (pod)

# Request Rate: per-second rate of http_requests_total over a 5-minute window
rate(http_requests_total{namespace="bud-system"}[5m])

# Model Inference Latency: 95th percentile from the histogram buckets,
# computed over a 5-minute window
histogram_quantile(0.95,
  rate(model_inference_duration_seconds_bucket{namespace="bud-system"}[5m])
)

# Memory Usage: usage divided by the configured limit (1.0 = at the limit)
# NOTE(review): series whose limit is 0 (presumably containers with no memory
# limit set) will divide to +Inf; confirm and filter if needed.
container_memory_usage_bytes{namespace="bud-system"} /
container_spec_memory_limit_bytes{namespace="bud-system"}

Grafana Dashboard JSON

{
  "dashboard": {
    "title": "Bud Runtime Monitoring",
    "panels": [
      {
        "title": "GPU Utilization",
        "targets": [
          {
            "expr": "avg(gpu_utilization{namespace=\"bud-system\"}) by (pod)"
          }
        ],
        "type": "graph"
      },
      {
        "title": "Request Rate",
        "targets": [
          {
            "expr": "rate(http_requests_total{namespace=\"bud-system\"}[5m])"
          }
        ],
        "type": "graph"
      }
    ]
  }
}

Utility Scripts

Model Download Script

#!/bin/bash
# download-model.sh
#
# Download a model file to $CACHE_DIR/<model-name>/model.bin.
#
# Usage: ./download-model.sh <model-name> <model-url>
# Exits 1 on missing arguments or a failed download.

# Abort on any failed command (including mkdir/cd) and on unset variables.
set -euo pipefail

# Default to empty so the usage check below runs instead of `set -u` aborting.
MODEL_NAME="${1:-}"
MODEL_URL="${2:-}"
CACHE_DIR="/models"

if [ -z "$MODEL_NAME" ] || [ -z "$MODEL_URL" ]; then
  echo "Usage: ./download-model.sh <model-name> <model-url>" >&2
  exit 1
fi

mkdir -p "$CACHE_DIR/$MODEL_NAME"
cd "$CACHE_DIR/$MODEL_NAME"

echo "Downloading $MODEL_NAME..."
# -c resumes a partial download. Check wget's exit status explicitly:
# -O creates model.bin even when the transfer fails, so the file's mere
# existence proves nothing.
if ! wget -c "$MODEL_URL" -O model.bin; then
  echo "Download failed" >&2
  exit 1
fi

echo "Verifying download..."
# -s: the file must exist AND be non-empty.
if [ -s "model.bin" ]; then
  echo "Model downloaded successfully"
else
  echo "Download failed" >&2
  exit 1
fi

Health Check Script

#!/usr/bin/env python3
# health-check.py

import requests
import sys

def check_health(base_url):
    """Probe the service's health, model-list and metrics endpoints.

    Each endpoint is fetched with a GET request (5-second timeout) and one
    result line is printed per endpoint. Any status other than 200, or any
    exception raised while probing, marks the service as unhealthy.

    Args:
        base_url: URL prefix that each endpoint path is appended to.

    Returns:
        True if every endpoint answered with status 200, False otherwise.
    """
    healthy = True

    for path in ("/health", "/v1/models", "/metrics"):
        try:
            status = requests.get(f"{base_url}{path}", timeout=5).status_code
            if status != 200:
                print(f"✗ {path} - Status: {status}")
                healthy = False
            else:
                print(f"✓ {path} - OK")
        except Exception as exc:
            # Best-effort probe: report the failure and keep checking the rest.
            print(f"✗ {path} - Error: {exc}")
            healthy = False

    return healthy

if __name__ == "__main__":
    # The first CLI argument overrides the default local target.
    base_url = "http://localhost:8080"
    if len(sys.argv) > 1:
        base_url = sys.argv[1]

    # Exit code mirrors the result: 0 when everything is healthy, 1 otherwise.
    healthy = check_health(base_url)
    if not healthy:
        print("\nSome endpoints are unhealthy!")
        sys.exit(1)
    print("\nAll endpoints healthy!")
    sys.exit(0)
For more examples and integration guides, see our API Reference.