import axios from 'axios';
/**
 * Reference to the input sent under the `document` key of the request payload.
 * `type` names which of the two URL fields carries the location.
 */
interface DocumentInput {
/** Selects which URL field below is populated. */
type: 'document_url' | 'image_url';
/** Location of the input when `type` is `'document_url'`. */
document_url?: string;
/** Location of the input when `type` is `'image_url'`. */
image_url?: string;
}
/** One entry of the `pages` array in a document-processing response. */
interface PageResult {
/** Page number as reported by the API. NOTE(review): 0- vs 1-based is not visible here — confirm. */
page_number: number;
/** Markdown text returned for this page. */
markdown: string;
}
/**
 * Response body expected from `POST {baseUrl}/v1/documents`.
 * Field meanings beyond their names and types are not established by this file;
 * the notes below are assumptions to confirm against the API documentation.
 */
interface DocumentResponse {
/** Identifier of this response. */
id: string;
/** NOTE(review): presumably an object-type tag — confirm. */
object: string;
/** NOTE(review): presumably a creation timestamp; unit (s vs ms) unknown — confirm. */
created: number;
/** NOTE(review): presumably the model that handled the request — confirm. */
model: string;
/** Identifier of the processed document. */
document_id: string;
/** Per-page results. */
pages: PageResult[];
/** Usage figures for the request. */
usage_info: {
/** Number of pages processed. */
pages_processed: number;
/** NOTE(review): presumably the input size in bytes — confirm. */
size_bytes: number;
/** NOTE(review): presumably the input's file name — confirm. */
filename: string;
};
}
/**
 * Small client for a document-processing HTTP API.
 *
 * Sends a document or image URL to `POST {baseUrl}/v1/documents` with a bearer
 * token and returns the per-page markdown response. Also offers a helper that
 * pulls a fenced JSON block out of the returned markdown.
 */
class DocumentProcessor {
  private readonly apiKey: string;
  private readonly baseUrl: string;

  /**
   * @param apiKey Token sent as `Authorization: Bearer <apiKey>`.
   * @param baseUrl API root; trailing slashes are ignored.
   */
  constructor(apiKey: string, baseUrl = 'https://api.example.com') {
    this.apiKey = apiKey;
    // Strip trailing slashes so the endpoint never becomes ".../​/v1/documents".
    this.baseUrl = baseUrl.replace(/\/+$/, '');
  }

  /**
   * Submits a document (or image) URL for processing.
   *
   * @param documentUrl Location of the input to process.
   * @param docType Whether `documentUrl` is sent as `document_url` or `image_url`.
   * @param model Model identifier sent in the payload.
   * @param prompt Optional instruction; omitted from the payload when empty or undefined.
   * @returns The response body of the request.
   * @throws Error with message `Document processing failed: <detail>` when axios
   *   reports the failure; any other error is rethrown unchanged.
   */
  async processDocument(
    documentUrl: string,
    docType: 'document_url' | 'image_url' = 'document_url',
    model = 'buddoc-v1',
    prompt?: string
  ): Promise<DocumentResponse> {
    const url = `${this.baseUrl}/v1/documents`;

    // Build the input as a checked literal rather than a computed key + assertion,
    // so a mismatch between `type` and the populated field cannot compile.
    const document: DocumentInput =
      docType === 'image_url'
        ? { type: 'image_url', image_url: documentUrl }
        : { type: 'document_url', document_url: documentUrl };

    const payload = {
      model,
      document,
      // An empty prompt is treated the same as no prompt.
      ...(prompt ? { prompt } : {})
    };

    try {
      const response = await axios.post<DocumentResponse>(url, payload, {
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json'
        }
      });
      return response.data;
    } catch (error: unknown) {
      if (axios.isAxiosError(error)) {
        // Prefer the message from the error body, but only when it is a usable
        // string; otherwise fall back to the axios message instead of printing
        // something like "[object Object]".
        const apiMessage: unknown = error.response?.data?.error?.message;
        const detail =
          typeof apiMessage === 'string' && apiMessage !== '' ? apiMessage : error.message;
        throw new Error(`Document processing failed: ${detail}`);
      }
      throw error;
    }
  }

  /**
   * Asks the API to extract data matching `schema` and parses the first fenced
   * `json` code block found in the combined page markdown.
   *
   * The parsed value is NOT validated against `schema`; `T` is a caller-side
   * assertion only. The `any` default is kept for backward compatibility.
   *
   * @param documentUrl Location of the document to process.
   * @param schema JSON schema text embedded verbatim in the prompt.
   * @returns The parsed JSON value, typed as `T`.
   * @throws Error when no fenced `json` block is present.
   * @throws SyntaxError when the block's content is not valid JSON.
   */
  async extractStructuredData<T = any>(
    documentUrl: string,
    schema: string
  ): Promise<T> {
    const result = await this.processDocument(
      documentUrl,
      'document_url',
      'buddoc-v1',
      `Extract data according to this JSON schema: ${schema}`
    );

    const combinedMarkdown = result.pages
      .map((page) => page.markdown)
      .join('\n\n');

    // Accept LF or CRLF line endings, trailing blanks after the language tag,
    // an indented closing fence, and any letter case for "json".
    const jsonMatch = /```json[ \t]*\r?\n([\s\S]*?)\r?\n[ \t]*```/i.exec(combinedMarkdown);
    if (!jsonMatch) {
      throw new Error('No structured data found in document');
    }

    try {
      return JSON.parse(jsonMatch[1] ?? '') as T;
    } catch (error: unknown) {
      // Keep the SyntaxError type callers may already catch, but say where it came from.
      const reason = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(`Structured data block is not valid JSON: ${reason}`);
    }
  }
}
// Usage: module-level client shared by the example below.
// NOTE: the `!` only silences the compiler. If API_KEY is unset, the constructor
// receives `undefined` at runtime and requests go out with "Bearer undefined".
const processor = new DocumentProcessor(process.env.API_KEY!);
/**
 * Example run: processes a sample PDF with a summarization prompt, then prints
 * the processed page count followed by each page's number and markdown.
 * Any failure is logged to stderr; the returned promise does not reject.
 */
async function analyzeDocument(): Promise<void> {
  try {
    const response = await processor.processDocument(
      'https://example.com/report.pdf',
      'document_url',
      'buddoc-v1',
      'Summarize key findings'
    );
    const { usage_info: usage, pages } = response;

    console.log(`Processed ${usage.pages_processed} pages`);
    for (const { page_number: pageNumber, markdown } of pages) {
      console.log(`Page ${pageNumber}:`);
      console.log(markdown);
    }
  } catch (error) {
    console.error('Error:', error);
  }
}

// Fire-and-forget: errors are handled inside analyzeDocument itself.
void analyzeDocument();