import { SolVec } from '@veclabs/solvec';
// VecLabs client pinned to the 'devnet' cluster — switch for production.
const sv = new SolVec({ network: 'devnet' });
// Single collection for all document chunks. 1536 dimensions matches the
// vector length produced by embed()/batchEmbed() below.
const collection = sv.collection('knowledge-base', { dimensions: 1536 });
// ── INDEXING PHASE ──────────────────────────────────────
/** A source document to be chunked, embedded, and indexed. */
interface Document {
// Unique identifier; chunk ids are derived from it as `${id}_chunk_${n}`.
id: string;
// Full plain text to be split into overlapping chunks.
text: string;
// Human-readable origin (e.g. a filename), surfaced in citations.
source: string;
// Optional page number, carried through into chunk metadata.
page?: number;
}
/**
 * Chunks, embeds, and upserts a batch of documents into the collection.
 *
 * @param documents Documents to index; each is split into overlapping
 *   word-window chunks before embedding.
 * @throws Error if the embedding provider returns a different number of
 *   vectors than chunks (the two are zipped by index below).
 */
async function indexDocuments(documents: Document[]): Promise<void> {
  // Split into overlapping chunks
  const chunks = documents.flatMap(doc => chunkText(doc));

  // Nothing to embed or upsert — skip the remote calls entirely rather
  // than sending an empty batch.
  if (chunks.length === 0) {
    console.log(`Indexed 0 chunks from ${documents.length} documents`);
    return;
  }

  // Embed all chunks in one batch call
  const embeddings = await batchEmbed(chunks.map(c => c.text));

  // chunks[i] is paired with embeddings[i]; a silent mismatch would store
  // vectors under the wrong ids, so fail loudly instead.
  if (embeddings.length !== chunks.length) {
    throw new Error(
      `Embedding count mismatch: got ${embeddings.length}, expected ${chunks.length}`
    );
  }

  // Store in VecLabs, keeping the chunk text in metadata for retrieval
  await collection.upsert(
    chunks.map((chunk, i) => ({
      id: chunk.id,
      values: embeddings[i],
      metadata: {
        text: chunk.text,
        source: chunk.source,
        page: chunk.page,
        chunkIndex: chunk.chunkIndex,
      }
    }))
  );
  console.log(`Indexed ${chunks.length} chunks from ${documents.length} documents`);
}
/**
 * Splits a document's text into overlapping word-window chunks.
 *
 * @param doc       Source document providing the text and citation metadata.
 * @param chunkSize Maximum number of words per chunk.
 * @param overlap   Number of trailing words repeated at the start of the
 *                  next chunk, preserving context across boundaries.
 * @returns Ordered chunk records carrying the document's source/page metadata.
 * @throws RangeError if chunkSize <= 0 or overlap is not in [0, chunkSize) —
 *   those values make the step (chunkSize - overlap) non-positive, so the
 *   original loop would never terminate.
 */
function chunkText(doc: Document, chunkSize = 400, overlap = 50) {
  if (chunkSize <= 0 || overlap < 0 || overlap >= chunkSize) {
    throw new RangeError(
      `Invalid chunking parameters: chunkSize=${chunkSize}, overlap=${overlap}`
    );
  }
  const words = doc.text.split(' ');
  const chunks = [];
  let i = 0;
  let chunkIndex = 0;
  while (i < words.length) {
    chunks.push({
      id: `${doc.id}_chunk_${chunkIndex}`,
      text: words.slice(i, i + chunkSize).join(' '),
      source: doc.source,
      page: doc.page,
      chunkIndex,
    });
    // Advance by the window size minus the overlap (guaranteed > 0 above).
    i += chunkSize - overlap;
    chunkIndex++;
  }
  return chunks;
}
// ── RETRIEVAL + GENERATION PHASE ───────────────────────
/**
 * Answers a question via retrieval-augmented generation: embed the
 * question, pull the best-matching chunks from the collection, ask the
 * LLM to answer strictly from that context, and append source citations.
 *
 * @param question Natural-language question to answer.
 * @returns The LLM answer with a de-duplicated "Sources:" footer, or a
 *   fixed fallback message when no chunk clears the relevance floor.
 */
async function rag(question: string): Promise<string> {
  // Embed the question into the same vector space as the indexed chunks.
  const questionVector = await embed(question);

  // Retrieve the top chunks above the minimum-relevance threshold.
  const matches = await collection.query({
    vector: questionVector,
    topK: 5,
    minScore: 0.75,
  });

  // Guard clause: nothing relevant found — don't let the LLM guess.
  if (!matches.length) {
    return "I don't have relevant information to answer that question.";
  }

  // Number each retrieved snippet so it can be referenced unambiguously.
  const numberedSnippets: string[] = [];
  for (const [i, r] of matches.entries()) {
    numberedSnippets.push(`[${i + 1}] ${r.metadata.text}\n(Source: ${r.metadata.source})`);
  }
  const context = numberedSnippets.join('\n\n');

  // Ask the LLM to answer from the retrieved context only.
  const prompt = `Answer the question based on the provided context.
If the context doesn't contain enough information, say so.
Context:
${context}
Question: ${question}
Answer:`;
  const answer = await callLLM(prompt);

  // Collect citations, de-duplicated in first-seen order.
  const seenSources = new Set<string>();
  for (const r of matches) {
    seenSources.add(r.metadata.source);
  }
  return `${answer}\n\nSources: ${[...seenSources].join(', ')}`;
}
// Placeholder functions - replace with your providers
/**
 * Placeholder single-text embedder: yields a random 1536-dimension vector.
 * Swap in a real embedding provider before production use.
 */
async function embed(text: string): Promise<number[]> {
  return Array.from({ length: 1536 }, () => Math.random());
}
/**
 * Placeholder batch embedder: one random 1536-dimension vector per input.
 * Swap in a real provider's batch endpoint before production use.
 */
async function batchEmbed(texts: string[]): Promise<number[][]> {
  const vectors: number[][] = [];
  for (let i = 0; i < texts.length; i++) {
    vectors.push(Array(1536).fill(0).map(() => Math.random()));
  }
  return vectors;
}
/**
 * Placeholder LLM call: always returns a canned string.
 * Swap in a real chat-completion provider before production use.
 */
async function callLLM(prompt: string): Promise<string> {
  const cannedAnswer = "LLM response here";
  return cannedAnswer;
}
// ── EXAMPLE USAGE ──────────────────────────────────────
async function main() {
// Index some documents
await indexDocuments([
{
id: 'doc_001',
text: 'VecLabs is a decentralized vector database...',
source: 'veclabs-docs.pdf',
page: 1,
}
]);
// Query
const answer = await rag('How does VecLabs ensure data privacy?');
console.log(answer);
}