import { SolVec } from "@veclabs/solvec";
// SolVec client configured for the "devnet" network.
const sv = new SolVec({ network: "devnet" });
// Handle to the "contracts" collection, declared with 1536 dimensions.
// NOTE(review): presumably 1536 has to match the length of the vectors
// returned by embed/batchEmbed — confirm against the embedding model in use.
const collection = sv.collection("contracts", { dimensions: 1536 });
/**
 * Shape describing a single chunk of a contract together with the
 * identifying fields of the contract it came from.
 *
 * NOTE(review): this interface is not referenced anywhere in the visible
 * code. It appears to mirror the metadata object attached in
 * `indexContracts`, but that object stores the chunk text under the key
 * `text`, not `chunkText` — confirm which name is intended before relying
 * on this type.
 */
interface ContractChunk {
// Identifier of the source contract.
contractId: string;
// Display name of the source contract.
contractName: string;
// Party associated with the contract (a plain string; no format is enforced here).
party: string;
// Effective date kept as a string; the expected date format is not specified in this file.
effectiveDate: string;
// Text content of the chunk.
chunkText: string;
// Position of the chunk within its contract (presumably zero-based, as in indexContracts — confirm).
chunkIndex: number;
}
/**
 * Splits each contract into paragraph chunks, embeds the chunks, and upserts
 * one vector per chunk into `collection`.
 *
 * Chunk ids have the form `<contractId>__p<chunkIndex>`, so indexing the same
 * contract again writes to the same ids. Contracts are processed one at a
 * time, in the order given.
 *
 * NOTE(review): nothing here deletes previously written ids. If a contract is
 * re-indexed and now yields fewer chunks, the higher-numbered chunk ids from
 * the earlier run are left in place.
 *
 * @param contracts Contracts to index; `fullText` is the text that gets chunked.
 * @throws Error if `batchEmbed` returns a different number of embeddings than
 *   the number of chunks it was given.
 */
async function indexContracts(
  contracts: Array<{
    id: string;
    name: string;
    party: string;
    date: string;
    fullText: string;
  }>,
) {
  for (const contract of contracts) {
    const chunks = chunkByParagraph(contract.fullText);

    // A contract with no qualifying paragraphs has nothing to embed or
    // upsert; skip it instead of issuing empty requests.
    if (chunks.length === 0) {
      continue;
    }

    const embeddings = await batchEmbed(chunks);

    // Guard against a short or long response: pairing chunks with
    // embeddings by index would otherwise upsert `undefined` vectors.
    if (embeddings.length !== chunks.length) {
      throw new Error(
        `Embedding count mismatch for contract ${contract.id}: ` +
          `expected ${chunks.length}, received ${embeddings.length}`,
      );
    }

    await collection.upsert(
      chunks.map((chunk, i) => ({
        id: `${contract.id}__p${i}`,
        values: embeddings[i],
        metadata: {
          contractId: contract.id,
          contractName: contract.name,
          party: contract.party,
          effectiveDate: contract.date,
          text: chunk,
          chunkIndex: i,
        },
      })),
    );
  }
}
// Find relevant clauses across all contracts
/**
 * Embeds `query`, searches `collection` for matching chunks, and returns the
 * hits grouped by contract.
 *
 * Groups are returned in the order in which each contract first appears in
 * the query results; clauses within a group keep the result order as well.
 *
 * @param query Free-text query to embed and search with.
 * @param topK Value passed to the collection query as `topK`. Defaults to 20.
 * @param minScore Value passed to the collection query as `minScore`.
 *   Defaults to 0.78, a deliberately high threshold for legal documents.
 * @returns One entry per contract with at least one hit: the contract's id,
 *   name and party (taken from its first hit), the text of every matching
 *   chunk, and the highest score among those chunks.
 */
async function findClauses(query: string, topK = 20, minScore = 0.78) {
  const embedding = await embed(query);
  const results = await collection.query({
    vector: embedding,
    topK,
    minScore,
  });

  // Group by contract. Appending to the existing group avoids re-copying the
  // whole group array for every hit.
  const byContract = new Map<string, Array<(typeof results)[number]>>();
  for (const result of results) {
    const contractId = result.metadata.contractId;
    const group = byContract.get(contractId);
    if (group) {
      group.push(result);
    } else {
      byContract.set(contractId, [result]);
    }
  }

  // Every group holds at least one hit, so reading chunks[0] is safe.
  return Array.from(byContract.entries()).map(([contractId, chunks]) => ({
    contractId,
    contractName: chunks[0].metadata.contractName,
    party: chunks[0].metadata.party,
    relevantClauses: chunks.map((c) => c.metadata.text),
    maxScore: Math.max(...chunks.map((c) => c.score)),
  }));
}
/**
 * Splits `text` into paragraphs and drops the very short ones.
 *
 * A paragraph break is a line break followed by one or more blank lines. Both
 * LF and CRLF line endings are recognised, and a line containing only spaces
 * or tabs counts as blank. Each paragraph is trimmed before its length is
 * checked.
 *
 * @param text Full document text.
 * @returns Trimmed paragraphs longer than 50 characters, in document order.
 */
function chunkByParagraph(text: string): string[] {
  // Paragraphs of this length or shorter are skipped.
  const minLengthExclusive = 50;
  // One line break, then at least one "blank" line (optional spaces/tabs
  // followed by a line break). `\r?` makes Windows line endings split too.
  const paragraphBreak = /\r?\n(?:[ \t]*\r?\n)+/;

  return text
    .split(paragraphBreak)
    .map((paragraph) => paragraph.trim())
    .filter((paragraph) => paragraph.length > minLengthExclusive);
}