LangChain Integration - Python
Official LangChain integration is on the VecLabs roadmap. The implementation
below shows how to use VecLabs as a custom vectorstore in LangChain Python
applications today using LangChain’s base class interface.
Custom vectorstore implementation
LangChain’s VectorStore base class makes it straightforward to wrap VecLabs:
from __future__ import annotations
from typing import Any, Iterable, List, Optional, Tuple
from langchain_core.vectorstores import VectorStore
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from solvec import SolVec
import uuid
class VecLabsVectorStore(VectorStore):
    """LangChain ``VectorStore`` backed by a VecLabs (SolVec) collection.

    Texts are embedded with the supplied ``Embeddings`` model and upserted
    into a SolVec collection.  The raw text is stored under the reserved
    ``"text"`` metadata key so search results can be rehydrated into
    ``Document`` objects.
    """

    def __init__(
        self,
        collection_name: str,
        embedding: Embeddings,
        dimensions: int,
        network: str = "devnet",
    ):
        """Create (or attach to) the named VecLabs collection.

        Args:
            collection_name: Name of the VecLabs collection.
            embedding: Embedding model used for both documents and queries.
            dimensions: Dimensionality of the embedding vectors.
            network: Target network (default ``"devnet"``).
        """
        self._embedding = embedding
        self._collection_name = collection_name
        self._sv = SolVec(network=network)
        # Cosine is the similarity metric most text-embedding models are
        # trained for, so it is hard-coded here.
        self._collection = self._sv.collection(
            collection_name,
            dimensions=dimensions,
            metric="cosine",
        )

    @property
    def embeddings(self) -> Embeddings:
        """Embedding model used by this store."""
        return self._embedding

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Embed ``texts`` and upsert them into the collection.

        Args:
            texts: Strings to embed and store.
            metadatas: Optional metadata dicts, parallel to ``texts``.
            ids: Optional ids, parallel to ``texts``; random UUID4 strings
                are generated when omitted.

        Returns:
            The ids under which the texts were stored.

        Raises:
            ValueError: If ``metadatas`` or ``ids`` is given with a length
                different from ``texts`` (previously ``zip`` silently
                truncated and dropped data).
        """
        texts = list(texts)
        if ids is None:
            ids = [str(uuid.uuid4()) for _ in texts]
        if metadatas is None:
            metadatas = [{} for _ in texts]
        if len(ids) != len(texts) or len(metadatas) != len(texts):
            raise ValueError(
                "texts, metadatas and ids must all have the same length"
            )
        if not texts:
            # Nothing to embed; skip the embedding call and upsert round-trip.
            return []
        embeddings = self._embedding.embed_documents(texts)
        self._collection.upsert([
            {
                "id": id_,
                "values": vector,
                # The source text rides along in metadata under the reserved
                # "text" key so similarity search can rebuild Documents.
                "metadata": {**meta, "text": text},
            }
            for id_, text, vector, meta in zip(ids, texts, embeddings, metadatas)
        ])
        return ids

    def similarity_search(
        self,
        query: str,
        k: int = 4,
        **kwargs: Any,
    ) -> List[Document]:
        """Return the ``k`` documents most similar to ``query``."""
        docs_and_scores = self.similarity_search_with_score(query, k=k, **kwargs)
        return [doc for doc, _ in docs_and_scores]

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return up to ``k`` ``(Document, score)`` pairs for ``query``."""
        query_embedding = self._embedding.embed_query(query)
        results = self._collection.query(
            vector=query_embedding,
            top_k=k,
        )
        # Strip the reserved "text" key back out of metadata; it becomes the
        # Document's page_content.  Loop variables are named key/val so they
        # do not shadow the ``k`` (top-k) parameter.
        return [
            (
                Document(
                    page_content=r.metadata.get("text", ""),
                    metadata={
                        key: val
                        for key, val in r.metadata.items()
                        if key != "text"
                    },
                ),
                r.score,
            )
            for r in results
        ]

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        collection_name: str = "langchain",
        dimensions: int = 1536,
        network: str = "devnet",
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> "VecLabsVectorStore":
        """Build a store from raw texts (LangChain convenience constructor).

        ``ids`` is now forwarded to :meth:`add_texts` (it was previously
        dropped); the parameter is appended last so existing positional
        callers are unaffected.
        """
        store = cls(
            collection_name=collection_name,
            embedding=embedding,
            dimensions=dimensions,
            network=network,
        )
        store.add_texts(texts, metadatas=metadatas, ids=ids)
        return store
Usage in a RAG chain
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Set up the embedding model and the VecLabs-backed vectorstore.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
vectorstore = VecLabsVectorStore(
    collection_name="my-knowledge-base",
    embedding=embeddings,
    dimensions=1536,
    network="devnet",
)

# Load a document, split it into overlapping chunks, and index them.
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

docs = TextLoader("my-document.txt").load()
chunks = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
).split_documents(docs)
vectorstore.add_documents(chunks)

# Assemble the retrieval-augmented generation chain.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
llm = ChatOpenAI(model="gpt-4o-mini")


def format_docs(docs):
    """Join retrieved documents into a single context string."""
    return "\n\n".join(d.page_content for d in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Ask a question against the indexed knowledge base.
answer = rag_chain.invoke("How does VecLabs ensure data privacy?")
print(answer)
Usage with LangChain agents
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.tools import Tool
from langchain_openai import ChatOpenAI


def _search_knowledge_base(query: str) -> str:
    """Run a top-3 similarity search and join the hits into one string."""
    hits = vectorstore.similarity_search(query, k=3)
    return "\n\n".join(doc.page_content for doc in hits)


# Expose the vectorstore to the agent as a plain text-in/text-out tool.
search_tool = Tool(
    name="knowledge_base_search",
    description="Search the knowledge base for relevant information",
    func=_search_knowledge_base,
)

llm = ChatOpenAI(model="gpt-4o")

# Standard LangChain agent setup
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant with access to a knowledge base."),
    MessagesPlaceholder("chat_history", optional=True),
    ("human", "{input}"),
    MessagesPlaceholder("agent_scratchpad"),
])

tools = [search_tool]
agent_executor = AgentExecutor(
    agent=create_openai_tools_agent(llm, tools, prompt),
    tools=tools,
)

result = agent_executor.invoke({"input": "What is VecLabs?"})
print(result["output"])