rag-chain-agent/app/retriever.py
2025-04-23 04:16:16 +00:00

46 lines
1.7 KiB
Python

from langchain.vectorstores import Weaviate
from langchain_weaviate.vectorstores import WeaviateVectorStore
import weaviate
from weaviate.classes.init import Auth
from weaviate.config import AdditionalConfig, Timeout
from app.embedding import WeaviateCustomEmbeddings
import os
# Weaviate connection settings.
# Both hosts fall back to `internal_url` when the corresponding environment
# variable is unset; set WEAVIATE_HTTP_HOST / WEAVIATE_GRPC_HOST to point the
# client somewhere else (for example at `external_url`).
internal_url = "vector-store-01jse60hep6416b15n1nsj6ch8.cheetah.svc.cluster.local"
external_url = "183.111.96.67"

http_host = os.environ.get("WEAVIATE_HTTP_HOST", internal_url)
grpc_host = os.environ.get("WEAVIATE_GRPC_HOST", internal_url)

# NOTE(review): a literal API key is used as the fallback when
# WEAVIATE_API_KEY is unset. Consider rotating it and requiring the
# environment variable instead of shipping a credential in source.
weaviate_api_key = os.environ.get("WEAVIATE_API_KEY", "01jse60hwsf37za5kmnkzzcwa9")
def get_retriever(k: int = 5):
    """Build a retriever over the "Loca" Weaviate collection.

    Connects to Weaviate with the module-level ``http_host``, ``grpc_host``
    and ``weaviate_api_key`` settings, wraps the client in a
    ``WeaviateVectorStore`` that embeds queries with
    ``WeaviateCustomEmbeddings``, and returns that store's retriever.

    Args:
        k: Value passed to the retriever as ``search_kwargs["k"]``.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        The object returned by ``WeaviateVectorStore.as_retriever``.

    Raises:
        Exception: Anything raised while building the embeddings, the vector
            store or the retriever is re-raised after the client is closed.
            Errors from ``weaviate.connect_to_custom`` propagate unchanged.
    """
    client = weaviate.connect_to_custom(
        http_host=http_host,  # Hostname for the HTTP API connection
        http_port=80,  # Default is 80, WCD uses 443
        http_secure=False,  # Whether to use https (secure) for the HTTP API connection
        grpc_host=grpc_host,  # Hostname for the gRPC API connection
        grpc_port=50051,  # Default is 50051, WCD uses 443
        grpc_secure=False,  # Whether to use a secure channel for the gRPC API connection
        auth_credentials=Auth.api_key(weaviate_api_key),  # API key for authentication
    )
    try:
        vectorstore = WeaviateVectorStore(
            client=client,
            index_name="Loca",
            text_key="page_content",
            embedding=WeaviateCustomEmbeddings(),
            attributes=["source", "page_content"],
        )
        # On success the client is deliberately left open: the returned
        # retriever keeps using it for every subsequent query.
        return vectorstore.as_retriever(search_kwargs={"k": k})
    except Exception:
        # Do not leak the open connection when setup fails part-way through.
        client.close()
        raise