# Provenance: contents of app/retriever.py as created by commit
# bd618fd353e7908035e78d82d2de7c73fb648850 ("Upload retriever.py").
"""Build a LangChain retriever backed by a Weaviate collection.

Connection settings are read from environment variables once, at import time:

* ``WEAVIATE_HTTP_HOST`` / ``WEAVIATE_HTTP_PORT`` - HTTP API endpoint
* ``WEAVIATE_GRPC_HOST`` / ``WEAVIATE_GRPC_PORT`` - gRPC API endpoint
* ``WEAVIATE_API_KEY`` - API key used for authentication
"""
import os

# NOTE(review): `Weaviate`, `AdditionalConfig` and `Timeout` are not used in
# this module; they are kept so the module's import surface is unchanged.
from langchain.vectorstores import Weaviate
from langchain_weaviate.vectorstores import WeaviateVectorStore
import weaviate
from weaviate.classes.init import Auth
from weaviate.config import AdditionalConfig, Timeout

from embedding import WeaviateCustomEmbeddings

http_host = os.getenv("WEAVIATE_HTTP_HOST", "183.111.96.67")
grpc_host = os.getenv("WEAVIATE_GRPC_HOST", "183.111.96.67")
# Ports are overridable like the hosts; the defaults are the values that were
# previously hard-coded in the connect call.
http_port = int(os.getenv("WEAVIATE_HTTP_PORT", "32656"))
grpc_port = int(os.getenv("WEAVIATE_GRPC_PORT", "30498"))
# NOTE(review): SECURITY - the fallback below embeds a credential in source
# control. It is kept so existing deployments keep working, but it should be
# rotated and supplied only through WEAVIATE_API_KEY.
weaviate_api_key = os.getenv("WEAVIATE_API_KEY", "01jse60hwsf37za5kmnkzzcwa9")


def get_retriever(k: int = 5):
    """Connect to Weaviate and return a retriever over the ``Loca`` index.

    Every call opens a new client connection. On success the client is left
    open, because the returned retriever is built on the vector store that
    holds it. If anything fails after the connection is established, the
    client is closed before the exception propagates.

    Args:
        k: Value passed to the retriever as ``search_kwargs["k"]``.
            Defaults to 5.

    Returns:
        The object produced by ``WeaviateVectorStore.as_retriever``.

    Raises:
        ValueError: If ``k`` is less than 1.
    """
    if k < 1:
        # Checked before connecting so a bad argument never opens a client.
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # NOTE(review): both channels are configured without TLS, so the API key
    # is presumably sent unencrypted - confirm this is acceptable for the
    # target network.
    client = weaviate.connect_to_custom(
        http_host=http_host,  # Hostname for the HTTP API connection
        http_port=http_port,  # Port for the HTTP API connection
        http_secure=False,  # Whether to use https for the HTTP API connection
        grpc_host=grpc_host,  # Hostname for the gRPC API connection
        grpc_port=grpc_port,  # Port for the gRPC API connection
        grpc_secure=False,  # Whether to use a secure channel for gRPC
        auth_credentials=Auth.api_key(weaviate_api_key),  # API key auth
    )

    try:
        embedding = WeaviateCustomEmbeddings()

        vectorstore = WeaviateVectorStore(
            client=client,
            index_name="Loca",
            text_key="page_content",
            embedding=embedding,
            attributes=["source", "page_content"],
        )

        return vectorstore.as_retriever(search_kwargs={"k": k})
    except BaseException:
        # Do not leak the open connection when construction fails part-way.
        client.close()
        raise