rag-tool

Running

Chris4K committed on Jan 20, 2024

Commit

142d17f

verified ·

1 Parent(s): ebf441f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,6 +20,10 @@ print("-----------")
 print(documents[0])
 print("-----------")
 # Extract the embedding arrays from the PDF documents
 embeddings = []
@@ -27,7 +31,9 @@ for doc in documents:
     embeddings.extend(doc['embeddings'])
 # Create Chroma vector store for API embeddings
-api_db = Chroma.from_texts(embeddings, api_hf_embeddings, collection_name="api-collection")
 # Define the PDF retrieval function

 print(documents[0])
 print("-----------")
+# Split the documents into chunks and embed them using the HfApiEmbeddingTool
+text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
+vdocuments = text_splitter.split_documents(documents)
 # Extract the embedding arrays from the PDF documents
 embeddings = []
     embeddings.extend(doc['embeddings'])
 # Create Chroma vector store for API embeddings
+api_db = Chroma.from_documents(vdocuments, HfApiEmbeddingRetriever, collection_name="api-collection")
+#api_db = Chroma.from_texts(embeddings, api_hf_embeddings, collection_name="api-collection")
 # Define the PDF retrieval function