rag-tool

Running

Chris4K committed on Nov 21, 2023

Commit

b54046d

1 Parent(s): a9f4491

Update vector_store_retriever.py

Files changed (1) hide show

vector_store_retriever.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 from langchain.vectorstores import Chroma
 from langchain.document_loaders import PyPDFLoader
 from langchain.embeddings import HuggingFaceInstructEmbeddings
 # Initialize the HuggingFaceInstructEmbeddings
 hf = HuggingFaceInstructEmbeddings(
@@ -14,8 +15,12 @@ hf = HuggingFaceInstructEmbeddings(
 loader = PyPDFLoader('./new_papers/new_papers/', glob="./*.pdf")
 documents = loader.load()
 # Create a Chroma vector store from the PDF documents
-db = Chroma.from_documents(documents, hf, collection_name="my-collection")
 class VectoreStoreRetrievalTool:
     def __init__(self):

 from langchain.vectorstores import Chroma
 from langchain.document_loaders import PyPDFLoader
 from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 # Initialize the HuggingFaceInstructEmbeddings
 hf = HuggingFaceInstructEmbeddings(
 loader = PyPDFLoader('./new_papers/new_papers/', glob="./*.pdf")
 documents = loader.load()
+# Split the loaded documents into overlapping chunks before embedding them
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+texts = text_splitter.split_documents(documents)
 # Create a Chroma vector store from the PDF documents
+db = Chroma.from_documents(texts, hf, collection_name="my-collection")
 class VectoreStoreRetrievalTool:
     def __init__(self):