rag-tool

Running

Chris4K committed on Nov 22, 2023

Commit

49b8a0a

1 Parent(s): ca3c913

Update vector_store_retriever.py

Files changed (1) hide show

vector_store_retriever.py CHANGED Viewed

@@ -96,26 +96,6 @@ load_model("meta-llama/Llama-2-70b-chat-hf")
 #####
 #########
-from langchain.document_loaders import PyPDFDirectoryLoader
-from langchain.document_loaders.utils import RecursiveCharacterTextSplitter
-from langchain.vectorstores import Chroma
-def load_and_process_pdfs(directory_path: str, chunk_size: int = 500, chunk_overlap: int = 200, collection_name: str = "my-collection"):
-    # Load PDF files from the specified directory
-    loader = PyPDFDirectoryLoader(directory_path)
-    documents = loader.load()
-    # Split the text into chunks
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
-    texts = text_splitter.split_documents(documents)
-    # Create a Chroma vector store from the processed texts
-    db = Chroma.from_documents(texts, hf, collection_name=collection_name)
-    return db  # You can return the Chroma vector store if needed
-# Call the function with the desired directory path and parameters
-load_and_process_pdfs("new_papers/")
 ###
 ###

 #####
 #########
 ###
 ###