import os

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from eval_config import CHUNK_SIZE, CHUNK_OVERLAP
def load_training_documents(file_path):
    """Load a PDF and split its contents into overlapping text chunks.

    The file is read with ``PyPDFLoader`` and the loaded documents are then
    split by a ``RecursiveCharacterTextSplitter`` whose chunk size and
    overlap come from ``eval_config`` (``CHUNK_SIZE`` / ``CHUNK_OVERLAP``).
    Chunk length is measured with ``len``, i.e. in characters.

    Args:
        file_path: Location of the PDF, given as a string or any
            ``os.PathLike`` object (for example ``pathlib.Path``).

    Returns:
        The result of ``split_documents`` on the loaded documents
        (per the LangChain docs, a list of ``Document`` chunks).
    """
    # Convert path-like objects to their plain representation before the
    # loader sees them; strings pass through untouched.
    # NOTE(review): some langchain_community releases appear to assume a
    # ``str`` here -- confirm against the pinned version.
    loader = PyPDFLoader(os.fspath(file_path))
    documents = loader.load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
        length_function=len,
    )
    return splitter.split_documents(documents)
def load_sample_questions(questions):
    """Return the supplied sample questions unchanged.

    This is a pass-through: the argument is neither validated, copied nor
    transformed, so the caller gets back the very same object it passed in.

    Args:
        questions: The sample questions, in whatever form the caller uses.

    Returns:
        The same ``questions`` object that was passed in.
    """
    return questions