import os
from glob import glob

import openai
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain_community.chat_models import ChatOpenAI

# Load the OpenAI API key from a local .env file.
# Note: this module uses the pre-1.0 openai SDK (openai.ChatCompletion was
# removed in openai 1.x).
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = api_key
# Helper function to validate response completeness
def is_response_complete(response: str) -> bool:
    stripped = response.strip()
    return bool(stripped) and stripped[-1] in ".!?"
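# For example, is_response_complete("All done.") returns True, while a reply
# truncated mid-sentence, e.g. "The model stopped mid", returns False. This is
# a rough heuristic, not a guarantee of completeness.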
# Retry mechanism for incomplete responses: re-query the model a few times and
# only fall back to an explicit closing sentence if every attempt looks cut off
def retry_response(messages, max_retries=2):
    response = ""
    for _ in range(max_retries):
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages
        ).choices[0].message['content']
        if is_response_complete(response):
            return response
    return response + " This is the end of the response. Please let me know if you need further clarification."
def base_model_chatbot(messages):
    system_message = [
        {"role": "system", "content": "You are a helpful AI chatbot that provides clear, complete, and coherent responses to User's questions. Ensure your answers are in full sentences and complete the thought or idea."}
    ]
    messages = system_message + messages
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages
    ).choices[0].message['content']
    # Validate response completeness
    if not is_response_complete(response):
        response = retry_response(messages)
    return response
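# A minimal usage sketch (the history below is hypothetical; callers are
# assumed to pass OpenAI-style message dicts, e.g. from a chat UI):
#
#     history = [{"role": "user", "content": "What is retrieval-augmented generation?"}]
#     print(base_model_chatbot(history))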
class VectorDB:
    """Class to manage document loading and vector database creation."""

    def __init__(self, docs_directory: str):
        self.docs_directory = docs_directory

    def create_vector_db(self):
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        files = glob(os.path.join(self.docs_directory, "*.pdf"))
        pdf_loaders = [PyPDFLoader(pdf_file) for pdf_file in files]
        pdf_docs = []
        for loader in pdf_loaders:
            pdf_docs.extend(loader.load())
        chunks = text_splitter.split_documents(pdf_docs)
        return Chroma.from_documents(chunks, OpenAIEmbeddings())
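# A quick sanity check of the vector store (assumes a docs/ folder containing
# PDFs; similarity_search is the standard Chroma query method):
#
#     db = VectorDB("docs/").create_vector_db()
#     for doc in db.similarity_search("What does the report conclude?", k=2):
#         print(doc.page_content[:200])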
class ConversationalRetrievalChain:
    """Class to manage the QA chain setup."""

    def __init__(self, model_name="gpt-3.5-turbo", temperature=0):
        self.model_name = model_name
        self.temperature = temperature

    def create_chain(self):
        # ChatOpenAI has no system_prompt argument; the system instruction is
        # injected through the chain's prompt template instead.
        model = ChatOpenAI(
            model_name=self.model_name,
            temperature=self.temperature
        )
        prompt = PromptTemplate(
            input_variables=["context", "question"],
            template=(
                "You are a knowledgeable AI that answers questions based on "
                "provided documents. Always give responses in clear, complete "
                "sentences.\n\nContext:\n{context}\n\nQuestion: {question}\nAnswer:"
            ),
        )
        # Note: RetrievalQA records each exchange in memory, but it does not
        # feed chat_history back into the prompt on later turns.
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        vector_db = VectorDB('docs/')
        retriever = vector_db.create_vector_db().as_retriever(
            search_type="similarity", search_kwargs={"k": 2}
        )
        return RetrievalQA.from_chain_type(
            llm=model,
            retriever=retriever,
            memory=memory,
            chain_type_kwargs={"prompt": prompt},
        )
def with_pdf_chatbot(messages):
    """Answer the latest user question from the PDF knowledge base."""
    query = messages[-1]['content'].strip()
    # Note: this rebuilds the vector store on every call; cache the chain if
    # startup latency becomes a problem.
    qa_chain = ConversationalRetrievalChain().create_chain()
    result = qa_chain({"query": query})
    if not is_response_complete(result['result']):
        result['result'] += " This is the end of the response. Let me know if you need further clarification."
    return result['result']
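# A minimal end-to-end sketch (assumptions: OPENAI_API_KEY is set in .env and
# the PDFs live under docs/; the sample question is hypothetical):
if __name__ == "__main__":
    chat_history = [{"role": "user", "content": "Summarize the key points of the uploaded PDFs."}]
    print(with_pdf_chatbot(chat_history))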