| | import os |
# Export the Hugging Face cache locations. This runs ahead of the third-party
# imports that follow -- presumably so those libraries see the paths when they
# are imported (TODO confirm).
os.environ.update(
    {
        "TRANSFORMERS_CACHE": "/app/.cache/transformers",
        "HF_HOME": "/app/.cache/huggingface",
    }
)
| |
|
| | from fastapi import FastAPI, File, UploadFile |
| | from fastapi.responses import StreamingResponse |
| | from fastapi.middleware.cors import CORSMiddleware |
| | import os |
| | import openai |
| | from io import BytesIO |
| | from gtts import gTTS |
| | import tempfile |
| | from dotenv import load_dotenv |
| | from sentence_transformers import SentenceTransformer |
| | import math |
| | from collections import Counter |
| | import json |
| | import pandas as pd |
| | import asyncio |
| | import numpy as np |
| | from deepgram import Deepgram |
| | from fastapi.staticfiles import StaticFiles |
| | from fastapi.responses import HTMLResponse |
| | import openai as _openai_mod |
| | import requests |
| |
|
# Pull variables from a local .env file into the process environment, then
# build the speech-to-text client and configure the OpenAI SDK from them.
load_dotenv()

DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
dg_client = Deepgram(DEEPGRAM_API_KEY)

openai.api_key = os.environ.get("OPENAI_API_KEY")
| |
|
# Application object plus cross-cutting setup (CORS and static assets).
app = FastAPI()

# NOTE(review): every origin, method and header is allowed while credentials
# are enabled. The FastAPI CORS documentation advises against combining
# wildcards with allow_credentials=True -- confirm this is intended.
_cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)

# Files in the local "static" directory are served under the /static prefix.
app.mount("/static", StaticFiles(directory="static"), name="static")
| |
|
@app.get("/", response_class=HTMLResponse)
async def serve_html():
    """Return the landing page stored in ``templates/index.html``.

    The template is read from disk on every request and sent back
    unmodified as an HTML response.
    """
    with open("templates/index.html", mode="r", encoding="utf-8") as template:
        page = template.read()
    return HTMLResponse(content=page)
| |
|
| |
|
# Persona and answering rules sent as the system message of every
# conversation. The text is runtime data for the chat model; keep it verbatim.
SYSTEM_PROMPT = '''
You are kammi, a friendly, human-like voice assistant developed/created by Facile AI Solutions, headed by Deepti. You assist customers specifically with knee replacement surgery queries and you are the assistant of Dr.Sandeep who is a highly experienced knee replacement surgeon. Your boss is Dr.Sandeep. Deepti has created you for Dr.Sandeep.

Rules for your responses:

1. **Context-driven answers only**: Answer strictly based on the provided context and previous conversation history. Do not use external knowledge.

2. **General conversation**: Engage in greetings and casual conversation. If the user mentions their name, greet them personally and continue using their name.

3. **Technical/medical queries**:
- If the question is **relevant to knee replacement surgery** and the answer is in the context or chat history, provide the answer.
- If the question is **relevant but not present in the context**, respond: "please connect with Dr.Sandeep or Reception for this details."

4. **Irrelevant queries**:
- If the question is completely unrelated to knee replacement surgery, politely decline and respond: "I am here to assist only with knee replacement surgery related queries."

5. **Drive conversation**:
- After answering the user’s question, suggest a follow-up question from the context that you can answer.
- Make the follow-up natural and conversational. The follow up question must be relevant to the current question or response
- If the user responds with confirmation like “yes”, “okay” give the answer for the previous follow-up question from the context.
- If the user ends the conversation, do not ask or suggest any follow-up question.

6. **Readable voice output for gTTS**:
- Break sentences at natural punctuation: `, . ? ! : ;`.
- Do not use `#`, `**`, or other markdown symbols.
- Numbers and points must be spelled out: e.g., `2.5 lakh` → `two point five lakh`. Similarly Dr, Mr, Mrs, etc. must be written as Doctor, Mister, Misses etc.

7. **Concise and human-like**:
- Keep answers short, conversational, and natural.
- Maximum 40 words / ~20 seconds of speech.

8. **Tone and style**:
- Helpful, friendly, approachable, and human-like.
- Maintain professionalism while being conversational.

9. **About Dr.Sandeep**:
- He has over 5 years of experience in orthopedic and joint replacement surgery.
- Qualifications: MBBS, MS Orthopedics, DNB Orthopedics, Fellowship in Joint Replacement, Fellowship in robotic joint replacement, mako certified surgeon.
- He specializes in total and partial knee replacement procedures.
- He specializes in total and partial knee replacement procedures.
- Known for a patient-friendly approach, focusing on pre-surgery preparation, post-surgery rehabilitation, and pain management.
- Actively keeps up-to-date with the latest techniques and technologies in knee replacement surgery.
- Highly approachable and prefers that patients are well-informed about their treatment options and recovery process.

Always provide readable, streaming-friendly sentences so gTTS can read smoothly. Drive conversation forward while staying strictly on knee replacement surgery topics, and suggest follow-up questions for which you have context-based answers.
'''

# Conversation history used by the chat functions in this module; it always
# starts with the system message.
chat_messages = [{"role": "system", "content": SYSTEM_PROMPT}]
| |
|
class BM25:
    """Okapi BM25 lexical scorer over a fixed, whitespace-tokenised corpus.

    Documents may be strings (split on whitespace) or pre-tokenised
    sequences. Document frequencies and IDF values are computed once, when
    the instance is created.
    """

    def __init__(self, corpus, k1=1.2, b=0.75):
        """Index *corpus*.

        Args:
            corpus: Iterable of documents (``str`` or sequences of tokens).
            k1: Term-frequency saturation parameter.
            b: Strength of the document-length normalisation.
        """
        self.corpus = [doc.split() if isinstance(doc, str) else doc for doc in corpus]
        self.k1 = k1
        self.b = b
        self.N = len(self.corpus)
        # An empty corpus has no average length; use 0.0 rather than
        # dividing by zero.
        total_len = sum(len(doc) for doc in self.corpus)
        self.avgdl = total_len / self.N if self.N else 0.0
        self.doc_freqs = self._compute_doc_frequencies()
        self.idf = self._compute_idf()

    def _compute_doc_frequencies(self):
        """Count how many documents contain each term"""
        df = Counter()
        for doc in self.corpus:
            # set() so a term repeated inside one document counts once.
            df.update(set(doc))
        return dict(df)

    def _compute_idf(self):
        """Compute the IDF for each term in the corpus"""
        return {
            term: math.log((self.N - df + 0.5) / (df + 0.5) + 1)
            for term, df in self.doc_freqs.items()
        }

    def score(self, query, document):
        """Compute the BM25 score for one document and one query

        Args:
            query: Query string or token sequence.
            document: Document string or token sequence; it does not have
                to be a member of the indexed corpus.

        Returns:
            The summed per-term BM25 contribution; terms missing from the
            document or unknown to the corpus contribute 0.
        """
        query_terms = query.split() if isinstance(query, str) else query
        doc_terms = document.split() if isinstance(document, str) else document
        freqs = Counter(doc_terms)
        doc_len = len(doc_terms)

        # The length normalisation is the same for every query term.
        if self.avgdl:
            length_norm = self.k1 * (1 - self.b + self.b * doc_len / self.avgdl)
        else:
            # Empty corpus or only empty documents: no length signal exists.
            length_norm = self.k1 * (1 - self.b)

        score = 0.0
        for term in query_terms:
            if term not in freqs:
                continue
            f = freqs[term]
            idf = self.idf.get(term, 0)
            denom = f + length_norm
            score += idf * (f * (self.k1 + 1)) / denom
        return score

    def rank(self, query):
        """Rank all documents for a given query

        Returns:
            A list of ``(corpus_index, score)`` pairs in corpus order (the
            list is not sorted by score).
        """
        return [(i, self.score(query, doc)) for i, doc in enumerate(self.corpus)]
| |
|
| |
|
def sigmoid_scaled(x, midpoint=3.0):
    """
    Logistic sigmoid shifted along the x axis.

    Args:
        x: Raw score to squash.
        midpoint: Input value at which the output is exactly 0.5.

    Returns:
        A float between 0 and 1.
    """
    try:
        return 1 / (1 + math.exp(-(x - midpoint)))
    except OverflowError:
        # math.exp overflows once x lies far below the midpoint; the sigmoid
        # has already saturated at its lower bound there.
        return 0.0
| |
|
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine of the angle between vectors *a* and *b*.

    Args:
        a: First vector (array-like of numbers).
        b: Second vector, same length as *a*.

    Returns:
        The cosine similarity as a plain ``float``; 0.0 when either vector
        has zero length, for which the cosine is undefined.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # Avoid the NaN (and runtime warning) a 0/0 division would produce.
        return 0.0
    return float(np.dot(a, b) / denominator)
| |
|
async def compute_similarity(
    query: str,
    query_embedding: np.ndarray,
    chunk_text: str,
    chunk_embedding: np.ndarray,
    sem_weight: float,
    syn_weight: float,
    bm25,
) -> float:
    """Blend embedding similarity and BM25 relevance into one score.

    Args:
        query: User query text, scored lexically against *chunk_text*.
        query_embedding: Embedding of the query.
        chunk_text: Text of the candidate chunk.
        chunk_embedding: Embedding of the candidate chunk.
        sem_weight: Weight applied to the cosine similarity.
        syn_weight: Weight applied to the sigmoid-squashed BM25 score.
        bm25: Object exposing ``score(query, document)``.

    Returns:
        ``sem_weight * cosine + syn_weight * sigmoid(bm25_score)``.
    """
    semantic_part = sem_weight * cosine_similarity(query_embedding, chunk_embedding)
    # The raw BM25 score is passed through the shifted sigmoid before weighting.
    lexical_part = syn_weight * sigmoid_scaled(bm25.score(query, chunk_text))
    return semantic_part + lexical_part
| |
|
async def retrieve_top_k_hybrid(query, k, sem_weight, syn_weight, bm25):
    """Return the text of the *k* chunks that best match *query*.

    Every row of the module-level ``df_expanded`` frame is scored with
    ``compute_similarity`` and the highest-scoring chunks are returned.

    Args:
        query: User query text.
        k: Number of chunks to return.
        sem_weight: Weight of the embedding (cosine) component.
        syn_weight: Weight of the BM25 component.
        bm25: BM25 index used for the lexical component.

    Returns:
        List of chunk strings ordered from most to least similar.
    """
    # NOTE(review): encode() is called synchronously inside this coroutine.
    query_embedding = model.encode(query)

    # Walk the two columns directly instead of building a Series per row,
    # which is what iterrows() does.
    tasks = [
        compute_similarity(
            query, query_embedding, chunk_text, chunk_embedding, sem_weight, syn_weight, bm25
        )
        for chunk_text, chunk_embedding in zip(df_expanded["Chunks"], df_expanded["Embeddings"])
    ]
    similarities = await asyncio.gather(*tasks)

    # Score a copy: the scores belong to this query only, so they must not be
    # written into the shared module-level frame.
    scored = df_expanded.assign(similarity=similarities)
    top_results = scored.sort_values(by="similarity", ascending=False).head(k)
    return top_results["Chunks"].to_list()
| |
|
| |
|
# NOTE(review): this directory differs from the cache locations exported at
# the top of the file -- confirm it is still needed.
os.makedirs("/tmp/transformers_cache", exist_ok=True)

# Sentence-embedding model used to encode incoming queries.
model = SentenceTransformer("abhinand/MedEmbed-large-v0.1")

# Knowledge base: the "Chunks" column holds the text and the "Embeddings"
# column holds JSON text that is decoded here.
df_expanded = pd.read_excel("Database.xlsx")
df_expanded["Embeddings"] = df_expanded["Embeddings"].map(json.loads)

# Lexical index built over the same chunk texts.
corpus = df_expanded["Chunks"].to_list()
bm25 = BM25(corpus)
| |
|
| |
|
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| |
|
| |
|
def tts_chunk_stream(text_chunk: str, lang: str = "en"):
    """
    Synthesize *text_chunk* through the OpenAI speech REST endpoint.

    REST-based OpenAI TTS fallback for older openai SDKs (e.g. 0.28).

    Args:
        text_chunk: Text to speak. Empty or whitespace-only text produces no
            audio and no request.
        lang: Accepted for backward compatibility; it is not sent to the API.

    Returns:
        An iterable of MP3 byte chunks (up to 1024 bytes each): a generator
        on success, or an empty list when there is nothing to say, no API
        key is configured, or the request fails.
    """
    if not text_chunk or not text_chunk.strip():
        return []

    # Prefer the key configured on the openai module, then the environment.
    api_key = getattr(_openai_mod, "api_key", None) or os.getenv("OPENAI_API_KEY")
    if not api_key:
        print("OpenAI API key not found. Set openai.api_key or env var OPENAI_API_KEY.")
        return []

    url = "https://api.openai.com/v1/audio/speech"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "gpt-4o-mini-tts",
        "voice": "alloy",
        "input": text_chunk,
        # The speech endpoint names the output-format field "response_format".
        "response_format": "mp3",
    }

    try:
        resp = requests.post(url, headers=headers, json=payload, stream=True, timeout=60)
    except Exception as e:
        # Best effort: a failed request yields no audio instead of raising.
        print("OpenAI TTS request failed:", e)
        return []

    if resp.status_code != 200:
        try:
            try:
                err = resp.json()
            except Exception:
                # Body was not JSON (or could not be read as such).
                err = resp.text
            print(f"OpenAI TTS REST call failed {resp.status_code}: {err}")
        finally:
            resp.close()
        return []

    def audio_stream():
        # Closing in ``finally`` releases the connection even when the
        # consumer stops iterating early.
        try:
            for chunk in resp.iter_content(chunk_size=1024):
                if chunk:
                    yield chunk
        finally:
            resp.close()

    return audio_stream()
| |
|
| |
|
| |
|
async def get_rag_response(user_message: str):
    """Append a context-augmented user turn to the conversation history.

    The 15 best-matching chunks for *user_message* are retrieved (weights
    0.9 semantic / 0.1 lexical), joined into one context block and stored
    together with the query as a new ``user`` message.

    Returns:
        The module-level ``chat_messages`` list, including the new turn.
    """
    global chat_messages
    retrieved = await retrieve_top_k_hybrid(user_message, 15, 0.9, 0.1, bm25)
    separator = "======================================================================================================\n"
    context = separator.join(retrieved)
    user_turn = {
        "role": "user",
        "content": f'''
Context : {context}
User Query: {user_message}''',
    }
    chat_messages.append(user_turn)
    return chat_messages
| |
|
| |
|
| | |
async def gpt_tts_stream(prompt: str):
    """Stream spoken audio for the assistant's reply to *prompt*.

    The prompt is stored in the history together with its retrieved context,
    the chat completion is streamed, and the text is sent to
    ``tts_chunk_stream`` in sentence-sized pieces as it arrives.

    Args:
        prompt: Transcribed user utterance.

    Yields:
        Audio byte chunks produced by ``tts_chunk_stream``.
    """
    # Keep a reference to this conversation's history list. /reset_chat
    # rebinds the global to a new list; appending through the global after
    # that would add this reply to the new conversation without its question.
    history = await get_rag_response(prompt)

    BUFFER_SIZE = 20
    sentence_breaks = (".", "!", ",", "?", "\n", ";", ":")
    buffer = ""
    reply_parts = []

    # NOTE(review): the completion stream and tts_chunk_stream are consumed
    # synchronously here, so the event loop waits on their network calls.
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4o",
            messages=history,
            stream=True,
        )
        for chunk in response:
            choices = chunk.get("choices", [])
            if not choices:
                continue

            choice = choices[0]
            delta = choice["delta"].get("content", "")
            if delta:
                reply_parts.append(delta)
                buffer += delta
                # Flush only at punctuation once enough text has accumulated,
                # so each TTS request covers a natural phrase.
                if len(buffer) >= BUFFER_SIZE and buffer.endswith(sentence_breaks):
                    for audio_chunk in tts_chunk_stream(buffer):
                        yield audio_chunk
                    buffer = ""

            if choice.get("finish_reason") is not None:
                break
    finally:
        # Record the assistant turn even when the stream ends early (client
        # disconnect or upstream error), so the history never keeps a user
        # turn without a reply.
        history.append({"role": "assistant", "content": "".join(reply_parts).strip()})

    # Speak whatever text is left after the last punctuation flush.
    if buffer.strip():
        for audio_chunk in tts_chunk_stream(buffer):
            yield audio_chunk
| |
|
@app.post("/chat_stream")
async def chat_stream(file: UploadFile = File(...)):
    """Transcribe an uploaded recording and stream back the spoken reply.

    Args:
        file: Uploaded audio; it is sent to Deepgram as ``audio/webm``.

    Returns:
        A ``StreamingResponse`` of ``audio/mpeg`` bytes from ``gpt_tts_stream``.

    Raises:
        HTTPException: 400 when the upload is empty, 502 when the
            transcription result does not contain a transcript.
    """
    # Imported here so this handler brings its own dependency into scope.
    from fastapi import HTTPException

    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Uploaded audio file is empty.")

    source = {
        "buffer": audio_bytes,
        "mimetype": "audio/webm",
    }
    options = {
        "model": "nova-3",
        "language": "en",
        "punctuate": True,
        "smart_format": True,
    }
    response = await dg_client.transcription.prerecorded(source, options)

    try:
        transcript_text = response["results"]["channels"][0]["alternatives"][0]["transcript"].strip()
    except (KeyError, IndexError, TypeError, AttributeError) as exc:
        # The result did not have the expected shape; report it as an
        # upstream failure rather than an unhandled server error.
        raise HTTPException(
            status_code=502,
            detail="Transcription service returned no transcript.",
        ) from exc

    return StreamingResponse(gpt_tts_stream(transcript_text), media_type="audio/mpeg")
| |
|
| |
|
@app.post("/reset_chat")
async def reset_chat():
    """Discard the conversation, keeping only the system prompt.

    Returns:
        A JSON-serialisable confirmation message.
    """
    global chat_messages
    # The history is seeded with the system message and only ever appended
    # to, so its first entry is always the prompt. Reusing it avoids keeping
    # a second verbatim copy of the prompt that could drift from the first.
    # A new list is bound (not cleared in place) so code still holding the
    # old list is unaffected.
    chat_messages = chat_messages[:1]
    return {"message": "Chat history reset successfully."}
| |
|
| |
|