arubaDev committed · Commit 225355f · verified · 1 Parent(s): 4574c3a

Update app.py

Files changed (1): app.py (+29 -4)
app.py CHANGED
@@ -3,6 +3,10 @@ import sqlite3
 from datetime import datetime
 import gradio as gr
 from huggingface_hub import InferenceClient
+from datasets import load_dataset
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+import numpy as np
 
 # ---------------------------
 # Config
@@ -10,7 +14,7 @@ from huggingface_hub import InferenceClient
 MODELS = {
     "Meta LLaMA 3.1 (8B Instruct)": "meta-llama/Llama-3.1-8B-Instruct",
     "Mistral 7B Instruct": "mistralai/Mistral-7B-Instruct-v0.3",
-    # Later you can add your fine-tuned backend-focused model here
+    # Add your backend-focused fine-tuned model here if available
     # "Backend-Finetuned Model": "your-username/backend-crud-model"
 }
 
@@ -22,7 +26,9 @@ SYSTEM_DEFAULT = (
     "Always prioritize database, API, authentication, routing, migrations, and CRUD logic. "
     "Provide full backend code scaffolds with files, paths, and commands. "
     "Only include frontend if required for framework integration "
-    "(e.g., Laravel Blade, Django templates). Ignore other frontend/UI tasks."
+    "(e.g., Laravel Blade, Django templates). Ignore other frontend/UI tasks. "
+    "If user asks for excessive frontend work, politely respond: "
+    "'I am a backend assistant and focus only on backend tasks.'"
 )
 
 # ---------------------------
@@ -120,6 +126,23 @@ def update_session_title_if_needed(session_id: int, first_user_text: str):
     conn.commit()
     conn.close()
 
+# ---------------------------
+# Dataset & Embeddings Setup
+# ---------------------------
+print("Loading dataset and embeddings... (this runs only once)")
+dataset = load_dataset("codeparrot/codeparrot-clean-python", split="train[:5%]")  # small % for speed
+backend_snippets = [d["content"] for d in dataset if any(k in d["content"].lower() for k in
+    ["db", "database", "api", "crud", "auth", "routing", "migration"])]
+
+embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+snippet_embeddings = embed_model.encode(backend_snippets, convert_to_numpy=True)
+
+def get_relevant_snippets(user_text, top_k=3):
+    user_emb = embed_model.encode([user_text], convert_to_numpy=True)
+    sims = cosine_similarity(user_emb, snippet_embeddings)[0]
+    top_indices = np.argsort(sims)[-top_k:][::-1]
+    return "\n\n".join([backend_snippets[i] for i in top_indices])
+
 # ---------------------------
 # Helpers
 # ---------------------------
@@ -131,9 +154,11 @@ def label_to_id(label: str | None) -> int | None:
     except Exception:
         return None
 
-def build_api_messages(session_id: int, system_message: str):
+def build_api_messages(session_id: int, system_message: str, user_text: str):
+    relevant_snippets = get_relevant_snippets(user_text)
     msgs = [{"role": "system", "content": system_message.strip()}]
     msgs.extend(get_messages(session_id))
+    msgs.append({"role": "user", "content": relevant_snippets + "\n\n" + user_text})
    return msgs
 
 def get_client(model_choice: str):
@@ -178,7 +203,7 @@ def send_cb(user_text, selected_label, chatbot_msgs, system_message, max_tokens,
     add_message(sid, "user", user_text)
     update_session_title_if_needed(sid, user_text)
 
-    api_messages = build_api_messages(sid, system_message)
+    api_messages = build_api_messages(sid, system_message, user_text)
     display_msgs = get_messages(sid)
     display_msgs.append({"role": "assistant", "content": ""})
 
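
For anyone trying the change locally, here is a minimal, self-contained sketch of the retrieval step this commit introduces: like get_relevant_snippets, it embeds a query with the same all-MiniLM-L6-v2 model, scores it against pre-computed snippet embeddings with cosine similarity, and returns the top-k matches. The tiny in-memory corpus and the example query are illustrative placeholders (assumptions, not part of the commit), so the sketch runs without downloading the codeparrot split.

# Minimal sketch of the retrieval path used by get_relevant_snippets.
# Assumes sentence-transformers, scikit-learn, and numpy are installed;
# the corpus and query below are made-up examples, not from the commit.
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

corpus = [
    "def create_user(db, payload): ...  # CRUD insert with an ORM session",
    "@app.route('/api/items', methods=['GET'])  # routing example",
    "ALTER TABLE users ADD COLUMN last_login TIMESTAMP;  -- migration snippet",
]

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
corpus_embeddings = model.encode(corpus, convert_to_numpy=True)

def top_k_snippets(query: str, k: int = 2) -> list[str]:
    # Embed the query, score it against every stored snippet, keep the best k.
    query_embedding = model.encode([query], convert_to_numpy=True)
    scores = cosine_similarity(query_embedding, corpus_embeddings)[0]
    best = np.argsort(scores)[-k:][::-1]  # indices of the k highest scores
    return [corpus[i] for i in best]

print(top_k_snippets("How do I add an API route for creating users?"))

Note that the new imports (datasets, sentence_transformers, sklearn, numpy) mean the corresponding packages (datasets, sentence-transformers, scikit-learn, numpy) must be available in the Space's environment for the updated app.py to start.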