Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -120,13 +120,14 @@ df = pd.read_csv("cleaned1.csv")
|
|
| 120 |
df2 = pd.read_csv("cleaned2.csv")
|
| 121 |
embeddings = torch.load("embeddings1.pt")
|
| 122 |
embeddings2 = torch.load("embeddings2.pt")
|
| 123 |
-
|
| 124 |
# Pre-extract DataFrame columns to avoid repeated iloc calls
|
| 125 |
df_questions = df["question"].values
|
| 126 |
df_links = df["link"].values
|
| 127 |
df2_questions = df2["question"].values
|
| 128 |
df2_links = df2["link"].values
|
| 129 |
-
|
|
|
|
| 130 |
def predict(text):
|
| 131 |
if not text or text.strip() == "":
|
| 132 |
return "No query provided"
|
|
@@ -136,17 +137,20 @@ def predict(text):
|
|
| 136 |
# Compute similarity scores
|
| 137 |
sim_scores1 = util.pytorch_cos_sim(query_embedding, embeddings)[0]
|
| 138 |
sim_scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
|
|
|
|
| 139 |
|
| 140 |
# Get top 3 values and indices in one call
|
| 141 |
top3_scores1, top3_idx1 = sim_scores1.topk(3)
|
| 142 |
top3_scores2, top3_idx2 = sim_scores2.topk(3)
|
| 143 |
-
|
| 144 |
# Convert to CPU once
|
| 145 |
top3_idx1_cpu = top3_idx1.cpu().numpy()
|
| 146 |
top3_idx2_cpu = top3_idx2.cpu().numpy()
|
|
|
|
|
|
|
| 147 |
top3_scores1_cpu = top3_scores1.cpu().numpy()
|
| 148 |
top3_scores2_cpu = top3_scores2.cpu().numpy()
|
| 149 |
-
|
| 150 |
# Prepare results using pre-extracted arrays
|
| 151 |
results = {
|
| 152 |
"top1": [
|
|
@@ -165,6 +169,14 @@ def predict(text):
|
|
| 165 |
}
|
| 166 |
for idx, score in zip(top3_idx2_cpu, top3_scores2_cpu)
|
| 167 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
}
|
| 169 |
|
| 170 |
return results
|
|
|
|
| 120 |
df2 = pd.read_csv("cleaned2.csv")
|
| 121 |
embeddings = torch.load("embeddings1.pt")
|
| 122 |
embeddings2 = torch.load("embeddings2.pt")
|
| 123 |
+
embeddings3 = torch.load("embeddings3.pt")
|
| 124 |
# Pre-extract DataFrame columns to avoid repeated iloc calls
|
| 125 |
df_questions = df["question"].values
|
| 126 |
df_links = df["link"].values
|
| 127 |
df2_questions = df2["question"].values
|
| 128 |
df2_links = df2["link"].values
|
| 129 |
+
df3_questions = df3["question"].values
|
| 130 |
+
df3_links = df3["link"].values
|
| 131 |
def predict(text):
|
| 132 |
if not text or text.strip() == "":
|
| 133 |
return "No query provided"
|
|
|
|
| 137 |
# Compute similarity scores
|
| 138 |
sim_scores1 = util.pytorch_cos_sim(query_embedding, embeddings)[0]
|
| 139 |
sim_scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
|
| 140 |
+
sim_scores3 = util.pytorch_cos_sim(query_embedding, embeddings3)[0]
|
| 141 |
|
| 142 |
# Get top 3 values and indices in one call
|
| 143 |
top3_scores1, top3_idx1 = sim_scores1.topk(3)
|
| 144 |
top3_scores2, top3_idx2 = sim_scores2.topk(3)
|
| 145 |
+
top3_scores3, top3_idx3 = sim_scores3.topk(3)
|
| 146 |
# Convert to CPU once
|
| 147 |
top3_idx1_cpu = top3_idx1.cpu().numpy()
|
| 148 |
top3_idx2_cpu = top3_idx2.cpu().numpy()
|
| 149 |
+
top3_idx3_cpu = top3_idx3.cpu().numpy()
|
| 150 |
+
|
| 151 |
top3_scores1_cpu = top3_scores1.cpu().numpy()
|
| 152 |
top3_scores2_cpu = top3_scores2.cpu().numpy()
|
| 153 |
+
top3_scores3_cpu = top3_scores3.cpu().numpy()
|
| 154 |
# Prepare results using pre-extracted arrays
|
| 155 |
results = {
|
| 156 |
"top1": [
|
|
|
|
| 169 |
}
|
| 170 |
for idx, score in zip(top3_idx2_cpu, top3_scores2_cpu)
|
| 171 |
],
|
| 172 |
+
"top3": [
|
| 173 |
+
{
|
| 174 |
+
"question": df3_questions[idx],
|
| 175 |
+
"link": df3_links[idx],
|
| 176 |
+
"score": float(score)
|
| 177 |
+
}
|
| 178 |
+
for idx, score in zip(top3_idx3_cpu, top3_scores3_cpu)
|
| 179 |
+
]
|
| 180 |
}
|
| 181 |
|
| 182 |
return results
|