Spaces:

mgbam
/

Medresearch

Sleeping

App Files Files Community

mgbam committed on Feb 7

Commit

bb79d76

verified ·

1 Parent(s): f7d2b7f

Update components/pubmed_search.py

Browse files

Files changed (1) hide show

components/pubmed_search.py +33 -16

components/pubmed_search.py CHANGED Viewed

@@ -17,29 +17,46 @@ def log_error(message: str):
 # ---------------------------- Tool Functions ----------------------------
 def search_pubmed(query: str) -> list:
-    """Searches PubMed and returns a list of article IDs."""
     try:
-        Entrez.email = os.environ.get("ENTREZ_EMAIL", "[email protected]")
         handle = Entrez.esearch(db="pubmed", term=query, retmax="5")
         record = Entrez.read(handle)
-        handle.close()
-        return record["IdList"]
     except Exception as e:
         log_error(f"PubMed search error: {e}")
         return [f"Error during PubMed search: {e}"]
-def fetch_abstract(article_id: str) -> str:
-    """Fetches the abstract for a given PubMed article ID."""
-    try:
-        Entrez.email = os.environ.get("ENTREZ_EMAIL", "[email protected]")
-        handle = Entrez.efetch(db="pubmed", id=article_id, rettype="abstract", retmode="text")
-        abstract = handle.read()
-        handle.close()
-        return abstract
-    except Exception as e:
-        log_error(f"Error fetching abstract for {article_id}: {e}")
-        return f"Error fetching abstract for {article_id}: {e}"
 # ---------------------------- Agent Function ----------------------------
 def medai_agent(query: str) -> str:

 # ---------------------------- Tool Functions ----------------------------
 def search_pubmed(query: str) -> list:
     """Search PubMed and return article IDs ranked by semantic similarity.

     Runs an Entrez ``esearch`` for *query* (at most 5 hits), fetches the
     abstract of every hit, embeds the query and the abstracts with the
     ``all-mpnet-base-v2`` Sentence Transformers model, and orders the IDs
     by cosine similarity to the query, best match first.

     Args:
         query: Search term sent to PubMed; also the text the abstracts are
             ranked against.

     Returns:
         PubMed IDs, most similar first. An empty list when PubMed returns
         no hits. On any failure, a one-element list holding an error
         message (the contract this function already had).
     """
     try:
         Entrez.email = ENTREZ_EMAIL
         print(f"Entrez Email: {Entrez.email}")  # DEBUG: Check the email being used
         print(f"PubMed Query: {query}")  # DEBUG: Check the query being sent

         # Fetch PubMed IDs; always release the handle, even if parsing fails.
         handle = Entrez.esearch(db="pubmed", term=query, retmax="5")
         try:
             record = Entrez.read(handle)
         finally:
             handle.close()
         id_list = list(record["IdList"])

         # Nothing to rank: skip the model entirely instead of encoding an
         # empty batch and indexing into an empty similarity matrix.
         if not id_list:
             print(f"PubMed Results: {id_list}")  # DEBUG: Check the results
             return []

         abstracts = [_fetch_abstract_text(article_id) for article_id in id_list]

         # Imported lazily so the module still loads when the optional
         # sentence_transformers package is slow or unavailable.
         from sentence_transformers import util
         model = _get_embedding_model()
         query_embedding = model.encode(query)
         abstract_embeddings = model.encode(abstracts)

         # cos_sim returns a 1 x N matrix; row 0 holds the per-abstract scores.
         similarities = util.cos_sim(query_embedding, abstract_embeddings)[0]

         # Convert each score to a plain float so sorting does not rely on
         # tensor comparison semantics. Higher similarity sorts first.
         ranked_articles = sorted(
             zip(id_list, similarities),
             key=lambda pair: float(pair[1]),
             reverse=True,
         )
         ranked_ids = [article_id for article_id, _ in ranked_articles]
         print(f"PubMed Results: {ranked_ids}")  # DEBUG: Check the results
         return ranked_ids
     except Exception as e:
         log_error(f"PubMed search error: {e}")
         return [f"Error during PubMed search: {e}"]


 # Lazily created Sentence Transformers model shared by all searches.
 _EMBEDDING_MODEL = None


 def _get_embedding_model():
     """Return the shared embedding model, loading it on first use only."""
     global _EMBEDDING_MODEL
     if _EMBEDDING_MODEL is None:
         from sentence_transformers import SentenceTransformer
         _EMBEDDING_MODEL = SentenceTransformer('all-mpnet-base-v2')
     return _EMBEDDING_MODEL


 def _fetch_abstract_text(article_id: str) -> str:
     """Return the plain-text abstract for one PubMed ID, or "" on failure.

     Best effort: a failed fetch is logged and yields an empty string so a
     single bad article does not discard the whole search.
     """
     try:
         handle = Entrez.efetch(db="pubmed", id=article_id, rettype="abstract", retmode="text")
         try:
             return handle.read()
         finally:
             handle.close()
     except Exception as e:
         log_error(f"Error fetching abstract for {article_id}: {e}")
         return ""
 # ---------------------------- Agent Function ----------------------------
 def medai_agent(query: str) -> str: