Spaces:

bhardwaj08sarthak
/

STEM-Question-Generator

Running

App Files Community

bhardwaj08sarthak committed on Sep 25

Commit

48b6006

verified ·

1 Parent(s): 5a92b71

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -6

app.py CHANGED Viewed

@@ -11,10 +11,11 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 from smolagents import CodeAgent, InferenceClientModel, tool
 from langchain_community.embeddings import HuggingFaceEmbeddings
-from llama_index.core import VectorStoreIndex, Document
-from huggingface_hub import login
 from smolagents import tool
-from all_datasets import *
 from level_classifier_tool_2 import (
     classify_levels_phrases,
     HFEmbeddingBackend,
@@ -29,9 +30,33 @@ _backend = HFEmbeddingBackend(model_name="google/embeddinggemma-300m")
 _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
 _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
-file_path = hf_hub_download("bhardwaj08sarthak/stem_questioin_embeddings", "index.pkl")
-with open(file_path, "rb") as f:
-    index = pickle.load(f)
 #D = {
 #    "GSM8k": GSM8k['question'],
 #    "Olympiad": Olympiad_math['question'],

 from huggingface_hub import InferenceClient
 from smolagents import CodeAgent, InferenceClientModel, tool
 from langchain_community.embeddings import HuggingFaceEmbeddings
+#from llama_index.embeddings.huggingface import HuggingFaceEmbeddings
+from llama_index.core import StorageContext, load_index_from_storage
+from huggingface_hub import login, snapshot_download
 from smolagents import tool
+#from all_datasets import *
 from level_classifier_tool_2 import (
     classify_levels_phrases,
     HFEmbeddingBackend,
 _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
 _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
+DATASET_REPO = "bhardwaj08sarthak/my-stem-index"   # your dataset repo id
+PERSIST_SUBDIR = "index_store"             # the folder you uploaded
+LOCAL_BASE = "/data/index"                 # where to place files in the Space
+# Download the persisted index folder into ephemeral storage
+os.makedirs(LOCAL_BASE, exist_ok=True)
+snapshot_download(
+    repo_id=DATASET_REPO,
+    repo_type="dataset",
+    local_dir=LOCAL_BASE,
+    allow_patterns=[f"{PERSIST_SUBDIR}/**"],  # only grab the index folder
+    local_dir_use_symlinks=False,             # real files (safer in Spaces)
+)
+persist_dir = os.path.join(LOCAL_BASE, PERSIST_SUBDIR)
+# Recreate the SAME embedding model used to build the index
+emb = HuggingFaceEmbeddings(
+    model_name="google/embeddinggemma-300m",
+    model_kwargs={"device": "cpu"},               # use "cuda" if your Space has GPU
+    encode_kwargs={"normalize_embeddings": True},
+)
+# Load the index from storage
+storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
+index = load_index_from_storage(storage_context, embed_model=emb)
 #D = {
 #    "GSM8k": GSM8k['question'],
 #    "Olympiad": Olympiad_math['question'],