bhardwaj08sarthak committed
Commit 48b6006 · verified · 1 Parent(s): 5a92b71

Update app.py

Files changed (1)
  1. app.py +31 -6
app.py CHANGED
@@ -11,10 +11,11 @@ import gradio as gr
  from huggingface_hub import InferenceClient
  from smolagents import CodeAgent, InferenceClientModel, tool
  from langchain_community.embeddings import HuggingFaceEmbeddings
- from llama_index.core import VectorStoreIndex, Document
- from huggingface_hub import login
+ #from llama_index.embeddings.huggingface import HuggingFaceEmbeddings
+ from llama_index.core import StorageContext, load_index_from_storage
+ from huggingface_hub import login, snapshot_download
  from smolagents import tool
- from all_datasets import *
+ #from all_datasets import *
  from level_classifier_tool_2 import (
      classify_levels_phrases,
      HFEmbeddingBackend,
@@ -29,9 +30,33 @@ _backend = HFEmbeddingBackend(model_name="google/embeddinggemma-300m")
  _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
  _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
 
- file_path = hf_hub_download("bhardwaj08sarthak/stem_questioin_embeddings", "index.pkl")
- with open(file_path, "rb") as f:
-     index = pickle.load(f)
+ DATASET_REPO = "bhardwaj08sarthak/my-stem-index"  # your dataset repo id
+ PERSIST_SUBDIR = "index_store"                    # the folder you uploaded
+ LOCAL_BASE = "/data/index"                        # where to place files in the Space
+
+
+ # Download the persisted index folder into ephemeral storage
+ os.makedirs(LOCAL_BASE, exist_ok=True)
+ snapshot_download(
+     repo_id=DATASET_REPO,
+     repo_type="dataset",
+     local_dir=LOCAL_BASE,
+     allow_patterns=[f"{PERSIST_SUBDIR}/**"],  # only grab the index folder
+     local_dir_use_symlinks=False,             # real files (safer in Spaces)
+ )
+
+ persist_dir = os.path.join(LOCAL_BASE, PERSIST_SUBDIR)
+
+ # Recreate the SAME embedding model used to build the index
+ emb = HuggingFaceEmbeddings(
+     model_name="google/embeddinggemma-300m",
+     model_kwargs={"device": "cpu"},        # use "cuda" if your Space has GPU
+     encode_kwargs={"normalize_embeddings": True},
+ )
+
+ # Load the index from storage
+ storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
+ index = load_index_from_storage(storage_context, embed_model=emb)
  #D = {
  #    "GSM8k": GSM8k['question'],
  #    "Olympiad": Olympiad_math['question'],
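After this change, `index` is a llama_index index restored from the persisted storage folder rather than a pickled object. The diff does not show how the rest of app.py consumes it; a minimal sketch, assuming the standard llama_index retriever API and a made-up example query (the `similarity_top_k` value and the query string are illustrative, not from the commit):

# Hypothetical usage sketch of the loaded index (not part of the commit)
retriever = index.as_retriever(similarity_top_k=5)          # assumes a vector index was persisted
hits = retriever.retrieve("How do I prove sqrt(2) is irrational?")  # example query only
for hit in hits:
    print(hit.score, hit.node.get_content()[:80])           # inspect nearest stored questions

Using a retriever rather than a full query engine avoids needing an LLM configured just to check that the restored index returns sensible neighbors.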