Update app.py
Browse files
app.py
CHANGED
|
@@ -11,10 +11,11 @@ import gradio as gr
|
|
| 11 |
from huggingface_hub import InferenceClient
|
| 12 |
from smolagents import CodeAgent, InferenceClientModel, tool
|
| 13 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 14 |
-
from llama_index.
|
| 15 |
-
from
|
|
|
|
| 16 |
from smolagents import tool
|
| 17 |
-
from all_datasets import *
|
| 18 |
from level_classifier_tool_2 import (
|
| 19 |
classify_levels_phrases,
|
| 20 |
HFEmbeddingBackend,
|
|
@@ -29,9 +30,33 @@ _backend = HFEmbeddingBackend(model_name="google/embeddinggemma-300m")
|
|
| 29 |
_BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
|
| 30 |
_DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
#D = {
|
| 36 |
# "GSM8k": GSM8k['question'],
|
| 37 |
# "Olympiad": Olympiad_math['question'],
|
|
|
|
| 11 |
from huggingface_hub import InferenceClient
|
| 12 |
from smolagents import CodeAgent, InferenceClientModel, tool
|
| 13 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 14 |
+
#from llama_index.embeddings.huggingface import HuggingFaceEmbeddings
|
| 15 |
+
from llama_index.core import StorageContext, load_index_from_storage
|
| 16 |
+
from huggingface_hub import login, snapshot_download
|
| 17 |
from smolagents import tool
|
| 18 |
+
#from all_datasets import *
|
| 19 |
from level_classifier_tool_2 import (
|
| 20 |
classify_levels_phrases,
|
| 21 |
HFEmbeddingBackend,
|
|
|
|
| 30 |
_BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
|
| 31 |
_DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
|
| 32 |
|
| 33 |
+
DATASET_REPO = "bhardwaj08sarthak/my-stem-index" # your dataset repo id
|
| 34 |
+
PERSIST_SUBDIR = "index_store" # the folder you uploaded
|
| 35 |
+
LOCAL_BASE = "/data/index" # where to place files in the Space
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
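# Download the persisted index folder into LOCAL_BASE (NOTE(review): on Hugging Face Spaces /data is the persistent-storage mount when that feature is enabled, so this is only ephemeral if it is not — confirm)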
|
| 39 |
+
os.makedirs(LOCAL_BASE, exist_ok=True)
|
| 40 |
+
snapshot_download(
|
| 41 |
+
repo_id=DATASET_REPO,
|
| 42 |
+
repo_type="dataset",
|
| 43 |
+
local_dir=LOCAL_BASE,
|
| 44 |
+
allow_patterns=[f"{PERSIST_SUBDIR}/**"], # only grab the index folder
|
| 45 |
+
local_dir_use_symlinks=False, # real files (safer in Spaces)
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
persist_dir = os.path.join(LOCAL_BASE, PERSIST_SUBDIR)
|
| 49 |
+
|
| 50 |
+
# Recreate the SAME embedding model used to build the index
|
| 51 |
+
emb = HuggingFaceEmbeddings(
|
| 52 |
+
model_name="google/embeddinggemma-300m",
|
| 53 |
+
model_kwargs={"device": "cpu"}, # use "cuda" if your Space has GPU
|
| 54 |
+
encode_kwargs={"normalize_embeddings": True},
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
# Load the index from storage
|
| 58 |
+
storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
|
| 59 |
+
index = load_index_from_storage(storage_context, embed_model=emb)
|
| 60 |
#D = {
|
| 61 |
# "GSM8k": GSM8k['question'],
|
| 62 |
# "Olympiad": Olympiad_math['question'],
|