Vladyslav Humennyy committed
Commit ddef6b7
1 Parent(s): 381e299

Correct token authentication

Files changed (1)
1. app.py +5 -3
app.py CHANGED
@@ -22,6 +22,9 @@ from typing import Any
 
 HF_LE_LLM_READ_TOKEN = os.environ.get('HF_LE_LLM_READ_TOKEN')
 
+from huggingface_hub import login
+login(token=HF_LE_LLM_READ_TOKEN)
+
 #MODEL_ID = "le-llm/lapa-v0.1-reasoning-only-32768"
 MODEL_ID = "le-llm/lapa-v0.1-instruct"
 MODEL_ID = "le-llm/lapa-v0.1-matt-instruction-5e06"
@@ -36,10 +39,10 @@ def _begin_analytics_session():
 def load_model():
     """Lazy-load model, tokenizer, and optional processor (for zeroGPU)."""
     device = "cuda"  # if torch.cuda.is_available() else "cpu"
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_LE_LLM_READ_TOKEN)
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
     processor = None
     try:
-        processor = AutoProcessor.from_pretrained(MODEL_ID, token=HF_LE_LLM_READ_TOKEN)
+        processor = AutoProcessor.from_pretrained(MODEL_ID)
     except Exception as err:  # pragma: no cover - informative fallback
         print(f"Warning: AutoProcessor not available ({err}). Falling back to tokenizer.")
 
@@ -48,7 +51,6 @@ def load_model():
         dtype=torch.bfloat16,  # if device == "cuda" else torch.float32,
         device_map="auto",  # if device == "cuda" else None,
         attn_implementation="flash_attention_2",  # "kernels-community/vllm-flash-attn3"
-        token=HF_LE_LLM_READ_TOKEN
     )  # .cuda()
     print(f"Selected device:", device)
     return model, tokenizer, processor, device
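
For context: the commit swaps per-call token= arguments for a single process-wide huggingface_hub.login() at import time, after which later Hub requests authenticate implicitly. A minimal sketch of that pattern, assuming the same HF_LE_LLM_READ_TOKEN environment variable is set and the repo is gated (the repo ID below is illustrative):

import os

from huggingface_hub import login
from transformers import AutoTokenizer

# One process-wide login: huggingface_hub validates and stores the token,
# so subsequent Hub calls (from_pretrained, hf_hub_download, ...) pick it
# up implicitly instead of each call passing token=... explicitly.
login(token=os.environ["HF_LE_LLM_READ_TOKEN"])  # assumes the env var exists

# No per-call token= needed once logged in.
tokenizer = AutoTokenizer.from_pretrained("le-llm/lapa-v0.1-instruct")

The trade-off: login() sets a process-global credential, which suits a single-tenant Space like this one; explicit token= arguments remain the safer choice when one process must act under several identities.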