FlameF0X committed
Commit b3cb140 · verified · 1 parent: b46686e

Update app.py

Files changed (1):
  app.py (+126, -22)
app.py CHANGED
@@ -8,7 +8,7 @@ from huggingface_hub import hf_hub_download
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 # -------------------- MODEL CONFIG --------------------
-MODEL_NAME = "FlameF0X/i3-80m" # HuggingFace repo name
+MODEL_NAME = "FlameF0X/i3-80m"
 LOCAL_SAFETENSORS = Path("model.safetensors")
 LOCAL_BIN = Path("pytorch_model.bin")
 VOCAB_JSON = Path("chunk_vocab_combined.json")
@@ -19,7 +19,6 @@ with open(VOCAB_JSON, 'r') as f:
 VOCAB_SIZE = vocab_data["vocab_size"]
 
 # -------------------- IMPORT YOUR MODEL CLASS --------------------
-# Make sure i3Model is in the same folder or installed as a package
 from app_classes import i3Model, ChunkTokenizer
 
 tokenizer = ChunkTokenizer()
@@ -45,7 +44,6 @@ try:
         model.load_state_dict(state_dict)
         print("✅ Loaded weights from local .bin")
     else:
-        # HuggingFace fallback
         print("⚡ Downloading model from HuggingFace...")
         bin_file = hf_hub_download(repo_id=MODEL_NAME, filename="pytorch_model.bin")
         state_dict = torch.load(bin_file, map_location=DEVICE, weights_only=False)
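
Review note: the hunk above shows only fragments of the weight-loading logic. The visible context implies a local-first pattern: prefer model.safetensors, fall back to pytorch_model.bin, and only then download from the Hub. A minimal sketch of that pattern, assuming the branch order and the safetensors load_file usage (neither is fully visible in this diff):

    from pathlib import Path

    import torch
    from huggingface_hub import hf_hub_download
    from safetensors.torch import load_file

    def load_weights(model, repo_id, device):
        # Local-first loading: safetensors, then .bin, then the Hub (assumed order).
        if Path("model.safetensors").exists():
            state_dict = load_file("model.safetensors", device=str(device))
            print("✅ Loaded weights from local .safetensors")
        elif Path("pytorch_model.bin").exists():
            state_dict = torch.load("pytorch_model.bin", map_location=device, weights_only=False)
            print("✅ Loaded weights from local .bin")
        else:
            print("⚡ Downloading model from HuggingFace...")
            bin_file = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
            state_dict = torch.load(bin_file, map_location=device, weights_only=False)
        model.load_state_dict(state_dict)
        return model.to(device).eval()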
@@ -58,23 +56,136 @@ model.eval()
 
 # -------------------- GENERATION FUNCTION --------------------
 def generate_text(prompt, max_tokens=100, temperature=0.8, top_k=40):
-    idx = torch.tensor([tokenizer.encode(prompt)], dtype=torch.long).to(DEVICE)
-    out_idx = model.generate(idx, max_new_tokens=max_tokens, temperature=temperature, top_k=top_k)
-    return tokenizer.decode(out_idx[0].cpu())
+    if not prompt.strip():
+        return "⚠️ Please enter a prompt to generate text."
+
+    try:
+        idx = torch.tensor([tokenizer.encode(prompt)], dtype=torch.long).to(DEVICE)
+        out_idx = model.generate(idx, max_new_tokens=max_tokens, temperature=temperature, top_k=top_k)
+        return tokenizer.decode(out_idx[0].cpu())
+    except Exception as e:
+        return f"❌ Generation error: {str(e)}"
 
 # -------------------- GRADIO UI --------------------
-with gr.Blocks() as demo:
-    gr.Markdown("### i3-80M Text Generation")
+custom_css = """
+.gradio-container {
+    max-width: 1200px !important;
+}
+.main-header {
+    text-align: center;
+    margin-bottom: 2rem;
+}
+.param-card {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    padding: 1.5rem;
+    border-radius: 12px;
+    margin-bottom: 1rem;
+}
+"""
+
+with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
+    # Header
+    with gr.Row():
+        gr.Markdown(
+            """
+            # 🚀 i3-80M Text Generation
+            ### Powered by Mamba-based Architecture
+            Generate creative text using the i3-80M language model with customizable parameters.
+            """,
+            elem_classes="main-header"
+        )
 
+    # Main Generation Area
     with gr.Row():
-        prompt_input = gr.Textbox(label="Prompt", placeholder="Type something...")
-        max_tokens_input = gr.Slider(10, 500, value=100, step=10, label="Max Tokens")
-        temp_input = gr.Slider(0.1, 2.0, value=0.8, step=0.05, label="Temperature")
-        topk_input = gr.Slider(1, 100, value=40, step=1, label="Top-k Sampling")
+        with gr.Column(scale=2):
+            prompt_input = gr.Textbox(
+                label="✍️ Enter Your Prompt",
+                placeholder="Once upon a time in a distant galaxy...",
+                lines=4,
+                max_lines=8
+            )
+
+            with gr.Accordion("⚙️ Generation Parameters", open=True):
+                with gr.Row():
+                    max_tokens_input = gr.Slider(
+                        10, 500,
+                        value=100,
+                        step=10,
+                        label="Max Tokens",
+                        info="Maximum number of tokens to generate"
+                    )
+                    temp_input = gr.Slider(
+                        0.1, 2.0,
+                        value=0.8,
+                        step=0.05,
+                        label="Temperature",
+                        info="Higher = more creative, Lower = more focused"
+                    )
+
+                topk_input = gr.Slider(
+                    1, 100,
+                    value=40,
+                    step=1,
+                    label="Top-k Sampling",
+                    info="Number of top tokens to consider"
+                )
+
+            with gr.Row():
+                generate_btn = gr.Button("🎨 Generate Text", variant="primary", size="lg")
+                clear_btn = gr.ClearButton(components=[prompt_input], value="🗑️ Clear", size="lg")
+
+        with gr.Column(scale=2):
+            output_text = gr.Textbox(
+                label="📝 Generated Output",
+                lines=12,
+                max_lines=20,
+                show_copy_button=True
+            )
 
-    output_text = gr.Textbox(label="Generated Text")
+    # Examples Section
+    with gr.Row():
+        gr.Examples(
+            examples=[
+                ["The future of artificial intelligence is", 150, 0.7, 50],
+                ["In a world where technology and nature coexist", 200, 0.9, 40],
+                ["The scientist discovered something remarkable", 120, 0.8, 45],
+            ],
+            inputs=[prompt_input, max_tokens_input, temp_input, topk_input],
+            label="💡 Try These Examples"
+        )
 
-    generate_btn = gr.Button("Generate")
+    # Developer Panel
+    with gr.Accordion("🔧 Developer Info", open=False):
+        total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
+
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown(f"""
+                **Model Architecture:**
+                - **Model:** i3-80M
+                - **Device:** {DEVICE}
+                - **Vocab Size:** {VOCAB_SIZE:,}
+                - **Parameters:** {total_params:,} ({total_params/1e6:.2f}M)
+                """)
+
+            with gr.Column():
+                gr.Markdown(f"""
+                **Configuration:**
+                - **d_model:** 512
+                - **n_heads:** 16
+                - **max_seq_len:** 256
+                - **d_state:** 32
+                """)
+
+    # Footer
+    gr.Markdown(
+        """
+        ---
+        <div style="text-align: center; color: #666;">
+        <p>Built with ❤️ using Gradio | Model: FlameF0X/i3-80m</p>
+        </div>
+        """,
+    )
 
     # Connect UI
     generate_btn.click(
@@ -82,14 +193,7 @@ with gr.Blocks() as demo:
         inputs=[prompt_input, max_tokens_input, temp_input, topk_input],
         outputs=[output_text]
     )
-
-    # Developer Panel (shows model info)
-    with gr.Accordion("Dev Panel: Model Info", open=False):
-        gr.Markdown(f"**Device:** {DEVICE}")
-        gr.Markdown(f"**Vocab size:** {VOCAB_SIZE}")
-        total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
-        gr.Markdown(f"**Total Parameters:** {total_params:,} ({total_params/1e6:.2f}M)")
 
 # -------------------- RUN --------------------
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(share=False)
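
Review note: generate_text() forwards temperature and top_k straight into model.generate(). i3Model's decoding loop is not part of this diff, so the following is only a generic sketch of what one temperature/top-k sampling step typically looks like, not the model's actual implementation:

    import torch
    import torch.nn.functional as F

    def sample_next_token(logits, temperature=0.8, top_k=40):
        # Scale logits: temperature < 1 sharpens the distribution, > 1 flattens it.
        logits = logits / temperature
        # Keep only the top_k highest logits; mask everything else to -inf.
        v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
        logits[logits < v[..., -1, None]] = float("-inf")
        # Sample one token id from the renormalized distribution.
        probs = F.softmax(logits, dim=-1)
        return torch.multinomial(probs, num_samples=1)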
 
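Review note: the fn= argument of generate_btn.click(...) sits in the unchanged region between the two hunks, so it never appears in the diff. It presumably binds generate_text; under that assumption, the full event wiring would read:

    # fn=generate_text is inferred from context, not shown in this diff.
    generate_btn.click(
        fn=generate_text,
        inputs=[prompt_input, max_tokens_input, temp_input, topk_input],
        outputs=[output_text]
    )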