VladBoyko commited on
Commit
ebe1956
Β·
verified Β·
1 Parent(s): fa4b185

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +323 -257
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer
 
4
  import re
5
  import time
6
 
@@ -35,10 +36,11 @@ class VibeThinkerModel:
35
  print(f"❌ Error loading model: {e}")
36
  raise
37
 
38
- def generate_response(self, prompt, temperature=0.6, max_new_tokens=8192, max_thinking_tokens=4096):
39
- """Generate response with thinking length control"""
40
  if not self.model or not self.tokenizer:
41
- return "Model not loaded!", 0, 0, 0, None
 
42
 
43
  try:
44
  start_time = time.time()
@@ -61,232 +63,342 @@ Keep reasoning under {max_thinking_tokens} tokens. Be direct and avoid repetitio
61
  inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.device)
62
  prompt_length = inputs.input_ids.shape[1]
63
 
64
- with torch.no_grad():
65
- outputs = self.model.generate(
66
- **inputs,
67
- max_new_tokens=max_new_tokens,
68
- temperature=temperature,
69
- top_p=0.95,
70
- top_k=50,
71
- do_sample=True,
72
- repetition_penalty=1.1,
73
- pad_token_id=self.tokenizer.eos_token_id,
74
- )
 
 
 
 
 
 
 
 
75
 
76
- full_output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
77
 
78
- if "<|im_start|>assistant" in full_output:
79
- generated_text = full_output.split("<|im_start|>assistant")[-1].strip()
80
- else:
81
- generated_text = full_output[len(formatted_prompt):].strip()
82
 
83
- # Store original before truncation
84
- original_text = generated_text
85
- truncated_content = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
- # Check for loops and truncate if needed
88
- if self._detect_loop(generated_text):
89
- generated_text = self._truncate_loop(generated_text)
90
- # Calculate what was removed
91
- if len(generated_text) < len(original_text):
92
- truncated_content = original_text[len(generated_text):].strip()
93
- generated_text += "\n\n⚠️ *[Repetitive content detected and truncated]*"
94
 
95
- generation_time = time.time() - start_time
96
- completion_length = outputs.shape[1] - prompt_length
 
97
 
98
- return generated_text, prompt_length, completion_length, generation_time, truncated_content
 
 
 
 
 
 
99
 
100
  except Exception as e:
101
- return f"Error during generation: {str(e)}", 0, 0, 0, None
102
 
103
- def _detect_loop(self, text):
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  words = text.split()
105
- if len(words) < 20:
106
  return False
107
 
108
- for length in [10, 15, 20]:
109
- if len(words) < length * 3:
 
 
 
 
 
110
  continue
111
- for i in range(len(words) - length * 3):
112
- phrase = ' '.join(words[i:i+length])
113
- rest = ' '.join(words[i+length:])
114
- if rest.count(phrase) >= 2:
 
 
 
 
 
 
115
  return True
 
116
  return False
117
 
118
- def _truncate_loop(self, text):
 
 
 
 
119
  words = text.split()
120
- for length in [10, 15, 20]:
121
- if len(words) < length * 2:
 
 
122
  continue
123
- for i in range(len(words) - length * 2):
124
- phrase = ' '.join(words[i:i+length])
125
- rest_start = i + length
126
- rest = ' '.join(words[rest_start:])
127
- if phrase in rest:
128
- return ' '.join(words[:rest_start])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  return text
130
 
131
- def parse_model_output(text, truncated_content=None):
132
- """
133
- Parse model output into sections with proper edge case handling
134
- """
135
- loop_warning = ""
136
- loop_details_html = ""
137
 
138
- if "[Repetitive content detected and truncated]" in text:
139
- text = text.replace("⚠️ *[Repetitive content detected and truncated]*", "")
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
- # Create expandable section for truncated content
142
- if truncated_content:
143
- truncated_escaped = truncated_content.replace('<', '&lt;').replace('>', '&gt;')
144
- truncated_word_count = len(truncated_content.split())
145
- loop_details_html = f"""
146
- <details style="background: #fff3cd; border: 2px solid #ffc107; border-radius: 8px; padding: 16px; margin-top: 12px;">
147
- <summary style="cursor: pointer; font-weight: 600; font-size: 14px; color: #856404; user-select: none; display: flex; align-items: center; gap: 8px;">
148
- <span style="font-size: 18px;">⚠️</span>
149
- <span>Truncated Repetitive Content ({truncated_word_count} words removed)</span>
150
- <span style="margin-left: auto; font-size: 12px; font-weight: normal;">β–Ά Click to view what was removed</span>
151
- </summary>
152
- <div style="margin-top: 12px; padding-top: 12px; border-top: 2px solid #ffc107; color: #856404; line-height: 1.6; white-space: pre-wrap; font-size: 13px; font-family: 'SF Mono', Monaco, 'Courier New', monospace; max-height: 400px; overflow-y: auto;">
153
- {truncated_escaped}
154
- </div>
155
- </details>
156
- """
157
 
158
- loop_warning = loop_details_html
159
-
160
- # Extract all code blocks
161
- code_pattern = r'```(\w+)?\n(.*?)```'
162
- code_blocks = re.findall(code_pattern, text, re.DOTALL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
- # Remove code blocks from text
165
- text_without_code = re.sub(code_pattern, '###CODE_PLACEHOLDER###', text, flags=re.DOTALL)
166
 
167
- # Try to find thinking section
168
- thinking_content = ""
169
- explanation_content = text_without_code
170
 
171
- # Check for explicit thinking tags
172
- thinking_patterns = [
173
- r'<think>(.*?)</think>',
174
- r'<thinking>(.*?)</thinking>',
175
- ]
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
- for pattern in thinking_patterns:
178
- match = re.search(pattern, text_without_code, re.DOTALL | re.IGNORECASE)
179
- if match:
180
- thinking_content = match.group(1).strip()
181
- explanation_content = re.sub(pattern, '', text_without_code, flags=re.DOTALL | re.IGNORECASE).strip()
182
- break
183
 
184
- # If no explicit tags, try to detect thinking by content
185
- if not thinking_content:
186
- split_patterns = [
187
- r'^(.*?)(?=\n\n(?:Solution|Implementation|Code|Here\'s|Let me|Let\'s code|Final code))',
188
- r'^(.*?)(?=###CODE_PLACEHOLDER###)',
189
- ]
190
 
191
- for pattern in split_patterns:
192
- match = re.search(pattern, text_without_code, re.DOTALL | re.IGNORECASE)
193
- if match:
194
- potential_thinking = match.group(1).strip()
195
-
196
- if len(potential_thinking) > 150:
197
- thinking_lower = potential_thinking.lower()
198
- reasoning_keywords = [
199
- 'let me think', 'let\'s think', 'first', 'approach', 'idea',
200
- 'we can', 'we need', 'step', 'analyze', 'consider', 'observation'
201
- ]
202
- if any(keyword in thinking_lower for keyword in reasoning_keywords):
203
- thinking_content = potential_thinking
204
- explanation_content = text_without_code[len(potential_thinking):].strip()
205
- break
206
-
207
- # Clean up explanation
208
- explanation_content = explanation_content.replace('###CODE_PLACEHOLDER###', '').strip()
209
- explanation_content = re.sub(r'\n{3,}', '\n\n', explanation_content)
210
- explanation_content = re.sub(
211
- r'(?:Implementation|Code|Solution|Here\'s the code|Final code):\s*$',
212
- '',
213
- explanation_content,
214
- flags=re.IGNORECASE
215
- ).strip()
216
-
217
- # Handle boxed answers
218
- answer_match = re.search(r'\\boxed\{([^}]+)\}', explanation_content)
219
- if answer_match:
220
- explanation_content = f"**Final Answer:** {answer_match.group(1)}\n\n{explanation_content}"
221
-
222
- explanation_content += loop_warning
223
-
224
- return thinking_content, explanation_content, code_blocks
225
-
226
- def format_output_html(thinking, explanation, code_blocks, prompt_tokens, completion_tokens, generation_time):
227
- """
228
- Format output with harmonized design and edge case handling
229
  """
230
- total_tokens = prompt_tokens + completion_tokens
231
- thinking_tokens_est = len(thinking.split()) * 1.3 if thinking else 0
232
- tokens_per_sec = completion_tokens / generation_time if generation_time > 0 else 0
233
 
234
- # Card style for consistent sections
235
- card_base_style = "background: #ffffff; border-radius: 12px; padding: 24px; margin-bottom: 20px; box-shadow: 0 2px 8px rgba(0,0,0,0.08);"
 
 
 
 
 
 
 
 
236
 
237
- # Thinking section (collapsed, only show if exists)
238
- thinking_html = ""
239
  if thinking and len(thinking.strip()) > 0:
240
  thinking_escaped = thinking.replace('<', '&lt;').replace('>', '&gt;')
241
- thinking_html = f"""
242
- <details style="{card_base_style} border-left: 4px solid #6c757d;">
243
- <summary style="cursor: pointer; font-weight: 600; font-size: 16px; color: #495057; user-select: none; display: flex; align-items: center; gap: 10px; padding: 4px 0;">
 
244
  <span style="font-size: 20px;">🧠</span>
245
  <span>Reasoning Process</span>
246
- <span style="margin-left: auto; font-size: 13px; color: #6c757d; font-weight: normal;">~{int(thinking_tokens_est):,} tokens β€’ Click to expand β–Ό</span>
247
  </summary>
248
  <div style="margin-top: 20px; padding-top: 20px; border-top: 2px solid #e9ecef; color: #495057; line-height: 1.8; white-space: pre-wrap; font-size: 14px; font-family: 'SF Mono', Monaco, 'Courier New', monospace;">
249
- {thinking_escaped}
250
  </div>
251
  </details>
252
  """
253
 
254
- # Explanation section (only show if has meaningful content)
255
- # Note: explanation may contain the loop warning HTML which should NOT be escaped
256
- explanation_html = ""
257
  if explanation and len(explanation.strip()) > 10:
258
- # Split into text and HTML parts
259
- # If it contains our loop warning HTML, don't escape that part
260
- if '<details style="background: #fff3cd' in explanation:
261
- # Split at the warning
262
- parts = explanation.split('<details style="background: #fff3cd', 1)
263
- text_part = parts[0].replace('<', '&lt;').replace('>', '&gt;')
264
- html_part = '<details style="background: #fff3cd' + parts[1] if len(parts) > 1 else ''
265
- explanation_display = text_part + html_part
266
- else:
267
- explanation_display = explanation.replace('<', '&lt;').replace('>', '&gt;')
268
-
269
- explanation_html = f"""
270
- <div style="{card_base_style} border-left: 4px solid #28a745;">
271
- <h3 style="margin: 0 0 16px 0; color: #28a745; font-size: 18px; font-weight: 600; display: flex; align-items: center; gap: 10px;">
272
- <span style="font-size: 22px;">βœ…</span>
273
  <span>Solution Explanation</span>
274
  </h3>
275
  <div style="color: #495057; line-height: 1.8; font-size: 15px; white-space: pre-wrap;">
276
- {explanation_display}
277
  </div>
278
  </div>
279
  """
280
 
281
- # Code section (only show if code exists)
282
- code_html = ""
283
  if code_blocks and len(code_blocks) > 0:
284
  code_blocks_html = ""
285
  for idx, (lang, code) in enumerate(code_blocks):
286
  lang_display = lang if lang else "code"
287
- code_id = f"code_{idx}"
288
  code_clean = code.strip()
289
 
 
 
 
290
  code_blocks_html += f"""
291
  <div style="margin-bottom: 16px; background: #1e1e1e; border-radius: 10px; overflow: hidden; box-shadow: 0 2px 8px rgba(0,0,0,0.15);">
292
  <div style="background: #2d2d2d; padding: 12px 20px; color: #e0e0e0; font-weight: 600; font-size: 13px; display: flex; justify-content: space-between; align-items: center;">
@@ -295,13 +407,13 @@ def format_output_html(thinking, explanation, code_blocks, prompt_tokens, comple
295
  <span>{lang_display}</span>
296
  </span>
297
  <div style="display: flex; gap: 8px;">
298
- <button onclick="navigator.clipboard.writeText(document.getElementById('{code_id}').textContent); this.textContent='βœ“ Copied'; setTimeout(() => this.textContent='πŸ“‹ Copy', 2000)"
299
  style="background: #28a745; color: white; border: none; padding: 7px 16px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: all 0.2s;"
300
  onmouseover="if(this.textContent==='πŸ“‹ Copy') this.style.background='#218838'"
301
  onmouseout="if(this.textContent==='πŸ“‹ Copy') this.style.background='#28a745'">
302
  πŸ“‹ Copy
303
  </button>
304
- <button onclick="downloadCode(document.getElementById('{code_id}').textContent, '{lang_display}')"
305
  style="background: #007bff; color: white; border: none; padding: 7px 16px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: all 0.2s;"
306
  onmouseover="this.style.background='#0056b3'"
307
  onmouseout="this.style.background='#007bff'">
@@ -309,14 +421,14 @@ def format_output_html(thinking, explanation, code_blocks, prompt_tokens, comple
309
  </button>
310
  </div>
311
  </div>
312
- <pre style="margin: 0; padding: 20px; color: #d4d4d4; overflow-x: auto; font-family: 'SF Mono', Monaco, 'Courier New', monospace; font-size: 14px; line-height: 1.6; background: #1e1e1e;"><code id="{code_id}">{code_clean}</code></pre>
313
  </div>
314
  """
315
 
316
- code_html = f"""
317
- <div style="{card_base_style} border-left: 4px solid #007bff;">
318
- <h3 style="margin: 0 0 20px 0; color: #007bff; font-size: 18px; font-weight: 600; display: flex; align-items: center; gap: 10px;">
319
- <span style="font-size: 22px;">πŸ“</span>
320
  <span>Implementation</span>
321
  </h3>
322
  {code_blocks_html}
@@ -345,85 +457,31 @@ def format_output_html(thinking, explanation, code_blocks, prompt_tokens, comple
345
  </script>
346
  """
347
 
348
- # If no explanation but has code, add a minimal message
349
- if not explanation_html and code_html:
350
- explanation_html = f"""
351
- <div style="{card_base_style} border-left: 4px solid #6c757d;">
352
- <p style="color: #6c757d; font-size: 14px; margin: 0; font-style: italic;">
353
- No explanation provided - see implementation below.
354
- </p>
355
- </div>
356
- """
357
-
358
- html = f"""
359
- <div style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; max-width: 100%; margin: 0 auto; background: #f8f9fa; padding: 20px; border-radius: 12px;">
360
-
361
- <!-- Stats Card -->
362
- <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 24px; border-radius: 12px; margin-bottom: 20px; color: white; box-shadow: 0 4px 12px rgba(102,126,234,0.3);">
363
- <h3 style="margin: 0 0 16px 0; font-size: 17px; font-weight: 600; opacity: 0.95;">πŸ“Š Generation Stats</h3>
364
- <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(130px, 1fr)); gap: 12px; font-size: 13px;">
365
- <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
366
- <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Time</div>
367
- <div style="font-size: 22px; font-weight: 700;">{generation_time:.1f}s</div>
368
- </div>
369
- <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
370
- <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Speed</div>
371
- <div style="font-size: 22px; font-weight: 700;">{tokens_per_sec:.1f} t/s</div>
372
- </div>
373
- <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
374
- <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Prompt</div>
375
- <div style="font-size: 22px; font-weight: 700;">{prompt_tokens:,}</div>
376
- </div>
377
- <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
378
- <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Output</div>
379
- <div style="font-size: 22px; font-weight: 700;">{completion_tokens:,}</div>
380
- </div>
381
- <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
382
- <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Thinking</div>
383
- <div style="font-size: 22px; font-weight: 700;">~{int(thinking_tokens_est):,}</div>
384
- </div>
385
- <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
386
- <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Total</div>
387
- <div style="font-size: 22px; font-weight: 700;">{total_tokens:,}</div>
388
- </div>
389
- </div>
390
- </div>
391
-
392
- <!-- Content Sections -->
393
- {thinking_html}
394
- {explanation_html}
395
- {code_html}
396
-
397
- </div>
398
- """
399
  return html
400
 
401
- # Initialize model
402
- print("πŸ”„ Initializing VibeThinker-1.5B...")
403
- vibe_model = VibeThinkerModel()
404
-
405
- def generate_solution(prompt, temperature=0.6, max_tokens=8192, max_thinking_tokens=4096, progress=gr.Progress()):
406
- """Generate and format solution with progress tracking"""
407
  if not prompt.strip():
408
- return "<p style='color: #dc3545; font-size: 16px; padding: 20px;'>⚠️ Please enter a problem to solve.</p>"
 
409
 
410
- progress(0, desc="πŸ”„ Initializing...")
411
- progress(0.2, desc="🧠 Generating solution...")
412
 
413
- response, prompt_tokens, completion_tokens, gen_time, truncated = vibe_model.generate_response(
 
414
  prompt,
415
  temperature=temperature,
416
  max_new_tokens=max_tokens,
417
  max_thinking_tokens=max_thinking_tokens
418
- )
419
-
420
- progress(0.8, desc="πŸ“ Formatting output...")
421
-
422
- thinking, explanation, code_blocks = parse_model_output(response, truncated)
423
- html_output = format_output_html(thinking, explanation, code_blocks, prompt_tokens, completion_tokens, gen_time)
424
-
425
- progress(1.0, desc="βœ… Complete!")
426
- return html_output
427
 
428
  # Create Gradio interface
429
  with gr.Blocks(
@@ -438,6 +496,8 @@ with gr.Blocks(
438
  🎯 **Best for**: Python algorithmic problems with clear input/output specifications
439
 
440
  ⚠️ **Note**: This model is specialized for competitive programming, not general software development
 
 
441
  """)
442
 
443
  with gr.Row():
@@ -468,11 +528,17 @@ with gr.Blocks(
468
  - Higher thinking tokens (4096-8192) for complex reasoning
469
  - Temperature 0.6 balances creativity and accuracy
470
 
471
- **Output Structure:**
472
- - 🧠 **Reasoning** (collapsed) - Model's thinking process
473
- - βœ… **Explanation** - Solution approach without code
474
- - πŸ“ **Implementation** - Clean code with copy/download
475
- - ⚠️ **Truncated Content** (if detected) - View removed repetitions
 
 
 
 
 
 
476
  """)
477
 
478
  generate_btn = gr.Button("πŸš€ Generate Solution", variant="primary", size="lg")
@@ -482,7 +548,7 @@ with gr.Blocks(
482
  output_html = gr.HTML(label="Solution")
483
 
484
  generate_btn.click(
485
- fn=generate_solution,
486
  inputs=[prompt_input, temperature_slider, max_tokens_slider, max_thinking_slider],
487
  outputs=output_html
488
  )
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
4
+ from threading import Thread
5
  import re
6
  import time
7
 
 
36
  print(f"❌ Error loading model: {e}")
37
  raise
38
 
39
+ def generate_response_streaming(self, prompt, temperature=0.6, max_new_tokens=8192, max_thinking_tokens=4096):
40
+ """Generate response with streaming and real-time loop detection"""
41
  if not self.model or not self.tokenizer:
42
+ yield "Model not loaded!", None, False
43
+ return
44
 
45
  try:
46
  start_time = time.time()
 
63
  inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.device)
64
  prompt_length = inputs.input_ids.shape[1]
65
 
66
+ # Create streamer
67
+ streamer = TextIteratorStreamer(
68
+ self.tokenizer,
69
+ skip_prompt=True,
70
+ skip_special_tokens=True
71
+ )
72
+
73
+ # Generation kwargs
74
+ generation_kwargs = dict(
75
+ **inputs,
76
+ max_new_tokens=max_new_tokens,
77
+ temperature=temperature,
78
+ top_p=0.95,
79
+ top_k=50,
80
+ do_sample=True,
81
+ repetition_penalty=1.1,
82
+ pad_token_id=self.tokenizer.eos_token_id,
83
+ streamer=streamer,
84
+ )
85
 
86
+ # Start generation in background thread
87
+ thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
88
+ thread.start()
89
 
90
+ # Stream tokens with real-time loop detection
91
+ generated_text = ""
92
+ last_loop_check = ""
93
+ loop_detected = False
94
 
95
+ for new_text in streamer:
96
+ if loop_detected:
97
+ break # Stop streaming if loop detected
98
+
99
+ generated_text += new_text
100
+ generation_time = time.time() - start_time
101
+ tokens_generated = len(self.tokenizer.encode(generated_text))
102
+
103
+ # Check for loops every ~50 tokens
104
+ if len(generated_text) - len(last_loop_check) > 200:
105
+ if self._detect_loop_realtime(generated_text):
106
+ loop_detected = True
107
+ # Truncate at loop point
108
+ generated_text = self._truncate_loop(generated_text)
109
+ last_loop_check = generated_text
110
+
111
+ # Yield current state
112
+ yield generated_text, {
113
+ "prompt_tokens": prompt_length,
114
+ "tokens_generated": tokens_generated,
115
+ "generation_time": generation_time,
116
+ "is_complete": False
117
+ }, loop_detected
118
 
119
+ # Wait for thread to complete
120
+ thread.join()
 
 
 
 
 
121
 
122
+ # Final yield
123
+ final_time = time.time() - start_time
124
+ final_tokens = len(self.tokenizer.encode(generated_text))
125
 
126
+ yield generated_text, {
127
+ "prompt_tokens": prompt_length,
128
+ "completion_tokens": final_tokens,
129
+ "generation_time": final_time,
130
+ "tokens_per_sec": final_tokens / final_time if final_time > 0 else 0,
131
+ "is_complete": True
132
+ }, loop_detected
133
 
134
  except Exception as e:
135
+ yield f"Error during generation: {str(e)}", None, False
136
 
137
+ def _detect_loop_realtime(self, text, check_window=200, min_repetitions=5):
138
+ """Quick loop detection for real-time streaming.
139
+
140
+ Real loops repeat SHORT phrases (3-9 words) MANY times (5+ repetitions).
141
+ Example: "Wait, let me reconsider... Wait, let me reconsider... Wait, let me reconsider..."
142
+
143
+ Args:
144
+ text: Full generated text
145
+ check_window: Number of recent words to check (default: 200)
146
+ min_repetitions: Minimum repetitions to consider a loop (default: 5)
147
+
148
+ Returns:
149
+ bool: True if a loop is detected
150
+ """
151
  words = text.split()
152
+ if len(words) < 30:
153
  return False
154
 
155
+ # Check last N words for repetitive patterns
156
+ recent_words = words[-check_window:] if len(words) > check_window else words
157
+ text_to_check = ' '.join(recent_words)
158
+
159
+ # Look for short phrases (3-9 words) repeated multiple times
160
+ for phrase_len in range(3, 10): # 3 to 9 words
161
+ if len(recent_words) < phrase_len * min_repetitions:
162
  continue
163
+
164
+ # Check different starting positions
165
+ for i in range(len(recent_words) - phrase_len):
166
+ phrase = ' '.join(recent_words[i:i+phrase_len])
167
+
168
+ # Count how many times this phrase appears
169
+ count = text_to_check.count(phrase)
170
+
171
+ # If phrase appears 5+ times, it's a loop
172
+ if count >= min_repetitions:
173
  return True
174
+
175
  return False
176
 
177
+ def _truncate_loop(self, text, min_repetitions=5):
178
+ """Truncate text at the point where loop starts.
179
+
180
+ Find where a 3-9 word phrase starts repeating 5+ times and truncate there.
181
+ """
182
  words = text.split()
183
+
184
+ # Check for short phrases (3-9 words) repeated multiple times
185
+ for phrase_len in range(3, 10):
186
+ if len(words) < phrase_len * min_repetitions:
187
  continue
188
+
189
+ # Scan through text to find loop start point
190
+ for i in range(len(words) - phrase_len * min_repetitions):
191
+ phrase = ' '.join(words[i:i+phrase_len])
192
+
193
+ # Count consecutive repetitions starting from position i
194
+ repetition_count = 0
195
+ check_pos = i
196
+
197
+ while check_pos + phrase_len <= len(words):
198
+ check_phrase = ' '.join(words[check_pos:check_pos+phrase_len])
199
+ if check_phrase == phrase:
200
+ repetition_count += 1
201
+ check_pos += phrase_len
202
+ else:
203
+ break
204
+
205
+ # If we found 5+ consecutive repetitions, truncate at loop start
206
+ if repetition_count >= min_repetitions:
207
+ return ' '.join(words[:i])
208
+
209
+ # If no clear loop found, return original
210
  return text
211
 
212
+ # Initialize model
213
+ print("πŸ”„ Initializing VibeThinker-1.5B...")
214
+ vibe_model = VibeThinkerModel()
215
+
216
+ class IntelligentStreamParser:
217
+ """Parse streaming output in real-time into sections"""
218
 
219
+ def __init__(self):
220
+ self.reset()
221
+
222
+ def reset(self):
223
+ self.thinking = ""
224
+ self.explanation = ""
225
+ self.code_blocks = []
226
+ self.current_code_lang = None
227
+ self.current_code_content = ""
228
+ self.in_code_block = False
229
+ self.phase = "thinking" # thinking -> explanation -> code
230
+
231
+ def parse_chunk(self, full_text):
232
+ """Parse text in real-time as it streams"""
233
 
234
+ # Detect code blocks with regex
235
+ code_pattern = r'```(\w+)?\n(.*?)```'
236
+ found_codes = re.findall(code_pattern, full_text, re.DOTALL)
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
+ # Remove code blocks from text for section detection
239
+ text_without_code = re.sub(code_pattern, '###CODE_PLACEHOLDER###', full_text, flags=re.DOTALL)
240
+
241
+ # Try to split thinking and explanation
242
+ thinking_content = ""
243
+ explanation_content = text_without_code
244
+
245
+ # Check for explicit markers
246
+ if "Solution:" in text_without_code or "Explanation:" in text_without_code:
247
+ parts = re.split(r'(?:Solution|Explanation):', text_without_code, maxsplit=1)
248
+ if len(parts) == 2:
249
+ thinking_content = parts[0].strip()
250
+ explanation_content = parts[1].strip()
251
+ elif "```" in text_without_code:
252
+ # Split at first code block
253
+ parts = text_without_code.split("###CODE_PLACEHOLDER###", maxsplit=1)
254
+ if len(parts) == 2 and len(parts[0]) > 100:
255
+ # Check if first part looks like thinking
256
+ first_part_lower = parts[0].lower()
257
+ thinking_keywords = ['approach', 'idea', 'step', 'first', "let's", 'plan', 'strategy']
258
+ if any(kw in first_part_lower for kw in thinking_keywords):
259
+ thinking_content = parts[0].strip()
260
+ explanation_content = parts[1].strip()
261
+
262
+ # Clean up placeholders
263
+ explanation_content = explanation_content.replace('###CODE_PLACEHOLDER###', '').strip()
264
+
265
+ return {
266
+ 'thinking': thinking_content,
267
+ 'explanation': explanation_content,
268
+ 'code_blocks': found_codes
269
+ }
270
+
271
+ parser = IntelligentStreamParser()
272
+
273
+ def format_streaming_html(generated_text, stats, loop_detected, is_generating=True):
274
+ """Format streaming output with intelligent parsing"""
275
+
276
+ # Parse the current text
277
+ parsed = parser.parse_chunk(generated_text)
278
+
279
+ thinking = parsed['thinking']
280
+ explanation = parsed['explanation']
281
+ code_blocks = parsed['code_blocks']
282
+
283
+ # Stats
284
+ if stats:
285
+ prompt_tokens = stats.get('prompt_tokens', 0)
286
+ tokens_generated = stats.get('tokens_generated', 0) or stats.get('completion_tokens', 0)
287
+ generation_time = stats.get('generation_time', 0)
288
+ tokens_per_sec = stats.get('tokens_per_sec', 0) or (tokens_generated / generation_time if generation_time > 0 else 0)
289
+ is_complete = stats.get('is_complete', False)
290
+ else:
291
+ prompt_tokens = tokens_generated = generation_time = tokens_per_sec = 0
292
+ is_complete = False
293
 
294
+ thinking_tokens_est = len(thinking.split()) * 1.3 if thinking else 0
295
+ total_tokens = prompt_tokens + tokens_generated
296
 
297
+ # Card style
298
+ card_base_style = "background: #ffffff; border-radius: 12px; padding: 24px; margin-bottom: 20px; box-shadow: 0 2px 8px rgba(0,0,0,0.08);"
 
299
 
300
+ # Blink cursor CSS
301
+ cursor_style = """
302
+ <style>
303
+ @keyframes blink {
304
+ 0%, 49% { opacity: 1; }
305
+ 50%, 100% { opacity: 0; }
306
+ }
307
+ .cursor {
308
+ display: inline-block;
309
+ width: 2px;
310
+ height: 1em;
311
+ background: #667eea;
312
+ margin-left: 2px;
313
+ animation: blink 0.7s infinite;
314
+ }
315
+ </style>
316
+ """
317
 
318
+ # Status message
319
+ status_emoji = "βœ…" if is_complete else "πŸ”„"
320
+ status_text = "Complete" if is_complete else "Generating..."
 
 
 
321
 
322
+ # Stats card
323
+ html = f"""
324
+ {cursor_style}
325
+ <div style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; max-width: 100%; margin: 0 auto; background: #f8f9fa; padding: 20px; border-radius: 12px;">
 
 
326
 
327
+ <!-- Stats Card -->
328
+ <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 24px; border-radius: 12px; margin-bottom: 20px; color: white; box-shadow: 0 4px 12px rgba(102,126,234,0.3);">
329
+ <h3 style="margin: 0 0 16px 0; font-size: 17px; font-weight: 600; opacity: 0.95;">{status_emoji} {status_text}</h3>
330
+ <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(130px, 1fr)); gap: 12px; font-size: 13px;">
331
+ <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
332
+ <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Time</div>
333
+ <div style="font-size: 22px; font-weight: 700;">{generation_time:.1f}s</div>
334
+ </div>
335
+ <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
336
+ <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Speed</div>
337
+ <div style="font-size: 22px; font-weight: 700;">{tokens_per_sec:.1f} t/s</div>
338
+ </div>
339
+ <div style="background: rgba(255,255,255,0.15); padding: 14px; border-radius: 8px; backdrop-filter: blur(10px);">
340
+ <div style="opacity: 0.85; font-size: 11px; margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.5px;">Tokens</div>
341
+ <div style="font-size: 22px; font-weight: 700;">{tokens_generated:,}</div>
342
+ </div>
343
+ </div>
344
+ </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  """
 
 
 
346
 
347
+ # Loop warning (if detected)
348
+ if loop_detected:
349
+ html += f"""
350
+ <div style="{card_base_style} border-left: 4px solid #ffc107; background: #fff3cd;">
351
+ <div style="color: #856404; font-weight: 600; display: flex; align-items: center; gap: 8px;">
352
+ <span style="font-size: 20px;">⚠️</span>
353
+ <span>Loop Detected - Generation stopped to prevent repetition</span>
354
+ </div>
355
+ </div>
356
+ """
357
 
358
+ # Thinking section (collapsed if exists)
 
359
  if thinking and len(thinking.strip()) > 0:
360
  thinking_escaped = thinking.replace('<', '&lt;').replace('>', '&gt;')
361
+ cursor_html = '<span class="cursor"></span>' if is_generating and not explanation and not code_blocks else ''
362
+ html += f"""
363
+ <details style="{card_base_style} border-left: 4px solid #8b5cf6;">
364
+ <summary style="cursor: pointer; font-weight: 600; font-size: 16px; color: #7c3aed; user-select: none; display: flex; align-items: center; gap: 10px; padding: 4px 0;">
365
  <span style="font-size: 20px;">🧠</span>
366
  <span>Reasoning Process</span>
367
+ <span style="margin-left: auto; font-size: 13px; color: #8b5cf6; font-weight: normal;">~{int(thinking_tokens_est):,} tokens β€’ Click to expand β–Ό</span>
368
  </summary>
369
  <div style="margin-top: 20px; padding-top: 20px; border-top: 2px solid #e9ecef; color: #495057; line-height: 1.8; white-space: pre-wrap; font-size: 14px; font-family: 'SF Mono', Monaco, 'Courier New', monospace;">
370
+ {thinking_escaped}{cursor_html}
371
  </div>
372
  </details>
373
  """
374
 
375
+ # Explanation section
 
 
376
  if explanation and len(explanation.strip()) > 10:
377
+ explanation_escaped = explanation.replace('<', '&lt;').replace('>', '&gt;')
378
+ cursor_html = '<span class="cursor"></span>' if is_generating and not code_blocks else ''
379
+ html += f"""
380
+ <div style="{card_base_style} border-left: 4px solid #10b981;">
381
+ <h3 style="margin: 0 0 16px 0; color: #10b981; font-size: 18px; font-weight: 600; display: flex; align-items: center; gap: 10px;">
382
+ <span style="font-size: 22px;">πŸ’‘</span>
 
 
 
 
 
 
 
 
 
383
  <span>Solution Explanation</span>
384
  </h3>
385
  <div style="color: #495057; line-height: 1.8; font-size: 15px; white-space: pre-wrap;">
386
+ {explanation_escaped}{cursor_html}
387
  </div>
388
  </div>
389
  """
390
 
391
+ # Code blocks
 
392
  if code_blocks and len(code_blocks) > 0:
393
  code_blocks_html = ""
394
  for idx, (lang, code) in enumerate(code_blocks):
395
  lang_display = lang if lang else "code"
396
+ code_id = f"code_{idx}_{int(time.time()*1000)}"
397
  code_clean = code.strip()
398
 
399
+ # Add cursor to last code block if generating
400
+ cursor_html = '<span class="cursor"></span>' if is_generating and idx == len(code_blocks) - 1 else ''
401
+
402
  code_blocks_html += f"""
403
  <div style="margin-bottom: 16px; background: #1e1e1e; border-radius: 10px; overflow: hidden; box-shadow: 0 2px 8px rgba(0,0,0,0.15);">
404
  <div style="background: #2d2d2d; padding: 12px 20px; color: #e0e0e0; font-weight: 600; font-size: 13px; display: flex; justify-content: space-between; align-items: center;">
 
407
  <span>{lang_display}</span>
408
  </span>
409
  <div style="display: flex; gap: 8px;">
410
+ <button onclick="navigator.clipboard.writeText(document.getElementById('{code_id}').textContent.replace('β–Œ', '')); this.textContent='βœ“ Copied'; setTimeout(() => this.textContent='πŸ“‹ Copy', 2000)"
411
  style="background: #28a745; color: white; border: none; padding: 7px 16px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: all 0.2s;"
412
  onmouseover="if(this.textContent==='πŸ“‹ Copy') this.style.background='#218838'"
413
  onmouseout="if(this.textContent==='πŸ“‹ Copy') this.style.background='#28a745'">
414
  πŸ“‹ Copy
415
  </button>
416
+ <button onclick="downloadCode(document.getElementById('{code_id}').textContent.replace('β–Œ', ''), '{lang_display}')"
417
  style="background: #007bff; color: white; border: none; padding: 7px 16px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: all 0.2s;"
418
  onmouseover="this.style.background='#0056b3'"
419
  onmouseout="this.style.background='#007bff'">
 
421
  </button>
422
  </div>
423
  </div>
424
+ <pre style="margin: 0; padding: 20px; color: #d4d4d4; overflow-x: auto; font-family: 'SF Mono', Monaco, 'Courier New', monospace; font-size: 14px; line-height: 1.6; background: #1e1e1e;"><code id="{code_id}">{code_clean}{cursor_html}</code></pre>
425
  </div>
426
  """
427
 
428
+ html += f"""
429
+ <div style="{card_base_style} border-left: 4px solid #6b7280;">
430
+ <h3 style="margin: 0 0 20px 0; color: #6b7280; font-size: 18px; font-weight: 600; display: flex; align-items: center; gap: 10px;">
431
+ <span style="font-size: 22px;">πŸ’»</span>
432
  <span>Implementation</span>
433
  </h3>
434
  {code_blocks_html}
 
457
  </script>
458
  """
459
 
460
+ html += "</div>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  return html
462
 
463
def generate_solution_streaming(prompt, temperature=0.6, max_tokens=8192, max_thinking_tokens=4096):
    """Stream an HTML-rendered solution for *prompt*, yielding one snapshot per model update.

    Parameters mirror the Gradio controls: sampling temperature, overall token
    budget, and the cap on "thinking" tokens. Yields HTML fragments suitable
    for a gr.HTML output; emits a warning fragment and stops immediately when
    the prompt is blank.
    """
    if not prompt.strip():
        # Nothing to solve — show a user-facing warning instead of invoking the model.
        yield "<p style='color: #dc3545; font-size: 16px; padding: 20px;'>⚠️ Please enter a problem to solve.</p>"
        return

    # Clear any section-parser state left over from the previous run.
    parser.reset()

    token_stream = vibe_model.generate_response_streaming(
        prompt,
        temperature=temperature,
        max_new_tokens=max_tokens,
        max_thinking_tokens=max_thinking_tokens
    )
    for generated_text, stats, loop_detected in token_stream:
        if not stats:
            # A falsy stats payload signals an error; the text carries the message.
            yield f"<p style='color: #dc3545;'>Error: {generated_text}</p>"
        else:
            still_streaming = not stats.get('is_complete', False)
            yield format_streaming_html(generated_text, stats, loop_detected, still_streaming)
 
 
485
 
486
  # Create Gradio interface
487
  with gr.Blocks(
 
496
  🎯 **Best for**: Python algorithmic problems with clear input/output specifications
497
 
498
  ⚠️ **Note**: This model is specialized for competitive programming, not general software development
499
+
500
+ ✨ **Features**: Real-time streaming with intelligent section parsing and automatic loop detection
501
  """)
502
 
503
  with gr.Row():
 
528
  - Higher thinking tokens (4096-8192) for complex reasoning
529
  - Temperature 0.6 balances creativity and accuracy
530
 
531
+ **Real-time Features:**
532
+ - πŸ”„ Live token-by-token streaming
533
+ - 🧠 Intelligent section parsing (thinking/explanation/code)
534
+ - ⚠️ Automatic loop detection (stops if repetitive patterns detected)
535
+ - ⚑ Blinking cursors on actively streaming sections
536
+ - πŸ“Š Live statistics (time, speed, tokens)
537
+
538
+ **Loop Detection:**
539
+ - Monitors for 3-9 word phrases repeated 5+ times
540
+ - Automatically stops generation to save tokens
541
+ - Truncates at loop start position
542
  """)
543
 
544
  generate_btn = gr.Button("πŸš€ Generate Solution", variant="primary", size="lg")
 
548
  output_html = gr.HTML(label="Solution")
549
 
550
  generate_btn.click(
551
+ fn=generate_solution_streaming,
552
  inputs=[prompt_input, temperature_slider, max_tokens_slider, max_thinking_slider],
553
  outputs=output_html
554
  )