Alikestocode committed on
Commit
fc0ab14
·
1 Parent(s): c454e43

Make GPU duration slider functional with dynamic wrapper creation

Browse files

- Create GPU wrappers dynamically based on the user's `gpu_duration` slider value
- Cache wrappers to avoid recreating them for the same duration
- Round duration to the nearest multiple of 60 seconds (clamped to 60–1800) so nearby slider values share one cached wrapper
- Now the slider value actually controls GPU allocation duration

Files changed (1) hide show
  1. app.py +24 -5
app.py CHANGED
@@ -387,7 +387,18 @@ def _generate_router_plan_streaming_internal(
387
  yield "", {}, error_msg, ""
388
 
389
 
390
- @spaces.GPU(duration=1800) # Use maximum duration to allow user flexibility
 
 
 
 
 
 
 
 
 
 
 
391
  def generate_router_plan_streaming(
392
  user_task: str,
393
  context: str,
@@ -404,13 +415,21 @@ def generate_router_plan_streaming(
404
  """
405
  Generate router plan with streaming output.
406
 
407
- Note: gpu_duration parameter is for user awareness. The actual GPU allocation
408
- uses the decorator's duration (1800s max) to allow flexibility.
409
  """
410
- yield from _generate_router_plan_streaming_internal(
 
 
 
 
 
 
 
 
 
411
  user_task, context, acceptance, extra_guidance,
412
  difficulty, tags, model_choice, max_new_tokens,
413
- temperature, top_p, gpu_duration
414
  )
415
 
416
 
 
387
  yield "", {}, error_msg, ""
388
 
389
 
390
+ def _create_gpu_wrapper(duration: int):
391
+ """Create a GPU-decorated wrapper function with specific duration."""
392
+ @spaces.GPU(duration=duration)
393
+ def wrapper(*args, **kwargs):
394
+ yield from _generate_router_plan_streaming_internal(*args, **kwargs)
395
+ return wrapper
396
+
397
+
398
+ # Cache for GPU wrappers to avoid recreating them
399
+ _gpu_wrapper_cache: Dict[int, Any] = {}
400
+
401
+
402
  def generate_router_plan_streaming(
403
  user_task: str,
404
  context: str,
 
415
  """
416
  Generate router plan with streaming output.
417
 
418
+ Uses user-specified gpu_duration to create a dynamically decorated function.
 
419
  """
420
+ # Round to nearest 60 seconds for caching efficiency
421
+ rounded_duration = ((gpu_duration + 30) // 60) * 60
422
+ rounded_duration = max(60, min(1800, rounded_duration)) # Clamp between 60 and 1800
423
+
424
+ # Get or create wrapper with this duration
425
+ if rounded_duration not in _gpu_wrapper_cache:
426
+ _gpu_wrapper_cache[rounded_duration] = _create_gpu_wrapper(rounded_duration)
427
+
428
+ wrapper = _gpu_wrapper_cache[rounded_duration]
429
+ yield from wrapper(
430
  user_task, context, acceptance, extra_guidance,
431
  difficulty, tags, model_choice, max_new_tokens,
432
+ temperature, top_p, rounded_duration
433
  )
434
 
435