Spaces:

evgueni-p
/

fbmc-chronos2

Sleeping

fbmc-chronos2 / app.py

Evgueni Poloukarov

fix: revert PyTorch + redirect cache to /tmp for A100

73e9c10 15 days ago

5.77 kB

	#!/usr/bin/env python3
	"""
	FBMC Chronos-2 Forecasting API
	HuggingFace Space Gradio Interface
	Version: 1.6.0 - Extended Context Window (2,160 hours = 90 days / 3 months)
	FORCE REBUILD: Optimized for 96GB VRAM with memory profiling diagnostics
	"""

	# CRITICAL: the CUDA allocator configuration has to be in the environment
	# BEFORE torch is imported for the first time (directly, or indirectly via
	# gradio or any other dependency). Expandable segments are enabled to avoid
	# fragmentation-induced OOM errors that occur even with enough free memory.
	import os
	os.environ.update({'PYTORCH_CUDA_ALLOC_CONF': 'expandable_segments:True'})

	# Point ALL caches at /tmp so the Space stays under its 50GB storage limit.
	# Exceeding that limit is the most common cause of "no logs" silent failures
	# on A100 Spaces.
	# See: https://discuss.huggingface.co/t/how-to-fix-workload-evicted-storage-limit-exceeded-50g-error-in-huggingface-spaces/169258
	os.environ.update({
	    'TORCH_HOME': '/tmp/torch_cache',
	    'HF_HOME': '/tmp/hf_home',
	    'TRANSFORMERS_CACHE': '/tmp/transformers_cache',
	    'HUB_DIR': '/tmp/torch_hub',
	})

	# Startup diagnostics; flushed immediately so they reach the Space logs even
	# if a later import crashes the process.
	import sys
	print(f"[STARTUP] Python version: {sys.version}", flush=True)
	print(f"[STARTUP] Python path: {sys.path[:3]}", flush=True)
	print(f"[STARTUP] PyTorch memory config: {os.environ.get('PYTORCH_CUDA_ALLOC_CONF')}", flush=True)

	import gradio as gr
	from datetime import datetime

	print("[STARTUP] Basic imports successful", flush=True)

	# Import the inference backend defensively: on any failure the error and its
	# traceback are printed and run_inference is bound to None, so the rest of the
	# module can still be loaded.
	try:
	    from src.forecasting.chronos_inference import run_inference
	    print("[STARTUP] chronos_inference import successful", flush=True)
	except Exception as e:
	    import traceback
	    print(f"[ERROR] Failed to import chronos_inference: {e}", flush=True)
	    traceback.print_exc()
	    run_inference = None


	# Global configuration
	# Supported forecast modes: internal key -> human-readable description.
	FORECAST_TYPES = dict(
	    smoke_test="Smoke Test (1 border × 7 days)",
	    full_14day="Full Forecast (All borders × 14 days)",
	)

	print("[STARTUP] Configuration loaded", flush=True)


	def forecast_api(run_date_str, forecast_type):
	    """
	    API endpoint for triggering forecasts.

	    Exceptions raised while validating the date or running inference are not
	    propagated: they are logged and written to a text file whose path is
	    returned instead, so the caller always receives a file path.

	    Args:
	        run_date_str: Date in YYYY-MM-DD format
	        forecast_type: 'smoke_test' or 'full_14day'

	    Returns:
	        Path to the forecast results file returned by run_inference, or
	        "/tmp/error.txt" (containing "Error: <message>") if anything failed.
	    """
	    # Imported locally: at module level traceback is only imported on the
	    # startup-failure path, so it may not be bound as a global.
	    import traceback

	    try:
	        # Validate run date. Only the format check matters here; the original
	        # string (not the parsed value) is what gets handed to run_inference.
	        datetime.strptime(run_date_str, "%Y-%m-%d")

	        # run_inference is None when its import failed at startup. Report that
	        # explicitly instead of the opaque "'NoneType' object is not callable".
	        if run_inference is None:
	            raise RuntimeError(
	                "Inference backend unavailable: chronos_inference failed to "
	                "import at startup (see startup logs)"
	            )

	        # Run inference
	        return run_inference(
	            run_date=run_date_str,
	            forecast_type=forecast_type,
	            output_dir="/tmp",
	        )

	    except Exception as e:
	        error_msg = f"Error: {str(e)}"
	        # Flush and include the traceback so the failure is visible in the logs
	        # right away, consistent with the startup diagnostics.
	        print(error_msg, flush=True)
	        traceback.print_exc()
	        # Return error message as text file. The encoding is explicit so a
	        # non-ASCII message cannot fail under an ASCII default locale.
	        error_path = "/tmp/error.txt"
	        with open(error_path, 'w', encoding='utf-8') as f:
	            f.write(error_msg)
	        return error_path


	# Build Gradio interface
	# Layout: intro text, a two-column row (configuration inputs | results), the
	# button wiring, and a static "About" section.
	# NOTE(review): the nesting below is reconstructed from the order of the
	# components - confirm against the deployed layout.
	with gr.Blocks(title="FBMC Chronos-2 Forecasting") as demo:
	    gr.Markdown("""
	    # FBMC Chronos-2 Zero-Shot Forecasting API

	    Flow-Based Market Coupling electricity flow forecasting using Amazon Chronos-2.

	    This Space provides GPU-accelerated zero-shot inference for cross-border electricity flows.
	    """)

	    with gr.Row():
	        # Left column: forecast parameters and the trigger button.
	        with gr.Column():
	            gr.Markdown("### Configuration")

	            run_date_input = gr.Textbox(
	                label="Run Date (YYYY-MM-DD)",
	                value="2025-09-30",
	                placeholder="2025-09-30",
	                info="Date when forecast is made (data up to this date is historical)",
	            )

	            # The radio exposes the FORECAST_TYPES keys, which are the values
	            # passed to forecast_api as forecast_type.
	            forecast_type_input = gr.Radio(
	                choices=list(FORECAST_TYPES),
	                value="smoke_test",
	                label="Forecast Type",
	                info="Smoke test: Quick validation (1 border, 7 days). Full: Production forecast (all borders, 14 days)",
	            )

	            submit_btn = gr.Button("Run Forecast", variant="primary")

	        # Right column: downloadable result plus a description of its format.
	        with gr.Column():
	            gr.Markdown("### Results")

	            output_file = gr.File(
	                label="Download Forecast Results",
	                type="filepath",
	            )

	            gr.Markdown("""
	            Output format: Parquet file with columns:
	            - `timestamp`: Hourly timestamps (D+1 to D+7 or D+14)
	            - `{border}_median`: Median forecast (MW)
	            - `{border}_q10`: 10th percentile (MW)
	            - `{border}_q90`: 90th percentile (MW)

	            Inference environment:
	            - GPU: HuggingFace Space GPU (accelerated inference)
	            - Model: amazon/chronos-2 (120M parameters)
	            - Precision: bfloat16
	            """)

	    # Wire up the interface: the button passes both inputs to forecast_api and
	    # shows the returned file path in the download component.
	    submit_btn.click(
	        fn=forecast_api,
	        inputs=[run_date_input, forecast_type_input],
	        outputs=output_file,
	    )

	    # The separator in the last line is a plain "|": a backslash-escaped pipe is
	    # an invalid escape sequence in a normal (non-raw) Python string literal.
	    gr.Markdown("""
	    ---
	    ### About

	    Zero-shot forecasting: No model training required. The pre-trained Chronos-2 model
	    generalizes directly to FBMC cross-border flows using historical patterns and future covariates.

	    Features:
	    - 2,514 engineered features using past-only covariate masking
	    - Known-future: weather, generation, load forecasts (615 features)
	    - Past-only masked: CNEC outages, volatility, flows (1,899 features)
	    - 24-month historical context (Oct 2023 - Oct 2025)
	    - Time-aware extraction (prevents data leakage)
	    - Probabilistic forecasts (9 quantiles: 1st/5th/10th/25th/50th/75th/90th/95th/99th)

	    Performance:
	    - Smoke test: ~30 seconds (1 border × 168 hours)
	    - Full forecast: ~5 minutes (38 borders × 336 hours)

	    Project: FBMC Flow Forecasting MVP | Author: Evgueni Poloukarov
	    """)

	# Launch the app
	# Runs only when this file is executed as a script (not when imported):
	# serve on all interfaces, port 7860, with sharing turned off.
	if __name__ == "__main__":
	    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)