import json
import os
import tempfile
from typing import Any, Dict

import duckdb
import gradio as gr
import numpy as np
import requests
from PIL import Image

# --- User Defined Logic ---

# Parquet export of the "les-arbres" dataset on the Paris open-data portal.
_PARIS_TREES_URL = (
    "https://opendata.paris.fr/api/explore/v2.1/catalog/datasets/"
    "les-arbres/exports/parquet?lang=fr&timezone=Europe%2FBerlin"
)
# (connect, read) timeouts in seconds, so a stalled server cannot hang the tool.
_REQUEST_TIMEOUT = (10, 120)
_CHUNK_SIZE = 8192


def _remove_file(path: str) -> None:
    """Delete *path*, ignoring the case where it is already gone."""
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass


def _download_parquet(url: str) -> str:
    """Stream *url* into a temporary ``.parquet`` file and return its path.

    The caller owns the returned file and must delete it. If the download
    fails part-way, the partially written file is removed before the
    exception propagates.
    """
    tmp_path = None
    try:
        with requests.get(url, stream=True, timeout=_REQUEST_TIMEOUT) as response:
            response.raise_for_status()
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=".parquet"
            ) as tmp_file:
                tmp_path = tmp_file.name
                for chunk in response.iter_content(chunk_size=_CHUNK_SIZE):
                    tmp_file.write(chunk)
    except BaseException:
        # The file handle is closed by now, so removal also works on Windows.
        if tmp_path is not None:
            _remove_file(tmp_path)
        raise
    return tmp_path


def _format_text(value: Any) -> str:
    """Render *value* as text, using ``N/A`` for missing values."""
    return "N/A" if value is None else str(value)


def _format_number(value: Any, decimals: int) -> str:
    """Render *value* with a fixed number of decimals.

    Values that are missing become ``N/A``. Values that cannot be converted
    to ``float`` are shown as plain text instead of raising, because a
    summary statistic may arrive as a string rather than a number.
    """
    if value is None:
        return "N/A"
    try:
        return f"{float(value):.{decimals}f}"
    except (TypeError, ValueError):
        return str(value)


def _format_report(row_count, columns_info, summary_records) -> str:
    """Build the human-readable report.

    Args:
        row_count: Total number of rows in the dataset.
        columns_info: Rows of ``DESCRIBE`` output; item 0 is the column
            name and item 1 its data type.
        summary_records: One dict per ``SUMMARIZE`` row, keyed by the
            result's column names.
    """
    lines = [
        "=== Paris Trees Dataset Information ===\n",
        f"Total Rows: {row_count:,}\n",
        "\n=== Column Information ===",
        f"{'Column Name':<30} {'Data Type':<20}",
        "-" * 50,
    ]
    lines.extend(f"{col[0]:<30} {col[1]:<20}" for col in columns_info)

    lines.append("\n=== Summary Statistics ===")
    lines.append(
        f"{'Column':<30} {'Min':<15} {'Max':<15} {'Avg':<15} {'Null %':<10}"
    )
    lines.append("-" * 85)
    for record in summary_records:
        col_name = _format_text(record.get("column_name"))
        min_val = _format_text(record.get("min"))
        max_val = _format_text(record.get("max"))
        avg_val = _format_number(record.get("avg"), 2)
        null_pct = _format_number(record.get("null_percentage"), 1)
        lines.append(
            f"{col_name:<30} {min_val:<15} {max_val:<15} "
            f"{avg_val:<15} {null_pct:<10}"
        )
    return "\n".join(lines)


def paris_trees_info() -> str:
    """
    Download the Paris trees dataset (parquet format) and describe it.

    The report lists the row count, every column with its data type, and
    per-column summary statistics (min, max, average, null percentage).
    This helps users understand what data is available about tree
    protection efforts in Paris.

    Returns:
        str: A formatted, multi-line report about the dataset.

    Raises:
        requests.RequestException: If the download fails or times out.
        duckdb.Error: If the downloaded file cannot be read or queried.
    """
    tmp_path = _download_parquet(_PARIS_TREES_URL)
    try:
        conn = duckdb.connect()
        try:
            # DDL cannot take bound parameters, so quote the path manually.
            quoted_path = tmp_path.replace("'", "''")
            conn.execute(
                "CREATE VIEW paris_trees AS "
                f"SELECT * FROM read_parquet('{quoted_path}')"
            )

            row_count = conn.execute(
                "SELECT COUNT(*) FROM paris_trees"
            ).fetchone()[0]
            columns_info = conn.execute("DESCRIBE paris_trees").fetchall()

            # Read SUMMARIZE by column name rather than by position: the
            # result has more columns than the report shows, so fixed
            # offsets silently pick up the wrong statistic.
            summary_result = conn.execute("SUMMARIZE paris_trees")
            summary_columns = [desc[0] for desc in summary_result.description]
            summary_records = [
                dict(zip(summary_columns, row))
                for row in summary_result.fetchall()
            ]
        finally:
            # Close before deleting the file so no handle is left on it.
            conn.close()

        return _format_report(row_count, columns_info, summary_records)
    finally:
        # Clean up the temporary file on success and on failure alike.
        _remove_file(tmp_path)


# --- Interface Factory ---

def create_interface():
    """Build the Gradio interface exposing ``paris_trees_info``.

    The tool takes no inputs, so the interface has an empty input list and
    a single text output holding the report.
    """
    return gr.Interface(
        fn=paris_trees_info,
        inputs=[],
        outputs=gr.Textbox(
            label="Comprehensive dataset information including row count, column details, and summary statistics"
        ),
        title="paris_trees_info",
        description="Auto-generated tool: paris_trees_info",
    )