from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import time

# --- MODEL CONSTANTS ---
MODEL_NAME = "deepseek-ai/deepseek-coder-1.3b-instruct"
# CRITICAL: Force model to use CPU for the free tier
DEVICE = "cpu"
MAX_NEW_TOKENS = 512 # Limit output size for speed and cost control
TORCH_DTYPE = torch.float32 # Use standard float for maximum CPU compatibility

# Global variables for model and tokenizer
model = None
tokenizer = None

# --- API Data Structure ---
class CodeRequest(BaseModel):
    """Defines the expected input structure from the front-end website."""
    user_prompt: str # The user's request (e.g., "Fix the bug in this function")
    code_context: str # The block of code the user provided
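
# Illustrative request body for /fix_code (hypothetical values, shown only to document the CodeRequest shape):
# {
#     "user_prompt": "Fix the off-by-one error in this loop",
#     "code_context": "for i in range(1, len(items)): print(items[i])"
# }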
    
# --- FastAPI App Setup ---
app = FastAPI(title="CodeFlow AI Agent Backend - DeepSeek SLM")

@app.on_event("startup")
async def startup_event():
    """Load the DeepSeek SLM Model and Tokenizer ONLY ONCE when the server starts."""
    global model, tokenizer
    print(f"--- Starting CodeFlow AI Agent (DeepSeek 1.3B) on {DEVICE} ---")
    start_time = time.time()
    
    try:
        # Load the Tokenizer
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
        
        # Load the Model
        # Using device_map="cpu" is essential for the free tier.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            dtype=TORCH_DTYPE, # 'dtype' is the newer transformers name for the former 'torch_dtype' argument
            device_map=DEVICE, 
            trust_remote_code=True 
        )
        model.eval() # Set model to evaluation mode
        print(f"DeepSeek Model loaded successfully in {time.time() - start_time:.2f} seconds.")
        
    except Exception as e:
        # If the model fails to load, log the error and stop
        print(f"ERROR: Failed to load DeepSeek model on CPU: {e}")
        # Raising an exception will cause the Docker container to exit, which is the correct behavior 
        # when a critical component (the model) fails to load.
        raise RuntimeError(f"Model failed to load on startup: {e}")
        
# --- CRITICAL HEALTH CHECK ENDPOINT (Fixes the Launch Timeout Error) ---
# Defined at module level so Hugging Face's health check can reach it as soon as the app starts.
@app.get("/")
def read_root():
    """Simple health check endpoint for Hugging Face to confirm the app is running."""
    return {"status": "ok", "agent": "CodeBuddy DeepSeek-Coder-1.3B"}

# --- The Code Fixing API Endpoint ---
@app.post("/fix_code")
async def fix_code_endpoint(request: CodeRequest):
    """
    Accepts code context and task, processes it with DeepSeek-Coder, and returns the fix.
    """
    if model is None or tokenizer is None:
        raise HTTPException(status_code=503, detail="AI Agent is still loading or failed to start.")
    
    # --- CONSTRUCT AGENT PROMPT (DeepSeek Instruction Format) ---
    instruction = (
        f"You are Arya's CodeBuddy, an elite Full-Stack Software Engineer. Your only job is to analyze "
        f"the user's request and provide the complete, fixed, or generated code. You must ONLY output "
        f"a single, complete, and corrected Markdown code block. Use a friendly and encouraging tone.\n\n"
        f"TASK: {request.user_prompt}\n\n"
        f"CODE_CONTEXT:\n{request.code_context}"
    )
    
    # Format the prompt correctly for the model
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
    
    # Tokenize and send tensors to CPU
    model_inputs = tokenizer([prompt], return_tensors="pt").to(DEVICE) 
    
    try:
        # --- GENERATE CODE (CPU Inference) ---
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False, # Greedy decoding for deterministic output (temperature is ignored when sampling is off)
        )
        
        # Decode the output
        response_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
        
        # Post-processing: keep only the text after the first '### Response:' tag (the prompt itself contains one).
        final_code_only = response_text.split("### Response:", 1)[-1].strip()
        
        return {"fixed_code": final_code_only}
        
    except Exception as e:
        print(f"Generation error: {e}")
        # Return a generic error to the user
        raise HTTPException(status_code=500, detail="The DeepSeek CodeBuddy encountered a processing error.")
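
# --- Optional local run (a minimal sketch; uvicorn and the port choice are assumptions, not part of the original file) ---
# Hugging Face Spaces typically expects the app on port 7860; adjust host/port for your own environment.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example call once the server is up (hypothetical values):
#   curl -X POST http://localhost:7860/fix_code \
#        -H "Content-Type: application/json" \
#        -d '{"user_prompt": "Add type hints", "code_context": "def add(a, b): return a + b"}'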