"""
Integration tests for RAG Pipeline application.
Tests actual components without mocking for real confidence.
"""

import asyncio
import time
from itertools import combinations

import pytest
from fastapi.testclient import TestClient

from app.main import app, rag_qa
from app.pipeline import RAGPipeline

# Preset name used by every pipeline test in this module; it is also the key
# under which the pipeline is registered in ``app.main.pipelines``.
PRESET_NAME = 'developer-portfolio'

# Two answers are considered "too similar" when the number of distinct words
# they share reaches this fraction of the first answer's total word count.
MAX_SHARED_WORD_RATIO = 0.8

# Upper bound, in seconds, allowed for building a pipeline from the preset.
MAX_LOAD_SECONDS = 30.0

# Test client
client = TestClient(app)


def _load_pipeline():
    """Build a fresh pipeline from the preset shared by these tests."""
    return RAGPipeline.from_preset(PRESET_NAME)


def _shared_word_count(first, second):
    """Return how many distinct whitespace-separated words both texts contain."""
    return len(set(first.split()) & set(second.split()))


class TestRealIntegration:
    """Integration tests using actual components"""

    def test_real_rag_pipeline_creation(self):
        """Test creating real RAG pipeline with actual dataset"""
        # This test uses real components but minimal dataset
        pipeline = _load_pipeline()

        # Verify real pipeline was created
        assert pipeline is not None
        assert hasattr(pipeline, 'config')
        assert hasattr(pipeline, 'documents')
        assert len(pipeline.documents) > 0

        # Verify document structure
        first_doc = pipeline.documents[0]
        assert hasattr(first_doc, 'content')
        assert hasattr(first_doc, 'meta')
        assert 'question' in first_doc.meta
        assert 'answer' in first_doc.meta

    def test_real_rag_question_answering(self):
        """Test actual RAG question answering"""
        pipeline = _load_pipeline()

        # Ask a real question
        question = "What is your current role?"
        result = pipeline.answer_question(question)

        # Verify we get a meaningful response
        assert result is not None
        assert len(result) > 100  # Should be substantial
        answer = result.lower()
        assert 'role' in answer or 'tech lead' in answer

    def test_rag_qa_function_with_real_pipeline(self):
        """Test rag_qa function with actual loaded pipeline"""
        # Import and modify global pipelines for this test
        from app.main import pipelines

        # Snapshot the registry so the finally-block can put it back even if
        # an assertion below fails.
        original_pipelines = pipelines.copy()

        try:
            # Load a real pipeline
            pipelines[PRESET_NAME] = _load_pipeline()

            # Test the rag_qa function
            result = rag_qa("What is your experience?", PRESET_NAME)

            # Verify real results
            assert result is not None
            assert len(result) > 50
            assert "still loading" not in result.lower()
        finally:
            # Restore original pipelines
            pipelines.clear()
            pipelines.update(original_pipelines)

    def test_chat_endpoint_with_real_components(self):
        """Test chat endpoint with actual OpenRouter client"""
        # This test makes real API calls but uses simple requests
        request_data = {
            "messages": [
                {"role": "user", "content": "Hello! Can you help me?"}
            ]
        }

        response = client.post("/chat", json=request_data)

        # Should get a response (may fail if API issues, but structure should
        # be correct)
        assert response.status_code in (200, 500)  # 500 if API issues

        if response.status_code == 200:
            data = response.json()
            assert "response" in data
            assert "tool_calls" in data
            # For simple greeting, probably no tool calls
            tool_calls = data["tool_calls"]
            assert tool_calls is None or isinstance(tool_calls, list)

    def test_dataset_loading_performance(self):
        """Test that dataset loading completes in reasonable time"""
        # perf_counter is monotonic, so the measurement cannot be skewed by
        # system clock adjustments the way time.time() can.
        start_time = time.perf_counter()

        # Load pipeline and time it
        pipeline = _load_pipeline()

        load_time = time.perf_counter() - start_time

        # Should load in under 30 seconds (even with embeddings)
        assert load_time < MAX_LOAD_SECONDS, (
            f"pipeline took {load_time:.1f}s to load"
        )
        assert len(pipeline.documents) > 0

        # The loaded pipeline must expose its document store and retriever
        assert hasattr(pipeline, 'document_store')
        assert hasattr(pipeline, 'retriever')

    def test_pipeline_document_structure(self):
        """Test that loaded documents have expected structure"""
        pipeline = _load_pipeline()

        # Check document metadata
        for doc in pipeline.documents[:5]:  # Check first 5 docs
            assert hasattr(doc, 'content')
            assert hasattr(doc, 'meta')
            assert doc.content is not None
            assert len(doc.content) > 0

            # Check expected metadata fields
            meta = doc.meta
            assert isinstance(meta, dict)

            # Question and answer are type-checked only when present
            if 'question' in meta:
                assert isinstance(meta['question'], str)
            if 'answer' in meta:
                assert isinstance(meta['answer'], str)

    def test_multiple_different_questions(self):
        """Test pipeline with multiple different questions"""
        pipeline = _load_pipeline()

        questions = [
            "What is your current role?",
            "What technologies do you use?",
            "Tell me about your experience",
        ]

        results = [pipeline.answer_question(question) for question in questions]

        # Should get one response per question
        assert len(results) == len(questions)

        # Results should be different (not identical). Every unordered pair is
        # compared once, in the same (i, j) order as nested index loops.
        for first, second in combinations(results, 2):
            # Allow some similarity but not exact matches
            shared = _shared_word_count(first, second)
            limit = len(first.split()) * MAX_SHARED_WORD_RATIO
            assert shared < limit, (
                f"answers share {shared} distinct words, limit is {limit}"
            )

    def test_error_handling_with_real_pipeline(self):
        """Test error handling with real pipeline"""
        pipeline = _load_pipeline()

        # Test with empty question
        result = pipeline.answer_question("")

        # Should handle gracefully
        assert result is not None
        assert len(result) > 0

    def test_config_access(self):
        """Test that pipeline configuration is accessible"""
        pipeline = _load_pipeline()

        # Verify config properties
        assert hasattr(pipeline, 'config')
        config = pipeline.config

        for attribute in ('name', 'content_field', 'prompt_template'):
            assert hasattr(config, attribute)

        # Verify specific config values
        assert config.name == 'syntaxhacker/developer-portfolio-rag'
        assert config.content_field == 'answer'
        assert config.prompt_template is not None


class TestSystemIntegration:
    """Test system-level integration"""

    def test_fastapi_app_startup(self):
        """Test that FastAPI app starts correctly"""
        # Test app import and basic structure
        from app.main import app

        assert app is not None
        assert hasattr(app, 'routes')

        # Check that our endpoints are registered
        route_paths = [route.path for route in app.routes]
        for expected_path in ('/chat', '/answer', '/health', '/datasets'):
            assert expected_path in route_paths

    def test_openrouter_client_configuration(self):
        """Test OpenRouter client is properly configured"""
        from app.main import openrouter_client, MODEL_NAME

        assert openrouter_client is not None
        assert hasattr(openrouter_client, 'base_url')
        assert hasattr(openrouter_client, 'api_key')

        # Check model configuration
        assert MODEL_NAME == "z-ai/glm-4.5-air:free"
        assert str(openrouter_client.base_url) == "https://openrouter.ai/api/v1/"

    def test_tools_configuration_structure(self):
        """Test that tools are properly configured for real use"""
        from app.main import TOOLS

        assert isinstance(TOOLS, list)
        assert len(TOOLS) > 0

        # Check rag_qa tool structure: take the first tool named 'rag_qa',
        # or None when no such tool is configured.
        rag_tool = next(
            (tool for tool in TOOLS if tool['function']['name'] == 'rag_qa'),
            None,
        )

        assert rag_tool is not None
        function_spec = rag_tool['function']
        assert 'parameters' in function_spec
        assert 'properties' in function_spec['parameters']
        assert 'question' in function_spec['parameters']['properties']


if __name__ == "__main__":
    pytest.main([__file__, "-v", "-s"])