Spaces:
Runtime error
Runtime error
| import os | |
| import torch | |
| import json | |
| import time | |
| import logging | |
| from datetime import datetime | |
| from threading import Thread | |
| from queue import Queue | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
# Configuration
PRIMARY_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # First model to try
SECONDARY_MODEL = "facebook/opt-1.3b"  # Backup model used when the primary output is poor
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 5  # Process 5 chapters at a time
MAX_RETRIES = 3  # Generation attempts per chapter before giving up
OUTPUT_DIR = "calculus_textbook_output"
LOG_FILE = "textbook_generation.log"

# Setup logging
# The output directory must exist before the log file inside it can be opened.
os.makedirs(OUTPUT_DIR, exist_ok=True)
logging.basicConfig(
    # An explicit UTF-8 file handler is used instead of `filename=` because
    # the program logs non-ASCII characters (e.g. a check mark); with the
    # platform default encoding those records can fail to be written.
    handlers=[
        logging.FileHandler(os.path.join(OUTPUT_DIR, LOG_FILE), encoding="utf-8")
    ],
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
class ModelManager:
    """Manages loading and switching between language models for text generation.

    Loaded models and their tokenizers are cached by model name in
    ``self.models`` / ``self.tokenizers``; ``self.current_model`` holds the
    name of the model that ``generate_text`` will use.
    """

    def __init__(self):
        self.models = {}  # model name -> loaded model
        self.tokenizers = {}  # model name -> loaded tokenizer
        self.current_model = None  # name of the active model, or None

    def load_model(self, model_name):
        """Load a model and its tokenizer if not already loaded.

        Args:
            model_name: Identifier passed to ``from_pretrained``.

        Returns:
            True if the model is available (already cached or loaded now),
            False if loading failed. Failures are logged, never raised.
        """
        if model_name in self.models:
            return True

        try:
            logging.info(f"Loading model: {model_name}")
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                # Half precision and automatic placement only when a GPU is used.
                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
                device_map="auto" if DEVICE == "cuda" else None,
            )
            model.eval()
        except Exception as e:
            # Deliberately broad: any load failure (download, memory, config)
            # is reported to the caller as False so it can fall back.
            logging.error(f"Failed to load model {model_name}: {str(e)}")
            return False

        # Register both together so the two caches never disagree.
        self.models[model_name] = model
        self.tokenizers[model_name] = tokenizer
        logging.info(f"Successfully loaded model: {model_name}")
        return True

    def set_current_model(self, model_name):
        """Set the current model to use for generation.

        Loads the model first if it is not cached.

        Returns:
            True if the model is now current, False if it could not be
            loaded (in which case ``current_model`` is left unchanged).
        """
        if model_name not in self.models and not self.load_model(model_name):
            return False
        self.current_model = model_name
        return True

    def generate_text(self, prompt, max_length=1024):
        """Generate text using the current model.

        Args:
            prompt: Text the model should continue.
            max_length: Forwarded to ``model.generate`` as ``max_length``.

        Returns:
            The newly generated text only (prompt removed), stripped of
            surrounding whitespace.

        Raises:
            ValueError: If no model has been selected yet.
        """
        if not self.current_model:
            raise ValueError("No model selected. Call set_current_model first.")

        model = self.models[self.current_model]
        tokenizer = self.tokenizers[self.current_model]
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        prompt_token_count = inputs["input_ids"].shape[1]

        # Generate with some randomness for creativity
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=max_length,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Extract only the generated part. The prompt is removed at the token
        # level: slicing the decoded string by the length of the re-decoded
        # prompt is unreliable because decoding does not always reproduce the
        # prompt text character for character.
        new_token_ids = outputs[0][prompt_token_count:]
        return tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
class CalculusTextbookGenerator:
    """Generates a complete calculus textbook with questions and solutions.

    The textbook is a nested dict (books -> chapters -> questions). Chapters
    are handed to a single background worker thread in batches; after every
    chapter and every batch the current state is written to disk as JSON.
    """

    def __init__(self):
        self.model_manager = ModelManager()
        self.textbook_data = self.create_initial_textbook_structure()

    def create_initial_textbook_structure(self):
        """Create the initial structure of the calculus textbook.

        Returns:
            A dict with a "books" list; each book has "name", "details" and
            "chapters", and each chapter has "chapterTitle", "subChapters"
            and an initially empty "questions" list.
        """
        return {
            "books": [
                {
                    "name": "Calculus 1: Early Transcendentals",
                    "details": "Introduction to single-variable calculus including limits, derivatives, and basic integration techniques.",
                    "chapters": [
                        {
                            "chapterTitle": "Chapter 6: Applications of Integration",
                            "subChapters": [
                                "6.1: Areas Between Curves",
                                "6.2: Volumes",
                                "6.3: Volumes by Cylindrical Shells",
                                "6.4: Work",
                                "6.5: Average Value of a Function"
                            ],
                            "questions": []  # Will be filled with generated questions
                        },
                        {
                            "chapterTitle": "Chapter 8: Further Applications of Integration",
                            "subChapters": [
                                "8.1: Arc Length",
                                "8.2: Area of a Surface of Revolution",
                                "8.3: Applications to Physics and Engineering",
                                "8.4: Applications to Economics and Biology",
                                "8.5: Probability"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 9: Differential Equations",
                            "subChapters": [
                                "9.1: Modeling with Differential Equations",
                                "9.2: Direction Fields and Euler's Method",
                                "9.3: Separable Equations",
                                "9.4: Models for Population Growth",
                                "9.5: Linear Equations",
                                "9.6: Predator–Prey Systems"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 10: Parametric Equations and Polar Coordinates",
                            "subChapters": [
                                "10.1: Curves Defined by Parametric Equations",
                                "10.2: Calculus with Parametric Curves",
                                "10.3: Polar Coordinates",
                                "10.4: Calculus in Polar Coordinates",
                                "10.5: Conic Sections",
                                "10.6: Conic Sections in Polar Coordinates"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 11: Sequences, Series, and Power Series",
                            "subChapters": [
                                "11.1: Sequences",
                                "11.2: Series",
                                "11.3: The Integral Test and Estimates of Sums",
                                "11.4: The Comparison Tests",
                                "11.5: Alternating Series and Absolute Convergence",
                                "11.6: The Ratio and Root Tests",
                                "11.7: Power Series"
                            ],
                            "questions": []
                        }
                    ]
                },
                {
                    "name": "Calculus 2: Advanced Concepts",
                    "details": "Advances into series, sequences, techniques of integration, and vector calculus.",
                    "chapters": [
                        {
                            "chapterTitle": "Chapter 12: Vectors and the Geometry of Space",
                            "subChapters": [
                                "12.1: Three-Dimensional Coordinate Systems",
                                "12.2: Vectors",
                                "12.3: The Dot Product",
                                "12.4: The Cross Product",
                                "12.5: Equations of Lines and Planes",
                                "12.6: Cylinders and Quadric Surfaces"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 13: Vector Functions",
                            "subChapters": [
                                "13.1: Vector Functions and Space Curves",
                                "13.2: Derivatives and Integrals of Vector Functions",
                                "13.3: Arc Length and Curvature",
                                "13.4: Motion in Space: Velocity and Acceleration"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 14: Partial Derivatives",
                            "subChapters": [
                                "14.1: Functions of Several Variables",
                                "14.2: Limits and Continuity",
                                "14.3: Partial Derivatives",
                                "14.4: Tangent Planes and Linear Approximation",
                                "14.5: The Chain Rule"
                            ],
                            "questions": []
                        }
                    ]
                }
            ]
        }

    def generate_question_set(self, chapter_title, subchapter_titles, num_questions=3):
        """Generate a set of questions with step-by-step solutions for a chapter.

        The primary model is tried first; the secondary model is used when the
        primary cannot be loaded or its output is short or lacks a "QUESTION"
        marker.

        Args:
            chapter_title: Title inserted into the prompt.
            subchapter_titles: Iterable of subchapter titles to cover.
            num_questions: Number of questions requested in the prompt.

        Returns:
            A list of ``{"question": ..., "solution": ...}`` dicts, or an
            empty list if generation or parsing failed (failures are logged).
        """
        prompt = f"""Create {num_questions} calculus questions with detailed step-by-step solutions for:
{chapter_title}
The questions should cover these subchapters:
{', '.join(subchapter_titles)}
For each question:
1. Write a clear, university-level calculus problem
2. Provide a comprehensive step-by-step solution with all math steps shown
3. Include a final answer
Format each question as:
QUESTION: [Problem statement]
SOLUTION:
Step 1: [First step with explanation]
Step 2: [Next step]
...
Final Answer: [The solution]
Make sure to use proper mathematical notation and include a variety of question types.
"""
        try:
            # Try the primary model first. The load result is checked so a
            # failed load never silently reuses whichever model was current.
            generated_content = ""
            if self.model_manager.set_current_model(PRIMARY_MODEL):
                generated_content = self.model_manager.generate_text(prompt, max_length=2048)

            # Check if the content looks good
            if len(generated_content) < 200 or "QUESTION" not in generated_content:
                # Try the secondary model if the primary one gave poor results
                logging.warning(f"Primary model gave insufficient results for {chapter_title}. Trying secondary model.")
                if self.model_manager.set_current_model(SECONDARY_MODEL):
                    generated_content = self.model_manager.generate_text(prompt, max_length=2048)
                else:
                    # Keep whatever the primary produced rather than
                    # regenerating with the same model.
                    logging.warning(f"Secondary model unavailable for {chapter_title}; keeping primary output.")

            # Parse the generated content into question objects
            questions = self.parse_questions(generated_content)
            if not questions:
                logging.warning(f"Failed to parse any questions from content for {chapter_title}")
                return []
            return questions
        except Exception as e:
            # Boundary for model errors: report and let the caller retry.
            logging.error(f"Error generating questions for {chapter_title}: {str(e)}")
            return []

    def parse_questions(self, content):
        """Parse the generated content into structured question objects.

        The text is split on "QUESTION:"; within each piece the first marker
        present among "SOLUTION:", "Solution:", "STEPS:", "Steps:" (checked
        in that order) separates the question from its solution.

        Args:
            content: Raw generated text.

        Returns:
            A list of ``{"question": str, "solution": str}`` dicts. Pieces
            without a solution marker get an empty solution; pieces whose
            question text is empty are skipped.
        """
        solution_markers = ("SOLUTION:", "Solution:", "STEPS:", "Steps:")
        questions = []

        # Everything before the first "QUESTION:" is preamble, so skip it.
        for part in content.split("QUESTION:")[1:]:
            marker = next((m for m in solution_markers if m in part), None)
            if marker is None:
                question_text, solution = part, ""
            else:
                question_text, solution = part.split(marker, 1)

            question_text = question_text.strip()
            if not question_text:
                # Two consecutive markers or trailing noise: not a question.
                continue
            questions.append({
                "question": question_text,
                "solution": solution.strip()
            })
        return questions

    def _process_chapter(self, book_idx, chapter_idx, chapter):
        """Generate questions for one chapter with retries, then save state."""
        chapter_title = chapter["chapterTitle"]
        subchapters = chapter.get("subChapters", [])
        logging.info(f"Processing: {chapter_title}")

        # Try to generate questions with retries
        for attempt in range(MAX_RETRIES):
            try:
                questions = self.generate_question_set(chapter_title, subchapters, num_questions=4)
            except Exception as e:
                logging.error(f"Attempt {attempt+1}/{MAX_RETRIES} failed for {chapter_title}: {str(e)}")
            else:
                if questions:
                    # Save the questions to the chapter
                    self.textbook_data["books"][book_idx]["chapters"][chapter_idx]["questions"] = questions
                    logging.info(f"✓ Generated {len(questions)} questions for {chapter_title}")
                    break  # Success, exit retry loop
                logging.warning(f"No questions generated for {chapter_title} on attempt {attempt+1}")

            if attempt + 1 < MAX_RETRIES:
                time.sleep(2)  # Wait before retrying

        # Save current state to file
        self.save_current_state()

    def worker_function(self, queue, results):
        """Worker thread function to process chapters from queue.

        Args:
            queue: Queue of ``(book_idx, chapter_idx, chapter)`` tuples; a
                ``None`` item tells the worker to exit.
            results: Unused; kept for interface compatibility.
        """
        while True:
            item = queue.get()
            try:
                if item is None:  # None signals to exit
                    break
                self._process_chapter(*item)
            except Exception:
                # Keep the worker alive: if it died, the remaining chapters
                # would never be processed and queue.join() would block.
                logging.exception("Unexpected error while processing a chapter")
            finally:
                # Always acknowledge the item so queue.join() can return.
                queue.task_done()

    def save_current_state(self):
        """Save the current state of the textbook generation.

        Writes the same JSON to a timestamped snapshot file and to
        ``textbook_latest.json`` inside ``OUTPUT_DIR``.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        # Serialize once so both files are guaranteed to hold the same state.
        payload = json.dumps(self.textbook_data, indent=2)
        for filename in (f"textbook_state_{timestamp}.json", "textbook_latest.json"):
            with open(os.path.join(OUTPUT_DIR, filename), "w", encoding="utf-8") as f:
                f.write(payload)

    def process_in_batches(self):
        """Process all chapters in batches.

        Chapters are queued ``BATCH_SIZE`` at a time and the method waits for
        the worker to finish each batch before saving state and queueing the
        next one.
        """
        # Collect all chapters before the worker starts so the total is exact.
        pending = []
        for book_idx, book in enumerate(self.textbook_data["books"]):
            for chapter_idx, chapter in enumerate(book["chapters"]):
                pending.append((book_idx, chapter_idx, chapter))
        total_chapters = len(pending)

        queue = Queue()
        # Create and start worker thread
        worker = Thread(target=self.worker_function, args=(queue, None))
        worker.daemon = True  # Allow the program to exit even if the thread is running
        worker.start()

        # Process in batches
        processed = 0
        for start in range(0, total_chapters, BATCH_SIZE):
            batch = pending[start:start + BATCH_SIZE]
            logging.info(f"Processing batch of {len(batch)} chapters. {total_chapters - processed} remaining.")
            for item in batch:
                queue.put(item)

            # Wait until this batch is done. join() returns only after
            # task_done() was called for every queued item, i.e. after the
            # chapters were processed, not merely dequeued.
            queue.join()
            processed += len(batch)
            logging.info(f"Batch complete. {processed}/{total_chapters} chapters processed.")
            # Save current state (the worker is idle here, so no concurrent write)
            self.save_current_state()

        # Signal worker to exit
        queue.put(None)
        worker.join()

        # Save final state
        self.save_current_state()
        logging.info("All chapters processed. Textbook generation complete.")
def main():
    """Run the whole textbook generation and log start, duration and output path."""
    started_at = datetime.now()
    logging.info(f"Starting textbook generation at {started_at}")

    # Build the generator and work through every chapter.
    textbook_generator = CalculusTextbookGenerator()
    textbook_generator.process_in_batches()

    finished_at = datetime.now()
    elapsed = finished_at - started_at
    logging.info(f"Textbook generation completed in {elapsed}")
    logging.info(f"Final textbook saved to {os.path.join(OUTPUT_DIR, 'textbook_latest.json')}")


if __name__ == "__main__":
    main()