Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import re | |
| from sentence_transformers import SentenceTransformer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import spacy | |
| from collections import Counter | |
| import json | |
| import PyPDF2 | |
| import docx | |
| import io | |
| from pathlib import Path | |
| import os | |
| import google.generativeai as genai | |
| from typing import Dict, Any | |
# Configure the Gemini client with the key read from the GEMINI_API_KEY
# environment variable (os.environ.get yields None when the variable is unset).
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
| class ATSScorer: | |
def __init__(self) -> None:
    """Load the NLP models and build the keyword tables used for scoring.

    Sets ``sentence_model`` (sentence-transformers encoder), ``nlp`` (spaCy
    pipeline, or None when the model is not installed), the per-dimension
    ``weights`` and the keyword lookup tables read by the scoring methods.
    """
    # Load pre-trained models
    print("Loading models...")
    self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
    # Try to load the spaCy model; keep running without it if it is missing
    try:
        self.nlp = spacy.load("en_core_web_sm")
    except OSError:
        print("spaCy model not found. Install with: python -m spacy download en_core_web_sm")
        self.nlp = None
    # Per-dimension weights applied by calculate_final_score (they sum to 1.0)
    self.weights = {
        'relevant_skills': 0.25,
        'work_experience': 0.20,
        'education': 0.10,
        'certifications': 0.07,
        'projects': 0.10,
        'keywords_match': 0.10,
        'tools_tech': 0.10,
        'soft_skills': 0.08
    }
    # Skill vocabulary grouped by category; all entries are lower-case
    self.skill_categories = {
        'programming': ['python', 'java', 'javascript', 'c++', 'c#', 'go', 'rust', 'php', 'ruby', 'kotlin', 'swift', 'typescript', 'dart'],
        'data_science': ['machine learning', 'deep learning', 'data analysis', 'statistics', 'pandas', 'numpy', 'tensorflow', 'pytorch', 'scikit-learn', 'matplotlib', 'seaborn'],
        'web_development': ['html', 'css', 'react', 'vue', 'angular', 'node.js', 'express', 'django', 'flask', 'next.js', 'nuxt.js', 'svelte', 'bootstrap', 'tailwind'],
        'mobile_development': ['react native', 'flutter', 'android studio', 'ios', 'swift', 'kotlin', 'xamarin', 'ionic', 'cordova', 'firebase'],
        'cybersecurity': ['malware analysis', 'penetration testing', 'vulnerability assessment', 'ida pro', 'ghidra', 'wireshark', 'burp suite', 'metasploit', 'nmap', 'reverse engineering', 'oscp', 'cissp', 'ceh', 'security', 'threat', 'exploit'],
        'databases': ['sql', 'mysql', 'postgresql', 'mongodb', 'redis', 'elasticsearch', 'oracle', 'sqlite', 'cassandra', 'dynamodb'],
        'cloud': ['aws', 'azure', 'gcp', 'docker', 'kubernetes', 'terraform', 'jenkins', 'ci/cd', 'devops', 'microservices'],
        'ui_ux_design': ['figma', 'sketch', 'adobe xd', 'photoshop', 'illustrator', 'wireframing', 'prototyping', 'user research', 'usability testing', 'interaction design', 'visual design', 'design thinking', 'user journey', 'persona', 'a/b testing'],
        'business_analysis': ['business analysis', 'requirements gathering', 'stakeholder management', 'process mapping', 'gap analysis', 'user stories', 'acceptance criteria', 'brd', 'frd', 'visio', 'lucidchart', 'jira', 'confluence', 'agile', 'scrum', 'waterfall'],
        'marketing': ['digital marketing', 'content marketing', 'social media marketing', 'seo', 'sem', 'ppc', 'google ads', 'facebook ads', 'email marketing', 'marketing automation', 'analytics', 'google analytics', 'hubspot', 'salesforce', 'brand management', 'campaign management'],
        'consultancy': ['strategic planning', 'business strategy', 'change management', 'project management', 'stakeholder engagement', 'process improvement', 'risk assessment', 'financial analysis', 'market research', 'competitive analysis', 'presentation skills', 'client management'],
        'ai_ml_engineering': ['artificial intelligence', 'machine learning', 'deep learning', 'neural networks', 'nlp', 'computer vision', 'tensorflow', 'pytorch', 'keras', 'opencv', 'transformers', 'bert', 'gpt', 'llm', 'mlops', 'model deployment', 'feature engineering', 'hyperparameter tuning'],
        'soft_skills': ['leadership', 'teamwork', 'communication', 'problem solving', 'project management', 'collaboration', 'analytical', 'creative']
    }
    # Domain indicators in three tiers; detect_domain scores a hit as
    # 3 (high), 2 (medium) or 1 (low). Dict order matters: ties go to the
    # domain listed first.
    self.domain_indicators = {
        'web_development': {
            'high_priority': ['web developer', 'frontend developer', 'backend developer', 'full stack developer', 'full-stack developer', 'web development', 'frontend development', 'backend development', 'fullstack'],
            'medium_priority': ['web', 'frontend', 'backend', 'full stack', 'website development', 'web application development', 'web app', 'spa', 'single page application'],
            'low_priority': ['html', 'css', 'javascript', 'react', 'vue', 'angular', 'node.js', 'express', 'django', 'flask', 'responsive design']
        },
        'ui_ux_design': {
            'high_priority': ['ui designer', 'ux designer', 'ui/ux designer', 'product designer', 'user experience designer', 'user interface designer', 'design lead', 'visual designer'],
            'medium_priority': ['ui design', 'ux design', 'user experience', 'user interface', 'interaction design', 'visual design', 'product design'],
            'low_priority': ['figma', 'sketch', 'adobe xd', 'wireframing', 'prototyping', 'user research', 'usability testing']
        },
        'mobile_development': {
            'high_priority': ['mobile developer', 'android developer', 'ios developer', 'mobile app developer', 'app developer'],
            'medium_priority': ['mobile', 'android', 'ios', 'app development', 'mobile application', 'cross-platform'],
            'low_priority': ['react native', 'flutter', 'swift', 'kotlin', 'xamarin']
        },
        'data_science': {
            'high_priority': ['data scientist', 'data analyst', 'machine learning engineer', 'data engineer'],
            'medium_priority': ['data science', 'machine learning', 'analytics', 'data analysis', 'ai', 'artificial intelligence'],
            'low_priority': ['python', 'pandas', 'numpy', 'tensorflow', 'pytorch']
        },
        'cybersecurity': {
            'high_priority': ['security analyst', 'cybersecurity specialist', 'security engineer', 'penetration tester', 'security researcher'],
            'medium_priority': ['security', 'malware', 'vulnerability', 'penetration', 'threat', 'exploit', 'cybersecurity', 'infosec', 'reverse engineering'],
            'low_priority': ['wireshark', 'burp suite', 'metasploit', 'nmap']
        },
        'devops': {
            'high_priority': ['devops engineer', 'site reliability engineer', 'infrastructure engineer', 'cloud engineer'],
            'medium_priority': ['devops', 'infrastructure', 'deployment', 'ci/cd', 'automation', 'cloud'],
            'low_priority': ['docker', 'kubernetes', 'terraform', 'jenkins']
        },
        'game_development': {
            'high_priority': ['game developer', 'game programmer', 'unity developer', 'unreal developer'],
            'medium_priority': ['game', 'unity', 'unreal', 'gaming', 'game development', '3d', 'graphics'],
            'low_priority': ['c#', 'c++', 'opengl', 'directx']
        },
        'business_analysis': {
            'high_priority': ['business analyst', 'systems analyst', 'functional analyst', 'requirements analyst'],
            'medium_priority': ['business analysis', 'requirements', 'stakeholder', 'process', 'analyst', 'functional requirements', 'business requirements'],
            'low_priority': ['jira', 'confluence', 'visio', 'lucidchart']
        },
        'marketing': {
            'high_priority': ['marketing manager', 'digital marketing specialist', 'marketing analyst', 'content marketer'],
            'medium_priority': ['marketing', 'digital marketing', 'content marketing', 'social media', 'seo', 'brand', 'campaign', 'advertising', 'promotion', 'market research'],
            'low_priority': ['google ads', 'facebook ads', 'hubspot', 'salesforce']
        },
        'consultancy': {
            'high_priority': ['consultant', 'management consultant', 'strategy consultant', 'business consultant'],
            'medium_priority': ['consulting', 'advisory', 'strategy', 'strategic', 'transformation', 'change management', 'business consulting', 'management consulting'],
            'low_priority': ['powerpoint', 'excel', 'presentation']
        },
        'ai_ml_engineering': {
            'high_priority': ['ai engineer', 'ml engineer', 'machine learning engineer', 'ai specialist', 'nlp engineer'],
            'medium_priority': ['artificial intelligence', 'deep learning', 'neural networks', 'nlp engineer', 'computer vision', 'mlops'],
            'low_priority': ['tensorflow', 'pytorch', 'keras', 'opencv']
        }
    }
    # Flat keyword lists counted by the education, certification and
    # project scorers
    self.education_keywords = ['bachelor', 'master', 'phd', 'degree', 'university', 'college', 'education', 'graduated']
    self.certification_keywords = ['certified', 'certification', 'certificate', 'licensed', 'accredited']
    self.project_keywords = ['project', 'developed', 'built', 'created', 'implemented', 'designed']
    # Extended education patterns so that current students are recognised too
    self.education_patterns = {
        'undergraduate': ['undergraduate', 'pursuing', 'currently enrolled', 'final year', 'third year', 'fourth year', 'sophomore', 'junior', 'senior'],
        'year_indicators': ['first year', 'second year', 'third year', 'fourth year', 'final year', 'sophomore', 'junior', 'senior'],
        'degree_types': ['bachelor', 'bs', 'ba', 'btech', 'bsc', 'be', 'master', 'ms', 'ma', 'mtech', 'msc', 'phd', 'doctorate', 'mba', 'bba', 'bfa', 'mfa']
    }
    # Soft skill -> interest/activity words from which infer_soft_skills
    # infers that skill
    self.interest_skill_mapping = {
        'creativity': ['art', 'drawing', 'painting', 'design', 'photography', 'music', 'writing', 'creative', 'sketch'],
        'leadership': ['captain', 'president', 'head', 'leader', 'coordinator', 'organizer', 'mentor', 'ncc', 'scouts'],
        'teamwork': ['team', 'collaboration', 'group projects', 'sports', 'football', 'basketball', 'cricket', 'volleyball'],
        'dedication': ['marathon', 'athletics', 'gym', 'fitness', 'ncc', 'volunteer', 'community service', 'consistent'],
        'analytical': ['chess', 'puzzle', 'mathematics', 'strategy', 'analysis', 'research', 'debate'],
        'communication': ['debate', 'public speaking', 'presentation', 'writing', 'blog', 'theater', 'drama'],
        'adaptability': ['travel', 'different cultures', 'international', 'languages', 'diverse'],
        'persistence': ['marathon', 'long distance', 'endurance', 'consistent', 'regular', 'discipline']
    }
    # Project category -> phrases that categorize_projects looks for
    self.project_categories = {
        'web_development': [
            'website', 'web app', 'web application', 'e-commerce', 'blog', 'portfolio', 'dashboard',
            'frontend', 'backend', 'full stack', 'responsive', 'landing page', 'cms',
            'online store', 'booking system', 'social media', 'chat app', 'forum'
        ],
        'mobile_development': [
            'mobile app', 'android app', 'ios app', 'flutter app', 'react native', 'mobile application',
            'app development', 'cross-platform', 'native app', 'hybrid app', 'mobile game'
        ],
        'data_science': [
            'machine learning', 'data analysis', 'prediction model', 'recommendation system',
            'data visualization', 'analytics', 'ai model', 'neural network', 'classification',
            'regression', 'clustering', 'sentiment analysis', 'nlp', 'computer vision'
        ],
        'cybersecurity': [
            'security tool', 'vulnerability scanner', 'penetration testing', 'malware analysis',
            'encryption', 'security audit', 'threat detection', 'firewall', 'intrusion detection',
            'security framework', 'ethical hacking', 'forensics'
        ],
        'game_development': [
            'game', 'unity', 'unreal', '2d game', '3d game', 'mobile game', 'web game',
            'game engine', 'graphics', 'animation', 'gameplay', 'level design'
        ],
        'devops': [
            'ci/cd', 'deployment', 'automation', 'infrastructure', 'monitoring', 'containerization',
            'orchestration', 'pipeline', 'cloud deployment', 'server management'
        ],
        'desktop_application': [
            'desktop app', 'gui application', 'desktop software', 'system tool', 'utility',
            'desktop game', 'productivity tool', 'file manager', 'text editor'
        ],
        'api_backend': [
            'api', 'rest api', 'backend service', 'microservice', 'web service', 'server',
            'database integration', 'authentication system', 'payment gateway'
        ],
        'ui_ux_design': [
            'ui design', 'ux design', 'user interface', 'user experience', 'wireframe', 'prototype',
            'mockup', 'design system', 'user research', 'usability testing', 'interaction design',
            'visual design', 'app design', 'website design'
        ],
        'business_analysis': [
            'business analysis', 'requirements gathering', 'process mapping', 'workflow design',
            'business process', 'system analysis', 'gap analysis', 'stakeholder analysis',
            'business requirements', 'functional requirements'
        ],
        'marketing': [
            'marketing campaign', 'digital marketing', 'social media campaign', 'content strategy',
            'seo optimization', 'brand campaign', 'market research', 'customer analysis',
            'marketing automation', 'email campaign'
        ],
        'ai_ml_engineering': [
            'ai system', 'ml pipeline', 'deep learning model', 'neural network', 'nlp system',
            'computer vision', 'recommendation engine', 'chatbot', 'ai application',
            'model deployment', 'mlops', 'feature engineering'
        ]
    }
def analyze_cv(self, cv_text: str, job_description: str) -> Dict[str, Any]:
    """Ask Gemini how well a CV matches a job description.

    Args:
        cv_text: Plain-text resume.
        job_description: Plain-text job description.

    Returns:
        ``{"success": True, "result": <parsed JSON object>}`` when the model
        reply contains a JSON object, otherwise
        ``{"success": False, "message": <reason>}``. Failures from the API
        call or JSON decoding are caught and reported the same way; this
        method does not raise.
    """
    try:
        prompt = f"""You are a smart and unbiased AI CV screening assistant. Your task is to evaluate how well a candidate's resume (CV) matches a job description. The job description may include one or more roles and may contain responsibilities, expectations, and skill requirements.
Carefully review both the CV and the Job Description, and provide the output as a **valid JSON object** with the following keys:
1. **reasoning** (string): Provide a concise but insightful explanation of how well the candidate matches the job requirements — mention key matching points like role alignment, experience, and relevant technologies.
2. **skills_available** (array of 6 or fewer strings): List up to 6 skills or competencies from the CV that strongly align with the job description.
3. **missing** (array of 6 or fewer strings): List up to 6 important skills, experiences, or qualifications the candidate lacks based on the job description. If nothing is missing, return a single string in the array: "You are good to go".
CV:
\"\"\"
{cv_text}
\"\"\"
Job Description:
\"\"\"
{job_description}
\"\"\"
"""
        model = genai.GenerativeModel('gemini-2.0-flash-exp')
        response = model.generate_content(prompt)
        # The reply may wrap the JSON in prose or code fences, so cut out the
        # span from the first "{" to the last "}".
        text = response.text
        json_start = text.find("{")
        json_end = text.rfind("}")
        # Both braces must exist and be in order; comparing the raw rfind
        # result keeps the "not found" case (-1) detectable.
        if json_start == -1 or json_end < json_start:
            return {"success": False, "message": "Could not parse JSON response"}
        parsed_result = json.loads(text[json_start:json_end + 1])
        return {"success": True, "result": parsed_result}
    except Exception as e:
        # Boundary with an external service: report the failure to the caller
        # instead of propagating it into the UI handler.
        print(f'Error analyzing CV: {e}')
        return {"success": False, "message": f"Error: {str(e)}"}
def format_analysis_output(self, analysis_result: Dict[str, Any]) -> str:
    """Render an ``analyze_cv`` result dict as Markdown for the Gradio UI.

    A failed analysis yields a single error line; a successful one yields
    three sections: reasoning, matching skills and areas for improvement.
    """
    if not analysis_result.get("success"):
        return f"β **Error:** {analysis_result.get('message', 'Unknown error')}"

    details = analysis_result["result"]
    parts = [
        "## π **AI-Powered CV Analysis**\n\n",
        # Reasoning section
        "### π **Analysis & Reasoning**\n",
        f"{details.get('reasoning', 'No reasoning provided')}\n\n",
        # Skills section
        "### β **Matching Skills Found**\n",
    ]

    matched = details.get('skills_available', [])
    if matched:
        parts.extend(f"β’ {entry}\n" for entry in matched)
    else:
        parts.append("β’ No matching skills identified\n")
    parts.append("\n")

    # Gaps section
    parts.append("### β οΈ **Areas for Improvement**\n")
    gaps = details.get('missing', [])
    if not gaps:
        parts.append("β’ No gaps identified\n")
    elif len(gaps) == 1 and gaps[0] == "You are good to go":
        # Sentinel value the prompt asks the model to send when nothing is missing
        parts.append("π **Excellent! You are good to go!**\n")
    else:
        parts.extend(f"β’ {entry}\n" for entry in gaps)

    return "".join(parts)
def extract_text_from_pdf(self, pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Path or binary stream handed to ``PyPDF2.PdfReader``.

    Returns:
        Page texts joined by newlines, with surrounding whitespace stripped.

    Raises:
        Exception: "Error reading PDF: ..." wrapping any failure while
            opening or parsing the document (the cause is chained).
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # A page without a text layer may yield nothing; treat that as empty
        # text rather than failing the whole document.
        page_texts = [(page.extract_text() or "") for page in pdf_reader.pages]
    except Exception as e:
        raise Exception(f"Error reading PDF: {str(e)}") from e
    return "\n".join(page_texts).strip()
def extract_text_from_docx(self, docx_file):
    """Return the paragraph text of a DOCX document, one paragraph per line.

    Any failure while opening or reading the document is re-raised as a
    generic ``Exception`` whose message starts with "Error reading DOCX:".
    """
    try:
        document = docx.Document(docx_file)
        lines = [f"{para.text}\n" for para in document.paragraphs]
        return "".join(lines).strip()
    except Exception as e:
        raise Exception(f"Error reading DOCX: {str(e)}")
def extract_text_from_file(self, file):
    """Extract text from an uploaded resume (PDF or DOCX).

    Args:
        file: A filesystem path (``str`` / ``os.PathLike``), an upload object
            exposing the path as ``.name``, or None.

    Returns:
        The extracted text, or "" when ``file`` is None.

    Raises:
        Exception: "Error processing file: ..." for unsupported extensions
            and for any failure raised by the format-specific extractor.
    """
    if file is None:
        return ""
    # Upload widgets may hand over either a plain path or a temp-file wrapper
    # whose path lives in ``.name``; normalise both to a path.
    if isinstance(file, (str, os.PathLike)):
        source = file
    else:
        source = getattr(file, "name", file)
    file_extension = Path(source).suffix.lower()
    try:
        if file_extension == '.pdf':
            return self.extract_text_from_pdf(source)
        elif file_extension in ['.docx', '.doc']:
            return self.extract_text_from_docx(source)
        else:
            raise Exception(f"Unsupported file format: {file_extension}. Please upload PDF or DOCX files.")
    except Exception as e:
        raise Exception(f"Error processing file: {str(e)}") from e
def preprocess_text(self, text):
    """Normalise text for keyword matching.

    Lower-cases the text, replaces every character other than word
    characters, whitespace and ``- + # . /`` with a space, then collapses
    whitespace runs to a single space.

    Args:
        text: Raw text; None or "" yields "".

    Returns:
        The cleaned, stripped string.
    """
    if not text:
        return ""
    text = text.lower()
    # Keep the symbols that occur inside keywords: c++, c#, node.js,
    # scikit-learn, ci/cd, a/b testing, ui/ux.
    text = re.sub(r'[^\w\s\-\+\#\./]', ' ', text)
    # Collapse whitespace last so removed punctuation cannot leave double
    # spaces that would break multi-word keyword matches.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
| def extract_skills_from_text(self, text, domain=None): | |
| """Extract skills from text based on domain""" | |
| text = self.preprocess_text(text) | |
| found_skills = [] | |
| # If domain is specified, prioritize skills from that domain | |
| if domain and domain in self.skill_categories: | |
| domain_skills = self.skill_categories[domain] | |
| for skill in domain_skills: | |
| if skill.lower() in text: | |
| found_skills.append(skill) | |
| # Also check all skill categories | |
| for category, skills in self.skill_categories.items(): | |
| for skill in skills: | |
| if skill.lower() in text and skill not in found_skills: | |
| found_skills.append(skill) | |
| return list(set(found_skills)) | |
| def detect_domain(self, text): | |
| """Detect the primary domain/field from text""" | |
| text = self.preprocess_text(text) | |
| domain_scores = {} | |
| for domain, priorities in self.domain_indicators.items(): | |
| score = 0 | |
| # High priority keywords | |
| for keyword in priorities['high_priority']: | |
| if keyword in text: | |
| score += 3 | |
| # Medium priority keywords | |
| for keyword in priorities['medium_priority']: | |
| if keyword in text: | |
| score += 2 | |
| # Low priority keywords | |
| for keyword in priorities['low_priority']: | |
| if keyword in text: | |
| score += 1 | |
| domain_scores[domain] = score | |
| # Return the domain with highest score | |
| if domain_scores: | |
| return max(domain_scores, key=domain_scores.get) | |
| return None | |
| def calculate_relevant_skills_score(self, job_description, resume): | |
| """Calculate relevant skills score""" | |
| # Detect domain from job description | |
| job_domain = self.detect_domain(job_description) | |
| # Extract skills from both texts | |
| job_skills = self.extract_skills_from_text(job_description, job_domain) | |
| resume_skills = self.extract_skills_from_text(resume, job_domain) | |
| if not job_skills: | |
| return 50 # Default score if no skills detected in job description | |
| # Calculate overlap | |
| matching_skills = set(job_skills) & set(resume_skills) | |
| skill_match_ratio = len(matching_skills) / len(job_skills) | |
| # Bonus for domain-specific skills | |
| domain_bonus = 0 | |
| if job_domain and job_domain in self.skill_categories: | |
| domain_skills = self.skill_categories[job_domain] | |
| domain_matches = [skill for skill in matching_skills if skill in domain_skills] | |
| domain_bonus = min(15, len(domain_matches) * 3) | |
| # Calculate base score | |
| base_score = min(85, skill_match_ratio * 100) | |
| final_score = min(100, base_score + domain_bonus) | |
| return final_score | |
| def extract_experience_years(self, text): | |
| """Extract years of experience from text""" | |
| text = self.preprocess_text(text) | |
| # Patterns for experience extraction | |
| patterns = [ | |
| r'(\d+)\+?\s*years?\s*(?:of\s*)?experience', | |
| r'(\d+)\+?\s*years?\s*(?:of\s*)?(?:work\s*)?experience', | |
| r'experience\s*(?:of\s*)?(\d+)\+?\s*years?', | |
| r'(\d+)\+?\s*years?\s*(?:in|of|with)', | |
| r'over\s*(\d+)\s*years?', | |
| r'more\s*than\s*(\d+)\s*years?' | |
| ] | |
| years = [] | |
| for pattern in patterns: | |
| matches = re.findall(pattern, text) | |
| years.extend([int(match) for match in matches]) | |
| # Also look for date ranges in experience section | |
| date_patterns = [ | |
| r'(\d{4})\s*-\s*(\d{4})', | |
| r'(\d{4})\s*to\s*(\d{4})', | |
| r'(\d{4})\s*β\s*(\d{4})' | |
| ] | |
| current_year = 2024 | |
| for pattern in date_patterns: | |
| matches = re.findall(pattern, text) | |
| for start, end in matches: | |
| start_year = int(start) | |
| end_year = int(end) if end != 'present' else current_year | |
| if end_year > start_year: | |
| years.append(end_year - start_year) | |
| return max(years) if years else 0 | |
| def calculate_work_experience_score(self, job_description, resume): | |
| """Calculate work experience score""" | |
| # Extract required experience from job description | |
| job_experience = self.extract_experience_years(job_description) | |
| resume_experience = self.extract_experience_years(resume) | |
| # Look for experience-related keywords in resume | |
| experience_keywords = ['experience', 'worked', 'employed', 'position', 'role', 'job', 'internship', 'intern'] | |
| resume_lower = resume.lower() | |
| experience_mentions = sum(1 for keyword in experience_keywords if keyword in resume_lower) | |
| # Calculate score based on experience match | |
| if job_experience == 0: | |
| # If no specific experience required, base on mentions | |
| return min(80, 40 + experience_mentions * 8) | |
| if resume_experience >= job_experience: | |
| return min(100, 80 + (resume_experience - job_experience) * 2) | |
| elif resume_experience >= job_experience * 0.7: | |
| return 70 | |
| elif resume_experience >= job_experience * 0.5: | |
| return 60 | |
| else: | |
| return max(30, 30 + experience_mentions * 5) | |
| def calculate_education_score(self, job_description, resume): | |
| """Calculate education score""" | |
| resume_lower = resume.lower() | |
| job_lower = job_description.lower() | |
| # Check for degree types | |
| degree_score = 0 | |
| for degree in self.education_patterns['degree_types']: | |
| if degree in resume_lower: | |
| degree_score += 20 | |
| break | |
| # Check for education keywords | |
| education_mentions = sum(1 for keyword in self.education_keywords if keyword in resume_lower) | |
| education_score = min(30, education_mentions * 10) | |
| # Check for undergraduate patterns | |
| undergraduate_score = 0 | |
| for pattern in self.education_patterns['undergraduate']: | |
| if pattern in resume_lower: | |
| undergraduate_score = 15 | |
| break | |
| # Year indicators | |
| year_score = 0 | |
| for year in self.education_patterns['year_indicators']: | |
| if year in resume_lower: | |
| year_score = 10 | |
| break | |
| # Bonus for relevant field | |
| field_bonus = 0 | |
| domain = self.detect_domain(job_description) | |
| if domain: | |
| domain_keywords = [domain.replace('_', ' '), domain.replace('_', '')] | |
| for keyword in domain_keywords: | |
| if keyword in resume_lower: | |
| field_bonus = 20 | |
| break | |
| total_score = degree_score + education_score + undergraduate_score + year_score + field_bonus | |
| return min(100, max(40, total_score)) | |
| def calculate_certifications_score(self, job_description, resume): | |
| """Calculate certifications score""" | |
| resume_lower = resume.lower() | |
| # Check for certification keywords | |
| cert_mentions = sum(1 for keyword in self.certification_keywords if keyword in resume_lower) | |
| # Look for specific certification patterns | |
| cert_patterns = [ | |
| r'certified\s+\w+', | |
| r'\w+\s+certification', | |
| r'\w+\s+certificate', | |
| r'licensed\s+\w+', | |
| r'accredited\s+\w+' | |
| ] | |
| pattern_matches = 0 | |
| for pattern in cert_patterns: | |
| if re.search(pattern, resume_lower): | |
| pattern_matches += 1 | |
| # Domain-specific certifications | |
| domain = self.detect_domain(job_description) | |
| domain_cert_bonus = 0 | |
| if domain == 'cybersecurity': | |
| cyber_certs = ['cissp', 'ceh', 'oscp', 'comptia', 'security+'] | |
| for cert in cyber_certs: | |
| if cert in resume_lower: | |
| domain_cert_bonus += 15 | |
| elif domain == 'cloud': | |
| cloud_certs = ['aws', 'azure', 'gcp', 'cloud practitioner'] | |
| for cert in cloud_certs: | |
| if cert in resume_lower: | |
| domain_cert_bonus += 15 | |
| base_score = min(60, cert_mentions * 15 + pattern_matches * 10) | |
| total_score = min(100, base_score + domain_cert_bonus) | |
| return max(40, total_score) if cert_mentions > 0 or pattern_matches > 0 else 40 | |
| def categorize_projects(self, project_text): | |
| """Categorize projects based on content""" | |
| project_text = self.preprocess_text(project_text) | |
| categories = [] | |
| for category, keywords in self.project_categories.items(): | |
| for keyword in keywords: | |
| if keyword in project_text: | |
| categories.append(category) | |
| break | |
| return categories | |
| def calculate_projects_score(self, job_description, resume): | |
| """Calculate projects score""" | |
| resume_lower = resume.lower() | |
| # Extract project mentions | |
| project_mentions = sum(1 for keyword in self.project_keywords if keyword in resume_lower) | |
| # Look for project sections | |
| project_section_indicators = ['projects', 'personal projects', 'academic projects', 'work projects'] | |
| has_project_section = any(indicator in resume_lower for indicator in project_section_indicators) | |
| # Categorize projects | |
| project_categories = self.categorize_projects(resume) | |
| job_domain = self.detect_domain(job_description) | |
| # Calculate relevance | |
| relevance_bonus = 0 | |
| if job_domain and job_domain in project_categories: | |
| relevance_bonus = 25 | |
| # Calculate base score | |
| base_score = min(50, project_mentions * 8) | |
| section_bonus = 20 if has_project_section else 0 | |
| category_bonus = min(15, len(project_categories) * 3) | |
| total_score = base_score + section_bonus + category_bonus + relevance_bonus | |
| return min(100, max(30, total_score)) | |
| def calculate_keywords_match_score(self, job_description, resume): | |
| """Calculate keyword matching score using semantic similarity""" | |
| try: | |
| # Preprocess texts | |
| job_text = self.preprocess_text(job_description) | |
| resume_text = self.preprocess_text(resume) | |
| # Get embeddings | |
| job_embedding = self.sentence_model.encode([job_text]) | |
| resume_embedding = self.sentence_model.encode([resume_text]) | |
| # Calculate cosine similarity | |
| similarity = cosine_similarity(job_embedding, resume_embedding)[0][0] | |
| # Convert to percentage | |
| similarity_score = similarity * 100 | |
| # Add keyword overlap bonus | |
| job_words = set(job_text.split()) | |
| resume_words = set(resume_text.split()) | |
| # Filter out common words | |
| common_words = {'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'must', 'shall', 'a', 'an', 'this', 'that', 'these', 'those'} | |
| job_words = job_words - common_words | |
| resume_words = resume_words - common_words | |
| if job_words: | |
| overlap = len(job_words & resume_words) / len(job_words) | |
| overlap_bonus = overlap * 20 | |
| else: | |
| overlap_bonus = 0 | |
| final_score = min(100, similarity_score + overlap_bonus) | |
| return max(30, final_score) | |
| except Exception as e: | |
| print(f"Error in keyword matching: {e}") | |
| # Fallback to simple word matching | |
| job_words = set(job_description.lower().split()) | |
| resume_words = set(resume.lower().split()) | |
| if job_words: | |
| overlap = len(job_words & resume_words) / len(job_words) | |
| return min(100, max(30, overlap * 100)) | |
| return 50 | |
| def calculate_tools_tech_score(self, job_description, resume): | |
| """Calculate tools and technology score""" | |
| # Extract tools and technologies from both texts | |
| job_tools = self.extract_skills_from_text(job_description) | |
| resume_tools = self.extract_skills_from_text(resume) | |
| # Focus on technical skills | |
| technical_categories = ['programming', 'databases', 'cloud', 'web_development', 'mobile_development', 'data_science', 'cybersecurity', 'ai_ml_engineering'] | |
| job_tech_skills = [] | |
| resume_tech_skills = [] | |
| for category in technical_categories: | |
| if category in self.skill_categories: | |
| category_skills = self.skill_categories[category] | |
| job_tech_skills.extend([skill for skill in job_tools if skill in category_skills]) | |
| resume_tech_skills.extend([skill for skill in resume_tools if skill in category_skills]) | |
| if not job_tech_skills: | |
| return 60 # Default score if no technical skills in job description | |
| # Calculate overlap | |
| matching_tools = set(job_tech_skills) & set(resume_tech_skills) | |
| tool_match_ratio = len(matching_tools) / len(job_tech_skills) | |
| # Bonus for having more tools than required | |
| extra_tools_bonus = min(15, max(0, len(resume_tech_skills) - len(job_tech_skills)) * 2) | |
| base_score = tool_match_ratio * 85 | |
| final_score = min(100, base_score + extra_tools_bonus) | |
| return max(40, final_score) | |
| def infer_soft_skills(self, text): | |
| """Infer soft skills from interests and activities""" | |
| text = self.preprocess_text(text) | |
| inferred_skills = [] | |
| for skill, indicators in self.interest_skill_mapping.items(): | |
| for indicator in indicators: | |
| if indicator in text: | |
| inferred_skills.append(skill) | |
| break | |
| return inferred_skills | |
def calculate_soft_skills_score(self, job_description, resume):
    """Score how well the resume's soft skills cover those the job asks for.

    Soft skills come from ``self.skill_categories['soft_skills']`` and are
    matched as case-insensitive substrings of each text. Skills returned by
    ``self.infer_soft_skills(resume)`` count towards the resume as well.

    Args:
        job_description: Job description text.
        resume: Resume text.

    Returns:
        A score between 50 and 100. When the job description names no known
        soft skill, the score is 50 plus 5 per resume soft skill, capped at
        80. Otherwise it is ``70 * matched / requested`` plus a diversity
        bonus of 3 per resume soft skill (capped at 20), clamped to 50-100.
    """
    known_soft_skills = self.skill_categories['soft_skills']

    # Lower-case each text once instead of once per candidate skill.
    job_text = job_description.lower()
    resume_text = resume.lower()

    # Sets keep the match ratio consistent: a skill listed twice in the
    # category must not count twice in the denominator.
    job_soft_skills = {skill for skill in known_soft_skills if skill in job_text}
    resume_soft_skills = {skill for skill in known_soft_skills if skill in resume_text}

    # Add soft skills inferred from activities and interests.
    resume_soft_skills |= set(self.infer_soft_skills(resume))

    if not job_soft_skills:
        # If no specific soft skills mentioned in job, give credit for having any.
        return min(80, 50 + len(resume_soft_skills) * 5)

    match_ratio = len(job_soft_skills & resume_soft_skills) / len(job_soft_skills)

    # Bonus for having diverse soft skills.
    diversity_bonus = min(20, len(resume_soft_skills) * 3)

    return max(50, min(100, match_ratio * 70 + diversity_bonus))
def calculate_final_score(self, job_description, resume):
    """Compute every dimension score and their weighted total.

    Returns:
        A ``(final_score, scores)`` tuple. ``scores`` maps each dimension
        name to its individual score; ``final_score`` is the sum of each
        score multiplied by its entry in ``self.weights``.
    """
    # Dimension name -> scoring method; dimensions are evaluated in this order.
    dimension_scorers = {
        'relevant_skills': self.calculate_relevant_skills_score,
        'work_experience': self.calculate_work_experience_score,
        'education': self.calculate_education_score,
        'certifications': self.calculate_certifications_score,
        'projects': self.calculate_projects_score,
        'keywords_match': self.calculate_keywords_match_score,
        'tools_tech': self.calculate_tools_tech_score,
        'soft_skills': self.calculate_soft_skills_score,
    }

    scores = {
        dimension: score_fn(job_description, resume)
        for dimension, score_fn in dimension_scorers.items()
    }

    weighted_total = sum(
        value * self.weights[dimension] for dimension, value in scores.items()
    )
    return weighted_total, scores
| # Create the single module-level ATSScorer instance that score_resume() below calls into | |
| scorer = ATSScorer() | |
def score_resume(job_description, resume_file, resume_text):
    """Score a resume against a job description and build the UI outputs.

    Args:
        job_description: Job description text; ``None`` or whitespace-only
            input is rejected with a message.
        resume_file: Path of an uploaded resume file, or ``None``. When given
            it takes precedence over ``resume_text``.
        resume_text: Pasted resume text, used only when no file is uploaded.

    Returns:
        A ``(ats_breakdown, ai_analysis, chart_data)`` tuple. On success these
        are the Markdown score report, the formatted AI analysis and a
        ``pandas.DataFrame`` of per-dimension scores. On any failure the first
        item carries the message, the second is ``""`` and the third is
        ``None``.
    """

    def _failure(message):
        # The third output feeds a gr.Dataframe, which interprets a str value
        # as a CSV path; None renders it empty so the message stays visible.
        return message, "", None

    if not (job_description or "").strip():
        return _failure("Please provide a job description.")

    # Determine resume source: an uploaded file wins over pasted text.
    pasted_text = (resume_text or "").strip()
    if resume_file is not None:
        try:
            resume_content = scorer.extract_text_from_file(resume_file)
        except Exception as e:
            return _failure(f"Error processing file: {e}")
        if not (resume_content or "").strip():
            return _failure("Could not extract text from the uploaded file. Please check the file format.")
    elif pasted_text:
        resume_content = pasted_text
    else:
        return _failure("Please provide either a resume file (PDF/DOCX) or paste resume text.")

    # (score key, breakdown label, chart label, weight in percent) -- one
    # table drives both the Markdown report and the chart so they cannot drift.
    dimensions = (
        ('relevant_skills', 'Relevant Skills', 'Relevant Skills', 25),
        ('work_experience', 'Work Experience', 'Work Experience', 20),
        ('education', 'Education', 'Education', 10),
        ('certifications', 'Certifications & Courses', 'Certifications', 7),
        ('projects', 'Projects', 'Projects', 10),
        ('keywords_match', 'Keywords Match', 'Keywords Match', 10),
        ('tools_tech', 'Tools & Technologies', 'Tools & Tech', 10),
        ('soft_skills', 'Soft Skills Indicators', 'Soft Skills', 8),
    )

    try:
        # Get ATS score
        final_score, dimension_scores = scorer.calculate_final_score(job_description, resume_content)

        # Get AI analysis
        analysis_result = scorer.analyze_cv(resume_content, job_description)
        ai_analysis = scorer.format_analysis_output(analysis_result)

        # Create ATS breakdown
        breakdown_lines = "\n".join(
            f"- **{label}** ({weight}%): {dimension_scores[key]:.1f}/100"
            for key, label, _, weight in dimensions
        )
        # The last band reads "Below 45" so the legend leaves no gap under
        # the "45-55" band.
        ats_breakdown = f"""
## Overall ATS Score: {final_score:.1f}/100
### Dimension Breakdown:
{breakdown_lines}
### Score Interpretation:
- **90-100**: Excellent match
- **76-89**: Very good match
- **56-75**: Good match
- **45-55**: Fair match
- **Below 45**: Poor match
"""

        # Create score chart data
        chart_data = pd.DataFrame({
            'Dimension': [chart_label for _, _, chart_label, _ in dimensions],
            'Score': [dimension_scores[key] for key, _, _, _ in dimensions],
            'Weight (%)': [weight for _, _, _, weight in dimensions],
        })
        return ats_breakdown, ai_analysis, chart_data
    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        return _failure(f"Error processing resume: {e}")
# Create Enhanced Gradio interface
# Static UI text and options are named up front so the layout below reads as
# structure only.
_INTRO_MARKDOWN = """
# π― Enhanced ATS Resume Scorer with AI Analysis
This tool provides **dual analysis** of your resume:
1. **ATS Score** - Technical matching across 8 dimensions
2. **AI Analysis** - Intelligent insights and recommendations
**π Resume Input:** Upload PDF/DOCX file OR paste text manually
**π Job Description:** Paste as text
"""
_RESUME_FILE_TYPES = [".pdf", ".docx", ".doc"]

with gr.Blocks(title="Enhanced ATS Resume Scorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Inputs: job description on the left, resume (file or text) on the right.
    with gr.Row():
        with gr.Column():
            job_desc_input = gr.Textbox(
                lines=12,
                max_lines=20,
                label="π Job Description",
                placeholder="Paste the complete job description here...",
            )
        with gr.Column():
            gr.Markdown("### π Resume Input")
            with gr.Tab("Upload File (PDF/DOCX)"):
                resume_file_input = gr.File(
                    type="filepath",
                    file_types=_RESUME_FILE_TYPES,
                    label="Upload Resume",
                )
                gr.Markdown("*Supported formats: PDF, DOCX, DOC*")
            with gr.Tab("Paste Text"):
                resume_text_input = gr.Textbox(
                    lines=10,
                    max_lines=15,
                    label="Resume Text",
                    placeholder="Or paste your resume text here...",
                )

    score_btn = gr.Button("π Analyze Resume", size="lg", variant="primary")

    # Outputs: ATS report and AI analysis side by side, score table underneath.
    with gr.Row():
        with gr.Column():
            ats_output = gr.Markdown(label="ATS Scoring Results")
        with gr.Column():
            ai_output = gr.Markdown(label="AI Analysis Results")
    with gr.Row():
        chart_output = gr.Dataframe(
            headers=['Dimension', 'Score', 'Weight (%)'],
            datatype=['str', 'number', 'number'],
            label="Dimension Scores",
        )

    # score_resume returns one value per output component, in this order.
    score_btn.click(
        fn=score_resume,
        inputs=[job_desc_input, resume_file_input, resume_text_input],
        outputs=[ats_output, ai_output, chart_output],
    )

if __name__ == "__main__":
    demo.launch()