File size: 6,193 Bytes
fd242ab
 
 
 
 
 
 
e0510bc
fd242ab
e0510bc
 
fd242ab
 
 
 
 
 
 
e0510bc
 
 
 
 
fd242ab
e0510bc
 
 
 
 
fd242ab
 
 
e0510bc
fd242ab
 
 
 
 
 
 
 
 
 
 
 
e0510bc
 
 
 
 
 
fd242ab
 
 
 
 
e0510bc
fd242ab
 
 
 
e0510bc
fd242ab
 
e0510bc
 
fd242ab
 
e0510bc
 
fd242ab
 
e0510bc
fd242ab
e0510bc
fd242ab
e0510bc
fd242ab
 
e0510bc
 
 
 
 
fd242ab
e0510bc
fd242ab
 
e0510bc
fd242ab
e0510bc
 
 
fd242ab
 
e0510bc
 
fd242ab
 
e0510bc
 
fd242ab
e0510bc
 
fd242ab
 
e0510bc
fd242ab
 
 
e0510bc
 
fd242ab
 
 
 
 
 
 
 
 
 
 
 
e0510bc
fd242ab
 
e0510bc
 
fd242ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0510bc
fd242ab
e0510bc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import gradio as gr
import joblib
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from collections import OrderedDict


class RiasecPredictor:
    def __init__(self, regressor_path='riasec_regressor_v1.pkl', 
                 scaler_path='riasec_scaler.pkl',  
                 embedding_model_path='all-MiniLM-L6-v2'):
        """
        Load saved models for RIASEC prediction
        """
        print("Loading models...")
        self.embedding_model = SentenceTransformer(embedding_model_path)
        self.regressor = joblib.load(regressor_path)
        try:
            self.scaler = joblib.load(scaler_path)  # 👈 Load scaler
        except FileNotFoundError:
            raise FileNotFoundError(f"Scaler file not found at {scaler_path}. "
                                    "Did you save it during training?")
        self.riasec_labels = ['R', 'I', 'A', 'S', 'E', 'C']
        self.code_to_name = {
            'R': 'Realistic', 'I': 'Investigative', 'A': 'Artistic',
            'S': 'Social', 'E': 'Enterprising', 'C': 'Conventional'
        }
        print("✅ Models and scaler loaded successfully!")
    
    def predict(self, job_title=None, job_description=None, full_text=None, sort_by_score=True):
        """
        Predict RIASEC scores for a job (in original 1-7 scale)
        """
        # Handle input
        if full_text is not None:
            text = full_text
        elif job_title is not None and job_description is not None:
            text = f"{job_title} {job_description}"
        else:
            raise ValueError("Provide either full_text OR both job_title and job_description")
        
        # Generate embedding
        embedding = self.embedding_model.encode([text], convert_to_numpy=True)
        
        # Make prediction in scaled space
        prediction_scaled = self.regressor.predict(embedding)[0]
        
        # Convert back to original scale
        prediction = self.scaler.inverse_transform(prediction_scaled.reshape(1, -1))[0]
        prediction = np.clip(prediction, 1.0, 7.0)  # Enforce valid range
        
        # Create dictionary
        riasec_dict = dict(zip(self.riasec_labels, prediction.tolist()))
        
        if sort_by_score:
            return OrderedDict(sorted(riasec_dict.items(), key=lambda x: x[1], reverse=True))
        else:
            return riasec_dict
    
    def predict_with_names(self, job_title=None, job_description=None, full_text=None):
        """Predict with full names in R-I-A-S-E-C order"""
        results = self.predict(job_title, job_description, full_text, sort_by_score=False)
        ordered_with_names = OrderedDict()
        for code in ['R', 'I', 'A', 'S', 'E', 'C']:
            ordered_with_names[self.code_to_name[code]] = results[code]
        return ordered_with_names


# Initialize predictor
predictor = RiasecPredictor()


def predict_riasec(job_title, job_description):
    """Wrapper for Gradio"""
    try:
        if not job_title.strip() or not job_description.strip():
            return None, "Please provide both job title and job description."
        
        result = predictor.predict(
            job_title=job_title,
            job_description=job_description,
            sort_by_score=False  # Don't sort by score, maintain R-I-A-S-E-C order
        )
        
        # Prepare bar chart data in R-I-A-S-E-C order with abbreviations
        riasec_order = ['R', 'I', 'A', 'S', 'E', 'C']
        ordered_labels = []
        ordered_scores = []
        
        for code in riasec_order:
            ordered_labels.append(code)  # Use abbreviations
            ordered_scores.append(result[code])
        
        bar_data = pd.DataFrame({
            "Category": ordered_labels,
            "Score": ordered_scores
        })
        
        # Prepare top 3 (sorted by score)
        sorted_result = sorted(result.items(), key=lambda x: x[1], reverse=True)
        top_3_result = "### Top 3 RIASEC Types\n\n"
        for key, _ in sorted_result[:3]:
            top_3_result += f"<div style='font-size: 1.5em; font-weight: bold; margin: 5px 0; padding: 10px; background-color: #f0f0f0; color: #000000; border-radius: 5px; text-align: center; border: 1px solid #cccccc;'>{key}</div>\n"
        
        return bar_data, top_3_result
        
    except Exception as e:
        return None, f"Error: {str(e)}"


# Updated Gradio UI
with gr.Blocks(title="RIASEC Predictor") as demo:
    gr.Markdown("# RIASEC Predictor")
    gr.Markdown("Predict RIASEC personality type scores for job descriptions")
    
    with gr.Row():
        with gr.Column():
            job_title = gr.Textbox(label="Job Title", placeholder="e.g., Data Scientist")
            job_description = gr.Textbox(label="Job Description", placeholder="e.g., Analyze large datasets...", lines=4)
            submit_btn = gr.Button("Predict RIASEC Scores", variant="primary")
        
        with gr.Column():
            output_chart = gr.BarPlot(
                x="Category",
                y="Score",
                title="RIASEC Scores",
                vertical=False,  # Horizontal bars
                tooltip=["Category", "Score"],
                show_legend=False,
                height=400
            )
        
        with gr.Column():
            top_3_output = gr.Markdown(label="Top 3 RIASEC", elem_classes="top-3-riasec")
    
    gr.Markdown("Note: Please provide both job title and job description.")
    
    submit_btn.click(
        fn=predict_riasec,
        inputs=[job_title, job_description],
        outputs=[output_chart, top_3_output],
        show_progress=True
    )
    
    gr.Examples(
        examples=[
            ["Data Scientist", "Analyze large datasets and build machine learning models"],
            ["Graphic Designer", "Create visual content and design marketing materials"],
            ["Software Engineer", "Develop and maintain software applications"]
        ],
        inputs=[job_title, job_description],
        outputs=[output_chart, top_3_output],
        fn=predict_riasec,
        cache_examples=False,
    )


if __name__ == "__main__":
    demo.queue().launch(share=True)