Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| from sklearn.preprocessing import MultiLabelBinarizer | |
def _read_field(record, name):
    """Return field `name` of one disease record: attribute first, then key."""
    try:
        return getattr(record, name)
    except AttributeError:
        # Records parsed straight from JSON are plain dicts, not objects.
        return record[name]


def trainingDataFromUTagsJSON(data: dict) -> pd.DataFrame:
    """
    Build the training table from the UTags JSON data
    =================================================
    Each entry of ``data['diseases']`` becomes one row. The row label is
    the first element of the record's ``disease_persian`` field; the
    ``symptom_eng`` and ``cause_eng`` label collections are expanded into
    one sparse indicator column per distinct label, named
    ``symptoms_<label>`` and ``causes_<label>``.

    Parameters:
    -----------
    data:
        description: UTags JSON content; ``data['diseases']`` is iterated.
                     Records may expose their fields as attributes or as
                     mapping keys.
        type: dict
    -----------
    Returns:
    --------
    data:
        description: Training data with a ``disease`` column followed by
                     the indicator columns of every feature that could be
                     encoded
        type: pd.DataFrame
    -----------
    Notes:
    ------
    Encoding is best effort: when a feature column cannot be encoded, a
    message is printed and that feature is left out of the result.
    """
    diseases = data['diseases']

    df = pd.DataFrame()
    df['disease'] = [
        _read_field(record, 'disease_persian')[0] for record in diseases
    ]
    df['symptoms'] = [_read_field(record, 'symptom_eng') for record in diseases]
    df['causes'] = [_read_field(record, 'cause_eng') for record in diseases]

    mlb = MultiLabelBinarizer(sparse_output=True)
    feature_columns = [name for name in df.columns if name != 'disease']
    for col in feature_columns:
        # The raw label column is removed up front, so a feature that fails
        # to encode is simply absent from the returned frame.
        raw_labels = df.pop(col)
        try:
            encoded = mlb.fit_transform(raw_labels)
            indicators = pd.DataFrame.sparse.from_spmatrix(
                encoded,
                index=df.index,
                # Explicit formatting works for any label type, not only str.
                columns=[f'{col}_{label}' for label in mlb.classes_],
            )
            df = df.join(indicators)
        except Exception as error:
            print(f'Error: {error} at column: {col}, skipping...')
    return df
def trainingDataFromPromptsForBERT(data: dict) -> pd.DataFrame:
    """
    Flatten the prompts JSON data into sentence/disease pairs
    =========================================================
    Every sentence listed under an entry of ``data['diseasesPrompts']``
    becomes one row, paired with that entry's ``disease`` value.

    Parameters:
    -----------
    data:
        description: Prompts JSON content; each entry of
                     ``data['diseasesPrompts']`` provides ``sentences``
                     and ``disease``
        type: dict
    -----------
    Returns:
    --------
    data:
        description: Training data with the columns ``sentence`` and
                     ``disease``, in input order
        type: pd.DataFrame
    """
    rows = [
        (text, entry['disease'])
        for entry in data['diseasesPrompts']
        for text in entry['sentences']
    ]
    return pd.DataFrame(rows, columns=['sentence', 'disease'])