# nextbird
This is a merge of pre-trained language models created using mergekit.
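A minimal usage sketch with Transformers is below; the repo id `Lambent/nextbird`, the prompt, and the generation settings are placeholder assumptions, not something fixed by the merge itself.

```python
# Minimal sketch: load the merged model with Hugging Face Transformers.
# The repo id "Lambent/nextbird" is an assumption; substitute the actual path.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "Lambent/nextbird"  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id, torch_dtype=torch.float16, device_map="auto"
)

prompt = "Write a short scene in which two rivals reluctantly cooperate."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=200, do_sample=True, temperature=0.8)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```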
| Tasks | Version | Filter | n-shot | Metric | | Value | | Stderr |
|---|---|---|---|---|---|---:|---|---:|
| eq_bench | 2.1 | none | 0 | eqbench | ↑ | 79.1207 | ± | 1.4524 |
| | | none | 0 | percent_parseable | ↑ | 100.0000 | ± | 0.0000 |
... Okay then. My idea of merging in some reasonably smart RP-focused models for personal use looks like it might've given me a clever bird here.
Initial private run of the Open LLM benchmark suite (results below):
I cut it off before GSM8K, but EQ-Bench definitely looks like an outlier relative to its general capabilities.
(Not that it isn't still decently smart.)
| Tasks | Version | Filter | n-shot | Metric | | Value | | Stderr |
|---|---|---|---|---|---|---:|---|---:|
| arc_challenge | 1 | none | 25 | acc | ↑ | 0.6527 | ± | 0.0139 |
| | | none | 25 | acc_norm | ↑ | 0.6877 | ± | 0.0135 |
"results": {
"hellaswag": {
"acc,none": 0.6864170483967337,
"acc_stderr,none": 0.004630008293925667,
"acc_norm,none": 0.8674566819358693,
"acc_norm_stderr,none": 0.0033838751726699766,
"alias": "hellaswag"
}
},
```
"results": {
"mmlu": {
"acc,none": 0.6405782652043869,
"acc_stderr,none": 0.0038184215234779296,
"alias": "mmlu"
},
"mmlu_humanities": {
"alias": " - humanities",
"acc,none": 0.6004250797024442,
"acc_stderr,none": 0.006745181916513025
},
"mmlu_formal_logic": {
"alias": " - formal_logic",
"acc,none": 0.5158730158730159,
"acc_stderr,none": 0.044698818540726076
},
"mmlu_high_school_european_history": {
"alias": " - high_school_european_history",
"acc,none": 0.7818181818181819,
"acc_stderr,none": 0.03225078108306289
},
"mmlu_high_school_us_history": {
"alias": " - high_school_us_history",
"acc,none": 0.8431372549019608,
"acc_stderr,none": 0.02552472232455333
},
"mmlu_high_school_world_history": {
"alias": " - high_school_world_history",
"acc,none": 0.8227848101265823,
"acc_stderr,none": 0.024856364184503228
},
"mmlu_international_law": {
"alias": " - international_law",
"acc,none": 0.8016528925619835,
"acc_stderr,none": 0.036401182719909456
},
"mmlu_jurisprudence": {
"alias": " - jurisprudence",
"acc,none": 0.7685185185185185,
"acc_stderr,none": 0.04077494709252627
},
"mmlu_logical_fallacies": {
"alias": " - logical_fallacies",
"acc,none": 0.7484662576687117,
"acc_stderr,none": 0.034089978868575295
},
"mmlu_moral_disputes": {
"alias": " - moral_disputes",
"acc,none": 0.7312138728323699,
"acc_stderr,none": 0.023868003262500118
},
"mmlu_moral_scenarios": {
"alias": " - moral_scenarios",
"acc,none": 0.41564245810055866,
"acc_stderr,none": 0.01648278218750067
},
"mmlu_philosophy": {
"alias": " - philosophy",
"acc,none": 0.7234726688102894,
"acc_stderr,none": 0.02540383297817961
},
"mmlu_prehistory": {
"alias": " - prehistory",
"acc,none": 0.7376543209876543,
"acc_stderr,none": 0.024477222856135114
},
"mmlu_professional_law": {
"alias": " - professional_law",
"acc,none": 0.47392438070404175,
"acc_stderr,none": 0.012752858346533141
},
"mmlu_world_religions": {
"alias": " - world_religions",
"acc,none": 0.8538011695906432,
"acc_stderr,none": 0.027097290118070782
},
"mmlu_other": {
"alias": " - other",
"acc,none": 0.7074348245896364,
"acc_stderr,none": 0.007835823166037259
},
"mmlu_business_ethics": {
"alias": " - business_ethics",
"acc,none": 0.63,
"acc_stderr,none": 0.04852365870939098
},
"mmlu_clinical_knowledge": {
"alias": " - clinical_knowledge",
"acc,none": 0.7056603773584905,
"acc_stderr,none": 0.028049186315695248
},
"mmlu_college_medicine": {
"alias": " - college_medicine",
"acc,none": 0.6647398843930635,
"acc_stderr,none": 0.03599586301247077
},
"mmlu_global_facts": {
"alias": " - global_facts",
"acc,none": 0.31,
"acc_stderr,none": 0.04648231987117316
},
"mmlu_human_aging": {
"alias": " - human_aging",
"acc,none": 0.7040358744394619,
"acc_stderr,none": 0.030636591348699786
},
"mmlu_management": {
"alias": " - management",
"acc,none": 0.7766990291262136,
"acc_stderr,none": 0.04123553189891431
},
"mmlu_marketing": {
"alias": " - marketing",
"acc,none": 0.8846153846153846,
"acc_stderr,none": 0.020930193185179333
},
"mmlu_medical_genetics": {
"alias": " - medical_genetics",
"acc,none": 0.71,
"acc_stderr,none": 0.04560480215720684
},
"mmlu_miscellaneous": {
"alias": " - miscellaneous",
"acc,none": 0.8275862068965517,
"acc_stderr,none": 0.013507943909371807
},
"mmlu_nutrition": {
"alias": " - nutrition",
"acc,none": 0.7124183006535948,
"acc_stderr,none": 0.02591780611714716
},
"mmlu_professional_accounting": {
"alias": " - professional_accounting",
"acc,none": 0.5035460992907801,
"acc_stderr,none": 0.02982674915328092
},
"mmlu_professional_medicine": {
"alias": " - professional_medicine",
"acc,none": 0.7022058823529411,
"acc_stderr,none": 0.027778298701545443
},
"mmlu_virology": {
"alias": " - virology",
"acc,none": 0.5301204819277109,
"acc_stderr,none": 0.03885425420866766
},
"mmlu_social_sciences": {
"alias": " - social_sciences",
"acc,none": 0.7461813454663634,
"acc_stderr,none": 0.007659164764320299
},
"mmlu_econometrics": {
"alias": " - econometrics",
"acc,none": 0.49122807017543857,
"acc_stderr,none": 0.04702880432049615
},
"mmlu_high_school_geography": {
"alias": " - high_school_geography",
"acc,none": 0.797979797979798,
"acc_stderr,none": 0.028606204289229886
},
"mmlu_high_school_government_and_politics": {
"alias": " - high_school_government_and_politics",
"acc,none": 0.917098445595855,
"acc_stderr,none": 0.01989934131572178
},
"mmlu_high_school_macroeconomics": {
"alias": " - high_school_macroeconomics",
"acc,none": 0.6564102564102564,
"acc_stderr,none": 0.024078696580635484
},
"mmlu_high_school_microeconomics": {
"alias": " - high_school_microeconomics",
"acc,none": 0.6890756302521008,
"acc_stderr,none": 0.030066761582977934
},
"mmlu_high_school_psychology": {
"alias": " - high_school_psychology",
"acc,none": 0.8385321100917431,
"acc_stderr,none": 0.015776239256163224
},
"mmlu_human_sexuality": {
"alias": " - human_sexuality",
"acc,none": 0.7786259541984732,
"acc_stderr,none": 0.036412970813137276
},
"mmlu_professional_psychology": {
"alias": " - professional_psychology",
"acc,none": 0.673202614379085,
"acc_stderr,none": 0.018975427920507215
},
"mmlu_public_relations": {
"alias": " - public_relations",
"acc,none": 0.6818181818181818,
"acc_stderr,none": 0.044612721759105085
},
"mmlu_security_studies": {
"alias": " - security_studies",
"acc,none": 0.7428571428571429,
"acc_stderr,none": 0.027979823538744546
},
"mmlu_sociology": {
"alias": " - sociology",
"acc,none": 0.8606965174129353,
"acc_stderr,none": 0.02448448716291397
},
"mmlu_us_foreign_policy": {
"alias": " - us_foreign_policy",
"acc,none": 0.84,
"acc_stderr,none": 0.03684529491774708
},
"mmlu_stem": {
"alias": " - stem",
"acc,none": 0.531557247066286,
"acc_stderr,none": 0.00850786694491014
},
"mmlu_abstract_algebra": {
"alias": " - abstract_algebra",
"acc,none": 0.37,
"acc_stderr,none": 0.04852365870939099
},
"mmlu_anatomy": {
"alias": " - anatomy",
"acc,none": 0.6148148148148148,
"acc_stderr,none": 0.042039210401562783
},
"mmlu_astronomy": {
"alias": " - astronomy",
"acc,none": 0.6842105263157895,
"acc_stderr,none": 0.037827289808654685
},
"mmlu_college_biology": {
"alias": " - college_biology",
"acc,none": 0.75,
"acc_stderr,none": 0.03621034121889507
},
"mmlu_college_chemistry": {
"alias": " - college_chemistry",
"acc,none": 0.47,
"acc_stderr,none": 0.05016135580465919
},
"mmlu_college_computer_science": {
"alias": " - college_computer_science",
"acc,none": 0.56,
"acc_stderr,none": 0.04988876515698589
},
"mmlu_college_mathematics": {
"alias": " - college_mathematics",
"acc,none": 0.31,
"acc_stderr,none": 0.04648231987117317
},
"mmlu_college_physics": {
"alias": " - college_physics",
"acc,none": 0.4411764705882353,
"acc_stderr,none": 0.049406356306056595
},
"mmlu_computer_security": {
"alias": " - computer_security",
"acc,none": 0.7,
"acc_stderr,none": 0.04605661864718381
},
"mmlu_conceptual_physics": {
"alias": " - conceptual_physics",
"acc,none": 0.5829787234042553,
"acc_stderr,none": 0.03223276266711712
},
"mmlu_electrical_engineering": {
"alias": " - electrical_engineering",
"acc,none": 0.5724137931034483,
"acc_stderr,none": 0.04122737111370333
},
"mmlu_elementary_mathematics": {
"alias": " - elementary_mathematics",
"acc,none": 0.3915343915343915,
"acc_stderr,none": 0.025138091388851112
},
"mmlu_high_school_biology": {
"alias": " - high_school_biology",
"acc,none": 0.7870967741935484,
"acc_stderr,none": 0.023287665127268525
},
"mmlu_high_school_chemistry": {
"alias": " - high_school_chemistry",
"acc,none": 0.4975369458128079,
"acc_stderr,none": 0.03517945038691063
},
"mmlu_high_school_computer_science": {
"alias": " - high_school_computer_science",
"acc,none": 0.7,
"acc_stderr,none": 0.046056618647183814
},
"mmlu_high_school_mathematics": {
"alias": " - high_school_mathematics",
"acc,none": 0.34074074074074073,
"acc_stderr,none": 0.028897748741131143
},
"mmlu_high_school_physics": {
"alias": " - high_school_physics",
"acc,none": 0.3509933774834437,
"acc_stderr,none": 0.03896981964257375
},
"mmlu_high_school_statistics": {
"alias": " - high_school_statistics",
"acc,none": 0.5324074074074074,
"acc_stderr,none": 0.03402801581358966
},
"mmlu_machine_learning": {
"alias": " - machine_learning",
"acc,none": 0.4642857142857143,
"acc_stderr,none": 0.04733667890053756
}
},
"groups": {
"mmlu": {
"acc,none": 0.6405782652043869,
"acc_stderr,none": 0.0038184215234779296,
"alias": "mmlu"
},
"mmlu_humanities": {
"alias": " - humanities",
"acc,none": 0.6004250797024442,
"acc_stderr,none": 0.006745181916513025
},
"mmlu_other": {
"alias": " - other",
"acc,none": 0.7074348245896364,
"acc_stderr,none": 0.007835823166037259
},
"mmlu_social_sciences": {
"alias": " - social_sciences",
"acc,none": 0.7461813454663634,
"acc_stderr,none": 0.007659164764320299
},
"mmlu_stem": {
"alias": " - stem",
"acc,none": 0.531557247066286,
"acc_stderr,none": 0.00850786694491014
}
},
```
| Tasks | Version | Filter | n-shot | Metric | | Value | | Stderr |
|---|---|---|---|---|---|---:|---|---:|
| truthfulqa | N/A | none | 0 | acc | ↑ | 0.5705 | ± | 0.0115 |
| | | none | 0 | bleu_acc | ↑ | 0.5055 | ± | 0.0175 |
| | | none | 0 | bleu_diff | ↑ | 2.6078 | ± | 0.5566 |
| | | none | 0 | bleu_max | ↑ | 18.5891 | ± | 0.6661 |
| | | none | 0 | rouge1_acc | ↑ | 0.5324 | ± | 0.0175 |
| | | none | 0 | rouge1_diff | ↑ | 3.4985 | ± | 0.7934 |
| | | none | 0 | rouge1_max | ↑ | 43.9198 | ± | 0.7994 |
| | | none | 0 | rouge2_acc | ↑ | 0.4602 | ± | 0.0174 |
| | | none | 0 | rouge2_diff | ↑ | 3.4789 | ± | 0.8571 |
| | | none | 0 | rouge2_max | ↑ | 29.8401 | ± | 0.8868 |
| | | none | 0 | rougeL_acc | ↑ | 0.4982 | ± | 0.0175 |
| | | none | 0 | rougeL_diff | ↑ | 2.9852 | ± | 0.8028 |
| | | none | 0 | rougeL_max | ↑ | 40.3657 | ± | 0.8083 |
| - truthfulqa_gen | 3 | none | 0 | bleu_acc | ↑ | 0.5055 | ± | 0.0175 |
| | | none | 0 | bleu_diff | ↑ | 2.6078 | ± | 0.5566 |
| | | none | 0 | bleu_max | ↑ | 18.5891 | ± | 0.6661 |
| | | none | 0 | rouge1_acc | ↑ | 0.5324 | ± | 0.0175 |
| | | none | 0 | rouge1_diff | ↑ | 3.4985 | ± | 0.7934 |
| | | none | 0 | rouge1_max | ↑ | 43.9198 | ± | 0.7994 |
| | | none | 0 | rouge2_acc | ↑ | 0.4602 | ± | 0.0174 |
| | | none | 0 | rouge2_diff | ↑ | 3.4789 | ± | 0.8571 |
| | | none | 0 | rouge2_max | ↑ | 29.8401 | ± | 0.8868 |
| | | none | 0 | rougeL_acc | ↑ | 0.4982 | ± | 0.0175 |
| | | none | 0 | rougeL_diff | ↑ | 2.9852 | ± | 0.8028 |
| | | none | 0 | rougeL_max | ↑ | 40.3657 | ± | 0.8083 |
| - truthfulqa_mc1 | 2 | none | 0 | acc | ↑ | 0.4823 | ± | 0.0175 |
| - truthfulqa_mc2 | 2 | none | 0 | acc | ↑ | 0.6588 | ± | 0.0150 |
| Groups | Version | Filter | n-shot | Metric | | Value | | Stderr |
|---|---|---|---|---|---|---:|---|---:|
| truthfulqa | N/A | none | 0 | acc | ↑ | 0.5705 | ± | 0.0115 |
| | | none | 0 | bleu_acc | ↑ | 0.5055 | ± | 0.0175 |
| | | none | 0 | bleu_diff | ↑ | 2.6078 | ± | 0.5566 |
| | | none | 0 | bleu_max | ↑ | 18.5891 | ± | 0.6661 |
| | | none | 0 | rouge1_acc | ↑ | 0.5324 | ± | 0.0175 |
| | | none | 0 | rouge1_diff | ↑ | 3.4985 | ± | 0.7934 |
| | | none | 0 | rouge1_max | ↑ | 43.9198 | ± | 0.7994 |
| | | none | 0 | rouge2_acc | ↑ | 0.4602 | ± | 0.0174 |
| | | none | 0 | rouge2_diff | ↑ | 3.4789 | ± | 0.8571 |
| | | none | 0 | rouge2_max | ↑ | 29.8401 | ± | 0.8868 |
| | | none | 0 | rougeL_acc | ↑ | 0.4982 | ± | 0.0175 |
| | | none | 0 | rougeL_diff | ↑ | 2.9852 | ± | 0.8028 |
| | | none | 0 | rougeL_max | ↑ | 40.3657 | ± | 0.8083 |
| Tasks | Version | Filter | n-shot | Metric | | Value | | Stderr |
|---|---|---|---|---|---|---:|---|---:|
| winogrande | 1 | none | 5 | acc | ↑ | 0.8114 | ± | 0.011 |
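For reference, numbers like these can be re-run with EleutherAI's lm-evaluation-harness. The sketch below is an approximation: the repo id, dtype, and batch size are assumptions, the few-shot counts follow the usual Open LLM Leaderboard convention where the card doesn't state them, and exact scores will drift a little with harness version and hardware.

```python
# Rough sketch of reproducing these numbers with lm-evaluation-harness (pip install lm-eval).
# The repo id is an assumption; each task is run with its own few-shot count.
import lm_eval

MODEL_ARGS = "pretrained=Lambent/nextbird,dtype=float16"  # assumed repo id

for task, n_shot in [("eq_bench", 0), ("arc_challenge", 25), ("hellaswag", 10),
                     ("mmlu", 5), ("truthfulqa", 0), ("winogrande", 5)]:
    out = lm_eval.simple_evaluate(
        model="hf",
        model_args=MODEL_ARGS,
        tasks=[task],
        num_fewshot=n_shot,
        batch_size=8,
    )
    for name, metrics in out["results"].items():
        print(name, metrics)
```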
## Merge Details
Additional details on the edgymaid-7B merge configuration:
```yaml
models:
  - model: SanjiWatsuki/Loyal-Macaroni-Maid-7B
    parameters:
      density: 0.5
      weight: 0.3
  - model: crestf411/daybreak-kunoichi-2dpo-7b
    parameters:
      density: 0.5
      weight: 0.3
  - model: SanjiWatsuki/Kunoichi-DPO-v2-7B
    parameters:
      density: 0.5
      weight: 0.3
base_model: mistralai/Mistral-7B-v0.1
merge_method: ties
dtype: float16
```
### Merge Method
This model was merged using the [TIES](https://arxiv.org/abs/2306.01708) merge method, with mistralai/Mistral-7B-v0.1 as the base.
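Roughly, TIES builds a task vector for each model against the base, trims each vector to its largest-magnitude entries according to `density`, elects a majority sign per parameter, and merges only the entries that agree with that sign, scaled by `weight`. The sketch below is a simplified toy illustration of that idea, not mergekit's actual implementation:

```python
# Simplified sketch of TIES merging on flat parameter vectors (toy illustration only;
# mergekit's actual implementation differs in detail).
import numpy as np

def ties_merge(base, finetuned, weights, densities):
    """base: (d,) array; finetuned: list of (d,) arrays; weights/densities: per-model floats."""
    trimmed = []
    for theta, w, density in zip(finetuned, weights, densities):
        tau = theta - base                          # task vector
        k = int(round(density * tau.size))          # keep top-`density` fraction by magnitude
        threshold = np.sort(np.abs(tau))[-k] if k > 0 else np.inf
        tau = np.where(np.abs(tau) >= threshold, tau, 0.0)
        trimmed.append(w * tau)                     # apply merge weight
    stacked = np.stack(trimmed)                     # (n_models, d)
    elected = np.sign(stacked.sum(axis=0))          # majority sign per parameter
    agree = (np.sign(stacked) == elected) & (stacked != 0)
    # average only the task-vector entries whose sign agrees with the elected sign
    merged = np.where(agree.any(axis=0),
                      stacked.sum(axis=0, where=agree) / np.maximum(agree.sum(axis=0), 1),
                      0.0)
    return base + merged

# Toy usage with random vectors standing in for model weights
rng = np.random.default_rng(0)
base = rng.normal(size=1000)
models = [base + rng.normal(scale=0.1, size=1000) for _ in range(3)]
merged = ties_merge(base, models, weights=[0.3, 0.3, 0.3], densities=[0.5, 0.5, 0.5])
```

In the configurations here, `density` controls what fraction of each task vector survives trimming and `weight` scales that model's contribution.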
### Models Merged
The following models were included in the merge:
* Lambent/threebird-scribe-alpha0.3-7B
* Lambent/edgymaid-7B
### Configuration
The following YAML configuration was used to produce this model:
```yaml
models:
  - model: Lambent/threebird-scribe-alpha0.3-7B
    parameters:
      density: 1.0
      weight: 1.0
  - model: Lambent/edgymaid-7B
    parameters:
      density: 0.9
      weight: 0.5
base_model: mistralai/Mistral-7B-v0.1
merge_method: ties
dtype: float16
```
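To reproduce the merge, a config like the one above can be saved to a file and passed to mergekit's `mergekit-yaml` entry point. A minimal sketch (the file name and output directory are placeholders):

```python
# Sketch: run the merge by invoking mergekit's mergekit-yaml CLI from Python.
# "nextbird.yaml" holds the configuration above; "./nextbird" is an arbitrary output directory.
import subprocess

subprocess.run(
    ["mergekit-yaml", "nextbird.yaml", "./nextbird"],
    check=True,  # raise if the merge fails
)
# mergekit also accepts flags such as --cuda to perform the merge on GPU.
```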