# nextbird
This is a merge of pre-trained language models created using mergekit.
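A minimal usage sketch with Transformers is below; the repo id `Lambent/nextbird`, the prompt, and the generation settings are placeholder assumptions, not something fixed by the merge itself.

```python
# Minimal sketch: load the merged model with Hugging Face Transformers.
# The repo id "Lambent/nextbird" is an assumption; substitute the actual path.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "Lambent/nextbird"  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id, torch_dtype=torch.float16, device_map="auto"
)

prompt = "Write a short scene in which two rivals reluctantly cooperate."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=200, do_sample=True, temperature=0.8)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```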
| Tasks | Version | Filter | n-shot | Metric | | Value | | Stderr |
|---|---|---|---|---|---|---:|---|---:|
| eq_bench | 2.1 | none | 0 | eqbench | ↑ | 79.1207 | ± | 1.4524 |
| | | none | 0 | percent_parseable | ↑ | 100.0000 | ± | 0.0000 |
... Okay then. My idea of merging in some reasonably smart RP-focused models for personal use looks like it might've given me a clever bird here.
Initial private run of the Open LLM benchmark suite (results below):
I cut it off before GSM8K, but EQ-Bench definitely looks like an outlier relative to its general capabilities.
(Not that it isn't still decently smart.)
| Tasks | Version | Filter | n-shot | Metric | | Value | | Stderr |
|---|---|---|---|---|---|---:|---|---:|
| arc_challenge | 1 | none | 25 | acc | ↑ | 0.6527 | ± | 0.0139 |
| | | none | 25 | acc_norm | ↑ | 0.6877 | ± | 0.0135 |
"results": {
"hellaswag": {
"acc,none": 0.6864170483967337,
"acc_stderr,none": 0.004630008293925667,
"acc_norm,none": 0.8674566819358693,
"acc_norm_stderr,none": 0.0033838751726699766,
"alias": "hellaswag"
}
},
```
"results": {
"mmlu": {
"acc,none": 0.6405782652043869,
"acc_stderr,none": 0.0038184215234779296,
"alias": "mmlu"
},
"mmlu_humanities": {
"alias": " - humanities",
"acc,none": 0.6004250797024442,
"acc_stderr,none": 0.006745181916513025
},
"mmlu_formal_logic": {
"alias": " - formal_logic",
"acc,none": 0.5158730158730159,
"acc_stderr,none": 0.044698818540726076
},
"mmlu_high_school_european_history": {
"alias": " - high_school_european_history",
"acc,none": 0.7818181818181819,
"acc_stderr,none": 0.03225078108306289
},
"mmlu_high_school_us_history": {
"alias": " - high_school_us_history",
"acc,none": 0.8431372549019608,
"acc_stderr,none": 0.02552472232455333
},
"mmlu_high_school_world_history": {
"alias": " - high_school_world_history",
"acc,none": 0.8227848101265823,
"acc_stderr,none": 0.024856364184503228
},
"mmlu_international_law": {
"alias": " - international_law",
"acc,none": 0.8016528925619835,
"acc_stderr,none": 0.036401182719909456
},
"mmlu_jurisprudence": {
"alias": " - jurisprudence",
"acc,none": 0.7685185185185185,
"acc_stderr,none": 0.04077494709252627
},
"mmlu_logical_fallacies": {
"alias": " - logical_fallacies",
"acc,none": 0.7484662576687117,
"acc_stderr,none": 0.034089978868575295
},
"mmlu_moral_disputes": {
"alias": " - moral_disputes",
"acc,none": 0.7312138728323699,
"acc_stderr,none": 0.023868003262500118
},
"mmlu_moral_scenarios": {
"alias": " - moral_scenarios",
"acc,none": 0.41564245810055866,
"acc_stderr,none": 0.01648278218750067
},
"mmlu_philosophy": {
"alias": " - philosophy",
"acc,none": 0.7234726688102894,
"acc_stderr,none": 0.02540383297817961
},
"mmlu_prehistory": {
"alias": " - prehistory",
"acc,none": 0.7376543209876543,
"acc_stderr,none": 0.024477222856135114
},
"mmlu_professional_law": {
"alias": " - professional_law",
"acc,none": 0.47392438070404175,
"acc_stderr,none": 0.012752858346533141
},
"mmlu_world_religions": {
"alias": " - world_religions",
"acc,none": 0.8538011695906432,
"acc_stderr,none": 0.027097290118070782
},
"mmlu_other": {
"alias": " - other",
"acc,none": 0.7074348245896364,
"acc_stderr,none": 0.007835823166037259
},
"mmlu_business_ethics": {
"alias": " - business_ethics",
"acc,none": 0.63,
"acc_stderr,none": 0.04852365870939098
},
"mmlu_clinical_knowledge": {
"alias": " - clinical_knowledge",
"acc,none": 0.7056603773584905,
"acc_stderr,none": 0.028049186315695248
},
"mmlu_college_medicine": {
"alias": " - college_medicine",
"acc,none": 0.6647398843930635,
"acc_stderr,none": 0.03599586301247077
},
"mmlu_global_facts": {
"alias": " - global_facts",
"acc,none": 0.31,
"acc_stderr,none": 0.04648231987117316
},
"mmlu_human_aging": {
"alias": " - human_aging",
"acc,none": 0.7040358744394619,
"acc_stderr,none": 0.030636591348699786
},
"mmlu_management": {
"alias": " - management",
"acc,none": 0.7766990291262136,
"acc_stderr,none": 0.04123553189891431
},
"mmlu_marketing": {
"alias": " - marketing",
"acc,none": 0.8846153846153846,
"acc_stderr,none": 0.020930193185179333
},
"mmlu_medical_genetics": {
"alias": " - medical_genetics",
"acc,none": 0.71,
"acc_stderr,none": 0.04560480215720684
},
"mmlu_miscellaneous": {
"alias": " - miscellaneous",
"acc,none": 0.8275862068965517,
"acc_stderr,none": 0.013507943909371807
},
"mmlu_nutrition": {
"alias": " - nutrition",
"acc,none": 0.7124183006535948,
"acc_stderr,none": 0.02591780611714716
},
"mmlu_professional_accounting": {
"alias": " - professional_accounting",
"acc,none": 0.5035460992907801,
"acc_stderr,none": 0.02982674915328092
},
"mmlu_professional_medicine": {
"alias": " - professional_medicine",
"acc,none": 0.7022058823529411,
"acc_stderr,none": 0.027778298701545443
},
"mmlu_virology": {
"alias": " - virology",
"acc,none": 0.5301204819277109,
"acc_stderr,none": 0.03885425420866766
},
"mmlu_social_sciences": {
"alias": " - social_sciences",
"acc,none": 0.7461813454663634,
"acc_stderr,none": 0.007659164764320299
},
"mmlu_econometrics": {
"alias": " - econometrics",
"acc,none": 0.49122807017543857,
"acc_stderr,none": 0.04702880432049615
},
"mmlu_high_school_geography": {
"alias": " - high_school_geography",
"acc,none": 0.797979797979798,
"acc_stderr,none": 0.028606204289229886
},
"mmlu_high_school_government_and_politics": {
"alias": " - high_school_government_and_politics",
"acc,none": 0.917098445595855,
"acc_stderr,none": 0.01989934131572178
},
"mmlu_high_school_macroeconomics": {
"alias": " - high_school_macroeconomics",
"acc,none": 0.6564102564102564,
"acc_stderr,none": 0.024078696580635484
},
"mmlu_high_school_microeconomics": {
"alias": " - high_school_microeconomics",
"acc,none": 0.6890756302521008,
"acc_stderr,none": 0.030066761582977934
},
"mmlu_high_school_psychology": {
"alias": " - high_school_psychology",
"acc,none": 0.8385321100917431,
"acc_stderr,none": 0.015776239256163224
},
"mmlu_human_sexuality": {
"alias": " - human_sexuality",
"acc,none": 0.7786259541984732,
"acc_stderr,none": 0.036412970813137276
},
"mmlu_professional_psychology": {
"alias": " - professional_psychology",
"acc,none": 0.673202614379085,
"acc_stderr,none": 0.018975427920507215
},
"mmlu_public_relations": {
"alias": " - public_relations",
"acc,none": 0.6818181818181818,
"acc_stderr,none": 0.044612721759105085
},
"mmlu_security_studies": {
"alias": " - security_studies",
"acc,none": 0.7428571428571429,
"acc_stderr,none": 0.027979823538744546
},
"mmlu_sociology": {
"alias": " - sociology",
"acc,none": 0.8606965174129353,
"acc_stderr,none": 0.02448448716291397
},
"mmlu_us_foreign_policy": {
"alias": " - us_foreign_policy",
"acc,none": 0.84,
"acc_stderr,none": 0.03684529491774708
},
"mmlu_stem": {
"alias": " - stem",
"acc,none": 0.531557247066286,
"acc_stderr,none": 0.00850786694491014
},
"mmlu_abstract_algebra": {
"alias": " - abstract_algebra",
"acc,none": 0.37,
"acc_stderr,none": 0.04852365870939099
},
"mmlu_anatomy": {
"alias": " - anatomy",
"acc,none": 0.6148148148148148,
"acc_stderr,none": 0.042039210401562783
},
"mmlu_astronomy": {
"alias": " - astronomy",
"acc,none": 0.6842105263157895,
"acc_stderr,none": 0.037827289808654685
},
"mmlu_college_biology": {
"alias": " - college_biology",
"acc,none": 0.75,
"acc_stderr,none": 0.03621034121889507
},
"mmlu_college_chemistry": {
"alias": " - college_chemistry",
"acc,none": 0.47,
"acc_stderr,none": 0.05016135580465919
},
"mmlu_college_computer_science": {
"alias": " - college_computer_science",
"acc,none": 0.56,
"acc_stderr,none": 0.04988876515698589
},
"mmlu_college_mathematics": {
"alias": " - college_mathematics",
"acc,none": 0.31,
"acc_stderr,none": 0.04648231987117317
},
"mmlu_college_physics": {
"alias": " - college_physics",
"acc,none": 0.4411764705882353,
"acc_stderr,none": 0.049406356306056595
},
"mmlu_computer_security": {
"alias": " - computer_security",
"acc,none": 0.7,
"acc_stderr,none": 0.04605661864718381
},
"mmlu_conceptual_physics": {
"alias": " - conceptual_physics",
"acc,none": 0.5829787234042553,
"acc_stderr,none": 0.03223276266711712
},
"mmlu_electrical_engineering": {
"alias": " - electrical_engineering",
"acc,none": 0.5724137931034483,
"acc_stderr,none": 0.04122737111370333
},
"mmlu_elementary_mathematics": {
"alias": " - elementary_mathematics",
"acc,none": 0.3915343915343915,
"acc_stderr,none": 0.025138091388851112
},
"mmlu_high_school_biology": {
"alias": " - high_school_biology",
"acc,none": 0.7870967741935484,
"acc_stderr,none": 0.023287665127268525
},
"mmlu_high_school_chemistry": {
"alias": " - high_school_chemistry",
"acc,none": 0.4975369458128079,
"acc_stderr,none": 0.03517945038691063
},
"mmlu_high_school_computer_science": {
"alias": " - high_school_computer_science",
"acc,none": 0.7,
"acc_stderr,none": 0.046056618647183814
},
"mmlu_high_school_mathematics": {
"alias": " - high_school_mathematics",
"acc,none": 0.34074074074074073,
"acc_stderr,none": 0.028897748741131143
},
"mmlu_high_school_physics": {
"alias": " - high_school_physics",
"acc,none": 0.3509933774834437,
"acc_stderr,none": 0.03896981964257375
},
"mmlu_high_school_statistics": {
"alias": " - high_school_statistics",
"acc,none": 0.5324074074074074,
"acc_stderr,none": 0.03402801581358966
},
"mmlu_machine_learning": {
"alias": " - machine_learning",
"acc,none": 0.4642857142857143,
"acc_stderr,none": 0.04733667890053756
}
},
"groups": {
"mmlu": {
"acc,none": 0.6405782652043869,
"acc_stderr,none": 0.0038184215234779296,
"alias": "mmlu"
},
"mmlu_humanities": {
"alias": " - humanities",
"acc,none": 0.6004250797024442,
"acc_stderr,none": 0.006745181916513025
},
"mmlu_other": {
"alias": " - other",
"acc,none": 0.7074348245896364,
"acc_stderr,none": 0.007835823166037259
},
"mmlu_social_sciences": {
"alias": " - social_sciences",
"acc,none": 0.7461813454663634,
"acc_stderr,none": 0.007659164764320299
},
"mmlu_stem": {
"alias": " - stem",
"acc,none": 0.531557247066286,
"acc_stderr,none": 0.00850786694491014
}
},
```
| Tasks | Version | Filter | n-shot | Metric | | Value | | Stderr |
|---|---|---|---|---|---|---:|---|---:|
| truthfulqa | N/A | none | 0 | acc | ↑ | 0.5705 | ± | 0.0115 |
| | | none | 0 | bleu_acc | ↑ | 0.5055 | ± | 0.0175 |
| | | none | 0 | bleu_diff | ↑ | 2.6078 | ± | 0.5566 |
| | | none | 0 | bleu_max | ↑ | 18.5891 | ± | 0.6661 |
| | | none | 0 | rouge1_acc | ↑ | 0.5324 | ± | 0.0175 |
| | | none | 0 | rouge1_diff | ↑ | 3.4985 | ± | 0.7934 |
| | | none | 0 | rouge1_max | ↑ | 43.9198 | ± | 0.7994 |
| | | none | 0 | rouge2_acc | ↑ | 0.4602 | ± | 0.0174 |
| | | none | 0 | rouge2_diff | ↑ | 3.4789 | ± | 0.8571 |
| | | none | 0 | rouge2_max | ↑ | 29.8401 | ± | 0.8868 |
| | | none | 0 | rougeL_acc | ↑ | 0.4982 | ± | 0.0175 |
| | | none | 0 | rougeL_diff | ↑ | 2.9852 | ± | 0.8028 |
| | | none | 0 | rougeL_max | ↑ | 40.3657 | ± | 0.8083 |
| - truthfulqa_gen | 3 | none | 0 | bleu_acc | ↑ | 0.5055 | ± | 0.0175 |
| | | none | 0 | bleu_diff | ↑ | 2.6078 | ± | 0.5566 |
| | | none | 0 | bleu_max | ↑ | 18.5891 | ± | 0.6661 |
| | | none | 0 | rouge1_acc | ↑ | 0.5324 | ± | 0.0175 |
| | | none | 0 | rouge1_diff | ↑ | 3.4985 | ± | 0.7934 |
| | | none | 0 | rouge1_max | ↑ | 43.9198 | ± | 0.7994 |
| | | none | 0 | rouge2_acc | ↑ | 0.4602 | ± | 0.0174 |
| | | none | 0 | rouge2_diff | ↑ | 3.4789 | ± | 0.8571 |
| | | none | 0 | rouge2_max | ↑ | 29.8401 | ± | 0.8868 |
| | | none | 0 | rougeL_acc | ↑ | 0.4982 | ± | 0.0175 |
| | | none | 0 | rougeL_diff | ↑ | 2.9852 | ± | 0.8028 |
| | | none | 0 | rougeL_max | ↑ | 40.3657 | ± | 0.8083 |
| - truthfulqa_mc1 | 2 | none | 0 | acc | ↑ | 0.4823 | ± | 0.0175 |
| - truthfulqa_mc2 | 2 | none | 0 | acc | ↑ | 0.6588 | ± | 0.0150 |
| Groups | Version | Filter | n-shot | Metric | | Value | | Stderr |
|---|---|---|---|---|---|---:|---|---:|
| truthfulqa | N/A | none | 0 | acc | ↑ | 0.5705 | ± | 0.0115 |
| | | none | 0 | bleu_acc | ↑ | 0.5055 | ± | 0.0175 |
| | | none | 0 | bleu_diff | ↑ | 2.6078 | ± | 0.5566 |
| | | none | 0 | bleu_max | ↑ | 18.5891 | ± | 0.6661 |
| | | none | 0 | rouge1_acc | ↑ | 0.5324 | ± | 0.0175 |
| | | none | 0 | rouge1_diff | ↑ | 3.4985 | ± | 0.7934 |
| | | none | 0 | rouge1_max | ↑ | 43.9198 | ± | 0.7994 |
| | | none | 0 | rouge2_acc | ↑ | 0.4602 | ± | 0.0174 |
| | | none | 0 | rouge2_diff | ↑ | 3.4789 | ± | 0.8571 |
| | | none | 0 | rouge2_max | ↑ | 29.8401 | ± | 0.8868 |
| | | none | 0 | rougeL_acc | ↑ | 0.4982 | ± | 0.0175 |
| | | none | 0 | rougeL_diff | ↑ | 2.9852 | ± | 0.8028 |
| | | none | 0 | rougeL_max | ↑ | 40.3657 | ± | 0.8083 |
| Tasks | Version | Filter | n-shot | Metric | | Value | | Stderr |
|---|---|---|---|---|---|---:|---|---:|
| winogrande | 1 | none | 5 | acc | ↑ | 0.8114 | ± | 0.011 |
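For reference, numbers like these can be re-run with EleutherAI's lm-evaluation-harness. The sketch below is an approximation: the repo id, dtype, and batch size are assumptions, the few-shot counts follow the usual Open LLM Leaderboard convention where the card doesn't state them, and exact scores will drift a little with harness version and hardware.

```python
# Rough sketch of reproducing these numbers with lm-evaluation-harness (pip install lm-eval).
# The repo id is an assumption; each task is run with its own few-shot count.
import lm_eval

MODEL_ARGS = "pretrained=Lambent/nextbird,dtype=float16"  # assumed repo id

for task, n_shot in [("eq_bench", 0), ("arc_challenge", 25), ("hellaswag", 10),
                     ("mmlu", 5), ("truthfulqa", 0), ("winogrande", 5)]:
    out = lm_eval.simple_evaluate(
        model="hf",
        model_args=MODEL_ARGS,
        tasks=[task],
        num_fewshot=n_shot,
        batch_size=8,
    )
    for name, metrics in out["results"].items():
        print(name, metrics)
```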
## Merge Details
Additional details on the edgymaid-7B merge configuration:
```yaml
models:
  - model: SanjiWatsuki/Loyal-Macaroni-Maid-7B
    parameters:
      density: 0.5
      weight: 0.3
  - model: crestf411/daybreak-kunoichi-2dpo-7b
    parameters:
      density: 0.5
      weight: 0.3
  - model: SanjiWatsuki/Kunoichi-DPO-v2-7B
    parameters:
      density: 0.5
      weight: 0.3
base_model: mistralai/Mistral-7B-v0.1
merge_method: ties
dtype: float16
```
### Merge Method
This model was merged using the [TIES](https://arxiv.org/abs/2306.01708) merge method, with mistralai/Mistral-7B-v0.1 as the base.
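Roughly, TIES builds a task vector for each model against the base, trims each vector to its largest-magnitude entries according to `density`, elects a majority sign per parameter, and merges only the entries that agree with that sign, scaled by `weight`. The sketch below is a simplified toy illustration of that idea, not mergekit's actual implementation:

```python
# Simplified sketch of TIES merging on flat parameter vectors (toy illustration only;
# mergekit's actual implementation differs in detail).
import numpy as np

def ties_merge(base, finetuned, weights, densities):
    """base: (d,) array; finetuned: list of (d,) arrays; weights/densities: per-model floats."""
    trimmed = []
    for theta, w, density in zip(finetuned, weights, densities):
        tau = theta - base                          # task vector
        k = int(round(density * tau.size))          # keep top-`density` fraction by magnitude
        threshold = np.sort(np.abs(tau))[-k] if k > 0 else np.inf
        tau = np.where(np.abs(tau) >= threshold, tau, 0.0)
        trimmed.append(w * tau)                     # apply merge weight
    stacked = np.stack(trimmed)                     # (n_models, d)
    elected = np.sign(stacked.sum(axis=0))          # majority sign per parameter
    agree = (np.sign(stacked) == elected) & (stacked != 0)
    # average only the task-vector entries whose sign agrees with the elected sign
    merged = np.where(agree.any(axis=0),
                      stacked.sum(axis=0, where=agree) / np.maximum(agree.sum(axis=0), 1),
                      0.0)
    return base + merged

# Toy usage with random vectors standing in for model weights
rng = np.random.default_rng(0)
base = rng.normal(size=1000)
models = [base + rng.normal(scale=0.1, size=1000) for _ in range(3)]
merged = ties_merge(base, models, weights=[0.3, 0.3, 0.3], densities=[0.5, 0.5, 0.5])
```

In the configurations here, `density` controls what fraction of each task vector survives trimming and `weight` scales that model's contribution.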
### Models Merged
The following models were included in the merge:
* Lambent/threebird-scribe-alpha0.3-7B
* Lambent/edgymaid-7B
### Configuration
The following YAML configuration was used to produce this model:
```yaml
models:
  - model: Lambent/threebird-scribe-alpha0.3-7B
    parameters:
      density: 1.0
      weight: 1.0
  - model: Lambent/edgymaid-7B
    parameters:
      density: 0.9
      weight: 0.5
base_model: mistralai/Mistral-7B-v0.1
merge_method: ties
dtype: float16
```
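To reproduce the merge, a config like the one above can be saved to a file and passed to mergekit's `mergekit-yaml` entry point. A minimal sketch (the file name and output directory are placeholders):

```python
# Sketch: run the merge by invoking mergekit's mergekit-yaml CLI from Python.
# "nextbird.yaml" holds the configuration above; "./nextbird" is an arbitrary output directory.
import subprocess

subprocess.run(
    ["mergekit-yaml", "nextbird.yaml", "./nextbird"],
    check=True,  # raise if the merge fails
)
# mergekit also accepts flags such as --cuda to perform the merge on GPU.
```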