
Training Process

Model + LoRA Loading

from unsloth import FastLanguageModel
import torch
max_seq_length = 2048
dtype = None # None for auto-detection. Use float16 on Tesla T4/V100, bfloat16 on Ampere+ GPUs.
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)
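
A quick look at GPU memory right after loading helps confirm the 4-bit footprint. The snippet below is a sketch using standard torch.cuda calls (assuming a single GPU at index 0), in the spirit of the memory report printed in the Unsloth notebooks.

import torch
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
reserved = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{reserved} GB of memory reserved after model load.")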

model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0! Suggested: 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)
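
To verify how much the adapters actually add, PEFT-wrapped models expose print_trainable_parameters(). With r = 16 over these seven projection modules on an 8B Llama, the adapters come to roughly 42M trainable parameters (illustrative figure, a small fraction of the base model).

model.print_trainable_parameters()
# e.g. trainable params: ~41.9M || all params: ~8B || trainable%: ~0.5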

Dataset Preparation

from datasets import load_dataset
dataset = load_dataset(
    "csv",
    data_files = "/content/synth_data.csv",
    split = "train",
)
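
The column names referenced below (available_entities, text, label) imply the CSV schema sketched here; a quick inspection confirms the load (output is illustrative, the actual rows depend on synth_data.csv).

# Expected columns in synth_data.csv:
#   available_entities - the label set offered for this example
#   text               - the raw search query
#   label              - the annotated output the model should learn
print(dataset.column_names)  # e.g. ['available_entities', 'text', 'label']
print(dataset[0])            # inspect one raw row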

from unsloth import to_sharegpt
dataset = to_sharegpt(
    dataset,
    merged_prompt = "Labels: {available_entities}\n\nText: {text}\n",
    conversation_extension = 5, # Randomly combines conversations into 1
    output_column_name = "label",
)

from unsloth import standardize_sharegpt
dataset = standardize_sharegpt(dataset)
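
At this point each row should hold a conversations list in role/content form; the shape below is illustrative, with the exact content coming from the CSV.

print(dataset[0]["conversations"][:2])
# Roughly:
# [{"role": "user",      "content": "Labels: ...\n\nText: ..."},
#  {"role": "assistant", "content": "<label output>"}]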

chat_template = """{SYSTEM}
USER: {INPUT}
ASSISTANT: {OUTPUT}"""

from unsloth import apply_chat_template
dataset = apply_chat_template(
    dataset,
    tokenizer = tokenizer,
    chat_template = chat_template,
    default_system_message = "NER Task: Label the text based on the available Labels."
)
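
After templating, each row carries a rendered text field, which is what SFTTrainer consumes below via dataset_text_field = "text". Printing one row shows the template filled in (illustrative):

print(dataset[0]["text"])
# NER Task: Label the text based on the available Labels.
# USER: Labels: ...
#
# Text: "..."
# ASSISTANT: ...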

Training Configuration

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = True, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # max_steps = None,
        num_train_epochs = 1,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)
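
With per_device_train_batch_size = 2 and gradient_accumulation_steps = 4 on a single GPU, each optimizer step sees 2 × 4 = 8 packed sequences:

effective_batch = 2 * 4  # batch size x gradient accumulation = 8 sequences per step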

trainer_stats = trainer.train()

# Save to 8-bit Q8_0 GGUF (flip False to True to run the export)
if False: model.save_pretrained_gguf("model", tokenizer)
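
If you only want the adapters rather than a merged GGUF export, the standard lightweight save is the following (the directory name "lora_model" is just illustrative):

model.save_pretrained("lora_model")       # LoRA adapter weights only
tokenizer.save_pretrained("lora_model")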

Training Results


  • Steps Trained: 26
  • Final Loss: 0.1870
  • Total Time: 21:04 min
  • A full epoch would have been 261 steps (see the quick check below)
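
For context, these numbers line up with the trainer config; a quick consistency check, assuming the single-GPU settings above:

seqs_per_step   = 2 * 4                 # effective batch size = 8
seen_sequences  = 26 * seqs_per_step    # ~208 packed sequences trained on
epoch_sequences = 261 * seqs_per_step   # ~2088 packed sequences in a full epoch
# Note: with packing = True and conversation_extension = 5, these count packed/
# merged sequences, not raw CSV rows.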

Sample Inference

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
messages = [                    # Change below!
    {"role": "user", "content": 'Labels: ATTR, CITY, CITY_PART, COUNTRY, O, ORG, PER, PHONE, REGION, REL, STREET, WORK_P, WORK_S\n\n'\
                                'Text: "doctors in berlin"'},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(input_ids, streamer = text_streamer, max_new_tokens = 128, pad_token_id = tokenizer.eos_token_id)
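
To capture the completion as a string instead of streaming it, a minimal non-streaming variant with the same generation settings:

outputs = model.generate(input_ids, max_new_tokens = 128, pad_token_id = tokenizer.eos_token_id)
# Decode only the newly generated tokens, skipping the echoed prompt:
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens = True)
print(response)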