Supervised Fine-Tuning on CUDA

Dependencies

!uv pip install -q datasets bitsandbytes trl peft python-dotenv

Model parameters

import os
DATASET_REPO = "simonguest/test-dataset" # Change this to your uploaded dataset location on HF
BASE_MODEL_VENDOR = "Qwen" # Change this to your desired base model
BASE_MODEL_NAME = "Qwen3-1.7B" # Change this to your desired base model
BASE_MODEL = f"{BASE_MODEL_VENDOR}/{BASE_MODEL_NAME}"
PROJECT_NAME = "code-explainer" # Name of your training project
os.environ["WANDB_PROJECT"] = PROJECT_NAME # WandB will use the same project name
MODEL_NAME = f"{BASE_MODEL_NAME}-{PROJECT_NAME}" # Final model name
HF_USERNAME = "simonguest" # Hugging Face username - used to upload your final models
MODEL_FOLDER = f"./models/{MODEL_NAME}" # Local folder for storing the model files during training

Training hyperparameters

BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = 4
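# On a single GPU, the effective batch size is BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS = 16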
LEARNING_RATE = 2e-4
NUM_EPOCHS = 3
MAX_SEQ_LENGTH = 512
# LoRA-specific Parameters
LORA_R = 16
LORA_ALPHA = 32
LORA_DROPOUT = 0.05
# Use 4-bit quantization for efficiency (QLoRA)
USE_4BIT = True

API keys and tokens

import sys
import os
from dotenv import load_dotenv
if 'google.colab' in sys.modules:
    from google.colab import userdata # type:ignore
    os.environ['HF_TOKEN'] = userdata.get('HF_TOKEN')
    os.environ['WANDB_API_KEY'] = userdata.get('WANDB_API_KEY')
    print("HF and WANDB API Tokens set for Colab")
else:
    load_dotenv()
    print("Loaded env vars from .env")

Load dataset from Hugging Face

from datasets import load_dataset
dataset = load_dataset(DATASET_REPO)
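
A quick sanity check that the repository exposes the splits used later in this notebook (train and validation for training, test for evaluation):

print(dataset)  # should list the train / validation / test splits and their row counts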

Format dataset for correct chat template

from transformers import AutoTokenizer
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
def format_chat_template(example):
    messages = example["messages"]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False
    )
    return {"text": text}

formatted_dataset = dataset.map(
    format_chat_template,
    remove_columns=dataset['train'].column_names # type: ignore
)
print("\nFormatted example:")
print(formatted_dataset['train'][0]['text'][:500]) # type: ignore

Load base model with QLoRA configuration

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
# Configure 4-bit quantization
if USE_4BIT:
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )
else:
    bnb_config = None
# Load base model
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    dtype=torch.bfloat16,
)
# Prepare model for k-bit training
if USE_4BIT:
    model = prepare_model_for_kbit_training(model)
# Configure LoRA
peft_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)
# Apply LoRA
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

Create training configuration and trainer

import os
from transformers import TrainingArguments
from trl import SFTTrainer
report_to = "none"
if os.environ.get("WANDB_API_KEY") is not None:
    report_to = "wandb"
# Training arguments
training_args = TrainingArguments(
    output_dir=MODEL_FOLDER,
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    learning_rate=LEARNING_RATE,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=100,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,
    bf16=True,
    gradient_checkpointing=True,
    optim="paged_adamw_8bit",
    report_to=report_to,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)
# Create trainer
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=formatted_dataset["train"],
    eval_dataset=formatted_dataset["validation"],
    processing_class=tokenizer,
)
# Save the tokenizer now, since it won't change during training
tokenizer.save_pretrained(f"{MODEL_FOLDER}/lora")
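
Note that MAX_SEQ_LENGTH, defined with the other hyperparameters, is never passed to the trainer above, so SFTTrainer falls back to TRL's default truncation length. If you want to control it explicitly, one option is to use TRL's SFTConfig (a subclass of TrainingArguments) for the arguments instead. A minimal sketch, assuming a TRL version where the field is named max_seq_length (newer releases rename it to max_length):

from trl import SFTConfig

training_args = SFTConfig(
    output_dir=MODEL_FOLDER,
    max_seq_length=MAX_SEQ_LENGTH,
    # ...remaining arguments as in the TrainingArguments above...
)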

Train and save the final model

# Start training
trainer.train()
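# If training is interrupted, the most recent checkpoint saved under MODEL_FOLDER
# can be resumed with: trainer.train(resume_from_checkpoint=True)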
# Save the final model
trainer.save_model(f"{MODEL_FOLDER}/lora")

Load the final model

from transformers import pipeline
pipe = pipeline(
    "text-generation",
    model=f"{MODEL_FOLDER}/lora",
    tokenizer=f"{MODEL_FOLDER}/lora",
    do_sample=True,  # sampling must be enabled for temperature to take effect
    temperature=0.7,
    max_new_tokens=256,
)

Test the final model using the test dataset

import random
idx = random.randint(0, len(formatted_dataset['test']) - 1)
test_data = formatted_dataset['test'][idx]["text"]
output = pipe(test_data)
print(output[0]['generated_text'])
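
Note that test_data above is the fully formatted conversation, so it already contains the reference answer and the model simply continues past it. To see what the model produces on its own, a minimal sketch (assuming test examples keep their conversation under a "messages" key, as in the training split) that prompts with only the non-assistant turns:

raw_example = dataset['test'][idx]
prompt_messages = [m for m in raw_example["messages"] if m["role"] != "assistant"]
prompt = tokenizer.apply_chat_template(
    prompt_messages,
    tokenize=False,
    add_generation_prompt=True,  # end the prompt where the assistant turn should begin
)
completion = pipe(prompt, return_full_text=False)
print(completion[0]['generated_text'])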

Create a model card

from huggingface_hub import ModelCard
card_content = f"""---
base_model: {BASE_MODEL}
tags:
- peft
- lora
- text-generation
---
# {MODEL_NAME}
## Model Description
Fine-tuned from `{BASE_MODEL}` using QLoRA (4-bit) with supervised fine-tuning.
## Training Details
- Dataset: `{DATASET_REPO}`
- LoRA rank: {LORA_R}, alpha: {LORA_ALPHA}
- Epochs: {NUM_EPOCHS}, Learning rate: {LEARNING_RATE}
## Intended Use
This model is a test model used for the CS-394/594 class at DigiPen.
The model is designed to provide a summary explanation of a snippet of Python code, to be used in an IDE. It takes a snippet of code (passed as the user prompt) and returns a two-paragraph explanation of what the code does, including an analogy that helps students better understand how the code functions.
## Limitations
This model is a single-turn model and has not been trained to support long, multi-turn conversations.
"""
card = ModelCard(card_content)
card.save(f"{MODEL_FOLDER}/lora/README.md")

Merge the models

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# Load the unquantized base model so the adapter can be merged into it
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    dtype=torch.bfloat16,
)
adapter_model = PeftModel.from_pretrained(base_model, f"{MODEL_FOLDER}/lora")
# Merge and save the model
merged_model = adapter_model.merge_and_unload() # type: ignore
merged_model.save_pretrained(f"{MODEL_FOLDER}/merged")
# Save the tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.save_pretrained(f"{MODEL_FOLDER}/merged")
# Copy the model card to the merged folder
!cp {MODEL_FOLDER}/lora/README.md {MODEL_FOLDER}/merged/.

Upload the merged model and adapter to Hugging Face

from huggingface_hub import HfApi, create_repo
# Set the MODEL_REPO
MODEL_REPO = f"{HF_USERNAME}/{MODEL_NAME}"
# Initialize the API
api = HfApi()
# Create the repository (if it doesn't exist)
try:
    create_repo(MODEL_REPO, repo_type="model", exist_ok=True, private=False)
    print(f"Repository {MODEL_REPO} created or already exists")
except Exception as e:
    print(f"Error creating repo: {e}")
# Upload merged model files to root (this is the main model)
print("\nUploading merged model to root...")
api.upload_folder(
    folder_path=f"{MODEL_FOLDER}/merged",
    repo_id=MODEL_REPO,
    repo_type="model",
    path_in_repo="", # Empty string uploads to root
    commit_message="Upload merged model"
)
# Upload LoRA adapter to subfolder (optional but useful for reference)
print("\nUploading LoRA adapter...")
api.upload_folder(
    folder_path=f"{MODEL_FOLDER}/lora",
    repo_id=MODEL_REPO,
    repo_type="model",
    path_in_repo="lora_adapter", # Keep adapter in subfolder
    commit_message="Upload LoRA adapter"
)
print(f"\n✓ All files uploaded successfully to https://huggingface.co/{MODEL_REPO}")
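
Once the upload completes, the merged model can be loaded directly from the Hub like any other transformers model. A minimal sketch (assuming the repository is public; the code snippet passed to the pipeline is just an illustrative example):

from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model=MODEL_REPO,
    max_new_tokens=256,
)
# As in the test cell above, wrapping the snippet in the chat template matches
# the format the model was trained on.
snippet = "def add(a, b):\n    return a + b"
messages = [{"role": "user", "content": snippet}]
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(pipe(prompt, return_full_text=False)[0]["generated_text"])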