import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm


model_ckpt = "distilbert-base-uncased"
batch_size = 16
n_epochs = 3
learning_rate = 1e-4
RANK = 4
ALPHA = 4
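# LoRA hyperparameters: RANK is the rank r of the low-rank update and ALPHA its
# scaling factor. Under the standard LoRA parameterization, each adapted layer
# computes W x + (ALPHA / RANK) * (B @ A) x, with A of shape (RANK, d_in) and
# B of shape (d_out, RANK). These constants are assumed to mirror the values the
# loraLinear module (imported below) uses when it builds its adapter layers.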
"""
---- Device ----
"""

if torch.cuda.is_available():
    device = torch.device('cuda')
    print("Using CUDA (GPU)")
elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
    device = torch.device('mps')
    print("Using MPS (Apple Silicon GPU)")
else:
    device = torch.device('cpu')
    print("Using CPU")
from baseline import tokenized_datasets

"""
tokenized_datasets:

DatasetDict({
    train: Dataset({
        features: ['labels', 'input_ids', 'attention_mask'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['labels', 'input_ids', 'attention_mask'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['labels', 'input_ids', 'attention_mask'],
        num_rows: 50000
    })
})
"""
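# NOTE: the plain DataLoader usage below assumes baseline already switched the
# datasets to PyTorch format, so each batch arrives as a dict of stacked tensors.
# If it does not, something along these lines is needed first (column names taken
# from the structure above):
#
#   tokenized_datasets.set_format("torch", columns=["labels", "input_ids", "attention_mask"])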
train_dataloader = DataLoader(tokenized_datasets["train"], shuffle=True, batch_size=batch_size)
eval_dataloader = DataLoader(tokenized_datasets["test"], batch_size=batch_size)
from loraLinear import model

model.to(device)
print(f"INFO: Moved model to {device}")
# Only the parameters left trainable (the LoRA matrices, assuming loraLinear
# froze the base DistilBERT weights) are handed to the optimizer.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.AdamW(trainable_params, lr=learning_rate)
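# Sanity check: with the base model frozen, only a small fraction of the weights
# should be trainable. Printing the counts makes a silent "nothing was frozen"
# bug easy to spot.
n_trainable = sum(p.numel() for p in trainable_params)
n_total = sum(p.numel() for p in model.parameters())
print(f"INFO: {n_trainable:,} of {n_total:,} parameters are trainable ({100 * n_trainable / n_total:.2f}%)")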
for epoch in range(n_epochs):
    model.train()
    print(f"\n--- Starting Epoch {epoch+1}/{n_epochs} ---")
    for batch in tqdm(train_dataloader, desc=f"Training Epoch {epoch+1}"):
        batch = {k: v.to(device) for k, v in batch.items()}
        optimizer.zero_grad()
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
    model.eval()
    num_correct = 0
    num_samples = 0
    with torch.no_grad():
        for batch in tqdm(eval_dataloader, desc=f"Evaluating Epoch {epoch+1}"):
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            predictions = torch.argmax(outputs.logits, dim=-1)
            num_correct += (predictions == batch["labels"]).sum().item()
            num_samples += batch["labels"].size(0)

    accuracy = num_correct / num_samples
    print(f"--- Epoch {epoch+1} Validation Accuracy: {accuracy:.4f} ---")
print("\nFine-tuning complete.")
torch.save(model.state_dict(), "DISTILBERT_WITH_LORA.pth")
print("Trained LoRA model saved.")
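# The state_dict above stores every DistilBERT weight, even though only the LoRA
# matrices changed. A much smaller adapter-only checkpoint is possible -- a minimal
# sketch, assuming (as elsewhere in this script) that requires_grad is True only
# for the LoRA parameters; the filename is illustrative:
lora_state = {n: p.detach().cpu() for n, p in model.named_parameters() if p.requires_grad}
torch.save(lora_state, "DISTILBERT_LORA_ADAPTERS_ONLY.pth")
print(f"Saved {len(lora_state)} LoRA tensors ({sum(p.numel() for p in lora_state.values()):,} values).")
# To restore: rebuild the model via loraLinear, then
# model.load_state_dict(torch.load("DISTILBERT_LORA_ADAPTERS_ONLY.pth"), strict=False)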