#!/usr/bin/env python3
from transformers import BartForConditionalGeneration, BartTokenizer
import torch
from dl import load_dataset
from tqdm import tqdm

# Enable cudnn autotuning (helps when input shapes are mostly stable)
torch.backends.cudnn.benchmark = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load tokenizer and model
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-base')
model.to(device)

# Set up optimizer (torch.optim.AdamW; the deprecated transformers.AdamW is not needed)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

# Initialize Amp (mixed precision). This is optional and should not affect
# computation if Amp is unavailable or no GPU is present.
try:
    from torch.cuda.amp import GradScaler, autocast
    scaler = GradScaler(enabled=torch.cuda.is_available())
except ImportError:
    # If Amp is not available, define a no-op stand-in context manager
    class autocast:
        def __enter__(self):
            pass

        def __exit__(self, *args):
            pass

    scaler = None  # No scaler without Amp


def encode_batch(batch):
    """Tokenize one batch: batch[1] holds the input texts, batch[0] the targets.

    Padding positions in the labels are replaced with -100 so the loss
    function ignores them.
    """
    inputs = tokenizer(batch[1], return_tensors="pt", padding=True,
                       truncation=True, max_length=512).to(device)
    labels = tokenizer(batch[0], return_tensors="pt", padding=True,
                       truncation=True, max_length=512).to(device)
    label_ids = labels["input_ids"]
    label_ids[label_ids == tokenizer.pad_token_id] = -100
    return inputs, label_ids


def train_model(dataloader):
    model.train()
    total_loss = 0
    print("Training model...")
    for batch in tqdm(dataloader):
        optimizer.zero_grad()
        inputs, label_ids = encode_batch(batch)
        # Run the forward pass under autocast so Amp can use mixed precision
        with autocast():
            outputs = model(input_ids=inputs["input_ids"],
                            attention_mask=inputs["attention_mask"],
                            labels=label_ids)
            loss = outputs.loss
        if scaler is not None:
            # Scale the loss to avoid gradient underflow in float16
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()
        total_loss += loss.item()
    avg_train_loss = total_loss / len(dataloader)
    return avg_train_loss


def test_model(dataloader):
    model.eval()
    total_loss = 0
    print("Testing model...")
    for batch in tqdm(dataloader):
        with torch.no_grad():
            inputs, label_ids = encode_batch(batch)
            outputs = model(input_ids=inputs["input_ids"],
                            attention_mask=inputs["attention_mask"],
                            labels=label_ids)
            total_loss += outputs.loss.item()
    avg_test_loss = total_loss / len(dataloader)
    return avg_test_loss


def train():
    train_dataloader, test_dataloader = load_dataset(
        "../datasets/deu_mixed-typical_2011_1M/deu_mixed-typical_2011_1M-sentences.txt",
        100, 100, 1, test_ratio=0.2
    )
    num_epochs = 3
    for epoch in range(num_epochs):
        avg_train_loss = train_model(train_dataloader)
        print(f"Train loss for epoch {epoch+1}: {avg_train_loss}")
        avg_test_loss = test_model(test_dataloader)
        print(f"Test loss for epoch {epoch+1}: {avg_test_loss}")


if __name__ == "__main__":
    train()
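
# Illustrative sketch (an addition, not part of the original script): a quick
# way to eyeball the fine-tuned model's output on a single sentence. The helper
# name and the sample text are hypothetical; only standard Hugging Face APIs
# are used (tokenizer call, model.generate, tokenizer.decode).
def generate_example(text, max_length=64):
    model.eval()
    inputs = tokenizer(text, return_tensors="pt",
                       truncation=True, max_length=512).to(device)
    with torch.no_grad():
        output_ids = model.generate(inputs["input_ids"],
                                    attention_mask=inputs["attention_mask"],
                                    num_beams=4, max_length=max_length)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Example usage after training (placeholder sentence):
#     print(generate_example("Ein Beispielsatz zum Testen."))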