from transformers import BertModel, BertTokenizer
from torch.utils.data import Dataset, DataLoader
from config import params
from torch import nn
import numpy as np
import torch

# Use the first GPU when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
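
# The `params` dict is imported from config.py, which is not part of this
# file. A minimal sketch of what it could contain, based only on the keys
# read below; the concrete values here are assumptions, not from the source:
#
#     # config.py
#     params = {
#         'pretrained_model_name': 'bert-base-uncased',
#         'dropout': 0.3,
#         'max_length': 128,
#         'batch_size': 16,
#         'path_to_model_bin': 'model.bin',
#     }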

class TextDataset(Dataset):
    """Wraps raw texts and tokenizes them on the fly for BERT."""

    def __init__(self, texts, max_len):
        self.texts = texts
        self.tokenizer = BertTokenizer.from_pretrained(params['pretrained_model_name'])
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        text = str(self.texts[item])
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',  # replaces the deprecated pad_to_max_length=True
            return_attention_mask=True,
            truncation=True,
            return_tensors='pt',
        )
        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
        }

class SentimentClassifier(nn.Module):
    """BERT encoder followed by dropout and a linear classification head."""

    def __init__(self, n_classes):
        super().__init__()
        self.bert = BertModel.from_pretrained(params['pretrained_model_name'])
        self.drop = nn.Dropout(params['dropout'])
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        # With return_dict=False the model returns a plain tuple of
        # (last_hidden_state, pooled_output).
        last_hidden_state, pooled_output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False
        )
        output = self.drop(pooled_output)
        return self.out(output)
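
# Shape walk-through for a batch of B sequences padded to length L (an
# illustration, not part of the original file):
#   input_ids, attention_mask : (B, L)
#   pooled_output             : (B, hidden_size), e.g. 768 for bert-base models
#   returned logits           : (B, n_classes)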

class PredictionModel:
    """Loads a fine-tuned checkpoint and runs batched inference."""

    def __init__(self):
        # Move the model to the same device the input batches are sent to.
        self.model = SentimentClassifier(n_classes=6).to(device)

    def create_data_loader(self, X_test, max_len, batch_size):
        ds = TextDataset(
            texts=np.array(X_test),
            max_len=max_len
        )
        return DataLoader(
            ds,
            batch_size=batch_size
        )

    def predict(self, X_test: list):
        data_loader = self.create_data_loader(X_test, params['max_length'], params['batch_size'])
        # map_location lets a GPU-saved checkpoint load on a CPU-only machine.
        self.model.load_state_dict(torch.load(params['path_to_model_bin'], map_location=device))
        self.model.eval()
        y_pred = []
        with torch.no_grad():
            for d in data_loader:
                input_ids = d["input_ids"].to(device)
                attention_mask = d["attention_mask"].to(device)
                outputs = self.model(
                    input_ids=input_ids,
                    attention_mask=attention_mask
                )
                # The predicted class is the index of the largest logit.
                _, preds = torch.max(outputs, dim=1)
                y_pred += preds.tolist()
        return y_pred
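
# A minimal usage sketch, assuming a fine-tuned checkpoint exists at
# params['path_to_model_bin']; the example texts are hypothetical:
if __name__ == '__main__':
    model = PredictionModel()
    predictions = model.predict([
        "The product arrived on time and works great.",
        "Terrible support, I want a refund.",
    ])
    # One class id (0-5) per input text; the id-to-label mapping depends on
    # how the checkpoint was trained.
    print(predictions)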