Spaces:

yiningmao
/

metaphor-detection-baseline

Runtime error

App Files Files Community

metaphor-detection-baseline / modeling.py

yiningmao

Upload 34 files

c5db72e about 3 years ago

raw

history blame contribute delete

19.3 kB

	import numpy as np
	import torch
	import torch.nn as nn

	from utils import Config
	from transformers import AutoTokenizer, AutoModel


	class AutoModelForSequenceClassification(nn.Module):
	    """Sentence-level classifier built on the encoder's pooled output.

	    The second element of the encoder output is passed through dropout and a
	    single linear layer, then normalized with log-softmax.
	    """

	    def __init__(self, args, Model, config, num_labels=2):
	        """Wrap an existing encoder with a linear classification head.

	        Inputs:
	            `args`: object exposing `drop_ratio`, used as the dropout probability.
	            `Model`: encoder instance; stored as `self.encoder` and called in `forward`.
	            `config`: object exposing `hidden_size` and `initializer_range`.
	            `num_labels`: number of output classes.
	        """
	        super().__init__()
	        self.config = config
	        self.num_labels = num_labels
	        self.encoder = Model
	        self.dropout = nn.Dropout(args.drop_ratio)
	        self.classifier = nn.Linear(config.hidden_size, num_labels)
	        self.logsoftmax = nn.LogSoftmax(dim=1)

	        # Replace the default nn.Linear initialization of the head.
	        self._init_weights(self.classifier)

	    def _init_weights(self, module):
	        """Re-initialize `module` in place according to its layer type.

	        LayerNorm gets weight 1 and bias 0; Linear and Embedding weights are drawn
	        from N(0, config.initializer_range); a Linear bias, if present, is zeroed.
	        """
	        if isinstance(module, nn.LayerNorm):
	            module.weight.data.fill_(1.0)
	            module.bias.data.zero_()
	        elif isinstance(module, (nn.Linear, nn.Embedding)):
	            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)

	        has_linear_bias = isinstance(module, nn.Linear) and module.bias is not None
	        if has_linear_bias:
	            module.bias.data.zero_()

	    def forward(
	        self,
	        input_ids,
	        target_mask=None,
	        token_type_ids=None,
	        attention_mask=None,
	        labels=None,
	        head_mask=None,
	    ):
	        """Classify each input sequence.

	        Inputs:
	            `input_ids`: torch.LongTensor [batch_size, sequence_length] of vocabulary indices.
	            `target_mask`: accepted for a uniform call signature; not used by this model.
	            `token_type_ids`: optional segment ids, forwarded to the encoder unchanged.
	            `attention_mask`: optional padding mask, forwarded to the encoder unchanged.
	            `labels`: optional class indices; flattened with `view(-1)`, so one index
	                per example is expected (values in [0, num_labels - 1]).
	            `head_mask`: optional head mask, forwarded to the encoder unchanged.

	        Returns:
	            The NLL loss (a scalar tensor) when `labels` is given, otherwise the
	            log-probabilities of shape [batch_size, num_labels].
	        """
	        encoded = self.encoder(
	            input_ids,
	            token_type_ids=token_type_ids,
	            attention_mask=attention_mask,
	            head_mask=head_mask,
	        )
	        # encoded[1] is the pooled sentence representation.
	        sentence_vec = self.dropout(encoded[1])
	        log_probs = self.logsoftmax(self.classifier(sentence_vec))

	        if labels is None:
	            return log_probs

	        criterion = nn.NLLLoss()
	        return criterion(log_probs.view(-1, self.num_labels), labels.view(-1))


	class AutoModelForTokenClassification(nn.Module):
	    """Target-word classifier built on the encoder's token-level output.

	    The hidden states at the positions selected by `target_mask` are averaged
	    per example and fed to a single linear layer followed by log-softmax.
	    """

	    def __init__(self, args, Model, config, num_labels=2):
	        """Wrap an existing encoder with a linear classification head.

	        Inputs:
	            `args`: object exposing `drop_ratio`, used as the dropout probability.
	            `Model`: encoder instance; stored as `self.bert` and called in `forward`.
	            `config`: object exposing `hidden_size` and `initializer_range`.
	            `num_labels`: number of output classes.
	        """
	        super().__init__()
	        self.num_labels = num_labels
	        self.bert = Model
	        self.config = config
	        self.dropout = nn.Dropout(args.drop_ratio)
	        self.classifier = nn.Linear(config.hidden_size, num_labels)
	        self.logsoftmax = nn.LogSoftmax(dim=1)

	        self._init_weights(self.classifier)

	    def _init_weights(self, module):
	        """Re-initialize `module` in place according to its layer type.

	        Linear and Embedding weights are drawn from N(0, config.initializer_range);
	        LayerNorm gets weight 1 and bias 0; a Linear bias, if present, is zeroed.
	        """
	        if isinstance(module, (nn.Linear, nn.Embedding)):
	            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
	        elif isinstance(module, nn.LayerNorm):
	            module.bias.data.zero_()
	            module.weight.data.fill_(1.0)
	        if isinstance(module, nn.Linear) and module.bias is not None:
	            module.bias.data.zero_()

	    def forward(
	        self,
	        input_ids,
	        target_mask,
	        token_type_ids=None,
	        attention_mask=None,
	        labels=None,
	        head_mask=None,
	    ):
	        """Classify the target word of each input sequence.

	        Inputs:
	            `input_ids`: torch.LongTensor [batch_size, sequence_length] of vocabulary indices.
	            `target_mask`: tensor [batch_size, sequence_length]; 1 at target-word
	                positions and 0 elsewhere.
	            `token_type_ids`: optional segment ids, forwarded to the encoder unchanged.
	            `attention_mask`: optional padding mask, forwarded to the encoder unchanged.
	            `labels`: optional class indices; flattened with `view(-1)`, so one index
	                per example is expected (values in [0, num_labels - 1]).
	            `head_mask`: optional head mask, forwarded to the encoder unchanged.

	        Returns:
	            The NLL loss (a scalar tensor) when `labels` is given, otherwise the
	            log-probabilities of shape [batch_size, num_labels].
	        """
	        outputs = self.bert(
	            input_ids,
	            token_type_ids=token_type_ids,
	            attention_mask=attention_mask,
	            head_mask=head_mask,
	        )
	        sequence_output = outputs[0]  # [batch, max_len, hidden]

	        # Zero every position that does not belong to the target word.
	        target_output = sequence_output * target_mask.unsqueeze(2)
	        target_output = self.dropout(target_output)

	        # Normalize by each example's own target-token count rather than by the
	        # mask total of the whole batch, so a prediction does not depend on which
	        # other examples share the batch. For a single-example batch the two are
	        # the same. The clamp keeps an all-zero mask from producing 0/0 = NaN.
	        token_counts = target_mask.sum(dim=1, keepdim=True).clamp(min=1)  # [batch, 1]
	        target_output = target_output.sum(1) / token_counts  # [batch, hidden]

	        logits = self.classifier(target_output)
	        logits = self.logsoftmax(logits)

	        if labels is not None:
	            loss_fct = nn.NLLLoss()
	            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
	            return loss
	        return logits


	class AutoModelForSequenceClassification_SPV(nn.Module):
	    """MelBERT variant that uses only the SPV branch.

	    The classifier sees the target-word vector concatenated with the encoder's
	    pooled sentence vector, hence its input width of `2 * hidden_size`.
	    """

	    def __init__(self, args, Model, config, num_labels=2):
	        """Wrap an existing encoder with the SPV classification head.

	        Inputs:
	            `args`: object exposing `drop_ratio`, used as the dropout probability.
	            `Model`: encoder instance; stored as `self.encoder` and called in `forward`.
	            `config`: object exposing `hidden_size` and `initializer_range`.
	            `num_labels`: number of output classes.
	        """
	        super().__init__()
	        self.config = config
	        self.num_labels = num_labels
	        self.encoder = Model
	        self.dropout = nn.Dropout(args.drop_ratio)
	        # Input is [target vector ; pooled vector].
	        self.classifier = nn.Linear(2 * config.hidden_size, num_labels)
	        self.logsoftmax = nn.LogSoftmax(dim=1)

	        self._init_weights(self.classifier)

	    def _init_weights(self, module):
	        """Re-initialize `module` in place according to its layer type.

	        LayerNorm gets weight 1 and bias 0; Linear and Embedding weights are drawn
	        from N(0, config.initializer_range); a Linear bias, if present, is zeroed.
	        """
	        if isinstance(module, nn.LayerNorm):
	            module.weight.data.fill_(1.0)
	            module.bias.data.zero_()
	        elif isinstance(module, (nn.Linear, nn.Embedding)):
	            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)

	        has_linear_bias = isinstance(module, nn.Linear) and module.bias is not None
	        if has_linear_bias:
	            module.bias.data.zero_()

	    def forward(
	        self,
	        input_ids,
	        target_mask,
	        token_type_ids=None,
	        attention_mask=None,
	        labels=None,
	        head_mask=None,
	    ):
	        """Classify the target word using its contextual vector and the sentence vector.

	        Inputs:
	            `input_ids`: torch.LongTensor [batch_size, sequence_length] of vocabulary indices.
	            `target_mask`: tensor [batch_size, sequence_length]; 1 at target-word
	                positions and 0 elsewhere.
	            `token_type_ids`: optional segment ids, forwarded to the encoder unchanged.
	            `attention_mask`: optional padding mask, forwarded to the encoder unchanged.
	            `labels`: optional class indices; flattened with `view(-1)`, so one index
	                per example is expected (values in [0, num_labels - 1]).
	            `head_mask`: optional head mask, forwarded to the encoder unchanged.

	        Returns:
	            The NLL loss (a scalar tensor) when `labels` is given, otherwise the
	            log-probabilities of shape [batch_size, num_labels].
	        """
	        encoded = self.encoder(
	            input_ids,
	            token_type_ids=token_type_ids,
	            attention_mask=attention_mask,
	            head_mask=head_mask,
	        )
	        token_states = encoded[0]  # [batch, max_len, hidden]
	        sentence_vec = encoded[1]  # [batch, hidden]

	        # Keep only the target-word positions; everything else becomes zero.
	        masked_states = token_states * target_mask.unsqueeze(2)  # [batch, max_len, hidden]

	        # Dropout is applied to the target states first, then to the pooled vector.
	        masked_states = self.dropout(masked_states)
	        sentence_vec = self.dropout(sentence_vec)

	        # Average over the sequence axis. The zeroed positions are included, so
	        # this is the sum of the target vectors divided by the sequence length.
	        target_vec = masked_states.mean(1)  # [batch, hidden]

	        features = torch.cat([target_vec, sentence_vec], dim=1)
	        log_probs = self.logsoftmax(self.classifier(features))

	        if labels is None:
	            return log_probs

	        criterion = nn.NLLLoss()
	        return criterion(log_probs.view(-1, self.num_labels), labels.view(-1))


	class AutoModelForSequenceClassification_MIP(nn.Module):
	    """MelBERT variant that uses only the MIP branch.

	    The same encoder is run on two inputs. The target-word vector from the second
	    input is concatenated with the target-word vector from the first input and
	    classified by a single linear layer (input width `2 * hidden_size`).
	    """

	    def __init__(self, args, Model, config, num_labels=2):
	        """Wrap an existing encoder with the MIP classification head.

	        Inputs:
	            `args`: object exposing `drop_ratio`, used as the dropout probability;
	                also stored as `self.args`.
	            `Model`: encoder instance; stored as `self.encoder` and called in `forward`.
	            `config`: object exposing `hidden_size` and `initializer_range`.
	            `num_labels`: number of output classes.
	        """
	        super().__init__()
	        self.num_labels = num_labels
	        self.encoder = Model
	        self.config = config
	        self.dropout = nn.Dropout(args.drop_ratio)
	        self.args = args
	        self.classifier = nn.Linear(config.hidden_size * 2, num_labels)
	        self.logsoftmax = nn.LogSoftmax(dim=1)

	        self._init_weights(self.classifier)

	    def _init_weights(self, module):
	        """Re-initialize `module` in place according to its layer type.

	        Linear and Embedding weights are drawn from N(0, config.initializer_range);
	        LayerNorm gets weight 1 and bias 0; a Linear bias, if present, is zeroed.
	        """
	        if isinstance(module, (nn.Linear, nn.Embedding)):
	            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
	        elif isinstance(module, nn.LayerNorm):
	            module.bias.data.zero_()
	            module.weight.data.fill_(1.0)
	        if isinstance(module, nn.Linear) and module.bias is not None:
	            module.bias.data.zero_()

	    def _pool_target(self, sequence_output, target_mask):
	        """Return the dropout-regularized mean of the target-token states, [batch, hidden]."""
	        # Zero every position that does not belong to the target word.
	        target_output = sequence_output * target_mask.unsqueeze(2)
	        target_output = self.dropout(target_output)

	        # Normalize by each example's own target-token count rather than by the
	        # mask total of the whole batch, so a prediction does not depend on which
	        # other examples share the batch. For a single-example batch the two are
	        # the same. The clamp keeps an all-zero mask from producing 0/0 = NaN.
	        token_counts = target_mask.sum(dim=1, keepdim=True).clamp(min=1)  # [batch, 1]
	        return target_output.sum(1) / token_counts

	    def forward(
	        self,
	        input_ids,
	        input_ids_2,
	        target_mask,
	        target_mask_2,
	        attention_mask_2,
	        token_type_ids=None,
	        attention_mask=None,
	        labels=None,
	        head_mask=None,
	    ):
	        """Classify the target word from its representations in two inputs.

	        Inputs:
	            `input_ids`: torch.LongTensor [batch_size, sequence_length], first input.
	            `input_ids_2`: torch.LongTensor [batch_size, sequence_length], second input.
	            `target_mask`: tensor [batch_size, sequence_length]; 1 at target-word
	                positions of the first input and 0 elsewhere.
	            `target_mask_2`: same as `target_mask`, for the second input.
	            `attention_mask_2`: attention mask for the second input, forwarded to the encoder.
	            `token_type_ids`: optional segment ids for the first input only; the
	                second encoder call does not receive token type ids.
	            `attention_mask`: optional attention mask for the first input.
	            `labels`: optional class indices; flattened with `view(-1)`, so one index
	                per example is expected (values in [0, num_labels - 1]).
	            `head_mask`: optional head mask, forwarded to both encoder calls unchanged.

	        Returns:
	            The NLL loss (a scalar tensor) when `labels` is given, otherwise the
	            log-probabilities of shape [batch_size, num_labels].
	        """
	        # First pass: the full sentence.
	        outputs = self.encoder(
	            input_ids,
	            token_type_ids=token_type_ids,
	            attention_mask=attention_mask,
	            head_mask=head_mask,
	        )
	        sequence_output = outputs[0]  # [batch, max_len, hidden]
	        target_output = self._pool_target(sequence_output, target_mask)  # [batch, hidden]

	        # Second pass: the second input, through the same encoder weights.
	        outputs_2 = self.encoder(input_ids_2, attention_mask=attention_mask_2, head_mask=head_mask)
	        sequence_output_2 = outputs_2[0]  # [batch, max_len, hidden]
	        target_output_2 = self._pool_target(sequence_output_2, target_mask_2)  # [batch, hidden]

	        logits = self.classifier(torch.cat([target_output_2, target_output], dim=1))
	        logits = self.logsoftmax(logits)

	        if labels is not None:
	            loss_fct = nn.NLLLoss()
	            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
	            return loss
	        return logits


	class AutoModelForSequenceClassification_SPV_MIP(nn.Module):
	    """Full MelBERT model combining the SPV and MIP branches.

	    Each branch projects a pair of concatenated vectors to `args.classifier_hidden`
	    units; the two projections are concatenated and classified by a final linear layer.
	    """

	    def __init__(self, args, Model, config, num_labels=2):
	        """Wrap an existing encoder with the SPV and MIP heads.

	        Inputs:
	            `args`: object exposing `drop_ratio` (dropout probability) and
	                `classifier_hidden` (width of each branch); also stored as `self.args`.
	            `Model`: encoder instance; stored as `self.encoder` and called in `forward`.
	            `config`: object exposing `hidden_size` and `initializer_range`.
	            `num_labels`: number of output classes.
	        """
	        super().__init__()
	        self.args = args
	        self.config = config
	        self.num_labels = num_labels
	        self.encoder = Model
	        self.dropout = nn.Dropout(args.drop_ratio)

	        pair_width = 2 * config.hidden_size
	        self.SPV_linear = nn.Linear(pair_width, args.classifier_hidden)
	        self.MIP_linear = nn.Linear(pair_width, args.classifier_hidden)
	        self.classifier = nn.Linear(2 * args.classifier_hidden, num_labels)
	        self.logsoftmax = nn.LogSoftmax(dim=1)

	        # Re-initialize the heads in the order SPV, MIP, classifier.
	        for head in (self.SPV_linear, self.MIP_linear, self.classifier):
	            self._init_weights(head)

	    def _init_weights(self, module):
	        """Re-initialize `module` in place according to its layer type.

	        LayerNorm gets weight 1 and bias 0; Linear and Embedding weights are drawn
	        from N(0, config.initializer_range); a Linear bias, if present, is zeroed.
	        """
	        if isinstance(module, nn.LayerNorm):
	            module.weight.data.fill_(1.0)
	            module.bias.data.zero_()
	        elif isinstance(module, (nn.Linear, nn.Embedding)):
	            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)

	        has_linear_bias = isinstance(module, nn.Linear) and module.bias is not None
	        if has_linear_bias:
	            module.bias.data.zero_()

	    def forward(
	        self,
	        input_ids,
	        input_ids_2,
	        target_mask,
	        target_mask_2,
	        attention_mask_2,
	        token_type_ids=None,
	        attention_mask=None,
	        labels=None,
	        head_mask=None,
	    ):
	        """Classify the target word using both the SPV and MIP branches.

	        Inputs:
	            `input_ids`: torch.LongTensor [batch_size, sequence_length], first input.
	            `input_ids_2`: torch.LongTensor [batch_size, sequence_length], second input.
	            `target_mask`: tensor [batch_size, sequence_length]; 1 at target-word
	                positions of the first input and 0 elsewhere.
	            `target_mask_2`: same as `target_mask`, for the second input.
	            `attention_mask_2`: attention mask for the second input, forwarded to the encoder.
	            `token_type_ids`: optional segment ids for the first input only; the
	                second encoder call does not receive token type ids.
	            `attention_mask`: optional attention mask for the first input.
	            `labels`: optional class indices; flattened with `view(-1)`, so one index
	                per example is expected (values in [0, num_labels - 1]).
	            `head_mask`: optional head mask, forwarded to both encoder calls unchanged.

	        Returns:
	            The NLL loss (a scalar tensor) when `labels` is given, otherwise the
	            log-probabilities of shape [batch_size, num_labels].
	        """
	        # Pass 1: the full sentence.
	        encoded = self.encoder(
	            input_ids,
	            token_type_ids=token_type_ids,
	            attention_mask=attention_mask,
	            head_mask=head_mask,
	        )
	        token_states = encoded[0]  # [batch, max_len, hidden]
	        sentence_vec = encoded[1]  # [batch, hidden]

	        # Keep only the target-word positions, then regularize both vectors
	        # (target states first, pooled vector second).
	        masked_states = self.dropout(token_states * target_mask.unsqueeze(2))
	        sentence_vec = self.dropout(sentence_vec)

	        # Mean over the sequence axis, zeroed positions included.
	        context_target = masked_states.mean(1)  # [batch, hidden]

	        # Pass 2: the second input, through the same encoder weights.
	        encoded_2 = self.encoder(
	            input_ids_2,
	            attention_mask=attention_mask_2,
	            head_mask=head_mask,
	        )
	        token_states_2 = encoded_2[0]  # [batch, max_len, hidden]
	        masked_states_2 = self.dropout(token_states_2 * target_mask_2.unsqueeze(2))
	        isolated_target = masked_states_2.mean(1)  # [batch, hidden]

	        # SPV pairs the sentence vector with the in-context target vector;
	        # MIP pairs the second-input target vector with the in-context one.
	        spv_features = torch.cat([sentence_vec, context_target], dim=1)
	        mip_features = torch.cat([isolated_target, context_target], dim=1)
	        spv_hidden = self.SPV_linear(spv_features)
	        mip_hidden = self.MIP_linear(mip_features)

	        combined = self.dropout(torch.cat([spv_hidden, mip_hidden], dim=1))
	        log_probs = self.logsoftmax(self.classifier(combined))

	        if labels is None:
	            return log_probs

	        criterion = nn.NLLLoss()
	        return criterion(log_probs.view(-1, self.num_labels), labels.view(-1))