fix for transformers >= 4.35.2
- README.md +2 -2
- modeling_lsg_bart.py +3 -3
README.md CHANGED

@@ -18,7 +18,7 @@ model-index:
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-**Transformers >= 4.
+**Transformers >= 4.35.2**\
 **This model relies on a custom modeling file, you need to add trust_remote_code=True**\
 **See [\#13467](https://github.com/huggingface/transformers/pull/13467)**
 
@@ -105,7 +105,7 @@ The following hyperparameters were used during generation:
 
 ### Framework versions
 
-- Transformers 4.
+- Transformers 4.35.2
 - Pytorch 1.12.1
 - Datasets 2.3.2
 - Tokenizers 0.11.6
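Usage note (not part of the commit): since the card requires trust_remote_code=True and now targets Transformers >= 4.35.2, loading looks roughly like the sketch below. The repo id is a placeholder, and AutoModelForSeq2SeqLM is an assumption based on the custom LSG-BART modeling file and the generation hyperparameters in the card.

```python
# Minimal loading sketch (assumes transformers >= 4.35.2).
# "your-namespace/your-lsg-bart-model" is a placeholder, not the real repo id.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_id = "your-namespace/your-lsg-bart-model"  # placeholder repo id

tokenizer = AutoTokenizer.from_pretrained(model_id)
# trust_remote_code=True pulls in the checkpoint's own modeling_lsg_bart.py
model = AutoModelForSeq2SeqLM.from_pretrained(model_id, trust_remote_code=True)

text = "A long document to summarize ..."
inputs = tokenizer(text, return_tensors="pt", truncation=True)
output_ids = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```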
modeling_lsg_bart.py CHANGED

@@ -1,7 +1,7 @@
 from logging import warn
 import torch
 from transformers.models.bart.modeling_bart import *
-from transformers.
+from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask, _prepare_4d_causal_attention_mask
 import torch.nn as nn
 import sys
 
@@ -852,7 +852,7 @@ class LSGBartEncoder(LSGBartPretrainedModel, BartEncoder):
         # expand attention_mask
         if attention_mask is not None:
             # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
-            attention_mask =
+            attention_mask = _prepare_4d_attention_mask(attention_mask, inputs_embeds.dtype)
 
         encoder_states = () if output_hidden_states else None
         all_attentions = () if output_attentions else None
@@ -1093,4 +1093,4 @@ try:
         str_to_class(value.split(".")[-1]).register_for_auto_class(key)
 except:
     warn("AutoRegister isn't available, you'll have to manually copy modeling.py after .save_pretrained(...).")
-    warn("Update to transformers >= 4.
+    warn("Update to transformers >= 4.35.2 to fix.")
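For context, the patch switches the encoder's mask expansion to the _prepare_4d_attention_mask helper from transformers.modeling_attn_mask_utils, since the internal helper previously imported from transformers no longer lives at its old path in 4.35. Below is a standalone sketch of what that call produces, assuming transformers >= 4.35.2; it is illustrative only and not part of the commit.

```python
# Standalone sketch of the mask helper the patch switches to
# (assumes transformers >= 4.35.2; not part of the commit itself).
import torch
from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask

# [bsz, seq_len] padding mask: 1 = real token, 0 = padding
attention_mask = torch.tensor([[1, 1, 1, 0]])

# Expanded to [bsz, 1, tgt_seq_len, src_seq_len]; padded positions are filled
# with the dtype's minimum value so they contribute nothing after softmax.
expanded = _prepare_4d_attention_mask(attention_mask, torch.float32)
print(expanded.shape)  # torch.Size([1, 1, 4, 4])
```

The new import also brings in _prepare_4d_causal_attention_mask, presumably used for the decoder-side mask elsewhere in the file.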