Cache: new Cache format in decoder-only models #31421

Merged
Commits (44)
183cd66
draft bart with new cache
zucchini-nlp Jun 14, 2024
4578bca
add cache for decoder-only models
zucchini-nlp Jun 14, 2024
9505ca4
revert utils
zucchini-nlp Jun 14, 2024
2ab28f3
modify docstring
zucchini-nlp Jun 14, 2024
5fe4e9e
revert bart
zucchini-nlp Jun 14, 2024
09413c3
minor fixes
zucchini-nlp Jun 14, 2024
3c27604
fix copies (not related)
zucchini-nlp Jun 14, 2024
350acc5
revert tests
zucchini-nlp Jun 14, 2024
c0adf10
remove enc-dec related code
zucchini-nlp Jun 17, 2024
c18b177
remove bloom
zucchini-nlp Jun 17, 2024
582f289
remove opt (enc-dec)
zucchini-nlp Jun 17, 2024
3141a71
Merge remote-tracking branch 'upstream/main' into dynamic_cache_decod…
zucchini-nlp Jun 17, 2024
33d54b4
update docstring
zucchini-nlp Jun 18, 2024
dd05e6b
git, codegen, gpt_neo, gpt_neox, gpj
zucchini-nlp Jun 18, 2024
cb878d5
clean up
zucchini-nlp Jun 19, 2024
0588791
copied from statements
zucchini-nlp Jun 19, 2024
a27b47c
revert
zucchini-nlp Jun 19, 2024
1abcf30
tmp
zucchini-nlp Jun 19, 2024
00ed88c
update warning msg
zucchini-nlp Jun 20, 2024
6c3b3aa
forgot git
zucchini-nlp Jun 20, 2024
fd5eeab
add more flags
zucchini-nlp Jun 21, 2024
e233f29
run-slow git,codegen,gpt_neo,gpt_neox,gpj
zucchini-nlp Jun 21, 2024
356d578
add cache flag to VLMs
zucchini-nlp Jul 9, 2024
c906670
remove files
zucchini-nlp Jul 9, 2024
08d9e6f
Merge branch 'main' into dynamic_cache_decoder_only
zucchini-nlp Jul 9, 2024
56c05b2
style
zucchini-nlp Jul 9, 2024
8510810
video LLMs also need a flag
zucchini-nlp Jul 9, 2024
cebb55d
style
zucchini-nlp Jul 9, 2024
8fd9dd1
llava will go in another PR
zucchini-nlp Jul 26, 2024
4b9ced1
Merge branch 'main' into dynamic_cache_decoder_only
zucchini-nlp Jul 26, 2024
aea219b
style
zucchini-nlp Jul 26, 2024
4991863
[run-slow] codegen, falcon, git, gpt_neo, gpt_neox, gptj, idefics
zucchini-nlp Jul 26, 2024
ec306a2
Update src/transformers/models/gpt_neo/modeling_gpt_neo.py
zucchini-nlp Jul 30, 2024
cf793b7
copy from
zucchini-nlp Jul 30, 2024
c92409c
deprecate until v4.45 and warn if not training
zucchini-nlp Jul 30, 2024
c2b97e4
nit
zucchini-nlp Jul 30, 2024
35b60de
fix test
zucchini-nlp Jul 30, 2024
d2fca9a
test static cache
zucchini-nlp Aug 2, 2024
0933350
Merge branch 'main' into dynamic_cache_decoder_only
zucchini-nlp Aug 2, 2024
42349d4
add more tests and fix models
zucchini-nlp Aug 2, 2024
45c3a1b
fix copies
zucchini-nlp Aug 2, 2024
5f22616
return sliding window mask
zucchini-nlp Aug 2, 2024
f5af6a2
run slow tests & fix + codestyle
zucchini-nlp Aug 6, 2024
21b45c5
one more falcon fix for alibi
zucchini-nlp Aug 6, 2024
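The commits above migrate the listed decoder-only models (codegen, falcon, git, gpt_neo, gpt_neox, gptj, plus the VLMs that gained a flag) from the legacy tuple-of-(key, value) cache to the new Cache API. As a rough sketch of what usage looks like after the change (not code taken from this PR), assuming the `DynamicCache` class exported by `transformers` and an illustrative GPT-Neo checkpoint:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125m")  # illustrative checkpoint
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-125m")
inputs = tokenizer("The new cache format", return_tensors="pt")

# Pass a Cache object instead of the legacy tuple of (key, value) tensors
past_key_values = DynamicCache()
with torch.no_grad():
    out = model(**inputs, past_key_values=past_key_values, use_cache=True)

print(type(out.past_key_values))  # DynamicCache

# Legacy tuples can still be converted to and from the new format
legacy = out.past_key_values.to_legacy_cache()
restored = DynamicCache.from_legacy_cache(legacy)

The `test static cache` commit exercises the preallocated `StaticCache` variant of the same API.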
Files changed
16 changes: 16 additions & 0 deletions bart.py
@@ -0,0 +1,16 @@
from transformers import AutoTokenizer, BartForConditionalGeneration

model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn").to("cuda:0")
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")

ARTICLE_TO_SUMMARIZE = (
"PG&E stated it scheduled the blackouts in response to forecasts for high winds "
"amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were "
"scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow."
)
inputs = tokenizer(ARTICLE_TO_SUMMARIZE, return_tensors="pt").to("cuda:0")

# Generate a summary with greedy decoding (num_beams=1, do_sample=False) and caching disabled
summary_ids = model.generate(**inputs, num_beams=1, do_sample=False, max_new_tokens=30, use_cache=False)
out = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
print(out)
318 changes: 225 additions & 93 deletions src/transformers/models/codegen/modeling_codegen.py


@@ -622,6 +622,7 @@ def _flash_attention_forward(
"""
Calls the forward method of Flash Attention - if the input hidden states contain at least one padding token
first unpad the input, then computes the attention scores and pad the final attention scores.

zucchini-nlp (Member, Author) commented on Jun 14, 2024:

These kinds of changes come from fix copies and are not related to the PR at all. But let's leave it here, as it's related to code consistency in the library anyway.

Args:
query_states (`torch.Tensor`):
Input query states to be passed to Flash Attention API
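For reference, `fix copies` refers to the repository's consistency tooling: functions marked with a `# Copied from ...` comment are rewritten by `make fix-copies` to match their source definition, which is how unrelated docstring tweaks like the blank line above can end up in a diff. A sketch of what a marked copy looks like (the function and source path are illustrative, borrowed from the Llama rotary helpers):

import torch

# Copied from transformers.models.llama.modeling_llama.rotate_half
def rotate_half(x):
    """Rotates half the hidden dims of the input."""
    x1 = x[..., : x.shape[-1] // 2]
    x2 = x[..., x.shape[-1] // 2 :]
    return torch.cat((-x2, x1), dim=-1)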
369 changes: 236 additions & 133 deletions src/transformers/models/falcon/modeling_falcon.py

144 changes: 83 additions & 61 deletions src/transformers/models/git/modeling_git.py

318 changes: 224 additions & 94 deletions src/transformers/models/gpt_neo/modeling_gpt_neo.py


371 changes: 252 additions & 119 deletions src/transformers/models/gpt_neox/modeling_gpt_neox.py


357 changes: 247 additions & 110 deletions src/transformers/models/gptj/modeling_gptj.py


1 change: 1 addition & 0 deletions src/transformers/models/hubert/modeling_hubert.py
@@ -692,6 +692,7 @@ def _flash_attention_forward(
"""
Calls the forward method of Flash Attention - if the input hidden states contain at least one padding token
first unpad the input, then computes the attention scores and pad the final attention scores.

Args:
query_states (`torch.Tensor`):
Input query states to be passed to Flash Attention API
1 change: 1 addition & 0 deletions src/transformers/models/musicgen/modeling_musicgen.py
@@ -453,6 +453,7 @@ def _flash_attention_forward(
"""
Calls the forward method of Flash Attention - if the input hidden states contain at least one padding token
first unpad the input, then computes the attention scores and pad the final attention scores.

Args:
query_states (`torch.Tensor`):
Input query states to be passed to Flash Attention API
@@ -469,6 +469,7 @@ def _flash_attention_forward(
"""
Calls the forward method of Flash Attention - if the input hidden states contain at least one padding token
first unpad the input, then computes the attention scores and pad the final attention scores.

Args:
query_states (`torch.Tensor`):
Input query states to be passed to Flash Attention API
1 change: 1 addition & 0 deletions src/transformers/models/sew/modeling_sew.py
@@ -692,6 +692,7 @@ def _flash_attention_forward(
"""
Calls the forward method of Flash Attention - if the input hidden states contain at least one padding token
first unpad the input, then computes the attention scores and pad the final attention scores.

Args:
query_states (`torch.Tensor`):
Input query states to be passed to Flash Attention API
1 change: 1 addition & 0 deletions src/transformers/models/unispeech/modeling_unispeech.py
@@ -728,6 +728,7 @@ def _flash_attention_forward(
"""
Calls the forward method of Flash Attention - if the input hidden states contain at least one padding token
first unpad the input, then computes the attention scores and pad the final attention scores.

Args:
query_states (`torch.Tensor`):
Input query states to be passed to Flash Attention API
@@ -745,6 +745,7 @@ def _flash_attention_forward(
"""
Calls the forward method of Flash Attention - if the input hidden states contain at least one padding token
first unpad the input, then computes the attention scores and pad the final attention scores.

Args:
query_states (`torch.Tensor`):
Input query states to be passed to Flash Attention API
Empty file added update_llava.py
Empty file.