Skip to content

Commit

Permalink
trying to solve the black and click conflict issue
Browse files Browse the repository at this point in the history
  • Loading branch information
qiyanjun committed Mar 10, 2024
1 parent cd8f5ce commit 3ad4fe1
Show file tree
Hide file tree
Showing 14 changed files with 26 additions and 20 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,4 @@ checkpoints/
*.csv
!tests/sample_outputs/csv_attack_log.csv
tests/test_command_line/attack_log.txt
textattack/=22.3.0
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ more-itertools
pinyin>=0.4.0
jieba
OpenHowNet
click==8.0.2
4 changes: 2 additions & 2 deletions textattack/attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ def __init__(
constraints: List[Union[Constraint, PreTransformationConstraint]],
transformation: Transformation,
search_method: SearchMethod,
transformation_cache_size=2**15,
constraint_cache_size=2**15,
transformation_cache_size=2 ** 15,
constraint_cache_size=2 ** 15,
):
"""Initialize an attack object.
Expand Down
4 changes: 2 additions & 2 deletions textattack/attack_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,8 +507,8 @@ class _CommandLineAttackArgs:
interactive: bool = False
parallel: bool = False
model_batch_size: int = 32
model_cache_size: int = 2**18
constraint_cache_size: int = 2**18
model_cache_size: int = 2 ** 18
constraint_cache_size: int = 2 ** 18

@classmethod
def _add_parser_args(cls, parser):
Expand Down
2 changes: 1 addition & 1 deletion textattack/constraints/grammaticality/cola.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(

self.max_diff = max_diff
self.model_name = model_name
self._reference_score_cache = lru.LRU(2**10)
self._reference_score_cache = lru.LRU(2 ** 10)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
self.model = HuggingFaceModelWrapper(model, tokenizer)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def __init__(self):
self.sess, self.graph, self.PBTXT_PATH, self.CKPT_PATH
)

self.lm_cache = lru.LRU(2**18)
self.lm_cache = lru.LRU(2 ** 18)

def clear_cache(self):
self.lm_cache.clear()
Expand Down
2 changes: 1 addition & 1 deletion textattack/constraints/grammaticality/part_of_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def __init__(
self.language_nltk = language_nltk
self.language_stanza = language_stanza

self._pos_tag_cache = lru.LRU(2**14)
self._pos_tag_cache = lru.LRU(2 ** 14)
if tagger_type == "flair":
if tagset == "universal":
self._flair_pos_tagger = SequenceTagger.load("upos-fast")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(self, embedding=None, **kwargs):
def clear_cache(self):
self._get_thought_vector.cache_clear()

@functools.lru_cache(maxsize=2**10)
@functools.lru_cache(maxsize=2 ** 10)
def _get_thought_vector(self, text):
"""Sums the embeddings of all the words in ``text`` into a "thought
vector"."""
Expand Down
2 changes: 1 addition & 1 deletion textattack/goal_functions/goal_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def __init__(
use_cache=True,
query_budget=float("inf"),
model_batch_size=32,
model_cache_size=2**20,
model_cache_size=2 ** 20,
):
validators.validate_model_goal_function_compatibility(
self.__class__, model_wrapper.model.__class__
Expand Down
2 changes: 1 addition & 1 deletion textattack/goal_functions/text/minimize_bleu.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def extra_repr_keys(self):
return ["maximizable", "target_bleu"]


@functools.lru_cache(maxsize=2**12)
@functools.lru_cache(maxsize=2 ** 12)
def get_bleu(a, b):
ref = a.words
hyp = b.words
Expand Down
4 changes: 2 additions & 2 deletions textattack/goal_functions/text/non_overlapping_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@ def _get_score(self, model_output, _):
return num_words_diff / len(get_words_cached(self.ground_truth_output))


@functools.lru_cache(maxsize=2**12)
@functools.lru_cache(maxsize=2 ** 12)
def get_words_cached(s):
return np.array(words_from_text(s))


@functools.lru_cache(maxsize=2**12)
@functools.lru_cache(maxsize=2 ** 12)
def word_difference_score(s1, s2):
"""Returns the number of words that are non-overlapping between s1 and
s2."""
Expand Down
2 changes: 1 addition & 1 deletion textattack/metrics/attack_metrics/words_perturbed.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def calculate(self, results):
self.total_attacks = len(self.results)
self.all_num_words = np.zeros(len(self.results))
self.perturbed_word_percentages = np.zeros(len(self.results))
self.num_words_changed_until_success = np.zeros(2**16)
self.num_words_changed_until_success = np.zeros(2 ** 16)
self.max_words_changed = 0

for i, result in enumerate(self.results):
Expand Down
5 changes: 1 addition & 4 deletions textattack/shared/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,7 @@
r"^textattack.models.helpers.word_cnn_for_classification.*",
r"^transformers.modeling_\w*\.\w*ForSequenceClassification$",
],
(
NonOverlappingOutput,
MinimizeBleu,
): [
(NonOverlappingOutput, MinimizeBleu,): [
r"^textattack.models.helpers.t5_for_text_to_text.*",
],
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,28 @@ class ChineseWordSwapMaskedLM(WordSwap):
def __init__(self, task="fill-mask", model="xlm-roberta-base", **kwargs):
from transformers import BertTokenizer, BertForMaskedLM
import torch

self.tt = BertTokenizer.from_pretrained(model)
self.mm = BertForMaskedLM.from_pretrained(model)
self.mm.to("cuda")
super().__init__(**kwargs)

def get_replacement_words(self, current_text, indice_to_modify):
masked_text = current_text.replace_word_at_index(indice_to_modify, "[MASK]") # 修改前<mask>,xlmrberta的模型
masked_text = current_text.replace_word_at_index(
indice_to_modify, "[MASK]"
) # 修改前<mask>,xlmrberta的模型
tokens = self.tt.tokenize(masked_text.text)
input_ids = self.tt.convert_tokens_to_ids(tokens)
input_tensor = torch.tensor([input_ids]).to("cuda")
with torch.no_grad():
outputs = self.mm(input_tensor)
predictions = outputs.logits
predicted_token_ids = torch.argsort(predictions[0, indice_to_modify], descending=True)[:50]
predicted_tokens = self.tt.convert_ids_to_tokens(predicted_token_ids.tolist()[1:])
predicted_token_ids = torch.argsort(
predictions[0, indice_to_modify], descending=True
)[:50]
predicted_tokens = self.tt.convert_ids_to_tokens(
predicted_token_ids.tolist()[1:]
)
return predicted_tokens

def _get_transformations(self, current_text, indices_to_modify):
Expand Down

0 comments on commit 3ad4fe1

Please sign in to comment.