Spaces:

anonymous8
/

Rapid-Textual-Adversarial-Defense

Sleeping

Rapid-Textual-Adversarial-Defense / textattack /goal_functions /classification /input_reduction.py

anonymous8/RPD-Demo

initial commit

4943752 over 2 years ago

1.98 kB

	"""

	Input reduction: keep the predicted label while shrinking the input
	---------------------------------------------------------------------
	"""


	from .classification_goal_function import ClassificationGoalFunction


	class InputReduction(ClassificationGoalFunction):
	    """Attempts to reduce the input down to as few words as possible while
	    maintaining the same predicted label.

	    From Feng, Wallace, Grissom, Iyyer, Rodriguez, Boyd-Graber. (2018).
	    Pathologies of Neural Models Make Interpretations Difficult.
	    https://arxiv.org/abs/1804.07781

	    Args:
	        args: First positional argument, forwarded unchanged to the parent
	            constructor.
	        target_num_words (int): The goal counts as complete once the text has
	            at most this many words and the predicted label is unchanged.
	        *extra_args: Additional positional arguments forwarded to the parent
	            constructor.
	        **kwargs: Keyword arguments forwarded to the parent constructor.
	    """

	    def __init__(self, args, target_num_words=1, *extra_args, **kwargs):
	        # Stored before delegating to the parent, matching the original order.
	        self.target_num_words = target_num_words
	        # Keyword arguments are forwarded too, so parent options can be
	        # supplied by name instead of being rejected by this constructor.
	        super().__init__(args, *extra_args, **kwargs)

	    def _is_goal_complete(self, model_output, attacked_text):
	        """Return a truthy value when the predicted label still equals the
	        ground truth and the text has at most ``target_num_words`` words."""
	        return (
	            self.ground_truth_output == model_output.argmax()
	            and attacked_text.num_words <= self.target_num_words
	        )

	    def _should_skip(self, model_output, attacked_text):
	        """Return a truthy value when the predicted label differs from the
	        ground truth output."""
	        return self.ground_truth_output != model_output.argmax()

	    def _get_score(self, model_output, attacked_text):
	        """Score ``attacked_text`` by how many words were removed.

	        Returns 0 when the predicted label differs from the ground truth.
	        Otherwise returns the fraction of words removed relative to the
	        initial text, plus the model's score for the ground truth label
	        scaled down by the initial word count, capped at 1.
	        """
	        # Give the lowest score possible to inputs which don't maintain the ground truth label.
	        if self.ground_truth_output != model_output.argmax():
	            return 0

	        cur_num_words = attacked_text.num_words
	        initial_num_words = self.initial_attacked_text.num_words
	        # An initial text with zero words would otherwise divide by zero;
	        # clamping leaves every result for non-empty texts unchanged.
	        denominator = max(initial_num_words, 1)

	        # The main goal is to reduce the number of words (num_words_score)
	        # Higher model score for the ground truth label is used as a tiebreaker (model_score)
	        num_words_score = max((initial_num_words - cur_num_words) / denominator, 0)
	        model_score = model_output[self.ground_truth_output]
	        return min(num_words_score + model_score / denominator, 1)

	    def extra_repr_keys(self):
	        """Return the list of key names for this goal function.

	        ``"target_num_words"`` is included only when ``self.maximizable`` is
	        falsy.
	        """
	        if self.maximizable:
	            return ["maximizable"]
	        return ["maximizable", "target_num_words"]