# stealth-edits / evaluation / eval_dims.py
# qinghuazhou
# Initial commit
# 85e172b
# raw
# history blame
# 5.49 kB
import os
import sys
import argparse
import numpy as np
from tqdm import tqdm
import torch
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# load utility functions
from evaluation import eval_utils
from util import utils
from util import evaluation
def calculate_t3_intrinsic_dims(
        model_name,
        model,
        tok,
        hparams,
        edit_mode,
        theta,
        num_aug,
        layers,
        save_path,
        output_path,
        augmented_cache = None,
        cache_features = False,
    ):
    """ Theorem 3 intrinsic dimensionality of augmented prompt features for multiple samples.

    For every unique sample pickle found under `save_path`, extracts features
    and intrinsic dimensions via `eval_utils.sample_t3_intrinsic_dims`,
    computes false positive rates via `eval_utils.calculate_fpr`, and writes
    one `<case_id>.pickle` result file into `output_path`. Samples whose
    result file already exists are skipped, so an interrupted run can be
    resumed. A failure on one sample is reported and the loop continues with
    the next sample.

    Args:
        model_name: model identifier, forwarded to `eval_utils.calculate_fpr`.
        model: model object, forwarded to the feature extractor.
        tok: tokenizer, forwarded to the feature extractor.
        hparams: hyperparameter dict; `hparams['activation']` selects the
            activation function, the dict itself is forwarded to the
            feature extractor.
        edit_mode: edit/attack mode, forwarded to the feature extractor.
        theta: theta for the intrinsic dimension calculation.
        num_aug: number of augmentations, forwarded to both helpers and
            stored in the result.
        layers: iterable of layer indices; also used as keys into the
            returned layer masks.
        save_path: directory searched for input `.pickle` files (any path
            containing 'perplexity' is ignored).
        output_path: directory in which result pickles are written.
        augmented_cache: optional cache, forwarded to the feature extractor.
        cache_features: if True, the extracted layer features and masks are
            stored in the result file as well.

    Returns:
        None. Results are written to disk.
    """
    # load activation function
    activation = utils.load_activation(hparams['activation'])

    # find unique pickle files (first occurrence per basename is kept)
    pickle_paths = np.array([
        f for f in utils.path_all_files(save_path)
        if f.endswith('.pickle') and ('perplexity' not in f)
    ])
    _, unique_indices = np.unique(
        np.array([os.path.basename(f) for f in pickle_paths]), return_index=True)
    pickle_paths = pickle_paths[unique_indices]
    pickle_paths = utils.shuffle_list(pickle_paths)
    print('Number of pickle files:', len(pickle_paths))

    for pickle_path in tqdm(pickle_paths):

        # reset per sample so the error report never shows a stale or unbound id
        case_id = None

        try:
            # find sample file
            edit_contents = utils.loadpickle(pickle_path)
            case_id = edit_contents['case_id']

            output_file = os.path.join(output_path, f'{case_id}.pickle')
            if os.path.exists(output_file):
                print('Already exists:', output_file)
                continue

            # extract features and calculate intrinsic dims
            layer_features, layer_masks, intrinsic_dims = eval_utils.sample_t3_intrinsic_dims(
                model,
                tok,
                hparams,
                layers = layers,
                request = edit_contents['request'],
                edit_mode = edit_mode,
                num_aug = num_aug,
                theta = theta,
                augmented_cache = augmented_cache,
                verbose = False
            )

            # calculate false positive rates
            fpr_raw, fpr_ftd = eval_utils.calculate_fpr(
                model_name,
                layers,
                save_path,
                case_id,
                activation,
                layer_features,
                layer_masks,
                num_aug
            )

            # save results
            to_save = {
                'intrinsic_dims': intrinsic_dims,
                'layer_indices': layers,
                'fpr_raw': fpr_raw,
                'fpr_ftd': fpr_ftd,
                'num_aug': num_aug,
                'num_filtered': [np.sum(layer_masks[l]) for l in layers],
            }
            if cache_features:
                to_save['layer_features'] = layer_features
                to_save['layer_masks'] = layer_masks

            utils.savepickle(output_file, to_save)

        except Exception as err:
            # Best-effort per sample: report and move on. `Exception` (not a
            # bare except) keeps Ctrl-C / SystemExit able to stop the run.
            failed = case_id if case_id is not None else pickle_path
            print('Error:', failed, '-', repr(err))
            continue
if __name__ == "__main__":
    # Command-line entry point: parse arguments, load hyperparameters and the
    # model, then compute intrinsic dimensions for every sample in save_path.

    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--model', default="gpt-j-6b", type=str, help='model to edit')
    parser.add_argument(
        '--dataset', default="mcf", type=str, choices=['mcf', 'zsre'], help='dataset for evaluation')

    # NOTE(review): the default 'in-place' is not one of `choices`; argparse
    # does not validate string defaults against `choices`, so it is accepted
    # silently when the flag is omitted. Kept unchanged for backward
    # compatibility - confirm whether the flag should be required instead.
    parser.add_argument(
        '--edit_mode',
        choices=['prompt', 'context', 'wikipedia'],
        default='in-place',
        help='mode of edit/attack to execute'
    )
    parser.add_argument(
        '--num_aug', default=2000, type=int, help='number of augmentations per sample')
    parser.add_argument(
        '--static_context', type=str, default=None,
        help='static context overriding the value in the hparams file')
    parser.add_argument(
        '--augmented_cache', type=str, default=None,
        help='optional augmented cache forwarded to feature extraction')

    parser.add_argument(
        '--theta', default=0.005, type=float, help='theta for intrinsic dim calculation')

    parser.add_argument(
        '--cache_features', default=0, type=int, help='boolean switch to cache features')

    parser.add_argument(
        '--save_path', type=str, default='./results/tmp/',
        help='path of existing sample results (input)')
    parser.add_argument(
        '--output_path', type=str, default='./results/dimensionality/',
        help='path for dimensionality results (output)')

    args = parser.parse_args()

    # boolean arguments (passed on the command line as 0/1 integers)
    args.cache_features = bool(args.cache_features)

    # loading hyperparameters
    hparams_path = f'./hparams/SE/{args.model}.json'
    hparams = utils.loadjson(hparams_path)

    # command-line static context takes precedence over the hparams file
    if args.static_context is not None:
        hparams['static_context'] = args.static_context

    # ensure results path exists
    args.save_path = os.path.join(args.save_path, f'{args.dataset}/{args.model}/')
    args.output_path = os.path.join(args.output_path, f'{args.edit_mode}/{args.dataset}/{args.model}/')
    utils.assure_path_exists(args.output_path)

    # load model and tokenizer
    model, tok = utils.load_model_tok(model_name=args.model)

    # calculate intrinsic dims
    calculate_t3_intrinsic_dims(
        args.model,
        model,
        tok,
        hparams,
        edit_mode = args.edit_mode,
        theta = args.theta,
        num_aug = args.num_aug,
        layers = evaluation.model_layer_indices[args.model],
        save_path = args.save_path,
        output_path = args.output_path,
        augmented_cache = args.augmented_cache,
        cache_features = args.cache_features
    )