# Spaces: Running  (Hugging Face Spaces status banner captured with the source;
# kept as a comment so the module parses)
import pickle | |
import random | |
import shutil | |
from collections import Counter | |
from pathlib import Path | |
import numpy | |
import zipfile | |
# Base URL of the local FHE inference server.
SERVER_URL = "http://localhost:8000/"
# Size limit for input shown/accepted in the browser UI -- unit not visible here; TODO confirm.
INPUT_BROWSER_LIMIT = 550
# Directory layout: raw data, deployment artifacts, and per-user working dirs.
DATA_DIR = Path("./data")
DEPLOYMENT_DIR = Path("./deployment")
ROOT_DIR = DEPLOYMENT_DIR / "users"
SHARED_BASE_MODULE_DIR = DEPLOYMENT_DIR / "base_modules"
SHARED_SMOOTHER_MODULE_DIR = DEPLOYMENT_DIR / "smoother_module"
# Sub-directory names for per-user evaluation keys and encrypted payloads.
KEY_SMOOTHER_MODULE_DIR = "EvaluationKey_Smoother"
KEY_BASE_MODULE_DIR = "EvaluationKey_Base_Modules"
ENCRYPTED_INPUT_DIR = "Encrypt_Input"
ENCRYPTED_OUTPUT_DIR = "Encrypt_Output"
# Log file recording server-side FHE computation timings.
FHE_COMPUTATION_TIMELINE = Path("server_fhe_computation_timeline.txt")
# Ancestry class labels. NOTE(review): display order differs from the id order
# in ID_POPULATION below -- use the mappings, not positional indexing, to convert.
LABELS = ["European", "African", "Americas", "East Asian", "South Asian"]
# Mutually inverse mappings between numeric class ids and population names.
ID_POPULATION = {0: "European", 3: "African", 2: "Americas", 1: "East Asian", 4: "South Asian"}
POPULATION_ID = {"European": 0, "African": 3, "Americas": 2, "East Asian": 1, "South Asian": 4}
# Plot/UI color palette, one color per population.
COLORS = ["#FFD208", "#FFE46C", "#FFED9C", "#FFF6CE", "#FFD9A0"]
# load_pickle("data/meta_dict.pkl")
# Dataset metadata snapshot (see the commented load above). Usage in
# process_data_for_base_modules shows NW = number of windows, CT = context
# size in SNPs, M = window size, C = total SNP count; the remaining keys
# (A, CTR, WSCM, SS) are presumably #ancestries, context ratio, and smoother
# parameters -- TODO confirm against data/meta_dict.pkl.
META = {"A": 5, "C": 1059079, "M": 10589, "NW": 100, "CT": 1059, "CTR": 0.1, "WSCM": 0.2, "SS": 75}
# Generation counts used when building simulated admixed individuals -- TODO confirm.
BUILD_GENS = [1, 2, 4, 6, 8, 12, 16, 24, 32, 48]
import os | |
def load_pickle_from_zip(file_name, zip_path="data.zip"):
    """Load a pickled object stored as *file_name* inside the zip archive *zip_path*.

    Args:
        file_name: Name of the member file within the archive.
        zip_path: Path to the zip archive (defaults to "data.zip").

    Returns:
        The unpickled object.

    Raises:
        FileNotFoundError: If the archive itself does not exist.
        KeyError: If *file_name* is not a member of the archive.
    """
    if not os.path.exists(zip_path):
        raise FileNotFoundError(f"The zip file '{zip_path}' does not exist.")
    with zipfile.ZipFile(zip_path, 'r') as z:
        if file_name not in z.namelist():
            # (dropped a leftover debug print of the full namelist here)
            raise KeyError(f"The file '{file_name}' does not exist in the zip archive '{zip_path}'.")
        # NOTE: pickle.load is unsafe on untrusted archives -- only use with trusted data.
        with z.open(file_name) as f:
            return pickle.load(f)
def generate_weighted_percentages():
    """Build a 5-way ancestry percentage split that sums to exactly 100.

    The first (dominant) share is drawn uniformly from 50-70; the remaining
    mass is split among four random shares rounded to two decimals, and any
    rounding residue is folded back into the dominant share.
    """
    dominant = random.randint(50, 70)
    leftover = 100 - dominant
    raw = [random.random() for _ in range(4)]
    total = sum(raw)
    shares = [dominant] + [round(r / total * leftover, 2) for r in raw]
    # Fold rounding error (if any) back into the dominant share so sum == 100.
    residue = round(100 - sum(shares), 2)
    if residue:
        shares[0] += residue
    return shares
def select_random_ancestors():
    """Return every population id from ID_POPULATION in random order."""
    pool = list(ID_POPULATION)
    random.shuffle(pool)
    return pool
def read_pickle(path):
    """Deserialize and return the pickled object stored at *path*."""
    with open(path, "rb") as handle:
        return pickle.load(handle)
def compute_distribution(y, size=5):
    """Return the empirical frequency of each label in *y* as a length-*size* vector.

    Labels are used as indices into the output, so every label must lie in
    [0, size); labels absent from *y* get frequency 0.
    """
    dist = numpy.zeros(size)
    total = len(y)
    for label, count in Counter(y).items():
        dist[label] = count / total
    return dist
def slide_window(data, smooth_win_size, y=None):
    """Expand each per-window row into a flattened sliding context of neighbors.

    Args:
        data: array of shape (N, W, A) -- N samples, W windows, A features
            per window (shape fixed by the unpacking below).
        smooth_win_size: width of the smoothing context, in windows.
        y: optional labels of shape (N, W) -- assumed; flattened to match
            the rows of X_slide.

    Returns:
        Tuple (X_slide, y_slide): X_slide has N*W rows, each holding the
        smooth_win_size * A values around one window; y_slide is the
        flattened labels, or None when y is None.
    """
    N, W, A = data.shape
    # Reflect-pad along the window axis so edge windows still get full context.
    pad = (smooth_win_size + 1) // 2
    data_padded = numpy.pad(data, ((0, 0), (pad, pad), (0, 0)), mode="reflect")
    # All contiguous (1, smooth_win_size, A) patches; keep the first W per sample,
    # then flatten each patch into a single feature row.
    X_slide = numpy.lib.stride_tricks.sliding_window_view(data_padded, (1, smooth_win_size, A))
    X_slide = X_slide[:, :W, :].reshape(N * W, -1)
    y_slide = None if y is None else y.reshape(N * W)
    return X_slide, y_slide
# def read_vcf(vcf_file): | |
# return allel.read_vcf(vcf_file, region=None, fields="*") | |
def clean_dir(directory):
    """Remove the specified directory (a pathlib.Path) and its contents, if it exists."""
    if not (directory.exists() and directory.is_dir()):
        return
    print(f"Removing existing model directory: {directory}")
    shutil.rmtree(directory)
def process_data_for_base_modules(meta, X_t):
    """Cut the full SNP matrix into overlapping per-window slices for the base modules.

    Args:
        meta: dict providing "NW" (number of windows), "CT" (context size in
            SNPs), "M" (window size in SNPs) and "C" (total SNP count) --
            see the META constant in this module.
        X_t: array whose second axis spans the meta["C"] SNPs -- assumed
            shape (n_samples, meta["C"]); TODO confirm against callers.

    Returns:
        Tuple (X_b, n_windows, M_, rem): X_b holds, per sample, one slice of
        width M_ = M + 2*CT per retained window offset; rem is the count of
        trailing SNPs beyond the n_windows * M covered positions.
    """
    n_windows = meta["NW"]  # meta["C"] // meta["M"]
    context = meta["CT"]  # int(meta["M"] * meta['CTR'])
    # NOTE(review): context is an int but compared against the float 0.0 --
    # works, though the literal hints at an earlier float-valued context.
    if context != 0.0:
        # Reflect-pad both ends so edge windows get a full SNP context.
        pad_left = numpy.flip(X_t[:, 0:context], axis=1)
        pad_right = numpy.flip(X_t[:, -context:], axis=1)
        X_t = numpy.concatenate([pad_left, X_t, pad_right], axis=1)
    M_ = meta["M"] + 2 * context
    # Window start offsets, stepping by M over the C SNP positions; the last
    # two offsets are deliberately dropped ([:-2]).
    idx = numpy.arange(0, meta["C"], meta["M"])[:-2]
    X_b = numpy.lib.stride_tricks.sliding_window_view(X_t, M_, axis=1)[:, idx, :]
    rem = meta["C"] - meta["M"] * n_windows
    # print(f"{X_t.shape=} -> {X_b.shape=} | {n_windows=}, {context=}, {M_=}, {rem=}")
    return X_b, n_windows, M_, rem
def extract_model_number(path):
    """Return the integer suffix after the last '_' in *path*, or None if absent."""
    tail = path.rpartition("_")[2]
    try:
        return int(tail)
    except (ValueError, IndexError):
        print(f"Error: Unable to extract model number from path: {path}")
        return None
def is_none(obj) -> bool:
    """
    Check if the object is None or an empty sized container.

    Args:
        obj (any): The input to be checked.
    Returns:
        bool: True if the object is None, or if it has __len__ and its
        length is zero; False otherwise.
    """
    # The original also tested `obj is not None` here, which is always true
    # once the left side of `or` has failed -- the redundant check is dropped.
    return obj is None or (hasattr(obj, "__len__") and len(obj) == 0)
def load_pickle(path: str):
    """Deserialize and return the object pickled at *path*.

    The original annotated the return as ``numpy.array`` and documented it
    as ``Dict`` -- both wrong in general: pickle can return any object, so
    the misleading annotation is removed.

    Args:
        path (str): Filesystem path of the pickle file.

    Returns:
        The unpickled object.
    """
    with open(path, "rb") as f:
        data = pickle.load(f)
    return data
def write_pickle(path: str, data) -> None:
    """Pickle *data* to *path*, overwriting any existing file.

    The original declared ``-> numpy.array`` but returns nothing; the
    annotation is corrected to ``None``.

    Args:
        path (str): Destination file path.
        data: Any picklable object.
    """
    with open(path, "wb") as f:
        pickle.dump(data, f)
def write_bytes(path, data):
    """Save binary *data* to *path* (a pathlib.Path), replacing any existing file."""
    path.write_bytes(data)
def read_bytes(path):
    """Return the full binary contents of *path* (a pathlib.Path)."""
    return path.read_bytes()