import datetime
import os

from magika import Magika

# Name of the VCS metadata directory that the directory scan must not enter.
_GIT_DIR = ".git"

# Marker appended to content that was cut off at the size limit.
_TRUNCATION_MARKER = "\n... [Content Truncated] ..."

# Timestamp layout used for every date in a file summary.
_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S UTC"


def _format_utc(timestamp):
    """Format a POSIX timestamp as a ``YYYY-MM-DD HH:MM:SS UTC`` string."""
    # Timezone-aware conversion; ``datetime.utcfromtimestamp`` is deprecated
    # since Python 3.12 and yields the same wall-clock fields.
    moment = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc)
    return moment.strftime(_TIMESTAMP_FORMAT)


def validate_file_types(directory):
    """Identify the content type of every file under *directory*.

    The tree is walked recursively and each file's bytes are passed to
    Magika. Anything inside a ``.git`` directory is skipped.

    Args:
        directory: Root directory to scan.

    Returns:
        A dict mapping each file path (joined onto the walked root) to the
        content-type label reported by Magika, or to an ``"Error: ..."``
        string when the file could not be read or identified.
    """
    m = Magika()
    file_types = {}

    # Match ``.git`` as a whole path component, not as a substring, so that
    # directories such as ``.github`` are still scanned.
    if _GIT_DIR in os.path.normpath(directory).split(os.sep):
        return file_types

    for root, dirs, files in os.walk(directory):
        # Prune in place so os.walk never descends into ``.git`` at all.
        dirs[:] = [name for name in dirs if name != _GIT_DIR]

        for file_name in files:
            file_path = os.path.join(root, file_name)
            try:
                with open(file_path, "rb") as file:
                    file_bytes = file.read()
                result = m.identify_bytes(file_bytes)
                file_types[file_path] = result.output.ct_label
            except Exception as e:
                # Best effort: one unreadable file must not abort the scan,
                # so the failure is recorded in place of the label.
                file_types[file_path] = f"Error: {str(e)}"

    return file_types


def get_file_summary(file_path, file_type):
    """Build a metadata summary for a single file.

    Args:
        file_path: Path of the file to describe.
        file_type: Content-type label to record for the file.

    Returns:
        A dict with the keys ``name`` (path relative to the current working
        directory), ``type``, ``size`` (bytes), ``creation_date`` and
        ``modification_date`` (both formatted as UTC strings).

    Raises:
        OSError: If the file cannot be stat'ed.

    Note:
        ``creation_date`` is derived from ``st_ctime``. Per the Python
        ``os.stat`` documentation this is the creation time on Windows but
        the last metadata change time on Unix.
    """
    # A single stat call keeps size and both timestamps consistent with
    # each other.
    stats = os.stat(file_path)
    return {
        "name": os.path.relpath(file_path),
        "type": file_type,
        "size": stats.st_size,
        "creation_date": _format_utc(stats.st_ctime),
        "modification_date": _format_utc(stats.st_mtime),
    }


def read_file_content(file_path, max_size=16*1024):
    """Read a file as UTF-8 text, truncating overly long content.

    Undecodable bytes are dropped (``errors="ignore"``).

    Args:
        file_path: Path of the file to read.
        max_size: Maximum number of characters to return before the
            truncation marker is appended.

    Returns:
        The file's text. When the file holds more than *max_size*
        characters, only the first *max_size* are returned, followed by
        ``"\\n... [Content Truncated] ..."``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
        # Read one character beyond the limit: truncation is decided from
        # what was actually read, not from the on-disk byte size, which can
        # exceed the character count for multibyte text.
        content = file.read(max_size + 1)

    if len(content) > max_size:
        return content[:max_size] + _TRUNCATION_MARKER
    return content