"""Render the granular fingerprint of a text as a standalone HTML page."""

from html import escape


def _escape_and_preserve_breaks(text):
    """Escape *text* for HTML and turn every newline into a ``<br>`` tag."""
    return escape(text).replace("\n", "<br>")


def generate_html(fingerprint_data):
    """Build an HTML page that visualizes the chunks of a text fingerprint.

    Args:
        fingerprint_data: Mapping with the keys ``"iscc"``, ``"characters"``
            and ``"features"``. ``"features"`` is a list of mappings, each
            providing ``"offset"``, ``"size"``, ``"text"`` and ``"feature"``.
            The list is not modified.

    Returns:
        The HTML document as a string. Every stretch of text is emitted at
        most once; the tail of a chunk that is also covered by the following
        chunk is wrapped in the overlap highlight.
    """
    # sorted() works on a copy, so the caller's feature list keeps its order.
    chunks = sorted(fingerprint_data["features"], key=lambda c: c["offset"])

    chunk_color = "bg-yellow-100"
    overlap_color = "bg-red-100"

    iscc = escape(str(fingerprint_data["iscc"]))
    characters = escape(str(fingerprint_data["characters"]))

    # NOTE(review): the markup of this template was not recoverable from the
    # file as received (tags were missing from every string literal). The
    # tags and inline style rules below are a minimal reconstruction around
    # the existing class names -- confirm against the intended template.
    parts = [
        f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Text Fingerprint Visualization</title>
<style>
.{chunk_color} {{ background-color: #fef9c3; }}
.{overlap_color} {{ background-color: #fee2e2; }}
.feature {{ font-family: monospace; font-size: 0.75em; color: #4b5563; }}
</style>
</head>
<body>
<h1>Text Fingerprint Visualization</h1>
<p>ISCC: {iscc}</p>
<p>Characters: {characters}</p>
<div>
"""
    ]

    # assumes offsets and sizes are expressed in the same unit as indices
    # into chunk["text"] (characters) -- TODO confirm against the producer.
    current_pos = 0
    for chunk, following in zip(chunks, chunks[1:] + [None]):
        offset = chunk["offset"]
        text = chunk["text"]
        end = offset + chunk["size"]
        # Skip whatever an earlier chunk has already rendered.
        start = max(offset, current_pos)

        if start < end:
            next_offset = end if following is None else following["offset"]
            if next_offset < end:
                # The next chunk begins inside this one. Clamp to `start` so
                # text that was already emitted is never emitted again.
                overlap_start = max(next_offset, start)
                own_text = text[start - offset : overlap_start - offset]
                shared_text = text[overlap_start - offset :]
            else:
                own_text = text[start - offset :]
                shared_text = ""

            parts.append(f'<span class="{chunk_color}">')
            # Non-overlapping part
            parts.append(_escape_and_preserve_breaks(own_text))
            # Overlapping part (if any)
            if shared_text:
                parts.append(
                    f'<span class="{overlap_color}">'
                    f"{_escape_and_preserve_breaks(shared_text)}</span>"
                )
            # Fingerprint badge
            parts.append(
                f'<sup class="feature">{escape(str(chunk["feature"]))}</sup>'
            )
            parts.append("</span>")

        # Never move the cursor backwards: a chunk that lies entirely inside
        # already-rendered text must not cause later chunks to repeat it.
        current_pos = max(current_pos, end)

    parts.append("</div>\n</body>\n</html>\n")
    return "".join(parts)


def main():
    """Fingerprint ``../README.md`` and write the result to ``readme.html``."""
    # Imported here because only this entry point needs the package; this
    # keeps generate_html importable without iscc_sct being installed.
    import iscc_sct as ict

    # Read bytes and decode explicitly so line endings reach the
    # fingerprinting call untranslated.
    with open("../README.md", "rb") as f:
        data = f.read()
    text = data.decode("utf-8")

    result = ict.create(text, granular=True)
    fingerprint_data = result.model_dump()
    print(fingerprint_data)

    # Generate the HTML content
    html_content = generate_html(fingerprint_data)

    # Write the HTML content to a file
    with open("readme.html", "wt", encoding="utf-8") as f:
        f.write(html_content)


if __name__ == "__main__":
    main()