from html import escape
import iscc_sct as ict
def _escape_and_preserve_breaks(text):
    """Return *text* HTML-escaped, with every newline replaced by ``<br>``."""
    # NOTE(review): the replacement literal was split across two lines in the
    # original (an unterminated string). ``<br>`` is assumed because the
    # output is HTML -- confirm against the intended markup.
    return escape(text).replace("\n", "<br>")


def generate_html(fingerprint_data):
    """Render granular fingerprint data as a single string.

    Args:
        fingerprint_data: Mapping with the keys ``"iscc"``, ``"characters"``
            and ``"features"``. ``"features"`` is a sequence of chunk
            mappings, each providing ``"offset"``, ``"size"``, ``"text"`` and
            ``"feature"``. The sequence is not modified.

    Returns:
        A header (title, ISCC and character count) followed, for each chunk
        in ascending offset order, by the escaped part of the chunk text that
        no earlier chunk has already emitted and then the chunk's feature.
        The result ends with a newline.
    """
    # sorted() builds a new list, so the caller's "features" keeps its order.
    chunks = sorted(fingerprint_data["features"], key=lambda c: c["offset"])

    # Collect fragments and join once instead of growing a string with +=.
    parts = [
        "\nText Fingerprint Visualization\n"
        f"ISCC: {escape(str(fingerprint_data['iscc']))}\n"
        f"Characters: {escape(str(fingerprint_data['characters']))}\n"
    ]

    current_pos = 0  # position up to which text has already been emitted
    for chunk in chunks:
        offset = chunk["offset"]
        end = offset + chunk["size"]
        # Skip the leading part of this chunk that an earlier chunk covered.
        start = max(offset, current_pos)
        if start < end:
            # Emit only the unseen tail. Slicing from ``start`` (rather than
            # from the next chunk's offset) avoids repeating text when more
            # than two chunks overlap at the same position.
            parts.append(_escape_and_preserve_breaks(chunk["text"][start - offset:]))
            # Fingerprint badge
            parts.append(escape(str(chunk["feature"])))
        # Never move backwards: a chunk lying entirely inside text that was
        # already emitted must not re-open that range for later chunks.
        current_pos = max(current_pos, end)

    parts.append("\n")
    return "".join(parts)
def main():
    """Fingerprint ``../README.md`` and write the rendering to ``readme.html``.

    The README is read as bytes and decoded as UTF-8, then passed to
    ``ict.create`` with ``granular=True``. The dumped result is printed and
    also rendered with ``generate_html``. Both file paths are relative to the
    current working directory.
    """
    with open("../README.md", "rb") as readme:
        raw = readme.read()
    fingerprint = ict.create(raw.decode("utf-8"), granular=True)

    # Echo the dumped result for inspection.
    print(fingerprint.model_dump())

    # Render the dumped result and store it next to the script.
    rendered = generate_html(fingerprint.model_dump())
    with open("readme.html", "wt", encoding="utf-8") as out:
        out.write(rendered)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()