Spaces:

iscc
/

iscc-sct

Running

App Files Files Community

titusz committed on Aug 18, 2024

Commit

63ba62d

verified ·

1 Parent(s): 0f2b045

Synced repo using 'sync_with_huggingface' Github Action

Browse files

Files changed (6) hide show

iscc_sct/code_semantic_text.py +6 -4
iscc_sct/demo.py +97 -34
iscc_sct/models.py +74 -0
poetry.lock +71 -71
tests/test_demo.py +9 -12
tests/test_models.py +92 -0

iscc_sct/code_semantic_text.py CHANGED Viewed

@@ -75,9 +75,10 @@ def code_text_semantic(fp, **options):
     :key characters (bool): Return document character count (default True).
     :key embedding (bool): Return global document embedding (default False).
     :key precision (int): Max fractional digits for embeddings (default 8).
-    :key features (bool): Return granular document features (default False).
     :key offsets (bool): Return character offsets for granular features (default False).
-    :key chunks (bool): Return text chunks (default False).
     :key max_tokens (int): Max tokens per chunk (default 127).
     :key overlap (int): Max tokens allowed to overlap between chunks (default 48).
     :key trim (int): Trim whitespace from chunks (default False).
@@ -98,9 +99,10 @@ def gen_text_code_semantic(text, **options):
     :key characters (bool): Return document character count (default True).
     :key embedding (bool): Return global document embedding (default False).
     :key precision (int): Max fractional digits for embeddings (default 8).
-    :key features (bool): Return granular document features (default False).
     :key offsets (bool): Return character offsets for granular features (default False).
-    :key chunks (bool): Return text chunks (default False).
     :key max_tokens (int): Max tokens per chunk (default 127).
     :key overlap (int): Max tokens allowed to overlap between chunks (default 48).
     :key trim (int): Trim whitespace from chunks (default False).

     :key characters (bool): Return document character count (default True).
     :key embedding (bool): Return global document embedding (default False).
     :key precision (int): Max fractional digits for embeddings (default 8).
+    :key simprints (bool): Return granular document features (default False).
     :key offsets (bool): Return character offsets for granular features (default False).
+    :key sizes (bool): Include sizes of granular features (number of chars, default False).
+    :key contents (bool): Return text chunks (default False).
     :key max_tokens (int): Max tokens per chunk (default 127).
     :key overlap (int): Max tokens allowed to overlap between chunks (default 48).
     :key trim (int): Trim whitespace from chunks (default False).
     :key characters (bool): Return document character count (default True).
     :key embedding (bool): Return global document embedding (default False).
     :key precision (int): Max fractional digits for embeddings (default 8).
+    :key simprints (bool): Return granular document features (default False).
     :key offsets (bool): Return character offsets for granular features (default False).
+    :key sizes (bool): Include sizes of granular features (number of chars, default False).
+    :key contents (bool): Return text chunks (default False).
     :key max_tokens (int): Max tokens per chunk (default 127).
     :key overlap (int): Max tokens allowed to overlap between chunks (default 48).
     :key trim (int): Trim whitespace from chunks (default False).

iscc_sct/demo.py CHANGED Viewed

@@ -8,6 +8,46 @@ import iscc_sct as sct
 import textwrap
 def compute_iscc_code(text1, text2, bit_length):
     code1 = sct.gen_text_code_semantic(text1, bits=bit_length)
     code2 = sct.gen_text_code_semantic(text2, bits=bit_length)
@@ -119,33 +159,6 @@ ISCC 适用于特定的数字资产，是使用本文件中的算法和规则从
 )
 custom_css = """
-#chunked-text span.label {
-    text-transform: none !important;
-}
-.clickable-example {
-    cursor: pointer;
-    transition: all 0.3s ease;
-}
-.clickable-example:hover {
-    background-color: #f0f0f0;
-    transform: scale(1.02);
-}
-.clickable-example .label-wrap {
-    font-weight: bold;
-    color: #4a90e2;
-}
-.truncate-text {
-    white-space: nowrap;
-    overflow: hidden;
-    text-overflow: ellipsis;
-    max-width: 300px;
-    display: inline-block;
-}
 """
 iscc_theme = gr.themes.Default(
@@ -183,6 +196,11 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
                 choices=["None", "English", "Bulgarian"], label="Select sample for Text A", value="None"
             )
             out_code_a = gr.Textbox(label="ISCC Code for Text A")
         with gr.Column(variant="panel"):
             in_text_b = gr.TextArea(
                 label="Text B",
@@ -194,6 +212,11 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
                 choices=["None", "German", "Chinese"], label="Select sample for Text B", value="None"
             )
             out_code_b = gr.Textbox(label="ISCC Code for Text B")
     def update_sample_text(choice, text_a_or_b):
         if choice == "None":
@@ -216,12 +239,47 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
     def process_text(text, nbits, suffix):
         log.debug(f"{text[:20]}")
-        if not text:
-            return
         out_code_func = globals().get(f"out_code_{suffix}")
-        iscc = sct.Metadata(**sct.gen_text_code_semantic(text, bits=nbits))
-        result = {out_code_func: gr.Textbox(value=iscc.iscc)}
-        return result
     def recalculate_iscc(text_a, text_b, nbits):
         code_a = sct.gen_text_code_semantic(text_a, bits=nbits)["iscc"] if text_a else None
@@ -241,14 +299,14 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
     in_text_a.change(
         lambda text, nbits: process_text(text, nbits, "a"),
         inputs=[in_text_a, in_iscc_bits],
-        outputs=[out_code_a],
         show_progress="full",
         trigger_mode="always_last",
     )
     in_text_b.change(
         lambda text, nbits: process_text(text, nbits, "b"),
         inputs=[in_text_b, in_iscc_bits],
-        outputs=[out_code_b],
         show_progress="full",
         trigger_mode="always_last",
     )
@@ -273,6 +331,8 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
             gr.Textbox(value=""),  # Reset ISCC Code for Text A
             gr.Textbox(value=""),  # Reset ISCC Code for Text B
             gr.HTML(value=""),  # Reset Similarity
         )
     with gr.Row(variant="panel"):
@@ -289,6 +349,8 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
             out_code_a,
             out_code_b,
             out_similarity,
         ],
     )
@@ -327,5 +389,6 @@ This technology opens up new possibilities for understanding and managing text c
 """
             )
 if __name__ == "__main__":  # pragma: no cover
     demo.launch()

 import textwrap
# Maps every line-breaking code point handled by the demo to a printable
# stand-in so chunk boundaries stay visible when rendered on a single line.
newline_symbols = {
    "\u000a": "⏎",  # Line Feed - 'Return Symbol'
    "\u000b": "↨",  # Vertical Tab - 'Up Down Arrow with Base' symbol
    "\u000c": "␌",  # Form Feed - Unicode Control Pictures representation
    "\u000d": "↵",  # Carriage Return - 'Downwards Arrow with Corner Leftwards' symbol
    "\u0085": "⤓",  # Next Line - 'Downwards Arrow to Bar' symbol
    "\u2028": "↲",  # Line Separator - 'Downwards Arrow with Tip Leftwards' symbol
    "\u2029": "¶",  # Paragraph Separator - 'Pilcrow' symbol
}


def no_nl(text):
    """Replace non-printable newline characters with printable symbols.

    :param text: Text that may contain any of the keys of ``newline_symbols``.
    :return: Copy of ``text`` with each such character swapped for its symbol.
    """
    # One translate pass is equivalent to the chained replacements because no
    # replacement symbol is itself a key of the mapping.
    return text.translate(str.maketrans(newline_symbols))


def _edge_newlines(whitespace):
    """Turn newline-type characters into plain line feeds, keep other whitespace as is."""
    return "".join("\n" if char in newline_symbols else char for char in whitespace)


def no_nl_inner(text):
    """Replace newline characters with printable symbols, except at the edges.

    Newline characters inside the text become symbols. Leading and trailing
    newline characters are emitted as real line feeds, and any other leading or
    trailing whitespace (spaces, tabs) is preserved unchanged instead of being
    converted into line feeds.

    :param text: Text to make printable.
    :return: Text with inner newlines symbolised and edge whitespace retained.
    """
    core = text.strip()
    if not core:
        # Whitespace-only input: treat it as a single edge so it is not
        # counted once as "leading" and again as "trailing".
        return _edge_newlines(text)
    leading_len = len(text) - len(text.lstrip())
    leading = text[:leading_len]
    trailing = text[leading_len + len(core) :]
    return _edge_newlines(leading) + no_nl(core) + _edge_newlines(trailing)
def clean_chunk(chunk):
    """Limit consecutive line breaks in text to a maximum of 2.

    :param chunk: Text chunk to clean.
    :return: Text in which every run of three or more line feeds is reduced to
        exactly two; shorter runs are left untouched.
    """
    # A single replace pass only shortens long runs by a third, so repeat until
    # no run of three line feeds is left.
    while "\n\n\n" in chunk:
        chunk = chunk.replace("\n\n\n", "\n\n")
    return chunk
 def compute_iscc_code(text1, text2, bit_length):
     code1 = sct.gen_text_code_semantic(text1, bits=bit_length)
     code2 = sct.gen_text_code_semantic(text2, bits=bit_length)
 )
 custom_css = """
 """
 iscc_theme = gr.themes.Default(
                 choices=["None", "English", "Bulgarian"], label="Select sample for Text A", value="None"
             )
             out_code_a = gr.Textbox(label="ISCC Code for Text A")
+            out_chunks_a = gr.HighlightedText(
+                label="Chunked Text A",
+                interactive=False,
+                elem_id="chunked-text-a",
+            )
         with gr.Column(variant="panel"):
             in_text_b = gr.TextArea(
                 label="Text B",
                 choices=["None", "German", "Chinese"], label="Select sample for Text B", value="None"
             )
             out_code_b = gr.Textbox(label="ISCC Code for Text B")
+            out_chunks_b = gr.HighlightedText(
+                label="Chunked Text B",
+                interactive=False,
+                elem_id="chunked-text-b",
+            )
     def update_sample_text(choice, text_a_or_b):
         if choice == "None":
     def process_text(text, nbits, suffix):
         """Compute the ISCC code and the highlighted chunk view for one input text.

         :param text: Content of the text input; may be empty or None.
         :param nbits: Bit length passed to the code generator as ``bits``.
         :param suffix: Selects the output components via the module-level names
             ``out_code_<suffix>`` and ``out_chunks_<suffix>``.
         :return: Dict mapping the two output components to their updated values.
         """
         # Guard the slice: falsy input (including None) is a handled case below.
         log.debug(f"{(text or '')[:20]}")

         out_code_func = globals().get(f"out_code_{suffix}")
         out_chunks_func = globals().get(f"out_chunks_{suffix}")

         # NOTE(review): the visible component definitions use elem_id
         # "chunked-text-a" / "chunked-text-b" while the updates below pass
         # "chunked-text" - confirm which id is intended.
         if not text:
             return {
                 out_code_func: gr.Textbox(value=None),
                 out_chunks_func: gr.HighlightedText(value=None, elem_id="chunked-text"),
             }

         result = sct.gen_text_code_semantic(text, bits=nbits, simprints=True, offsets=True, sizes=True, contents=True)
         iscc = sct.Metadata(**result).to_object_format()

         # Generate chunked text with simprints and overlaps
         features = iscc.features[0]
         overlaps = iscc.get_overlaps()

         def overlap_at(index):
             """Return the overlap at ``index`` or "" when no such entry exists."""
             # get_overlaps() may return fewer entries than expected (an empty
             # list when data is missing), so never index it unchecked.
             return overlaps[index] if 0 <= index < len(overlaps) else ""

         highlighted_chunks = []
         for i, feature in enumerate(features.simprints):
             feature: sct.Feature
             content = feature.content
             leading_overlap = overlap_at(i - 1)
             trailing_overlap = overlap_at(i)

             # Remove leading overlap (text shared with the previous chunk)
             if leading_overlap:
                 content = content[len(leading_overlap) :]
             # Remove trailing overlap (text shared with the next chunk)
             if trailing_overlap:
                 content = content[: -len(trailing_overlap)]

             label = f"{feature.size}:{feature.simprint}"
             highlighted_chunks.append((no_nl_inner(content), label))

             # The shared text is shown once, between the two chunks it belongs to.
             if trailing_overlap:
                 highlighted_chunks.append((f"\n{no_nl(trailing_overlap)}\n", "overlap"))

         return {
             out_code_func: gr.Textbox(value=iscc.iscc),
             out_chunks_func: gr.HighlightedText(value=highlighted_chunks, elem_id="chunked-text"),
         }
     def recalculate_iscc(text_a, text_b, nbits):
         code_a = sct.gen_text_code_semantic(text_a, bits=nbits)["iscc"] if text_a else None
     in_text_a.change(
         lambda text, nbits: process_text(text, nbits, "a"),
         inputs=[in_text_a, in_iscc_bits],
+        outputs=[out_code_a, out_chunks_a],
         show_progress="full",
         trigger_mode="always_last",
     )
     in_text_b.change(
         lambda text, nbits: process_text(text, nbits, "b"),
         inputs=[in_text_b, in_iscc_bits],
+        outputs=[out_code_b, out_chunks_b],
         show_progress="full",
         trigger_mode="always_last",
     )
             gr.Textbox(value=""),  # Reset ISCC Code for Text A
             gr.Textbox(value=""),  # Reset ISCC Code for Text B
             gr.HTML(value=""),  # Reset Similarity
+            gr.HighlightedText(value=[]),  # Reset Chunked Text A
+            gr.HighlightedText(value=[]),  # Reset Chunked Text B
         )
     with gr.Row(variant="panel"):
             out_code_a,
             out_code_b,
             out_similarity,
+            out_chunks_a,
+            out_chunks_b,
         ],
     )
 """
             )
 if __name__ == "__main__":  # pragma: no cover
     demo.launch()

iscc_sct/models.py CHANGED Viewed

@@ -139,6 +139,80 @@ class Metadata(PrettyBaseModel):
         return Metadata(iscc=self.iscc, characters=self.characters, features=new_features)
     def to_object_format(self) -> "Metadata":
         """
         Convert the Metadata object to use the Object-Format for features.

         return Metadata(iscc=self.iscc, characters=self.characters, features=new_features)
+    def get_content(self) -> Optional[str]:
+        """
+        Reconstruct and return the original input text if all necessary data is available.
+        This method removes overlaps in adjacent text chunks.
+        :return: The reconstructed original text, or None if the necessary data is not available.
+        """
+        if not self.features or not self.features[0].simprints:
+            return None
+        feature_set = self.features[0]
+        if isinstance(feature_set.simprints[0], str):
+            # Convert to object format if in index format
+            feature_set = self.to_object_format().features[0]
+        if not all(feature.content and feature.offset is not None for feature in feature_set.simprints):
+            return None
+        # Sort features by offset
+        sorted_features = sorted(feature_set.simprints, key=lambda x: x.offset)
+        reconstructed_text = ""
+        last_end = 0
+        for feature in sorted_features:
+            start = feature.offset
+            if start < last_end:
+                # Remove overlap
+                feature_content = feature.content[last_end - start :]
+            else:
+                feature_content = feature.content
+            reconstructed_text += feature_content
+            last_end = start + len(feature.content)
+        return reconstructed_text
+    def get_overlaps(self) -> List[str]:
+        """
+        Returns a list of overlapping text between consecutive chunks.
+        For non-overlapping consecutive chunks, returns an empty string.
+        :return: List of overlapping text or empty strings.
+        """
+        if not self.features or not self.features[0].simprints:
+            return []
+        feature_set = self.features[0]
+        if isinstance(feature_set.simprints[0], str):
+            # Convert to object format if in index format
+            feature_set = self.to_object_format().features[0]
+        if not all(feature.content and feature.offset is not None for feature in feature_set.simprints):
+            return []
+        # Sort features by offset
+        sorted_features = sorted(feature_set.simprints, key=lambda x: x.offset)
+        overlaps = []
+        for i in range(len(sorted_features) - 1):
+            current_feature = sorted_features[i]
+            next_feature = sorted_features[i + 1]
+            current_end = current_feature.offset + len(current_feature.content)
+            next_start = next_feature.offset
+            if current_end > next_start:
+                overlap = current_feature.content[next_start - current_feature.offset :]
+                overlaps.append(overlap)
+            else:
+                overlaps.append("")
+        return overlaps
     def to_object_format(self) -> "Metadata":
         """
         Convert the Metadata object to use the Object-Format for features.

poetry.lock CHANGED Viewed

@@ -416,18 +416,18 @@ test = ["pytest (>=6)"]
 [[package]]
 name = "fastapi"
-version = "0.112.0"
 description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
 optional = true
 python-versions = ">=3.8"
 files = [
-    {file = "fastapi-0.112.0-py3-none-any.whl", hash = "sha256:3487ded9778006a45834b8c816ec4a48d522e2631ca9e75ec5a774f1b052f821"},
-    {file = "fastapi-0.112.0.tar.gz", hash = "sha256:d262bc56b7d101d1f4e8fc0ad2ac75bb9935fec504d2b7117686cec50710cf05"},
 ]
 [package.dependencies]
 pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0"
-starlette = ">=0.37.2,<0.38.0"
 typing-extensions = ">=4.8.0"
 [package.extras]
@@ -774,13 +774,13 @@ test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "p
 [[package]]
 name = "importlib-resources"
-version = "6.4.2"
 description = "Read resources from Python packages"
 optional = true
 python-versions = ">=3.8"
 files = [
-    {file = "importlib_resources-6.4.2-py3-none-any.whl", hash = "sha256:8bba8c54a8a3afaa1419910845fa26ebd706dc716dd208d9b158b4b6966f5c5c"},
-    {file = "importlib_resources-6.4.2.tar.gz", hash = "sha256:6cbfbefc449cc6e2095dd184691b7a12a04f40bc75dd4c55d31c34f174cdf57a"},
 ]
 [package.dependencies]
@@ -1310,69 +1310,69 @@ files = [
 [[package]]
 name = "onnxruntime"
-version = "1.18.1"
 description = "ONNX Runtime is a runtime accelerator for Machine Learning models"
 optional = false
 python-versions = "*"
 files = [
-    {file = "onnxruntime-1.18.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:29ef7683312393d4ba04252f1b287d964bd67d5e6048b94d2da3643986c74d80"},
-    {file = "onnxruntime-1.18.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fc706eb1df06ddf55776e15a30519fb15dda7697f987a2bbda4962845e3cec05"},
-    {file = "onnxruntime-1.18.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7de69f5ced2a263531923fa68bbec52a56e793b802fcd81a03487b5e292bc3a"},
-    {file = "onnxruntime-1.18.1-cp310-cp310-win32.whl", hash = "sha256:221e5b16173926e6c7de2cd437764492aa12b6811f45abd37024e7cf2ae5d7e3"},
-    {file = "onnxruntime-1.18.1-cp310-cp310-win_amd64.whl", hash = "sha256:75211b619275199c861ee94d317243b8a0fcde6032e5a80e1aa9ded8ab4c6060"},
-    {file = "onnxruntime-1.18.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:f26582882f2dc581b809cfa41a125ba71ad9e715738ec6402418df356969774a"},
-    {file = "onnxruntime-1.18.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef36f3a8b768506d02be349ac303fd95d92813ba3ba70304d40c3cd5c25d6a4c"},
-    {file = "onnxruntime-1.18.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:170e711393e0618efa8ed27b59b9de0ee2383bd2a1f93622a97006a5ad48e434"},
-    {file = "onnxruntime-1.18.1-cp311-cp311-win32.whl", hash = "sha256:9b6a33419b6949ea34e0dc009bc4470e550155b6da644571ecace4b198b0d88f"},
-    {file = "onnxruntime-1.18.1-cp311-cp311-win_amd64.whl", hash = "sha256:5c1380a9f1b7788da742c759b6a02ba771fe1ce620519b2b07309decbd1a2fe1"},
-    {file = "onnxruntime-1.18.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:31bd57a55e3f983b598675dfc7e5d6f0877b70ec9864b3cc3c3e1923d0a01919"},
-    {file = "onnxruntime-1.18.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b9e03c4ba9f734500691a4d7d5b381cd71ee2f3ce80a1154ac8f7aed99d1ecaa"},
-    {file = "onnxruntime-1.18.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:781aa9873640f5df24524f96f6070b8c550c66cb6af35710fd9f92a20b4bfbf6"},
-    {file = "onnxruntime-1.18.1-cp312-cp312-win32.whl", hash = "sha256:3a2d9ab6254ca62adbb448222e630dc6883210f718065063518c8f93a32432be"},
-    {file = "onnxruntime-1.18.1-cp312-cp312-win_amd64.whl", hash = "sha256:ad93c560b1c38c27c0275ffd15cd7f45b3ad3fc96653c09ce2931179982ff204"},
-    {file = "onnxruntime-1.18.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:3b55dc9d3c67626388958a3eb7ad87eb7c70f75cb0f7ff4908d27b8b42f2475c"},
-    {file = "onnxruntime-1.18.1-cp38-cp38-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f80dbcfb6763cc0177a31168b29b4bd7662545b99a19e211de8c734b657e0669"},
-    {file = "onnxruntime-1.18.1-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f1ff2c61a16d6c8631796c54139bafea41ee7736077a0fc64ee8ae59432f5c58"},
-    {file = "onnxruntime-1.18.1-cp38-cp38-win32.whl", hash = "sha256:219855bd272fe0c667b850bf1a1a5a02499269a70d59c48e6f27f9c8bcb25d02"},
-    {file = "onnxruntime-1.18.1-cp38-cp38-win_amd64.whl", hash = "sha256:afdf16aa607eb9a2c60d5ca2d5abf9f448e90c345b6b94c3ed14f4fb7e6a2d07"},
-    {file = "onnxruntime-1.18.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:128df253ade673e60cea0955ec9d0e89617443a6d9ce47c2d79eb3f72a3be3de"},
-    {file = "onnxruntime-1.18.1-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9839491e77e5c5a175cab3621e184d5a88925ee297ff4c311b68897197f4cde9"},
-    {file = "onnxruntime-1.18.1-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad3187c1faff3ac15f7f0e7373ef4788c582cafa655a80fdbb33eaec88976c66"},
-    {file = "onnxruntime-1.18.1-cp39-cp39-win32.whl", hash = "sha256:34657c78aa4e0b5145f9188b550ded3af626651b15017bf43d280d7e23dbf195"},
-    {file = "onnxruntime-1.18.1-cp39-cp39-win_amd64.whl", hash = "sha256:9c14fd97c3ddfa97da5feef595e2c73f14c2d0ec1d4ecbea99c8d96603c89589"},
 ]
 [package.dependencies]
 coloredlogs = "*"
 flatbuffers = "*"
-numpy = ">=1.21.6,<2.0"
 packaging = "*"
 protobuf = "*"
 sympy = "*"
 [[package]]
 name = "onnxruntime-gpu"
-version = "1.18.1"
 description = "ONNX Runtime is a runtime accelerator for Machine Learning models"
 optional = true
 python-versions = "*"
 files = [
-    {file = "onnxruntime_gpu-1.18.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e9de7b8c7c975f7830d1e9323daca2090408df821f2adc10ea267b4f469b59e0"},
-    {file = "onnxruntime_gpu-1.18.1-cp310-cp310-win_amd64.whl", hash = "sha256:7622a5979bb64a6631f888439b6ac8b11fbdd47da980425c697b82d4dd04b427"},
-    {file = "onnxruntime_gpu-1.18.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:af2d3ee6fba72b57abf6f379b8aca30ee773959d4346271e7d92557dd5cf2901"},
-    {file = "onnxruntime_gpu-1.18.1-cp311-cp311-win_amd64.whl", hash = "sha256:e40ba43771043fe286fea38b2c6549d1cc4b2b5b424624163c923163a8dc27ac"},
-    {file = "onnxruntime_gpu-1.18.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:87d1884523f485f40d3a126677a9f93fc15569dd817fc753ee7fc519142a4425"},
-    {file = "onnxruntime_gpu-1.18.1-cp312-cp312-win_amd64.whl", hash = "sha256:3f2ab38a62350965f5007111728410b3ef25213104dd1e7d61ecc158002ea3f5"},
-    {file = "onnxruntime_gpu-1.18.1-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:96e665393934cd6a2d7f5c4a8449815fc88e6afaeafc0b1da795545fabc7624f"},
-    {file = "onnxruntime_gpu-1.18.1-cp38-cp38-win_amd64.whl", hash = "sha256:5ba076faa5c6fdccf99a607ea0910f066ad4f53c472269ac9f5768c5193aa6c7"},
-    {file = "onnxruntime_gpu-1.18.1-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d39d3ff85eaa70d85401bd81f675b44959a135fdfb2a21383f424d1f8f290c32"},
-    {file = "onnxruntime_gpu-1.18.1-cp39-cp39-win_amd64.whl", hash = "sha256:126035cd623445f9922ca0f277cf18ffc83d7e073c0c5c8057eee37f22e24440"},
 ]
 [package.dependencies]
 coloredlogs = "*"
 flatbuffers = "*"
-numpy = ">=1.21.6,<2.0"
 packaging = "*"
 protobuf = "*"
 sympy = "*"
@@ -2299,29 +2299,29 @@ files = [
 [[package]]
 name = "ruff"
-version = "0.5.7"
 description = "An extremely fast Python linter and code formatter, written in Rust."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "ruff-0.5.7-py3-none-linux_armv6l.whl", hash = "sha256:548992d342fc404ee2e15a242cdbea4f8e39a52f2e7752d0e4cbe88d2d2f416a"},
-    {file = "ruff-0.5.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:00cc8872331055ee017c4f1071a8a31ca0809ccc0657da1d154a1d2abac5c0be"},
-    {file = "ruff-0.5.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:eaf3d86a1fdac1aec8a3417a63587d93f906c678bb9ed0b796da7b59c1114a1e"},
-    {file = "ruff-0.5.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a01c34400097b06cf8a6e61b35d6d456d5bd1ae6961542de18ec81eaf33b4cb8"},
-    {file = "ruff-0.5.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fcc8054f1a717e2213500edaddcf1dbb0abad40d98e1bd9d0ad364f75c763eea"},
-    {file = "ruff-0.5.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f70284e73f36558ef51602254451e50dd6cc479f8b6f8413a95fcb5db4a55fc"},
-    {file = "ruff-0.5.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:a78ad870ae3c460394fc95437d43deb5c04b5c29297815a2a1de028903f19692"},
-    {file = "ruff-0.5.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9ccd078c66a8e419475174bfe60a69adb36ce04f8d4e91b006f1329d5cd44bcf"},
-    {file = "ruff-0.5.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e31c9bad4ebf8fdb77b59cae75814440731060a09a0e0077d559a556453acbb"},
-    {file = "ruff-0.5.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d796327eed8e168164346b769dd9a27a70e0298d667b4ecee6877ce8095ec8e"},
-    {file = "ruff-0.5.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:4a09ea2c3f7778cc635e7f6edf57d566a8ee8f485f3c4454db7771efb692c499"},
-    {file = "ruff-0.5.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a36d8dcf55b3a3bc353270d544fb170d75d2dff41eba5df57b4e0b67a95bb64e"},
-    {file = "ruff-0.5.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9369c218f789eefbd1b8d82a8cf25017b523ac47d96b2f531eba73770971c9e5"},
-    {file = "ruff-0.5.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b88ca3db7eb377eb24fb7c82840546fb7acef75af4a74bd36e9ceb37a890257e"},
-    {file = "ruff-0.5.7-py3-none-win32.whl", hash = "sha256:33d61fc0e902198a3e55719f4be6b375b28f860b09c281e4bdbf783c0566576a"},
-    {file = "ruff-0.5.7-py3-none-win_amd64.whl", hash = "sha256:083bbcbe6fadb93cd86709037acc510f86eed5a314203079df174c40bbbca6b3"},
-    {file = "ruff-0.5.7-py3-none-win_arm64.whl", hash = "sha256:2dca26154ff9571995107221d0aeaad0e75a77b5a682d6236cf89a58c70b76f4"},
-    {file = "ruff-0.5.7.tar.gz", hash = "sha256:8dfc0a458797f5d9fb622dd0efc52d796f23f0a1493a9527f4e49a550ae9a7e5"},
 ]
 [[package]]
@@ -2398,13 +2398,13 @@ files = [
 [[package]]
 name = "starlette"
-version = "0.37.2"
 description = "The little ASGI library that shines."
 optional = true
 python-versions = ">=3.8"
 files = [
-    {file = "starlette-0.37.2-py3-none-any.whl", hash = "sha256:6fe59f29268538e5d0d182f2791a479a0c64638e6935d1c6989e63fb2699c6ee"},
-    {file = "starlette-0.37.2.tar.gz", hash = "sha256:9af890290133b79fc3db55474ade20f6220a364a0402e0b556e7cd5e1e093823"},
 ]
 [package.dependencies]
@@ -2592,13 +2592,13 @@ telegram = ["requests"]
 [[package]]
 name = "typer"
-version = "0.12.3"
 description = "Typer, build great CLIs. Easy to code. Based on Python type hints."
 optional = true
 python-versions = ">=3.7"
 files = [
-    {file = "typer-0.12.3-py3-none-any.whl", hash = "sha256:070d7ca53f785acbccba8e7d28b08dcd88f79f1fbda035ade0aecec71ca5c914"},
-    {file = "typer-0.12.3.tar.gz", hash = "sha256:49e73131481d804288ef62598d97a1ceef3058905aa536a1134f90891ba35482"},
 ]
 [package.dependencies]

 [[package]]
 name = "fastapi"
+version = "0.112.1"
 description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
 optional = true
 python-versions = ">=3.8"
 files = [
+    {file = "fastapi-0.112.1-py3-none-any.whl", hash = "sha256:bcbd45817fc2a1cd5da09af66815b84ec0d3d634eb173d1ab468ae3103e183e4"},
+    {file = "fastapi-0.112.1.tar.gz", hash = "sha256:b2537146f8c23389a7faa8b03d0bd38d4986e6983874557d95eed2acc46448ef"},
 ]
 [package.dependencies]
 pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0"
+starlette = ">=0.37.2,<0.39.0"
 typing-extensions = ">=4.8.0"
 [package.extras]
 [[package]]
 name = "importlib-resources"
+version = "6.4.3"
 description = "Read resources from Python packages"
 optional = true
 python-versions = ">=3.8"
 files = [
+    {file = "importlib_resources-6.4.3-py3-none-any.whl", hash = "sha256:2d6dfe3b9e055f72495c2085890837fc8c758984e209115c8792bddcb762cd93"},
+    {file = "importlib_resources-6.4.3.tar.gz", hash = "sha256:4a202b9b9d38563b46da59221d77bb73862ab5d79d461307bcb826d725448b98"},
 ]
 [package.dependencies]
 [[package]]
 name = "onnxruntime"
+version = "1.19.0"
 description = "ONNX Runtime is a runtime accelerator for Machine Learning models"
 optional = false
 python-versions = "*"
 files = [
+    {file = "onnxruntime-1.19.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:6ce22a98dfec7b646ae305f52d0ce14a189a758b02ea501860ca719f4b0ae04b"},
+    {file = "onnxruntime-1.19.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:19019c72873f26927aa322c54cf2bf7312b23451b27451f39b88f57016c94f8b"},
+    {file = "onnxruntime-1.19.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8eaa16df99171dc636e30108d15597aed8c4c2dd9dbfdd07cc464d57d73fb275"},
+    {file = "onnxruntime-1.19.0-cp310-cp310-win32.whl", hash = "sha256:0eb0f8dbe596fd0f4737fe511fdbb17603853a7d204c5b2ca38d3c7808fc556b"},
+    {file = "onnxruntime-1.19.0-cp310-cp310-win_amd64.whl", hash = "sha256:616092d54ba8023b7bc0a5f6d900a07a37cc1cfcc631873c15f8c1d6e9e184d4"},
+    {file = "onnxruntime-1.19.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:a2b53b3c287cd933e5eb597273926e899082d8c84ab96e1b34035764a1627e17"},
+    {file = "onnxruntime-1.19.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e94984663963e74fbb468bde9ec6f19dcf890b594b35e249c4dc8789d08993c5"},
+    {file = "onnxruntime-1.19.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f379d1f050cfb55ce015d53727b78ee362febc065c38eed81512b22b757da73"},
+    {file = "onnxruntime-1.19.0-cp311-cp311-win32.whl", hash = "sha256:4ccb48faea02503275ae7e79e351434fc43c294c4cb5c4d8bcb7479061396614"},
+    {file = "onnxruntime-1.19.0-cp311-cp311-win_amd64.whl", hash = "sha256:9cdc8d311289a84e77722de68bd22b8adfb94eea26f4be6f9e017350faac8b18"},
+    {file = "onnxruntime-1.19.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:1b59eaec1be9a8613c5fdeaafe67f73a062edce3ac03bbbdc9e2d98b58a30617"},
+    {file = "onnxruntime-1.19.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be4144d014a4b25184e63ce7a463a2e7796e2f3df931fccc6a6aefa6f1365dc5"},
+    {file = "onnxruntime-1.19.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10d7e7d4ca7021ce7f29a66dbc6071addf2de5839135339bd855c6d9c2bba371"},
+    {file = "onnxruntime-1.19.0-cp312-cp312-win32.whl", hash = "sha256:87f2c58b577a1fb31dc5d92b647ecc588fd5f1ea0c3ad4526f5f80a113357c8d"},
+    {file = "onnxruntime-1.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:8a1f50d49676d7b69566536ff039d9e4e95fc482a55673719f46528218ecbb94"},
+    {file = "onnxruntime-1.19.0-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:71423c8c4b2d7a58956271534302ec72721c62a41efd0c4896343249b8399ab0"},
+    {file = "onnxruntime-1.19.0-cp38-cp38-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9d63630d45e9498f96e75bbeb7fd4a56acb10155de0de4d0e18d1b6cbb0b358a"},
+    {file = "onnxruntime-1.19.0-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3bfd15db1e8794d379a86c1a9116889f47f2cca40cc82208fc4f7e8c38e8522"},
+    {file = "onnxruntime-1.19.0-cp38-cp38-win32.whl", hash = "sha256:3b098003b6b4cb37cc84942e5f1fe27f945dd857cbd2829c824c26b0ba4a247e"},
+    {file = "onnxruntime-1.19.0-cp38-cp38-win_amd64.whl", hash = "sha256:cea067a6541d6787d903ee6843401c5b1332a266585160d9700f9f0939443886"},
+    {file = "onnxruntime-1.19.0-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:c4fcff12dc5ca963c5f76b9822bb404578fa4a98c281e8c666b429192799a099"},
+    {file = "onnxruntime-1.19.0-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f6dcad8a4db908fbe70b98c79cea1c8b6ac3316adf4ce93453136e33a524ac59"},
+    {file = "onnxruntime-1.19.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4bc449907c6e8d99eee5ae5cc9c8fdef273d801dcd195393d3f9ab8ad3f49522"},
+    {file = "onnxruntime-1.19.0-cp39-cp39-win32.whl", hash = "sha256:947febd48405afcf526e45ccff97ff23b15e530434705f734870d22ae7fcf236"},
+    {file = "onnxruntime-1.19.0-cp39-cp39-win_amd64.whl", hash = "sha256:f60be47eff5ee77fd28a466b0fd41d7debc42a32179d1ddb21e05d6067d7b48b"},
 ]
 [package.dependencies]
 coloredlogs = "*"
 flatbuffers = "*"
+numpy = ">=1.21.6"
 packaging = "*"
 protobuf = "*"
 sympy = "*"
 [[package]]
 name = "onnxruntime-gpu"
+version = "1.19.0"
 description = "ONNX Runtime is a runtime accelerator for Machine Learning models"
 optional = true
 python-versions = "*"
 files = [
+    {file = "onnxruntime_gpu-1.19.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee4cc525a581fd57ffbe266b23484cadbdcd43daf1cc91a632c043c9edd07f55"},
+    {file = "onnxruntime_gpu-1.19.0-cp310-cp310-win_amd64.whl", hash = "sha256:895221ce7cb0a637d3841ba53701a3aa4d284a7d6e391c873de87ce09defa9e9"},
+    {file = "onnxruntime_gpu-1.19.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c8bae9164d9586cbf7c7976915b515584d077470307a255b7ca03cf425ce38bf"},
+    {file = "onnxruntime_gpu-1.19.0-cp311-cp311-win_amd64.whl", hash = "sha256:62418bf4bde804afd1cea1e391f1ee4ba5f50e09cd0397c852e150c013e27ae4"},
+    {file = "onnxruntime_gpu-1.19.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cbceb55c265266350fa731365ccd224dc29c352470671a24cbc153fe0c4be1f9"},
+    {file = "onnxruntime_gpu-1.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:35405747644bf8d9d9c84fb6ee3dd04cc594e9c3f5448735c68565fb3372b1cd"},
+    {file = "onnxruntime_gpu-1.19.0-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ae04a2fdb81654e7616254525d9cd23b05e1ce64a9ea798080d2a20200dddb4a"},
+    {file = "onnxruntime_gpu-1.19.0-cp38-cp38-win_amd64.whl", hash = "sha256:5aee8fa0e578a696f05335dcabd4cd2027f1e40acbe1cf1930df960efeb3e36f"},
+    {file = "onnxruntime_gpu-1.19.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f64a02672770d11761b5e2ea34749526f564b88ec364032f613f1519dce2bcc"},
+    {file = "onnxruntime_gpu-1.19.0-cp39-cp39-win_amd64.whl", hash = "sha256:c30cc85fd09ee9ddd0915855901f9dc80add98572a969ea3c1e8ae9a7e96e94c"},
 ]
 [package.dependencies]
 coloredlogs = "*"
 flatbuffers = "*"
+numpy = ">=1.21.6"
 packaging = "*"
 protobuf = "*"
 sympy = "*"
 [[package]]
 name = "ruff"
+version = "0.6.1"
 description = "An extremely fast Python linter and code formatter, written in Rust."
 optional = false
 python-versions = ">=3.7"
 files = [
+    {file = "ruff-0.6.1-py3-none-linux_armv6l.whl", hash = "sha256:b4bb7de6a24169dc023f992718a9417380301b0c2da0fe85919f47264fb8add9"},
+    {file = "ruff-0.6.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:45efaae53b360c81043e311cdec8a7696420b3d3e8935202c2846e7a97d4edae"},
+    {file = "ruff-0.6.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:bc60c7d71b732c8fa73cf995efc0c836a2fd8b9810e115be8babb24ae87e0850"},
+    {file = "ruff-0.6.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c7477c3b9da822e2db0b4e0b59e61b8a23e87886e727b327e7dcaf06213c5cf"},
+    {file = "ruff-0.6.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a0af7ab3f86e3dc9f157a928e08e26c4b40707d0612b01cd577cc84b8905cc9"},
+    {file = "ruff-0.6.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:392688dbb50fecf1bf7126731c90c11a9df1c3a4cdc3f481b53e851da5634fa5"},
+    {file = "ruff-0.6.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5278d3e095ccc8c30430bcc9bc550f778790acc211865520f3041910a28d0024"},
+    {file = "ruff-0.6.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fe6d5f65d6f276ee7a0fc50a0cecaccb362d30ef98a110f99cac1c7872df2f18"},
+    {file = "ruff-0.6.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2e0dd11e2ae553ee5c92a81731d88a9883af8db7408db47fc81887c1f8b672e"},
+    {file = "ruff-0.6.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d812615525a34ecfc07fd93f906ef5b93656be01dfae9a819e31caa6cfe758a1"},
+    {file = "ruff-0.6.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:faaa4060f4064c3b7aaaa27328080c932fa142786f8142aff095b42b6a2eb631"},
+    {file = "ruff-0.6.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:99d7ae0df47c62729d58765c593ea54c2546d5de213f2af2a19442d50a10cec9"},
+    {file = "ruff-0.6.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9eb18dfd7b613eec000e3738b3f0e4398bf0153cb80bfa3e351b3c1c2f6d7b15"},
+    {file = "ruff-0.6.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:c62bc04c6723a81e25e71715aa59489f15034d69bf641df88cb38bdc32fd1dbb"},
+    {file = "ruff-0.6.1-py3-none-win32.whl", hash = "sha256:9fb4c4e8b83f19c9477a8745e56d2eeef07a7ff50b68a6998f7d9e2e3887bdc4"},
+    {file = "ruff-0.6.1-py3-none-win_amd64.whl", hash = "sha256:c2ebfc8f51ef4aca05dad4552bbcf6fe8d1f75b2f6af546cc47cc1c1ca916b5b"},
+    {file = "ruff-0.6.1-py3-none-win_arm64.whl", hash = "sha256:3bc81074971b0ffad1bd0c52284b22411f02a11a012082a76ac6da153536e014"},
+    {file = "ruff-0.6.1.tar.gz", hash = "sha256:af3ffd8c6563acb8848d33cd19a69b9bfe943667f0419ca083f8ebe4224a3436"},
 ]
 [[package]]
 [[package]]
 name = "starlette"
+version = "0.38.2"
 description = "The little ASGI library that shines."
 optional = true
 python-versions = ">=3.8"
 files = [
+    {file = "starlette-0.38.2-py3-none-any.whl", hash = "sha256:4ec6a59df6bbafdab5f567754481657f7ed90dc9d69b0c9ff017907dd54faeff"},
+    {file = "starlette-0.38.2.tar.gz", hash = "sha256:c7c0441065252160993a1a37cf2a73bb64d271b17303e0b0c1eb7191cfb12d75"},
 ]
 [package.dependencies]
 [[package]]
 name = "typer"
+version = "0.12.4"
 description = "Typer, build great CLIs. Easy to code. Based on Python type hints."
 optional = true
 python-versions = ">=3.7"
 files = [
+    {file = "typer-0.12.4-py3-none-any.whl", hash = "sha256:819aa03699f438397e876aa12b0d63766864ecba1b579092cc9fe35d886e34b6"},
+    {file = "typer-0.12.4.tar.gz", hash = "sha256:c9c1613ed6a166162705b3347b8d10b661ccc5d95692654d0fb628118f2c34e6"},
 ]
 [package.dependencies]

tests/test_demo.py CHANGED Viewed

@@ -52,30 +52,27 @@ import gradio as gr
 from iscc_sct.demo import process_text
-@patch("iscc_sct.demo.sct.gen_text_code_semantic")
-def test_process_text(mock_gen_text_code):
-    mock_gen_text_code.return_value = {"iscc": "ISCC:EAAQCVG2TABD6"}
     # Test with valid input
     result = process_text("Hello, world!", 64, "a")
     assert isinstance(result, dict)
-    assert len(result) == 1
     key, value = next(iter(result.items()))
     assert isinstance(key, gr.components.Textbox)
     assert isinstance(value, gr.components.Textbox)
-    assert value.value == "ISCC:EAAQCVG2TABD6"
     # Test with empty input
     result = process_text("", 64, "b")
-    assert result is None
-    # Test with different bit length
-    process_text("Test", 128, "a")
-    mock_gen_text_code.assert_called_with("Test", bits=128)
     # Test with different suffix
     result = process_text("Test", 64, "b")
-    assert len(result) == 1
     key, value = next(iter(result.items()))
     assert isinstance(key, gr.components.Textbox)
     assert isinstance(value, gr.components.Textbox)

 from iscc_sct.demo import process_text
+def test_process_text():
     # Test with valid input
     result = process_text("Hello, world!", 64, "a")
     assert isinstance(result, dict)
+    assert len(result) == 2
     key, value = next(iter(result.items()))
     assert isinstance(key, gr.components.Textbox)
     assert isinstance(value, gr.components.Textbox)
+    assert value.value == "ISCC:CAA7GY4JTDI3XZYV"
     # Test with empty input
     result = process_text("", 64, "b")
+    assert isinstance(result, dict)
+    assert len(result) == 2
+    for key, value in result.items():
+        assert isinstance(key, (gr.components.Textbox, gr.components.HighlightedText))
+        assert value.value is None
     # Test with different suffix
     result = process_text("Test", 64, "b")
+    assert len(result) == 2
     key, value = next(iter(result.items()))
     assert isinstance(key, gr.components.Textbox)
     assert isinstance(value, gr.components.Textbox)

tests/test_models.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import pytest
 from pydantic import ValidationError
 from iscc_sct.models import Metadata, Feature, FeatureSet
 def test_feature_initialization():
@@ -100,3 +101,94 @@ def test_metadata_format_conversion_with_no_features():
     object_meta = meta.to_object_format()
     assert index_meta.model_dump() == meta.model_dump()
     assert object_meta.model_dump() == meta.model_dump()

 import pytest
 from pydantic import ValidationError
 from iscc_sct.models import Metadata, Feature, FeatureSet
+import iscc_sct as sct
 def test_feature_initialization():
     object_meta = meta.to_object_format()
     assert index_meta.model_dump() == meta.model_dump()
     assert object_meta.model_dump() == meta.model_dump()
+def test_metadata_get_content(text_en):
+    iscc = sct.create(text_en, granular=True)
+    assert iscc.get_content() == text_en
+def test_metadata_get_content_no_fetures():
+    meta = Metadata(iscc="ISCC1234567890")
+    assert meta.get_content() is None
+def test_metadata_get_content_index_format():
+    meta = sct.create("Hello World", granular=True).to_index_format()
+    assert meta.get_content() == "Hello World"
+def test_metadata_get_content_no_content():
+    meta = sct.create("Hello World", granular=True, contents=False)
+    assert meta.get_content() is None
+def test_metadata_get_overlaps():
+    # Test with no features
+    meta = Metadata(iscc="ISCC1234567890")
+    assert meta.get_overlaps() == []
+    # Test with features but no simprints
+    meta = Metadata(iscc="ISCC1234567890", features=[FeatureSet()])
+    assert meta.get_overlaps() == []
+    # Test with non-overlapping chunks
+    features = [
+        FeatureSet(
+            simprints=[
+                Feature(simprint="feature1", offset=0, content="Hello"),
+                Feature(simprint="feature2", offset=5, content="World"),
+            ]
+        )
+    ]
+    meta = Metadata(iscc="ISCC1234567890", features=features)
+    assert meta.get_overlaps() == [""]
+    # Test with overlapping chunks
+    features = [
+        FeatureSet(
+            simprints=[
+                Feature(simprint="feature1", offset=0, content="Hello W"),
+                Feature(simprint="feature2", offset=5, content="World"),
+            ]
+        )
+    ]
+    meta = Metadata(iscc="ISCC1234567890", features=features)
+    assert meta.get_overlaps() == [" W"]
+    # Test with multiple overlaps
+    features = [
+        FeatureSet(
+            simprints=[
+                Feature(simprint="feature1", offset=0, content="Hello W"),
+                Feature(simprint="feature2", offset=5, content="World!"),
+                Feature(simprint="feature3", offset=10, content="! How are you?"),
+            ]
+        )
+    ]
+    meta = Metadata(iscc="ISCC1234567890", features=features)
+    assert meta.get_overlaps() == [" W", "!"]
+    # Test with index format
+    features = [
+        FeatureSet(
+            simprints=["feature1", "feature2", "feature3"],
+            offsets=[0, 5, 10],
+            contents=["Hello W", "World!", "! How are you?"],
+        )
+    ]
+    meta = Metadata(iscc="ISCC1234567890", features=features)
+    assert meta.get_overlaps() == [" W", "!"]
+    # Test with missing content or offset
+    features = [
+        FeatureSet(
+            simprints=[
+                Feature(simprint="feature1", offset=0, content="Hello"),
+                Feature(simprint="feature2", content="World"),
+                Feature(simprint="feature3", offset=10),
+            ]
+        )
+    ]
+    meta = Metadata(iscc="ISCC1234567890", features=features)
+    assert meta.get_overlaps() == []