Synced repo using 'sync_with_huggingface' Github Action
Browse files- iscc_sct/code_semantic_text.py +6 -4
- iscc_sct/demo.py +97 -34
- iscc_sct/models.py +74 -0
- poetry.lock +71 -71
- tests/test_demo.py +9 -12
- tests/test_models.py +92 -0
iscc_sct/code_semantic_text.py
CHANGED
@@ -75,9 +75,10 @@ def code_text_semantic(fp, **options):
|
|
75 |
:key characters (bool): Return document character count (default True).
|
76 |
:key embedding (bool): Return global document embedding (default False).
|
77 |
:key precision (int): Max fractional digits for embeddings (default 8).
|
78 |
-
:key
|
79 |
:key offsets (bool): Return character offsets for granular features (default False).
|
80 |
-
:key
|
|
|
81 |
:key max_tokens (int): Max tokens per chunk (default 127).
|
82 |
:key overlap (int): Max tokens allowed to overlap between chunks (default 48).
|
83 |
:key trim (int): Trim whitespace from chunks (default False).
|
@@ -98,9 +99,10 @@ def gen_text_code_semantic(text, **options):
|
|
98 |
:key characters (bool): Return document character count (default True).
|
99 |
:key embedding (bool): Return global document embedding (default False).
|
100 |
:key precision (int): Max fractional digits for embeddings (default 8).
|
101 |
-
:key
|
102 |
:key offsets (bool): Return character offsets for granular features (default False).
|
103 |
-
:key
|
|
|
104 |
:key max_tokens (int): Max tokens per chunk (default 127).
|
105 |
:key overlap (int): Max tokens allowed to overlap between chunks (default 48).
|
106 |
:key trim (int): Trim whitespace from chunks (default False).
|
|
|
75 |
:key characters (bool): Return document character count (default True).
|
76 |
:key embedding (bool): Return global document embedding (default False).
|
77 |
:key precision (int): Max fractional digits for embeddings (default 8).
|
78 |
+
:key simprints (bool): Return granular document features (default False).
|
79 |
:key offsets (bool): Return character offsets for granular features (default False).
|
80 |
+
:key sizes (bool): Include sizes of granular features (number of chars, default False).
|
81 |
+
:key contents (bool): Return text chunks (default False).
|
82 |
:key max_tokens (int): Max tokens per chunk (default 127).
|
83 |
:key overlap (int): Max tokens allowed to overlap between chunks (default 48).
|
84 |
:key trim (bool): Trim whitespace from chunks (default False).
|
|
|
99 |
:key characters (bool): Return document character count (default True).
|
100 |
:key embedding (bool): Return global document embedding (default False).
|
101 |
:key precision (int): Max fractional digits for embeddings (default 8).
|
102 |
+
:key simprints (bool): Return granular document features (default False).
|
103 |
:key offsets (bool): Return character offsets for granular features (default False).
|
104 |
+
:key sizes (bool): Include sizes of granular features (number of chars, default False).
|
105 |
+
:key contents (bool): Return text chunks (default False).
|
106 |
:key max_tokens (int): Max tokens per chunk (default 127).
|
107 |
:key overlap (int): Max tokens allowed to overlap between chunks (default 48).
|
108 |
:key trim (bool): Trim whitespace from chunks (default False).
|
iscc_sct/demo.py
CHANGED
@@ -8,6 +8,46 @@ import iscc_sct as sct
|
|
8 |
import textwrap
|
9 |
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
def compute_iscc_code(text1, text2, bit_length):
|
12 |
code1 = sct.gen_text_code_semantic(text1, bits=bit_length)
|
13 |
code2 = sct.gen_text_code_semantic(text2, bits=bit_length)
|
@@ -119,33 +159,6 @@ ISCC 适用于特定的数字资产,是使用本文件中的算法和规则从
|
|
119 |
)
|
120 |
|
121 |
custom_css = """
|
122 |
-
#chunked-text span.label {
|
123 |
-
text-transform: none !important;
|
124 |
-
}
|
125 |
-
|
126 |
-
.clickable-example {
|
127 |
-
cursor: pointer;
|
128 |
-
transition: all 0.3s ease;
|
129 |
-
}
|
130 |
-
|
131 |
-
.clickable-example:hover {
|
132 |
-
background-color: #f0f0f0;
|
133 |
-
transform: scale(1.02);
|
134 |
-
}
|
135 |
-
|
136 |
-
.clickable-example .label-wrap {
|
137 |
-
font-weight: bold;
|
138 |
-
color: #4a90e2;
|
139 |
-
}
|
140 |
-
|
141 |
-
.truncate-text {
|
142 |
-
white-space: nowrap;
|
143 |
-
overflow: hidden;
|
144 |
-
text-overflow: ellipsis;
|
145 |
-
max-width: 300px;
|
146 |
-
display: inline-block;
|
147 |
-
}
|
148 |
-
|
149 |
"""
|
150 |
|
151 |
iscc_theme = gr.themes.Default(
|
@@ -183,6 +196,11 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
|
|
183 |
choices=["None", "English", "Bulgarian"], label="Select sample for Text A", value="None"
|
184 |
)
|
185 |
out_code_a = gr.Textbox(label="ISCC Code for Text A")
|
|
|
|
|
|
|
|
|
|
|
186 |
with gr.Column(variant="panel"):
|
187 |
in_text_b = gr.TextArea(
|
188 |
label="Text B",
|
@@ -194,6 +212,11 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
|
|
194 |
choices=["None", "German", "Chinese"], label="Select sample for Text B", value="None"
|
195 |
)
|
196 |
out_code_b = gr.Textbox(label="ISCC Code for Text B")
|
|
|
|
|
|
|
|
|
|
|
197 |
|
198 |
def update_sample_text(choice, text_a_or_b):
|
199 |
if choice == "None":
|
@@ -216,12 +239,47 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
|
|
216 |
|
217 |
def process_text(text, nbits, suffix):
|
218 |
log.debug(f"{text[:20]}")
|
219 |
-
if not text:
|
220 |
-
return
|
221 |
out_code_func = globals().get(f"out_code_{suffix}")
|
222 |
-
|
223 |
-
|
224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
|
226 |
def recalculate_iscc(text_a, text_b, nbits):
|
227 |
code_a = sct.gen_text_code_semantic(text_a, bits=nbits)["iscc"] if text_a else None
|
@@ -241,14 +299,14 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
|
|
241 |
in_text_a.change(
|
242 |
lambda text, nbits: process_text(text, nbits, "a"),
|
243 |
inputs=[in_text_a, in_iscc_bits],
|
244 |
-
outputs=[out_code_a],
|
245 |
show_progress="full",
|
246 |
trigger_mode="always_last",
|
247 |
)
|
248 |
in_text_b.change(
|
249 |
lambda text, nbits: process_text(text, nbits, "b"),
|
250 |
inputs=[in_text_b, in_iscc_bits],
|
251 |
-
outputs=[out_code_b],
|
252 |
show_progress="full",
|
253 |
trigger_mode="always_last",
|
254 |
)
|
@@ -273,6 +331,8 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
|
|
273 |
gr.Textbox(value=""), # Reset ISCC Code for Text A
|
274 |
gr.Textbox(value=""), # Reset ISCC Code for Text B
|
275 |
gr.HTML(value=""), # Reset Similarity
|
|
|
|
|
276 |
)
|
277 |
|
278 |
with gr.Row(variant="panel"):
|
@@ -289,6 +349,8 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
|
|
289 |
out_code_a,
|
290 |
out_code_b,
|
291 |
out_similarity,
|
|
|
|
|
292 |
],
|
293 |
)
|
294 |
|
@@ -327,5 +389,6 @@ This technology opens up new possibilities for understanding and managing text c
|
|
327 |
"""
|
328 |
)
|
329 |
|
|
|
330 |
if __name__ == "__main__": # pragma: no cover
|
331 |
demo.launch()
|
|
|
8 |
import textwrap
|
9 |
|
10 |
|
11 |
+
# Maps each line-breaking character to a visible stand-in so that chunk
# boundaries stay readable when the text is rendered on a single line.
newline_symbols = {
    "\u000a": "⏎",  # Line Feed - 'Return Symbol'
    "\u000b": "↨",  # Vertical Tab - 'Up Down Arrow with Base'
    "\u000c": "␌",  # Form Feed - Unicode Control Pictures representation
    "\u000d": "↵",  # Carriage Return - 'Downwards Arrow with Corner Leftwards'
    "\u0085": "⤓",  # Next Line - 'Downwards Arrow to Bar'
    "\u2028": "↲",  # Line Separator - 'Downwards Arrow with Tip Leftwards'
    "\u2029": "¶",  # Paragraph Separator - 'Pilcrow'
}

# Pre-built translation table: one pass over the text instead of one
# `str.replace` call per symbol.
_NEWLINE_TABLE = str.maketrans(newline_symbols)


def no_nl(text):
    """Replace non-printable newline characters with printable symbols"""
    return text.translate(_NEWLINE_TABLE)


def no_nl_inner(text):
    """Replace non-printable newline characters with printable symbols, ignoring leading and
    trailing whitespace.

    Leading and trailing whitespace is returned exactly as it was supplied (a leading space
    stays a space, a leading line feed stays a line feed); only line breaks inside the text
    are replaced by their symbols. Whitespace-only input is returned unchanged.

    :param text: Text chunk to make printable.
    :return: Text with inner newline characters replaced by symbols.
    """
    core = text.strip()
    if not core:
        # Nothing but whitespace: there is no "inner" part to convert.
        return text

    lead_len = len(text) - len(text.lstrip())
    leading = text[:lead_len]
    trailing = text[lead_len + len(core) :]
    return leading + no_nl(core) + trailing


def clean_chunk(chunk):
    """Collapse every pair of consecutive line feeds into a single line feed.

    This is a single left-to-right pass, so a run of line feeds is roughly halved
    (2 -> 1, 3 -> 2, 4 -> 2) rather than reduced to a fixed maximum.
    """
    return chunk.replace("\n\n", "\n")
|
49 |
+
|
50 |
+
|
51 |
def compute_iscc_code(text1, text2, bit_length):
|
52 |
code1 = sct.gen_text_code_semantic(text1, bits=bit_length)
|
53 |
code2 = sct.gen_text_code_semantic(text2, bits=bit_length)
|
|
|
159 |
)
|
160 |
|
161 |
custom_css = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
"""
|
163 |
|
164 |
iscc_theme = gr.themes.Default(
|
|
|
196 |
choices=["None", "English", "Bulgarian"], label="Select sample for Text A", value="None"
|
197 |
)
|
198 |
out_code_a = gr.Textbox(label="ISCC Code for Text A")
|
199 |
+
out_chunks_a = gr.HighlightedText(
|
200 |
+
label="Chunked Text A",
|
201 |
+
interactive=False,
|
202 |
+
elem_id="chunked-text-a",
|
203 |
+
)
|
204 |
with gr.Column(variant="panel"):
|
205 |
in_text_b = gr.TextArea(
|
206 |
label="Text B",
|
|
|
212 |
choices=["None", "German", "Chinese"], label="Select sample for Text B", value="None"
|
213 |
)
|
214 |
out_code_b = gr.Textbox(label="ISCC Code for Text B")
|
215 |
+
out_chunks_b = gr.HighlightedText(
|
216 |
+
label="Chunked Text B",
|
217 |
+
interactive=False,
|
218 |
+
elem_id="chunked-text-b",
|
219 |
+
)
|
220 |
|
221 |
def update_sample_text(choice, text_a_or_b):
|
222 |
if choice == "None":
|
|
|
239 |
|
240 |
def process_text(text, nbits, suffix):
    """Compute the ISCC code and the highlighted chunk view for one text input.

    :param text: Input text; an empty or missing value clears both outputs.
    :param nbits: Bit length passed to `sct.gen_text_code_semantic` as `bits`.
    :param suffix: Suffix of the target output components, looked up in module
        globals as `out_code_<suffix>` and `out_chunks_<suffix>`.
    :return: Dict mapping the two output components to their updated values.
    """
    out_code_func = globals().get(f"out_code_{suffix}")
    out_chunks_func = globals().get(f"out_chunks_{suffix}")

    # Guard before touching `text` so a missing value clears the outputs
    # instead of failing on the slice used for logging.
    if not text:
        return {
            out_code_func: gr.Textbox(value=None),
            out_chunks_func: gr.HighlightedText(value=None, elem_id="chunked-text"),
        }

    log.debug(f"{text[:20]}")

    result = sct.gen_text_code_semantic(text, bits=nbits, simprints=True, offsets=True, sizes=True, contents=True)
    iscc = sct.Metadata(**result).to_object_format()

    # Generate chunked text with simprints and overlaps
    features = iscc.features[0]
    highlighted_chunks = []
    overlaps = iscc.get_overlaps()

    for i, feature in enumerate(features.simprints):
        feature: sct.Feature
        content = feature.content

        # overlaps[k] is the text shared by chunk k and chunk k + 1; index
        # defensively so a short or empty overlap list cannot raise.
        leading = overlaps[i - 1] if 0 < i <= len(overlaps) else ""
        trailing = overlaps[i] if i < len(overlaps) else ""

        # Remove leading overlap
        if leading:
            content = content[len(leading) :]

        # Remove trailing overlap
        if trailing:
            content = content[: -len(trailing)]

        label = f"{feature.size}:{feature.simprint}"
        highlighted_chunks.append((no_nl_inner(content), label))

        # Show the shared text once, between the two chunks it belongs to.
        if trailing:
            highlighted_chunks.append((f"\n{no_nl(trailing)}\n", "overlap"))

    return {
        out_code_func: gr.Textbox(value=iscc.iscc),
        out_chunks_func: gr.HighlightedText(value=highlighted_chunks, elem_id="chunked-text"),
    }
|
283 |
|
284 |
def recalculate_iscc(text_a, text_b, nbits):
|
285 |
code_a = sct.gen_text_code_semantic(text_a, bits=nbits)["iscc"] if text_a else None
|
|
|
299 |
in_text_a.change(
|
300 |
lambda text, nbits: process_text(text, nbits, "a"),
|
301 |
inputs=[in_text_a, in_iscc_bits],
|
302 |
+
outputs=[out_code_a, out_chunks_a],
|
303 |
show_progress="full",
|
304 |
trigger_mode="always_last",
|
305 |
)
|
306 |
in_text_b.change(
|
307 |
lambda text, nbits: process_text(text, nbits, "b"),
|
308 |
inputs=[in_text_b, in_iscc_bits],
|
309 |
+
outputs=[out_code_b, out_chunks_b],
|
310 |
show_progress="full",
|
311 |
trigger_mode="always_last",
|
312 |
)
|
|
|
331 |
gr.Textbox(value=""), # Reset ISCC Code for Text A
|
332 |
gr.Textbox(value=""), # Reset ISCC Code for Text B
|
333 |
gr.HTML(value=""), # Reset Similarity
|
334 |
+
gr.HighlightedText(value=[]), # Reset Chunked Text A
|
335 |
+
gr.HighlightedText(value=[]), # Reset Chunked Text B
|
336 |
)
|
337 |
|
338 |
with gr.Row(variant="panel"):
|
|
|
349 |
out_code_a,
|
350 |
out_code_b,
|
351 |
out_similarity,
|
352 |
+
out_chunks_a,
|
353 |
+
out_chunks_b,
|
354 |
],
|
355 |
)
|
356 |
|
|
|
389 |
"""
|
390 |
)
|
391 |
|
392 |
+
|
393 |
if __name__ == "__main__": # pragma: no cover
|
394 |
demo.launch()
|
iscc_sct/models.py
CHANGED
@@ -139,6 +139,80 @@ class Metadata(PrettyBaseModel):
|
|
139 |
|
140 |
return Metadata(iscc=self.iscc, characters=self.characters, features=new_features)
|
141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
def to_object_format(self) -> "Metadata":
|
143 |
"""
|
144 |
Convert the Metadata object to use the Object-Format for features.
|
|
|
139 |
|
140 |
return Metadata(iscc=self.iscc, characters=self.characters, features=new_features)
|
141 |
|
142 |
+
def get_content(self) -> Optional[str]:
    """
    Reconstruct and return the original input text if all necessary data is available.
    This method removes overlaps in adjacent text chunks.

    Chunks are processed in offset order. A chunk that lies entirely inside text that
    has already been emitted contributes nothing and does not move the write position
    backwards.

    :return: The reconstructed original text, or None if the necessary data is not available.
    """
    if not self.features or not self.features[0].simprints:
        return None

    feature_set = self.features[0]
    if isinstance(feature_set.simprints[0], str):
        # Convert to object format if in index format
        feature_set = self.to_object_format().features[0]

    if not all(feature.content and feature.offset is not None for feature in feature_set.simprints):
        return None

    parts = []
    last_end = 0

    # Sort features by offset
    for feature in sorted(feature_set.simprints, key=lambda x: x.offset):
        start = feature.offset
        end = start + len(feature.content)
        if end <= last_end:
            # Fully covered by earlier chunks; keep last_end where it is.
            continue
        # Drop the part of this chunk that overlaps already emitted text.
        parts.append(feature.content[max(last_end - start, 0) :])
        last_end = end

    return "".join(parts)
|
178 |
+
|
179 |
+
def get_overlaps(self) -> List[str]:
    """
    Returns a list of overlapping text between consecutive chunks.
    For non-overlapping consecutive chunks, returns an empty string.

    Chunks are compared in offset order, so entry ``k`` describes the text shared by
    the k-th and (k+1)-th chunk. The shared text never extends past the end of the
    shorter-reaching chunk.

    :return: List of overlapping text or empty strings.
    """
    if not self.features or not self.features[0].simprints:
        return []

    feature_set = self.features[0]
    if isinstance(feature_set.simprints[0], str):
        # Convert to object format if in index format
        feature_set = self.to_object_format().features[0]

    if not all(feature.content and feature.offset is not None for feature in feature_set.simprints):
        return []

    # Sort features by offset
    ordered = sorted(feature_set.simprints, key=lambda x: x.offset)
    overlaps = []

    for current, following in zip(ordered, ordered[1:]):
        current_end = current.offset + len(current.content)
        if current_end > following.offset:
            # Clamp to the earlier of the two chunk ends so a chunk nested
            # inside the current one only reports the text both contain.
            shared_end = min(current_end, following.offset + len(following.content))
            overlaps.append(
                current.content[following.offset - current.offset : shared_end - current.offset]
            )
        else:
            overlaps.append("")

    return overlaps
|
215 |
+
|
216 |
def to_object_format(self) -> "Metadata":
|
217 |
"""
|
218 |
Convert the Metadata object to use the Object-Format for features.
|
poetry.lock
CHANGED
@@ -416,18 +416,18 @@ test = ["pytest (>=6)"]
|
|
416 |
|
417 |
[[package]]
|
418 |
name = "fastapi"
|
419 |
-
version = "0.112.
|
420 |
description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
|
421 |
optional = true
|
422 |
python-versions = ">=3.8"
|
423 |
files = [
|
424 |
-
{file = "fastapi-0.112.
|
425 |
-
{file = "fastapi-0.112.
|
426 |
]
|
427 |
|
428 |
[package.dependencies]
|
429 |
pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0"
|
430 |
-
starlette = ">=0.37.2,<0.
|
431 |
typing-extensions = ">=4.8.0"
|
432 |
|
433 |
[package.extras]
|
@@ -774,13 +774,13 @@ test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "p
|
|
774 |
|
775 |
[[package]]
|
776 |
name = "importlib-resources"
|
777 |
-
version = "6.4.
|
778 |
description = "Read resources from Python packages"
|
779 |
optional = true
|
780 |
python-versions = ">=3.8"
|
781 |
files = [
|
782 |
-
{file = "importlib_resources-6.4.
|
783 |
-
{file = "importlib_resources-6.4.
|
784 |
]
|
785 |
|
786 |
[package.dependencies]
|
@@ -1310,69 +1310,69 @@ files = [
|
|
1310 |
|
1311 |
[[package]]
|
1312 |
name = "onnxruntime"
|
1313 |
-
version = "1.
|
1314 |
description = "ONNX Runtime is a runtime accelerator for Machine Learning models"
|
1315 |
optional = false
|
1316 |
python-versions = "*"
|
1317 |
files = [
|
1318 |
-
{file = "onnxruntime-1.
|
1319 |
-
{file = "onnxruntime-1.
|
1320 |
-
{file = "onnxruntime-1.
|
1321 |
-
{file = "onnxruntime-1.
|
1322 |
-
{file = "onnxruntime-1.
|
1323 |
-
{file = "onnxruntime-1.
|
1324 |
-
{file = "onnxruntime-1.
|
1325 |
-
{file = "onnxruntime-1.
|
1326 |
-
{file = "onnxruntime-1.
|
1327 |
-
{file = "onnxruntime-1.
|
1328 |
-
{file = "onnxruntime-1.
|
1329 |
-
{file = "onnxruntime-1.
|
1330 |
-
{file = "onnxruntime-1.
|
1331 |
-
{file = "onnxruntime-1.
|
1332 |
-
{file = "onnxruntime-1.
|
1333 |
-
{file = "onnxruntime-1.
|
1334 |
-
{file = "onnxruntime-1.
|
1335 |
-
{file = "onnxruntime-1.
|
1336 |
-
{file = "onnxruntime-1.
|
1337 |
-
{file = "onnxruntime-1.
|
1338 |
-
{file = "onnxruntime-1.
|
1339 |
-
{file = "onnxruntime-1.
|
1340 |
-
{file = "onnxruntime-1.
|
1341 |
-
{file = "onnxruntime-1.
|
1342 |
-
{file = "onnxruntime-1.
|
1343 |
]
|
1344 |
|
1345 |
[package.dependencies]
|
1346 |
coloredlogs = "*"
|
1347 |
flatbuffers = "*"
|
1348 |
-
numpy = ">=1.21.6
|
1349 |
packaging = "*"
|
1350 |
protobuf = "*"
|
1351 |
sympy = "*"
|
1352 |
|
1353 |
[[package]]
|
1354 |
name = "onnxruntime-gpu"
|
1355 |
-
version = "1.
|
1356 |
description = "ONNX Runtime is a runtime accelerator for Machine Learning models"
|
1357 |
optional = true
|
1358 |
python-versions = "*"
|
1359 |
files = [
|
1360 |
-
{file = "onnxruntime_gpu-1.
|
1361 |
-
{file = "onnxruntime_gpu-1.
|
1362 |
-
{file = "onnxruntime_gpu-1.
|
1363 |
-
{file = "onnxruntime_gpu-1.
|
1364 |
-
{file = "onnxruntime_gpu-1.
|
1365 |
-
{file = "onnxruntime_gpu-1.
|
1366 |
-
{file = "onnxruntime_gpu-1.
|
1367 |
-
{file = "onnxruntime_gpu-1.
|
1368 |
-
{file = "onnxruntime_gpu-1.
|
1369 |
-
{file = "onnxruntime_gpu-1.
|
1370 |
]
|
1371 |
|
1372 |
[package.dependencies]
|
1373 |
coloredlogs = "*"
|
1374 |
flatbuffers = "*"
|
1375 |
-
numpy = ">=1.21.6
|
1376 |
packaging = "*"
|
1377 |
protobuf = "*"
|
1378 |
sympy = "*"
|
@@ -2299,29 +2299,29 @@ files = [
|
|
2299 |
|
2300 |
[[package]]
|
2301 |
name = "ruff"
|
2302 |
-
version = "0.
|
2303 |
description = "An extremely fast Python linter and code formatter, written in Rust."
|
2304 |
optional = false
|
2305 |
python-versions = ">=3.7"
|
2306 |
files = [
|
2307 |
-
{file = "ruff-0.
|
2308 |
-
{file = "ruff-0.
|
2309 |
-
{file = "ruff-0.
|
2310 |
-
{file = "ruff-0.
|
2311 |
-
{file = "ruff-0.
|
2312 |
-
{file = "ruff-0.
|
2313 |
-
{file = "ruff-0.
|
2314 |
-
{file = "ruff-0.
|
2315 |
-
{file = "ruff-0.
|
2316 |
-
{file = "ruff-0.
|
2317 |
-
{file = "ruff-0.
|
2318 |
-
{file = "ruff-0.
|
2319 |
-
{file = "ruff-0.
|
2320 |
-
{file = "ruff-0.
|
2321 |
-
{file = "ruff-0.
|
2322 |
-
{file = "ruff-0.
|
2323 |
-
{file = "ruff-0.
|
2324 |
-
{file = "ruff-0.
|
2325 |
]
|
2326 |
|
2327 |
[[package]]
|
@@ -2398,13 +2398,13 @@ files = [
|
|
2398 |
|
2399 |
[[package]]
|
2400 |
name = "starlette"
|
2401 |
-
version = "0.
|
2402 |
description = "The little ASGI library that shines."
|
2403 |
optional = true
|
2404 |
python-versions = ">=3.8"
|
2405 |
files = [
|
2406 |
-
{file = "starlette-0.
|
2407 |
-
{file = "starlette-0.
|
2408 |
]
|
2409 |
|
2410 |
[package.dependencies]
|
@@ -2592,13 +2592,13 @@ telegram = ["requests"]
|
|
2592 |
|
2593 |
[[package]]
|
2594 |
name = "typer"
|
2595 |
-
version = "0.12.
|
2596 |
description = "Typer, build great CLIs. Easy to code. Based on Python type hints."
|
2597 |
optional = true
|
2598 |
python-versions = ">=3.7"
|
2599 |
files = [
|
2600 |
-
{file = "typer-0.12.
|
2601 |
-
{file = "typer-0.12.
|
2602 |
]
|
2603 |
|
2604 |
[package.dependencies]
|
|
|
416 |
|
417 |
[[package]]
|
418 |
name = "fastapi"
|
419 |
+
version = "0.112.1"
|
420 |
description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
|
421 |
optional = true
|
422 |
python-versions = ">=3.8"
|
423 |
files = [
|
424 |
+
{file = "fastapi-0.112.1-py3-none-any.whl", hash = "sha256:bcbd45817fc2a1cd5da09af66815b84ec0d3d634eb173d1ab468ae3103e183e4"},
|
425 |
+
{file = "fastapi-0.112.1.tar.gz", hash = "sha256:b2537146f8c23389a7faa8b03d0bd38d4986e6983874557d95eed2acc46448ef"},
|
426 |
]
|
427 |
|
428 |
[package.dependencies]
|
429 |
pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0"
|
430 |
+
starlette = ">=0.37.2,<0.39.0"
|
431 |
typing-extensions = ">=4.8.0"
|
432 |
|
433 |
[package.extras]
|
|
|
774 |
|
775 |
[[package]]
|
776 |
name = "importlib-resources"
|
777 |
+
version = "6.4.3"
|
778 |
description = "Read resources from Python packages"
|
779 |
optional = true
|
780 |
python-versions = ">=3.8"
|
781 |
files = [
|
782 |
+
{file = "importlib_resources-6.4.3-py3-none-any.whl", hash = "sha256:2d6dfe3b9e055f72495c2085890837fc8c758984e209115c8792bddcb762cd93"},
|
783 |
+
{file = "importlib_resources-6.4.3.tar.gz", hash = "sha256:4a202b9b9d38563b46da59221d77bb73862ab5d79d461307bcb826d725448b98"},
|
784 |
]
|
785 |
|
786 |
[package.dependencies]
|
|
|
1310 |
|
1311 |
[[package]]
|
1312 |
name = "onnxruntime"
|
1313 |
+
version = "1.19.0"
|
1314 |
description = "ONNX Runtime is a runtime accelerator for Machine Learning models"
|
1315 |
optional = false
|
1316 |
python-versions = "*"
|
1317 |
files = [
|
1318 |
+
{file = "onnxruntime-1.19.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:6ce22a98dfec7b646ae305f52d0ce14a189a758b02ea501860ca719f4b0ae04b"},
|
1319 |
+
{file = "onnxruntime-1.19.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:19019c72873f26927aa322c54cf2bf7312b23451b27451f39b88f57016c94f8b"},
|
1320 |
+
{file = "onnxruntime-1.19.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8eaa16df99171dc636e30108d15597aed8c4c2dd9dbfdd07cc464d57d73fb275"},
|
1321 |
+
{file = "onnxruntime-1.19.0-cp310-cp310-win32.whl", hash = "sha256:0eb0f8dbe596fd0f4737fe511fdbb17603853a7d204c5b2ca38d3c7808fc556b"},
|
1322 |
+
{file = "onnxruntime-1.19.0-cp310-cp310-win_amd64.whl", hash = "sha256:616092d54ba8023b7bc0a5f6d900a07a37cc1cfcc631873c15f8c1d6e9e184d4"},
|
1323 |
+
{file = "onnxruntime-1.19.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:a2b53b3c287cd933e5eb597273926e899082d8c84ab96e1b34035764a1627e17"},
|
1324 |
+
{file = "onnxruntime-1.19.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e94984663963e74fbb468bde9ec6f19dcf890b594b35e249c4dc8789d08993c5"},
|
1325 |
+
{file = "onnxruntime-1.19.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f379d1f050cfb55ce015d53727b78ee362febc065c38eed81512b22b757da73"},
|
1326 |
+
{file = "onnxruntime-1.19.0-cp311-cp311-win32.whl", hash = "sha256:4ccb48faea02503275ae7e79e351434fc43c294c4cb5c4d8bcb7479061396614"},
|
1327 |
+
{file = "onnxruntime-1.19.0-cp311-cp311-win_amd64.whl", hash = "sha256:9cdc8d311289a84e77722de68bd22b8adfb94eea26f4be6f9e017350faac8b18"},
|
1328 |
+
{file = "onnxruntime-1.19.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:1b59eaec1be9a8613c5fdeaafe67f73a062edce3ac03bbbdc9e2d98b58a30617"},
|
1329 |
+
{file = "onnxruntime-1.19.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be4144d014a4b25184e63ce7a463a2e7796e2f3df931fccc6a6aefa6f1365dc5"},
|
1330 |
+
{file = "onnxruntime-1.19.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10d7e7d4ca7021ce7f29a66dbc6071addf2de5839135339bd855c6d9c2bba371"},
|
1331 |
+
{file = "onnxruntime-1.19.0-cp312-cp312-win32.whl", hash = "sha256:87f2c58b577a1fb31dc5d92b647ecc588fd5f1ea0c3ad4526f5f80a113357c8d"},
|
1332 |
+
{file = "onnxruntime-1.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:8a1f50d49676d7b69566536ff039d9e4e95fc482a55673719f46528218ecbb94"},
|
1333 |
+
{file = "onnxruntime-1.19.0-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:71423c8c4b2d7a58956271534302ec72721c62a41efd0c4896343249b8399ab0"},
|
1334 |
+
{file = "onnxruntime-1.19.0-cp38-cp38-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9d63630d45e9498f96e75bbeb7fd4a56acb10155de0de4d0e18d1b6cbb0b358a"},
|
1335 |
+
{file = "onnxruntime-1.19.0-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3bfd15db1e8794d379a86c1a9116889f47f2cca40cc82208fc4f7e8c38e8522"},
|
1336 |
+
{file = "onnxruntime-1.19.0-cp38-cp38-win32.whl", hash = "sha256:3b098003b6b4cb37cc84942e5f1fe27f945dd857cbd2829c824c26b0ba4a247e"},
|
1337 |
+
{file = "onnxruntime-1.19.0-cp38-cp38-win_amd64.whl", hash = "sha256:cea067a6541d6787d903ee6843401c5b1332a266585160d9700f9f0939443886"},
|
1338 |
+
{file = "onnxruntime-1.19.0-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:c4fcff12dc5ca963c5f76b9822bb404578fa4a98c281e8c666b429192799a099"},
|
1339 |
+
{file = "onnxruntime-1.19.0-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f6dcad8a4db908fbe70b98c79cea1c8b6ac3316adf4ce93453136e33a524ac59"},
|
1340 |
+
{file = "onnxruntime-1.19.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4bc449907c6e8d99eee5ae5cc9c8fdef273d801dcd195393d3f9ab8ad3f49522"},
|
1341 |
+
{file = "onnxruntime-1.19.0-cp39-cp39-win32.whl", hash = "sha256:947febd48405afcf526e45ccff97ff23b15e530434705f734870d22ae7fcf236"},
|
1342 |
+
{file = "onnxruntime-1.19.0-cp39-cp39-win_amd64.whl", hash = "sha256:f60be47eff5ee77fd28a466b0fd41d7debc42a32179d1ddb21e05d6067d7b48b"},
|
1343 |
]
|
1344 |
|
1345 |
[package.dependencies]
|
1346 |
coloredlogs = "*"
|
1347 |
flatbuffers = "*"
|
1348 |
+
numpy = ">=1.21.6"
|
1349 |
packaging = "*"
|
1350 |
protobuf = "*"
|
1351 |
sympy = "*"
|
1352 |
|
1353 |
[[package]]
|
1354 |
name = "onnxruntime-gpu"
|
1355 |
+
version = "1.19.0"
|
1356 |
description = "ONNX Runtime is a runtime accelerator for Machine Learning models"
|
1357 |
optional = true
|
1358 |
python-versions = "*"
|
1359 |
files = [
|
1360 |
+
{file = "onnxruntime_gpu-1.19.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee4cc525a581fd57ffbe266b23484cadbdcd43daf1cc91a632c043c9edd07f55"},
|
1361 |
+
{file = "onnxruntime_gpu-1.19.0-cp310-cp310-win_amd64.whl", hash = "sha256:895221ce7cb0a637d3841ba53701a3aa4d284a7d6e391c873de87ce09defa9e9"},
|
1362 |
+
{file = "onnxruntime_gpu-1.19.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c8bae9164d9586cbf7c7976915b515584d077470307a255b7ca03cf425ce38bf"},
|
1363 |
+
{file = "onnxruntime_gpu-1.19.0-cp311-cp311-win_amd64.whl", hash = "sha256:62418bf4bde804afd1cea1e391f1ee4ba5f50e09cd0397c852e150c013e27ae4"},
|
1364 |
+
{file = "onnxruntime_gpu-1.19.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cbceb55c265266350fa731365ccd224dc29c352470671a24cbc153fe0c4be1f9"},
|
1365 |
+
{file = "onnxruntime_gpu-1.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:35405747644bf8d9d9c84fb6ee3dd04cc594e9c3f5448735c68565fb3372b1cd"},
|
1366 |
+
{file = "onnxruntime_gpu-1.19.0-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ae04a2fdb81654e7616254525d9cd23b05e1ce64a9ea798080d2a20200dddb4a"},
|
1367 |
+
{file = "onnxruntime_gpu-1.19.0-cp38-cp38-win_amd64.whl", hash = "sha256:5aee8fa0e578a696f05335dcabd4cd2027f1e40acbe1cf1930df960efeb3e36f"},
|
1368 |
+
{file = "onnxruntime_gpu-1.19.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f64a02672770d11761b5e2ea34749526f564b88ec364032f613f1519dce2bcc"},
|
1369 |
+
{file = "onnxruntime_gpu-1.19.0-cp39-cp39-win_amd64.whl", hash = "sha256:c30cc85fd09ee9ddd0915855901f9dc80add98572a969ea3c1e8ae9a7e96e94c"},
|
1370 |
]
|
1371 |
|
1372 |
[package.dependencies]
|
1373 |
coloredlogs = "*"
|
1374 |
flatbuffers = "*"
|
1375 |
+
numpy = ">=1.21.6"
|
1376 |
packaging = "*"
|
1377 |
protobuf = "*"
|
1378 |
sympy = "*"
|
|
|
2299 |
|
2300 |
[[package]]
|
2301 |
name = "ruff"
|
2302 |
+
version = "0.6.1"
|
2303 |
description = "An extremely fast Python linter and code formatter, written in Rust."
|
2304 |
optional = false
|
2305 |
python-versions = ">=3.7"
|
2306 |
files = [
|
2307 |
+
{file = "ruff-0.6.1-py3-none-linux_armv6l.whl", hash = "sha256:b4bb7de6a24169dc023f992718a9417380301b0c2da0fe85919f47264fb8add9"},
|
2308 |
+
{file = "ruff-0.6.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:45efaae53b360c81043e311cdec8a7696420b3d3e8935202c2846e7a97d4edae"},
|
2309 |
+
{file = "ruff-0.6.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:bc60c7d71b732c8fa73cf995efc0c836a2fd8b9810e115be8babb24ae87e0850"},
|
2310 |
+
{file = "ruff-0.6.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c7477c3b9da822e2db0b4e0b59e61b8a23e87886e727b327e7dcaf06213c5cf"},
|
2311 |
+
{file = "ruff-0.6.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a0af7ab3f86e3dc9f157a928e08e26c4b40707d0612b01cd577cc84b8905cc9"},
|
2312 |
+
{file = "ruff-0.6.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:392688dbb50fecf1bf7126731c90c11a9df1c3a4cdc3f481b53e851da5634fa5"},
|
2313 |
+
{file = "ruff-0.6.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5278d3e095ccc8c30430bcc9bc550f778790acc211865520f3041910a28d0024"},
|
2314 |
+
{file = "ruff-0.6.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fe6d5f65d6f276ee7a0fc50a0cecaccb362d30ef98a110f99cac1c7872df2f18"},
|
2315 |
+
{file = "ruff-0.6.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2e0dd11e2ae553ee5c92a81731d88a9883af8db7408db47fc81887c1f8b672e"},
|
2316 |
+
{file = "ruff-0.6.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d812615525a34ecfc07fd93f906ef5b93656be01dfae9a819e31caa6cfe758a1"},
|
2317 |
+
{file = "ruff-0.6.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:faaa4060f4064c3b7aaaa27328080c932fa142786f8142aff095b42b6a2eb631"},
|
2318 |
+
{file = "ruff-0.6.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:99d7ae0df47c62729d58765c593ea54c2546d5de213f2af2a19442d50a10cec9"},
|
2319 |
+
{file = "ruff-0.6.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9eb18dfd7b613eec000e3738b3f0e4398bf0153cb80bfa3e351b3c1c2f6d7b15"},
|
2320 |
+
{file = "ruff-0.6.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:c62bc04c6723a81e25e71715aa59489f15034d69bf641df88cb38bdc32fd1dbb"},
|
2321 |
+
{file = "ruff-0.6.1-py3-none-win32.whl", hash = "sha256:9fb4c4e8b83f19c9477a8745e56d2eeef07a7ff50b68a6998f7d9e2e3887bdc4"},
|
2322 |
+
{file = "ruff-0.6.1-py3-none-win_amd64.whl", hash = "sha256:c2ebfc8f51ef4aca05dad4552bbcf6fe8d1f75b2f6af546cc47cc1c1ca916b5b"},
|
2323 |
+
{file = "ruff-0.6.1-py3-none-win_arm64.whl", hash = "sha256:3bc81074971b0ffad1bd0c52284b22411f02a11a012082a76ac6da153536e014"},
|
2324 |
+
{file = "ruff-0.6.1.tar.gz", hash = "sha256:af3ffd8c6563acb8848d33cd19a69b9bfe943667f0419ca083f8ebe4224a3436"},
|
2325 |
]
|
2326 |
|
2327 |
[[package]]
|
|
|
2398 |
|
2399 |
[[package]]
|
2400 |
name = "starlette"
|
2401 |
+
version = "0.38.2"
|
2402 |
description = "The little ASGI library that shines."
|
2403 |
optional = true
|
2404 |
python-versions = ">=3.8"
|
2405 |
files = [
|
2406 |
+
{file = "starlette-0.38.2-py3-none-any.whl", hash = "sha256:4ec6a59df6bbafdab5f567754481657f7ed90dc9d69b0c9ff017907dd54faeff"},
|
2407 |
+
{file = "starlette-0.38.2.tar.gz", hash = "sha256:c7c0441065252160993a1a37cf2a73bb64d271b17303e0b0c1eb7191cfb12d75"},
|
2408 |
]
|
2409 |
|
2410 |
[package.dependencies]
|
|
|
2592 |
|
2593 |
[[package]]
|
2594 |
name = "typer"
|
2595 |
+
version = "0.12.4"
|
2596 |
description = "Typer, build great CLIs. Easy to code. Based on Python type hints."
|
2597 |
optional = true
|
2598 |
python-versions = ">=3.7"
|
2599 |
files = [
|
2600 |
+
{file = "typer-0.12.4-py3-none-any.whl", hash = "sha256:819aa03699f438397e876aa12b0d63766864ecba1b579092cc9fe35d886e34b6"},
|
2601 |
+
{file = "typer-0.12.4.tar.gz", hash = "sha256:c9c1613ed6a166162705b3347b8d10b661ccc5d95692654d0fb628118f2c34e6"},
|
2602 |
]
|
2603 |
|
2604 |
[package.dependencies]
|
tests/test_demo.py
CHANGED
@@ -52,30 +52,27 @@ import gradio as gr
|
|
52 |
from iscc_sct.demo import process_text
|
53 |
|
54 |
|
55 |
-
|
56 |
-
def test_process_text(mock_gen_text_code):
|
57 |
-
mock_gen_text_code.return_value = {"iscc": "ISCC:EAAQCVG2TABD6"}
|
58 |
-
|
59 |
# Test with valid input
|
60 |
result = process_text("Hello, world!", 64, "a")
|
61 |
assert isinstance(result, dict)
|
62 |
-
assert len(result) ==
|
63 |
key, value = next(iter(result.items()))
|
64 |
assert isinstance(key, gr.components.Textbox)
|
65 |
assert isinstance(value, gr.components.Textbox)
|
66 |
-
assert value.value == "ISCC:
|
67 |
|
68 |
# Test with empty input
|
69 |
result = process_text("", 64, "b")
|
70 |
-
assert result
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
|
76 |
# Test with different suffix
|
77 |
result = process_text("Test", 64, "b")
|
78 |
-
assert len(result) ==
|
79 |
key, value = next(iter(result.items()))
|
80 |
assert isinstance(key, gr.components.Textbox)
|
81 |
assert isinstance(value, gr.components.Textbox)
|
|
|
52 |
from iscc_sct.demo import process_text
|
53 |
|
54 |
|
55 |
+
def test_process_text():
|
|
|
|
|
|
|
56 |
# Test with valid input
|
57 |
result = process_text("Hello, world!", 64, "a")
|
58 |
assert isinstance(result, dict)
|
59 |
+
assert len(result) == 2
|
60 |
key, value = next(iter(result.items()))
|
61 |
assert isinstance(key, gr.components.Textbox)
|
62 |
assert isinstance(value, gr.components.Textbox)
|
63 |
+
assert value.value == "ISCC:CAA7GY4JTDI3XZYV"
|
64 |
|
65 |
# Test with empty input
|
66 |
result = process_text("", 64, "b")
|
67 |
+
assert isinstance(result, dict)
|
68 |
+
assert len(result) == 2
|
69 |
+
for key, value in result.items():
|
70 |
+
assert isinstance(key, (gr.components.Textbox, gr.components.HighlightedText))
|
71 |
+
assert value.value is None
|
72 |
|
73 |
# Test with different suffix
|
74 |
result = process_text("Test", 64, "b")
|
75 |
+
assert len(result) == 2
|
76 |
key, value = next(iter(result.items()))
|
77 |
assert isinstance(key, gr.components.Textbox)
|
78 |
assert isinstance(value, gr.components.Textbox)
|
tests/test_models.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import pytest
|
2 |
from pydantic import ValidationError
|
3 |
from iscc_sct.models import Metadata, Feature, FeatureSet
|
|
|
4 |
|
5 |
|
6 |
def test_feature_initialization():
|
@@ -100,3 +101,94 @@ def test_metadata_format_conversion_with_no_features():
|
|
100 |
object_meta = meta.to_object_format()
|
101 |
assert index_meta.model_dump() == meta.model_dump()
|
102 |
assert object_meta.model_dump() == meta.model_dump()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import pytest
|
2 |
from pydantic import ValidationError
|
3 |
from iscc_sct.models import Metadata, Feature, FeatureSet
|
4 |
+
import iscc_sct as sct
|
5 |
|
6 |
|
7 |
def test_feature_initialization():
|
|
|
101 |
object_meta = meta.to_object_format()
|
102 |
assert index_meta.model_dump() == meta.model_dump()
|
103 |
assert object_meta.model_dump() == meta.model_dump()
|
104 |
+
|
105 |
+
|
106 |
+
def test_metadata_get_content(text_en):
|
107 |
+
iscc = sct.create(text_en, granular=True)
|
108 |
+
assert iscc.get_content() == text_en
|
109 |
+
|
110 |
+
|
111 |
+
def test_metadata_get_content_no_fetures():
|
112 |
+
meta = Metadata(iscc="ISCC1234567890")
|
113 |
+
assert meta.get_content() is None
|
114 |
+
|
115 |
+
|
116 |
+
def test_metadata_get_content_index_format():
|
117 |
+
meta = sct.create("Hello World", granular=True).to_index_format()
|
118 |
+
assert meta.get_content() == "Hello World"
|
119 |
+
|
120 |
+
|
121 |
+
def test_metadata_get_content_no_content():
|
122 |
+
meta = sct.create("Hello World", granular=True, contents=False)
|
123 |
+
assert meta.get_content() is None
|
124 |
+
|
125 |
+
|
126 |
+
def test_metadata_get_overlaps():
|
127 |
+
# Test with no features
|
128 |
+
meta = Metadata(iscc="ISCC1234567890")
|
129 |
+
assert meta.get_overlaps() == []
|
130 |
+
|
131 |
+
# Test with features but no simprints
|
132 |
+
meta = Metadata(iscc="ISCC1234567890", features=[FeatureSet()])
|
133 |
+
assert meta.get_overlaps() == []
|
134 |
+
|
135 |
+
# Test with non-overlapping chunks
|
136 |
+
features = [
|
137 |
+
FeatureSet(
|
138 |
+
simprints=[
|
139 |
+
Feature(simprint="feature1", offset=0, content="Hello"),
|
140 |
+
Feature(simprint="feature2", offset=5, content="World"),
|
141 |
+
]
|
142 |
+
)
|
143 |
+
]
|
144 |
+
meta = Metadata(iscc="ISCC1234567890", features=features)
|
145 |
+
assert meta.get_overlaps() == [""]
|
146 |
+
|
147 |
+
# Test with overlapping chunks
|
148 |
+
features = [
|
149 |
+
FeatureSet(
|
150 |
+
simprints=[
|
151 |
+
Feature(simprint="feature1", offset=0, content="Hello W"),
|
152 |
+
Feature(simprint="feature2", offset=5, content="World"),
|
153 |
+
]
|
154 |
+
)
|
155 |
+
]
|
156 |
+
meta = Metadata(iscc="ISCC1234567890", features=features)
|
157 |
+
assert meta.get_overlaps() == [" W"]
|
158 |
+
|
159 |
+
# Test with multiple overlaps
|
160 |
+
features = [
|
161 |
+
FeatureSet(
|
162 |
+
simprints=[
|
163 |
+
Feature(simprint="feature1", offset=0, content="Hello W"),
|
164 |
+
Feature(simprint="feature2", offset=5, content="World!"),
|
165 |
+
Feature(simprint="feature3", offset=10, content="! How are you?"),
|
166 |
+
]
|
167 |
+
)
|
168 |
+
]
|
169 |
+
meta = Metadata(iscc="ISCC1234567890", features=features)
|
170 |
+
assert meta.get_overlaps() == [" W", "!"]
|
171 |
+
|
172 |
+
# Test with index format
|
173 |
+
features = [
|
174 |
+
FeatureSet(
|
175 |
+
simprints=["feature1", "feature2", "feature3"],
|
176 |
+
offsets=[0, 5, 10],
|
177 |
+
contents=["Hello W", "World!", "! How are you?"],
|
178 |
+
)
|
179 |
+
]
|
180 |
+
meta = Metadata(iscc="ISCC1234567890", features=features)
|
181 |
+
assert meta.get_overlaps() == [" W", "!"]
|
182 |
+
|
183 |
+
# Test with missing content or offset
|
184 |
+
features = [
|
185 |
+
FeatureSet(
|
186 |
+
simprints=[
|
187 |
+
Feature(simprint="feature1", offset=0, content="Hello"),
|
188 |
+
Feature(simprint="feature2", content="World"),
|
189 |
+
Feature(simprint="feature3", offset=10),
|
190 |
+
]
|
191 |
+
)
|
192 |
+
]
|
193 |
+
meta = Metadata(iscc="ISCC1234567890", features=features)
|
194 |
+
assert meta.get_overlaps() == []
|