titusz committed on
Commit
681cad2
1 Parent(s): 02c9cf7

Synced repo using 'sync_with_huggingface' Github Action

Browse files
CHANGELOG.md CHANGED
@@ -1,5 +1,7 @@
1
  # Changelog
2
 
 
 
3
  ## [0.1.2] - 2024-08-19
4
  - Encode granular features with base64
5
  - Refactor result format to generic ISCC data model
 
1
  # Changelog
2
 
3
+ ## [0.1.3] - Unreleased
4
+
5
  ## [0.1.2] - 2024-08-19
6
  - Encode granular features with base64
7
  - Refactor result format to generic ISCC data model
iscc_sct/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "0.1.2"
2
  from iscc_sct.options import *
3
  from iscc_sct.utils import *
4
  from iscc_sct.code_semantic_text import *
 
1
+ __version__ = "0.1.3"
2
  from iscc_sct.options import *
3
  from iscc_sct.utils import *
4
  from iscc_sct.code_semantic_text import *
iscc_sct/demo.py CHANGED
@@ -443,9 +443,8 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
443
  )
444
 
445
  with gr.Row(variant="panel"):
446
- with gr.Column(variant="panel"):
447
- gr.Markdown(
448
- """
449
  ## Understanding ISCC Semantic Text-Codes
450
 
451
  ### What is an ISCC Semantic Text-Code?
@@ -476,7 +475,11 @@ document-embedding.
476
  The similarity shown is calculated by comparing the ISCC codes, not the original texts. This
477
  allows for efficient and privacy-preserving comparisons, as only the codes need to be shared
478
  or stored.
 
 
479
 
 
 
480
  ### Why is this useful?
481
  - **Content creators**: Find similar content across languages.
482
  - **Researchers**: Quickly compare documents or find related texts in different languages.
@@ -490,20 +493,30 @@ language barriers!
490
  The "Explore Details & Advanced Options" section provides additional tools and information:
491
 
492
  1. **ISCC Bit-Length**: Adjust the precision of the ISCC code. Higher values provide more detailed
493
- comparisons but may be more sensitive to minor differences.
494
 
495
  2. **Max Tokens**: Set the maximum number of tokens per chunk. This affects how the text is split
496
- for processing.
497
 
498
  3. **Chunked Text**: View how each input text is divided into chunks for processing. Each chunk is
499
- color-coded and labeled with its size and simprint (a similarity preserving fingerprint).
500
 
501
  4. **Granular Matches**: See a detailed comparison of individual chunks between Text A and Text B.
502
- This table shows which specific parts of the texts are most similar, along with their approximate
503
- cosine similarity (scaled -100% to +100%).
 
 
 
 
 
 
504
  """
505
- )
506
-
 
 
 
 
507
 
508
  if __name__ == "__main__": # pragma: no cover
509
  demo.launch()
 
443
  )
444
 
445
  with gr.Row(variant="panel"):
446
+ gr.Markdown(
447
+ """
 
448
  ## Understanding ISCC Semantic Text-Codes
449
 
450
  ### What is an ISCC Semantic Text-Code?
 
475
  The similarity shown is calculated by comparing the ISCC codes, not the original texts. This
476
  allows for efficient and privacy-preserving comparisons, as only the codes need to be shared
477
  or stored.
478
+ """
479
+ )
480
 
481
+ gr.Markdown(
482
+ """
483
  ### Why is this useful?
484
  - **Content creators**: Find similar content across languages.
485
  - **Researchers**: Quickly compare documents or find related texts in different languages.
 
493
  The "Explore Details & Advanced Options" section provides additional tools and information:
494
 
495
  1. **ISCC Bit-Length**: Adjust the precision of the ISCC code. Higher values provide more detailed
496
+ comparisons but may be more sensitive to minor differences.
497
 
498
  2. **Max Tokens**: Set the maximum number of tokens per chunk. This affects how the text is split
499
+ for processing.
500
 
501
  3. **Chunked Text**: View how each input text is divided into chunks for processing. Each chunk is
502
+ color-coded and labeled with its size and simprint (a similarity preserving fingerprint).
503
 
504
  4. **Granular Matches**: See a detailed comparison of individual chunks between Text A and Text B.
505
+ This table shows which specific parts of the texts are most similar (above 80%), along with their
506
+ approximate cosine similarity (scaled -100% to +100%).
507
+
508
+ For more information about the **ISCC** see:
509
+ - https://github.com/iscc
510
+ - https://iscc.codes
511
+ - https://iscc.io
512
+ - [ISO 24138:2024](https://www.iso.org/standard/77899.html)
513
  """
514
+ )
515
+ with gr.Row():
516
+ gr.Markdown(
517
+ f"iscc-sct v{sct.__version__} | Source Code: https://github.com/iscc/iscc-sct",
518
+ elem_classes="footer",
519
+ )
520
 
521
  if __name__ == "__main__": # pragma: no cover
522
  demo.launch()
poetry.lock CHANGED
@@ -755,13 +755,13 @@ files = [
755
 
756
  [[package]]
757
  name = "importlib-metadata"
758
- version = "8.2.0"
759
  description = "Read metadata from Python packages"
760
  optional = false
761
  python-versions = ">=3.8"
762
  files = [
763
- {file = "importlib_metadata-8.2.0-py3-none-any.whl", hash = "sha256:11901fa0c2f97919b288679932bb64febaeacf289d18ac84dd68cb2e74213369"},
764
- {file = "importlib_metadata-8.2.0.tar.gz", hash = "sha256:72e8d4399996132204f9a16dcc751af254a48f8d1b20b9ff0f98d4a8f901e73d"},
765
  ]
766
 
767
  [package.dependencies]
 
755
 
756
  [[package]]
757
  name = "importlib-metadata"
758
+ version = "8.3.0"
759
  description = "Read metadata from Python packages"
760
  optional = false
761
  python-versions = ">=3.8"
762
  files = [
763
+ {file = "importlib_metadata-8.3.0-py3-none-any.whl", hash = "sha256:42817a4a0be5845d22c6e212db66a94ad261e2318d80b3e0d363894a79df2b67"},
764
+ {file = "importlib_metadata-8.3.0.tar.gz", hash = "sha256:9c8fa6e8ea0f9516ad5c8db9246a731c948193c7754d3babb0114a05b27dd364"},
765
  ]
766
 
767
  [package.dependencies]
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
  [tool.poetry]
2
  name = "iscc-sct"
3
- version = "0.1.2"
4
  description = "ISCC - Semantic Code Text"
5
  authors = ["Titusz <[email protected]>"]
6
  license = "CC-BY-NC-SA-4.0"
 
1
  [tool.poetry]
2
  name = "iscc-sct"
3
+ version = "0.1.3"
4
  description = "ISCC - Semantic Code Text"
5
  authors = ["Titusz <[email protected]>"]
6
  license = "CC-BY-NC-SA-4.0"
tests/test_iscc_sct.py CHANGED
@@ -31,7 +31,7 @@ be matched based on lexical similarity.
31
 
32
 
33
  def test_version():
34
- assert sct.__version__ == "0.1.2"
35
 
36
 
37
  def test_code_text_semantic_default():
 
31
 
32
 
33
  def test_version():
34
+ assert sct.__version__ == "0.1.3"
35
 
36
 
37
  def test_code_text_semantic_default():