|
import pytest |
|
from pydantic import ValidationError |
|
from iscc_sct.models import Metadata, Feature, FeatureSet |
|
import iscc_sct as sct |
|
|
|
|
|
def test_feature_initialization():
    """Check ``Feature`` construction with no, minimal, and full arguments."""
    # Constructing without any argument must be rejected by validation.
    with pytest.raises(ValidationError):
        Feature()

    # Only the simprint supplied: offset and content are expected to be None.
    minimal = Feature(simprint="XZjeSfdyVi0")
    assert minimal.simprint == "XZjeSfdyVi0"
    assert minimal.offset is None
    assert minimal.content is None

    # Every field supplied: each one must be readable back unchanged.
    populated = Feature(simprint="feature", offset=5, content="example text")
    observed = (populated.simprint, populated.offset, populated.content)
    assert observed == ("feature", 5, "example text")
|
|
|
|
|
def test_feature_set_initialization():
    """A bare ``FeatureSet`` dumps only maintype, subtype and version."""
    # With ``exclude_none=True`` nothing else is expected to appear in the dump.
    expected = {"maintype": "semantic", "subtype": "text", "version": 0}
    dumped = FeatureSet().model_dump(exclude_none=True)
    assert dumped == expected
|
|
|
|
|
def test_sct_meta_initialization():
    """Check ``Metadata`` construction with only an ISCC and with all fields."""
    # Only ``iscc`` given: characters and features are expected to be None.
    bare = Metadata(iscc="ISCC1234567890")
    assert bare.iscc == "ISCC1234567890"
    assert bare.characters is None
    assert bare.features is None

    # All fields given, including one feature set carrying an embedding.
    simprint = Feature(simprint="feature1", offset=0, content="text1")
    feature_sets = [FeatureSet(simprints=[simprint], embedding=[0.1, 0.2])]
    full = Metadata(iscc="ISCC1234567890", characters=1000, features=feature_sets)
    assert full.iscc == "ISCC1234567890"
    assert full.characters == 1000
    assert full.features == feature_sets
    assert full.features[0].embedding == [0.1, 0.2]
|
|
|
|
|
def test_metadata_to_index_format():
    """Convert object-format features to index format and check the parallel lists."""
    first = Feature(simprint="feature1", offset=0, size=5, content="text1")
    second = Feature(simprint="feature2", offset=5, size=5, content="text2")
    source = Metadata(
        iscc="ISCC1234567890",
        features=[FeatureSet(simprints=[first, second])],
    )

    converted = source.to_index_format()
    indexed = converted.features[0]
    # Simprints become plain strings; the other attributes move to sibling lists.
    assert isinstance(indexed.simprints[0], str)
    assert indexed.simprints == ["feature1", "feature2"]
    assert indexed.offsets == [0, 5]
    assert indexed.sizes == [5, 5]
    assert indexed.contents == ["text1", "text2"]

    # Converting data that is already in index format must leave the dump unchanged.
    reconverted = converted.to_index_format()
    assert reconverted.model_dump() == converted.model_dump()
|
|
|
|
|
def test_metadata_to_object_format():
    """Convert index-format features to object format and check the first ``Feature``."""
    index_set = FeatureSet(
        simprints=["feature1", "feature2"],
        offsets=[0, 5],
        sizes=[5, 5],
        contents=["text1", "text2"],
    )
    source = Metadata(iscc="ISCC1234567890", features=[index_set])

    converted = source.to_object_format()
    feature_set = converted.features[0]
    head = feature_set.simprints[0]
    # Each simprint becomes a ``Feature`` carrying its own offset, size and content.
    assert isinstance(head, Feature)
    assert head.simprint == "feature1"
    assert head.offset == 0
    assert head.size == 5
    assert head.content == "text1"
    # The parallel lists are expected to be cleared once folded into the objects.
    assert feature_set.offsets is None
    assert feature_set.sizes is None
    assert feature_set.contents is None

    # Converting data that is already in object format must leave the dump unchanged.
    reconverted = converted.to_object_format()
    assert reconverted.model_dump() == converted.model_dump()
|
|
|
|
|
def test_metadata_to_index_format_with_none_simprints():
    """Index conversion of a feature set without simprints leaves the data unchanged."""
    embedding_only = FeatureSet(simprints=None, embedding=[0.1, 0.2])
    source = Metadata(iscc="ISCC1234567890", features=[embedding_only])

    converted = source.to_index_format()
    converted_set = converted.features[0]
    assert converted_set.simprints is None
    assert converted_set.embedding == [0.1, 0.2]
    assert converted.model_dump() == source.model_dump()
|
|
|
|
|
def test_metadata_format_conversion_with_no_features():
    """Both format conversions leave a feature-less ``Metadata`` dump unchanged."""
    bare = Metadata(iscc="ISCC1234567890")
    conversions = (bare.to_index_format(), bare.to_object_format())
    for converted in conversions:
        assert converted.model_dump() == bare.model_dump()
|
|
|
|
|
def test_metadata_get_content(text_en):
    """``get_content`` on a granular result must return the input text.

    ``text_en`` is presumably a pytest fixture defined outside this file.
    """
    result = sct.create(text_en, granular=True)
    reconstructed = result.get_content()
    assert reconstructed == text_en
|
|
|
|
|
def test_metadata_get_content_no_fetures():
    """``get_content`` must return None when the metadata has no features."""
    featureless = Metadata(iscc="ISCC1234567890")
    content = featureless.get_content()
    assert content is None
|
|
|
|
|
def test_metadata_get_content_index_format():
    """``get_content`` must also work after conversion to index format."""
    granular = sct.create("Hello World", granular=True)
    indexed = granular.to_index_format()
    assert indexed.get_content() == "Hello World"
|
|
|
|
|
def test_metadata_get_content_no_content():
    """``get_content`` must return None when created with ``contents=False``."""
    without_contents = sct.create("Hello World", granular=True, contents=False)
    content = without_contents.get_content()
    assert content is None
|
|
|
|
|
def test_metadata_get_overlaps():
    """Exercise ``Metadata.get_overlaps`` over empty, adjacent, overlapping and partial data."""

    def wrap(feature_set):
        # Build a Metadata holding exactly one feature set.
        return Metadata(iscc="ISCC1234567890", features=[feature_set])

    # No features at all: expected result is an empty list.
    featureless = Metadata(iscc="ISCC1234567890")
    assert featureless.get_overlaps() == []

    # A feature set without simprints: still an empty list.
    empty_set = wrap(FeatureSet())
    assert empty_set.get_overlaps() == []

    # Two chunks that touch but do not overlap: one empty-string overlap expected.
    adjacent = wrap(
        FeatureSet(
            simprints=[
                Feature(simprint="feature1", offset=0, content="Hello"),
                Feature(simprint="feature2", offset=5, content="World"),
            ]
        )
    )
    assert adjacent.get_overlaps() == [""]

    # First chunk runs two characters past the second chunk's offset.
    single_overlap = wrap(
        FeatureSet(
            simprints=[
                Feature(simprint="feature1", offset=0, content="Hello W"),
                Feature(simprint="feature2", offset=5, content="World"),
            ]
        )
    )
    assert single_overlap.get_overlaps() == [" W"]

    # Three chunks: one expected overlap per neighbouring pair.
    chained = wrap(
        FeatureSet(
            simprints=[
                Feature(simprint="feature1", offset=0, content="Hello W"),
                Feature(simprint="feature2", offset=5, content="World!"),
                Feature(simprint="feature3", offset=10, content="! How are you?"),
            ]
        )
    )
    assert chained.get_overlaps() == [" W", "!"]

    # Same three chunks expressed as parallel lists instead of Feature objects.
    chained_index = wrap(
        FeatureSet(
            simprints=["feature1", "feature2", "feature3"],
            offsets=[0, 5, 10],
            contents=["Hello W", "World!", "! How are you?"],
        )
    )
    assert chained_index.get_overlaps() == [" W", "!"]

    # Features missing an offset or content: expected result is an empty list.
    incomplete = wrap(
        FeatureSet(
            simprints=[
                Feature(simprint="feature1", offset=0, content="Hello"),
                Feature(simprint="feature2", content="World"),
                Feature(simprint="feature3", offset=10),
            ]
        )
    )
    assert incomplete.get_overlaps() == []
|
|