|
from dotenv import load_dotenv |
|
from pydantic import Field |
|
from pydantic_settings import BaseSettings, SettingsConfigDict |
|
|
|
|
|
# Public names exported by a star-import of this module.
__all__ = ["SctOptions", "sct_opts"]

# Pull variables from a .env file into the process environment up front, so
# they are visible when the settings object is created further down.
load_dotenv()
|
|
|
|
|
class SctOptions(BaseSettings):
    """Options controlling Semantic Text-Code generation.

    Each field can be set through an environment variable named
    ``ISCC_SCT_<FIELD>`` (see ``env_prefix``) or through a ``.env`` file.
    Values are validated on construction and again on every attribute
    assignment (``validate_assignment=True``); unknown settings are ignored
    (``extra="ignore"``).
    """

    bits: int = Field(
        64,
        description="ISCC_SCT_BITS - Default bit-length of generated Semantic Text-Code in bits",
        ge=32,
        le=256,
        multiple_of=32,
    )

    bits_granular: int = Field(
        64,
        description="ISCC_SCT_BITS_GRANULAR - Default bit-length of granular features",
        ge=32,
        le=256,
        multiple_of=32,
    )

    characters: bool = Field(
        True, description="ISCC_SCT_CHARACTERS - Include document character count"
    )
    embedding: bool = Field(
        False, description="ISCC_SCT_EMBEDDING - Include global document embedding"
    )

    # A number of fractional digits cannot be negative.
    precision: int = Field(
        8,
        description="ISCC_SCT_PRECISION - Max fractional digits for embeddings (default 8)",
        ge=0,
    )

    simprints: bool = Field(
        False, description="ISCC_SCT_SIMPRINTS - Include granular feature simprints"
    )
    offsets: bool = Field(
        False, description="ISCC_SCT_OFFSETS - Include offsets of granular features"
    )

    sizes: bool = Field(
        False, description="ISCC_SCT_SIZES - Include sizes of granular features (number of chars)"
    )

    contents: bool = Field(False, description="ISCC_SCT_CONTENTS - Include granular text chunks")

    # A chunk needs at least one token; the upper limit is unchanged.
    max_tokens: int = Field(
        127,
        description="ISCC_SCT_MAX_TOKENS - Max tokens per chunk (Default 127)",
        ge=1,
        le=127,
    )

    # An overlap is a token count, so reject negative values early.
    overlap: int = Field(
        48,
        description="ISCC_SCT_OVERLAP - Max tokens allowed to overlap between chunks (Default 48)",
        ge=0,
    )

    trim: bool = Field(
        False, description="ISCC_SCT_TRIM - Trim whitespace from chunks (Default False)"
    )

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        env_prefix="ISCC_SCT_",
        extra="ignore",
        validate_assignment=True,
    )

    def override(self, update: "dict | None" = None) -> "SctOptions":
        """Return an updated and validated deep copy of the current settings instance.

        Args:
            update: Mapping of field names to replacement values. ``None`` or
                an empty mapping produces an unchanged copy.

        Returns:
            A new ``SctOptions`` instance; ``self`` is left untouched.

        Raises:
            ValueError: If pydantic rejects an assignment, e.g. because a value
                violates a field constraint (raised as
                ``pydantic.ValidationError``, a ``ValueError`` subclass).
        """
        opts = self.model_copy(deep=True)
        # Assign field by field rather than via model_copy(update=...), which
        # skips validation; setattr triggers validate_assignment per value.
        for field, value in (update or {}).items():
            setattr(opts, field, value)
        return opts
|
|
|
|
|
# Module-level default options instance, built once at import time from the
# field defaults plus any ISCC_SCT_* environment / .env overrides.
sct_opts: SctOptions = SctOptions()
|
|