File size: 2,433 Bytes
b31f748 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
from dotenv import load_dotenv
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
# Public API of this module: the settings class and the shared default instance.
__all__ = ["SctOptions", "sct_opts"]

# Runs at import time, before any settings object is constructed, so values
# from a local .env file (if one is found) are visible in the environment.
load_dotenv()
class SctOptions(BaseSettings):
    # Options controlling Semantic Text-Code generation.
    #
    # Every field can be supplied through an environment variable carrying the
    # ISCC_SCT_ prefix (or through a .env file); each description string names
    # the matching variable.
    #
    # NOTE: documented with comments instead of a class docstring on purpose:
    # pydantic copies a model docstring into the generated JSON schema, so
    # adding one would alter that output.

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        env_prefix="ISCC_SCT_",
        extra="ignore",
        # Re-validate on attribute assignment; `override` depends on this.
        validate_assignment=True,
    )

    # --- Code length ------------------------------------------------------
    bits: int = Field(
        default=64,
        multiple_of=32,
        ge=32,
        le=256,
        description="ISCC_SCT_BITS - Default bit-length of generated Semantic Text-Code in bits",
    )
    bits_granular: int = Field(
        default=64,
        multiple_of=32,
        ge=32,
        le=256,
        description="ISCC_SCT_BITS_GRANULAR - Default bit-length of granular features",
    )

    # --- Output switches --------------------------------------------------
    characters: bool = Field(
        default=True,
        description="ISCC_SCT_CHARACTERS - Include document character count",
    )
    embedding: bool = Field(
        default=False,
        description="ISCC_SCT_EMBEDDING - Include global document embedding",
    )
    precision: int = Field(
        default=8,
        description="ISCC_SCT_PRECISION - Max fractional digits for embeddings (default 8)",
    )
    simprints: bool = Field(
        default=False,
        description="ISCC_SCT_SIMPRINTS - Include granular feature simprints",
    )
    offsets: bool = Field(
        default=False,
        description="ISCC_SCT_OFFSETS - Include offsets of granular features",
    )
    sizes: bool = Field(
        default=False,
        description="ISCC_SCT_SIZES - Include sizes of granular features (number of chars)",
    )
    contents: bool = Field(
        default=False,
        description="ISCC_SCT_CONTENTS - Include granular text chunks",
    )

    # --- Chunking ---------------------------------------------------------
    max_tokens: int = Field(
        default=127,
        le=127,
        description="ISCC_SCT_MAX_TOKENS - Max tokens per chunk (Default 127)",
    )
    overlap: int = Field(
        default=48,
        description="ISCC_SCT_OVERLAP - Max tokens allowed to overlap between chunks (Default 48)",
    )
    trim: bool = Field(
        default=False,
        description="ISCC_SCT_TRIM - Trim whitespace from chunks (Default False)",
    )

    def override(self, update=None):
        # type: (dict|None) -> SctOptions
        """
        Build a validated deep copy of these settings with `update` applied.

        The instance the method is called on is left unmodified.

        :param update: Mapping of field name to new value; None or an empty mapping changes nothing.
        :return: A new SctOptions instance carrying the requested changes.
        """
        changes = update if update else {}
        clone = self.model_copy(deep=True)
        # Assign one attribute at a time: with validate_assignment enabled,
        # each assignment is validated.
        for name, new_value in changes.items():
            setattr(clone, name, new_value)
        return clone
# Shared default options instance, constructed once when this module is imported.
sct_opts = SctOptions()
|