gzip-openhermes / config_gzipembed.py
crumb's picture
Upload model
a2aeb80
raw
history blame contribute delete
841 Bytes
from transformers import PretrainedConfig
from nltk.corpus import stopwords
from typing import List
import nltk
# Fetch the NLTK data packages when this module is imported.
# GZIPEmbeddingConfig below relies on stopwords.words('english') for its
# default stop-word list, so the 'stopwords' corpus must be available.
nltk.download('stopwords')
# NOTE(review): 'punkt' is not referenced anywhere in this file; presumably the
# companion modeling code needs the tokenizer data — confirm before removing.
nltk.download('punkt')
class GZIPEmbeddingConfig(PretrainedConfig):
    """Configuration container for the ``gzipembed`` model type.

    Every constructor option is stored verbatim as an attribute of the same
    name; any additional keyword arguments are forwarded to
    ``PretrainedConfig.__init__``.

    Args:
        normalize: Stored as ``self.normalize``. Presumably toggles
            normalization of inputs by the consuming model -- confirm there.
        normalized_corpus: Stored as ``self.normalized_corpus``. Presumably
            records whether ``corpus`` is already normalized -- confirm.
        reduction: Stored as ``self.reduction``.
        reduced_dimension: Stored as ``self.reduced_dimension``.
        remove_stop_words: Stored as ``self.remove_stop_words``.
        stop_words: Stop-word list stored as ``self.stop_words``. When
            ``None`` (the default), NLTK's English stop-word list is used.
        corpus: Stored as ``self.corpus``. When ``None`` (the default), a new
            empty list is used.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    model_type = "gzipembed"

    def __init__(
        self,
        normalize: bool = True,
        normalized_corpus: bool = True,
        reduction: bool = False,
        reduced_dimension: int = 0,
        remove_stop_words: bool = True,
        stop_words=None,
        corpus=None,
        **kwargs,
    ):
        # Build the list defaults per instance: a list literal (or a list
        # computed once) in the signature would be shared by every config
        # created without an explicit value, so mutating one would leak into
        # all the others.
        self.corpus = [] if corpus is None else corpus
        self.normalize = normalize
        self.normalized_corpus = normalized_corpus
        self.reduction = reduction
        # Store the plain value. A stray trailing comma here previously
        # wrapped it in a 1-tuple, e.g. (0,) instead of 0.
        self.reduced_dimension = reduced_dimension
        self.remove_stop_words = remove_stop_words
        self.stop_words = (
            stopwords.words('english') if stop_words is None else stop_words
        )
        # Attributes are assigned before delegating, matching the original
        # initialization order.
        super().__init__(**kwargs)