--- license: apache-2.0 datasets: - OpenAssistant/oasst2 language: - bg - ca - cs - da - de - en - es - fr - hr - hu - it - nl - pl - pt - ro - ru - sl - sr - sv - uk library_name: transformers widget: - text: | system You are a helpful AI assistant. user What is the meaning of life in the current time? model --- ![image/png](https://cdn-uploads.huggingface.co/production/uploads/641b435ba5f876fe30c5ae0a/YXqUXFjX8uIJT-mdOnM1h.png) ``` reference data model: datasets: - lang: "bg,ca,cs,da,de,en,es,fr,hr,hu,it,nl,pl,pt,ro,ru,sl,sr,sv,uk" link: https://huggingface.co/datasets/NickyNicky/oasst2_clusters model: - google/gemma-2b-it Link: https://huggingface.co/google/gemma-2b-it Epoch: 7 future experts: Cluster_3 Eval model: - link: soon ``` ## ```Python !python -m pip install --upgrade pip !pip install "torch>=2.1.1" -U !pip install torchaudio==2.2.0 !pip install -q datasets trl peft bitsandbytes sentencepiece wandb !pip install -q accelerate safetensors deepspeed !pip install -q scipy ninja -U !pip install -q -U transformers==4.38.0 ``` ## Version ```py import torch torch.__version__ #OUTPUTS: ('2.2.0+cu121' ) ``` ## How to use ```py from transformers import ( AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline, logging, GenerationConfig, TextIteratorStreamer, ) from transformers import StoppingCriteria, StoppingCriteriaList import torch model_id='NickyNicky/gemma-2b-it_oasst2_chatML_Cluster_3_V1' model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", # load_in_4bit=True, # low_cpu_mem_usage= True, ) max_length=2055 print("max_length",max_length) tokenizer = AutoTokenizer.from_pretrained(model_id, # use_fast = False, max_length=max_length,) class ListOfTokensStoppingCriteria(StoppingCriteria): """ Clase para definir un criterio de parada basado en una lista de tokens específicos. """ def __init__(self, tokenizer, stop_tokens): self.tokenizer = tokenizer # Codifica cada token de parada y guarda sus IDs en una lista self.stop_token_ids_list = [tokenizer.encode(stop_token, add_special_tokens=False) for stop_token in stop_tokens] def __call__(self, input_ids, scores, **kwargs): # Verifica si los últimos tokens generados coinciden con alguno de los conjuntos de tokens de parada for stop_token_ids in self.stop_token_ids_list: len_stop_tokens = len(stop_token_ids) if len(input_ids[0]) >= len_stop_tokens: if input_ids[0, -len_stop_tokens:].tolist() == stop_token_ids: return True return False # Uso del criterio de parada personalizado stop_tokens = [""] # Lista de tokens de parada # Inicializa tu criterio de parada con el tokenizer y la lista de tokens de parada stopping_criteria = ListOfTokensStoppingCriteria(tokenizer, stop_tokens) # Añade tu criterio de parada a una StoppingCriteriaList stopping_criteria_list = StoppingCriteriaList([stopping_criteria]) #EXAMPLE #1 txt="""system You are a helpful AI assistant. user Me dices los diferentes tipos de reciclaje que suelen existir en las ciudades europeas model """ #EXAMPLE #2 txt="""system You are a helpful AI assistant. user What is the meaning of life in the current time? model """ inputs = tokenizer.encode(txt, return_tensors="pt").to("cuda") max_new_tokens=1000 generation_config = GenerationConfig( max_new_tokens=max_new_tokens, temperature=0.55, #top_p=0.9, #top_k=len_tokens, repetition_penalty=1.1, do_sample=True, ) outputs = model.generate(generation_config=generation_config, input_ids=inputs, stopping_criteria=stopping_criteria_list,) tokenizer.decode(outputs[0], skip_special_tokens=False) #True ```