# Model settings
device = "cuda"
model_name = "openbmb/MiniCPM-o-2_6"

# Decoding settings
# TOML booleans must be lowercase (`true` / `false`); `True` is a parse error.
sampling = true
stream = true
repetition_penalty = 1.05