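# Spaces: Runtime error
# (The line above appears to be status-banner text captured from the hosting
# page together with this file; it is not part of the program.)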
import json
import random
from typing import Optional

import requests
import streamlit as st
from mtranslate import translate
# Image used both as the browser page icon and at the top of the sidebar.
LOGO = "https://raw.githubusercontent.com/nlp-en-es/assets/main/logo.png"

# Display name shown in the "Model" select box -> inference endpoint that
# query() POSTs the generation payload to.
MODELS = {
    "Model trained on OSCAR": {
        "url": "https://api-inference.huggingface.co/models/flax-community/gpt-2-spanish"
    },
    "Model trained on the Large Spanish Corpus": {
        "url": "https://api-inference.huggingface.co/models/mrm8488/spanish-gpt2"
    },
}

# Label shown in the "Prompt" select box -> candidate starter texts; one of
# the candidates is picked at random to pre-fill the text area.
PROMPT_LIST = {
    "Érase una vez...": ["Érase una vez "],
    "¡Hola!": ["¡Hola! Me llamo "],
    "¿Ser o no ser?": ["En mi opinión, 'ser' es "],
}
def query(payload, model_name, timeout=60.0):
    """POST ``payload`` as JSON to the endpoint of ``model_name``.

    Args:
        payload: JSON-serialisable request body.
        model_name: Key into ``MODELS``; its ``"url"`` entry is the target.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the app indefinitely.

    Returns:
        The decoded JSON response body. If the request fails at the
        transport level or the body is not valid UTF-8 JSON, a dict of the
        form ``{"error": "<message>"}`` is returned instead of raising.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``.
    """
    url = MODELS[model_name]["url"]
    print("model url:", url)
    try:
        response = requests.post(
            url, headers={}, data=json.dumps(payload), timeout=timeout
        )
    except requests.exceptions.RequestException as exc:
        return {"error": f"Request to the inference API failed: {exc}"}
    try:
        return json.loads(response.content.decode("utf-8"))
    except ValueError:
        # Covers both undecodable bytes and malformed JSON (for example an
        # HTML error page returned in place of a JSON body).
        return {
            "error": (
                "The inference API returned an unreadable response "
                f"(HTTP {response.status_code})"
            )
        }
def process(
    text: str,
    model_name: str,
    max_len: int,
    temp: float,
    top_k: int,
    top_p: float,
    do_sample: Optional[bool] = None,
):
    """Build the text-generation payload and send it to the chosen model.

    Args:
        text: Prompt sent as the request's ``"inputs"``.
        model_name: Key identifying the model; passed through to ``query``.
        max_len: Sent as ``"max_new_tokens"``.
        temp: Sent as ``"temperature"``.
        top_k: Sent as ``"top_k"``.
        top_p: Sent as ``"top_p"``.
        do_sample: When not ``None``, sent as ``"do_sample"``. Left out of
            the payload by default so existing callers produce exactly the
            same request as before.

    Returns:
        Whatever ``query`` returns for the built payload.
    """
    parameters = {
        "max_new_tokens": max_len,
        "top_k": top_k,
        "top_p": top_p,
        "temperature": temp,
        "repetition_penalty": 2.0,
    }
    if do_sample is not None:
        parameters["do_sample"] = do_sample

    payload = {
        "inputs": text,
        "parameters": parameters,
        "options": {
            "use_cache": True,
        },
    }
    return query(payload, model_name)
# Page
st.set_page_config(page_title="Spanish GPT-2 Demo", page_icon=LOGO)
st.title("Spanish GPT-2")

# Sidebar: generation parameters the user can tune before pressing "Run".
st.sidebar.image(LOGO)
st.sidebar.subheader("Configurable parameters")

max_len = st.sidebar.number_input(
    "Maximum length",
    value=100,
    help="The maximum length of the sequence to be generated.",
)
temp = st.sidebar.slider(
    "Temperature",
    value=1.0,
    min_value=0.1,
    max_value=100.0,
    help="The value used to module the next token probabilities.",
)
top_k = st.sidebar.number_input(
    "Top k",
    value=10,
    help="The number of highest probability vocabulary tokens to keep for top-k-filtering.",
)
# top_p is a cumulative probability mass, so keep the input inside [0, 1].
top_p = st.sidebar.number_input(
    "Top p",
    value=0.95,
    min_value=0.0,
    max_value=1.0,
    help=" If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.",
)
# NOTE(review): this value is collected here, but the "Run" handler further
# down does not pass it on to process(), so the choice currently has no
# effect on generation.
do_sample = st.sidebar.selectbox(
    "Sampling?",
    (True, False),
    help="Whether or not to use sampling; use greedy decoding otherwise.",
)
# Body: introductory description rendered above the model and prompt pickers.
st.markdown(
    """
Spanish GPT-2 models trained from scratch on two different datasets. One
model is trained on the Spanish portion of
[OSCAR](https://huggingface.co/datasets/viewer/?dataset=oscar)
and the other on the
[large_spanish_corpus](https://huggingface.co/datasets/viewer/?dataset=large_spanish_corpus)
aka BETO's corpus.
The models are trained with Flax and using TPUs sponsored by Google since this is part of the
[Flax/Jax Community Week](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104)
organised by HuggingFace.
"""
)
model_name = st.selectbox("Model", list(MODELS))

# "Custom" is appended last and preselected via `index`, so the text area
# starts with the placeholder text unless a canned prompt is chosen.
ALL_PROMPTS = list(PROMPT_LIST) + ["Custom"]
prompt = st.selectbox("Prompt", ALL_PROMPTS, index=len(ALL_PROMPTS) - 1)
if prompt == "Custom":
    prompt_box = "Enter your text here"
else:
    prompt_box = random.choice(PROMPT_LIST[prompt])
text = st.text_area("Enter text", prompt_box)

if st.button("Run"):
    with st.spinner(text="Getting results..."):
        st.subheader("Result")
        print(f"maxlen:{max_len}, temp:{temp}, top_k:{top_k}, top_p:{top_p}")
        result = process(
            text=text,
            model_name=model_name,
            max_len=int(max_len),
            temp=temp,
            top_k=int(top_k),
            top_p=float(top_p),
        )
        print("result:", result)
        if "error" in result:
            error = result["error"]
            if isinstance(error, str):
                # A single message, optionally accompanied by an estimate of
                # how long to wait before retrying.
                st.write(f"{error}.")
                if "estimated_time" in result:
                    st.write(
                        f'Please try again in about {result["estimated_time"]:.0f} seconds.'
                    )
            elif isinstance(error, list):
                # Several messages: show each on its own.
                for message in error:
                    st.write(f"{message}")
        else:
            generated = result[0]["generated_text"]
            # Markdown only renders a hard line break when the line ends in
            # two spaces, so add them in front of every newline.
            st.write(generated.replace("\n", "  \n"))
            st.text("English translation")
            st.write(translate(generated, "en", "es").replace("\n", "  \n"))
# Footer: credits and links to the model cards.
st.markdown(
    """
### Team members
- Manuel Romero ([mrm8488](https://huggingface.co/mrm8488))
- María Grandury ([mariagrandury](https://huggingface.co/mariagrandury))
- Pablo González de Prado ([Pablogps](https://huggingface.co/Pablogps))
- Daniel Vera ([daveni](https://huggingface.co/daveni))
- Sri Lakshmi ([srisweet](https://huggingface.co/srisweet))
- José Posada ([jdposa](https://huggingface.co/jdposa))
- Santiago Hincapie ([shpotes](https://huggingface.co/shpotes))
- Jorge ([jorgealro](https://huggingface.co/jorgealro))
### More information
You can find more information about these models in their cards:
- [Model trained on OSCAR](https://huggingface.co/flax-community/gpt-2-spanish)
- [Model trained on the Large Spanish Corpus](https://huggingface.co/mrm8488/spanish-gpt2)
"""
)