zero-shot-turkish

Build error

App Files Files Community

memojja

emrecan committed on Mar 26, 2023

Commit

7278f27

•

0 Parent(s):

Duplicate from emrecan/zero-shot-turkish

Browse files

Co-authored-by: Emrecan Çelik <[email protected]>

Files changed (6) hide show

.gitattributes +27 -0
.gitignore +2 -0
README.md +39 -0
app.py +135 -0
models.py +26 -0
requirements.txt +215 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,27 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bin.* filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zstandard filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


+venv
+__pycache__

README.md ADDED Viewed

	@@ -0,0 +1,39 @@

+---
+title: Zero-shot Turkish
+emoji: 🐨
+colorFrom: yellow
+colorTo: red
+sdk: streamlit
+sdk_version: 1.2.0
+app_file: app.py
+pinned: false
+duplicated_from: emrecan/zero-shot-turkish
+---
+# Configuration
+`title`: _string_
+Display title for the Space
+`emoji`: _string_
+Space emoji (only emoji characters are allowed)
+`colorFrom`: _string_
+Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+`colorTo`: _string_
+Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+`sdk`: _string_
+Can be either `gradio` or `streamlit`
+`sdk_version`: _string_
+Only applicable for the `streamlit` SDK.
+See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
+`app_file`: _string_
+Path to your main application file (which contains either `gradio` or `streamlit` Python code).
+Path is relative to the root of the repository.
+`pinned`: _boolean_
+Whether the Space stays on top of your list.

app.py ADDED Viewed

	@@ -0,0 +1,135 @@

+from __future__ import annotations
+import psutil
+import pandas as pd
+import streamlit as st
+import plotly.express as px
+from models import NLI_MODEL_OPTIONS, NSP_MODEL_OPTIONS, METHOD_OPTIONS
+from zeroshot_classification.classifiers import NSPZeroshotClassifier, NLIZeroshotClassifier
+print(f"Total mem: {psutil.virtual_memory().total}")  # log the total memory reported by psutil whenever this script runs
+def init_state(key: str):
+    if key not in st.session_state:
+        st.session_state[key] = None
+for k in [
+    "current_model",
+    "current_model_option",
+    "current_method_option",
+    "current_prediction",
+    "current_chart",
+]:
+    init_state(k)
+def load_model(model_option: str, method_option: str, random_state: int = 0):
+    with st.spinner("Loading selected model..."):
+        if method_option == "Natural Language Inference":
+            st.session_state.current_model = NLIZeroshotClassifier(
+                model_name=model_option, random_state=random_state
+            )
+        else:
+            st.session_state.current_model = NSPZeroshotClassifier(
+                model_name=model_option, random_state=random_state
+            )
+        st.success("Model loaded!")
+def visualize_output(labels: list[str], probabilities: list[float]):
+    data = pd.DataFrame({"labels": labels, "probability": probabilities}).sort_values(
+        by="probability", ascending=False
+    )
+    chart = px.bar(
+        data,
+        x="probability",
+        y="labels",
+        color="labels",
+        orientation="h",
+        height=290,
+        width=500,
+    ).update_layout(
+        {
+            "xaxis": {"title": "probability", "visible": True, "showticklabels": True},
+            "yaxis": {"title": None, "visible": True, "showticklabels": True},
+            "margin": dict(
+                l=10,  # left
+                r=10,  # right
+                t=50,  # top
+                b=10,  # bottom
+            ),
+            "showlegend": False,
+        }
+    )
+    return chart
+st.title("Zero-shot Turkish Text Classification")
+method_option = st.radio(
+    "Select a zero-shot classification method.",
+    [
+        METHOD_OPTIONS["nli"],
+        METHOD_OPTIONS["nsp"],
+    ],
+)
+if method_option == METHOD_OPTIONS["nli"]:
+    model_option = st.selectbox(
+        "Select a natural language inference model.", NLI_MODEL_OPTIONS, index=3
+    )
+if method_option == METHOD_OPTIONS["nsp"]:
+    model_option = st.selectbox(
+        "Select a BERT model for next sentence prediction.", NSP_MODEL_OPTIONS, index=0
+    )
+if model_option != st.session_state.current_model_option:
+    st.session_state.current_model_option = model_option
+    st.session_state.current_method_option = method_option
+    load_model(
+        st.session_state.current_model_option, st.session_state.current_method_option
+    )
+st.header("Configure prompts and labels")
+col1, col2 = st.columns(2)
+col1.subheader("Candidate labels")
+labels = col1.text_area(
+    label="These are the labels that the model will try to predict for the given text input. Your input labels should be comma separated and meaningful.",
+    value="spor,dünya,siyaset,ekonomi,sanat",
+    key="current_labels",
+)
+col1.header("Make predictions")
+text = col1.text_area(
+    "Enter a sentence or a paragraph to classify.",
+    value="Ian Anderson, Jethro Tull konserinde yan flüt çalarak zeybek oynadı.",
+    key="current_text",
+)
+col2.subheader("Prompt template")
+prompt_template = col2.text_area(
+    label="Prompt template is used to transform NLI and NSP tasks into a general-use zero-shot classifier. Models replace {} with the labels that you have given.",
+    value="Bu metin {} kategorisine aittir",
+    key="current_template",
+)
+col2.header("")
+make_pred = col1.button("Predict")
+if make_pred:
+    st.session_state.current_prediction = (
+        st.session_state.current_model.predict_on_texts(
+            [st.session_state.current_text],
+            candidate_labels=st.session_state.current_labels.split(","),
+            prompt_template=st.session_state.current_template,
+        )
+    )
+    if "scores" in st.session_state.current_prediction[0]:
+        st.session_state.current_chart = visualize_output(
+            st.session_state.current_prediction[0]["labels"],
+            st.session_state.current_prediction[0]["scores"],
+        )
+    elif "probabilities" in st.session_state.current_prediction[0]:
+        st.session_state.current_chart = visualize_output(
+            st.session_state.current_prediction[0]["labels"],
+            st.session_state.current_prediction[0]["probabilities"],
+        )
+    col2.plotly_chart(st.session_state.current_chart, use_container_width=True)

models.py ADDED Viewed

	@@ -0,0 +1,26 @@

+METHOD_OPTIONS = {  # display names of the two zero-shot methods, keyed by a short id
+    "nli": "Natural Language Inference",
+    "nsp": "Next Sentence Prediction",
+}
+NLI_MODEL_OPTIONS = [  # model names offered for the NLI method; order matters because app.py preselects by index
+    "emrecan/distilbert-base-turkish-cased-allnli_tr",
+    "emrecan/distilbert-base-turkish-cased-multinli_tr",
+    "emrecan/distilbert-base-turkish-cased-snli_tr",
+    "emrecan/bert-base-turkish-cased-allnli_tr",  # index 3: the default selection in app.py
+    "emrecan/bert-base-turkish-cased-multinli_tr",
+    "emrecan/bert-base-turkish-cased-snli_tr",
+    "emrecan/convbert-base-turkish-mc4-cased-allnli_tr",
+    "emrecan/convbert-base-turkish-mc4-cased-multinli_tr",
+    "emrecan/convbert-base-turkish-mc4-cased-snli_tr",
+    "emrecan/bert-base-multilingual-cased-allnli_tr",
+    "emrecan/bert-base-multilingual-cased-multinli_tr",
+    "emrecan/bert-base-multilingual-cased-snli_tr",
+]
+NSP_MODEL_OPTIONS = [  # model names offered for the NSP method; app.py preselects index 0
+    "dbmdz/bert-base-turkish-cased",
+    "dbmdz/bert-base-turkish-uncased",
+    "dbmdz/bert-base-turkish-128k-cased",
+    "dbmdz/bert-base-turkish-128k-uncased",
+]

requirements.txt ADDED Viewed

	@@ -0,0 +1,215 @@

+git+https://github.com/emres/turkish-deasciifier.git
+git+https://github.com/emrecncelik/zeroshot-turkish.git
+absl-py==1.0.0
+aiohttp==3.8.0
+aiosignal==1.2.0
+altair==4.1.0
+APScheduler==3.8.1
+argon2-cffi==21.3.0
+argon2-cffi-bindings==21.2.0
+astor==0.8.1
+astunparse==1.6.3
+async-timeout==4.0.1
+attrs==21.2.0
+backcall==0.2.0
+backports.zoneinfo==0.2.1
+base58==2.1.1
+beautifulsoup4==4.11.1
+black==21.10b0
+bleach==5.0.0
+blinker==1.4
+blis==0.7.5
+Brotli==1.0.9
+cachetools==4.2.4
+catalogue==2.0.6
+certifi==2021.10.8
+cffi==1.15.0
+charset-normalizer==2.0.7
+click
+codecarbon==1.2.0
+commonmark==0.9.1
+configparser==5.1.0
+cycler==0.11.0
+cymem==2.0.6
+cytoolz==0.11.2
+dash==2.0.0
+dash-bootstrap-components==1.0.0
+dash-core-components==2.0.0
+dash-html-components==2.0.0
+dash-table==5.0.0
+datasets==2.3.2
+debugpy==1.5.1
+decorator==5.1.0
+defusedxml==0.7.1
+dill==0.3.4
+docker-pycreds==0.4.0
+entrypoints==0.3
+et-xmlfile==1.1.0
+fastjsonschema==2.15.3
+fasttext==0.9.2
+filelock==3.3.2
+fire==0.4.0
+Flask==2.0.2
+Flask-Compress==1.10.1
+flatbuffers==2.0
+fonttools==4.28.5
+frozenlist==1.2.0
+fsspec==2021.11.0
+gast==0.4.0
+gitdb==4.0.9
+GitPython==3.1.24
+google-auth==2.3.3
+google-auth-oauthlib==0.4.6
+google-pasta==0.2.0
+grpcio==1.41.1
+h5py==3.5.0
+huggingface-hub==0.1.2
+idna==3.3
+importlib-metadata==4.12.0
+importlib-resources==5.7.1
+ipykernel==6.6.0
+ipython==7.30.1
+ipython-genutils==0.2.0
+ipywidgets==7.6.5
+itsdangerous==2.0.1
+jedi==0.18.1
+jellyfish==0.8.9
+Jinja2==3.0.3
+joblib==1.1.0
+jsonschema==4.5.1
+jupyter-client==7.1.0
+jupyter-core==4.9.1
+jupyterlab-pygments==0.2.2
+jupyterlab-widgets==1.0.2
+keras==2.7.0
+Keras-Preprocessing==1.1.2
+kiwisolver==1.3.2
+langcodes==3.3.0
+libclang==12.0.0
+loguru==0.6.0
+lxml==4.6.5
+Markdown==3.3.4
+MarkupSafe==2.0.1
+matplotlib==3.5.1
+matplotlib-inline==0.1.3
+mistune==0.8.4
+multidict==5.2.0
+multiprocess==0.70.12.2
+murmurhash==1.0.6
+mypy-extensions==0.4.3
+nbclient==0.6.3
+nbconvert==6.5.0
+nbformat==5.4.0
+nest-asyncio==1.5.4
+networkx==2.6.3
+nltk==3.6.7
+notebook==6.4.11
+numpy==1.21.4
+oauthlib==3.1.1
+openpyxl==3.0.9
+opt-einsum==3.3.0
+packaging==21.2
+pandas==1.4.2
+pandocfilters==1.5.0
+parso==0.8.3
+pathspec==0.9.0
+pathtools==0.1.2
+pathy==0.6.1
+pexpect==4.8.0
+pickleshare==0.7.5
+Pillow==8.4.0
+platformdirs==2.4.0
+plotly==5.4.0
+preshed==3.0.6
+prometheus-client==0.14.1
+promise==2.3
+prompt-toolkit==3.0.24
+protobuf==3.19.1
+psutil==5.8.0
+ptyprocess==0.7.0
+py-cpuinfo==8.0.0
+pyarrow==6.0.0
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pybind11==2.9.2
+pycparser==2.21
+pydantic==1.8.2
+pydeck==0.7.1
+Pygments==2.10.0
+Pympler==0.9
+pynvml==11.0.0
+pyparsing==2.4.7
+pyphen==0.11.0
+pyrsistent==0.18.1
+python-dateutil==2.8.2
+pytz==2021.3
+pytz-deprecation-shim==0.1.0.post0
+PyYAML==6.0
+pyzmq==22.3.0
+regex==2021.11.10
+requests==2.26.0
+requests-oauthlib==1.3.0
+responses==0.18.0
+rich==12.4.4
+rsa==4.7.2
+sacremoses==0.0.46
+scikit-learn==1.0.1
+scipy==1.7.2
+semver==2.13.0
+Send2Trash==1.8.0
+sentencepiece==0.1.96
+sentry-sdk==1.4.3
+setuptools-scm==6.3.2
+shortuuid==1.0.8
+six==1.16.0
+sklearn==0.0
+smart-open==5.2.1
+smmap==5.0.0
+soupsieve==2.3.2.post1
+spacy==3.2.1
+spacy-legacy==3.0.8
+spacy-loggers==1.0.1
+srsly==2.4.2
+streamlit==1.2.0
+subprocess32==3.5.4
+tenacity==8.0.1
+tensorboard==2.7.0
+tensorboard-data-server==0.6.1
+tensorboard-plugin-wit==1.8.0
+tensorflow==2.7.0
+tensorflow-estimator==2.7.0
+tensorflow-io-gcs-filesystem==0.22.0
+termcolor==1.1.0
+terminado==0.15.0
+testpath==0.5.0
+textacy==0.12.0
+thinc==8.0.13
+threadpoolctl==3.0.0
+tinycss2==1.1.1
+tokenizers==0.12.1
+toml==0.10.2
+tomli==1.2.2
+toolz==0.11.2
+torch==1.11.0
+tornado==6.1
+tqdm==4.62.3
+traitlets==5.1.1
+transformers==4.20.0
+typer==0.4.0
+typing-extensions
+tzdata==2021.5
+tzlocal==4.1
+urllib3==1.26.7
+validators==0.18.2
+wandb==0.12.6
+wasabi==0.9.0
+watchdog==2.1.6
+wcwidth==0.2.5
+webencodings==0.5.1
+Werkzeug==2.0.2
+widgetsnbextension==3.5.2
+wrapt==1.13.3
+xxhash==2.0.2
+yarl==1.7.2
+yaspin==2.1.0
+zipp==3.8.0