Spaces:
Build error
Build error
cetinca
committed on
Commit
·
46f5320
verified
·
0
Parent(s):
Initial commit
Browse files- .gitattributes +34 -0
- .gitignore +163 -0
- .gitlab-ci.yml +6 -0
- Dockerfile +22 -0
- README.md +11 -0
- app.py +47 -0
- data/test_data_text2int.csv +43 -0
- modules/__init__.py +0 -0
- modules/sentiment.py +8 -0
- modules/text2int.py +192 -0
- requirements.txt +11 -0
- scripts/__init__.py +0 -0
- scripts/api_scaling.py +96 -0
- scripts/api_scaling.sh +83 -0
- scripts/make_request.py +14 -0
- scripts/make_request.sh +52 -0
- scripts/plot_calls.py +116 -0
- static/styles.css +8 -0
- templates/home.html +35 -0
- tests/__init__.py +0 -0
- tests/test_text2int.py +61 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# pdm
|
105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
106 |
+
#pdm.lock
|
107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
108 |
+
# in version control.
|
109 |
+
# https://pdm.fming.dev/#use-with-ide
|
110 |
+
.pdm.toml
|
111 |
+
|
112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
113 |
+
__pypackages__/
|
114 |
+
|
115 |
+
# Celery stuff
|
116 |
+
celerybeat-schedule
|
117 |
+
celerybeat.pid
|
118 |
+
|
119 |
+
# SageMath parsed files
|
120 |
+
*.sage.py
|
121 |
+
|
122 |
+
# Environments
|
123 |
+
.env
|
124 |
+
.venv
|
125 |
+
env/
|
126 |
+
venv/
|
127 |
+
ENV/
|
128 |
+
env.bak/
|
129 |
+
venv.bak/
|
130 |
+
|
131 |
+
# Spyder project settings
|
132 |
+
.spyderproject
|
133 |
+
.spyproject
|
134 |
+
|
135 |
+
# Rope project settings
|
136 |
+
.ropeproject
|
137 |
+
|
138 |
+
# mkdocs documentation
|
139 |
+
/site
|
140 |
+
|
141 |
+
# mypy
|
142 |
+
.mypy_cache/
|
143 |
+
.dmypy.json
|
144 |
+
dmypy.json
|
145 |
+
|
146 |
+
# Pyre type checker
|
147 |
+
.pyre/
|
148 |
+
|
149 |
+
# pytype static type analyzer
|
150 |
+
.pytype/
|
151 |
+
|
152 |
+
# Cython debug symbols
|
153 |
+
cython_debug/
|
154 |
+
|
155 |
+
# PyCharm
|
156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
157 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
.idea/
|
161 |
+
|
162 |
+
*history_sentiment*
|
163 |
+
*history_text2int*
|
.gitlab-ci.yml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
run_tests:
|
2 |
+
image: python:3.10-slim-buster
|
3 |
+
before_script:
|
4 |
+
- echo "Image build has started!"
|
5 |
+
script:
|
6 |
+
- pip install -r requirements.txt && pytest --verbose
|
Dockerfile
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://huggingface.co/docs/hub/spaces-sdks-docker-first-demo
|
2 |
+
|
3 |
+
FROM python:3.9
|
4 |
+
|
5 |
+
WORKDIR /code
|
6 |
+
|
7 |
+
COPY ./requirements.txt /code/requirements.txt
|
8 |
+
|
9 |
+
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
10 |
+
|
11 |
+
RUN useradd -m -u 1000 user
|
12 |
+
|
13 |
+
USER user
|
14 |
+
|
15 |
+
ENV HOME=/home/user \
|
16 |
+
PATH=/home/user/.local/bin:$PATH
|
17 |
+
|
18 |
+
WORKDIR $HOME/app
|
19 |
+
|
20 |
+
COPY --chown=user . $HOME/app
|
21 |
+
|
22 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Mathtext Fastapi
|
3 |
+
emoji: 🐨
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: red
|
6 |
+
sdk: docker
|
7 |
+
pinned: false
|
8 |
+
license: agpl-3.0
|
9 |
+
---
|
10 |
+
|
11 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""FastAPI endpoint
|
2 |
+
To run locally use 'uvicorn app:app --host localhost --port 7860'
|
3 |
+
"""
|
4 |
+
|
5 |
+
from fastapi import FastAPI, Request
|
6 |
+
from fastapi.responses import JSONResponse
|
7 |
+
from fastapi.staticfiles import StaticFiles
|
8 |
+
from fastapi.templating import Jinja2Templates
|
9 |
+
from pydantic import BaseModel
|
10 |
+
|
11 |
+
from modules.sentiment import sentiment
|
12 |
+
from modules.text2int import text2int
|
13 |
+
|
14 |
+
app = FastAPI()
|
15 |
+
|
16 |
+
app.mount("/static", StaticFiles(directory="static"), name="static")
|
17 |
+
|
18 |
+
templates = Jinja2Templates(directory="templates")
|
19 |
+
|
20 |
+
|
21 |
+
class Text(BaseModel):
    """Request body used by the POST endpoints: one text field."""

    # Text to process; defaults to an empty string when the field is omitted.
    content: str = ""
|
23 |
+
|
24 |
+
|
25 |
+
@app.get("/")
def home(request: Request):
    """Render the ``home.html`` template for the root URL."""
    context = {"request": request}
    return templates.TemplateResponse("home.html", context)
|
28 |
+
|
29 |
+
|
30 |
+
@app.post("/hello")
def hello(content: Text = None):
    """Return a greeting built from the posted text.

    Args:
        content: Request body. It is declared optional, so it is ``None``
            when the client sends no body.

    Returns:
        A JSON response of the form ``{"message": "Hello <text>!"}``.
    """
    # The body defaults to None; fall back to the model defaults instead of
    # failing on ``None.content``.
    if content is None:
        content = Text()
    payload = {"message": f"Hello {content.content}!"}
    return JSONResponse(content=payload)
|
34 |
+
|
35 |
+
|
36 |
+
@app.post("/sentiment-analysis")
def sentiment_analysis_ep(content: Text = None):
    """Run sentiment analysis on the posted text.

    Args:
        content: Request body. It is declared optional, so it is ``None``
            when the client sends no body.

    Returns:
        A JSON response of the form ``{"message": <output of sentiment()>}``.
    """
    # The body defaults to None; fall back to the model defaults instead of
    # failing on ``None.content``.
    if content is None:
        content = Text()
    ml_response = sentiment(content.content)
    payload = {"message": ml_response}
    return JSONResponse(content=payload)
|
41 |
+
|
42 |
+
|
43 |
+
@app.post("/text2int")
def text2int_ep(content: Text = None):
    """Convert the posted number words to an integer.

    Args:
        content: Request body. It is declared optional, so it is ``None``
            when the client sends no body.

    Returns:
        A JSON response of the form ``{"message": <output of text2int()>}``.
    """
    # The body defaults to None; fall back to the model defaults instead of
    # failing on ``None.content``.
    if content is None:
        content = Text()
    ml_response = text2int(content.content)
    payload = {"message": ml_response}
    return JSONResponse(content=payload)
|
data/test_data_text2int.csv
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
input,output
|
2 |
+
fourteen,14
|
3 |
+
one thousand four hundred ninety two,1492
|
4 |
+
Fourteen Hundred Ninety-Two,1492
|
5 |
+
forteen,14
|
6 |
+
seventeen-thousand and seventy two,17072
|
7 |
+
two hundred and nine,209
|
8 |
+
# Following are the ones that the current text2int.py fails on,-
|
9 |
+
# ninety nine hundred and seventy seven,9977
|
10 |
+
# seven thousands,7000
|
11 |
+
# 2 hundreds,200
|
12 |
+
# 99 thousands and one,99001
|
13 |
+
# "forty-five thousand, seven hundred and nine",45709
|
14 |
+
# eighty eight hundred eighty,8880
|
15 |
+
# a hundred hundred,10000
|
16 |
+
# a hundred thousand,100000
|
17 |
+
# a hundred million,100000000
|
18 |
+
# nineteen ninety nine,1999
|
19 |
+
# forteen twenty seven,1427
|
20 |
+
# one nine eight five,1985
|
21 |
+
# nineteen eighty-five,1985
|
22 |
+
# six,6
|
23 |
+
# eighty,80
|
24 |
+
# eight three seven five three zero nine,8375309
|
25 |
+
# oh one,1
|
26 |
+
# six oh 1,601
|
27 |
+
# sex,6
|
28 |
+
# eight oh,80
|
29 |
+
# ate,8
|
30 |
+
# double eight,88
|
31 |
+
# eight three seven five three O nine,8375309
|
32 |
+
# eight three seven five three oh nine,8375309
|
33 |
+
# eight three seven five three 0 nine,8375309
|
34 |
+
# eight three seven five three oh ni-ee-ine,8375309
|
35 |
+
# two thousand ten,2010
|
36 |
+
# two thousand and ten,2010
|
37 |
+
# twelve million,12000000
|
38 |
+
# 8 billion,8000000000
|
39 |
+
# twenty ten,2010
|
40 |
+
# thirty-two hundred,3200
|
41 |
+
# nine,9
|
42 |
+
# forty two,42
|
43 |
+
# 1 2 three,123
|
modules/__init__.py
ADDED
File without changes
|
modules/sentiment.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
|
3 |
+
sentiment_obj = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
|
4 |
+
|
5 |
+
|
6 |
+
def sentiment(text):
    """Run the module-level sentiment pipeline on *text* and return its output."""
    prediction = sentiment_obj(text)
    return prediction
|
modules/text2int.py
ADDED
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import spacy # noqa
|
2 |
+
|
3 |
+
# import os
|
4 |
+
# os.environ['KMP_DUPLICATE_LIB_OK']='True'
|
5 |
+
# import spacy
|
6 |
+
|
7 |
+
# Change this according to what words should be corrected to
|
8 |
+
SPELL_CORRECT_MIN_CHAR_DIFF = 2
|
9 |
+
|
10 |
+
TOKENS2INT_ERROR_INT = 32202
|
11 |
+
|
12 |
+
ONES = [
|
13 |
+
"zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
|
14 |
+
"nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
|
15 |
+
"sixteen", "seventeen", "eighteen", "nineteen",
|
16 |
+
]
|
17 |
+
|
18 |
+
CHAR_MAPPING = {
|
19 |
+
"-": " ",
|
20 |
+
"_": " ",
|
21 |
+
"and": " ",
|
22 |
+
}
|
23 |
+
# CHAR_MAPPING.update((str(i), word) for i, word in enumerate([" " + s + " " for s in ONES]))
|
24 |
+
TOKEN_MAPPING = {
|
25 |
+
"and": " ",
|
26 |
+
"oh": "0",
|
27 |
+
}
|
28 |
+
|
29 |
+
|
30 |
+
def find_char_diff(a, b):
    """Return the character-count difference between two strings.

    Each distinct character contributes the absolute difference between the
    number of times it occurs in *a* and in *b*. Character order is ignored,
    so this is not an edit distance.

    Characters that occur in only one of the strings are counted whichever
    string they belong to, which makes the result symmetric:
    ``find_char_diff(a, b) == find_char_diff(b, a)``.

    Args:
        a: First string.
        b: Second string.

    Returns:
        The summed per-character count difference (0 for identical
        character multisets).
    """
    counts_a = {}
    for char in a:
        counts_a[char] = counts_a.get(char, 0) + 1
    counts_b = {}
    for char in b:
        counts_b[char] = counts_b.get(char, 0) + 1
    # Walk the union of characters so letters missing from *a* count too.
    return sum(
        abs(counts_a.get(char, 0) - counts_b.get(char, 0))
        for char in counts_a.keys() | counts_b.keys()
    )
|
51 |
+
|
52 |
+
|
53 |
+
def tokenize(text):
    """Turn *text* into a list of converted number tokens.

    The text is lower-cased, passed through ``replace_chars``, split on
    whitespace, passed through ``replace_tokens``, and every remaining word
    is handed to ``convert_word_to_int``. Entries equal to a single space
    are dropped before and after the conversion.
    """
    normalized = ''.join(replace_chars(text.lower()))
    words = [word for word in replace_tokens(normalized.split()) if word != ' ']
    converted = [convert_word_to_int(word) for word in words]
    return [value for value in converted if value != ' ']
|
67 |
+
|
68 |
+
|
69 |
+
def detokenize(tokens):
    """Join string tokens into one space-separated string."""
    separator = ' '
    return separator.join(tokens)
|
71 |
+
|
72 |
+
|
73 |
+
def replace_tokens(tokens, token_mapping=TOKEN_MAPPING):
    """Return a new list where tokens found in *token_mapping* are replaced.

    Tokens without an entry in the mapping are kept unchanged.
    """
    replaced = []
    for tok in tokens:
        replaced.append(token_mapping.get(tok, tok))
    return replaced
|
75 |
+
|
76 |
+
|
77 |
+
def replace_chars(text, char_mapping=CHAR_MAPPING):
    """Return a list with one entry per character of *text*.

    Characters found in *char_mapping* are replaced by their mapped value;
    all others are kept unchanged.
    """
    replaced = []
    for c in text:
        replaced.append(char_mapping.get(c, c))
    return replaced
|
79 |
+
|
80 |
+
|
81 |
+
def convert_word_to_int(in_word, numwords={}):
    """Convert a single word (or digit string) into an int.

    Lookup order: exact match in *numwords*, then ``int(in_word)``, then a
    spelling-tolerant match against the known number words scored with
    ``find_char_diff``.

    Args:
        in_word: Token to convert.
        numwords: Word-to-value table. The mutable default is deliberate: it
            is filled on the first call and reused as a cache afterwards.

    Returns:
        The integer value of the word, or ``None`` when it is neither a
        number nor within ``SPELL_CORRECT_MIN_CHAR_DIFF`` of a known word.
    """
    tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
    scales = ["hundred", "thousand", "million", "billion", "trillion"]
    if not numwords:
        for idx, word in enumerate(ONES):
            numwords[word] = idx
        for idx, word in enumerate(tens):
            numwords[word] = idx * 10
        for idx, word in enumerate(scales):
            # "hundred" (idx 0) is 10**2; later scales step by a factor of 1000.
            numwords[word] = 10 ** (idx * 3 or 2)
    if in_word in numwords:
        return numwords[in_word]
    try:
        return int(in_word)
    except ValueError:
        pass
    # Spell correction using find_char_diff. Each candidate carries its own
    # numeric value so the best match maps to a number rather than to a list
    # position; the empty placeholders in ``tens`` are not real words and are
    # left out.
    candidates = (
        [(word, idx) for idx, word in enumerate(ONES)]
        + [(word, idx * 10) for idx, word in enumerate(tens) if word]
        + [(word, 10 ** (idx * 3 or 2)) for idx, word in enumerate(scales)]
    )
    char_diffs = [find_char_diff(in_word, word) for word, _ in candidates]
    min_char_diff = min(char_diffs)
    if min_char_diff <= SPELL_CORRECT_MIN_CHAR_DIFF:
        # index() returns the first minimum, so earlier candidates win ties.
        return candidates[char_diffs.index(min_char_diff)][1]
    return None
|
106 |
+
|
107 |
+
|
108 |
+
def tokens2int(tokens):
    """Take a list of numeric tokens and return one int built from them.

    Every token is first classified by magnitude (type 1: <= 9, type 2:
    <= 90, type 3: anything larger); the combination rule then depends on
    the number of tokens and on which types are present.
    """
    # Takes a list of tokens and returns a int representation of them
    types = []
    for i in tokens:
        if i <= 9:
            types.append(1)

        elif i <= 90:
            types.append(2)

        else:
            types.append(3)
    # print(tokens)
    if len(tokens) <= 3:
        # Short inputs: fold the tokens left to right into a running total.
        current = 0
        for i, number in enumerate(tokens):
            if i != 0 and types[i] < types[i - 1] and current != tokens[i - 1] and types[i - 1] != 3:
                # Smaller-magnitude token after a larger non-scale token whose
                # value is not already the running total: add both.
                current += tokens[i] + tokens[i - 1]
            elif current <= tokens[i] and current != 0:
                # Non-zero total followed by an equal or larger token: multiply.
                current *= tokens[i]
            elif 3 not in types and 1 not in types:
                # Only type-2 tokens: concatenate their decimal digits and stop.
                current = int(''.join(str(i) for i in tokens))
                break
            elif '111' in ''.join(str(i) for i in types) and 2 not in types and 3 not in types:
                # Exactly three single digits: concatenate them and stop.
                current = int(''.join(str(i) for i in tokens))
                break
            else:
                current += number

    elif 3 not in types and 2 not in types:
        # More than three tokens, all of type 1: read them as a digit string.
        current = int(''.join(str(i) for i in tokens))

    else:
        # The string below is an earlier pair-based implementation kept as an
        # unused string expression; it has no effect at runtime.
        """
        double_list = []
        current_double = []
        double_type_list = []
        for i in tokens:
            if len(current_double) < 2:
                current_double.append(i)
            else:
                double_list.append(current_double)
                current_double = []
        current_double = []
        for i in types:
            if len(current_double) < 2:
                current_double.append(i)
            else:
                double_type_list.append(current_double)
                current_double = []
        print(double_type_list)
        print(double_list)
        current = 0
        for i, type_double in enumerate(double_type_list):
            if len(type_double) == 1:
                current += double_list[i][0]
            elif type_double[0] == type_double[1]:
                current += int(str(double_list[i][0]) + str(double_list[i][1]))
            elif type_double[0] > type_double[1]:
                current += sum(double_list[i])
            elif type_double[0] < type_double[1]:
                current += double_list[i][0] * double_list[i][1]
            #print(current)
        """
        # Process the tokens in consecutive pairs; ``count`` tracks the
        # position inside the current pair.
        count = 0
        current = 0
        for i, token in enumerate(tokens):
            count += 1
            if count == 2:
                if types[i - 1] == types[i]:
                    # NOTE(review): the current token is written before the
                    # previous one here; confirm this order is intended.
                    current += int(str(token) + str(tokens[i - 1]))
                elif types[i - 1] > types[i]:
                    # Larger magnitude first (e.g. 90 then 2): add the pair.
                    current += tokens[i - 1] + token
                else:
                    # Smaller magnitude first (e.g. 4 then 100): multiply.
                    current += tokens[i - 1] * token
                count = 0
            elif i == len(tokens) - 1:
                # Unpaired last token of an odd-length list.
                current += token

    return current
|
188 |
+
|
189 |
+
|
190 |
+
def text2int(text):
    """Convert *text* to an int by chaining ``tokenize`` and ``tokens2int``."""
    tokens = tokenize(text)
    return tokens2int(tokens)
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi==0.74.*
|
2 |
+
requests==2.27.*
|
3 |
+
sentencepiece==0.1.*
|
4 |
+
torch==1.11.*
|
5 |
+
transformers==4.*
|
6 |
+
uvicorn[standard]==0.17.*
|
7 |
+
pydantic
|
8 |
+
spacy
|
9 |
+
pandas
|
10 |
+
matplotlib
|
11 |
+
pytest
|
scripts/__init__.py
ADDED
File without changes
|
scripts/api_scaling.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""https://zetcode.com/python/concurrent-http-requests/"""
|
2 |
+
|
3 |
+
import asyncio
|
4 |
+
import random
|
5 |
+
import time
|
6 |
+
import pandas as pd
|
7 |
+
import httpx
|
8 |
+
from os.path import exists
|
9 |
+
|
10 |
+
NUMBER_OF_CALLS = 1
|
11 |
+
|
12 |
+
headers = {"Content-Type": "application/json; charset=utf-8"}
|
13 |
+
|
14 |
+
# base_url = "https://tangibleai-mathtext.hf.space/run/{endpoint}"
|
15 |
+
base_url = "http://localhost:7860/run/{endpoint}"
|
16 |
+
|
17 |
+
data_list_1 = {
|
18 |
+
"endpoint": "text2int",
|
19 |
+
"test_data": [
|
20 |
+
"one hundred forty five",
|
21 |
+
"twenty thousand nine hundred fifty",
|
22 |
+
"one hundred forty five",
|
23 |
+
"nine hundred eighty three",
|
24 |
+
"five million",
|
25 |
+
]
|
26 |
+
}
|
27 |
+
|
28 |
+
data_list_2 = {
|
29 |
+
"endpoint": "text2int-preprocessed",
|
30 |
+
"test_data": [
|
31 |
+
"one hundred forty five",
|
32 |
+
"twenty thousand nine hundred fifty",
|
33 |
+
"one hundred forty five",
|
34 |
+
"nine hundred eighty three",
|
35 |
+
"five million",
|
36 |
+
]
|
37 |
+
}
|
38 |
+
data_list_3 = {
|
39 |
+
"endpoint": "sentiment-analysis",
|
40 |
+
"test_data": [
|
41 |
+
"Totally agree",
|
42 |
+
"I like it",
|
43 |
+
"No more",
|
44 |
+
"I am not sure",
|
45 |
+
"Never",
|
46 |
+
]
|
47 |
+
}
|
48 |
+
|
49 |
+
|
50 |
+
# async call to endpoint
|
51 |
+
async def call_api(url, data, call_number, number_of_calls):
    """POST *data* to *url* and return a dict describing the call and its timing."""
    payload = {"data": [data]}
    async with httpx.AsyncClient() as client:
        # perf_counter is used for a more precise latency measurement.
        started = time.perf_counter()
        response = await client.post(url=url, headers=headers, json=payload, timeout=30)
        finished = time.perf_counter()
        endpoint_name = url.split("/")[-1]
        record = {
            "endpoint": endpoint_name,
            "test data": data,
            "status code": response.status_code,
            "response": response.json().get("data"),
            "call number": call_number,
            "number of calls": number_of_calls,
            "start": round(started, 4),
            "end": round(finished, 4),
            "delay": round(finished - started, 4),
        }
        return record
|
68 |
+
|
69 |
+
|
70 |
+
data_lists = [data_list_1, data_list_2, data_list_3]
|
71 |
+
|
72 |
+
results = []
|
73 |
+
|
74 |
+
|
75 |
+
async def main(number_of_calls):
    """Fire *number_of_calls* concurrent requests at each endpoint in ``data_lists``.

    The endpoints are processed one after another; the result dicts of every
    batch are appended to the module-level ``results`` list.
    """
    for data_list in data_lists:
        pending = [
            call_api(
                base_url.format(endpoint=data_list["endpoint"]),
                random.choice(data_list["test_data"]),
                call_number,
                number_of_calls,
            )
            for call_number in range(1, number_of_calls + 1)
        ]
        batch = await asyncio.gather(*pending)
        results.extend(batch)
|
84 |
+
|
85 |
+
|
86 |
+
|
87 |
+
# Time the whole run, then persist the per-call records.
start = time.perf_counter()
asyncio.run(main(NUMBER_OF_CALLS))
end = time.perf_counter()
print(end-start)
df = pd.DataFrame(results)

# Append to an existing history file; otherwise create it with a header row.
history_exists = exists("call_history.csv")
df.to_csv(
    path_or_buf="call_history.csv",
    mode="a" if history_exists else "w",
    header=not history_exists,
    index=False,
)
|
scripts/api_scaling.sh
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash

# Semicolon-separated log file; one row is appended per simulated request.
LOG_FILE_NAME="call_history_bash.csv"

if [[ ! -f "$LOG_FILE_NAME" ]]; then
    # Write the column names only when the file does not exist yet
    echo "student_id;active_students;endpoint;inputs;outputs;started;finished" >"$LOG_FILE_NAME"
fi
|
9 |
+
|
10 |
+
data_list_1() {
    # Prints the number phrase stored at index $1 of the list below.
    responses=(
        "one hundred forty five"
        "twenty thousand nine hundred fifty"
        "one hundred forty five"
        "nine hundred eighty three"
        "five million"
    )
    echo "${responses[$1]}"
}
|
20 |
+
|
21 |
+
data_list_2() {
    # Prints the opinion phrase stored at index $1 of the list below.
    responses=(
        "Totally agree"
        "I like it"
        "No more"
        "I am not sure"
        "Never"
    )
    echo "${responses[$1]}"
}
|
31 |
+
|
32 |
+
# endpoints: "text2int" "sentiment-analysis"
|
33 |
+
# selected endpoint to test
|
34 |
+
endpoint="sentiment-analysis"
|
35 |
+
|
36 |
+
create_random_delay() {
    # creates a random delay for given arguments
    # Prints $RANDOM/32768 multiplied by $1, computed by bc with 8 decimals.
    echo "scale=8; $RANDOM/32768*$1" | bc
}
|
40 |
+
|
41 |
+
simulate_student() {
    # Student simulator waits randomly between 0-10s after an interaction.
    # Based on 100 interactions per student
    # Arguments: $1 student id, $2 number of active students, $3 endpoint name.
    # Every interaction is appended as one row to $LOG_FILE_NAME.
    for i in {1..100}; do

        random_value=$((RANDOM % 5))
        # Pick phrases that match the endpoint under test: number words for
        # text2int, opinion phrases for everything else.
        if [[ "$3" == "text2int" ]]; then
            text=$(data_list_1 $random_value)
        else
            text=$(data_list_2 $random_value)
        fi
        data='{"data": ["'$text'"]}'

        start_=$(date +"%F %T.%6N")

        url="https://tangibleai-mathtext.hf.space/run/$3"
        response=$(curl --silent --connect-timeout 30 --max-time 30 -X POST "$url" -H 'Content-Type: application/json' -d "$data")

        # Normalize time-outs and empty replies to one marker string and keep
        # the raw evidence in bad_response.txt.
        if [[ "$response" == *"Time-out"* ]]; then
            echo "$response" >>bad_response.txt
            response="504 Gateway Time-out"
        elif [[ -z "$response" ]]; then
            echo "No response" >>bad_response.txt
            response="504 Gateway Time-out"
        fi

        end_=$(date +"%F %T.%6N")

        printf "%s;%s;%s;%s;%s;%s;%s\n" "$1" "$2" "$3" "$data" "$response" "$start_" "$end_" >>$LOG_FILE_NAME
        sleep "$(create_random_delay 10)"

    done
}
|
70 |
+
|
71 |
+
echo "start: $(date)"

active_students=250 # the number of students using the system at the same time

i=1
while [[ "$i" -le "$active_students" ]]; do
    # Each simulated student runs as a background job.
    simulate_student "student$i" "$active_students" "$endpoint" &
    sleep "$(create_random_delay 1)" # adding a random delay between students
    i=$(("$i" + 1))
done

# Wait for all background students to finish before printing the end time.
wait
echo "end: $(date)"
|
scripts/make_request.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
|
3 |
+
# Each entry: (endpoint URL, text sent as the "content" field).
calls = (
    ('https://cetinca-fastapi-ep.hf.space/sentiment-analysis', "I reject it"),
    ('https://cetinca-fastapi-ep.hf.space/text2int', "seven thousand nine hundred fifty seven"),
)

for endpoint_url, text in calls:
    response = requests.post(url=endpoint_url, json={"content": text})
    # Decoded JSON body of the reply.
    request = response.json()

    print(request)
|
scripts/make_request.sh
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Smoke test: calls each endpoint once and prints the URL, payload and reply.
#root_url="localhost:7860"
root_url="https://cetinca-fastapi-ep.hf.space"

# GET / (sent without a request body)
ep="/"
url=$root_url$ep
data=''

response=$(curl --silent -X GET "$url" -H 'Content-Type: application/json')

echo "URL: $url"
echo "Data: $data"
echo "Response: $response"
echo

sleep 0.1

# POST /hello
ep="/hello"
url=$root_url$ep
data='{"content":"Rori"}'

response=$(curl --silent -X POST "$url" -H 'Content-Type: application/json' -d "$data")

echo "URL: $url"
echo "Data: $data"
echo "Response: $response"
echo

sleep 0.5

# POST /sentiment-analysis
ep="/sentiment-analysis"
url=$root_url$ep
data='{"content":"I am happy with it!"}'

response=$(curl --silent -X POST "$url" -H 'Content-Type: application/json' -d "$data")

echo "URL: $url"
echo "Data: $data"
echo "Response: $response"
echo

sleep 0.5

# POST /text2int
ep="/text2int"
url=$root_url$ep
data='{"content":"one hundred forty two"}'

response=$(curl --silent -X POST "$url" -H 'Content-Type: application/json' -d "$data")

echo "URL: $url"
echo "Data: $data"
echo "Response: $response"
echo
|
scripts/plot_calls.py
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
|
2 |
+
from datetime import datetime
|
3 |
+
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import pandas as pd
|
6 |
+
|
7 |
+
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Format of the "started" / "finished" columns in the call-history logs.
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S.%f"

log_files = [
    'call_history_sentiment_1_bash.csv',
    'call_history_text2int_1_bash.csv',
]


def to_epoch_seconds(text):
    """Parse a log timestamp string into POSIX seconds (local time zone)."""
    return datetime.strptime(text, TIMESTAMP_FORMAT).timestamp()


def add_timing_columns(df):
    """Return a copy of *df* with the derived analysis columns added.

    Added columns:
      * ``finished_ts`` / ``started_ts`` -- the timestamps as POSIX seconds
      * ``elapsed``   -- call duration in seconds
      * ``success``   -- 0 when the output contains "Time-out", otherwise 1
      * ``time_line`` -- seconds since the first finished call of the same
        ``active_students`` group
    """
    df = df.copy()
    for column in ("finished", "started"):
        df[f"{column}_ts"] = df[column].apply(to_epoch_seconds)
    df["elapsed"] = df["finished_ts"] - df["started_ts"]

    # str() keeps a missing (NaN) output from raising TypeError on `in`.
    df["success"] = df["outputs"].apply(
        lambda x: 0 if "Time-out" in str(x) else 1)

    # One vectorised pass instead of a full-frame apply per student group.
    group_start = df.groupby("active_students")["finished_ts"].transform("min")
    df["time_line"] = df["finished_ts"] - group_start
    return df


def timeline_bins(df):
    """Return ``{student_number: bin_count}`` using one-second-wide bins.

    The count is clamped to at least 1 because a histogram cannot be drawn
    with zero bins (a group whose calls all finished at the same instant).
    """
    finished = df.groupby("active_students")["finished_ts"]
    spans = finished.max() - finished.min()
    return {number: max(1, math.ceil(span)) for number, span in spans.items()}


def success_percentage(df, student_number):
    """Return the percentage (2 decimals) of successful calls for a group.

    Returns 0.0 for a group without any rows instead of raising.
    """
    outcomes = df.loc[df["active_students"] == student_number, "success"]
    if outcomes.empty:
        return 0.0
    return round(float(outcomes.mean()) * 100, 2)


def endpoint_title(log_file):
    """Return the figure title for *log_file*, or None for an unknown log."""
    name = log_file.lower()
    if "sentiment" in name:
        return "API result for 'sentiment-analysis' endpoint"
    if "text2int" in name:
        return "API result for 'text2int' endpoint"
    return None


def plot_elapsed(df, student_numbers, title):
    """Draw a boxplot and a histogram of elapsed time per student group."""
    # squeeze=False keeps `axs` two-dimensional even for a single group.
    fig, axs = plt.subplots(len(student_numbers), 2, squeeze=False)

    for row, student_number in enumerate(student_numbers):
        data = df[df["active_students"] == student_number]
        box_ax, hist_ax = axs[row]

        box_ax.boxplot(x=data["elapsed"])
        box_ax.set_xlabel(f'student number {student_number}')
        box_ax.set_ylabel('Elapsed time (s)')

        hist_ax.hist(x=data["elapsed"], bins=25)
        hist_ax.set_xlabel('seconds')
        hist_ax.set_ylabel('Count of API calls')

    fig.suptitle(title, fontsize=16)


def plot_timeline(df, student_numbers, bins_dict, title):
    """Draw, per student group, a histogram of finished calls over time."""
    fig, axs = plt.subplots(len(student_numbers), 1, squeeze=False)

    for row, student_number in enumerate(student_numbers):
        data = df[df["active_students"] == student_number]

        print(data["time_line"].head(10))

        ax = axs[row][0]
        ax.hist(x=data["time_line"], bins=bins_dict[student_number])
        ax.set_xlabel('seconds')
        ax.set_ylabel('Count of API calls')

    fig.suptitle(title, fontsize=16)


def main():
    """Process every log file, print summary statistics and show the plots."""
    for log_file in log_files:
        path_ = f"./data/{log_file}"
        raw = pd.read_csv(filepath_or_buffer=path_, sep=";")
        if raw.empty:
            print(f"No calls recorded in {log_file}, skipping")
            continue

        df = add_timing_columns(raw)
        student_numbers = sorted(df['active_students'].unique())

        # bin count and zero time for each group
        bins_dict = timeline_bins(df)
        finished = df.groupby("active_students")["finished_ts"]
        min_finished, max_finished = finished.min(), finished.max()

        for student_number in student_numbers:
            bins = bins_dict[student_number]
            print(f"student number: {student_number}")
            print(f"min finished: {min_finished.loc[student_number]}")
            print(f"max finished: {max_finished.loc[student_number]}")
            print(f"bins finished seconds: {bins}, minutes: {bins / 60}")

        # creates a '.csv' from the dataframe
        df.to_csv(f"./data/processed_{log_file}", index=False, sep=";")

        result = df.groupby(['active_students', 'success']).agg({
            'elapsed': ['mean', 'median', 'min', 'max'],
            'success': ['count'],
        })

        print(f"Results for {log_file}")
        print(result, "\n")

        title = endpoint_title(log_file)

        for student_number in student_numbers:
            # Prints percentage of the successful calls
            percentage = success_percentage(df, student_number)
            print(f"Percentage of successful API calls for {student_number} students: {percentage}")

        plot_elapsed(df, student_numbers, title)
        plot_timeline(df, student_numbers, bins_dict, title)

    # Show every figure once all log files have been processed.
    plt.show()


if __name__ == "__main__":
    main()
|
static/styles.css
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* Roboto (light, weight 300) is loaded from Google Fonts. */
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300&display=swap');

/* Page-wide defaults: white text on a black background. */
body {
    font-family: 'Roboto', sans-serif;
    font-size: 16px;
    background-color: black;
    color: white;
}
|
templates/home.html
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<title>Mathbot</title>
|
6 |
+
<link rel="stylesheet" href="{{ url_for('static', path='/styles.css') }}">
|
7 |
+
</head>
|
8 |
+
<body>
|
9 |
+
<h2>Mathbot</h2>
|
10 |
+
<h3>Created with FastAPI</h3>
|
11 |
+
|
12 |
+
<h4>To make a request with python</h4>
|
13 |
+
<pre><code>
|
14 |
+
import requests
|
15 |
+
|
16 |
+
requests.post(
|
17 |
+
url='https://cetinca-fastapi-ep.hf.space/sentiment-analysis',
|
18 |
+
json={"content": "I reject it"}
|
19 |
+
).json()
|
20 |
+
|
21 |
+
requests.post(
|
22 |
+
url='https://cetinca-fastapi-ep.hf.space/text2int',
|
23 |
+
json={"content": "forty two"}
|
24 |
+
).json()
|
25 |
+
|
26 |
+
</code></pre>
|
27 |
+
|
28 |
+
<h4>To make a request with curl</h4>
|
29 |
+
<pre><code>
|
30 |
+
curl --silent -X POST "https://cetinca-fastapi-ep.hf.space/sentiment-analysis" -H 'Content-Type: application/json' -d '{"content":"I am happy with it!"}'
|
31 |
+
|
32 |
+
curl --silent -X POST "https://cetinca-fastapi-ep.hf.space/text2int" -H 'Content-Type: application/json' -d '{"content":"forty two"}'
|
33 |
+
</code></pre>
|
34 |
+
</body>
|
35 |
+
</html>
|
tests/__init__.py
ADDED
File without changes
|
tests/test_text2int.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import unittest
|
2 |
+
|
3 |
+
from fastapi.testclient import TestClient
|
4 |
+
|
5 |
+
from app import app
|
6 |
+
|
7 |
+
# Path to the CSV test fixtures; not read by the tests in this module.
TEST_DATA_FILE = "data/test_data_text2int.csv"

# Kept for backward compatibility with code importing ``client`` from this
# module; the test cases below use the per-test client created in ``setUp``.
client = TestClient(app)


class TestStringMethods(unittest.TestCase):
    """End-to-end checks of the ``/text2int`` endpoint through TestClient."""

    def setUp(self):
        # A fresh client per test keeps the test cases independent.
        self.client = TestClient(app)

    def assert_text2int(self, content, expected):
        """POST *content* to ``/text2int`` and check the returned message.

        Asserts a 200 status code and that the ``message`` field of the JSON
        response, converted to ``str``, equals *expected*.
        """
        response = self.client.post("/text2int", json={"content": content})
        self.assertEqual(response.status_code, 200)
        self.assertEqual(str(response.json()["message"]), expected)

    def test_1(self):
        self.assert_text2int("fourteen", "14")

    def test_2(self):
        self.assert_text2int("one thousand four hundred ninety two", "1492")

    def test_3(self):
        # Mixed case and a hyphenated number word.
        self.assert_text2int("Fourteen Hundred Ninety-Two", "1492")

    def test_4(self):
        # Deliberately misspelled input.
        self.assert_text2int("forteen", "14")

    def test_5(self):
        self.assert_text2int("seventeen-thousand and seventy two", "17072")

    def test_6(self):
        self.assert_text2int("two hundred and nine", "209")


if __name__ == '__main__':
    unittest.main()
|