vijay399 and BlackKakapo committed
Commit d120873 · 0 Parent(s)

Duplicate from BlackKakapo/ParaphraseAPI


Co-authored-by: Alexandru Petrachi <[email protected]>

.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,27 @@
+ # Use the official Python 3.9 image
+ FROM python:3.9
+
+ # Set the working directory to /code
+ WORKDIR /code
+
+ # Copy the requirements file into the container at /code
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Install the Python dependencies from requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Set up a new user named "user" with user ID 1000
+ RUN useradd -m -u 1000 user
+ # Switch to the "user" user
+ USER user
+ # Set home to the user's home directory
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Set the working directory to the user's home directory
+ WORKDIR $HOME/app
+
+ # Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+ COPY --chown=user . $HOME/app
+
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ title: ParaphraseAPI
+ emoji: 😻
+ colorFrom: yellow
+ colorTo: green
+ sdk: docker
+ pinned: false
+ license: apache-2.0
+ duplicated_from: BlackKakapo/ParaphraseAPI
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,24 @@
+ from fastapi import FastAPI
+ import src.paraphrase.Paraphrase as Paraphrase
+ import src.translate.Translate as Translate
+ import nltk
+ nltk.download('punkt')
+
+
+ app = FastAPI(docs_url="/")
+
+
+ @app.get("/")
+ def index():
+     return {"output": "HELLO!!!"}
+
+ @app.get("/paraphrase")
+ def paraphrase(text: str):
+     resultValue, exception = Paraphrase.paraphraseParaphraseMethod(text)
+     return {"request": text, "result": resultValue, "exception": exception}
+
+
+ @app.get("/translate")
+ def translate(text: str):
+     resultValue, exception = Translate.paraphraseTranslateMethod(text)
+     return {"request": text, "result": resultValue, "exception": exception}
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ fastapi==0.74.*
+ requests==2.27.*
+ uvicorn[standard]==0.17.*
+ sentencepiece==0.1.*
+ torch==1.11.*
+ transformers==4.*
+ joblib==1.2.*
+ classifier==2.*
+ cvxopt==1.3.*
+ pandas==1.5.*
+ scikit-learn==1.2.*
+ numpy==1.24.*
+ nltk==3.8.*
src/exception/Exception/Exception.py ADDED
@@ -0,0 +1,51 @@
+ ## Exception
+ TOO_SHORT = "The length of the text you sent me is too short. Minimum length is 20 characters."
+ TOO_LONG = "The length of the text you sent me is too long. Maximum length is 400 characters."
+ ## End Exception
+
+
+ def checkForException(requestValue: str, METHOD: str):
+     exception = ""
+
+     if METHOD == "SYNONYM":
+         exception = checkExceptionSynonym(requestValue)
+     elif METHOD == "TRANSLATE":
+         exception = checkExceptionTranslate(requestValue)
+     elif METHOD == "PARAPHRASE":
+         exception = checkExceptionParaphrase(requestValue)
+     elif METHOD == "PIPELINE":
+         exception = checkExceptionPipeline(requestValue)
+
+     return exception
+
+
+ def checkExceptionSynonym(requestValue: str):
+     if len(requestValue) < 20:
+         return TOO_SHORT
+     if len(requestValue) > 400:
+         return TOO_LONG
+     return ""
+
+
+ def checkExceptionTranslate(requestValue: str):
+     if len(requestValue) < 20:
+         return TOO_SHORT
+     if len(requestValue) > 400:
+         return TOO_LONG
+     return ""
+
+
+ def checkExceptionParaphrase(requestValue: str):
+     if len(requestValue) < 20:
+         return TOO_SHORT
+     if len(requestValue) > 400:
+         return TOO_LONG
+     return ""
+
+
+ def checkExceptionPipeline(requestValue: str):
+     if len(requestValue) < 20:
+         return TOO_SHORT
+     if len(requestValue) > 400:
+         return TOO_LONG
+     return ""
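
All four per-method checks enforce the same 20–400 character window, differing only in name. A quick illustration of the gate (not part of the commit):

    # Illustrative calls; the import path mirrors the module layout above.
    import src.exception.Exception.Exception as ExceptionCustom

    print(ExceptionCustom.checkForException("too short", "PARAPHRASE"))  # TOO_SHORT message
    print(ExceptionCustom.checkForException("a" * 50, "PARAPHRASE"))     # "" (accepted)
    print(ExceptionCustom.checkForException("a" * 50, "UNKNOWN"))        # "" (no check applies)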
src/exception/Exception/__pycache__/Exception.cpython-39.pyc ADDED
Binary file (1.54 kB).
 
src/paraphrase/Paraphrase.py ADDED
@@ -0,0 +1,55 @@
+ from nltk.tokenize import sent_tokenize
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+ import torch
+ import src.exception.Exception.Exception as ExceptionCustom
+
+
+ METHOD = "PARAPHRASE"
+
+ # Load the Romanian paraphrase model and its tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("BlackKakapo/flan-t5-base-paraphrase-ro")
+ model = AutoModelForSeq2SeqLM.from_pretrained("BlackKakapo/flan-t5-base-paraphrase-ro")
+
+ # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ # model.to(device)
+
+
+ def paraphraseParaphraseMethod(requestValue: str):
+     result_value = ""
+
+     exception = ExceptionCustom.checkForException(requestValue, METHOD)
+     if exception != "":
+         return "", exception
+
+     # Split the request into sentences and paraphrase each one separately
+     tokenized_sent_list = sent_tokenize(requestValue)
+
+     for SENTENCE in tokenized_sent_list:
+         text = "paraphrase: " + SENTENCE
+
+         # padding="max_length" replaces the deprecated pad_to_max_length=True
+         encoding = tokenizer.encode_plus(text, padding="max_length", return_tensors="pt")
+         input_ids, attention_masks = encoding["input_ids"], encoding["attention_mask"]
+
+         beam_outputs = model.generate(
+             input_ids=input_ids,
+             attention_mask=attention_masks,
+             do_sample=True,
+             max_length=512,
+             top_k=120,
+             top_p=0.90,
+             early_stopping=False,
+             num_return_sequences=1,
+             no_repeat_ngram_size=2,
+             num_beams=1
+         )
+
+         for beam_output in beam_outputs:
+             text_para = tokenizer.decode(beam_output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+
+             # Keep the candidate only if it differs from the input sentence
+             if SENTENCE.lower().strip() != text_para.lower().strip():
+                 result_value += text_para + " "
+                 break
+
+     return result_value, ""
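
The module splits the input into sentences, prompts the model with a "paraphrase: " prefix, and keeps a sampled candidate only when it differs from the source sentence; because do_sample=True, repeated calls can return different paraphrases. A direct-call sketch (illustrative, not part of the commit; the Romanian input is made up):

    import nltk
    nltk.download('punkt')
    import src.paraphrase.Paraphrase as Paraphrase

    result, exception = Paraphrase.paraphraseParaphraseMethod(
        "Ieri am fost la munte cu prietenii mei și ne-am simțit foarte bine."
    )
    print(result or exception)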
src/paraphrase/__pycache__/Paraphrase.cpython-39.pyc ADDED
Binary file (1.52 kB).
 
src/translate/Translate.py ADDED
@@ -0,0 +1,62 @@
+ from nltk.tokenize import sent_tokenize
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+ import torch
+ import src.exception.Exception.Exception as ExceptionCustom
+
+
+ METHOD = "TRANSLATE"
+
+ # Romanian -> English and English -> Romanian translation models
+ tokenizerROMENG = AutoTokenizer.from_pretrained("BlackKakapo/opus-mt-ro-en")
+ modelROMENG = AutoModelForSeq2SeqLM.from_pretrained("BlackKakapo/opus-mt-ro-en")
+
+ tokenizerENGROM = AutoTokenizer.from_pretrained("BlackKakapo/opus-mt-en-ro")
+ modelENGROM = AutoModelForSeq2SeqLM.from_pretrained("BlackKakapo/opus-mt-en-ro")
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ modelROMENG.to(device)
+ modelENGROM.to(device)
+
+
+ def paraphraseTranslateMethod(requestValue: str):
+     result_value = ""
+
+     exception = ExceptionCustom.checkForException(requestValue, METHOD)
+     if exception != "":
+         return "", exception
+
+     # Paraphrase by round-trip translation: Romanian -> English -> Romanian
+     tokenized_sent_list = sent_tokenize(requestValue)
+
+     for SENTENCE in tokenized_sent_list:
+         # Translate the sentence into English
+         input_ids1 = tokenizerROMENG(SENTENCE, return_tensors='pt').to(device)
+
+         output1 = modelROMENG.generate(
+             input_ids=input_ids1.input_ids,
+             do_sample=True,
+             max_length=256,
+             top_k=90,
+             top_p=0.97,
+             early_stopping=False
+         )
+
+         result1 = tokenizerROMENG.batch_decode(output1, skip_special_tokens=True)[0]
+
+         # Translate the English intermediate back into Romanian
+         input_ids = tokenizerENGROM(result1, return_tensors='pt').to(device)
+
+         output = modelENGROM.generate(
+             input_ids=input_ids.input_ids,
+             do_sample=True,
+             max_length=256,
+             top_k=90,
+             top_p=0.97,
+             early_stopping=False
+         )
+
+         result = tokenizerENGROM.batch_decode(output, skip_special_tokens=True)[0]
+
+         result_value += result + " "
+
+     return result_value, ""
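
This endpoint paraphrases by back-translation: each sentence is sampled through ro -> en and then en -> ro, so the two sampled translations usually land on a reworded Romanian sentence rather than a literal copy. A direct-call sketch (illustrative, not part of the commit; the input sentence is made up):

    import nltk
    nltk.download('punkt')
    import src.translate.Translate as Translate

    result, exception = Translate.paraphraseTranslateMethod(
        "Astăzi vremea este frumoasă și ne plimbăm prin parcul din centrul orașului."
    )
    print(result or exception)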
src/translate/__pycache__/Translate.cpython-39.pyc ADDED
Binary file (1.51 kB).