Spaces:

vijay399
/

testing

Running

App Files Files Community

vijay399

BlackKakapo committed on Apr 24, 2023

Commit

d120873

0 Parent(s):

Duplicate from BlackKakapo/ParaphraseAPI

Browse files

Co-authored-by: Alexandru Petrachi <[email protected]>

Files changed (11) hide show

.gitattributes +34 -0
Dockerfile +27 -0
README.md +12 -0
app.py +24 -0
requirements.txt +13 -0
src/exception/Exception/Exception.py +74 -0
src/exception/Exception/__pycache__/Exception.cpython-39.pyc +0 -0
src/paraphrase/Paraphrase.py +56 -0
src/paraphrase/__pycache__/Paraphrase.cpython-39.pyc +0 -0
src/translate/Translate.py +62 -0
src/translate/__pycache__/Translate.cpython-39.pyc +0 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,27 @@

+# Use the official Python 3.9 image
+FROM python:3.9
+# Set the working directory to /code
+WORKDIR /code
+# Copy requirements.txt into the container at /code
+COPY ./requirements.txt /code/requirements.txt
+# Install requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Set up a new user named "user" with user ID 1000
+RUN useradd -m -u 1000 user
+# Switch to the "user" user
+USER user
+# Set HOME to the user's home directory and add the user's local bin directory to PATH
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+# Set the working directory to the app directory inside the user's home directory
+WORKDIR $HOME/app
+# Copy the current directory contents into the container at $HOME/app setting the owner to the user
+COPY --chown=user . $HOME/app
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md ADDED Viewed

	@@ -0,0 +1,12 @@

+---
+title: ParaphraseAPI
+emoji: 😻
+colorFrom: yellow
+colorTo: green
+sdk: docker
+pinned: false
+license: apache-2.0
+duplicated_from: BlackKakapo/ParaphraseAPI
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,24 @@

+from fastapi import FastAPI
+import src.paraphrase.Paraphrase as Paraphrase
+import src.translate.Translate as Translate
+import nltk
+# Download the NLTK 'punkt' resource when this module is imported.
+nltk.download('punkt')
+# Create the FastAPI application; docs_url="/" places the interactive docs page at the root path.
+app = FastAPI(docs_url="/")
+@app.get("/")
+def index():
+    """Return a fixed greeting payload."""
+    # NOTE(review): the app is created with docs_url="/", so the docs page
+    # may take precedence over this route - confirm it is reachable.
+    greeting = {"output": "HELLO!!!"}
+    return greeting
+@app.get("/paraphrase")
+def paraphrase(text: str):
+    """Handle GET /paraphrase.
+
+    Passes ``text`` to ``Paraphrase.paraphraseParaphraseMethod`` and returns
+    a dict with the original request, the result and the exception message
+    reported by that call.
+    """
+    paraphrased, error_message = Paraphrase.paraphraseParaphraseMethod(text)
+    response = {
+        "request": text,
+        "result": paraphrased,
+        "exception": error_message,
+    }
+    return response
+@app.get("/translate")
+def translate(text: str):
+    """Handle GET /translate.
+
+    Passes ``text`` to ``Translate.paraphraseTranslateMethod`` and returns a
+    dict with the original request, the result and the exception message
+    reported by that call.
+
+    The handler was previously also named ``paraphrase``, which rebound the
+    module-level name of the /paraphrase handler. The route path, query
+    parameter and response shape are unchanged.
+    """
+    resultValue, exception = Translate.paraphraseTranslateMethod(text)
+    return {"request": text, "result": resultValue, "exception": exception}

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+fastapi==0.74.*
+requests==2.27.*
+uvicorn[standard]==0.17.*
+sentencepiece==0.1.*
+torch==1.11.*
+transformers==4.*
+joblib==1.2.*
+classifier==2.*
+cvxopt==1.3.*
+pandas==1.5.*
+scikit-learn==1.2.*
+numpy==1.24.*
+nltk==3.8.*

src/exception/Exception/Exception.py ADDED Viewed

	@@ -0,0 +1,74 @@

+## Exception
+# Length limits (in characters) shared by every validation method.
+MIN_LENGTH = 20
+MAX_LENGTH = 400
+TOO_SHORT = f"The length of text you sent to me is too short. Minimum length is {MIN_LENGTH} characters."
+TOO_LONG = f"The length of text you sent to me is too long. maximum length is {MAX_LENGTH} characters."
+## End Exception
+
+
+def _checkLength(requestValue : str) -> str:
+    """Return TOO_SHORT / TOO_LONG if the text length is out of range, else ""."""
+    length = len(requestValue)
+    if length < MIN_LENGTH:
+        return TOO_SHORT
+    if length > MAX_LENGTH:
+        return TOO_LONG
+    return ""
+
+
+# The per-method validators currently apply the same length rule; they are
+# kept as separate public names so each method can diverge later and so
+# existing callers keep working.
+def checkExceptionSynosnym(requestValue : str) -> str:
+    """Validate text for the SYNONYM method; return an error message or ""."""
+    return _checkLength(requestValue)
+
+
+def checkExceptionTranslate(requestValue : str) -> str:
+    """Validate text for the TRANSLATE method; return an error message or ""."""
+    return _checkLength(requestValue)
+
+
+def checkExceptionParaphrase(requestValue : str) -> str:
+    """Validate text for the PARAPHRASE method; return an error message or ""."""
+    return _checkLength(requestValue)
+
+
+def checkExceptionPipeline(requestValue : str) -> str:
+    """Validate text for the PIPELINE method; return an error message or ""."""
+    return _checkLength(requestValue)
+
+
+# Maps the METHOD name to the validator that handles it.
+_CHECKS_BY_METHOD = {
+    "SYNONYM": checkExceptionSynosnym,
+    "TRANSLATE": checkExceptionTranslate,
+    "PARAPHRASE": checkExceptionParaphrase,
+    "PIPELINE": checkExceptionPipeline,
+}
+
+
+def checkForException(requestValue : str, METHOD : str) -> str:
+    """Validate ``requestValue`` with the validator registered for ``METHOD``.
+
+    Returns the validation message (TOO_SHORT or TOO_LONG) when the text is
+    rejected and "" when it is accepted. An unrecognised ``METHOD`` performs
+    no validation and returns "".
+    """
+    check = _CHECKS_BY_METHOD.get(METHOD)
+    if check is None:
+        return ""
+    return check(requestValue)

src/exception/Exception/__pycache__/Exception.cpython-39.pyc ADDED Viewed

Binary file (1.54 kB). View file

src/paraphrase/Paraphrase.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from nltk.tokenize import sent_tokenize
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import torch
+import src.exception.Exception.Exception as ExceptionCustom
+# Method name passed to ExceptionCustom.checkForException to select the validator.
+METHOD = "PARAPHRASE"
+# Load the tokenizer and seq2seq model once, when this module is imported.
+tokenizer = AutoTokenizer.from_pretrained("BlackKakapo/flan-t5-base-paraphrase-ro")
+model = AutoModelForSeq2SeqLM.from_pretrained("BlackKakapo/flan-t5-base-paraphrase-ro")
+# Explicit device placement is disabled here; the model is left where from_pretrained loaded it.
+# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# model.to(device)
+def paraphraseParaphraseMethod(requestValue : str):
+    """Paraphrase ``requestValue`` one sentence at a time.
+
+    Returns a ``(result, exception)`` tuple. When validation rejects the
+    text, the result is ``""`` and ``exception`` holds the validation
+    message. Otherwise ``exception`` is ``""`` and the result is the
+    generated sentences, each followed by a single space.
+    """
+    exception = ExceptionCustom.checkForException(requestValue, METHOD)
+    if exception != "":
+        return "", exception
+
+    paraphrased = []
+    for SENTENCE in sent_tokenize(requestValue):
+        text = "paraphrase: " + SENTENCE
+        # padding="longest" replaces the deprecated pad_to_max_length=True;
+        # with no max_length given, both select the "longest" strategy.
+        encoding = tokenizer.encode_plus(text, padding="longest", return_tensors="pt")
+        beam_outputs = model.generate(
+            input_ids=encoding["input_ids"],
+            attention_mask=encoding["attention_mask"],
+            do_sample=True,
+            max_length=512,
+            top_k=120,
+            top_p=0.90,
+            early_stopping=False,
+            num_return_sequences=1,
+            no_repeat_ngram_size=2,
+            num_beams=1
+        )
+        for beam_output in beam_outputs:
+            text_para = tokenizer.decode(beam_output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+            # Keep the first output that differs from the input sentence
+            # (case- and whitespace-insensitive); if none differs, the
+            # sentence contributes nothing to the result.
+            if SENTENCE.lower().strip() != text_para.lower().strip():
+                paraphrased.append(text_para + " ")
+                break
+    return "".join(paraphrased), ""

src/paraphrase/__pycache__/Paraphrase.cpython-39.pyc ADDED Viewed

Binary file (1.52 kB). View file

src/translate/Translate.py ADDED Viewed

	@@ -0,0 +1,62 @@

+from nltk.tokenize import sent_tokenize
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import torch
+import src.exception.Exception.Exception as ExceptionCustom
+# Method name passed to ExceptionCustom.checkForException to select the validator.
+METHOD = "TRANSLATE"
+# Load both tokenizer/model pairs once, when this module is imported.
+tokenizerROMENG = AutoTokenizer.from_pretrained("BlackKakapo/opus-mt-ro-en")
+modelROMENG = AutoModelForSeq2SeqLM.from_pretrained("BlackKakapo/opus-mt-ro-en")
+tokenizerENGROM = AutoTokenizer.from_pretrained("BlackKakapo/opus-mt-en-ro")
+modelENGROM = AutoModelForSeq2SeqLM.from_pretrained("BlackKakapo/opus-mt-en-ro")
+# Use CUDA when torch reports it as available, otherwise the CPU, and move both models there.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+modelROMENG.to(device)
+modelENGROM.to(device)
+def paraphraseTranslateMethod(requestValue : str):
+    """Rewrite ``requestValue`` by passing each sentence through two models.
+
+    Every sentence is fed to ``modelROMENG`` and that output is then fed to
+    ``modelENGROM``. Returns a ``(result, exception)`` tuple: when validation
+    rejects the text, the result is ``""`` and ``exception`` holds the
+    validation message; otherwise ``exception`` is ``""`` and the result is
+    the generated sentences, each followed by a single space.
+    """
+    validation_message = ExceptionCustom.checkForException(requestValue, METHOD)
+    if validation_message != "":
+        return "", validation_message
+
+    pieces = []
+    for sentence in sent_tokenize(requestValue):
+        # First pass: ROMENG tokenizer and model.
+        first_inputs = tokenizerROMENG(sentence, return_tensors='pt').to(device)
+        first_tokens = modelROMENG.generate(
+            input_ids=first_inputs.input_ids,
+            do_sample=True,
+            max_length=256,
+            top_k=90,
+            top_p=0.97,
+            early_stopping=False
+        )
+        intermediate = tokenizerROMENG.batch_decode(first_tokens, skip_special_tokens=True)[0]
+
+        # Second pass: ENGROM tokenizer and model on the intermediate text.
+        second_inputs = tokenizerENGROM(intermediate, return_tensors='pt').to(device)
+        second_tokens = modelENGROM.generate(
+            input_ids=second_inputs.input_ids,
+            do_sample=True,
+            max_length=256,
+            top_k=90,
+            top_p=0.97,
+            early_stopping=False
+        )
+        final_text = tokenizerENGROM.batch_decode(second_tokens, skip_special_tokens=True)[0]
+        pieces.append(final_text + " ")
+    return "".join(pieces), ""

src/translate/__pycache__/Translate.cpython-39.pyc ADDED Viewed

Binary file (1.51 kB). View file