Spaces:
Sleeping
Sleeping
dinhquangson
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -158,7 +158,27 @@ async def download_database():
|
|
158 |
|
159 |
# Return the zip file as a response for download
|
160 |
return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
|
|
|
|
|
|
|
|
|
|
|
161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
@app.get("/")
def api_home():
    """Root endpoint: respond with a static welcome message."""
    welcome = {'detail': 'Welcome to FastAPI Qdrant importer!'}
    return welcome
|
|
|
158 |
|
159 |
# Return the zip file as a response for download
|
160 |
return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
|
161 |
+
|
162 |
+
@app.post("/pdf2text/")
async def create_upload_file(file: UploadFile = File(...)):
    """OCR an uploaded PDF and return the text of all of its pages.

    The upload is written under ``temp_path``, converted to page images with
    ``pdf2image.convert_from_path``, and each image is passed to
    ``pytesseract.image_to_string`` with the Vietnamese (``vie``) language.

    Args:
        file: The uploaded PDF, received as multipart form data.

    Returns:
        The recognised text of every page, in page order, with a newline
        appended after each page's text.
    """
    # Function-scope imports: these names are only needed by this endpoint.
    import os

    import pytesseract
    from pdf2image import convert_from_path

    # The client controls ``file.filename``. Keep only its final path
    # component (treating backslashes as separators too) so a name such as
    # "../../app.py" cannot escape ``temp_path``; fall back to a fixed name
    # when no usable filename was sent.
    raw_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(raw_name) or "upload.pdf"
    file_savePath = join(temp_path, safe_name)

    with open(file_savePath, 'wb') as f:
        shutil.copyfileobj(file.file, f)

    # Convert the saved PDF to images.
    images = convert_from_path(file_savePath)

    # Extract text from every image. The original loop iterated an undefined
    # name (`fruits`) and returned only the last page's text; iterate the
    # converted images and return the text of all pages instead.
    return "".join(
        pytesseract.image_to_string(image, lang='vie') + '\n'
        for image in images
    )
|
182 |
@app.get("/")
def api_home():
    """Root endpoint: respond with a static welcome message."""
    welcome = {'detail': 'Welcome to FastAPI Qdrant importer!'}
    return welcome
|