vumichien committed
Commit ca0120c
1 Parent(s): eb90129

Update main.py

Files changed (1)
  1. main.py +19 -10
main.py CHANGED
@@ -5,8 +5,8 @@ import numpy as np
 from scipy.spatial import distance as dist
 
 from sahi.utils.cv import read_image_as_pil
-from fastapi import FastAPI, File, UploadFile, Form
-from utils import tts, read_image_file, pil_to_base64, base64_to_pil, get_hist
+from fastapi import FastAPI, File, UploadFile
+from utils import tts, read_image_file, pil_to_base64, get_hist
 from typing import Optional
 from huggingface_hub import hf_hub_download
 
@@ -26,14 +26,13 @@ def read_root():
 @app.post("/aisatsu_api/")
 async def predict_api(
         file: UploadFile = File(...),
-        last_seen: Optional[str] = Form(None)
+        last_seen: Union[UploadFile, None] = File(None)
 ):
     image = read_image_file(await file.read())
     results = model.predict(image, show=False)[0]
     image = read_image_as_pil(image)
     masks, boxes = results.masks, results.boxes
     area_image = image.width * image.height
-    voice_bot = None
     most_close = 0
     out_img = None
     diff_value = 0.5
@@ -47,13 +46,23 @@ async def predict_api(
             out_img = image.crop(tuple(box)).resize((64, 64))
             most_close = area_rate
     if last_seen is not None:
-        last_seen = base64_to_pil(last_seen)
+        last_seen = read_image_file(await last_seen.read())
         if out_img is not None:
             diff_value = dist.euclidean(get_hist(out_img), get_hist(last_seen))
     print(most_close, diff_value)
     if most_close >= area_thres and diff_value >= 0.5:
-        voice_bot = tts(defaul_bot_voice, language="ja")
-    return {
-        "voice": voice_bot,
-        "image": pil_to_base64(out_img) if out_img is not None else None
-    }
+        voice_bot_path = tts(defaul_bot_voice, language="ja")
+        image_bot_path = pil_to_base64(out_img)
+        io = BytesIO()
+        zip_filename = "final_archive.zip"
+        with zipfile.ZipFile(io, mode='w', compression=zipfile.ZIP_DEFLATED) as zf:
+            for file_path in [voice_bot_path, image_bot_path]:
+                zf.write(file_path)
+        zf.close()
+        return StreamingResponse(
+            iter([io.getvalue()]),
+            media_type="application/x-zip-compressed",
+            headers={"Content-Disposition": f"attachment;filename=%s" % zip_filename}
+        )
+    else:
+        return {"message": "No face detected"}
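
For reference, a minimal client-side sketch of how the reworked endpoint could be exercised after this change: it uploads the required "file" field plus the now file-typed optional "last_seen" field, then unpacks the zip that is streamed back when a face is detected. Only the route, the field names, and the zip/JSON response shapes come from the diff above; the base URL, the local image file names, and the use of the requests library are assumptions for illustration.

# Hypothetical client for the updated /aisatsu_api/ endpoint (sketch, not part of the commit).
import io
import zipfile

import requests

API_URL = "http://localhost:8000/aisatsu_api/"  # assumed host/port

with open("camera_frame.jpg", "rb") as frame, open("previous_crop.jpg", "rb") as prev:
    resp = requests.post(
        API_URL,
        files={
            "file": ("camera_frame.jpg", frame, "image/jpeg"),
            # "last_seen" is optional; omit it on the first request.
            "last_seen": ("previous_crop.jpg", prev, "image/jpeg"),
        },
        timeout=30,
    )
resp.raise_for_status()

if resp.headers.get("content-type", "").startswith("application/x-zip-compressed"):
    # A face was detected: the body is a zip archive streamed from memory.
    with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
        zf.extractall("aisatsu_response")
        print("Extracted:", zf.namelist())
else:
    # Otherwise the endpoint returns JSON, e.g. {"message": "No face detected"}.
    print(resp.json())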