vumichien committed
Commit ca0120c
1 Parent(s): eb90129

Update main.py

Files changed (1)
  1. main.py +19 -10
main.py CHANGED
@@ -5,8 +5,8 @@ import numpy as np
 from scipy.spatial import distance as dist
 
 from sahi.utils.cv import read_image_as_pil
-from fastapi import FastAPI, File, UploadFile, Form
-from utils import tts, read_image_file, pil_to_base64, base64_to_pil, get_hist
+from fastapi import FastAPI, File, UploadFile
+from utils import tts, read_image_file, pil_to_base64, get_hist
 from typing import Optional
 from huggingface_hub import hf_hub_download
 
@@ -26,14 +26,13 @@ def read_root():
 @app.post("/aisatsu_api/")
 async def predict_api(
         file: UploadFile = File(...),
-        last_seen: Optional[str] = Form(None)
+        last_seen: Union[UploadFile, None] = File(None)
 ):
     image = read_image_file(await file.read())
     results = model.predict(image, show=False)[0]
     image = read_image_as_pil(image)
     masks, boxes = results.masks, results.boxes
     area_image = image.width * image.height
-    voice_bot = None
     most_close = 0
     out_img = None
     diff_value = 0.5
@@ -47,13 +46,23 @@ async def predict_api(
             out_img = image.crop(tuple(box)).resize((64, 64))
             most_close = area_rate
     if last_seen is not None:
-        last_seen = base64_to_pil(last_seen)
+        last_seen = read_image_file(await last_seen.read())
         if out_img is not None:
             diff_value = dist.euclidean(get_hist(out_img), get_hist(last_seen))
     print(most_close, diff_value)
     if most_close >= area_thres and diff_value >= 0.5:
-        voice_bot = tts(defaul_bot_voice, language="ja")
-    return {
-        "voice": voice_bot,
-        "image": pil_to_base64(out_img) if out_img is not None else None
-    }
+        voice_bot_path = tts(defaul_bot_voice, language="ja")
+        image_bot_path = pil_to_base64(out_img)
+        io = BytesIO()
+        zip_filename = "final_archive.zip"
+        with zipfile.ZipFile(io, mode='w', compression=zipfile.ZIP_DEFLATED) as zf:
+            for file_path in [voice_bot_path, image_bot_path]:
+                zf.write(file_path)
+        zf.close()
+        return StreamingResponse(
+            iter([io.getvalue()]),
+            media_type="application/x-zip-compressed",
+            headers={"Content-Disposition": f"attachment;filename=%s" % zip_filename}
+        )
+    else:
+        return {"message": "No face detected"}
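
For reference, a minimal client-side sketch of how the reworked endpoint could be exercised after this change: it uploads the required "file" field plus the now file-typed optional "last_seen" field, then unpacks the zip that is streamed back when a face is detected. Only the route, the field names, and the zip/JSON response shapes come from the diff above; the base URL, the local image file names, and the use of the requests library are assumptions for illustration.

# Hypothetical client for the updated /aisatsu_api/ endpoint (sketch, not part of the commit).
import io
import zipfile

import requests

API_URL = "http://localhost:8000/aisatsu_api/"  # assumed host/port

with open("camera_frame.jpg", "rb") as frame, open("previous_crop.jpg", "rb") as prev:
    resp = requests.post(
        API_URL,
        files={
            "file": ("camera_frame.jpg", frame, "image/jpeg"),
            # "last_seen" is optional; omit it on the first request.
            "last_seen": ("previous_crop.jpg", prev, "image/jpeg"),
        },
        timeout=30,
    )
resp.raise_for_status()

if resp.headers.get("content-type", "").startswith("application/x-zip-compressed"):
    # A face was detected: the body is a zip archive streamed from memory.
    with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
        zf.extractall("aisatsu_response")
        print("Extracted:", zf.namelist())
else:
    # Otherwise the endpoint returns JSON, e.g. {"message": "No face detected"}.
    print(resp.json())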