ikechan8370 committed on
Commit
0abb626
·
1 Parent(s): f617159

Add application file

Browse files
Files changed (5) hide show
  1. .idea/.gitignore +8 -0
  2. Dockerfile +11 -0
  3. README.md +30 -11
  4. main.py +177 -0
  5. requirements.txt +6 -0
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

WORKDIR /code

# Copy and install the dependency list before the sources so this layer is
# reused when only application code changes.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# main.py saves uploads under data/upload and results under data. The README
# creates this directory by hand; do the same inside the image.
RUN mkdir -p data/upload

# Exec form: python is started directly instead of through a shell wrapper.
CMD ["python", "main.py"]
README.md CHANGED
@@ -1,11 +1,30 @@
1
- ---
2
- title: Cp Extra
3
- emoji: 📚
4
- colorFrom: green
5
- colorTo: purple
6
- sdk: docker
7
- pinned: false
8
- license: apache-2.0
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # chatgpt-plugin-extras
2
+
3
+ chatgpt-plugin一些花活额外工具
4
+
5
+ ## 安装
6
+
7
+ `pip install -r requirements.txt`
8
+
9
+ `mkdir -p data/upload`
10
+
11
+ `python main.py`
12
+
13
+ 运行在5000端口。目前默认使用CPU
14
+
15
+ ## 使用
16
+
17
+ ### ImageCaption
18
+
19
+ POST http://127.0.0.1:5000/image-captioning
20
+
21
+ Form-Data \
22
+ file: 图片文件
23
+
24
+ ### Visual QA
25
+
26
+ POST http://127.0.0.1:5000/visual-qa
27
+
28
+ Form-Data \
29
+ file: 图片文件 \
30
+ q: 问题
main.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from uuid import uuid4
3
+
4
+ import torch
5
+ from PIL import Image
6
+ from controlnet_aux import HEDdetector
7
+ from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
8
+ from flask import Flask, request, send_file
9
+ from transformers import BlipProcessor, BlipForConditionalGeneration, BlipForQuestionAnswering
10
+ from transformers import pipeline
11
+
12
+ app = Flask('chatgpt-plugin-extras')
13
+
14
+
15
class VitGPT2:
    """Image captioning via the ``nlpconnect/vit-gpt2-image-captioning`` pipeline."""

    def __init__(self, device):
        """Create the image-to-text pipeline.

        Args:
            device: Device specifier (for example ``"cpu"``) for the pipeline.
        """
        print(f"Initializing VitGPT2 ImageCaptioning to {device}")
        # Forward the device to the pipeline; it used to be logged only, so
        # the pipeline always ran on its own default device.
        self.pipeline = pipeline(
            "image-to-text",
            model="nlpconnect/vit-gpt2-image-captioning",
            device=device,
        )

    def inference(self, image_path):
        """Return the generated caption for the image at ``image_path``.

        Args:
            image_path: Path of the image handed to the pipeline.

        Returns:
            The ``generated_text`` field of the first pipeline result.
        """
        captions = self.pipeline(image_path)[0]['generated_text']
        print(f"\nProcessed ImageCaptioning, Input Image: {image_path}, Output Text: {captions}")
        return captions
24
+
25
+
26
class ImageCaptioning:
    """Image captioning with ``Salesforce/blip-image-captioning-large``."""

    def __init__(self, device):
        """Load the BLIP processor and captioning model onto ``device``.

        Args:
            device: Device string; float16 is used when it contains
                ``'cuda'``, float32 otherwise.
        """
        print(f"Initializing ImageCaptioning to {device}")
        self.device = device
        self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
        self.model = BlipForConditionalGeneration.from_pretrained(
            "Salesforce/blip-image-captioning-large", torch_dtype=self.torch_dtype).to(self.device)

    def inference(self, image_path):
        """Generate a caption for the image stored at ``image_path``.

        Args:
            image_path: Path of the image file to caption.

        Returns:
            The decoded caption with special tokens stripped.
        """
        # Run the processor inside the context manager so the image file
        # handle is released as soon as the tensors have been built.
        with Image.open(image_path) as image:
            inputs = self.processor(image, return_tensors="pt").to(self.device, self.torch_dtype)
        out = self.model.generate(**inputs)
        captions = self.processor.decode(out[0], skip_special_tokens=True)
        print(f"\nProcessed ImageCaptioning, Input Image: {image_path}, Output Text: {captions}")
        return captions
41
+
42
+
43
class VQA:
    """Visual question answering with ``Salesforce/blip-vqa-base``."""

    def __init__(self, device):
        """Load the BLIP processor and question-answering model onto ``device``.

        Args:
            device: Device string; float16 is used when it contains
                ``'cuda'``, float32 otherwise.
        """
        print(f"Initializing Visual QA to {device}")
        self.device = device
        self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
        self.model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base",
                                                              torch_dtype=self.torch_dtype).to(self.device)

    def inference(self, image_path, question):
        """Answer ``question`` about the image stored at ``image_path``.

        Args:
            image_path: Path of the image file the question refers to.
            question: The question text passed to the processor.

        Returns:
            The decoded answer with special tokens stripped.
        """
        # Run the processor inside the context manager so the image file
        # handle is released as soon as the tensors have been built.
        with Image.open(image_path) as image:
            inputs = self.processor(image, question, return_tensors="pt").to(self.device, self.torch_dtype)
        out = self.model.generate(**inputs)
        answers = self.processor.decode(out[0], skip_special_tokens=True)
        print(f"\nProcessed Visual QA, Input Image: {image_path}, Output Text: {answers}")
        return answers
58
+
59
+
60
class Image2Hed:
    """Runs the ``HEDdetector`` from ``lllyasviel/ControlNet`` on an image."""

    def __init__(self, device):
        """Load the detector.

        Args:
            device: Accepted for parity with the other model wrappers; the
                detector is loaded without it.
        """
        print("Initializing Image2Hed")
        self.detector = HEDdetector.from_pretrained('lllyasviel/ControlNet')

    def inference(self, inputs, output_filename):
        """Run the detector on ``inputs`` and save the result under ``data``.

        Args:
            inputs: Path of the source image.
            output_filename: File name (inside ``data``) for the result.

        Returns:
            The URL path ``/result/<output_filename>`` of the saved image.
        """
        output_dir = 'data'
        # Make sure the output directory exists so save() cannot fail on a
        # fresh checkout.
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, output_filename)
        # Keep the source image open only while the detector reads it.
        with Image.open(inputs) as image:
            hed = self.detector(image)
        hed.save(output_path)
        print(f"\nProcessed Image2Hed, Input Image: {inputs}, Output Hed: {output_path}")
        return '/result/' + output_filename
72
+
73
+
74
class Image2Scribble:
    """Runs the ``HEDdetector`` from ``lllyasviel/ControlNet`` with ``scribble=True``."""

    def __init__(self, device):
        """Load the detector.

        Args:
            device: Accepted for parity with the other model wrappers; the
                detector is loaded without it.
        """
        print("Initializing Image2Scribble")
        self.detector = HEDdetector.from_pretrained('lllyasviel/ControlNet')

    def inference(self, inputs, output_filename):
        """Run the detector in scribble mode and save the result under ``data``.

        Args:
            inputs: Path of the source image.
            output_filename: File name (inside ``data``) for the result.

        Returns:
            The URL path ``/result/<output_filename>`` of the saved image.
        """
        output_dir = 'data'
        # Make sure the output directory exists so save() cannot fail on a
        # fresh checkout.
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, output_filename)
        # Keep the source image open only while the detector reads it.
        with Image.open(inputs) as image:
            scribble = self.detector(image, scribble=True)
        scribble.save(output_path)
        # The log line used to say "Image2Hed" (copy-paste from the sibling class).
        print(f"\nProcessed Image2Scribble, Input Image: {inputs}, Output Scribble: {output_path}")
        return '/result/' + output_filename
86
+
87
class InstructPix2Pix:
    """Instruction-driven image editing with ``timbrooks/instruct-pix2pix``."""

    def __init__(self, device):
        """Load the pipeline onto ``device`` and swap in the Euler-ancestral scheduler.

        Args:
            device: Device string; float16 is used when it contains
                ``'cuda'``, float32 otherwise.
        """
        print(f"Initializing InstructPix2Pix to {device}")
        self.device = device
        self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32
        self.pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained("timbrooks/instruct-pix2pix",
                                                                           safety_checker=None,
                                                                           torch_dtype=self.torch_dtype).to(device)
        self.pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(self.pipe.scheduler.config)

    def inference(self, image_path, text, output_filename):
        """Change style of image.

        Args:
            image_path: Path of the image to edit.
            text: The edit instruction passed to the pipeline as the prompt.
            output_filename: File name (inside ``data``) for the result.

        Returns:
            The URL path ``/result/<output_filename>`` of the saved image.
        """
        print("===>Starting InstructPix2Pix Inference")
        # Keep the source image open only while the pipeline consumes it.
        with Image.open(image_path) as original_image:
            image = self.pipe(text, image=original_image, num_inference_steps=40,
                              image_guidance_scale=1.2).images[0]
        output_dir = 'data'
        # Make sure the output directory exists so save() cannot fail on a
        # fresh checkout.
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, output_filename)
        image.save(output_path)

        print(f"\nProcessed InstructPix2Pix, Input Image: {image_path}, Instruct Text: {text}, "
              f"Output Image: {output_path}")
        # Return the bare file name: the /result/<filename> route joins it
        # with 'data' itself, so returning output_path produced an
        # unreachable '/result/data/<file>' URL.
        return '/result/' + output_filename
108
+
109
@app.route('/result/<filename>')
def get_result(filename):
    """Serve a previously generated image from the ``data`` directory.

    Args:
        filename: Name of the file inside ``data`` (taken from the URL).

    Returns:
        The file as an ``image/png`` response; a 404 if it does not exist.
    """
    # Imported here because the module header only imports send_file.
    from flask import send_from_directory

    # send_from_directory joins the name safely and answers 404 for a missing
    # file, where send_file on a hand-built path surfaced a 500.
    return send_from_directory('data', filename, mimetype='image/png')
113
+
114
+
115
# Device all models are loaded onto. Defaults to CPU; set EXTRAS_DEVICE
# (for example to "cuda") to override without editing the code.
DEVICE = os.environ.get('EXTRAS_DEVICE', 'cpu')

ic = ImageCaptioning(DEVICE)
vqa = VQA(DEVICE)
i2h = Image2Hed(DEVICE)
i2s = Image2Scribble(DEVICE)
# Optional models, left disabled in this revision. The /instruct-pix2pix
# route refers to `ip2p`, so it only works once that line is enabled.
# vgic = VitGPT2(DEVICE)
# ip2p = InstructPix2Pix(DEVICE)
121
+
122
@app.route('/image2hed', methods=['POST'])
def imag2hed():
    """Store the uploaded image and run ``i2h`` on it.

    Expects a multipart form field ``file``.

    Returns:
        The URL path returned by ``i2h.inference``.
    """
    file = request.files['file']  # the uploaded file
    upload_dir = os.path.join('data', 'upload')
    # The README creates this directory by hand; create it here as well so a
    # fresh checkout or container does not fail on save().
    os.makedirs(upload_dir, exist_ok=True)
    filepath = os.path.join(upload_dir, str(uuid4()) + '.png')
    file.save(filepath)
    output_filename = str(uuid4()) + '.png'
    return i2h.inference(filepath, output_filename)
131
+
132
+
133
@app.route('/image2Scribble', methods=['POST'])
def image2Scribble():
    """Store the uploaded image and run ``i2s`` on it.

    Expects a multipart form field ``file``.

    Returns:
        The URL path returned by ``i2s.inference``.
    """
    file = request.files['file']  # the uploaded file
    upload_dir = os.path.join('data', 'upload')
    # The README creates this directory by hand; create it here as well so a
    # fresh checkout or container does not fail on save().
    os.makedirs(upload_dir, exist_ok=True)
    filepath = os.path.join(upload_dir, str(uuid4()) + '.png')
    file.save(filepath)
    output_filename = str(uuid4()) + '.png'
    return i2s.inference(filepath, output_filename)
142
+
143
+
144
@app.route('/image-captioning', methods=['POST'])
def image_caption():
    """Store the uploaded image and return the caption produced by ``ic``.

    Expects a multipart form field ``file``.

    Returns:
        The caption text.
    """
    file = request.files['file']  # the uploaded file
    upload_dir = os.path.join('data', 'upload')
    # The README creates this directory by hand; create it here as well so a
    # fresh checkout or container does not fail on save().
    os.makedirs(upload_dir, exist_ok=True)
    filepath = os.path.join(upload_dir, str(uuid4()) + '.png')
    file.save(filepath)
    return ic.inference(filepath)
153
+
154
+
155
@app.route('/visual-qa', methods=['POST'])
def visual_qa():
    """Store the uploaded image and answer a question about it with ``vqa``.

    Expects a multipart form field ``file`` and a question ``q``, sent either
    as a form field (as the README documents) or as a query-string parameter.

    Returns:
        The answer text, or a 400 response when ``q`` is missing or empty.
    """
    file = request.files['file']  # the uploaded file
    # request.values merges the query string and the form body, so the
    # documented form-data usage works alongside the original ?q=... usage.
    question = request.values.get('q')
    if not question:
        return 'Missing required parameter: q', 400
    upload_dir = os.path.join('data', 'upload')
    # The README creates this directory by hand; create it here as well so a
    # fresh checkout or container does not fail on save().
    os.makedirs(upload_dir, exist_ok=True)
    filepath = os.path.join(upload_dir, str(uuid4()) + '.png')
    file.save(filepath)
    return vqa.inference(filepath, question=question)
164
+
165
@app.route('/instruct-pix2pix', methods=['POST'])
def InstructPix2Pix():
    """Store the uploaded image and edit it according to instruction ``t``.

    Expects a multipart form field ``file`` and an instruction ``t``, sent
    either as a form field or as a query-string parameter.

    Returns:
        The URL path returned by the model, a 503 response when the model is
        not loaded, or a 400 response when ``t`` is missing or empty.
    """
    # NOTE(review): this view keeps its original name, which rebinds the
    # module-level name of the InstructPix2Pix class once this line runs.
    # The model instance is created as a commented-out line elsewhere in the
    # file, so look it up defensively instead of raising NameError.
    model = globals().get('ip2p')
    if model is None:
        return 'InstructPix2Pix model is not loaded', 503
    file = request.files['file']  # the uploaded file
    instruction = request.values.get('t')
    if not instruction:
        return 'Missing required parameter: t', 400
    upload_dir = os.path.join('data', 'upload')
    # The README creates this directory by hand; create it here as well so a
    # fresh checkout or container does not fail on save().
    os.makedirs(upload_dir, exist_ok=True)
    filepath = os.path.join(upload_dir, str(uuid4()) + '.png')
    file.save(filepath)
    output_filename = str(uuid4()) + '.png'
    return model.inference(filepath, instruction, output_filename)
175
+
176
if __name__ == '__main__':
    # Listen on all interfaces. The port stays at 5000 (as the README states)
    # unless the PORT environment variable overrides it.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', '5000')))
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
torch~=2.0.1
transformers~=4.30.2
Pillow~=9.5.0
Flask~=2.3.2
controlnet_aux==0.0.5
matplotlib
# main.py imports diffusers at module load, so the app cannot start without it.
# NOTE(review): left unpinned like matplotlib; pin once a version compatible
# with the torch/transformers pins above has been confirmed.
diffusers