hysts HF staff committed on
Commit
f185807
·
1 Parent(s): 1e6091d
.gitattributes CHANGED
@@ -1,3 +1,4 @@
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
@@ -17,6 +18,7 @@
17
  *.pth filter=lfs diff=lfs merge=lfs -text
18
  *.rar filter=lfs diff=lfs merge=lfs -text
19
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 
20
  *.tar.* filter=lfs diff=lfs merge=lfs -text
21
  *.tflite filter=lfs diff=lfs merge=lfs -text
22
  *.tgz filter=lfs diff=lfs merge=lfs -text
 
1
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
2
  *.7z filter=lfs diff=lfs merge=lfs -text
3
  *.arrow filter=lfs diff=lfs merge=lfs -text
4
  *.bin filter=lfs diff=lfs merge=lfs -text
 
18
  *.pth filter=lfs diff=lfs merge=lfs -text
19
  *.rar filter=lfs diff=lfs merge=lfs -text
20
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar filter=lfs diff=lfs merge=lfs -text
22
  *.tar.* filter=lfs diff=lfs merge=lfs -text
23
  *.tflite filter=lfs diff=lfs merge=lfs -text
24
  *.tgz filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ mmdet_configs/configs
.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "ViTPose"]
2
+ path = ViTPose
3
+ url = https://github.com/ViTAE-Transformer/ViTPose
.pre-commit-config.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exclude: ^(ViTPose/|mmdet_configs/configs/)
2
+ repos:
3
+ - repo: https://github.com/pre-commit/pre-commit-hooks
4
+ rev: v4.2.0
5
+ hooks:
6
+ - id: check-executables-have-shebangs
7
+ - id: check-json
8
+ - id: check-merge-conflict
9
+ - id: check-shebang-scripts-are-executable
10
+ - id: check-toml
11
+ - id: check-yaml
12
+ - id: double-quote-string-fixer
13
+ - id: end-of-file-fixer
14
+ - id: mixed-line-ending
15
+ args: ['--fix=lf']
16
+ - id: requirements-txt-fixer
17
+ - id: trailing-whitespace
18
+ - repo: https://github.com/myint/docformatter
19
+ rev: v1.4
20
+ hooks:
21
+ - id: docformatter
22
+ args: ['--in-place']
23
+ - repo: https://github.com/pycqa/isort
24
+ rev: 5.10.1
25
+ hooks:
26
+ - id: isort
27
+ - repo: https://github.com/pre-commit/mirrors-mypy
28
+ rev: v0.812
29
+ hooks:
30
+ - id: mypy
31
+ args: ['--ignore-missing-imports']
32
+ - repo: https://github.com/google/yapf
33
+ rev: v0.32.0
34
+ hooks:
35
+ - id: yapf
36
+ args: ['--parallel', '--in-place']
37
+ - repo: https://github.com/kynan/nbstripout
38
+ rev: 0.5.0
39
+ hooks:
40
+ - id: nbstripout
41
+ args: ['--extra-keys', 'metadata.interpreter metadata.kernelspec cell.metadata.pycharm']
42
+ - repo: https://github.com/nbQA-dev/nbQA
43
+ rev: 1.3.1
44
+ hooks:
45
+ - id: nbqa-isort
46
+ - id: nbqa-yapf
.style.yapf ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [style]
2
+ based_on_style = pep8
3
+ blank_line_before_nested_class_or_def = false
4
+ spaces_before_comment = 2
5
+ split_before_logical_operator = true
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🦀
4
  colorFrom: gray
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 3.0.13
8
  app_file: app.py
9
  pinned: false
10
  ---
 
4
  colorFrom: gray
5
  colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 3.0.11
8
  app_file: app.py
9
  pinned: false
10
  ---
ViTPose ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 92d0aa2710b8e9136dc1712a1c13c12157e435e8
app.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import pathlib
7
+ import tarfile
8
+
9
+ import gradio as gr
10
+
11
+ from model import AppModel
12
+
13
+ DESCRIPTION = '''# ViTPose
14
+
15
+ This is an unofficial demo for [https://github.com/ViTAE-Transformer/ViTPose](https://github.com/ViTAE-Transformer/ViTPose).
16
+
17
+ Related app: [https://huggingface.co/spaces/Gradio-Blocks/ViTPose](https://huggingface.co/spaces/Gradio-Blocks/ViTPose)
18
+
19
+ '''
20
+ FOOTER = '<img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.glitch.me/badge?page_id=hysts.vitpose_video" />'
21
+
22
+
23
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the demo application.

    Returns:
        Namespace with ``device`` (default ``'cpu'``), ``theme``, ``share``,
        ``port`` and ``enable_queue`` (``True`` unless ``--disable-queue``
        is passed).
    """
    # Flag -> keyword arguments, registered in the order shown by ``--help``.
    option_table = (
        ('--device', dict(type=str, default='cpu')),
        ('--theme', dict(type=str)),
        ('--share', dict(action='store_true')),
        ('--port', dict(type=int)),
        ('--disable-queue', dict(dest='enable_queue', action='store_false')),
    )
    cli = argparse.ArgumentParser()
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()
33
+
34
+
35
def set_example_video(example: list) -> dict:
    """Build a Gradio update that loads the clicked example into the input.

    Args:
        example: The selected dataset row; its first element is used as the
            new value of the video component.

    Returns:
        The update produced by ``gr.Video.update``.
    """
    selected_video = example[0]
    return gr.Video.update(value=selected_video)
37
+
38
+
39
def extract_tar() -> None:
    """Unpack ``mmdet_configs/configs.tar`` unless it was already unpacked.

    The presence of ``mmdet_configs/configs`` is taken as the sign that a
    previous run already extracted the archive.
    """
    configs_dir = pathlib.Path('mmdet_configs/configs')
    if not configs_dir.exists():
        with tarfile.open('mmdet_configs/configs.tar') as archive:
            archive.extractall('mmdet_configs')
44
+
45
+
46
def main():
    """Build the ViTPose video demo UI and launch the Gradio server."""
    args = parse_args()

    # The detector configs referenced by the model live inside the tarball,
    # so it has to be unpacked before the model is constructed.
    extract_tar()
    model = AppModel(device=args.device)

    detector_choices = list(model.det_model.MODEL_DICT.keys())
    pose_choices = list(model.pose_model.MODEL_DICT.keys())
    example_rows = [[video.as_posix()]
                    for video in sorted(pathlib.Path('videos').rglob('*.mp4'))]

    with gr.Blocks(theme=args.theme, css='style.css') as demo:
        gr.Markdown(DESCRIPTION)

        with gr.Row():
            # Left column: input video, model selection and example clips.
            with gr.Column():
                input_video = gr.Video(label='Input Video',
                                       format='mp4',
                                       elem_id='input_video')
                with gr.Group():
                    detector_name = gr.Dropdown(
                        detector_choices,
                        value=model.det_model.model_name,
                        label='Detector')
                    pose_model_name = gr.Dropdown(
                        pose_choices,
                        value=model.pose_model.model_name,
                        label='Pose Model')
                    det_score_threshold = gr.Slider(
                        0, 1, step=0.05, value=0.5,
                        label='Box Score Threshold')
                    max_num_frames = gr.Slider(
                        1, 300, step=1, value=60,
                        label='Maximum Number of Frames')
                    predict_button = gr.Button(value='Predict')
                    # Holds the predictions of the last run so that the
                    # Redraw button can reuse them.
                    pose_preds = gr.Variable()

                example_videos = gr.Dataset(components=[input_video],
                                            samples=example_rows)

            # Right column: rendered result and visualization controls.
            with gr.Column():
                with gr.Group():
                    result = gr.Video(label='Result',
                                      format='mp4',
                                      elem_id='result')
                    vis_kpt_score_threshold = gr.Slider(
                        0, 1, step=0.05, value=0.3,
                        label='Visualization Score Threshold')
                    vis_dot_radius = gr.Slider(
                        1, 10, step=1, value=4, label='Dot Radius')
                    vis_line_thickness = gr.Slider(
                        1, 10, step=1, value=2, label='Line Thickness')
                    redraw_button = gr.Button(value='Redraw')

        gr.Markdown(FOOTER)

        # Inputs shared by both rendering callbacks.
        vis_inputs = [
            vis_kpt_score_threshold,
            vis_dot_radius,
            vis_line_thickness,
        ]
        predict_inputs = [
            input_video,
            detector_name,
            pose_model_name,
            det_score_threshold,
            max_num_frames,
        ] + vis_inputs
        redraw_inputs = [input_video, pose_preds] + vis_inputs

        detector_name.change(fn=model.det_model.set_model,
                             inputs=detector_name,
                             outputs=None)
        pose_model_name.change(fn=model.pose_model.set_model,
                               inputs=pose_model_name,
                               outputs=None)
        predict_button.click(fn=model.run,
                             inputs=predict_inputs,
                             outputs=[result, pose_preds])
        redraw_button.click(fn=model.visualize_pose_results,
                            inputs=redraw_inputs,
                            outputs=result)

        example_videos.click(fn=set_example_video,
                             inputs=example_videos,
                             outputs=input_video)

    demo.launch(
        enable_queue=args.enable_queue,
        server_port=args.port,
        share=args.share,
    )


if __name__ == '__main__':
    main()
mmdet_configs/LICENSE ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2018-2023 OpenMMLab. All rights reserved.
2
+
3
+ Apache License
4
+ Version 2.0, January 2004
5
+ http://www.apache.org/licenses/
6
+
7
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8
+
9
+ 1. Definitions.
10
+
11
+ "License" shall mean the terms and conditions for use, reproduction,
12
+ and distribution as defined by Sections 1 through 9 of this document.
13
+
14
+ "Licensor" shall mean the copyright owner or entity authorized by
15
+ the copyright owner that is granting the License.
16
+
17
+ "Legal Entity" shall mean the union of the acting entity and all
18
+ other entities that control, are controlled by, or are under common
19
+ control with that entity. For the purposes of this definition,
20
+ "control" means (i) the power, direct or indirect, to cause the
21
+ direction or management of such entity, whether by contract or
22
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
23
+ outstanding shares, or (iii) beneficial ownership of such entity.
24
+
25
+ "You" (or "Your") shall mean an individual or Legal Entity
26
+ exercising permissions granted by this License.
27
+
28
+ "Source" form shall mean the preferred form for making modifications,
29
+ including but not limited to software source code, documentation
30
+ source, and configuration files.
31
+
32
+ "Object" form shall mean any form resulting from mechanical
33
+ transformation or translation of a Source form, including but
34
+ not limited to compiled object code, generated documentation,
35
+ and conversions to other media types.
36
+
37
+ "Work" shall mean the work of authorship, whether in Source or
38
+ Object form, made available under the License, as indicated by a
39
+ copyright notice that is included in or attached to the work
40
+ (an example is provided in the Appendix below).
41
+
42
+ "Derivative Works" shall mean any work, whether in Source or Object
43
+ form, that is based on (or derived from) the Work and for which the
44
+ editorial revisions, annotations, elaborations, or other modifications
45
+ represent, as a whole, an original work of authorship. For the purposes
46
+ of this License, Derivative Works shall not include works that remain
47
+ separable from, or merely link (or bind by name) to the interfaces of,
48
+ the Work and Derivative Works thereof.
49
+
50
+ "Contribution" shall mean any work of authorship, including
51
+ the original version of the Work and any modifications or additions
52
+ to that Work or Derivative Works thereof, that is intentionally
53
+ submitted to Licensor for inclusion in the Work by the copyright owner
54
+ or by an individual or Legal Entity authorized to submit on behalf of
55
+ the copyright owner. For the purposes of this definition, "submitted"
56
+ means any form of electronic, verbal, or written communication sent
57
+ to the Licensor or its representatives, including but not limited to
58
+ communication on electronic mailing lists, source code control systems,
59
+ and issue tracking systems that are managed by, or on behalf of, the
60
+ Licensor for the purpose of discussing and improving the Work, but
61
+ excluding communication that is conspicuously marked or otherwise
62
+ designated in writing by the copyright owner as "Not a Contribution."
63
+
64
+ "Contributor" shall mean Licensor and any individual or Legal Entity
65
+ on behalf of whom a Contribution has been received by Licensor and
66
+ subsequently incorporated within the Work.
67
+
68
+ 2. Grant of Copyright License. Subject to the terms and conditions of
69
+ this License, each Contributor hereby grants to You a perpetual,
70
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
71
+ copyright license to reproduce, prepare Derivative Works of,
72
+ publicly display, publicly perform, sublicense, and distribute the
73
+ Work and such Derivative Works in Source or Object form.
74
+
75
+ 3. Grant of Patent License. Subject to the terms and conditions of
76
+ this License, each Contributor hereby grants to You a perpetual,
77
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
78
+ (except as stated in this section) patent license to make, have made,
79
+ use, offer to sell, sell, import, and otherwise transfer the Work,
80
+ where such license applies only to those patent claims licensable
81
+ by such Contributor that are necessarily infringed by their
82
+ Contribution(s) alone or by combination of their Contribution(s)
83
+ with the Work to which such Contribution(s) was submitted. If You
84
+ institute patent litigation against any entity (including a
85
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
86
+ or a Contribution incorporated within the Work constitutes direct
87
+ or contributory patent infringement, then any patent licenses
88
+ granted to You under this License for that Work shall terminate
89
+ as of the date such litigation is filed.
90
+
91
+ 4. Redistribution. You may reproduce and distribute copies of the
92
+ Work or Derivative Works thereof in any medium, with or without
93
+ modifications, and in Source or Object form, provided that You
94
+ meet the following conditions:
95
+
96
+ (a) You must give any other recipients of the Work or
97
+ Derivative Works a copy of this License; and
98
+
99
+ (b) You must cause any modified files to carry prominent notices
100
+ stating that You changed the files; and
101
+
102
+ (c) You must retain, in the Source form of any Derivative Works
103
+ that You distribute, all copyright, patent, trademark, and
104
+ attribution notices from the Source form of the Work,
105
+ excluding those notices that do not pertain to any part of
106
+ the Derivative Works; and
107
+
108
+ (d) If the Work includes a "NOTICE" text file as part of its
109
+ distribution, then any Derivative Works that You distribute must
110
+ include a readable copy of the attribution notices contained
111
+ within such NOTICE file, excluding those notices that do not
112
+ pertain to any part of the Derivative Works, in at least one
113
+ of the following places: within a NOTICE text file distributed
114
+ as part of the Derivative Works; within the Source form or
115
+ documentation, if provided along with the Derivative Works; or,
116
+ within a display generated by the Derivative Works, if and
117
+ wherever such third-party notices normally appear. The contents
118
+ of the NOTICE file are for informational purposes only and
119
+ do not modify the License. You may add Your own attribution
120
+ notices within Derivative Works that You distribute, alongside
121
+ or as an addendum to the NOTICE text from the Work, provided
122
+ that such additional attribution notices cannot be construed
123
+ as modifying the License.
124
+
125
+ You may add Your own copyright statement to Your modifications and
126
+ may provide additional or different license terms and conditions
127
+ for use, reproduction, or distribution of Your modifications, or
128
+ for any such Derivative Works as a whole, provided Your use,
129
+ reproduction, and distribution of the Work otherwise complies with
130
+ the conditions stated in this License.
131
+
132
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
133
+ any Contribution intentionally submitted for inclusion in the Work
134
+ by You to the Licensor shall be under the terms and conditions of
135
+ this License, without any additional terms or conditions.
136
+ Notwithstanding the above, nothing herein shall supersede or modify
137
+ the terms of any separate license agreement you may have executed
138
+ with Licensor regarding such Contributions.
139
+
140
+ 6. Trademarks. This License does not grant permission to use the trade
141
+ names, trademarks, service marks, or product names of the Licensor,
142
+ except as required for reasonable and customary use in describing the
143
+ origin of the Work and reproducing the content of the NOTICE file.
144
+
145
+ 7. Disclaimer of Warranty. Unless required by applicable law or
146
+ agreed to in writing, Licensor provides the Work (and each
147
+ Contributor provides its Contributions) on an "AS IS" BASIS,
148
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149
+ implied, including, without limitation, any warranties or conditions
150
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151
+ PARTICULAR PURPOSE. You are solely responsible for determining the
152
+ appropriateness of using or redistributing the Work and assume any
153
+ risks associated with Your exercise of permissions under this License.
154
+
155
+ 8. Limitation of Liability. In no event and under no legal theory,
156
+ whether in tort (including negligence), contract, or otherwise,
157
+ unless required by applicable law (such as deliberate and grossly
158
+ negligent acts) or agreed to in writing, shall any Contributor be
159
+ liable to You for damages, including any direct, indirect, special,
160
+ incidental, or consequential damages of any character arising as a
161
+ result of this License or out of the use or inability to use the
162
+ Work (including but not limited to damages for loss of goodwill,
163
+ work stoppage, computer failure or malfunction, or any and all
164
+ other commercial damages or losses), even if such Contributor
165
+ has been advised of the possibility of such damages.
166
+
167
+ 9. Accepting Warranty or Additional Liability. While redistributing
168
+ the Work or Derivative Works thereof, You may choose to offer,
169
+ and charge a fee for, acceptance of support, warranty, indemnity,
170
+ or other liability obligations and/or rights consistent with this
171
+ License. However, in accepting such obligations, You may act only
172
+ on Your own behalf and on Your sole responsibility, not on behalf
173
+ of any other Contributor, and only if You agree to indemnify,
174
+ defend, and hold each Contributor harmless for any liability
175
+ incurred by, or claims asserted against, such Contributor by reason
176
+ of your accepting any such warranty or additional liability.
177
+
178
+ END OF TERMS AND CONDITIONS
179
+
180
+ APPENDIX: How to apply the Apache License to your work.
181
+
182
+ To apply the Apache License to your work, attach the following
183
+ boilerplate notice, with the fields enclosed by brackets "[]"
184
+ replaced with your own identifying information. (Don't include
185
+ the brackets!) The text should be enclosed in the appropriate
186
+ comment syntax for the file format. We also recommend that a
187
+ file or class name and description of purpose be included on the
188
+ same "printed page" as the copyright notice for easier
189
+ identification within third-party archives.
190
+
191
+ Copyright 2018-2023 OpenMMLab.
192
+
193
+ Licensed under the Apache License, Version 2.0 (the "License");
194
+ you may not use this file except in compliance with the License.
195
+ You may obtain a copy of the License at
196
+
197
+ http://www.apache.org/licenses/LICENSE-2.0
198
+
199
+ Unless required by applicable law or agreed to in writing, software
200
+ distributed under the License is distributed on an "AS IS" BASIS,
201
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202
+ See the License for the specific language governing permissions and
203
+ limitations under the License.
mmdet_configs/README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ `configs.tar` is a tarball of https://github.com/open-mmlab/mmdetection/tree/v2.24.1/configs.
2
+ The license file of mmdetection is also included in this directory.
mmdet_configs/configs.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d2091e07da6b74a6cd694e895b653485f7ce9d5d17738a415ca77a56940b989
3
+ size 3389440
model.py ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import subprocess
5
+ import sys
6
+ import tempfile
7
+
8
if os.getenv('SYSTEM') == 'spaces':
    # When SYSTEM=spaces, replace whatever mmcv-full / OpenCV builds are
    # installed with the pinned versions before they are imported below.
    import mim

    mim.uninstall('mmcv-full', confirm_yes=True)
    mim.install('mmcv-full==1.5.0', is_yes=True)

    # Run pip through the interpreter executing this module so the packages
    # are changed in this environment, not in whichever ``pip`` happens to
    # be first on PATH. Return codes are ignored, as before.
    _pip = [sys.executable, '-m', 'pip']
    subprocess.call(_pip + ['uninstall', '-y', 'opencv-python'])
    subprocess.call(_pip + ['uninstall', '-y', 'opencv-python-headless'])
    subprocess.call(_pip + ['install', 'opencv-python-headless==4.5.5.64'])
17
+
18
+ import cv2
19
+ import huggingface_hub
20
+ import numpy as np
21
+ import torch
22
+ import torch.nn as nn
23
+
24
+ sys.path.insert(0, 'ViTPose/')
25
+
26
+ from mmdet.apis import inference_detector, init_detector
27
+ from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
28
+ process_mmdet_results, vis_pose_result)
29
+
30
+ HF_TOKEN = os.environ['HF_TOKEN']
31
+
32
+
33
class DetModel:
    """Wrapper around an mmdet detector that can be switched by name."""

    # Display name -> mmdet config path and checkpoint URL.
    MODEL_DICT = {
        'YOLOX-tiny': {
            'config':
            'mmdet_configs/configs/yolox/yolox_tiny_8x8_300e_coco.py',
            'model':
            'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth',
        },
        'YOLOX-s': {
            'config':
            'mmdet_configs/configs/yolox/yolox_s_8x8_300e_coco.py',
            'model':
            'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth',
        },
        'YOLOX-l': {
            'config':
            'mmdet_configs/configs/yolox/yolox_l_8x8_300e_coco.py',
            'model':
            'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth',
        },
        'YOLOX-x': {
            'config':
            'mmdet_configs/configs/yolox/yolox_x_8x8_300e_coco.py',
            'model':
            'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth',
        },
    }

    def __init__(self, device: str | torch.device):
        """Instantiate every detector once, then keep 'YOLOX-l' loaded."""
        self.device = torch.device(device)
        # NOTE(review): the results of this pass are discarded; presumably
        # it exists to pre-download the checkpoints -- confirm.
        self._load_all_models_once()
        self.model_name = 'YOLOX-l'
        self.model = self._load_model(self.model_name)

    def _load_all_models_once(self) -> None:
        """Build each model listed in ``MODEL_DICT`` and drop the result."""
        for name in self.MODEL_DICT:
            self._load_model(name)

    def _load_model(self, name: str) -> nn.Module:
        """Build the detector registered under ``name``.

        Raises:
            KeyError: If ``name`` is not a key of ``MODEL_DICT``.
        """
        dic = self.MODEL_DICT[name]
        return init_detector(dic['config'], dic['model'], device=self.device)

    def set_model(self, name: str) -> None:
        """Switch the active detector to ``name`` (no-op if already active).

        The new model is loaded before any attribute is touched, so a failed
        load leaves ``model_name`` and ``model`` describing the same,
        previously active detector.
        """
        if name == self.model_name:
            return
        model = self._load_model(name)
        self.model_name = name
        self.model = model

    def detect_and_visualize(
            self, image: np.ndarray,
            score_threshold: float) -> tuple[list[np.ndarray], np.ndarray]:
        """Run detection on an RGB image and also return a rendering of it."""
        out = self.detect(image)
        vis = self.visualize_detection_results(image, out, score_threshold)
        return out, vis

    def detect(self, image: np.ndarray) -> list[np.ndarray]:
        """Run the active detector on an RGB image and return its raw output."""
        image = image[:, :, ::-1]  # RGB -> BGR
        out = inference_detector(self.model, image)
        return out

    def visualize_detection_results(
            self,
            image: np.ndarray,
            detection_results: list[np.ndarray],
            score_threshold: float = 0.3) -> np.ndarray:
        """Draw the detections of the first class on an RGB image.

        Only ``detection_results[0]`` is kept; the other 79 entries are
        replaced by empty ``(0, 5)`` arrays.
        NOTE(review): presumably index 0 is the COCO "person" class out of
        80 -- confirm.
        """
        person_det = [detection_results[0]] + [np.array([]).reshape(0, 5)] * 79

        image = image[:, :, ::-1]  # RGB -> BGR
        vis = self.model.show_result(image,
                                     person_det,
                                     score_thr=score_threshold,
                                     bbox_color=None,
                                     text_color=(200, 200, 200),
                                     mask_color=None)
        return vis[:, :, ::-1]  # BGR -> RGB
108
+
109
+
110
class PoseModel:
    """Wrapper around a ViTPose top-down pose model switchable by name."""

    # Display name -> mmpose config path and checkpoint path inside the
    # 'hysts/ViTPose' Hub repository.
    MODEL_DICT = {
        'ViTPose-B (single-task train)': {
            'config':
            'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py',
            'model': 'models/vitpose-b.pth',
        },
        'ViTPose-L (single-task train)': {
            'config':
            'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py',
            'model': 'models/vitpose-l.pth',
        },
        'ViTPose-B (multi-task train, COCO)': {
            'config':
            'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py',
            'model': 'models/vitpose-b-multi-coco.pth',
        },
        'ViTPose-L (multi-task train, COCO)': {
            'config':
            'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py',
            'model': 'models/vitpose-l-multi-coco.pth',
        },
    }

    def __init__(self, device: str | torch.device):
        """Load the default model, 'ViTPose-B (multi-task train, COCO)'."""
        self.device = torch.device(device)
        self.model_name = 'ViTPose-B (multi-task train, COCO)'
        self.model = self._load_model(self.model_name)

    def _load_all_models_once(self) -> None:
        """Build each model listed in ``MODEL_DICT`` and drop the result."""
        for name in self.MODEL_DICT:
            self._load_model(name)

    def _load_model(self, name: str) -> nn.Module:
        """Fetch the checkpoint for ``name`` from the Hub and build the model.

        Raises:
            KeyError: If ``name`` is not a key of ``MODEL_DICT``.
        """
        dic = self.MODEL_DICT[name]
        ckpt_path = huggingface_hub.hf_hub_download('hysts/ViTPose',
                                                    dic['model'],
                                                    use_auth_token=HF_TOKEN)
        model = init_pose_model(dic['config'], ckpt_path, device=self.device)
        return model

    def set_model(self, name: str) -> None:
        """Switch the active pose model to ``name`` (no-op if already active).

        The new model is loaded before any attribute is touched, so a failed
        download or load leaves ``model_name`` and ``model`` describing the
        same, previously active model.
        """
        if name == self.model_name:
            return
        model = self._load_model(name)
        self.model_name = name
        self.model = model

    def predict_pose_and_visualize(
        self,
        image: np.ndarray,
        det_results: list[np.ndarray],
        box_score_threshold: float,
        kpt_score_threshold: float,
        vis_dot_radius: int,
        vis_line_thickness: int,
    ) -> tuple[list[dict[str, np.ndarray]], np.ndarray]:
        """Estimate poses on an RGB image and also return a rendering."""
        out = self.predict_pose(image, det_results, box_score_threshold)
        vis = self.visualize_pose_results(image, out, kpt_score_threshold,
                                          vis_dot_radius, vis_line_thickness)
        return out, vis

    def predict_pose(
            self,
            image: np.ndarray,
            det_results: list[np.ndarray],
            box_score_threshold: float = 0.5) -> list[dict[str, np.ndarray]]:
        """Run top-down pose estimation for the detected boxes.

        Args:
            image: RGB image.
            det_results: Raw detector output, as returned by
                ``DetModel.detect``.
            box_score_threshold: Passed as ``bbox_thr`` to the pose
                inference call.
        """
        image = image[:, :, ::-1]  # RGB -> BGR
        # NOTE(review): category id 1 is presumably "person" -- confirm.
        person_results = process_mmdet_results(det_results, 1)
        out, _ = inference_top_down_pose_model(self.model,
                                               image,
                                               person_results=person_results,
                                               bbox_thr=box_score_threshold,
                                               format='xyxy')
        return out

    def visualize_pose_results(self,
                               image: np.ndarray,
                               pose_results: list[dict[str, np.ndarray]],
                               kpt_score_threshold: float = 0.3,
                               vis_dot_radius: int = 4,
                               vis_line_thickness: int = 1) -> np.ndarray:
        """Draw ``pose_results`` on an RGB image and return the RGB result."""
        image = image[:, :, ::-1]  # RGB -> BGR
        vis = vis_pose_result(self.model,
                              image,
                              pose_results,
                              kpt_score_thr=kpt_score_threshold,
                              radius=vis_dot_radius,
                              thickness=vis_line_thickness)
        return vis[:, :, ::-1]  # BGR -> RGB
199
+
200
+
201
class AppModel:
    """Detector + pose model pipeline that renders its results to a video."""

    def __init__(self, device: str | torch.device):
        self.det_model = DetModel(device)
        self.pose_model = PoseModel(device)

    @staticmethod
    def _transcode_to_h264(src_path: str) -> str:
        """Re-encode ``src_path`` with libx264 and return the new file path.

        The output file is created with ``delete=False`` and is left on disk
        for the caller. ffmpeg's exit status is not checked, as before.
        """
        out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
        # Only the path is needed; close the handle so it is not leaked.
        out_file.close()
        # Argument list instead of a split string: paths containing
        # whitespace stay a single argument.
        subprocess.run([
            'ffmpeg', '-y', '-loglevel', 'quiet', '-stats', '-i', src_path,
            '-c:v', 'libx264', out_file.name
        ])
        return out_file.name

    def _process_video(self, video_path: str, num_frames: int,
                       process_frame) -> str:
        """Render up to ``num_frames`` processed frames into an H.264 video.

        Args:
            video_path: Path of the video to read.
            num_frames: Upper bound on the number of frames read; reading
                stops earlier when the capture returns no more frames.
            process_frame: Callable ``(index, rgb_frame) -> rgb_image``
                producing the image written for each frame.

        Returns:
            Path of the transcoded result video.
        """
        cap = cv2.VideoCapture(video_path)
        writer = None
        try:
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            fps = cap.get(cv2.CAP_PROP_FPS)

            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            with tempfile.NamedTemporaryFile(suffix='.mp4') as temp_file:
                writer = cv2.VideoWriter(temp_file.name, fourcc, fps,
                                         (width, height))
                for index in range(num_frames):
                    ok, frame = cap.read()
                    if not ok:
                        break
                    rgb_frame = frame[:, :, ::-1]  # BGR -> RGB
                    vis = process_frame(index, rgb_frame)
                    writer.write(vis[:, :, ::-1])  # RGB -> BGR
                # Finish the intermediate file before ffmpeg reads it.
                writer.release()
                return self._transcode_to_h264(temp_file.name)
        finally:
            # Also reached when processing raises, so neither the capture
            # nor the writer is leaked.
            cap.release()
            if writer is not None:
                writer.release()

    def run(
        self, video_path: str, det_model_name: str, pose_model_name: str,
        box_score_threshold: float, max_num_frames: int,
        kpt_score_threshold: float, vis_dot_radius: int,
        vis_line_thickness: int
    ) -> tuple[str | None, list[list[dict[str, np.ndarray]]] | None]:
        """Detect people and estimate poses on the first frames of a video.

        Args:
            video_path: Path of the input video, or ``None``.
            det_model_name: Key of ``DetModel.MODEL_DICT`` to use.
            pose_model_name: Key of ``PoseModel.MODEL_DICT`` to use.
            box_score_threshold: Box score threshold for pose estimation.
            max_num_frames: Maximum number of frames to process.
            kpt_score_threshold: Keypoint score threshold for drawing.
            vis_dot_radius: Radius of the drawn keypoints.
            vis_line_thickness: Thickness of the drawn skeleton lines.

        Returns:
            ``(result_video_path, per_frame_pose_predictions)``, or
            ``(None, None)`` when no video was given. A pair is returned in
            both cases because the callback feeds two output components.
        """
        if video_path is None:
            return None, None
        self.det_model.set_model(det_model_name)
        self.pose_model.set_model(pose_model_name)

        preds_all = []

        def process_frame(_index, rgb_frame):
            det_preds = self.det_model.detect(rgb_frame)
            preds, vis = self.pose_model.predict_pose_and_visualize(
                rgb_frame, det_preds, box_score_threshold, kpt_score_threshold,
                vis_dot_radius, vis_line_thickness)
            preds_all.append(preds)
            return vis

        # int(): tolerate a slider value that arrives as a float.
        out_path = self._process_video(video_path, int(max_num_frames),
                                       process_frame)
        return out_path, preds_all

    def visualize_pose_results(self, video_path: str,
                               pose_preds_all: list[list[dict[str,
                                                              np.ndarray]]],
                               kpt_score_threshold: float, vis_dot_radius: int,
                               vis_line_thickness: int) -> str | None:
        """Redraw previously computed poses with new visualization settings.

        Args:
            video_path: Path of the input video, or ``None``.
            pose_preds_all: Per-frame predictions as returned by ``run``,
                or ``None``.
            kpt_score_threshold: Keypoint score threshold for drawing.
            vis_dot_radius: Radius of the drawn keypoints.
            vis_line_thickness: Thickness of the drawn skeleton lines.

        Returns:
            Path of the rendered video, or ``None`` when either the video or
            the predictions are missing.
        """
        if video_path is None or pose_preds_all is None:
            return None

        def process_frame(index, rgb_frame):
            return self.pose_model.visualize_pose_results(
                rgb_frame, pose_preds_all[index], kpt_score_threshold,
                vis_dot_radius, vis_line_thickness)

        return self._process_video(video_path, len(pose_preds_all),
                                   process_frame)
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ mmcv-full==1.5.0
2
+ mmdet==2.24.1
3
+ mmpose==0.25.1
4
+ numpy==1.22.4
5
+ opencv-python-headless==4.5.5.64
6
+ openmim==0.1.5
7
+ timm==0.5.4
8
+ torch==1.11.0
9
+ torchvision==0.12.0
style.css ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ h1 {
2
+ text-align: center;
3
+ }
4
+ /*
5
+ div#input_video {
6
+ max-width: 600px;
7
+ max-height: 600px;
8
+ }
9
+ div#result {
10
+ max-width: 600px;
11
+ max-height: 600px;
12
+ }
13
+ */
14
+ img#visitor-badge {
15
+ display: block;
16
+ margin: auto;
17
+ }
videos/README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ These videos are from the following public-domain sources:
2
+
3
+ - https://www.pexels.com/video/young-guy-doing-break-dance-on-the-street-5362370/
4
+ - https://www.pexels.com/video/a-woman-dancing-at-home-6003986/
5
+ - https://www.pexels.com/video/long-haired-man-dancing-in-a-library-6344381/
6
+ - https://www.pexels.com/video/a-female-model-dancing-around-6815069/
videos/pexels-allan-mas-5362370.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:747f9c2f9d19e4955603e1a13b69663187882d4c6a8fbcad18ddbd04ee792d4d
3
+ size 1972564
videos/pexels-artem-podrez-6003986.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1044083afc06aa6f956838c7fcd582c9cfd59ea3a994adc8a0f5889ffca4d9c8
3
+ size 2494082
videos/pexels-c-technical-6344381.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7763476045f4683d53d751fb8befaf637c0101a0693e72f5b582e6aa5ac63cac
3
+ size 3967587
videos/pexels-roman-odintsov-6815069.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44045b239c0f523bfeedc5871019ae9f67525fcf65ba46d7ca4516994e6b2f57
3
+ size 2617714