yourusername committed
Commit 28b1a6e · 1 Parent(s): a58eee5

:beers: cheers

Files changed (3)
  1. app.py +77 -0
  2. modeling.py +89 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,77 @@
+ import gc
+
+ import gradio as gr
+ import numpy as np
+ import torch
+ from huggingface_hub import hf_hub_download
+ from PIL.Image import Resampling
+ from pytorchvideo.data.encoded_video import EncodedVideo
+ from pytorchvideo.transforms.functional import uniform_temporal_subsample
+ from torchvision.io import write_video
+ from torchvision.transforms.functional import resize
+
+ from modeling import Generator
+
+ MAX_DURATION = 4
+ OUT_FPS = 18
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Reupload of model found here: https://huggingface.co/spaces/awacke1/Image2LineDrawing
+ model = Generator(3, 1, 3)
+ weights_path = hf_hub_download("nateraw/image-2-line-drawing", "pytorch_model.bin")
+ model.load_state_dict(torch.load(weights_path, map_location=DEVICE))
+ model.eval()
+
+
+ def process_one_second(vid, start_sec, out_fps):
+     """Process one second of a video at a given fps.
+
+     Args:
+         vid (EncodedVideo): A pytorchvideo EncodedVideo instance containing the video to process
+         start_sec (int): The second to start processing at
+         out_fps (int): The fps to output the video at
+
+     Returns:
+         torch.Tensor: The processed video as a tensor with shape (T, H, W, C)
+     """
+     # C, T, H, W
+     video_arr = vid.get_clip(start_sec, start_sec + 1)["video"]
+     # C, T, H, W where T == out_fps frames sampled uniformly from this second
+     x = uniform_temporal_subsample(video_arr, out_fps)
+     # C, T, H, W with the shorter edge scaled to 256 (fine for landscape clips; vertical videos get their width scaled instead)
+     x = resize(x, 256, Resampling.BICUBIC)
+     # C, T, H, W -> T, C, H, W (T acts as the batch dimension from here on)
+     x = x.permute(1, 0, 2, 3)
+
+     with torch.no_grad():
+         # T, 1, H, W
+         out = model(x)
+
+     # T, 1, H, W -> T, H, W, 1, rescaled from [0, 1] to [0, 255]
+     out = out.permute(0, 2, 3, 1).clip(0, 1) * 255
+     # Grayscale -> RGB by tiling the single channel
+     out = out.repeat(1, 1, 1, 3)
+     return out
+
+
+ def fn(fpath):
+     start_sec = 0
+     vid = EncodedVideo.from_path(fpath)
+     duration = min(MAX_DURATION, int(vid.duration))
+     for i in range(duration):
+         print(f"🖼️ Processing step {i + 1}/{duration}...")
+         video = process_one_second(vid, start_sec=i + start_sec, out_fps=OUT_FPS)
+         gc.collect()
+         if i == 0:
+             video_all = video
+         else:
+             video_all = np.concatenate((video_all, video))
+
+     write_video("out.mp4", video_all, fps=OUT_FPS)
+     return "out.mp4"
+
+
+ webcam_interface = gr.Interface(
+     fn, gr.Video(source="webcam"), gr.Video(type="file", format="mp4")
+ )
+ webcam_interface.launch()
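The tensor reshaping in process_one_second is the least obvious part of app.py, so here is a minimal sketch of just those steps on a dummy clip. The 18-frame count matches OUT_FPS above; the 456-pixel width is an illustrative assumption, not a value taken from a real video.

import torch

# Stand-in for one second of decoded, subsampled, resized video: C, T, H, W.
x = torch.rand(3, 18, 256, 456)

# C, T, H, W -> T, C, H, W: the 18 frames become a batch of 18 images.
x = x.permute(1, 0, 2, 3)

# Stand-in for model(x): one grayscale channel per frame (T, 1, H, W).
out = torch.rand(18, 1, 256, 456)

# T, 1, H, W -> T, H, W, 1, rescaled from [0, 1] to [0, 255].
out = out.permute(0, 2, 3, 1).clip(0, 1) * 255

# Tile the grayscale channel to RGB, since write_video expects T, H, W, 3.
out = out.repeat(1, 1, 1, 3)
print(out.shape)  # torch.Size([18, 256, 456, 3])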
modeling.py ADDED
@@ -0,0 +1,89 @@
+ # Taken from here: https://huggingface.co/spaces/awacke1/Image2LineDrawing
+ from torch import nn
+
+ norm_layer = nn.InstanceNorm2d
+
+
+ class ResidualBlock(nn.Module):
+     def __init__(self, in_features):
+         super(ResidualBlock, self).__init__()
+
+         conv_block = [
+             nn.ReflectionPad2d(1),
+             nn.Conv2d(in_features, in_features, 3),
+             norm_layer(in_features),
+             nn.ReLU(inplace=True),
+             nn.ReflectionPad2d(1),
+             nn.Conv2d(in_features, in_features, 3),
+             norm_layer(in_features),
+         ]
+
+         self.conv_block = nn.Sequential(*conv_block)
+
+     def forward(self, x):
+         return x + self.conv_block(x)
+
+
+ class Generator(nn.Module):
+     def __init__(self, input_nc, output_nc, n_residual_blocks=9, sigmoid=True):
+         super(Generator, self).__init__()
+
+         # Initial convolution block
+         model0 = [
+             nn.ReflectionPad2d(3),
+             nn.Conv2d(input_nc, 64, 7),
+             norm_layer(64),
+             nn.ReLU(inplace=True),
+         ]
+         self.model0 = nn.Sequential(*model0)
+
+         # Downsampling
+         model1 = []
+         in_features = 64
+         out_features = in_features * 2
+         for _ in range(2):
+             model1 += [
+                 nn.Conv2d(in_features, out_features, 3, stride=2, padding=1),
+                 norm_layer(out_features),
+                 nn.ReLU(inplace=True),
+             ]
+             in_features = out_features
+             out_features = in_features * 2
+         self.model1 = nn.Sequential(*model1)
+
+         # Residual blocks
+         model2 = []
+         for _ in range(n_residual_blocks):
+             model2 += [ResidualBlock(in_features)]
+         self.model2 = nn.Sequential(*model2)
+
+         # Upsampling
+         model3 = []
+         out_features = in_features // 2
+         for _ in range(2):
+             model3 += [
+                 nn.ConvTranspose2d(
+                     in_features, out_features, 3, stride=2, padding=1, output_padding=1
+                 ),
+                 norm_layer(out_features),
+                 nn.ReLU(inplace=True),
+             ]
+             in_features = out_features
+             out_features = in_features // 2
+         self.model3 = nn.Sequential(*model3)
+
+         # Output layer
+         model4 = [nn.ReflectionPad2d(3), nn.Conv2d(64, output_nc, 7)]
+         if sigmoid:
+             model4 += [nn.Sigmoid()]
+
+         self.model4 = nn.Sequential(*model4)
+
+     def forward(self, x, cond=None):
+         out = self.model0(x)
+         out = self.model1(out)
+         out = self.model2(out)
+         out = self.model3(out)
+         out = self.model4(out)
+
+         return out
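As a quick sanity check on the architecture, the sketch below runs a dummy batch through the Generator with the same constructor arguments app.py uses; the 256×256 input size is an assumption chosen to match the resize in app.py.

import torch

from modeling import Generator

model = Generator(3, 1, 3)  # 3 input channels, 1 output channel, 3 residual blocks
model.eval()

with torch.no_grad():
    y = model(torch.rand(1, 3, 256, 256))  # N, C, H, W

# The two stride-2 downsampling convs are undone by the two stride-2
# transposed convs, so spatial size is preserved, and the Sigmoid head
# keeps outputs in (0, 1).
print(y.shape)  # torch.Size([1, 1, 256, 256])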
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ gradio
+ huggingface_hub
+ torch==1.11.0
+ torchvision==0.12.0
+ pytorchvideo==0.1.5
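A small sketch to confirm the pinned stack resolved as expected after pip install -r requirements.txt; it uses only the standard library and is not required by the app itself.

from importlib.metadata import version

# Expect torch 1.11.0, torchvision 0.12.0, pytorchvideo 0.1.5;
# gradio and huggingface_hub are unpinned, so any resolved version is fine.
for pkg in ("gradio", "huggingface_hub", "torch", "torchvision", "pytorchvideo"):
    print(pkg, version(pkg))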