vukadinovic936 committed on
Commit
8806acd
·
1 Parent(s): a6bdf5e
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +1 -0
  2. submission/__pycache__/__init__.cpython-310.pyc +0 -0
  3. submission/__pycache__/__init__.cpython-36.pyc +0 -0
  4. submission/__pycache__/__init__.cpython-38.pyc +0 -0
  5. submission/__pycache__/__init__.cpython-39.pyc +0 -0
  6. submission/__pycache__/run_context.cpython-310.pyc +0 -0
  7. submission/__pycache__/run_context.cpython-36.pyc +0 -0
  8. submission/__pycache__/run_context.cpython-38.pyc +0 -0
  9. submission/__pycache__/run_context.cpython-39.pyc +0 -0
  10. submission/__pycache__/submit.cpython-310.pyc +0 -0
  11. submission/__pycache__/submit.cpython-36.pyc +0 -0
  12. submission/__pycache__/submit.cpython-38.pyc +0 -0
  13. submission/__pycache__/submit.cpython-39.pyc +0 -0
  14. submission/internal/__pycache__/__init__.cpython-36.pyc +0 -0
  15. submission/internal/__pycache__/__init__.cpython-38.pyc +0 -0
  16. submission/internal/__pycache__/__init__.cpython-39.pyc +0 -0
  17. submission/internal/__pycache__/local.cpython-36.pyc +0 -0
  18. submission/internal/__pycache__/local.cpython-38.pyc +0 -0
  19. submission/internal/__pycache__/local.cpython-39.pyc +0 -0
  20. tflib/.custom_ops.py.un~ +0 -0
  21. tflib/__init__.py +0 -18
  22. tflib/__pycache__/__init__.cpython-36.pyc +0 -0
  23. tflib/__pycache__/__init__.cpython-38.pyc +0 -0
  24. tflib/__pycache__/__init__.cpython-39.pyc +0 -0
  25. tflib/__pycache__/autosummary.cpython-36.pyc +0 -0
  26. tflib/__pycache__/autosummary.cpython-38.pyc +0 -0
  27. tflib/__pycache__/autosummary.cpython-39.pyc +0 -0
  28. tflib/__pycache__/custom_ops.cpython-36.pyc +0 -0
  29. tflib/__pycache__/network.cpython-36.pyc +0 -0
  30. tflib/__pycache__/optimizer.cpython-36.pyc +0 -0
  31. tflib/__pycache__/tfutil.cpython-36.pyc +0 -0
  32. tflib/__pycache__/tfutil.cpython-38.pyc +0 -0
  33. tflib/__pycache__/tfutil.cpython-39.pyc +0 -0
  34. tflib/_cudacache/fused_bias_act_26d9116e2d7d28cdd451d2d21a1bb7ac.dll +0 -0
  35. tflib/_cudacache/fused_bias_act_40c2d778d681ce8357acbd8537ee6613.so +0 -0
  36. tflib/_cudacache/fused_bias_act_d3f49482b02b6a96d02e754e40c292bc.dll +0 -0
  37. tflib/_cudacache/upfirdn_2d_0acb578db4996f3ae2116b184d4a1bdb.dll +0 -0
  38. tflib/_cudacache/upfirdn_2d_79757bc08e1fcc2d526ecf3cb32fad92.so +0 -3
  39. tflib/_cudacache/upfirdn_2d_f65c24ab0d389aaa83e5b3537e3134e8.dll +0 -0
  40. tflib/autosummary.py +0 -191
  41. tflib/custom_ops.py +0 -169
  42. tflib/custom_ops.py~ +0 -169
  43. tflib/network.py +0 -590
  44. tflib/ops/__init__.py +0 -7
  45. tflib/ops/__pycache__/__init__.cpython-36.pyc +0 -0
  46. tflib/ops/__pycache__/fused_bias_act.cpython-36.pyc +0 -0
  47. tflib/ops/__pycache__/upfirdn_2d.cpython-36.pyc +0 -0
  48. tflib/ops/fused_bias_act.cu +0 -188
  49. tflib/ops/fused_bias_act.py +0 -196
  50. tflib/ops/upfirdn_2d.cu +0 -326
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
submission/__pycache__/__init__.cpython-310.pyc DELETED
Binary file (200 Bytes)
 
submission/__pycache__/__init__.cpython-36.pyc DELETED
Binary file (183 Bytes)
 
submission/__pycache__/__init__.cpython-38.pyc DELETED
Binary file (198 Bytes)
 
submission/__pycache__/__init__.cpython-39.pyc DELETED
Binary file (202 Bytes)
 
submission/__pycache__/run_context.cpython-310.pyc DELETED
Binary file (4.26 kB)
 
submission/__pycache__/run_context.cpython-36.pyc DELETED
Binary file (4.19 kB)
 
submission/__pycache__/run_context.cpython-38.pyc DELETED
Binary file (4.22 kB)
 
submission/__pycache__/run_context.cpython-39.pyc DELETED
Binary file (4.24 kB)
 
submission/__pycache__/submit.cpython-310.pyc DELETED
Binary file (11.1 kB)
 
submission/__pycache__/submit.cpython-36.pyc DELETED
Binary file (11.1 kB)
 
submission/__pycache__/submit.cpython-38.pyc DELETED
Binary file (11.2 kB)
 
submission/__pycache__/submit.cpython-39.pyc DELETED
Binary file (11.2 kB)
 
submission/internal/__pycache__/__init__.cpython-36.pyc DELETED
Binary file (157 Bytes)
 
submission/internal/__pycache__/__init__.cpython-38.pyc DELETED
Binary file (172 Bytes)
 
submission/internal/__pycache__/__init__.cpython-39.pyc DELETED
Binary file (176 Bytes)
 
submission/internal/__pycache__/local.cpython-36.pyc DELETED
Binary file (1.15 kB)
 
submission/internal/__pycache__/local.cpython-38.pyc DELETED
Binary file (1.19 kB)
 
submission/internal/__pycache__/local.cpython-39.pyc DELETED
Binary file (1.19 kB)
 
tflib/.custom_ops.py.un~ DELETED
Binary file (1.81 kB)
 
tflib/__init__.py DELETED
@@ -1,18 +0,0 @@
1
- # Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
2
- #
3
- # This work is made available under the Nvidia Source Code License-NC.
4
- # To view a copy of this license, visit
5
- # https://nvlabs.github.io/stylegan2/license.html
6
-
7
- from . import autosummary
8
- from . import network
9
- from . import optimizer
10
- from . import tfutil
11
- from . import custom_ops
12
-
13
- from .tfutil import *
14
- from .network import Network
15
-
16
- from .optimizer import Optimizer
17
-
18
- from .custom_ops import get_plugin
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tflib/__pycache__/__init__.cpython-36.pyc DELETED
Binary file (383 Bytes)
 
tflib/__pycache__/__init__.cpython-38.pyc DELETED
Binary file (398 Bytes)
 
tflib/__pycache__/__init__.cpython-39.pyc DELETED
Binary file (402 Bytes)
 
tflib/__pycache__/autosummary.cpython-36.pyc DELETED
Binary file (6.71 kB)
 
tflib/__pycache__/autosummary.cpython-38.pyc DELETED
Binary file (6.75 kB)
 
tflib/__pycache__/autosummary.cpython-39.pyc DELETED
Binary file (7.07 kB)
 
tflib/__pycache__/custom_ops.cpython-36.pyc DELETED
Binary file (4.87 kB)
 
tflib/__pycache__/network.cpython-36.pyc DELETED
Binary file (28.9 kB)
 
tflib/__pycache__/optimizer.cpython-36.pyc DELETED
Binary file (14.7 kB)
 
tflib/__pycache__/tfutil.cpython-36.pyc DELETED
Binary file (8.65 kB)
 
tflib/__pycache__/tfutil.cpython-38.pyc DELETED
Binary file (8.7 kB)
 
tflib/__pycache__/tfutil.cpython-39.pyc DELETED
Binary file (8.82 kB)
 
tflib/_cudacache/fused_bias_act_26d9116e2d7d28cdd451d2d21a1bb7ac.dll DELETED
Binary file (462 kB)
 
tflib/_cudacache/fused_bias_act_40c2d778d681ce8357acbd8537ee6613.so DELETED
Binary file (850 kB)
 
tflib/_cudacache/fused_bias_act_d3f49482b02b6a96d02e754e40c292bc.dll DELETED
Binary file (462 kB)
 
tflib/_cudacache/upfirdn_2d_0acb578db4996f3ae2116b184d4a1bdb.dll DELETED
Binary file (703 kB)
 
tflib/_cudacache/upfirdn_2d_79757bc08e1fcc2d526ecf3cb32fad92.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:11a9d92fab510cb996dcd2bf8644c34f7c5e963e6abba4ad3e606e590917e3e0
3
- size 1099920
 
 
 
 
tflib/_cudacache/upfirdn_2d_f65c24ab0d389aaa83e5b3537e3134e8.dll DELETED
Binary file (703 kB)
 
tflib/autosummary.py DELETED
@@ -1,191 +0,0 @@
1
- # Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
2
- #
3
- # This work is made available under the Nvidia Source Code License-NC.
4
- # To view a copy of this license, visit
5
- # https://nvlabs.github.io/stylegan2/license.html
6
-
7
- """Helper for adding automatically tracked values to Tensorboard.
8
-
9
- Autosummary creates an identity op that internally keeps track of the input
10
- values and automatically shows up in TensorBoard. The reported value
11
- represents an average over input components. The average is accumulated
12
- constantly over time and flushed when save_summaries() is called.
13
-
14
- Notes:
15
- - The output tensor must be used as an input for something else in the
16
- graph. Otherwise, the autosummary op will not get executed, and the average
17
- value will not get accumulated.
18
- - It is perfectly fine to include autosummaries with the same name in
19
- several places throughout the graph, even if they are executed concurrently.
20
- - It is ok to also pass in a python scalar or numpy array. In this case, it
21
- is added to the average immediately.
22
- """
23
-
24
- from collections import OrderedDict
25
- import numpy as np
26
- import tensorflow as tf
27
- from tensorboard import summary as summary_lib
28
- from tensorboard.plugins.custom_scalar import layout_pb2
29
-
30
- from . import tfutil
31
- from .tfutil import TfExpression
32
- from .tfutil import TfExpressionEx
33
-
34
- # Enable "Custom scalars" tab in TensorBoard for advanced formatting.
35
- # Disabled by default to reduce tfevents file size.
36
- enable_custom_scalars = False
37
-
38
- _dtype = tf.float64
39
- _vars = OrderedDict() # name => [var, ...]
40
- _immediate = OrderedDict() # name => update_op, update_value
41
- _finalized = False
42
- _merge_op = None
43
-
44
-
45
def _create_var(name: str, value_expr: TfExpression) -> TfExpression:
    """Create one accumulator variable for `name` and return the op that updates it.

    The accumulator is a 3-element vector laid out as [sum(1), sum(x), sum(x**2)].
    Each execution of the returned op adds the element count, the sum and the
    sum of squares of `value_expr` to it (or assigns them if the variable has
    not been initialized yet). The new variable is registered in `_vars[name]`.

    Args:
        name: Autosummary name; "/" is replaced by "_" for the TF scope name.
        value_expr: Expression to accumulate; it is cast to `_dtype` first.

    Returns:
        The TensorFlow op that updates the accumulator.
    """
    assert not _finalized
    scope_id = name.replace("/", "_")
    values = tf.cast(value_expr, _dtype)

    # Element count: a constant when the static shape is known, otherwise computed in-graph.
    if values.shape.is_fully_defined():
        num_elems = np.prod(values.shape.as_list())
        count_expr = tf.constant(num_elems, dtype=_dtype)
    else:
        num_elems = None
        count_expr = tf.reduce_prod(tf.cast(tf.shape(values), _dtype))

    if num_elems == 1:
        # Single element: no reduction needed, just make sure it is a scalar.
        if values.shape.ndims != 0:
            values = tf.reshape(values, [])
        parts = [count_expr, values, tf.square(values)]
    else:
        parts = [count_expr, tf.reduce_sum(values), tf.reduce_sum(tf.square(values))]
    # Contribute all zeros when the summed value is not finite.
    delta = tf.cond(tf.is_finite(parts[1]), lambda: tf.stack(parts), lambda: tf.zeros(3, dtype=_dtype))

    with tfutil.absolute_name_scope("Autosummary/" + scope_id), tf.control_dependencies(None):
        accum = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False)  # [sum(1), sum(x), sum(x**2)]
    update_op = tf.cond(tf.is_variable_initialized(accum), lambda: tf.assign_add(accum, delta), lambda: tf.assign(accum, delta))

    _vars.setdefault(name, []).append(accum)
    return update_op
75
-
76
-
77
def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None, condition: TfExpressionEx = True) -> TfExpressionEx:
    """Register `value` under the autosummary `name`.

    Args:
        name: Name to use in TensorBoard.
        value: TensorFlow expression or python value to track.
        passthru: If given, this is returned instead of `value`; for TF
            expressions the autosummary update is attached to it as a
            control dependency.
        condition: The accumulator is only updated when this is true. For a
            TF `value` it is converted to a tensor and evaluated in-graph;
            for a python `value` it must be a plain python value.

    Returns:
        For a TF `value`: an identity op of `passthru` (or of `value` when
        `passthru` is None) that depends on the update. Otherwise `passthru`
        (or `value`) unchanged.

    Example use of the passthru mechanism:

        n = autosummary('l2loss', loss, passthru=n)

    This is a shorthand for the following code:

        with tf.control_dependencies([autosummary('l2loss', loss)]):
            n = tf.identity(n)
    """
    tfutil.assert_tf_initialized()
    scope_id = name.replace("/", "_")
    result = value if passthru is None else passthru

    # TensorFlow expression: build the update into the graph.
    if tfutil.is_tf_expression(value):
        with tf.name_scope("summary_" + scope_id), tf.device(value.device):
            condition = tf.convert_to_tensor(condition, name='condition')
            update_op = tf.cond(condition, lambda: tf.group(_create_var(name, value)), tf.no_op)
            with tf.control_dependencies([update_op]):
                return tf.identity(result)

    # Python scalar or numpy array: feed it through a placeholder right away.
    assert not tfutil.is_tf_expression(passthru)
    assert not tfutil.is_tf_expression(condition)
    if condition:
        if name not in _immediate:
            with tfutil.absolute_name_scope("Autosummary/" + scope_id), tf.device(None), tf.control_dependencies(None):
                feed_slot = tf.placeholder(_dtype)
                feed_op = _create_var(name, feed_slot)
                _immediate[name] = feed_op, feed_slot
        feed_op, feed_slot = _immediate[name]
        tfutil.run(feed_op, {feed_slot: value})
    return result
116
-
117
-
118
def finalize_autosummaries() -> None:
    """Create the ops that report all registered autosummaries to TensorBoard.

    Note: This should be done only once per graph; repeated calls return
    None without doing anything.

    Returns:
        The custom-scalars layout summary when `enable_custom_scalars` is set,
        otherwise None (despite the `-> None` annotation, a layout object can
        be returned).
    """
    global _finalized
    tfutil.assert_tf_initialized()

    if _finalized:
        return None

    _finalized = True
    all_accumulators = [var for vars_list in _vars.values() for var in vars_list]
    tfutil.init_uninitialized_vars(all_accumulators)

    # Create summary ops.
    with tf.device(None), tf.control_dependencies(None):
        for name, vars_list in _vars.items():
            scope_id = name.replace("/", "_")
            with tfutil.absolute_name_scope("Autosummary/" + scope_id):
                moments = tf.add_n(vars_list)
                moments /= moments[0]  # normalize by the accumulated count
                with tf.control_dependencies([moments]):  # read before resetting
                    reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list]
                    with tf.name_scope(None), tf.control_dependencies(reset_ops):  # reset before reporting
                        mean = moments[1]
                        std = tf.sqrt(moments[2] - tf.square(moments[1]))
                        tf.summary.scalar(name, mean)
                        if enable_custom_scalars:
                            tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std)
                            tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std)

    # Setup layout for custom scalars.
    if not enable_custom_scalars:
        return None

    # Group series as category -> chart -> [series names], based on the "/"-separated name.
    grouped = OrderedDict()
    for series_name in sorted(_vars.keys()):
        parts = series_name.split("/")
        cat = parts[0] if len(parts) >= 2 else ""
        chart = "/".join(parts[1:-1]) if len(parts) >= 3 else parts[-1]
        grouped.setdefault(cat, OrderedDict()).setdefault(chart, []).append(series_name)

    categories = []
    for cat_name, chart_dict in grouped.items():
        charts = []
        for chart_name, series_names in chart_dict.items():
            series = [
                layout_pb2.MarginChartContent.Series(
                    value=series_name,
                    lower="xCustomScalars/" + series_name + "/margin_lo",
                    upper="xCustomScalars/" + series_name + "/margin_hi")
                for series_name in series_names
            ]
            margin = layout_pb2.MarginChartContent(series=series)
            charts.append(layout_pb2.Chart(title=chart_name, margin=margin))
        categories.append(layout_pb2.Category(title=cat_name, chart=charts))
    return summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories))
176
-
177
def save_summaries(file_writer, global_step=None):
    """Write all summaries in the default graph through `file_writer`.

    While no merge op exists yet, the autosummaries are finalized, the custom
    scalars layout (if any) is written, and the merge op is built. The merged
    summary is then evaluated and added with `global_step`.

    Args:
        file_writer: Object providing `add_summary()`.
        global_step: Step value forwarded to `add_summary()`.
    """
    global _merge_op
    tfutil.assert_tf_initialized()

    needs_setup = _merge_op is None
    if needs_setup:
        custom_layout = finalize_autosummaries()
        if custom_layout is not None:
            file_writer.add_summary(custom_layout)
        with tf.device(None), tf.control_dependencies(None):
            _merge_op = tf.summary.merge_all()

    merged = _merge_op.eval()
    file_writer.add_summary(merged, global_step)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tflib/custom_ops.py DELETED
@@ -1,169 +0,0 @@
1
- # Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
2
- #
3
- # This work is made available under the Nvidia Source Code License-NC.
4
- # To view a copy of this license, visit
5
- # https://nvlabs.github.io/stylegan2/license.html
6
-
7
- """TensorFlow custom ops builder.
8
- """
9
-
10
- import os
11
- import re
12
- import uuid
13
- import hashlib
14
- import tempfile
15
- import shutil
16
- import tensorflow as tf
17
- from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module
18
-
19
- #----------------------------------------------------------------------------
20
- # Global options.
21
-
22
- cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache')
23
- cuda_cache_version_tag = 'v1'
24
- do_not_hash_included_headers = False # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe!
25
- verbose = True # Print status messages to stdout.
26
-
27
- compiler_bindir_search_path = [
28
- 'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.16.27023/bin/Hostx64/x64',
29
- 'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64',
30
- 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin',
31
- ]
32
-
33
- #----------------------------------------------------------------------------
34
- # Internal helper funcs.
35
-
36
def _find_compiler_bindir():
    """Return the first entry of `compiler_bindir_search_path` that is an existing directory, or None."""
    existing = (candidate for candidate in compiler_bindir_search_path if os.path.isdir(candidate))
    return next(existing, None)
41
-
42
- def _get_compute_cap(device):
43
- caps_str = device.physical_device_desc
44
- m = re.search('compute capability: (\\d+).(\\d+)', caps_str)
45
- major = m.group(1)
46
- minor = m.group(2)
47
- return (major, minor)
48
-
49
def _get_cuda_gpu_arch_string():
    """Return the nvcc architecture string ("sm_<major><minor>") for the first local GPU.

    Raises:
        RuntimeError: If `device_lib.list_local_devices()` reports no GPU.
    """
    gpu_devices = [dev for dev in device_lib.list_local_devices() if dev.device_type == 'GPU']
    if not gpu_devices:
        raise RuntimeError('No GPU devices found')
    cap_major, cap_minor = _get_compute_cap(gpu_devices[0])
    return 'sm_%s%s' % (cap_major, cap_minor)
55
-
56
- def _run_cmd(cmd):
57
- with os.popen(cmd) as pipe:
58
- output = pipe.read()
59
- status = pipe.close()
60
- if status is not None:
61
- raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output))
62
-
63
def _prepare_nvcc_cli(opts):
    """Build the nvcc command line for the given extra options.

    The result is "nvcc <opts>" followed by warning suppression, the
    TensorFlow include paths, an optional `--compiler-bindir`, and a
    stderr-to-stdout redirect.

    Args:
        opts: Extra nvcc options; surrounding whitespace is stripped.

    Returns:
        The command line as a single string.

    Raises:
        RuntimeError: On Windows, when no directory from
            `compiler_bindir_search_path` exists.
    """
    tf_include = tf.sysconfig.get_include()
    include_dirs = [
        tf_include,
        os.path.join(tf_include, 'external', 'protobuf_archive', 'src'),
        os.path.join(tf_include, 'external', 'com_google_absl'),
        os.path.join(tf_include, 'external', 'eigen_archive'),
    ]

    pieces = ['nvcc', opts.strip(), '--disable-warnings']
    pieces.extend('--include-path "%s"' % inc for inc in include_dirs)

    compiler_bindir = _find_compiler_bindir()
    if compiler_bindir is not None:
        pieces.append('--compiler-bindir "%s"' % compiler_bindir)
    elif os.name == 'nt':
        # Require that _find_compiler_bindir succeeds on Windows. Allow
        # nvcc to use whatever is the default on Linux.
        raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__)

    pieces.append('2>&1')
    return ' '.join(pieces)
81
-
82
- #----------------------------------------------------------------------------
83
- # Main entry point.
84
-
85
- _plugin_cache = dict()
86
-
87
def get_plugin(cuda_file):
    """Compile (if necessary) and load the TensorFlow op library for a CUDA source file.

    The compiled binary is stored in `cuda_cache_path` under a name derived
    from an MD5 hash of the CUDA source, its preprocessed form (unless
    `do_not_hash_included_headers` is set), the nvcc command line,
    `tf.VERSION` and `cuda_cache_version_tag`. Loaded plugins are also kept
    in the in-process `_plugin_cache`, keyed by `cuda_file`.

    Args:
        cuda_file: Path to the CUDA source file.

    Returns:
        The module returned by `tf.load_op_library()`.

    Raises:
        RuntimeError: If nvcc fails or no suitable compiler/GPU is found.
        Any other exception raised during setup is re-raised unchanged.
    """
    cuda_file_base = os.path.basename(cuda_file)
    cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base)

    # Already in cache?
    if cuda_file in _plugin_cache:
        return _plugin_cache[cuda_file]

    # Setup plugin.
    if verbose:
        print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True)
    try:
        # Hash CUDA source.
        md5 = hashlib.md5()
        with open(cuda_file, 'rb') as f:
            md5.update(f.read())
        md5.update(b'\n')

        # Hash headers included by the CUDA code by running it through the preprocessor.
        if not do_not_hash_included_headers:
            if verbose:
                print('Preprocessing... ', end='', flush=True)
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext)
                _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)))
                with open(tmp_file, 'rb') as f:
                    # Replace the absolute source path with the base name so the
                    # hash does not depend on where the file lives.
                    bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8')  # __FILE__ in error check macros
                    good_file_str = ('"' + cuda_file_base + '"').encode('utf-8')
                    for ln in f:
                        if not ln.startswith(b'# ') and not ln.startswith(b'#line '):  # ignore line number pragmas
                            ln = ln.replace(bad_file_str, good_file_str)
                            md5.update(ln)
                    md5.update(b'\n')

        # Select compiler options.
        compile_opts = ''
        if os.name == 'nt':
            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib')
        elif os.name == 'posix':
            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so')
            compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\''
        else:
            assert False  # not Windows or Linux, w00t?
        compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string()
        compile_opts += ' --use_fast_math'
        nvcc_cmd = _prepare_nvcc_cli(compile_opts)

        # Hash build configuration.
        md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n')
        md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n')
        md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n')

        # Compile if not already compiled.
        bin_file_ext = '.dll' if os.name == 'nt' else '.so'
        bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext)
        if not os.path.isfile(bin_file):
            if verbose:
                print('Compiling... ', end='', flush=True)
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext)
                _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))
                os.makedirs(cuda_cache_path, exist_ok=True)
                # Copy under a unique temporary name first, then move into place in one step.
                intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext)
                shutil.copyfile(tmp_file, intermediate_file)
                # os.replace overwrites an existing destination on every platform.
                # os.rename raises FileExistsError on Windows if another process
                # produced bin_file in the meantime.
                os.replace(intermediate_file, bin_file)  # atomic

        # Load.
        if verbose:
            print('Loading... ', end='', flush=True)
        plugin = tf.load_op_library(bin_file)

        # Add to cache.
        _plugin_cache[cuda_file] = plugin
        if verbose:
            print('Done.', flush=True)
        return plugin

    except:
        # Finish the status line before propagating the error unchanged.
        if verbose:
            print('Failed!', flush=True)
        raise
168
-
169
- #----------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tflib/custom_ops.py~ DELETED
@@ -1,169 +0,0 @@
1
- # Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
2
- #
3
- # This work is made available under the Nvidia Source Code License-NC.
4
- # To view a copy of this license, visit
5
- # https://nvlabs.github.io/stylegan2/license.html
6
-
7
- """TensorFlow custom ops builder.
8
- """
9
-
10
- import os
11
- import re
12
- import uuid
13
- import hashlib
14
- import tempfile
15
- import shutil
16
- import tensorflow as tf
17
- from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module
18
-
19
- #----------------------------------------------------------------------------
20
- # Global options.
21
-
22
- cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache')
23
- cuda_cache_version_tag = 'v1'
24
- do_not_hash_included_headers = False # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe!
25
- verbose = True # Print status messages to stdout.
26
-
27
- compiler_bindir_search_path = [
28
- 'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.16.27023/bin/Hostx64/x64',
29
- 'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64',
30
- 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin',
31
- ]
32
-
33
- #----------------------------------------------------------------------------
34
- # Internal helper funcs.
35
-
36
def _find_compiler_bindir():
    """Return the first entry of `compiler_bindir_search_path` that is an existing directory, or None."""
    existing = (candidate for candidate in compiler_bindir_search_path if os.path.isdir(candidate))
    return next(existing, None)
41
-
42
- def _get_compute_cap(device):
43
- caps_str = device.physical_device_desc
44
- m = re.search('compute capability: (\\d+).(\\d+)', caps_str)
45
- major = m.group(1)
46
- minor = m.group(2)
47
- return (major, minor)
48
-
49
def _get_cuda_gpu_arch_string():
    """Return the nvcc architecture string ("sm_<major><minor>") for the first local GPU.

    Raises:
        RuntimeError: If `device_lib.list_local_devices()` reports no GPU.
    """
    gpu_devices = [dev for dev in device_lib.list_local_devices() if dev.device_type == 'GPU']
    if not gpu_devices:
        raise RuntimeError('No GPU devices found')
    cap_major, cap_minor = _get_compute_cap(gpu_devices[0])
    return 'sm_%s%s' % (cap_major, cap_minor)
55
-
56
- def _run_cmd(cmd):
57
- with os.popen(cmd) as pipe:
58
- output = pipe.read()
59
- status = pipe.close()
60
- if status is not None:
61
- raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output))
62
-
63
def _prepare_nvcc_cli(opts):
    """Build the nvcc command line for the given extra options.

    The result is "nvcc <opts>" followed by warning suppression, the
    TensorFlow include paths, an optional `--compiler-bindir`, and a
    stderr-to-stdout redirect.

    Args:
        opts: Extra nvcc options; surrounding whitespace is stripped.

    Returns:
        The command line as a single string.

    Raises:
        RuntimeError: On Windows, when no directory from
            `compiler_bindir_search_path` exists.
    """
    tf_include = tf.sysconfig.get_include()
    include_dirs = [
        tf_include,
        os.path.join(tf_include, 'external', 'protobuf_archive', 'src'),
        os.path.join(tf_include, 'external', 'com_google_absl'),
        os.path.join(tf_include, 'external', 'eigen_archive'),
    ]

    pieces = ['nvcc', opts.strip(), '--disable-warnings']
    pieces.extend('--include-path "%s"' % inc for inc in include_dirs)

    compiler_bindir = _find_compiler_bindir()
    if compiler_bindir is not None:
        pieces.append('--compiler-bindir "%s"' % compiler_bindir)
    elif os.name == 'nt':
        # Require that _find_compiler_bindir succeeds on Windows. Allow
        # nvcc to use whatever is the default on Linux.
        raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__)

    pieces.append('2>&1')
    return ' '.join(pieces)
81
-
82
- #----------------------------------------------------------------------------
83
- # Main entry point.
84
-
85
- _plugin_cache = dict()
86
-
87
def get_plugin(cuda_file):
    """Compile (if necessary) and load the TensorFlow op library for a CUDA source file.

    The compiled binary is stored in `cuda_cache_path` under a name derived
    from an MD5 hash of the CUDA source, its preprocessed form (unless
    `do_not_hash_included_headers` is set), the nvcc command line,
    `tf.VERSION` and `cuda_cache_version_tag`. Loaded plugins are also kept
    in the in-process `_plugin_cache`, keyed by `cuda_file`.

    Args:
        cuda_file: Path to the CUDA source file.

    Returns:
        The module returned by `tf.load_op_library()`.

    Raises:
        RuntimeError: If nvcc fails or no suitable compiler/GPU is found.
        Any other exception raised during setup is re-raised unchanged.
    """
    cuda_file_base = os.path.basename(cuda_file)
    cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base)

    # Already in cache?
    if cuda_file in _plugin_cache:
        return _plugin_cache[cuda_file]

    # Setup plugin.
    if verbose:
        print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True)
    try:
        # Hash CUDA source.
        md5 = hashlib.md5()
        with open(cuda_file, 'rb') as f:
            md5.update(f.read())
        md5.update(b'\n')

        # Hash headers included by the CUDA code by running it through the preprocessor.
        if not do_not_hash_included_headers:
            if verbose:
                print('Preprocessing... ', end='', flush=True)
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext)
                _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)))
                with open(tmp_file, 'rb') as f:
                    # Replace the absolute source path with the base name so the
                    # hash does not depend on where the file lives.
                    bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8')  # __FILE__ in error check macros
                    good_file_str = ('"' + cuda_file_base + '"').encode('utf-8')
                    for ln in f:
                        if not ln.startswith(b'# ') and not ln.startswith(b'#line '):  # ignore line number pragmas
                            ln = ln.replace(bad_file_str, good_file_str)
                            md5.update(ln)
                    md5.update(b'\n')

        # Select compiler options.
        compile_opts = ''
        if os.name == 'nt':
            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib')
        elif os.name == 'posix':
            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so')
            compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\''
        else:
            assert False  # not Windows or Linux, w00t?
        compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string()
        compile_opts += ' --use_fast_math'
        nvcc_cmd = _prepare_nvcc_cli(compile_opts)

        # Hash build configuration.
        md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n')
        md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n')
        md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n')

        # Compile if not already compiled.
        bin_file_ext = '.dll' if os.name == 'nt' else '.so'
        bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext)
        if not os.path.isfile(bin_file):
            if verbose:
                print('Compiling... ', end='', flush=True)
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext)
                _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))
                os.makedirs(cuda_cache_path, exist_ok=True)
                # Copy under a unique temporary name first, then move into place in one step.
                intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext)
                shutil.copyfile(tmp_file, intermediate_file)
                # os.replace overwrites an existing destination on every platform.
                # os.rename raises FileExistsError on Windows if another process
                # produced bin_file in the meantime.
                os.replace(intermediate_file, bin_file)  # atomic

        # Load.
        if verbose:
            print('Loading... ', end='', flush=True)
        plugin = tf.load_op_library(bin_file)

        # Add to cache.
        _plugin_cache[cuda_file] = plugin
        if verbose:
            print('Done.', flush=True)
        return plugin

    except:
        # Finish the status line before propagating the error unchanged.
        if verbose:
            print('Failed!', flush=True)
        raise
168
-
169
- #----------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tflib/network.py DELETED
@@ -1,590 +0,0 @@
1
- # Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
2
- #
3
- # This work is made available under the Nvidia Source Code License-NC.
4
- # To view a copy of this license, visit
5
- # https://nvlabs.github.io/stylegan2/license.html
6
-
7
- """Helper for managing networks."""
8
-
9
- import types
10
- import inspect
11
- import re
12
- import uuid
13
- import sys
14
- import numpy as np
15
- import tensorflow as tf
16
-
17
- from collections import OrderedDict
18
- from typing import Any, List, Tuple, Union
19
-
20
- from . import tfutil
21
- from .. import util
22
-
23
- from .tfutil import TfExpression, TfExpressionEx
24
-
25
- _import_handlers = [] # Custom import handlers for dealing with legacy data in pickle import.
26
- _import_module_src = dict() # Source code for temporary modules created during pickle import.
27
-
28
-
29
- def import_handler(handler_func):
30
- """Function decorator for declaring custom import handlers."""
31
- _import_handlers.append(handler_func)
32
- return handler_func
33
-
34
-
35
- class Network:
36
- """Generic network abstraction.
37
-
38
- Acts as a convenience wrapper for a parameterized network construction
39
- function, providing several utility methods and convenient access to
40
- the inputs/outputs/weights.
41
-
42
- Network objects can be safely pickled and unpickled for long-term
43
- archival purposes. The pickling works reliably as long as the underlying
44
- network construction function is defined in a standalone Python module
45
- that has no side effects or application-specific imports.
46
-
47
- Args:
48
- name: Network name. Used to select TensorFlow name and variable scopes.
49
- func_name: Fully qualified name of the underlying network construction function, or a top-level function object.
50
- static_kwargs: Keyword arguments to be passed in to the network construction function.
51
-
52
- Attributes:
53
- name: User-specified name, defaults to build func name if None.
54
- scope: Unique TensorFlow scope containing template graph and variables, derived from the user-specified name.
55
- static_kwargs: Arguments passed to the user-supplied build func.
56
- components: Container for sub-networks. Passed to the build func, and retained between calls.
57
- num_inputs: Number of input tensors.
58
- num_outputs: Number of output tensors.
59
- input_shapes: Input tensor shapes (NC or NCHW), including minibatch dimension.
60
- output_shapes: Output tensor shapes (NC or NCHW), including minibatch dimension.
61
- input_shape: Short-hand for input_shapes[0].
62
- output_shape: Short-hand for output_shapes[0].
63
- input_templates: Input placeholders in the template graph.
64
- output_templates: Output tensors in the template graph.
65
- input_names: Name string for each input.
66
- output_names: Name string for each output.
67
- own_vars: Variables defined by this network (local_name => var), excluding sub-networks.
68
- vars: All variables (local_name => var).
69
- trainables: All trainable variables (local_name => var).
70
- var_global_to_local: Mapping from variable global names to local names.
71
- """
72
-
73
- def __init__(self, name: str = None, func_name: Any = None, **static_kwargs):
74
- tfutil.assert_tf_initialized()
75
- assert isinstance(name, str) or name is None
76
- assert func_name is not None
77
- assert isinstance(func_name, str) or util.is_top_level_function(func_name)
78
- assert util.is_pickleable(static_kwargs)
79
-
80
- self._init_fields()
81
- self.name = name
82
- self.static_kwargs = util.EasyDict(static_kwargs)
83
-
84
- # Locate the user-specified network build function.
85
- if util.is_top_level_function(func_name):
86
- func_name = util.get_top_level_function_name(func_name)
87
- module, self._build_func_name = util.get_module_from_obj_name(func_name)
88
- self._build_func = util.get_obj_from_module(module, self._build_func_name)
89
- assert callable(self._build_func)
90
-
91
- # Dig up source code for the module containing the build function.
92
- self._build_module_src = _import_module_src.get(module, None)
93
- if self._build_module_src is None:
94
- self._build_module_src = inspect.getsource(module)
95
-
96
- # Init TensorFlow graph.
97
- self._init_graph()
98
- self.reset_own_vars()
99
-
100
- def _init_fields(self) -> None:
101
- self.name = None
102
- self.scope = None
103
- self.static_kwargs = util.EasyDict()
104
- self.components = util.EasyDict()
105
- self.num_inputs = 0
106
- self.num_outputs = 0
107
- self.input_shapes = [[]]
108
- self.output_shapes = [[]]
109
- self.input_shape = []
110
- self.output_shape = []
111
- self.input_templates = []
112
- self.output_templates = []
113
- self.input_names = []
114
- self.output_names = []
115
- self.own_vars = OrderedDict()
116
- self.vars = OrderedDict()
117
- self.trainables = OrderedDict()
118
- self.var_global_to_local = OrderedDict()
119
-
120
- self._build_func = None # User-supplied build function that constructs the network.
121
- self._build_func_name = None # Name of the build function.
122
- self._build_module_src = None # Full source code of the module containing the build function.
123
- self._run_cache = dict() # Cached graph data for Network.run().
124
-
125
- def _init_graph(self) -> None:
126
- # Collect inputs.
127
- self.input_names = []
128
-
129
- for param in inspect.signature(self._build_func).parameters.values():
130
- if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty:
131
- self.input_names.append(param.name)
132
-
133
- self.num_inputs = len(self.input_names)
134
- assert self.num_inputs >= 1
135
-
136
- # Choose name and scope.
137
- if self.name is None:
138
- self.name = self._build_func_name
139
- assert re.match("^[A-Za-z0-9_.\\-]*$", self.name)
140
- with tf.name_scope(None):
141
- self.scope = tf.get_default_graph().unique_name(self.name, mark_as_used=True)
142
-
143
- # Finalize build func kwargs.
144
- build_kwargs = dict(self.static_kwargs)
145
- build_kwargs["is_template_graph"] = True
146
- build_kwargs["components"] = self.components
147
-
148
- # Build template graph.
149
- with tfutil.absolute_variable_scope(self.scope, reuse=False), tfutil.absolute_name_scope(self.scope): # ignore surrounding scopes
150
- assert tf.get_variable_scope().name == self.scope
151
- assert tf.get_default_graph().get_name_scope() == self.scope
152
- with tf.control_dependencies(None): # ignore surrounding control dependencies
153
- self.input_templates = [tf.placeholder(tf.float32, name=name) for name in self.input_names]
154
- out_expr = self._build_func(*self.input_templates, **build_kwargs)
155
-
156
- # Collect outputs.
157
- assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple)
158
- self.output_templates = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr)
159
- self.num_outputs = len(self.output_templates)
160
- assert self.num_outputs >= 1
161
- assert all(tfutil.is_tf_expression(t) for t in self.output_templates)
162
-
163
- # Perform sanity checks.
164
- if any(t.shape.ndims is None for t in self.input_templates):
165
- raise ValueError("Network input shapes not defined. Please call x.set_shape() for each input.")
166
- if any(t.shape.ndims is None for t in self.output_templates):
167
- raise ValueError("Network output shapes not defined. Please call x.set_shape() where applicable.")
168
- if any(not isinstance(comp, Network) for comp in self.components.values()):
169
- raise ValueError("Components of a Network must be Networks themselves.")
170
- if len(self.components) != len(set(comp.name for comp in self.components.values())):
171
- raise ValueError("Components of a Network must have unique names.")
172
-
173
- # List inputs and outputs.
174
- self.input_shapes = [t.shape.as_list() for t in self.input_templates]
175
- self.output_shapes = [t.shape.as_list() for t in self.output_templates]
176
- self.input_shape = self.input_shapes[0]
177
- self.output_shape = self.output_shapes[0]
178
- self.output_names = [t.name.split("/")[-1].split(":")[0] for t in self.output_templates]
179
-
180
- # List variables.
181
- self.own_vars = OrderedDict((var.name[len(self.scope) + 1:].split(":")[0], var) for var in tf.global_variables(self.scope + "/"))
182
- self.vars = OrderedDict(self.own_vars)
183
- self.vars.update((comp.name + "/" + name, var) for comp in self.components.values() for name, var in comp.vars.items())
184
- self.trainables = OrderedDict((name, var) for name, var in self.vars.items() if var.trainable)
185
- self.var_global_to_local = OrderedDict((var.name.split(":")[0], name) for name, var in self.vars.items())
186
-
187
- def reset_own_vars(self) -> None:
188
- """Re-initialize all variables of this network, excluding sub-networks."""
189
- tfutil.run([var.initializer for var in self.own_vars.values()])
190
-
191
- def reset_vars(self) -> None:
192
- """Re-initialize all variables of this network, including sub-networks."""
193
- tfutil.run([var.initializer for var in self.vars.values()])
194
-
195
- def reset_trainables(self) -> None:
196
- """Re-initialize all trainable variables of this network, including sub-networks."""
197
- tfutil.run([var.initializer for var in self.trainables.values()])
198
-
199
- def get_output_for(self, *in_expr: TfExpression, return_as_list: bool = False, **dynamic_kwargs) -> Union[TfExpression, List[TfExpression]]:
200
- """Construct TensorFlow expression(s) for the output(s) of this network, given the input expression(s)."""
201
- assert len(in_expr) == self.num_inputs
202
- assert not all(expr is None for expr in in_expr)
203
-
204
- # Finalize build func kwargs.
205
- build_kwargs = dict(self.static_kwargs)
206
- build_kwargs.update(dynamic_kwargs)
207
- build_kwargs["is_template_graph"] = False
208
- build_kwargs["components"] = self.components
209
-
210
- # Build TensorFlow graph to evaluate the network.
211
- with tfutil.absolute_variable_scope(self.scope, reuse=True), tf.name_scope(self.name):
212
- assert tf.get_variable_scope().name == self.scope
213
- valid_inputs = [expr for expr in in_expr if expr is not None]
214
- final_inputs = []
215
- for expr, name, shape in zip(in_expr, self.input_names, self.input_shapes):
216
- if expr is not None:
217
- expr = tf.identity(expr, name=name)
218
- else:
219
- expr = tf.zeros([tf.shape(valid_inputs[0])[0]] + shape[1:], name=name)
220
- final_inputs.append(expr)
221
- out_expr = self._build_func(*final_inputs, **build_kwargs)
222
-
223
- # Propagate input shapes back to the user-specified expressions.
224
- for expr, final in zip(in_expr, final_inputs):
225
- if isinstance(expr, tf.Tensor):
226
- expr.set_shape(final.shape)
227
-
228
- # Express outputs in the desired format.
229
- assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple)
230
- if return_as_list:
231
- out_expr = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr)
232
- return out_expr
233
-
234
- def get_var_local_name(self, var_or_global_name: Union[TfExpression, str]) -> str:
235
- """Get the local name of a given variable, without any surrounding name scopes."""
236
- assert tfutil.is_tf_expression(var_or_global_name) or isinstance(var_or_global_name, str)
237
- global_name = var_or_global_name if isinstance(var_or_global_name, str) else var_or_global_name.name
238
- return self.var_global_to_local[global_name]
239
-
240
- def find_var(self, var_or_local_name: Union[TfExpression, str]) -> TfExpression:
241
- """Find variable by local or global name."""
242
- assert tfutil.is_tf_expression(var_or_local_name) or isinstance(var_or_local_name, str)
243
- return self.vars[var_or_local_name] if isinstance(var_or_local_name, str) else var_or_local_name
244
-
245
- def get_var(self, var_or_local_name: Union[TfExpression, str]) -> np.ndarray:
246
- """Get the value of a given variable as NumPy array.
247
- Note: This method is very inefficient -- prefer to use tflib.run(list_of_vars) whenever possible."""
248
- return self.find_var(var_or_local_name).eval()
249
-
250
- def set_var(self, var_or_local_name: Union[TfExpression, str], new_value: Union[int, float, np.ndarray]) -> None:
251
- """Set the value of a given variable based on the given NumPy array.
252
- Note: This method is very inefficient -- prefer to use tflib.set_vars() whenever possible."""
253
- tfutil.set_vars({self.find_var(var_or_local_name): new_value})
254
-
255
- def __getstate__(self) -> dict:
256
- """Pickle export."""
257
- state = dict()
258
- state["version"] = 4
259
- state["name"] = self.name
260
- state["static_kwargs"] = dict(self.static_kwargs)
261
- state["components"] = dict(self.components)
262
- state["build_module_src"] = self._build_module_src
263
- state["build_func_name"] = self._build_func_name
264
- state["variables"] = list(zip(self.own_vars.keys(), tfutil.run(list(self.own_vars.values()))))
265
- return state
266
-
267
- def __setstate__(self, state: dict) -> None:
268
- """Pickle import."""
269
- # pylint: disable=attribute-defined-outside-init
270
- tfutil.assert_tf_initialized()
271
- self._init_fields()
272
-
273
- # Execute custom import handlers.
274
- for handler in _import_handlers:
275
- state = handler(state)
276
-
277
- # Set basic fields.
278
- assert state["version"] in [2, 3, 4]
279
- self.name = state["name"]
280
- self.static_kwargs = util.EasyDict(state["static_kwargs"])
281
- self.components = util.EasyDict(state.get("components", {}))
282
- self._build_module_src = state["build_module_src"]
283
- self._build_func_name = state["build_func_name"]
284
-
285
- # Create temporary module from the imported source code.
286
- module_name = "_tflib_network_import_" + uuid.uuid4().hex
287
- module = types.ModuleType(module_name)
288
- sys.modules[module_name] = module
289
- _import_module_src[module] = self._build_module_src
290
- exec(self._build_module_src, module.__dict__) # pylint: disable=exec-used
291
-
292
- # Locate network build function in the temporary module.
293
- self._build_func = util.get_obj_from_module(module, self._build_func_name)
294
- assert callable(self._build_func)
295
-
296
- # Init TensorFlow graph.
297
- self._init_graph()
298
- self.reset_own_vars()
299
- tfutil.set_vars({self.find_var(name): value for name, value in state["variables"]})
300
-
301
- def clone(self, name: str = None, **new_static_kwargs) -> "Network":
302
- """Create a clone of this network with its own copy of the variables."""
303
- # pylint: disable=protected-access
304
- net = object.__new__(Network)
305
- net._init_fields()
306
- net.name = name if name is not None else self.name
307
- net.static_kwargs = util.EasyDict(self.static_kwargs)
308
- net.static_kwargs.update(new_static_kwargs)
309
- net._build_module_src = self._build_module_src
310
- net._build_func_name = self._build_func_name
311
- net._build_func = self._build_func
312
- net._init_graph()
313
- net.copy_vars_from(self)
314
- return net
315
-
316
- def copy_own_vars_from(self, src_net: "Network") -> None:
317
- """Copy the values of all variables from the given network, excluding sub-networks."""
318
- names = [name for name in self.own_vars.keys() if name in src_net.own_vars]
319
- tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names}))
320
-
321
- def copy_vars_from(self, src_net: "Network") -> None:
322
- """Copy the values of all variables from the given network, including sub-networks."""
323
- names = [name for name in self.vars.keys() if name in src_net.vars]
324
- tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names}))
325
-
326
- def copy_trainables_from(self, src_net: "Network") -> None:
327
- """Copy the values of all trainable variables from the given network, including sub-networks."""
328
- names = [name for name in self.trainables.keys() if name in src_net.trainables]
329
- tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names}))
330
-
331
- def convert(self, new_func_name: str, new_name: str = None, **new_static_kwargs) -> "Network":
332
- """Create new network with the given parameters, and copy all variables from this network."""
333
- if new_name is None:
334
- new_name = self.name
335
- static_kwargs = dict(self.static_kwargs)
336
- static_kwargs.update(new_static_kwargs)
337
- net = Network(name=new_name, func_name=new_func_name, **static_kwargs)
338
- net.copy_vars_from(self)
339
- return net
340
-
341
- def setup_as_moving_average_of(self, src_net: "Network", beta: TfExpressionEx = 0.99, beta_nontrainable: TfExpressionEx = 0.0) -> tf.Operation:
342
- """Construct a TensorFlow op that updates the variables of this network
343
- to be slightly closer to those of the given network."""
344
- with tfutil.absolute_name_scope(self.scope + "/_MovingAvg"):
345
- ops = []
346
- for name, var in self.vars.items():
347
- if name in src_net.vars:
348
- cur_beta = beta if name in self.trainables else beta_nontrainable
349
- new_value = tfutil.lerp(src_net.vars[name], var, cur_beta)
350
- ops.append(var.assign(new_value))
351
- return tf.group(*ops)
352
-
353
- def run(self,
354
- *in_arrays: Tuple[Union[np.ndarray, None], ...],
355
- input_transform: dict = None,
356
- output_transform: dict = None,
357
- return_as_list: bool = False,
358
- print_progress: bool = False,
359
- minibatch_size: int = None,
360
- num_gpus: int = 1,
361
- assume_frozen: bool = False,
362
- **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]:
363
- """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s).
364
-
365
- Args:
366
- input_transform: A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network.
367
- The dict must contain a 'func' field that points to a top-level function. The function is called with the input
368
- TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
369
- output_transform: A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network.
370
- The dict must contain a 'func' field that points to a top-level function. The function is called with the output
371
- TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
372
- return_as_list: True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs.
373
- print_progress: Print progress to the console? Useful for very large input arrays.
374
- minibatch_size: Maximum minibatch size to use, None = disable batching.
375
- num_gpus: Number of GPUs to use.
376
- assume_frozen: Improve multi-GPU performance by assuming that the trainable parameters will remain unchanged between calls.
377
- dynamic_kwargs: Additional keyword arguments to be passed into the network build function.
378
- """
379
- assert len(in_arrays) == self.num_inputs
380
- assert not all(arr is None for arr in in_arrays)
381
- assert input_transform is None or util.is_top_level_function(input_transform["func"])
382
- assert output_transform is None or util.is_top_level_function(output_transform["func"])
383
- output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs)
384
- num_items = in_arrays[0].shape[0]
385
- if minibatch_size is None:
386
- minibatch_size = num_items
387
-
388
- # Construct unique hash key from all arguments that affect the TensorFlow graph.
389
- key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs)
390
- def unwind_key(obj):
391
- if isinstance(obj, dict):
392
- return [(key, unwind_key(value)) for key, value in sorted(obj.items())]
393
- if callable(obj):
394
- return util.get_top_level_function_name(obj)
395
- return obj
396
- key = repr(unwind_key(key))
397
-
398
- # Build graph.
399
- if key not in self._run_cache:
400
- with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None):
401
- with tf.device("/cpu:0"):
402
- in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names]
403
- in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr]))
404
-
405
- out_split = []
406
- for gpu in range(num_gpus):
407
- with tf.device("/gpu:%d" % gpu):
408
- net_gpu = self.clone() if assume_frozen else self
409
- in_gpu = in_split[gpu]
410
-
411
- if input_transform is not None:
412
- in_kwargs = dict(input_transform)
413
- in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs)
414
- in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu)
415
-
416
- assert len(in_gpu) == self.num_inputs
417
- out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs)
418
-
419
- if output_transform is not None:
420
- out_kwargs = dict(output_transform)
421
- out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs)
422
- out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu)
423
-
424
- assert len(out_gpu) == self.num_outputs
425
- out_split.append(out_gpu)
426
-
427
- with tf.device("/cpu:0"):
428
- out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)]
429
- self._run_cache[key] = in_expr, out_expr
430
-
431
- # Run minibatches.
432
- in_expr, out_expr = self._run_cache[key]
433
- out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr]
434
-
435
- for mb_begin in range(0, num_items, minibatch_size):
436
- if print_progress:
437
- print("\r%d / %d" % (mb_begin, num_items), end="")
438
-
439
- mb_end = min(mb_begin + minibatch_size, num_items)
440
- mb_num = mb_end - mb_begin
441
- mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)]
442
- mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in)))
443
-
444
- for dst, src in zip(out_arrays, mb_out):
445
- dst[mb_begin: mb_end] = src
446
-
447
- # Done.
448
- if print_progress:
449
- print("\r%d / %d" % (num_items, num_items))
450
-
451
- if not return_as_list:
452
- out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays)
453
- return out_arrays
454
-
455
- def list_ops(self) -> List[TfExpression]:
456
- include_prefix = self.scope + "/"
457
- exclude_prefix = include_prefix + "_"
458
- ops = tf.get_default_graph().get_operations()
459
- ops = [op for op in ops if op.name.startswith(include_prefix)]
460
- ops = [op for op in ops if not op.name.startswith(exclude_prefix)]
461
- return ops
462
-
463
- def list_layers(self) -> List[Tuple[str, TfExpression, List[TfExpression]]]:
464
- """Returns a list of (layer_name, output_expr, trainable_vars) tuples corresponding to
465
- individual layers of the network. Mainly intended to be used for reporting."""
466
- layers = []
467
-
468
- def recurse(scope, parent_ops, parent_vars, level):
469
- # Ignore specific patterns.
470
- if any(p in scope for p in ["/Shape", "/strided_slice", "/Cast", "/concat", "/Assign"]):
471
- return
472
-
473
- # Filter ops and vars by scope.
474
- global_prefix = scope + "/"
475
- local_prefix = global_prefix[len(self.scope) + 1:]
476
- cur_ops = [op for op in parent_ops if op.name.startswith(global_prefix) or op.name == global_prefix[:-1]]
477
- cur_vars = [(name, var) for name, var in parent_vars if name.startswith(local_prefix) or name == local_prefix[:-1]]
478
- if not cur_ops and not cur_vars:
479
- return
480
-
481
- # Filter out all ops related to variables.
482
- for var in [op for op in cur_ops if op.type.startswith("Variable")]:
483
- var_prefix = var.name + "/"
484
- cur_ops = [op for op in cur_ops if not op.name.startswith(var_prefix)]
485
-
486
- # Scope does not contain ops as immediate children => recurse deeper.
487
- contains_direct_ops = any("/" not in op.name[len(global_prefix):] and op.type not in ["Identity", "Cast", "Transpose"] for op in cur_ops)
488
- if (level == 0 or not contains_direct_ops) and (len(cur_ops) + len(cur_vars)) > 1:
489
- visited = set()
490
- for rel_name in [op.name[len(global_prefix):] for op in cur_ops] + [name[len(local_prefix):] for name, _var in cur_vars]:
491
- token = rel_name.split("/")[0]
492
- if token not in visited:
493
- recurse(global_prefix + token, cur_ops, cur_vars, level + 1)
494
- visited.add(token)
495
- return
496
-
497
- # Report layer.
498
- layer_name = scope[len(self.scope) + 1:]
499
- layer_output = cur_ops[-1].outputs[0] if cur_ops else cur_vars[-1][1]
500
- layer_trainables = [var for _name, var in cur_vars if var.trainable]
501
- layers.append((layer_name, layer_output, layer_trainables))
502
-
503
- recurse(self.scope, self.list_ops(), list(self.vars.items()), 0)
504
- return layers
505
-
506
- def print_layers(self, title: str = None, hide_layers_with_no_params: bool = False) -> None:
507
- """Print a summary table of the network structure."""
508
- rows = [[title if title is not None else self.name, "Params", "OutputShape", "WeightShape"]]
509
- rows += [["---"] * 4]
510
- total_params = 0
511
-
512
- for layer_name, layer_output, layer_trainables in self.list_layers():
513
- num_params = sum(int(np.prod(var.shape.as_list())) for var in layer_trainables)
514
- weights = [var for var in layer_trainables if var.name.endswith("/weight:0")]
515
- weights.sort(key=lambda x: len(x.name))
516
- if len(weights) == 0 and len(layer_trainables) == 1:
517
- weights = layer_trainables
518
- total_params += num_params
519
-
520
- if not hide_layers_with_no_params or num_params != 0:
521
- num_params_str = str(num_params) if num_params > 0 else "-"
522
- output_shape_str = str(layer_output.shape)
523
- weight_shape_str = str(weights[0].shape) if len(weights) >= 1 else "-"
524
- rows += [[layer_name, num_params_str, output_shape_str, weight_shape_str]]
525
-
526
- rows += [["---"] * 4]
527
- rows += [["Total", str(total_params), "", ""]]
528
-
529
- widths = [max(len(cell) for cell in column) for column in zip(*rows)]
530
- print()
531
- for row in rows:
532
- print(" ".join(cell + " " * (width - len(cell)) for cell, width in zip(row, widths)))
533
- print()
534
-
535
- def setup_weight_histograms(self, title: str = None) -> None:
536
- """Construct summary ops to include histograms of all trainable parameters in TensorBoard."""
537
- if title is None:
538
- title = self.name
539
-
540
- with tf.name_scope(None), tf.device(None), tf.control_dependencies(None):
541
- for local_name, var in self.trainables.items():
542
- if "/" in local_name:
543
- p = local_name.split("/")
544
- name = title + "_" + p[-1] + "/" + "_".join(p[:-1])
545
- else:
546
- name = title + "_toplevel/" + local_name
547
-
548
- tf.summary.histogram(name, var)
549
-
550
- #----------------------------------------------------------------------------
551
- # Backwards-compatible emulation of legacy output transformation in Network.run().
552
-
553
- _print_legacy_warning = True
554
-
555
- def _handle_legacy_output_transforms(output_transform, dynamic_kwargs):
556
- global _print_legacy_warning
557
- legacy_kwargs = ["out_mul", "out_add", "out_shrink", "out_dtype"]
558
- if not any(kwarg in dynamic_kwargs for kwarg in legacy_kwargs):
559
- return output_transform, dynamic_kwargs
560
-
561
- if _print_legacy_warning:
562
- _print_legacy_warning = False
563
- print()
564
- print("WARNING: Old-style output transformations in Network.run() are deprecated.")
565
- print("Consider using 'output_transform=dict(func=tflib.convert_images_to_uint8)'")
566
- print("instead of 'out_mul=127.5, out_add=127.5, out_dtype=np.uint8'.")
567
- print()
568
- assert output_transform is None
569
-
570
- new_kwargs = dict(dynamic_kwargs)
571
- new_transform = {kwarg: new_kwargs.pop(kwarg) for kwarg in legacy_kwargs if kwarg in dynamic_kwargs}
572
- new_transform["func"] = _legacy_output_transform_func
573
- return new_transform, new_kwargs
574
-
575
- def _legacy_output_transform_func(*expr, out_mul=1.0, out_add=0.0, out_shrink=1, out_dtype=None):
576
- if out_mul != 1.0:
577
- expr = [x * out_mul for x in expr]
578
-
579
- if out_add != 0.0:
580
- expr = [x + out_add for x in expr]
581
-
582
- if out_shrink > 1:
583
- ksize = [1, 1, out_shrink, out_shrink]
584
- expr = [tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") for x in expr]
585
-
586
- if out_dtype is not None:
587
- if tf.as_dtype(out_dtype).is_integer:
588
- expr = [tf.round(x) for x in expr]
589
- expr = [tf.saturate_cast(x, out_dtype) for x in expr]
590
- return expr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tflib/ops/__init__.py DELETED
@@ -1,7 +0,0 @@
1
- # Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
2
- #
3
- # This work is made available under the Nvidia Source Code License-NC.
4
- # To view a copy of this license, visit
5
- # https://nvlabs.github.io/stylegan2/license.html
6
-
7
- # empty
 
 
 
 
 
 
 
 
tflib/ops/__pycache__/__init__.cpython-36.pyc DELETED
Binary file (117 Bytes)
 
tflib/ops/__pycache__/fused_bias_act.cpython-36.pyc DELETED
Binary file (8.54 kB)
 
tflib/ops/__pycache__/upfirdn_2d.cpython-36.pyc DELETED
Binary file (14.8 kB)
 
tflib/ops/fused_bias_act.cu DELETED
@@ -1,188 +0,0 @@
1
- // Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
2
- //
3
- // This work is made available under the Nvidia Source Code License-NC.
4
- // To view a copy of this license, visit
5
- // https://nvlabs.github.io/stylegan2/license.html
6
-
7
- #define EIGEN_USE_GPU
8
- #define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
9
- #include "tensorflow/core/framework/op.h"
10
- #include "tensorflow/core/framework/op_kernel.h"
11
- #include "tensorflow/core/framework/shape_inference.h"
12
- #include <stdio.h>
13
-
14
- using namespace tensorflow;
15
- using namespace tensorflow::shape_inference;
16
-
17
// Evaluates CUDA_CALL once; if the result is not cudaSuccess, fails the op through
// OP_REQUIRES with an Internal error whose message is the CUDA error name.
#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) \
    do \
    { \
        cudaError_t err = CUDA_CALL; \
        OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); \
    } while (false)
18
-
19
- //------------------------------------------------------------------------
20
- // CUDA kernel.
21
-
22
// Argument bundle handed by value to FusedBiasActKernel. The host op zero-fills it,
// stores the op attributes once at construction, and fills in the pointers and
// sizes on every Compute() call.
template <class T>
struct FusedBiasActKernelParams
{
    // Buffers.
    const T*    x;      // [sizeX] input values
    const T*    b;      // [sizeB] bias values, or NULL when no bias is applied
    const T*    ref;    // [sizeX] reference values read by the gradient cases, or NULL
    T*          y;      // [sizeX] output values

    // Op attributes.
    int         grad;   // combined with `act` as (act * 10 + grad) to select the formula
    int         axis;   // dimension of x that the bias is indexed by (used on the host)
    int         act;    // activation selector, see the switch in the kernel
    float       alpha;  // multiplier applied on the negative branch of lrelu
    float       gain;   // factor multiplied into every output value

    // Sizes derived on the host.
    int         sizeX;  // number of elements in x and y
    int         sizeB;  // number of elements in b
    int         stepB;  // product of the dimensions of x that follow `axis`
    int         loopX;  // maximum number of elements processed by one thread
};
41
-
42
// Element-wise kernel: y = gain * f(x + b), where f is chosen by (p.act * 10 + p.grad).
//
// Launch layout: one-dimensional grid and block. Every block covers
// p.loopX * blockDim.x consecutive elements; each thread visits up to p.loopX of
// them, spaced blockDim.x apart. All arithmetic is carried out in float and the
// result is converted back to T on store.
template <class T>
static __global__ void FusedBiasActKernel(const FusedBiasActKernelParams<T> p)
{
    const float kExpRange     = 80.0f;
    const float kHalfExpRange = 40.0f;
    const float kSeluScale    = 1.0507009873554804934193349852946f;
    const float kSeluAlpha    = 1.6732632423543772848170429916717f;

    // Walk over the elements owned by this thread.
    int elemIdx = blockIdx.x * p.loopX * blockDim.x + threadIdx.x;
    for (int iter = 0; iter < p.loopX && elemIdx < p.sizeX; iter++, elemIdx += blockDim.x)
    {
        // Fetch the input and add the bias entry that belongs to its position along the bias axis.
        float x = (float)p.x[elemIdx];
        if (p.b)
            x += (float)p.b[(elemIdx / p.stepB) % p.sizeB];

        // Fetch the reference value; it is divided by the gain except for act == 9 (swish).
        float ref = (p.ref) ? (float)p.ref[elemIdx] : 0.0f;
        if (p.gain != 0.0f & p.act != 9)
            ref /= p.gain;

        // Evaluate the selected formula. Unlisted selectors fall into the linear forward case.
        float y;
        switch (p.act * 10 + p.grad)
        {
        // linear
        default:
        case 10:
            y = x;
            break;
        case 11:
            y = x;
            break;
        case 12:
            y = 0.0f;
            break;

        // relu
        case 20:
            y = (x > 0.0f) ? x : 0.0f;
            break;
        case 21:
            y = (ref > 0.0f) ? x : 0.0f;
            break;
        case 22:
            y = 0.0f;
            break;

        // lrelu
        case 30:
            y = (x > 0.0f) ? x : x * p.alpha;
            break;
        case 31:
            y = (ref > 0.0f) ? x : x * p.alpha;
            break;
        case 32:
            y = 0.0f;
            break;

        // tanh
        case 40:
        {
            float c = expf(x);
            float d = 1.0f / c;
            y = (x < -kExpRange) ? -1.0f : (x > kExpRange) ? 1.0f : (c - d) / (c + d);
            break;
        }
        case 41:
            y = x * (1.0f - ref * ref);
            break;
        case 42:
            y = x * (1.0f - ref * ref) * (-2.0f * ref);
            break;

        // sigmoid
        case 50:
            y = (x < -kExpRange) ? 0.0f : 1.0f / (expf(-x) + 1.0f);
            break;
        case 51:
            y = x * ref * (1.0f - ref);
            break;
        case 52:
            y = x * ref * (1.0f - ref) * (1.0f - 2.0f * ref);
            break;

        // elu
        case 60:
            y = (x >= 0.0f) ? x : expf(x) - 1.0f;
            break;
        case 61:
            y = (ref >= 0.0f) ? x : x * (ref + 1.0f);
            break;
        case 62:
            y = (ref >= 0.0f) ? 0.0f : x * (ref + 1.0f);
            break;

        // selu
        case 70:
            y = (x >= 0.0f) ? kSeluScale * x : (kSeluScale * kSeluAlpha) * (expf(x) - 1.0f);
            break;
        case 71:
            y = (ref >= 0.0f) ? x * kSeluScale : x * (ref + kSeluScale * kSeluAlpha);
            break;
        case 72:
            y = (ref >= 0.0f) ? 0.0f : x * (ref + kSeluScale * kSeluAlpha);
            break;

        // softplus
        case 80:
            y = (x > kExpRange) ? x : logf(expf(x) + 1.0f);
            break;
        case 81:
            y = x * (1.0f - expf(-ref));
            break;
        case 82:
        {
            float c = expf(-ref);
            y = x * c * (1.0f - c);
            break;
        }

        // swish
        case 90:
            y = (x < -kExpRange) ? 0.0f : x / (expf(-x) + 1.0f);
            break;
        case 91:
        {
            float c = expf(ref);
            float d = c + 1.0f;
            y = (ref > kHalfExpRange) ? x : x * c * (ref + d) / (d * d);
            break;
        }
        case 92:
        {
            float c = expf(ref);
            float d = c + 1.0f;
            y = (ref > kHalfExpRange) ? 0.0f : x * c * (ref * (2.0f - d) + 2.0f * d) / (d * d * d);
            break;
        }
        }

        // Scale by the gain and write back in the tensor's element type.
        p.y[elemIdx] = (T)(y * p.gain);
    }
}
117
-
118
- //------------------------------------------------------------------------
119
- // TensorFlow op.
120
-
121
// TensorFlow GPU op wrapping FusedBiasActKernel.
//
// Inputs:  0 = x (any shape), 1 = b (rank 1, possibly empty), 2 = ref (same element
//          count as x when grad != 0, empty otherwise).
// Output:  0 = y, allocated with the shape of x.
template <class T>
struct FusedBiasActOp : public OpKernel
{
    // Attribute values captured at construction; copied and completed per Compute() call.
    FusedBiasActKernelParams<T> m_attribs;

    // Reads the op attributes and rejects negative grad / axis / act.
    FusedBiasActOp(OpKernelConstruction* ctx) : OpKernel(ctx)
    {
        memset(&m_attribs, 0, sizeof(m_attribs));

        OP_REQUIRES_OK(ctx, ctx->GetAttr("grad",  &m_attribs.grad));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("axis",  &m_attribs.axis));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("act",   &m_attribs.act));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &m_attribs.alpha));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("gain",  &m_attribs.gain));

        OP_REQUIRES(ctx, m_attribs.grad >= 0, errors::InvalidArgument("grad must be non-negative"));
        OP_REQUIRES(ctx, m_attribs.axis >= 0, errors::InvalidArgument("axis must be non-negative"));
        OP_REQUIRES(ctx, m_attribs.act  >= 0, errors::InvalidArgument("act must be non-negative"));
    }

    // Validates the inputs, allocates the output and launches the kernel on the op's stream.
    void Compute(OpKernelContext* ctx)
    {
        FusedBiasActKernelParams<T> params = m_attribs;

        const Tensor& xIn   = ctx->input(0); // [...]
        const Tensor& bIn   = ctx->input(1); // [sizeB] or [0]
        const Tensor& refIn = ctx->input(2); // x.shape or [0]

        // Empty bias / ref tensors are passed to the kernel as NULL.
        params.x = xIn.flat<T>().data();
        params.b = NULL;
        if (bIn.NumElements())
            params.b = bIn.flat<T>().data();
        params.ref = NULL;
        if (refIn.NumElements())
            params.ref = refIn.flat<T>().data();

        OP_REQUIRES(ctx, bIn.NumElements() == 0 || m_attribs.axis < xIn.dims(), errors::InvalidArgument("axis out of bounds"));
        OP_REQUIRES(ctx, bIn.dims() == 1, errors::InvalidArgument("b must have rank 1"));
        OP_REQUIRES(ctx, bIn.NumElements() == 0 || bIn.NumElements() == xIn.dim_size(m_attribs.axis), errors::InvalidArgument("b has wrong number of elements"));
        OP_REQUIRES(ctx, refIn.NumElements() == ((params.grad == 0) ? 0 : xIn.NumElements()), errors::InvalidArgument("ref has wrong number of elements"));
        OP_REQUIRES(ctx, xIn.NumElements() <= kint32max, errors::InvalidArgument("x is too large"));

        // stepB = number of consecutive elements of x that share one bias entry.
        params.sizeX = (int)xIn.NumElements();
        params.sizeB = (int)bIn.NumElements();
        params.stepB = 1;
        for (int dim = m_attribs.axis + 1; dim < xIn.dims(); dim++)
            params.stepB *= (int)xIn.dim_size(dim);

        Tensor* yOut = NULL; // x.shape
        OP_REQUIRES_OK(ctx, ctx->allocate_output(0, xIn.shape(), &yOut));
        params.y = yOut->flat<T>().data();

        // Launch configuration: 128 threads per block, up to 4 elements per thread.
        params.loopX = 4;
        int threadsPerBlock = 4 * 32;
        int numBlocks = (params.sizeX - 1) / (params.loopX * threadsPerBlock) + 1;

        cudaStream_t stream = ctx->eigen_device<Eigen::GpuDevice>().stream();
        void* kernelArgs[] = {&params};
        OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel((void*)FusedBiasActKernel<T>, numBlocks, threadsPerBlock, kernelArgs, 0, stream));
    }
};
173
-
174
// Op signature: three inputs and one output of the same element type T
// (float or half), plus the attributes read by FusedBiasActOp.
REGISTER_OP("FusedBiasAct")
    .Input("x: T")
    .Input("b: T")
    .Input("ref: T")
    .Output("y: T")
    .Attr("T: {float, half}")
    .Attr("grad: int = 0")
    .Attr("axis: int = 1")
    .Attr("act: int = 0")
    .Attr("alpha: float = 0.0")
    .Attr("gain: float = 1.0");

// GPU kernels, one per supported element type.
REGISTER_KERNEL_BUILDER(
    Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint<float>("T"),
    FusedBiasActOp<float>);
REGISTER_KERNEL_BUILDER(
    Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
    FusedBiasActOp<Eigen::half>);
187
-
188
- //------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tflib/ops/fused_bias_act.py DELETED
@@ -1,196 +0,0 @@
1
- # Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
2
- #
3
- # This work is made available under the Nvidia Source Code License-NC.
4
- # To view a copy of this license, visit
5
- # https://nvlabs.github.io/stylegan2/license.html
6
-
7
- """Custom TensorFlow ops for efficient bias and activation."""
8
-
9
- import os
10
- import numpy as np
11
- import tensorflow as tf
12
- from .. import custom_ops
13
- from ...util import EasyDict
14
-
15
- def _get_plugin():
16
- return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu')
17
-
18
- #----------------------------------------------------------------------------
19
-
20
- activation_funcs = {
21
- 'linear': EasyDict(func=lambda x, **_: x, def_alpha=None, def_gain=1.0, cuda_idx=1, ref='y', zero_2nd_grad=True),
22
- 'relu': EasyDict(func=lambda x, **_: tf.nn.relu(x), def_alpha=None, def_gain=np.sqrt(2), cuda_idx=2, ref='y', zero_2nd_grad=True),
23
- 'lrelu': EasyDict(func=lambda x, alpha, **_: tf.nn.leaky_relu(x, alpha), def_alpha=0.2, def_gain=np.sqrt(2), cuda_idx=3, ref='y', zero_2nd_grad=True),
24
- 'tanh': EasyDict(func=lambda x, **_: tf.nn.tanh(x), def_alpha=None, def_gain=1.0, cuda_idx=4, ref='y', zero_2nd_grad=False),
25
- 'sigmoid': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x), def_alpha=None, def_gain=1.0, cuda_idx=5, ref='y', zero_2nd_grad=False),
26
- 'elu': EasyDict(func=lambda x, **_: tf.nn.elu(x), def_alpha=None, def_gain=1.0, cuda_idx=6, ref='y', zero_2nd_grad=False),
27
- 'selu': EasyDict(func=lambda x, **_: tf.nn.selu(x), def_alpha=None, def_gain=1.0, cuda_idx=7, ref='y', zero_2nd_grad=False),
28
- 'softplus': EasyDict(func=lambda x, **_: tf.nn.softplus(x), def_alpha=None, def_gain=1.0, cuda_idx=8, ref='y', zero_2nd_grad=False),
29
- 'swish': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x) * x, def_alpha=None, def_gain=np.sqrt(2), cuda_idx=9, ref='x', zero_2nd_grad=False),
30
- }
31
-
32
- #----------------------------------------------------------------------------
33
-
34
- def fused_bias_act(x, b=None, axis=1, act='linear', alpha=None, gain=None, impl='cuda'):
35
- r"""Fused bias and activation function.
36
-
37
- Adds bias `b` to activation tensor `x`, evaluates activation function `act`,
38
- and scales the result by `gain`. Each of the steps is optional. In most cases,
39
- the fused op is considerably more efficient than performing the same calculation
40
- using standard TensorFlow ops. It supports first and second order gradients,
41
- but not third order gradients.
42
-
43
- Args:
44
- x: Input activation tensor. Can have any shape, but if `b` is defined, the
45
- dimension corresponding to `axis`, as well as the rank, must be known.
46
- b: Bias vector, or `None` to disable. Must be a 1D tensor of the same type
47
- as `x`. The shape must be known, and it must match the dimension of `x`
48
- corresponding to `axis`.
49
- axis: The dimension in `x` corresponding to the elements of `b`.
50
- The value of `axis` is ignored if `b` is not specified.
51
- act: Name of the activation function to evaluate, or `"linear"` to disable.
52
- Can be e.g. `"relu"`, `"lrelu"`, `"tanh"`, `"sigmoid"`, `"swish"`, etc.
53
- See `activation_funcs` for a full list. `None` is not allowed.
54
- alpha: Shape parameter for the activation function, or `None` to use the default.
55
- gain: Scaling factor for the output tensor, or `None` to use default.
56
- See `activation_funcs` for the default scaling of each activation function.
57
- If unsure, consider specifying `1.0`.
58
- impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).
59
-
60
- Returns:
61
- Tensor of the same shape and datatype as `x`.
62
- """
63
-
64
- impl_dict = {
65
- 'ref': _fused_bias_act_ref,
66
- 'cuda': _fused_bias_act_cuda,
67
- }
68
- return impl_dict[impl](x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain)
69
-
70
- #----------------------------------------------------------------------------
71
-
72
- def _fused_bias_act_ref(x, b, axis, act, alpha, gain):
73
- """Slow reference implementation of `fused_bias_act()` using standard TensorFlow ops."""
74
-
75
- # Validate arguments.
76
- x = tf.convert_to_tensor(x)
77
- b = tf.convert_to_tensor(b) if b is not None else tf.constant([], dtype=x.dtype)
78
- act_spec = activation_funcs[act]
79
- assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis])
80
- assert b.shape[0] == 0 or 0 <= axis < x.shape.rank
81
- if alpha is None:
82
- alpha = act_spec.def_alpha
83
- if gain is None:
84
- gain = act_spec.def_gain
85
-
86
- # Add bias.
87
- if b.shape[0] != 0:
88
- x += tf.reshape(b, [-1 if i == axis else 1 for i in range(x.shape.rank)])
89
-
90
- # Evaluate activation function.
91
- x = act_spec.func(x, alpha=alpha)
92
-
93
- # Scale by gain.
94
- if gain != 1:
95
- x *= gain
96
- return x
97
-
98
- #----------------------------------------------------------------------------
99
-
100
- def _fused_bias_act_cuda(x, b, axis, act, alpha, gain):
101
- """Fast CUDA implementation of `fused_bias_act()` using custom ops."""
102
-
103
- # Validate arguments.
104
- x = tf.convert_to_tensor(x)
105
- empty_tensor = tf.constant([], dtype=x.dtype)
106
- b = tf.convert_to_tensor(b) if b is not None else empty_tensor
107
- act_spec = activation_funcs[act]
108
- assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis])
109
- assert b.shape[0] == 0 or 0 <= axis < x.shape.rank
110
- if alpha is None:
111
- alpha = act_spec.def_alpha
112
- if gain is None:
113
- gain = act_spec.def_gain
114
-
115
- # Special cases.
116
- if act == 'linear' and b is None and gain == 1.0:
117
- return x
118
- if act_spec.cuda_idx is None:
119
- return _fused_bias_act_ref(x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain)
120
-
121
- # CUDA kernel.
122
- cuda_kernel = _get_plugin().fused_bias_act
123
- cuda_kwargs = dict(axis=axis, act=act_spec.cuda_idx, alpha=alpha, gain=gain)
124
-
125
- # Forward pass: y = func(x, b).
126
- def func_y(x, b):
127
- y = cuda_kernel(x=x, b=b, ref=empty_tensor, grad=0, **cuda_kwargs)
128
- y.set_shape(x.shape)
129
- return y
130
-
131
- # Backward pass: dx, db = grad(dy, x, y)
132
- def grad_dx(dy, x, y):
133
- ref = {'x': x, 'y': y}[act_spec.ref]
134
- dx = cuda_kernel(x=dy, b=empty_tensor, ref=ref, grad=1, **cuda_kwargs)
135
- dx.set_shape(x.shape)
136
- return dx
137
- def grad_db(dx):
138
- if b.shape[0] == 0:
139
- return empty_tensor
140
- db = dx
141
- if axis < x.shape.rank - 1:
142
- db = tf.reduce_sum(db, list(range(axis + 1, x.shape.rank)))
143
- if axis > 0:
144
- db = tf.reduce_sum(db, list(range(axis)))
145
- db.set_shape(b.shape)
146
- return db
147
-
148
- # Second order gradients: d_dy, d_x = grad2(d_dx, d_db, x, y)
149
- def grad2_d_dy(d_dx, d_db, x, y):
150
- ref = {'x': x, 'y': y}[act_spec.ref]
151
- d_dy = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=1, **cuda_kwargs)
152
- d_dy.set_shape(x.shape)
153
- return d_dy
154
- def grad2_d_x(d_dx, d_db, x, y):
155
- ref = {'x': x, 'y': y}[act_spec.ref]
156
- d_x = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=2, **cuda_kwargs)
157
- d_x.set_shape(x.shape)
158
- return d_x
159
-
160
- # Fast version for piecewise-linear activation funcs.
161
- @tf.custom_gradient
162
- def func_zero_2nd_grad(x, b):
163
- y = func_y(x, b)
164
- @tf.custom_gradient
165
- def grad(dy):
166
- dx = grad_dx(dy, x, y)
167
- db = grad_db(dx)
168
- def grad2(d_dx, d_db):
169
- d_dy = grad2_d_dy(d_dx, d_db, x, y)
170
- return d_dy
171
- return (dx, db), grad2
172
- return y, grad
173
-
174
- # Slow version for general activation funcs.
175
- @tf.custom_gradient
176
- def func_nonzero_2nd_grad(x, b):
177
- y = func_y(x, b)
178
- def grad_wrap(dy):
179
- @tf.custom_gradient
180
- def grad_impl(dy, x):
181
- dx = grad_dx(dy, x, y)
182
- db = grad_db(dx)
183
- def grad2(d_dx, d_db):
184
- d_dy = grad2_d_dy(d_dx, d_db, x, y)
185
- d_x = grad2_d_x(d_dx, d_db, x, y)
186
- return d_dy, d_x
187
- return (dx, db), grad2
188
- return grad_impl(dy, x)
189
- return y, grad_wrap
190
-
191
- # Which version to use?
192
- if act_spec.zero_2nd_grad:
193
- return func_zero_2nd_grad(x, b)
194
- return func_nonzero_2nd_grad(x, b)
195
-
196
- #----------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tflib/ops/upfirdn_2d.cu DELETED
@@ -1,326 +0,0 @@
1
- // Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
2
- //
3
- // This work is made available under the Nvidia Source Code License-NC.
4
- // To view a copy of this license, visit
5
- // https://nvlabs.github.io/stylegan2/license.html
6
-
7
- #define EIGEN_USE_GPU
8
- #define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
9
- #include "tensorflow/core/framework/op.h"
10
- #include "tensorflow/core/framework/op_kernel.h"
11
- #include "tensorflow/core/framework/shape_inference.h"
12
- #include <stdio.h>
13
-
14
- using namespace tensorflow;
15
- using namespace tensorflow::shape_inference;
16
-
17
- //------------------------------------------------------------------------
18
- // Helpers.
19
-
20
// Evaluates CUDA_CALL once; if the result is not cudaSuccess, fails the op through
// OP_REQUIRES with an Internal error whose message is the CUDA error name.
#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) \
    do \
    { \
        cudaError_t err = CUDA_CALL; \
        OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); \
    } while (false)
21
-
22
// Integer division rounding toward negative infinity.
//
// C++ `/` truncates toward zero, so the truncated quotient is one too large
// exactly when the division is inexact and the operands have opposite signs.
// Works for any non-zero divisor; b == 0 is undefined, as with plain `/`.
static __host__ __device__ __forceinline__ int floorDiv(int a, int b)
{
    int quotient = a / b;
    int remainder = a - quotient * b; // same sign as `a`, or zero
    if (remainder != 0 && ((remainder < 0) != (b < 0)))
        quotient--;
    return quotient;
}
29
-
30
- //------------------------------------------------------------------------
31
- // CUDA kernel params.
32
-
33
// Argument bundle handed by value to the UpFirDn2D kernels. The host op zero-fills
// it, stores the op attributes once at construction, and fills in the pointers,
// sizes and loop counts on every Compute() call.
template <class T>
struct UpFirDn2DKernelParams
{
    // Buffers.
    const T*    x;          // [majorDim, inH, inW, minorDim] input
    const T*    k;          // [kernelH, kernelW] filter taps
    T*          y;          // [majorDim, outH, outW, minorDim] output

    // Op attributes.
    int         upx;        // "upx" attribute
    int         upy;        // "upy" attribute
    int         downx;      // "downx" attribute
    int         downy;      // "downy" attribute
    int         padx0;      // "padx0" attribute
    int         padx1;      // "padx1" attribute
    int         pady0;      // "pady0" attribute
    int         pady1;      // "pady1" attribute

    // Tensor extents.
    int         majorDim;
    int         inH;
    int         inW;
    int         minorDim;
    int         kernelH;
    int         kernelW;
    int         outH;
    int         outW;

    // Launch-dependent loop counts.
    int         loopMajor;  // major indices handled per block along grid z
    int         loopX;      // output columns (large kernel) or tiles (small kernel) per block along grid y
};
60
-
61
- //------------------------------------------------------------------------
62
- // General CUDA implementation for large filter kernels.
63
-
64
// General UpFirDn2D kernel without compile-time assumptions about the filter size.
//
// Launch layout: the x thread index enumerates (outY, minorIdx) pairs, the y thread
// index enumerates output columns (each thread handles up to p.loopX of them, spaced
// blockDim.y apart), and blockIdx.z selects a group of p.loopMajor major indices.
// Each thread accumulates one output value at a time in float.
template <class T>
static __global__ void UpFirDn2DKernel_large(const UpFirDn2DKernelParams<T> p)
{
    // Split the x thread index into an output row and a minor index.
    int minorIdx = blockIdx.x * blockDim.x + threadIdx.x;
    int outY = minorIdx / p.minorDim;
    minorIdx -= outY * p.minorDim;
    int outXBase = blockIdx.y * p.loopX * blockDim.y + threadIdx.y;
    int majorIdxBase = blockIdx.z * p.loopMajor;
    if (outXBase >= p.outW || outY >= p.outH || majorIdxBase >= p.majorDim)
        return;

    // Vertical extent of the receptive field, clamped to the input: first input row,
    // number of input rows, and the filter row paired with the first input row.
    int midY = outY * p.downy + p.upy - 1 - p.pady0;
    int inY = min(max(floorDiv(midY, p.upy), 0), p.inH);
    int numRows = min(max(floorDiv(midY + p.kernelH, p.upy), 0), p.inH) - inY;
    int kernelY = midY + p.kernelH - (inY + 1) * p.upy;

    // Element strides: the input advances forwards, the filter backwards in steps of the upsampling factor.
    const int srcStepX = p.minorDim;
    const int tapStepX = -p.upx;
    const int srcStepY = p.inW * p.minorDim;
    const int tapStepY = -p.upy * p.kernelW;

    // Visit every (major index, output column) pair assigned to this thread.
    for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor && majorIdx < p.majorDim; loopMajor++, majorIdx++)
    for (int loopX = 0, outX = outXBase; loopX < p.loopX && outX < p.outW; loopX++, outX += blockDim.y)
    {
        // Horizontal extent of the receptive field, clamped to the input.
        int midX = outX * p.downx + p.upx - 1 - p.padx0;
        int inX = min(max(floorDiv(midX, p.upx), 0), p.inW);
        int numCols = min(max(floorDiv(midX + p.kernelW, p.upx), 0), p.inW) - inX;
        int kernelX = midX + p.kernelW - (inX + 1) * p.upx;

        // Row start pointers into the input and the filter.
        const T* srcRow = &p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx];
        const T* tapRow = &p.k[kernelY * p.kernelW + kernelX];

        // Accumulate input * tap products row by row, left to right.
        float acc = 0.0f;
        for (int row = 0; row < numRows; row++, srcRow += srcStepY, tapRow += tapStepY)
        {
            const T* src = srcRow;
            const T* tap = tapRow;
            for (int col = 0; col < numCols; col++, src += srcStepX, tap += tapStepX)
                acc += (float)(*src) * (float)(*tap);
        }

        // Write the accumulated value in the tensor's element type.
        p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)acc;
    }
}
118
-
119
- //------------------------------------------------------------------------
120
- // Specialized CUDA implementation for small filter kernels.
121
-
122
// UpFirDn2D kernel specialized at compile time for fixed up/down factors, a maximum
// filter size (kernelW x kernelH) and an output tile size (tileOutW x tileOutH).
//
// Launch layout: one-dimensional blocks. blockIdx.x enumerates (output tile row,
// minorIdx) pairs, blockIdx.y enumerates groups of p.loopX output tile columns and
// blockIdx.z enumerates groups of p.loopMajor major indices. The filter and one
// input tile are staged in static shared memory; filters smaller than the template
// size are zero-filled.
template <class T, int upx, int upy, int downx, int downy, int kernelW, int kernelH, int tileOutW, int tileOutH>
static __global__ void UpFirDn2DKernel_small(const UpFirDn2DKernelParams<T> p)
{
    //assert(kernelW % upx == 0);
    //assert(kernelH % upy == 0);
    const int tileInW = ((tileOutW - 1) * downx + kernelW - 1) / upx + 1;
    const int tileInH = ((tileOutH - 1) * downy + kernelH - 1) / upy + 1;
    __shared__ volatile float sharedTaps[kernelH][kernelW];
    __shared__ volatile float sharedTile[tileInH][tileInW];

    // Work out which tile this block owns. The exit test depends only on block
    // indices, so a block either leaves as a whole or reaches every barrier below.
    int minorIdx = blockIdx.x;
    int tileOutY = minorIdx / p.minorDim;
    minorIdx -= tileOutY * p.minorDim;
    tileOutY *= tileOutH;
    int tileOutXBase = blockIdx.y * p.loopX * tileOutW;
    int majorIdxBase = blockIdx.z * p.loopMajor;
    if (tileOutXBase >= p.outW | tileOutY >= p.outH | majorIdxBase >= p.majorDim)
        return;

    // Stage the filter in shared memory, flipped in both directions; taps outside
    // the actual filter extent are stored as zero.
    for (int tapIdx = threadIdx.x; tapIdx < kernelH * kernelW; tapIdx += blockDim.x)
    {
        int tapY = tapIdx / kernelW;
        int tapX = tapIdx - tapY * kernelW;
        float tapValue = 0.0f;
        if (tapX < p.kernelW & tapY < p.kernelH)
            tapValue = (float)p.k[(p.kernelH - 1 - tapY) * p.kernelW + (p.kernelW - 1 - tapX)];
        sharedTaps[tapY][tapX] = tapValue;
    }

    // Visit every (major index, output tile) pair assigned to this block.
    for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor & majorIdx < p.majorDim; loopMajor++, majorIdx++)
    for (int loopX = 0, tileOutX = tileOutXBase; loopX < p.loopX & tileOutX < p.outW; loopX++, tileOutX += tileOutW)
    {
        // Top-left corner of the input tile needed by this output tile.
        int tileMidX = tileOutX * downx + upx - 1 - p.padx0;
        int tileMidY = tileOutY * downy + upy - 1 - p.pady0;
        int tileInX = floorDiv(tileMidX, upx);
        int tileInY = floorDiv(tileMidY, upy);

        // Barrier before overwriting the shared input tile (readers of the previous
        // iteration must be done), then stage the tile with zeros outside the input.
        __syncthreads();
        for (int inIdx = threadIdx.x; inIdx < tileInH * tileInW; inIdx += blockDim.x)
        {
            int relInY = inIdx / tileInW;
            int relInX = inIdx - relInY * tileInW;
            int inX = relInX + tileInX;
            int inY = relInY + tileInY;
            float pixel = 0.0f;
            if (inX >= 0 & inY >= 0 & inX < p.inW & inY < p.inH)
                pixel = (float)p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx];
            sharedTile[relInY][relInX] = pixel;
        }

        // Barrier so the staged filter and input tile are visible, then compute the output tile.
        __syncthreads();
        for (int outIdx = threadIdx.x; outIdx < tileOutH * tileOutW; outIdx += blockDim.x)
        {
            int relOutY = outIdx / tileOutW;
            int relOutX = outIdx - relOutY * tileOutW;
            int outX = relOutX + tileOutX;
            int outY = relOutY + tileOutY;

            // Receptive field of this output pixel, relative to the shared tile,
            // and the first (flipped) filter tap that pairs with it.
            int midX = tileMidX + relOutX * downx;
            int midY = tileMidY + relOutY * downy;
            int inX = floorDiv(midX, upx);
            int inY = floorDiv(midY, upy);
            int relInX = inX - tileInX;
            int relInY = inY - tileInY;
            int kernelX = (inX + 1) * upx - midX - 1; // flipped
            int kernelY = (inY + 1) * upy - midY - 1; // flipped

            // Accumulate over the taps that coincide with input samples.
            float acc = 0.0f;
            #pragma unroll
            for (int y = 0; y < kernelH / upy; y++)
                #pragma unroll
                for (int x = 0; x < kernelW / upx; x++)
                    acc += sharedTile[relInY + y][relInX + x] * sharedTaps[kernelY + y * upy][kernelX + x * upx];

            // Write only the pixels that fall inside the output.
            if (outX < p.outW & outY < p.outH)
                p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)acc;
        }
    }
}
208
-
209
- //------------------------------------------------------------------------
210
- // TensorFlow op.
211
-
212
// TensorFlow GPU op wrapping the UpFirDn2D kernels.
//
// Inputs:  0 = x [majorDim, inH, inW, minorDim], 1 = k [kernelH, kernelW].
// Output:  0 = y [majorDim, outH, outW, minorDim].
template <class T>
struct UpFirDn2DOp : public OpKernel
{
    // Attribute values captured at construction; copied and completed per Compute() call.
    UpFirDn2DKernelParams<T> m_attribs;

    // Reads the op attributes and requires all up/down factors to be at least 1.
    UpFirDn2DOp(OpKernelConstruction* ctx) : OpKernel(ctx)
    {
        memset(&m_attribs, 0, sizeof(m_attribs));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("upx", &m_attribs.upx));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("upy", &m_attribs.upy));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("downx", &m_attribs.downx));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("downy", &m_attribs.downy));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("padx0", &m_attribs.padx0));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("padx1", &m_attribs.padx1));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("pady0", &m_attribs.pady0));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("pady1", &m_attribs.pady1));
        OP_REQUIRES(ctx, m_attribs.upx >= 1 && m_attribs.upy >= 1, errors::InvalidArgument("upx and upy must be at least 1x1"));
        OP_REQUIRES(ctx, m_attribs.downx >= 1 && m_attribs.downy >= 1, errors::InvalidArgument("downx and downy must be at least 1x1"));
    }

    // Validates the inputs, allocates the output, picks a kernel variant and launches
    // it on the op's stream. An output with zero elements is returned without a launch.
    void Compute(OpKernelContext* ctx) override
    {
        UpFirDn2DKernelParams<T> p = m_attribs;
        cudaStream_t stream = ctx->eigen_device<Eigen::GpuDevice>().stream();

        const Tensor& x = ctx->input(0); // [majorDim, inH, inW, minorDim]
        const Tensor& k = ctx->input(1); // [kernelH, kernelW]
        p.x = x.flat<T>().data();
        p.k = k.flat<T>().data();
        OP_REQUIRES(ctx, x.dims() == 4, errors::InvalidArgument("input must have rank 4"));
        OP_REQUIRES(ctx, k.dims() == 2, errors::InvalidArgument("kernel must have rank 2"));
        OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("input too large"));
        OP_REQUIRES(ctx, k.NumElements() <= kint32max, errors::InvalidArgument("kernel too large"));

        p.majorDim  = (int)x.dim_size(0);
        p.inH       = (int)x.dim_size(1);
        p.inW       = (int)x.dim_size(2);
        p.minorDim  = (int)x.dim_size(3);
        p.kernelH   = (int)k.dim_size(0);
        p.kernelW   = (int)k.dim_size(1);
        OP_REQUIRES(ctx, p.kernelW >= 1 && p.kernelH >= 1, errors::InvalidArgument("kernel must be at least 1x1"));

        // Output extent after upsampling, padding, filtering and downsampling.
        p.outW = (p.inW * p.upx + p.padx0 + p.padx1 - p.kernelW + p.downx) / p.downx;
        p.outH = (p.inH * p.upy + p.pady0 + p.pady1 - p.kernelH + p.downy) / p.downy;
        OP_REQUIRES(ctx, p.outW >= 1 && p.outH >= 1, errors::InvalidArgument("output must be at least 1x1"));

        Tensor* y = NULL; // [majorDim, outH, outW, minorDim]
        TensorShape ys;
        ys.AddDim(p.majorDim);
        ys.AddDim(p.outH);
        ys.AddDim(p.outW);
        ys.AddDim(p.minorDim);
        OP_REQUIRES_OK(ctx, ctx->allocate_output(0, ys, &y));
        p.y = y->flat<T>().data();
        OP_REQUIRES(ctx, y->NumElements() <= kint32max, errors::InvalidArgument("output too large"));

        // Nothing to compute for an empty output (majorDim == 0 or minorDim == 0).
        // Launching anyway would use a grid dimension of 0 on the small-kernel path,
        // which cudaLaunchKernel rejects, and would divide by minorDim == 0 inside
        // the large kernel.
        if (y->NumElements() == 0)
            return;

        // Choose CUDA kernel to use. Later matches override earlier ones, so the
        // smallest specialization that still fits the filter wins.
        void* cudaKernel = (void*)UpFirDn2DKernel_large<T>;
        int tileOutW = -1;
        int tileOutH = -1;
        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 7 && p.kernelH <= 7) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 7,7, 64,16>; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 6,6, 64,16>; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 5 && p.kernelH <= 5) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 5,5, 64,16>; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 4,4, 64,16>; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 3 && p.kernelH <= 3) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 3,3, 64,16>; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 8,8, 64,16>; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 6,6, 64,16>; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 4,4, 64,16>; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 2,2, 64,16>; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 8,8, 32,8>;  tileOutW = 32; tileOutH = 8;  }
        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 6,6, 32,8>;  tileOutW = 32; tileOutH = 8;  }
        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 4,4, 32,8>;  tileOutW = 32; tileOutH = 8;  }
        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 2,2, 32,8>;  tileOutW = 32; tileOutH = 8;  }

        // Choose launch params.
        dim3 blockSize;
        dim3 gridSize;
        if (tileOutW > 0 && tileOutH > 0) // small
        {
            p.loopMajor = (p.majorDim - 1) / 16384 + 1;
            p.loopX = 1;
            blockSize = dim3(32 * 8, 1, 1);
            gridSize = dim3(((p.outH - 1) / tileOutH + 1) * p.minorDim, (p.outW - 1) / (p.loopX * tileOutW) + 1, (p.majorDim - 1) / p.loopMajor + 1);
        }
        else // large
        {
            p.loopMajor = (p.majorDim - 1) / 16384 + 1;
            p.loopX = 4;
            blockSize = dim3(4, 32, 1);
            gridSize = dim3((p.outH * p.minorDim - 1) / blockSize.x + 1, (p.outW - 1) / (p.loopX * blockSize.y) + 1, (p.majorDim - 1) / p.loopMajor + 1);
        }

        // Launch CUDA kernel.
        void* args[] = {&p};
        OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel(cudaKernel, gridSize, blockSize, args, 0, stream));
    }
};
309
-
310
// Op signature: input tensor and filter of the same element type T (float or half),
// one output, plus the up/down/padding attributes read by UpFirDn2DOp.
REGISTER_OP("UpFirDn2D")
    .Input("x: T")
    .Input("k: T")
    .Output("y: T")
    .Attr("T: {float, half}")
    .Attr("upx: int = 1")
    .Attr("upy: int = 1")
    .Attr("downx: int = 1")
    .Attr("downy: int = 1")
    .Attr("padx0: int = 0")
    .Attr("padx1: int = 0")
    .Attr("pady0: int = 0")
    .Attr("pady1: int = 0");

// GPU kernels, one per supported element type.
REGISTER_KERNEL_BUILDER(
    Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint<float>("T"),
    UpFirDn2DOp<float>);
REGISTER_KERNEL_BUILDER(
    Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
    UpFirDn2DOp<Eigen::half>);
325
-
326
- //------------------------------------------------------------------------