diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..bccee65d1ad00e309e458a86e98c9790eba999c3 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tflib/_cudacache/upfirdn_2d_79757bc08e1fcc2d526ecf3cb32fad92.so filter=lfs diff=lfs merge=lfs -text diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e34112b628e3d526739681eac984c5c2db704814 --- /dev/null +++ b/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +from . import submission + +from .submission.run_context import RunContext + +from .submission.submit import SubmitTarget +from .submission.submit import PathType +from .submission.submit import SubmitConfig +from .submission.submit import submit_run +from .submission.submit import get_path_from_template +from .submission.submit import convert_path +from .submission.submit import make_run_dir_path + +from .util import EasyDict + +submit_config: SubmitConfig = None # Package level variable for SubmitConfig which is only valid when inside the run function. diff --git a/__pycache__/__init__.cpython-310.pyc b/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2b000332def8d28317693e662f04e4525458c09 Binary files /dev/null and b/__pycache__/__init__.cpython-310.pyc differ diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7a465d6738c22a1526b2f9e0e7e0c5d76acf1c34 Binary files /dev/null and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/__pycache__/__init__.cpython-38.pyc b/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..97876f7343c4781d33c587b6eebb2fb5e7bc9c6e Binary files /dev/null and b/__pycache__/__init__.cpython-38.pyc differ diff --git a/__pycache__/__init__.cpython-39.pyc b/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c8013a91ae1dbf8c20f0b834c5a74f17e50bc59 Binary files /dev/null and b/__pycache__/__init__.cpython-39.pyc differ diff --git a/__pycache__/util.cpython-310.pyc b/__pycache__/util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef0c00c3d0a4d0b33a8b5d015d760f260238dd54 Binary files /dev/null and b/__pycache__/util.cpython-310.pyc differ diff --git a/__pycache__/util.cpython-36.pyc b/__pycache__/util.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff883d33814c3c00ab6851dc2a1099f0f037d44a Binary files /dev/null and b/__pycache__/util.cpython-36.pyc differ diff --git a/__pycache__/util.cpython-38.pyc b/__pycache__/util.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a400c9dd6bda17d69821bcec87a898684e08a66 Binary files /dev/null and b/__pycache__/util.cpython-38.pyc differ diff --git a/__pycache__/util.cpython-39.pyc b/__pycache__/util.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e20e94a3137cb8c42ebb20dffb4faf3ae7c864b Binary files /dev/null and b/__pycache__/util.cpython-39.pyc differ diff --git a/submission/__init__.py b/submission/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..acf2fbee4b216cb9f2a0b73993fd1c7042e2248d --- /dev/null +++ b/submission/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +from . import run_context +from . import submit diff --git a/submission/__pycache__/__init__.cpython-310.pyc b/submission/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..49d95f12dab43b5210719a46cd77487759d3b2de Binary files /dev/null and b/submission/__pycache__/__init__.cpython-310.pyc differ diff --git a/submission/__pycache__/__init__.cpython-36.pyc b/submission/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ddc1f46c28d2083ea572abefb0e943169a53a6bf Binary files /dev/null and b/submission/__pycache__/__init__.cpython-36.pyc differ diff --git a/submission/__pycache__/__init__.cpython-38.pyc b/submission/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7fb9c21385f191a2e293c6c92bc0f2600eb63988 Binary files /dev/null and b/submission/__pycache__/__init__.cpython-38.pyc differ diff --git a/submission/__pycache__/__init__.cpython-39.pyc b/submission/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a3eb6c3a4dc80bf848d05969fd9ba0adb1beaa14 Binary files /dev/null and b/submission/__pycache__/__init__.cpython-39.pyc differ diff --git a/submission/__pycache__/run_context.cpython-310.pyc b/submission/__pycache__/run_context.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16a88b1fe5c61aa930139d80da56ac319bfc3599 Binary files /dev/null and b/submission/__pycache__/run_context.cpython-310.pyc differ diff --git a/submission/__pycache__/run_context.cpython-36.pyc b/submission/__pycache__/run_context.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c79464ccf271723928e7ea5846f3121642912acf Binary files /dev/null and b/submission/__pycache__/run_context.cpython-36.pyc differ diff --git a/submission/__pycache__/run_context.cpython-38.pyc b/submission/__pycache__/run_context.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27f155bf32573a58a1b14b2cfaeece781ca4db14 Binary files /dev/null and b/submission/__pycache__/run_context.cpython-38.pyc differ diff --git a/submission/__pycache__/run_context.cpython-39.pyc b/submission/__pycache__/run_context.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..82e906015f2d2c1aea6d1b2ea746dfe1f1aacd09 Binary files /dev/null and b/submission/__pycache__/run_context.cpython-39.pyc differ diff --git a/submission/__pycache__/submit.cpython-310.pyc b/submission/__pycache__/submit.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0f0d56d928ab6555b9b4084a030dda1963e28424 Binary files /dev/null and b/submission/__pycache__/submit.cpython-310.pyc differ diff --git a/submission/__pycache__/submit.cpython-36.pyc b/submission/__pycache__/submit.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f0edb249c830bc74157540f24be2caa25d8f8817 Binary files /dev/null and b/submission/__pycache__/submit.cpython-36.pyc differ diff --git a/submission/__pycache__/submit.cpython-38.pyc b/submission/__pycache__/submit.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6fb6b1a8a68da858aaebcbb567ae0dd9f1ee7def Binary files /dev/null and b/submission/__pycache__/submit.cpython-38.pyc differ diff --git a/submission/__pycache__/submit.cpython-39.pyc b/submission/__pycache__/submit.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a631dc24ce45365f1f8274e563e1b4354afac50c Binary files /dev/null and b/submission/__pycache__/submit.cpython-39.pyc differ diff --git a/submission/internal/__init__.py b/submission/internal/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0f11279893d6056e8cb6f9e04e12aad07a776496 --- /dev/null +++ b/submission/internal/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +from . import local diff --git a/submission/internal/__pycache__/__init__.cpython-36.pyc b/submission/internal/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e197ab9bed4593646f04c5f1f123f49ddd9b34c5 Binary files /dev/null and b/submission/internal/__pycache__/__init__.cpython-36.pyc differ diff --git a/submission/internal/__pycache__/__init__.cpython-38.pyc b/submission/internal/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d15b0c3a8da1eac47d29d581014b76e5ca26096 Binary files /dev/null and b/submission/internal/__pycache__/__init__.cpython-38.pyc differ diff --git a/submission/internal/__pycache__/__init__.cpython-39.pyc b/submission/internal/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bc046661f9d0a58c4db9a911605f6b743a317747 Binary files /dev/null and b/submission/internal/__pycache__/__init__.cpython-39.pyc differ diff --git a/submission/internal/__pycache__/local.cpython-36.pyc b/submission/internal/__pycache__/local.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7899b1660eb9752f9fb45b94b762a69807251023 Binary files /dev/null and b/submission/internal/__pycache__/local.cpython-36.pyc differ diff --git a/submission/internal/__pycache__/local.cpython-38.pyc b/submission/internal/__pycache__/local.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..679805d01f94611885c9e9241db82f629e9ab438 Binary files /dev/null and b/submission/internal/__pycache__/local.cpython-38.pyc differ diff --git a/submission/internal/__pycache__/local.cpython-39.pyc b/submission/internal/__pycache__/local.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9f57b457b4f40416ed89a492062b3a26eb3b757f Binary files /dev/null and b/submission/internal/__pycache__/local.cpython-39.pyc differ diff --git a/submission/internal/local.py b/submission/internal/local.py new file mode 100644 index 0000000000000000000000000000000000000000..c03c79e93ca19704157782a0bae556a7752b775c --- /dev/null +++ b/submission/internal/local.py @@ -0,0 +1,22 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +class TargetOptions(): + def __init__(self): + self.do_not_copy_source_files = False + +class Target(): + def __init__(self): + pass + + def finalize_submit_config(self, submit_config, host_run_dir): + print ('Local submit ', end='', flush=True) + submit_config.run_dir = host_run_dir + + def submit(self, submit_config, host_run_dir): + from ..submit import run_wrapper, convert_path + print('- run_dir: %s' % convert_path(submit_config.run_dir), flush=True) + return run_wrapper(submit_config) diff --git a/submission/run_context.py b/submission/run_context.py new file mode 100644 index 0000000000000000000000000000000000000000..62fbb1afd86be9d5fa963a1958485a2fc6d1152a --- /dev/null +++ b/submission/run_context.py @@ -0,0 +1,110 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Helpers for managing the run/training loop.""" + +import datetime +import json +import os +import pprint +import time +import types + +from typing import Any + +from . import submit + +# Singleton RunContext +_run_context = None + +class RunContext(object): + """Helper class for managing the run/training loop. + + The context will hide the implementation details of a basic run/training loop. + It will set things up properly, tell if run should be stopped, and then cleans up. + User should call update periodically and use should_stop to determine if run should be stopped. + + Args: + submit_config: The SubmitConfig that is used for the current run. + config_module: (deprecated) The whole config module that is used for the current run. + """ + + def __init__(self, submit_config: submit.SubmitConfig, config_module: types.ModuleType = None): + global _run_context + # Only a single RunContext can be alive + assert _run_context is None + _run_context = self + self.submit_config = submit_config + self.should_stop_flag = False + self.has_closed = False + self.start_time = time.time() + self.last_update_time = time.time() + self.last_update_interval = 0.0 + self.progress_monitor_file_path = None + + # vestigial config_module support just prints a warning + if config_module is not None: + print("RunContext.config_module parameter support has been removed.") + + # write out details about the run to a text file + self.run_txt_data = {"task_name": submit_config.task_name, "host_name": submit_config.host_name, "start_time": datetime.datetime.now().isoformat(sep=" ")} + with open(os.path.join(submit_config.run_dir, "run.txt"), "w") as f: + pprint.pprint(self.run_txt_data, stream=f, indent=4, width=200, compact=False) + + def __enter__(self) -> "RunContext": + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + self.close() + + def update(self, loss: Any = 0, cur_epoch: Any = 0, max_epoch: Any = None) -> None: + """Do general housekeeping and keep the state of the context up-to-date. + Should be called often enough but not in a tight loop.""" + assert not self.has_closed + + self.last_update_interval = time.time() - self.last_update_time + self.last_update_time = time.time() + + if os.path.exists(os.path.join(self.submit_config.run_dir, "abort.txt")): + self.should_stop_flag = True + + def should_stop(self) -> bool: + """Tell whether a stopping condition has been triggered one way or another.""" + return self.should_stop_flag + + def get_time_since_start(self) -> float: + """How much time has passed since the creation of the context.""" + return time.time() - self.start_time + + def get_time_since_last_update(self) -> float: + """How much time has passed since the last call to update.""" + return time.time() - self.last_update_time + + def get_last_update_interval(self) -> float: + """How much time passed between the previous two calls to update.""" + return self.last_update_interval + + def close(self) -> None: + """Close the context and clean up. + Should only be called once.""" + if not self.has_closed: + # update the run.txt with stopping time + self.run_txt_data["stop_time"] = datetime.datetime.now().isoformat(sep=" ") + with open(os.path.join(self.submit_config.run_dir, "run.txt"), "w") as f: + pprint.pprint(self.run_txt_data, stream=f, indent=4, width=200, compact=False) + self.has_closed = True + + # detach the global singleton + global _run_context + if _run_context is self: + _run_context = None + + @staticmethod + def get(): + import dnnlib + if _run_context is not None: + return _run_context + return RunContext(dnnlib.submit_config) diff --git a/submission/submit.py b/submission/submit.py new file mode 100644 index 0000000000000000000000000000000000000000..514647dd6a0585c7bd6864380a95b8059bcfba42 --- /dev/null +++ b/submission/submit.py @@ -0,0 +1,343 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Submit a function to be run either locally or in a computing cluster.""" + +import copy +import inspect +import os +import pathlib +import pickle +import platform +import pprint +import re +import shutil +import sys +import time +import traceback + +from enum import Enum + +from .. import util +from ..util import EasyDict + +from . import internal + +class SubmitTarget(Enum): + """The target where the function should be run. + + LOCAL: Run it locally. + """ + LOCAL = 1 + + +class PathType(Enum): + """Determines in which format should a path be formatted. + + WINDOWS: Format with Windows style. + LINUX: Format with Linux/Posix style. + AUTO: Use current OS type to select either WINDOWS or LINUX. + """ + WINDOWS = 1 + LINUX = 2 + AUTO = 3 + + +class PlatformExtras: + """A mixed bag of values used by dnnlib heuristics. + + Attributes: + + data_reader_buffer_size: Used by DataReader to size internal shared memory buffers. + data_reader_process_count: Number of worker processes to spawn (zero for single thread operation) + """ + def __init__(self): + self.data_reader_buffer_size = 1<<30 # 1 GB + self.data_reader_process_count = 0 # single threaded default + + +_user_name_override = None + +class SubmitConfig(util.EasyDict): + """Strongly typed config dict needed to submit runs. + + Attributes: + run_dir_root: Path to the run dir root. Can be optionally templated with tags. Needs to always be run through get_path_from_template. + run_desc: Description of the run. Will be used in the run dir and task name. + run_dir_ignore: List of file patterns used to ignore files when copying files to the run dir. + run_dir_extra_files: List of (abs_path, rel_path) tuples of file paths. rel_path root will be the src directory inside the run dir. + submit_target: Submit target enum value. Used to select where the run is actually launched. + num_gpus: Number of GPUs used/requested for the run. + print_info: Whether to print debug information when submitting. + local.do_not_copy_source_files: Do not copy source files from the working directory to the run dir. + run_id: Automatically populated value during submit. + run_name: Automatically populated value during submit. + run_dir: Automatically populated value during submit. + run_func_name: Automatically populated value during submit. + run_func_kwargs: Automatically populated value during submit. + user_name: Automatically populated value during submit. Can be set by the user which will then override the automatic value. + task_name: Automatically populated value during submit. + host_name: Automatically populated value during submit. + platform_extras: Automatically populated values during submit. Used by various dnnlib libraries such as the DataReader class. + """ + + def __init__(self): + super().__init__() + + # run (set these) + self.run_dir_root = "" # should always be passed through get_path_from_template + self.run_desc = "" + self.run_dir_ignore = ["__pycache__", "*.pyproj", "*.sln", "*.suo", ".cache", ".idea", ".vs", ".vscode", "_cudacache"] + self.run_dir_extra_files = [] + + # submit (set these) + self.submit_target = SubmitTarget.LOCAL + self.num_gpus = 1 + self.print_info = False + self.nvprof = False + self.local = internal.local.TargetOptions() + self.datasets = [] + + # (automatically populated) + self.run_id = None + self.run_name = None + self.run_dir = None + self.run_func_name = None + self.run_func_kwargs = None + self.user_name = None + self.task_name = None + self.host_name = "localhost" + self.platform_extras = PlatformExtras() + + +def get_path_from_template(path_template: str, path_type: PathType = PathType.AUTO) -> str: + """Replace tags in the given path template and return either Windows or Linux formatted path.""" + # automatically select path type depending on running OS + if path_type == PathType.AUTO: + if platform.system() == "Windows": + path_type = PathType.WINDOWS + elif platform.system() == "Linux": + path_type = PathType.LINUX + else: + raise RuntimeError("Unknown platform") + + path_template = path_template.replace("", get_user_name()) + + # return correctly formatted path + if path_type == PathType.WINDOWS: + return str(pathlib.PureWindowsPath(path_template)) + elif path_type == PathType.LINUX: + return str(pathlib.PurePosixPath(path_template)) + else: + raise RuntimeError("Unknown platform") + + +def get_template_from_path(path: str) -> str: + """Convert a normal path back to its template representation.""" + path = path.replace("\\", "/") + return path + + +def convert_path(path: str, path_type: PathType = PathType.AUTO) -> str: + """Convert a normal path to template and the convert it back to a normal path with given path type.""" + path_template = get_template_from_path(path) + path = get_path_from_template(path_template, path_type) + return path + + +def set_user_name_override(name: str) -> None: + """Set the global username override value.""" + global _user_name_override + _user_name_override = name + + +def get_user_name(): + """Get the current user name.""" + if _user_name_override is not None: + return _user_name_override + elif platform.system() == "Windows": + return os.getlogin() + elif platform.system() == "Linux": + try: + import pwd + return pwd.getpwuid(os.geteuid()).pw_name + except: + return "unknown" + else: + raise RuntimeError("Unknown platform") + + +def make_run_dir_path(*paths): + """Make a path/filename that resides under the current submit run_dir. + + Args: + *paths: Path components to be passed to os.path.join + + Returns: + A file/dirname rooted at submit_config.run_dir. If there's no + submit_config or run_dir, the base directory is the current + working directory. + + E.g., `os.path.join(dnnlib.submit_config.run_dir, "output.txt"))` + """ + import dnnlib + if (dnnlib.submit_config is None) or (dnnlib.submit_config.run_dir is None): + return os.path.join(os.getcwd(), *paths) + return os.path.join(dnnlib.submit_config.run_dir, *paths) + + +def _create_run_dir_local(submit_config: SubmitConfig) -> str: + """Create a new run dir with increasing ID number at the start.""" + run_dir_root = get_path_from_template(submit_config.run_dir_root, PathType.AUTO) + + if not os.path.exists(run_dir_root): + os.makedirs(run_dir_root) + + submit_config.run_id = _get_next_run_id_local(run_dir_root) + submit_config.run_name = "{0:05d}-{1}".format(submit_config.run_id, submit_config.run_desc) + run_dir = os.path.join(run_dir_root, submit_config.run_name) + + if os.path.exists(run_dir): + raise RuntimeError("The run dir already exists! ({0})".format(run_dir)) + + os.makedirs(run_dir) + + return run_dir + + +def _get_next_run_id_local(run_dir_root: str) -> int: + """Reads all directory names in a given directory (non-recursive) and returns the next (increasing) run id. Assumes IDs are numbers at the start of the directory names.""" + dir_names = [d for d in os.listdir(run_dir_root) if os.path.isdir(os.path.join(run_dir_root, d))] + r = re.compile("^\\d+") # match one or more digits at the start of the string + run_id = 0 + + for dir_name in dir_names: + m = r.match(dir_name) + + if m is not None: + i = int(m.group()) + run_id = max(run_id, i + 1) + + return run_id + + +def _populate_run_dir(submit_config: SubmitConfig, run_dir: str) -> None: + """Copy all necessary files into the run dir. Assumes that the dir exists, is local, and is writable.""" + pickle.dump(submit_config, open(os.path.join(run_dir, "submit_config.pkl"), "wb")) + with open(os.path.join(run_dir, "submit_config.txt"), "w") as f: + pprint.pprint(submit_config, stream=f, indent=4, width=200, compact=False) + + if (submit_config.submit_target == SubmitTarget.LOCAL) and submit_config.local.do_not_copy_source_files: + return + + files = [] + + run_func_module_dir_path = util.get_module_dir_by_obj_name(submit_config.run_func_name) + assert '.' in submit_config.run_func_name + for _idx in range(submit_config.run_func_name.count('.') - 1): + run_func_module_dir_path = os.path.dirname(run_func_module_dir_path) + files += util.list_dir_recursively_with_ignore(run_func_module_dir_path, ignores=submit_config.run_dir_ignore, add_base_to_relative=False) + + dnnlib_module_dir_path = util.get_module_dir_by_obj_name("dnnlib") + files += util.list_dir_recursively_with_ignore(dnnlib_module_dir_path, ignores=submit_config.run_dir_ignore, add_base_to_relative=True) + + files += submit_config.run_dir_extra_files + + files = [(f[0], os.path.join(run_dir, "src", f[1])) for f in files] + files += [(os.path.join(dnnlib_module_dir_path, "submission", "internal", "run.py"), os.path.join(run_dir, "run.py"))] + + util.copy_files_and_create_dirs(files) + + + +def run_wrapper(submit_config: SubmitConfig) -> None: + """Wrap the actual run function call for handling logging, exceptions, typing, etc.""" + is_local = submit_config.submit_target == SubmitTarget.LOCAL + + # when running locally, redirect stderr to stdout, log stdout to a file, and force flushing + if is_local: + logger = util.Logger(file_name=os.path.join(submit_config.run_dir, "log.txt"), file_mode="w", should_flush=True) + else: # when running in a cluster, redirect stderr to stdout, and just force flushing (log writing is handled by run.sh) + logger = util.Logger(file_name=None, should_flush=True) + + import dnnlib + dnnlib.submit_config = submit_config + + exit_with_errcode = False + try: + print("dnnlib: Running {0}() on {1}...".format(submit_config.run_func_name, submit_config.host_name)) + start_time = time.time() + + run_func_obj = util.get_obj_by_name(submit_config.run_func_name) + assert callable(run_func_obj) + sig = inspect.signature(run_func_obj) + if 'submit_config' in sig.parameters: + run_func_obj(submit_config=submit_config, **submit_config.run_func_kwargs) + else: + run_func_obj(**submit_config.run_func_kwargs) + + print("dnnlib: Finished {0}() in {1}.".format(submit_config.run_func_name, util.format_time(time.time() - start_time))) + except: + if is_local: + raise + else: + traceback.print_exc() + + log_src = os.path.join(submit_config.run_dir, "log.txt") + log_dst = os.path.join(get_path_from_template(submit_config.run_dir_root), "{0}-error.txt".format(submit_config.run_name)) + shutil.copyfile(log_src, log_dst) + + # Defer sys.exit(1) to happen after we close the logs and create a _finished.txt + exit_with_errcode = True + finally: + open(os.path.join(submit_config.run_dir, "_finished.txt"), "w").close() + + dnnlib.RunContext.get().close() + dnnlib.submit_config = None + logger.close() + + # If we hit an error, get out of the script now and signal the error + # to whatever process that started this script. + if exit_with_errcode: + sys.exit(1) + + return submit_config + + +def submit_run(submit_config: SubmitConfig, run_func_name: str, **run_func_kwargs) -> None: + """Create a run dir, gather files related to the run, copy files to the run dir, and launch the run in appropriate place.""" + submit_config = copy.deepcopy(submit_config) + + submit_target = submit_config.submit_target + farm = None + if submit_target == SubmitTarget.LOCAL: + farm = internal.local.Target() + assert farm is not None # unknown target + + # Disallow submitting jobs with zero num_gpus. + if (submit_config.num_gpus is None) or (submit_config.num_gpus == 0): + raise RuntimeError("submit_config.num_gpus must be set to a non-zero value") + + if submit_config.user_name is None: + submit_config.user_name = get_user_name() + + submit_config.run_func_name = run_func_name + submit_config.run_func_kwargs = run_func_kwargs + + #-------------------------------------------------------------------- + # Prepare submission by populating the run dir + #-------------------------------------------------------------------- + host_run_dir = _create_run_dir_local(submit_config) + + submit_config.task_name = "{0}-{1:05d}-{2}".format(submit_config.user_name, submit_config.run_id, submit_config.run_desc) + docker_valid_name_regex = "^[a-zA-Z0-9][a-zA-Z0-9_.-]+$" + if not re.match(docker_valid_name_regex, submit_config.task_name): + raise RuntimeError("Invalid task name. Probable reason: unacceptable characters in your submit_config.run_desc. Task name must be accepted by the following regex: " + docker_valid_name_regex + ", got " + submit_config.task_name) + + # Farm specific preparations for a submit + farm.finalize_submit_config(submit_config, host_run_dir) + _populate_run_dir(submit_config, host_run_dir) + return farm.submit(submit_config, host_run_dir) diff --git a/tflib/.custom_ops.py.un~ b/tflib/.custom_ops.py.un~ new file mode 100644 index 0000000000000000000000000000000000000000..e41c460c3b3111ddcbeb6b9567b4b6e41d6eb9c6 Binary files /dev/null and b/tflib/.custom_ops.py.un~ differ diff --git a/tflib/__init__.py b/tflib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..02c25173d3f2391c88b142cf80af02cd93b0b5a0 --- /dev/null +++ b/tflib/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +from . import autosummary +from . import network +from . import optimizer +from . import tfutil +from . import custom_ops + +from .tfutil import * +from .network import Network + +from .optimizer import Optimizer + +from .custom_ops import get_plugin diff --git a/tflib/__pycache__/__init__.cpython-36.pyc b/tflib/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..55466792997d614403fe6026e63a5e2e32d5f4c5 Binary files /dev/null and b/tflib/__pycache__/__init__.cpython-36.pyc differ diff --git a/tflib/__pycache__/__init__.cpython-38.pyc b/tflib/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c48dd77f8014fcbca868e4826ddc7c07e558bb87 Binary files /dev/null and b/tflib/__pycache__/__init__.cpython-38.pyc differ diff --git a/tflib/__pycache__/__init__.cpython-39.pyc b/tflib/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3010c9010ee84397e342ca1748e4bfb0869ea9d4 Binary files /dev/null and b/tflib/__pycache__/__init__.cpython-39.pyc differ diff --git a/tflib/__pycache__/autosummary.cpython-36.pyc b/tflib/__pycache__/autosummary.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8663ce369c5c3d996a2d372e75b53f9c67ecac63 Binary files /dev/null and b/tflib/__pycache__/autosummary.cpython-36.pyc differ diff --git a/tflib/__pycache__/autosummary.cpython-38.pyc b/tflib/__pycache__/autosummary.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..772042b65a5c87fdd55625424b014978a08a8aba Binary files /dev/null and b/tflib/__pycache__/autosummary.cpython-38.pyc differ diff --git a/tflib/__pycache__/autosummary.cpython-39.pyc b/tflib/__pycache__/autosummary.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..51636d9cbd98f5953b8ed47a1a29aadec86da7ce Binary files /dev/null and b/tflib/__pycache__/autosummary.cpython-39.pyc differ diff --git a/tflib/__pycache__/custom_ops.cpython-36.pyc b/tflib/__pycache__/custom_ops.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c5bc61adabc982614e494f66403172a241a0a274 Binary files /dev/null and b/tflib/__pycache__/custom_ops.cpython-36.pyc differ diff --git a/tflib/__pycache__/network.cpython-36.pyc b/tflib/__pycache__/network.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15b13f2d4d385523fdfa41e298f4a4db2d3c28de Binary files /dev/null and b/tflib/__pycache__/network.cpython-36.pyc differ diff --git a/tflib/__pycache__/optimizer.cpython-36.pyc b/tflib/__pycache__/optimizer.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b676edf06988f727c4684867a0d4921c7fa2a62a Binary files /dev/null and b/tflib/__pycache__/optimizer.cpython-36.pyc differ diff --git a/tflib/__pycache__/tfutil.cpython-36.pyc b/tflib/__pycache__/tfutil.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fc0253c2da1f3aa0979212e78adf111de2311459 Binary files /dev/null and b/tflib/__pycache__/tfutil.cpython-36.pyc differ diff --git a/tflib/__pycache__/tfutil.cpython-38.pyc b/tflib/__pycache__/tfutil.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eaddd1d7edc28bb2f69e914274e24c9febfd2989 Binary files /dev/null and b/tflib/__pycache__/tfutil.cpython-38.pyc differ diff --git a/tflib/__pycache__/tfutil.cpython-39.pyc b/tflib/__pycache__/tfutil.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e0ef28b02cff59ccbc79a0804c30d90fbfdded44 Binary files /dev/null and b/tflib/__pycache__/tfutil.cpython-39.pyc differ diff --git a/tflib/_cudacache/fused_bias_act_26d9116e2d7d28cdd451d2d21a1bb7ac.dll b/tflib/_cudacache/fused_bias_act_26d9116e2d7d28cdd451d2d21a1bb7ac.dll new file mode 100644 index 0000000000000000000000000000000000000000..f03635ef2de5533438983be7e5430b2d68e703fe Binary files /dev/null and b/tflib/_cudacache/fused_bias_act_26d9116e2d7d28cdd451d2d21a1bb7ac.dll differ diff --git a/tflib/_cudacache/fused_bias_act_40c2d778d681ce8357acbd8537ee6613.so b/tflib/_cudacache/fused_bias_act_40c2d778d681ce8357acbd8537ee6613.so new file mode 100644 index 0000000000000000000000000000000000000000..271fc0309be80c13246bd506e4f4e9cae1da3a5c Binary files /dev/null and b/tflib/_cudacache/fused_bias_act_40c2d778d681ce8357acbd8537ee6613.so differ diff --git a/tflib/_cudacache/fused_bias_act_d3f49482b02b6a96d02e754e40c292bc.dll b/tflib/_cudacache/fused_bias_act_d3f49482b02b6a96d02e754e40c292bc.dll new file mode 100644 index 0000000000000000000000000000000000000000..34f36c1e1c1393b01ab6adbd96cb91d99d895379 Binary files /dev/null and b/tflib/_cudacache/fused_bias_act_d3f49482b02b6a96d02e754e40c292bc.dll differ diff --git a/tflib/_cudacache/upfirdn_2d_0acb578db4996f3ae2116b184d4a1bdb.dll b/tflib/_cudacache/upfirdn_2d_0acb578db4996f3ae2116b184d4a1bdb.dll new file mode 100644 index 0000000000000000000000000000000000000000..c77d3dbe97574675decdfcf3f332e9064018f652 Binary files /dev/null and b/tflib/_cudacache/upfirdn_2d_0acb578db4996f3ae2116b184d4a1bdb.dll differ diff --git a/tflib/_cudacache/upfirdn_2d_79757bc08e1fcc2d526ecf3cb32fad92.so b/tflib/_cudacache/upfirdn_2d_79757bc08e1fcc2d526ecf3cb32fad92.so new file mode 100644 index 0000000000000000000000000000000000000000..f168469daf6becbe8129f5344f8632a089873a1d --- /dev/null +++ b/tflib/_cudacache/upfirdn_2d_79757bc08e1fcc2d526ecf3cb32fad92.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11a9d92fab510cb996dcd2bf8644c34f7c5e963e6abba4ad3e606e590917e3e0 +size 1099920 diff --git a/tflib/_cudacache/upfirdn_2d_f65c24ab0d389aaa83e5b3537e3134e8.dll b/tflib/_cudacache/upfirdn_2d_f65c24ab0d389aaa83e5b3537e3134e8.dll new file mode 100644 index 0000000000000000000000000000000000000000..dfa6979fc99fdf7b052769b88cb23a852dc223d1 Binary files /dev/null and b/tflib/_cudacache/upfirdn_2d_f65c24ab0d389aaa83e5b3537e3134e8.dll differ diff --git a/tflib/autosummary.py b/tflib/autosummary.py new file mode 100644 index 0000000000000000000000000000000000000000..6b0d80b371620bedadf8164772b7d6f87806fc11 --- /dev/null +++ b/tflib/autosummary.py @@ -0,0 +1,191 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Helper for adding automatically tracked values to Tensorboard. + +Autosummary creates an identity op that internally keeps track of the input +values and automatically shows up in TensorBoard. The reported value +represents an average over input components. The average is accumulated +constantly over time and flushed when save_summaries() is called. + +Notes: +- The output tensor must be used as an input for something else in the + graph. Otherwise, the autosummary op will not get executed, and the average + value will not get accumulated. +- It is perfectly fine to include autosummaries with the same name in + several places throughout the graph, even if they are executed concurrently. +- It is ok to also pass in a python scalar or numpy array. In this case, it + is added to the average immediately. +""" + +from collections import OrderedDict +import numpy as np +import tensorflow as tf +from tensorboard import summary as summary_lib +from tensorboard.plugins.custom_scalar import layout_pb2 + +from . import tfutil +from .tfutil import TfExpression +from .tfutil import TfExpressionEx + +# Enable "Custom scalars" tab in TensorBoard for advanced formatting. +# Disabled by default to reduce tfevents file size. +enable_custom_scalars = False + +_dtype = tf.float64 +_vars = OrderedDict() # name => [var, ...] +_immediate = OrderedDict() # name => update_op, update_value +_finalized = False +_merge_op = None + + +def _create_var(name: str, value_expr: TfExpression) -> TfExpression: + """Internal helper for creating autosummary accumulators.""" + assert not _finalized + name_id = name.replace("/", "_") + v = tf.cast(value_expr, _dtype) + + if v.shape.is_fully_defined(): + size = np.prod(v.shape.as_list()) + size_expr = tf.constant(size, dtype=_dtype) + else: + size = None + size_expr = tf.reduce_prod(tf.cast(tf.shape(v), _dtype)) + + if size == 1: + if v.shape.ndims != 0: + v = tf.reshape(v, []) + v = [size_expr, v, tf.square(v)] + else: + v = [size_expr, tf.reduce_sum(v), tf.reduce_sum(tf.square(v))] + v = tf.cond(tf.is_finite(v[1]), lambda: tf.stack(v), lambda: tf.zeros(3, dtype=_dtype)) + + with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.control_dependencies(None): + var = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False) # [sum(1), sum(x), sum(x**2)] + update_op = tf.cond(tf.is_variable_initialized(var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v)) + + if name in _vars: + _vars[name].append(var) + else: + _vars[name] = [var] + return update_op + + +def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None, condition: TfExpressionEx = True) -> TfExpressionEx: + """Create a new autosummary. + + Args: + name: Name to use in TensorBoard + value: TensorFlow expression or python value to track + passthru: Optionally return this TF node without modifications but tack an autosummary update side-effect to this node. + + Example use of the passthru mechanism: + + n = autosummary('l2loss', loss, passthru=n) + + This is a shorthand for the following code: + + with tf.control_dependencies([autosummary('l2loss', loss)]): + n = tf.identity(n) + """ + tfutil.assert_tf_initialized() + name_id = name.replace("/", "_") + + if tfutil.is_tf_expression(value): + with tf.name_scope("summary_" + name_id), tf.device(value.device): + condition = tf.convert_to_tensor(condition, name='condition') + update_op = tf.cond(condition, lambda: tf.group(_create_var(name, value)), tf.no_op) + with tf.control_dependencies([update_op]): + return tf.identity(value if passthru is None else passthru) + + else: # python scalar or numpy array + assert not tfutil.is_tf_expression(passthru) + assert not tfutil.is_tf_expression(condition) + if condition: + if name not in _immediate: + with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.device(None), tf.control_dependencies(None): + update_value = tf.placeholder(_dtype) + update_op = _create_var(name, update_value) + _immediate[name] = update_op, update_value + update_op, update_value = _immediate[name] + tfutil.run(update_op, {update_value: value}) + return value if passthru is None else passthru + + +def finalize_autosummaries() -> None: + """Create the necessary ops to include autosummaries in TensorBoard report. + Note: This should be done only once per graph. + """ + global _finalized + tfutil.assert_tf_initialized() + + if _finalized: + return None + + _finalized = True + tfutil.init_uninitialized_vars([var for vars_list in _vars.values() for var in vars_list]) + + # Create summary ops. + with tf.device(None), tf.control_dependencies(None): + for name, vars_list in _vars.items(): + name_id = name.replace("/", "_") + with tfutil.absolute_name_scope("Autosummary/" + name_id): + moments = tf.add_n(vars_list) + moments /= moments[0] + with tf.control_dependencies([moments]): # read before resetting + reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list] + with tf.name_scope(None), tf.control_dependencies(reset_ops): # reset before reporting + mean = moments[1] + std = tf.sqrt(moments[2] - tf.square(moments[1])) + tf.summary.scalar(name, mean) + if enable_custom_scalars: + tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std) + tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std) + + # Setup layout for custom scalars. + layout = None + if enable_custom_scalars: + cat_dict = OrderedDict() + for series_name in sorted(_vars.keys()): + p = series_name.split("/") + cat = p[0] if len(p) >= 2 else "" + chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1] + if cat not in cat_dict: + cat_dict[cat] = OrderedDict() + if chart not in cat_dict[cat]: + cat_dict[cat][chart] = [] + cat_dict[cat][chart].append(series_name) + categories = [] + for cat_name, chart_dict in cat_dict.items(): + charts = [] + for chart_name, series_names in chart_dict.items(): + series = [] + for series_name in series_names: + series.append(layout_pb2.MarginChartContent.Series( + value=series_name, + lower="xCustomScalars/" + series_name + "/margin_lo", + upper="xCustomScalars/" + series_name + "/margin_hi")) + margin = layout_pb2.MarginChartContent(series=series) + charts.append(layout_pb2.Chart(title=chart_name, margin=margin)) + categories.append(layout_pb2.Category(title=cat_name, chart=charts)) + layout = summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories)) + return layout + +def save_summaries(file_writer, global_step=None): + """Call FileWriter.add_summary() with all summaries in the default graph, + automatically finalizing and merging them on the first call. + """ + global _merge_op + tfutil.assert_tf_initialized() + + if _merge_op is None: + layout = finalize_autosummaries() + if layout is not None: + file_writer.add_summary(layout) + with tf.device(None), tf.control_dependencies(None): + _merge_op = tf.summary.merge_all() + + file_writer.add_summary(_merge_op.eval(), global_step) diff --git a/tflib/custom_ops.py b/tflib/custom_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..9046787626a0530f03b9f8982ec625e6bd257858 --- /dev/null +++ b/tflib/custom_ops.py @@ -0,0 +1,169 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""TensorFlow custom ops builder. +""" + +import os +import re +import uuid +import hashlib +import tempfile +import shutil +import tensorflow as tf +from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module + +#---------------------------------------------------------------------------- +# Global options. + +cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache') +cuda_cache_version_tag = 'v1' +do_not_hash_included_headers = False # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe! +verbose = True # Print status messages to stdout. + +compiler_bindir_search_path = [ + 'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.16.27023/bin/Hostx64/x64', + 'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64', + 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin', +] + +#---------------------------------------------------------------------------- +# Internal helper funcs. + +def _find_compiler_bindir(): + for compiler_path in compiler_bindir_search_path: + if os.path.isdir(compiler_path): + return compiler_path + return None + +def _get_compute_cap(device): + caps_str = device.physical_device_desc + m = re.search('compute capability: (\\d+).(\\d+)', caps_str) + major = m.group(1) + minor = m.group(2) + return (major, minor) + +def _get_cuda_gpu_arch_string(): + gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU'] + if len(gpus) == 0: + raise RuntimeError('No GPU devices found') + (major, minor) = _get_compute_cap(gpus[0]) + return 'sm_%s%s' % (major, minor) + +def _run_cmd(cmd): + with os.popen(cmd) as pipe: + output = pipe.read() + status = pipe.close() + if status is not None: + raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output)) + +def _prepare_nvcc_cli(opts): + cmd = 'nvcc ' + opts.strip() + cmd += ' --disable-warnings' + cmd += ' --include-path "%s"' % tf.sysconfig.get_include() + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src') + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl') + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive') + + compiler_bindir = _find_compiler_bindir() + if compiler_bindir is None: + # Require that _find_compiler_bindir succeeds on Windows. Allow + # nvcc to use whatever is the default on Linux. + if os.name == 'nt': + raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__) + else: + cmd += ' --compiler-bindir "%s"' % compiler_bindir + cmd += ' 2>&1' + return cmd + +#---------------------------------------------------------------------------- +# Main entry point. + +_plugin_cache = dict() + +def get_plugin(cuda_file): + cuda_file_base = os.path.basename(cuda_file) + cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base) + + # Already in cache? + if cuda_file in _plugin_cache: + return _plugin_cache[cuda_file] + + # Setup plugin. + if verbose: + print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True) + try: + # Hash CUDA source. + md5 = hashlib.md5() + with open(cuda_file, 'rb') as f: + md5.update(f.read()) + md5.update(b'\n') + + # Hash headers included by the CUDA code by running it through the preprocessor. + if not do_not_hash_included_headers: + if verbose: + print('Preprocessing... ', end='', flush=True) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext) + _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))) + with open(tmp_file, 'rb') as f: + bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8') # __FILE__ in error check macros + good_file_str = ('"' + cuda_file_base + '"').encode('utf-8') + for ln in f: + if not ln.startswith(b'# ') and not ln.startswith(b'#line '): # ignore line number pragmas + ln = ln.replace(bad_file_str, good_file_str) + md5.update(ln) + md5.update(b'\n') + + # Select compiler options. + compile_opts = '' + if os.name == 'nt': + compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib') + elif os.name == 'posix': + compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so') + compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\'' + else: + assert False # not Windows or Linux, w00t? + compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string() + compile_opts += ' --use_fast_math' + nvcc_cmd = _prepare_nvcc_cli(compile_opts) + + # Hash build configuration. + md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n') + md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n') + md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n') + + # Compile if not already compiled. + bin_file_ext = '.dll' if os.name == 'nt' else '.so' + bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext) + if not os.path.isfile(bin_file): + if verbose: + print('Compiling... ', end='', flush=True) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext) + _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)) + os.makedirs(cuda_cache_path, exist_ok=True) + intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext) + shutil.copyfile(tmp_file, intermediate_file) + os.rename(intermediate_file, bin_file) # atomic + + # Load. + if verbose: + print('Loading... ', end='', flush=True) + plugin = tf.load_op_library(bin_file) + + # Add to cache. + _plugin_cache[cuda_file] = plugin + if verbose: + print('Done.', flush=True) + return plugin + + except: + if verbose: + print('Failed!', flush=True) + raise + +#---------------------------------------------------------------------------- diff --git a/tflib/custom_ops.py~ b/tflib/custom_ops.py~ new file mode 100644 index 0000000000000000000000000000000000000000..9046787626a0530f03b9f8982ec625e6bd257858 --- /dev/null +++ b/tflib/custom_ops.py~ @@ -0,0 +1,169 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""TensorFlow custom ops builder. +""" + +import os +import re +import uuid +import hashlib +import tempfile +import shutil +import tensorflow as tf +from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module + +#---------------------------------------------------------------------------- +# Global options. + +cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache') +cuda_cache_version_tag = 'v1' +do_not_hash_included_headers = False # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe! +verbose = True # Print status messages to stdout. + +compiler_bindir_search_path = [ + 'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.16.27023/bin/Hostx64/x64', + 'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64', + 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin', +] + +#---------------------------------------------------------------------------- +# Internal helper funcs. + +def _find_compiler_bindir(): + for compiler_path in compiler_bindir_search_path: + if os.path.isdir(compiler_path): + return compiler_path + return None + +def _get_compute_cap(device): + caps_str = device.physical_device_desc + m = re.search('compute capability: (\\d+).(\\d+)', caps_str) + major = m.group(1) + minor = m.group(2) + return (major, minor) + +def _get_cuda_gpu_arch_string(): + gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU'] + if len(gpus) == 0: + raise RuntimeError('No GPU devices found') + (major, minor) = _get_compute_cap(gpus[0]) + return 'sm_%s%s' % (major, minor) + +def _run_cmd(cmd): + with os.popen(cmd) as pipe: + output = pipe.read() + status = pipe.close() + if status is not None: + raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output)) + +def _prepare_nvcc_cli(opts): + cmd = 'nvcc ' + opts.strip() + cmd += ' --disable-warnings' + cmd += ' --include-path "%s"' % tf.sysconfig.get_include() + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src') + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl') + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive') + + compiler_bindir = _find_compiler_bindir() + if compiler_bindir is None: + # Require that _find_compiler_bindir succeeds on Windows. Allow + # nvcc to use whatever is the default on Linux. + if os.name == 'nt': + raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__) + else: + cmd += ' --compiler-bindir "%s"' % compiler_bindir + cmd += ' 2>&1' + return cmd + +#---------------------------------------------------------------------------- +# Main entry point. + +_plugin_cache = dict() + +def get_plugin(cuda_file): + cuda_file_base = os.path.basename(cuda_file) + cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base) + + # Already in cache? + if cuda_file in _plugin_cache: + return _plugin_cache[cuda_file] + + # Setup plugin. + if verbose: + print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True) + try: + # Hash CUDA source. + md5 = hashlib.md5() + with open(cuda_file, 'rb') as f: + md5.update(f.read()) + md5.update(b'\n') + + # Hash headers included by the CUDA code by running it through the preprocessor. + if not do_not_hash_included_headers: + if verbose: + print('Preprocessing... ', end='', flush=True) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext) + _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))) + with open(tmp_file, 'rb') as f: + bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8') # __FILE__ in error check macros + good_file_str = ('"' + cuda_file_base + '"').encode('utf-8') + for ln in f: + if not ln.startswith(b'# ') and not ln.startswith(b'#line '): # ignore line number pragmas + ln = ln.replace(bad_file_str, good_file_str) + md5.update(ln) + md5.update(b'\n') + + # Select compiler options. + compile_opts = '' + if os.name == 'nt': + compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib') + elif os.name == 'posix': + compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so') + compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\'' + else: + assert False # not Windows or Linux, w00t? + compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string() + compile_opts += ' --use_fast_math' + nvcc_cmd = _prepare_nvcc_cli(compile_opts) + + # Hash build configuration. + md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n') + md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n') + md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n') + + # Compile if not already compiled. + bin_file_ext = '.dll' if os.name == 'nt' else '.so' + bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext) + if not os.path.isfile(bin_file): + if verbose: + print('Compiling... ', end='', flush=True) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext) + _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)) + os.makedirs(cuda_cache_path, exist_ok=True) + intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext) + shutil.copyfile(tmp_file, intermediate_file) + os.rename(intermediate_file, bin_file) # atomic + + # Load. + if verbose: + print('Loading... ', end='', flush=True) + plugin = tf.load_op_library(bin_file) + + # Add to cache. + _plugin_cache[cuda_file] = plugin + if verbose: + print('Done.', flush=True) + return plugin + + except: + if verbose: + print('Failed!', flush=True) + raise + +#---------------------------------------------------------------------------- diff --git a/tflib/network.py b/tflib/network.py new file mode 100644 index 0000000000000000000000000000000000000000..409babb1d2166d341bfaee2ef460d8810bdaf51f --- /dev/null +++ b/tflib/network.py @@ -0,0 +1,590 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Helper for managing networks.""" + +import types +import inspect +import re +import uuid +import sys +import numpy as np +import tensorflow as tf + +from collections import OrderedDict +from typing import Any, List, Tuple, Union + +from . import tfutil +from .. import util + +from .tfutil import TfExpression, TfExpressionEx + +_import_handlers = [] # Custom import handlers for dealing with legacy data in pickle import. +_import_module_src = dict() # Source code for temporary modules created during pickle import. + + +def import_handler(handler_func): + """Function decorator for declaring custom import handlers.""" + _import_handlers.append(handler_func) + return handler_func + + +class Network: + """Generic network abstraction. + + Acts as a convenience wrapper for a parameterized network construction + function, providing several utility methods and convenient access to + the inputs/outputs/weights. + + Network objects can be safely pickled and unpickled for long-term + archival purposes. The pickling works reliably as long as the underlying + network construction function is defined in a standalone Python module + that has no side effects or application-specific imports. + + Args: + name: Network name. Used to select TensorFlow name and variable scopes. + func_name: Fully qualified name of the underlying network construction function, or a top-level function object. + static_kwargs: Keyword arguments to be passed in to the network construction function. + + Attributes: + name: User-specified name, defaults to build func name if None. + scope: Unique TensorFlow scope containing template graph and variables, derived from the user-specified name. + static_kwargs: Arguments passed to the user-supplied build func. + components: Container for sub-networks. Passed to the build func, and retained between calls. + num_inputs: Number of input tensors. + num_outputs: Number of output tensors. + input_shapes: Input tensor shapes (NC or NCHW), including minibatch dimension. + output_shapes: Output tensor shapes (NC or NCHW), including minibatch dimension. + input_shape: Short-hand for input_shapes[0]. + output_shape: Short-hand for output_shapes[0]. + input_templates: Input placeholders in the template graph. + output_templates: Output tensors in the template graph. + input_names: Name string for each input. + output_names: Name string for each output. + own_vars: Variables defined by this network (local_name => var), excluding sub-networks. + vars: All variables (local_name => var). + trainables: All trainable variables (local_name => var). + var_global_to_local: Mapping from variable global names to local names. + """ + + def __init__(self, name: str = None, func_name: Any = None, **static_kwargs): + tfutil.assert_tf_initialized() + assert isinstance(name, str) or name is None + assert func_name is not None + assert isinstance(func_name, str) or util.is_top_level_function(func_name) + assert util.is_pickleable(static_kwargs) + + self._init_fields() + self.name = name + self.static_kwargs = util.EasyDict(static_kwargs) + + # Locate the user-specified network build function. + if util.is_top_level_function(func_name): + func_name = util.get_top_level_function_name(func_name) + module, self._build_func_name = util.get_module_from_obj_name(func_name) + self._build_func = util.get_obj_from_module(module, self._build_func_name) + assert callable(self._build_func) + + # Dig up source code for the module containing the build function. + self._build_module_src = _import_module_src.get(module, None) + if self._build_module_src is None: + self._build_module_src = inspect.getsource(module) + + # Init TensorFlow graph. + self._init_graph() + self.reset_own_vars() + + def _init_fields(self) -> None: + self.name = None + self.scope = None + self.static_kwargs = util.EasyDict() + self.components = util.EasyDict() + self.num_inputs = 0 + self.num_outputs = 0 + self.input_shapes = [[]] + self.output_shapes = [[]] + self.input_shape = [] + self.output_shape = [] + self.input_templates = [] + self.output_templates = [] + self.input_names = [] + self.output_names = [] + self.own_vars = OrderedDict() + self.vars = OrderedDict() + self.trainables = OrderedDict() + self.var_global_to_local = OrderedDict() + + self._build_func = None # User-supplied build function that constructs the network. + self._build_func_name = None # Name of the build function. + self._build_module_src = None # Full source code of the module containing the build function. + self._run_cache = dict() # Cached graph data for Network.run(). + + def _init_graph(self) -> None: + # Collect inputs. + self.input_names = [] + + for param in inspect.signature(self._build_func).parameters.values(): + if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty: + self.input_names.append(param.name) + + self.num_inputs = len(self.input_names) + assert self.num_inputs >= 1 + + # Choose name and scope. + if self.name is None: + self.name = self._build_func_name + assert re.match("^[A-Za-z0-9_.\\-]*$", self.name) + with tf.name_scope(None): + self.scope = tf.get_default_graph().unique_name(self.name, mark_as_used=True) + + # Finalize build func kwargs. + build_kwargs = dict(self.static_kwargs) + build_kwargs["is_template_graph"] = True + build_kwargs["components"] = self.components + + # Build template graph. + with tfutil.absolute_variable_scope(self.scope, reuse=False), tfutil.absolute_name_scope(self.scope): # ignore surrounding scopes + assert tf.get_variable_scope().name == self.scope + assert tf.get_default_graph().get_name_scope() == self.scope + with tf.control_dependencies(None): # ignore surrounding control dependencies + self.input_templates = [tf.placeholder(tf.float32, name=name) for name in self.input_names] + out_expr = self._build_func(*self.input_templates, **build_kwargs) + + # Collect outputs. + assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple) + self.output_templates = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr) + self.num_outputs = len(self.output_templates) + assert self.num_outputs >= 1 + assert all(tfutil.is_tf_expression(t) for t in self.output_templates) + + # Perform sanity checks. + if any(t.shape.ndims is None for t in self.input_templates): + raise ValueError("Network input shapes not defined. Please call x.set_shape() for each input.") + if any(t.shape.ndims is None for t in self.output_templates): + raise ValueError("Network output shapes not defined. Please call x.set_shape() where applicable.") + if any(not isinstance(comp, Network) for comp in self.components.values()): + raise ValueError("Components of a Network must be Networks themselves.") + if len(self.components) != len(set(comp.name for comp in self.components.values())): + raise ValueError("Components of a Network must have unique names.") + + # List inputs and outputs. + self.input_shapes = [t.shape.as_list() for t in self.input_templates] + self.output_shapes = [t.shape.as_list() for t in self.output_templates] + self.input_shape = self.input_shapes[0] + self.output_shape = self.output_shapes[0] + self.output_names = [t.name.split("/")[-1].split(":")[0] for t in self.output_templates] + + # List variables. + self.own_vars = OrderedDict((var.name[len(self.scope) + 1:].split(":")[0], var) for var in tf.global_variables(self.scope + "/")) + self.vars = OrderedDict(self.own_vars) + self.vars.update((comp.name + "/" + name, var) for comp in self.components.values() for name, var in comp.vars.items()) + self.trainables = OrderedDict((name, var) for name, var in self.vars.items() if var.trainable) + self.var_global_to_local = OrderedDict((var.name.split(":")[0], name) for name, var in self.vars.items()) + + def reset_own_vars(self) -> None: + """Re-initialize all variables of this network, excluding sub-networks.""" + tfutil.run([var.initializer for var in self.own_vars.values()]) + + def reset_vars(self) -> None: + """Re-initialize all variables of this network, including sub-networks.""" + tfutil.run([var.initializer for var in self.vars.values()]) + + def reset_trainables(self) -> None: + """Re-initialize all trainable variables of this network, including sub-networks.""" + tfutil.run([var.initializer for var in self.trainables.values()]) + + def get_output_for(self, *in_expr: TfExpression, return_as_list: bool = False, **dynamic_kwargs) -> Union[TfExpression, List[TfExpression]]: + """Construct TensorFlow expression(s) for the output(s) of this network, given the input expression(s).""" + assert len(in_expr) == self.num_inputs + assert not all(expr is None for expr in in_expr) + + # Finalize build func kwargs. + build_kwargs = dict(self.static_kwargs) + build_kwargs.update(dynamic_kwargs) + build_kwargs["is_template_graph"] = False + build_kwargs["components"] = self.components + + # Build TensorFlow graph to evaluate the network. + with tfutil.absolute_variable_scope(self.scope, reuse=True), tf.name_scope(self.name): + assert tf.get_variable_scope().name == self.scope + valid_inputs = [expr for expr in in_expr if expr is not None] + final_inputs = [] + for expr, name, shape in zip(in_expr, self.input_names, self.input_shapes): + if expr is not None: + expr = tf.identity(expr, name=name) + else: + expr = tf.zeros([tf.shape(valid_inputs[0])[0]] + shape[1:], name=name) + final_inputs.append(expr) + out_expr = self._build_func(*final_inputs, **build_kwargs) + + # Propagate input shapes back to the user-specified expressions. + for expr, final in zip(in_expr, final_inputs): + if isinstance(expr, tf.Tensor): + expr.set_shape(final.shape) + + # Express outputs in the desired format. + assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple) + if return_as_list: + out_expr = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr) + return out_expr + + def get_var_local_name(self, var_or_global_name: Union[TfExpression, str]) -> str: + """Get the local name of a given variable, without any surrounding name scopes.""" + assert tfutil.is_tf_expression(var_or_global_name) or isinstance(var_or_global_name, str) + global_name = var_or_global_name if isinstance(var_or_global_name, str) else var_or_global_name.name + return self.var_global_to_local[global_name] + + def find_var(self, var_or_local_name: Union[TfExpression, str]) -> TfExpression: + """Find variable by local or global name.""" + assert tfutil.is_tf_expression(var_or_local_name) or isinstance(var_or_local_name, str) + return self.vars[var_or_local_name] if isinstance(var_or_local_name, str) else var_or_local_name + + def get_var(self, var_or_local_name: Union[TfExpression, str]) -> np.ndarray: + """Get the value of a given variable as NumPy array. + Note: This method is very inefficient -- prefer to use tflib.run(list_of_vars) whenever possible.""" + return self.find_var(var_or_local_name).eval() + + def set_var(self, var_or_local_name: Union[TfExpression, str], new_value: Union[int, float, np.ndarray]) -> None: + """Set the value of a given variable based on the given NumPy array. + Note: This method is very inefficient -- prefer to use tflib.set_vars() whenever possible.""" + tfutil.set_vars({self.find_var(var_or_local_name): new_value}) + + def __getstate__(self) -> dict: + """Pickle export.""" + state = dict() + state["version"] = 4 + state["name"] = self.name + state["static_kwargs"] = dict(self.static_kwargs) + state["components"] = dict(self.components) + state["build_module_src"] = self._build_module_src + state["build_func_name"] = self._build_func_name + state["variables"] = list(zip(self.own_vars.keys(), tfutil.run(list(self.own_vars.values())))) + return state + + def __setstate__(self, state: dict) -> None: + """Pickle import.""" + # pylint: disable=attribute-defined-outside-init + tfutil.assert_tf_initialized() + self._init_fields() + + # Execute custom import handlers. + for handler in _import_handlers: + state = handler(state) + + # Set basic fields. + assert state["version"] in [2, 3, 4] + self.name = state["name"] + self.static_kwargs = util.EasyDict(state["static_kwargs"]) + self.components = util.EasyDict(state.get("components", {})) + self._build_module_src = state["build_module_src"] + self._build_func_name = state["build_func_name"] + + # Create temporary module from the imported source code. + module_name = "_tflib_network_import_" + uuid.uuid4().hex + module = types.ModuleType(module_name) + sys.modules[module_name] = module + _import_module_src[module] = self._build_module_src + exec(self._build_module_src, module.__dict__) # pylint: disable=exec-used + + # Locate network build function in the temporary module. + self._build_func = util.get_obj_from_module(module, self._build_func_name) + assert callable(self._build_func) + + # Init TensorFlow graph. + self._init_graph() + self.reset_own_vars() + tfutil.set_vars({self.find_var(name): value for name, value in state["variables"]}) + + def clone(self, name: str = None, **new_static_kwargs) -> "Network": + """Create a clone of this network with its own copy of the variables.""" + # pylint: disable=protected-access + net = object.__new__(Network) + net._init_fields() + net.name = name if name is not None else self.name + net.static_kwargs = util.EasyDict(self.static_kwargs) + net.static_kwargs.update(new_static_kwargs) + net._build_module_src = self._build_module_src + net._build_func_name = self._build_func_name + net._build_func = self._build_func + net._init_graph() + net.copy_vars_from(self) + return net + + def copy_own_vars_from(self, src_net: "Network") -> None: + """Copy the values of all variables from the given network, excluding sub-networks.""" + names = [name for name in self.own_vars.keys() if name in src_net.own_vars] + tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) + + def copy_vars_from(self, src_net: "Network") -> None: + """Copy the values of all variables from the given network, including sub-networks.""" + names = [name for name in self.vars.keys() if name in src_net.vars] + tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) + + def copy_trainables_from(self, src_net: "Network") -> None: + """Copy the values of all trainable variables from the given network, including sub-networks.""" + names = [name for name in self.trainables.keys() if name in src_net.trainables] + tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) + + def convert(self, new_func_name: str, new_name: str = None, **new_static_kwargs) -> "Network": + """Create new network with the given parameters, and copy all variables from this network.""" + if new_name is None: + new_name = self.name + static_kwargs = dict(self.static_kwargs) + static_kwargs.update(new_static_kwargs) + net = Network(name=new_name, func_name=new_func_name, **static_kwargs) + net.copy_vars_from(self) + return net + + def setup_as_moving_average_of(self, src_net: "Network", beta: TfExpressionEx = 0.99, beta_nontrainable: TfExpressionEx = 0.0) -> tf.Operation: + """Construct a TensorFlow op that updates the variables of this network + to be slightly closer to those of the given network.""" + with tfutil.absolute_name_scope(self.scope + "/_MovingAvg"): + ops = [] + for name, var in self.vars.items(): + if name in src_net.vars: + cur_beta = beta if name in self.trainables else beta_nontrainable + new_value = tfutil.lerp(src_net.vars[name], var, cur_beta) + ops.append(var.assign(new_value)) + return tf.group(*ops) + + def run(self, + *in_arrays: Tuple[Union[np.ndarray, None], ...], + input_transform: dict = None, + output_transform: dict = None, + return_as_list: bool = False, + print_progress: bool = False, + minibatch_size: int = None, + num_gpus: int = 1, + assume_frozen: bool = False, + **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]: + """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s). + + Args: + input_transform: A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network. + The dict must contain a 'func' field that points to a top-level function. The function is called with the input + TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs. + output_transform: A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network. + The dict must contain a 'func' field that points to a top-level function. The function is called with the output + TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs. + return_as_list: True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs. + print_progress: Print progress to the console? Useful for very large input arrays. + minibatch_size: Maximum minibatch size to use, None = disable batching. + num_gpus: Number of GPUs to use. + assume_frozen: Improve multi-GPU performance by assuming that the trainable parameters will remain changed between calls. + dynamic_kwargs: Additional keyword arguments to be passed into the network build function. + """ + assert len(in_arrays) == self.num_inputs + assert not all(arr is None for arr in in_arrays) + assert input_transform is None or util.is_top_level_function(input_transform["func"]) + assert output_transform is None or util.is_top_level_function(output_transform["func"]) + output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs) + num_items = in_arrays[0].shape[0] + if minibatch_size is None: + minibatch_size = num_items + + # Construct unique hash key from all arguments that affect the TensorFlow graph. + key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs) + def unwind_key(obj): + if isinstance(obj, dict): + return [(key, unwind_key(value)) for key, value in sorted(obj.items())] + if callable(obj): + return util.get_top_level_function_name(obj) + return obj + key = repr(unwind_key(key)) + + # Build graph. + if key not in self._run_cache: + with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None): + with tf.device("/cpu:0"): + in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names] + in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr])) + + out_split = [] + for gpu in range(num_gpus): + with tf.device("/gpu:%d" % gpu): + net_gpu = self.clone() if assume_frozen else self + in_gpu = in_split[gpu] + + if input_transform is not None: + in_kwargs = dict(input_transform) + in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs) + in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu) + + assert len(in_gpu) == self.num_inputs + out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs) + + if output_transform is not None: + out_kwargs = dict(output_transform) + out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs) + out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu) + + assert len(out_gpu) == self.num_outputs + out_split.append(out_gpu) + + with tf.device("/cpu:0"): + out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)] + self._run_cache[key] = in_expr, out_expr + + # Run minibatches. + in_expr, out_expr = self._run_cache[key] + out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr] + + for mb_begin in range(0, num_items, minibatch_size): + if print_progress: + print("\r%d / %d" % (mb_begin, num_items), end="") + + mb_end = min(mb_begin + minibatch_size, num_items) + mb_num = mb_end - mb_begin + mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)] + mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in))) + + for dst, src in zip(out_arrays, mb_out): + dst[mb_begin: mb_end] = src + + # Done. + if print_progress: + print("\r%d / %d" % (num_items, num_items)) + + if not return_as_list: + out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays) + return out_arrays + + def list_ops(self) -> List[TfExpression]: + include_prefix = self.scope + "/" + exclude_prefix = include_prefix + "_" + ops = tf.get_default_graph().get_operations() + ops = [op for op in ops if op.name.startswith(include_prefix)] + ops = [op for op in ops if not op.name.startswith(exclude_prefix)] + return ops + + def list_layers(self) -> List[Tuple[str, TfExpression, List[TfExpression]]]: + """Returns a list of (layer_name, output_expr, trainable_vars) tuples corresponding to + individual layers of the network. Mainly intended to be used for reporting.""" + layers = [] + + def recurse(scope, parent_ops, parent_vars, level): + # Ignore specific patterns. + if any(p in scope for p in ["/Shape", "/strided_slice", "/Cast", "/concat", "/Assign"]): + return + + # Filter ops and vars by scope. + global_prefix = scope + "/" + local_prefix = global_prefix[len(self.scope) + 1:] + cur_ops = [op for op in parent_ops if op.name.startswith(global_prefix) or op.name == global_prefix[:-1]] + cur_vars = [(name, var) for name, var in parent_vars if name.startswith(local_prefix) or name == local_prefix[:-1]] + if not cur_ops and not cur_vars: + return + + # Filter out all ops related to variables. + for var in [op for op in cur_ops if op.type.startswith("Variable")]: + var_prefix = var.name + "/" + cur_ops = [op for op in cur_ops if not op.name.startswith(var_prefix)] + + # Scope does not contain ops as immediate children => recurse deeper. + contains_direct_ops = any("/" not in op.name[len(global_prefix):] and op.type not in ["Identity", "Cast", "Transpose"] for op in cur_ops) + if (level == 0 or not contains_direct_ops) and (len(cur_ops) + len(cur_vars)) > 1: + visited = set() + for rel_name in [op.name[len(global_prefix):] for op in cur_ops] + [name[len(local_prefix):] for name, _var in cur_vars]: + token = rel_name.split("/")[0] + if token not in visited: + recurse(global_prefix + token, cur_ops, cur_vars, level + 1) + visited.add(token) + return + + # Report layer. + layer_name = scope[len(self.scope) + 1:] + layer_output = cur_ops[-1].outputs[0] if cur_ops else cur_vars[-1][1] + layer_trainables = [var for _name, var in cur_vars if var.trainable] + layers.append((layer_name, layer_output, layer_trainables)) + + recurse(self.scope, self.list_ops(), list(self.vars.items()), 0) + return layers + + def print_layers(self, title: str = None, hide_layers_with_no_params: bool = False) -> None: + """Print a summary table of the network structure.""" + rows = [[title if title is not None else self.name, "Params", "OutputShape", "WeightShape"]] + rows += [["---"] * 4] + total_params = 0 + + for layer_name, layer_output, layer_trainables in self.list_layers(): + num_params = sum(int(np.prod(var.shape.as_list())) for var in layer_trainables) + weights = [var for var in layer_trainables if var.name.endswith("/weight:0")] + weights.sort(key=lambda x: len(x.name)) + if len(weights) == 0 and len(layer_trainables) == 1: + weights = layer_trainables + total_params += num_params + + if not hide_layers_with_no_params or num_params != 0: + num_params_str = str(num_params) if num_params > 0 else "-" + output_shape_str = str(layer_output.shape) + weight_shape_str = str(weights[0].shape) if len(weights) >= 1 else "-" + rows += [[layer_name, num_params_str, output_shape_str, weight_shape_str]] + + rows += [["---"] * 4] + rows += [["Total", str(total_params), "", ""]] + + widths = [max(len(cell) for cell in column) for column in zip(*rows)] + print() + for row in rows: + print(" ".join(cell + " " * (width - len(cell)) for cell, width in zip(row, widths))) + print() + + def setup_weight_histograms(self, title: str = None) -> None: + """Construct summary ops to include histograms of all trainable parameters in TensorBoard.""" + if title is None: + title = self.name + + with tf.name_scope(None), tf.device(None), tf.control_dependencies(None): + for local_name, var in self.trainables.items(): + if "/" in local_name: + p = local_name.split("/") + name = title + "_" + p[-1] + "/" + "_".join(p[:-1]) + else: + name = title + "_toplevel/" + local_name + + tf.summary.histogram(name, var) + +#---------------------------------------------------------------------------- +# Backwards-compatible emulation of legacy output transformation in Network.run(). + +_print_legacy_warning = True + +def _handle_legacy_output_transforms(output_transform, dynamic_kwargs): + global _print_legacy_warning + legacy_kwargs = ["out_mul", "out_add", "out_shrink", "out_dtype"] + if not any(kwarg in dynamic_kwargs for kwarg in legacy_kwargs): + return output_transform, dynamic_kwargs + + if _print_legacy_warning: + _print_legacy_warning = False + print() + print("WARNING: Old-style output transformations in Network.run() are deprecated.") + print("Consider using 'output_transform=dict(func=tflib.convert_images_to_uint8)'") + print("instead of 'out_mul=127.5, out_add=127.5, out_dtype=np.uint8'.") + print() + assert output_transform is None + + new_kwargs = dict(dynamic_kwargs) + new_transform = {kwarg: new_kwargs.pop(kwarg) for kwarg in legacy_kwargs if kwarg in dynamic_kwargs} + new_transform["func"] = _legacy_output_transform_func + return new_transform, new_kwargs + +def _legacy_output_transform_func(*expr, out_mul=1.0, out_add=0.0, out_shrink=1, out_dtype=None): + if out_mul != 1.0: + expr = [x * out_mul for x in expr] + + if out_add != 0.0: + expr = [x + out_add for x in expr] + + if out_shrink > 1: + ksize = [1, 1, out_shrink, out_shrink] + expr = [tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") for x in expr] + + if out_dtype is not None: + if tf.as_dtype(out_dtype).is_integer: + expr = [tf.round(x) for x in expr] + expr = [tf.saturate_cast(x, out_dtype) for x in expr] + return expr diff --git a/tflib/ops/__init__.py b/tflib/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9ab9908efa3cb38af52e8d5bcaa8acffde5a8875 --- /dev/null +++ b/tflib/ops/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +# empty diff --git a/tflib/ops/__pycache__/__init__.cpython-36.pyc b/tflib/ops/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6d9fe932896826b80acf49e15fff6079f2f3c49 Binary files /dev/null and b/tflib/ops/__pycache__/__init__.cpython-36.pyc differ diff --git a/tflib/ops/__pycache__/fused_bias_act.cpython-36.pyc b/tflib/ops/__pycache__/fused_bias_act.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aaf22aefb1529d3d0cd19bbd0ba49816d532ac6e Binary files /dev/null and b/tflib/ops/__pycache__/fused_bias_act.cpython-36.pyc differ diff --git a/tflib/ops/__pycache__/upfirdn_2d.cpython-36.pyc b/tflib/ops/__pycache__/upfirdn_2d.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..740a77b3c7d29cb546482a3352e74b74f3b27c2b Binary files /dev/null and b/tflib/ops/__pycache__/upfirdn_2d.cpython-36.pyc differ diff --git a/tflib/ops/fused_bias_act.cu b/tflib/ops/fused_bias_act.cu new file mode 100644 index 0000000000000000000000000000000000000000..1102f624fadd0b803bdfb99fecfe145d7ec8abc4 --- /dev/null +++ b/tflib/ops/fused_bias_act.cu @@ -0,0 +1,188 @@ +// Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +// +// This work is made available under the Nvidia Source Code License-NC. +// To view a copy of this license, visit +// https://nvlabs.github.io/stylegan2/license.html + +#define EIGEN_USE_GPU +#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace tensorflow::shape_inference; + +#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false) + +//------------------------------------------------------------------------ +// CUDA kernel. + +template +struct FusedBiasActKernelParams +{ + const T* x; // [sizeX] + const T* b; // [sizeB] or NULL + const T* ref; // [sizeX] or NULL + T* y; // [sizeX] + + int grad; + int axis; + int act; + float alpha; + float gain; + + int sizeX; + int sizeB; + int stepB; + int loopX; +}; + +template +static __global__ void FusedBiasActKernel(const FusedBiasActKernelParams p) +{ + const float expRange = 80.0f; + const float halfExpRange = 40.0f; + const float seluScale = 1.0507009873554804934193349852946f; + const float seluAlpha = 1.6732632423543772848170429916717f; + + // Loop over elements. + int xi = blockIdx.x * p.loopX * blockDim.x + threadIdx.x; + for (int loopIdx = 0; loopIdx < p.loopX && xi < p.sizeX; loopIdx++, xi += blockDim.x) + { + // Load and apply bias. + float x = (float)p.x[xi]; + if (p.b) + x += (float)p.b[(xi / p.stepB) % p.sizeB]; + float ref = (p.ref) ? (float)p.ref[xi] : 0.0f; + if (p.gain != 0.0f & p.act != 9) + ref /= p.gain; + + // Evaluate activation func. + float y; + switch (p.act * 10 + p.grad) + { + // linear + default: + case 10: y = x; break; + case 11: y = x; break; + case 12: y = 0.0f; break; + + // relu + case 20: y = (x > 0.0f) ? x : 0.0f; break; + case 21: y = (ref > 0.0f) ? x : 0.0f; break; + case 22: y = 0.0f; break; + + // lrelu + case 30: y = (x > 0.0f) ? x : x * p.alpha; break; + case 31: y = (ref > 0.0f) ? x : x * p.alpha; break; + case 32: y = 0.0f; break; + + // tanh + case 40: { float c = expf(x); float d = 1.0f / c; y = (x < -expRange) ? -1.0f : (x > expRange) ? 1.0f : (c - d) / (c + d); } break; + case 41: y = x * (1.0f - ref * ref); break; + case 42: y = x * (1.0f - ref * ref) * (-2.0f * ref); break; + + // sigmoid + case 50: y = (x < -expRange) ? 0.0f : 1.0f / (expf(-x) + 1.0f); break; + case 51: y = x * ref * (1.0f - ref); break; + case 52: y = x * ref * (1.0f - ref) * (1.0f - 2.0f * ref); break; + + // elu + case 60: y = (x >= 0.0f) ? x : expf(x) - 1.0f; break; + case 61: y = (ref >= 0.0f) ? x : x * (ref + 1.0f); break; + case 62: y = (ref >= 0.0f) ? 0.0f : x * (ref + 1.0f); break; + + // selu + case 70: y = (x >= 0.0f) ? seluScale * x : (seluScale * seluAlpha) * (expf(x) - 1.0f); break; + case 71: y = (ref >= 0.0f) ? x * seluScale : x * (ref + seluScale * seluAlpha); break; + case 72: y = (ref >= 0.0f) ? 0.0f : x * (ref + seluScale * seluAlpha); break; + + // softplus + case 80: y = (x > expRange) ? x : logf(expf(x) + 1.0f); break; + case 81: y = x * (1.0f - expf(-ref)); break; + case 82: { float c = expf(-ref); y = x * c * (1.0f - c); } break; + + // swish + case 90: y = (x < -expRange) ? 0.0f : x / (expf(-x) + 1.0f); break; + case 91: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? x : x * c * (ref + d) / (d * d); } break; + case 92: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? 0.0f : x * c * (ref * (2.0f - d) + 2.0f * d) / (d * d * d); } break; + } + + // Apply gain and store. + p.y[xi] = (T)(y * p.gain); + } +} + +//------------------------------------------------------------------------ +// TensorFlow op. + +template +struct FusedBiasActOp : public OpKernel +{ + FusedBiasActKernelParams m_attribs; + + FusedBiasActOp(OpKernelConstruction* ctx) : OpKernel(ctx) + { + memset(&m_attribs, 0, sizeof(m_attribs)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("grad", &m_attribs.grad)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &m_attribs.axis)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("act", &m_attribs.act)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &m_attribs.alpha)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("gain", &m_attribs.gain)); + OP_REQUIRES(ctx, m_attribs.grad >= 0, errors::InvalidArgument("grad must be non-negative")); + OP_REQUIRES(ctx, m_attribs.axis >= 0, errors::InvalidArgument("axis must be non-negative")); + OP_REQUIRES(ctx, m_attribs.act >= 0, errors::InvalidArgument("act must be non-negative")); + } + + void Compute(OpKernelContext* ctx) + { + FusedBiasActKernelParams p = m_attribs; + cudaStream_t stream = ctx->eigen_device().stream(); + + const Tensor& x = ctx->input(0); // [...] + const Tensor& b = ctx->input(1); // [sizeB] or [0] + const Tensor& ref = ctx->input(2); // x.shape or [0] + p.x = x.flat().data(); + p.b = (b.NumElements()) ? b.flat().data() : NULL; + p.ref = (ref.NumElements()) ? ref.flat().data() : NULL; + OP_REQUIRES(ctx, b.NumElements() == 0 || m_attribs.axis < x.dims(), errors::InvalidArgument("axis out of bounds")); + OP_REQUIRES(ctx, b.dims() == 1, errors::InvalidArgument("b must have rank 1")); + OP_REQUIRES(ctx, b.NumElements() == 0 || b.NumElements() == x.dim_size(m_attribs.axis), errors::InvalidArgument("b has wrong number of elements")); + OP_REQUIRES(ctx, ref.NumElements() == ((p.grad == 0) ? 0 : x.NumElements()), errors::InvalidArgument("ref has wrong number of elements")); + OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("x is too large")); + + p.sizeX = (int)x.NumElements(); + p.sizeB = (int)b.NumElements(); + p.stepB = 1; + for (int i = m_attribs.axis + 1; i < x.dims(); i++) + p.stepB *= (int)x.dim_size(i); + + Tensor* y = NULL; // x.shape + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, x.shape(), &y)); + p.y = y->flat().data(); + + p.loopX = 4; + int blockSize = 4 * 32; + int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1; + void* args[] = {&p}; + OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel((void*)FusedBiasActKernel, gridSize, blockSize, args, 0, stream)); + } +}; + +REGISTER_OP("FusedBiasAct") + .Input ("x: T") + .Input ("b: T") + .Input ("ref: T") + .Output ("y: T") + .Attr ("T: {float, half}") + .Attr ("grad: int = 0") + .Attr ("axis: int = 1") + .Attr ("act: int = 0") + .Attr ("alpha: float = 0.0") + .Attr ("gain: float = 1.0"); +REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint("T"), FusedBiasActOp); +REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint("T"), FusedBiasActOp); + +//------------------------------------------------------------------------ diff --git a/tflib/ops/fused_bias_act.py b/tflib/ops/fused_bias_act.py new file mode 100644 index 0000000000000000000000000000000000000000..52f6bfd77a4b0151103c1a76fa877e084831f7c4 --- /dev/null +++ b/tflib/ops/fused_bias_act.py @@ -0,0 +1,196 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Custom TensorFlow ops for efficient bias and activation.""" + +import os +import numpy as np +import tensorflow as tf +from .. import custom_ops +from ...util import EasyDict + +def _get_plugin(): + return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu') + +#---------------------------------------------------------------------------- + +activation_funcs = { + 'linear': EasyDict(func=lambda x, **_: x, def_alpha=None, def_gain=1.0, cuda_idx=1, ref='y', zero_2nd_grad=True), + 'relu': EasyDict(func=lambda x, **_: tf.nn.relu(x), def_alpha=None, def_gain=np.sqrt(2), cuda_idx=2, ref='y', zero_2nd_grad=True), + 'lrelu': EasyDict(func=lambda x, alpha, **_: tf.nn.leaky_relu(x, alpha), def_alpha=0.2, def_gain=np.sqrt(2), cuda_idx=3, ref='y', zero_2nd_grad=True), + 'tanh': EasyDict(func=lambda x, **_: tf.nn.tanh(x), def_alpha=None, def_gain=1.0, cuda_idx=4, ref='y', zero_2nd_grad=False), + 'sigmoid': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x), def_alpha=None, def_gain=1.0, cuda_idx=5, ref='y', zero_2nd_grad=False), + 'elu': EasyDict(func=lambda x, **_: tf.nn.elu(x), def_alpha=None, def_gain=1.0, cuda_idx=6, ref='y', zero_2nd_grad=False), + 'selu': EasyDict(func=lambda x, **_: tf.nn.selu(x), def_alpha=None, def_gain=1.0, cuda_idx=7, ref='y', zero_2nd_grad=False), + 'softplus': EasyDict(func=lambda x, **_: tf.nn.softplus(x), def_alpha=None, def_gain=1.0, cuda_idx=8, ref='y', zero_2nd_grad=False), + 'swish': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x) * x, def_alpha=None, def_gain=np.sqrt(2), cuda_idx=9, ref='x', zero_2nd_grad=False), +} + +#---------------------------------------------------------------------------- + +def fused_bias_act(x, b=None, axis=1, act='linear', alpha=None, gain=None, impl='cuda'): + r"""Fused bias and activation function. + + Adds bias `b` to activation tensor `x`, evaluates activation function `act`, + and scales the result by `gain`. Each of the steps is optional. In most cases, + the fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports first and second order gradients, + but not third order gradients. + + Args: + x: Input activation tensor. Can have any shape, but if `b` is defined, the + dimension corresponding to `axis`, as well as the rank, must be known. + b: Bias vector, or `None` to disable. Must be a 1D tensor of the same type + as `x`. The shape must be known, and it must match the dimension of `x` + corresponding to `axis`. + axis: The dimension in `x` corresponding to the elements of `b`. + The value of `axis` is ignored if `b` is not specified. + act: Name of the activation function to evaluate, or `"linear"` to disable. + Can be e.g. `"relu"`, `"lrelu"`, `"tanh"`, `"sigmoid"`, `"swish"`, etc. + See `activation_funcs` for a full list. `None` is not allowed. + alpha: Shape parameter for the activation function, or `None` to use the default. + gain: Scaling factor for the output tensor, or `None` to use default. + See `activation_funcs` for the default scaling of each activation function. + If unsure, consider specifying `1.0`. + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the same shape and datatype as `x`. + """ + + impl_dict = { + 'ref': _fused_bias_act_ref, + 'cuda': _fused_bias_act_cuda, + } + return impl_dict[impl](x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain) + +#---------------------------------------------------------------------------- + +def _fused_bias_act_ref(x, b, axis, act, alpha, gain): + """Slow reference implementation of `fused_bias_act()` using standard TensorFlow ops.""" + + # Validate arguments. + x = tf.convert_to_tensor(x) + b = tf.convert_to_tensor(b) if b is not None else tf.constant([], dtype=x.dtype) + act_spec = activation_funcs[act] + assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) + assert b.shape[0] == 0 or 0 <= axis < x.shape.rank + if alpha is None: + alpha = act_spec.def_alpha + if gain is None: + gain = act_spec.def_gain + + # Add bias. + if b.shape[0] != 0: + x += tf.reshape(b, [-1 if i == axis else 1 for i in range(x.shape.rank)]) + + # Evaluate activation function. + x = act_spec.func(x, alpha=alpha) + + # Scale by gain. + if gain != 1: + x *= gain + return x + +#---------------------------------------------------------------------------- + +def _fused_bias_act_cuda(x, b, axis, act, alpha, gain): + """Fast CUDA implementation of `fused_bias_act()` using custom ops.""" + + # Validate arguments. + x = tf.convert_to_tensor(x) + empty_tensor = tf.constant([], dtype=x.dtype) + b = tf.convert_to_tensor(b) if b is not None else empty_tensor + act_spec = activation_funcs[act] + assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) + assert b.shape[0] == 0 or 0 <= axis < x.shape.rank + if alpha is None: + alpha = act_spec.def_alpha + if gain is None: + gain = act_spec.def_gain + + # Special cases. + if act == 'linear' and b is None and gain == 1.0: + return x + if act_spec.cuda_idx is None: + return _fused_bias_act_ref(x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain) + + # CUDA kernel. + cuda_kernel = _get_plugin().fused_bias_act + cuda_kwargs = dict(axis=axis, act=act_spec.cuda_idx, alpha=alpha, gain=gain) + + # Forward pass: y = func(x, b). + def func_y(x, b): + y = cuda_kernel(x=x, b=b, ref=empty_tensor, grad=0, **cuda_kwargs) + y.set_shape(x.shape) + return y + + # Backward pass: dx, db = grad(dy, x, y) + def grad_dx(dy, x, y): + ref = {'x': x, 'y': y}[act_spec.ref] + dx = cuda_kernel(x=dy, b=empty_tensor, ref=ref, grad=1, **cuda_kwargs) + dx.set_shape(x.shape) + return dx + def grad_db(dx): + if b.shape[0] == 0: + return empty_tensor + db = dx + if axis < x.shape.rank - 1: + db = tf.reduce_sum(db, list(range(axis + 1, x.shape.rank))) + if axis > 0: + db = tf.reduce_sum(db, list(range(axis))) + db.set_shape(b.shape) + return db + + # Second order gradients: d_dy, d_x = grad2(d_dx, d_db, x, y) + def grad2_d_dy(d_dx, d_db, x, y): + ref = {'x': x, 'y': y}[act_spec.ref] + d_dy = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=1, **cuda_kwargs) + d_dy.set_shape(x.shape) + return d_dy + def grad2_d_x(d_dx, d_db, x, y): + ref = {'x': x, 'y': y}[act_spec.ref] + d_x = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=2, **cuda_kwargs) + d_x.set_shape(x.shape) + return d_x + + # Fast version for piecewise-linear activation funcs. + @tf.custom_gradient + def func_zero_2nd_grad(x, b): + y = func_y(x, b) + @tf.custom_gradient + def grad(dy): + dx = grad_dx(dy, x, y) + db = grad_db(dx) + def grad2(d_dx, d_db): + d_dy = grad2_d_dy(d_dx, d_db, x, y) + return d_dy + return (dx, db), grad2 + return y, grad + + # Slow version for general activation funcs. + @tf.custom_gradient + def func_nonzero_2nd_grad(x, b): + y = func_y(x, b) + def grad_wrap(dy): + @tf.custom_gradient + def grad_impl(dy, x): + dx = grad_dx(dy, x, y) + db = grad_db(dx) + def grad2(d_dx, d_db): + d_dy = grad2_d_dy(d_dx, d_db, x, y) + d_x = grad2_d_x(d_dx, d_db, x, y) + return d_dy, d_x + return (dx, db), grad2 + return grad_impl(dy, x) + return y, grad_wrap + + # Which version to use? + if act_spec.zero_2nd_grad: + return func_zero_2nd_grad(x, b) + return func_nonzero_2nd_grad(x, b) + +#---------------------------------------------------------------------------- diff --git a/tflib/ops/upfirdn_2d.cu b/tflib/ops/upfirdn_2d.cu new file mode 100644 index 0000000000000000000000000000000000000000..b97ef36c9e5ba46a92a380dbc687e275235a1ccf --- /dev/null +++ b/tflib/ops/upfirdn_2d.cu @@ -0,0 +1,326 @@ +// Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +// +// This work is made available under the Nvidia Source Code License-NC. +// To view a copy of this license, visit +// https://nvlabs.github.io/stylegan2/license.html + +#define EIGEN_USE_GPU +#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace tensorflow::shape_inference; + +//------------------------------------------------------------------------ +// Helpers. + +#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false) + +static __host__ __device__ __forceinline__ int floorDiv(int a, int b) +{ + int c = a / b; + if (c * b > a) + c--; + return c; +} + +//------------------------------------------------------------------------ +// CUDA kernel params. + +template +struct UpFirDn2DKernelParams +{ + const T* x; // [majorDim, inH, inW, minorDim] + const T* k; // [kernelH, kernelW] + T* y; // [majorDim, outH, outW, minorDim] + + int upx; + int upy; + int downx; + int downy; + int padx0; + int padx1; + int pady0; + int pady1; + + int majorDim; + int inH; + int inW; + int minorDim; + int kernelH; + int kernelW; + int outH; + int outW; + int loopMajor; + int loopX; +}; + +//------------------------------------------------------------------------ +// General CUDA implementation for large filter kernels. + +template +static __global__ void UpFirDn2DKernel_large(const UpFirDn2DKernelParams p) +{ + // Calculate thread index. + int minorIdx = blockIdx.x * blockDim.x + threadIdx.x; + int outY = minorIdx / p.minorDim; + minorIdx -= outY * p.minorDim; + int outXBase = blockIdx.y * p.loopX * blockDim.y + threadIdx.y; + int majorIdxBase = blockIdx.z * p.loopMajor; + if (outXBase >= p.outW || outY >= p.outH || majorIdxBase >= p.majorDim) + return; + + // Setup Y receptive field. + int midY = outY * p.downy + p.upy - 1 - p.pady0; + int inY = min(max(floorDiv(midY, p.upy), 0), p.inH); + int h = min(max(floorDiv(midY + p.kernelH, p.upy), 0), p.inH) - inY; + int kernelY = midY + p.kernelH - (inY + 1) * p.upy; + + // Loop over majorDim and outX. + for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor && majorIdx < p.majorDim; loopMajor++, majorIdx++) + for (int loopX = 0, outX = outXBase; loopX < p.loopX && outX < p.outW; loopX++, outX += blockDim.y) + { + // Setup X receptive field. + int midX = outX * p.downx + p.upx - 1 - p.padx0; + int inX = min(max(floorDiv(midX, p.upx), 0), p.inW); + int w = min(max(floorDiv(midX + p.kernelW, p.upx), 0), p.inW) - inX; + int kernelX = midX + p.kernelW - (inX + 1) * p.upx; + + // Initialize pointers. + const T* xp = &p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx]; + const T* kp = &p.k[kernelY * p.kernelW + kernelX]; + int xpx = p.minorDim; + int kpx = -p.upx; + int xpy = p.inW * p.minorDim; + int kpy = -p.upy * p.kernelW; + + // Inner loop. + float v = 0.0f; + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++) + { + v += (float)(*xp) * (float)(*kp); + xp += xpx; + kp += kpx; + } + xp += xpy - w * xpx; + kp += kpy - w * kpx; + } + + // Store result. + p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v; + } +} + +//------------------------------------------------------------------------ +// Specialized CUDA implementation for small filter kernels. + +template +static __global__ void UpFirDn2DKernel_small(const UpFirDn2DKernelParams p) +{ + //assert(kernelW % upx == 0); + //assert(kernelH % upy == 0); + const int tileInW = ((tileOutW - 1) * downx + kernelW - 1) / upx + 1; + const int tileInH = ((tileOutH - 1) * downy + kernelH - 1) / upy + 1; + __shared__ volatile float sk[kernelH][kernelW]; + __shared__ volatile float sx[tileInH][tileInW]; + + // Calculate tile index. + int minorIdx = blockIdx.x; + int tileOutY = minorIdx / p.minorDim; + minorIdx -= tileOutY * p.minorDim; + tileOutY *= tileOutH; + int tileOutXBase = blockIdx.y * p.loopX * tileOutW; + int majorIdxBase = blockIdx.z * p.loopMajor; + if (tileOutXBase >= p.outW | tileOutY >= p.outH | majorIdxBase >= p.majorDim) + return; + + // Load filter kernel (flipped). + for (int tapIdx = threadIdx.x; tapIdx < kernelH * kernelW; tapIdx += blockDim.x) + { + int ky = tapIdx / kernelW; + int kx = tapIdx - ky * kernelW; + float v = 0.0f; + if (kx < p.kernelW & ky < p.kernelH) + v = (float)p.k[(p.kernelH - 1 - ky) * p.kernelW + (p.kernelW - 1 - kx)]; + sk[ky][kx] = v; + } + + // Loop over majorDim and outX. + for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor & majorIdx < p.majorDim; loopMajor++, majorIdx++) + for (int loopX = 0, tileOutX = tileOutXBase; loopX < p.loopX & tileOutX < p.outW; loopX++, tileOutX += tileOutW) + { + // Load input pixels. + int tileMidX = tileOutX * downx + upx - 1 - p.padx0; + int tileMidY = tileOutY * downy + upy - 1 - p.pady0; + int tileInX = floorDiv(tileMidX, upx); + int tileInY = floorDiv(tileMidY, upy); + __syncthreads(); + for (int inIdx = threadIdx.x; inIdx < tileInH * tileInW; inIdx += blockDim.x) + { + int relInY = inIdx / tileInW; + int relInX = inIdx - relInY * tileInW; + int inX = relInX + tileInX; + int inY = relInY + tileInY; + float v = 0.0f; + if (inX >= 0 & inY >= 0 & inX < p.inW & inY < p.inH) + v = (float)p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx]; + sx[relInY][relInX] = v; + } + + // Loop over output pixels. + __syncthreads(); + for (int outIdx = threadIdx.x; outIdx < tileOutH * tileOutW; outIdx += blockDim.x) + { + int relOutY = outIdx / tileOutW; + int relOutX = outIdx - relOutY * tileOutW; + int outX = relOutX + tileOutX; + int outY = relOutY + tileOutY; + + // Setup receptive field. + int midX = tileMidX + relOutX * downx; + int midY = tileMidY + relOutY * downy; + int inX = floorDiv(midX, upx); + int inY = floorDiv(midY, upy); + int relInX = inX - tileInX; + int relInY = inY - tileInY; + int kernelX = (inX + 1) * upx - midX - 1; // flipped + int kernelY = (inY + 1) * upy - midY - 1; // flipped + + // Inner loop. + float v = 0.0f; + #pragma unroll + for (int y = 0; y < kernelH / upy; y++) + #pragma unroll + for (int x = 0; x < kernelW / upx; x++) + v += sx[relInY + y][relInX + x] * sk[kernelY + y * upy][kernelX + x * upx]; + + // Store result. + if (outX < p.outW & outY < p.outH) + p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v; + } + } +} + +//------------------------------------------------------------------------ +// TensorFlow op. + +template +struct UpFirDn2DOp : public OpKernel +{ + UpFirDn2DKernelParams m_attribs; + + UpFirDn2DOp(OpKernelConstruction* ctx) : OpKernel(ctx) + { + memset(&m_attribs, 0, sizeof(m_attribs)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("upx", &m_attribs.upx)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("upy", &m_attribs.upy)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("downx", &m_attribs.downx)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("downy", &m_attribs.downy)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("padx0", &m_attribs.padx0)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("padx1", &m_attribs.padx1)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("pady0", &m_attribs.pady0)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("pady1", &m_attribs.pady1)); + OP_REQUIRES(ctx, m_attribs.upx >= 1 && m_attribs.upy >= 1, errors::InvalidArgument("upx and upy must be at least 1x1")); + OP_REQUIRES(ctx, m_attribs.downx >= 1 && m_attribs.downy >= 1, errors::InvalidArgument("downx and downy must be at least 1x1")); + } + + void Compute(OpKernelContext* ctx) + { + UpFirDn2DKernelParams p = m_attribs; + cudaStream_t stream = ctx->eigen_device().stream(); + + const Tensor& x = ctx->input(0); // [majorDim, inH, inW, minorDim] + const Tensor& k = ctx->input(1); // [kernelH, kernelW] + p.x = x.flat().data(); + p.k = k.flat().data(); + OP_REQUIRES(ctx, x.dims() == 4, errors::InvalidArgument("input must have rank 4")); + OP_REQUIRES(ctx, k.dims() == 2, errors::InvalidArgument("kernel must have rank 2")); + OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("input too large")); + OP_REQUIRES(ctx, k.NumElements() <= kint32max, errors::InvalidArgument("kernel too large")); + + p.majorDim = (int)x.dim_size(0); + p.inH = (int)x.dim_size(1); + p.inW = (int)x.dim_size(2); + p.minorDim = (int)x.dim_size(3); + p.kernelH = (int)k.dim_size(0); + p.kernelW = (int)k.dim_size(1); + OP_REQUIRES(ctx, p.kernelW >= 1 && p.kernelH >= 1, errors::InvalidArgument("kernel must be at least 1x1")); + + p.outW = (p.inW * p.upx + p.padx0 + p.padx1 - p.kernelW + p.downx) / p.downx; + p.outH = (p.inH * p.upy + p.pady0 + p.pady1 - p.kernelH + p.downy) / p.downy; + OP_REQUIRES(ctx, p.outW >= 1 && p.outH >= 1, errors::InvalidArgument("output must be at least 1x1")); + + Tensor* y = NULL; // [majorDim, outH, outW, minorDim] + TensorShape ys; + ys.AddDim(p.majorDim); + ys.AddDim(p.outH); + ys.AddDim(p.outW); + ys.AddDim(p.minorDim); + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, ys, &y)); + p.y = y->flat().data(); + OP_REQUIRES(ctx, y->NumElements() <= kint32max, errors::InvalidArgument("output too large")); + + // Choose CUDA kernel to use. + void* cudaKernel = (void*)UpFirDn2DKernel_large; + int tileOutW = -1; + int tileOutH = -1; + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 7 && p.kernelH <= 7) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 5 && p.kernelH <= 5) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 3 && p.kernelH <= 3) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + + // Choose launch params. + dim3 blockSize; + dim3 gridSize; + if (tileOutW > 0 && tileOutH > 0) // small + { + p.loopMajor = (p.majorDim - 1) / 16384 + 1; + p.loopX = 1; + blockSize = dim3(32 * 8, 1, 1); + gridSize = dim3(((p.outH - 1) / tileOutH + 1) * p.minorDim, (p.outW - 1) / (p.loopX * tileOutW) + 1, (p.majorDim - 1) / p.loopMajor + 1); + } + else // large + { + p.loopMajor = (p.majorDim - 1) / 16384 + 1; + p.loopX = 4; + blockSize = dim3(4, 32, 1); + gridSize = dim3((p.outH * p.minorDim - 1) / blockSize.x + 1, (p.outW - 1) / (p.loopX * blockSize.y) + 1, (p.majorDim - 1) / p.loopMajor + 1); + } + + // Launch CUDA kernel. + void* args[] = {&p}; + OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel(cudaKernel, gridSize, blockSize, args, 0, stream)); + } +}; + +REGISTER_OP("UpFirDn2D") + .Input ("x: T") + .Input ("k: T") + .Output ("y: T") + .Attr ("T: {float, half}") + .Attr ("upx: int = 1") + .Attr ("upy: int = 1") + .Attr ("downx: int = 1") + .Attr ("downy: int = 1") + .Attr ("padx0: int = 0") + .Attr ("padx1: int = 0") + .Attr ("pady0: int = 0") + .Attr ("pady1: int = 0"); +REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint("T"), UpFirDn2DOp); +REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint("T"), UpFirDn2DOp); + +//------------------------------------------------------------------------ diff --git a/tflib/ops/upfirdn_2d.py b/tflib/ops/upfirdn_2d.py new file mode 100644 index 0000000000000000000000000000000000000000..fd23777ebb87bc83e8728d6fe3904fbbfb5c524c --- /dev/null +++ b/tflib/ops/upfirdn_2d.py @@ -0,0 +1,364 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Custom TensorFlow ops for efficient resampling of 2D images.""" + +import os +import numpy as np +import tensorflow as tf +from .. import custom_ops + +def _get_plugin(): + return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu') + +#---------------------------------------------------------------------------- + +def upfirdn_2d(x, k, upx=1, upy=1, downx=1, downy=1, padx0=0, padx1=0, pady0=0, pady1=0, impl='cuda'): + r"""Pad, upsample, FIR filter, and downsample a batch of 2D images. + + Accepts a batch of 2D images of the shape `[majorDim, inH, inW, minorDim]` + and performs the following operations for each image, batched across + `majorDim` and `minorDim`: + + 1. Pad the image with zeros by the specified number of pixels on each side + (`padx0`, `padx1`, `pady0`, `pady1`). Specifying a negative value + corresponds to cropping the image. + + 2. Upsample the image by inserting the zeros after each pixel (`upx`, `upy`). + + 3. Convolve the image with the specified 2D FIR filter (`k`), shrinking the + image so that the footprint of all output pixels lies within the input image. + + 4. Downsample the image by throwing away pixels (`downx`, `downy`). + + This sequence of operations bears close resemblance to scipy.signal.upfirdn(). + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[majorDim, inH, inW, minorDim]`. + k: 2D FIR filter of the shape `[firH, firW]`. + upx: Integer upsampling factor along the X-axis (default: 1). + upy: Integer upsampling factor along the Y-axis (default: 1). + downx: Integer downsampling factor along the X-axis (default: 1). + downy: Integer downsampling factor along the Y-axis (default: 1). + padx0: Number of pixels to pad on the left side (default: 0). + padx1: Number of pixels to pad on the right side (default: 0). + pady0: Number of pixels to pad on the top side (default: 0). + pady1: Number of pixels to pad on the bottom side (default: 0). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[majorDim, outH, outW, minorDim]`, and same datatype as `x`. + """ + + impl_dict = { + 'ref': _upfirdn_2d_ref, + 'cuda': _upfirdn_2d_cuda, + } + return impl_dict[impl](x=x, k=k, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1) + +#---------------------------------------------------------------------------- + +def _upfirdn_2d_ref(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1): + """Slow reference implementation of `upfirdn_2d()` using standard TensorFlow ops.""" + + x = tf.convert_to_tensor(x) + k = np.asarray(k, dtype=np.float32) + assert x.shape.rank == 4 + inH = x.shape[1].value + inW = x.shape[2].value + minorDim = _shape(x, 3) + kernelH, kernelW = k.shape + assert inW >= 1 and inH >= 1 + assert kernelW >= 1 and kernelH >= 1 + assert isinstance(upx, int) and isinstance(upy, int) + assert isinstance(downx, int) and isinstance(downy, int) + assert isinstance(padx0, int) and isinstance(padx1, int) + assert isinstance(pady0, int) and isinstance(pady1, int) + + # Upsample (insert zeros). + x = tf.reshape(x, [-1, inH, 1, inW, 1, minorDim]) + x = tf.pad(x, [[0, 0], [0, 0], [0, upy - 1], [0, 0], [0, upx - 1], [0, 0]]) + x = tf.reshape(x, [-1, inH * upy, inW * upx, minorDim]) + + # Pad (crop if negative). + x = tf.pad(x, [[0, 0], [max(pady0, 0), max(pady1, 0)], [max(padx0, 0), max(padx1, 0)], [0, 0]]) + x = x[:, max(-pady0, 0) : x.shape[1].value - max(-pady1, 0), max(-padx0, 0) : x.shape[2].value - max(-padx1, 0), :] + + # Convolve with filter. + x = tf.transpose(x, [0, 3, 1, 2]) + x = tf.reshape(x, [-1, 1, inH * upy + pady0 + pady1, inW * upx + padx0 + padx1]) + w = tf.constant(k[::-1, ::-1, np.newaxis, np.newaxis], dtype=x.dtype) + x = tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='VALID', data_format='NCHW') + x = tf.reshape(x, [-1, minorDim, inH * upy + pady0 + pady1 - kernelH + 1, inW * upx + padx0 + padx1 - kernelW + 1]) + x = tf.transpose(x, [0, 2, 3, 1]) + + # Downsample (throw away pixels). + return x[:, ::downy, ::downx, :] + +#---------------------------------------------------------------------------- + +def _upfirdn_2d_cuda(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1): + """Fast CUDA implementation of `upfirdn_2d()` using custom ops.""" + + x = tf.convert_to_tensor(x) + k = np.asarray(k, dtype=np.float32) + majorDim, inH, inW, minorDim = x.shape.as_list() + kernelH, kernelW = k.shape + assert inW >= 1 and inH >= 1 + assert kernelW >= 1 and kernelH >= 1 + assert isinstance(upx, int) and isinstance(upy, int) + assert isinstance(downx, int) and isinstance(downy, int) + assert isinstance(padx0, int) and isinstance(padx1, int) + assert isinstance(pady0, int) and isinstance(pady1, int) + + outW = (inW * upx + padx0 + padx1 - kernelW) // downx + 1 + outH = (inH * upy + pady0 + pady1 - kernelH) // downy + 1 + assert outW >= 1 and outH >= 1 + + kc = tf.constant(k, dtype=x.dtype) + gkc = tf.constant(k[::-1, ::-1], dtype=x.dtype) + gpadx0 = kernelW - padx0 - 1 + gpady0 = kernelH - pady0 - 1 + gpadx1 = inW * upx - outW * downx + padx0 - upx + 1 + gpady1 = inH * upy - outH * downy + pady0 - upy + 1 + + @tf.custom_gradient + def func(x): + y = _get_plugin().up_fir_dn2d(x=x, k=kc, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1) + y.set_shape([majorDim, outH, outW, minorDim]) + @tf.custom_gradient + def grad(dy): + dx = _get_plugin().up_fir_dn2d(x=dy, k=gkc, upx=downx, upy=downy, downx=upx, downy=upy, padx0=gpadx0, padx1=gpadx1, pady0=gpady0, pady1=gpady1) + dx.set_shape([majorDim, inH, inW, minorDim]) + return dx, func + return y, grad + return func(x) + +#---------------------------------------------------------------------------- + +def filter_2d(x, k, gain=1, data_format='NCHW', impl='cuda'): + r"""Filter a batch of 2D images with the given FIR filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and filters each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. + Pixels outside the image are assumed to be zero. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the same shape and datatype as `x`. + """ + + k = _setup_kernel(k) * gain + p = k.shape[0] - 1 + return _simple_upfirdn_2d(x, k, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def upsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): + r"""Upsample a batch of 2D images with the given filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and upsamples each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. + Pixels outside the image are assumed to be zero, and the filter is padded with + zeros so that its shape is a multiple of the upsampling factor. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to nearest-neighbor + upsampling. + factor: Integer upsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` or + `[N, H * factor, W * factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + k = _setup_kernel(k) * (gain * (factor ** 2)) + p = k.shape[0] - factor + return _simple_upfirdn_2d(x, k, up=factor, pad0=(p+1)//2+factor-1, pad1=p//2, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def downsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): + r"""Downsample a batch of 2D images with the given filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and downsamples each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. + Pixels outside the image are assumed to be zero, and the filter is padded with + zeros so that its shape is a multiple of the downsampling factor. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to average pooling. + factor: Integer downsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` or + `[N, H // factor, W // factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + k = _setup_kernel(k) * gain + p = k.shape[0] - factor + return _simple_upfirdn_2d(x, k, down=factor, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def upsample_conv_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): + r"""Fused `upsample_2d()` followed by `tf.nn.conv2d()`. + + Padding is performed only once at the beginning, not between the operations. + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. + Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to nearest-neighbor + upsampling. + factor: Integer upsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` or + `[N, H * factor, W * factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + + # Check weight shape. + w = tf.convert_to_tensor(w) + assert w.shape.rank == 4 + convH = w.shape[0].value + convW = w.shape[1].value + inC = _shape(w, 2) + outC = _shape(w, 3) + assert convW == convH + + # Setup filter kernel. + if k is None: + k = [1] * factor + k = _setup_kernel(k) * (gain * (factor ** 2)) + p = (k.shape[0] - factor) - (convW - 1) + + # Determine data dimensions. + if data_format == 'NCHW': + stride = [1, 1, factor, factor] + output_shape = [_shape(x, 0), outC, (_shape(x, 2) - 1) * factor + convH, (_shape(x, 3) - 1) * factor + convW] + num_groups = _shape(x, 1) // inC + else: + stride = [1, factor, factor, 1] + output_shape = [_shape(x, 0), (_shape(x, 1) - 1) * factor + convH, (_shape(x, 2) - 1) * factor + convW, outC] + num_groups = _shape(x, 3) // inC + + # Transpose weights. + w = tf.reshape(w, [convH, convW, inC, num_groups, -1]) + w = tf.transpose(w[::-1, ::-1], [0, 1, 4, 3, 2]) + w = tf.reshape(w, [convH, convW, -1, num_groups * inC]) + + # Execute. + x = tf.nn.conv2d_transpose(x, w, output_shape=output_shape, strides=stride, padding='VALID', data_format=data_format) + return _simple_upfirdn_2d(x, k, pad0=(p+1)//2+factor-1, pad1=p//2+1, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def conv_downsample_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): + r"""Fused `tf.nn.conv2d()` followed by `downsample_2d()`. + + Padding is performed only once at the beginning, not between the operations. + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. + Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to average pooling. + factor: Integer downsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` or + `[N, H // factor, W // factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + w = tf.convert_to_tensor(w) + convH, convW, _inC, _outC = w.shape.as_list() + assert convW == convH + if k is None: + k = [1] * factor + k = _setup_kernel(k) * gain + p = (k.shape[0] - factor) + (convW - 1) + if data_format == 'NCHW': + s = [1, 1, factor, factor] + else: + s = [1, factor, factor, 1] + x = _simple_upfirdn_2d(x, k, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) + return tf.nn.conv2d(x, w, strides=s, padding='VALID', data_format=data_format) + +#---------------------------------------------------------------------------- +# Internal helper funcs. + +def _shape(tf_expr, dim_idx): + if tf_expr.shape.rank is not None: + dim = tf_expr.shape[dim_idx].value + if dim is not None: + return dim + return tf.shape(tf_expr)[dim_idx] + +def _setup_kernel(k): + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + assert k.ndim == 2 + assert k.shape[0] == k.shape[1] + return k + +def _simple_upfirdn_2d(x, k, up=1, down=1, pad0=0, pad1=0, data_format='NCHW', impl='cuda'): + assert data_format in ['NCHW', 'NHWC'] + assert x.shape.rank == 4 + y = x + if data_format == 'NCHW': + y = tf.reshape(y, [-1, _shape(y, 2), _shape(y, 3), 1]) + y = upfirdn_2d(y, k, upx=up, upy=up, downx=down, downy=down, padx0=pad0, padx1=pad1, pady0=pad0, pady1=pad1, impl=impl) + if data_format == 'NCHW': + y = tf.reshape(y, [-1, _shape(x, 1), _shape(y, 1), _shape(y, 2)]) + return y + +#---------------------------------------------------------------------------- diff --git a/tflib/optimizer.py b/tflib/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..9389b5adaa4b47a896410d549016316e6b85ea2d --- /dev/null +++ b/tflib/optimizer.py @@ -0,0 +1,370 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Helper wrapper for a Tensorflow optimizer.""" + +import platform +import numpy as np +import tensorflow as tf + +from collections import OrderedDict +from typing import List, Union + +from . import autosummary +from . import tfutil +from .. import util + +from .tfutil import TfExpression, TfExpressionEx + +_collective_ops_warning_printed = False +_collective_ops_group_key = 831766147 +_collective_ops_instance_key = 436340067 + +class Optimizer: + """A Wrapper for tf.train.Optimizer. + + Automatically takes care of: + - Gradient averaging for multi-GPU training. + - Gradient accumulation for arbitrarily large minibatches. + - Dynamic loss scaling and typecasts for FP16 training. + - Ignoring corrupted gradients that contain NaNs/Infs. + - Reporting statistics. + - Well-chosen default settings. + """ + + def __init__(self, + name: str = "Train", # Name string that will appear in TensorFlow graph. + tf_optimizer: str = "tf.train.AdamOptimizer", # Underlying optimizer class. + learning_rate: TfExpressionEx = 0.001, # Learning rate. Can vary over time. + minibatch_multiplier: TfExpressionEx = None, # Treat N consecutive minibatches as one by accumulating gradients. + share: "Optimizer" = None, # Share internal state with a previously created optimizer? + use_loss_scaling: bool = False, # Enable dynamic loss scaling for robust mixed-precision training? + loss_scaling_init: float = 64.0, # Log2 of initial loss scaling factor. + loss_scaling_inc: float = 0.0005, # Log2 of per-minibatch loss scaling increment when there is no overflow. + loss_scaling_dec: float = 1.0, # Log2 of per-minibatch loss scaling decrement when there is an overflow. + report_mem_usage: bool = False, # Report fine-grained memory usage statistics in TensorBoard? + **kwargs): + + # Public fields. + self.name = name + self.learning_rate = learning_rate + self.minibatch_multiplier = minibatch_multiplier + self.id = self.name.replace("/", ".") + self.scope = tf.get_default_graph().unique_name(self.id) + self.optimizer_class = util.get_obj_by_name(tf_optimizer) + self.optimizer_kwargs = dict(kwargs) + self.use_loss_scaling = use_loss_scaling + self.loss_scaling_init = loss_scaling_init + self.loss_scaling_inc = loss_scaling_inc + self.loss_scaling_dec = loss_scaling_dec + + # Private fields. + self._updates_applied = False + self._devices = OrderedDict() # device_name => EasyDict() + self._shared_optimizers = OrderedDict() # device_name => optimizer_class + self._gradient_shapes = None # [shape, ...] + self._report_mem_usage = report_mem_usage + + # Validate arguments. + assert callable(self.optimizer_class) + + # Share internal state if requested. + if share is not None: + assert isinstance(share, Optimizer) + assert self.optimizer_class is share.optimizer_class + assert self.learning_rate is share.learning_rate + assert self.optimizer_kwargs == share.optimizer_kwargs + self._shared_optimizers = share._shared_optimizers # pylint: disable=protected-access + + def _get_device(self, device_name: str): + """Get internal state for the given TensorFlow device.""" + tfutil.assert_tf_initialized() + if device_name in self._devices: + return self._devices[device_name] + + # Initialize fields. + device = util.EasyDict() + device.name = device_name + device.optimizer = None # Underlying optimizer: optimizer_class + device.loss_scaling_var = None # Log2 of loss scaling: tf.Variable + device.grad_raw = OrderedDict() # Raw gradients: var => [grad, ...] + device.grad_clean = OrderedDict() # Clean gradients: var => grad + device.grad_acc_vars = OrderedDict() # Accumulation sums: var => tf.Variable + device.grad_acc_count = None # Accumulation counter: tf.Variable + device.grad_acc = OrderedDict() # Accumulated gradients: var => grad + + # Setup TensorFlow objects. + with tfutil.absolute_name_scope(self.scope + "/Devices"), tf.device(device_name), tf.control_dependencies(None): + if device_name not in self._shared_optimizers: + optimizer_name = self.scope.replace("/", "_") + "_opt%d" % len(self._shared_optimizers) + self._shared_optimizers[device_name] = self.optimizer_class(name=optimizer_name, learning_rate=self.learning_rate, **self.optimizer_kwargs) + device.optimizer = self._shared_optimizers[device_name] + if self.use_loss_scaling: + device.loss_scaling_var = tf.Variable(np.float32(self.loss_scaling_init), trainable=False, name="loss_scaling_var") + + # Register device. + self._devices[device_name] = device + return device + + def register_gradients(self, loss: TfExpression, trainable_vars: Union[List, dict]) -> None: + """Register the gradients of the given loss function with respect to the given variables. + Intended to be called once per GPU.""" + tfutil.assert_tf_initialized() + assert not self._updates_applied + device = self._get_device(loss.device) + + # Validate trainables. + if isinstance(trainable_vars, dict): + trainable_vars = list(trainable_vars.values()) # allow passing in Network.trainables as vars + assert isinstance(trainable_vars, list) and len(trainable_vars) >= 1 + assert all(tfutil.is_tf_expression(expr) for expr in trainable_vars + [loss]) + assert all(var.device == device.name for var in trainable_vars) + + # Validate shapes. + if self._gradient_shapes is None: + self._gradient_shapes = [var.shape.as_list() for var in trainable_vars] + assert len(trainable_vars) == len(self._gradient_shapes) + assert all(var.shape.as_list() == var_shape for var, var_shape in zip(trainable_vars, self._gradient_shapes)) + + # Report memory usage if requested. + deps = [] + if self._report_mem_usage: + self._report_mem_usage = False + try: + with tf.name_scope(self.id + '_mem'), tf.device(device.name), tf.control_dependencies([loss]): + deps.append(autosummary.autosummary(self.id + "/mem_usage_gb", tf.contrib.memory_stats.BytesInUse() / 2**30)) + except tf.errors.NotFoundError: + pass + + # Compute gradients. + with tf.name_scope(self.id + "_grad"), tf.device(device.name), tf.control_dependencies(deps): + loss = self.apply_loss_scaling(tf.cast(loss, tf.float32)) + gate = tf.train.Optimizer.GATE_NONE # disable gating to reduce memory usage + grad_list = device.optimizer.compute_gradients(loss=loss, var_list=trainable_vars, gate_gradients=gate) + + # Register gradients. + for grad, var in grad_list: + if var not in device.grad_raw: + device.grad_raw[var] = [] + device.grad_raw[var].append(grad) + + def apply_updates(self, allow_no_op: bool = False) -> tf.Operation: + """Construct training op to update the registered variables based on their gradients.""" + tfutil.assert_tf_initialized() + assert not self._updates_applied + self._updates_applied = True + all_ops = [] + + # Check for no-op. + if allow_no_op and len(self._devices) == 0: + with tfutil.absolute_name_scope(self.scope): + return tf.no_op(name='TrainingOp') + + # Clean up gradients. + for device_idx, device in enumerate(self._devices.values()): + with tfutil.absolute_name_scope(self.scope + "/Clean%d" % device_idx), tf.device(device.name): + for var, grad in device.grad_raw.items(): + + # Filter out disconnected gradients and convert to float32. + grad = [g for g in grad if g is not None] + grad = [tf.cast(g, tf.float32) for g in grad] + + # Sum within the device. + if len(grad) == 0: + grad = tf.zeros(var.shape) # No gradients => zero. + elif len(grad) == 1: + grad = grad[0] # Single gradient => use as is. + else: + grad = tf.add_n(grad) # Multiple gradients => sum. + + # Scale as needed. + scale = 1.0 / len(device.grad_raw[var]) / len(self._devices) + scale = tf.constant(scale, dtype=tf.float32, name="scale") + if self.minibatch_multiplier is not None: + scale /= tf.cast(self.minibatch_multiplier, tf.float32) + scale = self.undo_loss_scaling(scale) + device.grad_clean[var] = grad * scale + + # Sum gradients across devices. + if len(self._devices) > 1: + with tfutil.absolute_name_scope(self.scope + "/Broadcast"), tf.device(None): + if platform.system() == "Windows": # Windows => NCCL ops are not available. + self._broadcast_fallback() + elif tf.VERSION.startswith("1.15."): # TF 1.15 => NCCL ops are broken: https://github.com/tensorflow/tensorflow/issues/41539 + self._broadcast_fallback() + else: # Otherwise => NCCL ops are safe to use. + self._broadcast_nccl() + + # Apply updates separately on each device. + for device_idx, device in enumerate(self._devices.values()): + with tfutil.absolute_name_scope(self.scope + "/Apply%d" % device_idx), tf.device(device.name): + # pylint: disable=cell-var-from-loop + + # Accumulate gradients over time. + if self.minibatch_multiplier is None: + acc_ok = tf.constant(True, name='acc_ok') + device.grad_acc = OrderedDict(device.grad_clean) + else: + # Create variables. + with tf.control_dependencies(None): + for var in device.grad_clean.keys(): + device.grad_acc_vars[var] = tf.Variable(tf.zeros(var.shape), trainable=False, name="grad_acc_var") + device.grad_acc_count = tf.Variable(tf.zeros([]), trainable=False, name="grad_acc_count") + + # Track counter. + count_cur = device.grad_acc_count + 1.0 + count_inc_op = lambda: tf.assign(device.grad_acc_count, count_cur) + count_reset_op = lambda: tf.assign(device.grad_acc_count, tf.zeros([])) + acc_ok = (count_cur >= tf.cast(self.minibatch_multiplier, tf.float32)) + all_ops.append(tf.cond(acc_ok, count_reset_op, count_inc_op)) + + # Track gradients. + for var, grad in device.grad_clean.items(): + acc_var = device.grad_acc_vars[var] + acc_cur = acc_var + grad + device.grad_acc[var] = acc_cur + with tf.control_dependencies([acc_cur]): + acc_inc_op = lambda: tf.assign(acc_var, acc_cur) + acc_reset_op = lambda: tf.assign(acc_var, tf.zeros(var.shape)) + all_ops.append(tf.cond(acc_ok, acc_reset_op, acc_inc_op)) + + # No overflow => apply gradients. + all_ok = tf.reduce_all(tf.stack([acc_ok] + [tf.reduce_all(tf.is_finite(g)) for g in device.grad_acc.values()])) + apply_op = lambda: device.optimizer.apply_gradients([(tf.cast(grad, var.dtype), var) for var, grad in device.grad_acc.items()]) + all_ops.append(tf.cond(all_ok, apply_op, tf.no_op)) + + # Adjust loss scaling. + if self.use_loss_scaling: + ls_inc_op = lambda: tf.assign_add(device.loss_scaling_var, self.loss_scaling_inc) + ls_dec_op = lambda: tf.assign_sub(device.loss_scaling_var, self.loss_scaling_dec) + ls_update_op = lambda: tf.group(tf.cond(all_ok, ls_inc_op, ls_dec_op)) + all_ops.append(tf.cond(acc_ok, ls_update_op, tf.no_op)) + + # Last device => report statistics. + if device_idx == len(self._devices) - 1: + all_ops.append(autosummary.autosummary(self.id + "/learning_rate", tf.convert_to_tensor(self.learning_rate))) + all_ops.append(autosummary.autosummary(self.id + "/overflow_frequency", tf.where(all_ok, 0, 1), condition=acc_ok)) + if self.use_loss_scaling: + all_ops.append(autosummary.autosummary(self.id + "/loss_scaling_log2", device.loss_scaling_var)) + + # Initialize variables. + self.reset_optimizer_state() + if self.use_loss_scaling: + tfutil.init_uninitialized_vars([device.loss_scaling_var for device in self._devices.values()]) + if self.minibatch_multiplier is not None: + tfutil.run([var.initializer for device in self._devices.values() for var in list(device.grad_acc_vars.values()) + [device.grad_acc_count]]) + + # Group everything into a single op. + with tfutil.absolute_name_scope(self.scope): + return tf.group(*all_ops, name="TrainingOp") + + def reset_optimizer_state(self) -> None: + """Reset internal state of the underlying optimizer.""" + tfutil.assert_tf_initialized() + tfutil.run([var.initializer for device in self._devices.values() for var in device.optimizer.variables()]) + + def get_loss_scaling_var(self, device: str) -> Union[tf.Variable, None]: + """Get or create variable representing log2 of the current dynamic loss scaling factor.""" + return self._get_device(device).loss_scaling_var + + def apply_loss_scaling(self, value: TfExpression) -> TfExpression: + """Apply dynamic loss scaling for the given expression.""" + assert tfutil.is_tf_expression(value) + if not self.use_loss_scaling: + return value + return value * tfutil.exp2(self.get_loss_scaling_var(value.device)) + + def undo_loss_scaling(self, value: TfExpression) -> TfExpression: + """Undo the effect of dynamic loss scaling for the given expression.""" + assert tfutil.is_tf_expression(value) + if not self.use_loss_scaling: + return value + return value * tfutil.exp2(-self.get_loss_scaling_var(value.device)) # pylint: disable=invalid-unary-operand-type + + def _broadcast_nccl(self): + """Sum gradients across devices using NCCL ops (fast path).""" + from tensorflow.python.ops import nccl_ops # pylint: disable=no-name-in-module + for all_vars in zip(*[device.grad_clean.keys() for device in self._devices.values()]): + if any(x.shape.num_elements() > 0 for x in all_vars): + all_grads = [device.grad_clean[var] for device, var in zip(self._devices.values(), all_vars)] + all_grads = nccl_ops.all_sum(all_grads) + for device, var, grad in zip(self._devices.values(), all_vars, all_grads): + device.grad_clean[var] = grad + + def _broadcast_fallback(self): + """Sum gradients across devices using TensorFlow collective ops (slow fallback path).""" + from tensorflow.python.ops import collective_ops # pylint: disable=no-name-in-module + global _collective_ops_warning_printed, _collective_ops_group_key, _collective_ops_instance_key + if all(x.shape.num_elements() == 0 for device in self._devices.values() for x in device.grad_clean.values()): + return + if not _collective_ops_warning_printed: + print("------------------------------------------------------------------------") + print("WARNING: Using slow fallback implementation for inter-GPU communication.") + print("Please use TensorFlow 1.14 on Linux for optimal training performance.") + print("------------------------------------------------------------------------") + _collective_ops_warning_printed = True + for device in self._devices.values(): + with tf.device(device.name): + combo = [tf.reshape(x, [x.shape.num_elements()]) for x in device.grad_clean.values()] + combo = tf.concat(combo, axis=0) + combo = collective_ops.all_reduce(combo, merge_op='Add', final_op='Id', + group_size=len(self._devices), group_key=_collective_ops_group_key, + instance_key=_collective_ops_instance_key) + cur_ofs = 0 + for var, grad_old in device.grad_clean.items(): + grad_new = tf.reshape(combo[cur_ofs : cur_ofs + grad_old.shape.num_elements()], grad_old.shape) + cur_ofs += grad_old.shape.num_elements() + device.grad_clean[var] = grad_new + _collective_ops_instance_key += 1 + + +class SimpleAdam: + """Simplified version of tf.train.AdamOptimizer that behaves identically when used with dnnlib.tflib.Optimizer.""" + + def __init__(self, name="Adam", learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8): + self.name = name + self.learning_rate = learning_rate + self.beta1 = beta1 + self.beta2 = beta2 + self.epsilon = epsilon + self.all_state_vars = [] + + def variables(self): + return self.all_state_vars + + def compute_gradients(self, loss, var_list, gate_gradients=tf.train.Optimizer.GATE_NONE): + assert gate_gradients == tf.train.Optimizer.GATE_NONE + return list(zip(tf.gradients(loss, var_list), var_list)) + + def apply_gradients(self, grads_and_vars): + with tf.name_scope(self.name): + state_vars = [] + update_ops = [] + + # Adjust learning rate to deal with startup bias. + with tf.control_dependencies(None): + b1pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False) + b2pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False) + state_vars += [b1pow_var, b2pow_var] + b1pow_new = b1pow_var * self.beta1 + b2pow_new = b2pow_var * self.beta2 + update_ops += [tf.assign(b1pow_var, b1pow_new), tf.assign(b2pow_var, b2pow_new)] + lr_new = self.learning_rate * tf.sqrt(1 - b2pow_new) / (1 - b1pow_new) + + # Construct ops to update each variable. + for grad, var in grads_and_vars: + with tf.control_dependencies(None): + m_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False) + v_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False) + state_vars += [m_var, v_var] + m_new = self.beta1 * m_var + (1 - self.beta1) * grad + v_new = self.beta2 * v_var + (1 - self.beta2) * tf.square(grad) + var_delta = lr_new * m_new / (tf.sqrt(v_new) + self.epsilon) + update_ops += [tf.assign(m_var, m_new), tf.assign(v_var, v_new), tf.assign_sub(var, var_delta)] + + # Group everything together. + self.all_state_vars += state_vars + return tf.group(*update_ops) diff --git a/tflib/tfutil.py b/tflib/tfutil.py new file mode 100644 index 0000000000000000000000000000000000000000..1127c7beecfe526b459b3b99ee34e1c431e19e1c --- /dev/null +++ b/tflib/tfutil.py @@ -0,0 +1,252 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Miscellaneous helper utils for Tensorflow.""" + +import os +import numpy as np +import tensorflow as tf + +# Silence deprecation warnings from TensorFlow 1.13 onwards +import logging +logging.getLogger('tensorflow').setLevel(logging.ERROR) +import tensorflow.contrib # requires TensorFlow 1.x! +tf.contrib = tensorflow.contrib + +from typing import Any, Iterable, List, Union + +TfExpression = Union[tf.Tensor, tf.Variable, tf.Operation] +"""A type that represents a valid Tensorflow expression.""" + +TfExpressionEx = Union[TfExpression, int, float, np.ndarray] +"""A type that can be converted to a valid Tensorflow expression.""" + + +def run(*args, **kwargs) -> Any: + """Run the specified ops in the default session.""" + assert_tf_initialized() + return tf.get_default_session().run(*args, **kwargs) + + +def is_tf_expression(x: Any) -> bool: + """Check whether the input is a valid Tensorflow expression, i.e., Tensorflow Tensor, Variable, or Operation.""" + return isinstance(x, (tf.Tensor, tf.Variable, tf.Operation)) + + +def shape_to_list(shape: Iterable[tf.Dimension]) -> List[Union[int, None]]: + """Convert a Tensorflow shape to a list of ints. Retained for backwards compatibility -- use TensorShape.as_list() in new code.""" + return [dim.value for dim in shape] + + +def flatten(x: TfExpressionEx) -> TfExpression: + """Shortcut function for flattening a tensor.""" + with tf.name_scope("Flatten"): + return tf.reshape(x, [-1]) + + +def log2(x: TfExpressionEx) -> TfExpression: + """Logarithm in base 2.""" + with tf.name_scope("Log2"): + return tf.log(x) * np.float32(1.0 / np.log(2.0)) + + +def exp2(x: TfExpressionEx) -> TfExpression: + """Exponent in base 2.""" + with tf.name_scope("Exp2"): + return tf.exp(x * np.float32(np.log(2.0))) + + +def lerp(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpressionEx: + """Linear interpolation.""" + with tf.name_scope("Lerp"): + return a + (b - a) * t + + +def lerp_clip(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpression: + """Linear interpolation with clip.""" + with tf.name_scope("LerpClip"): + return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0) + + +def absolute_name_scope(scope: str) -> tf.name_scope: + """Forcefully enter the specified name scope, ignoring any surrounding scopes.""" + return tf.name_scope(scope + "/") + + +def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope: + """Forcefully enter the specified variable scope, ignoring any surrounding scopes.""" + return tf.variable_scope(tf.VariableScope(name=scope, **kwargs), auxiliary_name_scope=False) + + +def _sanitize_tf_config(config_dict: dict = None) -> dict: + # Defaults. + cfg = dict() + cfg["rnd.np_random_seed"] = None # Random seed for NumPy. None = keep as is. + cfg["rnd.tf_random_seed"] = "auto" # Random seed for TensorFlow. 'auto' = derive from NumPy random state. None = keep as is. + cfg["env.TF_CPP_MIN_LOG_LEVEL"] = "1" # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info. + cfg["graph_options.place_pruned_graph"] = True # False = Check that all ops are available on the designated device. True = Skip the check for ops that are not used. + cfg["gpu_options.allow_growth"] = True # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed. + + # Remove defaults for environment variables that are already set. + for key in list(cfg): + fields = key.split(".") + if fields[0] == "env": + assert len(fields) == 2 + if fields[1] in os.environ: + del cfg[key] + + # User overrides. + if config_dict is not None: + cfg.update(config_dict) + return cfg + + +def init_tf(config_dict: dict = None) -> None: + """Initialize TensorFlow session using good default settings.""" + # Skip if already initialized. + if tf.get_default_session() is not None: + return + + # Setup config dict and random seeds. + cfg = _sanitize_tf_config(config_dict) + np_random_seed = cfg["rnd.np_random_seed"] + if np_random_seed is not None: + np.random.seed(np_random_seed) + tf_random_seed = cfg["rnd.tf_random_seed"] + if tf_random_seed == "auto": + tf_random_seed = np.random.randint(1 << 31) + if tf_random_seed is not None: + tf.set_random_seed(tf_random_seed) + + # Setup environment variables. + for key, value in cfg.items(): + fields = key.split(".") + if fields[0] == "env": + assert len(fields) == 2 + os.environ[fields[1]] = str(value) + + # Create default TensorFlow session. + create_session(cfg, force_as_default=True) + + +def assert_tf_initialized(): + """Check that TensorFlow session has been initialized.""" + if tf.get_default_session() is None: + raise RuntimeError("No default TensorFlow session found. Please call dnnlib.tflib.init_tf().") + + +def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session: + """Create tf.Session based on config dict.""" + # Setup TensorFlow config proto. + cfg = _sanitize_tf_config(config_dict) + config_proto = tf.ConfigProto() + for key, value in cfg.items(): + fields = key.split(".") + if fields[0] not in ["rnd", "env"]: + obj = config_proto + for field in fields[:-1]: + obj = getattr(obj, field) + setattr(obj, fields[-1], value) + + # Create session. + session = tf.Session(config=config_proto) + if force_as_default: + # pylint: disable=protected-access + session._default_session = session.as_default() + session._default_session.enforce_nesting = False + session._default_session.__enter__() + return session + + +def init_uninitialized_vars(target_vars: List[tf.Variable] = None) -> None: + """Initialize all tf.Variables that have not already been initialized. + + Equivalent to the following, but more efficient and does not bloat the tf graph: + tf.variables_initializer(tf.report_uninitialized_variables()).run() + """ + assert_tf_initialized() + if target_vars is None: + target_vars = tf.global_variables() + + test_vars = [] + test_ops = [] + + with tf.control_dependencies(None): # ignore surrounding control_dependencies + for var in target_vars: + assert is_tf_expression(var) + + try: + tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/IsVariableInitialized:0")) + except KeyError: + # Op does not exist => variable may be uninitialized. + test_vars.append(var) + + with absolute_name_scope(var.name.split(":")[0]): + test_ops.append(tf.is_variable_initialized(var)) + + init_vars = [var for var, inited in zip(test_vars, run(test_ops)) if not inited] + run([var.initializer for var in init_vars]) + + +def set_vars(var_to_value_dict: dict) -> None: + """Set the values of given tf.Variables. + + Equivalent to the following, but more efficient and does not bloat the tf graph: + tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()] + """ + assert_tf_initialized() + ops = [] + feed_dict = {} + + for var, value in var_to_value_dict.items(): + assert is_tf_expression(var) + + try: + setter = tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/setter:0")) # look for existing op + except KeyError: + with absolute_name_scope(var.name.split(":")[0]): + with tf.control_dependencies(None): # ignore surrounding control_dependencies + setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, "new_value"), name="setter") # create new setter + + ops.append(setter) + feed_dict[setter.op.inputs[1]] = value + + run(ops, feed_dict) + + +def create_var_with_large_initial_value(initial_value: np.ndarray, *args, **kwargs): + """Create tf.Variable with large initial value without bloating the tf graph.""" + assert_tf_initialized() + assert isinstance(initial_value, np.ndarray) + zeros = tf.zeros(initial_value.shape, initial_value.dtype) + var = tf.Variable(zeros, *args, **kwargs) + set_vars({var: initial_value}) + return var + + +def convert_images_from_uint8(images, drange=[-1,1], nhwc_to_nchw=False): + """Convert a minibatch of images from uint8 to float32 with configurable dynamic range. + Can be used as an input transformation for Network.run(). + """ + images = tf.cast(images, tf.float32) + if nhwc_to_nchw: + images = tf.transpose(images, [0, 3, 1, 2]) + return images * ((drange[1] - drange[0]) / 255) + drange[0] + + +def convert_images_to_uint8(images, drange=[-1,1], nchw_to_nhwc=False, shrink=1): + """Convert a minibatch of images from float32 to uint8 with configurable dynamic range. + Can be used as an output transformation for Network.run(). + """ + images = tf.cast(images, tf.float32) + if shrink > 1: + ksize = [1, 1, shrink, shrink] + images = tf.nn.avg_pool(images, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") + if nchw_to_nhwc: + images = tf.transpose(images, [0, 2, 3, 1]) + scale = 255 / (drange[1] - drange[0]) + images = images * scale + (0.5 - drange[0] * scale) + return tf.saturate_cast(images, tf.uint8) diff --git a/util.py b/util.py new file mode 100644 index 0000000000000000000000000000000000000000..73c98d73c26e03800a39c62386c322bb518bbd18 --- /dev/null +++ b/util.py @@ -0,0 +1,410 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Miscellaneous utility classes and functions.""" + +import ctypes +import fnmatch +import importlib +import inspect +import numpy as np +import os +import shutil +import sys +import types +import io +import pickle +import re +import requests +import html +import hashlib +import glob +import uuid + +from distutils.util import strtobool +from typing import Any, List, Tuple, Union + + +# Util classes +# ------------------------------------------------------------------------------------------ + + +class EasyDict(dict): + """Convenience class that behaves like a dict but allows access with the attribute syntax.""" + + def __getattr__(self, name: str) -> Any: + try: + return self[name] + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name: str, value: Any) -> None: + self[name] = value + + def __delattr__(self, name: str) -> None: + del self[name] + + +class Logger(object): + """Redirect stderr to stdout, optionally print stdout to a file, and optionally force flushing on both stdout and the file.""" + + def __init__(self, file_name: str = None, file_mode: str = "w", should_flush: bool = True): + self.file = None + + if file_name is not None: + self.file = open(file_name, file_mode) + + self.should_flush = should_flush + self.stdout = sys.stdout + self.stderr = sys.stderr + + sys.stdout = self + sys.stderr = self + + def __enter__(self) -> "Logger": + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + self.close() + + def write(self, text: str) -> None: + """Write text to stdout (and a file) and optionally flush.""" + if len(text) == 0: # workaround for a bug in VSCode debugger: sys.stdout.write(''); sys.stdout.flush() => crash + return + + if self.file is not None: + self.file.write(text) + + self.stdout.write(text) + + if self.should_flush: + self.flush() + + def flush(self) -> None: + """Flush written text to both stdout and a file, if open.""" + if self.file is not None: + self.file.flush() + + self.stdout.flush() + + def close(self) -> None: + """Flush, close possible files, and remove stdout/stderr mirroring.""" + self.flush() + + # if using multiple loggers, prevent closing in wrong order + if sys.stdout is self: + sys.stdout = self.stdout + if sys.stderr is self: + sys.stderr = self.stderr + + if self.file is not None: + self.file.close() + + +# Small util functions +# ------------------------------------------------------------------------------------------ + + +def format_time(seconds: Union[int, float]) -> str: + """Convert the seconds to human readable string with days, hours, minutes and seconds.""" + s = int(np.rint(seconds)) + + if s < 60: + return "{0}s".format(s) + elif s < 60 * 60: + return "{0}m {1:02}s".format(s // 60, s % 60) + elif s < 24 * 60 * 60: + return "{0}h {1:02}m {2:02}s".format(s // (60 * 60), (s // 60) % 60, s % 60) + else: + return "{0}d {1:02}h {2:02}m".format(s // (24 * 60 * 60), (s // (60 * 60)) % 24, (s // 60) % 60) + + +def ask_yes_no(question: str) -> bool: + """Ask the user the question until the user inputs a valid answer.""" + while True: + try: + print("{0} [y/n]".format(question)) + return strtobool(input().lower()) + except ValueError: + pass + + +def tuple_product(t: Tuple) -> Any: + """Calculate the product of the tuple elements.""" + result = 1 + + for v in t: + result *= v + + return result + + +_str_to_ctype = { + "uint8": ctypes.c_ubyte, + "uint16": ctypes.c_uint16, + "uint32": ctypes.c_uint32, + "uint64": ctypes.c_uint64, + "int8": ctypes.c_byte, + "int16": ctypes.c_int16, + "int32": ctypes.c_int32, + "int64": ctypes.c_int64, + "float32": ctypes.c_float, + "float64": ctypes.c_double +} + + +def get_dtype_and_ctype(type_obj: Any) -> Tuple[np.dtype, Any]: + """Given a type name string (or an object having a __name__ attribute), return matching Numpy and ctypes types that have the same size in bytes.""" + type_str = None + + if isinstance(type_obj, str): + type_str = type_obj + elif hasattr(type_obj, "__name__"): + type_str = type_obj.__name__ + elif hasattr(type_obj, "name"): + type_str = type_obj.name + else: + raise RuntimeError("Cannot infer type name from input") + + assert type_str in _str_to_ctype.keys() + + my_dtype = np.dtype(type_str) + my_ctype = _str_to_ctype[type_str] + + assert my_dtype.itemsize == ctypes.sizeof(my_ctype) + + return my_dtype, my_ctype + + +def is_pickleable(obj: Any) -> bool: + try: + with io.BytesIO() as stream: + pickle.dump(obj, stream) + return True + except: + return False + + +# Functionality to import modules/objects by name, and call functions by name +# ------------------------------------------------------------------------------------------ + +def get_module_from_obj_name(obj_name: str) -> Tuple[types.ModuleType, str]: + """Searches for the underlying module behind the name to some python object. + Returns the module and the object name (original name with module part removed).""" + + # allow convenience shorthands, substitute them by full names + obj_name = re.sub("^np.", "numpy.", obj_name) + obj_name = re.sub("^tf.", "tensorflow.", obj_name) + + # list alternatives for (module_name, local_obj_name) + parts = obj_name.split(".") + name_pairs = [(".".join(parts[:i]), ".".join(parts[i:])) for i in range(len(parts), 0, -1)] + + # try each alternative in turn + for module_name, local_obj_name in name_pairs: + try: + module = importlib.import_module(module_name) # may raise ImportError + get_obj_from_module(module, local_obj_name) # may raise AttributeError + return module, local_obj_name + except: + pass + + # maybe some of the modules themselves contain errors? + for module_name, _local_obj_name in name_pairs: + try: + importlib.import_module(module_name) # may raise ImportError + except ImportError: + if not str(sys.exc_info()[1]).startswith("No module named '" + module_name + "'"): + raise + + # maybe the requested attribute is missing? + for module_name, local_obj_name in name_pairs: + try: + module = importlib.import_module(module_name) # may raise ImportError + get_obj_from_module(module, local_obj_name) # may raise AttributeError + except ImportError: + pass + + # we are out of luck, but we have no idea why + raise ImportError(obj_name) + + +def get_obj_from_module(module: types.ModuleType, obj_name: str) -> Any: + """Traverses the object name and returns the last (rightmost) python object.""" + if obj_name == '': + return module + obj = module + for part in obj_name.split("."): + obj = getattr(obj, part) + return obj + + +def get_obj_by_name(name: str) -> Any: + """Finds the python object with the given name.""" + module, obj_name = get_module_from_obj_name(name) + return get_obj_from_module(module, obj_name) + + +def call_func_by_name(*args, func_name: str = None, **kwargs) -> Any: + """Finds the python object with the given name and calls it as a function.""" + assert func_name is not None + func_obj = get_obj_by_name(func_name) + assert callable(func_obj) + return func_obj(*args, **kwargs) + + +def get_module_dir_by_obj_name(obj_name: str) -> str: + """Get the directory path of the module containing the given object name.""" + module, _ = get_module_from_obj_name(obj_name) + return os.path.dirname(inspect.getfile(module)) + + +def is_top_level_function(obj: Any) -> bool: + """Determine whether the given object is a top-level function, i.e., defined at module scope using 'def'.""" + return callable(obj) and obj.__name__ in sys.modules[obj.__module__].__dict__ + + +def get_top_level_function_name(obj: Any) -> str: + """Return the fully-qualified name of a top-level function.""" + assert is_top_level_function(obj) + return obj.__module__ + "." + obj.__name__ + + +# File system helpers +# ------------------------------------------------------------------------------------------ + +def list_dir_recursively_with_ignore(dir_path: str, ignores: List[str] = None, add_base_to_relative: bool = False) -> List[Tuple[str, str]]: + """List all files recursively in a given directory while ignoring given file and directory names. + Returns list of tuples containing both absolute and relative paths.""" + assert os.path.isdir(dir_path) + base_name = os.path.basename(os.path.normpath(dir_path)) + + if ignores is None: + ignores = [] + + result = [] + + for root, dirs, files in os.walk(dir_path, topdown=True): + for ignore_ in ignores: + dirs_to_remove = [d for d in dirs if fnmatch.fnmatch(d, ignore_)] + + # dirs need to be edited in-place + for d in dirs_to_remove: + dirs.remove(d) + + files = [f for f in files if not fnmatch.fnmatch(f, ignore_)] + + absolute_paths = [os.path.join(root, f) for f in files] + relative_paths = [os.path.relpath(p, dir_path) for p in absolute_paths] + + if add_base_to_relative: + relative_paths = [os.path.join(base_name, p) for p in relative_paths] + + assert len(absolute_paths) == len(relative_paths) + result += zip(absolute_paths, relative_paths) + + return result + + +def copy_files_and_create_dirs(files: List[Tuple[str, str]]) -> None: + """Takes in a list of tuples of (src, dst) paths and copies files. + Will create all necessary directories.""" + for file in files: + target_dir_name = os.path.dirname(file[1]) + + # will create all intermediate-level directories + if not os.path.exists(target_dir_name): + os.makedirs(target_dir_name) + + shutil.copyfile(file[0], file[1]) + + +# URL helpers +# ------------------------------------------------------------------------------------------ + +def is_url(obj: Any, allow_file_urls: bool = False) -> bool: + """Determine whether the given object is a valid URL string.""" + if not isinstance(obj, str) or not "://" in obj: + return False + if allow_file_urls and obj.startswith('file:///'): + return True + try: + res = requests.compat.urlparse(obj) + if not res.scheme or not res.netloc or not "." in res.netloc: + return False + res = requests.compat.urlparse(requests.compat.urljoin(obj, "/")) + if not res.scheme or not res.netloc or not "." in res.netloc: + return False + except: + return False + return True + + +def open_url(url: str, cache_dir: str = None, num_attempts: int = 10, verbose: bool = True) -> Any: + """Download the given URL and return a binary-mode file object to access the data.""" + assert is_url(url, allow_file_urls=True) + assert num_attempts >= 1 + + # Handle file URLs. + if url.startswith('file:///'): + return open(url[len('file:///'):], "rb") + + # Lookup from cache. + url_md5 = hashlib.md5(url.encode("utf-8")).hexdigest() + if cache_dir is not None: + cache_files = glob.glob(os.path.join(cache_dir, url_md5 + "_*")) + if len(cache_files) == 1: + return open(cache_files[0], "rb") + + # Download. + url_name = None + url_data = None + with requests.Session() as session: + if verbose: + print("Downloading %s ..." % url, end="", flush=True) + for attempts_left in reversed(range(num_attempts)): + try: + with session.get(url) as res: + res.raise_for_status() + if len(res.content) == 0: + raise IOError("No data received") + + if len(res.content) < 8192: + content_str = res.content.decode("utf-8") + if "download_warning" in res.headers.get("Set-Cookie", ""): + links = [html.unescape(link) for link in content_str.split('"') if "export=download" in link] + if len(links) == 1: + url = requests.compat.urljoin(url, links[0]) + raise IOError("Google Drive virus checker nag") + if "Google Drive - Quota exceeded" in content_str: + raise IOError("Google Drive download quota exceeded -- please try again later") + + match = re.search(r'filename="([^"]*)"', res.headers.get("Content-Disposition", "")) + url_name = match[1] if match else url + url_data = res.content + if verbose: + print(" done") + break + except: + if not attempts_left: + if verbose: + print(" failed") + raise + if verbose: + print(".", end="", flush=True) + + # Save to cache. + if cache_dir is not None: + safe_name = re.sub(r"[^0-9a-zA-Z-._]", "_", url_name) + cache_file = os.path.join(cache_dir, url_md5 + "_" + safe_name) + temp_file = os.path.join(cache_dir, "tmp_" + uuid.uuid4().hex + "_" + url_md5 + "_" + safe_name) + os.makedirs(cache_dir, exist_ok=True) + with open(temp_file, "wb") as f: + f.write(url_data) + os.replace(temp_file, cache_file) # atomic + + # Return data as file object. + return io.BytesIO(url_data)