Spaces:
Build error
Build error
''' | |
A sampler is just a list of integer listing the indexes of the | |
inputs in a data set to sample. For reproducibility, the | |
FixedRandomSubsetSampler uses a seeded prng to produce the same | |
sequence always. FixedSubsetSampler is just a wrapper for an | |
explicit list of integers. | |
coordinate_sample solves another sampling problem: when testing | |
convolutional outputs, we can reduce data explosing by sampling | |
random points of the feature map rather than the entire feature map. | |
coordinate_sample does this in a deterministic way that is also | |
resolution-independent. | |
''' | |
import numpy | |
import random | |
from torch.utils.data.sampler import Sampler | |
class FixedSubsetSampler(Sampler): | |
"""Represents a fixed sequence of data set indices. | |
Subsets can be created by specifying a subset of output indexes. | |
""" | |
def __init__(self, samples): | |
self.samples = samples | |
def __iter__(self): | |
return iter(self.samples) | |
def __len__(self): | |
return len(self.samples) | |
def __getitem__(self, key): | |
return self.samples[key] | |
def subset(self, new_subset): | |
return FixedSubsetSampler(self.dereference(new_subset)) | |
def dereference(self, indices): | |
''' | |
Translate output sample indices (small numbers indexing the sample) | |
to input sample indices (larger number indexing the original full set) | |
''' | |
return [self.samples[i] for i in indices] | |
class FixedRandomSubsetSampler(FixedSubsetSampler): | |
"""Samples a fixed number of samples from the dataset, deterministically. | |
Arguments: | |
data_source, | |
sample_size, | |
seed (optional) | |
""" | |
def __init__(self, data_source, start=None, end=None, seed=1): | |
rng = random.Random(seed) | |
shuffled = list(range(len(data_source))) | |
rng.shuffle(shuffled) | |
self.data_source = data_source | |
super(FixedRandomSubsetSampler, self).__init__(shuffled[start:end]) | |
def class_subset(self, class_filter): | |
''' | |
Returns only the subset matching the given rule. | |
''' | |
if isinstance(class_filter, int): | |
rule = lambda d: d[1] == class_filter | |
else: | |
rule = class_filter | |
return self.subset([i for i, j in enumerate(self.samples) | |
if rule(self.data_source[j])]) | |
def coordinate_sample(shape, sample_size, seeds, grid=13, seed=1, flat=False): | |
''' | |
Returns a (end-start) sets of sample_size grid points within | |
the shape given. If the shape dimensions are a multiple of 'grid', | |
then sampled points within the same row will never be duplicated. | |
''' | |
if flat: | |
sampind = numpy.zeros((len(seeds), sample_size), dtype=int) | |
else: | |
sampind = numpy.zeros((len(seeds), 2, sample_size), dtype=int) | |
assert sample_size <= grid | |
for j, seed in enumerate(seeds): | |
rng = numpy.random.RandomState(seed) | |
# Shuffle the 169 random grid squares, and pick :sample_size. | |
square_count = grid ** len(shape) | |
square = numpy.stack(numpy.unravel_index( | |
rng.choice(square_count, square_count)[:sample_size], | |
(grid,) * len(shape))) | |
# Then add a random offset to each x, y and put in the range [0...1) | |
# Notice this selects the same locations regardless of resolution. | |
uniform = (square + rng.uniform(size=square.shape)) / grid | |
# TODO: support affine scaling so that we can align receptive field | |
# centers exactly when sampling neurons in different layers. | |
coords = (uniform * numpy.array(shape)[:,None]).astype(int) | |
# Now take sample_size without replacement. We do this in a way | |
# such that if sample_size is decreased or increased up to 'grid', | |
# the selected points become a subset, not totally different points. | |
if flat: | |
sampind[j] = numpy.ravel_multi_index(coords, dims=shape) | |
else: | |
sampind[j] = coords | |
return sampind | |
if __name__ == '__main__': | |
from numpy.testing import assert_almost_equal | |
# Test that coordinate_sample is deterministic, in-range, and scalable. | |
assert_almost_equal(coordinate_sample((26, 26), 10, range(101, 102)), | |
[[[14, 0, 12, 11, 8, 13, 11, 20, 7, 20], | |
[ 9, 22, 7, 11, 23, 18, 21, 15, 2, 5]]]) | |
assert_almost_equal(coordinate_sample((13, 13), 10, range(101, 102)), | |
[[[ 7, 0, 6, 5, 4, 6, 5, 10, 3, 20 // 2], | |
[ 4, 11, 3, 5, 11, 9, 10, 7, 1, 5 // 2]]]) | |
assert_almost_equal(coordinate_sample((13, 13), 10, range(100, 102), | |
flat=True), | |
[[ 8, 24, 67, 103, 87, 79, 138, 94, 98, 53], | |
[ 95, 11, 81, 70, 63, 87, 75, 137, 40, 2+10*13]]) | |
assert_almost_equal(coordinate_sample((13, 13), 10, range(101, 103), | |
flat=True), | |
[[ 95, 11, 81, 70, 63, 87, 75, 137, 40, 132], | |
[ 0, 78, 114, 111, 66, 45, 72, 73, 79, 135]]) | |
assert_almost_equal(coordinate_sample((26, 26), 10, range(101, 102), | |
flat=True), | |
[[373, 22, 319, 297, 231, 356, 307, 535, 184, 5+20*26]]) | |
# Test FixedRandomSubsetSampler | |
fss = FixedRandomSubsetSampler(range(10)) | |
assert len(fss) == 10 | |
assert_almost_equal(list(fss), [8, 0, 3, 4, 5, 2, 9, 6, 7, 1]) | |
fss = FixedRandomSubsetSampler(range(10), 3, 8) | |
assert len(fss) == 5 | |
assert_almost_equal(list(fss), [4, 5, 2, 9, 6]) | |
fss = FixedRandomSubsetSampler([(i, i % 3) for i in range(10)], | |
class_filter=1) | |
assert len(fss) == 3 | |
assert_almost_equal(list(fss), [4, 7, 1]) | |