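# NOTE: the next three comment lines are residue from the page this file was
# copied from; they are kept verbatim but commented out so the module parses.
# Haaribo's picture
# Add application file
# 8d4ee22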
# The TravelingBirds dataset must be downloaded manually from
# https://worksheets.codalab.org/bundles/0x518829de2aa440c79cd9d75ef6669f27
# It originates from https://github.com/yewsiang/ConceptBottleneck
import os
from pathlib import Path
import numpy as np
import pandas as pd
from dataset_classes.cub200 import CUB200Class
from dataset_classes.utils import index_list_with_sorting, mask_list
class TravelingBirds(CUB200Class):
    """TravelingBirds dataset built on top of ``CUB200Class``.

    Images are read from per-class sub-folders of
    ``root / init_base_folder / <split>``.  Both split folders contain all
    train+test images (with different backgrounds), so the official CUB200
    ``train_test_split.txt`` is used to select the images that belong to the
    requested split.
    """

    # Folder below ``root`` that holds the split folders.
    init_base_folder = 'CUB_fixed'
    # Default dataset root; images are looked up under
    # ``root / init_base_folder / <split>``.
    root = Path.home() / "tmp/Datasets/TravelingBirds"
    # Root that ``adapt_to_crop`` switches ``self.root`` to.
    crop_root = Path.home() / "tmp/Datasets/PPTravelingBirds"

    def get_all_samples_dir(self, dir):
        """Select split folder *dir* and list every sample inside it.

        Sets ``self.base_folder`` to ``init_base_folder/dir`` as a side effect.

        Args:
            dir: Name of the split folder below ``init_base_folder``
                (``_load_metadata`` passes ``"train"`` or ``"test"``).

        Returns:
            The list of ``[relative_path, label]`` pairs produced by
            ``get_all_sample`` for that folder.
        """
        self.base_folder = os.path.join(self.init_base_folder, dir)
        split_dir = Path(self.root) / self.init_base_folder / dir
        return self.get_all_sample(split_dir)

    def adapt_to_crop(self):
        """Redirect the dataset to the cropped images under ``crop_root``.

        Replaces ``self.root`` with ``crop_root`` and points
        ``self.base_folder`` at ``train_cropped`` or ``test_cropped``
        depending on ``self.train``.
        """
        self.root = self.crop_root
        split = "train" if self.train else "test"
        # Derive the prefix from ``init_base_folder`` (as get_all_samples_dir
        # does) instead of repeating the 'CUB_fixed' literal.
        self.base_folder = f"{self.init_base_folder}/{split}_cropped"

    def get_all_sample(self, dir):
        """List all images below *dir*, labelled by their class folder.

        Args:
            dir: Directory whose sub-directories each hold the images of one
                class.

        Returns:
            A list of ``[Path(class_folder) / image_name, label]`` entries,
            where ``label`` is the 1-based position of the class folder in
            the alphabetically sorted folder listing.
        """
        dir = Path(dir)
        # Only directories are classes; a stray file (e.g. ``.DS_Store``)
        # would otherwise crash the inner listing.
        class_names = sorted(
            name for name in os.listdir(dir) if (dir / name).is_dir()
        )
        samples = []
        for label, class_name in enumerate(class_names, start=1):
            # os.listdir returns entries in arbitrary order; sort so the
            # result is deterministic across file systems.
            image_names = sorted(os.listdir(dir / class_name))
            samples.extend(
                [Path(class_name) / image_name, label]
                for image_name in image_names
            )
        return samples

    def _load_metadata(self):
        """Build ``self.data`` with the file paths and targets of the split.

        Reads ``train_test_split.txt`` and ``images.txt`` from the
        ``CUB200/CUB_200_2011`` folder that sits next to ``self.root``, orders
        the listed samples by their CUB200 image id and keeps the ones whose
        ``is_training_img`` flag matches ``self.train``.

        The result is stored in ``self.data`` as a DataFrame with the columns
        ``filepath`` and ``target``.
        """
        cub_dir = Path(self.root).parent / "CUB200" / "CUB_200_2011"
        train_test_split = pd.read_csv(
            cub_dir / "train_test_split.txt",
            sep=' ', names=['img_id', 'is_training_img'])
        images = pd.read_csv(
            cub_dir / "images.txt",
            sep=' ', names=['img_id', "path"])
        path_to_id = dict(zip(images["path"], images["img_id"]))

        # TravelingBirds has all train+test images in both folders, just with
        # different backgrounds.  They are separated by the train_test_split
        # of CUB200.
        split = "train" if self.train else "test"
        samples = self.get_all_samples_dir(split)
        mask = train_test_split["is_training_img"] == (1 if self.train else 0)

        # images.txt uses forward slashes, so look paths up in POSIX form;
        # str(Path) would yield backslashes on Windows.
        ids = np.array([path_to_id[sample[0].as_posix()] for sample in samples])
        order = np.argsort(ids)
        samples = index_list_with_sorting(samples, order)
        # NOTE(review): mask_list is assumed to keep the entries whose mask
        # value is true, position by position -- confirm in
        # dataset_classes.utils.
        samples = mask_list(samples, mask)

        filepaths = [sample[0] for sample in samples]
        labels = [sample[1] for sample in samples]
        self.data = pd.DataFrame({"filepath": filepaths, "target": labels})