"""Utils to load CSV file of audio datasets."""
import os

import pandas as pd

import shared.utils as su
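
# `shared.utils` is the project's own helper package; this module relies on
# su.log (pretty printing), su.io (YAML/txt loading) and
# su.pd_utils (DataFrame filtering).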


def configure_paths_sound_of_water(
    data_root="/work/piyush/from_nfs2/datasets/SoundOfWater",
):
    """Configures dataset paths relative to `data_root`."""
    paths = {
        "data_dir": data_root,
        # Audio is stored inside the same .mp4 clips as video,
        # so both clip dirs point to the videos/ directory.
        "video_clip_dir": os.path.join(data_root, "videos"),
        "audio_clip_dir": os.path.join(data_root, "videos"),
        "annot_dir": os.path.join(data_root, "annotations"),
        "split_dir": os.path.join(data_root, "splits"),
    }
    return paths


def load_csv_sound_of_water(
    paths: dict,
    csv_filters=None,
    csv_name="localisation.csv",
    ds_name="SoundOfWater",
    split=None,
    check_first_frame_annots=True,
):
    """Loads the CSV containing metadata of the dataset.

    `csv_filters` maps column names to allowed value(s); `split` names a
    split file inside `split_dir` that restricts the rows to its item_ids.
    """
    # Avoid a mutable default argument: a shared dict default would be
    # mutated by the split filter below and leak across calls.
    if csv_filters is None:
        csv_filters = {}

    su.log.print_update(
        f" [:::] Loading {ds_name}.",
        pos="left",
        fillchar=".",
    )

    # Configure paths
    video_clip_dir = paths["video_clip_dir"]
    audio_clip_dir = paths["audio_clip_dir"]

    # Load main CSV
    path = os.path.join(paths["annot_dir"], csv_name)
    assert os.path.exists(path), f"CSV file not found at {path}."
    print(" [:::] CSV path:", path)
    df = pd.read_csv(path)
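    # The CSV is expected to provide at least video_id, start_time, end_time
    # and container_id (all used below).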

    # Load side information: containers
    container_path = os.path.join(paths["annot_dir"], "containers.yaml")
    assert os.path.exists(container_path), \
        f"Container file not found at {container_path}."
    containers = su.io.load_yml(container_path)
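    # `containers` maps each container_id to a dict of container attributes,
    # which is merged into the matching CSV rows below.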

    # Update CSV with container information (optional)
    update_with_container_info = True
    if update_with_container_info:
        rows = []
        for _, row in df.iterrows():
            row = row.to_dict()
            row.update(containers[row["container_id"]])
            rows.append(row)
        df = pd.DataFrame(rows)
    print(" [:::] Shape of CSV: ", df.shape)

    # 1. Update item_id
    df["item_id"] = df.apply(
        lambda d: f"{d['video_id']}_{d['start_time']:.1f}_{d['end_time']:.1f}",
        axis=1,
    )
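    # item_ids double as clip file stems: <video_id>_<start>_<end>.mp4.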

    # 2. Update video_clip_path
    df["video_clip_path"] = df["item_id"].apply(
        lambda d: os.path.join(video_clip_dir, f"{d}.mp4")
    )
    # Keep only rows whose video clip actually exists on disk
    df = df[df["video_clip_path"].apply(os.path.exists)]
    print(" [:::] Shape of CSV with available video: ", df.shape)

    # 3. Update audio_clip_path
    df["audio_clip_path"] = df["item_id"].apply(
        lambda d: os.path.join(audio_clip_dir, f"{d}.mp4")
    )
    df = df[df["audio_clip_path"].apply(os.path.exists)]
    print(" [:::] Shape of CSV with available audio: ", df.shape)

    # Add first frame annotation paths (container box + mask per video)
    if check_first_frame_annots:
        frame_annot_dir = os.path.join(paths["annot_dir"], "container_bboxes")
        df["box_path"] = df["video_id"].apply(
            lambda d: os.path.join(frame_annot_dir, f"{d}_box.npy"),
        )
        df["mask_path"] = df["video_id"].apply(
            lambda d: os.path.join(frame_annot_dir, f"{d}_mask.npy"),
        )
        # Drop rows missing either annotation file
        df = df[df["box_path"].apply(os.path.exists)]
        df = df[df["mask_path"].apply(os.path.exists)]
        print(" [:::] Shape of CSV with first frame annotations: ", df.shape)

    # Add split filter: restrict to item_ids listed in the split file
    if split is not None and ("item_id" not in csv_filters):
        assert "split_dir" in paths
        split_path = os.path.join(paths["split_dir"], split)
        assert os.path.exists(split_path), \
            f"Split file not found at {split_path}."
        item_ids = su.io.load_txt(split_path)
        print(" [:::] Number of item_ids in split:", len(item_ids))
        csv_filters["item_id"] = item_ids
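
    # `su.pd_utils.apply_filters` is a project helper; a minimal sketch of the
    # assumed behaviour (keep rows whose column value matches the filter, where
    # each value may be a scalar or a list of allowed values):
    #
    #   def apply_filters(df, filters):
    #       for col, values in filters.items():
    #           allowed = values if isinstance(values, (list, tuple)) else [values]
    #           df = df[df[col].isin(allowed)]
    #       return df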

    # Apply filters to the CSV
    if len(csv_filters) > 0:
        df = su.pd_utils.apply_filters(df, csv_filters)
        print(" [:::] Shape of CSV after filtering: ", df.shape)

    return df


if __name__ == "__main__":
    # Smoke test: load the full CSV and print the first row.
    paths = configure_paths_sound_of_water()
    df = load_csv_sound_of_water(paths)
    row = df.iloc[0].to_dict()
    su.log.json_print(row)
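
    # Usage sketches (file/ID names below are hypothetical):
    #   df_test = load_csv_sound_of_water(paths, split="test.txt")
    #   df_one = load_csv_sound_of_water(
    #       paths, csv_filters={"video_id": ["<some_video_id>"]}
    #   )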