File size: 4,266 Bytes
c5f65a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""Utils to load CSV file of audio datasets."""
import os

import pandas as pd
import shared.utils as su


def configure_paths_sound_of_water(
        data_root="/work/piyush/from_nfs2/datasets/SoundOfWater",
    ):
    """Return the standard directory layout of the SoundOfWater dataset.

    Args:
        data_root: Root directory of the dataset on disk.

    Returns:
        dict with keys `data_dir`, `video_clip_dir`, `audio_clip_dir`
        (video and audio clips live in the same `videos` folder),
        `annot_dir`, and `split_dir`.
    """
    # Sub-directory name for each derived path key.
    layout = {
        "video_clip_dir": "videos",
        "audio_clip_dir": "videos",
        "annot_dir": "annotations",
        "split_dir": "splits",
    }
    paths = {"data_dir": data_root}
    for key, subdir in layout.items():
        paths[key] = os.path.join(data_root, subdir)
    return paths


def load_csv_sound_of_water(
        paths: dict,
        csv_filters=None,
        csv_name="localisation.csv",
        ds_name="SoundOfWater",
        split=None,
        check_first_frame_annots=True,
    ):
    """Loads CSV containing metadata of the dataset.

    Args:
        paths: Directory paths, e.g. from `configure_paths_sound_of_water`.
            Must contain `annot_dir`, `video_clip_dir`, `audio_clip_dir`,
            and `split_dir` (the latter only when `split` is given).
        csv_filters: Optional mapping of column -> allowed values used to
            filter the CSV. `None` (default) means no filtering. The
            caller's dict is never mutated.
        csv_name: File name of the metadata CSV inside `annot_dir`.
        ds_name: Dataset name, used for logging only.
        split: Optional split file name inside `split_dir`; when given,
            only the item_ids listed in that file are kept (unless the
            caller already filters on `item_id`, which takes precedence).
        check_first_frame_annots: If True, keep only rows whose first-frame
            box/mask annotation files exist on disk.

    Returns:
        pd.DataFrame: one row per clip available on disk, augmented with
        container metadata and resolved clip/annotation paths.
    """
    # Defensive copy. The previous signature used a mutable default
    # (`csv_filters=dict()`) that was mutated below when handling `split`,
    # leaking the split filter across calls and into the caller's dict.
    csv_filters = dict(csv_filters) if csv_filters else {}

    su.log.print_update(
        f" [:::] Loading {ds_name}.",
        pos="left",
        fillchar=".",
    )

    # Configure paths
    video_clip_dir = paths["video_clip_dir"]
    audio_clip_dir = paths["audio_clip_dir"]

    # Load main CSV
    path = os.path.join(paths["annot_dir"], csv_name)
    assert os.path.exists(path), \
        f"CSV file not found at {path}."
    print(" [:::] CSV path:", path)
    df = pd.read_csv(path)

    # Load side information: per-container metadata keyed by container_id
    container_path = os.path.join(paths["annot_dir"], "containers.yaml")
    assert os.path.exists(container_path), \
        f"Container file not found at {container_path}."
    containers = su.io.load_yml(container_path)

    # Merge container metadata into each row (record comprehension instead
    # of the slower per-row `iterrows` loop).
    df = pd.DataFrame([
        {**row, **containers[row["container_id"]]}
        for row in df.to_dict(orient="records")
    ])
    print(" [:::] Shape of CSV: ", df.shape)

    # 1. Unique clip identifier: <video_id>_<start_time>_<end_time>
    df["item_id"] = df.apply(
        lambda d: f"{d['video_id']}_{d['start_time']:.1f}_{d['end_time']:.1f}",
        axis=1,
    )

    # 2. Resolve video clip path; drop rows whose clip is missing on disk
    df["video_clip_path"] = df["item_id"].apply(
        lambda d: os.path.join(video_clip_dir, f"{d}.mp4")
    )
    df = df[df["video_clip_path"].apply(os.path.exists)]
    print(" [:::] Shape of CSV with available video: ", df.shape)

    # 3. Resolve audio clip path (audio is read from the same .mp4 files)
    df["audio_clip_path"] = df["item_id"].apply(
        lambda d: os.path.join(audio_clip_dir, f"{d}.mp4")
    )
    df = df[df["audio_clip_path"].apply(os.path.exists)]
    print(" [:::] Shape of CSV with available audio: ", df.shape)

    # Add first-frame annotation paths; keep rows that have both files
    if check_first_frame_annots:
        frame_annot_dir = os.path.join(paths["annot_dir"], "container_bboxes")
        df["box_path"] = df["video_id"].apply(
            lambda d: os.path.join(frame_annot_dir, f"{d}_box.npy"),
        )
        df["mask_path"] = df["video_id"].apply(
            lambda d: os.path.join(frame_annot_dir, f"{d}_mask.npy"),
        )
        df = df[df["box_path"].apply(os.path.exists)]
        df = df[df["mask_path"].apply(os.path.exists)]
        print(" [:::] Shape of CSV with first frame annotations: ", df.shape)

    # Translate `split` into an item_id filter on our local copy
    if split is not None and ("item_id" not in csv_filters):
        assert "split_dir" in paths
        split_path = os.path.join(paths["split_dir"], f"{split}")
        assert os.path.exists(split_path), \
            f"Split file not found at {split_path}."
        item_ids = su.io.load_txt(split_path)
        print(" [:::] Number of item_ids in split:", len(item_ids))
        csv_filters["item_id"] = item_ids

    # Apply filters to the CSV
    if csv_filters:
        df = su.pd_utils.apply_filters(df, csv_filters)
        print(" [:::] Shape of CSV after filtering: ", df.shape)

    return df


if __name__ == "__main__":
    # Smoke test: load the dataset CSV with the default paths and
    # pretty-print the metadata of the first row.
    paths = configure_paths_sound_of_water()
    df = load_csv_sound_of_water(paths)
    row = df.iloc[0].to_dict()
    su.log.json_print(row)