# Preprocessing script for the VideoAttentionTarget dataset.
# (Non-code scrape residue removed: HuggingFace Spaces page header,
# file size 4,939 bytes, revision 9c9498f, and a line-number gutter.)
import argparse
import glob
from functools import reduce
import os
import pandas as pd
import json
import numpy as np
from PIL import Image
# Command-line interface. --data_path is the dataset root; it is expected to
# contain "annotations/<split>/<show>/<clip>/" CSV tracks and a mirrored
# "images/<show>/<clip>/" directory of frames (see main()).
parser = argparse.ArgumentParser()
parser.add_argument("--data_path", type=str, default="./data/videoattentiontarget")
args = parser.parse_args()
# preprocessing adapted from https://github.com/ejcgt/attention-target-detection/blob/master/dataset.py
def merge_dfs(ls):
    """Outer-merge per-person annotation tables on their shared "path" column.

    Every column except "path" is suffixed with the table's position in *ls*
    (e.g. "xmin_df0", "xmin_df1") so each person keeps a distinct column set
    after the merge. NOTE: renames the columns of the input frames in place.
    Returns the merged table sorted by frame path with a fresh index.
    """
    for idx, frame in enumerate(ls):  # make column names unique per person
        frame.columns = [
            c if c == "path" else f"{c}_df{idx}" for c in frame.columns
        ]
    combined = reduce(
        lambda acc, nxt: pd.merge(acc, nxt, on=["path"], how="outer"), ls
    )
    return combined.sort_values(by=["path"]).reset_index(drop=True)
def smooth_by_conv(window_size, df, col):
    """Temporal smoothing on labels to match original VideoAttTarget evaluation.
    Adapted from https://github.com/ejcgt/attention-target-detection/blob/acd264a3c9e6002b71244dea8c1873e5c5818500/utils/myutils.py

    Applies a centered moving average of width *window_size* over df[col],
    edge-padding with the first/last value so the output length equals the
    input length (for odd window sizes).
    """
    raw = df[col].values
    half = window_size // 2
    # Repeat the boundary samples so the running mean is defined at the edges.
    padded = np.concatenate([raw[0].repeat(half), raw, raw[-1].repeat(half)])
    kernel = np.ones(window_size) / window_size
    return np.convolve(padded.squeeze(), kernel, mode="valid")
def smooth_df(window_size, df):
    """Smooth all four head-bbox coordinate columns of *df* in place.

    Runs smooth_by_conv over xmin/ymin/xmax/ymax with the given window and
    returns the (mutated) dataframe for convenience.
    """
    for coord in ("xmin", "ymin", "xmax", "ymax"):
        df[coord] = smooth_by_conv(window_size, df, coord)
    return df
def main(PATH):
    """Preprocess VideoAttentionTarget annotations into per-split JSON files.

    For each split ("train"/"test"), walks PATH/annotations/<split>/<show>/<clip>,
    loads every per-person CSV track in the clip, smooths the head boxes,
    merges the tracks into per-frame records, and writes the result to
    PATH/<split>_preprocessed.json.

    Preprocessing adapted from
    https://github.com/ejcgt/attention-target-detection/blob/master/dataset.py
    """
    for split in ["train", "test"]:
        sequences = []
        max_num_ppl = 0  # running max of simultaneous people, reported per split
        for seq_path in glob.glob(
            os.path.join(PATH, "annotations", split, "*", "*")
        ):
            # Mirror the annotation path under images/: images/<show>/<clip>.
            # Split on os.sep (not "/") so this also works on Windows paths.
            seq_img_path = os.path.join("images", *seq_path.split(os.sep)[-2:])
            # All frames of a clip share one resolution; probe a single frame.
            frame_dir = os.path.join(PATH, seq_img_path)
            sample_image = os.path.join(frame_dir, os.listdir(frame_dir)[0])
            width, height = Image.open(sample_image).size
            seq_dict = {"path": seq_img_path, "width": width, "height": height}

            person_files = glob.glob(os.path.join(seq_path, "*"))
            num_ppl = len(person_files)
            max_num_ppl = max(max_num_ppl, num_ppl)
            person_dfs = [
                pd.read_csv(
                    file,
                    header=None,
                    index_col=False,
                    names=["path", "xmin", "ymin", "xmax", "ymax", "gazex", "gazey"],
                )
                for file in person_files
            ]
            # Moving-avg smoothing to match original benchmark's evaluation.
            window_size = 11
            person_dfs = [smooth_df(window_size, df) for df in person_dfs]
            merged_df = merge_dfs(person_dfs)  # one row per frame, 6 cols per person

            frames = []
            for _, row in merged_df.iterrows():
                frame_dict = {
                    "path": os.path.join(seq_img_path, row["path"]),
                    "heads": [],
                }
                # Columns 1..6 belong to person 0, 7..12 to person 1, etc.
                for i in range(1, num_ppl * 6 + 1, 6):
                    # NaN means the person is absent from this frame
                    # (track discontinuity, e.g. leaving the frame for a while).
                    if np.isnan(row.iloc[i]):
                        continue
                    # Positional slice must use .iloc: plain row[i:i+6] relies on
                    # deprecated integer-position indexing of a labeled Series.
                    xmin, ymin, xmax, ymax, gazex, gazey = row.iloc[i : i + 6].tolist()
                    # Match original benchmark's preprocessing of annotations:
                    # clamp a single negative gaze coordinate to 0; both negative
                    # marks an out-of-frame gaze target (inout == 0).
                    if gazex >= 0 and gazey < 0:
                        gazey = 0
                    elif gazey >= 0 and gazex < 0:
                        gazex = 0
                    inout = int(gazex >= 0 and gazey >= 0)
                    frame_dict["heads"].append(
                        {
                            "bbox": [xmin, ymin, xmax, ymax],
                            "bbox_norm": [
                                xmin / float(width),
                                ymin / float(height),
                                xmax / float(width),
                                ymax / float(height),
                            ],
                            "gazex": [gazex],
                            "gazex_norm": [gazex / float(width)],
                            "gazey": [gazey],
                            "gazey_norm": [gazey / float(height)],
                            "inout": inout,
                        }
                    )
                frames.append(frame_dict)
            seq_dict["frames"] = frames
            sequences.append(seq_dict)
        print("{} max people per image {}".format(split, max_num_ppl))
        print("{} num unique video sequences {}".format(split, len(sequences)))
        # Context manager guarantees the output file is flushed and closed
        # (the original left the handle open).
        with open(os.path.join(PATH, "{}_preprocessed.json".format(split)), "w") as out_file:
            json.dump(sequences, out_file)
# Entry point; the trailing "|" scrape artifact after main(...) was a syntax error.
if __name__ == "__main__":
    main(args.data_path)