from dataclasses import dataclass
from typing import List, Optional, Union

import torch


@dataclass
class DataSample:
    """Record holding one query with its positive and optional negative text."""

    # Integer identifier of the sample.
    id_: int
    # Query text.
    query: str
    # Text paired with the query as the positive example.
    positive: str
    # Optional negative example text; None when not provided.
    negative: Optional[str] = None
    # Optional task name; None when not provided.
    task_name: Optional[str] = None


class TrainSample:
    """
    Structure for one input example with texts, the label and a unique id
    """

    def __init__(
        self,
        guid: str = "",
        texts: Optional[List[str]] = None,
        label: Union[int, float] = 0,
    ):
        """
        Creates one TrainSample with the given texts, guid and label

        :param guid: id for the example
        :param texts: the texts for the example (stored as given, may be None)
        :param label: the label for the example
        """
        self.guid = guid
        self.texts = texts
        self.label = label

    def __str__(self) -> str:
        """
        Returns a one-line description containing the label and the texts
        joined by "; ".
        """
        # `texts` defaults to None in __init__; joining None would raise a
        # TypeError, so a missing value is rendered as an empty text list.
        texts = self.texts if self.texts is not None else []
        return " label: {}, texts: {}".format(str(self.label), "; ".join(texts))


class Dataset(torch.utils.data.Dataset):
    """
    Base dataset class. Every method here raises NotImplementedError, so
    subclasses are expected to override them.
    """

    def load_data(self, file_path: Optional[str] = None):
        """
        Placeholder for loading data; must be overridden.

        :param file_path: location of the data to load
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()

    def __getitem__(self, index):
        """
        Placeholder for item access; must be overridden.

        :param index: index of the requested item
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()

    def __len__(self):
        """
        Placeholder for the dataset size; must be overridden.

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()