Spaces:
Runtime error
Runtime error
File size: 1,501 Bytes
58627fa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import os
import time
import torch
import ujson
from colbert.utils.utils import f7, print_message, timestamp
def load_contexts(first_hop_topk_path):
    """Read a JSON-lines file of ``(qid, facts)`` pairs into a dict.

    Every line of the file is decoded with ``ujson`` and unpacked into a
    qid and a sequence of facts. Facts that decode to a ``list`` are turned
    into tuples; every other fact is stored unchanged.

    Args:
        first_hop_topk_path: Path of the file to read, one JSON value per line.

    Returns:
        A dict mapping each qid to its list of facts. When a qid appears on
        more than one line, the later line overwrites the earlier one.
    """
    backgrounds_by_qid = {}
    latest_entry = None  # Most recently parsed (qid, facts); reported in the summary.

    with open(first_hop_topk_path) as handle:
        print_message(f"#> Loading backgrounds from {handle.name} ..")

        for raw_line in handle:
            qid, raw_facts = ujson.loads(raw_line)

            facts = []
            for fact in raw_facts:
                # JSON arrays decode to lists; store them as tuples instead.
                if type(fact) is list:
                    fact = tuple(fact)
                facts.append(fact)

            backgrounds_by_qid[qid] = facts
            latest_entry = (qid, facts)

    # Disabled sanity check kept from the original (no `queries` in scope here):
    # assert len(qid2backgrounds) in [0, len(queries)], (len(qid2backgrounds), len(queries))
    print_message(f"#> {first_hop_topk_path} has {len(backgrounds_by_qid)} qids. Last = {latest_entry}")

    return backgrounds_by_qid
def load_collectionX(collection_path, dict_in_dict=False):
    """Load a JSON-lines collection and index it sentence by sentence.

    Each line must decode to a mapping with a ``'pid'`` equal to the line's
    zero-based position in the file, a ``'title'``, and a ``'text'`` that is
    a list of sentences. Every stored sentence is prefixed with the title
    and ``' | '``.

    Args:
        collection_path: Path of the collection file, one JSON record per line.
        dict_in_dict: Selects the layout of the returned mapping (see Returns).

    Returns:
        With ``dict_in_dict`` false, a flat dict keyed by
        ``(pid, sentence_index)`` tuples. With ``dict_in_dict`` true, a dict
        keyed by pid whose values are dicts keyed by sentence index; a record
        with no sentences then maps to an empty dict.

    Raises:
        AssertionError: If a record's ``'text'`` is not a list, or its
            ``'pid'`` does not match its line position.
    """
    print_message("#> Loading collection...")

    sentences = {}

    with open(collection_path) as handle:
        for pid, raw_line in enumerate(handle):
            record = ujson.loads(raw_line)

            assert type(record['text']) is list
            assert record['pid'] == pid, (pid, record)

            # Build the whole passage first so a malformed sentence fails
            # before anything from this record is stored.
            passage = []
            for sentence in record['text']:
                passage.append(record['title'] + ' | ' + sentence)

            if dict_in_dict:
                # Nested layout: pid -> {sentence index -> sentence}.
                sentences[pid] = dict(enumerate(passage))
            else:
                # Flat layout: (pid, sentence index) -> sentence.
                for sentence_idx, sentence in enumerate(passage):
                    sentences[(pid, sentence_idx)] = sentence

    return sentences
|