Spaces:
Running
on
Zero
Running
on
Zero
""" | |
Parts of the code are based on the source code of memit
MIT License | |
Copyright (c) 2022 Kevin Meng | |
Permission is hereby granted, free of charge, to any person obtaining a copy | |
of this software and associated documentation files (the "Software"), to deal | |
in the Software without restriction, including without limitation the rights | |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
copies of the Software, and to permit persons to whom the Software is | |
furnished to do so, subject to the following conditions: | |
The above copyright notice and this permission notice shall be included in all | |
copies or substantial portions of the Software. | |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
SOFTWARE. | |
""" | |
import collections | |
import json | |
from pathlib import Path | |
import torch | |
REMOTE_ROOT_URL = "https://rome.baulab.info" | |
REMOTE_URL = f"{REMOTE_ROOT_URL}/data/dsets/attribute_snippets.json" | |
class AttributeSnippets:
    """
    Contains wikipedia snippets discussing entities that have some property.

    More formally, given a tuple t = (s, r, o):
    - Let snips = AttributeSnippets(DATA_DIR)
    - snips[r][o] is a list of wikipedia articles for all s' such that
      t' = (s', r, o) is valid.

    The records exactly as loaded from the JSON file are also kept, in file
    order, in the ``snippets_list`` attribute.
    """

    def __init__(self, data_dir: str):
        """
        Load ``attribute_snippets.json`` from *data_dir* and index its samples.

        If the file is not present in *data_dir*, the directory is created
        (including parents) and the file is downloaded from ``REMOTE_URL``
        before loading.

        Each record in the file must provide the keys ``"relation_id"``,
        ``"target_id"`` and ``"samples"``; the samples of all records sharing
        the same (relation_id, target_id) pair are concatenated in file order.

        :param data_dir: directory that holds (or will receive) the JSON file.
        """
        data_dir = Path(data_dir)
        snips_loc = data_dir / "attribute_snippets.json"

        if not snips_loc.exists():
            print(f"{snips_loc} does not exist. Downloading from {REMOTE_URL}")
            data_dir.mkdir(exist_ok=True, parents=True)
            torch.hub.download_url_to_file(REMOTE_URL, snips_loc)

        # Decode explicitly as UTF-8 (the JSON interchange encoding) instead of
        # relying on the platform's locale default, which would mis-decode or
        # reject non-ASCII text on e.g. Windows code pages.
        with open(snips_loc, "r", encoding="utf-8") as f:
            snippets_list = json.load(f)

        # Two-level index: relation id -> target id -> list of samples.
        snips = collections.defaultdict(lambda: collections.defaultdict(list))
        for el in snippets_list:
            rid, tid = el["relation_id"], el["target_id"]
            # Append one by one so that a record with no samples does not
            # create an empty entry for its target id.
            for sample in el["samples"]:
                snips[rid][tid].append(sample)

        self._data = snips
        self.snippets_list = snippets_list

    def __getitem__(self, item):
        """
        Return the mapping ``target_id -> list of samples`` for relation *item*.

        The underlying storage is a ``defaultdict``: looking up a relation id
        that has not been seen inserts and returns a new empty mapping rather
        than raising ``KeyError``.
        """
        return self._data[item]