-
Notifications
You must be signed in to change notification settings - Fork 1
/
prot-partof-dom-fam.py
54 lines (48 loc) · 1.54 KB
/
prot-partof-dom-fam.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import os, json, argparse, sys, datetime, time
import pronto, six
"""
bzcat latest-all.json.bz2 |wikibase-dump-filter --simplify --claim 'P698&P921' |jq '[.id,.claims.P698,.claims.P921]' -c >PMID.ndjson
"""
# Compare two query results and print the statements missing from the first.
#
# Two JSON result files named after this script are read: "<script>.json"
# (reported below as "part-of statements") and "<script>1.json" (reported as
# "has-part statements").  For every (item, fam) pair found in the second file
# but not in the first, one JSON object describing a P361 claim is printed on
# its own line.  With -q/--query both files are first regenerated by running
# "wd sparql" on "<script>.rq" and "<script>.rq1".
#
# NOTE(review): the progress messages go to stdout together with the JSON
# lines; if the output is meant to be consumed as pure NDJSON they probably
# belong on stderr -- confirm with whoever consumes it before changing.


def _run_query(query_file, result_file):
    """Run ``wd sparql`` on *query_file*, redirecting output to *result_file*.

    Raises:
        RuntimeError: if the shell command reports a non-zero exit status.
    """
    print('performing query...')
    command = 'wd sparql {} >{}'.format(query_file, result_file)
    # close() waits for the command and returns None when it exited with 0.
    status = os.popen(command).close()
    if status is not None:
        raise RuntimeError(
            'command failed with status {}: {}'.format(status, command))


def _load_rows(result_file):
    """Return the parsed JSON content of *result_file*."""
    with open(result_file, encoding='utf-8') as handle:
        return json.load(handle)


# Command-line interface: a single optional flag that triggers the queries.
parser = argparse.ArgumentParser()
parser.add_argument("-q", "--query", help="perform SPARQL query",
                    action="store_true")
args = parser.parse_args()

# Query and result files share the script's base name (extension removed).
script = os.path.splitext(os.path.basename(sys.argv[0]))[0]

# Pass 1: collect the (item, fam) pairs that already exist.
part_of_file = '{}.json'.format(script)
if args.query:
    _run_query('{}.rq'.format(script), part_of_file)
jol = _load_rows(part_of_file)
print('reading {} part-of statements'.format(len(jol)))
partofs = {(d.get('item'), d.get('fam')) for d in jol}

# Pass 2: emit a claim for every pair that pass 1 did not contain.
has_part_file = '{}1.json'.format(script)
if args.query:
    _run_query('{}.rq1'.format(script), has_part_file)
jol = _load_rows(has_part_file)
print('considering {} has-part statements'.format(len(jol)))
for d in jol:
    item = d.get('item')
    fam = d.get('fam')
    if (item, fam) in partofs:
        continue
    j = {"id": item,
         "claims": {
             "P361": {"value": fam,
                      "references": {"P887": "Q96775080"}},
         }
         }
    # Flush per line so a downstream reader sees each object immediately.
    print(json.dumps(j), flush=True)