-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcreate_reaction_table.psql
77 lines (59 loc) · 1.82 KB
/
create_reaction_table.psql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
-- create data set for deep learning
set search_path to reaxys;
drop table if exists reac_var cascade;
-- first pass at table for deep learning
select
reaction_id,
variation_id,
reaxys_reaction_id,
rxnsmiles,
reactant_id,
product_id,
rxno_reaction_type,
products,
reagents,
catalysts,
solvents,
array_length(stages,1) as nstages,
year,
time,
temperature
into reac_var
from
reaction, citation, rdfile, variation
left join conditions on -- left join because not all reactions have conditions
condition_id = any(variation.conditions)
where
variation_id = any(reaction.variations)
and rdfile.rx_id = reaxys_reaction_id
and citation_id = variation.cit_id[1]; -- first citation is most recent
update reac_var set products = product_id where products is null;
-- expand to one reaction per product
select
reaction_id,
variation_id,
reaxys_reaction_id,
rxnsmiles,
reactant_id,
rxno_reaction_type,
unnest(products) as product,
reagents,
catalysts,
solvents,
nstages,
year,
time,
temperature
into reac_var2
from reac_var;
drop table reac_var;
-- add the yield for each product that has one.
alter table reac_var2 add column yield float;
update reac_var2 set yield = substance.yield from substance where reac_var2.product = substance.substance_id;
-- set smiles for reactions w/o yeild
alter table reac_var2 add column product_smiles text;
update reac_var2 set product_smiles = smiles from molecule where molecule_id = product and smiles != '' and smiles is not null;
-- set smiles for products with yeilds
select distinct substance_id, smiles into temporary table rxn_smiles from molecule, substance where molecule_id = reaxys_id and smiles != '' and smiles is not null;
update reac_var2 set product_smiles = smiles from rxn_smiles where product = substance_id and product_smiles is null;
delete from reac_var2 where product_smiles is null;