-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcsv_writer.py
192 lines (175 loc) · 5.52 KB
/
csv_writer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
from __future__ import division
class CSVWriter(object):
def __init__(self, handle):
self.handle = handle
def write(self, filename):
import csv
cur = self.handle.cursor()
# These are the current column headings in the PDB stat and Xia2 stat tables.
stat_name_list = [
'anomalouscorrelation',
'IoverSigma',
'completeness',
'diffI',
'lowresolutionlimit',
'RpimI',
'multiplicity',
'RmeasIpim',
'anomalousslope',
'diffF',
'wilsonbfactor',
'RmeasI',
'highresolutionlimit',
'RpimIpim',
'anomalousmultiplicity',
'RmergeI',
'totalobservations',
'anomalouscompleteness',
'cchalf',
'totalunique']
pdb_stat_name_list = [
'Program',
'Resolution_Range_High',
'Resolution_Range_Low',
'Completeness',
'Number_of_Reflections',
'R_Value',
'R_free',
'Num_Atoms',
'Wilson_B',
'Experiment_Type'
'Date_of_Collection',
'Synchrotron_(Y/N)',
'Radiation_Source',
'Beamline',
'Wavlength_or_Range',
'Detector_Type',
'Detector_Manufacturer',
'Intensity_Integration_Software',
'Data_Scaling_Software',
'Data_Redundancy',
'R_Merge',
'R_Sym',
'I/SIGMA',
'Phasing_method',
'Solvent_Content',
'Matthews_Coefficient']
db = [['PDB_id',['pdb_id','data_type']],
['PDB_id_Stats', pdb_stat_name_list],
['High_Res_Stats', stat_name_list],
['Low_Res_Stats', stat_name_list],
['Overall_Stats', stat_name_list],
['SWEEPS',['wavelength', 'sweep_number']],
['Dev_Stats_PDB',['date_time','execution_number']],
['Dev_Stats_json', ['date_time','execution_number', 'dials_version']]]
print 'Enter the tables to extract data from. Type "done" when finished.'
print 'Available Tables...'
available_tables = []
for item in db:
print item[0]
available_tables.append(item[0])
table_count = 1
to_extract = [] # This is the list that will be populated with columns to extract.
while True:
table_name = raw_input('Table %s: '% table_count)
if table_name == 'done':
if table_count == 1:
print 'Select at least one table.'
continue
else:
break
if table_name not in available_tables:
print 'Invalid name'
continue
else:
to_extract.append([table_name,])
table_count += 1
print 'Enter the columns to extract data from. Type "done" when finished.'
print 'Type "all" for all columns in that table.'
count = 0
for name in to_extract:
for item in db:
table_name = name[0]
column_list = item[1]
if table_name == item[0]:
print 'Available columns for %s are...' % table_name
for column in column_list:
print column
column_count = 1
while True:
column_name = raw_input('Column %s: '% column_count)
if column_name == 'done':
if column_count == 1:
print 'Select at least one table.'
continue
else:
break
if column_name == 'all':
to_extract[count].append(column_list)
break
if column_name not in column_list:
print 'Invalid name'
continue
else:
try:
to_extract[count][1].append(column_name)
column_count += 1
except:
to_extract[count].append([])
to_extract[count][1].append(column_name)
column_count += 1
count += 1
print 'Selected items are...'
for item in to_extract:
string = item[0] + ':'
for column in item[1]:
string += ' ' + column +','
string = string[:-1]
print string
while True:
answer = raw_input('Is this correct? (y/n)')
if answer.lower() == 'n':
exit(0)
elif answer.lower() != 'y':
print 'Please enter again (y/n)'
continue
break
# The following builds the SQL command in order to select the data needed for the csv.
# It firstly builds a string in the right format of the column names and then joins
# all the tables together. There have been some problems with this execution, whereby
# the database stalls, but in its current instance it works as expected.
sql_command = str()
for item in to_extract:
for column in item[1]:
sql_command += item[0] + '.' + column + ', '
sql_command = sql_command[:-2]
sql_command = 'SELECT ' + sql_command + ' FROM'
sql_command = sql_command + '''
PDB_id
JOIN PDB_id_Stats
JOIN SWEEPS
JOIN High_Res_Stats
JOIN Phasing
JOIN Dev_Stats_PDB
JOIN Dev_Stats_json
JOIN Low_Res_Stats
JOIN Overall_Stats
ON PDB_id_Stats.pdb_id_id=PDB_id.id
and SWEEPS.pdb_id_id = PDB_id.id
and Phasing.pdb_id_id=PDB_id.id
and Dev_Stats_PDB.pdb_id_id=PDB_id.id
and Dev_Stats_json.sweep_id=SWEEPS.id
and High_Res_Stats.sweep_id = SWEEPS.id
and Low_Res_Stats.sweep_id=SWEEPS.id
and Overall_Stats.sweep_id=SWEEPS.id'''
print sql_command
column_headings = []
for item in to_extract:
for column in item[1]:
column_headings.append(column)
data = cur.execute(sql_command)
print data
with open('output.csv', 'w') as f:
writer = csv.writer(f)
writer.writerow(column_headings)
writer.writerows(data)