Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Import cross sections from csv #41

Draft
wants to merge 28 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
0a0270c
add test for process_pdos
Jun 23, 2021
d5c8552
fix the imports
Yaxuan-Lii Jun 29, 2021
553738f
removed irrelevant files
Yaxuan-Lii Jul 1, 2021
5300d62
import is fixed
Yaxuan-Lii Jul 1, 2021
dab091b
try to remove irrelevant files (second time)
Yaxuan-Lii Jul 1, 2021
d69d61b
remove irrelevant files second time
Yaxuan-Lii Jul 1, 2021
7984c15
delete irrelevant files
Yaxuan-Lii Jul 1, 2021
66b1c6f
remove irrelevant files
Yaxuan-Lii Jul 1, 2021
4fdafa7
delete .DS_Store
Yaxuan-Lii Jul 1, 2021
dca2d49
Modified test_process_pdos.py
Yaxuan-Lii Jul 2, 2021
eb079f6
add test.py
Yaxuan-Lii Jul 2, 2021
8190183
Restore some files that were accidentally deleted
ajjackson Jul 2, 2021
946efaa
use flake8 to optimise format
Yaxuan-Lii Jul 5, 2021
cd0f3c0
import cross-sections from CSV archives
Yaxuan-Lii Jul 15, 2021
8f48e5e
form modify
Yaxuan-Lii Jul 17, 2021
a959150
modify follow the comments
Yaxuan-Lii Jul 21, 2021
067c289
make corrections
Yaxuan-Lii Jul 21, 2021
a621ab0
make correction
Yaxuan-Lii Jul 21, 2021
13911d0
Revert changes to galore/__init__.py
ajjackson Jul 21, 2021
4fb993d
modified as comments
Yaxuan-Lii Aug 2, 2021
cd4dc0e
merge the change of galore/__init__.py
Yaxuan-Lii Aug 2, 2021
5c9c31c
add cli to install data and get cross sections
Yaxuan-Lii Aug 9, 2021
b138c4b
merge get_cross_sections_from_csv into get_cross_sections
Yaxuan-Lii Aug 10, 2021
bb8a526
modify and add new test
Yaxuan-Lii Aug 24, 2021
5cdb67b
add an IF statement
Yaxuan-Lii Aug 31, 2021
39651d5
modify as comments
Yaxuan-Lii Sep 9, 2021
a2441b5
modify as comments
Yaxuan-Lii Sep 9, 2021
f59b50a
mistakes fix
Yaxuan-Lii Sep 12, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions galore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,13 @@
from collections.abc import Sequence
import logging

import galore.formats
from math import sqrt, log
import numpy as np
from scipy.interpolate import interp1d

import galore.formats
from galore.cross_sections import cross_sections_info

from galore.cross_sections import get_cross_sections, cross_sections_info


def auto_limits(data_1d, padding=0.05):
Expand Down
144 changes: 144 additions & 0 deletions galore/cross_sections.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,147 @@ def _eval_fit(energy, coeffs):
orb: _eval_fit(energy, np_fromstr(coeffs))
for orb, coeffs in orbitals_fits}})
return el_cross_sections



import tarfile
import numpy as np
def read_csv_file(tar_file_name,file_path):
'''read csv file
Input: the file name
Output: main matrix of each file'''

###Open zipfile
with tarfile.open(tar_file_name) as tf:
with tf.extractfile(file_path) as hello:
data = hello.read().decode()
a = data.split('\r\n')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use descriptive names for variables. It is hard to read a line of code operating on a, b, c, and d and understand what it is supposed to be doing.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new name data_string is a bit better because it is at least "greppable". But it's also a bit misleading because data_string is not actually a string, it's a list. Maybe something like data_lines would be better, as this conveys how it was split?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

data_strings would at least be better. If you read data_string[0] it looks like it indexes a single letter from a string. Whereas data_strings[0] clearly gets a longer string, which can be split.


###get number of elements of each raw
a0 = a[0].split(',')
new_a0 = [i for i in a0 if i !='']
lenth = len(new_a0)

###build main matrix
result = []
for i in range(len(a)):
c = a[i].split(',')[0:lenth]
result.append(c)

###delet needless elements
d = result[-2]
result1 = [i for i in result if i!=d]
new_result = [i for i in result if i!=d][0:-2]

###build dict
dic={}
dic['headers'] = new_result[0]
dic['electron_counts'] = [i for i in result1[-2] if i !=''][1:]
dic['data_table'] = new_result[1:]


return dic



def _cross_sections_from_csv_data(energy,data,reference):


## replace '' with nan
for i in range(len(data['data_table'])):
data['data_table'][i] = [float('NaN') if x == '' else x for x in data['data_table'][i]]

## change the main matrix to float array
data['data_table'] = np.array(data['data_table']).astype(float)
data['electron_counts'] = np.array(data['electron_counts']).astype(float)

## build a new dict which keys are like '1s1/2', '2s1/2', '2p1/2', '2p3/2', '3s1/2', '3p1/2', '3p3/2'...
new_lenth = len(data['electron_counts'])
new_value=np.concatenate((data['data_table'].T[-new_lenth:].T,[data['electron_counts']]),axis=0).T
new_dic = {}
for i in range(new_lenth):
new_key = data['headers'][-new_lenth:][i]
new_dic[new_key]=new_value[i]

## add electron numbers of each orbitals
energy_array = np.array(data['data_table']).T[0]
new_dic['PhotonEnergy'] = energy_array

## match the import energy
index = np.where(new_dic['PhotonEnergy']==energy)[0][0]

## build result dict
res_dict = {}

## result for s orbital
c_s = np.array([new_dic[key] for key in new_dic if 's' in key]).T[index]
n_electrons = np.array([new_dic[key] for key in new_dic if 's' in key]).T[-1]
unit_c_s = np.true_divide(c_s,n_electrons)


value_s = np.max(np.nan_to_num(unit_c_s))

res_dict['s'] = value_s

## result for 'p', 'd', 'f' orbitals
orbitals = ['p', 'd', 'f']


for i in orbitals:
main_matrix = np.array([new_dic[key] for key in new_dic if i in key])
if np.shape(main_matrix) != (0,):
if reference == 'Scofield':
c_s = main_matrix.T[index]

n_electrons = main_matrix.T[-1]
unit_c_s = np.true_divide(c_s,n_electrons)
unit_c_s = np.array([unit_c_s[i:i+2] for i in range(0, len(unit_c_s), 2)])
percent =np.array([np.true_divide(c_s[i:i+2],c_s[i:i+2].sum()) for i in range(0, len(c_s), 2)])
result = np.array(list(map(sum,unit_c_s*percent)))

value = np.max(np.nan_to_num(result))
res_dict[i] = value
else:
c_s = main_matrix.T[index]
n_electrons = main_matrix.T[-1]
unit_c_s = np.true_divide(c_s,n_electrons)
value = np.max(np.nan_to_num(unit_c_s))
res_dict[i] = value

return res_dict


def get_metadata(energy,reference):
dict = {}
dict['energy'] = energy
if reference == 'Scofield':
dict['reference'] = 'J.H. Scofield, Theoretical photoionization cross sections from 1 to 1500 keV'
dict['link'] = 'https://doi.org/10.2172/4545040'
else:
dict['reference'] = 'Yeh, J.J. and Lindau, I. (1985) Atomic Data and Nuclear Data Tables 32 pp 1-155'
dict['link'] = 'https://doi.org/10.1016/0092-640X(85)90016-6'
return dict


def get_cross_section_from_csv(elements,energy,reference):
result = {}
metadata = get_metadata(energy,reference)
result.update(metadata)


for element in elements:

if reference == 'Scofield':
filename = 'Scofield_csv_database.tar.gz'
filepath = 'Scofield_csv_database/Z_{element1}.csv'
else:
filename ='Yeh_Lindau_1985_Xsection_CSV_Database.tar.gz'
filepath = 'Yeh_Lindau_1985_Xsection_CSV_Database/{element1}.csv'

filepath = filepath.format(element1 = element)
data = read_csv_file(filename,filepath)

cross_sections = _cross_sections_from_csv_data(energy,data,reference)
result[element] = cross_sections

return result
42 changes: 42 additions & 0 deletions test/test_process_pdos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import numpy as np
import galore
import unittest
from numpy.testing import assert_array_almost_equal


class test_process_pdos(unittest.TestCase):
def test_lorentzian(self):
self.assertAlmostEqual(galore.lorentzian(3., f0=1, fwhm=(3 * 2.35482)),
0.068238617255)

def test_broaden(self):
broadened_data = galore.xy_to_1d(np.array([[1., 0.5], [3., 1.5]]),
range(6),
spikes=False)
lorentzian = 0.2

assert_array_almost_equal(
galore.broaden(broadened_data,
d=2,
dist='lorentzian',
width=lorentzian),
np.array([
0.00595715, 1.60246962, 3.19897467, 4.7825862, 0.01190685, 0.
]))

def test_process_pdos(self):
vasprun = 'test/SnO2/vasprun.xml.gz'
xmin, xmax = (-10, 4)
weighting = 'Alka'

plotting_data = galore.process_pdos(input=[vasprun],
gaussian=0.3,
lorentzian=0.2,
xmin=xmin,
xmax=xmax,
weighting=weighting)
self.assertEqual(plotting_data['O']['energy'][0], -10.0)


if __name__ == '__main__':
unittest.main()