To run on HPC

arm61 · Jan 11, 2024 · 2db0c20 · 2db0c20
1 parent 9d2557e
commit 2db0c20
Show file tree

Hide file tree

Showing 10 changed files with 199 additions and 199 deletions.
diff --git a/Snakefile b/Snakefile
@@ -390,7 +390,7 @@ rule rw_4096_true_cov_true_128:
         f'src/code/random_walks/truecov_rw.py'
 
 for start_diff in [0, 2, 4, 6, 8, 10, 15, 20]:
-    for n in range(0, 16, 1):
+    for n in range(0, 6, 1):
         rule:
             name:
                 f"llzo_many_{n}_{start_diff}"
@@ -415,7 +415,7 @@ for start_diff in [0, 2, 4, 6, 8, 10, 15, 20]:
             f"true_D_llzo_{start_diff}"
         input:
             'src/code/llzo/true_ls.py',
-            [f'src/data/llzo/diffusion_{n}_{start_diff}.npz' for n in range(0, 16, 1)]
+            [f'src/data/llzo/diffusion_{n}_{start_diff}.npz' for n in range(0, 6, 1)]
         output:
             f'src/data/llzo/true_{start_diff}.npz'
         conda:
@@ -432,7 +432,7 @@ for start_diff in [0, 2, 4, 6, 8, 10, 15, 20]:
             f"glswlsols_D_llzo_{start_diff}"
         input:
             'src/code/llzo/glswlsols.py',
-            [f'src/data/llzo/diffusion_{n}_{start_diff}.npz' for n in range(0, 16, 1)]
+            [f'src/data/llzo/diffusion_{n}_{start_diff}.npz' for n in range(0, 6, 1)]
         output:
             f'src/data/llzo/glswlsols_{start_diff}.npz'
         conda:

diff --git a/environment.yml b/environment.yml
@@ -7,8 +7,8 @@ dependencies:
   - python=3.10
   - scipy=1.9.3
   - scikit-learn=1.1.3
-  - mdanalysis=2.3.0
+  - mdanalysis=2.6.1
   - pip:
-      - git+https://github.com/matplotlib/matplotlib.git
+      - matplotlib==3.8.2
       - pyblock==0.6
-      - git+https://github.com/bjmorgan/kinisi.git
+      - git+https://github.com/bjmorgan/kinisi.git@with-f
diff --git a/showyourwork.yml b/showyourwork.yml
@@ -11,25 +11,25 @@ margin_icons:
         sandbox: "0.25,0.25,0.25"
     horizontal_offset: -4
 
-datasets:
-    10.5281/zenodo.7602139:
-        contents:
-            traj0.xyz: src/data/llzo/traj0.xyz
-            traj1.xyz: src/data/llzo/traj1.xyz
-            traj2.xyz: src/data/llzo/traj2.xyz
-            traj3.xyz: src/data/llzo/traj3.xyz
-            traj4.xyz: src/data/llzo/traj4.xyz
-            traj5.xyz: src/data/llzo/traj5.xyz
-            traj6.xyz: src/data/llzo/traj6.xyz
-            traj7.xyz: src/data/llzo/traj7.xyz
-            traj8.xyz: src/data/llzo/traj8.xyz
-            traj9.xyz: src/data/llzo/traj9.xyz
-            traj10.xyz: src/data/llzo/traj10.xyz
-            traj11.xyz: src/data/llzo/traj11.xyz
-            traj12.xyz: src/data/llzo/traj12.xyz
-            traj13.xyz: src/data/llzo/traj13.xyz
-            traj14.xyz: src/data/llzo/traj14.xyz
-            traj15.xyz: src/data/llzo/traj15.xyz
+# datasets:
+#     10.5281/zenodo.7602139:
+#         contents:
+#             traj0.xyz: src/data/llzo/traj0.xyz
+#             traj1.xyz: src/data/llzo/traj1.xyz
+#             traj2.xyz: src/data/llzo/traj2.xyz
+#             traj3.xyz: src/data/llzo/traj3.xyz
+#             traj4.xyz: src/data/llzo/traj4.xyz
+#             traj5.xyz: src/data/llzo/traj5.xyz
+#             traj6.xyz: src/data/llzo/traj6.xyz
+#             traj7.xyz: src/data/llzo/traj7.xyz
+#             traj8.xyz: src/data/llzo/traj8.xyz
+#             traj9.xyz: src/data/llzo/traj9.xyz
+#             traj10.xyz: src/data/llzo/traj10.xyz
+#             traj11.xyz: src/data/llzo/traj11.xyz
+#             traj12.xyz: src/data/llzo/traj12.xyz
+#             traj13.xyz: src/data/llzo/traj13.xyz
+#             traj14.xyz: src/data/llzo/traj14.xyz
+#             traj15.xyz: src/data/llzo/traj15.xyz
 
 dependencies:
     src/ms.tex:
@@ -45,7 +45,7 @@ dependencies:
         - src/data/random_walks/numerical/D_1_128_128.npz
     src/scripts/diffusion.py:
         - src/data/llzo/true_10.npz
-        {% for n in range(0, 16, 1) %}
+        {% for n in range(0, 6, 1) %}
         - src/data/llzo/diffusion_{{n}}_10.npz
         {% endfor %}
         - src/scripts/utils/_fig_params.py

diff --git a/src/code/llzo/glswlsols.py b/src/code/llzo/glswlsols.py
@@ -15,9 +15,12 @@
 start_diff = snakemake.params['start_diff']
 
 timestep = np.load(f'src/data/llzo/diffusion_0_{start_diff}.npz')['dt'][0, 0]
-no = np.load(f'src/data/llzo/diffusion_0_{start_diff}.npz')['no'][0, 0]
-d = np.zeros((16, 8, 1, timestep.size))
-for i in range(0, 16, 1):
+length = 2000
+ll = len([i + length for i in range(0, 20001 - length, length)])
+# length = 500
+# ll = len([i + length for i in range(0, 2000, length)])
+d = np.zeros((6, 8, ll, timestep.size))
+for i in range(0, 6, 1):
     d[i] = np.load(f'src/data/llzo/diffusion_{i}_{start_diff}.npz')['msd_true']
 
 max_ngp = np.argwhere(timestep > start_diff)[0][0]
@@ -31,16 +34,16 @@
 V = np.diag(true_cov.diagonal())
 
 g1 = np.matmul(np.linalg.inv(np.matmul(A.T, np.matmul(np.linalg.pinv(W), A))),
-               np.matmul(A.T, np.matmul(np.linalg.pinv(W), y)))[0] / 6
-c1 = np.sqrt(np.linalg.inv(np.matmul(A.T, np.matmul(np.linalg.pinv(W), A)))[0][0]) / 6
+               np.matmul(A.T, np.matmul(np.linalg.pinv(W), y)))[0] / 6e4
+c1 = np.sqrt(np.linalg.inv(np.matmul(A.T, np.matmul(np.linalg.pinv(W), A)))[0][0]) / 6e4
 g2 = np.matmul(np.linalg.inv(np.matmul(A.T, np.matmul(np.linalg.pinv(V), A))),
-               np.matmul(A.T, np.matmul(np.linalg.pinv(V), y)))[0] / 6
-c2 = np.sqrt(np.linalg.inv(np.matmul(A.T, np.matmul(np.linalg.pinv(V), A)))[0][0]) / 6
-g3 = np.matmul(np.linalg.inv(np.matmul(A.T, A)), np.matmul(A.T, y))[0] / 6
+               np.matmul(A.T, np.matmul(np.linalg.pinv(V), y)))[0] / 6e4
+c2 = np.sqrt(np.linalg.inv(np.matmul(A.T, np.matmul(np.linalg.pinv(V), A)))[0][0]) / 6e4
+g3 = np.matmul(np.linalg.inv(np.matmul(A.T, A)), np.matmul(A.T, y))[0] / 6e4
 
 c3 = []
 for i in true_msd:
-    c3.append(linregress(timestep, i).stderr / 6)
+    c3.append(linregress(timestep, i).stderr / 6e4)
 
 np.savez(f'src/data/llzo/glswlsols_{start_diff}.npz',
          gls_pop=g1,

diff --git a/src/code/llzo/many_runs.py b/src/code/llzo/many_runs.py
@@ -2,97 +2,84 @@
 import numpy as np
 from kinisi.parser import MDAnalysisParser
 from kinisi.diffusion import MSDBootstrap
-from tqdm import tqdm
 
 n = snakemake.params['n']
 start_diff = snakemake.params['start_diff']
 
-def universe_slice(u: mda.Universe, data_file: str, slice_start: int, slice_end: int) -> mda.Universe:
+step = 8
+
+def universe_slice(data: str, slice_start: int, slice_end: int, m: int) -> mda.Universe:
     """
-    Concatenate a list of Universes.
+    Create slice universe.
 
-    :param u_list: List of Universe objects
-    :param data_file: File path for data
+    :param data: Numpy data of coordinates
     :param slice_start: Step to start the slice from
+    :param slice_end: Step to end the slice from
     :return: A single flattened Universe
     """
-    j = 0
-    trajectory = np.zeros((len(u.trajectory[slice_start:slice_end]), len(u.atoms), 3))
-    for t in tqdm(u.trajectory[slice_start:slice_end]):
-        trajectory[j] = t.positions
-        j += 1
-    shorter_universe = mda.Universe(data_file, trajectory)
-    for t in tqdm(shorter_universe.trajectory):
-        for i in shorter_universe.atoms:
-            if i.type == 'LI':
-                i.type = 'Li'
-            if i.type == 'Li1':
-                i.type = 'Li'
-            if i.type == 'Li2':
-                i.type = 'Li'
-            if i.type == 'L':
-                i.type = 'La'
-            if i.type == 'Z':
-                i.type = 'Zr'
-        t.dimensions = np.array([26.175106662244392197, 26.175106662244392197, 26.175106662244392197, 90, 90, 90])
-    return shorter_universe
-
-file = f'src/data/llzo/traj{n}.xyz'
+    data_reshape = np.copy(data).reshape(-1, 1536, 3)
+    data_subset = data_reshape[slice_start:slice_end, m::step]
+    data_subset *= 0.52917721067
+    u = mda.Universe.empty(data_subset.shape[1], 1, n_frames=data_subset.shape[0], trajectory=True)
+    list_type = ['A'] * (1536-448) + ['Li'] * 448
+    u.add_TopologyAttr('type', list_type[m::step])
+    for i, t in enumerate(u.trajectory):
+        t.positions = data_subset[i]
+        t.dimensions = np.array([26.30938114, 26.30938114, 26.30938114, 90, 90, 90])
+    return u
 
-
-u = mda.Universe(file, file)
-uu = universe_slice(u, file, 0, 2000)
+data = np.loadtxt(f'src/data/llzo/traj{n}.out', usecols=[0, 1, 2])
+length = 2000
+uu = universe_slice(data, 0, length, 0)
 step_skip = 100  # sampling rate
 timestep = 5.079648e-4  # ps
 da_params = {'specie': 'Li', 'time_step': timestep, 'step_skip': step_skip}
 p = MDAnalysisParser(uu, **da_params)
 
-ngp = np.zeros((8, 1, p.delta_t.size))
-no = np.zeros((8, 1, p.delta_t.size))
-dt = np.zeros((8, 1, p.delta_t.size))
-msd = np.zeros((8, 1, p.delta_t.size))
-msd_true = np.zeros((8, 1, p.delta_t.size))
-msd_std = np.zeros((8, 1, p.delta_t.size))
-cov = np.zeros((8, 1, p.delta_t[np.where(p.delta_t > start_diff)].size, p.delta_t[np.where(p.delta_t > start_diff)].size))
-d = np.zeros((8, 1, 3200))
-intercept = np.zeros((8, 1, 3200))
+ll = len([i + length for i in range(0, 20001 - length, length)])
+dt = np.zeros((step, ll, p.delta_t.size))
+msd = np.zeros((step, ll, p.delta_t.size))
+msd_true = np.zeros((step, ll, p.delta_t.size))
+msd_std = np.zeros((step, ll, p.delta_t.size))
+n_o = np.zeros((step, ll, p.delta_t.size))
+cov = np.zeros((step, ll, p.delta_t[np.where(p.delta_t > start_diff)].size, p.delta_t[np.where(p.delta_t > start_diff)].size))
+d = np.zeros((step, ll, 3200))
+g = np.zeros((step, ll, 3200))
+f = np.zeros((step, ll, p.delta_t.size))
+intercept = np.zeros((step, ll, 3200))
 
-for m in range(0, 8, 1):
-    for i, slice in enumerate(range(0, 2000, 2000)):
-        uu = universe_slice(u, file, slice, slice+2000)
+for m in range(0, step, 1):
+    for i, slice in enumerate(range(0, 20001 - length, length)):
+        uu = universe_slice(data, slice, slice+length, m)
 
         rng = np.random.RandomState(42)
         np.random.seed(42)
 
         da_params = {'specie': 'Li', 'time_step': timestep, 'step_skip': step_skip}
         p = MDAnalysisParser(uu, **da_params)
-        n_disp_3d = []
-        for t, j in enumerate(p.disp_3d):
-            if m == 0:
-                n_disp_3d.append(j[::8])
-                msd_true[m, i, t] = np.sum(j[::8, ::p.timesteps[t]]**2, axis=-1).mean()
-            else:
-                n_disp_3d.append(j[m::8])
-                msd_true[m, i, t] = np.sum(j[m::8, ::p.timesteps[t]]**2, axis=-1).mean()
-
-        b = MSDBootstrap(p.delta_t, n_disp_3d, p._n_o / 8)
+        b = MSDBootstrap(p.delta_t, p.disp_3d, p._n_o)
         b.diffusion(start_diff, random_state=rng)
-        ngp[m, i] = b.ngp
-        no[m, i] = p._n_o / 8
+        # print(b._popt)
         dt[m, i] = b.dt
         msd[m, i] = b.n
+        msd_true[m, i] = b.n
         msd_std[m, i] = b.s
+        n_o[m, i] = p._n_o
         cov[m, i] = b.covariance_matrix
         d[m, i] = b.D.samples
+        g[m, i] = b.gradient.samples
+        f[m, i] = b._f
+        print(b._f)
         intercept[m, i] = b.intercept.samples
 
 np.savez(f'src/data/llzo/diffusion_{n}_{start_diff}.npz',
-        ngp=ngp,
-        no=no,
         dt=dt,
         msd_true=msd_true,
         msd=msd,
         msd_std=msd_std,
+        n_o = n_o,
         cov=cov,
         d=d,
+        g=g,
+        f=f,
         intercept=intercept)
diff --git a/src/code/llzo/true_ls.py b/src/code/llzo/true_ls.py
@@ -14,9 +14,12 @@
 start_diff = snakemake.params['start_diff']
 
 timestep = np.load(f'src/data/llzo/diffusion_0_{start_diff}.npz')['dt'][0, 0]
-no = np.load(f'src/data/llzo/diffusion_0_{start_diff}.npz')['no'][0, 0]
-d = np.zeros((16, 8, 1, timestep.size))
-for i in range(0, 16, 1):
+length = 2000
+ll = len([i + length for i in range(0, 20001 - length, length)])
+# length = 500 
+# ll = len([i + length for i in range(0, 2000, length)])
+d = np.zeros((6, 8, ll, timestep.size))
+for i in range(0, 6, 1):
     d[i] = np.load(f'src/data/llzo/diffusion_{i}_{start_diff}.npz')['msd_true']
 
 true_mean = d.reshape(-1, d.shape[-1]).mean(0)

diff --git a/src/scripts/diffusion.py b/src/scripts/diffusion.py
@@ -13,8 +13,12 @@
 type = 'kinisi'
 
 dllzo_true = np.load(paths.data / "llzo/true_10.npz")['diff_c']
-d = np.zeros((16, 8, 1, 3200))
-for i in range(0, 16, 1):
+length = 2000
+ll = len([i + length for i in range(0, 20001 - length, length)])
+# length = 500
+# ll = len([i + length for i in range(0, 2000, length)])
+d = np.zeros((6, 8, ll, 3200))
+for i in range(0, 6, 1):
     d[i] = np.load(paths.data / f'llzo/diffusion_{i}_10.npz')['d']
 d = d.reshape(-1, 3200)
 llzo_x = np.linspace(dllzo_true.min(), dllzo_true.max(), 1000)
@@ -26,6 +30,8 @@
 axes = []
 titles = []
 
+print(d.shape)
+
 axes.append(fig.add_subplot(gs[0, 0]))
 y, x = np.histogram(d.mean(-1), bins=int(fp.NBINS * 0.5), density=True)
 axes[-1].stairs(y, x, fill=True, color='#ECBCA7', label='$p(\hat{D}^*)$')

diff --git a/src/scripts/random_walk.py b/src/scripts/random_walk.py
@@ -75,11 +75,11 @@
               norm.pdf(rw_x, dinfty_true.mean(), dinfty_true.std()),
               color='#B8B8B8',
               label='$p(\hat{D}^*_{\mathrm{num}})$')
-axes[-1].plot([true.mean(-1).mean() - true.mean(-1).std(),
-               true.mean(-1).mean() + true.mean(-1).std()],
+axes[-1].plot([dinfty_true.mean() - dinfty_true.std(),
+               dinfty_true.mean() + dinfty_true.std()],
               [axes[-1].get_ylim()[1] * 1.1, axes[-1].get_ylim()[1] * 1.1],
               color='#F3ADBC',
-              label='$\sigma[\hat{D}^*]$',
+              label='$\sigma[\hat{D}^*_{\mathrm{num}}]$',
               marker='|')
 axes[-1].set_yticks([0, 10, 20])
 axes[-1].set_xlabel(r"$\hat{D}^*$")
@@ -93,14 +93,14 @@
 axes.append(fig.add_subplot(gs[0, 3]))
 y, x = np.histogram(true.var(-1, ddof=1), bins=fp.NBINS, density=True)
 axes[-1].stairs(y, x, fill=True, color='#B3CBE8', label='$p(\hat{\sigma}^2[D^*])$')
-axes[-1].axvline(true.mean(-1).var(ddof=1),
+axes[-1].axvline(dinfty_true.var(ddof=1),
                  ymax=0.85,
                  c='#F3ADBC',
-                 label='$\sigma^2[\hat{D}^*]$',
+                 label=r'$\sigma^2[\hat{D}^*_{\mathrm{num}}]$',
                  ls='-')
 axes[-1].set_xlabel(r"$\sigma^2 [\hat{D}^*]$")
 axes[-1].set_ylabel(r"$p(\sigma^2 [\hat{D}^*])$")
-axes[-1].set_xlim([0, None])
+axes[-1].set_xlim([0, 3.5e-3])
 titles.append("d")
 axes[-1].set_title("all simulations")
 axes[-1].legend(loc='upper left', bbox_to_anchor=(0.5, 1))