diff --git a/Makefile b/Makefile index c8a3d3b441..e8ee556107 100644 --- a/Makefile +++ b/Makefile @@ -7,14 +7,20 @@ compile: # The reconstruction/ecoli/dataclasses/process/*.py files were generated by # write_ode_file.py in Parca code. # Fireworks writes launcher_20* and block_20*. +# `aesara-cache clear` clears the current $(aesara-cache) cache, not others, and +# leaves 3 small *.delete.me dirs for later in case they're in use, esp. on NFS. clean: - rm -fr fixtures + rm -fr fixtures cache (cd reconstruction/ecoli/dataclasses/process && rm -f equilibrium_odes.py two_component_system_odes*.py) find . -name "*.pyc" -exec rm -rf {} \; find . -name "*.o" -exec rm -fr {} \; find . -name "*.so" -exec rm -fr {} \; rm -fr build rm -fr launcher_20* block_20* + if [ "`aesara-cache | xargs du -sm | cut -f1`" -gt 30 ]; then \ + echo "Clearing the aesara-cache since it's larger than threshold."; \ + aesara-cache clear; \ + fi # Delete just the *.so libraries then (re)compile them. # This is useful when switching to a different Python virtualenv. diff --git a/reconstruction/ecoli/fit_sim_data_1.py b/reconstruction/ecoli/fit_sim_data_1.py index 2ad175dfdc..2a21e01c7f 100644 --- a/reconstruction/ecoli/fit_sim_data_1.py +++ b/reconstruction/ecoli/fit_sim_data_1.py @@ -5,12 +5,10 @@ TODO: functionalize so that values are not both set and returned from some methods """ -from __future__ import absolute_import, division, print_function - +import binascii import functools import itertools import os -import sys import time import traceback from typing import Callable, List @@ -19,6 +17,7 @@ from cvxpy import Variable, Problem, Minimize, norm import numpy as np import scipy.optimize +import scipy.sparse import six from six.moves import cPickle, range, zip @@ -26,7 +25,6 @@ from wholecell.containers.bulk_objects_container import BulkObjectsContainer from wholecell.utils import filepath, parallelization, units from wholecell.utils.fitting import normalize, masses_and_counts_for_homeostatic_target -from wholecell.utils import parallelization # Fitting parameters @@ -3164,6 +3162,16 @@ def calculateRnapRecruitment(sim_data, cell_specs): } +def crc32(*arrays: np.ndarray, initial: int = 0) -> int: + """Return a CRC32 checksum of the given ndarrays.""" + def crc_next(initial: int, array: np.ndarray) -> int: + shape = str(array.shape).encode() + values = array.tobytes() + return binascii.crc32(values, binascii.crc32(shape, initial)) + + return functools.reduce(crc_next, arrays, initial) + + def setKmCooperativeEndoRNonLinearRNAdecay(sim_data, bulkContainer): """ Fits the affinities (Michaelis-Menten constants) for RNAs binding to endoRNAses. @@ -3219,6 +3227,9 @@ def setKmCooperativeEndoRNonLinearRNAdecay(sim_data, bulkContainer): TODO (John): Determine what part (if any) of the 'linear' parameter fitting should be retained. """ + def arrays_differ(a: np.ndarray, b: np.ndarray) -> bool: + return a.shape != b.shape or not np.allclose(a, b, equal_nan=True) + cellDensity = sim_data.constants.cell_density cellVolume = sim_data.mass.avg_cell_dry_mass_init / cellDensity / sim_data.mass.cell_dry_mass_fraction countsToMolar = 1 / (sim_data.constants.n_avogadro * cellVolume) @@ -3262,17 +3273,19 @@ def setKmCooperativeEndoRNonLinearRNAdecay(sim_data, bulkContainer): if sim_data.constants.sensitivity_analysis_alpha: Alphas = [0.0001, 0.001, 0.01, 0.1, 1, 10] - for alpha in Alphas: + total_endo_rnase_capacity_mol_l_s = totalEndoRnaseCapacity.asNumber(units.mol / units.L / units.s) + rna_conc_mol_l = (countsToMolar * rnaCounts).asNumber(units.mol / units.L) + degradation_rates_s = degradationRates.asNumber(1 / units.s) + for alpha in Alphas: if VERBOSE: print('Alpha = %f' % alpha) LossFunction, Rneg, R, LossFunctionP, R_aux, L_aux, Lp_aux, Jacob, Jacob_aux = sim_data.process.rna_decay.km_loss_function( - totalEndoRnaseCapacity.asNumber(units.mol / units.L / units.s), - (countsToMolar * rnaCounts).asNumber(units.mol / units.L), - degradationRates.asNumber(1 / units.s), + total_endo_rnase_capacity_mol_l_s, + rna_conc_mol_l, + degradation_rates_s, isEndoRnase, - alpha - ) + alpha) KmCooperativeModel = scipy.optimize.fsolve(LossFunction, Kmcounts, fprime = LossFunctionP) sim_data.process.rna_decay.sensitivity_analysis_alpha_residual[alpha] = np.sum(np.abs(R_aux(KmCooperativeModel))) sim_data.process.rna_decay.sensitivity_analysis_alpha_regulari_neg[alpha] = np.sum(np.abs(Rneg(KmCooperativeModel))) @@ -3291,11 +3304,10 @@ def setKmCooperativeEndoRNonLinearRNAdecay(sim_data, bulkContainer): totalEndoRNcap = units.sum(endoRNaseConc * kcat) LossFunction, Rneg, R, LossFunctionP, R_aux, L_aux, Lp_aux, Jacob, Jacob_aux = sim_data.process.rna_decay.km_loss_function( totalEndoRNcap.asNumber(units.mol / units.L), - (countsToMolar * rnaCounts).asNumber(units.mol / units.L), - degradationRates.asNumber(1 / units.s), + rna_conc_mol_l, + degradation_rates_s, isEndoRnase, - alpha - ) + alpha) KmcountsIni = (( totalEndoRNcap / degradationRates.asNumber() ) - rnaConc).asNumber() KmCooperativeModel = scipy.optimize.fsolve(LossFunction, KmcountsIni, fprime = LossFunctionP) sim_data.process.rna_decay.sensitivity_analysis_kcat[kcat] = KmCooperativeModel @@ -3305,34 +3317,39 @@ def setKmCooperativeEndoRNonLinearRNAdecay(sim_data, bulkContainer): # Loss function, and derivative LossFunction, Rneg, R, LossFunctionP, R_aux, L_aux, Lp_aux, Jacob, Jacob_aux = sim_data.process.rna_decay.km_loss_function( - totalEndoRnaseCapacity.asNumber(units.mol / units.L / units.s), - (countsToMolar * rnaCounts).asNumber(units.mol / units.L), - degradationRates.asNumber(1 / units.s), - isEndoRnase, - alpha - ) - - needToUpdate = False - fixturesDir = filepath.makedirs(filepath.ROOT_PATH, "fixtures", "endo_km") - # Numpy 'U' fields make these files incompatible with older code, so change - # the filename. No need to make files compatible between Python 2 & 3; we'd - # have to set the same protocol version and set Python 3-only args like - # encoding='latin1'. - km_filepath = os.path.join(fixturesDir, 'km{}.cPickle'.format(sys.version_info[0])) + total_endo_rnase_capacity_mol_l_s, + rna_conc_mol_l, + degradation_rates_s, + isEndoRnase, + alpha) + + # The checksum in the filename picks independent caches for distinct cases + # such as different Parca options or Parca code in different git branches. + # `make clean` will delete the cache files. + needToUpdate = '' + cache_dir = filepath.makedirs(filepath.ROOT_PATH, "cache") + checksum = crc32(Kmcounts, isEndoRnase, np.array(alpha)) + km_filepath = os.path.join(cache_dir, f'parca-km-{checksum}.cPickle') if os.path.exists(km_filepath): with open(km_filepath, "rb") as f: - KmcountsCached = cPickle.load(f) + KmCache = cPickle.load(f) - # KmcountsCached fits a set of Km values to give the expected degradation rates. + # KmCooperativeModel fits a set of Km values to give the expected degradation rates. # It takes 1.5 - 3 minutes to recompute. # R_aux calculates the difference of the degradation rate based on these - # Km values and the expected rate so this sum seems like a reliable test of - # whether the cache fits current input data. - if Kmcounts.shape != KmcountsCached.shape or np.sum(np.abs(R_aux(KmcountsCached))) > 1e-15: - needToUpdate = True + # Km values and the expected rate so this sum seems like a good test of + # whether the cache fits current input data, but cross-check additional + # inputs to avoid Issue #996. + KmCooperativeModel = KmCache['KmCooperativeModel'] + if (Kmcounts.shape != KmCooperativeModel.shape + or np.sum(np.abs(R_aux(KmCooperativeModel))) > 1e-15 + or arrays_differ(KmCache['total_endo_rnase_capacity_mol_l_s'], total_endo_rnase_capacity_mol_l_s) + or arrays_differ(KmCache['rna_conc_mol_l'], rna_conc_mol_l) + or arrays_differ(KmCache['degradation_rates_s'], degradation_rates_s)): + needToUpdate = 'recompute' else: - needToUpdate = True + needToUpdate = 'compute' if needToUpdate: @@ -3343,15 +3360,19 @@ def setKmCooperativeEndoRNonLinearRNAdecay(sim_data, bulkContainer): totalEndoRnaseCapacity = units.sum(endoRNaseConc * kcatEndoRNase) Kmcounts = (( 1 / degradationRates * totalEndoRnaseCapacity ) - rnaConc).asNumber() - if VERBOSE: print("Running non-linear optimization") + if VERBOSE: print(f'Running non-linear optimization to {needToUpdate} {km_filepath}') KmCooperativeModel = scipy.optimize.fsolve(LossFunction, Kmcounts, fprime = LossFunctionP) + KmCache = dict( + KmCooperativeModel=KmCooperativeModel, + total_endo_rnase_capacity_mol_l_s=total_endo_rnase_capacity_mol_l_s, + rna_conc_mol_l=rna_conc_mol_l, + degradation_rates_s=degradation_rates_s) with open(km_filepath, "wb") as f: - cPickle.dump(KmCooperativeModel, f, protocol=cPickle.HIGHEST_PROTOCOL) + cPickle.dump(KmCache, f, protocol=cPickle.HIGHEST_PROTOCOL) else: if VERBOSE: print("Not running non-linear optimization--using cached result {}".format(km_filepath)) - KmCooperativeModel = KmcountsCached if VERBOSE > 1: print("Loss function (Km inital) = %f" % np.sum(np.abs(LossFunction(Kmcounts))))