diff --git a/.gitignore b/.gitignore index 7778ca96e..edddd4fbc 100644 --- a/.gitignore +++ b/.gitignore @@ -173,6 +173,8 @@ examples/example3_systematics/data examples/example3_systematics/mg_processes examples/example3_systematics/models +examples/example4_tth + # MG / Pythia / Delphes patch patches/ diff --git a/docs/conf.py b/docs/conf.py index ea48983e8..ae2cfbfbc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -24,7 +24,7 @@ author = 'Johann Brehmer, Felix Kling, Irina Espejo, and Kyle Cranmer' # The short X.Y version -version = '0.2.8' +version = '0.3.0' # The full version, including alpha/beta/rc tags release = version diff --git a/examples/tutorial_delphes/1_from_cards_to_likelihood_ratio.ipynb b/examples/tutorial_delphes/1_from_cards_to_likelihood_ratio.ipynb index 3593d5a5f..ccd81d11e 100755 --- a/examples/tutorial_delphes/1_from_cards_to_likelihood_ratio.ipynb +++ b/examples/tutorial_delphes/1_from_cards_to_likelihood_ratio.ipynb @@ -36,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -49,11 +49,11 @@ "%matplotlib inline\n", "\n", "from madminer.core import MadMiner\n", - "from madminer.delphes import DelphesProcessor\n", + "from madminer.delphes import DelphesReader\n", "from madminer.sampling import combine_and_shuffle\n", "from madminer.sampling import SampleAugmenter\n", - "from madminer.sampling import constant_benchmark_theta, multiple_benchmark_thetas, random_morphing_thetas\n", - "from madminer.ml import MLForge\n", + "from madminer import sampling\n", + "from madminer.ml import ParameterizedRatioEstimator\n", "from madminer.plotting import plot_2d_morphing_basis, plot_distributions\n" ] }, @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -82,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ 
-135,15 +135,15 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "15:15 madminer.core INFO Added parameter CWL2 (LHA: dim6 2, maximal power in squared ME: (2,), range: (-10.0, 10.0))\n", - "15:15 madminer.core INFO Added parameter CPWL2 (LHA: dim6 5, maximal power in squared ME: (2,), range: (-10.0, 10.0))\n" + "14:38 madminer.core INFO Added parameter CWL2 (LHA: dim6 2, maximal power in squared ME: (2,), range: (-10.0, 10.0))\n", + "14:38 madminer.core INFO Added parameter CPWL2 (LHA: dim6 5, maximal power in squared ME: (2,), range: (-10.0, 10.0))\n" ] } ], @@ -198,15 +198,15 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "15:15 madminer.core INFO Added benchmark sm: CWL2 = 0.00e+00, CPWL2 = 0.00e+00)\n", - "15:15 madminer.core INFO Added benchmark w: CWL2 = 10.00, CPWL2 = 0.00e+00)\n" + "14:38 madminer.core INFO Added benchmark sm: CWL2 = 0.00e+00, CPWL2 = 0.00e+00)\n", + "14:38 madminer.core INFO Added benchmark w: CWL2 = 10.00, CPWL2 = 0.00e+00)\n" ] } ], @@ -239,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": { "scrolled": true }, @@ -248,13 +248,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "15:15 madminer.core INFO Optimizing basis for morphing\n", - "15:15 madminer.core INFO Added benchmark sm: CWL2 = 0.00e+00, CPWL2 = 0.00e+00)\n", - "15:15 madminer.core INFO Added benchmark w: CWL2 = 10.00, CPWL2 = 0.00e+00)\n", - "15:15 madminer.core INFO Added benchmark morphing_basis_vector_2: CWL2 = -6.60e+00, CPWL2 = -9.63e+00)\n", - "15:15 madminer.core INFO Added benchmark morphing_basis_vector_3: CWL2 = 1.07, CPWL2 = -6.54e+00)\n", - "15:15 madminer.core INFO Added benchmark morphing_basis_vector_4: CWL2 = -4.67e+00, CPWL2 = 8.95)\n", - "15:15 madminer.core INFO Added 
benchmark morphing_basis_vector_5: CWL2 = -9.59e+00, CPWL2 = -6.15e-01)\n" + "14:38 madminer.core INFO Optimizing basis for morphing\n", + "14:38 madminer.core INFO Set up morphing with 2 parameters, 6 morphing components, 2 predefined basis points, and 4 new basis points\n" ] } ], @@ -470,11 +465,11 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "dp = DelphesProcessor('data/madminer_example.h5')" + "delphes = DelphesReader('data/madminer_example.h5')" ] }, { @@ -502,14 +497,14 @@ } ], "source": [ - "dp.add_sample(\n", + "delphes.add_sample(\n", " lhe_filename='mg_processes/signal/Events/run_01/unweighted_events.lhe.gz',\n", " hepmc_filename='mg_processes/signal/Events/run_01/tag_1_pythia8_events.hepmc.gz',\n", " sampled_from_benchmark='sm',\n", " is_background=False,\n", ")\n", "\"\"\"\n", - "dp.add_sample(\n", + "delphes.add_sample(\n", " lhe_filename='mg_processes/background/Events/run_01/unweighted_events.lhe.gz',\n", " hepmc_filename='mg_processes/background/Events/run_01/tag_1_pythia8_events.hepmc.gz',\n", " sampled_from_benchmark='sm',\n", @@ -517,7 +512,7 @@ ")\n", "\"\"\"\n", "\n", - "dp.run_delphes(\n", + "delphes.run_delphes(\n", " delphes_directory=mg_dir + '/Delphes',\n", " delphes_card='cards/delphes_card.dat',\n", " log_file='logs/delphes.log',\n", @@ -542,13 +537,13 @@ "metadata": {}, "outputs": [], "source": [ - "dp.add_observable(\n", + "delphes.add_observable(\n", " 'pt_j1',\n", " 'j[0].pt',\n", " required=False,\n", " default=0.,\n", ")\n", - "dp.add_observable(\n", + "delphes.add_observable(\n", " 'delta_phi_jj',\n", " 'j[0].deltaphi(j[1]) * (-1. 
+ 2.*float(j[0].eta > j[1].eta))',\n", " required=True,\n", @@ -568,7 +563,7 @@ "metadata": {}, "outputs": [], "source": [ - "dp.add_cut('pt_j1 > 30.')" + "delphes.add_cut('pt_j1 > 30.')" ] }, { @@ -595,7 +590,7 @@ } ], "source": [ - "dp.analyse_delphes_samples()" + "delphes.analyse_delphes_samples()" ] }, { @@ -611,7 +606,7 @@ "metadata": {}, "outputs": [], "source": [ - "dp.save('data/madminer_example_with_data.h5')" + "delphes.save('data/madminer_example_with_data.h5')" ] }, { @@ -734,25 +729,25 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "16:34 madminer.sampling INFO Loading data from data/madminer_example_shuffled.h5\n", - "16:34 madminer.sampling INFO Found 2 parameters\n", - "16:34 madminer.sampling INFO Did not find nuisance parameters\n", - "16:34 madminer.sampling INFO Found 6 benchmarks, of which 6 physical\n", - "16:34 madminer.sampling INFO Found 2 observables\n", - "16:34 madminer.sampling INFO Found 46046 events\n", - "16:34 madminer.sampling INFO Found morphing setup with 6 components\n" + "14:39 madminer.sampling INFO Loading data from data/madminer_example_shuffled.h5\n", + "14:39 madminer.sampling INFO Found 2 parameters\n", + "14:39 madminer.sampling INFO Did not find nuisance parameters\n", + "14:39 madminer.sampling INFO Found 6 benchmarks, of which 6 physical\n", + "14:39 madminer.sampling INFO Found 2 observables\n", + "14:39 madminer.sampling INFO Found 46046 events\n", + "14:39 madminer.sampling INFO Found morphing setup with 6 components\n" ] } ], "source": [ - "sa = SampleAugmenter('data/madminer_example_shuffled.h5')" + "sampler = SampleAugmenter('data/madminer_example_shuffled.h5')" ] }, { @@ -760,39 +755,41 @@ "metadata": {}, "source": [ "The `SampleAugmenter` class defines five different high-level functions to generate train or test samples:\n", - "- `extract_samples_train_plain()`, which only saves observations 
x, for instance for histograms or ABC;\n", - "- `extract_samples_train_local()` for methods like SALLY and SALLINO, which will be demonstrated in the second part of the tutorial;\n", - "- `extract_samples_train_ratio()` for techniques like CARL, ROLR, CASCAL, and RASCAL, when only theta0 is parameterized;\n", - "- `extract_samples_train_more_ratios()` for the same techniques, but with both theta0 and theta1 parameterized;\n", - "- `extract_samples_test()` for the evaluation of any method.\n", + "- `sample_train_plain()`, which only saves observations x, for instance for histograms or ABC;\n", + "- `sample_train_local()` for methods like SALLY and SALLINO, which will be demonstrated in the second part of the tutorial;\n", + "- `sample_train_density()` for neural density estimation techniques like MAF or SCANDAL;\n", + "- `sample_train_ratio()` for techniques like CARL, ROLR, CASCAL, and RASCAL, when only theta0 is parameterized;\n", + "- `sample_train_more_ratios()` for the same techniques, but with both theta0 and theta1 parameterized;\n", + "- `sample_test()` for the evaluation of any method.\n", "\n", - "For the arguments `theta`, `theta0`, or `theta1`, you can (and should!) use the helper functions `constant_benchmark_theta()`, `multiple_benchmark_thetas()`, `constant_morphing_theta()`, `multiple_morphing_thetas()`, and `random_morphing_thetas()`, all defined in the `madminer.sampling` module.\n", + "For the arguments `theta`, `theta0`, or `theta1`, you can (and should!) use the helper functions `benchmark()`, `benchmarks()`, `morphing_point()`, `morphing_points()`, and `random_morphing_points()`, all defined in the `madminer.sampling` module.\n", "\n", "Here we'll train a likelihood ratio estimator with the ALICES method, so we focus on the `extract_samples_train_ratio()` function." 
] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "16:34 madminer.sampling INFO Extracting training sample for ratio-based methods. Numerator hypothesis: (u'random', (100, [(u'gaussian', 0.0, 10.0), (u'gaussian', 0.0, 10.0)])), denominator hypothesis: (u'benchmark', u'sm')\n", - "16:34 madminer.sampling WARNING Large statistical uncertainty on the total cross section for theta = [-7.92202596 3.87210373]: (0.010395 +/- 0.001749) pb. Skipping these warnings in the future...\n", - "16:34 madminer.sampling WARNING For this value of theta, 1 / 23023 events have negative weight and will be ignored\n", - "16:34 madminer.sampling INFO Effective number of samples: mean 15.124643599114712, with individual thetas ranging from 5.939722567101945 to 60.460140446976716\n", - "16:34 madminer.sampling INFO Effective number of samples: mean 23023.000000006065, with individual thetas ranging from 23023.000000006068 to 23023.000000006068\n" + "14:40 madminer.sampling INFO Extracting training sample for ratio-based methods. Numerator hypothesis: (u'random', (100, [(u'gaussian', 0.0, 15.0), (u'gaussian', 0.0, 15.0)])), denominator hypothesis: (u'benchmark', u'sm')\n", + "14:40 madminer.sampling WARNING Large statistical uncertainty on the total cross section for theta = [ 15.57780154 -11.11415019]: (0.055290 +/- 0.006370) pb. 
Skipping these warnings in the future...\n", + "14:40 madminer.sampling WARNING For this value of theta, 1 / 36837 events have negative weight and will be ignored\n", + "14:40 madminer.sampling WARNING For this value of theta, 1 / 36837 events have negative weight and will be ignored\n", + "14:40 madminer.sampling INFO Effective number of samples: mean 28.67984811442441, with individual thetas ranging from 9.082242574348482 to 1089.7962485221415\n", + "14:40 madminer.sampling INFO Effective number of samples: mean 36837.00000001676, with individual thetas ranging from 36837.00000001676 to 36837.00000001676\n" ] } ], "source": [ - "x, theta0, theta1, y, r_xz, t_xz = sa.extract_samples_train_ratio(\n", - " theta0=random_morphing_thetas(100, [('gaussian', 0., 10.), ('gaussian', 0., 10.)]),\n", - " theta1=constant_benchmark_theta('sm'),\n", - " n_samples=25000,\n", + "x, theta0, theta1, y, r_xz, t_xz, _ = sampler.sample_train_ratio(\n", + " theta0=sampling.random_morphing_points(100, [('gaussian', 0., 15.), ('gaussian', 0., 15.)]),\n", + " theta1=sampling.benchmark('sm'),\n", + " n_samples=100000,\n", " folder='./data/samples',\n", " filename='train'\n", ")" @@ -802,12 +799,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For the evaluation we'll need a test sample, and we'll make two just for fun:" + "For the evaluation we'll need a test sample:" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 12, "metadata": { "scrolled": true }, @@ -816,15 +813,15 @@ "name": "stderr", "output_type": "stream", "text": [ - "16:34 madminer.sampling INFO Extracting evaluation sample. Sampling according to (u'benchmark', u'sm')\n", - "16:34 madminer.sampling INFO Effective number of samples: 23022.000000006065\n" + "14:41 madminer.sampling INFO Extracting evaluation sample. 
Sampling according to (u'benchmark', u'sm')\n", + "14:41 madminer.sampling INFO Effective number of samples: 9207.999999998738\n" ] } ], "source": [ - "_ = sa.extract_samples_test(\n", - " theta=constant_benchmark_theta('sm'),\n", - " n_samples=25000,\n", + "_ = sampler.sample_test(\n", + " theta=sampling.benchmark('sm'),\n", + " n_samples=1000,\n", " folder='./data/samples',\n", " filename='test'\n", ")" @@ -846,36 +843,36 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "16:34 madminer.sampling INFO Starting cross-section calculation\n", - "16:34 madminer.sampling INFO Starting cross-section calculation\n" + "14:41 madminer.sampling INFO Starting cross-section calculation\n", + "14:41 madminer.sampling INFO Starting cross-section calculation\n" ] } ], "source": [ - "thetas_benchmarks, xsecs_benchmarks, xsec_errors_benchmarks = sa.extract_cross_sections(\n", - " theta=multiple_benchmark_thetas(['sm', 'w', 'morphing_basis_vector_2', 'morphing_basis_vector_3', 'morphing_basis_vector_4', 'morphing_basis_vector_5'])\n", + "thetas_benchmarks, xsecs_benchmarks, xsec_errors_benchmarks = sampler.cross_sections(\n", + " theta=sampling.benchmarks(['sm', 'w', 'morphing_basis_vector_2', 'morphing_basis_vector_3', 'morphing_basis_vector_4', 'morphing_basis_vector_5'])\n", ")\n", "\n", - "thetas_morphing, xsecs_morphing, xsec_errors_morphing = sa.extract_cross_sections(\n", - " theta=random_morphing_thetas(1000, [('gaussian', 0., 4.), ('gaussian', 0., 4.)])\n", + "thetas_morphing, xsecs_morphing, xsec_errors_morphing = sampler.cross_sections(\n", + " theta=sampling.random_morphing_points(1000, [('gaussian', 0., 4.), ('gaussian', 0., 4.)])\n", ")" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAWIAAAEYCAYAAABm5fzdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzsnXecXHW1wL/n3qlbk91NWdJDQoAECOmABAENVRBEimAjyhPre+p7iqjPxnu2pyJSRAGlSROkhg5CJEAaCemF9LKb7WXaLef9cWdnZ3ZmN7PJps+XzyUz9/5+v/u7d2fOnHt+p4iqUqBAgQIFDhzGgZ5AgQIFChzpFARxgQIFChxgCoK4QIECBQ4wBUFcoECBAgeYgiAuUKBAgQNMQRAXKFCgwAGmTwSxiNwtIrUisixtX4WIvCQia5P/9u+m72eTbdaKyGf7Yj4FChQocCjRVxrxX4Bzu+z7LvCKqo4FXkm+z0BEKoD/BqYD04D/7k5gFyhQoMDhSp8IYlV9A2josvti4K/J138FPp6j6znAS6raoKqNwEtkC/QCBQoUOKzx7cOxB6nqDgBV3SEiA3O0GQJsSXu/NbkvCxG5DrgOoLi4ePKxxx675zPTCDg1oDEQHxgDwOiXo10UtA0wwCin726X7c0BE6QIkD4at4/QBLh1yfsTAKMKJNR53FoO5IrINMA3MnlNeZxG40ACwQB7Y44xDTAHgVG5J1dRoA9YuHBhnaoO6G2/c84s1voGJ//zLI2/oKo9KmEici5wM2ACf1bVn3c5HgTuBSYD9cAVqrpRRD4K/BwIAAngP1X11WSfyXhP9GHgOeAbegDCjfelIM6HXBIo501Q1TuBOwGmTJmiCxYs2KMTavxttPE6IP3LHYbiT2GUfjN5Lgdt+neIvwEUAX7AgfKfYYQv3KPzduC2/QHa7gApwbtUH9L/j0hg4l6Nu79QtXF2Hk+CBJr8UxkIfvwYRinS73YkOL3HMVyngeaGz2FbKxDxoRqnxHcGQXc73vfE9YS5bxxScR/q7CLa+n/Y8VeAEIHiqwiVfAlJ/3HomJ+9Hrf9fnC2QGAqRtHliFGwdu0pIrJpT/rVNzi8+8LwvNub1WurdjMPE7gV+CiewjZfRJ5S1RVpzWYDjao6RkSuBH4BXAHUAR9T1e0iMgF4gU6F73Y8Be9tPEF8LjAn74n3EfvSa6JGRKoBkv/W5mizFRiW9n4osH0fzglt/RkQ67I3Cu33oG6T1ybyWFIIRwEn2T4OzTegTt2enzv2ErT9CUh4mra2gzaijdeibtsej7s/UbeBOHZKCAO4KHESqDqQxw9Kc+MXsa2lQBTVViBOm72WaPAMCH8KQhcj5b9AKh5A3UZad52HFX0cdRtQdzvx1ltpq7+aroqLG30Rp+5iNPogmngdbbsFZ9cs1N7cx3ehwO5QwO3Ff3kwDVinqh+oagJ4CM/8mU66OfQx4GwREVVdrKodcmU5EBKRYFIulanqvKQWfC+5Taj7nH0piJ8COrwgPgs8maPNC8AsEemfXKSbldy3T1B1wF6b+6AEwHrfex19AE8IZzWC2J7/WGr7XbnHVRdi++yy+xQ7ch/ek2GOY8Gz8Z4Ou8exN2Mn3gOsLkdiRGJzkLLvYfT7FRI6BxEf8bY/otqG94PY2daxlmEn5qb2qMZxW/4L70fTSbVDm3FaftSbSyzQJyiOunlvQJWILEjbrusyYD5mzFQbVbWBZjIffQE+ASxWzy42JDlOT2PuF/rENCEifwM+jHczt+J5QvwceEREZgObgU8m204BvqSqX1DVBhH5KTA/OdRPVLXrol8fYgBBsjViAKfTTqzt3fS3ejiWB06uhwK8+bjdHTu4cBKL8MwH2WjOH69MXGcnIn5cN4aL9xeRDgOV2qi2I1Keam/FXiNbaAMawY6/jT94uvc2sai7M0LiX6i6iBTc5vcXnkbcK1NrnapO6eF4PmbMHtuIyHg8c8W
sXoy5X+grr4mrVLVaVf2qOlRV71LVelU9W1XHJv9tSLZdoKpfSOt7t6qOSW739MV8ukNEIHwJns0+44i3IOSb4L0NnoVnF+5KAIKn7fkEApNIv+UXXrMNs3otZvVqjNIvIyJ5bxdccMGezyMPLHsLbZFniMXnZ5gADN8ocmvEAcQctdtxxTeSOjvGZsfHluTW6njfBzH6IVKW2d6s6GakYBfb78G14KnWGjT6HGotP9BTOWD0sWkiHzNmqo2I+IBykt5cIjIUeAL4jKquT2s/dDdj7hcO9GLdfkdKv4Pa68BaBiiI57kg/e/0BDUgxdehsWfAbaFTGwtDcCbiP2HPz13yZTT+ctJjAua8EtnjsZ577rk97tsTqja1Df9OJPoMiB9QTGMA1QMewu8bgb/4c9iRR8k0FQBi4i++erfjb2r8KW1qosn+LtCgJjgG1eXfTf0NOggWX0vEWpa6Z2knJBC+qPNdYBK5hbGBBD6037RhddvQxn/zzFxiAi5qHo1U/BkxuvtROfxQFKdvnQ/mA2NFZBSwDbgS+FSXNh3m0HnAZcCrqqoi0g94FrhBVf+VmqPnzdUqIjOAd4DPALf05aTz5cgTxEYRUvkAai0FawUYgyB4Ot4PaLKNOQAqn0bb74T4KyDFEL4aKbps787tGw0VD6It/wPWwtR+Z8fYXo1jVndj5+4Dmlp/TyT2HEocNA6A7UTZsesKhg2eh+EbQ7D/bcSb/gNPGCsQJNT/FgyzZ/Nawt5BY+TFlBDuQBGapYzRxVdm9fGHzidQ9C6J9gcA8YSbOhT1vwXD7PSIFAlglP8Kt+nfATu5hUCKMMp+vDe3pFdo841gLcFbkE3utFehTd9EKv6y3+ZxMNBL00SPqKotIl/FW0MygbtVdbmI/ARYoKpPAXcB94nIOjxNuOMD9VVgDPADEflBct8sVa0FrqfTfW0OB8BjAkAOxQod+bivqb0Jbb8NEgvBGIgUz0ZCZ++nGeZHh/a3p4J4X/ztNm4fj+tmm+lFiqmueohQcEry3Bau9T5gYPhPwPMu6pnm6Busr/sKjrbmOCpMHrYGQ3KZhJKLfPE3EQnjD30EMcpytlN7I27kQXA2g38qRtFliFGes21fo24bWjuD3Db0ADLgVcTM5U5/8CIiC3dju83JxJMC+sqc/K+1asi2PTrP4cJhqRGrtRptuMILSMAFZzPavBy1r8Mo+cqBnt5Bjes2d3PEwHFrUu9E/JiBST2O5ahn1jGTwjXgG4LmWngDTClFevg4mr7hmL7dmz7ENxKz7Hu7bbdP0Fa6XXYRP7gNcIgJ4r2hLzXiw53DUxC33pRtU9QotN2BFn2q4ODfAwHfOBL2iuwDmiDgPzGvMVqtrbxb+3Nqo4sAZUBoItMG3kBZ4GiKAhNoiy8h3RPCkDCDy76QZR8+5DAGggSTCkBXXC/q8AhBAesQfNo+UBye/jyJbswW4ofE/NzHCgBQ0e+/syLWREIUF30Mv29YN706iTvNvLDlWmqiC1AcFJfa2GJe3HotMaeRsVV/ojQ4GZFgUgsOUlV8OdVlX91Xl7TfEDGh9NtAl4g/CUPxV3JGAh6uaPKvn+92pHNYasSeVmJ3c6x4/87lEKMoNJNBlX+lofknJKxVGEY/yktm06/0a3n1X9/yNI7GyHTHVByNs7b5CU6ouJZjBz1E3N5Cwqkh7BuDz8yR5+MQxSi6ApVStO134Gz1cmUUfwUJf+JAT23/ouAU5GveHJ6COPRxiD5CdiCACYFpGXvU3grxVwGF4NmIbyhHOkWhmRSFXt6jvnWxpThJb4t0HE1QF1uaeh/0DSOYh4Z9KCLh85Hw+Qd6GgcUL6CjQL4cloJYSr+FWovB2ejZhgmBGEj/O5C0VXm37XZouy35TqH112jJ9RglXz4Q0z4sKPUPQ/ChZD6RCCal/sKP3JGD4BxkQTYHM4enIDZKoPJxSPwLrKVeGsfQ+YhRmmqjiSXQdjvQRXtr+yMaOAUJnLx/J32
YMLb8UtY0P4aTZhpSBZcAo0svOYAzK7A/UcAtmCby5rAUxIAXSRU83dtyoNFHyBLCAMTQ6MMFQbyHlPiHcPrgX/BWzQ9wXJtaK0StFULEz7q132da5SzOqf4MZh5+xwUObQoacf4cnl4T+eA2kju/h0IyHWaBPeOo4hlcOmoOxaFLqbPLcBEctbE0zrv1L/DMtj8f6CkW2MconiDOdzvSOWIFsQQ/4rkVZRH2jhXYKxRY1PQ2tmYumFqaYFHjq0SdvchiV+CQwFXJezvSOWxNE7slfAFE7gJ7M50miiCYQ2Avq3D0ln2ZO+JA0WY342ruUjmm+GlM1BAOj97Psyqwv+jQiAvkx5GrEUsQqXgYiq8Dc5i3FX8RqXxkvznen3/+nrs47U3f/UGxWUZ3qSkdtSj391gZp8AhjiI4GHlvRzpHrkaM510hpV+DPIMV+ppnn332gJy3Lr6FN2ofZEtkOUVmOTOqLmFC+Zl9GmLsM/xMrZzF/PoXsLQzCY5P/BxXNo1iX+6kPQUOHwomh/w5ogXxkUhtbCN/2fBtbDeB4tJmNzBn+23siK5jVnXX6jR7x7nVn8Fy4yxufA1T/DhqcVzZND4x7NAPZy7QMwXTRO8oCOIjjFd23o3lZialsTTGosY5zKi6lLI+NBmYYvLxoV/inMHX0JCooV+gimLf/klJWeBAIzhaMDnkS0EQH2FsjizLud/Ex+b2ZUzo9+E+P2fYV8IQX0mfj3uoEEu8R3P7/ThOA8Xhj1JadAnGYZ4AyAtxLgjifCkI4iOINruFqBMg5gqmuIQMC6Pj6VEgZBYSIvU1DS1/oKH1t3hFg10i8TdobL2DYQOfwUyL9DwcKZgm8qfwk3WEsDWygZtWfINWxyCufiJugEa7GDu5oGJgMKp44l6fx1WXFc2bWNK0noTbTQa8IwTL3kZDy/+hGqUjBY5qBNveTGPrrQd2cvsYVc80ke92pFPQiHeDOrsgMRfwQfCMbkv0HMyoKvdu+j0xN73cvaAobU4RAwPKFSN+hGnkLlOUL89ue4ffrfk7dtJ/2ED47KhZfGbUrN30PDxpj71ILhc+JU5r5HGqyr+7/ye1H3ELGnHe7FNBLCLjgIfTdo0Gfqiqv0tr82HgSWBDctfjqvqTfTmvfHHb7oS234P4AAG10bKbMIou2m3fg4n6RC1NifocRwRXDWYf/TsqAntXwmdJ43r+b/UjGUHjLspfNrzAsKKBnDlo77XtAocOntdEQdPNl30qiFV1NTARQLzqktuAJ3I0fVNV9284227QxHxouxWvGm9aMciW76OBkxDfiAM2t97iquP5COdIrWGIiSndfwxs12FN61YMEcaWDsXspiz9bWuf6i5zB7eve4oJ5aOpCpYe+uWQekFx6KPUka1TCAFKiw73THQFr4nesD9NE2cD61V103485x6j7fcCuWqP2Wj0MaT0W7n7OXXgbAFzOGJW5n2+mL2Tmvbnsd12KsOnUh6c2GdCqyo4mLBZRMLNzjbX319JmS+7hp+q8kbtUn616hEURYGQ4eeHEz7NxP5jstpvi9V1e/7aWDMf/+evqQgU890JH+e0AeP26noOFfy+oVSUfZOG1t+lFutEivCZ1fQvPbx9qQteE71jfwriK4G/dXPsFBFZAmwHvq2qy7s2EJHrgOsAhg8fvs8mmcKtI3d2NhvcXVl7VWNo03e8ah8SAI2joXOQ8v9FJNDjqba3PsHK+h+jqig2G5vvoiI0g6LQudREl1Lir+aY8gsp8Q/ao0sxxOCq4V/mrg9+haUWoBiY+AwfUyou5Hvv38bOWD1HlwzlqmGzWNS4nns3vkSbHc0YJ+rEuWHJXdx7ynexXZf7N7zJe42bOCrcj36+EtrtXD9cXl7ahGuzM9bMdxc9yB3Tv8j4fkdGkviKsq8RDp5KS/v92G4DJeFZlBZdipEz4dThhVOIrMub/SKIxZNEFwE35Di8CBihqm0icj7wD2Bs10aqeidwJ8CUKVP2fcr
p4EywlpGds7gICXwoq7k2fz9ZcikOHaWCYi+hEkLKb+r2NDG7hpX1P8ZNKy/kapRdkddpbltMq2ti4Of9xgc5+6ibGFo8Y48uZ0NbI9uiJZT4ovjExnJNhhVN5LZ1zxN3vQxpNbEG5tYuR/F16/HgqMv9G17myS3LSLgWtrqsa91B0BTMHCmGVSHhdB6IuxZ3rX+V30z+zB5dx/7AUYd3Gxbzdv18AkaAMwacxviycXv8hBIOTiYcnNzHszy46cg1USA/9pdGfB6wSFVruh5Q1Za018+JyG0iUqWq3T/r5kA1Bon3vErN/pOQHuye+SBFV6GR+8B1IFX2xw9mNYQyvQDUbYHY80CiyygxNPIkTvHXMMyqnHOqaX8ezVF2XEQpMlppdfvhYoHC6zt+xFVHP40pvfNu2BzZzl83/Z2EC7FEpyZW17QGS006vBhdVeKupl1vNpbavFbzPhGn81oViDlKiCABn4WjHa5aELcNHNeX0XZd685ezT8XDfF2XHWpCvWtL67t2ty08rdsaN9MPGnKmd+wmNMHnMLsUVf36bkOZxSwtOCUlS/7605dRTdmCREZDNSoqorINDypkGuJv1vcyJPQ+t/JrgoEoP8tSJdCob1BjH5Q+Q+vGm/sJcCE8EVIydeyTQ1urfcDoJmCOOomaNN2tGYaIn7CRZ+mtPx7Gf0djabqu6lCQn24CH5xEBRbDdrcIAk1MRH+WfMnzhz0Ja8CSZ68WvMWtpudklIAn7gkkosq+TxmhAw/9fGodz+6YEqQH0/4LBWBMO/UreWe9W9gu10LuMKI4gF5z70r61tr+d6ix1nbWosAQ4r689OJH+ekir4pRPpG3Tw2tG8i7iZQ9e5J1LF4vfYtzhhwKmNKRvXJeQ53FCmYJnrBPn92EJEi4KPA42n7viQiX0q+vQxYlrQR/x64UnOpiN2g1lJo+QFoBLQNtB20EW28zvMB3pu5mwMxyv8HY9B8jEFvY5R9L6PuXQrjKOiSezfqJmjVRHKZy0Y1SqT9Xpob/yOjXWX4QxgSwFaDnXY/djll1Dsl7LD7sdMqp8EpJq4+FAMbYUHjCzy7/Xfkg6MOj2x+iae3zSNiCwnHyKwjJplerrv72ggQNAOo5tbIVZUiM8DRpUOo8g8kZvmJJ3y0xQI0R0I0R0IkEkGuHHFaqk9zIsqiui1sjzTv9nqaE1E+M/cuVjbvwHIdEq7DhrY6rpt3L9sijbvtnw9v7JqXEsKWmthqYqtBxHG5dd19KW2/wO5xMfLejnT2+R1Q1YiqVqpqc9q+O1T1juTrP6jqeFU9SVVnqOpbvRq//S/krD2nDhrN5SnX94hRBEXXAN5jv6rSrtmaIMSIRZ/DcXak9pQHJ1AVOoNddjk2BoqgGIDQ5BYntdROEemoxcqWN2iIb9vtvP53xT08tPl5Yq6F5zMsJBwzQxinay0iYApINyJ5QvlobpvyDc4cNCGnG1u7nWBezRbOnnML31/0LI1RJZrwY9sm6gqqguWY/HLp69iuy08WP89pT9/M7DcfYtac2/n06/fRlIjmOLPHP7YsJuE4WZq75Trc98Hbu70f+dIhhD0ktW2L1PKPbS/2aqwd0dW8uOMPPLvt16xtnddtsvzDDVUKkXW94NC/A/Zmcj9Uxz03sv2ElH4bSr4AUgwEcLt50BcJYFtrUu9VXVoZgp0Uvukk1Je1DzxBuTnyfo/z2di+nYWNK1MLcR09AWzXwCcm/f3lhMwSQkYAAcJmkCFF5Zzc/2gCho9iM4SBQcL20R6p4PWt9dy48Fm+OOYjVAZL6XhuUfW2SDzA71f8k12xNtoSFq7rhbqmrkHBcpVN7Y3csOBpHtuwhLhr02bHibs2C+u28OV/PdLtNa1q3pH8UcnEUoeVTTty9Og9M6tOwZeKMMy89w4uz2x/Ne+x3qj9C3/b+B2WND7H8uZXeGbrL3l40w04OX+kDzcEtxfbkc6hb00PTAZ7JdDlwy1FiH//RXOJGEjJ19D
i68FtRmpORbU1q51iYZpDUu//Vftb1rbMQckWuV4Qcg5BLAYhs/tsZhvbd3DDkttJ5BBa4Gmm5w4+ncuGnkfACPCvuiXUxBoYWXwU0yqPxxSTHdF6bl31CnO2rSJiubjq2bHfqt3I+S/dzvSqEWyxoiCOp0HavmT9MXDtNOELqCgigohndG23Ejy3ZQWJLnZrS13eb9zBxtYGRpZWZM18TOlAgoaPeBePDlMMxpTtud0ZvKeYpU2bqI0KplagWk8uJ4k2O5LXeLtiG1hQ/w/sNG8YS2PsiK7h/caXmFhxcFdY2VsUCppuLzjkBbEUfw6NPgoZWoYJUgbh/f9hF/EhZiXFJdfR1nobkP6o7cfvPwGf3wuIiNqNrGl5FpdETp2gSBK0aZBcIvrokqk5zx+xY/zH4ltotyP4hJzCpDpUxedHfTL1/qxB2WMNCPbj+W3rkkI4LUUb4Kjydt0mXDWQtIcqVXBtI6OtdwDvJ0UEUMKmj5id+xHdLyZbI005BfElwyfxp7VvEO9ipg0YJp8efUrO8fKh3Y7z9QV38UFbDY66mCLE3VKqwu34zcyTDSuqzmvMVS1v5tR8bY3zftOLTKw4H1WX1U0PsLrpAeJOI6X+4ZxY9TWGFM/c42s5mCi4r+XPIX+nxKxGKh4C/1S8y/FB8Eyk8lHkADrNF5f+O+Hiy4EgIqVACH9gMv0r7km1aYivx5QAHUKrqzEjLAkM3OQRIWCECRhhLh/+Y/xGMOd5X6tdhO1m21E7CBoBLjhqJi9tX85tq1/lyS2Lidpd3e6gzY5jZWismRLdVcXo+gOhudt6eTok/R2VwdwpN+OuzZiy3MnpK4LF3HXq5xheXEHI9BM2/QwMlfL7aZ9iZMmeJ7T/3apnWNOyg6iTIOHaRB0LVw0aosWkLxsHDD+fHpFfaLKqm1yozcbFu6+L637DsoY7iTn1KC4t1kbm7fweW1rzN38crCj5V3AulFQ6DDRiAPEfg1Q+gKoNCF5aiwM8JzEp7/e/lJb+J7a9FsOsxufLjAgs8lUm55zDyq3QpkFMAT8+JvQ7lyFF4xlbOoOA0X1S8S2RXcTcBCDYauCjU6MTgSn9x3PHqoU0JaJEnARFZoBfL3+ee06bzZjSzsQ/pf4QYdNP3Ol5ccknBnbSk0By2LmzLgq48aTz+NeODTy3bXlmc4UiI8jgcGeGO1XlvtWLuXP5OzTEIxzffxD/PelSBhcXYavLqJKqvQoFd9XlhR3vYWm277Tgw0cRLhGGhqv57MhLObn/8XmNO7bsVBY0ZJomAHwS5OiSaby96wE2NT8GZN5fR+Msqb+ZYaVn7fE1HSwUNOL8OSwEcQd7G8SxLzDMCgLm9JzH+gdHUR4YTkN8PdqRrza52YQwjBJ86jBz4LVMqvh4XucbXVJN2AgQdRMogqWe3moAMypOYGubj5poK07yfG2Whe3afOzlP3JCv6P46nFnMHPwGEwx+OLY0/n9yteIpQljdcG2TFzHIGiYHFtZxYZoDYoyacBw3tm5nbiTOyDEdQU3YfDq5g+YX7sV2xJMf+dPkGsLLZbDuuZ6xpR7eTp+NP9lHlm3lKjtPeYv2LWVz7z8MLefcQkDwiXURtsYVLTnQR2Oul00/04Cpp9vH/t5plYc3WthXx0+huPLz2Rl8+tY6oV++yWI3yjlrfrHCEmcKtPFzDFsu72Dedtn05pYht8sZ2TZpxlZdnWvfMcPNAq4BRtx3hx8kusIY9aQX/L81m/Tam1LLnAkcKhiUPhMju93MqNLJhEw8jexzBwwkT+tf5q4ayU9NwQF/EaA86tP57p5D6Q8OmxHcFyDjmXBJY3b+Po7j/LDiedx6YiJXDv2VBx1+O3y1z0DiQuJaKdHQdxxWVxby9SBw3j0nGsAuPrVB1hct5V4UripgpsQ1DY7ujG/Ziv1sQhgYtuaphULAb/J0l3bGVnan4Z4hL+teS9rUS/m2Fz76qMU+4JYrsPUgcO4ZeZF9A9l3ydV5bk
tK/nr6gU0JWKcNWQMXzh2OlUhzzTiN3yMLhnI+rasoE9s1+G4siF7rHGfU/11xpaewtKmF7DcGEPC43mr/hEctUioi5jdGZBcGmLvIAK23c7qxptpTazhxAE/3aN5HBikUKGjF0gvYicOGqZMmaILFiw40NPoM1SVV2vuZ+6ufxBxDRJq4hM/QSPEN4+5iYpg7lzBG9p2sLBxLcVmiNMHnECJ3xNEO6L1/HLVA6xq2YwgDAiWc9Ww8/nR4heJuBHPZKuQsE1ymRLK/CHmXfBtfIan0eyKtfKdBU/yxtbN2Jbk7PPXsy/njKNGE7ET/GTRSzz+wfvYtuIkOuzDXfq4aXtM9TZvLY+AGyBk+rlwzDE8tWk5bVa2DTvdHu03DCZUDOaJCz6d1ezGd+fw5MZlRBxPow4YJqX+IM+e9wUGhj3Pk4UNH/DNhX/JcPULGX4+PfoMZh99ds57vye8WnMX79Y/kXr6GeJrwC9OZ7mqjushQcjMfKowJMjMIU9R5B/C/kREFqrqlN72GzqhXL/+yKl5t//O+Of36DyHC4Vnh4OAmBvlhZqXaHL8JJKBBLZaRJw2ntr+QFZ7V13+Z/kDXL/gZu5c/yy/X/MEl/3rx8yrWwFAdbiS3578dR4+9cfcN+P7/GX6jTy84X3a7HheYcy267KpvSH1fkColLs/dA3+nB4cHnevmA9AkS/AVaMmUWqV4kRNcLuxG0vH/wSc5Kae1h13HJoTMR5bs4xEjzZqb1zLdVnVWMvqxsxIynXNdTy+8f2UEAZIuN7Ytyybm9o3uWI0t039ItMrx9LPX8TY0mpunHBZnwphgJjTlhLCADvtcmz1AmwUE0MC+MQlaOS2VzfG3+vT+exrnKRWnM92pFMQxAcB61qXY+RYYFSUFS2LsvbP2f4ub+5aRty1sFybmJsg7lr8eNm9tFidfq6lviI2tjZzz9p5zK/biKOKZRmpAIzusNWhzJ+9INjt05N6UXUAbYkE1zz7SNL0kC+eIFYXcDo/knHLwXJyuPvnmIZpGGxqbcrYN3fnhpwXaqvLy9vWZOwb328YN0+5lufP+gH3nfp1Plp9Yi/mnx9jSqbiT6ve7GCy1e5PrTOAQcUXc97whylJ5K1DAAAgAElEQVT3F+d0OQQIGP36fE77ClWv+ku+25FOwUZ8EGD0sAiTa4HmiW1zk54R2fyzdgkfG3IK7XaCL/zrPlY312C7Do4qdtJO6yYMfKaTtAxDusZqIpzYfwgDQiVsbGnk1wvfZO72TZT4AwwKlbKpPUdOB4HzRxwLwHMbVuPskblLwDY7p+IAruA6YIS98fyG56GRM0u06zK2X2Yi/rDPnwzFztaqQ+be1efbE8aUTmdgaCQ1sQ+wkwmiTAkQMCs5deA3CJpFjCi9ivXNf8bVzNzOpoSoDOde9D1Y6euADhE5F7gZL+PUn1X1512OB4F7gcl4icOuUNWNIlIJPAZMBf6iql9N6/M6UE2nw/8sVa3t04nnQUEQHwSMLZ2Q8cjagYnJSf2y8w+3dZOA3VYnlcz9V++/yIqmHamFLtftDMpQFSzby1QnhhIwTAwRfIZBZbCY30z7BJtbmrjwyXtptxO4qjTGowRMozPBXRpl/iBXjfWiGGva24jZXQIZuoYNdriGdNmVauOVd/ByXqigEQFDwWdyxbgTeXLjcqJpnhlBw2T6oGGMKssMAvno0GP40cLs3BAh08eVR+//GnqGmHxqxM9Z0PAUS5tewlGb48tmMr3qEwTNIgCO7jeblsRKdkXn4tXWNhDxM23wHzEOQq+g7vD+hH1nckiWWrsVL4HYVmC+iDylqivSms0GGlV1jIhcCfwCuAKv1M4PgAnJrStXq+oBXXQ6dP6yhzEBI8jVw7/C/Zv+gKsODg4BI0ixr4yLjsrOgTut4lie2/5OygWtE6EyUIGq8o/NSzK8DTTXghmeOWBYSQVXj5lMkRFk8Y5aZr/4OHXRCG1Wpk054bgE8VFdVsLmtiYE4dR
BI/jVaecT9nka5gkDBhP2+Wm3LQTxzBnJRThDxPPY6KrSamo6HlkKrIArWAmluT3BD6aezS8X/5OobQPKhSOP4zuTPswDy5ewun4X4yqruGjscVQEi/jl9Av5r3eeQVVJuA5FPj8nVR7F58fteYrUnmi3Y7xWs4Rd8SbGlg5hRuVx+IxOs5PPCDCj6jJmVF2Ws78hfiYPupnWxHqa4u8RMCsZED4No5c5qA88fV6zbhqwTlU/ABCRh4CLgXRBfDHwo+Trx4A/iIioajswV0Sya3wdJBQE8UHCif2m8Z3wr3mn/jUarTrGlIzn5P6nEjCyyyxdM/JsXq9dQrsdTbmiqULUhhsXPcWao+u7qbCRO6PFyJJKjisdwtVzHu7UNLVLl468PSpcM3Yynz7uZAyEQJeyHDOHjmR4WT/WNzWQcJ2UMDbFwLQEy3K9T116NwfEAQkIPp+BK9qteWNjUyMfSozgN9Mv5NgBA+gfCrOttYVZD91DwnGI2BZFPj+/emcuj11yFR8bcTxTBwzjmU0raEpEOW3wSGYMHLFPipiubtnKNxfdgaMuMTdB2AxSFSzjD1O+Srk/dyRhd5QGjqY0cHSfz3F/4fkR9+oeV4lIulZ6Z7IqTwdDgPQsXluBrraaVBtVtUWkGagEdldk4h4RcYC/Az/rTRrevqJgJT9AtFjNbIlsIOp0LmpVBgdy/lFXcPWIrzC98sM5hTDAwFB//jTtm5xQPhZXBccVIgk/rfEgcdfm3g/eYlRJ5mO6YeT+bIVNP9eMmcJ/vjkn43E/LftjhgbrMwz6BUOETF9KCCcch20tLcRsC0OEhz92FZeMPZ6Q6UOAiQOOwp8wcRJgqIFhGUhMIC4QE8TxclYUuQF+OPUsDNvIuSAnwLqGem5643W+8fxzfPKRh9jV3s43Xn6W5niMSNIkErEsmmMxvvri0wAMLirlC8dN59snfZhTBo3MEMIx2+42AKU3uOpy45K7aXdiKft91ImzI9rAzav2TzrWgw0HI+8NqFPVKWnbnV2GyyXVu35K8mnTlatV9QTg9OSW7QO5HyhoxGmouhB7Ho0+DBqD0HlI+HIv33AfEXdi3Lvxdla0vIdP/Dhqc2rVWVw69JoeF+26MijUn5gVpjGSPbeEY3PcwEFsj7YSdyxPqRXwm4LjeFqsIDjqMnvcDMaWDmRzF4+Dnjh3xDGAl2/ilnfm8adFC5OFT+Hy8RO48fQz+MXMc/nFzHNRVeasW8N3dryIlVZKShCki5Ui7Pfz6LJl2Lbb+clMsxsrYNsuDgoORG2La598nM2xZm+cpG254+XqXfXc/O5bfGNatj/r2qZ6bpj7AotqvbzO0wYP4+cfOoeRZdkVrfNhVcsW2p3svNi2Oryx6/1kMqEjR+/pyDXRh2wF0suwDMUrNpyrzVbxwmzLgQZ6QFW3Jf9tFZEH8Uwg9/bVpPPlyPlk7AZVRZu/hTbfAIl5YC2G1t+g9Z9A3fY+O889G/7AipYl2GoTc6NYajGv7jXm7Hh89527EHNy57V1UcoDIe4//XOcPmgM5f4w4hpg+ZFEgHhcGBKsYM6sL/GN8WdgimC7PVeeCIsff9ykJB7kuqf/wZubNnLru29z58IFRCyLqG0Ts20eWb6MH7z2SqqfiBD2+7t1yQJP1oZ9Pn56xlksr631hLQtpPIddSzeWWlJ6xVcR/mgoQk7okhCvFJ7CqKCqOeXfNuCd3l+/dqM8+2KtnPp0/ezoGYrjnpmkHd2bOHjT91Hc9xbCI3aFuua62lO5F4Y7UrMSWQnQUriqIt7BFb26OMKHfOBsSIyKlmM+ErgqS5tngI+m3x9GfBqT2YGEfGJSFXytR+4EFjWy8vsEwoacQfWYoi9Smbayhg429DIQ0jJ7L0+RWOintWty7C7pEdMaILXaudwXvWlvdKKzztqAiuat2cJ5LAZ4OzBxzO+/1HcNuMqTvn7rUSiHVLNAQc2NDXz6Pr3+dbEmbyy6YNk9JvmfLgrNYJ
YERd1oN6KUr9tK0trduI4SqKLAI/ZNk+uWsl3P3Q6/ZIhx6cNG0Guqh+GCEV+P9OGDuUrU6Zz8uCjCPtfJGJ5C33iSOfCnQKS/E51XApkaMyiQte8QwnH4Tdvz+XcozsLg9+3cjHxLpU+XJSYbfPQ6iW0OHHuXjEfQwws1+G8EeP4+SnnphYkc3Fc+XDsbqpvjCsdit84sr5qXoWOvtOIkzbfrwIv4K0w3K2qy0XkJ8ACVX0KuAu4T0TW4WnCV3b0F5GNQBkQEJGPA7OATcALSSFsAi8Df+qzSfeCI+vT0QMafwXPy6UrMYg9A30giBsSdfjElyWIwYukizlRinz5L+pcPHwSj25ewOa2hlTlirDpZ3LFCGYMGA3AOzWbU7bTdGKOzf2rF/OtiTO5a+lC1Mb7NHQsziWllE9MSglR47Rl9I9aticQXcCQjAU90zDY3NycEsQB0+TOCy9m9tNPoOoJvLDfz9H9K/jbJ66gyN8p4C47fjwPL3s/M+tbh8SMC/iSNgqDjGTL0uEV7ZJV13Rjc6bZZVHNduK2nbUQGXVsHvtgGVsjzRn28uc3ryHu2Nzx4e5TYIbNIF8acyF3rHsmFSptIAQMP98Yl1/qzMONvk5vqarPAc912ffDtNcx4JNd+yWPjexm2Ml9Nb+9oSCIUwTwvt05tJquVZv3kAHBwdg50i0CBI0QIbN3+ZMTjsMvJ13BW7VreG77+/gNk0uHT+aCISemNOvGeKzb1YqOHA6N8agnyGzN1CgVPjpiDK+s/iCzo5IqiCJIZ93UZGWnqGUxpLQso8v0ocN469rreGbNauoiEU4eXM1pw0dgdLFZfOe005m3eQtrGzILeUvyfNoh+HOQEsZdsByXvy1dyhUnnIAArZE4Ejc6WxoKPsVnCpvaGrOSDMUdm9e2racm0tpjprdLhp3GsOIB/G3ja9TEGjmufDjXjPwII4pz5wo5nPEy/x34dLSHCgVBnETCF6DtfyZbEIeRoiv65BxBI8TxZRNZ0fwedlppp4AR4JzBl+RtlqiPtfNf85/knV0bMUQo8QX5/sRzOH/Y+Ky2kwYche06STtr2qO+qZxQNQiA6dVDefaDNV6oqUtKmIVNP184YTKvrfkAJ12+JX9LOswNKQXaxtNGBSqLshcRy4IhPnXCST1eW9jvZ9qgIayrqe/8QXA7haznDkfPaY+7ovCzf77Owu3bGVXVn9W76tNmnxTuCcERRX25bbkBw8em1qbdptycUnEMUyqO6cXkDk/2wH3tiKYgiJOIbwxacj203YGn7jkgRRCYBqGL92psy03wt8338079W563AgYmPvwi+Ewf5wz6OGcOPDevsVxVrvnnvWxqa0glZI85Nt9d8BT9AkWcOmhURvvq4jIuGTWeh1csR900u4NrUGGW8J1XXuD5NetwjU77sCCETB/TqocwpXooZ40azasbPsBy3dTiWVebb0oYKxTvZfhwZXERATG983XBFMHuyc2z629ZcqEvqjbPrFmNudnIyK/szT05e/Xuby4hn3BtRpQeOrkeDjxSyCHRC/a5IE4ayVvxdDG7a6o78Zw6bwbOByLA51Q1O9PNHqBOPRr9Bzg7kMDJEPoo0oOZwSj5Mhr8CBp7CtwIEvoIBE7Za+f/ezb8iSVNi7HSbMOmEeAjgy/g3OqPYfaiosjbtRvZGW1JCeEOYo7N71f8M0sQA5zQ7yj+zkqsVCSedz2vbPgAw/YKfuLgabMGVITDfHbCJK6f6EWf3XTWR7n8sYeoaWsjYvXsc+sXg0vH51fFAmD9rnreWLeRoN/HrGPHUFVSzCcmjOfP7y7MEsRhnw8bF1FNmig6jxmG8LFx43hhw1ov4i49ICX5WoTs8OsMBBICgUxhHDRNzh46Zq8S0B+JFKoz58/+0ojPVNXuolvOA8Ymt+nA7WRHzPQajc9Dm76UfO6Mo9HHoO23UPkIYmQXpuxA/Mcg/m/v7elTNCUaea9pcbanhJvgldqXOL+6d9r
2+ta6bl3NPmjNfYsfX7siU6gpYHmP+E7ycV+QVITbyUOq+cbkzmKcFeEiXrj6c7yxeSMramu5b+F77GrPzq4mwDGVlXz79NN3ex2qyg+ffYWnlq70fGwNg5+/+E9+fMFHuOSk4/mfcz/CDc+/hCkGiuK4yuypUxhcVsLPXnudhOPgOErQZ1IWCvLQFZczqn8FbYkEZ9zzZxqi0SzN3RTBb5g4PQVwWIIhQiBsYIjguC4XjjiOm2bM2u01Feikr70mDncOBtPExcC9SX+/t0Wkn4hUq+qOPR1Q1UKbvgaa7ooWAWc72vJzpN8v93rS+VIT24nf8GHn8PmN2BHibpyQ2X0Nuq6MKOmPzzCyKhl3HMsLW3KaF8CT0Utrs6tVmIbBmSNHc+bI0UwePIQvPP4EMctGFcQF0xQ+P20S/zVzJtubW/jNy3NZtHk7Q/uXce0pk5k0PDOh+fMr1/L0+yuJ2Z5QtBzvgr775As8/O4S/uucM/jX9dfx2voPiDsOM0eN5KgyTyOddFQ1Dy19n40NTSSiNu2xBLe9/g6zT5lCRXGYEysHM3frJrqWUI0kbLAUCXRd1OtQmb3/uQkYVzqIX51zDtVFZZQGchdqLdAzBdNE/uwPQazAiyKiwB9zhC7miiEfAmQIYhG5DrgOYPjwzCKcWSTmQ1ZCHAAbYnOA/SeIq4IDsXPmfYCgGeo2jLk7Ths0mv7BImKOlZGPIWz6+cpxucuwf+KY8ayor+l8ZHdzC+EOBheX9DiHcVVV9JcQtfH2ZI5igQT8/e1lbNjawNsbt+C4iu26rK7Zxdz1m/j+eWdy2cmdia8eePc9zwWuKwqLt+3k2nv/zq1XXcQlE7LNHOMGDODCY8Yx+4EnSNg2jiqra+t4fuVaVEAN9WzeXSw+4uBlc0PB75kz3K725qQpY01DPW9t2sLnTprU470AT7vfF7krDmX2QWTdYc3++Mk6TVUn4ZkgviIiXaVFXvHhqnpnRxz6gAEDej6jZoeadmJ3n+B8H1AZrGRc6XH4uqQwDBgBzhl0XoanxK5YK79dOYfL37iFf3vnbv5ZsyprPFMMHjzjs5xYMYSAYVLsC1DqD3LjSefw4eqxWe0BLjtmAhOqBlPk8+828j5omlw/pWfL0M9feoNdLe2dUWwO4EJLNMHrazcSt5yU+USBmGVz05zXiVoWrqtsrGukIZL2tNIRmpw2t5hlc9Nzr+U8v6py49MvEbU6f4xcVRKOQ8J2sCwXMyFIIplMyAGxk3NFMC2DUMzkouHjGFqaZvfVzi1m2zy2cnmP92HRrm1c/Oy9jL7vlxz/4G/44TsvEclV1ukIxUXy3o509rlGrKrbk//WisgTeLHcb6Q1ySeGvHcEpkKOoAkQCEzf79rLdUd/mXs2/In3m5fgEx+Oupw54COcW31hqs32aCNXz72NiJ3AUgfaYFnTFq4ccQpfOzbTPjm4qIyHz/w8O6MttCZijCytxG90v+AXME3+duEV3PjaSzyxYgWWurl//oDrJk/h/DHHoKos21HDrrZ2xlcPYlBpp5b83PLVnTIz/am+gxxjm4bw17cW8eBb79EeTxBLFsgRmwwB7KZ9IjfWNXL/W4tpicWZOLyaGaOHYxhCQyTKtqaWbq+3Y1pCMkw6x3xsV9nW1Monx03g5vnzUppx+pOC5Tjc9d5CFmzfxojyfnxqwokML/c8J5Y31HD1iw+lAj8itsVDa5ewvKGGx869+ojXkAvua71jnwpiESkGjGRCjWK8sMKfdGn2FPDVZH7R6UDz3tiHAcQoQUu/A62/ojNk2QcSQsq+32NfVcWy3sN1mwj4T8Iwu1/Yy5eQGeb6MV+n1Wqh2WqiKjgwyy586+qXabVipNefiDoWD2x8iytGzmBgqKzrsAwOlzE4XIblOqxvaqBfMERlOHeCokeXL+PplauxnTSPgC7J2j970sl8c/qH2NrUzOwHn6CmtQ3DEBK2w0UnHMdPzz8b0zA8v+S0fvl83eKWzW2
vvu0l9CEzIWd6f8MG1ySlJf/2xbnELJtwwM+YgZXcPfsyfIaRM3CjW3L4HYvAyP79mHX0WO5YND9lq+4gYJpsbWnh1/PmErNt/IbBvUsXc9t5F1FVXMS35z5HNGFnBMAkXIeVjbUs2rWNyQOHZoxXH2/j3vVv82bNWvoHirjm6Bl8eNAxh7XALtiI82dfa8SDgCeSHzYf8KCqPi8iXwJQ1TvwQhbPB9bhua99vi9ObBRfg/rHoe13g7MdAlOR4tmIWd1tH8taS0P9NbhuA2CgmqC45DrKyr6b9YVps+pY1fwC7XYdRxWdyOjSD6USu/iN3Is7pf4ySv3ZAhVgbu1qchUBMkV4u24dFw3Nbau8b8VifjH/TRx1sV2XGdXD+J/TZnHv0sU8vsrzljhr5Cje3LApJWxEkwnb074n4oAV98w2n3/gcbY0NWfYT59dvooRFf34t1OnMn3kMOZt2NJ1Kp3kEHyW43rmgY7z5W4GgOF07u+wI0cSFqt27uLWV+bxn+fN5ITqwby3bUe2jTd9/K75KNIImj4+P2USYysqOb58AEu21aSS2PvDBn7DJGJbqfEt1yXhOsx+4XECPjOZlD55A4Nu6qXtuiyp35khiGuiLVz2+h202fFU1N6Sxq1cNWoq3xp/mHpjaMFG3Bv2qSBOZtPPCqVKCuCO1wp8ZV+cXwJTkcDUvNqqWtTXfQLXrSf9Wbm97S7iBAkGZ1IaPBFDAmxqe4c5236E4uKoxfuNL9Hk3JryZBhWNI5Lhl7PgFDnlzHuJHiz7j1Wt2yiOlzF2YOmUu7vfNwPmD6w04QHntYmGASNzACJ9c31/HrRG7y+ZSPRhJ0hvudu28hZj96FJAQrqX0+tWo1ndXppPPfDrus65kI5m/bxnvbdrCrvT1LwEUtm7+8s4h/O3UqPzj3LC658/7MfBBZN5SMnBXpQjh1fTm6SUffHA0StsMTi5bzn+fN5BcXn8MV9zxE1LKIWjZhvy/ljmeri6tKQAwscfGbplcNOjle2OfjF+efw/GDBvKNp55l9c76Ti8SBYkJUb+F25FkqGM+PnBUk0K4Y4IKCcMTxgJ+02RwUeZi5y2rXqXZimYsrkYdi/s/eJcrR01lSNGepd48mPE+VgVBnC8Hg/vaQUE89hpezpC0L4trsMMBbfwjIvcDytEVP+L5HXdiJxcEXRVqLR+KQ8c3fXNkFXesu4FvHvsHin3l1Meb+ffFv6HNjhJz4wQMP/dvmsNNJ1zPcWVeAMaswSdw3wfzcdzO85uGi2u6fGhgZ8jshpYGLnrmXm+hyoau0srFe0QWV5BknbrUY3yaYMzFll0trKyp7fbr0xz1kiKNqujPj889m9/98y1qWtvIMjl3nCMp6KV7k3QWgpeVrbsKHbGkhjy8oh+vfG02zy5fxYodtYyuquDiE49jS2Mz97y7iM2NzUweehSmCo++t4wW12FAaQmfmzqJa6ZMJGCabGxo5OV1H3gJgNKwXBfXAvwdcxJc3FT4dtaMVVM/PH4xOHtoZkWe13euyXk9hgj/ql3P5SOnZB07HChoxPlTEMRJHGcHmpaQx1ZhmxNGU+GvXvaxtfU3EmQwCQVbDaLqxycOtprJtp7gizrt/GLldxlePIMVzTtoSDSnTA+JZHaun664m/un/xhBmFe7Gc+VtvPD67gG5w6eRLGv09Tx28VzidpWcqzdf9DT8/emJ07PMEvgZZh0Ud7avKUzYMT1zAQdp6quLMFyHL70l3+wZMsOIgkLU0B9XZwF7aQGnFNwpZFD8w34TOIJJ3c/hYElndnpigJ+PnnyCXByZ5Px1SF+ffF51LS08ck7HmRXa3vqNLtibfzhpXmcMnwYx1UPZMmOnZ6Hm+P9WCCgSZuvQZpGl/QOIQHq06xUmyCEDD/l4SD3nP1JgmZXD5ncXzMDOSDVpPcHhcW63lGwpifxB07MKF3f4ub+8igW/YwG2jVADD8iQsBwKDITmNjEXR/tdoCY66PdquOd+lf
ZGVtHiT+CdFFHY06ctW1bWNq4lc3tuQoJCA9vWMTi+q2pPW/t2NytXTQDN1MIi5XcOty5rKTHguNtgudTu3Drds4aezR+FQzbE9AdgrqusZ0fPfkK723eTiTh/ZioAgkw42DEvM3sMEN0CPGOOaS5h2VteK5z1UUlqbTDuWIu6poi2E7PSdYTtsPlSSHs3cXkw4Dr2Zp//dKbAIR8PuIxO6Wxi3beH5/fTN3HDrOFOILEvS19bn7D4PYzP868y77M8RXZmdYuGT6RYA5h7KjLhwcfvgmC3KSdOJ/tSKcgiJP4/RPx+08EPO3TUklpuJkoDU4oeSxpbxVvC5o2bXaAiBugzQnR6oYRgZBpI0CRL9PHNGYb/G7lK/xwyROZnggZZ1NuXPh06n2/YJq3haHZpoaumi+dLmLS9T8XjAQYTud1FgcC/O/HPopPs+tNJByHJ5esyJ1vQkm7I2kKY4cwTgpsI7lJx+sOAR4Fq81hS01zpl9x+uaCum7KRNIdr61aT31bjhDs5DiLNnvekcu319A1x0+HQK4KhXPGBKVs68k/V9jn57qTpnLmsNFZKT07+OIxpzOufBBFphe8EzBMgoaP/510CWX+3qU+zUXMibC06XUWNLxAYyI7KvJA0BHQURDE+VEwTSQRESoq76el5SaikYcIGTatTnZ+W8FPvVNGd8/cRWaCNifzy2WKIgK+lHQR2uIB2hI+drRtwFFwesjdurm9kYZ4OxXBYj5//GR+9u5rRB0LMUATZEaQKZBeUgh6iKQTL7d68hJ9psFVE09gZ3MbpkhmkEVSyuby7EgJyq4uYsljYpGqT5fLlpw1MwsvBXSatpy6DlMoC+X2SolZNq8uW8fDC5fmtjGLN4/ioCcQX1i1Luc4ALvq2xGfZ3bJvi7BdIRhleV8eeI0Lhs3IecYHYRMPw+cPpu5tet4Z9dG+gfCXDjsRAaHy3vsB2C5NpsjNRSbIQaHK7OOr255l8e2/BqRjihBl0n9Z3Fe9RczPH3aEmuJO7soDRxLoA9cMvOhsFiXPwVBnIZhFNGv302Ul/+MAW6Epu3nkHDqSFeNDAlg9/AgsTu3UMGz/bYmgqQ0aiCXwVTVaytCqvDkVcdMZFHtdp7ZuArHdb0ybRnFlz0Dp/qUYgkQt+3cwrMr6iX7+cyUk2lojxKP2xk+wtohFE0ImAZ23E1J1t0pNJJbJnbvvqZ4i2VmWkMg7PdxxYwT8fuyf7RWba/l2j8+hu26RBwrK8F96l+BK6ecAHj26O7YnfVn8qAhPHxF/nmqDTGYOegYZg7K3xTx4s53uWPd47gojroMKxrID47/PNXhKgDa7CYe3fIrbE1k/GAtbnyZEcXjGV9+GjF7J4trridibcTAj6txjiq9jGMrb8wwxfU5WrAR94aCaSIHIoLPLGZS9WNUhE9HMAGT0uBEJlY/SNjM7QsMEHUyc0eoegt/AEVmEaNLhjE8PCrDJU0EfKabEsiqyX62YMV8BOwwz29akypX/3+nX8BzF32OY0q8UG9JMzskh8DvM3j5M5/j+inTUhpgTjpMCg5UmCH8psnamrr00bxzdLxwwI25GK4nMMX1+na3uJYhhNMFop30F05uXc0Ahpsc2/XG8IlwwcnH8h/nZ2d2c13l+rv/QUs0TiRukbLyJN3yUpvjnfPzp3peCp84aTx+s/uvgPH/7J13nBbV9f/fd2aesn3ZBZbO0rtKEcQGihW7WGOLSiyJJsZEY5rpUdO+qWqMGkvUGLsiKiigoID0Xpay1KVsL0+bcn9/zMxT59ldmvkF+bxewz7PzL137jzMnDn3nM85J4spOkfTuHTIkKz9DgeW1W3krxWv0GJGCJtRYpbOlubd3Lv8z3Ez1uqGuXjRYHQZZUHNO0gpWbpnKi2xCiwZwZBNWMTY3fwG2xqeOaLzd511x0wT7cMxjbgVBLQujCj7B5aMgZQoTqDGOV2+zlu7Hk5pa0loNHIwPEwMUctHQPHz3cF3MrRwENN
2LmdN3TspbYQATbOQlsAy/TS3SKRp//fUGVF+vHAGf1+zkDcn30ihP0i/ojcW/5wAACAASURBVFIu6zuMLdWfEsFIeR6FgOuGHk+X/ALuPfkULh40mCnPv0QopqeGJrsaryNQP1y/mZ+88yGWIT0dggISdeqSzAoCUFSncEe64JWZ/V0Bm6KxugIdnMQ9ttCUTscJg/vysyvOzpgTYDsPown7u3BeZCLuaEtARXDFX16gIRKhORrF8MmEOpL+5nHmJJPYHzmaxoDSjlw+tP05lw8GL26bEa9958Jm40RZULOaUzsdT9ho8qx/CBAyGmmMrSJiVDnUygQsGWZb4z8pL77liM3fPs8xAdteHBPE7YAi/CkP6dCi0zGlzsw9jxG1QoCgU2AwVdFGBHpK8IRfKea44uFc2v18eubaqSBP6zwQ06O8uk+oXFo+mvoWeGVjasKZmGWyramOx1Yt4HujJwJw1ZARPLrsc2KWmRCcEvIUP0HTxw0vvcKOugbK8vP4wcTT+aRyGzM2brJ5v1mYC68uXc0lw1rR9tLtto7WK6OgylQzhRSQvPoVjhAX9o+TMl6yow+ZEIDCsesW5GTX6pvDsRR7qJC2I1CqmYq6KSXbq+vtS1AcjVwBy0fiaRDuH4GiS4QlGFneFb+mcfHgwVw8aDAB7cg+OrvD+z33x0yd3WE773TvvOH4at5Ct1Kdlwoq/fNHETH2kG3RGzPrDut803Es+9qB4ZggPkiMKJ7E8KIziVlhNMWPKjR2h3cxvepttrZsoWOgE+d3uZDBhUMxLJMVdVtZsP9TtjbX8tn+TRRqAep0CyklJpJc1U/nYCHfHHw2k970ruhtSskbW1fHBXFRMMhbU67jBx/PZP6u7bYQNgKYEYsnFy6Ja5i76ptYtnMPFw4dxEdfu5lrnnmZ6pDDKnC0vvgjI2Dehq3eFy0dBkbyd1czTvsLjp01YUpO2M+TjcMeDru4WdhpF/RrXHJiZj0+F8f37krMcNTnJOVPZPB9k+aF3VZRbFqbqjuKftoTIaQAE564+BKKcg6d4dAeRE0dvyikJRpCEZKAz4jXS/WrPnrldQGgT94Iugb7sju8ybYTAwKFgJrDyR0vRch6JN4ac56v/IhfhzwmiNuNY4L4ECCEIKAmkux0y+nO1L53prRZUbeVe5c+TYuhEzNds4V9g/qESoE/jxNL+jC+8wDO6zoCU0r2h1uynjOWFlbcq6iYf118Jbpp8u133uOjTZvRLSsuhJMfhWlrNzCxfx++dfp4fvLOR57jW5akOhROU1EdOIEP6U62bI9b3OyR3MhDExde58IW6gGfxsVjh3Fi/x6ZDRwU5QY5tX9vZq/ckjLvbP62lHeABUK1hbFwLTxpT0UAhdqWyBciiGujzdw8/zFqok1ELR8gCRs+CgJRgioU+/I5scResQghuKH8Z3y6/3WW1s/EsKL0zx9NWaAT/6r8OhGziT4+PwWqSbziK6CIIANKvnPEr+UYa6L9OCaIjyAa9RD3LHmSiGVgWpmGSF2aNOohZu7czKubN/LbnE/45rAJrRbIrGuJMPQff2Jyv4H8YPwESpxsa6YlmbU5qcAn3gLy97PnMePOm/ndjHk0RdPyNju2VcURkMkzUByH2gE/Wq56m+SoE1bq93i7pE0BhvYu44ErzmRE7y4pQ0Z1gxdmL+X9xRuobgoRMQxaorGMaMF4XbvkSafZrJM1byHt67SShLkAYlhc+PfnGFfeg19deA5di45c7brfrZvG3khDkunKnkhzNMDYbr24f8h1cQYNgE/xM7HsGiaWXQPA59X/4bPqf8VD8LfqxXSTKh21FsAkR+vOgJL76ZR7xhG7BrDvo2OmifbjmCA+Qtgfqecfm98natmUAvumzLwxDWmiyzDgZ0+4iV8s+4BhnTqxat++VCKBu8yPKITQeXPjWj6v2smMq28mqGmEDT2R8L4V6tW+lhYCmsa0u27k6n+8xN5GO3RbcXioIkk2C8cU4Wq
2KWjvM5beL1lTz+LYkyr4/Rq/uO5c+nZJ5c7OW7OV7zzxDjHDxC0OIiHBpU6al3Cdfe3gBgnsih1ooHvw7QzLYsHWHVz19EvMvOsWgr7sj862hnp+Nm8Wn+yoRBGCc/sM4MennkHnXDs8e3+4hdk7NyGBM3r0o3OOnSRISsnsPas9/QdB1c+VPc6lxJ+dsWNKnQU1L8WFMNi22l1GIXvNjtw54EUC6hdVAFUkKR/H0Ba+dIJYShMrOhsrthChdETNuQyhZoalHvz4kkc3vcG7u+cTNZWE2054c1NtqlpCeoRNnd3ROvoVlrKjpYGIaUflyagCeuLGNkzJ7qYmpm/ewOWDhlEcDNIpP4/djU2tCsnO+fZDX1aQz5x7v8aqXXvYuK+GT9ZuZubKzZld0yloSfNObtMaFAG9SzuwfX+dZ572dIVVMeHO80/KEML7G5r57j/eiduDk4W5dCl0MvFXONq3FCB9HidOmaPgD1dNxufX+OsnC9iwb3+8jp4LU0qaozHeX7eRS4/zZk1Uh1q45NV/0RiLxh2o723ewOI9u5h17S28vHkFDy2Z7RRFhQcXzuC+UROZOtTOEphendtFxDSIZSm55aJZr0Fm6a8IlSaj5gsUxMdsxAeCL9UrS1ot6NWXYNTfg9XyFGbTH4nsnUBT4x8JRT5FZqECHQg+3LuY96sWoksDK8lRoooklS8NuplKeQsZMV4671oen3g53xs1kS5KkSOEU9fvhmnxTsUGwLYX/uSsMwgmefPTzxbQVG4ffyLNkSiWJanYU838DduYvaKCVVv3oMbVywTiTjiPLasATrYBW6DGoKamBWJk5JHIJpTXV+7LGPbdheuI6abned08EYBty3Z4yoppJ5tXomRwlZOHuWLMcM4ZPpAzBvbltalfoW+pd/RZSNdZW5U5NxfPrl5mJ2VKelMZUtIQifDY8oU8vGQOUdMkZOiEDZ2oZfK7ZR+zorqKiKUDwvuFjaQ0kF0bBsjRirDSqGouTGmQ9wVF1MExHvGB4kulERtNf0QamwDbw9xstVAnDWj8DYh8BApdSh8nLzjxoM/x6o45RCzXg52QOkKAIiRJWS6RUhCNqVhpSzgJFPiCTOzel4nd+/LSstVZziaoam6Kf5vUvx9PXnEpv/t4Hhv3V2PoFpYl40ELIzqV8Yfpc3n4rTkIIZCmhRnzyKgRpzl4++wgTRhbSX1I/BW6rZFaQEsklhi7LUiYvXwTW6pq6Ns1oRVv31/v2d09rWLZEXkuOyNZQXaDO6QTRyOT5lqam8NPLp2UMmZ5aQc27qvOOF+OT6N3SXHWqS/YtcMzT3PI0Hl783p0j5wiMdPkhQ3L+MGJExGoSJzq2EmrKJ+iobehEfuVHAYVTmRj48dxFgWAKjTK/MW8ue16LGnQO/90RnWcSq7WsdXxDgnSewV4DN74UmnEVuQNXCEckxZ10kgob7IZSzZSVXMrunHwJfMa9Ob4Z4HAtITDDJBoioVPMRFIdEMlFPajG6nvQilts8P6uoTW1b0ge26LDsGEJ19KydyNlWzcuR+foeCzFAaUlPKnyy7kkgGDWb99H6GojmFZ6KaJISXp1WxcAZsX8BH0aQlbqEsNM5zNiYjzKwoBv5LIXOYGbLif0yfczofTsiRzV6bS6Ab16JTJY06D8PjrbqoBOU5WNeE4JXNVjV9feS5C2CsM184+9eQxnlxhVVG4aMTgrOfvUVDkmfzHp6goCp45MCwk1ZEWinw5dAoWYjmvcDfNsYVAERr9CsqyX7iDs7vcTXneaFThI6DkoQofHVSDmLGLsFlL1GqkovE93tp2CxGzoc3xDgXHioe2H18qQUySltBoGZ7Ps5QmjaF/H/Qphhf1xc1bJoStbZpSxbQELVEfTeEAoWgAw1QRSkJjdsOapQTTkPxxVaK+6tVDRuBTMv+rAqrKpQMTwRcvLl7BC4uXEzVMmqMxIobB5v01/GbGJ0xbsZ5IzEgVZAJQSeSRcPYHfRpXjzu
euQ/ewcmDe9tX42RRS9GEHeFc6s/DZwk7ZNmwNeFsDAt7LGmHIiKRUmbamyWoqoKWFn581sgBCfNDlrGzafAubjxlNH06diA/4OP4Xl159JZLCUV1zn3oKUY+8CfGP/gof3n/M/I0H/kiqeq1hHy/n2evv4LCYDDr+DcfNwq/mhldqSqCKwcPJ0fLzD+co/o4o3s/hBA8MOwigoofiYKFgkQhqPq5e9DZ7cpd7FMCXNrzp9za72ku7vEjzi67BQUrxUwmMYlZzayvf7PN8Q4W9otEtHv7suOoEsRSSmT4LazqKVj7z8Jq+AXS3BM/rgQm4F6ytxgGiKEbrdRjawM3lp9HwHlghIAcn44tcBQMU0Um/eRuWLOiWEgLLEMgTQWJYE3tHj7duY2Ptm3m1J69GFBSSiDpAQ+oKr0Ki7lkYEI7+/uni+I13lyYUrK7rgHLtDJst1gJ8wFJmyJgeM8ycv0+7r7gFHyKmiHg3M+6brG3tileWcRtp1gQ8KUJJCkRpkzYbWOO4HZVvzSa26RRA1K6z16+2U74k0Ur9mlKm0mXmpsiTLv/qyz8xV28cNc11DWH+eHLH7CrthEJNEdiPPvJEq7684vUt4TjaTsVHWTEZO3O1tNMHte5Cz8/bRI5mka+z0++30++z8+j51zEV4eOpktOPv6kits+RaFjTi5T+tkZ3E7rPIi/jr2JMSV96ODPZWhRdx464SquKR/f+oWlodDXmd55o2iIbcKQ4Yzjpoyxo2X+AY15YDiWBvNAcFTZiGXjDyH8LvHKzeGXkJF3oONbCLUrWsH3iEU/BdlCAJ2Yh2NDiFxyAu2rc+eFXnll/GnkPfxj89usbNhMsS/AuG4jqI0YvFlZ4W3jjN+H9gdpCGpaTG6f8RZgl+751ujxWKbk9Q1rsaTk0oFDmHrCGIKOhrVs5272Njd7ssykITHNzDO7QjiFfSAhFjM5bVA5AP26lDK2f08+XVvpMXFvRgXYmc1OHNSLxRt32Jq4ew6ZJtAlSAOkInHDx/yayjcvP5UuJake/sqqGnQjif+WbJcGrj9zFE9/tDgj4CQOSQYT4g/T58bLL7lwv0s/KWlCI7rJU3MXc9XY4zyvuTYU5sNNm4gYBq9d+hX2hptRhcK47j3iVTvenHwTf1n5KW9VrgUJF5YP4VvHn0KuLxHCPaqknCdOutXzHAeKoNoBBQ2LTPtyjnpka+UdsxG3H0eNIJbGJgi/AyQHKRggm5DNf0UU/Qqh9cDf6X2MlqcojMyiObYBmVJGQkVViijIufSQ5tInvyu/Pv72jP1R4zVm7FrrSVGSbn05CTKsYSJpthKmlL8snc+T517GXWNOyui7oHIHt/37TfsqUsLG7L9mzNMNH89slgzXsaUlaW19u5Qwf922zERArdhrY4ZJj85FnDSkF0+8u5BwLIYRMbPS4xQL/H6FU0/oy+2XnEzfbqnUtRdnLuWVWSvic5TpSYMkPDN9sV1nTtjOuCQZCtIOlT57ZCINpWFa7K5t9L4Ad9y0Cde0ZCacB3h77Tq+/8EMFKFgWRYIwTXHjeBHZ05MyYNRFAjyoxMn8aMTJ3mO016Y0mLark94e/fHtBhhhhf158byC+iVl1qlfEDhZFbX/ZuUXKmAJoIM7XDFIc2hLRwzObQfR8w0IYToKYSYLYRYJ4RYI4T4lkebiUKIBiHEcmd78KBPGM22zDIhOidxTrUzvsLvk9t5Jr3KZpMbOB37Z9DIzzmPnp2noyh2tFrUbKZJ34slWzFMep1RWqysr2RRTQVhMyFMf3jceZTlFMYrNQScSg0d1WJyVD8FvgA+05ciBF2EDYMnVi7yPN8vP5hNJK0ApitAfIrwrhzhIYDdzbIkdzz+GhW77eQy0aiRvTxTlmctN+BjzMCefOWsUcz6/R3M/M3tlBSkhQinZUeLxUxmfV7B7MUbU5ot27iTR1+fh2FYOCFbCMs2cwhTJhyEOE5CR7jHucTOOToX5jF2YCJUWlUEecF
WUoSmQQKFOQF+MW0WszdswXRq++1ubOT7H8wkapiEdZ2oaRIxDF5cuZIHZszg0+3bE8E2h4iGWIjaaDN/2PA8z1ZOY0+khiYjxIKaVXx72R/Y1lKV0r7Q351Ty76HKgL4RC6aCKIKP8eVXE+33NGHZU5ecPnxx2zE7cOR1IgN4DtSyqVCiAJgiRBippRybVq7uVLKCw/5bCKP1FIVyce8cwT4ff3o3uml+EPiai4Rs5FZVY+wPbQIBQVNCXJypzsZXHRum9NYVV/J91c8T9TUHUedxd0DL+TSnidREshj+lnf4IPda1lWs4OuuUVM6jKYd7Zu4O3KdQRVlbKiAmZt3YqXhNudRFVzYVgWFftrslw3jO/bm+Ubd9ES0zOOZePyAizesovrfvcipw4uZ/7qSrwiMYQETQGfqhHTExngfJpC19JCTj+uL2BHrBXmBRk1sAcfLamwO7dy7sffmE9TS5Q+XUs5fVQ/Xpy5lGjMDmzByOxjKTY90E2xKQHUNNuFhOraZh57Zz53X3qqfW4huO6UkTz7yZIU84SqCIJ+HzHVJOqYQtzVxr7mFl5cuII3l61lYFlHnrn5Ct5au87WgklcmlQhKk1eXbuGdys20rOwiBeuvJLiYDBrSaXWsL2lmp+ufIX1jS6jRyffb+Ca4SWSqBXj2a3v8ODw21L69is8hx5549nZMh9T6vTIG3dkqWsOjtl+248jJoillFVAlfO5SQixDugOpAviw4PgWdD4U68DkHNtq12Tl45SSt7ZcR810S1YGFiAYUb5ZO//EVQLKc/P7jRp1EPcu/QpQmZqbbq/bJxGeX5nTujQF7+qcVHP47io53HURcJMnvZPaiNhog5HtEKpQQuAkZYGQhMK47r2zDinKgQBTcvUiIFcn49Lhg/GClss2rozUZ3ZvdZWbLwAUd1k7uqtSFPa5fGS2RYO/Suoaugxk26lBdQ0h/FrKpPHDeHOi8djmCa79zdQUpRLXtBPRaWT2rEt5VDCC+8vRQAPPfshpR3y7C7JrI0kKE70HMJe2ygGWIbdWGqJ9lHd5N+zl3P7BSfhd2h5d559Ensamnhv+Qa7grRuoAiF0pxcOhTmsL5mPyHdsDm9EHdKhmI666r28+xnS6m3InaOD3f6SeHWEjsIZENNNWP+/hiKBqf3KufnEyfRo7DtUkkAzUaEWxc8TqMeJlG6S6E+mkNJMITqsG8kktUNmz3HCKgF9Cs8p13nO1w4ZiNuP74Q1oQQohy76PlCj8PjhRArhBDvCSGy5joUQtwmhFgshFi8f39mrlahFELR74Ggsym2Juwfjci7sd1z3RtZR11sR4Zzw5BRPq9+utW+M6qWe/JEI5bOi5WfZOx/fM1CqiOhuBAGO++wkWa8FUBA07j9hFQnYswweGjmx+geAQRC2hFtD772IYsqd6bkK46zJtoB3bSDQtx5CBJLfYkkHNUxTIuaumZOH1xOLiqvz1zOlPv/yaQ7H+WGB//FeXc/zrf/8AbV9c02ta0NLnAyLEtSXduM1pYWmWSOQDq5fqTDyEhpJqltSrAINFXhV1efy/Tv3Uy3okIUSxCLGuyormf99n109eXzjTNOSnlZu4gaBq8tXcMp5b3J9fncaRD/sdKnKG2n6cfbK7nkPy/QGG29CKqL93ctJ2rqpNdPBGjRU00redoXk6qzPThmmmg/jrizTgiRD7wG3COlTPeMLAV6SymbhRCTgTeBAeljAEgpnwCeABgzZoznY6zknI0MzIHwdJCN4B8LvtGeD1E21MW2IaUVJ9ODI3wENMR2tdp3T6Quo6qCi6pIbca+97Zt8Iy0Auws64Zdrujk7r34ycln0rMgVYO69433+GRzZYbwD2gqAaliRiyiXhWXDecNrACmh0/KEdQC+0CK0yveRqYI82jE5KPFG+M5iGv1EALiLIfP12zHsqyEHRfwSEjnabKQ0tFwlVa4yUl9458Vey7JFisBdEi3VQMbd1ezs7rBzmPhIGaY7Gl
o5sk5i1JfZEmD6abJqeW9Gdq5M6v37iXssTJJgWWbUkJ6jN8smMvE8j6MLuueEpiTjvWNu53w53QIoqZKSNfI0QyCqo+LumeWkvpvQHJMwB4IjqggFkL4sIXwC1LK19OPJwtmKeV0IcSjQoiOUsrqgz6nUgJ51x9sdwq0MgzplhRy9T5bGBT4W49sGlrUkxzVn+KgA1BRGFFUDkCTHuWNrav4fP92ms0wnq75+MVIAqpKJy2P6155FVNanNt/APeOP5lwTOfjzVvjNsxkFAaD9AoUsGLHHo9BsROmO0I0xTzh/nVLGQE+TWVIr05s2lWdSUOzUvvFA0M8rshIoo3Fo94s7Mi+dCnv9Zp1bNQuGyK9uee+ZHMKNmviqgknEEjLnLazpoGHXvmISFhPEDGc1B5R3bDPnaR4xq3PEoZ27YwiBM9eNYUnP1/MHz/7zIMUaS8hXPOGlJKw0Hlp4wre3LIW3TL5+gnj+NboU1J6VUeaqWyupTRQQEDxebzk7YChmKURkApjSoZySfeJGWdv0mtYWPMO21pWU+TrzEkdL6ZHbvbowMOFw22ZEEKcB/wJ+9X6pJTy4bTjAeA5YDRQA1wtpawUQpQCrwInAs9IKe9K6jMaeAbIAaYD35KHy7N6ADhigljYauhTwDop5R+ytOkC7JVSSiHEWGwdJovn6YtBXXRnkhBO/guDis5rte9pnYbSwZ9PLFKfksrQr2pcVz6B3S2NXDbzn7ToMcKmjopA9UlMQyGjFLLDqos1S6Zt2BjXyF5bu4Y5W7fwwPjT8amqpyDe39xCB5mdDZAiuAR0yMvh/otP57V5q1i7fa+TutNethflBXjk1gu464+vsb26AdOyPGvBHRCc5YaQ9nVKBYQT1usKrPQJC4nt/vUob9/ahaqaIBDwYVqSKaeO4K5LU4VdTVOIr/zuRRpDkRQ2XPwlIUjJD5J+VyzeshPDtAhoGid060quFqDJiCblQZYpNfHiUzcEpiJp1u2X9uMrFjG4tDPnlg8gahp8f/E0Zuxch1/ViJk6+XneKyfbPiwo8XfhB0Mza9DVRHfx1ObvossYptTZFa6gomkR53adyqiStp3PBw15eOlrQggV+BtwNrATWCSEeDvN+X8rUCel7C+EuAZ4BLgaiAA/BoY7WzIeA24DFmAL4vOA9w7bxNuJI2kjPgW4ATgziZ42WQhxhxDiDqfNFcBqIcQK4M/ANQfyNpJStyPpaqdi1d2JjMzMmgawvVjf+D7e73JB1Gz22J+AT9F4Yuw3GFFcHt8XVALc1u98uueW8tMlH1AbCRE2bc3GdKSZqjmSzdUoDft8mLYKmJLJy7JoiEZZvq8q7jjyguZT0TzCooFE1WQJmhToUYNnZi3hd7deyNcnn0x55w50Kc5n4rC+/PTac3hp5hK276lD6pYdxnyQQtgOjpAptDUB8YrQikzIrgy4x43E3AEKcgOtmivOOX4gN5wxmt9OvYB7ppyOmvabvPTJMsLJRVWT+iLtl5HQyPriiZkmH2/YAtgBHSBRLJH4jTPf5whEIpLRQdjQeXy57UL59YqZzNy1nqhl0qRHiVoWLeEcNFQSN4lEVaz4SytspK7CXHxQ9Q8iVggznllQossoH1Q9SczMjLg7rJAHsLWNscAmKeUWKWUM+DdwSVqbS4Bnnc+vApOEEEJK2SKlnIctkOMQQnQFCqWU8x258xxwaEEEB4kjyZqYRxvPq5Tyr8BfD278GLL2JtDX4kbSydh88E+E4v87ILtwyritHmv7jllaU8nymioilm2YjGDyp/UzUNCYU7UZy2OMoKbRJ78j66trMAwZ146z5VWIGAbra6rpVlTA5uo027NzY2+pryU34KMxHE09ZqXybKUlCZk6W/fUcsVDz9PUGMEyJYoULGjYxmcrK4lE9VTh29rP4Kzb080FAZ/K4PIy1mzYjYn0kk/xYJJkRSrgV4k6gSCukHZLGikCbjh3DI++8WnGNNyk8R8sstOE5gR85Oc
EePQ7U+iTFCyycOOOFLtw8lxUIZg0oj8fVmyyQ8Q9ENEN7nn5Xc4a0o+7zz45zkwR0hbGltbKW8utIOJgb6iZiKnzWuUKomZatJ8hEREfpflW3Lnr3uIKglEl/TxPsaV5BV7/YYpQ2RZaw4CCMVkmd+g4QI24oxBicdL3Jxy/kIvuQHLugZ3AuLQx4m2klIYQogEoBbKZOrs74ySP2f1AJn248L+bayI8LUUIAyBDEJsNsQUHPezgwrPRRCBjvyb89Cs43bNPo97Cy9tm8eCqp3lo3b+JySjJRKuIpfPnDe9nFeSaULh/zATGl5XHgz00RUFVFE/OqQA65eXy/bNOtxMMpWkXQtqUvCe+epntp3IFsGFTu1wnnHsIbE27tiWMYUo774WUhKK6bRcWAqk4ws3V9JK25CxrqiLIydFQFTsZvqYqqIrgzBMH8Jf7ptjpHTMuKLFH4GjHlh3RZ8UshwFhG3yFsylOYMc/X1+AqohMR10awlGd6vpmvv771+KBGABdOhR4yklFEUwZP5zf3XgBv7l6sl29wwMS2/49e8NWXl64ksmDBqbkhG6voVQRgjFl3amLhrPK7ZihEDKsuPMYbO06qPq5ue9Znn2EyP6Iq+LI+uqTE1m1tQHVUsoxSdsTacN5/Szpv2572hxK+yOG/1lBLMNvkSKE4wciyMjBm3iGFl9AaaAPmkhk2NJEkMFF59I5ODCj/a7Qfm5a8BDPVn7Ap9WrsQhR4I+iKalalkQysrRbPDNb6jEY17k3z51/BU+eexlTR4zh7pHjefqiyz0zeQU1jRuOH8mont0JCjURWWYllvd+ofLw2x+DJVBidpVipR0150Q2y06Scy7xikmFqgjOOnEg373uTIqDAdAlZsTEilrMnLee5et32kIzY+zMe19TFXp37YCqtn6LxmIGVkSmvoyyTFJi50Vesn4H++ubqWsKc92EkRnOOwC/qnLrWWMBOGf4AF6/63py/b7UVUHSbxI1yT5vYQAAIABJREFUDF5ZvIpfnnMW95w6ni4F+eT4NMoLilOSNbkQiBQ2h09RuGnYKDoG89CEd2CSKSUtUR8RQ0VKgYLCqZ2G8OS4u+iRW+rZZ0jheJQsgU69c7NXxj5U2E7Jw0pf2wkkE+l7AOn5auNthBAaUARk0pVS2ydXpfUa8wvB/26uiVbe9Fkj7NoBTfFzee8/salxDhWNc/ApQYYWT6ZH7ijP9n/Y8ApNRoJo72oqub4YjdEgrjQwpMX9x0/ktrmvEXEqM6hC4FNUHho7OZ4U5uTuvTm5e+/4+L84YxI/mvURmmI7s0zL4pvjTuLE7vYK6saTRvL8wmUpWdcCqorRYrKyvqo1TkZWeNpcrcxjKULJ1ZBNePX9pdQ3pGp2lim597evc9ro/sxdujnOTc4GTVUoKcxzssZlbxu/vrTq0jLLnW1ZFj966n2awlEkMKhnJ246cwzPzFqE5ghMKSUP3Xg+XUsSFTEGdulIv9ISVu/emxD27l+HIqebFoZlMXXsGKaOtZf8lpR8d8Z7vL+5AiklmqJgSRhf3oNPq7YRtuyIQVVRuPbdl5ncdyBTB53E39d/FvcluCezbcKCmOEjZtjpM6/ueQa98zpjWCaqUDJMcud2ncqO0HpCRiO6jKAKHwoKU3rej6q0nVbzoCHJdEAfGhYBA4QQfYBdwDXAV9LavA3cBMzH9j/Nas3nJKWsEkI0CSFOwo5xuBH4y+GcdHvxPyuIRc4UpL4U0lP8iSAi56JDGlsVPgYVnc2gorNbbRfSI6xqqEBx+F8SN6xTODZGiSkFCoKBBV0Y06k3MyffzoublvH5/u30zi/mhgFjGFScvWbelGHDOKtfP2ZXbsUwLSaUl9MpLy9+/NtnnkJJbg7/+HQRNS1huhYW0LegAws3bk8IJkGmScC5PVOcY1biUPoj1KqDznGkSSn56PONtvAUmX0sC3p16UBxQQ6hcIxIzEBVBaqi0KOsmC27al1VikhI582ZK1I
qabQ6Aec3T7kmj/aRmGFzfR2BtaZyDzv31/POj29m9fa9+DSFsQN6EfSnPhqb9tawaW+Nd55lC6Rqs09y/anCTRGCP5w7ma/X1jB/5w4K/AHO7tuPPL+f78/9gDcq1hIxDUKGLXTf31qBTwzmnuETeHTtPEJmDMOyUFQLTU1druiWxVs75/PI+ufZG6kjVwtyRY/TuKHPWfF8JXlaMd8Y8ChrGuaxvWUtxf7OHN9hEoU+bw36cOJwksAcm+9dwAfYmtbTUso1QoifA4ullG9js7SeF0JswtaEr3H7CyEqgULAL4S4FDjHYVzcSYK+9h7/BcYEgPgvUOYOGWPGjJGLFi1E1t8Jsc9t2zACO5z5MpSinx7xOUgp+eGqx1lcuyGuBbs/pekst6JGLgo+8rQcTu4wgnX1++iZV8xNA8YyrEPX7IMD2xvqaY7FGFBSis9jaesFS0oUITjtZ49T25Jma3SYBnHabppt19VqFccGnCq0pZM32ENAO30zHItSZlTokMDQfmX8+ftX8s7Hq1m8ehulxXmcd8pQ7v31a8R0M05hS+7otS/lGMQrjbgaoRRgqal9VEVgSolMM4/k+DW+dcXpXDnx+MwTOJizbgvfe/k9miOZ7ASJrYHffPIo7p88IesYyWjRY4x6/m8ZTjmwVzSLr/8GeT4/zXqUWz59mg1NmXmQ832SvEAUIykpVUDxMansBO4fenW75tEWhBBLpJQH7NEL9O0uu//yG+1uv/W6Hx7Uef5/gBDi3nY0a5FS/j3bwf9djVioUPw4xOYiw9NB+BA5l4Dvi/m/XNtYyer6LSnasBB24UdFSAJqkFv7nIdfBHlo+WxerltOzDJZWr2T93au45ejL+CS3iMyxt1aX8fXp79NZUM9qrC1xZ+dfiaXDh6KaVkoQmRlhLiOvaZwNFNmuVF0SUlz0lkQwrRVjeSoQrelFHa+iXRIsrM74pEPSV8Nw8KnKazbUMXiZdtRFMHMj9djmlZWLTb1ZKlf42O7yqIiQQg7tabpRO8J8PtU8vMC1DRlprEMxwwqdibC5muaQjz+3nw+XF6BqipceOJQJo8ZlMiFnGVu26rrW5l8KvaHWlCz/D9qisq+UAsxWc/f1n7KjoYYuu5H0XRUNXHVfl8kI6Vq1NKZuXcZt/Y7n1zNx5bmrQTVIH3yylFaNecdboh4atcvAe7D5iO3dsF3AEefIAbHIxyYgAi0Tws5nJi1dxGGjKZ4OyUSSwgEgh8Pu4FxpUP56scv0KxHk1b/kohp8ODS6ZzbY3BK+ZuoYXDVq/+mJhxKETIPzJzB7+d8xp7GJnJ8Pq45fgTfOf0Uz5pqAPlBP/WhtDwGls2YAG8TBU7F42Rflwt3uX3eiYOYuXADMd0ubpkT8BGL6OjtTVwB9Cgr5oKpjxGKtL9itks/zhDsHs64ZGHtCmOwbcMNMoKiKBkpPYM+jT5OkdLGUIRrfvMCtU2hOBXthTlLmbdmK+P69WTextS8zPHfS0JjJC1TUyvokpeflUVjWhbbW2q5+7M3iJqGQ3kUYPrID0KOX+ATAkW0eI7gFxqvbH+XJQ2foAoVKS1y1VzuGXg3vfN6e/Q4ApAHTF/7X8bzUsqft9ZACJHX2vH/WdbEfxMRM8asfQsRgtQNeyvP7cq40qFYUjJ/f6Xnw6Kg8N729czdvZVdzXak94wtmwgbqcEFmKCHLaoam5DYmbxeXL6Sb709Pev8rjzpuHjlZhfCSwgnH89ioRJAcX6QX95yHvv2NxIJxTCiFmbMorkxih5zSzAl5Qd2cgSnX7hfU/ls8ZaEEJYeW2vza6Ndir3bhdPesiRS9+6oaQoXnGTX/nv101U0hMIpmepihsnOmgYmDxvE2H49U6YOgGIL87OH9c9+AcDW2jpeX7OWTyor0RSVrw4bRU7ayzRH07hh6An8YtlMwqaexjsXGLqP34yawkfn3EeOmkmzBIhJnc9q5xKzYoTNMBErSq1ex8Prf0fUbP/L4pDh9f+
bbfsfhpTy/kNt8z+tEf+3sKBmlScNTTiMpCt6nmF/xybbewVxhI0Y3/vsPQL4iVkGE7r3ZUheGWE9VVNUdJHkhrIRMQzmVW5jS00tfUtLMsa+Y9I4llfuZvXOvYSj9nhZnW2OlulZcdk+xO7qRh54bBqmYSXsx8kPkJRxQS6S+gns3MSqUDBiJnrETBzMAuE4vlIpEPZnTRGYpmyfMz5pPnFN2ZJ2ikzXnqxAl5IC/vD1SyjMs+mKc9duIapnmiDCMZ1FFTt44pbLuO7xl1lftY+Ybv8YflWlc0EeU8akR8/aMCyLe959l1lbtsSZDTmaxjNXTMGnqDy1ejGmZdv3bx4+ipuHj+L5dz73HEsTCsX+QvyqxuU9T+GV7XNTclCoKOSoAiEyBa4pTRbVLeHUjie34wc8HPjSaMQAODktfoodVSyBecDPpZRtpm04JogPAnWxhhQHSTL8isbpnUby3q5VfFi1ji65+exuacoQxqaU6LpFzCnt9PGurTSXxsjx+WhJFsZZhJaqCNbu2+8piAM+jadvv4Ll26q466k3aQxl0YIkCSdcG1pJPGmPIB6d536VaUI4/lmCoVuYluXpQc/Kpjdx32LxnYU5AULRWOIF4OG/lBkfkubo7JeGJE7TNaGpPkx+TiIvR4f8XI9Z2ahrCWOaFgM6lbJu+15UCdKCId078ejNl5AX8M7v8fjCz5m9xU3QZN83LbEYt7z2Oi9dcxW98zvg11Qm9e5Hrs9Hix7LvAh3ytIiz6lT+NU+57Iv0sCcfSvwCQ1TmvTKKyOo7aLBIwlczIpRG2uNVnuY8T+u6R4E/g18Akxxvl8HvAx4R9sk4ZggPggMKihHE2qGMBYIRhYP5quf/pPNTftsHqgUSKGhoWBIC5+i2HzTaKpLP2oaLKnZSWmwkIhheOY1ToGEsvzsZichBCPLu/G9iyfyi1c+zEwOJB2bsaspe1HcIEXbdS4y7vhrk1kmbEF1oA9kvKqzlWBBNDVH42kwBUmJeFpTukTqR8eCYl+PcyxmmLzwwRLuu+5MAK457QQ+XVeZyDSXhPkbtnH9X19mS3VtSp6Piqpqnp6zhO9c4J2C8rnlyz0T99eGw5z9wjP4faodIafN5rmLpzC0Y2fGdy7n071bU+4DAXTNLaRvgW3P1hSVHwy7ltv6T2ZzcxWdA8X0ye/C3zY9xqLaugwbdEDx0zv3C7IRw5dREJdIKX+R9P2XDlWuTRyVNmIpDWR0ATIyC2m135PdXgwp7EPf/B74lNT3mF/xUax1p6JxX4KMLyR+n46mmpzVbSBTeo/Er+cgTQVpkdikzXp4bPJFnNyjFz5FIaCqFOYHMqLrFCEozctlTI/Ww+KbwlG6lhTyldNGeghYEkIY4hIuoVVK3AQ9XtqyTLtzPJ+51kwQ7dgvkueR1sGNJCRd0LfBunCDP9w+hmmxanOiztvYgT25ZdKJyV3iW9SyWLt7n2fV55c+W+6d+xloinqvSAzLwjAtWnSdZj1GdTjE9W+9im6aPDLuQrrkFpKn2Vp2ruajyJ/Do6dOyWDNdAwUMa50MH3yuwBwSbeL8KUFa2hCpdRfyoiitqPpTKuRfQ1/pmLPIWRnk9gcwvZuRwdmCyGuEUIoznYV8G57Oh51GrGMLUPW3QHEsCVLDJl/N0p+ZlXlg4UQgl+M+DrPVU5j5p4FRK0YQwr6MLXfZXxv8bSMvLFC2BnWRnfuwlfKT+LlNavJSBInQY9JHl3yOd2LCnl2zBT6dyilJCeHX86aw8srVuNXVSwp6VqQz5NXXJaVxla5r5YHX5nJ8srd+DUNLEnQr6VoeZ6mCIdkrKkKimGbTrLaldO/ZxO6WQgVEplh+waQUrYqS1Om4Gi3EuySSM5nJ9lGZp/kF41FPPquYnc1Dz3/IbdfcjIlhblcf8YoHv9gPnF/XbKZpBXUtoTpWlyQsX9EWRlLdmdGzkoy7d0x0+STHZVMKu/HRxfcwUe7KtjYsJ8eecWc33MwOVq
qgJVSsjfSQFD1Uey3V0g9cnvwvcHf5fnKF9gW2o4qVMaWjOH63l9pk8JmWo1s2nM+urkHyaE59v4HQxQOCkKIJhJejXuBfzmHFKAZ+ElbYxxVglhazci6W0C2pB5oeRSpDUQEzzhs5wqqfm7rdzm39bs8dQ6tqIFSQq7Pj1/xEU1LHo8EHYv3t1SgIHh9w1ruHXsyt48ay0/OOpO7xp/E6r376JiXy9DOnbIK4TcWreZnr30Ut+m62ls82U+yE8wLAoIBH/26dWDNlj02VSvulMsiwIVt18hQXNuR28Lj9N7IMm9X8XIFvnBMGlKVdnq25LbOGCnvDWFXEXlr3mrmrtzCf352E4GAhqaqRKXZit0lEyV5iSobUkpW7d7LgsrtjOrcjTX79mWYJ6RChoC3pEV1yOY6+xSV83oO5rye3kncP9u/gV+vfpMGvQVLSoYW9eDnx19F15wO9M/vx8+GP4hh2TX42sshrml65rAIYeBLY5qQUma+fQ8QR5UgJjKdTFUTkGFky5OHVRBnw0U9juPRDXNS6tAB+ITCmV0Hs6m+Jp4MPAWudFDsS4iaJo8smMfk/oPoWVhEaV4uE/qWt3ru2uYQv3x9Vko1DHdcqTiBF45Q8bQHY7ft162UUwf2Zt2WPQmtMKW9dARf6mApgR2e9ub4P6mf2wMnUk/RlLh9NlmYJv2xZbbpvBQdYeypnSVN0DAt9tc3c+Z3HqV7p2KK/UGqoy2YrrB0BbiV+O4ioKl85eRE5Q/DsvjmK9P4bOs2YoaJT1MRSAaUlbCzsZGiYJD9sRZMj7eaJWFUl25t/hxrG3bywLIXU0oorarfztQFj/PGhPvwO2YzTTmwR7wh/M7hEcJwNJkc2g0hxOXAqdi34Vwp5Zvt6Xd02YitfaTlfk46lhkieqCQUrK0biW/Wf8XfrrmEaZXzSRspp7v2j7jKM/vSI4TqCFwkrP0GUu/gs5UtTS1Ln6SHk5LSh6Zn1l0NBtmr93ina5REK82kRFznLapiuC2C07ipRlLMQ0ZZ1SkjOponck5DJPpYimnSR4/6fvBaMo+VY1nmIuH5ovMy4p/dm3ByY49kd4oAUuCbkkq99dT3dASD4IRjlMzhZWRtEV1k+E9usTHeWnJSj7dso2wbjtdI7qBrltU7Wli3m23ccf4sQjV+xcY2rETA0razgPxzObMl72FpMWIMmfvmjb7Z4MisrNGDhTxe6cd29EAIcSj2BF0q4DVwB1CiL+1p+/RpRH7RoDIzTRNoIJv9CEP/0zlv/l4/2dELVtj2Nq8nRl75vDrET8kV7Nv4BzNzwunTeW9XauZuXst+b4AU3qPZmzHPgAM6tCxVZMqyWGhEhbs2pmtZQYM02yfXU5g07/SEpv5VYXJ44Zw4sCeNDS3XmFYuDS21hp5zEU4ZoyUNu2UyskhxgqOnydLGg7XDCPjX+y/mmrzeHVhZkxPurbgNLaFsMC0AC1J607rfP8L79Kn7HoGdOnIi4tXeLIkhICZ6yuYtmUDMY/K20gY0KF9yXg2Ne/1NIOFzBiVzZlVztuLkvzrCdetRaYn0zpQJL8AvzyYAAx3M74JIZ7FFspt4ugSxP7TQO0JxhZsZ50LEyIzsBpLEAXfRojs9dyyYUdoN3P2zyOWtBSMSZ3aWB3vVn3IlT0vju8PqD4u7TWSS3uNzBinc24+E3v0ZfbOLakHHC1TpC/n2kgXmYzTBvfhkbc/zjzgarDJcJ3VgngwR1lJAQ/ecDbNoSiKEBnUqZTOjlrrmjjWf/wk9bvXt3uuySguG8zQU2894H5CykQYbRZhHlQUfDkaLVEdS0osy2L04J4s31aVwXJIZ4IkQ5NgKQJXfKefzrQk//joc35z3WRCMe+yRYZl0RyNketLrJaShZWCIN/vHS2Xjj55ndgVqsmQdbmqn5557c+sdsEFFzB9evYozYPHUcWGaC82AL2Abc73nsDK9nQ8qkwTQiiIkhcg5zIg/YZugdC/kHV3HvC4lrS
YX7MgpbKDC10afFbjHQWVDU+edTnd8wpSl7gGEHNtrsQf0IJAgPZmyOvWoZAbTh2ZGCPpr5I8dYnNA3aXho5A3lXTgGFZ/PrZD+P19OK0NpGu4Ajicd1w0EIYoH7v+kwTRjshkuvCpR/Dtv02t0SxTNuuLS1YtamKIb2ypB7N4gTNCzr3k8f83Gmv27UPgAn9+6B5mIhUReHkPr25dtgIcrXMXMB+TeXywUO855WGr/abiD+NoiYQBFQfZ5R5R/h54cgIYQcepq+s29GBUmCdEGKOEGIOsBboJIR4Wwjxdmsdjy6NGEAEQRsC4h2P/+AoxBYh9fUIX/vKiS+omcW03S9REzUws5QQ1g6w5MzGumpqwmlLPyesV1oWQipxO2pVUzOf797FuO49vIbKwKmD+/DivOV2onhXy04Swm5Ag2JmXolPU2kORflw0UZ7R7o92f0rvQ8BnDzlt+2ap4vPXrsvc2fygG0oVQLoWlpEXUuIcFIiIVWzSzShKRjpvN+YwdrNexLFPZ1zKBZYiswQxj5VIWzo9sspi+oigN6dOgDw9dPG8cG6CpqjsXjOihyfxqRB/RhU1pGBnUs5v/8Apm/aSMQwUITAp6rcPupEhnUqa/2CHYwo7sXPj7uSh9e+RcTUMaVFn7zO/PqEa1MSSbUXn2/r7bl/bO9tnvvbhaNHwLYXDx5sx6NKEEtpIeumQmwZWZ12CNBXQjsE8bK6z3h95zPoMoZPFWDkZ7TxCx9ndD7Fo3d2zK/anpEBzPZG2VqbSJp6zDR4a8PadgviHJ9mp8pMdlApxCtYSAGKx8rZr6lcMHYIHy2q8B7YtUYcBCWtPUgfM8Uxlu2EQqAoglFDenDCsB78572l1DWFKC3OY/zIvpR2yOMv//4kQdRIErqGYcVzM0s3yNE14SQJY5+mIhSISdNm6aUV/HRt0AG/xtQz7ECQLoUFvH3bDfz908/5eNNW8gMBrj/xBKacMMyZtuC3Z53H9SNOYMaWTfgUhQsGDGJgaccD+s3O6DKc08uGsqOlmhzVT1lO8QH1P+L4kgliKaWHXbB9OKoEMbFPQF9BdiEMCAXU7BUxkjG96j/o0pZaqpAUaBGaDLeWnSCgBCjP68nZZRMPaJqF/gCaohCzWslv60Bi2xZjpsnczZXsb27huG5dGNrF+xqG9igjPxggFE0KKnFZE9jasOUH1RHGQkDQ56Nft1K+c8UEXpyxNPtkRKpMFI437EgJ5hRhnHwA4oLSsiQfL6pg5vz1xJxkPdU1LezYXUffvp2JhPV4eyHJ4O4KsPM0q46lxXQccW4bU5JTFCDcHE7MK2mF4fcJAkE/D152JieUJ2hnZYX5PHj+mdmvTwhO6NKVE7q0XiCgNeiWwT8qZvHajoW0GFGGFHbnniGTOb6Dt3b7hcJ9638JIISYJqW88FDaHHFBLIQ4D/gT9uL7SSnlw2nHA8BzwGigBrhaSll5MOeSkY+wq3W0NqEc8J/arvFqY/tSvudpOn7FRLdy6JM3lHO6nM0JxcPbJMuvrN3N39fNZ3NjDUM7lHFD/zHeyoIEjNSbN9fnY2BxR078w6PxfBGaojC2Vw8ev/JiO3IuCYoi+PPNFzH18dcS1STS7MXJfhRVVRjcuxNPfvsqahtDrNpShSckqAKCAY1I1MDvUzFiJooi7KTuRwDJwjj+2dGQE/Q1QXMolvI2kEBzTGflhl3xvkibYyAskeGUExBPhh+3SjjDBzSNQV078fnmHXH+stvEr6k8fP35TBjWN2sVFcOymF2xhc3VtfQuKWbSwH6eBWEPBg8se5GF1ZviNLbVDTu46/OneWzcVIYX92yj95HH0UJLawdObcMGLIChrQ1wRAWxEEIF/gacjV0xdZEQ4m2nVpSLW4E6KWV/IcQ1wCPAwdV5EbnE1+EZUEHpjCh5CtEOm65hxSjyBWgxmtCliht461Ms8jSD63tfTo/cPm2O88GO9dy78C2ipoEEtjRVM2PnBu4Zcxp/XLz
Adig5lTfyFD8R3SSM/WDlaj7GlHXjtzPnpTjsYqbJZ1u2MfH/niSoaeRoGg3NEVRF4bQBvXlg8kRm/GgqD78xm3eXrI+zH1wvvTATAsewLFZv28sjL37Eq7NXpmifKVxgoH+PTvTvXsrCFZU0NkWQliSTBHZ40apOleWgVLyPpQvzeHtsoSHdlUOSjTgc0zllYG/W7d5HSySG7rx0cvw+Jo8azFnHDcg6vT2NzVz73MvUh8OEdYMcn0ae389LN11Nz+Ki1q6sTWxu2svC6s0ZXOKIpfPXDR/w+LiphzT+YcGXRxBf0o423lQaB0daIx4LbJJSbgEQQvwbe9LJgvgS7ByeAK8CfxVCiNaqr2aDyLkMGXqJTNOEH/LvRuR9za7q0Qa2Ni/n1R2/xi8MFGcNH7J8hK0AKiplge4pQtiUFp/sW8N7VUuQUnJe11FMLBsBEn6weDqRpLpkppSETZ3/VC5j4TV3MmNbBY2xKOO79mJAcSnTKjbwwqoV1IRCDCrtyP6aUCZrwrCXx7W64/BzlUMTXl+6lreXr+flO67lV185j0kj+vPIG3PYU9eEdBx3Stp7SrGkLYQhw6bsJmQXEiq27Wfz9mqklEhT/tcoNwktWSQEq70jlTfs2Tm70VmQakcGO6jmzXmrefW71/PsnKV8snYrRbkBrj1tJJNHDWp1nve9/R57GpviL8KWmE5YN/jW6+/y+i3pBYgPDGvqd2S9zHUNuw5p7GM4MByKbdjFkRbE3YEdSd93AuOytXEqtTZg00CqkxsJIW4Dbkv63q4JnD+pgGmv3YXIu61dfUJGA69s/wW6jDrnsffnKjqSAD1yB3NT+bfi7S1p8YPlz7G4djMRyxbaS+u28O7uxUztcwF6FjtwVaiRmGVw5cDUunVbautYu28/ummyvb4BJWRPIFk7zXCYuaw3xT5mmBZfe/YN5j5wG02hKPVNYZCJUknpb7hYJJNCIRQ8C4ZaydnQviCNJ+U0Lm1YJH6XA5lGenuZdMAuNpp5j+yra2bHvgbuu3QC9106IeXYtuo6Fm7eQUEwwIQhfeNlperDEZburMpIZ2pJScX+aqoam+haePApCkqDBahZlIpi/+GLjjsUfIlME4eMIy2IvSRf+n9Pe9ogpXwCeAJAiPb/F7/3URNK0a/b25w1DZ94RiwJAf3yuhGyOvLNpd9DCMGYDiPplzeSJXUJIQwQMmLM31dJTegdYpZ3asSYZVLZVEennAQTY8WePTy9bElKVFZGlrLWrtwNsDChvi7EaT99nOZwFMuQcW0vhU3hsATi+92oN4cdoLTmiFMgS278ww53DqoiMPCYk/Da6Y2zThpExZ5qtu6qTRHCKX/TYFqSrXtq6NutlLcXrWX7/jqG9erCom07+WBlhZ1dT7ELx/75xosYP6A3YV3POiVVUWiJtrpSbRPDC3t63ltB1cd1fdrnAzni+JI46w4HjrQg3okdXeKiB5CeD9Bts1PYxtsioNUyAqOPC/D5B73aPLnaNQsVqxU0G7UY0vsh2RnaypZQky2oJSyoWcT03RWEzcQNZ5iCiKEBkmV127HigSWJNq6S9ODi93lv8tfi+99Yv5aoG/rqUq58IC0Zz7/rlToyBXpC2MYLiKq2cFZSpwG4fOIkOoSrEarSNoG4fVzzR3LnI/icqaqC5VT2EEBurp9fffdivv3r17GSog293uoy+WWThPLuJZw8qi8bp1WndnC1bNfUkT4XRaBbFuf/8ilMSxLVDd5ctBbdMDF9qee5+7l3mPPDr7G1utY7jBk7q1p5aYc2f4PW8MDyV9Ct1PkKAX3yyriiV/qi87+AoytQo11wCoSGpbR5NcK2gwalbItBcOQF8SJggBCiD7ALuAZIN469DdwEzAeuAGYdjH34cKFH7hB8ShDdSrczC1pMf4q2bGFhWAZgL0elxBHCzrI4RAoDAAAgAElEQVRZgD8QIxz243qI4lcmobKplr2hJspy7SVqSNcTaScdO65UHEV
VtTVQJZZFQ3b6eGqxbn897Vhrv7LlnDvdseWO3xq/9xChCME3bpzA5eePZNPWfUgk8xZt4sf/Ny1FCIO3aSJe9y4Nu2oaefjZD+N5mZOFtqYpBPw+olgpoc+qIuhYlMc/Zn2eQgnUDdtNqRhgJcVPCGDGygr+tnBhImAkeaICfnTORDTl4C3su0J1LK2tRHd4dnGih4StzXVtv6y/IIgjQ6b5/xkfYZdFana+5wIzgDaLBB5Rf4uU0gDuAj4A1gH/kVKuEUL8XAjhJmd4CigVQmzCTqr8wJGcU1volz+aEl83VJEcnWR7sKpjmba3XF8YzdEiTSvzAVAVSV5uFL/fLkGf0BQECIGRREqd1KcvgSRqU1xZcz+oic8S0sazBbbla0W+tvf5lDIReZeybHdOdoS4w+4Z+pV3YuGKSn78+7epqW/h8Rfm8q83F9HYHCEl41typzYMXlIIoqaZUQJJADl+jWvOGslrv/wq9101kYKcALkBH35N5YR+3bnvmjMIxVKT/cdPkXbumGmyu6GRfU0tieNJf4OqypCyTu3/QTywM1SbVB1GIJ0NBCEzmpIa878KeQDb0YGglNIVwjif22WwP+I8YinldGB62r4Hkz5HgCuP9DwAopE5tDT9DdPcic8/hvyCb6L5UulHilC5oc/DzNn3HKvqZ2NKnT55J1ATK8ZsXkX6XZOvWhT+P/bOO86K8nz732dmTtvO7rLsUpald5COItiiomLHEnsvUWNiYoqmG2MsKcZoYhJjorFgBRVFQBQQpAoC0suyy7KwvZ82M8/7x5x+5mwB/L0xcPE5n2XPzDPzzOzMNffc5bozulHe1kKzYX8DCAEOzcDvixcbKvRk0jMtC4AVFWX84JP5BKWRsqsFAkyNSIsgxS5op1r+XWHzVqyEtI4jRCpSGLYp92+ZdV9VECb8fNq7v4YdpVYO97I1u+wV5aREUUPaxDHuBQnRPOHYwFuMRZoIn19naN8Clm3aS4vXz1N3X0RGmousNDf52els2Hug01amQ1EY1683cs2ayG7jUgKlwOM4stuuJD0/Zewhy5GGW+l6ifNXgv8dgu0sWoUQ46SUnwMIIcYDnZKx+9+qrGsHrc1/o7n5UQjJ+xne/fh9H5Cb/yYO55i4dV1qGmcX3cHZRXdEvittLWNtwzYCZrz/WFNVHh55PeVtzbxXsZZ3yrYmdWwWCExDRROhBqJCQVNUHpsyEyEENW2t3Dzvbbx60GqQqYjQa128C0IEQTEsf25KWhBYLYBiiFgYoJqW/kJYNF7qodchR8jAjHTwOLK7x1Y7ohOIjesE9eiToL3ptKf90BWbXQIPPvsBOEAIBadDZdKwYh6/43wARvTpkUoLKO4h4HZoTOjfm5MGFnNCryLWlR+IK2UXQO9u2fTpZl+KvK22mr2N9QzIyWVwbny5c5W3hfLWBvpm5NDDk830giEsq9oel0fsVh3cPujUTmcUxeKINCVs8L+kM9wFfAd4XQgRjoMV0cmaiGOCiE2zmeam3xKfX2wgZRtNjT8hr/u78etLg32t66j27SbLUcjAzKmUpBdzXd+r+HfpS2iKCtLyEd/W/yYKPYUUegqZmDeIgRnL+fP2j/CFrGOnouJWnTwx6So+KNvO9sZqhuUUcMOQiRRnWAGbt3ZswQy7KARIVSIS3BzCCJFwaJ12eUbEpNYalgVtCjBj1OOEapFxOJsCTUZ+T+nbi3omItMII6doKA2VhymDWdg5AabwFMJwqEqkwCISVBSxa3acbRKXymaCVCS+gM6qLfuYvXg9V585Hoem8tPLzuDnry4koBuYUqKpCpqiMGZgT76srCLN6eDKKWO4fvo4AB69cAZXPP8qrYEAbYEgaQ7L1fGnS89LmkOj38eNH7zF1toq1NDDenT3Qv5x9sU4VIXvrXyHjw/swqVq+A2ds3sP4aEJF/KHbR8yr+ILBAJNUbl94Klc0bdrgbpzzz33q1NgO8ayJqSUa4QQQ4EhWBffNil
lp/xE4v9jXOywMWGMW3YlayJQezMNvqUYUfdNDAQ9epZhFQGCV29k9r77aNFr0E0/mnChKg4u7/s78lx98Ro+tjRuRQjBiKxhuNRk/dh1taX8Z+8KqnxNTM4fwFUlU8h3p84Z/dnSRbyweX30CykQQRGXJaH5RXwXKBOUxOAbRIN2uvVTNbAnocTijtA4EXI+i6AV+LPOENE29JJobkHk99DPOFZL+M7sXFPQ9hAhWyAs+BMO3kmFpGKMiN5yaMbtCclHlsXIV/YpyGHOr28ELMW2X8xeyIcbtmOYktwMDw9eejpnnjA45Xz9us6HW3eys6qWkrxunDN8MGlOB1JKNh46RE1rKyN79ODHyxawbH8pwZgHpVNROb1vf9wZkgX7dyRYvhoX9B3BI5POw2sEaAp4yXVl4FCOTul0LIQQ66SUE7o6zt27j+x9z32dXn/3j+47rP38N0EIkYYV5+orpbxVCDEIGCKlfK+jsceERawEltNNQL0UNiW5DmLfbxcdfJLGQCVmqMw4KL0EDR/vlP+cGwY8j0d1Mz43WfA9FuPzShifV9KpuQVNg9KWemsaMaQlkahSpacnk5Lsbuw+UEd1c0znkVAZb1zvuch4yz2qaQqmYSOX1k7Gg7U9gXRIzBAZS2JIOC6CF50Ddt2XEwJVR4KoxRvdSywJR1MHov9PfEUX4Thf4kRFws8QGlq8VldpIbj7H3P4ovSAVaAhrI7ND77yIT3zshjRpzBu3LJdpby0ZgMNbT5OH9KfW6dOIMttiUWVNzZyw1tvcai1BUUIAoZOwGUkubMCpsHifXtQMgMEiXf4+wyduaWb+cnYM0l3OPF4ut7o4P8Cx6Br4nlgHXBi6Pf9wOtAh0T8PyUMnwoCAwGkJ3XmcOD2XBC5YXUzwJ7mlRESjkLSqtdR6y896nN7cNlCVh/cH55oOEEDVEtg55XLr+CFWbO4ZOTwJLEY0xGyBIn3pSoOGNu/J2mpGkeGiSrBgg1X7InwSpq1fUVJ4sC47ST9P9ZSjl0uBDL8sZ9ZlxDVhojZRXj/EHdSIlwb+1YRk4GSeIASaPUHuPShF1i2ZQ+byiojokth+II6T72/Iu67JxYu49uvvcsnO/ayYX8lTy9Zyfl/eZGy+gYeXvwJZ/z9n+yrqcfrC9LiD+A3jWRJ1BAUIdCEvZWrCoU6f4fpqbYwpM72phV8Wv0ymxs/JmgepWahiTj2siYGSCkfA4IA0uo31akXwWPCIgbrHnPiCL1/tiFEOorai6ycX0XWMaWOXVWdNV4hYB7ehZ8Kdb425u6OKeKI7AxUTfDGRVfSK9PKqrhxynje3rSFeq834hcVISISCRXKpoSNlYfA10HpW6wFbdjxqgAVRvYrYueeKvwB+0h9mMQcIVeBocuoZRpxbySQrxCEC2M6vFKFCG0qut1kn3D8dFLd2yJ0vFJEXZiJ0pghnTZ0JGVV9Tz80uIksgzvf2VpOXf9ey4zTxjK0F7deXH1+jjC9usGtS2tXPziy7QGA9HtSEKdplPDoSoYwrRfSUCBJ1kfuyO0BOt4ofQ+vEYzAdOHU7j5SPkb15Q8Tp6rc5rXncKxGawLCCE8hC8PIQZA51piHxMWcRiKkktWzq/IyPw+2bl/Ib/gIxQlGsF2qml0c9q3MpfSpMA98KjOZ19TQ6TteSI0RaF7evRGy03z8MI1sxjbqydpTgfdM9K595QTUe38xFhp/qqm2N/o4SyJjqyREOFtKT/UKanLJ35wMU89cBlOTUmaU2Q3gngCTYinJU3pMDIAOoNIo9CwRR1+rQgRZVgs3jAldc1tKDGBJ4mlS2FqEJQmn2zdw0/fXMjdL7wT1+A0jAAmTX5fpFsHxFrowrKfEv4OHk3jexNO5rrBEyIdwSPLVAe3Dp2CS+26HfXegd/TFKwlYHoBSUB6aTOaeKv8113eVoc49izinwPzgT5CiJewCjx+0JmBx4xFDICSgyftynYV2E4v/DZzyn8SKnO2rhBNuJh
WcCuaEg3MSSnZ0bKX1bUb0ITG1PzxFKf36tJ0emVkpcwHVYVCliu6v+1VNXzzpdkEDROfoaMHTf62ai2aosTd4BHEZE7EukXDlpzShYs/oJs4FKyofaxVnLCNx/+9mLzsdEqK89m5J17LOTyn+N8tB7OM+X/S8rjhh+/SCFu5kfs+XBwTMy0zHLBL2K9TU+M1jMMkHrOaNxCkrKbBGp9wVwkz1AM24fgjQU5dUJSRgXAKDrY20zMji+9OOIlLBo/AlBKP6uCf21fhNw3cqsYdw07k9mEn0lV49RZ2tWzCkKAKgSqiV0hD8BB1/gpyXV27htvF/w7BdgpSyoVCiM+BKVh/3nullDUdDAO+pkS8bqP/sHQkMCvBNx8856ZcpU/6GK4o+QMrq/9DlW8XWc4eTMr7JiUZ0YCulJKndj7P6rov8JsBFBTeq1zEBT3P5Iri89udQq2vldf3bGR7YxXDc3owtVcxyyvK4twTHk3j+hFj46Lg33/3A5pjhGL8uoEfgyy3A+En4q4IIy89jT55mWwoPYBuJDhcpEU44XS1zsCZ5mDq0BIWr9phf4NJKDvYQHllfcQPG4nrtWt1i+g64YwFGf/wiNlFHBRVJJU8h6EqVhfq6JGLSBaFtGk9KAHFoaBHBOejywJBnWfvnsX9L8yj2RfAh4FpU/USmUsiESNQTGmpuyUejwCP5uDR02cwrW/fpG0qQnDvqGncNWIqLUE/mQ4X6mGUR1f5Kvj7np/TYkQnpwmDNCWAEKAIhUBSWf+R4VhzTQghpgIbpJTzhBDXAA8IIZ6UUu7raOzXkoi7gnPOiKkwlF6k901EO0QMUOAeyAV9fpFy+eq6DRESBiufOGCavHNgIRNzx9A/wz61bnPdQa5e/B90aVm1C/Zvx6GoTOrZm9UHKnAoKkHT5LLBo/j+hGmRcVUtLeyqq0OGUxhi0tq8qkHfbtlUNbXgD+q4HBoOReEHZ08nw+XkjRUbWbx5d6S7RCRFLRzoCukhJCYdxN5DAotkxg3rTU1tCxt3Juo2xUBar/3hbYTJOC67ww42C5Ms+Rg4nSqnTRrEks93oetmpFAFYPKYEu67/jSueODfmHqUWBODe4m7NwMmqjumWi8EU4H/LP2c+lYr9iLblaWz2bYATVEJkOy26JWTxa+/8Q1bEo6FpijkuDxx3+1tqWZZ1TY0oXBa4QiKUvSsM6XJc3sfollvIHbiulTxmxpuVUeg0N3d/hyOo0P8BRgjhBgD3A/8E6v70CkdDfxaEvH48eNZu3ZtyuVm48/A+6r9ws7lV7eLDw8uiZBwLIKmzrLq1REiXnpoO09uW8C+llpynek0tjpo0aPjfIaOz9Cpo5kV37yDytZmirNyyHJGXRKGafLTRR8RCEfTzDDJWeI/QcPk2mljKc7MZmtlFQLBa6s38uM3PrSsQlNyz8yp9MnOYtO+g1Q3trJk0x5Mw8RnGtYVIEFIgTRlxBqNQAF0ic8X5M9vforfG4zP15XROcW5fUOdOdvRYY+xVRMQ8yRIMqpCKweCBtv2HuL7N53BX2cvp76pjW5ZadxxxcmcN30Em3dX4nY6aE38O7VjpQkZtVJjoRsmi9fvRldD/mOTsM5T/PiwtZ0Ah6rynVNP4nfLV6BGMnRMvn3SFO6Y0nWlNCklf9j2AW+UrcIMpQ0+tWMBdw8+01YCs7R1Gz7DPoc+IB1kCJUzC+9A7WI38o4nenQ39zWALqWUQogLgT9JKZ8TQlzfmYFfSyLuCMJ9NtI7l6Qyb+FBeNp3HXSEJdUfs7VpK3ZxTonEF0oFWnTgSx7c8GZEgKXS20ybz4UdK+1pqsXEZGR+civ1VzZuZPm+fRFyEmHZyhj84sPFzL/teiaU9Ob0x/4elb8M4XcLPkWTgty0NH4wczoPzDqdB1+cz/KtpRghiU1TSss1msJaBGj1BqwUt8T1QspwcVZ14jp6agvXlqvtUuNCRBnOciitquehZz+MrFbd2Mrj//oIrz/IKRM
GEkzIRhEQ6U2XuAuJZfmmMnRNQ1p+5fDKBtHfgTSng8FF+WyurYpzM7k1jdOG9OemSROYNXokn+zZi26aTCspoXtGeoq9tY/PanbxVvnq+DZJEp7esYhJ+QMZlBmf19xqNJHqyCSCK4p/RXH6KNvlh41jM2uiWQjxY+AaYHqoVVynhD/+N7MmnCeB62SsHnYhCA9og8DTmfZS9tjTsofZ5a+gCR27x71LcTE5dyxSSp7Y+kGnVbAE8SpssXhx/Qa8IaF4OyEfQjN55KMlLPxyJwGbqD2ALiXVza389I2FfLpjH6t2lEXcFe1Voip2WQt2JKnYLIudY7gAhZgzl5Q21g7CFrJKUqAsFr6Azl9e+5ScTA8Thxfj0OIds5oiKOmRY1mvxM/HoaqoSjsnIwSB9bfQDMHMMUO5YOwwfn/1ebx4+xX86Yrz6R/SGs50Obl+ylgeu3gGAFluNxcMH8YlI0ccNgkDvFm2Cq+NwJRu6ry7P7kLdx/PQAxpHxTu7Rl49Ek4jKOcNSGEmCGE2C6E2CWESFJpFEK4hBCzQ8tXCSFKYpb9OPT9diHE2THflwohNgkhNgghUr9mdw5XYKWr3SylPIjVfejxzgz837SIhYCcp8A3H+l9A2QQ4bkQPBcgkoo6OobfaGNn8yrmVX5M0AziVCUBUwuFa6wbVxUKw7IGMDpnKF4jQJWvOWFOEiEk0ob1itIyKfTYl0A3+WPSEGWyNRzG5oNVTCjohS/YTq4vVhHCHz5YhkNV8Yfaz6ciNbdTA0MSCB9pO6pwoenZZz+E10nFcTG+6tihYf+yqgp0U3ZI9pYjWiClpPxgPQ/deS4PPjOPdVvLcWoafj2IM83JwcZWZKInXAhcqorpEASNZLeTXefnNIeDR66YgRJD3qcM6scpg/phSmn/EDsKaNbtg2oGkuZgsthXjjOfsTnT2dDwaaQFGIBDODmv53VfyRyBo+qaOJJGxEKI4Vha6COAnsAiIcRgKSM9Zk7rbHZDB8iVUv4+/IuUskwIUdaZgf+TRAxYKWqeczsMzHWE3c3reLP8NwihsN/rQqIhBGQ4/PgNlaBUEcCwzGJ+NOxbKELBqWhoQsGIsXKFAJcriM/nRBUKhpQ4hIJDVXlsyvkpFbNO7tuXOVu3YkppWa5SJpOxBI+qUedrw+FQkwsvEqyOqqZW0mJFF4SVE6uEsig8TgemNJk2qj+NDW2s3Wa1HYxrtdQOpAokt8Gzd0GE5maqyQvDuzJFglugA+iGSVa6mwyPiye/dwmHapuprG3imbkr+GL3gWhgL+acDy0u4Oc3nEWTz8+3/zoHU0r8AR2Py4GiCNrQ4zJTPE6NG06bEEfCsfiqSBhgQrf+fFFXTjBk5Ub6KqpOphXYCyhd3Pt2Cty9+bTmPdr0Znp6+nNO0TWUpHdecKkrEBx118RhNyIOff+qlNIP7A1pn0/CakZxNPGaEOIFLCvYDTwGTCBa8pwS/7NEfDTgNVp4s/w3lhUhwaWAz1StMJkAt2bgxsCluDij8GTUUDmqpqhc0Hss7+xfH+fHczlgUFY2Y7MHsa2hmuHdenDD4In0ybCPdgNcNWY0b28JXWsqoe7KMWQc8htX1DXxr7r1oFglziQWesRYs/mZ6bQ0eKMCOKEbRnELxvfvzcwJwxjdv4iSwlw27jrAt373Br6AnpqIY90GAFIgwhkeELWSzXgyDvuSE8V6rDEpTkgH/KYqghEDivAFdX749Dt8tnkfmqpw+vhBbIwl4Rg4NJWHbplBWyDI4F75zH/oFhZ8voO65jZG9ytiZN9CfvLqApZt3YtDVdFNkyumjuHWMya1P5kUkFLy8pcbeXb9Guq8XkYV9OAHU6YxtrCo3XGmNPnt5nm8Xb6OoGkiQxFRVZG4VY3+GQVMT0HEilCY1v18pnU/shhJl3B0ifhIGhH3AlYmjA0nTEtgQagP5rOh3piHi8lYVvgKIBN4CZjamYHHibgdbGv8NO73LM1Hk+4JXV8hlwQ
qmVomk3Ljr4nvjziHCm8962pL0RQFKaGHJ5tnJ19HD09Wp+fwwsYNluZD+CVKA6GHyFgKW7+xqVot5wi7smOsU5emIv0GQRkvBiQ06JvXjSdumYlT01i6aTertpcxdmAv7r1sOo++vNiynIlRbAuPDf2UWEJD/YpymTS0mNc+XB8t6RXCqiJJqNiNeGqO0IBUhMDldlCQm8l3rzuV6x96mZY2f2T/81ZssWRABXGWsASCiskVv/0PmqpimCaXnDSS+2edGpev+8cbz6eupY3qplZ652aT7j58oZ1fLFvMa1s3R3z/n1WUc9Xc13jh/EuZ2DN1mfErpauYu389gbjO4AIhVW4dcBpX9ZtqSbT+N6Drwbr8BB/t3xJI0e4KSdxDqnXaGztVSnlACFEALBRCbJNSLu30rOMRxMoQ8GBZxHvD/es6wteTiGVzx+scBXiNlrgghyokPV0N1AXT8ZounIqL8d0mclnvy3Eq8TemW3Xwl8nXs6e5iu1NB+mZlsPonD5dFu1euGe3ZcyG7i+JlSLlMAV5iofaFvsGAO40J2/ffBU/nv0h2w5U41CtCrypA/qyantZvGUoQNNULjl5NHsO1nHP029jSolhmggEQ/t0R1VDovKqwAxZu2EvqwsFISBgmBjSpPRgHfuq6i13Q6yXRAjrOEK+3AgOQ4o1NvNBAr2LuvGjm77B2KG9+dNrS/H6AnH6EFF3RHS8wHLJICSGISOuhzeWbyLD4+Lu8+ONmdyMNHIzjqxVfVVrC69s2ZTUWNSr6zy0fAnvXHZ1yrEv7F4e0bmOhVPVGJFTjEs98s4cpgxQ0/RX6lpfxDBbOx7QHrr2Z63pQAbzSBoRpxwrpQz/rBJCvI3lsjhcIl4DzAUmYlnizwohZkkpZ3U08OtJxHo5ZuvzKOk3fqW76Zs+ClVomDF94x2KSaGrjTE5J3Fer3s63Eb/zAL6ZxbYLtvdVMNftnzGptpKSjK7cfvwExmXH28R2QbnFHC5HQRbzZSPe28wSN/8brx815Xsr2ukrqWNAT3y+MXshXht+q8FdYOlW/bwt3c/o8UXH6zaWl5tBcvC5Bmqhgvve+b0kbzzyWbA4tSgYVrpXcIqlgtbRpKwGyJ0VF2ri0hCZHuAISXjhln32uqtZUmVhrGDwrm+JtjmDemGyQsfrePO8048rCq29rD+UCVORbXt8Ly5+lC7Y+sDKYhRQrXvyI0TKSX7am6i1bcSyVGosju6ronDbkQshHgHeFkI8XusYN0gYHWo67IipWwO/f8s4FccPm6WUoat+oPAhUKIazsz8GuavmZC8x+QXXhiS7MOs/lJzJpLMOtuRfqXxS0PGLXsqv0Vq8pPZs3+0ylreIZCV1+K00eixWRaCAROxc3J3TvVASUl1lXv54L5zzO3dDM7m2pYVLGTaxe/zNzSzXHrzRg40Lbjr26a9MrOTrn9/PQ0NlYcJGAY9M7NZnRxEekuJ7kZHttAkhBgGGZcF48w/EE9pfaOw6HyxfYKDDu9C0KdQUI/I1VtIRH3sIRnV4R9bNPfVMHw/tHc2e459qpkkWKN2E8KBHQjrmvz0UKu20Mqhb8MR/vujsFZhbbfG9JkeI69WFVX4A1soNW/6uiQMETaJXXm0xGOpBGxlPJL4DWswN584K5QxkQP4FMhxBfAamCelHL+4R5vDAnHfvdiZ8Z+PS1isJya+pfg7DhgIo2DyNqLwWwGLGtPBlcj025CybyXoNHA+gPnEzTqkKF36fKGZ6jzfsKsPv9hTe07rKv/gKDppX/GOKYXXEO2M97KDZgBDvlqyHZkkuVI3Y0jjAfXfBCXCyqBNl3nJ2vmc1rRQLJclpD4D0+ezorychp8Xry6jioEDlXlkTPOxC01vvvWPPTE7hcSGtp83PDimwgBPz/ndC4YPQyAS6eM4u1VXyalubk0jRNKerJ1t71VpjoUhBnfTw4sLqttbOeBqMSkfsVawCGfQjgAmGjZt3dvSgi5akRk0LUzJ0a
Wf/PMcazdWoZPNyIWczj1zq7yLRXSXUdfcH18US+yXC7agsG4Y3SrGleNGN3u2HuHnsVdq1/EH5Of7lI0pnQfQL+MI+sMDVgknCLfuMuQpG5Ce7ibPIJGxFLKh4GHE77bA4yxW///Gl9fIsYE0Tk9VtnyJJgNxHXUlF5o/Tsy7QoONL1G0GiIkLC1dR+tgW00+T7jxO6zOLG7vZtHSslbFe/x7oEPESgYUmdMzkjuHHAjLtXF2rqdVPkaGJLZm8FZVqC2KeBjT1Nt/NEYgClo1nXGv/4nzu07lJ+MP4O5O7fiTrcaj/bUMji1Vz9uGDuOgbl5SCm5dtIJvLB6Q1RwJvQjYBiR198H313I6r37Gd2rBzNGDOZ7F0zn8blLQq/dEsOU3Hn2FM4YOYiXFq1LOkYBTBreh5amADsravD6gzhUFUWBB675Bk/NbsellpiSlkjGJFe8RdYNF4GEukdHEosTXBqqolCUF334jRnUi6ycNLz1LZGdSjXqFondfcSNnDDPHjkZKVPTUmH53n38aeln7KtvoCS3G/dMm8LUfn3j1lGE4N8zL+Wqua/j03UMabmXJvfszXcnn9Tu9ifm9+NPE6/miS0fsLP5EOmai8uKJ3L30DO6NM9UUJVuCOGgk23WOsQxWFl32PhKetYJIR4HzscyP3cDN0opG2zWKwWasRhS72zPqglj3HL1wrGgDgSzBpyTERm3IlR7CT/z0CRI3j3ghMwH+KLxNVoCm22WQ8/MGxiQ95OUc5l/cDGvlr0Vpz2hCY3enhK2NQZo1a3IvUTS25PP5cWnckJOCae9+2ykmk6aII34d2WHUHEEHQR0M07mUgGeP+dSTunbL/Ldl5WHuHX2HBravJYou7jtJwUAACAASURBVO1JgHRVQwjBs9dcRL+8XJZu2YMpJdOH96N7VgY1Ta3c9IfZlFc1RoYJYeUVv/D9KykpzGXF5lJWfrmPuhYvew/WUtfiRZFQW22nZZCAcAJFwhRVGTKeYlhRhg424k6I/ZmwnbzsNN5/8vZIIPSf76/mH/NWRgtWYoclVOVF/dbx3197xji+d/EpHR9TCHM3b+Wn7y/Cp8f0ltM0fn3uN7hg5LCk9XXTZFlZKYfaWhhTUMSw/K5ZtOEWTkcThtnMtgPjMWW0AcLo4orD6iXnKewjB1zb+Z51Xz7x9e9ZdyT4qnzEC4GRUsrRwA7gx+2se5qU8oSu/REUMFsguAqM3eB9DVlzPlLfbb+6sI8mS+lHb/opxaICu5dhgQOHmjrHF2BOxftJAkBBU+ez6ipq/E20GX58ZgC/GWR3SyWPbHmdWZ/+kd6ZGREBmEQSBggGTdoCwSStYVPCLfPnUOeN3ixPfPwpDT4vutHOQ1WAN6jTFghy18vvkJXm4uLJI7l0yigyPW5mr/iCc37zT0obGzGU6Nlwuxw8d9/lDOiZj6ooTBvdn4LcDJZs2s2OihpqGlupbmrFcHYiNpPAG4oQfGPi4JDFSygIKOJJ0S69LZz5IKw2TnfOOjmOlN5Z8WUSCUcQGzgEnKpiZeOK6HemArM/28jGfZUdHRFgkerDCz+JI2EAn67z8MIltv5zTVE4raQ/Vw4fHUfCZc31fFi+nU21lbRnJB1tEgZQlUz65j+PItJRRAZWs4nDx9H0Ef+v4ytxTUgpF8T8uhIrgnk090B8BxIdZCuy6TFE7rPJq3suhtZ/EfYPx0KXBkIeZIAG1aZCs6kR9rgKoVKQnlqbwpQmjcGmpO+DpoohbUTMBUipEzABpYYsZxYBw6Q5GPsqGBqlp9ZrNKTJO7u2ccOocdS3eVlTXhHKakh49Y/dZMxkgobBUx+twClUFm3axb5D9UhTRhpj4oRwz1FTg10Ha8lJ9/DKkvVsKj3IF7sPYAZlvIEqBM50Fd1rWK4Au5sr5jshBDmZHr579akcqG5k6952MgZsTkPEinUo1LXGt7Bqr5uIhLhOzsKhIFSQenw6n1/X+dfH6/j9DTNTzyuE8vr
GlBofPj1IRWMTxd1SP9CllAQMg3tXzOWTA3twKJYUZ3FGDv8+/Qp6pHUcczhayHBPZVjPDTT7PsIwm4DU6XQd4jjBdhr/Fz7im4DZKZZ1uqpFCHEbcBtAcW87C1dCYIXN9yDS70T6P0XqOxEEI5aGX+o0yBA5C8hTDLopOgeMbAwEA3N/idvRx3abYFUr5TiyaQg2xn1vtqeiE0KAICMKPVzaczo/W7mAxoAvMg9r49K+rYMEE8mhVssV0BIIWL5MI7qqXUpbrE6EN6Dz3MdrCVflJQXRwHILmNAWCPL+uq385tWPCOpGNC1MwyoUibnZAobJ0qfvos0X5Nz7nk0mY0GkoEMi+e7Vp9C9WwY/vP4M7njkdYJBPSJE5Haq9C7sxs6K9iUAdMPkH++uZHCf7pw00nLXfGPCYF75aL1t26JEUXifriN14jp2gPVA219j586Kh1/XKa2rT1J6C8MwJRlO+6DfobZmfr5yEYvKd2FIEyHAVE38oXnsaqrhlk/e4N1zj26aZsBoZV/Lx/jNRgrco+nuHhlnYSuKh+y08APoMIk44eF/HO3jsF0TQohFQojNNp8LY9Z5ECul/6UUm5kqpRwHnAPcJYSYnmp/Usq/SSknSCkndM9N1Z3Y/oIXSjoi7w1wjMeQJiYmQanTKOP7+ikCVAHF7l5M7L2UHpmXtnMGLMzqfT6uhGKONC1F/i/ha9NaVudvoU9mFhcOHIoWIoLw2zmO1FexW1MZX2j5w3tmZZLmCD2YBPGFH4TcrokGohn6PvRp77GhKQpf7j1Emz8Yn5srLMsybpYC1u+q4I4/voGZuCwMNTROgdmLNwAwrH8hL/zqas6YPJi83HR698xBOhX21TbQTleryMR9AZ3/fBgNMt4wYyL52em4HNHrJM4XbIeEc6QqglF97dPFwnhzw2am/P6vfPvN96xzk+j7FoKinEzuXTCPuz94lxXlUf2X1mCAC959gYXlO9FDATtTStBF5AFmSMnuphp2NR4NPRoLB9s+5/W9F7C6+g98XvMsCyvuZWHFvRg2+tpHgsQswY4+xzoO2yKWUn6jveUhQeSZwBkyhbPr6Fa1OMF9UTvz0RCeCzEDVtmyjmlLFAIJ+jYcSm6n9np6wTS8ho+3Kt7DlFZl2aTcsZyYW8ic/SvjpDCtYocwUwqqWgxu+fQlQKK6gwhToPstRhaqRDpMCMZHrBQBfbO6cVqxZf2pisJPzjyVB+YttHyUAtBAC4k46KYZydcVhvVRbAR5Up83aPL54lLQ7MhbYnXxuOepOYQnKkIlzbHWdvicSwEVNdE3iS9LD/HRxl2YUhJsiU+Hi71Zpd2XQFV9tKAhO93Nqz+7hreWbWLJht34Ajp7D9Xhk7qt5KcIbThWxN6lWaI+qbB6335+Of/jOL+wDCd3CIuEUaAi2Mju8joAPt67hxvGjOP+qdOYs/tLmoJ+yx0UN5NQ5DJ0mWiKyiFvCwOz81POpbPQTT+LK3+ILqPVmLrUqfJtYlP9C5yQd8sR7yMOxy3iTuMrcU0IIWYAPwROkVLa9qA/oqoWrRiEK3Tn+ECkgVqMyPxu+/PyzEQ0/hJJy1G7RoQQzOx5FjMKT6cu0ECGlk6a5kFKSYErm1fLllLtb8aUgoAhMEOM5g9YaULhG1EIUBSJ6jAwgtafRbhMFAf00fIob27EpWhcOmQEP5gyLa7ia+aIoXRL8/DUspXsra2je0Y6pTUNSdVbYS1fwwStA1dGeE79undjZ2VtUrAsLFURex4T270ligolIivDypXeXVHDL56fH23nFBuIEpYbQ2K9rahCRPzhYaiKYPyQeBdShsfFdWdN4LqzLDIN6gZ3Pv0Wa3ftT5qHJCZOqAiG9irgwVmns7umlt8t+JR0l5NZE0Yytm+0aOLZFWuSgnPhfGgJGA4T3CIu2OrVdf65YR2XjRjJ6kP78ep2aWLhp5V1LgKGzrAc+8rMrqKibSV2zntD+tnROPeoE/HxIFzn8VX5iP8
MuLBENABWSinvEEL0BP4hpTwXq6rl7dByDXi501UtIgPRfSF430OaNQjHCeCajhDtC54I4ULr/iF67eU4jOQb0oKCyzW9y1FpTdEocFtWiyFNntn1Kh8fWoNDdVDg0XEraVS2OvEaBsMye7O2qoqAjCdKIUDVJEYwSo1up8pHl9/UYant1H59IzmrP//gI3ZV1UW3G7cTwAWmzxLvkaplJceScfhelQKLhBPHSyynlhE7yG5fAqlZwSjM0P5iVgjrZPzx9aUYYTa0g2JFISXQtyiXsqqGiP9XAG6ng0kjirnr6bfZdaCG4u453DJjMpOHFhM0DBZ9sYtFG3bgcmnJD4/wTwHXnzqOb50zFYemcOmfX2LHoahLYO76LcyaMIJfXHQmAGX17fiPJZhOGad1Ebu/hXt2Wz5hKUKPmPCDJX52HtXB5QNGk+s+Mn2LMIJmS8qqPl0e3cahwHGLuAv4qrImBqb4/gBwbuj/R1TVIpRukH5tl/1LilaEs8cyTL2SrJYXaGr9K5Yb2wDcCOEmO+fhDrbSPmaXzWdJ1TqCUidoWFaTYTZzQl4ffj/2+3xeW8ZtNa8kqGglw61qXD1obJf1DiobmyMkkNIl6gDhD1mBGqE65BgviLACfLbWcpgvzATuTHHjyZC1GXZvhH+2+YOUHapn457OpYl5PE40t8bAPvmUHqgjqBtMHFrM9HH9+ckLH0aqBasaWthcepD7LzuVOau/ZMeBGryBoHVYSsxxhqGAx+Vg6rB+uJ0aj85bEkfCYPlvX1+zmYvHj2RMnyJGFfWgvL7RnmwVrNaINiELv6Hzmw0fo6mxWTXCekoJq22VUAV57jRuGTaZW4d1vaddKhR6xiFtGpiCoNAz7qjtJ4LjRNxpfE21Jo4cilZEetZd5KVfjlt4cAgX6Y6RFBS8g6aVHPZ2pZTMrfg4KbfYwKSs7QBlrZUMyiogmIKEBZDhcOFSVGb0GcL9Y0/t8hwml/TGpbXzdiAABUoKcjj3hCHcfsZkbjx1ApqmxPleu3wfpXwqClAEUrU+YT+AQ1OSBIZsESK7lkCAreVV7DpUS1qmi38+cCVnnjiEp+d9hjeoRwOUWDnTj77xMdsrqiMiR+G4pVRAhFsuKVYnklF9C5k0yHJvvL52k/00gH99agUF75g6CWfCOY6cLwUUQ8Fp8wCVLhOJJBhxWYROuGFVOTpckgF5mQzKy6ZHelpKC/ZwkOEoYmDWTDThjvlWwaGkMT7/zqO2H8Ay8rvwOdbxNS5xPnxIsxmz6VGk7zVUTLIUABXMHci6G5H57yOUw8vdNKSJ17B/zVOFSk2ggeL0Im4YNIUXdsX3HnOrGvcNP4P+GQX0ycimhycTx2Hoy152wkieXb4mdVEDMH1QCX+/6uLI7x9+sQOnpiZlRtgixHZSIdJMNK5bs4xftT38e9FaWvwBkqQxYyFCLo1QybEvaOBvaeWaJ17F5VDxBvS4QGB4K37TiM8AtEmny053c+uZk7jy5BMi7qiU7aaw3jZ+OX8xWw5WMb5PT5bvLYsrk5ah7Be3Q6MoK4MDrc0hXYnQSg5SnldFM5GqZF9rHfta6/iy/iDz92/lLyddftQKOCZ3/x7d3SP4sv5V/GYjhZ5xjMm9iSxnah3kw8Zxgu00jjkiltJA1n0T9N0k53XpYNZjtr2KmnHrYW1fU1TyXd2o9tcnLQuaOn3TrC4M3xl+Gj092Ty7/VNq/C0UZ+RyeclYBmZ25w+bP2FjXSUCmJDfh4cnnkffjM5lcYDVpPKlay9j5rMvpgzEFWZncLCphcIsS6/j1OH9cagqlrZ1CEq0hRIx21EUq7+bYUrOHTeEbI+beau3Ud/UFqlQC68rsEg0LI8dm0GRlenm4w27rO7JVo1zUtpf38IcKuqb4/N0RSjFGmmRcMy+IsvDX9gdfDiopoLX0JkwsE9ck9H8jDSqmu2FjDbXVLGx/hC6KaOVkeEc5Jh9GabkPxdfxurK/dyz4L3oeu1AIBA
x5qHXCLKyai+rqvcxpaCk/cGdhBCCAVnnMCDrnKOyvXb3dZyIO41jzzXh/wSM/UhSCZv4kP6PAZAySKt/PW2BzXRSaB+A60suwKXEF504FQcndx9LnitaYTUyt4Cfjj2DqweOpdJXy1NbF3PdkpdZX1uBIU10abK6uoxZi56nKdC1YIopIDc3DVOVxP+zSOjtTVuY8dd/saHC8s+6HBpP33QRHo+G6cT6qFa5r+Gw/m+oUNwzh4evmcFPrvgG7/30Rn55zdncd+kpvPvLGxk5sBDpBOkC0wXZOW6UcAFF2D8c+rjdGvWtPssCFyA1y10RqQPQwJmpUeVtQ1GiecBmWM2tIwPRpgde2LI2NesjseQu563fFrfaD887xd44VyGAaWVtQCTjJbFjiUNROLFvHwoyM5g5eCiZ6S6kI3QOIgeYDEVLvsbajCALKrbZrP01gOzC5xjHsWcRBzeAfUZdCAKh5FHfOo+yuh8iQ1EsVcmiX/5fyHCN73AfpxRMQAL/3juXukAjTtXBGQWTuLmfVSBS6a3n3rX/pNrfhCFN/IYOQqHR5yRRlcZE4jWCvLn3C24c0rnAzY6aGi556WUrvcqJlZEQCsapoXs9YJgEDJP75rzPR9+6CSnhNx98QjCU+wtEfKgySORKKa1r5MlFy3n921eT5Yn6GtfvPcD2gzVxWREtwSCmJpABaRFbjEWYl51OdYNldYYTJszwsyu0DW9Aj7g9ZFh7OIxEP4QkpQfCFqGVDSkJJLgizhk1hIY2H4/OWxJJARSK9XCx3ZQAt6qialarpV7ZmfzirNMjy68fOZa/f7EWn6GDIUCTCVFQidAMW/JXEHiOQueN/x84bhF3HsecRSyUAsCdsvIN4SbgPInSuu9iyCZM2YIp2wgaB9lVdQ1BIz6dqzlYzZrat1hR/QqV3h2AFbDLdToYnZNLrlMgaOPjqqXc+fkDrKpZz3fXPc/+tlq8RoCAaZGNppqhGSXPy2fofFFX0elj/N2y5fE5rmES1EAm3B01LW2U1Tfy2e4ydlfVxmsmhKcSQ6CmlFQ1tfKvpfFymb9/Z2mSbzVoGOhITCcYWpQg3Q6NC6cMR8e0LNOwhWrjSojIY4av1FifR/ijWHOMCM13hNhtKHD6qOQkn0FFeZhuMJxguCDoxDbfACzxokfOO4vuOWkEnTqVRjNnvvQ8jyxfgpSSb487kTNLBuBSVTI0F27pJEN1UpSWSbpDw+VQSHNptpN3qirnF4/szFH9d6Er1vBxwj72LGI8M6HliZgczlg4EOm3c9C7EimTo/lS6tS2vE5h9h0AbKz/kEUHn0EiMaXOyprZDMiYRHUwj3X1n1PnBzPEFhKTxmAzf9j5HLWBzCTvdJiMDTP5vdupqJRk5nX6ED8rK0u5LKkTc2hXn++roM2mhVKE6GImHNANXlu1iaw0N9OH9KN/QS57DtUlj42FAsIpyHN7+OFlp7FyVxnBuLI7IvoWtulyIuF3iN7ESvwygUXgqiqiRSLY3++KEJR075b0/WOLluE3jMi2w9uUIn4qEuvh9PiKZVT6W9AxCQSsa+fFjRvo7knnlnETeOob51Pe1MDmmioK0jIY16MoEoDb3lBNZWsTL+1ZxWfVpQRMAwWBU1W5bchUhub0sDuj//04TrCdxjFHxELpBjnPIhvuQkGGfL9+cJ2GkvVThFqEv+V87NoLSPz4jVIAmoJVLDr4DHoMYevSz6bGdVT4u9FmmJg4SaQV3TRIc7TSHExP2r7bGcQfTP6TqELhiv5jO3V8n+0vozWYWtg7keTy0tIo7pZNbkYaLk3Fb6ciZnNDNbR6+eP8T/nTghVcMmEEWR4XDa3t+LEFqJrCc/ddTobHxQMv29TuhEg/loy7dC+HshaECdOHlmCqsGbXfoSw9CggJrAWgqoIvEGdJz9aztqyCvp0y+a6KePYdqg6uv+EdL6wVyH8u6HCgfoWDHf8NePVdZ79fA23jLOq+/pk5dAnK1mFbUhOd8r
balhXvweHJqyYMZLL+o3m7uEp5Vf+qyE47proCo45IgYQrilQ8BkEViFkGzgnWQQdQrpzLG2BLZAQ0FNEGulOqwZla+NS2wBefVAlKHXMFD15LB0C+/QoRQFNM9F1BVUouFUHbtXBkydeTFFaVtL6H5Xt5vF1S9nTWE9hegbXDR3L40uWW/5UO8sSIs8Xh6riUBSeuHAGQgjOHTWExz5Ykry+JOmZFL6/rCahJnPWbWFoQR71ew8mWYuxcGoqpdX1uDQVTVVspSMlWG4GQgUloY20l6KciKH9evDUHRchhGBXZQ1b91fxt8Wr2VdTHzcnTVUY3beIy597BX9QJ2AYrCs7wPubd+ByqPiEkWSJy5iAmwy/LcSycgJqvW0dirgf9Dbyg3Vv4jND10XICp9TvoHTioZwcg/b+qj/fhwn4k7jmCRiACGc4Jpmu6wg61ZqW1/HjGsZo6KIdLqlWeJyAbMNg9T5popInWXhUj24FUecIBASTMOJx6Fwdt8RXNRnHOmakxHdimybfb6zeyv3L/vACgAB+5oaeHj1J0gFFMVisviiLytPVzgEmQ4XV44ZxdXjx9Az2yL4buke0tKcNDT74oeESYfUfOMNBGkM+iOZDRH3R0JqsG6Y9M7LRoCtRCUQ5/uVSii92CQhuGWP8Ny2VVQRNAycmsbAonwGFuUzYVAfrn/mNRrbvAQNE01R6JGTSUAzaPZFVfhMKfHpOkGpxPnGI67MsNVuN28b9MnO7jAH+J3yjbYVel4jyIt7Vn5tiVh8Bd1//ldxzBJxe3BpfRjc4zXK6n4csowh03UixXmPoipW3X+/jPGsrX2LYIKUZo5Dp9VUkMIk6oWOvxHvHHA5u5tbmb1vOU1BLyUZBdw1eAaDM3uR6XDj7iBKLqXk16s/jpBwGCYSnCCDEqGJuEBImMd0VeJ3GJw6pH+EhMNo9gesV/cwiYYHqcSXP9vcX4oiMGNKesOrKEFriKYIhvbqzoBCy9dd2C2T8pp4HefIZhN8vlJgRcrCxB6el00amwxlWcxeuZGAYTBlYDEjevWgKCeTD350I8u376OstoEBBbkM6dmdk373bGQ/cccVKyeauJ+Eh4IiBEN75rOjscbyK4fg1jTun3Jy8slKQI2vJWW5e42vEy2o/htxPAjXJRwn4hRIc45iaOF7GGYrAhVFccct7+UZTt/0sZS2rkcPkbEqHOQ50umZNpl19euQjgBtugNDRpnFKUz2tu3g+v5Xc+OA0zkc1Pu91Pu8KZdLRSJMEbUsIxUM1vKAYbC+8gATe8X3+Oubl8Pu6rp4n2hYT0IFl6YS1E1Mm5ZMuw7VJVuFkgg5Z7pdPHJNtIjg73fP4txf/TPS9NSWhMMQVh6zMMHj0CjKy+LciUP56/zP0GN69IWzLqSQ/HHBcoKGyV8+WsnJg0v43VXnoSoK04f1i6z/u4WfRrYf91NalrEM+ZzjlgFhAaLIlDXBt6ecyCdle3lz25cYpkmO28MPT5rGzMFDbQ4oHpPyS3hr33rajPgAsVNRmVowoMPx/6047iPuPI659LWuQlXSk0gYrAql83vdz7icKfR0QoFTYUz2qdw44Glu6ncb9wz6DpNyJ5LlkHjUAG41iEcNoqkGy6qX83bF3C7NI2AaNAV8SClJ0zpu8x5fwkGcdeJSVbqnJQcLv3fGybhjxNRRQXMpFGRnMGPUYB6+5GxK8nOQLiuly3BG3Qe291wMeTX4fdzx7zmRCrmiblk8c+fFuN0aWqhdkV0RRoQbNSuPt0XV2dlQxzOLVyFdCppLQagCqYBDsy5nQ7XKlA3TxBvU+XRHKW+sTtaPmPdl6kIJTYqoloSN1W0VoVifACYf7dnDr0/7Bl/cdjerb76DVTfdzqXDRqTcfixOLRxMn/RuOGPK2RUEaZqT6wac2Klt/DdCmJ3/HOs4bhEfJgJGMx/uvxmvXoWCFxcqtd45HGgdxIDs85GyBZ++g1xHPQFVpUl3EwwF8IIyyNyK9/i
8fgv3DLqVQndqvdlWPcCvN3zAvP2bMaWkuzuDB0afzXn9hjBv7/a4V1olJLigKgpGLAGbxOVNCyE4e9CgpH2dPnQAD19wFo8tWEptaxuqIrhg9DAemHEqHqeDNz7fzP625mjRRrgIQ0TLoBMRW3dRXtvAx1v2cObIgTz63hJmr9qI060iDcv6k36TgE3nZbtKujChuzSVc0YN4kB9M26nxtryioh7IFyJ14LOowuXUdIjl8n9orrFKX23AmadMIoFpbs41JZc6iwQlrRnaLgiBO4QaTtVFafauaab9f5WVtWU4lY1njvpOp7btZy5ZRsImganFA7mO8PPIN+d0alt/VfiuEXcaRwn4sPEprq/0xo8gElY2cvAkAZrah5jn7eZpdXvEpR+VAFuxcTlDFITyCQQk02xt7WMn23+LX8e91ucir2Ve9vyl9lUXxEh3EpvE/evfZvHJ1xCRUsTm2oOooRKg3umZ/LQlLN4fctmNlcfol92N9JUJ/N37sShKAgh0BSFf1x4UbS9UgLOGzWEc0cOptnnx+N0hPQnrEDbYwuSizYsBzBIvZ1YWshqDhomr678goY2L2+s3kRANyKZEwLQNAW3UPEFQkSauB2bHQghGDewN7+dMpp/Ll3LqjJLZzpcyhwe16YHueOlOfz8vDO4aOxwAC4YNZTnVqxLEtB3qiq3T59Inx7ZPL7k0xilNELzknFzcakqFw0bFvm9KeBn9aFyXKrK5B7FONVkkYm/bV/KX7YvRVOiT5k/TrqM+0eeleosfr0QjjEcR6dwnIgPE6UtCyIkHAspNT6pfhsjRvQ93CE+x9FGVcAKkFkuW0mz3sKvvvw9Pxt+H041noy/rK9kS0NlUiDHZ+j8dfsy3px5K1/WHmJ7fQ19MrOZUNALIQQn9S6OW//QtBbWVlSQ4XJyUp/iCLmmghAirnwZoKKhKV6ZLREqyIRsr0hwL+xzlrC2dD8V9U14EwhdYqW3XX7iaHaUV1NaXUdNc5vV0qqdpIOAblDfZvnLpwzow9OqQtC0KvbifN0C2gydH733ISv2lfGtaZO5eepEFm7bTUVDE95gEEVY7oh7Tj2RwqxMLhk5gieXryQYiPfdCgRSkdb6qsq1J5zAmEJLzOlfW9fxyLpPcIQkMAWCZ069kGk9o77pZYd28uyOZfhNHX/MKb1n1WwWnnUvea6vsRUci+NE3GkcJ+LDRCoRoDZDRUHBsCmIdYRaJksp4gJ4u1tLeXbPi9wz6Oa49bc3HUppZW5tOMgdK17lin7juHjA8HZTpHpkZHDekCEdHlN7yPK4MMx2iDimGCOCWH2I0E3pdGjUpFA2M01JSUE3vj/TKmLwB3XeWLWJP85fblX92aSwuR0a40usoOPwXj04ZWg/Ptm2h7awcyacXRH6vwTe3byVhdt38cr1V/DmbVfx/ubtLN6xh9w0D5eNG8XInlYlW47HzYtXXMq35rxLk8+PIgRCwPXjx9IY9ONQFS4YMpSRPaz1Vx0s59F1S/AbOv6YP/9tH7/Nkktuo8BjEey/d30WJ38ahpSSeeWbuG7g19cvHMbxgo6u4TgRHyaKM05jd9N7yIRcYiEMRIoYqOVWNEPdog0CpoYhFSSSVbXrubGklQxHNIjWOy0HexkwMDFZXLmDlVWlnF88iofGnXf0Ds4G3dI8TCrpzcq95XF92MDqI5fuclgC7CoJDTGJyWWz0tx6d8tme2VyZ2IJjOwd7ZzscmhcNmUUr63cSFltA4Fwp+TQKXFpKiN692B8v2j2x+NXnssrKzfw6w+XRNPbEk6hKaEtEOTXH37Mf66//NnqoAAAIABJREFUnItPGMHFJ9gH1kYXFbLsjlvYUlVNwDAY0aPA1tUA8I8tq20J1pQmb+3azB2jpgBQ5WtOWgfAb+opl30tcTyPuNM4njXRSUgZpKn5n1QcPJ39lSdR4mglXc1EFWFJLoEq3EzOvwm3ZvNqKVUaDA8uxcAZ+mRoflyKdeNqikZdIL4
P2oT8vnR3pdsUdFiODVUxaTMCzC3byJaGzrUbOhI8dskM+ud3I83pwOPQSHM6GNg9jycvO4/fXXkeb99zTVLXighiCHHW5GQRGyFgbN+eDO3ZPe57p6bx0t1Xct208eRnpuFyqDhUlfzMdG4+dSJ/u/niuLcBVVG45qRxTOnX25aEY7Gu/AApGownzE0wokcBY3sWpSRhgAOtKQjWMOKWTc7vhyaSb7001cm4vOKk77+uON6ho/M4bhF3AlJKDtVciz+wGhlqRa63vsgJrjxa3XdywLsBt5rH4OxZdPeMJtdzMs/t+TmG1NHNIKpQaTYlEpFk4LpVnaDU0E2DfFe8+LsiBP+adh23r3iZHU3VxKodhLsOK4pJwDBYdGAHw3OKvtLzkJuextw7r+XzsgPsra2nX143xhX3jCPCZ667iPtnv0+rP4Av1L4IAQ5VQVUUHpl1Nj+auwDDAUK30r/CxRNNMkCL30+GK15vMsPt4q6zT+TEYX2RUjKuuCcurf1L9+JxI/isPFWDWAtOVT1qnS8AphT2YUdDdVJwL11zMLFHtAPGTYOm8k75F7To/shf1KGo9E7P4ZTCwUdtPv9fcbygo0s4TsSdgM+/HH9gTYSELQQwzTqKtBaG9Xo6bv0iTz9+OOzvbGlcTWOwhrX1K6hqOUAq88ytSqbnn0yalpz2VJSWzbeGTefBz+fQFmq/ntBtHoSwtbC+CgghGN+3F+P79rJdPrBHHpecOJLFO/eSn+ZhVI9CKuubKG9qpLShge+/O5+AblgpadGXCQA2HjjIlc/N5u3br44LKC7ZuZfvvPEeQdN6AClC8Mj5Z3HuSHu/t5SSv65Y3e5xaIpg5siOiy26gluGT+S1nZvQzViCVejuyeDs4ijBFqVlM/vU23hi8wJWVO/GIVQu6DOGe4efjvp/9Hf8v8Dx/ODO4ysjYiHEL4BbgerQVw9IKd+3WW8G8CSWXfQPKeVvv6o5HS68vsVIWzF5P23e98nJ+m7SEofiYkw3S8vinco3UhoHQkjyHWlcV3JZyv1nOz2RQJEdNKEwo/cw+4WdgJSSzw8dYH9TE0Pz8hmS173jQUBrIMDbm7fwaek+emRmcnr/fnx/zny8wSABw0AAa/cfoJvbTV2rNz5NLKwjkXBMBxqb+GjbbmaMsIhrX10Dd7w6N0mL4f45HzCgex5DeuQnzWvd/gNUNrfE5TBHjhVwKIJ+ebn86Myjq2xWlJ7FnPOu5aE1i/m0shRNKMwsGcqDE05PcmmUZOTx5ynfPKr7/6/DcYu40/iqLeI/SCmfSLVQCKECTwNnAvuBNUKId6SUW77ieXUJipKJ1fUxORAjRLIqWiI04UTFj55SuaYNJYUltLflIHta9+JxBvAaJmZMtoWUoAiV24dMpX9mMiF1Bgdbmrn63dc5GPJhGlIyvkdP/n7ORaQ5rHQ6U0rm7djOq5s24tV1zh88lLMGDOSbL79Gvc+LN6ijCsGrazeCKWMcKODTdSpbWmzV4OweLG2BIMv37MMQkmW7S1lXWmFpaCSsq0vJX5et4g+zkoOUu2pqMaSJ6bAKTWI5XAh48tKZnDb4/7V33uFxVFf//5yZ2V3tqkuWZUnuDWPTDMbYmGaajalOTCCEEkoILeUlbyD5JYG8abwhISEJBAKBlxRiB0gAG0wMGBtjMBg33HsvsnrfOnN/f+xK1mp3pZXVxXyeZx/vzsy9c8e7OnPn3HO+ZyR6nCrLrbGm5DB/27KGMl8DFw4exZmDinh9z2aKvbWcM2g4V40Yz6jMXF64OPFN9fOE7ftNnp52TUwGdiqldgOIyDzgaqBXGeJUzxeorvk9LevciXjISLulzfZn5pzLh6WLMa2WIkAKDUXAjNWNUErxxI7XePPwJ4SUhcsBA3STQNBDvd+JhSLfncGvJ83htNzjr8B7539eY291FWazuLNVxYf48fL3eHT6TJRS3PfmAhbv3t3k+1x3tJhffvgBlt+i0R1qKhVOV413kgR
/kIluSws2bWXB1m00BIKJH28VbDhcHHfXsOyscPCaRLQumlWuF4EhuVntNsJ/2rCS365Zjs8M304/PrKfoLLQDTCxWHxwJ09s/IjXL7uFnBRPu/rulyjsqIl20NUOqftEZL2IPC8isWUQoAg40Ozzwci2GETkThFZJSKrSktL4x3SZTiMYWRn/RTBRdixqSPixuO+Eo/7yjbbX1FwA4NSCnGIhY6FRF6GhF8F7lhDurJiKwsPr8RvBTGVSUiFa5qluvzcNW4KL19wJ+/O+HaHjPCeqkq2V5RHGWEIr/K/vmMLAdPkk4MHjxnhZlEIAdMkpLf8Q2vHH14CN4sCGkKhpmohCXsUGJgRP/HhrGFDopXZGuOZI4k1b2/fmfw4gaMNdTy25gO8ESMMELAslFJNokMNoSDFDbX8au2ydvXdn7GjJpKnQ4ZYRN4VkY1xXlcDTwGjgNOAI8Bj8bqIsy3u16KUekYpNUkpNSkvLzkfZmeSkXYjRYM+IifzB2RlPEhB3gLych5PatXdpafw3XGPMjZ9Ak5Nw6lZODULXRROzcnVRV+JabPg0Ap8Vmy5JktZWFoD4zKPxds2hPx8Wr6HLdXJhWM1Uu5twKHH/wlYSuENBVm0a+cxI9yc5spuRESGmn1uE6GpxlzUK560ZYIub5sSv5CrJsKw7Hj3fRBN4uo7t8aSg7sTuI4k6tcatCze2LelXX33a2K+3FZen3M65JpQSl2czHEi8izwRpxdB4EhzT4PBg53ZExdiWEUkpH+teNqKyLcPfr7vF38KstKF+E16xmUMphrim5kXMYpMcfXheKXHbJQ1Dfb9/fdH/LEtsUYmoapFNlOD7+b9BXGZAyK2745J+QOCCdJxGGAJ5UMpyucqtua3WpWz07pkZA0iJb3lfCsx6HrGJHSRJYGKjKjPib8LrGzo2bp0Y32UxFO5rjohLBE5OaSEv65YQNVXi/TR47ksrFjue7Uk/j1shZFVAmHic04oX1C6xoJS83GkPSNqJ9jZ9a1jy5zTYhI86DW2cDGOId9CowRkREi4gSuB+Z31Zh6Gl10LiuYwyOnPMvjE//B9058NK4RBjgv72RcWqwwj1t3cvaAcBbYsqPbeGL7YnxWkLqQH68Z4LC3ijs+fh5fnAyvlqQ7Xdx52iTcLWJyUwyDH519ASLCNeNaicYQSNHCbUUkHAmhhfWAm6cXK11hOhSWUzFn4km4PQ4sQ6H0sPG2HGFfrqXFMWNCs9l35CUwMCsNEeH/1qzh2nnzmLt+PW9s386PFi/mmhdf5Krx4xiXN6BJ3EgAt2Fw66SJjM6NX4g1ZFnsqiqnpCFajP3CISNj3DdhohcRdREuHdJP4oA7ilLte33O6crFukdF5DTCfzt7ga8DiEgh4TC1WUqpkIjcBywiHL72vFJqUxeOqc9wWeFkXj24nKP+KoKRWmYuzcHI1EKmDgirhz2/a1lcg+szg/xt94fcMfr8Nl0n9585jcK0dJ5c8wklDfWMzMzhwSnnMn3YSADGDxzI1KLBrDgUnRwhwMn5+VwzZjwf7tlHfTDAZ0eL8RFCzBYzw4hBNpw6ZwwrYu6G9XGra6CDshSiwipxjanUjfq/jdMGl6HzhVPGU1xby6MffBAVFtcQDLK3qooX1q5l7g1f4u0dO1m0fSdpTidzTp7A6UWFcf8fXt6+gZ9+soSgZRJSFhPzCvjD9KvI96QxwJ3KjyZfyM9WhvebSuHQNELKQvTwlaboBukOFw9OPD+mb6UU7xxdx9y971MeqOXkzGHcPupSRqa1/dTSl7FnxMkj7fEp9hYmTZqkVq1a1dPD6HLqQl5e2b+M946uRdc0ZhZMZnbRNJyRUkozFv+Ko76auG0dopOXks7vz7yR0ekdK8duKcV/L3qLBdu3NflXJxUW8cSsK8h2h5NQ6gIBpj39DHWBQPMEwKgFPqeus+xrt3PJcy9QGwjEGmIFLtH59tlTQYSlO/ZQ4/exp6KSkFJYSuFxOBiek8W
8m67j35s384v3349xPwAUZWSw7I47krq+JQd2c/fi1/A2Kz2lizAkPYslc+5ouuatFaXM3f4ZpQ31TB8ykuEZ2czbuY7ihlrOLRjBl8ecRqYrtojA0zsW8sqBj5p8/oLg0h08ccZdjMs4/sXW7kBEViulJrW3XXrWYDXxvG8lffwHCx5o8zxt5RyIiAv4K3AGUA5cp5TaG9n3feB2wkW3vqmUWpRMn91FT4ev2bRCmuHmqyNn8NWRM+LuH59ZRImvNq5fMqhMDnuruH3Fcyy66Ltt1sFrDU2E38ycxY+nX8iuigoGpqZRlBEdP53mdPK3a+dw52uvU9HQEBb+aWZoHZrGWYMHk5eWxti8Aaw+HGcpQOCSE0Zz59mTAbhz6pkAbDlaysufbaTa52P66JHMOGE0Dl2PW3CzkXj7lFKsKTnMe/t34zYMrhw1jmEZ2Ty+9sMoIwzhcLxSbz0fHt7HuUXDARiXk8f/TLmYmoCPqkADBZ7MqNTleFQG6njpwHIC1rH+FQqfGeAP2xfw5KS7W23fl+nMGXGSOQe3A5VKqdEicj3wS+A6ERlP2O05ASgE3hWRRh9Sr8hjsA1xL0EpxbqqD1ha+iq1wSqK3CO4dNANDPEkXlj6+pjprCjb2ao/OGiZLCnewmVF8X3R7SHDlcLEgviP9hBRKrvzDp5Z+ykvrFtLdYOPFAwsSzEmN5dbJ5/Oy1s3MmHQQDaVlMTMZD0OB9NHjYjp98T8PB66dHrUNksppo8YwSPvvx9zvFPTuKKF7KdpWdz33gLeP7AHbyiIoWn8Ye0KHjzzfPbWVMa9npBlsbu6oskQ1wZ9fG/VfN4v3okuGoZofGvCBdw8enLC/5P1VXsxRCcQp+L3xup9Cdv1eRRhmbvOI5mcg6uBH0fevwI8IWHf3NXAPKWUH9gjIjsj/ZFEn92CbYh7CW8fncfy0jeaqkLvqFvP3l3buG3kDxiROj5um3GZBTw1+RYe2fgG22vjJzf4zRDFvqq4+zqbKp+Xa1+fx+G6GrxmCJdHJ4TFw1Mv5C+b1nD32/PD6zJKYSmi/MBOXacwI53LxkYvdh2oqWLFoQNkOF1cMGwE2yvKefiDxaw7egRd0/A4DYI+aZoBuw2DgWlp3D052ji+vmsL7x/Y06TX0Zic8uinyxiZnU2VPzZKxdCEUVnHFva+9uFcNlY2CvWH/dKPbXyPTEcKVw+Lf6NLN2JdFY2k6G3XHuzTtM8ODxCR5v7GZ5RSzzT7HC/n4KwWfTQdE1l/qgZyI9s/btG2MV+hrT67BdsQdzFKWfhCOxF0XMbIuItnDaFaPiidT0hFz2yDys+CQ//HN8f+KmH/E3OG8dJ59/LtT1/k/ZJtMW4Kl25wQkb7VdlWlx7kHzvXUOn3cnHRGGaPOBm30bp740fLF7O3urLJyHkjM96HP1yMaVlRqmR6isYgRxqhgIWuCVefeCL3TDmrSVVNKcUP3n+Hf23bhK5paAimUgTMUJPecciyqCEADjBMjYLUdO6cdCZfnDAhphTUi1vWNRnh5oSUxbjsgeyurohyTxiike9J5+yCsCzl1uqjbK4qjqmW4jWD/H7L+wkN8anZI3HpDhpMf9R2p2ZwRcGZrf5/9nXa6Zooa8NHnEzOQaJjEm2PFzXWI4tm/dIQK6sWxIFI4tlId1DjW86+sv/CVLWAwtAGMGLAE6S6JkYdd6BhB7o4YgwxwGHfXixlJdSiaOSusRfycfmuKDeFIToF7iymDGhfSfYnNiznqc0r8JlBFPDJ0f08t3Ulr874KhnO+P+nIcviP7u3x0hAooi7mGYqi1KznhW3fp1cd2xK8MtbN/Lq9s3hQqBmbLWTKDQIaRalVj1nDimKW4/PG4zvvjEti3xPKv97zkz+55PFeEPh6s+TBw3m8QuuaFqo21NbnlAZ7UhD/AVTAF00fnXarXx7zZ8xlUXICmFoOmPSCvna6Gjfv1KKDdXLWVb6L2qDFRS6R3Nx/g0UteKe6tV0biB
AMjkHjcccFBEDyAQq2mjbK/IY+pUhVoGVqOqHwNwPCMp1LpLxM0Q/PkGcjuAL7mF36e1YzaQzA+YBdpTcwITCZTj0Y9mBbiMNRfzECoc4k0onGJdZwB8n38wjG99gV20JumhcNGg8/+/kK9s04s1ZW3aQxzd8EBbaieA1gxyqr+bpTSt4YOJ06oMBnlj/Ef/avYGgZXHx4NF845SzYytztELJH5/Du2krA+66P+k2jbgnjGPgPbfHbA9aJov27GBcHPW4WSNPYFd1RVOF50ZchoHhEBxOWDrnDqr9ftKdLrJToiVJh6flJLy+fHd6q+MdlzGE18/9IR+UbqLcX8OJmUM5OXNYzNPReyVz+bB0fpN7amfdWvbVb+bmEQ8xPIF7qteiOl0GsynnADhEePHthhbHzAduAVYAc4D3lFJKROYD/xCR3xBerBsDrCQ8U26rz26h3xhiFdyCqvga0ExAx78MVXE9DPgP4Rtk91Fa+39YcWa4SoUoq5tHQeY3sJTJntq32FL5EiOc+6k2HZQEM7AiSumGODgj54KkxctPzxnOy+fdh88MYoiGobVeJLQlDaEAX106FwuLlk9zActk/r5N/Ncp53HFm89zoL467N9V8OruTSw5tItRWTnsqCxP6lzeTVvbNbbW2x4TvEx007p5wun8c9sGShrqmoyxoWkECPL8jk/Qdob9zI9NvZKZQ2J1ik/MGsS4zIFsqjoSNet36w6+eWJs7HBLXLqDiwedlnB/Q6iW5aWvxXVPvXn4z9w75jdtnqM3Ec6s67wZcaKcAxH5CbBKKTUfeA74W2QxroKwYSVy3EuEF+FCwL1Khav79pY8hv5jiOv+CLRccAmBVQ7+pZCSVDZ2p+ENbgufvwUKP77gNpRSLDn8XY40fIzCwhDI0QNk6/XsCxQSVG4K3SOYVXBTu899vKFq8/dtxG/Fjrk5MxY+y76GCpSAROx8KGRSG/QzfehoDtZW4wsdE8dxOwwuHjqKd/fuavIZN2fYXx9p1xj33fz9+DtSLJSCc4bELzWU4XTx5hdu4a+b1vDG7m0ELZMD/gpMLYS32ST5/hXzOSWnkMLUWHnTZ6d9mQc+fZ0PS3ZjaDoawjdOPI/Zw09t1zXE45A3sXuq2LcXS5lo0r4ba4/TycLwET3zhS22PdTsvQ+Iq0GqlPo58PNk+uwJ+o0hJrSJuH521QChbUD3GmKP82Tq/KtoqWEskoLHeTLF3lUcaVgZ5ZKQiDrYKFcl5xQ8wtDUsZ1ayqct1pQdJKhM4q1tCJDqcLKrprxJxQwiGhC6wm+abK8p5d+zv8LvVn3EhtKjFKalc8/pZzF96Eje2LmVX6xcSqm3joHuNDoeuNXsu3ZZ4VRqLL7+/r958IzzmTHkBNKd0SWXMpwu7ps4lfsmTuXWpf9kz5HYKtkhZfHa3g3cM2FazBkznW7+NO16Kv0NVAYaGOzJwql3zp9Qit6ae8qRsCBtb6YzZ8T9nf5jiPWhYMapUSZu0OOX9QFQVh34F4cNtnMqYgzvlOEMTL+VsroXW7gnBA0nuanXsab8+ZgK0I1Y+EgzrG41wgBD07Jx6To+y4xW7EGRZrjYW1cZ5TuG8M2jUX9tgDuVE3PzeHrG1VHH7Kwq4+HVb+MjhHJaVFHf8cEaCjQVfqCU8HslUBao44crF/HQyrf5w7lXc9HgMXGbH66vjrs9ZFkcqGs93C/b5SHb1bmaw4PdY3Dr6QSs6Kc6XRycln1ht/8WOoytqtYu+t5tNgGS+nUgtuYbOCAlfmaa8i1BlUxDVT+MqnkEVXYlVvVD7ZKSTITTKGLMwHmkOMYiOBEceJwnM3bQvzH0LAxxJWwrCN5Qcr7WzmTOiFPRRUPTw7PcRgktp6Hxh3Nmt9rWZRjcMi5WllIpxe1LXqHC30B9KEBIWdTHCSNrNw4VnkY0EwVqtFV+M4TXDPKND16j0h8ucbW5qpgHP53PDUtf4PGNSwgXm473Pau48qNdjYhw0/Af4tE
zcGpuDHHg0FwUukcxs+CWbh9Px7FFf9pDv5kRi2sqKuOHUPsLwlJdJuh5SNZTiMQaaGVVoKq+RYxf2fs6OCeB+6oOjynVdSrjC94laJYh6Bj6MY3cERkz2VT1d+IbAyEnpXMLW8ajIeTjveJ1bKzZz1BPPlcUncmfzvkS31jxL0wrvGCnUPz0jFmcWzCSXJeHYm9syXgB7jhxMucVxmbFba0qpdRb3/mTI7FomkeISljP7819W8lMcfKD1QsImCYWivUVh1uoqR2rmCLSM4YYID9lKN8d92e2166mJlhBoXskQzwnICIopagPfEaV9x0EBzmpV+B29O6wNlv0J3n6jSEG0DzXotxXQXALSCoYoxM/0vneStCLF9XwV6QTDHEjjjjhc9muUYxMv4zdtS3XCTRGpV+Bx+jakLtVFdt5YN3zBJpFADy9cyH3jrmcT666n3XlBwkqi9NzB+OK+EEfOuMS7l8xH1+UOI7GSbl5ZKcalPhqGZgSHcpVG/Cjd8VjtQ6YkVj9BN37TZNyXz2/3vxO1JgbkzIMh8KyBMsMG3RdtzAMEyuBy6g7MDQH4zOnRG1TSrGn4rtUNryJpXyAxpHapylIv5uirOSFdbode6abNP3KEAOIuMCZOEyoCasaSDDzsbonJXjaoIfId5/G2vKn8ZmVOLV0JmTfzITsrg1lrA/5+N5nL0QM0jErZqF4audbjM8cyqS84THtZg4ZR6rh4jfrl7KzppygCuEwLLbWH2T3liM8uXUpv5v8Jc4bdMwve1JOflMac6fS6JJQKvwUDDGzYpdhkOlyobUSh+1wmIgzOra4JhRbQ7AnqfK+TWXDwmYx6SZKmRTXPk2252I8zgk9Or64dH4ccb+m3/iI241zMsTNvDPAeW63DWN05lVcO3IhN41ZwXWj3uaknBuRdiRgHA/LSzcRsuJnq4WUySsHlidse27BCF6dcStzxpyI0xkiJGF/b8Ay8Zkh7v/0lajsPo/DyXcmno+7WUhdp82PNZXwF5yiG0zKG8y47Dzau2qUlxK/Fl5PUVo3D0s1xGy3lJ+yun/3wIiSxPYRJ02/mxEnjeMMcJwKgbUc8xNrIB4k7es9ObIupzbkjYjkxDeJpQk0jpvzxoENkVC3WFaU7GZ6wTH1szvGn8mIjGyeWP8Rh+prOCknn/87rpE3oymHI3wdozNzGJ6ezbryw3gMBzeMmcht4yYjQtzMQg0Jhwu2+C9w6w5uGJlYTS1Zyv0lLC97i8PefRS5h3NO3kxynAOPqy9LJYoysbCINdC9Btu+Js3n1hCLCGQ/i6p/Hhr+GQ5fc52PpH0T0ftH5YSgFWRpyWKWl71PSIWYlD2ZSwfN4rSskeiiYcWJGdYQzsxtu9xPKIERBuImhVw0eDQXDT62uNRRQ5xqOLBQFKVm8tjZV3DqgMTynL896wvcu+JlTGURtEzcuoN0h4uvjjmTJ7ctRZPwoqRScMuoqUzJG9mhse2p28qze36BaYUwMdlbv42PK97lzpE/YHjqCW130IJsz5XUBzZEpcsDaOIh2x0/Iqg3YMcRJ8/n1hADiDiRtLsg7a6eHkpCGoJ78IWKSXOOxanHr7UWD0tZPL79V+yr30NAhX3h7xz9DysrPuZH43/K2XknsqxkU0St7ZgxTjPczB48Naov0zJpMP2kGilNs8tpA0ex5Mj2mLjikGVx1oDY6InO5qUZN5LqcDI8PafNY88dNIpFM+7m5T1rOVBfxRm5Q7hy2MmkGk5mDz+NJUe2EVIW5+WPodCT1WpfISuE1/SSaqTGnWkrpfjH/icIWMfU1kxCmFaIufuf5HvjftfumOABqXMoqfsb/uB+VOTpTRM3aa7TyUg5r119dSu2IU6az7UhbkQF1qHqHoPgJtBywfM1xHNtjwbRB8wy1h+9h7rgNgQHSvkZlDabE3IfRpJIdd1UvYH9DXubjDBASIWoCVazrHQJPzn5JubuW8rf9yyh3vSjicaU3BO4f9xsspypAAS
tEM/tfoM3Dq/AVCaphptbh1/G5UVn88BJl/Jp2T68ZpBgxN/s1h3cO+78Tk92iMeEnPY9tRR4MvnmhAtitue60pgzPDb+uSWWsnjpwKu8fXQJpjJxag6uLpzF5QUzon4nFYES6kLxk0VqgpVUBcvJdrYvIkbX3IzPf42SuhepqH8NEQd5adeTm/rFLl9POG4UnZ7i3J/53BtiFfgUVXE7TX5isw7qfo4ydyIZ/6/HxvXZ0a9TF9gayb4Lj624fj4uPZ8R2fe22X5D9Wf4LX/M9qAKsrZqFTMLLufG4Rdy4/ALE/bx2LZ5LC9dj98KL75VB+t4atdraJrGZQVTWHDRPbywcwUfl+5hoDudW0ZNYerA6Mf63XVHeWr726yt3EuG4eb64WfzxaHHwrMSakf0Mv66bx7vl35IIBJjHDJD/PvQAiwUVxVe1nSciBa3dBWESyS1RwmvObrmoSDjaxRkfO242nc3grJdE+3ANsQ1PycmqUN5oeEfqNQ7e0RCsy6wnfrgrpgUaEt5OVD7QlKG2KN70NAiSmot96W22b7cX80HpZ9F1VoD8FtBXtizkJmDzmKgO50HTr40YR+7aou57eOnI7rGipqglye2LWJT9UFmzZrFwoXHp7Uya9aspvf1IT9vHlrJh2VbyXamMXvwFE7N7lzXSH2ogaUlywm2EOTxWwHmH17IrEGXYGjhP6UcZx7ZzjxK/bGytrnOQWQ62nal9BtsQ5w0vfS5pntQSkFoS/xRvSB4AAAWeElEQVSd4oTg+uPv26rBqv0VVsl54Vftr8OC9UngDx1BiO9+CFk1qFYWyhqZMmAaehwXhlNzcX5e4llwI/sajuJIIB1aEajl3tU/Y1lp65W0n9z+Nj4zEDVD9FlB3iveyB9f+gtKqeN6vfnmmwBUB+q5ecVveXrnIlZV7OTd4nXcv+Y5/rL7vTavrz2U+EsTSoqayqQ2VBe17cah38SluXFIOGTPECcpmpsbht3XqePq9djha0nTJTNiEfkn0Lg8nAVUKaVisixEZC9QS7gAWOh4ynZ3BBFBiQfihgdZoLW+cJMIpbyo8jlgHqJJfa3+BZTvHRjwepuVQ1KdJ6CIr8eQohcl5SMelFLAFwdfz78OzgNoqvIxNXcap2ZNbKM15LuyE4angeKg9yhP7PgHR71lXDt0Ztyj1lXuTZDALayr3Muw1I49bbyw5z3K/TVN41SEDf0LexYzq/AM8lIyO9R/IznObEIJ5UGFNCP6CaPIM4LvjfsdKyve44hvP4Upw5iceyFpRqy0Zr/F9hG3iy4xxEqp6xrfi8hjQPzVizDTlVJlXTGOpHB/CRr+ATT3pwpIFjjaztBTZjnK+08IbgZjLOK5DuVbAuZRoiUwA2AWg3cBeOJKpob7Uwrd3MNIZyYB8xBVplCjdEDQJIVR2f+d9KVdmH8xp2VPZG3laoJWkFOyTqXQ3Xr590aKPHmMSx/K5pq9LULVFIZYiIQfzV86uIjLCy/AE6dIZprhoi4UW5RTE40MRzyBpvaxuPizuDcLDeHDsi1cM3hKnFbtJ9ORwWlZp7CuakOUe8IpDqbnnYtDi9V/TndkclF+60JJ/R3bR5w8XeqaiJSy/hIwtyvP0xEk/X5wngmkAO6wRoWWi+T8uc0VaRXciiq7BOqeAv/bUP8MqmwG+F4jqlJIE16Uf0mrfdZX/5CailtIVXvJ1gIMNfyMMPy49HzG5f6M/LRZrbZvSY4zl4vyL2VmweVJG+FGfnzSbZyUORKnZtCoa2iIhUM7NtUxRGdv/aG47ecMnUpKHCOliXD2gLZjldsi8cKXoHXyT/uuUbdxatZJOMTArbsxxGBK7mRuGDqnU8/Tr7BdE0nT1Yt15wJHlVI7EuxXwNsiooA/tSifHYWI3AncCTB0aPwqDMeDiAvJeR4V3AzBjaAPBOc5SZVWUtUPgGruHwyACkBoK406vtFo4fC4BAQDq/E3vERzI64LZIiTgsxvkZJ6ZXsurcOkOzz
86rR7ONRQyr2rf45FKCYTzVQW6Y74i383DJ/GpuoDfFS6DZCwxKbA42d8FddxVhFpzsyCifxz//KYBUULi3MHjsdvBni/dAWfVnxGmuHhkkHnMz4jvj5xW6ToLv5r7D1UBqooD1SQ78oj3dF6rbrPN7aBbQ/HbYhF5F0gXjDnD5RSr0fef5nWZ8PTlFKHRWQg8I6IbFVKLYt3YMRIPwMwadKkzldVdIwHR/IFGpVZDqHdiXYCDmJFhZyI50sJ+ww0zCfaRdKIF7/3JVJSv5j0+DqTIk8eF+Sfzgelq6PcFIKQ58qm0B1brBPA0HR+OfEr7KotZl3lPjKdHs7JG3fcpZxactOI6Swv3UyxrwqvGUBDcGgG942ZhUvTeXD9z6gIVOGPhJytrlzP5QUXc93Q41fWy3Zmke08vrWDzxUK2xC3g+M2xEqpVmsPRcpZfwFIGC2vlDoc+bdERF4FJgNxDXHvo7UfmQaeG6Dh7zTXuiX924jj5FZ6DLXSb8+tfHjNBnQOoosfU4UXCgVBxMRSe/nOuruZMegKZg66Mm4SzKj0QYxKb18CxsGGw7xdvIxSfzkTMscyfeA0Uo3oRJFUI4Xnp3yL94rXsyISvnZl0ZmMSi9g7v7XKPNXEFTHZst+K8CCw+9wwcCp5KfEv3nYdCL2Yl3SdKVr4mJgq1IqTv0iEJFUQFNK1UbeXwr8pAvH06mIPgClDwMzjtdFy0LSH4DUW8C/BBBwTUf0/Fb7dLmvxO99Oax7EXUyNy5Pz8yGAV7Y8zv2NGxhUEqIgGUQsHR0sXBoISx0fBa8VTwfC4vLC67p8Pk+LP2Up3f/lZBlYmGxsWYr8w+/wy9O/h4DXNFxuE7NYGbh6cwsPD1q+0dln0YZ4WMoVleuZ1bBRR0ep03rSFfIn/ZTunKx7npauCVEpFBEGqP484HlIvIZsBJ4Uyn1ny4cT6cjWb8ML+7hjGwxwuptWb9GRBB9EOL5MuK5vk0jDGA4z8KZMgOk2cxPPBiOk3C5v9Al19AWFYEydtZtwVRh/7BLD5Hu8OMxghhyrDBZwPLzTvGbrYR5JYffDPCn3X+LZLCFMCSEqXzUBmv4696Xk+4ncbFN6ZOFOPscCrBU8q/POV02I1ZKfTXOtsPArMj73UDH65D3IOI4CQa8hWp4MaxTYYxFPDciRvuiE5r6EyEt6/cE/YvxNfwTlB+XezZO9xWIJParKqXwB1YRCu3B4RiLKxlh/CSpDJRhiBG3zHtLLGVRE6omx5m8OFFLNtdsQxPBpQXRomrthNhQvZJdddMZmTomoQ7IwYY9LClZiC51aEiMKBEozszp0z+7PoK9WNcePvcpzh1F9EFI+nc6rz8RnCkX40xp1QXfhGmWUlx6LaGmCtYWDseJDBowF03reAJBvquQUNxH/FgUkGZ0NJJAAD9a3Dp0Jo9v/yXjMsZz16hvx2QOrqv8hL/v+yMhFcRUCk3SUEpDRcLZDE3nS4OvjHFv9DaUCoBvESrwGRiDEfdViNa7xxwX2xAnjW2I+zgl5XcSDO2CZroUgcBGyiq/w8DcZzvcf5ojgzOyz2ZN5QqCzZTclIKQaqxXBA5xMm3A+Tg1Z4KekmN8xlggkLAYqM8Ksq12M8tKFzN94DGdC1OFmHfg2aYxagK5zjp8lgOdLCblTOXC/GmMSO280MeuQJllqIprwaqMrBWkoOoeh+w/I85uTTztOLYhThrbWdaHCYUOEwishZhilwEavG9jWYkqO7SP64bewTl5l+DUXBhi4NHTKEgZjiYuUiKl3yflTGHO4I7X2nPpTlx668Y8YAVYVhqtJ3GwYS+Wil4cEgG3HiRFL+fGYbN7vREGUDX/E87KbFqw9YFqQFXel5TGSK/B9hG3C3tG3IcxrQoQB6jY2GNBw1I1aLSttNYWuhhcU3QjVxZej8/04dY9aKJRE6ymPFBGnmtgJ7gkjnFK5smsrlwdV05Si2xrKfGpS2P2Xzyk9+r2NkMpC/yLib2xAvg
h+Bk4T4+zrzeiQNlRE8nS+3+dNglxOEaTyPiIloautR2p0R50MUg10ppSizMcmYxIHdWpRhjgi4O/SIqWgkSVcVJohP3GOjqnZkYbpEL3UNx6rCC9oDE67cQOu0y6B4vEwbcCKla3o1djpzgnjW2I+zCapJCV8R1EogV0RNxkZz7UJ2aB8RiUMogfT/gxZ+eejUtzogGGWBhiNd0MZhZEp3tronHriG/j0lJwSNjoOjUXaUY6Xx52Zw9cRfsRMVoRmjLB2bZqXq/Bdk20C9s10cfJTL8bXc+nquY3mOYhDH0E2ZkP4unFRSWTYWDKQO4YeQe3qdtYWfERS0vewWt6OTVrIhfnzyLDEStxOTx1DA+N/x0rK5ZR4j/CUM8ozsg+G5feuuxob0IyHkZV3BBxN4UIL4amQPoPY264vR57pps0tiHuB6R5vkCap2cSPjoDb6iCI961GJJCoWcShuaiIVRGfbCEDOdgpuSew5Tcc5LqK82RwYX5V3TxiLsOcZwIuQvC1cWDq0AfgqTejvQZ33AzbEOcNLYhTgJlFqNqfwv+98KLY+7ZSOo9iNbxhbD+gqVCBK0qHFoGmiTvj11T9hwbKl9EwyBc6UyR5RpOpX8nmjiwVJCxGVdy1sBvoiUhiN8fEGMwkvlQTw+jg9i+3/ZgG+I2UFYFquwaUNWAGfZ91f8F5f8Qcl9JSi6zv2FaXvZX/YGjda9gKj8uYzC1waNYkUfpovRrGZvzXbRWsgEB9tctZ2PlXEwVwGymVFfm2xw+TyQmeHvNm7j0dE4fcEeXXZNNJ6MAW2siaT5/VqSdqPq/RDSHm8dwBsDcGxb0Sbmkh0bWMyhlsaH4JuoCW1ARyc6G4DZ0BaHIrPZQ7cuErFpOynuk1b42VM4llEQkgKl8bKp6iYm5t8VdgFTKpNT7AeXej3BoWRSlXY3bUXRc12fTidgz4qTpm8vq3Yl/GbG6woSD7P0fd/twepoq30fUB7c3GWE4JvRpRG5WlvJxtG4hAbO81b68oYqkz2tagbhG27R8fHLkK6wv+W/21/yd3VV/YvmhKzhcuyDpvm26CDt8LWlsQ9wWWrjA5RU3HkIv2BH9ynoorLLWxuvyyy/v4YvoPKp9q7FaynQSzmLTmsU0a+Ki1r+Bw9VPsr3kDvZX/hxfaH9UmwLP6QmrVbfEoaVixIka2FP9HLX+bZiRMSmCWMrPpvKHCJhV7bk0m06lHaFrdviabYjbQlJvBnHz1uJY45MsCxcubPugPoJTz0FLUIVaRSVgNLCv/Bscrv4D1b53OVr7ApuOzKDae0z3/5ScGzG0FIhqJy0+gyEpEbdErADFodp/Y8WpaiJolDQsbseV2XQqKuzGSvb1ecf2EbeBuM5FeW4Dvg2AeaR9Nc/0gkTl+vomealXsKfy0ZjtSoEZua8LTtJ0DVNVcSzzL4ilguwu/xanFa1CRCfdUcCVQ5/h09InOdywGl2cjEq/FIXFjpqFgEITnTGeMeTp1YTMcgw9WmLTIr48p0JhJSHdadOF2DPdpLFnxEmgpX+rp4fQa3Do2Ywf+BS6pKJLGpp4AB1LHIiko+Ek1z0VVA3x0q8tFaA+sKHpc5ZzGJcUPcotYxZz4+i3mJr/X5yd/x1uGPkqk9MymejaR6q5gKPVv2DrkSnUeN+N6m+g5yIk7nxCMcCdXOyxTRfRTT5iEckRkXdEZEfk3+wEx90SOWaHiNzSbPsZIrJBRHaKyO8j1ecRkR+LyCERWRd5ta+EejuwZ8Q27SbbfQ5nDfmESu8HWKqezJSpaOKmIbSfFD0fh5bO6oMTOnSOirrnMEMbUIQX6BQ+ULC//G5OLFyLrqUBMDrrHkoaFhMya7Aii6q6uBmS/mU8juMT6LfpBJTqzvC17wGLlVL/KyLfi3x+sPkBIpIDPAxMIjxDWC0i85VSlcBThCvEfwwsBGYCb0Wa/lYp9euuvgDbENscF7qWwoDU6NC9TP2kpvdprtOp86+i5axYxEmqM3E
B1UYq6+c2GeGo9mjUet8lKzVcG89l5DGtaD77a16ktGEpDj2LYRk3MsB93nFclU2n0n3REFcDF0Te/wVYSgtDDMwA3lFKVQCIyDvATBFZCmQopVZEtv8VuIZjhrhbsA2xTbtQZgn4/gPKC65zEcf4uMcNz3mELcVfxFK+SKibA00MRuU+jiSRIWcpb/zzY2GpaJ1lp57F6Ox7GZ19b7uv53hRVgWq/kUIfApGIeK5OeH/xecV1b4Z8QARWdXs8zNKqWeSbJuvlDoCoJQ6IiID4xxTBBxo9vlgZFtR5H3L7Y3cJyI3A6uA70Rm0J2ObYhtksZqeBVqGlNvQ1D3JCrlEiTzVzGJFm7HaE4ufI/SurnU+deQ4hjBwLSbSXEMS+pc6SkXUNXwOjGykMoiLeXcjl9MB1Ch/ajyOeGbEX4IaijvQlTGT9A8Ha9i3T9ot++3TCmVsASJiLwLDIqz6wdJ9h+v5otqZTuEXRY/jXz+KfAYcFuS52sXtiG2SQplFkeMcPNQMRP874JvPrhjDZBDz6Uw877jOl9+5gPUet/DVPU0CqVr4iE79TqcRs9W2lA1P48sRjbeJCzABzUPo1IuRbRYXeTPHY0ymJ3VnVIJiziKyFERKYjMhguAkjiHHeSY+wJgMGEXxsHI++bbD0fOebTZOZ4F3jje8beFHTVhkxy+N4krQq+84SrWnYzTGMKYQe+Qk3YjTmMEHucZFOU8RkHWTzr9XO0m8AFxBdxFh+Cn3T6c3ogClGkm/eog84HGKIhbgNfjHLMIuFREsiNRFZcCiyIujVoRmRKJlri5sX3EqDcyG9jY0YEmokMzYhG5FvgxcCIwWSm1qtm+7wO3ExZp+KZSalGc9iOAeUAOsAa4SSkVJ5/YpqdRVj0kiNnFquuSczqMQoqyf5ZgPHUgTqQdSm+dR4LKpoD9kBlBdWuppP8FXhKR24H9wLUAIjIJuEspdYdSqkJEfgo03il/0rhwB9wNvAC4CS/SNS7UPSoipxG+r+wFvt5VF9DRX81G4AvAn5pvFJHxwPXABKAQeFdExqrY6oe/JBweMk9EniZsuJ/q4JhsugBxTUM1PBfxizbHASkJnxo7HRVYhap+GMzdgKBc5yMZP0X0Ad02BlwXgf8dooWgABQ4z+y+cfRyVDcldCilyoGL4mxfBdzR7PPzwPMJjjspzvabOnekiemQa0IptUUptS3OrquBeUopv1JqD7ATmNz8gMhjwIXAK5FNfyEcNmLTG3GcDs4phCcNTRtBy0JSb+2WIajgNlTFbWDuIGwEQ+B/H1VxHaobs+gk44cRDZLG/wsHkIJkPtZDM/ReirKSf33O6arnqCLCwdGNtAwJAcgFqpRSoVaOaUJE7iQcdA3gF5Eu89e0xvGmLMfTSUjAAKDsuE7SY+S2fUiXXtcWoEcNYOTaYiZl/YETjqdRLZWL3lWvtOcxpY/95juXNg1xa2EjSql4TnFoPSSkPccc2xGOKXwmMqZVrYW69GX667X11+uC/n9tx9NOKTWzs8fSn2nTELcWNtIKB4EhzT43hYQ0owzIEhEjMiuOd4yNjY1Nv6erwtfmA9eLiCsSGTEGWNn8AKWUApYAcyKbEoWd2NjY2PRrOmSIRWS2iBwEpgJvisgiAKXUJuAlYDPwH+DexogJEVkoIoWRLh4E7heRnYQdjc8leepkUx/7Iv312vrrdYF9bTYdRJRdpsTGxsamR7Ez62xsbGx6GNsQ29jY2PQwfcYQi8i1IrJJRKxI6mLzfd+PqOtvE5EZPTXGzqA7qwJ0FyIyM/Ld7IwId/cbRGRvpLrDuuMN9eotiMjzIlLSPEY/2eoXNh2jzxhijqVTL2u+sUU69Uzgj5KM4G3v5rdKqdMirz5deTTyXTwJXAaMB74c+c76E9Mj31VfjyV+gfDfUHMaq1+MARZHPtt0Mn3GEHckndqmR5kM7FRK7Y4IOs0j/J3Z9DKUUsuAihabryYsPwC2DEGX0WcMcSskUt7vy9w
nIusjj4p9/VGwP34/zVHA2yKyOpKG39+Iqn4BxKt+YdNBepVmXxemU/cq2qg20G1VAbqJPvf9tJNpSqnDkfI874jI1sjM0sYmaXqVIe7CdOpeRbLX2dVVAbqJPvf9tAelVGM1hxIReZWwK6Y/GeJkql/YdJD+4JpoM526L9GdVQG6iU+BMSIyQsIakdcT/s76PCKSKiLpje8JV33o699XS5KpfmHTQXrVjLg1RGQ28Acgj3A69Tql1Ayl1CYRaUynDtEsnbqP0m1VAboDpVRIRO4jXKpGB56PpMD3B/KBVyMSpwbwD6XUf3p2SMePiMwlXNdtQES64GESVL+w6VzsFGcbGxubHqY/uCZsbGxs+jS2IbaxsbHpYWxDbGNjY9PD2IbYxsbGpoexDbGNjY1ND2MbYhsbG5sexjbENjY2Nj3M/weGleyzh8+OUgAAAABJRU5ErkJggg==\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -926,16 +923,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "It's now time to build the neural network that estimates the likelihood ratio. The central object for this is the `madminer.ml.MLForge` class. It defines functions that train, save, load, and evaluate the estimators." + "It's now time to build the neural network that estimates the likelihood ratio. The central object for this is the `madminer.ml.ParameterizedRatioEstimator` class. It defines functions that train, save, load, and evaluate the estimators.\n", + "\n", + "In the initialization, the keywords `n_hidden` and `activation` define the architecture of the (fully connected) neural network:" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ - "forge = MLForge()" + "estimator = ParameterizedRatioEstimator(\n", + " n_hidden=(100,),\n", + " activation=\"tanh\"\n", + ")" ] }, { @@ -949,15 +951,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We will use the ALICES technique described in [\"Likelihood-free inference with an improved cross-entropy estimator\"](https://arxiv.org/abs/1808.00973). Most other methods, including RASCAL, are described in [\"Constraining Effective Field Theories With Machine Learning\"](https://arxiv.org/abs/1805.00013) and [\"A Guide to Constraining Effective Field Theories With Machine Learning\"](https://arxiv.org/abs/1805.00020). There is also SCANDAL introduced in [\"Mining gold from implicit models to improve likelihood-free inference\"](https://arxiv.org/abs/1805.12244).\n", - "\n", - "Most of these methods exist both in a \"single parameterized\" version, in which only the dependence of the likelihood ratio on the numerator is modelled, and a \"doubly parameterized\" version, in which both the dependence on the numerator and denominator parameters is modelled. For the single parameterized version, use `method='rascal'`, `method='alice'`, and so on. 
For the double parameterized version, use `method='rascal2'`, `method='alice2'`, etc. Note that for the doubly parameterized estimators you have to provide `theta1_filename`, and in the case of RASCAL and ALICES also `t_xz1_filename`." + "To train this model we will minimize the ALICES loss function described in [\"Likelihood-free inference with an improved cross-entropy estimator\"](https://arxiv.org/abs/1808.00973). Many alternatives, including RASCAL, are described in [\"Constraining Effective Field Theories With Machine Learning\"](https://arxiv.org/abs/1805.00013) and [\"A Guide to Constraining Effective Field Theories With Machine Learning\"](https://arxiv.org/abs/1805.00020). There is also SCANDAL introduced in [\"Mining gold from implicit models to improve likelihood-free inference\"](https://arxiv.org/abs/1805.12244)." ] }, { "cell_type": "code", "execution_count": 25, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stderr", @@ -1016,21 +1018,18 @@ } ], "source": [ - "forge.train(\n", + "estimator.train(\n", " method='alices',\n", - " theta0_filename='data/samples/theta0_train.npy',\n", - " x_filename='data/samples/x_train.npy',\n", - " y_filename='data/samples/y_train.npy',\n", - " r_xz_filename='data/samples/r_xz_train.npy',\n", - " t_xz0_filename='data/samples/t_xz_train.npy',\n", - " n_hidden=(20,20),\n", - " alpha=10.,\n", + " theta='data/samples/theta0_train.npy',\n", + " x='data/samples/x_train.npy',\n", + " y='data/samples/y_train.npy',\n", + " r_xz='data/samples/r_xz_train.npy',\n", + " t_xz='data/samples/t_xz_train.npy',\n", + " alpha=1.,\n", " n_epochs=20,\n", - " validation_split=0.3,\n", - " batch_size=256\n", ")\n", "\n", - "forge.save('models/alices')" + "estimator.save('models/alices')" ] }, { @@ -1044,12 +1043,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "`forge.evaluate(theta,x)` estimated the log likelihood ratio and the score for all combination between the given phase-space points `x` 
and parameters `theta`. That is, if given 100 events `x` and a grid of 25 `theta` points, it will return 25\\*100 estimates for the log likelihood and 25\\*100 estimates for the score, both indexed by `[i_theta,i_x]`." + "`estimator.evaluate_log_likelihood_ratio(theta,x)` estimates the log likelihood ratio and the score for all combinations of the given phase-space points `x` and parameters `theta`. That is, if given 100 events `x` and a grid of 25 `theta` points, it will return 25\\*100 estimates for the log likelihood ratio and 25\\*100 estimates for the score, both indexed by `[i_theta,i_x]`." ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -1064,14 +1063,29 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "RuntimeError", + "evalue": "Can't find estimator type information in file. Maybe this file was created with an incompatible MadMiner version < v0.3.0?", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'models/alices'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m log_r_hat, _ = estimator.evaluate_log_likelihood_ratio(\n\u001b[1;32m 4\u001b[0m \u001b[0mtheta\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'data/samples/theta_grid.npy'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m 
\u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'data/samples/x_test.npy'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/johannbrehmer/work/projects/madminer/madminer/madminer/ml.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(self, filename)\u001b[0m\n\u001b[1;32m 151\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"_settings.json\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"r\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[0msettings\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 153\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_unwrap_settings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 154\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_create_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 155\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/johannbrehmer/work/projects/madminer/madminer/madminer/ml.py\u001b[0m in \u001b[0;36m_unwrap_settings\u001b[0;34m(self, settings)\u001b[0m\n\u001b[1;32m 586\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 587\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_unwrap_settings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 588\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mParameterizedRatioEstimator\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_unwrap_settings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 589\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 590\u001b[0m \u001b[0mestimator_type\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"estimator_type\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/johannbrehmer/work/projects/madminer/madminer/madminer/ml.py\u001b[0m in \u001b[0;36m_unwrap_settings\u001b[0;34m(self, settings)\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 204\u001b[0m raise RuntimeError(\n\u001b[0;32m--> 205\u001b[0;31m \u001b[0;34m\"Can't find estimator type information in file. Maybe this file was created with\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 206\u001b[0m \u001b[0;34m\" an incompatible MadMiner version < v0.3.0?\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 207\u001b[0m )\n", + "\u001b[0;31mRuntimeError\u001b[0m: Can't find estimator type information in file. Maybe this file was created with an incompatible MadMiner version < v0.3.0?" 
+ ] + } + ], "source": [ - "forge.load('models/alices')\n", + "estimator.load('models/alices')\n", "\n", - "log_r_hat, _, _ = forge.evaluate(\n", - " theta0_filename='data/samples/theta_grid.npy',\n", + "log_r_hat, _ = estimator.evaluate_log_likelihood_ratio(\n", + " theta='data/samples/theta_grid.npy',\n", " x='data/samples/x_test.npy',\n", " evaluate_score=False\n", ")" @@ -1086,14 +1100,12 @@ }, { "cell_type": "code", - "execution_count": 28, - "metadata": { - "scrolled": true - }, + "execution_count": 14, + "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -1135,8 +1147,376 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note that in this tutorial our sample size was very small, and the network does not really have a chance to converge to the correct likelihood ratio function. So don't worry if you find a minimum that is not at the right point (the SM, i.e. the origin in this plot). Feel free to dial up the event numbers in the run card as well as the training samples and see what happens then!\n", + "Note that in this tutorial our sample size was very small, and the network might not really have a chance to converge to the correct likelihood ratio function. So don't worry if you find a minimum that is not at the right point (the SM, i.e. the origin in this plot). Feel free to dial up the event numbers in the run card as well as the training samples and see what happens then!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8. Limits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the end, what we care about are not plots of the log likelihood ratio, but limits on parameters. But at least under some asymptotic assumptions, these are directly related. 
MadMiner makes it easy to calculate p-values in the asymptotic limit with the `AsymptoticLimits` class in the `madminer.limits` module: " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "14:44 madminer.limits INFO Loading data from data/madminer_example_shuffled.h5\n", + "14:44 madminer.limits INFO Found 2 parameters\n", + "14:44 madminer.limits INFO Found 6 benchmarks, of which 6 physical\n", + "14:44 madminer.limits INFO Found 2 observables: pt_j1, delta_phi_jj\n", + "14:44 madminer.limits INFO Found 6537 events\n", + "14:44 madminer.limits INFO Found morphing setup with 6 components\n" + ] + } + ], + "source": [ + "limits = AsymptoticLimits('data/madminer_example_shuffled.h5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This class provides two high-level functions:\n", + "- `AsymptoticLimits.observed_limits()` lets us calculate p-values on a parameter grid for some observed events, and\n", + "- `AsymptoticLimits.expected_limits()` lets us calculate expected p-values on a parameter grid based on all data in the MadMiner file.\n", + "\n", + "Note that these limits include both rate and kinematic information (the neural network is used for the kinematic part).\n", "\n", + "Let's try both:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "theta_min, theta_max = -20., 20.\n", + "resolution = 25" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Expected limits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, with `mode=\"rate\"`, we can calculate expected limits based only on rate information:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "14:44 madminer.limits INFO Calculating rate log 
likelihood\n", 
+ "14:44 madminer.limits INFO Calculating p-values\n" + ] + } + ], + "source": [ + "_, p_values_expected_xsec, best_fit_expected_xsec = limits.expected_limits(\n", + " theta_true=[0.,0.],\n", + " theta_ranges=[(theta_min, theta_max), (theta_min, theta_max)],\n", + " mode=\"rate\",\n", + " resolution=resolution,\n", + " luminosity=300000.0\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`mode=\"histo\"` calculates limits based on histograms. For now, there is not a lot of freedom in this step, the histogram binning is determined automatically." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "14:44 madminer.limits INFO Setting up standard summary statistics\n", + "14:44 madminer.limits INFO Creating histogram with 20 bins for the summary statistics\n", + "14:44 madminer.limits INFO Building histogram with %s bins per parameter and %s bins per observable\n", + "14:44 madminer.sampling INFO Loading data from data/madminer_example_shuffled.h5\n", + "14:44 madminer.sampling INFO Found 2 parameters\n", + "14:44 madminer.sampling INFO Did not find nuisance parameters\n", + "14:44 madminer.sampling INFO Found 6 benchmarks, of which 6 physical\n", + "14:44 madminer.sampling INFO Found 2 observables\n", + "14:44 madminer.sampling INFO Found 6537 events\n", + "14:44 madminer.sampling INFO Found morphing setup with 6 components\n", + "14:44 madminer.sampling INFO Extracting plain training sample. Sampling according to ('thetas', [array([-20., -20.]), array([-18.33333333, -20. ]), array([-16.66666667, -20. ]), array([-15., -20.]), array([-13.33333333, -20. ]), array([-11.66666667, -20. ]), array([-10., -20.]), array([ -8.33333333, -20. ]), array([ -6.66666667, -20. ]), array([ -5., -20.]), array([ -3.33333333, -20. ]), array([ -1.66666667, -20. ]), array([ 0., -20.]), array([ 1.66666667, -20. ]), array([ 3.33333333, -20. 
]), array([ 5., -20.]), array([ 6.66666667, -20. ]), array([ 8.33333333, -20. ]), array([ 10., -20.]), array([ 11.66666667, -20. ]), array([ 13.33333333, -20. ]), array([ 15., -20.]), array([ 16.66666667, -20. ]), array([ 18.33333333, -20. ]), array([ 20., -20.]), array([-20. , -18.33333333]), array([-18.33333333, -18.33333333]), array([-16.66666667, -18.33333333]), array([-15. , -18.33333333]), array([-13.33333333, -18.33333333]), array([-11.66666667, -18.33333333]), array([-10. , -18.33333333]), array([ -8.33333333, -18.33333333]), array([ -6.66666667, -18.33333333]), array([ -5. , -18.33333333]), array([ -3.33333333, -18.33333333]), array([ -1.66666667, -18.33333333]), array([ 0. , -18.33333333]), array([ 1.66666667, -18.33333333]), array([ 3.33333333, -18.33333333]), array([ 5. , -18.33333333]), array([ 6.66666667, -18.33333333]), array([ 8.33333333, -18.33333333]), array([ 10. , -18.33333333]), array([ 11.66666667, -18.33333333]), array([ 13.33333333, -18.33333333]), array([ 15. , -18.33333333]), array([ 16.66666667, -18.33333333]), array([ 18.33333333, -18.33333333]), array([ 20. , -18.33333333]), array([-20. , -16.66666667]), array([-18.33333333, -16.66666667]), array([-16.66666667, -16.66666667]), array([-15. , -16.66666667]), array([-13.33333333, -16.66666667]), array([-11.66666667, -16.66666667]), array([-10. , -16.66666667]), array([ -8.33333333, -16.66666667]), array([ -6.66666667, -16.66666667]), array([ -5. , -16.66666667]), array([ -3.33333333, -16.66666667]), array([ -1.66666667, -16.66666667]), array([ 0. , -16.66666667]), array([ 1.66666667, -16.66666667]), array([ 3.33333333, -16.66666667]), array([ 5. , -16.66666667]), array([ 6.66666667, -16.66666667]), array([ 8.33333333, -16.66666667]), array([ 10. , -16.66666667]), array([ 11.66666667, -16.66666667]), array([ 13.33333333, -16.66666667]), array([ 15. , -16.66666667]), array([ 16.66666667, -16.66666667]), array([ 18.33333333, -16.66666667]), array([ 20. 
, -16.66666667]), array([-20., -15.]), array([-18.33333333, -15. ]), array([-16.66666667, -15. ]), array([-15., -15.]), array([-13.33333333, -15. ]), array([-11.66666667, -15. ]), array([-10., -15.]), array([ -8.33333333, -15. ]), array([ -6.66666667, -15. ]), array([ -5., -15.]), array([ -3.33333333, -15. ]), array([ -1.66666667, -15. ]), array([ 0., -15.]), array([ 1.66666667, -15. ]), array([ 3.33333333, -15. ]), array([ 5., -15.]), array([ 6.66666667, -15. ]), array([ 8.33333333, -15. ]), array([ 10., -15.]), array([ 11.66666667, -15. ]), array([ 13.33333333, -15. ]), array([ 15., -15.]), array([ 16.66666667, -15. ]), array([ 18.33333333, -15. ]), array([ 20., -15.]), array([-20. , -13.33333333]), array([-18.33333333, -13.33333333]), array([-16.66666667, -13.33333333]), array([-15. , -13.33333333]), array([-13.33333333, -13.33333333]), array([-11.66666667, -13.33333333]), array([-10. , -13.33333333]), array([ -8.33333333, -13.33333333]), array([ -6.66666667, -13.33333333]), array([ -5. , -13.33333333]), array([ -3.33333333, -13.33333333]), array([ -1.66666667, -13.33333333]), array([ 0. , -13.33333333]), array([ 1.66666667, -13.33333333]), array([ 3.33333333, -13.33333333]), array([ 5. , -13.33333333]), array([ 6.66666667, -13.33333333]), array([ 8.33333333, -13.33333333]), array([ 10. , -13.33333333]), array([ 11.66666667, -13.33333333]), array([ 13.33333333, -13.33333333]), array([ 15. , -13.33333333]), array([ 16.66666667, -13.33333333]), array([ 18.33333333, -13.33333333]), array([ 20. , -13.33333333]), array([-20. , -11.66666667]), array([-18.33333333, -11.66666667]), array([-16.66666667, -11.66666667]), array([-15. , -11.66666667]), array([-13.33333333, -11.66666667]), array([-11.66666667, -11.66666667]), array([-10. , -11.66666667]), array([ -8.33333333, -11.66666667]), array([ -6.66666667, -11.66666667]), array([ -5. , -11.66666667]), array([ -3.33333333, -11.66666667]), array([ -1.66666667, -11.66666667]), array([ 0. 
, -11.66666667]), array([ 1.66666667, -11.66666667]), array([ 3.33333333, -11.66666667]), array([ 5. , -11.66666667]), array([ 6.66666667, -11.66666667]), array([ 8.33333333, -11.66666667]), array([ 10. , -11.66666667]), array([ 11.66666667, -11.66666667]), array([ 13.33333333, -11.66666667]), array([ 15. , -11.66666667]), array([ 16.66666667, -11.66666667]), array([ 18.33333333, -11.66666667]), array([ 20. , -11.66666667]), array([-20., -10.]), array([-18.33333333, -10. ]), array([-16.66666667, -10. ]), array([-15., -10.]), array([-13.33333333, -10. ]), array([-11.66666667, -10. ]), array([-10., -10.]), array([ -8.33333333, -10. ]), array([ -6.66666667, -10. ]), array([ -5., -10.]), array([ -3.33333333, -10. ]), array([ -1.66666667, -10. ]), array([ 0., -10.]), array([ 1.66666667, -10. ]), array([ 3.33333333, -10. ]), array([ 5., -10.]), array([ 6.66666667, -10. ]), array([ 8.33333333, -10. ]), array([ 10., -10.]), array([ 11.66666667, -10. ]), array([ 13.33333333, -10. ]), array([ 15., -10.]), array([ 16.66666667, -10. ]), array([ 18.33333333, -10. ]), array([ 20., -10.]), array([-20. , -8.33333333]), array([-18.33333333, -8.33333333]), array([-16.66666667, -8.33333333]), array([-15. , -8.33333333]), array([-13.33333333, -8.33333333]), array([-11.66666667, -8.33333333]), array([-10. , -8.33333333]), array([-8.33333333, -8.33333333]), array([-6.66666667, -8.33333333]), array([-5. , -8.33333333]), array([-3.33333333, -8.33333333]), array([-1.66666667, -8.33333333]), array([ 0. , -8.33333333]), array([ 1.66666667, -8.33333333]), array([ 3.33333333, -8.33333333]), array([ 5. , -8.33333333]), array([ 6.66666667, -8.33333333]), array([ 8.33333333, -8.33333333]), array([10. , -8.33333333]), array([11.66666667, -8.33333333]), array([13.33333333, -8.33333333]), array([15. , -8.33333333]), array([16.66666667, -8.33333333]), array([18.33333333, -8.33333333]), array([20. , -8.33333333]), array([-20. 
, -6.66666667]), array([-18.33333333, -6.66666667]), array([-16.66666667, -6.66666667]), array([-15. , -6.66666667]), array([-13.33333333, -6.66666667]), array([-11.66666667, -6.66666667]), array([-10. , -6.66666667]), array([-8.33333333, -6.66666667]), array([-6.66666667, -6.66666667]), array([-5. , -6.66666667]), array([-3.33333333, -6.66666667]), array([-1.66666667, -6.66666667]), array([ 0. , -6.66666667]), array([ 1.66666667, -6.66666667]), array([ 3.33333333, -6.66666667]), array([ 5. , -6.66666667]), array([ 6.66666667, -6.66666667]), array([ 8.33333333, -6.66666667]), array([10. , -6.66666667]), array([11.66666667, -6.66666667]), array([13.33333333, -6.66666667]), array([15. , -6.66666667]), array([16.66666667, -6.66666667]), array([18.33333333, -6.66666667]), array([20. , -6.66666667]), array([-20., -5.]), array([-18.33333333, -5. ]), array([-16.66666667, -5. ]), array([-15., -5.]), array([-13.33333333, -5. ]), array([-11.66666667, -5. ]), array([-10., -5.]), array([-8.33333333, -5. ]), array([-6.66666667, -5. ]), array([-5., -5.]), array([-3.33333333, -5. ]), array([-1.66666667, -5. ]), array([ 0., -5.]), array([ 1.66666667, -5. ]), array([ 3.33333333, -5. ]), array([ 5., -5.]), array([ 6.66666667, -5. ]), array([ 8.33333333, -5. ]), array([10., -5.]), array([11.66666667, -5. ]), array([13.33333333, -5. ]), array([15., -5.]), array([16.66666667, -5. ]), array([18.33333333, -5. ]), array([20., -5.]), array([-20. , -3.33333333]), array([-18.33333333, -3.33333333]), array([-16.66666667, -3.33333333]), array([-15. , -3.33333333]), array([-13.33333333, -3.33333333]), array([-11.66666667, -3.33333333]), array([-10. , -3.33333333]), array([-8.33333333, -3.33333333]), array([-6.66666667, -3.33333333]), array([-5. , -3.33333333]), array([-3.33333333, -3.33333333]), array([-1.66666667, -3.33333333]), array([ 0. , -3.33333333]), array([ 1.66666667, -3.33333333]), array([ 3.33333333, -3.33333333]), array([ 5. 
, -3.33333333]), array([ 6.66666667, -3.33333333]), array([ 8.33333333, -3.33333333]), array([10. , -3.33333333]), array([11.66666667, -3.33333333]), array([13.33333333, -3.33333333]), array([15. , -3.33333333]), array([16.66666667, -3.33333333]), array([18.33333333, -3.33333333]), array([20. , -3.33333333]), array([-20. , -1.66666667]), array([-18.33333333, -1.66666667]), array([-16.66666667, -1.66666667]), array([-15. , -1.66666667]), array([-13.33333333, -1.66666667]), array([-11.66666667, -1.66666667]), array([-10. , -1.66666667]), array([-8.33333333, -1.66666667]), array([-6.66666667, -1.66666667]), array([-5. , -1.66666667]), array([-3.33333333, -1.66666667]), array([-1.66666667, -1.66666667]), array([ 0. , -1.66666667]), array([ 1.66666667, -1.66666667]), array([ 3.33333333, -1.66666667]), array([ 5. , -1.66666667]), array([ 6.66666667, -1.66666667]), array([ 8.33333333, -1.66666667]), array([10. , -1.66666667]), array([11.66666667, -1.66666667]), array([13.33333333, -1.66666667]), array([15. , -1.66666667]), array([16.66666667, -1.66666667]), array([18.33333333, -1.66666667]), array([20. , -1.66666667]), array([-20., 0.]), array([-18.33333333, 0. ]), array([-16.66666667, 0. ]), array([-15., 0.]), array([-13.33333333, 0. ]), array([-11.66666667, 0. ]), array([-10., 0.]), array([-8.33333333, 0. ]), array([-6.66666667, 0. ]), array([-5., 0.]), array([-3.33333333, 0. ]), array([-1.66666667, 0. ]), array([0., 0.]), array([1.66666667, 0. ]), array([3.33333333, 0. ]), array([5., 0.]), array([6.66666667, 0. ]), array([8.33333333, 0. ]), array([10., 0.]), array([11.66666667, 0. ]), array([13.33333333, 0. ]), array([15., 0.]), array([16.66666667, 0. ]), array([18.33333333, 0. ]), array([20., 0.]), array([-20. , 1.66666667]), array([-18.33333333, 1.66666667]), array([-16.66666667, 1.66666667]), array([-15. , 1.66666667]), array([-13.33333333, 1.66666667]), array([-11.66666667, 1.66666667]), array([-10. 
, 1.66666667]), array([-8.33333333, 1.66666667]), array([-6.66666667, 1.66666667]), array([-5. , 1.66666667]), array([-3.33333333, 1.66666667]), array([-1.66666667, 1.66666667]), array([0. , 1.66666667]), array([1.66666667, 1.66666667]), array([3.33333333, 1.66666667]), array([5. , 1.66666667]), array([6.66666667, 1.66666667]), array([8.33333333, 1.66666667]), array([10. , 1.66666667]), array([11.66666667, 1.66666667]), array([13.33333333, 1.66666667]), array([15. , 1.66666667]), array([16.66666667, 1.66666667]), array([18.33333333, 1.66666667]), array([20. , 1.66666667]), array([-20. , 3.33333333]), array([-18.33333333, 3.33333333]), array([-16.66666667, 3.33333333]), array([-15. , 3.33333333]), array([-13.33333333, 3.33333333]), array([-11.66666667, 3.33333333]), array([-10. , 3.33333333]), array([-8.33333333, 3.33333333]), array([-6.66666667, 3.33333333]), array([-5. , 3.33333333]), array([-3.33333333, 3.33333333]), array([-1.66666667, 3.33333333]), array([0. , 3.33333333]), array([1.66666667, 3.33333333]), array([3.33333333, 3.33333333]), array([5. , 3.33333333]), array([6.66666667, 3.33333333]), array([8.33333333, 3.33333333]), array([10. , 3.33333333]), array([11.66666667, 3.33333333]), array([13.33333333, 3.33333333]), array([15. , 3.33333333]), array([16.66666667, 3.33333333]), array([18.33333333, 3.33333333]), array([20. , 3.33333333]), array([-20., 5.]), array([-18.33333333, 5. ]), array([-16.66666667, 5. ]), array([-15., 5.]), array([-13.33333333, 5. ]), array([-11.66666667, 5. ]), array([-10., 5.]), array([-8.33333333, 5. ]), array([-6.66666667, 5. ]), array([-5., 5.]), array([-3.33333333, 5. ]), array([-1.66666667, 5. ]), array([0., 5.]), array([1.66666667, 5. ]), array([3.33333333, 5. ]), array([5., 5.]), array([6.66666667, 5. ]), array([8.33333333, 5. ]), array([10., 5.]), array([11.66666667, 5. ]), array([13.33333333, 5. ]), array([15., 5.]), array([16.66666667, 5. ]), array([18.33333333, 5. ]), array([20., 5.]), array([-20. 
, 6.66666667]), array([-18.33333333, 6.66666667]), array([-16.66666667, 6.66666667]), array([-15. , 6.66666667]), array([-13.33333333, 6.66666667]), array([-11.66666667, 6.66666667]), array([-10. , 6.66666667]), array([-8.33333333, 6.66666667]), array([-6.66666667, 6.66666667]), array([-5. , 6.66666667]), array([-3.33333333, 6.66666667]), array([-1.66666667, 6.66666667]), array([0. , 6.66666667]), array([1.66666667, 6.66666667]), array([3.33333333, 6.66666667]), array([5. , 6.66666667]), array([6.66666667, 6.66666667]), array([8.33333333, 6.66666667]), array([10. , 6.66666667]), array([11.66666667, 6.66666667]), array([13.33333333, 6.66666667]), array([15. , 6.66666667]), array([16.66666667, 6.66666667]), array([18.33333333, 6.66666667]), array([20. , 6.66666667]), array([-20. , 8.33333333]), array([-18.33333333, 8.33333333]), array([-16.66666667, 8.33333333]), array([-15. , 8.33333333]), array([-13.33333333, 8.33333333]), array([-11.66666667, 8.33333333]), array([-10. , 8.33333333]), array([-8.33333333, 8.33333333]), array([-6.66666667, 8.33333333]), array([-5. , 8.33333333]), array([-3.33333333, 8.33333333]), array([-1.66666667, 8.33333333]), array([0. , 8.33333333]), array([1.66666667, 8.33333333]), array([3.33333333, 8.33333333]), array([5. , 8.33333333]), array([6.66666667, 8.33333333]), array([8.33333333, 8.33333333]), array([10. , 8.33333333]), array([11.66666667, 8.33333333]), array([13.33333333, 8.33333333]), array([15. , 8.33333333]), array([16.66666667, 8.33333333]), array([18.33333333, 8.33333333]), array([20. , 8.33333333]), array([-20., 10.]), array([-18.33333333, 10. ]), array([-16.66666667, 10. ]), array([-15., 10.]), array([-13.33333333, 10. ]), array([-11.66666667, 10. ]), array([-10., 10.]), array([-8.33333333, 10. ]), array([-6.66666667, 10. ]), array([-5., 10.]), array([-3.33333333, 10. ]), array([-1.66666667, 10. ]), array([ 0., 10.]), array([ 1.66666667, 10. ]), array([ 3.33333333, 10. ]), array([ 5., 10.]), array([ 6.66666667, 10. 
]), array([ 8.33333333, 10. ]), array([10., 10.]), array([11.66666667, 10. ]), array([13.33333333, 10. ]), array([15., 10.]), array([16.66666667, 10. ]), array([18.33333333, 10. ]), array([20., 10.]), array([-20. , 11.66666667]), array([-18.33333333, 11.66666667]), array([-16.66666667, 11.66666667]), array([-15. , 11.66666667]), array([-13.33333333, 11.66666667]), array([-11.66666667, 11.66666667]), array([-10. , 11.66666667]), array([-8.33333333, 11.66666667]), array([-6.66666667, 11.66666667]), array([-5. , 11.66666667]), array([-3.33333333, 11.66666667]), array([-1.66666667, 11.66666667]), array([ 0. , 11.66666667]), array([ 1.66666667, 11.66666667]), array([ 3.33333333, 11.66666667]), array([ 5. , 11.66666667]), array([ 6.66666667, 11.66666667]), array([ 8.33333333, 11.66666667]), array([10. , 11.66666667]), array([11.66666667, 11.66666667]), array([13.33333333, 11.66666667]), array([15. , 11.66666667]), array([16.66666667, 11.66666667]), array([18.33333333, 11.66666667]), array([20. , 11.66666667]), array([-20. , 13.33333333]), array([-18.33333333, 13.33333333]), array([-16.66666667, 13.33333333]), array([-15. , 13.33333333]), array([-13.33333333, 13.33333333]), array([-11.66666667, 13.33333333]), array([-10. , 13.33333333]), array([-8.33333333, 13.33333333]), array([-6.66666667, 13.33333333]), array([-5. , 13.33333333]), array([-3.33333333, 13.33333333]), array([-1.66666667, 13.33333333]), array([ 0. , 13.33333333]), array([ 1.66666667, 13.33333333]), array([ 3.33333333, 13.33333333]), array([ 5. , 13.33333333]), array([ 6.66666667, 13.33333333]), array([ 8.33333333, 13.33333333]), array([10. , 13.33333333]), array([11.66666667, 13.33333333]), array([13.33333333, 13.33333333]), array([15. , 13.33333333]), array([16.66666667, 13.33333333]), array([18.33333333, 13.33333333]), array([20. , 13.33333333]), array([-20., 15.]), array([-18.33333333, 15. ]), array([-16.66666667, 15. ]), array([-15., 15.]), array([-13.33333333, 15. ]), array([-11.66666667, 15. 
]), array([-10., 15.]), array([-8.33333333, 15. ]), array([-6.66666667, 15. ]), array([-5., 15.]), array([-3.33333333, 15. ]), array([-1.66666667, 15. ]), array([ 0., 15.]), array([ 1.66666667, 15. ]), array([ 3.33333333, 15. ]), array([ 5., 15.]), array([ 6.66666667, 15. ]), array([ 8.33333333, 15. ]), array([10., 15.]), array([11.66666667, 15. ]), array([13.33333333, 15. ]), array([15., 15.]), array([16.66666667, 15. ]), array([18.33333333, 15. ]), array([20., 15.]), array([-20. , 16.66666667]), array([-18.33333333, 16.66666667]), array([-16.66666667, 16.66666667]), array([-15. , 16.66666667]), array([-13.33333333, 16.66666667]), array([-11.66666667, 16.66666667]), array([-10. , 16.66666667]), array([-8.33333333, 16.66666667]), array([-6.66666667, 16.66666667]), array([-5. , 16.66666667]), array([-3.33333333, 16.66666667]), array([-1.66666667, 16.66666667]), array([ 0. , 16.66666667]), array([ 1.66666667, 16.66666667]), array([ 3.33333333, 16.66666667]), array([ 5. , 16.66666667]), array([ 6.66666667, 16.66666667]), array([ 8.33333333, 16.66666667]), array([10. , 16.66666667]), array([11.66666667, 16.66666667]), array([13.33333333, 16.66666667]), array([15. , 16.66666667]), array([16.66666667, 16.66666667]), array([18.33333333, 16.66666667]), array([20. , 16.66666667]), array([-20. , 18.33333333]), array([-18.33333333, 18.33333333]), array([-16.66666667, 18.33333333]), array([-15. , 18.33333333]), array([-13.33333333, 18.33333333]), array([-11.66666667, 18.33333333]), array([-10. , 18.33333333]), array([-8.33333333, 18.33333333]), array([-6.66666667, 18.33333333]), array([-5. , 18.33333333]), array([-3.33333333, 18.33333333]), array([-1.66666667, 18.33333333]), array([ 0. , 18.33333333]), array([ 1.66666667, 18.33333333]), array([ 3.33333333, 18.33333333]), array([ 5. , 18.33333333]), array([ 6.66666667, 18.33333333]), array([ 8.33333333, 18.33333333]), array([10. , 18.33333333]), array([11.66666667, 18.33333333]), array([13.33333333, 18.33333333]), array([15. 
, 18.33333333]), array([16.66666667, 18.33333333]), array([18.33333333, 18.33333333]), array([20. , 18.33333333]), array([-20., 20.]), array([-18.33333333, 20. ]), array([-16.66666667, 20. ]), array([-15., 20.]), array([-13.33333333, 20. ]), array([-11.66666667, 20. ]), array([-10., 20.]), array([-8.33333333, 20. ]), array([-6.66666667, 20. ]), array([-5., 20.]), array([-3.33333333, 20. ]), array([-1.66666667, 20. ]), array([ 0., 20.]), array([ 1.66666667, 20. ]), array([ 3.33333333, 20. ]), array([ 5., 20.]), array([ 6.66666667, 20. ]), array([ 8.33333333, 20. ]), array([10., 20.]), array([11.66666667, 20. ]), array([13.33333333, 20. ]), array([15., 20.]), array([16.66666667, 20. ]), array([18.33333333, 20. ]), array([20., 20.])])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "14:44 madminer.sampling WARNING Large statistical uncertainty on the total cross section for theta = [-20. -20.]: (0.005813 +/- 0.001110) pb. Skipping these warnings in the future...\n", + "14:44 madminer.sampling WARNING For this value of theta, 1 / 5230 events have negative weight and will be ignored\n", + "14:44 madminer.sampling WARNING For this value of theta, 1 / 5230 events have negative weight and will be ignored\n", + "14:44 madminer.sampling WARNING For this value of theta, 1 / 5230 events have negative weight and will be ignored\n", + "14:44 madminer.sampling WARNING Skipping warnings about negative weights in the future...\n", + "14:44 madminer.sampling INFO Effective number of samples: mean 23.864853750376064, with individual thetas ranging from 6.466907370230287 to 5230.000000000667\n", + "14:44 madminer.limits INFO Calculating kinematic log likelihood with histograms\n", + "/Users/johannbrehmer/work/projects/madminer/madminer/madminer/utils/histo.py:200: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. 
In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.\n", + " log_p += np.log(histo[histo_indices])\n", + "14:44 madminer.limits INFO Calculating p-values\n" + ] + } + ], + "source": [ + "_, p_values_expected_histo, best_fit_expected_histo = limits.expected_limits(\n", + " theta_true=[0.,0.],\n", + " theta_ranges=[(theta_min, theta_max), (theta_min, theta_max)],\n", + " mode=\"histo\",\n", + " hist_vars=[\"pt_j1\"],\n", + " include_xsec=False,\n", + " resolution=resolution,\n", + " luminosity=300000.0\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally and perhaps most importantly, `mode=\"ml\"` allows us to calculate limits based on any `ParameterizedRatioEstimator` instance like the ALICES estimator trained above:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "14:44 madminer.limits INFO Loading kinematic likelihood ratio estimator\n", + "14:44 madminer.limits INFO Calculating kinematic log likelihood ratio with estimator\n", + "14:45 madminer.limits INFO Calculating p-values\n" + ] + } + ], + "source": [ + "theta_grid, p_values_expected_ml, best_fit_expected_ml = limits.expected_limits(\n", + " theta_true=[0.,0.],\n", + " theta_ranges=[(theta_min, theta_max), (theta_min, theta_max)],\n", + " mode=\"ml\",\n", + " model_file='models/alices',\n", + " include_xsec=False,\n", + " resolution=resolution,\n", + " luminosity=300000.0\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Observed limits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Observed limits take as input actual data, which we here generate on the fly:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + 
"text": [ + "14:45 madminer.sampling INFO Loading data from data/madminer_example_shuffled.h5\n", + "14:45 madminer.sampling INFO Found 2 parameters\n", + "14:45 madminer.sampling INFO Did not find nuisance parameters\n", + "14:45 madminer.sampling INFO Found 6 benchmarks, of which 6 physical\n", + "14:45 madminer.sampling INFO Found 2 observables\n", + "14:45 madminer.sampling INFO Found 6537 events\n", + "14:45 madminer.sampling INFO Found morphing setup with 6 components\n", + "14:45 madminer.sampling INFO Extracting evaluation sample. Sampling according to ('theta', array([0., 0.]))\n", + "14:45 madminer.sampling INFO Effective number of samples: 1305.9999999999793\n" + ] + } + ], + "source": [ + "sampler = SampleAugmenter('data/madminer_example_shuffled.h5')\n", + "x_observed, _ = sampler.extract_samples_test(\n", + " theta=sampling.morphing_point([0.,0.]),\n", + " n_samples=5,\n", + " folder=None,\n", + " filename=None\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "14:45 madminer.limits INFO Loading kinematic likelihood ratio estimator\n", + "14:45 madminer.limits INFO Calculating kinematic log likelihood ratio with estimator\n", + "14:45 madminer.limits INFO Calculating rate log likelihood\n", + "14:45 madminer.limits INFO Calculating p-values\n" + ] + } + ], + "source": [ + "_, p_values_observed, best_fit_observed = limits.observed_limits(\n", + " x_observed=x_observed,\n", + " theta_ranges=[(theta_min, theta_max), (theta_min, theta_max)],\n", + " mode=\"ml\",\n", + " model_file='models/alices',\n", + " include_xsec=True,\n", + " resolution=resolution,\n", + " luminosity=300000.0,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plot" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's plot the results:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + 
"metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "bin_size = (theta_max - theta_min)/(resolution - 1)\n", + "edges = np.linspace(theta_min - bin_size/2, theta_max + bin_size/2, resolution + 1)\n", + "centers = np.linspace(theta_min, theta_max, resolution)\n", + "\n", + "fig = plt.figure(figsize=(6,5))\n", + "ax = plt.gca()\n", + "\n", + "cmin, cmax = 1.e-3, 1.\n", + " \n", + "pcm = ax.pcolormesh(\n", + " edges, edges, p_values_expected_ml.reshape((resolution, resolution)),\n", + " norm=matplotlib.colors.LogNorm(vmin=cmin, vmax=cmax),\n", + " cmap='Greys_r'\n", + ")\n", + "cbar = fig.colorbar(pcm, ax=ax, extend='both')\n", + "\n", + "plt.contour(\n", + " centers, centers, p_values_expected_xsec.reshape((resolution, resolution)),\n", + " levels=[0.05],\n", + " linestyles='-', colors='darkgreen'\n", + ")\n", + "plt.contour(\n", + " centers, centers, p_values_expected_ml.reshape((resolution, resolution)),\n", + " levels=[0.05],\n", + " linestyles='-', colors='#CC002E'\n", + ")\n", + "plt.contour(\n", + " centers, centers, p_values_expected_histo.reshape((resolution, resolution)),\n", + " levels=[0.05],\n", + " linestyles='-', colors='C1'\n", + ")\n", + "plt.contour(\n", + " centers, centers, p_values_observed.reshape((resolution, resolution)),\n", + " levels=[0.05],\n", + " linestyles='--', colors='black'\n", + ")\n", + "\n", + "plt.scatter(\n", + " theta_grid[best_fit_expected_xsec][0], theta_grid[best_fit_expected_xsec][1],\n", + " s=80., color='darkgreen', marker='*',\n", + " label=\"xsec\"\n", + ")\n", + "plt.scatter(\n", + " theta_grid[best_fit_expected_ml][0], theta_grid[best_fit_expected_ml][1],\n", + " s=80., color='#CC002E', marker='*',\n", + " label=\"ALICES\"\n", + ")\n", + "plt.scatter(\n", + " theta_grid[best_fit_expected_histo][0], theta_grid[best_fit_expected_histo][1],\n", + " s=80., color='C1', marker='*',\n", + " label=\"Histo\"\n", + ")\n", + "plt.scatter(\n", + " 
theta_grid[best_fit_observed][0], theta_grid[best_fit_observed][1],\n", + " s=80., color='black', marker='*',\n", + " label=\"Observed\"\n", + ")\n", + "\n", + "plt.legend()\n", + "\n", + "plt.xlabel(r'$\\theta_0$')\n", + "plt.ylabel(r'$\\theta_1$')\n", + "cbar.set_label('Expected p-value (ALICES)')\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "That's it for now. Please have a look at the documentation for a detailed description of all classes and functions. And if you're curious about SALLY, Fisher information matrices, and ensemble methods, please look at the second part of the tutorial!" ] }, diff --git a/examples/tutorial_delphes/2_score_information_ensemble.ipynb b/examples/tutorial_delphes/2_score_information_ensemble.ipynb index f4b827de0..3b671b5a5 100755 --- a/examples/tutorial_delphes/2_score_information_ensemble.ipynb +++ b/examples/tutorial_delphes/2_score_information_ensemble.ipynb @@ -47,8 +47,9 @@ "from matplotlib import pyplot as plt\n", "%matplotlib inline\n", "\n", - "from madminer.sampling import SampleAugmenter, constant_benchmark_theta\n", - "from madminer.ml import MLForge, EnsembleForge\n", + "from madminer import sampling\n", + "from madminer.sampling import SampleAugmenter\n", + "from madminer.ml import ScoreEstimator, Ensemble\n", "from madminer.fisherinformation import FisherInformation\n", "from madminer.plotting import plot_fisher_information_contours_2d\n" ] @@ -116,25 +117,25 @@ "name": "stderr", "output_type": "stream", "text": [ - "09:28 madminer.sampling INFO Loading data from data/madminer_example_shuffled.h5\n", - "09:28 madminer.sampling INFO Found 2 parameters\n", - "09:28 madminer.sampling INFO Did not find nuisance parameters\n", - "09:28 madminer.sampling INFO Found 6 benchmarks, of which 6 physical\n", - "09:28 madminer.sampling INFO Found 2 observables\n", - "09:28 madminer.sampling INFO Found 46134 events\n", - "09:28 madminer.sampling INFO 
Found morphing setup with 6 components\n" + "15:36 madminer.analysis INFO Loading data from data/madminer_example_shuffled.h5\n", + "15:36 madminer.analysis INFO Found 2 parameters\n", + "15:36 madminer.analysis INFO Did not find nuisance parameters\n", + "15:36 madminer.analysis INFO Found 6 benchmarks, of which 6 physical\n", + "15:36 madminer.analysis INFO Found 2 observables\n", + "15:36 madminer.analysis INFO Found 6537 events\n", + "15:36 madminer.analysis INFO Found morphing setup with 6 components\n" ] } ], "source": [ - "sa = SampleAugmenter('data/madminer_example_shuffled.h5')" + "sampler = SampleAugmenter('data/madminer_example_shuffled.h5')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The relevant `SampleAugmenter` function for local score estimators is `extract_samples_train_local()`. As before, for the argument `theta` you can use the helper functions `constant_benchmark_theta()`, `multiple_benchmark_thetas()`, `constant_morphing_theta()`, `multiple_morphing_thetas()`, and `random_morphing_thetas()`." + "The relevant `SampleAugmenter` function for local score estimators is `sample_train_local()`. As before, for the argument `theta` you can use the helper functions `sampling.benchmark()`, `sampling.benchmarks()`, `sampling.morphing_point()`, `sampling.morphing_points()`, and `sampling.random_morphing_points()`." ] }, { @@ -146,14 +147,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "09:28 madminer.sampling INFO Extracting training sample for local score regression. Sampling and score evaluation according to (u'benchmark', u'sm')\n", - "09:28 madminer.sampling INFO Effective number of samples: 23067.0000000061\n" + "15:36 madminer.sampling INFO Extracting training sample for local score regression. 
Sampling and score evaluation according to (u'benchmark', u'sm')\n", + "15:36 madminer.sampling INFO Effective number of samples: 5230.0\n" ] } ], "source": [ - "x, theta, t_xz = sa.extract_samples_train_local(\n", - " theta=constant_benchmark_theta('sm'),\n", + "x, theta, t_xz, _ = sampler.sample_train_local(\n", + " theta=sampling.benchmark('sm'),\n", " n_samples=100000,\n", " folder='./data/samples',\n", " filename='train'\n", @@ -173,7 +174,7 @@ "source": [ "It's now time to build a neural network. Only this time, instead of the likelihood ratio itself, we will estimate the gradient of the log likelihood with respect to the theory parameters -- the score. To be precise, the output of the neural network is an estimate of the score at some reference parameter point, for instance the Standard Model. A neural network that estimates this \"local\" score can be used to calculate the Fisher information at that point. The estimated score can also be used as a machine learning version of Optimal Observables, and likelihoods can be estimated based on density estimation in the estimated score space. This method for likelihood ratio estimation is called SALLY, and there is a closely related version called SALLINO. 
Both are explained in [\"Constraining Effective Field Theories With Machine Learning\"](https://arxiv.org/abs/1805.00013) and [\"A Guide to Constraining Effective Field Theories With Machine Learning\"](https://arxiv.org/abs/1805.00020).\n", "\n", - "Again, the central object for this is the `madminer.ml.MLForge` class:" + "The central object for this is the `madminer.ml.ScoreEstimator` class:" ] }, { @@ -182,7 +183,7 @@ "metadata": {}, "outputs": [], "source": [ - "forge = MLForge()" + "estimator = ScoreEstimator(n_hidden=(20,))" ] }, { @@ -201,65 +202,65 @@ "name": "stderr", "output_type": "stream", "text": [ - "09:28 madminer.ml INFO Starting training\n", - "09:28 madminer.ml INFO Method: sally\n", - "09:28 madminer.ml INFO Training data: x at data/samples/x_train.npy\n", - "09:28 madminer.ml INFO t_xz (theta0) at data/samples/t_xz_train.npy\n", - "09:28 madminer.ml INFO Features: all\n", - "09:28 madminer.ml INFO Method: sally\n", - "09:28 madminer.ml INFO Hidden layers: (100, 100)\n", - "09:28 madminer.ml INFO Activation function: tanh\n", - "09:28 madminer.ml INFO Batch size: 256\n", - "09:28 madminer.ml INFO Trainer: amsgrad\n", - "09:28 madminer.ml INFO Epochs: 20\n", - "09:28 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "09:28 madminer.ml INFO Validation split: 0.3\n", - "09:28 madminer.ml INFO Early stopping: True\n", - "09:28 madminer.ml INFO Scale inputs: True\n", - "09:28 madminer.ml INFO Shuffle labels False\n", - "09:28 madminer.ml INFO Regularization: None\n", - "09:28 madminer.ml INFO Samples: all\n", - "09:28 madminer.ml INFO Loading training data\n", - "09:28 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", - "09:28 madminer.ml INFO Rescaling inputs\n", - "09:28 madminer.ml INFO Creating model for method sally\n", - "09:28 madminer.ml INFO Training model\n", - "09:29 madminer.utils.ml.sc INFO Epoch 02: train loss 0.3547 (mse_score: 0.3547)\n", - "09:29 madminer.utils.ml.sc INFO val. 
loss 0.3119 (mse_score: 0.3119) (*)\n", - "09:29 madminer.utils.ml.sc INFO Epoch 04: train loss 0.3385 (mse_score: 0.3385)\n", - "09:29 madminer.utils.ml.sc INFO val. loss 0.3049 (mse_score: 0.3049) (*)\n", - "09:29 madminer.utils.ml.sc INFO Epoch 06: train loss 0.3327 (mse_score: 0.3327)\n", - "09:29 madminer.utils.ml.sc INFO val. loss 0.3036 (mse_score: 0.3036) (*)\n", - "09:29 madminer.utils.ml.sc INFO Epoch 08: train loss 0.3303 (mse_score: 0.3303)\n", - "09:29 madminer.utils.ml.sc INFO val. loss 0.3014 (mse_score: 0.3014)\n", - "09:30 madminer.utils.ml.sc INFO Epoch 10: train loss 0.3259 (mse_score: 0.3259)\n", - "09:30 madminer.utils.ml.sc INFO val. loss 0.2969 (mse_score: 0.2969) (*)\n", - "09:30 madminer.utils.ml.sc INFO Epoch 12: train loss 0.3239 (mse_score: 0.3239)\n", - "09:30 madminer.utils.ml.sc INFO val. loss 0.2981 (mse_score: 0.2981)\n", - "09:30 madminer.utils.ml.sc INFO Epoch 14: train loss 0.3229 (mse_score: 0.3229)\n", - "09:30 madminer.utils.ml.sc INFO val. loss 0.2977 (mse_score: 0.2977)\n", - "09:30 madminer.utils.ml.sc INFO Epoch 16: train loss 0.3218 (mse_score: 0.3218)\n", - "09:30 madminer.utils.ml.sc INFO val. loss 0.2952 (mse_score: 0.2952) (*)\n", - "09:31 madminer.utils.ml.sc INFO Epoch 18: train loss 0.3211 (mse_score: 0.3211)\n", - "09:31 madminer.utils.ml.sc INFO val. loss 0.2966 (mse_score: 0.2966)\n", - "09:31 madminer.utils.ml.sc INFO Epoch 20: train loss 0.3204 (mse_score: 0.3204)\n", - "09:31 madminer.utils.ml.sc INFO val. 
loss 0.2951 (mse_score: 0.2951)\n", - "09:31 madminer.utils.ml.sc INFO Early stopping after epoch 17, with loss 0.29 compared to final loss 0.30\n", - "09:31 madminer.utils.ml.sc INFO Finished training\n" + "15:36 madminer.ml INFO Starting training\n", + "15:36 madminer.ml INFO Batch size: 200\n", + "15:36 madminer.ml INFO Optimizer: amsgrad\n", + "15:36 madminer.ml INFO Epochs: 50\n", + "15:36 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", + "15:36 madminer.ml INFO Validation split: 0.25\n", + "15:36 madminer.ml INFO Early stopping: True\n", + "15:36 madminer.ml INFO Scale inputs: True\n", + "15:36 madminer.ml INFO Shuffle labels False\n", + "15:36 madminer.ml INFO Samples: all\n", + "15:36 madminer.ml INFO Loading training data\n", + "15:36 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", + "15:36 madminer.ml INFO Rescaling inputs\n", + "15:36 madminer.ml INFO Creating model\n", + "15:36 madminer.ml INFO Training model\n", + "15:36 madminer.utils.ml.tr INFO Epoch 3: train loss 0.19037 (mse_score: 0.190)\n", + "15:36 madminer.utils.ml.tr INFO val. loss 0.15904 (mse_score: 0.159)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 6: train loss 0.14389 (mse_score: 0.144)\n", + "15:37 madminer.utils.ml.tr INFO val. loss 0.12421 (mse_score: 0.124)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 9: train loss 0.12970 (mse_score: 0.130)\n", + "15:37 madminer.utils.ml.tr INFO val. loss 0.11444 (mse_score: 0.114)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 12: train loss 0.12303 (mse_score: 0.123)\n", + "15:37 madminer.utils.ml.tr INFO val. loss 0.10981 (mse_score: 0.110)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 15: train loss 0.11926 (mse_score: 0.119)\n", + "15:37 madminer.utils.ml.tr INFO val. loss 0.10723 (mse_score: 0.107)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 18: train loss 0.11690 (mse_score: 0.117)\n", + "15:37 madminer.utils.ml.tr INFO val. 
loss 0.10569 (mse_score: 0.106)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 21: train loss 0.11537 (mse_score: 0.115)\n", + "15:37 madminer.utils.ml.tr INFO val. loss 0.10467 (mse_score: 0.105)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 24: train loss 0.11426 (mse_score: 0.114)\n", + "15:37 madminer.utils.ml.tr INFO val. loss 0.10407 (mse_score: 0.104)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 27: train loss 0.11342 (mse_score: 0.113)\n", + "15:38 madminer.utils.ml.tr INFO val. loss 0.10358 (mse_score: 0.104)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 30: train loss 0.11283 (mse_score: 0.113)\n", + "15:38 madminer.utils.ml.tr INFO val. loss 0.10306 (mse_score: 0.103)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 33: train loss 0.11231 (mse_score: 0.112)\n", + "15:38 madminer.utils.ml.tr INFO val. loss 0.10260 (mse_score: 0.103)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 36: train loss 0.11192 (mse_score: 0.112)\n", + "15:38 madminer.utils.ml.tr INFO val. loss 0.10236 (mse_score: 0.102)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 39: train loss 0.11158 (mse_score: 0.112)\n", + "15:38 madminer.utils.ml.tr INFO val. loss 0.10215 (mse_score: 0.102)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 42: train loss 0.11129 (mse_score: 0.111)\n", + "15:38 madminer.utils.ml.tr INFO val. loss 0.10194 (mse_score: 0.102)\n", + "15:39 madminer.utils.ml.tr INFO Epoch 45: train loss 0.11106 (mse_score: 0.111)\n", + "15:39 madminer.utils.ml.tr INFO val. loss 0.10175 (mse_score: 0.102)\n", + "15:39 madminer.utils.ml.tr INFO Epoch 48: train loss 0.11085 (mse_score: 0.111)\n", + "15:39 madminer.utils.ml.tr INFO val. 
loss 0.10162 (mse_score: 0.102)\n", + "15:39 madminer.utils.ml.tr INFO Early stopping did not improve performance\n" ] } ], "source": [ - "forge.train(\n", + "estimator.train(\n", " method='sally',\n", - " x_filename='data/samples/x_train.npy',\n", - " t_xz0_filename='data/samples/t_xz_train.npy',\n", - " n_epochs=20,\n", - " batch_size=256,\n", - " validation_split=0.3\n", + " x='data/samples/x_train.npy',\n", + " t_xz='data/samples/t_xz_train.npy',\n", ")\n", "\n", - "forge.save('models/sally')" + "estimator.save('models/sally')" ] }, { @@ -282,9 +283,9 @@ "metadata": {}, "outputs": [], "source": [ - "forge.load('models/sally')\n", + "estimator.load('models/sally')\n", "\n", - "t_hat = forge.evaluate(\n", + "t_hat = estimator.evaluate_score(\n", " x='data/samples/x_test.npy'\n", ")" ] @@ -303,7 +304,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -359,13 +360,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "09:31 madminer.fisherinfor INFO Loading data from data/madminer_example_shuffled.h5\n", - "09:31 madminer.fisherinfor INFO Found 2 parameters\n", - "09:31 madminer.fisherinfor WARNING Did not find nuisance parameters!\n", - "09:31 madminer.fisherinfor INFO Found 6 benchmarks, of which 6 physical\n", - "09:31 madminer.fisherinfor INFO Found 2 observables: pt_j1, delta_phi_jj\n", - "09:31 madminer.fisherinfor INFO Found 46134 events\n", - "09:31 madminer.fisherinfor INFO Found morphing setup with 6 components\n" + "15:39 madminer.analysis INFO Loading data from data/madminer_example_shuffled.h5\n", + "15:39 madminer.analysis INFO Found 2 parameters\n", + "15:39 madminer.analysis INFO Did not find nuisance parameters\n", + "15:39 madminer.analysis INFO Found 6 benchmarks, of which 6 physical\n", + "15:39 madminer.analysis INFO Found 2 observables\n", + "15:39 madminer.analysis INFO Found 6537 events\n", + "15:39 madminer.analysis INFO Found morphing setup with 6 components\n" ] } ], @@ -382,17 +383,22 @@ "name": "stderr", "output_type": "stream", "text": [ - "09:31 madminer.fisherinfor INFO Evaluating rate Fisher information\n", - "09:31 madminer.utils.inter WARNING include_nuisance_parameters=False without benchmark_is_nuisance information. Returning all weights.\n" + "15:39 madminer.fisherinfor INFO Evaluating rate Fisher information\n", + "15:39 madminer.utils.inter WARNING include_nuisance_parameters=False without benchmark_is_nuisance information. 
Returning all weights.\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Kinematic Fisher information after 3000 ifb:\n", - "[[1369.72408453 61.3498349 ]\n", - " [ 61.3498349 419.61370008]]\n" + "ename": "AttributeError", + "evalue": "'NoneType' object has no attribute 'calculate_a'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mtheta\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0.\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0.\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mmodel_file\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'models/sally'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mluminosity\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3000000.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m )\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/johannbrehmer/work/projects/madminer/madminer/madminer/fisherinformation.pyc\u001b[0m in \u001b[0;36mcalculate_fisher_information_full_detector\u001b[0;34m(self, theta, model_file, unweighted_x_sample_file, luminosity, include_xsec_info, mode, calculate_covariance, batch_size, test_split)\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Evaluating rate Fisher information\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 258\u001b[0m fisher_info_rate, rate_covariance = self.calculate_fisher_information_rate(\n\u001b[0;32m--> 259\u001b[0;31m 
\u001b[0mtheta\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtheta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mluminosity\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mluminosity\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minclude_nuisance_parameters\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minclude_nuisance_parameters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 260\u001b[0m )\n\u001b[1;32m 261\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/johannbrehmer/work/projects/madminer/madminer/madminer/fisherinformation.pyc\u001b[0m in \u001b[0;36mcalculate_fisher_information_rate\u001b[0;34m(self, theta, luminosity, cuts, efficiency_functions, include_nuisance_parameters)\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0msum_events\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[0mcalculate_uncertainty\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 415\u001b[0;31m \u001b[0mweights_benchmark_uncertainties\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mweights_benchmark_uncertainties\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 416\u001b[0m )\n\u001b[1;32m 417\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/johannbrehmer/work/projects/madminer/madminer/madminer/fisherinformation.pyc\u001b[0m in \u001b[0;36m_calculate_fisher_information\u001b[0;34m(self, theta, weights_benchmarks, luminosity, include_nuisance_parameters, sum_events, calculate_uncertainty, weights_benchmark_uncertainties)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[0;31m# Nuisance parameter Fisher info\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1042\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0minclude_nuisance_parameters\u001b[0m \u001b[0;32mand\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minclude_nuisance_parameters\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1043\u001b[0;31m \u001b[0mnuisance_a\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnuisance_morpher\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcalculate_a\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mweights_benchmarks\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Shape (n_nuisance_params, n_events)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1044\u001b[0m \u001b[0;31m# grad_i dsigma(x), where i is a nuisance parameter, is given by\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1045\u001b[0m \u001b[0;31m# sigma[np.newaxis, :] * a\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'calculate_a'" ] } ], @@ -400,7 +406,6 @@ "fisher_information, _ = fisher.calculate_fisher_information_full_detector(\n", " theta=[0.,0.],\n", " model_file='models/sally',\n", - " unweighted_x_sample_file='data/samples/x_test.npy',\n", " luminosity=3000000.\n", ")\n", "\n", @@ -416,32 +421,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/johannbrehmer/anaconda3/envs/python2/lib/python2.7/site-packages/matplotlib/contour.py:1004: UserWarning: The following kwargs were not used by contour: 'label'\n", - " s)\n", - "/Users/johannbrehmer/anaconda3/envs/python2/lib/python2.7/site-packages/matplotlib/cbook/deprecation.py:107: MatplotlibDeprecationWarning: Adding an axes using the same arguments as a previous axes currently reuses the earlier instance. In a future version, a new instance will always be created and returned. 
Meanwhile, this warning can be suppressed, and the future behavior ensured, by passing a unique label to each axes instance.\n", - " warnings.warn(message, mplDeprecation, stacklevel=1)\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "_ = plot_fisher_information_contours_2d(\n", " [fisher_information],\n", @@ -466,11 +448,13 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "ensemble = EnsembleForge(estimators=5)" + "estimators = [ScoreEstimator(n_hidden=(20,)) for _ in range(5)]\n", + "\n", + "ensemble = Ensemble(estimators)" ] }, { @@ -489,269 +473,17 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": { "scrolled": false }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "09:32 madminer.ml INFO Training 5 estimators in ensemble\n", - "09:32 madminer.ml INFO Training estimator 1 / 5 in ensemble\n", - "09:32 madminer.ml INFO Starting training\n", - "09:32 madminer.ml INFO Method: sally\n", - "09:32 madminer.ml INFO Training data: x at data/samples/x_train.npy\n", - "09:32 madminer.ml INFO t_xz (theta0) at data/samples/t_xz_train.npy\n", - "09:32 madminer.ml INFO Features: all\n", - "09:32 madminer.ml INFO Method: sally\n", - "09:32 madminer.ml INFO Hidden layers: (100, 100)\n", - "09:32 madminer.ml INFO Activation function: tanh\n", - "09:32 madminer.ml INFO Batch size: 256\n", - "09:32 madminer.ml INFO Trainer: amsgrad\n", - "09:32 madminer.ml INFO Epochs: 20\n", - "09:32 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "09:32 madminer.ml INFO Validation split: 0.3\n", - "09:32 madminer.ml INFO Early stopping: True\n", - "09:32 madminer.ml INFO Scale inputs: True\n", - "09:32 madminer.ml INFO Shuffle labels False\n", - "09:32 madminer.ml INFO Regularization: None\n", - "09:32 madminer.ml INFO Samples: all\n", - "09:32 madminer.ml INFO Loading training data\n", - "09:32 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", - "09:32 madminer.ml INFO Rescaling inputs\n", 
- "09:32 madminer.ml INFO Creating model for method sally\n", - "09:32 madminer.ml INFO Training model\n", - "09:32 madminer.utils.ml.sc INFO Epoch 02: train loss 0.3376 (mse_score: 0.3376)\n", - "09:32 madminer.utils.ml.sc INFO val. loss 0.3613 (mse_score: 0.3613) (*)\n", - "09:32 madminer.utils.ml.sc INFO Epoch 04: train loss 0.3228 (mse_score: 0.3228)\n", - "09:32 madminer.utils.ml.sc INFO val. loss 0.3465 (mse_score: 0.3465) (*)\n", - "09:32 madminer.utils.ml.sc INFO Epoch 06: train loss 0.3177 (mse_score: 0.3177)\n", - "09:32 madminer.utils.ml.sc INFO val. loss 0.3448 (mse_score: 0.3448) (*)\n", - "09:33 madminer.utils.ml.sc INFO Epoch 08: train loss 0.3146 (mse_score: 0.3146)\n", - "09:33 madminer.utils.ml.sc INFO val. loss 0.3389 (mse_score: 0.3389) (*)\n", - "09:33 madminer.utils.ml.sc INFO Epoch 10: train loss 0.3120 (mse_score: 0.3120)\n", - "09:33 madminer.utils.ml.sc INFO val. loss 0.3394 (mse_score: 0.3394)\n", - "09:33 madminer.utils.ml.sc INFO Epoch 12: train loss 0.3109 (mse_score: 0.3109)\n", - "09:33 madminer.utils.ml.sc INFO val. loss 0.3379 (mse_score: 0.3379) (*)\n", - "09:33 madminer.utils.ml.sc INFO Epoch 14: train loss 0.3096 (mse_score: 0.3096)\n", - "09:33 madminer.utils.ml.sc INFO val. loss 0.3372 (mse_score: 0.3372) (*)\n", - "09:34 madminer.utils.ml.sc INFO Epoch 16: train loss 0.3091 (mse_score: 0.3091)\n", - "09:34 madminer.utils.ml.sc INFO val. loss 0.3370 (mse_score: 0.3370)\n", - "09:34 madminer.utils.ml.sc INFO Epoch 18: train loss 0.3085 (mse_score: 0.3085)\n", - "09:34 madminer.utils.ml.sc INFO val. loss 0.3358 (mse_score: 0.3358) (*)\n", - "09:34 madminer.utils.ml.sc INFO Epoch 20: train loss 0.3079 (mse_score: 0.3079)\n", - "09:34 madminer.utils.ml.sc INFO val. 
loss 0.3354 (mse_score: 0.3354) (*)\n", - "09:34 madminer.utils.ml.sc INFO Early stopping did not improve performance\n", - "09:34 madminer.utils.ml.sc INFO Finished training\n", - "09:34 madminer.ml INFO Training estimator 2 / 5 in ensemble\n", - "09:34 madminer.ml INFO Starting training\n", - "09:34 madminer.ml INFO Method: sally\n", - "09:34 madminer.ml INFO Training data: x at data/samples/x_train.npy\n", - "09:34 madminer.ml INFO t_xz (theta0) at data/samples/t_xz_train.npy\n", - "09:34 madminer.ml INFO Features: all\n", - "09:34 madminer.ml INFO Method: sally\n", - "09:34 madminer.ml INFO Hidden layers: (100, 100)\n", - "09:34 madminer.ml INFO Activation function: tanh\n", - "09:34 madminer.ml INFO Batch size: 256\n", - "09:34 madminer.ml INFO Trainer: amsgrad\n", - "09:34 madminer.ml INFO Epochs: 20\n", - "09:34 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "09:34 madminer.ml INFO Validation split: 0.3\n", - "09:34 madminer.ml INFO Early stopping: True\n", - "09:34 madminer.ml INFO Scale inputs: True\n", - "09:34 madminer.ml INFO Shuffle labels False\n", - "09:34 madminer.ml INFO Regularization: None\n", - "09:34 madminer.ml INFO Samples: all\n", - "09:34 madminer.ml INFO Loading training data\n", - "09:34 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", - "09:34 madminer.ml INFO Rescaling inputs\n", - "09:34 madminer.ml INFO Creating model for method sally\n", - "09:34 madminer.ml INFO Training model\n", - "09:35 madminer.utils.ml.sc INFO Epoch 02: train loss 0.3491 (mse_score: 0.3491)\n", - "09:35 madminer.utils.ml.sc INFO val. loss 0.3335 (mse_score: 0.3335) (*)\n", - "09:35 madminer.utils.ml.sc INFO Epoch 04: train loss 0.3325 (mse_score: 0.3325)\n", - "09:35 madminer.utils.ml.sc INFO val. loss 0.3232 (mse_score: 0.3232) (*)\n", - "09:35 madminer.utils.ml.sc INFO Epoch 06: train loss 0.3267 (mse_score: 0.3267)\n", - "09:35 madminer.utils.ml.sc INFO val. 
loss 0.3157 (mse_score: 0.3157) (*)\n", - "09:35 madminer.utils.ml.sc INFO Epoch 08: train loss 0.3244 (mse_score: 0.3244)\n", - "09:35 madminer.utils.ml.sc INFO val. loss 0.3125 (mse_score: 0.3125) (*)\n", - "09:36 madminer.utils.ml.sc INFO Epoch 10: train loss 0.3217 (mse_score: 0.3217)\n", - "09:36 madminer.utils.ml.sc INFO val. loss 0.3115 (mse_score: 0.3115) (*)\n", - "09:36 madminer.utils.ml.sc INFO Epoch 12: train loss 0.3200 (mse_score: 0.3200)\n", - "09:36 madminer.utils.ml.sc INFO val. loss 0.3101 (mse_score: 0.3101) (*)\n", - "09:36 madminer.utils.ml.sc INFO Epoch 14: train loss 0.3189 (mse_score: 0.3189)\n", - "09:36 madminer.utils.ml.sc INFO val. loss 0.3100 (mse_score: 0.3100) (*)\n", - "09:36 madminer.utils.ml.sc INFO Epoch 16: train loss 0.3184 (mse_score: 0.3184)\n", - "09:36 madminer.utils.ml.sc INFO val. loss 0.3097 (mse_score: 0.3097) (*)\n", - "09:37 madminer.utils.ml.sc INFO Epoch 18: train loss 0.3177 (mse_score: 0.3177)\n", - "09:37 madminer.utils.ml.sc INFO val. loss 0.3086 (mse_score: 0.3086) (*)\n", - "09:37 madminer.utils.ml.sc INFO Epoch 20: train loss 0.3171 (mse_score: 0.3171)\n", - "09:37 madminer.utils.ml.sc INFO val. 
loss 0.3081 (mse_score: 0.3081) (*)\n", - "09:37 madminer.utils.ml.sc INFO Early stopping did not improve performance\n", - "09:37 madminer.utils.ml.sc INFO Finished training\n", - "09:37 madminer.ml INFO Training estimator 3 / 5 in ensemble\n", - "09:37 madminer.ml INFO Starting training\n", - "09:37 madminer.ml INFO Method: sally\n", - "09:37 madminer.ml INFO Training data: x at data/samples/x_train.npy\n", - "09:37 madminer.ml INFO t_xz (theta0) at data/samples/t_xz_train.npy\n", - "09:37 madminer.ml INFO Features: all\n", - "09:37 madminer.ml INFO Method: sally\n", - "09:37 madminer.ml INFO Hidden layers: (100, 100)\n", - "09:37 madminer.ml INFO Activation function: tanh\n", - "09:37 madminer.ml INFO Batch size: 256\n", - "09:37 madminer.ml INFO Trainer: amsgrad\n", - "09:37 madminer.ml INFO Epochs: 20\n", - "09:37 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "09:37 madminer.ml INFO Validation split: 0.3\n", - "09:37 madminer.ml INFO Early stopping: True\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "09:37 madminer.ml INFO Scale inputs: True\n", - "09:37 madminer.ml INFO Shuffle labels False\n", - "09:37 madminer.ml INFO Regularization: None\n", - "09:37 madminer.ml INFO Samples: all\n", - "09:37 madminer.ml INFO Loading training data\n", - "09:37 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", - "09:37 madminer.ml INFO Rescaling inputs\n", - "09:37 madminer.ml INFO Creating model for method sally\n", - "09:37 madminer.ml INFO Training model\n", - "09:37 madminer.utils.ml.sc INFO Epoch 02: train loss 0.3556 (mse_score: 0.3556)\n", - "09:37 madminer.utils.ml.sc INFO val. loss 0.3212 (mse_score: 0.3212) (*)\n", - "09:37 madminer.utils.ml.sc INFO Epoch 04: train loss 0.3382 (mse_score: 0.3382)\n", - "09:37 madminer.utils.ml.sc INFO val. 
loss 0.3126 (mse_score: 0.3126) (*)\n", - "09:37 madminer.utils.ml.sc INFO Epoch 06: train loss 0.3323 (mse_score: 0.3323)\n", - "09:37 madminer.utils.ml.sc INFO val. loss 0.3115 (mse_score: 0.3115)\n", - "09:38 madminer.utils.ml.sc INFO Epoch 08: train loss 0.3290 (mse_score: 0.3290)\n", - "09:38 madminer.utils.ml.sc INFO val. loss 0.3091 (mse_score: 0.3091) (*)\n", - "09:38 madminer.utils.ml.sc INFO Epoch 10: train loss 0.3274 (mse_score: 0.3274)\n", - "09:38 madminer.utils.ml.sc INFO val. loss 0.3053 (mse_score: 0.3053) (*)\n", - "09:38 madminer.utils.ml.sc INFO Epoch 12: train loss 0.3247 (mse_score: 0.3247)\n", - "09:38 madminer.utils.ml.sc INFO val. loss 0.3034 (mse_score: 0.3034) (*)\n", - "09:38 madminer.utils.ml.sc INFO Epoch 14: train loss 0.3241 (mse_score: 0.3241)\n", - "09:38 madminer.utils.ml.sc INFO val. loss 0.3051 (mse_score: 0.3051)\n", - "09:38 madminer.utils.ml.sc INFO Epoch 16: train loss 0.3225 (mse_score: 0.3225)\n", - "09:38 madminer.utils.ml.sc INFO val. loss 0.3032 (mse_score: 0.3032) (*)\n", - "09:39 madminer.utils.ml.sc INFO Epoch 18: train loss 0.3222 (mse_score: 0.3222)\n", - "09:39 madminer.utils.ml.sc INFO val. loss 0.3041 (mse_score: 0.3041)\n", - "09:39 madminer.utils.ml.sc INFO Epoch 20: train loss 0.3215 (mse_score: 0.3215)\n", - "09:39 madminer.utils.ml.sc INFO val. 
loss 0.3028 (mse_score: 0.3028)\n", - "09:39 madminer.utils.ml.sc INFO Early stopping after epoch 19, with loss 0.30 compared to final loss 0.30\n", - "09:39 madminer.utils.ml.sc INFO Finished training\n", - "09:39 madminer.ml INFO Training estimator 4 / 5 in ensemble\n", - "09:39 madminer.ml INFO Starting training\n", - "09:39 madminer.ml INFO Method: sally\n", - "09:39 madminer.ml INFO Training data: x at data/samples/x_train.npy\n", - "09:39 madminer.ml INFO t_xz (theta0) at data/samples/t_xz_train.npy\n", - "09:39 madminer.ml INFO Features: all\n", - "09:39 madminer.ml INFO Method: sally\n", - "09:39 madminer.ml INFO Hidden layers: (100, 100)\n", - "09:39 madminer.ml INFO Activation function: tanh\n", - "09:39 madminer.ml INFO Batch size: 256\n", - "09:39 madminer.ml INFO Trainer: amsgrad\n", - "09:39 madminer.ml INFO Epochs: 20\n", - "09:39 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "09:39 madminer.ml INFO Validation split: 0.3\n", - "09:39 madminer.ml INFO Early stopping: True\n", - "09:39 madminer.ml INFO Scale inputs: True\n", - "09:39 madminer.ml INFO Shuffle labels False\n", - "09:39 madminer.ml INFO Regularization: None\n", - "09:39 madminer.ml INFO Samples: all\n", - "09:39 madminer.ml INFO Loading training data\n", - "09:39 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", - "09:39 madminer.ml INFO Rescaling inputs\n", - "09:39 madminer.ml INFO Creating model for method sally\n", - "09:39 madminer.ml INFO Training model\n", - "09:39 madminer.utils.ml.sc INFO Epoch 02: train loss 0.3327 (mse_score: 0.3327)\n", - "09:39 madminer.utils.ml.sc INFO val. loss 0.3826 (mse_score: 0.3826) (*)\n", - "09:39 madminer.utils.ml.sc INFO Epoch 04: train loss 0.3174 (mse_score: 0.3174)\n", - "09:39 madminer.utils.ml.sc INFO val. loss 0.3637 (mse_score: 0.3637) (*)\n", - "09:39 madminer.utils.ml.sc INFO Epoch 06: train loss 0.3130 (mse_score: 0.3130)\n", - "09:39 madminer.utils.ml.sc INFO val. 
loss 0.3600 (mse_score: 0.3600) (*)\n", - "09:39 madminer.utils.ml.sc INFO Epoch 08: train loss 0.3100 (mse_score: 0.3100)\n", - "09:39 madminer.utils.ml.sc INFO val. loss 0.3564 (mse_score: 0.3564) (*)\n", - "09:40 madminer.utils.ml.sc INFO Epoch 10: train loss 0.3080 (mse_score: 0.3080)\n", - "09:40 madminer.utils.ml.sc INFO val. loss 0.3527 (mse_score: 0.3527) (*)\n", - "09:40 madminer.utils.ml.sc INFO Epoch 12: train loss 0.3062 (mse_score: 0.3062)\n", - "09:40 madminer.utils.ml.sc INFO val. loss 0.3519 (mse_score: 0.3519) (*)\n", - "09:40 madminer.utils.ml.sc INFO Epoch 14: train loss 0.3056 (mse_score: 0.3056)\n", - "09:40 madminer.utils.ml.sc INFO val. loss 0.3501 (mse_score: 0.3501) (*)\n", - "09:40 madminer.utils.ml.sc INFO Epoch 16: train loss 0.3044 (mse_score: 0.3044)\n", - "09:40 madminer.utils.ml.sc INFO val. loss 0.3499 (mse_score: 0.3499) (*)\n", - "09:40 madminer.utils.ml.sc INFO Epoch 18: train loss 0.3048 (mse_score: 0.3048)\n", - "09:40 madminer.utils.ml.sc INFO val. loss 0.3485 (mse_score: 0.3485)\n", - "09:40 madminer.utils.ml.sc INFO Epoch 20: train loss 0.3032 (mse_score: 0.3032)\n", - "09:40 madminer.utils.ml.sc INFO val. 
loss 0.3493 (mse_score: 0.3493)\n", - "09:40 madminer.utils.ml.sc INFO Early stopping after epoch 17, with loss 0.35 compared to final loss 0.35\n", - "09:40 madminer.utils.ml.sc INFO Finished training\n", - "09:40 madminer.ml INFO Training estimator 5 / 5 in ensemble\n", - "09:40 madminer.ml INFO Starting training\n", - "09:40 madminer.ml INFO Method: sally\n", - "09:40 madminer.ml INFO Training data: x at data/samples/x_train.npy\n", - "09:40 madminer.ml INFO t_xz (theta0) at data/samples/t_xz_train.npy\n", - "09:40 madminer.ml INFO Features: all\n", - "09:40 madminer.ml INFO Method: sally\n", - "09:40 madminer.ml INFO Hidden layers: (100, 100)\n", - "09:40 madminer.ml INFO Activation function: tanh\n", - "09:40 madminer.ml INFO Batch size: 256\n", - "09:40 madminer.ml INFO Trainer: amsgrad\n", - "09:40 madminer.ml INFO Epochs: 20\n", - "09:40 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "09:40 madminer.ml INFO Validation split: 0.3\n", - "09:40 madminer.ml INFO Early stopping: True\n", - "09:40 madminer.ml INFO Scale inputs: True\n", - "09:40 madminer.ml INFO Shuffle labels False\n", - "09:40 madminer.ml INFO Regularization: None\n", - "09:40 madminer.ml INFO Samples: all\n", - "09:40 madminer.ml INFO Loading training data\n", - "09:40 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", - "09:40 madminer.ml INFO Rescaling inputs\n", - "09:40 madminer.ml INFO Creating model for method sally\n", - "09:40 madminer.ml INFO Training model\n", - "09:41 madminer.utils.ml.sc INFO Epoch 02: train loss 0.3580 (mse_score: 0.3580)\n", - "09:41 madminer.utils.ml.sc INFO val. loss 0.3284 (mse_score: 0.3284) (*)\n", - "09:41 madminer.utils.ml.sc INFO Epoch 04: train loss 0.3385 (mse_score: 0.3385)\n", - "09:41 madminer.utils.ml.sc INFO val. loss 0.3179 (mse_score: 0.3179) (*)\n", - "09:41 madminer.utils.ml.sc INFO Epoch 06: train loss 0.3323 (mse_score: 0.3323)\n", - "09:41 madminer.utils.ml.sc INFO val. 
loss 0.3080 (mse_score: 0.3080) (*)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "09:41 madminer.utils.ml.sc INFO Epoch 08: train loss 0.3283 (mse_score: 0.3283)\n", - "09:41 madminer.utils.ml.sc INFO val. loss 0.3079 (mse_score: 0.3079) (*)\n", - "09:41 madminer.utils.ml.sc INFO Epoch 10: train loss 0.3257 (mse_score: 0.3257)\n", - "09:41 madminer.utils.ml.sc INFO val. loss 0.3052 (mse_score: 0.3052)\n", - "09:41 madminer.utils.ml.sc INFO Epoch 12: train loss 0.3241 (mse_score: 0.3241)\n", - "09:41 madminer.utils.ml.sc INFO val. loss 0.3027 (mse_score: 0.3027) (*)\n", - "09:42 madminer.utils.ml.sc INFO Epoch 14: train loss 0.3231 (mse_score: 0.3231)\n", - "09:42 madminer.utils.ml.sc INFO val. loss 0.3019 (mse_score: 0.3019) (*)\n", - "09:42 madminer.utils.ml.sc INFO Epoch 16: train loss 0.3218 (mse_score: 0.3218)\n", - "09:42 madminer.utils.ml.sc INFO val. loss 0.3027 (mse_score: 0.3027)\n", - "09:42 madminer.utils.ml.sc INFO Epoch 18: train loss 0.3206 (mse_score: 0.3206)\n", - "09:42 madminer.utils.ml.sc INFO val. loss 0.3012 (mse_score: 0.3012)\n", - "09:42 madminer.utils.ml.sc INFO Epoch 20: train loss 0.3207 (mse_score: 0.3207)\n", - "09:42 madminer.utils.ml.sc INFO val. 
loss 0.3014 (mse_score: 0.3014)\n", - "09:42 madminer.utils.ml.sc INFO Early stopping after epoch 17, with loss 0.30 compared to final loss 0.30\n", - "09:42 madminer.utils.ml.sc INFO Finished training\n" - ] - } - ], + "outputs": [], "source": [ "ensemble.train_all(\n", " method='sally',\n", - " x_filename='data/samples/x_train.npy',\n", - " t_xz0_filename='data/samples/t_xz_train.npy',\n", - " n_epochs=20,\n", - " batch_size=256,\n", - " validation_split=0.3\n", + " x='data/samples/x_train.npy',\n", + " t_xz='data/samples/t_xz_train.npy',\n", + " n_epochs=5,\n", ")\n", "\n", "ensemble.save('models/sally_ensemble')" @@ -778,28 +510,9 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "09:42 madminer.fisherinfor INFO Loading data from data/madminer_example_shuffled.h5\n", - "09:42 madminer.fisherinfor INFO Found 2 parameters\n", - "09:42 madminer.fisherinfor WARNING Did not find nuisance parameters!\n", - "09:42 madminer.fisherinfor INFO Found 6 benchmarks, of which 6 physical\n", - "09:42 madminer.fisherinfor INFO Found 2 observables: pt_j1, delta_phi_jj\n", - "09:42 madminer.fisherinfor INFO Found 46134 events\n", - "09:42 madminer.fisherinfor INFO Found morphing setup with 6 components\n", - "09:42 madminer.ml INFO Found ensemble with 5 estimators and expectations None\n", - "09:42 madminer.fisherinfor INFO Evaluating rate Fisher information\n", - "09:42 madminer.utils.inter WARNING include_nuisance_parameters=False without benchmark_is_nuisance information. Returning all weights.\n", - "09:42 madminer.utils.inter WARNING include_nuisance_parameters=False without benchmark_is_nuisance information. 
Returning all weights.\n", - "09:42 madminer.fisherinfor INFO Evaluating kinematic Fisher information on batch 1 / 1\n" - ] - } - ], + "outputs": [], "source": [ "fisher = FisherInformation('data/madminer_example_shuffled.h5')\n", "\n", @@ -820,22 +533,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "_ = plot_fisher_information_contours_2d(\n", " [fisher_information_mean],\n", diff --git a/examples/tutorial_delphes/3_systematic_uncertainties.ipynb b/examples/tutorial_delphes/3_systematic_uncertainties.ipynb new file mode 100755 index 000000000..d1f706149 --- /dev/null +++ b/examples/tutorial_delphes/3_systematic_uncertainties.ipynb @@ -0,0 +1,366 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MadMiner parton-level tutorial, part 3: Systematic uncertainties\n", + "\n", + "Johann Brehmer, Felix Kling, Kyle Cranmer 2018" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this tutorial we'll explain how to add systematic uncertainties to the MadMiner workflow." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Preparations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before you execute this notebook, make sure you have running installations of MadGraph, Pythia, and Delphes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import absolute_import, division, print_function, unicode_literals\n", + "\n", + "import logging\n", + "import numpy as np\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "from madminer.core import MadMiner\n", + "from madminer.lhe import LHEReader\n", + "from madminer.sampling import combine_and_shuffle\n", + "from madminer.sampling import SampleAugmenter\n", + "from madminer import sampling\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Please enter here the path to your MG5 root directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mg_dir = '/Users/johannbrehmer/work/projects/madminer/MG5_aMC_v2_6_4'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "MadMiner uses the Python `logging` module to provide additional information and debugging output. You can choose how much of this output you want to see by switching the level in the following lines to `logging.DEBUG` or `logging.WARNING`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# MadMiner output\n", + "logging.basicConfig(\n", + " format='%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s',\n", + " datefmt='%H:%M',\n", + " level=logging.INFO\n", + ")\n", + "\n", + "# Output of all other modules (e.g. matplotlib)\n", + "for key in logging.Logger.manager.loggerDict:\n", + " if \"madminer\" not in key:\n", + " logging.getLogger(key).setLevel(logging.WARNING)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.-2. Parameters and benchmarks" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll just load the MadMiner setup from the first part of this tutorial:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "miner = MadMiner()\n", + "miner.load('data/madminer_example.h5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Set up systematics, save settings, run MadGraph" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is where things become interesting: We want to model systematic uncertainties. Currently this can be done in one of two ways: based on scale variation or based on PDF variations. You can also use both simultaneously. 
Here we just vary the scales:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "miner.set_systematics(scale_variation=(0.5,2.), pdf_variation=None)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Again, we save our setup:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "miner.save('data/madminer_example_systematics.h5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now it's time to run MadGraph. MadMiner will instruct MadGraph to use its built-in `systematics` tool to calculate how the event weights change under the scale variation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "miner.run(\n", + " sample_benchmark='sm',\n", + " mg_directory=mg_dir,\n", + " mg_process_directory='./mg_processes/signal_systematics',\n", + " proc_card_file='cards/proc_card_signal.dat',\n", + " param_card_template_file='cards/param_card_template.dat',\n", + " run_card_file='cards/run_card_signal.dat',\n", + " log_directory='logs/signal',\n", + " python2_override=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 
Run smearing and extract observables" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is just as before:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lhe = LHEReader('data/madminer_example_systematics.h5')\n", + "\n", + "lhe.add_sample(\n", + " lhe_filename='mg_processes/signal_systematics/Events/run_01/unweighted_events.lhe.gz',\n", + " sampled_from_benchmark='sm',\n", + " is_background=False,\n", + " k_factor=1.1,\n", + ")\n", + "\n", + "lhe.set_smearing(\n", + " pdgids=[1,2,3,4,5,6,9,22,-1,-2,-3,-4,-5,-6], # Partons giving rise to jets\n", + " energy_resolution_abs=0.,\n", + " energy_resolution_rel=0.1,\n", + " pt_resolution_abs=None,\n", + " pt_resolution_rel=None,\n", + " eta_resolution_abs=0.1,\n", + " eta_resolution_rel=0.,\n", + " phi_resolution_abs=0.1,\n", + " phi_resolution_rel=0.,\n", + ")\n", + "\n", + "lhe.add_observable(\n", + " 'pt_j1',\n", + " 'j[0].pt',\n", + " required=False,\n", + " default=0.,\n", + ")\n", + "lhe.add_observable(\n", + " 'delta_phi_jj',\n", + " 'j[0].deltaphi(j[1]) * (-1. 
+ 2.*float(j[0].eta > j[1].eta))',\n", + " required=True,\n", + ")\n", + "lhe.add_observable(\n", + " 'met',\n", + " 'met.pt',\n", + " required=True,\n", + ")\n", + "\n", + "lhe.add_cut('(a[0] + a[1]).m > 124.')\n", + "lhe.add_cut('(a[0] + a[1]).m < 126.')\n", + "lhe.add_cut('pt_j1 > 30.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "lhe.analyse_samples()\n", + "lhe.save('data/madminer_example_systematics_with_data.h5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### A look at distributions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see what our MC run produced:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = plot_uncertainty(\n", + " filename='data/madminer_example_systematics_with_data.h5',\n", + " parameter_points=['sm', np.array([10.,0.])],\n", + " line_labels=['SM', 'BSM'],\n", + " uncertainties='none',\n", + " n_bins=20,\n", + " n_cols=3,\n", + " normalize=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Make (unweighted) training and test samples with augmented data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sampler = SampleAugmenter('data/madminer_example_systematics_with_data.h5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When we generate training data, we now also have to specify the values of the nuisance parameters. The helper functions `sampling.nominal_nuisance_parameters()` and `sampling.iid_nuisance_parameters()` can be used in addition to the usual ones. The returned `theta0` and `theta1` now include values for the nuisance parameters."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x, theta0, theta1, y, r_xz, t_xz, _ = sampler.sample_train_ratio(\n", + " theta0=sampling.random_morphing_points(100, [('gaussian', 0., 15.), ('gaussian', 0., 15.)]),\n", + " theta1=sampling.benchmark('sm'),\n", + " nu0=sampling.iid_nuisance_parameters(\"gaussian\", 0., 1.),\n", + " nu1=sampling.nominal_nuisance_parameters(),\n", + " n_samples=1000,\n", + " folder='./data/samples',\n", + " filename='train'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To be continued..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/tutorial_parton_level/1_from_cards_to_likelihood_ratio.ipynb b/examples/tutorial_parton_level/1_from_cards_to_likelihood_ratio.ipynb index 45dd305ab..a82aaa36f 100755 --- a/examples/tutorial_parton_level/1_from_cards_to_likelihood_ratio.ipynb +++ b/examples/tutorial_parton_level/1_from_cards_to_likelihood_ratio.ipynb @@ -49,12 +49,13 @@ "%matplotlib inline\n", "\n", "from madminer.core import MadMiner\n", - "from madminer.lhe import LHEProcessor\n", + "from madminer.lhe import LHEReader\n", "from madminer.sampling import combine_and_shuffle\n", "from madminer.sampling import SampleAugmenter\n", - "from madminer.sampling import constant_benchmark_theta, multiple_benchmark_thetas, random_morphing_thetas\n", - "from madminer.ml import MLForge\n", - "from madminer.plotting import plot_2d_morphing_basis, 
plot_distributions\n" + "from madminer import sampling\n", + "from madminer.ml import ParameterizedRatioEstimator\n", + "from madminer.plotting import plot_2d_morphing_basis, plot_distributions\n", + "from madminer.limits import AsymptoticLimits\n" ] }, { @@ -135,9 +136,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "14:57 madminer.core INFO Added parameter CWL2 (LHA: dim6 2, maximal power in squared ME: (2,), range: (-10.0, 10.0))\n", + "14:57 madminer.core INFO Added parameter CPWL2 (LHA: dim6 5, maximal power in squared ME: (2,), range: (-10.0, 1.0))\n" + ] + } + ], "source": [ "miner = MadMiner()\n", "\n", @@ -189,9 +199,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "14:57 madminer.core INFO Added benchmark sm: CWL2 = 0.00e+00, CPWL2 = 0.00e+00)\n", + "14:57 madminer.core INFO Added benchmark w: CWL2 = 10.00, CPWL2 = 0.00e+00)\n" + ] + } + ], "source": [ "miner.add_benchmark(\n", " {'CWL2':0., 'CPWL2':0.},\n", @@ -221,11 +240,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "14:57 madminer.core INFO Optimizing basis for morphing\n", + "14:57 madminer.utils.morph DEBUG Region 0: max overall power 2, max individual powers [2, 2]\n", + "14:57 madminer.utils.morph DEBUG Adding component [0 0]\n", + "14:57 madminer.utils.morph DEBUG Adding component [0 1]\n", + "14:57 madminer.utils.morph DEBUG Adding component [0 2]\n", + "14:57 madminer.utils.morph DEBUG Adding component [1 0]\n", + "14:57 madminer.utils.morph DEBUG Adding component [1 1]\n", + "14:57 madminer.utils.morph DEBUG Adding component [2 0]\n", + 
"14:57 madminer.core DEBUG Added benchmark sm: CWL2 = 0.00e+00, CPWL2 = 0.00e+00)\n", + "14:57 madminer.core DEBUG Added benchmark w: CWL2 = 10.00, CPWL2 = 0.00e+00)\n", + "14:57 madminer.core DEBUG Added benchmark morphing_basis_vector_2: CWL2 = -9.65e+00, CPWL2 = -4.70e-01)\n", + "14:57 madminer.core DEBUG Added benchmark morphing_basis_vector_3: CWL2 = -4.64e+00, CPWL2 = -3.84e+00)\n", + "14:57 madminer.core DEBUG Added benchmark morphing_basis_vector_4: CWL2 = 6.21, CPWL2 = -7.48e+00)\n", + "14:57 madminer.core DEBUG Added benchmark morphing_basis_vector_5: CWL2 = -7.38e+00, CPWL2 = -9.26e+00)\n", + "14:57 madminer.core INFO Set up morphing with 2 parameters, 6 morphing components, 2 predefined basis points, and 4 new basis points\n" + ] + } + ], "source": [ "miner.set_morphing(\n", " include_existing_benchmarks=True,\n", @@ -242,9 +283,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ "fig = plot_2d_morphing_basis(\n", " miner.morpher,\n", @@ -264,9 +318,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "14:57 madminer.core INFO Saving setup (including morphing) to data/madminer_example.h5\n" + ] + } + ], "source": [ "miner.save('data/madminer_example.h5')" ] @@ -298,9 +360,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "14:57 madminer.utils.inter INFO Generating MadGraph process folder from cards/proc_card_signal.dat at ./mg_processes/signal\n", + "14:57 madminer.core INFO Run 0\n", + "14:57 madminer.core INFO Sampling from benchmark: sm\n", + "14:57 madminer.core INFO Original run card: cards/run_card_signal.dat\n", + "14:57 madminer.core INFO Original Pythia8 card: None\n", + "14:57 madminer.core INFO Copied run card: /madminer/cards/run_card_0.dat\n", + "14:57 madminer.core INFO Copied Pythia8 card: None\n", + "14:57 madminer.core INFO Param card: /madminer/cards/param_card_0.dat\n", + "14:57 madminer.core INFO Reweight card: /madminer/cards/reweight_card_0.dat\n", + "14:57 madminer.core INFO Log file: run_0.log\n", + "14:57 madminer.core INFO Creating param and reweight cards in ./mg_processes/signal//madminer/cards/param_card_0.dat, ./mg_processes/signal//madminer/cards/reweight_card_0.dat\n", + "14:57 madminer.utils.inter INFO Starting MadGraph and Pythia in ./mg_processes/signal\n", + "15:11 madminer.core INFO Finished running MadGraph! 
Please check that events were succesfully generated in the following folders:\n", + "\n", + "./mg_processes/signal/Events/run_01\n", + "\n", + "\n" + ] + } + ], "source": [ "miner.run(\n", " sample_benchmark='sm',\n", @@ -336,9 +422,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "\"\\nminer.run(\\n is_background=True,\\n sample_benchmark='sm',\\n mg_directory=mg_dir,\\n mg_process_directory='./mg_processes/background',\\n proc_card_file='cards/proc_card_background.dat',\\n param_card_template_file='cards/param_card_template.dat',\\n run_card_file='cards/run_card_background.dat',\\n log_directory='logs/background',\\n)\\n\"" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\"\"\"\n", "miner.run(\n", @@ -379,11 +476,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "15:11 madminer.utils.inter DEBUG HDF5 file does not contain is_reference field.\n" + ] + } + ], "source": [ - "proc = LHEProcessor('data/madminer_example.h5')" + "lhe = LHEReader('data/madminer_example.h5')" ] }, { @@ -397,13 +502,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "15:11 madminer.lhe DEBUG Adding event sample mg_processes/signal/Events/run_01/unweighted_events.lhe.gz\n" + ] + }, + { + "data": { + "text/plain": [ + "\"\\nlhe.add_sample(\\n lhe_filename='mg_processes/background/Events/run_01/unweighted_events.lhe.gz',\\n sampled_from_benchmark='sm',\\n is_background=True,\\n k_factor=1.0,\\n\"" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - 
"proc.add_sample(\n", + "lhe.add_sample(\n", " lhe_filename='mg_processes/signal/Events/run_01/unweighted_events.lhe.gz',\n", " sampled_from_benchmark='sm',\n", " is_background=False,\n", @@ -411,7 +534,7 @@ ")\n", "\n", "\"\"\"\n", - "proc.add_sample(\n", + "lhe.add_sample(\n", " lhe_filename='mg_processes/background/Events/run_01/unweighted_events.lhe.gz',\n", " sampled_from_benchmark='sm',\n", " is_background=True,\n", @@ -428,11 +551,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "proc.set_smearing(\n", + "lhe.set_smearing(\n", " pdgids=[1,2,3,4,5,6,9,22,-1,-2,-3,-4,-5,-6], # Partons giving rise to jets\n", " energy_resolution_abs=0.,\n", " energy_resolution_rel=0.1,\n", @@ -460,22 +583,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "15:11 madminer.lhe DEBUG Adding optional observable pt_j1 = j[0].pt with default 0.0\n", + "15:11 madminer.lhe DEBUG Adding required observable delta_phi_jj = j[0].deltaphi(j[1]) * (-1. + 2.*float(j[0].eta > j[1].eta))\n", + "15:11 madminer.lhe DEBUG Adding required observable met = met.pt\n" + ] + } + ], "source": [ - "proc.add_observable(\n", + "lhe.add_observable(\n", " 'pt_j1',\n", " 'j[0].pt',\n", " required=False,\n", " default=0.,\n", ")\n", - "proc.add_observable(\n", + "lhe.add_observable(\n", " 'delta_phi_jj',\n", " 'j[0].deltaphi(j[1]) * (-1. 
+ 2.*float(j[0].eta > j[1].eta))',\n", " required=True,\n", ")\n", - "proc.add_observable(\n", + "lhe.add_observable(\n", " 'met',\n", " 'met.pt',\n", " required=True,\n", @@ -491,13 +624,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "15:11 madminer.lhe DEBUG Adding cut (a[0] + a[1]).m > 124.\n", + "15:11 madminer.lhe DEBUG Adding cut (a[0] + a[1]).m < 126.\n", + "15:11 madminer.lhe DEBUG Adding cut pt_j1 > 30.\n" + ] + } + ], "source": [ - "proc.add_cut('(a[0] + a[1]).m > 124.')\n", - "proc.add_cut('(a[0] + a[1]).m < 126.')\n", - "proc.add_cut('pt_j1 > 30.')" + "lhe.add_cut('(a[0] + a[1]).m > 124.')\n", + "lhe.add_cut('(a[0] + a[1]).m < 126.')\n", + "lhe.add_cut('pt_j1 > 30.')" ] }, { @@ -509,13 +652,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "15:11 madminer.lhe INFO Analysing LHE sample mg_processes/signal/Events/run_01/unweighted_events.lhe.gz\n", + "15:11 madminer.lhe DEBUG Extracting nuisance parameter definitions from LHE file\n", + "15:11 madminer.utils.inter DEBUG Parsing nuisance parameter setup from LHE file at mg_processes/signal/Events/run_01/unweighted_events.lhe.gz\n", + "15:11 madminer.lhe DEBUG Found 0 nuisance parameters with matching benchmarks:\n", + "15:11 madminer.utils.inter DEBUG Parsing LHE file mg_processes/signal/Events/run_01/unweighted_events.lhe.gz\n", + "15:11 madminer.utils.inter DEBUG Parsing header and events as XML with cElementTree\n", + "15:11 madminer.utils.inter DEBUG Found entry event_norm = sum in LHE header. 
Interpreting this as weight_norm_is_average = False.\n", + "15:12 madminer.utils.inter DEBUG 51298 / 100000 events pass cut (a[0] + a[1]).m > 124.\n", + "15:12 madminer.utils.inter DEBUG 55361 / 100000 events pass cut (a[0] + a[1]).m < 126.\n", + "15:12 madminer.utils.inter DEBUG 98414 / 100000 events pass cut pt_j1 > 30.\n", + "15:12 madminer.utils.inter INFO 6557 events pass all cuts\n", + "15:12 madminer.lhe DEBUG Found weights ['sm', 'w', 'morphing_basis_vector_2', 'morphing_basis_vector_3', 'morphing_basis_vector_4', 'morphing_basis_vector_5'] in LHE file\n", + "15:12 madminer.lhe DEBUG Found 6557 events\n" + ] + } + ], "source": [ - "proc.analyse_samples()" + "lhe.analyse_samples()" ] }, { @@ -527,11 +690,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "15:12 madminer.lhe DEBUG Loading HDF5 data from data/madminer_example.h5 and saving file to data/madminer_example_with_data2.h5\n", + "15:12 madminer.lhe DEBUG Weight names: ['sm', 'w', 'morphing_basis_vector_2', 'morphing_basis_vector_3', 'morphing_basis_vector_4', 'morphing_basis_vector_5']\n", + "15:12 madminer.utils.inter DEBUG Benchmarks found in HDF5 file: ['sm', 'w', 'morphing_basis_vector_2', 'morphing_basis_vector_3', 'morphing_basis_vector_4', 'morphing_basis_vector_5']\n", + "15:12 madminer.utils.inter DEBUG Benchmark morphing_basis_vector_2 already in benchmark_names_phys\n", + "15:12 madminer.utils.inter DEBUG Benchmark morphing_basis_vector_3 already in benchmark_names_phys\n", + "15:12 madminer.utils.inter DEBUG Benchmark morphing_basis_vector_4 already in benchmark_names_phys\n", + "15:12 madminer.utils.inter DEBUG Benchmark morphing_basis_vector_5 already in benchmark_names_phys\n", + "15:12 madminer.utils.inter DEBUG Benchmark sm already in benchmark_names_phys\n", + "15:12 madminer.utils.inter DEBUG Benchmark w already in 
benchmark_names_phys\n", + "15:12 madminer.utils.inter DEBUG Combined benchmark names: ['sm', 'w', 'morphing_basis_vector_2', 'morphing_basis_vector_3', 'morphing_basis_vector_4', 'morphing_basis_vector_5']\n", + "15:12 madminer.utils.inter DEBUG Combined is_nuisance: [0 0 0 0 0 0]\n", + "15:12 madminer.utils.inter DEBUG Combined is_reference: [1 0 0 0 0 0]\n", + "15:12 madminer.utils.inter DEBUG Weight names found in event file: ['sm', 'w', 'morphing_basis_vector_2', 'morphing_basis_vector_3', 'morphing_basis_vector_4', 'morphing_basis_vector_5']\n", + "15:12 madminer.utils.inter DEBUG Benchmarks found in MadMiner file: ['sm', 'w', 'morphing_basis_vector_2', 'morphing_basis_vector_3', 'morphing_basis_vector_4', 'morphing_basis_vector_5']\n", + "15:12 madminer.utils.inter DEBUG Sorted benchmarks: ['sm', 'w', 'morphing_basis_vector_2', 'morphing_basis_vector_3', 'morphing_basis_vector_4', 'morphing_basis_vector_5']\n" + ] + } + ], "source": [ - "proc.save('data/madminer_example_with_data2.h5')" + "lhe.save('data/madminer_example_with_data2.h5')" ] }, { @@ -550,9 +735,55 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "15:21 madminer.analysis INFO Loading data from data/madminer_example_with_data2.h5\n", + "15:21 madminer.analysis INFO Found 2 parameters\n", + "15:21 madminer.analysis DEBUG CWL2 (LHA: dim6 2, maximal power in squared ME: (2,), range: (-10.0, 10.0))\n", + "15:21 madminer.analysis DEBUG CPWL2 (LHA: dim6 5, maximal power in squared ME: (2,), range: (-10.0, 1.0))\n", + "15:21 madminer.analysis INFO Did not find nuisance parameters\n", + "15:21 madminer.analysis INFO Found 6 benchmarks, of which 6 physical\n", + "15:21 madminer.analysis DEBUG sm: CWL2 = 0.00e+00, CPWL2 = 0.00e+00\n", + "15:21 madminer.analysis DEBUG w: CWL2 = 10.00, CPWL2 = 0.00e+00\n", + "15:21 madminer.analysis DEBUG morphing_basis_vector_2: 
CWL2 = -9.65e+00, CPWL2 = -4.70e-01\n", + "15:21 madminer.analysis DEBUG morphing_basis_vector_3: CWL2 = -4.64e+00, CPWL2 = -3.84e+00\n", + "15:21 madminer.analysis DEBUG morphing_basis_vector_4: CWL2 = 6.21, CPWL2 = -7.48e+00\n", + "15:21 madminer.analysis DEBUG morphing_basis_vector_5: CWL2 = -7.38e+00, CPWL2 = -9.26e+00\n", + "15:21 madminer.analysis INFO Found 3 observables\n", + "15:21 madminer.analysis DEBUG 0 pt_j1\n", + "15:21 madminer.analysis DEBUG 1 delta_phi_jj\n", + "15:21 madminer.analysis DEBUG 2 met\n", + "15:21 madminer.analysis INFO Found 6557 events\n", + "15:21 madminer.analysis INFO Found morphing setup with 6 components\n", + "15:21 madminer.plotting DEBUG Observable indices: [0, 1, 2]\n", + "15:21 madminer.plotting DEBUG Loaded raw data with shapes (6557, 3), (6557, 6)\n", + "15:21 madminer.plotting DEBUG Calculated 2 theta matrices\n", + "15:21 madminer.plotting DEBUG Plotting panel 0: observable 0, label pt_j1\n", + "15:21 madminer.plotting DEBUG Ranges for observable pt_j1: min = [30.00272214753616, 30.00272214753616], max = [268.4365868483533, 643.8710663910911]\n", + "15:21 madminer.plotting DEBUG Plotting panel 1: observable 1, label delta_phi_jj\n", + "15:21 madminer.plotting DEBUG Ranges for observable delta_phi_jj: min = [-3.13993286774336, -3.13993286774336], max = [3.138627963883186, 3.138627963883186]\n", + "15:21 madminer.plotting DEBUG Plotting panel 2: observable 2, label met\n", + "15:21 madminer.plotting DEBUG Ranges for observable met: min = [0.005806845822755466, 0.005806845822755466], max = [76.87778563467432, 178.33162311405053]\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ "_ = plot_distributions(\n", " filename='data/madminer_example_with_data2.h5',\n", @@ -583,9 +814,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "15:21 madminer.sampling DEBUG Combining and shuffling samples\n", + "15:21 madminer.sampling INFO Copying setup from data/madminer_example_with_data.h5 to data/madminer_example_shuffled.h5\n", + "15:21 madminer.sampling INFO Loading samples from file 1 / 1 at data/madminer_example_with_data.h5, multiplying weights with k factor 1.0\n" + ] + } + ], "source": [ "combine_and_shuffle(\n", " ['data/madminer_example_with_data.h5'],\n", @@ -613,11 +854,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "15:32 madminer.analysis INFO Loading data from data/madminer_example_shuffled.h5\n", + "15:32 madminer.analysis INFO Found 2 parameters\n", + "15:32 madminer.analysis INFO Did not find nuisance parameters\n", + "15:32 madminer.analysis INFO Found 6 benchmarks, of which 6 physical\n", + "15:32 madminer.analysis INFO Found 2 observables\n", + "15:32 madminer.analysis INFO Found 6537 events\n", + "15:32 madminer.analysis INFO Found morphing setup with 6 components\n" + ] + } + ], "source": [ - "sa = SampleAugmenter('data/madminer_example_shuffled.h5')" + "sampler = SampleAugmenter('data/madminer_example_shuffled.h5')" ] }, { @@ -625,26 +880,40 @@ "metadata": {}, "source": [ "The `SampleAugmenter` class defines five different high-level functions to generate train or test samples:\n", - "- `extract_samples_train_plain()`, which only saves observations x, for instance for histograms or ABC;\n", - "- `extract_samples_train_local()` 
for methods like SALLY and SALLINO, which will be demonstrated in the second part of the tutorial;\n", - "- `extract_samples_train_ratio()` for techniques like CARL, ROLR, CASCAL, and RASCAL, when only theta0 is parameterized;\n", - "- `extract_samples_train_more_ratios()` for the same techniques, but with both theta0 and theta1 parameterized;\n", - "- `extract_samples_test()` for the evaluation of any method.\n", + "- `sample_train_plain()`, which only saves observations x, for instance for histograms or ABC;\n", + "- `sample_train_local()` for methods like SALLY and SALLINO, which will be demonstrated in the second part of the tutorial;\n", + "- `sample_train_density()` for neural density estimation techniques like MAF or SCANDAL;\n", + "- `sample_train_ratio()` for techniques like CARL, ROLR, CASCAL, and RASCAL, when only theta0 is parameterized;\n", + "- `sample_train_more_ratios()` for the same techniques, but with both theta0 and theta1 parameterized;\n", + "- `sample_test()` for the evaluation of any method.\n", "\n", - "For the arguments `theta`, `theta0`, or `theta1`, you can (and should!) use the helper functions `constant_benchmark_theta()`, `multiple_benchmark_thetas()`, `constant_morphing_theta()`, `multiple_morphing_thetas()`, and `random_morphing_thetas()`, all defined in the `madminer.sampling` module.\n", + "For the arguments `theta`, `theta0`, or `theta1`, you can (and should!) use the helper functions `benchmark()`, `benchmarks()`, `morphing_point()`, `morphing_points()`, and `random_morphing_points()`, all defined in the `madminer.sampling` module.\n", "\n", "Here we'll train a likelihood ratio estimator with the ALICES method, so we focus on the `extract_samples_train_ratio()` function." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "15:32 madminer.sampling INFO Extracting training sample for ratio-based methods. Numerator hypothesis: ('random_morphing_points', (100, [('gaussian', 0.0, 15.0), ('gaussian', 0.0, 15.0)])), denominator hypothesis: ('benchmark', 'sm')\n", + "/Users/johannbrehmer/work/projects/madminer/madminer/madminer/analysis.py:284: RuntimeWarning: invalid value encountered in sqrt\n", + " xsec_uncertainties = xsec_uncertainties ** 0.5\n", + "15:32 madminer.sampling WARNING Large statistical uncertainty on the total cross section when sampling from theta = [4.59589697 7.76994673]: (0.000720 +/- 0.000073) pb (10.124460729467307 %). Skipping these warnings in the future...\n", + "15:32 madminer.sampling INFO Effective number of samples: mean 16.457179431399243, with individual thetas ranging from 7.197251196742754 to 68.10093524160831\n", + "15:32 madminer.sampling INFO Effective number of samples: mean 5230.0, with individual thetas ranging from 5230.0 to 5230.0\n" + ] + } + ], "source": [ - "x, theta0, theta1, y, r_xz, t_xz = sa.extract_samples_train_ratio(\n", - " theta0=random_morphing_thetas(100, [('gaussian', 0., 10.), ('gaussian', 0., 10.)]),\n", - " theta1=constant_benchmark_theta('sm'),\n", + "x, theta0, theta1, y, r_xz, t_xz, _ = sampler.sample_train_ratio(\n", + " theta0=sampling.random_morphing_points(100, [('gaussian', 0., 15.), ('gaussian', 0., 15.)]),\n", + " theta1=sampling.benchmark('sm'),\n", " n_samples=100000,\n", " folder='./data/samples',\n", " filename='train'\n", @@ -660,15 +929,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "15:32 madminer.sampling INFO Extracting evaluation sample. 
Sampling according to ('benchmark', 'sm')\n", + "15:32 madminer.sampling INFO Effective number of samples: 1306.0000000000002\n" + ] + } + ], "source": [ - "_ = sa.extract_samples_test(\n", - " theta=constant_benchmark_theta('sm'),\n", - " n_samples=100000,\n", + "_ = sampler.sample_test(\n", + " theta=sampling.benchmark('sm'),\n", + " n_samples=1000,\n", " folder='./data/samples',\n", " filename='test'\n", ")" @@ -690,24 +968,46 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "15:32 madminer.sampling INFO Starting cross-section calculation\n", + "15:32 madminer.sampling INFO Starting cross-section calculation\n" + ] + } + ], "source": [ - "thetas_benchmarks, xsecs_benchmarks, xsec_errors_benchmarks = sa.extract_cross_sections(\n", - " theta=multiple_benchmark_thetas(['sm', 'w', 'morphing_basis_vector_2', 'morphing_basis_vector_3', 'morphing_basis_vector_4', 'morphing_basis_vector_5'])\n", + "thetas_benchmarks, xsecs_benchmarks, xsec_errors_benchmarks = sampler.cross_sections(\n", + " theta=sampling.benchmarks(['sm', 'w', 'morphing_basis_vector_2', 'morphing_basis_vector_3', 'morphing_basis_vector_4', 'morphing_basis_vector_5'])\n", ")\n", "\n", - "thetas_morphing, xsecs_morphing, xsec_errors_morphing = sa.extract_cross_sections(\n", - " theta=random_morphing_thetas(1000, [('gaussian', 0., 4.), ('gaussian', 0., 4.)])\n", + "thetas_morphing, xsecs_morphing, xsec_errors_morphing = sampler.cross_sections(\n", + " theta=sampling.random_morphing_points(1000, [('gaussian', 0., 4.), ('gaussian', 0., 4.)])\n", ")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ "cmin, cmax = 0., 2.5 * np.mean(xsecs_morphing)\n", "\n", @@ -748,16 +1048,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "It's now time to build the neural network that estimates the likelihood ratio. The central object for this is the `madminer.ml.MLForge` class. It defines functions that train, save, load, and evaluate the estimators." + "It's now time to build the neural network that estimates the likelihood ratio. The central object for this is the `madminer.ml.ParameterizedRatioEstimator` class. It defines functions that train, save, load, and evaluate the estimators.\n", + "\n", + "In the initialization, the keywords `n_hidden` and `activation` define the architecture of the (fully connected) neural network:" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "forge = MLForge()" + "estimator = ParameterizedRatioEstimator(\n", + " n_hidden=(100,),\n", + " activation=\"tanh\"\n", + ")" ] }, { @@ -771,104 +1076,92 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We will use the ALICES technique described in [\"Likelihood-free inference with an improved cross-entropy estimator\"](https://arxiv.org/abs/1808.00973). Most other methods, including RASCAL, are described in [\"Constraining Effective Field Theories With Machine Learning\"](https://arxiv.org/abs/1805.00013) and [\"A Guide to Constraining Effective Field Theories With Machine Learning\"](https://arxiv.org/abs/1805.00020). 
There is also SCANDAL introduced in [\"Mining gold from implicit models to improve likelihood-free inference\"](https://arxiv.org/abs/1805.12244).\n", - "\n", - "Most of these methods exist both in a \"single parameterized\" version, in which only the dependence of the likelihood ratio on the numerator is modelled, and a \"doubly parameterized\" version, in which both the dependence on the numerator and denominator parameters is modelled. For the single parameterized version, use `method='rascal'`, `method='alice'`, and so on. For the double parameterized version, use `method='rascal2'`, `method='alice2'`, etc. Note that for the doubly parameterized estimators you have to provide `theta1_filename`, and in the case of RASCAL and ALICES also `t_xz1_filename`." + "To train this model we will minimize the ALICES loss function described in [\"Likelihood-free inference with an improved cross-entropy estimator\"](https://arxiv.org/abs/1808.00973). Many alternatives, including RASCAL, are described in [\"Constraining Effective Field Theories With Machine Learning\"](https://arxiv.org/abs/1805.00013) and [\"A Guide to Constraining Effective Field Theories With Machine Learning\"](https://arxiv.org/abs/1805.00020). There is also SCANDAL introduced in [\"Mining gold from implicit models to improve likelihood-free inference\"](https://arxiv.org/abs/1805.12244)." 
] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "18:02 madminer.ml INFO Starting training\n", - "18:02 madminer.ml INFO Method: alices\n", - "18:02 madminer.ml INFO Training data: x at data/samples/x_train.npy\n", - "18:02 madminer.ml INFO theta0 at data/samples/theta0_train.npy\n", - "18:02 madminer.ml INFO y at data/samples/y_train.npy\n", - "18:02 madminer.ml INFO r_xz at data/samples/r_xz_train.npy\n", - "18:02 madminer.ml INFO t_xz (theta0) at data/samples/t_xz_train.npy\n", - "18:02 madminer.ml INFO Features: all\n", - "18:02 madminer.ml INFO Method: alices\n", - "18:02 madminer.ml INFO Hidden layers: (20, 20)\n", - "18:02 madminer.ml INFO Activation function: tanh\n", - "18:02 madminer.ml INFO alpha: 10.0\n", - "18:02 madminer.ml INFO Batch size: 200\n", - "18:02 madminer.ml INFO Optimizer: amsgrad\n", - "18:02 madminer.ml INFO Epochs: 20\n", - "18:02 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "18:02 madminer.ml INFO Validation split: 0.25\n", - "18:02 madminer.ml INFO Early stopping: True\n", - "18:02 madminer.ml INFO Scale inputs: True\n", - "18:02 madminer.ml INFO Shuffle labels False\n", - "18:02 madminer.ml INFO Samples: all\n", - "18:02 madminer.ml INFO Loading training data\n", - "18:02 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", - "18:02 madminer.ml INFO Rescaling inputs\n", - "18:02 madminer.ml INFO Creating model for method alices\n", - "18:02 madminer.ml INFO Training model\n", - "18:03 madminer.utils.ml.tr INFO Epoch 1: train loss 0.65668 (improved_xe: 0.558, mse_score: 0.010)\n", - "18:03 madminer.utils.ml.tr INFO val. loss 0.60964 (improved_xe: 0.512, mse_score: 0.010)\n", - "18:03 madminer.utils.ml.tr INFO Epoch 2: train loss 0.58610 (improved_xe: 0.505, mse_score: 0.008)\n", - "18:03 madminer.utils.ml.tr INFO val. 
loss 0.58834 (improved_xe: 0.501, mse_score: 0.009)\n", - "18:03 madminer.utils.ml.tr INFO Epoch 3: train loss 0.56904 (improved_xe: 0.494, mse_score: 0.008)\n", - "18:03 madminer.utils.ml.tr INFO val. loss 0.57285 (improved_xe: 0.490, mse_score: 0.008)\n", - "18:04 madminer.utils.ml.tr INFO Epoch 4: train loss 0.55583 (improved_xe: 0.484, mse_score: 0.007)\n", - "18:04 madminer.utils.ml.tr INFO val. loss 0.56233 (improved_xe: 0.482, mse_score: 0.008)\n", - "18:04 madminer.utils.ml.tr INFO Epoch 5: train loss 0.54887 (improved_xe: 0.479, mse_score: 0.007)\n", - "18:04 madminer.utils.ml.tr INFO val. loss 0.55753 (improved_xe: 0.480, mse_score: 0.008)\n", - "18:04 madminer.utils.ml.tr INFO Epoch 6: train loss 0.54480 (improved_xe: 0.477, mse_score: 0.007)\n", - "18:04 madminer.utils.ml.tr INFO val. loss 0.55427 (improved_xe: 0.478, mse_score: 0.008)\n", - "18:04 madminer.utils.ml.tr INFO Epoch 7: train loss 0.54193 (improved_xe: 0.476, mse_score: 0.007)\n", - "18:04 madminer.utils.ml.tr INFO val. loss 0.55197 (improved_xe: 0.477, mse_score: 0.007)\n", - "18:05 madminer.utils.ml.tr INFO Epoch 8: train loss 0.53994 (improved_xe: 0.475, mse_score: 0.006)\n", - "18:05 madminer.utils.ml.tr INFO val. loss 0.55032 (improved_xe: 0.476, mse_score: 0.007)\n", - "18:05 madminer.utils.ml.tr INFO Epoch 9: train loss 0.53810 (improved_xe: 0.474, mse_score: 0.006)\n", - "18:05 madminer.utils.ml.tr INFO val. loss 0.54881 (improved_xe: 0.476, mse_score: 0.007)\n", - "18:05 madminer.utils.ml.tr INFO Epoch 10: train loss 0.53668 (improved_xe: 0.474, mse_score: 0.006)\n", - "18:05 madminer.utils.ml.tr INFO val. loss 0.54846 (improved_xe: 0.476, mse_score: 0.007)\n", - "18:05 madminer.utils.ml.tr INFO Epoch 11: train loss 0.53568 (improved_xe: 0.473, mse_score: 0.006)\n", - "18:05 madminer.utils.ml.tr INFO val. 
loss 0.54705 (improved_xe: 0.475, mse_score: 0.007)\n", - "18:06 madminer.utils.ml.tr INFO Epoch 12: train loss 0.53473 (improved_xe: 0.473, mse_score: 0.006)\n", - "18:06 madminer.utils.ml.tr INFO val. loss 0.54593 (improved_xe: 0.474, mse_score: 0.007)\n", - "18:06 madminer.utils.ml.tr INFO Epoch 13: train loss 0.53400 (improved_xe: 0.473, mse_score: 0.006)\n", - "18:06 madminer.utils.ml.tr INFO val. loss 0.54547 (improved_xe: 0.474, mse_score: 0.007)\n", - "18:06 madminer.utils.ml.tr INFO Epoch 14: train loss 0.53337 (improved_xe: 0.472, mse_score: 0.006)\n", - "18:06 madminer.utils.ml.tr INFO val. loss 0.54500 (improved_xe: 0.474, mse_score: 0.007)\n", - "18:07 madminer.utils.ml.tr INFO Epoch 15: train loss 0.53283 (improved_xe: 0.472, mse_score: 0.006)\n", - "18:07 madminer.utils.ml.tr INFO val. loss 0.54444 (improved_xe: 0.473, mse_score: 0.007)\n", - "18:07 madminer.utils.ml.tr INFO Epoch 16: train loss 0.53243 (improved_xe: 0.472, mse_score: 0.006)\n", - "18:07 madminer.utils.ml.tr INFO val. loss 0.54418 (improved_xe: 0.473, mse_score: 0.007)\n", - "18:07 madminer.utils.ml.tr INFO Epoch 17: train loss 0.53203 (improved_xe: 0.472, mse_score: 0.006)\n", - "18:07 madminer.utils.ml.tr INFO val. loss 0.54378 (improved_xe: 0.473, mse_score: 0.007)\n", - "18:07 madminer.utils.ml.tr INFO Epoch 18: train loss 0.53167 (improved_xe: 0.472, mse_score: 0.006)\n", - "18:07 madminer.utils.ml.tr INFO val. loss 0.54368 (improved_xe: 0.473, mse_score: 0.007)\n", - "18:08 madminer.utils.ml.tr INFO Epoch 19: train loss 0.53139 (improved_xe: 0.471, mse_score: 0.006)\n", - "18:08 madminer.utils.ml.tr INFO val. loss 0.54336 (improved_xe: 0.473, mse_score: 0.007)\n", - "18:08 madminer.utils.ml.tr INFO Epoch 20: train loss 0.53117 (improved_xe: 0.471, mse_score: 0.006)\n", - "18:08 madminer.utils.ml.tr INFO val. 
loss 0.54308 (improved_xe: 0.473, mse_score: 0.007)\n", - "18:08 madminer.utils.ml.tr INFO Early stopping did not improve performance\n" + "15:32 madminer.ml INFO Starting training\n", + "15:32 madminer.ml INFO Method: alices\n", + "15:32 madminer.ml INFO alpha: 1.0\n", + "15:32 madminer.ml INFO Batch size: 200\n", + "15:32 madminer.ml INFO Optimizer: amsgrad\n", + "15:32 madminer.ml INFO Epochs: 20\n", + "15:32 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", + "15:32 madminer.ml INFO Validation split: 0.25\n", + "15:32 madminer.ml INFO Early stopping: True\n", + "15:32 madminer.ml INFO Scale inputs: True\n", + "15:32 madminer.ml INFO Shuffle labels False\n", + "15:32 madminer.ml INFO Samples: all\n", + "15:32 madminer.ml INFO Loading training data\n", + "15:32 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", + "15:32 madminer.ml INFO Rescaling inputs\n", + "15:32 madminer.ml INFO Creating model\n", + "15:32 madminer.ml INFO Training model\n", + "15:32 madminer.utils.ml.tr INFO Epoch 1: train loss 0.54737 (improved_xe: 0.541, mse_score: 0.007)\n", + "15:32 madminer.utils.ml.tr INFO val. loss 0.47317 (improved_xe: 0.466, mse_score: 0.007)\n", + "15:33 madminer.utils.ml.tr INFO Epoch 2: train loss 0.46643 (improved_xe: 0.460, mse_score: 0.006)\n", + "15:33 madminer.utils.ml.tr INFO val. loss 0.46405 (improved_xe: 0.458, mse_score: 0.006)\n", + "15:33 madminer.utils.ml.tr INFO Epoch 3: train loss 0.45810 (improved_xe: 0.452, mse_score: 0.006)\n", + "15:33 madminer.utils.ml.tr INFO val. loss 0.45854 (improved_xe: 0.453, mse_score: 0.006)\n", + "15:33 madminer.utils.ml.tr INFO Epoch 4: train loss 0.45234 (improved_xe: 0.447, mse_score: 0.005)\n", + "15:33 madminer.utils.ml.tr INFO val. loss 0.45217 (improved_xe: 0.447, mse_score: 0.005)\n", + "15:34 madminer.utils.ml.tr INFO Epoch 5: train loss 0.44889 (improved_xe: 0.444, mse_score: 0.005)\n", + "15:34 madminer.utils.ml.tr INFO val. 
loss 0.44919 (improved_xe: 0.444, mse_score: 0.005)\n", + "15:34 madminer.utils.ml.tr INFO Epoch 6: train loss 0.44669 (improved_xe: 0.442, mse_score: 0.005)\n", + "15:34 madminer.utils.ml.tr INFO val. loss 0.44713 (improved_xe: 0.442, mse_score: 0.005)\n", + "15:34 madminer.utils.ml.tr INFO Epoch 7: train loss 0.44523 (improved_xe: 0.440, mse_score: 0.005)\n", + "15:34 madminer.utils.ml.tr INFO val. loss 0.44619 (improved_xe: 0.441, mse_score: 0.005)\n", + "15:35 madminer.utils.ml.tr INFO Epoch 8: train loss 0.44404 (improved_xe: 0.439, mse_score: 0.005)\n", + "15:35 madminer.utils.ml.tr INFO val. loss 0.44569 (improved_xe: 0.440, mse_score: 0.005)\n", + "15:35 madminer.utils.ml.tr INFO Epoch 9: train loss 0.44324 (improved_xe: 0.438, mse_score: 0.005)\n", + "15:35 madminer.utils.ml.tr INFO val. loss 0.44442 (improved_xe: 0.439, mse_score: 0.005)\n", + "15:35 madminer.utils.ml.tr INFO Epoch 10: train loss 0.44241 (improved_xe: 0.437, mse_score: 0.005)\n", + "15:35 madminer.utils.ml.tr INFO val. loss 0.44384 (improved_xe: 0.439, mse_score: 0.005)\n", + "15:36 madminer.utils.ml.tr INFO Epoch 11: train loss 0.44188 (improved_xe: 0.437, mse_score: 0.005)\n", + "15:36 madminer.utils.ml.tr INFO val. loss 0.44329 (improved_xe: 0.438, mse_score: 0.005)\n", + "15:36 madminer.utils.ml.tr INFO Epoch 12: train loss 0.44136 (improved_xe: 0.436, mse_score: 0.005)\n", + "15:36 madminer.utils.ml.tr INFO val. loss 0.44285 (improved_xe: 0.438, mse_score: 0.005)\n", + "15:36 madminer.utils.ml.tr INFO Epoch 13: train loss 0.44094 (improved_xe: 0.436, mse_score: 0.005)\n", + "15:36 madminer.utils.ml.tr INFO val. loss 0.44295 (improved_xe: 0.438, mse_score: 0.005)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 14: train loss 0.44062 (improved_xe: 0.436, mse_score: 0.005)\n", + "15:37 madminer.utils.ml.tr INFO val. 
loss 0.44229 (improved_xe: 0.437, mse_score: 0.005)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 15: train loss 0.44041 (improved_xe: 0.435, mse_score: 0.005)\n", + "15:37 madminer.utils.ml.tr INFO val. loss 0.44202 (improved_xe: 0.437, mse_score: 0.005)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 16: train loss 0.44015 (improved_xe: 0.435, mse_score: 0.005)\n", + "15:38 madminer.utils.ml.tr INFO val. loss 0.44186 (improved_xe: 0.437, mse_score: 0.005)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 17: train loss 0.43990 (improved_xe: 0.435, mse_score: 0.005)\n", + "15:38 madminer.utils.ml.tr INFO val. loss 0.44159 (improved_xe: 0.436, mse_score: 0.005)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 18: train loss 0.43973 (improved_xe: 0.435, mse_score: 0.005)\n", + "15:38 madminer.utils.ml.tr INFO val. loss 0.44156 (improved_xe: 0.436, mse_score: 0.005)\n", + "15:39 madminer.utils.ml.tr INFO Epoch 19: train loss 0.43957 (improved_xe: 0.435, mse_score: 0.005)\n", + "15:39 madminer.utils.ml.tr INFO val. loss 0.44132 (improved_xe: 0.436, mse_score: 0.005)\n", + "15:39 madminer.utils.ml.tr INFO Epoch 20: train loss 0.43942 (improved_xe: 0.434, mse_score: 0.005)\n", + "15:39 madminer.utils.ml.tr INFO val. 
loss 0.44123 (improved_xe: 0.436, mse_score: 0.005)\n", + "15:39 madminer.utils.ml.tr INFO Early stopping did not improve performance\n" ] } ], "source": [ - "forge.train(\n", + "estimator.train(\n", " method='alices',\n", - " theta0_filename='data/samples/theta0_train.npy',\n", - " x_filename='data/samples/x_train.npy',\n", - " y_filename='data/samples/y_train.npy',\n", - " r_xz_filename='data/samples/r_xz_train.npy',\n", - " t_xz0_filename='data/samples/t_xz_train.npy',\n", - " n_hidden=(20,20),\n", - " alpha=10.,\n", + " theta='data/samples/theta0_train.npy',\n", + " x='data/samples/x_train.npy',\n", + " y='data/samples/y_train.npy',\n", + " r_xz='data/samples/r_xz_train.npy',\n", + " t_xz='data/samples/t_xz_train.npy',\n", + " alpha=1.,\n", " n_epochs=20,\n", ")\n", "\n", - "forge.save('models/alices')" + "estimator.save('models/alices')" ] }, { @@ -882,12 +1175,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "`forge.evaluate(theta,x)` estimated the log likelihood ratio and the score for all combination between the given phase-space points `x` and parameters `theta`. That is, if given 100 events `x` and a grid of 25 `theta` points, it will return 25\\*100 estimates for the log likelihood and 25\\*100 estimates for the score, both indexed by `[i_theta,i_x]`." + "`estimator.evaluate_log_likelihood_ratio(theta,x)` estimates the log likelihood ratio and the score for all combinations between the given phase-space points `x` and parameters `theta`. That is, if given 100 events `x` and a grid of 25 `theta` points, it will return 25\\*100 estimates for the log likelihood ratio and 25\\*100 estimates for the score, both indexed by `[i_theta,i_x]`." 
] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -902,14 +1195,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "forge.load('models/alices')\n", + "estimator.load('models/alices')\n", "\n", - "log_r_hat, _, _ = forge.evaluate(\n", - " theta0_filename='data/samples/theta_grid.npy',\n", + "log_r_hat, _ = estimator.evaluate_log_likelihood_ratio(\n", + " theta='data/samples/theta_grid.npy',\n", " x='data/samples/x_test.npy',\n", " evaluate_score=False\n", ")" @@ -924,12 +1217,12 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -971,8 +1264,296 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note that in this tutorial our sample size was very small, and the network does not really have a chance to converge to the correct likelihood ratio function. So don't worry if you find a minimum that is not at the right point (the SM, i.e. the origin in this plot). Feel free to dial up the event numbers in the run card as well as the training samples and see what happens then!\n", + "Note that in this tutorial our sample size was very small, and the network might not really have a chance to converge to the correct likelihood ratio function. So don't worry if you find a minimum that is not at the right point (the SM, i.e. the origin in this plot). Feel free to dial up the event numbers in the run card as well as the training samples and see what happens then!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8. Limits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the end, what we care about are not plots of the log likelihood ratio, but limits on parameters. But at least under some asymptotic assumptions, these are directly related. 
MadMiner makes it easy to calculate p-values in the asymptotic limit with the `AsymptoticLimits` class in the `madminer.limits` module:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "15:39 madminer.analysis INFO Loading data from data/madminer_example_shuffled.h5\n", + "15:39 madminer.analysis INFO Found 2 parameters\n", + "15:39 madminer.analysis INFO Did not find nuisance parameters\n", + "15:39 madminer.analysis INFO Found 6 benchmarks, of which 6 physical\n", + "15:39 madminer.analysis INFO Found 2 observables\n", + "15:39 madminer.analysis INFO Found 6537 events\n", + "15:39 madminer.analysis INFO Found morphing setup with 6 components\n" + ] + } + ], + "source": [ + "limits = AsymptoticLimits('data/madminer_example_shuffled.h5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This class provides two high-level functions:\n", + "- `AsymptoticLimits.observed_limits()` lets us calculate p-values on a parameter grid for some observed events, and\n", + "- `AsymptoticLimits.expected_limits()` lets us calculate expected p-values on a parameter grid based on all data in the MadMiner file.\n", + "\n", + "Note that these limits include both rate and kinematic information (the neural network is used for the kinematic part).\n", + "\n", + "Let's try both:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "theta_min, theta_max = -20., 20.\n", + "resolution = 25" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Expected limits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, with `mode=\"rate\"`, we can calculate expected limits based only on rate information:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": 
"expected_limits() got 
an unexpected keyword argument 'resolution'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"rate\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mresolution\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresolution\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mluminosity\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m300000.0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m )\n", + "\u001b[0;31mTypeError\u001b[0m: expected_limits() got an unexpected keyword argument 'resolution'" + ] + } + ], + "source": [ + "_, p_values_expected_xsec, best_fit_expected_xsec = limits.expected_limits(\n", + " theta_true=[0.,0.],\n", + " theta_ranges=[(theta_min, theta_max), (theta_min, theta_max)],\n", + " mode=\"rate\",\n", + " resolution=resolution,\n", + " luminosity=300000.0\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`mode=\"histo\"` calculates limits based on histograms. For now, there is not a lot of freedom in this step, the histogram binning is determined automatically." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_, p_values_expected_histo, best_fit_expected_histo = limits.expected_limits(\n", + " theta_true=[0.,0.],\n", + " theta_ranges=[(theta_min, theta_max), (theta_min, theta_max)],\n", + " mode=\"histo\",\n", + " hist_vars=[\"pt_j1\"],\n", + " include_xsec=False,\n", + " resolution=resolution,\n", + " luminosity=300000.0\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally and perhaps most importantly, `mode=\"ml\"` allows us to calculate limits based on any `ParameterizedRatioEstimator` instance like the ALICES estimator trained above:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "theta_grid, p_values_expected_ml, best_fit_expected_ml = limits.expected_limits(\n", + " theta_true=[0.,0.],\n", + " theta_ranges=[(theta_min, theta_max), (theta_min, theta_max)],\n", + " mode=\"ml\",\n", + " model_file='models/alices',\n", + " include_xsec=False,\n", + " resolution=resolution,\n", + " luminosity=300000.0\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Observed limits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Observed limits take as input actual data, which we here generate on the fly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sampler = SampleAugmenter('data/madminer_example_shuffled.h5')\n", + "x_observed, _ = sampler.extract_samples_test(\n", + " theta=sampling.morphing_point([0.,0.]),\n", + " n_samples=5,\n", + " folder=None,\n", + " filename=None\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_, p_values_observed, best_fit_observed = limits.observed_limits(\n", + " x_observed=x_observed,\n", + " theta_ranges=[(theta_min, 
theta_max), (theta_min, theta_max)],\n", + " mode=\"ml\",\n", + " model_file='models/alices',\n", + " include_xsec=True,\n", + " resolution=resolution,\n", + " luminosity=300000.0,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plot" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's plot the results:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bin_size = (theta_max - theta_min)/(resolution - 1)\n", + "edges = np.linspace(theta_min - bin_size/2, theta_max + bin_size/2, resolution + 1)\n", + "centers = np.linspace(theta_min, theta_max, resolution)\n", + "\n", + "fig = plt.figure(figsize=(6,5))\n", + "ax = plt.gca()\n", "\n", + "cmin, cmax = 1.e-3, 1.\n", + " \n", + "pcm = ax.pcolormesh(\n", + " edges, edges, p_values_expected_ml.reshape((resolution, resolution)),\n", + " norm=matplotlib.colors.LogNorm(vmin=cmin, vmax=cmax),\n", + " cmap='Greys_r'\n", + ")\n", + "cbar = fig.colorbar(pcm, ax=ax, extend='both')\n", + "\n", + "plt.contour(\n", + " centers, centers, p_values_expected_xsec.reshape((resolution, resolution)),\n", + " levels=[0.05],\n", + " linestyles='-', colors='darkgreen'\n", + ")\n", + "plt.contour(\n", + " centers, centers, p_values_expected_ml.reshape((resolution, resolution)),\n", + " levels=[0.05],\n", + " linestyles='-', colors='#CC002E'\n", + ")\n", + "plt.contour(\n", + " centers, centers, p_values_expected_histo.reshape((resolution, resolution)),\n", + " levels=[0.05],\n", + " linestyles='-', colors='C1'\n", + ")\n", + "plt.contour(\n", + " centers, centers, p_values_observed.reshape((resolution, resolution)),\n", + " levels=[0.05],\n", + " linestyles='--', colors='black'\n", + ")\n", + "\n", + "plt.scatter(\n", + " theta_grid[best_fit_expected_xsec][0], theta_grid[best_fit_expected_xsec][1],\n", + " s=80., color='darkgreen', marker='*',\n", + " label=\"xsec\"\n", + ")\n", + "plt.scatter(\n", + " 
theta_grid[best_fit_expected_ml][0], theta_grid[best_fit_expected_ml][1],\n", + " s=80., color='#CC002E', marker='*',\n", + " label=\"ALICES\"\n", + ")\n", + "plt.scatter(\n", + " theta_grid[best_fit_expected_histo][0], theta_grid[best_fit_expected_histo][1],\n", + " s=80., color='C1', marker='*',\n", + " label=\"Histo\"\n", + ")\n", + "plt.scatter(\n", + " theta_grid[best_fit_observed][0], theta_grid[best_fit_observed][1],\n", + " s=80., color='black', marker='*',\n", + " label=\"Observed\"\n", + ")\n", + "\n", + "plt.legend()\n", + "\n", + "plt.xlabel(r'$\\theta_0$')\n", + "plt.ylabel(r'$\\theta_1$')\n", + "cbar.set_label('Expected p-value (ALICES)')\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "That's it for now. Please have a look at the documentation for a detailed description of all classes and functions. And if you're curious about SALLY, Fisher information matrices, and ensemble methods, please look at the second part of the tutorial!" 
] }, diff --git a/examples/tutorial_parton_level/2_score_information_ensemble.ipynb b/examples/tutorial_parton_level/2_score_information_ensemble.ipynb index c2b9e544f..5fb3e7ff0 100755 --- a/examples/tutorial_parton_level/2_score_information_ensemble.ipynb +++ b/examples/tutorial_parton_level/2_score_information_ensemble.ipynb @@ -47,8 +47,9 @@ "from matplotlib import pyplot as plt\n", "%matplotlib inline\n", "\n", - "from madminer.sampling import SampleAugmenter, constant_benchmark_theta\n", - "from madminer.ml import MLForge, EnsembleForge\n", + "from madminer import sampling\n", + "from madminer.sampling import SampleAugmenter\n", + "from madminer.ml import ScoreEstimator, Ensemble\n", "from madminer.fisherinformation import FisherInformation\n", "from madminer.plotting import plot_fisher_information_contours_2d\n" ] @@ -116,25 +117,25 @@ "name": "stderr", "output_type": "stream", "text": [ - "11:50 madminer.sampling INFO Loading data from data/madminer_example_shuffled.h5\n", - "11:50 madminer.sampling INFO Found 2 parameters\n", - "11:50 madminer.sampling INFO Did not find nuisance parameters\n", - "11:50 madminer.sampling INFO Found 6 benchmarks, of which 6 physical\n", - "11:50 madminer.sampling INFO Found 2 observables\n", - "11:50 madminer.sampling INFO Found 6537 events\n", - "11:50 madminer.sampling INFO Found morphing setup with 6 components\n" + "15:36 madminer.analysis INFO Loading data from data/madminer_example_shuffled.h5\n", + "15:36 madminer.analysis INFO Found 2 parameters\n", + "15:36 madminer.analysis INFO Did not find nuisance parameters\n", + "15:36 madminer.analysis INFO Found 6 benchmarks, of which 6 physical\n", + "15:36 madminer.analysis INFO Found 2 observables\n", + "15:36 madminer.analysis INFO Found 6537 events\n", + "15:36 madminer.analysis INFO Found morphing setup with 6 components\n" ] } ], "source": [ - "sa = SampleAugmenter('data/madminer_example_shuffled.h5')" + "sampler = SampleAugmenter('data/madminer_example_shuffled.h5')" 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The relevant `SampleAugmenter` function for local score estimators is `extract_samples_train_local()`. As before, for the argument `theta` you can use the helper functions `constant_benchmark_theta()`, `multiple_benchmark_thetas()`, `constant_morphing_theta()`, `multiple_morphing_thetas()`, and `random_morphing_thetas()`." + "The relevant `SampleAugmenter` function for local score estimators is `sample_train_local()`. As before, for the argument `theta` you can use the helper functions `sampling.benchmark()`, `sampling.benchmarks()`, `sampling.morphing_point()`, `sampling.morphing_points()`, and `sampling.random_morphing_points()`." ] }, { @@ -146,14 +147,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "11:50 madminer.sampling INFO Extracting training sample for local score regression. Sampling and score evaluation according to (u'benchmark', u'sm')\n", - "11:50 madminer.sampling INFO Effective number of samples: 3269.0000000001205\n" + "15:36 madminer.sampling INFO Extracting training sample for local score regression. Sampling and score evaluation according to (u'benchmark', u'sm')\n", + "15:36 madminer.sampling INFO Effective number of samples: 5230.0\n" ] } ], "source": [ - "x, theta, t_xz = sa.extract_samples_train_local(\n", - " theta=constant_benchmark_theta('sm'),\n", + "x, theta, t_xz, _ = sampler.sample_train_local(\n", + " theta=sampling.benchmark('sm'),\n", " n_samples=100000,\n", " folder='./data/samples',\n", " filename='train'\n", @@ -173,7 +174,7 @@ "source": [ "It's now time to build a neural network. Only this time, instead of the likelihood ratio itself, we will estimate the gradient of the log likelihood with respect to the theory parameters -- the score. To be precise, the output of the neural network is an estimate of the score at some reference parameter point, for instance the Standard Model. 
A neural network that estimates this \"local\" score can be used to calculate the Fisher information at that point. The estimated score can also be used as a machine learning version of Optimal Observables, and likelihoods can be estimated based on density estimation in the estimated score space. This method for likelihood ratio estimation is called SALLY, and there is a closely related version called SALLINO. Both are explained in [\"Constraining Effective Field Theories With Machine Learning\"](https://arxiv.org/abs/1805.00013) and [\"A Guide to Constraining Effective Field Theories With Machine Learning\"](https://arxiv.org/abs/1805.00020).\n", "\n", - "Again, the central object for this is the `madminer.ml.MLForge` class:" + "The central object for this is the `madminer.ml.ScoreEstimator` class:" ] }, { @@ -182,7 +183,7 @@ "metadata": {}, "outputs": [], "source": [ - "forge = MLForge()" + "estimator = ScoreEstimator(n_hidden=(20,))" ] }, { @@ -201,147 +202,65 @@ "name": "stderr", "output_type": "stream", "text": [ - "11:50 madminer.ml INFO Starting training\n", - "11:50 madminer.ml INFO Method: sally\n", - "11:50 madminer.ml INFO Training data: x at data/samples/x_train.npy\n", - "11:50 madminer.ml INFO t_xz (theta0) at data/samples/t_xz_train.npy\n", - "11:50 madminer.ml INFO Features: all\n", - "11:50 madminer.ml INFO Method: sally\n", - "11:50 madminer.ml INFO Hidden layers: (100, 100, 100)\n", - "11:50 madminer.ml INFO Activation function: tanh\n", - "11:50 madminer.ml INFO Batch size: 200\n", - "11:50 madminer.ml INFO Optimizer: amsgrad\n", - "11:50 madminer.ml INFO Epochs: 50\n", - "11:50 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "11:50 madminer.ml INFO Validation split: 0.25\n", - "11:50 madminer.ml INFO Early stopping: True\n", - "11:50 madminer.ml INFO Scale inputs: True\n", - "11:50 madminer.ml INFO Shuffle labels False\n", - "11:50 madminer.ml INFO Samples: all\n", - "11:50 madminer.ml INFO Loading training data\n", - 
"11:50 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", - "11:50 madminer.ml INFO Rescaling inputs\n", - "11:50 madminer.ml INFO Creating model for method sally\n", - "11:50 madminer.ml INFO Training model\n", - "11:50 madminer.utils.ml.tr INFO Epoch 1: train loss 0.12853 (mse_score: 0.129)\n", - "11:50 madminer.utils.ml.tr INFO val. loss 0.11797 (mse_score: 0.118)\n", - "11:50 madminer.utils.ml.tr INFO Epoch 2: train loss 0.10225 (mse_score: 0.102)\n", - "11:50 madminer.utils.ml.tr INFO val. loss 0.10731 (mse_score: 0.107)\n", - "11:50 madminer.utils.ml.tr INFO Epoch 3: train loss 0.09853 (mse_score: 0.099)\n", - "11:50 madminer.utils.ml.tr INFO val. loss 0.10433 (mse_score: 0.104)\n", - "11:50 madminer.utils.ml.tr INFO Epoch 4: train loss 0.09622 (mse_score: 0.096)\n", - "11:50 madminer.utils.ml.tr INFO val. loss 0.10161 (mse_score: 0.102)\n", - "11:51 madminer.utils.ml.tr INFO Epoch 5: train loss 0.09503 (mse_score: 0.095)\n", - "11:51 madminer.utils.ml.tr INFO val. loss 0.10109 (mse_score: 0.101)\n", - "11:51 madminer.utils.ml.tr INFO Epoch 6: train loss 0.09354 (mse_score: 0.094)\n", - "11:51 madminer.utils.ml.tr INFO val. loss 0.09757 (mse_score: 0.098)\n", - "11:51 madminer.utils.ml.tr INFO Epoch 7: train loss 0.09216 (mse_score: 0.092)\n", - "11:51 madminer.utils.ml.tr INFO val. loss 0.09496 (mse_score: 0.095)\n", - "11:51 madminer.utils.ml.tr INFO Epoch 8: train loss 0.09107 (mse_score: 0.091)\n", - "11:51 madminer.utils.ml.tr INFO val. loss 0.09510 (mse_score: 0.095)\n", - "11:51 madminer.utils.ml.tr INFO Epoch 9: train loss 0.09009 (mse_score: 0.090)\n", - "11:51 madminer.utils.ml.tr INFO val. loss 0.09363 (mse_score: 0.094)\n", - "11:51 madminer.utils.ml.tr INFO Epoch 10: train loss 0.08897 (mse_score: 0.089)\n", - "11:51 madminer.utils.ml.tr INFO val. loss 0.08927 (mse_score: 0.089)\n", - "11:51 madminer.utils.ml.tr INFO Epoch 11: train loss 0.08778 (mse_score: 0.088)\n", - "11:51 madminer.utils.ml.tr INFO val. 
loss 0.08908 (mse_score: 0.089)\n", - "11:51 madminer.utils.ml.tr INFO Epoch 12: train loss 0.08711 (mse_score: 0.087)\n", - "11:51 madminer.utils.ml.tr INFO val. loss 0.08946 (mse_score: 0.089)\n", - "11:51 madminer.utils.ml.tr INFO Epoch 13: train loss 0.08582 (mse_score: 0.086)\n", - "11:51 madminer.utils.ml.tr INFO val. loss 0.08651 (mse_score: 0.087)\n", - "11:51 madminer.utils.ml.tr INFO Epoch 14: train loss 0.08540 (mse_score: 0.085)\n", - "11:51 madminer.utils.ml.tr INFO val. loss 0.08647 (mse_score: 0.086)\n", - "11:51 madminer.utils.ml.tr INFO Epoch 15: train loss 0.08405 (mse_score: 0.084)\n", - "11:51 madminer.utils.ml.tr INFO val. loss 0.08439 (mse_score: 0.084)\n", - "11:51 madminer.utils.ml.tr INFO Epoch 16: train loss 0.08389 (mse_score: 0.084)\n", - "11:51 madminer.utils.ml.tr INFO val. loss 0.08511 (mse_score: 0.085)\n", - "11:51 madminer.utils.ml.tr INFO Epoch 17: train loss 0.08320 (mse_score: 0.083)\n", - "11:51 madminer.utils.ml.tr INFO val. loss 0.08306 (mse_score: 0.083)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 18: train loss 0.08241 (mse_score: 0.082)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.08270 (mse_score: 0.083)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 19: train loss 0.08198 (mse_score: 0.082)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.08095 (mse_score: 0.081)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 20: train loss 0.08147 (mse_score: 0.081)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.07968 (mse_score: 0.080)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 21: train loss 0.08086 (mse_score: 0.081)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.08142 (mse_score: 0.081)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 22: train loss 0.08046 (mse_score: 0.080)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.07964 (mse_score: 0.080)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 23: train loss 0.08013 (mse_score: 0.080)\n", - "11:52 madminer.utils.ml.tr INFO val. 
loss 0.08024 (mse_score: 0.080)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 24: train loss 0.07960 (mse_score: 0.080)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.07819 (mse_score: 0.078)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 25: train loss 0.07921 (mse_score: 0.079)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.07826 (mse_score: 0.078)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 26: train loss 0.07909 (mse_score: 0.079)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.07719 (mse_score: 0.077)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 27: train loss 0.07860 (mse_score: 0.079)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.07716 (mse_score: 0.077)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 28: train loss 0.07844 (mse_score: 0.078)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.07652 (mse_score: 0.077)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 29: train loss 0.07827 (mse_score: 0.078)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.07640 (mse_score: 0.076)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 30: train loss 0.07796 (mse_score: 0.078)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.07635 (mse_score: 0.076)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 31: train loss 0.07773 (mse_score: 0.078)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.07553 (mse_score: 0.076)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 32: train loss 0.07738 (mse_score: 0.077)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.07580 (mse_score: 0.076)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 33: train loss 0.07736 (mse_score: 0.077)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.07492 (mse_score: 0.075)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 34: train loss 0.07709 (mse_score: 0.077)\n", - "11:52 madminer.utils.ml.tr INFO val. loss 0.07560 (mse_score: 0.076)\n", - "11:52 madminer.utils.ml.tr INFO Epoch 35: train loss 0.07690 (mse_score: 0.077)\n", - "11:52 madminer.utils.ml.tr INFO val. 
loss 0.07532 (mse_score: 0.075)\n", - "11:53 madminer.utils.ml.tr INFO Epoch 36: train loss 0.07680 (mse_score: 0.077)\n", - "11:53 madminer.utils.ml.tr INFO val. loss 0.07509 (mse_score: 0.075)\n", - "11:53 madminer.utils.ml.tr INFO Epoch 37: train loss 0.07666 (mse_score: 0.077)\n", - "11:53 madminer.utils.ml.tr INFO val. loss 0.07478 (mse_score: 0.075)\n", - "11:53 madminer.utils.ml.tr INFO Epoch 38: train loss 0.07642 (mse_score: 0.076)\n", - "11:53 madminer.utils.ml.tr INFO val. loss 0.07495 (mse_score: 0.075)\n", - "11:53 madminer.utils.ml.tr INFO Epoch 39: train loss 0.07639 (mse_score: 0.076)\n", - "11:53 madminer.utils.ml.tr INFO val. loss 0.07424 (mse_score: 0.074)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "11:53 madminer.utils.ml.tr INFO Epoch 40: train loss 0.07616 (mse_score: 0.076)\n", - "11:53 madminer.utils.ml.tr INFO val. loss 0.07436 (mse_score: 0.074)\n", - "11:53 madminer.utils.ml.tr INFO Epoch 41: train loss 0.07610 (mse_score: 0.076)\n", - "11:53 madminer.utils.ml.tr INFO val. loss 0.07434 (mse_score: 0.074)\n", - "11:53 madminer.utils.ml.tr INFO Epoch 42: train loss 0.07597 (mse_score: 0.076)\n", - "11:53 madminer.utils.ml.tr INFO val. loss 0.07398 (mse_score: 0.074)\n", - "11:53 madminer.utils.ml.tr INFO Epoch 43: train loss 0.07590 (mse_score: 0.076)\n", - "11:53 madminer.utils.ml.tr INFO val. loss 0.07410 (mse_score: 0.074)\n", - "11:53 madminer.utils.ml.tr INFO Epoch 44: train loss 0.07574 (mse_score: 0.076)\n", - "11:53 madminer.utils.ml.tr INFO val. loss 0.07400 (mse_score: 0.074)\n", - "11:53 madminer.utils.ml.tr INFO Epoch 45: train loss 0.07568 (mse_score: 0.076)\n", - "11:53 madminer.utils.ml.tr INFO val. loss 0.07379 (mse_score: 0.074)\n", - "11:53 madminer.utils.ml.tr INFO Epoch 46: train loss 0.07557 (mse_score: 0.076)\n", - "11:53 madminer.utils.ml.tr INFO val. 
loss 0.07363 (mse_score: 0.074)\n", - "11:53 madminer.utils.ml.tr INFO Epoch 47: train loss 0.07560 (mse_score: 0.076)\n", - "11:53 madminer.utils.ml.tr INFO val. loss 0.07369 (mse_score: 0.074)\n", - "11:53 madminer.utils.ml.tr INFO Epoch 48: train loss 0.07534 (mse_score: 0.075)\n", - "11:53 madminer.utils.ml.tr INFO val. loss 0.07333 (mse_score: 0.073)\n", - "11:53 madminer.utils.ml.tr INFO Epoch 49: train loss 0.07528 (mse_score: 0.075)\n", - "11:53 madminer.utils.ml.tr INFO val. loss 0.07339 (mse_score: 0.073)\n", - "11:53 madminer.utils.ml.tr INFO Epoch 50: train loss 0.07520 (mse_score: 0.075)\n", - "11:53 madminer.utils.ml.tr INFO val. loss 0.07348 (mse_score: 0.073)\n", - "11:53 madminer.utils.ml.tr INFO Early stopping did not improve performance\n" + "15:36 madminer.ml INFO Starting training\n", + "15:36 madminer.ml INFO Batch size: 200\n", + "15:36 madminer.ml INFO Optimizer: amsgrad\n", + "15:36 madminer.ml INFO Epochs: 50\n", + "15:36 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", + "15:36 madminer.ml INFO Validation split: 0.25\n", + "15:36 madminer.ml INFO Early stopping: True\n", + "15:36 madminer.ml INFO Scale inputs: True\n", + "15:36 madminer.ml INFO Shuffle labels False\n", + "15:36 madminer.ml INFO Samples: all\n", + "15:36 madminer.ml INFO Loading training data\n", + "15:36 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", + "15:36 madminer.ml INFO Rescaling inputs\n", + "15:36 madminer.ml INFO Creating model\n", + "15:36 madminer.ml INFO Training model\n", + "15:36 madminer.utils.ml.tr INFO Epoch 3: train loss 0.19037 (mse_score: 0.190)\n", + "15:36 madminer.utils.ml.tr INFO val. loss 0.15904 (mse_score: 0.159)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 6: train loss 0.14389 (mse_score: 0.144)\n", + "15:37 madminer.utils.ml.tr INFO val. 
loss 0.12421 (mse_score: 0.124)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 9: train loss 0.12970 (mse_score: 0.130)\n", + "15:37 madminer.utils.ml.tr INFO val. loss 0.11444 (mse_score: 0.114)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 12: train loss 0.12303 (mse_score: 0.123)\n", + "15:37 madminer.utils.ml.tr INFO val. loss 0.10981 (mse_score: 0.110)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 15: train loss 0.11926 (mse_score: 0.119)\n", + "15:37 madminer.utils.ml.tr INFO val. loss 0.10723 (mse_score: 0.107)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 18: train loss 0.11690 (mse_score: 0.117)\n", + "15:37 madminer.utils.ml.tr INFO val. loss 0.10569 (mse_score: 0.106)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 21: train loss 0.11537 (mse_score: 0.115)\n", + "15:37 madminer.utils.ml.tr INFO val. loss 0.10467 (mse_score: 0.105)\n", + "15:37 madminer.utils.ml.tr INFO Epoch 24: train loss 0.11426 (mse_score: 0.114)\n", + "15:37 madminer.utils.ml.tr INFO val. loss 0.10407 (mse_score: 0.104)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 27: train loss 0.11342 (mse_score: 0.113)\n", + "15:38 madminer.utils.ml.tr INFO val. loss 0.10358 (mse_score: 0.104)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 30: train loss 0.11283 (mse_score: 0.113)\n", + "15:38 madminer.utils.ml.tr INFO val. loss 0.10306 (mse_score: 0.103)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 33: train loss 0.11231 (mse_score: 0.112)\n", + "15:38 madminer.utils.ml.tr INFO val. loss 0.10260 (mse_score: 0.103)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 36: train loss 0.11192 (mse_score: 0.112)\n", + "15:38 madminer.utils.ml.tr INFO val. loss 0.10236 (mse_score: 0.102)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 39: train loss 0.11158 (mse_score: 0.112)\n", + "15:38 madminer.utils.ml.tr INFO val. loss 0.10215 (mse_score: 0.102)\n", + "15:38 madminer.utils.ml.tr INFO Epoch 42: train loss 0.11129 (mse_score: 0.111)\n", + "15:38 madminer.utils.ml.tr INFO val. 
loss 0.10194 (mse_score: 0.102)\n", + "15:39 madminer.utils.ml.tr INFO Epoch 45: train loss 0.11106 (mse_score: 0.111)\n", + "15:39 madminer.utils.ml.tr INFO val. loss 0.10175 (mse_score: 0.102)\n", + "15:39 madminer.utils.ml.tr INFO Epoch 48: train loss 0.11085 (mse_score: 0.111)\n", + "15:39 madminer.utils.ml.tr INFO val. loss 0.10162 (mse_score: 0.102)\n", + "15:39 madminer.utils.ml.tr INFO Early stopping did not improve performance\n" ] } ], "source": [ - "forge.train(\n", + "estimator.train(\n", " method='sally',\n", - " x_filename='data/samples/x_train.npy',\n", - " t_xz0_filename='data/samples/t_xz_train.npy',\n", - " verbose=\"all\",\n", + " x='data/samples/x_train.npy',\n", + " t_xz='data/samples/t_xz_train.npy',\n", ")\n", "\n", - "forge.save('models/sally')" + "estimator.save('models/sally')" ] }, { @@ -364,9 +283,9 @@ "metadata": {}, "outputs": [], "source": [ - "forge.load('models/sally')\n", + "estimator.load('models/sally')\n", "\n", - "t_hat = forge.evaluate(\n", + "t_hat = estimator.evaluate_score(\n", " x='data/samples/x_test.npy'\n", ")" ] @@ -385,7 +304,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -441,13 +360,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "11:53 madminer.fisherinfor INFO Loading data from data/madminer_example_shuffled.h5\n", - "11:53 madminer.fisherinfor INFO Found 2 parameters\n", - "11:53 madminer.fisherinfor WARNING Did not find nuisance parameters!\n", - "11:53 madminer.fisherinfor INFO Found 6 benchmarks, of which 6 physical\n", - "11:53 madminer.fisherinfor INFO Found 2 observables: pt_j1, delta_phi_jj\n", - "11:53 madminer.fisherinfor INFO Found 6537 events\n", - "11:53 madminer.fisherinfor INFO Found morphing setup with 6 components\n" + "15:39 madminer.analysis INFO Loading data from data/madminer_example_shuffled.h5\n", + "15:39 madminer.analysis INFO Found 2 parameters\n", + "15:39 madminer.analysis INFO Did not find nuisance parameters\n", + "15:39 madminer.analysis INFO Found 6 benchmarks, of which 6 physical\n", + "15:39 madminer.analysis INFO Found 2 observables\n", + "15:39 madminer.analysis INFO Found 6537 events\n", + "15:39 madminer.analysis INFO Found morphing setup with 6 components\n" ] } ], @@ -464,25 +383,29 @@ "name": "stderr", "output_type": "stream", "text": [ - "11:53 madminer.fisherinfor INFO Evaluating rate Fisher information\n", - "11:53 madminer.utils.inter WARNING include_nuisance_parameters=False without benchmark_is_nuisance information. Returning all weights.\n" + "15:39 madminer.fisherinfor INFO Evaluating rate Fisher information\n", + "15:39 madminer.utils.inter WARNING include_nuisance_parameters=False without benchmark_is_nuisance information. 
Returning all weights.\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Kinematic Fisher information after 3000 ifb:\n", - "[[70.14907905 15.70474489]\n", - " [15.70474489 68.36131458]]\n" - ] + "ename": "AttributeError", + "evalue": "'NoneType' object has no attribute 'calculate_a'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mtheta\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0.\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0.\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mmodel_file\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'models/sally'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mluminosity\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3000000.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m )\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/johannbrehmer/work/projects/madminer/madminer/madminer/fisherinformation.pyc\u001b[0m in \u001b[0;36mcalculate_fisher_information_full_detector\u001b[0;34m(self, theta, model_file, unweighted_x_sample_file, luminosity, include_xsec_info, mode, calculate_covariance, batch_size, test_split)\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Evaluating rate Fisher information\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 258\u001b[0m fisher_info_rate, rate_covariance = self.calculate_fisher_information_rate(\n\u001b[0;32m--> 259\u001b[0;31m \u001b[0mtheta\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtheta\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mluminosity\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mluminosity\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minclude_nuisance_parameters\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minclude_nuisance_parameters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 260\u001b[0m )\n\u001b[1;32m 261\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/johannbrehmer/work/projects/madminer/madminer/madminer/fisherinformation.pyc\u001b[0m in \u001b[0;36mcalculate_fisher_information_rate\u001b[0;34m(self, theta, luminosity, cuts, efficiency_functions, include_nuisance_parameters)\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0msum_events\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[0mcalculate_uncertainty\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 415\u001b[0;31m \u001b[0mweights_benchmark_uncertainties\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mweights_benchmark_uncertainties\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 416\u001b[0m )\n\u001b[1;32m 417\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/johannbrehmer/work/projects/madminer/madminer/madminer/fisherinformation.pyc\u001b[0m in \u001b[0;36m_calculate_fisher_information\u001b[0;34m(self, theta, weights_benchmarks, luminosity, include_nuisance_parameters, sum_events, calculate_uncertainty, weights_benchmark_uncertainties)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[0;31m# Nuisance parameter Fisher info\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1042\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0minclude_nuisance_parameters\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minclude_nuisance_parameters\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1043\u001b[0;31m 
\u001b[0mnuisance_a\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnuisance_morpher\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcalculate_a\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mweights_benchmarks\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Shape (n_nuisance_params, n_events)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1044\u001b[0m \u001b[0;31m# grad_i dsigma(x), where i is a nuisance parameter, is given by\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1045\u001b[0m \u001b[0;31m# sigma[np.newaxis, :] * a\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'calculate_a'" + ], + "output_type": "error" } ], "source": [ "fisher_information, _ = fisher.calculate_fisher_information_full_detector(\n", " theta=[0.,0.],\n", " model_file='models/sally',\n", - " unweighted_x_sample_file='data/samples/x_test.npy',\n", " luminosity=3000000.\n", ")\n", "\n", @@ -498,32 +421,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/johannbrehmer/anaconda3/envs/python2/lib/python2.7/site-packages/matplotlib/contour.py:1004: UserWarning: The following kwargs were not used by contour: 'label'\n", - " s)\n", - "/Users/johannbrehmer/anaconda3/envs/python2/lib/python2.7/site-packages/matplotlib/cbook/deprecation.py:107: MatplotlibDeprecationWarning: Adding an axes using the same arguments as a previous axes currently reuses the earlier instance. In a future version, a new instance will always be created and returned. Meanwhile, this warning can be suppressed, and the future behavior ensured, by passing a unique label to each axes instance.\n", - " warnings.warn(message, mplDeprecation, stacklevel=1)\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "_ = plot_fisher_information_contours_2d(\n", " [fisher_information],\n", @@ -548,11 +448,13 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "ensemble = EnsembleForge(estimators=5)" + "estimators = [ScoreEstimator(n_hidden=(20,)) for _ in range(5)]\n", + "\n", + "ensemble = Ensemble(estimators)" ] }, { @@ -571,257 +473,17 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { "scrolled": false }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "11:54 madminer.ml INFO Training 5 estimators in ensemble\n", - "11:54 madminer.ml INFO Training estimator 1 / 5 in ensemble\n", - "11:54 madminer.ml INFO Starting training\n", - "11:54 madminer.ml INFO Method: sally\n", - "11:54 madminer.ml INFO Training data: x at data/samples/x_train.npy\n", - "11:54 madminer.ml INFO t_xz (theta0) at data/samples/t_xz_train.npy\n", - "11:54 madminer.ml INFO Features: all\n", - "11:54 madminer.ml INFO Method: sally\n", - "11:54 madminer.ml INFO Hidden layers: (100, 100, 100)\n", - "11:54 madminer.ml INFO Activation function: tanh\n", - "11:54 madminer.ml INFO Batch size: 200\n", - "11:54 madminer.ml INFO Optimizer: amsgrad\n", - "11:54 madminer.ml INFO Epochs: 10\n", - "11:54 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "11:54 madminer.ml INFO Validation split: 0.25\n", - "11:54 madminer.ml INFO Early stopping: True\n", - "11:54 madminer.ml INFO Scale inputs: True\n", - "11:54 madminer.ml INFO Shuffle labels False\n", - "11:54 madminer.ml INFO Samples: all\n", - "11:54 madminer.ml INFO Loading training data\n", - "11:54 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", - "11:54 madminer.ml INFO Rescaling inputs\n", - "11:54 madminer.ml INFO Creating model 
for method sally\n", - "11:54 madminer.ml INFO Training model\n", - "11:54 madminer.utils.ml.tr INFO Epoch 1: train loss 0.14047 (mse_score: 0.140)\n", - "11:54 madminer.utils.ml.tr INFO val. loss 0.10298 (mse_score: 0.103)\n", - "11:54 madminer.utils.ml.tr INFO Epoch 2: train loss 0.10709 (mse_score: 0.107)\n", - "11:54 madminer.utils.ml.tr INFO val. loss 0.09808 (mse_score: 0.098)\n", - "11:54 madminer.utils.ml.tr INFO Epoch 3: train loss 0.10139 (mse_score: 0.101)\n", - "11:54 madminer.utils.ml.tr INFO val. loss 0.09723 (mse_score: 0.097)\n", - "11:54 madminer.utils.ml.tr INFO Epoch 4: train loss 0.09838 (mse_score: 0.098)\n", - "11:54 madminer.utils.ml.tr INFO val. loss 0.09475 (mse_score: 0.095)\n", - "11:54 madminer.utils.ml.tr INFO Epoch 5: train loss 0.09645 (mse_score: 0.096)\n", - "11:54 madminer.utils.ml.tr INFO val. loss 0.09134 (mse_score: 0.091)\n", - "11:55 madminer.utils.ml.tr INFO Epoch 6: train loss 0.09478 (mse_score: 0.095)\n", - "11:55 madminer.utils.ml.tr INFO val. loss 0.09197 (mse_score: 0.092)\n", - "11:55 madminer.utils.ml.tr INFO Epoch 7: train loss 0.09360 (mse_score: 0.094)\n", - "11:55 madminer.utils.ml.tr INFO val. loss 0.09069 (mse_score: 0.091)\n", - "11:55 madminer.utils.ml.tr INFO Epoch 8: train loss 0.09290 (mse_score: 0.093)\n", - "11:55 madminer.utils.ml.tr INFO val. loss 0.09046 (mse_score: 0.090)\n", - "11:55 madminer.utils.ml.tr INFO Epoch 9: train loss 0.09208 (mse_score: 0.092)\n", - "11:55 madminer.utils.ml.tr INFO val. loss 0.08945 (mse_score: 0.089)\n", - "11:55 madminer.utils.ml.tr INFO Epoch 10: train loss 0.09175 (mse_score: 0.092)\n", - "11:55 madminer.utils.ml.tr INFO val. 
loss 0.08943 (mse_score: 0.089)\n", - "11:55 madminer.utils.ml.tr INFO Early stopping did not improve performance\n", - "11:55 madminer.ml INFO Training estimator 2 / 5 in ensemble\n", - "11:55 madminer.ml INFO Starting training\n", - "11:55 madminer.ml INFO Method: sally\n", - "11:55 madminer.ml INFO Training data: x at data/samples/x_train.npy\n", - "11:55 madminer.ml INFO t_xz (theta0) at data/samples/t_xz_train.npy\n", - "11:55 madminer.ml INFO Features: all\n", - "11:55 madminer.ml INFO Method: sally\n", - "11:55 madminer.ml INFO Hidden layers: (100, 100, 100)\n", - "11:55 madminer.ml INFO Activation function: tanh\n", - "11:55 madminer.ml INFO Batch size: 200\n", - "11:55 madminer.ml INFO Optimizer: amsgrad\n", - "11:55 madminer.ml INFO Epochs: 10\n", - "11:55 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "11:55 madminer.ml INFO Validation split: 0.25\n", - "11:55 madminer.ml INFO Early stopping: True\n", - "11:55 madminer.ml INFO Scale inputs: True\n", - "11:55 madminer.ml INFO Shuffle labels False\n", - "11:55 madminer.ml INFO Samples: all\n", - "11:55 madminer.ml INFO Loading training data\n", - "11:55 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", - "11:55 madminer.ml INFO Rescaling inputs\n", - "11:55 madminer.ml INFO Creating model for method sally\n", - "11:55 madminer.ml INFO Training model\n", - "11:55 madminer.utils.ml.tr INFO Epoch 1: train loss 0.13218 (mse_score: 0.132)\n", - "11:55 madminer.utils.ml.tr INFO val. loss 0.11648 (mse_score: 0.116)\n", - "11:55 madminer.utils.ml.tr INFO Epoch 2: train loss 0.10358 (mse_score: 0.104)\n", - "11:55 madminer.utils.ml.tr INFO val. loss 0.10520 (mse_score: 0.105)\n", - "11:55 madminer.utils.ml.tr INFO Epoch 3: train loss 0.09927 (mse_score: 0.099)\n", - "11:55 madminer.utils.ml.tr INFO val. 
loss 0.10344 (mse_score: 0.103)\n", - "11:55 madminer.utils.ml.tr INFO Epoch 4: train loss 0.09716 (mse_score: 0.097)\n", - "11:55 madminer.utils.ml.tr INFO val. loss 0.09833 (mse_score: 0.098)\n", - "11:55 madminer.utils.ml.tr INFO Epoch 5: train loss 0.09533 (mse_score: 0.095)\n", - "11:55 madminer.utils.ml.tr INFO val. loss 0.09678 (mse_score: 0.097)\n", - "11:55 madminer.utils.ml.tr INFO Epoch 6: train loss 0.09436 (mse_score: 0.094)\n", - "11:55 madminer.utils.ml.tr INFO val. loss 0.09660 (mse_score: 0.097)\n", - "11:55 madminer.utils.ml.tr INFO Epoch 7: train loss 0.09348 (mse_score: 0.093)\n", - "11:55 madminer.utils.ml.tr INFO val. loss 0.09560 (mse_score: 0.096)\n", - "11:55 madminer.utils.ml.tr INFO Epoch 8: train loss 0.09270 (mse_score: 0.093)\n", - "11:55 madminer.utils.ml.tr INFO val. loss 0.09529 (mse_score: 0.095)\n", - "11:55 madminer.utils.ml.tr INFO Epoch 9: train loss 0.09228 (mse_score: 0.092)\n", - "11:55 madminer.utils.ml.tr INFO val. loss 0.09457 (mse_score: 0.095)\n", - "11:55 madminer.utils.ml.tr INFO Epoch 10: train loss 0.09191 (mse_score: 0.092)\n", - "11:55 madminer.utils.ml.tr INFO val. 
loss 0.09379 (mse_score: 0.094)\n", - "11:55 madminer.utils.ml.tr INFO Early stopping did not improve performance\n", - "11:55 madminer.ml INFO Training estimator 3 / 5 in ensemble\n", - "11:55 madminer.ml INFO Starting training\n", - "11:55 madminer.ml INFO Method: sally\n", - "11:55 madminer.ml INFO Training data: x at data/samples/x_train.npy\n", - "11:55 madminer.ml INFO t_xz (theta0) at data/samples/t_xz_train.npy\n", - "11:55 madminer.ml INFO Features: all\n", - "11:55 madminer.ml INFO Method: sally\n", - "11:55 madminer.ml INFO Hidden layers: (100, 100, 100)\n", - "11:55 madminer.ml INFO Activation function: tanh\n", - "11:55 madminer.ml INFO Batch size: 200\n", - "11:55 madminer.ml INFO Optimizer: amsgrad\n", - "11:55 madminer.ml INFO Epochs: 10\n", - "11:55 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "11:55 madminer.ml INFO Validation split: 0.25\n", - "11:55 madminer.ml INFO Early stopping: True\n", - "11:55 madminer.ml INFO Scale inputs: True\n", - "11:55 madminer.ml INFO Shuffle labels False\n", - "11:55 madminer.ml INFO Samples: all\n", - "11:55 madminer.ml INFO Loading training data\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "11:55 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", - "11:55 madminer.ml INFO Rescaling inputs\n", - "11:55 madminer.ml INFO Creating model for method sally\n", - "11:55 madminer.ml INFO Training model\n", - "11:56 madminer.utils.ml.tr INFO Epoch 1: train loss 0.13724 (mse_score: 0.137)\n", - "11:56 madminer.utils.ml.tr INFO val. loss 0.11112 (mse_score: 0.111)\n", - "11:56 madminer.utils.ml.tr INFO Epoch 2: train loss 0.10583 (mse_score: 0.106)\n", - "11:56 madminer.utils.ml.tr INFO val. loss 0.10091 (mse_score: 0.101)\n", - "11:56 madminer.utils.ml.tr INFO Epoch 3: train loss 0.10046 (mse_score: 0.100)\n", - "11:56 madminer.utils.ml.tr INFO val. 
loss 0.09737 (mse_score: 0.097)\n", - "11:56 madminer.utils.ml.tr INFO Epoch 4: train loss 0.09774 (mse_score: 0.098)\n", - "11:56 madminer.utils.ml.tr INFO val. loss 0.09451 (mse_score: 0.095)\n", - "11:56 madminer.utils.ml.tr INFO Epoch 5: train loss 0.09596 (mse_score: 0.096)\n", - "11:56 madminer.utils.ml.tr INFO val. loss 0.09400 (mse_score: 0.094)\n", - "11:56 madminer.utils.ml.tr INFO Epoch 6: train loss 0.09471 (mse_score: 0.095)\n", - "11:56 madminer.utils.ml.tr INFO val. loss 0.09203 (mse_score: 0.092)\n", - "11:56 madminer.utils.ml.tr INFO Epoch 7: train loss 0.09348 (mse_score: 0.093)\n", - "11:56 madminer.utils.ml.tr INFO val. loss 0.09231 (mse_score: 0.092)\n", - "11:56 madminer.utils.ml.tr INFO Epoch 8: train loss 0.09290 (mse_score: 0.093)\n", - "11:56 madminer.utils.ml.tr INFO val. loss 0.09315 (mse_score: 0.093)\n", - "11:56 madminer.utils.ml.tr INFO Epoch 9: train loss 0.09225 (mse_score: 0.092)\n", - "11:56 madminer.utils.ml.tr INFO val. loss 0.09074 (mse_score: 0.091)\n", - "11:56 madminer.utils.ml.tr INFO Epoch 10: train loss 0.09176 (mse_score: 0.092)\n", - "11:56 madminer.utils.ml.tr INFO val. 
loss 0.09156 (mse_score: 0.092)\n", - "11:56 madminer.utils.ml.tr INFO Early stopping did not improve performance\n", - "11:56 madminer.ml INFO Training estimator 4 / 5 in ensemble\n", - "11:56 madminer.ml INFO Starting training\n", - "11:56 madminer.ml INFO Method: sally\n", - "11:56 madminer.ml INFO Training data: x at data/samples/x_train.npy\n", - "11:56 madminer.ml INFO t_xz (theta0) at data/samples/t_xz_train.npy\n", - "11:56 madminer.ml INFO Features: all\n", - "11:56 madminer.ml INFO Method: sally\n", - "11:56 madminer.ml INFO Hidden layers: (100, 100, 100)\n", - "11:56 madminer.ml INFO Activation function: tanh\n", - "11:56 madminer.ml INFO Batch size: 200\n", - "11:56 madminer.ml INFO Optimizer: amsgrad\n", - "11:56 madminer.ml INFO Epochs: 10\n", - "11:56 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "11:56 madminer.ml INFO Validation split: 0.25\n", - "11:56 madminer.ml INFO Early stopping: True\n", - "11:56 madminer.ml INFO Scale inputs: True\n", - "11:56 madminer.ml INFO Shuffle labels False\n", - "11:56 madminer.ml INFO Samples: all\n", - "11:56 madminer.ml INFO Loading training data\n", - "11:56 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", - "11:56 madminer.ml INFO Rescaling inputs\n", - "11:56 madminer.ml INFO Creating model for method sally\n", - "11:56 madminer.ml INFO Training model\n", - "11:56 madminer.utils.ml.tr INFO Epoch 1: train loss 0.13115 (mse_score: 0.131)\n", - "11:56 madminer.utils.ml.tr INFO val. loss 0.11818 (mse_score: 0.118)\n", - "11:56 madminer.utils.ml.tr INFO Epoch 2: train loss 0.10282 (mse_score: 0.103)\n", - "11:56 madminer.utils.ml.tr INFO val. loss 0.10607 (mse_score: 0.106)\n", - "11:56 madminer.utils.ml.tr INFO Epoch 3: train loss 0.09835 (mse_score: 0.098)\n", - "11:56 madminer.utils.ml.tr INFO val. 
loss 0.10730 (mse_score: 0.107)\n", - "11:57 madminer.utils.ml.tr INFO Epoch 4: train loss 0.09604 (mse_score: 0.096)\n", - "11:57 madminer.utils.ml.tr INFO val. loss 0.10177 (mse_score: 0.102)\n", - "11:57 madminer.utils.ml.tr INFO Epoch 5: train loss 0.09448 (mse_score: 0.094)\n", - "11:57 madminer.utils.ml.tr INFO val. loss 0.10045 (mse_score: 0.100)\n", - "11:57 madminer.utils.ml.tr INFO Epoch 6: train loss 0.09323 (mse_score: 0.093)\n", - "11:57 madminer.utils.ml.tr INFO val. loss 0.09987 (mse_score: 0.100)\n", - "11:57 madminer.utils.ml.tr INFO Epoch 7: train loss 0.09210 (mse_score: 0.092)\n", - "11:57 madminer.utils.ml.tr INFO val. loss 0.09916 (mse_score: 0.099)\n", - "11:57 madminer.utils.ml.tr INFO Epoch 8: train loss 0.09150 (mse_score: 0.091)\n", - "11:57 madminer.utils.ml.tr INFO val. loss 0.09832 (mse_score: 0.098)\n", - "11:57 madminer.utils.ml.tr INFO Epoch 9: train loss 0.09087 (mse_score: 0.091)\n", - "11:57 madminer.utils.ml.tr INFO val. loss 0.09678 (mse_score: 0.097)\n", - "11:57 madminer.utils.ml.tr INFO Epoch 10: train loss 0.09051 (mse_score: 0.091)\n", - "11:57 madminer.utils.ml.tr INFO val. 
loss 0.09635 (mse_score: 0.096)\n", - "11:57 madminer.utils.ml.tr INFO Early stopping did not improve performance\n", - "11:57 madminer.ml INFO Training estimator 5 / 5 in ensemble\n", - "11:57 madminer.ml INFO Starting training\n", - "11:57 madminer.ml INFO Method: sally\n", - "11:57 madminer.ml INFO Training data: x at data/samples/x_train.npy\n", - "11:57 madminer.ml INFO t_xz (theta0) at data/samples/t_xz_train.npy\n", - "11:57 madminer.ml INFO Features: all\n", - "11:57 madminer.ml INFO Method: sally\n", - "11:57 madminer.ml INFO Hidden layers: (100, 100, 100)\n", - "11:57 madminer.ml INFO Activation function: tanh\n", - "11:57 madminer.ml INFO Batch size: 200\n", - "11:57 madminer.ml INFO Optimizer: amsgrad\n", - "11:57 madminer.ml INFO Epochs: 10\n", - "11:57 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "11:57 madminer.ml INFO Validation split: 0.25\n", - "11:57 madminer.ml INFO Early stopping: True\n", - "11:57 madminer.ml INFO Scale inputs: True\n", - "11:57 madminer.ml INFO Shuffle labels False\n", - "11:57 madminer.ml INFO Samples: all\n", - "11:57 madminer.ml INFO Loading training data\n", - "11:57 madminer.ml INFO Found 100000 samples with 2 parameters and 2 observables\n", - "11:57 madminer.ml INFO Rescaling inputs\n", - "11:57 madminer.ml INFO Creating model for method sally\n", - "11:57 madminer.ml INFO Training model\n", - "11:57 madminer.utils.ml.tr INFO Epoch 1: train loss 0.13555 (mse_score: 0.136)\n", - "11:57 madminer.utils.ml.tr INFO val. loss 0.10889 (mse_score: 0.109)\n", - "11:57 madminer.utils.ml.tr INFO Epoch 2: train loss 0.10552 (mse_score: 0.106)\n", - "11:57 madminer.utils.ml.tr INFO val. loss 0.10303 (mse_score: 0.103)\n", - "11:57 madminer.utils.ml.tr INFO Epoch 3: train loss 0.10057 (mse_score: 0.101)\n", - "11:57 madminer.utils.ml.tr INFO val. 
loss 0.09798 (mse_score: 0.098)\n", - "11:57 madminer.utils.ml.tr INFO Epoch 4: train loss 0.09764 (mse_score: 0.098)\n", - "11:57 madminer.utils.ml.tr INFO val. loss 0.09603 (mse_score: 0.096)\n", - "11:57 madminer.utils.ml.tr INFO Epoch 5: train loss 0.09553 (mse_score: 0.096)\n", - "11:57 madminer.utils.ml.tr INFO val. loss 0.09404 (mse_score: 0.094)\n", - "11:58 madminer.utils.ml.tr INFO Epoch 6: train loss 0.09460 (mse_score: 0.095)\n", - "11:58 madminer.utils.ml.tr INFO val. loss 0.09326 (mse_score: 0.093)\n", - "11:58 madminer.utils.ml.tr INFO Epoch 7: train loss 0.09309 (mse_score: 0.093)\n", - "11:58 madminer.utils.ml.tr INFO val. loss 0.09360 (mse_score: 0.094)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "11:58 madminer.utils.ml.tr INFO Epoch 8: train loss 0.09235 (mse_score: 0.092)\n", - "11:58 madminer.utils.ml.tr INFO val. loss 0.09164 (mse_score: 0.092)\n", - "11:58 madminer.utils.ml.tr INFO Epoch 9: train loss 0.09163 (mse_score: 0.092)\n", - "11:58 madminer.utils.ml.tr INFO val. loss 0.09133 (mse_score: 0.091)\n", - "11:58 madminer.utils.ml.tr INFO Epoch 10: train loss 0.09127 (mse_score: 0.091)\n", - "11:58 madminer.utils.ml.tr INFO val. 
loss 0.09110 (mse_score: 0.091)\n", - "11:58 madminer.utils.ml.tr INFO Early stopping did not improve performance\n" - ] - } - ], + "outputs": [], "source": [ "ensemble.train_all(\n", " method='sally',\n", - " x_filename='data/samples/x_train.npy',\n", - " t_xz0_filename='data/samples/t_xz_train.npy',\n", - " n_epochs=10,\n", + " x='data/samples/x_train.npy',\n", + " t_xz='data/samples/t_xz_train.npy',\n", + " n_epochs=5,\n", ")\n", "\n", "ensemble.save('models/sally_ensemble')" @@ -848,28 +510,9 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "12:12 madminer.fisherinfor INFO Loading data from data/madminer_example_shuffled.h5\n", - "12:12 madminer.fisherinfor INFO Found 2 parameters\n", - "12:12 madminer.fisherinfor WARNING Did not find nuisance parameters!\n", - "12:12 madminer.fisherinfor INFO Found 6 benchmarks, of which 6 physical\n", - "12:12 madminer.fisherinfor INFO Found 2 observables: pt_j1, delta_phi_jj\n", - "12:12 madminer.fisherinfor INFO Found 6537 events\n", - "12:12 madminer.fisherinfor INFO Found morphing setup with 6 components\n", - "12:12 madminer.ml INFO Found ensemble with 5 estimators and expectations None\n", - "12:12 madminer.fisherinfor INFO Evaluating rate Fisher information\n", - "12:12 madminer.utils.inter WARNING include_nuisance_parameters=False without benchmark_is_nuisance information. Returning all weights.\n", - "12:12 madminer.utils.inter WARNING include_nuisance_parameters=False without benchmark_is_nuisance information. 
Returning all weights.\n", - "12:12 madminer.fisherinfor INFO Evaluating kinematic Fisher information on batch 1 / 1\n" - ] - } - ], + "outputs": [], "source": [ "fisher = FisherInformation('data/madminer_example_shuffled.h5')\n", "\n", @@ -890,22 +533,9 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "_ = plot_fisher_information_contours_2d(\n", " [fisher_information_mean],\n", diff --git a/examples/tutorial_parton_level/3_systematic_uncertainties.ipynb b/examples/tutorial_parton_level/3_systematic_uncertainties.ipynb new file mode 100755 index 000000000..d1f706149 --- /dev/null +++ b/examples/tutorial_parton_level/3_systematic_uncertainties.ipynb @@ -0,0 +1,366 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MadMiner parton-level tutorial, part 3: Systematic uncertainties\n", + "\n", + "Johann Brehmer, Felix Kling, Kyle Cranmer 2018" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this tutorial we'll explain how to add systematic uncertainties to the MadMiner workflow." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Preparations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before you execute this notebook, make sure you have running installations of MadGraph, Pythia, and Delphes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import absolute_import, division, print_function, unicode_literals\n", + "\n", + "import logging\n", + "import numpy as np\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "from madminer.core import MadMiner\n", + "from madminer.lhe import LHEReader\n", + "from madminer.sampling import combine_and_shuffle\n", + "from madminer.sampling import SampleAugmenter\n", + "from madminer import sampling\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Please enter here the path to your MG5 root directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mg_dir = '/Users/johannbrehmer/work/projects/madminer/MG5_aMC_v2_6_4'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "MadMiner uses the Python `logging` module to provide additional information and debugging output. You can choose how much of this output you want to see by switching the level in the following lines to `logging.DEBUG` or `logging.WARNING`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# MadMiner output\n", + "logging.basicConfig(\n", + " format='%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s',\n", + " datefmt='%H:%M',\n", + " level=logging.INFO\n", + ")\n", + "\n", + "# Output of all other modules (e.g. matplotlib)\n", + "for key in logging.Logger.manager.loggerDict:\n", + " if \"madminer\" not in key:\n", + " logging.getLogger(key).setLevel(logging.WARNING)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.-2. Parameters and benchmarks" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll just load the MadMiner setup from the first part of this tutorial:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "miner = MadMiner()\n", + "miner.load('data/madminer_example.h5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Set up systematics, save settings, run MadGraph" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is where things become interesting: We want to model systematic uncertainties. Currently this can be done in one of two ways: based on scale variation or based on PDF variations. You can also use both simultaneously. 
Here we just vary the scales:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "miner.set_systematics(scale_variation=(0.5,2.), pdf_variation=None)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Again, we save our setup:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "miner.save('data/madminer_example_systematics.h5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now it's time to run MadGraph. MadMiner will instruct MadGraph to use its built-in `systematics` tool to calculate how the event weights change under the scale variation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "miner.run(\n", + " sample_benchmark='sm',\n", + " mg_directory=mg_dir,\n", + " mg_process_directory='./mg_processes/signal_systematics',\n", + " proc_card_file='cards/proc_card_signal.dat',\n", + " param_card_template_file='cards/param_card_template.dat',\n", + " run_card_file='cards/run_card_signal.dat',\n", + " log_directory='logs/signal',\n", + " python2_override=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 
Run smearing and extract observables" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is just as before:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lhe = LHEReader('data/madminer_example_systematics.h5')\n", + "\n", + "lhe.add_sample(\n", + " lhe_filename='mg_processes/signal_systematics/Events/run_01/unweighted_events.lhe.gz',\n", + " sampled_from_benchmark='sm',\n", + " is_background=False,\n", + " k_factor=1.1,\n", + ")\n", + "\n", + "lhe.set_smearing(\n", + " pdgids=[1,2,3,4,5,6,9,22,-1,-2,-3,-4,-5,-6], # Partons giving rise to jets\n", + " energy_resolution_abs=0.,\n", + " energy_resolution_rel=0.1,\n", + " pt_resolution_abs=None,\n", + " pt_resolution_rel=None,\n", + " eta_resolution_abs=0.1,\n", + " eta_resolution_rel=0.,\n", + " phi_resolution_abs=0.1,\n", + " phi_resolution_rel=0.,\n", + ")\n", + "\n", + "lhe.add_observable(\n", + " 'pt_j1',\n", + " 'j[0].pt',\n", + " required=False,\n", + " default=0.,\n", + ")\n", + "lhe.add_observable(\n", + " 'delta_phi_jj',\n", + " 'j[0].deltaphi(j[1]) * (-1. 
+ 2.*float(j[0].eta > j[1].eta))',\n", + " required=True,\n", + ")\n", + "lhe.add_observable(\n", + " 'met',\n", + " 'met.pt',\n", + " required=True,\n", + ")\n", + "\n", + "lhe.add_cut('(a[0] + a[1]).m > 124.')\n", + "lhe.add_cut('(a[0] + a[1]).m < 126.')\n", + "lhe.add_cut('pt_j1 > 30.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "lhe.analyse_samples()\n", + "lhe.save('data/madminer_example_systematics_with_data.h5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### A look at distributions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see what our MC run produced:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = plot_uncertainty(\n", + " filename='data/madminer_example_systematics_with_data.h5',\n", + " parameter_points=['sm', np.array([10.,0.])],\n", + " line_labels=['SM', 'BSM'],\n", + " uncertainties='none',\n", + " n_bins=20,\n", + " n_cols=3,\n", + " normalize=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Make (unweighted) training and test samples with augmented data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sampler = SampleAugmenter('data/madminer_example_systematics_with_data.h5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When we generate training data, we now also have to specify the values of the nuisance parameters. The helper functions `sampling.nominal_nuisance_parameters()` and `sampling.iid_nuisance_parameters()` can be used in addition to the usual ones. The `theta0` and `theta1` return now includes values for the nuisance parameters." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x, theta0, theta1, y, r_xz, t_xz, _ = sampler.sample_train_ratio(\n", + " theta0=sampling.random_morphing_points(100, [('gaussian', 0., 15.), ('gaussian', 0., 15.)]),\n", + " theta1=sampling.benchmark('sm'),\n", + " nu0=sampling.iid_nuisance_parameters(\"gaussian\", 0., 1.),\n", + " nu1=sampling.nominal_nuisance_parameters(),\n", + " n_samples=1000,\n", + " folder='./data/samples',\n", + " filename='train'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To be continued..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/tutorial_parton_level/debug.py b/examples/tutorial_parton_level/debug.py new file mode 100644 index 000000000..d230f50a7 --- /dev/null +++ b/examples/tutorial_parton_level/debug.py @@ -0,0 +1,27 @@ +#! 
/usr/bin/env python + +from __future__ import absolute_import, division, print_function, unicode_literals + +import logging +logging.basicConfig( + format='%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s', + datefmt='%H:%M', + level=logging.INFO +) + +from madminer import sampling +from madminer.sampling import SampleAugmenter +from madminer.fisherinformation import FisherInformation +from madminer.ml import ScoreEstimator + +sampler = SampleAugmenter('data/madminer_example_systematics_with_data.h5') + +x, theta0, theta1, y, r_xz, t_xz, n_effective_samples = sampler.sample_train_ratio( + theta0=sampling.random_morphing_points(None, [('gaussian', 0., 15.), ('gaussian', 0., 15.)]), + theta1=sampling.benchmark('sm'), + nu0=sampling.iid_nuisance_parameters(), + nu1=sampling.nominal_nuisance_parameters(), + n_samples=10000, +) + +logging.info("x: %s", x) diff --git a/examples/tutorial_toy_simulator/tutorial_toy_simulator.ipynb b/examples/tutorial_toy_simulator/tutorial_toy_simulator.ipynb index 2122e37c3..7551c5a3e 100644 --- a/examples/tutorial_toy_simulator/tutorial_toy_simulator.ipynb +++ b/examples/tutorial_toy_simulator/tutorial_toy_simulator.ipynb @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -41,12 +41,12 @@ "from matplotlib import pyplot as plt\n", "%matplotlib inline\n", "\n", - "from madminer.ml import MLForge" + "from madminer.ml import LikelihoodEstimator, ParameterizedRatioEstimator" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -63,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -102,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -119,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": 40, + 
"execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -153,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -172,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -186,7 +186,7 @@ }, { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -236,7 +236,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -255,7 +255,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -275,7 +275,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -294,18 +294,6 @@ "np.save('data/t_xz_train.npy', t_xz_train)" ] }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [], - "source": [ - "# For flow training\n", - "np.save('data/theta0_train_density.npy', theta0.reshape(-1,1))\n", - "np.save('data/x_train_density.npy', x_from_theta0.reshape(-1,1))\n", - "np.save('data/t_xz_train_density.npy', t_xz_from_theta0.reshape(-1,1))" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -329,281 +317,293 @@ "name": "stderr", "output_type": "stream", "text": [ - "17:37 madminer.ml INFO Starting training\n", - "17:37 madminer.ml INFO Method: carl\n", - "17:37 madminer.ml INFO Training data: x at data/x_train.npy\n", - "17:37 madminer.ml INFO theta0 at data/theta0_train.npy\n", - "17:37 madminer.ml INFO y at data/y_train.npy\n", - "17:37 madminer.ml INFO Features: all\n", - "17:37 madminer.ml INFO Method: carl\n", - "17:37 madminer.ml INFO Hidden layers: (20, 20)\n", - "17:37 madminer.ml INFO Activation function: tanh\n", - "17:37 madminer.ml INFO Batch size: 200\n", - "17:37 madminer.ml INFO Optimizer: amsgrad\n", - "17:37 madminer.ml INFO Epochs: 20\n", - "17:37 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "17:37 madminer.ml INFO Validation split: 0.25\n", - "17:37 madminer.ml INFO Early stopping: True\n", - "17:37 madminer.ml INFO Scale inputs: True\n", - "17:37 madminer.ml INFO Shuffle labels False\n", - "17:37 madminer.ml INFO Samples: all\n", - "17:37 madminer.ml INFO Loading training data\n", - "17:37 madminer.ml INFO Found 100000 samples 
with 1 parameters and 1 observables\n", - "17:37 madminer.ml INFO Rescaling inputs\n", - "17:37 madminer.ml INFO Creating model for method carl\n", - "17:37 madminer.ml INFO Training model\n", - "17:37 madminer.utils.ml.tr INFO Epoch 1: train loss 0.63091 (xe: 0.631)\n", - "17:37 madminer.utils.ml.tr INFO val. loss 0.59211 (xe: 0.592)\n", - "17:37 madminer.utils.ml.tr INFO Epoch 2: train loss 0.59132 (xe: 0.591)\n", - "17:37 madminer.utils.ml.tr INFO val. loss 0.58826 (xe: 0.588)\n", - "17:37 madminer.utils.ml.tr INFO Epoch 3: train loss 0.58932 (xe: 0.589)\n", - "17:37 madminer.utils.ml.tr INFO val. loss 0.58768 (xe: 0.588)\n", - "17:38 madminer.utils.ml.tr INFO Epoch 4: train loss 0.58851 (xe: 0.589)\n", - "17:38 madminer.utils.ml.tr INFO val. loss 0.58663 (xe: 0.587)\n", - "17:38 madminer.utils.ml.tr INFO Epoch 5: train loss 0.58779 (xe: 0.588)\n", - "17:38 madminer.utils.ml.tr INFO val. loss 0.58573 (xe: 0.586)\n", - "17:38 madminer.utils.ml.tr INFO Epoch 6: train loss 0.58742 (xe: 0.587)\n", - "17:38 madminer.utils.ml.tr INFO val. loss 0.58609 (xe: 0.586)\n", - "17:38 madminer.utils.ml.tr INFO Epoch 7: train loss 0.58707 (xe: 0.587)\n", - "17:38 madminer.utils.ml.tr INFO val. loss 0.58517 (xe: 0.585)\n", - "17:38 madminer.utils.ml.tr INFO Epoch 8: train loss 0.58683 (xe: 0.587)\n", - "17:38 madminer.utils.ml.tr INFO val. loss 0.58653 (xe: 0.587)\n", - "17:39 madminer.utils.ml.tr INFO Epoch 9: train loss 0.58645 (xe: 0.586)\n", - "17:39 madminer.utils.ml.tr INFO val. loss 0.58496 (xe: 0.585)\n", - "17:39 madminer.utils.ml.tr INFO Epoch 10: train loss 0.58649 (xe: 0.586)\n", - "17:39 madminer.utils.ml.tr INFO val. loss 0.58473 (xe: 0.585)\n", - "17:39 madminer.utils.ml.tr INFO Epoch 11: train loss 0.58621 (xe: 0.586)\n", - "17:39 madminer.utils.ml.tr INFO val. loss 0.58466 (xe: 0.585)\n", - "17:39 madminer.utils.ml.tr INFO Epoch 12: train loss 0.58616 (xe: 0.586)\n", - "17:39 madminer.utils.ml.tr INFO val. 
loss 0.58450 (xe: 0.585)\n", - "17:39 madminer.utils.ml.tr INFO Epoch 13: train loss 0.58597 (xe: 0.586)\n", - "17:39 madminer.utils.ml.tr INFO val. loss 0.58468 (xe: 0.585)\n", - "17:40 madminer.utils.ml.tr INFO Epoch 14: train loss 0.58593 (xe: 0.586)\n", - "17:40 madminer.utils.ml.tr INFO val. loss 0.58440 (xe: 0.584)\n", - "17:40 madminer.utils.ml.tr INFO Epoch 15: train loss 0.58588 (xe: 0.586)\n", - "17:40 madminer.utils.ml.tr INFO val. loss 0.58429 (xe: 0.584)\n", - "17:40 madminer.utils.ml.tr INFO Epoch 16: train loss 0.58582 (xe: 0.586)\n", - "17:40 madminer.utils.ml.tr INFO val. loss 0.58439 (xe: 0.584)\n", - "17:40 madminer.utils.ml.tr INFO Epoch 17: train loss 0.58574 (xe: 0.586)\n", - "17:40 madminer.utils.ml.tr INFO val. loss 0.58460 (xe: 0.585)\n", - "17:40 madminer.utils.ml.tr INFO Epoch 18: train loss 0.58579 (xe: 0.586)\n", - "17:40 madminer.utils.ml.tr INFO val. loss 0.58431 (xe: 0.584)\n", - "17:40 madminer.utils.ml.tr INFO Epoch 19: train loss 0.58572 (xe: 0.586)\n", - "17:40 madminer.utils.ml.tr INFO val. loss 0.58421 (xe: 0.584)\n", - "17:41 madminer.utils.ml.tr INFO Epoch 20: train loss 0.58567 (xe: 0.586)\n", - "17:41 madminer.utils.ml.tr INFO val. 
loss 0.58421 (xe: 0.584)\n", - "17:41 madminer.utils.ml.tr INFO Early stopping did not improve performance\n" + "11:26 madminer.ml INFO Starting training\n", + "11:26 madminer.ml INFO Method: carl\n", + "11:26 madminer.ml INFO Batch size: 200\n", + "11:26 madminer.ml INFO Optimizer: amsgrad\n", + "11:26 madminer.ml INFO Epochs: 20\n", + "11:26 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", + "11:26 madminer.ml INFO Validation split: 0.25\n", + "11:26 madminer.ml INFO Early stopping: True\n", + "11:26 madminer.ml INFO Scale inputs: True\n", + "11:26 madminer.ml INFO Shuffle labels False\n", + "11:26 madminer.ml INFO Samples: all\n", + "11:26 madminer.ml INFO Loading training data\n", + "11:26 madminer.ml INFO Found 100000 samples with 1 parameters and 1 observables\n", + "11:26 madminer.ml INFO Rescaling inputs\n", + "11:26 madminer.ml INFO Creating model\n", + "11:26 madminer.ml INFO Training model\n", + "11:27 madminer.utils.ml.tr INFO Epoch 1: train loss 0.63546 (xe: 0.635)\n", + "11:27 madminer.utils.ml.tr INFO val. loss 0.59570 (xe: 0.596)\n", + "11:27 madminer.utils.ml.tr INFO Epoch 2: train loss 0.59274 (xe: 0.593)\n", + "11:27 madminer.utils.ml.tr INFO val. loss 0.59289 (xe: 0.593)\n", + "11:27 madminer.utils.ml.tr INFO Epoch 3: train loss 0.59106 (xe: 0.591)\n", + "11:27 madminer.utils.ml.tr INFO val. loss 0.59155 (xe: 0.592)\n", + "11:27 madminer.utils.ml.tr INFO Epoch 4: train loss 0.58970 (xe: 0.590)\n", + "11:27 madminer.utils.ml.tr INFO val. loss 0.59107 (xe: 0.591)\n", + "11:27 madminer.utils.ml.tr INFO Epoch 5: train loss 0.58887 (xe: 0.589)\n", + "11:27 madminer.utils.ml.tr INFO val. loss 0.59029 (xe: 0.590)\n", + "11:27 madminer.utils.ml.tr INFO Epoch 6: train loss 0.58854 (xe: 0.589)\n", + "11:27 madminer.utils.ml.tr INFO val. loss 0.59009 (xe: 0.590)\n", + "11:27 madminer.utils.ml.tr INFO Epoch 7: train loss 0.58803 (xe: 0.588)\n", + "11:27 madminer.utils.ml.tr INFO val. 
loss 0.59006 (xe: 0.590)\n", + "11:27 madminer.utils.ml.tr INFO Epoch 8: train loss 0.58782 (xe: 0.588)\n", + "11:27 madminer.utils.ml.tr INFO val. loss 0.58939 (xe: 0.589)\n", + "11:28 madminer.utils.ml.tr INFO Epoch 9: train loss 0.58747 (xe: 0.587)\n", + "11:28 madminer.utils.ml.tr INFO val. loss 0.58919 (xe: 0.589)\n", + "11:28 madminer.utils.ml.tr INFO Epoch 10: train loss 0.58730 (xe: 0.587)\n", + "11:28 madminer.utils.ml.tr INFO val. loss 0.58897 (xe: 0.589)\n", + "11:28 madminer.utils.ml.tr INFO Epoch 11: train loss 0.58727 (xe: 0.587)\n", + "11:28 madminer.utils.ml.tr INFO val. loss 0.58894 (xe: 0.589)\n", + "11:28 madminer.utils.ml.tr INFO Epoch 12: train loss 0.58705 (xe: 0.587)\n", + "11:28 madminer.utils.ml.tr INFO val. loss 0.58877 (xe: 0.589)\n", + "11:28 madminer.utils.ml.tr INFO Epoch 13: train loss 0.58688 (xe: 0.587)\n", + "11:28 madminer.utils.ml.tr INFO val. loss 0.58873 (xe: 0.589)\n", + "11:28 madminer.utils.ml.tr INFO Epoch 14: train loss 0.58687 (xe: 0.587)\n", + "11:28 madminer.utils.ml.tr INFO val. loss 0.58925 (xe: 0.589)\n", + "11:28 madminer.utils.ml.tr INFO Epoch 15: train loss 0.58659 (xe: 0.587)\n", + "11:28 madminer.utils.ml.tr INFO val. loss 0.58855 (xe: 0.589)\n", + "11:29 madminer.utils.ml.tr INFO Epoch 16: train loss 0.58669 (xe: 0.587)\n", + "11:29 madminer.utils.ml.tr INFO val. loss 0.58853 (xe: 0.589)\n", + "11:29 madminer.utils.ml.tr INFO Epoch 17: train loss 0.58662 (xe: 0.587)\n", + "11:29 madminer.utils.ml.tr INFO val. loss 0.58847 (xe: 0.588)\n", + "11:29 madminer.utils.ml.tr INFO Epoch 18: train loss 0.58656 (xe: 0.587)\n", + "11:29 madminer.utils.ml.tr INFO val. loss 0.58849 (xe: 0.588)\n", + "11:29 madminer.utils.ml.tr INFO Epoch 19: train loss 0.58652 (xe: 0.587)\n", + "11:29 madminer.utils.ml.tr INFO val. loss 0.58845 (xe: 0.588)\n", + "11:29 madminer.utils.ml.tr INFO Epoch 20: train loss 0.58646 (xe: 0.586)\n", + "11:29 madminer.utils.ml.tr INFO val. 
loss 0.58843 (xe: 0.588)\n", + "11:29 madminer.utils.ml.tr INFO Early stopping did not improve performance\n" ] } ], "source": [ - "forge = MLForge()\n", + "carl = ParameterizedRatioEstimator(\n", + " n_hidden=(20,20)\n", + ")\n", "\n", - "forge.train(\n", + "carl.train(\n", " method='carl',\n", - " x_filename='data/x_train.npy',\n", - " y_filename='data/y_train.npy',\n", - " theta0_filename='data/theta0_train.npy',\n", + " x='data/x_train.npy',\n", + " y='data/y_train.npy',\n", + " theta='data/theta0_train.npy',\n", " n_epochs=20,\n", - " n_hidden=(20,20),\n", ")\n", "\n", - "forge.save('models/carl')" + "carl.save('models/carl')" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "11:02 madminer.ml INFO Starting training\n", - "11:02 madminer.ml INFO Method: alices\n", - "11:02 madminer.ml INFO Training data: x at data/x_train.npy\n", - "11:02 madminer.ml INFO theta0 at data/theta0_train.npy\n", - "11:02 madminer.ml INFO y at data/y_train.npy\n", - "11:02 madminer.ml INFO r_xz at data/r_xz_train.npy\n", - "11:02 madminer.ml INFO t_xz (theta0) at data/t_xz_train.npy\n", - "11:02 madminer.ml INFO Features: all\n", - "11:02 madminer.ml INFO Method: alices\n", - "11:02 madminer.ml INFO Hidden layers: (20, 20)\n", - "11:02 madminer.ml INFO Activation function: tanh\n", - "11:02 madminer.ml INFO alpha: 0.1\n", - "11:02 madminer.ml INFO Batch size: 200\n", - "11:02 madminer.ml INFO Optimizer: amsgrad\n", - "11:02 madminer.ml INFO Epochs: 20\n", - "11:02 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "11:02 madminer.ml INFO Validation split: 0.25\n", - "11:02 madminer.ml INFO Early stopping: True\n", - "11:02 madminer.ml INFO Scale inputs: True\n", - "11:02 madminer.ml INFO Shuffle labels False\n", - "11:02 madminer.ml INFO Samples: all\n", - "11:02 madminer.ml INFO Loading training data\n", - "11:02 madminer.ml INFO Found 100000 
samples with 1 parameters and 1 observables\n", - "11:02 madminer.ml INFO Rescaling inputs\n", - "11:02 madminer.ml INFO Creating model for method alices\n", - "11:02 madminer.ml INFO Training model\n", - "11:02 madminer.utils.ml.tr INFO Epoch 1: train loss 0.63722 (improved_xe: 0.628, mse_score: 0.088)\n", - "11:02 madminer.utils.ml.tr INFO val. loss 0.59300 (improved_xe: 0.590, mse_score: 0.034)\n", - "11:02 madminer.utils.ml.tr INFO Epoch 2: train loss 0.59024 (improved_xe: 0.588, mse_score: 0.025)\n", - "11:02 madminer.utils.ml.tr INFO val. loss 0.58816 (improved_xe: 0.586, mse_score: 0.017)\n", - "11:03 madminer.utils.ml.tr INFO Epoch 3: train loss 0.58742 (improved_xe: 0.586, mse_score: 0.015)\n", - "11:03 madminer.utils.ml.tr INFO val. loss 0.58671 (improved_xe: 0.586, mse_score: 0.012)\n", - "11:03 madminer.utils.ml.tr INFO Epoch 4: train loss 0.58656 (improved_xe: 0.585, mse_score: 0.011)\n", - "11:03 madminer.utils.ml.tr INFO val. loss 0.58610 (improved_xe: 0.585, mse_score: 0.009)\n", - "11:03 madminer.utils.ml.tr INFO Epoch 5: train loss 0.58618 (improved_xe: 0.585, mse_score: 0.010)\n", - "11:03 madminer.utils.ml.tr INFO val. loss 0.58585 (improved_xe: 0.585, mse_score: 0.008)\n", - "11:03 madminer.utils.ml.tr INFO Epoch 6: train loss 0.58599 (improved_xe: 0.585, mse_score: 0.009)\n", - "11:03 madminer.utils.ml.tr INFO val. loss 0.58575 (improved_xe: 0.585, mse_score: 0.008)\n", - "11:03 madminer.utils.ml.tr INFO Epoch 7: train loss 0.58586 (improved_xe: 0.585, mse_score: 0.008)\n", - "11:03 madminer.utils.ml.tr INFO val. loss 0.58574 (improved_xe: 0.585, mse_score: 0.007)\n", - "11:03 madminer.utils.ml.tr INFO Epoch 8: train loss 0.58579 (improved_xe: 0.585, mse_score: 0.007)\n", - "11:03 madminer.utils.ml.tr INFO val. loss 0.58554 (improved_xe: 0.585, mse_score: 0.007)\n", - "11:03 madminer.utils.ml.tr INFO Epoch 9: train loss 0.58570 (improved_xe: 0.585, mse_score: 0.007)\n", - "11:03 madminer.utils.ml.tr INFO val. 
loss 0.58557 (improved_xe: 0.585, mse_score: 0.007)\n", - "11:04 madminer.utils.ml.tr INFO Epoch 10: train loss 0.58565 (improved_xe: 0.585, mse_score: 0.007)\n", - "11:04 madminer.utils.ml.tr INFO val. loss 0.58558 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:04 madminer.utils.ml.tr INFO Epoch 11: train loss 0.58560 (improved_xe: 0.585, mse_score: 0.007)\n", - "11:04 madminer.utils.ml.tr INFO val. loss 0.58543 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:04 madminer.utils.ml.tr INFO Epoch 12: train loss 0.58557 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:04 madminer.utils.ml.tr INFO val. loss 0.58547 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:04 madminer.utils.ml.tr INFO Epoch 13: train loss 0.58555 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:04 madminer.utils.ml.tr INFO val. loss 0.58547 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:04 madminer.utils.ml.tr INFO Epoch 14: train loss 0.58552 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:04 madminer.utils.ml.tr INFO val. loss 0.58541 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:04 madminer.utils.ml.tr INFO Epoch 15: train loss 0.58549 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:04 madminer.utils.ml.tr INFO val. loss 0.58536 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:04 madminer.utils.ml.tr INFO Epoch 16: train loss 0.58549 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:04 madminer.utils.ml.tr INFO val. loss 0.58541 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:05 madminer.utils.ml.tr INFO Epoch 17: train loss 0.58547 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:05 madminer.utils.ml.tr INFO val. loss 0.58535 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:05 madminer.utils.ml.tr INFO Epoch 18: train loss 0.58546 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:05 madminer.utils.ml.tr INFO val. 
loss 0.58534 (improved_xe: 0.585, mse_score: 0.005)\n", - "11:05 madminer.utils.ml.tr INFO Epoch 19: train loss 0.58546 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:05 madminer.utils.ml.tr INFO val. loss 0.58534 (improved_xe: 0.585, mse_score: 0.005)\n", - "11:05 madminer.utils.ml.tr INFO Epoch 20: train loss 0.58544 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:05 madminer.utils.ml.tr INFO val. loss 0.58532 (improved_xe: 0.585, mse_score: 0.006)\n", - "11:05 madminer.utils.ml.tr INFO Early stopping did not improve performance\n" + "11:29 madminer.ml INFO Starting training\n", + "11:29 madminer.ml INFO Method: alices\n", + "11:29 madminer.ml INFO alpha: 0.1\n", + "11:29 madminer.ml INFO Batch size: 200\n", + "11:29 madminer.ml INFO Optimizer: amsgrad\n", + "11:29 madminer.ml INFO Epochs: 20\n", + "11:29 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", + "11:29 madminer.ml INFO Validation split: 0.25\n", + "11:29 madminer.ml INFO Early stopping: True\n", + "11:29 madminer.ml INFO Scale inputs: True\n", + "11:29 madminer.ml INFO Shuffle labels False\n", + "11:29 madminer.ml INFO Samples: all\n", + "11:29 madminer.ml INFO Loading training data\n", + "11:29 madminer.ml INFO Found 100000 samples with 1 parameters and 1 observables\n", + "11:29 madminer.ml INFO Rescaling inputs\n", + "11:29 madminer.ml INFO Creating model\n", + "11:29 madminer.ml INFO Training model\n", + "11:29 madminer.utils.ml.tr INFO Epoch 1: train loss 0.62598 (improved_xe: 0.619, mse_score: 0.074)\n", + "11:29 madminer.utils.ml.tr INFO val. loss 0.59175 (improved_xe: 0.589, mse_score: 0.031)\n", + "11:29 madminer.utils.ml.tr INFO Epoch 2: train loss 0.58943 (improved_xe: 0.587, mse_score: 0.021)\n", + "11:29 madminer.utils.ml.tr INFO val. loss 0.58757 (improved_xe: 0.586, mse_score: 0.017)\n", + "11:29 madminer.utils.ml.tr INFO Epoch 3: train loss 0.58713 (improved_xe: 0.586, mse_score: 0.013)\n", + "11:29 madminer.utils.ml.tr INFO val. 
loss 0.58621 (improved_xe: 0.585, mse_score: 0.012)\n", + "11:30 madminer.utils.ml.tr INFO Epoch 4: train loss 0.58627 (improved_xe: 0.585, mse_score: 0.010)\n", + "11:30 madminer.utils.ml.tr INFO val. loss 0.58559 (improved_xe: 0.585, mse_score: 0.010)\n", + "11:30 madminer.utils.ml.tr INFO Epoch 5: train loss 0.58587 (improved_xe: 0.585, mse_score: 0.008)\n", + "11:30 madminer.utils.ml.tr INFO val. loss 0.58530 (improved_xe: 0.584, mse_score: 0.009)\n", + "11:30 madminer.utils.ml.tr INFO Epoch 6: train loss 0.58563 (improved_xe: 0.585, mse_score: 0.007)\n", + "11:30 madminer.utils.ml.tr INFO val. loss 0.58508 (improved_xe: 0.584, mse_score: 0.007)\n", + "11:30 madminer.utils.ml.tr INFO Epoch 7: train loss 0.58547 (improved_xe: 0.585, mse_score: 0.007)\n", + "11:30 madminer.utils.ml.tr INFO val. loss 0.58504 (improved_xe: 0.584, mse_score: 0.007)\n", + "11:30 madminer.utils.ml.tr INFO Epoch 8: train loss 0.58538 (improved_xe: 0.585, mse_score: 0.006)\n", + "11:30 madminer.utils.ml.tr INFO val. loss 0.58491 (improved_xe: 0.584, mse_score: 0.007)\n", + "11:30 madminer.utils.ml.tr INFO Epoch 9: train loss 0.58531 (improved_xe: 0.585, mse_score: 0.006)\n", + "11:30 madminer.utils.ml.tr INFO val. loss 0.58486 (improved_xe: 0.584, mse_score: 0.006)\n", + "11:31 madminer.utils.ml.tr INFO Epoch 10: train loss 0.58526 (improved_xe: 0.585, mse_score: 0.006)\n", + "11:31 madminer.utils.ml.tr INFO val. loss 0.58481 (improved_xe: 0.584, mse_score: 0.006)\n", + "11:31 madminer.utils.ml.tr INFO Epoch 11: train loss 0.58522 (improved_xe: 0.585, mse_score: 0.006)\n", + "11:31 madminer.utils.ml.tr INFO val. loss 0.58477 (improved_xe: 0.584, mse_score: 0.006)\n", + "11:31 madminer.utils.ml.tr INFO Epoch 12: train loss 0.58518 (improved_xe: 0.585, mse_score: 0.005)\n", + "11:31 madminer.utils.ml.tr INFO val. 
loss 0.58482 (improved_xe: 0.584, mse_score: 0.006)\n", + "11:31 madminer.utils.ml.tr INFO Epoch 13: train loss 0.58516 (improved_xe: 0.585, mse_score: 0.005)\n", + "11:31 madminer.utils.ml.tr INFO val. loss 0.58481 (improved_xe: 0.584, mse_score: 0.006)\n", + "11:31 madminer.utils.ml.tr INFO Epoch 14: train loss 0.58513 (improved_xe: 0.585, mse_score: 0.005)\n", + "11:31 madminer.utils.ml.tr INFO val. loss 0.58472 (improved_xe: 0.584, mse_score: 0.006)\n", + "11:31 madminer.utils.ml.tr INFO Epoch 15: train loss 0.58512 (improved_xe: 0.585, mse_score: 0.005)\n", + "11:31 madminer.utils.ml.tr INFO val. loss 0.58473 (improved_xe: 0.584, mse_score: 0.006)\n", + "11:32 madminer.utils.ml.tr INFO Epoch 16: train loss 0.58511 (improved_xe: 0.585, mse_score: 0.005)\n", + "11:32 madminer.utils.ml.tr INFO val. loss 0.58472 (improved_xe: 0.584, mse_score: 0.006)\n", + "11:32 madminer.utils.ml.tr INFO Epoch 17: train loss 0.58509 (improved_xe: 0.585, mse_score: 0.005)\n", + "11:32 madminer.utils.ml.tr INFO val. loss 0.58466 (improved_xe: 0.584, mse_score: 0.006)\n", + "11:32 madminer.utils.ml.tr INFO Epoch 18: train loss 0.58508 (improved_xe: 0.585, mse_score: 0.005)\n", + "11:32 madminer.utils.ml.tr INFO val. loss 0.58468 (improved_xe: 0.584, mse_score: 0.006)\n", + "11:32 madminer.utils.ml.tr INFO Epoch 19: train loss 0.58507 (improved_xe: 0.585, mse_score: 0.005)\n", + "11:32 madminer.utils.ml.tr INFO val. loss 0.58474 (improved_xe: 0.584, mse_score: 0.005)\n", + "11:32 madminer.utils.ml.tr INFO Epoch 20: train loss 0.58506 (improved_xe: 0.585, mse_score: 0.005)\n", + "11:32 madminer.utils.ml.tr INFO val. 
loss 0.58464 (improved_xe: 0.584, mse_score: 0.006)\n", + "11:32 madminer.utils.ml.tr INFO Early stopping did not improve performance\n" ] } ], "source": [ - "forge = MLForge()\n", + "alices = ParameterizedRatioEstimator(\n", + " n_hidden=(20,20)\n", + ")\n", "\n", - "forge.train(\n", + "alices.train(\n", " method='alices',\n", - " x_filename='data/x_train.npy',\n", - " y_filename='data/y_train.npy',\n", - " theta0_filename='data/theta0_train.npy',\n", - " r_xz_filename='data/r_xz_train.npy',\n", - " t_xz0_filename='data/t_xz_train.npy',\n", + " x='data/x_train.npy',\n", + " y='data/y_train.npy',\n", + " theta='data/theta0_train.npy',\n", + " r_xz='data/r_xz_train.npy',\n", + " t_xz='data/t_xz_train.npy',\n", " alpha=0.1,\n", " n_epochs=20,\n", - " n_hidden=(20,20),\n", ")\n", "\n", - "forge.save('models/alices')" + "alices.save('models/alices')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also try a little bit of mixing and matching -- let's train a model with CARL first and then with ALICES:" ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "11:05 madminer.ml INFO Starting training\n", - "11:05 madminer.ml INFO Method: scandal\n", - "11:05 madminer.ml INFO Training data: x at data/x_train_density.npy\n", - "11:05 madminer.ml INFO theta0 at data/theta0_train_density.npy\n", - "11:05 madminer.ml INFO t_xz (theta0) at data/t_xz_train_density.npy\n", - "11:05 madminer.ml INFO Features: all\n", - "11:05 madminer.ml INFO Method: scandal\n", - "11:05 madminer.ml INFO Neural density est.: maf\n", - "11:05 madminer.ml INFO MAF, number MADEs: 3\n", - "11:05 madminer.ml INFO MAF, batch norm: False\n", - "11:05 madminer.ml INFO MAF, BN alpha: 0.1\n", - "11:05 madminer.ml INFO MAF MoG, components: 10\n", - "11:05 madminer.ml INFO Activation function: tanh\n", - "11:05 madminer.ml INFO alpha: 50.0\n", - "11:05 madminer.ml INFO 
Batch size: 200\n", - "11:05 madminer.ml INFO Optimizer: amsgrad\n", - "11:05 madminer.ml INFO Epochs: 20\n", - "11:05 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0001\n", - "11:05 madminer.ml INFO Validation split: 0.25\n", - "11:05 madminer.ml INFO Early stopping: True\n", - "11:05 madminer.ml INFO Scale inputs: True\n", - "11:05 madminer.ml INFO Shuffle labels False\n", - "11:05 madminer.ml INFO Samples: all\n", - "11:05 madminer.ml INFO Loading training data\n", - "11:05 madminer.ml INFO Found 50000 samples with 1 parameters and 1 observables\n", - "11:05 madminer.ml INFO Rescaling inputs\n", - "11:05 madminer.ml INFO Creating model for method scandal\n", - "11:05 madminer.ml INFO Training model\n", - "11:05 madminer.utils.ml.tr INFO Epoch 1: train loss 4.34533 (nll: 1.124, mse_score: 0.064)\n", - "11:05 madminer.utils.ml.tr INFO val. loss 1.18938 (nll: 1.064, mse_score: 0.003)\n", - "11:05 madminer.utils.ml.tr INFO Epoch 2: train loss 1.14191 (nll: 1.071, mse_score: 0.001)\n", - "11:05 madminer.utils.ml.tr INFO val. loss 1.10522 (nll: 1.067, mse_score: 0.001)\n", - "11:05 madminer.utils.ml.tr INFO Epoch 3: train loss 1.09866 (nll: 1.072, mse_score: 0.001)\n", - "11:05 madminer.utils.ml.tr INFO val. loss 1.08518 (nll: 1.068, mse_score: 0.000)\n", - "11:05 madminer.utils.ml.tr INFO Epoch 4: train loss 1.08538 (nll: 1.072, mse_score: 0.000)\n", - "11:05 madminer.utils.ml.tr INFO val. loss 1.07815 (nll: 1.066, mse_score: 0.000)\n", - "11:05 madminer.utils.ml.tr INFO Epoch 5: train loss 1.08161 (nll: 1.071, mse_score: 0.000)\n", - "11:05 madminer.utils.ml.tr INFO val. loss 1.07443 (nll: 1.066, mse_score: 0.000)\n", - "11:05 madminer.utils.ml.tr INFO Epoch 6: train loss 1.07892 (nll: 1.071, mse_score: 0.000)\n", - "11:05 madminer.utils.ml.tr INFO val. loss 1.07287 (nll: 1.066, mse_score: 0.000)\n", - "11:05 madminer.utils.ml.tr INFO Epoch 7: train loss 1.07803 (nll: 1.071, mse_score: 0.000)\n", - "11:05 madminer.utils.ml.tr INFO val. 
loss 1.07254 (nll: 1.066, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO Epoch 8: train loss 1.07773 (nll: 1.072, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO val. loss 1.07069 (nll: 1.065, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO Epoch 9: train loss 1.07675 (nll: 1.072, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO val. loss 1.07098 (nll: 1.067, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO Epoch 10: train loss 1.07594 (nll: 1.071, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO val. loss 1.07090 (nll: 1.067, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO Epoch 11: train loss 1.07572 (nll: 1.071, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO val. loss 1.06878 (nll: 1.065, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO Epoch 12: train loss 1.07565 (nll: 1.072, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO val. loss 1.06987 (nll: 1.066, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO Epoch 13: train loss 1.07490 (nll: 1.071, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO val. loss 1.06968 (nll: 1.066, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO Epoch 14: train loss 1.07507 (nll: 1.072, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO val. loss 1.07054 (nll: 1.067, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO Epoch 15: train loss 1.07418 (nll: 1.071, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO val. loss 1.06996 (nll: 1.067, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO Epoch 16: train loss 1.07446 (nll: 1.071, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO val. loss 1.06844 (nll: 1.065, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO Epoch 17: train loss 1.07460 (nll: 1.072, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO val. 
loss 1.06835 (nll: 1.066, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO Epoch 18: train loss 1.07443 (nll: 1.071, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO val. loss 1.06960 (nll: 1.067, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO Epoch 19: train loss 1.07422 (nll: 1.071, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO val. loss 1.06920 (nll: 1.067, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO Epoch 20: train loss 1.07374 (nll: 1.071, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO val. loss 1.06874 (nll: 1.066, mse_score: 0.000)\n", - "11:06 madminer.utils.ml.tr INFO Early stopping did not improve performance\n" + "11:47 madminer.ml INFO Starting training\n", + "11:47 madminer.ml INFO Method: carl\n", + "11:47 madminer.ml INFO Batch size: 200\n", + "11:47 madminer.ml INFO Optimizer: amsgrad\n", + "11:47 madminer.ml INFO Epochs: 10\n", + "11:47 madminer.ml INFO Learning rate: 0.001 initially, decaying to 0.0003\n", + "11:47 madminer.ml INFO Validation split: 0.25\n", + "11:47 madminer.ml INFO Early stopping: True\n", + "11:47 madminer.ml INFO Scale inputs: True\n", + "11:47 madminer.ml INFO Shuffle labels False\n", + "11:47 madminer.ml INFO Samples: all\n", + "11:47 madminer.ml INFO Loading training data\n", + "11:47 madminer.ml INFO Found 100000 samples with 1 parameters and 1 observables\n", + "11:47 madminer.ml INFO Rescaling inputs\n", + "11:47 madminer.ml INFO Creating model\n", + "11:47 madminer.ml INFO Training model\n", + "11:47 madminer.utils.ml.tr INFO Epoch 1: train loss 0.63224 (xe: 0.632)\n", + "11:47 madminer.utils.ml.tr INFO val. loss 0.59488 (xe: 0.595)\n", + "11:47 madminer.utils.ml.tr INFO Epoch 2: train loss 0.59448 (xe: 0.594)\n", + "11:47 madminer.utils.ml.tr INFO val. loss 0.59229 (xe: 0.592)\n", + "11:47 madminer.utils.ml.tr INFO Epoch 3: train loss 0.59231 (xe: 0.592)\n", + "11:47 madminer.utils.ml.tr INFO val. 
loss 0.59073 (xe: 0.591)\n", + "11:48 madminer.utils.ml.tr INFO Epoch 4: train loss 0.59127 (xe: 0.591)\n", + "11:48 madminer.utils.ml.tr INFO val. loss 0.59010 (xe: 0.590)\n", + "11:48 madminer.utils.ml.tr INFO Epoch 5: train loss 0.59012 (xe: 0.590)\n", + "11:48 madminer.utils.ml.tr INFO val. loss 0.58883 (xe: 0.589)\n", + "11:48 madminer.utils.ml.tr INFO Epoch 6: train loss 0.58952 (xe: 0.590)\n", + "11:48 madminer.utils.ml.tr INFO val. loss 0.58939 (xe: 0.589)\n", + "11:48 madminer.utils.ml.tr INFO Epoch 7: train loss 0.58919 (xe: 0.589)\n", + "11:48 madminer.utils.ml.tr INFO val. loss 0.58817 (xe: 0.588)\n", + "11:48 madminer.utils.ml.tr INFO Epoch 8: train loss 0.58878 (xe: 0.589)\n", + "11:48 madminer.utils.ml.tr INFO val. loss 0.58800 (xe: 0.588)\n", + "11:48 madminer.utils.ml.tr INFO Epoch 9: train loss 0.58860 (xe: 0.589)\n", + "11:48 madminer.utils.ml.tr INFO val. loss 0.58770 (xe: 0.588)\n", + "11:48 madminer.utils.ml.tr INFO Epoch 10: train loss 0.58839 (xe: 0.588)\n", + "11:48 madminer.utils.ml.tr INFO val. 
loss 0.58745 (xe: 0.587)\n", + "11:48 madminer.utils.ml.tr INFO Early stopping did not improve performance\n", + "11:48 madminer.ml INFO Starting training\n", + "11:48 madminer.ml INFO Method: alices\n", + "11:48 madminer.ml INFO alpha: 0.1\n", + "11:48 madminer.ml INFO Batch size: 200\n", + "11:48 madminer.ml INFO Optimizer: amsgrad\n", + "11:48 madminer.ml INFO Epochs: 10\n", + "11:48 madminer.ml INFO Learning rate: 0.0003 initially, decaying to 0.0001\n", + "11:48 madminer.ml INFO Validation split: 0.25\n", + "11:48 madminer.ml INFO Early stopping: True\n", + "11:48 madminer.ml INFO Scale inputs: True\n", + "11:48 madminer.ml INFO Shuffle labels False\n", + "11:48 madminer.ml INFO Samples: all\n", + "11:48 madminer.ml INFO Loading training data\n", + "11:48 madminer.ml INFO Found 100000 samples with 1 parameters and 1 observables\n", + "11:48 madminer.ml INFO Rescaling inputs\n", + "11:48 madminer.ml INFO Training model\n", + "11:48 madminer.utils.ml.tr INFO Epoch 1: train loss 0.58869 (improved_xe: 0.586, mse_score: 0.024)\n", + "11:48 madminer.utils.ml.tr INFO val. loss 0.58817 (improved_xe: 0.586, mse_score: 0.020)\n", + "11:49 madminer.utils.ml.tr INFO Epoch 2: train loss 0.58764 (improved_xe: 0.586, mse_score: 0.018)\n", + "11:49 madminer.utils.ml.tr INFO val. loss 0.58755 (improved_xe: 0.586, mse_score: 0.016)\n", + "11:49 madminer.utils.ml.tr INFO Epoch 3: train loss 0.58709 (improved_xe: 0.586, mse_score: 0.015)\n", + "11:49 madminer.utils.ml.tr INFO val. loss 0.58707 (improved_xe: 0.586, mse_score: 0.014)\n", + "11:49 madminer.utils.ml.tr INFO Epoch 4: train loss 0.58675 (improved_xe: 0.585, mse_score: 0.014)\n", + "11:49 madminer.utils.ml.tr INFO val. loss 0.58673 (improved_xe: 0.585, mse_score: 0.013)\n", + "11:49 madminer.utils.ml.tr INFO Epoch 5: train loss 0.58650 (improved_xe: 0.585, mse_score: 0.013)\n", + "11:49 madminer.utils.ml.tr INFO val. 
loss 0.58653 (improved_xe: 0.585, mse_score: 0.012)\n", + "11:49 madminer.utils.ml.tr INFO Epoch 6: train loss 0.58634 (improved_xe: 0.585, mse_score: 0.012)\n", + "11:49 madminer.utils.ml.tr INFO val. loss 0.58642 (improved_xe: 0.585, mse_score: 0.011)\n", + "11:49 madminer.utils.ml.tr INFO Epoch 7: train loss 0.58621 (improved_xe: 0.585, mse_score: 0.011)\n", + "11:49 madminer.utils.ml.tr INFO val. loss 0.58635 (improved_xe: 0.585, mse_score: 0.011)\n", + "11:49 madminer.utils.ml.tr INFO Epoch 8: train loss 0.58610 (improved_xe: 0.585, mse_score: 0.011)\n", + "11:49 madminer.utils.ml.tr INFO val. loss 0.58618 (improved_xe: 0.585, mse_score: 0.011)\n", + "11:49 madminer.utils.ml.tr INFO Epoch 9: train loss 0.58601 (improved_xe: 0.585, mse_score: 0.011)\n", + "11:49 madminer.utils.ml.tr INFO val. loss 0.58612 (improved_xe: 0.585, mse_score: 0.010)\n", + "11:50 madminer.utils.ml.tr INFO Epoch 10: train loss 0.58595 (improved_xe: 0.585, mse_score: 0.010)\n", + "11:50 madminer.utils.ml.tr INFO val. 
loss 0.58605 (improved_xe: 0.585, mse_score: 0.010)\n", + "11:50 madminer.utils.ml.tr INFO Early stopping did not improve performance\n" ] } ], "source": [ - "forge = MLForge()\n", + "mix = ParameterizedRatioEstimator(\n", + " n_hidden=(20,20)\n", + ")\n", "\n", - "forge.train(\n", - " method='scandal',\n", - " x_filename='data/x_train_density.npy',\n", - " theta0_filename='data/theta0_train_density.npy',\n", - " t_xz0_filename='data/t_xz_train_density.npy',\n", - " alpha=50.,\n", - " n_epochs=20,\n", - " n_hidden=(20,20),\n", - " nde_type=\"maf\"\n", + "mix.train(\n", + " method='carl',\n", + " x='data/x_train.npy',\n", + " y='data/y_train.npy',\n", + " theta='data/theta0_train.npy',\n", + " n_epochs=10,\n", + " initial_lr=0.001,\n", + " final_lr=0.0003,\n", ")\n", "\n", - "forge.save('models/scandal')" + "mix.train(\n", + " method='alices',\n", + " x='data/x_train.npy',\n", + " y='data/y_train.npy',\n", + " theta='data/theta0_train.npy',\n", + " r_xz='data/r_xz_train.npy',\n", + " t_xz='data/t_xz_train.npy',\n", + " alpha=0.1,\n", + " n_epochs=10,\n", + " initial_lr=0.0003,\n", + " final_lr=0.0001,\n", + ")\n", + "\n", + "mix.save('models/mix')" ] }, { @@ -622,7 +622,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -639,7 +639,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -656,13 +656,12 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "theta_grid = np.linspace(-5.,5.,100).reshape(-1, 1)\n", - "np.save('data/theta_grid.npy', theta_grid)\n", - "np.save('data/theta1.npy', np.zeros((1,1)))" + "np.save('data/theta_grid.npy', theta_grid)" ] }, { @@ -674,7 +673,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -701,15 +700,15 @@ }, { "cell_type": "code", - 
"execution_count": 53, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ - "forge = MLForge()\n", - "forge.load('models/carl')\n", + "carl = ParameterizedRatioEstimator()\n", + "carl.load('models/carl')\n", "\n", - "log_r, _, _ = forge.evaluate(\n", - " theta0_filename='data/theta_grid.npy',\n", + "log_r, _ = carl.evaluate(\n", + " theta='data/theta_grid.npy',\n", " x='data/x_test.npy',\n", " evaluate_score=False\n", ")\n", @@ -719,15 +718,15 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ - "forge = MLForge()\n", - "forge.load('models/alices')\n", + "alices = ParameterizedRatioEstimator()\n", + "alices.load('models/alices')\n", "\n", - "log_r, _, _ = forge.evaluate(\n", - " theta0_filename='data/theta_grid.npy',\n", + "log_r, _ = alices.evaluate(\n", + " theta='data/theta_grid.npy',\n", " x='data/x_test.npy',\n", " evaluate_score=False\n", ")\n", @@ -737,26 +736,20 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "forge = MLForge()\n", - "forge.load('models/scandal')\n", + "mix = ParameterizedRatioEstimator()\n", + "mix.load('models/mix')\n", "\n", - "log_p0, _ = forge.evaluate(\n", - " theta0_filename='data/theta_grid.npy',\n", - " x='data/x_test.npy',\n", - " evaluate_score=False\n", - ")\n", - "log_p1, _ = forge.evaluate(\n", - " theta0_filename='data/theta1.npy',\n", + "log_r, _ = mix.evaluate(\n", + " theta='data/theta_grid.npy',\n", " x='data/x_test.npy',\n", " evaluate_score=False\n", ")\n", - "log_r = log_p0 - log_p1\n", "\n", - "nllr_test_scandal = -2. * np.mean(log_r, axis=1)" + "nllr_test_mix = -2. 
* np.mean(log_r, axis=1)" ] }, { @@ -768,12 +761,12 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 22, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAFgCAYAAACFYaNMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAIABJREFUeJzs3XdcVtUfwPHPYQkibpzgwIEKIgpuRc2RuWeOysyVqak5cqdpjtymNjQr9yq1rLTcoP6coOYeqAkOEByAbM7vj0cI5AEekGcA5/16Pa/g3nPO/eLLvl7uPed8hZQSRVEUxfDMjB2AoihKXqUSsKIoipGoBKwoimIkKgEriqIYiUrAiqIoRqISsKIoipGoBKwoimIkKgEriqIYiUrAiqIoRmJh7ADSU7x4cVmhQgVjh6EoipIpZ8+efSyltM+onUkn4AoVKnDmzBljh6EoipIpQoi7urRTjyAURVGMRCVgRVEUI1EJWFEUxUhM+hmwougiNjaWgIAAoqKijB2KksdYW1vj4OCApaVllvqrBKzkeAEBAdjZ2VGhQgWEEMYOR8kjpJSEhIQQEBBAxYoVszSGegSh5HhRUVEUK1ZMJV/FoIQQFCtW7LV+8zJYAhZCOAshziX7PBdCjDbU9ZXcTSVfxRhe9++dwR5BSCmvAe4AQghzIBDYaajrK4qimBpjPYJoCdySUuo0WVlRFCU3MlYC7g1sNtK1FUVRTILBE7AQwgroBGxP4/wQIcQZIcSZ4OBgwwanKK/h0aNH9O3bFycnJzw8PGjYsCE7dxr2KduMGTNYuHBhquNPnz7l66+/zvR4r/a7c+cOrq6uGfaLjIykWbNmxMfHAxAfH8+oUaNwcXGhZs2a+Pv7ZzqWRAMGDKBEiRLpxrF3716cnZ2pXLky8+bNSzoeExODl5cXcXFxWb5+djLGHfBbgK+U8pG2k1LKVVJKTymlp719hntZpBAaGsrKlSsJCAjIjjgVRWdSSrp06YKXlxf+/v6cPXuWLVu2pPq7KKUkISHB4PGll4DTiymrifuHH36gW7dumJubAzB37lycnJy4dOkSI0eOzNKYifr378/evXvTPB8fH8/w4cPZs2cPly9fZvPmzVy+fBkAKysrWrZsydatW7N8/exkjATcBz09fggODmbEiBHs2rVLH8MrOUTz5s1TfRL/h3/x4oXW8z/99BMAjx8/TnVOFwcPHsTKyoqhQ4cmHStfvjwff/wxd+7coXr16gwbNow6depw7949Fi9ejKurK66urixduhRIfXe5cOFCZsyYkXSuevXqDB48GBcXF9q0aUNkZCQAs2fPxtnZmVatWnHt2jWt8U2cOJFbt27h7u7O+PHjU8Xk4+Oj9dqv9gNNgtMWR3IbN26kc+fOAERERLBz505GjRoFQMWKFbl586ZOf67aeHl5UbRo0TTPnzp1isqVK+Pk5ISVlRW9e/fm119/TTrfpUsXNm7cmOXrZyeDJmAhRH6gNbBDH+M7OztTtWpVdu/erY/hFSVNly5dok6dOmmev3btGv369cPPz4/Hjx/z448/cvLkSU6cOMHq1avx8/PL8Bo3btxg+PDhXLp0icKFC/PLL78k3Wn7+fmxY8cOTp8+rbXvvHnzqFSpEufOnWPBggWpYipfvrzO/bTFkVxMTAz+/v4kbiW7f/9+7t27h7u7O+7u7gwYMCBVAm3atGnS+eSf/fv3Z/jn8qrAwEAcHR2TvndwcCAwMDDpe1dX1zT/nAzNoCvhpJQvgGL6vEanTp1Ytmw
Zz58/p2DBgvq8lGKiDh8+nOa5/Pnzp3u+ePHi6Z7X1fDhwzl69ChWVlZs376d8uXL06BBAwCOHj1K165dsbW1BaBbt274+PjQqVOndMesWLEi7u7uAHh4eHDnzh0eP35M165dyZ8/P0CGYySXPKbM0BZHco8fP6Zw4cJJ3587d46ZM2cm/XYwaNAg3NzcUvTx8fHJdBxpkVKmOpZ8vq65uTlWVlaEhYVhZ2eXbdfNily3Eq5jx47Exsby999/GzsUJQ9xcXHB19c36fuVK1dy4MABEl8kJyZb0J4gACwsLFI8i311hVW+fPmSvjY3N096kZTVxQDJY8ro2rrEkcjGxiZF/ydPniT9AxEXF8fff/9Nx44dU/TJzjtgBwcH7t27l/R9QEAAZcqUSdEmOjoaa2vrTI+d3XJdAm7UqBHFixfn0qVLxg5FyUPeeOMNoqKi+Oabb5KOvXjxQmtbLy8vdu3axYsXL5KejzZt2pSSJUsSFBRESEgI0dHR/P777xle18vLi507dxIZGUlYWFiaj9/s7OwICwtLc5y0rp1RP22KFClCfHx8UhKuWrUqJ06cAGDJkiW0b98+1d4JPj4+nDt3LtWnVatWmbo2QN26dblx4wa3b98mJiaGLVu2pPjNICQkBHt7+yxvoJOdcl0CtrCw4M6dO0yfPt3YoSh5iBCCXbt2ceTIESpWrEi9evV4//33+fLLL1O1rVOnDv3796devXrUr1+fQYMGUbt2bSwtLfnss8+oX78+HTp0oFq1ahlet06dOvTq1Qt3d3e6d+9O06ZNtbYrVqwYjRs3xtXVNellWnJpXTujfmlp06YNR48eBaBPnz74+vpSuXJlLly4wOLFi3UeR5s+ffrQsGFDrl27hoODA2vWrAGgXbt23L9/HwsLC1asWMGbb75J9erVefvtt3FxcUnqf+jQIdq1a/daMWQXkdavQ6bA09NTqpJESkauXLlC9erVjR2Gkoyfnx+LFy9m/fr1xg4llW7dujF37lycnZ2zZTxtf/+EEGellJ4Z9c11d8CgecbWqVMnpk6dauxQFCVPql27Ni1atEhaiGEqYmJi6NKlS7Yl39eVKxOwEIKoqCi2b9e62E5RFAMYMGBA0kIMU2FlZUW/fv2MHUaSXJmAQTMd5/r162lOTFcURTG2XJuAE1fhJF8BoyiKYkpybQJ2dHSkTp06KgErimKycnVNuI8//phHjx4hpVQVExRFMTm5OgH379/f2CEoiqKkKdc+gkgUHh7O8ePHjR2GoihKKrk+AU+dOpVWrVqluSxUUXKjf/75h1KlSnHx4kVjh6KkI9cn4I4dOxIZGak251HylDlz5nD8+HHmzJlj7FCUdOT6BOzl5UXhwoXVJu1KnrJ582acnJzYtGmTsUNR0pHrE7ClpSXt27fn999/N5k6UIqiKJAHEjBoSpCEhIQkbYmnKPrw8OFDevfuTaVKlahRowbt2rXj+vXrAOzcuRMhBFevXk3Rx9zcHHd3d1xdXenYsSNPnz5NOlegQIEMr6mv4pdpFbV81bJly3B1dcXFxSWptBJoasn16NGDatWqUb16df73v/+lO7apFcs0GCmlyX48PDxkdggPD5fnz5+XCQkJ2TKeYlouX75s7BBkQkKCbNCggfzmm2+Sjvn5+Ulvb28ppZQ9e/aUTZo0kdOnT0/Rz9bWNunrfv36yS+++ELrubSsWLFCLl26NOn7WbNmJX2/atUqOXbs2Ez/LHFxcdLJyUneunVLRkdHSzc3N3np0qVU7f755x/p4uIiIyIiZGxsrGzZsqW8fv160s+yevVqKaWU0dHR8smTJxmOPWPGDLlhw4ZMx2ts2v7+AWekDjkuT9wB29ra4ubmphZjKHpz6NAhLC0tUxTldHd3p2nTpoSHh3Ps2DHWrFnDli1b0hyjYcOGKWqX6UIfxS8zKmqZ6MqVKzRo0ID8+fNjYWFBs2bN2LlzJ8+fP8fb25uBAwcCmg1wEksUpTe2KRXLNJR
cvRAjubt37zJjxgzGjBlDzZo1jR2Ooief777E5fvPs3XMGmUKMr2jS7ptLl68iIeHh9Zzu3btom3btlStWpWiRYvi6+ubqoBnfHw8Bw4cSEpaukiv+CVAaGhoqooSTZs21VrhYuHChUlttRW1PHnyZKo+rq6uTJkyhZCQEGxsbPjzzz/x9PTE398fe3t7PvjgA86fP4+HhwfLli3D1tY23bFNqVimoeSJO2DQ1Klau3ZtqgquiqJvmzdvpnfv3gD07t2bzZs3J52LjIzE3d2dYsWKERoaSuvWrXUeN63il4nlfNq0aZOUjBPpUvpHZlDUMlH16tWZMGECrVu3pm3bttSqVQsLCwvi4uLw9fXlo48+ws/PD1tb26RnvemNnbxYZl6RZ+6AS5QoQePGjdm5cyczZswwdjiKnmR0p6ovLi4u/Pzzz6mOh4SEcPDgQS5evIgQgvj4eIQQzJ8/HyEENjY2nDt3jmfPntGhQwdWrlzJyJEjdbqmtuKXibXWEotfTpkyJUUfXe6AdSlqmWjgwIFJd+2TJ0/GwcEh6VO/fn0AevTokZSAMxrbVIplGowuD4qN9cmul3CJFi1aJAF569atbB1XMS5TeQlXr149uWrVqqRjp06dkjNmzJBDhgxJ0dbLyyvp5VzyF22+vr7S0dFRxsTEpDqXFgcHBxkZGSmllHLlypXyo48+klJKOX/+fDl06NAs/SyxsbGyYsWK0t/fP+lF2cWLF7W2ffTokZRSyrt370pnZ2cZGhoqpZSySZMm8urVq1JKKadPny7HjRuX4diPHz+W1apVy1LMxvQ6L+GMnmTT+2R3Ar5165YE5KJFi7J1XMW4TCEBSyllYGCg7Nmzp3RycpI1atSQ7dq1k2XKlJF79uxJ0W7ZsmVJyfHVJNuhQwe5bt06KaWUQghZtmzZpI+2v7cDBgyQ+/btk1JKGRoaKuvXry8rVaok3333XfnixYss/yx//PGHrFKlinRyckoxM0NKKd966y0ZGBgopdQk2urVq0s3Nze5f//+pDZ+fn7Sw8ND1qxZU3bu3DkpMac39vbt2+WYMWOyHLOxqAScCe3bt5dfffVVto+rGI+pJGBj8PX1le+++66xw8gWXbt2TbprzkleJwHnmWfAiX7//Xdjh6Ao2SZ58UtTq7+WGaZWLNNQ8swsiOTi4+MJDg42dhiKki1MsfhlZplasUxDyXN3wADNmjWjQIEC7N2719ihKIqSh+XJO+BGjRpx4MCBFOvuFUVRDC1PJuBu3boRFxfHH3/8YexQFEXJwwyagIUQhYUQPwshrgohrgghGhry+onq1atHmTJl2LFjhzEuryiKAhj+DngZsFdKWQ2oBVwx8PUBMDMzo0uXLuzZs0eVKlIUxWgM9hJOCFEQ8AL6A0gpY4AYQ13/VcOHD6dz585YWVkZKwRFUfI4Q86CcAKCgR+FELWAs8AoKWVE8kZCiCHAEIBy5crpLZgaNWpQo0YNvY2vKIqSEUM+grAA6gDfSClrAxHAxFcbSSlXSSk9pZSe9vb2eg3o5s2bTJ06lejoaL1eR1FMjaqabBoMmYADgAApZeLGoj+jSchGc/36dWbPns3+/fuNGYaiGJyqmmwaDJaApZQPgXtCiMS1hi2By4a6vjYtW7akUKFCWrcRVJTcTFVNNg2GXgn3MbBRCGEF+AMfGPj6KeTLl4+OHTvy66+/Ehsbi6WlpTHDURQljzHoNDQp5bmXz3fdpJRdpJRPsv0iESGZat69e3eePHnC4cOHsz0UJe/RVv04rerGyY+nV1E5sXJy4idxc/PZs2fj4uKCm5sb7u7uWssGQc6rnJzWuLmycrIuW6YZ65Pp7SjPrpPyi1JSPvlX5y4vXryQpUqVSrGRtpKzmNJ2lNqqH6e1sXri8YwqKmvrf/z4cdmgQQMZFRUlpZQyODg4aY/eV+WkyskZjWuKlZNVVeRETs0hIR4Ozda5i42NDQEBAQwePFhvYSl5g67Vj1+VXkXltDx
48IDixYuTL18+AIoXL55m2aCcVDk5o3FzW+Xk3LUbWmFHqP8hHF8ODYdDKd2qHydu5aeeA+cCeybCw3+yd8xSNeGttH/FTqRL9WNt0quoDP8V7kw0adIk2rdvz8yZM6latSqtWrWiV69eNGvWLFXfnFY5OaNxc1vl5NyVgAGajgHfdbBvOryn214PUkoaNWqEh4cHK1as0HOASm61efNmRo8eDfxX/ViXBJyRxMKdrzp79iw+Pj4cOnSIXr16MW/ePPr375+iTVqVkxPvtgcNGoSbm1uKPj4+PhnGpPktO6WMKicXKFAgVeXk5cuXU79+fUaNGsW8efNSxfLquMkrJ9vZ2WUYp6nLfQnYpgh4jYe/p8CtQ1CpRYZdhBCULVuWX375hWXLluX4za3zNB3uVPUhverHGUmronJGzM3Nad68Oc2bN6dmzZqsXbs2VQLOaZWT27Vrl+G4ualycu56Bpyo3mAoVA72fQYJCTp16dmzJw8fPuTYsWN6Dk7JjX7++Wf69evH3bt3uXPnDvfu3aNixYocPXo0w75vvPEG0dHRrF69OunY6dOnOXLkSJp9rl27xo0bN5K+P3fuHOXLl0/VrkiRIsTHxycl4apVq3LixAkAlixZQvv27ZMSciIfHx/OnTuX6pP8UUXdunW5ceMGt2/fJiYmhi1bttCpUyetsQYFBQHw77//smPHDvr06UOpUqVwdHTk2rVrABw4cIAaNWpkOG5ISAj29va551GhLm/qjPV5raKc57ZIOb2glBe269Q8LCxMWltbyxEjRmT9mopRmMIsiGbNmqVZ/Tit6sbJZzdoq6icOFvAzMxM1qpVK+kzYcIEeebMGdmwYUNZvXp1WbNmTdm1a1cZHBysNTZDV05OXjVZysxXTk6vIrMpVk5WVZG1iY+X8utGUi6tJWVstE5dunbtKkuXLi3j4+Ozfl3F4EwhAZsyVTlZv9Q0NG3MzKDldHhyG3zX6tRl9OjRzJs3L2nCuqLkBskrJ+dkubFyspBa3maaCk9PT3nmzJmsDyAl/NQeHt+AUefAyjb7glNMxpUrV6hevbqxw1DyKG1//4QQZ6WUnhn1zb13wABCQKvPISIITnytU5cHDx7w9ddfk6DjyztFUZSsyt0JGMCxLji3h2NfwYvQDJsfPnyY4cOHq9kQiqLoXe5PwABvTIXoMDi2LMOmHTp0wNramm3bthkgMEVR8rK8kYBL1gC3t+HkdxD2MN2mdnZ2tG/fnp9//jnHv7RQFMW05Y0EDNB8IiTEgveCDJv26tWLhw8f6rQkU1EUJavyTgIu6gR13oezP0Ho7XSbtmvXDjs7O3x9fQ0Tm6IoeVLeScCg2SPCzAIOp79fgK2tLYGBgYwZM8ZAgSmKkhflrQRcsLRmn4h/tkHwtXSbJu60ZMrzpBUlp1HVmFPKWwkYoPFosMwPh+dm2LRHjx4MHz7cAEEpSt6gqjGnlPcSsG1xaPARXNqZ4cbd1tbWbN26lZiYGAMFpyi5m6rGnFLeS8AADUeAdSE4mH7pot69exMaGsr+/fsNFJiiKHlJ3kzANoWh0cdwfQ8EpL3XRJs2bShcuHCm6nspeVd6lY21VUuG/yoeu7q60rFjR54+fZp0Lq1qyq9Ka+y8XI05rUrMYGLVmHXZMs1Yn9fajjIjUWFSfllRynVd0202cOBAaWdn91r7pir6ZQrbUWZU2VhbtWQpU+4J3K9fvxT736ZVTflVuoyt7XhurcacXiVmKbO/GrPajjIr8hWAxqPg1gH490SazQYPHsy0adP086+fkmukV9lY12rJDRs2JDAwMFPXzWol5oxiTktOqMacViVmwOSqMefdBAxQdxDY2sOhtN/I1q9fn/Hjx+eKAoCK/qRX2VhbteRXxcfHc+DAgTTL+qRFl7GzEjP8V4058bN161batGnDvXv3qFq1KsOGDUuzbFJ61Zjd3d0ZMGAARYsWTdGnadOmKa6X+En+DkZb1eRX/9FydXXF29ubkJA
QXrx4wZ9//plUZy55NebatWszaNAgIiIiMhxXX9WYc19RzsywsoUmn8Bfk+HOUajQRGuziIgIfvvtNzp06KASsYn78tSXXA29mnHDTKhWtBoT6k3Icv/0qiUnJrk7d+7g4eFB69ats23s15VTqzGnVYkZMLlqzHn7DhjAcwAUKKW5C05j0YWfnx99+/ZN9auOoiRycXHh7NmzqY4nVkseNGgQFSpUYMGCBWzdujUpkSQmubt37xITE8PKlSvTvc7KlSuT7gwDAwPTHTurMWcksRrz559/zooVK/jll19StdFWjTl//vzAf9WYO3bsmKKPLnfAulZjHjhwIL6+vnh7e1O0aFGqVKmS1P/Vasy+vr46jauXasy6PCg21kevL+GSO/GtpoDnrcNaT8fHx0tHR0fZrl07w8SjZIqpvISrV6+eXLVqVdKxU6dOyRkzZsghQ4akaOvl5aX1RZevr690dHSUMTExqc5p8+233+o8dnLJX8Jpi/nw4cNp9r969WqKF1pTpkyRw4cP13odBwcHGRkZKaWUcuXKlfKjjz6SUko5f/58OXTo0HR/trTExsbKihUrSn9//6SXZRcvXkzV7tGjR1JKKe/evSudnZ2TCn5KqSkSmlhXbvr06XLcuHEZjvv48WNZrVo1rTGpopyvKyZSyoXOUq5pK2VCgtYmn376qbSwsEiz8qxiPKaQgKXUXtm4TJkyaVZLljJ1kuvQoYNct26dlFKmWU05UXqVmNPrnxeqMadViVnK7K/GnGMSMHAH+Ac4p0uABkvAUkp54ruXd8GHtJ728/OTQIopO4ppMJUErKSUV6ox57RpaC2klO5Sh4J1BlWnH9iV0eyUJlM/Q6tVqxbVq1fn+PHjRghOUXIeVY05Y3l7FkRyltbQdAz8OQ5uHwGn5ilOCyHw8fFJNXVGUZS0DRgwwNghvDYrKyv69eunl7ENfQcsgb+FEGeFEEO0NRBCDBFCnBFCnAkODjZsdIl3wYfmar0LLlasGEKIxMcpiqIor8XQCbixlLIO8BYwXAjh9WoDKeUqKaWnlNLT3t7esNFZ5NPcBd87obkL1mLOnDk0b97csHEpipIrGTQBSynvv/xvELATqGfI6+uk9ntgVxoOf6n1LrhAgQJ4e3tz+fJlIwSnKEpuYrAELISwFULYJX4NtAGydVv8hxEPmX96PnEJr7Fvg6W1ZnXcv8c1q+Ne0atXL8zNzfWyLlzJOvVYSDGG1/17Z8g74JLAUSHEeeAU8IeUcm92XuDkg5Osv7yeOSfnvN4fTJ33NavjjnyZ6lTJkiVp3bo1mzZtIiEh4TWiVbKLtbU1ISEhKgkrBiWlJCQk5LVWxxlsFoSU0h+opc9rdK7cGf9n/vxw8QfKFijLwJoDszaQpTU0GQ17J8KdY1ChcYrT77zzDu+99x7Hjx+nSRPt+0cohuPg4EBAQAAGf2mr5HnW1tY4ODhkuX+um4Y2qs4oHoQ/YKnvUsoUKMNbFd/K2kAe/eHoEjgyDyrsTnGqS5cujBkzhtKlS79+wMprs7S0pGLFisYOQ1EyLddtxmMmzJjVZBZ1StRh6tGpnAtKvZuTTixtoNFIuO0N/6bc8b9AgQIsWrSISpUqZUPEiqLkVbkuAQPkM8/HshbLKGlbklGHRnE//H7WBvL8APIXA+/5qU4lJCRw6NChTO2/qiiKklyuTMAAha0Ls6LlCmLjYxlxcAQRsRGZH8TKVlM77uZ+CEi5bV9CQgK9evVi7tyMy9sriqJok2sTMIBTIScWNluI/1N/JnpPJEFmYdZC3UFgUwS8F6Q4bGFhQZ8+fdi9e3eKQoqKoii6ytUJGKBR2UaMrzuewwGHWXku/c2utcpnBw2GayooPzif4tS7775LdHQ0P//8czZFqyhKXpLrEzBA32p96ValG6surOKvO39lfoD6QyBfIfBemOKwp6cn1apVY926ddkUqaIoeUmeSMBCCKbUn0It+1pMOzaNa6HXMjeAdSGo/yFc+Q2
CrqQYt1+/fly/fp3nz59nc9SKouR2eSIBA1iZW7G0xVLsLO0YfWg0z6KfZW6ABh+BpS34LEpxeOTIkdy7d4+CBQtmY7SKouQFeSYBAxS3Kc7iFot5+OIhk3wmZe6lXP6iUHcgXPwFQm4lHba1tcXS0pKEhAS1FFZRlEzJUwkYoJZ9LSbWnYhPoA/fnv82c50bjgBzK/BZnOLw2bNnqVixIidPnkyjo6IoSmoZJmAhRFEdPoUNEWx2edv5bTpV6sQ357/BO8Bb9452JTVLlC9sgSd3kw5XqVKFoKAg9TJOUZRM0eUO+D5wBjibzueCvgLUByEE0xpMw7mIM5OPTuZB+APdOzcaCQg4/lXSoYIFC9K1a1e2bNlCdHR09gesKEqupEsCviKldJJSVkzrA4ToO9DsZm1hzaLmi4hLiGPckXHExsfq1rFQWXDvC77rIexh0uH333+fJ0+esHv37nQ6K4qi/EeXBNwwm9qYnPIFyzOr8SwuPL7A4rOLM+6QqMloSIiF48uTDrVq1YoyZcqwdu1aPUSqKEpulGECllJGJf/+ZWUL8/Ta5CSty7fm3ervsuHKBg7cPaBbp6JO4NoDzvwIEZqbf3NzcxYuXMiwYcP0GK2iKLmJLi/hzIQQfYUQfwghgoCrwAMhxCUhxAIhRBX9h6lfYzzG4FLMhWnHpxEYHqhbp6ZjITYCTn6TdKhPnz689VYW9x9WFCXP0eURxCGgEjAJKCWldJRSlgCaAieAeUKId/UYo95ZmluyoNkCpJR86v0psQk6PA8uUQ2qd4STqyDqv0Ud/v7+LF68WM0JVhQlQ7ok4FZSyllSygtS/rdyQUoZKqX8RUrZHdiqvxANw9HOkemNpnMh+ALL/ZZn3AE0d8HRz+D090mH/vrrL8aOHYufn5+eIlUUJbfQ5RlwhreDurTJCdpWaEvPqj358eKPHL9/POMOZWpD5Vbwv68h5gUAvXv3xsrKSr2MUxQlQ6+9Ek4IMSE7AjEV4+uOp1KhSkw5OoXQqNCMOzQdBy8eg69mEUaRIkXo0qULGzduVHOCFUVJV6YTsBBiW7LPdmCQHuIyGhsLG770+pLn0c+Zdmxaxs9yyzeEco00CzPiYgD44IMPCAkJUXOCFUVJV1bugJ9LKd9++ekJ7M/uoIzNuagzYzzH4B3gzaarmzLu4DUWngdqligDrVu3plKlSty+fVvPkSqKkpOJzL6tF0JUlFLeTvZ9USmlDr+rZ56np6c8c+aMPobOkJSS4QeGc+rhKba030LlIpXTawyrW2hmQww/DeYWxMXFYWFhYbiAFUUxGUKIs1JKz4za6TIPuMIwBAmDAAAgAElEQVTL+b47hBDfA+2FEOUTz+sr+RqbEIKZjWdia2nLpKOTiImPSa+xZkZEqD9c3gWQlHyfPcvkvsOKouQZujyC+BXN4ouVQGugFuAthFgphMinz+CMrbhNcWY0nMHV0KusOLci/cbO7aG4s2arype/VYwYMYK6deuqOcGKomilSwI2l1KukVIeAEKllIPRLMy4A6zSZ3CmoEW5FvSo2oOfLv7E6Yen025oZgZNx0DQJbiuqTvn6enJjRs3OHbsmIGiVRQlJ9ElAe8XQox4+bUEkFLGSSkXkEM34cms8Z7jcbRzZOrRqYTHhKfd0LU7FC4HPgtBSnr27ImdnR1r1qwxXLCKouQYuiTgMUAhIcQZoIwQYogQ4l0hxEpy4DaUWZHfMj+zm8zm4YuHLDizIO2G5pbQeBQEnIY7Ptja2tK7d2+2bdumngUripKKLivhEqSUswEvYAhQCvAALgJ5ZucZ9xLuDHAdwI4bOzh873A6Dd+FAiWTStgPGjSIFy9esGXLFsMEqihKjpHpaWivfUHNVpZngEApZYf02hpzGpo2sfGx9PmjD8GRwezqvIsi1kW0Nzy2DPZ9BoMOIsvWYdOmTXTo0IFChQoZNmBFUYwi26ah6cEo4IoRrvvaLM0tmd1kNs9jnjPn5Jy0G3oOAOt
CcHQxQgjeeecdlXwVRUklywlYCFE6s9PQhBAOQHvg+4zamirnos58VOsj9t7Zy993/tbeKJ8d1B8KV3+HoKsArFmzhqVLlxowUkVRTN3r3AGvB64KIRZmos9S4FMgIa0GL1/ynRFCnAkODn6N8PRngOsAahSrwRcnviAkMo33kPWHgqUtHF0CwP79+5k5cyZRUTm2eIiiKNksywlYStkKcAJ+1KW9EKIDECSlPJvBuKuklJ5SSk97e/ushqdXFmYWzG48m/DYcGafnK29Uf6i4PkB/LMdntxh0KBBPHnyhB07dhg2WEVRTFamlyILIUYkLkWWGpd0vFZjoJMQ4g6wBXhDCLEhy5EbWeUilRnmPox9d/fx152/tDdqOBzMzOHYV7Ro0YJKlSqxalWuX7uiKIqODLYUWUo5SUrpIKWsAPQGDkopc3Qpo/4u/XEp5sKck3N4EvUkdYOCZTQl7P02YBYRzODBgzly5AjXr183fLCKopgctRT5NViYWTCz8Uyexzxn7qm52hs1HqUpYX9iJf3796dp06ZqUYaiKICRliJLKQ9nNAc4p6hapCpD3Iaw5/YeDv57MHWDok7g0g1Or6FkQSu8vb2pW7eu4QNVFMXkZHYpcllTXor89EUMx24+JiHBsItLBtUchHMRZ7448QXPY56nbtDkE4gJh1OrAQgJCeHq1asGjVFRFNOT2aXIgzHhpcg7/QJ55/uTNF94mJWHbhL03DBTvizNLPm88eeERIWw+Mzi1A1KuULVtnDia2R0GE2aNGHYsGEGiU1RFNOlyyyIckKIckBx4BzwE7AE+AMomHheCFFQr5HqoE+9cizr7U6ZwtYs+Osajb88yCdbz/FPgP6fuboUc+F9l/f55cYvnHxwMnWDpmMh8gnCdx3vvfcehw4d4tq1a3qPS1EU05XhXhBCiENonv2Kl4cSO4hkzSTwk5RyXXYG9zp7Qdx+HMG6/91h2+l7RMTE09CpGB+3rExDp2IIITLsnxVRcVF0/607CTKBHZ13YGNhk7LBTx0g5CaP+uzDobwTI0eOZNGiRXqJRVEU49F1LwiDb8aTGdmxGc/zqFi2nrrHah9/gsKi8SxfhDFtqtKoUvFsijKl0w9PM+CvAfR36c9Yz7EpT946COu7Qsev6DX/T/bv309AQAA2NjbaB1MUJUfK9s14hBA5ciODgtaWDPZywvvTFszs7ELg00j6rj5Jvx9Ocel+9j+aqFuqLt2rdGfd5XVcDrmc8qRTCyhTG44u4aMPBxMaGoq3t3e2x6AoSs6QmaXI4UKI3UIIWwAhRBshRI6ptWNtaU6/hhU4NK45U9pV50LAU9p/dZTx288THBadrdf6xOMTiuQrwozjM4hLiPvvRGLxzie3aWb/lJs3b/Lmm29m67UVRck5dE7AUsqpwGbgsBDiKDAWmKivwPTF2tKcwV5OHBnfgiFeTuw6F8gbCw/zvY8/cfFp7hGUKYXyFWJS/UlcCb3CxisbU558WbxT+CymkpMTgCraqSh5VGYeQbREMw0tArAHRkopffQVmL4VsrFkcrvq/DXaC48KRfjijyt0WnGMCwFPs2X8NuXb0MyhGSvPrSQgLOC/E8mKd8rre+nbty8jR47MlmsqipKzZOYRxBRgmpSyOdAD2CqEeEMvURmQk30Bfuxfl2/eqcPj8Gi6rDzG7D8uExUb/1rjCiGY2mAqAsGck3NS3uW+LN4pfBZhZWXJTz/9xPPnWhZwKIqSq2XmEcQbUsqjL7/+B80ijC/0FZghCSF4q2Zp9o1pRu965Vjtc5v2X/m89t1wKdtSjKg9Ap9AH/66m2zHtGTFOyf2akJ4eDjr169/zZ9CUZScRpd5wEKm0UgIYSOljEyvzeswVk047+vBTPjlAkFh0Yx8owoj3qiMuVnW5g7HJcTR94++BEcG82uXXylo9XK9SmwULK0JJWtQd/l9IiIiuHTpkt7mKCuKYjjZOQ3tkBDi45er4ZJfwApoKIRYC7yfxTh
NkldVe/aO9qJTrTIs2X+dPqtPcP9pZJbGsjCzYHqj6YRGhfKV71f/nbC0hkYjwP8w0wd14MqVKxw5ciSbfgJFUXICXRJwWyAe2CyEuC+EuCyE8AduAH2AJVLKn/QYo1EUsrFkSS93Fr9di4uBz2j3lQ8HrjzK0lguxVzoW60v265t43zw+f9OeA4A68K8ZXuJWbNmUa1atWyKXlGUnECnlXBCCDNgEjAfzZ4QkVLK7JkukA5TKUt/+3EEwzf6cvnBcz5+ozKjW1XN9COJiNgIOu3qROF8hdnaYSsWZhaaE4fmwpF58NFxKOmih+gVRTG0bF0JJ6VMAN6QUsZKKR8YIvmakorFbdkxrBE9PRxYfvAm/X88xdMXMZkaw9bSlkn1JnH9yfWUc4PrfwhWBcBnMb/++qt6GacoeUhmpqGdE0JMf3k3nOdYW5ozv4cbc7vV5KR/KF1WHuNmUFimxmhZriVeDl6sPLeShxEPNQfzF9U8iri0g9/XfcW4ceOIjs7elXmKopimzCRTRzS13O4LIX4VQswSQvTUU1wmSQhBn3rl2DykPuHRcXRdeZxD14Iy1X9SvUlIKZl7MlkJo4YjwMyS6S0LERQUxPbt2/UQvaIopiYz84DfllJWB8oDnwM3gXr6CsyUeZQvyq8jmuBYND8DfzrN+v/d0bmvg50DH9b6kIP3DnLk3stZD3Yloc57lH3sTbPalVm+fLle4lYUxbRk+nGClDJaSukrpVwrpRyvj6BygrKFbfj5o4a0cC7BtF8vMffPKzqXQnq/xvs4FXJi7qm5RMa9nN7WeBQCybK3K3Hq1ClOntSyqbuiKLlKnnyem13yW1nw3XsevNegPN95+zNyix8xcRlv6GNpbsnUBlMJDA9k9QVNnTgKlwO33rjF+dKyQU2ePNFS5l5RlFzFQteGQogxWg4/A85KKc9lX0g5i4W5GTM7u1C2iA3z9lzlWWQs377rgW2+9P9o65aqSwenDvx46Uc6VupIxUIVockniPOb2D+zK7Rua6CfQFEUY8nMHbAnMBQo+/IzBGgOrBZCfJr9oeUcQgiGNqvE/B5uHL8VQt/vTxIakfE0tbGeY7Ext2H2ydmazXqKVwaXrnD6eyJD7+Pn52eA6BVFMZbMJOBiQB0p5Vgp5Vg0CdkeTbXk/nqILcd529ORb9/14MqD5/Re9T+CwtKvylzcpjgf1/mYkw9O/rdZT9OxEBPOn9M70bZtWzUlTVFyscwk4HJA8tu6WKC8lDISUFnipdY1SvLTB3UJeBJJr+8y3kPi7apvU71odRacWkBEbIRmNVy1DnQqcZ/Ip0Fs2bLFQJErimJomUnAm4ATLxdjTAeOodkfwha4nH7XvKVRpeKsG1CPx2HRvP3d/7gX+iLNtuZm5kxpMIWgyCC+Pf+t5mDTsVjGR/B5B0eWLl2qKmYoSi6VmXnAs9BUxHiK5uXbUCnlTCllhJTyHX0FmFN5VijKxsH1CYuKo/eqE+km4Vr2tehepTsbLm/gxpMbULYOVG7F0FpxXL90ThXuVJRcKlPT0KSUZ6WUy6SUS6WUxt8lx8S5ORRmw8D6hEXF0mf1CQKepJ2ER9UZha2V7X/VM7w+xSYhgk+aFua3334zYNSKohhKphKwEKKWEGLEy08tfQWVm9R0KMSGQfV5FqlJwg+eaX8mXMS6CCNrj+TMozPsub0HytWHCk35/M1iLJyXKwqPKIryiswU5RwFbARKvPxsEEJ8nIn+1kKIU0KI80KIS0KIzzMfbs6UeCf8JCKWd1afJDhM+zvL7lW641LMhYVnFhIeEw7NPsX8RTDCbwMxMZnbfU1RFNOXmTvggUB9KeVnUsrPgAZongnrKhrNlpa1AHegrRCiQSb652i1HAvz4wd1uf8skvfWnNS6naW5mTlTG0zlceRjzQu5Ck3BsQERf8/GqbyDWh2nKLlMZhKwQFMZI1H8y2M6kRrhL7+1fPnJU6/361Yoyup+nvgHR/D+j6eJiI5L1ca
1uCvdqnRjw5UN3Hx6C5qNxzbuCW1LP+W7774zQtSKouhLZhLwj8BJIcSMl48PTgI/ZOZiQghzIcQ5IAjYJ6VMteOMEGKIEOKMEOJMcHBwZobPEZpWsWflO3W4GPiMoRvOEh0Xn6rNqDqjKGBVgLmn5iKd3oCyHsxqU5hvVnylHkUoigEsWrSI4cOHEx+f+v/P7JSZaWiLgQ+AkJef96WUSzJzMSllvJTSHXAA6gkhXLW0WSWl9JRSetrb22dm+ByjdY2SzOtWE58bjxmz7Tzxr+yilvhC7tTDU/x1929oNoHS1tG0KP5YLcxQFD2Liopi/vz53L59G3Nzc71eK8MELIQIE0I8F0I8Bw4Dc4DZgM/LY5n2sqTRYTQFP/Oknp6OTG5XjT8uPGDGb5dSLbboXqW7ZoXcmQW8qNAEWboWM1vZsWzJIrUwQ1H0aOPGjQQFBTFu3Di9XyvDBCyltJNSFkz2sUv2KajrhYQQ9kKIwi+/tgFaAVezHnrON8SrEh96ObH+xF2+OXIrxTlzM3Mm159M0IsgvvtnFcLrU8oViOO32f0QInMFQRVF0U1CQgKLFi3C3d2dFi1a6P16htwPuDRwSAhxATiN5hnw7wa8vkma0LYanWqVYf7ea+zwDUhxzr2EO50rdWbd5XXcLlUDSrpS9tYmSNDvcylFyav27NnDlStXGDt2rEFudAyWgKWUF6SUtaWUblJKVynlTENd25SZmQkW9HSjoVMxPv35AsduPk5xfrTHaKzNrZl35ktk03EQcpPlHzbn7NmzRopYUXKvatWqMWbMGHr16mWQ6+mUgIUQ1YQQLYUQBV45nmef4WanfBbmfPueB5XsCzB0w1luPPqv2nJxm+IMdx/O8fvHOVjAjvhizrxpc4GFC+YbMWJFyZ0qVarEokWLsLS0hOf3wW8jJGRc5SardHkJNxL4FfgYuCiE6Jzs9Bx9BZbXFLKx5IcP6mJtac4HP51OsVquV7VeVC5cmflnFhDbfBxVi4L5lV3cvn3biBErSu6yYMECTp8+/d+B/TPg90/geaDerqnLHfBgwENK2QVNBYxpL5clQyYWYigZK1vYhjXve/I4PJrB684QFat51mtpZsmkepO4H3GfH+KCiS1SmalNrVi8aIGRI1aU3OHq1atMmDCB3bt3aw7cOw0XtkKjEVDYUW/X1SUBmyeuYJNS3kGThN8SQixGJeBs5+ZQmKW9anM+4Cnjf76QNOWsXul6tK3Qlh8u/cijJsOoVtyM58fXEhQUZOSIFSXnW7BgAdbW1nz88ceaRw57J0CBUtBEWynM7KNLAn4ohHBP/OZlMu4AFAdq6iuwvKytayk+fbMau8/fZ/nBm0nHx3qOxUyYseDZeaILObGgQ3EszNS/gYryOgIDA1m/fj0ffPAB9vb2mjvfwLPQagbkK5BR99eiSwLuBzxMfkBKGSel7IemHpyiB0ObOdGtTlkW77vOnn8eAFDKthRD3IZw8N4hztbvSwmzpxS9f8jIkSpKzrZo0SISEhIYP348RIdrnv2W9QA3/c+E0GUhRoCU8mEa545lf0gKaCotz+lakzrlCvPJtnNcDHwGQL8a/ShnV465j7yJLVGD8D8/Y+vmjUaOVlFyrhIlSjBs2DAqVKgARxdD+ENo+yWY6X+WbqavIIToqI9AlNSsLc357j1PiuS34sP1ZwkJj8bK3IoJ9SZw5/kdNjg3oUDUA3y+HkVUVPoVmBVF0W7ixIl89dVXEHobjq/Q3Pk61jXItbOS4mdnexRKmuzt8rHqPc3MiGEbfYmNT8DLwYtmDs349qE3/nblGeUexU8/fG/sUBUlRwkLC+PXX38lIXGe775pYGauefZrIFlJwOqtj4HVdCjEl93dOHk7lFm/awpQT6g7gdiEWFY716JKMXNu7ZhNbGyskSNVlJzj22+/pUuXLly4cAFue8OV3dB0DBQsY7AYspKA1VZcRtCldlmGeDmx7n932X7mHo4FHenv0p/fQ87hXag
8H7mEs3nDWmOHqSg5QmRkJIsWLaJVq1a413SFvZOgUDloOMKgcRhyMx7lNX36pjONKxdjyq6L/BPwjEE1B1HKthTLHUpTvogZ1SLV/hCKoovVq1fz6NEjpk6dCr4/waOL0GYmWNoYNA6VgHMQC3MzvupdG/sC+Ri64SyR0eaM9RzL1Rf32V7OhXpRRyBWvYxTlPRER0czf/58mjZtSrO6NeHgbCjfBGp0MXgsWUnAj7I9CkVnxQrk49t3PQgOj2bkFj9aObahXql6LM8Xz5PwB5xdPfK/lwqKoqRy69YtLCwsmDZtGhz5EqKewlvzwAj7bGc6AUspW+sjEEV3NR0K8UUXV47dDGHp/htMrDeRiPho5tuXp6z/Zn77RZUtUpS01KhRgxs3btCqlgOcWg0e/aGUcRb1qkcQOdTbno70ruvIikM3ufugIH2q9eGP/AmEFs3H7a2T1V2womhx5coVoqKisLSwQOydpFlq3GKq0eLRZTtK3+xoo2S/GZ1ccC1bkE+2naNDufcpYl2EGfZledcphN9/2WTs8BTFpMTGxtK+fXt69OgB1/4E/0PQfDLYFjNaTLrcAVcXQlxI5/MPmo15FAOztjTnm3c8MBOCT7fdYIT7KK5YJXC8hB0Pfpmi7oIVJZn169dz+/ZtPhr8Afw1GeyrQd2BRo3JQoc21bQcswSSz/pXRcqMxLFofha/XYuBa8/gd6kSbvZuLIz/h5+r3iP47lVKVqxh7BAVxehiY2P54osv8PT0pF2RO+B3B97bBeaWafaJjo/m4uOLeJT00FtcumzGc/fVDzARCH75dXkpZUAGwyh61LJ6ST5s5sSmUwE0LjyYZ+awtnghSl5Xm/QoCsDatWu5ffs2cyePRPgsgmodoFLaVY+llHx+/HMG/jWQe2H39BZXVl/CTQfWCCHWA4bZtUJJ1/g2ztSrUJQVe6No7diZTYXsuOH3A1dOHjB2aIpidPv27aNu3bq0lN6QEAdtvki3/dpLa9ntv5uhtYbiaGfcihjazAKuoVmWvC37wlGyysLcjOV9a2Njac6FCw0pYGnH7MIFuLjyPeLj1RMiJW/bsmUL+9bMQlzYpikzVLRimm29A7xZfHYxbcq34UO3D/Ual65Vkfu+cuhTKeUM4CM0d8OKCShZ0Jolvdy5+UjiIHpw1sYaW5cE9qxfZuzQFMUooqKiCAoKQsgECh2fDQXLQtOxabb3f+bPBO8JOBd1ZlbjWQg9L87Q9Q64eeIXQogWUsrHL791BvT7T4SSKV5V7RnevDInzlehTL4KLCxWhPijXxIXF2fs0BTF4L777jucnJx4vG8JPDgPbWaBla3Wts9jnjPq4CiszK1Y1mIZ+S3z6z0+XRNw8n8G+iT7eqiUUv1+a2JGt6pCvYrF+fdWe4ItLbhV05o9a9Q2zkreEhERwZw5c3ijUR2Kn/8ayjcGl25a28YnxDPBewIBYQEsaraIMgUMsyWlrgnYQghR++XXyZOx2hvYBCVu2mOTUAm7aE/WF7LD9sEmkGonUSXvWLFiBUFBQXzbqzxEPoG3vkxzv4flfss5GniUSfUn4VnK02Ax6pqAEwBbIUQfQAgh+gkhSqP2BjZZpQpZs6hnLR7cfRMLLNlcKh55Y7+xw1IUg3j27Bnz58/no25elAn4HTwHprnfw97be1lzcQ09qvbgbee3DRqnrgl4GuAEFAGOA/5AHaCKnuJSskGLaiUY1MiNsEdvcsLGhj/3fUpE2HNjh6Uoerd3716ePn3Kl80SwKYItJistd210GtMOzaN2iVqM7me9jb6pFMCllLel1Kuk1J+LaX8AXgC2AKX9Bqd8trGv1mNqgXaYRddkCXWUexZNsjYISmK3vXq1YuH+1ZgF3oBWk6H/EVTtXka9ZRRh0ZRMF9BFjdfjGU6q+L0JUvzgKWUl6SU26SUhq3foWSalYUZK3p7Evb4XR5ZWHAl3wlCg+4bOyxF0ZtHjx5BdBj2fsugTG2o/V6qNnEJcYz
zHkfQiyCWNl9KcRvjbGdjsO0ohRCOQohDQogrQohLQohRhrp2XleuWH7mtO9M4WeV2GFfkN++ftfYISmKXty6dYvy5cvzz8p+EPYA2i0Es9RpbunZpZx8cJJpDaZR0944ewGDYfcDjgPGSimrAw2A4UIItVOMgXSsVQbP0mOxTDDjcIl73Lt2ztghKUq2++yzz6he3AzXMG/Nna9D6hkNf/r/ydrLa+nt3JuuVbqmO15IeLS+QgUMmICllA+klL4vvw4DrgBlDXV9BeZ0bkSxsBactbVm7760VwMpSk7k6+vLpk2b+HlABUS+AtBqRqo210KvMf34dOqUqMOndT9NcywpJfP3XqX1Em8CnrzQW8xGqYghhKgA1AZOajk3RAhxRghxJjg42NCh5Wo2VuYs7DmDUlH52GTzgLD76i5YyR2klIwfP54B9YtQySwQWk4D25TPdZO/dFvUfFGaL91i4hL4ZOs5vj58izddSlKqoLXe4jZ4AhZCFAB+AUZLKVPNiZJSrpJSekopPe3t7Q0dXq7nUqYIb5WfQIi5GbN/HWrscBQlW9y6dYt/zv6PZW/ZQula4PFBivPxCfFM8JlA0IsgljRfkuZLt+dRsfT/8RS7zt1n/JvOzOlaEwtz/aVJgyZgIYQlmuS7UUq5w5DXVv7zyZs9qBlegr024axZN83Y4SjKa6tcuTK31gzGVoZB+8VgZp7i/HK/5Ry/f5wp9afgZu+mdYyg51H0+u4Ep26HsvjtWgxvUdlkNuN5bULzk6wBrkgpFxvqukpqQgg+6/wdtgnw54udPAsPN3ZIipJl9+/fRz68iN3F9QiP91O9eNt3dx9rLq6hZ9WedK/aXesYtx9H0P3b49wNieCH/nXpVsfBEKEb9A64MfAe8IYQ4tzLTzsDXl9JxtmxCi2ja3LdRjBl9QcZd1AUExQeHo6npwf+K7qBdSHNootkbj29xdSjU3Gzd2NivYlax7gY+Iwe3xwnIjqezYMb4FXVcI8+DTkL4qiUUkgp3aSU7i8/fxrq+kpqM4ZswDkiAb9Cl9nsc8zY4ShKps2fP5/WJUKoZPEIWs9MseItLCaMUYdGYWNhw+Jmi7Eyt0rV/6R/CH1WncDa0pyfhzaklmNhQ4ZvnFkQimkwMzfnfYf+vDAT7Do3mcCnkcYOSVF09u+///L98gUs71AIHOqB+ztJ5xJkApOPTiYwLJBFzRdR0rZkqv4Hrz6i3w+nKFEwHz9/1BAn+wKGDB9QCTjP69hpPG+bleZywVAmbP6auHhVyl7JGSZNmsQXzc2xs4yHDktSrHhbdWEVh+8dZlzdcVqrGv9+4T5D1p2lakk7tn3YkNKFbAwZehKVgBVGdfiO0nHxPLRYz1cHrxo7HEXJ0LNnz4i4eoABtcwRDYdBKdekc94B3nx97ms6OHWgb7VXq6nBtjP3GLnZj9rlCrNpcH2KFchnyNBTUAlYIX9RJ7qFleeRVTx/nZ/NmTuhxg5JUdJVqEB+fvmgPAl2ZaDZfy/X/n3+LxO9J+Jc1JnPGn6WahrZuv/d4dOfL9C4cnHWDqiHnbXhd0BLTiVgBYD3PthIw6eRPC52nlHb/uRZZKyxQ1IUrS5evEjc0a8wf3wVs/YLIZ/m2e2L2BeMOjQKMzMzljRfgo1FyscKq739+ezXS7SqXpLv3/ckv5WFMcJPQSVgBQDbQkVpZ9MRSyT5C3zHpB0XkKqEkWJinj9/zoCubxB/4Atwbg/V2gOapcjTj0/H/5k/85vOx8Eu5TzelYduMvvPK7SvWZpv3q1DPgtzbcMbnErASpLOgxfT4d8oHto+5cTdX9h+JsDYISlKCrNmzWRm/QgsLPNBu/lJx9deWsveO3v5uPbHNCrbKOm4lJIl+66z4K9rdK1dlmW93bHU49LizDKdSBSjE0LQ+Y0luEZFk6/UH0z//RS3gtUqOcU0XL16lYf7VtC2sgXmbT6HQpq73BMPTrDEdwmty7dmoOvApPZSShbvu86yAzf
o6eHAwp619LqvQ1aYVjSK0dVs2okZ5TsTZRZPafsNjNzsR3RcvLHDUvI4KSWTP/mIRa3zEVvCDepqSmvdD7/P+CPjqVCwArMaz0p66SalZMFf11h+8Ca96zryZXc3zM1Mr4i7SsBKKs4tZ/J+lCDY7iY3n55mwd5rxg5JyeOePHnC+6VvUNzWDMtuX4OZOVFxUYw+NJq4hDiWtViGraUt8F/y/frwLfrUK8ecrjUxM8HkCyoBK9pY2lCNN3GMjaVEmU18f+w6h6CBUTkAABmDSURBVK8FGTsqJQ8r+uQ8ncuFQeNRUKomUkpmnZjFldArzGs6jwqFKgApk2/f+uWY3cXVZJMvqASspKHNkPl0uRHFU8tonBz/ZNz28wSFRRk7LCUP2rV9E/G/fgxFK2HWfAIAm65u4rdbvzHMfRjNHJsBmuS78O//7ny/6GzayRdUAlbSYGZmRpMuq+j4NIyQ/McJl3cYu+08CQlqappiOBcuXMD/+0GYP78Hnb4CSxtOPzzNgtMLaO7YnA/dPkxqu2TfdVYeukWfeo4mf+ebSCVgJU01GrbBLagKReMTcHRYh8+NR6zy8Td2WEoekZCQwNLx7zGqviVRLn2gQhMehD9g3JFxlCtYjrlN5mImNCls2f4bfHXwJr08HZndxXSf+b5KJWAlXR0mbGfwwygemj3FvfpJFv51Db9/nxg7LCUPWP3tSsZUukWUVWGsO36peel2eDQx8TEsa7GMAlaaFXArDt5gyf7r9PBwYG63nJN8QSVgJQMFChamb//ttI54wb/spnjRp4zc4sfzKLVUWdGfBw8e8Oy3qbiWMCf/26uQ+Qoy838zuRxymblN51KxUEUAvjtyi4V/X6dr7bJ82d0tRyVfUAlY0UVZD0bbt8Y2Po4Spddy/+kLJu/4Ry1VVvTGPPgSY+sLnju1R1R9kw1XNrDbfzfD3YfT3LE5AD8cvc3cPVfp4FaaBT1Mc55vRlQCVnRiWW8sgwOecDvuPq3qXuT3Cw/YevqescNScqO4GEocm4G5XQkK9ljBiQcnWHRmES3LtWSI2xAA1p+4y8zfL/OmS0mW9HI3uRVuusqZUSsGV7p8FYoX6UvziBecfraJupXjmLH7EtcfhRk7NCUXefr0Kb+MbgiP/oEOS7kXH8G4I+OoWKgis5vMxkyYse30PabtukjLaiVY3qeOSe3tkFk5N3LF4NoOX0DTKwlYJ8QjC36PbT4zhm/0JTJGLVVWssfySQPoXPQGIQ5tiKjUjJEHRyKlTFrpttMvgAk7LuBV1Z6v362DlUXOTmE5O3rFoMzMzPD4cBvD74dyLfIO7Rtd5WZwODN+u2Ts0JRc4NC+PXQTfxMhClCk73dM9pmM/zN/FjZbSLmC5fjjwgPGbjtPg4rFWPWeh8lsKfk6VAJWMqWSa12cyw/hjYgX/HHvB/o2sWLrmXvs8gs0dmhKDhYWFsa1b/vjUsIc67dX8831zRy89//27jyuyjL///jrYgdlEUEUBZfSDBdMAdcxK3Ncyja10dQsXLI0bdTMaZmaqWmqMW35llqaaa5pWZqmLaZZZoJ7mmgqioIgCBzgcNbr94fO99Hj+2sxPZzrAJ/nXx48eL9vfPj2Otd939f1JVNTptI1viufHTzLpOW76ZhYj/mjUggJrP7lC1LA4jKkjHiGJ2O7EeZykmV9hdRmkfztw/2ydKW4bMv+PZGxba3kNenHV3UCmLN3DrdddRvDrx3OlqwCHlqyizbxEbxzX6pP7GThKVLA4rLU7z+LyXnF/FieTWpyBsEBF+aDKx0yHyz+oMpSRtfPoCI4jqIB03hi2xMkxybzVNen+O5YEWMXZXBVg7o+sYebp0kBi8ui6sQQFDCAAWXlLDk8n0kDQvgxz8LfP5L5YHHpLBYLzk+m4Wc5TeWQN3h423SiQqKYfcNs9ueUkf7uThKjw3gvPY2osCDTcT1OClhctgFT59D1oCbW5WRF1t8Z16sJKzJOsSpTtjISl+a
daXcQsH85FV0nMilrIaX2Ul678TXOFAYwasFO4iJCWGJ46/iqJAUsLpufnx/XTV7LpOxCcuzncNb9kC4tonlizX4O58n9weK3rVs6l3sidpCjG/B0sJV9Bft4vsfzuCvjGTH/eyLDAlkyujMNwkNMR60yUsDiiiS2bEtQ/BiGl5Sy8qfV3NOrgrrBgYxfkkmZzWk6nvBRp0+dJOKLRwkN8ufjPiPZcOJTJnWcRGJIGiPm7yAsyJ9lY7oQHxX6+39YNSYFLK5Yn3HPMqHFIFra7fwn4wn+Nagp2YUVTF8lW9uL/5/b7WbDk/3omQDrUkbw5tGV3NriVm5s+BeGvb0DPz/F0jFdSIgOMx21ynmtgJVSC5RS+UqpA946pvAOpRR1+v6L552RlNksrD72HNP6tOKT/bks+OaE6XjCx1gOb2FUYg7rw5N4sWw7HWI7MLr1o9zz9g5cbs3S0Z1pHlPHdEyv8OYIeCHQ14vHE94UEExZ88k8fO483xZkUjd2O32S4nh+/SF2nigynU74CmsxkRsnkVc/gRcaBxIbGstjnV5g5IJdVNhdvJfemZZx4aZTeo3XClhrvRWQf4k1WKe+w4gs7UTPCiszd77E2JtDSIgO48EluzhbKvvJ1XalJSXs/mdPSi1nmNC4MQ7t5B9dZvHQosOUVDhYnJ5GUnyE6Zhe5XNzwEqpsUqpDKVURkFBgek44g8a8OQq+h+0Uc/l4ImvJjJ76LWU25w8uGQXdqfbdDxhiNaa1TMG0DYomwebJpFdkc/fO7/I9OV5FFhsvJueRvsmUaZjep3PFbDWep7WOkVrnRIbG2s6jviDAoOCaD9xPVNOFJJrK2DFkZd4aVAymdnn+ee6g6bjCUM+mvMP/hK9jykxiezV55na6Qn+s8ZFbnElC+9Po2NiPdMRjfC5AhbVX0KrdsR1+gfjii2szd6II3QH465vweLvslmx86TpeMLLDuzcRvLhmbxerz6bw+G+pHEs/iyWU+crWDAqldRm0aYjGiMFLKpEyu0PMC51CmnWSp799mluS1X8qWUMT6w5QGa2XAqoNdxuihf+hYzGdVkUU5d+TW9l07ftOX6unLdHptL1qvqmExrlzdvQlgHbgWuUUjlKqXRvHVuY4d9tIqMLwgl32nhk41heGNSKxlGhjFu8i9wSq+l4whu+mY070cGzDWJIi+vK/r19OFZQzlsjU+jRMsZ0OuO8eRfEUK11I611oNa6idZ6vreOLQxRiqbDl/LIsSLyHEX8+9vpzBvRCavdybjFmbJyWg2378PZ7N/2b6Y2bMjVUdeQe2QwPxVYeWtkCj1byfUdkCkIUcXim7emUbeXeLCgmM352/m24H1m/+U69p8uYer7e+VJuRrq648Xo3f9g/FxDYgKi6P85L0czXMyd0Qnrpfy/V9SwKLKpQ4YSYLuQ+/yCl7OnE1EvRNM79uadftyefWLo6bjCQ87kXUQNk9iatM4VEgknB3HsbP+zB3ZiRuuaWA6nk+RAhZe0W/afHqfakBzh4NpX0xkYMdQ7urYhFmfZ7Fu3xnT8YSHWEpL2P1yb2YmxVISFEZw0YOcyAtj3ggp318iBSy8QinFgOe28UpISxyOciZtTOepgVeR0rQeU1buJTP7vOmIwgM+frwXq6+rS3ZgMGEl4zmVF81bI1PoJeX7i6SAhff4+dNs0GKeOGfncNkpnvx8EnNHdKJhZAhjFmWQXVhuOqG4ArZ9y9ncpox9wcHUsaSTl9+YBaNSZc73N0gBC+8KjSIx9UXG5hezufB7lh14jXdGpeLWmvsW7qS4wm46obgMx7d/wLRvnmJHaAgRlqEUFVzDwvvS6H613Gr2W6SAhde163UH7RuOZ0BpGXMPv8vR0q28NTKFnCIro9/NkNvTqpmtaxcz6/upbA4Lpl7xAErOdWJxehpdWtTuhywuhRSwMKLnyMfpXt6d5EobM7ZOJ6RODi/fnUzmyfM8vGw3LrfcnlYd7N+5lVVZT7E5qg4
xxT2wlNzIsrFd6NS09j5e/EdIAQtjbpm+mNuzY4h1OZnwaTodmrt56pYkNh08y5MfHZB7hH3cyWNHmPPlKDbH1KXR+euwWO5i+diutG0caTpatSEFLIxRSjHouW94o14XnI5yxq8dyp0p0Txw/VUs3XGSWZ8fMR1R/IpKaznPrejH1oZ1aF7UknLbvax6oCvXNKw9i6l7ghSwMEspWtz+Fs/aYjltP8/9K25nUu+mDO7UhFe/OMLbXx8znVD8H263ixfX3Mm38aFcW5RAhX6Y1Q90p1kt2UbIk6SAhXn+AXQcvJxxx0s44i5g/MrB/PP2a+nXtiHPfnKIlTtPmU4oLrJWWpnxXl/et5/huqI4KoP+xspx3WgYWXO3jq9KUsDCJ0TGNOKO9C9IP1VCpjObRz+8l1l3J/OnljE89sE+Pt4rT8uZVlFZwajXOrNe59GtKAoinmX5mC5E1wkyHa3akgIWPqNBYisG372Wu/NK2Ww9wPMbJzJneEdSmkXzyIo9rN+fazpirWW1WRn+P9042EDTuzCCsIazmD8qjbCgANPRqjUpYOFT4lt1YHi/pdxRUskHhdt4Z8czLBiVSoeEKB5etpuNP+SZjljrWCotDHmzG0diXNxaUJf41nOZeXcnAv2lPq6U/ASFz2nWvjtPD9/EXTaYe/wjXv90CgvvS6Vt40geWrKLDTIS9poSWwn3vNeH7CgHd+eH0abbIqb1b4ufnzIdrUaQAhY+yS+6OU8O+ogbS6wsKf6S19dPZlF6GskJUUxYtpuP9pw2HbHGy6/IZ/iqW8kJsDDqbDhd+67inm4tTceqUaSAhc/yj27B5Ovn0/N8BUstW3l5zRjevT+NlKb1mLxiDysz5O6IqnIg9wC3L+lDvr2QcWej6TvsY25ql2A6Vo0jBSx8WvPk65nR/32uL6pgtS2Tfy0fxjujUulxdQyPrtrHvK0/mY5Y43z+42fcu34YwX52HsxtwO1jPyEpQVY0qwpSwMLnNWnVkafu+oRehVbWcohXPktn3siODGjXiH+t/5HnNxySx5Y9ZN72t5my/a8kuG2MzW/B3VM2EldPnm6rKnIPiagWGiS25tmRW3hj7WCWFu6i+MPb+c/g1USFBTJ3yzEKSm38+672BAXImOJyuNwupqx7jC/Of0pnWyW3VXTmlumLUH7y86xKUsCi2oiMacxjI74iZtWdvGrN5sTcVN4YvokG4SHM+jyLMyVW5g5PITIs0HTUasVitzBm3QR+sOxiSEkZN0QMoccDL5iOVSvIf2+iWlEBQYwe8jHDzkVxOFwzcvmN9G6Sy6y7k9mVXcwdb37DiXOys8alOnQui96L+3GoNJNHz5UyvttMeoyS8vUWKWBR7Sg/P2ZM+ZoJ7lQKg/14aNs4As+uZnF6GkXldga+vo0tWQWmY/q8hbtXM/TjIYRQxKy8coYMWkVMyp2mY9UqUsCi2hp9/zs8Ez8WPw1P584j//h/WDuhB/FRodz3zve8+dVPcnHuF1Q6Kxm6chIz9z1NG0cFL51W9Bq/jeCmqaaj1TpSwKJa+3PfScy7YSHXuoP4W846lm28i4VDr6Z/u0a88OmPjFmUIfvM/cyevCx6vjuAA9YvubeklMcrWpI2IxO/yMamo9VKUsCi2rv66s4sGPktw+pczWJ7DhNX9WJwTCZ/vzWJLVkF9H/lazKzi0zHNEprzZNfzuPeDUMIUHm8nFvIiFYPkzR5HQTKUpKmSAGLGiEwMJQZgz7kweCeZIcG8njOLEq+G8/KMWn4+ysGz9nOzE2HsTvdpqN63cGzp+i+YDBrTr1G58oyVpQGcvOoTcT1nQJK1nQwSfnyHFlKSorOyMgwHUNUM/uyvmLGlw9xMtSPXgVWRnSZycrcZqzKzKFNfAQzhyTTumGE6ZhVzu50Mu3TuWzOn0+gsjO16Dz9G/UlctCrECS7V1QlpVSm1jrl997n1RGwUqqvUuqwUuqoUuoxbx5b1B7tW/Xig/RM+joS2BI
TwpOHZzAkegXz7kkmr6SSW17dxouf/kilw2U6apVZ88Muui64ky8L55Bit7DwZBG39HqFyGHzpXx9iNdGwEopfyALuBnIAXYCQ7XWB3/te2QELK7U91kbeGb745zEwZ9LHfSMHsmWwAGs3pVDYnQYTw9M4oZrGqBqyEfxg7n5PLLpJc7ojdRxu5lWVERqSAqJ9y+EMNkq3lt8cQScBhzVWh/TWtuB5cBtXjy+qIXSWvVj1bBt3BvTjc/DA3jR9h7Rh4bwcg8nAf6K+xdmMHLB9/yYV2o66hU5VWRh6NKXGbr+VnL1p9xZZmF1kaZ//wUkTvhYytdHefNR5MbAz9cPzAE6e/H4opYKDQxj6oC59D+dyePrxrK8mZ0WZx5loCWM0O6v8MauEvq/8jUDk+N5+KaWtIitazryJcsuLOOZL1aQWbwId3ARyQ4bf80vp2PP6dD5AQiQ/dp8mTenIAYDf9Zaj774egSQprWe+H/eNxYYC5CYmNgpOzvbK/lE7aC1Zk3GAl7bM5uCIOhkc5Ae25Od+h7e2m3F5nQxMDmeMT1b0CY+0nTcX6S1Zs+pIp7fupLD5Stxh5yjmd3Bw4XFtK7Xk4Rhr0F4nOmYtdqlTkF4s4C7Ak9rrf988fUMAK3187/2PTIHLKqKw+VgyY5ZLD66nHztoIPVxnW5wVhiJ7IiP5EKu4suLaIZ1a0ZN7aO84lV1iyVDlbv/on5e1ZgCfgMV1AJzewOxpwvoV1wB5qNeAMV3dx0TIFvFnAAFy7C3QSc5sJFuGFa6x9+7XukgEVVs7lsrMx4g7f3v0NRoKaJw0G3XBt1/PqwRt/KyTJFdJ0gbusQz20dGtO+caRX90Oz2l1sycpnye5v2F24noCI3bj8nSRX2hhRVkmPZgMJu2GKFK+P8bkCBlBK9QdmA/7AAq31c7/1filg4S1Ot5MNP7zPgh2zORpYQYDWdLfa6RmWxKnyHiw43ZJyVwANI0K4OSmOP7WMoXPz+h5f+tLt1hzJL2PH8UI2HN7HvsLNhIXvpDK4hCC35uaKCvqfd9EhZSwRf3oA6tT36PGFZ/hkAf9RUsDChCOFh1m05QW+tuyhEAfBbjcp5ZU0Kg7B6erId/aeHHE1QilF64YRtGscQVKjCFo1DCehXhiNIkMI+J0t27XWFFc4OF1s5di5cg7llnLgTD578jMICd5NQJ0jlAVfWFazQ6WNPiVWOgS3oVWfvxJ8TW+QhdJ9mhSwEFfI6XayO/d71mx/k2+L93Iu8MK/lRini1ZWO4kBDVCOJpwsjiPbehXZ7gQqCMFPQf26wYSHBBAREkjQxTJ2a02ZzUmJ1UaxrRA/v1NEBZ8gNOQk9tA8ioMq0QqC3W462mxc7wqid5OehDftQ1ib/rJmQzUiBSyEB2mtOVV6ki0HVrMtaz0nXOfIDXChfzYdHO1yEe50E+qEIO1PQEAI4IfV6cSuXNj9NGUBmpIAcP/swY8Il4u2NjvNrXCViiP16r407TwYFXuNrNVQTUkBC1HFKhwV/HTuIIePf8ORkxnkWXKwuMoo0zac/pqAoACc2g0OB0FOTYiGCJcfUTqImKBI0q5Ko0VMEvVj2+HfMAkCQ02fkvCQSy1g2RNOiMsUFhhGu0YptGuUAt1MpxHVkczkCyGEIVLAQghhiBSwEEIYIgUshBCGSAELIYQhUsBCCGGIFLAQQhgiBSyEEIZIAQshhCFSwEIIYYgUsBBCGCIFLIQQhvj0amhKqQLA13fljAHOmQ5RReTcqq+afH7V4dyaaq1jf+9NPl3A1YFSKuNSlp2rjuTcqq+afH416dxkCkIIIQyRAhZCCEOkgK/cPNMBqpCcW/VVk8+vxpybzAELIYQhMgIWQghDpICFEMIQKWAPUkpNVUpppVSM6SyeopR6SSn1o1Jqn1LqQ6VUlOlMV0op1VcpdVgpdVQp9ZjpPJ6ilEpQSm1WSh1SSv2glJpkOpOnKaX8lVK7lVLrTGfxBClgD1FKJQA3Ayd
NZ/Gwz4C2Wuv2QBYww3CeK6KU8gf+B+gHJAFDlVJJZlN5jBOYorW+FugCPFSDzu2/JgGHTIfwFClgz5kFPArUqKuaWutNWmvnxZffAU1M5vGANOCo1vqY1toOLAduM5zJI7TWuVrrXRd/beFCUTU2m8pzlFJNgAHA26azeIoUsAcopQYCp7XWe01nqWL3AxtMh7hCjYFTP3udQw0qqf9SSjUDrgN2mE3iUbO5MMhxmw7iKQGmA1QXSqnPgYa/8FuPA38D+ng3kef81rlprT+6+J7HufARd4k3s1UB9Qtfq1GfWpRSdYHVwGStdanpPJ6glLoFyNdaZyqlepnO4ylSwJdIa937l76ulGoHNAf2KqXgwkf0XUqpNK11nhcjXrZfO7f/UkrdC9wC3KSr/43jOUDCz143Ac4YyuJxSqlALpTvEq31B6bzeFB3YKBSqj8QAkQopd7TWg83nOuKyIMYHqaUOgGkaK19fbWmS6KU6gu8DFyvtS4wnedKKaUCuHAx8SbgNLATGKa1/sFoMA9QF0YA7wJFWuvJpvNUlYsj4Kla61tMZ7lSMgcsfs/rQDjwmVJqj1JqjulAV+LiBcUJwEYuXKRaWRPK96LuwAjgxot/V3sujhiFj5IRsBBCGCIjYCGEMEQKWAghDJECFkIIQ6SAhRDCEClgIYQwRApYCCEMkQIWQghDpIBFrXJxPdlXLq6Xu18p1cJ0JlF7SQGL2mYGcExr3QZ4FXjQcB5Ri8liPKLWUErVAe7QWne6+KXjXFhfVggjpIBFbdIbSFBK7bn4Ohr43GAeUcvJFISoTToAT2mtO2itOwCbgD2/8z1CVBkpYFGb1AMq4H+XpewDrDWaSNRqUsCiNsniwmaVAI8An2itjxvMI2o5WY5S1BpKqXpc2NMuBtgOjNVaW82mErWZFLAQQhgiUxBCCGGIFLAQQhgiBSyEEIZIAQshhCFSwEIIYYgUsBBCGCIFLIQQhvw/kORSkEwGci4AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] @@ -787,14 +780,14 @@ "source": [ "best_fit_alices = float(theta_grid[np.argmin(nllr_test_alices)])\n", "best_fit_carl = float(theta_grid[np.argmin(nllr_test_carl)])\n", - "best_fit_scandal = float(theta_grid[np.argmin(nllr_test_scandal)])\n", + "best_fit_mix = float(theta_grid[np.argmin(nllr_test_mix)])\n", "\n", "fig = plt.figure(figsize=(5,5))\n", "\n", - "plt.plot(theta_grid, nllr_test_true, label=r'Ground truth ($\\theta = 1.0$)')\n", - "plt.plot(theta_grid, nllr_test_carl, label=r'CARL ($\\hat{\\theta} = ' + '{:.2f}$)'.format(best_fit_carl))\n", - "plt.plot(theta_grid, nllr_test_alices, label=r'ALICES ($\\hat{\\theta} = ' + '{:.2f}$)'.format(best_fit_alices))\n", - "plt.plot(theta_grid, nllr_test_scandal, label=r'SCANDAL ($\\hat{\\theta} = ' + '{:.2f}$)'.format(best_fit_scandal))\n", + "plt.plot(theta_grid, nllr_test_true, ls='--', c='black', label=r'Ground truth ($\\theta = 1.0$)')\n", + "plt.plot(theta_grid, nllr_test_carl, label=r'CARL ($\\hat{\\theta} = ' + '{:.3f}$)'.format(best_fit_carl))\n", + "plt.plot(theta_grid, nllr_test_alices, label=r'ALICES ($\\hat{\\theta} = ' + '{:.3f}$)'.format(best_fit_alices))\n", + "plt.plot(theta_grid, nllr_test_mix, label=r'CARL-ALICES ($\\hat{\\theta} = ' + '{:.3f}$)'.format(best_fit_mix))\n", "\n", "plt.xlabel(r'$\\theta$')\n", "plt.ylabel(r'$\\mathbb{E}_x [ -2\\, \\log \\,r(x | \\theta, \\theta_{1}) ]$')\n", diff --git a/madminer/__version__.py b/madminer/__version__.py index c49a95c35..493f7415d 100644 --- a/madminer/__version__.py +++ b/madminer/__version__.py @@ -1 +1 @@ -__version__ = "0.2.8" +__version__ = "0.3.0" diff --git a/madminer/analysis.py b/madminer/analysis.py new file mode 100644 index 000000000..018a58dae --- /dev/null +++ b/madminer/analysis.py @@ -0,0 +1,665 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import logging +import numpy as np +import six + +from madminer.utils.interfaces.madminer_hdf5 import load_madminer_settings, madminer_event_loader 
+from madminer.utils.morphing import PhysicsMorpher, NuisanceMorpher +from madminer.utils.various import format_benchmark, mdot + +logger = logging.getLogger(__name__) + + +class DataAnalyzer(object): + """ + Collects common functionality that is used when analysing data in the MadMiner file. + + Parameters + ---------- + filename : str + Path to MadMiner file (for instance the output of `madminer.delphes.DelphesProcessor.save()`). + + disable_morphing : bool, optional + If True, the morphing setup is not loaded from the file. Default value: False. + + include_nuisance_parameters : bool, optional + If True, nuisance parameters are taken into account. Default value: True. + + """ + + def __init__(self, filename, disable_morphing=False, include_nuisance_parameters=True): + # Save setup + self.include_nuisance_parameters = include_nuisance_parameters + self.madminer_filename = filename + + logger.info("Loading data from %s", filename) + + # Load data + ( + self.parameters, + self.benchmarks, + self.benchmark_is_nuisance, + self.morphing_components, + self.morphing_matrix, + self.observables, + self.n_samples, + _, + self.reference_benchmark, + self.nuisance_parameters, + ) = load_madminer_settings(filename, include_nuisance_benchmarks=include_nuisance_parameters) + + self.n_parameters = len(self.parameters) + self.n_benchmarks = len(self.benchmarks) + self.n_benchmarks_phys = np.sum(np.logical_not(self.benchmark_is_nuisance)) + self.n_observables = len(self.observables) + + self.n_nuisance_parameters = 0 + if self.nuisance_parameters is not None and include_nuisance_parameters: + self.n_nuisance_parameters = len(self.nuisance_parameters) + else: + self.nuisance_parameters = None + + logger.info("Found %s parameters", self.n_parameters) + for key, values in six.iteritems(self.parameters): + logger.debug( + " %s (LHA: %s %s, maximal power in squared ME: %s, range: %s)", + key, + values[0], + values[1], + values[2], + values[3], + ) + + if self.nuisance_parameters is not 
None: + logger.info("Found %s nuisance parameters", self.n_nuisance_parameters) + for key, values in six.iteritems(self.nuisance_parameters): + logger.debug(" %s (%s)", key, values) + else: + logger.info("Did not find nuisance parameters") + + logger.info("Found %s benchmarks, of which %s physical", self.n_benchmarks, self.n_benchmarks_phys) + for (key, values), is_nuisance in zip(six.iteritems(self.benchmarks), self.benchmark_is_nuisance): + if is_nuisance: + logger.debug(" %s: systematics", key) + else: + logger.debug(" %s: %s", key, format_benchmark(values)) + + logger.info("Found %s observables", len(self.observables)) + for i, obs in enumerate(self.observables): + logger.debug(" %2.2s %s", i, obs) + logger.info("Found %s events", self.n_samples) + + # Morphing + self.morpher = None + if self.morphing_matrix is not None and self.morphing_components is not None and not disable_morphing: + self.morpher = PhysicsMorpher(self.parameters) + self.morpher.set_components(self.morphing_components) + self.morpher.set_basis(self.benchmarks, morphing_matrix=self.morphing_matrix) + + logger.info("Found morphing setup with %s components", len(self.morphing_components)) + + else: + logger.info("Did not find morphing setup.") + + # Nuisance morphing + self.nuisance_morpher = None + if self.nuisance_parameters is not None: + self.nuisance_morpher = NuisanceMorpher( + self.nuisance_parameters, list(self.benchmarks.keys()), self.reference_benchmark + ) + logger.info("Found nuisance morphing setup") + + def event_loader(self, start=0, end=None, batch_size=100000, include_nuisance_parameters=None): + if include_nuisance_parameters is None: + include_nuisance_parameters = self.include_nuisance_parameters + for data in madminer_event_loader( + self.madminer_filename, + start, + end, + batch_size, + include_nuisance_parameters, + benchmark_is_nuisance=self.benchmark_is_nuisance, + ): + yield data + + def weighted_events(self, theta=None, nu=None, start_event=None, end_event=None, 
derivative=False): + """ + Returns all events together with the benchmark weights (if theta is None) or weights for a given theta. + + Parameters + ---------- + theta : None or ndarray or str, optional + If None, the function returns all benchmark weights. If str, the function returns the weights for a given + benchmark name. If ndarray, it uses morphing to calculate the weights for this value of theta. Default + value: None. + + nu : None or ndarray, optional + If None, the nuisance parameters are set to their nominal values. Otherwise, and if theta is an ndarray, + sets the values of the nuisance parameters. + + start_event : int + Index (in the MadMiner file) of the first event to consider. + + end_event : int + Index (in the MadMiner file) of the last unweighted event to consider. + + derivative : bool, optional + If True and if theta is not None, the derivative of the weights with respect to theta are returned. Default + value: False. + + Returns + ------- + x : ndarray + Observables with shape `(n_unweighted_samples, n_observables)`. + + weights : ndarray + If theta is None and derivative is False, benchmark weights with shape + `(n_unweighted_samples, n_benchmarks)` in pb. If theta is not None and derivative is True, the gradient of + the weight for the given parameter with respect to theta with shape `(n_unweighted_samples, n_gradients)` + in pb. Otherwise, weights for the given parameter theta with shape `(n_unweighted_samples,)` in pb. 
+ + """ + + x, weights_benchmarks = next(self.event_loader(batch_size=None, start=start_event, end=end_event)) + + if theta is None: + return x, weights_benchmarks + + elif isinstance(theta, six.string_types): + i_benchmark = list(self.benchmarks.keys()).index(theta) + return x, weights_benchmarks[:, i_benchmark] + + elif derivative: + dtheta_matrix = self._get_dtheta_benchmark_matrix(theta) + + gradients_theta = mdot(dtheta_matrix, weights_benchmarks) # (n_gradients, n_samples) + gradients_theta = gradients_theta.T + + return x, gradients_theta + + else: + # TODO: nuisance params + if nu is not None: + raise NotImplementedError + + theta_matrix = self._get_theta_benchmark_matrix(theta) + weights_theta = mdot(theta_matrix, weights_benchmarks) + + return x, weights_theta + + def xsecs( + self, thetas=None, nus=None, events="all", test_split=0.2, include_nuisance_benchmarks=True, batch_size=100000 + ): + """ + Returns the total cross sections for benchmarks or parameter points. + + Parameters + ---------- + thetas : None or list of (ndarray or str), optional + If None, the function returns all benchmark cross sections. Otherwise, it returns the cross sections for a + series of parameter points that are either given by their benchmark name (as a str), their benchmark index + (as an int), or their parameter value (as an ndarray, using morphing). Default value: None. + + nus : None or list of (None or ndarray), optional + If None, the nuisance parameters are set to their nominal values (0), i.e. no systematics are taken into + account. Otherwise, the list has to have the same number of elements as thetas, and each entry can specify + nuisance parameters at nominal value (None) or a value of the nuisance parameters (ndarray). + + include_nuisance_benchmarks : bool, optional + Whether to include nuisance benchmarks if thetas is None. Default value: True. + + test_split : float, optional + Fraction of events reserved for testing. Default value: 0.2. 
+ + events : {"train", "test", "all"}, optional + Which events to use. Default: "all". + + batch_size : int, optional + Size of the batches of events that are loaded into memory at the same time. Default value: 100000. + + Returns + ------- + xsecs : ndarray + Calculated cross sections in pb. + + xsec_uncertainties : ndarray + Cross-section uncertainties in pb. Basically calculated as sum(weights**2)**0.5. + """ + + logger.debug("Calculating cross sections for thetas = %s and nus = %s", thetas, nus) + + # Inputs + if thetas is not None: + include_nuisance_benchmarks = True + + if thetas is not None: + if nus is None: + nus = [None for _ in thetas] + assert len(nus) == len(thetas), "Numbers of thetas and nus don't match!" + + # Which events to use + if events == "all": + start_event, end_event = None, None + correction_factor = 1.0 + elif events == "train": + start_event, end_event, correction_factor = self._train_test_split(True, test_split) + elif events == "test": + start_event, end_event, correction_factor = self._train_test_split(False, test_split) + else: + raise ValueError("Events has to be either 'all', 'train', or 'test', but got {}!".format(events)) + + # Theta matrices (translation of benchmarks to theta, at nominal nuisance params) + theta_matrices = [self._get_theta_benchmark_matrix(theta) for theta in thetas] + theta_matrices = np.asarray(theta_matrices) # Shape (n_thetas, n_benchmarks) + + # Loop over events + xsecs = 0.0 + xsec_uncertainties = 0.0 + + for i_batch, (_, benchmark_weights) in enumerate( + madminer_event_loader( + self.madminer_filename, + start=start_event, + end=end_event, + include_nuisance_parameters=include_nuisance_benchmarks, + benchmark_is_nuisance=self.benchmark_is_nuisance, + batch_size=batch_size, + ) + ): + n_batch, _ = benchmark_weights.shape + logger.debug("Batch %s with %s events", i_batch + 1, n_batch) + + # Benchmark xsecs + if thetas is None: + xsecs += np.sum(benchmark_weights, axis=0) + xsec_uncertainties += 
np.sum(benchmark_weights * benchmark_weights, axis=0) + + # xsecs at given parame ters(theta, nu) + else: + # Weights at nominal nuisance params (nu=0) + weights_nom = mdot(theta_matrices, benchmark_weights) # Shape (n_thetas, n_batch) + weights_sq_nom = mdot(theta_matrices, benchmark_weights * benchmark_weights) # same + logger.debug("Nominal weights: %s", weights_nom) + + # Effect of nuisance parameters + nuisance_factors = self._calculate_nuisance_factors(nus, benchmark_weights) + weights = nuisance_factors * weights_nom + weights_sq = nuisance_factors * weights_sq_nom + logger.debug("Nuisance factors: %s", nuisance_factors) + + # Sum up + xsecs += np.sum(weights, axis=1) + xsec_uncertainties += np.sum(weights_sq, axis=1) + + xsec_uncertainties = xsec_uncertainties ** 0.5 + + # Correct for not using all events + xsecs *= correction_factor + xsec_uncertainties *= correction_factor + + logger.debug("xsecs and uncertainties [pb]:") + for this_xsec, this_uncertainty in zip(xsecs, xsec_uncertainties): + logger.debug(" (%4f +/- %4f) pb (%4f %%)", this_xsec, this_uncertainty, 100 * this_uncertainty / this_xsec) + + return xsecs, xsec_uncertainties + + def _calculate_nuisance_factors(self, nus, benchmark_weights): + if self._any_nontrivial_nus(nus): + return np.asarray( + [self.nuisance_morpher.calculate_nuisance_factors(nu, benchmark_weights) for nu in nus] + ) # Shape (n_thetas, n_batch) + else: + return 1.0 + + @staticmethod + def _any_nontrivial_nus(nus): + if nus is None: + return False + for nu in nus: + if nu is not None: + return True + return False + + def xsec_gradients(self, thetas, nus=None, events="all", test_split=0.2, gradients="all", batch_size=100000): + """ + Returns the gradient of total cross sections with respect to parameters. + + Parameters + ---------- + thetas : list of (ndarray or str), optional + If None, the function returns all benchmark cross sections. 
Otherwise, it returns the cross sections for a + series of parameter points that are either given by their benchmark name (as a str), their benchmark index + (as an int), or their parameter value (as an ndarray, using morphing). Default value: None. + + nus : None or list of (None or ndarray), optional + If None, the nuisance parameters are set to their nominal values (0), i.e. no systematics are taken into + account. Otherwise, the list has to have the same number of elements as thetas, and each entry can specify + nuisance parameters at nominal value (None) or a value of the nuisance parameters (ndarray). + + test_split : float, optional + Fraction of events reserved for testing. Default value: 0.2. + + events : {"train", "test", "all"}, optional + Which events to use. Default: "all". + + gradients : {"all", "theta", "nu"}, optional + Which gradients to calculate. Default value: "all". + + batch_size : int, optional + Size of the batches of events that are loaded into memory at the same time. Default value: 100000. + + Returns + ------- + xsecs_gradients : ndarray + Calculated cross section gradients in pb with shape (n_gradients,). + """ + + logger.debug("Calculating cross section gradients for thetas = %s and nus = %s", thetas, nus) + + # Inputs + include_nuisance_benchmarks = nus is not None or gradients in ["all", "nu"] + if nus is None: + nus = [None for _ in thetas] + assert len(nus) == len(thetas), "Numbers of thetas and nus don't match!" 
+ if gradients not in ["all", "theta", "nu"]: + raise RuntimeError("Gradients has to be 'all', 'theta', or 'nu', but got {}".format(gradients)) + + # Which events to use + if events == "all": + start_event, end_event = None, None + correction_factor = 1.0 + elif events == "train": + start_event, end_event, correction_factor = self._train_test_split(True, test_split) + elif events == "test": + start_event, end_event, correction_factor = self._train_test_split(False, test_split) + else: + raise ValueError("Events has to be either 'all', 'train', or 'test', but got {}!".format(events)) + + # Theta matrices (translation of benchmarks to theta, at nominal nuisance params) + theta_matrices = np.asarray( + [self._get_theta_benchmark_matrix(theta) for theta in thetas] + ) # shape (n_thetas, n_benchmarks) + theta_gradient_matrices = np.asarray( + [self._get_dtheta_benchmark_matrix(theta) for theta in thetas] + ) # shape (n_thetas, n_gradients, n_benchmarks) + + # Loop over events + xsec_gradients = 0.0 + + for i_batch, (_, benchmark_weights) in enumerate( + madminer_event_loader( + self.madminer_filename, + start=start_event, + end=end_event, + include_nuisance_parameters=include_nuisance_benchmarks, + benchmark_is_nuisance=self.benchmark_is_nuisance, + batch_size=batch_size, + ) + ): + n_batch, _ = benchmark_weights.shape + logger.debug("Batch %s with %s events", i_batch + 1, n_batch) + + if gradients in ["all", "theta"]: + nom_gradients = mdot( + theta_gradient_matrices, benchmark_weights + ) # Shape (n_thetas, n_phys_gradients, n_batch) + nuisance_factors = self._calculate_nuisance_factors(nus, benchmark_weights) # Shape (n_thetas, n_batch) + try: + dweight_dtheta = nuisance_factors[:, np.newaxis, :] * nom_gradients + except TypeError: + dweight_dtheta = nom_gradients + + if gradients in ["all", "nu"]: + weights_nom = mdot(theta_matrices, benchmark_weights) # Shape (n_thetas, n_batch) + nuisance_factor_gradients = np.asarray( + 
[self.nuisance_morpher.calculate_nuisance_factor_gradients(nu, benchmark_weights) for nu in nus] + ) # Shape (n_thetas, n_nuisance_gradients, n_batch) + dweight_dnu = nuisance_factor_gradients * weights_nom[:, np.newaxis, :] + + if gradients == "all": + dweight_dall = np.concatenate((dweight_dtheta, dweight_dnu), 1) + elif gradients == "theta": + dweight_dall = dweight_dtheta + elif gradients == "nu": + dweight_dall = dweight_dnu + xsec_gradients += np.sum(dweight_dall, axis=2) + + # Correct for not using all events + xsec_gradients *= correction_factor + + return xsec_gradients + + def _weights(self, thetas, nus, benchmark_weights, theta_matrices=None): + """ + Turns benchmark weights into weights for given parameter points (theta, nu). + + Parameters + ---------- + thetas : list of (ndarray or str) + If None, the function returns all benchmark cross sections. Otherwise, it returns the cross sections for a + series of parameter points that are either given by their benchmark name (as a str), their benchmark index + (as an int), or their parameter value (as an ndarray, using morphing). + + nus : None or list of (None or ndarray) + If None, the nuisance parameters are set to their nominal values (0), i.e. no systematics are taken into + account. Otherwise, the list has to have the same number of elements as thetas, and each entry can specify + nuisance parameters at nominal value (None) or a value of the nuisance parameters (ndarray). + + Returns + ------- + weights : ndarray + Calculated weights in pb. + """ + + n_events, _ = benchmark_weights.shape + + # Inputs + include_nuisance_benchmarks = nus is not None + if nus is None: + nus = [None for _ in thetas] + assert len(nus) == len(thetas), "Numbers of thetas and nus don't match!" 
+ + # Theta matrices (translation of benchmarks to theta, at nominal nuisance params) + if theta_matrices is None: + theta_matrices = [self._get_theta_benchmark_matrix(theta) for theta in thetas] + theta_matrices = np.asarray(theta_matrices) # Shape (n_thetas, n_benchmarks) + + # Weights at nominal nuisance params (nu=0) + weights_nom = mdot(theta_matrices, benchmark_weights) # Shape (n_thetas, n_batch) + + # Effect of nuisance parameters + nuisance_factors = self._calculate_nuisance_factors(nus, benchmark_weights) + weights = nuisance_factors * weights_nom + + return weights + + def _weight_gradients( + self, thetas, nus, benchmark_weights, gradients="all", theta_matrices=None, theta_gradient_matrices=None + ): + """ + Turns benchmark weights into weights for given parameter points (theta, nu). + + Parameters + ---------- + thetas : list of (ndarray or str) + If None, the function returns all benchmark cross sections. Otherwise, it returns the cross sections for a + series of parameter points that are either given by their benchmark name (as a str), their benchmark index + (as an int), or their parameter value (as an ndarray, using morphing). + + nus : None or list of (None or ndarray) + If None, the nuisance parameters are set to their nominal values (0), i.e. no systematics are taken into + account. Otherwise, the list has to have the same number of elements as thetas, and each entry can specify + nuisance parameters at nominal value (None) or a value of the nuisance parameters (ndarray). + + gradients : {"all", "theta", "nu"}, optional + Which gradients to calculate. Default value: "all". + + Returns + ------- + gradients : ndarray + Calculated gradients in pb. + """ + + n_events, _ = benchmark_weights.shape + + # Inputs + if gradients == "all" and self.n_nuisance_parameters == 0: + gradients = "theta" + if nus is None: + nus = [None for _ in thetas] + assert len(nus) == len(thetas), "Numbers of thetas and nus don't match!" 
+ + # Theta matrices (translation of benchmarks to theta, at nominal nuisance params) + if theta_matrices is None: + theta_matrices = [self._get_theta_benchmark_matrix(theta) for theta in thetas] + if theta_gradient_matrices is None: + theta_gradient_matrices = [self._get_dtheta_benchmark_matrix(theta) for theta in thetas] + theta_matrices = np.asarray(theta_matrices) # Shape (n_thetas, n_benchmarks) + theta_gradient_matrices = np.asarray(theta_gradient_matrices) # Shape (n_thetas, n_gradients, n_benchmarks) + + # Calculate theta gradient + if gradients in ["all", "theta"]: + nom_gradients = mdot(theta_gradient_matrices, benchmark_weights) # (n_thetas, n_phys_gradients, n_batch) + nuisance_factors = self._calculate_nuisance_factors(nus, benchmark_weights) + try: + dweight_dtheta = nuisance_factors[:, np.newaxis, :] * nom_gradients + except TypeError: + dweight_dtheta = nom_gradients + else: + dweight_dtheta = None + + # Calculate nu gradient + if gradients in ["all", "nu"]: + weights_nom = mdot(theta_matrices, benchmark_weights) # Shape (n_thetas, n_batch) + nuisance_factor_gradients = np.asarray( + [self.nuisance_morpher.calculate_nuisance_factor_gradients(nu, benchmark_weights) for nu in nus] + ) # Shape (n_thetas, n_nuisance_gradients, n_batch) + dweight_dnu = nuisance_factor_gradients * weights_nom[:, np.newaxis, :] + else: + dweight_dnu = None + + if gradients == "theta": + return dweight_dtheta + elif gradients == "nu": + return dweight_dnu + return np.concatenate((dweight_dtheta, dweight_dnu), 1) + + def _train_test_split(self, train, test_split): + """ + Returns the start and end event for train samples (train = True) or test samples (train = False). + + Parameters + ---------- + train : bool + True if training data is generated, False if test data is generated. + + test_split : float + Fraction of events reserved for testing. + + Returns + ------- + start_event : int + Index (in the MadMiner file) of the first event to consider. 
+ + end_event : int + Index (in the MadMiner file) of the last unweighted event to consider. + + correction_factor : float + Factor with which the weights and cross sections will have to be multiplied to make up for the missing + events. + + """ + if train: + start_event = 0 + + if test_split is None or test_split <= 0.0 or test_split >= 1.0: + end_event = None + correction_factor = 1.0 + else: + end_event = int(round((1.0 - test_split) * self.n_samples, 0)) + correction_factor = 1.0 / (1.0 - test_split) + if end_event < 0 or end_event > self.n_samples: + raise ValueError("Irregular train / test split: sample {} / {}", end_event, self.n_samples) + + else: + if test_split is None or test_split <= 0.0 or test_split >= 1.0: + start_event = 0 + correction_factor = 1.0 + else: + start_event = int(round((1.0 - test_split) * self.n_samples, 0)) + 1 + correction_factor = 1.0 / (test_split) + if start_event < 0 or start_event > self.n_samples: + raise ValueError("Irregular train / test split: sample {} / {}", start_event, self.n_samples) + + end_event = None + + return start_event, end_event, correction_factor + + def _get_theta_value(self, theta): + if isinstance(theta, six.string_types): + benchmark = self.benchmarks[theta] + theta_value = np.array([benchmark[key] for key in benchmark]) + elif isinstance(theta, int): + benchmark = self.benchmarks[list(self.benchmarks.keys())[theta]] + theta_value = np.array([benchmark[key] for key in benchmark]) + else: + theta_value = np.asarray(theta) + return theta_value + + def _get_nu_value(self, nu): + if nu is None: + nu_value = np.zeros(self.n_nuisance_parameters) + else: + nu_value = np.asarray(nu) + return nu_value + + def _get_theta_benchmark_matrix(self, theta, zero_pad=True): + """Calculates vector A such that dsigma(theta) = A * dsigma_benchmarks""" + + if zero_pad: + unpadded_theta_matrix = self._get_theta_benchmark_matrix(theta, zero_pad=False) + theta_matrix = np.zeros(self.n_benchmarks) + theta_matrix[: 
unpadded_theta_matrix.shape[0]] = unpadded_theta_matrix + + elif isinstance(theta, six.string_types): + i_benchmark = list(self.benchmarks).index(theta) + theta_matrix = self._get_theta_benchmark_matrix(i_benchmark) + + elif isinstance(theta, int): + n_benchmarks = len(self.benchmarks) + theta_matrix = np.zeros(n_benchmarks) + theta_matrix[theta] = 1.0 + + else: + theta_matrix = self.morpher.calculate_morphing_weights(theta) + + return theta_matrix + + def _get_dtheta_benchmark_matrix(self, theta, zero_pad=True): + """Calculates matrix A_ij such that d dsigma(theta) / d theta_i = A_ij * dsigma (benchmark j)""" + + if self.morpher is None: + raise RuntimeError("Cannot calculate score without morphing") + + if zero_pad: + unpadded_theta_matrix = self._get_dtheta_benchmark_matrix(theta, zero_pad=False) + dtheta_matrix = np.zeros((unpadded_theta_matrix.shape[0], self.n_benchmarks)) + dtheta_matrix[:, : unpadded_theta_matrix.shape[1]] = unpadded_theta_matrix + + elif isinstance(theta, six.string_types): + benchmark = self.benchmarks[theta] + benchmark = np.array([value for _, value in six.iteritems(benchmark)]) + dtheta_matrix = self._get_dtheta_benchmark_matrix(benchmark) + + elif isinstance(theta, int): + benchmark = self.benchmarks[list(self.benchmarks.keys())[theta]] + benchmark = np.array([value for _, value in six.iteritems(benchmark)]) + dtheta_matrix = self._get_dtheta_benchmark_matrix(benchmark) + + else: + dtheta_matrix = self.morpher.calculate_morphing_weight_gradient( + theta + ) # Shape (n_parameters, n_benchmarks_phys) + + return dtheta_matrix diff --git a/madminer/core.py b/madminer/core.py index caca7e3f9..37cafc9e4 100644 --- a/madminer/core.py +++ b/madminer/core.py @@ -6,11 +6,11 @@ from collections import OrderedDict import tempfile -from madminer.morphing import Morpher +from madminer.utils.morphing import PhysicsMorpher from madminer.utils.interfaces.madminer_hdf5 import save_madminer_settings, load_madminer_settings from 
madminer.utils.interfaces.mg_cards import export_param_card, export_reweight_card, export_run_card from madminer.utils.interfaces.mg import generate_mg_process, setup_mg_with_scripts, run_mg, create_master_script -from madminer.utils.various import create_missing_folders, format_benchmark, make_file_executable, copy_file +from madminer.utils.various import create_missing_folders, format_benchmark, copy_file logger = logging.getLogger(__name__) @@ -336,7 +336,7 @@ def set_morphing( if isinstance(max_overall_power, int): max_overall_power = (max_overall_power,) - morpher = Morpher(parameters_from_madminer=self.parameters) + morpher = PhysicsMorpher(parameters_from_madminer=self.parameters) morpher.find_components(max_overall_power) if include_existing_benchmarks: @@ -476,7 +476,7 @@ def load(self, filename, disable_morphing=False): self.export_morphing = False if morphing_matrix is not None and morphing_components is not None and not disable_morphing: - self.morpher = Morpher(self.parameters) + self.morpher = PhysicsMorpher(self.parameters) self.morpher.set_components(morphing_components) self.morpher.set_basis(self.benchmarks, morphing_matrix=morphing_matrix) self.export_morphing = True diff --git a/madminer/delphes.py b/madminer/delphes.py index e7d2e258f..9ab1247d1 100644 --- a/madminer/delphes.py +++ b/madminer/delphes.py @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) -class DelphesProcessor: +class DelphesReader: """ Detector simulation with Delphes and simple calculation of observables. 
diff --git a/madminer/fisherinformation.py b/madminer/fisherinformation.py index 24dda92fb..44223680f 100755 --- a/madminer/fisherinformation.py +++ b/madminer/fisherinformation.py @@ -2,177 +2,17 @@ import logging import numpy as np -import six import os -from madminer.utils.interfaces.madminer_hdf5 import load_madminer_settings, madminer_event_loader -from madminer.utils.analysis import get_theta_benchmark_matrix, get_dtheta_benchmark_matrix, mdot -from madminer.morphing import Morpher, NuisanceMorpher -from madminer.utils.various import format_benchmark, math_commands, weighted_quantile, sanitize_array -from madminer.ml import MLForge, EnsembleForge +from madminer.analysis import DataAnalyzer +from madminer.utils.various import math_commands, weighted_quantile, sanitize_array, mdot +from madminer.utils.various import separate_information_blocks +from madminer.ml import ScoreEstimator, Ensemble logger = logging.getLogger(__name__) -def project_information(fisher_information, remaining_components, covariance=None): - """ - Calculates projections of a Fisher information matrix, that is, "deletes" the rows and columns corresponding to - some parameters not of interest. - - Parameters - ---------- - fisher_information : ndarray - Original n x n Fisher information. - - remaining_components : list of int - List with m entries, each an int with 0 <= remaining_compoinents[i] < n. Denotes which parameters are kept, and - their new order. All other parameters or projected out. - - covariance : ndarray or None, optional - The covariance matrix of the original Fisher information with shape (n, n, n, n). If None, the error on the - profiled information is not calculated. Default value: None. - - Returns - ------- - projected_fisher_information : ndarray - Projected m x m Fisher information, where the `i`-th row or column corresponds to the - `remaining_components[i]`-th row or column of fisher_information. 
- - profiled_fisher_information_covariance : ndarray - Covariance matrix of the projected Fisher information matrix with shape (m, m, m, m). Only returned if - covariance is not None. - - """ - n_new = len(remaining_components) - fisher_information_new = np.zeros([n_new, n_new]) - - # Project information - for xnew, xold in enumerate(remaining_components): - for ynew, yold in enumerate(remaining_components): - fisher_information_new[xnew, ynew] = fisher_information[xold, yold] - - # Project covariance matrix - if covariance is not None: - covariance_new = np.zeros([n_new, n_new, n_new, n_new]) - for xnew, xold in enumerate(remaining_components): - for ynew, yold in enumerate(remaining_components): - for znew, zold in enumerate(remaining_components): - for zznew, zzold in enumerate(remaining_components): - covariance_new[xnew, ynew, znew, zznew] = covariance[xold, yold, zold, zzold] - - return fisher_information_new, covariance_new - - return fisher_information_new - - -def profile_information( - fisher_information, - remaining_components, - covariance=None, - error_propagation_n_ensemble=1000, - error_propagation_factor=1.0e-3, -): - """ - Calculates the profiled Fisher information matrix as defined in Appendix A.4 of arXiv:1612.05261. - - Parameters - ---------- - fisher_information : ndarray - Original n x n Fisher information. - - remaining_components : list of int - List with m entries, each an int with 0 <= remaining_compoinents[i] < n. Denotes which parameters are kept, and - their new order. All other parameters or profiled out. - - covariance : ndarray or None, optional - The covariance matrix of the original Fisher information with shape (n, n, n, n). If None, the error on the - profiled information is not calculated. Default value: None. - - error_propagation_n_ensemble : int, optional - If covariance is not None, this sets the number of Fisher information matrices drawn from a normal distribution - for the Monte-Carlo error propagation. 
Default value: 1000. - - error_propagation_factor : float, optional - If covariance is not None, this factor multiplies the covariance of the distribution of Fisher information - matrices. Smaller factors can avoid problems with ill-behaved Fisher information matrices. Default value: 1.e-3. - - Returns - ------- - profiled_fisher_information : ndarray - Profiled m x m Fisher information, where the `i`-th row or column corresponds to the - `remaining_components[i]`-th row or column of fisher_information. - - profiled_fisher_information_covariance : ndarray - Covariance matrix of the profiled Fishere information matrix with shape (m, m, m, m). - - """ - - logger.debug("Profiling Fisher information") - - # Group components - n_components = len(fisher_information) - n_remaining_components = len(remaining_components) - - remaining_components_checked = [] - profiled_components = [] - - for i in range(n_components): - if i in remaining_components: - remaining_components_checked.append(i) - else: - profiled_components.append(i) - - assert n_remaining_components == len(remaining_components_checked), "Inconsistent input" - - # Error propagation - if covariance is not None: - # Central value - profiled_information = profile_information( - fisher_information, remaining_components=remaining_components, covariance=None - ) - - # Draw toys - information_toys = np.random.multivariate_normal( - mean=fisher_information.reshape((-1,)), - cov=error_propagation_factor * covariance.reshape(n_components ** 2, n_components ** 2), - size=error_propagation_n_ensemble, - ) - information_toys = information_toys.reshape(-1, n_components, n_components) - - # Profile each toy - profiled_information_toys = np.array( - [ - profile_information(info, remaining_components=remaining_components, covariance=None) - for info in information_toys - ] - ) - - # Calculate ensemble covariance - toy_covariance = np.cov(profiled_information_toys.reshape(-1, n_remaining_components ** 2).T) - toy_covariance = 
toy_covariance.reshape( - (n_remaining_components, n_remaining_components, n_remaining_components, n_remaining_components) - ) - profiled_information_covariance = toy_covariance / error_propagation_factor - - # Cross-check: toy mean - toy_mean = np.mean(profiled_information_toys, axis=0) - logger.debug("Central Fisher info:\n%s\nToy mean Fisher info:\n%s", profiled_information, toy_mean) - - return profiled_information, profiled_information_covariance - - # Separate Fisher information parts - information_phys = fisher_information[remaining_components, :][:, remaining_components] - information_mix = fisher_information[profiled_components, :][:, remaining_components] - information_nuisance = fisher_information[profiled_components, :][:, profiled_components] - - # Calculate profiled information - inverse_information_nuisance = np.linalg.inv(information_nuisance) - profiled_information = information_phys - information_mix.T.dot(inverse_information_nuisance.dot(information_mix)) - - return profiled_information - - -class FisherInformation: +class FisherInformation(DataAnalyzer): """ Functions to calculate expected Fisher information matrices. 
@@ -208,83 +48,7 @@ class FisherInformation: """ def __init__(self, filename, include_nuisance_parameters=True): - # Save settings - self.madminer_filename = filename - self.include_nuisance_parameters = include_nuisance_parameters - - logger.info("Loading data from %s", filename) - - # Load data - ( - self.parameters, - self.benchmarks, - self.benchmark_is_nuisance, - self.morphing_components, - self.morphing_matrix, - self.observables, - self.n_samples, - _, - self.reference_benchmark, - self.nuisance_parameters, - ) = load_madminer_settings(filename, include_nuisance_benchmarks=include_nuisance_parameters) - self.n_parameters = len(self.parameters) - self.n_benchmarks = len(self.benchmarks) - self.n_benchmarks_phys = np.sum(np.logical_not(self.benchmark_is_nuisance)) - - self.n_nuisance_parameters = 0 - if self.nuisance_parameters is not None and include_nuisance_parameters: - self.n_nuisance_parameters = len(self.nuisance_parameters) - else: - self.nuisance_parameters = None - - logger.info("Found %s parameters", len(self.parameters)) - for key, values in six.iteritems(self.parameters): - logger.debug( - " %s (LHA: %s %s, maximal power in squared ME: %s, range: %s)", - key, - values[0], - values[1], - values[2], - values[3], - ) - - if self.nuisance_parameters is not None and include_nuisance_parameters: - logger.info("Found %s nuisance parameters", self.n_nuisance_parameters) - for key, values in six.iteritems(self.nuisance_parameters): - logger.debug(" %s (%s)", key, values) - elif include_nuisance_parameters: - self.include_nuisance_parameters = False - logger.warning("Did not find nuisance parameters!") - - logger.info("Found %s benchmarks, of which %s physical", self.n_benchmarks, self.n_benchmarks_phys) - for (key, values), is_nuisance in zip(six.iteritems(self.benchmarks), self.benchmark_is_nuisance): - if is_nuisance: - logger.debug(" %s: nuisance parameter", key) - else: - logger.debug(" %s: %s", key, format_benchmark(values)) - - logger.info("Found %s 
observables: %s", len(self.observables), ", ".join(self.observables)) - logger.info("Found %s events", self.n_samples) - - # Morphing - self.morpher = None - if self.morphing_matrix is not None and self.morphing_components is not None: - self.morpher = Morpher(self.parameters) - self.morpher.set_components(self.morphing_components) - self.morpher.set_basis(self.benchmarks, morphing_matrix=self.morphing_matrix) - - logger.info("Found morphing setup with %s components", len(self.morphing_components)) - - else: - raise RuntimeError("Did not find morphing setup.") - - # Nuisance morphing - self.nuisance_morpher = None - if self.include_nuisance_parameters: - self.nuisance_morpher = NuisanceMorpher( - self.nuisance_parameters, list(self.benchmarks.keys()), self.reference_benchmark - ) - logger.info("Found nuisance morphing setup") + super(FisherInformation, self).__init__(filename, False, include_nuisance_parameters) def calculate_fisher_information_full_truth( self, theta, luminosity=300000.0, cuts=None, efficiency_functions=None, include_nuisance_parameters=True @@ -339,9 +103,7 @@ def calculate_fisher_information_full_truth( fisher_info = np.zeros((n_all_parameters, n_all_parameters)) covariance = np.zeros((n_all_parameters, n_all_parameters, n_all_parameters, n_all_parameters)) - for observations, weights in madminer_event_loader( - self.madminer_filename, include_nuisance_parameters=include_nuisance_parameters - ): + for observations, weights in self.event_loader(): # Cuts cut_filter = [self._pass_cuts(obs_event, cuts) for obs_event in observations] observations = observations[cut_filter] @@ -375,10 +137,9 @@ def calculate_fisher_information_full_detector( luminosity=300000.0, include_xsec_info=True, mode="score", - uncertainty="ensemble", - ensemble_vote_expectation_weight=None, + calculate_covariance=True, batch_size=100000, - test_split=0.5, + test_split=0.2, ): """ Calculates the full Fisher information in realistic detector-level observations, estimated with 
neural networks. @@ -393,7 +154,7 @@ def calculate_fisher_information_full_detector( model_file : str Filename of a trained local score regression model that was trained on samples from `theta` (see - `madminer.ml.MLForge`). + `madminer.ml.Estimator`). unweighted_x_sample_file : str or None Filename of an unweighted x sample that is sampled according to theta and obeys the cuts @@ -412,24 +173,15 @@ def calculate_fisher_information_full_detector( are the sample mean and covariance calculated. If mode is "score", the sample mean is calculated for the score for each event. Default value: "score". - uncertainty : {"ensemble", "expectation", "sum"}, optional - How the covariance matrix of the Fisher information estimate is calculated. With "ensemble", the ensemble - covariance is used. With "expectation", the expectation of the score is used as a measure of the uncertainty - of the score estimator, and this uncertainty is propagated through to the covariance matrix. With "sum", - both terms are summed. Default value: "ensemble". - - ensemble_vote_expectation_weight : float or list of float or None, optional - For ensemble models, the factor that determines how much more weight is given to those estimators with small - expectation value. If a list is given, results are returned for each element in the list. If None, or if - `EnsembleForge.calculate_expectation()` has not been called, all estimators are treated equal. Default - value: None. + calculate_covariance : bool, optional + If True, the covariance between the different estimators is calculated. Default value: True. batch_size : int, optional Batch size. Default value: 100000. test_split : float or None, optional If unweighted_x_sample_file is None, this determines the fraction of weighted events used for evaluation. - If None, all events are used (this will probably include events used during training!). Default value: 0.5. + If None, all events are used (this will probably include events used during training!). 
Default value: 0.2. Returns ------- @@ -451,13 +203,13 @@ def calculate_fisher_information_full_detector( raise ValueError("Unknown mode {}, has to be 'score' or 'information'!".format(mode)) # Load SALLY model - if os.path.isdir(model_file): + if os.path.isdir(model_file) and os.path.exists(model_file + "/ensemble.json"): model_is_ensemble = True - model = EnsembleForge() + model = Ensemble() model.load(model_file) else: model_is_ensemble = False - model = MLForge() + model = ScoreEstimator() model.load(model_file) # Nuisance parameters? @@ -519,7 +271,7 @@ def calculate_fisher_information_full_detector( total_sum_weights_theta = total_xsec # Theta morphing matrix - theta_matrix = get_theta_benchmark_matrix("morphing", theta, self.benchmarks, self.morpher) + theta_matrix = self._get_theta_benchmark_matrix(theta) # Prepare output fisher_info_kin = None @@ -530,11 +282,8 @@ def calculate_fisher_information_full_detector( n_batches_verbose = max(int(round(n_batches / 10, 0)), 1) for i_batch, (observations, weights_benchmarks) in enumerate( - madminer_event_loader( - self.madminer_filename, - batch_size=batch_size, - start=start_event, - include_nuisance_parameters=include_nuisance_parameters, + self.event_loader( + batch_size=batch_size, start=start_event, include_nuisance_parameters=include_nuisance_parameters ) ): if (i_batch + 1) % n_batches_verbose == 0: @@ -550,9 +299,8 @@ def calculate_fisher_information_full_detector( x=observations, obs_weights=weights_theta, n_events=luminosity * total_xsec * np.sum(weights_theta) / total_sum_weights_theta, - vote_expectation_weight=ensemble_vote_expectation_weight, + calculate_covariance=calculate_covariance, mode=mode, - uncertainty=uncertainty, ) else: this_fisher_info = model.calculate_fisher_information( @@ -584,30 +332,20 @@ def calculate_fisher_information_full_detector( else: if model_is_ensemble: fisher_info_kin, covariance = model.calculate_fisher_information( - unweighted_x_sample_file, + 
x=unweighted_x_sample_file, n_events=luminosity * total_xsec, - vote_expectation_weight=ensemble_vote_expectation_weight, mode=mode, - uncertainty=uncertainty, + calculate_covariance=calculate_covariance, ) else: fisher_info_kin = model.calculate_fisher_information( - unweighted_x_sample_file, n_events=luminosity * total_xsec + x=unweighted_x_sample_file, n_events=luminosity * total_xsec ) covariance = None # Returns if model_is_ensemble: - if isinstance(ensemble_vote_expectation_weight, list) and len(ensemble_vote_expectation_weight) > 1: - fisher_info_results = [ - fisher_info_rate + this_fisher_info_kin for this_fisher_info_kin in fisher_info_kin - ] - covariance_results = [rate_covariance + this_covariance for this_covariance in covariance] - - return fisher_info_results, covariance_results - - else: - return fisher_info_rate + fisher_info_kin, rate_covariance + covariance + return fisher_info_rate + fisher_info_kin, rate_covariance + covariance return fisher_info_rate + fisher_info_kin, rate_covariance @@ -754,7 +492,7 @@ def calculate_fisher_information_hist1d( weights_benchmarks = np.zeros((n_bins_total, self.n_benchmarks)) weights_squared_benchmarks = np.zeros((n_bins_total, self.n_benchmarks)) - for observations, weights in madminer_event_loader(self.madminer_filename): + for observations, weights in self.event_loader(): # Cuts cut_filter = [self._pass_cuts(obs_event, cuts) for obs_event in observations] observations = observations[cut_filter] @@ -901,7 +639,7 @@ def calculate_fisher_information_hist2d( weights_benchmarks = np.zeros((n_bins1_total, n_bins2_total, self.n_benchmarks)) weights_squared_benchmarks = np.zeros((n_bins1_total, n_bins2_total, self.n_benchmarks)) - for observations, weights in madminer_event_loader(self.madminer_filename): + for observations, weights in self.event_loader(): # Cuts cut_filter = [self._pass_cuts(obs_event, cuts) for obs_event in observations] observations = observations[cut_filter] @@ -963,7 +701,7 @@ def 
histogram_of_fisher_information( cuts=None, efficiency_functions=None, batch_size=100000, - test_split=0.5, + test_split=0.2, ): """ Calculates the full and rate-only Fisher information in slices of one observable. For the full @@ -987,7 +725,8 @@ def histogram_of_fisher_information( model_file : str or None, optional If None, the truth-level Fisher information is calculated. If str, filename of a trained local score - regression model that was trained on samples from `theta` (see `madminer.ml.MLForge`). Default value: None. + regression model that was trained on samples from `theta` (see `madminer.ml.Estimator`). Default value: + None. luminosity : float, optional Luminosity in pb^-1. Default value: 300000. @@ -1006,7 +745,7 @@ def histogram_of_fisher_information( test_split : float or None, optional If model_file is not None: If unweighted_x_sample_file is None, this determines the fraction of weighted events used for evaluation. - If None, all events are used (this will probably include events used during training!). Default value: 0.5. + If None, all events are used (this will probably include events used during training!). Default value: 0.2. 
Returns @@ -1033,7 +772,7 @@ def histogram_of_fisher_information( efficiency_functions = [] # Theta morphing matrix - theta_matrix = get_theta_benchmark_matrix("morphing", theta, self.benchmarks, self.morpher) + theta_matrix = self._get_theta_benchmark_matrix(theta) # Number of bins n_bins_total = nbins + 2 @@ -1045,7 +784,7 @@ def histogram_of_fisher_information( # Main loop: truth-level case if model_file is None: - for observations, weights in madminer_event_loader(self.madminer_filename): + for observations, weights in self.event_loader(): # Cuts cut_filter = [self._pass_cuts(obs_event, cuts) for obs_event in observations] observations = observations[cut_filter] @@ -1081,13 +820,13 @@ def histogram_of_fisher_information( # ML case else: # Load SALLY model - if os.path.isdir(model_file): + if os.path.isdir(model_file) and os.path.exists(model_file + "/ensemble.json"): model_is_ensemble = True - model = EnsembleForge() + model = Ensemble() model.load(model_file) else: model_is_ensemble = False - model = MLForge() + model = ScoreEstimator() model.load(model_file) # Nuisance parameters? 
@@ -1128,11 +867,8 @@ def histogram_of_fisher_information( # ML main loop for i_batch, (observations, weights_benchmarks) in enumerate( - madminer_event_loader( - self.madminer_filename, - batch_size=batch_size, - start=start_event, - include_nuisance_parameters=include_nuisance_parameters, + self.event_loader( + batch_size=batch_size, start=start_event, include_nuisance_parameters=include_nuisance_parameters ) ): if (i_batch + 1) % n_batches_verbose == 0: @@ -1160,7 +896,7 @@ def histogram_of_fisher_information( obs_weights=weights_theta, n_events=luminosity * np.sum(weights_theta), mode="score", - uncertainty="none", + calculate_covariance=False, sum_events=False, ) else: @@ -1224,69 +960,6 @@ def calculate_fisher_information_nuisance_constraints(self): diagonal = np.array([0.0 for _ in range(self.n_parameters)] + [1.0 for _ in range(self.n_nuisance_parameters)]) return np.diag(diagonal) - def extract_raw_data(self, theta=None): - - """ - Returns all events together with the benchmark weights (if theta is None) or weights for a given theta. - - Parameters - ---------- - theta : None or ndarray, optional - If None, the function returns the benchmark weights. Otherwise it uses morphing to calculate the weights for - this value of theta. Default value: None. - - Returns - ------- - x : ndarray - Observables with shape `(n_unweighted_samples, n_observables)`. - - weights : ndarray - If theta is None, benchmark weights with shape `(n_unweighted_samples, n_benchmarks_phys)` in pb. Otherwise, - weights for the given parameter theta with shape `(n_unweighted_samples,)` in pb. 
- - """ - - x, weights_benchmarks = next(madminer_event_loader(self.madminer_filename, batch_size=None)) - - if theta is not None: - theta_matrix = get_theta_benchmark_matrix("morphing", theta, self.benchmarks, self.morpher) - - weights_theta = mdot(theta_matrix, weights_benchmarks) - - return x, weights_theta - - return x, weights_benchmarks - - def extract_observables_and_weights(self, thetas): - """ - Extracts observables and weights for given parameter points. - - Parameters - ---------- - thetas : ndarray - Parameter points, with shape `(n_thetas, n_parameters)`. - - Returns - ------- - x : ndarray - Observations `x` with shape `(n_events, n_observables)`. - - weights : ndarray - Weights `dsigma(x|theta)` in pb with shape `(n_thetas, n_events)`. - - """ - - x, weights_benchmarks = next(madminer_event_loader(self.madminer_filename, batch_size=None)) - - weights_thetas = [] - for theta in thetas: - theta_matrix = get_theta_benchmark_matrix("morphing", theta, self.benchmarks, self.morpher) - weights_thetas.append(mdot(theta_matrix, weights_benchmarks)) - - weights_thetas = np.array(weights_thetas) - - return x, weights_thetas - def _calculate_fisher_information( self, theta, @@ -1343,13 +1016,11 @@ def _calculate_fisher_information( """ + include_nuisance_parameters = include_nuisance_parameters and self.include_nuisance_parameters + # Get morphing matrices - theta_matrix = get_theta_benchmark_matrix( - "morphing", theta, self.benchmarks, self.morpher - ) # (n_benchmarks_phys,) - dtheta_matrix = get_dtheta_benchmark_matrix( - "morphing", theta, self.benchmarks, self.morpher - ) # (n_parameters, n_benchmarks_phys) + theta_matrix = self._get_theta_benchmark_matrix(theta, zero_pad=False) # (n_benchmarks_phys,) + dtheta_matrix = self._get_dtheta_benchmark_matrix(theta, zero_pad=False) # (n_parameters, n_benchmarks_phys) # Get differential xsec per event, and the derivative wrt to theta sigma = mdot(theta_matrix, weights_benchmarks) # Shape (n_events,) @@ -1361,7 
+1032,7 @@ def _calculate_fisher_information( fisher_info_phys = luminosity * np.einsum("n,in,jn->nij", inv_sigma, dsigma, dsigma) # Nuisance parameter Fisher info - if include_nuisance_parameters and self.include_nuisance_parameters: + if include_nuisance_parameters: nuisance_a = self.nuisance_morpher.calculate_a(weights_benchmarks) # Shape (n_nuisance_params, n_events) # grad_i dsigma(x), where i is a nuisance parameter, is given by # sigma[np.newaxis, :] * a @@ -1383,10 +1054,12 @@ def _calculate_fisher_information( # Error propagation if calculate_uncertainty: - weights_benchmarks_phys = weights_benchmarks[:, np.logical_not(self.benchmark_is_nuisance)] + if weights_benchmarks.shape[1] > self.n_benchmarks_phys: + weights_benchmarks_phys = weights_benchmarks[:, np.logical_not(self.benchmark_is_nuisance)] + else: + weights_benchmarks_phys = weights_benchmarks n_events = weights_benchmarks_phys.shape[0] - n_benchmarks_phys = weights_benchmarks_phys.shape[1] # Input uncertainties if weights_benchmark_uncertainties is None: @@ -1394,10 +1067,10 @@ def _calculate_fisher_information( # Build covariance matrix of inputs # We assume full correlation between weights_benchmarks[i, b1] and weights_benchmarks[i, b2] - covariance_inputs = np.zeros((n_events, n_benchmarks_phys, n_benchmarks_phys)) + covariance_inputs = np.zeros((n_events, self.n_benchmarks_phys, self.n_benchmarks_phys)) for i in range(n_events): - for b1 in range(n_benchmarks_phys): - for b2 in range(n_benchmarks_phys): + for b1 in range(self.n_benchmarks_phys): + for b2 in range(self.n_benchmarks_phys): if b1 == b2: # Diagonal covariance_inputs[i, b1, b2] = weights_benchmark_uncertainties[i, b1] ** 2 @@ -1611,8 +1284,8 @@ def _calculate_xsec( xsecs_benchmarks = None xsecs_uncertainty_benchmarks = None - for observations, weights in madminer_event_loader( - self.madminer_filename, start=start_event, include_nuisance_parameters=include_nuisance_parameters + for observations, weights in self.event_loader( + 
start=start_event, include_nuisance_parameters=include_nuisance_parameters ): # Cuts cut_filter = [self._pass_cuts(obs_event, cuts) for obs_event in observations] @@ -1645,7 +1318,7 @@ def _calculate_xsec( return xsecs_benchmarks # Translate to xsec for theta - theta_matrix = get_theta_benchmark_matrix("morphing", theta, self.benchmarks, self.morpher) + theta_matrix = self._get_theta_benchmark_matrix(theta) xsec = mdot(theta_matrix, xsecs_benchmarks) xsec_error = mdot(theta_matrix, xsecs_uncertainty_benchmarks) @@ -1669,7 +1342,7 @@ def _calculate_dynamic_binning( quantile_values = np.linspace(0.0, 1.0, n_bins + 1) # Get data - x_pilot, weights_pilot = next(madminer_event_loader(self.madminer_filename, batch_size=n_events)) + x_pilot, weights_pilot = next(self.event_loader(batch_size=n_events)) # Cuts cut_filter = [self._pass_cuts(x, cuts) for x in x_pilot] @@ -1684,7 +1357,7 @@ def _calculate_dynamic_binning( histo_observables_pilot = np.asarray([self._eval_observable(x, observable) for x in x_pilot]) # Weights at theta - theta_matrix = get_theta_benchmark_matrix("morphing", theta, self.benchmarks, self.morpher) + theta_matrix = self._get_theta_benchmark_matrix(theta) weight_theta_pilot = mdot(theta_matrix, weights_pilot) # Bin boundaries @@ -1692,3 +1365,147 @@ def _calculate_dynamic_binning( bin_boundaries = bin_boundaries[1:-1] return bin_boundaries + + +def project_information(fisher_information, remaining_components, covariance=None): + """ + Calculates projections of a Fisher information matrix, that is, "deletes" the rows and columns corresponding to + some parameters not of interest. + + Parameters + ---------- + fisher_information : ndarray + Original n x n Fisher information. + + remaining_components : list of int + List with m entries, each an int with 0 <= remaining_components[i] < n. Denotes which parameters are kept, and + their new order. All other parameters are projected out.
+ + covariance : ndarray or None, optional + The covariance matrix of the original Fisher information with shape (n, n, n, n). If None, the error on the + projected information is not calculated. Default value: None. + + Returns + ------- + projected_fisher_information : ndarray + Projected m x m Fisher information, where the `i`-th row or column corresponds to the + `remaining_components[i]`-th row or column of fisher_information. + + projected_fisher_information_covariance : ndarray + Covariance matrix of the projected Fisher information matrix with shape (m, m, m, m). Only returned if + covariance is not None. + + """ + n_new = len(remaining_components) + fisher_information_new = np.zeros([n_new, n_new]) + + # Project information + for xnew, xold in enumerate(remaining_components): + for ynew, yold in enumerate(remaining_components): + fisher_information_new[xnew, ynew] = fisher_information[xold, yold] + + # Project covariance matrix + if covariance is not None: + covariance_new = np.zeros([n_new, n_new, n_new, n_new]) + for xnew, xold in enumerate(remaining_components): + for ynew, yold in enumerate(remaining_components): + for znew, zold in enumerate(remaining_components): + for zznew, zzold in enumerate(remaining_components): + covariance_new[xnew, ynew, znew, zznew] = covariance[xold, yold, zold, zzold] + + return fisher_information_new, covariance_new + + return fisher_information_new + + +def profile_information( + fisher_information, + remaining_components, + covariance=None, + error_propagation_n_ensemble=1000, + error_propagation_factor=1.0e-3, +): + """ + Calculates the profiled Fisher information matrix as defined in Appendix A.4 of arXiv:1612.05261. + + Parameters + ---------- + fisher_information : ndarray + Original n x n Fisher information. + + remaining_components : list of int + List with m entries, each an int with 0 <= remaining_components[i] < n. Denotes which parameters are kept, and + their new order. All other parameters are profiled out.
+ + covariance : ndarray or None, optional + The covariance matrix of the original Fisher information with shape (n, n, n, n). If None, the error on the + profiled information is not calculated. Default value: None. + + error_propagation_n_ensemble : int, optional + If covariance is not None, this sets the number of Fisher information matrices drawn from a normal distribution + for the Monte-Carlo error propagation. Default value: 1000. + + error_propagation_factor : float, optional + If covariance is not None, this factor multiplies the covariance of the distribution of Fisher information + matrices. Smaller factors can avoid problems with ill-behaved Fisher information matrices. Default value: 1.e-3. + + Returns + ------- + profiled_fisher_information : ndarray + Profiled m x m Fisher information, where the `i`-th row or column corresponds to the + `remaining_components[i]`-th row or column of fisher_information. + + profiled_fisher_information_covariance : ndarray + Covariance matrix of the profiled Fisher information matrix with shape (m, m, m, m). Only returned if covariance is not None.
+ + """ + + logger.debug("Profiling Fisher information") + n_components = len(fisher_information) + n_remaining_components = len(remaining_components) + + _, information_phys, information_mix, information_nuisance = separate_information_blocks( + fisher_information, remaining_components + ) + + # Error propagation + if covariance is not None: + # Central value + profiled_information = profile_information( + fisher_information, remaining_components=remaining_components, covariance=None + ) + + # Draw toys + information_toys = np.random.multivariate_normal( + mean=fisher_information.reshape((-1,)), + cov=error_propagation_factor * covariance.reshape(n_components ** 2, n_components ** 2), + size=error_propagation_n_ensemble, + ) + information_toys = information_toys.reshape(-1, n_components, n_components) + + # Profile each toy + profiled_information_toys = np.array( + [ + profile_information(info, remaining_components=remaining_components, covariance=None) + for info in information_toys + ] + ) + + # Calculate ensemble covariance + toy_covariance = np.cov(profiled_information_toys.reshape(-1, n_remaining_components ** 2).T) + toy_covariance = toy_covariance.reshape( + (n_remaining_components, n_remaining_components, n_remaining_components, n_remaining_components) + ) + profiled_information_covariance = toy_covariance / error_propagation_factor + + # Cross-check: toy mean + toy_mean = np.mean(profiled_information_toys, axis=0) + logger.debug("Central Fisher info:\n%s\nToy mean Fisher info:\n%s", profiled_information, toy_mean) + + return profiled_information, profiled_information_covariance + + # Calculate profiled information + inverse_information_nuisance = np.linalg.inv(information_nuisance) + profiled_information = information_phys - information_mix.T.dot(inverse_information_nuisance.dot(information_mix)) + + return profiled_information diff --git a/madminer/lhe.py b/madminer/lhe.py index 0b9c66df2..7c594d0f7 100644 --- a/madminer/lhe.py +++ b/madminer/lhe.py @@ 
-19,7 +19,7 @@ logger = logging.getLogger(__name__) -class LHEProcessor: +class LHEReader: """ Detector simulation with smearing functions and simple calculation of observables. @@ -40,6 +40,7 @@ class LHEProcessor: `LHEProcessor.add_observable_from_function()`. A simple set of default observables is provided in `LHEProcessor.add_default_observables()` * Optionally, cuts can be set with `LHEProcessor.add_cut()` + * Optionally, efficiencies can be set with `LHEReader.add_efficiency()` * Calculating the observables from the Delphes ROOT files with `LHEProcessor.analyse_delphes_samples()` * Saving the results with `LHEProcessor.save()` @@ -68,6 +69,10 @@ def __init__(self, filename): self.cuts = [] self.cuts_default_pass = [] + # Initialize efficiencies + self.efficiencies = [] + self.efficiencies_default_pass = [] + # Smearing function parameters self.energy_resolution = {} self.pt_resolution = {} @@ -448,6 +453,38 @@ def add_cut(self, definition, pass_if_not_parsed=False): self.cuts.append(definition) self.cuts_default_pass.append(pass_if_not_parsed) + def add_efficiency(self, definition, value_if_not_parsed=1.0): + + """ + Adds an efficiency as a string that can be parsed by Python's `eval()` function and returns a float. + + Parameters + ---------- + definition : str + An expression that can be parsed by Python's `eval()` function and returns a floating number which reweights + the event weights. In the definition, all visible particles can be used: `e`, `mu`, `j`, `a`, and `l` provide + lists of electrons, muons, jets, photons, and leptons (electrons and muons combined), in each case sorted + by descending transverse momentum. `met` provides a missing ET object. `visible` and `all` provide access to + the sum of all visible particles and the sum of all visible particles plus MET, respectively. All these + objects are instances of `MadMinerParticle`, which inherits from scikit-hep's + [LorentzVector](http://scikit-hep.org/api/math.html#vector-classes).
See the link for a + documentation of their properties. In addition, `MadMinerParticle` have properties `charge` and `pdg_id`, + which return the charge in units of elementary charges (i.e. an electron has `e[0].charge = -1.`), and the + PDG particle ID. + + value_if_not_parsed : float, optional + Value if the efficiency function cannot be parsed. Default value: 1. + + Returns + ------- + None + + """ + logger.debug("Adding efficiency %s", definition) + + self.efficiencies.append(definition) + self.efficiencies_default_pass.append(value_if_not_parsed) + def reset_observables(self): """ Resets all observables. """ @@ -465,10 +502,18 @@ def reset_cuts(self): self.cuts = [] self.cuts_default_pass = [] + def reset_efficiencies(self): + """ Resets all efficiencies. """ + + logger.debug("Resetting efficiencies") + + self.efficiencies = [] + self.efficiencies_default_pass = [] + def analyse_samples(self, reference_benchmark=None, parse_events_as_xml=True): """ - Main function that parses the LHE samples, applies detector effects, checks cuts, and extracts - the observables and weights. + Main function that parses the LHE samples, applies detector effects, checks cuts, + evaluates efficiencies, and extracts the observables and weights.
Parameters ---------- @@ -570,6 +615,8 @@ def _parse_sample( observables_defaults=self.observables_defaults, cuts=self.cuts, cuts_default_pass=self.cuts_default_pass, + efficiencies=self.efficiencies, + efficiencies_default_pass=self.efficiencies_default_pass, energy_resolutions=self.energy_resolution, pt_resolutions=self.pt_resolution, eta_resolutions=self.eta_resolution, @@ -587,6 +634,7 @@ def _parse_sample( n_events = None for key, obs in six.iteritems(this_observations): this_n_events = len(obs) + logger.debug("Found {} events in Obs {}".format(this_n_events, key)) if n_events is None: n_events = this_n_events logger.debug("Found %s events", n_events) diff --git a/madminer/limits.py b/madminer/limits.py new file mode 100644 index 000000000..3e21805e5 --- /dev/null +++ b/madminer/limits.py @@ -0,0 +1,378 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import logging +import numpy as np +import os +from scipy.stats import chi2, poisson + +from madminer.analysis import DataAnalyzer +from madminer.utils.various import mdot +from madminer.ml import ParameterizedRatioEstimator, Ensemble +from madminer.utils.histo import Histo +from madminer.sampling import SampleAugmenter +from madminer import sampling +from madminer.ml import ScoreEstimator + +logger = logging.getLogger(__name__) + + +class AsymptoticLimits(DataAnalyzer): + """ + Functions to calculate observed and expected constraints, using asymptotic properties of the likelihood ratio as + test statistics. + + Parameters + ---------- + filename : str + Path to MadMiner file (for instance the output of `madminer.delphes.DelphesProcessor.save()`). + + include_nuisance_parameters : bool, optional + If True, nuisance parameters are taken into account. Default value: False. 
+ """ + + def __init__(self, filename=None, include_nuisance_parameters=False): + super(AsymptoticLimits, self).__init__(filename, False, include_nuisance_parameters) + + def observed_limits( + self, + x_observed, + theta_ranges, + mode="ml", + model_file=None, + hist_vars=None, + hist_bins=20, + include_xsec=True, + resolutions=25, + luminosity=300000.0, + ): + theta_grid, p_values, i_ml = self._analyse( + len(x_observed), + x_observed, + theta_ranges, + resolutions, + mode, + model_file, + hist_vars, + hist_bins, + include_xsec, + None, + luminosity, + ) + return theta_grid, p_values, i_ml + + def expected_limits( + self, + theta_true, + theta_ranges, + mode="ml", + model_file=None, + hist_vars=None, + hist_bins=20, + include_xsec=True, + resolutions=25, + luminosity=300000.0, + ): + x_asimov, x_weights = self._asimov_data(theta_true) + n_observed = luminosity * self._calculate_xsecs([theta_true])[0] + theta_grid, p_values, i_ml = self._analyse( + n_observed, + x_asimov, + theta_ranges, + resolutions, + mode, + model_file, + hist_vars, + hist_bins, + include_xsec, + x_weights, + luminosity, + ) + return theta_grid, p_values, i_ml + + def asymptotic_p_value(self, log_likelihood_ratio): + q = -2.0 * log_likelihood_ratio + p_value = chi2.sf(x=q, df=self.n_parameters) + return p_value + + def _analyse( + self, + n_events, + x, + theta_ranges, + theta_resolutions, + mode="ml", + model_file=None, + hist_vars=None, + hist_bins=20, + include_xsec=True, + obs_weights=None, + luminosity=300000.0, + ): + logger.debug("Calculating p-values for %s expected events", n_events) + + # Observation weights + if obs_weights is None: + obs_weights = np.ones(len(x)) + obs_weights /= np.sum(obs_weights) + obs_weights = obs_weights.astype(np.float64) + + # Theta grid + theta_grid = self._make_theta_grid(theta_ranges, theta_resolutions) + + # Kinematic part + if mode == "rate": + log_r_kin = 0.0 + elif mode == "ml": + assert model_file is not None + logger.info("Loading kinematic 
likelihood ratio estimator") + model = self._load_ratio_model(model_file) + + logger.info("Calculating kinematic log likelihood ratio with estimator") + log_r_kin = self._calculate_log_likelihood_ratio_kinematics(x, theta_grid, model) + log_r_kin = log_r_kin.astype(np.float64) + log_r_kin = self._clean_nans(log_r_kin) + logger.debug("Raw mean -2 log r: %s", np.mean(-2.0 * log_r_kin, axis=1)) + log_r_kin = n_events * np.sum(log_r_kin * obs_weights[np.newaxis, :], axis=1) + logger.debug("Rescaled -2 log r: %s", -2.0 * log_r_kin) + + elif mode == "histo": + if hist_vars is not None: + logger.info("Setting up standard summary statistics") + summary_function = self._make_summary_statistic_function("observables", observables=hist_vars) + elif model_file is not None: + logger.info("Loading score estimator and setting it up as summary statistics") + model = self._load_score_model(model_file) + summary_function = self._make_summary_statistic_function("sally", model=model) + else: + raise RuntimeError("For 'histo' mode, either provide histo_vars or model_file!") + summary_stats = summary_function(x) + + logger.info("Creating histogram with %s bins for the summary statistics", hist_bins) + histo = self._make_histo(summary_function, hist_bins, theta_grid, theta_resolutions) + + logger.info("Calculating kinematic log likelihood with histograms") + log_r_kin = self._calculate_log_likelihood_histo(summary_stats, theta_grid, histo) + log_r_kin = log_r_kin.astype(np.float64) + log_r_kin = self._clean_nans(log_r_kin) + log_r_kin = n_events * np.sum(log_r_kin * obs_weights[np.newaxis, :], axis=1) + + else: + raise ValueError("Unknown mode {}, has to be 'ml' or 'histo' or 'xsec'".format(mode)) + + # xsec part + if include_xsec: + logger.info("Calculating rate log likelihood") + log_p_xsec = self._calculate_log_likelihood_xsec(n_events, theta_grid, luminosity) + logger.debug("Rate -2 log p: %s", -2.0 * log_p_xsec) + else: + log_p_xsec = 0.0 + + # Combine and get p-values + 
logger.info("Calculating p-values") + log_r = log_r_kin + log_p_xsec + logger.debug("Combined -2 log r: %s", -2.0 * log_r) + log_r, i_ml = self._subtract_ml(log_r) + logger.debug("Min-subtracted -2 log r: %s", -2.0 * log_r) + p_values = self.asymptotic_p_value(log_r) + + return theta_grid, p_values, i_ml + + def _make_summary_statistic_function(self, mode, model=None, observables=None): + if mode == "observables": + assert observables is not None + x_indices = self._find_x_indices(observables) + + def summary_function(x): + return x[:, x_indices] + + elif mode == "sally": + assert isinstance(model, ScoreEstimator) + + def summary_function(x): + return model.evaluate_score(x) + + else: + raise RuntimeError("Unknown mode {}, has to be 'observables' or 'sally'".format(mode)) + + return summary_function + + @staticmethod + def _load_ratio_model(filename): + if os.path.isdir(filename): + model = Ensemble() + model.load(filename) + else: + model = ParameterizedRatioEstimator() + model.load(filename) + return model + + @staticmethod + def _load_score_model(filename): + if os.path.isdir(filename): + model = Ensemble() + model.load(filename) + else: + model = ScoreEstimator() + model.load(filename) + return model + + def _calculate_xsecs(self, thetas, test_split=0.2): + # Test split + start_event, end_event = self._train_test_split(False, test_split) + + # Total xsecs for benchmarks + xsecs_benchmarks = 0.0 + for observations, weights in self.event_loader(start=start_event, end=end_event): + xsecs_benchmarks += np.sum(weights, axis=0) + + # xsecs at thetas + xsecs = [] + for theta in thetas: + theta_matrix = self._get_theta_benchmark_matrix(theta) + xsecs.append(mdot(theta_matrix, xsecs_benchmarks)) + return np.asarray(xsecs) + + def _asimov_data(self, theta, test_split=0.2): + start_event, end_event = self._train_test_split(False, test_split) + x, weights_benchmarks = next(self.event_loader(start=start_event, end=end_event, batch_size=None)) + + theta_matrix = 
self._get_theta_benchmark_matrix(theta) + weights_theta = mdot(theta_matrix, weights_benchmarks) + weights_theta /= np.sum(weights_theta) + + return x, weights_theta + + @staticmethod + def _make_theta_grid(theta_ranges, resolutions): + if isinstance(resolutions, int): + resolutions = [resolutions for _ in range(theta_ranges)] + theta_each = [] + for resolution, (theta_min, theta_max) in zip(resolutions, theta_ranges): + theta_each.append(np.linspace(theta_min, theta_max, resolution)) + theta_grid_each = np.meshgrid(*theta_each) + theta_grid_each = [theta.flatten() for theta in theta_grid_each] + theta_grid = np.vstack(theta_grid_each).T + return theta_grid + + def _make_histo(self, summary_function, x_bins, theta_grid, theta_bins, n_samples_per_theta=1000): + logger.info("Building histogram with %s bins per parameter and %s bins per observable") + histo = Histo(theta_bins, x_bins) + theta, x = self._make_histo_data(theta_grid, n_samples_per_theta * len(theta_grid)) + summary_stats = summary_function(x) + histo.fit(theta, summary_stats, fill_empty_bins=True) + return histo + + def _make_histo_data(self, thetas, n_samples, test_split=0.2): + sampler = SampleAugmenter(self.madminer_filename, include_nuisance_parameters=self.include_nuisance_parameters) + x, theta, _ = sampler.sample_train_plain( + theta=sampling.morphing_points(thetas), + n_samples=n_samples, + test_split=test_split, + filename=None, + folder=None, + ) + return theta, x + + def _find_x_indices(self, observables): + x_names = list(self.observables.keys()) + x_indices = [] + for obs in observables: + try: + x_indices.append(x_names.index(obs)) + except ValueError: + raise RuntimeError("Unknown observable {}, has to be one of {}".format(obs, x_names)) + logger.debug("Using x indices %s", x_indices) + return x_indices + + @staticmethod + def _calculate_log_likelihood_histo(x, theta_grid, histo): + log_p = [] + for theta in theta_grid: + log_p.append(histo.log_likelihood(theta, x)) + log_p = 
np.asarray(log_p) + return log_p + + def _calculate_log_likelihood_xsec(self, n_observed, theta_grid, luminosity=300000.0): + n_observed_rounded = int(np.round(n_observed, 0)) + n_predicted = self._calculate_xsecs(theta_grid) * luminosity + logger.debug("Observed events: %s", n_observed) + logger.debug("Expected events: %s", n_predicted) + log_p = poisson.logpmf(k=n_observed_rounded, mu=n_predicted) + return log_p + + def _calculate_log_likelihood_ratio_kinematics(self, x_observed, theta_grid, model, theta1=None): + if isinstance(model, ParameterizedRatioEstimator): + log_r, _ = model.evaluate_log_likelihood_ratio( + x=x_observed, theta=theta_grid, test_all_combinations=True, evaluate_score=False + ) + elif isinstance(model, Ensemble) and model.estimator_type == "parameterized_ratio": + log_r, _ = model.evaluate_log_likelihood_ratio( + x=x_observed, + theta=theta_grid, + test_all_combinations=True, + evaluate_score=False, + calculate_covariance=False, + ) + else: + raise NotImplementedError( + "Likelihood ratio estimation is currently only implemented for " + "ParameterizedRatioEstimator instancees" + ) + return log_r + + @staticmethod + def _subtract_ml(log_r): + i_ml = np.argmax(log_r) + log_r_subtracted = log_r[:] - log_r[i_ml] + return log_r_subtracted, i_ml + + @staticmethod + def _clean_nans(array): + not_finite = np.any(~np.isfinite(array), axis=0) + if np.sum(not_finite) > 0: + logger.warning("Removing %s inf / nan results from calculation") + array[:, not_finite] = 0.0 + return array + + def _train_test_split(self, train, test_split): + """ + Returns the start and end event for train samples (train = True) or test samples (train = False). + + Parameters + ---------- + train : bool + True if training data is generated, False if test data is generated. + + test_split : float + Fraction of events reserved for testing. + + Returns + ------- + start_event : int + Index of the first unweighted event to consider. 
+ + end_event : int + Index of the last unweighted event to consider. + + """ + if train: + start_event = 0 + + if test_split is None or test_split <= 0.0 or test_split >= 1.0: + end_event = None + else: + end_event = int(round((1.0 - test_split) * self.n_samples, 0)) + if end_event < 0 or end_event > self.n_samples: + raise ValueError("Irregular train / test split: sample {} / {}", end_event, self.n_samples) + + else: + if test_split is None or test_split <= 0.0 or test_split >= 1.0: + start_event = 0 + else: + start_event = int(round((1.0 - test_split) * self.n_samples, 0)) + 1 + if start_event < 0 or start_event > self.n_samples: + raise ValueError("Irregular train / test split: sample {} / {}", start_event, self.n_samples) + + end_event = None + + return start_event, end_event diff --git a/madminer/ml.py b/madminer/ml.py index 2cc868716..c8ba0a037 100644 --- a/madminer/ml.py +++ b/madminer/ml.py @@ -5,72 +5,251 @@ import os import json import numpy as np +from collections import OrderedDict import torch -from torch import optim from madminer.utils.ml.models.maf import ConditionalMaskedAutoregressiveFlow from madminer.utils.ml.models.maf_mog import ConditionalMixtureMaskedAutoregressiveFlow -from madminer.utils.ml.models.ratio import ParameterizedRatioEstimator, DoublyParameterizedRatioEstimator -from madminer.utils.ml.models.score import LocalScoreEstimator +from madminer.utils.ml.models.ratio import DenseSingleParameterizedRatioModel, DenseDoublyParameterizedRatioModel +from madminer.utils.ml.models.score import DenseLocalScoreModel from madminer.utils.ml.eval import evaluate_flow_model, evaluate_ratio_model, evaluate_local_score_model -from madminer.utils.ml.utils import check_required_data +from madminer.utils.ml.utils import get_optimizer, get_loss from madminer.utils.various import create_missing_folders, load_and_check, shuffle, restrict_samplesize -from madminer.utils.ml.methods import get_method_type, get_trainer, get_loss, package_training_data +from 
madminer.utils.various import separate_information_blocks +from madminer.utils.ml.trainer import SingleParameterizedRatioTrainer, DoubleParameterizedRatioTrainer +from madminer.utils.ml.trainer import LocalScoreTrainer, FlowTrainer logger = logging.getLogger(__name__) -class MLForge: +class Estimator(object): """ - Estimating likelihood ratios and scores with machine learning. + Abstract class for any ML estimator. Subclassed by ParameterizedRatioEstimator, DoubleParameterizedRatioEstimator, + ScoreEstimator, and LikelihoodEstimator. Each instance of this class represents one neural estimator. The most important functions are: - * `MLForge.train()` to train an estimator. The keyword `method` determines the inference technique + * `Estimator.train()` to train an estimator. The keyword `method` determines the inference technique and whether a class instance represents a single-parameterized likelihood ratio estimator, a doubly-parameterized likelihood ratio estimator, or a local score estimator. - * `MLForge.evaluate()` to evaluate the estimator. - * `MLForge.save()` to save the trained model to files. - * `MLForge.load()` to load the trained model from files. + * `Estimator.evaluate()` to evaluate the estimator. + * `Estimator.save()` to save the trained model to files. + * `Estimator.load()` to load the trained model from files. Please see the tutorial for a detailed walk-through. 
""" - def __init__(self): - self.method_type = None + def __init__(self, features=None, n_hidden=(100, 100), activation="tanh"): + self.features = features + self.n_hidden = n_hidden + self.activation = activation + self.model = None - self.method = None - self.nde_type = None self.n_observables = None self.n_parameters = None - self.n_hidden = None - self.activation = None - self.maf_n_mades = None - self.maf_batch_norm = None - self.maf_batch_norm_alpha = None - self.maf_mog_n_components = None - self.features = None self.x_scaling_means = None self.x_scaling_stds = None + def train(self, *args, **kwargs): + raise NotImplementedError + + def evaluate_log_likelihood(self, *args, **kwargs): + """ + Log likelihood estimation. Signature depends on the type of estimator. The first returned value is the log + likelihood with shape `(n_thetas, n_x)`. + """ + raise NotImplementedError + + def evaluate_log_likelihood_ratio(self, *args, **kwargs): + """ + Log likelihood ratio estimation. Signature depends on the type of estimator. The first returned value is the log + likelihood ratio with shape `(n_thetas, n_x)` or `(n_x)`. + """ + raise NotImplementedError + + def evaluate_score(self, *args, **kwargs): + """ + Score estimation. Signature depends on the type of estimator. The only returned value is the score with shape + `(n_x)`. + """ + raise NotImplementedError + + def evaluate(self, *args, **kwargs): + raise NotImplementedError + + def calculate_fisher_information(self, *args, **kwargs): + raise NotImplementedError + + def save(self, filename, save_model=False): + + """ + Saves the trained model to four files: a JSON file with the settings, a pickled pyTorch state dict + file, and numpy files for the mean and variance of the inputs (used for input scaling). + + Parameters + ---------- + filename : str + Path to the files. '_settings.json' and '_state_dict.pl' will be added. 
+ + save_model : bool, optional + If True, the whole model is saved in addition to the state dict. This is not necessary for loading it + again with Estimator.load(), but can be useful for debugging, for instance to plot the computational graph. + + Returns + ------- + None + + """ + + if self.model is None: + raise ValueError("No model -- train or load model before saving!") + + # Check paths + create_missing_folders([os.path.dirname(filename)]) + + # Save settings + logger.debug("Saving settings to %s_settings.json", filename) + + settings = self._wrap_settings() + + with open(filename + "_settings.json", "w") as f: + json.dump(settings, f) + + # Save scaling + if self.x_scaling_stds is not None and self.x_scaling_means is not None: + logger.debug("Saving input scaling information to %s_x_means.npy and %s_x_stds.npy", filename, filename) + np.save(filename + "_x_means.npy", self.x_scaling_means) + np.save(filename + "_x_stds.npy", self.x_scaling_stds) + + # Save state dict + logger.debug("Saving state dictionary to %s_state_dict.pt", filename) + torch.save(self.model.state_dict(), filename + "_state_dict.pt") + + # Save model + if save_model: + logger.debug("Saving model to %s_model.pt", filename) + torch.save(self.model, filename + "_model.pt") + + def load(self, filename): + + """ + Loads a trained model from files. + + Parameters + ---------- + filename : str + Path to the files. '_settings.json' and '_state_dict.pt' will be added.
+ + Returns + ------- + None + + """ + + # Load settings and create model + logger.debug("Loading settings from %s_settings.json", filename) + with open(filename + "_settings.json", "r") as f: + settings = json.load(f) + self._unwrap_settings(settings) + self._create_model() + + # Load scaling + try: + self.x_scaling_means = np.load(filename + "_x_means.npy") + self.x_scaling_stds = np.load(filename + "_x_stds.npy") + logger.debug( + " Found input scaling information: means %s, stds %s", self.x_scaling_means, self.x_scaling_stds + ) + except FileNotFoundError: + logger.warning("Scaling information not found in %s", filename) + self.x_scaling_means = None + self.x_scaling_stds = None + + # Load state dict + logger.debug("Loading state dictionary from %s_state_dict.pt", filename) + self.model.load_state_dict(torch.load(filename + "_state_dict.pt", map_location="cpu")) + + def _initialize_input_transform(self, x, transform=True): + if transform: + self.x_scaling_means = np.mean(x, axis=0) + self.x_scaling_stds = np.maximum(np.std(x, axis=0), 1.0e-6) + else: + n_parameters = x.shape[0] + + self.x_scaling_means = np.zeros(n_parameters) + self.x_scaling_stds = np.ones(n_parameters) + + def _transform_inputs(self, x): + if self.x_scaling_means is not None and self.x_scaling_stds is not None: + x_scaled = x - self.x_scaling_means + x_scaled /= self.x_scaling_stds + else: + x_scaled = x + return x_scaled + + def _wrap_settings(self): + settings = { + "n_observables": self.n_observables, + "n_parameters": self.n_parameters, + "features": self.features, + "n_hidden": list(self.n_hidden), + "activation": self.activation, + } + return settings + + def _unwrap_settings(self, settings): + try: + _ = str(settings["estimator_type"]) + except KeyError: + raise RuntimeError( + "Can't find estimator type information in file. Maybe this file was created with" + " an incompatible MadMiner version < v0.3.0?" 
+ ) + + self.n_observables = int(settings["n_observables"]) + self.n_parameters = int(settings["n_parameters"]) + self.n_hidden = tuple([int(item) for item in settings["n_hidden"]]) + self.activation = str(settings["activation"]) + self.features = settings["features"] + if self.features == "None": + self.features = None + if self.features is not None: + self.features = list([int(item) for item in self.features]) + + def _create_model(self): + raise NotImplementedError + + +class ParameterizedRatioEstimator(Estimator): + """ + A neural estimator of the likelihood ratio as a function of the observation x as well as + the numerator hypothesis theta. The reference (denominator) hypothesis is kept fixed at some + reference value and NOT modeled by the network. + + Parameters + ---------- + features : list of int or None, optional + Indices of observables (features) that are used as input to the neural networks. If None, all observables + are used. Default value: None. + + n_hidden : tuple of int, optional + Units in each hidden layer in the neural networks. If method is 'nde' or 'scandal', this refers to the + setup of each individual MADE layer. Default value: (100, 100). + + activation : {'tanh', 'sigmoid', 'relu'}, optional + Activation function. Default value: 'tanh'. 
+ + + """ + def train( self, method, - x_filename, - y_filename=None, - theta0_filename=None, - theta1_filename=None, - r_xz_filename=None, - t_xz0_filename=None, - t_xz1_filename=None, - features=None, - nde_type="mafmog", - n_hidden=(100, 100), - activation="tanh", - maf_n_mades=3, - maf_batch_norm=False, - maf_batch_norm_alpha=0.1, - maf_mog_n_components=10, + x, + y, + theta, + r_xz=None, + t_xz=None, alpha=1.0, optimizer="amsgrad", n_epochs=50, @@ -82,105 +261,38 @@ def train( early_stopping=True, scale_inputs=True, shuffle_labels=False, - grad_x_regularization=None, limit_samplesize=None, verbose="some", ): """ - Trains a neural network to estimate either the likelihood, the likelihood ratio, or the - score. - - The keyword method determines the structure of the estimator that an instance of this class represents: - - * For 'alice', 'alices', 'carl', 'nde', 'rascal', 'rolr', and 'scandal', the neural network models - the likelihood ratio as a function of the observables `x` and the numerator hypothesis `theta0`, while - the denominator hypothesis is kept at a fixed reference value ("single-parameterized likelihood ratio - estimator"). In addition to the likelihood ratio, the estimator allows to estimate the score at `theta0`. - * For 'alice2', 'alices2', 'carl2', 'rascal2', and 'rolr2', the neural network models - the likelihood ratio as a function of the observables `x`, the numerator hypothesis `theta0`, and the - denominator hypothesis `theta1` ("doubly parameterized likelihood ratio estimator"). The score at `theta0` - and `theta1` can also be evaluated. - * For 'sally' and 'sallino', the neural networks models the score evaluated at some reference hypothesis - ("local score regression"). The likelihood ratio cannot be estimated directly from the neural network, but - can be estimated in a second step through density estimation in the estimated score space. + Trains the network. Parameters ---------- method : str - The inference method used. 
Allows values are 'alice', 'alices', 'carl', 'nde', 'rascal', 'rolr', and - 'scandal' for a single-parameterized likelihood ratio estimator; 'alice2', 'alices2', 'carl2', 'rascal2', - and 'rolr2' for a doubly-parameterized likelihood ratio estimator; and 'sally' and 'sallino' for local - score regression. - - x_filename : str + The inference method used for training. Allowed values are 'alice', 'alices', 'carl', 'cascal', 'rascal', + and 'rolr'. + + x : ndarray or str Path to an unweighted sample of observations, as saved by the `madminer.sampling.SampleAugmenter` functions. Required for all inference methods. - - y_filename : str or None, optional - Path to an unweighted sample of class labels, as saved by the `madminer.sampling.SampleAugmenter` functions. - Required for the 'alice', 'alice2', 'alices', 'alices2', 'carl', 'carl2', 'rascal', 'rascal2', 'rolr', - and 'rolr2' methods. Default value: None. - - theta0_filename : str or None, optional - Path to an unweighted sample of numerator parameters, as saved by the `madminer.sampling.SampleAugmenter` - functions. Required for the 'alice', 'alice2', 'alices', 'alices2', 'carl', 'carl2', 'nde', 'rascal', - 'rascal2', 'rolr', 'rolr2', and 'scandal' methods. Default value: None. - - theta1_filename : str or None, optional - Path to an unweighted sample of denominator parameters, as saved by the `madminer.sampling.SampleAugmenter` - functions. Required for the 'alice2', 'alices2', 'carl2', 'rascal2', and 'rolr2' methods. Default value: - None. - - r_xz_filename : str or None, optional - Path to an unweighted sample of joint likelihood ratios, as saved by the `madminer.sampling.SampleAugmenter` - functions. Required for the 'alice', 'alice2', 'alices', 'alices2', 'rascal', 'rascal2', 'rolr', and 'rolr2' - methods. Default value: None. - - t_xz0_filename : str or None, optional - Path to an unweighted sample of joint scores at theta0, as saved by the `madminer.sampling.SampleAugmenter` - functions. 
Required for the 'alices', 'alices2', 'rascal', 'rascal2', 'sallino', 'sally', and 'scandal' - methods. Default value: None. - - t_xz1_filename : str or None, optional - Path to an unweighted sample of joint scores at theta1, as saved by the `madminer.sampling.SampleAugmenter` - functions. Required for the 'rascal2' and 'alices2' methods. Default value: None. - - features : list of int or None, optional - Indices of observables (features) that are used as input to the neural networks. If None, all observables - are used. Default value: None. - - nde_type : {'maf', 'mafmog'}, optional - If the method is 'nde' or 'scandal', nde_type determines the architecture used in the neural density - estimator. Currently supported are 'maf' for a Masked Autoregressive Flow with a Gaussian base density, or - 'mafmog' for a Masked Autoregressive Flow with a mixture of Gaussian base densities. Default value: - 'mafmog'. - - n_hidden : tuple of int, optional - Units in each hidden layer in the neural networks. If method is 'nde' or 'scandal', this refers to the - setup of each individual MADE layer. Default value: (100, 100). - - activation : {'tanh', 'sigmoid', 'relu'}, optional - Activation function. Default value: 'tanh'. - - maf_n_mades : int, optional - If method is 'nde' or 'scandal', this sets the number of MADE layers. Default value: 3. - - maf_batch_norm : bool, optional - If method is 'nde' or 'scandal', switches batch normalization layers after each MADE layer on or off. - Default: False. - - maf_batch_norm_alpha : float, optional - If method is 'nde' or 'scandal' and maf_batch_norm is True, this sets the alpha parameter in the calculation - of the running average of the mean and variance. Default value: 0.1. - - maf_mog_n_components : int, optional - If method is 'nde' or 'scandal' and nde_type is 'mafmog', this sets the number of Gaussian base components. - Default value: 10. 
+ + y : ndarray or str + Class labels (0 = numerator, 1 = denominator), or filename of a pickled numpy array. + + theta : ndarray or str + Numerator parameter point, or filename of a pickled numpy array. + + r_xz : ndarray or str or None, optional + Joint likelihood ratio, or filename of a pickled numpy array. Default value: None. + + t_xz : ndarray or str or None, optional + Joint scores at theta, or filename of a pickled numpy array. Default value: None. alpha : float, optional - Hyperparameter weighting the score error in the loss function of the 'alices', 'alices2', 'rascal', - 'rascal2', and 'scandal' methods. Default value: 1. + Hyperparameter weighting the score error in the loss function of the 'alices', 'rascal', and 'cascal' + methods. Default value: 1. optimizer : {"adam", "amsgrad", "sgd"}, optional Optimization algorithm. Default value: "amsgrad". @@ -217,9 +329,6 @@ def train( normal order. This serves as a closure test, in particular as cross-check against overfitting: an estimator trained with shuffle_labels=True should predict to likelihood ratios around 1 and scores around 0. - grad_x_regularization : None - Currently not supported. - limit_samplesize : int or None, optional If not None, only this number of samples (events) is used to train the estimator. Default value: None.
@@ -234,35 +343,7 @@ def train( logger.info("Starting training") logger.info(" Method: %s", method) - logger.info(" Training data: x at %s", x_filename) - if theta0_filename is not None: - logger.info(" theta0 at %s", theta0_filename) - if theta1_filename is not None: - logger.info(" theta1 at %s", theta1_filename) - if y_filename is not None: - logger.info(" y at %s", y_filename) - if r_xz_filename is not None: - logger.info(" r_xz at %s", r_xz_filename) - if t_xz0_filename is not None: - logger.info(" t_xz (theta0) at %s", t_xz0_filename) - if t_xz1_filename is not None: - logger.info(" t_xz (theta1) at %s", t_xz1_filename) - if features is None: - logger.info(" Features: all") - else: - logger.info(" Features: %s", features) - logger.info(" Method: %s", method) - if method in ["nde", "scandal"]: - logger.info(" Neural density est.: %s", nde_type) - if method not in ["nde", "scandal"]: - logger.info(" Hidden layers: %s", n_hidden) - if method in ["nde", "scandal"]: - logger.info(" MAF, number MADEs: %s", maf_n_mades) - logger.info(" MAF, batch norm: %s", maf_batch_norm) - logger.info(" MAF, BN alpha: %s", maf_batch_norm_alpha) - logger.info(" MAF MoG, components: %s", maf_mog_n_components) - logger.info(" Activation function: %s", activation) - if method in ["cascal", "cascal2", "rascal", "rascal2", "scandal", "alices"]: + if method in ["cascal", "rascal", "alices"]: logger.info(" alpha: %s", alpha) logger.info(" Batch size: %s", batch_size) logger.info(" Optimizer: %s", optimizer) @@ -279,41 +360,26 @@ def train( else: logger.info(" Samples: %s", limit_samplesize) - # Check - if grad_x_regularization is not None: - logger.warning("grad_x_regularization is not supported in this version of MadMiner") - # Load training data logger.info("Loading training data") - theta0 = load_and_check(theta0_filename) - theta1 = load_and_check(theta1_filename) - x = load_and_check(x_filename) - y = load_and_check(y_filename) - r_xz = load_and_check(r_xz_filename) - t_xz0 = 
load_and_check(t_xz0_filename) - t_xz1 = load_and_check(t_xz1_filename) - if y is not None: - y = y.reshape((-1, 1)) - - # Check necessary information is there - if not check_required_data(method, r_xz, t_xz0, t_xz1, theta0, theta1, x, y): - raise ValueError("Not all required data for method {} provided!".format(method)) + theta = load_and_check(theta) + x = load_and_check(x) + y = load_and_check(y) + r_xz = load_and_check(r_xz) + t_xz = load_and_check(t_xz) + + self._check_required_data(method, r_xz, t_xz) # Infer dimensions of problem n_samples = x.shape[0] n_observables = x.shape[1] - if theta0 is not None: - n_parameters = theta0.shape[1] - else: - n_parameters = t_xz0.shape[1] + n_parameters = theta.shape[1] logger.info("Found %s samples with %s parameters and %s observables", n_samples, n_parameters, n_observables) # Limit sample size if limit_samplesize is not None and limit_samplesize < n_samples: logger.info("Only using %s of %s training samples", limit_samplesize, n_samples) - x, theta0, theta1, y, r_xz, t_xz0, t_xz1 = restrict_samplesize( - limit_samplesize, x, theta0, theta1, y, r_xz, t_xz0, t_xz1 - ) + x, theta, y, r_xz, t_xz = restrict_samplesize(limit_samplesize, x, theta, y, r_xz, t_xz) # Scale features if scale_inputs: @@ -323,67 +389,49 @@ def train( else: self._initialize_input_transform(x, False) - logger.debug("Observable ranges:") - for i in range(n_observables): - logger.debug( - " x_%s: mean %s, std %s, range %s ... 
%s", - i + 1, - np.mean(x[:, i]), - np.std(x[:, i]), - np.min(x[:, i]), - np.max(x[:, i]), - ) - # Shuffle labels if shuffle_labels: logger.info("Shuffling labels") - y, r_xz, t_xz0, t_xz1 = shuffle(y, r_xz, t_xz0, t_xz1) + y, r_xz, t_xz = shuffle(y, r_xz, t_xz) # Features - self.features = features - if features is not None: - x = x[:, features] + if self.features is not None: + x = x[:, self.features] logger.info("Only using %s of %s observables", x.shape[1], n_observables) n_observables = x.shape[1] + # Check consistency of input with model + if self.n_observables is None: + self.n_observables = n_observables + if self.n_parameters is None: + self.n_parameters = n_parameters + + if n_parameters != self.n_parameters: + raise RuntimeError( + "Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters) + ) + if n_observables != self.n_observables: + raise RuntimeError( + "Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables) + ) + # Data - data = package_training_data(method, x, theta0, theta1, y, r_xz, t_xz0, t_xz1) - - # Create model and save settings - logger.info("Creating model for method %s", method) - self._create_model( - method, - n_observables, - n_parameters, - n_hidden, - activation, - nde_type, - maf_n_mades, - maf_batch_norm, - maf_batch_norm_alpha, - maf_mog_n_components, - ) + data = self._package_training_data(method, x, theta, y, r_xz, t_xz) + + # Create model + if self.model is None: + logger.info("Creating model") + self._create_model() # Losses loss_functions, loss_labels, loss_weights = get_loss(method, alpha) # Optimizer - opt_kwargs = None - if optimizer == "adam": - opt = optim.Adam - elif optimizer == "amsgrad": - opt = optim.Adam - opt_kwargs = {"amsgrad": True} - elif optimizer == "sgd": - opt = optim.SGD - if nesterov_momentum is not None: - opt_kwargs = {"momentum": nesterov_momentum} - else: - raise ValueError("Unknown optimizer {}".format(optimizer)) + opt, 
opt_kwargs = get_optimizer(optimizer, nesterov_momentum) # Train model logger.info("Training model") - trainer = get_trainer(method)(self.model) + trainer = SingleParameterizedRatioTrainer(self.model) result = trainer.train( data=data, loss_functions=loss_functions, @@ -401,146 +449,47 @@ def train( ) return result - def evaluate(self, x, theta0_filename=None, theta1_filename=None, test_all_combinations=True, evaluate_score=False): - - """ - Evaluates a trained estimator of the log likelihood ratio, the log likelihood, or the score, depending on the - method. - - Parameters - ---------- - x : str or ndarray - Sample of observations, or path to numpy file with observations, as saved by the - `madminer.sampling.SampleAugmenter` functions. - - theta0_filename : str or None, optional - Path to an unweighted sample of numerator parameters, as saved by the `madminer.sampling.SampleAugmenter` - functions. Required if the estimator was trained with the 'alice', 'alice2', 'alices', 'alices2', 'carl', - 'carl2', 'nde', 'rascal', 'rascal2', 'rolr', 'rolr2', or 'scandal' method. Default value: None. - - theta1_filename : str or None, optional - Path to an unweighted sample of denominator parameters, as saved by the `madminer.sampling.SampleAugmenter` - functions. Required if the estimator was trained with the 'alice2', 'alices2', 'carl2', 'rascal2', or - 'rolr2' method. Default value: None. - - test_all_combinations : bool, optional - If method is not 'sally' and not 'sallino': If False, the number of samples in the observable and theta - files has to match, and the likelihood ratio is evaluated only for the combinations - `r(x_i | theta0_i, theta1_i)`. If True, `r(x_i | theta0_j, theta1_j)` for all pairwise combinations `i, j` - are evaluated. Default value: True. - - evaluate_score : bool, optional - If method is not 'sally' and not 'sallino', this sets whether in addition to the likelihood ratio the score - is evaluated. Default value: False. 
- - return_grad_x : bool, optional - If True, `grad_x log r(x)` or `grad_x t(x)` (for 'sally' or 'sallino' estimators) are returned in addition - to the other outputs. Default value: False. - - Returns - ------- - sally_estimated_score : ndarray - Only returned if the network was trained with `method='sally'` or `method='sallino'`. In this case, an - array of the estimator for `t(x_i | theta_ref)` is returned for all events `i`. - - log_likelihood_ratio : ndarray - Only returned if the network was trained with neither `method='sally'` nor `method='sallino'`. The estimated - log likelihood ratio. If test_all_combinations is True, the result has shape `(n_thetas, n_x)`. Otherwise, - it has shape `(n_samples,)`. - - score_theta0 : ndarray or None - Only returned if the network was trained with neither `method='sally'` nor `method='sallino'`. None if - evaluate_score is False. Otherwise the derived estimated score at `theta0`. If test_all_combinations is - True, the result has shape `(n_thetas, n_x, n_parameters)`. Otherwise, it has shape - `(n_samples, n_parameters)`. - - score_theta1 : ndarray or None - Only returned if the network was trained with neither `method='sally'` nor `method='sallino'`. None if - evaluate_score is False, or the network was trained with any method other than 'alice2', 'alices2', 'carl2', - 'rascal2', or 'rolr2'. Otherwise the derived estimated score at `theta1`. If test_all_combinations is - True, the result has shape `(n_thetas, n_x, n_parameters)`. Otherwise, it has shape - `(n_samples, n_parameters)`. - - grad_x : ndarray - Only returned if return_grad_x is True. 
- - """ - if self.method_type in ["parameterized", "doubly_parameterized"]: - return self.evaluate_log_likelihood_ratio( - x, theta0_filename, theta1_filename, test_all_combinations, evaluate_score - ) - elif self.method_type == "nde": - return self.evaluate_log_likelihood(x, theta0_filename, test_all_combinations, evaluate_score) - elif self.method_type == "local_score": - return self.evaluate_score(x) - else: - raise RuntimeError("Unknown method type %s", self.method_type) - - def evaluate_log_likelihood_ratio( - self, x, theta0_filename=None, theta1_filename=None, test_all_combinations=True, evaluate_score=False - ): - + def evaluate_log_likelihood_ratio(self, x, theta, test_all_combinations=True, evaluate_score=False): """ - Evaluates a trained estimator of the log likelihood ratio, the log likelihood, or the score, depending on the - method. + Evaluates the log likelihood ratio for given observations x betwen the given parameter point theta and the + reference hypothesis. Parameters ---------- x : str or ndarray - Sample of observations, or path to numpy file with observations, as saved by the - `madminer.sampling.SampleAugmenter` functions. - - theta0_filename : str or None, optional - Path to an unweighted sample of numerator parameters, as saved by the `madminer.sampling.SampleAugmenter` - functions. Required if the estimator was trained with the 'alice', 'alice2', 'alices', 'alices2', 'carl', - 'carl2', 'nde', 'rascal', 'rascal2', 'rolr', 'rolr2', or 'scandal' method. Default value: None. + Observations or filename of a pickled numpy array. - theta1_filename : str or None, optional - Path to an unweighted sample of denominator parameters, as saved by the `madminer.sampling.SampleAugmenter` - functions. Required if the estimator was trained with the 'alice2', 'alices2', 'carl2', 'rascal2', or - 'rolr2' method. Default value: None. + theta : ndarray or str + Parameter points or filename of a pickled numpy array. 
test_all_combinations : bool, optional - If method is not 'sally' and not 'sallino': If False, the number of samples in the observable and theta + If False, the number of samples in the observable and theta files has to match, and the likelihood ratio is evaluated only for the combinations `r(x_i | theta0_i, theta1_i)`. If True, `r(x_i | theta0_j, theta1_j)` for all pairwise combinations `i, j` are evaluated. Default value: True. evaluate_score : bool, optional - If method is not 'sally' and not 'sallino', this sets whether in addition to the likelihood ratio the score - is evaluated. Default value: False. + Sets whether in addition to the likelihood ratio the score is evaluated. Default value: False. Returns ------- log_likelihood_ratio : ndarray - Only returned if the network was trained with neither `method='sally'` nor `method='sallino'`. The estimated - log likelihood ratio. If test_all_combinations is True, the result has shape `(n_thetas, n_x)`. Otherwise, - it has shape `(n_samples,)`. - - score_theta0 : ndarray or None - Only returned if the network was trained with neither `method='sally'` nor `method='sallino'`. None if - evaluate_score is False. Otherwise the derived estimated score at `theta0`. If test_all_combinations is - True, the result has shape `(n_thetas, n_x, n_parameters)`. Otherwise, it has shape - `(n_samples, n_parameters)`. + The estimated log likelihood ratio. If test_all_combinations is True, the result has shape + `(n_thetas, n_x)`. Otherwise, it has shape `(n_samples,)`. - score_theta1 : ndarray or None - Only returned if the network was trained with neither `method='sally'` nor `method='sallino'`. None if - evaluate_score is False, or the network was trained with any method other than 'alice2', 'alices2', 'carl2', - 'rascal2', or 'rolr2'. Otherwise the derived estimated score at `theta1`. If test_all_combinations is - True, the result has shape `(n_thetas, n_x, n_parameters)`. 
Otherwise, it has shape + score : ndarray or None + None if evaluate_score is False. Otherwise the derived estimated score at `theta0`. If test_all_combinations + is True, the result has shape `(n_thetas, n_x, n_parameters)`. Otherwise, it has shape `(n_samples, n_parameters)`. """ - if self.model is None: raise ValueError("No model -- train or load model before evaluating it!") # Load training data logger.debug("Loading evaluation data") - theta0s = load_and_check(theta0_filename) - theta1s = load_and_check(theta1_filename) - if isinstance(x, six.string_types): - x = load_and_check(x) + x = load_and_check(x) + theta = load_and_check(theta) # Scale observables x = self._transform_inputs(x) @@ -549,214 +498,866 @@ def evaluate_log_likelihood_ratio( if self.features is not None: x = x[:, self.features] - # Balance thetas - if theta1s is None and theta0s is not None: - theta1s = [None for _ in theta0s] - elif theta1s is not None and theta0s is not None: - if len(theta1s) > len(theta0s): - theta0s = [theta0s[i % len(theta0s)] for i in range(len(theta1s))] - elif len(theta1s) < len(theta0s): - theta1s = [theta1s[i % len(theta1s)] for i in range(len(theta0s))] - - # Evaluation for all other methods all_log_r_hat = [] - all_t_hat0 = [] - all_t_hat1 = [] + all_t_hat = [] if test_all_combinations: logger.debug("Starting ratio evaluation for all combinations") - for i, (theta0, theta1) in enumerate(zip(theta0s, theta1s)): - logger.debug( - "Starting ratio evaluation for thetas %s / %s: %s vs %s", i + 1, len(theta0s), theta0, theta1 - ) - _, log_r_hat, t_hat0, t_hat1 = evaluate_ratio_model( + for i, this_theta in enumerate(theta): + logger.debug("Starting ratio evaluation for thetas %s / %s: %s", i + 1, len(theta), this_theta) + _, log_r_hat, t_hat, _ = evaluate_ratio_model( model=self.model, - method_type=self.method_type, - theta0s=[theta0], - theta1s=[theta1] if theta1 is not None else None, + method_type="parameterized_ratio", + theta0s=[this_theta], + theta1s=None, xs=x, 
evaluate_score=evaluate_score, ) all_log_r_hat.append(log_r_hat) - all_t_hat0.append(t_hat0) - all_t_hat1.append(t_hat1) + all_t_hat.append(t_hat) all_log_r_hat = np.array(all_log_r_hat) - all_t_hat0 = np.array(all_t_hat0) - all_t_hat1 = np.array(all_t_hat1) + all_t_hat = np.array(all_t_hat) else: logger.debug("Starting ratio evaluation") - _, all_log_r_hat, all_t_hat0, all_t_hat1 = evaluate_ratio_model( + _, all_log_r_hat, all_t_hat, _ = evaluate_ratio_model( model=self.model, - method_type=self.method_type, - theta0s=theta0s, - theta1s=None if None in theta1s else theta1s, + method_type="parameterized_ratio", + theta0s=theta, + theta1s=None, xs=x, evaluate_score=evaluate_score, ) logger.debug("Evaluation done") - return all_log_r_hat, all_t_hat0, all_t_hat1 + return all_log_r_hat, all_t_hat - def evaluate_score(self, x, return_grad_x=False): + def evaluate_log_likelihood(self, *args, **kwargs): + raise TheresAGoodReasonThisDoesntWork( + "This estimator can only estimate likelihood ratios, not the likelihood " "itself!" + ) - """ - Evaluates a trained estimator of the the score. + def evaluate_score(self, *args, **kwargs): + raise NotImplementedError("Please use evaluate_log_likelihood_ratio(evaluate_score=True).") - Parameters - ---------- - x : str or ndarray - Sample of observations, or path to numpy file with observations, as saved by the - `madminer.sampling.SampleAugmenter` functions. + def calculate_fisher_information(self, *args, **kwargs): + raise NotImplementedError( + "Please use evaluate_log_likelihood_ratio(evaluate_score=True) and calculate the " + "Fisher information manually." + ) - return_grad_x : bool, optional - If True, `grad_x log r(x)` or `grad_x t(x)` (for 'sally' or 'sallino' estimators) are returned in addition - to the other outputs. Default value: False. 
+ def evaluate(self, *args, **kwargs): + return self.evaluate_log_likelihood_ratio(*args, **kwargs) - Returns - ------- - sally_estimated_score : ndarray - Only returned if the network was trained with `method='sally'` or `method='sallino'`. In this case, an - array of the estimator for `t(x_i | theta_ref)` is returned for all events `i`. + def _create_model(self): + self.model = DenseSingleParameterizedRatioModel( + n_observables=self.n_observables, + n_parameters=self.n_parameters, + n_hidden=self.n_hidden, + activation=self.activation, + ) - grad_x : ndarray - Only returned if return_grad_x is True. + @staticmethod + def _check_required_data(method, r_xz, t_xz): + if method in ["cascal", "alices", "rascal"] and t_xz is None: + raise RuntimeError("Method {} requires joint score information".format(method)) + if method in ["rolr", "alices", "rascal"] and r_xz is None: + raise RuntimeError("Method {} requires joint likelihood ratio information".format(method)) + + @staticmethod + def _package_training_data(method, x, theta, y, r_xz, t_xz): + data = OrderedDict() + data["x"] = x + data["theta"] = theta + data["y"] = y + if method in ["rolr", "alice", "alices", "rascal"]: + data["r_xz"] = r_xz + if method in ["cascal", "alices", "rascal"]: + data["t_xz"] = t_xz + return data + + def _wrap_settings(self): + settings = super(ParameterizedRatioEstimator, self)._wrap_settings() + settings["estimator_type"] = "parameterized_ratio" + return settings + + def _unwrap_settings(self, settings): + super(ParameterizedRatioEstimator, self)._unwrap_settings(settings) + + estimator_type = str(settings["estimator_type"]) + if estimator_type != "parameterized_ratio": + raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type)) + + +class DoubleParameterizedRatioEstimator(Estimator): + """ + A neural estimator of the likelihood ratio as a function of the observation x, the numerator hypothesis theta0, and + the denominator hypothesis theta1. 
- """ + Parameters + ---------- + features : list of int or None, optional + Indices of observables (features) that are used as input to the neural networks. If None, all observables + are used. Default value: None. - if self.model is None: - raise ValueError("No model -- train or load model before evaluating it!") + n_hidden : tuple of int, optional + Units in each hidden layer in the neural networks. If method is 'nde' or 'scandal', this refers to the + setup of each individual MADE layer. Default value: (100, 100). - # Load training data - logger.debug("Loading evaluation data") - if isinstance(x, six.string_types): - x = load_and_check(x) + activation : {'tanh', 'sigmoid', 'relu'}, optional + Activation function. Default value: 'tanh'. - # Scale observables - x = self._transform_inputs(x) - # Restrict featuers + """ + + def train( + self, + method, + x, + y, + theta0, + theta1, + r_xz=None, + t_xz0=None, + t_xz1=None, + alpha=1.0, + optimizer="amsgrad", + n_epochs=50, + batch_size=200, + initial_lr=0.001, + final_lr=0.0001, + nesterov_momentum=None, + validation_split=0.25, + early_stopping=True, + scale_inputs=True, + shuffle_labels=False, + limit_samplesize=None, + verbose="some", + ): + + """ + Trains the network. + + Parameters + ---------- + method : str + The inference method used for training. Allowed values are 'alice', 'alices', 'carl', 'cascal', 'rascal', + and 'rolr'. + + x : ndarray or str + Path to an unweighted sample of observations, as saved by the `madminer.sampling.SampleAugmenter` functions. + Required for all inference methods. + + y : ndarray or str + Class labels (0 = numeerator, 1 = denominator), or filename of a pickled numpy array. + + theta0 : ndarray or str + Numerator parameter point, or filename of a pickled numpy array. + + theta1 : ndarray or str + Denominator parameter point, or filename of a pickled numpy array. + + r_xz : ndarray or str or None, optional + Joint likelihood ratio, or filename of a pickled numpy array. 
Default value: None. + + t_xz0 : ndarray or str or None, optional + Joint scores at theta0, or filename of a pickled numpy array. Default value: None. + + t_xz1 : ndarray or str or None, optional + Joint scores at theta1, or filename of a pickled numpy array. Default value: None. + + alpha : float, optional + Hyperparameter weighting the score error in the loss function of the 'alices', 'rascal', and 'cascal' + methods. Default value: 1. + + optimizer : {"adam", "amsgrad", "sgd"}, optional + Optimization algorithm. Default value: "amsgrad". + + n_epochs : int, optional + Number of epochs. Default value: 50. + + batch_size : int, optional + Batch size. Default value: 200. + + initial_lr : float, optional + Learning rate during the first epoch, after which it exponentially decays to final_lr. Default value: + 0.001. + + final_lr : float, optional + Learning rate during the last epoch. Default value: 0.0001. + + nesterov_momentum : float or None, optional + If trainer is "sgd", sets the Nesterov momentum. Default value: None. + + validation_split : float or None, optional + Fraction of samples used for validation and early stopping (if early_stopping is True). If None, the entire + sample is used for training and early stopping is deactivated. Default value: 0.25. + + early_stopping : bool, optional + Activates early stopping based on the validation loss (only if validation_split is not None). Default value: + True. + + scale_inputs : bool, optional + Scale the observables to zero mean and unit variance. Default value: True. + + shuffle_labels : bool, optional + If True, the labels (`y`, `r_xz`, `t_xz`) are shuffled, while the observations (`x`) remain in their + normal order. This serves as a closure test, in particular as cross-check against overfitting: an estimator + trained with shuffle_labels=True should predict to likelihood ratios around 1 and scores around 0. 
+ + limit_samplesize : int or None, optional + If not None, only this number of samples (events) is used to train the estimator. Default value: None. + + verbose : {"all", "many", "some", "few", "none}, optional + Determines verbosity of training. Default value: "some". + + Returns + ------- + None + + """ + + logger.info("Starting training") + logger.info(" Method: %s", method) + if method in ["cascal", "rascal", "alices"]: + logger.info(" alpha: %s", alpha) + logger.info(" Batch size: %s", batch_size) + logger.info(" Optimizer: %s", optimizer) + logger.info(" Epochs: %s", n_epochs) + logger.info(" Learning rate: %s initially, decaying to %s", initial_lr, final_lr) + if optimizer == "sgd": + logger.info(" Nesterov momentum: %s", nesterov_momentum) + logger.info(" Validation split: %s", validation_split) + logger.info(" Early stopping: %s", early_stopping) + logger.info(" Scale inputs: %s", scale_inputs) + logger.info(" Shuffle labels %s", shuffle_labels) + if limit_samplesize is None: + logger.info(" Samples: all") + else: + logger.info(" Samples: %s", limit_samplesize) + + # Load training data + logger.info("Loading training data") + theta0 = load_and_check(theta0) + theta1 = load_and_check(theta1) + x = load_and_check(x) + y = load_and_check(y) + r_xz = load_and_check(r_xz) + t_xz0 = load_and_check(t_xz0) + t_xz1 = load_and_check(t_xz1) + + self._check_required_data(method, r_xz, t_xz0, t_xz1) + + # Infer dimensions of problem + n_samples = x.shape[0] + n_observables = x.shape[1] + n_parameters = theta0.shape[1] + logger.info("Found %s samples with %s parameters and %s observables", n_samples, n_parameters, n_observables) + + # Limit sample size + if limit_samplesize is not None and limit_samplesize < n_samples: + logger.info("Only using %s of %s training samples", limit_samplesize, n_samples) + x, theta0, theta1, y, r_xz, t_xz0, t_xz1 = restrict_samplesize( + limit_samplesize, x, theta0, theta1, y, r_xz, t_xz0, t_xz1 + ) + + # Scale features + if scale_inputs: 
+ logger.info("Rescaling inputs") + self._initialize_input_transform(x) + x = self._transform_inputs(x) + else: + self._initialize_input_transform(x, False) + + # Shuffle labels + if shuffle_labels: + logger.info("Shuffling labels") + y, r_xz, t_xz0, t_xz1 = shuffle(y, r_xz, t_xz0, t_xz1) + + # Features if self.features is not None: x = x[:, self.features] + logger.info("Only using %s of %s observables", x.shape[1], n_observables) + n_observables = x.shape[1] - # SALLY evaluation - if self.method not in ["sally", "sallino"]: - raise NotImplementedError("Score evaluation only implemented for methods SALLY and SALLINO.") + # Check consistency of input with model + if self.n_observables is None: + self.n_observables = n_observables + if self.n_parameters is None: + self.n_parameters = n_parameters - logger.debug("Starting score evaluation") + if n_parameters != self.n_parameters: + raise RuntimeError( + "Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters) + ) + if n_observables != self.n_observables: + raise RuntimeError( + "Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables) + ) + + # Data + data = self._package_training_data(method, x, theta0, theta1, y, r_xz, t_xz0, t_xz1) - all_t_hat = evaluate_local_score_model(model=self.model, xs=x) - return all_t_hat + # Create model + if self.model is None: + logger.info("Creating model", method) + self._create_model() + + # Losses + loss_functions, loss_labels, loss_weights = get_loss(method + "2", alpha) - def evaluate_log_likelihood(self, x, theta0_filename=None, test_all_combinations=True, evaluate_score=False): + # Optimizer + opt, opt_kwargs = get_optimizer(optimizer, nesterov_momentum) + # Train model + logger.info("Training model") + trainer = DoubleParameterizedRatioTrainer(self.model) + result = trainer.train( + data=data, + loss_functions=loss_functions, + loss_weights=loss_weights, + loss_labels=loss_labels, + epochs=n_epochs, + 
batch_size=batch_size, + optimizer=opt, + optimizer_kwargs=opt_kwargs, + initial_lr=initial_lr, + final_lr=final_lr, + validation_split=validation_split, + early_stopping=early_stopping, + verbose=verbose, + ) + return result + + def evaluate_log_likelihood_ratio(self, x, theta0, theta1, test_all_combinations=True, evaluate_score=False): """ - Evaluates a trained estimator of the log likelihood. + Evaluates the log likelihood ratio as a function of the observation x, the numerator hypothesis theta0, and + the denominator hypothesis theta1. Parameters ---------- x : str or ndarray - Sample of observations, or path to numpy file with observations, as saved by the - `madminer.sampling.SampleAugmenter` functions. + Observations or filename of a pickled numpy array. - theta0_filename : str or None, optional - Path to an unweighted sample of numerator parameters, as saved by the `madminer.sampling.SampleAugmenter` - functions. Required if the estimator was trained with the 'alice', 'alice2', 'alices', 'alices2', 'carl', - 'carl2', 'nde', 'rascal', 'rascal2', 'rolr', 'rolr2', or 'scandal' method. Default value: None. + theta0 : ndarray or str + Numerator parameter points or filename of a pickled numpy array. + + theta1 : ndarray or str + Denominator parameter points or filename of a pickled numpy array. test_all_combinations : bool, optional - If method is not 'sally' and not 'sallino': If False, the number of samples in the observable and theta + If False, the number of samples in the observable and theta files has to match, and the likelihood ratio is evaluated only for the combinations `r(x_i | theta0_i, theta1_i)`. If True, `r(x_i | theta0_j, theta1_j)` for all pairwise combinations `i, j` are evaluated. Default value: True. evaluate_score : bool, optional - If method is not 'sally' and not 'sallino', this sets whether in addition to the likelihood ratio the score - is evaluated. Default value: False. 
+ Sets whether in addition to the likelihood ratio the score is evaluated. Default value: False. Returns ------- + log_likelihood_ratio : ndarray + The estimated log likelihood ratio. If test_all_combinations is True, the result has shape + `(n_thetas, n_x)`. Otherwise, it has shape `(n_samples,)`. - log_likelihood : ndarray - The estimated log likelihood. If test_all_combinations is True, the result has shape `(n_thetas, n_x)`. - Otherwise, it has shape `(n_samples,)`. + score0 : ndarray or None + None if evaluate_score is False. Otherwise the derived estimated score at `theta0`. If test_all_combinations + is True, the result has shape `(n_thetas, n_x, n_parameters)`. Otherwise, it has shape + `(n_samples, n_parameters)`. - score_theta0 : ndarray or None - None if - evaluate_score is False. Otherwise the derived estimated score at `theta0`. If test_all_combinations is - True, the result has shape `(n_thetas, n_x, n_parameters)`. Otherwise, it has shape + score1 : ndarray or None + None if evaluate_score is False. Otherwise the derived estimated score at `theta1`. If test_all_combinations + is True, the result has shape `(n_thetas, n_x, n_parameters)`. Otherwise, it has shape `(n_samples, n_parameters)`. 
""" - if self.model is None: raise ValueError("No model -- train or load model before evaluating it!") # Load training data logger.debug("Loading evaluation data") - thetas = load_and_check(theta0_filename) - if isinstance(x, six.string_types): - x = load_and_check(x) + x = load_and_check(x) + theta0 = load_and_check(theta0) + theta1 = load_and_check(theta1) # Scale observables x = self._transform_inputs(x) - # Restrict featuers + # Restrict features if self.features is not None: x = x[:, self.features] - if self.method_type != "nde": - raise RuntimeError("Likelihood estimation only possible for methods NDE and SCANDAL") + # Balance thetas + if len(theta1) > len(theta0): + theta0 = [theta0[i % len(theta0)] for i in range(len(theta1))] + elif len(theta1) < len(theta0): + theta1 = [theta1[i % len(theta1)] for i in range(len(theta0))] - # Evaluation for all other methods - all_log_p_hat = [] - all_t_hat = [] + all_log_r_hat = [] + all_t_hat0 = [] + all_t_hat1 = [] if test_all_combinations: logger.debug("Starting ratio evaluation for all combinations") - for i, theta in enumerate(thetas): - logger.debug("Starting log likelihood evaluation for theta %s / %s: %s", i + 1, len(thetas), theta) + for i, (this_theta0, this_theta1) in enumerate(zip(theta0, theta1)): + logger.debug( + "Starting ratio evaluation for thetas %s / %s: %s vs %s", + i + 1, + len(theta0), + this_theta0, + this_theta1, + ) + _, log_r_hat, t_hat0, t_hat1 = evaluate_ratio_model( + model=self.model, + method_type="double_parameterized_ratio", + theta0s=[this_theta0], + theta1s=[this_theta1], + xs=x, + evaluate_score=evaluate_score, + ) + + all_log_r_hat.append(log_r_hat) + all_t_hat0.append(t_hat0) + all_t_hat1.append(t_hat1) + + all_log_r_hat = np.array(all_log_r_hat) + all_t_hat0 = np.array(all_t_hat0) + all_t_hat1 = np.array(all_t_hat1) + + else: + logger.debug("Starting ratio evaluation") + _, all_log_r_hat, all_t_hat0, all_t_hat1 = evaluate_ratio_model( + model=self.model, + 
method_type="double_parameterized_ratio", + theta0s=theta0, + theta1s=theta1, + xs=x, + evaluate_score=evaluate_score, + ) + + logger.debug("Evaluation done") + return all_log_r_hat, all_t_hat0, all_t_hat1 + + def evaluate_log_likelihood(self, *args, **kwargs): + raise TheresAGoodReasonThisDoesntWork( + "This estimator can only estimate likelihood ratios, not the likelihood " "itself!" + ) + + def evaluate_score(self, *args, **kwargs): + raise NotImplementedError("Please use evaluate_log_likelihood_ratio(evaluate_score=True).") + + def calculate_fisher_information(self, *args, **kwargs): + raise NotImplementedError( + "Please use evaluate_log_likelihood_ratio(evaluate_score=True) and calculate the " + "Fisher information manually." + ) + + def evaluate(self, *args, **kwargs): + return self.evaluate_log_likelihood_ratio(*args, **kwargs) + + def _create_model(self): + self.model = DenseDoublyParameterizedRatioModel( + n_observables=self.n_observables, + n_parameters=self.n_parameters, + n_hidden=self.n_hidden, + activation=self.activation, + ) + + @staticmethod + def _check_required_data(method, r_xz, t_xz0, t_xz1): + if method in ["cascal", "alices", "rascal"] and (t_xz0 is None or t_xz1 is None): + raise RuntimeError("Method {} requires joint score information".format(method)) + if method in ["rolr", "alice", "alices", "rascal"] and r_xz is None: + raise RuntimeError("Method {} requires joint likelihood ratio information".format(method)) + + @staticmethod + def _package_training_data(method, x, theta0, theta1, y, r_xz, t_xz0, t_xz1): + data = OrderedDict() + data["x"] = x + data["theta0"] = theta0 + data["theta1"] = theta1 + data["y"] = y + if method in ["rolr", "alice", "alices", "rascal"]: + data["r_xz"] = r_xz + if method in ["cascal", "alices", "rascal"]: + data["t_xz0"] = t_xz0 + data["t_xz1"] = t_xz1 + return data + + def _wrap_settings(self): + settings = super(DoubleParameterizedRatioEstimator, self)._wrap_settings() + settings["estimator_type"] = 
"double_parameterized_ratio" + return settings + + def _unwrap_settings(self, settings): + super(DoubleParameterizedRatioEstimator, self)._unwrap_settings(settings) + + estimator_type = str(settings["estimator_type"]) + if estimator_type != "double_parameterized_ratio": + raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type)) + + +class ScoreEstimator(Estimator): + """ A neural estimator of the score evaluated at a fixed reference hypothesis as a function of the + observation x. + + Parameters + ---------- + features : list of int or None, optional + Indices of observables (features) that are used as input to the neural networks. If None, all observables + are used. Default value: None. + + n_hidden : tuple of int, optional + Units in each hidden layer in the neural networks. If method is 'nde' or 'scandal', this refers to the + setup of each individual MADE layer. Default value: (100, 100). + + activation : {'tanh', 'sigmoid', 'relu'}, optional + Activation function. Default value: 'tanh'. + + """ + + def __init__(self, features=None, n_components=1, n_mades=5, n_hidden=(100,), activation="tanh", batch_norm=None): + super(ScoreEstimator, self).__init__(features, n_hidden, activation) + + self.nuisance_profile_matrix = None + self.nuisance_project_matrix = None + self.nuisance_mode_default = "keep" + + def train( + self, + method, + x, + t_xz, + optimizer="amsgrad", + n_epochs=50, + batch_size=200, + initial_lr=0.001, + final_lr=0.0001, + nesterov_momentum=None, + validation_split=0.25, + early_stopping=True, + scale_inputs=True, + shuffle_labels=False, + limit_samplesize=None, + verbose="some", + ): + + """ + Trains the network. + + Parameters + ---------- + method : str + The inference method used for training. Currently values are 'sally' and 'sallino', but at the training + stage they are identical. So right now it doesn't matter which one you use. 
+ + x : ndarray or str + Path to an unweighted sample of observations, as saved by the `madminer.sampling.SampleAugmenter` functions. + Required for all inference methods. + + t_xz : ndarray or str + Joint scores at the reference hypothesis, or filename of a pickled numpy array. + + optimizer : {"adam", "amsgrad", "sgd"}, optional + Optimization algorithm. Default value: "amsgrad". + + n_epochs : int, optional + Number of epochs. Default value: 50. + + batch_size : int, optional + Batch size. Default value: 200. + + initial_lr : float, optional + Learning rate during the first epoch, after which it exponentially decays to final_lr. Default value: + 0.001. + + final_lr : float, optional + Learning rate during the last epoch. Default value: 0.0001. + + nesterov_momentum : float or None, optional + If trainer is "sgd", sets the Nesterov momentum. Default value: None. + + validation_split : float or None, optional + Fraction of samples used for validation and early stopping (if early_stopping is True). If None, the entire + sample is used for training and early stopping is deactivated. Default value: 0.25. + + early_stopping : bool, optional + Activates early stopping based on the validation loss (only if validation_split is not None). Default value: + True. + + scale_inputs : bool, optional + Scale the observables to zero mean and unit variance. Default value: True. + + shuffle_labels : bool, optional + If True, the labels (`y`, `r_xz`, `t_xz`) are shuffled, while the observations (`x`) remain in their + normal order. This serves as a closure test, in particular as cross-check against overfitting: an estimator + trained with shuffle_labels=True should predict to likelihood ratios around 1 and scores around 0. + + limit_samplesize : int or None, optional + If not None, only this number of samples (events) is used to train the estimator. Default value: None. + + verbose : {"all", "many", "some", "few", "none}, optional + Determines verbosity of training. 
Default value: "some". + + Returns + ------- + None + + """ + + if method not in ["sally", "sallino"]: + logger.warning("Method %s not allowed for score estimators. Using 'sally' instead.", method) + method = "sally" + + logger.info("Starting training") + logger.info(" Batch size: %s", batch_size) + logger.info(" Optimizer: %s", optimizer) + logger.info(" Epochs: %s", n_epochs) + logger.info(" Learning rate: %s initially, decaying to %s", initial_lr, final_lr) + if optimizer == "sgd": + logger.info(" Nesterov momentum: %s", nesterov_momentum) + logger.info(" Validation split: %s", validation_split) + logger.info(" Early stopping: %s", early_stopping) + logger.info(" Scale inputs: %s", scale_inputs) + logger.info(" Shuffle labels %s", shuffle_labels) + if limit_samplesize is None: + logger.info(" Samples: all") + else: + logger.info(" Samples: %s", limit_samplesize) + + # Load training data + logger.info("Loading training data") + x = load_and_check(x) + t_xz = load_and_check(t_xz) + + # Infer dimensions of problem + n_samples = x.shape[0] + n_observables = x.shape[1] + n_parameters = t_xz.shape[1] + logger.info("Found %s samples with %s parameters and %s observables", n_samples, n_parameters, n_observables) + + # Limit sample size + if limit_samplesize is not None and limit_samplesize < n_samples: + logger.info("Only using %s of %s training samples", limit_samplesize, n_samples) + x, t_xz = restrict_samplesize(limit_samplesize, x, t_xz) + + # Scale features + if scale_inputs: + logger.info("Rescaling inputs") + self._initialize_input_transform(x) + x = self._transform_inputs(x) + else: + self._initialize_input_transform(x, False) + + # Shuffle labels + if shuffle_labels: + logger.info("Shuffling labels") + t_xz = shuffle(t_xz) + + # Features + if self.features is not None: + x = x[:, self.features] + logger.info("Only using %s of %s observables", x.shape[1], n_observables) + n_observables = x.shape[1] + + # Check consistency of input with model + if 
self.n_observables is None: + self.n_observables = n_observables + if self.n_parameters is None: + self.n_parameters = n_parameters + + if n_parameters != self.n_parameters: + raise RuntimeError( + "Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters) + ) + if n_observables != self.n_observables: + raise RuntimeError( + "Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables) + ) + + # Data + data = self._package_training_data(x, t_xz) + + # Create model + if self.model is None: + logger.info("Creating model") + self._create_model() + + # Losses + loss_functions, loss_labels, loss_weights = get_loss(method, None) + + # Optimizer + opt, opt_kwargs = get_optimizer(optimizer, nesterov_momentum) + + # Train model + logger.info("Training model") + trainer = LocalScoreTrainer(self.model) + result = trainer.train( + data=data, + loss_functions=loss_functions, + loss_weights=loss_weights, + loss_labels=loss_labels, + epochs=n_epochs, + batch_size=batch_size, + optimizer=opt, + optimizer_kwargs=opt_kwargs, + initial_lr=initial_lr, + final_lr=final_lr, + validation_split=validation_split, + early_stopping=early_stopping, + verbose=verbose, + ) + return result + + def set_nuisance(self, fisher_information, parameters_of_interest): + """ + Prepares the calculation of profiled scores, see https://arxiv.org/pdf/1903.01473.pdf. + + Parameters + ---------- + fisher_information : ndarray + Fisher informatioin with shape `(n_parameters, n_parameters)`. + + parameters_of_interest : list of int + List of int, with 0 <= remaining_compoinents[i] < n_parameters. Denotes which parameters are kept in the + profiling, and their new order. 
+ + Returns + ------- + None + + """ + if fisher_information.shape != (self.n_parameters, self.n_parameters): + raise ValueError( + "Fisher information has wrong shape {}, expected {}".format( + fisher_information.shape, (self.n_parameters, self.n_parameters) + ) + ) + + n_parameters_of_interest = len(parameters_of_interest) + + # Separate Fisher information parts + nuisance_parameters, information_phys, information_mix, information_nuisance = separate_information_blocks( + fisher_information, parameters_of_interest + ) + + # Calculate projection matrix + self.nuisance_project_matrix = np.zeros((n_parameters_of_interest, self.n_parameters)) # (n_phys, n_all) + for theta_new, theta_old in enumerate(parameters_of_interest): + self.nuisance_project_matrix[theta_new, theta_old] = 1.0 + + logger.debug("Nuisance projection matrix:/n%s", self.nuisance_project_matrix) + + # Calculate profiling matrix + inverse_information_nuisance = np.linalg.inv(information_nuisance) # (n_nuisance, n_nuisance) + profiling_matrix = -information_mix.T.dot(inverse_information_nuisance) # (n_phys, n_nuisance) + + self.nuisance_profile_matrix = np.copy(self.nuisance_project_matrix) # (n_phys, n_all) + for theta_new, theta_old in enumerate(parameters_of_interest): + for nuis_new, nuis_old in enumerate(nuisance_parameters): + self.nuisance_profile_matrix[theta_new, nuis_old] += profiling_matrix[theta_new, nuis_new] + + logger.debug("Nuisance profiling matrix:/n%s", self.nuisance_project_matrix) + + def evaluate_score(self, x, nuisance_mode="auto"): + """ + Evaluates the score. + + Parameters + ---------- + x : str or ndarray + Observations, or filename of a pickled numpy array. + + nuisance_mode : {"auto", "keep", "profile", "project"} + Decides how nuisance parameters are treated. 
If nuisance_mode is "auto", the returned score is the (n+k)- + dimensional score in the space of n parameters of interest and k nuisance parameters if `set_profiling` + has not been called, and the n-dimensional profiled score in the space of the parameters of interest + if it has been called. For "keep", the returned score is always (n+k)-dimensional. For "profile", it is + the n-dimensional profiled score. For "project", it is the n-dimensional projected score, i.e. ignoring + the nuisance parameters. + + Returns + ------- + score : ndarray + Estimated score with shape `(n_observations, n_parameters)`. + """ + + if self.model is None: + raise ValueError("No model -- train or load model before evaluating it!") + + if nuisance_mode == "auto": + logger.debug("Using nuisance mode %s", self.nuisance_mode_default) + nuisance_mode = self.nuisance_mode_default + + # Load training data + logger.debug("Loading evaluation data") + x = load_and_check(x) + + # Scale observables + x = self._transform_inputs(x) + + # Restrict featuers + if self.features is not None: + x = x[:, self.features] + + # Evaluation + logger.debug("Starting score evaluation") + t_hat = evaluate_local_score_model(model=self.model, xs=x) + + # Treatment of nuisance paramters + if nuisance_mode == "keep": + logging.debug("Keeping nuisance parameter score") - log_p_hat, t_hat = evaluate_flow_model( - model=self.model, thetas=[theta], xs=x, evaluate_score=evaluate_score + elif nuisance_mode == "project": + if self.nuisance_project_matrix is None: + raise ValueError( + "evaluate_score() was called with nuisance_mode = project, but nuisance parameters " + "have not been set up yet. Please call set_nuisance() first!" 
) + logging.debug("Projecting nuisance parameter score") + t_hat = np.einsum("ij,xj->xi", self.nuisance_project_matrix, t_hat) + + elif nuisance_mode == "profile": + if self.nuisance_profile_matrix is None: + raise ValueError( + "evaluate_score() was called with nuisance_mode = profile, but nuisance parameters " + "have not been set up yet. Please call set_nuisance() first!" + ) + logging.debug("Profiling nuisance parameter score") + t_hat = np.einsum("ij,xj->xi", self.nuisance_profile_matrix, t_hat) - all_log_p_hat.append(log_p_hat) - all_t_hat.append(t_hat) + else: + raise ValueError("Unknown nuisance_mode {}".format(nuisance_mode)) - all_log_p_hat = np.array(all_log_p_hat) - all_t_hat = np.array(all_t_hat) + return t_hat - else: - logger.debug("Starting log likelihood evaluation") + def evaluate_log_likelihood(self, *args, **kwargs): + raise TheresAGoodReasonThisDoesntWork("This estimator can only estimate the score, not the likelihood!") - all_log_p_hat, all_t_hat = evaluate_flow_model( - model=self.model, thetas=thetas, xs=x, evaluate_score=evaluate_score - ) + def evaluate_log_likelihood_ratio(self, *args, **kwargs): + raise TheresAGoodReasonThisDoesntWork("This estimator can only estimate the score, not the likelihood ratio!") - logger.debug("Evaluation done") - return all_log_p_hat, all_t_hat + def evaluate(self, *args, **kwargs): + return self.evaluate_score(*args, **kwargs) def calculate_fisher_information(self, x, weights=None, n_events=1, sum_events=True): - """ Calculates the expected Fisher information matrix based on the kinematic information in a given number of - events. Currently only supported for estimators trained with `method='sally'` or `method='sallino'`. + events. Parameters ---------- x : str or ndarray - Sample of observations, or path to numpy file with observations, as saved by the - `madminer.sampling.SampleAugmenter` functions. 
Note that this sample has to be sampled from the reference - parameter where the score is estimated with the SALLY / SALLINO estimator! + Sample of observations, or path to numpy file with observations. Note that this sample has to be sampled + from the reference parameter where the score is estimated with the SALLY / SALLINO estimator. weights : None or ndarray, optional Weights for the observations. If None, all events are taken to have equal weight. Default value: None. - + n_events : float, optional Expected number of events for which the kinematic Fisher information should be calculated. Default value: 1. @@ -771,14 +1372,12 @@ def calculate_fisher_information(self, x, weights=None, n_events=1, sum_events=T sum_events is False or `(n_parameters, n_parameters)` if sum_events is True. """ - if self.model is None: raise ValueError("No model -- train or load model before evaluating it!") # Load training data logger.debug("Loading evaluation data") - if isinstance(x, six.string_types): - x = load_and_check(x) + x = load_and_check(x) n_samples = x.shape[0] # Scale observables @@ -789,12 +1388,8 @@ def calculate_fisher_information(self, x, weights=None, n_events=1, sum_events=T x = x[:, self.features] # Estimate scores - if self.method in ["sally", "sallino"]: - logger.debug("Starting score evaluation") - - t_hats = evaluate_local_score_model(model=self.model, xs=x) - else: - raise NotImplementedError("Fisher information calculation only implemented for SALLY estimators") + logger.debug("Starting score evaluation") + t_hats = evaluate_local_score_model(model=self.model, xs=x) # Weights if weights is None: @@ -814,19 +1409,194 @@ def calculate_fisher_information(self, x, weights=None, n_events=1, sum_events=T return fisher_information def save(self, filename, save_model=False): + super(ScoreEstimator, self).save(filename, save_model) + + # Also save Fisher information information for profiling / projections + if self.nuisance_profile_matrix is not None and 
self.nuisance_project_matrix is not None: + logger.debug( + "Saving nuisance profiling / projection information to %s_nuisance_profile_matrix.npy and " + "%s_nuisance_project_matrix.npy", + filename, + filename, + ) + np.save(filename + "_nuisance_profile_matrix.npy", self.nuisance_profile_matrix) + np.save(filename + "_nuisance_project_matrix.npy", self.nuisance_project_matrix) + + def load(self, filename): + super(ScoreEstimator, self).load(filename) + + # Load scaling + try: + self.nuisance_profile_matrix = np.load(filename + "_nuisance_profile_matrix.npy") + self.nuisance_project_matrix = np.load(filename + "_nuisance_project_matrix.npy") + logger.debug( + " Found nuisance profiling / projection matrices:\nProfiling:\n%s\nProjection:\n%s", + self.nuisance_profile_matrix, + self.nuisance_project_matrix, + ) + except: + logger.debug("Did not find nuisance profiling / projection setup in %s", filename) + self.nuisance_profile_matrix = None + self.nuisance_project_matrix = None + + def _create_model(self): + self.model = DenseLocalScoreModel( + n_observables=self.n_observables, + n_parameters=self.n_parameters, + n_hidden=self.n_hidden, + activation=self.activation, + ) + + @staticmethod + def _package_training_data(x, t_xz): + data = OrderedDict() + data["x"] = x + data["t_xz"] = t_xz + return data + + def _wrap_settings(self): + settings = super(ScoreEstimator, self)._wrap_settings() + settings["estimator_type"] = "score" + settings["estimator_type"] = "score" + settings["nuisance_mode_default"] = self.nuisance_mode_default + return settings + + def _unwrap_settings(self, settings): + super(ScoreEstimator, self)._unwrap_settings(settings) + + estimator_type = str(settings["estimator_type"]) + if estimator_type != "score": + raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type)) + + try: + self.nuisance_mode_default = str(settings["nuisance_mode_default"]) + except KeyError: + self.nuisance_mode_default = "keep" + 
logger.warning("Did not find entry nuisance_mode_default in saved model, using default 'keep'.") + + +class LikelihoodEstimator(Estimator): + """ A neural estimator of the density or likelihood evaluated at a reference hypothesis as a function + of the observation x. + + Parameters + ---------- + features : list of int or None, optional + Indices of observables (features) that are used as input to the neural networks. If None, all observables + are used. Default value: None. + + n_components : int, optional + The number of Gaussian base components in a MADE MoG. If 1, a plain MADE is used. + Default value: 1. + + n_mades : int, optional + The number of MADE layers. Default value: 3. + + + n_hidden : tuple of int, optional + Units in each hidden layer in the neural networks. If method is 'nde' or 'scandal', this refers to the + setup of each individual MADE layer. Default value: (100, 100). + + activation : {'tanh', 'sigmoid', 'relu'}, optional + Activation function. Default value: 'tanh'. + + batch_norm : None or floar, optional + If not None, batch normalization is used, where this value sets the alpha parameter in the calculation + of the running average of the mean and variance. Default value: None. 
+ + + """ + + def __init__(self, features=None, n_components=1, n_mades=5, n_hidden=(100,), activation="tanh", batch_norm=None): + super(LikelihoodEstimator, self).__init__(features, n_hidden, activation) + + self.n_components = n_components + self.n_mades = n_mades + self.batch_norm = batch_norm + + def train( + self, + method, + x, + theta, + t_xz=None, + alpha=1.0, + optimizer="amsgrad", + n_epochs=50, + batch_size=200, + initial_lr=0.001, + final_lr=0.0001, + nesterov_momentum=None, + validation_split=0.25, + early_stopping=True, + scale_inputs=True, + shuffle_labels=False, + limit_samplesize=None, + verbose="some", + ): """ - Saves the trained model to four files: a JSON file with the settings, a pickled pyTorch state dict - file, and numpy files for the mean and variance of the inputs (used for input scaling). + Trains the network. Parameters ---------- - filename : str - Path to the files. '_settings.json' and '_state_dict.pl' will be added. + method : str + The inference method used for training. Allowed values are 'nde' and 'scandal'. - save_model : bool, optional - If True, the whole model is saved in addition to the state dict. This is not necessary for loading it - again with MLForge.load(), but can be useful for debugging, for instance to plot the computational graph. + x : ndarray or str + Path to an unweighted sample of observations, as saved by the `madminer.sampling.SampleAugmenter` functions. + Required for all inference methods. + + theta : ndarray or str + Numerator parameter point, or filename of a pickled numpy array. + + t_xz : ndarray or str or None, optional + Joint scores at theta, or filename of a pickled numpy array. Default value: None. + + alpha : float, optional + Hyperparameter weighting the score error in the loss function of the 'alices', 'rascal', and 'cascal' + methods. Default value: 1. + + optimizer : {"adam", "amsgrad", "sgd"}, optional + Optimization algorithm. Default value: "amsgrad". 
+ + n_epochs : int, optional + Number of epochs. Default value: 50. + + batch_size : int, optional + Batch size. Default value: 200. + + initial_lr : float, optional + Learning rate during the first epoch, after which it exponentially decays to final_lr. Default value: + 0.001. + + final_lr : float, optional + Learning rate during the last epoch. Default value: 0.0001. + + nesterov_momentum : float or None, optional + If trainer is "sgd", sets the Nesterov momentum. Default value: None. + + validation_split : float or None, optional + Fraction of samples used for validation and early stopping (if early_stopping is True). If None, the entire + sample is used for training and early stopping is deactivated. Default value: 0.25. + + early_stopping : bool, optional + Activates early stopping based on the validation loss (only if validation_split is not None). Default value: + True. + + scale_inputs : bool, optional + Scale the observables to zero mean and unit variance. Default value: True. + + shuffle_labels : bool, optional + If True, the labels (`y`, `r_xz`, `t_xz`) are shuffled, while the observations (`x`) remain in their + normal order. This serves as a closure test, in particular as cross-check against overfitting: an estimator + trained with shuffle_labels=True should predict to likelihood ratios around 1 and scores around 0. + + limit_samplesize : int or None, optional + If not None, only this number of samples (events) is used to train the estimator. Default value: None. + + verbose : {"all", "many", "some", "few", "none}, optional + Determines verbosity of training. Default value: "some". 
Returns ------- @@ -834,238 +1604,368 @@ def save(self, filename, save_model=False): """ - if self.model is None: - raise ValueError("No model -- train or load model before saving!") + logger.info("Starting training") + logger.info(" Method: %s", method) + if method == "scandal": + logger.info(" alpha: %s", alpha) + logger.info(" Batch size: %s", batch_size) + logger.info(" Optimizer: %s", optimizer) + logger.info(" Epochs: %s", n_epochs) + logger.info(" Learning rate: %s initially, decaying to %s", initial_lr, final_lr) + if optimizer == "sgd": + logger.info(" Nesterov momentum: %s", nesterov_momentum) + logger.info(" Validation split: %s", validation_split) + logger.info(" Early stopping: %s", early_stopping) + logger.info(" Scale inputs: %s", scale_inputs) + logger.info(" Shuffle labels %s", shuffle_labels) + if limit_samplesize is None: + logger.info(" Samples: all") + else: + logger.info(" Samples: %s", limit_samplesize) + + # Load training data + logger.info("Loading training data") + theta = load_and_check(theta) + x = load_and_check(x) + t_xz = load_and_check(t_xz) + + self._check_required_data(method, t_xz) + + # Infer dimensions of problem + n_samples = x.shape[0] + n_observables = x.shape[1] + n_parameters = theta.shape[1] + logger.info("Found %s samples with %s parameters and %s observables", n_samples, n_parameters, n_observables) + + # Limit sample size + if limit_samplesize is not None and limit_samplesize < n_samples: + logger.info("Only using %s of %s training samples", limit_samplesize, n_samples) + x, theta, t_xz = restrict_samplesize(limit_samplesize, x, theta, t_xz) + + # Scale features + if scale_inputs: + logger.info("Rescaling inputs") + self._initialize_input_transform(x) + x = self._transform_inputs(x) + else: + self._initialize_input_transform(x, False) + + # Shuffle labels + if shuffle_labels: + logger.info("Shuffling labels") + t_xz = shuffle(t_xz) + + # Features + if self.features is not None: + x = x[:, self.features] + 
logger.info("Only using %s of %s observables", x.shape[1], n_observables) + n_observables = x.shape[1] + + # Check consistency of input with model + if self.n_observables is None: + self.n_observables = n_observables + if self.n_parameters is None: + self.n_parameters = n_parameters + + if n_parameters != self.n_parameters: + raise RuntimeError( + "Number of parameters does not match model: {} vs {}".format(n_parameters, self.n_parameters) + ) + if n_observables != self.n_observables: + raise RuntimeError( + "Number of observables does not match model: {} vs {}".format(n_observables, self.n_observables) + ) + + # Data + data = self._package_training_data(method, x, theta, t_xz) + + # Create model + if self.model is None: + logger.info("Creating model", method) + self._create_model() + + # Losses + loss_functions, loss_labels, loss_weights = get_loss(method, alpha) + + # Optimizer + opt, opt_kwargs = get_optimizer(optimizer, nesterov_momentum) + + # Train model + logger.info("Training model") + trainer = FlowTrainer(self.model) + result = trainer.train( + data=data, + loss_functions=loss_functions, + loss_weights=loss_weights, + loss_labels=loss_labels, + epochs=n_epochs, + batch_size=batch_size, + optimizer=opt, + optimizer_kwargs=opt_kwargs, + initial_lr=initial_lr, + final_lr=final_lr, + validation_split=validation_split, + early_stopping=early_stopping, + verbose=verbose, + ) + return result + + def evaluate_log_likelihood(self, x, theta, test_all_combinations=True, evaluate_score=False): + + """ + Evaluates the log likelihood as a function of the observation x and the parameter point theta. + + Parameters + ---------- + x : ndarray or str + Sample of observations, or path to numpy file with observations. + + theta : ndarray or str + Parameter points, or path to numpy file with parameter points. 
+ + test_all_combinations : bool, optional + If method is not 'sally' and not 'sallino': If False, the number of samples in the observable and theta + files has to match, and the likelihood ratio is evaluated only for the combinations + `r(x_i | theta0_i, theta1_i)`. If True, `r(x_i | theta0_j, theta1_j)` for all pairwise combinations `i, j` + are evaluated. Default value: True. + + evaluate_score : bool, optional + If method is not 'sally' and not 'sallino', this sets whether in addition to the likelihood ratio the score + is evaluated. Default value: False. + + Returns + ------- + + log_likelihood : ndarray + The estimated log likelihood. If test_all_combinations is True, the result has shape `(n_thetas, n_x)`. + Otherwise, it has shape `(n_samples,)`. + + score : ndarray or None + None if + evaluate_score is False. Otherwise the derived estimated score at `theta`. If test_all_combinations is + True, the result has shape `(n_thetas, n_x, n_parameters)`. Otherwise, it has shape + `(n_samples, n_parameters)`. 
+ + """ + + if self.model is None: + raise ValueError("No model -- train or load model before evaluating it!") + + # Load training data + logger.debug("Loading evaluation data") + theta = load_and_check(theta) + x = load_and_check(x) + + # Scale observables + x = self._transform_inputs(x) + + # Restrict featuers + if self.features is not None: + x = x[:, self.features] + + # Evaluation for all other methods + all_log_p_hat = [] + all_t_hat = [] + + if test_all_combinations: + logger.debug("Starting ratio evaluation for all combinations") - # Check paths - create_missing_folders([os.path.dirname(filename)]) + for i, this_theta in enumerate(theta): + logger.debug("Starting log likelihood evaluation for thetas %s / %s: %s", i + 1, len(theta), this_theta) - # Save settings - logger.debug("Saving settings to %s_settings.json", filename) + log_p_hat, t_hat = evaluate_flow_model( + model=self.model, thetas=[this_theta], xs=x, evaluate_score=evaluate_score + ) - settings = { - "method": self.method, - "method_type": self.method_type, - "n_observables": self.n_observables, - "n_parameters": self.n_parameters, - "n_hidden": list(self.n_hidden), - "activation": self.activation, - "features": self.features, - "nde_type": self.nde_type, - "maf_n_mades": self.maf_n_mades, - "maf_batch_norm": self.maf_batch_norm, - "maf_batch_norm_alpha": self.maf_batch_norm_alpha, - "maf_mog_n_components": self.maf_mog_n_components, - } + all_log_p_hat.append(log_p_hat) + all_t_hat.append(t_hat) - with open(filename + "_settings.json", "w") as f: - json.dump(settings, f) + all_log_p_hat = np.array(all_log_p_hat) + all_t_hat = np.array(all_t_hat) - # Save scaling - if self.x_scaling_stds is not None and self.x_scaling_means is not None: - logger.debug("Saving input scaling information to %s_x_means.npy and %s_x_stds.npy", filename, filename) - np.save(filename + "_x_means.npy", self.x_scaling_means) - np.save(filename + "_x_stds.npy", self.x_scaling_stds) + else: + logger.debug("Starting log 
likelihood evaluation") - # Save state dict - logger.debug("Saving state dictionary to %s_state_dict.pt", filename) - torch.save(self.model.state_dict(), filename + "_state_dict.pt") + all_log_p_hat, all_t_hat = evaluate_flow_model( + model=self.model, thetas=theta, xs=x, evaluate_score=evaluate_score + ) - # Save model - if save_model: - logger.debug("Saving model to %s_model.pt", filename) - torch.save(self.model, filename + "_model.pt") + logger.debug("Evaluation done") + return all_log_p_hat, all_t_hat - def load(self, filename): + def evaluate_log_likelihood_ratio(self, x, theta0, theta1, test_all_combinations, evaluate_score=False): """ - Loads a trained model from files. + Evaluates the log likelihood ratio as a function of the observation x, the numerator parameter point theta0, + and the denominator parameter point theta1. Parameters ---------- - filename : str - Path to the files. '_settings.json' and '_state_dict.pl' will be added. + x : ndarray or str + Sample of observations, or path to numpy file with observations. + + theta0 : ndarray or str + Numerator parameters, or path to numpy file. + + theta1 : ndarray or str + Denominator parameters, or path to numpy file. + + test_all_combinations : bool, optional + If method is not 'sally' and not 'sallino': If False, the number of samples in the observable and theta + files has to match, and the likelihood ratio is evaluated only for the combinations + `r(x_i | theta0_i, theta1_i)`. If True, `r(x_i | theta0_j, theta1_j)` for all pairwise combinations `i, j` + are evaluated. Default value: True. + + evaluate_score : bool, optional + If method is not 'sally' and not 'sallino', this sets whether in addition to the likelihood ratio the score + is evaluated. Default value: False. Returns ------- - None - """ + log_likelihood : ndarray + The estimated log likelihood. If test_all_combinations is True, the result has shape `(n_thetas, n_x)`. + Otherwise, it has shape `(n_samples,)`. 
- # Load settings - logger.debug("Loading settings from %s_settings.json", filename) + score : ndarray or None + None if + evaluate_score is False. Otherwise the derived estimated score at `theta`. If test_all_combinations is + True, the result has shape `(n_thetas, n_x, n_parameters)`. Otherwise, it has shape + `(n_samples, n_parameters)`. - with open(filename + "_settings.json", "r") as f: - settings = json.load(f) + """ - method = settings["method"] - n_observables = int(settings["n_observables"]) - n_parameters = int(settings["n_parameters"]) - n_hidden = tuple([int(item) for item in settings["n_hidden"]]) - activation = str(settings["activation"]) - features = settings["features"] - nde_type = settings["nde_type"] - maf_n_mades = int(settings["maf_n_mades"]) - maf_batch_norm = bool(settings["maf_batch_norm"]) - maf_batch_norm_alpha = float(settings["maf_batch_norm_alpha"]) - maf_mog_n_components = int(settings["maf_mog_n_components"]) - - logger.debug( - " Found method %s, %s observables, %s parameters, %s hidden layers, %s activation function, " - "features %s", - method, - n_observables, - n_parameters, - n_hidden, - activation, - features, - ) + if self.model is None: + raise ValueError("No model -- train or load model before evaluating it!") - # Features - if features == "None": - self.features = None - if features is not None: - self.features = list([int(item) for item in features]) + # Load training data + logger.debug("Loading evaluation data") + x = load_and_check(x) + theta0 = load_and_check(theta0) + theta1 = load_and_check(theta1) - # Load scaling - try: - self.x_scaling_means = np.load(filename + "_x_means.npy") - self.x_scaling_stds = np.load(filename + "_x_stds.npy") - logger.debug( - " Found input scaling information: means %s, stds %s", self.x_scaling_means, self.x_scaling_stds - ) - except FileNotFoundError: - logger.warning("Scaling information not found in %s", filename) - self.x_scaling_means = None - self.x_scaling_stds = None + # Scale 
observables + x = self._transform_inputs(x) - # Create model and save in self - self._create_model( - method, - n_observables, - n_parameters, - n_hidden, - activation, - nde_type, - maf_n_mades, - maf_batch_norm, - maf_batch_norm_alpha, - maf_mog_n_components, - ) + # Restrict features + if self.features is not None: + x = x[:, self.features] - # Load state dict - logger.debug("Loading state dictionary from %s_state_dict.pt", filename) - self.model.load_state_dict(torch.load(filename + "_state_dict.pt")) + # Balance thetas + if len(theta1) > len(theta0): + theta0 = [theta0[i % len(theta0)] for i in range(len(theta1))] + elif len(theta1) < len(theta0): + theta1 = [theta1[i % len(theta1)] for i in range(len(theta0))] - def _initialize_input_transform(self, x, transform=True): - if transform: - self.x_scaling_means = np.mean(x, axis=0) - self.x_scaling_stds = np.maximum(np.std(x, axis=0), 1.0e-6) - else: - n_parameters = x.shape[0] + log_p_hat0, t_hat0 = self.evaluate_log_likelihood( + x, theta0, test_all_combinations=test_all_combinations, evaluate_score=evaluate_score + ) + log_p_hat1, t_hat1 = self.evaluate_log_likelihood( + x, theta1, test_all_combinations=test_all_combinations, evaluate_score=evaluate_score + ) + log_r_hat = log_p_hat0 - log_p_hat1 - self.x_scaling_means = np.zeros(n_parameters) - self.x_scaling_stds = np.ones(n_parameters) + return log_r_hat, t_hat0, t_hat1 - def _transform_inputs(self, x): - if self.x_scaling_means is not None and self.x_scaling_stds is not None: - x_scaled = x - self.x_scaling_means - x_scaled /= self.x_scaling_stds - else: - x_scaled = x - return x_scaled + def evaluate_score(self, *args, **kwargs): + raise NotImplementedError("Please use evaluate_log_likelihood(evaluate_score=True).") - def _create_model( - self, - method, - n_observables, - n_parameters, - n_hidden, - activation, - nde_type=None, - maf_n_mades=None, - maf_batch_norm=None, - maf_batch_norm_alpha=None, - maf_mog_n_components=None, - ): + def 
calculate_fisher_information(self, *args, **kwargs): + raise NotImplementedError( + "Please use evaluate_log_likelihood_ratio(evaluate_score=True) and calculate the " + "Fisher information manually." + ) - self.method = method - self.n_observables = n_observables - self.n_parameters = n_parameters - self.n_hidden = n_hidden - self.activation = activation - self.maf_n_mades = maf_n_mades - self.maf_batch_norm = maf_batch_norm - self.maf_batch_norm_alpha = maf_batch_norm_alpha - self.maf_mog_n_components = maf_mog_n_components - - self.method_type = get_method_type(method) - if self.method_type == "parameterized": - self.model = ParameterizedRatioEstimator( - n_observables=n_observables, n_parameters=n_parameters, n_hidden=n_hidden, activation=activation + def evaluate(self, *args, **kwargs): + return self.evaluate_log_likelihood(*args, **kwargs) + + def _create_model(self): + if self.n_components > 1: + self.model = ConditionalMixtureMaskedAutoregressiveFlow( + n_conditionals=self.n_parameters, + n_inputs=self.n_observables, + n_components=self.n_components, + n_hiddens=self.n_hidden, + n_mades=self.n_mades, + activation=self.activation, + batch_norm=self.batch_norm is not None, + alpha=self.batch_norm, ) - elif self.method_type == "doubly_parameterized": - self.model = DoublyParameterizedRatioEstimator( - n_observables=n_observables, n_parameters=n_parameters, n_hidden=n_hidden, activation=activation - ) - elif self.method_type == "local_score": - self.model = LocalScoreEstimator( - n_observables=n_observables, n_parameters=n_parameters, n_hidden=n_hidden, activation=activation - ) - elif self.method_type == "nde": - self.nde_type = nde_type - if nde_type == "maf": - self.model = ConditionalMaskedAutoregressiveFlow( - n_conditionals=n_parameters, - n_inputs=n_observables, - n_hiddens=n_hidden, - n_mades=maf_n_mades, - activation=activation, - batch_norm=maf_batch_norm, - alpha=maf_batch_norm_alpha, - ) - elif nde_type == "mafmog": - self.model = 
ConditionalMixtureMaskedAutoregressiveFlow( - n_conditionals=n_parameters, - n_inputs=n_observables, - n_components=maf_mog_n_components, - n_hiddens=n_hidden, - n_mades=maf_n_mades, - activation=activation, - batch_norm=maf_batch_norm, - alpha=maf_batch_norm_alpha, - ) - else: - raise RuntimeError("Unknown NDE type {}".format(nde_type)) else: - raise RuntimeError("Unknown method {}".format(method)) - + self.model = ConditionalMaskedAutoregressiveFlow( + n_conditionals=self.n_parameters, + n_inputs=self.n_observables, + n_hiddens=self.n_hidden, + n_mades=self.n_mades, + activation=self.activation, + batch_norm=self.batch_norm is not None, + alpha=self.batch_norm, + ) -class EnsembleForge: + @staticmethod + def _check_required_data(method, t_xz): + if method == ["scandal"] and t_xz is None: + raise RuntimeError("Method {} requires joint score information".format(method)) + + @staticmethod + def _package_training_data(method, x, theta, t_xz): + data = OrderedDict() + data["x"] = x + data["theta"] = theta + if method in ["scandal"]: + data["t_xz"] = t_xz + return data + + def _wrap_settings(self): + settings = super(LikelihoodEstimator, self)._wrap_settings() + settings["estimator_type"] = "likelihood" + settings["n_components"] = self.n_components + settings["batch_norm"] = self.batch_norm + settings["n_mades"] = self.n_mades + return settings + + def _unwrap_settings(self, settings): + super(LikelihoodEstimator, self)._unwrap_settings(settings) + + estimator_type = str(settings["estimator_type"]) + if estimator_type != "likelihood": + raise RuntimeError("Saved model is an incompatible estimator type {}.".format(estimator_type)) + + self.n_components = int(settings["n_components"]) + self.n_mades = int(settings["n_mades"]) + self.batch_norm = settings["batch_norm"] + if self.batch_norm == "None": + self.batch_norm = None + if self.batch_norm is not None: + self.batch_norm = float(self.batch_norm) + + +class Ensemble: """ - Ensemble methods for likelihood ratio and 
score information. + Ensemble methods for likelihood, likelihood ratio, and score estimation. - Generally, EnsembleForge instances can be used very similarly to MLForge instances: + Generally, Ensemble instances can be used very similarly to Estimator instances: - * The initialization of EnsembleForge takes a list of (trained or untrained) MLForge instances. - * The methods `EnsembleForge.train_one()` and `EnsembleForge.train_all()` train the estimators (this can also be - done outside of EnsembleForge). - * `EnsembleForge.calculate_expectation()` can be used to calculate the expectation of the estimation likelihood + * The initialization of Ensemble takes a list of (trained or untrained) Estimator instances. + * The methods `Ensemble.train_one()` and `Ensemble.train_all()` train the estimators (this can also be + done outside of Ensemble). + * `Ensemble.calculate_expectation()` can be used to calculate the expectation of the estimation likelihood ratio or the expected estimated score over a validation sample. Ideally (and assuming the correct sampling), these expectation values should be close to zero. Deviations from zero therefore point out that the estimator is probably inaccurate. - * `EnsembleForge.evaluate()` and `EnsembleForge.calculate_fisher_information()` can then be used to calculate - ensemble predictions. The user has the option to treat all estimators equally ('committee method') or to give those - with expected score / ratio close to zero a higher weight. - * `EnsembleForge.save()` and `EnsembleForge.load()` can store all estimators in one folder. + * `Ensemble.evaluate_log_likelihood()`, `Ensemble.evaluate_log_likelihood_ratio()`, `Ensemble.evaluate_score()`, + and `Ensemble.calculate_fisher_information()` can then be used to calculate + ensemble predictions. + * `Ensemble.save()` and `Ensemble.load()` can store all estimators in one folder. 
The individual estimators in the ensemble can be trained with different methods, but they have to be of the same - type: either all estimators are single-parameterized likelihood ratio estimators, or all estimators are - doubly-parameterized likelihood estimators, or all estimators are local score regressors. + type: either all estimators are ParameterizedRatioEstimator instances, or all estimators are + DoubleParameterizedRatioEstimator instances, or all estimators are ScoreEstimator instances, or all estimators are + LikelihoodEstimator instances. Parameters ---------- - estimators : None or int or list of (MLForge or str), optional + estimators : None or list of Estimator, optional If int, sets the number of estimators that will be created as new MLForge instances. If list, sets the estimators directly, either from MLForge instances or filenames (that are then loaded with `MLForge.load()`). If None, the ensemble is initialized without estimators. Note that the estimators have @@ -1077,39 +1977,27 @@ class EnsembleForge: Attributes ---------- - estimators : list of MLForge + estimators : list of Estimator The estimators in the form of MLForge instances.
""" def __init__(self, estimators=None): self.n_parameters = None self.n_observables = None + self.estimator_type = None # Initialize estimators if estimators is None: self.estimators = [] - elif isinstance(estimators, int): - self.estimators = [MLForge() for _ in range(estimators)] else: self.estimators = [] for estimator in estimators: - if isinstance(estimator, six.string_types): - estimator_object = MLForge() - estimator_object.load(estimator) - elif isinstance(estimator, MLForge): - estimator_object = estimator + if isinstance(estimator, Estimator): + self.estimators.append(estimator) else: - raise ValueError("Entry {} in estimators is neither str nor MLForge instance") - - self.estimators.append(estimator_object) + raise ValueError("Entry {} in estimators is neither str nor Estimator instance") self.n_estimators = len(self.estimators) - self.expectations = None - - # Consistency checks - for estimator in self.estimators: - assert isinstance(estimator, MLForge), "Estimator is no MLForge instance!" - self._check_consistency() def add_estimator(self, estimator): @@ -1118,28 +2006,24 @@ def add_estimator(self, estimator): Parameters ---------- - estimator : MLForge or str - The estimator, either as MLForge instance or filename (which is then loaded with `MLForge.load()`). + estimator : Estimator + The estimator. Returns ------- None """ - if isinstance(estimator, six.string_types): - estimator_object = MLForge() - estimator_object.load(estimator) - elif isinstance(estimator, MLForge): - estimator_object = estimator - else: - raise ValueError("Entry {} in estimators is neither str nor MLForge instance") + if not isinstance(estimator, Estimator): + raise ValueError("Entry {} in estimators is neither str nor Estimator instance") - self.estimators.append(estimator_object) + self.estimators.append(estimator) self.n_estimators = len(self.estimators) + self._check_consistency() def train_one(self, i, **kwargs): """ - Trains an individual estimator. 
See `MLForge.train()`. + Trains an individual estimator. See `Estimator.train()`. Parameters ---------- @@ -1147,7 +2031,7 @@ def train_one(self, i, **kwargs): The index `0 <= i < n_estimators` of the estimator to be trained. kwargs : dict - Parameters for `MLForge.train()`. + Parameters for `Estimator.train()`. Returns ------- @@ -1155,18 +2039,16 @@ def train_one(self, i, **kwargs): """ - self._check_consistency(kwargs) - self.estimators[i].train(**kwargs) def train_all(self, **kwargs): """ - Trains all estimators. See `MLForge.train()`. + Trains all estimators. See `Estimator.train()`. Parameters ---------- kwargs : dict - Parameters for `MLForge.train()`. If a value in this dict is a list, it has to have length `n_estimators` + Parameters for `Estimator.train()`. If a value in this dict is a list, it has to have length `n_estimators` and contain one value of this parameter for each of the estimators. Otherwise the value is used as parameter for the training of all the estimators. @@ -1183,8 +2065,6 @@ def train_all(self, **kwargs): assert len(kwargs[key]) == self.n_estimators, "Keyword {} has wrong length {}".format(key, len(value)) - self._check_consistency(kwargs) - for i, estimator in enumerate(self.estimators): kwargs_this_estimator = {} for key, value in six.iteritems(kwargs): @@ -1193,212 +2073,180 @@ def train_all(self, **kwargs): logger.info("Training estimator %s / %s in ensemble", i + 1, self.n_estimators) estimator.train(**kwargs_this_estimator) - def calculate_expectation(self, x_filename, theta0_filename=None, theta1_filename=None): + def evaluate_log_likelihood(self, estimator_weights=None, calculate_covariance=False, **kwargs): """ - Calculates the expectation of the estimation likelihood ratio or the expected estimated score over a validation - sample. Ideally (and assuming the correct sampling), these expectation values should be close to zero. - Deviations from zero therefore point out that the estimator is probably inaccurate. 
+ Estimates the log likelihood from each estimator and returns the ensemble mean (and, if calculate_covariance is + True, the covariance between them). Parameters ---------- - x_filename : str - Path to an unweighted sample of observations, as saved by the `madminer.sampling.SampleAugmenter` functions. + estimator_weights : ndarray or None, optional + Weights for each estimator in the ensemble. If None, all estimators have an equal vote. Default value: None. - theta0_filename : str or None, optional - Path to an unweighted sample of numerator parameters, as saved by the `madminer.sampling.SampleAugmenter` - functions. Required if the estimators were trained with the 'alice', 'alice2', 'alices', 'alices2', 'carl', - 'carl2', 'nde', 'rascal', 'rascal2', 'rolr', 'rolr2', or 'scandal' method. Default value: None. + calculate_covariance : bool, optional + If True, the covariance between the different estimators is calculated. Default value: False. - theta1_filename : str or None, optional - Path to an unweighted sample of denominator parameters, as saved by the `madminer.sampling.SampleAugmenter` - functions. Required if the estimators were trained with the 'alice2', 'alices2', 'carl2', 'rascal2', or - 'rolr2' method. Default value: None. + kwargs + Arguments for the evaluation. See the documentation of the relevant Estimator class. Returns ------- - expectations : ndarray - Expected score (if the estimators were trained with the 'sally' or 'sallino' methods) or likelihood ratio - (otherwise). + log_likelihood : ndarray + Mean prediction for the log likelihood. + + covariance : ndarray or None + If calculate_covariance is True, the covariance matrix between the estimators. Otherwise None. 
""" - logger.info("Calculating expectation for %s estimators in ensemble", self.n_estimators) + logger.info("Evaluating %s estimators in ensemble", self.n_estimators) - self.expectations = [] - method_type = self._check_consistency() + # Calculate weights of each estimator in vote + if estimator_weights is None: + estimator_weights = np.ones(self.n_estimators) + assert len(estimator_weights) == self.n_estimators + estimator_weights /= np.sum(estimator_weights) + logger.debug("Estimator weights: %s", estimator_weights) + # Calculate estimator predictions + predictions = [] for i, estimator in enumerate(self.estimators): logger.info("Starting evaluation for estimator %s / %s in ensemble", i + 1, self.n_estimators) + predictions.append(estimator.evaluate_log_likelihood(**kwargs)[0]) + predictions = np.array(predictions) - # Calculate expected score / ratio - if method_type == "local_score": - prediction = estimator.evaluate(x_filename, theta0_filename, theta1_filename) - else: - raise NotImplementedError("Expectation calculation currently only implemented for SALLY and SALLINO!") + # Calculate weighted means and covariance matrices + mean = np.average(predictions, axis=0, weights=estimator_weights) - self.expectations.append(np.mean(prediction, axis=0)) + if calculate_covariance: + predictions_flat = predictions.reshape((predictions.shape[0], -1)) + covariance = np.cov(predictions_flat.T, aweights=estimator_weights) + covariance = covariance.reshape(list(predictions.shape) + list(predictions.shape)) + else: + covariance = None - self.expectations = np.array(self.expectations) + return mean, covariance - return self.expectations + def evaluate_log_likelihood_ratio(self, estimator_weights=None, calculate_covariance=False, **kwargs): + """ + Estimates the log likelihood ratio from each estimator and returns the ensemble mean (and, if + calculate_covariance is True, the covariance between them). 
- def evaluate( - self, - x, - theta0_filename=None, - theta1_filename=None, - test_all_combinations=True, - vote_expectation_weight=None, - calculate_covariance=False, - return_individual_predictions=False, - ): + Parameters + ---------- + estimator_weights : ndarray or None, optional + Weights for each estimator in the ensemble. If None, all estimators have an equal vote. Default value: None. + + calculate_covariance : bool, optional + If True, the covariance between the different estimators is calculated. Default value: False. + + kwargs + Arguments for the evaluation. See the documentation of the relevant Estimator class. + + Returns + ------- + log_likelihood_ratio : ndarray + Mean prediction for the log likelihood ratio. + + covariance : ndarray or None + If calculate_covariance is True, the covariance matrix between the estimators. Otherwise None. """ - Evaluates the estimators of the likelihood ratio (or, if method is 'sally' or 'sallino', the score), and - calculates the ensemble mean or variance. - The user has the option to treat all estimators equally ('committee method') or to give those with expected - score / ratio close to zero (as calculated by `calculate_expectation()`) a higher weight. In the latter case, - the ensemble mean `f(x)` is calculated as `f(x) = sum_i w_i f_i(x)` with weights - `w_i = exp(-vote_expectation_weight |E[f_i]|) / sum_j exp(-vote_expectation_weight |E[f_j]|)`. Here `f_i(x)` - are the individual estimators and `E[f_i]` is the expectation value calculated by `calculate_expectation()`. + logger.info("Evaluating %s estimators in ensemble", self.n_estimators) - Parameters - ---------- - x : str or ndarray - Sample of observations, or path to numpy file with observations, as saved by the - `madminer.sampling.SampleAugmenter` functions. Note that this sample has to be sampled from the reference - parameter where the score is estimated with the SALLY / SALLINO estimator! 
+ # Calculate weights of each estimator in vote + if estimator_weights is None: + estimator_weights = np.ones(self.n_estimators) + assert len(estimator_weights) == self.n_estimators + estimator_weights /= np.sum(estimator_weights) + logger.debug("Estimator weights: %s", estimator_weights) - theta0_filename : str or None, optional - Path to an unweighted sample of numerator parameters, as saved by the `madminer.sampling.SampleAugmenter` - functions. Required if the estimator was trained with the 'alice', 'alice2', 'alices', 'alices2', 'carl', - 'carl2', 'nde', 'rascal', 'rascal2', 'rolr', 'rolr2', or 'scandal' method. Default value: None. + # Calculate estimator predictions + predictions = [] + for i, estimator in enumerate(self.estimators): + logger.info("Starting evaluation for estimator %s / %s in ensemble", i + 1, self.n_estimators) + predictions.append(estimator.evaluate_log_likelihood_ratio(**kwargs)[0]) + predictions = np.array(predictions) - theta1_filename : str or None, optional - Path to an unweighted sample of denominator parameters, as saved by the `madminer.sampling.SampleAugmenter` - functions. Required if the estimator was trained with the 'alice2', 'alices2', 'carl2', 'rascal2', or - 'rolr2' method. Default value: None. + # Calculate weighted means and covariance matrices + mean = np.average(predictions, axis=0, weights=estimator_weights) - test_all_combinations : bool, optional - If method is not 'sally' and not 'sallino': If False, the number of samples in the observable and theta - files has to match, and the likelihood ratio is evaluated only for the combinations - `r(x_i | theta0_i, theta1_i)`. If True, `r(x_i | theta0_j, theta1_j)` for all pairwise combinations `i, j` - are evaluated. Default value: True. 
+ if calculate_covariance: + predictions_flat = predictions.reshape((predictions.shape[0], -1)) + covariance = np.cov(predictions_flat.T, aweights=estimator_weights) + covariance = covariance.reshape(list(predictions.shape) + list(predictions.shape)) + else: + covariance = None - vote_expectation_weight : float or list of float or None, optional - Factor that determines how much more weight is given to those estimators with small expectation value (as - calculated by `calculate_expectation()`). If a list is given, results are returned for each element in the - list. If None, or if `calculate_expectation()` has not been called, all estimators are treated equal. - Default value: None. + return mean, covariance + + def evaluate_score(self, estimator_weights=None, calculate_covariance=False, **kwargs): + """ + Estimates the score from each estimator and returns the ensemble mean (and, if + calculate_covariance is True, the covariance between them). + + Parameters + ---------- + estimator_weights : ndarray or None, optional + Weights for each estimator in the ensemble. If None, all estimators have an equal vote. Default value: None. calculate_covariance : bool, optional - Whether the covariance matrix is calculated. Default value: False. + If True, the covariance between the different estimators is calculated. Default value: False. - return_individual_predictions : bool, optional - Whether the individual estimator predictions are returned. Default value: False. + kwargs + Arguments for the evaluation. See the documentation of the relevant Estimator class. Returns ------- - mean_prediction : ndarray or list of ndarray - The (weighted) ensemble mean of the estimators. If the estimators were trained with `method='sally'` or - `method='sallino'`, this is an array of the estimator for `t(x_i | theta_ref)` for all events `i`. 
- Otherwise, the estimated likelihood ratio (if test_all_combinations is True, the result has shape - `(n_thetas, n_x)`, otherwise, it has shape `(n_samples,)`). If more then one value vote_expectation_weight - is given, this is a list with results for all entries in vote_expectation_weight. - - covariance : None or ndarray or list of ndarray - The covariance matrix of the (flattened) predictions, defined as the ensemble covariance. If more then one - value vote_expectation_weight is given, this is a list with results - for all entries in vote_expectation_weight. If calculate_covariance is False, None is returned. - - weights : ndarray or list of ndarray - Only returned if return_individual_predictions is True. The estimator weights `w_i`. If more then one value - vote_expectation_weight is given, this is a list with results for all entries in vote_expectation_weight. + score : ndarray + Mean prediction for the score. - individual_predictions : ndarray - Only returned if return_individual_predictions is True. The individual estimator predictions. + covariance : ndarray or None + If calculate_covariance is True, the covariance matrix between the estimators. Otherwise None.
""" + logger.info("Evaluating %s estimators in ensemble", self.n_estimators) # Calculate weights of each estimator in vote - if self.expectations is None or vote_expectation_weight is None: - weights = [np.ones(self.n_estimators)] - else: - if len(self.expectations.shape) == 1: - expectations_norm = self.expectations - elif len(self.expectations.shape) == 2: - expectations_norm = np.linalg.norm(self.expectations, axis=1) - else: - expectations_norm = [np.linalg.norm(expectation) for expectation in self.expectations] - - if not isinstance(vote_expectation_weight, list): - vote_expectation_weight = [vote_expectation_weight] - - weights = [] - for vote_weight in vote_expectation_weight: - if vote_weight is None: - these_weights = np.ones(self.n_estimators) - else: - these_weights = np.exp(-vote_weight * expectations_norm) - these_weights /= np.sum(these_weights) - weights.append(these_weights) - - logger.debug("Estimator weights: %s", weights) + if estimator_weights is None: + estimator_weights = np.ones(self.n_estimators) + assert len(estimator_weights) == self.n_estimators + estimator_weights /= np.sum(estimator_weights) + logger.debug("Estimator weights: %s", estimator_weights) # Calculate estimator predictions predictions = [] for i, estimator in enumerate(self.estimators): logger.info("Starting evaluation for estimator %s / %s in ensemble", i + 1, self.n_estimators) - - predictions.append( - estimator.evaluate(x, theta0_filename, theta1_filename, test_all_combinations, evaluate_score=False) - ) - - logger.debug("Estimator %s predicts %s for first event", i + 1, predictions[-1][0, :]) + predictions.append(estimator.evaluate_score(**kwargs)) predictions = np.array(predictions) # Calculate weighted means and covariance matrices - means = [] - covariances = [] - - for these_weights in weights: - mean = np.average(predictions, axis=0, weights=these_weights) - means.append(mean) - - if calculate_covariance: - predictions_flat = 
predictions.reshape((predictions.shape[0], -1)) - - covariance = np.cov(predictions_flat.T, aweights=these_weights) - else: - covariance = None - - covariances.append(covariance) + mean = np.average(predictions, axis=0, weights=estimator_weights) - # Returns - if len(weights) == 1: - if return_individual_predictions: - return means[0], covariances[0], weights[0], predictions - return means[0], covariances[0] + if calculate_covariance: + predictions_flat = predictions.reshape((predictions.shape[0], -1)) + covariance = np.cov(predictions_flat.T, aweights=estimator_weights) + covariance = covariance.reshape(list(predictions.shape) + list(predictions.shape)) + else: + covariance = None - if return_individual_predictions: - return means, covariances, weights, predictions - return means, covariances + return mean, covariance def calculate_fisher_information( self, x, obs_weights=None, + estimator_weights=None, n_events=1, mode="score", - uncertainty="ensemble", - vote_expectation_weight=None, - return_individual_predictions=False, + calculate_covariance=True, sum_events=True, ): """ - Calculates expected Fisher information matrices for an ensemble of SALLY estimators. + Calculates expected Fisher information matrices for an ensemble of ScoreEstimator instances. There are two ways of calculating the ensemble average. In the default "score" mode, the ensemble average for the score is calculated for each event, and the Fisher information is calculated based on these mean scores. In @@ -1433,6 +2281,9 @@ def calculate_fisher_information( obs_weights : None or ndarray, optional Weights for the observations. If None, all events are taken to have equal weight. Default value: None. + estimator_weights : ndarray or None, optional + Weights for each estimator in the ensemble. If None, all estimators have an equal vote. Default value: None. + n_events : float, optional Expected number of events for which the kinematic Fisher information should be calculated. Default value: 1. 
@@ -1441,22 +2292,8 @@ def calculate_fisher_information( are the sample mean and covariance calculated. If mode is "score", the sample mean is calculated for the score for each event. Default value: "score". - uncertainty : {"ensemble", "expectation", "sum", "none"}, optional - How the covariance matrix of the Fisher information estimate is calculate. With "ensemble", the ensemble - covariance is used (only supported if mode is "information"). With "expectation", the expectation of the - score is used as a measure of the uncertainty of the score estimator, and this uncertainty is propagated - through to the covariance matrix. With "sum", both terms are summed (only supported if mode is - "information"). With "none", no uncertainties are calculated. Default value: "ensemble". - - vote_expectation_weight : float or list of float or None, optional - If mode is "information", this factor determines how much more weight is given to those estimators with - small expectation value (as calculated by `calculate_expectation()`). If a list is given, results are - returned for each element in the list. If None, or if `calculate_expectation()` has not been called, all - estimators are treated equal. Default value: None. - - return_individual_predictions : bool, optional - If mode is "information", sets whether the individual estimator predictions are returned. Default value: - False. + calculate_covariance : bool, optional + If True, the covariance between the different estimators is calculated. Default value: True. sum_events : bool, optional If True or mode is "information", the expected Fisher information summed over the events x is calculated. 
@@ -1465,86 +2302,53 @@ def calculate_fisher_information( Returns ------- - mean_prediction : ndarray or list of ndarray + mean_prediction : ndarray Expected kinematic Fisher information matrix with shape `(n_events, n_parameters, n_parameters)` if sum_events is False and mode is "score", or `(n_parameters, n_parameters)` in any other case. - covariance : ndarray or list of ndarray - The covariance matrix of the Fisher information estimate. Its definition depends on the value of - uncertainty; by default, the covariance is defined as the ensemble covariance (only supported if mode is - "information"). This object has four indices, `cov_(ij)(i'j')`, ordered as i j i' j'. It has shape - `(n_parameters, n_parameters, n_parameters, n_parameters)`. If more then one value vote_expectation_weight - is given, this is a list with results for all entries in vote_expectation_weight. - - weights : ndarray or list of ndarray - Only returned if return_individual_predictions is True. The estimator weights `w_i`. If more then one value - vote_expectation_weight is given, this is a list with results for all entries in vote_expectation_weight. - - individual_predictions : ndarray - Only returned if return_individual_predictions is True. The individual estimator predictions. - + covariance : ndarray or None + The covariance of the estimated Fisher information matrix. This object has four indices, `cov_(ij)(i'j')`, + ordered as i j i' j'. It has shape `(n_parameters, n_parameters, n_parameters, n_parameters)`. """ logger.debug("Evaluating Fisher information for %s estimators in ensemble", self.n_estimators) + # Check ensemble + if self.estimator_type != "score": + raise NotImplementedError( + "Fisher information calculation is only implemented for local score estimators " + "(ScoreEstimator instances)." 
+ ) + # Check input if mode not in ["score", "information"]: raise ValueError("Unknown mode {}, has to be 'score' or 'information'!".format(mode)) - if mode == "score": - vote_expectation_weight = None - - if uncertainty == "expectation" or uncertainty == "sum": - if self.expectations is None: - raise RuntimeError( - "Expectations have not been calculated, cannot use uncertainty mode 'expectation' " "or 'sum'!" - ) - # Calculate estimator_weights of each estimator in vote - if self.expectations is None or vote_expectation_weight is None: - estimator_weights = [np.ones(self.n_estimators)] - else: - if len(self.expectations.shape) == 1: - expectations_norm = self.expectations - elif len(self.expectations.shape) == 2: - expectations_norm = np.linalg.norm(self.expectations, axis=1) - else: - expectations_norm = [np.linalg.norm(expectation) for expectation in self.expectations] - - if not isinstance(vote_expectation_weight, list): - vote_expectation_weight = [vote_expectation_weight] - - estimator_weights = [] - for vote_weight in vote_expectation_weight: - if vote_weight is None: - these_weights = np.ones(self.n_estimators) - else: - these_weights = np.exp(-vote_weight * expectations_norm) - these_weights /= np.sum(these_weights) - estimator_weights.append(these_weights) - - logger.debug(" Estimator estimator_weights: %s", estimator_weights) + if estimator_weights is None: + estimator_weights = np.ones(self.n_estimators) + assert len(estimator_weights) == self.n_estimators + estimator_weights /= np.sum(estimator_weights) + logger.debug("Estimator weights: %s", estimator_weights) - predictions = [] + covariance = None # "information" mode if mode == "information": # Calculate estimator predictions + predictions = [] for i, estimator in enumerate(self.estimators): logger.debug("Starting evaluation for estimator %s / %s in ensemble", i + 1, self.n_estimators) predictions.append(estimator.calculate_fisher_information(x=x, weights=obs_weights, n_events=n_events)) 
predictions = np.array(predictions) - # Calculate weighted means and covariance matrices - means = [] - ensemble_covariances = [] + # Calculate weighted mean and covariance + information = np.average(predictions, axis=0, weights=estimator_weights) - for these_weights in estimator_weights: - mean = np.average(predictions, axis=0, weights=these_weights) - means.append(mean) + predictions_flat = predictions.reshape((predictions.shape[0], -1)) - predictions_flat = predictions.reshape((predictions.shape[0], -1)) - covariance = np.cov(predictions_flat.T, aweights=these_weights) + if calculate_covariance: + covariance = np.cov(predictions_flat.T, aweights=estimator_weights) covariance_shape = ( predictions.shape[1], predictions.shape[2], @@ -1553,8 +2357,6 @@ def calculate_fisher_information( ) covariance = covariance.reshape(covariance_shape) - ensemble_covariances.append(covariance) - # "score" mode: else: # Load training data @@ -1586,71 +2388,39 @@ def calculate_fisher_information( # Fisher information prediction (based on mean scores) if sum_events: - information_mean = float(n_events) * np.sum( + information = float(n_events) * np.sum( obs_weights[:, np.newaxis, np.newaxis] * score_mean[:, :, np.newaxis] * score_mean[:, np.newaxis, :], axis=0, ) else: - information_mean = ( + information = ( float(n_events) * obs_weights[:, np.newaxis, np.newaxis] * score_mean[:, :, np.newaxis] * score_mean[:, np.newaxis, :] ) - means = [information_mean] - - # Fisher information predictions based on shifted scores - informations_shifted = float(n_events) * np.sum( - obs_weights[np.newaxis, :, np.newaxis, np.newaxis] - * score_shifted_predictions[:, :, :, np.newaxis] - * score_shifted_predictions[:, :, np.newaxis, :], - axis=1, - ) # (n_estimators, n_parameters, n_parameters) - - n_params = score_mean.shape[1] - informations_shifted = informations_shifted.reshape(-1, n_params ** 2) - information_cov = np.cov(informations_shifted.T) - information_cov = 
information_cov.reshape(n_params, n_params, n_params, n_params) - ensemble_covariances = [information_cov] + + if calculate_covariance: + # Fisher information predictions based on shifted scores + informations_shifted = float(n_events) * np.sum( + obs_weights[np.newaxis, :, np.newaxis, np.newaxis] + * score_shifted_predictions[:, :, :, np.newaxis] + * score_shifted_predictions[:, :, np.newaxis, :], + axis=1, + ) # (n_estimators, n_parameters, n_parameters) + + n_params = score_mean.shape[1] + informations_shifted = informations_shifted.reshape(-1, n_params ** 2) + covariance = np.cov(informations_shifted.T) + covariance = covariance.reshape(n_params, n_params, n_params, n_params) # Let's check the expected score expected_score = [np.einsum("n,ni->i", obs_weights, score_mean)] logger.debug("Expected per-event score (should be close to zero):\n%s", expected_score) - # Calculate uncertainty through non-zero score expectation - expectation_covariances = None - if uncertainty == "expectation" or uncertainty == "sum": - expectation_covariances = [] - for these_weights, expectation in zip(estimator_weights, self.expectations): - mean_expectation = np.average(expectation, weights=these_weights, axis=0) - expectation_covariances.append( - n_events - * np.einsum("a,b,c,d->abcd", mean_expectation, mean_expectation, mean_expectation, mean_expectation) - ) - - # Final covariances - if uncertainty == "ensemble": - covariances = ensemble_covariances - elif uncertainty == "expectation": - covariances = expectation_covariances - elif uncertainty == "sum": - covariances = [cov1 + cov2 for cov1, cov2 in zip(ensemble_covariances, expectation_covariances)] - elif uncertainty == "none": - covariances = [None for cov in ensemble_covariances] - else: - raise ValueError("Unknown uncertainty mode {}".format(uncertainty)) - - # Returns - if len(estimator_weights) == 1: - if return_individual_predictions and mode == "information": - return means[0], covariances[0], estimator_weights[0], 
predictions - return means[0], covariances[0] - - if return_individual_predictions and mode == "information": - return means, covariances, estimator_weights, predictions - return means, covariances + return information, covariance def save(self, folder, save_model=False): """ @@ -1663,7 +2433,7 @@ def save(self, folder, save_model=False): save_model : bool, optional If True, the whole model is saved in addition to the state dict. This is not necessary for loading it - again with EnsembleForge.load(), but can be useful for debugging, for instance to plot the computational + again with Ensemble.load(), but can be useful for debugging, for instance to plot the computational graph. Returns @@ -1677,13 +2447,7 @@ def save(self, folder, save_model=False): # Save ensemble settings logger.debug("Saving ensemble setup to %s/ensemble.json", folder) - - if self.expectations is None: - expectations = "None" - else: - expectations = self.expectations.tolist() - - settings = {"n_estimators": self.n_estimators, "expectations": expectations} + settings = {"estimator_type": self.estimator_type, "n_estimators": self.n_estimators} with open(folder + "/ensemble.json", "w") as f: json.dump(settings, f) @@ -1706,48 +2470,35 @@ def load(self, folder): None """ - # Load ensemble settings logger.debug("Loading ensemble setup from %s/ensemble.json", folder) - with open(folder + "/ensemble.json", "r") as f: settings = json.load(f) - self.n_estimators = settings["n_estimators"] - self.expectations = settings["expectations"] - if self.expectations == "None": - self.expectations = None - if self.expectations is not None: - self.expectations = np.array(self.expectations) - - logger.info("Found ensemble with %s estimators and expectations %s", self.n_estimators, self.expectations) + self.n_estimators = int(settings["n_estimators"]) + try: + estimator_type = str(settings["estimator_type"]) + except KeyError: + raise RuntimeError( + "Can't find estimator type information in file. 
Maybe this file was created with" + " an incompatible MadMiner version < v0.3.0?" + ) + logger.info("Found %s ensemble with %s estimators", estimator_type, self.n_estimators) # Load estimators self.estimators = [] for i in range(self.n_estimators): - estimator = MLForge() + estimator = self._get_estimator_class(estimator_type)() estimator.load(folder + "/estimator_" + str(i)) self.estimators.append(estimator) - - # Check consistency and update n_parameters, n_observables self._check_consistency() - def _check_consistency(self, keywords=None): + def _check_consistency(self): """ Internal function that checks if all estimators belong to the same category (local score regression, single-parameterized likelihood ratio estimator, doubly parameterized likelihood ratio estimator). - Parameters - ---------- - keywords : dict or None, optional - kwargs passed to `train_one()` or `train_all()`. - - Returns - ------- - method_type : {"local_score", "parameterized", "doubly_parameterized"} - Method type of this ensemble. 
- Raises ------ RuntimeError @@ -1755,40 +2506,22 @@ def _check_consistency(self, keywords=None): """ # Accumulate methods of all estimators - methods = [estimator.method for estimator in self.estimators] + all_types = [self._get_estimator_type(estimator) for estimator in self.estimators] all_n_parameters = [estimator.n_parameters for estimator in self.estimators] all_n_observables = [estimator.n_observables for estimator in self.estimators] - if keywords is not None: - keyword_method = keywords.get("method", None) - if isinstance(keyword_method, list): - methods += keyword_method - else: - methods.append(keyword_method) - # Check consistency of methods - self.method_type = None - - for method in methods: - if method in ["sally", "sallino"]: - this_method_type = "local_score" - elif method in ["carl", "rolr", "rascal", "alice", "alices", "nde", "scandal"]: - this_method_type = "parameterized" - elif method in ["carl2", "rolr2", "rascal2", "alice2", "alices2"]: - this_method_type = "doubly_parameterized" - elif method is None: - continue - else: - raise RuntimeError("Unknown method %s", method) + self.estimator_type = None - if self.method_type is None: - self.method_type = this_method_type + for estimator_type in all_types: + if self.estimator_type is None: + self.estimator_type = estimator_type - if self.method_type != this_method_type: + if self.estimator_type != estimator_type: raise RuntimeError( "Ensemble with inconsistent estimator methods! All methods have to be either" " single-parameterized ratio estimators, doubly parameterized ratio estimators," - " or local score estimators. Found methods " + ", ".join(methods) + "." + " or local score estimators. Found types " + ", ".join(all_types) + "." 
) # Check consistency of parameter and observable numnbers @@ -1810,5 +2543,35 @@ def _check_consistency(self, keywords=None): "Ensemble with inconsistent numbers of parameters for different estimators: %s", all_n_observables ) - # Return method type of ensemble - return self.method_type + @staticmethod + def _get_estimator_type(estimator): + if not isinstance(estimator, Estimator): + raise RuntimeError("Estimator is not an Estimator instance!") + + if isinstance(estimator, ParameterizedRatioEstimator): + return "parameterized_ratio" + elif isinstance(estimator, DoubleParameterizedRatioEstimator): + return "double_parameterized_ratio" + elif isinstance(estimator, ScoreEstimator): + return "score" + elif isinstance(estimator, LikelihoodEstimator): + return "likelihood" + else: + raise RuntimeError("Estimator is an unknown Estimator type!") + + @staticmethod + def _get_estimator_class(estimator_type): + if estimator_type == "parameterized_ratio": + return ParameterizedRatioEstimator + elif estimator_type == "double_parameterized_ratio": + return DoubleParameterizedRatioEstimator + elif estimator_type == "score": + return ScoreEstimator + elif estimator_type == "likelihood": + return LikelihoodEstimator + else: + raise RuntimeError("Unknown estimator type {}!".format(estimator_type)) + + +class TheresAGoodReasonThisDoesntWork(Exception): + pass diff --git a/madminer/plotting.py b/madminer/plotting.py index 584c16989..1b53c0377 100644 --- a/madminer/plotting.py +++ b/madminer/plotting.py @@ -1,6 +1,5 @@ from __future__ import absolute_import, division, print_function, unicode_literals -import six import numpy as np from matplotlib import pyplot as plt import matplotlib @@ -8,9 +7,8 @@ import logging from madminer.sampling import SampleAugmenter -from madminer.utils.analysis import mdot, get_theta_benchmark_matrix -from madminer.morphing import NuisanceMorpher -from madminer.utils.various import weighted_quantile, sanitize_array, shuffle +from madminer.utils.morphing 
import NuisanceMorpher +from madminer.utils.various import weighted_quantile, sanitize_array, shuffle, mdot logger = logging.getLogger(__name__) @@ -93,11 +91,11 @@ def plot_uncertainty( obs_idx = list(sa.observables.keys()).index(observable) # Get event data (observations and weights) - x, weights_benchmarks = sa.extract_raw_data() + x, weights_benchmarks = sa.weighted_events() x = x[:, obs_idx] # Theta matrix - theta_matrix = get_theta_benchmark_matrix("morphing", theta, sa.benchmarks, sa.morpher) + theta_matrix = sa._get_theta_benchmark_matrix(theta) weights = mdot(theta_matrix, weights_benchmarks) # Remove negative weights @@ -412,7 +410,7 @@ def plot_distributions( observable_labels = [all_observables[obs] for obs in observable_indices] # Get event data (observations and weights) - x, weights_benchmarks = sa.extract_raw_data() + x, weights_benchmarks = sa.weighted_events() logger.debug("Loaded raw data with shapes %s, %s", x.shape, weights_benchmarks.shape) # Remove negative weights @@ -443,14 +441,7 @@ def plot_distributions( if draw_nuisance_toys is not None: n_nuisance_toys_drawn = draw_nuisance_toys - theta_matrices = [] - for theta in parameter_points: - if isinstance(theta, six.string_types): - matrix = get_theta_benchmark_matrix("benchmark", theta, sa.benchmarks) - else: - matrix = get_theta_benchmark_matrix("morphing", theta, sa.benchmarks, sa.morpher) - theta_matrices.append(matrix) - + theta_matrices = [sa._get_theta_benchmark_matrix(theta) for theta in parameter_points] logger.debug("Calculated %s theta matrices", len(theta_matrices)) # Nuisance parameters @@ -611,8 +602,8 @@ def plot_2d_morphing_basis( Parameters ---------- - morpher : Morpher - Morpher instance with defined basis. + morpher : PhysicsMorpher + PhysicsMorpher instance with defined basis. xlabel : str, optional Label for the x axis. Default value: r'$\theta_0$'. 
@@ -682,8 +673,8 @@ def plot_nd_morphing_basis_scatter(morpher, crange=(1.0, 100.0), n_test_thetas=1 Parameters ---------- - morpher : Morpher - Morpher instance with defined basis. + morpher : PhysicsMorpher + PhysicsMorpher instance with defined basis. crange : tuple of float, optional Range `(min, max)` for the color map. Default value: (1. 100.). @@ -741,8 +732,8 @@ def plot_nd_morphing_basis_slices(morpher, crange=(1.0, 100.0), resolution=50): Parameters ---------- - morpher : Morpher - Morpher instance with defined basis. + morpher : PhysicsMorpher + PhysicsMorpher instance with defined basis. crange : tuple of float, optional Range `(min, max)` for the color map. @@ -1091,7 +1082,6 @@ def plot_fisherinfo_barplot( if eigenvalue_colors is None: eigenvalue_colors = ["C{}".format(str(i)) for i in range(10)] - operator_order = [i for i in range(0, size_upper)] eigenvalue_linewidth = 1.5 # Upper plot @@ -1134,7 +1124,7 @@ def plot_fisherinfo_barplot( ax1.set_ylim(0.0001 * y_max, 2.0 * y_max) ax1.set_xticks(xpos_ticks) - ax1.set_xticklabels(["" for l in labels], rotation=40, ha="right") + ax1.set_xticklabels(["" for _ in labels], rotation=40, ha="right") ax1.set_ylabel(r"$I_{ij}$ eigenvalues") # Lower plot diff --git a/madminer/sampling.py b/madminer/sampling.py index 60dd8761f..96ade3ba9 100644 --- a/madminer/sampling.py +++ b/madminer/sampling.py @@ -1,215 +1,25 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import time import logging import numpy as np -import collections -import six +import multiprocessing +from functools import partial -from madminer.utils.interfaces.madminer_hdf5 import load_madminer_settings, madminer_event_loader +from madminer.analysis import DataAnalyzer +from madminer.utils.interfaces.madminer_hdf5 import madminer_event_loader from madminer.utils.interfaces.madminer_hdf5 import save_preformatted_events_to_madminer_file -from madminer.utils.analysis import get_theta_value, 
get_theta_benchmark_matrix, get_dtheta_benchmark_matrix -from madminer.utils.analysis import calculate_augmented_data, parse_theta, mdot -from madminer.morphing import Morpher, NuisanceMorpher -from madminer.utils.various import format_benchmark, create_missing_folders, shuffle, balance_thetas +from madminer.utils.various import create_missing_folders, shuffle logger = logging.getLogger(__name__) -def combine_and_shuffle( - input_filenames, output_filename, k_factors=None, overwrite_existing_file=True, shuffle_sample=True -): +class SampleAugmenter(DataAnalyzer): """ - Combines multiple MadMiner files into one, and shuffles the order of the events. - - Note that this function assumes that all samples are generated with the same setup, including identical benchmarks - (and thus morphing setup). If it is used with samples with different settings, there will be wrong results! - There are no explicit cross checks in place yet! - - Parameters - ---------- - input_filenames : list of str - List of paths to the input MadMiner files. - - output_filename : str - Path to the combined MadMiner file. - - k_factors : float or list of float, optional - Multiplies the weights in input_filenames with a universal factor (if k_factors is a float) or with independent - factors (if it is a list of float). Default value: None. - - overwrite_existing_file : bool, optional - If True and if the output file exists, it is overwritten. Default value: True. - - shuffle_sample : bool, optional - If True, the output shuffle will be shuffled. Default value: True. - - Returns - ------- - None - - """ - - logger.debug("Combining and shuffling samples") - - if len(input_filenames) > 1: - logger.warning( - "Careful: this tool assumes that all samples are generated with the same setup, including" - " identical benchmarks (and thus morphing setup). If it is used with samples with different" - " settings, there will be wrong results! There are no explicit cross checks in place yet." 
- ) - - # k factors - if k_factors is None: - k_factors = [1.0 for _ in input_filenames] - elif isinstance(k_factors, float): - k_factors = [k_factors for _ in input_filenames] - - # Copy first file to output_filename - logger.info("Copying setup from %s to %s", input_filenames[0], output_filename) - - # TODO: More memory efficient strategy - - # Load events - all_observations = None - all_weights = None - - for i, (filename, k_factor) in enumerate(zip(input_filenames, k_factors)): - logger.info( - "Loading samples from file %s / %s at %s, multiplying weights with k factor %s", - i + 1, - len(input_filenames), - filename, - k_factor, - ) - - for observations, weights in madminer_event_loader(filename): - if all_observations is None: - all_observations = observations - all_weights = k_factor * weights - else: - all_observations = np.vstack((all_observations, observations)) - all_weights = np.vstack((all_weights, k_factor * weights)) - - # Shuffle - if shuffle_sample: - all_observations, all_weights = shuffle(all_observations, all_weights) - - # Save result - save_preformatted_events_to_madminer_file( - filename=output_filename, - observations=all_observations, - weights=all_weights, - copy_setup_from=input_filenames[0], - overwrite_existing_samples=overwrite_existing_file, - ) - - -def constant_benchmark_theta(benchmark_name): - """ - Utility function to be used as input to various SampleAugmenter functions, specifying a single parameter benchmark. - - Parameters - ---------- - benchmark_name : str - Name of the benchmark (as in `madminer.core.MadMiner.add_benchmark`) - - - Returns - ------- - output : tuple - Input to various SampleAugmenter functions - - """ - return "benchmark", benchmark_name - - -def multiple_benchmark_thetas(benchmark_names): - """ - Utility function to be used as input to various SampleAugmenter functions, specifying multiple parameter benchmarks. 
- - Parameters - ---------- - benchmark_names : list of str - List of names of the benchmarks (as in `madminer.core.MadMiner.add_benchmark`) - - - Returns - ------- - output : tuple - Input to various SampleAugmenter functions - - """ - return "benchmarks", benchmark_names - - -def constant_morphing_theta(theta): - """ - Utility function to be used as input to various SampleAugmenter functions, specifying a single parameter point theta - in a morphing setup. - - Parameters - ---------- - theta : ndarray or list - Parameter point with shape `(n_parameters,)` - - Returns - ------- - output : tuple - Input to various SampleAugmenter functions - - """ - return "theta", np.asarray(theta) - - -def multiple_morphing_thetas(thetas): - """ - Utility function to be used as input to various SampleAugmenter functions, specifying multiple parameter points - theta in a morphing setup. - - Parameters - ---------- - thetas : ndarray or list of lists or list of ndarrays - Parameter points with shape `(n_thetas, n_parameters)` - - Returns - ------- - output : tuple - Input to various SampleAugmenter functions - - """ - return "thetas", [np.asarray(theta) for theta in thetas] - - -def random_morphing_thetas(n_thetas, priors): - """ - Utility function to be used as input to various SampleAugmenter functions, specifying random parameter points - sampled from a prior in a morphing setup. - - Parameters - ---------- - n_thetas : int - Number of parameter points to be sampled - - priors : list of tuples - Priors for each parameter is characterized by a tuple of the form `(prior_shape, prior_param_0, prior_param_1)`. - Currently, the supported prior_shapes are `flat`, in which case the two other parameters are the lower and upper - bound of the flat prior, and `gaussian`, in which case they are the mean and standard deviation of a Gaussian. 
- - Returns - ------- - output : tuple - Input to various SampleAugmenter functions - - """ - return "random", (n_thetas, priors) - - -class SampleAugmenter: - """ - Sampling and data augmentation. + Sampling / unweighting and data augmentation. After the generated events have been analyzed and the observables and weights have been saved into a MadMiner file, - for instance with `madminer.delphes.DelphesProcessor` or `madminer.lhe.LHEProcessor`, the next step is typically + for instance with `madminer.delphes.DelphesReader` or `madminer.lhe.LHEReader`, the next step is typically the generation of training and evaluation data for the machine learning algorithms. This generally involves two (related) tasks: unweighting, i.e. the creation of samples that do not carry individual weights but follow some distribution, and the extraction of the joint likelihood ratio and / or joint score (the "augmented data"). @@ -217,18 +27,18 @@ class SampleAugmenter: After inializing `SampleAugmenter` with the filename of a MadMiner file, this is done with a single function call. Depending on the downstream inference algorithm, there are different possibilities: - * `SampleAugmenter.extract_samples_train_plain()` creates plain training samples without augmented data. - * `SampleAugmenter.extract_samples_train_local()` creates training samples for local methods based on the score, + * `SampleAugmenter.sample_train_plain()` creates plain training samples without augmented data. + * `SampleAugmenter.sample_train_local()` creates training samples for local methods based on the score, such as SALLY and SALLINO. - * `SampleAugmenter.extract_samples_train_global()` creates training samples for non-local methods based on density + * `SampleAugmenter.sample_train_density()` creates training samples for non-local methods based on density estimation and the score, such as SCANDAL. 
- * `SampleAugmenter.extract_samples_train_ratio()` creates training samples for non-local, ratio-based methods + * `SampleAugmenter.sample_train_ratio()` creates training samples for non-local, ratio-based methods like RASCAL or ALICE. - * `SampleAugmenter.extract_samples_train_more_ratios()` does the same, but can extract joint ratios and scores + * `SampleAugmenter.sample_train_more_ratios()` does the same, but can extract joint ratios and scores at more parameter points. This additional information can be used efficiently in the setup with a "doubly parameterized" likelihood ratio estimator that models the dependence on both the numerator and denominator hypothesis. - * `SampleAugmenter.extract_samples_test()` creates evaluation samples for all methods. + * `SampleAugmenter.sample_test()` creates evaluation samples for all methods. Please see the tutorial for a walkthrough. @@ -262,88 +72,18 @@ class SampleAugmenter: """ def __init__(self, filename, disable_morphing=False, include_nuisance_parameters=True): - # Save setup - self.include_nuisance_parameters = include_nuisance_parameters - self.madminer_filename = filename - - logger.info("Loading data from %s", filename) - - # Load data - ( - self.parameters, - self.benchmarks, - self.benchmark_is_nuisance, - self.morphing_components, - self.morphing_matrix, - self.observables, - self.n_samples, - _, - self.reference_benchmark, - self.nuisance_parameters, - ) = load_madminer_settings(filename, include_nuisance_benchmarks=include_nuisance_parameters) - - self.n_parameters = len(self.parameters) - self.n_benchmarks = len(self.benchmarks) - self.n_benchmarks_phys = np.sum(np.logical_not(self.benchmark_is_nuisance)) - - self.n_nuisance_parameters = 0 - if self.nuisance_parameters is not None and include_nuisance_parameters: - self.n_nuisance_parameters = len(self.nuisance_parameters) - else: - self.nuisance_parameters = None - - logger.info("Found %s parameters", self.n_parameters) - for key, values in 
six.iteritems(self.parameters): - logger.debug( - " %s (LHA: %s %s, maximal power in squared ME: %s, range: %s)", - key, - values[0], - values[1], - values[2], - values[3], - ) - - if self.nuisance_parameters is not None: - logger.info("Found %s nuisance parameters", self.n_nuisance_parameters) - for key, values in six.iteritems(self.nuisance_parameters): - logger.debug(" %s (%s)", key, values) - else: - logger.info("Did not find nuisance parameters") - - logger.info("Found %s benchmarks, of which %s physical", self.n_benchmarks, self.n_benchmarks_phys) - for (key, values), is_nuisance in zip(six.iteritems(self.benchmarks), self.benchmark_is_nuisance): - if is_nuisance: - logger.debug(" %s: nuisance parameter", key) - else: - logger.debug(" %s: %s", key, format_benchmark(values)) - - logger.info("Found %s observables", len(self.observables)) - for i, obs in enumerate(self.observables): - logger.debug(" %2.2s %s", i, obs) - logger.info("Found %s events", self.n_samples) - - # Morphing - self.morpher = None - if self.morphing_matrix is not None and self.morphing_components is not None and not disable_morphing: - self.morpher = Morpher(self.parameters) - self.morpher.set_components(self.morphing_components) - self.morpher.set_basis(self.benchmarks, morphing_matrix=self.morphing_matrix) - - logger.info("Found morphing setup with %s components", len(self.morphing_components)) - - else: - logger.info("Did not find morphing setup.") + super(SampleAugmenter, self).__init__(filename, disable_morphing, include_nuisance_parameters) - # Nuisance morphing - self.nuisance_morpher = None - if self.nuisance_parameters is not None: - self.nuisance_morpher = NuisanceMorpher( - self.nuisance_parameters, list(self.benchmarks.keys()), self.reference_benchmark - ) - logger.info("Found nuisance morphing setup") - - def extract_samples_train_plain( - self, theta, n_samples, folder, filename, test_split=0.5, switch_train_test_events=False + def sample_train_plain( + self, + theta, + 
n_samples, + nu=None, + folder=None, + filename=None, + test_split=0.2, + switch_train_test_events=False, + n_processes=1, ): """ Extracts plain training samples `x ~ p(x|theta)` without any augmented data. This can be use for standard @@ -360,21 +100,32 @@ def extract_samples_train_plain( n_samples : int Total number of events to be drawn. - folder : str - Path to the folder where the resulting samples should be saved (ndarrays in .npy format). + nu : None or tuple, optional + Tuple (type, value) that defines the nuisance parameter point or prior over parameter points for the + sampling. Default value: None - filename : str + folder : str or None + Path to the folder where the resulting samples should be saved (ndarrays in .npy format). Default value: + None. + + filename : str or None Filenames for the resulting samples. A prefix such as 'x' or 'theta0' as well as the extension - '.npy' will be added automatically. + '.npy' will be added automatically. Default value: + None. test_split : float or None, optional Fraction of events reserved for the evaluation sample (that will not be used for any training samples). - Default value: 0.5. + Default value: 0.2. switch_train_test_events : bool, optional If True, this function generates a training sample from the events normally reserved for test samples. Default value: False. + n_processes : None or int, optional + If None or larger than 1, MadMiner will use multiprocessing to parallelize the sampling. In this case, + n_workers sets the number of jobs running in parallel, and None will use the number of CPUs. Default value: + 1. + Returns ------- x : ndarray @@ -385,25 +136,28 @@ def extract_samples_train_plain( Parameter points used for sampling with shape `(n_samples, n_parameters)`. The same information is saved as a file in the given folder. 
+ effective_n_samples : int + Effective number of samples, defined as 1/max(event_probabilities), where event_probabilities are the + fractions of the cross section carried by each event. + """ logger.info("Extracting plain training sample. Sampling according to %s", theta) create_missing_folders([folder]) - # Thetas - theta_types, theta_values, n_samples_per_theta = parse_theta(theta, n_samples) - - # Train / test split - start_event, end_event = self._train_test_split(not switch_train_test_events, test_split) + # Parameters + parsed_thetas, n_samples_per_theta = self._parse_theta(theta, n_samples) + parsed_nus = self._parse_nu(nu, len(parsed_thetas)) + sets = self._build_sets([parsed_thetas], [parsed_nus]) # Start - x, _, (theta,) = self._extract_sample( - theta_sets_types=[theta_types], - theta_sets_values=[theta_values], - n_samples_per_theta=n_samples_per_theta, - start_event=start_event, - end_event=end_event, + x, _, (theta,), effective_n_samples = self._sample( + sets=sets, + n_samples_per_set=n_samples_per_theta, + use_train_events=not switch_train_test_events, + test_split=test_split, + n_processes=n_processes, ) # Save data @@ -411,17 +165,19 @@ def extract_samples_train_plain( np.save(folder + "/theta_" + filename + ".npy", theta) np.save(folder + "/x_" + filename + ".npy", x) - return x, theta + return x, theta, min(effective_n_samples) - def extract_samples_train_local( + def sample_train_local( self, theta, n_samples, - folder, - filename, - nuisance_score=False, - test_split=0.5, + nu=None, + folder=None, + filename=None, + nuisance_score="auto", + test_split=0.2, switch_train_test_events=False, + n_processes=1, log_message=True, ): """ @@ -437,26 +193,37 @@ def extract_samples_train_local( n_samples : int Total number of events to be drawn. - folder : str - Path to the folder where the resulting samples should be saved (ndarrays in .npy format). 
+ nu : None or tuple, optional + Tuple (type, value) that defines the nuisance parameter point or prior over parameter points for the + sampling. Default value: None - filename : str + folder : str or None + Path to the folder where the resulting samples should be saved (ndarrays in .npy format). Default value: + None. + + filename : str or None Filenames for the resulting samples. A prefix such as 'x' or 'theta0' as well as the extension - '.npy' will be added automatically. + '.npy' will be added automatically. Default value: + None. - nuisance_score : bool, optional - If True and if the sample contains nuisance parameters, the score with respect to the nuisance parameters - (at the default position) will also be calculated. Otherwise, only the score with respect to the - physics parameters is calculated. Default: False. + nuisance_score : bool or "auto", optional + If True, the score with respect to the nuisance parameters (at the default position) will also be + calculated. If False, only the score with respect to the physics parameters is calculated. For "auto", + the nuisance score will be calculated if a nuisance setup is defined. Default: True. test_split : float or None, optional Fraction of events reserved for the evaluation sample (that will not be used for any training samples). - Default value: 0.5. + Default value: 0.2. switch_train_test_events : bool, optional If True, this function generates a training sample from the events normally reserved for test samples. Default value: False. + n_processes : None or int, optional + If None or larger than 1, MadMiner will use multiprocessing to parallelize the sampling. In this case, + n_workers sets the number of jobs running in parallel, and None will use the number of CPUs. Default value: + 1. + log_message : bool, optional If True, logging output. This option is only designed for internal use. @@ -475,6 +242,10 @@ def extract_samples_train_local( nuisance_score is True) or `(n_samples, n_parameters)`. 
The same information is saved as a file in the given folder. + effective_n_samples : int + Effective number of samples, defined as 1/max(event_probabilities), where event_probabilities are the + fractions of the cross section carried by each event. + """ if log_message: @@ -486,47 +257,32 @@ def extract_samples_train_local( create_missing_folders([folder]) # Check setup + if nuisance_score == "auto": + nuisance_score = self.nuisance_morpher is not None if self.morpher is None: raise RuntimeError("No morphing setup loaded. Cannot calculate score.") - if self.nuisance_morpher is None and nuisance_score: raise RuntimeError("No nuisance parameters defined. Cannot calculate nuisance score.") - # Thetas - theta_types, theta_values, n_samples_per_theta = parse_theta(theta, n_samples) + # Parameters + parsed_thetas, n_samples_per_theta = self._parse_theta(theta, n_samples) + parsed_nus = self._parse_nu(nu, len(parsed_thetas)) + sets = self._build_sets([parsed_thetas], [parsed_nus]) # Augmented data (gold) augmented_data_definitions = [("score", 0)] - if nuisance_score: - augmented_data_definitions += [("nuisance_score",)] - - # Train / test split - start_event, end_event = self._train_test_split(not switch_train_test_events, test_split) # Start - x, augmented_data, (theta,) = self._extract_sample( - theta_sets_types=[theta_types], - theta_sets_values=[theta_values], - n_samples_per_theta=n_samples_per_theta, + x, augmented_data, (theta,), effective_n_samples = self._sample( + sets=sets, + n_samples_per_set=n_samples_per_theta, augmented_data_definitions=augmented_data_definitions, nuisance_score=nuisance_score, - start_event=start_event, - end_event=end_event, + use_train_events=not switch_train_test_events, + test_split=test_split, + n_processes=n_processes, ) - - t_xz_physics = augmented_data[0] - if nuisance_score: - t_xz_nuisance = augmented_data[1] - t_xz = np.hstack([t_xz_physics, t_xz_nuisance]) - - logger.debug( - "Found physical score with shape %s, nuisance score 
with shape %s, combined shape %s", - t_xz_physics.shape, - t_xz_nuisance.shape, - t_xz.shape, - ) - else: - t_xz = t_xz_physics + t_xz = augmented_data[0] # Save data if filename is not None and folder is not None: @@ -534,10 +290,19 @@ def extract_samples_train_local( np.save(folder + "/x_" + filename + ".npy", x) np.save(folder + "/t_xz_" + filename + ".npy", t_xz) - return x, theta, t_xz + return x, theta, t_xz, min(effective_n_samples) - def extract_samples_train_global( - self, theta, n_samples, folder, filename, test_split=0.5, switch_train_test_events=False + def sample_train_density( + self, + theta, + n_samples, + nu=None, + folder=None, + filename=None, + nuisance_score="auto", + test_split=0.2, + switch_train_test_events=False, + n_processes=1, ): """ Extracts training samples x ~ p(x|theta) as well as the joint score t(x, z|theta), where theta is sampled @@ -553,21 +318,36 @@ def extract_samples_train_global( n_samples : int Total number of events to be drawn. - folder : str - Path to the folder where the resulting samples should be saved (ndarrays in .npy format). + nu : None or tuple, optional + Tuple (type, value) that defines the nuisance parameter point or prior over parameter points for the + sampling. Default value: None - filename : str + folder : str or None + Path to the folder where the resulting samples should be saved (ndarrays in .npy format). Default value: + None. + + filename : str or None Filenames for the resulting samples. A prefix such as 'x' or 'theta0' as well as the extension - '.npy' will be added automatically. + '.npy' will be added automatically. Default value: None. + + nuisance_score : bool or "auto", optional + If True, the score with respect to the nuisance parameters (at the default position) will also be + calculated. If False, only the score with respect to the physics parameters is calculated. For "auto", + the nuisance score will be calculated if a nuisance setup is defined. Default: True. 
test_split : float or None, optional Fraction of events reserved for the evaluation sample (that will not be used for any training samples). - Default value: 0.5. + Default value: 0.2. switch_train_test_events : bool, optional If True, this function generates a training sample from the events normally reserved for test samples. Default value: False. + n_processes : None or int, optional + If None or larger than 1, MadMiner will use multiprocessing to parallelize the sampling. In this case, + n_workers sets the number of jobs running in parallel, and None will use the number of CPUs. Default value: + 1. + Returns ------- x : ndarray @@ -582,6 +362,10 @@ def extract_samples_train_global( Joint score evaluated at theta with shape `(n_samples, n_parameters)`. The same information is saved as a file in the given folder. + effective_n_samples : int + Effective number of samples, defined as 1/max(event_probabilities), where event_probabilities are the + fractions of the cross section carried by each event. 
+ """ logger.info( @@ -590,18 +374,32 @@ def extract_samples_train_global( theta, ) - return self.extract_samples_train_local( - theta, - n_samples, - folder, - filename, + return self.sample_train_local( + theta=theta, + n_samples=n_samples, + nu=nu, + folder=folder, + filename=filename, + nuisance_score=nuisance_score, test_split=test_split, switch_train_test_events=switch_train_test_events, + n_processes=n_processes, log_message=False, ) - def extract_samples_train_ratio( - self, theta0, theta1, n_samples, folder, filename, test_split=0.5, switch_train_test_events=False + def sample_train_ratio( + self, + theta0, + theta1, + n_samples, + nu0=None, + nu1=None, + folder=None, + filename=None, + nuisance_score="auto", + test_split=0.2, + switch_train_test_events=False, + n_processes=1, ): """ Extracts training samples `x ~ p(x|theta0)` and `x ~ p(x|theta1)` together with the class label `y`, the joint @@ -623,21 +421,41 @@ def extract_samples_train_ratio( n_samples : int Total number of events to be drawn. - folder : str - Path to the folder where the resulting samples should be saved (ndarrays in .npy format). + nu0 : None or tuple, optional + Tuple (type, value) that defines the numerator nuisance parameter point or prior over parameter points for + the sampling. Default value: None - filename : str + nu1 : None or tuple, optional + Tuple (type, value) that defines the denominator nuisance parameter point or prior over parameter points for + the sampling. Default value: None + + folder : str or None + Path to the folder where the resulting samples should be saved (ndarrays in .npy format). Default value: + None. + + filename : str or None Filenames for the resulting samples. A prefix such as 'x' or 'theta0' as well as the extension - '.npy' will be added automatically. + '.npy' will be added automatically. Default value: + None. 
+ + nuisance_score : bool or "auto", optional + If True, the score with respect to the nuisance parameters (at the default position) will also be + calculated. If False, only the score with respect to the physics parameters is calculated. For "auto", + the nuisance score will be calculated if a nuisance setup is defined. Default: True. test_split : float or None, optional Fraction of events reserved for the evaluation sample (that will not be used for any training samples). - Default value: 0.5. + Default value: 0.2. switch_train_test_events : bool, optional If True, this function generates a training sample from the events normally reserved for test samples. Default value: False. + n_processes : None or int, optional + If None or larger than 1, MadMiner will use multiprocessing to parallelize the sampling. In this case, + n_workers sets the number of jobs running in parallel, and None will use the number of CPUs. Default value: + 1. + Returns ------- x : ndarray @@ -665,6 +483,10 @@ def extract_samples_train_ratio( information is saved as a file in the given folder. If morphing is not set up, None is returned (and no file is saved). + effective_n_samples : int + Effective number of samples, defined as 1/max(event_probabilities), where event_probabilities are the + fractions of the cross section carried by each event. + """ logger.info( @@ -674,76 +496,87 @@ def extract_samples_train_ratio( theta1, ) + create_missing_folders([folder]) + + # Check setup + if nuisance_score == "auto": + nuisance_score = self.nuisance_morpher is not None if self.morpher is None: logging.warning("No morphing setup loaded. Cannot calculate joint score.") - - create_missing_folders([folder]) + if self.nuisance_morpher is None and nuisance_score: + raise RuntimeError("No nuisance parameters defined. 
Cannot calculate nuisance score.") # Augmented data (gold) augmented_data_definitions = [("ratio", 0, 1)] if self.morpher is not None: augmented_data_definitions.append(("score", 0)) - # Train / test split - start_event, end_event = self._train_test_split(not switch_train_test_events, test_split) - # Thetas for theta0 sampling - theta0_types, theta0_values, n_samples_per_theta0 = parse_theta(theta0, n_samples // 2) - theta1_types, theta1_values, n_samples_per_theta1 = parse_theta(theta1, n_samples // 2) + parsed_theta0s, n_samples_per_theta0 = self._parse_theta(theta0, n_samples // 2) + parsed_theta1s, n_samples_per_theta1 = self._parse_theta(theta1, n_samples // 2) + parsed_nu0s = self._parse_nu(nu0, len(parsed_theta0s)) + parsed_nu1s = self._parse_nu(nu1, len(parsed_theta1s)) + sets = self._build_sets([parsed_theta0s, parsed_theta1s], [parsed_nu0s, parsed_nu1s]) n_samples_per_theta = min(n_samples_per_theta0, n_samples_per_theta1) # Start for theta0 - if self.morpher is None: - x0, (r_xz0,), (theta0_0, theta1_0) = self._extract_sample( - theta_sets_types=[theta0_types, theta1_types], - theta_sets_values=[theta0_values, theta1_values], - sampling_theta_index=0, - n_samples_per_theta=n_samples_per_theta, + x0, (r_xz0,), (theta0_0, theta1_0), n_effective_samples_0 = self._sample( + sets=sets, + sampling_index=0, + n_samples_per_set=n_samples_per_theta, augmented_data_definitions=augmented_data_definitions, - start_event=start_event, - end_event=end_event, + nuisance_score=nuisance_score, + use_train_events=not switch_train_test_events, + test_split=test_split, + n_processes=n_processes, ) t_xz0 = None else: - x0, (r_xz0, t_xz0), (theta0_0, theta1_0) = self._extract_sample( - theta_sets_types=[theta0_types, theta1_types], - theta_sets_values=[theta0_values, theta1_values], - sampling_theta_index=0, - n_samples_per_theta=n_samples_per_theta, + x0, (r_xz0, t_xz0), (theta0_0, theta1_0), n_effective_samples_0 = self._sample( + sets=sets, + sampling_index=0, + 
n_samples_per_set=n_samples_per_theta, augmented_data_definitions=augmented_data_definitions, - start_event=start_event, - end_event=end_event, + nuisance_score=nuisance_score, + use_train_events=not switch_train_test_events, + test_split=test_split, + n_processes=n_processes, ) # Thetas for theta1 sampling (could be different if num or denom are random) - theta0_types, theta0_values, n_samples_per_theta0 = parse_theta(theta0, n_samples // 2) - theta1_types, theta1_values, n_samples_per_theta1 = parse_theta(theta1, n_samples // 2) + parsed_theta0s, n_samples_per_theta0 = self._parse_theta(theta0, n_samples // 2) + parsed_theta1s, n_samples_per_theta1 = self._parse_theta(theta1, n_samples // 2) + parsed_nu0s = self._parse_nu(nu0, len(parsed_theta0s)) + parsed_nu1s = self._parse_nu(nu1, len(parsed_theta1s)) + sets = self._build_sets([parsed_theta0s, parsed_theta1s], [parsed_nu0s, parsed_nu1s]) n_samples_per_theta = min(n_samples_per_theta0, n_samples_per_theta1) # Start for theta1 if self.morpher is None: - x1, (r_xz1,), (theta0_1, theta1_1) = self._extract_sample( - theta_sets_types=[theta0_types, theta1_types], - theta_sets_values=[theta0_values, theta1_values], - sampling_theta_index=1, - n_samples_per_theta=n_samples_per_theta, + x1, (r_xz1,), (theta0_1, theta1_1), n_effective_samples_1 = self._sample( + sets=sets, + sampling_index=1, + n_samples_per_set=n_samples_per_theta, augmented_data_definitions=augmented_data_definitions, - start_event=start_event, - end_event=end_event, + nuisance_score=nuisance_score, + use_train_events=not switch_train_test_events, + test_split=test_split, + n_processes=n_processes, ) t_xz1 = None else: - x1, (r_xz1, t_xz1), (theta0_1, theta1_1) = self._extract_sample( - theta_sets_types=[theta0_types, theta1_types], - theta_sets_values=[theta0_values, theta1_values], - sampling_theta_index=1, - n_samples_per_theta=n_samples_per_theta, + x1, (r_xz1, t_xz1), (theta0_1, theta1_1), n_effective_samples_1 = self._sample( + sets=sets, + 
sampling_index=1, + n_samples_per_set=n_samples_per_theta, augmented_data_definitions=augmented_data_definitions, - start_event=start_event, - end_event=end_event, + nuisance_score=nuisance_score, + use_train_events=not switch_train_test_events, + test_split=test_split, + n_processes=n_processes, ) # Combine @@ -774,18 +607,22 @@ def extract_samples_train_ratio( if self.morpher is not None: np.save(folder + "/t_xz_" + filename + ".npy", t_xz) - return x, theta0, theta1, y, r_xz, t_xz + return x, theta0, theta1, y, r_xz, t_xz, min(min(n_effective_samples_0), min(n_effective_samples_1)) - def extract_samples_train_more_ratios( + def sample_train_more_ratios( self, theta0, theta1, n_samples, - folder, - filename, + nu0=None, + nu1=None, + folder=None, + filename=None, additional_thetas=None, - test_split=0.5, + nuisance_score="auto", + test_split=0.2, switch_train_test_events=False, + n_processes=1, ): """ Extracts training samples `x ~ p(x|theta0)` and `x ~ p(x|theta1)` together with the class label `y`, the joint @@ -812,12 +649,22 @@ def extract_samples_train_more_ratios( n_samples : int Total number of events to be drawn. - folder : str - Path to the folder where the resulting samples should be saved (ndarrays in .npy format). + nu0 : None or tuple, optional + Tuple (type, value) that defines the numerator nuisance parameter point or prior over parameter points for + the sampling. Default value: None + + nu1 : None or tuple, optional + Tuple (type, value) that defines the denominator nuisance parameter point or prior over parameter points for + the sampling. Default value: None + + folder : str or None + Path to the folder where the resulting samples should be saved (ndarrays in .npy format). Default value: + None. - filename : str + filename : str or None Filenames for the resulting samples. A prefix such as 'x' or 'theta0' as well as the extension - '.npy' will be added automatically. + '.npy' will be added automatically. Default value: + None. 
additional_thetas : list of tuple or None list of tuples `(type, value)` that defines additional theta points at which ratio and score are evaluated, @@ -827,14 +674,24 @@ def extract_samples_train_more_ratios( `constant_benchmark_theta()`, `multiple_benchmark_thetas()`, `constant_morphing_theta()`, `multiple_morphing_thetas()`, or `random_morphing_thetas()`. Default value: None. + nuisance_score : bool or "auto", optional + If True, the score with respect to the nuisance parameters (at the default position) will also be + calculated. If False, only the score with respect to the physics parameters is calculated. For "auto", + the nuisance score will be calculated if a nuisance setup is defined. Default value: "auto". + test_split : float or None, optional Fraction of events reserved for the evaluation sample (that will not be used for any training samples). - Default value: 0.5. + Default value: 0.2. switch_train_test_events : bool, optional If True, this function generates a training sample from the events normally reserved for test samples. Default value: False. + n_processes : None or int, optional + If None or larger than 1, MadMiner will use multiprocessing to parallelize the sampling. In this case, + n_processes sets the number of jobs running in parallel, and None will use the number of CPUs. Default value: + 1. + Returns ------- x : ndarray @@ -861,6 +718,10 @@ def extract_samples_train_more_ratios( Joint score evaluated at theta0 with shape `(n_samples, n_parameters)`. The same information is saved as a file in the given folder. + effective_n_samples : int + Effective number of samples, defined as 1/max(event_probabilities), where event_probabilities are the + fractions of the cross section carried by each event. + """ logger.info( @@ -870,11 +731,15 @@ def extract_samples_train_more_ratios( theta1, ) - if self.morpher is None: - raise RuntimeError("No morphing setup loaded.
Cannot calculate score.") - create_missing_folders([folder]) + # Check setup + if nuisance_score == "auto": + nuisance_score = self.nuisance_morpher is not None + if self.morpher is None: + raise RuntimeError("No morphing setup loaded. Cannot calculate score.") + if self.nuisance_morpher is None and nuisance_score: + raise RuntimeError("No nuisance parameters defined. Cannot calculate nuisance score.") if additional_thetas is None: additional_thetas = [] n_additional_thetas = len(additional_thetas) @@ -888,41 +753,42 @@ def extract_samples_train_more_ratios( augmented_data_definitions_1.append(("ratio", i + 2, 1)) augmented_data_definitions_1.append(("score", i + 2)) - # Train / test split - start_event, end_event = self._train_test_split(not switch_train_test_events, test_split) - # Parse thetas for theta0 sampling - theta_types = [] - theta_values = [] + parsed_thetas = [] + parsed_nus = [] n_samples_per_theta = 1000000 - theta0_types, theta0_values, this_n_samples = parse_theta(theta0, n_samples // 2) - theta_types.append(theta0_types) - theta_values.append(theta0_values) + parsed_theta0s, this_n_samples = self._parse_theta(theta0, n_samples // 2) + parsed_nu0s = self._parse_nu(nu0, len(parsed_theta0s)) + parsed_thetas.append(parsed_theta0s) + parsed_nus.append(parsed_nu0s) n_samples_per_theta = min(this_n_samples, n_samples_per_theta) - theta1_types, theta1_values, this_n_samples = parse_theta(theta1, n_samples // 2) - theta_types.append(theta1_types) - theta_values.append(theta1_values) + parsed_theta1s, this_n_samples = self._parse_theta(theta1, n_samples // 2) + parsed_nu1s = self._parse_nu(nu1, len(parsed_theta1s)) + parsed_thetas.append(parsed_theta1s) + parsed_nus.append(parsed_nu1s) n_samples_per_theta = min(this_n_samples, n_samples_per_theta) for additional_theta in additional_thetas: - additional_theta_types, additional_theta_values, this_n_samples = parse_theta( - additional_theta, n_samples // 2 - ) - theta_types.append(additional_theta_types) - 
theta_values.append(additional_theta_values) + additional_parsed_thetas, this_n_samples = self._parse_theta(additional_theta, n_samples // 2) + parsed_thetas.append(additional_parsed_thetas) + additional_parsed_nu = self._parse_nu(nu1, len(additional_parsed_thetas)) + parsed_nus.append(additional_parsed_nu) n_samples_per_theta = min(this_n_samples, n_samples_per_theta) + sets = self._build_sets(parsed_thetas, parsed_nus) + # Start for theta0 - x_0, augmented_data_0, thetas_0 = self._extract_sample( - theta_sets_types=theta_types, - theta_sets_values=theta_values, - n_samples_per_theta=n_samples_per_theta, + x_0, augmented_data_0, thetas_0, n_effective_samples_0 = self._sample( + sets=sets, + n_samples_per_set=n_samples_per_theta, augmented_data_definitions=augmented_data_definitions_0, - sampling_theta_index=0, - start_event=start_event, - end_event=end_event, + sampling_index=0, + nuisance_score=nuisance_score, + use_train_events=not switch_train_test_events, + test_split=test_split, + n_processes=n_processes, ) n_actual_samples = x_0.shape[0] @@ -950,37 +816,41 @@ def extract_samples_train_more_ratios( theta1_0 = np.vstack([theta1_0] + thetas_eval) # Parse thetas for theta1 sampling - theta_types = [] - theta_values = [] + parsed_thetas = [] + parsed_nus = [] n_samples_per_theta = 1000000 - theta0_types, theta0_values, this_n_samples = parse_theta(theta0, n_samples // 2) - theta_types.append(theta0_types) - theta_values.append(theta0_values) + parsed_thetas0, this_n_samples = self._parse_theta(theta0, n_samples // 2) + parsed_nu0s = self._parse_nu(nu0, len(parsed_theta0s)) + parsed_thetas.append(parsed_thetas0) + parsed_nus.append(parsed_nu0s) n_samples_per_theta = min(this_n_samples, n_samples_per_theta) - theta1_types, theta1_values, this_n_samples = parse_theta(theta1, n_samples // 2) - theta_types.append(theta1_types) - theta_values.append(theta1_values) + parsed_thetas1, this_n_samples = self._parse_theta(theta1, n_samples // 2) + parsed_nu1s = 
self._parse_nu(nu1, len(parsed_theta1s)) + parsed_thetas.append(parsed_thetas1) + parsed_nus.append(parsed_nu1s) n_samples_per_theta = min(this_n_samples, n_samples_per_theta) for additional_theta in additional_thetas: - additional_theta_types, additional_theta_values, this_n_samples = parse_theta( - additional_theta, n_samples // 2 - ) - theta_types.append(additional_theta_types) - theta_values.append(additional_theta_values) + additional_parsed_thetas, this_n_samples = self._parse_theta(additional_theta, n_samples // 2) + additional_parsed_nu = self._parse_nu(nu0, len(additional_parsed_thetas)) + parsed_thetas.append(additional_parsed_thetas) + parsed_nus.append(additional_parsed_nu) n_samples_per_theta = min(this_n_samples, n_samples_per_theta) + sets = self._build_sets(parsed_thetas, parsed_nus) + # Start for theta1 - x_1, augmented_data_1, thetas_1 = self._extract_sample( - theta_sets_types=theta_types, - theta_sets_values=theta_values, - n_samples_per_theta=n_samples_per_theta, + x_1, augmented_data_1, thetas_1, n_effective_samples_1 = self._sample( + sets=sets, + n_samples_per_set=n_samples_per_theta, augmented_data_definitions=augmented_data_definitions_1, - sampling_theta_index=1, - start_event=start_event, - end_event=end_event, + sampling_index=1, + nuisance_score=nuisance_score, + use_train_events=not switch_train_test_events, + test_split=test_split, + n_processes=n_processes, ) n_actual_samples += x_1.shape[0] @@ -1040,9 +910,19 @@ def extract_samples_train_more_ratios( np.save(folder + "/t_xz0_" + filename + ".npy", t_xz0) np.save(folder + "/t_xz1_" + filename + ".npy", t_xz1) - return x, theta0, theta1, y, r_xz, t_xz0, t_xz1 + return x, theta0, theta1, y, r_xz, t_xz0, t_xz1, min(min(n_effective_samples_0), min(n_effective_samples_1)) - def extract_samples_test(self, theta, n_samples, folder, filename, test_split=0.5, switch_train_test_events=False): + def sample_test( + self, + theta, + n_samples, + nu=None, + folder=None, + filename=None, + 
test_split=0.2, + switch_train_test_events=False, + n_processes=1, + ): """ Extracts evaluation samples `x ~ p(x|theta)` without any augmented data. @@ -1056,21 +936,32 @@ def extract_samples_test(self, theta, n_samples, folder, filename, test_split=0. n_samples : int Total number of events to be drawn. - folder : str - Path to the folder where the resulting samples should be saved (ndarrays in .npy format). + nu : None or tuple, optional + Tuple (type, value) that defines the nuisance parameter point or prior over parameter points for the + sampling. Default value: None - filename : str + folder : str or None + Path to the folder where the resulting samples should be saved (ndarrays in .npy format). Default value: + None. + + filename : str or None Filenames for the resulting samples. A prefix such as 'x' or 'theta0' as well as the extension - '.npy' will be added automatically. + '.npy' will be added automatically. Default value: + None. test_split : float or None, optional Fraction of events reserved for the evaluation sample (that will not be used for any training samples). - Default value: 0.5. + Default value: 0.2. switch_train_test_events : bool, optional If True, this function generates a test sample from the events normally reserved for training samples. Default value: False. + n_processes : None or int, optional + If None or larger than 1, MadMiner will use multiprocessing to parallelize the sampling. In this case, + n_processes sets the number of jobs running in parallel, and None will use the number of CPUs. Default value: + 1. + Returns ------- x : ndarray @@ -1081,6 +972,10 @@ def extract_samples_test(self, theta, n_samples, folder, filename, test_split=0. Parameter points used for sampling with shape `(n_samples, n_parameters)`. The same information is saved as a file in the given folder.
+ effective_n_samples : int + Effective number of samples, defined as 1/max(event_probabilities), where event_probabilities are the + fractions of the cross section carried by each event. + """ logger.info("Extracting evaluation sample. Sampling according to %s", theta) @@ -1088,18 +983,17 @@ def extract_samples_test(self, theta, n_samples, folder, filename, test_split=0. create_missing_folders([folder]) # Thetas - theta_types, theta_values, n_samples_per_theta = parse_theta(theta, n_samples) - - # Train / test split - start_event, end_event = self._train_test_split(switch_train_test_events, test_split) + parsed_thetas, n_samples_per_theta = self._parse_theta(theta, n_samples) + parsed_nus = self._parse_nu(nu, len(parsed_thetas)) + sets = self._build_sets([parsed_thetas], [parsed_nus]) # Extract information - x, _, (theta,) = self._extract_sample( - theta_sets_types=[theta_types], - theta_sets_values=[theta_values], - n_samples_per_theta=n_samples_per_theta, - start_event=start_event, - end_event=end_event, + x, _, (theta,), n_effective_samples = self._sample( + sets=sets, + n_samples_per_set=n_samples_per_theta, + use_train_events=switch_train_test_events, + test_split=test_split, + n_processes=n_processes, ) # Save data @@ -1107,9 +1001,9 @@ def extract_samples_test(self, theta, n_samples, folder, filename, test_split=0. np.save(folder + "/theta_" + filename + ".npy", theta) np.save(folder + "/x_" + filename + ".npy", x) - return x, theta + return x, theta, min(n_effective_samples) - def extract_cross_sections(self, theta): + def cross_sections(self, theta, nu=None): """ Calculates the total cross sections for all specified thetas. @@ -1118,14 +1012,21 @@ def extract_cross_sections(self, theta): ---------- theta : tuple Tuple (type, value) that defines the parameter point or prior over parameter points at which the cross - section is calculated. 
Pass the output of the functions `constant_benchmark_theta()`, - `multiple_benchmark_thetas()`, `constant_morphing_theta()`, `multiple_morphing_thetas()`, or - `random_morphing_thetas()`. + section is calculated. Pass the output of the functions `benchmark()`, + `benchmarks()`, `morphing_point()`, `morphing_points()`, or + `random_morphing_points()`. + + nu : tuple or None, optional + Tuple (type, value) that defines the nuisance parameter point or prior over nuisance parameter points at + which the cross section is calculated. Pass the output of the functions `benchmark()`, + `benchmarks()`, `morphing_point()`, `morphing_points()`, or + `random_morphing_points()`. Default value: None. Returns ------- thetas : ndarray - Parameter points with shape `(n_thetas, n_parameters)`. + Parameter points with shape `(n_thetas, n_parameters)` or + `(n_thetas, n_parameters + n_nuisance_parameters)`. xsecs : ndarray Total cross sections in pb with shape `(n_thetas, )`. @@ -1134,134 +1035,55 @@ def extract_cross_sections(self, theta): Statistical uncertainties on the total cross sections in pb with shape `(n_thetas, )`.
""" - logger.info("Starting cross-section calculation") + parsed_thetas, _ = self._parse_theta(theta, None) + theta_values = np.asarray([self._get_theta_value(parsed_theta) for parsed_theta in parsed_thetas]) - # Total xsecs for benchmarks - xsecs_benchmarks = None - squared_weight_sum_benchmarks = None - - for obs, weights in madminer_event_loader(self.madminer_filename): - if xsecs_benchmarks is None: - xsecs_benchmarks = np.sum(weights, axis=0) - squared_weight_sum_benchmarks = np.sum(weights * weights, axis=0) - else: - xsecs_benchmarks += np.sum(weights, axis=0) - squared_weight_sum_benchmarks += np.sum(weights * weights, axis=0) - - # Parse thetas for evaluation - theta_types, theta_values, _ = parse_theta(theta, 1) - - # Loop over thetas - all_thetas = [] - all_xsecs = [] - all_xsec_uncertainties = [] - - for (theta_type, theta_value) in zip(theta_types, theta_values): - - if self.morpher is None and theta_type == "morphing": - raise RuntimeError("Theta defined through morphing, but no morphing setup has been loaded.") - - theta = get_theta_value(theta_type, theta_value, self.benchmarks) - theta_matrix = get_theta_benchmark_matrix(theta_type, theta_value, self.benchmarks, self.morpher) - - # Total xsec for this theta - xsec_theta = mdot(theta_matrix, xsecs_benchmarks) - rms_xsec_theta = mdot(theta_matrix * theta_matrix, squared_weight_sum_benchmarks) ** 0.5 - - all_thetas.append(theta) - all_xsecs.append(xsec_theta) - all_xsec_uncertainties.append(rms_xsec_theta) - - logger.debug("theta %s: xsec = (%s +/- %s) pb", theta, xsec_theta, rms_xsec_theta) - - # Return - all_thetas = np.array(all_thetas) - all_xsecs = np.array(all_xsecs) - all_xsec_uncertainties = np.array(all_xsec_uncertainties) - - return all_thetas, all_xsecs, all_xsec_uncertainties - - def extract_raw_data(self, theta=None, derivative=False): - - """ - Returns all events together with the benchmark weights (if theta is None) or weights for a given theta. 
- - Parameters - ---------- - theta : None or ndarray or str, optional - If None, the function returns all benchmark weights. If str, the function returns the weights for a given - benchmark name. If ndarray, it uses morphing to calculate the weights for this value of theta. Default - value: None. - - derivative : bool, optional - If True and if theta is not None, the derivative of the weights with respect to theta are returned. Default - value: False. - - Returns - ------- - x : ndarray - Observables with shape `(n_unweighted_samples, n_observables)`. - - weights : ndarray - If theta is None and derivative is False, benchmark weights with shape - `(n_unweighted_samples, n_benchmarks_phys)` in pb. If theta is not None and derivative is True, the gradient of - the weight for the given parameter with respect to theta with shape `(n_unweighted_samples, n_gradients)` - in pb. Otherwise, weights for the given parameter theta with shape `(n_unweighted_samples,)` in pb. - - """ - - x, weights_benchmarks = next(madminer_event_loader(self.madminer_filename, batch_size=None)) - - if theta is None: - return x, weights_benchmarks - - elif isinstance(theta, six.string_types): - i_benchmark = list(self.benchmarks.keys()).index(theta) - return x, weights_benchmarks[:, i_benchmark] - - elif derivative: - dtheta_matrix = get_dtheta_benchmark_matrix("morphing", theta, self.benchmarks, self.morpher) - - gradients_theta = mdot(dtheta_matrix, weights_benchmarks) # (n_gradients, n_samples) - gradients_theta = gradients_theta.T - - return x, gradients_theta - + if nu is not None: + parsed_nus = self._parse_nu(nu, len(parsed_thetas)) + nu_values = np.asarray([self._get_nu_value(parsed_nu for parsed_nu in parsed_nus)]) + param_values = np.hstack((theta_values, nu_values)) else: - theta_matrix = get_theta_benchmark_matrix("morphing", theta, self.benchmarks, self.morpher) + parsed_nus = None + param_values = theta_values - weights_theta = mdot(theta_matrix, weights_benchmarks) + xsecs, 
uncertainties = self.xsecs(thetas=parsed_thetas, nus=parsed_nus) - return x, weights_theta + return param_values, xsecs, uncertainties - def _extract_sample( + def _sample( self, - theta_sets_types, - theta_sets_values, - n_samples_per_theta, - sampling_theta_index=0, + sets, + n_samples_per_set, + sampling_index=0, augmented_data_definitions=None, - nuisance_score=False, - start_event=0, - end_event=None, + nuisance_score=True, + use_train_events=True, + test_split=0.2, + verbose="some", + n_processes=1, + update_patience=0.01, ): """ Low-level function for the extraction of information from the event samples. Do not use this function directly. + The sampling is organized in terms of "sets". For each set, a number of parameter points (thetas and nus) is + fixed, and `n_samples_per_theta` events are sampled from one of them. + Parameters ---------- - theta_sets_types : list of list of str - Each entry can be 'benchmark' or 'morphing'. + sets : list of list of tuples + The outer list goes over sets, the inner list goes over parameter points, the tuples have the form + (theta, nu). Here theta can be a str or int (for benchmarks) or ndarray (with morphing), while nu can be + None (for nominal value) or ndarray (for nuisance morphing). - theta_sets_values : list of list - Each entry is int and labels the benchmark index (if the corresponding - theta_sampling_types entry is 'benchmark') or a numpy array with the theta values - (of the corresponding theta_sampling_types entry is 'morphing') - - n_samples_per_theta : int + n_samples_per_set : int Number of samples to be drawn per entry in theta_sampling_types. + sampling_index : int + Marking the index of the theta set defined through thetas_types and + thetas_values that should be used for sampling. Default value: 0. 
+ augmented_data_definitions : list of tuple or None Each tuple can either be ('ratio', num_theta, den_theta) or ('score', theta), where num_theta, den_theta, and theta are indexes marking @@ -1269,19 +1091,23 @@ def _extract_sample( used. Default value: None. nuisance_score : bool, optional - If True and if the sample contains nuisance parameters, any joint score in the augmented data definitions - is also calculated with respect to the nuisance parameters (evaluated at their default position). Default - value: False. + If True, any joint score in the augmented data definitions is also calculated with respect to the nuisance + parameters. Default value: True. - sampling_theta_index : int - Marking the index of the theta set defined through thetas_types and - thetas_values that should be used for sampling. Default value: 0. + use_train_events : bool, optional + Decides whether to use the train or test split of the events. Default value: True. + + test_split : float or None, optional + Fraction of events reserved for the evaluation sample (that will not be used for any training samples). + Default value: 0.2. - start_event : int - Index of first event to consider. Default value: 0. + n_processes : None or int, optional + If None or larger than 1, MadMiner will use multiprocessing to parallelize the sampling. In this case, + n_processes sets the number of jobs running in parallel, and None will use the number of CPUs. Default value: + 1. - end_event : int or None - Index of last event to consider. If None, use the last event. Default value: None. + update_patience : float, optional + Wait time (in s) between log updates with n_processes > 1 (or None). Default value: 0.01. Returns ------- @@ -1291,277 +1117,394 @@ def _extract_sample( augmented_data : list of ndarray Augmented data. - theta : list of ndarray + theta_values : list of ndarray Parameter values.
""" logger.debug("Starting sample extraction") - assert n_samples_per_theta > 0, "Requested {} samples per theta!".format(n_samples_per_theta) - + # Check inputs if augmented_data_definitions is None: augmented_data_definitions = [] - logger.debug("Augmented data requested:") - for augmented_data_definition in augmented_data_definitions: - logger.debug(" %s", augmented_data_definition) - - # Nuisance parameters? - include_nuisance_parameters = self.include_nuisance_parameters and nuisance_score - - # Calculate total xsecs for benchmarks - xsecs_benchmarks = None - squared_weight_sum_benchmarks = None - n_observables = 0 - - for obs, weights in madminer_event_loader( - self.madminer_filename, - start=start_event, - end=end_event, - include_nuisance_parameters=include_nuisance_parameters, - benchmark_is_nuisance=self.benchmark_is_nuisance, - ): - # obs has shape (n_events, n_observables) - # weights has shape (n_events, n_benchmarks_phys) - # sampled_from_benchmark has shape (n_events,) - - if xsecs_benchmarks is None: - xsecs_benchmarks = np.sum(weights, axis=0) - squared_weight_sum_benchmarks = np.sum(weights * weights, axis=0) - else: - xsecs_benchmarks += np.sum(weights, axis=0) - squared_weight_sum_benchmarks += np.sum(weights * weights, axis=0) - - n_observables = obs.shape[1] + n_sets, n_params = self._check_sets(sets) - logger.debug("Benchmark cross sections [pb]: %s", xsecs_benchmarks) + # What needs to be calculated? 
+ needs_gradients = self._check_gradient_need(augmented_data_definitions) - # Balance thetas - theta_sets_types, theta_sets_values = balance_thetas(theta_sets_types, theta_sets_values) - - # Check whether we need to calculate scores (which will require the gradients of the morphing matrices) - needs_gradients = False - for augmented_data_definition in augmented_data_definitions: - if augmented_data_definition[0] == "score": - needs_gradients = True + # Prepare outputs + all_x = [] + all_augmented_data = [[] for _ in augmented_data_definitions] + all_thetas = [[] for _ in range(n_params)] + all_nus = [[] for _ in range(n_params)] + all_effective_n_samples = [] - if self.morpher is None: - raise RuntimeError("Cannot calculate score without morphing setup!") + n_stats_warnings = 0 + n_neg_weights_warnings = 0 - # Consistency checks - n_benchmarks = xsecs_benchmarks.shape[0] - expected_n_benchmarks = self.n_benchmarks if include_nuisance_parameters else self.n_benchmarks_phys - if self.morphing_matrix is None: - if n_benchmarks != expected_n_benchmarks: - raise ValueError( - "Inconsistent numbers of benchmarks: {} in observations," - "{} in benchmark list".format(n_benchmarks, len(self.benchmarks)) - ) - else: - if n_benchmarks != expected_n_benchmarks or n_benchmarks < self.morphing_matrix.shape[0]: - raise ValueError( - "Inconsistent numbers of benchmarks: {} in observations, {} in benchmark list, " - "{} in morphing matrix".format(n_benchmarks, len(self.benchmarks), self.morphing_matrix.shape[0]) - ) + # Multiprocessing approach + if n_processes is None or n_processes > 1: + if n_processes is None: + n_processes = multiprocessing.cpu_count() - if n_observables != len(self.observables): - raise ValueError( - "Inconsistent numbers of observables: {} in observations," - "{} in observable list".format(n_observables, len(self.observables)) + job = partial( + self._sample_set, + n_samples=n_samples_per_set, + augmented_data_definitions=augmented_data_definitions, + 
sampling_index=sampling_index, + needs_gradients=needs_gradients, + use_train_events=use_train_events, + test_split=test_split, + nuisance_score=nuisance_score, + n_stats_warnings=1000, + n_neg_weights_warnings=1000, ) - n_thetas = len(theta_sets_types) - assert n_thetas == len(theta_sets_values) - # Sets (within each set, all thetas (sampling, numerator, ...) have a constant value) - n_sets = len(theta_sets_types[sampling_theta_index]) - for theta_types, theta_values in zip(theta_sets_types, theta_sets_values): - assert n_sets == len(theta_types) == len(theta_values) + logger.info("Starting sampling jobs in parallel, using %s processes", n_processes) - # Number of samples to be drawn - if not isinstance(n_samples_per_theta, collections.Iterable): - n_samples_per_theta = [n_samples_per_theta] * n_sets - elif len(n_samples_per_theta) == 1: - n_samples_per_theta = [n_samples_per_theta[0]] * n_sets + pool = multiprocessing.Pool(processes=n_processes) + r = pool.map_async(job, sets, chunksize=1) - # Prepare output - all_x = [] - all_augmented_data = [[] for _ in augmented_data_definitions] - all_thetas = [[] for _ in range(n_thetas)] - all_effective_n_samples = [] + next_verbose = 0 + verbose_steps = n_sets // 10 + + while not r.ready(): + n_done = max(n_sets - r._number_left * r._chunksize, 0) + if n_done >= next_verbose: + logger.info("%s / %s jobs done", max(n_sets - r._number_left * r._chunksize, 0), n_sets) + while next_verbose <= n_done: + next_verbose += verbose_steps + time.sleep(update_patience) - n_statistics_warnings = 0 - n_negative_weights_warnings = 0 + r.wait() - # Main loop over thetas - for i_set in range(n_sets): + logger.info("All jobs done!") - # Setup for set - n_samples = n_samples_per_theta[i_set] + for x, thetas, nus, augmented_data, eff_n_samples, _, _ in r.get(): + all_x.append(x) + for i, values in enumerate(augmented_data): + all_augmented_data[i].append(values) + for i, values in enumerate(thetas): + all_thetas[i].append(values) + for i, 
values in enumerate(nus): + all_nus[i].append(values) + all_effective_n_samples.append(eff_n_samples) - theta_types = [t[i_set] for t in theta_sets_types] - theta_values = [t[i_set] for t in theta_sets_values] + # Serial approach + else: + logger.info("Starting sampling serially") + + # Verbosity + if verbose == "all": # Print output after every epoch + n_sets_verbose = 1 + elif verbose == "many": # Print output after 2%, 4%, ..., 100% progress + n_sets_verbose = max(int(round(n_sets / 50, 0)), 1) + elif verbose == "some": # Print output after 10%, 20%, ..., 100% progress + n_sets_verbose = max(int(round(n_sets / 20, 0)), 1) + elif verbose == "few": # Print output after 20%, 40%, ..., 100% progress + n_sets_verbose = max(int(round(n_sets / 5, 0)), 1) + elif verbose == "none": # Never print output + n_sets_verbose = n_sets + 2 + else: + raise ValueError("Unknown value %s for keyword verbose", verbose) + logger.debug("Will print training progress every %s sets", n_sets_verbose) + + # Loop over sets + for i_set, set_ in enumerate(sets): + if (i_set + 1) % n_sets_verbose == 0: + logger.info("Sampling from parameter point %s / %s", i_set + 1, n_sets) + else: + logger.debug("Sampling from parameter point %s / %s", i_set + 1, n_sets) + + x, thetas, nus, augmented_data, eff_n_samples, n_stats_warnings, n_neg_weights_warnings = self._sample_set( + set_, + n_samples=n_samples_per_set, + augmented_data_definitions=augmented_data_definitions, + sampling_index=sampling_index, + needs_gradients=needs_gradients, + use_train_events=use_train_events, + test_split=test_split, + nuisance_score=nuisance_score, + n_stats_warnings=n_stats_warnings, + n_neg_weights_warnings=n_neg_weights_warnings, + ) - if self.morpher is None and "morphing" in theta_types: - raise RuntimeError("Theta defined through morphing, but no morphing setup has been loaded.") + all_x.append(x) + for i, values in enumerate(augmented_data): + all_augmented_data[i].append(values) + for i, values in 
enumerate(thetas): + all_thetas[i].append(values) + for i, values in enumerate(nus): + all_nus[i].append(values) + all_effective_n_samples.append(eff_n_samples) - # Parse thetas and calculate the w_c(theta) for them - thetas = [] - theta_matrices = [] - theta_gradient_matrices = [] + # Combine and return results + all_x = np.vstack(all_x) + for i, values in enumerate(all_thetas): + all_thetas[i] = np.vstack(values) + for i, values in enumerate(all_nus): + all_nus[i] = np.vstack(values) + for i, values in enumerate(all_augmented_data): + all_augmented_data[i] = np.vstack(values) + all_effective_n_samples = np.array(all_effective_n_samples) + all_thetas = self._combine_thetas_nus(all_thetas, all_nus) - logger.debug("Drawing %s events for the following thetas:", n_samples) + # Report effective number of samples + self._report_effective_n_samples(all_effective_n_samples) + + return all_x, all_augmented_data, all_thetas, all_effective_n_samples + + @staticmethod + def _check_sets(sets): + n_sets = len(sets) + n_params = None + for set_ in sets: + if n_params is None: + n_params = len(set_) + assert len(set_) == n_params + for param_point in set_: + assert len(param_point) == 2 + + return n_sets, n_params + + @staticmethod + def _check_gradient_need(augmented_data_definitions): + for definition in augmented_data_definitions: + if definition[0] == "score": + return True + return False + + def _sample_set( + self, + set_, + n_samples, + augmented_data_definitions, + sampling_index=0, + needs_gradients=True, + nuisance_score=True, + use_train_events=True, + test_split=0.2, + n_stats_warnings=0, + n_neg_weights_warnings=0, + ): + # Parse thetas and nus + thetas, nus = [], [] + theta_values, nu_values = [], [] + theta_matrices, theta_gradient_matrices = [], [] - for i_theta, (theta_type, theta_value) in enumerate(zip(theta_types, theta_values)): - theta = get_theta_value(theta_type, theta_value, self.benchmarks) - theta = np.broadcast_to(theta, (n_samples, theta.size)) - 
thetas.append(theta) + logger.debug("Drawing %s events for the following parameter points:", n_samples) - theta_matrices.append( - get_theta_benchmark_matrix(theta_type, theta_value, self.benchmarks, self.morpher) - ) - if needs_gradients: - theta_gradient_matrices.append( - get_dtheta_benchmark_matrix(theta_type, theta_value, self.benchmarks, self.morpher) - ) + for i_param, (theta, nu) in enumerate(set_): + thetas.append(theta) + nus.append(nu) - logger.debug( - " theta %s = %s%s", i_theta, theta[0, :], " (sampling)" if i_theta == sampling_theta_index else "" - ) + theta_value = self._get_theta_value(theta) + theta_value = np.broadcast_to(theta_value, (n_samples, theta_value.size)) + theta_values.append(theta_value) - sampling_theta_matrix = theta_matrices[sampling_theta_index] + if nu is None: + nu_value = None + nu_values.append([[None] for _ in range(n_samples)]) + else: + nu_value = self._get_nu_value(nu) + nu_values.append(np.broadcast_to(nu_value, (n_samples, nu_value.size))) - # Total xsec for sampling theta - xsec_sampling_theta = mdot(sampling_theta_matrix, xsecs_benchmarks) - rms_xsec_sampling_theta = ( - mdot(sampling_theta_matrix * sampling_theta_matrix, squared_weight_sum_benchmarks) - ) ** 0.5 + theta_matrices.append(self._get_theta_benchmark_matrix(theta)) + if needs_gradients: + theta_gradient_matrices.append(self._get_dtheta_benchmark_matrix(theta)) - if rms_xsec_sampling_theta > 0.1 * xsec_sampling_theta: - n_statistics_warnings += 1 + if i_param == sampling_index: + logger.debug(" %s: theta = %s, nu = %s (sampling)", i_param, theta_value[0, :], nu_value) + else: + logger.debug(" %s: theta = %s, nu = %s", i_param, theta_value[0, :], nu_value) - if n_statistics_warnings <= 1: - logger.warning( - "Large statistical uncertainty on the total cross section for theta = %s: " - "(%4f +/- %4f) pb. 
Skipping these warnings in the future...", - thetas[sampling_theta_index][0], - xsec_sampling_theta, - rms_xsec_sampling_theta, - ) + # Cross sections + xsecs, xsec_uncertainties = self.xsecs( + thetas, nus, events="train" if use_train_events else "test", test_split=test_split + ) + if needs_gradients: + xsec_gradients = self.xsec_gradients( + thetas, + nus, + gradients="all" if nuisance_score else "theta", + events="train" if use_train_events else "test", + test_split=test_split, + ) + else: + xsec_gradients = None + + # Report large uncertainties + if xsec_uncertainties[sampling_index] > 0.1 * xsecs[sampling_index]: + n_stats_warnings += 1 + if n_stats_warnings <= 1: + logger.warning( + "Large statistical uncertainty on the total cross section when sampling from theta = %s: " + "(%4f +/- %4f) pb (%s %%). Skipping these warnings in the future...", + theta_values[sampling_index][0], + xsecs[sampling_index], + xsec_uncertainties[sampling_index], + 100.0 * xsec_uncertainties[sampling_index] / xsecs[sampling_index], + ) - # Prepare output - samples_done = np.zeros(n_samples, dtype=np.bool) - samples_x = np.zeros((n_samples, n_observables)) - samples_augmented_data = [] - for definition in augmented_data_definitions: - if definition[0] == "ratio": - samples_augmented_data.append(np.zeros((n_samples, 1))) - elif definition[0] == "score": - samples_augmented_data.append(np.zeros((n_samples, self.n_parameters))) - elif definition[0] == "nuisance_score": - samples_augmented_data.append(np.zeros((n_samples, self.n_nuisance_parameters))) - - largest_weight = 0.0 - - # Main sampling loop - while not np.all(samples_done): - - # Draw random numbers in [0, 1] - u = np.random.rand(n_samples) # Shape: (n_samples,) - - # Loop over weighted events - cumulative_p = np.array([0.0]) - - for x_batch, weights_benchmarks_batch in madminer_event_loader( - self.madminer_filename, start=start_event, end=end_event - ): - # Evaluate p(x | sampling theta) - weights_theta = 
mdot(sampling_theta_matrix, weights_benchmarks_batch) # Shape (n_batch_size,) - p_theta = weights_theta / xsec_sampling_theta # Shape: (n_batch_size,) - - # Handle negative weights (should be rare) - n_negative_weights = np.sum(p_theta < 0.0) - if n_negative_weights > 0: - n_negative_weights_warnings += 1 - # n_negative_benchmark_weights = np.sum(weights_benchmarks_batch < 0.0) - - if n_negative_weights_warnings <= 3: - logger.warning( - "For this value of theta, %s / %s events have negative weight and will be ignored", - n_negative_weights, - p_theta.size, - ) - if n_negative_weights_warnings == 3: - logger.warning("Skipping warnings about negative weights in the future...") - - # filter_negative_weights = p_theta < 0.0 - # for weight_theta_neg, weight_benchmarks_neg in zip( - # weights_theta[filter_negative_weights], weights_benchmarks_batch[filter_negative_weights] - # ): - # logger.debug( - # " weight(theta): %s, benchmark weights: %s", weight_theta_neg, weight_benchmarks_neg - # ) - - p_theta[p_theta < 0.0] = 0.0 - - # Remember largest weights (to calculate effective number of samples) - largest_weight = max(largest_weight, np.max(p_theta)) - - # Calculate cumulative p (summing up all events until here) - cumulative_p = cumulative_p.flatten()[-1] + np.cumsum(p_theta) # Shape: (n_batch_size,) - - # When cumulative_p hits u, we store the events - indices = np.searchsorted(cumulative_p, u, side="left").flatten() - # Shape: (n_samples,), values: [0, ..., n_batch_size] - - found_now = np.invert(samples_done) & (indices < len(cumulative_p)) # Shape: (n_samples,) - samples_x[found_now] = x_batch[indices[found_now]] - samples_done[found_now] = True - - # Extract augmented data - relevant_augmented_data = calculate_augmented_data( - augmented_data_definitions, - weights_benchmarks_batch[indices[found_now], :], - xsecs_benchmarks, - theta_matrices, - theta_gradient_matrices, - nuisance_morpher=self.nuisance_morpher, + # Prepare output + done = np.zeros(n_samples, 
dtype=np.bool) + x = np.zeros((n_samples, self.n_observables)) + augmented_data = [] + for definition in augmented_data_definitions: + if definition[0] == "ratio": + augmented_data.append(np.zeros((n_samples, 1))) + elif definition[0] == "score": + if nuisance_score: + augmented_data.append(np.zeros((n_samples, self.n_parameters + self.n_nuisance_parameters))) + else: + augmented_data.append(np.zeros((n_samples, self.n_parameters))) + largest_event_probability = 0.0 + + # Main sampling loop + start_event, end_event, correction_factor = self._train_test_split(use_train_events, test_split) + while not np.all(done): + # Draw random numbers in [0, 1] + u = np.random.rand(n_samples) # Shape: (n_samples,) + cumulative_p = np.array([0.0]) + + # Loop over weighted events + for x_batch, weights_benchmarks_batch in madminer_event_loader( + self.madminer_filename, start=start_event, end=end_event + ): + weights_benchmarks_batch *= correction_factor + + # Weights + weights = self._weights(thetas, nus, weights_benchmarks_batch, theta_matrices) + if needs_gradients: + weight_gradients = self._weight_gradients( + thetas, + nus, + weights_benchmarks_batch, + gradients="all" if nuisance_score else "theta", + theta_matrices=theta_matrices, + theta_gradient_matrices=theta_gradient_matrices, ) + else: + weight_gradients = None + + # Evaluate p(x | sampling theta) + p_sampling = weights[sampling_index] / xsecs[sampling_index] # Shape: (n_batch_size,) + + # Handle negative weights (should be rare) + n_negative_weights = np.sum(p_sampling < 0.0) + if n_negative_weights > 0: + n_neg_weights_warnings += 1 + if n_neg_weights_warnings <= 3: + logger.warning( + "For this value of theta, %s / %s events have negative weight and will be ignored", + n_negative_weights, + p_sampling.size, + ) + if n_neg_weights_warnings == 3: + logger.warning("Skipping warnings about negative weights in the future...") + p_sampling[p_sampling < 0.0] = 0.0 + + # Remember largest weights (to calculate effective 
number of samples) + largest_event_probability = max(largest_event_probability, np.max(p_sampling)) + + # Calculate cumulative p (summing up all events until here) + cumulative_p = cumulative_p.flatten()[-1] + np.cumsum(p_sampling) # Shape: (n_batch_size,) + + # When cumulative_p hits u, we store the events + indices = np.searchsorted(cumulative_p, u, side="left").flatten() + # Shape: (n_samples,), values: [0, ..., n_batch_size] + + found_now = np.invert(done) & (indices < len(cumulative_p)) # Shape: (n_samples,) + x[found_now] = x_batch[indices[found_now]] + done[found_now] = True + + # Extract augmented data + relevant_augmented_data = self._calculate_augmented_data( + augmented_data_definitions=augmented_data_definitions, + weights=weights[:, indices[found_now]], + weight_gradients=None if weight_gradients is None else weight_gradients[:, :, indices[found_now]], + xsecs=xsecs, + xsec_gradients=xsec_gradients, + ) + for i, this_relevant_augmented_data in enumerate(relevant_augmented_data): + augmented_data[i][found_now] = this_relevant_augmented_data - for i, this_relevant_augmented_data in enumerate(relevant_augmented_data): - samples_augmented_data[i][found_now] = this_relevant_augmented_data - - if np.all(samples_done): - break + # Finished? 
+ if np.all(done): + break - # Cross-check cumulative probabilities at end - logger.debug(" Cumulative probability (should be close to 1): %s", cumulative_p[-1]) + # Cross-check cumulative probabilities at end + logger.debug(" Cumulative probability (should be close to 1): %s", cumulative_p[-1]) - # Check that we got 'em all, otherwise repeat - if not np.all(samples_done): - logger.debug( - " After full pass through event files, {} / {} samples not found, u = {}".format( - np.sum(np.invert(samples_done)), samples_done.size, u[np.invert(samples_done)] - ) + # Check that we got 'em all, otherwise repeat + if not np.all(done): + logger.debug( + " After full pass through event files, {} / {} samples not found, with u = {}".format( + np.sum(np.invert(done)), done.size, u[np.invert(done)] ) + ) - all_x.append(samples_x) - for i, theta in enumerate(thetas): - all_thetas[i].append(theta) - for i, this_samples_augmented_data in enumerate(samples_augmented_data): - all_augmented_data[i].append(this_samples_augmented_data) - all_effective_n_samples.append(1.0 / max(1.0e-12, largest_weight)) + n_eff_samples = 1.0 / max(1.0e-12, largest_event_probability) - # Combine and return results - all_x = np.vstack(all_x) - for i in range(n_thetas): - all_thetas[i] = np.vstack(all_thetas[i]) - for i in range(len(all_augmented_data)): - all_augmented_data[i] = np.vstack(all_augmented_data[i]) - all_effective_n_samples = np.array(all_effective_n_samples) + return x, theta_values, nu_values, augmented_data, n_eff_samples, n_stats_warnings, n_neg_weights_warnings - # Report effective number of samples - if n_sets > 1: + @staticmethod + def _calculate_augmented_data( + augmented_data_definitions, + weights, # shape (n_thetas, n_events) + weight_gradients, # grad_theta dsigma(theta, nu) with shape (n_thetas, n_gradients, n_events) + xsecs, # shape (n_thetas,) + xsec_gradients, # grad_theta sigma(theta, nu) with shape (n_params, n_gradients) + ): + augmented_data = [] + for definition in 
augmented_data_definitions: + if definition[0] == "ratio": + _, i_num, i_den = definition + ratio = (weights[i_num] / xsecs[i_num]) / (weights[i_den] / xsecs[i_den]) + ratio = ratio.reshape((-1, 1)) # (n_samples, 1) + augmented_data.append(ratio) + elif definition[0] == "score": + _, i = definition + score = weight_gradients[i, :, :] / weights[i, np.newaxis, :] # (n_gradients, n_samples) + score = score - xsec_gradients[i, :, np.newaxis] / xsecs[i, np.newaxis, np.newaxis] + score = score.T # (n_samples, n_gradients) + augmented_data.append(score) + else: + raise ValueError("Unknown augmented data type {}".format(definition[0])) + + return augmented_data + + def _combine_thetas_nus(self, all_thetas, all_nus): + n_thetas = len(all_thetas) + assert n_thetas == len(all_nus) + + # all_nus is a list of a list of (None or ndarray) + # Figure out if there's anything nontrivial in there + add_nuisance_params = False + for nus in all_nus: + if self._any_nontrivial_nus(nus): + add_nuisance_params = True + + # No nuisance params? 
+ if not add_nuisance_params or self.nuisance_morpher is None or self.n_nuisance_parameters == 0: + return all_thetas + + all_combined = [] + for thetas, nus in zip(all_thetas, all_nus): + combined = [] + if nus is None: + nus = [None for _ in range(thetas)] + for theta, nu in zip(thetas, nus): + if nu is None or None in nu: + nu = np.zeros(self.n_nuisance_parameters) + combined.append(np.hstack((theta, nu))) + all_combined.append(np.asarray(combined)) + return all_combined + + @staticmethod + def _report_effective_n_samples(all_effective_n_samples): + if len(all_effective_n_samples) > 1: logger.info( "Effective number of samples: mean %s, with individual thetas ranging from %s to %s", np.mean(all_effective_n_samples), @@ -1572,47 +1515,345 @@ def _extract_sample( else: logger.info("Effective number of samples: %s", all_effective_n_samples[0]) - return all_x, all_augmented_data, all_thetas + @staticmethod + def _parse_theta(theta, n_samples): + theta_type_in = theta[0] + theta_value_in = theta[1] - def _train_test_split(self, train, test_split): - """ - Returns the start and end event for train samples (train = True) or test samples (train = False). + if theta_type_in == "benchmark": + thetas_out = [theta_value_in] + if n_samples is None: + n_samples_per_theta = 1 + else: + n_samples_per_theta = n_samples - Parameters - ---------- - train : bool - True if training data is generated, False if test data is generated. + elif theta_type_in == "benchmarks": + n_benchmarks = len(theta_value_in) + if n_samples is None: + n_samples_per_theta = 1 + else: + n_samples_per_theta = max(int(round(n_samples / n_benchmarks, 0)), 1) + thetas_out = theta_value_in - test_split : float - Fraction of events reserved for testing. 
+ elif theta_type_in == "morphing_point": + thetas_out = [np.asarray(theta_value_in)] + if n_samples is None: + n_samples_per_theta = 1 + else: + n_samples_per_theta = n_samples - Returns - ------- - start_event : int - Index of the first unweighted event to consider. + elif theta_type_in == "morphing_points": + n_benchmarks = len(theta_value_in) + if n_samples is None: + n_samples_per_theta = 1 + else: + n_samples_per_theta = max(int(round(n_samples / n_benchmarks, 0)), 1) + thetas_out = theta_value_in + + elif theta_type_in == "random_morphing_points": + n_benchmarks, priors = theta_value_in + if n_benchmarks is None or n_benchmarks <= 0 or (n_samples is not None and n_benchmarks > n_samples): + n_benchmarks = n_samples + if n_samples is None: + n_samples_per_theta = 1 + else: + n_samples_per_theta = max(int(round(n_samples / n_benchmarks, 0)), 1) + + thetas_out = [] + for prior in priors: + if prior[0] == "flat": + prior_min = prior[1] + prior_max = prior[2] + thetas_out.append(prior_min + (prior_max - prior_min) * np.random.rand(n_benchmarks)) + elif prior[0] == "gaussian": + prior_mean = prior[1] + prior_std = prior[2] + thetas_out.append(np.random.normal(loc=prior_mean, scale=prior_std, size=n_benchmarks)) + else: + raise ValueError("Unknown prior {}".format(prior)) + thetas_out = np.array(thetas_out).T - end_event : int - Index of the last unweighted event to consider. 
+ else: + raise ValueError("Unknown theta specification {}".format(theta)) - """ - if train: - start_event = 0 + return thetas_out, n_samples_per_theta - if test_split is None or test_split <= 0.0 or test_split >= 1.0: - end_event = None - else: - end_event = int(round((1.0 - test_split) * self.n_samples, 0)) - if end_event < 0 or end_event > self.n_samples: - raise ValueError("Irregular train / test split: sample {} / {}", end_event, self.n_samples) + def _parse_nu(self, nu, n_thetas): + if nu is None: + nu_type_in = "nominal" + nu_value_in = None + else: + nu_type_in = nu[0] + nu_value_in = nu[1] + + if nu_type_in == "nominal": + nu_out = [None for _ in range(n_thetas)] + + elif nu_type_in == "iid": + priors = [nu_value_in for _ in range(self.n_nuisance_parameters)] + return self._parse_nu(("random_morphing_points", (None, priors)), n_thetas) + + elif nu_type_in == "morphing_point": + nu_out = np.asarray([nu_value_in for _ in range(n_thetas)]) + + elif nu_type_in == "morphing_points": + n_nus = len(nu_value_in) + nu_out = np.asarray([nu_value_in[i % n_nus] for i in range(n_thetas)]) + + elif nu_type_in == "random_morphing_points": + _, priors = nu_value_in + + nu_out = [] + for prior in priors: + if prior[0] == "flat": + prior_min = prior[1] + prior_max = prior[2] + nu_out.append(prior_min + (prior_max - prior_min) * np.random.rand(n_thetas)) + elif prior[0] == "gaussian": + prior_mean = prior[1] + prior_std = prior[2] + nu_out.append(np.random.normal(loc=prior_mean, scale=prior_std, size=n_thetas)) + else: + raise ValueError("Unknown prior {}".format(prior)) + nu_out = np.array(nu_out).T else: - if test_split is None or test_split <= 0.0 or test_split >= 1.0: - start_event = 0 + raise ValueError("Unknown nu specification {}".format(nu)) + + return nu_out + + @staticmethod + def _build_sets(thetas, nus): + if len(nus) != len(thetas): + raise RuntimeError("Mismatching thetas and nus: {} vs {}".format(len(thetas), len(nus))) + + n_sets = max([len(param) for param 
in thetas + nus]) + sets = [[] for _ in range(n_sets)] + + for (theta, nu) in zip(thetas, nus): + n_theta_sets_before = len(theta) + n_nu_sets_before = len(nu) + + for i_set in range(n_sets): + sets[i_set].append((theta[i_set % n_theta_sets_before], nu[i_set % n_nu_sets_before])) + + return sets + + +def combine_and_shuffle(input_filenames, output_filename, k_factors=None, overwrite_existing_file=True): + """ + Combines multiple MadMiner files into one, and shuffles the order of the events. + + Note that this function assumes that all samples are generated with the same setup, including identical benchmarks + (and thus morphing setup). If it is used with samples with different settings, there will be wrong results! + There are no explicit cross checks in place yet! + + Parameters + ---------- + input_filenames : list of str + List of paths to the input MadMiner files. + + output_filename : str + Path to the combined MadMiner file. + + k_factors : float or list of float, optional + Multiplies the weights in input_filenames with a universal factor (if k_factors is a float) or with independent + factors (if it is a list of float). Default value: None. + + overwrite_existing_file : bool, optional + If True and if the output file exists, it is overwritten. Default value: True. + + Returns + ------- + None + + """ + + logger.debug("Combining and shuffling samples") + + if len(input_filenames) > 1: + logger.warning( + "Careful: this tool assumes that all samples are generated with the same setup, including" + " identical benchmarks (and thus morphing setup). If it is used with samples with different" + " settings, there will be wrong results! There are no explicit cross checks in place yet." 
+ ) + + # k factors + if k_factors is None: + k_factors = [1.0 for _ in input_filenames] + elif isinstance(k_factors, float): + k_factors = [k_factors for _ in input_filenames] + + # Copy first file to output_filename + logger.info("Copying setup from %s to %s", input_filenames[0], output_filename) + + # TODO: More memory efficient strategy + + # Load events + all_observations = None + all_weights = None + + for i, (filename, k_factor) in enumerate(zip(input_filenames, k_factors)): + logger.info( + "Loading samples from file %s / %s at %s, multiplying weights with k factor %s", + i + 1, + len(input_filenames), + filename, + k_factor, + ) + + for observations, weights in madminer_event_loader(filename): + if all_observations is None: + all_observations = observations + all_weights = k_factor * weights else: - start_event = int(round((1.0 - test_split) * self.n_samples, 0)) + 1 - if start_event < 0 or start_event > self.n_samples: - raise ValueError("Irregular train / test split: sample {} / {}", start_event, self.n_samples) + all_observations = np.vstack((all_observations, observations)) + all_weights = np.vstack((all_weights, k_factor * weights)) + + # Shuffle + all_observations, all_weights = shuffle(all_observations, all_weights) + + # Save result + save_preformatted_events_to_madminer_file( + filename=output_filename, + observations=all_observations, + weights=all_weights, + copy_setup_from=input_filenames[0], + overwrite_existing_samples=overwrite_existing_file, + ) + + +def benchmark(benchmark_name): + """ + Utility function to be used as input to various SampleAugmenter functions, specifying a single parameter benchmark. 
+ + Parameters + ---------- + benchmark_name : str + Name of the benchmark (as in `madminer.core.MadMiner.add_benchmark`) + + + Returns + ------- + output : tuple + Input to various SampleAugmenter functions - end_event = None + """ + return "benchmark", benchmark_name + + +def benchmarks(benchmark_names): + """ + Utility function to be used as input to various SampleAugmenter functions, specifying multiple parameter benchmarks. + + Parameters + ---------- + benchmark_names : list of str + List of names of the benchmarks (as in `madminer.core.MadMiner.add_benchmark`) + + + Returns + ------- + output : tuple + Input to various SampleAugmenter functions + + """ + return "benchmarks", benchmark_names - return start_event, end_event + +def morphing_point(theta): + """ + Utility function to be used as input to various SampleAugmenter functions, specifying a single parameter point theta + in a morphing setup. + + Parameters + ---------- + theta : ndarray or list + Parameter point with shape `(n_parameters,)` + + Returns + ------- + output : tuple + Input to various SampleAugmenter functions + + """ + return "morphing_point", np.asarray(theta) + + +def morphing_points(thetas): + """ + Utility function to be used as input to various SampleAugmenter functions, specifying multiple parameter points + theta in a morphing setup. + + Parameters + ---------- + thetas : ndarray or list of lists or list of ndarrays + Parameter points with shape `(n_thetas, n_parameters)` + + Returns + ------- + output : tuple + Input to various SampleAugmenter functions + + """ + return "morphing_points", [np.asarray(theta) for theta in thetas] + + +def random_morphing_points(n_thetas, priors): + """ + Utility function to be used as input to various SampleAugmenter functions, specifying random parameter points + sampled from a prior in a morphing setup. 
+ + Parameters + ---------- + n_thetas : int + Number of parameter points to be sampled + + priors : list of tuples + Priors for each parameter is characterized by a tuple of the form `(prior_shape, prior_param_0, prior_param_1)`. + Currently, the supported prior_shapes are `flat`, in which case the two other parameters are the lower and upper + bound of the flat prior, and `gaussian`, in which case they are the mean and standard deviation of a Gaussian. + + Returns + ------- + output : tuple + Input to various SampleAugmenter functions + + """ + return "random_morphing_points", (n_thetas, priors) + + +def iid_nuisance_parameters(shape="gaussian", param0=0.0, param1=1.0): + """ + Utility function to be used as input to various SampleAugmenter functions, specifying that nuisance parameters are + fixed at their nominal valuees. + + Parameters + ---------- + prior : tuple + Prior for all nuisance parameters with form `(prior_shape, prior_param_0, prior_param_1)`. + Currently, the supported prior_shapes are `flat`, in which case the two other parameters are the lower and upper + bound of the flat prior, and `gaussian`, in which case they are the mean and standard deviation of a Gaussian. + + Returns + ------- + output : tuple + Input to various SampleAugmenter functions + + """ + return "iid", (shape, param0, param1) + + +def nominal_nuisance_parameters(): + """ + Utility function to be used as input to various SampleAugmenter functions, specifying that nuisance parameters are + fixed at their nominal valuees. 
+ + + Returns + ------- + output : tuple + Input to various SampleAugmenter functions + + """ + return "nominal", None diff --git a/madminer/utils/analysis.py b/madminer/utils/analysis.py deleted file mode 100644 index b8a7cc763..000000000 --- a/madminer/utils/analysis.py +++ /dev/null @@ -1,207 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import numpy as np -import six -import logging - -logger = logging.getLogger(__name__) - - -def get_theta_value(theta_type, theta_value, benchmarks): - if theta_type == "benchmark": - benchmark = benchmarks[theta_value] - benchmark_theta = np.array([benchmark[key] for key in benchmark]) - return benchmark_theta - - elif theta_type == "morphing": - return theta_value - - else: - raise ValueError("Unknown theta {}".format(theta_type)) - - -def get_theta_benchmark_matrix(theta_type, theta_value, benchmarks, morpher=None): - """Calculates vector A such that dsigma(theta) = A * dsigma_benchmarks""" - - if theta_type == "benchmark": - n_benchmarks = len(benchmarks) - index = list(benchmarks).index(theta_value) - theta_matrix = np.zeros(n_benchmarks) - theta_matrix[index] = 1.0 - - elif theta_type == "morphing": - theta_matrix = morpher.calculate_morphing_weights(theta_value) - - else: - raise ValueError("Unknown theta {}".format(theta_type)) - - return theta_matrix - - -def get_dtheta_benchmark_matrix(theta_type, theta_value, benchmarks, morpher=None): - """Calculates matrix A_ij such that d dsigma(theta) / d theta_i = A_ij * dsigma (benchmark j)""" - - if theta_type == "benchmark": - if morpher is None: - raise RuntimeError("Cannot calculate score without morphing") - - theta = benchmarks[theta_value] - theta = np.array([value for _, value in six.iteritems(theta)]) - - return get_dtheta_benchmark_matrix("morphing", theta, benchmarks, morpher) - - elif theta_type == "morphing": - if morpher is None: - raise RuntimeError("Cannot calculate score without morphing") - - dtheta_matrix = 
morpher.calculate_morphing_weight_gradient( - theta_value - ) # Shape (n_parameters, n_benchmarks_phys) - - else: - raise ValueError("Unknown theta {}".format(theta_type)) - - return dtheta_matrix - - -def calculate_augmented_data( - augmented_data_definitions, - weights_benchmarks, - xsecs_benchmarks, - theta_matrices, - theta_gradient_matrices, - nuisance_morpher=None, -): - """Extracts augmented data from benchmark weights""" - - augmented_data = [] - - for definition in augmented_data_definitions: - - if definition[0] == "ratio": - i_num = definition[1] - i_den = definition[2] - - dsigma_num = mdot(theta_matrices[i_num], weights_benchmarks) - sigma_num = mdot(theta_matrices[i_num], xsecs_benchmarks) - dsigma_den = mdot(theta_matrices[i_den], weights_benchmarks) - sigma_den = mdot(theta_matrices[i_den], xsecs_benchmarks) - - ratio = (dsigma_num / sigma_num) / (dsigma_den / sigma_den) - ratio = ratio.reshape((-1, 1)) - - augmented_data.append(ratio) - - elif definition[0] == "score": - i = definition[1] - - gradient_dsigma = mdot(theta_gradient_matrices[i], weights_benchmarks) # (n_gradients, n_samples) - gradient_sigma = mdot(theta_gradient_matrices[i], xsecs_benchmarks) # (n_gradients,) - - dsigma = mdot(theta_matrices[i], weights_benchmarks) # (n_samples,) - sigma = mdot(theta_matrices[i], xsecs_benchmarks) # scalar - - score = gradient_dsigma / dsigma # (n_gradients, n_samples) - score = score.T # (n_samples, n_gradients) - score = score - np.broadcast_to(gradient_sigma / sigma, score.shape) # (n_samples, n_gradients) - - augmented_data.append(score) - - elif definition[0] == "nuisance_score": - a_weights = nuisance_morpher.calculate_a(weights_benchmarks) - a_xsec = nuisance_morpher.calculate_a(xsecs_benchmarks[np.newaxis, :]) - - nuisance_score = a_weights - a_xsec # Shape (n_nuisance_parameters, n_samples) - nuisance_score = nuisance_score.T # Shape (n_samples, n_nuisance_parameters) - - logger.debug("Nuisance score: shape %s, content %s", 
nuisance_score.shape, nuisance_score) - - augmented_data.append(nuisance_score) - - else: - raise ValueError("Unknown augmented data type {}".format(definition[0])) - - return augmented_data - - -def parse_theta(theta, n_samples): - theta_type_in = theta[0] - theta_value_in = theta[1] - - if theta_type_in == "benchmark": - theta_types = ["benchmark"] - theta_values = [theta_value_in] - n_samples_per_theta = n_samples - - elif theta_type_in == "benchmarks": - n_benchmarks = len(theta_value_in) - theta_types = ["benchmark"] * n_benchmarks - theta_values = theta_value_in - n_samples_per_theta = int(round(n_samples / n_benchmarks, 0)) - - elif theta_type_in == "theta": - theta_types = ["morphing"] - theta_values = [theta_value_in] - n_samples_per_theta = n_samples - - elif theta_type_in == "thetas": - n_benchmarks = len(theta_value_in) - theta_types = ["morphing"] * n_benchmarks - theta_values = theta_value_in - n_samples_per_theta = int(round(n_samples / n_benchmarks, 0)) - - elif theta_type_in == "random": - n_benchmarks, priors = theta_value_in - - if n_benchmarks is None or n_benchmarks <= 0: - n_benchmarks = n_samples - - theta_values = [] - for prior in priors: - if prior[0] == "flat": - prior_min = prior[1] - prior_max = prior[2] - theta_values.append(prior_min + (prior_max - prior_min) * np.random.rand(n_benchmarks)) - - elif prior[0] == "gaussian": - prior_mean = prior[1] - prior_std = prior[2] - theta_values.append(np.random.normal(loc=prior_mean, scale=prior_std, size=n_benchmarks)) - - else: - raise ValueError("Unknown prior {}".format(prior)) - - theta_types = ["morphing"] * n_benchmarks - theta_values = np.array(theta_values).T - n_samples_per_theta = int(round(n_samples / n_benchmarks, 0)) - - logger.debug( - "Total n_samples: %s, n_benchmarks_phys: %s, n_samples_per_theta: %s", - n_samples, - n_benchmarks, - n_samples_per_theta, - ) - - else: - raise ValueError("Unknown theta {}".format(theta)) - - return theta_types, theta_values, n_samples_per_theta - 
- -def mdot(matrix, benchmark_information): - """ Calculates a product between a matrix with shape (a, n1) and a weight list with shape (?, n2) with n1 <= n2 """ - - n_benchmarks_matrix = matrix.shape[-1] - weights_benchmarks_T = benchmark_information.T - n_benchmarks_list = weights_benchmarks_T.shape[0] - n_smaller = min(n_benchmarks_matrix, n_benchmarks_list) - - if n_benchmarks_matrix == n_benchmarks_list: - return matrix.dot(weights_benchmarks_T) - - if n_benchmarks_matrix < n_benchmarks_list: - matrix = matrix.T - matrix = matrix[:n_smaller] - matrix = matrix.T - - return matrix.dot(weights_benchmarks_T[:n_smaller]) diff --git a/madminer/utils/histo.py b/madminer/utils/histo.py new file mode 100644 index 000000000..938052000 --- /dev/null +++ b/madminer/utils/histo.py @@ -0,0 +1,193 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import numpy as np +import logging + +logger = logging.getLogger(__name__) + + +class Histo: + def __init__(self, n_bins_thetas, n_bins_x, separate_1d_histos=False): + self.n_bins_thetas = n_bins_thetas + self.n_bins_x = n_bins_x + self.separate_1d_x_histos = separate_1d_histos + + logger.debug("Initialized histogram with the following settings:") + logger.debug(" Bins per parameter: %s", self.n_bins_thetas) + logger.debug(" Bins per observable: %s", self.n_bins_x) + + # Not yet trained + self.n_parameters = None + self.n_observables = None + self.n_bins = None + self.edges = None + self.histos = None + + def _calculate_binning( + self, theta, x, observables=None, lower_cutoff_percentile=0.0, upper_cutoff_percentile=100.0 + ): + all_theta_x = np.hstack([theta, x]).T + + # Number of bins + n_samples = x.shape[0] + n_parameters = theta.shape[1] + n_all_observables = x.shape[1] + + # Observables to actually use + if observables is None: + observables = list(range(n_all_observables)) + + # Number of bins + all_n_bins_x = [1 for _ in range(n_all_observables)] + for i in observables: + 
all_n_bins_x[i] = self.n_bins_x + + if isinstance(self.n_bins_thetas, int): + all_n_bins_theta = [self.n_bins_thetas for _ in range(n_parameters)] + elif len(self.n_bins_thetas) == n_parameters: + all_n_bins_theta = self.n_bins_thetas + else: + raise RuntimeError( + "Inconsistent bin numbers for parameteers: {} vs {} parameters".format(self.n_bins_thetas, n_parameters) + ) + + all_n_bins = all_n_bins_theta + all_n_bins_x + + # Find edges based on percentiles + all_edges = [] + all_ranges = [] + + for i, (data, n_bins) in enumerate(zip(all_theta_x, all_n_bins)): + edges = np.percentile(data, np.linspace(lower_cutoff_percentile, upper_cutoff_percentile, n_bins + 1)) + range_ = (np.nanmin(data) - 0.01, np.nanmax(data) + 0.01) + edges[0], edges[-1] = range_ + + # Remove zero-width bins + widths = np.array(list(edges[1:] - edges[:-1]) + [1.0]) + edges = edges[widths > 1.0e-9] + + all_n_bins[i] = len(edges) - 1 + all_edges.append(edges) + all_ranges.append(range_) + + return all_n_bins, all_edges, all_ranges + + def fit(self, theta, x, fill_empty_bins=False): + + n_samples = x.shape[0] + self.n_parameters = theta.shape[1] + self.n_observables = x.shape[1] + assert theta.shape[0] == n_samples + + logger.debug("Filling histogram with settings:") + logger.debug(" Samples: %s", n_samples) + logger.debug(" Parameters: %s with means %s", self.n_parameters, np.mean(theta, axis=0)) + logger.debug(" Observables: %s with means %s", self.n_observables, np.mean(x, axis=0)) + logger.debug(" No empty bins: %s", fill_empty_bins) + + # Find bins + logger.debug("Calculating binning") + + self.n_bins = [] + self.edges = [] + ranges = [] + + if self.separate_1d_x_histos: + for observable in range(self.n_observables): + histo_n_bins, histo_edges, histo_ranges = self._calculate_binning(theta, x, [observable]) + + self.n_bins.append(histo_n_bins) + self.edges.append(histo_edges) + ranges.append(histo_ranges) + + else: + histo_n_bins, histo_edges, histo_ranges = self._calculate_binning(theta, 
x) + + self.n_bins.append(histo_n_bins) + self.edges.append(histo_edges) + ranges.append(histo_ranges) + + for h, (histo_n_bins, histo_edges, histo_ranges) in enumerate(zip(self.n_bins, self.edges, ranges)): + logger.debug("Histogram %s: bin edges", h + 1) + for i, (axis_bins, axis_edges, axis_range) in enumerate(zip(histo_n_bins, histo_edges, histo_ranges)): + if i < theta.shape[1]: + logger.debug(" theta %s: %s bins, range %s, edges %s", i + 1, axis_bins, axis_range, axis_edges) + else: + logger.debug( + " x %s: %s bins, range %s, edges %s", + i + 1 - theta.shape[1], + axis_bins, + axis_range, + axis_edges, + ) + + # Fill histograms + logger.debug("Filling histograms") + self.histos = [] + theta_x = np.hstack([theta, x]) + + for histo_edges, histo_ranges, histo_n_bins in zip(self.edges, ranges, self.n_bins): + histo, _ = np.histogramdd(theta_x, bins=histo_edges, range=histo_ranges, normed=False, weights=None) + + # Avoid empty bins + if fill_empty_bins: + histo[histo <= 1.0] = 1.0 + + # Calculate cell volumes + original_shape = tuple(histo_n_bins) + flat_shape = tuple([-1] + list(histo_n_bins[self.n_parameters :])) + + # Fix edges for bvolume calculation (to avoid larger volumes for more training data) + modified_histo_edges = [] + for i in range(x.shape[1]): + axis_edges = histo_edges[self.n_parameters + i] + axis_edges[0] = min(np.percentile(x[:, i], 5.0), axis_edges[1] - 0.01) + axis_edges[-1] = max(np.percentile(x[:, i], 95.0), axis_edges[-2] + 0.01) + modified_histo_edges.append(axis_edges) + + bin_widths = [axis_edges[1:] - axis_edges[:-1] for axis_edges in modified_histo_edges] + + volumes = np.ones(flat_shape[1:]) + for obs in range(self.n_observables): + # Broadcast bin widths to array with shape like volumes + bin_widths_broadcasted = np.ones(flat_shape[1:]) + for indices in np.ndindex(flat_shape[1:]): + bin_widths_broadcasted[indices] = bin_widths[obs][indices[obs]] + volumes[:] *= bin_widths_broadcasted + + # Normalize histograms (for each theta bin) 
+ histo = histo.reshape(flat_shape) + + for i in range(histo.shape[0]): + histo[i] /= np.sum(histo[i]) + histo[i] /= volumes + + histo = histo.reshape(original_shape) + + # Avoid NaNs + histo[np.invert(np.isfinite(histo))] = 0.0 + + self.histos.append(histo) + + def log_likelihood(self, theta, x): + if len(theta.shape) == 1: + theta_ = np.broadcast_to(theta, (x.shape[0], theta.shape[0])) + else: + theta_ = theta + theta_x = np.hstack([theta_, x]) + + log_p = 0.0 + for histo, histo_edges, n_bins in zip(self.histos, self.edges, self.n_bins): + histo_indices = [] + + for j in range(theta_x.shape[1]): + indices = np.searchsorted(histo_edges[j], theta_x[:, j], side="right") - 1 + + indices[indices < 0] = 0 + indices[indices >= n_bins[j]] = n_bins[j] - 1 + + histo_indices.append(indices) + + log_p += np.log(histo[histo_indices]) + + return log_p diff --git a/madminer/utils/interfaces/delphes_root.py b/madminer/utils/interfaces/delphes_root.py index 325e67ff3..f8d15dfa8 100644 --- a/madminer/utils/interfaces/delphes_root.py +++ b/madminer/utils/interfaces/delphes_root.py @@ -247,7 +247,7 @@ def get_objects(ievent): def _get_n_events(tree): - es = tree.array("Particle.E") + es = tree.array("Event") n_events = len(es) return n_events diff --git a/madminer/utils/interfaces/lhe.py b/madminer/utils/interfaces/lhe.py index a6d610901..a474532a7 100644 --- a/madminer/utils/interfaces/lhe.py +++ b/madminer/utils/interfaces/lhe.py @@ -29,6 +29,8 @@ def parse_lhe_file( observables_defaults=None, cuts=None, cuts_default_pass=None, + efficiencies=None, + efficiencies_default_pass=None, benchmark_names=None, is_background=False, energy_resolutions=None, @@ -68,6 +70,12 @@ def parse_lhe_file( if cuts_default_pass is None: cuts_default_pass = {key: False for key in six.iterkeys(cuts)} + if efficiencies is None: + efficiencies = OrderedDict() + + if efficiencies_default_pass is None: + efficiencies_default_pass = {key: 1.0 for key in six.iterkeys(efficiencies)} + # Untar and open LHE file 
root, filename = _untar_and_parse_lhe_file(filename) @@ -126,6 +134,9 @@ def parse_lhe_file( pass_cuts = [0 for _ in cuts] fail_cuts = [0 for _ in cuts] + pass_efficiencies = [0 for _ in efficiencies] + fail_efficiencies = [0 for _ in efficiencies] + avg_efficiencies = [0 for _ in efficiencies] # Option one: XML parsing if parse_events_as_xml: @@ -150,7 +161,7 @@ def parse_lhe_file( if weight_names_all_events is None: weight_names_all_events = list(weights.keys()) - weights = list(weights.values()) + weights = np.array(list(weights.values())) # Apply smearing particles = _smear_particles( @@ -162,13 +173,14 @@ def parse_lhe_file( # Calculate observables observations = [] + pass_all_observation = True for obs_name, obs_definition in six.iteritems(observables): if isinstance(obs_definition, six.string_types): try: observations.append(eval(obs_definition, variables)) except (SyntaxError, NameError, TypeError, ZeroDivisionError, IndexError): if observables_required[obs_name]: - continue + pass_all_observation = False default = observables_defaults[obs_name] if default is None: @@ -179,13 +191,16 @@ def parse_lhe_file( observations.append(obs_definition(particles)) except RuntimeError: if observables_required[obs_name]: - continue + pass_all_observation = False default = observables_defaults[obs_name] if default is None: default = np.nan observations.append(default) + if not pass_all_observation: + continue + # Objects for cuts for obs_name, obs_value in zip(observables.keys(), observations): variables[obs_name] = obs_value @@ -211,6 +226,34 @@ def parse_lhe_file( if not pass_all_cuts: continue + # Apply efficiencies + pass_all_efficiencies = True + total_efficiency = 1.0 + for i_efficiency, (efficiency, default_pass) in enumerate(zip(efficiencies, efficiencies_default_pass)): + try: + efficiency_result = eval(efficiency, variables) + if efficiency_result > 0.0: + pass_efficiencies[i_efficiency] += 1 + total_efficiency *= efficiency_result + 
avg_efficiencies[i_efficiency] += efficiency_result + else: + fail_efficiencies[i_efficiency] += 1 + pass_all_efficiencies = False + + except (SyntaxError, NameError, TypeError, ZeroDivisionError, IndexError): + if default_pass > 0.0: + pass_efficiencies[i_efficiency] += 1 + total_efficiency *= default_pass + avg_efficiencies[i_efficiency] += default_pass + else: + fail_efficiencies[i_efficiency] += 1 + pass_all_efficiencies = False + + if pass_all_efficiencies: + weights *= total_efficiency + else: + continue + # Store results observations_all_events.append(observations) weights_all_events.append(weights) @@ -238,7 +281,7 @@ def parse_lhe_file( if weight_names_all_events is None: weight_names_all_events = list(weights.keys()) - weights = list(weights.values()) + weights = np.array(list(weights.values())) # Apply smearing particles = _smear_particles( @@ -250,13 +293,14 @@ def parse_lhe_file( # Calculate observables observations = [] + pass_all_observation = True for obs_name, obs_definition in six.iteritems(observables): if isinstance(obs_definition, six.string_types): try: observations.append(eval(obs_definition, variables)) except (SyntaxError, NameError, TypeError, ZeroDivisionError, IndexError): if observables_required[obs_name]: - continue + pass_all_observation = False default = observables_defaults[obs_name] if default is None: @@ -267,13 +311,16 @@ def parse_lhe_file( observations.append(obs_definition(particles)) except RuntimeError: if observables_required[obs_name]: - continue + pass_all_observation = False default = observables_defaults[obs_name] if default is None: default = np.nan observations.append(default) + if not pass_all_observation: + continue + # Objects for cuts for obs_name, obs_value in zip(observables.keys(), observations): variables[obs_name] = obs_value @@ -299,6 +346,34 @@ def parse_lhe_file( if not pass_all_cuts: continue + # Apply efficiencies + pass_all_efficiencies = True + total_efficiency = 1.0 + for i_efficiency, (efficiency, 
default_pass) in enumerate(zip(efficiencies, efficiencies_default_pass)): + try: + efficiency_result = eval(efficiency, variables) + if efficiency_result > 0.0: + pass_efficiencies[i_efficiency] += 1 + total_efficiency *= efficiency_result + avg_efficiencies[i_efficiency] += efficiency_result + else: + fail_efficiencies[i_efficiency] += 1 + pass_all_efficiencies = False + + except (SyntaxError, NameError, TypeError, ZeroDivisionError, IndexError): + if default_pass > 0.0: + pass_efficiencies[i_efficiency] += 1 + total_efficiency *= default_pass + avg_efficiencies[i_efficiency] += default_pass + else: + fail_efficiencies[i_efficiency] += 1 + pass_all_efficiencies = False + + if pass_all_efficiencies: + weights *= total_efficiency + else: + continue + # Store results observations_all_events.append(observations) weights_all_events.append(weights) @@ -306,9 +381,14 @@ def parse_lhe_file( # Check results for n_pass, n_fail, cut in zip(pass_cuts, fail_cuts, cuts): logger.debug(" %s / %s events pass cut %s", n_pass, n_pass + n_fail, cut) + for n_pass, n_fail, efficiency in zip(pass_efficiencies, fail_efficiencies, efficiencies): + logger.debug(" %s / %s events pass efficiency %s", n_pass, n_pass + n_fail, efficiency) + for n_eff, efficiency, n_pass, n_fail in zip(avg_efficiencies, efficiencies, pass_efficiencies, fail_efficiencies): + logger.debug(" average efficiency for %s is %s", efficiency, n_eff / (n_pass + n_fail)) + n_events_pass = len(observations_all_events) if len(cuts) > 0: - logger.info(" %s events pass all cuts", n_events_pass) + logger.info(" %s events pass all cuts/efficiencies", n_events_pass) if n_events_with_negative_weights > 0: logger.warning(" %s events contain negative weights", n_events_with_negative_weights) diff --git a/madminer/utils/interfaces/madminer_hdf5.py b/madminer/utils/interfaces/madminer_hdf5.py index 0f623f332..492e88751 100644 --- a/madminer/utils/interfaces/madminer_hdf5.py +++ b/madminer/utils/interfaces/madminer_hdf5.py @@ -243,6 
+243,9 @@ def load_madminer_settings(filename, include_nuisance_benchmarks=False): def madminer_event_loader( filename, start=0, end=None, batch_size=100000, include_nuisance_parameters=True, benchmark_is_nuisance=None ): + if start is None: + start = 0 + # Nuisance parameter filtering if not include_nuisance_parameters: if benchmark_is_nuisance is None: diff --git a/madminer/utils/ml/eval.py b/madminer/utils/ml/eval.py index 6bc27e677..89d267e6d 100644 --- a/madminer/utils/ml/eval.py +++ b/madminer/utils/ml/eval.py @@ -5,7 +5,7 @@ import torch from torch import tensor -from madminer.utils.ml.models.ratio import ParameterizedRatioEstimator, DoublyParameterizedRatioEstimator +from madminer.utils.ml.models.ratio import DenseSingleParameterizedRatioModel, DenseDoublyParameterizedRatioModel logger = logging.getLogger(__name__) @@ -77,9 +77,9 @@ def evaluate_ratio_model( # Figure out method type if method_type is None: - if isinstance(model, ParameterizedRatioEstimator): + if isinstance(model, DenseSingleParameterizedRatioModel): method_type = "parameterized" - elif isinstance(model, DoublyParameterizedRatioEstimator): + elif isinstance(model, DenseDoublyParameterizedRatioModel): method_type = "doubly_parameterized" else: raise RuntimeError("Cannot infer method type automatically") @@ -111,7 +111,7 @@ def evaluate_ratio_model( if evaluate_score or return_grad_x: model.eval() - if method_type == "parameterized": + if method_type == "parameterized_ratio": if return_grad_x: s_hat, log_r_hat, t_hat0, x_gradients = model( theta0s, xs, return_grad_x=True, track_score=evaluate_score, create_gradient_graph=False @@ -120,7 +120,7 @@ def evaluate_ratio_model( s_hat, log_r_hat, t_hat0 = model(theta0s, xs, track_score=evaluate_score, create_gradient_graph=False) x_gradients = None t_hat1 = None - elif method_type == "doubly_parameterized": + elif method_type == "double_parameterized_ratio": if return_grad_x: s_hat, log_r_hat, t_hat0, t_hat1, x_gradients = model( theta0s, theta1s, 
xs, return_grad_x=True, track_score=evaluate_score, create_gradient_graph=False @@ -155,9 +155,9 @@ def evaluate_ratio_model( with torch.no_grad(): model.eval() - if method_type == "parameterized": + if method_type == "parameterized_ratio": s_hat, log_r_hat, _ = model(theta0s, xs, track_score=False, create_gradient_graph=False) - elif method_type == "doubly_parameterized": + elif method_type == "double_parameterized_ratio": s_hat, log_r_hat, _, _ = model(theta0s, theta1s, xs, track_score=False, create_gradient_graph=False) else: raise ValueError("Unknown method type %s", method_type) diff --git a/madminer/utils/ml/methods.py b/madminer/utils/ml/methods.py deleted file mode 100644 index 96cfce311..000000000 --- a/madminer/utils/ml/methods.py +++ /dev/null @@ -1,123 +0,0 @@ -from __future__ import absolute_import, division, print_function - -from collections import OrderedDict - -import madminer.utils.ml.losses -from madminer.utils.ml import losses -from madminer.utils.ml.trainer import SingleParameterizedRatioTrainer, DoubleParameterizedRatioTrainer -from madminer.utils.ml.trainer import FlowTrainer, LocalScoreTrainer - - -def get_method_type(method): - if method in ["carl", "rolr", "cascal", "rascal", "alice", "alices"]: - method_type = "parameterized" - elif method in ["carl2", "rolr2", "rascal2", "alice2", "alices2"]: - method_type = "doubly_parameterized" - elif method in ["sally", "sallino"]: - method_type = "local_score" - elif method in ["nde", "scandal"]: - method_type = "nde" - else: - raise RuntimeError("Unknown method {}".format(method)) - return method_type - - -def package_training_data(method, x, theta0, theta1, y, r_xz, t_xz0, t_xz1): - method_type = get_method_type(method) - data = OrderedDict() - if method_type == "parameterized": - data["x"] = x - data["theta"] = theta0 - data["y"] = y - if r_xz is not None: - data["r_xz"] = r_xz - if t_xz0 is not None: - data["t_xz"] = t_xz0 - elif method_type == "doubly_parameterized": - data["x"] = x - 
data["theta0"] = theta0 - data["theta1"] = theta1 - data["y"] = y - if r_xz is not None: - data["r_xz"] = r_xz - if t_xz0 is not None: - data["t_xz0"] = t_xz0 - if t_xz1 is not None: - data["t_xz1"] = t_xz1 - elif method_type == "local_score": - data["x"] = x - data["t_xz"] = t_xz0 - elif method_type == "nde": - data["x"] = x - data["theta"] = theta0 - if t_xz0 is not None: - data["t_xz"] = t_xz0 - return data - - -def get_trainer(method): - method_type = get_method_type(method) - if method_type == "parameterized": - return SingleParameterizedRatioTrainer - elif method_type == "doubly_parameterized": - return DoubleParameterizedRatioTrainer - elif method_type == "local_score": - return LocalScoreTrainer - elif method_type == "nde": - return FlowTrainer - else: - raise RuntimeError("Unknown method %s", method) - - -def get_loss(method, alpha): - if method in ["carl", "carl2"]: - loss_functions = [losses.ratio_xe] - loss_weights = [1.0] - loss_labels = ["xe"] - elif method in ["rolr", "rolr2"]: - loss_functions = [losses.ratio_mse] - loss_weights = [1.0] - loss_labels = ["mse_r"] - elif method == "cascal": - loss_functions = [losses.ratio_xe, losses.ratio_score_mse_num] - loss_weights = [1.0, alpha] - loss_labels = ["xe", "mse_score"] - elif method == "cascal2": - loss_functions = [losses.ratio_xe, losses.ratio_score_mse] - loss_weights = [1.0, alpha] - loss_labels = ["xe", "mse_score"] - elif method == "rascal": - loss_functions = [losses.ratio_mse, losses.ratio_score_mse_num] - loss_weights = [1.0, alpha] - loss_labels = ["mse_r", "mse_score"] - elif method == "rascal2": - loss_functions = [losses.ratio_mse, losses.ratio_score_mse] - loss_weights = [1.0, alpha] - loss_labels = ["mse_r", "mse_score"] - elif method in ["alice", "alice2"]: - loss_functions = [losses.ratio_augmented_xe] - loss_weights = [1.0] - loss_labels = ["improved_xe"] - elif method == "alices": - loss_functions = [losses.ratio_augmented_xe, losses.ratio_score_mse_num] - loss_weights = [1.0, 
alpha] - loss_labels = ["improved_xe", "mse_score"] - elif method == "alices2": - loss_functions = [losses.ratio_augmented_xe, losses.ratio_score_mse] - loss_weights = [1.0, alpha] - loss_labels = ["improved_xe", "mse_score"] - elif method in ["sally", "sallino"]: - loss_functions = [losses.local_score_mse] - loss_weights = [1.0] - loss_labels = ["mse_score"] - elif method == "nde": - loss_functions = [madminer.utils.ml.losses.flow_nll] - loss_weights = [1.0] - loss_labels = ["nll"] - elif method == "scandal": - loss_functions = [madminer.utils.ml.losses.flow_nll, madminer.utils.ml.losses.flow_score_mse] - loss_weights = [1.0, alpha] - loss_labels = ["nll", "mse_score"] - else: - raise NotImplementedError("Unknown method {}".format(method)) - return loss_functions, loss_labels, loss_weights diff --git a/madminer/utils/ml/models/ratio.py b/madminer/utils/ml/models/ratio.py index c77e146c5..e7a94e685 100644 --- a/madminer/utils/ml/models/ratio.py +++ b/madminer/utils/ml/models/ratio.py @@ -9,13 +9,13 @@ logger = logging.getLogger(__name__) -class ParameterizedRatioEstimator(nn.Module): +class DenseSingleParameterizedRatioModel(nn.Module): """ Module that implements agnostic parameterized likelihood estimators such as RASCAL or ALICES. Only the numerator of the ratio is parameterized. 
""" def __init__(self, n_observables, n_parameters, n_hidden, activation="tanh"): - super(ParameterizedRatioEstimator, self).__init__() + super(DenseSingleParameterizedRatioModel, self).__init__() # Save input self.n_hidden = n_hidden @@ -84,7 +84,7 @@ def forward(self, theta, x, track_score=True, return_grad_x=False, create_gradie return s_hat, log_r_hat, t_hat def to(self, *args, **kwargs): - self = super(ParameterizedRatioEstimator, self).to(*args, **kwargs) + self = super(DenseSingleParameterizedRatioModel, self).to(*args, **kwargs) for i, layer in enumerate(self.layers): self.layers[i] = layer.to(*args, **kwargs) @@ -92,13 +92,13 @@ def to(self, *args, **kwargs): return self -class DoublyParameterizedRatioEstimator(nn.Module): +class DenseDoublyParameterizedRatioModel(nn.Module): """ Module that implements agnostic parameterized likelihood estimators such as RASCAL or ALICES. Both numerator and denominator of the ratio are parameterized. """ def __init__(self, n_observables, n_parameters, n_hidden, activation="tanh"): - super(DoublyParameterizedRatioEstimator, self).__init__() + super(DenseDoublyParameterizedRatioModel, self).__init__() # Save input self.n_hidden = n_hidden @@ -189,7 +189,7 @@ def forward(self, theta0, theta1, x, track_score=True, return_grad_x=False, crea return s_hat, log_r_hat, t_hat0, t_hat1 def to(self, *args, **kwargs): - self = super(DoublyParameterizedRatioEstimator, self).to(*args, **kwargs) + self = super(DenseDoublyParameterizedRatioModel, self).to(*args, **kwargs) for i, layer in enumerate(self.layers): self.layers[i] = layer.to(*args, **kwargs) diff --git a/madminer/utils/ml/models/score.py b/madminer/utils/ml/models/score.py index 6aaa64381..f882db867 100644 --- a/madminer/utils/ml/models/score.py +++ b/madminer/utils/ml/models/score.py @@ -9,13 +9,13 @@ logger = logging.getLogger(__name__) -class LocalScoreEstimator(nn.Module): +class DenseLocalScoreModel(nn.Module): """Module that implements local score estimators for methods 
like SALLY and SALLINO, or the calculation of Fisher information matrices.""" def __init__(self, n_observables, n_parameters, n_hidden, activation="tanh"): - super(LocalScoreEstimator, self).__init__() + super(DenseLocalScoreModel, self).__init__() # Save input self.n_hidden = n_hidden @@ -57,7 +57,7 @@ def forward(self, x, return_grad_x=False): return t_hat def to(self, *args, **kwargs): - self = super(LocalScoreEstimator, self).to(*args, **kwargs) + self = super(DenseLocalScoreModel, self).to(*args, **kwargs) for i, layer in enumerate(self.layers): self.layers[i] = layer.to(*args, **kwargs) diff --git a/madminer/utils/ml/trainer.py b/madminer/utils/ml/trainer.py index 6f505671d..78e986559 100644 --- a/madminer/utils/ml/trainer.py +++ b/madminer/utils/ml/trainer.py @@ -17,6 +17,10 @@ class EarlyStoppingException(Exception): pass +class NanException(Exception): + pass + + class Trainer(object): """ Trainer class. Any subclass has to implement the forward_pass() function. """ @@ -100,11 +104,15 @@ def train( self.set_lr(opt, lr) logger.debug("Learning rate: %s", lr) - loss_train, loss_val, loss_contributions_train, loss_contributions_val = self.epoch( - i_epoch, data_labels, train_loader, val_loader, opt, loss_functions, loss_weights, clip_gradient - ) - losses_train.append(loss_train) - losses_val.append(loss_val) + try: + loss_train, loss_val, loss_contributions_train, loss_contributions_val = self.epoch( + i_epoch, data_labels, train_loader, val_loader, opt, loss_functions, loss_weights, clip_gradient + ) + losses_train.append(loss_train) + losses_val.append(loss_val) + except NanException: + logger.info("Ending training during epoch %s because NaNs appeared", i_epoch + 1) + break if early_stopping: try: @@ -112,7 +120,7 @@ def train( best_loss, best_model, best_epoch, loss_val, i_epoch, early_stopping_patience ) except EarlyStoppingException: - logger.debug("Early stopping: ending training after %s epochs", i_epoch + 1) + logger.info("Early stopping: ending 
training after %s epochs", i_epoch + 1) break verbose_epoch = (i_epoch + 1) % n_epochs_verbose == 0 @@ -126,7 +134,7 @@ def train( verbose=verbose_epoch, ) - if early_stopping: + if early_stopping and len(losses_val) > 0: self.wrap_up_early_stopping(best_model, losses_val[-1], best_loss, best_epoch) logger.debug("Training finished") @@ -185,6 +193,8 @@ def make_dataloaders(self, dataset, validation_split, batch_size): @staticmethod def calculate_lr(i_epoch, n_epochs, initial_lr, final_lr): + if n_epochs == 1: + return initial_lr return initial_lr * (final_lr / initial_lr) ** float(i_epoch / (n_epochs - 1.0)) @staticmethod @@ -347,6 +357,15 @@ def wrap_up_early_stopping(self, best_model, currrent_loss, best_loss, best_epoc else: logger.info("Early stopping did not improve performance") + @staticmethod + def _check_for_nans(label, *tensors): + for tensor in tensors: + if tensor is None: + continue + if torch.isnan(tensor).any(): + logger.warning("%s contains NaNs, aborting training! Data:\n%s", label, tensor) + raise NanException + class SingleParameterizedRatioTrainer(Trainer): def __init__(self, model, run_on_gpu=True, double_precision=False): @@ -392,10 +411,15 @@ def forward_pass(self, batch_data, loss_functions): t_xz = batch_data["t_xz"].to(self.device, self.dtype) except KeyError: t_xz = None + self._check_for_nans("Training data", theta, x, y) + self._check_for_nans("Augmented training data", r_xz, t_xz) s_hat, log_r_hat, t_hat = self.model(theta, x, track_score=self.calculate_model_score, return_grad_x=False) + self._check_for_nans("Model output", s_hat, log_r_hat, t_hat) losses = [loss_function(s_hat, log_r_hat, t_hat, None, y, r_xz, t_xz, None) for loss_function in loss_functions] + self._check_for_nans("Loss", *losses) + return losses @@ -448,14 +472,19 @@ def forward_pass(self, batch_data, loss_functions): t_xz1 = batch_data["t_xz1"].to(self.device, self.dtype) except KeyError: t_xz1 = None + self._check_for_nans("Training data", theta0, theta1, x, y) + 
self._check_for_nans("Augmented training data", r_xz, t_xz0, t_xz1) s_hat, log_r_hat, t_hat0, t_hat1 = self.model( theta0, theta1, x, track_score=self.calculate_model_score, return_grad_x=False ) + self._check_for_nans("Model output", s_hat, log_r_hat, t_hat0, t_hat1) losses = [ loss_function(s_hat, log_r_hat, t_hat0, t_hat1, y, r_xz, t_xz0, t_xz1) for loss_function in loss_functions ] + self._check_for_nans("Loss", *losses) + return losses @@ -472,10 +501,15 @@ def check_data(self, data): def forward_pass(self, batch_data, loss_functions): x = batch_data["x"].to(self.device, self.dtype) t_xz = batch_data["t_xz"].to(self.device, self.dtype) + self._check_for_nans("Training data", x) + self._check_for_nans("Augmented training data", t_xz) t_hat = self.model(x) + self._check_for_nans("Model output", t_hat) losses = [loss_function(t_hat, t_xz) for loss_function in loss_functions] + self._check_for_nans("Loss", *losses) + return losses @@ -518,12 +552,17 @@ def forward_pass(self, batch_data, loss_functions): t_xz = batch_data["t_xz"].to(self.device, self.dtype) except KeyError: t_xz = None + self._check_for_nans("Training data", theta, x) + self._check_for_nans("Augmented training data", t_xz) if self.calculate_model_score: _, log_likelihood, t_hat = self.model.log_likelihood_and_score(theta, x) else: _, log_likelihood = self.model.log_likelihood(theta, x) t_hat = None + self._check_for_nans("Model output", log_likelihood, t_hat) losses = [loss_function(log_likelihood, t_hat, t_xz) for loss_function in loss_functions] + self._check_for_nans("Loss", *losses) + return losses diff --git a/madminer/utils/ml/utils.py b/madminer/utils/ml/utils.py index 5ee948fbd..606c4652d 100644 --- a/madminer/utils/ml/utils.py +++ b/madminer/utils/ml/utils.py @@ -3,6 +3,10 @@ import numpy as np import torch import logging +from torch import optim + +import madminer.utils +from madminer.utils.ml import losses logger = logging.getLogger(__name__) @@ -78,3 +82,73 @@ def 
check_required_data(method, r_xz, t_xz0, t_xz1, theta0, theta1, x, y): if method in ["rascal2", "alices2"] and t_xz1 is None: data_is_there = False return data_is_there + + +def get_optimizer(optimizer, nesterov_momentum): + opt_kwargs = None + if optimizer == "adam": + opt = optim.Adam + elif optimizer == "amsgrad": + opt = optim.Adam + opt_kwargs = {"amsgrad": True} + elif optimizer == "sgd": + opt = optim.SGD + if nesterov_momentum is not None: + opt_kwargs = {"momentum": nesterov_momentum} + else: + raise ValueError("Unknown optimizer {}".format(optimizer)) + return opt, opt_kwargs + + +def get_loss(method, alpha): + if method in ["carl", "carl2"]: + loss_functions = [losses.ratio_xe] + loss_weights = [1.0] + loss_labels = ["xe"] + elif method in ["rolr", "rolr2"]: + loss_functions = [losses.ratio_mse] + loss_weights = [1.0] + loss_labels = ["mse_r"] + elif method == "cascal": + loss_functions = [losses.ratio_xe, losses.ratio_score_mse_num] + loss_weights = [1.0, alpha] + loss_labels = ["xe", "mse_score"] + elif method == "cascal2": + loss_functions = [losses.ratio_xe, losses.ratio_score_mse] + loss_weights = [1.0, alpha] + loss_labels = ["xe", "mse_score"] + elif method == "rascal": + loss_functions = [losses.ratio_mse, losses.ratio_score_mse_num] + loss_weights = [1.0, alpha] + loss_labels = ["mse_r", "mse_score"] + elif method == "rascal2": + loss_functions = [losses.ratio_mse, losses.ratio_score_mse] + loss_weights = [1.0, alpha] + loss_labels = ["mse_r", "mse_score"] + elif method in ["alice", "alice2"]: + loss_functions = [losses.ratio_augmented_xe] + loss_weights = [1.0] + loss_labels = ["improved_xe"] + elif method == "alices": + loss_functions = [losses.ratio_augmented_xe, losses.ratio_score_mse_num] + loss_weights = [1.0, alpha] + loss_labels = ["improved_xe", "mse_score"] + elif method == "alices2": + loss_functions = [losses.ratio_augmented_xe, losses.ratio_score_mse] + loss_weights = [1.0, alpha] + loss_labels = ["improved_xe", "mse_score"] + elif 
method in ["sally", "sallino"]: + loss_functions = [losses.local_score_mse] + loss_weights = [1.0] + loss_labels = ["mse_score"] + elif method == "nde": + loss_functions = [madminer.utils.ml.losses.flow_nll] + loss_weights = [1.0] + loss_labels = ["nll"] + elif method == "scandal": + loss_functions = [madminer.utils.ml.losses.flow_nll, madminer.utils.ml.losses.flow_score_mse] + loss_weights = [1.0, alpha] + loss_labels = ["nll", "mse_score"] + else: + raise NotImplementedError("Unknown method {}".format(method)) + return loss_functions, loss_labels, loss_weights diff --git a/madminer/morphing.py b/madminer/utils/morphing.py similarity index 88% rename from madminer/morphing.py rename to madminer/utils/morphing.py index b2b38688b..1d3c70bd0 100644 --- a/madminer/morphing.py +++ b/madminer/utils/morphing.py @@ -1,6 +1,6 @@ from __future__ import absolute_import, division, print_function, unicode_literals -import six +import six import logging import numpy as np from collections import OrderedDict @@ -11,14 +11,14 @@ logger = logging.getLogger(__name__) -class Morpher: +class PhysicsMorpher: """ Morphing functionality for theory parameters. Morphing is a technique that allows MadMax to infer the full probability distribution `p(x_i | theta)` for each simulated event `x_i` and any `theta`, not just the benchmarks. For a typical MadMiner application, it is not necessary to use the morphing classes directly. The other MadMiner classes use the morphing functions "under the hood" when needed. Only for an isolated study of the morphing setup - (e.g. to optimize the morphing basis), the Morpher class itself may be of interest. + (e.g. to optimize the morphing basis), the PhysicsMorpher class itself may be of interest. A typical morphing basis setup involves the following steps: @@ -360,7 +360,8 @@ def calculate_morphing_matrix(self, basis=None): if basis is None: raise RuntimeError( - "No basis defined or given. 
Use Morpher.set_basis(), Morpher.optimize_basis(), or the " "basis keyword." + "No basis defined or given. Use PhysicsMorpher.set_basis(), PhysicsMorpher.optimize_basis(), or the " + "basis keyword." ) n_benchmarks = len(basis) @@ -437,7 +438,8 @@ def calculate_morphing_weights(self, theta, basis=None, morphing_matrix=None): if basis is None: raise RuntimeError( - "No basis defined or given. Use Morpher.set_basis(), Morpher.optimize_basis(), or the " "basis keyword." + "No basis defined or given. Use PhysicsMorpher.set_basis(), PhysicsMorpher.optimize_basis(), or the " + "basis keyword." ) if morphing_matrix is None: @@ -497,7 +499,8 @@ def calculate_morphing_weight_gradient(self, theta, basis=None, morphing_matrix= if basis is None: raise RuntimeError( - "No basis defined or given. Use Morpher.set_basis(), Morpher.optimize_basis(), or the " "basis keyword." + "No basis defined or given. Use PhysicsMorpher.set_basis(), PhysicsMorpher.optimize_basis(), or the " + "basis keyword." ) if morphing_matrix is None: @@ -578,7 +581,8 @@ def evaluate_morphing(self, basis=None, morphing_matrix=None, n_test_thetas=100, if basis is None: raise RuntimeError( - "No basis defined or given. Use Morpher.set_basis(), Morpher.optimize_basis(), or the " "basis keyword." + "No basis defined or given. Use PhysicsMorpher.set_basis(), PhysicsMorpher.optimize_basis(), or the " + "basis keyword." 
) if morphing_matrix is None: @@ -760,11 +764,82 @@ def calculate_nuisance_factors(self, nuisance_parameters, benchmark_weights): """ + if nuisance_parameters is None: + nuisance_parameters = np.zeros(self.n_nuisance_parameters) + a = self.calculate_a(benchmark_weights) # Shape (n_nuisance_parameters, n_events) b = self.calculate_b(benchmark_weights) # Shape (n_nuisance_parameters, n_events) exponent = np.sum(a * nuisance_parameters[:, np.newaxis] + b * nuisance_parameters[:, np.newaxis] ** 2, axis=0) - nuisance_factors = np.exp(exponent) return nuisance_factors + + def calculate_log_nuisance_factor_gradients(self, nuisance_parameters, benchmark_weights): + """ + Calculates the gradient of the log of the nuisance factors with respect to the nuisance parameters. + + Parameters + ---------- + nuisance_parameters : ndarray + Values of the nuisance parameters `nu`, with shape `(n_nuisance_parameters,)`. + + benchmark_weights : ndarray + Event weights `dsigma(x | theta_i, nu_i)` with shape `(n_events, n_benchmarks)`. The benchmarks are expected + to be sorted in the same order as the keyword benchmark_names used during initialization, and the + nuisance benchmarks are expected to be rescaled to have the same physics parameters theta as the + reference_benchmark given during initialization. + + Returns + ------- + log_nuisance_factor_gradients : ndarray + Log nuisance factor gradients `grad_nu log (dsigma(x | theta, nu) / dsigma(x | theta, 0))` with shape + `(n_parameters, n_events)`. 
+ + """ + + if nuisance_parameters is None: + nuisance_parameters = np.zeros(self.n_nuisance_parameters) + + a = self.calculate_a(benchmark_weights) # Shape (n_nuisance_parameters, n_events) + b = self.calculate_b(benchmark_weights) # Shape (n_nuisance_parameters, n_events) + + log_gradients = a + 2.0 * b * nuisance_parameters[:, np.newaxis] + + return log_gradients + + def calculate_nuisance_factor_gradients(self, nuisance_parameters, benchmark_weights): + """ + Calculates the gradient of the nuisance factors with respect to the nuisance parameters. + + Parameters + ---------- + nuisance_parameters : ndarray + Values of the nuisance parameters `nu`, with shape `(n_nuisance_parameters,)`. + + benchmark_weights : ndarray + Event weights `dsigma(x | theta_i, nu_i)` with shape `(n_events, n_benchmarks)`. The benchmarks are expected + to be sorted in the same order as the keyword benchmark_names used during initialization, and the + nuisance benchmarks are expected to be rescaled to have the same physics parameters theta as the + reference_benchmark given during initialization. + + Returns + ------- + nuisance_factor_gradients : ndarray + Nuisance factor gradients `grad_nu (dsigma(x | theta, nu) / dsigma(x | theta, 0))` with shape + `(n_parameters, n_events)`. 
+ + """ + + if nuisance_parameters is None: + nuisance_parameters = np.zeros(self.n_nuisance_parameters) + + a = self.calculate_a(benchmark_weights) # Shape (n_nuisance_parameters, n_events) + b = self.calculate_b(benchmark_weights) # Shape (n_nuisance_parameters, n_events) + + exponent = np.sum(a * nuisance_parameters[:, np.newaxis] + b * nuisance_parameters[:, np.newaxis] ** 2, axis=0) + nuisance_factors = np.exp(exponent) + log_gradients = a + 2.0 * b * nuisance_parameters[:, np.newaxis] + gradients = log_gradients * nuisance_factors[np.newaxis, :] + + return gradients diff --git a/madminer/utils/various.py b/madminer/utils/various.py index e436336f9..23f48c0ce 100644 --- a/madminer/utils/various.py +++ b/madminer/utils/various.py @@ -100,21 +100,20 @@ def shuffle(*arrays): def restrict_samplesize(n, *arrays): restricted_arrays = [] - for i, a in enumerate(arrays): if a is None: restricted_arrays.append(None) continue - restricted_arrays.append(a[:n]) return restricted_arrays -def balance_thetas(theta_sets_types, theta_sets_values): +def balance_thetas(theta_sets_types, theta_sets_values, n_sets=None): """Repeats theta values such that all thetas lists have the same length """ - n_sets = max([len(thetas) for thetas in theta_sets_types]) + if n_sets is None: + n_sets = max([len(thetas) for thetas in theta_sets_types]) for i, (types, values) in enumerate(zip(theta_sets_types, theta_sets_values)): assert len(types) == len(values) @@ -142,12 +141,14 @@ def load_and_check(filename, warning_threshold=1.0e9): if filename is None: return None - data = np.load(filename) + if not isinstance(filename, six.string_types): + data = filename + else: + data = np.load(filename) n_nans = np.sum(np.isnan(data)) n_infs = np.sum(np.isinf(data)) n_finite = np.sum(np.isfinite(data)) - if n_nans + n_infs > 0: logger.warning( "Warning: file %s contains %s NaNs and %s Infs, compared to %s finite numbers!", @@ -159,10 +160,12 @@ def load_and_check(filename, warning_threshold=1.0e9): 
smallest = np.nanmin(data) largest = np.nanmax(data) - if np.abs(smallest) > warning_threshold or np.abs(largest) > warning_threshold: logger.warning("Warning: file %s has some large numbers, rangin from %s to %s", filename, smallest, largest) + if len(data.shape) == 1: + data = data.reshape(-1, 1) + return data @@ -264,3 +267,48 @@ def weighted_quantile(values, quantiles, sample_weight=None, values_sorted=False def approx_equal(a, b, epsilon=1.0e-6): return abs(a - b) < epsilon + + +def separate_information_blocks(fisher_information, parameters_of_interest): + # Find indices + n_parameters = len(fisher_information) + n_poi = len(parameters_of_interest) + + poi_checked = [] + nuisance_params = [] + + for i in range(n_parameters): + if i in parameters_of_interest: + poi_checked.append(i) + else: + nuisance_params.append(i) + + assert n_poi == len(poi_checked), "Inconsistent input" + + # Separate Fisher information parts + information_phys = fisher_information[parameters_of_interest, :][:, parameters_of_interest] + information_mix = fisher_information[nuisance_params, :][:, parameters_of_interest] + information_nuisance = fisher_information[nuisance_params, :][:, nuisance_params] + + return nuisance_params, information_phys, information_mix, information_nuisance + + +def mdot(matrix, benchmark_information): + """ + Calculates a product between a matrix / matrices with shape (n1) or (a, n1) and a weight list with shape (b, n2) + or (n2,), where n1 and n2 do not have to be the same + """ + + n1 = matrix.shape[-1] + weights_t = benchmark_information.T + n2 = weights_t.shape[0] + n_smaller = min(n1, n2) + + if n1 > n2: + matrix = matrix.T + matrix = matrix[:n_smaller] + matrix = matrix.T + elif n2 > n1: + weights_t = weights_t[:n_smaller] + + return matrix.dot(weights_t) diff --git a/setup.py b/setup.py index e744f119c..4fda420b9 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ EMAIL = 'johann.brehmer@nyu.edu' AUTHOR = 'Johann Brehmer, Felix Kling, Irina Espejo, 
Kyle Cranmer' REQUIRES_PYTHON = '>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4' -VERSION = '0.2.8' +VERSION = '0.3.0' # What packages are required for this module to be executed? REQUIRED = [ diff --git a/tests/test_imports.py b/tests/test_imports.py index 972157e24..1d60b3ca2 100644 --- a/tests/test_imports.py +++ b/tests/test_imports.py @@ -1,13 +1,2 @@ -from madminer.core import MadMiner -from madminer.delphes import DelphesProcessor -from madminer.lhe import LHEProcessor -from madminer.ml import EnsembleForge, MLForge -from madminer.morphing import Morpher, NuisanceMorpher -from madminer.plotting import plot_2d_morphing_basis, plot_distribution_of_information, plot_distributions -from madminer.plotting import plot_fisher_information_contours_2d, plot_fisherinfo_barplot -from madminer.plotting import plot_nd_morphing_basis_scatter, plot_2d_morphing_basis -from madminer.sampling import SampleAugmenter - - def test_imports(): assert True diff --git a/tests/test_nuisance.py b/tests/test_nuisance.py index a8208d005..c3127e09a 100644 --- a/tests/test_nuisance.py +++ b/tests/test_nuisance.py @@ -5,7 +5,7 @@ from collections import OrderedDict from madminer.core import MadMiner -from madminer.lhe import LHEProcessor +from madminer.lhe import LHEReader from madminer.fisherinformation import FisherInformation, profile_information @@ -25,7 +25,7 @@ def theta_limit_madminer(xsec=0.001, lumi=1000000.0, effect_phys=0.1, effect_sys miner.save(".data.h5") # Set up observations - proc = LHEProcessor(".data.h5") + proc = LHEReader(".data.h5") proc.add_observable("x", "no one cares") proc.reference_benchmark = "benchmark_0" proc.nuisance_parameters = OrderedDict() diff --git a/tests/test_toy_workflow.py b/tests/test_toy_workflow.py index 32ce4b499..e3a4b46dc 100644 --- a/tests/test_toy_workflow.py +++ b/tests/test_toy_workflow.py @@ -1,11 +1,10 @@ from __future__ import absolute_import, division, print_function, unicode_literals import os -import logging import numpy as 
np from scipy.stats import norm -from madminer.ml import MLForge +from madminer.ml import ParameterizedRatioEstimator if not os.path.exists("tests/data"): os.makedirs("tests/data") @@ -77,18 +76,16 @@ def run_test(): np.save("tests/data/t_xz_train.npy", t_xz_train) # Train model - forge = MLForge() - - forge.train( + estimator = ParameterizedRatioEstimator(n_hidden=(20, 20)) + estimator.train( method="alices", - x_filename="tests/data/x_train.npy", - y_filename="tests/data/y_train.npy", - theta0_filename="tests/data/theta0_train.npy", - r_xz_filename="tests/data/r_xz_train.npy", - t_xz0_filename="tests/data/t_xz_train.npy", + x="tests/data/x_train.npy", + y="tests/data/y_train.npy", + theta="tests/data/theta0_train.npy", + r_xz="tests/data/r_xz_train.npy", + t_xz="tests/data/t_xz_train.npy", alpha=0.1, n_epochs=10, - n_hidden=(20, 20), validation_split=None, batch_size=256, ) @@ -112,8 +109,8 @@ def run_test(): log_r_test_true = np.array(log_r_test_true) # Evaluation - log_r_tests_alices, _, _ = forge.evaluate( - theta0_filename="tests/data/theta_grid.npy", x="tests/data/x_test.npy", evaluate_score=False + log_r_tests_alices, _ = estimator.evaluate( + theta="tests/data/theta_grid.npy", x="tests/data/x_test.npy", evaluate_score=False ) # Calculate error