About_parallel_computations.html



<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <script type="text/javascript">

      var _gaq = _gaq || [];
      _gaq.push(['_setAccount', 'UA-38514290-2']);
      _gaq.push(['_trackPageview']);

      (function() {
        var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
        ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
        var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
      })();
    </script>
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>About parallel computations &mdash; SMPyBandits 0.9.6 documentation</title>
  

  <script type="text/javascript" src="_static/js/modernizr.min.js"></script>
  
    
      <script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
        <script type="text/javascript" src="_static/jquery.js"></script>
        <script type="text/javascript" src="_static/underscore.js"></script>
        <script type="text/javascript" src="_static/doctools.js"></script>
        <script type="text/javascript" src="_static/language_data.js"></script>
        <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
        <script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
        <script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
    
    <script type="text/javascript" src="_static/js/theme.js"></script>

    
  <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
    <link rel="index" title="Index" href="genindex.html" />
    <link rel="search" title="Search" href="search.html" />
    <link rel="next" title="💥 TODO" href="TODO.html" />
    <link rel="prev" title="Short documentation of the API" href="API.html" /> 
</head>

<body class="wy-body-for-nav">

   
  <div class="wy-grid-for-nav">
    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >
          

            <a href="index.html" class="icon icon-home"> SMPyBandits
          

            <img src="_static/logo.png" class="logo" alt="Logo"/>
          
          </a>

          
              <div class="version">
                0.9
              </div>
            
          
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

          
        </div>

        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          
            
              <p class="caption"><span class="caption-text">Contents:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="README.html"><em>SMPyBandits</em></a></li>
<li class="toctree-l1"><a class="reference internal" href="docs/modules.html">SMPyBandits modules</a></li>
<li class="toctree-l1"><a class="reference internal" href="How_to_run_the_code.html">How to run the code ?</a></li>
<li class="toctree-l1"><a class="reference internal" href="PublicationsWithSMPyBandits.html">List of research publications using Lilian Besson’s SMPyBandits project</a></li>
<li class="toctree-l1"><a class="reference internal" href="Aggregation.html"><strong>Policy aggregation algorithms</strong></a></li>
<li class="toctree-l1"><a class="reference internal" href="MultiPlayers.html"><strong>Multi-players simulation environment</strong></a></li>
<li class="toctree-l1"><a class="reference internal" href="DoublingTrick.html"><strong>Doubling Trick for Multi-Armed Bandits</strong></a></li>
<li class="toctree-l1"><a class="reference internal" href="SparseBandits.html"><strong>Structure and Sparsity of Stochastic Multi-Armed Bandits</strong></a></li>
<li class="toctree-l1"><a class="reference internal" href="NonStationaryBandits.html"><strong>Non-Stationary Stochastic Multi-Armed Bandits</strong></a></li>
<li class="toctree-l1"><a class="reference internal" href="API.html">Short documentation of the API</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">About parallel computations</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#what-we-did-implement-joblib-for-multi-core-simulations">What we did implement: <code class="docutils literal notranslate"><span class="pre">Joblib</span></code> for multi-core simulations.</a></li>
<li class="toctree-l2"><a class="reference internal" href="#approaches-we-did-not-try">Approaches we did not try</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#gpu">GPU</a></li>
<li class="toctree-l3"><a class="reference internal" href="#large-scale-cluster">Large scale cluster</a></li>
<li class="toctree-l3"><a class="reference internal" href="#other-ideas">Other ideas?</a></li>
<li class="toctree-l3"><a class="reference internal" href="#scroll-license-github-license">📜 License ? </a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="TODO.html">💥 TODO</a></li>
<li class="toctree-l1"><a class="reference internal" href="plots/README.html">Some illustrations for this project</a></li>
<li class="toctree-l1"><a class="reference internal" href="notebooks/README.html">Jupyter Notebooks 📓 by Naereen &#64; GitHub</a></li>
<li class="toctree-l1"><a class="reference internal" href="notebooks/list.html">List of notebooks for SMPyBandits</a></li>
<li class="toctree-l1"><a class="reference internal" href="Profiling.html">A note on execution times, speed and profiling</a></li>
<li class="toctree-l1"><a class="reference internal" href="uml_diagrams/README.html">UML diagrams</a></li>
<li class="toctree-l1"><a class="reference internal" href="logs/README.html"><code class="docutils literal notranslate"><span class="pre">logs</span></code> files</a></li>
</ul>

            
        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      
      <nav class="wy-nav-top" aria-label="top navigation">
        
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="index.html">SMPyBandits</a>
        
      </nav>


      <div class="wy-nav-content">
        
        <div class="rst-content">
        
          
<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="wy-breadcrumbs">
    
      <li><a href="index.html">Docs</a> &raquo;</li>
        
      <li>About parallel computations</li>
    
    
      <li class="wy-breadcrumbs-aside">
        
            
            <a href="_sources/About_parallel_computations.md.txt" rel="nofollow"> View page source</a>
          
        
      </li>
    
  </ul>

  
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  
<style>
/* CSS overrides for sphinx_rtd_theme */

/* 24px margin */
.nbinput.nblast,
.nboutput.nblast {
    margin-bottom: 19px;  /* padding has already 5px */
}

/* ... except between code cells! */
.nblast + .nbinput {
    margin-top: -19px;
}

.admonition > p:before {
    margin-right: 4px;  /* make room for the exclamation icon */
}

/* Fix math alignment, see https://github.com/rtfd/sphinx_rtd_theme/pull/686 */
.math {
    text-align: unset;
}
</style>
<div class="section" id="about-parallel-computations">
<h1>About parallel computations<a class="headerlink" href="#about-parallel-computations" title="Permalink to this headline">¶</a></h1>
<blockquote>
<div><p>This short page explains quickly we used multi-core computations to speed-up the simulations in SMPyBandits.</p>
</div></blockquote>
<p>Nowadays, parallelism is everywhere in the computational world, and any serious framework for numerical simulations must explore at least one of the three main approaches to (try to) gain performance from parallelism.</p>
<p>For all the different numerical simulations for which SMPyBandits is designed, the setting is the same: we consider a small set of p different problems, of time horizon T that we want to simulate for N independent runs (e.g., p=6, T=10000 and N=100).
On the first hand, because of the fundamentally sequential nature of bandit games, each repetition of the simulation must be sequential regarding the time steps t=1,…,T, and so no parallelism can be done to speed up this axis.
On the other hand, parallelism can help greatly for the two other axes: if we have a way to run in parallel 4 processes, and we have p=4 problems to simulate, then running a process for each problem directly brings a speed-up factor of 4.
Similarly, if we want to run 100 repetitions of the same (random) problem, and we can run 4 processes in parallel, then running 100/4=25 repetitions on each process also bring a speed-up factor of 4.</p>
<p>In this page, we quickly review the chosen approach for SMPyBandits (multi-core on one machine), and we explain why the two other approaches were less appropriate for our study of multi-armed bandit problems.</p>
<div class="section" id="what-we-did-implement-joblib-for-multi-core-simulations">
<h2>What we did implement: <code class="docutils literal notranslate"><span class="pre">Joblib</span></code> for multi-core simulations.<a class="headerlink" href="#what-we-did-implement-joblib-for-multi-core-simulations" title="Permalink to this headline">¶</a></h2>
<p>The first approach is to use multiple cores of the same machines, and because it is both the simplest and the less financially as well as ecologically costly, this is the approach implemented in SMPyBandits.
The machines I had access to during my thesis, either my own laptop or a workstation hosted the SCEE team in CentraleSupélec campus, were equipped with i5 or i7 Intel CPU with 4 or 12 cores.</p>
<p>As explained in the page <a class="reference external" href="https://smpybandits.github.io/How_to_run_the_code.html"><code class="docutils literal notranslate"><span class="pre">How_to_run_the_code.html</span></code></a>, we implemented in SMPyBandits an easy way to run any simulations on n cores of a machine, using the <a class="reference external" href="https://github.com/joblib/joblib"><code class="docutils literal notranslate"><span class="pre">Joblib</span></code></a> library.
It is implemented in a completely transparent way, and if someone uses the command-line variable to configure experiments, using one core or all the cores of the machine one changes <code class="docutils literal notranslate"><span class="pre">N_JOBS=1</span></code> to <code class="docutils literal notranslate"><span class="pre">N_JOBS=-1</span></code>, like in this example.</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span> <span class="nv">BAYES</span><span class="o">=</span>False <span class="nv">ARM_TYPE</span><span class="o">=</span>Bernoulli <span class="nv">N</span><span class="o">=</span><span class="m">100</span> <span class="nv">T</span><span class="o">=</span><span class="m">10000</span> <span class="nv">K</span><span class="o">=</span><span class="m">9</span> <span class="nv">N_JOBS</span><span class="o">=</span><span class="m">1</span> <span class="se">\</span>
  python3 main.py configuration.py
</pre></div>
</div>
<p>As long as the number of jobs (<code class="docutils literal notranslate"><span class="pre">N_JOBS</span></code> here) is less then or equal to the number of physical cores in the CPU of the computer, the final speed-up in terms of total computation runtime is almost optimal.</p>
<p>But jobs are implemented as threads, so the speed-up cannot be more than the number of cores, and using for instance 20 jobs on 4-cores for the 20 repetitions is sub-optimal, as the CPU will essentially spend all its time (and memory) managing the different jobs, and not actually doing the simulations.
Using the above example, we illustrate the effect of using multi-jobs and multi-cores on the time efficiency of simulations using SMPyBandits. We consider three values of <code class="docutils literal notranslate"><span class="pre">N_JOBS</span></code>, 1 to use only one core and one job, 4 to use all the 4 cores of my i5 Intel CPU, and 20 jobs.</p>
<p>We give in the Table below an example of running time of an experiment with T=1000, and different number of repetitions and number of jobs.
It clearly illustrates that using more jobs than the number of CPU is sub-optimal, and that as soon as the number of repetitions is large enough, using one job by available CPU core (\ie, here 4 jobs) gives a significant speed-up time.
Due to the cost of orchestrating the different jobs, and memory exchanges at the end of each repetition, the parallel version is \emph{not} 4 times faster, but empirically we always found it to be 2 to 3.5 times faster.</p>
<p>For a simulation with 9 different algorithms, for K=9 arms, a time horizon of T=10000,
we illustrate the effect on the running time of using <code class="docutils literal notranslate"><span class="pre">N_JOBS</span></code> jobs in parallel.
For different number of repetitions and different number of jobs <code class="docutils literal notranslate"><span class="pre">N_JOBS</span></code>, for 1, 4 (= nb cores), 20 (&gt; nb cores) jobs:</p>
<ul class="simple">
<li><p>1 repetition: 15 seconds, 26 seconds, 43 seconds</p></li>
<li><p>10 repetitions: 87 seconds, 51 seconds, 76 seconds</p></li>
<li><p>100 repetitions: 749 seconds, 272 seconds, 308 seconds</p></li>
<li><p>500 repetitions: 2944 seconds, 1530 seconds, 1846 seconds</p></li>
</ul>
<p><img alt="The table above shows the effect on the running time of using N_JOBS jobs in parallel, for a simulation with 9 different algorithms, for K=9 arms, a time horizon of T=10000." src="_images/About_parallel_computations.png" /></p>
</div>
<hr class="docutils" />
<div class="section" id="approaches-we-did-not-try">
<h2>Approaches we did not try<a class="headerlink" href="#approaches-we-did-not-try" title="Permalink to this headline">¶</a></h2>
<p>The two other approaches we could have consider is parallel computations running on not multiple cores but multiple machines, in a computer cluster, or parallel computations running in a Graphical Processing Unit (GPU).</p>
<div class="section" id="gpu">
<h3>GPU<a class="headerlink" href="#gpu" title="Permalink to this headline">¶</a></h3>
<p>I did not try to add in SMPyBandits the possibility to run simulations using a GPU, or any general purpose computation libraries offering a GPU-backend.
Initially designed for graphical simulations and mainly for video-games applications, the use of GPU for scientific computations have been gaining attention for numerical simulation in the research world since the last 15 years, and NVidia CUDA for GPGPU (General Purpose GPU) started to become popular in 2011.
Since 2016, we saw a large press coverage as well as an extensive use in research of deep learning libraries that make general-purpose machine learning algorithms train on the GPU of a user’s laptop or a cluster of GPU.
This success is mainly possible because of the heavy parallelism of such training algorithms, and the parallel nature of GPU.
To the best of the author knowledge, nobody has tried to implement high performance MAB simulations by using the “parallelism power” of a GPU (at least, no code for such experiments were made public in 2019).</p>
<p>I worked on a GPU, implementing fluid dynamic simulations in an internship in 2012, and I have since then kept a curiosity on how to use GPU-powered libraries and code.
I have contributed to and used famous deep-learning libraries, like <a class="reference external" href="http://deeplearning.net/software/theano">Theano</a> or <a class="reference external" href="https://keras.io/">Keras</a>, and my limited knowledge on such libraries made me believe that it was not easy to use a GPU for bandit simulations, and most surely it would not have been worth the time.</p>
<p>I would be very curious to understand how a GPU could be used to implement highly efficient simulations for sequential learning problems, because it seemed hard whenever I thought about it.</p>
</div>
<div class="section" id="large-scale-cluster">
<h3>Large scale cluster<a class="headerlink" href="#large-scale-cluster" title="Permalink to this headline">¶</a></h3>
<p>I also did not try to use any large scale computer cluster, even if I was aware of the possibility offered by the Grid 5000 project, for instance.
It is partly due to time constraint, as I would have been curious to try, but mainly because we found that it would not have helped us much to use a large scale cluster.
The main reason is that in the multi-armed bandit and sequential learning literature, most research papers do not even include an experimental section, and for the papers who did take the time to implement and test their proposed algorithms, it is almost done on just a few problems and for short- or medium- duration experiments.</p>
<p>For instance, the papers we consider to be the best ones regarding their empirical sections are <a class="reference external" href="https://arxiv.org/abs/1711.03539">Liu &amp; Lee &amp; Shroff, 2017, arXiv:1711.03539</a> and <a class="reference external" href="https://arxiv.org/abs/1802.03692">Cao &amp; Zhen &amp; Kveton &amp; Xie, 2018, arXiv:1802.03692</a>, for piece-wise stationary bandits, and they mainly consider reasonable problems of horizon T=10000 and no more than 1000 independent repetitions. Each paper considers one harder problem, of horizon T=1000000 and less repetitions.</p>
<p>In each article written during my thesis, we included extensive numerical simulations, and even the longest ones (for <a class="reference external" href="https://hal.archives-ouvertes.fr/hal-02006471">Besson &amp; Kaufmann, 2019, HAL-02006471</a>) were short enough to run in less than 12 hours on a 12-core workstation, so we could run a few large-scale simulations over night.
For such reasons, we prefer to not try to run simulations on a cluster.</p>
</div>
<div class="section" id="other-ideas">
<h3>Other ideas?<a class="headerlink" href="#other-ideas" title="Permalink to this headline">¶</a></h3>
<p>And you, dear reader, do you have any idea of a technology I should have tried?
If so, please <a class="reference external" href="https://github.com/SMPyBandits/SMPyBandits/issues/new">fill an issue</a> on GitHub! Thanks!</p>
</div>
<hr class="docutils" />
<div class="section" id="scroll-license-github-license">
<h3>📜 License ? <a class="reference external" href="https://github.com/SMPyBandits/SMPyBandits/blob/master/LICENSE"><img alt="GitHub license" src="https://img.shields.io/github/license/SMPyBandits/SMPyBandits.svg" /></a><a class="headerlink" href="#scroll-license-github-license" title="Permalink to this headline">¶</a></h3>
<p><a class="reference external" href="https://lbesson.mit-license.org/">MIT Licensed</a> (file <a class="reference external" href="LICENSE">LICENSE</a>).</p>
<p>© 2016-2018 <a class="reference external" href="https://GitHub.com/Naereen">Lilian Besson</a>.</p>
<p><a class="reference external" href="https://github.com/SMPyBandits/SMPyBandits/"><img alt="Open Source? Yes!" src="https://badgen.net/badge/Open%20Source%20%3F/Yes%21/blue?icon=github" /></a>
<a class="reference external" href="https://GitHub.com/SMPyBandits/SMPyBandits/graphs/commit-activity"><img alt="Maintenance" src="https://img.shields.io/badge/Maintained%3F-yes-green.svg" /></a>
<a class="reference external" href="https://GitHub.com/Naereen/ama"><img alt="Ask Me Anything !" src="https://img.shields.io/badge/Ask%20me-anything-1abc9c.svg" /></a>
<a class="reference external" href="https://GitHub.com/SMPyBandits/SMPyBandits/"><img alt="Analytics" src="https://ga-beacon.appspot.com/UA-38514290-17/github.com/SMPyBandits/SMPyBandits/README.md?pixel" /></a>
<img alt="https://pypi.org/project/SMPyBandits" src="https://pypi.org/project/SMPyBandits" /><img alt="PyPI version" src="https://img.shields.io/pypi/v/smpybandits.svg" />
<img alt="https://pypi.org/project/SMPyBandits" src="https://pypi.org/project/SMPyBandits" /><img alt="PyPI implementation" src="https://img.shields.io/pypi/implementation/smpybandits.svg" />
<a class="reference external" href="https://pypi.org/project/SMPyBandits"><img alt="https://pypi.org/project/SMPyBandits" src="https://pypi.org/project/SMPyBandits" /><img alt="PyPI pyversions" src="https://img.shields.io/pypi/pyversions/smpybandits.svg?logo=python" />
</a>
<a class="reference external" href="https://pypi.org/project/SMPyBandits"><img alt="https://pypi.org/project/SMPyBandits" src="https://pypi.org/project/SMPyBandits" /><img alt="PyPI download" src="https://img.shields.io/pypi/dm/smpybandits.svg" /></a>
<a class="reference external" href="https://pypi.org/project/SMPyBandits"><img alt="https://pypi.org/project/SMPyBandits" src="https://pypi.org/project/SMPyBandits" /><img alt="PyPI status" src="https://img.shields.io/pypi/status/smpybandits.svg" /></a>
<a class="reference external" href="https://SMPyBandits.ReadTheDocs.io/en/latest/?badge=latest"><img alt="Documentation Status" src="https://readthedocs.org/projects/smpybandits/badge/?version=latest" /></a>
<a class="reference external" href="https://travis-ci.org/SMPyBandits/SMPyBandits"><img alt="Build Status" src="https://travis-ci.org/SMPyBandits/SMPyBandits.svg?branch=master" /></a>
<a class="reference external" href="https://GitHub.com/SMPyBandits/SMPyBandits/stargazers"><img alt="Stars of https://github.com/SMPyBandits/SMPyBandits/" src="https://badgen.net/github/stars/SMPyBandits/SMPyBandits" /></a>
<a class="reference external" href="https://github.com/SMPyBandits/SMPyBandits/releases"><img alt="Releases of https://github.com/SMPyBandits/SMPyBandits/" src="https://badgen.net/github/release/SMPyBandits/SMPyBandits" /></a></p>
</div>
</div>
</div>


           </div>
           
          </div>
          <footer>
  
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="TODO.html" class="btn btn-neutral float-right" title="💥 TODO" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
      
      
        <a href="API.html" class="btn btn-neutral float-left" title="Short documentation of the API" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
      
    </div>
  

  <hr/>

  <div role="contentinfo">
    <p>
        &copy; Copyright 2016-2018, Lilian Besson (Naereen)
      <span class="lastupdated">
        Last updated on 25 Feb 2020, 14h.
      </span>

    </p>
  </div>
  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 

</footer>

        </div>
      </div>

    </section>

  </div>
  

  <script type="text/javascript">
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>

  
</body>
</html>