From f3b32a7c379cb668065084cd8e23223a96db9607 Mon Sep 17 00:00:00 2001 From: zingale Date: Tue, 30 Jan 2024 16:22:41 +0000 Subject: [PATCH] deploy: 8ab93c3d94f438edb5543a985cc7aec2c99bdf42 --- .../process.xrb | 8 ++--- _sources/nersc-hpss.rst.txt | 34 +++++++++++-------- nersc-hpss.html | 30 ++++++++++------ searchindex.js | 2 +- 4 files changed, 44 insertions(+), 30 deletions(-) diff --git a/_downloads/f1505febfbe5937d242281ec790ebc2d/process.xrb b/_downloads/f1505febfbe5937d242281ec790ebc2d/process.xrb index 46cb632..25972d9 100644 --- a/_downloads/f1505febfbe5937d242281ec790ebc2d/process.xrb +++ b/_downloads/f1505febfbe5937d242281ec790ebc2d/process.xrb @@ -17,7 +17,7 @@ work_dir=`pwd` HPSS_DIR=`basename $work_dir` # set HTAR command -HTAR=/usr/bin/htar +HTAR=htar # path to the ftime executable -- used for making a simple ftime.out file # listing the name of the plotfile and its simulation time @@ -229,12 +229,12 @@ function process_files datestr=$(date +"%Y%m%d_%H%M_%S") ftime_files=$(find . -maxdepth 1 -name "ftime.out" -print) inputs_files=$(find . -maxdepth 1 -name "inputs*" -print) -probin_files=$(find . -maxdepth 1 -name "probin*" -print) +diag_files=$(find . -maxdepth 1 -name "*diag.out" -print) model_files=$(find . -maxdepth 1 -name "*.hse.*" -print) -slurm_files=$(find . -maxdepth 1 -name "*.slurm" -print) +job_files=$(find . -maxdepth 1 \( -name "*.slurm" -o -name "*.submit" \) -print) process_files=$(find . -maxdepth 1 -name "process*" -print) -${HTAR} -cvf ${HPSS_DIR}/diag_files_${datestr}.tar ${model_files} ${ftime_files} ${inputs_files} ${probin_files} ${slurm_files} ${process_files} >> /dev/null +${HTAR} -cvf ${HPSS_DIR}/diag_files_${datestr}.tar ${model_files} ${ftime_files} ${inputs_files} ${diag_files} ${job_files} ${process_files} >> /dev/null # Loop, waiting for plt and chk directories to appear. 
diff --git a/_sources/nersc-hpss.rst.txt b/_sources/nersc-hpss.rst.txt index 1e1c15e..4a3dae1 100644 --- a/_sources/nersc-hpss.rst.txt +++ b/_sources/nersc-hpss.rst.txt @@ -67,21 +67,27 @@ The following describes how to use the scripts: overwriting the stored copy, especially if a purge took place. The same is done with checkpoint files. +Some additional notes: -Additionally, if the ``ftime`` executable is in your path -(``ftime.cpp`` lives in ``amrex/Tools/Plotfile/``), then -the script will create a file called ``ftime.out`` that lists the name -of the plotfile and the corresponding simulation time. - -Finally, right when the job is submitted, the script will tar up all -of the diagnostic files, ``ftime.out``, submission script, inputs and -probin, and archive them on HPSS. The .tar file is given a name that -contains the date-string to allow multiple archives to co-exist. When -``process.xrb`` is running, it creates a lockfile (called -``process.pid``) that ensures that only one instance of the script is -running at any one time. Sometimes if the machine crashes, the -``process.pid`` file will be left behind, in which case, the script -aborts. Just delete that if you know the script is not running. +* If the ``ftime`` executable is in your path (``ftime.cpp`` lives in + ``amrex/Tools/Plotfile/``), then the script will create a file + called ``ftime.out`` that lists the name of the plotfile and the + corresponding simulation time. + +* Right when the job is run, the script will tar up all of the + diagnostic files, ``ftime.out``, submission script, and inputs, and + archive them on HPSS. The ``.tar`` file is given a name that contains + the date-string to allow multiple archives to co-exist. + +* When ``process.xrb`` is running, it creates a lockfile (called + ``process.pid``) that ensures that only one instance of the script + is running at any one time. + + .. 
warning:: + + If the job is not terminated normally, the + ``process.pid`` file may be left behind, in which case the script + aborts. Just delete that file if you know the script is not running. Jobs in the xfer queue start up quickly. The best approach is to start one as you start your main job (or make it dependent on the main diff --git a/nersc-hpss.html b/nersc-hpss.html index 11fdbab..d236fbc 100644 --- a/nersc-hpss.html +++ b/nersc-hpss.html @@ -144,19 +144,27 @@

Archiving Data to HPSS -

Additionally, if the ftime executable is in your path -(ftime.cpp lives in amrex/Tools/Plotfile/), then -the script will create a file called ftime.out that lists the name -of the plotfile and the corresponding simulation time.

-

Finally, right when the job is submitted, the script will tar up all -of the diagnostic files, ftime.out, submission script, inputs and -probin, and archive them on HPSS. The .tar file is given a name that -contains the date-string to allow multiple archives to co-exist. When -process.xrb is running, it creates a lockfile (called -process.pid) that ensures that only one instance of the script is -running at any one time. Sometimes if the machine crashes, the +

Some additional notes:

+
    +
  • If the ftime executable is in your path (ftime.cpp lives in +amrex/Tools/Plotfile/), then the script will create a file +called ftime.out that lists the name of the plotfile and the +corresponding simulation time.

  • +
  • Right when the job is run, the script will tar up all of the +diagnostic files, ftime.out, submission script, and inputs, and +archive them on HPSS. The .tar file is given a name that contains +the date-string to allow multiple archives to co-exist.

  • +
  • When process.xrb is running, it creates a lockfile (called +process.pid) that ensures that only one instance of the script +is running at any one time.

    +
    +

    Warning

    +

    If the job is not terminated normally, the process.pid file may be left behind, in which case the script aborts. Just delete that file if you know the script is not running.

    +
    +
  • +

Jobs in the xfer queue start up quickly. The best approach is to start one as you start your main job (or make it dependent on the main job). The sample process.xrb script will wait for output and then diff --git a/searchindex.js b/searchindex.js index 84b26c5..7cb0241 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["alcf", "iacs", "index", "nersc", "nersc-compilers", "nersc-hpss", "nersc-visualization", "nersc-workflow", "olcf", "olcf-andes", "olcf-compilers", "olcf-jupyter", "olcf-workflow", "workstations"], "filenames": ["alcf.rst", "iacs.rst", "index.rst", "nersc.rst", "nersc-compilers.rst", "nersc-hpss.rst", "nersc-visualization.rst", "nersc-workflow.rst", "olcf.rst", "olcf-andes.rst", "olcf-compilers.rst", "olcf-jupyter.rst", "olcf-workflow.rst", "workstations.rst"], "titles": ["Working at ALCF", "Working at IACS", "AMReX Astrophysics Suite", "Working at NERSC", "Compiling at NERSC", "Archiving Data to HPSS", "Visualization at NERSC", "Managing Jobs at NERSC", "Working at OLCF", "Batch Visualization on Andes", "Compiling at OLCF", "Running Jupyter Remotely from OLCF", "Managing Jobs at OLCF", "Linux Workstations"], "terms": {"polari": 0, "ha": [0, 1, 5, 7, 11], "560": 0, "node": [0, 1, 6, 7, 9, 12], "each": [0, 1, 7, 12], "4": [0, 1, 4, 7, 10, 12, 13], "nvidia": [0, 7], "a100": [0, 7], "gpu": [0, 2, 7, 9, 10, 12], "The": [0, 1, 5, 6, 7, 10, 12], "pb": 0, "schedul": [0, 12], "i": [0, 1, 5, 6, 7, 9, 10, 11, 12], "us": [0, 1, 4, 5, 7, 9, 10, 11, 12, 13], "ssh": [0, 1, 4, 13], "ornl": [0, 9, 10, 12], "gov": [0, 4, 5, 6, 9, 10, 12], "To": [0, 1, 7, 11, 12, 13], "have": [0, 1, 7, 9, 10, 11, 12], "custom": 0, "bashrc": [0, 9], "creat": [0, 5, 6, 7, 8, 9, 12], "bash": [0, 1, 6, 7, 9, 12], "expert": 0, "file": [0, 1, 5, 7, 12], "add": [0, 1, 11, 12, 13], "anyth": 0, "thi": [0, 1, 5, 6, 7, 9, 10, 11, 12], "read": [0, 12], "end": [0, 7, 12], "etc": 0, "load": [0, 1, 4, 6, 7, 9, 10, 11, 12, 13], "modul": [0, 1, 4, 6, 7, 9, 10, 
11, 12, 13], "swap": [0, 10], "prgenv": [0, 4, 7, 10, 12], "nvhpc": 0, "gnu": [0, 1, 4, 7, 10, 12, 13], "gcc": [0, 3, 10, 12, 13], "11": [0, 6, 10, 12, 13], "2": [0, 7, 9, 10, 12, 13], "0": [0, 1, 6, 7, 10, 11, 12], "version": [0, 1, 10, 12, 13], "sinc": [0, 5, 7, 10, 12], "cuda": [0, 3, 7, 10, 12, 13], "doesn": [0, 10, 12], "39": 0, "t": [0, 5, 6, 7, 9, 10, 12], "support": [0, 1, 9, 10, 12], "12": [0, 1], "yet": 0, "mix": [0, 10, 12], "Then": [0, 9, 10, 12], "you": [0, 1, 4, 5, 6, 7, 9, 10, 11, 12, 13], "can": [0, 1, 5, 6, 7, 9, 11, 12, 13], "via": [0, 1, 4, 7, 9, 10, 11, 12], "make": [0, 1, 4, 5, 9, 10, 11, 12, 13], "comp": [0, 1, 4, 10, 13], "use_cuda": [0, 4, 10, 13], "true": [0, 1, 4, 10, 12, 13], "project": [0, 1, 7, 9], "workspac": 0, "lu": 0, "grand": 0, "astroexplos": 0, "http": [0, 1, 2, 5, 6, 10, 12, 13], "www": [0, 1], "anl": 0, "user": 0, "guid": [0, 1, 12], "run": [0, 1, 2, 5, 6, 7, 8, 9, 10, 12, 13], "index": 0, "html": [0, 10, 12], "For": [0, 7, 9, 12], "product": [0, 12], "prod": 0, "smallest": [0, 12], "count": 0, "seem": [0, 1, 10], "allow": [0, 5, 7, 12], "10": [0, 10, 12, 13], "clone": [0, 9], "gettingstart": 0, "repo": [0, 9], "git": [0, 1, 9], "github": [0, 2, 4, 9, 12], "com": [0, 1, 2, 9, 12], "argonn": 0, "lcf": 0, "ll": [0, 9], "want": [0, 5, 7, 9, 12], "exampl": [0, 7, 10, 12], "affinity_gpu": 0, "particular": [0, 2, 6, 7, 12], "need": [0, 1, 5, 6, 7, 9, 10, 11, 12, 13], "script": [0, 5, 6, 7, 8, 9, 10], "set_affinity_gpu_polari": 0, "sh": [0, 7, 12], "copi": [0, 5], "your": [0, 5, 6, 7, 9, 10, 11, 12, 13], "directori": [0, 5, 7, 9, 12], "here": [0, 6, 7, 9, 12], "": [0, 7, 11, 12], "submiss": [0, 5, 6, 7, 9, 10, 12], "bin": [0, 6, 7, 9, 12], "l": [0, 7, 13], "select": [0, 7, 12], "system": [0, 7, 10, 12], "place": [0, 5], "scatter": 0, "walltim": [0, 12], "30": [0, 12], "00": [0, 6, 7, 9, 12], "q": [0, 6, 7, 12], "debug": [0, 7, 8], "A": [0, 6, 7, 9, 12], "exec": [0, 12], "castro2d": [0, 7, 12], "mpi": [0, 1, 4, 7, 12], "smplsdc": [0, 
7], "ex": [0, 1, 7, 12], "input": [0, 1, 5, 7, 12], "inputs_2d": [0, 7], "n14": [0, 7], "coars": [0, 7], "enabl": [0, 4], "applic": [0, 12], "export": [0, 1, 6, 7, 12], "mpich_gpu_support_en": 0, "1": [0, 1, 6, 7, 9, 12], "chang": [0, 7, 12], "cd": [0, 7, 9], "pbs_o_workdir": 0, "openmp": [0, 1, 7, 10, 12], "set": [0, 1, 7, 12], "nnode": [0, 12], "wc": 0, "pbs_nodefil": 0, "nranks_per_nod": 0, "ndepth": 0, "8": [0, 6, 7, 12], "nthread": 0, "ntotrank": 0, "mpiexec": [0, 1], "bind": [0, 7, 12], "rank": 0, "n": [0, 1, 6, 7, 9, 11, 12, 13], "ppn": 0, "depth": 0, "cpu": [0, 6, 7, 12], "env": [0, 9], "omp_num_thread": [0, 1, 6, 7, 12], "omp_plac": [0, 6, 7], "thread": [0, 1, 7, 12], "do": [0, 1, 5, 6, 7, 9, 11, 12, 13], "qsub": 0, "check": [0, 7, 12], "statu": [0, 7, 8], "qstat": 0, "u": [0, 7, 12], "usernam": [0, 7, 12], "from": [0, 4, 5, 7, 8, 9, 12], "last": [0, 7, 12], "checkpoint": [0, 5, 7, 12], "6": [0, 7, 12], "j": [0, 1, 4, 6, 7, 9, 10, 12, 13], "eo": 0, "function": [0, 7, 12], "find_chk_fil": [0, 7, 12], "take": [0, 1, 7, 12], "singl": [0, 7, 12], "argument": [0, 7, 12], "wildcard": [0, 7, 12], "pattern": [0, 7, 12], "look": [0, 5, 7, 12], "through": [0, 7, 12], "chk": [0, 7, 12], "find": [0, 1, 7, 10, 12], "latest": [0, 1, 7, 12], "wai": [0, 6, 7, 12], "didn": [0, 7, 12], "complet": [0, 7, 12], "we": [0, 1, 4, 7, 9, 12, 13], "fall": [0, 7, 12], "back": [0, 7, 12], "previou": [0, 7, 12], "one": [0, 5, 7, 12], "temp_fil": [0, 7, 12], "maxdepth": [0, 7, 12], "name": [0, 5, 6, 7, 12], "print": [0, 7, 12], "sort": [0, 7, 12], "tail": [0, 7, 12], "restartfil": [0, 7, 12], "f": [0, 7, 12], "header": [0, 7, 12, 13], "thing": [0, 7, 12], "written": [0, 5, 7, 12], "updat": [0, 7, 12], "fi": [0, 7, 12], "done": [0, 1, 5, 7, 12], "7": [0, 4, 7, 9, 10, 11, 12, 13], "digit": [0, 7, 12], "5": [0, 1, 7, 10, 11, 12], "restartstr": [0, 7, 12], "empti": [0, 7, 12], "ar": [0, 1, 4, 5, 7, 10, 12], "found": [0, 7, 12], "e": [0, 4, 5, 7, 9, 11, 12], "new": [0, 7, 9, 11, 12], "els": 
[0, 7, 12], "amr": [0, 1, 7, 12], "chainqsub": 0, "echo": [0, 7, 12], "usag": [0, 12], "jobid": [0, 7], "number": [0, 1, 7, 12], "initi": [0, 7, 12], "depend": [0, 5, 7, 12], "exit": [0, 7, 12], "3": [0, 1, 4, 6, 7, 9, 10, 11, 12, 13], "oldjob": 0, "numjob": 0, "gt": 0, "20": [0, 7], "too": [0, 12], "mani": [0, 12], "request": [0, 8], "firstcount": 0, "eq": 0, "start": [0, 1, 5, 7, 12], "aout": 0, "sleep": 0, "seq": 0, "w": [0, 12], "afterani": 0, "48": 1, "comput": [1, 7, 12], "core": [1, 6, 7, 12], "group": 1, "pool": 1, "actual": [1, 5, 7], "13th": 1, "o": [1, 6, 7, 12], "stuff": 1, "so": [1, 7, 10, 12, 13], "an": [1, 7, 9, 10, 12, 13], "ideal": 1, "config": [1, 11], "would": [1, 7, 9, 12], "log": [1, 4, 10, 12], "login": 1, "stonybrook": 1, "edu": [1, 13], "tell": [1, 7, 12], "about": [1, 2, 12], "machin": [1, 2, 5, 8], "put": [1, 12], "follow": [1, 5, 6, 7, 9, 12], "local": [1, 13], "tool": [1, 5], "gnumak": 1, "raw": 1, "githubusercont": 1, "astro": [1, 10, 12, 13], "workflow": [1, 12], "main": [1, 5, 12], "job_script": [1, 12], "onli": [1, 5, 7, 12], "access": [1, 5, 7, 10, 12], "environ": [1, 6, 8, 9, 10, 12], "note": [1, 12], "srun": [1, 6, 7, 9, 12], "p": [1, 9, 12], "short": 1, "pty": 1, "interact": [1, 12], "slurm": [1, 5, 7, 9, 12], "job": [1, 3, 5, 8], "time": [1, 5, 7, 12], "out": [1, 5, 6, 7, 9, 12], "after": [1, 7, 12], "hour": [1, 12], "infinit": 1, "fj": 1, "debug1": 1, "debug2": 1, "them": [1, 5, 12], "There": [1, 7, 12, 13], "cce": 1, "sve": 1, "former": 1, "newer": 1, "llvm": 1, "base": [1, 7], "ocompil": 1, "fortran": 1, "doe": [1, 9, 10], "arm": 1, "architectur": [1, 8], "latter": 1, "older": [1, 13], "even": 1, "though": 1, "both": 1, "form": [1, 12], "x": [1, 12, 13], "thei": [1, 11, 12], "differ": [1, 7], "option": [1, 7, 10, 12], "see": [1, 7, 10, 12], "commcm": 1, "faq": 1, "get": [1, 12], "php": 1, "cpe": 1, "mvapich2_nogpu": 1, "should": [1, 4, 7, 10, 12], "test": [1, 4, 7, 10, 12], "mak": 1, "recogn": 1, "switch": 1, "old": 1, 
"flag": [1, 12], "build": [1, 2, 4, 10, 12, 13], "24": 1, "use_mpi": [1, 4, 10, 12, 13], "fals": [1, 4, 10, 12, 13], "long": [1, 5], "At": [1, 11], "moment": [1, 10], "link": 1, "cannot": 1, "nopattern": 1, "error": [1, 12], "which": [1, 5, 7, 9, 10, 11, 12], "why": 1, "comment": 1, "abov": [1, 12], "lustr": [1, 12], "global": 1, "softwar": 1, "a64fx": 1, "modulefil": 1, "mvapich2": 1, "use_omp": [1, 4, 10], "know": [1, 5], "chip": 1, "specif": [1, 9], "mv2_enable_affin": 1, "castro3d": [1, 12], "omp": [1, 7], "3d": [1, 12], "sph": [1, 12], "max_level": 1, "max_step": 1, "These": 2, "doc": [2, 5, 6, 10, 12], "provid": [2, 12], "inform": [2, 12], "code": [2, 9, 10, 12, 13], "nyx": 2, "maestroex": 2, "castro": [2, 4, 7, 10, 12], "variou": 2, "includ": [2, 7, 12], "hpc": [2, 12], "center": 2, "workstat": 2, "gener": [2, 12, 13], "work": [2, 9, 10, 12, 13], "alcf": 2, "nersc": [2, 5], "olcf": [2, 9], "iac": 2, "linux": 2, "compil": [3, 7, 8, 12, 13], "perlmutt": 3, "hypr": 3, "manag": [3, 8], "filesystem": [3, 5, 12], "chain": [3, 8], "archiv": [3, 8], "data": [3, 12], "hpss": [3, 8], "visual": [3, 8, 13], "p1": 4, "cudatoolkit": [4, 7], "python": [4, 6, 9, 10, 11, 12], "requir": [4, 11, 12], "process": [4, 5, 7, 12], "g": [4, 5, 11, 12], "sedov": 4, "hydro": 4, "problem": [4, 7, 12], "tiny_profil": 4, "obtain": [4, 7], "built": 4, "same": [4, 5, 7, 10, 12], "hypre_cuda_sm": 4, "80": [4, 12], "cxx": 4, "cc": 4, "fc": 4, "ftn": 4, "configur": 4, "prefix": 4, "path": [4, 5, 12, 13], "instal": [4, 6, 9, 11, 12], "unifi": 4, "memori": [4, 7, 9, 10, 12], "larg": [5, 9], "tape": 5, "librari": [5, 7, 10, 11], "store": [5, 12], "simul": [5, 12], "period": 5, "It": [5, 9, 12], "recommend": [5, 10], "move": [5, 12], "frequent": 5, "scratch": [5, 7, 12], "fill": 5, "up": [5, 7, 12], "purg": 5, "xfer": 5, "automat": [5, 12], "submit": [5, 7, 8, 9], "queue": [5, 7, 12], "xrb": [5, 9, 12], "continu": [5, 13], "output": [5, 12, 13], "describ": [5, 11, 12], "how": [5, 12], "plotfil": 
[5, 9, 12], "locat": [5, 7], "just": [5, 12, 13], "full": [5, 12], "call": [5, 9, 12], "pscratch": 5, "sd": 5, "z": 5, "zingal": 5, "wdconvect": 5, "hsi": [5, 12], "mkdir": 5, "wdconvect_run": 5, "If": [5, 7, 9, 11, 12], "command": [5, 12, 13], "prompt": [5, 13], "password": [5, 13], "talk": 5, "help": [5, 12], "desk": 5, "ask": [5, 12], "less": 5, "sbatch": [5, 6, 7, 9, 12], "background": [5, 7, 12], "wait": [5, 7, 12], "until": [5, 7, 12], "alwai": [5, 12], "leav": [5, 12], "most": [5, 7], "recent": [5, 6, 12], "alon": [5, 12], "mai": [5, 7, 12], "still": [5, 12], "htar": [5, 12], "wa": [5, 12], "success": 5, "subdirectori": [5, 12], "import": [5, 10, 12], "don": 5, "try": [5, 12], "second": [5, 7, 12], "overwrit": 5, "especi": 5, "took": 5, "addition": 5, "ftime": [5, 12], "execut": [5, 7, 12], "cpp": 5, "live": 5, "amrex": [5, 10, 12], "list": [5, 12], "correspond": [5, 12], "final": [5, 7, 9, 12], "right": 5, "when": [5, 9, 10, 11, 12], "tar": [5, 12], "all": [5, 7, 12], "diagnost": 5, "probin": [5, 12], "given": 5, "contain": [5, 7, 12], "date": 5, "string": 5, "multipl": [5, 7], "co": 5, "exist": [5, 7, 9, 12], "lockfil": 5, "pid": [5, 7], "ensur": [5, 12], "instanc": [5, 7], "ani": [5, 7, 12], "sometim": [5, 12], "crash": [5, 12], "left": [5, 7, 12], "behind": 5, "case": [5, 7, 12], "abort": [5, 12], "delet": 5, "quickli": 5, "best": [5, 6, 7, 9, 10, 11, 12], "approach": 5, "sampl": [5, 6, 12], "produc": [5, 12], "yt": [6, 9, 11], "setup": [6, 9, 10, 12], "own": 6, "conda": [6, 8, 9], "step": [6, 7, 12], "develop": 6, "languag": 6, "someth": 6, "like": [6, 7, 12], "init": [6, 9], "myenv": 6, "activ": [6, 9], "more": [6, 7, 9, 12], "c": [6, 7, 9, 10, 11, 12], "forg": [6, 9, 11], "deactiv": 6, "m3018": [6, 7], "vi": 6, "vis_": 6, "01": 6, "ntask": [6, 7, 12], "per": [6, 7, 12], "regular": [6, 7], "omp_proc_bind": [6, 7], "spread": [6, 7], "massive_star_multi": 6, "py": [6, 7, 9, 12], "plt19862": 6, "1536": 7, "therefor": [7, 12], "task": [7, 12], "otherwis": 
[7, 10], "fail": 7, "runtim": [7, 10, 12], "becaus": [7, 12], "below": [7, 12], "16": 7, "also": [7, 10, 11, 12], "restart": [7, 12], "logic": 7, "m3018_g": 7, "subch_": 7, "map_gpu": 7, "signal": [7, 12], "b": [7, 12], "urg": [7, 12], "castro_exec": 7, "clean": [7, 12], "over": [7, 12], "rm": [7, 12], "dump_and_stop": [7, 12], "send": [7, 12], "sigurg": [7, 12], "batch": [7, 8, 12], "minut": [7, 12], "befor": [7, 12], "limit": [7, 12], "gracefulli": [7, 12], "sig_handl": [7, 12], "touch": [7, 12], "disabl": [7, 12], "handler": [7, 12], "trap": [7, 12], "alloc": [7, 8], "soon": [7, 12], "dump": [7, 12], "stop": [7, 12], "workdir": 7, "basenam": 7, "slurm_submit_dir": [7, 9], "slack_job_start": 7, "michael": 7, "builtin": [7, 12], "shell": [7, 12], "handl": [7, 12], "64": 7, "ret": 7, "128": [7, 12], "23": 7, "receiv": [7, 12], "keep": [7, 12], "refer": 7, "distribut": [7, 11], "parallel": 7, "hyper": 7, "share": [7, 10], "socket": [7, 12], "256": [7, 12], "howev": [7, 12], "assign": 7, "physic": [7, 12], "detail": [7, 8], "instruct": 7, "within": 7, "perlmutter_script": 7, "account": 7, "qo": 7, "02": 7, "constraint": 7, "In": [7, 10, 11, 12, 13], "order": [7, 10, 11, 12], "coupl": 7, "design": [7, 12], "strategi": 7, "first": [7, 9, 11, 12], "fix": [7, 12], "next": [7, 12], "virtual": 7, "avail": [7, 12], "compos": [7, 12], "two": [7, 12], "where": [7, 12], "numa": 7, "domain": [7, 12], "lower": 7, "shortag": 7, "principl": 7, "squeez": 7, "resourc": [7, 12], "wall": 7, "clock": 7, "timestep": [7, 12], "grep": 7, "slurm_output": 7, "repeat": 7, "perfect": 7, "balanc": [7, 12], "reach": 7, "choic": 7, "compar": 7, "max_grid_s": 7, "optim": 7, "valu": [7, 12], "usual": [7, 10, 12], "half": 7, "level": [7, 9, 12], "half_siz": 7, "furthermor": 7, "sever": [7, 12], "blocking_factor": 7, "size": [7, 12], "increas": [7, 12], "scale": 7, "correctli": 7, "go": 7, "down": [7, 10], "factor": 7, "break": 7, "bigger": 7, "chainslurm": [7, 12], "mpich_max_thread_safeti": 7, 
"x86": [7, 12], "milan": 7, "inputs_fil": 7, "inputs_nova_t7": 7, "slurm_ntasks_per_nod": 7, "slurm_nnod": 7, "slurm_cpus_per_task": 7, "By": [7, 12], "default": [7, 10, 12, 13], "altern": [7, 12], "common": 7, "cf": 7, "everyon": 7, "squeue": [7, 12], "me": [7, 12], "estim": [7, 12], "cancel": 7, "scancel": 7, "tb": 7, "quota": 7, "showquota": 7, "finish": [7, 12], "origin": 7, "remain": [7, 12], "view": [7, 12], "id": [7, 12], "summit": [8, 11], "frontier": 8, "writ": 8, "monitor": 8, "templat": 8, "troubleshoot": 8, "jupyt": 8, "remot": 8, "Andes": 8, "andes": 9, "andes_env": 9, "anaconda": 9, "anaconda3": [9, 11], "modifi": [9, 12], "ad": 9, "y": [9, 11], "ipykernel": [9, 11], "nb_conda_kernel": [9, 11], "sourc": 9, "top": 9, "pip": 9, "uninstal": 9, "ast106": [9, 12], "plot": 9, "vol": 9, "enuc": 9, "flame_wave_1000hz_25cm_smallplt203204": 9, "veri": [9, 12], "might": 9, "solut": 9, "accomplish": 9, "xl": 10, "atleast": 10, "due": 10, "17": 10, "won": [10, 12], "present": 10, "warn": [10, 12], "packag": 10, "fine": 10, "sure": [10, 11, 12], "current": [10, 12], "disallow": 10, "line": [10, 12], "pair": 10, "offload": 10, "control": 10, "use_omp_offload": 10, "featur": [10, 12], "frontier_user_guid": [10, 12], "program": 10, "crayp": [10, 12], "accel": [10, 12], "amd": [10, 12], "gfx90a": [10, 12], "crai": [10, 12], "mpich": [10, 12], "rocm": [10, 12], "higher": 10, "issu": 10, "burner": 10, "tabul": 10, "rate": 10, "exhibit": 10, "strang": 10, "slow": 10, "without": [10, 12, 13], "use_hip": [10, 12], "jupyterhub": 11, "document": [11, 12], "extra": 11, "part": 11, "notebook": 11, "wish": 11, "correct": [11, 12], "point": [11, 12, 13], "good": [11, 12], "idea": [11, 12], "my_env": 11, "jupyterlab": 11, "subsequ": 11, "channel": 11, "search": [11, 12], "let": 12, "review": 12, "our": 12, "goal": 12, "necessari": 12, "insight": 12, "better": 12, "decis": 12, "construct": 12, "explain": 12, "expos": 12, "section": 12, "condens": 12, "replac": 12, "21": 12, 
"reserv": 12, "ram": 12, "bank": 12, "connect": [12, 13], "bu": 12, "commun": 12, "among": 12, "defin": 12, "whole": 12, "structur": 12, "depict": 12, "figur": 12, "extract": 12, "summit_user_guid": 12, "launcher": 12, "jsrun": 12, "minim": 12, "collect": 12, "certain": 12, "oper": 12, "extend": 12, "discuss": 12, "now": 12, "determin": 12, "maximum": 12, "fit": 12, "accord": 12, "summon": 12, "bsub": 12, "addit": 12, "some": 12, "descript": 12, "perform": 12, "calcul": 12, "format": 12, "room": 12, "03": 12, "three": 12, "alloc_flag": 12, "smt4": 12, "consid": 12, "smt1": 12, "stand": 12, "interest": 12, "standard": 12, "stream": 12, "insid": 12, "similar": 12, "suppli": 12, "assum": 12, "between": 12, "small": 12, "smoothli": 12, "bug": [12, 13], "unix": 12, "mention": 12, "stdout_to_show": 12, "stderr_to_show": 12, "No": 12, "onc": 12, "grant": 12, "variabl": 12, "total": 12, "r": 12, "max": 12, "a1": 12, "c1": 12, "g1": 12, "r6": 12, "placehold": 12, "respect": 12, "match": 12, "box": 12, "grid": 12, "biggest": 12, "piec": 12, "32768": 12, "cell": 12, "100": 12, "131072": 12, "524288": 12, "32": 12, "2097152": 12, "7864320": 12, "93": 12, "75": 12, "480": 12, "30408704": 12, "90": 12, "625": 12, "assert": 12, "equival": 12, "impli": 12, "398": 12, "idl": 12, "sweep": 12, "entir": 12, "possibl": 12, "maxim": 12, "life": 12, "easier": 12, "instead": 12, "write": 12, "anoth": 12, "pack": 12, "statement": 12, "luna_script": 12, "luna_output": 12, "luna_sniffing_output": 12, "inputs_luna": 12, "n_re": 12, "n_cpu_cores_per_r": 12, "n_max_res_per_nod": 12, "n_mpi_per_r": 12, "n_gpu_per_r": 12, "downgrad": 12, "kill": 12, "As": 12, "week": 12, "month": 12, "mayb": 12, "year": 12, "come": 12, "salvat": 12, "mandatori": 12, "chkxxxxxxx": 12, "chkxxxxxx": 12, "chkxxxxx": 12, "implement": 12, "append": 12, "minimum": 12, "pick": 12, "amount": 12, "expir": 12, "pass": 12, "wt": 12, "cleanli": 12, "couldn": 12, "anywher": 12, "ignor": 12, "immedi": 12, "upon": 12, "sigchld": 
12, "sigwinch": 12, "least": 12, "trigger": 12, "other": 12, "event": 12, "launch": [12, 13], "must": 12, "jswait": 12, "save": 12, "bjob": 12, "slightli": 12, "nicer": 12, "jobstat": 12, "lead": 12, "enviro": 12, "simultan": 12, "n_res_1": 12, "n_res2": 12, "give": 12, "avoid": 12, "head": 12, "quiet": 12, "level_": 12, "dev": 12, "null": 12, "warning_tim": 12, "nohead": 12, "action_warning_tim": 12, "lsb_jobid": 12, "chain_submit": 12, "submit_script": 12, "return": 12, "transfer": 12, "15": 12, "cluster": 12, "dtn": 12, "test_hpss": 12, "associ": 12, "summit_hpss": 12, "jobs_script": 12, "togeth": 12, "detect": 12, "onto": 12, "under": 12, "enter": [12, 13], "being": 12, "unarchiv": 12, "bulk": 12, "hpss_xfer": 12, "plt00000": 12, "hpss_dir": 12, "plotfile_dir": 12, "fetch": 12, "unpack": 12, "attempt": 12, "recov": 12, "titan": 12, "polici": 12, "orion": 12, "storag": 12, "05": 12, "closest": 12, "hip": 12, "trento": 12, "nmpi_per_nod": 12, "total_nmpi": 12, "slurm_job_num_nod": 12, "june": 12, "2023": 12, "explicitli": 12, "blob": 12, "warpx": 12, "readthedoc": 12, "io": 12, "en": 12, "queu": 12, "rocgdb": 12, "27": 12, "turn": 12, "startup": 12, "session": 12, "salloc": 12, "mz": 12, "restor": 12, "reload": 12, "hip_enable_deferred_load": 12, "amd_serialize_kernel": 12, "amd_serialize_copi": 12, "amd_log_level": 12, "lot": 12, "debugg": 12, "pagin": 12, "off": 12, "trace": 12, "interrupt": 12, "bt": 12, "workaround": 12, "prevent": 12, "hang": 12, "fi_mr_cache_monitor": 12, "memhook": 12, "report": 12, "arena": 12, "big": 12, "the_arena_init_s": 12, "grow": 12, "suggest": 12, "larger": 12, "than": 12, "well": 13, "nvcc": 13, "cuda_vers": 13, "cc60": 13, "compile_cuda_path": 13, "usr": 13, "no_device_launch": 13, "around": 13, "cc70": 13, "On": 13, "lab": 13, "browser": 13, "ip": 13, "localhost": 13, "8888": 13, "sunysb": 13, "termin": 13, "window": 13, "web": 13, "token": 13, "appear": 13}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"work": 
[0, 1, 3, 8], "alcf": 0, "log": 0, "In": 0, "compil": [0, 1, 4, 10], "disk": 0, "queue": 0, "submit": [0, 12], "automat": 0, "restart": 0, "job": [0, 7, 12], "chain": [0, 7, 12], "iac": 1, "ookami": 1, "amrex": [1, 2], "setup": 1, "crai": 1, "gcc": [1, 4], "10": 1, "2": 1, "astrophys": 2, "suit": 2, "astro": 2, "basic": 2, "nersc": [3, 4, 6, 7], "perlmutt": [4, 7], "cuda": 4, "hypr": 4, "archiv": [5, 12], "data": 5, "hpss": [5, 12], "visual": [6, 9], "manag": [7, 12], "filesystem": 7, "olcf": [8, 10, 11, 12], "batch": 9, "Andes": 9, "summit": [10, 12], "frontier": [10, 12], "run": 11, "jupyt": [11, 13], "remot": [11, 13], "from": 11, "creat": 11, "conda": 11, "environ": 11, "architectur": 12, "request": 12, "alloc": 12, "writ": 12, "script": 12, "monitor": 12, "templat": 12, "machin": 12, "detail": 12, "statu": 12, "debug": 12, "troubleshoot": 12, "linux": 13, "workstat": 13, "gpu": 13, "offload": 13, "bender": 13, "groot": 13, "vi": 13}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 60}, "alltitles": {"Working at ALCF": [[0, "working-at-alcf"]], "Logging In": [[0, "logging-in"]], "Compiling": [[0, "compiling"]], "Disks": [[0, "disks"]], "Queues": [[0, "queues"]], "Submitting": [[0, "submitting"]], "Automatic Restarting": [[0, "automatic-restarting"]], "Job Chaining": [[0, "job-chaining"], [12, "job-chaining"]], "Working at IACS": [[1, "working-at-iacs"]], "Ookami": [[1, "ookami"]], "AMReX setup": [[1, "amrex-setup"]], "Cray compilers": [[1, "cray-compilers"]], "GCC": [[1, "gcc"]], "GCC 10.2": [[1, "gcc-10-2"]], "AMReX Astrophysics Suite": [[2, "amrex-astrophysics-suite"]], "AMReX Astro basics": [[2, null]], "Working at NERSC": [[3, "working-at-nersc"]], "Compiling at NERSC": [[4, 
"compiling-at-nersc"]], "Perlmutter": [[4, "perlmutter"], [7, "perlmutter"]], "Compiling with GCC + CUDA": [[4, "compiling-with-gcc-cuda"]], "Hypre": [[4, "hypre"]], "Archiving Data to HPSS": [[5, "archiving-data-to-hpss"]], "Visualization at NERSC": [[6, "visualization-at-nersc"]], "Managing Jobs at NERSC": [[7, "managing-jobs-at-nersc"]], "Filesystems": [[7, "filesystems"]], "Chaining": [[7, "chaining"]], "Working at OLCF": [[8, "working-at-olcf"]], "Batch Visualization on Andes": [[9, "batch-visualization-on-andes"]], "Compiling at OLCF": [[10, "compiling-at-olcf"]], "Summit": [[10, "summit"], [12, "summit"]], "Frontier": [[10, "frontier"], [12, "frontier"]], "Running Jupyter Remotely from OLCF": [[11, "running-jupyter-remotely-from-olcf"]], "Creating a conda environment": [[11, "creating-a-conda-environment"]], "Managing Jobs at OLCF": [[12, "managing-jobs-at-olcf"]], "Summit Architecture:": [[12, "summit-architecture"]], "Requesting Allocation:": [[12, "requesting-allocation"]], "Submitting a Job:": [[12, "submitting-a-job"]], "Writting a Job Script:": [[12, "writting-a-job-script"]], "Monitoring a Job:": [[12, "monitoring-a-job"]], "Script Template:": [[12, "script-template"]], "Chaining jobs": [[12, "chaining-jobs"]], "Archiving to HPSS": [[12, "archiving-to-hpss"]], "Machine details": [[12, "machine-details"]], "Submitting jobs": [[12, "submitting-jobs"]], "Job Status": [[12, "job-status"]], "Debugging": [[12, "debugging"]], "Troubleshooting": [[12, "troubleshooting"]], "Linux Workstations": [[13, "linux-workstations"]], "GPU offloading": [[13, "gpu-offloading"]], "bender": [[13, "bender"]], "groot": [[13, "groot"]], "Remote vis with Jupyter": [[13, "remote-vis-with-jupyter"]]}, "indexentries": {}}) \ No newline at end of file +Search.setIndex({"docnames": ["alcf", "iacs", "index", "nersc", "nersc-compilers", "nersc-hpss", "nersc-visualization", "nersc-workflow", "olcf", "olcf-andes", "olcf-compilers", "olcf-jupyter", "olcf-workflow", "workstations"], 
"filenames": ["alcf.rst", "iacs.rst", "index.rst", "nersc.rst", "nersc-compilers.rst", "nersc-hpss.rst", "nersc-visualization.rst", "nersc-workflow.rst", "olcf.rst", "olcf-andes.rst", "olcf-compilers.rst", "olcf-jupyter.rst", "olcf-workflow.rst", "workstations.rst"], "titles": ["Working at ALCF", "Working at IACS", "AMReX Astrophysics Suite", "Working at NERSC", "Compiling at NERSC", "Archiving Data to HPSS", "Visualization at NERSC", "Managing Jobs at NERSC", "Working at OLCF", "Batch Visualization on Andes", "Compiling at OLCF", "Running Jupyter Remotely from OLCF", "Managing Jobs at OLCF", "Linux Workstations"], "terms": {"polari": 0, "ha": [0, 1, 5, 7, 11], "560": 0, "node": [0, 1, 6, 7, 9, 12], "each": [0, 1, 7, 12], "4": [0, 1, 4, 7, 10, 12, 13], "nvidia": [0, 7], "a100": [0, 7], "gpu": [0, 2, 7, 9, 10, 12], "The": [0, 1, 5, 6, 7, 10, 12], "pb": 0, "schedul": [0, 12], "i": [0, 1, 5, 6, 7, 9, 10, 11, 12], "us": [0, 1, 4, 5, 7, 9, 10, 11, 12, 13], "ssh": [0, 1, 4, 13], "ornl": [0, 9, 10, 12], "gov": [0, 4, 5, 6, 9, 10, 12], "To": [0, 1, 7, 11, 12, 13], "have": [0, 1, 7, 9, 10, 11, 12], "custom": 0, "bashrc": [0, 9], "creat": [0, 5, 6, 7, 8, 9, 12], "bash": [0, 1, 6, 7, 9, 12], "expert": 0, "file": [0, 1, 5, 7, 12], "add": [0, 1, 11, 12, 13], "anyth": 0, "thi": [0, 1, 5, 6, 7, 9, 10, 11, 12], "read": [0, 12], "end": [0, 7, 12], "etc": 0, "load": [0, 1, 4, 6, 7, 9, 10, 11, 12, 13], "modul": [0, 1, 4, 6, 7, 9, 10, 11, 12, 13], "swap": [0, 10], "prgenv": [0, 4, 7, 10, 12], "nvhpc": 0, "gnu": [0, 1, 4, 7, 10, 12, 13], "gcc": [0, 3, 10, 12, 13], "11": [0, 6, 10, 12, 13], "2": [0, 7, 9, 10, 12, 13], "0": [0, 1, 6, 7, 10, 11, 12], "version": [0, 1, 10, 12, 13], "sinc": [0, 5, 7, 10, 12], "cuda": [0, 3, 7, 10, 12, 13], "doesn": [0, 10, 12], "39": 0, "t": [0, 5, 6, 7, 9, 10, 12], "support": [0, 1, 9, 10, 12], "12": [0, 1], "yet": 0, "mix": [0, 10, 12], "Then": [0, 9, 10, 12], "you": [0, 1, 4, 5, 6, 7, 9, 10, 11, 12, 13], "can": [0, 1, 5, 6, 7, 9, 11, 12, 13], "via": [0, 
1, 4, 7, 9, 10, 11, 12], "make": [0, 1, 4, 5, 9, 10, 11, 12, 13], "comp": [0, 1, 4, 10, 13], "use_cuda": [0, 4, 10, 13], "true": [0, 1, 4, 10, 12, 13], "project": [0, 1, 7, 9], "workspac": 0, "lu": 0, "grand": 0, "astroexplos": 0, "http": [0, 1, 2, 5, 6, 10, 12, 13], "www": [0, 1], "anl": 0, "user": 0, "guid": [0, 1, 12], "run": [0, 1, 2, 5, 6, 7, 8, 9, 10, 12, 13], "index": 0, "html": [0, 10, 12], "For": [0, 7, 9, 12], "product": [0, 12], "prod": 0, "smallest": [0, 12], "count": 0, "seem": [0, 1, 10], "allow": [0, 5, 7, 12], "10": [0, 10, 12, 13], "clone": [0, 9], "gettingstart": 0, "repo": [0, 9], "git": [0, 1, 9], "github": [0, 2, 4, 9, 12], "com": [0, 1, 2, 9, 12], "argonn": 0, "lcf": 0, "ll": [0, 9], "want": [0, 5, 7, 9, 12], "exampl": [0, 7, 10, 12], "affinity_gpu": 0, "particular": [0, 2, 6, 7, 12], "need": [0, 1, 5, 6, 7, 9, 10, 11, 12, 13], "script": [0, 5, 6, 7, 8, 9, 10], "set_affinity_gpu_polari": 0, "sh": [0, 7, 12], "copi": [0, 5], "your": [0, 5, 6, 7, 9, 10, 11, 12, 13], "directori": [0, 5, 7, 9, 12], "here": [0, 6, 7, 9, 12], "": [0, 7, 11, 12], "submiss": [0, 5, 6, 7, 9, 10, 12], "bin": [0, 6, 7, 9, 12], "l": [0, 7, 13], "select": [0, 7, 12], "system": [0, 7, 10, 12], "place": [0, 5], "scatter": 0, "walltim": [0, 12], "30": [0, 12], "00": [0, 6, 7, 9, 12], "q": [0, 6, 7, 12], "debug": [0, 7, 8], "A": [0, 6, 7, 9, 12], "exec": [0, 12], "castro2d": [0, 7, 12], "mpi": [0, 1, 4, 7, 12], "smplsdc": [0, 7], "ex": [0, 1, 7, 12], "input": [0, 1, 5, 7, 12], "inputs_2d": [0, 7], "n14": [0, 7], "coars": [0, 7], "enabl": [0, 4], "applic": [0, 12], "export": [0, 1, 6, 7, 12], "mpich_gpu_support_en": 0, "1": [0, 1, 6, 7, 9, 12], "chang": [0, 7, 12], "cd": [0, 7, 9], "pbs_o_workdir": 0, "openmp": [0, 1, 7, 10, 12], "set": [0, 1, 7, 12], "nnode": [0, 12], "wc": 0, "pbs_nodefil": 0, "nranks_per_nod": 0, "ndepth": 0, "8": [0, 6, 7, 12], "nthread": 0, "ntotrank": 0, "mpiexec": [0, 1], "bind": [0, 7, 12], "rank": 0, "n": [0, 1, 6, 7, 9, 11, 12, 13], "ppn": 0, "depth": 
0, "cpu": [0, 6, 7, 12], "env": [0, 9], "omp_num_thread": [0, 1, 6, 7, 12], "omp_plac": [0, 6, 7], "thread": [0, 1, 7, 12], "do": [0, 1, 5, 6, 7, 9, 11, 12, 13], "qsub": 0, "check": [0, 7, 12], "statu": [0, 7, 8], "qstat": 0, "u": [0, 7, 12], "usernam": [0, 7, 12], "from": [0, 4, 5, 7, 8, 9, 12], "last": [0, 7, 12], "checkpoint": [0, 5, 7, 12], "6": [0, 7, 12], "j": [0, 1, 4, 6, 7, 9, 10, 12, 13], "eo": 0, "function": [0, 7, 12], "find_chk_fil": [0, 7, 12], "take": [0, 1, 7, 12], "singl": [0, 7, 12], "argument": [0, 7, 12], "wildcard": [0, 7, 12], "pattern": [0, 7, 12], "look": [0, 5, 7, 12], "through": [0, 7, 12], "chk": [0, 7, 12], "find": [0, 1, 7, 10, 12], "latest": [0, 1, 7, 12], "wai": [0, 6, 7, 12], "didn": [0, 7, 12], "complet": [0, 7, 12], "we": [0, 1, 4, 7, 9, 12, 13], "fall": [0, 7, 12], "back": [0, 7, 12], "previou": [0, 7, 12], "one": [0, 5, 7, 12], "temp_fil": [0, 7, 12], "maxdepth": [0, 7, 12], "name": [0, 5, 6, 7, 12], "print": [0, 7, 12], "sort": [0, 7, 12], "tail": [0, 7, 12], "restartfil": [0, 7, 12], "f": [0, 7, 12], "header": [0, 7, 12, 13], "thing": [0, 7, 12], "written": [0, 5, 7, 12], "updat": [0, 7, 12], "fi": [0, 7, 12], "done": [0, 1, 5, 7, 12], "7": [0, 4, 7, 9, 10, 11, 12, 13], "digit": [0, 7, 12], "5": [0, 1, 7, 10, 11, 12], "restartstr": [0, 7, 12], "empti": [0, 7, 12], "ar": [0, 1, 4, 5, 7, 10, 12], "found": [0, 7, 12], "e": [0, 4, 5, 7, 9, 11, 12], "new": [0, 7, 9, 11, 12], "els": [0, 7, 12], "amr": [0, 1, 7, 12], "chainqsub": 0, "echo": [0, 7, 12], "usag": [0, 12], "jobid": [0, 7], "number": [0, 1, 7, 12], "initi": [0, 7, 12], "depend": [0, 5, 7, 12], "exit": [0, 7, 12], "3": [0, 1, 4, 6, 7, 9, 10, 11, 12, 13], "oldjob": 0, "numjob": 0, "gt": 0, "20": [0, 7], "too": [0, 12], "mani": [0, 12], "request": [0, 8], "firstcount": 0, "eq": 0, "start": [0, 1, 5, 7, 12], "aout": 0, "sleep": 0, "seq": 0, "w": [0, 12], "afterani": 0, "48": 1, "comput": [1, 7, 12], "core": [1, 6, 7, 12], "group": 1, "pool": 1, "actual": [1, 5, 7], "13th": 1, 
"o": [1, 6, 7, 12], "stuff": 1, "so": [1, 7, 10, 12, 13], "an": [1, 7, 9, 10, 12, 13], "ideal": 1, "config": [1, 11], "would": [1, 7, 9, 12], "log": [1, 4, 10, 12], "login": 1, "stonybrook": 1, "edu": [1, 13], "tell": [1, 7, 12], "about": [1, 2, 12], "machin": [1, 2, 8], "put": [1, 12], "follow": [1, 5, 6, 7, 9, 12], "local": [1, 13], "tool": [1, 5], "gnumak": 1, "raw": 1, "githubusercont": 1, "astro": [1, 10, 12, 13], "workflow": [1, 12], "main": [1, 5, 12], "job_script": [1, 12], "onli": [1, 5, 7, 12], "access": [1, 5, 7, 10, 12], "environ": [1, 6, 8, 9, 10, 12], "note": [1, 5, 12], "srun": [1, 6, 7, 9, 12], "p": [1, 9, 12], "short": 1, "pty": 1, "interact": [1, 12], "slurm": [1, 5, 7, 9, 12], "job": [1, 3, 5, 8], "time": [1, 5, 7, 12], "out": [1, 5, 6, 7, 9, 12], "after": [1, 7, 12], "hour": [1, 12], "infinit": 1, "fj": 1, "debug1": 1, "debug2": 1, "them": [1, 5, 12], "There": [1, 7, 12, 13], "cce": 1, "sve": 1, "former": 1, "newer": 1, "llvm": 1, "base": [1, 7], "ocompil": 1, "fortran": 1, "doe": [1, 9, 10], "arm": 1, "architectur": [1, 8], "latter": 1, "older": [1, 13], "even": 1, "though": 1, "both": 1, "form": [1, 12], "x": [1, 12, 13], "thei": [1, 11, 12], "differ": [1, 7], "option": [1, 7, 10, 12], "see": [1, 7, 10, 12], "commcm": 1, "faq": 1, "get": [1, 12], "php": 1, "cpe": 1, "mvapich2_nogpu": 1, "should": [1, 4, 7, 10, 12], "test": [1, 4, 7, 10, 12], "mak": 1, "recogn": 1, "switch": 1, "old": 1, "flag": [1, 12], "build": [1, 2, 4, 10, 12, 13], "24": 1, "use_mpi": [1, 4, 10, 12, 13], "fals": [1, 4, 10, 12, 13], "long": [1, 5], "At": [1, 11], "moment": [1, 10], "link": 1, "cannot": 1, "nopattern": 1, "error": [1, 12], "which": [1, 5, 7, 9, 10, 11, 12], "why": 1, "comment": 1, "abov": [1, 12], "lustr": [1, 12], "global": 1, "softwar": 1, "a64fx": 1, "modulefil": 1, "mvapich2": 1, "use_omp": [1, 4, 10], "know": [1, 5], "chip": 1, "specif": [1, 9], "mv2_enable_affin": 1, "castro3d": [1, 12], "omp": [1, 7], "3d": [1, 12], "sph": [1, 12], "max_level": 1, 
"max_step": 1, "These": 2, "doc": [2, 5, 6, 10, 12], "provid": [2, 12], "inform": [2, 12], "code": [2, 9, 10, 12, 13], "nyx": 2, "maestroex": 2, "castro": [2, 4, 7, 10, 12], "variou": 2, "includ": [2, 7, 12], "hpc": [2, 12], "center": 2, "workstat": 2, "gener": [2, 12, 13], "work": [2, 9, 10, 12, 13], "alcf": 2, "nersc": [2, 5], "olcf": [2, 9], "iac": 2, "linux": 2, "compil": [3, 7, 8, 12, 13], "perlmutt": 3, "hypr": 3, "manag": [3, 8], "filesystem": [3, 5, 12], "chain": [3, 8], "archiv": [3, 8], "data": [3, 12], "hpss": [3, 8], "visual": [3, 8, 13], "p1": 4, "cudatoolkit": [4, 7], "python": [4, 6, 9, 10, 11, 12], "requir": [4, 11, 12], "process": [4, 5, 7, 12], "g": [4, 5, 11, 12], "sedov": 4, "hydro": 4, "problem": [4, 7, 12], "tiny_profil": 4, "obtain": [4, 7], "built": 4, "same": [4, 5, 7, 10, 12], "hypre_cuda_sm": 4, "80": [4, 12], "cxx": 4, "cc": 4, "fc": 4, "ftn": 4, "configur": 4, "prefix": 4, "path": [4, 5, 12, 13], "instal": [4, 6, 9, 11, 12], "unifi": 4, "memori": [4, 7, 9, 10, 12], "larg": [5, 9], "tape": 5, "librari": [5, 7, 10, 11], "store": [5, 12], "simul": [5, 12], "period": 5, "It": [5, 9, 12], "recommend": [5, 10], "move": [5, 12], "frequent": 5, "scratch": [5, 7, 12], "fill": 5, "up": [5, 7, 12], "purg": 5, "xfer": 5, "automat": [5, 12], "submit": [5, 7, 8, 9], "queue": [5, 7, 12], "xrb": [5, 9, 12], "continu": [5, 13], "output": [5, 12, 13], "describ": [5, 11, 12], "how": [5, 12], "plotfil": [5, 9, 12], "locat": [5, 7], "just": [5, 12, 13], "full": [5, 12], "call": [5, 9, 12], "pscratch": 5, "sd": 5, "z": 5, "zingal": 5, "wdconvect": 5, "hsi": [5, 12], "mkdir": 5, "wdconvect_run": 5, "If": [5, 7, 9, 11, 12], "command": [5, 12, 13], "prompt": [5, 13], "password": [5, 13], "talk": 5, "help": [5, 12], "desk": 5, "ask": [5, 12], "less": 5, "sbatch": [5, 6, 7, 9, 12], "background": [5, 7, 12], "wait": [5, 7, 12], "until": [5, 7, 12], "alwai": [5, 12], "leav": [5, 12], "most": [5, 7], "recent": [5, 6, 12], "alon": [5, 12], "mai": [5, 7, 12], "still": 
[5, 12], "htar": [5, 12], "wa": [5, 12], "success": 5, "subdirectori": [5, 12], "import": [5, 10, 12], "don": 5, "try": [5, 12], "second": [5, 7, 12], "overwrit": 5, "especi": 5, "took": 5, "some": [5, 12], "addit": [5, 12], "ftime": [5, 12], "execut": [5, 7, 12], "cpp": 5, "live": 5, "amrex": [5, 10, 12], "list": [5, 12], "correspond": [5, 12], "right": 5, "when": [5, 9, 10, 11, 12], "tar": [5, 12], "all": [5, 7, 12], "diagnost": 5, "given": 5, "contain": [5, 7, 12], "date": 5, "string": 5, "multipl": [5, 7], "co": 5, "exist": [5, 7, 9, 12], "lockfil": 5, "pid": [5, 7], "ensur": [5, 12], "instanc": [5, 7], "ani": [5, 7, 12], "sometim": [5, 12], "termin": [5, 13], "normal": 5, "left": [5, 7, 12], "behind": 5, "case": [5, 7, 12], "abort": [5, 12], "delet": 5, "quickli": 5, "best": [5, 6, 7, 9, 10, 11, 12], "approach": 5, "sampl": [5, 6, 12], "produc": [5, 12], "yt": [6, 9, 11], "setup": [6, 9, 10, 12], "own": 6, "conda": [6, 8, 9], "step": [6, 7, 12], "develop": 6, "languag": 6, "someth": 6, "like": [6, 7, 12], "init": [6, 9], "myenv": 6, "activ": [6, 9], "more": [6, 7, 9, 12], "c": [6, 7, 9, 10, 11, 12], "forg": [6, 9, 11], "deactiv": 6, "m3018": [6, 7], "vi": 6, "vis_": 6, "01": 6, "ntask": [6, 7, 12], "per": [6, 7, 12], "regular": [6, 7], "omp_proc_bind": [6, 7], "spread": [6, 7], "massive_star_multi": 6, "py": [6, 7, 9, 12], "plt19862": 6, "1536": 7, "therefor": [7, 12], "task": [7, 12], "otherwis": [7, 10], "fail": 7, "runtim": [7, 10, 12], "becaus": [7, 12], "below": [7, 12], "16": 7, "also": [7, 10, 11, 12], "restart": [7, 12], "logic": 7, "m3018_g": 7, "subch_": 7, "map_gpu": 7, "signal": [7, 12], "b": [7, 12], "urg": [7, 12], "castro_exec": 7, "clean": [7, 12], "over": [7, 12], "rm": [7, 12], "dump_and_stop": [7, 12], "send": [7, 12], "sigurg": [7, 12], "batch": [7, 8, 12], "minut": [7, 12], "befor": [7, 12], "limit": [7, 12], "gracefulli": [7, 12], "sig_handl": [7, 12], "touch": [7, 12], "disabl": [7, 12], "handler": [7, 12], "trap": [7, 12], "alloc": [7, 
8], "soon": [7, 12], "dump": [7, 12], "stop": [7, 12], "workdir": 7, "basenam": 7, "slurm_submit_dir": [7, 9], "slack_job_start": 7, "michael": 7, "builtin": [7, 12], "shell": [7, 12], "handl": [7, 12], "64": 7, "ret": 7, "128": [7, 12], "23": 7, "receiv": [7, 12], "keep": [7, 12], "refer": 7, "distribut": [7, 11], "parallel": 7, "hyper": 7, "share": [7, 10], "socket": [7, 12], "256": [7, 12], "howev": [7, 12], "assign": 7, "physic": [7, 12], "detail": [7, 8], "instruct": 7, "within": 7, "perlmutter_script": 7, "account": 7, "qo": 7, "02": 7, "constraint": 7, "In": [7, 10, 11, 12, 13], "order": [7, 10, 11, 12], "coupl": 7, "design": [7, 12], "strategi": 7, "first": [7, 9, 11, 12], "fix": [7, 12], "next": [7, 12], "virtual": 7, "avail": [7, 12], "compos": [7, 12], "two": [7, 12], "where": [7, 12], "numa": 7, "domain": [7, 12], "lower": 7, "shortag": 7, "principl": 7, "squeez": 7, "resourc": [7, 12], "wall": 7, "clock": 7, "timestep": [7, 12], "grep": 7, "slurm_output": 7, "repeat": 7, "perfect": 7, "balanc": [7, 12], "reach": 7, "choic": 7, "compar": 7, "max_grid_s": 7, "optim": 7, "valu": [7, 12], "usual": [7, 10, 12], "half": 7, "level": [7, 9, 12], "half_siz": 7, "furthermor": 7, "sever": [7, 12], "blocking_factor": 7, "size": [7, 12], "final": [7, 9, 12], "increas": [7, 12], "scale": 7, "correctli": 7, "go": 7, "down": [7, 10], "factor": 7, "break": 7, "bigger": 7, "chainslurm": [7, 12], "mpich_max_thread_safeti": 7, "x86": [7, 12], "milan": 7, "inputs_fil": 7, "inputs_nova_t7": 7, "slurm_ntasks_per_nod": 7, "slurm_nnod": 7, "slurm_cpus_per_task": 7, "By": [7, 12], "default": [7, 10, 12, 13], "altern": [7, 12], "common": 7, "cf": 7, "everyon": 7, "squeue": [7, 12], "me": [7, 12], "estim": [7, 12], "cancel": 7, "scancel": 7, "tb": 7, "quota": 7, "showquota": 7, "finish": [7, 12], "origin": 7, "remain": [7, 12], "view": [7, 12], "id": [7, 12], "summit": [8, 11], "frontier": 8, "writ": 8, "monitor": 8, "templat": 8, "troubleshoot": 8, "jupyt": 8, "remot": 8, 
"Andes": 8, "andes": 9, "andes_env": 9, "anaconda": 9, "anaconda3": [9, 11], "modifi": [9, 12], "ad": 9, "y": [9, 11], "ipykernel": [9, 11], "nb_conda_kernel": [9, 11], "sourc": 9, "top": 9, "pip": 9, "uninstal": 9, "ast106": [9, 12], "plot": 9, "vol": 9, "enuc": 9, "flame_wave_1000hz_25cm_smallplt203204": 9, "veri": [9, 12], "might": 9, "solut": 9, "accomplish": 9, "xl": 10, "atleast": 10, "due": 10, "17": 10, "won": [10, 12], "present": 10, "warn": [10, 12], "packag": 10, "fine": 10, "sure": [10, 11, 12], "current": [10, 12], "disallow": 10, "line": [10, 12], "pair": 10, "offload": 10, "control": 10, "use_omp_offload": 10, "featur": [10, 12], "frontier_user_guid": [10, 12], "program": 10, "crayp": [10, 12], "accel": [10, 12], "amd": [10, 12], "gfx90a": [10, 12], "crai": [10, 12], "mpich": [10, 12], "rocm": [10, 12], "higher": 10, "issu": 10, "burner": 10, "tabul": 10, "rate": 10, "exhibit": 10, "strang": 10, "slow": 10, "without": [10, 12, 13], "use_hip": [10, 12], "jupyterhub": 11, "document": [11, 12], "extra": 11, "part": 11, "notebook": 11, "wish": 11, "correct": [11, 12], "point": [11, 12, 13], "good": [11, 12], "idea": [11, 12], "my_env": 11, "jupyterlab": 11, "subsequ": 11, "channel": 11, "search": [11, 12], "let": 12, "review": 12, "our": 12, "goal": 12, "necessari": 12, "insight": 12, "better": 12, "decis": 12, "construct": 12, "explain": 12, "expos": 12, "section": 12, "condens": 12, "replac": 12, "21": 12, "reserv": 12, "ram": 12, "bank": 12, "connect": [12, 13], "bu": 12, "commun": 12, "among": 12, "defin": 12, "whole": 12, "structur": 12, "depict": 12, "figur": 12, "extract": 12, "summit_user_guid": 12, "launcher": 12, "jsrun": 12, "minim": 12, "collect": 12, "certain": 12, "oper": 12, "extend": 12, "discuss": 12, "now": 12, "determin": 12, "maximum": 12, "fit": 12, "accord": 12, "summon": 12, "bsub": 12, "descript": 12, "perform": 12, "calcul": 12, "format": 12, "room": 12, "03": 12, "three": 12, "alloc_flag": 12, "smt4": 12, "consid": 12, "smt1": 
12, "stand": 12, "interest": 12, "standard": 12, "stream": 12, "insid": 12, "similar": 12, "suppli": 12, "assum": 12, "between": 12, "small": 12, "smoothli": 12, "bug": [12, 13], "unix": 12, "mention": 12, "stdout_to_show": 12, "stderr_to_show": 12, "No": 12, "onc": 12, "grant": 12, "variabl": 12, "total": 12, "r": 12, "max": 12, "a1": 12, "c1": 12, "g1": 12, "r6": 12, "placehold": 12, "respect": 12, "match": 12, "box": 12, "grid": 12, "biggest": 12, "piec": 12, "32768": 12, "cell": 12, "100": 12, "131072": 12, "524288": 12, "32": 12, "2097152": 12, "7864320": 12, "93": 12, "75": 12, "480": 12, "30408704": 12, "90": 12, "625": 12, "assert": 12, "equival": 12, "impli": 12, "398": 12, "idl": 12, "sweep": 12, "entir": 12, "possibl": 12, "maxim": 12, "life": 12, "easier": 12, "instead": 12, "write": 12, "anoth": 12, "pack": 12, "statement": 12, "luna_script": 12, "luna_output": 12, "luna_sniffing_output": 12, "inputs_luna": 12, "n_re": 12, "n_cpu_cores_per_r": 12, "n_max_res_per_nod": 12, "n_mpi_per_r": 12, "n_gpu_per_r": 12, "downgrad": 12, "kill": 12, "As": 12, "week": 12, "month": 12, "mayb": 12, "year": 12, "come": 12, "salvat": 12, "mandatori": 12, "chkxxxxxxx": 12, "chkxxxxxx": 12, "chkxxxxx": 12, "implement": 12, "append": 12, "minimum": 12, "pick": 12, "amount": 12, "expir": 12, "pass": 12, "wt": 12, "cleanli": 12, "couldn": 12, "anywher": 12, "ignor": 12, "immedi": 12, "crash": 12, "upon": 12, "sigchld": 12, "sigwinch": 12, "least": 12, "trigger": 12, "other": 12, "event": 12, "launch": [12, 13], "must": 12, "jswait": 12, "save": 12, "bjob": 12, "slightli": 12, "nicer": 12, "jobstat": 12, "lead": 12, "enviro": 12, "simultan": 12, "n_res_1": 12, "n_res2": 12, "give": 12, "avoid": 12, "head": 12, "quiet": 12, "level_": 12, "dev": 12, "null": 12, "warning_tim": 12, "nohead": 12, "action_warning_tim": 12, "lsb_jobid": 12, "chain_submit": 12, "submit_script": 12, "return": 12, "transfer": 12, "15": 12, "cluster": 12, "dtn": 12, "test_hpss": 12, "associ": 12, 
"summit_hpss": 12, "jobs_script": 12, "togeth": 12, "detect": 12, "onto": 12, "probin": 12, "under": 12, "enter": [12, 13], "being": 12, "unarchiv": 12, "bulk": 12, "hpss_xfer": 12, "plt00000": 12, "hpss_dir": 12, "plotfile_dir": 12, "fetch": 12, "unpack": 12, "attempt": 12, "recov": 12, "titan": 12, "polici": 12, "orion": 12, "storag": 12, "05": 12, "closest": 12, "hip": 12, "trento": 12, "nmpi_per_nod": 12, "total_nmpi": 12, "slurm_job_num_nod": 12, "june": 12, "2023": 12, "explicitli": 12, "blob": 12, "warpx": 12, "readthedoc": 12, "io": 12, "en": 12, "queu": 12, "rocgdb": 12, "27": 12, "turn": 12, "startup": 12, "session": 12, "salloc": 12, "mz": 12, "restor": 12, "reload": 12, "hip_enable_deferred_load": 12, "amd_serialize_kernel": 12, "amd_serialize_copi": 12, "amd_log_level": 12, "lot": 12, "debugg": 12, "pagin": 12, "off": 12, "trace": 12, "interrupt": 12, "bt": 12, "workaround": 12, "prevent": 12, "hang": 12, "fi_mr_cache_monitor": 12, "memhook": 12, "report": 12, "arena": 12, "big": 12, "the_arena_init_s": 12, "grow": 12, "suggest": 12, "larger": 12, "than": 12, "well": 13, "nvcc": 13, "cuda_vers": 13, "cc60": 13, "compile_cuda_path": 13, "usr": 13, "no_device_launch": 13, "around": 13, "cc70": 13, "On": 13, "lab": 13, "browser": 13, "ip": 13, "localhost": 13, "8888": 13, "sunysb": 13, "window": 13, "web": 13, "token": 13, "appear": 13}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"work": [0, 1, 3, 8], "alcf": 0, "log": 0, "In": 0, "compil": [0, 1, 4, 10], "disk": 0, "queue": 0, "submit": [0, 12], "automat": 0, "restart": 0, "job": [0, 7, 12], "chain": [0, 7, 12], "iac": 1, "ookami": 1, "amrex": [1, 2], "setup": 1, "crai": 1, "gcc": [1, 4], "10": 1, "2": 1, "astrophys": 2, "suit": 2, "astro": 2, "basic": 2, "nersc": [3, 4, 6, 7], "perlmutt": [4, 7], "cuda": 4, "hypr": 4, "archiv": [5, 12], "data": 5, "hpss": [5, 12], "visual": [6, 9], "manag": [7, 12], "filesystem": 7, "olcf": [8, 10, 11, 12], "batch": 9, "Andes": 9, "summit": [10, 12], 
"frontier": [10, 12], "run": 11, "jupyt": [11, 13], "remot": [11, 13], "from": 11, "creat": 11, "conda": 11, "environ": 11, "architectur": 12, "request": 12, "alloc": 12, "writ": 12, "script": 12, "monitor": 12, "templat": 12, "machin": 12, "detail": 12, "statu": 12, "debug": 12, "troubleshoot": 12, "linux": 13, "workstat": 13, "gpu": 13, "offload": 13, "bender": 13, "groot": 13, "vi": 13}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 60}, "alltitles": {"Working at ALCF": [[0, "working-at-alcf"]], "Logging In": [[0, "logging-in"]], "Compiling": [[0, "compiling"]], "Disks": [[0, "disks"]], "Queues": [[0, "queues"]], "Submitting": [[0, "submitting"]], "Automatic Restarting": [[0, "automatic-restarting"]], "Job Chaining": [[0, "job-chaining"], [12, "job-chaining"]], "Working at IACS": [[1, "working-at-iacs"]], "Ookami": [[1, "ookami"]], "AMReX setup": [[1, "amrex-setup"]], "Cray compilers": [[1, "cray-compilers"]], "GCC": [[1, "gcc"]], "GCC 10.2": [[1, "gcc-10-2"]], "AMReX Astrophysics Suite": [[2, "amrex-astrophysics-suite"]], "AMReX Astro basics": [[2, null]], "Working at NERSC": [[3, "working-at-nersc"]], "Compiling at NERSC": [[4, "compiling-at-nersc"]], "Perlmutter": [[4, "perlmutter"], [7, "perlmutter"]], "Compiling with GCC + CUDA": [[4, "compiling-with-gcc-cuda"]], "Hypre": [[4, "hypre"]], "Archiving Data to HPSS": [[5, "archiving-data-to-hpss"]], "Visualization at NERSC": [[6, "visualization-at-nersc"]], "Managing Jobs at NERSC": [[7, "managing-jobs-at-nersc"]], "Filesystems": [[7, "filesystems"]], "Chaining": [[7, "chaining"]], "Working at OLCF": [[8, "working-at-olcf"]], "Batch Visualization on Andes": [[9, "batch-visualization-on-andes"]], "Compiling at OLCF": [[10, 
"compiling-at-olcf"]], "Summit": [[10, "summit"], [12, "summit"]], "Frontier": [[10, "frontier"], [12, "frontier"]], "Running Jupyter Remotely from OLCF": [[11, "running-jupyter-remotely-from-olcf"]], "Creating a conda environment": [[11, "creating-a-conda-environment"]], "Managing Jobs at OLCF": [[12, "managing-jobs-at-olcf"]], "Summit Architecture:": [[12, "summit-architecture"]], "Requesting Allocation:": [[12, "requesting-allocation"]], "Submitting a Job:": [[12, "submitting-a-job"]], "Writting a Job Script:": [[12, "writting-a-job-script"]], "Monitoring a Job:": [[12, "monitoring-a-job"]], "Script Template:": [[12, "script-template"]], "Chaining jobs": [[12, "chaining-jobs"]], "Archiving to HPSS": [[12, "archiving-to-hpss"]], "Machine details": [[12, "machine-details"]], "Submitting jobs": [[12, "submitting-jobs"]], "Job Status": [[12, "job-status"]], "Debugging": [[12, "debugging"]], "Troubleshooting": [[12, "troubleshooting"]], "Linux Workstations": [[13, "linux-workstations"]], "GPU offloading": [[13, "gpu-offloading"]], "bender": [[13, "bender"]], "groot": [[13, "groot"]], "Remote vis with Jupyter": [[13, "remote-vis-with-jupyter"]]}, "indexentries": {}}) \ No newline at end of file