From e9a777b0e89b4d4a9b1ffa2b90daadd4c0f340e4 Mon Sep 17 00:00:00 2001
From: zingale The script The script can be used to archive data to
HPSS automatically. This is submitted to the xfer queue and runs the
-script which continually looks for output and stores
-it to HPSS.Archiving Data to HPSS
-
nersc.xfer.slurm
:nersc.xfer.slurm
in job_scripts/perlmutter/
:process.xrb
:process.xrb
in job_scripts/hpss/
:
which continually looks for output and stores it to HPSS.
+By default, the destination directory on HPSS will have the same name
+as the directory your plotfiles are located in. This can be changed by
+editing the ``$HPSS_DIR`` variable at the top of process.xrb
.
The following describes how to use the scripts:
Create a directory in HPSS that has the same
-name as the directory your plotfiles are located in
-(just the directory name, not the full path). e.g. if you are running in a directory call
-/pscratch/sd/z/zingale/wdconvect/
run, then do:
Copy the process.xrb
script and the slurm script nersc.xfer.slurm
+into the directory with the plotfiles.
Submit the archive job:
hsi
-mkdir wdconvect
-
Note
-If the hsi
command prompts you for your password, you will need
-to talk to the NERSC help desk to ask for password-less access to
-HPSS.
Copy the process.xrb
script and the slurm script nersc.xfer.slurm
-into the directory with the plotfiles.
Submit the archive job:
-sbatch nersc.xfer.slurm
+sbatch nersc.xfer.slurm
The script process.xrb
is called from the xfer job and will run in
the background and continually wait until checkpoint or plotfiles are
created.
When process.xrb
is running, it creates a lockfile (called
-process.pid
) that ensures that only one instance of the script
+process.jobid
) that ensures that only one instance of the script
is running at any one time.
Warning
Sometimes if the job is not terminated normally, the
-process.pid
file will be left behind, in which case, the script
-aborts. Just delete that if you know the script is not running.
process.jobid
file will be left behind. Later jobs should be
+able to detect this and clean up the stale lockfile, but if this
+doesn’t work, you can delete the file if you know the script is not
+running.
htar
commands needed to archive your data. This uses slurm
as the job
manager.
-An example is provided by the process.xrb
archiving script and
-associated summit_hpss.submit
submission script in
-jobs_scripts/summit/
. Together these will detect new plotfiles as
-they are generated, tar them up (using htar
) and archive them onto
-HPSS. They will also store the inputs, probin, and other runtime
-generated files. If ftime
is found in your path, it will also
-create a file called ftime.out
that lists the simulation time
-corresponding to each plotfile.
An example is provided by the process.xrb
archiving script in
+job_scripts/hpss/
and associated summit_hpss.submit
submission script
+in job_scripts/summit/
. Together these will detect new plotfiles as they
+are generated, tar them up (using htar
) and archive them onto HPSS. They
+will also store the inputs, probin, and other runtime generated files. If
+ftime
is found in your path, it will also create a file called
+ftime.out
that lists the simulation time corresponding to each plotfile.
Once the plotfiles are archived they are moved to a subdirectory under
your run directory called plotfiles/
.
By default, the files will be archived to a directory in HPSS with the same
+name as the directory your plotfiles are located in. This can be changed
+by editing the $HPSS_DIR
variable at the top of process.xrb
.
To use this, we do the following:
Enter the HPSS system via hsi
Create the output directory – this should have the same name as the directory -you are running in on summit
Exit HPSS
Copy the process.xrb
and summit_hpss.submit
scripts into the
+directory with the plotfiles.
Launch the script via:
sbatch summit_hpss.submit
-
It will for the full time you asked, searching for plotfiles as +
It will run for the full time you asked, searching for plotfiles as they are created and moving them to HPSS as they are produced (it will always leave the very last plotfile alone, since it can’t tell if it is still being written).
diff --git a/searchindex.js b/searchindex.js index 9a4314f..23d460e 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["alcf", "iacs", "index", "nersc", "nersc-compilers", "nersc-hpss", "nersc-visualization", "nersc-workflow", "olcf", "olcf-andes", "olcf-compilers", "olcf-jupyter", "olcf-workflow", "workstations"], "filenames": ["alcf.rst", "iacs.rst", "index.rst", "nersc.rst", "nersc-compilers.rst", "nersc-hpss.rst", "nersc-visualization.rst", "nersc-workflow.rst", "olcf.rst", "olcf-andes.rst", "olcf-compilers.rst", "olcf-jupyter.rst", "olcf-workflow.rst", "workstations.rst"], "titles": ["Working at ALCF", "Working at IACS", "AMReX Astrophysics Suite", "Working at NERSC", "Compiling at NERSC", "Archiving Data to HPSS", "Visualization at NERSC", "Managing Jobs at NERSC", "Working at OLCF", "Batch Visualization on Andes", "Compiling at OLCF", "Running Jupyter Remotely from OLCF", "Managing Jobs at OLCF", "Linux Workstations"], "terms": {"polari": 0, "ha": [0, 1, 5, 7, 11], "560": 0, "node": [0, 1, 6, 7, 9, 12], "each": [0, 1, 7, 12], "4": [0, 1, 4, 7, 10, 12, 13], "nvidia": [0, 7], "a100": [0, 7], "gpu": [0, 2, 7, 9, 10, 12], "The": [0, 1, 5, 6, 7, 10, 12], "pb": 0, "schedul": [0, 12], "i": [0, 1, 5, 6, 7, 9, 10, 11, 12], "us": [0, 1, 4, 5, 7, 9, 10, 11, 12, 13], "ssh": [0, 1, 4, 13], "ornl": [0, 9, 10, 12], "gov": [0, 4, 6, 9, 10, 12], "To": [0, 1, 7, 11, 12, 13], "have": [0, 1, 7, 9, 10, 11, 12], "custom": 0, "bashrc": [0, 9], "creat": [0, 5, 6, 7, 8, 9, 12], "bash": [0, 1, 6, 7, 9, 12], "expert": 0, "file": [0, 1, 5, 7, 12], "add": [0, 1, 11, 12, 13], "anyth": 0, "thi": [0, 1, 5, 6, 7, 9, 10, 11, 12], "read": [0, 12], "end": [0, 7, 12], "etc": 0, "load": [0, 1, 4, 6, 7, 9, 10, 11, 12, 13], "modul": [0, 1, 4, 6, 7, 9, 10, 11, 12, 13], "swap": [0, 10], "prgenv": [0, 4, 7, 10, 12], "nvhpc": 0, "gnu": [0, 1, 4, 7, 10, 12, 13], "gcc": [0, 3, 10, 12, 13], "11": [0, 6, 10, 12, 13], "2": [0, 7, 9, 10, 12, 13], "0": [0, 1, 6, 7, 
10, 11, 12], "version": [0, 1, 10, 12, 13], "sinc": [0, 5, 7, 10, 12], "cuda": [0, 3, 7, 10, 12, 13], "doesn": [0, 10, 12], "39": 0, "t": [0, 5, 6, 7, 9, 10, 12], "support": [0, 1, 9, 10, 12], "12": [0, 1], "yet": 0, "mix": [0, 10, 12], "Then": [0, 9, 10, 12], "you": [0, 1, 4, 5, 6, 7, 9, 10, 11, 12, 13], "can": [0, 1, 5, 6, 7, 9, 11, 12, 13], "via": [0, 1, 4, 7, 9, 10, 11, 12], "make": [0, 1, 4, 5, 9, 10, 11, 12, 13], "comp": [0, 1, 4, 10, 13], "use_cuda": [0, 4, 10, 13], "true": [0, 1, 4, 10, 12, 13], "project": [0, 1, 7, 9], "workspac": 0, "lu": 0, "grand": 0, "astroexplos": 0, "http": [0, 1, 2, 6, 10, 12, 13], "www": [0, 1], "anl": 0, "user": 0, "guid": [0, 1, 12], "run": [0, 1, 2, 5, 6, 7, 8, 9, 10, 12, 13], "index": 0, "html": [0, 10, 12], "For": [0, 7, 9, 12], "product": [0, 12], "prod": 0, "smallest": [0, 12], "count": 0, "seem": [0, 1, 10], "allow": [0, 5, 7, 12], "10": [0, 10, 12, 13], "clone": [0, 9], "gettingstart": 0, "repo": [0, 9], "git": [0, 1, 9], "github": [0, 2, 4, 9, 12], "com": [0, 1, 2, 9, 12], "argonn": 0, "lcf": 0, "ll": [0, 9], "want": [0, 5, 7, 9, 12], "exampl": [0, 7, 10, 12], "affinity_gpu": 0, "particular": [0, 2, 6, 7, 12], "need": [0, 1, 5, 6, 7, 9, 10, 11, 12, 13], "script": [0, 5, 6, 7, 8, 9, 10], "set_affinity_gpu_polari": 0, "sh": [0, 7, 12], "copi": [0, 5], "your": [0, 5, 6, 7, 9, 10, 11, 12, 13], "directori": [0, 5, 7, 9, 12], "here": [0, 6, 7, 9, 12], "": [0, 7, 11, 12], "submiss": [0, 5, 6, 7, 9, 10, 12], "bin": [0, 6, 7, 9, 12], "l": [0, 7, 13], "select": [0, 7, 12], "system": [0, 7, 10, 12], "place": [0, 5], "scatter": 0, "walltim": [0, 12], "30": [0, 12], "00": [0, 6, 7, 9, 12], "q": [0, 6, 7, 12], "debug": [0, 7, 8], "A": [0, 6, 7, 9, 12], "exec": [0, 12], "castro2d": [0, 7, 12], "mpi": [0, 1, 4, 7, 12], "smplsdc": [0, 7], "ex": [0, 1, 7, 12], "input": [0, 1, 5, 7, 12], "inputs_2d": [0, 7], "n14": [0, 7], "coars": [0, 7], "enabl": [0, 4], "applic": [0, 12], "export": [0, 1, 6, 7, 12], "mpich_gpu_support_en": 0, "1": [0, 1, 
6, 7, 9, 12], "chang": [0, 7, 12], "cd": [0, 7, 9], "pbs_o_workdir": 0, "openmp": [0, 1, 7, 10, 12], "set": [0, 1, 7, 12], "nnode": [0, 12], "wc": 0, "pbs_nodefil": 0, "nranks_per_nod": 0, "ndepth": 0, "8": [0, 6, 7, 12], "nthread": 0, "ntotrank": 0, "mpiexec": [0, 1], "bind": [0, 7, 12], "rank": 0, "n": [0, 1, 6, 7, 9, 11, 12, 13], "ppn": 0, "depth": 0, "cpu": [0, 6, 7, 12], "env": [0, 9], "omp_num_thread": [0, 1, 6, 7, 12], "omp_plac": [0, 6, 7], "thread": [0, 1, 7, 12], "do": [0, 1, 5, 6, 7, 9, 11, 12, 13], "qsub": 0, "check": [0, 7, 12], "statu": [0, 7, 8], "qstat": 0, "u": [0, 7, 12], "usernam": [0, 7, 12], "from": [0, 4, 5, 7, 8, 9, 12], "last": [0, 7, 12], "checkpoint": [0, 5, 7, 12], "6": [0, 7, 12], "j": [0, 1, 4, 6, 7, 9, 10, 12, 13], "eo": 0, "function": [0, 7, 12], "find_chk_fil": [0, 7, 12], "take": [0, 1, 7, 12], "singl": [0, 7, 12], "argument": [0, 7, 12], "wildcard": [0, 7, 12], "pattern": [0, 7, 12], "look": [0, 5, 7, 12], "through": [0, 7, 12], "chk": [0, 7, 12], "find": [0, 1, 7, 10, 12], "latest": [0, 1, 7, 12], "wai": [0, 6, 7, 12], "didn": [0, 7, 12], "complet": [0, 7, 12], "we": [0, 1, 4, 7, 9, 12, 13], "fall": [0, 7, 12], "back": [0, 7, 12], "previou": [0, 7, 12], "one": [0, 5, 7, 12], "temp_fil": [0, 7, 12], "maxdepth": [0, 7, 12], "name": [0, 5, 6, 7, 12], "print": [0, 7, 12], "sort": [0, 7, 12], "tail": [0, 7, 12], "restartfil": [0, 7, 12], "f": [0, 7, 12], "header": [0, 7, 12, 13], "thing": [0, 7, 12], "written": [0, 5, 7, 12], "updat": [0, 7, 12], "fi": [0, 7, 12], "done": [0, 1, 5, 7, 12], "7": [0, 4, 7, 9, 10, 11, 12, 13], "digit": [0, 7, 12], "5": [0, 1, 7, 10, 11, 12], "restartstr": [0, 7, 12], "empti": [0, 7, 12], "ar": [0, 1, 4, 5, 7, 10, 12], "found": [0, 7, 12], "e": [0, 4, 5, 7, 9, 11, 12], "new": [0, 7, 9, 11, 12], "els": [0, 7, 12], "amr": [0, 1, 7, 12], "chainqsub": 0, "echo": [0, 7, 12], "usag": [0, 12], "jobid": [0, 7], "number": [0, 1, 7, 12], "initi": [0, 7, 12], "depend": [0, 5, 7, 12], "exit": [0, 7, 12], "3": [0, 1, 
4, 6, 7, 9, 10, 11, 12, 13], "oldjob": 0, "numjob": 0, "gt": 0, "20": [0, 7], "too": [0, 12], "mani": [0, 12], "request": [0, 8], "firstcount": 0, "eq": 0, "start": [0, 1, 5, 7, 12], "aout": 0, "sleep": 0, "seq": 0, "w": [0, 12], "afterani": 0, "48": 1, "comput": [1, 7, 12], "core": [1, 6, 7, 12], "group": 1, "pool": 1, "actual": [1, 5, 7], "13th": 1, "o": [1, 6, 7, 12], "stuff": 1, "so": [1, 7, 10, 12, 13], "an": [1, 7, 9, 10, 12, 13], "ideal": 1, "config": [1, 11], "would": [1, 7, 9, 12], "log": [1, 4, 10, 12], "login": 1, "stonybrook": 1, "edu": [1, 13], "tell": [1, 7, 12], "about": [1, 2, 12], "machin": [1, 2, 8], "put": [1, 12], "follow": [1, 5, 6, 7, 9, 12], "local": [1, 13], "tool": [1, 5], "gnumak": 1, "raw": 1, "githubusercont": 1, "astro": [1, 10, 12, 13], "workflow": [1, 12], "main": [1, 5, 12], "job_script": [1, 12], "onli": [1, 5, 7, 12], "access": [1, 5, 7, 10, 12], "environ": [1, 6, 8, 9, 10, 12], "note": [1, 5, 12], "srun": [1, 6, 7, 9, 12], "p": [1, 9, 12], "short": 1, "pty": 1, "interact": [1, 12], "slurm": [1, 5, 7, 9, 12], "job": [1, 3, 5, 8], "time": [1, 5, 7, 12], "out": [1, 5, 6, 7, 9, 12], "after": [1, 7, 12], "hour": [1, 12], "infinit": 1, "fj": 1, "debug1": 1, "debug2": 1, "them": [1, 5, 12], "There": [1, 7, 12, 13], "cce": 1, "sve": 1, "former": 1, "newer": 1, "llvm": 1, "base": [1, 7], "ocompil": 1, "fortran": 1, "doe": [1, 9, 10], "arm": 1, "architectur": [1, 8], "latter": 1, "older": [1, 13], "even": 1, "though": 1, "both": 1, "form": [1, 12], "x": [1, 12, 13], "thei": [1, 11, 12], "differ": [1, 7], "option": [1, 7, 10, 12], "see": [1, 7, 10, 12], "commcm": 1, "faq": 1, "get": [1, 12], "php": 1, "cpe": 1, "mvapich2_nogpu": 1, "should": [1, 4, 7, 10, 12], "test": [1, 4, 7, 10, 12], "mak": 1, "recogn": 1, "switch": 1, "old": 1, "flag": [1, 12], "build": [1, 2, 4, 10, 12, 13], "24": 1, "use_mpi": [1, 4, 10, 12, 13], "fals": [1, 4, 10, 12, 13], "long": [1, 5], "At": [1, 11], "moment": [1, 10], "link": 1, "cannot": 1, "nopattern": 1, 
"error": [1, 12], "which": [1, 5, 7, 9, 10, 11, 12], "why": 1, "comment": 1, "abov": [1, 12], "lustr": [1, 12], "global": 1, "softwar": 1, "a64fx": 1, "modulefil": 1, "mvapich2": 1, "use_omp": [1, 4, 10], "know": [1, 5], "chip": 1, "specif": [1, 9], "mv2_enable_affin": 1, "castro3d": [1, 12], "omp": [1, 7], "3d": [1, 12], "sph": [1, 12], "max_level": 1, "max_step": 1, "These": 2, "doc": [2, 6, 10, 12], "provid": [2, 12], "inform": [2, 12], "code": [2, 9, 10, 12, 13], "nyx": 2, "maestroex": 2, "castro": [2, 4, 7, 10, 12], "variou": 2, "includ": [2, 7, 12], "hpc": [2, 12], "center": 2, "workstat": 2, "gener": [2, 12, 13], "work": [2, 9, 10, 12, 13], "alcf": 2, "nersc": [2, 5], "olcf": [2, 9], "iac": 2, "linux": 2, "compil": [3, 7, 8, 12, 13], "perlmutt": 3, "hypr": 3, "manag": [3, 8], "filesystem": [3, 5, 12], "chain": [3, 8], "archiv": [3, 8], "data": [3, 12], "hpss": [3, 8], "visual": [3, 8, 13], "p1": 4, "cudatoolkit": [4, 7], "python": [4, 6, 9, 10, 11, 12], "requir": [4, 11, 12], "process": [4, 5, 7, 12], "g": [4, 5, 11, 12], "sedov": 4, "hydro": 4, "problem": [4, 7, 12], "tiny_profil": 4, "obtain": [4, 7], "built": 4, "same": [4, 5, 7, 10, 12], "hypre_cuda_sm": 4, "80": [4, 12], "cxx": 4, "cc": 4, "fc": 4, "ftn": 4, "configur": 4, "prefix": 4, "path": [4, 5, 12, 13], "instal": [4, 6, 9, 11, 12], "unifi": 4, "memori": [4, 7, 9, 10, 12], "larg": [5, 9], "tape": 5, "librari": [5, 7, 10, 11], "store": [5, 12], "simul": [5, 12], "period": 5, "It": [5, 9, 12], "recommend": [5, 10], "move": [5, 12], "frequent": 5, "scratch": [5, 7, 12], "fill": 5, "up": [5, 7, 12], "purg": 5, "xfer": 5, "automat": [5, 12], "submit": [5, 7, 8, 9], "queue": [5, 7, 12], "xrb": [5, 9, 12], "continu": [5, 13], "output": [5, 12, 13], "describ": [5, 11, 12], "how": [5, 12], "plotfil": [5, 9, 12], "locat": [5, 7], "just": [5, 12, 13], "full": [5, 12], "call": [5, 9, 12], "pscratch": 5, "sd": 5, "z": 5, "zingal": 5, "wdconvect": 5, "hsi": [5, 12], "mkdir": 5, "If": [5, 7, 9, 11, 12], 
"command": [5, 12, 13], "prompt": [5, 13], "password": [5, 13], "talk": 5, "help": [5, 12], "desk": 5, "ask": [5, 12], "less": 5, "sbatch": [5, 6, 7, 9, 12], "background": [5, 7, 12], "wait": [5, 7, 12], "until": [5, 7, 12], "alwai": [5, 12], "leav": [5, 12], "most": [5, 7], "recent": [5, 6, 12], "alon": [5, 12], "mai": [5, 7, 12], "still": [5, 12], "htar": [5, 12], "wa": [5, 12], "success": 5, "subdirectori": [5, 12], "import": [5, 10, 12], "don": 5, "try": [5, 12], "second": [5, 7, 12], "overwrit": 5, "especi": 5, "took": 5, "some": [5, 12], "addit": [5, 12], "ftime": [5, 12], "execut": [5, 7, 12], "cpp": 5, "live": 5, "amrex": [5, 10, 12], "list": [5, 12], "correspond": [5, 12], "right": 5, "when": [5, 9, 10, 11, 12], "tar": [5, 12], "all": [5, 7, 12], "diagnost": 5, "given": 5, "contain": [5, 7, 12], "date": 5, "string": 5, "multipl": [5, 7], "co": 5, "exist": [5, 7, 9, 12], "lockfil": 5, "pid": [5, 7], "ensur": [5, 12], "instanc": [5, 7], "ani": [5, 7, 12], "sometim": [5, 12], "termin": [5, 13], "normal": 5, "left": [5, 7, 12], "behind": 5, "case": [5, 7, 12], "abort": [5, 12], "delet": 5, "quickli": 5, "best": [5, 6, 7, 9, 10, 11, 12], "approach": 5, "sampl": [5, 6, 12], "produc": [5, 12], "yt": [6, 9, 11], "setup": [6, 9, 10, 12], "own": 6, "conda": [6, 8, 9], "step": [6, 7, 12], "develop": 6, "languag": 6, "someth": 6, "like": [6, 7, 12], "init": [6, 9], "myenv": 6, "activ": [6, 9], "more": [6, 7, 9, 12], "c": [6, 7, 9, 10, 11, 12], "forg": [6, 9, 11], "deactiv": 6, "m3018": [6, 7], "vi": 6, "vis_": 6, "01": 6, "ntask": [6, 7, 12], "per": [6, 7, 12], "regular": [6, 7], "omp_proc_bind": [6, 7], "spread": [6, 7], "massive_star_multi": 6, "py": [6, 7, 9, 12], "plt19862": 6, "1536": 7, "therefor": [7, 12], "task": [7, 12], "otherwis": [7, 10], "fail": 7, "runtim": [7, 10, 12], "becaus": [7, 12], "below": [7, 12], "16": 7, "also": [7, 10, 11, 12], "restart": [7, 12], "logic": 7, "m3018_g": 7, "subch_": 7, "map_gpu": 7, "signal": [7, 12], "b": [7, 12], "urg": [7, 
12], "castro_exec": 7, "clean": [7, 12], "over": [7, 12], "rm": [7, 12], "dump_and_stop": [7, 12], "send": [7, 12], "sigurg": [7, 12], "batch": [7, 8, 12], "minut": [7, 12], "befor": [7, 12], "limit": [7, 12], "gracefulli": [7, 12], "sig_handl": [7, 12], "touch": [7, 12], "disabl": [7, 12], "handler": [7, 12], "trap": [7, 12], "alloc": [7, 8], "soon": [7, 12], "dump": [7, 12], "stop": [7, 12], "workdir": 7, "basenam": 7, "slurm_submit_dir": [7, 9], "slack_job_start": 7, "michael": 7, "builtin": [7, 12], "shell": [7, 12], "handl": [7, 12], "64": 7, "ret": 7, "128": [7, 12], "23": 7, "receiv": [7, 12], "keep": [7, 12], "refer": 7, "distribut": [7, 11], "parallel": 7, "hyper": 7, "share": [7, 10], "socket": [7, 12], "256": [7, 12], "howev": [7, 12], "assign": 7, "physic": [7, 12], "detail": [7, 8], "instruct": 7, "within": 7, "perlmutter_script": 7, "account": 7, "qo": 7, "02": 7, "constraint": 7, "In": [7, 10, 11, 12, 13], "order": [7, 10, 11, 12], "coupl": 7, "design": [7, 12], "strategi": 7, "first": [7, 9, 11, 12], "fix": [7, 12], "next": [7, 12], "virtual": 7, "avail": [7, 12], "compos": [7, 12], "two": [7, 12], "where": [7, 12], "numa": 7, "domain": [7, 12], "lower": 7, "shortag": 7, "principl": 7, "squeez": 7, "resourc": [7, 12], "wall": 7, "clock": 7, "timestep": [7, 12], "grep": 7, "slurm_output": 7, "repeat": 7, "perfect": 7, "balanc": [7, 12], "reach": 7, "choic": 7, "compar": 7, "max_grid_s": 7, "optim": 7, "valu": [7, 12], "usual": [7, 10, 12], "half": 7, "level": [7, 9, 12], "half_siz": 7, "furthermor": 7, "sever": [7, 12], "blocking_factor": 7, "size": [7, 12], "final": [7, 9, 12], "increas": [7, 12], "scale": 7, "correctli": 7, "go": 7, "down": [7, 10], "factor": 7, "break": 7, "bigger": 7, "chainslurm": [7, 12], "mpich_max_thread_safeti": 7, "x86": [7, 12], "milan": 7, "inputs_fil": 7, "inputs_nova_t7": 7, "slurm_ntasks_per_nod": 7, "slurm_nnod": 7, "slurm_cpus_per_task": 7, "By": [7, 12], "default": [7, 10, 12, 13], "altern": [7, 12], "common": 7, 
"cf": 7, "everyon": 7, "squeue": [7, 12], "me": [7, 12], "estim": [7, 12], "cancel": 7, "scancel": 7, "tb": 7, "quota": 7, "showquota": 7, "finish": [7, 12], "origin": 7, "remain": [7, 12], "view": [7, 12], "id": [7, 12], "summit": [8, 11], "frontier": 8, "writ": 8, "monitor": 8, "templat": 8, "troubleshoot": 8, "jupyt": 8, "remot": 8, "Andes": 8, "andes": 9, "andes_env": 9, "anaconda": 9, "anaconda3": [9, 11], "modifi": [9, 12], "ad": 9, "y": [9, 11], "ipykernel": [9, 11], "nb_conda_kernel": [9, 11], "sourc": 9, "top": 9, "pip": 9, "uninstal": 9, "ast106": [9, 12], "plot": 9, "vol": 9, "enuc": 9, "flame_wave_1000hz_25cm_smallplt203204": 9, "veri": [9, 12], "might": 9, "solut": 9, "accomplish": 9, "xl": 10, "atleast": 10, "due": 10, "17": 10, "won": [10, 12], "present": 10, "warn": [10, 12], "packag": 10, "fine": 10, "sure": [10, 11, 12], "current": [10, 12], "disallow": 10, "line": [10, 12], "pair": 10, "offload": 10, "control": 10, "use_omp_offload": 10, "featur": [10, 12], "frontier_user_guid": [10, 12], "program": 10, "crayp": [10, 12], "accel": [10, 12], "amd": [10, 12], "gfx90a": [10, 12], "crai": [10, 12], "mpich": [10, 12], "rocm": [10, 12], "higher": 10, "issu": 10, "burner": 10, "tabul": 10, "rate": 10, "exhibit": 10, "strang": 10, "slow": 10, "without": [10, 12, 13], "use_hip": [10, 12], "jupyterhub": 11, "document": [11, 12], "extra": 11, "part": 11, "notebook": 11, "wish": 11, "correct": [11, 12], "point": [11, 12, 13], "good": [11, 12], "idea": [11, 12], "my_env": 11, "jupyterlab": 11, "subsequ": 11, "channel": 11, "search": [11, 12], "let": 12, "review": 12, "our": 12, "goal": 12, "necessari": 12, "insight": 12, "better": 12, "decis": 12, "construct": 12, "explain": 12, "expos": 12, "section": 12, "condens": 12, "replac": 12, "21": 12, "reserv": 12, "ram": 12, "bank": 12, "connect": [12, 13], "bu": 12, "commun": 12, "among": 12, "defin": 12, "whole": 12, "structur": 12, "depict": 12, "figur": 12, "extract": 12, "summit_user_guid": 12, "launcher": 12, 
"jsrun": 12, "minim": 12, "collect": 12, "certain": 12, "oper": 12, "extend": 12, "discuss": 12, "now": 12, "determin": 12, "maximum": 12, "fit": 12, "accord": 12, "summon": 12, "bsub": 12, "descript": 12, "perform": 12, "calcul": 12, "format": 12, "room": 12, "03": 12, "three": 12, "alloc_flag": 12, "smt4": 12, "consid": 12, "smt1": 12, "stand": 12, "interest": 12, "standard": 12, "stream": 12, "insid": 12, "similar": 12, "suppli": 12, "assum": 12, "between": 12, "small": 12, "smoothli": 12, "bug": [12, 13], "unix": 12, "mention": 12, "stdout_to_show": 12, "stderr_to_show": 12, "No": 12, "onc": 12, "grant": 12, "variabl": 12, "total": 12, "r": 12, "max": 12, "a1": 12, "c1": 12, "g1": 12, "r6": 12, "placehold": 12, "respect": 12, "match": 12, "box": 12, "grid": 12, "biggest": 12, "piec": 12, "32768": 12, "cell": 12, "100": 12, "131072": 12, "524288": 12, "32": 12, "2097152": 12, "7864320": 12, "93": 12, "75": 12, "480": 12, "30408704": 12, "90": 12, "625": 12, "assert": 12, "equival": 12, "impli": 12, "398": 12, "idl": 12, "sweep": 12, "entir": 12, "possibl": 12, "maxim": 12, "life": 12, "easier": 12, "instead": 12, "write": 12, "anoth": 12, "pack": 12, "statement": 12, "luna_script": 12, "luna_output": 12, "luna_sniffing_output": 12, "inputs_luna": 12, "n_re": 12, "n_cpu_cores_per_r": 12, "n_max_res_per_nod": 12, "n_mpi_per_r": 12, "n_gpu_per_r": 12, "downgrad": 12, "kill": 12, "As": 12, "week": 12, "month": 12, "mayb": 12, "year": 12, "come": 12, "salvat": 12, "mandatori": 12, "chkxxxxxxx": 12, "chkxxxxxx": 12, "chkxxxxx": 12, "implement": 12, "append": 12, "minimum": 12, "pick": 12, "amount": 12, "expir": 12, "pass": 12, "wt": 12, "cleanli": 12, "couldn": 12, "anywher": 12, "ignor": 12, "immedi": 12, "crash": 12, "upon": 12, "sigchld": 12, "sigwinch": 12, "least": 12, "trigger": 12, "other": 12, "event": 12, "launch": [12, 13], "must": 12, "jswait": 12, "save": 12, "bjob": 12, "slightli": 12, "nicer": 12, "jobstat": 12, "lead": 12, "enviro": 12, "simultan": 12, 
"n_res_1": 12, "n_res2": 12, "give": 12, "avoid": 12, "head": 12, "quiet": 12, "level_": 12, "dev": 12, "null": 12, "warning_tim": 12, "nohead": 12, "action_warning_tim": 12, "lsb_jobid": 12, "chain_submit": 12, "submit_script": 12, "return": 12, "transfer": 12, "15": 12, "cluster": 12, "dtn": 12, "test_hpss": 12, "associ": 12, "summit_hpss": 12, "jobs_script": 12, "togeth": 12, "detect": 12, "onto": 12, "probin": 12, "under": 12, "enter": [12, 13], "being": 12, "unarchiv": 12, "bulk": 12, "hpss_xfer": 12, "plt00000": 12, "hpss_dir": 12, "plotfile_dir": 12, "fetch": 12, "unpack": 12, "attempt": 12, "recov": 12, "titan": 12, "polici": 12, "orion": 12, "storag": 12, "05": 12, "closest": 12, "hip": 12, "trento": 12, "nmpi_per_nod": 12, "total_nmpi": 12, "slurm_job_num_nod": 12, "june": 12, "2023": 12, "explicitli": 12, "blob": 12, "warpx": 12, "readthedoc": 12, "io": 12, "en": 12, "queu": 12, "rocgdb": 12, "27": 12, "turn": 12, "startup": 12, "session": 12, "salloc": 12, "mz": 12, "restor": 12, "reload": 12, "hip_enable_deferred_load": 12, "amd_serialize_kernel": 12, "amd_serialize_copi": 12, "amd_log_level": 12, "lot": 12, "debugg": 12, "pagin": 12, "off": 12, "trace": 12, "interrupt": 12, "bt": 12, "workaround": 12, "prevent": 12, "hang": 12, "fi_mr_cache_monitor": 12, "memhook": 12, "report": 12, "arena": 12, "big": 12, "the_arena_init_s": 12, "grow": 12, "suggest": 12, "larger": 12, "than": 12, "well": 13, "nvcc": 13, "cuda_vers": 13, "cc60": 13, "compile_cuda_path": 13, "usr": 13, "no_device_launch": 13, "around": 13, "cc70": 13, "On": 13, "lab": 13, "browser": 13, "ip": 13, "localhost": 13, "8888": 13, "sunysb": 13, "window": 13, "web": 13, "token": 13, "appear": 13}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"work": [0, 1, 3, 8], "alcf": 0, "log": 0, "In": 0, "compil": [0, 1, 4, 10], "disk": 0, "queue": 0, "submit": [0, 12], "automat": 0, "restart": 0, "job": [0, 7, 12], "chain": [0, 7, 12], "iac": 1, "ookami": 1, "amrex": [1, 2], "setup": 
1, "crai": 1, "gcc": [1, 4], "10": 1, "2": 1, "astrophys": 2, "suit": 2, "astro": 2, "basic": 2, "nersc": [3, 4, 6, 7], "perlmutt": [4, 7], "cuda": 4, "hypr": 4, "archiv": [5, 12], "data": 5, "hpss": [5, 12], "visual": [6, 9], "manag": [7, 12], "filesystem": 7, "olcf": [8, 10, 11, 12], "batch": 9, "Andes": 9, "summit": [10, 12], "frontier": [10, 12], "run": 11, "jupyt": [11, 13], "remot": [11, 13], "from": 11, "creat": 11, "conda": 11, "environ": 11, "architectur": 12, "request": 12, "alloc": 12, "writ": 12, "script": 12, "monitor": 12, "templat": 12, "machin": 12, "detail": 12, "statu": 12, "debug": 12, "troubleshoot": 12, "linux": 13, "workstat": 13, "gpu": 13, "offload": 13, "bender": 13, "groot": 13, "vi": 13}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 60}, "alltitles": {"Working at ALCF": [[0, "working-at-alcf"]], "Logging In": [[0, "logging-in"]], "Compiling": [[0, "compiling"]], "Disks": [[0, "disks"]], "Queues": [[0, "queues"]], "Submitting": [[0, "submitting"]], "Automatic Restarting": [[0, "automatic-restarting"]], "Job Chaining": [[0, "job-chaining"], [12, "job-chaining"]], "Working at IACS": [[1, "working-at-iacs"]], "Ookami": [[1, "ookami"]], "AMReX setup": [[1, "amrex-setup"]], "Cray compilers": [[1, "cray-compilers"]], "GCC": [[1, "gcc"]], "GCC 10.2": [[1, "gcc-10-2"]], "AMReX Astrophysics Suite": [[2, "amrex-astrophysics-suite"]], "AMReX Astro basics": [[2, null]], "Working at NERSC": [[3, "working-at-nersc"]], "Compiling at NERSC": [[4, "compiling-at-nersc"]], "Perlmutter": [[4, "perlmutter"], [7, "perlmutter"]], "Compiling with GCC + CUDA": [[4, "compiling-with-gcc-cuda"]], "Hypre": [[4, "hypre"]], "Archiving Data to HPSS": [[5, "archiving-data-to-hpss"]], "Visualization 
at NERSC": [[6, "visualization-at-nersc"]], "Managing Jobs at NERSC": [[7, "managing-jobs-at-nersc"]], "Filesystems": [[7, "filesystems"]], "Chaining": [[7, "chaining"]], "Working at OLCF": [[8, "working-at-olcf"]], "Batch Visualization on Andes": [[9, "batch-visualization-on-andes"]], "Compiling at OLCF": [[10, "compiling-at-olcf"]], "Summit": [[10, "summit"], [12, "summit"]], "Frontier": [[10, "frontier"], [12, "frontier"]], "Running Jupyter Remotely from OLCF": [[11, "running-jupyter-remotely-from-olcf"]], "Creating a conda environment": [[11, "creating-a-conda-environment"]], "Managing Jobs at OLCF": [[12, "managing-jobs-at-olcf"]], "Summit Architecture:": [[12, "summit-architecture"]], "Requesting Allocation:": [[12, "requesting-allocation"]], "Submitting a Job:": [[12, "submitting-a-job"]], "Writting a Job Script:": [[12, "writting-a-job-script"]], "Monitoring a Job:": [[12, "monitoring-a-job"]], "Script Template:": [[12, "script-template"]], "Chaining jobs": [[12, "chaining-jobs"]], "Archiving to HPSS": [[12, "archiving-to-hpss"]], "Machine details": [[12, "machine-details"]], "Submitting jobs": [[12, "submitting-jobs"]], "Job Status": [[12, "job-status"]], "Debugging": [[12, "debugging"]], "Troubleshooting": [[12, "troubleshooting"]], "Linux Workstations": [[13, "linux-workstations"]], "GPU offloading": [[13, "gpu-offloading"]], "bender": [[13, "bender"]], "groot": [[13, "groot"]], "Remote vis with Jupyter": [[13, "remote-vis-with-jupyter"]]}, "indexentries": {}}) \ No newline at end of file +Search.setIndex({"docnames": ["alcf", "iacs", "index", "nersc", "nersc-compilers", "nersc-hpss", "nersc-visualization", "nersc-workflow", "olcf", "olcf-andes", "olcf-compilers", "olcf-jupyter", "olcf-workflow", "workstations"], "filenames": ["alcf.rst", "iacs.rst", "index.rst", "nersc.rst", "nersc-compilers.rst", "nersc-hpss.rst", "nersc-visualization.rst", "nersc-workflow.rst", "olcf.rst", "olcf-andes.rst", "olcf-compilers.rst", "olcf-jupyter.rst", 
"olcf-workflow.rst", "workstations.rst"], "titles": ["Working at ALCF", "Working at IACS", "AMReX Astrophysics Suite", "Working at NERSC", "Compiling at NERSC", "Archiving Data to HPSS", "Visualization at NERSC", "Managing Jobs at NERSC", "Working at OLCF", "Batch Visualization on Andes", "Compiling at OLCF", "Running Jupyter Remotely from OLCF", "Managing Jobs at OLCF", "Linux Workstations"], "terms": {"polari": 0, "ha": [0, 1, 7, 11], "560": 0, "node": [0, 1, 6, 7, 9, 12], "each": [0, 1, 7, 12], "4": [0, 1, 4, 7, 10, 12, 13], "nvidia": [0, 7], "a100": [0, 7], "gpu": [0, 2, 7, 9, 10, 12], "The": [0, 1, 5, 6, 7, 10, 12], "pb": 0, "schedul": [0, 12], "i": [0, 1, 5, 6, 7, 9, 10, 11, 12], "us": [0, 1, 4, 5, 7, 9, 10, 11, 12, 13], "ssh": [0, 1, 4, 13], "ornl": [0, 9, 10, 12], "gov": [0, 4, 6, 9, 10, 12], "To": [0, 1, 7, 11, 12, 13], "have": [0, 1, 5, 7, 9, 10, 11, 12], "custom": 0, "bashrc": [0, 9], "creat": [0, 5, 6, 7, 8, 9, 12], "bash": [0, 1, 6, 7, 9, 12], "expert": 0, "file": [0, 1, 5, 7, 12], "add": [0, 1, 11, 12, 13], "anyth": 0, "thi": [0, 1, 5, 6, 7, 9, 10, 11, 12], "read": [0, 12], "end": [0, 7, 12], "etc": 0, "load": [0, 1, 4, 6, 7, 9, 10, 11, 12, 13], "modul": [0, 1, 4, 6, 7, 9, 10, 11, 12, 13], "swap": [0, 10], "prgenv": [0, 4, 7, 10, 12], "nvhpc": 0, "gnu": [0, 1, 4, 7, 10, 12, 13], "gcc": [0, 3, 10, 12, 13], "11": [0, 6, 10, 12, 13], "2": [0, 7, 9, 10, 12, 13], "0": [0, 1, 6, 7, 10, 11, 12], "version": [0, 1, 10, 12, 13], "sinc": [0, 5, 7, 10, 12], "cuda": [0, 3, 7, 10, 12, 13], "doesn": [0, 5, 10, 12], "39": 0, "t": [0, 5, 6, 7, 9, 10, 12], "support": [0, 1, 9, 10, 12], "12": [0, 1], "yet": 0, "mix": [0, 10, 12], "Then": [0, 9, 10, 12], "you": [0, 1, 4, 5, 6, 7, 9, 10, 11, 12, 13], "can": [0, 1, 5, 6, 7, 9, 11, 12, 13], "via": [0, 1, 4, 7, 9, 10, 11, 12], "make": [0, 1, 4, 5, 9, 10, 11, 12, 13], "comp": [0, 1, 4, 10, 13], "use_cuda": [0, 4, 10, 13], "true": [0, 1, 4, 10, 12, 13], "project": [0, 1, 7, 9], "workspac": 0, "lu": 0, "grand": 0, 
"astroexplos": 0, "http": [0, 1, 2, 6, 10, 12, 13], "www": [0, 1], "anl": 0, "user": 0, "guid": [0, 1, 12], "run": [0, 1, 2, 5, 6, 7, 8, 9, 10, 12, 13], "index": 0, "html": [0, 10, 12], "For": [0, 7, 9, 12], "product": [0, 12], "prod": 0, "smallest": [0, 12], "count": 0, "seem": [0, 1, 10], "allow": [0, 5, 7, 12], "10": [0, 10, 12, 13], "clone": [0, 9], "gettingstart": 0, "repo": [0, 9], "git": [0, 1, 9], "github": [0, 2, 4, 9, 12], "com": [0, 1, 2, 9, 12], "argonn": 0, "lcf": 0, "ll": [0, 9], "want": [0, 5, 7, 9, 12], "exampl": [0, 7, 10, 12], "affinity_gpu": 0, "particular": [0, 2, 6, 7, 12], "need": [0, 1, 6, 7, 9, 10, 11, 12, 13], "script": [0, 5, 6, 7, 8, 9, 10], "set_affinity_gpu_polari": 0, "sh": [0, 7, 12], "copi": [0, 5, 12], "your": [0, 5, 6, 7, 9, 10, 11, 12, 13], "directori": [0, 5, 7, 9, 12], "here": [0, 6, 7, 9, 12], "": [0, 7, 11, 12], "submiss": [0, 5, 6, 7, 9, 10, 12], "bin": [0, 6, 7, 9, 12], "l": [0, 7, 13], "select": [0, 7, 12], "system": [0, 7, 10, 12], "place": [0, 5], "scatter": 0, "walltim": [0, 12], "30": [0, 12], "00": [0, 6, 7, 9, 12], "q": [0, 6, 7, 12], "debug": [0, 7, 8], "A": [0, 6, 7, 9, 12], "exec": [0, 12], "castro2d": [0, 7, 12], "mpi": [0, 1, 4, 7, 12], "smplsdc": [0, 7], "ex": [0, 1, 7, 12], "input": [0, 1, 5, 7, 12], "inputs_2d": [0, 7], "n14": [0, 7], "coars": [0, 7], "enabl": [0, 4], "applic": [0, 12], "export": [0, 1, 6, 7, 12], "mpich_gpu_support_en": 0, "1": [0, 1, 6, 7, 9, 12], "chang": [0, 5, 7, 12], "cd": [0, 7, 9], "pbs_o_workdir": 0, "openmp": [0, 1, 7, 10, 12], "set": [0, 1, 7, 12], "nnode": [0, 12], "wc": 0, "pbs_nodefil": 0, "nranks_per_nod": 0, "ndepth": 0, "8": [0, 6, 7, 12], "nthread": 0, "ntotrank": 0, "mpiexec": [0, 1], "bind": [0, 7, 12], "rank": 0, "n": [0, 1, 6, 7, 9, 11, 12, 13], "ppn": 0, "depth": 0, "cpu": [0, 6, 7, 12], "env": [0, 9], "omp_num_thread": [0, 1, 6, 7, 12], "omp_plac": [0, 6, 7], "thread": [0, 1, 7, 12], "do": [0, 1, 6, 7, 9, 11, 12, 13], "qsub": 0, "check": [0, 7, 12], "statu": [0, 7, 8], 
"qstat": 0, "u": [0, 7, 12], "usernam": [0, 7, 12], "from": [0, 4, 5, 7, 8, 9, 12], "last": [0, 7, 12], "checkpoint": [0, 5, 7, 12], "6": [0, 7, 12], "j": [0, 1, 4, 6, 7, 9, 10, 12, 13], "eo": 0, "function": [0, 7, 12], "find_chk_fil": [0, 7, 12], "take": [0, 1, 7, 12], "singl": [0, 7, 12], "argument": [0, 7, 12], "wildcard": [0, 7, 12], "pattern": [0, 7, 12], "look": [0, 5, 7, 12], "through": [0, 7, 12], "chk": [0, 7, 12], "find": [0, 1, 7, 10, 12], "latest": [0, 1, 7, 12], "wai": [0, 6, 7, 12], "didn": [0, 7, 12], "complet": [0, 7, 12], "we": [0, 1, 4, 7, 9, 12, 13], "fall": [0, 7, 12], "back": [0, 7, 12], "previou": [0, 7, 12], "one": [0, 5, 7, 12], "temp_fil": [0, 7, 12], "maxdepth": [0, 7, 12], "name": [0, 5, 6, 7, 12], "print": [0, 7, 12], "sort": [0, 7, 12], "tail": [0, 7, 12], "restartfil": [0, 7, 12], "f": [0, 7, 12], "header": [0, 7, 12, 13], "thing": [0, 7, 12], "written": [0, 5, 7, 12], "updat": [0, 7, 12], "fi": [0, 7, 12], "done": [0, 1, 5, 7, 12], "7": [0, 4, 7, 9, 10, 11, 12, 13], "digit": [0, 7, 12], "5": [0, 1, 7, 10, 11, 12], "restartstr": [0, 7, 12], "empti": [0, 7, 12], "ar": [0, 1, 4, 5, 7, 10, 12], "found": [0, 7, 12], "e": [0, 4, 7, 9, 11, 12], "new": [0, 7, 9, 11, 12], "els": [0, 7, 12], "amr": [0, 1, 7, 12], "chainqsub": 0, "echo": [0, 7, 12], "usag": [0, 12], "jobid": [0, 5, 7], "number": [0, 1, 7, 12], "initi": [0, 7, 12], "depend": [0, 5, 7, 12], "exit": [0, 7, 12], "3": [0, 1, 4, 6, 7, 9, 10, 11, 12, 13], "oldjob": 0, "numjob": 0, "gt": 0, "20": [0, 7], "too": [0, 12], "mani": [0, 12], "request": [0, 8], "firstcount": 0, "eq": 0, "start": [0, 1, 5, 7, 12], "aout": 0, "sleep": 0, "seq": 0, "w": [0, 12], "afterani": 0, "48": 1, "comput": [1, 7, 12], "core": [1, 6, 7, 12], "group": 1, "pool": 1, "actual": [1, 5, 7], "13th": 1, "o": [1, 6, 7, 12], "stuff": 1, "so": [1, 7, 10, 12, 13], "an": [1, 7, 9, 10, 12, 13], "ideal": 1, "config": [1, 11], "would": [1, 7, 9, 12], "log": [1, 4, 10, 12], "login": 1, "stonybrook": 1, "edu": [1, 13], 
"tell": [1, 7, 12], "about": [1, 2, 12], "machin": [1, 2, 8], "put": [1, 12], "follow": [1, 5, 6, 7, 9, 12], "local": [1, 13], "tool": [1, 5], "gnumak": 1, "raw": 1, "githubusercont": 1, "astro": [1, 10, 12, 13], "workflow": [1, 12], "main": [1, 5, 12], "job_script": [1, 5, 12], "onli": [1, 5, 7, 12], "access": [1, 7, 10, 12], "environ": [1, 6, 8, 9, 10, 12], "note": [1, 5, 12], "srun": [1, 6, 7, 9, 12], "p": [1, 9, 12], "short": 1, "pty": 1, "interact": [1, 12], "slurm": [1, 5, 7, 9, 12], "job": [1, 3, 5, 8], "time": [1, 5, 7, 12], "out": [1, 5, 6, 7, 9, 12], "after": [1, 7, 12], "hour": [1, 12], "infinit": 1, "fj": 1, "debug1": 1, "debug2": 1, "them": [1, 5, 12], "There": [1, 7, 12, 13], "cce": 1, "sve": 1, "former": 1, "newer": 1, "llvm": 1, "base": [1, 7], "ocompil": 1, "fortran": 1, "doe": [1, 9, 10], "arm": 1, "architectur": [1, 8], "latter": 1, "older": [1, 13], "even": 1, "though": 1, "both": 1, "form": [1, 12], "x": [1, 12, 13], "thei": [1, 11, 12], "differ": [1, 7], "option": [1, 7, 10, 12], "see": [1, 7, 10, 12], "commcm": 1, "faq": 1, "get": [1, 12], "php": 1, "cpe": 1, "mvapich2_nogpu": 1, "should": [1, 4, 5, 7, 10, 12], "test": [1, 4, 7, 10, 12], "mak": 1, "recogn": 1, "switch": 1, "old": 1, "flag": [1, 12], "build": [1, 2, 4, 10, 12, 13], "24": 1, "use_mpi": [1, 4, 10, 12, 13], "fals": [1, 4, 10, 12, 13], "long": [1, 5], "At": [1, 11], "moment": [1, 10], "link": 1, "cannot": 1, "nopattern": 1, "error": [1, 12], "which": [1, 5, 7, 9, 10, 11, 12], "why": 1, "comment": 1, "abov": [1, 12], "lustr": [1, 12], "global": 1, "softwar": 1, "a64fx": 1, "modulefil": 1, "mvapich2": 1, "use_omp": [1, 4, 10], "know": [1, 5], "chip": 1, "specif": [1, 9], "mv2_enable_affin": 1, "castro3d": [1, 12], "omp": [1, 7], "3d": [1, 12], "sph": [1, 12], "max_level": 1, "max_step": 1, "These": 2, "doc": [2, 6, 10, 12], "provid": [2, 12], "inform": [2, 12], "code": [2, 9, 10, 12, 13], "nyx": 2, "maestroex": 2, "castro": [2, 4, 7, 10, 12], "variou": 2, "includ": [2, 7, 12], 
"hpc": [2, 12], "center": 2, "workstat": 2, "gener": [2, 12, 13], "work": [2, 5, 9, 10, 12, 13], "alcf": 2, "nersc": [2, 5], "olcf": [2, 9], "iac": 2, "linux": 2, "compil": [3, 7, 8, 12, 13], "perlmutt": [3, 5], "hypr": 3, "manag": [3, 8], "filesystem": [3, 5, 12], "chain": [3, 8], "archiv": [3, 8], "data": [3, 12], "hpss": [3, 8], "visual": [3, 8, 13], "p1": 4, "cudatoolkit": [4, 7], "python": [4, 6, 9, 10, 11, 12], "requir": [4, 11, 12], "process": [4, 5, 7, 12], "g": [4, 11, 12], "sedov": 4, "hydro": 4, "problem": [4, 7, 12], "tiny_profil": 4, "obtain": [4, 7], "built": 4, "same": [4, 5, 7, 10, 12], "hypre_cuda_sm": 4, "80": [4, 12], "cxx": 4, "cc": 4, "fc": 4, "ftn": 4, "configur": 4, "prefix": 4, "path": [4, 5, 12, 13], "instal": [4, 6, 9, 11, 12], "unifi": 4, "memori": [4, 7, 9, 10, 12], "larg": [5, 9], "tape": 5, "librari": [5, 7, 10, 11], "store": [5, 12], "simul": [5, 12], "period": 5, "It": [5, 9, 12], "recommend": [5, 10], "move": [5, 12], "frequent": 5, "scratch": [5, 7, 12], "fill": 5, "up": [5, 7, 12], "purg": 5, "xfer": 5, "automat": [5, 12], "submit": [5, 7, 8, 9], "queue": [5, 7, 12], "xrb": [5, 9, 12], "continu": [5, 13], "output": [5, 12, 13], "By": [5, 7, 12], "default": [5, 7, 10, 12, 13], "destin": 5, "plotfil": [5, 9, 12], "locat": [5, 7, 12], "edit": [5, 12], "hpss_dir": [5, 12], "variabl": [5, 12], "top": [5, 9, 12], "describ": [5, 11, 12], "how": [5, 12], "sbatch": [5, 6, 7, 9, 12], "call": [5, 9, 12], "background": [5, 7, 12], "wait": [5, 7, 12], "until": [5, 7, 12], "alwai": [5, 12], "leav": [5, 12], "most": [5, 7], "recent": [5, 6, 12], "alon": [5, 12], "mai": [5, 7, 12], "still": [5, 12], "htar": [5, 12], "If": [5, 7, 9, 11, 12], "command": [5, 12, 13], "wa": [5, 12], "success": 5, "subdirectori": [5, 12], "import": [5, 10, 12], "don": 5, "try": [5, 12], "second": [5, 7, 12], "overwrit": 5, "especi": 5, "took": 5, "some": [5, 12], "addit": [5, 12], "ftime": [5, 12], "execut": [5, 7, 12], "cpp": 5, "live": 5, "amrex": [5, 10, 12], 
"list": [5, 12], "correspond": [5, 12], "right": 5, "when": [5, 9, 10, 11, 12], "tar": [5, 12], "all": [5, 7, 12], "diagnost": 5, "given": 5, "contain": [5, 7, 12], "date": 5, "string": 5, "multipl": [5, 7], "co": 5, "exist": [5, 7, 9, 12], "lockfil": 5, "ensur": [5, 12], "instanc": [5, 7], "ani": [5, 7, 12], "sometim": [5, 12], "termin": [5, 13], "normal": 5, "left": [5, 7, 12], "behind": 5, "later": 5, "abl": 5, "detect": [5, 12], "clean": [5, 7, 12], "stale": 5, "delet": 5, "quickli": 5, "best": [5, 6, 7, 9, 10, 11, 12], "approach": 5, "sampl": [5, 6, 12], "produc": [5, 12], "yt": [6, 9, 11], "setup": [6, 9, 10, 12], "own": 6, "conda": [6, 8, 9], "step": [6, 7, 12], "develop": 6, "languag": 6, "someth": 6, "like": [6, 7, 12], "init": [6, 9], "myenv": 6, "activ": [6, 9], "more": [6, 7, 9, 12], "c": [6, 7, 9, 10, 11, 12], "forg": [6, 9, 11], "deactiv": 6, "m3018": [6, 7], "vi": 6, "vis_": 6, "01": 6, "ntask": [6, 7, 12], "per": [6, 7, 12], "regular": [6, 7], "omp_proc_bind": [6, 7], "spread": [6, 7], "massive_star_multi": 6, "py": [6, 7, 9, 12], "plt19862": 6, "1536": 7, "therefor": [7, 12], "task": [7, 12], "otherwis": [7, 10], "fail": 7, "runtim": [7, 10, 12], "becaus": [7, 12], "below": [7, 12], "16": 7, "also": [7, 10, 11, 12], "restart": [7, 12], "logic": 7, "m3018_g": 7, "subch_": 7, "map_gpu": 7, "signal": [7, 12], "b": [7, 12], "urg": [7, 12], "castro_exec": 7, "over": [7, 12], "rm": [7, 12], "dump_and_stop": [7, 12], "send": [7, 12], "sigurg": [7, 12], "batch": [7, 8, 12], "minut": [7, 12], "befor": [7, 12], "limit": [7, 12], "gracefulli": [7, 12], "sig_handl": [7, 12], "touch": [7, 12], "disabl": [7, 12], "handler": [7, 12], "trap": [7, 12], "alloc": [7, 8], "soon": [7, 12], "dump": [7, 12], "stop": [7, 12], "workdir": 7, "basenam": 7, "slurm_submit_dir": [7, 9], "slack_job_start": 7, "michael": 7, "builtin": [7, 12], "shell": [7, 12], "handl": [7, 12], "64": 7, "pid": 7, "ret": 7, "128": [7, 12], "23": 7, "receiv": [7, 12], "keep": [7, 12], "refer": 7, 
"distribut": [7, 11], "parallel": 7, "hyper": 7, "share": [7, 10], "socket": [7, 12], "256": [7, 12], "howev": [7, 12], "assign": 7, "physic": [7, 12], "detail": [7, 8], "instruct": 7, "within": 7, "perlmutter_script": 7, "account": 7, "qo": 7, "02": 7, "constraint": 7, "In": [7, 10, 11, 12, 13], "order": [7, 10, 11, 12], "coupl": 7, "design": [7, 12], "strategi": 7, "first": [7, 9, 11, 12], "fix": [7, 12], "next": [7, 12], "virtual": 7, "avail": [7, 12], "compos": [7, 12], "two": [7, 12], "where": [7, 12], "numa": 7, "domain": [7, 12], "lower": 7, "case": [7, 12], "shortag": 7, "principl": 7, "squeez": 7, "resourc": [7, 12], "wall": 7, "clock": 7, "timestep": [7, 12], "grep": 7, "slurm_output": 7, "repeat": 7, "perfect": 7, "balanc": [7, 12], "reach": 7, "choic": 7, "compar": 7, "max_grid_s": 7, "optim": 7, "valu": [7, 12], "usual": [7, 10, 12], "half": 7, "level": [7, 9, 12], "half_siz": 7, "furthermor": 7, "sever": [7, 12], "blocking_factor": 7, "size": [7, 12], "final": [7, 9, 12], "increas": [7, 12], "scale": 7, "correctli": 7, "go": 7, "down": [7, 10], "factor": 7, "break": 7, "bigger": 7, "chainslurm": [7, 12], "mpich_max_thread_safeti": 7, "x86": [7, 12], "milan": 7, "inputs_fil": 7, "inputs_nova_t7": 7, "slurm_ntasks_per_nod": 7, "slurm_nnod": 7, "slurm_cpus_per_task": 7, "altern": [7, 12], "common": 7, "cf": 7, "everyon": 7, "squeue": [7, 12], "me": [7, 12], "estim": [7, 12], "cancel": 7, "scancel": 7, "tb": 7, "quota": 7, "showquota": 7, "finish": [7, 12], "origin": 7, "remain": [7, 12], "view": [7, 12], "id": [7, 12], "summit": [8, 11], "frontier": 8, "writ": 8, "monitor": 8, "templat": 8, "troubleshoot": 8, "jupyt": 8, "remot": 8, "Andes": 8, "andes": 9, "andes_env": 9, "anaconda": 9, "anaconda3": [9, 11], "modifi": [9, 12], "ad": 9, "y": [9, 11], "ipykernel": [9, 11], "nb_conda_kernel": [9, 11], "sourc": 9, "pip": 9, "uninstal": 9, "ast106": [9, 12], "plot": 9, "vol": 9, "enuc": 9, "flame_wave_1000hz_25cm_smallplt203204": 9, "veri": [9, 12], "might": 
9, "solut": 9, "accomplish": 9, "xl": 10, "atleast": 10, "due": 10, "17": 10, "won": [10, 12], "present": 10, "warn": [10, 12], "packag": 10, "fine": 10, "sure": [10, 11, 12], "current": [10, 12], "disallow": 10, "line": [10, 12], "pair": 10, "offload": 10, "control": 10, "use_omp_offload": 10, "featur": [10, 12], "frontier_user_guid": [10, 12], "program": 10, "crayp": [10, 12], "accel": [10, 12], "amd": [10, 12], "gfx90a": [10, 12], "crai": [10, 12], "mpich": [10, 12], "rocm": [10, 12], "higher": 10, "issu": 10, "burner": 10, "tabul": 10, "rate": 10, "exhibit": 10, "strang": 10, "slow": 10, "without": [10, 12, 13], "use_hip": [10, 12], "jupyterhub": 11, "document": [11, 12], "extra": 11, "part": 11, "notebook": 11, "wish": 11, "correct": [11, 12], "point": [11, 12, 13], "good": [11, 12], "idea": [11, 12], "my_env": 11, "jupyterlab": 11, "subsequ": 11, "channel": 11, "search": [11, 12], "let": 12, "review": 12, "our": 12, "goal": 12, "necessari": 12, "insight": 12, "better": 12, "decis": 12, "construct": 12, "explain": 12, "expos": 12, "section": 12, "condens": 12, "replac": 12, "21": 12, "reserv": 12, "ram": 12, "bank": 12, "connect": [12, 13], "bu": 12, "commun": 12, "among": 12, "defin": 12, "whole": 12, "structur": 12, "depict": 12, "figur": 12, "extract": 12, "summit_user_guid": 12, "launcher": 12, "jsrun": 12, "minim": 12, "collect": 12, "certain": 12, "oper": 12, "extend": 12, "discuss": 12, "now": 12, "just": [12, 13], "determin": 12, "maximum": 12, "fit": 12, "accord": 12, "summon": 12, "bsub": 12, "descript": 12, "perform": 12, "calcul": 12, "format": 12, "room": 12, "03": 12, "three": 12, "alloc_flag": 12, "smt4": 12, "consid": 12, "smt1": 12, "stand": 12, "interest": 12, "standard": 12, "stream": 12, "insid": 12, "similar": 12, "suppli": 12, "assum": 12, "between": 12, "small": 12, "smoothli": 12, "bug": [12, 13], "unix": 12, "mention": 12, "stdout_to_show": 12, "stderr_to_show": 12, "No": 12, "onc": 12, "grant": 12, "total": 12, "r": 12, "max": 12, 
"a1": 12, "c1": 12, "g1": 12, "r6": 12, "placehold": 12, "respect": 12, "match": 12, "box": 12, "grid": 12, "biggest": 12, "piec": 12, "32768": 12, "cell": 12, "100": 12, "131072": 12, "524288": 12, "32": 12, "2097152": 12, "7864320": 12, "93": 12, "75": 12, "480": 12, "30408704": 12, "90": 12, "625": 12, "assert": 12, "equival": 12, "impli": 12, "398": 12, "idl": 12, "sweep": 12, "entir": 12, "possibl": 12, "maxim": 12, "life": 12, "easier": 12, "instead": 12, "write": 12, "anoth": 12, "pack": 12, "statement": 12, "luna_script": 12, "luna_output": 12, "luna_sniffing_output": 12, "inputs_luna": 12, "n_re": 12, "n_cpu_cores_per_r": 12, "n_max_res_per_nod": 12, "n_mpi_per_r": 12, "n_gpu_per_r": 12, "downgrad": 12, "kill": 12, "As": 12, "week": 12, "month": 12, "mayb": 12, "year": 12, "come": 12, "salvat": 12, "mandatori": 12, "chkxxxxxxx": 12, "chkxxxxxx": 12, "chkxxxxx": 12, "implement": 12, "append": 12, "minimum": 12, "pick": 12, "ask": 12, "amount": 12, "expir": 12, "pass": 12, "wt": 12, "cleanli": 12, "couldn": 12, "anywher": 12, "ignor": 12, "immedi": 12, "crash": 12, "upon": 12, "sigchld": 12, "sigwinch": 12, "least": 12, "trigger": 12, "other": 12, "event": 12, "launch": [12, 13], "must": 12, "jswait": 12, "save": 12, "bjob": 12, "slightli": 12, "nicer": 12, "jobstat": 12, "lead": 12, "enviro": 12, "simultan": 12, "n_res_1": 12, "n_res2": 12, "give": 12, "avoid": 12, "head": 12, "quiet": 12, "level_": 12, "dev": 12, "null": 12, "warning_tim": 12, "nohead": 12, "action_warning_tim": 12, "lsb_jobid": 12, "chain_submit": 12, "submit_script": 12, "return": 12, "transfer": 12, "15": 12, "cluster": 12, "dtn": 12, "test_hpss": 12, "associ": 12, "summit_hpss": 12, "jobs_script": 12, "togeth": 12, "onto": 12, "probin": 12, "under": 12, "full": 12, "being": 12, "unarchiv": 12, "bulk": 12, "hpss_xfer": 12, "plt00000": 12, "plotfile_dir": 12, "fetch": 12, "unpack": 12, "attempt": 12, "recov": 12, "titan": 12, "help": 12, "polici": 12, "orion": 12, "storag": 12, "05": 12, 
"closest": 12, "hip": 12, "trento": 12, "nmpi_per_nod": 12, "total_nmpi": 12, "slurm_job_num_nod": 12, "june": 12, "2023": 12, "explicitli": 12, "blob": 12, "warpx": 12, "readthedoc": 12, "io": 12, "en": 12, "queu": 12, "rocgdb": 12, "27": 12, "turn": 12, "startup": 12, "session": 12, "salloc": 12, "mz": 12, "restor": 12, "reload": 12, "hip_enable_deferred_load": 12, "amd_serialize_kernel": 12, "amd_serialize_copi": 12, "amd_log_level": 12, "lot": 12, "debugg": 12, "pagin": 12, "off": 12, "abort": 12, "trace": 12, "interrupt": 12, "bt": 12, "workaround": 12, "prevent": 12, "hang": 12, "fi_mr_cache_monitor": 12, "memhook": 12, "report": 12, "arena": 12, "big": 12, "the_arena_init_s": 12, "grow": 12, "suggest": 12, "larger": 12, "than": 12, "well": 13, "nvcc": 13, "cuda_vers": 13, "cc60": 13, "compile_cuda_path": 13, "usr": 13, "no_device_launch": 13, "around": 13, "cc70": 13, "On": 13, "lab": 13, "browser": 13, "ip": 13, "localhost": 13, "8888": 13, "sunysb": 13, "enter": 13, "password": 13, "window": 13, "web": 13, "prompt": 13, "token": 13, "appear": 13}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"work": [0, 1, 3, 8], "alcf": 0, "log": 0, "In": 0, "compil": [0, 1, 4, 10], "disk": 0, "queue": 0, "submit": [0, 12], "automat": 0, "restart": 0, "job": [0, 7, 12], "chain": [0, 7, 12], "iac": 1, "ookami": 1, "amrex": [1, 2], "setup": 1, "crai": 1, "gcc": [1, 4], "10": 1, "2": 1, "astrophys": 2, "suit": 2, "astro": 2, "basic": 2, "nersc": [3, 4, 6, 7], "perlmutt": [4, 7], "cuda": 4, "hypr": 4, "archiv": [5, 12], "data": 5, "hpss": [5, 12], "visual": [6, 9], "manag": [7, 12], "filesystem": 7, "olcf": [8, 10, 11, 12], "batch": 9, "Andes": 9, "summit": [10, 12], "frontier": [10, 12], "run": 11, "jupyt": [11, 13], "remot": [11, 13], "from": 11, "creat": 11, "conda": 11, "environ": 11, "architectur": 12, "request": 12, "alloc": 12, "writ": 12, "script": 12, "monitor": 12, "templat": 12, "machin": 12, "detail": 12, "statu": 12, "debug": 12, "troubleshoot": 
12, "linux": 13, "workstat": 13, "gpu": 13, "offload": 13, "bender": 13, "groot": 13, "vi": 13}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 60}, "alltitles": {"Working at ALCF": [[0, "working-at-alcf"]], "Logging In": [[0, "logging-in"]], "Compiling": [[0, "compiling"]], "Disks": [[0, "disks"]], "Queues": [[0, "queues"]], "Submitting": [[0, "submitting"]], "Automatic Restarting": [[0, "automatic-restarting"]], "Job Chaining": [[0, "job-chaining"], [12, "job-chaining"]], "Working at IACS": [[1, "working-at-iacs"]], "Ookami": [[1, "ookami"]], "AMReX setup": [[1, "amrex-setup"]], "Cray compilers": [[1, "cray-compilers"]], "GCC": [[1, "gcc"]], "GCC 10.2": [[1, "gcc-10-2"]], "AMReX Astrophysics Suite": [[2, "amrex-astrophysics-suite"]], "AMReX Astro basics": [[2, null]], "Working at NERSC": [[3, "working-at-nersc"]], "Compiling at NERSC": [[4, "compiling-at-nersc"]], "Perlmutter": [[4, "perlmutter"], [7, "perlmutter"]], "Compiling with GCC + CUDA": [[4, "compiling-with-gcc-cuda"]], "Hypre": [[4, "hypre"]], "Archiving Data to HPSS": [[5, "archiving-data-to-hpss"]], "Visualization at NERSC": [[6, "visualization-at-nersc"]], "Managing Jobs at NERSC": [[7, "managing-jobs-at-nersc"]], "Filesystems": [[7, "filesystems"]], "Chaining": [[7, "chaining"]], "Working at OLCF": [[8, "working-at-olcf"]], "Batch Visualization on Andes": [[9, "batch-visualization-on-andes"]], "Compiling at OLCF": [[10, "compiling-at-olcf"]], "Summit": [[10, "summit"], [12, "summit"]], "Frontier": [[10, "frontier"], [12, "frontier"]], "Running Jupyter Remotely from OLCF": [[11, "running-jupyter-remotely-from-olcf"]], "Creating a conda environment": [[11, "creating-a-conda-environment"]], "Managing Jobs at OLCF": [[12, 
"managing-jobs-at-olcf"]], "Summit Architecture:": [[12, "summit-architecture"]], "Requesting Allocation:": [[12, "requesting-allocation"]], "Submitting a Job:": [[12, "submitting-a-job"]], "Writting a Job Script:": [[12, "writting-a-job-script"]], "Monitoring a Job:": [[12, "monitoring-a-job"]], "Script Template:": [[12, "script-template"]], "Chaining jobs": [[12, "chaining-jobs"]], "Archiving to HPSS": [[12, "archiving-to-hpss"]], "Machine details": [[12, "machine-details"]], "Submitting jobs": [[12, "submitting-jobs"]], "Job Status": [[12, "job-status"]], "Debugging": [[12, "debugging"]], "Troubleshooting": [[12, "troubleshooting"]], "Linux Workstations": [[13, "linux-workstations"]], "GPU offloading": [[13, "gpu-offloading"]], "bender": [[13, "bender"]], "groot": [[13, "groot"]], "Remote vis with Jupyter": [[13, "remote-vis-with-jupyter"]]}, "indexentries": {}}) \ No newline at end of file