debug_job.sub
JobBatchName = "LLM batch inference (vllm + Turso)"
# Environment
universe = docker
docker_image = ghcr.io/jasonlo/text2graph_llm_chtc:v240611
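# Use the host network stack inside the container (no Docker port mapping)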
docker_network_type = host
# Artefact
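# HTCondor keeps only the last Requirements line, so staging access and the CUDA driver check are combined into one expression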
Requirements = (Target.HasCHTCStaging == true) && (Target.CUDADriverVersion >= 10.1)
transfer_input_files = main.sh, .env, preprocess_extraction_direct.py, geoarchive_paragraph_ids.pkl, db.py
executable = main.sh
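# Pass the batch index (set by the queue statement at the bottom) to main.sh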
arguments = $(job_index)
should_transfer_files = YES
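# Empty string: transfer no output files back; results are presumably written out by main.sh (e.g. to the Turso database)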
transfer_output_files = ""
# Logging
stream_output = true
stream_error = true
output = condor_log/output.$(Cluster)-$(Process).txt
error = condor_log/error.$(Cluster)-$(Process).txt
log = condor_log/log.$(Cluster)-$(Process).txt
# Compute resources
request_cpus = 4
request_memory = 32GB
request_disk = 50GB
request_gpus = 1
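# Only match GPUs with at least ~30 GB of GPU memory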
require_gpus = GlobalMemoryMb >= 30000
+WantGPULab = true
+GPUJobLength = "short"
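# Debug run: the trailing "|" tells HTCondor to run the command and read items from its stdout; seq 192 192 emits a single value, so only job_index 192 is queued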
queue job_index from seq 192 192 |