diff --git a/README.md b/README.md
index 33a4258..58cd6ee 100644
--- a/README.md
+++ b/README.md
@@ -101,10 +101,15 @@ _(Disclaimer: nonsensical responses have been removed.)_
 th train.lua [-h / options]
 ```
 
-Use the `--dataset NUMBER` option to control the size of the dataset. Training on the full dataset takes about 5h for a single epoch.
-
 The model will be saved to `data/model.t7` after each epoch if it has improved (error decreased).
 
+### Options (some, not all)
+- `--opencl` use OpenCL for computation (requires [torch-cl](https://github.com/hughperkins/distro-cl))
+- `--cuda` use CUDA for computation
+- `--gpu [index]` use the GPU with the given zero-based index for computation (e.g. on a 2015 MacBook, `--gpu 0` selects the Intel GPU while `--gpu 1` selects the far more powerful AMD GPU)
+- `--dataset [size]` control the size of the dataset
+- `--maxEpoch [amount]` specify the number of epochs to run
+
 ## Testing
 
 To load the model and have a conversation:
diff --git a/train.lua b/train.lua
index ccd3260..d066898 100644
--- a/train.lua
+++ b/train.lua
@@ -16,6 +16,7 @@ cmd:option('--minLR', 0.00001, 'minimum learning rate')
 cmd:option('--saturateEpoch', 20, 'epoch at which linear decayed LR will reach minLR')
 cmd:option('--maxEpoch', 50, 'maximum number of epochs to run')
 cmd:option('--batchSize', 10, 'mini-batch size')
+cmd:option('--gpu', 0, 'Zero-indexed ID of the GPU to use. Optional.')
 
 cmd:text()
 options = cmd:parse(arg)
@@ -55,10 +56,12 @@ local minMeanError = nil
 if options.cuda then
   require 'cutorch'
   require 'cunn'
+  cutorch.setDevice(options.gpu + 1) -- --gpu is zero-indexed; add 1 before passing it to setDevice
   model:cuda()
 elseif options.opencl then
   require 'cltorch'
   require 'clnn'
+  cltorch.setDevice(options.gpu + 1) -- --gpu is zero-indexed; add 1 before passing it to setDevice
   model:cl()
 end
 
@@ -125,7 +128,6 @@ for epoch = 1, options.maxEpoch do
 
   for i=1, dataset.examplesCount/options.batchSize do
     collectgarbage()
-    
     local _,tloss = optim.adam(feval, params, optimState)
     err = tloss[1] -- optim returns a list