# modelB_time.jl

# Switch the working directory so that the relative "corr/..." paths used later in this
# script (initial configuration, time series, checkpoints) resolve inside this directory.
# NOTE(review): the absolute path is specific to one machine/account — confirm before
# running elsewhere.
cd("/share/tmschaef/jkott/modelB/KZ")

using Distributions
using Printf
using JLD2
using Random
using CUDA
using CUDA.CUFFT
using CodecZlib

# Command-line interface: ARGS[1] = index into the m² / ξ tables (parsed further below),
# ARGS[2] = lattice size L, ARGS[3] = integer RNG seed.
length(ARGS) >= 3 ||
    throw(ArgumentError("expected arguments <m_id> <L> <seed>, got $(length(ARGS))"))

# Seed the host RNG and the CUDA RNG with the same integer.
let seed = parse(Int, ARGS[3])
    Random.seed!(seed)
    CUDA.seed!(seed)
end

const L = parse(Int, ARGS[2])
# The gpu_sweep_* kernels enumerate L^3÷4 site pairs and place them with a stride of 4
# along one axis; that layout assumes L is a multiple of 4, so reject anything else
# instead of running with a broken pairing.
(L > 0 && L % 4 == 0) ||
    throw(ArgumentError("L must be a positive multiple of 4, got $L"))
const λ = 4.0e0   # coefficient of the quartic term in ΔH
const Γ = 1.0e0   # enters the time step and the proposal width below
const T = 1.0e0   # not referenced elsewhere in this file
const z = 4.0e0   # exponent applied to ξ when computing τ_C

const Δt = 0.04e0/Γ
# Scale factor applied to the unit Gaussian draw in `step`.
const Rate = Float64(sqrt(2.0*Δt*Γ))

# Parallel tables indexed by m_id: the quadratic coefficient passed to the kernels and
# the matching ξ value (presumably a correlation length — confirm with the author).
const m² = [-1.8, -1.9, -2.0, -2.1, -2.15, -2.2, -2.22, -2.24, -2.26]
const ξ = [3, 3, 3, 4, 5, 7, 8, 11, 22]

"""
    ΔH(x, ϕ, q, m², L)

Return the change in the lattice energy when `q` is added to the field `ϕ` at site `x`
while every other site is held fixed.

`x` is a 3-component index into `ϕ`; neighbours are looked up with periodic wrap-around
on a lattice of side `L`. The result is

    3(ϕ′² - ϕ²) - (ϕ′ - ϕ) * Σ_nn ϕ + 0.5 m² (ϕ′² - ϕ²) + 0.25 λ (ϕ′⁴ - ϕ⁴)

with `ϕ′ = ϕ[x] + q`. `λ` is read from the enclosing scope. `ϕ` is not modified.
"""
function ΔH(x, ϕ, q, m², L)
    ix, iy, iz = x[1], x[2], x[3]

    @inbounds before = ϕ[ix, iy, iz]
    after = before + q
    shift = after - before            # field change as actually representable
    sqdiff = after^2 - before^2       # difference of squares, not (shift)^2

    # Periodic neighbour indices (1-based): +1 and -1 along each axis.
    xp = ix % L + 1
    yp = iy % L + 1
    zp = iz % L + 1
    xm = (ix + L - 2) % L + 1
    ym = (iy + L - 2) % L + 1
    zm = (iz + L - 2) % L + 1

    @inbounds nnsum = ϕ[xp, iy, iz] + ϕ[ix, yp, iz] + ϕ[ix, iy, zp] +
                      ϕ[xm, iy, iz] + ϕ[ix, ym, iz] + ϕ[ix, iy, zm]

    gradient = 3sqdiff - shift * nnsum
    quadratic = 0.5m² * sqdiff
    quartic = 0.25λ * (after^4 - before^4)
    return gradient + quadratic + quartic
end

"""
    step(m², ϕ, x1, x2, L)

Attempt one Metropolis move that adds `q` to `ϕ[x1...]` and subtracts the same `q` from
`ϕ[x2...]`, so the sum of the two sites is unchanged.

`q` is `Rate` times a unit Gaussian obtained from two uniform draws (Box–Muller). The
energy change is the sum of the two single-site `ΔH` values plus `q^2`; the move is
accepted when a third uniform draw is below `min(1, exp(-δH))`. `ϕ` is updated in place.
"""
function step(m², ϕ, x1, x2, L)
    # Box–Muller: angle from the first uniform draw, radius from the second.
    angle = 2π * rand()
    radius = sqrt(-2 * log(rand()))
    q = Rate * (cos(angle) * radius)

    # The single-site changes each treat the partner as fixed; q^2 is added on top.
    δH = ΔH(x1, ϕ, q, m², L) + ΔH(x2, ϕ, -q, m², L) + q^2
    P = min(1.0f0, exp(-δH))
    accepted = rand() < P

    # Multiplying by the Bool applies the update without branching.
    @inbounds ϕ[x1...] += q * accepted
    @inbounds ϕ[x2...] -= q * accepted
end

"""
    sweep(m², ϕ, threads, blocks)

Run one lattice sweep on the device: for each offset `m` in `1:4`, launch the
pre-compiled kernels `kernel_i`, `kernel_j`, `kernel_k` in that order (12 launches in
total), each with the given `threads` and `blocks`.

The three kernels pair neighbouring sites along the first, second and third array axis
respectively. The kernels and the lattice size `L` are taken from the enclosing scope.
"""
function sweep(m², ϕ, threads, blocks)
    for m in 1:4
        for kernel in (kernel_i, kernel_j, kernel_k)
            kernel(m², ϕ, L, m; threads, blocks)
        end
    end
end

"""
    gpu_sweep_i(m², ϕ, L, m)

CUDA kernel: apply `step` to `L^3÷4` pairs of sites that are neighbours along the first
array axis. Pairs are distributed over threads with a grid-stride loop.

Each pair index `l` is decoded into `(i, j, k)`; the first site of the pair is built from
those together with the offsets `m % 2` and `m ÷ 2`, and the second site is its periodic
+1 neighbour along axis 1.
NOTE(review): the layout is presumably chosen so that pairs processed in one launch do
not share neighbours — confirm.
"""
function gpu_sweep_i(m², ϕ, L, m)
    first_pair = (blockIdx().x - 1) * blockDim().x + threadIdx().x - 1
    pair_stride = gridDim().x * blockDim().x
    last_pair = L^3÷4 - 1

    parity = m % 2
    shift = m ÷ 2

    l = first_pair
    while l <= last_pair
        # l = (i*L + j)*L + k
        row, k = divrem(l, L)
        i, j = divrem(row, L)

        a = (4i + 2j + parity) % L + 1   # coordinate along the pairing axis
        b = (j + k + shift) % L + 1
        c = k % L + 1

        x1 = (a, b, c)
        x2 = (a % L + 1, b, c)

        step(m², ϕ, x1, x2, L)
        l += pair_stride
    end
    return
end

"""
    gpu_sweep_j(m², ϕ, L, m)

CUDA kernel: apply `step` to `L^3÷4` pairs of sites that are neighbours along the second
array axis. Pairs are distributed over threads with a grid-stride loop.

Each pair index `l` is decoded into `(i, j, k)`; the first site of the pair is built from
those together with the offsets `m % 2` and `m ÷ 2`, and the second site is its periodic
+1 neighbour along axis 2.
NOTE(review): the layout is presumably chosen so that pairs processed in one launch do
not share neighbours — confirm.
"""
function gpu_sweep_j(m², ϕ, L, m)
    first_pair = (blockIdx().x - 1) * blockDim().x + threadIdx().x - 1
    pair_stride = gridDim().x * blockDim().x
    last_pair = L^3÷4 - 1

    parity = m % 2
    shift = m ÷ 2

    l = first_pair
    while l <= last_pair
        # l = (i*L + j)*L + k
        row, k = divrem(l, L)
        i, j = divrem(row, L)

        a = (4i + 2j + parity) % L + 1   # coordinate along the pairing axis
        b = (j + k + shift) % L + 1
        c = k % L + 1

        x1 = (c, a, b)
        x2 = (c, a % L + 1, b)

        step(m², ϕ, x1, x2, L)
        l += pair_stride
    end
    return
end

"""
    gpu_sweep_k(m², ϕ, L, m)

CUDA kernel: apply `step` to `L^3÷4` pairs of sites that are neighbours along the third
array axis. Pairs are distributed over threads with a grid-stride loop.

Each pair index `l` is decoded into `(i, j, k)`; the first site of the pair is built from
those together with the offsets `m % 2` and `m ÷ 2`, and the second site is its periodic
+1 neighbour along axis 3.
NOTE(review): the layout is presumably chosen so that pairs processed in one launch do
not share neighbours — confirm.
"""
function gpu_sweep_k(m², ϕ, L, m)
    first_pair = (blockIdx().x - 1) * blockDim().x + threadIdx().x - 1
    pair_stride = gridDim().x * blockDim().x
    last_pair = L^3÷4 - 1

    parity = m % 2
    shift = m ÷ 2

    l = first_pair
    while l <= last_pair
        # l = (i*L + j)*L + k
        row, k = divrem(l, L)
        i, j = divrem(row, L)

        a = (4i + 2j + parity) % L + 1   # coordinate along the pairing axis
        b = (j + k + shift) % L + 1
        c = k % L + 1

        x1 = (b, c, a)
        x2 = (b, c, a % L + 1)

        step(m², ϕ, x1, x2, L)
        l += pair_stride
    end
    return
end

"""
    CorrFunc(ϕ, L)

Return a vector `C` of length `L÷2 + 1` where `C[r+1]` is the sum over all sites
`(i, j, k)` of `ϕ[i,j,k]` times the field at distance `r` along each of the three axes
(periodic wrap-around on side `L`). The result is an unnormalised sum, not an average.
"""
function CorrFunc(ϕ, L)
    rmax = L ÷ 2
    C = zeros(rmax + 1)

    for r in 0:rmax
        for i in 1:L
            ip = mod(i + r - 1, L) + 1
            for j in 1:L
                jp = mod(j + r - 1, L) + 1
                for k in 1:L
                    kp = mod(k + r - 1, L) + 1
                    here = ϕ[i, j, k]
                    C[r+1] = C[r+1] + ϕ[ip, j, k]*here + ϕ[i, jp, k]*here + ϕ[i, j, kp]*here
                end
            end
        end
    end
    return C
end

"""
    thermalize(m², ϕ, threads, blocks, N=10000)

Advance the field `ϕ` by calling `sweep(m², ϕ, threads, blocks)` `N` times in a row.
"""
function thermalize(m², ϕ, threads, blocks, N=10000)
    for _ in 1:N
        sweep(m², ϕ, threads, blocks)
    end
end

# Field configuration on the device; overwritten below with a stored configuration.
ϕ = CUDA.zeros(Float64, L, L, L)

# Number of site pairs each kernel launch loops over (matches L^3÷4 in gpu_sweep_*).
const N = L^3÷4

# Index into the m² / ξ tables. Checked here so a bad argument fails with a clear
# message instead of a BoundsError further down.
const m_id = parse(Int, ARGS[1])
m_id in eachindex(m²) ||
    throw(ArgumentError("m_id must be between 1 and $(length(m²)), got $m_id"))

# τ_C is only needed by the alternative run length
# `(100 * max(5000, τ_C)) ÷ skip`, which is currently disabled in favour of a fixed one.
const τ_C = trunc(Int, (4 * 10^-3 * ξ[m_id]^z) / Δt)
const skip = 10                  # sweeps between two recorded samples
const maxt = (2000000) ÷ skip    # recorded samples per run

# Compile the three kernels once without launching them. They are `const` because
# `sweep` reads them as globals on every call.
const kernel_i = @cuda launch=false gpu_sweep_i(m²[m_id], ϕ, L, 1)
const kernel_j = @cuda launch=false gpu_sweep_j(m²[m_id], ϕ, L, 1)
const kernel_k = @cuda launch=false gpu_sweep_k(m²[m_id], ϕ, L, 1)
# The launch configuration is derived from kernel_i and reused for all three kernels.
config = launch_configuration(kernel_i.fun)
threads = min(N, config.threads)
blocks = cld(N, threads)

# Reuse the previous configuration for this lattice size. The file name follows the
# checkpoint written by the production loop ("corr/phi_L_$(L)_m2_$(m_id).jld2"); the
# size was previously hard-coded to 16, which failed for any other L.
df = load("corr/phi_L_$(L)_m2_$(m_id).jld2")
size(df["ϕ"]) == size(ϕ) ||
    throw(DimensionMismatch("stored configuration has size $(size(df["ϕ"])), expected $(size(ϕ))"))
ϕ .= CuArray(df["ϕ"])

# Production: 16 independent runs. Each run first advances the field by 10*maxt sweeps
# without recording, then records `maxt` samples separated by `skip` sweeps into
# "corr/time_L_<L>_m2_<m_id>_run_<run>.dat".
#
# One output line per sample: the label `skip*i` (sweeps done in the recording phase
# before the current batch of `skip`), followed by the real and imaginary parts of the
# Fourier coefficients ϕk[k,1,1] for k = 1:5.
for run in 1:16
    # Loop-invariant values, computed once per run.
    mass2 = m²[m_id]
    checkpoint_path = "corr/phi_L_$(L)_m2_$(m_id).jld2"

    thermalize(mass2, ϕ, threads, blocks, 10*maxt)

    open("corr/time_L_$(L)_m2_$(m_id)_run_$run.dat", "w") do io
        for i in 0:maxt-1
            thermalize(mass2, ϕ, threads, blocks, skip)

            # Slice on the device so only the five recorded coefficients are copied to
            # the host rather than the full L^3 transform.
            modes = Array(fft(ϕ)[1:5, 1, 1])

            Printf.@printf(io, "%i ", skip*i)
            for ϕq in modes
                Printf.@printf(io, "%f %f ", real(ϕq), imag(ϕq))
            end
            Printf.@printf(io, "\n")

            # Checkpoint the configuration every 100 samples (second argument enables
            # compression).
            if i%100==0
                jldsave(checkpoint_path, true; ϕ=Array(ϕ), m2=mass2)
            end
            flush(io)
        end
    end
end