diff --git a/src/main/scala/BIDMach/Clustering.scala b/src/main/scala/BIDMach/Clustering.scala
index 930eb378..a28335bf 100755
--- a/src/main/scala/BIDMach/Clustering.scala
+++ b/src/main/scala/BIDMach/Clustering.scala
@@ -24,7 +24,7 @@ class PAMmodel(val opts:PAMmodel.Options = new PAMmodel.Options) {
}
def dists(a:FMat):FMat = {
- val dd = if (Mat.hasCUDA > 0) a xTG a else a xT a;
+ val dd = if (Mat.hasCUDA > 0) a xTG a else a xT a
val d1 = getdiag(dd)
dd ~ dd * 2.0f
dd ~ d1 - dd
@@ -44,9 +44,9 @@ class PAMmodel(val opts:PAMmodel.Options = new PAMmodel.Options) {
var j = 0
while (j < ncache && continue) {
if (centmap(iss(j, ii)) > 0) {
- imin(ii) = centmap(iss(j, ii)) - 1
- vmin(ii) = ds(j, ii)
- continue = false
+ imin(ii) = centmap(iss(j, ii)) - 1
+ vmin(ii) = ds(j, ii)
+ continue = false
}
j += 1
}
@@ -71,9 +71,9 @@ class PAMmodel(val opts:PAMmodel.Options = new PAMmodel.Options) {
var idepth = 0
while (isamps.length > 0) {
val isx = centmap(iss(isamps, idepth), 0)
- val ifound = find(isx > 0)
- imin(isamps(ifound)) = isx(ifound) - 1
- vmin(isamps(ifound)) = ds(isamps(ifound), idepth)
+ val ifound = find(isx > 0)
+ imin(isamps(ifound)) = isx(ifound) - 1
+ vmin(isamps(ifound)) = ds(isamps(ifound), idepth)
Mat.nflops += 4*isamps.length
isamps = isamps(find(isx == 0))
idepth += 1
@@ -89,37 +89,37 @@ class PAMmodel(val opts:PAMmodel.Options = new PAMmodel.Options) {
}
def pointdiffs(ds:FMat, iss:IMat, vd:DMat):DMat = {
- val deltas = dzeros(nsamps,1) // Array to hold improvements in distance over vd
- var i = 0
- while (i < nsamps) { // Calculate improvements over vd for new candidate centers
- var j = 0
- while (j < nsamps && ds(j,i) < vd(i)) { // using sorted order of ds
- deltas(iss(j,i)) += ds(j,i) - vd(i)
- j += 1
- }
- maxdepth = math.max(maxdepth, j)
- Mat.nflops += 16*j
- i += 1
- }
- deltas
+ val deltas = dzeros(nsamps,1) // Array to hold improvements in distance over vd
+ var i = 0
+ while (i < nsamps) { // Calculate improvements over vd for new candidate centers
+ var j = 0
+ while (j < nsamps && ds(j,i) < vd(i)) { // using sorted order of ds
+ deltas(iss(j,i)) += ds(j,i) - vd(i)
+ j += 1
+ }
+ maxdepth = math.max(maxdepth, j)
+ Mat.nflops += 16*j
+ i += 1
+ }
+ deltas
}
def pointdiffs2(ds:FMat, iss:IMat, vd:FMat):FMat = {
- val deltas = zeros(nsamps,1) // Array to hold improvements in distance over vd
- var ii = icol(0->nsamps)
- var idepth = 0
- while (ii.length > 0) { // Calculate improvements over vd for new candidate centers
- ii = ii(find(ds(ii,idepth) < vd(ii,0)))
- var j = 0
- while (j < ii.length) {
- deltas(iss(ii(j),idepth)) += (ds(ii(j),idepth) - vd(ii(j)))
- j += 1
- }
- Mat.nflops += 16*j
- idepth += 1
- maxdepth = math.max(maxdepth, idepth)
- }
- deltas
+ val deltas = zeros(nsamps,1) // Array to hold improvements in distance over vd
+ var ii = icol(0->nsamps)
+ var idepth = 0
+ while (ii.length > 0) { // Calculate improvements over vd for new candidate centers
+ ii = ii(find(ds(ii,idepth) < vd(ii,0)))
+ var j = 0
+ while (j < ii.length) {
+ deltas(iss(ii(j),idepth)) += (ds(ii(j),idepth) - vd(ii(j)))
+ j += 1
+ }
+ Mat.nflops += 16*j
+ idepth += 1
+ maxdepth = math.max(maxdepth, idepth)
+ }
+ deltas
}
def sortgen(dd:FMat):(FMat,IMat) = {
@@ -151,52 +151,52 @@ class PAMmodel(val opts:PAMmodel.Options = new PAMmodel.Options) {
var itry = 0
while (itry < ntrys) {
println("Try %d" format itry)
- val rr = rand(nsamps,1) // Get a random permutation for the centers
- val (rs,irs) = sort2(rr,1)
- val icenters = irs(0->ncenters,0) // Pick centers from the permutation
- val ics = icol(0->nsamps)
- val (vdists, imin) = mindists(ds, iss, ics, icenters) // Get min distances from points to centers, and best center ids
- println(" pass=0, mean dist=%f" format mean(vdists,1).v)
- val vtmp = vdists.copy
- val itmp = imin.copy
- var nchanged = 1
- var ipass = 0
- var totchanged = 0
- while (nchanged > 0 && ipass < options.maxpasses) { // Keep making passes until no improvements
- ipass += 1
- nchanged = 0
- var ipc = 0
- while (ipc < ncenters) { // Try to improve this center (ipc)
- vtmp <-- vdists // Copy distances
- val ifix = find(imin == ipc) // Find points in cluster with this center
- val tcents = icenters((0->ipc) \ ((ipc+1)->ncenters),0) // List of centers minus the current one
- mindists(ds, iss, ifix, tcents, vtmp, itmp) // vtmp holds distances to centers minus the current center
- val deltas = pointdiffs(ds, iss, vtmp) // deltas holds improvements for each potential center over vtmp
- val (vs,is) = mini2(deltas) // Find best new center
- if (vs.v + sum(vtmp).v < sum(vdists).v && is.v != icenters(ipc,0)) { // Is the new center better than the old (and not equal to it)?
- icenters(ipc) = is.v // If yes, update the center list
- mindists(ds, iss, ics, icenters, vdists, imin) // Compute new distances and centers
- nchanged += 1
- if (options.verb) println(" pass=%d, ipc=%d, mean dist=%f, nchanged=%d" format (ipass, ipc, mean(vdists,1).v, nchanged))
- }
- ipc += 1
- }
- println(" pass=%d, mean dist=%f, nchanged=%d, nspills=%d" format (ipass, mean(vdists,1).v, nchanged, nspills))
- totchanged += nchanged
- }
- val mv = mean(vdists).v
- if (mv < bestvd) {
- bestc = icenters
- bestv = vdists
- besti = imin
- bestvd = mv
- }
- itry += 1
+ val rr = rand(nsamps,1) // Get a random permutation for the centers
+ val (rs,irs) = sort2(rr,1)
+ val icenters = irs(0->ncenters,0) // Pick centers from the permutation
+ val ics = icol(0->nsamps)
+ val (vdists, imin) = mindists(ds, iss, ics, icenters) // Get min distances from points to centers, and best center ids
+ println(" pass=0, mean dist=%f" format mean(vdists,1).v)
+ val vtmp = vdists.copy
+ val itmp = imin.copy
+ var nchanged = 1
+ var ipass = 0
+ var totchanged = 0
+ while (nchanged > 0 && ipass < options.maxpasses) { // Keep making passes until no improvements
+ ipass += 1
+ nchanged = 0
+ var ipc = 0
+ while (ipc < ncenters) { // Try to improve this center (ipc)
+ vtmp <-- vdists // Copy distances
+ val ifix = find(imin == ipc) // Find points in cluster with this center
+ val tcents = icenters((0->ipc) \ ((ipc+1)->ncenters),0) // List of centers minus the current one
+ mindists(ds, iss, ifix, tcents, vtmp, itmp) // vtmp holds distances to centers minus the current center
+ val deltas = pointdiffs(ds, iss, vtmp) // deltas holds improvements for each potential center over vtmp
+ val (vs,is) = mini2(deltas) // Find best new center
+ if (vs.v + sum(vtmp).v < sum(vdists).v && is.v != icenters(ipc,0)) { // Is the new center better than the old (and not equal to it)?
+ icenters(ipc) = is.v // If yes, update the center list
+ mindists(ds, iss, ics, icenters, vdists, imin) // Compute new distances and centers
+ nchanged += 1
+ if (options.verb) println(" pass=%d, ipc=%d, mean dist=%f, nchanged=%d" format (ipass, ipc, mean(vdists,1).v, nchanged))
+ }
+ ipc += 1
+ }
+ println(" pass=%d, mean dist=%f, nchanged=%d, nspills=%d" format (ipass, mean(vdists,1).v, nchanged, nspills))
+ totchanged += nchanged
+ }
+ val mv = mean(vdists).v
+ if (mv < bestvd) {
+ bestc = icenters
+ bestv = vdists
+ besti = imin
+ bestvd = mv
+ }
+ itry += 1
}
val t3=gflop
val vdists2 = mini(dd(?,bestc),2)
println("Optimum in %f secs, %f gflops, mean dist=%f, verify=%f, maxdepth=%d, nspills=%d\nTotal time %f seconds" format
- (t3._2, t3._1, bestvd, mean(DMat(vdists2),1).v, maxdepth, nspills, t3._2+ft2._2+ft1._2))
+ (t3._2, t3._1, bestvd, mean(DMat(vdists2),1).v, maxdepth, nspills, t3._2+ft2._2+ft1._2))
}
}
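
Note: the dists method touched in the first hunk gets all pairwise squared distances from a single Gram matrix (a xT a), using the identity ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2(x_i . x_j); getdiag supplies the norms. A minimal plain-Scala sketch of that identity, independent of BIDMat (all names here are illustrative):

    object PairwiseDists {
      // ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 * (x_i . x_j),
      // so the row norms plus one Gram matrix give every pairwise distance.
      def squaredDists(a: Array[Array[Double]]): Array[Array[Double]] = {
        val norms = a.map(row => row.map(x => x * x).sum)   // ||x_i||^2 per row
        Array.tabulate(a.length, a.length) { (i, j) =>
          val dot = a(i).zip(a(j)).map { case (x, y) => x * y }.sum
          norms(i) + norms(j) - 2.0 * dot
        }
      }
      def main(args: Array[String]): Unit = {
        val pts = Array(Array(0.0, 0.0), Array(3.0, 4.0))
        println(squaredDists(pts)(0)(1))                    // prints 25.0
      }
    }
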
diff --git a/src/main/scala/BIDMach/Experiments.scala b/src/main/scala/BIDMach/Experiments.scala
index d011d7a8..8dbef9d0 100755
--- a/src/main/scala/BIDMach/Experiments.scala
+++ b/src/main/scala/BIDMach/Experiments.scala
@@ -25,24 +25,24 @@ object MNIST {
def datasource(dir:String="/data/MNIST8M/parts/", nlast:Int = 80, n:Int = 1, i:Int = 0) = {
implicit val ec = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(8))
val opts1 = new FileSource.Options {
- fnames = List(FileSource.simpleEnum(dir+"/part%02d.imat.lz4", n, i));
- nstart = 0;
- nend = nlast;
- order = 0;
- batchSize = 10000;
- lookahead = 2;
- featType = 2;
- featThreshold = 128;
+ fnames = List(FileSource.simpleEnum(dir+"/part%02d.imat.lz4", n, i))
+ nstart = 0
+ nend = nlast
+ order = 0
+ batchSize = 10000
+ lookahead = 2
+ featType = 2
+ featThreshold = 128
}
val opts2 = new SFileSource.Options {
- fnames = List(FileSource.simpleEnum(dir+"/cats3col%02d.imat.lz4", n, i));
- nstart = opts1.nstart;
- nend = opts1.nend;
- order = opts1.order;
- batchSize = opts1.batchSize;
- lookahead = opts1.lookahead;
- fcounts = irow(10);
- eltsPerSample = 2;
+ fnames = List(FileSource.simpleEnum(dir+"/cats3col%02d.imat.lz4", n, i))
+ nstart = opts1.nstart
+ nend = opts1.nend
+ order = opts1.order
+ batchSize = opts1.batchSize
+ lookahead = opts1.lookahead
+ fcounts = irow(10)
+ eltsPerSample = 2
}
new StackedDS(new FileSource(opts1), new SFileSource(opts2))
}
@@ -63,7 +63,7 @@ object NYTIMES {
object DIGITS {
def preprocess(dict:String, fname:String) {
- println("Processing digits");
+ println("Processing digits")
val mat = loadFMat(dict+fname+".txt")
val srow = sum(abs(mat),2)
val inds = IMat((cumsum(srow==0)-1)/660)
@@ -153,105 +153,105 @@ object Twitter {
implicit val ec = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(8))
def dodicts(threshold:Int=10, rebuild:Boolean=false):Unit = {
- val stokdir = "/twitter/smiley/tokenized/"
- val tokdir = "/twitter/tokenized/"
- val dy1 = mergedicts(2011, 2013, "/disk%02d" + stokdir, "/big" + stokdir, threshold, rebuild)
- val dy2 = mergedicts(2011, 2013, "/disk%02d" + tokdir, "/big" + tokdir, threshold, rebuild)
- val dy = Dict.union(dy1, dy2)
- val (sv, iv) = sortdown2(dy.counts)
- HMat.saveSBMat("/big"+tokdir+"alldict.gz", SBMat(dy.cstr(iv)))
- HMat.saveDMat("/big"+tokdir+"allwcount.gz", sv)
- }
+ val stokdir = "/twitter/smiley/tokenized/"
+ val tokdir = "/twitter/tokenized/"
+ val dy1 = mergedicts(2011, 2013, "/disk%02d" + stokdir, "/big" + stokdir, threshold, rebuild)
+ val dy2 = mergedicts(2011, 2013, "/disk%02d" + tokdir, "/big" + tokdir, threshold, rebuild)
+ val dy = Dict.union(dy1, dy2)
+ val (sv, iv) = sortdown2(dy.counts)
+ HMat.saveSBMat("/big"+tokdir+"alldict.gz", SBMat(dy.cstr(iv)))
+ HMat.saveDMat("/big"+tokdir+"allwcount.gz", sv)
+ }
- def mergedicts(year1:Int, year2:Int, infname:String, outfname:String, threshold:Int=10, rebuild:Boolean=false):Dict = {
- val dd = new Array[Dict](6)
- val md = new Array[Dict](6)
- val yd = new Array[Dict](5)
- var dy:Dict = null
- var nmerged = 0
- for (yy <- year1 to year2) {
- for (mm <- 1 to 12) {
- print("\n%d/%02d" format (yy, mm))
- val ff = new File(outfname + "%04d/%02d/wcount.gz" format (yy, mm))
- if (rebuild || ! ff.exists) {
- var ndone = 0
- for (id <- 1 to 31) {
- var ielem = 372*yy + 31*mm + id
- var idisk = ielem % 16
- val fname = (infname + "%04d/%02d/%02d/" format (idisk, yy, mm, id))
- val ff = new File(fname + "wcount.gz")
- if (ff.exists) {
- val bb = HMat.loadSBMat(fname + "dict.gz")
- val cc = HMat.loadIMat(fname + "wcount.gz")
- dd(ndone % 6) = Dict(bb, cc, threshold)
- ndone = ndone + 1
- print("-")
- if (ndone % 6 == 0) {
- md(ndone / 6 - 1) = Dict.union(dd:_*)
- print("+")
- }
- }
- }
- if (ndone % 6 != 0) {
- md(ndone / 6) = Dict.union(dd.slice(0, ndone % 6):_*)
- print("+")
- }
- if (ndone > 0) {
- val dx = Dict.union(md.slice(0, (ndone-1)/6+1):_*)
- val (sv, iv) = sortdown2(dx.counts)
- val dxx = Dict(dx.cstr(iv), sv)
- HMat.saveSBMat(outfname + "%04d/%02d/dict.gz" format (yy, mm), SBMat(dxx.cstr))
- HMat.saveDMat(outfname + "%04d/%02d/wcount.gz" format (yy, mm), dxx.counts)
- }
-// println("")
- }
- val f2 = new File(outfname + "%04d/%02d/wcount.gz" format (yy, mm))
- if (f2.exists) {
- val bb = HMat.loadSBMat(outfname + "%04d/%02d/dict.gz" format (yy, mm))
- val cc = HMat.loadDMat(outfname + "%04d/%02d/wcount.gz" format (yy, mm))
- yd(nmerged % 5) = Dict(bb, cc, 4*threshold)
- nmerged += 1
- print("*")
- if (nmerged % 5 == 0) {
- val dm = Dict.union(yd:_*)
- if (nmerged == 5) {
- dy = dm
- } else {
- dy = Dict.union(dy, dm)
- }
- }
- }
- }
- }
- if (nmerged % 5 != 0) {
- val dm = Dict.union(yd.slice(0, nmerged % 5):_*)
- dy = Dict.union(dy, dm)
- }
- println
- val (sv, iv) = sortdown2(dy.counts)
- val dyy = Dict(dy.cstr(iv), sv)
- HMat.saveSBMat(outfname + "dict.gz", SBMat(dyy.cstr))
- HMat.saveDMat(outfname + "wcount.gz", dyy.counts)
- dyy
- }
-
- def getDict = {
- val bd = loadSBMat("/big/twitter/tokenized/alldict.gz")
+ def mergedicts(year1:Int, year2:Int, infname:String, outfname:String, threshold:Int=10, rebuild:Boolean=false):Dict = {
+ val dd = new Array[Dict](6)
+ val md = new Array[Dict](6)
+ val yd = new Array[Dict](5)
+ var dy:Dict = null
+ var nmerged = 0
+ for (yy <- year1 to year2) {
+ for (mm <- 1 to 12) {
+ print("\n%d/%02d" format (yy, mm))
+ val ff = new File(outfname + "%04d/%02d/wcount.gz" format (yy, mm))
+ if (rebuild || ! ff.exists) {
+ var ndone = 0
+ for (id <- 1 to 31) {
+ var ielem = 372*yy + 31*mm + id
+ var idisk = ielem % 16
+ val fname = (infname + "%04d/%02d/%02d/" format (idisk, yy, mm, id))
+ val ff = new File(fname + "wcount.gz")
+ if (ff.exists) {
+ val bb = HMat.loadSBMat(fname + "dict.gz")
+ val cc = HMat.loadIMat(fname + "wcount.gz")
+ dd(ndone % 6) = Dict(bb, cc, threshold)
+ ndone = ndone + 1
+ print("-")
+ if (ndone % 6 == 0) {
+ md(ndone / 6 - 1) = Dict.union(dd:_*)
+ print("+")
+ }
+ }
+ }
+ if (ndone % 6 != 0) {
+ md(ndone / 6) = Dict.union(dd.slice(0, ndone % 6):_*)
+ print("+")
+ }
+ if (ndone > 0) {
+ val dx = Dict.union(md.slice(0, (ndone-1)/6+1):_*)
+ val (sv, iv) = sortdown2(dx.counts)
+ val dxx = Dict(dx.cstr(iv), sv)
+ HMat.saveSBMat(outfname + "%04d/%02d/dict.gz" format (yy, mm), SBMat(dxx.cstr))
+ HMat.saveDMat(outfname + "%04d/%02d/wcount.gz" format (yy, mm), dxx.counts)
+ }
+// println("")
+ }
+ val f2 = new File(outfname + "%04d/%02d/wcount.gz" format (yy, mm))
+ if (f2.exists) {
+ val bb = HMat.loadSBMat(outfname + "%04d/%02d/dict.gz" format (yy, mm))
+ val cc = HMat.loadDMat(outfname + "%04d/%02d/wcount.gz" format (yy, mm))
+ yd(nmerged % 5) = Dict(bb, cc, 4*threshold)
+ nmerged += 1
+ print("*")
+ if (nmerged % 5 == 0) {
+ val dm = Dict.union(yd:_*)
+ if (nmerged == 5) {
+ dy = dm
+ } else {
+ dy = Dict.union(dy, dm)
+ }
+ }
+ }
+ }
+ }
+ if (nmerged % 5 != 0) {
+ val dm = Dict.union(yd.slice(0, nmerged % 5):_*)
+ dy = Dict.union(dy, dm)
+ }
+ println
+ val (sv, iv) = sortdown2(dy.counts)
+ val dyy = Dict(dy.cstr(iv), sv)
+ HMat.saveSBMat(outfname + "dict.gz", SBMat(dyy.cstr))
+ HMat.saveDMat(outfname + "wcount.gz", dyy.counts)
+ dyy
+ }
+
+ def getDict = {
+ val bd = loadSBMat("/big/twitter/tokenized/alldict.gz")
val bc = loadDMat("/big/twitter/tokenized/allwcount.gz")
Dict(bd, bc)
- }
-
+ }
+
def getBiDict = {
- val bd = loadIMat("/big/twitter/tokenized/allbdict.lz4")
+ val bd = loadIMat("/big/twitter/tokenized/allbdict.lz4")
val bc = loadDMat("/big/twitter/tokenized/allbcnts.lz4")
IDict(bd, bc)
- }
+ }
def getTriDict = {
- val bd = loadIMat("/big/twitter/tokenized/alltdict.lz4")
+ val bd = loadIMat("/big/twitter/tokenized/alltdict.lz4")
val bc = loadDMat("/big/twitter/tokenized/alltcnts.lz4")
IDict(bd, bc)
- }
+ }
def junk:CSMat = {
csrow("", "", "", "", "", "", "",
@@ -264,8 +264,8 @@ object Twitter {
"", "", "", "", "", "",
"http", "https", "apos", "kml", "amp", "www", "quot", "id", "latitude", "longitude", "latlonbox", "geo", "json")
}
-
- def findEmoticons(n:Int, dd:Dict) = {
+
+ def findEmoticons(n:Int, dd:Dict) = {
val smiles = csrow(":-)", ":)", ":o)", ":]", ":3", ":c)", ":>", "=]", "8)", "=)", ":}", ":^)", ":っ)")
val laughs = csrow(":-d", ":d", "8-d", "8d", "x-d", "xd", "x-x", "=-d", "=d", "=-3", "=3", "b^d")
val frowns = csrow(">:[", ":-(", ":(", "", ":-c", ":c", ":-<", "", ":っc", ":<", ":-[", ":[", ":{")
@@ -286,125 +286,125 @@ object Twitter {
}
}
out
- }
-
- def getGramDict(nuni0:Int=50, nbi0:Int=100, ntri0:Int=200, rebuild:Boolean=false):Dict = {
- val nuni = nuni0 * 1000
- val nbi = nbi0 * 1000
- val ntri = ntri0 * 1000
- val fname = "/big/twitter/tokenized/dict_%d_%d_%d" format (nuni0, nbi0, ntri0)
- if (!rebuild && (new File(fname + "_SBMat.lz4").exists) && (new File(fname + "_dmat.lz4").exists)) {
- val bm = loadSBMat(fname + "_SBMat.lz4")
- val dm = loadDMat(fname + "_dmat.lz4")
- Dict(bm, dm)
- } else {
- val ud = getDict
- val bd = getBiDict
- val td = getTriDict
- val dd = IDict.gramDict(nuni, nbi, ntri, ud, bd, td)
- saveSBMat(fname + "_SBMat.lz4", SBMat(dd.cstr))
- saveDMat(fname + "_dmat.lz4", dd.counts)
- dd
- }
- }
-
- def getEmoticonMap(nuni0:Int=50, nbi0:Int=100, ntri0:Int=200, rebuild:Boolean=false):FMat = {
- val nuni = nuni0 * 1000
- val nbi = nbi0 * 1000
- val ntri = ntri0 * 1000
- val fname = "/big/twitter/tokenized/dict_%d_%d_%d" format (nuni0, nbi0, ntri0)
- if (!rebuild && (new File(fname + "_emos.lz4").exists)) {
- loadFMat(fname + "_emos.lz4")
- } else {
- val ud = getDict
- val bdt = getBiDict.grams(0->nbi,?)
- val tdt = getTriDict.grams(0->ntri,?)
- val em = findEmoticons(1 + maxi(irow(nuni) \ maxi(bdt) \ maxi(tdt)).v, ud)
- val bv = zeros(em.nrows, nbi)
- val tv = zeros(em.nrows, ntri)
- for (i <- 0 until em.nrows) {
- bv(i, ?) = max(em(i, bdt(?, 0)), em(i, bdt(?, 1)))
- tv(i, ?) = max(em(i, tdt(?, 0)), max(em(i, tdt(?, 1)), em(i, tdt(?, 2))))
- }
- val emos = em(?, 0->nuni) \ bv(?, 0->nbi) \ tv(?, 0->ntri)
- saveFMat(fname + "_emos.lz4", emos)
- emos
- }
- }
-
- def logisticModelPar(
- nstart0:Int = FileSource.encodeDate(2012,3,1,0),
- nend0:Int = FileSource.encodeDate(2013,7,1,0),
- nuni0:Int = 50,
- nbi0:Int = 100,
- ntri0:Int = 200
- ) = {
- val ds = twitterNgramBlend(nstart0, nend0)
-// val ds = SFilesDataSource.twitterWords(nstart0, nend0)
- ds.opts.addConstFeat = true
- ds.opts.featType = 0
- val gd = getGramDict(nuni0, nbi0, ntri0)
- val em = getEmoticonMap(nuni0, nbi0, ntri0)
- val nfeats = gd.length + 1
- val mask = (sum(em) == 0f) \ 1
-// val targets = em(0->(em.nrows-1), ?) \ zeros(em.nrows-1,1)
- val targets = em(0->1, ?) \ 0
- val ntargets = targets.nrows
- val exptsv = col(0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
- val exptst = col(0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
-// val expts = col(0.5)
- val avalues = col(0.1f, 1f, 10f)
- val expts1 = ones(avalues.length*ntargets, 1) ⊗ exptsv ⊗ ones(exptst.length, 1)
- val expts2 = ones(avalues.length*exptsv.length*ntargets, 1) ⊗ exptst
- val lrates = ones(ntargets, 1) ⊗ avalues ⊗ ones(exptst.length*exptsv.length, 1)
- val aopts = new ADAGrad.Options
- aopts.vexp = expts1
- aopts.texp = expts2
- aopts.lrate = lrates
- aopts.mask = mask
- val gopts = new GLM.Options
- gopts.links = iones(expts1.length, 1)
- gopts.rmask = mask
- gopts.targmap = mkdiag(ones(ntargets, 1)) ⊗ ones(expts1.length/ntargets, 1)
- gopts.targets = targets
- new ParLearnerF(ds, gopts, GLM.mkGLMModel _, null, null, aopts, GLM.mkUpdater _, null, null)
- }
-
- def logisticModel(
- mat:SMat,
- ntargs:Int = 1,
- exptsv:FMat = col(0.4, 0.5, 0.6),
- exptst:FMat = col(0.4, 0.5, 0.6),
- avalues:FMat = col(0.1, 0.3, 1),
- nuni0:Int = 50,
- nbi0:Int = 100,
- ntri0:Int = 200
- ) = {
- val ds = new MatSource(Array(mat:Mat))
- val gd = getGramDict(nuni0, nbi0, ntri0)
- val em = getEmoticonMap(nuni0, nbi0, ntri0)
- val nfeats = gd.length + 1
- val mask = (sum(em) == 0f) \ 1
- val targets0 = em(0->(em.nrows-1), ?) \ zeros(em.nrows-1,1)
- val targets = targets0(0->ntargs, ?)
- val ntargets = targets.nrows
- val expts1 = ones(avalues.length*ntargets, 1) ⊗ exptsv ⊗ ones(exptst.length, 1)
- val expts2 = ones(avalues.length*exptsv.length*ntargets, 1) ⊗ exptst
- val lrates = ones(ntargets, 1) ⊗ avalues ⊗ ones(exptst.length*exptsv.length, 1)
- val aopts = new ADAGrad.Options
- aopts.vexp = expts1
- aopts.texp = expts2
- aopts.lrate = lrates
- aopts.mask = mask
- val gopts = new GLM.Options
- gopts.links = iones(expts1.length, 1)
- gopts.rmask = mask
- gopts.targmap = mkdiag(ones(ntargets, 1)) ⊗ ones(expts1.length/ntargets, 1)
- gopts.targets = targets
- Learner(ds, new GLM(gopts), null, new ADAGrad(aopts), null)
- }
-
-
+ }
+
+ def getGramDict(nuni0:Int=50, nbi0:Int=100, ntri0:Int=200, rebuild:Boolean=false):Dict = {
+ val nuni = nuni0 * 1000
+ val nbi = nbi0 * 1000
+ val ntri = ntri0 * 1000
+ val fname = "/big/twitter/tokenized/dict_%d_%d_%d" format (nuni0, nbi0, ntri0)
+ if (!rebuild && (new File(fname + "_SBMat.lz4").exists) && (new File(fname + "_dmat.lz4").exists)) {
+ val bm = loadSBMat(fname + "_SBMat.lz4")
+ val dm = loadDMat(fname + "_dmat.lz4")
+ Dict(bm, dm)
+ } else {
+ val ud = getDict
+ val bd = getBiDict
+ val td = getTriDict
+ val dd = IDict.gramDict(nuni, nbi, ntri, ud, bd, td)
+ saveSBMat(fname + "_SBMat.lz4", SBMat(dd.cstr))
+ saveDMat(fname + "_dmat.lz4", dd.counts)
+ dd
+ }
+ }
+
+ def getEmoticonMap(nuni0:Int=50, nbi0:Int=100, ntri0:Int=200, rebuild:Boolean=false):FMat = {
+ val nuni = nuni0 * 1000
+ val nbi = nbi0 * 1000
+ val ntri = ntri0 * 1000
+ val fname = "/big/twitter/tokenized/dict_%d_%d_%d" format (nuni0, nbi0, ntri0)
+ if (!rebuild && (new File(fname + "_emos.lz4").exists)) {
+ loadFMat(fname + "_emos.lz4")
+ } else {
+ val ud = getDict
+ val bdt = getBiDict.grams(0->nbi,?)
+ val tdt = getTriDict.grams(0->ntri,?)
+ val em = findEmoticons(1 + maxi(irow(nuni) \ maxi(bdt) \ maxi(tdt)).v, ud)
+ val bv = zeros(em.nrows, nbi)
+ val tv = zeros(em.nrows, ntri)
+ for (i <- 0 until em.nrows) {
+ bv(i, ?) = max(em(i, bdt(?, 0)), em(i, bdt(?, 1)))
+ tv(i, ?) = max(em(i, tdt(?, 0)), max(em(i, tdt(?, 1)), em(i, tdt(?, 2))))
+ }
+ val emos = em(?, 0->nuni) \ bv(?, 0->nbi) \ tv(?, 0->ntri)
+ saveFMat(fname + "_emos.lz4", emos)
+ emos
+ }
+ }
+
+ def logisticModelPar(
+ nstart0:Int = FileSource.encodeDate(2012,3,1,0),
+ nend0:Int = FileSource.encodeDate(2013,7,1,0),
+ nuni0:Int = 50,
+ nbi0:Int = 100,
+ ntri0:Int = 200
+ ) = {
+ val ds = twitterNgramBlend(nstart0, nend0)
+// val ds = SFilesDataSource.twitterWords(nstart0, nend0)
+ ds.opts.addConstFeat = true
+ ds.opts.featType = 0
+ val gd = getGramDict(nuni0, nbi0, ntri0)
+ val em = getEmoticonMap(nuni0, nbi0, ntri0)
+ val nfeats = gd.length + 1
+ val mask = (sum(em) == 0f) \ 1
+// val targets = em(0->(em.nrows-1), ?) \ zeros(em.nrows-1,1)
+ val targets = em(0->1, ?) \ 0
+ val ntargets = targets.nrows
+ val exptsv = col(0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
+ val exptst = col(0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
+// val expts = col(0.5)
+ val avalues = col(0.1f, 1f, 10f)
+ val expts1 = ones(avalues.length*ntargets, 1) ⊗ exptsv ⊗ ones(exptst.length, 1)
+ val expts2 = ones(avalues.length*exptsv.length*ntargets, 1) ⊗ exptst
+ val lrates = ones(ntargets, 1) ⊗ avalues ⊗ ones(exptst.length*exptsv.length, 1)
+ val aopts = new ADAGrad.Options
+ aopts.vexp = expts1
+ aopts.texp = expts2
+ aopts.lrate = lrates
+ aopts.mask = mask
+ val gopts = new GLM.Options
+ gopts.links = iones(expts1.length, 1)
+ gopts.rmask = mask
+ gopts.targmap = mkdiag(ones(ntargets, 1)) ⊗ ones(expts1.length/ntargets, 1)
+ gopts.targets = targets
+ new ParLearnerF(ds, gopts, GLM.mkGLMModel _, null, null, aopts, GLM.mkUpdater _, null, null)
+ }
+
+ def logisticModel(
+ mat:SMat,
+ ntargs:Int = 1,
+ exptsv:FMat = col(0.4, 0.5, 0.6),
+ exptst:FMat = col(0.4, 0.5, 0.6),
+ avalues:FMat = col(0.1, 0.3, 1),
+ nuni0:Int = 50,
+ nbi0:Int = 100,
+ ntri0:Int = 200
+ ) = {
+ val ds = new MatSource(Array(mat:Mat))
+ val gd = getGramDict(nuni0, nbi0, ntri0)
+ val em = getEmoticonMap(nuni0, nbi0, ntri0)
+ val nfeats = gd.length + 1
+ val mask = (sum(em) == 0f) \ 1
+ val targets0 = em(0->(em.nrows-1), ?) \ zeros(em.nrows-1,1)
+ val targets = targets0(0->ntargs, ?)
+ val ntargets = targets.nrows
+ val expts1 = ones(avalues.length*ntargets, 1) ⊗ exptsv ⊗ ones(exptst.length, 1)
+ val expts2 = ones(avalues.length*exptsv.length*ntargets, 1) ⊗ exptst
+ val lrates = ones(ntargets, 1) ⊗ avalues ⊗ ones(exptst.length*exptsv.length, 1)
+ val aopts = new ADAGrad.Options
+ aopts.vexp = expts1
+ aopts.texp = expts2
+ aopts.lrate = lrates
+ aopts.mask = mask
+ val gopts = new GLM.Options
+ gopts.links = iones(expts1.length, 1)
+ gopts.rmask = mask
+ gopts.targmap = mkdiag(ones(ntargets, 1)) ⊗ ones(expts1.length/ntargets, 1)
+ gopts.targets = targets
+ Learner(ds, new GLM(gopts), null, new ADAGrad(aopts), null)
+ }
+
+
val twitterFeatureDir = "/disk%02d/twitter/featurized/%04d/%02d/%02d/"
val twitterSmileyFeatureDir = "/disk%02d/twitter/smiley/featurized/%04d/%02d/%02d/"
@@ -564,4 +564,4 @@ object Twitter {
stop
}
}
-}
\ No newline at end of file
+}
\ No newline at end of file
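
Note: logisticModelPar and logisticModel above lay out their hyperparameter sweeps with Kronecker products of column vectors, the ones(...) ⊗ values ⊗ ones(...) pattern, so that row i of the lrate/vexp/texp columns jointly names one point of the full cross product and a single learner trains the whole grid at once. A plain-Scala sketch of the same construction (values and helper names are illustrative, not the BIDMat API):

    object KronGrid {
      // Kronecker product of two column vectors.
      def kron(a: Array[Double], b: Array[Double]): Array[Double] =
        for (x <- a; y <- b) yield x * y
      def ones(n: Int): Array[Double] = Array.fill(n)(1.0)

      def main(args: Array[String]): Unit = {
        val lrates = Array(0.1, 1.0, 10.0)   // like avalues above
        val vexps  = Array(0.5, 0.6)         // like exptsv
        val texps  = Array(0.5, 0.6)         // like exptst
        // Row i of the three columns is one (lrate, vexp, texp) grid point.
        val lrCol = kron(kron(lrates, ones(vexps.length)), ones(texps.length))
        val veCol = kron(kron(ones(lrates.length), vexps), ones(texps.length))
        val teCol = kron(kron(ones(lrates.length), ones(vexps.length)), texps)
        for (i <- lrCol.indices) println((lrCol(i), veCol(i), teCol(i)))
      }
    }
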
diff --git a/src/main/scala/BIDMach/Featurizer.scala b/src/main/scala/BIDMach/Featurizer.scala
index 042d99f2..71de96c2 100755
--- a/src/main/scala/BIDMach/Featurizer.scala
+++ b/src/main/scala/BIDMach/Featurizer.scala
@@ -13,157 +13,157 @@ class Featurizer(val opts:Featurizer.Options = new Featurizer.Options) {
var alldict:Dict = null
var allbdict:IDict = null
var alltdict:IDict = null
-
+
def mergeDicts(rebuild:Int,dictname:String="dict.gz",wcountname:String="wcount.gz"):Dict = {
val dd = new Array[Dict](5) // Big enough to hold log2(days per month)
- val nmonths = 2 + (opts.nend - opts.nstart)/31
- val md = new Array[Dict](1+(math.log(nmonths)/math.log(2)).toInt) // Big enough to hold log2(num months)
- println("Building monthly dicts for "+opts.thisDir)
- for (d <- opts.nstart to opts.nend) { // Conditional on rebuild, merge the dictionaries for each month
- val (year, month, day) = Featurizer.decodeDate(d)
- val fm = new File(opts.fromMonthDir(d) + wcountname)
- if (rebuild > 1 || ! fm.exists) {
- val fd = new File(opts.fromDayDir(d) + wcountname)
- if (fd.exists) {
- val bb = loadSBMat(opts.fromDayDir(d) + dictname)
- val cc = loadIMat(opts.fromDayDir(d) + wcountname)
- Dict.treeAdd(Dict(bb, cc, opts.threshold), dd)
- print(".")
- }
- if (day == 31) {
- val dx = Dict.treeFlush(dd)
- if (dx != null) {
- val (sv, iv) = sortdown2(dx.counts)
- val dxx = Dict(dx.cstr(iv), sv)
- val fd = new File(opts.fromMonthDir(d))
- if (!fd.exists) fd.mkdirs
- saveSBMat(opts.fromMonthDir(d)+dictname, SBMat(dxx.cstr))
- saveDMat(opts.fromMonthDir(d)+wcountname, dxx.counts)
- println("%04d-%02d" format (year,month))
- }
- }
- }
- }
+ val nmonths = 2 + (opts.nend - opts.nstart)/31
+ val md = new Array[Dict](1+(math.log(nmonths)/math.log(2)).toInt) // Big enough to hold log2(num months)
+ println("Building monthly dicts for "+opts.thisDir)
+ for (d <- opts.nstart to opts.nend) { // Conditional on rebuild, merge the dictionaries for each month
+ val (year, month, day) = Featurizer.decodeDate(d)
+ val fm = new File(opts.fromMonthDir(d) + wcountname)
+ if (rebuild > 1 || ! fm.exists) {
+ val fd = new File(opts.fromDayDir(d) + wcountname)
+ if (fd.exists) {
+ val bb = loadSBMat(opts.fromDayDir(d) + dictname)
+ val cc = loadIMat(opts.fromDayDir(d) + wcountname)
+ Dict.treeAdd(Dict(bb, cc, opts.threshold), dd)
+ print(".")
+ }
+ if (day == 31) {
+ val dx = Dict.treeFlush(dd)
+ if (dx != null) {
+ val (sv, iv) = sortdown2(dx.counts)
+ val dxx = Dict(dx.cstr(iv), sv)
+ val fd = new File(opts.fromMonthDir(d))
+ if (!fd.exists) fd.mkdirs
+ saveSBMat(opts.fromMonthDir(d)+dictname, SBMat(dxx.cstr))
+ saveDMat(opts.fromMonthDir(d)+wcountname, dxx.counts)
+ println("%04d-%02d" format (year,month))
+ }
+ }
+ }
+ }
if (rebuild > 0) {
- println("Merging monthly dicts for "+opts.thisDir)
- for (d <- opts.nstart to opts.nend) { // Conditionally merge all monthly dictionaries
- val (year, month, day) = Featurizer.decodeDate(d)
- if (day == 31) {
- val fm = new File(opts.fromMonthDir(d) + wcountname)
- if (fm.exists) {
- val bb = loadSBMat(opts.fromMonthDir(d) + dictname)
- val cc = loadDMat(opts.fromMonthDir(d) + wcountname)
- Dict.treeAdd(Dict(bb, cc, 4*opts.threshold), md)
- println("%04d-%02d" format (year,month))
- }
- }
- }
- println
- val dy = Dict.treeFlush(md) // Get merged dictionary, sort by counts descending
- val (sv, iv) = sortdown2(dy.counts)
- val dyy = Dict(dy.cstr(iv), sv)
- saveSBMat(opts.thisDir + dictname, SBMat(dyy.cstr))
- saveDMat(opts.thisDir + wcountname, dyy.counts)
- dyy
+ println("Merging monthly dicts for "+opts.thisDir)
+ for (d <- opts.nstart to opts.nend) { // Conditionally merge all monthly dictionaries
+ val (year, month, day) = Featurizer.decodeDate(d)
+ if (day == 31) {
+ val fm = new File(opts.fromMonthDir(d) + wcountname)
+ if (fm.exists) {
+ val bb = loadSBMat(opts.fromMonthDir(d) + dictname)
+ val cc = loadDMat(opts.fromMonthDir(d) + wcountname)
+ Dict.treeAdd(Dict(bb, cc, 4*opts.threshold), md)
+ println("%04d-%02d" format (year,month))
+ }
+ }
+ }
+ println
+ val dy = Dict.treeFlush(md) // Get merged dictionary, sort by counts descending
+ val (sv, iv) = sortdown2(dy.counts)
+ val dyy = Dict(dy.cstr(iv), sv)
+ saveSBMat(opts.thisDir + dictname, SBMat(dyy.cstr))
+ saveDMat(opts.thisDir + wcountname, dyy.counts)
+ dyy
} else {
Dict(loadSBMat(opts.thisDir + dictname), loadDMat(opts.thisDir + wcountname))
}
- }
+ }
def mergeIDicts(rebuild:Int = 0, dictname:String="bdict.lz4", wcountname:String="bcnts.lz4", mapit:Boolean=true):IDict = {
println("Building monthly IDicts for " + opts.thisDir + " " + dictname)
if (alldict == null) alldict = Dict(loadSBMat(opts.mainDict))
- val dd = new Array[IDict](5) // Big enough to hold log2(days per month)
- val nmonths = 2 + (opts.nend - opts.nstart)/31
- val md = new Array[IDict](1+(math.log(nmonths)/math.log(2)).toInt) // Big enough to hold log2(num months)
- var dy:IDict = null
- var mdict:Dict = null
- var domonth:Boolean = false
- var lastmonth = 0
- for (d <- opts.nstart to opts.nend) {
- val (year, month, day) = Featurizer.decodeDate(d)
- if (month != lastmonth) {
- val dfname = opts.fromMonthDir(d) + opts.localDict
- if (fileExists(dfname)) {
- mdict = Dict(loadSBMat(dfname)) // Load token dictionary for this month
- val fm = new File(opts.fromMonthDir(d) + wcountname) // Did we process this month?
- domonth = rebuild > 1 || !fm.exists
- } else {
- mdict = null
- domonth = false
- }
- lastmonth = month
- }
- if (domonth) {
- val fd = new File(opts.fromDayDir(d) + wcountname)
- if (fd.exists) {
- val bb = loadIMat(opts.fromDayDir(d) + dictname) // Load IDict info for this day
- val cc = loadDMat(opts.fromDayDir(d) + wcountname)
+ val dd = new Array[IDict](5) // Big enough to hold log2(days per month)
+ val nmonths = 2 + (opts.nend - opts.nstart)/31
+ val md = new Array[IDict](1+(math.log(nmonths)/math.log(2)).toInt) // Big enough to hold log2(num months)
+ var dy:IDict = null
+ var mdict:Dict = null
+ var domonth:Boolean = false
+ var lastmonth = 0
+ for (d <- opts.nstart to opts.nend) {
+ val (year, month, day) = Featurizer.decodeDate(d)
+ if (month != lastmonth) {
+ val dfname = opts.fromMonthDir(d) + opts.localDict
+ if (fileExists(dfname)) {
+ mdict = Dict(loadSBMat(dfname)) // Load token dictionary for this month
+ val fm = new File(opts.fromMonthDir(d) + wcountname) // Did we process this month?
+ domonth = rebuild > 1 || !fm.exists
+ } else {
+ mdict = null
+ domonth = false
+ }
+ lastmonth = month
+ }
+ if (domonth) {
+ val fd = new File(opts.fromDayDir(d) + wcountname)
+ if (fd.exists) {
+ val bb = loadIMat(opts.fromDayDir(d) + dictname) // Load IDict info for this day
+ val cc = loadDMat(opts.fromDayDir(d) + wcountname)
// Kludge to deal with (old) scanner problem
- val ig = find(maxi(bb, 2) < 0x7fffffff)
- val bb2 = bb(ig, ?)
- val bm = if (mapit) {
- val dict = Dict(loadSBMat(opts.fromDayDir(d) + opts.localDict)) // Load token dictionary for this day
- val map = dict --> mdict // Map from this day's tokens to month dictionary
- map(bb2) // Map the ngrams
- } else {
- bb2
- }
- val cc2 = cc(ig,0)
+ val ig = find(maxi(bb, 2) < 0x7fffffff)
+ val bb2 = bb(ig, ?)
+ val bm = if (mapit) {
+ val dict = Dict(loadSBMat(opts.fromDayDir(d) + opts.localDict)) // Load token dictionary for this day
+ val map = dict --> mdict // Map from this day's tokens to month dictionary
+ map(bb2) // Map the ngrams
+ } else {
+ bb2
+ }
+ val cc2 = cc(ig,0)
// Done kludge
- val igood = find(mini(bm, 2) >= 0) // Find the good ones
- val bg = bm(igood,?)
- val cg = cc2(igood)
- val ip = icol(0->igood.length)
- sortlexInds(bg, ip) // lex sort them
- IDict.treeAdd(IDict(bg, cg(ip), opts.threshold), dd) // accumulate them
- print(".")
- }
- if (day == 31) { // On the last day, save the accumulated results
- val dx = IDict.treeFlush(dd)
- if (dx != null) {
- saveIMat(opts.fromMonthDir(d)+dictname, dx.grams)
- saveDMat(opts.fromMonthDir(d)+wcountname, dx.counts)
- }
- println("%04d-%02d" format (year,month))
- }
- }
- }
+ val igood = find(mini(bm, 2) >= 0) // Find the good ones
+ val bg = bm(igood,?)
+ val cg = cc2(igood)
+ val ip = icol(0->igood.length)
+ sortlexInds(bg, ip) // lex sort them
+ IDict.treeAdd(IDict(bg, cg(ip), opts.threshold), dd) // accumulate them
+ print(".")
+ }
+ if (day == 31) { // On the last day, save the accumulated results
+ val dx = IDict.treeFlush(dd)
+ if (dx != null) {
+ saveIMat(opts.fromMonthDir(d)+dictname, dx.grams)
+ saveDMat(opts.fromMonthDir(d)+wcountname, dx.counts)
+ }
+ println("%04d-%02d" format (year,month))
+ }
+ }
+ }
if (rebuild > 0) {
- println("Merging monthly IDicts for " + opts.thisDir)
- for (d <- opts.nstart to opts.nend) {
- val (year, month, day) = Featurizer.decodeDate(d)
- if (day == 31) { // Conditionally accumulate monthly dicts
- val dfname = opts.fromMonthDir(d) + opts.localDict
- if (fileExists(dfname) || ! mapit) {
- mdict = if (mapit) Dict(loadSBMat(dfname)) else null
- val fm = new File(opts.fromMonthDir(d) + wcountname)
- if (fm.exists) {
- val bb = HMat.loadIMat(opts.fromMonthDir(d) + dictname) // Load the IDict data for this month
- val cc = HMat.loadDMat(opts.fromMonthDir(d) + wcountname)
- val bm = if (mapit) {
- val map = mdict --> alldict
- map(bb) // Map to global token dictionary
- } else bb
- val igood = find(mini(bm, 2) >= 0) // Save the good stuff
- val bg = bm(igood,?)
- val cg = cc(igood)
- val ip = icol(0->igood.length)
- sortlexInds(bg, ip)
- IDict.treeAdd(IDict(bg, cg(ip), 4*opts.threshold), md)
- println("%04d-%02d" format (year,month))
- }
- }
- }
- }
- dy = IDict.treeFlush(md) // Final dictionary for the time period
- println
- val (sv, iv) = sortdown2(dy.counts) // Sort down by ngram frequency
- val dyy = IDict(dy.grams(iv,?), sv)
- saveIMat(opts.thisDir + dictname, dyy.grams)
- saveDMat(opts.thisDir + wcountname, dyy.counts)
- dy // Return the lex-sorted dictionary
+ println("Merging monthly IDicts for " + opts.thisDir)
+ for (d <- opts.nstart to opts.nend) {
+ val (year, month, day) = Featurizer.decodeDate(d)
+ if (day == 31) { // Conditionally accumulate monthly dicts
+ val dfname = opts.fromMonthDir(d) + opts.localDict
+ if (fileExists(dfname) || ! mapit) {
+ mdict = if (mapit) Dict(loadSBMat(dfname)) else null
+ val fm = new File(opts.fromMonthDir(d) + wcountname)
+ if (fm.exists) {
+ val bb = HMat.loadIMat(opts.fromMonthDir(d) + dictname) // Load the IDict data for this month
+ val cc = HMat.loadDMat(opts.fromMonthDir(d) + wcountname)
+ val bm = if (mapit) {
+ val map = mdict --> alldict
+ map(bb) // Map to global token dictionary
+ } else bb
+ val igood = find(mini(bm, 2) >= 0) // Save the good stuff
+ val bg = bm(igood,?)
+ val cg = cc(igood)
+ val ip = icol(0->igood.length)
+ sortlexInds(bg, ip)
+ IDict.treeAdd(IDict(bg, cg(ip), 4*opts.threshold), md)
+ println("%04d-%02d" format (year,month))
+ }
+ }
+ }
+ }
+ dy = IDict.treeFlush(md) // Final dictionary for the time period
+ println
+ val (sv, iv) = sortdown2(dy.counts) // Sort down by ngram frequency
+ val dyy = IDict(dy.grams(iv,?), sv)
+ saveIMat(opts.thisDir + dictname, dyy.grams)
+ saveDMat(opts.thisDir + wcountname, dyy.counts)
+ dy // Return the lex-sorted dictionary
} else {
val gyy = loadIMat(opts.thisDir + dictname)
val cyy = loadDMat(opts.thisDir + wcountname)
@@ -171,7 +171,7 @@ class Featurizer(val opts:Featurizer.Options = new Featurizer.Options) {
sortlexInds(gyy, iperm)
IDict(gyy, cyy(iperm))
}
- }
+ }
def mkIDicts(rebuild:Int, scanner:Scanner=TwitterScanner) = { // Build ngram dictionaries for each day
@@ -181,48 +181,48 @@ class Featurizer(val opts:Featurizer.Options = new Featurizer.Options) {
for (ithread <- 0 until nthreads) {
Future {
if (Mat.hasCUDA > 0) setGPU(ithread+Mat.hasCUDA-nthreads)
- val bigramsx = IMat(opts.guessSize, 3) // Temp storage for grams
- val trigramsx = IMat(opts.guessSize, 4)
- val useridsx = IMat(opts.guessSize/10, 2)
- val bdicts = new Array[IDict](5) // Trees to hold partial merges
- val tdicts = new Array[IDict](5)
- val udicts = new Array[IDict](5)
+ val bigramsx = IMat(opts.guessSize, 3) // Temp storage for grams
+ val trigramsx = IMat(opts.guessSize, 4)
+ val useridsx = IMat(opts.guessSize/10, 2)
+ val bdicts = new Array[IDict](5) // Trees to hold partial merges
+ val tdicts = new Array[IDict](5)
+ val udicts = new Array[IDict](5)
- for (d <- (opts.nstart+ithread) to opts.nend by nthreads) {
- val (year, month, day) = Featurizer.decodeDate(d)
- val fname = opts.fromDayDir(d)+opts.localDict
- val fnew = opts.fromDayDir(d)+opts.usrCnts // Check if the userid dictionary was built yet
- if (fileExists(fname) && (rebuild > 1 || !fileExists(fnew))) {
- val dict = Dict(loadSBMat(fname)) // load token dictionary for this day
- for (ifile <- 0 until 24) {
- val fn = opts.fromDayDir(d)+opts.fromFile(ifile)
- if (fileExists(fn)) {
- val idata = loadIMat(fn)
- val (nuni, nbi, ntri, nusers) = scanner.scan(opts, dict, idata, null, bigramsx, trigramsx, useridsx)
- val bigrams = bigramsx(0->nbi, 0->2)
- val bid = if (nbi > 0) IDict.dictFromData(bigrams) else null
- val trigrams = trigramsx(0->ntri, 0->3)
- val trid = if (ntri > 0) IDict.dictFromData(trigrams) else null
- val userids = useridsx(0->nusers, 0)
- val uid = if (nusers > 0) IDict.dictFromData(userids) else null
- IDict.treeAdd(bid, bdicts)
- IDict.treeAdd(trid, tdicts)
- IDict.treeAdd(uid, udicts)
- }
- }
- val bf = IDict.treeFlush(bdicts)
- val tf = IDict.treeFlush(tdicts)
- val uf = IDict.treeFlush(udicts)
- saveIMat(opts.fromDayDir(d) + opts.biDict, bf.grams)
- saveDMat(opts.fromDayDir(d) + opts.biCnts, bf.counts)
- saveIMat(opts.fromDayDir(d) + opts.triDict, tf.grams)
- saveDMat(opts.fromDayDir(d) + opts.triCnts, tf.counts)
- saveIMat(opts.fromDayDir(d) + opts.usrDict, uf.grams)
- saveDMat(opts.fromDayDir(d) + opts.usrCnts, uf.counts)
- print(".")
- }
- if (ithread == 0 && day/nthreads == 31/nthreads) println("%04d-%02d" format (year,month))
- }
+ for (d <- (opts.nstart+ithread) to opts.nend by nthreads) {
+ val (year, month, day) = Featurizer.decodeDate(d)
+ val fname = opts.fromDayDir(d)+opts.localDict
+ val fnew = opts.fromDayDir(d)+opts.usrCnts // Check if the userid dictionary was built yet
+ if (fileExists(fname) && (rebuild > 1 || !fileExists(fnew))) {
+ val dict = Dict(loadSBMat(fname)) // load token dictionary for this day
+ for (ifile <- 0 until 24) {
+ val fn = opts.fromDayDir(d)+opts.fromFile(ifile)
+ if (fileExists(fn)) {
+ val idata = loadIMat(fn)
+ val (nuni, nbi, ntri, nusers) = scanner.scan(opts, dict, idata, null, bigramsx, trigramsx, useridsx)
+ val bigrams = bigramsx(0->nbi, 0->2)
+ val bid = if (nbi > 0) IDict.dictFromData(bigrams) else null
+ val trigrams = trigramsx(0->ntri, 0->3)
+ val trid = if (ntri > 0) IDict.dictFromData(trigrams) else null
+ val userids = useridsx(0->nusers, 0)
+ val uid = if (nusers > 0) IDict.dictFromData(userids) else null
+ IDict.treeAdd(bid, bdicts)
+ IDict.treeAdd(trid, tdicts)
+ IDict.treeAdd(uid, udicts)
+ }
+ }
+ val bf = IDict.treeFlush(bdicts)
+ val tf = IDict.treeFlush(tdicts)
+ val uf = IDict.treeFlush(udicts)
+ saveIMat(opts.fromDayDir(d) + opts.biDict, bf.grams)
+ saveDMat(opts.fromDayDir(d) + opts.biCnts, bf.counts)
+ saveIMat(opts.fromDayDir(d) + opts.triDict, tf.grams)
+ saveDMat(opts.fromDayDir(d) + opts.triCnts, tf.counts)
+ saveIMat(opts.fromDayDir(d) + opts.usrDict, uf.grams)
+ saveDMat(opts.fromDayDir(d) + opts.usrCnts, uf.counts)
+ print(".")
+ }
+ if (ithread == 0 && day/nthreads == 31/nthreads) println("%04d-%02d" format (year,month))
+ }
done(ithread,0) = 1
}
}
@@ -230,10 +230,10 @@ class Featurizer(val opts:Featurizer.Options = new Featurizer.Options) {
}
def mkUniFeats(map:IMat, gramsx:IMat, ng:Int):IMat = {
- val unis = map(gramsx(0->ng, 0))
- val igood = find(unis >= 0)
- val gg = unis(igood, 0)
- val ggn = gramsx(igood, 1)
+ val unis = map(gramsx(0->ng, 0))
+ val igood = find(unis >= 0)
+ val gg = unis(igood, 0)
+ val ggn = gramsx(igood, 1)
val feats = ggn \ gg
sortlex(feats)
val (outr, ix, iy) = uniquerows(feats)
@@ -242,12 +242,12 @@ class Featurizer(val opts:Featurizer.Options = new Featurizer.Options) {
}
def mkGramFeats(map:IMat, gramsx:IMat, ng:Int, alldict:IDict):IMat = {
- val grams = map(gramsx(0->ng, 0->(gramsx.ncols-1)))
- val igood = find(mini(grams, 2) >= 0)
- val gg = grams(igood,?)
- val ggn = gramsx(igood, gramsx.ncols-1)
- val gmap = IDict(gg) --> alldict
- val igood2 = find(gmap >= 0)
+ val grams = map(gramsx(0->ng, 0->(gramsx.ncols-1)))
+ val igood = find(mini(grams, 2) >= 0)
+ val gg = grams(igood,?)
+ val ggn = gramsx(igood, gramsx.ncols-1)
+ val gmap = IDict(gg) --> alldict
+ val igood2 = find(gmap >= 0)
val feats = ggn(igood2,0) \ gmap(igood2,0)
sortlex(feats)
val (outr, ix, iy) = uniquerows(feats)
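
Note: mkUniFeats and mkGramFeats both finish with the same aggregation step: stack the (count-key, feature) rows, sort them lexicographically with sortlex, then collapse runs of identical rows via uniquerows. A plain-Scala sketch of that collapse, assuming integer-pair rows (helper names are illustrative):

    object UniqueRows {
      def countRows(rows: Array[(Int, Int)]): Array[((Int, Int), Int)] = {
        val sorted = rows.sorted            // lexicographic order on tuples
        // Collapse each run of equal rows into one (row, count) entry.
        sorted.foldLeft(List.empty[((Int, Int), Int)]) {
          case ((r, c) :: tail, row) if r == row => (r, c + 1) :: tail
          case (acc, row)                        => (row, 1) :: acc
        }.reverse.toArray
      }
      def main(args: Array[String]): Unit = {
        val feats = Array((0, 7), (1, 3), (0, 7), (0, 7))
        countRows(feats).foreach(println)   // ((0,7),3) then ((1,3),1)
      }
    }
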
@@ -258,51 +258,51 @@ class Featurizer(val opts:Featurizer.Options = new Featurizer.Options) {
def featurize(rebuild:Int, scanner:Scanner=TwitterScanner) = {
println("Featurizing in " + opts.thisDir)
if (alldict == null) alldict = Dict(HMat.loadSBMat(opts.mainDict))
- if (allbdict == null) allbdict = IDict(HMat.loadIMat(opts.mainBDict))
- if (alltdict == null) alltdict = IDict(HMat.loadIMat(opts.mainTDict))
- alldict.makeHash
- allbdict.makeSorted
- alltdict.makeSorted
+ if (allbdict == null) allbdict = IDict(HMat.loadIMat(opts.mainBDict))
+ if (alltdict == null) alltdict = IDict(HMat.loadIMat(opts.mainTDict))
+ alldict.makeHash
+ allbdict.makeSorted
+ alltdict.makeSorted
val nthreads = math.min(opts.nthreads, math.max(1, Mat.hasCUDA))
val done = izeros(nthreads,1)
for (ithread <- 0 until nthreads) {
Future {
if (Mat.hasCUDA > 0) setGPU(ithread+Mat.hasCUDA-nthreads)
val unigramsx = IMat(opts.guessSize, 2)
- val bigramsx = IMat(opts.guessSize, 3)
- val trigramsx = IMat(opts.guessSize, 4)
- val userids = IMat(opts.guessSize/10, 2)
- for (d <- (opts.nstart+ithread) to opts.nend by nthreads) {
- val (year, month, day) = Featurizer.decodeDate(d)
- val fdict = opts.fromDayDir(d)+opts.localDict
- if (fileExists(fdict)) {
- var dict:Dict = null
- var map:IMat = null
- val fd = new File(opts.toDayDir(d))
- if (!fd.exists) fd.mkdirs
- for (ifile <- 0 until 24) {
- val fn = opts.fromDayDir(d)+opts.fromFile(ifile)
- val fx = opts.toDayDir(d)+opts.toTriFeats(ifile)
- if (fileExists(fn) && (rebuild > 0 || !fileExists(fx))) {
- if (dict == null) {
- dict = Dict(loadSBMat(fdict))
- map = dict --> alldict
- }
- val idata = loadIMat(fn)
- val (nuni, nbi, ntri, nstatuses) = scanner.scan(opts, dict, idata, unigramsx, bigramsx, trigramsx, userids)
- val unifeats = mkUniFeats(map, unigramsx, nuni)
- val bifeats = mkGramFeats(map, bigramsx, nbi, allbdict)
- val trifeats = mkGramFeats(map, trigramsx, ntri, alltdict)
- saveIMat(opts.toDayDir(d) + opts.toUniFeats(ifile), unifeats)
- saveIMat(opts.toDayDir(d) + opts.toBiFeats(ifile), bifeats)
- saveIMat(opts.toDayDir(d) + opts.toTriFeats(ifile), trifeats)
- saveIMat(opts.toDayDir(d) + opts.toUserids(ifile), userids(0->nstatuses, ?))
- if (ifile == 23) print(".")
- }
- }
- }
- if (ithread == 0 && day/nthreads == 31/nthreads) println("%04d-%02d" format (year,month))
- }
+ val bigramsx = IMat(opts.guessSize, 3)
+ val trigramsx = IMat(opts.guessSize, 4)
+ val userids = IMat(opts.guessSize/10, 2)
+ for (d <- (opts.nstart+ithread) to opts.nend by nthreads) {
+ val (year, month, day) = Featurizer.decodeDate(d)
+ val fdict = opts.fromDayDir(d)+opts.localDict
+ if (fileExists(fdict)) {
+ var dict:Dict = null
+ var map:IMat = null
+ val fd = new File(opts.toDayDir(d))
+ if (!fd.exists) fd.mkdirs
+ for (ifile <- 0 until 24) {
+ val fn = opts.fromDayDir(d)+opts.fromFile(ifile)
+ val fx = opts.toDayDir(d)+opts.toTriFeats(ifile)
+ if (fileExists(fn) && (rebuild > 0 || !fileExists(fx))) {
+ if (dict == null) {
+ dict = Dict(loadSBMat(fdict))
+ map = dict --> alldict
+ }
+ val idata = loadIMat(fn)
+ val (nuni, nbi, ntri, nstatuses) = scanner.scan(opts, dict, idata, unigramsx, bigramsx, trigramsx, userids)
+ val unifeats = mkUniFeats(map, unigramsx, nuni)
+ val bifeats = mkGramFeats(map, bigramsx, nbi, allbdict)
+ val trifeats = mkGramFeats(map, trigramsx, ntri, alltdict)
+ saveIMat(opts.toDayDir(d) + opts.toUniFeats(ifile), unifeats)
+ saveIMat(opts.toDayDir(d) + opts.toBiFeats(ifile), bifeats)
+ saveIMat(opts.toDayDir(d) + opts.toTriFeats(ifile), trifeats)
+ saveIMat(opts.toDayDir(d) + opts.toUserids(ifile), userids(0->nstatuses, ?))
+ if (ifile == 23) print(".")
+ }
+ }
+ }
+ if (ithread == 0 && day/nthreads == 31/nthreads) println("%04d-%02d" format (year,month))
+ }
done(ithread,0) = 1
}
}
@@ -315,25 +315,25 @@ class Featurizer(val opts:Featurizer.Options = new Featurizer.Options) {
}
def loadDicts() = {
- if (alldict == null) alldict = Dict(HMat.loadSBMat(opts.mainDict))
- if (allbdict == null) allbdict = IDict(HMat.loadIMat(opts.mainBDict))
- if (alltdict == null) alltdict = IDict(HMat.loadIMat(opts.mainTDict))
- val alld = alldict.cstr
- val bg = allbdict.grams
- val tg = alltdict.grams
- val bd = CSMat(bg.nrows,1)
- val td = CSMat(tg.nrows,1)
- var i = 0
- while (i < bg.nrows) {
- bd(i) = alld(bg(i,0)) + " " + alld(bg(i,1))
- i += 1
- }
- i = 0
- while (i < tg.nrows) {
- td(i) = (alld(tg(i,0)) + " " + alld(tg(i,1))) + (" " + alld(tg(i,2)))
- i += 1
- }
- (alld, bd, td)
+ if (alldict == null) alldict = Dict(HMat.loadSBMat(opts.mainDict))
+ if (allbdict == null) allbdict = IDict(HMat.loadIMat(opts.mainBDict))
+ if (alltdict == null) alltdict = IDict(HMat.loadIMat(opts.mainTDict))
+ val alld = alldict.cstr
+ val bg = allbdict.grams
+ val tg = alltdict.grams
+ val bd = CSMat(bg.nrows,1)
+ val td = CSMat(tg.nrows,1)
+ var i = 0
+ while (i < bg.nrows) {
+ bd(i) = alld(bg(i,0)) + " " + alld(bg(i,1))
+ i += 1
+ }
+ i = 0
+ while (i < tg.nrows) {
+ td(i) = (alld(tg(i,0)) + " " + alld(tg(i,1))) + (" " + alld(tg(i,2)))
+ i += 1
+ }
+ (alld, bd, td)
}
}
@@ -371,48 +371,48 @@ object Featurizer {
}
def buildMainDict(rebuild:Int) = {
- val (ff,fs) = alloptions
+ val (ff,fs) = alloptions
val d1 = ff.mergeDicts(rebuild)
val d2 = fs.mergeDicts(rebuild)
if (rebuild>0) {
- val dd = Dict.union(d1, d2)
- val (sc, ic) = sortdown2(dd.counts)
- saveSBMat(ff.opts.mainDict, SBMat(dd.cstr(ic,0)))
- saveDMat(ff.opts.mainCounts, sc)
+ val dd = Dict.union(d1, d2)
+ val (sc, ic) = sortdown2(dd.counts)
+ saveSBMat(ff.opts.mainDict, SBMat(dd.cstr(ic,0)))
+ saveDMat(ff.opts.mainCounts, sc)
}
}
def buildMainGDicts(rebuild:Int) = {
val (ff, fs) = alloptions
- val bd1 = ff.mergeIDicts(rebuild)
- val bd2 = fs.mergeIDicts(rebuild)
- if (rebuild>0) {
- val bdd = IDict.merge2(bd1,bd2)
- val (sbc, ibc) = sortdown2(bdd.counts)
- saveIMat(ff.opts.mainBDict, IMat(bdd.grams(ibc,?)))
- saveDMat(ff.opts.mainBCounts, sbc)
- }
-
- val td1 = ff.mergeIDicts(rebuild, "tdict.lz4", "tcnts.lz4")
- val td2 = fs.mergeIDicts(rebuild, "tdict.lz4", "tcnts.lz4")
- if (rebuild>0) {
- val tdd = IDict.merge2(td1,td2)
- val (stc, itc) = sortdown2(tdd.counts)
- saveIMat(ff.opts.mainTDict, IMat(tdd.grams(itc,?)))
- saveDMat(ff.opts.mainTCounts, stc)
- }
+ val bd1 = ff.mergeIDicts(rebuild)
+ val bd2 = fs.mergeIDicts(rebuild)
+ if (rebuild>0) {
+ val bdd = IDict.merge2(bd1,bd2)
+ val (sbc, ibc) = sortdown2(bdd.counts)
+ saveIMat(ff.opts.mainBDict, IMat(bdd.grams(ibc,?)))
+ saveDMat(ff.opts.mainBCounts, sbc)
+ }
+
+ val td1 = ff.mergeIDicts(rebuild, "tdict.lz4", "tcnts.lz4")
+ val td2 = fs.mergeIDicts(rebuild, "tdict.lz4", "tcnts.lz4")
+ if (rebuild>0) {
+ val tdd = IDict.merge2(td1,td2)
+ val (stc, itc) = sortdown2(tdd.counts)
+ saveIMat(ff.opts.mainTDict, IMat(tdd.grams(itc,?)))
+ saveDMat(ff.opts.mainTCounts, stc)
+ }
- ff.opts.threshold = 1
- fs.opts.threshold = 1
+ ff.opts.threshold = 1
+ fs.opts.threshold = 1
val usr1 = ff.mergeIDicts(rebuild, "usrdict.lz4", "usrcnts.lz4", false)
- val usr2 = fs.mergeIDicts(rebuild, "usrdict.lz4", "usrcnts.lz4", false)
- if (rebuild>0) {
- val usr = IDict.merge2(usr1,usr2)
- val (usrs, usrc) = sortdown2(usr.counts)
- saveIMat(ff.opts.mainUsrDict, IMat(usr.grams(usrc,?)))
- saveDMat(ff.opts.mainUsrCounts, usrs)
- }
+ val usr2 = fs.mergeIDicts(rebuild, "usrdict.lz4", "usrcnts.lz4", false)
+ if (rebuild>0) {
+ val usr = IDict.merge2(usr1,usr2)
+ val (usrs, usrc) = sortdown2(usr.counts)
+ saveIMat(ff.opts.mainUsrDict, IMat(usr.grams(usrc,?)))
+ saveDMat(ff.opts.mainUsrCounts, usrs)
+ }
}
def buildFeatures(rebuild:Int) = {
@@ -433,15 +433,15 @@ object Featurizer {
def dirxMap(fname:String):(Int)=>String = {
(n:Int) => {
- val (yy, mm, dd) = decodeDate(n)
- (fname format (n % 16, yy, mm, dd))
+ val (yy, mm, dd) = decodeDate(n)
+ (fname format (n % 16, yy, mm, dd))
}
}
def dirMap(fname:String):(Int)=>String = {
(n:Int) => {
- val (yy, mm, dd) = decodeDate(n)
- (fname format (yy, mm, dd))
+ val (yy, mm, dd) = decodeDate(n)
+ (fname format (yy, mm, dd))
}
}
@@ -458,8 +458,8 @@ object Featurizer {
val triCnts:String = "tcnts.lz4"
val usrCnts:String = "usrcnts.lz4"
def thisDir = "/big/" + tokDirName
- def mainDir = "/big/twitter/tokenized/"
- def mainDict:String = mainDir + "all" + localDict
+ def mainDir = "/big/twitter/tokenized/"
+ def mainDict:String = mainDir + "all" + localDict
def mainCounts:String = mainDir + "all" + localCount
def mainBDict:String = mainDir + "all" + biDict
def mainBCounts:String = mainDir + "all" + biCnts
@@ -467,7 +467,7 @@ object Featurizer {
def mainTCounts:String = mainDir + "all" + triCnts
def mainUsrDict:String = mainDir + "all" + usrDict
def mainUsrCounts:String = mainDir + "all" + usrCnts
- def fromYearDir:(Int)=>String = dirMap(thisDir + "%04d/")
+ def fromYearDir:(Int)=>String = dirMap(thisDir + "%04d/")
def fromMonthDir:(Int)=>String = dirMap(thisDir + "%04d/%02d/")
def fromDayDir:(Int)=>String = dirxMap("/disk%02d/" + tokDirName + "%04d/%02d/%02d/")
def toDayDir:(Int)=>String = dirxMap("/disk%02d/" + featDirName + "%04d/%02d/%02d/")
@@ -485,146 +485,146 @@ object Featurizer {
trait Scanner {
- def scan(opts:Featurizer.Options, dict:Dict, idata:IMat, unigramsx:IMat, bigramsx:IMat, trigramsx:IMat, userids:IMat):(Int, Int, Int, Int)
+ def scan(opts:Featurizer.Options, dict:Dict, idata:IMat, unigramsx:IMat, bigramsx:IMat, trigramsx:IMat, userids:IMat):(Int, Int, Int, Int)
}
object TwitterScanner extends Scanner {
- final val OutsideStatus = 0
- final val InsideStatus = 1
- final val InsideUser = 2
- final val InsideUserId = 3
- final val InsideText = 4
- final val InsideRetweet = 5
- final val InsideStatusL2 = 6
- final val InsideUserL2 = 7
- final val InsideUserIdL2 = 8
- final val InsideTextL2 = 9
-
- def scan(opts:Featurizer.Options, dict:Dict, idata:IMat, unigramsx:IMat, bigramsx:IMat, trigramsx:IMat, userids:IMat):(Int, Int, Int, Int) = {
+ final val OutsideStatus = 0
+ final val InsideStatus = 1
+ final val InsideUser = 2
+ final val InsideUserId = 3
+ final val InsideText = 4
+ final val InsideRetweet = 5
+ final val InsideStatusL2 = 6
+ final val InsideUserL2 = 7
+ final val InsideUserIdL2 = 8
+ final val InsideTextL2 = 9
+
+ def scan(opts:Featurizer.Options, dict:Dict, idata:IMat, unigramsx:IMat, bigramsx:IMat, trigramsx:IMat, userids:IMat):(Int, Int, Int, Int) = {
- val Isstart = dict("<status>")
- val Isend = dict("</status>")
- val Irstart = dict("<retweet>")
- val Irend = dict("</retweet>")
- val Itstart = dict("<text>")
- val Itend = dict("</text>")
- val Iuser = dict("<user>")
- val Iuend = dict("</user>")
- val Iistart = dict("<id>")
- val Iiend = dict("</id>")
- var state = 0
+ val Isstart = dict("<status>")
+ val Isend = dict("</status>")
+ val Irstart = dict("<retweet>")
+ val Irend = dict("</retweet>")
+ val Itstart = dict("<text>")
+ val Itend = dict("</text>")
+ val Iuser = dict("<user>")
+ val Iuend = dict("</user>")
+ val Iistart = dict("<id>")
+ val Iiend = dict("</id>")
+ var state = 0
- var istatus = -1
- var nuni = 0
- var nbi = 0
- var ntri = 0
- var len = idata.length
- var i = 0
- while (i < len) {
- val tok = idata.data(i)-1
-// if (tok+1 >0) println(dict(tok)+ " " + state)
-// else println("num " +(-(tok+1))+ " " + state)
- if (tok == Isend) {
- state = OutsideStatus
- } else {
- (state: @switch) match {
- case OutsideStatus =>
- if (tok == Isstart) {
- state = InsideStatus
- istatus += 1
- }
- case InsideStatus =>
- tok match {
- case Iuser => state = InsideUser
- case Itstart => state = InsideText
- case Irstart => state = InsideRetweet
- case _ => {}
- }
- case InsideUser =>
- tok match {
- case Iistart => state = InsideUserId
- case Irstart => state = InsideRetweet
- case Iuend => state = InsideStatus
- case _ => {}
- }
- case InsideUserId =>
- if (tok == Iiend) {
- state = InsideUser
- } else if (tok+1 < 0) {
- if (userids != null) {
- userids(istatus,0) = -(tok+1)
- userids(istatus,1) = 0
- }
- }
- case InsideText =>
- tok match {
- case Iuser => state = InsideUser
- case Itend => state = InsideStatus
- case _ => if (tok+1 > 0) {
- if (unigramsx != null) {
- unigramsx(nuni, 0) = tok
- unigramsx(nuni, 1) = istatus
- nuni += 1
- }
- if (idata.data(i-1) > 0) {
- val tok1 = idata.data(i-1)-1
- if (tok1 != Itstart) {
- bigramsx(nbi, 0) = tok1
- bigramsx(nbi, 1) = tok
- bigramsx(nbi, 2) = istatus
- nbi += 1
- if (idata.data(i-2) > 0) {
- val tok2 = idata.data(i-2)-1
- if (tok2 != Itstart) {
- trigramsx(ntri, 0) = tok2
- trigramsx(ntri, 1) = tok1
- trigramsx(ntri, 2) = tok
- trigramsx(ntri, 3) = istatus
- ntri += 1
- }
- }
- }
- }
- }
- }
- case InsideRetweet =>
- tok match {
- case Isstart => state = InsideStatusL2
- case Irend => state = InsideStatus
- case _ => {}
- }
- case InsideStatusL2 =>
- tok match {
- case Iuser => state = InsideUserL2
- case Itstart => state = InsideTextL2
- case _ => {}
- }
- case InsideUserL2 =>
- tok match {
- case Iistart => state = InsideUserIdL2
- case Iuend => state = InsideStatusL2
- case _ => {}
- }
- case InsideUserIdL2 =>
- tok match {
- case Iiend => state = InsideUserL2
- case _ => if (tok-1 < 0) {
- if (userids != null) userids(istatus, 1) = -(tok+1)
- }
- }
- case InsideTextL2 =>
- tok match {
- case Itend => state = InsideStatusL2
- case Iuser => state = InsideUserL2
- case _ => {}
- }
- case _ => {}
- }
-
- }
- i += 1
- }
- (nuni, nbi, ntri, istatus)
- }
+ var istatus = -1
+ var nuni = 0
+ var nbi = 0
+ var ntri = 0
+ var len = idata.length
+ var i = 0
+ while (i < len) {
+ val tok = idata.data(i)-1
+// if (tok+1 >0) println(dict(tok)+ " " + state)
+// else println("num " +(-(tok+1))+ " " + state)
+ if (tok == Isend) {
+ state = OutsideStatus
+ } else {
+ (state: @switch) match {
+ case OutsideStatus =>
+ if (tok == Isstart) {
+ state = InsideStatus
+ istatus += 1
+ }
+ case InsideStatus =>
+ tok match {
+ case Iuser => state = InsideUser
+ case Itstart => state = InsideText
+ case Irstart => state = InsideRetweet
+ case _ => {}
+ }
+ case InsideUser =>
+ tok match {
+ case Iistart => state = InsideUserId
+ case Irstart => state = InsideRetweet
+ case Iuend => state = InsideStatus
+ case _ => {}
+ }
+ case InsideUserId =>
+ if (tok == Iiend) {
+ state = InsideUser
+ } else if (tok+1 < 0) {
+ if (userids != null) {
+ userids(istatus,0) = -(tok+1)
+ userids(istatus,1) = 0
+ }
+ }
+ case InsideText =>
+ tok match {
+ case Iuser => state = InsideUser
+ case Itend => state = InsideStatus
+ case _ => if (tok+1 > 0) {
+ if (unigramsx != null) {
+ unigramsx(nuni, 0) = tok
+ unigramsx(nuni, 1) = istatus
+ nuni += 1
+ }
+ if (idata.data(i-1) > 0) {
+ val tok1 = idata.data(i-1)-1
+ if (tok1 != Itstart) {
+ bigramsx(nbi, 0) = tok1
+ bigramsx(nbi, 1) = tok
+ bigramsx(nbi, 2) = istatus
+ nbi += 1
+ if (idata.data(i-2) > 0) {
+ val tok2 = idata.data(i-2)-1
+ if (tok2 != Itstart) {
+ trigramsx(ntri, 0) = tok2
+ trigramsx(ntri, 1) = tok1
+ trigramsx(ntri, 2) = tok
+ trigramsx(ntri, 3) = istatus
+ ntri += 1
+ }
+ }
+ }
+ }
+ }
+ }
+ case InsideRetweet =>
+ tok match {
+ case Isstart => state = InsideStatusL2
+ case Irend => state = InsideStatus
+ case _ => {}
+ }
+ case InsideStatusL2 =>
+ tok match {
+ case Iuser => state = InsideUserL2
+ case Itstart => state = InsideTextL2
+ case _ => {}
+ }
+ case InsideUserL2 =>
+ tok match {
+ case Iistart => state = InsideUserIdL2
+ case Iuend => state = InsideStatusL2
+ case _ => {}
+ }
+ case InsideUserIdL2 =>
+ tok match {
+ case Iiend => state = InsideUserL2
+ case _ => if (tok-1 < 0) {
+ if (userids != null) userids(istatus, 1) = -(tok+1)
+ }
+ }
+ case InsideTextL2 =>
+ tok match {
+ case Itend => state = InsideStatusL2
+ case Iuser => state = InsideUserL2
+ case _ => {}
+ }
+ case _ => {}
+ }
+
+ }
+ i += 1
+ }
+ (nuni, nbi, ntri, istatus)
+ }
}
-}
\ No newline at end of file
+}
\ No newline at end of file
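
Note: TwitterScanner above is a hand-rolled state machine: one linear pass over the token stream, with integer states tracking status/user/id/text nesting instead of a real XML parser. A minimal sketch of the idea (token strings and counters are illustrative, not the real scanner):

    object TinyScanner {
      val Outside = 0; val InStatus = 1; val InText = 2
      // Returns (statuses seen, word tokens inside <text> elements).
      def scan(toks: Seq[String]): (Int, Int) = {
        var state = Outside
        var nstatus = 0; var nwords = 0
        for (t <- toks) (state, t) match {
          case (Outside, "<status>")   => state = InStatus; nstatus += 1
          case (InStatus, "<text>")    => state = InText
          case (InText, "</text>")     => state = InStatus
          case (InStatus, "</status>") => state = Outside
          case (InText, _)             => nwords += 1
          case _                       => // ignore everything else
        }
        (nstatus, nwords)
      }
      def main(args: Array[String]): Unit = {
        val toks = Seq("<status>", "<text>", "hi", "there", "</text>", "</status>")
        println(scan(toks))              // prints (1,2)
      }
    }
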
diff --git a/src/main/scala/BIDMach/Learner.scala b/src/main/scala/BIDMach/Learner.scala
index 5982ff74..36be2606 100755
--- a/src/main/scala/BIDMach/Learner.scala
+++ b/src/main/scala/BIDMach/Learner.scala
@@ -30,40 +30,40 @@ case class Learner(
var results:FMat = null
val dopts:DataSource.Opts = if (datasource != null) datasource.opts else null
- val mopts:Model.Opts = model.opts
+ val mopts:Model.Opts = model.opts
val ropts:Mixin.Opts = if (mixins != null) mixins(0).opts else null
val uopts:Updater.Opts = if (updater != null) updater.opts else null
var useGPU = false
- var reslist:ListBuffer[FMat] = null;
- var samplist:ListBuffer[Float] = null;
- var lastCheckPoint = 0;
- var done = false;
- var paused = false;
- var ipass = 0;
- var here = 0L;
- var lasti = 0;
- var bytes = 0L;
- var cacheState = false;
- var debugMemState = false;
-
+ var reslist:ListBuffer[FMat] = null
+ var samplist:ListBuffer[Float] = null
+ var lastCheckPoint = 0
+ var done = false
+ var paused = false
+ var ipass = 0
+ var here = 0L
+ var lasti = 0
+ var bytes = 0L
+ var cacheState = false
+ var debugMemState = false
+
def setup = {
- Learner.setupPB(datasource, dopts.putBack, mopts.dim)
+ Learner.setupPB(datasource, dopts.putBack, mopts.dim)
}
def init = {
- var cacheState = Mat.useCache;
- Mat.useCache = opts.useCache;
- datasource.init;
- model.bind(datasource);
+ var cacheState = Mat.useCache
+ Mat.useCache = opts.useCache
+ datasource.init
+ model.bind(datasource)
if (datasink.asInstanceOf[AnyRef] != null) {
- datasink.init;
- model.bind(datasink);
+ datasink.init
+ model.bind(datasink)
}
- model.init;
+ model.init
if (model.opts.logDataSink.asInstanceOf[AnyRef] != null) model.opts.logDataSink.init
if (mixins != null) mixins map (_ init(model))
if (updater != null) updater.init(model)
- Mat.useCache = cacheState;
+ Mat.useCache = cacheState
useGPU = model.useGPU
}
@@ -73,64 +73,64 @@ case class Learner(
def retrain() = {
flip
- var cacheState = Mat.useCache;
- Mat.useCache = opts.useCache;
- debugMemState = Mat.debugMem;
- if (updater != null) updater.clear;
- reslist = new ListBuffer[FMat];
- samplist = new ListBuffer[Float];
- firstPass(null);
+ var cacheState = Mat.useCache
+ Mat.useCache = opts.useCache
+ debugMemState = Mat.debugMem
+ if (updater != null) updater.clear
+ reslist = new ListBuffer[FMat]
+ samplist = new ListBuffer[Float]
+ firstPass(null)
updateM(ipass-1)
while (ipass < opts.npasses && ! done) {
nextPass(null)
updateM(ipass-1)
}
- wrapUp;
+ wrapUp
}
def firstPass(iter:Iterator[(AnyRef, MatIOtrait)]):Unit = {
setup
init
- done = false;
- ipass = 0;
- here = 0L;
- lasti = 0;
- bytes = 0L;
- if (updater != null) updater.clear;
- cacheState = Mat.useCache;
- Mat.useCache = opts.useCache;
- reslist = new ListBuffer[FMat];
- samplist = new ListBuffer[Float];
- flip;
- nextPass(iter);
+ done = false
+ ipass = 0
+ here = 0L
+ lasti = 0
+ bytes = 0L
+ if (updater != null) updater.clear
+ cacheState = Mat.useCache
+ Mat.useCache = opts.useCache
+ reslist = new ListBuffer[FMat]
+ samplist = new ListBuffer[Float]
+ flip
+ nextPass(iter)
}
def nextPass(iter:Iterator[(AnyRef, MatIOtrait)]): Unit = {
- if (opts.debugMem && ipass > 0) Mat.debugMem = true;
+ if (opts.debugMem && ipass > 0) Mat.debugMem = true
var lastp = 0f
if (iter != null) {
- datasource.asInstanceOf[IteratorSource].opts.iter = iter;
+ datasource.asInstanceOf[IteratorSource].opts.iter = iter
}
datasource.reset
var istep = 0
println("pass=%2d" format ipass)
while (datasource.hasNext) {
while (paused) Thread.sleep(10)
- val mats = datasource.next;
+ val mats = datasource.next
here += datasource.opts.batchSize
- bytes += mats.map(Learner.numBytes _).reduce(_+_);
- val dsp = datasource.progress;
- val gprogress = (ipass + dsp)/opts.npasses;
+ bytes += mats.map(Learner.numBytes _).reduce(_+_)
+ val dsp = datasource.progress
+ val gprogress = (ipass + dsp)/opts.npasses
if ((istep - 1) % opts.evalStep == 0 || (istep > 0 && (! datasource.hasNext))) {
if (opts.updateAll) {
- model.dobatchg(mats, ipass, here);
- if (mixins != null) mixins map (_ compute(mats, here));
- if (updater != null) updater.update(ipass, here, gprogress);
+ model.dobatchg(mats, ipass, here)
+ if (mixins != null) mixins map (_ compute(mats, here))
+ if (updater != null) updater.update(ipass, here, gprogress)
}
- val scores = model.evalbatchg(mats, ipass, here);
- if (datasink != null) datasink.put;
+ val scores = model.evalbatchg(mats, ipass, here)
+ if (datasink != null) datasink.put
reslist.append(scores.newcopy)
samplist.append(here)
} else {
@@ -153,12 +153,12 @@ case class Learner(
if (useGPU) {
print(", GPUmem=%3.6f" format GPUmem._1)
}
- println;
- lasti = reslist.length;
+ println
+ lasti = reslist.length
}
if (opts.checkPointFile != null && toc > 3600 * opts.checkPointInterval * (1 + lastCheckPoint)) {
- model.save(opts.checkPointFile format lastCheckPoint);
- lastCheckPoint += 1;
+ model.save(opts.checkPointFile format lastCheckPoint)
+ lastCheckPoint += 1
}
}
ipass += 1
@@ -169,9 +169,9 @@ case class Learner(
}
def wrapUp {
- val gf = gflop;
- Mat.useCache = cacheState;
- Mat.debugMem = debugMemState;
+ val gf = gflop
+ Mat.useCache = cacheState
+ Mat.debugMem = debugMemState
println("Time=%5.4f secs, gflops=%4.2f" format (gf._2, gf._1))
if (opts.autoReset && useGPU) {
Learner.toCPU(modelmats)
@@ -179,22 +179,22 @@ case class Learner(
Mat.clearCaches
}
- datasource.close;
- if (datasink != null) datasink.close;
+ datasource.close
+ if (datasink != null) datasink.close
if (model.opts.logDataSink.asInstanceOf[AnyRef] != null) model.opts.logDataSink.close
- results = Learner.scores2FMat(reslist) on row(samplist.toList);
- done = true;
+ results = Learner.scores2FMat(reslist) on row(samplist.toList)
+ done = true
}
def predict() = {
- setup;
- datasource.init;
- model.bind(datasource);
+ setup
+ datasource.init
+ model.bind(datasource)
if (datasink.asInstanceOf[AnyRef] != null) {
- datasink.init;
- model.bind(datasink);
+ datasink.init
+ model.bind(datasink)
}
- val rstate = model.refresh;
+ val rstate = model.refresh
model.refresh = false
model.init
val results = repredict
@@ -218,12 +218,12 @@ case class Learner(
while (datasource.hasNext) {
val mats = datasource.next
here += datasource.opts.batchSize
- bytes += mats.map(Learner.numBytes _).reduce(_+_);
- val scores = model.evalbatchg(mats, 0, here);
+ bytes += mats.map(Learner.numBytes _).reduce(_+_)
+ val scores = model.evalbatchg(mats, 0, here)
if (datasink != null) datasink.put
- reslist.append(scores.newcopy);
- samplist.append(here);
- val dsp = datasource.progress;
+ reslist.append(scores.newcopy)
+ samplist.append(here)
+ val dsp = datasource.progress
if (dsp > lastp + opts.pstep && reslist.length > lasti) {
val gf = gflop
lastp = dsp - (dsp % opts.pstep)
@@ -243,21 +243,21 @@ case class Learner(
}
val gf = gflop
Mat.useCache = cacheState
- println("Time=%5.4f secs, gflops=%4.2f" format (gf._2, gf._1));
+ println("Time=%5.4f secs, gflops=%4.2f" format (gf._2, gf._1))
if (opts.autoReset && useGPU) {
Learner.toCPU(modelmats)
resetGPUs
Mat.clearCaches
}
- datasource.close;
- if (datasink != null) datasink.close;
+ datasource.close
+ if (datasink != null) datasink.close
results = Learner.scores2FMat(reslist) on row(samplist.toList)
}
- def datamats = datasource.asInstanceOf[MatSource].mats;
- def modelmats = model.modelmats;
- def datamat = datasource.asInstanceOf[MatSource].mats(0);
- def modelmat = model.modelmats(0);
+ def datamats = datasource.asInstanceOf[MatSource].mats
+ def modelmats = model.modelmats
+ def datamat = datasource.asInstanceOf[MatSource].mats(0)
+ def modelmat = model.modelmats(0)
def preds = datasink.asInstanceOf[MatSink].mats
}
@@ -281,8 +281,8 @@ case class ParLearner(
var useGPU = false
def setup = {
- val dopts = datasource.opts
- Learner.setupPB(datasource, datasource.opts.putBack, models(0).opts.dim)
+ val dopts = datasource.opts
+ Learner.setupPB(datasource, datasource.opts.putBack, models(0).opts.dim)
}
def init = {
@@ -291,19 +291,19 @@ case class ParLearner(
val thisGPU = if (useGPU) getGPU else 0
for (i <- 0 until opts.nthreads) {
if (useGPU && i < Mat.hasCUDA) setGPU(i)
- models(i).bind(datasource)
- models(i).init
- if (mixins != null) mixins(i) map (_ init(models(i)))
- if (updaters != null && updaters(i) != null) updaters(i).init(models(i))
+ models(i).bind(datasource)
+ models(i).init
+ if (mixins != null) mixins(i) map (_ init(models(i)))
+ if (updaters != null && updaters(i) != null) updaters(i).init(models(i))
}
if (useGPU) setGPU(thisGPU)
val mml = models(0).modelmats.length
um = new Array[Mat](mml)
mm = new Array[Mat](mml)
for (i <- 0 until mml) {
- val mm0 = models(0).modelmats(i)
- mm(i) = zeros(mm0.nrows, mm0.ncols)
- um(i) = zeros(mm0.nrows, mm0.ncols)
+ val mm0 = models(0).modelmats(i)
+ mm(i) = zeros(mm0.nrows, mm0.ncols)
+ um(i) = zeros(mm0.nrows, mm0.ncols)
}
ParLearner.syncmodels(models, mm, um, 0, useGPU)
}
@@ -322,11 +322,11 @@ case class ParLearner(
cmats = new Array[Array[Mat]](opts.nthreads)
for (i <- 0 until opts.nthreads) cmats(i) = new Array[Mat](datasource.omats.length)
val thisGPU = if (useGPU) getGPU else 0
- if (useGPU) {
- for (i <- 0 until opts.nthreads) {
-// if (i != thisGPU) connect(i)
- }
- }
+ if (useGPU) {
+ for (i <- 0 until opts.nthreads) {
+// if (i != thisGPU) connect(i)
+ }
+ }
@volatile var done = iones(opts.nthreads, 1)
var ipass = 0
var here = 0L
@@ -335,91 +335,91 @@ case class ParLearner(
val reslist = new ListBuffer[FMat]
val samplist = new ListBuffer[Float]
for (i <- 0 until opts.nthreads) {
- if (useGPU && i < Mat.hasCUDA) setGPU(i)
- if (updaters != null && updaters(i) != null) updaters(i).clear
+ if (useGPU && i < Mat.hasCUDA) setGPU(i)
+ if (updaters != null && updaters(i) != null) updaters(i).clear
}
setGPU(thisGPU)
var istep = 0
var lastp = 0f
var running = true
- var progress = 0f;
- var gprogress = 0f;
+ var progress = 0f
+ var gprogress = 0f
for (ithread <- 0 until opts.nthreads) {
- Future {
- if (useGPU && ithread < Mat.hasCUDA) setGPU(ithread)
- while (running) {
- while (done(ithread) == 1) Thread.sleep(1)
- try {
- if ((istep + ithread + 1) % opts.evalStep == 0 || !datasource.hasNext ) {
- val scores = models(ithread).evalbatchg(cmats(ithread), ipass, here)
- reslist.synchronized { reslist.append(scores(0)) }
- samplist.synchronized { samplist.append(here) }
- } else {
- models(ithread).dobatchg(cmats(ithread), ipass, here)
- if (mixins != null && mixins(ithread) != null) mixins(ithread) map (_ compute(cmats(ithread), here))
- if (updaters != null && updaters(ithread) != null) updaters(ithread).update(ipass, here, gprogress)
- }
- } catch {
- case e:Exception => {
- print("Caught exception in thread %d %s\n" format (ithread, e.toString));
- val se = e.getStackTrace();
- for (i <- 0 until 8) {
- println("thread %d, %s" format (ithread, se(i).toString));
- }
- restart(ithread)
- println("Restarted: Keep on truckin...")
- }
- }
- done(ithread) = 1
- }
- }
+ Future {
+ if (useGPU && ithread < Mat.hasCUDA) setGPU(ithread)
+ while (running) {
+ while (done(ithread) == 1) Thread.sleep(1)
+ try {
+ if ((istep + ithread + 1) % opts.evalStep == 0 || !datasource.hasNext ) {
+ val scores = models(ithread).evalbatchg(cmats(ithread), ipass, here)
+ reslist.synchronized { reslist.append(scores(0)) }
+ samplist.synchronized { samplist.append(here) }
+ } else {
+ models(ithread).dobatchg(cmats(ithread), ipass, here)
+ if (mixins != null && mixins(ithread) != null) mixins(ithread) map (_ compute(cmats(ithread), here))
+ if (updaters != null && updaters(ithread) != null) updaters(ithread).update(ipass, here, gprogress)
+ }
+ } catch {
+ case e:Exception => {
+ print("Caught exception in thread %d %s\n" format (ithread, e.toString))
+ val se = e.getStackTrace()
+ for (i <- 0 until 8) {
+ println("thread %d, %s" format (ithread, se(i).toString))
+ }
+ restart(ithread)
+ println("Restarted: Keep on truckin...")
+ }
+ }
+ done(ithread) = 1
+ }
+ }
}
while (ipass < opts.npasses) {
- datasource.reset
+ datasource.reset
istep = 0
lastp = 0f
println("pass=%2d" format ipass)
- while (datasource.hasNext) {
- for (ithread <- 0 until opts.nthreads) {
- if (datasource.hasNext) {
- val mats = datasource.next
+ while (datasource.hasNext) {
+ for (ithread <- 0 until opts.nthreads) {
+ if (datasource.hasNext) {
+ val mats = datasource.next
progress = datasource.progress
gprogress = (ipass + progress)/opts.npasses
- for (j <- 0 until mats.length) {
- cmats(ithread)(j) = safeCopy(mats(j), ithread)
- }
- if (ithread == 0) here += datasource.opts.batchSize
- done(ithread) = 0;
- bytes += mats.map(Learner.numBytes _).reduce(_+_);
- }
- }
- while (mini(done).v == 0) Thread.sleep(1)
- Thread.sleep(opts.coolit)
- istep += opts.nthreads
- if (istep % opts.syncStep == 0) ParLearner.syncmodels(models, mm, um, istep/opts.syncStep, useGPU)
- if (datasource.progress > lastp + opts.pstep) {
- while (datasource.progress > lastp + opts.pstep) lastp += opts.pstep
- val gf = gflop
- if (reslist.length > lasti) {
- print("%5.2f%%, %s, gf=%5.3f, secs=%3.1f, GB=%4.2f, MB/s=%5.2f" format (
- 100f*lastp,
- Learner.scoreSummary(reslist, lasti, reslist.length, opts.cumScore),
- gf._1,
- gf._2,
- bytes*1e-9,
- bytes/gf._2*1e-6))
- if (useGPU) {
- for (i <- 0 until math.min(opts.nthreads, Mat.hasCUDA)) {
- setGPU(i)
- if (i==0) print(", GPUmem=%3.2f" format GPUmem._1) else print(", %3.2f" format GPUmem._1)
- }
- setGPU(thisGPU)
- }
- println
- }
- lasti = reslist.length
- }
+ for (j <- 0 until mats.length) {
+ cmats(ithread)(j) = safeCopy(mats(j), ithread)
+ }
+ if (ithread == 0) here += datasource.opts.batchSize
+ done(ithread) = 0
+ bytes += mats.map(Learner.numBytes _).reduce(_+_)
+ }
+ }
+ while (mini(done).v == 0) Thread.sleep(1)
+ Thread.sleep(opts.coolit)
+ istep += opts.nthreads
+ if (istep % opts.syncStep == 0) ParLearner.syncmodels(models, mm, um, istep/opts.syncStep, useGPU)
+ if (datasource.progress > lastp + opts.pstep) {
+ while (datasource.progress > lastp + opts.pstep) lastp += opts.pstep
+ val gf = gflop
+ if (reslist.length > lasti) {
+ print("%5.2f%%, %s, gf=%5.3f, secs=%3.1f, GB=%4.2f, MB/s=%5.2f" format (
+ 100f*lastp,
+ Learner.scoreSummary(reslist, lasti, reslist.length, opts.cumScore),
+ gf._1,
+ gf._2,
+ bytes*1e-9,
+ bytes/gf._2*1e-6))
+ if (useGPU) {
+ for (i <- 0 until math.min(opts.nthreads, Mat.hasCUDA)) {
+ setGPU(i)
+ if (i==0) print(", GPUmem=%3.2f" format GPUmem._1) else print(", %3.2f" format GPUmem._1)
+ }
+ setGPU(thisGPU)
+ }
+ println
+ }
+ lasti = reslist.length
+ }
}
for (i <- 0 until opts.nthreads) {
if (useGPU && i < Mat.hasCUDA) setGPU(i);
@@ -429,17 +429,17 @@ case class ParLearner(
ParLearner.syncmodelsPass(models, mm, um, ipass)
ipass += 1
if (opts.resFile != null) {
- saveAs(opts.resFile, Learner.scores2FMat(reslist) on row(samplist.toList), "results")
+ saveAs(opts.resFile, Learner.scores2FMat(reslist) on row(samplist.toList), "results")
}
}
- running = false;
+ running = false
datasource.close
val gf = gflop
Mat.useCache = cacheState
if (useGPU) {
- for (i <- 0 until opts.nthreads) {
- // if (i != thisGPU) disconnect(i);
- }
+ for (i <- 0 until opts.nthreads) {
+ // if (i != thisGPU) disconnect(i)
+ }
}
if (opts.autoReset && useGPU) {
Learner.toCPU(models(0).modelmats)
@@ -495,7 +495,7 @@ case class ParLearnerx(
val mixins:Array[Array[Mixin]],
val updaters:Array[Updater],
val datasinks:Array[DataSink],
- val opts:ParLearner.Options = new ParLearner.Options) extends Serializable {
+ val opts:ParLearner.Options = new ParLearner.Options) extends Serializable {
var um:Array[Mat] = null
var mm:Array[Mat] = null
@@ -503,30 +503,30 @@ case class ParLearnerx(
var useGPU = false
def setup = {
- for (i <- 0 until opts.nthreads) {
- Learner.setupPB(datasources(i), datasources(i).opts.putBack, models(i).opts.dim)
- }
+ for (i <- 0 until opts.nthreads) {
+ Learner.setupPB(datasources(i), datasources(i).opts.putBack, models(i).opts.dim)
+ }
}
def init = {
val thisGPU = if (Mat.hasCUDA > 0) getGPU else 0
- for (i <- 0 until opts.nthreads) {
- if (i < Mat.hasCUDA) setGPU(i)
- datasources(i).init
- models(i).bind(datasources(i))
- models(i).init
- if (mixins != null) mixins(i) map(_ init(models(i)))
- updaters(i).init(models(i))
- }
- useGPU = models(0).useGPU
- if (Mat.hasCUDA > 0) setGPU(thisGPU)
- val mml = models(0).modelmats.length
+ for (i <- 0 until opts.nthreads) {
+ if (i < Mat.hasCUDA) setGPU(i)
+ datasources(i).init
+ models(i).bind(datasources(i))
+ models(i).init
+ if (mixins != null) mixins(i) map(_ init(models(i)))
+ updaters(i).init(models(i))
+ }
+ useGPU = models(0).useGPU
+ if (Mat.hasCUDA > 0) setGPU(thisGPU)
+ val mml = models(0).modelmats.length
um = new Array[Mat](mml)
mm = new Array[Mat](mml)
for (i <- 0 until mml) {
- val mm0 = models(0).modelmats(i)
- mm(i) = zeros(mm0.nrows, mm0.ncols)
- um(i) = zeros(mm0.nrows, mm0.ncols)
+ val mm0 = models(0).modelmats(i)
+ mm(i) = zeros(mm0.nrows, mm0.ncols)
+ um(i) = zeros(mm0.nrows, mm0.ncols)
}
}
@@ -537,137 +537,137 @@ case class ParLearnerx(
}
def retrain() = {
- flip
- var cacheState = Mat.useCache
+ flip
+ var cacheState = Mat.useCache
Mat.useCache = opts.useCache
- val thisGPU = if (useGPU) getGPU else 0
- if (useGPU) {
- for (i <- 0 until opts.nthreads) {
- if (i != thisGPU) connect(i)
- }
- }
+ val thisGPU = if (useGPU) getGPU else 0
+ if (useGPU) {
+ for (i <- 0 until opts.nthreads) {
+ if (i != thisGPU) connect(i)
+ }
+ }
- @volatile var done = izeros(opts.nthreads, 1)
- var ipass = 0
- var istep0 = 0L
- var ilast0 = 0L
- var bytes = 0L
- val reslist = new ListBuffer[FMat]
- val samplist = new ListBuffer[Float]
- var lastp = 0f
- var lasti = 0
+ @volatile var done = izeros(opts.nthreads, 1)
+ var ipass = 0
+ var istep0 = 0L
+ var ilast0 = 0L
+ var bytes = 0L
+ val reslist = new ListBuffer[FMat]
+ val samplist = new ListBuffer[Float]
+ var lastp = 0f
+ var lasti = 0
var gprogress = 0f
- done.clear
- for (ithread <- 0 until opts.nthreads) {
- Future {
- if (useGPU && ithread < Mat.hasCUDA) setGPU(ithread)
- var here = 0L
- updaters(ithread).clear
- while (done(ithread) < opts.npasses) {
- var istep = 0
- while (datasources(ithread).hasNext) {
- val mats = datasources(ithread).next
- here += datasources(ithread).opts.batchSize
- bytes += mats.map(Learner.numBytes _).reduce(_+_);
+ done.clear
+ for (ithread <- 0 until opts.nthreads) {
+ Future {
+ if (useGPU && ithread < Mat.hasCUDA) setGPU(ithread)
+ var here = 0L
+ updaters(ithread).clear
+ while (done(ithread) < opts.npasses) {
+ var istep = 0
+ while (datasources(ithread).hasNext) {
+ val mats = datasources(ithread).next
+ here += datasources(ithread).opts.batchSize
+ bytes += mats.map(Learner.numBytes _).reduce(_+_)
gprogress = (dsProgress + ipass)/opts.npasses
- models(0).synchronized {
- istep += 1
- istep0 += 1
- }
- try {
- if (istep % opts.evalStep == 0) {
- val scores = models(ithread).synchronized {models(ithread).evalbatchg(mats, ipass, here)}
- reslist.synchronized { reslist.append(scores) }
- samplist.synchronized { samplist.append(here) }
- } else {
- models(ithread).synchronized {
- models(ithread).dobatchg(mats, ipass, here)
- if (mixins != null && mixins(ithread) != null) mixins(ithread) map (_ compute(mats, here))
- updaters(ithread).update(ipass, here, gprogress)
- }
- }
- } catch {
- case e:Exception => {
- print("Caught exception in thread %d %s\nTrying restart..." format (ithread, e.toString))
- restart(ithread)
- println("Keep on truckin...")
- }
- }
- if (useGPU) Thread.sleep(opts.coolit)
- if (datasources(ithread).opts.putBack >= 0) datasources(ithread).putBack(mats, datasources(ithread).opts.putBack)
-// if (istep % (opts.syncStep/opts.nthreads) == 0) syncmodel(models, ithread)
- }
- models(ithread).synchronized { updaters(ithread).updateM(ipass) }
- done(ithread) += 1
- while (done(ithread) > ipass) Thread.sleep(1)
- }
- }
- }
- println("pass=%2d" format ipass)
- while (ipass < opts.npasses) {
- while (mini(done).v == ipass) {
- if (istep0 >= ilast0 + opts.syncStep) {
- ParLearner.syncmodels(models, mm, um, istep0/opts.syncStep, useGPU)
- ilast0 += opts.syncStep
- }
- if (dsProgress > lastp + opts.pstep) {
- while (dsProgress > lastp + opts.pstep) lastp += opts.pstep
- val gf = gflop
- if (reslist.length > lasti) {
- print("%5.2f%%, %s, gf=%5.3f, secs=%3.1f, GB=%4.2f, MB/s=%5.2f" format (
- 100f*lastp,
- reslist.synchronized {
- Learner.scoreSummary(reslist, lasti, reslist.length)
- },
- gf._1,
- gf._2,
- bytes*1e-9,
- bytes/gf._2*1e-6))
- if (useGPU) {
- for (i <- 0 until math.min(opts.nthreads, Mat.hasCUDA)) {
- setGPU(i)
- if (i==0) print(", GPUmem=%3.2f" format GPUmem._1) else print(", %3.2f" format GPUmem._1)
- }
- setGPU(thisGPU)
- }
- println
- }
- lasti = reslist.length
- } else {
- Thread.sleep(1)
- }
- }
- lastp = 0f
- if (ipass < opts.npasses) {
- for (i <- 0 until opts.nthreads) datasources(i).reset
- println("pass=%2d" format ipass+1)
- }
- if (opts.resFile != null) {
- saveAs(opts.resFile, Learner.scores2FMat(reslist) on row(samplist.toList), "results")
+ models(0).synchronized {
+ istep += 1
+ istep0 += 1
+ }
+ try {
+ if (istep % opts.evalStep == 0) {
+ val scores = models(ithread).synchronized {models(ithread).evalbatchg(mats, ipass, here)}
+ reslist.synchronized { reslist.append(scores) }
+ samplist.synchronized { samplist.append(here) }
+ } else {
+ models(ithread).synchronized {
+ models(ithread).dobatchg(mats, ipass, here)
+ if (mixins != null && mixins(ithread) != null) mixins(ithread) map (_ compute(mats, here))
+ updaters(ithread).update(ipass, here, gprogress)
+ }
+ }
+ } catch {
+ case e:Exception => {
+ print("Caught exception in thread %d %s\nTrying restart..." format (ithread, e.toString))
+ restart(ithread)
+ println("Keep on truckin...")
+ }
+ }
+ if (useGPU) Thread.sleep(opts.coolit)
+ if (datasources(ithread).opts.putBack >= 0) datasources(ithread).putBack(mats, datasources(ithread).opts.putBack)
+// if (istep % (opts.syncStep/opts.nthreads) == 0) syncmodel(models, ithread)
+ }
+ models(ithread).synchronized { updaters(ithread).updateM(ipass) }
+ done(ithread) += 1
+ while (done(ithread) > ipass) Thread.sleep(1)
+ }
+ }
+ }
+ println("pass=%2d" format ipass)
+ while (ipass < opts.npasses) {
+ while (mini(done).v == ipass) {
+ if (istep0 >= ilast0 + opts.syncStep) {
+ ParLearner.syncmodels(models, mm, um, istep0/opts.syncStep, useGPU)
+ ilast0 += opts.syncStep
+ }
+ if (dsProgress > lastp + opts.pstep) {
+ while (dsProgress > lastp + opts.pstep) lastp += opts.pstep
+ val gf = gflop
+ if (reslist.length > lasti) {
+ print("%5.2f%%, %s, gf=%5.3f, secs=%3.1f, GB=%4.2f, MB/s=%5.2f" format (
+ 100f*lastp,
+ reslist.synchronized {
+ Learner.scoreSummary(reslist, lasti, reslist.length)
+ },
+ gf._1,
+ gf._2,
+ bytes*1e-9,
+ bytes/gf._2*1e-6))
+ if (useGPU) {
+ for (i <- 0 until math.min(opts.nthreads, Mat.hasCUDA)) {
+ setGPU(i)
+ if (i==0) print(", GPUmem=%3.2f" format GPUmem._1) else print(", %3.2f" format GPUmem._1)
+ }
+ setGPU(thisGPU)
+ }
+ println
+ }
+ lasti = reslist.length
+ } else {
+ Thread.sleep(1)
+ }
+ }
+ lastp = 0f
+ if (ipass < opts.npasses) {
+ for (i <- 0 until opts.nthreads) datasources(i).reset
+ println("pass=%2d" format ipass+1)
+ }
+ if (opts.resFile != null) {
+ saveAs(opts.resFile, Learner.scores2FMat(reslist) on row(samplist.toList), "results")
}
- ipass += 1
- }
- val gf = gflop
- Mat.useCache = cacheState
- println("Time=%5.4f secs, gflops=%4.2f, MB/s=%5.2f, GB=%5.2f" format (gf._2, gf._1, bytes/gf._2*1e-6, bytes*1e-9))
- if (opts.autoReset && useGPU) {
- Learner.toCPU(modelmats)
- resetGPUs
- }
- for (ithread <- 0 until opts.nthreads) datasources(ithread).close
- results = Learner.scores2FMat(reslist) on row(samplist.toList)
+ ipass += 1
+ }
+ val gf = gflop
+ Mat.useCache = cacheState
+ println("Time=%5.4f secs, gflops=%4.2f, MB/s=%5.2f, GB=%5.2f" format (gf._2, gf._1, bytes/gf._2*1e-6, bytes*1e-9))
+ if (opts.autoReset && useGPU) {
+ Learner.toCPU(modelmats)
+ resetGPUs
+ }
+ for (ithread <- 0 until opts.nthreads) datasources(ithread).close
+ results = Learner.scores2FMat(reslist) on row(samplist.toList)
}
def syncmodel(models:Array[Model], ithread:Int) = {
- mm.synchronized {
- for (i <- 0 until models(ithread).modelmats.length) {
- um(i) <-- models(ithread).modelmats(i)
- um(i) ~ um(i) *@ (1f/opts.nthreads)
- mm(i) ~ mm(i) *@ (1 - 1f/opts.nthreads)
- mm(i) ~ mm(i) + um(i)
- models(ithread).modelmats(i) <-- mm(i)
- }
- }
+ mm.synchronized {
+ for (i <- 0 until models(ithread).modelmats.length) {
+ um(i) <-- models(ithread).modelmats(i)
+ um(i) ~ um(i) *@ (1f/opts.nthreads)
+ mm(i) ~ mm(i) *@ (1 - 1f/opts.nthreads)
+ mm(i) ~ mm(i) + um(i)
+ models(ithread).modelmats(i) <-- mm(i)
+ }
+ }
}
def restart(ithread:Int) = {
@@ -678,7 +678,7 @@ case class ParLearnerx(
models(ithread).bind(datasources(ithread))
models(ithread).init
for (i <- 0 until models(ithread).modelmats.length) {
- models(ithread).modelmats(i) <-- mm(i)
+ models(ithread).modelmats(i) <-- mm(i)
}
updaters(ithread).init(models(ithread))
}
@@ -703,18 +703,18 @@ case class ParLearnerx(
class ParLearnerxF(
dopts:DataSource.Opts,
- ddfun:(DataSource.Opts, Int)=>DataSource,
- mopts:Model.Opts,
- mkmodel:(Model.Opts)=>Model,
- ropts:Mixin.Opts,
- mkreg:(Mixin.Opts)=>Array[Mixin],
- uopts:Updater.Opts,
- mkupdater:(Updater.Opts)=>Updater,
- sopts:DataSink.Opts,
- ssfun:(DataSink.Opts, Int)=>DataSink,
- val lopts:ParLearner.Options = new ParLearner.Options) extends Serializable {
+ ddfun:(DataSource.Opts, Int)=>DataSource,
+ mopts:Model.Opts,
+ mkmodel:(Model.Opts)=>Model,
+ ropts:Mixin.Opts,
+ mkreg:(Mixin.Opts)=>Array[Mixin],
+ uopts:Updater.Opts,
+ mkupdater:(Updater.Opts)=>Updater,
+ sopts:DataSink.Opts,
+ ssfun:(DataSink.Opts, Int)=>DataSink,
+ val lopts:ParLearner.Options = new ParLearner.Options) extends Serializable {
- var dds:Array[DataSource] = null;
+ var dds:Array[DataSource] = null
var sss:Array[DataSink] = null
var models:Array[Model] = null
var mixins:Array[Array[Mixin]] = null
@@ -722,18 +722,18 @@ class ParLearnerxF(
var learner:ParLearnerx = null
def setup = {
- dds = new Array[DataSource](lopts.nthreads);
- sss = new Array[DataSink](lopts.nthreads);
- models = new Array[Model](lopts.nthreads);
+ dds = new Array[DataSource](lopts.nthreads)
+ sss = new Array[DataSink](lopts.nthreads)
+ models = new Array[Model](lopts.nthreads)
if (mkreg != null) mixins = new Array[Array[Mixin]](lopts.nthreads)
updaters = new Array[Updater](lopts.nthreads)
val thisGPU = if (Mat.hasCUDA > 0) getGPU else 0
for (i <- 0 until lopts.nthreads) {
if (mopts.useGPU && i < Mat.hasCUDA) setGPU(i)
- dds(i) = ddfun(dopts, i)
- models(i) = mkmodel(mopts)
- if (mkreg != null) mixins(i) = mkreg(ropts)
- updaters(i) = mkupdater(uopts)
+ dds(i) = ddfun(dopts, i)
+ models(i) = mkmodel(mopts)
+ if (mkreg != null) mixins(i) = mkreg(ropts)
+ updaters(i) = mkupdater(uopts)
}
if (0 < Mat.hasCUDA) setGPU(thisGPU)
learner = new ParLearnerx(dds, models, mixins, updaters, sss, lopts)
@@ -755,16 +755,16 @@ class ParLearnerxF(
*/
class ParLearnerF(
- val ds:DataSource,
- val mopts:Model.Opts,
- mkmodel:(Model.Opts)=>Model,
- ropts:Mixin.Opts,
- mkreg:(Mixin.Opts)=>Array[Mixin],
- val uopts:Updater.Opts,
- mkupdater:(Updater.Opts)=>Updater,
- val sopts:DataSink.Opts,
- val ss:DataSink,
- val lopts:ParLearner.Options = new ParLearner.Options) extends Serializable {
+ val ds:DataSource,
+ val mopts:Model.Opts,
+ mkmodel:(Model.Opts)=>Model,
+ ropts:Mixin.Opts,
+ mkreg:(Mixin.Opts)=>Array[Mixin],
+ val uopts:Updater.Opts,
+ mkupdater:(Updater.Opts)=>Updater,
+ val sopts:DataSink.Opts,
+ val ss:DataSink,
+ val lopts:ParLearner.Options = new ParLearner.Options) extends Serializable {
var models:Array[Model] = null
var mixins:Array[Array[Mixin]] = null
var updaters:Array[Updater] = null
@@ -777,16 +777,16 @@ class ParLearnerF(
val thisGPU = if (Mat.hasCUDA > 0) getGPU else 0
for (i <- 0 until lopts.nthreads) {
if (mopts.useGPU && i < Mat.hasCUDA) setGPU(i)
- models(i) = mkmodel(mopts)
- if (mkreg != null) mixins(i) = mkreg(ropts)
- if (mkupdater != null) updaters(i) = mkupdater(uopts)
+ models(i) = mkmodel(mopts)
+ if (mkreg != null) mixins(i) = mkreg(ropts)
+ if (mkupdater != null) updaters(i) = mkupdater(uopts)
}
if (0 < Mat.hasCUDA) setGPU(thisGPU)
learner = new ParLearner(ds, models, mixins, updaters, ss, lopts)
learner.setup
}
- def init = learner.init
+ def init = learner.init
def train = {
setup
@@ -800,27 +800,27 @@ class ParLearnerF(
object Learner {
class Options extends BIDMat.Opts {
- var npasses = 2;
- var evalStep = 11;
- var pstep = 0.01f;
- var resFile:String = null;
- var autoReset = true;
- var useCache = true;
- var updateAll = false;
- var debugMem = false;
- var cumScore = 0;
- var checkPointFile:String = null;
- var checkPointInterval = 0f;
+ var npasses = 2
+ var evalStep = 11
+ var pstep = 0.01f
+ var resFile:String = null
+ var autoReset = true
+ var useCache = true
+ var updateAll = false
+ var debugMem = false
+ var cumScore = 0
+ var checkPointFile:String = null
+ var checkPointInterval = 0f
}
def numBytes(mat:Mat):Long = {
mat match {
- case a:FMat => 4L * mat.length;
- case a:IMat => 4L * mat.length;
- case a:DMat => 8L * mat.length;
- case a:LMat => 8L * mat.length;
- case a:SMat => 8L * mat.nnz;
- case a:SDMat => 12L * mat.nnz;
+ case a:FMat => 4L * mat.length
+ case a:IMat => 4L * mat.length
+ case a:DMat => 8L * mat.length
+ case a:LMat => 8L * mat.length
+ case a:SMat => 8L * mat.nnz
+ case a:SDMat => 12L * mat.nnz
}
}
@@ -842,18 +842,18 @@ object Learner {
def setupPB(ds:DataSource, npb:Int, dim:Int) = {
ds match {
case ddm:MatSource => {
- if (npb >= 0) {
- ddm.setupPutBack(npb, dim)
- }
+ if (npb >= 0) {
+ ddm.setupPutBack(npb, dim)
+ }
}
case _ => {}
}
}
def scoreSummary(reslist:ListBuffer[FMat], lasti:Int, len:Int, cumScore:Int = 0):String = {
- val istart = if (cumScore == 0) lasti else {if (cumScore == 1) 0 else if (cumScore == 2) len/2 else 3*len/4};
+ val istart = if (cumScore == 0) lasti else {if (cumScore == 1) 0 else if (cumScore == 2) len/2 else 3*len/4}
var i = 0
- var sum = 0.0;
+ var sum = 0.0
for (scoremat <- reslist) {
if (i >= istart) sum += mean(scoremat(?,0)).v
i += 1
@@ -863,7 +863,7 @@ object Learner {
def scores2FMat(reslist:ListBuffer[FMat]):FMat = {
val out = FMat(reslist(0).nrows, reslist.length)
- var i = 0;
+ var i = 0
while (i < reslist.length) {
val scoremat = reslist(i)
out(?, i) = scoremat(?,0)
@@ -877,17 +877,17 @@ object ParLearner {
class Options extends
Learner.Options {
- var nthreads = math.max(0, Mat.hasCUDA)
- var syncStep = 32
- var coolit = 60
+ var nthreads = math.max(0, Mat.hasCUDA)
+ var syncStep = 32
+ var coolit = 60
}
def syncmodelsPass(models:Array[Model], mm:Array[Mat], um:Array[Mat], ipass:Int) = {
- models(0).mergeModelPassFn(models, mm, um, ipass);
+ models(0).mergeModelPassFn(models, mm, um, ipass)
}
def syncmodels(models:Array[Model], mm:Array[Mat], um:Array[Mat], istep:Long, useGPU:Boolean) = {
- models(0).mergeModelFn(models, mm, um, istep);
+ models(0).mergeModelFn(models, mm, um, istep)
}
}
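One easy-to-miss unit convention in the Learner hunks above: checkPointInterval is in hours while toc returns seconds, so checkpoints fire when toc > 3600 * checkPointInterval * (1 + lastCheckPoint). A dependency-free sketch of that cadence — the simulated toc readings and the println standing in for model.save are assumptions, and only the guard arithmetic is taken from the code (the real guard also requires checkPointFile != null):

    object CheckpointSketch {
      // checkPointInterval is in hours; elapsed time is in seconds, like BIDMach's toc.
      def checkpointDue(elapsedSecs: Double, intervalHours: Float, lastCheckPoint: Int): Boolean =
        intervalHours > 0 && elapsedSecs > 3600.0 * intervalHours * (1 + lastCheckPoint)

      def main(args: Array[String]): Unit = {
        var lastCheckPoint = 0
        val intervalHours = 0.5f                        // a checkpoint every half hour
        for (t <- Seq(600.0, 1900.0, 3700.0, 5500.0)) { // simulated toc readings, in seconds
          if (checkpointDue(t, intervalHours, lastCheckPoint)) {
            println(f"t=$t%5.0f s: save checkpoint $lastCheckPoint%d") // stands in for model.save
            lastCheckPoint += 1
          }
        }
      }
    }

With these inputs it saves at 1900 s, 3700 s and 5500 s, spacing checkpoints at least half an hour apart.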
diff --git a/src/main/scala/BIDMach/Logging.scala b/src/main/scala/BIDMach/Logging.scala
index 227df348..eeb7dc85 100644
--- a/src/main/scala/BIDMach/Logging.scala
+++ b/src/main/scala/BIDMach/Logging.scala
@@ -1,38 +1,36 @@
-package BIDMach
-import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GDMat,GLMat,GMat,GIMat,GSDMat,GSMat,LMat,SMat,SDMat,TMat}
-import BIDMat.MatFunctions._
-import BIDMat.SciFunctions._
-import BIDMat.Plotting._
-import BIDMach.models._
-import BIDMach.datasinks._
-
-
-object Logging{
- def logGradientL2Norm(model:Model,data:Array[Mat]):Array[Mat] = {
- val m = model.modelmats
- val res = new Array[Float](m.length)
- for(i<-0 until m.length){
- res(i) = sum(snorm(m(i))).dv.toFloat
- }
- Array(FMat(m.length,1,res))
- }
-
- def logGradientL1Norm(model:Model,data:Array[Mat]):Array[Mat] = {
- val m = model.modelmats
- val res = new Array[Float](m.length)
- for(i<-0 until m.length){
- res(i) = sum(sum(abs(m(i)))).dv.toFloat
- }
- Array(FMat(m.length,1,res))
- }
-
- def getResults(model:Model): Array[Mat] = {
- model.opts.logDataSink match {
- case f:FileSink=>{println("Found results at "+f.opts.ofnames.head(0));null}
- case m:MatSink=>m.mats
- case null=>{println("No logDataSink found");null}
- }
- }
-
- def getResults(l:Learner): Array[Mat] = getResults(l.model)
-}
+package BIDMach
+import BIDMach.datasinks._
+import BIDMach.models._
+import BIDMat.SciFunctions._
+import BIDMat.{FMat, Mat}
+
+
+object Logging{
+ def logGradientL2Norm(model:Model,data:Array[Mat]):Array[Mat] = {
+ val m = model.modelmats
+ val res = new Array[Float](m.length)
+ for(i<-0 until m.length){
+ res(i) = sum(snorm(m(i))).dv.toFloat
+ }
+ Array(FMat(m.length,1,res))
+ }
+
+ def logGradientL1Norm(model:Model,data:Array[Mat]):Array[Mat] = {
+ val m = model.modelmats
+ val res = new Array[Float](m.length)
+ for(i<-0 until m.length){
+ res(i) = sum(sum(abs(m(i)))).dv.toFloat
+ }
+ Array(FMat(m.length,1,res))
+ }
+
+ def getResults(model:Model): Array[Mat] = {
+ model.opts.logDataSink match {
+ case f:FileSink=>{println("Found results at "+f.opts.ofnames.head(0));null}
+ case m:MatSink=>m.mats
+ case null=>{println("No logDataSink found");null}
+ }
+ }
+
+ def getResults(l:Learner): Array[Mat] = getResults(l.model)
+}
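For orientation: both helpers above fold each model matrix down to a single Float and return the results packed into an m.length x 1 FMat. A rough dependency-free analogue, using Array[Array[Float]] in place of FMat — the Frobenius-style reduction here is an approximation of sum(snorm(.)), so treat the BIDMat code above as authoritative for the exact semantics:

    object NormSketch {
      // One Float per "model matrix": Frobenius norm for the L2 flavor, sum of |x| for L1.
      def l2(m: Array[Array[Float]]): Float =
        math.sqrt(m.iterator.flatMap(_.iterator).map(x => x.toDouble * x).sum).toFloat

      def l1(m: Array[Array[Float]]): Float =
        m.iterator.flatMap(_.iterator).map(x => math.abs(x.toDouble)).sum.toFloat

      def main(args: Array[String]): Unit = {
        val modelmats = Array(Array(Array(3f, 4f)), Array(Array(1f, -2f, 2f)))
        val res = modelmats.map(l2)    // one entry per matrix, like FMat(m.length, 1, res) above
        println(res.mkString(", "))    // 5.0, 3.0
      }
    }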
diff --git a/src/main/scala/BIDMach/allreduce/Command.scala b/src/main/scala/BIDMach/allreduce/Command.scala
index 73a1944b..ce0bbee2 100644
--- a/src/main/scala/BIDMach/allreduce/Command.scala
+++ b/src/main/scala/BIDMach/allreduce/Command.scala
@@ -1,260 +1,246 @@
package BIDMach.allreduce
-import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GDMat,GLMat,GMat,GIMat,GSDMat,GSMat,LMat,SMat,SDMat}
+import java.io.{ByteArrayOutputStream, PrintStream}
+import java.nio.ByteBuffer
+
+import BIDMat.IMat
import BIDMat.MatFunctions._
-import BIDMat.SciFunctions._
-import edu.berkeley.bid.comm._
-import scala.collection.parallel._
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.net.ServerSocket;
-import java.net.Socket;
-import java.net.InetSocketAddress;
-import java.net.SocketException;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.PrintStream;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.FloatBuffer;
-import java.nio.IntBuffer;
class Command(val ctype:Int, val dest0:Int, val clen:Int, val bytes:Array[Byte]) {
- val magic = Command.magic;
- var dest = dest0;
- val byteData = ByteBuffer.wrap(bytes);
- val intData = byteData.asIntBuffer;
- val floatData = byteData.asFloatBuffer;
- val longData = byteData.asLongBuffer;
+ val magic = Command.magic
+ var dest = dest0
+ val byteData = ByteBuffer.wrap(bytes)
+ val intData = byteData.asIntBuffer
+ val floatData = byteData.asFloatBuffer
+ val longData = byteData.asLongBuffer
def encode() = {}
def decode() = {}
- def this(ctype0:Int, dest0:Int, clen0:Int) = this(ctype0, dest0, clen0, new Array[Byte](4*clen0));
+ def this(ctype0:Int, dest0:Int, clen0:Int) = this(ctype0, dest0, clen0, new Array[Byte](4*clen0))
override def toString():String = {
- "Command %s, length %d bytes" format (Command.names(ctype), clen*4);
+ "Command %s, length %d bytes" format (Command.names(ctype), clen*4)
}
}
object Command {
- val magic = 0xa6b38734;
- final val configCtype = 1;
- final val permuteCtype = 2;
- final val allreduceCtype = 3;
- final val permuteAllreduceCtype = 4;
- final val setMachineCtype = 5;
- final val startLearnerCtype = 6;
- final val names = Array[String]("", "config", "permute", "allreduce", "permuteAllreduce", "setMachine", "startLearner");
-
-
+ val magic = 0xa6b38734
+ final val configCtype = 1
+ final val permuteCtype = 2
+ final val allreduceCtype = 3
+ final val permuteAllreduceCtype = 4
+ final val setMachineCtype = 5
+ final val startLearnerCtype = 6
+ final val names = Array[String]("", "config", "permute", "allreduce", "permuteAllreduce", "setMachine", "startLearner")
+
+
def toAddress(v:Int):String = {
- val p0 = (v >> 24) & 255;
- val p1 = (v >> 16) & 255;
- val p2 = (v >> 8) & 255;
- val p3 = v & 255;
- "%d.%d.%d.%d" format(p0,p1,p2,p3);
+ val p0 = (v >> 24) & 255
+ val p1 = (v >> 16) & 255
+ val p2 = (v >> 8) & 255
+ val p3 = v & 255
+ "%d.%d.%d.%d" format(p0,p1,p2,p3)
}
def address(a:Int, b:Int, c:Int, d:Int):Int = {
- d + ((c + ((b + (a << 8)) << 8)) << 8);
+ d + ((c + ((b + (a << 8)) << 8)) << 8)
}
def printStackTrace(e:Exception):String = {
- val baos = new ByteArrayOutputStream();
- val ps = new PrintStream(baos);
- e.printStackTrace(ps);
- val str = baos.toString();
- ps.close();
- str;
+ val baos = new ByteArrayOutputStream()
+ val ps = new PrintStream(baos)
+ e.printStackTrace(ps)
+ val str = baos.toString()
+ ps.close()
+ str
}
}
class ConfigCommand(clen:Int, dest0:Int, bytes:Array[Byte]) extends Command(Command.configCtype, dest0, clen, bytes) {
- var gmods:IMat = null;
- var gridmachines:IMat = null;
- var workerIPs:IMat = null;
+ var gmods:IMat = null
+ var gridmachines:IMat = null
+ var workerIPs:IMat = null
- def this(clen0:Int, dest0:Int) = this(clen0, dest0, new Array[Byte](clen0*4));
+ def this(clen0:Int, dest0:Int) = this(clen0, dest0, new Array[Byte](clen0*4))
def setFields(imach0:Int, gmods0:IMat, gridmachines0:IMat, workerIPs0:IMat) {
- dest = imach0;
- gmods = gmods;
- gridmachines = gridmachines0;
- workerIPs = workerIPs0;
+ dest = imach0
+ gmods = gmods0
+ gridmachines = gridmachines0
+ workerIPs = workerIPs0
}
override def encode ():Unit = {
- intData.rewind();
- intData.put(gmods.length);
- intData.put(gmods.data, 0, gmods.length);
- intData.put(gridmachines.length);
- intData.put(gridmachines.data, 0, gridmachines.length);
- intData.put(workerIPs.length);
- intData.put(workerIPs.data, 0, workerIPs.length);
+ intData.rewind()
+ intData.put(gmods.length)
+ intData.put(gmods.data, 0, gmods.length)
+ intData.put(gridmachines.length)
+ intData.put(gridmachines.data, 0, gridmachines.length)
+ intData.put(workerIPs.length)
+ intData.put(workerIPs.data, 0, workerIPs.length)
}
override def decode():Unit = {
- intData.rewind();
- val lgmods = intData.get();
- gmods = izeros(lgmods,1);
- intData.get(gmods.data, 0, lgmods);
- val lgm = intData.get();
- gridmachines = izeros(lgm, 1);
- intData.get(gridmachines.data, 0, lgm);
- val lwips = intData.get();
- workerIPs = izeros(lwips, 1);
+ intData.rewind()
+ val lgmods = intData.get()
+ gmods = izeros(lgmods,1)
+ intData.get(gmods.data, 0, lgmods)
+ val lgm = intData.get()
+ gridmachines = izeros(lgm, 1)
+ intData.get(gridmachines.data, 0, lgm)
+ val lwips = intData.get()
+ workerIPs = izeros(lwips, 1)
intData.get(workerIPs.data, 0, lwips);
}
override def toString():String = {
- var ostring = new StringBuilder("Command %s, length %d words" format (Command.names(ctype), clen));
+ var ostring = new StringBuilder("Command %s, length %d words" format (Command.names(ctype), clen))
ostring.append("\nGroups: ")
for (i <- 0 until gmods.length) {
- ostring.append("%d " format gmods(i));
+ ostring.append("%d " format gmods(i))
}
- ostring.append("\nGridmachines: ");
+ ostring.append("\nGridmachines: ")
for (i <- 0 until math.min(20, gridmachines.length)) {
- ostring.append("%d " format gridmachines(i));
+ ostring.append("%d " format gridmachines(i))
}
- ostring.append("\nWorkerIPs: ");
+ ostring.append("\nWorkerIPs: ")
for (i <- 0 until math.min(20, gridmachines.length)) {
- ostring.append("%s " format Command.toAddress(workerIPs(i)));
+ ostring.append("%s " format Command.toAddress(workerIPs(i)))
}
ostring.append("\n")
- ostring.toString;
+ ostring.toString
}
}
class PermuteCommand(dest0:Int, bytes:Array[Byte]) extends Command(Command.permuteCtype, dest0, 2, bytes) {
- var seed:Long = 0;
+ var seed:Long = 0
- def this(dest0:Int) = this(dest0, new Array[Byte](2*4));
+ def this(dest0:Int) = this(dest0, new Array[Byte](2*4))
def setFields(seed0:Long) {
- seed = seed0;
+ seed = seed0
}
override def encode ():Unit = {
- longData.rewind();
- longData.put(seed);
+ longData.rewind()
+ longData.put(seed)
}
override def decode():Unit = {
- longData.rewind();
+ longData.rewind()
seed = longData.get();
}
override def toString():String = {
- "Command %s, length %d words, seed %d" format (Command.names(ctype), clen, seed);
+ "Command %s, length %d words, seed %d" format (Command.names(ctype), clen, seed)
}
}
class SetMachineCommand(dest0:Int, newdest0:Int, bytes:Array[Byte]) extends Command(Command.setMachineCtype, dest0, 1, bytes) {
- dest = dest0;
- var newdest = newdest0;
+ dest = dest0
+ var newdest = newdest0
- def this(dest0:Int, newdest0:Int) = this(dest0, newdest0, new Array[Byte](1*4));
+ def this(dest0:Int, newdest0:Int) = this(dest0, newdest0, new Array[Byte](1*4))
override def encode ():Unit = {
- intData.rewind();
- intData.put(newdest);
+ intData.rewind()
+ intData.put(newdest)
}
override def decode():Unit = {
- intData.rewind();
+ intData.rewind()
newdest = intData.get();
}
override def toString():String = {
- "Command %s, length %d words, machine %d newdest %d" format (Command.names(ctype), clen, dest, newdest);
+ "Command %s, length %d words, machine %d newdest %d" format (Command.names(ctype), clen, dest, newdest)
}
}
class StartLearnerCommand(dest0:Int, bytes:Array[Byte]) extends Command(Command.startLearnerCtype, dest0, 1, bytes) {
- dest = dest0;
+ dest = dest0
- def this(dest0:Int) = this(dest0, new Array[Byte](1*4));
+ def this(dest0:Int) = this(dest0, new Array[Byte](1*4))
override def encode ():Unit = {
- intData.rewind();
- intData.put(dest);
+ intData.rewind()
+ intData.put(dest)
}
override def decode():Unit = {
}
override def toString():String = {
- "Command %s, length %d words, machine %d" format (Command.names(ctype), clen, dest);
+ "Command %s, length %d words, machine %d" format (Command.names(ctype), clen, dest)
}
}
class AllreduceCommand(dest0:Int, bytes:Array[Byte]) extends Command(Command.allreduceCtype, dest0, 4, bytes) {
- var round:Int = 0;
- var limit:Long = 0;
+ var round:Int = 0
+ var limit:Long = 0
- def this(dest0:Int) = this(dest0, new Array[Byte](4*4));
+ def this(dest0:Int) = this(dest0, new Array[Byte](4*4))
def setFields(round0:Int, limit0:Long) {
- round = round0;
- limit = limit0;
+ round = round0
+ limit = limit0
}
override def encode():Unit = {
- longData.rewind();
- longData.put(round);
- longData.put(limit);
+ longData.rewind()
+ longData.put(round)
+ longData.put(limit)
}
override def decode():Unit = {
- longData.rewind();
- round = longData.get().toInt;
- limit = longData.get();
+ longData.rewind()
+ round = longData.get().toInt
+ limit = longData.get()
}
override def toString():String = {
- "Command %s, length %d words, round %d limit %d" format (Command.names(ctype), clen, round, limit);
+ "Command %s, length %d words, round %d limit %d" format (Command.names(ctype), clen, round, limit)
}
}
class PermuteAllreduceCommand(dest0:Int, bytes:Array[Byte]) extends Command(Command.permuteAllreduceCtype, dest0, 6, bytes) {
- def this(dest0:Int) = this(dest0, new Array[Byte](6*4));
+ def this(dest0:Int) = this(dest0, new Array[Byte](6*4))
- var seed:Long = 0;
- var round:Int = 0;
- var limit:Long = 0;
+ var seed:Long = 0
+ var round:Int = 0
+ var limit:Long = 0
def setFields(round0:Int, seed0:Long, limit0:Long) {
- round = round0;
- seed = seed0;
- limit = limit0;
+ round = round0
+ seed = seed0
+ limit = limit0
}
override def encode():Unit = {
- longData.rewind();
- longData.put(round);
- longData.put(seed);
- longData.put(limit);
+ longData.rewind()
+ longData.put(round)
+ longData.put(seed)
+ longData.put(limit)
}
override def decode():Unit = {
- longData.rewind();
- round = longData.get().toInt;
- seed = longData.get();
- limit = longData.get();
+ longData.rewind()
+ round = longData.get().toInt
+ seed = longData.get()
+ limit = longData.get()
}
override def toString():String = {
- "Command %s, length %d words, round %d seed %d limit %d" format (Command.names(ctype), clen, round, seed, limit);
+ "Command %s, length %d words, round %d seed %d limit %d" format (Command.names(ctype), clen, round, seed, limit)
}
}
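The Command hierarchy above serializes everything through one byte array: byteData wraps it, and intData/floatData/longData are typed views over the same storage, so encode() writes through a view and decode() rewinds it and reads the fields back. A minimal round-trip in the same style — standalone, with the two-long layout borrowed from AllreduceCommand; the values are arbitrary:

    import java.nio.ByteBuffer

    object CommandBufferSketch {
      def main(args: Array[String]): Unit = {
        val bytes    = new Array[Byte](4 * 4)  // clen = 4 words, as in AllreduceCommand
        val byteData = ByteBuffer.wrap(bytes)  // every view below shares this storage
        val longData = byteData.asLongBuffer   // 16 bytes = room for exactly two longs

        // encode(): write round and limit through the long view
        longData.rewind()
        longData.put(7L)                       // round (stored as a long, read back with .toInt)
        longData.put(2000000000L)              // limit

        // decode(): rewind the same view and read the fields back out of bytes
        longData.rewind()
        val round = longData.get().toInt
        val limit = longData.get()
        println(s"round=$round limit=$limit")  // round=7 limit=2000000000
      }
    }

Because the views alias the array, the filled bytes can be handed straight to a socket writer, which is exactly what CommandWriter does with command.bytes.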
diff --git a/src/main/scala/BIDMach/allreduce/Master.scala b/src/main/scala/BIDMach/allreduce/Master.scala
index fd0bbed1..88d9e587 100644
--- a/src/main/scala/BIDMach/allreduce/Master.scala
+++ b/src/main/scala/BIDMach/allreduce/Master.scala
@@ -5,259 +5,259 @@ import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import edu.berkeley.bid.comm._
import scala.collection.parallel._
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.net.ServerSocket;
-import java.net.Socket;
-import java.net.SocketException;
-import java.net.InetSocketAddress;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
+import java.util.concurrent.ExecutorService
+import java.util.concurrent.Executors
+import java.util.concurrent.Future
+import java.net.ServerSocket
+import java.net.Socket
+import java.net.SocketException
+import java.net.InetSocketAddress
+import java.io.DataInputStream
+import java.io.DataOutputStream
+import java.io.IOException
class Master(val opts:Master.Opts = new Master.Options) extends Serializable {
- var M = 0;
- var gmods:IMat = null;
- var gridmachines:IMat = null;
- var workerIPs:IMat = null
- var executor:ExecutorService = null;
- var reduceTask:Future[_] = null;
- var reducer:Reducer = null
- var sendTiming = false
+ var M = 0
+ var gmods:IMat = null
+ var gridmachines:IMat = null
+ var workerIPs:IMat = null
+ var executor:ExecutorService = null
+ var reduceTask:Future[_] = null
+ var reducer:Reducer = null
+ var sendTiming = false
-
- def init() {
- executor = Executors.newFixedThreadPool(opts.numThreads);
- }
-
- def readConfig(configDir:String) {
- val clengths = loadIMat(configDir + "dims.imat.lz4");
- val allgmods = loadIMat(configDir + "gmods.imat.lz4");
- val allmachinecodes = loadIMat(configDir + "machines.imat.lz4");
- gmods = allgmods(0->clengths(M-1), M-1);
- gridmachines = allmachinecodes(0->M, M-1);
- }
+
+ def init() {
+ executor = Executors.newFixedThreadPool(opts.numThreads)
+ }
+
+ def readConfig(configDir:String) {
+ val clengths = loadIMat(configDir + "dims.imat.lz4")
+ val allgmods = loadIMat(configDir + "gmods.imat.lz4")
+ val allmachinecodes = loadIMat(configDir + "machines.imat.lz4")
+ gmods = allgmods(0->clengths(M-1), M-1)
+ gridmachines = allmachinecodes(0->M, M-1)
+ }
def config(gmods0:IMat, gridmachines0:IMat, workerIPs0:IMat) {
- gmods = gmods0;
- gridmachines = gridmachines0;
- workerIPs = workerIPs0;
- M = workerIPs.length;
+ gmods = gmods0
+ gridmachines = gridmachines0
+ workerIPs = workerIPs0
+ M = workerIPs.length
}
def sendConfig() {
- val clen = 3 + gmods.length + gridmachines.length + workerIPs.length;
- val cmd = new ConfigCommand(clen, 0);
- cmd.gmods = gmods;
- cmd.gridmachines = gridmachines;
- cmd.workerIPs = workerIPs;
- broadcastCommand(cmd);
+ val clen = 3 + gmods.length + gridmachines.length + workerIPs.length
+ val cmd = new ConfigCommand(clen, 0)
+ cmd.gmods = gmods
+ cmd.gridmachines = gridmachines
+ cmd.workerIPs = workerIPs
+ broadcastCommand(cmd)
}
def permuteNodes(seed:Long) {
- val cmd = new PermuteCommand(0);
- cmd.seed = seed;
- broadcastCommand(cmd);
+ val cmd = new PermuteCommand(0)
+ cmd.seed = seed
+ broadcastCommand(cmd)
}
def startUpdates() {
- reducer = new Reducer();
- reduceTask = executor.submit(reducer);
+ reducer = new Reducer()
+ reduceTask = executor.submit(reducer)
}
def stopUpdates() {
- reducer.stop = true;
+ reducer.stop = true
reduceTask.cancel(true);
}
def startLearners() {
- val cmd = new StartLearnerCommand(0);
- broadcastCommand(cmd);
+ val cmd = new StartLearnerCommand(0)
+ broadcastCommand(cmd)
}
def permuteAllreduce(round:Int, limit:Int) {
- val cmd = new PermuteAllreduceCommand(0);
- cmd.round = round;
- cmd.seed = round;
- cmd.limit = limit;
- broadcastCommand(cmd);
+ val cmd = new PermuteAllreduceCommand(0)
+ cmd.round = round
+ cmd.seed = round
+ cmd.limit = limit
+ broadcastCommand(cmd)
}
def log(msg:String) {
- print(msg);
- }
+ print(msg)
+ }
def broadcastCommand(cmd:Command) {
- cmd.encode;
- if (opts.trace > 2) log("Broadcasting cmd %s\n" format cmd);
- val futures = new Array[Future[_]](M);
- sendTiming = true;
- val timeout = executor.submit(new TimeoutThread(opts.sendTimeout, futures));
- for (imach <- 0 until M) {
- val newcmd = new Command(cmd.ctype, imach, cmd.clen, cmd.bytes);
- futures(imach) = send(newcmd, workerIPs(imach));
- }
- for (imach <- 0 until M) {
- try {
- futures(imach).get()
- } catch {
- case e:Exception => {}
- }
- if (futures(imach).isCancelled()) {
- if (opts.trace > 0) log("Broadcast to machine %d timed out, cmd %s\n" format (imach, cmd));
- }
- }
- sendTiming = false;
- timeout.cancel(true);
+ cmd.encode
+ if (opts.trace > 2) log("Broadcasting cmd %s\n" format cmd)
+ val futures = new Array[Future[_]](M)
+ sendTiming = true
+ val timeout = executor.submit(new TimeoutThread(opts.sendTimeout, futures))
+ for (imach <- 0 until M) {
+ val newcmd = new Command(cmd.ctype, imach, cmd.clen, cmd.bytes)
+ futures(imach) = send(newcmd, workerIPs(imach))
+ }
+ for (imach <- 0 until M) {
+ try {
+ futures(imach).get()
+ } catch {
+ case e:Exception => {}
+ }
+ if (futures(imach).isCancelled()) {
+ if (opts.trace > 0) log("Broadcast to machine %d timed out, cmd %s\n" format (imach, cmd))
+ }
+ }
+ sendTiming = false
+ timeout.cancel(true)
}
def setMachineNumbers {
- if (opts.trace > 2) log("Broadcasting setMachineNumbers\n");
- val futures = new Array[Future[_]](M);
- sendTiming = true;
- val timeout = executor.submit(new TimeoutThread(opts.sendTimeout, futures));
- for (imach <- 0 until M) {
- val cmd = new SetMachineCommand(0, imach);
- cmd.encode
- futures(imach) = send(cmd, workerIPs(imach));
- }
- for (imach <- 0 until M) {
- try {
- futures(imach).get()
- } catch {
- case e:Exception => {}
- }
- if (futures(imach).isCancelled()) {
- if (opts.trace > 0) log("Broadcast to machine %d timed out, cmd setMachineNumbers\n" format (imach));
- }
- }
- sendTiming = false;
- timeout.cancel(true);
+ if (opts.trace > 2) log("Broadcasting setMachineNumbers\n")
+ val futures = new Array[Future[_]](M)
+ sendTiming = true
+ val timeout = executor.submit(new TimeoutThread(opts.sendTimeout, futures))
+ for (imach <- 0 until M) {
+ val cmd = new SetMachineCommand(0, imach)
+ cmd.encode
+ futures(imach) = send(cmd, workerIPs(imach))
+ }
+ for (imach <- 0 until M) {
+ try {
+ futures(imach).get()
+ } catch {
+ case e:Exception => {}
+ }
+ if (futures(imach).isCancelled()) {
+ if (opts.trace > 0) log("Broadcast to machine %d timed out, cmd setMachineNumbers\n" format (imach))
+ }
+ }
+ sendTiming = false
+ timeout.cancel(true)
}
def send(cmd:Command, address:Int):Future[_] = {
- val cw = new CommandWriter(Command.toAddress(address), opts.commandSocketNum, cmd);
- executor.submit(cw);
+ val cw = new CommandWriter(Command.toAddress(address), opts.commandSocketNum, cmd)
+ executor.submit(cw)
}
class CommandWriter(dest:String, socketnum:Int, command:Command) extends Runnable {
- def run() {
- var socket:Socket = null;
- try {
- socket = new Socket();
- socket.setReuseAddress(true);
- socket.connect(new InetSocketAddress(dest, socketnum), opts.sendTimeout);
- if (socket.isConnected()) {
- val ostr = new DataOutputStream(socket.getOutputStream());
- ostr.writeInt(command.magic)
- ostr.writeInt(command.ctype);
- ostr.writeInt(command.dest);
- ostr.writeInt(command.clen);
- ostr.write(command.bytes, 0, command.clen*4);
- }
- } catch {
- case e:Exception =>
- if (opts.trace > 0) {
- log("Master problem sending command %s\n%s\n" format (command.toString, Command.printStackTrace(e)));
- }
- } finally {
- try { if (socket != null) socket.close(); } catch {
- case e:Exception =>
- if (opts.trace > 0) log("Master problem closing socket\n%s\n" format Command.printStackTrace(e));
- }
- }
- }
+ def run() {
+ var socket:Socket = null
+ try {
+ socket = new Socket()
+ socket.setReuseAddress(true)
+ socket.connect(new InetSocketAddress(dest, socketnum), opts.sendTimeout)
+ if (socket.isConnected()) {
+ val ostr = new DataOutputStream(socket.getOutputStream())
+ ostr.writeInt(command.magic)
+ ostr.writeInt(command.ctype)
+ ostr.writeInt(command.dest)
+ ostr.writeInt(command.clen)
+ ostr.write(command.bytes, 0, command.clen*4)
+ }
+ } catch {
+ case e:Exception =>
+ if (opts.trace > 0) {
+ log("Master problem sending command %s\n%s\n" format (command.toString, Command.printStackTrace(e)))
+ }
+ } finally {
+ try { if (socket != null) socket.close() } catch {
+ case e:Exception =>
+ if (opts.trace > 0) log("Master problem closing socket\n%s\n" format Command.printStackTrace(e));
+ }
+ }
+ }
}
class Reducer() extends Runnable {
- var stop = false;
+ var stop = false
- def run() {
- var round = 0;
- var limit = 0;
- while (!stop) {
- val newlimit0 = if (opts.limitFctn != null) {
- opts.limitFctn(round, opts.limit);
- } else {
- opts.limit;
- }
- limit = if (newlimit0 <= 0) 2000000000 else newlimit0;
- val cmd = if (opts.permuteAlways) {
- val cmd0 = new PermuteAllreduceCommand(0);
- cmd0.round = round;
- cmd0.seed = round;
- cmd0.limit = limit;
- cmd0;
- } else {
- val cmd0 = new AllreduceCommand(0);
- cmd0.round = round;
- cmd0.limit = limit;
- cmd0;
- }
- broadcastCommand(cmd);
- val timems = opts.intervalMsec + (limit * opts.timeScaleMsec).toInt;
- if (opts.trace > 2) log("Sleeping for %d msec\n" format timems);
- Thread.sleep(timems);
- round += 1;
- }
- }
+ def run() {
+ var round = 0
+ var limit = 0
+ while (!stop) {
+ val newlimit0 = if (opts.limitFctn != null) {
+ opts.limitFctn(round, opts.limit)
+ } else {
+ opts.limit
+ }
+ limit = if (newlimit0 <= 0) 2000000000 else newlimit0
+ val cmd = if (opts.permuteAlways) {
+ val cmd0 = new PermuteAllreduceCommand(0)
+ cmd0.round = round
+ cmd0.seed = round
+ cmd0.limit = limit
+ cmd0
+ } else {
+ val cmd0 = new AllreduceCommand(0)
+ cmd0.round = round
+ cmd0.limit = limit
+ cmd0
+ }
+ broadcastCommand(cmd)
+ val timems = opts.intervalMsec + (limit * opts.timeScaleMsec).toInt
+ if (opts.trace > 2) log("Sleeping for %d msec\n" format timems)
+ Thread.sleep(timems)
+ round += 1
+ }
+ }
}
- class TimeoutThread(mtime:Int, futures:Array[Future[_]]) extends Runnable {
- def run() {
- try {
- Thread.sleep(mtime);
- if (sendTiming) {
- for (i <- 0 until futures.length) {
- if (futures(i) != null) {
- if (opts.trace > 0) log("Master cancelling thread %d\n" format i);
- futures(i).cancel(true);
- }
- }
- }
- } catch {
- case e:InterruptedException => if (opts.trace > 3) log("Master interrupted timeout thread %s\n" format Command.printStackTrace(e));
- }
- }
+ class TimeoutThread(mtime:Int, futures:Array[Future[_]]) extends Runnable {
+ def run() {
+ try {
+ Thread.sleep(mtime)
+ if (sendTiming) {
+ for (i <- 0 until futures.length) {
+ if (futures(i) != null) {
+ if (opts.trace > 0) log("Master cancelling thread %d\n" format i)
+ futures(i).cancel(true)
+ }
+ }
+ }
+ } catch {
+ case e:InterruptedException => if (opts.trace > 3) log("Master interrupted timeout thread %s\n" format Command.printStackTrace(e))
+ }
+ }
}
}
object Master {
- trait Opts extends BIDMat.Opts{
- var limit = 0;
- var limitFctn:(Int,Int)=>Int = null;
- var intervalMsec = 1000;
- var timeScaleMsec = 1e-4f;
- var permuteAlways = true;
- var sendTimeout = 1000;
- var recvTimeout = 1000;
- var trace = 0;
- var commandSocketNum = 50050;
- var numThreads = 16;
+ trait Opts extends BIDMat.Opts{
+ var limit = 0
+ var limitFctn:(Int,Int)=>Int = null
+ var intervalMsec = 1000
+ var timeScaleMsec = 1e-4f
+ var permuteAlways = true
+ var sendTimeout = 1000
+ var recvTimeout = 1000
+ var trace = 0
+ var commandSocketNum = 50050
+ var numThreads = 16
}
-
- class Options extends Opts {}
-
- def powerLimit(round:Int, limit:Int, power:Float):Int = {
- if (round < 2) {
- limit
- } else {
- var rnd = round;
- var nzeros = 0;
- while ((rnd & 1) == 0) {
- rnd = (rnd >> 1);
- nzeros += 1;
- }
- (limit * math.pow(2, nzeros*power)).toInt
- }
- }
-
- def powerLimit(round:Int, limit:Int):Int = powerLimit(round, limit, 1f);
-
- var powerLimitFctn = powerLimit(_:Int,_:Int);
+
+ class Options extends Opts {}
+
+ def powerLimit(round:Int, limit:Int, power:Float):Int = {
+ if (round < 2) {
+ limit
+ } else {
+ var rnd = round
+ var nzeros = 0
+ while ((rnd & 1) == 0) {
+ rnd = (rnd >> 1)
+ nzeros += 1
+ }
+ (limit * math.pow(2, nzeros*power)).toInt
+ }
+ }
+
+ def powerLimit(round:Int, limit:Int):Int = powerLimit(round, limit, 1f)
+
+ var powerLimitFctn = powerLimit(_:Int,_:Int)
}
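The powerLimit schedule above counts trailing zero bits in the round number, so rounds divisible by 2^k scale the reducer's limit by 2^(k*power). A quick standalone check of that arithmetic (copied logic, illustrative limits):

    object PowerLimitSketch {
      // Same arithmetic as Master.powerLimit: round divisible by 2^k gives limit * 2^(k*power).
      def powerLimit(round: Int, limit: Int, power: Float = 1f): Int =
        if (round < 2) limit
        else {
          var rnd = round
          var nzeros = 0
          while ((rnd & 1) == 0) { rnd = rnd >> 1; nzeros += 1 }
          (limit * math.pow(2, nzeros * power)).toInt
        }

      def main(args: Array[String]): Unit = {
        for (r <- 1 to 8) print(s"$r:${powerLimit(r, 1000)} ")
        println() // 1:1000 2:2000 3:1000 4:4000 5:1000 6:2000 7:1000 8:8000
      }
    }

Odd rounds keep the base limit; with power = 1f, each extra factor of two in the round index doubles it again.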
diff --git a/src/main/scala/BIDMach/allreduce/Worker.scala b/src/main/scala/BIDMach/allreduce/Worker.scala
index ff38a8b2..cf87a343 100755
--- a/src/main/scala/BIDMach/allreduce/Worker.scala
+++ b/src/main/scala/BIDMach/allreduce/Worker.scala
@@ -3,278 +3,278 @@ package BIDMach.allreduce
import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GDMat,GLMat,GMat,GIMat,GSDMat,GSMat,LMat,SMat,SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
-import BIDMach.Learner;
-import BIDMach.models.Model;
+import BIDMach.Learner
+import BIDMach.models.Model
import edu.berkeley.bid.comm._
import scala.collection.parallel._
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.net.ServerSocket;
-import java.net.Socket;
-import java.net.SocketException;
-import java.io.DataInputStream;
-import java.io.IOException;
+import java.util.concurrent.ExecutorService
+import java.util.concurrent.Executors
+import java.util.concurrent.Future
+import java.net.ServerSocket
+import java.net.Socket
+import java.net.SocketException
+import java.io.DataInputStream
+import java.io.IOException
class Worker(val opts:Worker.Opts = new Worker.Options) extends Serializable {
- var M = 0;
- var imach = 0;
- var gmods:IMat = null;
- var gridmachines:IMat = null;
- var machineIPs:Array[String] = null;
- var groups:Groups = null;
+ var M = 0
+ var imach = 0
+ var gmods:IMat = null
+ var gridmachines:IMat = null
+ var machineIPs:Array[String] = null
+ var groups:Groups = null
- var executor:ExecutorService = null;
- var listener:CommandListener = null;
- var listenerTask:Future[_] = null;
- var machine:Machine = null;
- var learner:Learner = null;
- var model:Model = null;
-
- def start(learner0:Learner) = {
- learner = learner0;
- if (model == null && learner != null) model = learner.model;
- executor = Executors.newFixedThreadPool(8);
- listener = new CommandListener(opts.commandSocketNum);
- listenerTask = executor.submit(listener);
- }
+ var executor:ExecutorService = null
+ var listener:CommandListener = null
+ var listenerTask:Future[_] = null
+ var machine:Machine = null
+ var learner:Learner = null
+ var model:Model = null
+
+ def start(learner0:Learner) = {
+ learner = learner0
+ if (model == null && learner != null) model = learner.model
+ executor = Executors.newFixedThreadPool(8)
+ listener = new CommandListener(opts.commandSocketNum)
+ listenerTask = executor.submit(listener)
+ }
def config(imach0:Int, gmods0:IMat, gridmachines0:IMat, machineIPs0:IMat) = {
- val t1 = toc;
- imach = imach0;
- gmods = gmods0;
- gridmachines = gridmachines0;
- M = gridmachines.length;
- groups = new Groups(M, gmods.data, gridmachines.data, 0);
- machineIPs = machineIPs0.data.map(Command.toAddress(_));
- if (machine != null) machine.stop;
- machine = new Machine(null, groups, imach, M, opts.useLong, opts.bufsize, false, opts.machineTrace, opts.replicate, machineIPs);
- machine.configTimeout = opts.configTimeout;
- machine.reduceTimeout = opts.reduceTimeout;
- machine.sendTimeout = opts.sendTimeout;
- machine.recvTimeout = opts.recvTimeout;
- machine.sockBase = opts.peerSocketNum;
- machine.sockOffset = 0;
- machine.start(machine.maxk);
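+ // Rebuild the peer Machine for the new grid layout and restart its sockets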
+ val t1 = toc
+ imach = imach0
+ gmods = gmods0
+ gridmachines = gridmachines0
+ M = gridmachines.length
+ groups = new Groups(M, gmods.data, gridmachines.data, 0)
+ machineIPs = machineIPs0.data.map(Command.toAddress(_))
+ if (machine != null) machine.stop
+ machine = new Machine(null, groups, imach, M, opts.useLong, opts.bufsize, false, opts.machineTrace, opts.replicate, machineIPs)
+ machine.configTimeout = opts.configTimeout
+ machine.reduceTimeout = opts.reduceTimeout
+ machine.sendTimeout = opts.sendTimeout
+ machine.recvTimeout = opts.recvTimeout
+ machine.sockBase = opts.peerSocketNum
+ machine.sockOffset = 0
+ machine.start(machine.maxk)
val t2 = toc
if (opts.trace > 2) log("Machine config took %4.3f secs\n" format(t2-t1))
}
def permute(seed:Long) = {
- machine.groups.permute(seed.toInt);
+ machine.groups.permute(seed.toInt)
}
def allReduce(round:Int, limit:Long) = {
if (model != null) {
- val t1=toc;
- model.snapshot(limit.toInt, opts.doAvg);
- val sendmat = model.sendmat;
- val indexmat = if (model.indexmat.asInstanceOf[AnyRef] != null) {
- model.indexmat
- } else {
- irow(0 -> sendmat.ncols)
- }
+ val t1 = toc
+ model.snapshot(limit.toInt, opts.doAvg)
+ val sendmat = model.sendmat
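+ // Fall back to the identity column index map when the model supplies no index matrix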
+ val indexmat = if (model.indexmat.asInstanceOf[AnyRef] != null) {
+ model.indexmat
+ } else {
+ irow(0 -> sendmat.ncols)
+ }
- val result = if (opts.fuseConfigReduce) {
- (indexmat, sendmat) match {
- case (lmat:LMat, fsendmat:FMat) => machine.configReduce(lmat.data, lmat.data, fsendmat.data, sendmat.nrows, round);
- case (imat:IMat, fsendmat:FMat) => machine.configReduce(imat.data, imat.data, fsendmat.data, sendmat.nrows, round);
- }
- } else {
- (indexmat, sendmat) match {
- case (lmat:LMat, fsendmat:FMat) => machine.config(lmat.data, lmat.data, round);
- case (imat:IMat, fsendmat:FMat) => machine.config(imat.data, imat.data, round);
- }
- machine.reduce(sendmat.asInstanceOf[FMat].data, sendmat.nrows, round);
- }
- model.recvmat = new FMat(sendmat.nrows, sendmat.ncols, result);
- model.addStep(limit.toInt, opts.doAvg);
- val t2 = toc;
- val nbytes = indexmat match {
- case im:IMat => math.min(limit, im.length)*(2 + 2*sendmat.nrows)*8f;
- case im:LMat => math.min(limit, im.length)*(4 + 2*sendmat.nrows)*8f;
- }
- if (opts.trace > 2) log("Allreduce %5.2f MB took %5.4f secs at %5.2f MB/sec\n" format (nbytes/1e6f, t2-t1, nbytes/(t2-t1)/1e6f))
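+ // Fused mode runs config and reduce as one call; otherwise config first, then reduce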
+ val result = if (opts.fuseConfigReduce) {
+ (indexmat, sendmat) match {
+ case (lmat:LMat, fsendmat:FMat) => machine.configReduce(lmat.data, lmat.data, fsendmat.data, sendmat.nrows, round)
+ case (imat:IMat, fsendmat:FMat) => machine.configReduce(imat.data, imat.data, fsendmat.data, sendmat.nrows, round)
+ }
+ } else {
+ (indexmat, sendmat) match {
+ case (lmat:LMat, fsendmat:FMat) => machine.config(lmat.data, lmat.data, round)
+ case (imat:IMat, fsendmat:FMat) => machine.config(imat.data, imat.data, round)
+ }
+ machine.reduce(sendmat.asInstanceOf[FMat].data, sendmat.nrows, round)
+ }
+ model.recvmat = new FMat(sendmat.nrows, sendmat.ncols, result)
+ model.addStep(limit.toInt, opts.doAvg)
+ val t2 = toc
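+ // Rough byte-count estimate for the log line: per-column index words (2 for Int, 4 for Long) plus 2*nrows data words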
+ val nbytes = indexmat match {
+ case im:IMat => math.min(limit, im.length)*(2 + 2*sendmat.nrows)*8f
+ case im:LMat => math.min(limit, im.length)*(4 + 2*sendmat.nrows)*8f
+ }
+ if (opts.trace > 2) log("Allreduce %5.2f MB took %5.4f secs at %5.2f MB/sec\n" format (nbytes/1e6f, t2-t1, nbytes/(t2-t1)/1e6f))
} else {
if (opts.trace > 2) log("Allreduce model is null\n")
}
- }
+ }
def stop = {
- listener.stop = true;
- listenerTask.cancel(true);
- machine.stop;
+ listener.stop = true
+ listenerTask.cancel(true)
+ machine.stop
}
def shutdown = {
- executor.shutdownNow();
- val tt= toc;
+ executor.shutdownNow()
+ val tt = toc
}
def handleCMD(cmd:Command) = {
if (cmd.magic != Command.magic) {
- if (opts.trace > 0) log("Machine %d got message with bad magic number %d\n" format (imach, cmd.magic));
+ if (opts.trace > 0) log("Machine %d got message with bad magic number %d\n" format (imach, cmd.magic))
} else if (cmd.dest != imach) {
- if (opts.trace > 0) log("Machine %d got message with bad destination %d\n" format (imach, cmd.dest));
+ if (opts.trace > 0) log("Machine %d got message with bad destination %d\n" format (imach, cmd.dest))
} else {
- cmd.ctype match {
- case Command.configCtype => {
- val newcmd = new ConfigCommand(cmd.clen, imach, cmd.bytes);
- newcmd.decode;
- if (opts.trace > 2) log("Received %s\n" format newcmd.toString);
- config(newcmd.dest, newcmd.gmods, newcmd.gridmachines, newcmd.workerIPs);
- }
- case Command.permuteCtype => {
- val newcmd = new PermuteCommand(cmd.dest, cmd.bytes);
- newcmd.decode;
- if (opts.trace > 2) log("Received %s\n" format newcmd.toString);
- permute(newcmd.seed);
- }
- case Command.allreduceCtype => {
- val newcmd = new AllreduceCommand(cmd.dest, cmd.bytes);
- newcmd.decode;
- if (opts.trace > 2) log("Received %s\n" format newcmd.toString);
- allReduce(newcmd.round, newcmd.limit);
- }
- case Command.permuteAllreduceCtype => {
- val newcmd = new PermuteAllreduceCommand(cmd.dest, cmd.bytes);
- newcmd.decode;
- if (opts.trace > 2) log("Received %s\n" format newcmd.toString);
- permute(newcmd.seed);
- allReduce(newcmd.round, newcmd.limit);
- }
- case Command.setMachineCtype => {
- val newcmd = new SetMachineCommand(cmd.dest, 0, cmd.bytes);
- newcmd.decode;
- if (opts.trace > 2) log("Received %s\n" format newcmd.toString);
- imach = newcmd.newdest;
- }
- case Command.startLearnerCtype => {
- val newcmd = new StartLearnerCommand(cmd.dest, cmd.bytes);
- newcmd.decode;
- if (opts.trace > 2) log("Received %s\n" format newcmd.toString);
- if (learner != null) {
- learner.paused = false;
- }
- }
- }
+ cmd.ctype match {
+ case Command.configCtype => {
+ val newcmd = new ConfigCommand(cmd.clen, imach, cmd.bytes)
+ newcmd.decode
+ if (opts.trace > 2) log("Received %s\n" format newcmd.toString)
+ config(newcmd.dest, newcmd.gmods, newcmd.gridmachines, newcmd.workerIPs)
+ }
+ case Command.permuteCtype => {
+ val newcmd = new PermuteCommand(cmd.dest, cmd.bytes)
+ newcmd.decode
+ if (opts.trace > 2) log("Received %s\n" format newcmd.toString)
+ permute(newcmd.seed)
+ }
+ case Command.allreduceCtype => {
+ val newcmd = new AllreduceCommand(cmd.dest, cmd.bytes)
+ newcmd.decode
+ if (opts.trace > 2) log("Received %s\n" format newcmd.toString)
+ allReduce(newcmd.round, newcmd.limit)
+ }
+ case Command.permuteAllreduceCtype => {
+ val newcmd = new PermuteAllreduceCommand(cmd.dest, cmd.bytes)
+ newcmd.decode
+ if (opts.trace > 2) log("Received %s\n" format newcmd.toString)
+ permute(newcmd.seed)
+ allReduce(newcmd.round, newcmd.limit)
+ }
+ case Command.setMachineCtype => {
+ val newcmd = new SetMachineCommand(cmd.dest, 0, cmd.bytes)
+ newcmd.decode
+ if (opts.trace > 2) log("Received %s\n" format newcmd.toString)
+ imach = newcmd.newdest
+ }
+ case Command.startLearnerCtype => {
+ val newcmd = new StartLearnerCommand(cmd.dest, cmd.bytes)
+ newcmd.decode
+ if (opts.trace > 2) log("Received %s\n" format newcmd.toString)
+ if (learner != null) {
+ learner.paused = false
+ }
+ }
+ }
}
}
- class CommandListener(val socketnum:Int) extends Runnable {
- var stop = false;
- var ss:ServerSocket = null;
+ class CommandListener(val socketnum:Int) extends Runnable {
+ var stop = false
+ var ss:ServerSocket = null
- def start() {
- try {
- ss = new ServerSocket(socketnum);
- } catch {
- case e:Exception => {if (opts.trace > 0) log("Problem in CommandListener\n%s" format Command.printStackTrace(e));}
- }
- }
+ def start() {
+ try {
+ ss = new ServerSocket(socketnum)
+ } catch {
+ case e:Exception => {if (opts.trace > 0) log("Problem in CommandListener\n%s" format Command.printStackTrace(e))}
+ }
+ }
- def run() {
- start();
- while (!stop) {
- try {
- val scs = new CommandReader(ss.accept());
- if (opts.trace > 2) log("Command Listener got a message\n");
- val fut = executor.submit(scs);
- } catch {
- case e:SocketException => {
- if (opts.trace > 0) log("Problem starting a socket reader\n%s" format Command.printStackTrace(e));
- }
- // This is probably due to the server shutting to. Don't do anything.
- case e:Exception => {
- if (opts.trace > 0) log("Machine %d Command listener had a problem "+e format imach);
- }
- }
- }
- }
+ def run() {
+ start()
+ while (!stop) {
+ try {
+ val scs = new CommandReader(ss.accept())
+ if (opts.trace > 2) log("Command Listener got a message\n")
+ val fut = executor.submit(scs)
+ } catch {
+ case e:SocketException => {
+ if (opts.trace > 0) log("Problem starting a socket reader\n%s" format Command.printStackTrace(e))
+ }
+ // This is probably due to the server shutting down. Don't do anything.
+ case e:Exception => {
+ if (opts.trace > 0) log("Machine %d Command listener had a problem %s\n" format (imach, e))
+ }
+ }
+ }
+ }
- def stop(force:Boolean) {
- stop = true;
- if (force) {
- try {
- stop = true;
- ss.close();
- } catch {
- case e:Exception => {
- if (opts.trace > 0) log("Machine %d trouble closing command listener\n%s" format (imach, Command.printStackTrace(e)));
- }
- }
- }
- }
- }
+ def stop(force:Boolean) {
+ stop = true
+ if (force) {
+ try {
+ stop = true
+ ss.close()
+ } catch {
+ case e:Exception => {
+ if (opts.trace > 0) log("Machine %d trouble closing command listener\n%s" format (imach, Command.printStackTrace(e)))
+ }
+ }
+ }
+ }
+ }
- class CommandReader(socket:Socket) extends Runnable {
- def run() {
- try {
- val istr = new DataInputStream(socket.getInputStream());
- val magic = istr.readInt();
- val ctype = istr.readInt();
- val dest = istr.readInt();
- val clen = istr.readInt();
- val cmd = new Command(ctype, dest, clen, new Array[Byte](clen*4));
- if (opts.trace > 2) log("Worker %d got packet %s\n" format (imach, cmd.toString));
- istr.readFully(cmd.bytes, 0, clen*4);
- try {
- socket.close();
- } catch {
- case e:IOException => {if (opts.trace > 0) log("Worker %d Problem closing socket "+Command.printStackTrace(e)+"\n" format (imach))}
- }
- handleCMD(cmd);
- } catch {
- case e:Exception => if (opts.trace > 0) log("Worker %d Problem reading socket "+Command.printStackTrace(e)+"\n" format (imach));
- } finally {
- try {
- if (!socket.isClosed) socket.close();
- } catch {
- case e:IOException => {if (opts.trace > 0) log("Worker %d Final Problem closing socket "+Command.printStackTrace(e)+"\n" format (imach))}
- }
- }
- }
- }
-
- class TimeoutThread(mtime:Int, futures:Array[Future[_]]) extends Runnable {
- def run() {
- try {
- Thread.sleep(mtime);
- for (i <- 0 until futures.length) {
- if (futures(i) != null) {
- if (opts.trace > 0) log("Worker cancelling thread %d" format i);
- futures(i).cancel(true);
- }
- }
- } catch {
- case e:InterruptedException => if (opts.trace > 2) log("Worker interrupted timeout thread");
- }
- }
- }
+ class CommandReader(socket:Socket) extends Runnable {
+ def run() {
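+ // Read one framed command: four int header fields (magic, type, dest, length), then clen*4 payload bytes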
+ try {
+ val istr = new DataInputStream(socket.getInputStream())
+ val magic = istr.readInt()
+ val ctype = istr.readInt()
+ val dest = istr.readInt()
+ val clen = istr.readInt()
+ val cmd = new Command(ctype, dest, clen, new Array[Byte](clen*4))
+ if (opts.trace > 2) log("Worker %d got packet %s\n" format (imach, cmd.toString))
+ istr.readFully(cmd.bytes, 0, clen*4)
+ try {
+ socket.close()
+ } catch {
+ case e:IOException => {if (opts.trace > 0) log("Worker %d Problem closing socket "+Command.printStackTrace(e)+"\n" format (imach))}
+ }
+ handleCMD(cmd)
+ } catch {
+ case e:Exception => if (opts.trace > 0) log("Worker %d Problem reading socket "+Command.printStackTrace(e)+"\n" format (imach))
+ } finally {
+ try {
+ if (!socket.isClosed) socket.close()
+ } catch {
+ case e:IOException => {if (opts.trace > 0) log("Worker %d Final Problem closing socket "+Command.printStackTrace(e)+"\n" format (imach))}
+ }
+ }
+ }
+ }
+
+ class TimeoutThread(mtime:Int, futures:Array[Future[_]]) extends Runnable {
+ def run() {
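+ // Give the current operation mtime msec to finish, then cancel any futures still running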
+ try {
+ Thread.sleep(mtime)
+ for (i <- 0 until futures.length) {
+ if (futures(i) != null) {
+ if (opts.trace > 0) log("Worker cancelling thread %d" format i)
+ futures(i).cancel(true)
+ }
+ }
+ } catch {
+ case e:InterruptedException => if (opts.trace > 2) log("Worker interrupted timeout thread")
+ }
+ }
+ }
def log(msg:String) {
- print(msg);
- }
+ print(msg)
+ }
}
object Worker {
- trait Opts extends BIDMat.Opts{
- var configTimeout = 3000;
- var reduceTimeout = 3000;
- var sendTimeout = 1000;
- var recvTimeout = 1000;
- var cmdTimeout = 1000;
- var commandSocketNum = 50050;
- var peerSocketNum = 50051;
- var fuseConfigReduce = false;
- var doAvg = true;
- var useLong = false;
- var trace = 0;
- var machineTrace = 0;
- var replicate = 1;
- var bufsize = 10*1000000;
+ trait Opts extends BIDMat.Opts{
+ var configTimeout = 3000
+ var reduceTimeout = 3000
+ var sendTimeout = 1000
+ var recvTimeout = 1000
+ var cmdTimeout = 1000
+ var commandSocketNum = 50050
+ var peerSocketNum = 50051
+ var fuseConfigReduce = false
+ var doAvg = true
+ var useLong = false
+ var trace = 0
+ var machineTrace = 0
+ var replicate = 1
+ var bufsize = 10*1000000
}
-
- class Options extends Opts {}
-}
\ No newline at end of file
+
+ class Options extends Opts {}
+}
\ No newline at end of file
diff --git a/src/main/scala/BIDMach/caffe/Classifier.scala b/src/main/scala/BIDMach/caffe/Classifier.scala
index 8934fa77..45406ef9 100755
--- a/src/main/scala/BIDMach/caffe/Classifier.scala
+++ b/src/main/scala/BIDMach/caffe/Classifier.scala
@@ -14,9 +14,9 @@ class Classifier {
def init(model_file:String, pretrained_file:String, image_dims:Array[Int] = Array(256, 256),
gpu:Boolean = false, mean_file:String = null, input_scale:Float = 1f, channel_swap:IMat = 2\1\0) = {
- net.init(model_file, pretrained_file);
+ net.init(model_file, pretrained_file)
- CAFFE.set_phase(1);
+ CAFFE.set_phase(1)
CAFFE.set_mode(if (gpu) 1 else 0)
@@ -35,11 +35,11 @@ class Classifier {
}
def classify(im:Image):FND = {
- val fnd = net.preprocess(im)
- net.clear_inputs
- net.add_input(fnd, 0, 0)
- net.forward
- net.output_data(0)(?,?,?,0)
+ val fnd = net.preprocess(im)
+ net.clear_inputs
+ net.add_input(fnd, 0, 0)
+ net.forward
+ net.output_data(0)(?,?,?,0)
}
diff --git a/src/main/scala/BIDMach/caffe/Net.scala b/src/main/scala/BIDMach/caffe/Net.scala
index 8104c093..174ab548 100755
--- a/src/main/scala/BIDMach/caffe/Net.scala
+++ b/src/main/scala/BIDMach/caffe/Net.scala
@@ -18,27 +18,27 @@ class Net () {
def initIO = {
input_data = new Array[FND](num_inputs)
- for (i <- 0 until num_inputs) {
- val iblob = _net.input_blob(i)
- input_data(i) = FND(iblob.width, iblob.height, iblob.channels, iblob.num)
- input_diff(i) = FND(iblob.width, iblob.height, iblob.channels, iblob.num)
- }
- output_data = new Array[FND](num_outputs)
- for (i <- 0 until num_outputs) {
- val oblob = _net.output_blob(i)
- output_data(i) = FND(oblob.width, oblob.height, oblob.channels, oblob.num)
- output_diff(i) = FND(oblob.width, oblob.height, oblob.channels, oblob.num)
- }
+ for (i <- 0 until num_inputs) {
+ val iblob = _net.input_blob(i)
+ input_data(i) = FND(iblob.width, iblob.height, iblob.channels, iblob.num)
+ input_diff(i) = FND(iblob.width, iblob.height, iblob.channels, iblob.num)
+ }
+ output_data = new Array[FND](num_outputs)
+ for (i <- 0 until num_outputs) {
+ val oblob = _net.output_blob(i)
+ output_data(i) = FND(oblob.width, oblob.height, oblob.channels, oblob.num)
+ output_diff(i) = FND(oblob.width, oblob.height, oblob.channels, oblob.num)
+ }
}
def init(modelfile:String, paramfile:String) = {
- _net.init(modelfile, paramfile)
- initIO
+ _net.init(modelfile, paramfile)
+ initIO
}
def init(modelfile:String) = {
- _net.init(modelfile)
- initIO
+ _net.init(modelfile)
+ initIO
}
def num_inputs = _net.num_inputs()
@@ -62,7 +62,7 @@ class Net () {
def blobs:TreeMap[String,FND] = {
val out = new TreeMap[String, FND]
for (bname <- _net.blob_names) {
- out.insert(bname, BLOBtoFND(_net.blob_by_name(bname)))
+ out.insert(bname, BLOBtoFND(_net.blob_by_name(bname)))
}
out
}
@@ -73,9 +73,9 @@ class Net () {
val layer = _net.layer_by_name(lname)
val nblobs = layer.num_blobs
if (nblobs > 0) {
- val bb = new Array[FND](nblobs);
- for (i <- 0 until nblobs) bb(i) = BLOBtoFND(layer.blob(i));
- out.insert(lname, bb);
+ val bb = new Array[FND](nblobs)
+ for (i <- 0 until nblobs) bb(i) = BLOBtoFND(layer.blob(i))
+ out.insert(lname, bb)
}
}
out
@@ -84,17 +84,17 @@ class Net () {
def set_mean(mfile:String, varname:String = "image_mean") = {
var meanf:FND = load(mfile, varname) // Matlab means file is W < H < D, BGR
if (meanf.dims(0) != _image_dims(0) || meanf.dims(1) != _image_dims(1)) {
- meanf = meanf.transpose(2, 0, 1) // First go to resizing order D < W < H
- meanf = Image(meanf).resize(inwidth, inheight).toFND // Resize if needed
- meanf = meanf.transpose(1, 2, 0) // Now back to W < H < D
+ meanf = meanf.transpose(2, 0, 1) // First go to resizing order D < W < H
+ meanf = Image(meanf).resize(inwidth, inheight).toFND // Resize if needed
+ meanf = meanf.transpose(1, 2, 0) // Now back to W < H < D
}
meanf = crop(meanf)
_mean = meanf
}
- def set_input_scale(v:Float) = {_scale = v};
+ def set_input_scale(v:Float) = {_scale = v}
- def set_channel_swap(v:IMat) = {_channel_swap = v};
+ def set_channel_swap(v:IMat) = {_channel_swap = v}
- def set_image_dims(dd:Array[Int]) = {_image_dims = dd};
+ def set_image_dims(dd:Array[Int]) = {_image_dims = dd}
@@ -115,7 +115,7 @@ class Net () {
if (inputs != null) {
push(inputs)
} else {
- push_inputs
+ push_inputs
}
_net.forward
pull_outputs
@@ -139,15 +139,15 @@ class Net () {
val layer = _net.layer_by_name(x._1)
val nblobs = layer.num_blobs
if (nblobs > 0) {
- val bb = x._2
- for (i <- 0 until nblobs) {
- val blob = layer.blob(i)
- val fnd = bb(i)
- checkBlobDims(blob, fnd, "update params blob dim mismatch");
- blob.put_data(fnd.data);
- }
+ val bb = x._2
+ for (i <- 0 until nblobs) {
+ val blob = layer.blob(i)
+ val fnd = bb(i)
+ checkBlobDims(blob, fnd, "update params blob dim mismatch")
+ blob.put_data(fnd.data)
+ }
}
- });
+ })
}
def checkBlobDims(blob:BLOB, fnd:FND, fname:String) {
@@ -158,69 +158,69 @@ class Net () {
def pull(blobs:Iterable[(String,FND)]) = {
blobs.foreach((x:Tuple2[String,FND]) => {
- val bname = x._1;
- val fnd = x._2;
- val blob = _net.blob_by_name(bname);
- checkBlobDims(blob, fnd, "pull blob data");
- blob.get_data(fnd.data);
+ val bname = x._1
+ val fnd = x._2
+ val blob = _net.blob_by_name(bname)
+ checkBlobDims(blob, fnd, "pull blob data")
+ blob.get_data(fnd.data)
})
}
def pull_diffs(blobs:Iterable[(String,FND)]) = {
blobs.foreach((x:Tuple2[String,FND]) => {
- val bname = x._1;
- val fnd = x._2;
- val blob = _net.blob_by_name(bname);
- checkBlobDims(blob, fnd, "pull blob diffs");
- blob.get_diff(fnd.data);
+ val bname = x._1
+ val fnd = x._2
+ val blob = _net.blob_by_name(bname)
+ checkBlobDims(blob, fnd, "pull blob diffs")
+ blob.get_diff(fnd.data)
})
}
def push(blobs:Iterable[(String,FND)]) = {
blobs.foreach((x:Tuple2[String,FND]) => {
- val bname = x._1;
- val fnd = x._2;
- val blob = _net.blob_by_name(bname);
- checkBlobDims(blob, fnd, "push blob data");
- blob.put_data(fnd.data);
+ val bname = x._1
+ val fnd = x._2
+ val blob = _net.blob_by_name(bname)
+ checkBlobDims(blob, fnd, "push blob data")
+ blob.put_data(fnd.data)
})
}
def preprocess(im:Image):FND = { // Preprocess a D < W < H image
- var cafimg = im.toFND;
+ var cafimg = im.toFND
if (cafimg.dims(1) != _image_dims(0) || cafimg.dims(2) != _image_dims(1)) {
- cafimg = Image(cafimg).resize(_image_dims(0), _image_dims(1)).toFND;
+ cafimg = Image(cafimg).resize(_image_dims(0), _image_dims(1)).toFND
}
if (_scale != 1f) {
- cafimg = cafimg *@ _scale;
+ cafimg = cafimg *@ _scale
}
if (_channel_swap.asInstanceOf[AnyRef] != null) {
- cafimg = cafimg(_channel_swap, ?, ?);
+ cafimg = cafimg(_channel_swap, ?, ?)
}
- cafimg = cafimg.transpose(1, 2, 0); // to W < H < D
+ cafimg = cafimg.transpose(1, 2, 0) // to W < H < D
- cafimg = crop(cafimg);
+ cafimg = crop(cafimg)
if (_mean.asInstanceOf[AnyRef] != null) {
- cafimg = cafimg - _mean;
+ cafimg = cafimg - _mean
}
- cafimg;
+ cafimg
}
def crop(im:FND):FND = { // Image should be D < W < H
if (im.dims(0) > inwidth || im.dims(1) > inheight) {
- val x0 = (im.dims(0) - inwidth)/2;
- val y0 = (im.dims(1) - inheight)/2;
- val x1 = x0 + inwidth;
- val y1 = y0 + inheight;
- im(icol(x0->x1), icol(y0->y1), ?);
+ val x0 = (im.dims(0) - inwidth)/2
+ val y0 = (im.dims(1) - inheight)/2
+ val x1 = x0 + inwidth
+ val y1 = y0 + inheight
+ im(icol(x0->x1), icol(y0->y1), ?)
} else {
im
}
}
def clear_inputs = {
- for (i <- 0 until num_inputs) {
- input_data(i).clear
- }
+ for (i <- 0 until num_inputs) {
+ input_data(i).clear
+ }
}
def add_input(im:FND, i:Int, j:Int) = {
@@ -235,20 +235,20 @@ class Net () {
def push_inputs = {
for (i <- 0 until num_inputs) {
- _net.input_blob(i).put_data(input_data(i).data)
+ _net.input_blob(i).put_data(input_data(i).data)
}
}
def pull_outputs = {
- for (i <- 0 until num_outputs) {
- _net.output_blob(i).get_data(output_data(i).data)
- }
+ for (i <- 0 until num_outputs) {
+ _net.output_blob(i).get_data(output_data(i).data)
+ }
}
def pull_input_diffs = {
- for (i <- 0 until num_inputs) {
- _net.input_blob(i).get_diff(input_diff(i).data)
- }
+ for (i <- 0 until num_inputs) {
+ _net.input_blob(i).get_diff(input_diff(i).data)
+ }
}
def BLOBtoFND(b:BLOB):FND = {
diff --git a/src/main/scala/BIDMach/causal/IPTW.scala b/src/main/scala/BIDMach/causal/IPTW.scala
index 4d70855c..5da00e4a 100755
--- a/src/main/scala/BIDMach/causal/IPTW.scala
+++ b/src/main/scala/BIDMach/causal/IPTW.scala
@@ -32,7 +32,7 @@ class IPTW(opts:IPTW.Opts) extends RegressionModel(opts) {
for (i <- 0 until opts.links.length) {
totflops += GLM.linkArray(opts.links(i)).fnflops
}
- otargets = targets.rowslice(targets.nrows/2, targets.nrows);
+ otargets = targets.rowslice(targets.nrows/2, targets.nrows)
val tmats = new Array[Mat](3)
tmats(0) = modelmats(0)
tmats(1) = modelmats(0).zeros(targets.nrows/2,1)
@@ -53,7 +53,7 @@ class IPTW(opts:IPTW.Opts) extends RegressionModel(opts) {
def mupdate2(in:Mat, targ:Mat, ipass:Int, pos:Long) = {
val ftarg = full(targ)
- val treatment = ftarg.rowslice(0, ftarg.nrows/2);
+ val treatment = ftarg.rowslice(0, ftarg.nrows/2)
val outcome = ftarg.rowslice(ftarg.nrows/2, ftarg.nrows)
val eta = modelmats(0) * in
val feta = eta + 0f
@@ -109,11 +109,11 @@ object IPTW {
class Options extends Opts {}
def mkModel(fopts:Model.Opts) = {
- new IPTW(fopts.asInstanceOf[IPTW.Opts])
+ new IPTW(fopts.asInstanceOf[IPTW.Opts])
}
def mkUpdater(nopts:Updater.Opts) = {
- new ADAGrad(nopts.asInstanceOf[ADAGrad.Opts])
+ new ADAGrad(nopts.asInstanceOf[ADAGrad.Opts])
}
def mkRegularizer(nopts:Mixin.Opts):Array[Mixin] = {
@@ -132,13 +132,13 @@ object IPTW {
opts.batchSize = math.min(10000, mat0.ncols/30 + 1)
opts.lrate = 1f
opts.links = 1
- val nn = new Learner(
- new MatSource(Array(mat0:Mat), opts),
- new IPTW(opts),
- mkRegularizer(opts),
- new ADAGrad(opts),
- null,
- opts)
+ val nn = new Learner(
+ new MatSource(Array(mat0:Mat), opts),
+ new IPTW(opts),
+ mkRegularizer(opts),
+ new ADAGrad(opts),
+ null,
+ opts)
(nn, opts)
}
@@ -148,13 +148,13 @@ object IPTW {
val opts = new LearnParOptions
opts.batchSize = math.min(10000, mat0.ncols/30 + 1)
opts.lrate = 1f
- val nn = new ParLearnerF(
- new MatSource(Array(mat0), opts),
- opts, mkModel _,
- opts, mkRegularizer _,
- opts, mkUpdater _,
- null, null,
- opts)
+ val nn = new ParLearnerF(
+ new MatSource(Array(mat0), opts),
+ opts, mkModel _,
+ opts, mkRegularizer _,
+ opts, mkUpdater _,
+ null, null,
+ opts)
(nn, opts)
}
@@ -182,40 +182,40 @@ object IPTW {
def learnFParx(
nstart:Int=FileSource.encodeDate(2012,3,1,0),
- nend:Int=FileSource.encodeDate(2012,12,1,0),
- d:Int = 0
- ) = {
-
- val opts = new LearnFParOptions
- opts.lrate = 1f
- val nn = new ParLearnerxF(
- null,
- (dopts:DataSource.Opts, i:Int) => Experiments.Twitter.twitterWords(nstart, nend, opts.nthreads, i),
- opts, mkModel _,
- opts, mkRegularizer _,
- opts, mkUpdater _,
- null, null,
- opts
- )
- (nn, opts)
+ nend:Int=FileSource.encodeDate(2012,12,1,0),
+ d:Int = 0
+ ) = {
+
+ val opts = new LearnFParOptions
+ opts.lrate = 1f
+ val nn = new ParLearnerxF(
+ null,
+ (dopts:DataSource.Opts, i:Int) => Experiments.Twitter.twitterWords(nstart, nend, opts.nthreads, i),
+ opts, mkModel _,
+ opts, mkRegularizer _,
+ opts, mkUpdater _,
+ null, null,
+ opts
+ )
+ (nn, opts)
}
def learnFPar(
nstart:Int=FileSource.encodeDate(2012,3,1,0),
- nend:Int=FileSource.encodeDate(2012,12,1,0),
- d:Int = 0
- ) = {
- val opts = new LearnFParOptions
- opts.lrate = 1f
- val nn = new ParLearnerF(
- Experiments.Twitter.twitterWords(nstart, nend),
- opts, mkModel _,
+ nend:Int=FileSource.encodeDate(2012,12,1,0),
+ d:Int = 0
+ ) = {
+ val opts = new LearnFParOptions
+ opts.lrate = 1f
+ val nn = new ParLearnerF(
+ Experiments.Twitter.twitterWords(nstart, nend),
+ opts, mkModel _,
opts, mkRegularizer _,
- opts, mkUpdater _,
- null, null,
- opts
- )
- (nn, opts)
+ opts, mkUpdater _,
+ null, null,
+ opts
+ )
+ (nn, opts)
}
}
diff --git a/src/main/scala/BIDMach/datasinks/DataSink.scala b/src/main/scala/BIDMach/datasinks/DataSink.scala
index b479bc0c..5a8178f7 100755
--- a/src/main/scala/BIDMach/datasinks/DataSink.scala
+++ b/src/main/scala/BIDMach/datasinks/DataSink.scala
@@ -9,11 +9,11 @@ abstract class DataSink(val opts:DataSink.Opts = new DataSink.Options) extends S
private var _GUID = Mat.myrand.nextLong
def setGUID(v:Long):Unit = {_GUID = v}
def GUID:Long = _GUID
- def put;
+ def put
def init:Unit = {}
def close = {}
- private var _nmats = 0;
- def nmats = _nmats;
+ private var _nmats = 0
+ def nmats = _nmats
- def setnmats(k:Int) = {_nmats = k;}
+ def setnmats(k:Int) = {_nmats = k}
var omats:Array[Mat] = null
}
diff --git a/src/main/scala/BIDMach/datasinks/FileSink.scala b/src/main/scala/BIDMach/datasinks/FileSink.scala
index 54ee7592..dcc63a1a 100755
--- a/src/main/scala/BIDMach/datasinks/FileSink.scala
+++ b/src/main/scala/BIDMach/datasinks/FileSink.scala
@@ -6,51 +6,51 @@ import BIDMach.datasources._
import scala.collection.mutable.ListBuffer
class FileSink(override val opts:FileSink.Opts = new FileSink.Options) extends MatSink(opts) {
- var ifile = 0;
- var colsdone = 0;
+ var ifile = 0
+ var colsdone = 0
override def init = {
- blocks = new ListBuffer[Array[Mat]]();
- setnmats(opts.ofnames.length);
- omats = new Array[Mat](nmats);
- ifile = 0;
+ blocks = new ListBuffer[Array[Mat]]()
+ setnmats(opts.ofnames.length)
+ omats = new Array[Mat](nmats)
+ ifile = 0
opts match {
case fopts:FileSource.Opts => {
- ifile = fopts.nstart;
+ ifile = fopts.nstart
}
}
- colsdone = 0;
+ colsdone = 0
}
override def put = {
- blocks += omats.map(MatSink.copyCPUmat);
- colsdone += omats(0).ncols;
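+ // Cache a CPU copy of each minibatch; flush to a numbered file once ofcols columns accumulate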
+ blocks += omats.map(MatSink.copyCPUmat)
+ colsdone += omats(0).ncols
if (colsdone >= opts.ofcols) {
- mergeSaveBlocks;
- colsdone = 0;
- ifile += 1;
- blocks = new ListBuffer[Array[Mat]]();
+ mergeSaveBlocks
+ colsdone = 0
+ ifile += 1
+ blocks = new ListBuffer[Array[Mat]]()
}
}
override def close () = {
- mergeSaveBlocks;
+ mergeSaveBlocks
}
def mergeSaveBlocks = {
mergeBlocks
if (blocks.size > 0) {
- for (i <- 0 until opts.ofnames.length) {
- saveMat(opts.ofnames(i)(ifile), mats(i));
- }
+ for (i <- 0 until opts.ofnames.length) {
+ saveMat(opts.ofnames(i)(ifile), mats(i))
+ }
}
}
}
object FileSink {
trait Opts extends MatSink.Opts {
- var ofnames:List[(Int)=>String] = null;
- var ofcols = 100000;
+ var ofnames:List[(Int)=>String] = null
+ var ofcols = 100000
}
class Options extends Opts {
diff --git a/src/main/scala/BIDMach/datasinks/MatSink.scala b/src/main/scala/BIDMach/datasinks/MatSink.scala
index 0ecae365..4d905fa3 100755
--- a/src/main/scala/BIDMach/datasinks/MatSink.scala
+++ b/src/main/scala/BIDMach/datasinks/MatSink.scala
@@ -6,56 +6,56 @@ import scala.collection.mutable.ListBuffer
class MatSink(override val opts:MatSink.Opts = new MatSink.Options) extends DataSink(opts) {
- var blocks = new ListBuffer[Array[Mat]]();
- var mats:Array[Mat] = null;
+ var blocks = new ListBuffer[Array[Mat]]()
+ var mats:Array[Mat] = null
override def init = {
- blocks = new ListBuffer[Array[Mat]]();
- setnmats(opts.nmats);
- omats = new Array[Mat](nmats);
+ blocks = new ListBuffer[Array[Mat]]()
+ setnmats(opts.nmats)
+ omats = new Array[Mat](nmats)
}
def put = {
- blocks += omats.map(MatSink.copyCPUmat);
+ blocks += omats.map(MatSink.copyCPUmat)
}
- override def close () = mergeBlocks;
+ override def close () = mergeBlocks
def mergeBlocks = {
if (blocks.size > 0) {
- val ncols = blocks.map(_(0).ncols).reduce(_+_);
- val imats = blocks(0);
- val ablocks = blocks.toArray;
- mats = new Array[Mat](nmats);
- for (i <- 0 until nmats) {
- val nrows = imats(i).nrows;
- val nnz0 = imats(i) match {
- case i:SMat => i.nnz;
- case i:GSMat => i.nnz;
- case i:SDMat => i.nnz;
- case i:GSDMat => i.nnz;
- case _ => -1;
- }
- mats(i) = if (nnz0 >= 0) {
- val nnz = ablocks.map(_(i).nnz).reduce(_+_);
- SMat(nrows, ncols, nnz);
- } else {
- MatSink.makeCPUmat(imats(i), nrows, ncols);
- }
- var here = 0;
- for (j <- 0 until ablocks.length) {
- val am = ablocks(j)(i);
- am.colslice(0, am.ncols, mats(i), here, true);
- here += am.ncols;
- }
- }
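+ // Concatenate the cached blocks column-wise into one matrix per output stream, preallocating nnz for sparse types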
+ val ncols = blocks.map(_(0).ncols).reduce(_+_)
+ val imats = blocks(0)
+ val ablocks = blocks.toArray
+ mats = new Array[Mat](nmats)
+ for (i <- 0 until nmats) {
+ val nrows = imats(i).nrows
+ val nnz0 = imats(i) match {
+ case i:SMat => i.nnz
+ case i:GSMat => i.nnz
+ case i:SDMat => i.nnz
+ case i:GSDMat => i.nnz
+ case _ => -1
+ }
+ mats(i) = if (nnz0 >= 0) {
+ val nnz = ablocks.map(_(i).nnz).reduce(_+_)
+ SMat(nrows, ncols, nnz)
+ } else {
+ MatSink.makeCPUmat(imats(i), nrows, ncols)
+ }
+ var here = 0
+ for (j <- 0 until ablocks.length) {
+ val am = ablocks(j)(i)
+ am.colslice(0, am.ncols, mats(i), here, true)
+ here += am.ncols
+ }
+ }
}
}
}
object MatSink {
trait Opts extends DataSink.Opts {
- var nmats = 1;
+ var nmats = 1
}
class Options extends Opts {
@@ -63,28 +63,28 @@ object MatSink {
}
def copyCPUmat(m:Mat):Mat = {
- val nr = m.nrows;
- val nc = m.ncols;
- val out = makeCPUmat(m, nr, nc);
- out <-- m;
+ val nr = m.nrows
+ val nc = m.ncols
+ val out = makeCPUmat(m, nr, nc)
+ out <-- m
- out;
+ out
}
def makeCPUmat(m:Mat,nr:Int, nc:Int):Mat = {
- m match {
- case f:FMat => zeros(nr,nc);
- case g:GMat => zeros(nr,nc);
- case f:DMat => dzeros(nr,nc);
- case g:GDMat => dzeros(nr,nc);
- case i:IMat => izeros(nr,nc);
- case gi:GIMat => izeros(nr,nc);
- case l:LMat => lzeros(nr,nc);
- case l:GLMat => lzeros(nr,nc);
- case s:SMat => SMat(nr,nc,s.nnz);
- case s:GSMat => SMat(nr,nc,s.nnz);
- case s:SDMat => SDMat(nr,nc,s.nnz);
- case s:GSDMat => SDMat(nr,nc,s.nnz);
- }
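+ // Allocate a CPU matrix matching m's element type; GPU types map to their host equivalents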
+ m match {
+ case f:FMat => zeros(nr,nc)
+ case g:GMat => zeros(nr,nc)
+ case f:DMat => dzeros(nr,nc)
+ case g:GDMat => dzeros(nr,nc)
+ case i:IMat => izeros(nr,nc)
+ case gi:GIMat => izeros(nr,nc)
+ case l:LMat => lzeros(nr,nc)
+ case l:GLMat => lzeros(nr,nc)
+ case s:SMat => SMat(nr,nc,s.nnz)
+ case s:GSMat => SMat(nr,nc,s.nnz)
+ case s:SDMat => SDMat(nr,nc,s.nnz)
+ case s:GSDMat => SDMat(nr,nc,s.nnz)
+ }
}
}
diff --git a/src/main/scala/BIDMach/datasources/BlendedSource.scala b/src/main/scala/BIDMach/datasources/BlendedSource.scala
index 08abd242..ca15c6d4 100755
--- a/src/main/scala/BIDMach/datasources/BlendedSource.scala
+++ b/src/main/scala/BIDMach/datasources/BlendedSource.scala
@@ -83,7 +83,7 @@ class BlendedSource(val s1:DataSource, val s2:DataSource,
iptr1 = jptr1
} else {
while (iptr2 < mats2(0).ncols && rands2.data(iptr2/bBlock) > opts.samp2) iptr2 += bBlock
- if (iptr2 >= mats2(0).ncols) {
+ if (iptr2 >= mats2(0).ncols) {
mats2 = s2.next
iptr2 = 0
rand(0, 1f, opts.samp2)
@@ -129,10 +129,10 @@ class BlendedSource(val s1:DataSource, val s2:DataSource,
object BlendedSource {
trait Opts extends DataSource.Opts {
- var bBlock = 1000
- var afrac = 0.5f
- var samp1 = 1f
- var samp2 = 1f
+ var bBlock = 1000
+ var afrac = 0.5f
+ var samp1 = 1f
+ var samp2 = 1f
}
class Options extends Opts {}
diff --git a/src/main/scala/BIDMach/datasources/DataSource.scala b/src/main/scala/BIDMach/datasources/DataSource.scala
index 588fbee5..b0f00d27 100755
--- a/src/main/scala/BIDMach/datasources/DataSource.scala
+++ b/src/main/scala/BIDMach/datasources/DataSource.scala
@@ -1,40 +1,40 @@
-package BIDMach.datasources
-import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
-import BIDMat.MatFunctions._
-import BIDMat.SciFunctions._
-import java.io._
-
-@SerialVersionUID(100L)
-abstract class DataSource(val opts:DataSource.Opts = new DataSource.Options) extends Serializable {
- private var _GUID = Mat.myrand.nextLong
- def setGUID(v:Long):Unit = {_GUID = v}
- def GUID:Long = _GUID
- def next:Array[Mat]
- def hasNext:Boolean
- def reset:Unit
- def putBack(mats:Array[Mat],i:Int):Unit = {throw new RuntimeException("putBack not implemented")}
- def setupPutBack(n:Int,dim:Int):Unit = {throw new RuntimeException("putBack not implemented")}
- def nmats:Int
- def init:Unit
- def progress:Float
- def close = {}
- var omats:Array[Mat] = null
- var endmats:Array[Mat] = null
- var fullmats:Array[Mat] = null
-}
-
-
-object DataSource {
- trait Opts extends BIDMat.Opts {
- var batchSize = 10000
- var sizeMargin = 3f
- var sample = 1f
- var addConstFeat:Boolean = false
- var featType:Int = 1 // 0 = binary features, 1 = linear features, 2 = threshold features
- var featThreshold:Mat = null
- var putBack = -1
- }
-
- class Options extends Opts {}
-}
-
+package BIDMach.datasources
+import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
+import BIDMat.MatFunctions._
+import BIDMat.SciFunctions._
+import java.io._
+
+@SerialVersionUID(100L)
+abstract class DataSource(val opts:DataSource.Opts = new DataSource.Options) extends Serializable {
+ private var _GUID = Mat.myrand.nextLong
+ def setGUID(v:Long):Unit = {_GUID = v}
+ def GUID:Long = _GUID
+ def next:Array[Mat]
+ def hasNext:Boolean
+ def reset:Unit
+ def putBack(mats:Array[Mat],i:Int):Unit = {throw new RuntimeException("putBack not implemented")}
+ def setupPutBack(n:Int,dim:Int):Unit = {throw new RuntimeException("putBack not implemented")}
+ def nmats:Int
+ def init:Unit
+ def progress:Float
+ def close = {}
+ var omats:Array[Mat] = null
+ var endmats:Array[Mat] = null
+ var fullmats:Array[Mat] = null
+}
+
+
+object DataSource {
+ trait Opts extends BIDMat.Opts {
+ var batchSize = 10000
+ var sizeMargin = 3f
+ var sample = 1f
+ var addConstFeat:Boolean = false
+ var featType:Int = 1 // 0 = binary features, 1 = linear features, 2 = threshold features
+ var featThreshold:Mat = null
+ var putBack = -1
+ }
+
+ class Options extends Opts {}
+}
+
diff --git a/src/main/scala/BIDMach/datasources/FileSource.scala b/src/main/scala/BIDMach/datasources/FileSource.scala
index 13a2d74c..26e6b251 100755
--- a/src/main/scala/BIDMach/datasources/FileSource.scala
+++ b/src/main/scala/BIDMach/datasources/FileSource.scala
@@ -2,9 +2,9 @@ package BIDMach.datasources
import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
+import java.util.concurrent.ExecutorService
+import java.util.concurrent.Executors
+import java.util.concurrent.Future
import java.io._
class FileSource(override val opts:FileSource.Opts = new FileSource.Options) extends DataSource(opts) {
@@ -14,20 +14,20 @@ class FileSource(override val opts:FileSource.Opts = new FileSource.Options) ext
var rowno = 0
var nstart = 0
var nend = 0
- var fnames:List[(Int)=>String] = null;
- omats = null;
- var matqueue:Array[Array[Mat]] = null;
- var ready:IMat = null;
- var stop:Boolean = false;
- var pause:Boolean = true;
- var permfn:(Int)=>Int = null;
- var totalSize = 0;
- var fprogress:Float = 0;
- var lastMat:Array[Mat] = null;
- var lastFname:Array[String] = null;
- var executor:ExecutorService = null;
- var prefetchTasks:Array[Future[_]] = null;
- var prefetchers:Array[Prefetcher] = null;
+ var fnames:List[(Int)=>String] = null
+ omats = null
+ var matqueue:Array[Array[Mat]] = null
+ var ready:IMat = null
+ var stop:Boolean = false
+ var pause:Boolean = true
+ var permfn:(Int)=>Int = null
+ var totalSize = 0
+ var fprogress:Float = 0
+ var lastMat:Array[Mat] = null
+ var lastFname:Array[String] = null
+ var executor:ExecutorService = null
+ var prefetchTasks:Array[Future[_]] = null
+ var prefetchers:Array[Prefetcher] = null
def softperm(nstart:Int, nend:Int) = {
val dd1 = nstart / 24
@@ -36,50 +36,50 @@ class FileSource(override val opts:FileSource.Opts = new FileSource.Options) ext
val hh2 = nend % 24
val (dmy, ii) = sort2(rand(dd2-dd1+1+opts.lookahead,1))
(n:Int) => {
- val dd = n / 24
- val hh = n % 24
- val ddx = ii(dd-dd1)+dd1
- val ddx0 = ddx % 31
- val ddx1 = ddx / 31
- val hhdd = hh + 24 * (ddx0 - 1)
- (ddx1 * 31 + (hhdd % 31 + 1)) * 24 + hhdd / 31
+ val dd = n / 24
+ val hh = n % 24
+ val ddx = ii(dd-dd1)+dd1
+ val ddx0 = ddx % 31
+ val ddx1 = ddx / 31
+ val hhdd = hh + 24 * (ddx0 - 1)
+ (ddx1 * 31 + (hhdd % 31 + 1)) * 24 + hhdd / 31
}
}
def genperm(nstart:Int, nend:Int) = {
- val (dmy, ii) = sort2(rand(nend - nstart - 1,1));
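+ // Random permutation of file indices in [nstart, nend-1); the last index is left fixed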
+ val (dmy, ii) = sort2(rand(nend - nstart - 1,1))
(n:Int) => {
if (n >= nend - 1) {
n
} else {
- nstart + ii(n - nstart, 0);
+ nstart + ii(n - nstart, 0)
}
}
}
def initbase = {
- stop = false;
- pause = true;
+ stop = false
+ pause = true
if (opts.lookahead > 0) {
- executor = Executors.newFixedThreadPool(opts.lookahead + 2);
- prefetchers = new Array[Prefetcher](opts.lookahead);
- prefetchTasks = new Array[Future[_]](opts.lookahead);
+ executor = Executors.newFixedThreadPool(opts.lookahead + 2)
+ prefetchers = new Array[Prefetcher](opts.lookahead)
+ prefetchTasks = new Array[Future[_]](opts.lookahead)
}
ready = -iones(math.max(opts.lookahead,1), 1) // Numbers of files currently loaded in queue
reset
- rowno = 0;
- fileno = nstart; // Number of the current output file
+ rowno = 0
+ fileno = nstart // Number of the current output file
matqueue = new Array[Array[Mat]](math.max(1,opts.lookahead)) // Queue of matrices for each output matrix
for (i <- 0 until math.max(1,opts.lookahead)) {
- matqueue(i) = new Array[Mat](fnames.size);
+ matqueue(i) = new Array[Mat](fnames.size)
}
if (opts.putBack < 0) {
- for (i <- 0 until opts.lookahead) {
- prefetchers(i) = new Prefetcher(nstart + i);
- prefetchTasks(i) = executor.submit(prefetchers(i));
- }
+ for (i <- 0 until opts.lookahead) {
+ prefetchers(i) = new Prefetcher(nstart + i)
+ prefetchTasks(i) = executor.submit(prefetchers(i))
+ }
}
- pause = false;
+ pause = false
}
def reset = {
@@ -92,7 +92,7 @@ class FileSource(override val opts:FileSource.Opts = new FileSource.Options) ext
}
while (!fileExists(fnames(0)(nstart)) && nstart < nend) {nstart += 1}
if (nstart == nend) {
- throw new RuntimeException("Couldnt find any files");
+ throw new RuntimeException("Couldn't find any files")
}
if (opts.order == 0) {
permfn = (a:Int) => a
@@ -105,33 +105,33 @@ class FileSource(override val opts:FileSource.Opts = new FileSource.Options) ext
FileSource.encodeDate(yy, mm, hhdd % 31 + 1, hhdd / 31)
}
}
- rowno = 0;
- fileno = nstart;
+ rowno = 0
+ fileno = nstart
for (i <- 0 until math.max(1,opts.lookahead)) {
- val ifile = nstart + i;
- val ifilex = ifile % math.max(opts.lookahead, 1);
+ val ifile = nstart + i
+ val ifilex = ifile % math.max(opts.lookahead, 1)
ready.synchronized {
- ready(ifilex) = ifile - math.max(1, opts.lookahead);
+ ready(ifilex) = ifile - math.max(1, opts.lookahead)
}
}
- totalSize = nend - nstart;
- lastMat = new Array[Mat](fnames.size);
- lastFname = new Array[String](fnames.size);
- for (i <- 0 until lastMat.length) {lastMat(i) = null;}
- for (i <- 0 until lastFname.length) {lastFname(i) = null;}
+ totalSize = nend - nstart
+ lastMat = new Array[Mat](fnames.size)
+ lastFname = new Array[String](fnames.size)
+ for (i <- 0 until lastMat.length) {lastMat(i) = null}
+ for (i <- 0 until lastFname.length) {lastFname(i) = null}
}
def init = {
initbase
omats = new Array[Mat](fnames.size)
for (i <- 0 until fnames.size) {
- var mm = HMat.loadMat(fnames(i)(nstart));
- val (nr, nc) = if (opts.dorows) (blockSize, mm.ncols) else (mm.nrows, blockSize);
+ var mm = HMat.loadMat(fnames(i)(nstart))
+ val (nr, nc) = if (opts.dorows) (blockSize, mm.ncols) else (mm.nrows, blockSize)
omats(i) = mm match {
- case mf:FMat => FMat.newOrCheckFMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_FMat".##);
- case mi:IMat => IMat.newOrCheckIMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_IMat".##);
- case md:DMat => DMat.newOrCheckDMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_DMat".##);
- case ms:SMat => SMat.newOrCheckSMat(nr, nc, nc * opts.eltsPerSample, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_SMat".##);
+ case mf:FMat => FMat.newOrCheckFMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_FMat".##)
+ case mi:IMat => IMat.newOrCheckIMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_IMat".##)
+ case md:DMat => DMat.newOrCheckDMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_DMat".##)
+ case ms:SMat => SMat.newOrCheckSMat(nr, nc, nc * opts.eltsPerSample, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_SMat".##)
}
}
}
@@ -143,63 +143,63 @@ class FileSource(override val opts:FileSource.Opts = new FileSource.Options) ext
def nmats = omats.length
def next:Array[Mat] = {
- var donextfile = false;
- var todo = blockSize;
- val featType = opts.featType;
- val threshold = opts.featThreshold;
+ var donextfile = false
+ var todo = blockSize
+ val featType = opts.featType
+ val threshold = opts.featThreshold
while (todo > 0 && fileno < nend) {
- var nrow = rowno;
- val filex = fileno % math.max(1, opts.lookahead);
-// println("todo %d, fileno %d, filex %d, rowno %d" format (todo, fileno, filex, rowno))
- if (opts.putBack < 0 && opts.lookahead > 0) {
- while (ready(filex) < fileno) {
- if (opts.traceFileSource > 0) println("next %d %d %s" format (fileno, filex, ready.t.toString));
- Thread.sleep(1); //`yield`
- }
- } else {
- fetch
- }
- var matqnr = 0
- for (i <- 0 until fnames.size) {
- val matq = matqueue(filex)(i);
- if (matq.asInstanceOf[AnyRef] != null) {
- matqnr = if (opts.dorows) matq.nrows else matq.ncols;
- nrow = math.min(rowno + todo, matqnr);
- val off = Mat.oneBased
- if (opts.dorows) {
- val nc = omats(i).ncols;
- val nr = nrow - rowno + blockSize - todo - off;
- omats(i) = checkCaches(nr, nc, omats(i), GUID, i); // otherwise, check for a cached copy
- omats(i) = matq.rowslice(rowno, nrow, omats(i), blockSize - todo);
- } else {
- val nr = omats(i).nrows;
- val nc = nrow - rowno + blockSize - todo - off;
- omats(i) = checkCaches(nr, nc, omats(i), GUID, i);
- omats(i) = matq.colslice(rowno, nrow, omats(i), blockSize - todo);
- }
+ var nrow = rowno
+ val filex = fileno % math.max(1, opts.lookahead)
+// println("todo %d, fileno %d, filex %d, rowno %d" format (todo, fileno, filex, rowno))
+ if (opts.putBack < 0 && opts.lookahead > 0) {
+ while (ready(filex) < fileno) {
+ if (opts.traceFileSource > 0) println("next %d %d %s" format (fileno, filex, ready.t.toString))
+ Thread.sleep(1) //`yield`
+ }
+ } else {
+ fetch
+ }
+ var matqnr = 0
+ for (i <- 0 until fnames.size) {
+ val matq = matqueue(filex)(i)
+ if (matq.asInstanceOf[AnyRef] != null) {
+ matqnr = if (opts.dorows) matq.nrows else matq.ncols
+ nrow = math.min(rowno + todo, matqnr)
+ val off = Mat.oneBased
+ if (opts.dorows) {
+ val nc = omats(i).ncols
+ val nr = nrow - rowno + blockSize - todo - off
+ omats(i) = checkCaches(nr, nc, omats(i), GUID, i) // otherwise, check for a cached copy
+ omats(i) = matq.rowslice(rowno, nrow, omats(i), blockSize - todo)
+ } else {
+ val nr = omats(i).nrows
+ val nc = nrow - rowno + blockSize - todo - off
+ omats(i) = checkCaches(nr, nc, omats(i), GUID, i)
+ omats(i) = matq.colslice(rowno, nrow, omats(i), blockSize - todo)
+ }
- if (featType == 0) {
- min(1f, omats(i), omats(i));
- } else if (featType == 2) {
- omats(i) ~ omats(i) >= threshold;
- }
- if (matqnr == nrow) donextfile = true;
- } else {
- if (opts.throwMissing) {
- throw new RuntimeException("Missing file "+fileno);
- }
- donextfile = true;
- }
- }
- todo -= nrow - rowno;
- if (donextfile) {
- rowno = 0;
- fileno += 1;
- donextfile = false;
- } else {
- rowno = nrow;
- }
- fprogress = rowno*1f / matqnr;
+ if (featType == 0) {
+ min(1f, omats(i), omats(i))
+ } else if (featType == 2) {
+ omats(i) ~ omats(i) >= threshold
+ }
+ if (matqnr == nrow) donextfile = true
+ } else {
+ if (opts.throwMissing) {
+ throw new RuntimeException("Missing file "+fileno)
+ }
+ donextfile = true
+ }
+ }
+ todo -= nrow - rowno
+ if (donextfile) {
+ rowno = 0
+ fileno += 1
+ donextfile = false
+ } else {
+ rowno = nrow
+ }
+ fprogress = rowno*1f / matqnr
}
omats
}
@@ -220,99 +220,99 @@ class FileSource(override val opts:FileSource.Opts = new FileSource.Options) ext
class Prefetcher(val ifile:Int) extends Runnable {
- def run() = {
- val ifilex = ifile % opts.lookahead;
- ready.synchronized {
- ready(ifilex) = ifile - opts.lookahead;
- }
- while (!stop) {
- while (pause || (ready(ifilex) >= fileno && !stop)) {
- if (opts.traceFileSource > 0) println("prefetch %d %d %s" format (ifilex, fileno, ready.t.toString));
- Thread.sleep(1); // Thread.`yield`
- }
- if (!stop) {
- val inew = ready(ifilex) + opts.lookahead;
- val pnew = permfn(inew);
- val fexists = fileExists(fnames(0)(pnew)) && (rand(1,1).v <= opts.sampleFiles);
- if (opts.traceFileSource > 0) println("prefetch %d %d pnew %d %b" format (ifilex, fileno, pnew, fexists));
- for (i <- 0 until fnames.size) {
- if (fexists) {
- val fname = fnames(i)(pnew);
-// println("loading %d %d %d %s" format (inew, pnew, i, fname));
- var oldmat:Mat = null;
- matqueue.synchronized {
- oldmat = matqueue(ifilex)(i);
- }
- if (opts.traceFileSource > 0) println("prefetch %d %d pnew %d reading %d %s" format (ifilex, fileno, pnew, i, fname));
- val newmat:Mat = try {
- HMat.loadMat(fname, oldmat);
- } catch {
- case e:Exception => {println(stackTraceString(e)); null}
- case _:Throwable => null
- }
- if (opts.traceFileSource > 0) println("prefetch %d %d pnew %d read %d %s " format (ifilex, fileno, pnew, i, fname));
- matqueue.synchronized {
- matqueue(ifilex)(i) = newmat;
- }
- } else {
- if (opts.throwMissing && inew < nend) {
- throw new RuntimeException("Missing file "+fnames(i)(pnew));
- }
- matqueue.synchronized {
- matqueue(ifilex)(i) = null;
- }
- }
- // println("%d" format inew)
- }
- ready.synchronized {
- ready(ifilex) = inew;
- }
- }
- }
- }
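+ // Each prefetcher owns queue slot ifile % lookahead and keeps it one lookahead step ahead of the consumer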
+ def run() = {
+ val ifilex = ifile % opts.lookahead
+ ready.synchronized {
+ ready(ifilex) = ifile - opts.lookahead
+ }
+ while (!stop) {
+ while (pause || (ready(ifilex) >= fileno && !stop)) {
+ if (opts.traceFileSource > 0) println("prefetch %d %d %s" format (ifilex, fileno, ready.t.toString))
+ Thread.sleep(1) // Thread.`yield`
+ }
+ if (!stop) {
+ val inew = ready(ifilex) + opts.lookahead
+ val pnew = permfn(inew)
+ val fexists = fileExists(fnames(0)(pnew)) && (rand(1,1).v <= opts.sampleFiles)
+ if (opts.traceFileSource > 0) println("prefetch %d %d pnew %d %b" format (ifilex, fileno, pnew, fexists))
+ for (i <- 0 until fnames.size) {
+ if (fexists) {
+ val fname = fnames(i)(pnew)
+// println("loading %d %d %d %s" format (inew, pnew, i, fname))
+ var oldmat:Mat = null
+ matqueue.synchronized {
+ oldmat = matqueue(ifilex)(i)
+ }
+ if (opts.traceFileSource > 0) println("prefetch %d %d pnew %d reading %d %s" format (ifilex, fileno, pnew, i, fname))
+ val newmat:Mat = try {
+ HMat.loadMat(fname, oldmat)
+ } catch {
+ case e:Exception => {println(stackTraceString(e)); null}
+ case _:Throwable => null
+ }
+ if (opts.traceFileSource > 0) println("prefetch %d %d pnew %d read %d %s " format (ifilex, fileno, pnew, i, fname))
+ matqueue.synchronized {
+ matqueue(ifilex)(i) = newmat
+ }
+ } else {
+ if (opts.throwMissing && inew < nend) {
+ throw new RuntimeException("Missing file "+fnames(i)(pnew))
+ }
+ matqueue.synchronized {
+ matqueue(ifilex)(i) = null
+ }
+ }
+ // println("%d" format inew)
+ }
+ ready.synchronized {
+ ready(ifilex) = inew
+ }
+ }
+ }
+ }
}
def checkCaches(nr:Int, nc:Int, out:Mat, GUID:Long, i:Int):Mat = {
if (nr == out.nrows && nc == out.ncols) {
out
} else {
- out match {
- case a:FMat => FMat.newOrCheckFMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_FMat".##);
- case a:IMat => IMat.newOrCheckIMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_IMat".##);
- case a:DMat => DMat.newOrCheckDMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_DMat".##);
- case a:SMat => SMat.newOrCheckSMat(nr, nc, a.nnz, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_SMat".##);
- }
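+ // Shape mismatch: fetch (or allocate) a cached matrix of the requested size and same type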
+ out match {
+ case a:FMat => FMat.newOrCheckFMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_FMat".##)
+ case a:IMat => IMat.newOrCheckIMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_IMat".##)
+ case a:DMat => DMat.newOrCheckDMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_DMat".##)
+ case a:SMat => SMat.newOrCheckSMat(nr, nc, a.nnz, null, GUID, i, ((nr*1L) << 32) + nc, "FileSource_SMat".##)
+ }
}
}
def fetch = {
if (ready(0) < fileno) {
- val pnew = permfn(fileno);
- val fexists = fileExists(fnames(0)(pnew)) && (rand(1,1).v <= opts.sampleFiles);
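+ // Synchronous path (no prefetch threads): load the next file directly into queue slot 0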
+ val pnew = permfn(fileno)
+ val fexists = fileExists(fnames(0)(pnew)) && (rand(1,1).v <= opts.sampleFiles)
for (i <- 0 until fnames.size) {
if (fexists && lastMat(i).asInstanceOf[AnyRef] != null) {
-// HMat.saveMat(lastFname(i), lastMat(i));
+// HMat.saveMat(lastFname(i), lastMat(i))
}
matqueue(0)(i) = if (fexists) {
- val tmp = HMat.loadMat(fnames(i)(pnew), matqueue(0)(i));
- lastFname(i) = fnames(i)(pnew);
- lastMat(i) = tmp;
- tmp;
+ val tmp = HMat.loadMat(fnames(i)(pnew), matqueue(0)(i))
+ lastFname(i) = fnames(i)(pnew)
+ lastMat(i) = tmp
+ tmp
} else {
if ((opts.sampleFiles >= 1.0f) && opts.throwMissing) {
- throw new RuntimeException("Missing file "+fnames(i)(pnew));
+ throw new RuntimeException("Missing file "+fnames(i)(pnew))
}
- null;
+ null
}
}
- ready(0) = fileno;
+ ready(0) = fileno
}
}
def stackTraceString(e:Exception):String = {
- val sw = new StringWriter;
- e.printStackTrace(new PrintWriter(sw));
- sw.toString;
+ val sw = new StringWriter
+ e.printStackTrace(new PrintWriter(sw))
+ sw.toString
}
@@ -323,9 +323,9 @@ class FileSource(override val opts:FileSource.Opts = new FileSource.Options) ext
override def close = {
stop = true
for (i <- 0 until opts.lookahead) {
- prefetchTasks(i).cancel(true);
+ prefetchTasks(i).cancel(true)
}
- if (executor != null) executor.shutdown();
+ if (executor != null) executor.shutdown()
}
}
@@ -333,29 +333,29 @@ class FileSource(override val opts:FileSource.Opts = new FileSource.Options) ext
object FileSource {
def apply(opts:FileSource.Opts, nthreads:Int):FileSource = {
- implicit val ec = threadPool(nthreads);
- new FileSource(opts);
+ implicit val ec = threadPool(nthreads)
+ new FileSource(opts)
}
- def apply(opts:FileSource.Opts):FileSource = apply(opts, 4);
+ def apply(opts:FileSource.Opts):FileSource = apply(opts, 4)
def apply(fname:String, opts:FileSource.Opts, nthreads:Int):FileSource = {
- opts.fnames = List(simpleEnum(fname, 1, 0));
- implicit val ec = threadPool(nthreads);
- new FileSource(opts);
+ opts.fnames = List(simpleEnum(fname, 1, 0))
+ implicit val ec = threadPool(nthreads)
+ new FileSource(opts)
}
- def apply(fname:String, opts:FileSource.Opts):FileSource = apply(fname, opts, 4);
+ def apply(fname:String, opts:FileSource.Opts):FileSource = apply(fname, opts, 4)
- def apply(fname:String):FileSource = apply(fname, new FileSource.Options, 4);
+ def apply(fname:String):FileSource = apply(fname, new FileSource.Options, 4)
def apply(fn1:String, fn2:String, opts:FileSource.Opts, nthreads:Int) = {
- opts.fnames = List(simpleEnum(fn1, 1, 0), simpleEnum(fn2, 1, 0));
- implicit val ec = threadPool(nthreads);
- new FileSource(opts);
+ opts.fnames = List(simpleEnum(fn1, 1, 0), simpleEnum(fn2, 1, 0))
+ implicit val ec = threadPool(nthreads)
+ new FileSource(opts)
}
- def apply(fn1:String, fn2:String, opts:FileSource.Opts):FileSource = apply(fn1, fn2, opts, 4);
+ def apply(fn1:String, fn2:String, opts:FileSource.Opts):FileSource = apply(fn1, fn2, opts, 4)
def encodeDate(yy:Int, mm:Int, dd:Int, hh:Int) = (((12*yy + mm) * 31) + dd)*24 + hh
@@ -370,16 +370,16 @@ object FileSource {
def sampleFun(fname:String):(Int)=>String = {
(n:Int) => {
- val (yy, mm, dd, hh) = decodeDate(n)
- (fname format ((n / 24) % 16, yy, mm, dd, hh))
+ val (yy, mm, dd, hh) = decodeDate(n)
+ (fname format ((n / 24) % 16, yy, mm, dd, hh))
}
}
def sampleFun(fname:String, m:Int, i:Int):(Int)=>String = {
(n0:Int) => {
val n = n0 * m + i
- val (yy, mm, dd, hh) = decodeDate(n)
- (fname format ((n / 24) % 16, yy, mm, dd, hh))
+ val (yy, mm, dd, hh) = decodeDate(n)
+ (fname format ((n / 24) % 16, yy, mm, dd, hh))
}
}
@@ -390,20 +390,20 @@ object FileSource {
}
}
- def simpleEnum(fname:String):(Int)=>String = simpleEnum(fname,1,0);
+ def simpleEnum(fname:String):(Int)=>String = simpleEnum(fname,1,0)
trait Opts extends DataSource.Opts {
- val localDir:String = ""
- var fnames:List[(Int)=>String] = null
- var lookahead = 2
- var sampleFiles = 1.0f
+ val localDir:String = ""
+ var fnames:List[(Int)=>String] = null
+ var lookahead = 2
+ var sampleFiles = 1.0f
var nstart:Int = 0
var nend:Int = 0
var dorows:Boolean = false
var order:Int = 0 // 0 = sequential order, 1 = random
- var eltsPerSample = 10;
+ var eltsPerSample = 10
var throwMissing:Boolean = false
- var traceFileSource = 0;
+ var traceFileSource = 0
}
class Options extends Opts {}
diff --git a/src/main/scala/BIDMach/datasources/IteratorSource.scala b/src/main/scala/BIDMach/datasources/IteratorSource.scala
index dbefe36f..6f5513f5 100755
--- a/src/main/scala/BIDMach/datasources/IteratorSource.scala
+++ b/src/main/scala/BIDMach/datasources/IteratorSource.scala
@@ -9,117 +9,117 @@ import java.io._
/**
* Datasource designed to work with Iterators as provided by Spark.
- * We assume the iterator returns pairs from a Sequencefile of (StringWritable, MatIO)
+ * We assume the iterator returns pairs from a SequenceFile of (StringWritable, MatIO)
*/
class IteratorSource(override val opts:IteratorSource.Opts = new IteratorSource.Options) extends DataSource(opts) {
- var sizeMargin = 0f;
- var blockSize = 0;
- var samplesDone = 0;
- var nmats = 1;
- omats = null;
+ var sizeMargin = 0f
+ var blockSize = 0
+ var samplesDone = 0
+ var nmats = 1
+ omats = null
var fprogress:Float = 0
- var inMats:Array[Mat] = null;
- var inFname:Array[String] = null;
- @transient var iter:Iterator[(AnyRef, MatIOtrait)] = null;
- var nblocks = -1;
- var iblock = 0;
+ var inMats:Array[Mat] = null
+ var inFname:Array[String] = null
+ @transient var iter:Iterator[(AnyRef, MatIOtrait)] = null
+ var nblocks = -1
+ var iblock = 0
def reset = {
- samplesDone = 0;
- iblock = 0;
+ samplesDone = 0
+ iblock = 0
}
def init = {
- samplesDone = 0;
- iter = opts.iter;
- blockSize = opts.batchSize;
- iterNext;
- nmats = inMats.length;
- inFname = new Array[String](nmats);
- omats = new Array[Mat](nmats);
+ samplesDone = 0
+ iter = opts.iter
+ blockSize = opts.batchSize
+ iterNext
+ nmats = inMats.length
+ inFname = new Array[String](nmats)
+ omats = new Array[Mat](nmats)
for (i <- 0 until nmats) {
- val mm = inMats(i);
- val (nr, nc) = if (opts.dorows) (blockSize, mm.ncols) else (mm.nrows, blockSize);
+ val mm = inMats(i)
+ val (nr, nc) = if (opts.dorows) (blockSize, mm.ncols) else (mm.nrows, blockSize)
omats(i) = mm match {
- case mf:FMat => FMat.newOrCheckFMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_FMat".##);
- case mi:IMat => IMat.newOrCheckIMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_IMat".##);
- case md:DMat => DMat.newOrCheckDMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_DMat".##);
- case ms:SMat => SMat.newOrCheckSMat(nr, nc, nc * opts.eltsPerSample, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_SMat".##);
+ case mf:FMat => FMat.newOrCheckFMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_FMat".##)
+ case mi:IMat => IMat.newOrCheckIMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_IMat".##)
+ case md:DMat => DMat.newOrCheckDMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_DMat".##)
+ case ms:SMat => SMat.newOrCheckSMat(nr, nc, nc * opts.eltsPerSample, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_SMat".##)
}
}
}
- def next:Array[Mat] = {;
- var donextfile = false;
- var todo = blockSize;
- val featType = opts.featType;
- val threshold = opts.featThreshold;
+ def next:Array[Mat] = {
+ var donextfile = false
+ var todo = blockSize
+ val featType = opts.featType
+ val threshold = opts.featThreshold
while (todo > 0) {
- var samplesTodo = samplesDone;
- var matqnr = 0
- for (i <- 0 until nmats) {
- val matq = inMats(i);
- if (matq.asInstanceOf[AnyRef] != null) {
- matqnr = if (opts.dorows) matq.nrows else matq.ncols;
- samplesTodo = math.min(samplesDone + todo, matqnr);
- val off = Mat.oneBased
- if (opts.dorows) {
- val nc = omats(i).ncols;
- val nr = samplesTodo - samplesDone + blockSize - todo - off;
- omats(i) = checkCaches(nr, nc, omats(i), GUID, i); // otherwise, check for a cached copy
- omats(i) = matq.rowslice(samplesDone, samplesTodo, omats(i), blockSize - todo);
- } else {
- val nr = omats(i).nrows;
- val nc = samplesTodo - samplesDone + blockSize - todo - off;
- omats(i) = checkCaches(nr, nc, omats(i), GUID, i);
- omats(i) = matq.colslice(samplesDone, samplesTodo, omats(i), blockSize - todo);
- }
+ var samplesTodo = samplesDone
+ var matqnr = 0
+ for (i <- 0 until nmats) {
+ val matq = inMats(i)
+ if (matq.asInstanceOf[AnyRef] != null) {
+ matqnr = if (opts.dorows) matq.nrows else matq.ncols
+ samplesTodo = math.min(samplesDone + todo, matqnr)
+ val off = Mat.oneBased
+ if (opts.dorows) {
+ val nc = omats(i).ncols
+ val nr = samplesTodo - samplesDone + blockSize - todo - off
+ omats(i) = checkCaches(nr, nc, omats(i), GUID, i) // otherwise, check for a cached copy
+ omats(i) = matq.rowslice(samplesDone, samplesTodo, omats(i), blockSize - todo)
+ } else {
+ val nr = omats(i).nrows
+ val nc = samplesTodo - samplesDone + blockSize - todo - off
+ omats(i) = checkCaches(nr, nc, omats(i), GUID, i)
+ omats(i) = matq.colslice(samplesDone, samplesTodo, omats(i), blockSize - todo)
+ }
- if (featType == 0) {
- min(1f, omats(i), omats(i));
- } else if (featType == 2) {
- omats(i) ~ omats(i) >= threshold;
- }
- if (matqnr == samplesTodo) donextfile = true;
- } else {
- donextfile = true;
- }
- }
- todo -= samplesTodo - samplesDone;
- if (donextfile) {
- samplesDone = 0;
- if (iterHasNext) {
- iterNext();
- }
- donextfile = false;
- } else {
- samplesDone = samplesTodo;
- }
- fprogress = samplesDone*1f / matqnr;
+ if (featType == 0) {
+ min(1f, omats(i), omats(i))
+ } else if (featType == 2) {
+ omats(i) ~ omats(i) >= threshold
+ }
+ if (matqnr == samplesTodo) donextfile = true
+ } else {
+ donextfile = true
+ }
+ }
+ todo -= samplesTodo - samplesDone
+ if (donextfile) {
+ samplesDone = 0
+ if (iterHasNext) {
+ iterNext()
+ }
+ donextfile = false
+ } else {
+ samplesDone = samplesTodo
+ }
+ fprogress = samplesDone*1f / matqnr
}
- omats;
+ omats
}
def progress:Float = {
if (nblocks > 0) {
- (fprogress + iblock-1)/nblocks;
+ (fprogress + iblock-1)/nblocks
} else 0f
}
def hasNext:Boolean = {
- val matq = inMats(0);
- val matqnr = if (opts.dorows) matq.nrows else matq.ncols;
- val ihn = iter.hasNext;
+ val matq = inMats(0)
+ val matqnr = if (opts.dorows) matq.nrows else matq.ncols
+ val ihn = iter.hasNext
if (! ihn && iblock > 0) {
- nblocks = iblock;
+ nblocks = iblock
}
- (ihn || (matqnr - samplesDone) == 0);
+ (ihn || (matqnr - samplesDone) == 0)
}
def iterHasNext:Boolean = {
- iblock += 1;
- iter.hasNext;
+ iblock += 1
+ iter.hasNext
}
def iterNext() = {
@@ -139,21 +139,21 @@ class IteratorSource(override val opts:IteratorSource.Opts = new IteratorSource.
if (nr == out.nrows && nc == out.ncols) {
out
} else {
- out match {
- case a:FMat => FMat.newOrCheckFMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_FMat".##);
- case a:IMat => IMat.newOrCheckIMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_IMat".##);
- case a:DMat => DMat.newOrCheckDMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_DMat".##);
- case a:SMat => SMat.newOrCheckSMat(nr, nc, a.nnz, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_SMat".##);
- }
+ out match {
+ case a:FMat => FMat.newOrCheckFMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_FMat".##)
+ case a:IMat => IMat.newOrCheckIMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_IMat".##)
+ case a:DMat => DMat.newOrCheckDMat(nr, nc, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_DMat".##)
+ case a:SMat => SMat.newOrCheckSMat(nr, nc, a.nnz, null, GUID, i, ((nr*1L) << 32) + nc, "IteratorSource_SMat".##)
+ }
}
}
override def close = {
- inMats = null;
- omats = null
- opts.iter = null
- iter = null
+ inMats = null
+ omats = null
+ opts.iter = null
+ iter = null
// stop = true
}
}
@@ -162,14 +162,14 @@ class IteratorSource(override val opts:IteratorSource.Opts = new IteratorSource.
object IteratorSource {
def apply(opts:IteratorSource.Opts):IteratorSource = {
- new IteratorSource(opts);
+ new IteratorSource(opts)
}
trait Opts extends DataSource.Opts {
- var nmats = 1;
+ var nmats = 1
var dorows:Boolean = false
- @transient var iter:Iterator[Tuple2[AnyRef, MatIOtrait]] = null;
- var eltsPerSample = 10;
+ @transient var iter:Iterator[Tuple2[AnyRef, MatIOtrait]] = null
+ var eltsPerSample = 10
var throwMissing:Boolean = false;
}
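// A minimal wiring sketch, assuming `partIter` is an iterator of the
// documented (key, MatIOtrait) pairs, e.g. from a Spark partition:
val iopts = new IteratorSource.Options
iopts.iter = partIter        // Iterator[(AnyRef, MatIOtrait)], hypothetical
iopts.batchSize = 10000
val ids = new IteratorSource(iopts)
ids.init
while (ids.hasNext) {
  val mats = ids.next        // Array[Mat], one entry per input matrix
}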
diff --git a/src/main/scala/BIDMach/datasources/MatSource.scala b/src/main/scala/BIDMach/datasources/MatSource.scala
index e45c167c..7551ef75 100755
--- a/src/main/scala/BIDMach/datasources/MatSource.scala
+++ b/src/main/scala/BIDMach/datasources/MatSource.scala
@@ -11,7 +11,7 @@ class MatSource(var mats:Array[Mat], override val opts:MatSource.Opts = new MatS
var there = 0
var blockSize = 0
var totalSize = 0
- var umat:Mat = null;
+ var umat:Mat = null
def init = {
sizeMargin = opts.sizeMargin
@@ -38,16 +38,16 @@ class MatSource(var mats:Array[Mat], override val opts:MatSource.Opts = new MatS
def next:Array[Mat] = {
here = math.min(here+blockSize, mats(0).ncols)
there = math.min(here+blockSize, mats(0).ncols)
- for (i <- 0 until mats.length) {
- if (there - here == blockSize) {
- fullmats(i) = mats(i).colslice(here, there, fullmats(i))
- omats(i) = fullmats(i)
- } else {
- endmats(i) = mats(i).colslice(here, there, endmats(i))
- omats(i) = endmats(i)
- }
- }
- omats
+ for (i <- 0 until mats.length) {
+ if (there - here == blockSize) {
+ fullmats(i) = mats(i).colslice(here, there, fullmats(i))
+ omats(i) = fullmats(i)
+ } else {
+ endmats(i) = mats(i).colslice(here, there, endmats(i))
+ omats(i) = endmats(i)
+ }
+ }
+ omats
}
def hasNext:Boolean = {
@@ -61,7 +61,7 @@ class MatSource(var mats:Array[Mat], override val opts:MatSource.Opts = new MatS
newmats(i) = mats(i)
}
for (i <- mats.length until n+1) {
- newmats(i) = zeros(dim, mats(0).ncols)
+ newmats(i) = zeros(dim, mats(0).ncols)
}
mats = newmats
}
@@ -69,7 +69,7 @@ class MatSource(var mats:Array[Mat], override val opts:MatSource.Opts = new MatS
override def putBack(tmats:Array[Mat],n:Int):Unit = {
for (i <- 1 to n)
- tmats(i).colslice(0, tmats(i).ncols, mats(i), here, true);
+ tmats(i).colslice(0, tmats(i).ncols, mats(i), here, true)
}
def progress = {
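// A minimal in-memory sketch (data and labels hypothetical): MatSource
// serves blockSize-column slices of its input matrices on each call to next:
val mopts = new MatSource.Options
mopts.batchSize = 1000
val ms = new MatSource(Array(data, labels), mopts)
ms.init
while (ms.hasNext) {
  val Array(x, y) = ms.next  // aligned minibatch slices
}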
diff --git a/src/main/scala/BIDMach/datasources/SFileSource.scala b/src/main/scala/BIDMach/datasources/SFileSource.scala
index 6b3f67d8..10b9cced 100755
--- a/src/main/scala/BIDMach/datasources/SFileSource.scala
+++ b/src/main/scala/BIDMach/datasources/SFileSource.scala
@@ -12,7 +12,7 @@ import java.io._
* The IMats are 3-column with column, row indices and integer values.
* This format allows dynamic construction of the SMat with a specified bound on the max row index,
* and with specified featurization (e.g. clipped to 1, linear, logarithmic etc.).
- * fcounts is an IMat specifying the numbers of rows to use for each input block.
+ * fcounts is an IMat specifying the number of rows to use for each input block.
*/
class SFileSourcev1(override val opts:SFileSource.Opts = new SFileSource.Options) extends FileSource(opts) {
@@ -58,8 +58,8 @@ class SFileSourcev1(override val opts:SFileSource.Opts = new SFileSource.Options
val threshold = opts.featThreshold
var j = 0
while (j < nfiles) {
- inptrs(j, 0) = binFind(rowno, inmat(j))
- j += 1
+ inptrs(j, 0) = binFind(rowno, inmat(j))
+ j += 1
}
var irow = rowno
while (irow < nrow) {
@@ -108,8 +108,8 @@ class SFileSourcev1(override val opts:SFileSource.Opts = new SFileSource.Options
var maxv = 0
for (i <- 0 until matq.length) {
if (matq(i).asInstanceOf[AnyRef] != null) {
- val mat = matq(i).asInstanceOf[IMat]
- maxv = math.max(maxv, mat(mat.nrows-1,0))
+ val mat = matq(i).asInstanceOf[IMat]
+ maxv = math.max(maxv, mat(mat.nrows-1,0))
}
}
maxv
@@ -137,35 +137,35 @@ class SFileSourcev1(override val opts:SFileSource.Opts = new SFileSource.Options
var todo = opts.batchSize
flushMat(omats(0))
while (todo > 0 && fileno < nend) {
- var nrow = rowno
- val filex = fileno % math.max(1, opts.lookahead)
- if (opts.lookahead > 0) {
- while (ready(filex) < fileno) Thread.sleep(1); // `yield`
- } else {
+ var nrow = rowno
+ val filex = fileno % math.max(1, opts.lookahead)
+ if (opts.lookahead > 0) {
+ while (ready(filex) < fileno) Thread.sleep(1) // `yield`
+ } else {
fetch
}
- val spm = spmax(matqueue(filex)) + 1
-// println("spm %d" format spm)
- nrow = math.min(rowno + todo, spm)
- val matq = matqueue(filex)
- if (matq(0).asInstanceOf[AnyRef] != null) {
-// println("Here %d %d %d" format(rowno, nrow, todo))
- omats(0) = sprowslice(matq, rowno, nrow, omats(0), opts.batchSize - todo)
- if (rowno + todo >= spm) donextfile = true
- } else {
- if (opts.throwMissing) {
- throw new RuntimeException("Missing file "+fileno)
- }
- donextfile = true
- }
- todo -= nrow - rowno
- if (donextfile) {
- rowno = 0;
- fileno += 1;
- donextfile = false
- } else {
- rowno = nrow;
- }
+ val spm = spmax(matqueue(filex)) + 1
+// println("spm %d" format spm)
+ nrow = math.min(rowno + todo, spm)
+ val matq = matqueue(filex)
+ if (matq(0).asInstanceOf[AnyRef] != null) {
+// println("Here %d %d %d" format(rowno, nrow, todo))
+ omats(0) = sprowslice(matq, rowno, nrow, omats(0), opts.batchSize - todo)
+ if (rowno + todo >= spm) donextfile = true
+ } else {
+ if (opts.throwMissing) {
+ throw new RuntimeException("Missing file "+fileno)
+ }
+ donextfile = true
+ }
+ todo -= nrow - rowno
+ if (donextfile) {
+ rowno = 0
+ fileno += 1
+ donextfile = false
+ } else {
+ rowno = nrow
+ }
}
if (todo > 0) {
fillup(omats(0), todo)
@@ -233,28 +233,28 @@ class SFileSource(override val opts:SFileSource.Opts = new SFileSource.Options)
val addConstFeat = opts.addConstFeat
val featType = opts.featType
val threshold = opts.featThreshold
- var icol = colno;
+ var icol = colno
while (icol < endcol) {
- var j = 0;
+ var j = 0
while (j < nfiles) {
- val mat = inmat(j).asInstanceOf[SMat];
- var k = mat.jc(icol) - ioff;
- var lastk = mat.jc(icol+1) - ioff;
- val xoff = innz - k;
+ val mat = inmat(j).asInstanceOf[SMat]
+ var k = mat.jc(icol) - ioff
+ var lastk = mat.jc(icol+1) - ioff
+ val xoff = innz - k
// println("here %d %d %d %d %d" format (k, mat.nrows, mat.ncols, lims.length, j))
while (k < lastk && mat.ir(k)-ioff < lims(j)) {
if (xoff + k >= omat.ir.length) {
- throw new RuntimeException("SFileSource index out of range. Try increasing opts.eltsPerSample");
+ throw new RuntimeException("SFileSource index out of range. Try increasing opts.eltsPerSample")
}
- omat.ir(xoff + k) = mat.ir(k) + offsets(j);
+ omat.ir(xoff + k) = mat.ir(k) + offsets(j)
omat.data(xoff + k) = if (featType == 0) {
- 1f;
+ 1f
} else if (featType == 1) {
- mat.data(k) ;
+ mat.data(k)
} else {
if (mat.data(k).toDouble >= threshold.dv) 1f else 0f;
}
- k += 1;
+ k += 1
}
innz = xoff + k
j += 1
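// For reference, the three featType branches above reduce to this per-value
// mapping (a simplified restatement with a plain Float threshold):
def featurize(v:Float, featType:Int, threshold:Float):Float =
  if (featType == 0) 1f                    // binary presence
  else if (featType == 1) v                // linear: keep the raw count
  else if (v >= threshold) 1f else 0f      // featType == 2: thresholded indicator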
@@ -273,10 +273,10 @@ class SFileSource(override val opts:SFileSource.Opts = new SFileSource.Options)
}
def spmax(matq:Array[Mat]):Int = {
- var maxv = 0;
+ var maxv = 0
for (i <- 0 until matq.length) {
if (matq(i).asInstanceOf[AnyRef] != null) {
- maxv = matq(i).ncols
+ maxv = matq(i).ncols
}
}
maxv - 1
@@ -304,37 +304,37 @@ class SFileSource(override val opts:SFileSource.Opts = new SFileSource.Options)
var todo = opts.batchSize
flushMat(omats(0))
while (todo > 0 && fileno < nend) {
- var nrow = rowno
- val filex = fileno % math.max(1, opts.lookahead)
- if (opts.lookahead > 0) {
- while (ready(filex) < fileno) Thread.sleep(1);// `yield`
- } else {
- fetch
- }
- val spm = spmax(matqueue(filex)) + 1
-// println("spm %d" format spm)
- nrow = math.min(rowno + todo, spm)
- val matq = matqueue(filex)
- if (matq(0).asInstanceOf[AnyRef] != null) {
-// println("Here %d %d %d %d" format(rowno, nrow, todo, spm))
- omats(0) = spcolslice(matq, rowno, nrow, omats(0), opts.batchSize - todo)
- if (rowno + todo >= spm) donextfile = true
- } else {
- if (opts.throwMissing) {
- throw new RuntimeException("Missing file "+fileno)
- }
- donextfile = true;
- }
- todo -= nrow - rowno
- fprogress = nrow*1f / spm
- if (donextfile) {
- rowno = 0;
- fileno += 1;
- fprogress = 0
- donextfile = false
- } else {
- rowno = nrow
- }
+ var nrow = rowno
+ val filex = fileno % math.max(1, opts.lookahead)
+ if (opts.lookahead > 0) {
+ while (ready(filex) < fileno) Thread.sleep(1) // `yield`
+ } else {
+ fetch
+ }
+ val spm = spmax(matqueue(filex)) + 1
+// println("spm %d" format spm)
+ nrow = math.min(rowno + todo, spm)
+ val matq = matqueue(filex)
+ if (matq(0).asInstanceOf[AnyRef] != null) {
+// println("Here %d %d %d %d" format(rowno, nrow, todo, spm))
+ omats(0) = spcolslice(matq, rowno, nrow, omats(0), opts.batchSize - todo)
+ if (rowno + todo >= spm) donextfile = true
+ } else {
+ if (opts.throwMissing) {
+ throw new RuntimeException("Missing file "+fileno)
+ }
+ donextfile = true
+ }
+ todo -= nrow - rowno
+ fprogress = nrow*1f / spm
+ if (donextfile) {
+ rowno = 0
+ fileno += 1
+ fprogress = 0
+ donextfile = false
+ } else {
+ rowno = nrow
+ }
}
if (todo > 0) {
fillup(omats(0), todo)
@@ -350,7 +350,7 @@ class SFileSource(override val opts:SFileSource.Opts = new SFileSource.Options)
object SFileSource {
trait Opts extends FileSource.Opts {
- var fcounts:IMat = null
+ var fcounts:IMat = null
}
class Options extends Opts {}
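// A usage sketch (file names and row bounds hypothetical): stack two sparse
// inputs per minibatch, with fcounts capping the row indices kept from each:
val sopts = new SFileSource.Options
sopts.fnames = List(FileSource.simpleEnum("feats%04d.smat.lz4", 1, 0),
                    FileSource.simpleEnum("extra%04d.smat.lz4", 1, 0))
sopts.fcounts = icol(10000, 1000)  // rows kept from block 0 and block 1
sopts.eltsPerSample = 300          // raise this if spcolslice reports index overflow
sopts.batchSize = 10000
val sds = new SFileSource(sopts)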
diff --git a/src/main/scala/BIDMach/datasources/StackedSource.scala b/src/main/scala/BIDMach/datasources/StackedSource.scala
index f37a8fd3..20a4d884 100755
--- a/src/main/scala/BIDMach/datasources/StackedSource.scala
+++ b/src/main/scala/BIDMach/datasources/StackedSource.scala
@@ -10,43 +10,43 @@ class StackedDS(val s1:DataSource, val s2:DataSource,
omats = null
def init = {
- s1.opts.batchSize = opts.batchSize;
- s2.opts.batchSize = opts.batchSize;
- s1.init;
- s2.init;
- val mats1 = s1.omats;
- val mats2 = s2.omats;
- omats = new Array[Mat](mats1.length + mats2.length);
+ s1.opts.batchSize = opts.batchSize
+ s2.opts.batchSize = opts.batchSize
+ s1.init
+ s2.init
+ val mats1 = s1.omats
+ val mats2 = s2.omats
+ omats = new Array[Mat](mats1.length + mats2.length)
for (i <- 0 until mats1.length) {
- omats(i) = mats1(i);
+ omats(i) = mats1(i)
}
for (i <- 0 until mats2.length) {
- omats(i+mats1.length) = mats2(i);
+ omats(i+mats1.length) = mats2(i)
}
}
def nmats = omats.length
def reset = {
- s1.reset;
- s2.reset;
+ s1.reset
+ s2.reset
}
def next:Array[Mat] = {
- val mats1 = s1.next;
- val mats2 = s2.next;
- val fs1 = s1.asInstanceOf[FileSource];
- val fs2 = s2.asInstanceOf[FileSource];
+ val mats1 = s1.next
+ val mats2 = s2.next
+ val fs1 = s1.asInstanceOf[FileSource]
+ val fs2 = s2.asInstanceOf[FileSource]
if (fs1.fileno != fs2.fileno || fs1.rowno != fs2.rowno) {
throw new RuntimeException("Data source skew %d %d %d %d" format (fs1.fileno, fs2.fileno, fs1.rowno, fs2.rowno))
}
for (i <- 0 until mats1.length) {
- omats(i) = mats1(i);
+ omats(i) = mats1(i)
}
for (i <- 0 until mats2.length) {
- omats(i+mats1.length) = mats2(i);
+ omats(i+mats1.length) = mats2(i)
}
- omats;
+ omats
}
def hascol(mats:Array[Mat], iptr:Int, ss:DataSource):Boolean = {
diff --git a/src/main/scala/BIDMach/mixins/Mixin.scala b/src/main/scala/BIDMach/mixins/Mixin.scala
index 86fc4cd3..def79b4f 100755
--- a/src/main/scala/BIDMach/mixins/Mixin.scala
+++ b/src/main/scala/BIDMach/mixins/Mixin.scala
@@ -21,7 +21,7 @@ abstract class Mixin(val opts:Mixin.Opts = new Mixin.Options) extends Serializab
}
object Mixin {
- trait Opts extends BIDMat.Opts {}
-
- class Options extends Opts {}
+ trait Opts extends BIDMat.Opts {}
+
+ class Options extends Opts {}
}
diff --git a/src/main/scala/BIDMach/mixins/Regularizer.scala b/src/main/scala/BIDMach/mixins/Regularizer.scala
index eef41065..e9cba0c3 100755
--- a/src/main/scala/BIDMach/mixins/Regularizer.scala
+++ b/src/main/scala/BIDMach/mixins/Regularizer.scala
@@ -7,7 +7,7 @@ import BIDMach.models._
class L1Regularizer(override val opts:L1Regularizer.Opts = new L1Regularizer.Options) extends Mixin(opts) {
def compute(mats:Array[Mat], step:Float) = {
for (i <- 0 until opts.r1nmats) {
- val v = if (opts.reg1weight.ncols == 1) - opts.reg1weight else - opts.reg1weight(?,i);
+ val v = if (opts.reg1weight.ncols == 1) - opts.reg1weight else - opts.reg1weight(?,i)
updatemats(i) ~ updatemats(i) + (sign(modelmats(i)) ∘ v)
}
}
@@ -23,10 +23,10 @@ class L1Regularizer(override val opts:L1Regularizer.Opts = new L1Regularizer.Opt
class L2Regularizer(override val opts:L2Regularizer.Opts = new L2Regularizer.Options) extends Mixin(opts) {
def compute(mats:Array[Mat], step:Float) = {
- for (i <- 0 until opts.r2nmats) {
- val v = if (opts.reg2weight.ncols == 1) - opts.reg2weight else - opts.reg2weight(?,i);
- updatemats(i) ~ updatemats(i) + (modelmats(i) ∘ v)
- }
+ for (i <- 0 until opts.r2nmats) {
+ val v = if (opts.reg2weight.ncols == 1) - opts.reg2weight else - opts.reg2weight(?,i)
+ updatemats(i) ~ updatemats(i) + (modelmats(i) ∘ v)
+ }
}
def score(mats:Array[Mat], step:Float):FMat = {
@@ -40,12 +40,12 @@ class L2Regularizer(override val opts:L2Regularizer.Opts = new L2Regularizer.Opt
object L1Regularizer {
- trait Opts extends Mixin.Opts {
- var reg1weight:FMat = 1e-7f
- var r1nmats:Int = 1
- }
-
- class Options extends Opts {}
+ trait Opts extends Mixin.Opts {
+ var reg1weight:FMat = 1e-7f
+ var r1nmats:Int = 1
+ }
+
+ class Options extends Opts {}
}
object L2Regularizer {
diff --git a/src/main/scala/BIDMach/models/BayesNet.scala b/src/main/scala/BIDMach/models/BayesNet.scala
index 1318da41..0e1f19b2 100755
--- a/src/main/scala/BIDMach/models/BayesNet.scala
+++ b/src/main/scala/BIDMach/models/BayesNet.scala
@@ -1,972 +1,972 @@
-package BIDMach.models
-
-import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
-import BIDMat.MatFunctions._
-import BIDMat.SciFunctions._
-import BIDMach.datasources._
-import BIDMach.updaters._
-import BIDMach._
-
-import java.text.NumberFormat
-import edu.berkeley.bid.CUMACH._
-import scala.collection.mutable._
-
-/**
- * This code is for Gibbs sampling on Bayesian networks or factor graphs. It assumes that there
- * exists partially observed data generated by some distribution P(X | Z, \Theta). The goal is to
- * perform sampling of Z (the hidden data) to figure out a "good" \Theta value. Here, \Theta encodes
- * the CPTs for Bayesian networks, or the factor tables for factor graphs. Note that there is
- * also a SAME parameter to replicate the data, as well as an adjustable Dirichlet prior.
- *
- * @param dag For a Bayesian network, this is an adjacency matrix with a 1 at (i,j) iff node i has
- * an edge TOWARDS node j. For a factor graph, (i,j) = 1 iff node i is in factor j.
- * @param states Indicates the number of states for each node, one per line.
- * @param isFactorModel If true, then we use a FactorGraph rather than a Graph and don't normalize.
- * @param opts The options from the BayesNet learner, e.g., the number of passes over the data.
- */
-class BayesNet(val dag:Mat,
- val states:Mat,
- val isFactorModel:Boolean,
- override val opts:BayesNet.Opts = new BayesNet.Options) extends Model(opts) {
-
- // Miscellaneous stuff that we should probably record.
- val randSeed:Int = 0
-
- var mm:Mat = null // Copy of the cpt, but be careful of aliasing. We keep this normalized.
- var cptOffset:Mat = null // Holds global variable offsets (into the mm = cpt) of each variable.
- var cptOffsetSAME:Mat = null // A vertically stacked version of cptOffset, for SAME.
- var graph:Graph = null // Data structure representing the DAG, "columns = parents."
- var iproject:Mat = null // Local CPT offsets; we do "usertrans * iproject" to get the offsets.
- var iprojectBlockedSAME:Mat = null // A diagonal, blocked version of iproject, for SAME local CPT offsets.
- var pproject:Mat = null // Parent tracking matrix, for combining probabilities together.
- var statesPerNode:Mat = null // Variables can have an arbitrary number of states.
- var statesPerNodeSAME:Mat = null // A vertically stacked version of statesPerNode, for SAME.
- var colorInfo:Array[ColorGroup] = null // Gives us, for each color, a colorStuff class (of arrays).
- var zeroMap:HashMap[(Int,Int),Mat] = null // Map from (nr,nc) -> a zero matrix (to avoid allocation).
- var randMap:HashMap[(Int,Int),Mat] = null // Map from (nr,nc) -> a rand matrix (to avoid allocation).
- var normMat:Mat = null // Normalizes a counts vector K by doing K / (K.t * normMat *^ normMat).t.
- var useGPUnow:Boolean = false // Checks (during initialization only) if we're using GPUs or not.
- var batchSize:Int = -1 // Holds the batchSize, which we use for some colorInfo matrices.
-
- var counts1:Mat = null // This will accumulate counts that we use for the actual distribution.
- var counts2:Mat = null // This will be the counts that we use for the *previous* step that we SUBTRACT.
- var counts3:Mat = null // This is like counts1, but WITH Dirichlets!
-
- var dirichletPrior:Mat = null // The prior we use to smooth the distribution. If all 1s, SAME will keep it the same.
- var dirichletScale:Mat = null // The scale we use as part of the prior (typically all 1s).
- var onesSAMEvector:Mat = null // This is the (g)iones(opts.copiesForSAME,1), for certain special uses.
-
- // Extra debugging/info gathering for the Koller data only!
- val real1 = .6 on .4 on .7 on .3 on .3 on .4 on .3 on .9 on .08 on .02 on .05 on .25 on .7
- val real2 = .5 on .3 on .2 on .95 on .05 on .2 on .8 on .1 on .9 on .4 on .6 on .99 on .01
- val real = real1 on real2
-
- /**
- * Performs a series of initialization steps.
- *
- * - Builds iproject/pproject for local offsets and computing probabilities, respectively.
- * - For each color group, determine some necessary matrices for uupdate later.
- * - Build the CPT, which is actually counts, not probabilities. I initialize it randomly.
- *
- * Note that randomizing the input data (which is put back into the data source) is done in uupdate.
- */
- override def init() = {
- // Some stuff for experiments, predictions, and benchmarking.
- setseed(randSeed);
- println("randSeed = " + randSeed);
- runtimes = zeros(1,6);
- useGPUnow = opts.useGPU && (Mat.hasCUDA > 0)
-
- // Establish the states per node, the (colored) Graph data structure, and its projection matrices.
- onesSAMEvector = if (useGPUnow) giones(opts.copiesForSAME,1) else iones(opts.copiesForSAME,1)
- statesPerNode = IMat(states)
- statesPerNodeSAME = kron(onesSAMEvector, IMat(statesPerNode))
- if (isFactorModel) {
- graph = new FactorGraph(dag, opts.dim, statesPerNode)
- } else {
- graph = new Graph(dag, opts.dim, statesPerNode)
- }
- graph.color
- iproject = if (useGPUnow) GSMat((graph.iproject).t) else (graph.iproject).t
- pproject = if (useGPUnow) GSMat(graph.pproject) else graph.pproject
- iprojectBlockedSAME = createBlockedDiagonal(iproject)
-
- // Build the CPT. To avoid div-by-zero errors, initialize randomly.
- val numSlotsInCpt = IMat(exp(ln(FMat(statesPerNode).t) * SMat(pproject)) + 1e-4)
- cptOffset = izeros(graph.nFactor, 1)
- cptOffset(1 until graph.nFactor) = cumsum(numSlotsInCpt)(0 until graph.nFactor-1)
- cptOffset = convertMat(cptOffset)
- cptOffsetSAME = kron(onesSAMEvector,cptOffset)
- val lengthCPT = sum(numSlotsInCpt).dv.toInt
- val cpt = convertMat(rand(lengthCPT,1) + opts.initSmoothFactor)
-
- // To finish CPT/counts, we normalize using a "factored form" of normalizing.
- if (!isFactorModel) {
- normMat = getNormConstMatrix(lengthCPT)
- cpt <-- ( cpt / (cpt.t * normMat *^ normMat).t )
- println("cpt.t: " + cpt.t)
- }
- setmodelmats(new Array[Mat](1))
- modelmats(0) = cpt
- mm = modelmats(0)
- updatemats = new Array[Mat](1)
- updatemats(0) = mm.zeros(mm.nrows, mm.ncols)
-
- // For each color group, pre-compute most relevant matrices we need later (this does a lot!).
- colorInfo = new Array[ColorGroup](graph.ncolors)
- for (c <- 0 until graph.ncolors) {
- colorInfo(c) = computeAllColorGroupInfo(c)
- }
- zeroMap = new HashMap[(Int,Int),Mat]()
- randMap = new HashMap[(Int,Int),Mat]()
-
- // Finally, create/convert a few matrices, reset some variables, and add some debugging info.
- counts1 = mm.zeros(mm.length, 1)
- counts2 = mm.zeros(mm.length, 1)
- counts3 = mm.zeros(mm.length, 1)
- dirichletPrior = mm.ones(mm.length, 1)
- dirichletScale = mm.ones(mm.length, 1)
- statesPerNode = convertMat(statesPerNode)
- batchSize = -1
- }
-
- /**
- * Calls a uupdate/mupdate sequence to sample values and to update parameters. We compute
- * counts2 here (counts to subtract later) because it relies on gmats(1), which gets overwritten
- * in uupdate.
- *
- * @param gmats An array of matrices that contains desired mini-batch data: gmats(0) represents
- * the original, raw data with 0s = unknown. The sampled data is in gmats(1), which we
- * later refer to as 'user'. Here, everything is shifted by -1 from gmats(0), and unknown
- * values are probabilistically assigned to be one of the eligible values.
- * @param ipass The current pass over the data.
- * @param here The total number of samples (columns) of the data seen thus far.
- */
- override def dobatch(gmats:Array[Mat], ipass:Int, here:Long) = {
- if (ipass > 0) {
- val index = int(cptOffsetSAME + (gmats(1).t * iprojectBlockedSAME).t)
- val linearIndices = index(?)
- counts2 <-- float(accum(linearIndices, 1, counts2.length, 1))
- }
- uupdate(gmats(0), gmats(1), ipass)
- mupdate(gmats(0), gmats(1), ipass)
- }
-
- /** Calls a uupdate/evalfun sequence. Known data is in gmats(0), sampled data is in gmats(1). */
- override def evalbatch(gmats:Array[Mat], ipass:Int, here:Long):FMat = {
- //println("runtimes: " + runtimes)
- return FMat(0);
- }
-
- /**
- * Computes an update for the conditional probability table by sampling each variable once (for now).
- *
- * In the first ipass, it randomizes the user matrix except for those values that are already known from
- * sdata. It also establishes various matrices to be put in the colorInfo array or the hash maps (for
- * caching purposes). For each data batch, it iterates through color groups and samples in parallel.
- *
- * @param sdata The sparse data matrix for this batch (0s = unknowns). The user matrix shifts it by -1.
- * @param user A data matrix with the same dimensions as sdata, and whose columns represent various iid
- * assignments to all the variables. The known values of sdata are inserted in the same spots in this
- * matrix, but the unknown values are randomized to be in {0,1,...,k}.
- * @param ipass The current pass over the full data source (not the Gibbs sampling iteration number).
- */
- def uupdate(sdata:Mat, user:Mat, ipass:Int):Unit = {
-
- // For SAME, we stack matrices. If kron is missing cases for some (Mat) types, add them in MatFunctions.scala.
- val stackedData = kron(onesSAMEvector, sdata)
- val select = stackedData > 0
-
- // For the first pass, we need to create a lot of matrices that rely on knowledge of the batch size.
- if (ipass == 0) {
- establishMatrices(sdata.ncols)
- val state = convertMat(rand(sdata.nrows * opts.copiesForSAME, sdata.ncols))
- state <-- float( min( int(statesPerNodeSAME ∘ state), int(statesPerNodeSAME-1) ) )
- user ~ (select ∘ (stackedData-1)) + ((1-select) ∘ state)
- }
-
- // Now back to normal from prediction accuracy; usertrans is still user.t.
- val t0 = toc;
- val usertrans = user.t;
- val t1 = toc;
- runtimes(0) += t1 - t0;
-
- for (c <- 0 until graph.ncolors) {
-
- // Prepare data by establishing appropriate offset matrices for various CPT blocks. First, clear out usertrans.
- val t2 = toc;
- usertrans(?, colorInfo(c).idsInColorSAME) = zeroMap( (usertrans.nrows, colorInfo(c).numNodes*opts.copiesForSAME) )
- val offsetMatrix = usertrans * colorInfo(c).iprojectSlicedSAME + (colorInfo(c).globalOffsetVectorSAME).t
- val replicatedOffsetMatrix = int(offsetMatrix * colorInfo(c).replicationMatrixSAME) + colorInfo(c).strideVectorSAME
- val logProbs = ln(mm(replicatedOffsetMatrix))
- val nonExponentiatedProbs = (logProbs * colorInfo(c).combinationMatrixSAME).t
- val t3 = toc;
- runtimes(1) += t3 - t2;
-
- // Establish matrices needed for the multinomial sampling
- val keys = if (user.ncols == batchSize) colorInfo(c).keysMatrix else colorInfo(c).keysMatrixLast
- val bkeys = if (user.ncols == batchSize) colorInfo(c).bkeysMatrix else colorInfo(c).bkeysMatrixLast
- val bkeysOff = if (user.ncols == batchSize) colorInfo(c).bkeysOffsets else colorInfo(c).bkeysOffsetsLast
- val randIndices = if (user.ncols == batchSize) colorInfo(c).randMatrixIndices else colorInfo(c).randMatrixIndicesLast
- val sampleIndices = if (user.ncols == batchSize) colorInfo(c).sampleIDindices else colorInfo(c).sampleIDindicesLast
-
- // Parallel multinomial sampling. Check the colorInfo matrices since they contain a lot of info.
- //val maxInGroup = cummaxByKey(nonExponentiatedProbs, keys)(bkeys) // To prevent overflow (if needed).
- //val probs = exp(nonExponentiatedProbs - maxInGroup) // To prevent overflow (if needed).
- val t4 = toc;
- val probs = exp(nonExponentiatedProbs)
- probs <-- (probs + 1e-30f) // Had to add this for the DLM MOOC data to prevent 0/(0+0) problems.
- val cumprobs = cumsumByKey(probs, keys)
- val normedProbs = cumprobs / cumprobs(bkeys)
- val t5 = toc;
- runtimes(2) += t5 - t4;
-
- // With cumulative probabilities set up in normedProbs matrix, create a random matrix and sample
- val randMatrix = randMap( (colorInfo(c).numNodes*opts.copiesForSAME, usertrans.nrows) )
- rand(randMatrix)
- randMatrix <-- randMatrix * 0.99999f
- val lessThan = normedProbs < randMatrix(randIndices)
- val sampleIDs = cumsumByKey(lessThan, keys)(sampleIndices)
- usertrans(?, colorInfo(c).idsInColorSAME) = sampleIDs.t // Note the SAME now...
- val t6 = toc;
- runtimes(3) += t6 - t5;
-
- // After sampling with this color group over all copies (from SAME), we override the known values.
- usertrans ~ (select ∘ (stackedData-1)).t + ((1-select) ∘ usertrans.t).t;
- val t7 = toc;
- runtimes(4) += t7 - t6;
- }
-
- user <-- usertrans.t;
- }
-
- /**
- * After one set of Gibbs sampling iterations, we have a set of counts for each slot in the cpt.
- * We add values from the dirichletPrior, then sample all the parameters independently from a Gamma
- * distribution Gamma(shape,scale=1), where the shape is the count they have. Then the values are
- * put in updatemats(0) to be "averaged into" the cpt based on IncNorm.
- *
- * @param sdata The sparse data matrix for this batch (0s = unknowns), which we do not use here.
- * @param user A data matrix with the same dimensions as sdata, and whose columns represent various
- * iid assignments to all the variables. The known values of sdata are inserted in the same spots
- * in this matrix, but the unknown values are randomized to be in {0,1,...,k}.
- * @param ipass The current pass over the full data source (not the Gibbs sampling iteration number).
- */
- def mupdate(sdata:Mat, user:Mat, ipass:Int):Unit = {
- val t8 = toc;
- val index = int(cptOffsetSAME + (user.t * iprojectBlockedSAME).t)
- val linearIndices = index(?)
-
- // Drop the corresponding previous mini-batch and accumulate w/current mini-batch.
- if (ipass > 0) {
- counts1 ~ counts1 - counts2
- }
- counts1 ~ counts1 + float(accum(linearIndices, 1, counts1.length, 1))
- gamrnd(counts1 + dirichletPrior, dirichletScale, counts3)
-
- if (!isFactorModel) {
- updatemats(0) <-- (counts3 / (counts3.t * normMat *^ normMat).t);
- } else {
- updatemats(0) <-- counts3;
- }
- println("updatemats(0).t = " + updatemats(0).t)
-
- val t9 = toc;
- runtimes(5) += t9 - t8;
- }
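// Background note on the gamrnd call above (a standard identity, not code
// from this file): if g_i ~ Gamma(alpha_i, scale = 1) independently, then
// (g_1, ..., g_k) / sum_i g_i ~ Dirichlet(alpha_1, ..., alpha_k). With
// alpha = counts + dirichletPrior, normalizing counts3 per CPT block via
// normMat therefore yields a sample from each Dirichlet posterior.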
-
- /**
- * I'm not quite sure what to put here.
- */
- def evalfun(sdata:Mat, user:Mat):FMat = {
- return FMat(0)
- }
-
- // -----------------------------------
- // Various debugging or helper methods
- // -----------------------------------
-
- /**
- * Determines a variety of information for this color group, and stores it in a ColorGroup object.
- * First, it establishes some basic information from each color group. Then it computes the more
- * complicated replication matrices, stride vectors, and combination matrices. Check the colorInfo
- * class for details on what the individual matrices represent.
- *
- * Actually, this method name is a bit misleading because some of the color group info relies on
- * knowing the batch size, and we can't do that until we actually see the data.
- *
- * @param c The integer index of the given color group.
- */
- def computeAllColorGroupInfo(c:Int) : ColorGroup = {
- val cg = new ColorGroup
- cg.idsInColor = find(IMat(graph.colors) == c)
- cg.numNodes = cg.idsInColor.length
- cg.chIdsInColor = find(FMat(sum(SMat(pproject)(cg.idsInColor,?),1)))
- cg.idsInColorSAME = cg.idsInColor
- for (i <- 1 until opts.copiesForSAME) {
- // Unlike other things where we could use kron, here we change indices b/c we use this
- // for matrix indexing when "clearing out columns" in usertrans when sampling.
- cg.idsInColorSAME = cg.idsInColorSAME on (cg.idsInColor + i*graph.n)
- }
- cg.numNodesCh = cg.chIdsInColor.length
- cg.iprojectSliced = SMat(iproject)(?,cg.chIdsInColor)
- cg.iprojectSlicedSAME = createBlockedDiagonal(cg.iprojectSliced)
- cg.globalOffsetVector = convertMat(FMat(cptOffset(cg.chIdsInColor))) // Need FMat to avoid GMat+GIMat
- cg.globalOffsetVectorSAME = kron(onesSAMEvector, cg.globalOffsetVector)
- val startingIndices = izeros(cg.numNodes,1)
- startingIndices(1 until cg.numNodes) = cumsum(IMat(statesPerNode(cg.idsInColor)))(0 until cg.numNodes-1)
- cg.startingIndices = convertMat(startingIndices)
-
- // Gather useful information for determining the replication, stride, and combination matrices
- var ncols = 0
- val numOnes = izeros(1,cg.numNodesCh) // Determine how many 1s to have
- val strideFactors = izeros(1,cg.numNodesCh) // Get stride factors for the stride vector
- val parentOf = izeros(1,cg.numNodesCh) // Get index of parent (or itself) in idsInColor
- val fullIproject = full(iproject)
- for (i <- 0 until cg.numNodesCh) {
- var nodeIndex = cg.chIdsInColor(i).dv.toInt
- if (IMat(cg.idsInColor).data.contains(nodeIndex)) { // This node is in the color group
- numOnes(i) = statesPerNode(nodeIndex)
- ncols = ncols + statesPerNode(nodeIndex).dv.toInt
- strideFactors(i) = 1
- parentOf(i) = IMat(cg.idsInColor).data.indexOf(nodeIndex)
- } else { // This node is a child of a node in the color group
- val parentIndices = find( FMat( sum(SMat(pproject)(?,nodeIndex),2) ) )
- var parentIndex = -1
- var k = 0
- while (parentIndex == -1 && k < parentIndices.length) {
- if (IMat(cg.idsInColor).data.contains(parentIndices(k))) {
- parentIndex = parentIndices(k)
- parentOf(i) = IMat(cg.idsInColor).data.indexOf(parentIndices(k))
- }
- k = k + 1
- }
- if (parentIndex == -1) {
- throw new RuntimeException("Node at index " +nodeIndex+ " is missing a parent in its color group.")
- }
- numOnes(i) = statesPerNode(parentIndex)
- ncols = ncols + statesPerNode(parentIndex).dv.toInt
- strideFactors(i) = fullIproject(parentIndex,IMat(nodeIndex)).dv.toInt
- }
- }
-
- // Form the replication (the dim is (#-of-ch_id-variables x ncols)) and stride matrices
- var col = 0
- val strideVector = izeros(1, ncols)
- val ii = izeros(ncols, 1)
- for (i <- 0 until cg.numNodesCh) {
- val num = numOnes(i)
- ii(col until col+num) = i
- strideVector(col until col+num) = (0 until num)*strideFactors(i)
- col = col + num
- }
- val jj = icol(0 until ncols)
- val vv = ones(ncols, 1)
- cg.strideVector = convertMat(strideVector)
- // A bit confusing, since strideVector is a ROW vector
- cg.strideVectorSAME = kron( onesSAMEvector.t, cg.strideVector)
- cg.replicationMatrix = if (useGPUnow) GSMat(sparse(ii,jj,vv)) else sparse(ii,jj,vv)
- cg.replicationMatrixSAME = createBlockedDiagonal(cg.replicationMatrix)
-
- // Form keys and ikeys vectors
- val numStatesIds = statesPerNode(cg.idsInColor)
- val ncolsCombo = sum(numStatesIds).dv.toInt
- val keys = izeros(1, ncolsCombo)
- val scaledKeys = izeros(1, ncolsCombo)
- val ikeys = izeros(1, cg.numNodes)
- var keyIndex = 0
- for (i <- 0 until cg.numNodes) {
- val nodeIndex = cg.idsInColor(i)
- val numStates = statesPerNode(nodeIndex).dv.toInt
- keys(keyIndex until keyIndex+numStates) = nodeIndex * iones(1,numStates)
- scaledKeys(keyIndex until keyIndex+numStates) = i * iones(1,numStates)
- keyIndex += numStates
- ikeys(i) = keyIndex-1
- }
- cg.scaledKeys = convertMat(scaledKeys)
- cg.keys = convertMat(keys)
- cg.ikeys = convertMat(ikeys)
- cg.bkeys = cg.ikeys(cg.scaledKeys)
-
- // Now make SAME versions of these! The keys need extra entries appended at the end,
- // incremented by graph.n, in case we have a color group with just one node.
- cg.keysSAME = keys
- for (i <- 1 until opts.copiesForSAME) {
- cg.keysSAME = cg.keysSAME \ (keys + i*graph.n)
- }
- cg.keysSAME = convertMat(cg.keysSAME)
- cg.bkeysSAME = cg.bkeys
- for (i <- 1 until opts.copiesForSAME) {
- cg.bkeysSAME = cg.bkeysSAME \ (cg.bkeys + i*(cg.bkeys).length)
- }
- cg.scaledKeysSAME = cg.scaledKeys
- for (i <- 1 until opts.copiesForSAME) {
- cg.scaledKeysSAME = cg.scaledKeysSAME \ (cg.scaledKeys + cg.numNodes)
- }
- cg.ikeysSAME = cg.ikeys
- for (i <- 1 until opts.copiesForSAME) {
- cg.ikeysSAME = cg.ikeysSAME \ (cg.ikeys + i*(cg.bkeys).length)
- }
-
- // Form the combination matrix (# of rows is # of columns of replication matrix)
- val indicesColumns = izeros(1,cg.numNodes)
- indicesColumns(1 until cg.numNodes) = cumsum(numStatesIds.asInstanceOf[IMat])(0 until cg.numNodes-1)
- val nrowsCombo = ncols
- val indicesRows = izeros(1,cg.numNodesCh)
- indicesRows(1 until cg.numNodesCh) = cumsum(numOnes)(0 until numOnes.length-1)
- val iii = izeros(nrowsCombo,1)
- val jjj = izeros(nrowsCombo,1)
- val vvv = ones(nrowsCombo,1)
- for (i <- 0 until cg.numNodesCh) {
- val p = parentOf(i) // Index into the node itself or its parent if it isn't in the color group
- iii(indicesRows(i) until indicesRows(i)+numOnes(i)) = indicesRows(i) until indicesRows(i)+numOnes(i)
- jjj(indicesRows(i) until indicesRows(i)+numOnes(i)) = indicesColumns(p) until indicesColumns(p)+numOnes(i)
- }
- cg.combinationMatrix = if (useGPUnow) {
- GSMat(sparse(iii,jjj,vvv,nrowsCombo,ncolsCombo))
- } else {
- sparse(iii,jjj,vvv,nrowsCombo,ncolsCombo)
- }
- cg.combinationMatrixSAME = createBlockedDiagonal(cg.combinationMatrix)
-
- cg.idsInColor = convertMat(cg.idsInColor)
- cg.chIdsInColor = convertMat(cg.chIdsInColor)
- if (useGPUnow) {
- cg.iprojectSliced = GSMat(cg.iprojectSliced.asInstanceOf[SMat])
- }
- return cg
- }
-
- /**
- * Called during the first pass over the data to set up matrices for later. These matrices are
- * used in future uupdate calls, and they depend on the batch size, hence why we can only form
- * these during the pass over the data, and not in init().
- *
- * There are several types of matrices we create:
- *
- * - "zero" matrices to put in zeroMap, for clearing out usertrans (must consider opts.copiesForSAME!)
- * - "rand" matries to put in randMap, for containers to randomize values during sampling
- * - five colorInfo(c) matrices for the purposes of sampling
- *
- * In the very likely case that the last batch does not have the same number of columns as the
- * first n-1 batches, we need to repeat this process for that batch.
- *
- * @param ncols The number of columns in the current data, or the batch size.
- */
- def establishMatrices(ncols:Int) = {
- if (batchSize == -1) { // Only true if we're on the first mini-batch of ipass = 0.
- batchSize = ncols
- val onesVector = mm.ones(1, ncols)
- val untilVector = convertMat( float(0 until ncols) )
- for (c <- 0 until graph.ncolors) {
- val numVars = colorInfo(c).numNodes * opts.copiesForSAME // SAME!
- val randOffsets = int(untilVector * numVars)
- zeroMap += ((ncols,numVars) -> mm.zeros(ncols,numVars))
- randMap += ((numVars,ncols) -> mm.zeros(numVars,ncols))
- colorInfo(c).keysMatrix = (colorInfo(c).keysSAME).t * onesVector // keys -> keysSAME
- colorInfo(c).bkeysOffsets = int(untilVector * colorInfo(c).keysSAME.ncols) // keys -> keysSAME
- colorInfo(c).bkeysMatrix = int(colorInfo(c).bkeysSAME.t * onesVector) + colorInfo(c).bkeysOffsets // bkeys -> bkeysSAME
- colorInfo(c).randMatrixIndices = int((colorInfo(c).scaledKeysSAME).t * onesVector) + randOffsets // scaledKeys -> scaledKeysSAME
- colorInfo(c).sampleIDindices = int((colorInfo(c).ikeysSAME).t * onesVector) + colorInfo(c).bkeysOffsets // ikeys -> ikeysSAME
- }
- }
- else if (ncols != batchSize) { // On the last batch of ipass = 0 w/different # of columns
- val onesVectorLast = mm.ones(1, ncols)
- val untilVectorLast = convertMat( float(0 until ncols) )
- for (c <- 0 until graph.ncolors) {
- val numVars = colorInfo(c).numNodes * opts.copiesForSAME // SAME!
- val randOffsets = int(untilVectorLast * numVars)
- zeroMap += ((ncols,numVars) -> mm.zeros(ncols,numVars))
- randMap += ((numVars,ncols) -> mm.zeros(numVars,ncols))
- colorInfo(c).keysMatrixLast = (colorInfo(c).keysSAME).t * onesVectorLast
- colorInfo(c).bkeysOffsetsLast = int(untilVectorLast * colorInfo(c).keysSAME.ncols)
- colorInfo(c).bkeysMatrixLast = int(colorInfo(c).bkeysSAME.t * onesVectorLast) + colorInfo(c).bkeysOffsetsLast
- colorInfo(c).randMatrixIndicesLast = int((colorInfo(c).scaledKeysSAME).t * onesVectorLast) + randOffsets
- colorInfo(c).sampleIDindicesLast = int((colorInfo(c).ikeysSAME).t * onesVectorLast) + colorInfo(c).bkeysOffsetsLast
- }
- }
- }
-
- /**
- * Creates a matrix P such that, if our cpt is a ROW vector of COUNTS, then we NORMALIZE it by:
- *
- * cpt <-- (cpt / (cpt * P *^ P))
- *
- * If we use a column vector, it has to be "cpt <-- (cpt / (cpt.t * P *^ P).t)." Previously, we
- * had a single matrix, but P *^ P will work better as it saves more space.
- *
- * P is structured so that each column represents a single distribution, and rows indicate the CPT
- * components contributing to the distribution's normalizing constant. P *^ P will result in a
- * matrix that has blocks of "1"s across the diagonal, with sizes varying due to the cardinality
- * of variables. The cpt gets multiplied to sum up the components to get the normalizing
- * constants (we normalize via the component-wise vector division). Finally, P is independent of
- * the SAME parameter as it is only based on CPT length.
- *
- * @param cptLength The number of components in the CPT.
- */
- def getNormConstMatrix(cptLength : Int) : Mat = {
- var numDistributions = 0
- var jj = izeros(1,1)
-
- for (k <- 0 until graph.n) {
- var offset = cptOffset(k).dv.toInt
- val numStates = statesPerNode(k).dv.toInt
- val parentIndices = find(SMat(graph.dag)(?,k))
-
- // Split based on no parents (one distribution) or >0 parents (>=2 distributions)
- if (parentIndices.length == 0) {
- jj = jj on ( iones(numStates,1) * numDistributions )
- numDistributions += 1
- } else {
- val totalParentSlots = prod(IMat(statesPerNode)(parentIndices)).dv.toInt
- for (i <- 0 until totalParentSlots) {
- jj = jj on ( iones(numStates,1) * numDistributions )
- numDistributions += 1
- }
- }
- }
-
- // Form our matrix using the standard 'sparse' method and return depending on GPU usage.
- val P = sparse( (0 until cptLength) , jj(1 until jj.length) , ones(jj.length-1, 1) , cptLength, numDistributions)
- if (useGPUnow) {
- return GSMat(P)
- } else {
- return P
- }
- }
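// A small worked example of the structure above: two independent binary
// variables give a CPT [p0 p1 q0 q1] (length 4) and two distributions, so
//   P = 1 0        P *^ P = 1 1 0 0
//       1 0                 1 1 0 0
//       0 1                 0 0 1 1
//       0 1                 0 0 1 1
// and cpt * (P *^ P) = [p0+p1, p0+p1, q0+q1, q0+q1]; the component-wise
// division then renormalizes each block to sum to 1.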
-
- /**
- * Given a matrix as input, we form a diagonal, blocked version of it. So if a is a (sparse) mat, it is
- * like calling kron(mkdiag(ones(1,n)), full(a)), except I think this will be a lot more flexible later.
- * Places where we use this: user.t * iproject, usertrans * colorInfo(c).iprojectSliced, etc.
- *
- * @param a A sparse matrix. It does not have to be square!
- */
- def createBlockedDiagonal(a:Mat) : Mat = {
- val (ii,jj,vv) = find3(SMat(a))
- val vvv = iones(opts.copiesForSAME,1) kron vv
- var iii = izeros(1,1)
- var jjj = izeros(1,1)
- for (k <- 0 until opts.copiesForSAME) {
- iii = iii on (ii + k*a.nrows)
- jjj = jjj on (jj + k*a.ncols)
- }
- val res = sparse(iii(1 until iii.length), jjj(1 until jjj.length), vvv, a.nrows*opts.copiesForSAME, a.ncols*opts.copiesForSAME)
- if (useGPUnow) return GSMat(res) else return res
- }
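// Worked example with opts.copiesForSAME = 2 and a = [1 2; 3 4] (shown dense
// for clarity; the method takes sparse input):
//   createBlockedDiagonal(a) = 1 2 0 0
//                              3 4 0 0
//                              0 0 1 2
//                              0 0 3 4
// which matches kron(mkdiag(ones(1,2)), full(a)), as the doc comment notes.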
-
- // ---------------------------------------------
- // The remaining methods are for debugging only.
- // ---------------------------------------------
-
- /** A debugging method to print matrices, without being constrained by the command line's cropping. */
- def printMatrix(mat: Mat) = {
- for(i <- 0 until mat.nrows) {
- for (j <- 0 until mat.ncols) {
- print(mat(IMat(i),IMat(j)) + " ")
- }
- println()
- }
- }
-
- /**
- * A debugging method to compute the norm of difference between normalized real/estimated cpts.
- * Note: this *does* assume our mm is already normalized!
- * Obviously we'll have to replace the real cpt with what we already have...
- */
- def computeNormDifference(ipass:Int, here:Long) = {
- val real = .7 on .3 on .6 on .4 on .95 on .05 on .2 on .8 on
- .3 on .4 on .3 on .05 on .25 on .7 on .9 on .08 on .02 on .5 on .3 on .2 on .1 on .9 on .4 on .6 on .99 on .01
- val differenceNorm = norm(real - mm)
- println("Currently on ipass = " + ipass + " with here = " + here + "; l-2 norm of (realCpt - mm) is: " + differenceNorm)
- }
-
- /** KL divergence. We assume our mm is normalized. */
- def computeKL(ipass:Int, here:Long, comparisonCPT:Mat) {
-
- // EDIT: let's just make a copy of the cpt here
- val cptCopy = mm + 0
- cptCopy <-- (cptCopy / (cptCopy.t * normMat *^ normMat).t);
-
- var klDivergence = convertMat(float(0))
- var numDistributions = 0
-
- for (k <- 0 until graph.n) {
- var offset = cptOffset(k).dv.toInt
- val numStates = statesPerNode(k).dv.toInt
- val parentIndices = find(SMat(graph.dag)(?,k))
-
- // Then split based on no parents (one distribution) or some parents (two or more distributions)
- if (parentIndices.length == 0) {
- var thisKL = convertMat(float(0))
- for (j <- 0 until numStates) {
- thisKL = thisKL + (comparisonCPT(offset+j) * ln( comparisonCPT(offset+j) / cptCopy(offset+j) ))
- }
- klDivergence = klDivergence + thisKL
- numDistributions += 1
- } else {
- val totalParentSlots = prod(IMat(statesPerNode)(parentIndices)).dv.toInt
- numDistributions += totalParentSlots
- for (i <- 0 until totalParentSlots) {
- var thisKL = convertMat(float(0))
- for (j <- 0 until numStates) {
- thisKL = thisKL + ( comparisonCPT(offset+j) * ln( comparisonCPT(offset+j) / cptCopy(offset+j) ))
- }
- klDivergence = klDivergence + thisKL
- offset += numStates
- }
- }
- }
-
- klDivergence = klDivergence / numDistributions
- println(klDivergence + " " + ipass + " KLDiv")
- }
-
- /** A one-liner that we can insert in a place with ipass and here to debug the cpt. */
- def debugCpt(ipass:Int, here:Long) {
- println("\n\nCurrently on ipass = " + ipass + " with here = " + here + ". This is the CPT:")
- for (k <- 0 until graph.n) {
- showCpt(k)
- }
- println()
- }
-
- /** A debugging method to print out the CPT of one variable (prettily). */
- def showCpt(nodeID: Int) {
- println("\nCPT for node indexed at " + nodeID)
- val startingOffset = cptOffset(nodeID)
- val numStates = statesPerNode(nodeID).dv.toInt
- val normalizedCPT = ( mm / (mm.t * normMat *^ normMat).t )
- val parentIndices = find(SMat(graph.dag)(?,nodeID))
- println("Parents: " + parentIndices.t)
-
- if (parentIndices.length == 0) {
- var str = "\t"
- for (j <- 0 until numStates) {
- str += " %.4f".format(normalizedCPT(startingOffset + j).dv)
- }
- println(str)
- } else {
- val totalParentSlots = prod(IMat(statesPerNode)(parentIndices)).dv.toInt
- val parentStates = statesPerNode(parentIndices)
- val statesList = izeros(1,parentIndices.length)
- var currentOffset = startingOffset
- for (i <- 0 until totalParentSlots) {
- if (i > 0) updateStatesString(statesList, parentStates, parentIndices.length-1)
- var str = ""
- for (i <- 0 until statesList.length) {
- str += statesList(i).dv.toInt + " "
- }
- str += "\t"
- for (j <- 0 until numStates) {
- str += " %.4f".format(normalizedCPT(currentOffset + j).dv)
- }
- println(str)
- currentOffset += numStates
- }
- }
- }
-
- /** Recursive, helper method for updating the states list. */
- def updateStatesString(statesList:Mat, parentStates:Mat, j:Int) {
- if (statesList(j).dv.toInt < parentStates(j).dv.toInt-1) {
- statesList(j) += 1
- } else {
- statesList(j) = 0
- updateStatesString(statesList, parentStates, j-1)
- }
- }
-
-}
-
-
-/**
- * For the input to the BayesNet, see the documentation at the top of this program. It's similar,
- * except we need to have the data set up. We can set options such as the SAME parameter here.
- */
-object BayesNet {
-
- trait Opts extends Model.Opts {
- var copiesForSAME = 1
- var initSmoothFactor = 1
- }
-
- class Options extends Opts {}
-
- /**
- * A learner with a matrix data source, with states per node, and with a dag prepared. Call this
- * using some form of: val (nn,opts) = BayesNet.learner(states , dag , true , data).
- */
- def learner(statesPerNode:Mat, dag:Mat, isFactorModel:Boolean, data:Mat) = {
-
- class xopts extends Learner.Options with BayesNet.Opts with MatSource.Opts with IncNorm.Opts
- val opts = new xopts
- opts.dim = dag.nrows
- opts.batchSize = math.min(100000, data.ncols/50 + 1)
- opts.useGPU = true
- opts.npasses = 10
- opts.isprob = false // Our CPT should NOT be normalized across their (one) column.
- opts.putBack = 1 // Because this stores samples across ipasses, as required by Gibbs sampling
- opts.power = 0.0f // So that the sampled CPT parameters are exactly what we use next iteration
- val secondMatrix = data.zeros(opts.copiesForSAME*data.nrows,data.ncols)
-
- val nn = new Learner(
- new MatSource(Array(data:Mat, secondMatrix), opts),
- new BayesNet(SMat(dag), statesPerNode, isFactorModel, opts),
- null,
- new IncNorm(opts),
- null,
- opts)
- (nn, opts)
- }
-}
-
-/**
- * Graph structure for factor graph. Since it's factor graph, we don't need to moralize. We can
- * color it directly. This code overrides the moralize, iproject, and pproject definitions, but when
- * we color, we use the Graph's method as it only relies on the moralized graph (matrix).
- *
- * @param factorSet a 2-d mat (i,j) that contains the component index (row) for each factor i.
- * @param statesPerNode a 1-d mat containing the cardinality of each variable.
- * @param n the number of vertices in the graph
- */
-class FactorGraph(val factorSet: Mat, override val n: Int, override val statesPerNode: Mat) extends Graph(factorSet, n, statesPerNode) {
-
- nFactor = factorSet.ncols // revised by Haoyu; this is the number of columns of pproject (for a Bayes net, nFactor == n)
-
- /**
- * Build the dag from the input variables, i.e. re-construct the graph structure matrix.
- * If there is a self-edge (caused by a factor that contains only one vertex), we ignore
- * that self-edge when building the mrf.
- */
- override def moralize = {
- mrf = izeros(n, n)
- for (i <- 0 until factorSet.ncols) {
- val factors = find(SMat(factorSet(?, i)))
- if (factors.length > 1) {
- // we ignore the self-edge here
- for (orign <- factors.data) {
- for (des <- factors.data) {
- if (orign != des) {
- mrf(orign, des) = 1
- }
- }
- }
- }
- }
- }
-
- /**
- * Function to construct the iproject. It has the shape: num of factors * n.
- * (x1, x2,..., xn) * iproject.t -> local index for corresponding probability value in cpt.
- */
- override def iproject : SMat = {
- var res = zeros(nFactor, n)
- for (i <- 0 until nFactor) {
- val parents = find(SMat(factorSet(?, i)))
- var cumRes = 1f
- val parentsLen = parents.length
- for (j <- 0 until parentsLen) {
- if (j > 0) {
- cumRes = cumRes * IMat(statesPerNode)(parents(parentsLen - j))
- }
- res(i, parents(parentsLen - j - 1)) = cumRes
- }
- }
- return sparse(res)
- }
-
- /**
- * Function to derive the pproject matrix. pproject represents the correspondence between
- * vertex ids and factors: each column represents one factor, and each row is a binary
- * indicator of whether that vertex appears in the factor group.
- **/
- override def pproject : SMat = {
- return SMat(factorSet)
- }
-
-}
-
-
-/**
- * A graph structure for Bayesian Networks. Includes features for:
- *
- * (1) moralizing graphs: the 'moral' matrix has (i,j) = 1 iff node i is connected to node j
- * (2) coloring moralized graphs (it is unclear why maxColor is needed here, though)
- *
- * @param dag An adjacency matrix with a 1 at (i,j) if node i has an edge TOWARDS node j.
- * @param n The number of vertices in the graph.
- * @param statesPerNode A column vector where elements denote number of states for corresponding variables.
- */
-class Graph(val dag: Mat, val n: Int, val statesPerNode: Mat) {
-
- var mrf: Mat = null
- var colors: Mat = null
- var ncolors = 0
- val maxColor = 100
- var nFactor = n // revised by Haoyu; this is the number of columns of pproject (for a Bayes net, nFactor == n)
-
- /**
- * Connects the parents of a certain node, a single step in the process of moralizing the graph.
- *
- * Iterates through the parent indices and inserts 1s in the 'moral' matrix to indicate edges.
- *
- * @param moral A matrix that represents an adjacency matrix "in progress" in the sense that it
- * is continually getting updated each iteration from the "moralize" method.
- * @param parents An array representing the parent indices of the node of interest.
- */
- def connectParents(moral: FMat, parents: IMat) = {
- val l = parents.length
- for (i <- 0 until l) {
- for (j <- 0 until l) {
- if (parents(i) != parents(j)) {
- moral(parents(i), parents(j)) = 1f
- }
- }
- }
- moral
- }
-
- /** Forms the pproject matrix (dag + identity) used for computing model parameters. */
- def pproject : SMat = {
- return SMat(dag) + sparse(IMat(0 until n), IMat(0 until n), ones(1, n))
- }
-
- /**
- * Forms the iproject matrix, which is left-multiplied to send a Pr(X_i | parents) query to its
- * appropriate spot in the cpt via LOCAL offsets for X_i.
- */
- def iproject : SMat = {
- var res = (pproject.copy).t
- for (i <- 0 until n) {
- val parents = find(SMat(pproject(?, i)))
- var cumRes = 1f
- val parentsLen = parents.length
- for (j <- 1 until parentsLen) {
- cumRes = cumRes * IMat(statesPerNode)(parents(parentsLen - j))
- res.asInstanceOf[SMat](i, parents(parentsLen - j - 1)) = cumRes
- }
- }
- return SMat(res)
- }
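// Worked example: with dag edges 0 -> 2 and 1 -> 2 and statesPerNode =
// [2 3 2], row 2 of iproject becomes [6 2 1], so an assignment (x0, x1, x2)
// maps to local CPT offset 6*x0 + 2*x1 + x2: the node's own value varies
// fastest, then each parent in reverse index order.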
-
- /**
- * Moralize the graph.
- *
- * This means we convert the graph from directed to undirected and connect parents of nodes in
- * the directed graph. First, copy the dag to the moral graph because all 1s in the dag matrix
- * are 1s in the moral matrix (these are adjacency matrices). For each node, find its parents,
- * connect them, and update the matrix. Then make it symmetric because the graph is undirected.
- */
- def moralize = {
- var moral = full(dag)
- for (i <- 0 until n) {
- var parents = find(SMat(dag(?, i)))
- moral = connectParents(FMat(moral), parents)
- }
- mrf = ((moral + moral.t) > 0)
- }
-
- /**
- * Sequentially colors the moralized graph of the dag so that one can run parallel Gibbs sampling.
- *
- * Steps: first, moralize the graph. Then iterate through each node, find its neighbors, and apply a
- * "color mask" to ensure current node doesn't have any of those colors. Then find the legal color
- * with least count (a useful heuristic). If that's not possible, then increase "ncolor".
- */
- def color = {
- moralize
- var colorCount = izeros(maxColor, 1)
- colors = -1 * iones(n, 1)
- ncolors = 0
-
- // Access nodes sequentially. Find the color map of its neighbors, then find the legal color w/least count
- val seq = IMat(0 until n)
- // Can also access nodes randomly
- // val r = rand(n, 1); val (v, seq) = sort2(r)
- for (i <- 0 until n) {
- var node = seq(i)
- var nbs = find(FMat(mrf(?, node)))
- var colorMap = iones(ncolors, 1)
- for (j <- 0 until nbs.length) {
- if (colors(nbs(j)).dv.toInt > -1) {
- colorMap(colors(nbs(j))) = 0
- }
- }
- var c = -1
- var minc = 999999
- for (k <- 0 until ncolors) {
- if ((colorMap(k) > 0) && (colorCount(k) < minc)) {
- c = k
- minc = colorCount(k)
- }
- }
- if (c == -1) {
- c = ncolors
- ncolors = ncolors + 1
- }
- colors(node) = c
- colorCount(c) += 1
- }
- colors
- }
-}
-
-
-/**
- * This will store a lot of pre-computed variables (mostly matrices) for each color group.
- *
- * A high-level description of the categories:
- *
- * - numNodes and numNodesCh are the number of nodes, and the number of nodes and children
- * in this color group, respectively.
- * - idsInColor and chIdsInColor are indices of the variables in this color group, and in
- * this color group plus children of those nodes, respectively.
- * - replicationMatrix is a sparse matrix of rows of ones, used to replicate columns
- * - strideVector is a vector where groups are (0 until k)*stride(x) where k is determined
- * by the node or its parent, and stride(x) is 1 if the node is in the color group.
- * - combinationMatrix is a sparse identity matrix that combines parents with children for
- * probability computations
- * - keys, scaledKeys, ikeys, and bkeys help us with multinomial sampling
- * - The remaining ten (!) matrices rely on knowledge of the batch size. They are expanded
- * versions of the previous matrices that use the batch size to increase their elements.
- * - Oh! Don't forge that we have SAME versions of these!
- */
-class ColorGroup {
- var numNodes:Int = -1
- var numNodesCh:Int = -1
- var idsInColor:Mat = null
- var idsInColorSAME:Mat = null
- var chIdsInColor:Mat = null
- var globalOffsetVector:Mat = null
- var globalOffsetVectorSAME:Mat = null
- var iprojectSliced:Mat = null
- var iprojectSlicedSAME:Mat = null
- var startingIndices:Mat = null
- var replicationMatrix:Mat = null
- var replicationMatrixSAME:Mat = null
- var strideVector:Mat = null
- var strideVectorSAME:Mat = null
- var combinationMatrix:Mat = null
- var combinationMatrixSAME:Mat = null
-
- var keys:Mat = null
- var scaledKeys:Mat = null
- var ikeys:Mat = null
- var bkeys:Mat = null
- var keysMatrix:Mat = null
- var keysMatrixLast:Mat = null
- var bkeysMatrix:Mat = null
- var bkeysMatrixLast:Mat = null
- var bkeysOffsets:Mat = null
- var bkeysOffsetsLast:Mat = null
- var sampleIDindices:Mat = null
- var sampleIDindicesLast:Mat = null
- var randMatrixIndices:Mat = null
- var randMatrixIndicesLast:Mat = null
-
- var keysSAME:Mat = null
- var bkeysSAME:Mat = null
- var scaledKeysSAME:Mat = null
- var ikeysSAME:Mat = null
-}
+package BIDMach.models
+
+import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
+import BIDMat.MatFunctions._
+import BIDMat.SciFunctions._
+import BIDMach.datasources._
+import BIDMach.updaters._
+import BIDMach._
+
+import java.text.NumberFormat
+import edu.berkeley.bid.CUMACH._
+import scala.collection.mutable._
+
+/**
+ * This code is for Gibbs sampling on Bayesian networks or factor graphs. It assumes that there
+ * exists partially observed data generated by some distribution P(X | Z, \Theta). The goal is to
+ * perform sampling of Z (the hidden data) to figure out a "good" \Theta value. Here, \Theta encodes
+ * the CPTs for Bayesian networks, or the factor tables for factor graphs. Note that there is
+ * also a SAME parameter to replicate the data, as well as an adjustable Dirichlet prior.
+ *
+ * @param dag For a Bayesian network, this is an adjacency matrix with a 1 at (i,j) iff node i has
+ * an edge TOWARDS node j. For a factor graph, (i,j) = 1 iff node i is in factor j.
+ * @param states A column vector giving the number of states for each node.
+ * @param isFactorModel If true, then we use a FactorGraph rather than a Graph and don't normalize.
+ * @param opts The options from the BayesNet learner, e.g., the number of passes over the data.
+ */
+class BayesNet(val dag:Mat,
+ val states:Mat,
+ val isFactorModel:Boolean,
+ override val opts:BayesNet.Opts = new BayesNet.Options) extends Model(opts) {
+
+ // Miscellaneous stuff that we should probably record.
+ val randSeed:Int = 0
+
+ var mm:Mat = null // Copy of the cpt, but be careful of aliasing. We keep this normalized.
+ var cptOffset:Mat = null // Holds global variable offsets (into the mm = cpt) of each variable.
+ var cptOffsetSAME:Mat = null // A vertically stacked version of cptOffset, for SAME.
+ var graph:Graph = null // Data structure representing the DAG, "columns = parents."
+ var iproject:Mat = null // Local CPT offsets; we do "usertrans * iproject" to get the offsets.
+ var iprojectBlockedSAME:Mat = null // A diagonal, blocked version of iproject, for SAME local CPT offsets.
+ var pproject:Mat = null // Parent tracking matrix, for combining probabilities together.
+ var statesPerNode:Mat = null // Variables can have an arbitrary number of states.
+ var statesPerNodeSAME:Mat = null // A vertically stacked version of statesPerNode, for SAME.
+ var colorInfo:Array[ColorGroup] = null // Gives us, for each color, a colorStuff class (of arrays).
+ var zeroMap:HashMap[(Int,Int),Mat] = null // Map from (nr,nc) -> a zero matrix (to avoid allocation).
+ var randMap:HashMap[(Int,Int),Mat] = null // Map from (nr,nc) -> a rand matrix (to avoid allocation).
+ var normMat:Mat = null // Normalizes a counts vector K by doing K / (K.t * normMat *^ normMat).t.
+ var useGPUnow:Boolean = false // Checks (during initialization only) if we're using GPUs or not.
+ var batchSize:Int = -1 // Holds the batchSize, which we use for some colorInfo matrices.
+
+ var counts1:Mat = null // This will accumulate counts that we use for the actual distribution.
+ var counts2:Mat = null // This will be the counts that we use for the *previous* step that we SUBTRACT.
+ var counts3:Mat = null // This is like counts1, but WITH Dirichlets!
+
+ var dirichletPrior:Mat = null // The prior we use to smooth the distribution. If all 1s, SAME will keep it the same.
+ var dirichletScale:Mat = null // The scale we use as part of the prior (typically all 1s).
+  var onesSAMEvector:Mat = null // This is the (g)iones(opts.copiesForSAME,1), for certain special uses.
+
+ // Extra debugging/info gathering for the Koller data only!
+ val real1 = .6 on .4 on .7 on .3 on .3 on .4 on .3 on .9 on .08 on .02 on .05 on .25 on .7
+ val real2 = .5 on .3 on .2 on .95 on .05 on .2 on .8 on .1 on .9 on .4 on .6 on .99 on .01
+ val real = real1 on real2
+
+ /**
+ * Performs a series of initialization steps.
+ *
+ * - Builds iproject/pproject for local offsets and computing probabilities, respectively.
+ * - For each color group, determine some necessary matrices for uupdate later.
+   * - Build the CPT, which actually holds counts, not probabilities. We initialize it randomly.
+   *
+   * Note that the randomization of the input data (which gets put back into the data source) is done in uupdate.
+ */
+ override def init() = {
+ // Some stuff for experiments, predictions, and benchmarking.
+ setseed(randSeed)
+ println("randSeed = " + randSeed)
+ runtimes = zeros(1,6)
+ useGPUnow = opts.useGPU && (Mat.hasCUDA > 0)
+
+ // Establish the states per node, the (colored) Graph data structure, and its projection matrices.
+ onesSAMEvector = if (useGPUnow) giones(opts.copiesForSAME,1) else iones(opts.copiesForSAME,1)
+ statesPerNode = IMat(states)
+ statesPerNodeSAME = kron(onesSAMEvector, IMat(statesPerNode))
+ if (isFactorModel) {
+ graph = new FactorGraph(dag, opts.dim, statesPerNode)
+ } else {
+ graph = new Graph(dag, opts.dim, statesPerNode)
+ }
+ graph.color
+ iproject = if (useGPUnow) GSMat((graph.iproject).t) else (graph.iproject).t
+ pproject = if (useGPUnow) GSMat(graph.pproject) else graph.pproject
+ iprojectBlockedSAME = createBlockedDiagonal(iproject)
+
+ // Build the CPT. To avoid div-by-zero errors, initialize randomly.
+ val numSlotsInCpt = IMat(exp(ln(FMat(statesPerNode).t) * SMat(pproject)) + 1e-4)
+ cptOffset = izeros(graph.nFactor, 1)
+ cptOffset(1 until graph.nFactor) = cumsum(numSlotsInCpt)(0 until graph.nFactor-1)
+ cptOffset = convertMat(cptOffset)
+ cptOffsetSAME = kron(onesSAMEvector,cptOffset)
+ val lengthCPT = sum(numSlotsInCpt).dv.toInt
+ val cpt = convertMat(rand(lengthCPT,1) + opts.initSmoothFactor)
+
+ // To finish CPT/counts, we normalize using a "factored form" of normalizing.
+ if (!isFactorModel) {
+ normMat = getNormConstMatrix(lengthCPT)
+ cpt <-- ( cpt / (cpt.t * normMat *^ normMat).t )
+ println("cpt.t: " + cpt.t)
+ }
+ setmodelmats(new Array[Mat](1))
+ modelmats(0) = cpt
+ mm = modelmats(0)
+ updatemats = new Array[Mat](1)
+ updatemats(0) = mm.zeros(mm.nrows, mm.ncols)
+
+ // For each color group, pre-compute most relevant matrices we need later (this does a lot!).
+ colorInfo = new Array[ColorGroup](graph.ncolors)
+ for (c <- 0 until graph.ncolors) {
+ colorInfo(c) = computeAllColorGroupInfo(c)
+ }
+ zeroMap = new HashMap[(Int,Int),Mat]()
+ randMap = new HashMap[(Int,Int),Mat]()
+
+ // Finally, create/convert a few matrices, reset some variables, and add some debugging info.
+ counts1 = mm.zeros(mm.length, 1)
+ counts2 = mm.zeros(mm.length, 1)
+ counts3 = mm.zeros(mm.length, 1)
+ dirichletPrior = mm.ones(mm.length, 1)
+ dirichletScale = mm.ones(mm.length, 1)
+ statesPerNode = convertMat(statesPerNode)
+ batchSize = -1
+ }
+
+ /**
+ * Calls a uupdate/mupdate sequence to sample values and to update parameters. We compute
+   * counts2 here (the counts to subtract later) because it relies on gmats(1), which gets
+   * overwritten in uupdate.
+ *
+ * @param gmats An array of matrices that contains desired mini-batch data: gmats(0) represents
+ * the original, raw data with 0s = unknown. The sampled data is in gmats(1), which we
+ * later refer to as 'user'. Here, everything is shifted by -1 from gmats(0), and unknown
+ * values are probabilistically assigned to be one of the eligible values.
+ * @param ipass The current pass over the data.
+ * @param here The total number of samples (columns) of the data seen thus far.
+ */
+ override def dobatch(gmats:Array[Mat], ipass:Int, here:Long) = {
+ if (ipass > 0) {
+ val index = int(cptOffsetSAME + (gmats(1).t * iprojectBlockedSAME).t)
+ val linearIndices = index(?)
+ counts2 <-- float(accum(linearIndices, 1, counts2.length, 1))
+ }
+ uupdate(gmats(0), gmats(1), ipass)
+ mupdate(gmats(0), gmats(1), ipass)
+ }
+
+ /** Calls a uupdate/evalfun sequence. Known data is in gmats(0), sampled data is in gmats(1). */
+ override def evalbatch(gmats:Array[Mat], ipass:Int, here:Long):FMat = {
+ //println("runtimes: " + runtimes)
+ return FMat(0)
+ }
+
+ /**
+ * Computes an update for the conditional probability table by sampling each variable once (for now).
+ *
+   * In the first ipass, it randomizes the user matrix except for the values already known from
+   * sdata. It also establishes various matrices to be put in the colorInfo array or the hash maps (for
+ * caching purposes). For each data batch, it iterates through color groups and samples in parallel.
+ *
+ * @param sdata The sparse data matrix for this batch (0s = unknowns). The user matrix shifts it by -1.
+ * @param user A data matrix with the same dimensions as sdata, and whose columns represent various iid
+ * assignments to all the variables. The known values of sdata are inserted in the same spots in this
+ * matrix, but the unknown values are randomized to be in {0,1,...,k}.
+ * @param ipass The current pass over the full data source (not the Gibbs sampling iteration number).
+ */
+ def uupdate(sdata:Mat, user:Mat, ipass:Int):Unit = {
+
+ // For SAME, we stack matrices. If kron is missing (type) cases, add them in MatFunctions.scala.
+ val stackedData = kron(onesSAMEvector, sdata)
+ val select = stackedData > 0
+
+ // For the first pass, we need to create a lot of matrices that rely on knowledge of the batch size.
+ if (ipass == 0) {
+ establishMatrices(sdata.ncols)
+ val state = convertMat(rand(sdata.nrows * opts.copiesForSAME, sdata.ncols))
+ state <-- float( min( int(statesPerNodeSAME ∘ state), int(statesPerNodeSAME-1) ) )
+ user ~ (select ∘ (stackedData-1)) + ((1-select) ∘ state)
+ }
+
+    // Transpose the user matrix once up front; the per-color updates below work on usertrans = user.t.
+ val t0 = toc
+ val usertrans = user.t
+ val t1 = toc
+ runtimes(0) += t1 - t0
+
+ for (c <- 0 until graph.ncolors) {
+
+ // Prepare data by establishing appropriate offset matrices for various CPT blocks. First, clear out usertrans.
+ val t2 = toc
+ usertrans(?, colorInfo(c).idsInColorSAME) = zeroMap( (usertrans.nrows, colorInfo(c).numNodes*opts.copiesForSAME) )
+ val offsetMatrix = usertrans * colorInfo(c).iprojectSlicedSAME + (colorInfo(c).globalOffsetVectorSAME).t
+ val replicatedOffsetMatrix = int(offsetMatrix * colorInfo(c).replicationMatrixSAME) + colorInfo(c).strideVectorSAME
+ val logProbs = ln(mm(replicatedOffsetMatrix))
+ val nonExponentiatedProbs = (logProbs * colorInfo(c).combinationMatrixSAME).t
+ val t3 = toc
+ runtimes(1) += t3 - t2
+
+ // Establish matrices needed for the multinomial sampling
+ val keys = if (user.ncols == batchSize) colorInfo(c).keysMatrix else colorInfo(c).keysMatrixLast
+ val bkeys = if (user.ncols == batchSize) colorInfo(c).bkeysMatrix else colorInfo(c).bkeysMatrixLast
+ val bkeysOff = if (user.ncols == batchSize) colorInfo(c).bkeysOffsets else colorInfo(c).bkeysOffsetsLast
+ val randIndices = if (user.ncols == batchSize) colorInfo(c).randMatrixIndices else colorInfo(c).randMatrixIndicesLast
+ val sampleIndices = if (user.ncols == batchSize) colorInfo(c).sampleIDindices else colorInfo(c).sampleIDindicesLast
+
+ // Parallel multinomial sampling. Check the colorInfo matrices since they contain a lot of info.
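+      // How the sampling below works (explanatory note): keys assigns one group per variable
+      // being sampled, so cumsumByKey turns probs into a per-group CDF; dividing by the group
+      // total (read off via bkeys) normalizes it; and counting how many normalized CDF entries
+      // fall below a uniform draw (the lessThan/cumsumByKey pair, read at sampleIndices) gives
+      // the sampled state index for each variable.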
+ //val maxInGroup = cummaxByKey(nonExponentiatedProbs, keys)(bkeys) // To prevent overflow (if needed).
+ //val probs = exp(nonExponentiatedProbs - maxInGroup) // To prevent overflow (if needed).
+ val t4 = toc
+ val probs = exp(nonExponentiatedProbs)
+ probs <-- (probs + 1e-30f) // Had to add this for the DLM MOOC data to prevent 0/(0+0) problems.
+ val cumprobs = cumsumByKey(probs, keys)
+ val normedProbs = cumprobs / cumprobs(bkeys)
+ val t5 = toc
+ runtimes(2) += t5 - t4
+
+ // With cumulative probabilities set up in normedProbs matrix, create a random matrix and sample
+ val randMatrix = randMap( (colorInfo(c).numNodes*opts.copiesForSAME, usertrans.nrows) )
+ rand(randMatrix)
+ randMatrix <-- randMatrix * 0.99999f
+ val lessThan = normedProbs < randMatrix(randIndices)
+ val sampleIDs = cumsumByKey(lessThan, keys)(sampleIndices)
+ usertrans(?, colorInfo(c).idsInColorSAME) = sampleIDs.t // Note the SAME now...
+ val t6 = toc
+ runtimes(3) += t6 - t5
+
+      // After sampling with this color group over all copies (from SAME), we restore the known values.
+ usertrans ~ (select ∘ (stackedData-1)).t + ((1-select) ∘ usertrans.t).t
+ val t7 = toc
+ runtimes(4) += t7 - t6
+ }
+
+ user <-- usertrans.t
+ }
+
+ /**
+ * After one set of Gibbs sampling iterations, we have a set of counts for each slot in the cpt.
+ * We add values from the dirichletPrior, then sample all the parameters independently from a Gamma
+ * distribution Gamma(shape,scale=1), where the shape is the count they have. Then the values are
+ * put in updatemats(0) to be "averaged into" the cpt based on IncNorm.
+ *
+ * @param sdata The sparse data matrix for this batch (0s = unknowns), which we do not use here.
+ * @param user A data matrix with the same dimensions as sdata, and whose columns represent various
+ * iid assignments to all the variables. The known values of sdata are inserted in the same spots
+ * in this matrix, but the unknown values are randomized to be in {0,1,...,k}.
+ * @param ipass The current pass over the full data source (not the Gibbs sampling iteration number).
+ */
+ def mupdate(sdata:Mat, user:Mat, ipass:Int):Unit = {
+ val t8 = toc
+ val index = int(cptOffsetSAME + (user.t * iprojectBlockedSAME).t)
+ val linearIndices = index(?)
+
+ // Drop the corresponding previous mini-batch and accumulate w/current mini-batch.
+ if (ipass > 0) {
+ counts1 ~ counts1 - counts2
+ }
+ counts1 ~ counts1 + float(accum(linearIndices, 1, counts1.length, 1))
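+    // Why Gamma draws: if g_i ~ Gamma(c_i, 1) independently, then g / sum(g) ~ Dirichlet(c),
+    // so sampling each slot from Gamma(count + prior, 1) and normalizing below is equivalent
+    // to drawing each local distribution from a Dirichlet posterior.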
+ gamrnd(counts1 + dirichletPrior, dirichletScale, counts3)
+
+ if (!isFactorModel) {
+ updatemats(0) <-- (counts3 / (counts3.t * normMat *^ normMat).t)
+ } else {
+ updatemats(0) <-- counts3
+ }
+ println("updatemats(0).t = " + updatemats(0).t)
+
+ val t9 = toc
+ runtimes(5) += t9 - t8
+ }
+
+ /**
+   * A placeholder evaluation method; it currently just returns FMat(0).
+ */
+ def evalfun(sdata:Mat, user:Mat):FMat = {
+ return FMat(0)
+ }
+
+ // -----------------------------------
+ // Various debugging or helper methods
+ // -----------------------------------
+
+ /**
+ * Determines a variety of information for this color group, and stores it in a ColorGroup object.
+ * First, it establishes some basic information from each color group. Then it computes the more
+   * complicated replication matrices, stride vectors, and combination matrices. Check the
+   * ColorGroup class for details on what the individual matrices represent.
+ *
+ * Actually, this method name is a bit misleading because some of the color group info relies on
+ * knowing the batch size, and we can't do that until we actually see the data.
+ *
+ * @param c The integer index of the given color group.
+ */
+ def computeAllColorGroupInfo(c:Int) : ColorGroup = {
+ val cg = new ColorGroup
+ cg.idsInColor = find(IMat(graph.colors) == c)
+ cg.numNodes = cg.idsInColor.length
+ cg.chIdsInColor = find(FMat(sum(SMat(pproject)(cg.idsInColor,?),1)))
+ cg.idsInColorSAME = cg.idsInColor
+ for (i <- 1 until opts.copiesForSAME) {
+ // Unlike other things where we could use kron, here we change indices b/c we use this
+ // for matrix indexing when "clearing out columns" in usertrans when sampling.
+ cg.idsInColorSAME = cg.idsInColorSAME on (cg.idsInColor + i*graph.n)
+ }
+ cg.numNodesCh = cg.chIdsInColor.length
+ cg.iprojectSliced = SMat(iproject)(?,cg.chIdsInColor)
+ cg.iprojectSlicedSAME = createBlockedDiagonal(cg.iprojectSliced)
+ cg.globalOffsetVector = convertMat(FMat(cptOffset(cg.chIdsInColor))) // Need FMat to avoid GMat+GIMat
+ cg.globalOffsetVectorSAME = kron(onesSAMEvector, cg.globalOffsetVector)
+ val startingIndices = izeros(cg.numNodes,1)
+ startingIndices(1 until cg.numNodes) = cumsum(IMat(statesPerNode(cg.idsInColor)))(0 until cg.numNodes-1)
+ cg.startingIndices = convertMat(startingIndices)
+
+ // Gather useful information for determining the replication, stride, and combination matrices
+ var ncols = 0
+ val numOnes = izeros(1,cg.numNodesCh) // Determine how many 1s to have
+ val strideFactors = izeros(1,cg.numNodesCh) // Get stride factors for the stride vector
+ val parentOf = izeros(1,cg.numNodesCh) // Get index of parent (or itself) in idsInColor
+ val fullIproject = full(iproject)
+ for (i <- 0 until cg.numNodesCh) {
+ var nodeIndex = cg.chIdsInColor(i).dv.toInt
+ if (IMat(cg.idsInColor).data.contains(nodeIndex)) { // This node is in the color group
+ numOnes(i) = statesPerNode(nodeIndex)
+ ncols = ncols + statesPerNode(nodeIndex).dv.toInt
+ strideFactors(i) = 1
+ parentOf(i) = IMat(cg.idsInColor).data.indexOf(nodeIndex)
+ } else { // This node is a child of a node in the color group
+ val parentIndices = find( FMat( sum(SMat(pproject)(?,nodeIndex),2) ) )
+ var parentIndex = -1
+ var k = 0
+ while (parentIndex == -1 && k < parentIndices.length) {
+ if (IMat(cg.idsInColor).data.contains(parentIndices(k))) {
+ parentIndex = parentIndices(k)
+ parentOf(i) = IMat(cg.idsInColor).data.indexOf(parentIndices(k))
+ }
+ k = k + 1
+ }
+ if (parentIndex == -1) {
+ throw new RuntimeException("Node at index " +nodeIndex+ " is missing a parent in its color group.")
+ }
+ numOnes(i) = statesPerNode(parentIndex)
+ ncols = ncols + statesPerNode(parentIndex).dv.toInt
+ strideFactors(i) = fullIproject(parentIndex,IMat(nodeIndex)).dv.toInt
+ }
+ }
+
+ // Form the replication (the dim is (#-of-ch_id-variables x ncols)) and stride matrices
+ var col = 0
+ val strideVector = izeros(1, ncols)
+ val ii = izeros(ncols, 1)
+ for (i <- 0 until cg.numNodesCh) {
+ val num = numOnes(i)
+ ii(col until col+num) = i
+ strideVector(col until col+num) = (0 until num)*strideFactors(i)
+ col = col + num
+ }
+ val jj = icol(0 until ncols)
+ val vv = ones(ncols, 1)
+ cg.strideVector = convertMat(strideVector)
+ // A bit confusing, since strideVector is a ROW vector
+ cg.strideVectorSAME = kron( onesSAMEvector.t, cg.strideVector)
+ cg.replicationMatrix = if (useGPUnow) GSMat(sparse(ii,jj,vv)) else sparse(ii,jj,vv)
+ cg.replicationMatrixSAME = createBlockedDiagonal(cg.replicationMatrix)
+
+ // Form keys and ikeys vectors
+ val numStatesIds = statesPerNode(cg.idsInColor)
+ val ncolsCombo = sum(numStatesIds).dv.toInt
+ val keys = izeros(1, ncolsCombo)
+ val scaledKeys = izeros(1, ncolsCombo)
+ val ikeys = izeros(1, cg.numNodes)
+ var keyIndex = 0
+ for (i <- 0 until cg.numNodes) {
+ val nodeIndex = cg.idsInColor(i)
+ val numStates = statesPerNode(nodeIndex).dv.toInt
+ keys(keyIndex until keyIndex+numStates) = nodeIndex * iones(1,numStates)
+ scaledKeys(keyIndex until keyIndex+numStates) = i * iones(1,numStates)
+ keyIndex += numStates
+ ikeys(i) = keyIndex-1
+ }
+ cg.scaledKeys = convertMat(scaledKeys)
+ cg.keys = convertMat(keys)
+ cg.ikeys = convertMat(ikeys)
+ cg.bkeys = cg.ikeys(cg.scaledKeys)
+
+    // Now make SAME versions of these! The keys vector needs extra entries appended at the
+    // end, incremented by graph.n, in case we have a color group with just one node.
+ cg.keysSAME = keys
+ for (i <- 1 until opts.copiesForSAME) {
+ cg.keysSAME = cg.keysSAME \ (keys + i*graph.n)
+ }
+ cg.keysSAME = convertMat(cg.keysSAME)
+ cg.bkeysSAME = cg.bkeys
+ for (i <- 1 until opts.copiesForSAME) {
+ cg.bkeysSAME = cg.bkeysSAME \ (cg.bkeys + i*(cg.bkeys).length)
+ }
+ cg.scaledKeysSAME = cg.scaledKeys
+ for (i <- 1 until opts.copiesForSAME) {
+ cg.scaledKeysSAME = cg.scaledKeysSAME \ (cg.scaledKeys + cg.numNodes)
+ }
+ cg.ikeysSAME = cg.ikeys
+ for (i <- 1 until opts.copiesForSAME) {
+ cg.ikeysSAME = cg.ikeysSAME \ (cg.ikeys + i*(cg.bkeys).length)
+ }
+
+ // Form the combination matrix (# of rows is # of columns of replication matrix)
+ val indicesColumns = izeros(1,cg.numNodes)
+ indicesColumns(1 until cg.numNodes) = cumsum(numStatesIds.asInstanceOf[IMat])(0 until cg.numNodes-1)
+ val nrowsCombo = ncols
+ val indicesRows = izeros(1,cg.numNodesCh)
+ indicesRows(1 until cg.numNodesCh) = cumsum(numOnes)(0 until numOnes.length-1)
+ val iii = izeros(nrowsCombo,1)
+ val jjj = izeros(nrowsCombo,1)
+ val vvv = ones(nrowsCombo,1)
+ for (i <- 0 until cg.numNodesCh) {
+ val p = parentOf(i) // Index into the node itself or its parent if it isn't in the color group
+ iii(indicesRows(i) until indicesRows(i)+numOnes(i)) = indicesRows(i) until indicesRows(i)+numOnes(i)
+ jjj(indicesRows(i) until indicesRows(i)+numOnes(i)) = indicesColumns(p) until indicesColumns(p)+numOnes(i)
+ }
+ cg.combinationMatrix = if (useGPUnow) {
+ GSMat(sparse(iii,jjj,vvv,nrowsCombo,ncolsCombo))
+ } else {
+ sparse(iii,jjj,vvv,nrowsCombo,ncolsCombo)
+ }
+ cg.combinationMatrixSAME = createBlockedDiagonal(cg.combinationMatrix)
+
+ cg.idsInColor = convertMat(cg.idsInColor)
+ cg.chIdsInColor = convertMat(cg.chIdsInColor)
+ if (useGPUnow) {
+ cg.iprojectSliced = GSMat(cg.iprojectSliced.asInstanceOf[SMat])
+ }
+ return cg
+ }
+
+ /**
+ * Called during the first pass over the data to set up matrices for later. These matrices are
+ * used in future uupdate calls, and they depend on the batch size, hence why we can only form
+ * these during the pass over the data, and not in init().
+ *
+ * There are several types of matrices we create:
+ *
+ * - "zero" matrices to put in zeroMap, for clearing out usertrans (must consider opts.copiesForSAME!)
+ * - "rand" matries to put in randMap, for containers to randomize values during sampling
+ * - five colorInfo(c) matrices for the purposes of sampling
+ *
+   * In the very likely case that the last batch does not have the same number of columns as the
+   * first n-1 batches, we repeat this process for that batch.
+ *
+ * @param ncols The number of columns in the current data, or the batch size.
+ */
+ def establishMatrices(ncols:Int) = {
+ if (batchSize == -1) { // Only true if we're on the first mini-batch of ipass = 0.
+ batchSize = ncols
+ val onesVector = mm.ones(1, ncols)
+ val untilVector = convertMat( float(0 until ncols) )
+ for (c <- 0 until graph.ncolors) {
+ val numVars = colorInfo(c).numNodes * opts.copiesForSAME // SAME!
+ val randOffsets = int(untilVector * numVars)
+ zeroMap += ((ncols,numVars) -> mm.zeros(ncols,numVars))
+ randMap += ((numVars,ncols) -> mm.zeros(numVars,ncols))
+ colorInfo(c).keysMatrix = (colorInfo(c).keysSAME).t * onesVector // keys -> keysSAME
+ colorInfo(c).bkeysOffsets = int(untilVector * colorInfo(c).keysSAME.ncols) // keys -> keysSAME
+ colorInfo(c).bkeysMatrix = int(colorInfo(c).bkeysSAME.t * onesVector) + colorInfo(c).bkeysOffsets // bkeys -> bkeysSAME
+ colorInfo(c).randMatrixIndices = int((colorInfo(c).scaledKeysSAME).t * onesVector) + randOffsets // scaledKeys -> scaledKeysSAME
+ colorInfo(c).sampleIDindices = int((colorInfo(c).ikeysSAME).t * onesVector) + colorInfo(c).bkeysOffsets // ikeys -> ikeysSAME
+ }
+ }
+ else if (ncols != batchSize) { // On the last batch of ipass = 0 w/different # of columns
+ val onesVectorLast = mm.ones(1, ncols)
+ val untilVectorLast = convertMat( float(0 until ncols) )
+ for (c <- 0 until graph.ncolors) {
+ val numVars = colorInfo(c).numNodes * opts.copiesForSAME // SAME!
+ val randOffsets = int(untilVectorLast * numVars)
+ zeroMap += ((ncols,numVars) -> mm.zeros(ncols,numVars))
+ randMap += ((numVars,ncols) -> mm.zeros(numVars,ncols))
+ colorInfo(c).keysMatrixLast = (colorInfo(c).keysSAME).t * onesVectorLast
+ colorInfo(c).bkeysOffsetsLast = int(untilVectorLast * colorInfo(c).keysSAME.ncols)
+ colorInfo(c).bkeysMatrixLast = int(colorInfo(c).bkeysSAME.t * onesVectorLast) + colorInfo(c).bkeysOffsetsLast
+ colorInfo(c).randMatrixIndicesLast = int((colorInfo(c).scaledKeysSAME).t * onesVectorLast) + randOffsets
+ colorInfo(c).sampleIDindicesLast = int((colorInfo(c).ikeysSAME).t * onesVectorLast) + colorInfo(c).bkeysOffsetsLast
+ }
+ }
+ }
+
+ /**
+ * Creates a matrix P such that, if our cpt is a ROW vector of COUNTS, then we NORMALIZE it by:
+ *
+ * cpt <-- (cpt / (cpt * P *^ P))
+ *
+   * If we use a column vector, it has to be "cpt <-- (cpt / (cpt.t * P *^ P).t)." Previously, we
+   * stored the full normalizer as a single matrix, but keeping only P and forming P *^ P saves space.
+ *
+ * P is structured so that columns represent a single distribution, and rows indicate the CPT
+ * components contributing to the distribution's normalizing constant. P *^ P will result in a
+ * matrix that has blocks of "1"s across the diagonal, with sizes varying due to the cardinality
+ * of variables. The cpt gets multiplied to sum up the components to get the normalizing
+ * constants (we normalize via the component-wise vector division). Finally, P is independent of
+ * the SAME parameter as it is only based on CPT length.
+ *
+ * @param cptLength The number of components in the CPT.
+ */
+ def getNormConstMatrix(cptLength : Int) : Mat = {
+ var numDistributions = 0
+ var jj = izeros(1,1)
+
+ for (k <- 0 until graph.n) {
+ var offset = cptOffset(k).dv.toInt
+ val numStates = statesPerNode(k).dv.toInt
+ val parentIndices = find(SMat(graph.dag)(?,k))
+
+ // Split based on no parents (one distribution) or >0 parents (>=2 distributions)
+ if (parentIndices.length == 0) {
+ jj = jj on ( iones(numStates,1) * numDistributions )
+ numDistributions += 1
+ } else {
+ val totalParentSlots = prod(IMat(statesPerNode)(parentIndices)).dv.toInt
+ for (i <- 0 until totalParentSlots) {
+ jj = jj on ( iones(numStates,1) * numDistributions )
+ numDistributions += 1
+ }
+ }
+ }
+
+ // Form our matrix using the standard 'sparse' method and return depending on GPU usage.
+ val P = sparse( (0 until cptLength) , jj(1 until jj.length) , ones(jj.length-1, 1) , cptLength, numDistributions)
+ if (useGPUnow) {
+ return GSMat(P)
+ } else {
+ return P
+ }
+ }
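+
+  /**
+   * A tiny sanity-check sketch of the normalization above (added for illustration; it is not
+   * called anywhere, and the helper name is ours). For two parentless binary variables the CPT
+   * has four slots, P is 4x2, and dividing by (cpt.t * P *^ P).t normalizes each distribution.
+   */
+  def normConstExampleSketch() = {
+    val jj = 0 on 0 on 1 on 1                          // Distribution ids: slots 0,1 and slots 2,3.
+    val P = sparse(icol(0 until 4), jj, ones(4, 1), 4, 2)
+    val cpt = FMat(2 on 6 on 1 on 3)                   // Raw counts for the two distributions.
+    cpt / (cpt.t * P *^ P).t                           // = 0.25 on 0.75 on 0.25 on 0.75.
+  }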
+
+ /**
+ * Given a matrix as input, we form a diagonal, blocked version of it. So if a is a (sparse) mat, it is
+ * like calling kron(mkdiag(ones(1,n)), full(a)), except I think this will be a lot more flexible later.
+ * Places where we use this: user.t * iproject, usertrans * colorInfo(c).iprojectSliced, etc.
+ *
+   * @param a A sparse matrix. It does not have to be square!
+ */
+ def createBlockedDiagonal(a:Mat) : Mat = {
+ val (ii,jj,vv) = find3(SMat(a))
+ val vvv = iones(opts.copiesForSAME,1) kron vv
+ var iii = izeros(1,1)
+ var jjj = izeros(1,1)
+ for (k <- 0 until opts.copiesForSAME) {
+ iii = iii on (ii + k*a.nrows)
+ jjj = jjj on (jj + k*a.ncols)
+ }
+ val res = sparse(iii(1 until iii.length), jjj(1 until jjj.length), vvv, a.nrows*opts.copiesForSAME, a.ncols*opts.copiesForSAME)
+ if (useGPUnow) return GSMat(res) else return res
+ }
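+
+  // Example (illustrative only): with opts.copiesForSAME = 2 and a = (1\2 on 3\4), the result
+  // is the 4x4 block-diagonal matrix with a as the top-left and bottom-right blocks and zeros
+  // elsewhere, exactly kron(mkdiag(ones(1,2)), full(a)).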
+
+ // ---------------------------------------------
+ // The remaining methods are for debugging only.
+ // ---------------------------------------------
+
+ /** A debugging method to print matrices, without being constrained by the command line's cropping. */
+ def printMatrix(mat: Mat) = {
+ for(i <- 0 until mat.nrows) {
+ for (j <- 0 until mat.ncols) {
+ print(mat(IMat(i),IMat(j)) + " ")
+ }
+ println()
+ }
+ }
+
+ /**
+ * A debugging method to compute the norm of difference between normalized real/estimated cpts.
+ * Note: this *does* assume our mm is already normalized!
+ * Obviously we'll have to replace the real cpt with what we already have...
+ */
+ def computeNormDifference(ipass:Int, here:Long) = {
+ val real = .7 on .3 on .6 on .4 on .95 on .05 on .2 on .8 on
+ .3 on .4 on .3 on .05 on .25 on .7 on .9 on .08 on .02 on .5 on .3 on .2 on .1 on .9 on .4 on .6 on .99 on .01
+ val differenceNorm = norm(real - mm)
+ println("Currently on ipass = " + ipass + " with here = " + here + "; l-2 norm of (realCpt - mm) is: " + differenceNorm)
+ }
+
+ /** KL divergence. We assume our mm is normalized. */
+ def computeKL(ipass:Int, here:Long, comparisonCPT:Mat) {
+
+    // Make a normalized copy of the cpt here so we don't mutate mm.
+ val cptCopy = mm + 0
+ cptCopy <-- (cptCopy / (cptCopy.t * normMat *^ normMat).t)
+
+ var klDivergence = convertMat(float(0))
+ var numDistributions = 0
+
+ for (k <- 0 until graph.n) {
+ var offset = cptOffset(k).dv.toInt
+ val numStates = statesPerNode(k).dv.toInt
+ val parentIndices = find(SMat(graph.dag)(?,k))
+
+ // Then split based on no parents (one distribution) or some parents (two or more distributions)
+ if (parentIndices.length == 0) {
+ var thisKL = convertMat(float(0))
+ for (j <- 0 until numStates) {
+ thisKL = thisKL + (comparisonCPT(offset+j) * ln( comparisonCPT(offset+j) / cptCopy(offset+j) ))
+ }
+ klDivergence = klDivergence + thisKL
+ numDistributions += 1
+ } else {
+ val totalParentSlots = prod(IMat(statesPerNode)(parentIndices)).dv.toInt
+ numDistributions += totalParentSlots
+ for (i <- 0 until totalParentSlots) {
+ var thisKL = convertMat(float(0))
+ for (j <- 0 until numStates) {
+ thisKL = thisKL + ( comparisonCPT(offset+j) * ln( comparisonCPT(offset+j) / cptCopy(offset+j) ))
+ }
+ klDivergence = klDivergence + thisKL
+ offset += numStates
+ }
+ }
+ }
+
+ klDivergence = klDivergence / numDistributions
+ println(klDivergence + " " + ipass + " KLDiv")
+ }
+
+ /** A one-liner that we can insert in a place with ipass and here to debug the cpt. */
+ def debugCpt(ipass:Int, here:Long) {
+ println("\n\nCurrently on ipass = " + ipass + " with here = " + here + ". This is the CPT:")
+ for (k <- 0 until graph.n) {
+ showCpt(k)
+ }
+ println()
+ }
+
+ /** A debugging method to print out the CPT of one variable (prettily). */
+ def showCpt(nodeID: Int) {
+ println("\nCPT for node indexed at " + nodeID)
+ val startingOffset = cptOffset(nodeID)
+ val numStates = statesPerNode(nodeID).dv.toInt
+ val normalizedCPT = ( mm / (mm.t * normMat *^ normMat).t )
+ val parentIndices = find(SMat(graph.dag)(?,nodeID))
+ println("Parents: " + parentIndices.t)
+
+ if (parentIndices.length == 0) {
+ var str = "\t"
+ for (j <- 0 until numStates) {
+ str += " %.4f".format(normalizedCPT(startingOffset + j).dv)
+ }
+ println(str)
+ } else {
+ val totalParentSlots = prod(IMat(statesPerNode)(parentIndices)).dv.toInt
+ val parentStates = statesPerNode(parentIndices)
+ val statesList = izeros(1,parentIndices.length)
+ var currentOffset = startingOffset
+ for (i <- 0 until totalParentSlots) {
+ if (i > 0) updateStatesString(statesList, parentStates, parentIndices.length-1)
+ var str = ""
+        for (k <- 0 until statesList.length) {
+          str += statesList(k).dv.toInt + " "
+        }
+ str += "\t"
+ for (j <- 0 until numStates) {
+ str += " %.4f".format(normalizedCPT(currentOffset + j).dv)
+ }
+ println(str)
+ currentOffset += numStates
+ }
+ }
+ }
+
+ /** Recursive, helper method for updating the states list. */
+ def updateStatesString(statesList:Mat, parentStates:Mat, j:Int) {
+ if (statesList(j).dv.toInt < parentStates(j).dv.toInt-1) {
+ statesList(j) += 1
+ } else {
+ statesList(j) = 0
+ updateStatesString(statesList, parentStates, j-1)
+ }
+ }
+
+}
+
+
+/**
+ * For the input to BayesNet, see the documentation at the top of this file; the input here is
+ * similar, except we also need the data set up. We can set options such as the SAME parameter here.
+ */
+object BayesNet {
+
+ trait Opts extends Model.Opts {
+ var copiesForSAME = 1
+ var initSmoothFactor = 1
+ }
+
+ class Options extends Opts {}
+
+ /**
+ * A learner with a matrix data source, with states per node, and with a dag prepared. Call this
+   * using some form of: val (nn, opts) = BayesNet.learner(states, dag, true, data).
+ */
+ def learner(statesPerNode:Mat, dag:Mat, isFactorModel:Boolean, data:Mat) = {
+
+ class xopts extends Learner.Options with BayesNet.Opts with MatSource.Opts with IncNorm.Opts
+ val opts = new xopts
+ opts.dim = dag.nrows
+ opts.batchSize = math.min(100000, data.ncols/50 + 1)
+ opts.useGPU = true
+ opts.npasses = 10
+    opts.isprob = false // Our CPT (a single column of counts) should NOT be normalized by the updater.
+ opts.putBack = 1 // Because this stores samples across ipasses, as required by Gibbs sampling
+ opts.power = 0.0f // So that the sampled CPT parameters are exactly what we use next iteration
+ val secondMatrix = data.zeros(opts.copiesForSAME*data.nrows,data.ncols)
+
+ val nn = new Learner(
+ new MatSource(Array(data:Mat, secondMatrix), opts),
+ new BayesNet(SMat(dag), statesPerNode, isFactorModel, opts),
+ null,
+ new IncNorm(opts),
+ null,
+ opts)
+ (nn, opts)
+ }
+}
+
+/**
+ * Graph structure for a factor graph. Since it's a factor graph, we don't need to moralize: we can
+ * color it directly. This code overrides the moralize, iproject, and pproject definitions, but when
+ * we color, we use the Graph's method as it only relies on the moralized graph (matrix).
+ *
+ * @param factorSet A 2-d mat whose column j marks (with nonzeros at the vertex rows) the vertices in factor j.
+ * @param statesPerNode A 1-d mat containing the cardinality of each variable.
+ * @param n The number of vertices in the graph.
+ */
+class FactorGraph(val factorSet: Mat, override val n: Int, override val statesPerNode: Mat) extends Graph(factorSet, n, statesPerNode) {
+
+  nFactor = factorSet.ncols // The number of columns of pproject; for a Bayes net, nFactor == n.
+
+ /**
+   * Build the mrf from the input factors, i.e. re-construct the graph structure matrix.
+   * If there is a self-edge (caused by a factor containing only one vertex), we ignore
+   * that self-edge in the mrf.
+ */
+ override def moralize = {
+ mrf = izeros(n, n)
+ for (i <- 0 until factorSet.ncols) {
+ val factors = find(SMat(factorSet(?, i)))
+ if (factors.length > 1) {
+ // we ignore the self-edge here
+        for (origin <- factors.data) {
+          for (des <- factors.data) {
+            if (origin != des) {
+              mrf(origin, des) = 1
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /**
+   * Function to construct the iproject matrix. Its shape is (number of factors) x n.
+ * (x1, x2,..., xn) * iproject.t -> local index for corresponding probability value in cpt.
+ */
+ override def iproject : SMat = {
+ var res = zeros(nFactor, n)
+ for (i <- 0 until nFactor) {
+ val parents = find(SMat(factorSet(?, i)))
+ var cumRes = 1f
+ val parentsLen = parents.length
+ for (j <- 0 until parentsLen) {
+ if (j > 0) {
+ cumRes = cumRes * IMat(statesPerNode)(parents(parentsLen - j))
+ }
+ res(i, parents(parentsLen - j - 1)) = cumRes
+ }
+ }
+ return sparse(res)
+ }
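+
+  // Example (illustrative): for a factor over x2 (3 states) and x5 (2 states), the factor's row
+  // of iproject holds 2 at column 2 and 1 at column 5, so an assignment (x2 = a, x5 = b) maps to
+  // local CPT index 2*a + b, i.e. the last variable in the factor varies fastest.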
+
+ /**
+   * Function to derive the pproject matrix, which represents the correspondence between
+   * vertex ids and factors. Each column represents one factor, and each row holds the
+   * binary indicators of whether that vertex appears in the factor group.
+ **/
+ override def pproject : SMat = {
+ return SMat(factorSet)
+ }
+
+}
+
+
+/**
+ * A graph structure for Bayesian Networks. Includes features for:
+ *
+ * (1) moralizing graphs; in the 'moral' matrix, (i,j) = 1 means node i is connected to node j
+ * (2) coloring moralized graphs; maxColor just bounds the size of the color-count vector
+ *
+ * @param dag An adjacency matrix with a 1 at (i,j) if node i has an edge TOWARDS node j.
+ * @param n The number of vertices in the graph.
+ * @param statesPerNode A column vector where elements denote number of states for corresponding variables.
+ */
+class Graph(val dag: Mat, val n: Int, val statesPerNode: Mat) {
+
+ var mrf: Mat = null
+ var colors: Mat = null
+ var ncolors = 0
+ val maxColor = 100
+  var nFactor = n // The number of columns of pproject; for a Bayes net, nFactor == n.
+
+ /**
+ * Connects the parents of a certain node, a single step in the process of moralizing the graph.
+ *
+   * Iterates through the parent indices and inserts 1s in the 'moral' matrix to indicate an edge.
+ *
+ * @param moral A matrix that represents an adjacency matrix "in progress" in the sense that it
+ * is continually getting updated each iteration from the "moralize" method.
+ * @param parents An array representing the parent indices of the node of interest.
+ */
+ def connectParents(moral: FMat, parents: IMat) = {
+ val l = parents.length
+ for (i <- 0 until l) {
+ for (j <- 0 until l) {
+ if (parents(i) != parents(j)) {
+ moral(parents(i), parents(j)) = 1f
+ }
+ }
+ }
+ moral
+ }
+
+ /** Forms the pproject matrix (dag + identity) used for computing model parameters. */
+ def pproject : SMat = {
+ return SMat(dag) + sparse(IMat(0 until n), IMat(0 until n), ones(1, n))
+ }
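+
+  // Example (illustrative): with n = 3 and a single edge 0 -> 2, pproject is the 3x3 identity
+  // plus a 1 at (0,2); column k then selects node k together with its parents, which is how
+  // per-node probabilities get combined.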
+
+ /**
+ * Forms the iproject matrix, which is left-multiplied to send a Pr(X_i | parents) query to its
+ * appropriate spot in the cpt via LOCAL offsets for X_i.
+ */
+ def iproject : SMat = {
+ var res = (pproject.copy).t
+ for (i <- 0 until n) {
+ val parents = find(SMat(pproject(?, i)))
+ var cumRes = 1f
+ val parentsLen = parents.length
+ for (j <- 1 until parentsLen) {
+ cumRes = cumRes * IMat(statesPerNode)(parents(parentsLen - j))
+ res.asInstanceOf[SMat](i, parents(parentsLen - j - 1)) = cumRes
+ }
+ }
+ return SMat(res)
+ }
+
+ /**
+ * Moralize the graph.
+ *
+ * This means we convert the graph from directed to undirected and connect parents of nodes in
+ * the directed graph. First, copy the dag to the moral graph because all 1s in the dag matrix
+ * are 1s in the moral matrix (these are adjacency matrices). For each node, find its parents,
+ * connect them, and update the matrix. Then make it symmetric because the graph is undirected.
+ */
+ def moralize = {
+ var moral = full(dag)
+ for (i <- 0 until n) {
+ var parents = find(SMat(dag(?, i)))
+ moral = connectParents(FMat(moral), parents)
+ }
+ mrf = ((moral + moral.t) > 0)
+ }
+
+ /**
+ * Sequentially colors the moralized graph of the dag so that one can run parallel Gibbs sampling.
+ *
+ * Steps: first, moralize the graph. Then iterate through each node, find its neighbors, and apply a
+ * "color mask" to ensure current node doesn't have any of those colors. Then find the legal color
+ * with least count (a useful heuristic). If that's not possible, then increase "ncolor".
+ */
+ def color = {
+ moralize
+ var colorCount = izeros(maxColor, 1)
+ colors = -1 * iones(n, 1)
+ ncolors = 0
+
+ // Access nodes sequentially. Find the color map of its neighbors, then find the legal color w/least count
+ val seq = IMat(0 until n)
+ // Can also access nodes randomly
+ // val r = rand(n, 1); val (v, seq) = sort2(r)
+ for (i <- 0 until n) {
+ var node = seq(i)
+ var nbs = find(FMat(mrf(?, node)))
+ var colorMap = iones(ncolors, 1)
+ for (j <- 0 until nbs.length) {
+ if (colors(nbs(j)).dv.toInt > -1) {
+ colorMap(colors(nbs(j))) = 0
+ }
+ }
+ var c = -1
+ var minc = 999999
+ for (k <- 0 until ncolors) {
+ if ((colorMap(k) > 0) && (colorCount(k) < minc)) {
+ c = k
+ minc = colorCount(k)
+ }
+ }
+ if (c == -1) {
+ c = ncolors
+ ncolors = ncolors + 1
+ }
+ colors(node) = c
+ colorCount(c) += 1
+ }
+ colors
+ }
+}
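+
+// A tiny usage sketch for Graph (illustrative only; the literal values assume BIDMat's scalar
+// `on` and `sparse` helpers used elsewhere in this file):
+//
+//   val dag = sparse(0 on 1, 2 on 2, ones(2, 1), 3, 3)  // collider: edges 0 -> 2 and 1 -> 2
+//   val g = new Graph(dag, 3, 2 on 2 on 2)              // three binary variables
+//   g.color                                             // moralizing connects parents 0 and 1,
+//                                                       // so the triangle needs g.ncolors == 3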
+
+
+/**
+ * This will store a lot of pre-computed variables (mostly matrices) for each color group.
+ *
+ * A high-level description of the categories:
+ *
+ * - numNodes and numNodesCh are the number of nodes, and the number of nodes and children
+ * in this color group, respectively.
+ * - idsInColor and chIdsInColor are indices of the variables in this color group, and in
+ * this color group plus children of those nodes, respectively.
+ * - replicationMatrix is a sparse matrix of rows of ones, used to replicate columns
+ * - strideVector is a vector where groups are (0 until k)*stride(x) where k is determined
+ * by the node or its parent, and stride(x) is 1 if the node is in the color group.
+ * - combinationMatrix is a sparse identity matrix that combines parents with children for
+ * probability computations
+ * - keys, scaledKeys, ikeys, and bkeys help us with multinomial sampling
+ * - The remaining ten (!) matrices rely on knowledge of the batch size. They are expanded
+ * versions of the previous matrices that use the batch size to increase their elements.
+ * - Oh! Don't forget that we have SAME versions of these!
+ */
+class ColorGroup {
+ var numNodes:Int = -1
+ var numNodesCh:Int = -1
+ var idsInColor:Mat = null
+ var idsInColorSAME:Mat = null
+ var chIdsInColor:Mat = null
+ var globalOffsetVector:Mat = null
+ var globalOffsetVectorSAME:Mat = null
+ var iprojectSliced:Mat = null
+ var iprojectSlicedSAME:Mat = null
+ var startingIndices:Mat = null
+ var replicationMatrix:Mat = null
+ var replicationMatrixSAME:Mat = null
+ var strideVector:Mat = null
+ var strideVectorSAME:Mat = null
+ var combinationMatrix:Mat = null
+ var combinationMatrixSAME:Mat = null
+
+ var keys:Mat = null
+ var scaledKeys:Mat = null
+ var ikeys:Mat = null
+ var bkeys:Mat = null
+ var keysMatrix:Mat = null
+ var keysMatrixLast:Mat = null
+ var bkeysMatrix:Mat = null
+ var bkeysMatrixLast:Mat = null
+ var bkeysOffsets:Mat = null
+ var bkeysOffsetsLast:Mat = null
+ var sampleIDindices:Mat = null
+ var sampleIDindicesLast:Mat = null
+ var randMatrixIndices:Mat = null
+ var randMatrixIndicesLast:Mat = null
+
+ var keysSAME:Mat = null
+ var bkeysSAME:Mat = null
+ var scaledKeysSAME:Mat = null
+ var ikeysSAME:Mat = null
+}
diff --git a/src/main/scala/BIDMach/models/Click.scala b/src/main/scala/BIDMach/models/Click.scala
index 03479c94..71e43f4f 100755
--- a/src/main/scala/BIDMach/models/Click.scala
+++ b/src/main/scala/BIDMach/models/Click.scala
@@ -42,7 +42,7 @@ import BIDMach._
* nn.train // train the model
* nn.modelmat // get the final model
* nn.datamat // get the other factor
- * }}}
+ * }}}
*/
class Click(override val opts:Click.Opts = new Click.Options) extends FactorModel(opts) {
@@ -52,12 +52,12 @@ class Click(override val opts:Click.Opts = new Click.Options) extends FactorMode
/** Sets up the modelmats and updatemats arrays and initializes modelmats(0) randomly unless stated otherwise. */
override def init() = {
- super.init();
- mm = modelmats(0);
+ super.init()
+ mm = modelmats(0)
if (refresh) {
- setmodelmats(Array(mm, mm.ones(mm.nrows, 1)));
+ setmodelmats(Array(mm, mm.ones(mm.nrows, 1)))
}
- updatemats = new Array[Mat](2);
+ updatemats = new Array[Mat](2)
updatemats(0) = mm.zeros(mm.nrows, mm.ncols); // The actual model matrix
updatemats(1) = mm.zeros(mm.nrows, 1);
}
@@ -75,13 +75,13 @@ class Click(override val opts:Click.Opts = new Click.Options) extends FactorMode
* @param ipass Index of the pass over the data (0 = first pass, 1 = second pass, etc.).
*/
def uupdate(views:Mat, clicks:Mat, user:Mat, ipass:Int, pos:Long):Unit = {
- if (putBack < 0 || ipass == 0) user.set(1f);
+ if (putBack < 0 || ipass == 0) user.set(1f)
for (i <- 0 until opts.uiter) {
- val preds = DDS(mm, user, views);
+      val preds = DDS(mm, user, views)
// if (ipass == 0 && pos <= 10000) println("preds "+preds.contents(0->20))
val dc = clicks.contents - opts.clickOffset; // Subtract one assuming click counts incremented to avoid sparse matrix misalignment
- val dv = views.contents;
- val pc = preds.contents;
+ val dv = views.contents
+ val pc = preds.contents
pc ~ pc ∘ dv; // scale the click prediction by the number of views
max(opts.weps, pc, pc)
pc ~ dc / pc
@@ -89,7 +89,7 @@ class Click(override val opts:Click.Opts = new Click.Options) extends FactorMode
if (opts.exppsi) exppsi(unew, unew)
user <-- unew
// if (ipass == 0 && pos <= 10000) println("user "+ user(0->20))
- }
+ }
}
/**
@@ -99,21 +99,21 @@ class Click(override val opts:Click.Opts = new Click.Options) extends FactorMode
* typically much smaller than the total number of documents, so sdata is usually a portion of the full input.
* @param user An (opts.dim x opts.batchSize) matrix that stores some intermediate/temporary data and gets left-
* multiplied by modelmats(0) to form sdata.
- * @param ipass Index of the pass over the data (0 = first pass, 1 = second pass, etc.).
+ * @param ipass Index of the pass over the data (0 = first pass, 1 = second pass, etc.).
*/
def mupdate(views:Mat, clicks:Mat, user:Mat, ipass:Int, pos:Long):Unit = {
- val preds = DDS(mm, user, views);
- val dc = clicks.contents -opts.clickOffset;
- val dv = views.contents;
- val pc = preds.contents;
+ val preds = DDS(mm, user, views)
+ val dc = clicks.contents -opts.clickOffset
+ val dv = views.contents
+ val pc = preds.contents
pc ~ pc ∘ dv;
- max(opts.weps, pc, pc);
+ max(opts.weps, pc, pc)
pc ~ dc / pc
val ud = user *^ preds
ud ~ ud ∘ mm
ud ~ ud + opts.beta
- updatemats(0) <-- ud
- sum(ud, 2, updatemats(1))
+ updatemats(0) <-- ud
+ sum(ud, 2, updatemats(1))
}
/**
@@ -126,33 +126,33 @@ class Click(override val opts:Click.Opts = new Click.Options) extends FactorMode
* @param ipass Index of the pass over the data (0 = first pass, 1 = second pass, etc.).
*/
override def evalfun(views:Mat, clicks:Mat, user:Mat, ipass:Int, pos:Long):FMat = {
- if (ogmats != null) ogmats(0) = user;
- val preds = DDS(mm, user, views);
- val dc = clicks.contents - opts.clickOffset;
+ if (ogmats != null) ogmats(0) = user
+ val preds = DDS(mm, user, views)
+ val dc = clicks.contents - opts.clickOffset
val dv = views.contents;
- val pc = preds.contents;
- pc ~ pc ∘ dv;
- max(opts.weps, pc, pc);
- val spc = sum(pc);
- ln(pc, pc);
- val vv = ((dc ∙ pc) - sum(gammaln(dc + 1)) - spc).dv / dc.length;
- row(vv)
+ val pc = preds.contents
+ pc ~ pc ∘ dv
+ max(opts.weps, pc, pc)
+ val spc = sum(pc)
+ ln(pc, pc)
+ val vv = ((dc ∙ pc) - sum(gammaln(dc + 1)) - spc).dv / dc.length
+ row(vv)
}
override def dobatch(gmats:Array[Mat], ipass:Int, i:Long) = {
- val views = gmats(0);
- val clicks = gmats(1);
+ val views = gmats(0)
+ val clicks = gmats(1)
val user = if (gmats.length > 2) gmats(2) else FactorModel.reuseuser(gmats(0), opts.dim, opts.initUval)
uupdate(views, clicks, user, ipass, i)
mupdate(views, clicks, user, ipass, i)
}
override def evalbatch(mats:Array[Mat], ipass:Int, here:Long):FMat = {
- val views = gmats(0);
- val clicks = gmats(1);
- val user = if (gmats.length > 2) gmats(2) else FactorModel.reuseuser(gmats(0), opts.dim, opts.initUval);
- uupdate(views, clicks, user, ipass, here);
- evalfun(views, clicks, user, ipass, here);
+ val views = gmats(0)
+ val clicks = gmats(1)
+ val user = if (gmats.length > 2) gmats(2) else FactorModel.reuseuser(gmats(0), opts.dim, opts.initUval)
+ uupdate(views, clicks, user, ipass, here)
+ evalfun(views, clicks, user, ipass, here)
}
def uupdate(data:Mat, user:Mat, ipass:Int, pos:Long) = {}
@@ -175,12 +175,12 @@ object Click {
/** Creates a new Click model. */
def mkClickmodel(fopts:Model.Opts) = {
- new Click(fopts.asInstanceOf[Click.Opts])
+ new Click(fopts.asInstanceOf[Click.Opts])
}
/** Creates a new IncNorm updater. */
def mkUpdater(nopts:Updater.Opts) = {
- new IncNorm(nopts.asInstanceOf[IncNorm.Opts])
+ new IncNorm(nopts.asInstanceOf[IncNorm.Opts])
}
/** Online Variational Bayes Click algorithm with a two matrix datasource. */
@@ -189,13 +189,13 @@ object Click {
val opts = new xopts
opts.dim = 1
opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
- val nn = new Learner(
- new MatSource(Array(mat0, mat1), opts),
- new Click(opts),
- null,
- new IncNorm(opts),
- null,
- opts)
+ val nn = new Learner(
+ new MatSource(Array(mat0, mat1), opts),
+ new Click(opts),
+ null,
+ new IncNorm(opts),
+ null,
+ opts)
(nn, opts)
}
@@ -208,16 +208,16 @@ object Click {
val opts = new FsOpts
opts.dim = d
opts.fnames = fnames
- opts.batchSize = 100000;
- opts.eltsPerSample = 500;
+ opts.batchSize = 100000
+ opts.eltsPerSample = 500
implicit val threads = threadPool(4)
- val nn = new Learner(
- new SFileSource(opts),
- new Click(opts),
- null,
- new IncNorm(opts),
- null,
- opts)
+ val nn = new Learner(
+ new SFileSource(opts),
+ new Click(opts),
+ null,
+ new IncNorm(opts),
+ null,
+ opts)
(nn, opts)
}
@@ -237,14 +237,14 @@ object Click {
(nn, opts)
}
- class PredOptions extends Learner.Options with Click.Opts with MatSource.Opts with MatSink.Opts;
+ class PredOptions extends Learner.Options with Click.Opts with MatSource.Opts with MatSink.Opts
// This function constructs a predictor from an existing model
def predictor(model:Model, mat0:Mat, mat1:Mat):(Learner, PredOptions) = {
- val nopts = new PredOptions;
+ val nopts = new PredOptions
nopts.batchSize = math.min(10000, mat1.ncols/30 + 1)
- nopts.dim = model.opts.dim;
- val newmod = new Click(nopts);
+ nopts.dim = model.opts.dim
+ val newmod = new Click(nopts)
newmod.refresh = false
model.copyTo(newmod)
val nn = new Learner(
@@ -259,44 +259,44 @@ object Click {
/** Parallel online Click algorithm with a matrix datasource. */
def learnPar(mat0:Mat, mat1:Mat) = {
- class xopts extends ParLearner.Options with Click.Opts with MatSource.Opts with IncNorm.Opts;
- val opts = new xopts;
- opts.dim = 1;
- opts.batchSize = math.min(100000, mat0.ncols/30/opts.nthreads + 1);
+ class xopts extends ParLearner.Options with Click.Opts with MatSource.Opts with IncNorm.Opts
+ val opts = new xopts
+ opts.dim = 1
+ opts.batchSize = math.min(100000, mat0.ncols/30/opts.nthreads + 1)
opts.coolit = 0 // Assume we dont need cooling on a matrix input
- val nn = new ParLearnerF(
- new MatSource(Array(mat0:Mat), opts),
- opts, mkClickmodel _,
- null, null,
- opts, mkUpdater _,
- null, null,
- opts)
+ val nn = new ParLearnerF(
+ new MatSource(Array(mat0:Mat), opts),
+ opts, mkClickmodel _,
+ null, null,
+ opts, mkUpdater _,
+ null, null,
+ opts)
(nn, opts)
}
class SFDSopts extends ParLearner.Options with Click.Opts with SFileSource.Opts with IncNorm.Opts
- def learnPar(fnames:String, d:Int):(ParLearnerF, SFDSopts) = learnPar(List(FileSource.simpleEnum(fnames, 1, 0)), d);
+ def learnPar(fnames:String, d:Int):(ParLearnerF, SFDSopts) = learnPar(List(FileSource.simpleEnum(fnames, 1, 0)), d)
/** Parallel online Click algorithm with one file datasource. */
def learnPar(fnames:List[(Int) => String], d:Int):(ParLearnerF, SFDSopts) = {
- val opts = new SFDSopts;
- opts.dim = d;
- opts.npasses = 4;
- opts.fnames = fnames;
- opts.batchSize = 100000;
- opts.eltsPerSample = 500;
- opts.resFile = "../results.mat"
- implicit val threads = threadPool(4)
- val nn = new ParLearnerF(
- new SFileSource(opts),
- opts, mkClickmodel _,
- null, null,
- opts, mkUpdater _,
- null, null,
- opts
- )
- (nn, opts)
+ val opts = new SFDSopts
+ opts.dim = d
+ opts.npasses = 4
+ opts.fnames = fnames
+ opts.batchSize = 100000
+ opts.eltsPerSample = 500
+ opts.resFile = "../results.mat"
+ implicit val threads = threadPool(4)
+ val nn = new ParLearnerF(
+ new SFileSource(opts),
+ opts, mkClickmodel _,
+ null, null,
+ opts, mkUpdater _,
+ null, null,
+ opts
+ )
+ (nn, opts)
}
}
diff --git a/src/main/scala/BIDMach/models/Clustering.scala b/src/main/scala/BIDMach/models/Clustering.scala
index 54e53c0d..4ef41870 100755
--- a/src/main/scala/BIDMach/models/Clustering.scala
+++ b/src/main/scala/BIDMach/models/Clustering.scala
@@ -8,7 +8,7 @@ import BIDMach.updaters._
import BIDMach._
/**
- * An abstract class with shared code for Clustering Models
+ * An abstract class with shared code for Clustering Models
*/
abstract class ClusteringModel(override val opts:ClusteringModel.Opts) extends Model {
var lastpos = 0L
@@ -19,13 +19,13 @@ abstract class ClusteringModel(override val opts:ClusteringModel.Opts) extends M
val data0 = mats(0)
val m = data0.nrows
if (refresh) {
- val mmi = rand(opts.dim, m);
- setmodelmats(Array(mmi));
+      val mmi = rand(opts.dim, m)
+ setmodelmats(Array(mmi))
}
modelmats(0) = convertMat(modelmats(0))
updatemats = new Array[Mat](1)
updatemats(0) = modelmats(0).zeros(modelmats(0).nrows, modelmats(0).ncols)
- lastpos = 0;
+ lastpos = 0
}
def mupdate(data:Mat, ipass:Int):Unit
@@ -35,40 +35,40 @@ abstract class ClusteringModel(override val opts:ClusteringModel.Opts) extends M
def evalfun(data:Mat, targ:Mat):FMat = {col(0)}
def dobatch(gmats:Array[Mat], ipass:Int, here:Long) = {
- val mm = modelmats(0);
- val gm = gmats(0);
+ val mm = modelmats(0)
+ val gm = gmats(0)
if (ipass == 0) {
if (here.toInt == gm.ncols) {
println("First pass random centroid initialization")
}
- val gg = full(gm).t;
+ val gg = full(gm).t
val lastp = lastpos.toInt
if (lastp < mm.nrows - 1) {
- val step = math.min(gg.nrows, mm.nrows - lastp);
- mm(lastp->(lastp+step),?) = gg(0->step, ?);
+ val step = math.min(gg.nrows, mm.nrows - lastp)
+ mm(lastp->(lastp+step),?) = gg(0->step, ?)
// full(gm).t.rowslice(0, math.min(gm.ncols, mm.nrows - lastp), mm, lastp)
} else {
- val rp1 = randperm(gm.ncols);
- val rp2 = randperm(mm.nrows);
- val pp = ((here - lastpos) * mm.nrows / here).toInt;
+ val rp1 = randperm(gm.ncols)
+ val rp2 = randperm(mm.nrows)
+ val pp = ((here - lastpos) * mm.nrows / here).toInt
// println("here %d lastpos %d pp %d" format (here, lastpos,pp))
if (pp > 0) {
mm(rp2(0->pp), ?) = gg(rp1(0->pp), ?);
}
}
- lastpos = here;
+ lastpos = here
} else {
mupdate(gmats(0), ipass)
}
}
def evalbatch(mats:Array[Mat], ipass:Int, here:Long):FMat = {
- lastpos = here;
- if (mats.length == 1) {
- evalfun(gmats(0));
- } else {
- evalfun(gmats(0), gmats(1));
- }
+ lastpos = here
+ if (mats.length == 1) {
+ evalfun(gmats(0))
+ } else {
+ evalfun(gmats(0), gmats(1))
+ }
}
}
diff --git a/src/main/scala/BIDMach/models/FM.scala b/src/main/scala/BIDMach/models/FM.scala
index e6eb0e63..9ff5d7f8 100755
--- a/src/main/scala/BIDMach/models/FM.scala
+++ b/src/main/scala/BIDMach/models/FM.scala
@@ -59,13 +59,13 @@ import BIDMach._
* // typically set options, then do mm.train; nn.predict with results in pc.
* val (mm, opts) = learner(ds) // Build a learner for a general datasource ds (e.g. a files data source).
* }}}
- *
+ *
*/
class FM(override val opts:FM.Opts = new FM.Options) extends RegressionModel(opts) {
- var mylinks:Mat = null;
- var iweight:Mat = null;
+ var mylinks:Mat = null
+ var iweight:Mat = null
val linkArray = GLM.linkArray
@@ -82,39 +82,39 @@ class FM(override val opts:FM.Opts = new FM.Options) extends RegressionModel(opt
var llim:Mat = null
override def copyTo(mod:Model) = {
- super.copyTo(mod);
- val rmod = mod.asInstanceOf[FM];
- rmod.mylinks = mylinks;
+ super.copyTo(mod)
+ val rmod = mod.asInstanceOf[FM]
+ rmod.mylinks = mylinks
rmod.iweight = iweight;
- rmod.mv = mv;
- rmod.mm1 = mm1;
- if (opts.dim2 > 0) rmod.mm2 = mm2;
- rmod.uv = uv;
- rmod.um1 = um1;
- if (opts.dim2 > 0) rmod.um2 = um2;
+ rmod.mv = mv
+ rmod.mm1 = mm1
+ if (opts.dim2 > 0) rmod.mm2 = mm2
+ rmod.uv = uv
+ rmod.um1 = um1
+ if (opts.dim2 > 0) rmod.um2 = um2
}
override def init() = {
super.init()
mylinks = if (useGPU) GIMat(opts.links) else opts.links
- iweight = if (opts.iweight.asInstanceOf[AnyRef] != null) convertMat(opts.iweight) else null;
- ulim = convertMat(row(opts.lim));
- llim = convertMat(row(-opts.lim));
+ iweight = if (opts.iweight.asInstanceOf[AnyRef] != null) convertMat(opts.iweight) else null
+ ulim = convertMat(row(opts.lim))
+ llim = convertMat(row(-opts.lim))
if (refresh) {
- mv = modelmats(0);
- mm1 = convertMat(normrnd(0, opts.initscale/math.sqrt(opts.dim1).toFloat, opts.dim1, mv.ncols));
- if (opts.dim2 > 0) mm2 = convertMat(normrnd(0, opts.initscale/math.sqrt(opts.dim2).toFloat, opts.dim2, mv.ncols));
- if (opts.dim2 > 0) setmodelmats(Array(mv, mm1, mm2)) else setmodelmats(Array(mv, mm1))
- if (mask.asInstanceOf[AnyRef] != null) {
- mv ~ mv ∘ mask;
- mm1 ~ mm1 ∘ mask;
- if (opts.dim2 > 0) mm2 ~ mm2 ∘ mask;
- }
+ mv = modelmats(0)
+ mm1 = convertMat(normrnd(0, opts.initscale/math.sqrt(opts.dim1).toFloat, opts.dim1, mv.ncols))
+ if (opts.dim2 > 0) mm2 = convertMat(normrnd(0, opts.initscale/math.sqrt(opts.dim2).toFloat, opts.dim2, mv.ncols))
+ if (opts.dim2 > 0) setmodelmats(Array(mv, mm1, mm2)) else setmodelmats(Array(mv, mm1))
+ if (mask.asInstanceOf[AnyRef] != null) {
+ mv ~ mv ∘ mask
+ mm1 ~ mm1 ∘ mask
+ if (opts.dim2 > 0) mm2 ~ mm2 ∘ mask
+ }
}
- (0 until modelmats.length).map((i) => modelmats(i) = convertMat(modelmats(i)));
- mv = modelmats(0);
- mm1 = modelmats(1);
- if (opts.dim2 > 0) mm2 = modelmats(2);
+ (0 until modelmats.length).map((i) => modelmats(i) = convertMat(modelmats(i)))
+ mv = modelmats(0)
+ mm1 = modelmats(1)
+ if (opts.dim2 > 0) mm2 = modelmats(2)
uv = updatemats(0)
um1 = uv.zeros(opts.dim1, uv.ncols)
if (opts.dim2 > 0) um2 = uv.zeros(opts.dim2, uv.ncols)
@@ -133,30 +133,30 @@ class FM(override val opts:FM.Opts = new FM.Options) extends RegressionModel(opt
mupdate3(in, alltargs, dweights)
}
- def mupdate2(in:Mat, targ:Mat, ipass:Int, pos:Long) = mupdate3(in, targ, null);
+ def mupdate2(in:Mat, targ:Mat, ipass:Int, pos:Long) = mupdate3(in, targ, null)
// Update the positive/negative factorizations
def mupdate3(in:Mat, targ:Mat, dweights:Mat) = {
- val ftarg = full(targ);
+ val ftarg = full(targ)
val vt1 = mm1 * in
var vt2:Mat = null
val eta = mv * in + (vt1 ∙ vt1)
if (opts.dim2 > 0) {
- vt2 = mm2 * in;
- eta ~ eta - (vt2 ∙ vt2);
+ vt2 = mm2 * in
+ eta ~ eta - (vt2 ∙ vt2)
}
if (opts.strictFM) { // Strictly follow the FM formula (remove diag terms) vs. let linear predictor cancel them.
xs = in.copy
(xs.contents ~ xs.contents) ∘ xs.contents // xs is the element-wise square of in.
if (opts.dim2 > 0) {
- eta ~ eta - (((mm1 ∘ mm1) - (mm2 ∘ mm2)) * xs)
+ eta ~ eta - (((mm1 ∘ mm1) - (mm2 ∘ mm2)) * xs)
} else {
eta ~ eta - ((mm1 ∘ mm1) * xs)
}
}
if (opts.lim > 0) {
- max(eta, llim, eta);
- min(eta, ulim, eta);
+ max(eta, llim, eta)
+ min(eta, ulim, eta)
}
GLM.preds(eta, eta, mylinks, totflops)
GLM.derivs(eta, ftarg, eta, mylinks, totflops)
@@ -166,21 +166,21 @@ class FM(override val opts:FM.Opts = new FM.Options) extends RegressionModel(opt
if (opts.dim2 > 0) um2 ~ ((eta * -2f) ∘ vt2) *^ in
if (opts.strictFM) {
val xeta = (eta * 2f) *^ xs
- um1 ~ um1 - (mm1 ∘ xeta);
- if (opts.dim2 > 0) um2 ~ um2 + (mm2 ∘ xeta);
+ um1 ~ um1 - (mm1 ∘ xeta)
+ if (opts.dim2 > 0) um2 ~ um2 + (mm2 ∘ xeta)
}
if (mask.asInstanceOf[AnyRef] != null) {
- uv ~ uv ∘ mask;
- um1 ~ um1 ∘ mask;
- if (opts.dim2 > 0) um2 ~ um2 ∘ mask;
+ uv ~ uv ∘ mask
+ um1 ~ um1 ∘ mask
+ if (opts.dim2 > 0) um2 ~ um2 ∘ mask
}
}
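+  // Schematically, the predictor being differentiated above is
+  //   eta = mv*x + |mm1*x|^2 - |mm2*x|^2
+  // i.e. the FM interaction term split into positive and negative semidefinite factors;
+  // with strictFM the diagonal terms ((mm1∘mm1 - mm2∘mm2) * (x∘x)) are subtracted so the
+  // standard FM formula is followed exactly.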
// Update a simple factorization A*B for the second order terms.
def mupdate4(in:Mat, targ:Mat, dweights:Mat) = {
- val ftarg = full(targ);
- val vt1 = mm1 * in;
- val vt2 = mm2 * in;
+ val ftarg = full(targ)
+ val vt1 = mm1 * in
+ val vt2 = mm2 * in
val eta = mv * in + (vt1 ∙ vt2)
GLM.preds(eta, eta, mylinks, totflops)
GLM.derivs(eta, ftarg, eta, mylinks, totflops)
@@ -189,9 +189,9 @@ class FM(override val opts:FM.Opts = new FM.Options) extends RegressionModel(opt
um1 ~ (eta ∘ vt2) *^ in
um2 ~ (eta ∘ vt1) *^ in
if (mask.asInstanceOf[AnyRef] != null) {
- uv ~ uv ∘ mask;
- um1 ~ um1 ∘ mask;
- um2 ~ um2 ∘ mask;
+ uv ~ uv ∘ mask
+ um1 ~ um1 ∘ mask
+ um2 ~ um2 ∘ mask
}
}
@@ -209,49 +209,49 @@ class FM(override val opts:FM.Opts = new FM.Options) extends RegressionModel(opt
def meval3(in:Mat, targ:Mat, dweights:Mat):FMat = {
val ftarg = full(targ)
- val vt1 = mm1 * in;
- var vt2:Mat = null;
+ val vt1 = mm1 * in
+ var vt2:Mat = null
if (opts.dim2 > 0) {
- vt2 = mm2 * in;
+ vt2 = mm2 * in
}
- val eta = mv * in + (vt1 dot vt1);
+ val eta = mv * in + (vt1 dot vt1)
if (opts.dim2 > 0) {
- eta ~ eta - (vt2 dot vt2);
+ eta ~ eta - (vt2 dot vt2)
}
if (opts.strictFM) {
- in.contents ~ in.contents ∘ in.contents;
- eta ~ eta - ((mm1 ∘ mm1) * in);
- if (opts.dim2 > 0) eta ~ eta + ((mm2 ∘ mm2) * in);
+ in.contents ~ in.contents ∘ in.contents
+ eta ~ eta - ((mm1 ∘ mm1) * in)
+ if (opts.dim2 > 0) eta ~ eta + ((mm2 ∘ mm2) * in)
}
if (opts.lim > 0) {
- max(eta, llim, eta);
- min(eta, ulim, eta);
+ max(eta, llim, eta)
+ min(eta, ulim, eta)
}
- GLM.preds(eta, eta, mylinks, totflops);
- if (ogmats != null) ogmats(0) = eta;
- val v = GLM.llfun(eta, ftarg, mylinks, totflops);
+ GLM.preds(eta, eta, mylinks, totflops)
+ if (ogmats != null) ogmats(0) = eta
+ val v = GLM.llfun(eta, ftarg, mylinks, totflops)
if (dweights.asInstanceOf[AnyRef] != null) {
- FMat(sum(v ∘ dweights, 2) / sum(dweights));
+ FMat(sum(v ∘ dweights, 2) / sum(dweights))
} else {
- FMat(mean(v, 2));
+ FMat(mean(v, 2))
}
}
// evaluate a simple A*B factorization of the interactions.
def meval4(in:Mat, targ:Mat, dweights:Mat):FMat = {
- val ftarg = full(targ);
- val vt1 = mm1 * in;
- val vt2 = mm2 * in;
- val eta = mv * in + (vt1 dot vt2);
- GLM.preds(eta, eta, mylinks, totflops);
- if (ogmats != null) ogmats(0) = eta;
- val v = GLM.llfun(eta, ftarg, mylinks, totflops);
- if (ogmats != null) {ogmats(0) = eta};
+ val ftarg = full(targ)
+ val vt1 = mm1 * in
+ val vt2 = mm2 * in
+ val eta = mv * in + (vt1 dot vt2)
+ GLM.preds(eta, eta, mylinks, totflops)
+ if (ogmats != null) ogmats(0) = eta
+ val v = GLM.llfun(eta, ftarg, mylinks, totflops)
if (dweights.asInstanceOf[AnyRef] != null) {
- FMat(sum(v ∘ dweights, 2) / sum(dweights));
+ FMat(sum(v ∘ dweights, 2) / sum(dweights))
} else {
- FMat(mean(v, 2));
+ FMat(mean(v, 2))
}
}
@@ -259,7 +259,7 @@ class FM(override val opts:FM.Opts = new FM.Options) extends RegressionModel(opt
object FM {
trait Opts extends GLM.Opts {
- var strictFM = false;
+ var strictFM = false
var dim1 = 32
var dim2 = 32
var initscale = 0.1f
@@ -268,11 +268,11 @@ object FM {
class Options extends Opts {}
def mkFMModel(fopts:Model.Opts) = {
- new FM(fopts.asInstanceOf[FM.Opts])
+ new FM(fopts.asInstanceOf[FM.Opts])
}
def mkUpdater(nopts:Updater.Opts) = {
- new ADAGrad(nopts.asInstanceOf[ADAGrad.Opts])
+ new ADAGrad(nopts.asInstanceOf[ADAGrad.Opts])
}
def mkRegularizer(nopts:Mixin.Opts):Array[Mixin] = {
@@ -284,13 +284,13 @@ object FM {
def learner(mat0:Mat, d:Int = 0) = {
val opts = new LearnOptions
opts.batchSize = math.min(10000, mat0.ncols/30 + 1)
- val nn = new Learner(
- new MatSource(Array(mat0:Mat), opts),
- new FM(opts),
- mkRegularizer(opts),
- new ADAGrad(opts),
- null,
- opts)
+ val nn = new Learner(
+ new MatSource(Array(mat0:Mat), opts),
+ new FM(opts),
+ mkRegularizer(opts),
+ new ADAGrad(opts),
+ null,
+ opts)
(nn, opts)
}
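+  // Typical use (a sketch; `x` stands for any dense or sparse feature matrix whose
+  // target rows are configured via opts.targets):
+  //   val (nn, opts) = FM.learner(x)
+  //   opts.dim1 = 64; opts.dim2 = 16       // ranks of the two interaction factors
+  //   nn.train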
@@ -317,16 +317,16 @@ object FM {
// This function constructs a predictor from an existing model
def predictor(model:Model, mat1:Mat):(Learner, PredOptions) = {
- val mod = model.asInstanceOf[FM];
- val mopts = mod.opts;
- val nopts = new PredOptions;
+ val mod = model.asInstanceOf[FM]
+ val mopts = mod.opts
+ val nopts = new PredOptions
nopts.batchSize = math.min(10000, mat1.ncols/30 + 1)
- nopts.links = mopts.links.copy;
- nopts.putBack = 1;
- nopts.dim1 = mopts.dim1;
- nopts.dim2 = mopts.dim2;
- nopts.strictFM = mopts.strictFM;
- val newmod = new FM(nopts);
+ nopts.links = mopts.links.copy
+ nopts.putBack = 1
+ nopts.dim1 = mopts.dim1
+ nopts.dim2 = mopts.dim2
+ nopts.strictFM = mopts.strictFM
+ val newmod = new FM(nopts)
newmod.refresh = false
model.copyTo(newmod)
val nn = new Learner(
@@ -344,7 +344,7 @@ object FM {
// A learner that uses a general data source (e.g. a files data source).
// The datasource options (like batchSize) need to be set externally.
def learner(ds:DataSource):(Learner, FMOptions) = {
- val mopts = new FMOptions;
+ val mopts = new FMOptions
mopts.lrate = row(0.01f, 0.001f, 0.001f)
mopts.autoReset = false
val model = new FM(mopts)
@@ -362,10 +362,10 @@ object FM {
// A learner that uses a files data source specified by a list of strings.
def learner(fnames:List[String]):(Learner, FGOptions) = {
- val mopts = new FGOptions;
- mopts.lrate = 1f;
- val model = new FM(mopts);
- mopts.fnames = fnames.map((a:String) => FileSource.simpleEnum(a,1,0));
+ val mopts = new FGOptions
+ mopts.lrate = 1f
+ val model = new FM(mopts)
+ mopts.fnames = fnames.map((a:String) => FileSource.simpleEnum(a,1,0))
val ds = new FileSource(mopts);
val mm = new Learner(
ds,
@@ -397,13 +397,13 @@ object FM {
val opts = new LearnParOptions
opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
opts.links.set(d)
- val nn = new ParLearnerF(
- new MatSource(Array(mat0), opts),
- opts, mkFMModel _,
- opts, mkRegularizer _,
- opts, mkUpdater _,
- null, null,
- opts)
+ val nn = new ParLearnerF(
+ new MatSource(Array(mat0), opts),
+ opts, mkFMModel _,
+ opts, mkRegularizer _,
+ opts, mkUpdater _,
+ null, null,
+ opts)
(nn, opts)
}
@@ -430,37 +430,37 @@ object FM {
def learnFParx(
nstart:Int=FileSource.encodeDate(2012,3,1,0),
- nend:Int=FileSource.encodeDate(2012,12,1,0),
- d:Int = 0
- ) = {
- val opts = new LearnFParOptions
- val nn = new ParLearnerxF(
- null,
- (dopts:DataSource.Opts, i:Int) => Experiments.Twitter.twitterWords(nstart, nend, opts.nthreads, i),
- opts, mkFMModel _,
+ nend:Int=FileSource.encodeDate(2012,12,1,0),
+ d:Int = 0
+ ) = {
+ val opts = new LearnFParOptions
+ val nn = new ParLearnerxF(
+ null,
+ (dopts:DataSource.Opts, i:Int) => Experiments.Twitter.twitterWords(nstart, nend, opts.nthreads, i),
+ opts, mkFMModel _,
opts, mkRegularizer _,
- opts, mkUpdater _,
- null, null,
- opts
- )
- (nn, opts)
+ opts, mkUpdater _,
+ null, null,
+ opts
+ )
+ (nn, opts)
}
def learnFPar(
nstart:Int=FileSource.encodeDate(2012,3,1,0),
- nend:Int=FileSource.encodeDate(2012,12,1,0),
- d:Int = 0
- ) = {
- val opts = new LearnFParOptions
- val nn = new ParLearnerF(
- Experiments.Twitter.twitterWords(nstart, nend),
- opts, mkFMModel _,
+ nend:Int=FileSource.encodeDate(2012,12,1,0),
+ d:Int = 0
+ ) = {
+ val opts = new LearnFParOptions
+ val nn = new ParLearnerF(
+ Experiments.Twitter.twitterWords(nstart, nend),
+ opts, mkFMModel _,
opts, mkRegularizer _,
- opts, mkUpdater _,
- null, null,
- opts
- )
- (nn, opts)
+ opts, mkUpdater _,
+ null, null,
+ opts
+ )
+ (nn, opts)
}
}
diff --git a/src/main/scala/BIDMach/models/FactorModel.scala b/src/main/scala/BIDMach/models/FactorModel.scala
index 0235f1fe..151963fc 100755
--- a/src/main/scala/BIDMach/models/FactorModel.scala
+++ b/src/main/scala/BIDMach/models/FactorModel.scala
@@ -6,7 +6,7 @@ import BIDMat.SciFunctions._
import BIDMach.datasources._
/**
- * An Abstract class with shared code for Factor Models
+ * An Abstract class with shared code for Factor Models
*/
abstract class FactorModel(override val opts:FactorModel.Opts) extends Model(opts) {
@@ -19,14 +19,14 @@ abstract class FactorModel(override val opts:FactorModel.Opts) extends Model(opt
println("corpus perplexity=%f" format math.exp(- (sp ddot ln(sp))) )
if (refresh) {
- val modelmat = rand(d,m);
- modelmat ~ modelmat *@ sdat;
- val msum = sum(modelmat, 2);
- modelmat ~ modelmat / msum;
- setmodelmats(Array[Mat](1));
- modelmats(0) = modelmat;
+ val modelmat = rand(d,m)
+ modelmat ~ modelmat *@ sdat
+ val msum = sum(modelmat, 2)
+ modelmat ~ modelmat / msum
+ setmodelmats(Array[Mat](1))
+ modelmats(0) = modelmat
}
- modelmats(0) = convertMat(modelmats(0));
+ modelmats(0) = convertMat(modelmats(0))
if (datasource.opts.putBack > 0) {
while (datasource.hasNext) {
@@ -58,12 +58,12 @@ abstract class FactorModel(override val opts:FactorModel.Opts) extends Model(opt
def evalbatch(mats:Array[Mat], ipass:Int, here:Long):FMat = {
val sdata = gmats(0)
- val user = if (datasource.opts.putBack > 0) gmats(datasource.opts.putBack) else FactorModel.reuseuser(gmats(0), opts.dim, opts.initUval);
- uupdate(sdata, user, ipass, here);
+ val user = if (datasource.opts.putBack > 0) gmats(datasource.opts.putBack) else FactorModel.reuseuser(gmats(0), opts.dim, opts.initUval)
+ uupdate(sdata, user, ipass, here)
if (gmats.length > 2) {
- evalfun(sdata, user, gmats(2), ipass, here);
+ evalfun(sdata, user, gmats(2), ipass, here)
} else {
- evalfun(sdata, user, ipass, here);
+ evalfun(sdata, user, ipass, here)
}
}
}
diff --git a/src/main/scala/BIDMach/models/GLM.scala b/src/main/scala/BIDMach/models/GLM.scala
index 1671a2a4..cd89a954 100755
--- a/src/main/scala/BIDMach/models/GLM.scala
+++ b/src/main/scala/BIDMach/models/GLM.scala
@@ -47,37 +47,37 @@ import BIDMach._
* // returns a training learner mm, with options mopts. Also returns a prediction model nn with its own options.
* // typically set options, then do mm.train; nn.predict with results in pc.
* val (mm, opts) = learner(ds) // Build a learner for a general datasource ds (e.g. a files data source).
- * }}}
+ * }}}
*/
class GLM(opts:GLM.Opts) extends RegressionModel(opts) {
val linkArray = GLM.linkArray
- var mylinks:Mat = null;
- var iweight:Mat = null;
- var ulim:Mat = null;
- var llim:Mat = null;
- var totflops = 0L;
- var hashFeatures = 0;
+ var mylinks:Mat = null
+ var iweight:Mat = null
+ var ulim:Mat = null
+ var llim:Mat = null
+ var totflops = 0L
+ var hashFeatures = 0
// For integrated ADAGrad updater
- var vexp:Mat = null;
- var texp:Mat = null;
- var lrate:Mat = null;
- var sumsq:Mat = null;
- var firststep = -1f;
- var waitsteps = 0;
- var epsilon = 0f;
+ var vexp:Mat = null
+ var texp:Mat = null
+ var lrate:Mat = null
+ var sumsq:Mat = null
+ var firststep = -1f
+ var waitsteps = 0
+ var epsilon = 0f
override def copyTo(mod:Model) = {
- super.copyTo(mod);
- val rmod = mod.asInstanceOf[GLM];
- rmod.mylinks = mylinks;
+ super.copyTo(mod)
+ val rmod = mod.asInstanceOf[GLM]
+ rmod.mylinks = mylinks
rmod.iweight = iweight;
}
override def init() = {
- useGPU = opts.useGPU && Mat.hasCUDA > 0
+ useGPU = opts.useGPU && Mat.hasCUDA > 0
val data0 = mats(0)
val m = if (opts.hashFeatures > 0) opts.hashFeatures else size(data0, 1)
val targetData = mats.length > 1
@@ -88,54 +88,54 @@ class GLM(opts:GLM.Opts) extends RegressionModel(opts) {
} else if (mats.length > 1) {
mats(1).nrows
} else {
- modelmats(0).nrows;
+ modelmats(0).nrows
}
val sdat = (sum(data0,2).t + 0.5f).asInstanceOf[FMat]
sp = sdat / sum(sdat)
println("corpus perplexity=%f" format (math.exp(-(sp ddot ln(sp)))))
if (refresh) {
- val mm = zeros(d,m);
+ val mm = zeros(d,m)
setmodelmats(Array(mm))
}
- modelmats(0) = convertMat(modelmats(0));
- updatemats = Array(modelmats(0).zeros(modelmats(0).nrows, modelmats(0).ncols));
+ modelmats(0) = convertMat(modelmats(0))
+ updatemats = Array(modelmats(0).zeros(modelmats(0).nrows, modelmats(0).ncols))
targmap = if (opts.targmap.asInstanceOf[AnyRef] != null) convertMat(opts.targmap) else opts.targmap
if (! targetData) {
targets = if (opts.targets.asInstanceOf[AnyRef] != null) convertMat(opts.targets) else opts.targets
mask = if (opts.rmask.asInstanceOf[AnyRef] != null) convertMat(opts.rmask) else opts.rmask
}
- mylinks = if (useGPU) GIMat(opts.links) else opts.links;
- iweight = opts.iweight;
- if (iweight.asInstanceOf[AnyRef] != null && useGPU) iweight = convertMat(iweight);
- if (mask.asInstanceOf[AnyRef] != null) modelmats(0) ~ modelmats(0) ∘ mask;
- totflops = 0L;
+ mylinks = if (useGPU) GIMat(opts.links) else opts.links
+ iweight = opts.iweight
+ if (iweight.asInstanceOf[AnyRef] != null && useGPU) iweight = convertMat(iweight)
+ if (mask.asInstanceOf[AnyRef] != null) modelmats(0) ~ modelmats(0) ∘ mask
+ totflops = 0L
for (i <- 0 until opts.links.length) {
- totflops += linkArray(opts.links(i)).fnflops;
+ totflops += linkArray(opts.links(i)).fnflops
}
ulim = convertMat(opts.lim)
- llim = - ulim;
- hashFeatures = opts.hashFeatures;
+ llim = - ulim
+ hashFeatures = opts.hashFeatures
if (opts.aopts != null) {
- initADAGrad(d, m);
+ initADAGrad(d, m)
} else {
- vexp = null;
- texp = null;
- lrate = null;
- sumsq = null;
+ vexp = null
+ texp = null
+ lrate = null
+ sumsq = null
}
}
def initADAGrad(d:Int, m:Int) = {
- val aopts = opts.asInstanceOf[ADAGrad.Opts];
- firststep = -1f;
- lrate = convertMat(aopts.lrate);
- texp = convertMat(aopts.texp);
- vexp = convertMat(aopts.vexp);
- sumsq = convertMat(zeros(d, m));
- sumsq.set(aopts.initsumsq);
- waitsteps = aopts.waitsteps;
- epsilon = aopts.epsilon;
+ val aopts = opts.asInstanceOf[ADAGrad.Opts]
+ firststep = -1f
+ lrate = convertMat(aopts.lrate)
+ texp = convertMat(aopts.texp)
+ vexp = convertMat(aopts.vexp)
+ sumsq = convertMat(zeros(d, m))
+ sumsq.set(aopts.initsumsq)
+ waitsteps = aopts.waitsteps
+ epsilon = aopts.epsilon
}
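+  // These fields feed ADAGrad.multUpdate in mupdate3. Schematically (illustrative only,
+  // not the exact kernel): sumsq accumulates g∘g per coordinate, and the model moves by
+  // lrate ∘ g / (sumsq^vexp + epsilon), annealed by step^(-texp) once waitsteps have passed.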
def mupdate(in:Mat, ipass:Int, pos:Long) = {
@@ -148,62 +148,62 @@ class GLM(opts:GLM.Opts) extends RegressionModel(opts) {
def mupdate2(in:Mat, targ:Mat, ipass:Int, pos:Long) = mupdate3(in, targ, null, ipass, pos)
def mupdate3(in:Mat, targ:Mat, dweights:Mat, ipass:Int, pos:Long) = {
- val ftarg = full(targ);
- val targs = if (targmap.asInstanceOf[AnyRef] != null) targmap * ftarg else ftarg;
+ val ftarg = full(targ)
+ val targs = if (targmap.asInstanceOf[AnyRef] != null) targmap * ftarg else ftarg
val eta = if (hashFeatures > 0) GLM.hashMult(modelmats(0), in, opts.hashBound1, opts.hashBound2) else modelmats(0) * in
if (opts.lim > 0) {
- max(eta, llim, eta);
- min(eta, ulim, eta);
+ max(eta, llim, eta)
+ min(eta, ulim, eta)
}
- GLM.preds(eta, eta, mylinks, totflops);
- GLM.derivs(eta, targs, eta, mylinks, totflops);
- if (dweights.asInstanceOf[AnyRef] != null) eta ~ eta ∘ dweights;
+ GLM.preds(eta, eta, mylinks, totflops)
+ GLM.derivs(eta, targs, eta, mylinks, totflops)
+ if (dweights.asInstanceOf[AnyRef] != null) eta ~ eta ∘ dweights
if (opts.aopts != null) {
- if (firststep <= 0) firststep = pos.toFloat;
- val step = (pos + firststep)/firststep;
+ if (firststep <= 0) firststep = pos.toFloat
+ val step = (pos + firststep)/firststep
if (hashFeatures == 0) {
- ADAGrad.multUpdate(eta, in, modelmats(0), sumsq, mask, lrate, vexp, texp, epsilon, step, waitsteps);
+ ADAGrad.multUpdate(eta, in, modelmats(0), sumsq, mask, lrate, vexp, texp, epsilon, step, waitsteps)
} else {
ADAGrad.hashmultUpdate(eta, in, hashFeatures, opts.hashBound1, opts.hashBound2, 1,
- modelmats(0), sumsq, mask, lrate, vexp, texp, epsilon, step, waitsteps);
+ modelmats(0), sumsq, mask, lrate, vexp, texp, epsilon, step, waitsteps)
}
} else {
- if (hashFeatures > 0) {
- updatemats(0) <-- GLM.hashMultT(eta, in, modelmats(0).ncols, opts.hashBound1, opts.hashBound2);
- } else {
- updatemats(0) ~ eta *^ in;
- }
- if (mask.asInstanceOf[AnyRef] != null) {
- updatemats(0) ~ updatemats(0) ∘ mask
- }
+ if (hashFeatures > 0) {
+ updatemats(0) <-- GLM.hashMultT(eta, in, modelmats(0).ncols, opts.hashBound1, opts.hashBound2)
+ } else {
+ updatemats(0) ~ eta *^ in
+ }
+ if (mask.asInstanceOf[AnyRef] != null) {
+ updatemats(0) ~ updatemats(0) ∘ mask
+ }
}
}
def meval(in:Mat):FMat = {
val targs = if (targets.asInstanceOf[AnyRef] != null) {val targs0 = targets * in; min(targs0, 1f, targs0); targs0} else null
- val dweights = if (iweight.asInstanceOf[AnyRef] != null) iweight * in else null;
- meval3(in, targs, dweights);
+ val dweights = if (iweight.asInstanceOf[AnyRef] != null) iweight * in else null
+ meval3(in, targs, dweights)
}
def meval2(in:Mat, targ:Mat):FMat = meval3(in, targ, null)
def meval3(in:Mat, targ:Mat, dweights:Mat):FMat = {
- val ftarg = if (targ.asInstanceOf[AnyRef] != null) full(targ) else null;
- val targs = if (targmap.asInstanceOf[AnyRef] != null && ftarg.asInstanceOf[AnyRef] != null) targmap * ftarg else ftarg;
- val eta = if (hashFeatures > 0) GLM.hashMult(modelmats(0), in, opts.hashBound1, opts.hashBound2) else modelmats(0) * in;
- GLM.preds(eta, eta, mylinks, totflops);
+ val ftarg = if (targ.asInstanceOf[AnyRef] != null) full(targ) else null
+ val targs = if (targmap.asInstanceOf[AnyRef] != null && ftarg.asInstanceOf[AnyRef] != null) targmap * ftarg else ftarg
+ val eta = if (hashFeatures > 0) GLM.hashMult(modelmats(0), in, opts.hashBound1, opts.hashBound2) else modelmats(0) * in
+ GLM.preds(eta, eta, mylinks, totflops)
if (ogmats != null) {ogmats(0) = eta;}
if (targs.asInstanceOf[AnyRef] != null) {
- val v = GLM.llfun(eta, targs, mylinks, totflops);
- if (dweights.asInstanceOf[AnyRef] != null) {
- FMat(sum(v ∘ dweights, 2) / sum(dweights))
- } else {
- if (opts.doVariance) {
- FMat(mean(v, 2)) on FMat(variance(v, 2));
- } else {
- FMat(mean(v, 2));
- }
- }
+ val v = GLM.llfun(eta, targs, mylinks, totflops)
+ if (dweights.asInstanceOf[AnyRef] != null) {
+ FMat(sum(v ∘ dweights, 2) / sum(dweights))
+ } else {
+ if (opts.doVariance) {
+ FMat(mean(v, 2)) on FMat(variance(v, 2))
+ } else {
+ FMat(mean(v, 2))
+ }
+ }
} else {
row(0)
}
@@ -214,159 +214,159 @@ class GLM(opts:GLM.Opts) extends RegressionModel(opts) {
object GLM {
trait Opts extends RegressionModel.Opts {
- var links:IMat = null;
- var iweight:FMat = null;
- var lim = 0f;
- var hashFeatures = 0;
- var hashBound1:Int = 1000000;
- var hashBound2:Int = 1000000;
- var aopts:ADAGrad.Opts = null;
+ var links:IMat = null
+ var iweight:FMat = null
+ var lim = 0f
+ var hashFeatures = 0
+ var hashBound1:Int = 1000000
+ var hashBound2:Int = 1000000
+ var aopts:ADAGrad.Opts = null
}
- val linear = 0;
- val logistic = 1;
- val maxp = 2;
- val svm = 3;
+ val linear = 0
+ val logistic = 1
+ val maxp = 2
+ val svm = 3
object LinearLink extends GLMlink {
- def link(in:Float) = {
- in
- }
+ def link(in:Float) = {
+ in
+ }
- def mean(in:Float) = {
- in
- }
+ def mean(in:Float) = {
+ in
+ }
- def derivlink(in:Float, targ:Float) = {
- targ - in;
- }
+ def derivlink(in:Float, targ:Float) = {
+ targ - in
+ }
- def likelihood(pred:Float, targ:Float) = {
- val diff = targ - pred;
- - diff * diff;
- }
+ def likelihood(pred:Float, targ:Float) = {
+ val diff = targ - pred
+ - diff * diff
+ }
- override val linkfn = link _;
+ override val linkfn = link _
- override val derivfn = derivlink _;
+ override val derivfn = derivlink _
- override val meanfn = mean _;
+ override val meanfn = mean _
- override val likelihoodfn = likelihood _;
+ override val likelihoodfn = likelihood _
- val fnflops = 2;
+ val fnflops = 2
}
object LogisticLink extends GLMlink {
- def link(in:Float) = {
- math.log(in / (1.0f - in)).toFloat;
- }
+ def link(in:Float) = {
+ math.log(in / (1.0f - in)).toFloat
+ }
- def mean(in:Float) = {
- if (in > 0) {
- val tmp = math.exp(-in);
- (1.0 / (1.0 + tmp)).toFloat;
- } else {
- val tmp = math.exp(in);
- (tmp / (1.0 + tmp)).toFloat;
- }
- }
+ def mean(in:Float) = {
+ if (in > 0) {
+ val tmp = math.exp(-in)
+ (1.0 / (1.0 + tmp)).toFloat
+ } else {
+ val tmp = math.exp(in)
+ (tmp / (1.0 + tmp)).toFloat
+ }
+ }
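+    // The two branches form the numerically stable sigmoid: exp is only ever applied to
+    // a non-positive argument, so it cannot overflow for large |in|.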
- def derivlink(in:Float, targ:Float) = {
- targ - in;
- }
+ def derivlink(in:Float, targ:Float) = {
+ targ - in
+ }
- def likelihood(pred:Float, targ:Float) = {
- math.log(targ * pred + (1.0f - targ) * (1.0f - pred) + 1e-20).toFloat
- }
+ def likelihood(pred:Float, targ:Float) = {
+ math.log(targ * pred + (1.0f - targ) * (1.0f - pred) + 1e-20).toFloat
+ }
- override val linkfn = link _;
+ override val linkfn = link _
- override val derivfn = derivlink _;
+ override val derivfn = derivlink _
- override val meanfn = mean _;
+ override val meanfn = mean _
- override val likelihoodfn = likelihood _;
+ override val likelihoodfn = likelihood _
- val fnflops = 20;
+ val fnflops = 20
}
object MaxpLink extends GLMlink {
- def link(in:Float) = {
- math.log(in / (1.0f - in)).toFloat;
- }
+ def link(in:Float) = {
+ math.log(in / (1.0f - in)).toFloat
+ }
- def mean(in:Float) = {
- if (in > 0) {
- val tmp = math.exp(-in);
- (1.0 / (1.0 + tmp)).toFloat;
- } else {
- val tmp = math.exp(in);
- (tmp / (1.0 + tmp)).toFloat;
- }
- }
+ def mean(in:Float) = {
+ if (in > 0) {
+ val tmp = math.exp(-in)
+ (1.0 / (1.0 + tmp)).toFloat
+ } else {
+ val tmp = math.exp(in)
+ (tmp / (1.0 + tmp)).toFloat
+ }
+ }
- def derivlink(p:Float, targ:Float) = {
- (2.0f * targ - 1.0f) * p * (1.0f - p);
- }
+ def derivlink(p:Float, targ:Float) = {
+ (2.0f * targ - 1.0f) * p * (1.0f - p)
+ }
- def likelihood(pred:Float, targ:Float) = {
- targ * pred + (1.0f - targ) * (1.0f - pred) -1.0f;
- }
+ def likelihood(pred:Float, targ:Float) = {
+ targ * pred + (1.0f - targ) * (1.0f - pred) -1.0f
+ }
- override val linkfn = link _;
+ override val linkfn = link _
- override val derivfn = derivlink _;
+ override val derivfn = derivlink _
- override val meanfn = mean _;
+ override val meanfn = mean _
- override val likelihoodfn = likelihood _;
+ override val likelihoodfn = likelihood _
- val fnflops = 20;
+ val fnflops = 20
}
object SVMLink extends GLMlink {
- def link(in:Float) = {
- in
- }
+ def link(in:Float) = {
+ in
+ }
- def mean(in:Float) = {
- in
- }
+ def mean(in:Float) = {
+ in
+ }
- def derivlink(pred:Float, targ:Float) = {
- val ttarg = 2 * targ - 1;
- if (pred * ttarg < 1f) ttarg else 0f;
- }
+ def derivlink(pred:Float, targ:Float) = {
+ val ttarg = 2 * targ - 1
+ if (pred * ttarg < 1f) ttarg else 0f
+ }
- def likelihood(pred:Float, targ:Float) = {
- val ttarg = 2 * targ - 1;
- scala.math.min(0f, ttarg * pred - 1f);
- }
+ def likelihood(pred:Float, targ:Float) = {
+ val ttarg = 2 * targ - 1
+ scala.math.min(0f, ttarg * pred - 1f)
+ }
- override val linkfn = link _;
+ override val linkfn = link _
- override val derivfn = derivlink _;
+ override val derivfn = derivlink _
- override val meanfn = mean _;
+ override val meanfn = mean _
- override val likelihoodfn = likelihood _;
+ override val likelihoodfn = likelihood _
- val fnflops = 2;
+ val fnflops = 2
}
object LinkEnum extends Enumeration {
- type LinkEnum = Value;
- val Linear, Logistic, Maxp, SVMLink = Value
+ type LinkEnum = Value
+ val Linear, Logistic, Maxp, SVMLink = Value
}
abstract class GLMlink {
- val linkfn:(Float => Float)
- val derivfn:((Float,Float) => Float)
- val meanfn:(Float => Float)
- val likelihoodfn:((Float,Float) => Float)
- val fnflops:Int
+ val linkfn:(Float => Float)
+ val derivfn:((Float,Float) => Float)
+ val meanfn:(Float => Float)
+ val likelihoodfn:((Float,Float) => Float)
+ val fnflops:Int
}
val linkArray = Array[GLMlink](LinearLink, LogisticLink, MaxpLink, SVMLink)
@@ -421,7 +421,7 @@ object GLM {
gout
}
case (geta:GDMat, gilinks:GIMat) => {
- val gout = GDMat.newOrCheckGDMat(eta.nrows, eta.ncols, null, eta.GUID, links.GUID, "GLM.preds".##)
+ val gout = GDMat.newOrCheckGDMat(eta.nrows, eta.ncols, null, eta.GUID, links.GUID, "GLM.preds".##)
Mat.nflops += totflops * geta.ncols
CUMACH.applydpreds(geta.data, gilinks.data, gout.data, geta.nrows, geta.ncols)
gout
@@ -464,18 +464,18 @@ object GLM {
def derivs(pred:Mat, targ:Mat, out:Mat, links:Mat, totflops:Long) = {
(pred, targ, out, links) match {
case (fpred:FMat, ftarg:FMat, fout:FMat, ilinks:IMat) => {
- Mat.nflops += 10L * ftarg.length;
- var i = 0;
- while (i < ftarg.ncols) {
- var j = 0;
- while (j < ftarg.nrows) {
- val fun = GLM.linkArray(ilinks(j)).derivfn;
- fout.data(j + i * out.nrows) = fun(fpred.data(j + i * ftarg.nrows), ftarg.data(j + i * ftarg.nrows));
- j += 1;
- }
- i += 1;
- }
- fout;
+ Mat.nflops += 10L * ftarg.length
+ var i = 0
+ while (i < ftarg.ncols) {
+ var j = 0
+ while (j < ftarg.nrows) {
+ val fun = GLM.linkArray(ilinks(j)).derivfn
+ fout.data(j + i * out.nrows) = fun(fpred.data(j + i * ftarg.nrows), ftarg.data(j + i * ftarg.nrows))
+ j += 1
+ }
+ i += 1
+ }
+ fout
}
case (gpred:GMat, gtarg:GMat, gout:GMat, gilinks:GIMat) => {
Mat.nflops += totflops * gpred.ncols
@@ -493,22 +493,22 @@ object GLM {
def derivs(pred:Mat, targ:Mat, links:Mat, totflops:Long) = {
(pred, targ, links) match {
case (fpred:FMat, ftarg:FMat, ilinks:IMat) => {
- val fout = FMat.newOrCheckFMat(pred.nrows, pred.ncols, null, pred.GUID, targ.GUID, links.GUID, "GLM.derivs".##)
- Mat.nflops += 10L * ftarg.length;
- var i = 0;
- while (i < ftarg.ncols) {
- var j = 0
- while (j < ftarg.nrows) {
- val fun = GLM.linkArray(ilinks(j)).derivfn;
- fout.data(j + i * fout.nrows) = fun(fpred.data(j + i * ftarg.nrows), ftarg.data(j + i * ftarg.nrows));
- j += 1;
- }
- i += 1;
- }
- fout;
+ val fout = FMat.newOrCheckFMat(pred.nrows, pred.ncols, null, pred.GUID, targ.GUID, links.GUID, "GLM.derivs".##)
+ Mat.nflops += 10L * ftarg.length
+ var i = 0
+ while (i < ftarg.ncols) {
+ var j = 0
+ while (j < ftarg.nrows) {
+ val fun = GLM.linkArray(ilinks(j)).derivfn
+ fout.data(j + i * fout.nrows) = fun(fpred.data(j + i * ftarg.nrows), ftarg.data(j + i * ftarg.nrows))
+ j += 1
+ }
+ i += 1
+ }
+ fout
}
case (gpred:GMat, gtarg:GMat, gilinks:GIMat) => {
- val gout = GMat.newOrCheckGMat(pred.nrows, pred.ncols, null, pred.GUID, targ.GUID, links.GUID, "GLM.derivs".##)
+ val gout = GMat.newOrCheckGMat(pred.nrows, pred.ncols, null, pred.GUID, targ.GUID, links.GUID, "GLM.derivs".##)
Mat.nflops += totflops * gpred.ncols
CUMACH.applyderivs(gpred.data, gtarg.data, gilinks.data, gout.data, gpred.nrows, gpred.ncols)
gout
@@ -523,235 +523,235 @@ object GLM {
}
def hashMult(a:GMat, b:GSMat, bound1:Int, bound2:Int):GMat = {
- val c = GMat.newOrCheckGMat(a.nrows, b.ncols, null, a.GUID, b.GUID, "hashMult".##);
- c.clear;
- val npercol = b.nnz / b.ncols;
- Mat.nflops += 1L * a.nrows * npercol * b.nnz;
- CUMACH.hashMult(a.nrows, a.ncols, b.ncols, bound1, bound2, a.data, b.data, b.ir, b.jc, c.data, 0);
+ val c = GMat.newOrCheckGMat(a.nrows, b.ncols, null, a.GUID, b.GUID, "hashMult".##)
+ c.clear
+ val npercol = b.nnz / b.ncols
+ Mat.nflops += 1L * a.nrows * npercol * b.nnz
+ CUMACH.hashMult(a.nrows, a.ncols, b.ncols, bound1, bound2, a.data, b.data, b.ir, b.jc, c.data, 0)
c
}
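+  // Sketch of intent (the kernel is CUMACH.hashMult): multiply the dense model a by the
+  // sparse b while hashing b's row indices on the fly, so single features stay within
+  // [0, bound1) and implicit feature pairs within [0, bound2), without materializing the
+  // expanded feature space.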
def hashMult(a:Mat, b:Mat, bound1:Int, bound2:Int):Mat = {
- (a, b) match {
- case (ga:GMat, gb:GSMat) => hashMult(ga, gb, bound1, bound2)
- }
+ (a, b) match {
+ case (ga:GMat, gb:GSMat) => hashMult(ga, gb, bound1, bound2)
+ }
}
-
+
def hashMultT(a:GMat, b:GSMat, nfeats:Int, bound1:Int, bound2:Int):GMat = {
- val c = GMat.newOrCheckGMat(a.nrows, nfeats, null, a.GUID, b.GUID, nfeats, "hashMultT".##);
- c.clear;
- val npercol = b.nnz / b.ncols;
- Mat.nflops += 1L * a.nrows * npercol * b.nnz;
- CUMACH.hashMult(a.nrows, nfeats, b.ncols, bound1, bound2, a.data, b.data, b.ir, b.jc, c.data, 1);
+ val c = GMat.newOrCheckGMat(a.nrows, nfeats, null, a.GUID, b.GUID, nfeats, "hashMultT".##)
+ c.clear
+ val npercol = b.nnz / b.ncols
+ Mat.nflops += 1L * a.nrows * npercol * b.nnz
+ CUMACH.hashMult(a.nrows, nfeats, b.ncols, bound1, bound2, a.data, b.data, b.ir, b.jc, c.data, 1)
c
}
def hashMultT(a:Mat, b:Mat, nfeats:Int, bound1:Int, bound2:Int):Mat = {
- (a, b) match {
- case (ga:GMat, gb:GSMat) => hashMultT(ga, gb, nfeats, bound1, bound2)
- }
+ (a, b) match {
+ case (ga:GMat, gb:GSMat) => hashMultT(ga, gb, nfeats, bound1, bound2)
+ }
}
-
+
def hashCross(a:GMat, b:GSMat, c:GSMat):GMat = {
- val d = GMat.newOrCheckGMat(a.nrows, b.ncols, null, a.GUID, b.GUID, "hashCross".##);
- val npercol = b.nnz / b.ncols;
- Mat.nflops += 1L * a.nrows * npercol * b.nnz;
- d.clear;
- CUMACH.hashCross(a.nrows, a.ncols, b.ncols, a.data, b.data, b.ir, b.jc, c.data, c.ir, c.jc, d.data, 0);
+ val d = GMat.newOrCheckGMat(a.nrows, b.ncols, null, a.GUID, b.GUID, "hashCross".##)
+ val npercol = b.nnz / b.ncols
+ Mat.nflops += 1L * a.nrows * npercol * b.nnz
+ d.clear
+ CUMACH.hashCross(a.nrows, a.ncols, b.ncols, a.data, b.data, b.ir, b.jc, c.data, c.ir, c.jc, d.data, 0)
d
}
def hashCross(a:Mat, b:Mat, c:Mat):Mat = {
- (a, b, c) match {
- case (ga:GMat, gb:GSMat, gc:GSMat) => hashCross(ga, gb, gc)
- }
+ (a, b, c) match {
+ case (ga:GMat, gb:GSMat, gc:GSMat) => hashCross(ga, gb, gc)
+ }
}
def hashCrossT(a:GMat, b:GSMat, c:GSMat, nfeats:Int):GMat = {
- val d = GMat.newOrCheckGMat(a.nrows, nfeats, null, a.GUID, b.GUID, "hashCrossT".##);
- val npercol = b.nnz / b.ncols;
- Mat.nflops += 1L * a.nrows * npercol * b.nnz;
- d.clear;
- CUMACH.hashCross(a.nrows, nfeats, b.ncols, a.data, b.data, b.ir, b.jc, c.data, c.ir, c.jc, d.data, 1);
+ val d = GMat.newOrCheckGMat(a.nrows, nfeats, null, a.GUID, b.GUID, "hashCrossT".##)
+ val npercol = b.nnz / b.ncols
+ Mat.nflops += 1L * a.nrows * npercol * b.nnz
+ d.clear
+ CUMACH.hashCross(a.nrows, nfeats, b.ncols, a.data, b.data, b.ir, b.jc, c.data, c.ir, c.jc, d.data, 1)
d
}
def hashCrossT(a:Mat, b:Mat, c:Mat, nfeats:Int):Mat = {
- (a, b, c) match {
- case (ga:GMat, gb:GSMat, gc:GSMat) => hashCrossT(ga, gb, gc, nfeats)
- }
+ (a, b, c) match {
+ case (ga:GMat, gb:GSMat, gc:GSMat) => hashCrossT(ga, gb, gc, nfeats)
+ }
}
def pairMult(nr:Int, nc:Int, kk:Int, a:GMat, aroff:Int, acoff:Int, b:GSMat, broff:Int, bcoff:Int, c:GMat, croff:Int, ccoff:Int):GMat = {
if (aroff < 0 || acoff < 0 || broff < 0 || bcoff < 0 || croff < 0 || ccoff < 0 || nr < 0 || nc < 0 || kk < 0) {
- throw new RuntimeException("pairMult: cant have negative offsets or dimensions");
+      throw new RuntimeException("pairMult: can't have negative offsets or dimensions")
} else if (aroff + nr > a.nrows || acoff + 2*kk > a.ncols || broff + kk > b.nrows || bcoff + nc > b.ncols || croff + nr > c.nrows || ccoff + nc > c.ncols) {
- throw new RuntimeException("pairMult: tile strays outside matrix dimensions");
+ throw new RuntimeException("pairMult: tile strays outside matrix dimensions")
} else {
- Mat.nflops += 2L * nr * b.nnz;
+ Mat.nflops += 2L * nr * b.nnz
val err = CUMACH.pairMultTile(nr, nc, kk, kk,
a.data.withByteOffset(Sizeof.FLOAT.toLong*(aroff+acoff*2*a.nrows)), a.nrows*2,
a.data.withByteOffset(Sizeof.FLOAT.toLong*(aroff+(acoff*2+1)*a.nrows)), a.nrows*2,
b.data, b.ir, b.jc, broff, bcoff,
c.data.withByteOffset(Sizeof.FLOAT.toLong*(croff+ccoff*c.nrows)), c.nrows,
- 0);
+ 0)
if (err != 0) {
throw new RuntimeException("CUMAT.pairMult error " + cudaGetErrorString(err))
}
- c;
+ c
}
}
def pairMultNT(nr:Int, nc:Int, kk:Int, a:GMat, aroff:Int, acoff:Int, b:GSMat, broff:Int, bcoff:Int, c:GMat, croff:Int, ccoff:Int):GMat = {
if (aroff < 0 || acoff < 0 || broff < 0 || bcoff < 0 || croff < 0 || ccoff < 0 || nr < 0 || nc < 0 || kk < 0) {
- throw new RuntimeException("pairMultNT: cant have negative offsets or dimensions");
+      throw new RuntimeException("pairMultNT: can't have negative offsets or dimensions")
} else if (aroff + nr > a.nrows || acoff + 2*kk > a.ncols || broff + nc > b.nrows || bcoff + kk > b.ncols || croff + nr > c.nrows || ccoff + nc > c.ncols) {
- throw new RuntimeException("pairMultNT: tile strays outside matrix dimensions");
+ throw new RuntimeException("pairMultNT: tile strays outside matrix dimensions")
} else {
- Mat.nflops += 2L * nr * b.nnz * kk / b.ncols;
+ Mat.nflops += 2L * nr * b.nnz * kk / b.ncols
val err = CUMACH.pairMultTile(nr, nc, kk, kk,
a.data.withByteOffset(Sizeof.FLOAT.toLong*(aroff+acoff*2*a.nrows)), a.nrows*2,
a.data.withByteOffset(Sizeof.FLOAT.toLong*(aroff+(acoff*2+1)*a.nrows)), a.nrows*2,
b.data, b.ir, b.jc, broff, bcoff,
c.data.withByteOffset(Sizeof.FLOAT.toLong*(croff+ccoff*c.nrows)), c.nrows,
- 1);
+ 1)
if (err != 0) {
throw new RuntimeException("CUMAT.pairMultNT error " + cudaGetErrorString(err))
}
- c;
+ c
}
}
def pairMult(nr:Int, nc:Int, kk:Int, a:Mat, aroff:Int, acoff:Int, b:Mat, broff:Int, bcoff:Int, c:Mat, croff:Int, ccoff:Int):Mat = {
(a, b, c) match {
- case (fa:GMat, sb:GSMat, fc:GMat) => pairMult(nr, nc, kk, fa, aroff, acoff, sb, broff, bcoff, fc, croff, ccoff);
- case (fa:FMat, sb:SMat, fc:FMat) => pairMult(nr, nc, kk, fa, aroff, acoff, sb, broff, bcoff, fc, croff, ccoff);
+ case (fa:GMat, sb:GSMat, fc:GMat) => pairMult(nr, nc, kk, fa, aroff, acoff, sb, broff, bcoff, fc, croff, ccoff)
+ case (fa:FMat, sb:SMat, fc:FMat) => pairMult(nr, nc, kk, fa, aroff, acoff, sb, broff, bcoff, fc, croff, ccoff)
case _ => throw new RuntimeException("pairMult couldnt match matrix types")
}
}
def pairMultNT(nr:Int, nc:Int, kk:Int, a:Mat, aroff:Int, acoff:Int, b:Mat, broff:Int, bcoff:Int, c:Mat, croff:Int, ccoff:Int):Mat = {
(a, b, c) match {
- case (fa:GMat, sb:GSMat, fc:GMat) => pairMultNT(nr, nc, kk, fa, aroff, acoff, sb, broff, bcoff, fc, croff, ccoff);
-// case (fb:GMat, fc:GMat) => pairMultNT(nr, nc, kk, aroff, acoff, fb, broff, bcoff, fc, croff, ccoff);
+ case (fa:GMat, sb:GSMat, fc:GMat) => pairMultNT(nr, nc, kk, fa, aroff, acoff, sb, broff, bcoff, fc, croff, ccoff)
+// case (fb:GMat, fc:GMat) => pairMultNT(nr, nc, kk, aroff, acoff, fb, broff, bcoff, fc, croff, ccoff)
case _ => throw new RuntimeException("pairMultT couldnt match matrix types")
}
}
@inline def pairembed(r1x:Long, r2x:Int):Long = {
- val r1 = r1x + 1;
- val r2 = r2x + 1;
- val b1 = java.lang.Float.floatToRawIntBits(r1.toFloat);
- val b2 = java.lang.Float.floatToRawIntBits(r2.toFloat);
- val nbits1 = (b1 >> 23) - 126;
- val nbits2 = (b2 >> 23) - 126;
- val len = nbits1 + nbits2 - 2;
- val b3 = java.lang.Float.floatToRawIntBits(len.toFloat);
- val lenbits = if (len > 1) ((b3 >> 23) - 127) else 0;
- val r2t = r2 & ((1 << (nbits2-1)) - 1);
- val x = (((r1 << (nbits2-1)) | r2t) << lenbits) | (nbits2-1);
- math.max(0, x-2);
+ val r1 = r1x + 1
+ val r2 = r2x + 1
+ val b1 = java.lang.Float.floatToRawIntBits(r1.toFloat)
+ val b2 = java.lang.Float.floatToRawIntBits(r2.toFloat)
+ val nbits1 = (b1 >> 23) - 126
+ val nbits2 = (b2 >> 23) - 126
+ val len = nbits1 + nbits2 - 2
+ val b3 = java.lang.Float.floatToRawIntBits(len.toFloat)
+ val lenbits = if (len > 1) ((b3 >> 23) - 127) else 0
+ val r2t = r2 & ((1 << (nbits2-1)) - 1)
+ val x = (((r1 << (nbits2-1)) | r2t) << lenbits) | (nbits2-1)
+ math.max(0, x-2)
}
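+  // pairembed is a variable-length pairing code (a sketch of the idea): it packs (r1,r2)
+  // into one index using the bit widths of r1+1 and r2+1, read off the float exponent
+  // fields, so pairs of small indices map to small codes.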
@inline def solve1(j:Int):Int = {
- var v = math.sqrt(j).toFloat;
+ var v = math.sqrt(j).toFloat
v = v - (v*(v+1)-2*j)/(2*v+1); // Newton iterations to find first index.
- v = v - (v*(v+1)-2*j)/(2*v+1);
- v = v - (v*(v+1)-2*j)/(2*v+1);
- v = v - (v*(v+1)-2*j)/(2*v+1);
- v = v - (v*(v+1)-2*j)/(2*v+1);
+ v = v - (v*(v+1)-2*j)/(2*v+1)
+ v = v - (v*(v+1)-2*j)/(2*v+1)
+ v = v - (v*(v+1)-2*j)/(2*v+1)
+ v = v - (v*(v+1)-2*j)/(2*v+1)
(v+2e-5f).toInt;
}
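+  // solve1 inverts the triangular indexing used in pairMult below: given a flat pair index
+  // j it returns the largest j1 with j1*(j1+1)/2 <= j, refining sqrt(j) with a few Newton
+  // steps on v*(v+1)/2 = j; the +2e-5f guards the float-to-int truncation.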
def pairMult(nrows:Int, ncols:Int, kk:Int, A:FMat, aroff:Int, acoff:Int, B:SMat, broff:Int, bcoff:Int,
- C:FMat, croff:Int, ccoff:Int):Unit = {
- pairMult(nrows, ncols, kk, kk, A, aroff + acoff * 2 * A.nrows, A.nrows*2, A, aroff + (acoff*2+1) * A.nrows, A.nrows*2,
- B, broff, bcoff, C, croff + ccoff * C.nrows, 0);
+ C:FMat, croff:Int, ccoff:Int):Unit = {
+ pairMult(nrows, ncols, kk, kk, A, aroff + acoff * 2 * A.nrows, A.nrows*2, A, aroff + (acoff*2+1) * A.nrows, A.nrows*2,
+ B, broff, bcoff, C, croff + ccoff * C.nrows, 0)
}
def pairMultNT(nrows:Int, ncols:Int, kk:Int, A:FMat, aroff:Int, acoff:Int, B:SMat, broff:Int, bcoff:Int,
- C:FMat, croff:Int, ccoff:Int):Unit = {
- pairMult(nrows, ncols, kk, kk, A, aroff + acoff * 2 * A.nrows, A.nrows*2, A, aroff + (acoff*2+1) * A.nrows, A.nrows*2,
- B, broff, bcoff, C, croff + ccoff * C.nrows, 1);
+ C:FMat, croff:Int, ccoff:Int):Unit = {
+ pairMult(nrows, ncols, kk, kk, A, aroff + acoff * 2 * A.nrows, A.nrows*2, A, aroff + (acoff*2+1) * A.nrows, A.nrows*2,
+ B, broff, bcoff, C, croff + ccoff * C.nrows, 1)
}
def pairMult(nrows:Int, ncols:Int, bound1:Int, bound2:Int, A:FMat, aoff:Int, lda:Int, A2:FMat, a2off:Int, lda2:Int,
- B:SMat, broff:Int, bcoff:Int, C:FMat, coff:Int, transpose:Int):Unit = {
- val Bdata = B.data;
- val Bir = B.ir;
- val Bjc = B.jc;
- var doit = false;
- val ioff = Mat.ioneBased;
- val istart = 0;
- val iend = ncols;
- var AX:Array[Float] = null;
- var ldax = 0;
- var aoffx = 0;
- val ldc = C.nrows;
- var i = istart;
- while (i < iend) { // i is the column index
- val jstart = Bjc(i + bcoff)-ioff; // Range of nz rows in this column
- val jend = Bjc(i+1 + bcoff)-ioff;
- val nr = jend - jstart; // Number of nz rows
- val todo = nr * (nr + 1) / 2; // Number of pairs to process (including k,k pairs)
- var j = 0;
- while (j < todo) { // j indexes a worker for this column
- val j1 = solve1(j); // Compute the first and second indices
- val j2 = j - j1*(j1+1)/2;
- val f1 = Bdata(jstart + j1); // Get the two features
- val f2 = Bdata(jstart + j2);
- val r1 = Bir(jstart + j1) - broff-ioff; // And their row indices
- val r2 = Bir(jstart + j2) - broff-ioff;
- var rank = r1.toLong;
- var prod = f1;
- doit = (r1 >= 0 && r1 < bound1 && r2 >= 0 && r2 < bound1);
- if (j1 == j2) {
- AX = A.data;
- ldax = lda;
- aoffx = aoff;
- } else {
- rank = pairembed(r1, r2);
- doit = doit && (rank >= 0 && rank < bound2);
- if (doit) {
- prod *= f2;
- AX = A2.data;
- ldax = lda2;
- aoffx = a2off;
- }
- }
- if (doit) {
- if (transpose > 0) {
- var k = 0;
- while (k < nrows) {
- val sum = AX(aoffx + k + ldax * i) * prod; // Do the product
- C.data(coff + k + ldc * rank.toInt) += sum;
- k += 1;
- }
- } else {
- var k = 0;
- while (k < nrows) {
- val sum = AX(aoffx + k + ldax * rank.toInt) * prod; // Do the product
- C.data(coff + k + ldc * i) += sum;
- k += 1;
- }
- }
- }
- j += 1;
- }
- i += 1;
- }
+ B:SMat, broff:Int, bcoff:Int, C:FMat, coff:Int, transpose:Int):Unit = {
+ val Bdata = B.data
+ val Bir = B.ir
+ val Bjc = B.jc
+ var doit = false
+ val ioff = Mat.ioneBased
+ val istart = 0
+ val iend = ncols
+ var AX:Array[Float] = null
+ var ldax = 0
+ var aoffx = 0
+ val ldc = C.nrows
+ var i = istart
+ while (i < iend) { // i is the column index
+        val jstart = Bjc(i + bcoff)-ioff                 // Range of nz rows in this column
+ val jend = Bjc(i+1 + bcoff)-ioff
+        val nr = jend - jstart                           // Number of nz rows
+        val todo = nr * (nr + 1) / 2                     // Number of pairs to process (including k,k pairs)
+ var j = 0
+ while (j < todo) { // j indexes a worker for this column
+          val j1 = solve1(j)                             // Compute the first and second indices
+          val j2 = j - j1*(j1+1)/2
+          val f1 = Bdata(jstart + j1)                    // Get the two features
+ val f2 = Bdata(jstart + j2)
+          val r1 = Bir(jstart + j1) - broff-ioff         // And their row indices
+ val r2 = Bir(jstart + j2) - broff-ioff
+ var rank = r1.toLong
+ var prod = f1
+ doit = (r1 >= 0 && r1 < bound1 && r2 >= 0 && r2 < bound1)
+ if (j1 == j2) {
+ AX = A.data
+ ldax = lda
+ aoffx = aoff
+ } else {
+ rank = pairembed(r1, r2)
+ doit = doit && (rank >= 0 && rank < bound2)
+ if (doit) {
+ prod *= f2
+ AX = A2.data
+ ldax = lda2
+ aoffx = a2off
+ }
+ }
+ if (doit) {
+ if (transpose > 0) {
+ var k = 0
+ while (k < nrows) {
+              val sum = AX(aoffx + k + ldax * i) * prod  // Do the product
+ C.data(coff + k + ldc * rank.toInt) += sum
+ k += 1
+ }
+ } else {
+ var k = 0
+ while (k < nrows) {
+              val sum = AX(aoffx + k + ldax * rank.toInt) * prod  // Do the product
+ C.data(coff + k + ldc * i) += sum
+ k += 1
+ }
+ }
+ }
+ j += 1
+ }
+ i += 1
+ }
}
def mkGLMModel(fopts:Model.Opts) = {
- new GLM(fopts.asInstanceOf[GLM.Opts])
+ new GLM(fopts.asInstanceOf[GLM.Opts])
}
def mkUpdater(nopts:Updater.Opts) = {
- new ADAGrad(nopts.asInstanceOf[ADAGrad.Opts])
+ new ADAGrad(nopts.asInstanceOf[ADAGrad.Opts])
}
def mkRegularizer(nopts:Mixin.Opts):Array[Mixin] = {
@@ -764,7 +764,7 @@ object GLM {
def mkL1L2Regularizers(nopts:Mixin.Opts):Array[Mixin] = {
Array(new L1Regularizer(nopts.asInstanceOf[L1Regularizer.Opts]),
- new L2Regularizer(nopts.asInstanceOf[L2Regularizer.Opts]))
+ new L2Regularizer(nopts.asInstanceOf[L2Regularizer.Opts]))
}
class LearnOptions extends Learner.Options with GLM.Opts with MatSource.Opts with ADAGrad.Opts with L1Regularizer.Opts
@@ -775,13 +775,13 @@ object GLM {
val opts = new LearnOptions
opts.batchSize = math.min(10000, mat0.ncols/30 + 1)
opts.lrate = 1f
- val nn = new Learner(
- new MatSource(Array(mat0:Mat), opts),
- new GLM(opts),
- mkRegularizer(opts),
- new ADAGrad(opts),
- null,
- opts)
+ val nn = new Learner(
+ new MatSource(Array(mat0:Mat), opts),
+ new GLM(opts),
+ mkRegularizer(opts),
+ new ADAGrad(opts),
+ null,
+ opts)
(nn, opts)
}
@@ -793,13 +793,13 @@ object GLM {
opts.batchSize = math.min(10000, mat0.ncols/30 + 1)
opts.lrate = 1f
opts.aopts = opts
- val nn = new Learner(
- new MatSource(Array(mat0:Mat), opts),
- new GLM(opts),
- mkRegularizer(opts),
- null,
- null,
- opts)
+ val nn = new Learner(
+ new MatSource(Array(mat0:Mat), opts),
+ new GLM(opts),
+ mkRegularizer(opts),
+ null,
+ null,
+ opts)
(nn, opts)
}
@@ -807,7 +807,7 @@ object GLM {
// Basic in-memory learner with explicit target
def learner(mat0:Mat, targ:Mat, d:Int):(Learner, LearnOptions) = {
- val mopts = new LearnOptions;
+ val mopts = new LearnOptions
mopts.lrate = 1f
mopts.batchSize = math.min(10000, mat0.ncols/30 + 1)
if (mopts.links == null) mopts.links = izeros(1,targ.nrows)
@@ -826,13 +826,13 @@ object GLM {
// Basic in-memory learner with explicit target
def learnerX(mat0:Mat, targ:Mat, d:Int):(Learner, LearnOptions) = {
- val mopts = new LearnOptions;
+ val mopts = new LearnOptions
mopts.lrate = 1f
mopts.batchSize = math.min(10000, mat0.ncols/30 + 1)
if (mopts.links == null) mopts.links = izeros(1,targ.nrows)
mopts.links.set(d)
val model = new GLM(mopts)
- mopts.aopts = mopts;
+ mopts.aopts = mopts
val mm = new Learner(
new MatSource(Array(mat0, targ), mopts),
model,
@@ -849,8 +849,8 @@ object GLM {
// This function constructs a learner and a predictor.
def learner(mat0:Mat, targ:Mat, mat1:Mat, preds:Mat, d:Int):(Learner, LearnOptions, Learner, LearnOptions) = {
- val mopts = new LearnOptions;
- val nopts = new LearnOptions;
+ val mopts = new LearnOptions
+ val nopts = new LearnOptions
mopts.lrate = 1f
mopts.batchSize = math.min(10000, mat0.ncols/30 + 1)
mopts.autoReset = false
@@ -882,7 +882,7 @@ object GLM {
// A learner that uses a general data source (e.g. a files data source).
// The datasource options (like batchSize) need to be set externally.
def learner(ds:DataSource):(Learner, GOptions) = {
- val mopts = new GOptions;
+ val mopts = new GOptions
mopts.lrate = 1f
val model = new GLM(mopts)
val mm = new Learner(
@@ -896,9 +896,9 @@ object GLM {
}
def learnerX(ds:DataSource):(Learner, GOptions) = {
- val mopts = new GOptions;
+ val mopts = new GOptions
mopts.lrate = 1f
- mopts.aopts = mopts;
+ mopts.aopts = mopts
val model = new GLM(mopts)
val mm = new Learner(
ds,
@@ -906,7 +906,7 @@ object GLM {
mkRegularizer(mopts),
null,
null,
- mopts);
+ mopts)
(mm, mopts)
}
@@ -914,10 +914,10 @@ object GLM {
// A learner that uses a files data source specified by a list of strings.
def learner(fnames:List[String]):(Learner, FGOptions) = {
- val mopts = new FGOptions;
- mopts.lrate = 1f;
- val model = new GLM(mopts);
- mopts.fnames = fnames.map((a:String) => FileSource.simpleEnum(a,1,0));
+ val mopts = new FGOptions
+ mopts.lrate = 1f
+ val model = new GLM(mopts)
+ mopts.fnames = fnames.map((a:String) => FileSource.simpleEnum(a,1,0))
val ds = new FileSource(mopts);
val mm = new Learner(
ds,
@@ -931,11 +931,11 @@ object GLM {
// A learner that uses a files data source specified by a list of strings.
def learnerX(fnames:List[String]):(Learner, FGOptions) = {
- val mopts = new FGOptions;
- mopts.lrate = 1f;
- mopts.aopts = mopts;
- val model = new GLM(mopts);
- mopts.fnames = fnames.map((a:String) => FileSource.simpleEnum(a,1,0));
+ val mopts = new FGOptions
+ mopts.lrate = 1f
+ mopts.aopts = mopts
+ val model = new GLM(mopts)
+ mopts.fnames = fnames.map((a:String) => FileSource.simpleEnum(a,1,0))
val ds = new FileSource(mopts);
val mm = new Learner(
ds,
@@ -952,20 +952,20 @@ object GLM {
// This function constructs a predictor from an existing model
def predictor(model0:Model, mat1:Mat):(Learner, PredOptions) = {
val model = model0.asInstanceOf[GLM]
- val nopts = new PredOptions;
+ val nopts = new PredOptions
nopts.batchSize = math.min(10000, mat1.ncols/30 + 1)
nopts.putBack = 0
- val newmod = new GLM(nopts);
+ val newmod = new GLM(nopts)
newmod.refresh = false
- newmod.copyFrom(model);
- val mopts = model.opts.asInstanceOf[GLM.Opts];
- nopts.targmap = mopts.targmap;
- nopts.links = mopts.links;
- nopts.targets = mopts.targets;
- nopts.iweight = mopts.iweight;
- nopts.lim = mopts.lim;
- nopts.hashFeatures = mopts.hashFeatures;
- nopts.hashBound1 = mopts.hashBound1;
+ newmod.copyFrom(model)
+ val mopts = model.opts.asInstanceOf[GLM.Opts]
+ nopts.targmap = mopts.targmap
+ nopts.links = mopts.links
+ nopts.targets = mopts.targets
+ nopts.iweight = mopts.iweight
+ nopts.lim = mopts.lim
+ nopts.hashFeatures = mopts.hashFeatures
+ nopts.hashBound1 = mopts.hashBound1
nopts.hashBound2 = mopts.hashBound2;
val nn = new Learner(
new MatSource(Array(mat1), nopts),
@@ -979,7 +979,7 @@ object GLM {
// Basic in-memory SVM learner with explicit target
def SVMlearner(mat0:Mat, targ:Mat):(Learner, Learn12Options) = {
- val mopts = new Learn12Options;
+ val mopts = new Learn12Options
mopts.lrate = 1f
mopts.batchSize = math.min(10000, mat0.ncols/30 + 1)
if (mopts.links == null) mopts.links = izeros(targ.nrows,1)
@@ -998,8 +998,8 @@ object GLM {
// This function constructs a learner and a predictor.
def SVMlearner(mat0:Mat, targ:Mat, mat1:Mat, preds:Mat):(Learner, Learn12Options, Learner, Learn12Options) = {
- val mopts = new Learn12Options;
- val nopts = new Learn12Options;
+ val mopts = new Learn12Options
+ val nopts = new Learn12Options
mopts.lrate = 1f
mopts.batchSize = math.min(10000, mat0.ncols/30 + 1)
if (mopts.links == null) mopts.links = izeros(targ.nrows,1)
@@ -1028,7 +1028,7 @@ object GLM {
// This function constructs a predictor from an existing model
def SVMpredictor(model:Model, mat1:Mat, preds:Mat):(Learner, LearnOptions) = {
- val nopts = new LearnOptions;
+ val nopts = new LearnOptions
nopts.batchSize = math.min(10000, mat1.ncols/30 + 1)
if (nopts.links == null) nopts.links = izeros(preds.nrows,1)
nopts.links.set(3)
@@ -1064,13 +1064,13 @@ object GLM {
val opts = new LearnParOptions
opts.batchSize = math.min(10000, mat0.ncols/30 + 1)
opts.lrate = 1f
- val nn = new ParLearnerF(
- new MatSource(Array(mat0), opts),
- opts, mkGLMModel _,
- opts, mkRegularizer _,
- opts, mkUpdater _,
- null, null,
- opts)
+ val nn = new ParLearnerF(
+ new MatSource(Array(mat0), opts),
+ opts, mkGLMModel _,
+ opts, mkRegularizer _,
+ opts, mkUpdater _,
+ null, null,
+ opts)
(nn, opts)
}
@@ -1097,40 +1097,40 @@ object GLM {
class LearnFParOptions extends ParLearner.Options with GLM.Opts with SFileSource.Opts with ADAGrad.Opts with L1Regularizer.Opts
def learnFParx(
- nstart:Int=FileSource.encodeDate(2012,3,1,0),
- nend:Int=FileSource.encodeDate(2012,12,1,0),
- d:Int = 0
- ) = {
- val opts = new LearnFParOptions;
- opts.lrate = 1f;
- val nn = new ParLearnerxF(
- null,
- (dopts:DataSource.Opts, i:Int) => Experiments.Twitter.twitterWords(nstart, nend, opts.nthreads, i),
- opts, mkGLMModel _,
- opts, mkRegularizer _,
- opts, mkUpdater _,
- null, null,
- opts
- )
- (nn, opts)
+ nstart:Int=FileSource.encodeDate(2012,3,1,0),
+ nend:Int=FileSource.encodeDate(2012,12,1,0),
+ d:Int = 0
+ ) = {
+ val opts = new LearnFParOptions
+ opts.lrate = 1f
+ val nn = new ParLearnerxF(
+ null,
+ (dopts:DataSource.Opts, i:Int) => Experiments.Twitter.twitterWords(nstart, nend, opts.nthreads, i),
+ opts, mkGLMModel _,
+ opts, mkRegularizer _,
+ opts, mkUpdater _,
+ null, null,
+ opts
+ )
+ (nn, opts)
}
def learnFPar(
- nstart:Int=FileSource.encodeDate(2012,3,1,0),
- nend:Int=FileSource.encodeDate(2012,12,1,0),
- d:Int = 0
- ) = {
- val opts = new LearnFParOptions;
- opts.lrate = 1f;
- val nn = new ParLearnerF(
- Experiments.Twitter.twitterWords(nstart, nend),
- opts, mkGLMModel _,
- opts, mkRegularizer _,
- opts, mkUpdater _,
- null, null,
- opts
- )
- (nn, opts)
+ nstart:Int=FileSource.encodeDate(2012,3,1,0),
+ nend:Int=FileSource.encodeDate(2012,12,1,0),
+ d:Int = 0
+ ) = {
+ val opts = new LearnFParOptions
+ opts.lrate = 1f
+ val nn = new ParLearnerF(
+ Experiments.Twitter.twitterWords(nstart, nend),
+ opts, mkGLMModel _,
+ opts, mkRegularizer _,
+ opts, mkUpdater _,
+ null, null,
+ opts
+ )
+ (nn, opts)
}
}
diff --git a/src/main/scala/BIDMach/models/GaussianMixture.scala b/src/main/scala/BIDMach/models/GaussianMixture.scala
index e444546b..aa57075a 100755
--- a/src/main/scala/BIDMach/models/GaussianMixture.scala
+++ b/src/main/scala/BIDMach/models/GaussianMixture.scala
@@ -69,9 +69,9 @@ class GaussianMixture(override val opts:GaussianMixture.Opts = new GaussianMixtu
object GaussianMixture {
trait Opts extends Model.Opts {}
- class Options extends Opts {}
-
- /** A learner with a single matrix data source. */
+ class Options extends Opts {}
+
+ /** A learner with a single matrix data source. */
def learner(data:Mat) = {
class xopts extends Learner.Options with GaussianMixture.Opts with MatSource.Opts with ADAGrad.Opts
val opts = new xopts
diff --git a/src/main/scala/BIDMach/models/ICA.scala b/src/main/scala/BIDMach/models/ICA.scala
index a1cea5ea..9c00bfc9 100644
--- a/src/main/scala/BIDMach/models/ICA.scala
+++ b/src/main/scala/BIDMach/models/ICA.scala
@@ -1,312 +1,312 @@
-package BIDMach.models
-
-import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
-import BIDMat.MatFunctions._
-import BIDMat.SciFunctions._
-import BIDMat.Solvers._
-import BIDMach._
-import BIDMach.datasources._
-import BIDMach.updaters._
-import java.lang.ref._
-import jcuda.NativePointerObject
-import java.lang.Math;
-
-/**
- * Independent Component Analysis, using FastICA. It has the ability to center and whiten data. It is
- * based on the method presented in:
- *
- * A. Hyvärinen and E. Oja. Independent Component Analysis: Algorithms and Applications.
- * Neural Networks, 13(4-5):411-430, 2000.
- *
- * In particular, we provide the logcosh, exponential, and kurtosis "G" functions.
- *
- * This algorithm computes the following modelmats array:
- * - modelmats(0) stores the inverse of the mixing matrix. If X = A*S represents the data, then it's the
- * estimated A^-1^, which we assume is square and invertible for now.
- * - modelmats(1) stores the mean vector of the data, which is computed entirely on the first pass. This
- * means once we estimate A^-1^ in modelmats(0), we need to first shift the data by this amount, and
- * then multiply to recover the (centered) sources. Example:
- * {{{
- * modelmats(0) * (data - modelmats(1))
- * }}}
- * Here, data is an n x N matrix, whereas modelmats(1) is an n x 1 matrix. For efficiency reasons, we
- * assume a constant batch size for each block of data so we take the mean across all batches. This is
- * true except for (usually) the last batch, but this almost always isn't enough to make a difference.
- *
- * Thus, modelmats(1) helps to center the data. The whitening in this algorithm happens during the updates
- * to W in both the orthogonalization and the fixed point steps. The former uses the computed covariance
- * matrix and the latter relies on an approximation of W^T^*W to the inverse covariance matrix. It is fine
- * if the data is already pre-whitened before being passed to BIDMach.
- *
- * Currently, we are thinking about the following extensions:
- * - Allowing ICA to handle non-square mixing matrices. Most research about ICA assumes that A is n x n.
- * - Improving the way we handle the computation of the mean, so it doesn't rely on the last batch being
- * of similar size to all prior batches. Again, this is minor, especially for large data sets.
- * - Thinking of ways to make this scale better to a large variety of datasets
- *
- * For additional references, see Aapo Hyvärinen's other papers, and visit:
- * http://research.ics.aalto.fi/ica/fastica/
- */
-class ICA(override val opts:ICA.Opts = new ICA.Options) extends FactorModel(opts) {
-
- // Some temp variables. The most important one is mm, which is our W = A^{-1}.
- var mm:Mat = null
- var batchIteration = 0.0f
- var G_fun: Mat=>Mat = null
- var g_fun: Mat=>Mat = null
- var g_d_fun: Mat=>Mat = null
- var stdNorm:FMat = null
-
- var debug = false
-
- override def init() {
- super.init()
- if (refresh) {
- mm = modelmats(0)
- setmodelmats(Array(mm, mm.zeros(mm.nrows,1)))
- }
- updatemats = new Array[Mat](2)
- updatemats(0) = mm.zeros(mm.nrows, mm.nrows)
- updatemats(1) = mm.zeros(mm.nrows,1) // Keep to avoid null pointer exceptions, but we don't use it
- opts.G_function match {
- case "logcosh" => {
- G_fun = G_logcosh; g_fun = g_logcosh; g_d_fun = g_d_logcosh;
- stdNorm = FMat(0.375);
- }
- case "exponent" => {
- G_fun = G_exponent; g_fun = g_exponent; g_d_fun = g_d_exponent;
- stdNorm = FMat(-1.0 / sqrt(2.0));
- }
- case "kurtosis" => {
- G_fun = G_kurtosis; g_fun = g_kurtosis; g_d_fun = g_d_kurtosis;
- stdNorm = FMat(0.75);
- }
- case _ => throw new RuntimeException("opts.G_function is not a valid value: " + opts.G_function)
- }
- }
-
- /**
- * Store data in "user" for use in the next mupdate() call, and updates the moving average if necessary.
- * Also "orthogonalizes" the model matrix after each update, as required by the algorithm.
- *
- * First, it checks if this is the first pass over the data, and if so, updates the moving average assuming
- * that the number of data samples in each block is the same for all blocks. After the first pass, the data
- * mean vector is fixed in modelmats(1). Then the data gets centered via: "data ~ data - modelmats(1)".
- *
- * We also use "user ~ mm * data" to store all (w_j^T^) * (x^i^) values, where w_j^T^ is the j^th^ row of
- * our estimated W = A^-1^, and x^i^ is the i^th^ sample in this block of data. These values are later used
- * as part of fixed point updates.
- *
- * @param data An n x batchSize matrix, where each column corresponds to a data sample.
- * @param user An intermediate matrix that stores (w_j^T^) * (x^i^) values.
- * @param ipass The current pass through the data.
- */
- def uupdate(data : Mat, user : Mat, ipass : Int, pos:Long) {
- if (ipass == 0) {
- batchIteration = batchIteration + 1.0f
- modelmats(1) <-- (modelmats(1)*(batchIteration-1) + mean(data,2)) / batchIteration
- }
- data ~ data - modelmats(1)
- mm <-- orthogonalize(mm,data)
- user ~ mm * data
- }
-
- /**
- * This performs the matrix fixed point update to the estimated W = A^{-1}:
- *
- * W^+^ = W + diag(alpha,,i,,) * [ diag(beta,,i,,) - Expec[g(Wx)*(Wx)^T^] ] * W,
- *
- * where g = G', beta,,i,, = -Expec[(Wx),,i,,g(Wx),,i,,], and alpha,,i,, = -1/(beta,,i,, - Expec[g'(Wx),,i,,]).
- * We need to be careful to take expectations of the appropriate items. The gwtx and g_wtx terms are matrices
- * with useful intermediate values that represent the full data matrix X rather than a single column/element x.
- * The above update for W^+^ goes in updatemats(0), except the additive W since that should be taken care of by
- * the ADAGrad updater.
- *
- * I don't think anything here changes if the data is not white, since one of Hyvärinen's papers implied
- * that the update here includes an approximation to the inverse covariance matrix.
- *
- * @param data An n x batchSize matrix, where each column corresponds to a data sample.
- * @param user An intermediate matrix that stores (w_j^T^) * (x^i^) values.
- * @param ipass The current pass through the data.
- */
- def mupdate(data : Mat, user : Mat, ipass : Int, pos:Long) {
- val gwtx = g_fun(user)
- val g_wtx = g_d_fun(user)
- val termBeta = mkdiag( -mean(user *@ gwtx, 2) )
- val termAlpha = mkdiag( -1.0f / (getdiag(termBeta) - (mean(g_wtx,2))) )
- val termExpec = (gwtx *^ user) / data.ncols
- updatemats(0) <-- termAlpha * (termBeta + termExpec) * mm
- }
-
- /**
- * Currently, this computes the approximation of negentropy, which is the objective function to maximize.
- *
- * To understand this, let w be a single row vector of W, let x be a single data vector, and let v be a
- * standard normal random variable. To find this one independent component, we maximize
- *
- * J(w^T^x) \approx ( Expec[G(w^T^x)] - Expec[G(v)] )^2^,
- *
- * where G is the function set at opts.G_function. So long as the W matrix (capital "W") is orthogonal,
- * which we do enforce, then w^T^x satisfies the requirement that the variance be one. To extend this to
- * the whole matrix W, take the sum over all the rows, so the problem is: maximize{ \sum,,w,, J(w^T^x) }.
- *
- * On the other hand, the batchSize should be much greater than one, so "data" consists of many columns.
- * Denoting the data matrix as X, we can obtain the expectations by taking the sample means. In other words,
- * we take the previous "user" matrix, W*X, apply the function G to the data, and THEN take the mean across
- * rows, so mean(G(W*X),2). The mean across rows gives what we want since it's applying the same row of W
- * to different x (column) vectors in our data.
- *
- * @param data An n x batchSize matrix, where each column corresponds to a data sample.
- * @param user An intermediate matrix that stores (w_j^T^) * (x^i^) values.
- * @param ipass The current pass through the data.
- */
- def evalfun(data : Mat, user : Mat, ipass : Int, pos:Long) : FMat = {
- val big_gwtx = G_fun(user)
- val rowMean = FMat(mean(big_gwtx,2)) - stdNorm
- return sum(rowMean *@ rowMean)
- }
-
- /** Assumes G(x) = log(cosh(x)), a good general-purpose contrast function. */
- private def G_logcosh(m : Mat) : Mat = {
- return ln(cosh(m))
- }
-
- /** Assumes g(x) = d/dx log(cosh(x)) = tanh(x). */
- private def g_logcosh(m : Mat) : Mat = {
- return tanh(m)
- }
-
- /** Assumes g'(x) = d/dx tanh(x). This is pretty complicated; see WolframAlpha for confirmation. */
- private def g_d_logcosh(m : Mat) : Mat = {
- val a = (2*cosh(m))/(cosh(2*m)+1)
- a ~ a *@ a
- return a
- }
-
- /** Assumes G(x) = -exp(-x^2/2), good if data is super-Gaussian or robustness is needed. */
- private def G_exponent(m : Mat) : Mat = {
- return -exp(-0.5f * (m *@ m))
- }
-
- /** Assumes g(x) = d/dx -exp(-x^2/2) = x*exp(-x^2/2). */
- private def g_exponent(m : Mat) : Mat = {
- return m *@ exp(-0.5f * (m *@ m))
- }
-
- /** Assumes g'(x) = d/dx x*exp(-x^2/2) = (1-x^2)*exp(-x^2/2). */
- private def g_d_exponent(m : Mat) : Mat = {
- return (1 - (m *@ m)) *@ exp(-0.5f * (m *@ m))
- }
-
- /** Assumes G(x) = x^4/4, a weak contrast function, but OK for sub-Gaussian data w/no outliers. */
- private def G_kurtosis(m: Mat) : Mat = {
- val c = m *@ m
- c ~ c *@ c
- return c / 4.0f
- }
-
- /** Assumes g(x) = d/dx x^4/4 = x^3. */
- private def g_kurtosis(m : Mat) : Mat = {
- return m *@ m *@ m
- }
-
- /** Assumes g'(x) = d/dx x^3 = 3x^2. */
- private def g_d_kurtosis(m : Mat) : Mat = {
- return 3 * (m *@ m)
- }
-
- /**
- * Takes in the model matrix and returns an orthogonal version of it, so WW^T = identity. We use a method
- * from A. Hyvärinen and E. Oja (2000): an iterative algorithm that uses a norm that is NOT the Frobenius
- * norm, and then iterate a W = 1.5*W - 0.5*W*^W*W update until convergence (it's quadratic in convergence).
- * This involves no eigendecompositions and should be fast. We use the maximum absolute row sum norm, so we
- * take the absolute value of elements, sum over rows, and pick the largest of the values. The above assumes
- * that the covariance matrix of the data is the identity, i.e., C = I. If not, plug in C.
- *
- * @param w The model matrix that we want to transform to be orthogonal (often referred to as "mm" here).
- * @param dat The data matrix, used to compute the covariance matrices if necessary.
- */
- private def orthogonalize(w : Mat, dat : Mat) : Mat = {
- var C:Mat = null
- if (opts.preWhitened) {
- C = mkdiag(ones(dat.nrows,1))
- } else {
- C = getSampleCovariance(dat)
- }
- val WWT = w * C *^ w
- val result = w / sqrt(maxi(sum(abs(WWT), 2)))
- if (sum(sum(result)).dv.isNaN) {
- println("Error: sum(sum(result)) = NaN, indicating issues wiht sqrt(maxi(sum(abs(WWT),2))).")
- }
- var a = 0
- while (a < opts.numOrthogIter) { // Can result in NaNs, be careful.
- val newResult = ((1.5f * result) - 0.5f * (result * C *^ result * result))
- result <-- newResult
- if (sum(sum(result)).dv.isNaN) {
- println("Error: sum(sum(result)) = NaN, indicating that NaNs are appearing.")
- }
- a = a + 1
- }
- return result
- }
-
- /** Gets sample covariance matrix (one column of m is one sample). See Wikipedia for matrix formulation. */
- private def getSampleCovariance(m : Mat) : Mat = {
- val F = m - mean(m,2)
- return (F *^ F) / (m.ncols - 1)
- }
-}
-
-
-object ICA {
-
- trait Opts extends FactorModel.Opts {
- var G_function:String = "logcosh"
- var numOrthogIter:Int = 10
- var preWhitened:Boolean = false
- }
-
- class Options extends Opts {}
-
- /** ICA with a single matrix datasource. The dimension is based on the input matrix. */
- def learner(mat0:Mat) = {
- class xopts extends Learner.Options with MatSource.Opts with ICA.Opts with ADAGrad.Opts
- val opts = new xopts
- opts.dim = size(mat0)(0)
- opts.npasses = 10
- opts.batchSize = math.min(250000, mat0.ncols/15 + 1) // Just a heuristic
- opts.numOrthogIter = math.min(10, 5+math.sqrt(opts.dim).toInt)
- val nn = new Learner(
- new MatSource(Array(mat0:Mat), opts),
- new ICA(opts),
- null,
- new ADAGrad(opts),
- null,
- opts)
- (nn, opts)
- }
-
- /** ICA with a files dataSource. */
- def learner(fnames:List[(Int)=>String], d:Int) = {
- class xopts extends Learner.Options with FileSource.Opts with ICA.Opts with ADAGrad.Opts
- val opts = new xopts
- opts.dim = d
- opts.fnames = fnames
- opts.batchSize = 25000;
- implicit val threads = threadPool(4)
- val nn = new Learner(
- new FileSource(opts),
- new ICA(opts),
- null,
- new ADAGrad(opts),
- null,
- opts)
- (nn, opts)
- }
-
- /** Ranks the independent components by their contribution to the original data. */
- def rankComponents() = {
- println("rankComponents() not yet implemented.")
- }
-
-}
+package BIDMach.models
+
+import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
+import BIDMat.MatFunctions._
+import BIDMat.SciFunctions._
+import BIDMat.Solvers._
+import BIDMach._
+import BIDMach.datasources._
+import BIDMach.updaters._
+import java.lang.ref._
+import jcuda.NativePointerObject
+import java.lang.Math
+
+/**
+ * Independent Component Analysis, using FastICA. It has the ability to center and whiten data. It is
+ * based on the method presented in:
+ *
+ * A. Hyvärinen and E. Oja. Independent Component Analysis: Algorithms and Applications.
+ * Neural Networks, 13(4-5):411-430, 2000.
+ *
+ * In particular, we provide the logcosh, exponential, and kurtosis "G" functions.
+ *
+ * This algorithm computes the following modelmats array:
+ * - modelmats(0) stores the inverse of the mixing matrix. If X = A*S represents the data, then it's the
+ * estimated A^-1^, which we assume is square and invertible for now.
+ * - modelmats(1) stores the mean vector of the data, which is computed entirely on the first pass. This
+ * means once we estimate A^-1^ in modelmats(0), we need to first shift the data by this amount, and
+ * then multiply to recover the (centered) sources. Example:
+ * {{{
+ * modelmats(0) * (data - modelmats(1))
+ * }}}
+ * Here, data is an n x N matrix, whereas modelmats(1) is an n x 1 matrix. For efficiency reasons, we
+ * assume a constant batch size for each block of data, so we take the mean across all batches. This holds
+ * for every batch except (usually) the last one, whose different size is almost never enough to matter.
+ *
+ * Thus, modelmats(1) helps to center the data. The whitening in this algorithm happens during the updates
+ * to W in both the orthogonalization and the fixed point steps. The former uses the computed covariance
+ * matrix and the latter relies on an approximation of W^T^*W to the inverse covariance matrix. It is fine
+ * if the data is already pre-whitened before being passed to BIDMach.
+ *
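+ * A minimal usage sketch (hypothetical matrix x, an n x N FMat; assumes the trained Learner exposes
+ * the model's modelmats, just as nn.modelmat is exposed in the other models' docs):
+ * {{{
+ * val (nn, opts) = ICA.learner(x)   // single-matrix datasource; dim is taken from x
+ * nn.train                          // run FastICA
+ * val sources = nn.modelmats(0) * (x - nn.modelmats(1))  // recover the centered sources
+ * }}}
+ *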
+ * Currently, we are thinking about the following extensions:
+ * - Allowing ICA to handle non-square mixing matrices. Most research about ICA assumes that A is n x n.
+ * - Improving the way we handle the computation of the mean, so it doesn't rely on the last batch being
+ * of similar size to all prior batches. Again, this is minor, especially for large data sets.
+ * - Thinking of ways to make this scale better to a large variety of datasets.
+ *
+ * For additional references, see Aapo Hyvärinen's other papers, and visit:
+ * http://research.ics.aalto.fi/ica/fastica/
+ */
+class ICA(override val opts:ICA.Opts = new ICA.Options) extends FactorModel(opts) {
+
+ // Some temp variables. The most important one is mm, which is our W = A^{-1}.
+ var mm:Mat = null
+ var batchIteration = 0.0f
+ var G_fun: Mat=>Mat = null
+ var g_fun: Mat=>Mat = null
+ var g_d_fun: Mat=>Mat = null
+ var stdNorm:FMat = null
+
+ var debug = false
+
+ override def init() {
+ super.init()
+ if (refresh) {
+ mm = modelmats(0)
+ setmodelmats(Array(mm, mm.zeros(mm.nrows,1)))
+ }
+ updatemats = new Array[Mat](2)
+ updatemats(0) = mm.zeros(mm.nrows, mm.nrows)
+ updatemats(1) = mm.zeros(mm.nrows,1) // Keep to avoid null pointer exceptions, but we don't use it
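+    // Note: stdNorm below caches E[G(v)] for v ~ N(0,1): ~0.375 (logcosh), -1/sqrt(2) (exponent), 3/4 (kurtosis)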
+ opts.G_function match {
+ case "logcosh" => {
+ G_fun = G_logcosh; g_fun = g_logcosh; g_d_fun = g_d_logcosh;
+ stdNorm = FMat(0.375)
+ }
+ case "exponent" => {
+ G_fun = G_exponent; g_fun = g_exponent; g_d_fun = g_d_exponent;
+ stdNorm = FMat(-1.0 / sqrt(2.0))
+ }
+ case "kurtosis" => {
+ G_fun = G_kurtosis; g_fun = g_kurtosis; g_d_fun = g_d_kurtosis
+ stdNorm = FMat(0.75)
+ }
+ case _ => throw new RuntimeException("opts.G_function is not a valid value: " + opts.G_function)
+ }
+ }
+
+ /**
+ * Stores data in "user" for use in the next mupdate() call, and updates the moving average if necessary.
+ * Also "orthogonalizes" the model matrix after each update, as required by the algorithm.
+ *
+ * First, it checks if this is the first pass over the data, and if so, updates the moving average assuming
+ * that the number of data samples in each block is the same for all blocks. After the first pass, the data
+ * mean vector is fixed in modelmats(1). Then the data gets centered via: "data ~ data - modelmats(1)".
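+ * Concretely, after k equal-sized batches, modelmats(1) = (mean(batch 1) + ... + mean(batch k)) / k,
+ * which equals the global mean whenever every batch has the same number of columns.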
+ *
+ * We also use "user ~ mm * data" to store all (w_j^T^) * (x^i^) values, where w_j^T^ is the j^th^ row of
+ * our estimated W = A^-1^, and x^i^ is the i^th^ sample in this block of data. These values are later used
+ * as part of fixed point updates.
+ *
+ * @param data An n x batchSize matrix, where each column corresponds to a data sample.
+ * @param user An intermediate matrix that stores (w_j^T^) * (x^i^) values.
+ * @param ipass The current pass through the data.
+ */
+ def uupdate(data : Mat, user : Mat, ipass : Int, pos:Long) {
+ if (ipass == 0) {
+ batchIteration = batchIteration + 1.0f
+ modelmats(1) <-- (modelmats(1)*(batchIteration-1) + mean(data,2)) / batchIteration
+ }
+ data ~ data - modelmats(1)
+ mm <-- orthogonalize(mm,data)
+ user ~ mm * data
+ }
+
+ /**
+ * This performs the matrix fixed point update to the estimated W = A^{-1}:
+ *
+ * W^+^ = W + diag(alpha,,i,,) * [ diag(beta,,i,,) - Expec[g(Wx)*(Wx)^T^] ] * W,
+ *
+ * where g = G', beta,,i,, = -Expec[(Wx),,i,,g(Wx),,i,,], and alpha,,i,, = -1/(beta,,i,, - Expec[g'(Wx),,i,,]).
+ * We need to be careful to take expectations of the appropriate items. The gwtx and g_wtx terms are matrices
+ * with useful intermediate values that represent the full data matrix X rather than a single column/element x.
+ * The above update for W^+^ goes in updatemats(0), except for the additive W term, which the ADAGrad
+ * updater is expected to apply.
+ *
+ * I don't think anything here changes if the data is not white, since one of Hyvärinen's papers implied
+ * that the update here includes an approximation to the inverse covariance matrix.
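+ *
+ * As a reading aid (not new math), the terms map onto the code names below as:
+ * {{{
+ * termBeta  = mkdiag( -mean(user *@ gwtx, 2) )                      // diag(beta,,i,,); user = Wx, gwtx = g(Wx)
+ * termAlpha = mkdiag( -1.0f / (getdiag(termBeta) - mean(g_wtx,2)) ) // diag(alpha,,i,,)
+ * termExpec = (gwtx *^ user) / batchSize                            // Expec[g(Wx)*(Wx)^T^]
+ * }}}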
+ *
+ * @param data An n x batchSize matrix, where each column corresponds to a data sample.
+ * @param user An intermediate matrix that stores (w_j^T^) * (x^i^) values.
+ * @param ipass The current pass through the data.
+ */
+ def mupdate(data : Mat, user : Mat, ipass : Int, pos:Long) {
+ val gwtx = g_fun(user)
+ val g_wtx = g_d_fun(user)
+ val termBeta = mkdiag( -mean(user *@ gwtx, 2) )
+ val termAlpha = mkdiag( -1.0f / (getdiag(termBeta) - (mean(g_wtx,2))) )
+ val termExpec = (gwtx *^ user) / data.ncols
+ updatemats(0) <-- termAlpha * (termBeta + termExpec) * mm
+ }
+
+ /**
+ * Currently, this computes the approximation of negentropy, which is the objective function to maximize.
+ *
+ * To understand this, let w be a single row vector of W, let x be a single data vector, and let v be a
+ * standard normal random variable. To find this one independent component, we maximize
+ *
+ * J(w^T^x) \approx ( Expec[G(w^T^x)] - Expec[G(v)] )^2^,
+ *
+ * where G is the function set at opts.G_function. So long as the W matrix (capital "W") is orthogonal,
+ * which we do enforce, then w^T^x satisfies the requirement that the variance be one. To extend this to
+ * the whole matrix W, take the sum over all the rows, so the problem is: maximize{ \sum,,w,, J(w^T^x) }.
+ *
+ * On the other hand, the batchSize should be much greater than one, so "data" consists of many columns.
+ * Denoting the data matrix as X, we can obtain the expectations by taking the sample means. In other words,
+ * we take the previous "user" matrix, W*X, apply the function G to the data, and THEN take the mean across
+ * rows, so mean(G(W*X),2). The mean across rows gives what we want since it's applying the same row of W
+ * to different x (column) vectors in our data.
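+ *
+ * In code terms, the value returned below is (a pseudo-Scala restatement):
+ * {{{
+ * sum( (mean(G_fun(user), 2) - stdNorm) *@ (mean(G_fun(user), 2) - stdNorm) )
+ * }}}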
+ *
+ * @param data An n x batchSize matrix, where each column corresponds to a data sample.
+ * @param user An intermediate matrix that stores (w_j^T^) * (x^i^) values.
+ * @param ipass The current pass through the data.
+ */
+ def evalfun(data : Mat, user : Mat, ipass : Int, pos:Long) : FMat = {
+ val big_gwtx = G_fun(user)
+ val rowMean = FMat(mean(big_gwtx,2)) - stdNorm
+ return sum(rowMean *@ rowMean)
+ }
+
+ /** Assumes G(x) = log(cosh(x)), a good general-purpose contrast function. */
+ private def G_logcosh(m : Mat) : Mat = {
+ return ln(cosh(m))
+ }
+
+ /** Assumes g(x) = d/dx log(cosh(x)) = tanh(x). */
+ private def g_logcosh(m : Mat) : Mat = {
+ return tanh(m)
+ }
+
+  /** Assumes g'(x) = d/dx tanh(x) = sech^2^(x). Below, 2*cosh(x)/(cosh(2x)+1) = sech(x), since cosh(2x)+1 = 2*cosh^2^(x). */
+ private def g_d_logcosh(m : Mat) : Mat = {
+ val a = (2*cosh(m))/(cosh(2*m)+1)
+ a ~ a *@ a
+ return a
+ }
+
+ /** Assumes G(x) = -exp(-x^2/2), good if data is super-Gaussian or robustness is needed. */
+ private def G_exponent(m : Mat) : Mat = {
+ return -exp(-0.5f * (m *@ m))
+ }
+
+ /** Assumes g(x) = d/dx -exp(-x^2/2) = x*exp(-x^2/2). */
+ private def g_exponent(m : Mat) : Mat = {
+ return m *@ exp(-0.5f * (m *@ m))
+ }
+
+ /** Assumes g'(x) = d/dx x*exp(-x^2/2) = (1-x^2)*exp(-x^2/2). */
+ private def g_d_exponent(m : Mat) : Mat = {
+ return (1 - (m *@ m)) *@ exp(-0.5f * (m *@ m))
+ }
+
+ /** Assumes G(x) = x^4/4, a weak contrast function, but OK for sub-Gaussian data w/no outliers. */
+ private def G_kurtosis(m: Mat) : Mat = {
+ val c = m *@ m
+ c ~ c *@ c
+ return c / 4.0f
+ }
+
+ /** Assumes g(x) = d/dx x^4/4 = x^3. */
+ private def g_kurtosis(m : Mat) : Mat = {
+ return m *@ m *@ m
+ }
+
+ /** Assumes g'(x) = d/dx x^3 = 3x^2. */
+ private def g_d_kurtosis(m : Mat) : Mat = {
+ return 3 * (m *@ m)
+ }
+
+ /**
+ * Takes in the model matrix and returns an orthogonal version of it, so WW^T = identity. We use a method
+ * from A. Hyvärinen and E. Oja (2000): an iterative algorithm that normalizes with a norm that is NOT the
+ * Frobenius norm, then iterates the update W = 1.5*W - 0.5*W*^W*W until convergence (which is quadratic).
+ * This involves no eigendecompositions and should be fast. We use the maximum absolute row sum norm, so we
+ * take the absolute value of elements, sum over rows, and pick the largest of the values. The above assumes
+ * that the covariance matrix of the data is the identity, i.e., C = I. If not, plug in C.
+ *
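+ * One round of the iteration, as used in the body below (W stands for "w", C for the covariance):
+ * {{{
+ * W <-- W / sqrt(maxi(sum(abs(W * C *^ W), 2)))  // rescale once so the iteration can converge
+ * W <-- 1.5f * W - 0.5f * (W * C *^ W * W)       // repeat numOrthogIter times; drives W*C*W^T^ to I
+ * }}}
+ *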
+ * @param w The model matrix that we want to transform to be orthogonal (often referred to as "mm" here).
+ * @param dat The data matrix, used to compute the covariance matrices if necessary.
+ */
+ private def orthogonalize(w : Mat, dat : Mat) : Mat = {
+ var C:Mat = null
+ if (opts.preWhitened) {
+ C = mkdiag(ones(dat.nrows,1))
+ } else {
+ C = getSampleCovariance(dat)
+ }
+ val WWT = w * C *^ w
+ val result = w / sqrt(maxi(sum(abs(WWT), 2)))
+ if (sum(sum(result)).dv.isNaN) {
+      println("Error: sum(sum(result)) = NaN, indicating issues with sqrt(maxi(sum(abs(WWT),2))).")
+ }
+ var a = 0
+ while (a < opts.numOrthogIter) { // Can result in NaNs, be careful.
+ val newResult = ((1.5f * result) - 0.5f * (result * C *^ result * result))
+ result <-- newResult
+ if (sum(sum(result)).dv.isNaN) {
+ println("Error: sum(sum(result)) = NaN, indicating that NaNs are appearing.")
+ }
+ a = a + 1
+ }
+ return result
+ }
+
+  /** Gets the sample covariance C = F *^ F / (ncols - 1), where F = m - mean(m,2) (one column of m is one sample). */
+ private def getSampleCovariance(m : Mat) : Mat = {
+ val F = m - mean(m,2)
+ return (F *^ F) / (m.ncols - 1)
+ }
+}
+
+
+object ICA {
+
+ trait Opts extends FactorModel.Opts {
+ var G_function:String = "logcosh"
+ var numOrthogIter:Int = 10
+ var preWhitened:Boolean = false
+ }
+
+ class Options extends Opts {}
+
+ /** ICA with a single matrix datasource. The dimension is based on the input matrix. */
+ def learner(mat0:Mat) = {
+ class xopts extends Learner.Options with MatSource.Opts with ICA.Opts with ADAGrad.Opts
+ val opts = new xopts
+ opts.dim = size(mat0)(0)
+ opts.npasses = 10
+ opts.batchSize = math.min(250000, mat0.ncols/15 + 1) // Just a heuristic
+ opts.numOrthogIter = math.min(10, 5+math.sqrt(opts.dim).toInt)
+ val nn = new Learner(
+ new MatSource(Array(mat0:Mat), opts),
+ new ICA(opts),
+ null,
+ new ADAGrad(opts),
+ null,
+ opts)
+ (nn, opts)
+ }
+
+ /** ICA with a files dataSource. */
+ def learner(fnames:List[(Int)=>String], d:Int) = {
+ class xopts extends Learner.Options with FileSource.Opts with ICA.Opts with ADAGrad.Opts
+ val opts = new xopts
+ opts.dim = d
+ opts.fnames = fnames
+ opts.batchSize = 25000
+ implicit val threads = threadPool(4)
+ val nn = new Learner(
+ new FileSource(opts),
+ new ICA(opts),
+ null,
+ new ADAGrad(opts),
+ null,
+ opts)
+ (nn, opts)
+ }
+
+ /** Ranks the independent components by their contribution to the original data. */
+ def rankComponents() = {
+ println("rankComponents() not yet implemented.")
+ }
+
+}
diff --git a/src/main/scala/BIDMach/models/KMeans.scala b/src/main/scala/BIDMach/models/KMeans.scala
index 0a1f5014..5582ce0d 100755
--- a/src/main/scala/BIDMach/models/KMeans.scala
+++ b/src/main/scala/BIDMach/models/KMeans.scala
@@ -27,18 +27,18 @@ import BIDMach._
class KMeans(override val opts:KMeans.Opts = new KMeans.Options) extends ClusteringModel(opts) {
// var mm:Mat = null
- def um = {updatemats(0)};
- def umcount = {updatemats(1)};
+ def um = {updatemats(0)}
+ def umcount = {updatemats(1)}
// var umcount:Mat = null
var modelsreduced:Int = 1
- def mm = {modelmats(0)};
- def mmnorm = {modelmats(1)};
+ def mm = {modelmats(0)}
+ def mmnorm = {modelmats(1)}
override def init() = {
super.init()
if (refresh) {
- setmodelmats(Array(mm, mm dotr mm));
+ setmodelmats(Array(mm, mm dotr mm))
}
for (i <- 0 until modelmats.length) modelmats(i) = convertMat(modelmats(i))
updatemats = Array(um, mm.zeros(mm.nrows, 1))
@@ -49,7 +49,7 @@ class KMeans(override val opts:KMeans.Opts = new KMeans.Options) extends Cluster
}
def mupdate(sdata:Mat, ipass:Int):Unit = {
-// println("trace data %f" format sum(sum(sdata)).dv);
+// println("trace data %f" format sum(sum(sdata)).dv)
val vmatch = -2 * mm * sdata + mmnorm + snorm(sdata) // vmatch(i,j) = squared distance from data sample j to centroid i
    val bestm = vmatch <= mini(vmatch) // mini(vmatch) gives the per-column minimum distances
bestm ~ bestm / sum(bestm)
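+    // each column of bestm now sums to 1, so equidistant centroids share a tied sample equally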
@@ -58,64 +58,64 @@ class KMeans(override val opts:KMeans.Opts = new KMeans.Options) extends Cluster
}
def evalfun(sdata:Mat):FMat = {
- val vmatch = -2 * mm * sdata + mmnorm + snorm(sdata);
- val (vm, im) = mini2(vmatch);
- if (ogmats != null) {ogmats(0) = im;};
- max(vm, 0f, vm);
- val vv = mean(vm).dv;
- row(-vv);
+ val vmatch = -2 * mm * sdata + mmnorm + snorm(sdata)
+ val (vm, im) = mini2(vmatch)
+    if (ogmats != null) {ogmats(0) = im}
+ max(vm, 0f, vm)
+ val vv = mean(vm).dv
+ row(-vv)
}
override def evalfun(sdata:Mat, targ:Mat):FMat = {
- val vmatch = -2 * mm * sdata + mmnorm + snorm(sdata);
- val (vm, im) = mini2(vmatch);
- if (ogmats != null) {ogmats(0) = im;};
- max(vm, 0f, vm);
- val vv = mean(vm).dv;
- row(-vv);
+ val vmatch = -2 * mm * sdata + mmnorm + snorm(sdata)
+ val (vm, im) = mini2(vmatch)
+    if (ogmats != null) {ogmats(0) = im}
+ max(vm, 0f, vm)
+ val vv = mean(vm).dv
+ row(-vv)
}
override def updatePass(ipass:Int) = {
if (ipass > 0) {
- max(umcount, 1f, umcount);
- mm ~ um / umcount;
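+      // counts are clamped at 1 below so that empty clusters do not cause a divide-by-zero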
+ max(umcount, 1f, umcount)
+ mm ~ um / umcount
}
- um.clear;
- umcount.clear;
- mmnorm ~ mm dotr mm;
+ um.clear
+ umcount.clear
+ mmnorm ~ mm dotr mm
}
override def mergeModelFn(models:Array[Model], mm:Array[Mat], um:Array[Mat], istep:Long) = {}
override def mergeModelPassFn(models:Array[Model], mmx:Array[Mat], umx:Array[Mat], ipass:Int) = {
- val nmodels = models.length;
+ val nmodels = models.length
mmx(0).clear
if (ipass == 0) { // on first pass, model is random samples, so take a mixed sample
- val m0 = models(0).modelmats(0);
- val isel = umx(0).zeros(m0.nrows, 1);
- val vsel = min((nmodels-1).toFloat, floor(nmodels*rand(m0.nrows, 1)));
+ val m0 = models(0).modelmats(0)
+ val isel = umx(0).zeros(m0.nrows, 1)
+ val vsel = min((nmodels-1).toFloat, floor(nmodels*rand(m0.nrows, 1)))
for (i <- 0 until nmodels) {
- isel <-- (vsel == i.toFloat);
- umx(0) <-- models(i).modelmats(0);
- umx(0) ~ isel *@ umx(0);
- mmx(0) ~ mmx(0) + umx(0);
+ isel <-- (vsel == i.toFloat)
+ umx(0) <-- models(i).modelmats(0)
+ umx(0) ~ isel *@ umx(0)
+ mmx(0) ~ mmx(0) + umx(0)
}
} else { // on later passes, average the centers
for (i <- 0 until nmodels) {
- umx(0) <-- models(i).modelmats(0);
- mmx(0) ~ mmx(0) + umx(0);
+ umx(0) <-- models(i).modelmats(0)
+ mmx(0) ~ mmx(0) + umx(0)
}
- mmx(0) ~ mmx(0) * (1f/nmodels);
+ mmx(0) ~ mmx(0) * (1f/nmodels)
}
- mmx(1) ~ mmx(0) dotr mmx(0);
+ mmx(1) ~ mmx(0) dotr mmx(0)
for (i <- 0 until nmodels) {
- models(i).modelmats(0) <-- mmx(0);
- models(i).modelmats(1) <-- mmx(1);
+ models(i).modelmats(0) <-- mmx(0)
+ models(i).modelmats(1) <-- mmx(1)
}
}
override def combineModels(ipass:Int, model: Model):Model = {
- val other:KMeans = model.asInstanceOf[KMeans];
+ val other:KMeans = model.asInstanceOf[KMeans]
if (ipass == 0) {
val total_models_reduced = modelsreduced + other.modelsreduced
val isel = mm.zeros(mm.nrows, 1)
@@ -125,8 +125,8 @@ class KMeans(override val opts:KMeans.Opts = new KMeans.Options) extends Cluster
mm ~ mm + (1-isel) *@ other.mm
modelsreduced = total_models_reduced
} else {
- um ~ um + other.um;
- umcount ~ umcount + other.umcount;
+ um ~ um + other.um
+ umcount ~ umcount + other.umcount
}
this
}
@@ -139,11 +139,11 @@ object KMeans {
class Options extends Opts {}
def mkKMeansModel(fopts:Model.Opts) = {
- new KMeans(fopts.asInstanceOf[KMeans.Opts])
+ new KMeans(fopts.asInstanceOf[KMeans.Opts])
}
def mkUpdater(nopts:Updater.Opts) = {
- new Batch(nopts.asInstanceOf[Batch.Opts])
+ new Batch(nopts.asInstanceOf[Batch.Opts])
}
class MatOptions extends Learner.Options with KMeans.Opts with MatSource.Opts with Batch.Opts
@@ -153,13 +153,13 @@ object KMeans {
opts.dim = d
opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
opts.npasses = 10
- val nn = new Learner(
- new MatSource(Array(mat0:Mat), opts),
- new KMeans(opts),
- null,
- new Batch(opts),
- null,
- opts)
+ val nn = new Learner(
+ new MatSource(Array(mat0:Mat), opts),
+ new KMeans(opts),
+ null,
+ new Batch(opts),
+ null,
+ opts)
(nn, opts)
}
@@ -173,23 +173,23 @@ object KMeans {
val opts = new FileOptions
opts.dim = d
opts.fnames = fnames
- opts.batchSize = 10000;
+ opts.batchSize = 10000
implicit val threads = threadPool(4)
- val nn = new Learner(
- new FileSource(opts),
- new KMeans(opts),
- null,
- new Batch(opts),
- null,
- opts)
+ val nn = new Learner(
+ new FileSource(opts),
+ new KMeans(opts),
+ null,
+ new Batch(opts),
+ null,
+ opts)
(nn, opts)
}
- def learner(fnames:List[(Int)=>String]):(Learner, FileOptions) = learner(fnames, 256);
+ def learner(fnames:List[(Int)=>String]):(Learner, FileOptions) = learner(fnames, 256)
- def learner(fnames:String, d:Int):(Learner, FileOptions) = learner(List(FileSource.simpleEnum(fnames,1,0)), d);
+ def learner(fnames:String, d:Int):(Learner, FileOptions) = learner(List(FileSource.simpleEnum(fnames,1,0)), d)
- def learner(fnames:String):(Learner, FileOptions) = learner(List(FileSource.simpleEnum(fnames,1,0)), 256);
+ def learner(fnames:String):(Learner, FileOptions) = learner(List(FileSource.simpleEnum(fnames,1,0)), 256)
class IteratorOptions extends Learner.Options with KMeans.Opts with IteratorSource.Opts with Batch.Opts
@@ -205,14 +205,14 @@ object KMeans {
(nn, opts)
}
- class PredOptions extends Learner.Options with KMeans.Opts with MatSource.Opts with MatSink.Opts;
+ class PredOptions extends Learner.Options with KMeans.Opts with MatSource.Opts with MatSink.Opts
// This function constructs a predictor from an existing model
def predictor(model:Model, mat1:Mat):(Learner, PredOptions) = {
- val nopts = new PredOptions;
+ val nopts = new PredOptions
nopts.batchSize = math.min(10000, mat1.ncols/30 + 1)
- nopts.dim = model.opts.dim;
- val newmod = new KMeans(nopts);
+ nopts.dim = model.opts.dim
+ val newmod = new KMeans(nopts)
newmod.refresh = false
model.copyTo(newmod)
val nn = new Learner(
@@ -225,19 +225,19 @@ object KMeans {
(nn, nopts)
}
- class FilePredOptions extends Learner.Options with KMeans.Opts with FileSource.Opts with FileSink.Opts;
+ class FilePredOptions extends Learner.Options with KMeans.Opts with FileSource.Opts with FileSink.Opts
// This function constructs a file-based predictor from an existing model
def predictor(model:Model, infnames:String, outfnames:String):(Learner, FilePredOptions) = {
- val nopts = new FilePredOptions;
- nopts.batchSize = 10000;
- nopts.dim = model.opts.dim;
- nopts.fnames = List(FileSource.simpleEnum(infnames,1,0));
- nopts.ofnames = List(FileSource.simpleEnum(outfnames,1,0));
- val newmod = new KMeans(nopts);
+ val nopts = new FilePredOptions
+ nopts.batchSize = 10000
+ nopts.dim = model.opts.dim
+ nopts.fnames = List(FileSource.simpleEnum(infnames,1,0))
+ nopts.ofnames = List(FileSource.simpleEnum(outfnames,1,0))
+ val newmod = new KMeans(nopts)
newmod.refresh = false
- model.copyTo(newmod);
- implicit val threads = threadPool(4);
+ model.copyTo(newmod)
+ implicit val threads = threadPool(4)
val nn = new Learner(
new FileSource(nopts),
newmod,
@@ -256,13 +256,13 @@ object KMeans {
opts.batchSize = math.min(100000, mat0.ncols/30/opts.nthreads + 1)
opts.npasses = 10
    opts.coolit = 0 // Assume we don't need cooling on a matrix input
- val nn = new ParLearnerF(
- new MatSource(Array(mat0:Mat), opts),
- opts, mkKMeansModel _,
- null, null,
- opts, mkUpdater _,
- null, null,
- opts)
+ val nn = new ParLearnerF(
+ new MatSource(Array(mat0:Mat), opts),
+ opts, mkKMeansModel _,
+ null, null,
+ opts, mkUpdater _,
+ null, null,
+ opts)
(nn, opts)
}
@@ -277,15 +277,15 @@ object KMeans {
opts.dim = d
opts.npasses = 10
opts.fnames = fnames
- opts.batchSize = 20000;
- implicit val threads = threadPool(12);
- val nn = new ParLearnerF(
- new FileSource(opts),
- opts, mkKMeansModel _,
- null, null,
- opts, mkUpdater _,
- null, null,
- opts)
+ opts.batchSize = 20000
+ implicit val threads = threadPool(12)
+ val nn = new ParLearnerF(
+ new FileSource(opts),
+ opts, mkKMeansModel _,
+ null, null,
+ opts, mkUpdater _,
+ null, null,
+ opts)
(nn, opts)
}
diff --git a/src/main/scala/BIDMach/models/KMeansw.scala b/src/main/scala/BIDMach/models/KMeansw.scala
index ba0a562d..69b1235e 100755
--- a/src/main/scala/BIDMach/models/KMeansw.scala
+++ b/src/main/scala/BIDMach/models/KMeansw.scala
@@ -29,7 +29,7 @@ import BIDMach.models._
* opts.nthreads=2 // number of threads (defaults to number of GPUs)
* nn.train // train the learner
* nn.modelmat // get the final model
- * }}}
+ * }}}
*/
class KMeansw(override val opts:KMeansw.Opts = new KMeansw.Options) extends Model(opts) {
@@ -51,14 +51,14 @@ class KMeansw(override val opts:KMeansw.Opts = new KMeansw.Options) extends Mode
      throw new RuntimeException("KMeansw needs batchSize >= dim")
if (refresh) {
- val rp = randperm(nc);
- val mmi = full(data0(?,rp(0,0->opts.dim))).t;
- mm = convertMat(mmi);
- mcounts = mm.zeros(mm.nrows, 1);
- mweights = mm.zeros(mm.nrows, 1);
- setmodelmats(Array(mm, mcounts, mweights));
+ val rp = randperm(nc)
+ val mmi = full(data0(?,rp(0,0->opts.dim))).t
+ mm = convertMat(mmi)
+ mcounts = mm.zeros(mm.nrows, 1)
+ mweights = mm.zeros(mm.nrows, 1)
+ setmodelmats(Array(mm, mcounts, mweights))
}
- for (i <- 0 until 3) modelmats(i) = convertMat(modelmats(i));
+ for (i <- 0 until 3) modelmats(i) = convertMat(modelmats(i))
um = modelmats(0).zeros(mm.nrows, mm.ncols)
umcounts = mm.zeros(mm.nrows, 1)
umweights = mm.zeros(mm.nrows, 1)
@@ -84,11 +84,11 @@ class KMeansw(override val opts:KMeansw.Opts = new KMeansw.Options) extends Mode
}
def mupdate(sdata:Mat, weights:Mat, ipass:Int):Unit = {
- val vmatch = -2 * mm * sdata + snorm(sdata) + ((mm dotr mm) + (opts.wsize * mweights));
- val bestm = vmatch <= mini(vmatch);
- bestm ~ bestm / sum(bestm);
- um ~ bestm *^ sdata;
- sum(bestm, 2, umcounts);
+ val vmatch = -2 * mm * sdata + snorm(sdata) + ((mm dotr mm) + (opts.wsize * mweights))
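+    // opts.wsize * mweights adds a per-centroid penalty to the squared distances, biasing against heavy centroids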
+ val bestm = vmatch <= mini(vmatch)
+ bestm ~ bestm / sum(bestm)
+ um ~ bestm *^ sdata
+ sum(bestm, 2, umcounts)
if (weights.asInstanceOf[AnyRef] != null) {
umweights ~ bestm *^ weights
} else {
@@ -105,17 +105,17 @@ class KMeansw(override val opts:KMeansw.Opts = new KMeansw.Options) extends Mode
} else {
mean(sqrt(vm)).dv
}
- row(-vv, math.exp(vv))
+ row(-vv, math.exp(vv))
}
override def updatePass(ipass:Int) = {
if (ipass > 0) {
- max(umcounts, 1f, umcounts);
- mm ~ um / umcounts;
- mweights <-- umweights;
- um.clear;
- umcounts.clear;
- umweights.clear;
+ max(umcounts, 1f, umcounts)
+ mm ~ um / umcounts
+ mweights <-- umweights
+ um.clear
+ umcounts.clear
+ umweights.clear
}
}
}
@@ -128,11 +128,11 @@ object KMeansw {
class Options extends Opts {}
def mkKMeansModel(fopts:Model.Opts) = {
- new KMeansw(fopts.asInstanceOf[KMeansw.Opts])
+ new KMeansw(fopts.asInstanceOf[KMeansw.Opts])
}
def mkUpdater(nopts:Updater.Opts) = {
- new IncNorm(nopts.asInstanceOf[IncNorm.Opts])
+ new IncNorm(nopts.asInstanceOf[IncNorm.Opts])
}
class FsOpts extends Learner.Options with KMeansw.Opts with FileSource.Opts with IncNorm.Opts
@@ -145,13 +145,13 @@ object KMeansw {
opts.batchSize = math.min(100000, datamat.ncols/30 + 1)
opts.isprob = false
opts.power = 0.5f
- val nn = new Learner(
- new MatSource(Array(datamat, wghts), opts),
- new KMeansw(opts),
- null,
- new IncNorm(opts),
- null,
- opts)
+ val nn = new Learner(
+ new MatSource(Array(datamat, wghts), opts),
+ new KMeansw(opts),
+ null,
+ new IncNorm(opts),
+ null,
+ opts)
(nn, opts)
}
@@ -173,10 +173,10 @@ object KMeansw {
// This function constructs a predictor from an existing model
def predictor(model:Model, mat1:Mat, preds:Mat, d:Int):(Learner, MemOpts) = {
- val nopts = new MemOpts;
+ val nopts = new MemOpts
nopts.batchSize = math.min(10000, mat1.ncols/30 + 1)
nopts.putBack = 1
- val newmod = new KMeansw(nopts);
+ val newmod = new KMeansw(nopts)
newmod.refresh = false
model.copyTo(newmod)
val nn = new Learner(
@@ -196,13 +196,13 @@ object KMeansw {
opts.batchSize = math.min(100000, mat0.ncols/30/opts.nthreads + 1)
    opts.coolit = 0 // Assume we don't need cooling on a matrix input
opts.power = 0.5f
- val nn = new ParLearnerF(
- new MatSource(Array(mat0:Mat), opts),
- opts, mkKMeansModel _,
- null, null,
- opts, mkUpdater _,
- null, null,
- opts)
+ val nn = new ParLearnerF(
+ new MatSource(Array(mat0:Mat), opts),
+ opts, mkKMeansModel _,
+ null, null,
+ opts, mkUpdater _,
+ null, null,
+ opts)
(nn, opts)
}
}
diff --git a/src/main/scala/BIDMach/models/LDA.scala b/src/main/scala/BIDMach/models/LDA.scala
index e68b8d5d..578bac74 100755
--- a/src/main/scala/BIDMach/models/LDA.scala
+++ b/src/main/scala/BIDMach/models/LDA.scala
@@ -40,7 +40,7 @@ import BIDMach._
* nn.train // train the model
* nn.modelmat // get the final model
* nn.datamat // get the other factor
- * }}}
+ * }}}
*/
class LDA(override val opts:LDA.Opts = new LDA.Options) extends FactorModel(opts) {
@@ -50,14 +50,14 @@ class LDA(override val opts:LDA.Opts = new LDA.Options) extends FactorModel(opts
/** Sets up the modelmats and updatemats arrays and initializes modelmats(0) randomly unless stated otherwise. */
override def init() = {
- super.init();
- mm = modelmats(0);
+ super.init()
+ mm = modelmats(0)
if (refresh) {
- setmodelmats(Array(mm, mm.ones(mm.nrows, 1)));
+ setmodelmats(Array(mm, mm.ones(mm.nrows, 1)))
}
- updatemats = new Array[Mat](2);
- updatemats(0) = mm.zeros(mm.nrows, mm.ncols);
- updatemats(1) = mm.zeros(mm.nrows, 1);
+ updatemats = new Array[Mat](2)
+ updatemats(0) = mm.zeros(mm.nrows, mm.ncols)
+ updatemats(1) = mm.zeros(mm.nrows, 1)
}
/**
@@ -75,7 +75,7 @@ class LDA(override val opts:LDA.Opts = new LDA.Options) extends FactorModel(opts
def uupdate(sdata:Mat, user:Mat, ipass:Int, pos:Long):Unit = {
if (putBack < 0 || ipass == 0) user.set(1f)
for (i <- 0 until opts.uiter) {
- val preds = DDS(mm, user, sdata)
+ val preds = DDS(mm, user, sdata)
val dc = sdata.contents
val pc = preds.contents
max(opts.weps, pc, pc)
@@ -83,7 +83,7 @@ class LDA(override val opts:LDA.Opts = new LDA.Options) extends FactorModel(opts
val unew = user ∘ (mm * preds) + opts.alpha
if (opts.exppsi) exppsi(unew, unew)
user <-- unew
- }
+ }
}
/**
@@ -93,7 +93,7 @@ class LDA(override val opts:LDA.Opts = new LDA.Options) extends FactorModel(opts
* typically much smaller than the total number of documents, so sdata is usually a portion of the full input.
* @param user An (opts.dim x opts.batchSize) matrix that stores some intermediate/temporary data and gets left-
* multiplied by modelmats(0) to form sdata.
- * @param ipass Index of the pass over the data (0 = first pass, 1 = second pass, etc.).
+ * @param ipass Index of the pass over the data (0 = first pass, 1 = second pass, etc.).
*/
def mupdate(sdata:Mat, user:Mat, ipass:Int, pos:Long):Unit = {
val preds = DDS(mm, user, sdata)
@@ -104,8 +104,8 @@ class LDA(override val opts:LDA.Opts = new LDA.Options) extends FactorModel(opts
val ud = user *^ preds
ud ~ ud ∘ mm
ud ~ ud + opts.beta
- updatemats(0) <-- ud
- sum(ud, 2, updatemats(1))
+ updatemats(0) <-- ud
+ sum(ud, 2, updatemats(1))
}
/**
@@ -118,17 +118,17 @@ class LDA(override val opts:LDA.Opts = new LDA.Options) extends FactorModel(opts
* @param ipass Index of the pass over the data (0 = first pass, 1 = second pass, etc.).
*/
def evalfun(sdata:Mat, user:Mat, ipass:Int, pos:Long):FMat = {
- if (ogmats != null) ogmats(0) = user;
- val preds = DDS(mm, user, sdata);
- val dc = sdata.contents;
- val pc = preds.contents;
- max(opts.weps, pc, pc);
- ln(pc, pc);
- val sdat = sum(sdata,1);
- val mms = sum(mm,2);
- val suu = ln(mms ^* user);
- val vv = ((pc ddot dc) - (sdat ddot suu))/sum(sdat,2).dv;
- row(vv, math.exp(-vv))
+ if (ogmats != null) ogmats(0) = user
+ val preds = DDS(mm, user, sdata)
+ val dc = sdata.contents
+ val pc = preds.contents
+ max(opts.weps, pc, pc)
+ ln(pc, pc)
+ val sdat = sum(sdata,1)
+ val mms = sum(mm,2)
+ val suu = ln(mms ^* user)
+ val vv = ((pc ddot dc) - (sdat ddot suu))/sum(sdat,2).dv
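+    // vv is the mean per-token log likelihood; exp(-vv) below reports it as a perplexity-style score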
+ row(vv, math.exp(-vv))
}
}
@@ -144,30 +144,30 @@ object LDA {
/** Creates a new LDA model. */
def mkLDAmodel(fopts:Model.Opts) = {
- new LDA(fopts.asInstanceOf[LDA.Opts])
+ new LDA(fopts.asInstanceOf[LDA.Opts])
}
/** Creates a new IncNorm updater. */
def mkUpdater(nopts:Updater.Opts) = {
- new IncNorm(nopts.asInstanceOf[IncNorm.Opts])
+ new IncNorm(nopts.asInstanceOf[IncNorm.Opts])
}
class MatOpts extends Learner.Options with LDA.Opts with MatSource.Opts with IncNorm.Opts
/** Online Variational Bayes LDA algorithm with a matrix datasource. */
- def learner(mat0:Mat):(Learner, MatOpts) = learner(mat0, 256);
+ def learner(mat0:Mat):(Learner, MatOpts) = learner(mat0, 256)
def learner(mat0:Mat, d:Int):(Learner, MatOpts) = {
val opts = new MatOpts
opts.dim = d
opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
- val nn = new Learner(
- new MatSource(Array(mat0:Mat), opts),
- new LDA(opts),
- null,
- new IncNorm(opts),
- null,
- opts)
+ val nn = new Learner(
+ new MatSource(Array(mat0:Mat), opts),
+ new LDA(opts),
+ null,
+ new IncNorm(opts),
+ null,
+ opts)
(nn, opts)
}
@@ -182,27 +182,27 @@ object LDA {
val opts = new FileOpts
opts.dim = d
opts.fnames = fnames
- opts.batchSize = 100000;
- opts.eltsPerSample = 500;
+ opts.batchSize = 100000
+ opts.eltsPerSample = 500
implicit val threads = threadPool(4)
- val nn = new Learner(
- new SFileSource(opts),
- new LDA(opts),
- null,
- new IncNorm(opts),
- null,
- opts)
+ val nn = new Learner(
+ new SFileSource(opts),
+ new LDA(opts),
+ null,
+ new IncNorm(opts),
+ null,
+ opts)
(nn, opts)
}
- class PredOptions extends Learner.Options with LDA.Opts with MatSource.Opts with MatSink.Opts;
+ class PredOptions extends Learner.Options with LDA.Opts with MatSource.Opts with MatSink.Opts
// This function constructs a predictor from an existing model
def predictor(model:Model, mat1:Mat):(Learner, PredOptions) = {
- val nopts = new PredOptions;
+ val nopts = new PredOptions
nopts.batchSize = math.min(10000, mat1.ncols/30 + 1)
- nopts.dim = model.opts.dim;
- val newmod = new LDA(nopts);
+ nopts.dim = model.opts.dim
+ val newmod = new LDA(nopts)
newmod.refresh = false
model.copyTo(newmod)
val nn = new Learner(
@@ -215,13 +215,13 @@ object LDA {
(nn, nopts)
}
- class MatBatchOpts extends Learner.Options with LDA.Opts with MatSource.Opts with BatchNorm.Opts;
+ class MatBatchOpts extends Learner.Options with LDA.Opts with MatSource.Opts with BatchNorm.Opts
/** Batch Variational Bayes LDA algorithm with a matrix datasource. */
- def learnBatch(mat0:Mat):(Learner, MatBatchOpts) = learnBatch(mat0, 256);
+ def learnBatch(mat0:Mat):(Learner, MatBatchOpts) = learnBatch(mat0, 256)
def learnBatch(mat0:Mat, d:Int):(Learner, MatBatchOpts) = {
- val opts = new MatBatchOpts;
+ val opts = new MatBatchOpts
opts.dim = d
opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
val nn = new Learner(
@@ -234,49 +234,49 @@ object LDA {
(nn, opts)
}
- class MatParOpts extends ParLearner.Options with LDA.Opts with MatSource.Opts with IncNorm.Opts;
+ class MatParOpts extends ParLearner.Options with LDA.Opts with MatSource.Opts with IncNorm.Opts
/** Parallel online LDA algorithm with a matrix datasource. */
- def learnPar(mat0:Mat):(ParLearnerF, MatParOpts) = learnPar(mat0, 256);
+ def learnPar(mat0:Mat):(ParLearnerF, MatParOpts) = learnPar(mat0, 256)
def learnPar(mat0:Mat, d:Int):(ParLearnerF, MatParOpts) = {
- val opts = new MatParOpts;
+ val opts = new MatParOpts
opts.dim = d
opts.batchSize = math.min(100000, mat0.ncols/30/opts.nthreads + 1)
    opts.coolit = 0 // Assume we don't need cooling on a matrix input
- val nn = new ParLearnerF(
- new MatSource(Array(mat0:Mat), opts),
- opts, mkLDAmodel _,
- null, null,
- opts, mkUpdater _,
- null, null,
- opts)
+ val nn = new ParLearnerF(
+ new MatSource(Array(mat0:Mat), opts),
+ opts, mkLDAmodel _,
+ null, null,
+ opts, mkUpdater _,
+ null, null,
+ opts)
(nn, opts)
}
class SFDSopts extends ParLearner.Options with LDA.Opts with SFileSource.Opts with IncNorm.Opts
- def learnPar(fnames:String, d:Int):(ParLearnerF, SFDSopts) = learnPar(List(FileSource.simpleEnum(fnames, 1, 0)), d);
+ def learnPar(fnames:String, d:Int):(ParLearnerF, SFDSopts) = learnPar(List(FileSource.simpleEnum(fnames, 1, 0)), d)
/** Parallel online LDA algorithm with one file datasource. */
def learnPar(fnames:List[(Int) => String], d:Int):(ParLearnerF, SFDSopts) = {
- val opts = new SFDSopts;
- opts.dim = d;
- opts.npasses = 4;
- opts.fnames = fnames;
- opts.batchSize = 100000;
- opts.eltsPerSample = 500;
- opts.resFile = "../results.mat"
- implicit val threads = threadPool(12)
- val nn = new ParLearnerF(
- new SFileSource(opts),
- opts, mkLDAmodel _,
- null, null,
- opts, mkUpdater _,
- null, null,
- opts
- )
- (nn, opts)
+ val opts = new SFDSopts
+ opts.dim = d
+ opts.npasses = 4
+ opts.fnames = fnames
+ opts.batchSize = 100000
+ opts.eltsPerSample = 500
+ opts.resFile = "../results.mat"
+ implicit val threads = threadPool(12)
+ val nn = new ParLearnerF(
+ new SFileSource(opts),
+ opts, mkLDAmodel _,
+ null, null,
+ opts, mkUpdater _,
+ null, null,
+ opts
+ )
+ (nn, opts)
}
}
diff --git a/src/main/scala/BIDMach/models/LDAgibbs.scala b/src/main/scala/BIDMach/models/LDAgibbs.scala
index 1af12db9..137fa17e 100755
--- a/src/main/scala/BIDMach/models/LDAgibbs.scala
+++ b/src/main/scala/BIDMach/models/LDAgibbs.scala
@@ -41,7 +41,7 @@ import BIDMach._
* nn.modelmat // get the final model
* nn.datamat // get the other factor
* }}}
- *
+ *
*/
class LDAgibbs(override val opts:LDAgibbs.Opts = new LDAgibbs.Options) extends FactorModel(opts) {
@@ -53,8 +53,8 @@ class LDAgibbs(override val opts:LDAgibbs.Opts = new LDAgibbs.Options) extends F
override def init() = {
super.init
if (refresh) {
- mm = modelmats(0);
- setmodelmats(Array(mm, mm.ones(mm.nrows, 1)));
+ mm = modelmats(0)
+ setmodelmats(Array(mm, mm.ones(mm.nrows, 1)))
}
updatemats = new Array[Mat](2)
updatemats(0) = mm.zeros(mm.nrows, mm.ncols)
@@ -63,44 +63,44 @@ class LDAgibbs(override val opts:LDAgibbs.Opts = new LDAgibbs.Options) extends F
def uupdate(sdata:Mat, user:Mat, ipass: Int, pos:Long):Unit = {
- if (putBack < 0 || ipass == 0) user.set(1f)
+ if (putBack < 0 || ipass == 0) user.set(1f)
for (i <- 0 until opts.uiter) yield {
- val preds = DDS(mm, user, sdata)
- if (traceMem) println("uupdate %d %d %d, %d %f %d" format (mm.GUID, user.GUID, sdata.GUID, preds.GUID, GPUmem._1, getGPU))
- val dc = sdata.contents
- val pc = preds.contents
- pc ~ pc / dc
-
- val unew = user*0
- val mnew = updatemats(0)
- mnew.set(0f)
+ val preds = DDS(mm, user, sdata)
+ if (traceMem) println("uupdate %d %d %d, %d %f %d" format (mm.GUID, user.GUID, sdata.GUID, preds.GUID, GPUmem._1, getGPU))
+ val dc = sdata.contents
+ val pc = preds.contents
+ pc ~ pc / dc
+
+ val unew = user*0
+ val mnew = updatemats(0)
+ mnew.set(0f)
- LDAgibbs.LDAsample(mm, user, mnew, unew, preds, dc, opts.nsamps, opts.useBino)
+ LDAgibbs.LDAsample(mm, user, mnew, unew, preds, dc, opts.nsamps, opts.useBino)
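+      // sampling step: LDAsample draws opts.nsamps topic assignments per token, accumulating counts into mnew/unew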
- if (traceMem) println("uupdate %d %d %d, %d %d %d %d %f %d" format (mm.GUID, user.GUID, sdata.GUID, preds.GUID, dc.GUID, pc.GUID, unew.GUID, GPUmem._1, getGPU))
- user ~ unew + opts.alpha
- }
+ if (traceMem) println("uupdate %d %d %d, %d %d %d %d %f %d" format (mm.GUID, user.GUID, sdata.GUID, preds.GUID, dc.GUID, pc.GUID, unew.GUID, GPUmem._1, getGPU))
+ user ~ unew + opts.alpha
+ }
}
def mupdate(sdata:Mat, user:Mat, ipass: Int, pos:Long):Unit = {
- val um = updatemats(0)
- um ~ um + opts.beta
- sum(um, 2, updatemats(1))
+ val um = updatemats(0)
+ um ~ um + opts.beta
+ sum(um, 2, updatemats(1))
}
def evalfun(sdata:Mat, user:Mat, ipass:Int, pos:Long):FMat = {
- val preds = DDS(mm, user, sdata)
- val dc = sdata.contents
- val pc = preds.contents
- max(opts.weps, pc, pc)
- ln(pc, pc)
- val sdat = sum(sdata,1)
- val mms = sum(mm,2)
- val suu = ln(mms ^* user)
- if (traceMem) println("evalfun %d %d %d, %d %d %d, %d %f" format (sdata.GUID, user.GUID, preds.GUID, pc.GUID, sdat.GUID, mms.GUID, suu.GUID, GPUmem._1))
- val vv = ((pc ddot dc) - (sdat ddot suu))/sum(sdat,2).dv
- row(vv, math.exp(-vv))
+ val preds = DDS(mm, user, sdata)
+ val dc = sdata.contents
+ val pc = preds.contents
+ max(opts.weps, pc, pc)
+ ln(pc, pc)
+ val sdat = sum(sdata,1)
+ val mms = sum(mm,2)
+ val suu = ln(mms ^* user)
+ if (traceMem) println("evalfun %d %d %d, %d %d %d, %d %f" format (sdata.GUID, user.GUID, preds.GUID, pc.GUID, sdat.GUID, mms.GUID, suu.GUID, GPUmem._1))
+ val vv = ((pc ddot dc) - (sdat ddot suu))/sum(sdat,2).dv
+ row(vv, math.exp(-vv))
}
}
@@ -150,15 +150,15 @@ object LDAgibbs {
}
def mkGibbsLDAmodel(fopts:Model.Opts) = {
- new LDAgibbs(fopts.asInstanceOf[LDAgibbs.Opts])
+ new LDAgibbs(fopts.asInstanceOf[LDAgibbs.Opts])
}
def mkUpdater(nopts:Updater.Opts) = {
- new IncNorm(nopts.asInstanceOf[IncNorm.Opts])
+ new IncNorm(nopts.asInstanceOf[IncNorm.Opts])
}
/*
- * This learner uses stochastic updates (like the standard LDA model)
+ * This learner uses stochastic updates (like the standard LDA model)
*/
def learner(mat0:Mat, d:Int = 256) = {
class xopts extends Learner.Options with LDAgibbs.Opts with MatSource.Opts with IncNorm.Opts
@@ -166,18 +166,18 @@ object LDAgibbs {
opts.dim = d
opts.putBack = 1
opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
- val nn = new Learner(
- new MatSource(Array(mat0:Mat), opts),
- new LDAgibbs(opts),
- null,
- new IncNorm(opts),
- null,
- opts)
+ val nn = new Learner(
+ new MatSource(Array(mat0:Mat), opts),
+ new LDAgibbs(opts),
+ null,
+ new IncNorm(opts),
+ null,
+ opts)
(nn, opts)
}
/*
- * Batch learner
+ * Batch learner
*/
def learnBatch(mat0:Mat, d:Int = 256) = {
class xopts extends Learner.Options with LDAgibbs.Opts with MatSource.Opts with BatchNorm.Opts
@@ -197,7 +197,7 @@ object LDAgibbs {
}
/*
- * Parallel learner with matrix source
+ * Parallel learner with matrix source
*/
def learnPar(mat0:Mat, d:Int = 256) = {
class xopts extends ParLearner.Options with LDAgibbs.Opts with MatSource.Opts with IncNorm.Opts
@@ -218,7 +218,7 @@ object LDAgibbs {
}
/*
- * Parallel learner with multiple file datasources
+ * Parallel learner with multiple file datasources
*/
def learnFParx(
nstart:Int=FileSource.encodeDate(2012,3,1,0),
@@ -231,19 +231,19 @@ object LDAgibbs {
opts.npasses = 4
opts.resFile = "/big/twitter/test/results.mat"
val nn = new ParLearnerxF(
- null,
- (dopts:DataSource.Opts, i:Int) => Experiments.Twitter.twitterWords(nstart, nend, opts.nthreads, i),
- opts, mkGibbsLDAmodel _,
- null, null,
- opts, mkUpdater _,
- null, null,
+ null,
+ (dopts:DataSource.Opts, i:Int) => Experiments.Twitter.twitterWords(nstart, nend, opts.nthreads, i),
+ opts, mkGibbsLDAmodel _,
+ null, null,
+ opts, mkUpdater _,
+ null, null,
opts
)
(nn, opts)
}
/*
- * Parallel learner with single file datasource
+ * Parallel learner with single file datasource
*/
def learnFPar(
nstart:Int=FileSource.encodeDate(2012,3,1,0),
@@ -264,7 +264,7 @@ object LDAgibbs {
opts
)
(nn, opts)
- }
+ }
}
diff --git a/src/main/scala/BIDMach/models/LDAgibbsv.scala b/src/main/scala/BIDMach/models/LDAgibbsv.scala
index 66a8c97e..dc6697e2 100755
--- a/src/main/scala/BIDMach/models/LDAgibbsv.scala
+++ b/src/main/scala/BIDMach/models/LDAgibbsv.scala
@@ -56,8 +56,8 @@ class LDAgibbsv(override val opts:LDAgibbsv.Opts = new LDAgibbsv.Options) extend
override def init() = {
super.init
if (refresh) {
- mm = modelmats(0);
- setmodelmats(Array(mm, mm.ones(mm.nrows, 1)));
+ mm = modelmats(0)
+ setmodelmats(Array(mm, mm.ones(mm.nrows, 1)))
}
updatemats = new Array[Mat](2)
updatemats(0) = mm.zeros(mm.nrows, mm.ncols)
@@ -74,7 +74,7 @@ class LDAgibbsv(override val opts:LDAgibbsv.Opts = new LDAgibbsv.Options) extend
mnew.set(0f)
for (i <- 0 until opts.uiter) yield {
- val preds = DDS(mm, user, sdata)
+ val preds = DDS(mm, user, sdata)
if (traceMem) println("uupdate %d %d %d, %d %f %d" format (mm.GUID, user.GUID, sdata.GUID, preds.GUID, GPUmem._1, getGPU))
val dc = sdata.contents
val pc = preds.contents
@@ -115,7 +115,7 @@ um ~ um + opts.beta
// call this if nsamps matrix is changed during optimization
def updateSamps = {
- nsamps <-- opts.nsamps;
+ nsamps <-- opts.nsamps
}
}
@@ -179,4 +179,4 @@ object LDAgibbsv {
(nn, opts)
}
-}
\ No newline at end of file
+}
\ No newline at end of file
diff --git a/src/main/scala/BIDMach/models/MHTest.scala b/src/main/scala/BIDMach/models/MHTest.scala
index d21cd5a0..2a191fb5 100644
--- a/src/main/scala/BIDMach/models/MHTest.scala
+++ b/src/main/scala/BIDMach/models/MHTest.scala
@@ -1,1017 +1,1016 @@
-package BIDMach.models
-
-import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
-import BIDMat.MatFunctions._
-import BIDMat.SciFunctions._
-import BIDMach.datasources._
-import BIDMach.updaters._
-import BIDMach._
-import BIDMach.networks._
-
-import java.text.NumberFormat
-import edu.berkeley.bid.CUMACH._
-import scala.collection.mutable._
-
-class MHTest(var objective:Model, val proposer:Proposer, val ecdfmat: FMat, val hash_ecdf:FMat,
- override val opts:MHTest.Opts = new MHTest.Options) extends Model(opts) {
-
- var ecdf:Ecdf = new Ecdf(ecdfmat, hash_ecdf)
- var delta:Double = 1.0
- var var_estimate_mat:FMat = null
- var sd_smooth_exp_param:Double = 0.7 // use the exp update to estimate var
- var estimated_sd:Double = 1.0
- var accpet_count:Float = 0.0f
- var reject_count:Float = 0.0f
- var batch_est_data:Array[Array[Mat]] = null
- var help_mats:Array[Mat] = null
- var data_buffer:Array[Mat] = null // the array to hold the previous data batch
-
- override def init() = {
- // init the ecdf
-
- objective.mats = mats
- objective.putBack = datasource.opts.putBack;
- objective.useGPU = opts.useGPU && Mat.hasCUDA > 0;
- objective.useDouble = opts.useDouble;
- objective.gmats = new Array[Mat](mats.length)
-
- objective.init()
- _modelmats = new Array[Mat](objective.modelmats.length)
- println("init")
- // init the proposer class
- proposer.init()
-
- if (proposer.has_help_mats) {
- help_mats = new Array[Mat](objective.modelmats.length)
- }
-
- for (i <- 0 until objective.modelmats.length) {
- _modelmats(i) = objective.modelmats(i).zeros(objective.modelmats(i).nrows, objective.modelmats(i).ncols)
- _modelmats(i) <-- objective.modelmats(i)
- if (proposer.has_help_mats) {
- help_mats(i) = objective.modelmats(i).zeros(objective.modelmats(i).nrows, objective.modelmats(i).ncols)
- }
- println(_modelmats(i))
- }
-
-
- // init the batch_est_sd0/1
- var mat = datasource.next
- // put the mat into the data buffer
- data_buffer = new Array[Mat](mat.length)
- for (i <- 0 until mat.length) {
- data_buffer(i) = GMat(mat(i).zeros(mat(i).nrows, mat(i).ncols))
- data_buffer(i) <-- mat(i)
- }
-
- // init the container
- var_estimate_mat = zeros(1, opts.num_data_est_sd)
-
- batch_est_data = Array.ofDim[Mat](opts.num_data_est_sd, mat.length)
- for (i_batch <- 0 until opts.num_data_est_sd) {
- mat = datasource.next
- for (i_mat <- 0 until mat.length) {
- batch_est_data(i_batch)(i_mat) = GMat(mat(i_mat))
- }
- }
-
- // init ecdf
- ecdf.init()
- }
-
- // call proposer to get the theta',
- // then generate a x_corr from distribution of X_corr
- // Then decide whether to replace (i.e. accpet) _modelmats
- override def dobatch(mats:Array[Mat], ipass:Int, here:Long) = {
-
- // estimate the variance
- estimated_sd = estimated_sd * sd_smooth_exp_param + (1-sd_smooth_exp_param) * computeVarDelta()
- if (java.lang.Double.isNaN(estimated_sd)) {
- throw new RuntimeException("NaN for the sd 3 ")
- }
- if (here == 0) {
- accpet_count = 0.0f
- reject_count = 0.0f
- }
- proposer.changeToUpdateState()
- // propose the data
- val (next_mat:Array[Mat], update_v, delta:Double) = proposer.proposeNext(_modelmats, help_mats, mats, ipass, here)
-
- // compute the delta by another batch
- val delta_new = proposer.computeDelta(next_mat, _modelmats, update_v, help_mats, data_buffer, 0, 0)
-
- // update the data buffer
-
- for (i <- 0 until mats.length) {
- data_buffer(i) <-- mats(i)
- }
-
- // do the test
- // println ("the delta is " + delta)
- if (opts.is_always_accpet) {
- // always accept
- for (i <- 0 until _modelmats.length) {
- // println ("model mats " + _modelmats(i))
- // println("next: " + next_mat(i))
- if (proposer.has_help_mats) {
- help_mats(i) <-- (update_v.asInstanceOf[Array[Mat]])(i)
- }
- _modelmats(i) <-- next_mat(i)
- }
- changeObjectiveModelMat(objective, _modelmats)
- accpet_count += 1.0f
- } else {
- if (estimated_sd < 1.2f) {
- ecdf.updateSd(estimated_sd)
- var x_corr = ecdf.generateXcorr
- if (x_corr + delta_new > 0) {
- // accpet the candiate
- // println("accpet" + " " + delta + "; X_corr: " + x_corr)
- for (i <- 0 until _modelmats.length) {
- // println ("model mats " + _modelmats(i))
- // println("next: " + next_mat(i))
- if (proposer.has_help_mats) {
- help_mats(i) <-- (update_v.asInstanceOf[Array[Mat]])(i)
- }
- _modelmats(i) <-- next_mat(i)
- }
- changeObjectiveModelMat(objective, _modelmats)
- accpet_count += 1.0f
- //println ("updated modelmats " + objective.modelmats(0))
- } else {
- reject_count += 1.0f
- }
- } else {
- println ("skip the large var " + estimated_sd)
- reject_count += 1.0f
- }
- }
-
-
-
- }
-
- // Call the parent class to compute the loss of the model
- override def evalbatch(mats:Array[Mat], ipass:Int, here:Long):FMat = {
- // copy back the parameters
- // Notice: this is not the deep copy, we just
- // change the reference of the parent_model
- // objective.setmodelmats(_modelmats)
-
- changeObjectiveModelMat(objective, _modelmats)
- var accpe_ratio = accpet_count / (accpet_count + reject_count)
- if (java.lang.Double.isNaN(estimated_sd)) {
- throw new RuntimeException("ADA0 2 ")
-
- }
- val loss = objective.evalbatch(mats, ipass, here)
- println ("REST the sd of delat sdDelta: " + estimated_sd + " accpet ratio is AccRate: " + accpe_ratio + " the loss: " + loss)
- loss
- //rand(1,1)
- }
-
- // help methods
-
-
- // change the reference of the modelmats in the model
- // as well as change the reference of modelmats at each layer
- def changeObjectiveModelMat(model:Model, mats:Array[Mat]):Unit = {
-
- for (i <- 0 until model.modelmats.length) {
- model.modelmats(i) <-- mats(i)
- }
- }
-
- def computeVarDelta():Double = {
-
-
- proposer.changeToEstimateSdState()
-
- for (i <- 0 until opts.num_data_est_sd) {
-
- var (next_mat0, update_v, delta) = proposer.proposeNext(_modelmats, help_mats, batch_est_data(i), 0, 0)
- var_estimate_mat(0,i) = delta
- }
- proposer.changeToUpdateState()
- var varianceVal = variance(var_estimate_mat)
- // println("the var is "+ varianceVal + ", the vect is " + var_estimate_mat)
- if (varianceVal.dv < 0) {
- varianceVal(0,0) = 1e-5f
- }
- (varianceVal^0.5).dv
-
- }
-}
-
-
-object MHTest {
- trait Opts extends Model.Opts {
- // TODO: define the parameters here
- // var num_iter_estimate_var:Int = 100
- // var batchSize:Int = 200 // the parents class already has it
- var ratio_decomposite:Double = 0.994
- var num_data_est_sd:Int = 3
- var is_always_accpet:Boolean = false
- }
-
- class Options extends Opts {}
-
- def learner(mat0:Mat, targ:Mat, model:Model, proposer:Proposer, ecdfmat: FMat, hash_ecdf:FMat) = {
- class xopts extends Learner.Options with MHTest.Opts with MatSource.Opts with IncNorm.Opts
- val opts = new xopts
-
- val nn = new Learner(
- new MatSource(Array(mat0, targ), opts),
- new MHTest(model, proposer, ecdfmat, hash_ecdf, opts),
- null,
- new IncNorm(opts),
- null,
- opts)
- (nn, opts)
- }
-
- class FDSopts extends Learner.Options with MHTest.Opts with FileSource.Opts
-
- def learner(fn1:String, fn2:String, model:Model, proposer:Proposer, ecdfmat: FMat, hash_ecdf:FMat):(Learner, FDSopts) = learner(List(FileSource.simpleEnum(fn1,1,0),
- FileSource.simpleEnum(fn2,1,0)), model, proposer, ecdfmat, hash_ecdf);
-
-
- def learner(fnames:List[(Int)=>String], model:Model, proposer:Proposer, ecdfmat: FMat, hash_ecdf:FMat):(Learner, FDSopts) = {
-
- val opts = new FDSopts;
- opts.fnames = fnames
- opts.batchSize = 200;
- opts.eltsPerSample = 500;
- implicit val threads = threadPool(4);
- val ds = new FileSource(opts)
- val nn = new Learner(
- ds,
- new MHTest(model, proposer, ecdfmat, hash_ecdf, opts),
- null,
- null,
- null,
- opts)
- (nn, opts)
- }
-
- // just for testing
- def Ecdf(ecdfmat: FMat, hash:FMat) = {
- val ecdf = new Ecdf(ecdfmat, hash)
- ecdf
- }
-
- // for testing
- def Langevin_Proposer(lr:Float, t:Float, v:Float, cp:Float, model:Model):Proposer = {
- val lp = new Langevin_Proposer(lr, t, v, cp, model)
- lp
- }
-
- def Gradient_descent_proposer(lr:Float, u:Float, t:Float, v:Float, cp:Float, model:Model):Proposer = {
-
- val lp = new Gradient_descent_proposer(lr, u, t, v, cp, model)
- lp
- }
-
- def SGHMC_proposer (lr:Float, a:Float, t:Float, v:Float, cp:Float, k:Float, batchSize:Float, model:Model):Proposer = {
- val lp = new SGHMC_proposer(lr, a, t, v, cp, k, batchSize, model)
- lp
- }
-
-
- // create a fully connected nn model, just model,
- // not learner
- // TODO: We need to write this function so that it can generate a model,
- // which we can use to compute the jump prob and loss.
- def constructNNModel(nslabs:Int, width:Int, taper:Float, ntargs:Int, nonlin:Int = 1):Model = {
- val opts = new Net.LearnOptions
- if (opts.links == null) {
- opts.links = izeros(1,1);
- opts.links.set(1);
- }
- // opts.nend = 10
- opts.npasses = 50
- opts.batchSize = 200
- opts.reg1weight = 0.0001;
- opts.hasBias = true;
- opts.links = iones(1,1);
- opts.nweight = 1e-4f
- val net = Net.dnodes3(nslabs, width, taper, ntargs, opts, nonlin);
- opts.nodeset = net
- // opts.lookahead = 0 /// turn off prefetch
- // opts.debug = 1
- val model = new Net(opts)
- model
- }
-
-}
-
-abstract class Proposer() {
- // init the proposer class.
- var has_help_mats:Boolean
- def init():Unit = {
-
- }
-
- def changeToUpdateState():Unit = {}
-
- def changeToEstimateSdState():Unit = {}
-
- // Function to propose the next parameter, i.e. theta' and the delta
- def proposeNext(modelmats:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long):(Array[Mat], Array[Mat], Double) = {
- null
- }
-
- def computeDelta(mats_new:Array[Mat], mats_old:Array[Mat], new_v:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long): Double ={
- -1.0
- }
-}
-
-class Langevin_Proposer(val lr:Float, val t:Float, val v:Float, val cp:Float, val model:Model) extends Proposer() {
-
- var step:Mat = null // record the step by itself
- var candidate:Array[Mat] = null
- var stepi:Mat = null
- var is_estimte_sd = true
- var sumSq:Array[Mat] = null // container for g*g
- var lrate:Mat = null
- var te:Mat = null
- var ve:Mat = null
- var updatemats:Array[Mat] = null // just a reference
- var epsilon:Float = 1e-5f
- var initsumsq = 1e-5f
- var clipByValue:Mat = null
- var newsquares:Array[Mat] = null
- var random_matrix:Array[Mat] = null
- var sumSq_tmp_container:Array[Mat] = null
- override var has_help_mats:Boolean = false
-
- override def init():Unit = {
-
- candidate = new Array[Mat](model.modelmats.length)
- sumSq = new Array[Mat](model.modelmats.length)
- sumSq_tmp_container = new Array[Mat](model.modelmats.length)
- newsquares = new Array[Mat](model.modelmats.length)
- random_matrix = new Array[Mat](model.modelmats.length)
-
- stepi = model.modelmats(0).zeros(1,1)
- step = model.modelmats(0).ones(1,1)
-
- te = model.modelmats(0).zeros(1,1)
- te(0,0) = t
- ve = model.modelmats(0).zeros(1,1)
- ve(0,0) = v
- lrate = model.modelmats(0).zeros(1,1)
- lrate(0,0) = lr
-
- if (cp > 0) {
- clipByValue = model.modelmats(0).zeros(1,1)
- clipByValue(0,0) = cp
- }
- for (i <- 0 until candidate.length) {
- candidate(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
- sumSq(i) = model.modelmats(i).ones(model.modelmats(i).nrows, model.modelmats(i).ncols) *@ initsumsq
- sumSq_tmp_container(i) = model.modelmats(i).ones(model.modelmats(i).nrows, model.modelmats(i).ncols) *@ initsumsq
- newsquares(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
- random_matrix(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
- }
- println("finish init the proposer")
- println("step: " + step + ", stepi" + stepi + ", te: " + te + ", ve: " + ve +", lrate: " + lrate)
-
- }
-
- override def changeToUpdateState():Unit = {
- is_estimte_sd = false
- }
-
- override def changeToEstimateSdState():Unit = {
- is_estimte_sd = true
- }
-
- override def proposeNext(modelmats:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long):(Array[Mat], Array[Mat], Double) = {
- // deep copy the parameter value to the model's mat
- for (i <- 0 until modelmats.length) {
- model.modelmats(i) <-- modelmats(i)
- }
-
- // compute the gradient
- model.dobatch(gmats, ipass, pos)
-
- updatemats = model.updatemats
-
- // sample the new model parameters by the gradient and the stepsize
- // and store the sample results into the candidate array
- stepi <-- lrate / (step ^ te) / 2.0f
-
- // adagrad to revise the grad
- for (i <- 0 until candidate.length) {
- // clip
- if (cp > 0f) {
- min(updatemats(i), clipByValue,updatemats(i));
- max(updatemats(i),-clipByValue,updatemats(i));
- }
-
- // compute the ss
- val ss = sumSq(i)
- val um = updatemats(i)
- newsquares(i) <-- um *@ um
-
- sumSq_tmp_container(i) <-- ss // copy to tmp container
-
- ss ~ ss *@ (step - 1)
- ss ~ ss + newsquares(i)
- ss ~ ss / step
- val grad = ss ^ ve
-
- grad ~ grad + epsilon
- grad ~ um / grad
- grad ~ grad *@ stepi
-
- // for add the gassian noisy
- normrnd(0, ((stepi*2) ^ 0.5).dv, random_matrix(i))
- grad ~ grad + random_matrix(i)
-
- candidate(i) <-- modelmats(i) + grad
- if (java.lang.Double.isNaN(sum(sum(candidate(i))).dv)) throw new RuntimeException("candidate"+i);
- }
-
-
- // compute the delta
-
- val delta = computeDelta(candidate, modelmats, null, null, gmats, ipass, pos)
-
- // update the iteration only if it's update
- if (!is_estimte_sd) {
- step ~ step + 1.0f
- }
- // println ("delta:" + delta + " loss_new:" + loss_new + " loss_prev:" + loss_prev + " loglik_new_to_prev:" + loglik_new_to_prev + " loglik_prev_to_new:" + loglik_prev_to_new)
-
- if (java.lang.Double.isNaN(delta)) {
- // println ("delta:" + delta + " loss_new:" + loss_new + " loss_prev:" + loss_prev + " loglik_new_to_prev:" + loglik_new_to_prev + " loglik_prev_to_new:" + loglik_prev_to_new)
- throw new RuntimeException("Delta")
- }
-
- (candidate, null, delta)
- }
-
-
- override def computeDelta(mats_new:Array[Mat], mats_old:Array[Mat], new_v:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long): Double ={
- // copy the mats_old to the model
- for (i <- 0 until mats_old.length) {
- model.modelmats(i) <-- mats_old(i)
- }
-
- // compute the loss
- var loss_mat_prev = model.evalbatch(gmats, ipass, pos)
- val loss_prev = (sum(loss_mat_prev)).dv
-
- // compute the gradient and rescale it
- model.dobatch(gmats, ipass, pos)
-
- updatemats = model.updatemats
-
- // sample the new model parameters by the gradient and the stepsize
- // and store the sample results into the candidate array
-
- var loglik_prev_to_new = 0.0
- var loglik_new_to_prev = 0.0
-
- // adagrad to revise the grad
- for (i <- 0 until updatemats.length) {
- // clip
- if (cp > 0f) {
- min(updatemats(i), clipByValue,updatemats(i));
- max(updatemats(i),-clipByValue,updatemats(i));
- }
-
- // compute the ss
- val ss2 = sumSq_tmp_container(i)
- val um2 = updatemats(i)
- newsquares(i) <-- um2 *@ um2 // it's OK to reuse the newsquares
-
- ss2 ~ ss2 *@ (step - 1)
- ss2 ~ ss2 + newsquares(i)
- ss2 ~ ss2 / step
- val grad2 = ss2 ^ ve
-
- // de-affect of the ss2
- ss2 <-- ss2 *@ step
- ss2 <-- ss2 - newsquares(i)
- if (step.dv > 1) {
- ss2 <-- ss2 / (step - 1)
- }
-
- // so sumSq_tmp_container is still the old ss val
-
- grad2 ~ grad2 + epsilon
- grad2 ~ um2 / grad2
- grad2 ~ grad2 *@ stepi
-
- // re-use the space newsquares here
- // the pnt jump from modelmats is modelmats + grad2
- // println("the grad in the new to prev " + grad2)
- // println(" the newsquares: " + newsquares(i))
- // println("the stepi " + stepi)
- newsquares(i) <-- mats_old(i) + grad2
- newsquares(i) ~ newsquares(i) - mats_new(i)
- loglik_prev_to_new += (-1.0*sum(sum(newsquares(i) *@ newsquares(i))) / 2.0 / (stepi*2)).dv
-
- }
-
- // then jump from the new mats to the old ones
- // copy the data to the models
- for (i <- 0 until mats_new.length) {
- model.modelmats(i) <-- mats_new(i)
- }
-
- // eval the new data
- model.dobatch(gmats, ipass, pos)
- updatemats = model.updatemats
- loss_mat_prev = model.evalbatch(gmats, ipass, pos) // re-use the old reference here
- val loss_new = (sum(loss_mat_prev)).dv
-
- // compute the new scaled gradient
- for (i <- 0 until updatemats.length) {
- // clip
- if (cp > 0f) {
- min(updatemats(i), clipByValue,updatemats(i));
- max(updatemats(i),-clipByValue,updatemats(i));
- }
-
- // compute the ss
- val ss2 = sumSq_tmp_container(i)
- val um2 = updatemats(i)
- newsquares(i) <-- um2 *@ um2 // it's OK to reuse the newsquares
-
- ss2 ~ ss2 *@ (step - 1)
- ss2 ~ ss2 + newsquares(i)
- ss2 ~ ss2 / step
- val grad2 = ss2 ^ ve
-
- // de-affect the ss2
- ss2 ~ ss2 *@ step
- ss2 ~ ss2 - newsquares(i)
- if (step.dv > 1) {
- ss2 ~ ss2 / (step - 1)
- }
-
-
- grad2 ~ grad2 + epsilon
- grad2 ~ um2 / grad2
- grad2 ~ grad2 *@ stepi
-
- // re-use the space newsquares here
- // the pnt jump from candidate is candidate + grad2
- newsquares(i) <-- mats_new(i) + grad2
- newsquares(i) ~ newsquares(i) - mats_old(i)
- loglik_new_to_prev += (-1.0*sum(sum(newsquares(i) *@ newsquares(i))) / 2.0 / (stepi*2)).dv
- }
-
- val delta = (loss_new) - (loss_prev) + loglik_new_to_prev - loglik_prev_to_new
-
- if (java.lang.Double.isNaN(delta)) {
- println ("delta:" + delta + " loss_new:" + loss_new + " loss_prev:" + loss_prev + " loglik_new_to_prev:" + loglik_new_to_prev + " loglik_prev_to_new:" + loglik_prev_to_new)
- throw new RuntimeException("Delta")
- }
- delta
-
- }
-
-}
-
-
-// the stochastic gradient hamiltonian monte carlo updater
-class SGHMC_proposer (val lr:Float, val a:Float, val t:Float, val v:Float, val cp:Float, val k:Float, val batchSize:Float, val model:Model) extends Proposer() {
-
- var step:Mat = null // record the step by itself
- var candidate:Array[Mat] = null
- var stepi:Mat = null
- var is_estimte_sd:Boolean = true
- var alpha:Mat = null
- var v_old:Array[Mat] = null // the v in the paper
- var sumSq:Array[Mat] = null // container for g*g
- var lrate:Mat = null
- var te:Mat = null
- var ve:Mat = null
- var noise_matrix:Array[Mat] = null // contain the v_new
- var epsilon:Float = 1e-5f
- var initsumsq = 1e-5f
- var clipByValue:Mat = null
- var newsquares:Array[Mat] = null
- var estimated_v:Mat = null
- var kir:Mat = null
- var m:Int = 1
- var adj_alpha:Mat = null
- var t_init:Mat = null
-
- override var has_help_mats:Boolean = true
-
-
- override def init():Unit = {
- // init the container here
-
- candidate = new Array[Mat](model.modelmats.length)
- sumSq = new Array[Mat](model.modelmats.length)
- newsquares = new Array[Mat](model.modelmats.length)
-
- stepi = model.modelmats(0).zeros(1,1)
- step = model.modelmats(0).ones(1,1)
-
- te = model.modelmats(0).zeros(1,1)
- te(0,0) = t
- ve = model.modelmats(0).zeros(1,1)
- ve(0,0) = v
- lrate = model.modelmats(0).zeros(1,1)
- lrate(0,0) = lr
- v_old = new Array[Mat](model.modelmats.length)
- noise_matrix = new Array[Mat](model.modelmats.length)
- alpha = model.modelmats(0).zeros(1,1)
- alpha(0,0) = a
-
- estimated_v = model.modelmats(0).zeros(1,1)
-
- kir = model.modelmats(0).zeros(1,1)
- kir(0,0) = k
-
- t_init = model.modelmats(0).ones(1,1)
- t_init(0,0) = 1000.0f
-
- adj_alpha = model.modelmats(0).zeros(1,1)
-
- if (cp > 0) {
- clipByValue = model.modelmats(0).zeros(1,1)
- clipByValue(0,0) = cp
- }
- for (i <- 0 until candidate.length) {
- candidate(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
- sumSq(i) = model.modelmats(i).ones(model.modelmats(i).nrows, model.modelmats(i).ncols) *@ initsumsq
- newsquares(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
- v_old(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
- noise_matrix(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
- }
- println("finish init the proposer")
- println("step: " + step + ", stepi" + stepi + ", te: " + te + ", ve: " + ve +", lrate: " + lrate)
- }
-
-
- override def changeToUpdateState():Unit = {
- is_estimte_sd = false
- }
-
- override def changeToEstimateSdState():Unit = {
- is_estimte_sd = true
- }
-
- // notice, the gradient computed by system is for max the objective...
- override def proposeNext(modelmats:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long):(Array[Mat], Array[Mat], Double) = {
-
- // compute the new v
-
- // copy the modelmats to the model
- for (i <- 0 until modelmats.length) {
- model.modelmats(i) <-- modelmats(i)
- }
-
- stepi <-- lrate / (step ^ te);
-
- // resample the v_old
- for (i <- 0 until v_old.length) {
- // normrnd(0, (stepi^0.5).dv, v_old(i))
-
- if (step.dv < -1.0) {
- normrnd(0, (stepi^0.5).dv, v_old(i))
- } else {
- v_old(i) <-- prev_v(i)
- }
- // normrnd(0, (stepi^0.5).dv, v_old(i))
- }
-
-
- // copy the modelmats to candidates
- for (i <- 0 until modelmats.length) {
- candidate(i) <-- modelmats(i)
- }
- // do update for m steps
- for (j <- 0 until m) {
- for (i <- 0 until modelmats.length) {
- candidate(i) <-- candidate(i) + v_old(i)
- model.modelmats(i) <-- candidate(i)
- }
-
- model.dobatch(gmats, ipass, pos)
-
- for (i <- 0 until candidate.length) {
- // clip
- if (cp > 0f) {
- min(model.updatemats(i), clipByValue, model.updatemats(i));
- max(model.updatemats(i),-clipByValue, model.updatemats(i));
- }
-
- // compute the ss
- val ss = sumSq(i)
- // since the gradient is the revise of the max for min problem
- val um = model.updatemats(i)
- newsquares(i) <-- um *@ um
-
- ss ~ ss *@ (step - 1)
- ss ~ ss + newsquares(i)
- ss ~ ss / step
- val grad = ss ^ ve
-
- grad ~ grad + epsilon
- grad ~ um / grad
-
- // estimate beta
- estimated_v ~ estimated_v *@ (1 - kir)
- estimated_v <-- estimated_v + sum(sum(grad *@ grad)) *@ kir / batchSize * 1000000 / grad.length
- // var tmp = 1 / batchSize * 1000000 / grad.length
- // println(tmp)
- // just add by my understanding not sure right
- // estimated_v <-- estimated_v / grad.length
-
- // just debug
- // println("estimated_v: " + estimated_v)
-
- adj_alpha <-- alpha
-
-
- if ((estimated_v*stepi/2.0).dv > alpha.dv) {
- adj_alpha = (estimated_v*stepi/2.0) + 1e-6f
- // println ("alpha change to be " + adj_alpha)
- }
- if (adj_alpha.dv > 0.2) {
- adj_alpha <-- alpha
- }
-
-
-
- grad ~ grad *@ stepi
-
- // put the val into the container
- v_old(i) <-- (1.0-adj_alpha) *@ v_old(i) + grad
- // add the random noise
- val est_var = 2*(adj_alpha - estimated_v*stepi / 2.0) * stepi
- // println("the est var is " + estimated_v +" ,the var is " + est_var)
- if (est_var.dv < 0) {
- // println("the est var is " + estimated_v +" ,the var is " + est_var)
- est_var(0,0) = 1e-5f
- }
-
- normrnd(0, (est_var^0.5).dv, noise_matrix(i))
- v_old(i) <-- v_old(i) + noise_matrix(i)
- // println("the inserted noise is " + (est_var^0.5) + ", and " + ((stepi * 0.001)^0.5) )
- /**
- // insert more noise?
- normrnd(0, ((stepi * 0.00001)^0.5).dv, noise_matrix(i))
- v_old(i) <-- v_old(i) + noise_matrix(i)
- **/
- }
-
- }
-
-
- // compute the delta here
- // place the modelmats by the proposed one
- /**
- for (i <- 0 until candidate.length) {
- model.modelmats(i) <-- candidate(i)
- }
- val score_new = -1.0 * sum(model.evalbatch(gmats, ipass, pos))
-
- var enery_new = v_old(0).zeros(1,1)
- for (i <- 0 until candidate.length) {
- enery_new <-- enery_new + sum(sum(v_old(i) *@ v_old(i)))
- }
- enery_new ~ enery_new / 2 / stepi
- // println ("score_old: " + score_old + ", score_new: " + score_new + ", enery_new:" + enery_new + ", enery_old:"+enery_old)
- val delta = score_old + enery_old - score_new - enery_new
- **/
- // println ("the delta is " + delta)
- // incremental the count
- val delta = computeDelta(candidate, modelmats, v_old, prev_v, gmats, ipass, pos)
- if (!is_estimte_sd) {
- step ~ step + 1.0f
- }
- if (java.lang.Double.isNaN(delta.dv)) {
- throw new RuntimeException("Delta for proposer")
- }
- (candidate, v_old, delta)
- }
-
-
- override def computeDelta(mats_new:Array[Mat], mats_old:Array[Mat], new_v:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long): Double ={
-
- // compute the temperature
- val t_i = t_init / step ^(0.5)
- if (t_i.dv <= 1.0f) {
- t_i(0,0) = 1.0f
- }
- // val t_i = t_init
-
- for (i <- 0 until mats_old.length) {
- model.modelmats(i) <-- mats_old(i)
- }
- val score_old = -1.0 *sum(model.evalbatch(gmats, ipass, pos)) / t_i
- var enery_old = prev_v(0).zeros(1,1)
- for (i <- 0 until prev_v.length) {
- enery_old <-- enery_old + sum(sum(prev_v(i) *@ prev_v(i)))
- }
- enery_old ~ enery_old / 2 / stepi
-
-
- for (i <- 0 until mats_new.length) {
- model.modelmats(i) <-- mats_new(i)
- }
- val score_new = -1.0 *sum(model.evalbatch(gmats, ipass, pos)) / t_i
-
- var enery_new = v_old(0).zeros(1,1)
- for (i <- 0 until candidate.length) {
- enery_new <-- enery_new + sum(sum(v_old(i) *@ v_old(i)))
- }
- enery_new ~ enery_new / 2 / stepi
- // println ("score_old: " + score_old + ", score_new: " + score_new + ", enery_new:" + enery_new + ", enery_old:"+enery_old)
- val delta = score_old + enery_old - score_new - enery_new
- if (java.lang.Double.isNaN(delta.dv)) {
- throw new RuntimeException("Delta for proposer")
- }
- delta.dv
- }
-}
-
-
-class Gradient_descent_proposer (val lr:Float, val u:Float, val t:Float, val v:Float, val cp:Float, val model:Model) extends Proposer() {
- var step:Mat = null // record the step by itself
- var candidate:Array[Mat] = null
- var stepi:Mat = null
- var is_estimte_sd = true
- var mu:Mat = null
- var momentum:Array[Mat] = null
- var sumSq:Array[Mat] = null // container for g*g
- var lrate:Mat = null
- var te:Mat = null
- var ve:Mat = null
- var hasmomentum:Boolean = true
- var updatemats:Array[Mat] = null // just a reference
- var epsilon:Float = 1e-5f
- var initsumsq = 1e-5f
- var clipByValue:Mat = null
- var newsquares:Array[Mat] = null
- override var has_help_mats:Boolean = false
-
-
- override def init():Unit = {
- // init the container here
- hasmomentum = (u > 0)
-
- candidate = new Array[Mat](model.modelmats.length)
- sumSq = new Array[Mat](model.modelmats.length)
- newsquares = new Array[Mat](model.modelmats.length)
-
- stepi = model.modelmats(0).zeros(1,1)
- step = model.modelmats(0).ones(1,1)
-
- te = model.modelmats(0).zeros(1,1)
- te(0,0) = t
- ve = model.modelmats(0).zeros(1,1)
- ve(0,0) = v
- lrate = model.modelmats(0).zeros(1,1)
- lrate(0,0) = lr
- if (hasmomentum) {
- momentum = new Array[Mat](model.modelmats.length)
- mu = model.modelmats(0).zeros(1,1)
- mu(0,0) = u
- }
-
- if (cp > 0) {
- clipByValue = model.modelmats(0).zeros(1,1)
- clipByValue(0,0) = cp
- }
- for (i <- 0 until candidate.length) {
- candidate(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
- sumSq(i) = model.modelmats(i).ones(model.modelmats(i).nrows, model.modelmats(i).ncols) *@ initsumsq
- newsquares(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
-
- if (hasmomentum) {
- momentum(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
- }
- }
- println("finish init the proposer")
- println("step: " + step + ", stepi" + stepi + ", te: " + te + ", ve: " + ve +", lrate: " + lrate)
- }
-
- override def proposeNext(modelmats:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long):(Array[Mat], Array[Mat], Double) = {
- // just do the one step gradient descent
- if (!is_estimte_sd) {
-
- for (i <- 0 until modelmats.length) {
- model.modelmats(i) <-- modelmats(i)
- }
- // compute the gradient
- model.dobatch(gmats, ipass, pos)
- updatemats = model.updatemats
-
- // sample the new model parameters by the gradient and the stepsize
- // and store the sample results into the candidate array
- stepi <-- lrate / (step ^ te);
- for (i <- 0 until candidate.length) {
- // clip
- if (cp > 0f) {
- min(updatemats(i), clipByValue,updatemats(i));
- max(updatemats(i),-clipByValue,updatemats(i));
- }
-
- // compute the ss
- val ss = sumSq(i)
- val um = updatemats(i)
- newsquares(i) <-- um *@ um
-
- ss ~ ss *@ (step - 1)
- ss ~ ss + newsquares(i)
- ss ~ ss / step
- val grad = ss ^ ve
-
- grad ~ grad + epsilon
- grad ~ um / grad
- grad ~ grad *@ stepi
- if (hasmomentum) {
- grad ~ grad + momentum(i)
- momentum(i) ~ grad *@ mu
- }
-
- candidate(i) <-- modelmats(i) + grad
- }
- step ~ step + 1.0f
- }
- // for delta, we just return a very large value
- (candidate, null, 1000000.0)
- }
-
- override def changeToUpdateState():Unit = {
- is_estimte_sd = false
- }
-
- override def changeToEstimateSdState():Unit = {
- is_estimte_sd = true
- }
-
- override def computeDelta(mats_new:Array[Mat], mats_old:Array[Mat], new_v:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long): Double ={
- 100.0
- }
-}
-
-// Class of the emprical cdf of X_corr, there should be three
-// matrix to hold the data computed from the matlab
-// there are pre-computed txt file at /data/EcdfForMHtest
-
-class Ecdf(val ecdfmat:FMat, val varvect:FMat) {
- var sd = 1.0f
- var f:FMat = null
- var x:FMat = null
-
- def init() = {
- // read the x
- x = ecdfmat(0, ?)
- updateSd(1.0)
- }
-
- def generateXcorr = {
- var u:Float = rand(1,1)(0,0)
- // println ("u is " + u)
- val index = binarySearch(u, f)
- // println ("f is " + f)
- // println ("index is "+ index)
- x(0, index)
- }
-
- def updateSd (inputsd:Double):Unit = {
- sd = inputsd.toFloat
- if (sd > 1.2f) {
- throw new RuntimeException("Too large sd of Delta'")
- }
- // update the f
- // looking for the closest index in the hash
- val index = binarySearch(sd, varvect)
- f = ecdfmat(index+1, ?)
- }
-
- // return the closest index in xarray for u
- def binarySearch(u:Float, xarray:FMat) : Int = {
- var start : Int = 0
- var end : Int = xarray.ncols - 1
- var mid : Int = 0
- // println ("mid: "+ mid + " ,start: " + start + " ,end " + end)
- while (end > start + 1) {
- // println ("mid: "+ mid + " ,start: " + start + " ,end " + end)
- mid = (start + end) / 2
- if (u < xarray(0, mid)) {
- end = mid;
- } else if (u > xarray(0, mid)) {
- start = mid;
- } else {
- return mid
- }
- }
- // (x(start) + x(end))/2 * sd
- start
- }
-}
-
+package BIDMach.models
+
+import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
+import BIDMat.MatFunctions._
+import BIDMat.SciFunctions._
+import BIDMach.datasources._
+import BIDMach.updaters._
+import BIDMach._
+import BIDMach.networks._
+
+import java.text.NumberFormat
+import edu.berkeley.bid.CUMACH._
+import scala.collection.mutable._
+
+class MHTest(var objective:Model, val proposer:Proposer, val ecdfmat: FMat, val hash_ecdf:FMat,
+ override val opts:MHTest.Opts = new MHTest.Options) extends Model(opts) {
+
+ var ecdf:Ecdf = new Ecdf(ecdfmat, hash_ecdf)
+ var delta:Double = 1.0
+ var var_estimate_mat:FMat = null
+ var sd_smooth_exp_param:Double = 0.7 // use the exp update to estimate var
+ var estimated_sd:Double = 1.0
+ var accpet_count:Float = 0.0f
+ var reject_count:Float = 0.0f
+ var batch_est_data:Array[Array[Mat]] = null
+ var help_mats:Array[Mat] = null
+ var data_buffer:Array[Mat] = null // the array to hold the previous data batch
+
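+  // One dobatch step, as implemented below:
+  //   1. refresh the smoothed estimate of sd(Delta) via computeVarDelta
+  //   2. ask the proposer for a candidate theta' and its Delta
+  //   3. recompute Delta on the buffered previous batch
+  //   4. draw x_corr from the ECDF and accept theta' iff x_corr + Delta > 0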
+ override def init() = {
+    // wire the inner objective model to this learner's state
+
+ objective.mats = mats
+ objective.putBack = datasource.opts.putBack
+ objective.useGPU = opts.useGPU && Mat.hasCUDA > 0
+ objective.useDouble = opts.useDouble
+ objective.gmats = new Array[Mat](mats.length)
+
+ objective.init()
+ _modelmats = new Array[Mat](objective.modelmats.length)
+ println("init")
+ // init the proposer class
+ proposer.init()
+
+ if (proposer.has_help_mats) {
+ help_mats = new Array[Mat](objective.modelmats.length)
+ }
+
+ for (i <- 0 until objective.modelmats.length) {
+ _modelmats(i) = objective.modelmats(i).zeros(objective.modelmats(i).nrows, objective.modelmats(i).ncols)
+ _modelmats(i) <-- objective.modelmats(i)
+ if (proposer.has_help_mats) {
+ help_mats(i) = objective.modelmats(i).zeros(objective.modelmats(i).nrows, objective.modelmats(i).ncols)
+ }
+ println(_modelmats(i))
+ }
+
+
+    // init the data buffer and the batches used for sd estimation
+ var mat = datasource.next
+ // put the mat into the data buffer
+ data_buffer = new Array[Mat](mat.length)
+ for (i <- 0 until mat.length) {
+ data_buffer(i) = GMat(mat(i).zeros(mat(i).nrows, mat(i).ncols))
+ data_buffer(i) <-- mat(i)
+ }
+
+ // init the container
+ var_estimate_mat = zeros(1, opts.num_data_est_sd)
+
+ batch_est_data = Array.ofDim[Mat](opts.num_data_est_sd, mat.length)
+ for (i_batch <- 0 until opts.num_data_est_sd) {
+ mat = datasource.next
+ for (i_mat <- 0 until mat.length) {
+ batch_est_data(i_batch)(i_mat) = GMat(mat(i_mat))
+ }
+ }
+
+ // init ecdf
+ ecdf.init()
+ }
+
+  // Call the proposer to get theta', then draw x_corr from the
+  // distribution of X_corr, and decide whether to accept theta'
+  // (i.e. replace _modelmats).
+ override def dobatch(mats:Array[Mat], ipass:Int, here:Long) = {
+
+    // update the exponentially smoothed estimate of sd(Delta)
+ estimated_sd = estimated_sd * sd_smooth_exp_param + (1-sd_smooth_exp_param) * computeVarDelta()
+ if (java.lang.Double.isNaN(estimated_sd)) {
+ throw new RuntimeException("NaN for the sd 3 ")
+ }
+ if (here == 0) {
+ accpet_count = 0.0f
+ reject_count = 0.0f
+ }
+ proposer.changeToUpdateState()
+    // propose the next parameters theta'
+ val (next_mat:Array[Mat], update_v, delta:Double) = proposer.proposeNext(_modelmats, help_mats, mats, ipass, here)
+
+    // recompute delta on the buffered previous batch
+ val delta_new = proposer.computeDelta(next_mat, _modelmats, update_v, help_mats, data_buffer, 0, 0)
+
+ // update the data buffer
+
+ for (i <- 0 until mats.length) {
+ data_buffer(i) <-- mats(i)
+ }
+
+ // do the test
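+    // Acceptance test sketch: draw a correction variable x_corr from
+    // the ECDF calibrated to the current sd estimate, and accept the
+    // candidate iff x_corr + delta_new > 0 (opts.is_always_accpet
+    // bypasses the test). Estimated sd values of 1.2 and above fall
+    // outside the precomputed ECDF table, so those proposals are
+    // rejected outright.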
+ // println ("the delta is " + delta)
+ if (opts.is_always_accpet) {
+ // always accept
+ for (i <- 0 until _modelmats.length) {
+ // println ("model mats " + _modelmats(i))
+ // println("next: " + next_mat(i))
+ if (proposer.has_help_mats) {
+ help_mats(i) <-- (update_v.asInstanceOf[Array[Mat]])(i)
+ }
+ _modelmats(i) <-- next_mat(i)
+ }
+ changeObjectiveModelMat(objective, _modelmats)
+ accpet_count += 1.0f
+ } else {
+ if (estimated_sd < 1.2f) {
+ ecdf.updateSd(estimated_sd)
+ var x_corr = ecdf.generateXcorr
+ if (x_corr + delta_new > 0) {
+          // accept the candidate
+          // println("accept " + delta + "; X_corr: " + x_corr)
+ for (i <- 0 until _modelmats.length) {
+ // println ("model mats " + _modelmats(i))
+ // println("next: " + next_mat(i))
+ if (proposer.has_help_mats) {
+ help_mats(i) <-- (update_v.asInstanceOf[Array[Mat]])(i)
+ }
+ _modelmats(i) <-- next_mat(i)
+ }
+ changeObjectiveModelMat(objective, _modelmats)
+ accpet_count += 1.0f
+ //println ("updated modelmats " + objective.modelmats(0))
+ } else {
+ reject_count += 1.0f
+ }
+ } else {
+ println ("skip the large var " + estimated_sd)
+ reject_count += 1.0f
+ }
+ }
+ }
+
+ // Call the parent class to compute the loss of the model
+ override def evalbatch(mats:Array[Mat], ipass:Int, here:Long):FMat = {
+    // Copy the parameters back into the objective.
+    // Note: this is not a deep copy of new matrices; we write into the
+    // matrices the parent model already holds.
+    // objective.setmodelmats(_modelmats)
+
+ changeObjectiveModelMat(objective, _modelmats)
+    val accept_ratio = accpet_count / (accpet_count + reject_count)
+    if (java.lang.Double.isNaN(estimated_sd)) {
+      throw new RuntimeException("estimated sd is NaN in evalbatch")
+    }
+    val loss = objective.evalbatch(mats, ipass, here)
+    println("sdDelta: " + estimated_sd + ", AccRate: " + accept_ratio + ", loss: " + loss)
+ loss
+ //rand(1,1)
+ }
+
+  // helper methods
+
+  // copy the values in mats into the model's modelmats (the same
+  // matrices referenced by each layer)
+ def changeObjectiveModelMat(model:Model, mats:Array[Mat]):Unit = {
+
+ for (i <- 0 until model.modelmats.length) {
+ model.modelmats(i) <-- mats(i)
+ }
+ }
+
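+  // computeVarDelta (below) proposes from each held-out batch in
+  // batch_est_data, collects the resulting deltas, and returns their
+  // sample standard deviation (variance floored at 1e-5 to keep the
+  // square root real).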
+  def computeVarDelta():Double = {
+    proposer.changeToEstimateSdState()
+    for (i <- 0 until opts.num_data_est_sd) {
+      val (next_mat0, update_v, delta) = proposer.proposeNext(_modelmats, help_mats, batch_est_data(i), 0, 0)
+      var_estimate_mat(0,i) = delta
+    }
+    proposer.changeToUpdateState()
+    val varianceVal = variance(var_estimate_mat)
+    // println("the var is "+ varianceVal + ", the vect is " + var_estimate_mat)
+    if (varianceVal.dv < 0) {
+      varianceVal(0,0) = 1e-5f
+    }
+    (varianceVal^0.5).dv
+  }
+}
+
+
+object MHTest {
+ trait Opts extends Model.Opts {
+ // TODO: define the parameters here
+ // var num_iter_estimate_var:Int = 100
+ // var batchSize:Int = 200 // the parents class already has it
+ var ratio_decomposite:Double = 0.994
+ var num_data_est_sd:Int = 3
+ var is_always_accpet:Boolean = false
+ }
+
+ class Options extends Opts {}
+
+ def learner(mat0:Mat, targ:Mat, model:Model, proposer:Proposer, ecdfmat: FMat, hash_ecdf:FMat) = {
+ class xopts extends Learner.Options with MHTest.Opts with MatSource.Opts with IncNorm.Opts
+ val opts = new xopts
+
+ val nn = new Learner(
+ new MatSource(Array(mat0, targ), opts),
+ new MHTest(model, proposer, ecdfmat, hash_ecdf, opts),
+ null,
+ new IncNorm(opts),
+ null,
+ opts)
+ (nn, opts)
+ }
+
+ class FDSopts extends Learner.Options with MHTest.Opts with FileSource.Opts
+
+ def learner(fn1:String, fn2:String, model:Model, proposer:Proposer, ecdfmat: FMat, hash_ecdf:FMat):(Learner, FDSopts) = learner(List(FileSource.simpleEnum(fn1,1,0),
+ FileSource.simpleEnum(fn2,1,0)), model, proposer, ecdfmat, hash_ecdf)
+
+
+ def learner(fnames:List[(Int)=>String], model:Model, proposer:Proposer, ecdfmat: FMat, hash_ecdf:FMat):(Learner, FDSopts) = {
+
+ val opts = new FDSopts
+ opts.fnames = fnames
+ opts.batchSize = 200
+ opts.eltsPerSample = 500
+ implicit val threads = threadPool(4)
+ val ds = new FileSource(opts)
+ val nn = new Learner(
+ ds,
+ new MHTest(model, proposer, ecdfmat, hash_ecdf, opts),
+ null,
+ null,
+ null,
+ opts)
+ (nn, opts)
+ }
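+  // Hypothetical usage sketch (file names and hyperparameters are
+  // illustrative, not from the repo):
+  //   val model    = constructNNModel(3, 200, 0.5f, 1)
+  //   val proposer = SGHMC_proposer(1e-3f, 0.1f, 0.5f, 0f, 1f, 0.01f, 200f, model)
+  //   val ecdfmat  = loadFMat("/data/EcdfForMHtest/ecdf.fmat.lz4")
+  //   val hash     = loadFMat("/data/EcdfForMHtest/hash.fmat.lz4")
+  //   val (nn, opts) = learner("/data/train%02d.fmat.lz4",
+  //                            "/data/labels%02d.fmat.lz4",
+  //                            model, proposer, ecdfmat, hash)
+  //   nn.train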
+
+ // just for testing
+ def Ecdf(ecdfmat: FMat, hash:FMat) = {
+ val ecdf = new Ecdf(ecdfmat, hash)
+ ecdf
+ }
+
+ // for testing
+ def Langevin_Proposer(lr:Float, t:Float, v:Float, cp:Float, model:Model):Proposer = {
+ val lp = new Langevin_Proposer(lr, t, v, cp, model)
+ lp
+ }
+
+ def Gradient_descent_proposer(lr:Float, u:Float, t:Float, v:Float, cp:Float, model:Model):Proposer = {
+
+ val lp = new Gradient_descent_proposer(lr, u, t, v, cp, model)
+ lp
+ }
+
+ def SGHMC_proposer (lr:Float, a:Float, t:Float, v:Float, cp:Float, k:Float, batchSize:Float, model:Model):Proposer = {
+ val lp = new SGHMC_proposer(lr, a, t, v, cp, k, batchSize, model)
+ lp
+ }
+
+
+  // Create a fully connected NN model (just the model, not a learner),
+  // used to compute the jump probability and the loss.
+ def constructNNModel(nslabs:Int, width:Int, taper:Float, ntargs:Int, nonlin:Int = 1):Model = {
+ val opts = new Net.LearnOptions
+ if (opts.links == null) {
+ opts.links = izeros(1,1)
+ opts.links.set(1)
+ }
+ // opts.nend = 10
+ opts.npasses = 50
+ opts.batchSize = 200
+ opts.reg1weight = 0.0001
+ opts.hasBias = true
+ opts.links = iones(1,1)
+ opts.nweight = 1e-4f
+ val net = Net.dnodes3(nslabs, width, taper, ntargs, opts, nonlin)
+ opts.nodeset = net
+ // opts.lookahead = 0 /// turn off prefetch
+ // opts.debug = 1
+ val model = new Net(opts)
+ model
+ }
+
+}
+
+abstract class Proposer() {
+ // init the proposer class.
+ var has_help_mats:Boolean
+ def init():Unit = {
+
+ }
+
+ def changeToUpdateState():Unit = {}
+
+ def changeToEstimateSdState():Unit = {}
+
+ // Function to propose the next parameter, i.e. theta' and the delta
+ def proposeNext(modelmats:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long):(Array[Mat], Array[Mat], Double) = {
+ null
+ }
+
+ def computeDelta(mats_new:Array[Mat], mats_old:Array[Mat], new_v:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long): Double ={
+ -1.0
+ }
+}
+
+class Langevin_Proposer(val lr:Float, val t:Float, val v:Float, val cp:Float, val model:Model) extends Proposer() {
+
+ var step:Mat = null // record the step by itself
+ var candidate:Array[Mat] = null
+ var stepi:Mat = null
+ var is_estimte_sd = true
+ var sumSq:Array[Mat] = null // container for g*g
+ var lrate:Mat = null
+ var te:Mat = null
+ var ve:Mat = null
+ var updatemats:Array[Mat] = null // just a reference
+ var epsilon:Float = 1e-5f
+ var initsumsq = 1e-5f
+ var clipByValue:Mat = null
+ var newsquares:Array[Mat] = null
+ var random_matrix:Array[Mat] = null
+ var sumSq_tmp_container:Array[Mat] = null
+ override var has_help_mats:Boolean = false
+
+ override def init():Unit = {
+
+ candidate = new Array[Mat](model.modelmats.length)
+ sumSq = new Array[Mat](model.modelmats.length)
+ sumSq_tmp_container = new Array[Mat](model.modelmats.length)
+ newsquares = new Array[Mat](model.modelmats.length)
+ random_matrix = new Array[Mat](model.modelmats.length)
+
+ stepi = model.modelmats(0).zeros(1,1)
+ step = model.modelmats(0).ones(1,1)
+
+ te = model.modelmats(0).zeros(1,1)
+ te(0,0) = t
+ ve = model.modelmats(0).zeros(1,1)
+ ve(0,0) = v
+ lrate = model.modelmats(0).zeros(1,1)
+ lrate(0,0) = lr
+
+ if (cp > 0) {
+ clipByValue = model.modelmats(0).zeros(1,1)
+ clipByValue(0,0) = cp
+ }
+ for (i <- 0 until candidate.length) {
+ candidate(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
+ sumSq(i) = model.modelmats(i).ones(model.modelmats(i).nrows, model.modelmats(i).ncols) *@ initsumsq
+ sumSq_tmp_container(i) = model.modelmats(i).ones(model.modelmats(i).nrows, model.modelmats(i).ncols) *@ initsumsq
+ newsquares(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
+ random_matrix(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
+ }
+ println("finish init the proposer")
+ println("step: " + step + ", stepi" + stepi + ", te: " + te + ", ve: " + ve +", lrate: " + lrate)
+
+ }
+
+ override def changeToUpdateState():Unit = {
+ is_estimte_sd = false
+ }
+
+ override def changeToEstimateSdState():Unit = {
+ is_estimte_sd = true
+ }
+
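+  // Proposal sketch: one preconditioned Langevin step. With
+  // stepi = lrate / (2 * step^te) and g the AdaGrad-rescaled gradient,
+  //   theta' = theta + stepi * g + N(0, 2 * stepi)
+  // i.e. SGLD with effective step size 2*stepi, so the injected noise
+  // variance is twice the drift scale.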
+ override def proposeNext(modelmats:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long):(Array[Mat], Array[Mat], Double) = {
+ // deep copy the parameter value to the model's mat
+ for (i <- 0 until modelmats.length) {
+ model.modelmats(i) <-- modelmats(i)
+ }
+
+ // compute the gradient
+ model.dobatch(gmats, ipass, pos)
+
+ updatemats = model.updatemats
+
+ // sample the new model parameters by the gradient and the stepsize
+ // and store the sample results into the candidate array
+ stepi <-- lrate / (step ^ te) / 2.0f
+
+    // AdaGrad-style rescaling of the gradient
+ for (i <- 0 until candidate.length) {
+ // clip
+ if (cp > 0f) {
+ min(updatemats(i), clipByValue,updatemats(i))
+ max(updatemats(i),-clipByValue,updatemats(i))
+ }
+
+ // compute the ss
+ val ss = sumSq(i)
+ val um = updatemats(i)
+ newsquares(i) <-- um *@ um
+
+ sumSq_tmp_container(i) <-- ss // copy to tmp container
+
+ ss ~ ss *@ (step - 1)
+ ss ~ ss + newsquares(i)
+ ss ~ ss / step
+ val grad = ss ^ ve
+
+ grad ~ grad + epsilon
+ grad ~ um / grad
+ grad ~ grad *@ stepi
+
+      // add the Gaussian noise
+ normrnd(0, ((stepi*2) ^ 0.5).dv, random_matrix(i))
+ grad ~ grad + random_matrix(i)
+
+ candidate(i) <-- modelmats(i) + grad
+ if (java.lang.Double.isNaN(sum(sum(candidate(i))).dv)) throw new RuntimeException("candidate"+i)
+ }
+
+
+ // compute the delta
+
+ val delta = computeDelta(candidate, modelmats, null, null, gmats, ipass, pos)
+
+    // advance the step count only in update state
+ if (!is_estimte_sd) {
+ step ~ step + 1.0f
+ }
+ // println ("delta:" + delta + " loss_new:" + loss_new + " loss_prev:" + loss_prev + " loglik_new_to_prev:" + loglik_new_to_prev + " loglik_prev_to_new:" + loglik_prev_to_new)
+
+ if (java.lang.Double.isNaN(delta)) {
+ // println ("delta:" + delta + " loss_new:" + loss_new + " loss_prev:" + loss_prev + " loglik_new_to_prev:" + loglik_new_to_prev + " loglik_prev_to_new:" + loglik_prev_to_new)
+ throw new RuntimeException("Delta")
+ }
+
+ (candidate, null, delta)
+ }
+
+
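+  // MH log-ratio computed by this method: with the Gaussian proposal
+  // q(y | x) = N(y; x + stepi * g(x), 2 * stepi),
+  //   delta = score(theta') - score(theta)
+  //         + log q(theta | theta') - log q(theta' | theta)
+  // where score = sum(evalbatch). The Gaussian normalizers cancel, so
+  // only the squared-distance terms are accumulated below.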
+ override def computeDelta(mats_new:Array[Mat], mats_old:Array[Mat], new_v:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long): Double ={
+ // copy the mats_old to the model
+ for (i <- 0 until mats_old.length) {
+ model.modelmats(i) <-- mats_old(i)
+ }
+
+ // compute the loss
+ var loss_mat_prev = model.evalbatch(gmats, ipass, pos)
+ val loss_prev = (sum(loss_mat_prev)).dv
+
+ // compute the gradient and rescale it
+ model.dobatch(gmats, ipass, pos)
+
+ updatemats = model.updatemats
+
+ // sample the new model parameters by the gradient and the stepsize
+ // and store the sample results into the candidate array
+
+ var loglik_prev_to_new = 0.0
+ var loglik_new_to_prev = 0.0
+
+    // AdaGrad-style rescaling of the gradient
+ for (i <- 0 until updatemats.length) {
+ // clip
+ if (cp > 0f) {
+ min(updatemats(i), clipByValue,updatemats(i))
+ max(updatemats(i),-clipByValue,updatemats(i))
+ }
+
+ // compute the ss
+ val ss2 = sumSq_tmp_container(i)
+ val um2 = updatemats(i)
+ newsquares(i) <-- um2 *@ um2 // it's OK to reuse the newsquares
+
+ ss2 ~ ss2 *@ (step - 1)
+ ss2 ~ ss2 + newsquares(i)
+ ss2 ~ ss2 / step
+ val grad2 = ss2 ^ ve
+
+      // undo the update to ss2, restoring the previous running average
+ ss2 <-- ss2 *@ step
+ ss2 <-- ss2 - newsquares(i)
+ if (step.dv > 1) {
+ ss2 <-- ss2 / (step - 1)
+ }
+
+      // so sumSq_tmp_container still holds the old ss value
+
+ grad2 ~ grad2 + epsilon
+ grad2 ~ um2 / grad2
+ grad2 ~ grad2 *@ stepi
+
+      // reuse newsquares as scratch space; the proposed jump from
+      // mats_old is mats_old + grad2
+ newsquares(i) <-- mats_old(i) + grad2
+ newsquares(i) ~ newsquares(i) - mats_new(i)
+ loglik_prev_to_new += (-1.0*sum(sum(newsquares(i) *@ newsquares(i))) / 2.0 / (stepi*2)).dv
+
+ }
+
+ // then jump from the new mats to the old ones
+ // copy the data to the models
+ for (i <- 0 until mats_new.length) {
+ model.modelmats(i) <-- mats_new(i)
+ }
+
+ // eval the new data
+ model.dobatch(gmats, ipass, pos)
+ updatemats = model.updatemats
+ loss_mat_prev = model.evalbatch(gmats, ipass, pos) // re-use the old reference here
+ val loss_new = (sum(loss_mat_prev)).dv
+
+ // compute the new scaled gradient
+ for (i <- 0 until updatemats.length) {
+ // clip
+ if (cp > 0f) {
+ min(updatemats(i), clipByValue,updatemats(i))
+ max(updatemats(i),-clipByValue,updatemats(i))
+ }
+
+ // compute the ss
+ val ss2 = sumSq_tmp_container(i)
+ val um2 = updatemats(i)
+ newsquares(i) <-- um2 *@ um2 // it's OK to reuse the newsquares
+
+ ss2 ~ ss2 *@ (step - 1)
+ ss2 ~ ss2 + newsquares(i)
+ ss2 ~ ss2 / step
+ val grad2 = ss2 ^ ve
+
+      // undo the update to ss2, restoring the previous running average
+ ss2 ~ ss2 *@ step
+ ss2 ~ ss2 - newsquares(i)
+ if (step.dv > 1) {
+ ss2 ~ ss2 / (step - 1)
+ }
+
+
+ grad2 ~ grad2 + epsilon
+ grad2 ~ um2 / grad2
+ grad2 ~ grad2 *@ stepi
+
+ // re-use the space newsquares here
+ // the pnt jump from candidate is candidate + grad2
+ newsquares(i) <-- mats_new(i) + grad2
+ newsquares(i) ~ newsquares(i) - mats_old(i)
+ loglik_new_to_prev += (-1.0*sum(sum(newsquares(i) *@ newsquares(i))) / 2.0 / (stepi*2)).dv
+ }
+
+ val delta = (loss_new) - (loss_prev) + loglik_new_to_prev - loglik_prev_to_new
+
+ if (java.lang.Double.isNaN(delta)) {
+ println ("delta:" + delta + " loss_new:" + loss_new + " loss_prev:" + loss_prev + " loglik_new_to_prev:" + loglik_new_to_prev + " loglik_prev_to_new:" + loglik_prev_to_new)
+ throw new RuntimeException("Delta")
+ }
+ delta
+
+ }
+
+}
+
+
+// The Stochastic Gradient Hamiltonian Monte Carlo (SGHMC) proposer
+class SGHMC_proposer (val lr:Float, val a:Float, val t:Float, val v:Float, val cp:Float, val k:Float, val batchSize:Float, val model:Model) extends Proposer() {
+
+ var step:Mat = null // record the step by itself
+ var candidate:Array[Mat] = null
+ var stepi:Mat = null
+ var is_estimte_sd:Boolean = true
+ var alpha:Mat = null
+ var v_old:Array[Mat] = null // the v in the paper
+ var sumSq:Array[Mat] = null // container for g*g
+ var lrate:Mat = null
+ var te:Mat = null
+ var ve:Mat = null
+ var noise_matrix:Array[Mat] = null // contain the v_new
+ var epsilon:Float = 1e-5f
+ var initsumsq = 1e-5f
+ var clipByValue:Mat = null
+ var newsquares:Array[Mat] = null
+ var estimated_v:Mat = null
+ var kir:Mat = null
+ var m:Int = 1
+ var adj_alpha:Mat = null
+ var t_init:Mat = null
+
+ override var has_help_mats:Boolean = true
+
+
+ override def init():Unit = {
+ // init the container here
+
+ candidate = new Array[Mat](model.modelmats.length)
+ sumSq = new Array[Mat](model.modelmats.length)
+ newsquares = new Array[Mat](model.modelmats.length)
+
+ stepi = model.modelmats(0).zeros(1,1)
+ step = model.modelmats(0).ones(1,1)
+
+ te = model.modelmats(0).zeros(1,1)
+ te(0,0) = t
+ ve = model.modelmats(0).zeros(1,1)
+ ve(0,0) = v
+ lrate = model.modelmats(0).zeros(1,1)
+ lrate(0,0) = lr
+ v_old = new Array[Mat](model.modelmats.length)
+ noise_matrix = new Array[Mat](model.modelmats.length)
+ alpha = model.modelmats(0).zeros(1,1)
+ alpha(0,0) = a
+
+ estimated_v = model.modelmats(0).zeros(1,1)
+
+ kir = model.modelmats(0).zeros(1,1)
+ kir(0,0) = k
+
+ t_init = model.modelmats(0).ones(1,1)
+ t_init(0,0) = 1000.0f
+
+ adj_alpha = model.modelmats(0).zeros(1,1)
+
+ if (cp > 0) {
+ clipByValue = model.modelmats(0).zeros(1,1)
+ clipByValue(0,0) = cp
+ }
+ for (i <- 0 until candidate.length) {
+ candidate(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
+ sumSq(i) = model.modelmats(i).ones(model.modelmats(i).nrows, model.modelmats(i).ncols) *@ initsumsq
+ newsquares(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
+ v_old(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
+ noise_matrix(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
+ }
+ println("finish init the proposer")
+ println("step: " + step + ", stepi" + stepi + ", te: " + te + ", ve: " + ve +", lrate: " + lrate)
+ }
+
+
+ override def changeToUpdateState():Unit = {
+ is_estimte_sd = false
+ }
+
+ override def changeToEstimateSdState():Unit = {
+ is_estimte_sd = true
+ }
+
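+  // SGHMC dynamics sketch (one of m steps in the loop below):
+  //   theta <- theta + v
+  //   v     <- (1 - alpha) * v + stepi * g
+  //            + N(0, 2 * (alpha - beta_hat * stepi / 2) * stepi)
+  // with g the AdaGrad-rescaled gradient, alpha the friction term and
+  // beta_hat (estimated_v) the estimated gradient-noise variance;
+  // alpha is enlarged when beta_hat * stepi / 2 would exceed it
+  // (and reset if that pushes it past 0.2).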
+  // Note: the gradient computed by the system points toward maximizing the objective.
+ override def proposeNext(modelmats:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long):(Array[Mat], Array[Mat], Double) = {
+
+ // compute the new v
+
+ // copy the modelmats to the model
+ for (i <- 0 until modelmats.length) {
+ model.modelmats(i) <-- modelmats(i)
+ }
+
+ stepi <-- lrate / (step ^ te)
+
+    // carry over the previous v (the resampling branch below is dead
+    // code: step starts at 1, so step.dv < -1.0 never holds)
+ for (i <- 0 until v_old.length) {
+ // normrnd(0, (stepi^0.5).dv, v_old(i))
+
+ if (step.dv < -1.0) {
+ normrnd(0, (stepi^0.5).dv, v_old(i))
+ } else {
+ v_old(i) <-- prev_v(i)
+ }
+ // normrnd(0, (stepi^0.5).dv, v_old(i))
+ }
+
+
+ // copy the modelmats to candidates
+ for (i <- 0 until modelmats.length) {
+ candidate(i) <-- modelmats(i)
+ }
+ // do update for m steps
+ for (j <- 0 until m) {
+ for (i <- 0 until modelmats.length) {
+ candidate(i) <-- candidate(i) + v_old(i)
+ model.modelmats(i) <-- candidate(i)
+ }
+
+ model.dobatch(gmats, ipass, pos)
+
+ for (i <- 0 until candidate.length) {
+ // clip
+ if (cp > 0f) {
+ min(model.updatemats(i), clipByValue, model.updatemats(i))
+ max(model.updatemats(i),-clipByValue, model.updatemats(i))
+ }
+
+ // compute the ss
+ val ss = sumSq(i)
+        // (the gradient sign follows the max-objective convention noted above)
+ val um = model.updatemats(i)
+ newsquares(i) <-- um *@ um
+
+ ss ~ ss *@ (step - 1)
+ ss ~ ss + newsquares(i)
+ ss ~ ss / step
+ val grad = ss ^ ve
+
+ grad ~ grad + epsilon
+ grad ~ um / grad
+
+          // estimate the gradient-noise variance (beta-hat in the
+          // SGHMC paper) with an exponential moving average of rate kir
+          estimated_v ~ estimated_v *@ (1 - kir)
+          estimated_v <-- estimated_v + sum(sum(grad *@ grad)) *@ kir / batchSize * 1000000 / grad.length
+          // note: the scaling by batchSize and grad.length is heuristic
+          // and has not been fully verified
+
+ // just debug
+ // println("estimated_v: " + estimated_v)
+
+ adj_alpha <-- alpha
+
+
+ if ((estimated_v*stepi/2.0).dv > alpha.dv) {
+ adj_alpha = (estimated_v*stepi/2.0) + 1e-6f
+ // println ("alpha change to be " + adj_alpha)
+ }
+ if (adj_alpha.dv > 0.2) {
+ adj_alpha <-- alpha
+ }
+
+ grad ~ grad *@ stepi
+
+          // momentum update: v <- (1 - alpha) * v + stepi-scaled gradient
+ v_old(i) <-- (1.0-adj_alpha) *@ v_old(i) + grad
+ // add the random noise
+ val est_var = 2*(adj_alpha - estimated_v*stepi / 2.0) * stepi
+ // println("the est var is " + estimated_v +" ,the var is " + est_var)
+ if (est_var.dv < 0) {
+ // println("the est var is " + estimated_v +" ,the var is " + est_var)
+ est_var(0,0) = 1e-5f
+ }
+
+ normrnd(0, (est_var^0.5).dv, noise_matrix(i))
+ v_old(i) <-- v_old(i) + noise_matrix(i)
+ // println("the inserted noise is " + (est_var^0.5) + ", and " + ((stepi * 0.001)^0.5) )
+ /**
+ // insert more noise?
+ normrnd(0, ((stepi * 0.00001)^0.5).dv, noise_matrix(i))
+ v_old(i) <-- v_old(i) + noise_matrix(i)
+ **/
+ }
+
+ }
+
+
+    // compute the delta; the commented block below is an older inline
+    // version, kept for reference and superseded by computeDelta
+ /**
+ for (i <- 0 until candidate.length) {
+ model.modelmats(i) <-- candidate(i)
+ }
+ val score_new = -1.0 * sum(model.evalbatch(gmats, ipass, pos))
+
+ var enery_new = v_old(0).zeros(1,1)
+ for (i <- 0 until candidate.length) {
+ enery_new <-- enery_new + sum(sum(v_old(i) *@ v_old(i)))
+ }
+ enery_new ~ enery_new / 2 / stepi
+ // println ("score_old: " + score_old + ", score_new: " + score_new + ", enery_new:" + enery_new + ", enery_old:"+enery_old)
+ val delta = score_old + enery_old - score_new - enery_new
+ **/
+ // println ("the delta is " + delta)
+ // incremental the count
+ val delta = computeDelta(candidate, modelmats, v_old, prev_v, gmats, ipass, pos)
+ if (!is_estimte_sd) {
+ step ~ step + 1.0f
+ }
+ if (java.lang.Double.isNaN(delta.dv)) {
+ throw new RuntimeException("Delta for proposer")
+ }
+ (candidate, v_old, delta)
+ }
+
+
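+  // Delta sketch: a tempered Hamiltonian difference,
+  //   delta = (score_old + E_old) - (score_new + E_new)
+  // with score = -sum(evalbatch) / t_i and kinetic energy
+  // E = ||v||^2 / (2 * stepi). The temperature anneals as
+  // t_i = sqrt(t_init / step), floored at 1 (note that ^ binds looser
+  // than / in Scala, so t_init / step ^ (0.5) parses as (t_init / step) ^ 0.5).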
+ override def computeDelta(mats_new:Array[Mat], mats_old:Array[Mat], new_v:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long): Double ={
+
+ // compute the temperature
+ val t_i = t_init / step ^(0.5)
+ if (t_i.dv <= 1.0f) {
+ t_i(0,0) = 1.0f
+ }
+ // val t_i = t_init
+
+ for (i <- 0 until mats_old.length) {
+ model.modelmats(i) <-- mats_old(i)
+ }
+ val score_old = -1.0 *sum(model.evalbatch(gmats, ipass, pos)) / t_i
+ var enery_old = prev_v(0).zeros(1,1)
+ for (i <- 0 until prev_v.length) {
+ enery_old <-- enery_old + sum(sum(prev_v(i) *@ prev_v(i)))
+ }
+ enery_old ~ enery_old / 2 / stepi
+
+
+ for (i <- 0 until mats_new.length) {
+ model.modelmats(i) <-- mats_new(i)
+ }
+ val score_new = -1.0 *sum(model.evalbatch(gmats, ipass, pos)) / t_i
+
+ var enery_new = v_old(0).zeros(1,1)
+ for (i <- 0 until candidate.length) {
+ enery_new <-- enery_new + sum(sum(v_old(i) *@ v_old(i)))
+ }
+ enery_new ~ enery_new / 2 / stepi
+ // println ("score_old: " + score_old + ", score_new: " + score_new + ", enery_new:" + enery_new + ", enery_old:"+enery_old)
+ val delta = score_old + enery_old - score_new - enery_new
+ if (java.lang.Double.isNaN(delta.dv)) {
+ throw new RuntimeException("Delta for proposer")
+ }
+ delta.dv
+ }
+}
+
+
+class Gradient_descent_proposer (val lr:Float, val u:Float, val t:Float, val v:Float, val cp:Float, val model:Model) extends Proposer() {
+ var step:Mat = null // record the step by itself
+ var candidate:Array[Mat] = null
+ var stepi:Mat = null
+ var is_estimte_sd = true
+ var mu:Mat = null
+ var momentum:Array[Mat] = null
+ var sumSq:Array[Mat] = null // container for g*g
+ var lrate:Mat = null
+ var te:Mat = null
+ var ve:Mat = null
+ var hasmomentum:Boolean = true
+ var updatemats:Array[Mat] = null // just a reference
+ var epsilon:Float = 1e-5f
+ var initsumsq = 1e-5f
+ var clipByValue:Mat = null
+ var newsquares:Array[Mat] = null
+ override var has_help_mats:Boolean = false
+
+
+ override def init():Unit = {
+ // init the container here
+ hasmomentum = (u > 0)
+
+ candidate = new Array[Mat](model.modelmats.length)
+ sumSq = new Array[Mat](model.modelmats.length)
+ newsquares = new Array[Mat](model.modelmats.length)
+
+ stepi = model.modelmats(0).zeros(1,1)
+ step = model.modelmats(0).ones(1,1)
+
+ te = model.modelmats(0).zeros(1,1)
+ te(0,0) = t
+ ve = model.modelmats(0).zeros(1,1)
+ ve(0,0) = v
+ lrate = model.modelmats(0).zeros(1,1)
+ lrate(0,0) = lr
+ if (hasmomentum) {
+ momentum = new Array[Mat](model.modelmats.length)
+ mu = model.modelmats(0).zeros(1,1)
+ mu(0,0) = u
+ }
+
+ if (cp > 0) {
+ clipByValue = model.modelmats(0).zeros(1,1)
+ clipByValue(0,0) = cp
+ }
+ for (i <- 0 until candidate.length) {
+ candidate(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
+ sumSq(i) = model.modelmats(i).ones(model.modelmats(i).nrows, model.modelmats(i).ncols) *@ initsumsq
+ newsquares(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
+
+ if (hasmomentum) {
+ momentum(i) = model.modelmats(i).zeros(model.modelmats(i).nrows, model.modelmats(i).ncols)
+ }
+ }
+ println("finish init the proposer")
+ println("step: " + step + ", stepi" + stepi + ", te: " + te + ", ve: " + ve +", lrate: " + lrate)
+ }
+
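+  // A plain AdaGrad SGD step with optional momentum. No MH correction
+  // is intended here: proposeNext reports a huge delta and computeDelta
+  // a constant, so moves are effectively always accepted. Useful as a
+  // non-MCMC baseline for the proposers above.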
+ override def proposeNext(modelmats:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long):(Array[Mat], Array[Mat], Double) = {
+ // just do the one step gradient descent
+ if (!is_estimte_sd) {
+
+ for (i <- 0 until modelmats.length) {
+ model.modelmats(i) <-- modelmats(i)
+ }
+ // compute the gradient
+ model.dobatch(gmats, ipass, pos)
+ updatemats = model.updatemats
+
+ // sample the new model parameters by the gradient and the stepsize
+ // and store the sample results into the candidate array
+ stepi <-- lrate / (step ^ te)
+ for (i <- 0 until candidate.length) {
+ // clip
+ if (cp > 0f) {
+ min(updatemats(i), clipByValue,updatemats(i))
+ max(updatemats(i),-clipByValue,updatemats(i))
+ }
+
+ // compute the ss
+ val ss = sumSq(i)
+ val um = updatemats(i)
+ newsquares(i) <-- um *@ um
+
+ ss ~ ss *@ (step - 1)
+ ss ~ ss + newsquares(i)
+ ss ~ ss / step
+ val grad = ss ^ ve
+
+ grad ~ grad + epsilon
+ grad ~ um / grad
+ grad ~ grad *@ stepi
+ if (hasmomentum) {
+ grad ~ grad + momentum(i)
+ momentum(i) ~ grad *@ mu
+ }
+
+ candidate(i) <-- modelmats(i) + grad
+ }
+ step ~ step + 1.0f
+ }
+    // return a very large delta so the move is always accepted
+ (candidate, null, 1000000.0)
+ }
+
+ override def changeToUpdateState():Unit = {
+ is_estimte_sd = false
+ }
+
+ override def changeToEstimateSdState():Unit = {
+ is_estimte_sd = true
+ }
+
+ override def computeDelta(mats_new:Array[Mat], mats_old:Array[Mat], new_v:Array[Mat], prev_v:Array[Mat], gmats:Array[Mat], ipass:Int, pos:Long): Double ={
+ 100.0
+ }
+}
+
+// Empirical CDF of X_corr, backed by matrices of data precomputed in
+// MATLAB; pre-computed txt files live at /data/EcdfForMHtest
+
+class Ecdf(val ecdfmat:FMat, val varvect:FMat) {
+ var sd = 1.0f
+ var f:FMat = null
+ var x:FMat = null
+
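+  // Assumed layout, inferred from init and updateSd below: ecdfmat is
+  // (K+1) x N, row 0 holding the grid of x values and row i+1 the CDF
+  // over that grid for the i-th sd value in varvect (1 x K).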
+ def init() = {
+ // read the x
+ x = ecdfmat(0, ?)
+ updateSd(1.0)
+ }
+
+ def generateXcorr = {
+    val u:Float = rand(1,1)(0,0)
+ // println ("u is " + u)
+ val index = binarySearch(u, f)
+ // println ("f is " + f)
+ // println ("index is "+ index)
+ x(0, index)
+ }
+
+ def updateSd (inputsd:Double):Unit = {
+ sd = inputsd.toFloat
+ if (sd > 1.2f) {
+ throw new RuntimeException("Too large sd of Delta'")
+ }
+    // update f: look up the row for the sd value in varvect
+    // closest to inputsd
+ val index = binarySearch(sd, varvect)
+ f = ecdfmat(index+1, ?)
+ }
+
+ // return the closest index in xarray for u
+ def binarySearch(u:Float, xarray:FMat) : Int = {
+ var start : Int = 0
+ var end : Int = xarray.ncols - 1
+ var mid : Int = 0
+ // println ("mid: "+ mid + " ,start: " + start + " ,end " + end)
+ while (end > start + 1) {
+ // println ("mid: "+ mid + " ,start: " + start + " ,end " + end)
+ mid = (start + end) / 2
+ if (u < xarray(0, mid)) {
+ end = mid
+ } else if (u > xarray(0, mid)) {
+ start = mid
+ } else {
+ return mid
+ }
+ }
+ // (x(start) + x(end))/2 * sd
+ start
+ }
+}
diff --git a/src/main/scala/BIDMach/models/Model.scala b/src/main/scala/BIDMach/models/Model.scala
index 031be81b..ad1f755b 100755
--- a/src/main/scala/BIDMach/models/Model.scala
+++ b/src/main/scala/BIDMach/models/Model.scala
@@ -15,13 +15,13 @@ import scala.collection.mutable.ListBuffer
abstract class Model(val opts:Model.Opts = new Model.Options) extends Serializable {
- var datasource:DataSource = null;
+ var datasource:DataSource = null
- var datasink:DataSink = null;
+ var datasink:DataSink = null
- var _modelmats:Array[Mat] = null;
+ var _modelmats:Array[Mat] = null
- var parent_model:Model = null;
+ var parent_model:Model = null
def modelmats:Array[Mat] = {
if (_modelmats != null) {
@@ -34,71 +34,71 @@ abstract class Model(val opts:Model.Opts = new Model.Options) extends Serializab
}
def setmodelmats(a:Array[Mat]) = {
- _modelmats = a;
+ _modelmats = a
}
- var updatemats:Array[Mat] = null;
+ var updatemats:Array[Mat] = null
// For Allreduce: the local indices
- var indexmat:Mat = null;
+ var indexmat:Mat = null
// For Allreduce: cached local matrices:
- var sendmat:Mat = null;
+ var sendmat:Mat = null
- var recvmat:Mat = null;
+ var recvmat:Mat = null
- var mats:Array[Mat] = null;
+ var mats:Array[Mat] = null
- var gmats:Array[Mat] = null;
+ var gmats:Array[Mat] = null
- var omats:Array[Mat] = null;
+ var omats:Array[Mat] = null
- var ogmats:Array[Mat] = null;
+ var ogmats:Array[Mat] = null
- var useGPU = false;
+ var useGPU = false
- var useDouble = false;
+ var useDouble = false
- var putBack = -1;
+ var putBack = -1
- var refresh = true;
+ var refresh = true
- var runtimes:FMat = null;
+ var runtimes:FMat = null
def mergeModelFn(models:Array[Model], mm:Array[Mat], um:Array[Mat], istep:Long):Unit = {
- val mlen = models(0).modelmats.length;
- val thisGPU = getGPU;
+ val mlen = models(0).modelmats.length
+ val thisGPU = getGPU
for (j <- 0 until mlen) {
mm(j).clear
for (i <- 0 until models.length) {
- if (useGPU && i < Mat.hasCUDA) setGPU(i);
- um(j) <-- models(i).modelmats(j);
- mm(j) ~ mm(j) + um(j);
+ if (useGPU && i < Mat.hasCUDA) setGPU(i)
+ um(j) <-- models(i).modelmats(j)
+ mm(j) ~ mm(j) + um(j)
}
- mm(j) ~ mm(j) * (1f/models.length);
+ mm(j) ~ mm(j) * (1f/models.length)
for (i <- 0 until models.length) {
- models(i).modelmats(j) <-- mm(j);
- }
+ models(i).modelmats(j) <-- mm(j)
+ }
}
- setGPU(thisGPU);
+ setGPU(thisGPU)
}
def mergeModelPassFn(models:Array[Model], mm:Array[Mat], um:Array[Mat], ipass:Int) {}
def copyTo(mod:Model) = {
- mod.datasource = datasource;
- mod._modelmats = modelmats;
- mod.updatemats = updatemats;
- mod.mats = mats;
- mod.gmats = gmats;
- mod.omats = omats;
- mod.ogmats = ogmats;
+ mod.datasource = datasource
+ mod._modelmats = modelmats
+ mod.updatemats = updatemats
+ mod.mats = mats
+ mod.gmats = gmats
+ mod.omats = omats
+ mod.ogmats = ogmats
}
def copyFrom(mod:Model) = {
- setmodelmats(new Array[Mat](mod.modelmats.length));
+ setmodelmats(new Array[Mat](mod.modelmats.length))
for (i <- 0 until modelmats.length) {
- modelmats(i) = mod.modelmats(i);
+ modelmats(i) = mod.modelmats(i)
}
}
@@ -113,70 +113,70 @@ abstract class Model(val opts:Model.Opts = new Model.Options) extends Serializab
def save(fname:String) = {
import java.io._
- val metadataname = new File(fname+"options.json");
- val parentdir = metadataname.getParentFile();
- if (parentdir != null) parentdir.mkdirs();
- val pw = new PrintWriter(metadataname);
- pw.print(JSON.toJSON(opts, true));
- pw.close;
+ val metadataname = new File(fname+"options.json")
+ val parentdir = metadataname.getParentFile()
+ if (parentdir != null) parentdir.mkdirs()
+ val pw = new PrintWriter(metadataname)
+ pw.print(JSON.toJSON(opts, true))
+ pw.close
val out = new FileOutputStream(fname+"options.ser")
- val output = new ObjectOutputStream(out);
- output.writeObject(opts);
- output.close;
+ val output = new ObjectOutputStream(out)
+ output.writeObject(opts)
+ output.close
for (i <- 0 until modelmats.length) {
- val mat = modelmats(i);
- val f = new File(fname+"modelmat%02d.lz4" format i);
- saveMat(fname+"modelmat%02d.lz4" format i, cpu(mat));
+ val mat = modelmats(i)
+ val f = new File(fname+"modelmat%02d.lz4" format i)
+ saveMat(fname+"modelmat%02d.lz4" format i, cpu(mat))
}
- saveMetaData(fname);
+ saveMetaData(fname)
}
def load(fname:String) = {
- import java.io._
+ import java.io._
import BIDMat.JSON
if (modelmats != null && modelmats.length > 0) {
- for (i <- 0 until modelmats.length) {
- modelmats(i) = loadMat(fname+"modelmat%02d.lz4" format i);
- }
+ for (i <- 0 until modelmats.length) {
+ modelmats(i) = loadMat(fname+"modelmat%02d.lz4" format i)
+ }
} else {
- var n = 0;
- var mlist = new ListBuffer[Mat]();
+ var n = 0
+ var mlist = new ListBuffer[Mat]()
while ((new File(fname+"modelmat%02d.lz4" format n)).exists) {
- mlist += loadMat(fname+"modelmat%02d.lz4" format n);
- n += 1;
+ mlist += loadMat(fname+"modelmat%02d.lz4" format n)
+ n += 1
}
- setmodelmats(mlist.toArray);
+ setmodelmats(mlist.toArray)
}
- if (new File(fname+"options.ser").exists) {
- val in = new FileInputStream(fname+"options.ser");
- val input = new ObjectInputStream(in);
- val newopts = input.readObject.asInstanceOf[Model.Opts];
- input.close;
- /* val fr = new BufferedReader(new FileReader(fname+"options.json"));
- val strbuf = new StringBuffer;
- var line:String = null;
+ if (new File(fname+"options.ser").exists) {
+ val in = new FileInputStream(fname+"options.ser")
+ val input = new ObjectInputStream(in)
+ val newopts = input.readObject.asInstanceOf[Model.Opts]
+ input.close
+ /* val fr = new BufferedReader(new FileReader(fname+"options.json"))
+ val strbuf = new StringBuffer
+ var line:String = null
while ({line = fr.readLine(); line != null}) {
- strbuf.append(line).append("\n");
+ strbuf.append(line).append("\n")
}
val newopts = JSON.fromJSON(strbuf.toString).asInstanceOf[Model.Opts]; */
- opts.copyFrom(newopts);
- }
+ opts.copyFrom(newopts)
+ }
}
def bind(ds:DataSource):Unit = {
- datasource = ds;
- mats = datasource.next;
- datasource.reset;
- putBack = datasource.opts.putBack;
- useGPU = opts.useGPU && Mat.hasCUDA > 0;
- useDouble = opts.useDouble;
- gmats = new Array[Mat](mats.length);
+ datasource = ds
+ mats = datasource.next
+ datasource.reset
+ putBack = datasource.opts.putBack
+ useGPU = opts.useGPU && Mat.hasCUDA > 0
+ useDouble = opts.useDouble
+ gmats = new Array[Mat](mats.length)
}
def bind(ds:DataSink):Unit = {
- datasink = ds;
- omats = datasink.omats;
- ogmats = new Array[Mat](omats.length);
+ datasink = ds
+ omats = datasink.omats
+ ogmats = new Array[Mat](omats.length)
}
def init():Unit
@@ -187,7 +187,7 @@ abstract class Model(val opts:Model.Opts = new Model.Options) extends Serializab
def logging(gmats:Array[Mat],ipass:Int, here:Long) = {
if (opts.logFuncs!=null){
- val res = opts.logFuncs.map(f=>f(this,gmats));
+ val res = opts.logFuncs.map(f=>f(this,gmats))
if (opts.logDataSink != null){
opts.logDataSink.omats = res.flatten
opts.logDataSink.setnmats(res.length)
@@ -197,9 +197,9 @@ abstract class Model(val opts:Model.Opts = new Model.Options) extends Serializab
}
def dobatchg(amats:Array[Mat], ipass:Int, here:Long) = {
- copyMats(amats, gmats);
- dobatch(gmats, ipass, here);
- logging(gmats, ipass, here);
+    copyMats(amats, gmats)
+ dobatch(gmats, ipass, here)
+ logging(gmats, ipass, here)
}
def evalbatchg(amats:Array[Mat], ipass:Int, here:Long):FMat = {
@@ -207,88 +207,88 @@ abstract class Model(val opts:Model.Opts = new Model.Options) extends Serializab
val v = evalbatch(gmats, ipass, here)
if (omats != null) {
for (i <- 0 until omats.length) {
- omats(i) = cpu(ogmats(i));
+ omats(i) = cpu(ogmats(i))
}
}
- v
+ v
}
def snapshot(len:Int, avg:Boolean) = {
- val len0 = math.min(len, modelmats(0).ncols);
- modelmats(0).synchronized {
- sendmat = cpu(modelmats(0).colslice(0, len0));
- }
- if (avg) {
- sendmat = ones(1, len0) on sendmat;
- }
+ val len0 = math.min(len, modelmats(0).ncols)
+ modelmats(0).synchronized {
+ sendmat = cpu(modelmats(0).colslice(0, len0))
+ }
+ if (avg) {
+ sendmat = ones(1, len0) on sendmat
+ }
}
def addStep(len:Int, avg:Boolean) = {
- val len0 = math.min(len, modelmats(0).ncols);
- if (avg) recvmat = recvmat / max(recvmat(0,?), 1f);
- recvmat = recvmat - sendmat;
- val nr = modelmats(0).nrows;
- modelmats(0).synchronized {
- val head = modelmats(0).view(nr, len0);
- val chead = sendmat.view(nr, len0);
- chead <-- head;
- chead ~ chead + (if (avg) recvmat(1 -> (nr+1), ?) else recvmat);
- head <-- chead;
- }
+ val len0 = math.min(len, modelmats(0).ncols)
+ if (avg) recvmat = recvmat / max(recvmat(0,?), 1f)
+ recvmat = recvmat - sendmat
+ val nr = modelmats(0).nrows
+ modelmats(0).synchronized {
+ val head = modelmats(0).view(nr, len0)
+ val chead = sendmat.view(nr, len0)
+ chead <-- head
+ chead ~ chead + (if (avg) recvmat(1 -> (nr+1), ?) else recvmat)
+ head <-- chead
+ }
}
def elasticStep(len:Int, avg:Boolean, ee:Float) = {
- val len0 = math.min(len, modelmats(0).ncols);
- if (avg) recvmat = recvmat / max(recvmat(0,?), 1f);
- recvmat = recvmat - sendmat;
- val nr = modelmats(0).nrows;
- modelmats(0).synchronized {
- val head = modelmats(0).view(nr, len0);
- val chead = sendmat.view(nr, len0);
- chead <-- head;
- chead ~ chead * (1 - ee) + (if (avg) recvmat(1 -> (nr+1), ?) else recvmat) * ee;
- head <-- chead;
- }
+ val len0 = math.min(len, modelmats(0).ncols)
+ if (avg) recvmat = recvmat / max(recvmat(0,?), 1f)
+ recvmat = recvmat - sendmat
+ val nr = modelmats(0).nrows
+ modelmats(0).synchronized {
+ val head = modelmats(0).view(nr, len0)
+ val chead = sendmat.view(nr, len0)
+ chead <-- head
+ chead ~ chead * (1 - ee) + (if (avg) recvmat(1 -> (nr+1), ?) else recvmat) * ee
+ head <-- chead
+ }
}
def copyMats(from:Array[Mat], to:Array[Mat]) = {
for (i <- 0 until from.length) {
if (useGPU) {
if (useDouble) {
- to(i) = from(i) match {
- case aa:FMat => GDMat(aa)
- case aa:IMat => GIMat(aa)
- case aa:DMat => GDMat(aa)
- case aa:SMat => GSDMat(aa)
- case aa:GDMat => aa
- case aa:GMat => GDMat(aa)
- }
+ to(i) = from(i) match {
+ case aa:FMat => GDMat(aa)
+ case aa:IMat => GIMat(aa)
+ case aa:DMat => GDMat(aa)
+ case aa:SMat => GSDMat(aa)
+ case aa:GDMat => aa
+ case aa:GMat => GDMat(aa)
+ }
} else {
- to(i) = from(i) match {
- case aa:FMat => GMat(aa)
- case aa:DMat => GMat(aa)
- case aa:IMat => GIMat(aa)
- case aa:SMat => GSMat(aa)
- case aa:GMat => aa
- case aa:GDMat => GMat(aa)
- }
+ to(i) = from(i) match {
+ case aa:FMat => GMat(aa)
+ case aa:DMat => GMat(aa)
+ case aa:IMat => GIMat(aa)
+ case aa:SMat => GSMat(aa)
+ case aa:GMat => aa
+ case aa:GDMat => GMat(aa)
+ }
}
} else {
- if (useDouble) {
- to(i) = from(i) match {
- case aa:FMat => DMat(aa)
- case aa:SMat => SDMat(aa)
- case aa:DMat => aa;
- case aa:SDMat => aa;
- }
- } else {
- to(i) = from(i) match {
- case aa:FMat => aa
- case aa:SMat => aa
- case aa:DMat => FMat(aa);
- case aa:SDMat => SMat(aa);
- }
- }
+ if (useDouble) {
+ to(i) = from(i) match {
+ case aa:FMat => DMat(aa)
+ case aa:SMat => SDMat(aa)
+ case aa:DMat => aa
+ case aa:SDMat => aa
+ }
+ } else {
+ to(i) = from(i) match {
+ case aa:FMat => aa
+ case aa:SMat => aa
+ case aa:DMat => FMat(aa)
+ case aa:SDMat => SMat(aa)
+ }
+ }
}
}
}
@@ -296,11 +296,11 @@ abstract class Model(val opts:Model.Opts = new Model.Options) extends Serializab
def updatePass(ipass:Int) = {}
def convertMat(a:Mat):Mat = {
- Model.convertMat(a, useGPU, opts.useDouble).asInstanceOf[Mat];
+ Model.convertMat(a, useGPU, opts.useDouble).asInstanceOf[Mat]
}
def convertMat(a:ND):ND = {
- Model.convertMat(a, useGPU, opts.useDouble);
+ Model.convertMat(a, useGPU, opts.useDouble)
}
def combineModels(ipass:Int, model: Model):Model = this
@@ -308,94 +308,94 @@ abstract class Model(val opts:Model.Opts = new Model.Options) extends Serializab
object Model {
- trait Opts extends BIDMat.Opts{
- var nzPerColumn:Int = 0;
- var startBlock = 8000;
- var useGPU = true;
- var useDouble = false;
- var doubleScore = false;
- var doVariance = false;
- var dim = 256;
- var debug = 0;
- var doAllReduce = false;
- var logFuncs : Array[(Model,Array[Mat]) => Array[Mat]] = null;
- var logDataSink : DataSink = null;
+ trait Opts extends BIDMat.Opts{
+ var nzPerColumn:Int = 0
+ var startBlock = 8000
+ var useGPU = true
+ var useDouble = false
+ var doubleScore = false
+ var doVariance = false
+ var dim = 256
+ var debug = 0
+ var doAllReduce = false
+ var logFuncs : Array[(Model,Array[Mat]) => Array[Mat]] = null
+ var logDataSink : DataSink = null
}
- class Options extends Opts {}
+ class Options extends Opts {}
- def convertMat(a:ND, useGPU:Boolean, useDouble:Boolean):ND = {
- a match {
+ def convertMat(a:ND, useGPU:Boolean, useDouble:Boolean):ND = {
+ a match {
case f:FMat =>
if (useGPU) {
- if (useDouble) {
- GDMat(f);
- } else {
- GMat(f);
- }
+ if (useDouble) {
+ GDMat(f)
+ } else {
+ GMat(f)
+ }
} else {
- if (useDouble) {
- DMat(f);
- } else {
- f
- }
+ if (useDouble) {
+ DMat(f)
+ } else {
+ f
+ }
}
case i:IMat =>
if (useGPU) {
- GIMat(i);
+ GIMat(i)
} else {
- i;
+ i
}
case g:GMat => if (useGPU) {
- if (useDouble) {
- GDMat(g);
- } else {
- g
- }
+ if (useDouble) {
+ GDMat(g)
+ } else {
+ g
+ }
} else {
- if (useDouble) {
- DMat(FMat(g));
- } else {
- FMat(g);
- }
+ if (useDouble) {
+ DMat(FMat(g))
+ } else {
+ FMat(g)
+ }
}
case g:GDMat => if (useGPU) {
- if (useDouble) {
- g;
- } else {
- GMat(g)
- }
+ if (useDouble) {
+ g
+ } else {
+ GMat(g)
+ }
} else {
- if (useDouble) {
- DMat(g);
- } else {
- FMat(g);
- }
+ if (useDouble) {
+ DMat(g)
+ } else {
+ FMat(g)
+ }
}
case g:GSMat => if (useGPU) {
- if (useDouble) {
- GSDMat(g);
- } else {
- g;
- }
+ if (useDouble) {
+ GSDMat(g)
+ } else {
+ g
+ }
} else {
- if (useDouble) {
- SDMat(SMat(g));
- } else {
- SMat(g);
- }
+ if (useDouble) {
+ SDMat(SMat(g))
+ } else {
+ SMat(g)
+ }
}
case g:FND => if (useGPU) {
- GND(g);
+ GND(g)
} else {
- g
+ g
}
case g:GND => if (useGPU) {
- g
+ g
} else {
- FND(g)
+ FND(g)
}
- case tt:TMat => new TMat(tt.nrows, tt.ncols, tt.y, tt.x, tt.tiles.map(convertMat(_, useGPU, useDouble).asInstanceOf[Mat]));
+ case tt:TMat => new TMat(tt.nrows, tt.ncols, tt.y, tt.x, tt.tiles.map(convertMat(_, useGPU, useDouble).asInstanceOf[Mat]))
}
}
}
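
A note on the save/load pair above: both sides share a fixed naming scheme rooted at the prefix passed in (fname+"options.json", fname+"options.ser", and one fname+"modelmat%02d.lz4" per model matrix), so a model can be restored from its prefix alone. A minimal round trip, assuming a hypothetical prefix "models/nmf-"; note that RandomForest overrides save/load with its own file set:

    val model = new NMF()           // most concrete Model subclasses follow the same layout
    model.load("models/nmf-")       // enumerates modelmat00.lz4, modelmat01.lz4, ...
                                    // and restores opts from models/nmf-options.ser if present
    model.save("models/nmf-copy-")  // writes the same layout under a new prefix
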
diff --git a/src/main/scala/BIDMach/models/NMF.scala b/src/main/scala/BIDMach/models/NMF.scala
index 0e7fcc41..eb7950af 100755
--- a/src/main/scala/BIDMach/models/NMF.scala
+++ b/src/main/scala/BIDMach/models/NMF.scala
@@ -48,14 +48,14 @@ class NMF(opts:NMF.Opts = new NMF.Options) extends FactorModel(opts) {
var udiag:Mat = null
override def init() = {
- super.init()
- mm = modelmats(0)
- setmodelmats(Array(mm, mm.zeros(mm.nrows, mm.ncols)));
- updatemats = new Array[Mat](2)
+ super.init()
+ mm = modelmats(0)
+ setmodelmats(Array(mm, mm.zeros(mm.nrows, mm.ncols)))
+ updatemats = new Array[Mat](2)
updatemats(0) = mm.zeros(mm.nrows, mm.ncols)
updatemats(1) = mm.zeros(mm.nrows, mm.ncols)
udiag = mkdiag(opts.uprior*ones(opts.dim,1))
- mdiag = mkdiag(opts.mprior*ones(opts.dim,1))
+ mdiag = mkdiag(opts.mprior*ones(opts.dim,1))
if (useGPU) {
udiag = GMat(udiag)
mdiag = GMat(mdiag)
@@ -63,14 +63,14 @@ class NMF(opts:NMF.Opts = new NMF.Options) extends FactorModel(opts) {
}
override def uupdate(sdata:Mat, user:Mat, ipass:Int, pos:Long) = {
- if (putBack < 0 || ipass == 0) user.set(1f)
- val modeldata = mm * sdata
- val mmu = mm *^ mm + udiag
+ if (putBack < 0 || ipass == 0) user.set(1f)
+ val modeldata = mm * sdata
+ val mmu = mm *^ mm + udiag
for (i <- 0 until opts.uiter) {
- val quot = modeldata / (mmu * user)
- min(10.0f, max(0.1f, quot, quot), quot)
- user ~ user ∘ quot
- max(opts.minuser, user, user)
+ val quot = modeldata / (mmu * user)
+ min(10.0f, max(0.1f, quot, quot), quot)
+ user ~ user ∘ quot
+ max(opts.minuser, user, user)
}
}
@@ -88,20 +88,20 @@ class NMF(opts:NMF.Opts = new NMF.Options) extends FactorModel(opts) {
}
override def evalfun(sdata:Mat, user:Mat, ipass:Int, pos:Long):FMat = {
- if (ogmats != null) ogmats(0) = user;
+ if (ogmats != null) ogmats(0) = user
if (opts.doubleScore) {
evalfunx(sdata, user)
} else {
- val modeldata = mm * sdata
- val uu = user *^ user + mdiag *@ (1.0f*size(user,2)/opts.nusers)
- val mmm = mm *^ mm
+ val modeldata = mm * sdata
+ val uu = user *^ user + mdiag *@ (1.0f*size(user,2)/opts.nusers)
+ val mmm = mm *^ mm
- val ll0 = sdata.contents ddot sdata.contents
- val ll1 = modeldata ddot user
- val ll2 = uu ddot mmm
- val v1 = (-ll0 + 2*ll1 - ll2)/sdata.nnz
- val v2 = -opts.uprior*(user ddot user)/sdata.nnz
- row(v1,v2)
+ val ll0 = sdata.contents ddot sdata.contents
+ val ll1 = modeldata ddot user
+ val ll2 = uu ddot mmm
+ val v1 = (-ll0 + 2*ll1 - ll2)/sdata.nnz
+ val v2 = -opts.uprior*(user ddot user)/sdata.nnz
+ row(v1,v2)
}
}
@@ -111,7 +111,7 @@ class NMF(opts:NMF.Opts = new NMF.Options) extends FactorModel(opts) {
val mmf = DMat(mm)
val mdiagf = DMat(mdiag)
- val modeldata = mmf * sdata
+ val modeldata = mmf * sdata
val uu = user *^ user + mdiagf *@ (1.0f*size(user,2)/opts.nusers)
val mmm = mmf *^ mmf
@@ -135,11 +135,11 @@ object NMF {
class Options extends Opts {}
def mkNMFmodel(fopts:Model.Opts) = {
- new NMF(fopts.asInstanceOf[NMF.Opts])
+ new NMF(fopts.asInstanceOf[NMF.Opts])
}
def mkUpdater(nopts:Updater.Opts) = {
- new IncNorm(nopts.asInstanceOf[IncNorm.Opts])
+ new IncNorm(nopts.asInstanceOf[IncNorm.Opts])
}
def learner(mat0:Mat, d:Int = 256) = {
@@ -148,24 +148,24 @@ object NMF {
opts.dim = d
opts.uiter = 2
opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
- val nn = new Learner(
- new MatSource(Array(mat0:Mat), opts),
- new NMF(opts),
- null,
- new IncNorm(opts),
- null,
- opts)
+ val nn = new Learner(
+ new MatSource(Array(mat0:Mat), opts),
+ new NMF(opts),
+ null,
+ new IncNorm(opts),
+ null,
+ opts)
(nn, opts)
}
- class PredOptions extends Learner.Options with NMF.Opts with MatSource.Opts with MatSink.Opts;
+ class PredOptions extends Learner.Options with NMF.Opts with MatSource.Opts with MatSink.Opts
// This function constructs a predictor from an existing model
def predictor(model:Model, mat1:Mat):(Learner, PredOptions) = {
- val nopts = new PredOptions;
+ val nopts = new PredOptions
nopts.batchSize = math.min(10000, mat1.ncols/30 + 1)
- nopts.dim = model.opts.dim;
- val newmod = new NMF(nopts);
+ nopts.dim = model.opts.dim
+ val newmod = new NMF(nopts)
newmod.refresh = false
model.copyTo(newmod)
val nn = new Learner(
@@ -201,13 +201,13 @@ object NMF {
opts.npasses = 4
opts.batchSize = math.min(100000, mat0.ncols/30/opts.nthreads + 1)
opts.coolit = 0 // Assume we dont need cooling on a matrix input
- val nn = new ParLearnerF(
- new MatSource(Array(mat0:Mat), opts),
- opts, mkNMFmodel _,
- null, null,
- opts, mkUpdater _,
- null, null,
- opts)
+ val nn = new ParLearnerF(
+ new MatSource(Array(mat0:Mat), opts),
+ opts, mkNMFmodel _,
+ null, null,
+ opts, mkUpdater _,
+ null, null,
+ opts)
(nn, opts)
}
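
To make the NMF factories concrete, a minimal training run against an in-memory matrix, where a is a placeholder nfeats x ninstances data matrix:

    val (nn, opts) = NMF.learner(a, 64)   // d = 64 factor dimensions
    opts.npasses = 4                      // passes over the data, as in the parallel learner above
    nn.train                              // alternates uupdate/mupdate under the IncNorm updater

A predictor over held-out columns can then be built with NMF.predictor(model, mat1) as defined above.
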
diff --git a/src/main/scala/BIDMach/models/RandomForest.scala b/src/main/scala/BIDMach/models/RandomForest.scala
index 0dbbdee1..c1aa17b3 100755
--- a/src/main/scala/BIDMach/models/RandomForest.scala
+++ b/src/main/scala/BIDMach/models/RandomForest.scala
@@ -1,1498 +1,1498 @@
-package BIDMach.models
-
-import BIDMat.{SBMat,CMat,CSMat,DMat,Dict,IDict,FMat,GMat,GIMat,GLMat,GSMat,HMat,IMat,LMat,Mat,SMat,SDMat}
-import BIDMach.Learner
-import BIDMach.datasources.{DataSource,MatSource,FileSource,SFileSource}
-import BIDMach.datasinks._
-import BIDMach.updaters.Batch
-import BIDMat.MatFunctions._
-import BIDMat.SciFunctions._
-import edu.berkeley.bid.CUMAT
-import edu.berkeley.bid.CUMACH
-import jcuda._
-import jcuda.runtime.JCuda._
-import jcuda.runtime.cudaMemcpyKind._
-import scala.util.hashing.MurmurHash3
-import java.util.Arrays
-import scala.concurrent.Future
-import scala.concurrent.ExecutionContextExecutor
-
- /**
- * Random Forests. Given a datasource of data and labels, compute a random classification or regression Forest.
- *
- * '''Options'''
- - depth(20): Bound on the tree depth, also the number of passes over the dataset.
- - ntrees(20): Number of trees in the Forest.
- - nsamps(32): Number of random features to try to split each node.
- - nnodes(200000): Bound on the size of each tree (number of nodes).
- - nbits(16): Number of bits to use for feature values.
- - gain(0.01f): Lower bound on impurity gain in order to split a node.
- - catsPerSample(1f): Number of cats per sample for multilabel classification.
- - ncats(0): Number of cats or regression values. 0 means guess from datasource.
- - training(true): Run for training (true) or prediction (false)
- - impurity(0): Impurity type, 0=entropy, 1=Gini
- - regression(false): Build a regression Forest (true) or classification Forest (false).
- - seed(1): Random seed for selecting features. Use this to train distinct Forests in multiple runs.
- - useIfeats(false): An internal flag; when true, store explicit feature indices instead of recomputing them.
- - MAE(true): report performance as Mean Absolute Error (true) or Mean Squared Error (false).
- - trace(0): level of debugging information to print (0,1,2).
- *
- * NOTE: The algorithm uses a packed representation of the dataset statistics with fixed precision fields.
- * Setting nbits selects how many bits to use from each input value. For integer data, the lower nbits are used.
- * For floating point data, the leading nbits are used. So e.g. 16 float bits gives sign, 8 bits of exponent,
- * and 7 bits of mantissa with a leading 1.
- *
- * The category labels in the cats matrix should be contiguous, non-negative integer labels starting with zero.
- *
- * For regression, discrete (integer) target values should be used in the training data. The output will be continuous
- * values interpolated from them.
- *
- * Other key parameters inherited from the learner, datasource and updater:
- - batchSize(10000): The number of samples processed in a block
- - putBack(-1): Whether to put predictions back into the datasource target. Should be 1 for prediction.
- - useGPU(true): Use GPU acceleration if available
- *
- * '''Example:'''
- *
- * a is an nfeats x ninstances data matrix, c is a 1 x ninstances vector of labels
- * {{{
- * val (nn, opts) = RandomForest.learner(a,c)
- * opts.what // prints the available options
- * opts.depth=25 // Set depth - something like log2(ninstances / 10) is good
- * opts.ntrees=20 // Good starting value. Increasing this usually increases accuracy.
- * opts.nsamps=30 // Typically sqrt(nfeats) is good. Larger values may work better.
- * opts.nnodes // Bounded by 2^depth, but usually smaller than this.
- * opts.ncats=10 // It's a good idea to set this; the learner will try to guess it, but may get it wrong
- * opts.nbits=10 // Number of bits to use from input data.
- * nn.train // train the learner.
- * nn.modelmats // get the final model (4 matrices)
- * }}}
- */
-
-
-
-class RandomForest(override val opts:RandomForest.Opts = new RandomForest.Options) extends Model(opts) {
-
- val ITree = 0; val INode = 1; val JFeat = 2; val IFeat = 3; val IVFeat = 4; val ICat = 5
-
- var nnodes = 0;
- var ntrees = 0;
- var nsamps = 0;
- var nfeats = 0;
- var nbits = 0;
- var ncats = 0;
- var seed = 0;
- var batchSize = 0;
- var blockv:SVec = null;
- var gtmpinds:GLMat = null;
- var gpiones:GIMat = null;
- var gtmpcounts:GIMat = null;
- var totals:Array[SVTree] = null;
-// var tt:Array[SVec] = null;
- var nodecounts:IMat = null;
-// var tflags:IMat = null;
- var itrees:IMat = null; // Index of left child (right child is at this value + 1)
- var ftrees:IMat = null; // The feature index for this node
- var vtrees:IMat = null; // The value to compare with for this node
- var ctrees:FMat = null; // Majority class for this node
- var gitrees:GIMat = null; // Index of left child (right child is at this value + 1)
- var gftrees:GIMat = null; // The feature index for this node
- var gvtrees:GIMat = null; // The value to compare with for this node
- var gctrees:GMat = null;
- var gftree:GIMat = null;
- var gitree:GIMat = null;
- var lout:LMat = null;
- var gout:GLMat = null;
- var gtnodes:GIMat = null;
- var gfnodes:GMat = null;
- var outv:IMat = null; // Threshold values returned by minImpurity
- var outf:IMat = null; // Features returned by minImpurity
- var outn:IMat = null; // Node numbers returned by minImpurity
- var outg:FMat = null; // Node impurity gain returned by minImpurity
- var outc:FMat = null; // Category label (or avg) returned by minImpurity
- var outleft:FMat = null; // child categories returned by minImpurity
- var outright:FMat = null;
- var jc:IMat = null;
- var xnodes:IMat = null;
- var ynodes:FMat = null;
- var gains:FMat = null;
- var igains:FMat = null;
- val fieldlengths = izeros(1,6);
- var gfieldlengths:GIMat = null;
- var fieldmasks:Array[Int] = null;
- var fieldshifts:Array[Int] = null;
- var t0 = 0f;
- var t1 = 0f;
- var t2 = 0f;
- var t3 = 0f;
- var t4 = 0f;
- var t5 = 0f;
- var t6 = 0f;
- runtimes = zeros(8,1);
- var x:Mat = null;
- var y:Mat = null;
- var useIfeats = false;
- var lens0 = 0L;
- var lens1 = 0L;
-
- @inline def rhash(v1:Int, v2:Int, v3:Int, nb:Int):Int = {
- math.abs(MurmurHash3.mix(MurmurHash3.mix(v1, v2), v3) % nb)
- }
-
- @inline def rhash(v1:Int, v2:Int, v3:Int, v4:Int, nb:Int):Int = {
- math.abs(MurmurHash3.mix(MurmurHash3.mix(MurmurHash3.mix(v1, v2), v3), v4) % nb)
- }
-
- @inline def packFields(itree:Int, inode:Int, jfeat:Int, ifeat:Int, ivfeat:Int, icat:Int, fieldlengths:Array[Int]):Long = {
- icat.toLong +
- ((ivfeat.toLong +
- ((ifeat.toLong +
- ((jfeat.toLong +
- ((inode.toLong +
- (itree.toLong << fieldlengths(INode))
- ) << fieldlengths(JFeat))
- ) << fieldlengths(IFeat))
- ) << fieldlengths(IVFeat))
- ) << fieldlengths(ICat))
- }
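-  // Layout sketch of the packed key (field widths come from fieldlengths, set in init):
-  //   [ itree | inode | jfeat | ifeat | ivfeat | icat ]   high bits -> low bits
-  // Because itree and inode sit in the high bits, sorting the packed Longs groups
-  // keys by tree, then node, then sampled feature, then feature value, which is
-  // the ordering findBoundaries and minImpurity below depend on.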
-
- @inline def unpackFields(im:Long, fieldlengths:Array[Int]):(Int, Int, Int, Int, Int, Int) = {
- var v = im;
- val icat = (v & ((1 << fieldlengths(ICat))-1)).toInt;
- v = v >>> fieldlengths(ICat);
- val ivfeat = (v & ((1 << fieldlengths(IVFeat))-1)).toInt;
- v = v >>> fieldlengths(IVFeat);
- val ifeat = (v & ((1 << fieldlengths(IFeat))-1)).toInt;
- v = v >>> fieldlengths(IFeat);
- val jfeat = (v & ((1 << fieldlengths(JFeat))-1)).toInt;
- v = v >>> fieldlengths(JFeat);
- val inode = (v & ((1 << fieldlengths(INode))-1)).toInt;
- v = v >>> fieldlengths(INode);
- val itree = v.toInt;
- (itree, inode, jfeat, ifeat, ivfeat, icat)
- }
-
- @inline def extractAbove(fieldNum : Int, packedFields : Long, fieldshifts:Array[Int]) : Int = {
- (packedFields >>> fieldshifts(fieldNum)).toInt
- }
-
- @inline def extractField(fieldNum : Int, packedFields : Long, fieldshifts:Array[Int], fieldmasks:Array[Int]) : Int = {
- (packedFields >>> fieldshifts(fieldNum)).toInt & fieldmasks(fieldNum)
- }
-
- def init() = {
- mats = datasource.next;
- nfeats = mats(0).nrows;
- val nc = mats(0).ncols;
- batchSize = nc;
- datasource.reset;
- nnodes = opts.nnodes;
- ntrees = opts.ntrees;
- nsamps = opts.nsamps;
- nbits = opts.nbits;
- seed = opts.seed;
- useIfeats = opts.useIfeats;
- lens0 = 0;
- lens1 = 0;
- ncats = if (opts.ncats > 0) opts.ncats else (maxi(mats(1)).dv.toInt + 1);
- fieldlengths(ITree) = RandomForest.countbits(ntrees);
- fieldlengths(INode) = RandomForest.countbits(nnodes);
- fieldlengths(JFeat) = RandomForest.countbits(nsamps);
- fieldlengths(IFeat) = if (useIfeats) RandomForest.countbits(nfeats) else 0;
- fieldlengths(IVFeat) = nbits;
- fieldlengths(ICat) = RandomForest.countbits(ncats);
- fieldmasks = getFieldMasks(fieldlengths);
- fieldshifts = getFieldShifts(fieldlengths);
- if (refresh) {
- if (sum(fieldlengths).v > 63) {
- throw new RuntimeException("RandomForest: Too many bits in treepack! "+ sum(fieldlengths).v);
- }
- opts.asInstanceOf[Learner.Options].npasses = opts.depth; // Make sure we make the correct number of passes
- itrees = izeros(nnodes, ntrees);
- ftrees = izeros(nnodes, ntrees);
- vtrees = izeros(nnodes, ntrees);
- ctrees = zeros(nnodes, ntrees);
- gains = zeros(ntrees,1);
- igains = zeros(ntrees,1);
-// tflags = izeros(ntrees,1);
-// implicit val ec = threadPool(ntrees) // make sure there are enough threads (more than the lookahead count)
-// for (i <- 0 until ntrees) Future {driver_thread(i)(ec)}
- nodecounts = iones(ntrees, 1);
- ctrees.set(-1);
- ctrees(0,?) = 0;
- ftrees.set(-1)
- setmodelmats(Array(itrees, ftrees, vtrees, ctrees));
- // Small buffers hold results of batch treepack and sort
- val bsize = (opts.catsPerSample * batchSize * ntrees * nsamps).toInt;
- totals = new Array[SVTree](ntrees);
- for (i <- 0 until ntrees) totals(i) = new SVTree(20);
-// tt = new Array[SVec](ntrees);
- outv = IMat(nsamps, nnodes);
- outf = IMat(nsamps, nnodes);
- outn = IMat(nsamps, nnodes);
- outg = FMat(nsamps, nnodes);
- outc = FMat(nsamps, nnodes);
- outleft = FMat(nsamps, nnodes);
- outright = FMat(nsamps, nnodes);
- jc = IMat(1, ntrees * nnodes * nsamps);
- lout = LMat(1, batchSize * nsamps * ntrees);
- if (useGPU) {
- gpiones = giones(1, bsize);
- gtmpinds = glzeros(1, bsize);
- gtmpcounts = gizeros(1, bsize);
- gout = GLMat(1, batchSize * nsamps * ntrees);
- }
- }
- itrees = modelmats(0).asInstanceOf[IMat];
- ftrees = modelmats(1).asInstanceOf[IMat];
- vtrees = modelmats(2).asInstanceOf[IMat];
- ctrees = modelmats(3).asInstanceOf[FMat];
- if (useGPU) {
- gfieldlengths = GIMat(fieldlengths);
- gtnodes = GIMat(ntrees, batchSize);
- gfnodes = GMat(ntrees, batchSize);
- gftree = GIMat(nnodes, 1);
- gitree = GIMat(nnodes, 1);
- gitrees = GIMat(itrees);
- gftrees = GIMat(ftrees);
- gvtrees = GIMat(vtrees);
- gctrees = GMat(ctrees);
- }
- }
-
- def dobatch(gmats:Array[Mat], ipass:Int, i:Long) = {
- val data = full(gmats(0));
- val cats = gmats(1);
-// val xcats = IMat(cats);println("trace data %s %f" format (xcats(0,0->10).toString, sum(data(120,?)).dv));
-
- val t0 = toc;
-// var blockv0:SVec = null;
- data match {
- case (fdata:FMat) => {
- val nnodes = if (gmats.length > 2) gmats(2).asInstanceOf[IMat] else izeros(ntrees, data.ncols);
- if (gmats.length > 2) {
- treeStep(fdata, nnodes, null, itrees, ftrees, vtrees, ctrees, false);
- } else {
- treeWalk(fdata, nnodes, null, itrees, ftrees, vtrees, ctrees, ipass, false);
- }
- t1 = toc; runtimes(0) += t1 - t0;
- cats match {
- case (icats:IMat) => {
- lout = treePack(fdata, nnodes, icats, lout, seed);
- }
- case (fcats:FMat) => {
- lout = treePack(fdata, nnodes, fcats, lout, seed);
- }
- }
- t2 = toc; runtimes(1) += t2 - t1;
- java.util.Arrays.sort(lout.data, 0, lout.length);
- Mat.nflops += lout.length * math.log(lout.length).toLong;
- t3 = toc; runtimes(2) += t3 - t2;
- blockv = makeV(lout);
- }
- case (gdata:GMat) => {
- gtreeWalk(gdata, gtnodes, gfnodes, gitrees, gftrees, gvtrees, gctrees, ipass, false);
- t1 = toc; runtimes(0) += t1 - t0;
- cats match {
- case (gicats:GIMat) => {
- gout = gtreePack(gdata, gtnodes, gicats, gout, seed);
- }
- case (gfcats:GMat) => {
- gout = gtreePack(gdata, gtnodes, gfcats, gout, seed);
- }
- }
- t2 = toc; runtimes(1) += t2 - t1;
- gpsort(gout);
- t3 = toc; runtimes(2) += t3 - t2;
- blockv = gmakeV(gout, gpiones, gtmpinds, gtmpcounts);
- }
- case _ => {
-        throw new RuntimeException("RandomForest dobatch types don't match %s %s" format (data.mytype, cats.mytype))
- }
- }
- lens0 += blockv.length;
-// while (mini(tflags).v > 0) Thread.`yield`
-// blockv = blockv0.copy;
-// tflags.set(1);
- val tblocks = splittableNodes(blockv);
- lens1 += tblocks.map(_.length).reduce(_+_);
- t4 = toc; runtimes(3) += t4 - t3;
- addSVecs(tblocks, totals);
- t5 = toc; runtimes(4) += t5 - t4;
- }
-
- def evalbatch(mats:Array[Mat], ipass:Int, here:Long):FMat = {
- val depth = if (opts.training) ipass else opts.depth
- val data = full(gmats(0));
- val cats = if (gmats.length > 1) gmats(1) else null;
- val nnodes:Mat = if (gmats.length > 2) gmats(2) else null;
- val fnodes:FMat = zeros(ntrees, data.ncols);
- data match {
- case fdata:FMat => {
- if (nnodes.asInstanceOf[AnyRef] != null) {
- val nn = nnodes.asInstanceOf[IMat];
- treeStep(fdata, nn, fnodes, itrees, ftrees, vtrees, ctrees, true);
- } else {
- treeWalk(fdata, null, fnodes, itrees, ftrees, vtrees, ctrees, depth, true);
- }
- }
- case gdata:GMat => {
- gtreeWalk(gdata, gtnodes, gfnodes, gitrees, gftrees, gvtrees, gctrees, depth, true);
- val gff = new GMat(fnodes.nrows, fnodes.ncols, gfnodes.data, gfnodes.realsize);
- fnodes <-- gff;
- }
- }
- ynodes = fnodes;
- if (opts.regression) {
- var mm = mean(fnodes);
- if (ogmats != null) {
- val pcats = if (cats.asInstanceOf[AnyRef] == null || cats.nrows == 1) mm else mm on sqrt(variance(fnodes))
- ogmats(0) = pcats;
- }
- if (gmats.length > 1) {
- val diff = mm - FMat(cats);
- if (opts.MAE) -mean(abs(diff)) else -(diff dotr diff)/diff.length;
- } else {
- row(0);
- }
- } else {
- val mm = tally(fnodes);
- if (ogmats != null) {
- ogmats(0) = mm;
- }
- if (gmats.length > 1) {
- -mean(FMat(mm != IMat(cats)));
- } else {
- row(0);
- }
- }
- }
-
- def tally(nodes:FMat):IMat = {
- val tallys = izeros(ncats, 1);
- val best = izeros(1, nodes.ncols);
- var i = 0;
- while (i < nodes.ncols) {
- var j = 0;
- var maxind = -1;
- var maxv = -1;
- tallys.clear
- while (j < nodes.nrows) {
- val ct = nodes.data(j + i * nodes.nrows).toInt;
- tallys.data(ct) += 1;
- if (tallys.data(ct) > maxv) {
- maxv = tallys.data(ct);
- maxind = ct;
- }
- j += 1;
- }
- best.data(i) = maxind;
- i += 1;
- }
- best
- }
-
- def tallyv(nodes:FMat):FMat = {
- mean(nodes)
- }
-
- override def updatePass(ipass:Int) = {
-// while (mini(tflags).v > 0) Thread.`yield`
-// tflags.set(2);
- val tt = getSum(totals);
- t6 = toc;
- runtimes(5) += t6 - t5;
-// while (mini(tflags).v > 0) Thread.`yield`
- var itree = 0;
- var impure = 0.0;
- while (itree < ntrees) {
- val totalinds = tt(itree).inds;
- val totalcounts = tt(itree).counts;
- val (jc0, jtree) = findBoundaries(totalinds, jc);
- t0 = toc;
- val (gg, ifrac) = minImpurity(totalinds, totalcounts, outv, outf, outn, outg, outc, outleft, outright, jc0, jtree, itree, opts.impurity, opts.regression);
- impure += ifrac;
- t1 = toc;
- runtimes(6) += t1 - t0;
- val (vm, im) = maxi2(gg); // Find feats with maximum -impurity gain
- val inds = im.t + icol(0->im.length) * gg.nrows; // Turn into an index for the "out" matrices
- val inodes = outn(inds); // get the node indices
- ctrees(inodes, itree) = outc(inds); // Save the node class for these nodes
- vtrees(inodes, itree) = outv(inds); // Threshold values
- val reqgain = opts.gain
- val igain = find(vm > reqgain); // find nodes above the impurity gain threshold
- gains(itree) = if (vm.length>0) mean(vm).v else 0;
- igains(itree) = igain.length
- if (igain.length > 0) {
- val inn = inodes(igain);
- val igg = inds(igain);
- val ifff = outf(igg);
- if (! useIfeats) jfeatsToIfeats(itree, inn, ifff, seed, gitree, gftree);
- ftrees(inn, itree) = ifff; // Set the threshold features
- val ibase = nodecounts(itree);
- itrees(inn, itree) = icol(ibase until (ibase + 2 * igain.length) by 2); // Create indices for new child nodes
- nodecounts(itree) += 2 * igain.length; // Update node counts for this tree
- tochildren(itree, inn, outleft(igg), outright(igg)); // Save class ids to children in case we don't visit them later
- }
- itree += 1;
- t2 = toc;
- runtimes(7) += t2 - t1;
- }
- if (useGPU) {
- gitrees <-- itrees;
- gftrees <-- ftrees;
- gvtrees <-- vtrees;
- gctrees <-- ctrees;
- }
- seed = opts.seed + 341211*(ipass+1);
- println("purity gain %5.4f, fraction impure %4.3f, nnew %2.1f, nnodes %2.1f" format (mean(gains).v, lens1*1f/lens0, 2*mean(igains).v, mean(FMat(nodecounts)).v));
- lens0 = 0;
- lens1 = 0;
-// if (ipass == opts.depth-1) tflags.set(-1);
- }
-
- def tochildren(itree:Int, inodes:IMat, left:FMat, right:FMat) {
- var i = 0;
- while (i < inodes.length) {
- val inode = inodes(i);
- val itr = itrees(inode, itree);
- if (itr+1 >= nnodes) {
- throw new RuntimeException("Tree %d size exceeds the node limit %d, try increasing nnodes or reducing depth" format (itree, nnodes));
- }
- ctrees(itr, itree) = left(i) ;
- ctrees(itr+1, itree) = right(i);
- i += 1;
- }
-
- }
-
-
- def getFieldShifts(fL : IMat) : Array[Int]= {
- val out = new Array[Int](fL.length);
- var i = fL.length - 2
- while (i >= 0) {
- out(i) = out(i+1) + fL(i+1)
- i -= 1
- }
- out
- }
-
- def getFieldMasks(fL : IMat) : Array[Int] = {
- val out = new Array[Int](fL.length);
- var i = 0
- while (i < fL.length) {
- out(i) = (1 << fL(i)) - 1
- i += 1
- }
- out
- }
-
- final val signbit:Int = 1 << 31;
- final val magnitude:Int = signbit - 1;
-
- @inline def floatConvert(a:Float):Int = {
- val vmask = fieldmasks(4);
- val fshift = 32 - fieldlengths(4);
- var ai = java.lang.Float.floatToRawIntBits(a);
- if ((ai & signbit) > 0) {
- ai = -(ai & magnitude);
- }
- ai += signbit;
- (ai >> fshift) & vmask;
- }
-
- @inline def floatConvert2(a:Float):Int = {
- a.toInt
- }
-
- def treePack(fdata:FMat, treenodes:IMat, cats:IMat, out:LMat, seed:Int):LMat = {
- val nfeats = fdata.nrows;
- val nitems = fdata.ncols;
- val ntrees = treenodes.nrows;
- val ionebased = Mat.ioneBased;
- var icolx = 0;
- var nxvals = 0;
- while (icolx < nitems) {
- var itree = 0;
- while (itree < ntrees) {
- val inode0 = treenodes(itree, icolx);
- val inode = inode0 & magnitude
- val isign = ((inode0 & signbit) ^ signbit).toLong << 32;
- if (inode >= 0) {
- var jfeat = 0;
- while (jfeat < nsamps) {
- val ifeat = rhash(seed, itree, inode, jfeat, nfeats);
- val ivfeat = floatConvert(fdata(ifeat, icolx));
- val ic = cats(icolx);
- out.data(nxvals) = packFields(itree, inode, jfeat, if (useIfeats) ifeat else 0, ivfeat, ic, fieldlengths.data) | isign;
- nxvals += 1;
- jfeat += 1;
- }
- }
- itree += 1;
- }
- icolx += 1;
- }
- Mat.nflops += 50L * nxvals
- new LMat(nxvals, 1, out.data);
- }
-
- def treePack(fdata:FMat, treenodes:IMat, fcats:FMat, out:LMat, seed:Int):LMat = {
- val nfeats = fdata.nrows;
- val nitems = fdata.ncols;
- val ntrees = treenodes.nrows;
- val ionebased = Mat.ioneBased;
- var icolx = 0;
- var nxvals = 0;
- while (icolx < nitems) {
- var itree = 0;
- while (itree < ntrees) {
- val inode0 = treenodes(itree, icolx);
- val inode = inode0 & magnitude
- val isign = ((inode0 & signbit) ^ signbit).toLong << 32;
- if (inode >= 0) {
- var jfeat = 0;
- while (jfeat < nsamps) {
- val ifeat = rhash(seed, itree, inode, jfeat, nfeats);
- val ivfeat = floatConvert(fdata(ifeat, icolx));
- val ic = fcats(icolx).toInt;
- out.data(nxvals) = packFields(itree, inode, jfeat, if (useIfeats) ifeat else 0, ivfeat, ic, fieldlengths.data) | isign;
- nxvals += 1;
- jfeat += 1;
- }
- }
- itree += 1;
- }
- icolx += 1;
- }
- Mat.nflops += 50L * nxvals
- new LMat(nxvals, 1, out.data);
- }
-
- def treeStep(fdata:FMat, tnodes:IMat, fnodes:FMat, itrees:IMat, ftrees:IMat, vtrees:IMat, ctrees:FMat, getcat:Boolean) {
- val nfeats = fdata.nrows;
- val nitems = fdata.ncols;
- val ntrees = tnodes.nrows;
- var icol = 0;
- while (icol < nitems) {
- var itree = 0;
- while (itree < ntrees) {
- var inode = tnodes(itree, icol);
- val ileft = itrees(inode, itree);
- if (ileft >= 0) { // Has children so step down
- val ifeat = ftrees(inode, itree);
- val ithresh = vtrees(inode, itree);
- val ivfeat = floatConvert(fdata(ifeat, icol));
- if (ivfeat > ithresh) {
- inode = ileft + 1;
- } else {
- inode = ileft;
- }
- }
- if (getcat) {
- fnodes(itree, icol) = ctrees(inode, itree);
- } else {
- tnodes(itree, icol) = inode;
- }
- itree += 1;
- }
- icol += 1;
- }
- Mat.nflops += 1L * nitems * ntrees;
- }
-
- def treeWalk(fdata:FMat, tnodes:IMat, fnodes:FMat, itrees:IMat, ftrees:IMat, vtrees:IMat, ctrees:FMat, depth:Int, getcat:Boolean) = {
- val nfeats = fdata.nrows;
- val nitems = fdata.ncols;
- var icol = 0;
- while (icol < nitems) {
- var itree = 0;
- while (itree < ntrees) {
- var inode = 0;
- var id = 0;
- while (id < depth) {
- val ileft = itrees(inode, itree);
- val ithresh = vtrees(inode, itree);
- if (ileft == 0) { // This is a leaf, so
- id = depth; // just skip out of the loop
- if (ithresh == -2) { // this node is not splittable
- inode = inode | signbit; // so mark it negative
- }
- } else {
- val ifeat = ftrees(inode, itree); // Test this node and branch
- val ivfeat = floatConvert(fdata(ifeat, icol));
- if (ivfeat > ithresh) {
- inode = ileft + 1;
- } else {
- inode = ileft;
- }
- }
- id += 1;
- }
- if (getcat) {
- fnodes(itree, icol) = ctrees(inode & magnitude, itree);
- } else {
- tnodes(itree, icol) = inode;
- }
- itree += 1;
- }
- icol += 1;
- }
- Mat.nflops += 1L * nitems * ntrees * depth;
- fnodes
- }
-
- def gtreeWalk(fdata:GMat, tnodes:GIMat, fnodes:GMat, itrees:GIMat, ftrees:GIMat, vtrees:GIMat, ctrees:GMat, depth:Int, getcat:Boolean) = {
- val nrows = fdata.nrows;
- val ncols = fdata.ncols;
- Mat.nflops += 1L * ncols * ntrees * depth;
- val err = CUMACH.treeWalk(fdata.data, tnodes.data, fnodes.data, itrees.data, ftrees.data, vtrees.data, ctrees.data,
- nrows, ncols, ntrees, nnodes, if (getcat) 1 else 0, nbits, depth);
- if (err != 0) {throw new RuntimeException("gtreeWalk: error " + cudaGetErrorString(err))}
- }
-
- def gtreeStep(gdata:GMat, tnodes:GIMat, fnodes:GMat, itrees:GIMat, ftrees:GIMat, vtrees:GIMat, ctrees:GMat, getcat:Boolean) {}
-
- def gmakeV(keys:GLMat, vals:GIMat, tmpkeys:GLMat, tmpcounts:GIMat):SVec = {
- val (ginds, gcounts) = GLMat.collectLVec(keys, vals, tmpkeys, tmpcounts);
- Mat.nflops += 1L * keys.length;
- val ovec = SVec(ginds.length);
- ovec.inds <-- ginds;
- ovec.counts <-- gcounts;
- ovec
- }
-
- def makeV(ind:LMat):SVec = {
- Mat.nflops += ind.length;
- val n = ind.length;
- val indd = ind.data;
- var ngroups = 0;
- var i = 1;
- while (i <= n) {
- if (i == n || indd(i) != indd(i-1)) {
- ngroups += 1;
- }
- i += 1;
- }
- val ovec = SVec(ngroups);
- val okeys = ovec.inds.data;
- val ovals = ovec.counts.data;
- var cc = 0;
- ngroups = 0;
- i = 1;
- while (i <= n) {
- cc += 1;
- if (i == n || indd(i) != indd(i-1)) {
- okeys(ngroups) = indd(i-1);
- ovals(ngroups) = cc;
- ngroups += 1;
- cc = 0;
- }
- i += 1;
- }
- ovec;
- }
-
- def countV(ind1:LMat, counts1:IMat, ind2:LMat, counts2:IMat):Int = {
- var count = 0
- val n1 = counts1.length
- val n2 = counts2.length
- var i1 = 0
- var i2 = 0
- while (i1 < n1 || i2 < n2) {
- if (i1 >= n1 || (i2 < n2 && ind2(i2) < ind1(i1))) {
- count += 1
- i2 += 1
- } else if (i2 >= n2 || (i1 < n1 && ind1(i1) < ind2(i2))) {
- count += 1
- i1 += 1
- } else {
- count += 1
- i1 += 1
- i2 += 1
- }
- }
- return count
- }
-
- // Add a short sparse Lvector (first arg) to a long one (2nd arg). Reuses the storage of the long vector.
-
- def addV(ind1:LMat, counts1:IMat, ind2:LMat, counts2:IMat):(LMat, IMat) = {
- if (ind1.length + ind2.length > ind2.data.length) {
- throw new RuntimeException("temporary sparse Long storage too small %d %d" format (ind1.length+ind2.length, ind2.data.length));
- }
- val offset = ind1.length;
- var i = ind2.length - 1;
- while (i >= 0) {
- ind2.data(i + offset) = ind2.data(i);
- counts2.data(i + offset) = counts2.data(i);
- i -= 1;
- }
- var count = 0;
- var i1 = 0;
- val n1 = ind1.length;
- var i2 = offset;
- val n2 = ind2.length + offset;
- while (i1 < n1 || i2 < n2) {
- if (i1 >= n1 || (i2 < n2 && ind2.data(i2) < ind1.data(i1))) {
- ind2.data(count) = ind2.data(i2)
- counts2.data(count) = counts2.data(i2)
- count += 1
- i2 += 1
- } else if (i2 >= n2 || (i1 < n1 && ind1.data(i1) < ind2.data(i2))) {
- ind2.data(count) = ind1.data(i1)
- counts2.data(count) = counts1.data(i1)
- count += 1
- i1 += 1
- } else {
- ind2.data(count) = ind1.data(i1)
- counts2.data(count) = counts1.data(i1) + counts2.data(i2)
- count += 1
- i1 += 1
- i2 += 1
- }
- }
- (new LMat(1, count, ind2.data), new IMat(1, count, counts2.data))
- }
-
- def gaddV(gix:GLMat, gcx:GIMat, gmidinds:GLMat, gmidcounts:GIMat, gmergedinds:GLMat, gmergedcounts:GIMat):(GLMat, GIMat) = {
- val (ai, ac) = GLMat.mergeLVecs(gix, gcx, gmidinds, gmidcounts, gmergedinds, gmergedcounts);
- GLMat.collectLVec(ai, ac, gmidinds, gmidcounts);
- }
-
- def copyinds(inds:LMat, tmp:LMat) = {
- val out = new LMat(inds.length, 1, tmp.data);
- out <-- inds;
- out
- }
-
- def copycounts(cnts:IMat, tmpc:IMat) = {
- val out = new IMat(cnts.length, 1, tmpc.data);
- out <-- cnts;
- out
- }
-
- def gtreePack(fdata:FMat, tnodes:IMat, icats:IMat, gout:GLMat, seed:Int):GLMat ={
- val nrows = fdata.nrows
- val ncols = fdata.ncols
- val nxvals = ncols * ntrees * nsamps;
- Mat.nflops += 1L * nxvals;
- val gdata = GMat(fdata);
- val gcats = GIMat(icats);
- cudaMemcpy(gtnodes.data, Pointer.to(tnodes.data), ncols*ntrees*Sizeof.INT, cudaMemcpyHostToDevice)
- cudaDeviceSynchronize();
- var err = cudaGetLastError
- if (err != 0) {throw new RuntimeException("fgtreePack: error " + cudaGetErrorString(err))}
- err= CUMACH.treePack(gdata.data, gtnodes.data, gcats.data, gout.data, gfieldlengths.data, nrows, ncols, ntrees, nsamps, seed)
- if (err != 0) {throw new RuntimeException("fgtreePack: error " + cudaGetErrorString(err))}
- new GLMat(1, nxvals, gout.data, gout.realsize);
- }
-
- def gtreePack(gdata:GMat, gtnodes:GIMat, gcats:GIMat, gout:GLMat, seed:Int):GLMat ={
- val nrows = gdata.nrows
- val ncols = gdata.ncols
- val nxvals = ncols * ntrees * nsamps;
- Mat.nflops += 1L * nxvals;
- val err= CUMACH.treePack(gdata.data, gtnodes.data, gcats.data, gout.data, gfieldlengths.data, nrows, ncols, ntrees, nsamps, seed)
- if (err != 0) {throw new RuntimeException("gtreePack: error " + cudaGetErrorString(err))}
- new GLMat(1, nxvals, gout.data, gout.realsize);
- }
-
- def gtreePack(gdata:GMat, gtnodes:GIMat, gcats:GMat, gout:GLMat, seed:Int):GLMat ={
- val nrows = gdata.nrows
- val ncols = gdata.ncols
- val nxvals = ncols * ntrees * nsamps;
- Mat.nflops += 1L * nxvals;
- val err= CUMACH.treePackfc(gdata.data, gtnodes.data, gcats.data, gout.data, gfieldlengths.data, nrows, ncols, ntrees, nsamps, seed)
- if (err != 0) {throw new RuntimeException("gtreePack: error " + cudaGetErrorString(err))}
- new GLMat(1, nxvals, gout.data, gout.realsize);
- }
-
- def gpsort(gout:GLMat) = {
- val nxvals = gout.length;
- Mat.nflops += 2L * nxvals * math.log(nxvals).toInt;
- val err = CUMAT.lsort(gout.data, nxvals, 1);
- if (err != 0) {throw new RuntimeException("gpsort: error " + cudaGetErrorString(err))}
- cudaDeviceSynchronize()
- }
-
- def jfeatsToIfeats(itree:Int, inodes:IMat, ifeats:IMat, seed:Int, gitree:GIMat, gftree:GIMat) {
- if (useGPU) {
- gjfeatsToIfeats(itree, inodes, ifeats, seed, gitree, gftree)
- } else {
- val len = inodes.length;
- var i = 0;
- while (i < len) {
- val inode = inodes.data(i);
- val jfeat = ifeats.data(i);
- val ifeat = rhash(seed, itree, inode, jfeat, nfeats);
- ifeats(i) = ifeat;
- i += 1;
- }
- }
- }
-
- def gjfeatsToIfeats(itree:Int, inodes:IMat, ifeats:IMat, seed:Int, gitree:GIMat, gftree:GIMat) {
- val len = inodes.length;
- val gi = new GIMat(inodes.nrows, inodes.ncols, gitree.data, gitree.realsize);
- val gf = new GIMat(ifeats.nrows, ifeats.ncols, gftree.data, gftree.realsize);
- gi <-- inodes;
- gf <-- ifeats;
- val err = CUMACH.jfeatsToIfeats(itree, gi.data, gf.data, gf.data, len, nfeats, seed);
- if (err != 0) {throw new RuntimeException("gjfeatsToIfeats: error " + cudaGetErrorString(err))}
- ifeats <-- gf;
- }
-
-/* def driver_thread(i:Int)(implicit ec:ExecutionContextExecutor) = {
- while (tflags(i) >= 0) {
- while (tflags(i) == 0) Thread.`yield`
- if (tflags(i) == 1) {
- val t3 = toc;
- val sp = splittableNodes_thread(blockv, i);
- val t4 = toc;
- runtimes(3) += t4 - t3;
- totals(i).addSVec(sp);
- val t5 = toc;
- lens1 += sp.length;
- runtimes(4) += t5 - t4;
- tflags(i) == 0;
- } else if (tflags(i) == 2) {
- val t5 = toc;
- tt(i) = totals(i).getSum;
- val t6 = toc;
- runtimes(5) += t6 - t5;
- tflags(i) == 0;
- }
- }
- } */
-
- def splittableNodes(blockv:SVec):Array[SVec] = {
- (0 until ntrees).par.map(i => {splittableNodes_thread(blockv, i);}).toArray;
- }
-
- def splittableNodes_thread(blockv:SVec, itree:Int):SVec = {
- val keys = blockv.inds.data;
- val istart = findIndex(blockv, itree);
- val iend = findIndex(blockv, itree+1);
- val out = SVec(iend - istart);
- val body = (1L << 63) - 1;
- var i = istart;
- var j = 0;
- while (i < iend) {
- var ki = keys(i);
- ki = ki & body;
- val itree = extractField(ITree, ki, fieldshifts, fieldmasks);
- out.inds.data(j) = ki;
- out.counts.data(j) = blockv.counts.data(i);
- j += 1;
- i += 1;
- }
- out;
- }
-
- def findIndex(blockv:SVec, itree:Int):Int = {
- val keys = blockv.inds.data;
- var istart = 0;
- var iend = blockv.length;
- val lsign = 1L << 63;
- while (iend - istart > 1) {
- var mid = (istart + iend)/2
- val key = keys(mid);
- val ktree = if ((key & lsign) != 0) extractField(ITree, key, fieldshifts, fieldmasks) else ntrees;
- if (itree <= ktree) iend = mid else istart = mid
- }
- val key = keys(istart);
- val ktree = if ((key & lsign) != 0) extractField(ITree, key, fieldshifts, fieldmasks) else ntrees;
- if (itree <= ktree) istart else iend;
- }
-
- // Find boundaries where JFeat or ITree changes
-
- def findBoundaries(keys:LMat, jc:IMat):(IMat,IMat) = {
- val fieldshifts = getFieldShifts(fieldlengths);
- val fshift = fieldshifts(JFeat);
- val tshift = fieldshifts(ITree);
- val tmat = izeros(ntrees+1,1);
- var oldv = -1L;
- var v = -1;
- var t = 0;
- var nt = 0;
- var i = 0
- var n = 0;
- while (i < keys.length) {
- v = extractAbove(JFeat, keys(i), fieldshifts);
- t = (keys(i) >>> tshift).toInt;
- while (t > nt) {
- tmat(nt+1) = n;
- nt += 1;
- }
- if (oldv != v) {
- jc(n) = i;
- n += 1;
- oldv = v;
- }
- i += 1
- }
- jc(n) = i;
- while (ntrees > nt) {
- tmat(nt+1) = n;
- nt += 1;
- }
- n += 1;
- if ((n-1) % nsamps != 0) throw new RuntimeException("boundaries %d not a multiple of nsamps %d" format (n-1, nsamps));
- (new IMat(n, 1, jc.data), tmat)
- }
-
- trait imptyType {
- val update: (Int)=>Double;
- val result: (Double, Int)=>Double;
- val combine: (Double, Double, Int, Int) => Double;
- }
-
- object entImpurity extends imptyType {
- def updatefn(a:Int):Double = { val v = math.max(a,1); v * math.log(v) }
- def resultfn(acc:Double, tot:Int):Double = { val v = math.max(tot,1); math.log(v) - acc / v }
- def combinefn(ent1:Double, ent2:Double, tot1:Int, tot2:Int):Double = { (ent1 * tot1 + ent2 * tot2)/math.max(1, tot1 + tot2) }
- val update = updatefn _ ;
- val result = resultfn _ ;
- val combine = combinefn _ ;
- }
-
- object giniImpurity extends imptyType {
- def updatefn(a:Int):Double = { val v = a.toDouble; v * v }
- def resultfn(acc:Double, tot:Int) = { val v = math.max(tot,1).toDouble; 1f - acc / (v * v) }
- def combinefn(ent1:Double, ent2:Double, tot1:Int, tot2:Int):Double = { (ent1 * tot1 + ent2 * tot2)/math.max(1, tot1 + tot2) }
- val update = updatefn _ ;
- val result = resultfn _ ;
- val combine = combinefn _ ;
- }
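-  // Worked check with illustrative counts (8, 2): entImpurity gives
-  //   ln(10) - (8*ln(8) + 2*ln(2))/10 ~= 0.50
-  // and giniImpurity gives 1 - (8*8 + 2*2)/(10*10) = 0.32; both are 0 for a
-  // pure node and largest for a uniform split, so minImpurity can use either
-  // interchangeably through the update/result/combine interface.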
-
- /*object varImpurity extends imptyType {
- def updatefn(a:Int):Double = { val v = a; v * v }
- def resultfn(acc:Double, tot:Int, n:Int):Double = {val v:Double = tot; acc - v*v/n }
- def combinefn(a1:Double, a2:Double, tot1:Int, tot2:Int, n1:Int, n2:Int):Double = {
- val n = n1+n2; val tot:Double = tot1 + tot2; (a1 + a2 - tot*tot/n)/n }
- val update = updatefn _ ;
- val result = resultfn _ ;
- val combine = combinefn _ ;
- }*/
-
- def regressVar(sumsq:Double, tott:Int, acc:Double, tot:Int, acct:Double, tot2:Int):Double = {
- (sumsq - (acc * acc / tot + acct * acct / tot2)) / tott;
- }
-
- val imptyFunArray = Array[imptyType](entImpurity,giniImpurity)
-
- // Pass in one of the two object above as the last argument (imptyFns) to control the impurity
- // outv should be an nsamps * nnodes array to hold the feature threshold value
- // outf should be an nsamps * nnodes array to hold the feature index
- // outg should be an nsamps * nnodes array holding the impurity gain (use maxi2 to get the best)
- // jc should be a zero-based array that points to the start and end of each group of fixed node, jfeat
-
- def minImpurityx(keys:LMat, cnts:IMat, outv:IMat, outf:IMat, outn:IMat, outg:FMat, outc:FMat, outleft:FMat, outright:FMat,
- jc:IMat, jtree:IMat, itree:Int, fnum:Int, regression:Boolean):(FMat, Double) = {
- minImpurity_thread(keys, cnts, outv, outf, outn, outg, outc, outleft, outright, jc, jtree, itree, fnum, regression, 0, 1);
- }
-
- def minImpurity(keys:LMat, cnts:IMat, outv:IMat, outf:IMat, outn:IMat, outg:FMat, outc:FMat, outleft:FMat, outright:FMat,
- jc:IMat, jtree:IMat, itree:Int, fnum:Int, regression:Boolean):(FMat, Double) = {
- val nthreads = 1 + (Mat.numThreads - 1)/2;
- val fm = new Array[FMat](nthreads);
- val impure = DMat(1, nthreads);
- (0 until nthreads).par.foreach(i => {
- val (f, im) = minImpurity_thread(keys, cnts, outv, outf, outn, outg, outc, outleft, outright, jc, jtree, itree, fnum, regression, i, nthreads);
- fm(i) = f;
- impure(i) = im;
- })
- (fm(0), mean(impure).v);
- }
-
- def minImpurity_thread(keys:LMat, cnts:IMat, outv:IMat, outf:IMat, outn:IMat, outg:FMat, outc:FMat, outleft:FMat, outright:FMat,
- jc:IMat, jtree:IMat, itree:Int, fnum:Int, regression:Boolean, ithread:Int, nthreads:Int):(FMat, Double) = {
-
- val update = imptyFunArray(fnum).update
- val result = imptyFunArray(fnum).result
- val combine = imptyFunArray(fnum).combine
-
- val totcounts = izeros(1,ncats);
- val counts = izeros(1,ncats);
- val fieldshifts = getFieldShifts(fieldlengths);
- val fieldmasks = getFieldMasks(fieldlengths);
-
- var j = 0;
- var tot = 0;
- var tott = 0;
- var acc = 0.0;
- var acct = 0.0;
- var i = ithread;
- val todo = jtree(itree+1) - jtree(itree);
- Mat.nflops += todo * 4L * 10;
- var all = 0.0;
- var impure = 0.0;
- while (i < todo) {
- val jci = jc(i + jtree(itree));
- val jcn = jc(i + jtree(itree) + 1);
-
- totcounts.clear;
- counts.clear;
- tott = 0;
- j = jci;
- var maxcnt = -1;
- var imaxcnt = -1;
- var totcats = 0.0;
- var sumsq = 0.0;
- while (j < jcn) { // First get the total counts for each group, and the most frequent cat
- val key = keys(j)
- val cnt = cnts(j)
- val icat = extractField(ICat, key, fieldshifts, fieldmasks);
- val newcnt = totcounts(icat) + cnt;
- totcounts(icat) = newcnt;
- totcats += 1.0 * cnt * icat;
- sumsq += 1.0 * icat * icat * cnt;
- tott += cnt;
- if (newcnt > maxcnt) {
- maxcnt = newcnt;
- imaxcnt = icat;
- }
- j += 1;
- }
- val inode = extractField(INode, keys(jci), fieldshifts, fieldmasks);
- val ifeat = extractField(if (useIfeats) IFeat else JFeat, keys(jci), fieldshifts, fieldmasks);
- var minImpty = 0.0;
- var lastImpty = 0.0;
- var nodeImpty = 0.0;
- var partv = -2; // Will pass through for pure nodes
- var lastkey = -1L;
- var jmaxcnt = 0;
- var kmaxcnt = 0;
- all += tott;
- var lefttotcats = 0.0;
- var lefttot = 0;
- if (maxcnt < tott) { // This is not a pure node
- partv = -1;
- impure += tott;
- acct = 0;
- // println("totcounts "+totcounts.toString);
- j = 0;
- if (regression) { // Get the impurity for the node
- acct = totcats;
- val mmean = totcats / tott;
- nodeImpty = sumsq / tott - mmean * mmean;
- } else {
- while (j < ncats) {
- acct += update(totcounts(j));
- j += 1
- }
- nodeImpty = result(acct, tott);
- }
- totcats = 0.0;
- var lastival = -1;
- minImpty = nodeImpty;
- lastImpty = Double.MaxValue;
- acc = 0;
- tot = 0;
- j = jci;
- maxcnt = -1;
- var jmax = j;
-
- while (j < jcn) {
- val key = keys(j);
- val cnt = cnts(j);
- val ival = extractField(IVFeat, key, fieldshifts, fieldmasks);
- val icat = extractField(ICat, key, fieldshifts, fieldmasks);
-
- if (j > jci && ival != lastival) {
- if (regression) {
- lastImpty = regressVar(sumsq, tott, acc, tot, acct, tott - tot);
- } else {
-            lastImpty = combine(result(acc, tot), result(acct, tott - tot), tot, tott - tot); // TODO: avoid recomputing this on every value change
- }
- if (lastImpty < minImpty) {
- minImpty = lastImpty;
- partv = lastival;
- jmax = j;
- lefttotcats = totcats;
- lefttot = tot;
- }
- }
- val oldcnt = counts(icat);
- val newcnt = oldcnt + cnt;
- counts(icat) = newcnt;
- if (newcnt > maxcnt) {
- maxcnt = newcnt;
- jmaxcnt = icat;
- }
- val oldcntt = totcounts(icat) - oldcnt;
- val newcntt = totcounts(icat) - newcnt;
- tot += cnt;
- if (regression) {
- acc += 1.0 * icat * cnt;
- acct -= 1.0 * icat * cnt;
- } else {
- acc += update(newcnt) - update(oldcnt);
- acct += update(newcntt) - update(oldcntt);
- }
- totcats += cnt * icat;
- lastkey = key;
- lastival = ival;
- j += 1;
- }
- if (! regression) {
- counts.clear;
- maxcnt = -1;
- while (j > jmax) {
- j -= 1;
- val key = keys(j);
- val cnt = cnts(j);
- val ival = extractField(IVFeat, key, fieldshifts, fieldmasks);
- val icat = extractField(ICat, key, fieldshifts, fieldmasks);
- val oldcnt = counts(icat);
- val newcnt = oldcnt + cnt;
- counts(icat) = newcnt;
- if (newcnt > maxcnt) {
- maxcnt = newcnt;
- kmaxcnt = icat;
- }
- }
- }
-// lastImpty = combine(result(acc, tot), result(acct, tott - tot), tot, tott - tot); // For checking
- }
-// println("Impurity %f, %f, min %f, %d, %d" format (nodeImpty, lastImpty, minImpty, partv, ifeat))
- outv(i) = partv;
- outg(i) = (nodeImpty - minImpty).toFloat;
- outf(i) = ifeat;
- if (regression) {
- val defv = if (tott > 0) totcats.toFloat / tott else ncats/2.0f;
- outc(i) = defv;
- outleft(i) = if (lefttot > 0) lefttotcats.toFloat / lefttot else defv;
- outright(i) = if (tott - lefttot > 0) (totcats - lefttotcats) / (tott - lefttot) else defv;
- } else {
- outc(i) = imaxcnt;
- outleft(i) = jmaxcnt;
- outright(i) = kmaxcnt;
- }
- outn(i) = inode;
- i += nthreads;
- }
- if (opts.trace > 0) println("fraction of impure nodes %f" format impure/all);
- (new FMat(nsamps, todo/nsamps, outg.data), impure/all);
- }
-
- override def save(fname:String) = {
- saveIMat(fname+"itrees.imat.lz4", itrees);
- saveIMat(fname+"ftrees.imat.lz4", ftrees);
- saveIMat(fname+"vtrees.imat.lz4", vtrees);
- saveFMat(fname+"ctrees.fmat.lz4", ctrees);
- }
-
- override def load(fname:String) = {
- itrees = loadIMat(fname+"itrees.imat.lz4");
- ftrees = loadIMat(fname+"ftrees.imat.lz4");
- vtrees = loadIMat(fname+"vtrees.imat.lz4");
- ctrees = loadFMat(fname+"ctrees.fmat.lz4");
- }
-
- def addSVecs(a:Array[SVec], totals:Array[SVTree]) {
- (0 until ntrees).par.foreach(i => {totals(i).addSVec(a(i));});
- }
-
- def getSum(totals:Array[SVTree]):Array[SVec] = {
- (0 until ntrees).par.map(i => {totals(i).getSum;}).toArray;
- }
-
-}
-
-class SVec(val inds:LMat, val counts:IMat) {
-
- def length = inds.length
-
- def add(b:SVec):SVec = {
-
- val inds1 = inds.data;
- val counts1 = counts.data;
- val inds2 = b.inds.data;
- val counts2 = b.counts.data;
-
- var count = 0;
- var i1 = 0;
- val n1 = length;
- var i2 = 0;
- val n2 = b.length;
- // First calculate the output size
- while (i1 < n1 || i2 < n2) {
- if (i1 >= n1 || (i2 < n2 && inds2(i2) < inds1(i1))) {
- count += 1;
- i2 += 1;
- } else if (i2 >= n2 || (i1 < n1 && inds1(i1) < inds2(i2))) {
- count += 1;
- i1 += 1;
- } else {
- count += 1;
- i1 += 1;
- i2 += 1;
- }
- }
- // now make the output vector
- val out = SVec(count);
- val inds3 = out.inds.data;
- val counts3 = out.counts.data;
- count = 0;
- i1 = 0;
- i2 = 0;
- while (i1 < n1 || i2 < n2) {
- if (i1 >= n1 || (i2 < n2 && inds2(i2) < inds1(i1))) {
- inds3(count) = inds2(i2);
- counts3(count) = counts2(i2);
- count += 1;
- i2 += 1;
- } else if (i2 >= n2 || (i1 < n1 && inds1(i1) < inds2(i2))) {
- inds3(count) = inds1(i1);
- counts3(count) = counts1(i1);
- count += 1;
- i1 += 1;
- } else {
- inds3(count) = inds1(i1);
- counts3(count) = counts1(i1) + counts2(i2);
- count += 1;
- i1 += 1;
- i2 += 1;
- }
- }
- out
- }
-
- def copy = {
- val inds2 = inds.copy
- val counts2 = counts.copy
- new SVec(inds2, counts2);
- }
-
- def checkInds = {
- var i = 0;
- val len = length;
- val ii = inds.data;
- while (i < len - 1) {
- if (ii(i) > ii(i+1)) {
- throw new RuntimeException("bad order %d %d %d" format (i, ii(i), ii(i+1)));
- }
- i += 1;
- }
- }
-}
-
-class SVTree(val n:Int) {
- val tree = new Array[SVec](n);
-
- def showTree = {
- var i = 0;
- while (i < n) {
- if (tree(i) != null) {
- print(" %d" format tree(i).length);
- } else {
- print(" 0");
- }
- i += 1;
- }
- println("");
- }
-
- def addSVec(a:SVec) = {
- var here = a;
- var i = 0;
- while (tree(i) != null) {
- here = tree(i).add(here);
- tree(i) = null;
- i += 1;
- }
- tree(i) = here;
- }
-
- def getSum:SVec = {
- var i = 0;
- var here:SVec = null;
- while (i < n && tree(i) == null) {
- i += 1;
- }
- if (i < n) {
- here = tree(i);
- tree(i) = null;
- }
- i += 1;
- while (i < n) {
- if (tree(i) != null) {
- here = tree(i).add(here);
- tree(i) = null;
- }
- i += 1;
- }
- here;
- }
-}
-
-object SVec {
- def apply(n:Int):SVec = {
- new SVec(lzeros(1,n), izeros(1,n))
- }
-}
-
-object RandomForest {
-
- trait Opts extends Model.Opts {
- var depth = 20;
- var ntrees = 20;
- var nsamps = 32;
- var nnodes = 200000;
- var nbits = 16;
- var gain = 0.01f;
- var catsPerSample = 1f;
- var ncats = 0;
- var training = true;
- var impurity = 0; // zero for entropy, one for Gini impurity
- var regression = false;
- var seed = 1;
- var useIfeats = false; // explicitly save Ifeat indices (vs. compute them)
- var MAE = true;
- var trace = 0;
- }
-
- class Options extends Opts {}
-
- class RFopts extends Learner.Options with RandomForest.Opts with DataSource.Opts with Batch.Opts;
-
- class RFSopts extends RFopts with MatSource.Opts;
-
- def learner(data:Mat, labels:Mat) = {
- val opts = new RFSopts;
- opts.nbits = 16;
- opts.batchSize = math.min(100000000/data.nrows, data.ncols);
- val nn = new Learner(
- new MatSource(Array(data, labels), opts),
- new RandomForest(opts),
- null,
- new Batch(opts),
- null,
- opts)
- (nn, opts)
- }
-
- def learner(ds:DataSource) = {
- val opts = new RFopts;
- opts.useGPU = false;
- val nn = new Learner(
- ds,
- new RandomForest(opts),
- null,
- new Batch(opts),
- null,
- opts)
- (nn, opts)
- }
-
- class FsOpts extends Learner.Options with RandomForest.Opts with FileSource.Opts with Batch.Opts
-
- def learner(datafile:String, labelfile:String):(Learner, FsOpts) = learner(List(FileSource.simpleEnum(datafile, 1, 0), FileSource.simpleEnum(labelfile, 1, 0)))
-
- def learner(fnames:List[(Int)=>String]) = {
- val opts = new FsOpts;
- opts.nbits = 16;
- opts.batchSize = 1000;
- opts.fnames = fnames;
- implicit val threads = threadPool(4);
- val nn = new Learner(
- new FileSource(opts),
- new RandomForest(opts),
- null,
- new Batch(opts),
- null,
- opts)
- (nn, opts)
- }
-
- class PredOpts extends Learner.Options with RandomForest.Opts with MatSource.Opts with MatSink.Opts;
-
- def predictor(model:Model, data:Mat):(Learner, PredOpts) = {
- val opts = new PredOpts;
- model.opts.asInstanceOf[RandomForest.Opts].training = false;
- opts.copyFrom(model.opts);
- val nn = new Learner(
- new MatSource(Array(data), opts),
- model,
- null,
- null,
- new MatSink(opts),
- opts)
- (nn, opts)
- }
-
- class FilePredOpts extends Learner.Options with RandomForest.Opts with FileSource.Opts with MatSink.Opts;
-
- def load(modelname:String):RandomForest = {
- val opts = new RandomForest.Options;
- val model = new RandomForest(opts);
- model.load(modelname);
- model;
- }
-
- def entropy(a:DMat):Double = {
- val sa = sum(a).dv;
- (a ddot ln(max(drow(1.0), a))) / sa - math.log(sa)
- }
-
- def entropy(a:DMat, b:DMat):Double = {
- val ea = entropy(a);
- val eb = entropy(b);
- val sa = sum(a).dv;
- val sb = sum(b).dv;
- if (sa > 0 && sb > 0) {
- (sa * ea + sb * eb)/(sa + sb)
- } else if (sa > 0) {
- ea
- } else {
- eb
- }
- }
-
- def entropy(a:IMat):Double = entropy(DMat(a));
-
- def entropy(a:IMat, b:IMat):Double = entropy(DMat(a), DMat(b));
-
- def checktree(tree:IMat, ncats:Int) {
- val ntrees = tree.ncols;
- val nnodes = tree.nrows >> 1;
- def checknode(inode:Int, itree:Int) {
- if (tree(inode * 2, itree) < 0) {
- if (tree(inode * 2 + 1, itree) < 0 || tree(inode * 2 + 1, itree) > ncats) {
- throw new RuntimeException("Bad node %d in tree %d" format (inode, itree));
- }
- } else {
- checknode(inode*2+1, itree);
- checknode(inode*2+2, itree);
- }
- }
- var i = 0
- while (i < ntrees) {
- checknode(0, i);
- i += 1;
- }
- println("OK");
- }
-
- def floatToInt(in:GMat, out:Mat, nbits:Int):GIMat = {
- val omat = GIMat.newOrCheckGIMat(in.nrows, in.ncols, out, in.GUID, "floatToInt".##)
- edu.berkeley.bid.CUMACH.floatToInt(in.length, in.data, omat.data, nbits)
- omat
- }
-
- def floatToInt(in:GMat, nbits:Int):GIMat = floatToInt(in, null, nbits)
-
- def countbits(n:Int):Int = {
- var i = 0;
- var j = 1;
- while (j < n) {
- j *= 2;
- i += 1;
- }
- i
- }
-}
+package BIDMach.models
+
+import BIDMat.{SBMat,CMat,CSMat,DMat,Dict,IDict,FMat,GMat,GIMat,GLMat,GSMat,HMat,IMat,LMat,Mat,SMat,SDMat}
+import BIDMach.Learner
+import BIDMach.datasources.{DataSource,MatSource,FileSource,SFileSource}
+import BIDMach.datasinks._
+import BIDMach.updaters.Batch
+import BIDMat.MatFunctions._
+import BIDMat.SciFunctions._
+import edu.berkeley.bid.CUMAT
+import edu.berkeley.bid.CUMACH
+import jcuda._
+import jcuda.runtime.JCuda._
+import jcuda.runtime.cudaMemcpyKind._
+import scala.util.hashing.MurmurHash3
+import java.util.Arrays
+import scala.concurrent.Future
+import scala.concurrent.ExecutionContextExecutor
+
+ /**
+ * Random Forests. Given a datasource of data and labels, compute a random classification or regression Forest.
+ *
+ * '''Options'''
+ - depth(20): Bound on the tree depth, also the number of passes over the dataset.
+ - ntrees(20): Number of trees in the Forest.
+ - nsamps(32): Number of random features to try to split each node.
+ - nnodes(200000): Bound on the size of each tree (number of nodes).
+ - nbits(16): Number of bits to use for feature values.
+ - gain(0.01f): Lower bound on impurity gain in order to split a node.
+ - catsPerSample(1f): Number of cats per sample for multilabel classification.
+ - ncats(0): Number of cats or regression values. 0 means guess from datasource.
+ - training(true): Run for training (true) or prediction (false).
+ - impurity(0): Impurity type, 0=entropy, 1=Gini.
+ - regression(false): Build a regression Forest (true) or classification Forest (false).
+ - seed(1): Random seed for selecting features. Use this to train distinct Forests in multiple runs.
+ - useIfeats(false): Internal flag; when true, store explicit feature indices instead of recomputing them.
+ - MAE(true): Report regression performance as Mean Absolute Error (true) or Mean Squared Error (false).
+ - trace(0): level of debugging information to print (0,1,2).
+ *
+ * NOTE: The algorithm uses a packed representation of the dataset statistics with fixed precision fields.
+ * Setting nbits selects how many bits to use from each input value. For integer data, the lower nbits are used.
+ * For floating point data, the leading nbits are used. So e.g. 16 float bits gives sign, 8 bits of exponent,
+ * and 7 bits of mantissa with a leading 1.
+ *
+ * The category labels in the cats matrix should be contiguous, non-negative integer labels starting with zero.
+ *
+ * For regression, discrete (integer) target values should be used in the training data. The output will be continuous
+ * values interpolated from them.
+ *
+ * Other key parameters inherited from the learner, datasource and updater:
+ - batchSize(10000): The number of samples processed in a block
+ - putBack(-1): Whether to put predictions back into the datasource target. Should be 1 for prediction.
+ - useGPU(true): Use GPU acceleration if available
+ *
+ * '''Example:'''
+ *
+ * a is an nfeats x ninstances data matrix, c is a 1 x ninstances vector of labels
+ * {{{
+ * val (nn, opts) = RandomForest.learner(a,c)
+ * opts.what // prints the available options
+ * opts.depth=25 // Set depth - something like log2(ninstances / 10) is good
+ * opts.ntrees=20 // Good starting value. Increasing this usually increases accuracy.
+ * opts.nsamps=30 // Typically sqrt(nfeats) is good. Larger values may work better.
+ * opts.nnodes // Bounded by 2^depth, but usually smaller than this.
+ * opts.ncats=10 // It's a good idea to set this - the learner will try to guess it, but may get it wrong
+ * opts.nbits=10 // Number of bits to use from input data.
+ * nn.train // train the learner.
+ * nn.modelmats // get the final model (4 matrices)
+ * }}}
+ */
+
+
+
+class RandomForest(override val opts:RandomForest.Opts = new RandomForest.Options) extends Model(opts) {
+
+ val ITree = 0; val INode = 1; val JFeat = 2; val IFeat = 3; val IVFeat = 4; val ICat = 5
+
+ var nnodes = 0
+ var ntrees = 0
+ var nsamps = 0
+ var nfeats = 0
+ var nbits = 0
+ var ncats = 0
+ var seed = 0
+ var batchSize = 0
+ var blockv:SVec = null
+ var gtmpinds:GLMat = null
+ var gpiones:GIMat = null
+ var gtmpcounts:GIMat = null
+ var totals:Array[SVTree] = null
+// var tt:Array[SVec] = null
+ var nodecounts:IMat = null
+// var tflags:IMat = null
+ var itrees:IMat = null; // Index of left child (right child is at this value + 1)
+ var ftrees:IMat = null; // The feature index for this node
+ var vtrees:IMat = null; // The value to compare with for this node
+ var ctrees:FMat = null; // Majority class for this node
+ var gitrees:GIMat = null; // Index of left child (right child is at this value + 1)
+ var gftrees:GIMat = null; // The feature index for this node
+ var gvtrees:GIMat = null; // The value to compare with for this node
+  var gctrees:GMat = null
+ var gftree:GIMat = null
+ var gitree:GIMat = null
+ var lout:LMat = null
+ var gout:GLMat = null
+ var gtnodes:GIMat = null
+ var gfnodes:GMat = null
+ var outv:IMat = null; // Threshold values returned by minImpurity
+ var outf:IMat = null; // Features returned by minImpurity
+ var outn:IMat = null; // Node numbers returned by minImpurity
+ var outg:FMat = null; // Node impurity gain returned by minImpurity
+ var outc:FMat = null; // Category label (or avg) returned by minImpurity
+ var outleft:FMat = null; // child categories returned by minImpurity
+ var outright:FMat = null
+ var jc:IMat = null
+ var xnodes:IMat = null
+ var ynodes:FMat = null
+ var gains:FMat = null
+  var igains:FMat = null
+ val fieldlengths = izeros(1,6)
+ var gfieldlengths:GIMat = null
+ var fieldmasks:Array[Int] = null
+ var fieldshifts:Array[Int] = null
+ var t0 = 0f
+ var t1 = 0f
+ var t2 = 0f
+  var t3 = 0f
+ var t4 = 0f
+ var t5 = 0f
+ var t6 = 0f
+ runtimes = zeros(8,1)
+ var x:Mat = null
+ var y:Mat = null
+ var useIfeats = false
+ var lens0 = 0L
+ var lens1 = 0L
+
+ @inline def rhash(v1:Int, v2:Int, v3:Int, nb:Int):Int = {
+ math.abs(MurmurHash3.mix(MurmurHash3.mix(v1, v2), v3) % nb)
+ }
+
+ @inline def rhash(v1:Int, v2:Int, v3:Int, v4:Int, nb:Int):Int = {
+ math.abs(MurmurHash3.mix(MurmurHash3.mix(MurmurHash3.mix(v1, v2), v3), v4) % nb)
+ }
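+
+  // Note: feature sampling is pseudo-random but deterministic: the feature tried at
+  // (itree, inode, jfeat) is always rhash(seed, itree, inode, jfeat, nfeats), so when
+  // useIfeats=false the index can be recomputed later (see jfeatsToIfeats) instead of stored.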
+
+ @inline def packFields(itree:Int, inode:Int, jfeat:Int, ifeat:Int, ivfeat:Int, icat:Int, fieldlengths:Array[Int]):Long = {
+ icat.toLong +
+ ((ivfeat.toLong +
+ ((ifeat.toLong +
+ ((jfeat.toLong +
+ ((inode.toLong +
+ (itree.toLong << fieldlengths(INode))
+ ) << fieldlengths(JFeat))
+ ) << fieldlengths(IFeat))
+ ) << fieldlengths(IVFeat))
+ ) << fieldlengths(ICat))
+ }
+
+ @inline def unpackFields(im:Long, fieldlengths:Array[Int]):(Int, Int, Int, Int, Int, Int) = {
+ var v = im
+ val icat = (v & ((1 << fieldlengths(ICat))-1)).toInt
+ v = v >>> fieldlengths(ICat)
+ val ivfeat = (v & ((1 << fieldlengths(IVFeat))-1)).toInt
+ v = v >>> fieldlengths(IVFeat)
+ val ifeat = (v & ((1 << fieldlengths(IFeat))-1)).toInt
+ v = v >>> fieldlengths(IFeat)
+ val jfeat = (v & ((1 << fieldlengths(JFeat))-1)).toInt
+ v = v >>> fieldlengths(JFeat)
+ val inode = (v & ((1 << fieldlengths(INode))-1)).toInt
+ v = v >>> fieldlengths(INode)
+ val itree = v.toInt
+ (itree, inode, jfeat, ifeat, ivfeat, icat)
+ }
+
+ @inline def extractAbove(fieldNum : Int, packedFields : Long, fieldshifts:Array[Int]) : Int = {
+ (packedFields >>> fieldshifts(fieldNum)).toInt
+ }
+
+ @inline def extractField(fieldNum : Int, packedFields : Long, fieldshifts:Array[Int], fieldmasks:Array[Int]) : Int = {
+ (packedFields >>> fieldshifts(fieldNum)).toInt & fieldmasks(fieldNum)
+ }
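+
+  /* An editor's sketch of the key layout (hypothetical field lengths, not model defaults).
+   * Fields pack low-to-high as ICat | IVFeat | IFeat | JFeat | INode | ITree, so sorting packed
+   * keys groups entries by tree, then node, then sampled feature, then feature value, then category:
+   *
+   *   val fl = Array(5, 18, 5, 0, 16, 4)                // 48 bits used, <= 63 available
+   *   val key = packFields(3, 1027, 7, 0, 40000, 9, fl)
+   *   unpackFields(key, fl)                             // == (3, 1027, 7, 0, 40000, 9)
+   */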
+
+ def init() = {
+ mats = datasource.next
+ nfeats = mats(0).nrows
+ val nc = mats(0).ncols
+ batchSize = nc
+    datasource.reset
+    nnodes = opts.nnodes
+ ntrees = opts.ntrees
+ nsamps = opts.nsamps
+ nbits = opts.nbits
+ seed = opts.seed
+ useIfeats = opts.useIfeats
+ lens0 = 0
+ lens1 = 0
+ ncats = if (opts.ncats > 0) opts.ncats else (maxi(mats(1)).dv.toInt + 1)
+ fieldlengths(ITree) = RandomForest.countbits(ntrees)
+ fieldlengths(INode) = RandomForest.countbits(nnodes)
+ fieldlengths(JFeat) = RandomForest.countbits(nsamps)
+ fieldlengths(IFeat) = if (useIfeats) RandomForest.countbits(nfeats) else 0
+ fieldlengths(IVFeat) = nbits
+ fieldlengths(ICat) = RandomForest.countbits(ncats)
+ fieldmasks = getFieldMasks(fieldlengths)
+ fieldshifts = getFieldShifts(fieldlengths)
+ if (refresh) {
+ if (sum(fieldlengths).v > 63) {
+ throw new RuntimeException("RandomForest: Too many bits in treepack! "+ sum(fieldlengths).v)
+ }
+ opts.asInstanceOf[Learner.Options].npasses = opts.depth; // Make sure we make the correct number of passes
+ itrees = izeros(nnodes, ntrees)
+ ftrees = izeros(nnodes, ntrees)
+ vtrees = izeros(nnodes, ntrees)
+ ctrees = zeros(nnodes, ntrees)
+ gains = zeros(ntrees,1)
+ igains = zeros(ntrees,1)
+// tflags = izeros(ntrees,1)
+// implicit val ec = threadPool(ntrees) // make sure there are enough threads (more than the lookahead count)
+// for (i <- 0 until ntrees) Future {driver_thread(i)(ec)}
+ nodecounts = iones(ntrees, 1)
+ ctrees.set(-1)
+ ctrees(0,?) = 0
+ ftrees.set(-1)
+ setmodelmats(Array(itrees, ftrees, vtrees, ctrees))
+ // Small buffers hold results of batch treepack and sort
+ val bsize = (opts.catsPerSample * batchSize * ntrees * nsamps).toInt
+ totals = new Array[SVTree](ntrees)
+ for (i <- 0 until ntrees) totals(i) = new SVTree(20)
+// tt = new Array[SVec](ntrees)
+ outv = IMat(nsamps, nnodes)
+ outf = IMat(nsamps, nnodes)
+ outn = IMat(nsamps, nnodes)
+ outg = FMat(nsamps, nnodes)
+ outc = FMat(nsamps, nnodes)
+ outleft = FMat(nsamps, nnodes)
+ outright = FMat(nsamps, nnodes)
+ jc = IMat(1, ntrees * nnodes * nsamps)
+ lout = LMat(1, batchSize * nsamps * ntrees)
+ if (useGPU) {
+ gpiones = giones(1, bsize)
+ gtmpinds = glzeros(1, bsize)
+ gtmpcounts = gizeros(1, bsize)
+ gout = GLMat(1, batchSize * nsamps * ntrees)
+ }
+ }
+ itrees = modelmats(0).asInstanceOf[IMat]
+ ftrees = modelmats(1).asInstanceOf[IMat]
+ vtrees = modelmats(2).asInstanceOf[IMat]
+    ctrees = modelmats(3).asInstanceOf[FMat]
+ if (useGPU) {
+ gfieldlengths = GIMat(fieldlengths)
+ gtnodes = GIMat(ntrees, batchSize)
+ gfnodes = GMat(ntrees, batchSize)
+ gftree = GIMat(nnodes, 1)
+ gitree = GIMat(nnodes, 1)
+ gitrees = GIMat(itrees)
+ gftrees = GIMat(ftrees)
+ gvtrees = GIMat(vtrees)
+ gctrees = GMat(ctrees)
+ }
+ }
+
+ def dobatch(gmats:Array[Mat], ipass:Int, i:Long) = {
+ val data = full(gmats(0))
+ val cats = gmats(1)
+// val xcats = IMat(cats);println("trace data %s %f" format (xcats(0,0->10).toString, sum(data(120,?)).dv))
+
+ val t0 = toc
+// var blockv0:SVec = null
+ data match {
+ case (fdata:FMat) => {
+ val nnodes = if (gmats.length > 2) gmats(2).asInstanceOf[IMat] else izeros(ntrees, data.ncols)
+ if (gmats.length > 2) {
+ treeStep(fdata, nnodes, null, itrees, ftrees, vtrees, ctrees, false)
+ } else {
+ treeWalk(fdata, nnodes, null, itrees, ftrees, vtrees, ctrees, ipass, false)
+ }
+ t1 = toc; runtimes(0) += t1 - t0
+ cats match {
+ case (icats:IMat) => {
+ lout = treePack(fdata, nnodes, icats, lout, seed)
+ }
+ case (fcats:FMat) => {
+ lout = treePack(fdata, nnodes, fcats, lout, seed)
+ }
+ }
+ t2 = toc; runtimes(1) += t2 - t1
+ java.util.Arrays.sort(lout.data, 0, lout.length)
+ Mat.nflops += lout.length * math.log(lout.length).toLong
+ t3 = toc; runtimes(2) += t3 - t2
+ blockv = makeV(lout)
+ }
+ case (gdata:GMat) => {
+        gtreeWalk(gdata, gtnodes, gfnodes, gitrees, gftrees, gvtrees, gctrees, ipass, false)
+ t1 = toc; runtimes(0) += t1 - t0
+ cats match {
+ case (gicats:GIMat) => {
+ gout = gtreePack(gdata, gtnodes, gicats, gout, seed)
+ }
+ case (gfcats:GMat) => {
+ gout = gtreePack(gdata, gtnodes, gfcats, gout, seed)
+ }
+ }
+ t2 = toc; runtimes(1) += t2 - t1
+        gpsort(gout)
+ t3 = toc; runtimes(2) += t3 - t2
+ blockv = gmakeV(gout, gpiones, gtmpinds, gtmpcounts)
+ }
+ case _ => {
+ throw new RuntimeException("RandomForest dobatch types dont match %s %s" format (data.mytype, cats.mytype))
+ }
+ }
+ lens0 += blockv.length
+// while (mini(tflags).v > 0) Thread.`yield`
+// blockv = blockv0.copy
+// tflags.set(1)
+ val tblocks = splittableNodes(blockv)
+ lens1 += tblocks.map(_.length).reduce(_+_)
+ t4 = toc; runtimes(3) += t4 - t3
+ addSVecs(tblocks, totals)
+    t5 = toc; runtimes(4) += t5 - t4
+ }
+
+ def evalbatch(mats:Array[Mat], ipass:Int, here:Long):FMat = {
+ val depth = if (opts.training) ipass else opts.depth
+ val data = full(gmats(0))
+ val cats = if (gmats.length > 1) gmats(1) else null
+ val nnodes:Mat = if (gmats.length > 2) gmats(2) else null
+ val fnodes:FMat = zeros(ntrees, data.ncols)
+ data match {
+ case fdata:FMat => {
+ if (nnodes.asInstanceOf[AnyRef] != null) {
+ val nn = nnodes.asInstanceOf[IMat]
+ treeStep(fdata, nn, fnodes, itrees, ftrees, vtrees, ctrees, true)
+ } else {
+ treeWalk(fdata, null, fnodes, itrees, ftrees, vtrees, ctrees, depth, true)
+ }
+ }
+ case gdata:GMat => {
+ gtreeWalk(gdata, gtnodes, gfnodes, gitrees, gftrees, gvtrees, gctrees, depth, true)
+ val gff = new GMat(fnodes.nrows, fnodes.ncols, gfnodes.data, gfnodes.realsize)
+ fnodes <-- gff
+ }
+ }
+ ynodes = fnodes
+ if (opts.regression) {
+ var mm = mean(fnodes)
+ if (ogmats != null) {
+ val pcats = if (cats.asInstanceOf[AnyRef] == null || cats.nrows == 1) mm else mm on sqrt(variance(fnodes))
+ ogmats(0) = pcats
+ }
+ if (gmats.length > 1) {
+ val diff = mm - FMat(cats)
+ if (opts.MAE) -mean(abs(diff)) else -(diff dotr diff)/diff.length
+ } else {
+ row(0)
+ }
+ } else {
+ val mm = tally(fnodes)
+ if (ogmats != null) {
+ ogmats(0) = mm
+ }
+ if (gmats.length > 1) {
+ -mean(FMat(mm != IMat(cats)))
+ } else {
+ row(0)
+ }
+ }
+ }
+
+ def tally(nodes:FMat):IMat = {
+ val tallys = izeros(ncats, 1)
+ val best = izeros(1, nodes.ncols)
+ var i = 0
+ while (i < nodes.ncols) {
+ var j = 0
+ var maxind = -1
+ var maxv = -1
+ tallys.clear
+ while (j < nodes.nrows) {
+ val ct = nodes.data(j + i * nodes.nrows).toInt
+ tallys.data(ct) += 1
+ if (tallys.data(ct) > maxv) {
+ maxv = tallys.data(ct)
+ maxind = ct
+ }
+ j += 1
+ }
+ best.data(i) = maxind
+ i += 1
+ }
+ best
+ }
+
+ def tallyv(nodes:FMat):FMat = {
+ mean(nodes)
+ }
+
+ override def updatePass(ipass:Int) = {
+// while (mini(tflags).v > 0) Thread.`yield`
+// tflags.set(2)
+ val tt = getSum(totals)
+ t6 = toc
+ runtimes(5) += t6 - t5
+// while (mini(tflags).v > 0) Thread.`yield`
+ var itree = 0
+ var impure = 0.0
+ while (itree < ntrees) {
+ val totalinds = tt(itree).inds
+ val totalcounts = tt(itree).counts
+ val (jc0, jtree) = findBoundaries(totalinds, jc)
+ t0 = toc
+ val (gg, ifrac) = minImpurity(totalinds, totalcounts, outv, outf, outn, outg, outc, outleft, outright, jc0, jtree, itree, opts.impurity, opts.regression)
+ impure += ifrac
+ t1 = toc
+ runtimes(6) += t1 - t0
+ val (vm, im) = maxi2(gg); // Find feats with maximum -impurity gain
+ val inds = im.t + icol(0->im.length) * gg.nrows; // Turn into an index for the "out" matrices
+ val inodes = outn(inds); // get the node indices
+ ctrees(inodes, itree) = outc(inds); // Save the node class for these nodes
+ vtrees(inodes, itree) = outv(inds); // Threshold values
+ val reqgain = opts.gain
+ val igain = find(vm > reqgain); // find nodes above the impurity gain threshold
+ gains(itree) = if (vm.length>0) mean(vm).v else 0
+ igains(itree) = igain.length
+ if (igain.length > 0) {
+ val inn = inodes(igain)
+ val igg = inds(igain)
+ val ifff = outf(igg)
+ if (! useIfeats) jfeatsToIfeats(itree, inn, ifff, seed, gitree, gftree)
+ ftrees(inn, itree) = ifff; // Set the threshold features
+ val ibase = nodecounts(itree)
+ itrees(inn, itree) = icol(ibase until (ibase + 2 * igain.length) by 2); // Create indices for new child nodes
+ nodecounts(itree) += 2 * igain.length; // Update node counts for this tree
+ tochildren(itree, inn, outleft(igg), outright(igg)); // Save class ids to children in case we don't visit them later
+ }
+ itree += 1
+ t2 = toc
+ runtimes(7) += t2 - t1
+ }
+ if (useGPU) {
+ gitrees <-- itrees
+ gftrees <-- ftrees
+ gvtrees <-- vtrees
+ gctrees <-- ctrees
+ }
+ seed = opts.seed + 341211*(ipass+1)
+ println("purity gain %5.4f, fraction impure %4.3f, nnew %2.1f, nnodes %2.1f" format (mean(gains).v, lens1*1f/lens0, 2*mean(igains).v, mean(FMat(nodecounts)).v))
+ lens0 = 0
+ lens1 = 0
+// if (ipass == opts.depth-1) tflags.set(-1)
+ }
+
+ def tochildren(itree:Int, inodes:IMat, left:FMat, right:FMat) {
+ var i = 0
+ while (i < inodes.length) {
+ val inode = inodes(i)
+ val itr = itrees(inode, itree)
+ if (itr+1 >= nnodes) {
+ throw new RuntimeException("Tree %d size exceeds the node limit %d, try increasing nnodes or reducing depth" format (itree, nnodes))
+ }
+ ctrees(itr, itree) = left(i)
+ ctrees(itr+1, itree) = right(i)
+ i += 1
+ }
+
+ }
+
+
+ def getFieldShifts(fL : IMat) : Array[Int]= {
+ val out = new Array[Int](fL.length)
+ var i = fL.length - 2
+ while (i >= 0) {
+ out(i) = out(i+1) + fL(i+1)
+ i -= 1
+ }
+ out
+ }
+
+ def getFieldMasks(fL : IMat) : Array[Int] = {
+ val out = new Array[Int](fL.length)
+ var i = 0
+ while (i < fL.length) {
+ out(i) = (1 << fL(i)) - 1
+ i += 1
+ }
+ out
+ }
+
+ final val signbit:Int = 1 << 31
+ final val magnitude:Int = signbit - 1
+
+ @inline def floatConvert(a:Float):Int = {
+ val vmask = fieldmasks(4)
+ val fshift = 32 - fieldlengths(4)
+ var ai = java.lang.Float.floatToRawIntBits(a)
+ if ((ai & signbit) > 0) {
+ ai = -(ai & magnitude)
+ }
+ ai += signbit
+ (ai >> fshift) & vmask
+ }
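+
+  /* floatConvert maps a float to an unsigned code whose order matches float order, then keeps the
+   * top nbits: a negative value has its magnitude negated, and adding the sign bit biases all codes
+   * into increasing order. An editor's check with nbits=16 (and the sign test as fixed above):
+   * floatConvert(-1.0f)=0x4080 < floatConvert(0.0f)=0x8000 < floatConvert(1.0f)=0xBF80.
+   */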
+
+ @inline def floatConvert2(a:Float):Int = {
+ a.toInt
+ }
+
+ def treePack(fdata:FMat, treenodes:IMat, cats:IMat, out:LMat, seed:Int):LMat = {
+ val nfeats = fdata.nrows
+ val nitems = fdata.ncols
+ val ntrees = treenodes.nrows
+ val ionebased = Mat.ioneBased
+ var icolx = 0
+ var nxvals = 0
+ while (icolx < nitems) {
+ var itree = 0
+ while (itree < ntrees) {
+ val inode0 = treenodes(itree, icolx)
+ val inode = inode0 & magnitude
+ val isign = ((inode0 & signbit) ^ signbit).toLong << 32
+ if (inode >= 0) {
+ var jfeat = 0
+ while (jfeat < nsamps) {
+ val ifeat = rhash(seed, itree, inode, jfeat, nfeats)
+ val ivfeat = floatConvert(fdata(ifeat, icolx))
+ val ic = cats(icolx)
+ out.data(nxvals) = packFields(itree, inode, jfeat, if (useIfeats) ifeat else 0, ivfeat, ic, fieldlengths.data) | isign
+ nxvals += 1
+ jfeat += 1
+ }
+ }
+ itree += 1
+ }
+ icolx += 1
+ }
+ Mat.nflops += 50L * nxvals
+ new LMat(nxvals, 1, out.data)
+ }
+
+ def treePack(fdata:FMat, treenodes:IMat, fcats:FMat, out:LMat, seed:Int):LMat = {
+ val nfeats = fdata.nrows
+ val nitems = fdata.ncols
+ val ntrees = treenodes.nrows
+ val ionebased = Mat.ioneBased
+ var icolx = 0
+ var nxvals = 0
+ while (icolx < nitems) {
+ var itree = 0
+ while (itree < ntrees) {
+ val inode0 = treenodes(itree, icolx)
+ val inode = inode0 & magnitude
+ val isign = ((inode0 & signbit) ^ signbit).toLong << 32
+ if (inode >= 0) {
+ var jfeat = 0
+ while (jfeat < nsamps) {
+ val ifeat = rhash(seed, itree, inode, jfeat, nfeats)
+ val ivfeat = floatConvert(fdata(ifeat, icolx))
+ val ic = fcats(icolx).toInt
+ out.data(nxvals) = packFields(itree, inode, jfeat, if (useIfeats) ifeat else 0, ivfeat, ic, fieldlengths.data) | isign
+ nxvals += 1
+ jfeat += 1
+ }
+ }
+ itree += 1
+ }
+ icolx += 1
+ }
+ Mat.nflops += 50L * nxvals
+ new LMat(nxvals, 1, out.data)
+ }
+
+ def treeStep(fdata:FMat, tnodes:IMat, fnodes:FMat, itrees:IMat, ftrees:IMat, vtrees:IMat, ctrees:FMat, getcat:Boolean) {
+ val nfeats = fdata.nrows
+ val nitems = fdata.ncols
+ val ntrees = tnodes.nrows
+ var icol = 0
+ while (icol < nitems) {
+ var itree = 0
+ while (itree < ntrees) {
+ var inode = tnodes(itree, icol)
+ val ileft = itrees(inode, itree)
+ if (ileft >= 0) { // Has children so step down
+ val ifeat = ftrees(inode, itree)
+ val ithresh = vtrees(inode, itree)
+ val ivfeat = floatConvert(fdata(ifeat, icol))
+ if (ivfeat > ithresh) {
+ inode = ileft + 1
+ } else {
+ inode = ileft
+ }
+ }
+ if (getcat) {
+ fnodes(itree, icol) = ctrees(inode, itree)
+ } else {
+ tnodes(itree, icol) = inode
+ }
+ itree += 1
+ }
+ icol += 1
+ }
+    Mat.nflops += 1L * nitems * ntrees
+ }
+
+ def treeWalk(fdata:FMat, tnodes:IMat, fnodes:FMat, itrees:IMat, ftrees:IMat, vtrees:IMat, ctrees:FMat, depth:Int, getcat:Boolean) = {
+ val nfeats = fdata.nrows
+ val nitems = fdata.ncols
+ var icol = 0
+ while (icol < nitems) {
+ var itree = 0
+ while (itree < ntrees) {
+ var inode = 0
+ var id = 0
+ while (id < depth) {
+ val ileft = itrees(inode, itree)
+ val ithresh = vtrees(inode, itree)
+ if (ileft == 0) { // This is a leaf, so
+ id = depth; // just skip out of the loop
+ if (ithresh == -2) { // this node is not splittable
+ inode = inode | signbit; // so mark it negative
+ }
+ } else {
+ val ifeat = ftrees(inode, itree); // Test this node and branch
+ val ivfeat = floatConvert(fdata(ifeat, icol))
+ if (ivfeat > ithresh) {
+ inode = ileft + 1
+ } else {
+ inode = ileft
+ }
+ }
+ id += 1
+ }
+ if (getcat) {
+ fnodes(itree, icol) = ctrees(inode & magnitude, itree)
+ } else {
+ tnodes(itree, icol) = inode
+ }
+ itree += 1
+ }
+ icol += 1
+ }
+ Mat.nflops += 1L * nitems * ntrees * depth
+ fnodes
+ }
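+
+  // treeWalk descends every tree from the root for up to depth levels. itrees(inode)==0 marks a
+  // leaf (child slots are allocated from index 1 upward); a leaf whose vtrees entry is -2 is
+  // unsplittable, and its id is returned with the sign bit set, which treePack folds into the
+  // packed key so splittableNodes can drop those samples later.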
+
+ def gtreeWalk(fdata:GMat, tnodes:GIMat, fnodes:GMat, itrees:GIMat, ftrees:GIMat, vtrees:GIMat, ctrees:GMat, depth:Int, getcat:Boolean) = {
+ val nrows = fdata.nrows
+ val ncols = fdata.ncols
+ Mat.nflops += 1L * ncols * ntrees * depth
+ val err = CUMACH.treeWalk(fdata.data, tnodes.data, fnodes.data, itrees.data, ftrees.data, vtrees.data, ctrees.data,
+ nrows, ncols, ntrees, nnodes, if (getcat) 1 else 0, nbits, depth)
+ if (err != 0) {throw new RuntimeException("gtreeWalk: error " + cudaGetErrorString(err))}
+ }
+
+ def gtreeStep(gdata:GMat, tnodes:GIMat, fnodes:GMat, itrees:GIMat, ftrees:GIMat, vtrees:GIMat, ctrees:GMat, getcat:Boolean) {}
+
+ def gmakeV(keys:GLMat, vals:GIMat, tmpkeys:GLMat, tmpcounts:GIMat):SVec = {
+ val (ginds, gcounts) = GLMat.collectLVec(keys, vals, tmpkeys, tmpcounts)
+ Mat.nflops += 1L * keys.length
+ val ovec = SVec(ginds.length)
+ ovec.inds <-- ginds
+ ovec.counts <-- gcounts
+ ovec
+ }
+
+ def makeV(ind:LMat):SVec = {
+ Mat.nflops += ind.length
+ val n = ind.length
+ val indd = ind.data
+ var ngroups = 0
+ var i = 1
+ while (i <= n) {
+ if (i == n || indd(i) != indd(i-1)) {
+ ngroups += 1
+ }
+ i += 1
+ }
+ val ovec = SVec(ngroups)
+ val okeys = ovec.inds.data
+ val ovals = ovec.counts.data
+ var cc = 0
+ ngroups = 0
+ i = 1
+ while (i <= n) {
+ cc += 1
+ if (i == n || indd(i) != indd(i-1)) {
+ okeys(ngroups) = indd(i-1)
+ ovals(ngroups) = cc
+ ngroups += 1
+ cc = 0
+ }
+ i += 1
+ }
+ ovec
+ }
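+
+  // makeV run-length encodes a sorted key vector into an SVec of (unique key, count) pairs,
+  // e.g. sorted input (3,3,5,9,9,9) becomes inds=(3,5,9), counts=(2,1,3).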
+
+ def countV(ind1:LMat, counts1:IMat, ind2:LMat, counts2:IMat):Int = {
+ var count = 0
+ val n1 = counts1.length
+ val n2 = counts2.length
+ var i1 = 0
+ var i2 = 0
+ while (i1 < n1 || i2 < n2) {
+ if (i1 >= n1 || (i2 < n2 && ind2(i2) < ind1(i1))) {
+ count += 1
+ i2 += 1
+ } else if (i2 >= n2 || (i1 < n1 && ind1(i1) < ind2(i2))) {
+ count += 1
+ i1 += 1
+ } else {
+ count += 1
+ i1 += 1
+ i2 += 1
+ }
+ }
+ return count
+ }
+
+  // Add a short sparse Lvector (first arg) to a long one (2nd arg). Reuses the storage of the long vector.
+
+ def addV(ind1:LMat, counts1:IMat, ind2:LMat, counts2:IMat):(LMat, IMat) = {
+ if (ind1.length + ind2.length > ind2.data.length) {
+ throw new RuntimeException("temporary sparse Long storage too small %d %d" format (ind1.length+ind2.length, ind2.data.length))
+ }
+ val offset = ind1.length
+ var i = ind2.length - 1
+ while (i >= 0) {
+ ind2.data(i + offset) = ind2.data(i)
+ counts2.data(i + offset) = counts2.data(i)
+ i -= 1
+ }
+ var count = 0
+ var i1 = 0
+ val n1 = ind1.length
+ var i2 = offset
+ val n2 = ind2.length + offset
+ while (i1 < n1 || i2 < n2) {
+ if (i1 >= n1 || (i2 < n2 && ind2.data(i2) < ind1.data(i1))) {
+ ind2.data(count) = ind2.data(i2)
+ counts2.data(count) = counts2.data(i2)
+ count += 1
+ i2 += 1
+ } else if (i2 >= n2 || (i1 < n1 && ind1.data(i1) < ind2.data(i2))) {
+ ind2.data(count) = ind1.data(i1)
+ counts2.data(count) = counts1.data(i1)
+ count += 1
+ i1 += 1
+ } else {
+ ind2.data(count) = ind1.data(i1)
+ counts2.data(count) = counts1.data(i1) + counts2.data(i2)
+ count += 1
+ i1 += 1
+ i2 += 1
+ }
+ }
+ (new LMat(1, count, ind2.data), new IMat(1, count, counts2.data))
+ }
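+
+  // addV first shifts the long vector's data to the top of its own buffer, then merges the two
+  // sorted runs back down from position 0, so the merge happens in place with no new allocation.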
+
+ def gaddV(gix:GLMat, gcx:GIMat, gmidinds:GLMat, gmidcounts:GIMat, gmergedinds:GLMat, gmergedcounts:GIMat):(GLMat, GIMat) = {
+ val (ai, ac) = GLMat.mergeLVecs(gix, gcx, gmidinds, gmidcounts, gmergedinds, gmergedcounts)
+ GLMat.collectLVec(ai, ac, gmidinds, gmidcounts)
+ }
+
+ def copyinds(inds:LMat, tmp:LMat) = {
+ val out = new LMat(inds.length, 1, tmp.data)
+ out <-- inds
+ out
+ }
+
+ def copycounts(cnts:IMat, tmpc:IMat) = {
+ val out = new IMat(cnts.length, 1, tmpc.data)
+ out <-- cnts
+ out
+ }
+
+ def gtreePack(fdata:FMat, tnodes:IMat, icats:IMat, gout:GLMat, seed:Int):GLMat ={
+ val nrows = fdata.nrows
+ val ncols = fdata.ncols
+ val nxvals = ncols * ntrees * nsamps
+ Mat.nflops += 1L * nxvals
+ val gdata = GMat(fdata)
+ val gcats = GIMat(icats)
+ cudaMemcpy(gtnodes.data, Pointer.to(tnodes.data), ncols*ntrees*Sizeof.INT, cudaMemcpyHostToDevice)
+ cudaDeviceSynchronize()
+ var err = cudaGetLastError
+ if (err != 0) {throw new RuntimeException("fgtreePack: error " + cudaGetErrorString(err))}
+ err= CUMACH.treePack(gdata.data, gtnodes.data, gcats.data, gout.data, gfieldlengths.data, nrows, ncols, ntrees, nsamps, seed)
+ if (err != 0) {throw new RuntimeException("fgtreePack: error " + cudaGetErrorString(err))}
+ new GLMat(1, nxvals, gout.data, gout.realsize)
+ }
+
+ def gtreePack(gdata:GMat, gtnodes:GIMat, gcats:GIMat, gout:GLMat, seed:Int):GLMat ={
+ val nrows = gdata.nrows
+ val ncols = gdata.ncols
+ val nxvals = ncols * ntrees * nsamps
+ Mat.nflops += 1L * nxvals
+ val err= CUMACH.treePack(gdata.data, gtnodes.data, gcats.data, gout.data, gfieldlengths.data, nrows, ncols, ntrees, nsamps, seed)
+ if (err != 0) {throw new RuntimeException("gtreePack: error " + cudaGetErrorString(err))}
+ new GLMat(1, nxvals, gout.data, gout.realsize)
+ }
+
+ def gtreePack(gdata:GMat, gtnodes:GIMat, gcats:GMat, gout:GLMat, seed:Int):GLMat ={
+ val nrows = gdata.nrows
+ val ncols = gdata.ncols
+ val nxvals = ncols * ntrees * nsamps
+ Mat.nflops += 1L * nxvals
+ val err= CUMACH.treePackfc(gdata.data, gtnodes.data, gcats.data, gout.data, gfieldlengths.data, nrows, ncols, ntrees, nsamps, seed)
+ if (err != 0) {throw new RuntimeException("gtreePack: error " + cudaGetErrorString(err))}
+ new GLMat(1, nxvals, gout.data, gout.realsize)
+ }
+
+ def gpsort(gout:GLMat) = {
+ val nxvals = gout.length
+ Mat.nflops += 2L * nxvals * math.log(nxvals).toInt
+ val err = CUMAT.lsort(gout.data, nxvals, 1)
+ if (err != 0) {throw new RuntimeException("gpsort: error " + cudaGetErrorString(err))}
+ cudaDeviceSynchronize()
+ }
+
+ def jfeatsToIfeats(itree:Int, inodes:IMat, ifeats:IMat, seed:Int, gitree:GIMat, gftree:GIMat) {
+ if (useGPU) {
+ gjfeatsToIfeats(itree, inodes, ifeats, seed, gitree, gftree)
+ } else {
+ val len = inodes.length
+ var i = 0
+ while (i < len) {
+ val inode = inodes.data(i)
+ val jfeat = ifeats.data(i)
+ val ifeat = rhash(seed, itree, inode, jfeat, nfeats)
+ ifeats(i) = ifeat
+ i += 1
+ }
+ }
+ }
+
+ def gjfeatsToIfeats(itree:Int, inodes:IMat, ifeats:IMat, seed:Int, gitree:GIMat, gftree:GIMat) {
+ val len = inodes.length
+ val gi = new GIMat(inodes.nrows, inodes.ncols, gitree.data, gitree.realsize)
+ val gf = new GIMat(ifeats.nrows, ifeats.ncols, gftree.data, gftree.realsize)
+ gi <-- inodes
+ gf <-- ifeats
+ val err = CUMACH.jfeatsToIfeats(itree, gi.data, gf.data, gf.data, len, nfeats, seed)
+ if (err != 0) {throw new RuntimeException("gjfeatsToIfeats: error " + cudaGetErrorString(err))}
+ ifeats <-- gf
+ }
+
+/* def driver_thread(i:Int)(implicit ec:ExecutionContextExecutor) = {
+ while (tflags(i) >= 0) {
+ while (tflags(i) == 0) Thread.`yield`
+ if (tflags(i) == 1) {
+ val t3 = toc
+ val sp = splittableNodes_thread(blockv, i)
+ val t4 = toc;
+ runtimes(3) += t4 - t3
+ totals(i).addSVec(sp)
+ val t5 = toc
+ lens1 += sp.length
+ runtimes(4) += t5 - t4
+        tflags(i) = 0
+ } else if (tflags(i) == 2) {
+ val t5 = toc
+ tt(i) = totals(i).getSum
+ val t6 = toc
+ runtimes(5) += t6 - t5
+        tflags(i) = 0
+ }
+ }
+ } */
+
+ def splittableNodes(blockv:SVec):Array[SVec] = {
+ (0 until ntrees).par.map(i => {splittableNodes_thread(blockv, i);}).toArray
+ }
+
+ def splittableNodes_thread(blockv:SVec, itree:Int):SVec = {
+ val keys = blockv.inds.data
+ val istart = findIndex(blockv, itree)
+ val iend = findIndex(blockv, itree+1)
+ val out = SVec(iend - istart)
+ val body = (1L << 63) - 1
+ var i = istart
+ var j = 0
+ while (i < iend) {
+ var ki = keys(i)
+ ki = ki & body
+ val itree = extractField(ITree, ki, fieldshifts, fieldmasks)
+ out.inds.data(j) = ki
+ out.counts.data(j) = blockv.counts.data(i)
+ j += 1
+ i += 1
+ }
+ out
+ }
+
+ def findIndex(blockv:SVec, itree:Int):Int = {
+ val keys = blockv.inds.data
+ var istart = 0
+ var iend = blockv.length
+ val lsign = 1L << 63
+ while (iend - istart > 1) {
+ var mid = (istart + iend)/2
+ val key = keys(mid)
+ val ktree = if ((key & lsign) != 0) extractField(ITree, key, fieldshifts, fieldmasks) else ntrees
+ if (itree <= ktree) iend = mid else istart = mid
+ }
+ val key = keys(istart)
+ val ktree = if ((key & lsign) != 0) extractField(ITree, key, fieldshifts, fieldmasks) else ntrees
+ if (itree <= ktree) istart else iend
+ }
+
+ // Find boundaries where JFeat or ITree changes
+
+ def findBoundaries(keys:LMat, jc:IMat):(IMat,IMat) = {
+ val fieldshifts = getFieldShifts(fieldlengths)
+ val fshift = fieldshifts(JFeat)
+ val tshift = fieldshifts(ITree)
+ val tmat = izeros(ntrees+1,1)
+ var oldv = -1L
+ var v = -1
+ var t = 0
+ var nt = 0
+ var i = 0
+ var n = 0
+ while (i < keys.length) {
+ v = extractAbove(JFeat, keys(i), fieldshifts)
+ t = (keys(i) >>> tshift).toInt
+ while (t > nt) {
+ tmat(nt+1) = n
+ nt += 1
+ }
+ if (oldv != v) {
+ jc(n) = i
+ n += 1
+ oldv = v
+ }
+ i += 1
+ }
+ jc(n) = i
+ while (ntrees > nt) {
+ tmat(nt+1) = n
+ nt += 1
+ }
+ n += 1
+ if ((n-1) % nsamps != 0) throw new RuntimeException("boundaries %d not a multiple of nsamps %d" format (n-1, nsamps))
+ (new IMat(n, 1, jc.data), tmat)
+ }
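+
+  // On return the first matrix (jc) holds the offset in keys where each (tree, node, jfeat) group
+  // begins, ending with keys.length, and the second (jtree) holds, per tree, the position in jc
+  // where that tree's groups start.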
+
+ trait imptyType {
+ val update: (Int)=>Double
+ val result: (Double, Int)=>Double
+ val combine: (Double, Double, Int, Int) => Double
+ }
+
+ object entImpurity extends imptyType {
+ def updatefn(a:Int):Double = { val v = math.max(a,1); v * math.log(v) }
+ def resultfn(acc:Double, tot:Int):Double = { val v = math.max(tot,1); math.log(v) - acc / v }
+ def combinefn(ent1:Double, ent2:Double, tot1:Int, tot2:Int):Double = { (ent1 * tot1 + ent2 * tot2)/math.max(1, tot1 + tot2) }
+ val update = updatefn _
+ val result = resultfn _
+ val combine = combinefn _
+ }
+
+ object giniImpurity extends imptyType {
+ def updatefn(a:Int):Double = { val v = a.toDouble; v * v }
+ def resultfn(acc:Double, tot:Int) = { val v = math.max(tot,1).toDouble; 1f - acc / (v * v) }
+ def combinefn(ent1:Double, ent2:Double, tot1:Int, tot2:Int):Double = { (ent1 * tot1 + ent2 * tot2)/math.max(1, tot1 + tot2) }
+ val update = updatefn _
+ val result = resultfn _
+ val combine = combinefn _
+ }
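+
+  /* The accumulator form avoids rescanning counts: update(c) is one category's additive contribution
+   * and result folds in the total. A worked check on counts (3,1):
+   * entropy: acc = 3*ln(3) + 1*ln(1) = 3.296, result = ln(4) - 3.296/4 = 0.562,
+   * matching -(0.75*ln(0.75) + 0.25*ln(0.25));
+   * Gini: acc = 9 + 1 = 10, result = 1 - 10/16 = 0.375, matching 1 - 0.75^2 - 0.25^2.
+   */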
+
+ /*object varImpurity extends imptyType {
+ def updatefn(a:Int):Double = { val v = a; v * v }
+ def resultfn(acc:Double, tot:Int, n:Int):Double = {val v:Double = tot; acc - v*v/n }
+ def combinefn(a1:Double, a2:Double, tot1:Int, tot2:Int, n1:Int, n2:Int):Double = {
+ val n = n1+n2; val tot:Double = tot1 + tot2; (a1 + a2 - tot*tot/n)/n }
+ val update = updatefn _
+ val result = resultfn _
+ val combine = combinefn _
+ }*/
+
+ def regressVar(sumsq:Double, tott:Int, acc:Double, tot:Int, acct:Double, tot2:Int):Double = {
+ (sumsq - (acc * acc / tot + acct * acct / tot2)) / tott
+ }
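+
+  // regressVar is the pooled within-split variance: per side, n*var = sum(x^2) - (sum x)^2/n, and
+  // sumsq, acc, acct carry the overall sum of squares and the left/right sums, so the two sides'
+  // squared-error totals are added and divided by the overall count tott.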
+
+ val imptyFunArray = Array[imptyType](entImpurity,giniImpurity)
+
+ // Pass in one of the two object above as the last argument (imptyFns) to control the impurity
+ // outv should be an nsamps * nnodes array to hold the feature threshold value
+ // outf should be an nsamps * nnodes array to hold the feature index
+ // outg should be an nsamps * nnodes array holding the impurity gain (use maxi2 to get the best)
+ // jc should be a zero-based array that points to the start and end of each group of fixed node, jfeat
+
+ def minImpurityx(keys:LMat, cnts:IMat, outv:IMat, outf:IMat, outn:IMat, outg:FMat, outc:FMat, outleft:FMat, outright:FMat,
+ jc:IMat, jtree:IMat, itree:Int, fnum:Int, regression:Boolean):(FMat, Double) = {
+ minImpurity_thread(keys, cnts, outv, outf, outn, outg, outc, outleft, outright, jc, jtree, itree, fnum, regression, 0, 1)
+ }
+
+ def minImpurity(keys:LMat, cnts:IMat, outv:IMat, outf:IMat, outn:IMat, outg:FMat, outc:FMat, outleft:FMat, outright:FMat,
+ jc:IMat, jtree:IMat, itree:Int, fnum:Int, regression:Boolean):(FMat, Double) = {
+ val nthreads = 1 + (Mat.numThreads - 1)/2
+ val fm = new Array[FMat](nthreads)
+ val impure = DMat(1, nthreads)
+ (0 until nthreads).par.foreach(i => {
+ val (f, im) = minImpurity_thread(keys, cnts, outv, outf, outn, outg, outc, outleft, outright, jc, jtree, itree, fnum, regression, i, nthreads)
+ fm(i) = f
+ impure(i) = im
+ })
+ (fm(0), mean(impure).v)
+ }
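+
+  // Threads stripe over the groups: thread k handles groups k, k+nthreads, k+2*nthreads, ... and
+  // writes disjoint entries of the shared out* matrices, so no locking is needed. All returned
+  // FMats wrap the same outg storage, which is why fm(0) is representative.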
+
+ def minImpurity_thread(keys:LMat, cnts:IMat, outv:IMat, outf:IMat, outn:IMat, outg:FMat, outc:FMat, outleft:FMat, outright:FMat,
+ jc:IMat, jtree:IMat, itree:Int, fnum:Int, regression:Boolean, ithread:Int, nthreads:Int):(FMat, Double) = {
+
+ val update = imptyFunArray(fnum).update
+ val result = imptyFunArray(fnum).result
+ val combine = imptyFunArray(fnum).combine
+
+ val totcounts = izeros(1,ncats)
+ val counts = izeros(1,ncats)
+ val fieldshifts = getFieldShifts(fieldlengths)
+ val fieldmasks = getFieldMasks(fieldlengths)
+
+ var j = 0
+ var tot = 0
+ var tott = 0
+ var acc = 0.0
+ var acct = 0.0
+ var i = ithread
+ val todo = jtree(itree+1) - jtree(itree)
+ Mat.nflops += todo * 4L * 10
+ var all = 0.0
+ var impure = 0.0
+ while (i < todo) {
+ val jci = jc(i + jtree(itree))
+ val jcn = jc(i + jtree(itree) + 1)
+
+ totcounts.clear
+ counts.clear
+ tott = 0
+ j = jci
+ var maxcnt = -1
+ var imaxcnt = -1
+ var totcats = 0.0
+ var sumsq = 0.0
+ while (j < jcn) { // First get the total counts for each group, and the most frequent cat
+ val key = keys(j)
+ val cnt = cnts(j)
+ val icat = extractField(ICat, key, fieldshifts, fieldmasks)
+ val newcnt = totcounts(icat) + cnt
+ totcounts(icat) = newcnt
+ totcats += 1.0 * cnt * icat
+ sumsq += 1.0 * icat * icat * cnt
+ tott += cnt
+ if (newcnt > maxcnt) {
+ maxcnt = newcnt
+ imaxcnt = icat
+ }
+ j += 1
+ }
+ val inode = extractField(INode, keys(jci), fieldshifts, fieldmasks)
+ val ifeat = extractField(if (useIfeats) IFeat else JFeat, keys(jci), fieldshifts, fieldmasks)
+ var minImpty = 0.0
+ var lastImpty = 0.0
+ var nodeImpty = 0.0
+ var partv = -2; // Will pass through for pure nodes
+ var lastkey = -1L
+ var jmaxcnt = 0
+ var kmaxcnt = 0
+ all += tott
+ var lefttotcats = 0.0
+ var lefttot = 0
+ if (maxcnt < tott) { // This is not a pure node
+ partv = -1
+ impure += tott
+ acct = 0
+ // println("totcounts "+totcounts.toString)
+ j = 0
+ if (regression) { // Get the impurity for the node
+ acct = totcats
+ val mmean = totcats / tott
+ nodeImpty = sumsq / tott - mmean * mmean
+ } else {
+ while (j < ncats) {
+ acct += update(totcounts(j))
+ j += 1
+ }
+ nodeImpty = result(acct, tott)
+ }
+ totcats = 0.0
+ var lastival = -1
+ minImpty = nodeImpty
+ lastImpty = Double.MaxValue
+ acc = 0
+ tot = 0
+ j = jci
+ maxcnt = -1
+ var jmax = j
+
+ while (j < jcn) {
+ val key = keys(j)
+ val cnt = cnts(j)
+ val ival = extractField(IVFeat, key, fieldshifts, fieldmasks)
+ val icat = extractField(ICat, key, fieldshifts, fieldmasks)
+
+ if (j > jci && ival != lastival) {
+ if (regression) {
+ lastImpty = regressVar(sumsq, tott, acc, tot, acct, tott - tot)
+ } else {
+            lastImpty = combine(result(acc, tot), result(acct, tott - tot), tot, tott - tot); // Don't compute every time!
+ }
+ if (lastImpty < minImpty) {
+ minImpty = lastImpty
+ partv = lastival
+ jmax = j
+ lefttotcats = totcats
+ lefttot = tot
+ }
+ }
+ val oldcnt = counts(icat)
+ val newcnt = oldcnt + cnt
+ counts(icat) = newcnt
+ if (newcnt > maxcnt) {
+ maxcnt = newcnt
+ jmaxcnt = icat
+ }
+ val oldcntt = totcounts(icat) - oldcnt
+ val newcntt = totcounts(icat) - newcnt
+ tot += cnt
+ if (regression) {
+ acc += 1.0 * icat * cnt
+ acct -= 1.0 * icat * cnt
+ } else {
+ acc += update(newcnt) - update(oldcnt)
+ acct += update(newcntt) - update(oldcntt)
+ }
+ totcats += cnt * icat
+ lastkey = key
+ lastival = ival
+ j += 1
+ }
+ if (! regression) {
+ counts.clear
+ maxcnt = -1
+ while (j > jmax) {
+ j -= 1
+ val key = keys(j)
+ val cnt = cnts(j)
+ val ival = extractField(IVFeat, key, fieldshifts, fieldmasks)
+ val icat = extractField(ICat, key, fieldshifts, fieldmasks)
+ val oldcnt = counts(icat)
+ val newcnt = oldcnt + cnt
+ counts(icat) = newcnt
+ if (newcnt > maxcnt) {
+ maxcnt = newcnt
+ kmaxcnt = icat
+ }
+ }
+ }
+// lastImpty = combine(result(acc, tot), result(acct, tott - tot), tot, tott - tot); // For checking
+ }
+// println("Impurity %f, %f, min %f, %d, %d" format (nodeImpty, lastImpty, minImpty, partv, ifeat))
+ outv(i) = partv
+ outg(i) = (nodeImpty - minImpty).toFloat
+ outf(i) = ifeat
+ if (regression) {
+ val defv = if (tott > 0) totcats.toFloat / tott else ncats/2.0f
+ outc(i) = defv
+ outleft(i) = if (lefttot > 0) lefttotcats.toFloat / lefttot else defv
+ outright(i) = if (tott - lefttot > 0) (totcats - lefttotcats) / (tott - lefttot) else defv
+ } else {
+ outc(i) = imaxcnt
+ outleft(i) = jmaxcnt
+ outright(i) = kmaxcnt
+ }
+ outn(i) = inode
+ i += nthreads
+ }
+ if (opts.trace > 0) println("fraction of impure nodes %f" format impure/all)
+ (new FMat(nsamps, todo/nsamps, outg.data), impure/all)
+ }
+
+ override def save(fname:String) = {
+ saveIMat(fname+"itrees.imat.lz4", itrees)
+ saveIMat(fname+"ftrees.imat.lz4", ftrees)
+ saveIMat(fname+"vtrees.imat.lz4", vtrees)
+ saveFMat(fname+"ctrees.fmat.lz4", ctrees)
+ }
+
+ override def load(fname:String) = {
+ itrees = loadIMat(fname+"itrees.imat.lz4")
+ ftrees = loadIMat(fname+"ftrees.imat.lz4")
+ vtrees = loadIMat(fname+"vtrees.imat.lz4")
+ ctrees = loadFMat(fname+"ctrees.fmat.lz4")
+ }
+
+ def addSVecs(a:Array[SVec], totals:Array[SVTree]) {
+ (0 until ntrees).par.foreach(i => {totals(i).addSVec(a(i));})
+ }
+
+ def getSum(totals:Array[SVTree]):Array[SVec] = {
+ (0 until ntrees).par.map(i => {totals(i).getSum;}).toArray
+ }
+
+}
+
+class SVec(val inds:LMat, val counts:IMat) {
+
+ def length = inds.length
+
+ def add(b:SVec):SVec = {
+
+ val inds1 = inds.data
+ val counts1 = counts.data
+ val inds2 = b.inds.data
+ val counts2 = b.counts.data
+
+ var count = 0
+ var i1 = 0
+ val n1 = length
+ var i2 = 0
+ val n2 = b.length
+ // First calculate the output size
+ while (i1 < n1 || i2 < n2) {
+ if (i1 >= n1 || (i2 < n2 && inds2(i2) < inds1(i1))) {
+ count += 1
+ i2 += 1
+ } else if (i2 >= n2 || (i1 < n1 && inds1(i1) < inds2(i2))) {
+ count += 1
+ i1 += 1
+ } else {
+ count += 1
+ i1 += 1
+ i2 += 1
+ }
+ }
+ // now make the output vector
+ val out = SVec(count)
+ val inds3 = out.inds.data
+ val counts3 = out.counts.data
+ count = 0
+ i1 = 0
+ i2 = 0
+ while (i1 < n1 || i2 < n2) {
+ if (i1 >= n1 || (i2 < n2 && inds2(i2) < inds1(i1))) {
+ inds3(count) = inds2(i2)
+ counts3(count) = counts2(i2)
+ count += 1
+ i2 += 1
+ } else if (i2 >= n2 || (i1 < n1 && inds1(i1) < inds2(i2))) {
+ inds3(count) = inds1(i1)
+ counts3(count) = counts1(i1)
+ count += 1
+ i1 += 1
+ } else {
+ inds3(count) = inds1(i1)
+ counts3(count) = counts1(i1) + counts2(i2)
+ count += 1
+ i1 += 1
+ i2 += 1
+ }
+ }
+ out
+ }
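+
+  // A two-pass sorted merge: the first scan just measures the merged length so the output can be
+  // sized exactly; the second fills it, summing counts where both vectors hold the same index.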
+
+ def copy = {
+ val inds2 = inds.copy
+ val counts2 = counts.copy
+ new SVec(inds2, counts2)
+ }
+
+ def checkInds = {
+ var i = 0
+ val len = length
+ val ii = inds.data
+ while (i < len - 1) {
+ if (ii(i) > ii(i+1)) {
+ throw new RuntimeException("bad order %d %d %d" format (i, ii(i), ii(i+1)))
+ }
+ i += 1
+ }
+ }
+}
+
+class SVTree(val n:Int) {
+ val tree = new Array[SVec](n)
+
+ def showTree = {
+ var i = 0
+ while (i < n) {
+ if (tree(i) != null) {
+ print(" %d" format tree(i).length)
+ } else {
+ print(" 0")
+ }
+ i += 1
+ }
+ println("")
+ }
+
+ def addSVec(a:SVec) = {
+ var here = a
+ var i = 0
+ while (tree(i) != null) {
+ here = tree(i).add(here)
+ tree(i) = null
+ i += 1
+ }
+ tree(i) = here
+ }
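+
+  // The slots act like a binary counter (as in an LSM tree): slot i holds the merge of 2^i inserted
+  // vectors, and an insert "carries" by merging through occupied slots until it finds an empty one.
+  // Each element thus joins O(log n) merges, so incremental accumulation stays cheap and balanced.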
+
+ def getSum:SVec = {
+ var i = 0
+ var here:SVec = null
+ while (i < n && tree(i) == null) {
+ i += 1
+ }
+ if (i < n) {
+ here = tree(i)
+ tree(i) = null
+ }
+ i += 1
+ while (i < n) {
+ if (tree(i) != null) {
+ here = tree(i).add(here)
+ tree(i) = null
+ }
+ i += 1
+ }
+ here
+ }
+}
+
+object SVec {
+ def apply(n:Int):SVec = {
+ new SVec(lzeros(1,n), izeros(1,n))
+ }
+}
+
+object RandomForest {
+
+ trait Opts extends Model.Opts {
+ var depth = 20
+ var ntrees = 20
+ var nsamps = 32
+ var nnodes = 200000
+ var nbits = 16
+ var gain = 0.01f
+ var catsPerSample = 1f
+ var ncats = 0
+ var training = true
+ var impurity = 0; // zero for entropy, one for Gini impurity
+ var regression = false
+ var seed = 1
+ var useIfeats = false; // explicitly save Ifeat indices (vs. compute them)
+ var MAE = true
+ var trace = 0
+ }
+
+ class Options extends Opts {}
+
+ class RFopts extends Learner.Options with RandomForest.Opts with DataSource.Opts with Batch.Opts
+
+ class RFSopts extends RFopts with MatSource.Opts
+
+ def learner(data:Mat, labels:Mat) = {
+ val opts = new RFSopts
+ opts.nbits = 16
+ opts.batchSize = math.min(100000000/data.nrows, data.ncols)
+ val nn = new Learner(
+ new MatSource(Array(data, labels), opts),
+ new RandomForest(opts),
+ null,
+ new Batch(opts),
+ null,
+ opts)
+ (nn, opts)
+ }
+
+ def learner(ds:DataSource) = {
+ val opts = new RFopts
+ opts.useGPU = false
+ val nn = new Learner(
+ ds,
+ new RandomForest(opts),
+ null,
+ new Batch(opts),
+ null,
+ opts)
+ (nn, opts)
+ }
+
+ class FsOpts extends Learner.Options with RandomForest.Opts with FileSource.Opts with Batch.Opts
+
+ def learner(datafile:String, labelfile:String):(Learner, FsOpts) = learner(List(FileSource.simpleEnum(datafile, 1, 0), FileSource.simpleEnum(labelfile, 1, 0)))
+
+ def learner(fnames:List[(Int)=>String]) = {
+ val opts = new FsOpts
+ opts.nbits = 16
+ opts.batchSize = 1000
+ opts.fnames = fnames
+ implicit val threads = threadPool(4)
+ val nn = new Learner(
+ new FileSource(opts),
+ new RandomForest(opts),
+ null,
+ new Batch(opts),
+ null,
+ opts)
+ (nn, opts)
+ }
+
+ class PredOpts extends Learner.Options with RandomForest.Opts with MatSource.Opts with MatSink.Opts
+
+ def predictor(model:Model, data:Mat):(Learner, PredOpts) = {
+ val opts = new PredOpts
+ model.opts.asInstanceOf[RandomForest.Opts].training = false
+ opts.copyFrom(model.opts)
+ val nn = new Learner(
+ new MatSource(Array(data), opts),
+ model,
+ null,
+ null,
+ new MatSink(opts),
+ opts)
+ (nn, opts)
+ }
+
+ class FilePredOpts extends Learner.Options with RandomForest.Opts with FileSource.Opts with MatSink.Opts
+
+ def load(modelname:String):RandomForest = {
+ val opts = new RandomForest.Options
+ val model = new RandomForest(opts)
+    model.load(modelname)
+ model
+ }
+
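+  // Note: for a vector of counts this is the *negated* entropy, sum_i p_i*ln(p_i) with p_i = a_i/sa,
+  // since (a ddot ln(a))/sa - ln(sa) = sum_i p_i*(ln(a_i) - ln(sa)).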
+ def entropy(a:DMat):Double = {
+ val sa = sum(a).dv
+ (a ddot ln(max(drow(1.0), a))) / sa - math.log(sa)
+ }
+
+ def entropy(a:DMat, b:DMat):Double = {
+ val ea = entropy(a)
+ val eb = entropy(b)
+ val sa = sum(a).dv
+ val sb = sum(b).dv
+ if (sa > 0 && sb > 0) {
+ (sa * ea + sb * eb)/(sa + sb)
+ } else if (sa > 0) {
+ ea
+ } else {
+ eb
+ }
+ }
+
+ def entropy(a:IMat):Double = entropy(DMat(a))
+
+ def entropy(a:IMat, b:IMat):Double = entropy(DMat(a), DMat(b))
+
+ def checktree(tree:IMat, ncats:Int) {
+ val ntrees = tree.ncols
+ val nnodes = tree.nrows >> 1
+ def checknode(inode:Int, itree:Int) {
+ if (tree(inode * 2, itree) < 0) {
+ if (tree(inode * 2 + 1, itree) < 0 || tree(inode * 2 + 1, itree) > ncats) {
+ throw new RuntimeException("Bad node %d in tree %d" format (inode, itree))
+ }
+ } else {
+ checknode(inode*2+1, itree)
+ checknode(inode*2+2, itree)
+ }
+ }
+ var i = 0
+ while (i < ntrees) {
+ checknode(0, i)
+ i += 1
+ }
+ println("OK")
+ }
+
+ def floatToInt(in:GMat, out:Mat, nbits:Int):GIMat = {
+ val omat = GIMat.newOrCheckGIMat(in.nrows, in.ncols, out, in.GUID, "floatToInt".##)
+ edu.berkeley.bid.CUMACH.floatToInt(in.length, in.data, omat.data, nbits)
+ omat
+ }
+
+ def floatToInt(in:GMat, nbits:Int):GIMat = floatToInt(in, null, nbits)
+
+ def countbits(n:Int):Int = {
+ var i = 0
+ var j = 1
+ while (j < n) {
+ j *= 2
+ i += 1
+ }
+ i
+ }
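+
+  // countbits(n) is the number of bits needed to index n values: ceil(log2(n)) for n >= 1, with
+  // countbits(1) == 0. E.g. the default nnodes=200000 needs countbits(200000) = 18 bits per node id.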
+}
diff --git a/src/main/scala/BIDMach/models/Regression.scala b/src/main/scala/BIDMach/models/Regression.scala
index a9040407..7d3a9fd5 100755
--- a/src/main/scala/BIDMach/models/Regression.scala
+++ b/src/main/scala/BIDMach/models/Regression.scala
@@ -8,7 +8,7 @@ import BIDMach.updaters._
import BIDMach._
/**
- * Abstract class with shared code for Regression Models
+ * Abstract class with shared code for Regression Models
*/
abstract class RegressionModel(override val opts:RegressionModel.Opts) extends Model {
var targmap:Mat = null
@@ -17,24 +17,24 @@ abstract class RegressionModel(override val opts:RegressionModel.Opts) extends M
var sp:Mat = null
override def copyTo(mod:Model) = {
- super.copyTo(mod);
- val rmod = mod.asInstanceOf[RegressionModel];
- rmod.targmap = targmap;
- rmod.targets = targets;
- rmod.mask = mask;
+ super.copyTo(mod)
+ val rmod = mod.asInstanceOf[RegressionModel]
+ rmod.targmap = targmap
+ rmod.targets = targets
+ rmod.mask = mask
rmod.sp = sp;
}
def init() = {
useGPU = opts.useGPU && Mat.hasCUDA > 0
val data0 = mats(0)
- val m = data0.nrows;
+ val m = data0.nrows
val targetData = mats.length > 1
val d = if (opts.targmap.asInstanceOf[AnyRef] != null) {
opts.targmap.nrows
} else if (opts.targets.asInstanceOf[AnyRef] != null) {
opts.targets.nrows
- } else {
+ } else {
mats(1).nrows
}
val sdat = (sum(data0,2).t + 0.5f).asInstanceOf[FMat]
@@ -42,11 +42,11 @@ abstract class RegressionModel(override val opts:RegressionModel.Opts) extends M
println("corpus perplexity=%f" format (math.exp(-(sp ddot ln(sp)))))
if (refresh) {
- val mm = zeros(d,m);
+ val mm = zeros(d,m)
setmodelmats(Array(mm))
}
- modelmats(0) = convertMat(modelmats(0));
- updatemats = Array(modelmats(0).zeros(modelmats(0).nrows, modelmats(0).ncols));
+ modelmats(0) = convertMat(modelmats(0))
+ updatemats = Array(modelmats(0).zeros(modelmats(0).nrows, modelmats(0).ncols))
targmap = if (opts.targmap.asInstanceOf[AnyRef] != null) convertMat(opts.targmap) else opts.targmap
if (! targetData) {
targets = if (opts.targets.asInstanceOf[AnyRef] != null) convertMat(opts.targets) else opts.targets
@@ -83,7 +83,7 @@ object RegressionModel {
trait Opts extends Model.Opts {
var targets:FMat = null
var targmap:FMat = null
- var rmask:FMat = null
+ var rmask:FMat = null
}
class Options extends Opts {}
diff --git a/src/main/scala/BIDMach/models/SFA.scala b/src/main/scala/BIDMach/models/SFA.scala
index ff7b65a0..bd479377 100755
--- a/src/main/scala/BIDMach/models/SFA.scala
+++ b/src/main/scala/BIDMach/models/SFA.scala
@@ -47,86 +47,86 @@ import BIDMach.Learner
class SFA(override val opts:SFA.Opts = new SFA.Options) extends FactorModel(opts) {
- var mm:Mat = null;
- var traceMem = false;
- var pm:Mat = null;
- var mzero:Mat = null;
- var Minv:Mat = null;
- var diagM:Mat = null;
+ var mm:Mat = null
+ var traceMem = false
+ var pm:Mat = null
+ var mzero:Mat = null
+ var Minv:Mat = null
+ var diagM:Mat = null
var slm:Mat = null;
var mlm:Mat = null;
- var iavg:Mat = null;
- var avg:Mat = null;
- var lamu:Mat = null;
- var itemsum:Mat = null;
- var itemcount:Mat = null;
- var nfeats:Int = 0;
- var totratings:Double = 0;
- var nratings:Double = 0;
+ var iavg:Mat = null
+ var avg:Mat = null
+ var lamu:Mat = null
+ var itemsum:Mat = null
+ var itemcount:Mat = null
+ var nfeats:Int = 0
+ var totratings:Double = 0
+ var nratings:Double = 0
// For integrated ADAGrad updater
- var vexp:Mat = null;
- var texp:Mat = null;
- var lrate:Mat = null;
- var sumsq:Mat = null;
- var firststep = -1f;
- var waitsteps = 0;
- var epsilon = 0f;
+ var vexp:Mat = null
+ var texp:Mat = null
+ var lrate:Mat = null
+ var sumsq:Mat = null
+ var firststep = -1f
+ var waitsteps = 0
+ var epsilon = 0f
override def init() = {
- mats = datasource.next;
- datasource.reset;
- nfeats = mats(0).nrows;
- val batchSize = mats(0).ncols;
- val d = opts.dim;
+ mats = datasource.next
+ datasource.reset
+ nfeats = mats(0).nrows
+ val batchSize = mats(0).ncols
+ val d = opts.dim
if (refresh) {
- mm = normrnd(0,0.01f,d,nfeats);
- mm = convertMat(mm);
- avg = mm.zeros(1,1)
- iavg = mm.zeros(nfeats,1);
- itemsum = mm.zeros(nfeats, 1);
- itemcount = mm.zeros(nfeats, 1);
- diagM = mkdiag(ones(d,1));
- Minv = mm.zeros(d, d);
- Minv <-- diagM;
- setmodelmats(Array(mm, iavg, avg, Minv));
+ mm = normrnd(0,0.01f,d,nfeats)
+ mm = convertMat(mm)
+ avg = mm.zeros(1,1)
+ iavg = mm.zeros(nfeats,1)
+ itemsum = mm.zeros(nfeats, 1)
+ itemcount = mm.zeros(nfeats, 1)
+ diagM = mkdiag(ones(d,1))
+ Minv = mm.zeros(d, d)
+ Minv <-- diagM
+ setmodelmats(Array(mm, iavg, avg, Minv))
}
useGPU = opts.useGPU && Mat.hasCUDA > 0;
- if (useGPU || useDouble) {
- gmats = new Array[Mat](mats.length);
- } else {
- gmats = mats;
- }
-
- modelmats(0) = convertMat(modelmats(0));
- modelmats(1) = convertMat(modelmats(1));
- modelmats(2) = convertMat(modelmats(2));
- modelmats(3) = convertMat(modelmats(3));
- mm = modelmats(0);
- iavg = modelmats(1);
- avg = modelmats(2);
- Minv = modelmats(3);
+ if (useGPU || useDouble) {
+ gmats = new Array[Mat](mats.length)
+ } else {
+ gmats = mats
+ }
+
+ modelmats(0) = convertMat(modelmats(0))
+ modelmats(1) = convertMat(modelmats(1))
+ modelmats(2) = convertMat(modelmats(2))
+ modelmats(3) = convertMat(modelmats(3))
+ mm = modelmats(0)
+ iavg = modelmats(1)
+ avg = modelmats(2)
+ Minv = modelmats(3)
lamu = mm.ones(d, 1) ∘ opts.lambdau
- if (opts.doUsers) lamu(0) = opts.regumean;
- slm = mm.ones(1,1) ∘ (opts.lambdam * batchSize);
- mlm = mm.ones(1,1) ∘ (opts.regmmean * batchSize);
+ if (opts.doUsers) lamu(0) = opts.regumean
+ slm = mm.ones(1,1) ∘ (opts.lambdam * batchSize)
+ mlm = mm.ones(1,1) ∘ (opts.regmmean * batchSize)
mzero = mm.zeros(1,1)
if (opts.doUsers) mm(0,?) = 1f
- updatemats = new Array[Mat](3);
- if (opts.aopts != null) initADAGrad(d, nfeats);
+ updatemats = new Array[Mat](3)
+ if (opts.aopts != null) initADAGrad(d, nfeats)
}
def initADAGrad(d:Int, m:Int) = {
- val aopts = opts.asInstanceOf[ADAGrad.Opts];
- firststep = -1f;
- lrate = convertMat(aopts.lrate);
- texp = if (aopts.texp.asInstanceOf[AnyRef] != null) convertMat(aopts.texp) else null;
- vexp = convertMat(aopts.vexp);
- sumsq = convertMat(zeros(d, m));
- sumsq.set(aopts.initsumsq);
- waitsteps = aopts.waitsteps;
- epsilon = aopts.epsilon;
+ val aopts = opts.asInstanceOf[ADAGrad.Opts]
+ firststep = -1f
+ lrate = convertMat(aopts.lrate)
+ texp = if (aopts.texp.asInstanceOf[AnyRef] != null) convertMat(aopts.texp) else null
+ vexp = convertMat(aopts.vexp)
+ sumsq = convertMat(zeros(d, m))
+ sumsq.set(aopts.initsumsq)
+ waitsteps = aopts.waitsteps
+ epsilon = aopts.epsilon
}
def setpm(pm0:Mat) = {
@@ -134,62 +134,62 @@ class SFA(override val opts:SFA.Opts = new SFA.Options) extends FactorModel(opts
}
def uupdate(sdata0:Mat, user:Mat, ipass:Int, pos:Long):Unit = {
-// val slu = sum((sdata>mzero), 1) * opts.lambdau
+// val slu = sum((sdata>mzero), 1) * opts.lambdau
if (opts.doUsers) mm(0,?) = 1f;
if (pos == 0) println("start "+user(?,0).t.toString)
- val sdata = sdata0 - (iavg + avg);
- val b = mm * sdata;
- val r = if (ipass < opts.startup || putBack < 0) {
- // Setup CG on the first pass, or if no saved state
- user.clear
- b + 0
- } else {
- b - ((user ∘ lamu) + mm * DDS(mm, user, sdata)) // r = b - Ax
- }
- val z = Minv * r
- val p = z + 0
- for (i <- 0 until opts.uiter) {
- val Ap = (p ∘ lamu) + mm * DDS(mm, p, sdata);
- SFA.PreCGupdate(p, r, z, Ap, user, Minv, opts.ueps, opts.uconvg) // Should scale preconditioner by number of predictions per user
- if (opts.traceConverge) {
- println("i=%d, r=%f" format (i, norm(r)));
- }
- }
- if (pos == 0) println("end "+user(?,0).t.toString)
+ val sdata = sdata0 - (iavg + avg)
+ val b = mm * sdata
+ val r = if (ipass < opts.startup || putBack < 0) {
+ // Setup CG on the first pass, or if no saved state
+ user.clear
+ b + 0
+ } else {
+ b - ((user ∘ lamu) + mm * DDS(mm, user, sdata)) // r = b - Ax
+ }
+ val z = Minv * r
+ val p = z + 0
+ for (i <- 0 until opts.uiter) {
+ val Ap = (p ∘ lamu) + mm * DDS(mm, p, sdata)
+ SFA.PreCGupdate(p, r, z, Ap, user, Minv, opts.ueps, opts.uconvg) // Should scale preconditioner by number of predictions per user
+ if (opts.traceConverge) {
+ println("i=%d, r=%f" format (i, norm(r)))
+ }
+ }
+ if (pos == 0) println("end "+user(?,0).t.toString)
}
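
Note on the loop above: reading b = mm * sdata and Ap = (p ∘ lamu) + mm * DDS(mm, p, sdata), each uupdate call appears to run preconditioned CG on the per-user regularized normal equations (M_Ω M_Ωᵀ + diag(λu)) u = M s, where DDS restricts predictions to the observed entries Ω of sdata and Minv (initialized to the identity in init) acts as the preconditioner.
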
def mupdate(sdata0:Mat, user:Mat, ipass:Int, pos:Long):Unit = {
- val sdata = sdata0 - (iavg + avg);
+ val sdata = sdata0 - (iavg + avg)
// values to be accumulated
- val ddsmu = DDS(mm, user, sdata);
- val diffs = sdata + 1f;
- diffs.contents ~ sdata.contents - ddsmu.contents;
+ val ddsmu = DDS(mm, user, sdata)
+ val diffs = sdata + 1f
+ diffs.contents ~ sdata.contents - ddsmu.contents
if (ipass < 1) {
- itemsum ~ itemsum + sum(sdata0, 2);
- itemcount ~ itemcount + sum(sdata0 != 0f, 2);
- avg ~ sum(itemsum) / sum(itemcount);
- iavg ~ ((itemsum + avg) / (itemcount + 1)) - avg;
+ itemsum ~ itemsum + sum(sdata0, 2)
+ itemcount ~ itemcount + sum(sdata0 != 0f, 2)
+ avg ~ sum(itemsum) / sum(itemcount)
+ iavg ~ ((itemsum + avg) / (itemcount + 1)) - avg
}
updatemats(1) = (sum(diffs,2) - iavg*mlm) / (1 + sum(diffs>0f,2)); // per-item term estimator
- updatemats(2) = sum(diffs.contents) / (1 + diffs.contents.length);
+ updatemats(2) = sum(diffs.contents) / (1 + diffs.contents.length)
if (opts.weightByUser) {
val iwt = 100f / max(sum(sdata != 0f), 100f);
- val suser = user ∘ iwt;
+ val suser = user ∘ iwt
if (opts.aopts != null) {
- if (firststep <= 0) firststep = pos.toFloat;
- val step = (pos + firststep)/firststep;
- ADAGrad.multUpdate(suser, diffs, modelmats(0), sumsq, null, lrate, texp, vexp, epsilon, step, waitsteps);
+ if (firststep <= 0) firststep = pos.toFloat
+ val step = (pos + firststep)/firststep
+ ADAGrad.multUpdate(suser, diffs, modelmats(0), sumsq, null, lrate, texp, vexp, epsilon, step, waitsteps)
} else {
- updatemats(0) = suser *^ diffs - (mm ∘ slm); // simple derivative
+ updatemats(0) = suser *^ diffs - (mm ∘ slm); // simple derivative
}
} else {
- if (opts.aopts != null) {
- if (firststep <= 0) firststep = pos.toFloat;
- val step = (pos + firststep)/firststep;
- ADAGrad.multUpdate(user, diffs, modelmats(0), sumsq, null, lrate, texp, vexp, epsilon, step, waitsteps);
- } else {
- updatemats(0) = user *^ diffs - (mm ∘ slm); // simple derivative
- }
+ if (opts.aopts != null) {
+ if (firststep <= 0) firststep = pos.toFloat
+ val step = (pos + firststep)/firststep
+ ADAGrad.multUpdate(user, diffs, modelmats(0), sumsq, null, lrate, texp, vexp, epsilon, step, waitsteps)
+ } else {
+ updatemats(0) = user *^ diffs - (mm ∘ slm); // simple derivative
+ }
}
}
@@ -201,11 +201,11 @@ class SFA(override val opts:SFA.Opts = new SFA.Options) extends FactorModel(opts
pm <-- rm
if (ipass < 2) {
val mtmp = mm + 0
- for (i <- 0 until opts.miter) {
- val Ap = (pm ∘ slm) + user *^ DDS(pm, user, sdata)
- CG.CGupdate(pm, rm, Ap, mtmp, opts.ueps, opts.uconvg)
- }
- updatemats(0) = mtmp
+ for (i <- 0 until opts.miter) {
+ val Ap = (pm ∘ slm) + user *^ DDS(pm, user, sdata)
+ CG.CGupdate(pm, rm, Ap, mtmp, opts.ueps, opts.uconvg)
+ }
+ updatemats(0) = mtmp
} else {
updatemats(0) = rm
updatemats(1) = (pm ∘ slm) + user *^ DDS(pm, user, sdata) // accumulate Ap
@@ -217,53 +217,53 @@ class SFA(override val opts:SFA.Opts = new SFA.Options) extends FactorModel(opts
}
def evalfun(sdata:Mat, user:Mat, ipass:Int, pos:Long):FMat = {
- val preds = DDS(mm, user, sdata) + (iavg + avg);
+ val preds = DDS(mm, user, sdata) + (iavg + avg)
if (ogmats != null) {
- ogmats(0) = user;
+ ogmats(0) = user
if (ogmats.length > 1) {
- ogmats(1) = preds;
+ ogmats(1) = preds
}
}
- val dc = sdata.contents;
- val pc = preds.contents;
- val vv = (dc - pc) ddot (dc - pc);
- -sqrt(row(vv/sdata.nnz))
+ val dc = sdata.contents
+ val pc = preds.contents
+ val vv = (dc - pc) ddot (dc - pc)
+ -sqrt(row(vv/sdata.nnz))
}
override def evalfun(sdata:Mat, user:Mat, preds:Mat, ipass:Int, pos:Long):FMat = {
- val spreds = DDS(mm, user, sdata) + (iavg + avg);
- val dc = sdata.contents;
- val pc = spreds.contents;
- val vv = (dc - pc) ddot (dc - pc);
- val xpreds = DDS(mm, user, preds) + (iavg + avg);
- if (ogmats != null) {
- ogmats(0) = user;
+ val spreds = DDS(mm, user, sdata) + (iavg + avg)
+ val dc = sdata.contents
+ val pc = spreds.contents
+ val vv = (dc - pc) ddot (dc - pc)
+ val xpreds = DDS(mm, user, preds) + (iavg + avg)
+ if (ogmats != null) {
+ ogmats(0) = user
if (ogmats.length > 1) {
- ogmats(1) = xpreds;
+ ogmats(1) = xpreds
}
}
- preds.contents <-- xpreds.contents;
- -sqrt(row(vv/sdata.nnz))
+ preds.contents <-- xpreds.contents
+ -sqrt(row(vv/sdata.nnz))
}
}
object SFA {
trait Opts extends FactorModel.Opts {
- var ueps = 1e-10f
- var uconvg = 1e-3f
- var miter = 5
- var lambdau = 5f
- var lambdam = 5f
- var regumean = 0f
- var regmmean = 0f
- var startup = 1
- var traceConverge = false
- var doUsers = true
- var weightByUser = false
- var aopts:ADAGrad.Opts = null;
- var minv = 1f;
- var maxv = 5f;
-
+ var ueps = 1e-10f
+ var uconvg = 1e-3f
+ var miter = 5
+ var lambdau = 5f
+ var lambdam = 5f
+ var regumean = 0f
+ var regmmean = 0f
+ var startup = 1
+ var traceConverge = false
+ var doUsers = true
+ var weightByUser = false
+ var aopts:ADAGrad.Opts = null
+ var minv = 1f
+ var maxv = 5f
+
}
class Options extends Opts {}
@@ -274,15 +274,15 @@ object SFA {
opts.putBack = -1
opts.npasses = 4
opts.lrate = 0.1
- opts.initUval = 0f;
+ opts.initUval = 0f
opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
- val nn = new Learner(
- new MatSource(Array(mat0:Mat), opts),
- new SFA(opts),
- null,
- new Grad(opts),
- null,
- opts)
+ val nn = new Learner(
+ new MatSource(Array(mat0:Mat), opts),
+ new SFA(opts),
+ null,
+ new Grad(opts),
+ null,
+ opts)
(nn, opts)
}
@@ -292,17 +292,17 @@ object SFA {
opts.dim = d
opts.putBack = -1
opts.npasses = 4
- opts.lrate = 0.1;
- opts.initUval = 0f;
- opts.batchSize = math.min(100000, mat0.ncols/30 + 1);
- opts.aopts = opts;
- val nn = new Learner(
- new MatSource(Array(mat0:Mat), opts),
- new SFA(opts),
- null,
- null,
- null,
- opts);
+ opts.lrate = 0.1
+ opts.initUval = 0f
+ opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
+ opts.aopts = opts
+ val nn = new Learner(
+ new MatSource(Array(mat0:Mat), opts),
+ new SFA(opts),
+ null,
+ null,
+ null,
+ opts)
(nn, opts)
}
@@ -312,8 +312,8 @@ object SFA {
opts.dim = d
opts.putBack = 1
opts.npasses = 4
- opts.lrate = 0.1;
- opts.initUval = 0f;
+ opts.lrate = 0.1
+ opts.initUval = 0f
opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
val nn = new Learner(
new MatSource(Array(mat0, user0), opts),
@@ -331,10 +331,10 @@ object SFA {
opts.dim = d
opts.putBack = 1
opts.npasses = 4
- opts.lrate = 0.1;
- opts.initUval = 0f;
- opts.batchSize = math.min(100000, mat0.ncols/30 + 1);
- opts.aopts = opts;
+ opts.lrate = 0.1
+ opts.initUval = 0f
+ opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
+ opts.aopts = opts
val nn = new Learner(
new MatSource(Array(mat0, user0), opts),
new SFA(opts),
@@ -351,8 +351,8 @@ object SFA {
opts.dim = d
opts.putBack = 1
opts.npasses = 4
- opts.lrate = 0.1;
- opts.initUval = 0f;
+ opts.lrate = 0.1
+ opts.initUval = 0f
opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
val nn = new Learner(
new MatSource(Array(mat0, user0), opts),
@@ -365,26 +365,26 @@ object SFA {
}
class PredOpts extends Learner.Options with SFA.Opts with MatSource.Opts with MatSink.Opts
-
+
def predictor(model0:Model, mat1:Mat, preds:Mat) = {
val model = model0.asInstanceOf[SFA]
- val nopts = new PredOpts;
+ val nopts = new PredOpts
nopts.batchSize = math.min(10000, mat1.ncols/30 + 1)
nopts.putBack = -1
- val newmod = new SFA(nopts);
+ val newmod = new SFA(nopts)
newmod.refresh = false
- newmod.copyFrom(model);
- newmod.Minv = model.Minv;
- val mopts = model.opts.asInstanceOf[SFA.Opts];
- nopts.dim = mopts.dim;
- nopts.uconvg = mopts.uconvg;
- nopts.miter = mopts.miter;
- nopts.lambdau = mopts.lambdau;
- nopts.lambdam = mopts.lambdam;
- nopts.regumean = mopts.regumean;
- nopts.doUsers = mopts.doUsers;
- nopts.weightByUser = mopts.weightByUser;
- nopts.nmats = 2;
+ newmod.copyFrom(model)
+ newmod.Minv = model.Minv
+ val mopts = model.opts.asInstanceOf[SFA.Opts]
+ nopts.dim = mopts.dim
+ nopts.uconvg = mopts.uconvg
+ nopts.miter = mopts.miter
+ nopts.lambdau = mopts.lambdau
+ nopts.lambdam = mopts.lambdam
+ nopts.regumean = mopts.regumean
+ nopts.doUsers = mopts.doUsers
+ nopts.weightByUser = mopts.weightByUser
+ nopts.nmats = 2
val nn = new Learner(
new MatSource(Array(mat1, zeros(mopts.dim, mat1.ncols), preds), nopts),
newmod,
@@ -397,23 +397,23 @@ object SFA {
def predictor(model0:Model, mat1:Mat, user:Mat, preds:Mat) = {
val model = model0.asInstanceOf[SFA]
- val nopts = new PredOpts;
+ val nopts = new PredOpts
nopts.batchSize = math.min(10000, mat1.ncols/30 + 1)
nopts.putBack = -1
- val newmod = new SFA(nopts);
+ val newmod = new SFA(nopts)
newmod.refresh = false
- newmod.copyFrom(model);
- newmod.Minv = model.Minv;
- val mopts = model.opts.asInstanceOf[SFA.Opts];
- nopts.dim = mopts.dim;
- nopts.uconvg = mopts.uconvg;
- nopts.miter = mopts.miter;
- nopts.lambdau = mopts.lambdau;
- nopts.lambdam = mopts.lambdam;
- nopts.regumean = mopts.regumean;
- nopts.doUsers = mopts.doUsers;
- nopts.weightByUser = mopts.weightByUser;
- nopts.nmats = 2;
+ newmod.copyFrom(model)
+ newmod.Minv = model.Minv
+ val mopts = model.opts.asInstanceOf[SFA.Opts]
+ nopts.dim = mopts.dim
+ nopts.uconvg = mopts.uconvg
+ nopts.miter = mopts.miter
+ nopts.lambdau = mopts.lambdau
+ nopts.lambdam = mopts.lambdam
+ nopts.regumean = mopts.regumean
+ nopts.doUsers = mopts.doUsers
+ nopts.weightByUser = mopts.weightByUser
+ nopts.nmats = 2
val nn = new Learner(
new MatSource(Array(mat1, user, preds), nopts),
newmod,
@@ -425,21 +425,21 @@ object SFA {
}
// Preconditioned CG update
def PreCGupdate(p:Mat, r:Mat, z:Mat, Ap:Mat, x:Mat, Minv:Mat, weps:Float, convgd:Float) = {
- val safe = 300f;
- val pAp = (p dot Ap);
- max(pAp, weps, pAp);
- val rsold = (r dot z);
- val convec = rsold > convgd; // Check convergence
- val alpha = convec ∘ (rsold / pAp); // Only process unconverged elements
- min(alpha, safe, alpha);
- x ~ x + (p ∘ alpha);
- r ~ r - (Ap ∘ alpha);
- z ~ Minv * r;
- val rsnew = (z dot r); // order is important to avoid aliasing
- max(rsold, weps, rsold);
- val beta = convec ∘ (rsnew / rsold);
- min(beta, safe, beta);
- p ~ z + (p ∘ beta);
+ val safe = 300f
+ val pAp = (p dot Ap)
+ max(pAp, weps, pAp)
+ val rsold = (r dot z)
+ val convec = rsold > convgd; // Check convergence
+ val alpha = convec ∘ (rsold / pAp); // Only process unconverged elements
+ min(alpha, safe, alpha)
+ x ~ x + (p ∘ alpha)
+ r ~ r - (Ap ∘ alpha)
+ z ~ Minv * r
+ val rsnew = (z dot r); // order is important to avoid aliasing
+ max(rsold, weps, rsold)
+ val beta = convec ∘ (rsnew / rsold)
+    min(beta, safe, beta)
+ p ~ z + (p ∘ beta)
}
}
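
For reference, PreCGupdate above is the batched, matrix-valued form of textbook preconditioned conjugate gradient, with convec freezing already-converged columns and the safe clamps bounding alpha and beta. A minimal single-column sketch in plain Scala, assuming a dense symmetric A and a diagonal (Jacobi) preconditioner; the names here are illustrative, not BIDMach API:

object PCGSketch {
  def dot(u: Array[Double], v: Array[Double]): Double = {
    var s = 0.0; var i = 0
    while (i < u.length) { s += u(i) * v(i); i += 1 }
    s
  }

  // Dense mat-vec for a row-major symmetric matrix.
  def matvec(a: Array[Array[Double]], x: Array[Double]): Array[Double] =
    a.map(row => dot(row, x))

  // Solve A x = b starting from x = 0; minv holds 1/diag(A) elementwise.
  def pcg(a: Array[Array[Double]], b: Array[Double], minv: Array[Double], iters: Int): Array[Double] = {
    val n = b.length
    val x = new Array[Double](n)
    val r = b.clone                                  // r = b - A*0
    val z = Array.tabulate(n)(i => minv(i) * r(i))   // z = Minv r
    val p = z.clone
    var rz = dot(r, z)                               // rsold
    for (_ <- 0 until iters) {
      val ap = matvec(a, p)
      val alpha = rz / dot(p, ap)
      var i = 0
      while (i < n) { x(i) += alpha * p(i); r(i) -= alpha * ap(i); i += 1 }
      i = 0
      while (i < n) { z(i) = minv(i) * r(i); i += 1 }
      val rzNew = dot(r, z)                          // rsnew
      val beta = rzNew / rz
      i = 0
      while (i < n) { p(i) = z(i) + beta * p(i); i += 1 }
      rz = rzNew
    }
    x
  }
}
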
diff --git a/src/main/scala/BIDMach/models/SMF.scala b/src/main/scala/BIDMach/models/SMF.scala
index 4d8657e5..4a716e31 100755
--- a/src/main/scala/BIDMach/models/SMF.scala
+++ b/src/main/scala/BIDMach/models/SMF.scala
@@ -46,221 +46,221 @@ import BIDMach.Learner
class SMF(override val opts:SMF.Opts = new SMF.Options) extends FactorModel(opts) {
- var mm:Mat = null;
- var traceMem = false;
- var mzero:Mat = null;
+ var mm:Mat = null
+ var traceMem = false
+ var mzero:Mat = null
var slm:Mat = null;
var mlm:Mat = null;
- var iavg:Mat = null;
- var avg:Mat = null;
- var lamu:Mat = null;
- var itemsum:Mat = null;
- var itemcount:Mat = null;
- var nfeats:Int = 0;
- var nratings:Double = 0;
+ var iavg:Mat = null
+ var avg:Mat = null
+ var lamu:Mat = null
+ var itemsum:Mat = null
+ var itemcount:Mat = null
+ var nfeats:Int = 0
+ var nratings:Double = 0
// For integrated ADAGrad updater
- var vexp:Mat = null;
- var texp:Mat = null;
- var pexp:Mat = null;
- var cscale:Mat = null;
- var lrate:Mat = null;
- var uscale:Mat = null;
- var sumsq:Mat = null;
- var firststep = -1f;
- var waitsteps = 0;
- var epsilon = 0f;
- var aopts:ADAGrad.Opts = null;
+ var vexp:Mat = null
+ var texp:Mat = null
+ var pexp:Mat = null
+ var cscale:Mat = null
+ var lrate:Mat = null
+ var uscale:Mat = null
+ var sumsq:Mat = null
+ var firststep = -1f
+ var waitsteps = 0
+ var epsilon = 0f
+ var aopts:ADAGrad.Opts = null
override def init() = {
- mats = datasource.next;
- datasource.reset;
- nfeats = mats(0).nrows;
- val batchSize = mats(0).ncols;
- val d = opts.dim;
+ mats = datasource.next
+ datasource.reset
+ nfeats = mats(0).nrows
+ val batchSize = mats(0).ncols
+ val d = opts.dim
if (refresh) {
- mm = normrnd(0,0.01f,d,nfeats);
- mm = convertMat(mm);
- avg = mm.zeros(1,1)
- iavg = mm.zeros(nfeats,1);
- itemsum = mm.zeros(nfeats, 1);
- itemcount = mm.zeros(nfeats, 1);
- setmodelmats(Array(mm, iavg, avg));
+ mm = normrnd(0,0.01f,d,nfeats)
+ mm = convertMat(mm)
+ avg = mm.zeros(1,1)
+ iavg = mm.zeros(nfeats,1)
+ itemsum = mm.zeros(nfeats, 1)
+ itemcount = mm.zeros(nfeats, 1)
+ setmodelmats(Array(mm, iavg, avg))
}
useGPU = opts.useGPU && Mat.hasCUDA > 0;
- if (useGPU || useDouble) {
- gmats = new Array[Mat](mats.length);
- } else {
- gmats = mats;
- }
-
- modelmats(0) = convertMat(modelmats(0));
- modelmats(1) = convertMat(modelmats(1));
- modelmats(2) = convertMat(modelmats(2));
- mm = modelmats(0);
- iavg = modelmats(1);
- avg = modelmats(2);
+ if (useGPU || useDouble) {
+ gmats = new Array[Mat](mats.length)
+ } else {
+ gmats = mats
+ }
+
+ modelmats(0) = convertMat(modelmats(0))
+ modelmats(1) = convertMat(modelmats(1))
+ modelmats(2) = convertMat(modelmats(2))
+ mm = modelmats(0)
+ iavg = modelmats(1)
+ avg = modelmats(2)
lamu = mm.ones(d, 1) ∘ opts.lambdau
- if (opts.doUsers) lamu(0) = opts.regumean;
- slm = mm.ones(1,1) ∘ (opts.lambdam * batchSize);
- mlm = mm.ones(1,1) ∘ (opts.regmmean * batchSize);
- mzero = mm.zeros(1,1);
- uscale = mm.zeros(1,1);
- cscale = mm.ones(d, 1);
- cscale(0,0) = 0.0001f;
+ if (opts.doUsers) lamu(0) = opts.regumean
+ slm = mm.ones(1,1) ∘ (opts.lambdam * batchSize)
+ mlm = mm.ones(1,1) ∘ (opts.regmmean * batchSize)
+ mzero = mm.zeros(1,1)
+ uscale = mm.zeros(1,1)
+ cscale = mm.ones(d, 1)
+ cscale(0,0) = 0.0001f
if (opts.doUsers) mm(0,?) = 1f
- updatemats = new Array[Mat](3);
- updatemats(2) = mm.zeros(1,1);
- if (opts.aopts != null) initADAGrad(d, nfeats);
- vexp = convertMat(row(0.5f));
+ updatemats = new Array[Mat](3)
+ updatemats(2) = mm.zeros(1,1)
+ if (opts.aopts != null) initADAGrad(d, nfeats)
+ vexp = convertMat(row(0.5f))
}
def initADAGrad(d:Int, m:Int) = {
- aopts = opts.asInstanceOf[ADAGrad.Opts]
- firststep = -1f;
- lrate = convertMat(aopts.lrate);
- texp = if (aopts.texp.asInstanceOf[AnyRef] != null) convertMat(aopts.texp) else null;
- pexp = if (aopts.pexp.asInstanceOf[AnyRef] != null) convertMat(aopts.pexp) else null;
- vexp = convertMat(aopts.vexp);
- sumsq = convertMat(zeros(d, m));
- sumsq.set(aopts.initsumsq);
- waitsteps = aopts.waitsteps;
- epsilon = aopts.epsilon;
+ aopts = opts.asInstanceOf[ADAGrad.Opts]
+ firststep = -1f
+ lrate = convertMat(aopts.lrate)
+ texp = if (aopts.texp.asInstanceOf[AnyRef] != null) convertMat(aopts.texp) else null
+ pexp = if (aopts.pexp.asInstanceOf[AnyRef] != null) convertMat(aopts.pexp) else null
+ vexp = convertMat(aopts.vexp)
+ sumsq = convertMat(zeros(d, m))
+ sumsq.set(aopts.initsumsq)
+ waitsteps = aopts.waitsteps
+ epsilon = aopts.epsilon
}
def uupdate(sdata0:Mat, user:Mat, ipass:Int, pos:Long):Unit = {
- if (firststep <= 0) firststep = pos.toFloat;
- val step = (pos + firststep)/firststep;
- val texp = if (opts.asInstanceOf[Grad.Opts].texp.asInstanceOf[AnyRef] != null) {
- opts.asInstanceOf[Grad.Opts].texp.dv
- } else {
- opts.asInstanceOf[Grad.Opts].pexp.dv
- }
- uscale.set(opts.urate * math.pow(ipass+1, - texp).toFloat)
- val sdata = sdata0 - (iavg + avg);
- if (putBack < 0) {
- user.clear
- }
- val b = mm * sdata;
- val ucounts = sum(sdata0 != 0f);
- val uci = (ucounts + 1f) ^ (- vexp);
- for (i <- 0 until opts.uiter) {
- val preds = DDS(mm, user, sdata);
- val deriv = b - mm * preds - (user ∘ lamu);
- val du = (deriv ∘ uscale ∘ uci);
- if (opts.lsgd >= 0) {
- val dpreds = DDS(mm, du, sdata);
- accept(sdata, user, du, preds, dpreds, uscale, lamu, false);
- } else {
- user ~ user + du;
- }
+ if (firststep <= 0) firststep = pos.toFloat
+ val step = (pos + firststep)/firststep
+ val texp = if (opts.asInstanceOf[Grad.Opts].texp.asInstanceOf[AnyRef] != null) {
+ opts.asInstanceOf[Grad.Opts].texp.dv
+ } else {
+ opts.asInstanceOf[Grad.Opts].pexp.dv
+ }
+ uscale.set(opts.urate * math.pow(ipass+1, - texp).toFloat)
+ val sdata = sdata0 - (iavg + avg)
+ if (putBack < 0) {
+ user.clear
+ }
+ val b = mm * sdata
+ val ucounts = sum(sdata0 != 0f)
+ val uci = (ucounts + 1f) ^ (- vexp)
+ for (i <- 0 until opts.uiter) {
+ val preds = DDS(mm, user, sdata)
+ val deriv = b - mm * preds - (user ∘ lamu)
+ val du = (deriv ∘ uscale ∘ uci)
+ if (opts.lsgd >= 0) {
+ val dpreds = DDS(mm, du, sdata)
+ accept(sdata, user, du, preds, dpreds, uscale, lamu, false)
+ } else {
+ user ~ user + du
+ }
- if (opts.traceConverge) {
- println("step %d, loss %f" format (i, ((norm(sdata.contents - preds.contents) ^ 2f) + (sum(user dot (user ∘ lamu)))).dv/sdata.nnz));
- }
- }
+ if (opts.traceConverge) {
+ println("step %d, loss %f" format (i, ((norm(sdata.contents - preds.contents) ^ 2f) + (sum(user dot (user ∘ lamu)))).dv/sdata.nnz))
+ }
+ }
}
def mupdate(sdata0:Mat, user:Mat, ipass:Int, pos:Long):Unit = {
- val sdata = sdata0 - (iavg + avg);
+ val sdata = sdata0 - (iavg + avg)
// values to be accumulated
- val preds = DDS(mm, user, sdata);
- val diffs = sdata + 2f;
- diffs.contents ~ sdata.contents - preds.contents;
+ val preds = DDS(mm, user, sdata)
+ val diffs = sdata + 2f
+ diffs.contents ~ sdata.contents - preds.contents
if (ipass < 1) {
- itemsum ~ itemsum + sum(sdata0, 2);
- itemcount ~ itemcount + sum(sdata0 != 0f, 2);
- avg ~ sum(itemsum) / sum(itemcount);
- iavg ~ ((itemsum + avg) / (itemcount + 1)) - avg;
+ itemsum ~ itemsum + sum(sdata0, 2)
+ itemcount ~ itemcount + sum(sdata0 != 0f, 2)
+ avg ~ sum(itemsum) / sum(itemcount)
+ iavg ~ ((itemsum + avg) / (itemcount + 1)) - avg
}
val icomp = sdata0 != 0f
- val icount = sum(sdata0 != 0f, 2);
+ val icount = sum(sdata0 != 0f, 2)
updatemats(1) = (sum(diffs,2) - iavg*mlm) / (icount + 1f); // per-item term estimator
- updatemats(2) ~ sum(diffs.contents) / (diffs.contents.length + 1f);
+ updatemats(2) ~ sum(diffs.contents) / (diffs.contents.length + 1f)
val wuser = if (opts.weightByUser) {
- val iwt = 100f / max(sum(sdata != 0f), 100f);
- user ∘ iwt;
+      val iwt = 100f / max(sum(sdata != 0f), 100f)
+ user ∘ iwt
} else {
- user;
+ user
}
- if (firststep <= 0) firststep = pos.toFloat;
+ if (firststep <= 0) firststep = pos.toFloat
if (opts.lsgd >= 0 || opts.aopts == null) {
- updatemats(0) = (wuser *^ diffs - (mm ∘ slm)) / ((icount + 1).t ^ vexp); // simple derivative
- if (opts.lsgd >= 0) {
- val step = (pos + firststep)/firststep;
- uscale.set((lrate.dv * math.pow(step, - texp.dv)).toFloat);
- val dm = updatemats(0) ∘ uscale ∘ cscale;
- val dpreds = DDS(dm, user, sdata);
- accept(sdata, mm, dm, preds, dpreds, uscale, slm, true);
- }
+ updatemats(0) = (wuser *^ diffs - (mm ∘ slm)) / ((icount + 1).t ^ vexp); // simple derivative
+ if (opts.lsgd >= 0) {
+ val step = (pos + firststep)/firststep
+ uscale.set((lrate.dv * math.pow(step, - texp.dv)).toFloat)
+ val dm = updatemats(0) ∘ uscale ∘ cscale
+ val dpreds = DDS(dm, user, sdata)
+ accept(sdata, mm, dm, preds, dpreds, uscale, slm, true)
+ }
} else {
- if (texp.asInstanceOf[AnyRef] != null) {
- val step = (pos + firststep)/firststep;
- ADAGrad.multUpdate(wuser, diffs, modelmats(0), sumsq, null, lrate, texp, vexp, epsilon, step, waitsteps);
- } else {
- ADAGrad.multUpdate(wuser, diffs, modelmats(0), sumsq, null, lrate, pexp, vexp, epsilon, ipass + 1, waitsteps);
- }
+ if (texp.asInstanceOf[AnyRef] != null) {
+ val step = (pos + firststep)/firststep
+ ADAGrad.multUpdate(wuser, diffs, modelmats(0), sumsq, null, lrate, texp, vexp, epsilon, step, waitsteps)
+ } else {
+ ADAGrad.multUpdate(wuser, diffs, modelmats(0), sumsq, null, lrate, pexp, vexp, epsilon, ipass + 1, waitsteps)
+ }
}
- if (opts.doUsers) mm(0,?) = 1f;
+ if (opts.doUsers) mm(0,?) = 1f
}
def accept(sdata:Mat, mmod:Mat, du:Mat, preds:Mat, dpreds:Mat, scale:Mat, lambda:Mat, flip:Boolean) = {
- // println("sdata " + FMat(sdata.contents)(0->5,0).t)
- val diff1 = preds + 0f;
- diff1.contents ~ sdata.contents - preds.contents;
-// println("sdata %d %s" format (if (flip) 1 else 0, FMat(sdata.contents)(0->5,0).t.toString));
-// println("preds %d %s" format (if (flip) 1 else 0, FMat(preds.contents)(0->5,0).t.toString));
-// println("diff %d %s" format (if (flip) 1 else 0, FMat(diff1.contents)(0->5,0).t.toString));
-// println("sdata "+FMat(sdata.contents)(0->5,0).t.toString);
- val diff2 = diff1 + 0f;
- diff2.contents ~ diff1.contents - dpreds.contents;
- diff1.contents ~ diff1.contents ∘ diff1.contents;
- diff2.contents ~ diff2.contents ∘ diff2.contents;
- val rmmod = mmod + 1f;
- normrnd(0, opts.lsgd, rmmod);
- val mmod2 = mmod + du + rmmod ∘ scale;
- val loss1 = (if (flip) sum(diff1,2).t else sum(diff1)) + (mmod dot (mmod ∘ lambda));
- val loss2 = (if (flip) sum(diff2,2).t else sum(diff2)) + (mmod2 dot (mmod2 ∘ lambda));
-
- val accprob = erfc((loss2 - loss1) /scale);
- val rsel = accprob + 0f;
- rand(rsel);
- val selector = rsel < accprob;
- mmod ~ (mmod2 ∘ selector) + (mmod ∘ (1f - selector));
- if (opts.traceConverge) {
- println("accepted %d %f %f %f" format (if (flip) 1 else 0, mean(selector).dv, mean(loss1).dv, mean(loss2).dv));
- }
+ // println("sdata " + FMat(sdata.contents)(0->5,0).t)
+ val diff1 = preds + 0f
+ diff1.contents ~ sdata.contents - preds.contents
+// println("sdata %d %s" format (if (flip) 1 else 0, FMat(sdata.contents)(0->5,0).t.toString))
+// println("preds %d %s" format (if (flip) 1 else 0, FMat(preds.contents)(0->5,0).t.toString))
+// println("diff %d %s" format (if (flip) 1 else 0, FMat(diff1.contents)(0->5,0).t.toString))
+// println("sdata "+FMat(sdata.contents)(0->5,0).t.toString)
+ val diff2 = diff1 + 0f
+ diff2.contents ~ diff1.contents - dpreds.contents
+ diff1.contents ~ diff1.contents ∘ diff1.contents
+ diff2.contents ~ diff2.contents ∘ diff2.contents
+ val rmmod = mmod + 1f
+ normrnd(0, opts.lsgd, rmmod)
+ val mmod2 = mmod + du + rmmod ∘ scale
+ val loss1 = (if (flip) sum(diff1,2).t else sum(diff1)) + (mmod dot (mmod ∘ lambda))
+ val loss2 = (if (flip) sum(diff2,2).t else sum(diff2)) + (mmod2 dot (mmod2 ∘ lambda))
+
+    val accprob = erfc((loss2 - loss1) / scale)
+ val rsel = accprob + 0f
+ rand(rsel)
+ val selector = rsel < accprob
+ mmod ~ (mmod2 ∘ selector) + (mmod ∘ (1f - selector))
+ if (opts.traceConverge) {
+ println("accepted %d %f %f %f" format (if (flip) 1 else 0, mean(selector).dv, mean(loss1).dv, mean(loss2).dv))
+ }
}
def evalfun(sdata0:Mat, user:Mat, ipass:Int, pos:Long):FMat = {
- val sdata = sdata0 - (iavg + avg);
- val preds = DDS(mm, user, sdata);
- val dc = sdata.contents
- val pc = preds.contents
- val diff = dc - pc;
- val vv = diff ddot diff;
- -sqrt(row(vv/sdata.nnz))
+ val sdata = sdata0 - (iavg + avg)
+ val preds = DDS(mm, user, sdata)
+ val dc = sdata.contents
+ val pc = preds.contents
+ val diff = dc - pc
+ val vv = diff ddot diff
+ -sqrt(row(vv/sdata.nnz))
}
}
object SMF {
trait Opts extends FactorModel.Opts {
- var ueps = 1e-10f
- var uconvg = 1e-3f
- var miter = 5
- var lambdau = 5f
- var lambdam = 5f
- var regumean = 0f
- var regmmean = 0f
- var urate = 0.1f
- var lsgd = 0.1f
- var traceConverge = false
- var doUsers = true
- var weightByUser = false
- var aopts:ADAGrad.Opts = null;
- var minv = 1f;
- var maxv = 5f;
-
+ var ueps = 1e-10f
+ var uconvg = 1e-3f
+ var miter = 5
+ var lambdau = 5f
+ var lambdam = 5f
+ var regumean = 0f
+ var regmmean = 0f
+ var urate = 0.1f
+ var lsgd = 0.1f
+ var traceConverge = false
+ var doUsers = true
+ var weightByUser = false
+ var aopts:ADAGrad.Opts = null
+ var minv = 1f
+ var maxv = 5f
+
}
class Options extends Opts {}
@@ -271,15 +271,15 @@ object SMF {
opts.putBack = -1
opts.npasses = 4
opts.lrate = 0.1
- opts.initUval = 0f;
+ opts.initUval = 0f
opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
- val nn = new Learner(
- new MatSource(Array(mat0:Mat), opts),
- new SMF(opts),
- null,
- new Grad(opts),
- null,
- opts)
+ val nn = new Learner(
+ new MatSource(Array(mat0:Mat), opts),
+ new SMF(opts),
+ null,
+ new Grad(opts),
+ null,
+ opts)
(nn, opts)
}
@@ -289,17 +289,17 @@ object SMF {
opts.dim = d
opts.putBack = -1
opts.npasses = 4
- opts.lrate = 0.1;
- opts.initUval = 0f;
- opts.batchSize = math.min(100000, mat0.ncols/30 + 1);
- opts.aopts = opts;
- val nn = new Learner(
- new MatSource(Array(mat0:Mat), opts),
- new SMF(opts),
- null,
- null,
- null,
- opts);
+ opts.lrate = 0.1
+ opts.initUval = 0f
+ opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
+ opts.aopts = opts
+ val nn = new Learner(
+ new MatSource(Array(mat0:Mat), opts),
+ new SMF(opts),
+ null,
+ null,
+ null,
+ opts)
(nn, opts)
}
@@ -309,8 +309,8 @@ object SMF {
opts.dim = d
opts.putBack = 1
opts.npasses = 4
- opts.lrate = 0.1;
- opts.initUval = 0f;
+ opts.lrate = 0.1
+ opts.initUval = 0f
opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
val nn = new Learner(
new MatSource(Array(mat0, user0), opts),
@@ -328,10 +328,10 @@ object SMF {
opts.dim = d
opts.putBack = 1
opts.npasses = 4
- opts.lrate = 0.1;
- opts.initUval = 0f;
- opts.batchSize = math.min(100000, mat0.ncols/30 + 1);
- opts.aopts = opts;
+ opts.lrate = 0.1
+ opts.initUval = 0f
+ opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
+ opts.aopts = opts
val nn = new Learner(
new MatSource(Array(mat0, user0), opts),
new SMF(opts),
@@ -343,23 +343,23 @@ object SMF {
}
def predictor(model0:Model, mat1:Mat, preds:Mat) = {
- class xopts extends Learner.Options with SMF.Opts with MatSource.Opts with Grad.Opts
+ class xopts extends Learner.Options with SMF.Opts with MatSource.Opts with Grad.Opts
val model = model0.asInstanceOf[SMF]
- val nopts = new xopts;
+ val nopts = new xopts
nopts.batchSize = math.min(10000, mat1.ncols/30 + 1)
nopts.putBack = 1
- val newmod = new SMF(nopts);
+ val newmod = new SMF(nopts)
newmod.refresh = false
- newmod.copyFrom(model);
- val mopts = model.opts.asInstanceOf[SMF.Opts];
- nopts.dim = mopts.dim;
- nopts.uconvg = mopts.uconvg;
- nopts.miter = mopts.miter;
- nopts.lambdau = mopts.lambdau;
- nopts.lambdam = mopts.lambdam;
- nopts.regumean = mopts.regumean;
- nopts.doUsers = mopts.doUsers;
- nopts.weightByUser = mopts.weightByUser;
+ newmod.copyFrom(model)
+ val mopts = model.opts.asInstanceOf[SMF.Opts]
+ nopts.dim = mopts.dim
+ nopts.uconvg = mopts.uconvg
+ nopts.miter = mopts.miter
+ nopts.lambdau = mopts.lambdau
+ nopts.lambdam = mopts.lambdam
+ nopts.regumean = mopts.regumean
+ nopts.doUsers = mopts.doUsers
+ nopts.weightByUser = mopts.weightByUser
val nn = new Learner(
new MatSource(Array(mat1, preds), nopts),
newmod,
diff --git a/src/main/scala/BIDMach/models/SVD.scala b/src/main/scala/BIDMach/models/SVD.scala
index b2196583..b77ac790 100755
--- a/src/main/scala/BIDMach/models/SVD.scala
+++ b/src/main/scala/BIDMach/models/SVD.scala
@@ -25,143 +25,143 @@ class SVD(opts:SVD.Opts = new SVD.Options) extends Model(opts) {
var Q:Mat = null; // (Left) Singular vectors
var SV:Mat = null; // Singular values
- var P:Mat = null;
- var R:Mat = null;
- var Mean:Mat = null;
- var batchCount = 0;
- var batchStep = 0;
- var batchSize = 0;
- var meanCount = 0;
- var alpha:Mat = null;
+ var P:Mat = null
+ var R:Mat = null
+ var Mean:Mat = null
+ var batchCount = 0
+ var batchStep = 0
+ var batchSize = 0
+ var meanCount = 0
+ var alpha:Mat = null
def init() = {
- val nfeats = mats(0).nrows;
- batchSize = mats(0).ncols;
- if (refresh) {
- Q = normrnd(0, 1, nfeats, opts.dim); // Randomly initialize Q
-// QRdecompt(Q, Q, null); // Orthonormalize it
- Q ~ Q / sqrt(Q dot Q);
- SV = Q.zeros(1, opts.dim); // Holder for Singular values
- if (opts.subMean) Mean = Q.zeros(nfeats, 1)
- } else {
- Q = modelmats(0);
- SV = modelmats(1);
- if (opts.subMean) Mean = modelmats(2);
- }
- Q = convertMat(Q); // Move to GPU or double if needed
- SV = convertMat(SV);
- if (opts.subMean) {
- Mean = convertMat(Mean);
- setmodelmats(Array(Q, SV, Mean));
- Mean.clear;
- } else {
- setmodelmats(Array(Q, SV));
- }
- P = Q.zeros(Q.nrows, Q.ncols); // Zero P
- R = Q.zeros(opts.dim, opts.dim);
- alpha = Q.zeros(1,1);
+ val nfeats = mats(0).nrows
+ batchSize = mats(0).ncols
+ if (refresh) {
+ Q = normrnd(0, 1, nfeats, opts.dim); // Randomly initialize Q
+// QRdecompt(Q, Q, null); // Orthonormalize it
+ Q ~ Q / sqrt(Q dot Q)
+ SV = Q.zeros(1, opts.dim); // Holder for Singular values
+ if (opts.subMean) Mean = Q.zeros(nfeats, 1)
+ } else {
+ Q = modelmats(0)
+ SV = modelmats(1)
+ if (opts.subMean) Mean = modelmats(2)
+ }
+ Q = convertMat(Q); // Move to GPU or double if needed
+ SV = convertMat(SV)
+ if (opts.subMean) {
+ Mean = convertMat(Mean)
+ setmodelmats(Array(Q, SV, Mean))
+ Mean.clear
+ } else {
+ setmodelmats(Array(Q, SV))
+ }
+ P = Q.zeros(Q.nrows, Q.ncols); // Zero P
+ R = Q.zeros(opts.dim, opts.dim)
+ alpha = Q.zeros(1,1)
- updatemats = Array(P);
- batchCount = 0;
- batchStep = opts.batchesPerUpdate;
+ updatemats = Array(P)
+ batchCount = 0
+ batchStep = opts.batchesPerUpdate
}
def dobatch(mats:Array[Mat], ipass:Int, pos:Long):Unit = {
- val M = mats(0);
+ val M = mats(0)
if (opts.subMean && ipass == 0) {
- meanCount += 1;
- alpha.set(1f/meanCount);
- val mn = mean(M, 2);
+ meanCount += 1
+ alpha.set(1f/meanCount)
+ val mn = mean(M, 2)
Mean ~ Mean + alpha * (mn - Mean);
}
val Qt = Q.t; // Compute P = M * M^t * Q efficiently
- val QtM = Qt * M;
- if (opts.subMean) QtM ~ QtM - (Qt * Mean);
- val PPt = QtM *^ M;
- if (opts.subMean) PPt ~ PPt - (sum(QtM,2) *^ Mean);
+ val QtM = Qt * M
+ if (opts.subMean) QtM ~ QtM - (Qt * Mean)
+ val PPt = QtM *^ M
+ if (opts.subMean) PPt ~ PPt - (sum(QtM,2) *^ Mean)
val PP = PPt.t
if (ipass < opts.miniBatchPasses) {
if (batchCount >= batchStep) {
subspaceIter; // Do minibatch subspace iterations
- batchCount = 0;
- batchStep *= 2;
- P.clear;
+ batchCount = 0
+ batchStep *= 2
+ P.clear
}
}
- P ~ P + PP;
- batchCount += 1;
+ P ~ P + PP
+ batchCount += 1
}
def evalbatch(mat:Array[Mat], ipass:Int, pos:Long):FMat = {
- val M = mat(0);
- if (ogmats != null) {
- val Qt = Q.t;
- val QtM = Qt * M;
- if (opts.subMean) QtM ~ QtM - Qt * Mean;
- ogmats(0) = QtM; // Save right singular vectors
- val PPt = QtM *^ M;
- if (opts.subMean) PPt ~ PPt - QtM *^ Mean;
- P <-- PPt.t
- batchCount = 1;
- }
- SV ~ P ∙ Q; // Estimate the singular values
- val ndiff = opts.evalType match {
- case 0 => {
- norm(P - (SV ∘ Q)).dv / (math.sqrt(P.length)*M.ncols*batchCount); // residual
- }
- case 1 => {
- max(SV, 1e-6f, SV);
- norm((P / SV) - Q).dv / math.sqrt(P.length);
- }
- case 2 => {
- val Qt = Q.t;
- val QtM = Qt * M;
- if (opts.subMean) QtM ~ QtM - (Qt * Mean);
- val diff = sum(snorm(M)) - sum(QtM dotr QtM);
- if (opts.subMean) diff ~ diff + ((Mean ∙ Mean) * M.ncols - (Mean ∙ sum(M, 2)) * 2.0);
- math.sqrt(diff.dv) / math.sqrt(M.length);
- }
- }
+ val M = mat(0)
+ if (ogmats != null) {
+      val Qt = Q.t
+ val QtM = Qt * M
+ if (opts.subMean) QtM ~ QtM - Qt * Mean
+ ogmats(0) = QtM; // Save right singular vectors
+ val PPt = QtM *^ M
+ if (opts.subMean) PPt ~ PPt - QtM *^ Mean
+ P <-- PPt.t
+ batchCount = 1
+ }
+ SV ~ P ∙ Q; // Estimate the singular values
+ val ndiff = opts.evalType match {
+ case 0 => {
+ norm(P - (SV ∘ Q)).dv / (math.sqrt(P.length)*M.ncols*batchCount); // residual
+ }
+ case 1 => {
+ max(SV, 1e-6f, SV)
+        norm((P / SV) - Q).dv / math.sqrt(P.length)
+ }
+ case 2 => {
+ val Qt = Q.t
+ val QtM = Qt * M
+ if (opts.subMean) QtM ~ QtM - (Qt * Mean)
+        val diff = sum(snorm(M)) - sum(QtM dotr QtM)
+ if (opts.subMean) diff ~ diff + ((Mean ∙ Mean) * M.ncols - (Mean ∙ sum(M, 2)) * 2.0)
+ math.sqrt(diff.dv) / math.sqrt(M.length)
+ }
+ }
row(-ndiff); // return the norm of the residual
}
override def updatePass(ipass:Int) = {
if (ipass < opts.asInstanceOf[Learner.Options].npasses-1) {
- if (ipass >= opts.miniBatchPasses) {
- if (opts.doRayleighRitz && ipass % 2 == 1)
- RayleighRitz;
- else
- subspaceIter;
- }
- P.clear;
- batchCount = 0;
- batchStep = opts.batchesPerUpdate;
+ if (ipass >= opts.miniBatchPasses) {
+ if (opts.doRayleighRitz && ipass % 2 == 1)
+ RayleighRitz
+ else
+ subspaceIter
+ }
+ P.clear
+ batchCount = 0
+ batchStep = opts.batchesPerUpdate
} else {
- SV ~ P ∙ Q;
+ SV ~ P ∙ Q
}
}
def RayleighRitz = {
- R ~ P ^* Q;
- val (evals, evecs) = feig(cpu(R));
- R <-- evecs(?, irow((R.ncols-1) to 0 by -1));
- Q <-- Q * R;
- P <-- P * R;
+ R ~ P ^* Q
+ val (evals, evecs) = feig(cpu(R))
+ R <-- evecs(?, irow((R.ncols-1) to 0 by -1))
+ Q <-- Q * R
+ P <-- P * R
}
def subspaceIter = {
- QRdecompt(P, Q, null);
+ QRdecompt(P, Q, null)
}
}
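
Note on the iteration above: over a pass, dobatch accumulates P ≈ A Aᵀ Q a minibatch at a time (PPt = (Qᵀ M) Mᵀ per batch, minus the mean terms when subMean is set), so subspaceIter's QRdecompt(P, Q, null) is one step of block power iteration, Q ← orth(A Aᵀ Q), while RayleighRitz instead rotates Q and P by the eigenvectors of the small d×d matrix R = Pᵀ Q to speed convergence; SV ~ P ∙ Q then reads off the Rayleigh quotients qᵢᵀ(A Aᵀ)qᵢ. A dense, self-contained sketch of one such step, with modified Gram-Schmidt standing in for QRdecompt (illustrative names, not BIDMach API):

object SubspaceSketch {
  type Mx = Array[Array[Double]]                     // row-major: a(i)(j)

  def mul(a: Mx, b: Mx): Mx =
    Array.tabulate(a.length, b(0).length)((i, j) =>
      (0 until b.length).map(k => a(i)(k) * b(k)(j)).sum)

  def transpose(a: Mx): Mx =
    Array.tabulate(a(0).length, a.length)((i, j) => a(j)(i))

  // Orthonormalize the columns (modified Gram-Schmidt), in place of QRdecompt.
  def orth(p: Mx): Mx = {
    val q = p.map(_.clone)
    val n = q.length; val d = q(0).length
    for (j <- 0 until d) {
      for (k <- 0 until j) {
        val proj = (0 until n).map(i => q(i)(j) * q(i)(k)).sum
        for (i <- 0 until n) q(i)(j) -= proj * q(i)(k)
      }
      val nrm = math.sqrt((0 until n).map(i => q(i)(j) * q(i)(j)).sum)
      for (i <- 0 until n) q(i)(j) /= nrm
    }
    q
  }

  // One full-data step of block power iteration: Q <- orth(A A^T Q).
  def step(a: Mx, q: Mx): Mx = orth(mul(a, mul(transpose(a), q)))
}
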
object SVD {
trait Opts extends Model.Opts {
- var miniBatchPasses = 1;
- var batchesPerUpdate = 10;
- var evalType = 0;
- var doRayleighRitz = true;
- var subMean = true;
+ var miniBatchPasses = 1
+ var batchesPerUpdate = 10
+ var evalType = 0
+ var doRayleighRitz = true
+ var subMean = true
}
class Options extends Opts {}
@@ -169,46 +169,46 @@ object SVD {
class MatOptions extends Learner.Options with SVD.Opts with MatSource.Opts with Batch.Opts
def learner(mat:Mat):(Learner, MatOptions) = {
- val opts = new MatOptions;
- opts.batchSize = math.min(100000, mat.ncols/30 + 1);
- opts.updateAll = true;
- val nn = new Learner(
- new MatSource(Array(mat), opts),
- new SVD(opts),
- null,
- new Batch(opts),
- null,
- opts)
+ val opts = new MatOptions
+ opts.batchSize = math.min(100000, mat.ncols/30 + 1)
+ opts.updateAll = true
+ val nn = new Learner(
+ new MatSource(Array(mat), opts),
+ new SVD(opts),
+ null,
+ new Batch(opts),
+ null,
+ opts)
(nn, opts)
}
class FileOptions extends Learner.Options with SVD.Opts with FileSource.Opts with Batch.Opts
def learner(fnames:String):(Learner, FileOptions) = {
- val opts = new FileOptions;
- opts.batchSize = 10000;
- opts.fnames = List(FileSource.simpleEnum(fnames, 1, 0));
- opts.updateAll = true;
- implicit val threads = threadPool(4);
- val nn = new Learner(
- new FileSource(opts),
- new SVD(opts),
- null,
- new Batch(opts),
- null,
- opts)
+ val opts = new FileOptions
+ opts.batchSize = 10000
+ opts.fnames = List(FileSource.simpleEnum(fnames, 1, 0))
+ opts.updateAll = true
+ implicit val threads = threadPool(4)
+ val nn = new Learner(
+ new FileSource(opts),
+ new SVD(opts),
+ null,
+ new Batch(opts),
+ null,
+ opts)
(nn, opts)
}
- class PredOptions extends Learner.Options with SVD.Opts with MatSource.Opts with MatSink.Opts;
+ class PredOptions extends Learner.Options with SVD.Opts with MatSource.Opts with MatSink.Opts
// This function constructs a predictor from an existing model
def predictor(model:Model, mat1:Mat):(Learner, PredOptions) = {
- val nopts = new PredOptions;
+ val nopts = new PredOptions
nopts.batchSize = math.min(10000, mat1.ncols/30 + 1)
- nopts.dim = model.opts.dim;
- nopts.miniBatchPasses = 0;
- val newmod = new SVD(nopts);
+ nopts.dim = model.opts.dim
+ nopts.miniBatchPasses = 0
+ val newmod = new SVD(nopts)
newmod.refresh = false
model.copyTo(newmod)
val nn = new Learner(
@@ -221,18 +221,18 @@ object SVD {
(nn, nopts)
}
- class FilePredOptions extends Learner.Options with SVD.Opts with FileSource.Opts with FileSink.Opts;
+ class FilePredOptions extends Learner.Options with SVD.Opts with FileSource.Opts with FileSink.Opts
// This function constructs a predictor from an existing model
def predictor(model:Model, infnames:String, outfnames:String):(Learner, FilePredOptions) = {
- val nopts = new FilePredOptions;
- nopts.dim = model.opts.dim;
- nopts.fnames = List(FileSource.simpleEnum(infnames, 1, 0));
- nopts.ofnames = List(FileSource.simpleEnum(outfnames, 1, 0));
- val newmod = new SVD(nopts);
+ val nopts = new FilePredOptions
+ nopts.dim = model.opts.dim
+ nopts.fnames = List(FileSource.simpleEnum(infnames, 1, 0))
+ nopts.ofnames = List(FileSource.simpleEnum(outfnames, 1, 0))
+ val newmod = new SVD(nopts)
newmod.refresh = false
- model.copyTo(newmod);
- implicit val threads = threadPool(4);
+ model.copyTo(newmod)
+ implicit val threads = threadPool(4)
val nn = new Learner(
new FileSource(nopts),
newmod,
diff --git a/src/main/scala/BIDMach/networks/Net.scala b/src/main/scala/BIDMach/networks/Net.scala
index 37ebac18..08f0f411 100644
--- a/src/main/scala/BIDMach/networks/Net.scala
+++ b/src/main/scala/BIDMach/networks/Net.scala
@@ -10,8 +10,8 @@ import BIDMach.mixins._
import BIDMach.models._
import BIDMach._
import BIDMach.networks.layers._
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
/**
* Basic Net class. Learns a supervised map from input blocks to output (target) data blocks.
@@ -19,216 +19,216 @@ import java.util.HashMap;
* The network topology is specified by opts.layers which is a sequence of "NodeSet" objects. There is a NodeSet
* Class for each Layer class, which holds the params for defining that layer. There is also an inputs parameter which points
* to the set of Node instances that mirror the final network structure.
- *
+ *
*/
class Net(override val opts:Net.Opts = new Net.Options) extends Model(opts) {
- var layers:Array[Layer] = null;
- var output_layers:Array[Layer] = null;
- var targmap:Mat = null;
- var mask:Mat = null;
- var bufmat:Mat = null;
- var modelMap:HashMap[String,Int] = null;
- var batchSize = -1;
- var imodel = 0;
- var initialize = false;
+ var layers:Array[Layer] = null
+ var output_layers:Array[Layer] = null
+ var targmap:Mat = null
+ var mask:Mat = null
+ var bufmat:Mat = null
+ var modelMap:HashMap[String,Int] = null
+ var batchSize = -1
+ var imodel = 0
+ var initialize = false
override def init() = {
-// mats = datasource.next;
- var nfeats = mats(0).nrows;
- batchSize = mats(0).ncols
- targmap = if (opts.targmap.asInstanceOf[AnyRef] != null) convertMat(opts.targmap) else null;
- mask = if (opts.dmask.asInstanceOf[AnyRef] != null) convertMat(opts.dmask) else null;
- createLayers;
- if (output_layers == null) output_layers = Array(layers(layers.length-1));
- if (modelMap == null) {
- modelMap = new HashMap[String,Int];
- }
- imodel = 0;
- layers.map((x:Layer) => if (x != null)x.getModelMats(this));
- if (refresh) {
- setmodelmats(new Array[Mat](imodel + modelMap.size));
- }
- if (updatemats == null) updatemats = new Array[Mat](modelmats.length);
- for (i <- 0 until modelmats.length) {
- if (modelmats(i).asInstanceOf[AnyRef] != null) modelmats(i) = convertMat(modelmats(i));
- if (updatemats(i).asInstanceOf[AnyRef] != null) {
- updatemats(i) = convertMat(updatemats(i));
- updatemats(i).clear;
- }
- };
- if (useGPU) copyMats(mats, gmats);
- val pb = putBack;
- putBack = -1;
- initialize = true;
- evalbatch(gmats, 0, 0);
- initialize = false;
- putBack = pb;
-// datasource.reset;
+// mats = datasource.next
+ var nfeats = mats(0).nrows
+ batchSize = mats(0).ncols
+ targmap = if (opts.targmap.asInstanceOf[AnyRef] != null) convertMat(opts.targmap) else null
+ mask = if (opts.dmask.asInstanceOf[AnyRef] != null) convertMat(opts.dmask) else null
+ createLayers
+ if (output_layers == null) output_layers = Array(layers(layers.length-1))
+ if (modelMap == null) {
+ modelMap = new HashMap[String,Int]
+ }
+ imodel = 0
+    layers.map((x:Layer) => if (x != null) x.getModelMats(this))
+ if (refresh) {
+ setmodelmats(new Array[Mat](imodel + modelMap.size))
+ }
+ if (updatemats == null) updatemats = new Array[Mat](modelmats.length)
+ for (i <- 0 until modelmats.length) {
+ if (modelmats(i).asInstanceOf[AnyRef] != null) modelmats(i) = convertMat(modelmats(i))
+ if (updatemats(i).asInstanceOf[AnyRef] != null) {
+ updatemats(i) = convertMat(updatemats(i))
+ updatemats(i).clear
+ }
+ }
+ if (useGPU) copyMats(mats, gmats)
+ val pb = putBack
+ putBack = -1
+ initialize = true
+ evalbatch(gmats, 0, 0)
+ initialize = false
+ putBack = pb
+// datasource.reset
}
def createLayers = {
- val nodes = opts.nodeset.nodes;
- layers = new Array[Layer](opts.nodeset.nnodes);
+ val nodes = opts.nodeset.nodes
+ layers = new Array[Layer](opts.nodeset.nnodes)
for (i <- 0 until opts.nodeset.nnodes) {
- layers(i) = nodes(i).create(this);
- nodes(i).myLayer = layers(i);
+ layers(i) = nodes(i).create(this)
+ nodes(i).myLayer = layers(i)
}
for (i <- 0 until opts.nodeset.nnodes) {
- for (j <- 0 until nodes(i).inputs.length) {
- if (nodes(i).inputs(j) != null) {
- val nodeTerm = nodes(i).inputs(j);
- layers(i).setInput(j, new LayerTerm(nodeTerm.node.myLayer, nodeTerm.term));
+ for (j <- 0 until nodes(i).inputs.length) {
+ if (nodes(i).inputs(j) != null) {
+ val nodeTerm = nodes(i).inputs(j)
+ layers(i).setInput(j, new LayerTerm(nodeTerm.node.myLayer, nodeTerm.term))
}
- }
+ }
}
}
def assignInputs(gmats:Array[Mat], ipass:Int, pos:Long) {
- layers(0).output = gmats(0);
+ layers(0).output = gmats(0)
}
def assignTargets(gmats:Array[Mat], ipass:Int, pos:Long) {
- if (targmap.asInstanceOf[AnyRef] != null) {
- layers(layers.length-1).target = targmap * gmats(0);
- } else if (gmats.length > 1) {
- layers(layers.length-1).target = full(gmats(1));
- }
+ if (targmap.asInstanceOf[AnyRef] != null) {
+ layers(layers.length-1).target = targmap * gmats(0)
+ } else if (gmats.length > 1) {
+ layers(layers.length-1).target = full(gmats(1))
+ }
}
def dobatch(gmats:Array[Mat], ipass:Int, pos:Long):Unit = {
- if (batchSize < 0) batchSize = gmats(0).ncols;
+ if (batchSize < 0) batchSize = gmats(0).ncols
if (batchSize == gmats(0).ncols) { // discard odd-sized minibatches
- assignInputs(gmats, ipass, pos);
- assignTargets(gmats, ipass, pos);
- if (mask.asInstanceOf[AnyRef] != null) {
- modelmats(0) ~ modelmats(0) ∘ mask;
- }
- var i = 0;
- while (i < layers.length) {
- if (opts.debug > 0) {
- println("dobatch forward %d %s" format (i, layers(i).getClass))
- }
- layers(i).forward;
- i += 1;
- }
- var j = 0;
+ assignInputs(gmats, ipass, pos)
+ assignTargets(gmats, ipass, pos)
+ if (mask.asInstanceOf[AnyRef] != null) {
+ modelmats(0) ~ modelmats(0) ∘ mask
+ }
+ var i = 0
+ while (i < layers.length) {
+ if (opts.debug > 0) {
+ println("dobatch forward %d %s" format (i, layers(i).getClass))
+ }
+ layers(i).forward
+ i += 1
+ }
+ var j = 0
while (j < output_layers.length) {
- output_layers(j).deriv.set(1);
- j += 1;
+ output_layers(j).deriv.set(1)
+ j += 1
}
if (opts.aopts == null) {
- for (j <- 0 until updatemats.length) updatemats(j).clear;
+ for (j <- 0 until updatemats.length) updatemats(j).clear
+ }
+ while (i > 1) {
+ i -= 1
+ if (opts.debug > 0) {
+ println("dobatch backward %d %s" format (i, layers(i).getClass))
+ }
+ layers(i).backward(ipass, pos)
+ }
+ if (mask.asInstanceOf[AnyRef] != null) {
+ updatemats(0) ~ updatemats(0) ∘ mask
}
- while (i > 1) {
- i -= 1;
- if (opts.debug > 0) {
- println("dobatch backward %d %s" format (i, layers(i).getClass))
- }
- layers(i).backward(ipass, pos);
- }
- if (mask.asInstanceOf[AnyRef] != null) {
- updatemats(0) ~ updatemats(0) ∘ mask;
- }
}
}
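
Note on dobatch above: this is a standard backpropagation pass — forward through all layers, seed each output layer's deriv with 1, then backward from the last layer down to layer 1 (layer 0 is the input and owns no gradients), with opts.dmask optionally zeroing masked model weights before the forward pass and masked gradient entries afterwards.
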
def evalbatch(mats:Array[Mat], ipass:Int, pos:Long):FMat = {
- if (batchSize < 0) batchSize = gmats(0).ncols;
- if (batchSize == gmats(0).ncols) {
- assignInputs(gmats, ipass, pos);
- assignTargets(gmats, ipass, pos);
- if (mask.asInstanceOf[AnyRef] != null) {
- modelmats(0) ~ modelmats(0) ∘ mask;
- }
- var i = 0;
- while (i < layers.length) {
- if (opts.debug > 0) {
- println("evalbatch forward %d %s" format (i, layers(i).getClass))
- }
- layers(i).forward;
- i += 1;
- }
- if (putBack >= 0) {
- output_layers(output_layers.length-1).output.colslice(0, gmats(0).ncols, gmats(1));
- }
- val scores = zeros(output_layers.length, 1);
- var j = 0;
+ if (batchSize < 0) batchSize = gmats(0).ncols
+ if (batchSize == gmats(0).ncols) {
+ assignInputs(gmats, ipass, pos)
+ assignTargets(gmats, ipass, pos)
+ if (mask.asInstanceOf[AnyRef] != null) {
+ modelmats(0) ~ modelmats(0) ∘ mask
+ }
+ var i = 0
+ while (i < layers.length) {
+ if (opts.debug > 0) {
+ println("evalbatch forward %d %s" format (i, layers(i).getClass))
+ }
+ layers(i).forward
+ i += 1
+ }
+ if (putBack >= 0) {
+ output_layers(output_layers.length-1).output.colslice(0, gmats(0).ncols, gmats(1))
+ }
+ val scores = zeros(output_layers.length, 1)
+ var j = 0
while (j < output_layers.length) {
- scores(j) = output_layers(j).score.v;
- if (ogmats != null && j < ogmats.length) ogmats(j) = output_layers(j).output.asMat;
- j += 1;
+ scores(j) = output_layers(j).score.v
+ if (ogmats != null && j < ogmats.length) ogmats(j) = output_layers(j).output.asMat
+ j += 1
}
- scores;
- } else {
- zeros(output_layers.length, 1);
- }
+ scores
+ } else {
+ zeros(output_layers.length, 1)
+ }
}
override def saveMetaData(fname:String) = {
import java.io._
- val str = BIDMat.JSON.toJSON(modelMap, true);
- val writer = new PrintWriter(new File(fname + "metadata.json"));
- writer.print(str);
- writer.close;
+ val str = BIDMat.JSON.toJSON(modelMap, true)
+ val writer = new PrintWriter(new File(fname + "metadata.json"))
+ writer.print(str)
+ writer.close
}
override def loadMetaData(fname:String) = {
import java.io._
- val fr = new BufferedReader(new FileReader(fname+"metadata.json"));
- val strbuf = new StringBuffer;
- var line:String = null;
+ val fr = new BufferedReader(new FileReader(fname+"metadata.json"))
+ val strbuf = new StringBuffer
+ var line:String = null
while ({line = fr.readLine(); line != null}) {
- strbuf.append(line).append("\n");
+ strbuf.append(line).append("\n")
}
- modelMap = JSON.fromJSON(strbuf.toString).asInstanceOf[HashMap[String,Int]];
+ modelMap = JSON.fromJSON(strbuf.toString).asInstanceOf[HashMap[String,Int]]
}
/*
- * Deal with annoying sub-sized minibatches
+ * Deal with annoying sub-sized minibatches
*/
def extendData(mat:Mat, batchSize:Int):Mat = {
- val nrows = mat.nrows;
- val ncols = mat.ncols;
- val bsize = batchSize - ncols;
+ val nrows = mat.nrows
+ val ncols = mat.ncols
+ val bsize = batchSize - ncols
if (bsize > 0) {
- val newGUID = MurmurHash3.mix(MurmurHash3.mix((mat.GUID >> 32).toInt, mat.GUID.toInt),"extendData".##);
- mat match {
- case a:FMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = zeros(nrows, bsize); a \ bufmat}
- case a:DMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = dzeros(nrows, bsize); a \ bufmat}
- case a:IMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = izeros(nrows, bsize); a \ bufmat}
- case a:LMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = lzeros(nrows, bsize); a \ bufmat}
- case a:GMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = gzeros(nrows, bsize); a \ bufmat}
- case a:GDMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = gdzeros(nrows, bsize); a \ bufmat}
- case a:GIMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = gizeros(nrows, bsize); a \ bufmat}
- case a:GLMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = glzeros(nrows, bsize); a \ bufmat}
- case a:SMat => {val b = new SMat(nrows, ncols, a.nnz, a.ir, a.jc, a.data); b.setGUID(newGUID); b}
- case a:SDMat => {val b = new SDMat(nrows, ncols, a.nnz, a.ir, a.jc, a.data); b.setGUID(newGUID); b}
- case a:GSMat => {val b = new GSMat(nrows, ncols, a.nnz, a.ir, a.ic, a.jc, a.data, a.realnnz); b.setGUID(newGUID); b}
- case a:GSDMat => {val b = new GSDMat(nrows, ncols, a.nnz, a.ir, a.ic, a.jc, a.data, a.realnnz); b.setGUID(newGUID); b}
- }
+ val newGUID = MurmurHash3.mix(MurmurHash3.mix((mat.GUID >> 32).toInt, mat.GUID.toInt),"extendData".##)
+ mat match {
+ case a:FMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = zeros(nrows, bsize); a \ bufmat}
+ case a:DMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = dzeros(nrows, bsize); a \ bufmat}
+ case a:IMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = izeros(nrows, bsize); a \ bufmat}
+ case a:LMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = lzeros(nrows, bsize); a \ bufmat}
+ case a:GMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = gzeros(nrows, bsize); a \ bufmat}
+ case a:GDMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = gdzeros(nrows, bsize); a \ bufmat}
+ case a:GIMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = gizeros(nrows, bsize); a \ bufmat}
+ case a:GLMat => {if (bufmat.asInstanceOf[AnyRef] == null) bufmat = glzeros(nrows, bsize); a \ bufmat}
+ case a:SMat => {val b = new SMat(nrows, ncols, a.nnz, a.ir, a.jc, a.data); b.setGUID(newGUID); b}
+ case a:SDMat => {val b = new SDMat(nrows, ncols, a.nnz, a.ir, a.jc, a.data); b.setGUID(newGUID); b}
+ case a:GSMat => {val b = new GSMat(nrows, ncols, a.nnz, a.ir, a.ic, a.jc, a.data, a.realnnz); b.setGUID(newGUID); b}
+ case a:GSDMat => {val b = new GSDMat(nrows, ncols, a.nnz, a.ir, a.ic, a.jc, a.data, a.realnnz); b.setGUID(newGUID); b}
+ }
} else {
- mat;
+ mat
}
}
}
object Net {
trait Opts extends Model.Opts {
- var links:IMat = null;
- var nweight:Float = 0.1f;
- var dropout:Float = 0.5f;
- var predict:Boolean = false;
- var targetNorm:Float = 1f;
- var targmap:Mat = null;
- var dmask:Mat = null;
- var hasBias:Boolean = false;
- var aopts:ADAGrad.Opts = null;
- var nmodelmats = 0;
- var nodeset:NodeSet = null;
- var tmatShape:(Int,Int) => (Array[Int], Array[Int], Array[Int], Array[Int]) = null;
+ var links:IMat = null
+ var nweight:Float = 0.1f
+ var dropout:Float = 0.5f
+ var predict:Boolean = false
+ var targetNorm:Float = 1f
+ var targmap:Mat = null
+ var dmask:Mat = null
+ var hasBias:Boolean = false
+ var aopts:ADAGrad.Opts = null
+ var nmodelmats = 0
+ var nodeset:NodeSet = null
+ var tmatShape:(Int,Int) => (Array[Int], Array[Int], Array[Int], Array[Int]) = null
}
class Options extends Opts {}
@@ -237,12 +237,12 @@ object Net {
/**
* Build a net with a stack of nodes. node(0) is an input node, node(n-1) is a GLM node.
* Intermediate nodes are Linear followed by nonlinear, starting and ending with Linear.
- * First Linear node width is given as an argument, then it tapers off by taper.
+ * First Linear node width is given as an argument, then it tapers off by taper.
*/
def dnodes2(nslabs:Int, width:Int, taper:Float, ntargs:Int, opts:Opts, nonlin:Int = 1):NodeSet = {
- val widths = int(width * (taper ^ row(0 -> (nslabs-1)))) \ ntargs;
- powerNet(widths, opts, 0, nonlin);
+ val widths = int(width * (taper ^ row(0 -> (nslabs-1)))) \ ntargs
+ powerNet(widths, opts, 0, nonlin)
}
/**
@@ -252,8 +252,8 @@ object Net {
*/
def dnodes3(nslabs:Int, width:Int, taper:Float, ntargs:Int, opts:Opts, nonlin:Int = 1):NodeSet = {
- val widths = int(width * (taper ^ row(0 -> (nslabs-1)))) \ ntargs;
- powerNet(widths, opts, 1, nonlin);
+ val widths = int(width * (taper ^ row(0 -> (nslabs-1)))) \ ntargs
+ powerNet(widths, opts, 1, nonlin)
}
/**
@@ -263,8 +263,8 @@ object Net {
*/
def dnodes4(nslabs:Int, width:Int, taper:Float, ntargs:Int, opts:Opts, nonlin:Int = 1):NodeSet = {
- val widths = int(width * (taper ^ row(0 -> (nslabs-1)))) \ ntargs;
- powerNet(widths, opts, 2, nonlin);
+ val widths = int(width * (taper ^ row(0 -> (nslabs-1)))) \ ntargs
+ powerNet(widths, opts, 2, nonlin)
}
/**
@@ -276,78 +276,78 @@ object Net {
*/
def powerNet(widths:IMat, opts:Opts, addons:Int, nonlin:Int = 1):NodeSet = {
- val thickness = 2 + addons;
+ val thickness = 2 + addons
val depth = 3 + (widths.length - 1) * thickness;
- val nodes = new NodeSet(depth);
- nodes(0) = new InputNode;
- nodes(1) = new LinNode{inputs(0) = nodes(0); outdim = widths(0); hasBias = opts.hasBias; aopts = opts.aopts; tmatShape = opts.tmatShape};
+ val nodes = new NodeSet(depth)
+ nodes(0) = new InputNode
+ nodes(1) = new LinNode{inputs(0) = nodes(0); outdim = widths(0); hasBias = opts.hasBias; aopts = opts.aopts; tmatShape = opts.tmatShape}
for (i <- 2 until depth - 1) {
- ((i-1) % thickness) match {
- case 0 => {
- val w = widths((i-1)/thickness);
- nodes(i) = new LinNode{inputs(0) = nodes(i-1); outdim = w; hasBias = opts.hasBias; aopts = opts.aopts;};
- }
- case 1 => {
- nonlin match {
- case 1 => nodes(i) = new TanhNode{inputs(0) = nodes(i-1)};
- case 2 => nodes(i) = new SigmoidNode{inputs(0) = nodes(i-1)};
- case 3 => nodes(i) = new RectNode{inputs(0) = nodes(i-1)};
- case 4 => nodes(i) = new SoftplusNode{inputs(0) = nodes(i-1)};
- }
- }
- case 2 => {
- nodes(i) = new DropoutNode{inputs(0) = nodes(i-1); frac = opts.dropout};
- }
- case 3 => {
- nodes(i) = new NormNode{inputs(0) = nodes(i-1); targetNorm = opts.targetNorm; weight = opts.nweight};
- }
- }
+ ((i-1) % thickness) match {
+ case 0 => {
+ val w = widths((i-1)/thickness)
+          nodes(i) = new LinNode{inputs(0) = nodes(i-1); outdim = w; hasBias = opts.hasBias; aopts = opts.aopts}
+ }
+ case 1 => {
+ nonlin match {
+ case 1 => nodes(i) = new TanhNode{inputs(0) = nodes(i-1)}
+ case 2 => nodes(i) = new SigmoidNode{inputs(0) = nodes(i-1)}
+ case 3 => nodes(i) = new RectNode{inputs(0) = nodes(i-1)}
+ case 4 => nodes(i) = new SoftplusNode{inputs(0) = nodes(i-1)}
+ }
+ }
+ case 2 => {
+ nodes(i) = new DropoutNode{inputs(0) = nodes(i-1); frac = opts.dropout}
+ }
+ case 3 => {
+ nodes(i) = new NormNode{inputs(0) = nodes(i-1); targetNorm = opts.targetNorm; weight = opts.nweight}
+ }
+ }
}
- nodes(depth-1) = new GLMNode{inputs(0) = nodes(depth-2); links = opts.links};
- nodes;
+ nodes(depth-1) = new GLMNode{inputs(0) = nodes(depth-2); links = opts.links}
+ nodes
}
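
A minimal usage sketch, assuming the usual Learner.train entry point (mat0 and targ are hypothetical FMats): dnodes3 builds the tapered Linear/nonlinear/dropout stack, which is attached to the learner's options before training.

val (nn, opts) = Net.learner(mat0, targ)                   // sets links and batchSize
opts.nodeset = Net.dnodes3(3, 512, 0.5f, targ.nrows, opts) // 3 slabs, width 512, taper 0.5
nn.train                                                   // assumed BIDMach Learner entry point
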
def powerShape(tailHeight:Float, power:Float)(headCount:Int, nfeats:Int):(Array[Int], Array[Int], Array[Int], Array[Int]) = {
- powerShape(tailHeight, power, true)(headCount, nfeats);
+ powerShape(tailHeight, power, true)(headCount, nfeats)
}
def powerShape(tailHeight:Float)(headCount:Int, nfeats:Int):(Array[Int], Array[Int], Array[Int], Array[Int]) = {
- powerShape(tailHeight, 1f, true)(headCount, nfeats);
+ powerShape(tailHeight, 1f, true)(headCount, nfeats)
}
def powerShape(tailHeight:Float, power:Float, leftAlign:Boolean)(headCount:Int, nfeats:Int):(Array[Int], Array[Int], Array[Int], Array[Int]) = {
- var nblocks = 1;
- var tc = tailHeight;
- var ymin = 0;
+ var nblocks = 1
+ var tc = tailHeight
+ var ymin = 0
while (tc < headCount) {
- val ymax = math.min(headCount, math.round(tc - 1e-5f));
- if (ymax - ymin > 0) nblocks += 1;
- ymin = ymax;
- tc *= 2;
+ val ymax = math.min(headCount, math.round(tc - 1e-5f))
+ if (ymax - ymin > 0) nblocks += 1
+ ymin = ymax
+ tc *= 2
}
- val y = new Array[Int](nblocks);
- val x = new Array[Int](nblocks);
- val h = new Array[Int](nblocks);
- val w = new Array[Int](nblocks);
- val ratio = math.pow(0.5, power);
- var xmax = nfeats;
- ymin = 0;
- tc = tailHeight;
- var i = 0;
+ val y = new Array[Int](nblocks)
+ val x = new Array[Int](nblocks)
+ val h = new Array[Int](nblocks)
+ val w = new Array[Int](nblocks)
+ val ratio = math.pow(0.5, power)
+ var xmax = nfeats
+ ymin = 0
+ tc = tailHeight
+ var i = 0
while (i < nblocks) {
- val newx = (xmax * ratio).toInt;
+ val newx = (xmax * ratio).toInt
val xmin = if (leftAlign) 0 else newx;
- val ymax = math.min(headCount, math.round(tc - 1e-5f));
+ val ymax = math.min(headCount, math.round(tc - 1e-5f))
if (ymax - ymin > 0) {
- x(i) = xmin;
- y(i) = ymin;
- w(i) = xmax - xmin;
- h(i) = ymax - ymin;
- i += 1;
+ x(i) = xmin
+ y(i) = ymin
+ w(i) = xmax - xmin
+ h(i) = ymax - ymin
+ i += 1
}
- xmax = newx;
- ymin = ymax;
- tc *= 2;
+ xmax = newx
+ ymin = ymax
+ tc *= 2
}
(y, x, h, w)
}
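
Worked example, traced from the code above: powerShape(4f, 1f, true)(16, 1024) returns y = [0, 4, 8], x = [0, 0, 0], h = [4, 4, 8], w = [1024, 512, 256] — three left-aligned blocks whose covered row band roughly doubles from tailHeight while the block width halves (ratio = 0.5^power) from nfeats, so the most frequent head rows get the full feature width and later rows progressively narrower slices.
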
@@ -367,98 +367,98 @@ object Net {
class LearnOptions extends Learner.Options with Net.Opts with MatSource.Opts with ADAGrad.Opts with L1Regularizer.Opts
def learner(mat0:Mat, targ:Mat) = {
- val opts = new LearnOptions;
+ val opts = new LearnOptions
if (opts.links == null) {
- opts.links = izeros(1,targ.nrows);
- opts.links.set(1);
+ opts.links = izeros(1,targ.nrows)
+ opts.links.set(1)
}
- opts.batchSize = math.min(100000, mat0.ncols/30 + 1);
- val nn = new Learner(
- new MatSource(Array(mat0, targ), opts),
- new Net(opts),
- Array(new L1Regularizer(opts)),
- new ADAGrad(opts),
- null,
- opts)
+ opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
+ val nn = new Learner(
+ new MatSource(Array(mat0, targ), opts),
+ new Net(opts),
+ Array(new L1Regularizer(opts)),
+ new ADAGrad(opts),
+ null,
+ opts)
(nn, opts)
}
def learnerX(mat0:Mat, targ:Mat) = {
- val opts = new LearnOptions;
- opts.links = izeros(1,targ.nrows);
- opts.links.set(1);
+ val opts = new LearnOptions
+ opts.links = izeros(1,targ.nrows)
+ opts.links.set(1)
opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
- val nn = new Learner(
- new MatSource(Array(mat0, targ), opts),
- new Net(opts),
- null,
- null,
- null,
- opts)
+ val nn = new Learner(
+ new MatSource(Array(mat0, targ), opts),
+ new Net(opts),
+ null,
+ null,
+ null,
+ opts)
(nn, opts)
}
class FDSopts extends Learner.Options with Net.Opts with FileSource.Opts with ADAGrad.Opts with L1Regularizer.Opts
def learner(fn1:String, fn2:String):(Learner, FDSopts) = learner(List(FileSource.simpleEnum(fn1,1,0),
- FileSource.simpleEnum(fn2,1,0)));
+ FileSource.simpleEnum(fn2,1,0)))
- def learner(fn1:String):(Learner, FDSopts) = learner(List(FileSource.simpleEnum(fn1,1,0)));
+ def learner(fn1:String):(Learner, FDSopts) = learner(List(FileSource.simpleEnum(fn1,1,0)))
def learner(fnames:List[(Int)=>String]):(Learner, FDSopts) = {
- val opts = new FDSopts;
+ val opts = new FDSopts
opts.fnames = fnames
- opts.batchSize = 100000;
- opts.eltsPerSample = 500;
- implicit val threads = threadPool(4);
+ opts.batchSize = 100000
+ opts.eltsPerSample = 500
+ implicit val threads = threadPool(4)
val ds = new FileSource(opts)
- val nn = new Learner(
- ds,
- new Net(opts),
- Array(new L1Regularizer(opts)),
- new ADAGrad(opts),
- null,
- opts)
+ val nn = new Learner(
+ ds,
+ new Net(opts),
+ Array(new L1Regularizer(opts)),
+ new ADAGrad(opts),
+ null,
+ opts)
(nn, opts)
}
def learnerX(fn1:String, fn2:String):(Learner, FDSopts) = learnerX(List(FileSource.simpleEnum(fn1,1,0),
- FileSource.simpleEnum(fn2,1,0)));
+ FileSource.simpleEnum(fn2,1,0)))
- def learnerX(fn1:String):(Learner, FDSopts) = learnerX(List(FileSource.simpleEnum(fn1,1,0)));
+ def learnerX(fn1:String):(Learner, FDSopts) = learnerX(List(FileSource.simpleEnum(fn1,1,0)))
def learnerX(fnames:List[(Int)=>String]):(Learner, FDSopts) = {
val opts = new FDSopts
opts.fnames = fnames
- opts.batchSize = 100000;
- opts.eltsPerSample = 500;
- val ds = new FileSource(opts);
+ opts.batchSize = 100000
+ opts.eltsPerSample = 500
+ val ds = new FileSource(opts)
// val net = dnodes(3, 0, 1f, opts.targmap.nrows, opts) // default to a 3-node network
- val nn = new Learner(ds,
- new Net(opts),
- null,
- null,
- null,
- opts)
+ val nn = new Learner(ds,
+ new Net(opts),
+ null,
+ null,
+ null,
+ opts)
(nn, opts)
}
- class PredOptions extends Learner.Options with Net.Opts with MatSource.Opts with MatSink.Opts;
+ class PredOptions extends Learner.Options with Net.Opts with MatSource.Opts with MatSink.Opts
def predictor(model0:Model, mat0:Mat):(Learner, PredOptions) = {
- val model = model0.asInstanceOf[Net];
- val mopts = model.opts;
- val opts = new PredOptions;
- opts.batchSize = math.min(10000, mat0.ncols/30 + 1);
- opts.links = mopts.links;
- opts.nodeset = mopts.nodeset.clone;
+ val model = model0.asInstanceOf[Net]
+ val mopts = model.opts
+ val opts = new PredOptions
+ opts.batchSize = math.min(10000, mat0.ncols/30 + 1)
+ opts.links = mopts.links
+ opts.nodeset = mopts.nodeset.clone
opts.nodeset.nodes.foreach({case nx:LinNode => nx.aopts = null; case _ => Unit})
- opts.hasBias = mopts.hasBias;
- opts.dropout = 1f;
+ opts.hasBias = mopts.hasBias
+ opts.dropout = 1f
- val newmod = new Net(opts);
- newmod.refresh = false;
+ val newmod = new Net(opts)
+ newmod.refresh = false
newmod.copyFrom(model)
val nn = new Learner(
new MatSource(Array(mat0), opts),
@@ -466,52 +466,52 @@ object Net {
null,
null,
new MatSink(opts),
- opts);
+ opts)
(nn, opts)
}
- class FilePredOptions extends Learner.Options with Net.Opts with FileSource.Opts with FileSink.Opts;
+ class FilePredOptions extends Learner.Options with Net.Opts with FileSource.Opts with FileSink.Opts
def predictor(model0:Model, infn:String, outfn:String):(Learner, FilePredOptions) = {
- predictor(model0, List(FileSource.simpleEnum(infn,1,0)), List(FileSource.simpleEnum(outfn,1,0)));
+ predictor(model0, List(FileSource.simpleEnum(infn,1,0)), List(FileSource.simpleEnum(outfn,1,0)))
}
def predictor(model0:Model, infiles:List[(Int)=>String], outfiles:List[(Int)=>String]):(Learner, FilePredOptions) = {
- val model = model0.asInstanceOf[Net];
- val mopts = model.opts;
- val opts = new FilePredOptions;
- opts.fnames = infiles;
- opts.ofnames = outfiles;
- opts.links = mopts.links;
- opts.nodeset = mopts.nodeset.clone;
+ val model = model0.asInstanceOf[Net]
+ val mopts = model.opts
+ val opts = new FilePredOptions
+ opts.fnames = infiles
+ opts.ofnames = outfiles
+ opts.links = mopts.links
+ opts.nodeset = mopts.nodeset.clone
opts.nodeset.nodes.foreach({case nx:LinNode => nx.aopts = null; case _ => Unit})
- opts.hasBias = mopts.hasBias;
- opts.dropout = 1f;
+ opts.hasBias = mopts.hasBias
+ opts.dropout = 1f
- val newmod = new Net(opts);
- newmod.refresh = false;
- newmod.copyFrom(model);
- val dsource = new FileSource(opts);
- val dsink = new FileSink(opts);
+ val newmod = new Net(opts)
+ newmod.refresh = false
+ newmod.copyFrom(model)
+ val dsource = new FileSource(opts)
+ val dsink = new FileSink(opts)
val nn = new Learner(
dsource,
newmod,
null,
null,
dsink,
- opts);
+ opts)
(nn, opts)
}
- class LearnParOptions extends ParLearner.Options with Net.Opts with FileSource.Opts with ADAGrad.Opts with L1Regularizer.Opts;
+ class LearnParOptions extends ParLearner.Options with Net.Opts with FileSource.Opts with ADAGrad.Opts with L1Regularizer.Opts
def learnPar(fn1:String, fn2:String):(ParLearnerF, LearnParOptions) = {learnPar(List(FileSource.simpleEnum(fn1,1,0), FileSource.simpleEnum(fn2,1,0)))}
def learnPar(fnames:List[(Int) => String]):(ParLearnerF, LearnParOptions) = {
- val opts = new LearnParOptions;
- opts.batchSize = 10000;
- opts.lrate = 1f;
- opts.fnames = fnames;
+ val opts = new LearnParOptions
+ opts.batchSize = 10000
+ opts.lrate = 1f
+ opts.fnames = fnames
implicit val threads = threadPool(4)
val nn = new ParLearnerF(
new FileSource(opts),
diff --git a/src/main/scala/BIDMach/networks/NextWord.scala b/src/main/scala/BIDMach/networks/NextWord.scala
index a428b3ba..f9158ba5 100644
--- a/src/main/scala/BIDMach/networks/NextWord.scala
+++ b/src/main/scala/BIDMach/networks/NextWord.scala
@@ -11,119 +11,119 @@ import BIDMach.networks.layers._
import BIDMach._
/*
- * LSTM next Word prediction model, which comprises a rectangular grid of LSTM compound layers.
+ * LSTM next-word prediction model, which comprises a rectangular grid of LSTM compound layers.
*/
class NextWord(override val opts:NextWord.Opts = new NextWord.Options) extends Net(opts) {
- var shiftedInds:Mat = null;
- var leftedge:Layer = null;
- var height = 0;
- var width = 0;
- val preamble_size = 3;
+ var shiftedInds:Mat = null
+ var leftedge:Layer = null
+ var height = 0
+ var width = 0
+ val preamble_size = 3
// define some getters/setters on the grid
- def getlayer(j:Int, i:Int):Layer = layers(j + i * width + preamble_size);
- def setlayer(j:Int, i:Int, ll:Layer) = {layers(j + i * width + preamble_size) = ll};
-
- override def createLayers = {
- height = opts.height;
- width = opts.width;
+ def getlayer(j:Int, i:Int):Layer = layers(j + i * width + preamble_size)
+ def setlayer(j:Int, i:Int, ll:Layer) = {layers(j + i * width + preamble_size) = ll}
+
+ override def createLayers = {
+ height = opts.height
+    width = opts.width
layers = if (opts.allout) {
- new Array[Layer]((height+2) * width + preamble_size);
+      new Array[Layer]((height+2) * width + preamble_size)
} else {
- new Array[Layer]((height) * width + preamble_size + 2);
+ new Array[Layer]((height) * width + preamble_size + 2)
}
leftedge = InputLayer(this); // dummy layer, left edge of zeros
// the preamble (bottom) layers
- layers(0) = InputLayer(this);
- val lopts1 = new LinNode{modelName = "inWordMap"; outdim = opts.dim; aopts = opts.aopts};
- layers(1) = LinLayer(this, lopts1).setInput(0, layers(0));
- val spopts = new SplitHorizNode{nparts = opts.width};
- layers(2) = SplitHorizLayer(this, spopts).setInput(0, layers(1));
+ layers(0) = InputLayer(this)
+ val lopts1 = new LinNode{modelName = "inWordMap"; outdim = opts.dim; aopts = opts.aopts}
+ layers(1) = LinLayer(this, lopts1).setInput(0, layers(0))
+ val spopts = new SplitHorizNode{nparts = opts.width}
+ layers(2) = SplitHorizLayer(this, spopts).setInput(0, layers(1))
// the main grid
for (i <- 0 until height) {
- val lopts = new LSTMNode;
- lopts.dim = opts.dim;
- lopts.aopts = opts.aopts;
- lopts.kind = opts.kind;
- lopts.prefix = if (opts.bylevel) "level_%d" format i; else ""
- lopts.constructGraph;
+ val lopts = new LSTMNode
+ lopts.dim = opts.dim
+ lopts.aopts = opts.aopts
+ lopts.kind = opts.kind
+      lopts.prefix = if (opts.bylevel) "level_%d" format i else ""
+ lopts.constructGraph
for (j <- 0 until width) {
- val layer = LSTMLayer(this, lopts);
+ val layer = LSTMLayer(this, lopts)
if (i > 0) {
layer.setInput(2, getlayer(j, i-1)); // in most layers, input 2 (i) is from layer below
} else {
- layer.setInput(2, layers(2)(j)); // on bottom layer, input 2 is j^th output from the split layer
+ layer.setInput(2, layers(2)(j)); // on bottom layer, input 2 is j^th output from the split layer
}
if (j > 0) {
layer.setInput(0, getlayer(j-1, i)); // input 0 (prev_h) is layer to the left, output 0 (h)
layer.setInput(1, getlayer(j-1, i)(1)); // input 1 (prev_c) is layer to the left, output 1 (c)
} else {
layer.setInput(0, leftedge); // in first column, just use dummy (zeros) input
- layer.setInput(1, leftedge);
+ layer.setInput(1, leftedge)
}
- setlayer(j, i, layer);
+ setlayer(j, i, layer)
}
}
// the top layers
- val lopts2 = new LinNode{modelName = "outWordMap"; outdim = opts.nvocab; aopts = opts.aopts};
- val sopts = new SoftmaxOutputNode;
+ val lopts2 = new LinNode{modelName = "outWordMap"; outdim = opts.nvocab; aopts = opts.aopts}
+ val sopts = new SoftmaxOutputNode
if (opts.allout) {
- output_layers = new Array[Layer](width);
- for (j <- 0 until width) {
- val linlayer = LinLayer(this, lopts2).setInput(0, getlayer(j, height - 1));
- setlayer(j, height, linlayer);
- val smlayer = SoftmaxOutputLayer(this, sopts).setInput(0, linlayer);
- setlayer(j, height+1, smlayer);
- output_layers(j) = smlayer;
- }
+ output_layers = new Array[Layer](width)
+ for (j <- 0 until width) {
+ val linlayer = LinLayer(this, lopts2).setInput(0, getlayer(j, height - 1))
+        setlayer(j, height, linlayer)
+ val smlayer = SoftmaxOutputLayer(this, sopts).setInput(0, linlayer)
+ setlayer(j, height+1, smlayer)
+ output_layers(j) = smlayer
+ }
} else {
- val linlayer = LinLayer(this, lopts2).setInput(0, getlayer(width-1, height - 1));
- layers(width*height+preamble_size) = linlayer;
+ val linlayer = LinLayer(this, lopts2).setInput(0, getlayer(width-1, height - 1))
+ layers(width*height+preamble_size) = linlayer
val smlayer = SoftmaxOutputLayer(this, sopts).setInput(0, linlayer);
layers(width*height+preamble_size+1) = smlayer;
- output_layers = Array(smlayer);
+ output_layers = Array(smlayer)
}
}
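// The grid addressing behind getlayer/setlayer is row-major with a fixed
// preamble offset; a plain-Scala sketch with illustrative width and height.
val preamble_size = 3; val width = 4; val height = 2
def idx(j: Int, i: Int) = j + i * width + preamble_size
assert(idx(0, 0) == 3 && idx(width - 1, height - 1) == 10)
// With allout=true, rows i = height (Lin) and i = height+1 (SoftmaxOutput)
// follow the LSTM grid, giving (height + 2) * width + preamble_size layers.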
override def assignInputs(gmats:Array[Mat], ipass:Int, pos:Long) {
if (batchSize % opts.width != 0) throw new RuntimeException("LSTMwordPredict error: batch size must be a multiple of network width %d %d" format (batchSize, opts.width))
- val nr = batchSize / opts.width;
- val in = gmats(0).view(opts.width, nr).t.view(1, batchSize);
- layers(0).output = oneHot(in, opts.nvocab);
+ val nr = batchSize / opts.width
+ val in = gmats(0).view(opts.width, nr).t.view(1, batchSize)
+ layers(0).output = oneHot(in, opts.nvocab)
if (leftedge.output.asInstanceOf[AnyRef] == null) {
- leftedge.output = convertMat(zeros(opts.dim, nr));
+ leftedge.output = convertMat(zeros(opts.dim, nr))
}
}
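// The view/transpose in assignInputs regroups a width-major batch so that all
// tokens feeding grid column j are contiguous; a plain-Scala sketch of the
// index arithmetic with illustrative width = 3 and nr = batchSize / width = 2.
val width = 3; val nr = 2
val flat = Array(1, 2, 3, 4, 5, 6)   // two sequences, (1,2,3) and (4,5,6)
val byPos = for (j <- 0 until width; r <- 0 until nr) yield flat(j + r * width)
assert(byPos == Vector(1, 4, 2, 5, 3, 6))   // position j of each sequence together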
override def assignTargets(gmats:Array[Mat], ipass:Int, pos:Long) {
- val nr = batchSize / opts.width;
- val in0 = gmats(0);
- if (shiftedInds.asInstanceOf[AnyRef] == null) shiftedInds = convertMat(irow(1->in0.ncols) \ (in0.ncols-1));
- val inshift = in0(0, shiftedInds);
- val in = inshift.view(opts.width, nr).t;
+ val nr = batchSize / opts.width
+ val in0 = gmats(0)
+ if (shiftedInds.asInstanceOf[AnyRef] == null) shiftedInds = convertMat(irow(1->in0.ncols) \ (in0.ncols-1))
+ val inshift = in0(0, shiftedInds)
+ val in = inshift.view(opts.width, nr).t
if (opts.allout) {
- for (j <- 0 until opts.width) {
- val incol = in.colslice(j,j+1).t;
- getlayer(j, height+1).target = if (targmap.asInstanceOf[AnyRef] != null) targmap * incol; else incol;
- }
+ for (j <- 0 until opts.width) {
+ val incol = in.colslice(j,j+1).t
+        getlayer(j, height+1).target = if (targmap.asInstanceOf[AnyRef] != null) targmap * incol else incol
+ }
} else {
- val incol = in.colslice(opts.width-1, opts.width).t;
- layers(height*width + preamble_size + 1).target = if (targmap.asInstanceOf[AnyRef] != null) targmap * incol; else incol;
+ val incol = in.colslice(opts.width-1, opts.width).t
+      layers(height*width + preamble_size + 1).target = if (targmap.asInstanceOf[AnyRef] != null) targmap * incol else incol
}
}
}
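// assignTargets above builds next-word targets by shifting each input row left
// by one and repeating the final token (irow(1->n) \ (n-1)); a plain-Scala
// sketch with an illustrative token row.
val in = Vector(5, 9, 2, 7)
val shiftedInds = (1 until in.length) :+ (in.length - 1)
val target = shiftedInds.map(in)
assert(target == Vector(9, 2, 7, 7))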
object NextWord {
trait Opts extends Net.Opts {
- var width = 1;
- var height = 1;
- var nvocab = 100000;
- var kind = 0;
- var allout = true;
- var bylevel = true;
+ var width = 1
+ var height = 1
+ var nvocab = 100000
+ var kind = 0
+ var allout = true
+ var bylevel = true
}
class Options extends Opts {}
@@ -143,49 +143,49 @@ object NextWord {
class LearnOptions extends Learner.Options with NextWord.Opts with MatSource.Opts with ADAGrad.Opts with L1Regularizer.Opts
def learner(mat0:Mat) = {
- val opts = new LearnOptions;
- opts.batchSize = math.min(100000, mat0.ncols/30 + 1);
- val nn = new Learner(
- new MatSource(Array(mat0), opts),
- new NextWord(opts),
- Array(new L1Regularizer(opts)),
- new ADAGrad(opts),
- null,
- opts)
+ val opts = new LearnOptions
+ opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
+ val nn = new Learner(
+ new MatSource(Array(mat0), opts),
+ new NextWord(opts),
+ Array(new L1Regularizer(opts)),
+ new ADAGrad(opts),
+ null,
+ opts)
(nn, opts)
}
def learnerX(mat0:Mat) = {
- val opts = new LearnOptions;
- opts.batchSize = math.min(100000, mat0.ncols/30 + 1);
- val nn = new Learner(
- new MatSource(Array(mat0), opts),
- new NextWord(opts),
- null,
- null,
- null,
- opts)
+ val opts = new LearnOptions
+ opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
+ val nn = new Learner(
+ new MatSource(Array(mat0), opts),
+ new NextWord(opts),
+ null,
+ null,
+ null,
+ opts)
(nn, opts)
}
class FDSopts extends Learner.Options with NextWord.Opts with FileSource.Opts with ADAGrad.Opts with L1Regularizer.Opts
- def learner(fn1:String):(Learner, FDSopts) = learner(List(FileSource.simpleEnum(fn1,1,0)));
+ def learner(fn1:String):(Learner, FDSopts) = learner(List(FileSource.simpleEnum(fn1,1,0)))
def learner(fnames:List[(Int)=>String]):(Learner, FDSopts) = {
- val opts = new FDSopts;
+ val opts = new FDSopts
opts.fnames = fnames
- opts.batchSize = 100000;
- opts.eltsPerSample = 500;
- implicit val threads = threadPool(4);
+ opts.batchSize = 100000
+ opts.eltsPerSample = 500
+ implicit val threads = threadPool(4)
val ds = new FileSource(opts)
- val nn = new Learner(
- ds,
- new NextWord(opts),
- Array(new L1Regularizer(opts)),
- new ADAGrad(opts),
- null,
- opts)
+ val nn = new Learner(
+ ds,
+ new NextWord(opts),
+ Array(new L1Regularizer(opts)),
+ new ADAGrad(opts),
+ null,
+ opts)
(nn, opts)
}
-}
\ No newline at end of file
+}
diff --git a/src/main/scala/BIDMach/networks/SeqToSeq.scala b/src/main/scala/BIDMach/networks/SeqToSeq.scala
index 3e29ed61..1598701a 100644
--- a/src/main/scala/BIDMach/networks/SeqToSeq.scala
+++ b/src/main/scala/BIDMach/networks/SeqToSeq.scala
@@ -12,196 +12,196 @@ import BIDMach.networks.layers._
import BIDMach._
/*
- * LSTM next Word prediction model, which comprises a rectangular grid of LSTM compound layers.
+ * LSTM sequence-to-sequence model: an encoder (src) grid and a decoder (dst) grid of LSTM compound layers.
*/
class SeqToSeq(override val opts:SeqToSeq.Opts = new SeqToSeq.Options) extends Net(opts) {
- var PADrow:Mat = null;
- var OOVelem:Mat = null;
- var leftEdge:Layer = null;
- var leftStart:Mat = null;
- var dstxdata:Mat = null;
- var dstxdata0:Mat = null;
- var srcGrid:LayerMat = null;
- var dstGrid:LayerMat = null;
- var srcGridOpts:LSTMNode.GridOpts = null;
- var dstGridOpts:LSTMNode.GridOpts = null;
- var height = 0;
- var inwidth = 0;
- var outwidth = 0;
- var width = 0;
- var srcn = 0;
- var dstxn = 0;
- var dstyn = 0;
- val preamble_rows = 2;
-
- override def createLayers = {
- height = opts.height;
- inwidth = opts.inwidth;
- outwidth = opts.outwidth;
+ var PADrow:Mat = null
+ var OOVelem:Mat = null
+ var leftEdge:Layer = null
+ var leftStart:Mat = null
+ var dstxdata:Mat = null
+ var dstxdata0:Mat = null
+ var srcGrid:LayerMat = null
+ var dstGrid:LayerMat = null
+ var srcGridOpts:LSTMNode.GridOpts = null
+ var dstGridOpts:LSTMNode.GridOpts = null
+ var height = 0
+ var inwidth = 0
+ var outwidth = 0
+ var width = 0
+ var srcn = 0
+ var dstxn = 0
+ var dstyn = 0
+ val preamble_rows = 2
+
+ override def createLayers = {
+ height = opts.height
+    inwidth = opts.inwidth
+ outwidth = opts.outwidth
leftEdge = InputLayer(this); // dummy layer, left edge of zeros
- srcGridOpts = new LSTMNode.GridOpts;
- srcGridOpts.copyFrom(opts);
- srcGridOpts.modelName = "src_level%d";
- srcGridOpts.netType = LSTMNode.gridTypeNoOutput;
- srcGrid = LSTMLayer.grid(this, height, inwidth, srcGridOpts);
- layers = srcGrid.data.filter(_ != null);
- for (i <- 0 until height) srcGrid(i+preamble_rows, 0).setInputs(leftEdge, leftEdge);
+ srcGridOpts = new LSTMNode.GridOpts
+ srcGridOpts.copyFrom(opts)
+ srcGridOpts.modelName = "src_level%d"
+ srcGridOpts.netType = LSTMNode.gridTypeNoOutput
+ srcGrid = LSTMLayer.grid(this, height, inwidth, srcGridOpts)
+ layers = srcGrid.data.filter(_ != null)
+ for (i <- 0 until height) srcGrid(i+preamble_rows, 0).setInputs(leftEdge, leftEdge)
if (! opts.embed) {
- dstGridOpts = new LSTMNode.GridOpts;
- dstGridOpts.copyFrom(opts);
- dstGridOpts.modelName = "dst_level%d";
- dstGridOpts.netType = LSTMNode.gridTypeSoftmaxOutput;
- dstGridOpts.outdim = opts.nvocab;
- dstGrid = LSTMLayer.grid(this, height, outwidth, dstGridOpts);
+ dstGridOpts = new LSTMNode.GridOpts
+ dstGridOpts.copyFrom(opts)
+ dstGridOpts.modelName = "dst_level%d"
+ dstGridOpts.netType = LSTMNode.gridTypeSoftmaxOutput
+ dstGridOpts.outdim = opts.nvocab
+ dstGrid = LSTMLayer.grid(this, height, outwidth, dstGridOpts)
- srcGrid link dstGrid;
- layers = layers ++ dstGrid.data.filter(_ != null);
- output_layers = new Array[Layer](outwidth);
- for (i <- 0 until outwidth) output_layers(i) = dstGrid(dstGrid.nrows-1, i);
+ srcGrid link dstGrid
+ layers = layers ++ dstGrid.data.filter(_ != null)
+ output_layers = new Array[Layer](outwidth)
+ for (i <- 0 until outwidth) output_layers(i) = dstGrid(dstGrid.nrows-1, i)
}
}
def mapOOV(in:Mat) = {
if (OOVelem.asInstanceOf[AnyRef] == null) {
- OOVelem = convertMat(iones(1,1) * opts.OOVsym);
+ OOVelem = convertMat(iones(1,1) * opts.OOVsym)
}
in ~ in + ((in >= opts.nvocab) ∘ (OOVelem - in))
}
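// mapOOV is branch-free arithmetic: in + 1{in >= nvocab} * (OOVsym - in) leaves
// in-vocabulary ids unchanged and collapses the rest to the OOV symbol. A scalar
// sketch, with illustrative nvocab and OOVsym values.
val nvocab = 100000; val OOVsym = 42
def mapOOV(id: Int) = id + (if (id >= nvocab) 1 else 0) * (OOVsym - id)
assert(mapOOV(7) == 7 && mapOOV(123456) == OOVsym)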
override def assignInputs(gmats:Array[Mat], ipass:Int, pos:Long) = {
- val src = gmats(0);
- srcn = src.nnz/src.ncols;
- if (srcn*src.ncols != src.nnz) throw new RuntimeException("SeqToSeq src batch not fixed length");
+ val src = gmats(0)
+ srcn = src.nnz/src.ncols
+ if (srcn*src.ncols != src.nnz) throw new RuntimeException("SeqToSeq src batch not fixed length")
val srcdata = int(src.contents.view(srcn, batchSize).t); // IMat with columns corresponding to word positions, with batchSize rows.
- mapOOV(srcdata);
- val srcmat = oneHot(srcdata.contents, opts.nvocab);
- srcn = math.min(srcn, opts.inwidth);
- if (srcn < inwidth) initPrevCol;
+ mapOOV(srcdata)
+ val srcmat = oneHot(srcdata.contents, opts.nvocab)
+ srcn = math.min(srcn, opts.inwidth)
+ if (srcn < inwidth) initPrevCol
for (i <- 0 until srcn) {
- val cols = srcmat.colslice(i*batchSize, (i+1)*batchSize);
- srcGrid(0, inwidth + i - srcn).output = cols;
+ val cols = srcmat.colslice(i*batchSize, (i+1)*batchSize)
+ srcGrid(0, inwidth + i - srcn).output = cols
}
if (leftEdge.output.asInstanceOf[AnyRef] == null) {
- leftEdge.output = convertMat(zeros(opts.dim \ batchSize));
+ leftEdge.output = convertMat(zeros(opts.dim \ batchSize))
}
if (! opts.embed) {
- val dstx = gmats(1);
- val dstxn0 = dstx.nnz/dstx.ncols;
- if (dstxn0*dstx.ncols != dstx.nnz) throw new RuntimeException("SeqToSeq dstx batch not fixed length");
- val dstxdata0 = int(dstx.contents.view(dstxn0, batchSize).t);
- dstxn = dstxn0 + (if (opts.addStart) 1 else 0);
- if (opts.addStart && (leftStart.asInstanceOf[AnyRef] == null)) {
- leftStart = convertMat(izeros(batchSize, 1));
- }
- val dstxdata = if (opts.addStart) (leftStart \ dstxdata0) else dstxdata0;
- mapOOV(dstxdata);
- val dstxmat = oneHot(dstxdata.contents, opts.nvocab);
+ val dstx = gmats(1)
+ val dstxn0 = dstx.nnz/dstx.ncols
+      if (dstxn0*dstx.ncols != dstx.nnz) throw new RuntimeException("SeqToSeq dstx batch not fixed length")
+ val dstxdata0 = int(dstx.contents.view(dstxn0, batchSize).t)
+ dstxn = dstxn0 + (if (opts.addStart) 1 else 0)
+ if (opts.addStart && (leftStart.asInstanceOf[AnyRef] == null)) {
+ leftStart = convertMat(izeros(batchSize, 1))
+ }
+ val dstxdata = if (opts.addStart) (leftStart \ dstxdata0) else dstxdata0
+ mapOOV(dstxdata)
+ val dstxmat = oneHot(dstxdata.contents, opts.nvocab)
- dstxn = math.min(dstxn, opts.outwidth);
- for (i <- 0 until dstxn) {
- val cols = dstxmat.colslice(i*batchSize, (i+1)*batchSize);
- dstGrid(0, i).output = cols;
- }
+ dstxn = math.min(dstxn, opts.outwidth)
+ for (i <- 0 until dstxn) {
+ val cols = dstxmat.colslice(i*batchSize, (i+1)*batchSize)
+ dstGrid(0, i).output = cols
+ }
}
}
def initPrevCol = {
- for (i <- 0 until height) {
- val leftlayer = srcGrid(i+preamble_rows, inwidth-srcn-1);
- if (leftlayer.output.asInstanceOf[AnyRef] == null) {
- leftlayer.output = convertMat(zeros(opts.dim \ batchSize));
- }
- leftlayer.output.clear;
- if (leftlayer.outputs(1).asInstanceOf[AnyRef] == null) {
- leftlayer.setOutput(1, convertMat(zeros(opts.dim \ batchSize)));
- }
- leftlayer.outputs(1).clear;
- }
+ for (i <- 0 until height) {
+ val leftlayer = srcGrid(i+preamble_rows, inwidth-srcn-1)
+ if (leftlayer.output.asInstanceOf[AnyRef] == null) {
+ leftlayer.output = convertMat(zeros(opts.dim \ batchSize))
+ }
+ leftlayer.output.clear
+ if (leftlayer.outputs(1).asInstanceOf[AnyRef] == null) {
+ leftlayer.setOutput(1, convertMat(zeros(opts.dim \ batchSize)))
+ }
+ leftlayer.outputs(1).clear
+ }
}
override def assignTargets(gmats:Array[Mat], ipass:Int, pos:Long) {
- val dsty = if (gmats.length > 2) gmats(2) else gmats(1);
- val dstyn0 = dsty.nnz/dsty.ncols;
- if (dstyn0*dsty.ncols != dsty.nnz) throw new RuntimeException("SeqToSeq dsty batch not fixed length");
- val dstydata = int(dsty.contents.view(dstyn0, batchSize).t);
- mapOOV(dstydata);
- val dstyn1 = math.min(dstyn0 - (if (opts.addStart) 0 else 1), opts.outwidth);
+ val dsty = if (gmats.length > 2) gmats(2) else gmats(1)
+ val dstyn0 = dsty.nnz/dsty.ncols
+ if (dstyn0*dsty.ncols != dsty.nnz) throw new RuntimeException("SeqToSeq dsty batch not fixed length")
+ val dstydata = int(dsty.contents.view(dstyn0, batchSize).t)
+ mapOOV(dstydata)
+ val dstyn1 = math.min(dstyn0 - (if (opts.addStart) 0 else 1), opts.outwidth)
for (j <- 0 until dstyn1) {
- val incol = if (opts.addStart) dstydata.colslice(j,j+1).t else dstydata.colslice(j+1,j+2).t
- output_layers(j).target = incol;
+ val incol = if (opts.addStart) dstydata.colslice(j,j+1).t else dstydata.colslice(j+1,j+2).t
+ output_layers(j).target = incol
}
if (PADrow.asInstanceOf[AnyRef] == null) {
- PADrow = convertMat(iones(1, batchSize) * opts.PADsym);
+ PADrow = convertMat(iones(1, batchSize) * opts.PADsym)
}
- dstyn = math.min(dstyn1 + 1, opts.outwidth);
+ dstyn = math.min(dstyn1 + 1, opts.outwidth)
if (dstyn1 < opts.outwidth) {
- output_layers(dstyn1).target = PADrow;
+ output_layers(dstyn1).target = PADrow
}
}
override def dobatch(gmats:Array[Mat], ipass:Int, pos:Long):Unit = {
- if (batchSize < 0) batchSize = gmats(0).ncols;
+ if (batchSize < 0) batchSize = gmats(0).ncols
if (batchSize == gmats(0).ncols) { // discard odd-sized minibatches
- assignInputs(gmats, ipass, pos);
- assignTargets(gmats, ipass, pos);
+ assignInputs(gmats, ipass, pos)
+ assignTargets(gmats, ipass, pos)
if (mask.asInstanceOf[AnyRef] != null) {
- modelmats(0) ~ modelmats(0) ∘ mask;
+ modelmats(0) ~ modelmats(0) ∘ mask
}
- val mincol = inwidth - srcn;
- val maxcol = dstxn;
- srcGrid.forward(mincol, inwidth-1, opts.debug);
- dstGrid.forward(0, maxcol-1, opts.debug);
+ val mincol = inwidth - srcn
+ val maxcol = dstxn
+ srcGrid.forward(mincol, inwidth-1, opts.debug)
+ dstGrid.forward(0, maxcol-1, opts.debug)
output_layers.map((layer:Layer) => layer match {
case _:OutputLayer => {}
case _ => {if (layer.deriv.asInstanceOf[AnyRef] != null) layer.deriv.set(1);}
})
- if (opts.aopts == null) updatemats.map(_.clear);
+ if (opts.aopts == null) updatemats.map(_.clear)
- dstGrid.backward(0, maxcol-1, opts.debug, ipass, pos);
- srcGrid.backward(mincol, inwidth-1, opts.debug, ipass, pos);
+ dstGrid.backward(0, maxcol-1, opts.debug, ipass, pos)
+ srcGrid.backward(mincol, inwidth-1, opts.debug, ipass, pos)
}
}
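// dobatch sweeps only the populated encoder columns: a source of srcn tokens is
// right-aligned into columns inwidth-srcn .. inwidth-1 (see assignInputs), and
// the backward pass retraces the forward pass in reverse, decoder grid first.
// A sketch of the column arithmetic with illustrative sizes.
val inwidth = 10; val srcn = 6
val mincol = inwidth - srcn                      // first populated encoder column
assert((mincol until inwidth) == (4 until 10))   // columns swept in both passes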
override def evalbatch(mats:Array[Mat], ipass:Int, pos:Long):FMat = {
- if (batchSize < 0) batchSize = gmats(0).ncols;
+ if (batchSize < 0) batchSize = gmats(0).ncols
if (batchSize == gmats(0).ncols) {
- assignInputs(gmats, ipass, pos);
+ assignInputs(gmats, ipass, pos)
if (mask.asInstanceOf[AnyRef] != null) {
- modelmats(0) ~ modelmats(0) ∘ mask;
+ modelmats(0) ~ modelmats(0) ∘ mask
}
val mincol = inwidth - srcn;
- srcGrid.forward(mincol, inwidth-1, opts.debug);
+ srcGrid.forward(mincol, inwidth-1, opts.debug)
if (! opts.embed) {
- val maxcol = dstxn;
- assignTargets(gmats, ipass, pos);
- dstGrid.forward(0, maxcol-1, opts.debug);
- if (putBack >= 0) {
- output_layers(dstxn-1).output.colslice(0, gmats(0).ncols, gmats(1));
- }
- var score = 0f;
- var j = 0;
- while (j < dstxn-1) {
- score += output_layers(j).score.v;
- j += 1;
- }
- row(score/(dstxn-1));
+ val maxcol = dstxn
+ assignTargets(gmats, ipass, pos)
+      dstGrid.forward(0, maxcol-1, opts.debug)
+ if (putBack >= 0) {
+ output_layers(dstxn-1).output.colslice(0, gmats(0).ncols, gmats(1))
+ }
+ var score = 0f
+ var j = 0
+ while (j < dstxn-1) {
+ score += output_layers(j).score.v
+ j += 1
+ }
+ row(score/(dstxn-1))
} else {
- if (ogmats != null) {
- var embedding = srcGrid(height+preamble_rows-1, srcGrid.ncols-1).output.asMat;
- for (j <- 1 until opts.nembed) {
- embedding = embedding on srcGrid(height-j+preamble_rows-1, srcGrid.ncols-1).output.asMat;
- }
- ogmats(0) = embedding;
- }
- zeros(1,1);
+ if (ogmats != null) {
+ var embedding = srcGrid(height+preamble_rows-1, srcGrid.ncols-1).output.asMat
+ for (j <- 1 until opts.nembed) {
+ embedding = embedding on srcGrid(height-j+preamble_rows-1, srcGrid.ncols-1).output.asMat
+ }
+ ogmats(0) = embedding
+ }
+ zeros(1,1)
}
} else {
- zeros(1, 1);
+ zeros(1, 1)
}
}
}
@@ -244,64 +244,64 @@ object SeqToSeq {
class LearnOptions extends Learner.Options with SeqToSeq.Opts with MatSource.Opts with ADAGrad.Opts with L1Regularizer.Opts
def learner(mat0:Mat, mat1:Mat, regularize:Boolean = false) = {
- val opts = new LearnOptions;
- opts.batchSize = 128;
- val nn = new Learner(
- new MatSource(Array(mat0, mat1), opts),
- new SeqToSeq(opts),
- if (regularize) Array(new L1Regularizer(opts)) else null,
- new ADAGrad(opts),
- null,
- opts)
+ val opts = new LearnOptions
+ opts.batchSize = 128
+ val nn = new Learner(
+ new MatSource(Array(mat0, mat1), opts),
+ new SeqToSeq(opts),
+ if (regularize) Array(new L1Regularizer(opts)) else null,
+ new ADAGrad(opts),
+ null,
+ opts)
(nn, opts)
}
def learnerX(mat0:Mat, mat1:Mat) = {
- val opts = new LearnOptions;
- opts.batchSize = math.min(100000, mat0.ncols/30 + 1);
- val nn = new Learner(
- new MatSource(Array(mat0, mat1), opts),
- new SeqToSeq(opts),
- null,
- null,
- null,
- opts)
+ val opts = new LearnOptions
+ opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
+ val nn = new Learner(
+ new MatSource(Array(mat0, mat1), opts),
+ new SeqToSeq(opts),
+ null,
+ null,
+ null,
+ opts)
(nn, opts)
}
class FDSopts extends Learner.Options with SeqToSeq.Opts with FileSource.Opts with ADAGrad.Opts with L1Regularizer.Opts
- def learner(fn1:String, fn2:String, regularize:Boolean, adagrad:Boolean):(Learner, FDSopts) = learner(List(FileSource.simpleEnum(fn1,1,0), FileSource.simpleEnum(fn2,1,0)), regularize, adagrad);
+ def learner(fn1:String, fn2:String, regularize:Boolean, adagrad:Boolean):(Learner, FDSopts) = learner(List(FileSource.simpleEnum(fn1,1,0), FileSource.simpleEnum(fn2,1,0)), regularize, adagrad)
- def learner(fn1:String, fn2:String):(Learner, FDSopts) = learner(List(FileSource.simpleEnum(fn1,1,0), FileSource.simpleEnum(fn2,1,0)), false, true);
+ def learner(fn1:String, fn2:String):(Learner, FDSopts) = learner(List(FileSource.simpleEnum(fn1,1,0), FileSource.simpleEnum(fn2,1,0)), false, true)
- def learnerX(fn1:String, fn2:String):(Learner, FDSopts) = learnerX(List(FileSource.simpleEnum(fn1,1,0), FileSource.simpleEnum(fn2,1,0)));
+ def learnerX(fn1:String, fn2:String):(Learner, FDSopts) = learnerX(List(FileSource.simpleEnum(fn1,1,0), FileSource.simpleEnum(fn2,1,0)))
- def learner(fnames:List[(Int)=>String]):(Learner, FDSopts) = learner(fnames, false, true);
+ def learner(fnames:List[(Int)=>String]):(Learner, FDSopts) = learner(fnames, false, true)
def learner(fnames:List[(Int)=>String], regularize:Boolean, adagrad:Boolean):(Learner, FDSopts) = {
- val opts = new FDSopts;
+ val opts = new FDSopts
opts.fnames = fnames
- opts.batchSize = 128;
- opts.eltsPerSample = 500;
- implicit val threads = threadPool(4);
+ opts.batchSize = 128
+ opts.eltsPerSample = 500
+ implicit val threads = threadPool(4)
val ds = new FileSource(opts)
- val nn = new Learner(
- ds,
- new SeqToSeq(opts),
- if (regularize) Array(new L1Regularizer(opts)) else null,
- if (adagrad) new ADAGrad(opts) else new Grad(opts),
- null,
- opts)
+ val nn = new Learner(
+ ds,
+ new SeqToSeq(opts),
+ if (regularize) Array(new L1Regularizer(opts)) else null,
+ if (adagrad) new ADAGrad(opts) else new Grad(opts),
+ null,
+ opts)
(nn, opts)
}
def learnerX(fnames:List[(Int)=>String]):(Learner, FDSopts) = {
- val opts = new FDSopts;
+ val opts = new FDSopts
opts.fnames = fnames
- opts.batchSize = 128;
- opts.eltsPerSample = 500;
- implicit val threads = threadPool(4);
+ opts.batchSize = 128
+ opts.eltsPerSample = 500
+ implicit val threads = threadPool(4)
val ds = new FileSource(opts)
val nn = new Learner(
ds,
@@ -316,31 +316,31 @@ object SeqToSeq {
class FEopts extends Learner.Options with SeqToSeq.Opts with FileSource.Opts with FileSink.Opts
def embed(model:SeqToSeq, ifname:String, ofname:String):(Learner, FEopts) = {
- val opts = new FEopts;
- opts.copyFrom(model.opts);
- opts.fnames = List(FileSource.simpleEnum(ifname,1,0));
- opts.ofnames = List(FileSource.simpleEnum(ofname,1,0));
- opts.embed = true;
- val newmod = new SeqToSeq(opts);
- newmod.refresh = false;
- model.copyTo(newmod);
- implicit val threads = threadPool(4);
+ val opts = new FEopts
+ opts.copyFrom(model.opts)
+ opts.fnames = List(FileSource.simpleEnum(ifname,1,0))
+ opts.ofnames = List(FileSource.simpleEnum(ofname,1,0))
+ opts.embed = true
+ val newmod = new SeqToSeq(opts)
+ newmod.refresh = false
+ model.copyTo(newmod)
+ implicit val threads = threadPool(4)
val ds = new FileSource(opts)
- val nn = new Learner(
- new FileSource(opts),
- newmod,
- null,
- null,
- new FileSink(opts),
- opts)
+ val nn = new Learner(
+ new FileSource(opts),
+ newmod,
+ null,
+ null,
+ new FileSink(opts),
+ opts)
(nn, opts)
}
def load(fname:String):SeqToSeq = {
- val mm = new SeqToSeq;
- mm.loadMetaData(fname);
- mm.load(fname);
- mm
+ val mm = new SeqToSeq
+ mm.loadMetaData(fname)
+ mm.load(fname)
+ mm
}
}
diff --git a/src/main/scala/BIDMach/networks/Word2Vec.scala b/src/main/scala/BIDMach/networks/Word2Vec.scala
index 0ef8ec00..5facc52d 100644
--- a/src/main/scala/BIDMach/networks/Word2Vec.scala
+++ b/src/main/scala/BIDMach/networks/Word2Vec.scala
@@ -48,379 +48,379 @@ import scala.concurrent.duration.Duration
- maxArraySize(1024^3) the maximum size in words of a model array.
- nHeadTerms(0) the size of the head of the model - these terms are not changed.
- nSlices(1) Process (num) slices of the model on (num) nodes.
- - iSlice(0) which model slice are we processing on this node?
+ - iSlice(0) which model slice are we processing on this node?
*/
class Word2Vec(override val opts:Word2Vec.Opts = new Word2Vec.Options) extends Model(opts) {
- var firstPos = -1L;
- var wordtab:Mat = null;
- var randpermute:Mat = null;
- var ubound:Mat = null;
- var minusone:Mat = null;
- var wordmask:Mat = null;
- var allones:Mat = null;
- var randwords:Mat = null;
- var randsamp:Mat = null;
- var retEvalPos:GMat = null;
- var retEvalNeg:GMat = null;
- var nfeats = 0;
- var ncols = 0;
- var expt = 0f;
- var vexp = 0f;
- var salpha = 0f;
- var maxCols = 0;
- var nmmats = 1;
- var fmm:Array[Array[Float]] = null;
+ var firstPos = -1L
+ var wordtab:Mat = null
+ var randpermute:Mat = null
+ var ubound:Mat = null
+ var minusone:Mat = null
+ var wordmask:Mat = null
+ var allones:Mat = null
+ var randwords:Mat = null
+ var randsamp:Mat = null
+ var retEvalPos:GMat = null
+ var retEvalNeg:GMat = null
+ var nfeats = 0
+ var ncols = 0
+ var expt = 0f
+ var vexp = 0f
+ var salpha = 0f
+ var maxCols = 0
+ var nmmats = 1
+ var fmm:Array[Array[Float]] = null
- var ntimes = 12;
- var times:FMat = null;
- var delays:FMat = null;
+ var ntimes = 12
+ var times:FMat = null
+ var delays:FMat = null
var log:ArrayBuffer[String] = null
val dateFormat = new SimpleDateFormat("hh:mm:ss:SSS")
def addTime(itime:Int, lasti:Int = -1) = {
val t = toc
- times(itime) = t;
+ times(itime) = t
if (itime > 0) {
- delays(itime) += times(itime) - times(itime + lasti);
+ delays(itime) += times(itime) - times(itime + lasti)
}
val today = Calendar.getInstance().getTime()
- log += "Log: %s, GPU %d, event %d" format (dateFormat.format(today), if (useGPU) getGPU else 0, itime);
+ log += "Log: %s, GPU %d, event %d" format (dateFormat.format(today), if (useGPU) getGPU else 0, itime)
}
- var test1:Mat = null;
- var test2:Mat = null;
- var test3:Mat = null;
- var test4:Mat = null;
+ var test1:Mat = null
+ var test2:Mat = null
+ var test3:Mat = null
+ var test4:Mat = null
override def init() = {
- val mats = datasource.next;
- nfeats = opts.vocabSize;
- ncols = mats(0).ncols;
- maxCols = opts.maxArraySize / opts.dim;
- datasource.reset;
- val actualFeats = opts.nHeadTerms + 1 + (nfeats - opts.nHeadTerms - 1) / opts.nSlices; // Number of features on this node.
- nmmats = 1 + (actualFeats - 1)/maxCols; // number of model mats needed
- println("nmmats = %d" format nmmats);
- val offset = if (opts.dualMode) 1 else 0;
+ val mats = datasource.next
+ nfeats = opts.vocabSize
+ ncols = mats(0).ncols
+ maxCols = opts.maxArraySize / opts.dim
+ datasource.reset
+ val actualFeats = opts.nHeadTerms + 1 + (nfeats - opts.nHeadTerms - 1) / opts.nSlices; // Number of features on this node.
+ nmmats = 1 + (actualFeats - 1)/maxCols; // number of model mats needed
+ println("nmmats = %d" format nmmats)
+ val offset = if (opts.dualMode) 1 else 0
if (refresh) {
if (actualFeats <= maxCols) {
- setmodelmats(new Array[Mat](2));
- val mm0 = rand(opts.dim, actualFeats);
- mm0 ~ mm0 - 0.5f;
- mm0 ~ mm0 / opts.dim;
- modelmats(0) = mm0; // syn0 - context model
- modelmats(1) = zeros(opts.dim, actualFeats); // syn1neg - target word model
+ setmodelmats(new Array[Mat](2))
+ val mm0 = rand(opts.dim, actualFeats)
+ mm0 ~ mm0 - 0.5f
+ mm0 ~ mm0 / opts.dim
+ modelmats(0) = mm0; // syn0 - context model
+ modelmats(1) = zeros(opts.dim, actualFeats); // syn1neg - target word model
} else {
- setmodelmats(new Array[Mat](2 * (nmmats + offset)));
+ setmodelmats(new Array[Mat](2 * (nmmats + offset)))
for (i <- 0 until nmmats) {
- val xfeats = if (i < nmmats - 1) maxCols else actualFeats - (nmmats - 1) * maxCols;
- val tmp = rand(opts.dim, xfeats);
- tmp ~ tmp - 0.5f;
- tmp ~ tmp / opts.dim;
- modelmats(2 * (i + offset)) = tmp;
- modelmats(2 * (i + offset) + 1) = zeros(opts.dim, xfeats);
+ val xfeats = if (i < nmmats - 1) maxCols else actualFeats - (nmmats - 1) * maxCols
+ val tmp = rand(opts.dim, xfeats)
+ tmp ~ tmp - 0.5f
+ tmp ~ tmp / opts.dim
+        modelmats(2 * (i + offset)) = tmp
+ modelmats(2 * (i + offset) + 1) = zeros(opts.dim, xfeats)
}
if (opts.dualMode) {
- modelmats(0) <-- modelmats(2).copy;
- modelmats(1) <-- modelmats(3).copy;
+ modelmats(0) <-- modelmats(2).copy
+ modelmats(1) <-- modelmats(3).copy
}
}
}
modelmats(0) = convertMat(modelmats(0)); // At most the first two will be GPU-based
modelmats(1) = convertMat(modelmats(1));
- val nskip = opts.nskip;
- val nwindow = nskip * 2 + 1;
+ val nskip = opts.nskip
+ val nwindow = nskip * 2 + 1
val skipcol = icol((-nskip) to -1) on icol(1 to nskip)
- expt = 1f / (1f - opts.wexpt);
+ expt = 1f / (1f - opts.wexpt)
wordtab = convertMat(max(0, min(ncols+1, iones(nwindow-1, 1) * irow(1 -> (ncols+1)) + skipcol))); // Indices for convolution matrix
wordmask = convertMat(skipcol * iones(1, ncols)); // columns = distances from center word
randpermute = convertMat(zeros(nwindow-1, ncols)); // holds random values for permuting negative context words
ubound = convertMat(zeros(1, ncols)); // upper bound random matrix
- minusone = convertMat(irow(-1));
- allones = convertMat(iones(1, ncols));
+ minusone = convertMat(irow(-1))
+ allones = convertMat(iones(1, ncols))
randwords = convertMat(zeros(1, (1.01 * opts.nneg * nskip * ncols / opts.nreuse).toInt)); // generates random negative words
randsamp = convertMat(zeros(1, ncols)); // For sub-sampling frequent words
- val gopts = opts.asInstanceOf[ADAGrad.Opts];
- vexp = gopts.vexp.v;
- salpha = opts.wsample * math.log(nfeats).toFloat;
- fmm = new Array[Array[Float]](modelmats.length);
+ val gopts = opts.asInstanceOf[ADAGrad.Opts]
+ vexp = gopts.vexp.v
+ salpha = opts.wsample * math.log(nfeats).toFloat
+ fmm = new Array[Array[Float]](modelmats.length)
if (useGPU) {
- retEvalPos = GMat(1,1);
- retEvalNeg = GMat(1,1);
+ retEvalPos = GMat(1,1)
+ retEvalNeg = GMat(1,1)
} else {
if (Mat.useMKL) {
for (i <- 0 until modelmats.length) {
- fmm(i) = modelmats(i).asInstanceOf[FMat].data;
+ fmm(i) = modelmats(i).asInstanceOf[FMat].data
}
}
}
- times = zeros(1, ntimes);
- delays = zeros(1, ntimes);
- log = ArrayBuffer();
+ times = zeros(1, ntimes)
+ delays = zeros(1, ntimes)
+ log = ArrayBuffer()
}
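// A sketch of the partitioning arithmetic in init, with illustrative sizes:
// actualFeats is this node's share of the vocabulary plus the shared head, and
// nmmats is how many maxCols-wide model matrices that share needs.
val vocabSize = 1000000; val dim = 300
val maxArraySize = 1 << 30                   // opts.maxArraySize, in words
val nHeadTerms = 1000; val nSlices = 4
val maxCols = maxArraySize / dim             // columns per model array = 3579139
val actualFeats = nHeadTerms + 1 + (vocabSize - nHeadTerms - 1) / nSlices  // 250750
val nmmats = 1 + (actualFeats - 1) / maxCols // = 1: one matrix pair suffices here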
def dobatch(gmats:Array[Mat], ipass:Int, pos:Long):Unit = {
- addTime(0);
+ addTime(0)
if (gmats(0).ncols == ncols) {
- if (firstPos < 0) firstPos = pos;
- val nsteps = 1f * pos / firstPos;
- val gopts = opts.asInstanceOf[ADAGrad.Opts];
- val lrate = gopts.lrate.dv.toFloat * math.pow(nsteps, - gopts.texp.dv).toFloat;
- val (words, lb, ub, trandwords, goodwords) = wordMats(gmats, ipass, pos);
+ if (firstPos < 0) firstPos = pos
+ val nsteps = 1f * pos / firstPos
+ val gopts = opts.asInstanceOf[ADAGrad.Opts]
+ val lrate = gopts.lrate.dv.toFloat * math.pow(nsteps, - gopts.texp.dv).toFloat
+ val (words, lb, ub, trandwords, goodwords) = wordMats(gmats, ipass, pos)
- val lrpos = lrate.dv.toFloat;
- val lrneg = if (opts.eqPosNeg) lrpos else lrpos/opts.nneg;
- if (opts.nSlices == 1 && nmmats == 1) {
- procPositives(opts.nskip, words, lb, ub, modelmats(1), modelmats(0), lrpos, vexp);
- addTime(8);
- procNegatives(opts.nneg, opts.nreuse, trandwords, goodwords, modelmats(1), modelmats(0), lrneg, vexp);
- addTime(9);
- } else {
- procPositivesSlice(opts.nskip, words, lb, ub, modelmats, lrpos, vexp, opts.iSlice);
- addTime(8);
- procNegativesSlice(opts.nneg, opts.nreuse, trandwords, goodwords, modelmats, lrneg, vexp, opts.iSlice);
- addTime(9);
- }
+ val lrpos = lrate.dv.toFloat
+      val lrneg = if (opts.eqPosNeg) lrpos else lrpos/opts.nneg
+ if (opts.nSlices == 1 && nmmats == 1) {
+ procPositives(opts.nskip, words, lb, ub, modelmats(1), modelmats(0), lrpos, vexp)
+        addTime(8)
+        procNegatives(opts.nneg, opts.nreuse, trandwords, goodwords, modelmats(1), modelmats(0), lrneg, vexp)
+ addTime(9)
+ } else {
+ procPositivesSlice(opts.nskip, words, lb, ub, modelmats, lrpos, vexp, opts.iSlice)
+        addTime(8)
+        procNegativesSlice(opts.nneg, opts.nreuse, trandwords, goodwords, modelmats, lrneg, vexp, opts.iSlice)
+ addTime(9)
+ }
}
}
def evalbatch(gmats:Array[Mat], ipass:Int, pos:Long):FMat = {
- addTime(0);
- if (gmats(0).ncols == ncols) {
- val (words, lb, ub, trandwords, goodwords) = wordMats(gmats, ipass, pos);
- val (epos, eneg) = if (opts.nSlices == 1 && nmmats == 1) {
- val epos0 = evalPositives(opts.nskip, words, lb, ub, modelmats(1), modelmats(0));
- addTime(10,-3);
- val eneg0 = evalNegatives(opts.nneg, opts.nreuse, trandwords, goodwords, modelmats(1), modelmats(0));
- addTime(11);
- (epos0, eneg0)
- } else {
- val epos0 = evalPositivesSlice(opts.nskip, words, lb, ub, modelmats, opts.iSlice);
- addTime(10,-3);
- val eneg0 = evalNegativesSlice(opts.nneg, opts.nreuse, trandwords, goodwords, modelmats, opts.iSlice);
- addTime(11);
- (epos0, eneg0)
- }
- val score = ((epos + eneg / (if (opts.eqPosNeg) 1 else opts.nneg)) / goodwords.length);
- row(score)
- } else row(0);
+ addTime(0)
+ if (gmats(0).ncols == ncols) {
+ val (words, lb, ub, trandwords, goodwords) = wordMats(gmats, ipass, pos)
+ val (epos, eneg) = if (opts.nSlices == 1 && nmmats == 1) {
+ val epos0 = evalPositives(opts.nskip, words, lb, ub, modelmats(1), modelmats(0))
+ addTime(10,-3)
+ val eneg0 = evalNegatives(opts.nneg, opts.nreuse, trandwords, goodwords, modelmats(1), modelmats(0))
+ addTime(11)
+ (epos0, eneg0)
+ } else {
+ val epos0 = evalPositivesSlice(opts.nskip, words, lb, ub, modelmats, opts.iSlice)
+ addTime(10,-3)
+ val eneg0 = evalNegativesSlice(opts.nneg, opts.nreuse, trandwords, goodwords, modelmats, opts.iSlice)
+ addTime(11)
+ (epos0, eneg0)
+ }
+ val score = ((epos + eneg / (if (opts.eqPosNeg) 1 else opts.nneg)) / goodwords.length)
+ row(score)
+ } else row(0)
}
def wordMats(mats:Array[Mat], ipass:Int, pos:Long):(Mat, Mat, Mat, Mat, Mat) = {
- val wordsens = mats(0);
- val words = if (opts.iflip) wordsens(1,?) else wordsens(0,?);
+ val wordsens = mats(0)
+ val words = if (opts.iflip) wordsens(1,?) else wordsens(0,?)
val wgood = words < opts.vocabSize; // Find OOV words
- addTime(1);
+ addTime(1)
rand(randsamp); // Take a random sample
- val wrat = float(words+1) * salpha;
- wrat ~ sqrt(wrat) + wrat;
- wgood ~ wgood ∘ int(randsamp < wrat);
+ val wrat = float(words+1) * salpha
+ wrat ~ sqrt(wrat) + wrat
+ wgood ~ wgood ∘ int(randsamp < wrat)
words ~ (wgood ∘ (words + 1)) - 1; // Set OOV or skipped samples to -1
- addTime(2);
+ addTime(2)
rand(ubound); // get random upper and lower bounds
- val ubrand = min(opts.nskip, int(ubound * opts.nskip) + 1);
- val lbrand = - ubrand;
- addTime(3);
+ val ubrand = min(opts.nskip, int(ubound * opts.nskip) + 1)
+ val lbrand = - ubrand
+ addTime(3)
val sentencenum = if (opts.iflip) wordsens(0,?) else wordsens(1,?); // Get the nearest sentence boundaries
- val lbsentence = - cumsumByKey(allones, sentencenum) + 1;
- val ubsentence = reverse(cumsumByKey(allones, reverse(sentencenum))) - 1;
+ val lbsentence = - cumsumByKey(allones, sentencenum) + 1
+ val ubsentence = reverse(cumsumByKey(allones, reverse(sentencenum))) - 1
val lb = max(lbrand, lbsentence); // Combine the bounds
- val ub = min(ubrand, ubsentence);
+ val ub = min(ubrand, ubsentence)
test3 = lb
test4 = ub
- addTime(4);
+ addTime(4)
val (trandwords, contextwords) = (words, lb, ub) match {
case (giwords:GIMat, gilb:GIMat, giub:GIMat) => {
- val iwords = minusone \ words \ minusone; // Build a convolution matrix.
- val cwords = iwords(wordtab);
- val pgoodwords = (wordmask >= lb) ∘ (wordmask <= ub) ∘ (cwords >= 0) ∘ (words >= 0); // Find context words satisfying the bound
- // and check that context and center word are good.
- val fgoodwords = float(pgoodwords);
- addTime(5);
-
- test1 = cwords;
+ val iwords = minusone \ words \ minusone; // Build a convolution matrix.
+ val cwords = iwords(wordtab)
+ val pgoodwords = (wordmask >= lb) ∘ (wordmask <= ub) ∘ (cwords >= 0) ∘ (words >= 0); // Find context words satisfying the bound
+ // and check that context and center word are good.
+ val fgoodwords = float(pgoodwords)
+ addTime(5)
+
+ test1 = cwords
- rand(randpermute); // Prepare a random permutation of context words for negative sampling
- randpermute ~ (fgoodwords ∘ (randpermute + 1f)) - 1f; // set the values for bad words to -1.
- val (vv, ii) = sortdown2(randpermute.view(randpermute.length, 1)); // Permute the good words
- val ngood = sum(vv >= 0f).dv.toInt; // Count of the good words
- val ngoodcols = ngood / opts.nreuse; // Number of good columns
- val cwi = cwords(ii);
-
- test2 = cwi
- addTime(6);
+ rand(randpermute); // Prepare a random permutation of context words for negative sampling
+ randpermute ~ (fgoodwords ∘ (randpermute + 1f)) - 1f; // set the values for bad words to -1.
+ val (vv, ii) = sortdown2(randpermute.view(randpermute.length, 1)); // Permute the good words
+ val ngood = sum(vv >= 0f).dv.toInt; // Count of the good words
+ val ngoodcols = ngood / opts.nreuse; // Number of good columns
+ val cwi = cwords(ii)
+
+ test2 = cwi
+ addTime(6)
- rand(randwords); // Compute some random negatives
- val irandwords = min(nfeats-1, int(nfeats * (randwords ^ expt)));
- val trandwords0 = irandwords.view(opts.nneg, ngoodcols); // shrink the matrices to the available data
- val contextwords0 = cwi.view(opts.nreuse, ngoodcols);
- addTime(7);
- (trandwords0, contextwords0)
+ rand(randwords); // Compute some random negatives
+        val irandwords = min(nfeats-1, int(nfeats * (randwords ^ expt)))
+ val trandwords0 = irandwords.view(opts.nneg, ngoodcols); // shrink the matrices to the available data
+ val contextwords0 = cwi.view(opts.nreuse, ngoodcols)
+ addTime(7)
+ (trandwords0, contextwords0)
}
case (iwords:IMat, ilb:IMat, iub:IMat) => {
- getnegs(iwords, ilb, iub, Mat.numThreads);
+ getnegs(iwords, ilb, iub, Mat.numThreads)
}
}
- (words, lb, ub, trandwords, contextwords);
+ (words, lb, ub, trandwords, contextwords)
}
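// The subsampling step in wordMats keeps word id w with probability
// min(1, sqrt(r) + r), where r = (w+1) * wsample * log(nfeats); ids are assumed
// frequency-ranked, so frequent (low-id) words are dropped most often. A scalar
// sketch with illustrative constants.
val nfeats = 100000; val wsample = 1e-4f
val salpha = wsample * math.log(nfeats).toFloat
def keepProb(id: Int): Float = {
  val r = (id + 1) * salpha
  math.min(1f, math.sqrt(r).toFloat + r)
}
// keepProb(0) ~ 0.035f (very common word); keepProb(50000) == 1f (rare word)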
def getnegs(words:IMat, lb:IMat, ub:IMat, nthreads:Int):(IMat, IMat) = {
- val ncols = words.ncols;
+ val ncols = words.ncols
// First count the good context words
val cwcounts = irow((0 until nthreads).par.map((ithread:Int) => { // work on blocks
- val istart = ((1L * ncols * ithread)/nthreads).toInt;
- val iend = ((1L * ncols * (ithread + 1))/nthreads).toInt;
- var i = istart;
- var icount = 0;
+ val istart = ((1L * ncols * ithread)/nthreads).toInt
+ val iend = ((1L * ncols * (ithread + 1))/nthreads).toInt
+ var i = istart
+ var icount = 0
while (i < iend) { // iterate over center words
if (words.data(i) >= 0) { // check center word is good
- var j = lb.data(i); // get lower and upper bounds
- var jend = ub.data(i);
- while (j <= jend) {
- if (j != 0 && words.data(i + j) >= 0) { // if not center word and context word is good, count it.
- icount += 1;
- }
- j += 1;
- }
+ var j = lb.data(i); // get lower and upper bounds
+ var jend = ub.data(i)
+ while (j <= jend) {
+ if (j != 0 && words.data(i + j) >= 0) { // if not center word and context word is good, count it.
+            icount += 1
+ }
+ j += 1
+ }
}
- i += 1;
+ i += 1
}
icount
}).toArray)
// Now we know how many good words in each block
val ccc = cumsum(cwcounts); // so size the context word and neg word matrices
- val ngroups = ccc(ccc.length - 1) / opts.nreuse;
- val contextwords0 = izeros(opts.nreuse, ngroups);
- val trandwords0 = izeros(opts.nneg, ngroups);
+ val ngroups = ccc(ccc.length - 1) / opts.nreuse
+ val contextwords0 = izeros(opts.nreuse, ngroups)
+ val trandwords0 = izeros(opts.nneg, ngroups)
(0 until nthreads).par.map((ithread:Int) => { // Copy the good words into a dense matrix (contextwords0)
- val istart = ((1L * ncols * ithread)/nthreads).toInt;
- val iend = ((1L * ncols * (ithread + 1))/nthreads).toInt;
- var i = istart;
- var icount = 0;
- val mptr = ccc(ithread) - ccc(0);
+ val istart = ((1L * ncols * ithread)/nthreads).toInt
+ val iend = ((1L * ncols * (ithread + 1))/nthreads).toInt
+ var i = istart
+ var icount = 0
+ val mptr = ccc(ithread) - ccc(0)
while (i < iend) {
if (words.data(i) >= 0) {
- var j = lb.data(i);
- var jend = ub.data(i);
- while (j <= jend && mptr + icount < contextwords0.length) {
- if (j != 0 && words.data(i + j) >= 0) {
- contextwords0.data(mptr + icount) = words.data(i + j)
- icount += 1;
- }
- j += 1;
- }
+ var j = lb.data(i)
+ var jend = ub.data(i)
+ while (j <= jend && mptr + icount < contextwords0.length) {
+ if (j != 0 && words.data(i + j) >= 0) {
+ contextwords0.data(mptr + icount) = words.data(i + j)
+              icount += 1
+ }
+ j += 1
+ }
}
- i += 1;
+ i += 1
}
icount
})
- addTime(5);
+ addTime(5)
val prand = drand(opts.nreuse, ngroups); // Rands for permutation
var i = 0; // Permute the good context words randomly
- val n = prand.length;
+ val n = prand.length
while (i < n) {
- val indx = math.min(n-1, i + math.floor(prand.data(i) * (n - i)).toInt);
+ val indx = math.min(n-1, i + math.floor(prand.data(i) * (n - i)).toInt)
if (indx > i) {
- val tmp = contextwords0.data(i);
- contextwords0.data(i) = contextwords0.data(indx);
- contextwords0.data(indx) = tmp;
+ val tmp = contextwords0.data(i)
+ contextwords0.data(i) = contextwords0.data(indx)
+ contextwords0.data(indx) = tmp
}
- i += 1;
+ i += 1
}
- addTime(6);
+ addTime(6)
val randneg = rand(opts.nneg, ngroups); // Compute some random negatives
(0 until nthreads).par.map((ithread:Int) => { // Work in blocks over the negs
- val istart = ((1L * ngroups * opts.nneg * ithread)/nthreads).toInt;
- val iend = ((1L * ngroups * opts.nneg * (ithread + 1))/nthreads).toInt;
- var i = istart;
+ val istart = ((1L * ngroups * opts.nneg * ithread)/nthreads).toInt
+ val iend = ((1L * ngroups * opts.nneg * (ithread + 1))/nthreads).toInt
+ var i = istart
while (i < iend) {
- trandwords0.data(i) = math.min(nfeats-1, (nfeats * math.pow(randneg.data(i), expt)).toInt);
- i += 1;
+ trandwords0.data(i) = math.min(nfeats-1, (nfeats * math.pow(randneg.data(i), expt)).toInt)
+ i += 1
}
})
-// println("mean=%f" format mean(FMat(trandwords0(?) < opts.nHeadTerms)).v);
- addTime(7);
+// println("mean=%f" format mean(FMat(trandwords0(?) < opts.nHeadTerms)).v)
+ addTime(7)
(trandwords0, contextwords0)
}
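// Both the GPU path in wordMats and getnegs above draw negatives as
// floor(nfeats * u^expt) with u uniform and expt = 1/(1 - wexpt), which skews
// samples toward small (frequent) ids. A scalar sketch, wexpt = 0.75 assumed.
val nfeats = 100000; val wexpt = 0.75f
val expt = 1f / (1f - wexpt)                 // = 4f
def sampleNeg(u: Double): Int =
  math.min(nfeats - 1, (nfeats * math.pow(u, expt)).toInt)
assert(sampleNeg(0.5) == 6250)               // 0.5^4 * 100000
assert(sampleNeg(1.0) == nfeats - 1)         // clamped to the last id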
def procPositives(nskip:Int, words:Mat, lbound:Mat, ubound:Mat, model1:Mat, model2:Mat, lrate:Float, vexp:Float) = {
- val nrows = model1.nrows;
- val ncols = model1.ncols;
- val nwords = words.ncols;
- Mat.nflops += 6L * nwords * nskip * nrows;
+ val nrows = model1.nrows
+ val ncols = model1.ncols
+ val nwords = words.ncols
+ Mat.nflops += 6L * nwords * nskip * nrows
(words, lbound, ubound, model1, model2) match {
case (w:GIMat, lb:GIMat, ub:GIMat, m1:GMat, m2:GMat) => {
- val err = CUMACH.word2vecPos(nrows, nwords, nskip, w.data, lb.data, ub.data, m1.data, m2.data, lrate, vexp);
- if (err != 0) throw new RuntimeException("CUMACH.word2vecPos error " + cudaGetErrorString(err));
+ val err = CUMACH.word2vecPos(nrows, nwords, nskip, w.data, lb.data, ub.data, m1.data, m2.data, lrate, vexp)
+ if (err != 0) throw new RuntimeException("CUMACH.word2vecPos error " + cudaGetErrorString(err))
}
case (w:IMat, lb:IMat, ub:IMat, m1:FMat, m2:FMat) => if (Mat.useMKL) {
- CPUMACH.word2vecPos(nrows, nwords, nskip, w.data, lb.data, ub.data, m1.data, m2.data, lrate, vexp, Mat.numThreads);
+ CPUMACH.word2vecPos(nrows, nwords, nskip, w.data, lb.data, ub.data, m1.data, m2.data, lrate, vexp, Mat.numThreads)
} else {
- Word2Vec.procPosCPU(nrows, nwords, nskip, w.data, lb.data, ub.data, m1.data, m2.data, lrate, vexp, Mat.numThreads);
+ Word2Vec.procPosCPU(nrows, nwords, nskip, w.data, lb.data, ub.data, m1.data, m2.data, lrate, vexp, Mat.numThreads)
}
}
}
def procNegatives(nwa:Int, nwb:Int, wordsa:Mat, wordsb:Mat, modela:Mat, modelb:Mat, lrate:Float, vexp:Float) = {
- val nrows = modela.nrows;
- val ncols = modela.ncols;
- val nwords = wordsa.ncols;
- Mat.nflops += 6L * nwords * nwa * nwb * nrows;
+ val nrows = modela.nrows
+ val ncols = modela.ncols
+ val nwords = wordsa.ncols
+ Mat.nflops += 6L * nwords * nwa * nwb * nrows
(wordsa, wordsb, modela, modelb) match {
case (wa:GIMat, wb:GIMat, ma:GMat, mb:GMat) => {
- val err = CUMACH.word2vecNeg(nrows, nwords, nwa, nwb, wa.data, wb.data, ma.data, mb.data, lrate, vexp);
- if (err != 0) throw new RuntimeException("CUMACH.word2vecNeg error " + cudaGetErrorString(err));
+ val err = CUMACH.word2vecNeg(nrows, nwords, nwa, nwb, wa.data, wb.data, ma.data, mb.data, lrate, vexp)
+ if (err != 0) throw new RuntimeException("CUMACH.word2vecNeg error " + cudaGetErrorString(err))
}
case (wa:IMat, wb:IMat, ma:FMat, mb:FMat) => if (Mat.useMKL) {
- CPUMACH.word2vecNeg(nrows, nwords, nwa, nwb, wa.data, wb.data, ma.data, mb.data, lrate, vexp, Mat.numThreads);
+ CPUMACH.word2vecNeg(nrows, nwords, nwa, nwb, wa.data, wb.data, ma.data, mb.data, lrate, vexp, Mat.numThreads)
} else {
- Word2Vec.procNegCPU(nrows, nwords, nwa, nwb, wa.data, wb.data, ma.data, mb.data, lrate, vexp, Mat.numThreads);
+ Word2Vec.procNegCPU(nrows, nwords, nwa, nwb, wa.data, wb.data, ma.data, mb.data, lrate, vexp, Mat.numThreads)
}
}
}
def procPositivesSlice(nskip:Int, words:Mat, lbound:Mat, ubound:Mat, modelmats:Array[Mat], lrate:Float, vexp:Float, islice:Int) = {
import scala.concurrent.ExecutionContext.Implicits.global
- val nrows = modelmats(0).nrows;
- val nwords = words.ncols;
- Mat.nflops += 6L * nwords * nskip * nrows;
+ val nrows = modelmats(0).nrows
+ val nwords = words.ncols
+ Mat.nflops += 6L * nwords * nskip * nrows
(words, lbound, ubound) match {
case (w:IMat, lb:IMat, ub:IMat) => if (Mat.useMKL) {
- CPUMACH.word2vecPosSlice(nrows, nwords, nskip, w.data, lb.data, ub.data, fmm, lrate, vexp, Mat.numThreads,
- islice, opts.nSlices, maxCols, opts.nHeadTerms, if (opts.dualMode) 1 else 0, opts.doHead);
+ CPUMACH.word2vecPosSlice(nrows, nwords, nskip, w.data, lb.data, ub.data, fmm, lrate, vexp, Mat.numThreads,
+ islice, opts.nSlices, maxCols, opts.nHeadTerms, if (opts.dualMode) 1 else 0, opts.doHead)
} else {
- Word2Vec.procPosCPUslice(nrows, nwords, nskip, w.data, lb.data, ub.data, modelmats, lrate, vexp, Mat.numThreads,
- islice, opts.nSlices, maxCols, opts.nHeadTerms, opts.dualMode, opts.doHead);
+ Word2Vec.procPosCPUslice(nrows, nwords, nskip, w.data, lb.data, ub.data, modelmats, lrate, vexp, Mat.numThreads,
+ islice, opts.nSlices, maxCols, opts.nHeadTerms, opts.dualMode, opts.doHead)
}
case (w:GIMat, lb:GIMat, ub:GIMat) => if (opts.dualMode) {
- val m0 = modelmats(0).asInstanceOf[GMat];
- val m1 = modelmats(1).asInstanceOf[GMat];
- m0 <-- modelmats(2);
- m1 <-- modelmats(3);
-// val err = CUMACH.word2vecPos(nrows, m0.ncols, nskip, w.data, lb.data, ub.data, m0.data, m1.data, lrate, vexp);
-// if (err != 0) throw new RuntimeException("CUMACH.word2vecPos error " + cudaGetErrorString(err));
- modelmats(2) <-- m0;
- modelmats(3) <-- m1;
- Word2Vec.procPosCPUslice(nrows, nwords, nskip, IMat(w).data, IMat(lb).data, IMat(ub).data, modelmats, lrate, vexp, Mat.numThreads,
- islice, opts.nSlices, maxCols, opts.nHeadTerms, opts.dualMode, opts.doHead);
+ val m0 = modelmats(0).asInstanceOf[GMat]
+ val m1 = modelmats(1).asInstanceOf[GMat]
+ m0 <-- modelmats(2)
+ m1 <-- modelmats(3)
+// val err = CUMACH.word2vecPos(nrows, m0.ncols, nskip, w.data, lb.data, ub.data, m0.data, m1.data, lrate, vexp)
+//      if (err != 0) throw new RuntimeException("CUMACH.word2vecPos error " + cudaGetErrorString(err))
+ modelmats(2) <-- m0
+ modelmats(3) <-- m1
+ Word2Vec.procPosCPUslice(nrows, nwords, nskip, IMat(w).data, IMat(lb).data, IMat(ub).data, modelmats, lrate, vexp, Mat.numThreads,
+ islice, opts.nSlices, maxCols, opts.nHeadTerms, opts.dualMode, opts.doHead)
} else {
throw new RuntimeException("Use dualMode to use the GPU with multi-part models")
}
@@ -429,154 +429,154 @@ class Word2Vec(override val opts:Word2Vec.Opts = new Word2Vec.Options) extends M
def procNegativesSlice(nwa:Int, nwb:Int, wordsa:Mat, wordsb:Mat, modelmats:Array[Mat], lrate:Float, vexp:Float, islice:Int) = {
import scala.concurrent.ExecutionContext.Implicits.global
- val nrows = modelmats(0).nrows;
- val nvocab = modelmats(0).ncols;
- val nwords = wordsa.ncols;
- Mat.nflops += 6L * nwords * nwa * nwb * nrows;
+ val nrows = modelmats(0).nrows
+ val nvocab = modelmats(0).ncols
+ val nwords = wordsa.ncols
+ Mat.nflops += 6L * nwords * nwa * nwb * nrows
(wordsa, wordsb) match {
case (wa:IMat, wb:IMat) => if (Mat.useMKL) {
- CPUMACH.word2vecNegSlice(nrows, nwords, nwa, nwb, wa.data, wb.data, fmm, lrate, vexp, Mat.numThreads,
- islice, opts.nSlices, maxCols, opts.nHeadTerms, if (opts.dualMode) 1 else 0, opts.doHead);
+ CPUMACH.word2vecNegSlice(nrows, nwords, nwa, nwb, wa.data, wb.data, fmm, lrate, vexp, Mat.numThreads,
+ islice, opts.nSlices, maxCols, opts.nHeadTerms, if (opts.dualMode) 1 else 0, opts.doHead)
} else {
- Word2Vec.procNegCPUslice(nrows, nwords, nwa, nwb, wa.data, wb.data, modelmats, lrate, vexp, Mat.numThreads,
- islice, opts.nSlices, maxCols, opts.nHeadTerms, opts.dualMode, opts.doHead);
+ Word2Vec.procNegCPUslice(nrows, nwords, nwa, nwb, wa.data, wb.data, modelmats, lrate, vexp, Mat.numThreads,
+ islice, opts.nSlices, maxCols, opts.nHeadTerms, opts.dualMode, opts.doHead)
}
case (wa:GIMat, wb:GIMat) => {
- if (opts.dualMode) {
- val m0 = modelmats(0).asInstanceOf[GMat];
- val m1 = modelmats(1).asInstanceOf[GMat];
- m0 <-- modelmats(2);
- m1 <-- modelmats(3);
- val err = CUMACH.word2vecNegFilt(nrows, nwords, nvocab, nwa, nwb, wa.data, wb.data, m0.data, m1.data, lrate, vexp);
- if (err != 0) throw new RuntimeException("CUMACH.word2vecNegFilt error " + cudaGetErrorString(err));
- modelmats(2) <-- m0;
- modelmats(3) <-- m1;
- Word2Vec.procNegCPUslice(nrows, nwords, nwa, nwb, IMat(wa).data, IMat(wb).data, modelmats, lrate, vexp, Mat.numThreads,
- islice, opts.nSlices, maxCols, opts.nHeadTerms, opts.dualMode, opts.doHead);
- } else {
- throw new RuntimeException("Use dualMode to use the GPU with multi-part models")
- }
+ if (opts.dualMode) {
+ val m0 = modelmats(0).asInstanceOf[GMat]
+ val m1 = modelmats(1).asInstanceOf[GMat]
+ m0 <-- modelmats(2)
+ m1 <-- modelmats(3)
+ val err = CUMACH.word2vecNegFilt(nrows, nwords, nvocab, nwa, nwb, wa.data, wb.data, m0.data, m1.data, lrate, vexp)
+          if (err != 0) throw new RuntimeException("CUMACH.word2vecNegFilt error " + cudaGetErrorString(err))
+ modelmats(2) <-- m0
+ modelmats(3) <-- m1
+ Word2Vec.procNegCPUslice(nrows, nwords, nwa, nwb, IMat(wa).data, IMat(wb).data, modelmats, lrate, vexp, Mat.numThreads,
+ islice, opts.nSlices, maxCols, opts.nHeadTerms, opts.dualMode, opts.doHead)
+ } else {
+ throw new RuntimeException("Use dualMode to use the GPU with multi-part models")
+ }
}
}
}
def evalPositives(nskip:Int, words:Mat, lbound:Mat, ubound:Mat, model1:Mat, model2:Mat):Double = {
- val nrows = model1.nrows;
- val ncols = model1.ncols;
- val nwords = words.ncols;
- Mat.nflops += 2L * nwords * nskip * nrows;
+ val nrows = model1.nrows
+ val ncols = model1.ncols
+ val nwords = words.ncols
+ Mat.nflops += 2L * nwords * nskip * nrows
(words, lbound, ubound, model1, model2) match {
case (w:GIMat, lb:GIMat, ub:GIMat, m1:GMat, m2:GMat) => {
retEvalPos.clear
- val err = CUMACH.word2vecEvalPos(nrows, nwords, nskip, w.data, lb.data, ub.data, m1.data, m2.data, retEvalPos.data);
- if (err != 0) throw new RuntimeException("CUMACH.word2vecEvalPos error " + cudaGetErrorString(err));
- retEvalPos.dv;
+ val err = CUMACH.word2vecEvalPos(nrows, nwords, nskip, w.data, lb.data, ub.data, m1.data, m2.data, retEvalPos.data)
+ if (err != 0) throw new RuntimeException("CUMACH.word2vecEvalPos error " + cudaGetErrorString(err))
+ retEvalPos.dv
}
case (w:IMat, lb:IMat, ub:IMat, m1:FMat, m2:FMat) =>
if (Mat.useMKL) {
- CPUMACH.word2vecEvalPos(nrows, nwords, nskip, w.data, lb.data, ub.data, m1.data, m2.data, Mat.numThreads);
+ CPUMACH.word2vecEvalPos(nrows, nwords, nskip, w.data, lb.data, ub.data, m1.data, m2.data, Mat.numThreads)
} else {
- Word2Vec.evalPosCPU(nrows, nwords, nskip, w.data, lb.data, ub.data, m1.data, m2.data, Mat.numThreads);
+ Word2Vec.evalPosCPU(nrows, nwords, nskip, w.data, lb.data, ub.data, m1.data, m2.data, Mat.numThreads)
}
}
}
def evalPositivesSlice(nskip:Int, words:Mat, lbound:Mat, ubound:Mat, modelmats:Array[Mat], islice:Int):Double = {
- val nrows = modelmats(0).nrows;
- val nwords = words.ncols;
- Mat.nflops += 2L * nwords * nskip * nrows;
+ val nrows = modelmats(0).nrows
+ val nwords = words.ncols
+ Mat.nflops += 2L * nwords * nskip * nrows
(words, lbound, ubound) match {
case (w:IMat, lb:IMat, ub:IMat) =>
Word2Vec.evalPosCPUslice(nrows, nwords, nskip, w.data, lb.data, ub.data, modelmats, Mat.numThreads,
- islice, opts.nSlices, maxCols, opts.nHeadTerms, opts.dualMode);
+ islice, opts.nSlices, maxCols, opts.nHeadTerms, opts.dualMode)
}
}
def evalNegatives(nwa:Int, nwb:Int, wordsa:Mat, wordsb:Mat, modela:Mat, modelb:Mat):Double = {
- val nrows = modela.nrows;
- val ncols = modela.ncols;
- val nwords = wordsa.ncols;
- Mat.nflops += 2L * nwords * nwa * nwb * nrows;
+ val nrows = modela.nrows
+ val ncols = modela.ncols
+ val nwords = wordsa.ncols
+ Mat.nflops += 2L * nwords * nwa * nwb * nrows
(wordsa, wordsb, modela, modelb) match {
case (wa:GIMat, wb:GIMat, ma:GMat, mb:GMat) => {
retEvalNeg.clear
- val err = CUMACH.word2vecEvalNeg(nrows, nwords, nwa, nwb, wa.data, wb.data, ma.data, mb.data, retEvalNeg.data);
- if (err != 0) throw new RuntimeException("CUMACH.word2vecEvalNeg error " + cudaGetErrorString(err));
+ val err = CUMACH.word2vecEvalNeg(nrows, nwords, nwa, nwb, wa.data, wb.data, ma.data, mb.data, retEvalNeg.data)
+ if (err != 0) throw new RuntimeException("CUMACH.word2vecEvalNeg error " + cudaGetErrorString(err))
-      retEvalNeg.dv;
+      retEvalNeg.dv
}
case (wa:IMat, wb:IMat, ma:FMat, mb:FMat) =>
if (Mat.useMKL) {
- CPUMACH.word2vecEvalNeg(nrows, nwords, nwa, nwb, wa.data, wb.data, ma.data, mb.data, Mat.numThreads);
+        CPUMACH.word2vecEvalNeg(nrows, nwords, nwa, nwb, wa.data, wb.data, ma.data, mb.data, Mat.numThreads)
} else {
- Word2Vec.evalNegCPU(nrows, nwords, nwa, nwb, wa.data, wb.data, ma.data, mb.data, Mat.numThreads);
+ Word2Vec.evalNegCPU(nrows, nwords, nwa, nwb, wa.data, wb.data, ma.data, mb.data, Mat.numThreads)
}
}
}
def evalNegativesSlice(nwa:Int, nwb:Int, wordsa:Mat, wordsb:Mat, modelmats:Array[Mat], islice:Int):Double = {
- val nrows = modelmats(0).nrows;
- val nwords = wordsa.ncols;
- Mat.nflops += 2L * nwords * nwa * nwb * nrows;
+ val nrows = modelmats(0).nrows
+ val nwords = wordsa.ncols
+ Mat.nflops += 2L * nwords * nwa * nwb * nrows
(wordsa, wordsb) match {
case (wa:IMat, wb:IMat) =>
- Word2Vec.evalNegCPUslice(nrows, nwords, nwa, nwb, wa.data, wb.data, modelmats, Mat.numThreads,
- islice, opts.nSlices, maxCols, opts.nHeadTerms, opts.dualMode);
+ Word2Vec.evalNegCPUslice(nrows, nwords, nwa, nwb, wa.data, wb.data, modelmats, Mat.numThreads,
+ islice, opts.nSlices, maxCols, opts.nHeadTerms, opts.dualMode)
}
}
def trailingZeros(a:Long):Int = {
- var aa = a;
- var nz = 0;
+ var aa = a
+ var nz = 0
while ((aa & 1L) == 0) {
- aa = aa >> 1;
- nz += 1;
+ aa = aa >> 1
+ nz += 1
}
nz
}
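
trailingZeros drives the merge schedule below: mergeModelFn merges a head of opts.headlen << trailingZeros(istep) columns, so steps divisible by higher powers of two trigger exponentially rarer but larger merges. A minimal standalone sketch of that behavior (hypothetical step values, mirroring the method above):

    // sketch: count trailing zero bits, as trailingZeros above does
    def tz(a: Long): Int = {
      var aa = a
      var nz = 0
      while ((aa & 1L) == 0) {
        aa = aa >> 1
        nz += 1
      }
      nz
    }
    assert(tz(8L) == 3)   // step 8 = 0b1000 -> merge headlen << 3 columns
    assert(tz(12L) == 2)  // step 12 = 0b1100 -> merge headlen << 2 columns
    // the caller guards istep > 0; tz(0) would never terminate
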
override def mergeModelFn(models:Array[Model], mm:Array[Mat], um:Array[Mat], istep:Long):Unit = {
- val headlen = if (istep > 0) math.max(opts.headlen, opts.headlen << trailingZeros(istep)) else 0;
- val mlen = models(0).modelmats.length;
- val thisGPU = getGPU;
- val modj = new Array[Mat](models.length);
+ val headlen = if (istep > 0) math.max(opts.headlen, opts.headlen << trailingZeros(istep)) else 0
+ val mlen = models(0).modelmats.length
+ val thisGPU = getGPU
+ val modj = new Array[Mat](models.length)
for (j <- 0 until mlen) {
- val mmj = if (headlen > 0) mm(j).view(mm(j).nrows, math.min(mm(j).ncols, headlen)) else mm(j);
+ val mmj = if (headlen > 0) mm(j).view(mm(j).nrows, math.min(mm(j).ncols, headlen)) else mm(j)
mmj.clear
for (i <- 0 until models.length) {
- if (useGPU && i < Mat.hasCUDA) setGPU(i);
- modj(i) = if (headlen > 0) models(i).modelmats(j).view(models(i).modelmats(j).nrows, math.min(models(i).modelmats(j).ncols, headlen)) else models(i).modelmats(j);
- val umj = if (headlen > 0) um(j).view(um(j).nrows, math.min(um(j).ncols, headlen)) else um(j);
+ if (useGPU && i < Mat.hasCUDA) setGPU(i)
+ modj(i) = if (headlen > 0) models(i).modelmats(j).view(models(i).modelmats(j).nrows, math.min(models(i).modelmats(j).ncols, headlen)) else models(i).modelmats(j)
+ val umj = if (headlen > 0) um(j).view(um(j).nrows, math.min(um(j).ncols, headlen)) else um(j)
umj <-- modj(i)
- mmj ~ mmj + umj;
+ mmj ~ mmj + umj
}
- mmj ~ mmj * (1f/models.length);
+ mmj ~ mmj * (1f/models.length)
for (i <- 0 until models.length) {
- modj(i) <-- mmj;
+ modj(i) <-- mmj
}
}
- setGPU(thisGPU);
+ setGPU(thisGPU)
}
}
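
The merge itself is plain parameter averaging over the (possibly headlen-truncated) matrix views: each model's matrix is accumulated into mm, scaled by 1/nmodels, and broadcast back to every model. A toy sketch of that reduction, with arrays standing in for matrices and hypothetical values:

    val models = Array(Array(1f, 2f), Array(3f, 6f))
    val mm = new Array[Float](2)
    for (m <- models; c <- 0 until mm.length) mm(c) += m(c)
    for (c <- 0 until mm.length) mm(c) *= 1f / models.length
    // mm is now [2.0, 4.0]; each models(i) would then be overwritten with mm
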
object Word2Vec {
trait Opts extends Model.Opts {
- var aopts:ADAGrad.Opts = null;
- var nskip = 5;
- var nneg = 5;
+ var aopts:ADAGrad.Opts = null
+ var nskip = 5
+ var nneg = 5
-    var nreuse = 5;
+    var nreuse = 5
- var vocabSize = 100000;
- var wexpt = 0.75f;
- var wsample = 1e-4f;
- var headlen = 10000;
- var iflip = false;
- var eqPosNeg = false;
- var maxArraySize = 2047*1024*1024;
+ var vocabSize = 100000
+ var wexpt = 0.75f
+ var wsample = 1e-4f
+ var headlen = 10000
+ var iflip = false
+ var eqPosNeg = false
+ var maxArraySize = 2047*1024*1024
-    var nHeadTerms = 0;
+    var nHeadTerms = 0
- var nSlices = 1;
- var iSlice = 0;
- var dualMode = false;
- var doHead = 1;
+ var nSlices = 1
+ var iSlice = 0
+ var dualMode = false
+ var doHead = 1
}
class Options extends Opts {}
@@ -586,609 +586,609 @@ object Word2Vec {
A:Array[Float], B:Array[Float], lrate:Float, vexp:Float, nthreads:Int):Int = {
(0 until nthreads).par.map((ithread:Int) => {
- val istart = ((1L * ithread * ncols)/nthreads).toInt;
- val iend = ((1L * (ithread+1) * ncols)/nthreads).toInt;
- val daa = new Array[Float](nrows);
- var i = istart;
- while (i < iend) {
- var j = 0;
- var k = 0;
- var c = 0;
- var cv = 0f;
+ val istart = ((1L * ithread * ncols)/nthreads).toInt
+ val iend = ((1L * (ithread+1) * ncols)/nthreads).toInt
+ val daa = new Array[Float](nrows)
+ var i = istart
+ while (i < iend) {
+ var j = 0
+ var k = 0
+ var c = 0
+ var cv = 0f
- val iac = W(i);
- val ascale = math.pow(1+iac, vexp).toFloat;
- val ia = nrows * iac; // Get the current word (as a model matrix offset).
- if (ia >= 0) { // Check for OOV words
- c = 0;
- while (c < nrows) { // Current word
- daa(c) = 0; // delta for the A matrix (maps current and negative words).
- c += 1;
- }
- j = LB(i);
- while (j <= UB(i)) { // Iterate over neighbors in the skip window
- if (j != 0 && i + j >= 0 && i + j < ncols) { // context word index is in range (and not current word).
- val ibc = W(i + j);
- val bscale = math.pow(1+ibc, vexp).toFloat;
- val ib = nrows * ibc; // Get the context word and check it
- if (ib >= 0) {
- c = 0;
- cv = 0f;
- while (c < nrows) { // Inner product between current and context words.
- cv += A(c + ia) * B(c + ib);
- c += 1;
- }
+ val iac = W(i)
+ val ascale = math.pow(1+iac, vexp).toFloat
+ val ia = nrows * iac; // Get the current word (as a model matrix offset).
+ if (ia >= 0) { // Check for OOV words
+ c = 0
+ while (c < nrows) { // Current word
+ daa(c) = 0; // delta for the A matrix (maps current and negative words).
+ c += 1
+ }
+ j = LB(i)
+ while (j <= UB(i)) { // Iterate over neighbors in the skip window
+ if (j != 0 && i + j >= 0 && i + j < ncols) { // context word index is in range (and not current word).
+ val ibc = W(i + j)
+ val bscale = math.pow(1+ibc, vexp).toFloat
+ val ib = nrows * ibc; // Get the context word and check it
+ if (ib >= 0) {
+ c = 0
+ cv = 0f
+ while (c < nrows) { // Inner product between current and context words.
+ cv += A(c + ia) * B(c + ib)
+ c += 1
+ }
- if (cv > 16.0f) { // Apply logistic function with guards
- cv = 1.0f;
- } else if (cv < -16.0f) {
- cv = 0.0f;
- } else {
- cv = math.exp(cv).toFloat;
- cv = cv / (1.0f + cv);
- }
- cv = lrate * (1.0f - cv); // Subtract prediction from target (1.0), and scale by learning rate.
+ if (cv > 16.0f) { // Apply logistic function with guards
+ cv = 1.0f
+ } else if (cv < -16.0f) {
+ cv = 0.0f
+ } else {
+ cv = math.exp(cv).toFloat
+ cv = cv / (1.0f + cv)
+ }
+ cv = lrate * (1.0f - cv); // Subtract prediction from target (1.0), and scale by learning rate.
- c = 0;
- while (c < nrows) {
- daa(c) += ascale * cv * B(c + ib); // Compute backward derivatives for A and B with pseudo-ADAGrad scaling
- B(c + ib) += bscale * cv * A(c + ia);
- c += 1;
- }
- }
- }
- j += 1;
- }
- c = 0;
- while (c < nrows) { // Add derivative for A to A.
- A(c + ia) += daa(c);
- c += 1;
- }
- }
- i += 1;
- }
- });
- 0;
+ c = 0
+ while (c < nrows) {
+ daa(c) += ascale * cv * B(c + ib); // Compute backward derivatives for A and B with pseudo-ADAGrad scaling
+ B(c + ib) += bscale * cv * A(c + ia)
+ c += 1
+ }
+ }
+ }
+ j += 1
+ }
+ c = 0
+ while (c < nrows) { // Add derivative for A to A.
+ A(c + ia) += daa(c)
+ c += 1
+ }
+ }
+ i += 1
+ }
+ })
+ 0
}
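
All of the inner loops in this file share the same clamped sigmoid: inputs outside [-16, 16] are saturated to 0 or 1 before exp is applied, which avoids Float overflow in exp and keeps the eval-time log(max(cv, 1e-20)) finite. Factored out as a standalone sketch:

    // sketch of the guarded logistic used by the proc*/eval* loops
    def guardedSigmoid(x: Float): Float = {
      if (x > 16.0f) 1.0f         // sigmoid(16) ~ 0.9999999, treat as 1
      else if (x < -16.0f) 0.0f   // sigmoid(-16) ~ 1.1e-7, treat as 0
      else {
        val e = math.exp(x).toFloat
        e / (1.0f + e)
      }
    }
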
def mapIndx(indx:Int, islice:Int, nslices:Int, nHead:Int, maxCols:Int, nrows:Int, offset:Int):(Int, Int, Boolean, Boolean) = {
- val newi = if (indx >= nHead) ((indx - nHead) / nslices + nHead) else indx; // new column index
- val m = newi / maxCols + offset; // which matrix are we in?
- val ismine = (indx >= nHead) && (indx % nslices == islice);
- val ishead = (indx < nHead);
- val i = nrows * (newi - m * maxCols);
- (m, i, ismine, ishead)
+ val newi = if (indx >= nHead) ((indx - nHead) / nslices + nHead) else indx; // new column index
+ val m = newi / maxCols + offset; // which matrix are we in?
+ val ismine = (indx >= nHead) && (indx % nslices == islice)
+ val ishead = (indx < nHead)
+ val i = nrows * (newi - m * maxCols)
+ (m, i, ismine, ishead)
}
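
The head/tail split above can be checked by hand: head words (indx < nHead) keep their column in every slice, while tail words are dealt round-robin across slices and packed into matrices of at most maxCols columns. A small re-derivation with hypothetical sizes (not part of the patch):

    // mirrors mapIndx above, with named arguments for readability
    def mapIndxSketch(indx: Int, islice: Int, nslices: Int, nHead: Int,
                      maxCols: Int, nrows: Int, offset: Int) = {
      val newi = if (indx >= nHead) (indx - nHead) / nslices + nHead else indx
      val m = newi / maxCols + offset
      val ismine = indx >= nHead && indx % nslices == islice
      (m, nrows * (newi - m * maxCols), ismine, indx < nHead)
    }
    // word 7: newi = (7-3)/2 + 3 = 5, matrix 5/4 = 1, element 2*(5-4) = 2,
    // owned by slice 1 (7 is odd), not a head word (7 >= 3)
    assert(mapIndxSketch(7, islice = 1, nslices = 2, nHead = 3,
                         maxCols = 4, nrows = 2, offset = 0) == (1, 2, true, false))
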
def procPosCPUslice(nrows:Int, ncols:Int, skip:Int, W:Array[Int], LB:Array[Int], UB:Array[Int],
modelmats:Array[Mat], lrate:Float, vexp:Float, nthreads:Int,
islice:Int, nslices:Int, maxCols:Int, nHead:Int, dualMode:Boolean, doHead:Int):Int = {
- val arrayOffset = if (dualMode) 1 else 0;
+ val arrayOffset = if (dualMode) 1 else 0
(0 until nthreads).par.map((ithread:Int) => {
- val istart = ((1L * ithread * ncols)/nthreads).toInt;
- val iend = ((1L * (ithread+1) * ncols)/nthreads).toInt;
- val daa = new Array[Float](nrows);
- var i = istart;
- while (i < iend) {
- var j = 0;
- var k = 0;
- var c = 0;
- var cv = 0f;
+ val istart = ((1L * ithread * ncols)/nthreads).toInt
+ val iend = ((1L * (ithread+1) * ncols)/nthreads).toInt
+ val daa = new Array[Float](nrows)
+ var i = istart
+ while (i < iend) {
+ var j = 0
+ var k = 0
+ var c = 0
+ var cv = 0f
- val iac = W(i);
- val ascale = math.pow(1+iac, vexp).toFloat;
- if (iac >= 0) { // Check for OOV words
- val (ma, ia, aismine, aishead) = mapIndx(iac, islice, nslices, nHead, maxCols, nrows, arrayOffset);
- val A = modelmats(2*ma+1).asInstanceOf[FMat].data;
- c = 0;
- while (c < nrows) { // Current word
- daa(c) = 0; // delta for the A matrix (maps current and negative words).
- c += 1;
- }
- j = LB(i);
- var touched = false;
- while (j <= UB(i)) { // Iterate over neighbors in the skip window
- if (j != 0 && i + j >= 0 && i + j < ncols) { // context word index is in range (and not current word).
- val ibc = W(i + j); // Get the context word
- val bscale = math.pow(1+ibc, vexp).toFloat;
- if (ibc >= 0) { // check if context word is OOV
- val (mb, ib, bismine, bishead) = mapIndx(ibc, islice, nslices, nHead, maxCols, nrows, arrayOffset);
- val B = modelmats(2*mb).asInstanceOf[FMat].data;
- if ((doHead > 1 && aishead && bishead) || (aismine && bishead) || (bismine && aishead) || (aismine && bismine)) {
- touched = true;
- c = 0;
- cv = 0f;
- while (c < nrows) { // Inner product between current and context words.
- cv += A(c + ia) * B(c + ib);
- c += 1;
- }
+ val iac = W(i)
+        val ascale = math.pow(1+iac, vexp).toFloat
+ if (iac >= 0) { // Check for OOV words
+ val (ma, ia, aismine, aishead) = mapIndx(iac, islice, nslices, nHead, maxCols, nrows, arrayOffset)
+ val A = modelmats(2*ma+1).asInstanceOf[FMat].data
+ c = 0
+ while (c < nrows) { // Current word
+ daa(c) = 0; // delta for the A matrix (maps current and negative words).
+ c += 1
+ }
+ j = LB(i)
+ var touched = false
+ while (j <= UB(i)) { // Iterate over neighbors in the skip window
+ if (j != 0 && i + j >= 0 && i + j < ncols) { // context word index is in range (and not current word).
+ val ibc = W(i + j); // Get the context word
+            val bscale = math.pow(1+ibc, vexp).toFloat
+ if (ibc >= 0) { // check if context word is OOV
+ val (mb, ib, bismine, bishead) = mapIndx(ibc, islice, nslices, nHead, maxCols, nrows, arrayOffset)
+ val B = modelmats(2*mb).asInstanceOf[FMat].data
+ if ((doHead > 1 && aishead && bishead) || (aismine && bishead) || (bismine && aishead) || (aismine && bismine)) {
+ touched = true
+ c = 0
+ cv = 0f
+ while (c < nrows) { // Inner product between current and context words.
+ cv += A(c + ia) * B(c + ib)
+ c += 1
+ }
- if (cv > 16.0f) { // Apply logistic function with guards
- cv = 1.0f;
- } else if (cv < -16.0f) {
- cv = 0.0f;
- } else {
- cv = math.exp(cv).toFloat;
- cv = cv / (1.0f + cv);
- }
- cv = lrate * (1.0f - cv); // Subtract prediction from target (1.0), and scale by learning rate.
+ if (cv > 16.0f) { // Apply logistic function with guards
+ cv = 1.0f
+ } else if (cv < -16.0f) {
+ cv = 0.0f
+ } else {
+ cv = math.exp(cv).toFloat
+ cv = cv / (1.0f + cv)
+ }
+ cv = lrate * (1.0f - cv); // Subtract prediction from target (1.0), and scale by learning rate.
- c = 0;
- while (c < nrows) {
- daa(c) += ascale * cv * B(c + ib); // Compute backward derivatives for A and B with pseudo-ADAGrad scaling
- c += 1;
- }
- if (bismine || (bishead && doHead > 0)) {
- c = 0;
- while (c < nrows) {
- B(c + ib) += bscale * cv * A(c + ia);
- c += 1;
- }
- }
- }
- }
- }
- j += 1;
- }
- if (touched && (aismine || (aishead && doHead > 0))) {
- c = 0;
- while (c < nrows) { // Add derivative for A to A.
- A(c + ia) += daa(c);
- c += 1;
- }
- }
- }
- i += 1;
- }
- });
- 0;
+ c = 0
+ while (c < nrows) {
+ daa(c) += ascale * cv * B(c + ib); // Compute backward derivatives for A and B with pseudo-ADAGrad scaling
+ c += 1
+ }
+ if (bismine || (bishead && doHead > 0)) {
+ c = 0
+ while (c < nrows) {
+ B(c + ib) += bscale * cv * A(c + ia)
+ c += 1
+ }
+ }
+ }
+ }
+ }
+ j += 1
+ }
+ if (touched && (aismine || (aishead && doHead > 0))) {
+ c = 0
+ while (c < nrows) { // Add derivative for A to A.
+ A(c + ia) += daa(c)
+ c += 1
+ }
+ }
+ }
+ i += 1
+ }
+ })
+ 0
}
def procNegCPU(nrows:Int, nwords:Int, nwa:Int, nwb:Int, WA:Array[Int], WB:Array[Int], A:Array[Float], B:Array[Float],
lrate:Float, vexp:Float, nthreads:Int):Int = {
- (0 until nthreads).par.map((ithread:Int) => {
- val istart = ((1L * nwords * ithread) / nthreads).toInt;
- val iend = ((1L * nwords * (ithread+1)) / nthreads).toInt;
- val aa = new Array[Float](nwa * nrows);
- val bb = new Array[Float](nrows);
- var i = istart;
- while (i < iend) {
- var j = 0;
- var k = 0;
- var c = 0;
+ (0 until nthreads).par.map((ithread:Int) => {
+ val istart = ((1L * nwords * ithread) / nthreads).toInt
+ val iend = ((1L * nwords * (ithread+1)) / nthreads).toInt
+ val aa = new Array[Float](nwa * nrows)
+ val bb = new Array[Float](nrows)
+ var i = istart
+ while (i < iend) {
+ var j = 0
+ var k = 0
+ var c = 0
- j = 0;
- while (j < nwa) { // Clear tmp A matrix
- val ja = j * nrows;
- c = 0;
- while (c < nrows) {
- aa(c + ja) = 0;
- c += 1;
- }
- j+= 1;
- }
-
- k = 0;
- while (k < nwb) { // Loop over B words
- c = 0;
- while (c < nrows) { // Clear tmp B vector
- bb(c) = 0;
- c += 1;
- }
- val ibc = WB(k+i*nwb);
- val bscale = math.pow(1+ibc, vexp).toFloat;
- val ib = nrows * ibc; // Get the B word as an array offset.
- j = 0;
- while (j < nwa) { // Now iterate over A words.
- val iac = WA(j+i*nwa);
- val ascale = math.pow(1+iac, vexp).toFloat;
- val ia = nrows * iac; // Get an A word offset
-
- var cv = 0f;
- c = 0;
- while (c < nrows) { // Inner product between A and B columns
- cv += A(c + ia) * B(c + ib);
- c += 1;
- }
-
- if (cv > 16.0f) { // Guarded logistic function
- cv = 1.0f;
- } else if (cv < -16.0f) {
- cv = 0.0f;
- } else {
- cv = math.exp(cv).toFloat;
- cv = cv / (1.0f + cv);
- }
- cv = - cv * lrate; // Scale derivative by learning rate.
+        j = 0
+ while (j < nwa) { // Clear tmp A matrix
+ val ja = j * nrows
+          c = 0
+ while (c < nrows) {
+ aa(c + ja) = 0
+ c += 1
+ }
+          j += 1
+ }
+
+ k = 0
+ while (k < nwb) { // Loop over B words
+          c = 0
+ while (c < nrows) { // Clear tmp B vector
+ bb(c) = 0
+ c += 1
+ }
+ val ibc = WB(k+i*nwb)
+ val bscale = math.pow(1+ibc, vexp).toFloat
+ val ib = nrows * ibc; // Get the B word as an array offset.
+ j = 0
+ while (j < nwa) { // Now iterate over A words.
+ val iac = WA(j+i*nwa)
+ val ascale = math.pow(1+iac, vexp).toFloat
+ val ia = nrows * iac; // Get an A word offset
+
+ var cv = 0f
+ c = 0
+ while (c < nrows) { // Inner product between A and B columns
+ cv += A(c + ia) * B(c + ib)
+ c += 1
+ }
+
+ if (cv > 16.0f) { // Guarded logistic function
+ cv = 1.0f
+ } else if (cv < -16.0f) {
+ cv = 0.0f
+ } else {
+ cv = math.exp(cv).toFloat
+ cv = cv / (1.0f + cv)
+ }
+ cv = - cv * lrate; // Scale derivative by learning rate.
- val ja = j * nrows;
- c = 0;
- while (c < nrows) { // Update the derivatives
- aa(c + ja) += ascale * cv * B(c + ib);
- bb(c) += bscale * cv * A(c + ia);
- c += 1;
- }
- j += 1;
- }
- c = 0;
- while (c < nrows) { // Add B's derivative to B
- B(c + ib) += bb(c);
- c += 1;
- }
- k += 1;
- }
- j = 0;
- while (j < nwa) { // Add A's derivatives to A
- val ja = j * nrows;
- val ia = nrows * WA(j+i*nwa);
- c = 0;
- while (c < nrows) {
- A(c + ia) += aa(c + ja);
- c += 1;
- }
- j += 1;
- }
- i += 1;
- }
- });
- 0;
+ val ja = j * nrows
+ c = 0
+ while (c < nrows) { // Update the derivatives
+ aa(c + ja) += ascale * cv * B(c + ib)
+ bb(c) += bscale * cv * A(c + ia)
+ c += 1
+ }
+ j += 1
+ }
+ c = 0
+ while (c < nrows) { // Add B's derivative to B
+ B(c + ib) += bb(c)
+ c += 1
+ }
+ k += 1
+ }
+ j = 0
+ while (j < nwa) { // Add A's derivatives to A
+ val ja = j * nrows
+ val ia = nrows * WA(j+i*nwa)
+ c = 0
+ while (c < nrows) {
+ A(c + ia) += aa(c + ja)
+ c += 1
+ }
+ j += 1
+ }
+ i += 1
+ }
+ })
+ 0
}
def procNegCPUslice(nrows:Int, nwords:Int, nwa:Int, nwb:Int, WA:Array[Int], WB:Array[Int], modelmats:Array[Mat],
lrate:Float, vexp:Float, nthreads:Int, islice:Int, nslices:Int, maxCols:Int, nHead:Int, dualMode:Boolean, doHead:Int):Int = {
- val arrayOffset = if (dualMode) 1 else 0;
+ val arrayOffset = if (dualMode) 1 else 0
(0 until nthreads).par.map((ithread:Int) => {
- val istart = ((1L * nwords * ithread) / nthreads).toInt;
- val iend = ((1L * nwords * (ithread+1)) / nthreads).toInt;
- val aa = new Array[Float](nwa * nrows);
- val bb = new Array[Float](nrows);
- var i = istart;
- while (i < iend) {
- var j = 0;
- var k = 0;
- var c = 0;
+ val istart = ((1L * nwords * ithread) / nthreads).toInt
+ val iend = ((1L * nwords * (ithread+1)) / nthreads).toInt
+ val aa = new Array[Float](nwa * nrows)
+ val bb = new Array[Float](nrows)
+ var i = istart
+ while (i < iend) {
+ var j = 0
+ var k = 0
+ var c = 0
- j = 0;
- while (j < nwa) { // Clear tmp A matrix
- val ja = j * nrows;
- c = 0;
- while (c < nrows) {
- aa(c + ja) = 0;
- c += 1;
- }
- j+= 1;
- }
-
- k = 0;
- while (k < nwb) { // Loop over B words
- c = 0;
- while (c < nrows) { // Clear tmp B vector
- bb(c) = 0;
- c += 1;
- }
- val ibc = WB(k+i*nwb);
- val bscale = math.pow(1+ibc, vexp).toFloat;
- val (mb, ib, bismine, bishead) = mapIndx(ibc, islice, nslices, nHead, maxCols, nrows, arrayOffset);
- val B = modelmats(2*mb).asInstanceOf[FMat].data;
- j = 0;
- while (j < nwa) { // Now iterate over A words.
- val iac = WA(j+i*nwa);
- val ascale = math.pow(1+iac, vexp).toFloat;
- val (ma, ia, aismine, aishead) = mapIndx(iac, islice, nslices, nHead, maxCols, nrows, arrayOffset);
- val A = modelmats(2*ma+1).asInstanceOf[FMat].data;
- var cv = 0f;
- if ((doHead > 1 && aishead && bishead) || (aismine && bishead) || (bismine && aishead) || (aismine && bismine)) {
- c = 0;
- while (c < nrows) { // Inner product between A and B columns
- cv += A(c + ia) * B(c + ib);
- c += 1;
- }
+        j = 0
+ while (j < nwa) { // Clear tmp A matrix
+ val ja = j * nrows
+          c = 0
+ while (c < nrows) {
+ aa(c + ja) = 0
+ c += 1
+ }
+        j += 1
+ }
+
+ k = 0
+ while (k < nwb) { // Loop over B words
+          c = 0
+ while (c < nrows) { // Clear tmp B vector
+ bb(c) = 0
+ c += 1
+ }
+ val ibc = WB(k+i*nwb)
+ val bscale = math.pow(1+ibc, vexp).toFloat
+ val (mb, ib, bismine, bishead) = mapIndx(ibc, islice, nslices, nHead, maxCols, nrows, arrayOffset)
+ val B = modelmats(2*mb).asInstanceOf[FMat].data
+ j = 0
+ while (j < nwa) { // Now iterate over A words.
+ val iac = WA(j+i*nwa)
+ val ascale = math.pow(1+iac, vexp).toFloat
+ val (ma, ia, aismine, aishead) = mapIndx(iac, islice, nslices, nHead, maxCols, nrows, arrayOffset)
+            val A = modelmats(2*ma+1).asInstanceOf[FMat].data
+ var cv = 0f
+ if ((doHead > 1 && aishead && bishead) || (aismine && bishead) || (bismine && aishead) || (aismine && bismine)) {
+ c = 0
+ while (c < nrows) { // Inner product between A and B columns
+ cv += A(c + ia) * B(c + ib)
+ c += 1
+ }
- if (cv > 16.0f) { // Guarded logistic function
- cv = 1.0f;
- } else if (cv < -16.0f) {
- cv = 0.0f;
- } else {
- cv = math.exp(cv).toFloat;
- cv = cv / (1.0f + cv);
- }
- cv = - cv * lrate; // Scale derivative by learning rate.
+ if (cv > 16.0f) { // Guarded logistic function
+ cv = 1.0f
+ } else if (cv < -16.0f) {
+ cv = 0.0f
+ } else {
+ cv = math.exp(cv).toFloat
+ cv = cv / (1.0f + cv)
+ }
+ cv = - cv * lrate; // Scale derivative by learning rate.
- val ja = j * nrows;
- c = 0;
- while (c < nrows) { // Update the derivatives
- aa(c + ja) += ascale * cv * B(c + ib);
- bb(c) += bscale * cv * A(c + ia);
- c += 1;
- }
- }
- j += 1;
- }
- if (bismine || (bishead && doHead > 0)) {
- c = 0;
- while (c < nrows) { // Add B's derivative to B
- B(c + ib) += bb(c);
- c += 1;
- }
- }
- k += 1;
- }
- j = 0;
- while (j < nwa) { // Add A's derivatives to A
- val ja = j * nrows;
- val iac = WA(j+i*nwa);
- val (ma, ia, aismine, aishead) = mapIndx(iac, islice, nslices, nHead, maxCols, nrows, arrayOffset);
- val A = modelmats(2*ma+1).asInstanceOf[FMat].data;
- if (aismine || (aishead && doHead > 0)) {
- c = 0;
- while (c < nrows) {
- A(c + ia) += aa(c + ja);
- c += 1;
- }
- }
- j += 1;
- }
- i += 1;
- }
- });
- 0;
+ val ja = j * nrows
+ c = 0
+ while (c < nrows) { // Update the derivatives
+ aa(c + ja) += ascale * cv * B(c + ib)
+ bb(c) += bscale * cv * A(c + ia)
+ c += 1
+ }
+ }
+ j += 1
+ }
+ if (bismine || (bishead && doHead > 0)) {
+ c = 0
+ while (c < nrows) { // Add B's derivative to B
+ B(c + ib) += bb(c)
+ c += 1
+ }
+ }
+ k += 1
+ }
+ j = 0
+ while (j < nwa) { // Add A's derivatives to A
+ val ja = j * nrows
+ val iac = WA(j+i*nwa)
+ val (ma, ia, aismine, aishead) = mapIndx(iac, islice, nslices, nHead, maxCols, nrows, arrayOffset)
+ val A = modelmats(2*ma+1).asInstanceOf[FMat].data
+ if (aismine || (aishead && doHead > 0)) {
+ c = 0
+ while (c < nrows) {
+ A(c + ia) += aa(c + ja)
+ c += 1
+ }
+ }
+ j += 1
+ }
+ i += 1
+ }
+ })
+ 0
}
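
Both sliced kernels gate every word pair on the same ownership test: a pair is processed when each side is either owned by this slice or a head term, and doHead > 1 additionally enables pure head-head pairs. Extracted as a standalone predicate (a sketch, not part of the patch):

    def shouldProcess(aismine: Boolean, aishead: Boolean,
                      bismine: Boolean, bishead: Boolean, doHead: Int): Boolean =
      (doHead > 1 && aishead && bishead) ||
      (aismine && bishead) || (bismine && aishead) || (aismine && bismine)
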
def evalPosCPU(nrows:Int, ncols:Int, skip:Int, W:Array[Int], LB:Array[Int], UB:Array[Int],
A:Array[Float], B:Array[Float], nthreads:Int):Double = {
(0 until nthreads).par.map((ithread:Int) => {
- val istart = ((1L * ithread * ncols)/nthreads).toInt;
- val iend = ((1L * (ithread+1) * ncols)/nthreads).toInt;
- val daa = new Array[Float](nrows);
- var i = istart;
- var sum = 0.0;
- while (i < iend) {
- var j = 0;
- var k = 0;
- var c = 0;
- var cv = 0f;
+ val istart = ((1L * ithread * ncols)/nthreads).toInt
+ val iend = ((1L * (ithread+1) * ncols)/nthreads).toInt
+ val daa = new Array[Float](nrows)
+ var i = istart
+ var sum = 0.0
+ while (i < iend) {
+ var j = 0
+ var k = 0
+ var c = 0
+ var cv = 0f
- val ia = nrows * W(i); // Get the current word (as a model matrix offset).
- if (ia >= 0) { // Check for OOV words
- c = 0;
- while (c < nrows) { // Current word
- daa(c) = 0; // delta for the A matrix (maps current and negative words).
- c += 1;
- }
- j = LB(i);
- while (j <= UB(i)) { // Iterate over neighbors in the skip window
- if (j != 0 && i + j >= 0 && i + j < ncols) { // context word index is in range (and not current word).
- val ib = nrows * W(i + j); // Get the context word and check it.
- if (ib >= 0) {
- c = 0;
- cv = 0f;
- while (c < nrows) { // Inner product between current and context words.
- cv += A(c + ia) * B(c + ib);
- c += 1;
- }
+ val ia = nrows * W(i); // Get the current word (as a model matrix offset).
+ if (ia >= 0) { // Check for OOV words
+ c = 0
+ while (c < nrows) { // Current word
+ daa(c) = 0; // delta for the A matrix (maps current and negative words).
+ c += 1
+ }
+ j = LB(i)
+ while (j <= UB(i)) { // Iterate over neighbors in the skip window
+ if (j != 0 && i + j >= 0 && i + j < ncols) { // context word index is in range (and not current word).
+ val ib = nrows * W(i + j); // Get the context word and check it.
+ if (ib >= 0) {
+ c = 0
+ cv = 0f
+ while (c < nrows) { // Inner product between current and context words.
+ cv += A(c + ia) * B(c + ib)
+ c += 1
+ }
- if (cv > 16.0f) { // Apply logistic function with guards
- cv = 1.0f;
- } else if (cv < -16.0f) {
- cv = 0.0f;
- } else {
- cv = math.exp(cv).toFloat;
- cv = cv / (1.0f + cv);
- }
- sum += math.log(math.max(cv, 1e-20));
- }
- }
- j += 1;
- }
- }
- i += 1;
- }
- sum;
- }).reduce(_+_);
+ if (cv > 16.0f) { // Apply logistic function with guards
+ cv = 1.0f
+ } else if (cv < -16.0f) {
+ cv = 0.0f
+ } else {
+ cv = math.exp(cv).toFloat
+ cv = cv / (1.0f + cv)
+ }
+                sum += math.log(math.max(cv, 1e-20))
+ }
+ }
+ j += 1
+ }
+ }
+ i += 1
+ }
+ sum
+ }).reduce(_+_)
}
def evalPosCPUslice(nrows:Int, ncols:Int, skip:Int, W:Array[Int], LB:Array[Int], UB:Array[Int],
modelmats:Array[Mat], nthreads:Int, islice:Int, nslices:Int, maxCols:Int, nHead:Int, dualMode:Boolean):Double = {
- val arrayOffset = if (dualMode) 1 else 0;
+ val arrayOffset = if (dualMode) 1 else 0
(0 until nthreads).par.map((ithread:Int) => {
- val istart = ((1L * ithread * ncols)/nthreads).toInt;
- val iend = ((1L * (ithread+1) * ncols)/nthreads).toInt;
- val daa = new Array[Float](nrows);
- var i = istart;
- var sum = 0.0;
- while (i < iend) {
- var j = 0;
- var k = 0;
- var c = 0;
- var cv = 0f;
+ val istart = ((1L * ithread * ncols)/nthreads).toInt
+ val iend = ((1L * (ithread+1) * ncols)/nthreads).toInt
+ val daa = new Array[Float](nrows)
+ var i = istart
+ var sum = 0.0
+ while (i < iend) {
+ var j = 0
+ var k = 0
+ var c = 0
+ var cv = 0f
- val iac = W(i); // Get the current word (as a model matrix offset).
- if (iac >= 0) {
- val (ma, ia, aismine, aishead) = mapIndx(iac, islice, nslices, nHead, maxCols, nrows, arrayOffset);
- if (aismine || aishead) {
- val A = modelmats(2*ma+1).asInstanceOf[FMat].data;
- c = 0;
- while (c < nrows) { // Current word
- daa(c) = 0; // delta for the A matrix (maps current and negative words).
- c += 1;
- }
- j = LB(i);
- while (j <= UB(i)) { // Iterate over neighbors in the skip window
- if (j != 0 && i + j >= 0 && i + j < ncols) { // context word index is in range (and not current word).
- val ibc = W(i + j); // Get the context word and check it.
- if (ibc >= 0) {
- val (mb, ib, bismine, bishead) = mapIndx(ibc, islice, nslices, nHead, maxCols, nrows, arrayOffset);
- if (bismine || bishead) {
- val B = modelmats(2*mb).asInstanceOf[FMat].data;
- c = 0;
- cv = 0f;
- while (c < nrows) { // Inner product between current and context words.
- cv += A(c + ia) * B(c + ib);
- c += 1;
- }
+ val iac = W(i); // Get the current word (as a model matrix offset).
+ if (iac >= 0) {
+ val (ma, ia, aismine, aishead) = mapIndx(iac, islice, nslices, nHead, maxCols, nrows, arrayOffset)
+ if (aismine || aishead) {
+ val A = modelmats(2*ma+1).asInstanceOf[FMat].data
+ c = 0
+ while (c < nrows) { // Current word
+ daa(c) = 0; // delta for the A matrix (maps current and negative words).
+ c += 1
+ }
+ j = LB(i)
+ while (j <= UB(i)) { // Iterate over neighbors in the skip window
+ if (j != 0 && i + j >= 0 && i + j < ncols) { // context word index is in range (and not current word).
+ val ibc = W(i + j); // Get the context word and check it.
+ if (ibc >= 0) {
+ val (mb, ib, bismine, bishead) = mapIndx(ibc, islice, nslices, nHead, maxCols, nrows, arrayOffset)
+ if (bismine || bishead) {
+ val B = modelmats(2*mb).asInstanceOf[FMat].data
+ c = 0
+ cv = 0f
+ while (c < nrows) { // Inner product between current and context words.
+ cv += A(c + ia) * B(c + ib)
+ c += 1
+ }
- if (cv > 16.0f) { // Apply logistic function with guards
- cv = 1.0f;
- } else if (cv < -16.0f) {
- cv = 0.0f;
- } else {
- cv = math.exp(cv).toFloat;
- cv = cv / (1.0f + cv);
- }
- sum += math.log(math.max(cv, 1e-20));
- }
- }
- }
- j += 1;
- }
- }
- }
- i += 1;
- }
- sum;
- }).reduce(_+_);
+ if (cv > 16.0f) { // Apply logistic function with guards
+ cv = 1.0f
+ } else if (cv < -16.0f) {
+ cv = 0.0f
+ } else {
+ cv = math.exp(cv).toFloat
+ cv = cv / (1.0f + cv)
+ }
+                  sum += math.log(math.max(cv, 1e-20))
+ }
+ }
+ }
+ j += 1
+ }
+ }
+ }
+ i += 1
+ }
+ sum
+ }).reduce(_+_)
}
def evalNegCPU(nrows:Int, nwords:Int, nwa:Int, nwb:Int, WA:Array[Int], WB:Array[Int], A:Array[Float], B:Array[Float], nthreads:Int):Double = {
- (0 until nthreads).par.map((ithread:Int) => {
- val istart = ((1L * nwords * ithread) / nthreads).toInt;
- val iend = ((1L * nwords * (ithread+1)) / nthreads).toInt;
- val aa = new Array[Float](nwa * nrows);
- val bb = new Array[Float](nrows);
- var sum = 0.0;
- var i = istart;
- while (i < iend) {
- var j = 0;
- var k = 0;
- var c = 0;
+ (0 until nthreads).par.map((ithread:Int) => {
+ val istart = ((1L * nwords * ithread) / nthreads).toInt
+ val iend = ((1L * nwords * (ithread+1)) / nthreads).toInt
+ val aa = new Array[Float](nwa * nrows)
+ val bb = new Array[Float](nrows)
+ var sum = 0.0
+ var i = istart
+ while (i < iend) {
+ var j = 0
+ var k = 0
+ var c = 0
- j = 0;
- while (j < nwa) { // Clear tmp A matrix
- val ja = j * nrows;
- c = 0;
- while (c < nrows) {
- aa(c + ja) = 0;
- c += 1;
- }
- j+= 1;
- }
-
- k = 0;
- while (k < nwb) { // Loop over B words
- c = 0;
- while (c < nrows) { // Clear tmp B vector
- bb(c) = 0;
- c += 1;
- }
- val ib = nrows * WB(k+i*nwb); // Get the B word as an array offset.
- j = 0;
- while (j < nwa) { // Now iterate over A words.
- val ia = nrows * WA(j+i*nwa); // Get an A word offset
-
- var cv = 0f;
- c = 0;
- while (c < nrows) { // Inner product between A and B columns
- cv += A(c + ia) * B(c + ib);
- c += 1;
- }
-
- if (cv > 16.0f) { // Guarded logistic function
- cv = 1.0f;
- } else if (cv < -16.0f) {
- cv = 0.0f;
- } else {
- cv = math.exp(cv).toFloat;
- cv = cv / (1.0f + cv);
- }
- sum += math.log(math.max(1-cv, 1e-20));
- j += 1;
- }
- k += 1;
- }
- i += 1;
- }
- sum;
- }).reduce(_+_);
+        j = 0
+ while (j < nwa) { // Clear tmp A matrix
+ val ja = j * nrows
+          c = 0
+ while (c < nrows) {
+ aa(c + ja) = 0
+ c += 1
+ }
+        j += 1
+ }
+
+ k = 0
+ while (k < nwb) { // Loop over B words
+          c = 0
+ while (c < nrows) { // Clear tmp B vector
+ bb(c) = 0
+ c += 1
+ }
+ val ib = nrows * WB(k+i*nwb); // Get the B word as an array offset.
+ j = 0
+ while (j < nwa) { // Now iterate over A words.
+ val ia = nrows * WA(j+i*nwa); // Get an A word offset
+
+ var cv = 0f
+ c = 0
+ while (c < nrows) { // Inner product between A and B columns
+ cv += A(c + ia) * B(c + ib)
+ c += 1
+ }
+
+ if (cv > 16.0f) { // Guarded logistic function
+ cv = 1.0f
+ } else if (cv < -16.0f) {
+ cv = 0.0f
+ } else {
+ cv = math.exp(cv).toFloat
+ cv = cv / (1.0f + cv)
+ }
+            sum += math.log(math.max(1-cv, 1e-20))
+ j += 1
+ }
+ k += 1
+ }
+ i += 1
+ }
+ sum
+ }).reduce(_+_)
}
def evalNegCPUslice(nrows:Int, nwords:Int, nwa:Int, nwb:Int, WA:Array[Int], WB:Array[Int], modelmats:Array[Mat], nthreads:Int,
islice:Int, nslices:Int, maxCols:Int, nHead:Int, dualMode:Boolean):Double = {
- val arrayOffset = if (dualMode) 1 else 0;
- (0 until nthreads).par.map((ithread:Int) => {
- val istart = ((1L * nwords * ithread) / nthreads).toInt;
- val iend = ((1L * nwords * (ithread+1)) / nthreads).toInt;
- val aa = new Array[Float](nwa * nrows);
- val bb = new Array[Float](nrows);
- var sum = 0.0;
- var i = istart;
- while (i < iend) {
- var j = 0;
- var k = 0;
- var c = 0;
+ val arrayOffset = if (dualMode) 1 else 0
+ (0 until nthreads).par.map((ithread:Int) => {
+ val istart = ((1L * nwords * ithread) / nthreads).toInt
+ val iend = ((1L * nwords * (ithread+1)) / nthreads).toInt
+ val aa = new Array[Float](nwa * nrows)
+ val bb = new Array[Float](nrows)
+ var sum = 0.0
+ var i = istart
+ while (i < iend) {
+ var j = 0
+ var k = 0
+ var c = 0
- j = 0;
- while (j < nwa) { // Clear tmp A matrix
- val ja = j * nrows;
- c = 0;
- while (c < nrows) {
- aa(c + ja) = 0;
- c += 1;
- }
- j+= 1;
- }
-
- k = 0;
- while (k < nwb) { // Loop over B words
- c = 0;
- while (c < nrows) { // Clear tmp B vector
- bb(c) = 0;
- c += 1;
- }
- val ibc = WB(k+i*nwb); // Get the B word as an array offset.
- val (mb, ib, bismine, bishead) = mapIndx(ibc, islice, nslices, nHead, maxCols, nrows, arrayOffset);
- if (bismine || bishead) {
- val B = modelmats(2*mb).asInstanceOf[FMat].data;
- j = 0;
- while (j < nwa) { // Now iterate over A words.
- val iac = WA(j+i*nwa); // Get an A word offset
- val (ma, ia, aismine, aishead) = mapIndx(iac, islice, nslices, nHead, maxCols, nrows, arrayOffset);
- if (aismine || aishead) {
- val A = modelmats(2*ma+1).asInstanceOf[FMat].data;
- var cv = 0f;
- c = 0;
- while (c < nrows) { // Inner product between A and B columns
- cv += A(c + ia) * B(c + ib);
- c += 1;
- }
- if (cv > 16.0f) { // Guarded logistic function
- cv = 1.0f;
- } else if (cv < -16.0f) {
- cv = 0.0f;
- } else {
- cv = math.exp(cv).toFloat;
- cv = cv / (1.0f + cv);
- }
- sum += math.log(math.max(1-cv, 1e-20));
- }
- j += 1;
- }
- }
- k += 1;
- }
- i += 1;
- }
- sum;
- }).reduce(_+_);
+        j = 0
+ while (j < nwa) { // Clear tmp A matrix
+ val ja = j * nrows
+          c = 0
+ while (c < nrows) {
+ aa(c + ja) = 0
+ c += 1
+ }
+        j += 1
+ }
+
+ k = 0
+ while (k < nwb) { // Loop over B words
+          c = 0
+ while (c < nrows) { // Clear tmp B vector
+ bb(c) = 0
+ c += 1
+ }
+ val ibc = WB(k+i*nwb); // Get the B word as an array offset.
+ val (mb, ib, bismine, bishead) = mapIndx(ibc, islice, nslices, nHead, maxCols, nrows, arrayOffset)
+ if (bismine || bishead) {
+ val B = modelmats(2*mb).asInstanceOf[FMat].data
+ j = 0
+ while (j < nwa) { // Now iterate over A words.
+ val iac = WA(j+i*nwa); // Get an A word offset
+ val (ma, ia, aismine, aishead) = mapIndx(iac, islice, nslices, nHead, maxCols, nrows, arrayOffset)
+ if (aismine || aishead) {
+ val A = modelmats(2*ma+1).asInstanceOf[FMat].data
+ var cv = 0f
+ c = 0
+ while (c < nrows) { // Inner product between A and B columns
+ cv += A(c + ia) * B(c + ib)
+ c += 1
+ }
+ if (cv > 16.0f) { // Guarded logistic function
+ cv = 1.0f
+ } else if (cv < -16.0f) {
+ cv = 0.0f
+ } else {
+ cv = math.exp(cv).toFloat
+ cv = cv / (1.0f + cv)
+ }
+              sum += math.log(math.max(1-cv, 1e-20))
+ }
+ j += 1
+ }
+ }
+ k += 1
+ }
+ i += 1
+ }
+ sum
+ }).reduce(_+_)
}
@@ -1204,103 +1204,103 @@ object Word2Vec {
Array(new L1Regularizer(nopts.asInstanceOf[L1Regularizer.Opts]))
}
- class LearnOptions extends Learner.Options with Word2Vec.Opts with MatSource.Opts with ADAGrad.Opts with L1Regularizer.Opts;
+ class LearnOptions extends Learner.Options with Word2Vec.Opts with MatSource.Opts with ADAGrad.Opts with L1Regularizer.Opts
def learner(mat0:Mat, targ:Mat) = {
- val opts = new LearnOptions;
- opts.batchSize = math.min(100000, mat0.ncols/30 + 1);
- val nn = new Learner(
- new MatSource(Array(mat0, targ), opts),
- new Word2Vec(opts),
- null,
- null,
- null,
- opts)
+ val opts = new LearnOptions
+ opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
+ val nn = new Learner(
+ new MatSource(Array(mat0, targ), opts),
+ new Word2Vec(opts),
+ null,
+ null,
+ null,
+ opts)
(nn, opts)
}
class FDSopts extends Learner.Options with Word2Vec.Opts with FileSource.Opts with ADAGrad.Opts with L1Regularizer.Opts
- def learner(fn1:String):(Learner, FDSopts) = learner(List(FileSource.simpleEnum(fn1,1,0)));
+ def learner(fn1:String):(Learner, FDSopts) = learner(List(FileSource.simpleEnum(fn1,1,0)))
def learner(fnames:List[(Int)=>String]):(Learner, FDSopts) = {
val opts = new FDSopts
- opts.fnames = fnames;
- opts.batchSize = 100000;
- opts.eltsPerSample = 500;
- implicit val threads = threadPool(4);
- val ds = new FileSource(opts);
- val nn = new Learner(
- ds,
- new Word2Vec(opts),
- null,
- null,
- null,
- opts)
+ opts.fnames = fnames
+ opts.batchSize = 100000
+ opts.eltsPerSample = 500
+ implicit val threads = threadPool(4)
+ val ds = new FileSource(opts)
+ val nn = new Learner(
+ ds,
+ new Word2Vec(opts),
+ null,
+ null,
+ null,
+ opts)
(nn, opts)
}
def predictor(model0:Model, mat0:Mat, preds:Mat):(Learner, LearnOptions) = {
- val model = model0.asInstanceOf[Word2Vec];
- val opts = new LearnOptions;
+ val model = model0.asInstanceOf[Word2Vec]
+ val opts = new LearnOptions
opts.batchSize = math.min(10000, mat0.ncols/30 + 1)
- if (mat0.asInstanceOf[AnyRef] != null) opts.putBack = 1;
+ if (mat0.asInstanceOf[AnyRef] != null) opts.putBack = 1
- val newmod = new Word2Vec(opts);
- newmod.refresh = false;
- newmod.copyFrom(model);
- val mopts = model.opts.asInstanceOf[Word2Vec.Opts];
- opts.dim = mopts.dim;
- opts.vocabSize = mopts.vocabSize;
- opts.nskip = mopts.nskip;
- opts.nneg = mopts.nneg;
- opts.nreuse = mopts.nreuse;
+ val newmod = new Word2Vec(opts)
+ newmod.refresh = false
+ newmod.copyFrom(model)
+ val mopts = model.opts.asInstanceOf[Word2Vec.Opts]
+ opts.dim = mopts.dim
+ opts.vocabSize = mopts.vocabSize
+ opts.nskip = mopts.nskip
+ opts.nneg = mopts.nneg
+ opts.nreuse = mopts.nreuse
val nn = new Learner(
new MatSource(Array(mat0, preds), opts),
newmod,
null,
null,
null,
- opts);
+ opts)
(nn, opts)
}
def predictor(model0:Model, mat0:Mat):(Learner, LearnOptions) = {
- val model = model0.asInstanceOf[Word2Vec];
- val opts = new LearnOptions;
+ val model = model0.asInstanceOf[Word2Vec]
+ val opts = new LearnOptions
opts.batchSize = math.min(10000, mat0.ncols/30 + 1)
- val newmod = new Word2Vec(opts);
- newmod.refresh = false;
- newmod.copyFrom(model);
- val mopts = model.opts.asInstanceOf[Word2Vec.Opts];
- opts.dim = mopts.dim;
- opts.vocabSize = mopts.vocabSize;
- opts.nskip = mopts.nskip;
- opts.nneg = mopts.nneg;
- opts.nreuse = mopts.nreuse;
- opts.maxArraySize = mopts.maxArraySize;
- opts.iSlice = mopts.iSlice;
- opts.nSlices = mopts.nSlices;
- opts.nHeadTerms = mopts.nHeadTerms;
+ val newmod = new Word2Vec(opts)
+ newmod.refresh = false
+ newmod.copyFrom(model)
+ val mopts = model.opts.asInstanceOf[Word2Vec.Opts]
+ opts.dim = mopts.dim
+ opts.vocabSize = mopts.vocabSize
+ opts.nskip = mopts.nskip
+ opts.nneg = mopts.nneg
+ opts.nreuse = mopts.nreuse
+ opts.maxArraySize = mopts.maxArraySize
+ opts.iSlice = mopts.iSlice
+ opts.nSlices = mopts.nSlices
+ opts.nHeadTerms = mopts.nHeadTerms
val nn = new Learner(
new MatSource(Array(mat0), opts),
newmod,
null,
null,
null,
- opts);
+ opts)
(nn, opts)
}
- class LearnParOptions extends ParLearner.Options with Word2Vec.Opts with FileSource.Opts with ADAGrad.Opts;
+ class LearnParOptions extends ParLearner.Options with Word2Vec.Opts with FileSource.Opts with ADAGrad.Opts
def learnPar(fn1:String):(ParLearnerF, LearnParOptions) = {learnPar(List(FileSource.simpleEnum(fn1,1,0)))}
def learnPar(fnames:List[(Int) => String]):(ParLearnerF, LearnParOptions) = {
- val opts = new LearnParOptions;
- opts.batchSize = 10000;
- opts.lrate = 1f;
- opts.fnames = fnames;
+ val opts = new LearnParOptions
+ opts.batchSize = 10000
+ opts.lrate = 1f
+ opts.fnames = fnames
implicit val threads = threadPool(4)
val nn = new ParLearnerF(
new FileSource(opts),
@@ -1315,65 +1315,65 @@ object Word2Vec {
// Read a Google Word2Vec model file in binary or text format.
def readGoogleW2V(fname:String, dict:Dict, n:Int, binary:Boolean = false):FMat = {
- val ins = HMat.getInputStream(fname, 0);
- val din = new DataInputStream(ins);
- val sin = new Scanner(din);
+ val ins = HMat.getInputStream(fname, 0)
+ val din = new DataInputStream(ins)
+ val sin = new Scanner(din)
val header = sin.nextLine
- val dims = header.split(" ");
- val nr = dims(0).toInt;
- val dim = dims(1).toInt;
- val model = FMat(dim, n);
+ val dims = header.split(" ")
+ val nr = dims(0).toInt
+ val dim = dims(1).toInt
+ val model = FMat(dim, n)
- var i = 0;
+ var i = 0
while (i < nr) {
- val word = sin.next;
- val icol = dict(word);
- val saveIt = (icol >= 0 && icol < n);
- var j = 0;
- while (j < dim) {
- val v = if (binary) {
- din.readFloat;
- } else {
- sin.nextFloat;
- }
- if (saveIt) model(j, icol) = v;
- j += 1;
- }
- sin.nextLine;
- i += 1;
- if (i % 1000 == 0) println("i=%d %s" format (i, word))
+ val word = sin.next
+ val icol = dict(word)
+ val saveIt = (icol >= 0 && icol < n)
+ var j = 0
+ while (j < dim) {
+ val v = if (binary) {
+ din.readFloat
+ } else {
+ sin.nextFloat
+ }
+ if (saveIt) model(j, icol) = v
+ j += 1
+ }
+ sin.nextLine
+ i += 1
+ if (i % 1000 == 0) println("i=%d %s" format (i, word))
}
- model;
+ model
}
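
For orientation, the text variant walked by the Scanner above is one "<nwords> <dim>" header line followed by one "<word> v1 ... vdim" line per word; the binary variant packs the floats with readFloat instead. A minimal parse sketch over hypothetical in-memory data:

    import java.util.Scanner

    val sin = new Scanner("2 3\napple 0.1 0.2 0.3\nbanana 0.4 0.5 0.6\n")
    val dims = sin.nextLine.split(" ")
    val (nr, dim) = (dims(0).toInt, dims(1).toInt)
    for (_ <- 0 until nr) {
      val word = sin.next
      val vec = Array.fill(dim)(sin.nextFloat)  // locale-sensitive, as in readGoogleW2V
      println("%s -> %s" format (word, vec.mkString(" ")))
    }
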
// Write a Google Word2Vec model file in binary or text format.
def saveGoogleW2V(dict:CSMat, mod:FMat, fname:String, binary:Boolean = false) = {
- val outs = HMat.getOutputStream(fname, 0);
- val dout = new DataOutputStream(outs);
- val fout = new PrintWriter(dout);
- val cr = String.format("\n");
- fout.print(mod.ncols.toString + " " + mod.nrows.toString + cr);
- fout.flush;
- var i = 0;
- while (i < mod.ncols) {
- fout.print(dict(i)+ " ");
- fout.flush;
- var nwritten = 0;
- var j = 0;
- while (j < mod.nrows) {
- if (binary) {
- dout.writeFloat(mod(j,i));
- } else {
- dout.writeBytes("%g " format mod(j,i));
- }
- j += 1;
- }
- i += 1;
- dout.writeBytes(cr);
- }
- dout.close;
-};
+ val outs = HMat.getOutputStream(fname, 0)
+ val dout = new DataOutputStream(outs)
+ val fout = new PrintWriter(dout)
+ val cr = String.format("\n")
+ fout.print(mod.ncols.toString + " " + mod.nrows.toString + cr)
+ fout.flush
+ var i = 0
+ while (i < mod.ncols) {
+ fout.print(dict(i)+ " ")
+ fout.flush
+ var nwritten = 0
+ var j = 0
+ while (j < mod.nrows) {
+ if (binary) {
+ dout.writeFloat(mod(j,i))
+ } else {
+ dout.writeBytes("%g " format mod(j,i))
+ }
+ j += 1
+ }
+ i += 1
+ dout.writeBytes(cr)
+ }
+ dout.close
+}
}
diff --git a/src/main/scala/BIDMach/networks/layers/AddLayer.scala b/src/main/scala/BIDMach/networks/layers/AddLayer.scala
index e24aec19..c3c51ddc 100644
--- a/src/main/scala/BIDMach/networks/layers/AddLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/AddLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
@@ -21,55 +21,55 @@ import BIDMach.networks._
class AddLayer(override val net:Net, override val opts:AddNodeOpts = new AddNode) extends Layer(net, opts) {
- override val _inputs = new Array[LayerTerm](opts.ninputs);
+ override val _inputs = new Array[LayerTerm](opts.ninputs)
- override def forward = {
- val start = toc;
- createOutput(inputData.dims);
- output <-- inputData;
- (1 until inputlength).map((i:Int) => output ~ output + inputDatas(i));
- clearDeriv;
- forwardtime += toc - start;
- }
+ override def forward = {
+ val start = toc
+ createOutput(inputData.dims)
+ output <-- inputData
+ (1 until inputlength).map((i:Int) => output ~ output + inputDatas(i))
+ clearDeriv
+ forwardtime += toc - start
+ }
- override def backward = {
- val start = toc;
- (0 until inputlength).map((i:Int) => {
- if (inputDerivs(i).asInstanceOf[AnyRef] != null) inputDerivs(i) ~ inputDerivs(i) + deriv
- });
- backwardtime += toc - start;
- }
+ override def backward = {
+ val start = toc
+ (0 until inputlength).map((i:Int) => {
+ if (inputDerivs(i).asInstanceOf[AnyRef] != null) inputDerivs(i) ~ inputDerivs(i) + deriv
+ })
+ backwardtime += toc - start
+ }
override def toString = {
- "add@"+("%04x" format (hashCode % 0x10000));
+ "add@"+("%04x" format (hashCode % 0x10000))
}
}
trait AddNodeOpts extends NodeOpts {
- var ninputs = 2;
+ var ninputs = 2
}
class AddNode extends Node with AddNodeOpts {
- override val inputs:Array[NodeTerm] = new Array[NodeTerm](ninputs);
+ override val inputs:Array[NodeTerm] = new Array[NodeTerm](ninputs)
def copyTo(opts:AddNode):AddNode = {
- super.copyTo(opts);
- opts.ninputs = ninputs;
- opts;
+ super.copyTo(opts)
+ opts.ninputs = ninputs
+ opts
}
- override def clone:AddNode = {copyTo(new AddNode).asInstanceOf[AddNode];}
+  override def clone:AddNode = {copyTo(new AddNode).asInstanceOf[AddNode]}
- override def create(net:Net):AddLayer = {AddLayer(net, this);}
+  override def create(net:Net):AddLayer = {AddLayer(net, this)}
override def toString = {
- "add@"+("%04x" format (hashCode % 0x10000));
+ "add@"+("%04x" format (hashCode % 0x10000))
}
}
object AddLayer {
- def apply(net:Net) = new AddLayer(net, new AddNode);
+ def apply(net:Net) = new AddLayer(net, new AddNode)
-  def apply(net:Net, opts:AddNodeOpts) = new AddLayer(net, opts);
+  def apply(net:Net, opts:AddNodeOpts) = new AddLayer(net, opts)
}
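
The add layer's gradient is the identity for every operand (d(x1 + ... + xn)/dxk = 1), so backward just accumulates the output deriv into each non-null input deriv. A flat-array sketch of that accumulation (hypothetical values):

    val deriv = Array(1f, 2f, 3f)
    val inputDerivs = Array(Array(0f, 0f, 0f), Array(10f, 10f, 10f))
    for (d <- inputDerivs; c <- 0 until deriv.length) d(c) += deriv(c)
    // inputDerivs is now [[1, 2, 3], [11, 12, 13]]
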
diff --git a/src/main/scala/BIDMach/networks/layers/CompoundLayer.scala b/src/main/scala/BIDMach/networks/layers/CompoundLayer.scala
index afb15983..9765d1c7 100644
--- a/src/main/scala/BIDMach/networks/layers/CompoundLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/CompoundLayer.scala
@@ -10,104 +10,104 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
class CompoundLayer(override val net:Net, override val opts:CompoundNode = new CompoundNode) extends ModelLayer(net, opts) {
-
+
override def setInput(i:Int, v:LayerTerm):CompoundLayer = { // Assumes the inputs are the first k layers in internal_layers
- _inputs(i) = v;
- internal_layers(i).setInput(0, v);
+ _inputs(i) = v
+ internal_layers(i).setInput(0, v)
this
}
-
- var grid:LayerMat = null;
-
- def internal_layers:Array[Layer] = grid.data;
-
- override def forward = {
- val start = toc;
- for (i <- 0 until grid.ncols) {
- for (j <- 0 until grid.nrows) {
- val layer = grid(j, i);
- if (layer != null) {
- if (net.opts.debug != 0) {
- println(" compound layer forward (%d,%d) %s" format (j, i, layer.getClass));
- }
- layer.forward;
- }
- }
- }
+
+ var grid:LayerMat = null
+
+ def internal_layers:Array[Layer] = grid.data
+
+ override def forward = {
+ val start = toc
+ for (i <- 0 until grid.ncols) {
+ for (j <- 0 until grid.nrows) {
+ val layer = grid(j, i)
+ if (layer != null) {
+ if (net.opts.debug != 0) {
+ println(" compound layer forward (%d,%d) %s" format (j, i, layer.getClass))
+ }
+ layer.forward
+ }
+ }
+ }
- for (i <- 0 until opts.outputNumbers.length) {
- _outputs(i) = grid(opts.outputNumbers(i)).output;
- if (_derivs(i).asInstanceOf[AnyRef] == null){
- _derivs(i) = grid(opts.outputNumbers(i)).deriv;
- }
- }
- forwardtime += toc - start;
- }
-
- override def backward(ipass:Int, pos:Long) = {
- val start = toc;
- for (i <- (grid.ncols - 1) to 0 by -1) {
- for (j <- (grid.nrows -1) to 0 by -1) {
- val layer = grid(j, i);
- if (layer != null) {
- if (net.opts.debug != 0) {
- println(" compound layer backward (%d,%d) %s" format (j, i, layer.getClass));
- }
- layer.backward(ipass, pos);
- }
- }
- }
- backwardtime += toc - start;
- }
-
- override def getModelMats(net:Net) = {
- for (i <- 0 until grid.ncols) {
- for (j <- 0 until grid.nrows) {
- val layer = grid(j, i);
- if (layer != null) {
- layer.getModelMats(net);
- }
- }
- }
- }
+ for (i <- 0 until opts.outputNumbers.length) {
+ _outputs(i) = grid(opts.outputNumbers(i)).output
+ if (_derivs(i).asInstanceOf[AnyRef] == null){
+ _derivs(i) = grid(opts.outputNumbers(i)).deriv
+ }
+ }
+ forwardtime += toc - start
+ }
+
+ override def backward(ipass:Int, pos:Long) = {
+ val start = toc
+ for (i <- (grid.ncols - 1) to 0 by -1) {
+ for (j <- (grid.nrows -1) to 0 by -1) {
+ val layer = grid(j, i)
+ if (layer != null) {
+ if (net.opts.debug != 0) {
+ println(" compound layer backward (%d,%d) %s" format (j, i, layer.getClass))
+ }
+ layer.backward(ipass, pos)
+ }
+ }
+ }
+ backwardtime += toc - start
+ }
+
+ override def getModelMats(net:Net) = {
+ for (i <- 0 until grid.ncols) {
+ for (j <- 0 until grid.nrows) {
+ val layer = grid(j, i)
+ if (layer != null) {
+ layer.getModelMats(net)
+ }
+ }
+ }
+ }
- def construct = {
-// internal_layers = new Array[Layer](opts.lopts.length);
- grid = LayerMat(opts.grid.nrows, opts.grid.ncols);
- for (i <- 0 until grid.ncols) {
- for (j <- 0 until grid.nrows) {
- val node = opts.grid(j, i);
- if (node != null) {
- grid(j, i) = node.create(net);
- node.myLayer = grid(j, i);
- grid(j, i).parent = this;
- }
- }
- }
- for (i <- 0 until grid.ncols) {
- for (j <- 0 until grid.nrows) {
- val node = opts.grid(j, i);
- if (node != null) {
- for (k <- 0 until node.inputs.length) {
- if (node.inputs(k) != null) {
- val nodeTerm = node.inputs(k);
- grid(j, i).setInput(k, new LayerTerm(nodeTerm.node.myLayer, nodeTerm.term));
- }
- }
- grid(j, i) match {
- case aa:LinLayer => aa.opts.aopts = opts.aopts;
- case _ =>
- }
- }
- }
- }
- }
+ def construct = {
+// internal_layers = new Array[Layer](opts.lopts.length)
+ grid = LayerMat(opts.grid.nrows, opts.grid.ncols)
+ for (i <- 0 until grid.ncols) {
+ for (j <- 0 until grid.nrows) {
+ val node = opts.grid(j, i)
+ if (node != null) {
+ grid(j, i) = node.create(net)
+ node.myLayer = grid(j, i)
+ grid(j, i).parent = this
+ }
+ }
+ }
+ for (i <- 0 until grid.ncols) {
+ for (j <- 0 until grid.nrows) {
+ val node = opts.grid(j, i)
+ if (node != null) {
+ for (k <- 0 until node.inputs.length) {
+ if (node.inputs(k) != null) {
+            val nodeTerm = node.inputs(k)
+ grid(j, i).setInput(k, new LayerTerm(nodeTerm.node.myLayer, nodeTerm.term))
+ }
+ }
+ grid(j, i) match {
+ case aa:LinLayer => aa.opts.aopts = opts.aopts
+ case _ =>
+ }
+ }
+ }
+ }
+ }
override def toString = {
"compound@"+Integer.toHexString(hashCode % 0x10000).toString
@@ -115,13 +115,13 @@ class CompoundLayer(override val net:Net, override val opts:CompoundNode = new C
}
trait CompoundNodeOpts extends ModelNodeOpts {
- var aopts:ADAGrad.Opts = null;
- var prefix = "";
+ var aopts:ADAGrad.Opts = null
+ var prefix = ""
}
class CompoundNode extends ModelNode with CompoundNodeOpts {
- var grid:NodeMat = null;
-// var lopts:Array[Node] = null;
+ var grid:NodeMat = null
+// var lopts:Array[Node] = null
override def toString = {
"compound@"+Integer.toHexString(hashCode % 0x10000).toString
diff --git a/src/main/scala/BIDMach/networks/layers/CopyLayer.scala b/src/main/scala/BIDMach/networks/layers/CopyLayer.scala
index 0be2c9a9..a7c13677 100644
--- a/src/main/scala/BIDMach/networks/layers/CopyLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/CopyLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
@@ -19,20 +19,20 @@ import BIDMach.networks._
class CopyLayer(override val net:Net, override val opts:CopyNodeOpts = new CopyNode) extends Layer(net, opts) {
override def forward = {
- val start = toc;
- if (output.asInstanceOf[AnyRef] == null) {
- val io = inputData;
- output = io.zeros(io.dims);
- }
- output <-- inputData;
- clearDeriv;
- forwardtime += toc - start;
+ val start = toc
+ if (output.asInstanceOf[AnyRef] == null) {
+ val io = inputData
+ output = io.zeros(io.dims)
+ }
+ output <-- inputData
+ clearDeriv
+ forwardtime += toc - start
}
override def backward = {
- val start = toc;
- if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + deriv;
- backwardtime += toc - start;
+ val start = toc
+ if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + deriv
+ backwardtime += toc - start
}
override def toString = {
@@ -45,7 +45,7 @@ trait CopyNodeOpts extends NodeOpts {
class CopyNode extends Node with CopyNodeOpts {
- override def clone:CopyNode = {copyTo(new CopyNode).asInstanceOf[CopyNode];}
+  override def clone:CopyNode = {copyTo(new CopyNode).asInstanceOf[CopyNode]}
-  override def create(net:Net):CopyLayer = {CopyLayer(net, this);}
+  override def create(net:Net):CopyLayer = {CopyLayer(net, this)}
@@ -56,7 +56,7 @@ class CopyNode extends Node with CopyNodeOpts {
object CopyLayer {
- def apply(net:Net) = new CopyLayer(net, new CopyNode);
+ def apply(net:Net) = new CopyLayer(net, new CopyNode)
- def apply(net:Net, opts:CopyNode) = new CopyLayer(net, opts);
-}
\ No newline at end of file
+ def apply(net:Net, opts:CopyNode) = new CopyLayer(net, opts)
+}
\ No newline at end of file
diff --git a/src/main/scala/BIDMach/networks/layers/DropoutLayer.scala b/src/main/scala/BIDMach/networks/layers/DropoutLayer.scala
index b45e7761..e4bfc899 100644
--- a/src/main/scala/BIDMach/networks/layers/DropoutLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/DropoutLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
/**
@@ -20,27 +20,27 @@ import BIDMach.networks._
*/
class DropoutLayer(override val net:Net, override val opts:DropoutNodeOpts = new DropoutNode) extends Layer(net, opts) {
- var randmat:ND = null;
+ var randmat:ND = null
override def forward = {
- val start = toc;
- createOutput;
- randmat = inputData + 20f; // Hack to make a cached container to hold the random output
- if (nopts.predict) {
- output ~ inputData * opts.frac;
- } else {
- rand(randmat);
- randmat ~ randmat < opts.frac
- output ~ inputData ∘ randmat;
- }
- clearDeriv;
- forwardtime += toc - start;
+ val start = toc
+ createOutput
+ randmat = inputData + 20f; // Hack to make a cached container to hold the random output
+ if (nopts.predict) {
+ output ~ inputData * opts.frac
+ } else {
+ rand(randmat)
+ randmat ~ randmat < opts.frac
+ output ~ inputData ∘ randmat
+ }
+ clearDeriv
+ forwardtime += toc - start
}
override def backward = {
- val start = toc;
- if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + (deriv ∘ randmat);
- backwardtime += toc - start;
+ val start = toc
+ if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + (deriv ∘ randmat)
+ backwardtime += toc - start
}
override def toString = {
@@ -49,29 +49,29 @@ class DropoutLayer(override val net:Net, override val opts:DropoutNodeOpts = new
}
trait DropoutNodeOpts extends NodeOpts {
- var frac = 1f;
+ var frac = 1f
}
class DropoutNode extends Node with DropoutNodeOpts {
- def copyTo(opts:DropoutNode):DropoutNode = {
- super.copyTo(opts);
- opts.frac = frac;
- opts;
- }
+ def copyTo(opts:DropoutNode):DropoutNode = {
+ super.copyTo(opts)
+ opts.frac = frac
+ opts
+ }
- override def clone:DropoutNode = {copyTo(new DropoutNode);}
+  override def clone:DropoutNode = {copyTo(new DropoutNode)}
- override def create(net:Net):DropoutLayer = {DropoutLayer(net, this);}
+  override def create(net:Net):DropoutLayer = {DropoutLayer(net, this)}
- override def toString = {
- "dropout@"+Integer.toHexString(hashCode % 0x10000).toString
- }
+ override def toString = {
+ "dropout@"+Integer.toHexString(hashCode % 0x10000).toString
+ }
}
object DropoutLayer {
- def apply(net:Net) = new DropoutLayer(net, new DropoutNode);
+ def apply(net:Net) = new DropoutLayer(net, new DropoutNode)
- def apply(net:Net, opts:DropoutNodeOpts) = new DropoutLayer(net, opts);
+ def apply(net:Net, opts:DropoutNodeOpts) = new DropoutLayer(net, opts)
}
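
// Note: the dropout convention above is scale-at-test-time: training keeps each
// unit with probability frac (the mask is rand < frac) with no rescaling, and
// prediction multiplies by frac, so expected activations agree between the two
// modes. A self-contained sketch of those semantics, assuming plain arrays in
// place of BIDMach matrices:
import scala.util.Random

object DropoutSketch {
  // Train: Bernoulli(frac) keep-mask, no rescaling; returns (output, mask).
  def train(in: Array[Float], frac: Float, rng: Random): (Array[Float], Array[Float]) = {
    val mask = Array.fill(in.length)(if (rng.nextFloat() < frac) 1f else 0f)
    (Array.tabulate(in.length)(i => in(i) * mask(i)), mask)
  }
  // Predict: scale by frac so E[train output] == predict output.
  def predict(in: Array[Float], frac: Float): Array[Float] = in.map(_ * frac)
  // Backward: gradient flows only through kept units, matching `deriv ∘ randmat`.
  def backward(deriv: Array[Float], mask: Array[Float]): Array[Float] =
    Array.tabulate(deriv.length)(i => deriv(i) * mask(i))
}
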
diff --git a/src/main/scala/BIDMach/networks/layers/ExpLayer.scala b/src/main/scala/BIDMach/networks/layers/ExpLayer.scala
index b62a711f..340a32dd 100644
--- a/src/main/scala/BIDMach/networks/layers/ExpLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/ExpLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
@@ -21,19 +21,19 @@ import BIDMach.networks._
class ExpLayer(override val net:Net, override val opts:ExpNodeOpts = new ExpNode) extends Layer(net, opts) {
- override def forward = {
- val start = toc;
- createOutput;
- exp(inputData, output);
- clearDeriv;
- forwardtime += toc - start;
- }
-
- override def backward = {
- val start = toc;
- if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + (deriv ∘ output);
- backwardtime += toc - start;
- }
+ override def forward = {
+ val start = toc
+ createOutput
+ exp(inputData, output)
+ clearDeriv
+ forwardtime += toc - start
+ }
+
+ override def backward = {
+ val start = toc
+    if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + (deriv ∘ output)
+ backwardtime += toc - start
+ }
override def toString = {
"exp@"+Integer.toHexString(hashCode % 0x10000).toString
@@ -46,7 +46,7 @@ trait ExpNodeOpts extends NodeOpts {
class ExpNode extends Node with ExpNodeOpts {
- override def clone:ExpNode = {copyTo(new ExpNode).asInstanceOf[ExpNode];}
+  override def clone:ExpNode = {copyTo(new ExpNode).asInstanceOf[ExpNode]}
override def create(net:Net):ExpLayer = {ExpLayer(net, this);}
@@ -57,7 +57,7 @@ class ExpNode extends Node with ExpNodeOpts {
object ExpLayer {
- def apply(net:Net) = new ExpLayer(net, new ExpNode);
+ def apply(net:Net) = new ExpLayer(net, new ExpNode)
- def apply(net:Net, opts:ExpNode) = new ExpLayer(net, opts);
-}
\ No newline at end of file
+ def apply(net:Net, opts:ExpNode) = new ExpLayer(net, opts)
+}
\ No newline at end of file
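
// Note: the backward pass above multiplies the incoming gradient by the cached
// forward output because for y = exp(x) the local derivative is dy/dx = exp(x) = y,
// so no extra computation is needed. In plain Scala (sketch):
object ExpSketch {
  def forward(in: Array[Float]): Array[Float] = in.map(x => math.exp(x).toFloat)
  // dL/dx = dL/dy * y: the forward output doubles as the local gradient.
  def backward(out: Array[Float], deriv: Array[Float]): Array[Float] =
    Array.tabulate(out.length)(i => deriv(i) * out(i))
}
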
diff --git a/src/main/scala/BIDMach/networks/layers/GLMLayer.scala b/src/main/scala/BIDMach/networks/layers/GLMLayer.scala
index 49a6d753..14712808 100644
--- a/src/main/scala/BIDMach/networks/layers/GLMLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/GLMLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
@@ -22,33 +22,33 @@ import BIDMach.networks._
*/
class GLMLayer(override val net:Net, override val opts:GLMNodeOpts = new GLMNode) extends Layer(net, opts) {
- var ilinks:Mat = null;
- var totflops = 0L;
+ var ilinks:Mat = null
+ var totflops = 0L
- override def forward = {
- val start = toc;
- createOutput;
- if (ilinks.asInstanceOf[AnyRef] == null) {
- ilinks = convertMat(opts.links);
- for (i <- 0 until opts.links.length) {
- totflops += GLM.linkArray(opts.links(i)).fnflops
- }
- }
- output.asMat <-- GLM.preds(inputData.asMat, ilinks, totflops);
- clearDeriv;
- forwardtime += toc - start;
- }
+ override def forward = {
+ val start = toc
+ createOutput
+ if (ilinks.asInstanceOf[AnyRef] == null) {
+ ilinks = convertMat(opts.links)
+ for (i <- 0 until opts.links.length) {
+ totflops += GLM.linkArray(opts.links(i)).fnflops
+ }
+ }
+ output.asMat <-- GLM.preds(inputData.asMat, ilinks, totflops)
+ clearDeriv
+ forwardtime += toc - start
+ }
- override def backward = {
- val start = toc;
- if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv.asMat ~ inputDeriv.asMat + (deriv.asMat ∘ GLM.derivs(output.asMat, target, ilinks, totflops));
- backwardtime += toc - start;
- }
+ override def backward = {
+ val start = toc
+ if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv.asMat ~ inputDeriv.asMat + (deriv.asMat ∘ GLM.derivs(output.asMat, target, ilinks, totflops))
+ backwardtime += toc - start
+ }
- override def score:FMat = {
- val v = if (target.asInstanceOf[AnyRef] != null) GLM.llfun(output.asMat, target, ilinks, totflops) else row(0);
- FMat(mean(mean(v, 2)));
- }
+ override def score:FMat = {
+ val v = if (target.asInstanceOf[AnyRef] != null) GLM.llfun(output.asMat, target, ilinks, totflops) else row(0)
+ FMat(mean(mean(v, 2)))
+ }
override def toString = {
"glm@"+Integer.toHexString(hashCode % 0x10000).toString
@@ -56,19 +56,19 @@ class GLMLayer(override val net:Net, override val opts:GLMNodeOpts = new GLMNode
}
trait GLMNodeOpts extends NodeOpts {
- var links:IMat = null;
+ var links:IMat = null
}
class GLMNode extends Node with GLMNodeOpts {
- def copyTo(opts:GLMNode) = {
- super.copyTo(opts);
- opts.links = links;
- opts;
- }
+ def copyTo(opts:GLMNode) = {
+ super.copyTo(opts)
+ opts.links = links
+ opts
+ }
- override def clone:GLMNode = {copyTo(new GLMNode);}
+  override def clone:GLMNode = {copyTo(new GLMNode)}
- override def create(net:Net):GLMLayer = {GLMLayer(net, this);}
+  override def create(net:Net):GLMLayer = {GLMLayer(net, this)}
override def toString = {
"glm@"+Integer.toHexString(hashCode % 0x10000).toString
@@ -77,7 +77,7 @@ class GLMNode extends Node with GLMNodeOpts {
object GLMLayer {
- def apply(net:Net) = new GLMLayer(net, new GLMNode);
+ def apply(net:Net) = new GLMLayer(net, new GLMNode)
def apply(net:Net, opts:GLMNodeOpts) = new GLMLayer(net, opts);
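
// Note: GLM.preds applies a per-row link function selected through opts.links,
// and GLM.derivs supplies the matching log-likelihood gradient. As a hedged
// illustration of one such table entry, the logistic link (the names below are
// ours for exposition, not the library's):
object LogisticLinkSketch {
  // Mean function of the logistic link.
  def pred(x: Float): Float = 1f / (1f + math.exp(-x).toFloat)
  // Gradient of the Bernoulli log-likelihood w.r.t. the linear predictor,
  // the role GLM.derivs plays above: t - p for target t and prediction p.
  def deriv(p: Float, t: Float): Float = t - p
}
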
diff --git a/src/main/scala/BIDMach/networks/layers/InputLayer.scala b/src/main/scala/BIDMach/networks/layers/InputLayer.scala
index d7295c45..e0b16e6f 100644
--- a/src/main/scala/BIDMach/networks/layers/InputLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/InputLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
@@ -29,14 +29,14 @@ trait InputNodeOpts extends NodeOpts {}
class InputNode extends Node with InputNodeOpts {
def copyTo(opts:InputNode):InputNode = {
- super.copyTo(opts);
- opts;
+ super.copyTo(opts)
+ opts
}
override def clone:InputNode = {copyTo(new InputNode)}
-
+
override def create(net:Net):InputLayer = {
- InputLayer(net, this);
+ InputLayer(net, this)
}
override def toString = {
@@ -48,8 +48,8 @@ class InputNode extends Node with InputNodeOpts {
object InputLayer {
- def apply(net:Net) = new InputLayer(net, new InputNode);
+ def apply(net:Net) = new InputLayer(net, new InputNode)
- def apply(net:Net, opts:InputNodeOpts) = new InputLayer(net, opts);
+ def apply(net:Net, opts:InputNodeOpts) = new InputLayer(net, opts)
}
diff --git a/src/main/scala/BIDMach/networks/layers/LSTM.scala b/src/main/scala/BIDMach/networks/layers/LSTM.scala
index ebb6d1a2..f3baa579 100644
--- a/src/main/scala/BIDMach/networks/layers/LSTM.scala
+++ b/src/main/scala/BIDMach/networks/layers/LSTM.scala
@@ -9,17 +9,17 @@ import BIDMach.mixins._
import BIDMach.models._
import BIDMach.networks._
import BIDMach._
-import scala.util.hashing.MurmurHash3;
-import scala.collection.mutable.HashMap;
+import scala.util.hashing.MurmurHash3
+import scala.collection.mutable.HashMap
/**
- * LSTM unit
+ * LSTM unit
*/
class LSTMLayer(override val net:Net, override val opts:LSTMNode = new LSTMNode) extends CompoundLayer(net, opts) {
- override val _inputs = new Array[LayerTerm](3);
- override val _outputs = new Array[ND](2);
- override val _derivs = new Array[ND](2);
+ override val _inputs = new Array[LayerTerm](3)
+ override val _outputs = new Array[ND](2)
+ override val _derivs = new Array[ND](2)
override def toString = {
"LSTM@"+Integer.toHexString(hashCode % 0x10000).toString
@@ -27,27 +27,27 @@ class LSTMLayer(override val net:Net, override val opts:LSTMNode = new LSTMNode)
}
trait LSTMNodeOpts extends CompoundNodeOpts {
- var dim = 0;
- var kind = 1;
+ var dim = 0
+ var kind = 1
var hasBias = false;
- var scoreType = 0;
- var outdim = 0;
+ var scoreType = 0
+ var outdim = 0
def copyOpts(opts:LSTMNodeOpts):LSTMNodeOpts = {
- super.copyOpts(opts);
- opts.dim = dim;
- opts.kind = kind;
- opts.hasBias = hasBias;
- opts.scoreType = scoreType;
- opts.outdim = outdim;
- opts;
+ super.copyOpts(opts)
+ opts.dim = dim
+ opts.kind = kind
+ opts.hasBias = hasBias
+ opts.scoreType = scoreType
+ opts.outdim = outdim
+ opts
}
}
-class LSTMNode extends CompoundNode with LSTMNodeOpts {
+class LSTMNode extends CompoundNode with LSTMNodeOpts {
- override val inputs:Array[NodeTerm] = Array(null, null, null);
-// override val inputTerminals:Array[Int] = Array(0,0,0);
+ override val inputs:Array[NodeTerm] = Array(null, null, null)
+// override val inputTerminals:Array[Int] = Array(0,0,0)
def constructGraph = {
kind match {
@@ -57,128 +57,128 @@ class LSTMNode extends CompoundNode with LSTMNodeOpts {
case 3 => constructGraph3
case 4 => constructGraph4
case 5 => constructGraph5
- case _ => throw new RuntimeException("LSTMLayer type %d not recognized" format kind);
+ case _ => throw new RuntimeException("LSTMLayer type %d not recognized" format kind)
}
}
// Basic LSTM topology with 8 linear layers
-
- def constructGraph0 = {
- import BIDMach.networks.layers.Node._
- val odim = dim;
-
- val in_h = copy;
- val in_c = copy;
- val in_i = copy;
+
+ def constructGraph0 = {
+ import BIDMach.networks.layers.Node._
+ val odim = dim
+
+ val in_h = copy
+    val in_c = copy
+ val in_i = copy
- val lin1 = linear(in_h)(prefix+"LSTM_h_in_gate", outdim=odim, hasBias=hasBias);
- val lin2 = linear(in_h)(prefix+"LSTM_h_out_gate", outdim=odim, hasBias=hasBias);
- val lin3 = linear(in_h)(prefix+"LSTM_h_forget_gate", outdim=odim, hasBias=hasBias);
- val lin4 = linear(in_h)(prefix+"LSTM_h_tanh_gate", outdim=odim, hasBias=hasBias);
-
- val lin5 = linear(in_i)(prefix+"LSTM_i_in_gate", outdim=odim, hasBias=hasBias);
- val lin6 = linear(in_i)(prefix+"LSTM_i_out_gate", outdim=odim, hasBias=hasBias);
- val lin7 = linear(in_i)(prefix+"LSTM_i_forget_gate", outdim=odim, hasBias=hasBias);
- val lin8 = linear(in_i)(prefix+"LSTM_i_tanh_gate", outdim=odim, hasBias=hasBias);
-
- val sum1 = lin1 + lin5;
- val sum2 = lin2 + lin6;
- val sum3 = lin3 + lin7;
- val sum4 = lin4 + lin8;
-
- val in_gate = σ(sum1);
- val out_gate = σ(sum2);
- val forget_gate = σ(sum3);
- val in_sat = tanh(sum4);
-
- val in_prod = in_gate ∘ in_sat;
- val f_prod = forget_gate ∘ in_c;
- val out_c = in_prod + f_prod;
-
- val out_tanh = tanh(out_c);
- val out_h = out_gate ∘ out_tanh;
+ val lin1 = linear(in_h)(prefix+"LSTM_h_in_gate", outdim=odim, hasBias=hasBias)
+    val lin2 = linear(in_h)(prefix+"LSTM_h_out_gate", outdim=odim, hasBias=hasBias)
+ val lin3 = linear(in_h)(prefix+"LSTM_h_forget_gate", outdim=odim, hasBias=hasBias)
+ val lin4 = linear(in_h)(prefix+"LSTM_h_tanh_gate", outdim=odim, hasBias=hasBias)
+
+ val lin5 = linear(in_i)(prefix+"LSTM_i_in_gate", outdim=odim, hasBias=hasBias)
+    val lin6 = linear(in_i)(prefix+"LSTM_i_out_gate", outdim=odim, hasBias=hasBias)
+ val lin7 = linear(in_i)(prefix+"LSTM_i_forget_gate", outdim=odim, hasBias=hasBias)
+ val lin8 = linear(in_i)(prefix+"LSTM_i_tanh_gate", outdim=odim, hasBias=hasBias)
+
+ val sum1 = lin1 + lin5
+ val sum2 = lin2 + lin6
+ val sum3 = lin3 + lin7
+ val sum4 = lin4 + lin8
+
+ val in_gate = σ(sum1)
+ val out_gate = σ(sum2)
+ val forget_gate = σ(sum3)
+ val in_sat = tanh(sum4)
+
+ val in_prod = in_gate ∘ in_sat
+ val f_prod = forget_gate ∘ in_c
+ val out_c = in_prod + f_prod
+
+ val out_tanh = tanh(out_c)
+ val out_h = out_gate ∘ out_tanh
- grid = (in_h on in_c on in_i on null) \ (lin1 \ lin5 \ sum1 \ in_gate \ in_prod \ out_tanh on
+ grid = (in_h on in_c on in_i on null) \ (lin1 \ lin5 \ sum1 \ in_gate \ in_prod \ out_tanh on
lin2 \ lin6 \ sum2 \ out_gate \ f_prod \ out_h on
lin3 \ lin7 \ sum3 \ forget_gate \ out_c \ null on
- lin4 \ lin8 \ sum4 \ in_sat \ null \ null);
-
- val lopts = grid.data;
- lopts.map((x:Node) => if (x != null) x.parent = this);
- outputNumbers = Array(lopts.indexOf(out_h), lopts.indexOf(out_c));
- }
-
- // LSTM with 4 linear layers, with h and i stacked as inputs
+ lin4 \ lin8 \ sum4 \ in_sat \ null \ null)
+
+ val lopts = grid.data
+ lopts.map((x:Node) => if (x != null) x.parent = this)
+ outputNumbers = Array(lopts.indexOf(out_h), lopts.indexOf(out_c))
+ }
+
+ // LSTM with 4 linear layers, with h and i stacked as inputs
def constructGraph1 = {
- import BIDMach.networks.layers.Node._
- val odim = dim;
-
- val in_h = copy;
- val in_c = copy;
- val in_i = copy;
- val h_over_i = in_h over in_i;
+ import BIDMach.networks.layers.Node._
+ val odim = dim
+
+ val in_h = copy
+    val in_c = copy
+ val in_i = copy
+ val h_over_i = in_h over in_i
- val lin1 = linear(h_over_i)(prefix+"LSTM_in_gate", outdim=odim, hasBias=hasBias);
- val lin2 = linear(h_over_i)(prefix+"LSTM_out_gate", outdim=odim, hasBias=hasBias);
- val lin3 = linear(h_over_i)(prefix+"LSTM_forget_gate", outdim=odim, hasBias=hasBias);
- val lin4 = linear(h_over_i)(prefix+"LSTM_tanh_gate", outdim=odim, hasBias=hasBias);
-
- val in_gate = σ(lin1);
- val out_gate = σ(lin2);
- val forget_gate = σ(lin3);
- val in_sat = tanh(lin4);
-
- val in_prod = in_gate ∘ in_sat;
- val f_prod = forget_gate ∘ in_c;
- val out_c = in_prod + f_prod;
-
- val out_tanh = tanh(out_c);
- val out_h = out_gate ∘ out_tanh;
+ val lin1 = linear(h_over_i)(prefix+"LSTM_in_gate", outdim=odim, hasBias=hasBias)
+    val lin2 = linear(h_over_i)(prefix+"LSTM_out_gate", outdim=odim, hasBias=hasBias)
+ val lin3 = linear(h_over_i)(prefix+"LSTM_forget_gate", outdim=odim, hasBias=hasBias)
+ val lin4 = linear(h_over_i)(prefix+"LSTM_tanh_gate", outdim=odim, hasBias=hasBias)
+
+ val in_gate = σ(lin1)
+ val out_gate = σ(lin2)
+ val forget_gate = σ(lin3)
+ val in_sat = tanh(lin4)
+
+ val in_prod = in_gate ∘ in_sat
+ val f_prod = forget_gate ∘ in_c
+ val out_c = in_prod + f_prod
+
+ val out_tanh = tanh(out_c)
+ val out_h = out_gate ∘ out_tanh
- grid = in_h \ lin1 \ in_gate \ in_prod \ out_tanh on
+ grid = in_h \ lin1 \ in_gate \ in_prod \ out_tanh on
in_c \ lin2 \ out_gate \ f_prod \ out_h on
in_i \ lin3 \ forget_gate \ out_c \ null on
- h_over_i \ lin4 \ in_sat \ null \ null;
-
- val lopts = grid.data;
- lopts.map((x:Node) => if (x != null) x.parent = this);
- outputNumbers = Array(lopts.indexOf(out_h), lopts.indexOf(out_c));
-
+ h_over_i \ lin4 \ in_sat \ null \ null
+
+ val lopts = grid.data
+ lopts.map((x:Node) => if (x != null) x.parent = this)
+ outputNumbers = Array(lopts.indexOf(out_h), lopts.indexOf(out_c))
+
}
  // LSTM with 1 linear layer, with h and i stacked as inputs, and all 4 outputs stacked
def constructGraph2 = {
import BIDMach.networks.layers.Node._
- val odim = dim;
- val in_h = copy;
- val in_c = copy;
+ val odim = dim
+ val in_h = copy
+ val in_c = copy
val in_i = copy;
- val h_over_i = in_h over in_i;
-
- val lin = linear(h_over_i)(prefix+"LSTM_all", outdim=4*odim, hasBias=hasBias);
- val sp = splitvert(lin, 4);
-
- val in_gate = σ(sp(0));
- val out_gate = σ(sp(1));
- val forget_gate = σ(sp(2));
- val in_sat = tanh(sp(3));
-
- val in_prod = in_gate ∘ in_sat;
- val f_prod = forget_gate ∘ in_c;
- val out_c = in_prod + f_prod;
-
- val out_tanh = tanh(out_c);
- val out_h = out_gate ∘ out_tanh;
+ val h_over_i = in_h over in_i
+
+ val lin = linear(h_over_i)(prefix+"LSTM_all", outdim=4*odim, hasBias=hasBias)
+ val sp = splitvert(lin, 4)
+
+ val in_gate = σ(sp(0))
+ val out_gate = σ(sp(1))
+ val forget_gate = σ(sp(2))
+ val in_sat = tanh(sp(3))
+
+ val in_prod = in_gate ∘ in_sat
+ val f_prod = forget_gate ∘ in_c
+ val out_c = in_prod + f_prod
+
+ val out_tanh = tanh(out_c)
+    val out_h = out_gate ∘ out_tanh
grid = in_h \ lin \ in_gate \ in_prod \ out_tanh on
in_c \ sp \ out_gate \ f_prod \ out_h on
in_i \ null \ forget_gate \ out_c \ null on
- h_over_i \ null \ in_sat \ null \ null;
+ h_over_i \ null \ in_sat \ null \ null
val lopts = grid.data;
- lopts.map((x:Node) => if (x != null) x.parent = this);
+ lopts.map((x:Node) => if (x != null) x.parent = this)
outputNumbers = Array(lopts.indexOf(out_h), lopts.indexOf(out_c)); // Specifies the output layer numbers (next_h and next_c)
}
@@ -186,36 +186,36 @@ class LSTMNode extends CompoundNode with LSTMNodeOpts {
def constructGraph3 = {
import BIDMach.networks.layers.Node._
- val odim = dim;
- val in_h = copy;
- val in_c = copy;
+ val odim = dim
+ val in_h = copy
+ val in_c = copy
val in_i = copy;
- val h_over_i = in_h over in_i;
-
- val lin1 = linear(h_over_i)(prefix+"LSTM_in_out", outdim=2*odim, hasBias=hasBias);
- val sp1 = splitvert(lin1, 2);
- val lin2 = linear(h_over_i)(prefix+"LSTM_forget_tanh", outdim=2*odim, hasBias=hasBias);
- val sp2 = splitvert(lin2, 2);
-
- val in_gate = σ(sp1(0));
- val out_gate = σ(sp1(1));
- val forget_gate = σ(sp2(0));
- val in_sat = tanh(sp2(1));
-
- val in_prod = in_gate ∘ in_sat;
- val f_prod = forget_gate ∘ in_c;
- val out_c = in_prod + f_prod;
-
- val out_tanh = tanh(out_c);
- val out_h = out_gate ∘ out_tanh;
+ val h_over_i = in_h over in_i
+
+ val lin1 = linear(h_over_i)(prefix+"LSTM_in_out", outdim=2*odim, hasBias=hasBias)
+ val sp1 = splitvert(lin1, 2)
+ val lin2 = linear(h_over_i)(prefix+"LSTM_forget_tanh", outdim=2*odim, hasBias=hasBias)
+ val sp2 = splitvert(lin2, 2)
+
+ val in_gate = σ(sp1(0))
+ val out_gate = σ(sp1(1))
+ val forget_gate = σ(sp2(0))
+ val in_sat = tanh(sp2(1))
+
+ val in_prod = in_gate ∘ in_sat
+ val f_prod = forget_gate ∘ in_c
+ val out_c = in_prod + f_prod
+
+ val out_tanh = tanh(out_c)
+    val out_h = out_gate ∘ out_tanh
grid = in_h \ lin1 \ in_gate \ in_prod \ out_tanh on
in_c \ sp1 \ out_gate \ f_prod \ out_h on
in_i \ lin2 \ forget_gate \ out_c \ null on
- h_over_i \ sp2 \ in_sat \ null \ null;
+ h_over_i \ sp2 \ in_sat \ null \ null
val lopts = grid.data;
- lopts.map((x:Node) => if (x != null) x.parent = this);
+ lopts.map((x:Node) => if (x != null) x.parent = this)
outputNumbers = Array(lopts.indexOf(out_h), lopts.indexOf(out_c)); // Specifies the output layer numbers (next_h and next_c)
}
@@ -223,144 +223,144 @@ class LSTMNode extends CompoundNode with LSTMNodeOpts {
def constructGraph4 = {
import BIDMach.networks.layers.Node._
- val odim = dim;
- val in_h = copy;
- val in_c = copy;
+ val odim = dim
+ val in_h = copy
+ val in_c = copy
val in_i = copy;
- val linh = linear(in_h)(prefix+"LSTM_h", outdim=4*odim, hasBias=hasBias);
- val sph = splitvert(linh, 4);
- val lini = linear(in_i)(prefix+"LSTM_i", outdim=4*odim, hasBias=hasBias);
- val spi = splitvert(lini, 4);
-
- val lin1 = sph(0) + spi(0);
- val lin2 = sph(1) + spi(1);
- val lin3 = sph(2) + spi(2);
- val lin4 = sph(3) + spi(3);
-
- val in_gate = σ(lin1);
- val out_gate = σ(lin2);
- val forget_gate = σ(lin3);
- val in_sat = tanh(lin4);
-
- val in_prod = in_gate ∘ in_sat;
- val f_prod = forget_gate ∘ in_c;
- val out_c = in_prod + f_prod;
-
- val out_tanh = tanh(out_c);
- val out_h = out_gate ∘ out_tanh;
+ val linh = linear(in_h)(prefix+"LSTM_h", outdim=4*odim, hasBias=hasBias)
+ val sph = splitvert(linh, 4)
+ val lini = linear(in_i)(prefix+"LSTM_i", outdim=4*odim, hasBias=hasBias)
+ val spi = splitvert(lini, 4)
+
+ val lin1 = sph(0) + spi(0)
+ val lin2 = sph(1) + spi(1)
+ val lin3 = sph(2) + spi(2)
+ val lin4 = sph(3) + spi(3)
+
+ val in_gate = σ(lin1)
+ val out_gate = σ(lin2)
+ val forget_gate = σ(lin3)
+ val in_sat = tanh(lin4)
+
+ val in_prod = in_gate ∘ in_sat
+ val f_prod = forget_gate ∘ in_c
+ val out_c = in_prod + f_prod
+
+ val out_tanh = tanh(out_c)
+    val out_h = out_gate ∘ out_tanh
grid = (in_h on in_c on in_i on null) \ (linh \ lin1 \ in_gate \ in_prod \ out_tanh on
sph \ lin2 \ out_gate \ f_prod \ out_h on
lini \ lin3 \ forget_gate \ out_c \ null on
- spi \ lin4 \ in_sat \ null \ null);
+ spi \ lin4 \ in_sat \ null \ null)
val lopts = grid.data;
- lopts.map((x:Node) => if (x != null) x.parent = this);
+ lopts.map((x:Node) => if (x != null) x.parent = this)
outputNumbers = Array(lopts.indexOf(out_h), lopts.indexOf(out_c)); // Specifies the output layer numbers (next_h and next_c)
}
// LSTM using a fused inner kernel
def constructGraph5 = {
- import BIDMach.networks.layers.Node._
- val odim = dim;
-
- val in_h = copy;
- val in_c = copy;
- val in_i = copy;
- val h_over_i = in_h over in_i;
+ import BIDMach.networks.layers.Node._
+ val odim = dim
+
+ val in_h = copy
+    val in_c = copy
+ val in_i = copy
+ val h_over_i = in_h over in_i
- val lin1 = linear(h_over_i)(prefix+"LSTM_in_gate", outdim=odim, hasBias=hasBias);
- val lin2 = linear(h_over_i)(prefix+"LSTM_out_gate", outdim=odim, hasBias=hasBias);
- val lin3 = linear(h_over_i)(prefix+"LSTM_forget_gate", outdim=odim, hasBias=hasBias);
- val lin4 = linear(h_over_i)(prefix+"LSTM_tanh_gate", outdim=odim, hasBias=hasBias);
-
- val lstm_gate = lstm_fused(in_c, lin1, lin2, lin3, lin4);
- val out_h = copy(new NodeTerm(lstm_gate, 1));
+ val lin1 = linear(h_over_i)(prefix+"LSTM_in_gate", outdim=odim, hasBias=hasBias)
+    val lin2 = linear(h_over_i)(prefix+"LSTM_out_gate", outdim=odim, hasBias=hasBias)
+ val lin3 = linear(h_over_i)(prefix+"LSTM_forget_gate", outdim=odim, hasBias=hasBias)
+ val lin4 = linear(h_over_i)(prefix+"LSTM_tanh_gate", outdim=odim, hasBias=hasBias)
+
+    val lstm_gate = lstm_fused(in_c, lin1, lin2, lin3, lin4)
+ val out_h = copy(new NodeTerm(lstm_gate, 1))
- grid = in_h \ lin1 \ lstm_gate on
+ grid = in_h \ lin1 \ lstm_gate on
in_c \ lin2 \ out_h on
in_i \ lin3 \ null on
- h_over_i \ lin4 \ null ;
-
- val lopts = grid.data;
- lopts.map((x:Node) => if (x != null) x.parent = this);
- outputNumbers = Array(lopts.indexOf(out_h), lopts.indexOf(lstm_gate));
-
+ h_over_i \ lin4 \ null
+
+ val lopts = grid.data
+ lopts.map((x:Node) => if (x != null) x.parent = this)
+ outputNumbers = Array(lopts.indexOf(out_h), lopts.indexOf(lstm_gate))
+
+ }
+
+ override def clone:LSTMNode = {
+ copyTo(new LSTMNode).asInstanceOf[LSTMNode]
}
-
- override def clone:LSTMNode = {
- copyTo(new LSTMNode).asInstanceOf[LSTMNode];
- }
- override def create(net:Net):LSTMLayer = {
- LSTMLayer(net, this);
- }
+ override def create(net:Net):LSTMLayer = {
+ LSTMLayer(net, this)
+ }
override def toString = {
"LSTM@"+Integer.toHexString(hashCode % 0x10000).toString
}
- def h = apply(0);
+ def h = apply(0)
- def c = apply(1);
- }
+ def c = apply(1)
+ }
object LSTMNode {
- final val gridTypeNoOutput = 0;
- final val gridTypeSoftmaxOutput = 1;
- final val gridTypeNegsampOutput = 2;
+ final val gridTypeNoOutput = 0
+ final val gridTypeSoftmaxOutput = 1
+ final val gridTypeNegsampOutput = 2
def apply() = {
- val n = new LSTMNode;
- n.constructGraph;
+ val n = new LSTMNode
+ n.constructGraph
n
}
def apply(opts:LSTMNodeOpts) = {
- val n = new LSTMNode;
- opts.copyOpts(n);
- n.constructGraph;
+ val n = new LSTMNode
+ opts.copyOpts(n)
+ n.constructGraph
n
}
- class GridOpts extends LSTMNodeOpts {var netType = 0; var bylevel = true};
+ class GridOpts extends LSTMNodeOpts {var netType = 0; var bylevel = true}
def grid(nrows:Int, ncols:Int, opts:GridOpts):NodeMat = {
import BIDMach.networks.layers.Node._
- val nlin = 2;
- val odim = opts.outdim;
- val idim = opts.dim;
+ val nlin = 2
+ val odim = opts.outdim
+ val idim = opts.dim
val nsoft = opts.netType match {
- case `gridTypeNoOutput` => 0;
- case `gridTypeNegsampOutput` => 1;
- case `gridTypeSoftmaxOutput` => 2;
+ case `gridTypeNoOutput` => 0
+ case `gridTypeNegsampOutput` => 1
+ case `gridTypeSoftmaxOutput` => 2
}
- val gr = NodeMat(nrows + nlin + nsoft, ncols);
+ val gr = NodeMat(nrows + nlin + nsoft, ncols)
for (k <- 0 until ncols) {
- gr(0, k) = input
+ gr(0, k) = input
}
- val modelName = opts.modelName;
+ val modelName = opts.modelName
for (k <- 0 until ncols) {
- gr(1, k) = linear(gr(0, k))((modelName format 0) +"_bottom", outdim=idim, hasBias = opts.hasBias)
+ gr(1, k) = linear(gr(0, k))((modelName format 0) +"_bottom", outdim=idim, hasBias = opts.hasBias)
}
for (k <- 0 until ncols) {
for (j <- nlin until nrows + nlin) {
val modelName = if (opts.bylevel) (opts.modelName format j-nlin) else (opts.modelName format 0)
- val below = gr(j-1, k);
+        val below = gr(j-1, k)
if (k > 0) {
- val left = gr(j, k-1).asInstanceOf[LSTMNode]
- gr(j, k) = lstm(h=left.h, c=left.c, i=below, m=modelName)(opts);
+ val left = gr(j, k-1).asInstanceOf[LSTMNode]
+ gr(j, k) = lstm(h=left.h, c=left.c, i=below, m=modelName)(opts)
} else {
- gr(j, k) = lstm(h=null, c=null, i=below, m=modelName)(opts);
+ gr(j, k) = lstm(h=null, c=null, i=below, m=modelName)(opts)
}
}
}
@@ -369,8 +369,8 @@ object LSTMNode {
case `gridTypeNoOutput` => {}
case `gridTypeSoftmaxOutput` => {
for (k <- 0 until ncols) {
- gr(nrows + nlin, k) = linear(gr(nrows + nlin - 1, k))(name=opts.modelName+"_top", outdim=odim, hasBias = opts.hasBias)
- gr(nrows + nlin + 1, k) = softmaxout(gr(nrows + nlin, k))(opts.scoreType);
+ gr(nrows + nlin, k) = linear(gr(nrows + nlin - 1, k))(name=opts.modelName+"_top", outdim=odim, hasBias = opts.hasBias)
+ gr(nrows + nlin + 1, k) = softmaxout(gr(nrows + nlin, k))(opts.scoreType)
}
}
case `gridTypeNegsampOutput` => {
@@ -385,16 +385,16 @@ object LSTMNode {
object LSTMLayer {
- def apply(net:Net) = new LSTMLayer(net, new LSTMNode);
+ def apply(net:Net) = new LSTMLayer(net, new LSTMNode)
def apply(net:Net, opts:LSTMNode) = {
- val x = new LSTMLayer(net, opts);
- x.construct;
- x;
+ val x = new LSTMLayer(net, opts)
+ x.construct
+ x
}
def grid(net:Net, nrows:Int, ncols:Int, opts:LSTMNode.GridOpts):LayerMat = {
- val nodeGrid = LSTMNode.grid(nrows, ncols, opts);
- LayerMat(nodeGrid, net);
+ val nodeGrid = LSTMNode.grid(nrows, ncols, opts)
+ LayerMat(nodeGrid, net)
}
}
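
// Note: the six constructGraph variants (kinds 0-5) are alternative factorizations
// of one and the same cell: eight separate linear maps (0); four maps over the
// stacked [h; i] input (1); a single 4*odim map split four ways (2); two 2*odim
// maps split in half (3); per-input 4*odim maps summed gate-wise (4); and four
// maps feeding the fused elementwise kernel (5). The recurrence they all compute,
// in scalar plain Scala (sketch only):
object LstmCellSketch {
  private def sigm(x: Float) = 1f / (1f + math.exp(-x).toFloat)
  // One step: gate pre-activations (ai, ao, af, ag) and previous cell c in,
  // (h', c') out. This is exactly the in/out/forget/tanh wiring of the graphs above.
  def step(ai: Float, ao: Float, af: Float, ag: Float, c: Float): (Float, Float) = {
    val cNext = sigm(ai) * math.tanh(ag).toFloat + sigm(af) * c
    val hNext = sigm(ao) * math.tanh(cNext).toFloat
    (hNext, cNext)
  }
}
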
diff --git a/src/main/scala/BIDMach/networks/layers/LSTMfusedLayer.scala b/src/main/scala/BIDMach/networks/layers/LSTMfusedLayer.scala
index 6bda1315..f2ce73f5 100755
--- a/src/main/scala/BIDMach/networks/layers/LSTMfusedLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/LSTMfusedLayer.scala
@@ -9,76 +9,76 @@ import BIDMach.mixins._
import BIDMach.models._
import BIDMach.networks._
import BIDMach._
-import scala.util.hashing.MurmurHash3;
-import scala.collection.mutable.HashMap;
-import edu.berkeley.bid.CUMACH;
+import scala.util.hashing.MurmurHash3
+import scala.collection.mutable.HashMap
+import edu.berkeley.bid.CUMACH
/**
* LSTM unit
*/
class LSTMfusedLayer(override val net:Net, override val opts:LSTMfusedNodeOpts = new LSTMfusedNode) extends Layer(net, opts) {
- override val _inputs = new Array[LayerTerm](5);
- override val _outputs = new Array[ND](2);
- override val _derivs = new Array[ND](2);
+ override val _inputs = new Array[LayerTerm](5)
+ override val _outputs = new Array[ND](2)
+ override val _derivs = new Array[ND](2)
override def toString = {
"LSTMcoa@"+Integer.toHexString(hashCode % 0x10000).toString
}
override def forward = {
- createOutput(inputData.dims);
- (inputData(0), inputData(1), inputData(2), inputData(3), inputData(4), outputs(0), outputs(1)) match {
- case (i0:GMat, i1:GMat, i2:GMat, i3:GMat, i4:GMat, out0:GMat, out1:GMat) => {
- CUMACH.LSTMfwd(i0.data, i1.data, i2.data, i3.data, i4.data, out0.data, out1.data, i0.length);
- }
- case (i0:FMat, i1:FMat, i2:FMat, i3:FMat, i4:FMat, out0:FMat, out1:FMat) => {
- LSTMfusedLayer.LSTMforward(i0, i1, i2, i3, i4, out0, out1);
- }
- }
- clearDerivs;
- }
+ createOutput(inputData.dims)
+ (inputData(0), inputData(1), inputData(2), inputData(3), inputData(4), outputs(0), outputs(1)) match {
+ case (i0:GMat, i1:GMat, i2:GMat, i3:GMat, i4:GMat, out0:GMat, out1:GMat) => {
+ CUMACH.LSTMfwd(i0.data, i1.data, i2.data, i3.data, i4.data, out0.data, out1.data, i0.length)
+ }
+ case (i0:FMat, i1:FMat, i2:FMat, i3:FMat, i4:FMat, out0:FMat, out1:FMat) => {
+ LSTMfusedLayer.LSTMforward(i0, i1, i2, i3, i4, out0, out1)
+ }
+ }
+ clearDerivs
+ }
override def backward = {
- (inputData(0), inputData(1), inputData(2), inputData(3), inputData(4), deriv(0), deriv(1), inputDeriv(0), inputDeriv(1), inputDeriv(2), inputDeriv(3), inputDeriv(4)) match {
- case (inC:GMat, lin1:GMat, lin2:GMat, lin3:GMat, lin4:GMat, doutC:GMat, doutH:GMat, dinC:GMat, dlin1:GMat, dlin2:GMat, dlin3:GMat, dlin4:GMat) => {
- CUMACH.LSTMbwd(inC.data, lin1.data, lin2.data, lin3.data, lin4.data, doutC.data, doutH.data, dinC.data, dlin1.data, dlin2.data, dlin3.data, dlin4.data, inC.length);
- }
- case (inC:FMat, lin1:FMat, lin2:FMat, lin3:FMat, lin4:FMat, doutC:FMat, doutH:FMat, dinC:FMat, dlin1:FMat, dlin2:FMat, dlin3:FMat, dlin4:FMat) => {
- LSTMfusedLayer.LSTMbackward(inC, lin1, lin2, lin3, lin4, doutC, doutH, dinC, dlin1, dlin2, dlin3, dlin4);
- }
- }
- }
+ (inputData(0), inputData(1), inputData(2), inputData(3), inputData(4), deriv(0), deriv(1), inputDeriv(0), inputDeriv(1), inputDeriv(2), inputDeriv(3), inputDeriv(4)) match {
+ case (inC:GMat, lin1:GMat, lin2:GMat, lin3:GMat, lin4:GMat, doutC:GMat, doutH:GMat, dinC:GMat, dlin1:GMat, dlin2:GMat, dlin3:GMat, dlin4:GMat) => {
+        CUMACH.LSTMbwd(inC.data, lin1.data, lin2.data, lin3.data, lin4.data, doutC.data, doutH.data, dinC.data, dlin1.data, dlin2.data, dlin3.data, dlin4.data, inC.length)
+ }
+ case (inC:FMat, lin1:FMat, lin2:FMat, lin3:FMat, lin4:FMat, doutC:FMat, doutH:FMat, dinC:FMat, dlin1:FMat, dlin2:FMat, dlin3:FMat, dlin4:FMat) => {
+        LSTMfusedLayer.LSTMbackward(inC, lin1, lin2, lin3, lin4, doutC, doutH, dinC, dlin1, dlin2, dlin3, dlin4)
+ }
+ }
+ }
}
trait LSTMfusedNodeOpts extends NodeOpts {
def copyOpts(opts:LSTMfusedNodeOpts):LSTMfusedNodeOpts = {
- super.copyOpts(opts);
- opts;
+ super.copyOpts(opts)
+ opts
}
}
-class LSTMfusedNode extends Node with LSTMfusedNodeOpts {
+class LSTMfusedNode extends Node with LSTMfusedNodeOpts {
- override val inputs:Array[NodeTerm] = Array(null, null, null, null, null);
+ override val inputs:Array[NodeTerm] = Array(null, null, null, null, null)
}
object LSTMfusedLayer {
- def apply(net:Net) = new LSTMfusedLayer(net, new LSTMfusedNode);
+ def apply(net:Net) = new LSTMfusedLayer(net, new LSTMfusedNode)
- def apply(net:Net, opts:LSTMfusedNodeOpts) = new LSTMfusedLayer(net, opts);
+ def apply(net:Net, opts:LSTMfusedNodeOpts) = new LSTMfusedLayer(net, opts)
@inline def sigmoid(a:Float):Float = {
- if (a > 20.0f) {
- return 1.0f;
- } else if (a < -80.0f) {
- return 0.0f;
- } else {
- return 1.0f/(1.0f + math.exp(-a).toFloat);
- }
+ if (a > 20.0f) {
+ return 1.0f
+ } else if (a < -80.0f) {
+ return 0.0f
+ } else {
+ return 1.0f/(1.0f + math.exp(-a).toFloat)
+ }
}
@inline def tanh(a:Float):Float = {
@@ -86,125 +86,125 @@ object LSTMfusedLayer {
}
@inline def deriv_sigmoid(a:Float, d:Float):Float = {
- d * (a - a * a);
+ d * (a - a * a)
}
@inline def deriv_tanh(a:Float, d:Float):Float = {
- d * (1.0f - a * a);
+ d * (1.0f - a * a)
}
def LSTMforward(incMat:FMat, lin1Mat:FMat, lin2Mat:FMat, lin3Mat:FMat, lin4Mat:FMat, outCMat:FMat, outHMat:FMat) {
- val n = incMat.length;
- val incArr = incMat.data;
- val lin1Arr = lin1Mat.data;
- val lin2Arr = lin2Mat.data;
- val lin3Arr = lin3Mat.data;
- val lin4Arr = lin4Mat.data;
- val outCArr = outCMat.data;
- val outHArr = outHMat.data;
- var i = 0;
+ val n = incMat.length
+ val incArr = incMat.data
+ val lin1Arr = lin1Mat.data
+ val lin2Arr = lin2Mat.data
+ val lin3Arr = lin3Mat.data
+ val lin4Arr = lin4Mat.data
+ val outCArr = outCMat.data
+ val outHArr = outHMat.data
+ var i = 0
while (i < n) {
- val in_c = incArr(i);
- val lin1 = lin1Arr(i);
- val lin2 = lin2Arr(i);
- val lin3 = lin3Arr(i);
- val lin4 = lin4Arr(i);
+ val in_c = incArr(i)
+ val lin1 = lin1Arr(i)
+ val lin2 = lin2Arr(i)
+ val lin3 = lin3Arr(i)
+ val lin4 = lin4Arr(i)
- val in_gate = sigmoid(lin1);
- val out_gate = sigmoid(lin2);
- val forget_gate = sigmoid(lin3);
- val in_sat = tanh(lin4);
+ val in_gate = sigmoid(lin1)
+ val out_gate = sigmoid(lin2)
+ val forget_gate = sigmoid(lin3)
+ val in_sat = tanh(lin4)
- val in_prod = in_gate * in_sat;
- val f_prod = forget_gate * in_c;
- val out_c = in_prod + f_prod;
+ val in_prod = in_gate * in_sat
+ val f_prod = forget_gate * in_c
+ val out_c = in_prod + f_prod
- val out_tanh = tanh(out_c);
- val out_h = out_gate * out_tanh;
+ val out_tanh = tanh(out_c)
+ val out_h = out_gate * out_tanh
- outCArr(i) = out_c;
+ outCArr(i) = out_c
outHArr(i)= out_h;
- i += 1;
+ i += 1
}
}
def LSTMbackward(incMat:FMat, lin1Mat:FMat, lin2Mat:FMat, lin3Mat:FMat, lin4Mat:FMat, doutCMat:FMat, doutHMat:FMat,
dincMat:FMat, dlin1Mat:FMat, dlin2Mat:FMat, dlin3Mat:FMat, dlin4Mat:FMat) {
- val n = incMat.length;
- val incArr = incMat.data;
- val lin1Arr = lin1Mat.data;
- val lin2Arr = lin2Mat.data;
- val lin3Arr = lin3Mat.data;
- val lin4Arr = lin4Mat.data;
- val doutCArr = doutCMat.data;
- val doutHArr = doutHMat.data;
- val dincArr = dincMat.data;
- val dlin1Arr = dlin1Mat.data;
- val dlin2Arr = dlin2Mat.data;
- val dlin3Arr = dlin3Mat.data;
- val dlin4Arr = dlin4Mat.data;
- var i = 0;
+ val n = incMat.length
+ val incArr = incMat.data
+ val lin1Arr = lin1Mat.data
+ val lin2Arr = lin2Mat.data
+ val lin3Arr = lin3Mat.data
+ val lin4Arr = lin4Mat.data
+ val doutCArr = doutCMat.data
+ val doutHArr = doutHMat.data
+ val dincArr = dincMat.data
+ val dlin1Arr = dlin1Mat.data
+ val dlin2Arr = dlin2Mat.data
+ val dlin3Arr = dlin3Mat.data
+ val dlin4Arr = dlin4Mat.data
+ var i = 0
while (i < n) {
- val in_c = incArr(i);
- val lin1 = lin1Arr(i);
- val lin2 = lin2Arr(i);
- val lin3 = lin3Arr(i);
- val lin4 = lin4Arr(i);
+ val in_c = incArr(i)
+ val lin1 = lin1Arr(i)
+ val lin2 = lin2Arr(i)
+ val lin3 = lin3Arr(i)
+ val lin4 = lin4Arr(i)
- val in_gate = sigmoid(lin1);
- val out_gate = sigmoid(lin2);
- val forget_gate = sigmoid(lin3);
- val in_sat = tanh(lin4);
+ val in_gate = sigmoid(lin1)
+ val out_gate = sigmoid(lin2)
+ val forget_gate = sigmoid(lin3)
+ val in_sat = tanh(lin4)
- val in_prod = in_gate * in_sat;
- val f_prod = forget_gate * in_c;
- val out_c = in_prod + f_prod;
+ val in_prod = in_gate * in_sat
+ val f_prod = forget_gate * in_c
+ val out_c = in_prod + f_prod
- val out_tanh = tanh(out_c);
+ val out_tanh = tanh(out_c)
- val dout_h = doutHArr(i);
- var dout_c = doutCArr(i);
+ val dout_h = doutHArr(i)
+ var dout_c = doutCArr(i)
- // out_h = out_gate * out_tanh;
- val dout_gate = dout_h * out_tanh;
- val dout_tanh = dout_h * out_gate;
+ // out_h = out_gate * out_tanh
+ val dout_gate = dout_h * out_tanh
+ val dout_tanh = dout_h * out_gate
- // out_tanh = tanh(out_c);
- dout_c += deriv_tanh(out_tanh, dout_tanh);
+ // out_tanh = tanh(out_c)
+ dout_c += deriv_tanh(out_tanh, dout_tanh)
- // out_c = in_prod + f_prod;
- val din_prod = dout_c;
- val df_prod = dout_c;
+ // out_c = in_prod + f_prod
+ val din_prod = dout_c
+ val df_prod = dout_c
- // f_prod = forget_gate * in_c;
- val dforget_gate = df_prod * in_c;
- val din_c = df_prod * forget_gate;
+ // f_prod = forget_gate * in_c
+ val dforget_gate = df_prod * in_c
+ val din_c = df_prod * forget_gate
- // in_prod = in_gate * in_sat;
- val din_gate = din_prod * in_sat;
- val din_sat = din_prod * in_gate;
+ // in_prod = in_gate * in_sat
+ val din_gate = din_prod * in_sat
+ val din_sat = din_prod * in_gate
- // in_gate = forward_sigmoid(lin1);
- // out_gate = forward_sigmoid(lin2);
- // forget_gate = forward_sigmoid(lin3);
- // in_sat = tanh(lin4);
+ // in_gate = forward_sigmoid(lin1)
+ // out_gate = forward_sigmoid(lin2)
+ // forget_gate = forward_sigmoid(lin3)
+ // in_sat = tanh(lin4)
- val dlin4 = deriv_tanh(in_sat, din_sat);
- val dlin3 = deriv_sigmoid(forget_gate, dforget_gate);
- val dlin2 = deriv_sigmoid(out_gate, dout_gate);
- val dlin1 = deriv_sigmoid(in_gate, din_gate);
+ val dlin4 = deriv_tanh(in_sat, din_sat)
+ val dlin3 = deriv_sigmoid(forget_gate, dforget_gate)
+ val dlin2 = deriv_sigmoid(out_gate, dout_gate)
+ val dlin1 = deriv_sigmoid(in_gate, din_gate)
- dlin4Arr(i) += dlin4;
- dlin3Arr(i) += dlin3;
- dlin2Arr(i) += dlin2;
- dlin1Arr(i) += dlin1;
- dincArr(i) += din_c;
+ dlin4Arr(i) += dlin4
+ dlin3Arr(i) += dlin3
+ dlin2Arr(i) += dlin2
+ dlin1Arr(i) += dlin1
+ dincArr(i) += din_c
- i += 1;
+ i += 1
}
}
}
-
\ No newline at end of file
+
\ No newline at end of file
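
// Note: deriv_sigmoid and deriv_tanh above take the activated value a rather than
// the pre-activation, using the identities σ'(x) = σ(x)(1 − σ(x)) and
// tanh'(x) = 1 − tanh(x)²; the clamps in sigmoid simply short-circuit exp() in the
// saturated tails, where the Float result already rounds to 1 or is negligibly
// small. A finite-difference spot check of the sigmoid identity (illustrative):
object DerivCheck {
  def sigm(x: Double): Double = 1.0 / (1.0 + math.exp(-x))
  def main(args: Array[String]): Unit = {
    val x = 0.3; val eps = 1e-6
    val numeric  = (sigm(x + eps) - sigm(x - eps)) / (2 * eps)
    val a = sigm(x)
    val analytic = a - a * a  // matches deriv_sigmoid(a, 1f)
    assert(math.abs(numeric - analytic) < 1e-8)
  }
}
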
diff --git a/src/main/scala/BIDMach/networks/layers/Layer.scala b/src/main/scala/BIDMach/networks/layers/Layer.scala
index 484e6dfc..6ddba84b 100644
--- a/src/main/scala/BIDMach/networks/layers/Layer.scala
+++ b/src/main/scala/BIDMach/networks/layers/Layer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
/**
@@ -51,7 +51,7 @@ import BIDMach.networks._
* Each NodeSet instance has up to two inputs which are other NodeSet instances (or null). This graph structure can be cyclic.
* When the model is created, the Layer structure mimics the NodeSet structure.
*
- * You can also create the Layer graph directly using the "setinput()" method in each layer.
+ * You can also create the Layer graph directly using the "setinput()" method in each layer.
*/
// Notes:
@@ -76,24 +76,24 @@ import BIDMach.networks._
@SerialVersionUID(100L)
class Layer(val net:Net, val opts:NodeOpts = new Node) extends LayerTerm(null, 0) {
// Internal data arrays
- val _inputs = new Array[LayerTerm](1);
- val _outputs = new Array[ND](1);
- val _derivs = new Array[ND](1);
+ val _inputs = new Array[LayerTerm](1)
+ val _outputs = new Array[ND](1)
+ val _derivs = new Array[ND](1)
def inputlength = _inputs.length
var forwardtime = 0.0
var backwardtime = 0.0
override def layer = this
- def inputs = _inputs;
+ def inputs = _inputs
private var _GUID = Mat.myrand.nextLong
def setGUID(v:Long):Unit = {_GUID = v}
def GUID:Long = _GUID
// Setters and getters for general elements of those arrays
- def outputs(i:Int) = _outputs(i);
+ def outputs(i:Int) = _outputs(i)
def derivs(i:Int) = _derivs(i);
- def input(i:Int) = _inputs(i);
- def apply(i:Int) = new LayerTerm(this, i);
+ def input(i:Int) = _inputs(i)
+ def apply(i:Int) = new LayerTerm(this, i)
def setOutput(i:Int, v:ND):Layer = {_outputs(i) = v; this}
def setDeriv(i:Int, v:ND):Layer = {_derivs(i) = v; this}
@@ -102,13 +102,13 @@ class Layer(val net:Net, val opts:NodeOpts = new Node) extends LayerTerm(null, 0
def setInputs(v0:LayerTerm, v1:LayerTerm, v2:LayerTerm) = {setInput(0, v0); setInput(1, v1); setInput(2, v2); this}
// Setters and getters for the first input or output
- def input = _inputs(0);
- def output = _outputs(0);
- def deriv = _derivs(0);
+ def input = _inputs(0)
+ def output = _outputs(0)
+ def deriv = _derivs(0)
def input_=(v:LayerTerm): Unit = {_inputs(0) = v}
- def output_= (v:ND):Unit = {_outputs(0) = v};
- def deriv_=(v:ND):Unit = {_derivs(0) = v};
+ def output_= (v:ND):Unit = {_outputs(0) = v}
+ def deriv_=(v:ND):Unit = {_derivs(0) = v}
// Input getters (and one setter) which get the appropriate output from each input layer
def inputData = {val i = _inputs(0); i.layer._outputs(i.term);}
@@ -117,36 +117,36 @@ class Layer(val net:Net, val opts:NodeOpts = new Node) extends LayerTerm(null, 0
def inputDatas(i:Int) = {val lt = _inputs(i); lt.layer._outputs(lt.term);}
def inputDerivs(i:Int) = {val lt = _inputs(i); lt.layer._derivs(lt.term);}
- var target:Mat = null;
- def forward = {};
- def backward:Unit = {};
- def backward(ipass:Int, pos:Long):Unit = backward;
- def score:FMat = zeros(1,1);
- var parent:Layer = null;
- lazy val modelmats = net.modelmats;
- lazy val updatemats = net.updatemats;
- lazy val useGPU = net.useGPU;
- lazy val nopts = net.opts;
+ var target:Mat = null
+ def forward = {}
+ def backward:Unit = {}
+ def backward(ipass:Int, pos:Long):Unit = backward
+ def score:FMat = zeros(1,1)
+ var parent:Layer = null
+ lazy val modelmats = net.modelmats
+ lazy val updatemats = net.updatemats
+ lazy val useGPU = net.useGPU
+ lazy val nopts = net.opts
def convertMat(mat:Mat) = {net.convertMat(mat);}
def convertMat(mat:ND) = {net.convertMat(mat);}
def createOutput = {
- if (output.asInstanceOf[AnyRef] == null) output = inputData.zeros(inputData.dims);
+ if (output.asInstanceOf[AnyRef] == null) output = inputData.zeros(inputData.dims)
}
def createOutput(dims:IMat) = {
- if (output.asInstanceOf[AnyRef] == null) output = inputData.zeros(dims);
+ if (output.asInstanceOf[AnyRef] == null) output = inputData.zeros(dims)
}
def clearDeriv = {
- if (deriv.asInstanceOf[AnyRef] == null) deriv = output.zeros(output.dims);
- deriv.clear;
+ if (deriv.asInstanceOf[AnyRef] == null) deriv = output.zeros(output.dims)
+ deriv.clear
}
def clearDerivs = {
if (deriv.asInstanceOf[AnyRef] == null) {
for (i <- 0 until _outputs.length) {
- _derivs(i) = output.zeros(_outputs(i).dims);
+ _derivs(i) = output.zeros(_outputs(i).dims)
}
}
for (i <- 0 until _derivs.length) {
@@ -169,41 +169,41 @@ object Layer {
def dropout(a:LayerTerm, dfrac:Float) = new DropoutLayer(null, new DropoutNode{frac = dfrac}){inputs(0) = a}
- def exp(a:LayerTerm) = new ExpLayer(null){inputs(0) = a;};
+  def exp(a:LayerTerm) = new ExpLayer(null){inputs(0) = a}
- def GLM(a:LayerTerm)(implicit opts:GLMNodeOpts) = new GLMLayer(null, opts){inputs(0) = a};
+ def GLM(a:LayerTerm)(implicit opts:GLMNodeOpts) = new GLMLayer(null, opts){inputs(0) = a}
- def input(a:LayerTerm) = new InputLayer(null){inputs(0) = a;};
+  def input(a:LayerTerm) = new InputLayer(null){inputs(0) = a}
- def input = new InputLayer(null);
+ def input = new InputLayer(null)
def linear(a:LayerTerm)(net:Net, name:String="", outdim:Int=0, hasBias:Boolean=true, aopts:ADAGrad.Opts=null,
tmatShape:(Int,Int)=>(Array[Int], Array[Int], Array[Int], Array[Int])) = {
- val odim = outdim;
- val hBias = hasBias;
- val aaopts = aopts;
- val mname = name;
- val tms = tmatShape;
- new LinLayer(net, new LinNode{modelName = mname; outdim=odim; hasBias=hBias; aopts=aaopts; tmatShape = tms}){inputs(0)=a;};
+ val odim = outdim
+ val hBias = hasBias
+ val aaopts = aopts
+ val mname = name
+ val tms = tmatShape
+    new LinLayer(net, new LinNode{modelName = mname; outdim=odim; hasBias=hBias; aopts=aaopts; tmatShape = tms}){inputs(0)=a}
}
def linear_(a:LayerTerm)(implicit net:Net, opts:LinNodeOpts) = {
new LinLayer(net, opts){inputs(0) = a;}
}
- def ln(a:LayerTerm) = new LnLayer(null){inputs(0) = a};
+ def ln(a:LayerTerm) = new LnLayer(null){inputs(0) = a}
def negsamp(a:LayerTerm)(net:Net, name:String="", outdim:Int=0, hasBias:Boolean=true, aopts:ADAGrad.Opts=null, nsamps:Int=100, expt:Float=0.5f, scoreType:Int=0, doCorrect:Boolean=true) = {
- val odim = outdim;
- val hBias = hasBias;
- val aaopts = aopts;
- val nnsamps = nsamps;
- val eexpt = expt;
- val dcr = doCorrect;
- val sct = scoreType;
- val mname = name;
- new NegsampOutputLayer(net, new NegsampOutputNode{modelName=mname; outdim=odim; hasBias=hBias; aopts=aaopts; nsamps=nnsamps; expt=eexpt; scoreType=sct; docorrect=dcr}){inputs(0)=a;};
+ val odim = outdim
+ val hBias = hasBias
+ val aaopts = aopts
+ val nnsamps = nsamps
+ val eexpt = expt
+ val dcr = doCorrect
+ val sct = scoreType
+ val mname = name
+    new NegsampOutputLayer(net, new NegsampOutputNode{modelName=mname; outdim=odim; hasBias=hBias; aopts=aaopts; nsamps=nnsamps; expt=eexpt; scoreType=sct; docorrect=dcr}){inputs(0)=a}
}
def negsamp_(a:LayerTerm)(implicit net:Net, opts:NegsampOutputNodeOpts) = {
@@ -212,99 +212,99 @@ object Layer {
def norm(a:LayerTerm)(implicit opts:NormNodeOpts) = new NormLayer(null){inputs(0) = a;}
- def oneHot(a:LayerTerm) = new OnehotLayer(null){inputs(0) = a};
+ def oneHot(a:LayerTerm) = new OnehotLayer(null){inputs(0) = a}
- def rect(a:LayerTerm) = new RectLayer(null){inputs(0) = a};
+ def rect(a:LayerTerm) = new RectLayer(null){inputs(0) = a}
- def sigmoid(a:LayerTerm) = new SigmoidLayer(null){inputs(0) = a};
+ def sigmoid(a:LayerTerm) = new SigmoidLayer(null){inputs(0) = a}
- def σ(a:LayerTerm) = new SigmoidLayer(null){inputs(0) = a};
+ def σ(a:LayerTerm) = new SigmoidLayer(null){inputs(0) = a}
- def softmax(a:LayerTerm) = new SoftmaxLayer(null){inputs(0) = a};
+ def softmax(a:LayerTerm) = new SoftmaxLayer(null){inputs(0) = a}
def softmaxout(a:LayerTerm)(scoreTyp:Int=0, doVar:Boolean=false) = new SoftmaxOutputLayer(null, new SoftmaxOutputNode{scoreType=scoreTyp;doVariance=doVar}){inputs(0) = a}
- def softplus(a:LayerTerm) = new SoftplusLayer(null){inputs(0) = a};
+ def softplus(a:LayerTerm) = new SoftplusLayer(null){inputs(0) = a}
- def splithoriz(a:LayerTerm, np:Int) = new SplitHorizLayer(null, new SplitHorizNode{nparts = np}){inputs(0) = a};
+ def splithoriz(a:LayerTerm, np:Int) = new SplitHorizLayer(null, new SplitHorizNode{nparts = np}){inputs(0) = a}
- def splitvert(a:LayerTerm, np:Int) = new SplitVertLayer(null, new SplitVertNode{nparts = np}){inputs(0) = a};
+ def splitvert(a:LayerTerm, np:Int) = new SplitVertLayer(null, new SplitVertNode{nparts = np}){inputs(0) = a}
- def tanh(a:LayerTerm) = new TanhLayer(null){inputs(0) = a};
+ def tanh(a:LayerTerm) = new TanhLayer(null){inputs(0) = a}
def lstm(h:LayerTerm, c:LayerTerm, i:LayerTerm, m:String)(net:Net, opts:LSTMNodeOpts) = {
- val node = new LSTMNode;
- opts.copyOpts(node);
- node.modelName = m;
- node.constructGraph;
- val n = new LSTMLayer(net, node);
- n.setInput(0, h);
- n.setInput(1, c);
- n.setInput(2, i);
+ val node = new LSTMNode
+ opts.copyOpts(node)
+ node.modelName = m
+ node.constructGraph
+ val n = new LSTMLayer(net, node)
+ n.setInput(0, h)
+ n.setInput(1, c)
+ n.setInput(2, i)
n
}
def lstm_(h:LayerTerm, c:LayerTerm, i:LayerTerm, m:String)(implicit net:Net, opts:LSTMNodeOpts) = {
- lstm(h, c, i, m)(net, opts);
+ lstm(h, c, i, m)(net, opts)
}
}
class LayerTerm(val _layer:Layer, val term:Int) extends Serializable {
- def layer = _layer;
+ def layer = _layer
- def + (a:LayerTerm) = {val n=this; new AddLayer(null){inputs(0)=n; inputs(1)=a}};
+ def + (a:LayerTerm) = {val n=this; new AddLayer(null){inputs(0)=n; inputs(1)=a}}
- def *@ (a:LayerTerm) = {val n=this; new MulLayer(null){inputs(0)=n; inputs(1)=a;}};
+  def *@ (a:LayerTerm) = {val n=this; new MulLayer(null){inputs(0)=n; inputs(1)=a}}
- def ∘ (a:LayerTerm) = {val n=this; new MulLayer(null){inputs(0)=n; inputs(1)=a;}};
+  def ∘ (a:LayerTerm) = {val n=this; new MulLayer(null){inputs(0)=n; inputs(1)=a}}
- def over (a:LayerTerm) = {val n=this; new StackLayer(null){inputs(0)=n; inputs(1)=a;}};
+  def over (a:LayerTerm) = {val n=this; new StackLayer(null){inputs(0)=n; inputs(1)=a}}
}
trait OutputLayer {}
object LayerFn {
- final val SIGMOIDFN = 0;
- final val TANHFN = 1;
- final val SOFTPLUSFN = 2;
+ final val SIGMOIDFN = 0
+ final val TANHFN = 1
+ final val SOFTPLUSFN = 2
- val fwdflops = irow(20, 20, 40);
- val bwdflops = irow(3, 3, 20);
+ val fwdflops = irow(20, 20, 40)
+ val bwdflops = irow(3, 3, 20)
// Loosely check dimensions. Skip dimensions of 1 in either tensor.
def checkdims(dims0:IMat, dims1:IMat) = {
if (dims1.asInstanceOf[AnyRef] != null) {
- var i0 = 0;
- var i1 = 0;
+ var i0 = 0
+ var i1 = 0
while (i0 < dims0.length && i1 < dims1.length) {
- while (i0 < dims0.length && dims0(i0) == 1) i0 += 1;
+ while (i0 < dims0.length && dims0(i0) == 1) i0 += 1
while (i1 < dims1.length && dims1(i1) == 1) i1 += 1;
if ((i0 >= dims0.length) != (i1 >= dims1.length)) {
- throw new RuntimeException("dimensions mismatch in Layer Function " + dims0.toString + " and " + dims1.toString);
+ throw new RuntimeException("dimensions mismatch in Layer Function " + dims0.toString + " and " + dims1.toString)
} else if (i0 < dims0.length && i1 < dims1.length && dims0(i0) != dims1(i1)) {
- throw new RuntimeException("dimensions mismatch in Layer Function " + dims0.toString + " and " + dims1.toString);
+        throw new RuntimeException("dimensions mismatch in Layer Function " + dims0.toString + " and " + dims1.toString)
}
- i0 += 1;
- i1 += 1;
+ i0 += 1
+ i1 += 1
}
}
}
- def applyfwd(a:ND, ifn:Int):ND = applyfwd(a, null, ifn);
+ def applyfwd(a:ND, ifn:Int):ND = applyfwd(a, null, ifn)
def applyfwd(a:ND, out:ND, ifn:Int):ND = {
- Mat.nflops += 1L * a.length * fwdflops(ifn);
- checkdims(a.dims, out.dims);
+ Mat.nflops += 1L * a.length * fwdflops(ifn)
+    checkdims(a.dims, if (out.asInstanceOf[AnyRef] != null) out.dims else null) // out is null when called via applyfwd(a, ifn)
a match {
case af:FND => {
- val oND = FND.newOrCheckFND(a.dims, out, a.GUID, ifn, "LayerFn".##);
- CPUMACH.applyfwd(af.data, oND.data, ifn, a.length, Mat.numThreads);
+ val oND = FND.newOrCheckFND(a.dims, out, a.GUID, ifn, "LayerFn".##)
+ CPUMACH.applyfwd(af.data, oND.data, ifn, a.length, Mat.numThreads)
oND
}
case ag:GND => {
- val oND = GND.newOrCheckGND(a.dims, out, a.GUID, ifn, "LayerFn".##);
- CUMACH.applyfwd(ag.data, oND.data, ifn, a.length);
+ val oND = GND.newOrCheckGND(a.dims, out, a.GUID, ifn, "LayerFn".##)
+ CUMACH.applyfwd(ag.data, oND.data, ifn, a.length)
oND
}
}
@@ -313,17 +313,17 @@ object LayerFn {
def applyderiv(a:ND, b:ND, ifn:Int):ND = applyderiv(a, b, null, ifn)
def applyderiv(a:ND, b:ND, out:ND, ifn:Int):ND = {
- Mat.nflops += 1L * a.length * bwdflops(ifn);
- checkdims(a.dims, b.dims);
+ Mat.nflops += 1L * a.length * bwdflops(ifn)
+ checkdims(a.dims, b.dims)
(a, b) match {
case (af:FND, bf:FND) => {
- val oND = FND.newOrCheckFND(a.dims, out, a.GUID, ifn, "LayerFn".##);
- CPUMACH.applyderiv(af.data, bf.data, oND.data, ifn, a.length, Mat.numThreads);
+ val oND = FND.newOrCheckFND(a.dims, out, a.GUID, ifn, "LayerFn".##)
+ CPUMACH.applyderiv(af.data, bf.data, oND.data, ifn, a.length, Mat.numThreads)
oND
}
case (ag:GND, bg:GND) => {
- val oND = GND.newOrCheckGND(a.dims, out, a.GUID, ifn, "LayerFn".##);
- CUMACH.applyderiv(ag.data, bg.data, oND.data, ifn, a.length);
+ val oND = GND.newOrCheckGND(a.dims, out, a.GUID, ifn, "LayerFn".##)
+ CUMACH.applyderiv(ag.data, bg.data, oND.data, ifn, a.length)
oND
}
}
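
// Note: checkdims above treats shapes as loosely equal: singleton dimensions are
// skipped on either side and the remaining extents must agree pairwise. A
// standalone restatement that returns a Boolean instead of throwing (sketch):
object DimCheckSketch {
  def looseMatch(d0: Array[Int], d1: Array[Int]): Boolean = {
    var i0 = 0; var i1 = 0
    while (i0 < d0.length && i1 < d1.length) {
      while (i0 < d0.length && d0(i0) == 1) i0 += 1   // skip singletons on the left
      while (i1 < d1.length && d1(i1) == 1) i1 += 1   // ... and on the right
      if ((i0 >= d0.length) != (i1 >= d1.length)) return false  // one side ran out early
      if (i0 < d0.length && i1 < d1.length && d0(i0) != d1(i1)) return false
      i0 += 1; i1 += 1
    }
    true
  }
  // e.g. looseMatch(Array(3, 1, 4), Array(1, 3, 4)) == true
  //      looseMatch(Array(3, 4),    Array(3, 5))    == false
}
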
diff --git a/src/main/scala/BIDMach/networks/layers/LayerMat.scala b/src/main/scala/BIDMach/networks/layers/LayerMat.scala
index 05e1b296..7df0f3fa 100755
--- a/src/main/scala/BIDMach/networks/layers/LayerMat.scala
+++ b/src/main/scala/BIDMach/networks/layers/LayerMat.scala
@@ -1,31 +1,31 @@
package BIDMach.networks.layers
-import BIDMach.networks.Net;
+import BIDMach.networks.Net
import BIDMat.Mat
import BIDMat.IMat
import BIDMat.DenseMat
import scala.collection.mutable.HashMap
-case class LayerMat(override val nrows:Int, override val ncols:Int, override val data:Array[Layer]) extends DenseMat[Layer](nrows, ncols, data) {
-
- override def t:LayerMat = LayerMat(gt(null))
-
- override def mytype = "LayerMat"
-
- def horzcat(b: LayerMat) = LayerMat(ghorzcat(b))
-
- def vertcat(b: LayerMat) = LayerMat(gvertcat(b))
-
- def find3:(IMat, IMat, LayerMat) = { val vv = gfind3 ; (IMat(vv._1), IMat(vv._2), LayerMat(vv._3)) }
-
- override def apply(a:IMat):LayerMat = LayerMat(gapply(a))
-
- override def apply(a:IMat, b:IMat):LayerMat = LayerMat(gapply(a, b))
-
- override def apply(a:Int, b:IMat):LayerMat = LayerMat(gapply(a, b))
-
- override def apply(a:IMat, b:Int):LayerMat = LayerMat(gapply(a, b))
-
+case class LayerMat(override val nrows:Int, override val ncols:Int, override val data:Array[Layer]) extends DenseMat[Layer](nrows, ncols, data) {
+
+ override def t:LayerMat = LayerMat(gt(null))
+
+ override def mytype = "LayerMat"
+
+ def horzcat(b: LayerMat) = LayerMat(ghorzcat(b))
+
+ def vertcat(b: LayerMat) = LayerMat(gvertcat(b))
+
+ def find3:(IMat, IMat, LayerMat) = { val vv = gfind3 ; (IMat(vv._1), IMat(vv._2), LayerMat(vv._3)) }
+
+ override def apply(a:IMat):LayerMat = LayerMat(gapply(a))
+
+ override def apply(a:IMat, b:IMat):LayerMat = LayerMat(gapply(a, b))
+
+ override def apply(a:Int, b:IMat):LayerMat = LayerMat(gapply(a, b))
+
+ override def apply(a:IMat, b:Int):LayerMat = LayerMat(gapply(a, b))
+
override def apply(a:Mat, b:Mat):LayerMat = LayerMat(gapply(a.asInstanceOf[IMat], b.asInstanceOf[IMat]))
override def apply(a:Mat, b:Int):LayerMat = LayerMat(gapply(a.asInstanceOf[IMat], b))
@@ -76,18 +76,18 @@ case class LayerMat(override val nrows:Int, override val ncols:Int, override val
def update(i:Int, jv:Mat, b:Layer):LayerMat = LayerMat(_update(IMat.ielem(i), jv.asInstanceOf[IMat], b))
- def ccMatOp(b: LayerMat, f:(Layer, Layer) => Layer, old:LayerMat) = LayerMat(ggMatOp(b, f, old))
-
- def ccMatOpScalar(b: Layer, f:(Layer, Layer) => Layer, old:LayerMat) = LayerMat(ggMatOpScalar(b, f, old))
-
- def ccReduceOp(n:Int, f1:(Layer) => Layer, f2:(Layer, Layer) => Layer, old:LayerMat) = LayerMat(ggReduceOp(n, f1, f2, old))
+ def ccMatOp(b: LayerMat, f:(Layer, Layer) => Layer, old:LayerMat) = LayerMat(ggMatOp(b, f, old))
+
+ def ccMatOpScalar(b: Layer, f:(Layer, Layer) => Layer, old:LayerMat) = LayerMat(ggMatOpScalar(b, f, old))
+
+ def ccReduceOp(n:Int, f1:(Layer) => Layer, f2:(Layer, Layer) => Layer, old:LayerMat) = LayerMat(ggReduceOp(n, f1, f2, old))
+
+ var layerMap:HashMap[Layer,Int] = null
- var layerMap:HashMap[Layer,Int] = null;
-
def rebuildMap = {
- layerMap = new HashMap[Layer,Int]();
+ layerMap = new HashMap[Layer,Int]()
for (i <- 0 until data.length) {
- layerMap.put(data(i), i);
+ layerMap.put(data(i), i)
}
}
@@ -95,31 +95,31 @@ case class LayerMat(override val nrows:Int, override val ncols:Int, override val
if (layerTerm == null) {
"null"
} else {
- val layer = layerTerm.layer;
- val term = layerTerm.term;
- if (layerMap == null) {
- rebuildMap;
- }
- if (layerMap.contains(layer)) {
- val i = layerMap(layer);
- if (data(i) != layer) rebuildMap;
- val coli = i / nrows;
- val rowi = i - coli * nrows;
- val v:Int = 'A';
- val coli0 = coli % 26;
- val ch0 = Character.toChars(v + coli0)(0).toString;
- val ch = if (coli < 26) {
- ch0;
- } else {
- val ch1 = Character.toChars(v + coli0/26)(0).toString;
- ch1 + ch0;
- }
- val ostr = ch + rowi.toString;
- if (term == 0) {
- ostr;
- } else {
- ostr + "[" + term.toString + "]";
- }
+ val layer = layerTerm.layer
+ val term = layerTerm.term
+ if (layerMap == null) {
+ rebuildMap
+ }
+ if (layerMap.contains(layer)) {
+ val i = layerMap(layer)
+ if (data(i) != layer) rebuildMap
+ val coli = i / nrows
+ val rowi = i - coli * nrows
+ val v:Int = 'A'
+ val coli0 = coli % 26
+ val ch0 = Character.toChars(v + coli0)(0).toString
+ val ch = if (coli < 26) {
+ ch0
+ } else {
+ val ch1 = Character.toChars(v + coli0/26)(0).toString
+ ch1 + ch0
+ }
+        val ostr = ch + rowi.toString
+ if (term == 0) {
+ ostr
+ } else {
+ ostr + "[" + term.toString + "]"
+ }
} else {
"<==="
}
@@ -129,54 +129,54 @@ case class LayerMat(override val nrows:Int, override val ncols:Int, override val
override def printOne(i:Int):String = {
val v = data(i)
if (v != null) {
- val ostring = v.inputs.map(alphaCoords(_)).reduce(_+","+_);
- v.toString() + "(" + ostring +")";
+ val ostring = v.inputs.map(alphaCoords(_)).reduce(_+","+_)
+ v.toString() + "(" + ostring +")"
}
else
""
}
-
- def \ (b: LayerMat) = horzcat(b);
- def \ (b: Layer) = horzcat(LayerMat.elem(b))
- def on (b: LayerMat) = vertcat(b)
- def on (b: Layer) = vertcat(LayerMat.elem(b))
-
- def link(b:LayerMat):Unit = {
- for (i <- 0 until math.min(nrows, b.nrows)) {
- val lleft = apply(i, ncols-1);
- val lright = b(i, 0);
- (lleft, lright) match {
- case (a:LSTMLayer, b:LSTMLayer) => {
- b.setInput(0, a(0));
- b.setInput(1, a(1));
- }
- case _ => {}
- }
- }
- }
-
- def forward(col1:Int, col2:Int, debug:Int) = {
- for (i <- col1 to col2) {
- for (j <- 0 until nrows) {
- if (debug > 0) {
- println(" forward (%d,%d) %s" format (j, i, apply(j, i).getClass))
- }
- apply(j, i).forward;
- }
- }
- }
-
- def backward(col1:Int, col2:Int, debug:Int, ipass:Int, ipos:Long) = {
- for (i <- col2 to col1 by -1) {
- for (j <- (nrows-1) to 0 by -1) {
- if (debug > 0) {
- println(" backward (%d,%d) %s" format (j, i, apply(j, i).getClass))
- }
- apply(j, i).backward(ipass, ipos);
- }
- }
- }
+
+ def \ (b: LayerMat) = horzcat(b)
+ def \ (b: Layer) = horzcat(LayerMat.elem(b))
+ def on (b: LayerMat) = vertcat(b)
+ def on (b: Layer) = vertcat(LayerMat.elem(b))
+
+ def link(b:LayerMat):Unit = {
+ for (i <- 0 until math.min(nrows, b.nrows)) {
+ val lleft = apply(i, ncols-1)
+ val lright = b(i, 0)
+ (lleft, lright) match {
+ case (a:LSTMLayer, b:LSTMLayer) => {
+ b.setInput(0, a(0))
+ b.setInput(1, a(1))
+ }
+ case _ => {}
+ }
+ }
+ }
+
+ def forward(col1:Int, col2:Int, debug:Int) = {
+ for (i <- col1 to col2) {
+ for (j <- 0 until nrows) {
+ if (debug > 0) {
+ println(" forward (%d,%d) %s" format (j, i, apply(j, i).getClass))
+ }
+ apply(j, i).forward
+ }
+ }
+ }
+
+ def backward(col1:Int, col2:Int, debug:Int, ipass:Int, ipos:Long) = {
+ for (i <- col2 to col1 by -1) {
+ for (j <- (nrows-1) to 0 by -1) {
+ if (debug > 0) {
+ println(" backward (%d,%d) %s" format (j, i, apply(j, i).getClass))
+ }
+ apply(j, i).backward(ipass, ipos)
+ }
+ }
+ }
}
object LayerMat {
@@ -188,44 +188,44 @@ object LayerMat {
def apply(a:List[Layer]) = new LayerMat(1, a.length, a.toArray)
def apply(n:NodeMat, net:Net):LayerMat = {
- val nr = n.nrows;
- val nc = n.ncols;
- val mat = new LayerMat(nr, nc, new Array[Layer](nr*nc));
+ val nr = n.nrows
+ val nc = n.ncols
+ val mat = new LayerMat(nr, nc, new Array[Layer](nr*nc))
for (i <- 0 until nc) {
for (j <- 0 until nr) {
if (n(j, i) != null) {
- mat(j, i) = n(j, i).create(net);
- n(j, i).myLayer = mat(j, i);
+ mat(j, i) = n(j, i).create(net)
+ n(j, i).myLayer = mat(j, i)
}
}
}
for (i <- 0 until nc) {
for (j <- 0 until nr) {
if (n(j, i) != null) {
- val inputs = n(j, i).inputs;
+ val inputs = n(j, i).inputs
for (k <- 0 until inputs.length) {
- val input = inputs(k);
+ val input = inputs(k)
if (input != null) {
- val layer = input.node.myLayer;
- val layerTerm = if (input.term != 0) {
- new LayerTerm(layer, input.term)
- } else {
- layer;
- }
- mat(j, i).setInput(k, layerTerm);
+ val layer = input.node.myLayer
+ val layerTerm = if (input.term != 0) {
+ new LayerTerm(layer, input.term)
+ } else {
+ layer
+ }
+ mat(j, i).setInput(k, layerTerm)
}
}
}
}
}
- mat;
+ mat
}
def elem(x:Layer) = {
- val out = LayerMat(1,1)
- out.data(0) = x
- out
- }
+ val out = LayerMat(1,1)
+ out.data(0) = x
+ out
+ }
}
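
Aside: alphaCoords above labels matrix cells spreadsheet-style (column letters plus a row index). A self-contained sketch of the same arithmetic, assuming coli/26 is the intended high letter (the patched code computes coli0/26, which is always zero since coli0 = coli % 26):

object AlphaCoords extends App {
  // Cell index i in an nrows x ncols matrix -> label like "B3":
  // column -> base-26 letter(s), row -> decimal suffix.
  def label(i: Int, nrows: Int): String = {
    val coli = i / nrows
    val rowi = i - coli * nrows
    val ch0 = ('A' + coli % 26).toChar.toString
    val ch = if (coli < 26) ch0 else ('A' + coli / 26).toChar.toString + ch0
    ch + rowi.toString
  }
  println(label(0, 4))    // A0
  println(label(5, 4))    // B1
  println(label(105, 4))  // BA1: columns past Z get a two-letter prefix
}
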
diff --git a/src/main/scala/BIDMach/networks/layers/LinLayer.scala b/src/main/scala/BIDMach/networks/layers/LinLayer.scala
index e8227617..460424ad 100644
--- a/src/main/scala/BIDMach/networks/layers/LinLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/LinLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
/**
@@ -20,80 +20,80 @@ import BIDMach.networks._
*/
class LinLayer(override val net:Net, override val opts:LinNodeOpts = new LinNode) extends ModelLayer(net, opts) {
- var vexp:Mat = null;
- var texp:Mat = null;
- var lrate:Mat = null;
-// var sumsq:Mat = null;
- var mask:Mat = null;
- var dprod:Mat = null;
- var firststep = -1f;
- var waitsteps = 0;
- var epsilon = 0f;
- var ADAinitialized = false;
+ var vexp:Mat = null
+ var texp:Mat = null
+ var lrate:Mat = null
+// var sumsq:Mat = null
+ var mask:Mat = null
+ var dprod:Mat = null
+ var firststep = -1f
+ var waitsteps = 0
+ var epsilon = 0f
+ var ADAinitialized = false
def initModelMat(nr:Int, nc:Int):Mat = {
if (opts.tmatShape != null) {
- val (y, x, h, w) = opts.tmatShape(nr, nc);
- val out = TMat(nr, nc, y, x, h, w, zeros(1,1));
+ val (y, x, h, w) = opts.tmatShape(nr, nc)
+ val out = TMat(nr, nc, y, x, h, w, zeros(1,1))
out.tiles.foreach((x:Mat) => {rand(x); x ~ x - 0.5f})
- out;
+ out
} else {
- rand(nr, nc) - 0.5f;
+ rand(nr, nc) - 0.5f
}
}
override def forward = {
- val start = toc;
- val modelcols = inputData.nrows;
+ val start = toc
+ val modelcols = inputData.nrows
if (modelmats(imodel).asInstanceOf[AnyRef] == null) {
- val outdim = if (opts.outdim == 0) inputData.nrows else opts.outdim;
- modelmats(imodel) = convertMat(initModelMat(outdim, modelcols + (if (opts.hasBias) 1 else 0)));
+ val outdim = if (opts.outdim == 0) inputData.nrows else opts.outdim
+ modelmats(imodel) = convertMat(initModelMat(outdim, modelcols + (if (opts.hasBias) 1 else 0)))
updatemats(imodel) = modelmats(imodel).zeros(modelmats(imodel).nrows, modelmats(imodel).ncols);
}
- if (opts.aopts != null && !ADAinitialized) initADAGrad;
- val mm = if (opts.hasBias) modelmats(imodel).view(modelmats(imodel).nrows, modelcols) else modelmats(imodel);
- createOutput(mm.nrows \ inputData.ncols);
- output.asMat ~ mm * inputData.asMat;
- if (opts.hasBias) output.asMat ~ output.asMat + modelmats(imodel).colslice(modelcols, modelcols+1);
- clearDeriv;
- forwardtime += toc - start;
+ if (opts.aopts != null && !ADAinitialized) initADAGrad
+ val mm = if (opts.hasBias) modelmats(imodel).view(modelmats(imodel).nrows, modelcols) else modelmats(imodel)
+ createOutput(mm.nrows \ inputData.ncols)
+ output.asMat ~ mm * inputData.asMat
+ if (opts.hasBias) output.asMat ~ output.asMat + modelmats(imodel).colslice(modelcols, modelcols+1)
+ clearDeriv
+ forwardtime += toc - start
}
override def backward(ipass:Int, pos:Long) = {
- val start = toc;
- val modelcols = inputData.nrows;
- val mm = if (opts.hasBias) modelmats(imodel).view(modelmats(imodel).nrows, modelcols) else modelmats(imodel);
+ val start = toc
+ val modelcols = inputData.nrows
+ val mm = if (opts.hasBias) modelmats(imodel).view(modelmats(imodel).nrows, modelcols) else modelmats(imodel)
if (inputDeriv.asInstanceOf[AnyRef] != null) {
- mm.madd(deriv.asMat, inputDeriv.asMat, true, false);
+ mm.madd(deriv.asMat, inputDeriv.asMat, true, false)
}
if (opts.aopts != null) {
- if (firststep <= 0) firststep = pos.toFloat;
- val step = (pos + firststep)/firststep;
- ADAGrad.multUpdate(deriv.asMat, inputData.asMat, modelmats(imodel), updatemats(imodel), mask, lrate, vexp, texp, epsilon, step, waitsteps, opts.hasBias);
+ if (firststep <= 0) firststep = pos.toFloat
+ val step = (pos + firststep)/firststep
+ ADAGrad.multUpdate(deriv.asMat, inputData.asMat, modelmats(imodel), updatemats(imodel), mask, lrate, vexp, texp, epsilon, step, waitsteps, opts.hasBias)
} else {
- val um = if (opts.hasBias) updatemats(imodel).view(updatemats(imodel).nrows, modelcols) else updatemats(imodel);
- deriv.asMat.madd(inputData.asMat, um, false, true);
+ val um = if (opts.hasBias) updatemats(imodel).view(updatemats(imodel).nrows, modelcols) else updatemats(imodel)
+ deriv.asMat.madd(inputData.asMat, um, false, true)
if (opts.hasBias) updatemats(imodel)(?,modelcols) = updatemats(imodel)(?,modelcols) + sum(deriv.asMat,2)
}
- backwardtime += toc - start;
+ backwardtime += toc - start
}
def initADAGrad {
- val aopts = opts.aopts;
+ val aopts = opts.aopts
val mm = modelmats(imodel);
- val d = mm.nrows;
- val m = mm.ncols;
- firststep = -1f;
- lrate = convertMat(aopts.lrate);
- texp = convertMat(aopts.texp);
- vexp = convertMat(aopts.vexp);
-// sumsq = convertMat(zeros(d, m));
- updatemats(imodel).set(aopts.initsumsq);
- waitsteps = aopts.waitsteps;
- epsilon = aopts.epsilon;
- mask = aopts.mask;
- ADAinitialized = true;
+ val d = mm.nrows
+ val m = mm.ncols
+ firststep = -1f
+ lrate = convertMat(aopts.lrate)
+ texp = convertMat(aopts.texp)
+ vexp = convertMat(aopts.vexp)
+// sumsq = convertMat(zeros(d, m))
+ updatemats(imodel).set(aopts.initsumsq)
+ waitsteps = aopts.waitsteps
+ epsilon = aopts.epsilon
+ mask = aopts.mask
+ ADAinitialized = true
}
override def toString = {
@@ -102,24 +102,24 @@ class LinLayer(override val net:Net, override val opts:LinNodeOpts = new LinNode
}
trait LinNodeOpts extends ModelNodeOpts {
- var hasBias:Boolean = false;
- var aopts:ADAGrad.Opts = null;
- var outdim = 0;
- var tmatShape:(Int, Int) => (Array[Int], Array[Int], Array[Int], Array[Int]) = null;
+ var hasBias:Boolean = false
+ var aopts:ADAGrad.Opts = null
+ var outdim = 0
+ var tmatShape:(Int, Int) => (Array[Int], Array[Int], Array[Int], Array[Int]) = null
def copyOpts(opts:LinNodeOpts):LinNodeOpts = {
- super.copyOpts(opts);
- opts.hasBias = hasBias;
- opts.aopts = aopts;
- opts.outdim = outdim;
- opts;
+ super.copyOpts(opts)
+ opts.hasBias = hasBias
+ opts.aopts = aopts
+ opts.outdim = outdim
+ opts
}
}
class LinNode extends ModelNode with LinNodeOpts {
def copyTo(opts:LinNode):LinNode = {
- this.asInstanceOf[Node].copyTo(opts);
- copyOpts(opts);
+ this.asInstanceOf[Node].copyTo(opts)
+ copyOpts(opts)
opts
}
@@ -128,18 +128,18 @@ class LinNode extends ModelNode with LinNodeOpts {
}
override def clone:LinNode = {
- copyTo(new LinNode).asInstanceOf[LinNode];
+ copyTo(new LinNode).asInstanceOf[LinNode]
}
override def create(net:Net):LinLayer = {
- LinLayer(net, this);
+ LinLayer(net, this)
}
}
object LinLayer {
- def apply(net:Net) = new LinLayer(net, new LinNode);
+ def apply(net:Net) = new LinLayer(net, new LinNode)
- def apply(net:Net, opts:LinNodeOpts):LinLayer = new LinLayer(net, opts);
+ def apply(net:Net, opts:LinNodeOpts):LinLayer = new LinLayer(net, opts)
-}
\ No newline at end of file
+}
\ No newline at end of file
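
The forward pass above is an affine map: output = W * input, with the bias stored as the final model column when hasBias is set. A framework-free sketch of that layout (plain Scala arrays standing in for BIDMat matrices; the names and numbers are illustrative):

object LinForward extends App {
  // out(i) = sum_j W(i)(j) * x(j), plus W(i)(last) when the bias column is present.
  def forward(w: Array[Array[Float]], x: Array[Float], hasBias: Boolean): Array[Float] =
    w.map { row =>
      val n = if (hasBias) row.length - 1 else row.length
      var s = if (hasBias) row(n) else 0f
      var j = 0
      while (j < n) { s += row(j) * x(j); j += 1 }
      s
    }
  val w = Array(Array(1f, 2f, 0.5f), Array(-1f, 0f, 1f)) // 2 outputs, 2 inputs + bias column
  println(forward(w, Array(3f, 4f), hasBias = true).mkString(", ")) // 11.5, -2.0
}
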
diff --git a/src/main/scala/BIDMach/networks/layers/LnLayer.scala b/src/main/scala/BIDMach/networks/layers/LnLayer.scala
index 942a4cb1..427602c6 100644
--- a/src/main/scala/BIDMach/networks/layers/LnLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/LnLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
@@ -21,19 +21,19 @@ import BIDMach.networks._
class LnLayer(override val net:Net, override val opts:LnNodeOpts = new LnNode) extends Layer(net, opts) {
- override def forward = {
- val start = toc;
- createOutput;
- ln(inputData, output);
- clearDeriv;
- forwardtime += toc - start;
- }
-
- override def backward = {
- val start = toc;
- if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + (deriv/inputData);
- backwardtime += toc - start;
- }
+ override def forward = {
+ val start = toc
+ createOutput
+ ln(inputData, output)
+ clearDeriv
+ forwardtime += toc - start
+ }
+
+ override def backward = {
+ val start = toc
+ if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + (deriv/inputData)
+ backwardtime += toc - start
+ }
override def toString = {
"ln@"+Integer.toHexString(hashCode % 0x10000).toString
@@ -45,7 +45,7 @@ trait LnNodeOpts extends NodeOpts {
class LnNode extends Node with LnNodeOpts {
- override def clone:LnNode = {copyTo(new LnNode).asInstanceOf[LnNode];}
+ override def clone:LnNode = {copyTo(new LnNode).asInstanceOf[LnNode]}
override def create(net:Net):LnLayer = {LnLayer(net, this);}
@@ -56,7 +56,7 @@ class LnNode extends Node with LnNodeOpts {
object LnLayer {
- def apply(net:Net) = new LnLayer(net, new LnNode);
+ def apply(net:Net) = new LnLayer(net, new LnNode)
- def apply(net:Net, opts:LnNode) = new LnLayer(net, opts);
-}
\ No newline at end of file
+ def apply(net:Net, opts:LnNode) = new LnLayer(net, opts)
+}
\ No newline at end of file
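
The backward line inputDeriv + (deriv/inputData) follows from d/dx ln(x) = 1/x. A quick numeric check, independent of BIDMach:

object LnGradCheck extends App {
  val x = 2.0; val eps = 1e-6
  val numeric = (math.log(x + eps) - math.log(x - eps)) / (2 * eps)
  println(f"numeric $numeric%.6f vs analytic ${1 / x}%.6f") // both ~0.500000
}
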
diff --git a/src/main/scala/BIDMach/networks/layers/ModelLayer.scala b/src/main/scala/BIDMach/networks/layers/ModelLayer.scala
index 34af05f2..9f95a290 100644
--- a/src/main/scala/BIDMach/networks/layers/ModelLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/ModelLayer.scala
@@ -10,53 +10,53 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
class ModelLayer(override val net:Net, override val opts:ModelNodeOpts = new ModelNode) extends Layer(net, opts) {
- var imodel = 0;
+ var imodel = 0
override def getModelMats(net:Net):Unit = {
- imodel = if (net.opts.nmodelmats > 0) { // If explicit model numbers are given, use them.
- opts.imodel;
- } else if (opts.modelName.length > 0) { // If this is a named layer, look it up.
- if (net.modelMap.containsKey(opts.modelName)) {
- net.modelMap.get(opts.modelName);
- } else {
- val len = net.modelMap.size;
- net.modelMap.put(opts.modelName, len + net.opts.nmodelmats);
- len;
- }
- } else { // Otherwise return the next available int
- net.imodel += 1;
- net.imodel - 1;
- };
+ imodel = if (net.opts.nmodelmats > 0) { // If explicit model numbers are given, use them.
+ opts.imodel
+ } else if (opts.modelName.length > 0) { // If this is a named layer, look it up.
+ if (net.modelMap.containsKey(opts.modelName)) {
+ net.modelMap.get(opts.modelName)
+ } else {
+ val len = net.modelMap.size
+ net.modelMap.put(opts.modelName, len + net.opts.nmodelmats)
+ len
+ }
+ } else { // Otherwise return the next available int
+ net.imodel += 1
+ net.imodel - 1
+ }
}
}
trait ModelNodeOpts extends NodeOpts {
- var modelName = "";
- var imodel = 0;
+ var modelName = ""
+ var imodel = 0
def copyOpts(opts:ModelNodeOpts):ModelNodeOpts = {
- super.copyOpts(opts);
- opts.modelName = modelName;
- opts.imodel = imodel;
- opts;
+ super.copyOpts(opts)
+ opts.modelName = modelName
+ opts.imodel = imodel
+ opts
}
}
class ModelNode extends Node with ModelNodeOpts {
def copyTo(opts:ModelNode):ModelNode = {
- this.asInstanceOf[Node].copyTo(opts);
- copyOpts(opts);
+ this.asInstanceOf[Node].copyTo(opts)
+ copyOpts(opts)
opts
}
override def clone:ModelNode = {
- copyTo(new ModelNode).asInstanceOf[ModelNode];
+ copyTo(new ModelNode).asInstanceOf[ModelNode]
}
}
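
getModelMats above assigns model-matrix slots: explicitly numbered when nmodelmats is set, shared by name for named layers, otherwise fresh. A minimal sketch of the name-sharing rule (a plain mutable HashMap stands in for net.modelMap; the names are illustrative):

object ModelIndexDemo extends App {
  import scala.collection.mutable
  val map = mutable.HashMap[String, Int]()
  var next = 0
  def index(name: String): Int =
    if (name.nonEmpty) map.getOrElseUpdate(name, { val i = next; next += 1; i })
    else { val i = next; next += 1; i }
  println(index("embed")) // 0
  println(index(""))      // 1: unnamed layers always get a fresh slot
  println(index("embed")) // 0 again: named layers share weights
}
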
diff --git a/src/main/scala/BIDMach/networks/layers/MulLayer.scala b/src/main/scala/BIDMach/networks/layers/MulLayer.scala
index 3640fcd3..8c670b5a 100644
--- a/src/main/scala/BIDMach/networks/layers/MulLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/MulLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
/**
@@ -20,35 +20,35 @@ import BIDMach.networks._
class MulLayer(override val net:Net, override val opts:MulNodeOpts = new MulNode) extends Layer(net, opts) {
- override val _inputs = new Array[LayerTerm](opts.ninputs);
- val qeps = 1e-40f;
+ override val _inputs = new Array[LayerTerm](opts.ninputs)
+ val qeps = 1e-40f
def guardSmall(a:ND, eps:Float):ND = {
- a + (abs(a) < eps) * (2*eps);
+ a + (abs(a) < eps) * (2*eps)
}
- override def forward = {
- val start = toc;
- createOutput(inputData.dims);
- output <-- inputData;
- (1 until inputlength).map((i:Int) => output ~ output ∘ inputDatas(i));
- clearDeriv;
- forwardtime += toc - start;
- }
+ override def forward = {
+ val start = toc
+ createOutput(inputData.dims)
+ output <-- inputData
+ (1 until inputlength).map((i:Int) => output ~ output ∘ inputDatas(i))
+ clearDeriv
+ forwardtime += toc - start
+ }
- override def backward = {
- val start = toc;
+ override def backward = {
+ val start = toc
if (_inputs.length == 2) {
- if (inputDerivs(0).asInstanceOf[AnyRef] != null) inputDerivs(0) ~ inputDerivs(0) + (deriv ∘ inputDatas(1));
- if (inputDerivs(1).asInstanceOf[AnyRef] != null) inputDerivs(1) ~ inputDerivs(1) + (deriv ∘ inputDatas(0));
+ if (inputDerivs(0).asInstanceOf[AnyRef] != null) inputDerivs(0) ~ inputDerivs(0) + (deriv ∘ inputDatas(1))
+ if (inputDerivs(1).asInstanceOf[AnyRef] != null) inputDerivs(1) ~ inputDerivs(1) + (deriv ∘ inputDatas(0))
} else {
- val doutput = deriv ∘ output;
- (0 until inputlength).map((i:Int) => {
- if (inputDerivs(i).asInstanceOf[AnyRef] != null) inputDerivs(i) ~ inputDerivs(i) + (doutput / guardSmall(inputDatas(i), qeps));
- });
+ val doutput = deriv ∘ output
+ (0 until inputlength).map((i:Int) => {
+ if (inputDerivs(i).asInstanceOf[AnyRef] != null) inputDerivs(i) ~ inputDerivs(i) + (doutput / guardSmall(inputDatas(i), qeps))
+ })
}
- backwardtime += toc - start;
- }
+ backwardtime += toc - start
+ }
override def toString = {
"mul@"+Integer.toHexString(hashCode % 0x10000).toString
@@ -56,21 +56,21 @@ class MulLayer(override val net:Net, override val opts:MulNodeOpts = new MulNode
}
trait MulNodeOpts extends NodeOpts {
- var ninputs = 2;
+ var ninputs = 2
}
class MulNode extends Node with MulNodeOpts {
- override val inputs:Array[NodeTerm] = new Array[NodeTerm](ninputs);
+ override val inputs:Array[NodeTerm] = new Array[NodeTerm](ninputs)
def copyTo(opts:MulNode):MulNode = {
- super.copyTo(opts);
- opts.ninputs = ninputs;
- opts;
+ super.copyTo(opts)
+ opts.ninputs = ninputs
+ opts
}
- override def clone:MulNode = {copyTo(new MulNode).asInstanceOf[MulNode];}
+ override def clone:MulNode = {copyTo(new MulNode).asInstanceOf[MulNode]}
- override def create(net:Net):MulLayer = {MulLayer(net, this);}
+ override def create(net:Net):MulLayer = {MulLayer(net, this)}
override def toString = {
"mul@"+Integer.toHexString(hashCode % 0x10000).toString
@@ -79,7 +79,7 @@ class MulNode extends Node with MulNodeOpts {
object MulLayer {
- def apply(net:Net) = new MulLayer(net, new MulNode);
+ def apply(net:Net) = new MulLayer(net, new MulNode)
def apply(net:Net, opts:MulNodeOpts) = new MulLayer(net, opts);
}
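
For more than two inputs, backward above uses the product-rule shortcut d(x1*...*xn)/dxi = output / xi, with guardSmall keeping the division finite near zero. A scalar sketch:

object MulGrad extends App {
  // Mirrors guardSmall: nudge near-zero entries by 2*eps before dividing.
  def guardSmall(a: Float, eps: Float): Float =
    a + (if (math.abs(a) < eps) 2 * eps else 0f)
  val xs = Array(2f, 3f, 4f)
  val out = xs.product                              // 24.0
  val grads = xs.map(x => out / guardSmall(x, 1e-40f))
  println(grads.mkString(", "))                     // 12.0, 8.0, 6.0 = products of the other inputs
}
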
diff --git a/src/main/scala/BIDMach/networks/layers/NegsampOutputLayer.scala b/src/main/scala/BIDMach/networks/layers/NegsampOutputLayer.scala
index fd1f5d81..c4f918d3 100644
--- a/src/main/scala/BIDMach/networks/layers/NegsampOutputLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/NegsampOutputLayer.scala
@@ -10,123 +10,123 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
class NegsampOutputLayer(override val net:Net, override val opts:NegsampOutputNodeOpts = new NegsampOutputNode) extends ModelLayer(net, opts) with OutputLayer {
- var vexp:Mat = null;
- var texp:Mat = null;
- var lrate:Mat = null;
- var iexpt:Mat = null;
- var cfact:Mat = null;
+ var vexp:Mat = null
+ var texp:Mat = null
+ var lrate:Mat = null
+ var iexpt:Mat = null
+ var cfact:Mat = null
var cexpt:Mat = null;
-// var sumsq:Mat = null;
- var mask:Mat = null;
- var firststep = -1f;
- var waitsteps = 0;
- var epsilon = 0f;
- var ADAinitialized = false;
- var randwords:Mat = null;
- var onerow:Mat = null;
- var prods:Mat = null;
- var inputMat:Mat = null;
- var targMat:Mat = null;
- var irange:Mat = null;
- var coloffsets:Mat = null;
- var correction = 1f;
+// var sumsq:Mat = null
+ var mask:Mat = null
+ var firststep = -1f
+ var waitsteps = 0
+ var epsilon = 0f
+ var ADAinitialized = false
+ var randwords:Mat = null
+ var onerow:Mat = null
+ var prods:Mat = null
+ var inputMat:Mat = null
+ var targMat:Mat = null
+ var irange:Mat = null
+ var coloffsets:Mat = null
+ var correction = 1f
override def forward = {
- val start = toc;
- val modelrows = inputData.nrows;
- val nfeats = if (opts.outdim == 0) inputData.nrows else opts.outdim;
- if (correction.asInstanceOf[AnyRef] == null) correction = 1f * nfeats / opts.nsamps;
+ val start = toc
+ val modelrows = inputData.nrows
+ val nfeats = if (opts.outdim == 0) inputData.nrows else opts.outdim
+ if (correction.asInstanceOf[AnyRef] == null) correction = 1f * nfeats / opts.nsamps
if (modelmats(imodel).asInstanceOf[AnyRef] == null) {
- modelmats(imodel) = convertMat(normrnd(0, 1, modelrows + (if (opts.hasBias) 1 else 0), nfeats));
+ modelmats(imodel) = convertMat(normrnd(0, 1, modelrows + (if (opts.hasBias) 1 else 0), nfeats))
updatemats(imodel) = modelmats(imodel).zeros(modelmats(imodel).nrows, nfeats);
}
- if (opts.aopts != null && !ADAinitialized) initADAGrad;
- if (randwords.asInstanceOf[AnyRef] == null) randwords = convertMat(zeros(opts.nsamps + 1, inputData.ncols));
- if (iexpt.asInstanceOf[AnyRef] == null) iexpt = convertMat(row(1f/(1f-opts.expt)));
- if (onerow.asInstanceOf[AnyRef] == null) onerow = convertMat(ones(1, inputData.ncols));
+ if (opts.aopts != null && !ADAinitialized) initADAGrad
+ if (randwords.asInstanceOf[AnyRef] == null) randwords = convertMat(zeros(opts.nsamps + 1, inputData.ncols))
+ if (iexpt.asInstanceOf[AnyRef] == null) iexpt = convertMat(row(1f/(1f-opts.expt)))
+ if (onerow.asInstanceOf[AnyRef] == null) onerow = convertMat(ones(1, inputData.ncols))
val mm = modelmats(imodel);
- inputMat = if (opts.hasBias) (inputData.asMat on onerow) else inputData.asMat;
+ inputMat = if (opts.hasBias) (inputData.asMat on onerow) else inputData.asMat
rand(randwords); // Compute some random negatives
val irandwords = min(nfeats-2, int((nfeats - 1) * (randwords ^ iexpt))); // produce power-law values with exponent expt
irandwords ~ irandwords + (irandwords >= target); // remove targets as possible negative samples
- irandwords(opts.nsamps, ?) = target;
+ irandwords(opts.nsamps, ?) = target
- val indmat = nHot(irandwords, nfeats);
- prods = DDS(mm, inputMat, indmat);
- output = prods.contents.view(opts.nsamps+1, inputData.ncols);
+ val indmat = nHot(irandwords, nfeats)
+ prods = DDS(mm, inputMat, indmat)
+ output = prods.contents.view(opts.nsamps+1, inputData.ncols)
output.asMat ~ output.asMat - maxi(output.asMat)
exp(output, output); // ensures sum(exps) is between 1 and nfeats
if (opts.docorrect) {
- output(opts.nsamps, ?) = output(opts.nsamps, ?) * (1/correction);
+ output(opts.nsamps, ?) = output(opts.nsamps, ?) * (1/correction)
}
- val sout = sum(output.asMat);
- output.asMat ~ output.asMat / sout;
- forwardtime += toc - start;
+ val sout = sum(output.asMat)
+ output.asMat ~ output.asMat / sout
+ forwardtime += toc - start
}
override def backward = {
- val start = toc;
- val modelrows = inputData.nrows;
- val nfeats = if (opts.outdim == 0) inputData.nrows else opts.outdim;
- if (targMat.asInstanceOf[AnyRef] == null) targMat = convertMat(zeros(opts.nsamps, inputData.ncols) on ones(1, inputData.ncols));
- val mm = modelmats(imodel);
- val um = updatemats(imodel);
-
- deriv = targMat - output;
- prods.contents <-- deriv.asMat.contents;
- inputMat.madd(prods, um, false, true);
- if (inputDeriv.asInstanceOf[AnyRef] != null) {
- if (opts.hasBias) {
- inputMat ~ mm * prods;
- if (irange.asInstanceOf[AnyRef] == null) irange = convertMat(icol(0->inputData.nrows));
- inputDeriv ~ inputDeriv + inputMat(irange, ?);
- } else {
- mm.madd(prods, inputDeriv.asMat);
- }
- }
- backwardtime += toc - start;
+ val start = toc
+ val modelrows = inputData.nrows
+ val nfeats = if (opts.outdim == 0) inputData.nrows else opts.outdim
+ if (targMat.asInstanceOf[AnyRef] == null) targMat = convertMat(zeros(opts.nsamps, inputData.ncols) on ones(1, inputData.ncols))
+ val mm = modelmats(imodel)
+ val um = updatemats(imodel)
+
+ deriv = targMat - output
+ prods.contents <-- deriv.asMat.contents
+ inputMat.madd(prods, um, false, true)
+ if (inputDeriv.asInstanceOf[AnyRef] != null) {
+ if (opts.hasBias) {
+ inputMat ~ mm * prods
+ if (irange.asInstanceOf[AnyRef] == null) irange = convertMat(icol(0->inputData.nrows))
+ inputDeriv ~ inputDeriv + inputMat(irange, ?)
+ } else {
+ mm.madd(prods, inputDeriv.asMat)
+ }
+ }
+ backwardtime += toc - start
}
def initADAGrad {
- val aopts = opts.aopts;
+ val aopts = opts.aopts
val mm = modelmats(imodel);
- val d = mm.nrows;
- val m = mm.ncols;
- firststep = -1f;
- lrate = convertMat(aopts.lrate);
- texp = convertMat(aopts.texp);
- vexp = convertMat(aopts.vexp);
-// sumsq = convertMat(zeros(d, m));
- updatemats(imodel).set(aopts.initsumsq);
- waitsteps = aopts.waitsteps;
- epsilon = aopts.epsilon;
- mask = aopts.mask;
- ADAinitialized = true;
+ val d = mm.nrows
+ val m = mm.ncols
+ firststep = -1f
+ lrate = convertMat(aopts.lrate)
+ texp = convertMat(aopts.texp)
+ vexp = convertMat(aopts.vexp)
+// sumsq = convertMat(zeros(d, m))
+ updatemats(imodel).set(aopts.initsumsq)
+ waitsteps = aopts.waitsteps
+ epsilon = aopts.epsilon
+ mask = aopts.mask
+ ADAinitialized = true
}
override def score:FMat = {
if (opts.scoreType < 2) {
opts.scoreType match {
- case 0 => FMat(mean(ln(output.asMat(opts.nsamps, ?))));
- case 1 => FMat(mean(output.asMat(opts.nsamps, ?) == maxi(output.asMat)));
- }
+ case 0 => FMat(mean(ln(output.asMat(opts.nsamps, ?))))
+ case 1 => FMat(mean(output.asMat(opts.nsamps, ?) == maxi(output.asMat)))
+ }
} else {
- val mprod = modelmats(imodel) ^* inputMat;
- mprod ~ mprod - maxi(mprod);
- exp(mprod, mprod);
- mprod ~ mprod / sum(mprod);
- if (coloffsets.asInstanceOf[AnyRef] == null) coloffsets = convertMat(irow(0->mprod.ncols)*mprod.nrows);
- val inds = target + coloffsets;
+ val mprod = modelmats(imodel) ^* inputMat
+ mprod ~ mprod - maxi(mprod)
+ exp(mprod, mprod)
+ mprod ~ mprod / sum(mprod)
+ if (coloffsets.asInstanceOf[AnyRef] == null) coloffsets = convertMat(irow(0->mprod.ncols)*mprod.nrows)
+ val inds = target + coloffsets
opts.scoreType match {
- case 2 => FMat(mean(ln(mprod(inds))));
+ case 2 => FMat(mean(ln(mprod(inds))))
case 3 => FMat(mean(mprod(inds) == maxi(mprod)));
}
}
@@ -139,37 +139,37 @@ class NegsampOutputLayer(override val net:Net, override val opts:NegsampOutputNo
trait NegsampOutputNodeOpts extends ModelNodeOpts {
- var nsamps = 100;
- var hasBias:Boolean = false;
- var aopts:ADAGrad.Opts = null;
+ var nsamps = 100
+ var hasBias:Boolean = false
+ var aopts:ADAGrad.Opts = null
var outdim = 0;
- var scoreType = 0;
- var expt = 0.5;
- var docorrect = true;
+ var scoreType = 0
+ var expt = 0.5
+ var docorrect = true
def copyOpts(opts:NegsampOutputNodeOpts):NegsampOutputNodeOpts = {
- super.copyOpts(opts);
- opts.nsamps = nsamps;
- opts.hasBias = hasBias;
- opts.aopts = aopts;
- opts.outdim = outdim;
- opts.expt = expt;
- opts.scoreType = scoreType;
- opts;
- }
+ super.copyOpts(opts)
+ opts.nsamps = nsamps
+ opts.hasBias = hasBias
+ opts.aopts = aopts
+ opts.outdim = outdim
+ opts.expt = expt
+ opts.scoreType = scoreType
+ opts
+ }
}
class NegsampOutputNode extends ModelNode with NegsampOutputNodeOpts {
def copyTo(opts:NegsampOutputNode):NegsampOutputNode = {
- this.asInstanceOf[ModelNode].copyTo(opts);
- copyOpts(opts);
+ this.asInstanceOf[ModelNode].copyTo(opts)
+ copyOpts(opts)
opts
}
- override def clone:NegsampOutputNode = {copyTo(new NegsampOutputNode).asInstanceOf[NegsampOutputNode];}
+ override def clone:NegsampOutputNode = {copyTo(new NegsampOutputNode).asInstanceOf[NegsampOutputNode]}
- override def create(net:Net):NegsampOutputLayer = {NegsampOutputLayer(net, this);}
+ override def create(net:Net):NegsampOutputLayer = {NegsampOutputLayer(net, this)}
override def toString = {
"negsamp@"+Integer.toHexString(hashCode % 0x10000).toString
@@ -178,7 +178,7 @@ class NegsampOutputNode extends ModelNode with NegsampOutputNodeOpts {
object NegsampOutputLayer {
- def apply(net:Net) = new NegsampOutputLayer(net, new NegsampOutputNode);
+ def apply(net:Net) = new NegsampOutputLayer(net, new NegsampOutputNode)
- def apply(net:Net, opts:NegsampOutputNode) = new NegsampOutputLayer(net, opts);
-}
\ No newline at end of file
+ def apply(net:Net, opts:NegsampOutputNode) = new NegsampOutputLayer(net, opts)
+}
\ No newline at end of file
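
The sampler above is an inverse-transform draw from an approximate power law: u^(1/(1-expt)) with u uniform, scaled to the feature range, so low feature ids (presumed the frequent ones) are picked most often. A standalone sketch of the same draw:

object NegSample extends App {
  import scala.util.Random
  val nfeats = 1000; val expt = 0.5f
  val iexpt = 1f / (1f - expt)  // = 2 when expt = 0.5
  def draw(rng: Random): Int =
    math.min(nfeats - 2, ((nfeats - 1) * math.pow(rng.nextFloat(), iexpt)).toInt)
  val rng = new Random(0)
  val hist = new Array[Int](10)
  for (_ <- 1 to 100000) hist(draw(rng) / 100) += 1
  println(hist.mkString(" "))   // counts fall off from low ids to high ids
}
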
diff --git a/src/main/scala/BIDMach/networks/layers/Node.scala b/src/main/scala/BIDMach/networks/layers/Node.scala
index 7b95cfd9..feb4eb05 100644
--- a/src/main/scala/BIDMach/networks/layers/Node.scala
+++ b/src/main/scala/BIDMach/networks/layers/Node.scala
@@ -11,8 +11,8 @@ import BIDMach.networks.layers._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
@@ -21,25 +21,25 @@ trait NodeOpts extends BIDMat.Opts {
var name = "";
def copyOpts(opts:NodeOpts):NodeOpts = {
- opts.name = name;
- opts;
+ opts.name = name
+ opts
}
}
class Node extends NodeTerm(null, 0) with NodeOpts {
- val inputs:Array[NodeTerm] = Array(null);
- var myLayer:Layer = null;
- var myGhost:Node = null;
- var parent:Node = null;
- var outputNumbers:Array[Int] = null;
+ val inputs:Array[NodeTerm] = Array(null)
+ var myLayer:Layer = null
+ var myGhost:Node = null
+ var parent:Node = null
+ var outputNumbers:Array[Int] = null
- override def node = this;
+ override def node = this
def copyTo(opts:Node):Node = {
- copyOpts(opts);
- opts.inputs(0) = inputs(0);
- myGhost = opts;
- opts;
+ copyOpts(opts)
+ opts.inputs(0) = inputs(0)
+ myGhost = opts
+ opts
}
override def toString = {
@@ -47,10 +47,10 @@ class Node extends NodeTerm(null, 0) with NodeOpts {
}
override def clone:Node = {
- copyTo(new Node).asInstanceOf[Node];
+ copyTo(new Node).asInstanceOf[Node]
}
- def apply(i:Int) = new NodeTerm(this, i);
+ def apply(i:Int) = new NodeTerm(this, i)
def create(net:Net):Layer = {null}
}
@@ -58,15 +58,15 @@ class Node extends NodeTerm(null, 0) with NodeOpts {
class NodeTerm(val _node:Node, val term:Int) extends Serializable {
- def node = _node;
+ def node = _node
- def + (a:NodeTerm) = {val n=this; new AddNode{inputs(0)=n; inputs(1)=a}};
+ def + (a:NodeTerm) = {val n=this; new AddNode{inputs(0)=n; inputs(1)=a}}
- def *@ (a:NodeTerm) = {val n=this; new MulNode{inputs(0)=n; inputs(1)=a;}};
+ def *@ (a:NodeTerm) = {val n=this; new MulNode{inputs(0)=n; inputs(1)=a}}
- def ∘ (a:NodeTerm) = {val n=this; new MulNode{inputs(0)=n; inputs(1)=a;}};
+ def ∘ (a:NodeTerm) = {val n=this; new MulNode{inputs(0)=n; inputs(1)=a}}
- def over (a:NodeTerm) = {val n=this; new StackNode{inputs(0)=n; inputs(1)=a;}};
+ def over (a:NodeTerm) = {val n=this; new StackNode{inputs(0)=n; inputs(1)=a}}
}
object Node {
@@ -77,103 +77,103 @@ object Node {
def dropout(a:NodeTerm, frac:Float) = new DropoutNode{inputs(0) = a; frac = frac}
- def exp(a:NodeTerm) = new ExpNode{inputs(0) = a;};
+ def exp(a:NodeTerm) = new ExpNode{inputs(0) = a}
- def glm_(a:NodeTerm)(implicit opts:GLMNodeOpts) = new GLMNode{inputs(0) = a; links = opts.links};
+ def glm_(a:NodeTerm)(implicit opts:GLMNodeOpts) = new GLMNode{inputs(0) = a; links = opts.links}
- def glm(a:NodeTerm)(links:IMat) = {val ilinks = links; new GLMNode{inputs(0) = a; links = ilinks}};
+ def glm(a:NodeTerm)(links:IMat) = {val ilinks = links; new GLMNode{inputs(0) = a; links = ilinks}}
- def input(a:NodeTerm) = new InputNode{inputs(0) = a;};
+ def input(a:NodeTerm) = new InputNode{inputs(0) = a}
def input = new InputNode
def linear(a:NodeTerm)(name:String="", outdim:Int=0, hasBias:Boolean=true, aopts:ADAGrad.Opts=null) = {
- val odim = outdim;
- val hBias = hasBias;
- val aaopts = aopts;
- val mname = name;
- new LinNode{inputs(0)=a; modelName = mname; outdim=odim; hasBias=hBias; aopts=aaopts};
+ val odim = outdim
+ val hBias = hasBias
+ val aaopts = aopts
+ val mname = name
+ new LinNode{inputs(0)=a; modelName = mname; outdim=odim; hasBias=hBias; aopts=aaopts}
}
def linear_(a:NodeTerm)(implicit opts:LinNodeOpts) = {
val n = new LinNode{inputs(0) = a;}
- opts.copyOpts(n);
+ opts.copyOpts(n)
n
}
def lstm_fused(inc:NodeTerm, lin1:NodeTerm, lin2:NodeTerm, lin3:NodeTerm, lin4:NodeTerm) = {
new LSTMfusedNode{
- inputs(0) = inc;
- inputs(1) = lin1;
- inputs(2) = lin2;
- inputs(3) = lin3;
- inputs(4) = lin4;
+ inputs(0) = inc
+ inputs(1) = lin1
+ inputs(2) = lin2
+ inputs(3) = lin3
+ inputs(4) = lin4
}
}
- def ln(a:NodeTerm) = new LnNode{inputs(0) = a};
+ def ln(a:NodeTerm) = new LnNode{inputs(0) = a}
def negsamp(a:NodeTerm)(name:String="", outdim:Int=0, hasBias:Boolean=true, aopts:ADAGrad.Opts=null, nsamps:Int=100, expt:Float=0.5f, scoreType:Int=0, doCorrect:Boolean=true) = {
- val odim = outdim;
- val hBias = hasBias;
- val aaopts = aopts;
- val nnsamps = nsamps;
- val eexpt = expt;
- val dcr = doCorrect;
- val sct = scoreType;
- val mname = name;
- new NegsampOutputNode{inputs(0)=a; modelName=mname; outdim=odim; hasBias=hBias; aopts=aaopts; nsamps=nnsamps; expt=eexpt; scoreType=sct; docorrect=dcr};
+ val odim = outdim
+ val hBias = hasBias
+ val aaopts = aopts
+ val nnsamps = nsamps
+ val eexpt = expt
+ val dcr = doCorrect
+ val sct = scoreType
+ val mname = name
+ new NegsampOutputNode{inputs(0)=a; modelName=mname; outdim=odim; hasBias=hBias; aopts=aaopts; nsamps=nnsamps; expt=eexpt; scoreType=sct; docorrect=dcr}
}
def negsamp_(a:NodeTerm)(implicit opts:NegsampOutputNodeOpts) = {
val n = new NegsampOutputNode{inputs(0) = a}
- opts.copyOpts(n);
+ opts.copyOpts(n)
n
}
def norm_(a:NodeTerm)(implicit opts:NormNodeOpts) = {
val n = new NormNode{inputs(0) = a;}
- opts.copyOpts(n);
+ opts.copyOpts(n)
n
}
def norm(a:NodeTerm)(targetNorm:Float = 1f, weight:Float = 1f) = {
- val tnorm = targetNorm;
- val nweight = weight;
+ val tnorm = targetNorm
+ val nweight = weight
new NormNode{inputs(0) = a; targetNorm = tnorm; weight = nweight}
}
- def oneHot(a:NodeTerm) = new OnehotNode{inputs(0) = a};
+ def oneHot(a:NodeTerm) = new OnehotNode{inputs(0) = a}
- def rect(a:NodeTerm) = new RectNode{inputs(0) = a};
+ def rect(a:NodeTerm) = new RectNode{inputs(0) = a}
- def sigmoid(a:NodeTerm) = new SigmoidNode{inputs(0) = a};
+ def sigmoid(a:NodeTerm) = new SigmoidNode{inputs(0) = a}
- def σ(a:NodeTerm) = new SigmoidNode{inputs(0) = a};
+ def σ(a:NodeTerm) = new SigmoidNode{inputs(0) = a}
- def softmax(a:NodeTerm) = new SoftmaxNode{inputs(0) = a};
+ def softmax(a:NodeTerm) = new SoftmaxNode{inputs(0) = a}
def softmaxout(a:NodeTerm)(scoreTyp:Int=0, doVar:Boolean=false) = new SoftmaxOutputNode{inputs(0) = a; scoreType=scoreTyp; doVariance = doVar}
- def softplus(a:NodeTerm) = new SoftplusNode{inputs(0) = a};
+ def softplus(a:NodeTerm) = new SoftplusNode{inputs(0) = a}
- def splithoriz(a:NodeTerm, np:Int) = new SplitHorizNode{inputs(0) = a; nparts = np};
+ def splithoriz(a:NodeTerm, np:Int) = new SplitHorizNode{inputs(0) = a; nparts = np}
- def splitvert(a:NodeTerm, np:Int) = new SplitVertNode{inputs(0) = a; nparts = np};
+ def splitvert(a:NodeTerm, np:Int) = new SplitVertNode{inputs(0) = a; nparts = np}
- def tanh(a:NodeTerm) = new TanhNode{inputs(0) = a};
+ def tanh(a:NodeTerm) = new TanhNode{inputs(0) = a}
def lstm(h:NodeTerm, c:NodeTerm, i:NodeTerm, m:String)(opts:LSTMNodeOpts) = {
- val n = new LSTMNode;
- opts.copyOpts(n);
- n.modelName = m;
- n.constructGraph;
- n.inputs(0) = h;
- n.inputs(1) = c;
- n.inputs(2) = i;
+ val n = new LSTMNode
+ opts.copyOpts(n)
+ n.modelName = m
+ n.constructGraph
+ n.inputs(0) = h
+ n.inputs(1) = c
+ n.inputs(2) = i
n
}
- implicit def NodeToNodeMat(n:Node):NodeMat = NodeMat.elem(n);
+ implicit def NodeToNodeMat(n:Node):NodeMat = NodeMat.elem(n)
-}
\ No newline at end of file
+}
\ No newline at end of file
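
The constructors above form a small graph DSL. A usage sketch: it follows the signatures in this file, but it needs the full BIDMach build to compile, and the names fc1 and 64 are illustrative only:

val in   = Node.input                       // data input
val fc   = Node.linear(in)(name = "fc1", outdim = 64, hasBias = true, aopts = null)
val act  = Node.rect(fc)                    // ReLU
val prob = Node.softmax(act)                // class probabilities
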
diff --git a/src/main/scala/BIDMach/networks/layers/NodeMat.scala b/src/main/scala/BIDMach/networks/layers/NodeMat.scala
index 97c9a448..23b88944 100755
--- a/src/main/scala/BIDMach/networks/layers/NodeMat.scala
+++ b/src/main/scala/BIDMach/networks/layers/NodeMat.scala
@@ -2,30 +2,30 @@ package BIDMach.networks.layers
import BIDMat.Mat
import BIDMat.IMat
import BIDMat.DenseMat
-import scala.collection.mutable.HashMap;
-
-case class NodeMat(override val nrows:Int, override val ncols:Int, override val data:Array[Node]) extends DenseMat[Node](nrows, ncols, data) {
-
- var nodeMap:HashMap[Node,Int] = null;
-
- override def t:NodeMat = NodeMat(gt(null))
-
- override def mytype = "NodeMat"
-
- def horzcat(b: NodeMat) = NodeMat(ghorzcat(b))
-
- def vertcat(b: NodeMat) = NodeMat(gvertcat(b))
-
- def find3:(IMat, IMat, NodeMat) = { val vv = gfind3 ; (IMat(vv._1), IMat(vv._2), NodeMat(vv._3)) }
-
- override def apply(a:IMat):NodeMat = NodeMat(gapply(a))
-
- override def apply(a:IMat, b:IMat):NodeMat = NodeMat(gapply(a, b))
-
- override def apply(a:Int, b:IMat):NodeMat = NodeMat(gapply(a, b))
-
- override def apply(a:IMat, b:Int):NodeMat = NodeMat(gapply(a, b))
-
+import scala.collection.mutable.HashMap
+
+case class NodeMat(override val nrows:Int, override val ncols:Int, override val data:Array[Node]) extends DenseMat[Node](nrows, ncols, data) {
+
+ var nodeMap:HashMap[Node,Int] = null
+
+ override def t:NodeMat = NodeMat(gt(null))
+
+ override def mytype = "NodeMat"
+
+ def horzcat(b: NodeMat) = NodeMat(ghorzcat(b))
+
+ def vertcat(b: NodeMat) = NodeMat(gvertcat(b))
+
+ def find3:(IMat, IMat, NodeMat) = { val vv = gfind3 ; (IMat(vv._1), IMat(vv._2), NodeMat(vv._3)) }
+
+ override def apply(a:IMat):NodeMat = NodeMat(gapply(a))
+
+ override def apply(a:IMat, b:IMat):NodeMat = NodeMat(gapply(a, b))
+
+ override def apply(a:Int, b:IMat):NodeMat = NodeMat(gapply(a, b))
+
+ override def apply(a:IMat, b:Int):NodeMat = NodeMat(gapply(a, b))
+
override def apply(a:Mat, b:Mat):NodeMat = NodeMat(gapply(a.asInstanceOf[IMat], b.asInstanceOf[IMat]))
override def apply(a:Mat, b:Int):NodeMat = NodeMat(gapply(a.asInstanceOf[IMat], b))
@@ -74,76 +74,76 @@ case class NodeMat(override val nrows:Int, override val ncols:Int, override val
def update(i:Int, jv:Mat, b:Node):NodeMat = NodeMat(_update(IMat.ielem(i), jv.asInstanceOf[IMat], b))
- def ccMatOp(b: NodeMat, f:(Node, Node) => Node, old:NodeMat) = NodeMat(ggMatOp(b, f, old))
-
- def ccMatOpScalar(b: Node, f:(Node, Node) => Node, old:NodeMat) = NodeMat(ggMatOpScalar(b, f, old))
-
- def ccReduceOp(n:Int, f1:(Node) => Node, f2:(Node, Node) => Node, old:NodeMat) = NodeMat(ggReduceOp(n, f1, f2, old))
+ def ccMatOp(b: NodeMat, f:(Node, Node) => Node, old:NodeMat) = NodeMat(ggMatOp(b, f, old))
+
+ def ccMatOpScalar(b: Node, f:(Node, Node) => Node, old:NodeMat) = NodeMat(ggMatOpScalar(b, f, old))
+
+ def ccReduceOp(n:Int, f1:(Node) => Node, f2:(Node, Node) => Node, old:NodeMat) = NodeMat(ggReduceOp(n, f1, f2, old))
def map(f: Node => Layer) = {
- val out = LayerMat(nrows, ncols);
+ val out = LayerMat(nrows, ncols)
for (i <- 0 until length) {
- out(i) = f(data(i));
+ out(i) = f(data(i))
}
- out;
+ out
}
def rebuildMap = {
- nodeMap = new HashMap[Node,Int]();
- for (i <- 0 until data.length) {
- nodeMap(data(i)) = i;
- }
+ nodeMap = new HashMap[Node,Int]()
+ for (i <- 0 until data.length) {
+ nodeMap(data(i)) = i
+ }
}
def alphaCoords(nodeTerm:NodeTerm) = {
- if (nodeTerm == null) {
- "null"
- } else {
- val node = nodeTerm.node;
- val term = nodeTerm.term;
- if (nodeMap == null) {
- rebuildMap;
- }
+ if (nodeTerm == null) {
+ "null"
+ } else {
+ val node = nodeTerm.node
+ val term = nodeTerm.term
+ if (nodeMap == null) {
+ rebuildMap
+ }
if (nodeMap.contains(node)) {
- val i = nodeMap(node);
- if (data(i) != node) rebuildMap;
- val coli = i / nrows;
- val rowi = i - coli * nrows;
- val v:Int = 'A';
- val coli0 = coli % 26;
- val ch0 = Character.toChars(v + coli0)(0).toString;
- val ch = if (coli < 26) {
- ch0;
- } else {
- val ch1 = Character.toChars(v + coli0/26)(0).toString;
- ch1 + ch0;
- }
- val ostr = ch + rowi.toString;
- if (term == 0) {
- ostr;
- } else {
- ostr + "[" + term.toString + "]";
- }
+ val i = nodeMap(node)
+ if (data(i) != node) rebuildMap
+ val coli = i / nrows
+ val rowi = i - coli * nrows
+ val v:Int = 'A'
+ val coli0 = coli % 26
+ val ch0 = Character.toChars(v + coli0)(0).toString
+ val ch = if (coli < 26) {
+ ch0
+ } else {
+ val ch1 = Character.toChars(v + coli0/26)(0).toString
+ ch1 + ch0
+ }
+ val ostr = ch + rowi.toString
+ if (term == 0) {
+ ostr
+ } else {
+ ostr + "[" + term.toString + "]"
+ }
} else {
"<==="
}
}
}
-
- override def printOne(i:Int):String = {
- val v = data(i)
- if (v != null) {
- val ostring = v.inputs.map(alphaCoords(_)).reduce(_+","+_);
- v.toString() + "(" + ostring +")";
+
+ override def printOne(i:Int):String = {
+ val v = data(i)
+ if (v != null) {
+ val ostring = v.inputs.map(alphaCoords(_)).reduce(_+","+_)
+ v.toString() + "(" + ostring +")"
}
- else
- ""
- }
-
- def \ (b: NodeMat) = horzcat(b);
- def \ (b: Node) = horzcat(NodeMat.elem(b))
- def on (b: NodeMat) = vertcat(b)
- def on (b: Node) = vertcat(NodeMat.elem(b))
+ else
+ ""
+ }
+
+ def \ (b: NodeMat) = horzcat(b)
+ def \ (b: Node) = horzcat(NodeMat.elem(b))
+ def on (b: NodeMat) = vertcat(b)
+ def on (b: Node) = vertcat(NodeMat.elem(b))
}
object NodeMat {
@@ -155,10 +155,10 @@ object NodeMat {
def apply(a:List[Node]) = new NodeMat(1, a.length, a.toArray)
def elem(x:Node) = {
- val out = NodeMat(1,1)
- out.data(0) = x
- out
- }
+ val out = NodeMat(1,1)
+ out.data(0) = x
+ out
+ }
}
diff --git a/src/main/scala/BIDMach/networks/layers/NodeSet.scala b/src/main/scala/BIDMach/networks/layers/NodeSet.scala
index 6e8c4c0f..dce96efd 100644
--- a/src/main/scala/BIDMach/networks/layers/NodeSet.scala
+++ b/src/main/scala/BIDMach/networks/layers/NodeSet.scala
@@ -2,26 +2,26 @@ package BIDMach.networks.layers
class NodeSet(val nnodes:Int, val nodes:Array[Node]) extends Serializable {
- def this(nnodes:Int) = this(nnodes, new Array[Node](nnodes));
+ def this(nnodes:Int) = this(nnodes, new Array[Node](nnodes))
- def this(nodes:Array[Node]) = this(nodes.length, nodes);
+ def this(nodes:Array[Node]) = this(nodes.length, nodes)
- def apply(i:Int):Node = nodes(i);
+ def apply(i:Int):Node = nodes(i)
def update(i:Int, lopts:Node) = {nodes(i) = lopts; this}
- override def clone = copyTo(new NodeSet(nnodes));
+ override def clone = copyTo(new NodeSet(nnodes))
def copyTo(lopts:NodeSet):NodeSet = {
for (i <- 0 until nnodes) {
- lopts.nodes(i) = nodes(i).clone;
- nodes(i).myGhost = lopts.nodes(i);
+ lopts.nodes(i) = nodes(i).clone
+ nodes(i).myGhost = lopts.nodes(i)
}
for (i <- 0 until nnodes) {
for (j <- 0 until nodes(i).inputs.length) {
- if (nodes(i).inputs(j) != null) lopts.nodes(i).inputs(j) = nodes(i).inputs(j).node.myGhost;
+ if (nodes(i).inputs(j) != null) lopts.nodes(i).inputs(j) = nodes(i).inputs(j).node.myGhost
}
}
- lopts;
+ lopts
}
}
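
copyTo above needs two passes because node inputs reference sibling nodes: pass one clones each node and records the clone in myGhost, pass two rewires every input to its clone. The same pattern in a self-contained form:

object DeepCopyGraph extends App {
  final class N(var input: N = null) { var ghost: N = null }
  def copy(nodes: Array[N]): Array[N] = {
    val out = nodes.map { n => val c = new N(); n.ghost = c; c }  // pass 1: clone
    for (i <- nodes.indices if nodes(i).input != null)
      out(i).input = nodes(i).input.ghost                         // pass 2: rewire
    out
  }
  val a = new N(); val b = new N(a)
  val Array(a2, b2) = copy(Array(a, b))
  println(b2.input eq a2)  // true: the copy points into the copy, not the original
}
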
diff --git a/src/main/scala/BIDMach/networks/layers/NormLayer.scala b/src/main/scala/BIDMach/networks/layers/NormLayer.scala
index 27e11dc1..1af505ae 100644
--- a/src/main/scala/BIDMach/networks/layers/NormLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/NormLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
@@ -21,26 +21,26 @@ import BIDMach.networks._
*/
class NormLayer(override val net:Net, override val opts:NormNodeOpts = new NormNode) extends Layer(net, opts) {
- var sconst:Mat = null;
+ var sconst:Mat = null
override def forward = {
- val start = toc;
- createOutput;
- output <-- inputData;
- clearDeriv;
- forwardtime += toc - start;
+ val start = toc
+ createOutput
+ output <-- inputData
+ clearDeriv
+ forwardtime += toc - start
}
override def backward = {
- val start = toc;
+ val start = toc
if (inputDeriv.asInstanceOf[AnyRef] != null) {
- if (sconst.asInstanceOf[AnyRef] == null) sconst = output.zeros(1,1);
- sconst.set(math.min(0.1f, math.max(-0.1f, (opts.targetNorm - norm(output.asMat)/output.length).toFloat * opts.weight)));
- inputDeriv = output + 0f;
- inputDeriv.asMat ~ output.asMat ∘ sconst;
- inputDeriv ~ inputDeriv + deriv;
+ if (sconst.asInstanceOf[AnyRef] == null) sconst = output.zeros(1,1)
+ sconst.set(math.min(0.1f, math.max(-0.1f, (opts.targetNorm - norm(output.asMat)/output.length).toFloat * opts.weight)))
+ inputDeriv = output + 0f
+ inputDeriv.asMat ~ output.asMat ∘ sconst
+ inputDeriv ~ inputDeriv + deriv
}
- backwardtime += toc - start;
+ backwardtime += toc - start
}
override def toString = {
@@ -49,37 +49,37 @@ class NormLayer(override val net:Net, override val opts:NormNodeOpts = new NormN
}
trait NormNodeOpts extends NodeOpts {
- var targetNorm = 1f;
- var weight = 1f;
-
- def copyOpts(opts:NormNodeOpts):NormNodeOpts = {
- super.copyOpts(opts);
- opts.targetNorm = targetNorm;
- opts.weight = weight;
- opts;
+ var targetNorm = 1f
+ var weight = 1f
+
+ def copyOpts(opts:NormNodeOpts):NormNodeOpts = {
+ super.copyOpts(opts)
+ opts.targetNorm = targetNorm
+ opts.weight = weight
+ opts
}
}
class NormNode extends Node with NormNodeOpts {
- def copyTo(opts:NormNode):NormNode = {
- this.asInstanceOf[Node].copyTo(opts);
- copyOpts(opts);
- opts
- }
+ def copyTo(opts:NormNode):NormNode = {
+ this.asInstanceOf[Node].copyTo(opts)
+ copyOpts(opts)
+ opts
+ }
- override def clone:NormNode = {copyTo(new NormNode).asInstanceOf[NormNode];};
+ override def clone:NormNode = {copyTo(new NormNode).asInstanceOf[NormNode]}
- override def create(net:Net):NormLayer = {NormLayer(net, this);}
+ override def create(net:Net):NormLayer = {NormLayer(net, this)}
- override def toString = {
- "norm@"+Integer.toHexString(hashCode % 0x10000).toString
- }
+ override def toString = {
+ "norm@"+Integer.toHexString(hashCode % 0x10000).toString
+ }
}
object NormLayer {
- def apply(net:Net) = new NormLayer(net, new NormNode);
+ def apply(net:Net) = new NormLayer(net, new NormNode)
def apply(net:Net, opts:NormNode) = new NormLayer(net, opts);
}
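
backward above nudges activations toward a target norm: the scalar factor (targetNorm - norm(output)/length) * weight, clipped to [-0.1, 0.1], scales the output and is added into the input derivative. The factor in isolation:

object NormNudge extends App {
  def sconst(targetNorm: Float, weight: Float, out: Array[Float]): Float = {
    val n = math.sqrt(out.map(v => v.toDouble * v).sum).toFloat
    math.min(0.1f, math.max(-0.1f, (targetNorm - n / out.length) * weight))
  }
  println(sconst(1f, 1f, Array(0.3f, 0.4f)))  // 0.1: the raw value 0.75 is clipped
}
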
diff --git a/src/main/scala/BIDMach/networks/layers/OnehotLayer.scala b/src/main/scala/BIDMach/networks/layers/OnehotLayer.scala
index 85cc0498..a7851cea 100644
--- a/src/main/scala/BIDMach/networks/layers/OnehotLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/OnehotLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
/*
 * Designed to map linear integer feature arrays to sparse matrices. Doesn't deal with derivatives.
@@ -20,9 +20,9 @@ import BIDMach.networks._
class OnehotLayer(override val net:Net, override val opts:OnehotNodeOpts = new OnehotNode) extends Layer(net, opts) {
override def forward = {
- val start = toc;
- output = oneHot(inputData.asMat);
- forwardtime += toc - start;
+ val start = toc
+ output = oneHot(inputData.asMat)
+ forwardtime += toc - start
}
override def toString = {
@@ -35,7 +35,7 @@ trait OnehotNodeOpts extends NodeOpts {
class OnehotNode extends Node with OnehotNodeOpts {
- override def clone:OnehotNode = {copyTo(new OnehotNode).asInstanceOf[OnehotNode];}
+ override def clone:OnehotNode = {copyTo(new OnehotNode).asInstanceOf[OnehotNode]}
override def create(net:Net):OnehotLayer = {OnehotLayer(net, this);}
@@ -46,7 +46,7 @@ class OnehotNode extends Node with OnehotNodeOpts {
object OnehotLayer {
- def apply(net:Net) = new OnehotLayer(net, new OnehotNode);
+ def apply(net:Net) = new OnehotLayer(net, new OnehotNode)
- def apply(net:Net, opts:OnehotNode) = new OnehotLayer(net, opts);
-}
\ No newline at end of file
+ def apply(net:Net, opts:OnehotNode) = new OnehotLayer(net, opts)
+}
\ No newline at end of file
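
oneHot turns a row of integer labels into a 0/1 indicator matrix, one column per sample. A dense stand-in for the sparse version used here:

object OneHotDemo extends App {
  def oneHot(labels: Array[Int], nfeats: Int): Array[Array[Int]] =
    labels.map(l => Array.tabulate(nfeats)(i => if (i == l) 1 else 0)).transpose
  oneHot(Array(2, 0, 1), 3).foreach(r => println(r.mkString(" ")))
  // 0 1 0
  // 0 0 1
  // 1 0 0
}
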
diff --git a/src/main/scala/BIDMach/networks/layers/RectLayer.scala b/src/main/scala/BIDMach/networks/layers/RectLayer.scala
index 6dd4f905..12312e9f 100644
--- a/src/main/scala/BIDMach/networks/layers/RectLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/RectLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
@@ -21,19 +21,19 @@ import BIDMach.networks._
*/
class RectLayer(override val net:Net, override val opts:RectNodeOpts = new RectNode) extends Layer(net, opts) {
- override def forward = {
- val start = toc;
- createOutput;
- output.asMat <-- max(inputData.asMat, 0f);
- clearDeriv;
- forwardtime += toc - start;
- }
+ override def forward = {
+ val start = toc
+ createOutput
+ output.asMat <-- max(inputData.asMat, 0f)
+ clearDeriv
+ forwardtime += toc - start
+ }
- override def backward = {
- val start = toc;
- if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + (deriv ∘ (inputData > 0f));
- backwardtime += toc - start;
- }
+ override def backward = {
+ val start = toc
+ if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + (deriv ∘ (inputData > 0f))
+ backwardtime += toc - start
+ }
override def toString = {
"rect@"+Integer.toHexString(hashCode % 0x10000).toString
@@ -45,16 +45,16 @@ trait RectNodeOpts extends NodeOpts {
class RectNode extends Node with RectNodeOpts {
def copyTo(opts:RectNode):RectNode = {
- super.copyTo(opts);
- opts;
+ super.copyTo(opts)
+ opts
}
override def clone:RectNode = {
- copyTo(new RectNode);
+ copyTo(new RectNode)
}
override def create(net:Net):RectLayer = {
- RectLayer(net, this);
+ RectLayer(net, this)
}
override def toString = {
@@ -64,7 +64,7 @@ class RectNode extends Node with RectNodeOpts {
object RectLayer {
- def apply(net:Net) = new RectLayer(net, new RectNode);
+ def apply(net:Net) = new RectLayer(net, new RectNode)
- def apply(net:Net, opts:RectNodeOpts) = new RectLayer(net, opts);
+ def apply(net:Net, opts:RectNodeOpts) = new RectLayer(net, opts)
}
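
The rectifier pair above in scalar form: forward is max(x, 0), backward masks the incoming derivative by x > 0. Self-contained check:

object ReluCheck extends App {
  val x  = Array(-1.5f, 0f, 2f)
  val y  = x.map(v => math.max(v, 0f))                  // forward
  val dx = x.map(v => if (v > 0f) 1f else 0f)           // backward mask (upstream deriv = 1)
  println(y.mkString(", ") + " | " + dx.mkString(", ")) // 0.0, 0.0, 2.0 | 0.0, 0.0, 1.0
}
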
diff --git a/src/main/scala/BIDMach/networks/layers/SigmoidLayer.scala b/src/main/scala/BIDMach/networks/layers/SigmoidLayer.scala
index cd062455..82e5c41b 100644
--- a/src/main/scala/BIDMach/networks/layers/SigmoidLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/SigmoidLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
@@ -22,17 +22,17 @@ import BIDMach.networks._
class SigmoidLayer(override val net:Net, override val opts:SigmoidNodeOpts = new SigmoidNode) extends Layer(net, opts) {
override def forward = {
- val start = toc;
- createOutput;
- LayerFn.applyfwd(inputData, output, LayerFn.SIGMOIDFN);
- clearDeriv;
- forwardtime += toc - start;
+ val start = toc
+ createOutput
+ LayerFn.applyfwd(inputData, output, LayerFn.SIGMOIDFN)
+ clearDeriv
+ forwardtime += toc - start
}
override def backward = {
- val start = toc;
- if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + LayerFn.applyderiv(output, deriv, LayerFn.SIGMOIDFN);
- backwardtime += toc - start;
+ val start = toc
+ if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + LayerFn.applyderiv(output, deriv, LayerFn.SIGMOIDFN)
+ backwardtime += toc - start
}
override def toString = {
@@ -46,7 +46,7 @@ trait SigmoidNodeOpts extends NodeOpts {
class SigmoidNode extends Node with SigmoidNodeOpts {
- override def clone:SigmoidNode = {copyTo(new SigmoidNode).asInstanceOf[SigmoidNode];}
+ override def clone:SigmoidNode = {copyTo(new SigmoidNode).asInstanceOf[SigmoidNode]}
override def create(net:Net):SigmoidLayer = {SigmoidLayer(net, this);}
@@ -57,7 +57,7 @@ class SigmoidNode extends Node with SigmoidNodeOpts {
object SigmoidLayer {
- def apply(net:Net) = new SigmoidLayer(net, new SigmoidNode);
+ def apply(net:Net) = new SigmoidLayer(net, new SigmoidNode)
- def apply(net:Net, opts:SigmoidNode) = new SigmoidLayer(net, opts);
+ def apply(net:Net, opts:SigmoidNode) = new SigmoidLayer(net, opts)
}
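
Note that applyderiv is passed output rather than inputData here, because the sigmoid derivative is cheapest in terms of its output: sigma'(x) = y(1 - y) with y = sigma(x). Numeric check:

object SigmoidGrad extends App {
  def sigmoid(x: Double) = 1.0 / (1.0 + math.exp(-x))
  val x = 0.7; val y = sigmoid(x); val eps = 1e-6
  val numeric = (sigmoid(x + eps) - sigmoid(x - eps)) / (2 * eps)
  println(f"numeric $numeric%.6f vs y*(1-y) ${y * (1 - y)}%.6f")
}
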
diff --git a/src/main/scala/BIDMach/networks/layers/SoftmaxLayer.scala b/src/main/scala/BIDMach/networks/layers/SoftmaxLayer.scala
index 8907fd29..a41f11a7 100644
--- a/src/main/scala/BIDMach/networks/layers/SoftmaxLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/SoftmaxLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
@@ -20,26 +20,26 @@ import BIDMach.networks._
*/
class SoftmaxLayer(override val net:Net, override val opts:SoftmaxNodeOpts = new SoftmaxNode) extends Layer(net, opts) {
- var coloffsets:Mat = null;
+ var coloffsets:Mat = null
- override def forward = {
- val start = toc;
- createOutput;
- val exps = exp(inputData.asMat - maxi(inputData.asMat)); // ensures sum(exps) is between 1 and nfeats
- output.asMat ~ exps / sum(exps);
- clearDeriv;
- forwardtime += toc - start;
- }
+ override def forward = {
+ val start = toc
+ createOutput
+ val exps = exp(inputData.asMat - maxi(inputData.asMat)) // ensures sum(exps) is between 1 and nfeats
+ output.asMat ~ exps / sum(exps)
+ clearDeriv
+ forwardtime += toc - start
+ }
- override def backward = {
- val start = toc;
- val exps = exp(inputData.asMat - maxi(inputData.asMat));
- val sumexps = sum(exps);
- val isum = 1f / (sumexps ∘ sumexps);
- if (inputDeriv.asInstanceOf[AnyRef] != null)
- inputDeriv.asMat ~ inputDeriv.asMat + (((exps / sumexps) ∘ deriv.asMat) - (exps ∘ (isum ∘ (exps ∙ deriv.asMat))));
- backwardtime += toc - start;
- }
+ override def backward = {
+ val start = toc
+ val exps = exp(inputData.asMat - maxi(inputData.asMat))
+ val sumexps = sum(exps)
+ val isum = 1f / (sumexps ∘ sumexps)
+ if (inputDeriv.asInstanceOf[AnyRef] != null)
+ inputDeriv.asMat ~ inputDeriv.asMat + (((exps / sumexps) ∘ deriv.asMat) - (exps ∘ (isum ∘ (exps ∙ deriv.asMat))))
+ backwardtime += toc - start
+ }
override def toString = {
"softmax@"+Integer.toHexString(hashCode % 0x10000).toString
@@ -52,7 +52,7 @@ trait SoftmaxNodeOpts extends NodeOpts {
class SoftmaxNode extends Node with SoftmaxNodeOpts {
- override def clone:SoftmaxNode = {copyTo(new SoftmaxNode).asInstanceOf[SoftmaxNode];};
+ override def clone:SoftmaxNode = {copyTo(new SoftmaxNode).asInstanceOf[SoftmaxNode]}
override def create(net:Net):SoftmaxLayer = {SoftmaxLayer(net, this);}
@@ -63,7 +63,7 @@ class SoftmaxNode extends Node with SoftmaxNodeOpts {
object SoftmaxLayer {
- def apply(net:Net) = new SoftmaxLayer(net, new SoftmaxNode);
+ def apply(net:Net) = new SoftmaxLayer(net, new SoftmaxNode)
- def apply(net:Net, opts:SoftmaxNode) = new SoftmaxLayer(net, opts);
+ def apply(net:Net, opts:SoftmaxNode) = new SoftmaxLayer(net, opts)
}
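
The backward expression above is the softmax Jacobian-vector product dx = p ∘ d - p (p · d), where p is the softmax output and d the upstream derivative. Expanded in plain Scala:

object SoftmaxGrad extends App {
  val x = Array(1.0, 2.0, 0.5)
  val d = Array(0.3, -0.1, 0.7)                         // upstream derivative
  val e = x.map(v => math.exp(v - x.max))               // max-shift for stability
  val p = { val s = e.sum; e.map(_ / s) }               // forward: softmax
  val pd = p.zip(d).map { case (a, b) => a * b }.sum    // p . d
  val dx = p.zip(d).map { case (pi, di) => pi * (di - pd) }
  println(dx.mkString(", "))
}
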
diff --git a/src/main/scala/BIDMach/networks/layers/SoftmaxOutputLayer.scala b/src/main/scala/BIDMach/networks/layers/SoftmaxOutputLayer.scala
index ee91f4a9..e3f11af2 100644
--- a/src/main/scala/BIDMach/networks/layers/SoftmaxOutputLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/SoftmaxOutputLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
@@ -20,49 +20,49 @@ import BIDMach.networks._
*/
class SoftmaxOutputLayer(override val net:Net, override val opts:SoftmaxOutputNodeOpts = new SoftmaxOutputNode) extends Layer(net, opts) with OutputLayer {
- var coloffsets:Mat = null;
- var zero:Mat = null;
+ var coloffsets:Mat = null
+ var zero:Mat = null
override def forward = {
- val start = toc;
- createOutput;
+ val start = toc
+ createOutput
output.asMat ~ inputData.asMat - maxi(inputData.asMat)
exp(output.asMat, output.asMat); // ensures sum(exps) is between 1 and nfeats
- output.asMat ~ output.asMat / sum(output.asMat);
- clearDeriv;
- forwardtime += toc - start;
+ output.asMat ~ output.asMat / sum(output.asMat)
+ clearDeriv
+ forwardtime += toc - start
}
override def backward = {
- val start = toc;
- if (coloffsets.asInstanceOf[AnyRef] == null) coloffsets = convertMat(irow(0->output.ncols)*output.nrows);
- if (inputDeriv.asInstanceOf[AnyRef] != null) {
- if (zero.asInstanceOf[AnyRef] == null) zero = convertMat(row(0f));
- deriv.asMat ~ zero - output.asMat;
- val inds = target + coloffsets;
- deriv.asMat(inds) = deriv.asMat(inds) + 1f; // deriv = target - preds
+ val start = toc
+ if (coloffsets.asInstanceOf[AnyRef] == null) coloffsets = convertMat(irow(0->output.ncols)*output.nrows)
+ if (inputDeriv.asInstanceOf[AnyRef] != null) {
+ if (zero.asInstanceOf[AnyRef] == null) zero = convertMat(row(0f))
+ deriv.asMat ~ zero - output.asMat
+ val inds = target + coloffsets
+ deriv.asMat(inds) = deriv.asMat(inds) + 1f // deriv = target - preds
inputDeriv ~ inputDeriv + deriv;
}
- backwardtime += toc - start;
+ backwardtime += toc - start
}
override def score:FMat = {
- if (coloffsets.asInstanceOf[AnyRef] == null) coloffsets = convertMat(irow(0->output.ncols)*output.nrows);
- val inds = target + coloffsets;
+ if (coloffsets.asInstanceOf[AnyRef] == null) coloffsets = convertMat(irow(0->output.ncols)*output.nrows)
+ val inds = target + coloffsets
if (opts.scoreType == 1) {
if (opts.doVariance) {
- val matches = (output(inds) == maxi(output.asMat));
- FMat(mean(matches)) on FMat(variance(matches));
+ val matches = (output(inds) == maxi(output.asMat))
+ FMat(mean(matches)) on FMat(variance(matches))
} else {
- FMat(mean(output(inds) == maxi(output.asMat)));
+ FMat(mean(output(inds) == maxi(output.asMat)))
}
} else {
- if (opts.doVariance) {
- val out = ln(output(inds));
- FMat(mean(out)) on FMat(variance(out));
- } else {
- FMat(mean(ln(output(inds))));
- }
+ if (opts.doVariance) {
+ val out = ln(output(inds))
+ FMat(mean(out)) on FMat(variance(out))
+ } else {
+ FMat(mean(ln(output(inds))))
+ }
}
}
@@ -72,28 +72,28 @@ class SoftmaxOutputLayer(override val net:Net, override val opts:SoftmaxOutputNo
}
trait SoftmaxOutputNodeOpts extends NodeOpts {
- var scoreType = 0;
- var doVariance = false;
-
- def copyOpts(opts:SoftmaxOutputNodeOpts):SoftmaxOutputNodeOpts = {
- super.copyOpts(opts);
- opts.scoreType = scoreType;
- opts.doVariance = doVariance;
- opts;
- }
+ var scoreType = 0
+ var doVariance = false
+
+ def copyOpts(opts:SoftmaxOutputNodeOpts):SoftmaxOutputNodeOpts = {
+ super.copyOpts(opts)
+ opts.scoreType = scoreType
+ opts.doVariance = doVariance
+ opts
+ }
}
class SoftmaxOutputNode extends Node with SoftmaxOutputNodeOpts {
def copyTo(opts:SoftmaxOutputNode):SoftmaxOutputNode = {
- this.asInstanceOf[Node].copyTo(opts);
- copyOpts(opts);
+ this.asInstanceOf[Node].copyTo(opts)
+ copyOpts(opts)
opts
}
- override def clone:SoftmaxOutputNode = {copyTo(new SoftmaxOutputNode).asInstanceOf[SoftmaxOutputNode];}
+ override def clone:SoftmaxOutputNode = {copyTo(new SoftmaxOutputNode).asInstanceOf[SoftmaxOutputNode]}
- override def create(net:Net):SoftmaxOutputLayer = {SoftmaxOutputLayer(net, this);}
+ override def create(net:Net):SoftmaxOutputLayer = {SoftmaxOutputLayer(net, this)}
override def toString = {
"softmaxout@"+Integer.toHexString(hashCode % 0x10000).toString
@@ -102,7 +102,7 @@ class SoftmaxOutputNode extends Node with SoftmaxOutputNodeOpts {
object SoftmaxOutputLayer {
- def apply(net:Net) = new SoftmaxOutputLayer(net, new SoftmaxOutputNode);
+ def apply(net:Net) = new SoftmaxOutputLayer(net, new SoftmaxOutputNode)
- def apply(net:Net, opts:SoftmaxOutputNode) = new SoftmaxOutputLayer(net, opts);
+ def apply(net:Net, opts:SoftmaxOutputNode) = new SoftmaxOutputLayer(net, opts)
}
diff --git a/src/main/scala/BIDMach/networks/layers/SoftplusLayer.scala b/src/main/scala/BIDMach/networks/layers/SoftplusLayer.scala
index 07fa424f..3adf558e 100644
--- a/src/main/scala/BIDMach/networks/layers/SoftplusLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/SoftplusLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
@@ -20,20 +20,20 @@ import BIDMach.networks._
*/
class SoftplusLayer(override val net:Net, override val opts:SoftplusNodeOpts = new SoftplusNode) extends Layer(net, opts) {
- var totflops = 0L;
+ var totflops = 0L
override def forward = {
- val start = toc;
- createOutput;
- LayerFn.applyfwd(inputData, output, LayerFn.SOFTPLUSFN);
- clearDeriv;
- forwardtime += toc - start;
+ val start = toc
+ createOutput
+ LayerFn.applyfwd(inputData, output, LayerFn.SOFTPLUSFN)
+ clearDeriv
+ forwardtime += toc - start
}
override def backward = {
- val start = toc;
- if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + LayerFn.applyderiv(inputData, deriv, LayerFn.SOFTPLUSFN);
- backwardtime += toc - start;
+ val start = toc
+ if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + LayerFn.applyderiv(inputData, deriv, LayerFn.SOFTPLUSFN)
+ backwardtime += toc - start
}
override def toString = {
@@ -46,7 +46,7 @@ trait SoftplusNodeOpts extends NodeOpts {
class SoftplusNode extends Node with SoftplusNodeOpts {
- override def clone:SoftplusNode = {copyTo(new SoftplusNode).asInstanceOf[SoftplusNode];}
+ override def clone:SoftplusNode = {copyTo(new SoftplusNode).asInstanceOf[SoftplusNode]}
override def create(net:Net):SoftplusLayer = {SoftplusLayer(net, this);}
@@ -57,8 +57,8 @@ class SoftplusNode extends Node with SoftplusNodeOpts {
object SoftplusLayer {
- def apply(net:Net) = new SoftplusLayer(net, new SoftplusNode);
+ def apply(net:Net) = new SoftplusLayer(net, new SoftplusNode)
- def apply(net:Net, opts:SoftplusNode) = new SoftplusLayer(net, opts);
+ def apply(net:Net, opts:SoftplusNode) = new SoftplusLayer(net, opts)
}
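SoftplusLayer delegates the elementwise math to LayerFn; assuming SOFTPLUSFN is the usual ln(1 + e^x), the forward map and its derivative (the logistic sigmoid, which applyderiv would apply to the incoming deriv) look like this in plain Scala:

def softplus(x: Double): Double =
  if (x > 20) x else math.log1p(math.exp(x))  // ln(1 + e^x); effectively linear for large x
def softplusDeriv(x: Double): Double =
  1.0 / (1.0 + math.exp(-x))                  // d/dx ln(1 + e^x) = sigmoid(x)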
diff --git a/src/main/scala/BIDMach/networks/layers/SplitHorizLayer.scala b/src/main/scala/BIDMach/networks/layers/SplitHorizLayer.scala
index 0b1d3d88..c5692b2a 100644
--- a/src/main/scala/BIDMach/networks/layers/SplitHorizLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/SplitHorizLayer.scala
@@ -10,39 +10,39 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
class SplitHorizLayer(override val net:Net, override val opts:SplitHorizNodeOpts = new SplitHorizNode) extends Layer(net, opts) {
- override val _outputs = new Array[ND](opts.nparts);
- override val _derivs = new Array[ND](opts.nparts);
- var nblock:Int = 0;
- var colranges = new Array[Mat](opts.nparts);
+ override val _outputs = new Array[ND](opts.nparts)
+ override val _derivs = new Array[ND](opts.nparts)
+ var nblock:Int = 0
+ var colranges = new Array[Mat](opts.nparts)
override def forward = {
- val start = toc;
- if (output.asInstanceOf[AnyRef] == null) {
- nblock = inputData.ncols / opts.nparts;
- for (i <- 0 until opts.nparts) {
- colranges(i) = convertMat(irow((i*nblock)->((i+1)*nblock)));
- }
- }
- for (i <- 0 until opts.nparts) {
- setOutput(i, inputData.colslice(i*nblock, (i+1)* nblock));
- }
- clearDerivs;
- forwardtime += toc - start;
+ val start = toc
+ if (output.asInstanceOf[AnyRef] == null) {
+ nblock = inputData.ncols / opts.nparts
+ for (i <- 0 until opts.nparts) {
+ colranges(i) = convertMat(irow((i*nblock)->((i+1)*nblock)))
+ }
+ }
+ for (i <- 0 until opts.nparts) {
+ setOutput(i, inputData.colslice(i*nblock, (i+1)* nblock))
+ }
+ clearDerivs
+ forwardtime += toc - start
}
override def backward = {
- val start = toc;
- if (inputDeriv.asInstanceOf[AnyRef] != null) {
- for (i <- 0 until opts.nparts) {
- inputDeriv(?, colranges(i)) = inputDeriv(?, colranges(i)) + derivs(i);
- }
- }
- backwardtime += toc - start;
+ val start = toc
+ if (inputDeriv.asInstanceOf[AnyRef] != null) {
+ for (i <- 0 until opts.nparts) {
+ inputDeriv(?, colranges(i)) = inputDeriv(?, colranges(i)) + derivs(i)
+ }
+ }
+ backwardtime += toc - start
}
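A toy illustration of the column partitioning in forward above: with integer division, part i owns columns [i*nblock, (i+1)*nblock), and any remainder columns are silently dropped when nparts does not divide ncols (example values, plain Scala):

val ncols = 7; val nparts = 2
val nblock = ncols / nparts                       // 3
val parts = (0 until nparts).map(i => (i * nblock) until ((i + 1) * nblock))
// parts == Vector(0 until 3, 3 until 6); column 6 is never copied to any part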
override def toString = {
@@ -51,12 +51,12 @@ class SplitHorizLayer(override val net:Net, override val opts:SplitHorizNodeOpts
}
trait SplitHorizNodeOpts extends NodeOpts {
- var nparts = 1;
+ var nparts = 1
}
class SplitHorizNode extends Node with SplitHorizNodeOpts {
- override def clone:SplitHorizNode = {copyTo(new SplitHorizNode).asInstanceOf[SplitHorizNode];}
+ override def clone:SplitHorizNode = {copyTo(new SplitHorizNode).asInstanceOf[SplitHorizNode]}
override def create(net:Net):SplitHorizLayer = {SplitHorizLayer(net, this);}
@@ -67,7 +67,7 @@ class SplitHorizNode extends Node with SplitHorizNodeOpts {
object SplitHorizLayer {
- def apply(net:Net) = new SplitHorizLayer(net, new SplitHorizNode);
+ def apply(net:Net) = new SplitHorizLayer(net, new SplitHorizNode)
- def apply(net:Net, opts:SplitHorizNode) = new SplitHorizLayer(net, opts);
-}
\ No newline at end of file
+ def apply(net:Net, opts:SplitHorizNode) = new SplitHorizLayer(net, opts)
+}
\ No newline at end of file
diff --git a/src/main/scala/BIDMach/networks/layers/SplitVertLayer.scala b/src/main/scala/BIDMach/networks/layers/SplitVertLayer.scala
index cdb6fb06..010970c5 100644
--- a/src/main/scala/BIDMach/networks/layers/SplitVertLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/SplitVertLayer.scala
@@ -10,39 +10,39 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
class SplitVertLayer(override val net:Net, override val opts:SplitVertNodeOpts = new SplitVertNode) extends Layer(net, opts) {
- override val _outputs = new Array[ND](opts.nparts);
- override val _derivs = new Array[ND](opts.nparts);
- var nblock:Int = 0;
- var rowranges = new Array[Mat](opts.nparts);
+ override val _outputs = new Array[ND](opts.nparts)
+ override val _derivs = new Array[ND](opts.nparts)
+ var nblock:Int = 0
+ var rowranges = new Array[Mat](opts.nparts)
override def forward = {
- val start = toc;
- if (output.asInstanceOf[AnyRef] == null) {
- nblock = inputData.nrows / opts.nparts;
- for (i <- 0 until opts.nparts) {
- rowranges(i) = convertMat(icol((i*nblock)->((i+1)*nblock)));
- }
- }
- for (i <- 0 until opts.nparts) {
- setOutput(i, inputData(rowranges(i), ?));
- }
- clearDerivs;
- forwardtime += toc - start;
+ val start = toc
+ if (output.asInstanceOf[AnyRef] == null) {
+ nblock = inputData.nrows / opts.nparts
+ for (i <- 0 until opts.nparts) {
+ rowranges(i) = convertMat(icol((i*nblock)->((i+1)*nblock)))
+ }
+ }
+ for (i <- 0 until opts.nparts) {
+ setOutput(i, inputData(rowranges(i), ?))
+ }
+ clearDerivs
+ forwardtime += toc - start
}
override def backward = {
- val start = toc;
- if (inputDeriv.asInstanceOf[AnyRef] != null) {
- for (i <- 0 until opts.nparts) {
- inputDeriv(rowranges(i), ?) = inputDeriv(rowranges(i), ?) + derivs(i);
- }
- }
- backwardtime += toc - start;
+ val start = toc
+ if (inputDeriv.asInstanceOf[AnyRef] != null) {
+ for (i <- 0 until opts.nparts) {
+ inputDeriv(rowranges(i), ?) = inputDeriv(rowranges(i), ?) + derivs(i)
+ }
+ }
+ backwardtime += toc - start
}
override def toString = {
@@ -56,7 +56,7 @@ trait SplitVertNodeOpts extends NodeOpts {
class SplitVertNode extends Node with SplitVertNodeOpts {
- override def clone:SplitVertNode = {copyTo(new SplitVertNode).asInstanceOf[SplitVertNode];}
+ override def clone:SplitVertNode = {copyTo(new SplitVertNode).asInstanceOf[SplitVertNode]}
override def create(net:Net):SplitVertLayer = {SplitVertLayer(net, this);}
@@ -67,7 +67,7 @@ class SplitVertNode extends Node with SplitVertNodeOpts {
object SplitVertLayer {
- def apply(net:Net) = new SplitVertLayer(net, new SplitVertNode);
+ def apply(net:Net) = new SplitVertLayer(net, new SplitVertNode)
- def apply(net:Net, opts:SplitVertNode) = new SplitVertLayer(net, opts);
-}
\ No newline at end of file
+ def apply(net:Net, opts:SplitVertNode) = new SplitVertLayer(net, opts)
+}
\ No newline at end of file
diff --git a/src/main/scala/BIDMach/networks/layers/StackLayer.scala b/src/main/scala/BIDMach/networks/layers/StackLayer.scala
index 6f933d7e..1bd77342 100644
--- a/src/main/scala/BIDMach/networks/layers/StackLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/StackLayer.scala
@@ -10,41 +10,41 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
class StackLayer(override val net:Net, override val opts:StackNodeOpts = new StackNode) extends Layer(net, opts) {
- override val _inputs = new Array[LayerTerm](opts.ninputs);
+ override val _inputs = new Array[LayerTerm](opts.ninputs)
- var colranges = new Array[Mat](opts.ninputs);
+ var colranges = new Array[Mat](opts.ninputs)
override def forward = {
- val start = toc;
- if (output.asInstanceOf[AnyRef] == null) {
- var orows = 0;
- for (i <- 0 until opts.ninputs) {
- val thisrow = inputDatas(i).nrows;
- colranges(i) = convertMat(irow(orows -> (orows + thisrow)));
- orows += thisrow;
- }
- output = convertMat(zeros(orows \ inputData.ncols));
- }
- for (i <- 0 until opts.ninputs) {
- output.asMat(colranges(i), ?) = inputDatas(i).asMat;
- }
- clearDeriv;
- forwardtime += toc - start;
+ val start = toc
+ if (output.asInstanceOf[AnyRef] == null) {
+ var orows = 0
+ for (i <- 0 until opts.ninputs) {
+ val thisrow = inputDatas(i).nrows
+ colranges(i) = convertMat(irow(orows -> (orows + thisrow)))
+ orows += thisrow
+ }
+ output = convertMat(zeros(orows \ inputData.ncols))
+ }
+ for (i <- 0 until opts.ninputs) {
+ output.asMat(colranges(i), ?) = inputDatas(i).asMat
+ }
+ clearDeriv
+ forwardtime += toc - start
}
override def backward = {
- val start = toc;
- for (i <- 0 until opts.ninputs) {
- if (inputDerivs(i).asInstanceOf[AnyRef] != null) {
- inputDerivs(i) <-- deriv.asMat(colranges(i), ?)
- }
- }
- backwardtime += toc - start;
+ val start = toc
+ for (i <- 0 until opts.ninputs) {
+ if (inputDerivs(i).asInstanceOf[AnyRef] != null) {
+ inputDerivs(i) <-- deriv.asMat(colranges(i), ?)
+ }
+ }
+ backwardtime += toc - start
}
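The bookkeeping in forward above amounts to prefix sums of the input heights; a plain-Scala sketch of how colranges partitions the stacked output (despite the name, the ranges index rows), with made-up sizes:

val inputRows = Seq(4, 2, 3)                    // nrows of each input
val offsets = inputRows.scanLeft(0)(_ + _)      // 0, 4, 6, 9
val rowRanges = offsets.zip(offsets.tail).map { case (a, b) => a until b }
// rowRanges == Vector(0 until 4, 4 until 6, 6 until 9); backward copies
// deriv(rowRanges(i), ?) back into inputDerivs(i)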
override def toString = {
@@ -54,13 +54,13 @@ class StackLayer(override val net:Net, override val opts:StackNodeOpts = new Sta
trait StackNodeOpts extends NodeOpts {
- var ninputs = 2;
+ var ninputs = 2
}
class StackNode extends Node with StackNodeOpts {
- override val inputs = new Array[NodeTerm](ninputs);
+ override val inputs = new Array[NodeTerm](ninputs)
- override def clone:StackNode = {copyTo(new StackNode).asInstanceOf[StackNode];}
+ override def clone:StackNode = {copyTo(new StackNode).asInstanceOf[StackNode]}
override def create(net:Net):StackLayer = {StackLayer(net, this);}
@@ -71,7 +71,7 @@ class StackNode extends Node with StackNodeOpts {
object StackLayer {
- def apply(net:Net) = new StackLayer(net, new StackNode);
+ def apply(net:Net) = new StackLayer(net, new StackNode)
- def apply(net:Net, opts:StackNode) = new StackLayer(net, opts);
-}
\ No newline at end of file
+ def apply(net:Net, opts:StackNode) = new StackLayer(net, opts)
+}
\ No newline at end of file
diff --git a/src/main/scala/BIDMach/networks/layers/SumLayer.scala b/src/main/scala/BIDMach/networks/layers/SumLayer.scala
index 3ccdf44b..e9a5c896 100644
--- a/src/main/scala/BIDMach/networks/layers/SumLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/SumLayer.scala
@@ -10,29 +10,29 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
/**
* Sum layer.
*/
class SumLayer(override val net:Net, override val opts:SumNodeOpts = new SumNode) extends Layer(net, opts) {
- var vmap:ND = null;
+ var vmap:ND = null
override def forward = {
- val start = toc;
- createOutput(1 \ inputData.ncols);
- output.asMat <-- sum(inputData.asMat);
- clearDeriv;
- forwardtime += toc - start;
+ val start = toc
+ createOutput(1 \ inputData.ncols)
+ output.asMat <-- sum(inputData.asMat)
+ clearDeriv
+ forwardtime += toc - start
}
override def backward = {
- val start = toc;
- if (vmap.asInstanceOf[AnyRef] == null) vmap = deriv.ones(output.nrows, 1);
- if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + (vmap * deriv);
- backwardtime += toc - start;
+ val start = toc
+ if (vmap.asInstanceOf[AnyRef] == null) vmap = deriv.ones(output.nrows, 1)
+ if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + (vmap * deriv);
+ backwardtime += toc - start
}
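Why backward multiplies by a column of ones: if y_j = sum_i x_ij, then dL/dx_ij = dL/dy_j for every row i, so the 1 x ncols deriv is simply replicated down the rows. A scalar sketch in plain Scala (arbitrary values):

val dy = Array(0.5, -1.0)              // deriv of the 1 x ncols output
val nrows = 3
val dx = Array.fill(nrows)(dy.clone)   // every row receives dy unchanged,
                                       // i.e. ones(nrows,1) * dy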
override def toString = {
@@ -45,7 +45,7 @@ trait SumNodeOpts extends NodeOpts {
class SumNode extends Node with SumNodeOpts {
- override def clone:SumNode = {copyTo(new SumNode).asInstanceOf[SumNode];}
+ override def clone:SumNode = {copyTo(new SumNode).asInstanceOf[SumNode]}
override def create(net:Net):SumLayer = {SumLayer(net, this);}
@@ -56,7 +56,7 @@ class SumNode extends Node with SumNodeOpts {
object SumLayer {
- def apply(net:Net) = new SumLayer(net, new SumNode);
+ def apply(net:Net) = new SumLayer(net, new SumNode)
- def apply(net:Net, opts:SumNode) = new SumLayer(net, opts);
-}
\ No newline at end of file
+ def apply(net:Net, opts:SumNode) = new SumLayer(net, opts)
+}
\ No newline at end of file
diff --git a/src/main/scala/BIDMach/networks/layers/TanhLayer.scala b/src/main/scala/BIDMach/networks/layers/TanhLayer.scala
index 7cf79d63..0a8baafa 100644
--- a/src/main/scala/BIDMach/networks/layers/TanhLayer.scala
+++ b/src/main/scala/BIDMach/networks/layers/TanhLayer.scala
@@ -10,8 +10,8 @@ import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
-import scala.util.hashing.MurmurHash3;
-import java.util.HashMap;
+import scala.util.hashing.MurmurHash3
+import java.util.HashMap
import BIDMach.networks._
/**
@@ -20,19 +20,19 @@ import BIDMach.networks._
class TanhLayer(override val net:Net, override val opts:TanhNodeOpts = new TanhNode) extends Layer(net, opts) {
- override def forward = {
- val start = toc;
- createOutput;
- tanh(inputData, output);
- clearDeriv;
- forwardtime += toc - start;
- }
-
- override def backward = {
- val start = toc;
- if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + LayerFn.applyderiv(output, deriv, LayerFn.TANHFN);
- backwardtime += toc - start;
- }
+ override def forward = {
+ val start = toc
+ createOutput
+ tanh(inputData, output)
+ clearDeriv
+ forwardtime += toc - start
+ }
+
+ override def backward = {
+ val start = toc
+ if (inputDeriv.asInstanceOf[AnyRef] != null) inputDeriv ~ inputDeriv + LayerFn.applyderiv(output, deriv, LayerFn.TANHFN)
+ backwardtime += toc - start
+ }
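Note that backward hands output, not inputData, to applyderiv: with y = tanh(x), the derivative 1 - tanh(x)^2 = 1 - y^2 is computable from the stored activation alone. A quick numeric check in plain Scala:

val x = 0.7
val y = math.tanh(x)
val h = 1e-6
val numeric = (math.tanh(x + h) - y) / h
println(f"numeric=$numeric%.6f analytic=${1 - y * y}%.6f")  // both ~= 0.6347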
override def toString = {
"tanh@"+Integer.toHexString(hashCode % 0x10000).toString
@@ -44,7 +44,7 @@ trait TanhNodeOpts extends NodeOpts {
class TanhNode extends Node with TanhNodeOpts {
- override def clone:TanhNode = {copyTo(new TanhNode).asInstanceOf[TanhNode];}
+ override def clone:TanhNode = {copyTo(new TanhNode).asInstanceOf[TanhNode]}
override def create(net:Net):TanhLayer = {TanhLayer(net, this);}
@@ -55,7 +55,7 @@ class TanhNode extends Node with TanhNodeOpts {
object TanhLayer {
- def apply(net:Net) = new TanhLayer(net, new TanhNode);
+ def apply(net:Net) = new TanhLayer(net, new TanhNode)
- def apply(net:Net, opts:TanhNode) = new TanhLayer(net, opts);
+ def apply(net:Net, opts:TanhNode) = new TanhLayer(net, opts)
}
diff --git a/src/main/scala/BIDMach/updaters/ADAGrad.scala b/src/main/scala/BIDMach/updaters/ADAGrad.scala
index 3635022b..27cc6d1a 100755
--- a/src/main/scala/BIDMach/updaters/ADAGrad.scala
+++ b/src/main/scala/BIDMach/updaters/ADAGrad.scala
@@ -17,7 +17,7 @@ class ADAGrad(override val opts:ADAGrad.Opts = new ADAGrad.Options) extends Upda
var sumSq:Array[Mat] = null
var stepn:Mat = null
var mask:Mat = null
- var momentum:Array[Mat] = null;
+ var momentum:Array[Mat] = null
var ve:Mat = null
var pe:Mat = null
var te:Mat = null
@@ -28,165 +28,165 @@ class ADAGrad(override val opts:ADAGrad.Opts = new ADAGrad.Options) extends Upda
override def init(model0:Model) = {
model = model0
- modelmats = model.modelmats;
- updatemats = model.updatemats;
- val mm = modelmats(0);
- mask = opts.mask;
- val nmats = modelmats.length;
- sumSq = new Array[Mat](nmats);
- val hasmomentum = (opts.momentum.asInstanceOf[AnyRef] != null || opts.nesterov.asInstanceOf[AnyRef] != null);
- if (hasmomentum) momentum = new Array[Mat](nmats);
+ modelmats = model.modelmats
+ updatemats = model.updatemats
+ val mm = modelmats(0)
+ mask = opts.mask
+ val nmats = modelmats.length
+ sumSq = new Array[Mat](nmats)
+ val hasmomentum = (opts.momentum.asInstanceOf[AnyRef] != null || opts.nesterov.asInstanceOf[AnyRef] != null)
+ if (hasmomentum) momentum = new Array[Mat](nmats)
for (i <- 0 until nmats) {
- sumSq(i) = modelmats(i).ones(modelmats(i).nrows, modelmats(i).ncols) *@ opts.initsumsq
- if (hasmomentum) momentum(i) = modelmats(i).zeros(modelmats(i).nrows, modelmats(i).ncols);
+ sumSq(i) = modelmats(i).ones(modelmats(i).nrows, modelmats(i).ncols) *@ opts.initsumsq
+ if (hasmomentum) momentum(i) = modelmats(i).zeros(modelmats(i).nrows, modelmats(i).ncols)
}
if (opts.langevin > 0) {
- randmat = new Array[Mat](nmats);
- for (i <- 0 until nmats) {
- randmat(i) = modelmats(i).zeros(modelmats(i).nrows, modelmats(i).ncols);
- }
+ randmat = new Array[Mat](nmats)
+ for (i <- 0 until nmats) {
+ randmat(i) = modelmats(i).zeros(modelmats(i).nrows, modelmats(i).ncols)
+ }
}
- stepn = mm.zeros(1,1);
- one = mm.ones(1,1);
- ve = mm.zeros(opts.vexp.nrows, opts.vexp.ncols);
- if (opts.texp.asInstanceOf[AnyRef] != null) te = mm.zeros(opts.texp.nrows, opts.texp.ncols);
- if (opts.pexp.asInstanceOf[AnyRef] != null) pe = mm.zeros(opts.pexp.nrows, opts.pexp.ncols);
- lrate = mm.zeros(opts.lrate.nrows, 1);
- mu = mm.zeros(1,1);
- ve <-- opts.vexp;
- te <-- opts.texp;
+ stepn = mm.zeros(1,1)
+ one = mm.ones(1,1)
+ ve = mm.zeros(opts.vexp.nrows, opts.vexp.ncols)
+ if (opts.texp.asInstanceOf[AnyRef] != null) te = mm.zeros(opts.texp.nrows, opts.texp.ncols)
+ if (opts.pexp.asInstanceOf[AnyRef] != null) pe = mm.zeros(opts.pexp.nrows, opts.pexp.ncols)
+ lrate = mm.zeros(opts.lrate.nrows, 1)
+ mu = mm.zeros(1,1)
+ ve <-- opts.vexp
+ te <-- opts.texp
}
def update2(ipass:Int, step:Long):Unit = {
- modelmats = model.modelmats;
- updatemats = model.updatemats;
- val nsteps = if (step == 0) 1f else {
- if (firstStep == 0f) {
- firstStep = step;
- 1f;
- } else {
- step / firstStep;
- }
- }
- stepn.set(nsteps+1);
- val nw = one / stepn;
- val nmats = math.min(modelmats.length, updatemats.length)
- // println("u2 sumsq %g" format mini(sumSq(0)).dv)
- for (i <- 0 until nmats) {
- val um = updatemats(i);
- val mm = modelmats(i);
- val ss = sumSq(i);
- if (opts.lrate.ncols > 1) {
- lrate <-- opts.lrate(?,i);
- } else {
- lrate <-- opts.lrate;
- }
- val newsquares = um *@ um;
- newsquares ~ newsquares *@ nw;
- ss ~ ss *@ (one - nw);
- ss ~ ss + newsquares;
- if (opts.waitsteps < nsteps) {
- val grad = ss ^ ve;
- if (java.lang.Double.isNaN(sum(sum(grad)).dv)) throw new RuntimeException("ADA0 1 "+i);
- grad ~ grad *@ (stepn ^ te);
- if (java.lang.Double.isNaN(sum(sum(grad)).dv)) throw new RuntimeException("ADA0 2 "+i);
- grad ~ grad + opts.epsilon;
- mm ~ mm + ((um / grad) *@ lrate);
- if (java.lang.Double.isNaN(sum(sum(mm)).dv)) throw new RuntimeException("ADA0 3 "+i);
- if (mask != null) mm ~ mm *@ mask;
- }
- um.clear;
- }
+ modelmats = model.modelmats
+ updatemats = model.updatemats
+ val nsteps = if (step == 0) 1f else {
+ if (firstStep == 0f) {
+ firstStep = step
+ 1f
+ } else {
+ step / firstStep
+ }
+ }
+ stepn.set(nsteps+1)
+ val nw = one / stepn
+ val nmats = math.min(modelmats.length, updatemats.length)
+ // println("u2 sumsq %g" format mini(sumSq(0)).dv)
+ for (i <- 0 until nmats) {
+ val um = updatemats(i)
+ val mm = modelmats(i)
+ val ss = sumSq(i)
+ if (opts.lrate.ncols > 1) {
+ lrate <-- opts.lrate(?,i)
+ } else {
+ lrate <-- opts.lrate
+ }
+ val newsquares = um *@ um
+ newsquares ~ newsquares *@ nw
+ ss ~ ss *@ (one - nw)
+ ss ~ ss + newsquares
+ if (opts.waitsteps < nsteps) {
+ val grad = ss ^ ve
+ if (java.lang.Double.isNaN(sum(sum(grad)).dv)) throw new RuntimeException("ADA0 1 "+i)
+ grad ~ grad *@ (stepn ^ te)
+ if (java.lang.Double.isNaN(sum(sum(grad)).dv)) throw new RuntimeException("ADA0 2 "+i)
+ grad ~ grad + opts.epsilon
+ mm ~ mm + ((um / grad) *@ lrate)
+ if (java.lang.Double.isNaN(sum(sum(mm)).dv)) throw new RuntimeException("ADA0 3 "+i)
+ if (mask != null) mm ~ mm *@ mask
+ }
+ um.clear
+ }
}
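Collapsed to scalars, update2 keeps ss as an exponential moving average of squared gradients with weight nw = 1/(nsteps+1), and steps the model by lrate * g / (ss^ve * (nsteps+1)^te + epsilon). A plain-Scala sketch of that arithmetic (not the library call):

def adagradStep(mm: Double, g: Double, ss: Double, nsteps: Double,
                lrate: Double, ve: Double, te: Double, eps: Double): (Double, Double) = {
  val nw = 1.0 / (nsteps + 1)                    // averaging weight, as one / stepn
  val ss2 = ss * (1 - nw) + (g * g) * nw         // decayed mean of squared grads
  val denom = math.pow(ss2, ve) * math.pow(nsteps + 1, te) + eps
  (mm + lrate * g / denom, ss2)                  // new model entry and new ss
}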
-
+
override def update(ipass:Int, step:Long, gprogress:Float):Unit = {
- val start = toc;
+ val start = toc
modelmats = model.modelmats
updatemats = model.updatemats
val nsteps = if (step == 0) 1f else {
if (firstStep == 0f) {
- firstStep = step;
- 1f;
+ firstStep = step
+ 1f
} else {
- step / firstStep;
+ step / firstStep
}
}
val tscale = if (opts.texp.asInstanceOf[AnyRef] != null) {
- stepn.set(1/(nsteps+1));
- stepn ^ te;
+ stepn.set(1/(nsteps+1))
+ stepn ^ te
} else {
- stepn.set(1f/(ipass+1));
- stepn ^ pe;
+ stepn.set(1f/(ipass+1))
+ stepn ^ pe
}
- val nw = stepn;
- val nmats = math.min(modelmats.length, updatemats.length);
+ val nw = stepn
+ val nmats = math.min(modelmats.length, updatemats.length)
// println("u sumsq %g" format mini(sumSq(0)).dv)
for (i <- 0 until nmats) {
- if (opts.policies.asInstanceOf[AnyRef] != null) {
- if (opts.policies.length > 1) {
- tscale.set(opts.policies(i)(nsteps, gprogress));
- } else {
- tscale.set(opts.policies(0)(nsteps, gprogress));
- }
- }
- val mm = modelmats(i);
- val um = updatemats(i);
- val ss = sumSq(i);
- if (opts.lrate.ncols > 1) {
- lrate <-- opts.lrate(?,i);
- } else {
- lrate <-- opts.lrate;
- }
- (mm, um, ss, ve, tscale, lrate) match {
- case (gmm:GMat, gum:GMat, gss:GMat, gve:GMat, gts:GMat, glrate:GMat) => {
+ if (opts.policies.asInstanceOf[AnyRef] != null) {
+ if (opts.policies.length > 1) {
+ tscale.set(opts.policies(i)(nsteps, gprogress))
+ } else {
+ tscale.set(opts.policies(0)(nsteps, gprogress))
+ }
+ }
+ val mm = modelmats(i)
+ val um = updatemats(i)
+ val ss = sumSq(i)
+ if (opts.lrate.ncols > 1) {
+ lrate <-- opts.lrate(?,i)
+ } else {
+ lrate <-- opts.lrate
+ }
+ (mm, um, ss, ve, tscale, lrate) match {
+ case (gmm:GMat, gum:GMat, gss:GMat, gve:GMat, gts:GMat, glrate:GMat) => {
if (opts.momentum.asInstanceOf[AnyRef] != null) {
- val mu = if (opts.momentum.length > 1) opts.momentum(i) else opts.momentum(0);
- ADAGrad.ADAGradm(gmm, gum, gss, momentum.asInstanceOf[GMat], mu, mask.asInstanceOf[GMat], nw.dv.toFloat, gve, gts, glrate, opts.langevin, opts.epsilon, (opts.waitsteps < nsteps));
+ val mu = if (opts.momentum.length > 1) opts.momentum(i) else opts.momentum(0)
+ ADAGrad.ADAGradm(gmm, gum, gss, momentum.asInstanceOf[GMat], mu, mask.asInstanceOf[GMat], nw.dv.toFloat, gve, gts, glrate, opts.langevin, opts.epsilon, (opts.waitsteps < nsteps))
} else if (opts.nesterov.asInstanceOf[AnyRef] != null) {
- val mu = if (opts.nesterov.length > 1) opts.nesterov(i) else opts.nesterov(0);
- ADAGrad.ADAGradn(gmm, gum, gss, momentum.asInstanceOf[GMat], mu, mask.asInstanceOf[GMat], nw.dv.toFloat, gve, gts, glrate, opts.langevin, opts.epsilon, (opts.waitsteps < nsteps));
+ val mu = if (opts.nesterov.length > 1) opts.nesterov(i) else opts.nesterov(0)
+ ADAGrad.ADAGradn(gmm, gum, gss, momentum.asInstanceOf[GMat], mu, mask.asInstanceOf[GMat], nw.dv.toFloat, gve, gts, glrate, opts.langevin, opts.epsilon, (opts.waitsteps < nsteps))
} else {
- ADAGrad.ADAGradx(gmm, gum, gss, mask.asInstanceOf[GMat], nw.dv.toFloat, gve, gts, glrate, opts.langevin, opts.epsilon, (opts.waitsteps < nsteps));
+ ADAGrad.ADAGradx(gmm, gum, gss, mask.asInstanceOf[GMat], nw.dv.toFloat, gve, gts, glrate, opts.langevin, opts.epsilon, (opts.waitsteps < nsteps))
}
- }
- case _ => {
- val newsquares = um *@ um;
- newsquares ~ newsquares *@ nw;
- ss ~ ss *@ (one - nw);
- ss ~ ss + newsquares;
- if (opts.waitsteps < nsteps) {
- // if (java.lang.Double.isNaN(sum(sum(ss)).dv)) throw new RuntimeException("ADAGrad NaN in sumsquares matrix "+i);
- val grad = ss ^ ve;
- // if (java.lang.Double.isNaN(sum(sum(grad)).dv)) throw new RuntimeException("ADAGrad NaN in scaled sumsquares matrix "+i);
- grad ~ grad + opts.epsilon;
- grad ~ um / grad; // Normalized gradient
+ }
+ case _ => {
+ val newsquares = um *@ um
+ newsquares ~ newsquares *@ nw
+ ss ~ ss *@ (one - nw)
+ ss ~ ss + newsquares
+ if (opts.waitsteps < nsteps) {
+ // if (java.lang.Double.isNaN(sum(sum(ss)).dv)) throw new RuntimeException("ADAGrad NaN in sumsquares matrix "+i)
+ val grad = ss ^ ve
+ // if (java.lang.Double.isNaN(sum(sum(grad)).dv)) throw new RuntimeException("ADAGrad NaN in scaled sumsquares matrix "+i)
+ grad ~ grad + opts.epsilon
+ grad ~ um / grad; // Normalized gradient
if (opts.langevin > 0) { // Add Langevin random perturbations
- normrnd(0, opts.langevin, randmat(i));
- grad ~ grad + randmat(i);
+ normrnd(0, opts.langevin, randmat(i))
+ grad ~ grad + randmat(i)
}
- // if (java.lang.Double.isNaN(sum(sum(grad)).dv)) throw new RuntimeException("ADAGrad NaN in gradient quotient in derivative "+i);
- grad ~ grad *@ (tscale *@ lrate); // Basic scaled gradient
+ // if (java.lang.Double.isNaN(sum(sum(grad)).dv)) throw new RuntimeException("ADAGrad NaN in gradient quotient in derivative "+i)
+ grad ~ grad *@ (tscale *@ lrate); // Basic scaled gradient
if (opts.momentum.asInstanceOf[AnyRef] != null) {
- val i0 = if (opts.momentum.length > 1) i else 0;
+ val i0 = if (opts.momentum.length > 1) i else 0
mu <-- opts.momentum(i0); // Get the momentum decay rate
grad ~ grad + momentum(i); // Add momentum to the gradient
- momentum(i) ~ grad *@ mu; // update momentum using the new gradient
+ momentum(i) ~ grad *@ mu; // update momentum using the new gradient
}
if (opts.nesterov.asInstanceOf[AnyRef] != null) {
- val i0 = if (opts.nesterov.length > 1) i else 0;
+ val i0 = if (opts.nesterov.length > 1) i else 0
mu <-- opts.nesterov(i0); // Get the momentum decay rate
grad ~ grad + momentum(i); // Add momentum to the gradient
mm ~ mm - momentum(i); // A bit of algebra, remove old momentum from the model
momentum(i) ~ grad *@ mu; // Update the momentum
- mm ~ mm + momentum(i); // Add the new momentum to the model;
+ mm ~ mm + momentum(i); // Add the new momentum to the model
}
mm ~ mm + grad; // Add full gradient to the model
- if (mask != null) mm ~ mm *@ mask;
- }
- }
- }
+ if (mask != null) mm ~ mm *@ mask
+ }
+ }
+ }
}
- runningtime += toc - start;
+ runningtime += toc - start
}
}
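The Nesterov branch's "bit of algebra" appears to keep the stored model at the lookahead point (parameters plus mu times momentum), so the next pass evaluates gradients there; the subtract/refresh/add sequence swaps the old momentum term for the new one. A scalar walk-through of the same assignments, with arbitrary values:

var mm = 1.0        // stored model entry (already includes the old momentum term)
var v  = 0.2        // momentum(i)
val mu = 0.9        // nesterov decay rate
var grad = 0.05     // lrate- and tscale-scaled gradient for this step
grad += v           // grad ~ grad + momentum(i)
mm   -= v           // remove old momentum from the model
v     = grad * mu   // momentum(i) ~ grad *@ mu
mm   += v           // add the new momentum
mm   += grad        // mm ~ mm + grad, applied after the momentum bookkeeping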
@@ -203,171 +203,171 @@ object ADAGrad {
def multUpdateHelperT(a:FMat, b:SMat, mm:FMat, ssq:FMat, mask:FMat, lrate:FMat, vexp:FMat, texp:FMat,
istep:Float, addgrad:Int, epsilon:Float, ithread:Int, numThreads:Int) = {
- val nr = a.nrows;
- val lrdim = lrate.length;
- val vedim = vexp.length;
- val tedim = texp.length;
- val istart = (1L*ithread*nr/numThreads).toInt;
- val iend = (1L*(ithread+1)*nr/numThreads).toInt;
- val ioff = Mat.ioneBased;
- var i = 0;
+ val nr = a.nrows
+ val lrdim = lrate.length
+ val vedim = vexp.length
+ val tedim = texp.length
+ val istart = (1L*ithread*nr/numThreads).toInt
+ val iend = (1L*(ithread+1)*nr/numThreads).toInt
+ val ioff = Mat.ioneBased
+ var i = 0
while (i < b.ncols) {
- var j = b.jc(i) - ioff;
- while (j < b.jc(i+1)-ioff) {
- val dval = b.data(j);
- val ival = b.ir(j) - ioff;
- var k = istart;
- while (k < iend) {
- val grad = a.data(k+i*nr)*dval;
- ssq.data(k+ival*nr) += grad*grad + epsilon;
- if (addgrad > 0) {
- val lr = if (lrdim > 1) lrate.data(k) else lrate.data(0);
- val ve = if (vedim > 1) vexp.data(k) else vexp.data(0);
- val te = if (tedim > 1) texp.data(k) else texp.data(0);
- val pve = if (ve == 0) 1f else math.pow(ssq.data(k+ival*nr) * istep, ve).toFloat;
- val ste = math.pow(istep, te).toFloat;
- val ngrad = grad * lr * ste / pve;
- mm.data(k+ival*nr) += ngrad;
- }
- k += 1;
- }
- if (mask.asInstanceOf[AnyRef] != null) {
- k = istart;
- if (mask.nrows == 1) {
- while (k < iend) {
- mm.data(k+ival*nr) *= mask.data(ival);
- k += 1;
- }
- } else {
- while (k < iend) {
- mm.data(k+ival*nr) *= mask.data(k+ival*nr);
- k += 1;
- }
- }
- }
- j += 1;
- }
- i += 1;
+ var j = b.jc(i) - ioff
+ while (j < b.jc(i+1)-ioff) {
+ val dval = b.data(j)
+ val ival = b.ir(j) - ioff
+ var k = istart
+ while (k < iend) {
+ val grad = a.data(k+i*nr)*dval
+ ssq.data(k+ival*nr) += grad*grad + epsilon
+ if (addgrad > 0) {
+ val lr = if (lrdim > 1) lrate.data(k) else lrate.data(0)
+ val ve = if (vedim > 1) vexp.data(k) else vexp.data(0)
+ val te = if (tedim > 1) texp.data(k) else texp.data(0)
+ val pve = if (ve == 0) 1f else math.pow(ssq.data(k+ival*nr) * istep, ve).toFloat
+ val ste = math.pow(istep, te).toFloat
+ val ngrad = grad * lr * ste / pve
+ mm.data(k+ival*nr) += ngrad
+ }
+ k += 1
+ }
+ if (mask.asInstanceOf[AnyRef] != null) {
+ k = istart
+ if (mask.nrows == 1) {
+ while (k < iend) {
+ mm.data(k+ival*nr) *= mask.data(ival)
+ k += 1
+ }
+ } else {
+ while (k < iend) {
+ mm.data(k+ival*nr) *= mask.data(k+ival*nr)
+ k += 1
+ }
+ }
+ }
+ j += 1
+ }
+ i += 1
}
}
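The helper above shards rows across threads with the [i*nr/numThreads, (i+1)*nr/numThreads) pattern, computed in Long to avoid overflow; the resulting ranges are contiguous, disjoint, and cover every row even when numThreads does not divide nr:

val nr = 10; val numThreads = 4
val ranges = (0 until numThreads).map { t =>
  ((1L * t * nr / numThreads).toInt, (1L * (t + 1) * nr / numThreads).toInt)
}
// ranges == Vector((0,2), (2,5), (5,7), (7,10))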
/**
* Integrate the last stage of a gradient update (sparse, transposed multiply) with ADAGRAD.
- * Supports both CPU and GPU implementation.
+ * Supports both CPU and GPU implementation.
*/
def multUpdate(a:Mat, b:Mat, mm:Mat, sumSq:Mat, mask:Mat, lrate:Mat, vexp:Mat, texp:Mat, eps:Float, step:Float, waitsteps:Int):Unit =
- multUpdate(a, b, mm, sumSq, mask, lrate, vexp, texp, eps, step, waitsteps, false);
+ multUpdate(a, b, mm, sumSq, mask, lrate, vexp, texp, eps, step, waitsteps, false)
def multUpdate(a:Mat, b:Mat, mm:Mat, sumSq:Mat, mask:Mat, lrate:Mat, vexp:Mat, texp:Mat, eps:Float, step:Float, waitsteps:Int, hasBias:Boolean):Unit = {
- val istep = 1f/step;
- val addgrad = if (step > waitsteps - 0.5f) 1 else 0;
- val nr = a.nrows;
- val nc = b.ncols;
- val nbr = b.nrows;
- val biasv = if (hasBias) 1 else 0;
+ val istep = 1f/step
+ val addgrad = if (step > waitsteps - 0.5f) 1 else 0
+ val nr = a.nrows
+ val nc = b.ncols
+ val nbr = b.nrows
+ val biasv = if (hasBias) 1 else 0
(a, b, mm, sumSq, lrate, vexp, texp) match {
case (fa:FMat, sb:SMat, fmm:FMat, fssq:FMat, flrate:FMat, fvexp:FMat, ftexp:FMat) => {
- Mat.nflops += 20L * nr * b.nnz;
- val fmask = mask.asInstanceOf[FMat];
- val masknr = if (fmask.asInstanceOf[AnyRef] != null) fmask.nrows else 0;
+ Mat.nflops += 20L * nr * b.nnz
+ val fmask = mask.asInstanceOf[FMat]
+ val masknr = if (fmask.asInstanceOf[AnyRef] != null) fmask.nrows else 0
CPUMACH.multADAGrad(nr, nc, b.nnz, fa.data, sb.data, sb.ir, sb.jc, fmm.data, fssq.data, if (fmask != null) fmask.data else null, masknr,
- flrate.data, flrate.nrows, fvexp.data, fvexp.nrows, ftexp.data, ftexp.nrows, istep, addgrad, eps, biasv, nbr);
+ flrate.data, flrate.nrows, fvexp.data, fvexp.nrows, ftexp.data, ftexp.nrows, istep, addgrad, eps, biasv, nbr)
}
case (ga:GMat, gsb:GSMat, gmm:GMat, gssq:GMat, glrate:GMat, gvexp:GMat, gtexp:GMat) => {
- Mat.nflops += 20L * nr * b.nnz;
- val gmask0 = mask.asInstanceOf[GMat];
- val gmaskdata = if (gmask0.asInstanceOf[AnyRef] != null) gmask0.data else new jcuda.Pointer();
- val masknr = if (gmask0.asInstanceOf[AnyRef] != null) gmask0.nrows else 0;
+ Mat.nflops += 20L * nr * b.nnz
+ val gmask0 = mask.asInstanceOf[GMat]
+ val gmaskdata = if (gmask0.asInstanceOf[AnyRef] != null) gmask0.data else new jcuda.Pointer()
+ val masknr = if (gmask0.asInstanceOf[AnyRef] != null) gmask0.nrows else 0
CUMACH.multADAGrad(nr, nc, b.nnz, ga.data, gsb.data, gsb.ir, gsb.ic, gmm.data, gssq.data, gmaskdata, masknr,
glrate.data, lrate.nrows, gvexp.data, vexp.nrows, gtexp.data, texp.nrows, istep, addgrad, eps, biasv, nbr)
}
case (fa:FMat, sb:SMat, fmm:TMat, fssq:TMat, flrate:FMat, fvexp:FMat, ftexp:FMat) => {
- Mat.nflops += 20L * nr * b.nnz;
- val fmask = mask.asInstanceOf[FMat];
- val masknr = if (fmask.asInstanceOf[AnyRef] != null) fmask.nrows else 0;
+ Mat.nflops += 20L * nr * b.nnz
+ val fmask = mask.asInstanceOf[FMat]
+ val masknr = if (fmask.asInstanceOf[AnyRef] != null) fmask.nrows else 0
for (i <- 0 until fmm.tiles.length) {
- val mmtile = fmm.tiles(i).asInstanceOf[FMat];
- val ssqtile = fssq.tiles(i).asInstanceOf[FMat];
- val nr = mmtile.nrows;
- val nc = mmtile.ncols;
- val y = fmm.y(i);
- val x = fmm.x(i);
- CPUMACH.multADAGradTile(nr, nc, y, x, b.nnz, fa.data, fa.nrows, sb.data, sb.ir, sb.jc, mmtile.data, ssqtile.data, if (fmask != null) fmask.data else null, masknr,
- flrate.data, flrate.nrows, fvexp.data, fvexp.nrows, ftexp.data, ftexp.nrows, istep, addgrad, eps, biasv, nbr);
+ val mmtile = fmm.tiles(i).asInstanceOf[FMat]
+ val ssqtile = fssq.tiles(i).asInstanceOf[FMat]
+ val nr = mmtile.nrows
+ val nc = mmtile.ncols
+ val y = fmm.y(i)
+ val x = fmm.x(i)
+ CPUMACH.multADAGradTile(nr, nc, y, x, b.nnz, fa.data, fa.nrows, sb.data, sb.ir, sb.jc, mmtile.data, ssqtile.data, if (fmask != null) fmask.data else null, masknr,
+ flrate.data, flrate.nrows, fvexp.data, fvexp.nrows, ftexp.data, ftexp.nrows, istep, addgrad, eps, biasv, nbr)
}
}
case (ga:GMat, gsb:GSMat, gmm:TMat, gssq:TMat, glrate:GMat, gvexp:GMat, gtexp:GMat) => {
- Mat.nflops += 20L * nr * b.nnz;
-// println("istep=%f" format istep);
- val gmask0 = mask.asInstanceOf[GMat];
- val gmaskdata = if (gmask0.asInstanceOf[AnyRef] != null) gmask0.data else new jcuda.Pointer();
- val masknr = if (gmask0.asInstanceOf[AnyRef] != null) gmask0.nrows else 0;
+ Mat.nflops += 20L * nr * b.nnz
+// println("istep=%f" format istep)
+ val gmask0 = mask.asInstanceOf[GMat]
+ val gmaskdata = if (gmask0.asInstanceOf[AnyRef] != null) gmask0.data else new jcuda.Pointer()
+ val masknr = if (gmask0.asInstanceOf[AnyRef] != null) gmask0.nrows else 0
for (i <- 0 until gmm.tiles.length) {
- val mmtile = gmm.tiles(i).asInstanceOf[GMat];
- val ssqtile = gssq.tiles(i).asInstanceOf[GMat];
- val nr = mmtile.nrows;
- val nc = mmtile.ncols;
- val y = gmm.y(i);
- val x = gmm.x(i);
+ val mmtile = gmm.tiles(i).asInstanceOf[GMat]
+ val ssqtile = gssq.tiles(i).asInstanceOf[GMat]
+ val nr = mmtile.nrows
+ val nc = mmtile.ncols
+ val y = gmm.y(i)
+ val x = gmm.x(i)
CUMACH.multADAGradTile(nr, nc, y, x, gsb.nnz, ga.data, ga.nrows, gsb.data, gsb.ir, gsb.ic, mmtile.data, ssqtile.data, gmaskdata, masknr,
- glrate.data, lrate.nrows, gvexp.data, vexp.nrows, gtexp.data, texp.nrows, istep, addgrad, eps, biasv, nbr)
+ glrate.data, lrate.nrows, gvexp.data, vexp.nrows, gtexp.data, texp.nrows, istep, addgrad, eps, biasv, nbr)
}
}
case _ => {
val grad0 = mm match {
- case tmm:TMat => mm + 0f;
- case _ => mm.view(mm.nrows, mm.ncols - (if (hasBias) 1 else 0)) + 0;
+ case tmm:TMat => mm + 0f
+ case _ => mm.view(mm.nrows, mm.ncols - (if (hasBias) 1 else 0)) + 0
}
- grad0.clear;
- a.madd(b, grad0, false, true);
- val grad = if (hasBias) grad0 \ sum(a,2) else grad0;
- val ssq = grad ∘ grad;
- ssq ~ ssq ∘ istep;
- sumSq ~ sumSq ∘ (1f - istep);
- sumSq ~ sumSq + ssq;
- ssq ~ sumSq ^ vexp;
- grad ~ grad / ssq;
- val te = texp + 0f;
- te.set(istep);
- te ~ te ^ texp;
- grad ~ grad ∘ (lrate ∘ te);
- mm ~ mm + grad;
+ grad0.clear
+ a.madd(b, grad0, false, true)
+ val grad = if (hasBias) grad0 \ sum(a,2) else grad0
+ val ssq = grad ∘ grad
+ ssq ~ ssq ∘ istep
+ sumSq ~ sumSq ∘ (1f - istep)
+ sumSq ~ sumSq + ssq
+ ssq ~ sumSq ^ vexp
+ grad ~ grad / ssq
+ val te = texp + 0f
+ te.set(istep)
+ te ~ te ^ texp
+ grad ~ grad ∘ (lrate ∘ te)
+ mm ~ mm + grad
}
}
}
def pairMultUpdate(a:Mat, b:Mat, mm:Mat, sumSq:Mat, mask:Mat, lrate:Mat, vexp:Mat, texp:Mat, eps:Float, step:Float, waitsteps:Int, hasBias:Boolean):Unit = {
- val istep = 1f/step;
- val addgrad = if (step > waitsteps - 0.5f) 1 else 0;
- val biasv = if (hasBias) 1 else 0;
+ val istep = 1f/step
+ val addgrad = if (step > waitsteps - 0.5f) 1 else 0
+ val biasv = if (hasBias) 1 else 0
(a, b, mm, sumSq, lrate, vexp, texp) match {
case (ga:GMat, gsb:GSMat, gmm:GMat, gssq:GMat, glrate:GMat, gvexp:GMat, gtexp:GMat) => {
- val nr = a.nrows;
- val nc = b.ncols;
- val nbr = b.nrows;
- val nfeats = mm.ncols/2;
- Mat.nflops += 20L * nr * b.nnz;
- val (gmdata, masklen) = if (mask.asInstanceOf[AnyRef] != null) (mask.asInstanceOf[GMat].data, mask.length) else (null, 0);
- CUMACH.pairMultADAGradTile(nr, nc, nfeats, nfeats, ga.data, nr, 0, 0, gsb.data, gsb.ir, gsb.jc, 0, 0, 1, gmm.data, mm.nrows,
- gssq.data, gmdata, masklen, glrate.data, lrate.length, gvexp.data, vexp.length, gtexp.data, texp.length,
- istep, 1, eps);
+ val nr = a.nrows
+ val nc = b.ncols
+ val nbr = b.nrows
+ val nfeats = mm.ncols/2
+ Mat.nflops += 20L * nr * b.nnz
+ val (gmdata, masklen) = if (mask.asInstanceOf[AnyRef] != null) (mask.asInstanceOf[GMat].data, mask.length) else (null, 0)
+ CUMACH.pairMultADAGradTile(nr, nc, nfeats, nfeats, ga.data, nr, 0, 0, gsb.data, gsb.ir, gsb.jc, 0, 0, 1, gmm.data, mm.nrows,
+ gssq.data, gmdata, masklen, glrate.data, lrate.length, gvexp.data, vexp.length, gtexp.data, texp.length,
+ istep, 1, eps)
}
case (ga:GMat, gsb:GSMat, gmm:TMat, gssq:TMat, glrate:GMat, gvexp:GMat, gtexp:GMat) => {
- Mat.nflops += 20L * a.nrows * b.nnz;
- for (i <- 0 until gmm.tiles.length) {
- val mmtile = gmm.tiles(i).asInstanceOf[GMat];
- val ssqtile = gssq.tiles(i).asInstanceOf[GMat];
- val nr = mmtile.nrows;
- val nc = mmtile.ncols;
- val nfeats = mmtile.ncols/2;
- val y = gmm.y(i);
- val x = gmm.x(i);
- val (gmdata, masklen) = if (mask.asInstanceOf[AnyRef] != null) (mask.asInstanceOf[GMat].data, mask.length) else (null, 0);
- CUMACH.pairMultADAGradTile(nr, nc, nfeats, nfeats, ga.data, y, 0, nr, gsb.data, gsb.ir, gsb.jc, x, 0, 1,
- mmtile.data, mm.nrows, ssqtile.data, gmdata, masklen, glrate.data, lrate.length,
- gvexp.data, vexp.length, gtexp.data, texp.length, istep, 1, eps);
- }
+ Mat.nflops += 20L * a.nrows * b.nnz
+ for (i <- 0 until gmm.tiles.length) {
+ val mmtile = gmm.tiles(i).asInstanceOf[GMat]
+ val ssqtile = gssq.tiles(i).asInstanceOf[GMat]
+ val nr = mmtile.nrows
+ val nc = mmtile.ncols
+ val nfeats = mmtile.ncols/2
+ val y = gmm.y(i)
+ val x = gmm.x(i)
+ val (gmdata, masklen) = if (mask.asInstanceOf[AnyRef] != null) (mask.asInstanceOf[GMat].data, mask.length) else (null, 0)
+ CUMACH.pairMultADAGradTile(nr, nc, nfeats, nfeats, ga.data, y, 0, nr, gsb.data, gsb.ir, gsb.jc, x, 0, 1,
+ mmtile.data, mm.nrows, ssqtile.data, gmdata, masklen, glrate.data, lrate.length,
+ gvexp.data, vexp.length, gtexp.data, texp.length, istep, 1, eps)
+ }
}
}
}
@@ -379,52 +379,52 @@ object ADAGrad {
* Supports both CPU and GPU implementation.
*/
def hashmultUpdate(a:Mat, b:Mat, nfeats:Int, bound1:Int, bound2:Int, transpose:Int,
- mm:Mat, sumSq:Mat, mask:Mat, lrate:Mat, vexp:Mat, texp:Mat, eps:Float, step:Float, waitsteps:Int) = {
- val istep = 1f/step;
- val addgrad = if (step > waitsteps - 0.5f) 1 else 0;
- val nr = a.nrows;
- val nc = b.ncols;
- val npc = b.nnz / b.ncols;
- Mat.nflops += 2L * nr * b.nnz * npc;
+ mm:Mat, sumSq:Mat, mask:Mat, lrate:Mat, vexp:Mat, texp:Mat, eps:Float, step:Float, waitsteps:Int) = {
+ val istep = 1f/step
+ val addgrad = if (step > waitsteps - 0.5f) 1 else 0
+ val nr = a.nrows
+ val nc = b.ncols
+ val npc = b.nnz / b.ncols
+ Mat.nflops += 2L * nr * b.nnz * npc
(a, b, mm, sumSq, lrate, vexp, texp) match {
case (fa:FMat, sb:SMat, fmm:FMat, fssq:FMat, flrate:FMat, fvexp:FMat, ftexp:FMat) => {
- val fmask = mask.asInstanceOf[FMat];
- if (1L*nr*b.nnz > 100000L && Mat.numThreads > 1) {
- (0 until Mat.numThreads).par.map((ithread:Int) =>
- multUpdateHelperT(fa, sb, fmm, fssq, fmask, flrate, fvexp, ftexp, istep, addgrad, eps, ithread, Mat.numThreads));
- } else {
- multUpdateHelperT(fa, sb, fmm, fssq, fmask, flrate, fvexp, ftexp, istep, addgrad, eps, 0, 1);
- }
+ val fmask = mask.asInstanceOf[FMat]
+ if (1L*nr*b.nnz > 100000L && Mat.numThreads > 1) {
+ (0 until Mat.numThreads).par.map((ithread:Int) =>
+ multUpdateHelperT(fa, sb, fmm, fssq, fmask, flrate, fvexp, ftexp, istep, addgrad, eps, ithread, Mat.numThreads))
+ } else {
+ multUpdateHelperT(fa, sb, fmm, fssq, fmask, flrate, fvexp, ftexp, istep, addgrad, eps, 0, 1)
+ }
}
case (ga:GMat, gsb:GSMat, gmm:GMat, gssq:GMat, glrate:GMat, gvexp:GMat, gtexp:GMat) => {
- val gmask0 = mask.asInstanceOf[GMat];
- val gmaskdata = if (gmask0.asInstanceOf[AnyRef] != null) gmask0.data else new jcuda.Pointer();
- val masknr = if (gmask0.asInstanceOf[AnyRef] != null) gmask0.nrows else 0;
+ val gmask0 = mask.asInstanceOf[GMat]
+ val gmaskdata = if (gmask0.asInstanceOf[AnyRef] != null) gmask0.data else new jcuda.Pointer()
+ val masknr = if (gmask0.asInstanceOf[AnyRef] != null) gmask0.nrows else 0
val err = CUMACH.hashmultADAGrad(nr, nfeats, nc, bound1, bound2, ga.data, gsb.data, gsb.ir, gsb.jc, transpose,
gmm.data, gssq.data, gmaskdata, masknr, glrate.data, lrate.nrows, gvexp.data, vexp.nrows, gtexp.data, texp.nrows, istep, addgrad, eps)
- if (err != 0) {
- throw new RuntimeException("hashMultUpdate error " + jcuda.runtime.JCuda.cudaGetErrorString(err));
- }
+ if (err != 0) {
+ throw new RuntimeException("hashMultUpdate error " + jcuda.runtime.JCuda.cudaGetErrorString(err))
+ }
}
}
}
def ADAGradx(mm:GMat, um:GMat, ss:GMat, mask:GMat, nw:Float, ve:GMat, ts:GMat, lrate:GMat, langevin:Float, epsilon:Float, doupdate:Boolean) = {
- val (gmask, maskr) = if (mask.asInstanceOf[AnyRef] == null) (null, 0) else (mask.data, mask.nrows);
- CUMACH.ADAGrad(mm.nrows, mm.ncols, mm.data, um.data, ss.data, gmask, maskr, nw, ve.data, ve.nrows,
- ts.data, ts.nrows, lrate.data, lrate.nrows, langevin, epsilon, if (doupdate) 1 else 0);
+ val (gmask, maskr) = if (mask.asInstanceOf[AnyRef] == null) (null, 0) else (mask.data, mask.nrows)
+ CUMACH.ADAGrad(mm.nrows, mm.ncols, mm.data, um.data, ss.data, gmask, maskr, nw, ve.data, ve.nrows,
+ ts.data, ts.nrows, lrate.data, lrate.nrows, langevin, epsilon, if (doupdate) 1 else 0)
}
def ADAGradm(mm:GMat, um:GMat, ss:GMat, momentum:GMat, mu:Float, mask:GMat, nw:Float, ve:GMat, ts:GMat, lrate:GMat, langevin:Float, epsilon:Float, doupdate:Boolean) = {
- val (gmask, maskr) = if (mask.asInstanceOf[AnyRef] == null) (null, 0) else (mask.data, mask.nrows);
+ val (gmask, maskr) = if (mask.asInstanceOf[AnyRef] == null) (null, 0) else (mask.data, mask.nrows)
CUMACH.ADAGradm(mm.nrows, mm.ncols, mm.data, um.data, ss.data, momentum.data, mu, gmask, maskr, nw, ve.data, ve.nrows,
- ts.data, ts.nrows, lrate.data, lrate.nrows, langevin, epsilon, if (doupdate) 1 else 0);
+ ts.data, ts.nrows, lrate.data, lrate.nrows, langevin, epsilon, if (doupdate) 1 else 0)
}
def ADAGradn(mm:GMat, um:GMat, ss:GMat, momentum:GMat, mu:Float, mask:GMat, nw:Float, ve:GMat, ts:GMat, lrate:GMat, langevin:Float, epsilon:Float, doupdate:Boolean) = {
- val (gmask, maskr) = if (mask.asInstanceOf[AnyRef] == null) (null, 0) else (mask.data, mask.nrows);
+ val (gmask, maskr) = if (mask.asInstanceOf[AnyRef] == null) (null, 0) else (mask.data, mask.nrows)
CUMACH.ADAGradn(mm.nrows, mm.ncols, mm.data, um.data, ss.data, momentum.data, mu, gmask, maskr, nw, ve.data, ve.nrows,
- ts.data, ts.nrows, lrate.data, lrate.nrows, langevin, epsilon, if (doupdate) 1 else 0);
+ ts.data, ts.nrows, lrate.data, lrate.nrows, langevin, epsilon, if (doupdate) 1 else 0)
}
}
diff --git a/src/main/scala/BIDMach/updaters/BatchNorm.scala b/src/main/scala/BIDMach/updaters/BatchNorm.scala
index cec968dc..1b662fe7 100755
--- a/src/main/scala/BIDMach/updaters/BatchNorm.scala
+++ b/src/main/scala/BIDMach/updaters/BatchNorm.scala
@@ -15,21 +15,21 @@ class BatchNorm(override val opts:BatchNorm.Opts = new BatchNorm.Options) extend
val updatemats = model.updatemats
accumulators = new Array[Mat](updatemats.length)
for (i <- 0 until accumulators.length) {
- accumulators(i) = updatemats(i).zeros(updatemats(i).nrows, updatemats(i).ncols)
+ accumulators(i) = updatemats(i).zeros(updatemats(i).nrows, updatemats(i).ncols)
}
}
override def update(ipass:Int, step:Long) = {
- val updatemats = model.updatemats
+ val updatemats = model.updatemats
for (i <- 0 until accumulators.length) {
- accumulators(i) ~ accumulators(i) + updatemats(i)
+ accumulators(i) ~ accumulators(i) + updatemats(i)
}
}
override def clear() = {
- for (i <- 0 until accumulators.length) {
- accumulators(i).clear
- }
+ for (i <- 0 until accumulators.length) {
+ accumulators(i).clear
+ }
}
override def updateM(ipass:Int):Unit = {
diff --git a/src/main/scala/BIDMach/updaters/CG.scala b/src/main/scala/BIDMach/updaters/CG.scala
index d71fbab1..0f4cfa92 100755
--- a/src/main/scala/BIDMach/updaters/CG.scala
+++ b/src/main/scala/BIDMach/updaters/CG.scala
@@ -6,47 +6,47 @@ import BIDMat.SciFunctions._
import BIDMach.models._
class CG(override val opts:CG.Opts = new CG.Options) extends Updater(opts) {
- var res:Mat = null
- var Ap:Mat = null
- var pm:Mat = null
- var rm:Mat = null
- var zm:Mat = null
- var mm:Mat = null
- var lastStep = -1L
-
- override def init(model0:Model) = {
- super.init(model0)
- mm = model0.modelmats(0)
- res = mm.zeros(mm.nrows, mm.ncols)
- Ap = mm.zeros(mm.nrows, mm.ncols)
- pm = mm.zeros(mm.nrows, mm.ncols)
- rm = mm.zeros(mm.nrows, mm.ncols)
- model.asInstanceOf[CGUpdateable].setpm(pm)
- lastStep = -1
+ var res:Mat = null
+ var Ap:Mat = null
+ var pm:Mat = null
+ var rm:Mat = null
+ var zm:Mat = null
+ var mm:Mat = null
+ var lastStep = -1L
+
+ override def init(model0:Model) = {
+ super.init(model0)
+ mm = model0.modelmats(0)
+ res = mm.zeros(mm.nrows, mm.ncols)
+ Ap = mm.zeros(mm.nrows, mm.ncols)
+ pm = mm.zeros(mm.nrows, mm.ncols)
+ rm = mm.zeros(mm.nrows, mm.ncols)
+ model.asInstanceOf[CGUpdateable].setpm(pm)
+ lastStep = -1
}
-
- override def update(ipass:Int, step:Long) = {
- val updatemats = model.updatemats
- if (ipass < opts.spasses) {
- mm <-- updatemats(0)
- } else {
- res ~ res + updatemats(0)
- Ap ~ Ap + updatemats(1)
- }
- }
-
- override def updateM(ipass:Int) = {
-// if (ipass == 0) pm <-- res
- if (ipass >= opts.spasses) {
- if (ipass == opts.spasses || opts.moving) rm <-- res
- CG.CGupdate(pm, rm, Ap, mm, opts.meps, opts.convgd)
- }
- Ap.clear
- res.clear
- lastStep = -1
+
+ override def update(ipass:Int, step:Long) = {
+ val updatemats = model.updatemats
+ if (ipass < opts.spasses) {
+ mm <-- updatemats(0)
+ } else {
+ res ~ res + updatemats(0)
+ Ap ~ Ap + updatemats(1)
+ }
}
-
- override def clear = {
+
+ override def updateM(ipass:Int) = {
+// if (ipass == 0) pm <-- res
+ if (ipass >= opts.spasses) {
+ if (ipass == opts.spasses || opts.moving) rm <-- res
+ CG.CGupdate(pm, rm, Ap, mm, opts.meps, opts.convgd)
+ }
+ Ap.clear
+ res.clear
+ lastStep = -1
+ }
+
+ override def clear = {
}
}
@@ -56,44 +56,44 @@ trait CGUpdateable {
object CG {
trait Opts extends Updater.Opts {
- var meps = 1e-12f
- var convgd = 1e-1f
- var moving = true
- var spasses = 2
+ var meps = 1e-12f
+ var convgd = 1e-1f
+ var moving = true
+ var spasses = 2
}
class Options extends Opts {}
def CGupdate(p:Mat, r:Mat, Ap:Mat, x:Mat, weps:Float, convgd:Float) = {
- val pAp = (p dot Ap)
- max(pAp, weps, pAp)
- val rsold = (r dot r) + 0 // add 0 to make a new vector, Otherwise this will alias...
- val convec = rsold > convgd // Check convergence
- val alpha = convec ∘ (rsold / pAp) // Only process unconverged elements
- min(alpha, 1f, alpha)
- x ~ x + (p ∘ alpha)
- r ~ r - (Ap ∘ alpha)
- val rsnew = (r dot r) // ...down here
- max(rsold, weps, rsold)
- val beta = convec ∘ (rsnew / rsold)
- min(beta, 1f, beta)
- p ~ r + (p ∘ beta)
+ val pAp = (p dot Ap)
+ max(pAp, weps, pAp)
+ val rsold = (r dot r) + 0 // add 0 to make a new vector, otherwise this will alias...
+ val convec = rsold > convgd // Check convergence
+ val alpha = convec ∘ (rsold / pAp) // Only process unconverged elements
+ min(alpha, 1f, alpha)
+ x ~ x + (p ∘ alpha)
+ r ~ r - (Ap ∘ alpha)
+ val rsnew = (r dot r) // ...down here
+ max(rsold, weps, rsold)
+ val beta = convec ∘ (rsnew / rsold)
+ min(beta, 1f, beta)
+ p ~ r + (p ∘ beta)
}
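CGupdate follows the textbook conjugate-gradient recurrences (alpha = r.r / p.Ap, x += alpha p, r -= alpha Ap, beta = rnew.rnew / rold.rold, p = r + beta p); the elementwise clamping of alpha and beta and the convergence mask are BIDMach-specific additions. A self-contained plain-Scala run on a 2x2 SPD system:

val A = Array(Array(4.0, 1.0), Array(1.0, 3.0))
val b = Array(1.0, 2.0)
var x = Array(0.0, 0.0)
var r = b.clone                        // r = b - A x, with x = 0
var p = r.clone
def dot(u: Array[Double], v: Array[Double]) = (u zip v).map { case (a, c) => a * c }.sum
def matvec(m: Array[Array[Double]], v: Array[Double]) = m.map(row => dot(row, v))
for (_ <- 0 until 2) {                 // n steps solve an n x n system exactly
  val ap = matvec(A, p)
  val rsold = dot(r, r)
  val alpha = rsold / dot(p, ap)
  x = (x zip p).map { case (xi, pi) => xi + alpha * pi }
  r = (r zip ap).map { case (ri, ai) => ri - alpha * ai }
  val beta = dot(r, r) / rsold
  p = (r zip p).map { case (ri, pi) => ri + beta * pi }
}
println(x.mkString(", "))              // ~0.0909, 0.6364, i.e. A \ b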
// Preconditioned CG update
def PreCGupdate(p:Mat, r:Mat, z:Mat, Ap:Mat, x:Mat, Minv:Mat, weps:Float, convgd:Float) = {
- val pAp = (p dot Ap)
- max(pAp, weps, pAp)
- val rsold = (r dot z)
- val convec = rsold > convgd // Check convergence
- val alpha = convec ∘ (rsold / pAp) // Only process unconverged elements
- min(alpha, 1f, alpha)
- x ~ x + (p ∘ alpha)
- r ~ r - (Ap ∘ alpha)
- z ~ Minv * r
- val rsnew = (z dot r) // order is critical to avoid aliasing
- max(rsold, weps, rsold)
- val beta = convec ∘ (rsnew / rsold)
- min(beta, 1f, beta)
- p ~ z + (p ∘ beta)
+ val pAp = (p dot Ap)
+ max(pAp, weps, pAp)
+ val rsold = (r dot z)
+ val convec = rsold > convgd // Check convergence
+ val alpha = convec ∘ (rsold / pAp) // Only process unconverged elements
+ min(alpha, 1f, alpha)
+ x ~ x + (p ∘ alpha)
+ r ~ r - (Ap ∘ alpha)
+ z ~ Minv * r
+ val rsnew = (z dot r) // order is critical to avoid aliasing
+ max(rsold, weps, rsold)
+ val beta = convec ∘ (rsnew / rsold)
+ min(beta, 1f, beta)
+ p ~ z + (p ∘ beta)
}
}
diff --git a/src/main/scala/BIDMach/updaters/Grad.scala b/src/main/scala/BIDMach/updaters/Grad.scala
index 46001878..e2fe419a 100755
--- a/src/main/scala/BIDMach/updaters/Grad.scala
+++ b/src/main/scala/BIDMach/updaters/Grad.scala
@@ -13,61 +13,61 @@ class Grad(override val opts:Grad.Opts = new Grad.Options) extends Updater {
var modelmats:Array[Mat] = null
var updatemats:Array[Mat] = null
var sumSq:Mat = null
- var momentum:Array[Mat] = null;
+ var momentum:Array[Mat] = null
var stepn:Mat = null
var mask:Mat = null
var ve:Mat = null
- var te:Mat = null
- var pe:Mat = null
- var lrate:Mat = null
- var mu:Mat = null
- var randmat:Array[Mat] = null
- var norm_scaling:Mat = null
+ var te:Mat = null
+ var pe:Mat = null
+ var lrate:Mat = null
+ var mu:Mat = null
+ var randmat:Array[Mat] = null
+ var norm_scaling:Mat = null
override def init(model0:Model) = {
- model = model0;
- modelmats = model.modelmats;
- updatemats = model.updatemats;
- mask = opts.mask;
- val mm = modelmats(0);
- stepn = mm.zeros(1,1);
- val nmats = modelmats.length;
- val hasmomentum = (opts.momentum.asInstanceOf[AnyRef] != null || opts.nesterov.asInstanceOf[AnyRef] != null);
+ model = model0
+ modelmats = model.modelmats
+ updatemats = model.updatemats
+ mask = opts.mask
+ val mm = modelmats(0)
+ stepn = mm.zeros(1,1)
+ val nmats = modelmats.length
+ val hasmomentum = (opts.momentum.asInstanceOf[AnyRef] != null || opts.nesterov.asInstanceOf[AnyRef] != null)
if (hasmomentum) {
- momentum = new Array[Mat](nmats);
+ momentum = new Array[Mat](nmats)
for (i <- 0 until nmats) {
- momentum(i) = modelmats(i).zeros(modelmats(i).nrows, modelmats(i).ncols);
+ momentum(i) = modelmats(i).zeros(modelmats(i).nrows, modelmats(i).ncols)
}
}
if (opts.langevin > 0) {
- randmat = new Array[Mat](nmats);
+ randmat = new Array[Mat](nmats)
for (i <- 0 until nmats) {
- randmat(i) = modelmats(i).zeros(modelmats(i).nrows, modelmats(i).ncols);
+ randmat(i) = modelmats(i).zeros(modelmats(i).nrows, modelmats(i).ncols)
}
}
if (opts.texp.asInstanceOf[AnyRef] != null) {
- te = mm.zeros(opts.texp.nrows, opts.texp.ncols);
- te <-- opts.texp;
+ te = mm.zeros(opts.texp.nrows, opts.texp.ncols)
+ te <-- opts.texp
}
if (opts.pexp.asInstanceOf[AnyRef] != null) {
- pe = mm.zeros(opts.pexp.nrows, opts.pexp.ncols);
- pe <-- opts.pexp;
+ pe = mm.zeros(opts.pexp.nrows, opts.pexp.ncols)
+ pe <-- opts.pexp
}
- lrate = mm.zeros(opts.lrate.nrows, 1);
- mu = mm.zeros(1,1);
+ lrate = mm.zeros(opts.lrate.nrows, 1)
+ mu = mm.zeros(1,1)
}
def clipping() {
if (opts.clipByValue>0f) {
var i = 0
while (i < updatemats.length){
- min(updatemats(i),opts.clipByValue,updatemats(i));
- max(updatemats(i),-opts.clipByValue,updatemats(i));
+ min(updatemats(i),opts.clipByValue,updatemats(i))
+ max(updatemats(i),-opts.clipByValue,updatemats(i))
i+=1
}
}
if (opts.max_grad_norm>0f){
- var i=0;
+ var i=0
var tot = 0.0
while(i < updatemats.length){
if (opts.policies.asInstanceOf[AnyRef] != null) {
- if (opts.policies.length > 1) {
- tscale.set(opts.policies(i)(nsteps, gprogress));
- } else {
- tscale.set(opts.policies(0)(nsteps, gprogress));
- }
- }
- if (opts.lrate.ncols > 1) {
- lrate <-- opts.lrate(?,i);
- } else {
- lrate <-- opts.lrate;
- }
+ if (opts.policies.length > 1) {
+ tscale.set(opts.policies(i)(nsteps, gprogress))
+ } else {
+ tscale.set(opts.policies(0)(nsteps, gprogress))
+ }
+ }
+ if (opts.lrate.ncols > 1) {
+ lrate <-- opts.lrate(?,i)
+ } else {
+ lrate <-- opts.lrate
+ }
- if (opts.waitsteps < nsteps) {
- val grad = updatemats(i);
+ if (opts.waitsteps < nsteps) {
+ val grad = updatemats(i)
if (opts.langevin > 0) { // Add Langevin random perturbations
- normrnd(0, opts.langevin, randmat(i));
- grad ~ grad + randmat(i);
+ normrnd(0, opts.langevin, randmat(i))
+ grad ~ grad + randmat(i)
+ }
+ grad ~ grad *@ (lrate *@ tscale)
+ if (opts.momentum.asInstanceOf[AnyRef] != null) {
+ val i0 = if (opts.momentum.length > 1) i else 0
+ mu <-- opts.momentum(i0); // Get the momentum decay rate
+ grad ~ grad + momentum(i); // Add momentum to the gradient
+ momentum(i) ~ grad *@ mu; // update momentum using the new gradient
+ }
+ if (opts.nesterov.asInstanceOf[AnyRef] != null) {
+ val i0 = if (opts.nesterov.length > 1) i else 0
+ mu <-- opts.nesterov(i0); // Get the momentum decay rate
+ grad ~ grad + momentum(i); // Add momentum to the gradient
+ mm ~ mm - momentum(i); // A bit of algebra, remove old momentum from the model
+ momentum(i) ~ grad *@ mu; // Update the momentum
+ mm ~ mm + momentum(i); // Add the new momentum to the model
}
- grad ~ grad *@ (lrate *@ tscale);
- if (opts.momentum.asInstanceOf[AnyRef] != null) {
- val i0 = if (opts.momentum.length > 1) i else 0;
- mu <-- opts.momentum(i0); // Get the momentum decay rate
- grad ~ grad + momentum(i); // Add momentum to the gradient
- momentum(i) ~ grad *@ mu; // update momentum using the new gradient
- }
- if (opts.nesterov.asInstanceOf[AnyRef] != null) {
- val i0 = if (opts.nesterov.length > 1) i else 0;
- mu <-- opts.nesterov(i0); // Get the momentum decay rate
- grad ~ grad + momentum(i); // Add momentum to the gradient
- mm ~ mm - momentum(i); // A bit of algebra, remove old momentum from the model
- momentum(i) ~ grad *@ mu; // Update the momentum
- mm ~ mm + momentum(i); // Add the new momentum to the model;
- }
- modelmats(i) ~ modelmats(i) + grad;
- if (mask != null) modelmats(i) ~ modelmats(i) *@ mask;
- }
- }
- }
+ modelmats(i) ~ modelmats(i) + grad
+ if (mask != null) modelmats(i) ~ modelmats(i) *@ mask
+ }
+ }
+ }
}
@@ -157,7 +157,7 @@ object Grad {
var policies:Array[(Float, Float)=>Float] = null
var momentum:FMat = null
var nesterov:FMat = null
- var langevin = 0f;
+ var langevin = 0f
var clipByValue = -1f
var max_grad_norm = -1f
}
@@ -166,68 +166,68 @@ object Grad {
def multUpdate(a:Mat, b:Mat, mm:Mat, mask:Mat, lrate:Mat, texp:Mat, step:Float, limit:Float):Unit =
- multUpdate(a, b, mm, mask, lrate, texp, step, limit, false);
+ multUpdate(a, b, mm, mask, lrate, texp, step, limit, false)
def multUpdate(a:Mat, b:Mat, mm:Mat, mask:Mat, lrate:Mat, texp:Mat, step:Float, limit:Float, hasBias:Boolean):Unit = {
- val istep = 1f/step;
- val nr = a.nrows;
- val nc = b.ncols;
- val nbr = b.nrows;
- val biasv = if (hasBias) 1 else 0;
- val te = texp + 0f;
- te.set(istep);
- te ~ te ^ texp;
- val lr = lrate ∘ te;
- (a, b, mm, lr) match {
- case (ga:GMat, gb:GSMat, gmm:GMat, glr:GMat) => {
- val maskdata = if (mask != null) mask.asInstanceOf[GMat].data else null;
- val masknr = if (mask != null) mask.nrows else 0;
- CUMACH.multGradTile(nr, nc, 0, 0, b.nnz, ga.data, a.nrows, gb.data, gb.ir, gb.ic,
- gmm.data, maskdata, masknr, glr.data, lr.length, limit, biasv, nbr);
- }
- case (ga:GMat, gb:GSMat, tmm:TMat, glr:GMat) => {
- for (i <- 0 until tmm.tiles.length) {
- val tile = tmm.tiles(i).asInstanceOf[GMat];
- val maskmat = if (mask != null) mask.asInstanceOf[TMat].tiles(i).asInstanceOf[GMat] else null;
- val masknr = if (mask != null) maskmat.nrows else 0;
- val maskdata = if (mask != null) maskmat.data else null;
- CUMACH.multGradTile(tile.nrows, tile.ncols, tmm.y(i), tmm.x(i), b.nnz, ga.data, a.nrows, gb.data, gb.ir, gb.ic,
- tile.data, maskdata, masknr, glr.data, lr.length, limit, biasv, nbr);
- }
- }
- case _ => {
- val grad0 = mm + 0;
- a.madd(b, grad0, false, true);
- val grad = if (hasBias) grad0 \ sum(a,2) else grad0;
- if (limit > 0) {
- min(grad, limit, grad);
- max(grad, -limit, grad);
- }
- grad ~ grad ∘ lr;
- mm ~ mm + grad;
- }
- }
+ val istep = 1f/step
+ val nr = a.nrows
+ val nc = b.ncols
+ val nbr = b.nrows
+ val biasv = if (hasBias) 1 else 0
+ val te = texp + 0f
+ te.set(istep)
+ te ~ te ^ texp
+ val lr = lrate ∘ te
+ (a, b, mm, lr) match {
+ case (ga:GMat, gb:GSMat, gmm:GMat, glr:GMat) => {
+ val maskdata = if (mask != null) mask.asInstanceOf[GMat].data else null
+        val masknr = if (mask != null) mask.nrows else 0
+ CUMACH.multGradTile(nr, nc, 0, 0, b.nnz, ga.data, a.nrows, gb.data, gb.ir, gb.ic,
+ gmm.data, maskdata, masknr, glr.data, lr.length, limit, biasv, nbr)
+ }
+ case (ga:GMat, gb:GSMat, tmm:TMat, glr:GMat) => {
+ for (i <- 0 until tmm.tiles.length) {
+ val tile = tmm.tiles(i).asInstanceOf[GMat]
+ val maskmat = if (mask != null) mask.asInstanceOf[TMat].tiles(i).asInstanceOf[GMat] else null
+ val masknr = if (mask != null) maskmat.nrows else 0
+ val maskdata = if (mask != null) maskmat.data else null
+ CUMACH.multGradTile(tile.nrows, tile.ncols, tmm.y(i), tmm.x(i), b.nnz, ga.data, a.nrows, gb.data, gb.ir, gb.ic,
+ tile.data, maskdata, masknr, glr.data, lr.length, limit, biasv, nbr)
+ }
+ }
+ case _ => {
+ val grad0 = mm + 0
+ a.madd(b, grad0, false, true)
+ val grad = if (hasBias) grad0 \ sum(a,2) else grad0
+ if (limit > 0) {
+ min(grad, limit, grad)
+ max(grad, -limit, grad)
+ }
+ grad ~ grad ∘ lr
+ mm ~ mm + grad
+ }
+ }
}
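In the generic fallback above, a.madd(b, grad0, false, true) accumulates a * b^T into grad0 (sized like mm), a bias column of the row-sums of a is appended when hasBias is set, and the result is scaled by lrate ∘ (1/step)^texp before being added to mm. A hedged dense sketch of the same arithmetic, ignoring the GPU tile paths (row-major arrays, illustrative names):

    // mm(r,c) += lr * (1/step)^texp * sum_j a(r,j)*b(c,j)
    // a: nr x n activations, b: nb x n targets, mm: nr x nb model.
    def multUpdateDense(a: Array[Float], b: Array[Float], mm: Array[Float],
                        nr: Int, nb: Int, n: Int,
                        lr: Float, texp: Float, step: Float): Unit = {
      val te = math.pow(1.0 / step, texp).toFloat   // annealed step scale
      var r = 0
      while (r < nr) {
        var c = 0
        while (c < nb) {
          var s = 0f; var j = 0
          while (j < n) { s += a(r * n + j) * b(c * n + j); j += 1 }
          mm(r * nb + c) += s * lr * te             // accumulate scaled gradient
          c += 1
        }
        r += 1
      }
    }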
def PWlinear(segments:FMat):(Float, Float) => Float = {
(nsteps:Float, gprogress:Float) => {
- var i = 1;
+ var i = 1
while (i < segments.nrows && gprogress > segments(i, 0)) {
- i += 1;
+ i += 1
}
- val frac = (gprogress - segments(i-1,0)) / (segments(i,0) - segments(i-1,0));
- frac * segments(i,1) + (1-frac) * segments(i-1,1);
+ val frac = (gprogress - segments(i-1,0)) / (segments(i,0) - segments(i-1,0))
+ frac * segments(i,1) + (1-frac) * segments(i-1,1)
}
}
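PWlinear reads segments as (progress, value) breakpoints and interpolates linearly between the pair bracketing gprogress. For instance, with hypothetical breakpoints (0, 1), (0.5, 0.1), (1, 0.01), built roughly as follows (assuming the usual BIDMat on / \ matrix builders):

    val segs = 0f \ 1f on 0.5f \ 0.1f on 1f \ 0.01f   // 3x2 FMat of (progress, value) rows
    val policy = Grad.PWlinear(segs)
    policy(1000f, 0.75f)   // frac = 0.5, so 0.5f*0.01f + 0.5f*0.1f = 0.055f

the call at gprogress = 0.75 lands in the second segment with frac = 0.5 and returns 0.055.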
def PWexp(segments:FMat):(Float, Float) => Float = {
(nsteps:Float, gprogress:Float) => {
- var i = 1;
+ var i = 1
while (i < segments.nrows && gprogress > segments(i, 0)) {
- i += 1;
+ i += 1
}
- val frac = (gprogress - segments(i-1,0)) / (segments(i,0) - segments(i-1,0));
- math.exp(frac * math.log(segments(i,1)) + (1-frac) * math.log(segments(i-1,1))).toFloat;
+ val frac = (gprogress - segments(i-1,0)) / (segments(i,0) - segments(i-1,0))
+ math.exp(frac * math.log(segments(i,1)) + (1-frac) * math.log(segments(i-1,1))).toFloat
}
}
}
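PWexp performs the same breakpoint lookup but interpolates in log space, so the rate decays geometrically within a segment: with the same hypothetical breakpoints, gprogress = 0.75 returns exp(0.5·ln 0.01 + 0.5·ln 0.1) = sqrt(0.001) ≈ 0.0316 rather than PWlinear's 0.055.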
diff --git a/src/main/scala/BIDMach/updaters/IncMult.scala b/src/main/scala/BIDMach/updaters/IncMult.scala
index e179289c..685ae229 100755
--- a/src/main/scala/BIDMach/updaters/IncMult.scala
+++ b/src/main/scala/BIDMach/updaters/IncMult.scala
@@ -23,13 +23,13 @@ class IncMult(override val opts:IncMult.Opts = new IncMult.Options) extends Upda
val um = updatemats(0)
val ums = updatemats(1)
val rr = if (step == 0) 1f else {
- if (firstStep == 0f) {
- firstStep = step
- 1f
- } else {
- (math.pow(firstStep / step, opts.power)).toFloat
- }
- }
+ if (firstStep == 0f) {
+ firstStep = step
+ 1f
+ } else {
+ (math.pow(firstStep / step, opts.power)).toFloat
+ }
+ }
um ~ um *@ rm.set(rr)
ln(mm, mm)
@@ -40,7 +40,7 @@ class IncMult(override val opts:IncMult.Opts = new IncMult.Options) extends Upda
}
override def clear() = {
- firstStep = 0f
+ firstStep = 0f
}
}
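The weight rr computed above is (firstStep/step)^power, so the first minibatch gets weight 1 and later ones decay as a power law; because the model is updated through ln/exp, IncMult is effectively a geometric (multiplicative) moving average. A scalar sketch of the schedule (illustrative names, not the BIDMach API):

    // Weight given to the minibatch ending at `step`; power in (0, 1].
    def weight(firstStep: Float, step: Float, power: Float): Float =
      if (step == 0f || firstStep == 0f) 1f
      else math.pow(firstStep / step, power).toFloat

    weight(100f, 400f, 0.5f)   // == 0.5f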
diff --git a/src/main/scala/BIDMach/updaters/IncNorm.scala b/src/main/scala/BIDMach/updaters/IncNorm.scala
index 62142dc4..4eac2d26 100755
--- a/src/main/scala/BIDMach/updaters/IncNorm.scala
+++ b/src/main/scala/BIDMach/updaters/IncNorm.scala
@@ -7,7 +7,7 @@ import BIDMach.models._
/**
* Incrementally update two moving averages using updatemats(0) and updatemats(1), and compute the model
- * as their ratio.
+ * as their ratio.
*/
class IncNorm(override val opts:IncNorm.Opts = new IncNorm.Options) extends Updater(opts) {
@@ -17,7 +17,7 @@ class IncNorm(override val opts:IncNorm.Opts = new IncNorm.Options) extends Upda
var started:Int = 0
override def init(model0:Model) = {
- super.init(model0)
+ super.init(model0)
val modelmats = model0.modelmats
val updatemats = model0.updatemats
restart = modelmats(0) + 1f
@@ -26,35 +26,35 @@ class IncNorm(override val opts:IncNorm.Opts = new IncNorm.Options) extends Upda
}
override def update(ipass:Int, step:Long) = {
- val modelmats = model.modelmats
- val updatemats = model.updatemats
- val mm = modelmats(0)
- val um = updatemats(0)
- val rr = if (step == 0) 0.99f else {
- if (firstStep == 0f) {
- firstStep = step
- 0.99f
- } else {
- math.pow(firstStep / step, opts.power).toFloat
- }
- }
- if (modelmats.length > 1) {
- val ms = modelmats(1)
- val ums = updatemats(1)
- ums ~ ums *@ rm.set(rr)
- ms ~ ms *@ rm.set(1-rr)
- ms ~ ms + ums
- um ~ um / ms
- }
- if (modelmats.length > 2) {
- val ms2 = modelmats(2)
- val ums2 = updatemats(2)
- ums2 ~ ums2 *@ rm.set(rr)
- ms2 ~ ms2 *@ rm.set(1-rr)
- ms2 ~ ms2 + ums2
- }
- um ~ um *@ rm.set(rr)
- mm ~ mm *@ rm.set(1-rr)
+ val modelmats = model.modelmats
+ val updatemats = model.updatemats
+ val mm = modelmats(0)
+ val um = updatemats(0)
+ val rr = if (step == 0) 0.99f else {
+ if (firstStep == 0f) {
+ firstStep = step
+ 0.99f
+ } else {
+ math.pow(firstStep / step, opts.power).toFloat
+ }
+ }
+ if (modelmats.length > 1) {
+ val ms = modelmats(1)
+ val ums = updatemats(1)
+ ums ~ ums *@ rm.set(rr)
+ ms ~ ms *@ rm.set(1-rr)
+ ms ~ ms + ums
+ um ~ um / ms
+ }
+ if (modelmats.length > 2) {
+ val ms2 = modelmats(2)
+ val ums2 = updatemats(2)
+ ums2 ~ ums2 *@ rm.set(rr)
+ ms2 ~ ms2 *@ rm.set(1-rr)
+ ms2 ~ ms2 + ums2
+ }
+ um ~ um *@ rm.set(rr)
+ mm ~ mm *@ rm.set(1-rr)
mm ~ mm + um
if (opts.isprob) mm ~ mm / sum(mm,2)
if (opts.warmup > 0) {
@@ -72,7 +72,7 @@ class IncNorm(override val opts:IncNorm.Opts = new IncNorm.Options) extends Upda
}
override def clear() = {
- firstStep = 0f
+ firstStep = 0f
}
}
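Every line of IncNorm.update above is the same exponential-moving-average blend with mixing weight rr (0.99 on the first step, then decaying as (firstStep/step)^power), i.e. x ← (1-rr)·x + rr·x_new, with an optional renormalization to a probability when opts.isprob is set. A minimal sketch of the blend (hypothetical helper, plain arrays):

    // EMA blend used throughout IncNorm.update: dst <- (1-rr)*dst + rr*src
    def blend(dst: Array[Float], src: Array[Float], rr: Float): Unit = {
      var i = 0
      while (i < dst.length) {
        dst(i) = (1f - rr) * dst(i) + rr * src(i)
        i += 1
      }
    }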
diff --git a/src/main/scala/BIDMach/updaters/Telescoping.scala b/src/main/scala/BIDMach/updaters/Telescoping.scala
index 5329f414..d304a11c 100755
--- a/src/main/scala/BIDMach/updaters/Telescoping.scala
+++ b/src/main/scala/BIDMach/updaters/Telescoping.scala
@@ -6,14 +6,14 @@ import BIDMat.SciFunctions._
import BIDMach.models._
class Telescoping(override val opts:Telescoping.Opts = new Telescoping.Options) extends Updater {
- var accumulators:Array[Mat] = null
+ var accumulators:Array[Mat] = null
var firstStep = 0L
var nextStep = 10L
var nextCount = 0L
var rm:Mat = null
override def init(model0:Model) = {
- super.init(model0)
+ super.init(model0)
val modelmats = model0.modelmats
val updatemats = model0.updatemats
rm = model0.modelmats(0).zeros(1,1)
@@ -21,30 +21,30 @@ class Telescoping(override val opts:Telescoping.Opts = new Telescoping.Options)
for (i <- 0 until updatemats.length) yield {
accumulators(i) = updatemats(i).zeros(updatemats(i).nrows, updatemats(i).ncols)
}
- firstStep = 0L
+ firstStep = 0L
nextStep = 10L
nextCount = 0L
}
-
- override def update(ipass:Int, step:Long) = {
- if (firstStep == 0 && step > 0) {
- firstStep = step
- }
- val updatemats = model.updatemats
+
+ override def update(ipass:Int, step:Long) = {
+ if (firstStep == 0 && step > 0) {
+ firstStep = step
+ }
+ val updatemats = model.updatemats
for (i <- 0 until updatemats.length) {
- accumulators(i) ~ accumulators(i) + updatemats(i)
+ accumulators(i) ~ accumulators(i) + updatemats(i)
+ }
+ if (step >= nextCount) {
+ model.modelmats(0) ~ accumulators(0) / accumulators(1)
+ nextStep = (nextStep * opts.factor).toLong
+ nextCount = step + nextStep
}
- if (step >= nextCount) {
- model.modelmats(0) ~ accumulators(0) / accumulators(1)
- nextStep = (nextStep * opts.factor).toLong
- nextCount = step + nextStep
- }
}
override def clear() = {
- for (i <- 0 until accumulators.length) {
- accumulators(i).clear
- }
+ for (i <- 0 until accumulators.length) {
+ accumulators(i).clear
+ }
}
}
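Telescoping flushes the accumulated ratio accumulators(0) / accumulators(1) into the model at geometrically spaced steps: after each flush the interval grows by opts.factor, so the model is recomputed less and less often as training proceeds. A self-contained sketch of just the schedule (illustrative, assuming factor > 1):

    // Steps at which Telescoping.update would recompute the model.
    def flushSteps(factor: Double, n: Int): Seq[Long] = {
      var nextStep = 10L; var nextCount = 0L; var step = 0L
      val out = scala.collection.mutable.ArrayBuffer[Long]()
      while (out.length < n) {
        if (step >= nextCount) {           // same test as in update()
          out += step
          nextStep = (nextStep * factor).toLong
          nextCount = step + nextStep
        }
        step += 1
      }
      out.toSeq
    }

    flushSteps(2.0, 4)   // Seq(0, 20, 60, 140)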
diff --git a/src/main/scala/BIDMach/updaters/Updater.scala b/src/main/scala/BIDMach/updaters/Updater.scala
index 614f67e6..2645245e 100755
--- a/src/main/scala/BIDMach/updaters/Updater.scala
+++ b/src/main/scala/BIDMach/updaters/Updater.scala
@@ -7,8 +7,8 @@ import BIDMach.models._
abstract class Updater(val opts:Updater.Opts = new Updater.Options) extends Serializable {
- var model:Model = null;
- var runningtime = 0.0;
+ var model:Model = null
+ var runningtime = 0.0
def init(model0:Model) = {
model = model0