diff --git a/compiler.ts b/compiler.ts index 8e633e02..35f56897 100644 --- a/compiler.ts +++ b/compiler.ts @@ -2,6 +2,8 @@ import { Program, Stmt, Expr, Value, Class, VarInit, FunDef } from "./ir" import { Annotation, BinOp, Type, UniOp } from "./ast" import { APPLY, BOOL, createMethodName, makeWasmFunType, NONE, NUM } from "./utils"; import { equalType } from "./type-check"; +import { getTypeInfo } from "./memory"; +import exp from "constants"; export type GlobalEnv = { globals: Map; @@ -89,18 +91,66 @@ export function compile(ast: Program, env: GlobalEnv) : CompileResul function codeGenStmt(stmt: Stmt, env: GlobalEnv): Array { switch (stmt.tag) { case "store": - return [ + let post = [ ...codeGenValue(stmt.start, env), + `(i32.add)`, + `call $ref_lookup`, ...codeGenValue(stmt.offset, env), - ...codeGenValue(stmt.value, env), + `(local.get $$scratch)`, `call $store` + ] + let pre = [...codeGenValue(stmt.value, env), `(local.set $$scratch)` ,`(i32.const 0)`] + if (stmt.value.a?.type?.tag === "list" ||stmt.value.a?.type?.tag === "class" || stmt.value?.tag === "none" || stmt.value?.tag === "num") { + pre = [ + ...codeGenValue(stmt.value, env), + `(local.set $$scratch)`, + ...codeGenValue(stmt.start, env), + `call $ref_lookup`, + ...codeGenValue(stmt.offset, env), + `(call $load)`, // load the ref number referred to by argument ref no. 
and the offset + `(i32.const 0)`, + `(i32.const -1)`, + `(i32.const 0)`, + `(call $traverse_update)`, + `(i32.mul (i32.const 0))`, // hack to take top value of stack + `(local.get $$scratch)`, + `(i32.add)`, // hack to take top value of stack + ...codeGenValue(stmt.start, env), + `(i32.const 1)`, + `(i32.const 0)`, + `(call $traverse_update)`, + `(i32.mul (i32.const 0))` + ] + } + return pre.concat(post); + case "assign": var valStmts = codeGenExpr(stmt.value, env); - if (env.locals.has(stmt.name)) { - return valStmts.concat([`(local.set $${stmt.name})`]); - } else { - return valStmts.concat([`(global.set $${stmt.name})`]); + if (((stmt.value.a?.type?.tag === "list" || stmt.value.a?.type?.tag === "class" || (stmt.value.tag === "value" && stmt.value.value.tag === "none")) || (stmt.value?.tag === "value" && stmt.value.value.tag === "num")) && (stmt.value.tag !== "alloc")) { // if the assignment is object assignment + valStmts.push(`(i32.const 0)`, `(i32.const 1)`, `(i32.const 1)` , `(call $traverse_update)`) // update the count of the object on the RHS + if (env.locals.has(stmt.name)) { + return [`(local.get $${stmt.name})`, // update the count of the object on the LHS + `(i32.const 0)`, + `(i32.const -1)`, + `(i32.const 1)`, + `(call $traverse_update)`, + `(local.set $${stmt.name})`].concat(valStmts).concat([`(local.set $${stmt.name})`]); + } else { + return [`(global.get $${stmt.name})`, + `(i32.const 0)`, + `(i32.const -1)`, + `(i32.const 1)`, + `(call $traverse_update)`, + `(global.set $${stmt.name})`].concat(valStmts).concat([`(global.set $${stmt.name})`]); + } + } + else { + if (env.locals.has(stmt.name)) { + return valStmts.concat([`(local.set $${stmt.name})`]); + } else { + return valStmts.concat([`(global.set $${stmt.name})`]); + } } case "return": @@ -157,9 +207,11 @@ function codeGenExpr(expr: Expr, env: GlobalEnv): Array { ...exprStmts, `(local.set $$scratch)`, // bignum addr `(local.get $$scratch)`, // store addr + `(call $ref_lookup)`, `(i32.const 0)`, 
// store offset `(i32.const 0)`, // 0 - len `(local.get $$scratch)`, // load addr + `(call $ref_lookup)`, `(i32.const 0)`, // load offset `(call $load)`, // load bignum len `(i32.sub)`, // store val @@ -181,7 +233,7 @@ function codeGenExpr(expr: Expr, env: GlobalEnv): Array { } else if (expr.name === "print" && equalType(argTyp, NONE)) { callName = "print_none"; } else if (expr.name === "len") { - return [...argStmts, "(i32.const 0)", "call $load"]; + return [...argStmts,"(call $ref_lookup)", "(i32.const 0)", "call $load"]; } return argStmts.concat([`(call $${callName})`]); @@ -194,24 +246,42 @@ function codeGenExpr(expr: Expr, env: GlobalEnv): Array { case "call": var valStmts = expr.arguments.map((arg) => codeGenValue(arg, env)).flat(); if(expr.name === "len"){ - return [...valStmts, "(i32.const 0)", "call $load"]; + return [...valStmts, "(call $ref_lookup)", "(i32.const 0)", "call $load"]; } valStmts.push(`(call $${expr.name})`); - return valStmts; + // Not sure if plugging in the scope calls here is the best way to do this + return [ + `(call $add_scope)`, + ...valStmts, + `(call $remove_scope)` + ]; case "call_indirect": var valStmts = codeGenExpr(expr.fn, env); var fnStmts = expr.arguments.map((arg) => codeGenValue(arg, env)).flat(); - return [...fnStmts, ...valStmts, `(call_indirect (type ${makeWasmFunType(expr.arguments.length)}))`]; + return [`(call $add_scope)`, ...fnStmts, ...valStmts, `(call_indirect (type ${makeWasmFunType(expr.arguments.length)}))`, `(call $remove_scope)`]; case "alloc": + if (expr.fixed) { + let r = [ + ...codeGenValue(expr.amount, env), + `(i32.const ${parseInt(expr.fixed.map(b => b ? 
1: 0).reverse().join(""), 2)})`, //parseInt(binArr.reverse().join(""), 2) + `(i32.const ${expr.fixed.length})`, + `call $alloc` + ] + return(r); + } + let fields = [...env.classes.get(expr?.a?.type?.tag === "class" && expr.a.type.name).values()]; return [ ...codeGenValue(expr.amount, env), + `(i32.const ${getTypeInfo(fields.map(f => f[1]))})`, + `(i32.const ${fields.length})`, `call $alloc` ]; case "load": return [ ...codeGenValue(expr.start, env), + `call $ref_lookup`, ...codeGenValue(expr.offset, env), `call $load` ] @@ -240,12 +310,15 @@ function codeGenValue(val: Value, env: GlobalEnv): Array { var return_val : string[] = [] return_val.push(`(i32.const ${n})`); + return_val.push(`(i32.const 0)`) + return_val.push(`(i32.const 1)`); return_val.push(`(call $alloc)`); return_val.push(`(local.set $$scratch)`); // store the bignum in (n+1) blocks // store number of blocks in the first block return_val.push(`(local.get $$scratch)`); + return_val.push(`(call $ref_lookup)`); return_val.push(`(i32.const ${i})`); if (neg) return_val.push(`(i32.const -${n-1})`); @@ -257,6 +330,7 @@ function codeGenValue(val: Value, env: GlobalEnv): Array { // store the digits in the rest of blocks for (i; i < n; i++) { return_val.push(`(local.get $$scratch)`); + return_val.push(`(call $ref_lookup)`); return_val.push(`(i32.const ${i})`); return_val.push(`(i32.const ${digits[i-1]})`); return_val.push(`call $store`); diff --git a/designs/images/metadata.drawio.png b/designs/images/metadata.drawio.png new file mode 100644 index 00000000..0b560741 Binary files /dev/null and b/designs/images/metadata.drawio.png differ diff --git a/designs/memory-management-design.md b/designs/memory-management-design.md new file mode 100644 index 00000000..c54baa52 --- /dev/null +++ b/designs/memory-management-design.md @@ -0,0 +1,615 @@ +# Memory management + +# Milestone 1 + +## Updated Tests +### 1. 
Classes inherited from `object` + +**Case:** +``` +class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + + x: Rat = None + x = Rat() +``` +**Expected:** +Let `o` be the object referred to by variable `x` +``` +assert number of references of o is 1 +assert size of o(in bytes) is 4 +assert type of the fields in o is [value] +``` + +### 2. Multiple references + +**Case:** +``` +class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + x: Rat = None + y: Rat = None + x = Rat() + y = x + y = None +``` +**Expected:** +Let `o` be the object referred to by variable `x` +``` +assert number of references of o is 3 +assert size of o(in bytes) is 4 +assert type of the fields in o is [value] +``` + +### 3. Removing references + +**Case:** +``` +class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + x: Rat = None + y: Rat = None + x = Rat() + y = x + y = None +``` +**Expected:** +Let `o` be the object referred to by variable `x` +``` +assert number of references of o is 1 +assert size of o(in bytes) is 4 +assert type of the fields in o is [value] +``` + +### 4. Removing references out of scope + +**Case:** +``` +class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + def someFunc(self: Rat): + r: Rat = None + r = self + r.y = 100 + + x: Rat = None + y: Rat = None + x = Rat() + x.someFunc() +``` +**Expected:** +Let `o` be the object referred to by variable `x` +``` +assert number of references of o is 1 +assert size of o(in bytes) is 4 +assert type of the fields in o is [value] +``` + +### 5. 
Objects created in non local scope + +**Case:** +``` +class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + + def someFunc() -> Rat: + r: Rat = None + r = Rat() + r.y = 100 + return r + + x: Rat = None + x = someFunc() +``` +**Expected:** +Let `o` be the object referred to by variable `x` +``` +assert number of references of o is 1 +assert size of o(in bytes) is 4 +assert type of the fields in o is [value] +``` + +### 6. Access is not assignment + +**Case:** +``` +class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + + x: Rat = None + x = Rat() + x.y + print(x.y) +``` +**Expected:** +Let `o` be the object referred to by variable `x` +``` +assert number of references of o is 1 +assert size of o(in bytes) is 4 +assert type of the fields in o is [value] +``` + +### 7: Objects as fields + +**Case:** +``` +class Link(object): + id: int = 0 + next: Link = None + def __init__(self: Link): + self.id = 123 + def add(l: Link, val: int) -> Link: + m: Link = None + m = Link() + m.id = val + l.next = m + return m + + x: Link = None + y: Link = None + x = Link() + y = x.add(456) +``` +**Expected:** + +Let `o` be the object referred to by variable `x`
+Let `p` be the object referred to by variable `y` +``` +assert number of references of o is 1 +assert type of fields in o is [value, pointer] +assert number of references of p is 2 +assert type of fields in p is [value, pointer] +``` + +### 8. Anonymous object deletion + +**Case:** +``` +class Link(object): + id: int = 123 + next: Link = None + + def add(l: Link) -> Link: + l.next = Link() + l.id = 456 + return l.next + + x: Link = None + x = Link() + x.add() + x = None +``` +**Expected:** +``` +assert number of references of any object is 0 +``` + +### 9. Simple linked cycle +**Case:** +``` +class Link(object): + id: int = 0 + next: Link = None + + x: Link = None + y: Link = None + x = Link() + x.id = 123 + y = Link() + y.id = 456 + x.next = y + y.next = x +``` +**Expected:** + +Let `o` be the object referred to by variable `x`
+Let `p` be the object referred to by variable `y` +``` +assert number of references of o is 2 +assert type of fields in o is [value, pointer] +assert number of references of p is 2 +assert type of fields in p is [value, pointer] +``` + +### 10. Simple deletion in cycle +**Case:** +``` +class Link(object): + id: int = 0 + next: Link = None + + x: Link = None + y: Link = None + x = Link() + x.id = 123 + y = Link() + y.id = 456 + x.next = y + y.next = x + + x = None +``` +**Expected:** + +Let `o` be the object referred to by variable `x`
+Let `p` be the object referred to by variable `y` +``` +assert number of references of o is 1 +assert type of fields in o is [value, pointer] +assert number of references of p is 1 +assert type of fields in p is [value, pointer] +``` + +## Updated Changes to IR + +We managed to do our implementation without making any changes to IR. + +## Added functions/ data types/ files +Functions for reference counting are added to `memory.ts`. We do plan to eventually port things to WASM once we have most functionality for the GC in place and are able to support BigInt and Lists without issues, and have more extensive tests. + +## Updated Value representation and memory layout + +![Memory layout with metadata](./images/metadata.drawio.png) + +The type of a field is denoted by a single bit. For the memory manager we feel that it does not really matter what the type of a field is, as long as we are able to differentiate between data and references to data. The current representation that we plan uses 32 bits to represent types in an object, which puts an upper bound of 32 on the number of fields in an object. If needed this number can be easily increased, assigning more bits to storing types in the metadata. + +The size of the object is stored in bytes, and the number of references is stored along with the object. + +Update: We added an `amount` field to denote the total amount of memory allocated in bytes. `size` field holds the size of a single object in the data stored. The total number of objects associated would then be `(amount - metadatasize) / size`. + +### Compaction/ Defragmentation +Performing defragmentation for the heap will move objects around in the heap. Since we can't go about modifying all the references, each object we create will have an **immutable reference number**. We plan to maintain a mapping from this reference number to the actual location in memory. When compaction takes place we will update this mapping. 
All variables in the program will store the **reference number** instead of the actual address in memory. + +## Updates and Design decisions (Week 2) + +We were able to pass all the tests which we had described last week. The node traversal and the scoping were the most interesting this week and `Test 9`, `Test 10`, `Test 4` and `Test 5` are a good representation of the cases we have accounted for. Our tests are present in the `memory.test.ts` file. We had to add a function to `asserts.test.ts` to support testing for memory management functions. + + +We had to make changes in `compiler.ts` to support the memory management functionality. However, this does not break the original implementation in any way. + +We abstracted out the functionality for memory management in `memory.ts`. The load and store for metadata are handled in `memory.wat`. + +We also added another field to denote the total amount of data allocated in bytes, since without this we would not have been able to know how many objects we stored in a contiguous fashion at a given memory location. This allows the array list group to request any amount of data. Note that the `Data` field in the memory metadata diagram denotes the maximum memory which can be allocated by one object. We can however allocate any number of such objects in a contiguous manner. + +We had a bit of trouble figuring out how to check if the reference counts that we collect are indeed correct since we don't have access to the variable names. For testing we added a field called `id` to each object which stores a unique identifier for an object. The reference counts and other metadata were tested using this field as an identifier. Ids also allow us to write more complex tests without worrying about objects being moved in the memory by the garbage collection functionality we will implement. + +# Milestone 2 + +## New Tests +### 1. 
Doubly-Linked Less Simple Cycle +**Case:** +``` +class Link(object): + id: int = 0 + next: Link = None + prev: Link = None + +x: Link = None +y: Link = None +z: Link = None +x = Link() +x.id = 123 +y = Link() +y.id = 456 +z = Link() +z.id = 789 +x.next = y +y.next = z +z.next = x +x.prev = z +y.prev = x +z.prev = y +``` +**Expected:** +Let `o` be the object referred to by variable `x`
+Let `p` be the object referred to by variable `y`
+Let `q` be the object referred to by variable `z` +``` +assert number of references of o is 3 +assert type of fields in o is [value, pointer] +assert number of references of p is 3 +assert type of fields in p is [value, pointer] +assert number of references of q is 3 +assert type of fields in q is [value, pointer] +``` + +### 2. Doubly-Linked Less Simple Cycle Deletion +**Case:** +``` +class Link(object): + id: int = 0 + next: Link = None + prev: Link = None + +x: Link = None +y: Link = None +z: Link = None +x = Link() +x.id = 123 +y = Link() +y.id = 456 +z = Link() +z.id = 789 +x.next = y +y.next = z +z.next = x +x.prev = z +y.prev = x + +y = None +``` +**Expected:** +Let `o` be the object referred to by variable `x`
+Let `p` be the object referred to by variable `y`
+Let `q` be the object referred to by variable `z` +``` +assert number of references of o is 3 +assert type of fields in o is [value, pointer] +assert number of references of p is 2 +assert type of fields in p is [value, pointer] +assert number of references of q is 3 +assert type of fields in q is [value, pointer] +``` + +### 3. Doubly-Linked Less Simple Cycle Complete Deletion +**Case:** +``` +class Link(object): + id: int = 0 + next: Link = None + prev: Link = None + +x: Link = None +y: Link = None +z: Link = None +x = Link() +x.id = 123 +y = Link() +y.id = 456 +z = Link() +z.id = 789 +x.next = y +y.next = z +z.next = x +x.prev = z +y.prev = x + +y = None +x.next = None +z.prev = None +``` +**Expected:** +Let `o` be the object referred to by variable `x`
+Let `p` be the object referred to by variable `y`
+Let `q` be the object referred to by variable `z` +``` +assert number of references of o is 2 +assert type of fields in o is [value, pointer] +assert number of references of p is 0 +assert type of fields in p is [value, pointer] +assert number of references of q is 2 +assert type of fields in q is [value, pointer] +``` + +### 4. Simple Inherited Reference +**Case:** +``` +class Link(Object): + id: int = 0 + next: Link = None + + def add(l: Link, val: int) -> BLink: + m: Link = None + m = Link() + m.id = val + l.next = m + return m + +class ALink(Link): + def __init__(self: ALink): + super().__init__() + def add(l: Link, val: int) -> BLink: + m: Link = None + m = ALink() + m.id = val + l.next = m + print("hello from Alink") + return m + +x: Link = None +y: Link = None +x = ALink() +x.id = 123 +y = x.add(456) +``` +**Expected:** +Let `o` be the object referred to by variable `x`
+Let `p` be the object referred to by variable `y` +``` +assert number of references of o is 1 +assert type of fields in o is [value, pointer] +assert number of references of p is 2 +assert type of fields in p is [value, pointer] +assert prints "hello from Alink" +``` + + +### 5. Simple Global Reference +**Case:** +``` +class Link(object): + id: int = 0 + + def assign_global(self: Link): + global global_link + global_link = self + +global_link: Link = None +x: Link = None +x = Link() +x.id = 123 +x.assign_global() +``` +**Expected:** +Let `o` be the object referred to by variable `x` +``` +assert number of references of o is 2 +assert type of fields in o is [value, pointer] +``` + +### 6. Simple Global Reference Deletion +**Case:** +``` +class Link(object): + id: int = 0 + next: Link = None + + def assign_global(self: Link): + global global_link + global_link = self + +global_link: Link = None +x: Link = None +x = Link() +x.id = 123 +x.next = Link() +x.next.id = 456 +x.assign_global() +x = None +global_link = None +``` +**Expected:** +Let `o` be the object referred to by variable `x`
+Let `p` be the object referred to by variable `x.next` +``` +assert number of references of o is 0 +assert type of fields in o is [value, pointer] +assert number of references of p is 0 +assert type of fields in p is [value, pointer] +``` + +### 7. Simple Global Reference Reassignment +**Case:** +```python +class Link(object): + id: int = 0 + next: Link = None + + def assign_global(self: Link): + global global_link + global_link = self + +global_link: Link = None +x: Link = None +x = Link() +x.id = 123 +x.assign_global() +x.next = Link() +x.next.id = 456 +x.next.assign_global() +x = None +``` +**Expected:** +Let `o` be the object referred to by variable `x`
+Let `p` be the object referred to by variable `x.next` +``` +assert number of references of o is 0 +assert type of fields in o is [value, pointer] +assert number of references of p is 1 +assert type of fields in p is [value, pointer] +``` + +### 8. Garbage collection for out of scope objects +```python +class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + def someFunc(self: Rat): + r: Rat = None + r = Rat() + + x: Rat = None + y: Rat = None + x = Rat() + x.someFunc() +``` +**Expected:** +Total memory consumed after garbage collection is equal to that required by only 1 object. + +### 9. Garbage collection in a loop +``` +class Rat(object): + val: int = 0 +a: Rat = None +while True: + a = Rat() + a = None +``` +**Expected:** +The program does not crash with an out-of-memory error + +### 10. Integration with Lists +``` +a: [int] = [1,2,3,4] +b: [int] = [4, 6, 7] +a = a + b +a = None +b = None +``` +**Expected** +Total memory allocated after garbage collection is equal to 0. + +### 11. self assignment is not garbage collected +```python +class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + +a = Rat() +a = a +``` +**Expected** +Running garbage collection does not reclaim memory of the object denoted by `a` + + +The tests defined in milestone 1 will be revised to also check if the amount of memory allocated is correct after the garbage collection routine is run. + +## Features +### Deleting objects and defragmentation +We currently only track the reference counts of the objects and know when an object has 0 references. We have to add object deletion and defragmentation so that the space allocated to the *to be deleted* objects can be reclaimed. This will be done by shifting the rest of the objects in the memory and updating the reference -> address mapping. The garbage collection functionality will be deferred to some other time. 
We plan to determine when this process should happen based on a threshold on the number of references which can be reassigned and also on the total remaining memory. These checks will be performed on an `alloc` call, which in a way is an implicit call to the garbage collector. + +### Integration with the Closures/Inheritance/BigInts/Lists group +We noticed that this group needs additional metadata in the heap in addition to the object data. We plan to use the newer alloc function as suggested by Professor Politz. +```javascript +| { tag: "alloc", amount: Value, fixed?: boolean[], rest?: boolean } +``` +This would remove the dependency on where metadata is placed for various datatypes and for the memory management group. + +### Integration with error reporting +We need to use the error types defined by the error reporting group and use their newer `Annotation` type. +Note: If the newer `alloc` is implemented, then we would not have to rely on inferring the class field types ourselves from the `type` field in annotation. Instead, this information would be given by the `fixed` field in the newer alloc. + +### Porting to WASM +We plan to port as much functionality as we can from JS in `memory.ts` to WASM in `memory.wat`. We believe this should speed up the memory management functionality by a bit. We plan to do this after making sure our functionality works correctly on the JS implementation. 
diff --git a/ir.ts b/ir.ts index 32c09971..884ff616 100644 --- a/ir.ts +++ b/ir.ts @@ -28,8 +28,8 @@ export type Expr = | { a?: A, tag: "builtin1", name: string, arg: Value } | { a?: A, tag: "builtin2", name: string, left: Value, right: Value} | { a?: A, tag: "call", name: string, arguments: Array> } + | { a?: A, tag: "alloc", amount: Value, fixed?: boolean[], rest?: boolean } | { a?: A, tag: "call_indirect", fn: Expr, arguments: Array> } - | { a?: A, tag: "alloc", amount: Value } | { a?: A, tag: "load", start: Value, offset: Value } export type Value = diff --git a/lower.ts b/lower.ts index 75d4528c..326ccc74 100644 --- a/lower.ts +++ b/lower.ts @@ -3,7 +3,7 @@ import * as IR from './ir'; import { Type, UniOp, Annotation } from './ast'; import * as ERRORS from './errors'; import { GlobalEnv } from './compiler'; -import { APPLY, CLASS, createMethodName, BOOL, NONE, NUM } from './utils'; +import { APPLY, CLASS, createMethodName, BOOL, NONE, NUM, getFieldType } from './utils'; let nameCounters : Map = new Map(); export function resetNameCounters() { @@ -663,7 +663,7 @@ function flattenExprToExpr(e : AST.Expr, blocks: Array = ERRORS.flattenAssertNotNone(e.a, objval); - const callMethod : IR.Expr = { tag: "call", name: `${className}$${e.method}`, arguments: [objval, ...argvals] } + const callMethod : IR.Expr = {a: e.a, tag: "call", name: `${className}$${e.method}`, arguments: [objval, ...argvals] } return [ [...objinits, ...arginits], [...objstmts, checkObj, ...argstmts], @@ -687,7 +687,13 @@ function flattenExprToExpr(e : AST.Expr, blocks: Array = { tag: "alloc", amount: { tag: "wasmint", value: fields.length + 1} }; + const alloc : IR.Expr = { + a: e.a, + tag: "alloc", + amount: { tag: "wasmint", value: fields.length + 1}, + fixed: [false].concat(getFieldType(fields.map(f => f[1][1]))) // first field is a wasmint + }; + //console.log(alloc); const assigns : IR.Stmt[] = fields.map(f => { const [_, [index, value]] = f; return { @@ -716,9 +722,12 @@ function 
flattenExprToExpr(e : AST.Expr, blocks: Array = { + a: e.a, tag: "alloc", - amount: { tag: "wasmint", value: e.items.length + 2 }, - }; + amount: { tag: "wasmint", value: e.items.length + 2}, + fixed: [true, false], // first field is a bigInt, second is a i32 + rest: e.a.type?.tag === "class" || e.a.type?.tag === "list" || e.a.type?.tag === "none" + } var inits: Array> = []; var stmts: Array> = []; var classes: Array> = []; @@ -835,9 +844,9 @@ function flattenExprToExpr(e : AST.Expr, blocks: Array = new Map(); + +let refNum = 0; // immutable reference number for objects +export let memHeap: Int32Array; +let activeStack: Set[]; // maintains objects created in the local scope +let inactiveRefList: ref[] = []; +let reclaimable: number = 0; + +// clean slate for each run +export function memInit(memory: Int32Array) { + refMap = new Map(); + refNum = 0; + memHeap = memory; + activeStack = [new Set()]; + reclaimable = 0; + inactiveRefList = []; + memory.fill(0); +} + +// generate a reference number for the memory address +export function memGenRef(addr: memAddr): ref { + let r; + if (inactiveRefList.length !== 0) { + r = inactiveRefList.pop(); + } else { + refNum++; + if (refNum > 2147483647) { + throw new MemError("maximum references allocated"); + } + r = refNum; + } + + activeStack[activeStack.length - 1].add(r); + refMap.set(r, addr); + return r; +} + +// get memory address from reference number +export function refLookup(r: ref) : ref { + if (refMap.has(r)) { + return refMap.get(r); + } + console.log(refMap, memHeap); + throw new MemError(`invalid reference: ${r}`) +} + + +// traverse nodes in a BFS manner amking updates to reference counts +export function traverseUpdate(r: ref, assignRef: ref, update: number, fromAssign: number): ref { // returns r so that stack state can be maintained + if (r === 0 || (assignRef !== 0 && memHeap[(refLookup(assignRef) / 4) + refNumOffset] <= 0)) { + return r + } + let explored : Set; + explored = new Set(); + 
explored.add(assignRef); // assignRef fixes issues for cycles in the ref chain + let travQueue = [r]; + if (update > 0) { + activeStack[activeStack.length - 1].add(r); + } + memHeap[(refLookup(r)/4) + refNumOffset] += update; + while (travQueue.length > 0) { + const curr = travQueue.shift(); + const addr = refLookup(curr) / 4; + if (memHeap[addr + refNumOffset] < 0) { + memHeap[addr + refNumOffset] = 0; + } + if (memHeap[addr + refNumOffset] === 0) { + reclaimable += memHeap[addr + amountOffset] + metadataAmt; + } + explored.add(curr); + + let types = memHeap[addr + typeOffset]; + let size = memHeap[addr + sizeOffset]; + const amt = memHeap[addr + amountOffset]; + + for (let i = 0; i <= size; i++) { + if ((types & (1 << i)) !== 0) { + let temp = memHeap[addr + dataOffset + i]; + if (temp !== 0 && !explored.has(temp)) { // 0 is None + explored.add(temp); + travQueue.push(temp); + if (fromAssign) { + memHeap[(refLookup(temp)/4) + refNumOffset] += update; + } + } + } + } + } + return r +} + +export function compact(): memAddr { + let free: memAddr = heapStart; + + function isGarbage(r: ref): boolean { + const addr = refLookup(r) / 4; + return memHeap[addr + refNumOffset] === 0; + } + function move(fromAddr: memAddr, toAddr: memAddr, amount: number) { + fromAddr /= 4; + toAddr /= 4; + for (let i = 0; i < amount + metadataAmt; i++) { + memHeap[toAddr + i] = memHeap[fromAddr + i]; + } + } + for (const [r, addr] of refMap) { + + if (!isGarbage(r)) { + const amount = memHeap[addr / 4 + amountOffset]; + move(addr, free, amount); + refMap.set(r, free); + free += ((amount + metadataAmt) * 4); + } else { + refMap.delete(r); + inactiveRefList.push(r); + } + } + return free; +} + +export function memReclaim(heap: number, amount: number): memAddr { + const memSize = memHeap.length; + + const memfits = () => (heap/4 + amount + metadataAmt) < memSize; + if (!memfits()) { + heap = compact(); + if (!memfits()) { + throw new MemError("out of memory :("); + } + } else if (reclaimable / 
4 > memHeap.length / 2) { + heap = compact(); + + } + + return heap; +} + + +export function addScope() { + activeStack.push(new Set()); +} + +export function removeScope() { + activeStack[activeStack.length - 1].forEach(r => traverseUpdate(r, 0, -1, 1)); + activeStack.pop(); +} + +export function getTypeInfo(fields: Value[]): number { + const binArr : number[] = fields.map(f => { + if (f.tag === "none" || f.tag === "num") { + return 1; + } + return 0; + }); + + if (binArr.length === 0) { + return 0; + } + return parseInt(binArr.reverse().join(""), 2); +} + + + + diff --git a/runner.ts b/runner.ts index cb318412..720c8978 100644 --- a/runner.ts +++ b/runner.ts @@ -12,6 +12,7 @@ import { PyValue, NONE, BOOL, NUM, CLASS, makeWasmFunType } from "./utils"; import { closureName, lowerProgram } from './lower'; import { monomorphizeProgram } from './monomorphizer'; import { optimizeProgram } from './optimization'; +import { memInit } from './memory'; import { wasmErrorImports } from './errors'; export type Config = { @@ -129,7 +130,8 @@ export async function run(source : string, config: Config) : Promise<[Value) { + //debug function for tests + function debugId(id: number, offset: number) { // id should be of type int and the first field in the object + for (const [_, addr] of refMap) { + let n = load_bignum(memHeap[addr/4 + dataOffset + 1], importObject.libmemory.load); + if (n as any == id) { + return memHeap[addr/4 + offset]; + } + } + throw new Error(`no such id: ${id}`); + } + it(name, async () => { + await run(source); + for (const p of pairs) { + chai.expect(debugId(p[0], p[1])).to.eq(p[2]) + } + }); +} +export function assertHeap(name:string, source: string, heap: memAddr) { + it(name, async () => { + await run(source); + chai.expect(compact()).to.eq(heap) + }); +} + + diff --git a/tests/import-object.test.ts b/tests/import-object.test.ts index fd2b9fb1..1663bf91 100644 --- a/tests/import-object.test.ts +++ b/tests/import-object.test.ts @@ -1,5 +1,6 @@ import { 
readFileSync } from "fs"; import { binop_bignum, binop_comp_bignum, builtin_bignum, load_bignum, des_check, bignum_to_i32 } from "../utils"; +import * as memMgmt from "../memory"; import { bigMath } from "../utils"; import { importObjectErrors } from "../errors"; @@ -33,8 +34,8 @@ function print(typ: Type, arg: any, loader: WebAssembly.ExportValue): any { export async function addLibs() { const bytes = readFileSync("build/memory.wasm"); const memory = new WebAssembly.Memory({initial:10, maximum:100}); - const memoryModule = await WebAssembly.instantiate(bytes, { js: { mem: memory } }) - importObject.libmemory = memoryModule.instance.exports, + const memoryModule = await WebAssembly.instantiate(bytes, { js: { mem: memory }, libmemory: {memGenRef: memMgmt.memGenRef, memReclaim: memMgmt.memReclaim} }) + importObject.libmemory = {...memoryModule.instance.exports, ...memMgmt}, importObject.memory_values = memory; importObject.js = {memory}; return importObject; diff --git a/tests/memory.test.ts b/tests/memory.test.ts new file mode 100644 index 00000000..e62e6bfd --- /dev/null +++ b/tests/memory.test.ts @@ -0,0 +1,357 @@ +import { dataOffset, heapStart, memHeap, refMap, refNumOffset, sizeOffset, typeOffset } from "../memory"; +import { load_bignum } from "../utils"; +import { assertMemState, assertHeap } from "./asserts.test"; +import { importObject } from "./import-object.test"; + +describe("Memory tests", () => { +// NOTE: all tests other than refNumOffset are commented because with additional metadata from groups +// these values are somewhat tedious to figure out +assertMemState("classes-from-object", ` + class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + + x: Rat = None + x = Rat() + `, [ + // first value in the tuple denotes id, NOTE: this is a hack since we dont have access to object names + [123, refNumOffset, 1], // 1 reference at the end of the program where object id is 123 + //[123, dataOffset + 1, 1], // x.y = 1 + 
//[123, sizeOffset, 2], // size is stored in 4-byte units + //[123, typeOffset, 0] +]); // all types are values or non-references + +assertMemState("multiple-references", ` + class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + x: Rat = None + y: Rat = None + z: Rat = None + x = Rat() + y = x + z = y + `, [ + // first value in the tuple denotes id, NOTE: this is a hack since we dont have access to object names + [123, refNumOffset, 3], // 3 references at the end of the program where object id is 123 + //[123, dataOffset + 1, 1], // x.y = 1 + //[123, sizeOffset, 2], // size is stored in 4-byte units + //[123, typeOffset, 0] +]); // all types are values or non-references + +assertMemState("removing-references", ` + class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + x: Rat = None + y: Rat = None + x = Rat() + y = x + y = None + `, [ + // first value in the tuple denotes id, NOTE: this is a hack since we dont have access to object names + [123, refNumOffset, 1], // 1 reference at the end of the program where object id is 123 + //[123, dataOffset + 1, 1], // x.y = 1 + //[123, sizeOffset, 2], // size is stored in 4-byte units + //[123, typeOffset, 0] +]); // all types are values or non-references + +assertMemState("remove-references-out-of-scope", ` + class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + def someFunc(self: Rat): + r: Rat = None + r = self + r.y = 100 + + x: Rat = None + y: Rat = None + x = Rat() + x.someFunc() + `, [ + // first value in the tuple denotes id, NOTE: this is a hack since we dont have access to object names + [123, refNumOffset, 1], // 1 reference at the end of the program where object id is 123 + //[123, dataOffset + 1, 100], // x.y = 100 + //[123, sizeOffset, 2], // size is stored in 4-byte units + //[123, typeOffset, 0] +]); // all types are values or non-references + +assertMemState("created-in-non-local-scope", ` + class 
Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + + def someFunc() -> Rat: + r: Rat = None + r = Rat() + r.y = 100 + return r + + x: Rat = None + x = someFunc() + `, [ + // first value in the tuple denotes id, NOTE: this is a hack since we dont have access to object names + [123, refNumOffset, 1], // 1 reference at the end of the program where object id is 123 + //[123, dataOffset + 1, 100], // x.y = 100 + //[123, sizeOffset, 2], // size is stored in 4-byte units + //[123, typeOffset, 0] +]); // all types are values or non-references + +assertMemState("access-not-assignment", ` + class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + + x: Rat = None + x = Rat() + x.y + print(x.y) + `, [ + // first value in the tuple denotes id, NOTE: this is a hack since we dont have access to object names + [123, refNumOffset, 1], // 1 reference at the end of the program where object id is 123 + //[123, dataOffset + 1, 1], // x.y = 100 + //[123, sizeOffset, 2], // size is stored in 4-byte units + //[123, typeOffset, 0] +]); // all types are values or non-references + +assertMemState("objects-as-fields", ` + class Link(object): + id: int = 0 + next: Link = None + def __init__(self: Link): + self.id = 123 + def add(l: Link, val: int) -> Link: + m: Link = None + m = Link() + m.id = val + l.next = m + return m + + x: Link = None + y: Link = None + x = Link() + y = x.add(456) + `, [ + // first value in the tuple denotes id, NOTE: this is a hack since we dont have access to object names + [123, refNumOffset, 1], // 1 reference at the end of the program where object id is 123 + [456, refNumOffset, 2], // 2 reference at the end of the program where object id is 456 + //[123, typeOffset, 2] // first field is a value, the next is a reference + ]); + +assertMemState("anon-object-deletion", ` + class Link(object): + id: int = 123 + next: Link = None + + def add(l: Link) -> Link: + l.next = Link() + l.id = 456 + return l.next 
+ + x: Link = None + x = Link() + x.add() + x = None + `, [ + // first value in the tuple denotes id, NOTE: this is a hack since we dont have access to object names + [123, refNumOffset, 0], // 0 references at the end of the program where object id is 123 + [456, refNumOffset, 0], // 0 references at the end of the program where object id is 456 + ]); + +assertMemState("simple-cycle", ` + class Link(object): + id: int = 0 + next: Link = None + + x: Link = None + y: Link = None + x = Link() + x.id = 123 + y = Link() + y.id = 456 + x.next = y + y.next = x + `, [ + // first value in the tuple denotes id, NOTE: this is a hack since we dont have access to object names + [123, refNumOffset, 2], // 2 references at the end of the program where object id is 123 + [456, refNumOffset, 2], // 2 references at the end of the program where object id is 456 + ]); + +assertMemState("simple-cycle-deletion", ` + class Link(object): + id: int = 0 + next: Link = None + + x: Link = None + y: Link = None + x = Link() + x.id = 123 + y = Link() + y.id = 456 + x.next = y + y.next = x + + x = None + `, [ + // first value in the tuple denotes id, NOTE: this is a hack since we dont have access to object names + [123, refNumOffset, 1], // 1 references at the end of the program where object id is 123 + [456, refNumOffset, 1], // 1 references at the end of the program where object id is 456 + ]); + +assertMemState("less-simple-cycle", ` + class Link(object): + id: int = 0 + next: Link = None + prev: Link = None + + x: Link = None + y: Link = None + z: Link = None + x = Link() + x.id = 123 + y = Link() + y.id = 456 + z = Link() + z.id = 789 + x.next = y + y.next = z + z.next = x + x.prev = z + y.prev = x + z.prev = y + `, [ + // first value in the tuple denotes id, NOTE: this is a hack since we dont have access to object names + [123, refNumOffset, 3], // 3 references in the program where object id is 123 + [456, refNumOffset, 3], // 3 references in the program where object id is 456 + [789, 
refNumOffset, 3], // 3 references in the program where object id is 789 + ]); // all types are values or non-references + +assertMemState("less-simple-cycle-deletion", ` + class Link(object): + id: int = 0 + next: Link = None + prev: Link = None + + x: Link = None + y: Link = None + z: Link = None + x = Link() + x.id = 123 + y = Link() + y.id = 456 + z = Link() + z.id = 789 + x.next = y + y.next = z + z.next = x + x.prev = z + y.prev = x + z.prev = y + + y = None + `, [ + // first value in the tuple denotes id, NOTE: this is a hack since we dont have access to object names + [123, refNumOffset, 2], // 2 references in the program where object id is 123 + [456, refNumOffset, 2], // 2 references in the program where object id is 456 + [789, refNumOffset, 2], // 2 references in the program where object id is 789 + ]); // all types are values or non-references + +assertMemState("less-simple-cycle-complete-deletion", ` + class Link(object): + id: int = 0 + next: Link = None + prev: Link = None + + x: Link = None + y: Link = None + z: Link = None + x = Link() + x.id = 123 + y = Link() + y.id = 456 + z = Link() + z.id = 789 + x.next = y + y.next = z + z.next = x + x.prev = z + y.prev = x + z.prev = y + + y = None + x.next = None + z.prev = None + `, [ + // first value in the tuple denotes id, NOTE: this is a hack since we dont have access to object names + [123, refNumOffset, 2], // 2 references in the program where object id is 123 + [456, refNumOffset, 0], // 0 references in the program where object id is 456 + [789, refNumOffset, 2], // 2 references in the program where object id is 789 + ]); // all types are values or non-references + + + assertHeap("single-delete", ` + class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + x: Rat = None + x = Rat() + x = None +`, heapStart); + + assertHeap("delete-in-a-loop", ` + class Rat(object): + id: int = 123 + y: int = 0 + def __init__(self: Rat): + self.y = 1 + n: int = 1124 + a: Rat = None + 
a = Rat() + while n >= 0: + a = Rat() + n = n - 1 + a = None + n = 0 + `, heapStart) // 2 ints in the object, each is 4 byte + + assertHeap("list-delete", ` + a : [int] = None + a = [1,2,3,4] + a = None +`, heapStart); + +assertHeap("gc-out-of-scope", ` + class Rat(object): + id: int = 123 + y: int = 456 + def __init__(self: Rat): + self.y = 1 + def someFunc(self: Rat): + r: Rat = None + r = Rat() + + x: Rat = None + y: Rat = None + x = Rat() + x.someFunc() + x = None +`, heapStart); // Expect amount of memory allocated for this code to be 2 ints + 4 metadata blocks + +}); + + diff --git a/utils.ts b/utils.ts index 422b016f..094b007e 100644 --- a/utils.ts +++ b/utils.ts @@ -1,4 +1,6 @@ import { Value, Type, Annotation, Literal } from "./ast"; +import { Value as IR_Value } from "./ir"; +import { metadataAmt, refMap } from "./memory"; export const bigMath = { // https://stackoverflow.com/a/64953280 @@ -97,11 +99,11 @@ export function load_bignum(addr: number, loader: WebAssembly.ExportValue): bigi const load = loader as CallableFunction; if (addr === 0) return BigInt(0); - const numlength = load(addr, 0); + const numlength = load(refMap.get(addr), 0); var bignum : bigint = BigInt(0); for (let i = Math.abs(numlength); i > 0; i--) { bignum <<= BigInt(31); - bignum += BigInt(load(addr, i) & 0x7fffffff); // mask number to 2^31 + bignum += BigInt(load(refMap.get(addr), i) & 0x7fffffff); // mask number to 2^31 } if (numlength < 0) bignum *= BigInt(-1); @@ -111,14 +113,14 @@ export function load_bignum(addr: number, loader: WebAssembly.ExportValue): bigi export function alloc_bignum(numlength: number, allocator: WebAssembly.ExportValue): number { const alloc = allocator as CallableFunction; // allocate one extra space for metadata (length) - return alloc(Math.abs(numlength)+1); + return alloc(Math.abs(numlength)+1, 0, Math.abs(numlength)+1); } export function store_bignum(addr: number, numlength: number, digits: number[], storer: WebAssembly.ExportValue) { const store = 
storer as CallableFunction; - store(addr, 0, numlength); + store(refMap.get(addr), 0, numlength); digits.forEach((d, i) => { - store(addr, i+1, d); + store(refMap.get(addr), i+1, d); }); } @@ -205,3 +207,17 @@ export function createMethodName(cls: string, method: string): string{ export function makeWasmFunType(paramNum: number): string { return `$callable${paramNum}param`; } + +export function getFieldType(fields: IR_Value[]): boolean[] { + const boolArr : boolean[] = fields.map(f => { + if (f.tag === "none" || f.tag == "num") { + return true; + } + return false; + }); + + if (boolArr.length === 0) { + return [false]; + } + return boolArr; +} \ No newline at end of file diff --git a/webstart.ts b/webstart.ts index b0043e3c..c067ad7e 100644 --- a/webstart.ts +++ b/webstart.ts @@ -2,6 +2,7 @@ import {BasicREPL} from './repl'; import { Type, Value, Annotation, Class } from './ast'; import { defaultTypeEnv, TypeCheckError } from './type-check'; import { NUM, BOOL, NONE, load_bignum, builtin_bignum, binop_bignum, binop_comp_bignum, bigMath, des_check, bignum_to_i32 } from './utils'; +import * as memMgmt from './memory'; import { importObjectErrors } from './errors'; import CodeMirror from 'codemirror'; @@ -155,8 +156,8 @@ function webStart() { const memory = new WebAssembly.Memory({ initial: 10, maximum: 100 }); const memoryModule = await fetch('memory.wasm').then(response => response.arrayBuffer() - ).then(bytes => - WebAssembly.instantiate(bytes, { js: { mem: memory } }) + ).then(bytes => + WebAssembly.instantiate(bytes, { js: { mem: memory }, libmemory: {memGenRef: memMgmt.memGenRef, memReclaim: memMgmt.memReclaim} }) ); function initCodeMirror() { @@ -254,8 +255,8 @@ function webStart() { $gt: (arg1: number, arg2: number) => binop_comp_bignum([arg1, arg2], bigMath.gt, memoryModule.instance.exports), $bignum_to_i32: (arg: number) => bignum_to_i32(arg, loader), }, + libmemory: {...memoryModule.instance.exports, ...memMgmt}, errors: importObjectErrors, - libmemory: 
memoryModule.instance.exports, memory_values: memory, js: { memory: memory } };