Skip to content

Commit

Permalink
Initial pass at jawk. A surprising amount of functionality is working!
Browse files Browse the repository at this point in the history
  • Loading branch information
Ethan Brown committed Jun 6, 2013
1 parent 8ba3698 commit 6fc4712
Show file tree
Hide file tree
Showing 8 changed files with 872 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ logs
results

npm-debug.log

node_modules
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions example/BBS-list
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
aardvark 555-5553 1200/300 B
alpo-net 555-3412 2400/1200/300 A
barfly 555-7685 1200/300 A
bites 555-1675 2400/1200/300 A
camelot 555-0542 300 C
core 555-2912 1200/300 C
fooey 555-1234 2400/1200/300 B
foot 555-6699 1200/300 B
macfoo 555-6480 1200/300 A
sdace 555-3430 2400/1200/300 A
sabafoo 555-2127 1200/300 C
17 changes: 17 additions & 0 deletions example/inventory-shipped
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Jan 13 25 15 115
Feb 15 32 24 226
Mar 15 24 34 228
Apr 31 52 63 420
May 16 34 29 208
Jun 31 42 75 492
Jul 24 34 67 436
Aug 15 34 47 316
Sep 13 55 37 277
Oct 29 54 68 525
Nov 20 87 82 577
Dec 17 35 61 401

Jan 21 36 64 620
Feb 26 58 80 652
Mar 24 75 70 495
Apr 21 70 74 514
5 changes: 5 additions & 0 deletions example/test.jawk
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
BEGIN { var barfl = 0; foo = 0; print('starting up!'); } # runs once before any input: zero both match counters
/barfl/ { print($0 + ' (barfl=' + (++barfl) + ')') } # lines containing "barfl": echo the line with a running barfl count
/foo/ { print($0 + ' (foo=' + (++foo) + ')') } # lines containing "foo": echo the line with a running foo count
{ print( $0 +' ('+$1+','+$2+','+$3+','+$4+','+$5+','+$6+')' ) } # empty pattern matches every line: echo it with its first six fields (absent fields are empty strings)
END { print( 'total barfl: ' + barfl + '\ntotal foo: ' + foo ) } # runs once after all input: report both totals
96 changes: 96 additions & 0 deletions jawk.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
var fs = require('fs'),
vm = require('vm'),
readlines = require('./lib/readlines').readlines;

// Command-line options, parsed with nomnom. Besides the named options below, the
// positional arguments are read further down as opts[0] and opts[1].
var opts = require('nomnom')
// file (abbr f): takes a value -- the path of a script file holding the jawk program
.option( 'file', {
abbr: 'f',
flag: false,
help: 'Script file'
})
// verbose (abbr v): boolean switch; when set, the underlying exception of a failing rule is printed
.option( 'verbose', {
abbr: 'v',
flag: true,
help: 'Verbose output'
})
.parse();

// program:       the jawk program text (CRs removed so it can be split on LF alone)
// inputFname:    path of the input file the program is run against
// inputEncoding: encoding used to decode the input file
var program;
var inputFname;
var inputEncoding = 'utf8';

if( opts.file ) {
    // the program comes from a script file, so the first positional argument is the input file
    program = fs.readFileSync( opts.file, 'utf8' );
    // TODO: handle case of console input
    inputFname = opts[0];
} else {
    // no script file: the first positional argument is the program itself, the second the input file
    if( opts[0] === undefined ) {
        // fail with a usable message instead of a TypeError from calling .replace on undefined
        console.error( 'jawk: no program given (pass it as the first argument, or use -f <script file>)' );
        process.exit( 1 );
    }
    // String() in case the argument parser hands back a non-string value (e.g. a numeric argument)
    program = String( opts[0] );
    // TODO: handle case of console input
    inputFname = opts[1];
}

// strip CRs so that programs written with Windows (CRLF) line endings split cleanly on LF
program = program.replace(/\r/g,'');

// The sandbox every jawk action is evaluated in; its properties are the globals a jawk program sees.
// The helpers below are not called with this===jawkContext, so they look the current record up through
// the jawkContext variable itself. That coupling is a little fragile, but it works.
var jawkContext = vm.createContext({
    // regex source used to split an input line into fields
    RS: '\\s+',
    // counter bumped once per input line
    NR: 0,
    // print(s): write s to stdout; called without an argument it writes the current line ($0)
    print: function(text) {
        if( text === undefined ) text = jawkContext.$0;
        console.log( text );
    },
    // length(x): length of x; called without an argument it is the length of the current line ($0)
    length: function(value) {
        if( value === undefined ) return jawkContext.$0.length;
        return value.length;
    }
});

// Parsed program: the optional BEGIN and END rules, plus the ordinary pattern rules in source order.
var begin, end,
    rules = [];

// A program line has the shape   pattern { action } # comment   where every part is optional.
//   m[1] = pattern, m[2] = action including its braces, m[3] = trailing comment
var lineRegex = /^(.*?)\s*(\{.*?\})?\s*(#.*)?$/;

var programLineNumber = 0;
program.split('\n').forEach( function(line) {
    programLineNumber++;
    var m = line.match( lineRegex );
    if( !m ) return;

    // trim so that indentation in front of the pattern does not become part of the regex
    var pattern = m[1].trim();

    // A line with neither a pattern nor an action is blank or comment-only. It is not a rule; without
    // this check it would turn into a rule that matches every input line and prints it.
    if( pattern === '' && m[2] === undefined ) return;

    // note that rules can consist only of a pattern (with no action; the action defaults to printing
    // the whole input line); otherwise the surrounding braces are removed from the action
    var action = m[2] === undefined ? 'print($0)' : m[2].replace(/^\{(.*)\}$/,'$1').trim();

    var rule = { line: line, lineNumber: programLineNumber, pattern: pattern, action: action, comment: m[3] };
    if( pattern === 'BEGIN' ) {
        begin = rule;
    } else if( pattern === 'END' ) {
        end = rule;
    } else {
        // the pattern may already have slashes around it, so we remove those; an empty pattern
        // (action-only rule) compiles to a regex that matches every line
        rule.regex = new RegExp( pattern.replace(/^\/?(.*?)\/?$/,'$1') );
        rules.push( rule );
    }
});

/**
 * Evaluates one rule's action inside the given sandbox.
 *
 * If the action throws, the offending program line is reported on stdout (followed by the
 * exception itself when the verbose option is set) and the process exits with status 1.
 *
 * @param {Object} rule    parsed rule; its action, lineNumber and line properties are read
 * @param {Object} context contextified sandbox the action runs in
 */
function executeAction( rule, context ) {
    var failed = false,
        failure;

    try {
        vm.runInContext( rule.action, context );
    } catch( err ) {
        // use a flag rather than testing the caught value: anything can be thrown, including undefined
        failed = true;
        failure = err;
    }

    if( !failed ) return;

    console.log( 'invalid jawk rule:' );
    console.log( rule.lineNumber + ': ' + rule.line );
    if( opts.verbose ) console.log( failure );
    process.exit( 1 );
}

// Driver: run BEGIN, apply the pattern rules to every input line, then run END.

// Give NF a value before any action runs, so that BEGIN/END can reference it even when no input
// line was ever split into fields (an undefined sandbox global would throw inside the action).
if( jawkContext.NF === undefined ) jawkContext.NF = 0;

if( begin ) executeAction( begin, jawkContext );

readlines( inputFname, inputEncoding, function(line) {
    // NR counts input lines
    jawkContext.NR++;
    // every rule whose pattern matches is run, in program order
    for( var i=0; i<rules.length; i++ ) {
        if( !line.match( rules[i].regex ) ) continue;

        jawkContext.$0 = line;
        // build record fields; RS is read on every match because an action may have changed it
        var fields = line.split( new RegExp( jawkContext.RS ) );
        // the field count belongs in NF; storing it in NR (as before) destroyed the line counter
        jawkContext.NF = fields.length;
        var j;
        for( j=0; j<fields.length; j++ ) jawkContext['$'+(j+1)] = fields[j];
        // blank the remaining fields up to $1000: this clears values left over from a longer line and
        // keeps references to absent fields from throwing inside an action
        for( j=fields.length; j<1000; j++ ) jawkContext['$'+(j+1)] = '';
        executeAction( rules[i], jawkContext );
    }
});

if( end ) executeAction( end, jawkContext );
42 changes: 42 additions & 0 deletions lib/readlines.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
var fs = require('fs'),
    StringDecoder = require('string_decoder').StringDecoder;

exports.readlines = function( inputFname, inputEncoding, processLine ) {

try {
var fd = fs.openSync( inputFname, 'r' );

var bufSize = 4096,
buf = new Buffer( bufSize ),
bufStr,
remainder = '',
newlineIdx,
bytesRead = 0;

do {
bytesRead = fs.readSync( fd, buf, 0, bufSize );
// get a string representation of the buffer, and remove any pesky CRs (lines in Windows files end in CRLF, not just LF)
bufStr = buf.toString(inputEncoding,0,bytesRead).replace(/\r/g,'');
// we split on LFs; if there are any full lines contained within the buffer, they will be processed
var lines = bufStr.split('\n');
if( lines.length > 1 ) {
// process first line (which starts with the last buffer's remainder)
processLine( remainder + lines[0] );
remainder = '';
// process everything but the last line (which will become the remainder)...
for( var i=1; i<lines.length-1; i++ ) {
// process line
processLine( lines[i] );
}
}
// the last line becomes the remainder
remainder += lines[lines.length-1];
} while( bytesRead > 0 );
// if the line isn't terminated with a line ending, there will be a remainder
if( remainder.length ) processLine( remainder );

} finally {
if( fd ) fs.close( fd );
}

}

25 changes: 25 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"name": "jawk",
"description": "Interpreter for jawk, an awk-like data processing language with JavaScript syntax.",
"author": "Ethan Brown <[email protected]>",
"contributors": [
"Ethan Brown <[email protected]>"
],
"version": "0.0.1beta",
"license": "GPL",
"dependencies": {
"nomnom": "~1.6"
},
"keywords": [
"awk",
"data-driven",
"interpreter",
"language"
],
"repository": "https://github.com/Jammerwoch/jawk.git",
"main": "jawk",
"bin" : {
"jawk" : "./jawk.js"
}
}

0 comments on commit 6fc4712

Please sign in to comment.