-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial pass at jawk. A surprising amount of functionality is working!
- Loading branch information
Ethan Brown
committed
Jun 6, 2013
1 parent
8ba3698
commit 6fc4712
Showing
8 changed files
with
872 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,5 @@ logs | |
results | ||
|
||
npm-debug.log | ||
|
||
node_modules |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
aardvark 555-5553 1200/300 B | ||
alpo-net 555-3412 2400/1200/300 A | ||
barfly 555-7685 1200/300 A | ||
bites 555-1675 2400/1200/300 A | ||
camelot 555-0542 300 C | ||
core 555-2912 1200/300 C | ||
fooey 555-1234 2400/1200/300 B | ||
foot 555-6699 1200/300 B | ||
macfoo 555-6480 1200/300 A | ||
sdace 555-3430 2400/1200/300 A | ||
sabafoo 555-2127 1200/300 C |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
Jan 13 25 15 115 | ||
Feb 15 32 24 226 | ||
Mar 15 24 34 228 | ||
Apr 31 52 63 420 | ||
May 16 34 29 208 | ||
Jun 31 42 75 492 | ||
Jul 24 34 67 436 | ||
Aug 15 34 47 316 | ||
Sep 13 55 37 277 | ||
Oct 29 54 68 525 | ||
Nov 20 87 82 577 | ||
Dec 17 35 61 401 | ||
|
||
Jan 21 36 64 620 | ||
Feb 26 58 80 652 | ||
Mar 24 75 70 495 | ||
Apr 21 70 74 514 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
BEGIN { var barfl = 0; foo = 0; print('starting up!'); } | ||
/barfl/ { print($0 + ' (barfl=' + (++barfl) + ')') } # this does a bunch of nothing! | ||
/foo/ { print($0 + ' (foo=' + (++foo) + ')') } | ||
{ print( $0 +' ('+$1+','+$2+','+$3+','+$4+','+$5+','+$6+')' ) } | ||
END { print( 'total barfl: ' + barfl + '\ntotal foo: ' + foo ) } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
var fs = require('fs'), | ||
vm = require('vm'), | ||
readlines = require('./lib/readlines').readlines; | ||
|
||
// Command-line options understood by jawk.
//   -f, --file     takes a value: the script file
//   -v, --verbose  boolean flag: verbose output
var fileOption = { abbr: 'f', flag: false, help: 'Script file' };
var verboseOption = { abbr: 'v', flag: true, help: 'Verbose output' };

// parse the command line; positional arguments are read from opts[0], opts[1], ... further down
var opts = require('nomnom')
    .option( 'file', fileOption )
    .option( 'verbose', verboseOption )
    .parse();
|
||
// Work out where the jawk program text and the input file name come from:
//   with -f <file>:  the program is read from that file and the first positional argument is the input file
//   without -f:      the first positional argument is the program text and the second is the input file
var program;
var inputFname;
var inputEncoding = 'utf8';

if( opts.file ) {
    program = fs.readFileSync( opts.file, 'utf8' );
    // TODO: handle case of console input
    inputFname = opts[0];
} else {
    // without a script file the program must be given inline; fail with a readable message instead of
    // letting a TypeError escape when there are no positional arguments at all
    if( opts[0] === undefined ) {
        console.error( 'jawk: no program given (pass the program text as the first argument, or use -f <file>)' );
        process.exit( 1 );
    }
    program = String( opts[0] );
    // TODO: handle case of console input
    inputFname = opts[1];
}

// remove CRs so that programs written with CRLF line endings split cleanly on LF
program = program.replace( /\r/g, '' );
|
||
// The sandbox that every jawk action is evaluated in. Its properties are the globals a jawk program sees.
// note that for the functions below, this!=jawkContext, so we have to use jawkContext explicitly. this is a
// little fragile, but it works, so i'm leaving it for now....
var jawkContext = vm.createContext({
    // source of the regular expression used to split a record into its fields
    RS: '\\s+',
    // number of input records read so far
    NR: 0,
    // print(s) writes s followed by a newline; print() writes the current record ($0)
    print: function(s) { console.log( s===undefined ? jawkContext.$0 : s ); },
    // length(x) is the length of x; length() is the length of the current record ($0)
    length: function(x) {
        var value = x===undefined ? jawkContext.$0 : x;
        // no record has been loaded yet (or a null was passed in): treat it as empty rather than throwing
        if( value===undefined || value===null ) return 0;
        // strings and arrays report their own length; anything else (e.g. a number) is measured as text
        return typeof value.length === 'number' ? value.length : String( value ).length;
    }
});
|
||
// Parse the program text into rules. Each program line holds at most one rule of the form
//     <pattern> { <action> } # <comment>
// where every part is optional. BEGIN and END rules are kept aside; all other rules are collected, in
// program order, in `rules`.
var begin, end,
    rules = [];

// group 1: pattern, group 2: action (including its braces), group 3: trailing comment
var lineRegex = /^(.*?)\s*(\{.*?\})?\s*(#.*)?$/;

var programLineNumber = 0;
program.split('\n').forEach( function(line) {
    programLineNumber++;
    var m = line.match( lineRegex );
    if( !m ) return;
    // leading whitespace is not part of the pattern (it would otherwise defeat the slash stripping below)
    var pattern = m[1].trim();
    // blank lines and comment-only lines contain no rule at all; without this check they would turn into
    // an empty pattern with the default action, i.e. a rule that prints every input line
    if( pattern === '' && m[2] === undefined ) return;
    // note that rules can consist only of a pattern (with no action; the action defaults to printing the whole input line)
    // when an action is present, its surrounding brackets are removed
    var action = m[2] === undefined ? 'print($0)' : m[2].replace(/^\{(.*)\}$/,'$1').trim();
    var rule = { line: line, lineNumber: programLineNumber, pattern: pattern, action: action, comment: m[3] };
    if( pattern === 'BEGIN' ) {
        begin = rule;
    } else if( pattern === 'END' ) {
        end = rule;
    } else {
        try {
            // the rule may already have slashes around it, so we remove those
            rule.regex = new RegExp( pattern.replace(/^\/?(.*?)\/?$/,'$1') );
        } catch( ex ) {
            // a malformed regular expression: point at the offending program line instead of dumping a stack trace
            console.error( 'invalid jawk pattern:' );
            console.error( rule.lineNumber + ': ' + rule.line );
            process.exit( 1 );
        }
        rules.push( rule );
    }
});
|
||
// Runs the action of `rule` inside the sandbox `context`.
//   rule:    object with `action` (JavaScript source), plus `line` and `lineNumber` for error reporting
//   context: a contextified sandbox (see vm.createContext)
// The action source is compiled the first time the rule runs and the compiled script is cached on the rule,
// so a rule that fires for every input record is not recompiled each time.
// If the action fails to compile or throws while running, the offending program line is reported on stderr
// (so diagnostics do not mix with the program's own output) and the process exits with status 1.
function executeAction( rule, context ) {
    try {
        if( !rule.script ) rule.script = new vm.Script( rule.action );
        rule.script.runInContext( context );
    } catch( ex ) {
        console.error( 'invalid jawk rule:' );
        console.error( rule.lineNumber + ': ' + rule.line );
        if( opts.verbose ) console.error( ex );
        process.exit( 1 );
    }
}
|
||
// field variables $1..$MAX_FIELDS always exist once a record has been loaded; the ones past the last
// field of the current record are empty strings
var MAX_FIELDS = 1000;

// Loads `line` into `context` as the current record: $0 is the whole line, NF the number of fields, and
// $1, $2, ... the fields obtained by splitting the line on the regular expression held in context.RS.
function setRecord( context, line ) {
    context.$0 = line;
    var fields = line.split( new RegExp( context.RS ) );
    // the field count goes into NF; NR is the record counter and must not be overwritten here
    context.NF = fields.length;
    var j;
    for( j=0; j<fields.length; j++ ) context['$'+(j+1)] = fields[j];
    // clear the fields left over from a previous, longer record
    for( j=fields.length; j<MAX_FIELDS; j++ ) context['$'+(j+1)] = '';
}

// the BEGIN action runs once, before any input is read
if( begin ) executeAction( begin, jawkContext );

readlines( inputFname, inputEncoding, function(line) {
    jawkContext.NR++;
    // every rule whose pattern matches the line runs, in program order; the record is reloaded before
    // each action so that each one starts from the unmodified input line
    for( var i=0; i<rules.length; i++ ) {
        if( !line.match( rules[i].regex ) ) continue;
        setRecord( jawkContext, line );
        executeAction( rules[i], jawkContext );
    }
});

// the END action runs once, after the whole input has been processed
if( end ) executeAction( end, jawkContext );
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
var fs = require('fs'),
    StringDecoder = require('string_decoder').StringDecoder;
|
||
exports.readlines = function( inputFname, inputEncoding, processLine ) { | ||
|
||
try { | ||
var fd = fs.openSync( inputFname, 'r' ); | ||
|
||
var bufSize = 4096, | ||
buf = new Buffer( bufSize ), | ||
bufStr, | ||
remainder = '', | ||
newlineIdx, | ||
bytesRead = 0; | ||
|
||
do { | ||
bytesRead = fs.readSync( fd, buf, 0, bufSize ); | ||
// get a string representation of the buffer, and remove any pesky CRs (lines in Windows files end in CRLF, not just LF) | ||
bufStr = buf.toString(inputEncoding,0,bytesRead).replace(/\r/g,''); | ||
// we split on LFs; if there are any full lines contained within the buffer, they will be processed | ||
var lines = bufStr.split('\n'); | ||
if( lines.length > 1 ) { | ||
// process first line (which starts with the last buffer's remainder) | ||
processLine( remainder + lines[0] ); | ||
remainder = ''; | ||
// process everything but the last line (which will become the remainder)... | ||
for( var i=1; i<lines.length-1; i++ ) { | ||
// process line | ||
processLine( lines[i] ); | ||
} | ||
} | ||
// the last line becomes the remainder | ||
remainder += lines[lines.length-1]; | ||
} while( bytesRead > 0 ); | ||
// if the line isn't terminated with a line ending, there will be a remainder | ||
if( remainder.length ) processLine( remainder ); | ||
|
||
} finally { | ||
if( fd ) fs.close( fd ); | ||
} | ||
|
||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{ | ||
"name": "jawk", | ||
"description": "Interpreter for jawk, an awk-like data processing language with JavaScript syntax.", | ||
"author": "Ethan Brown <[email protected]>", | ||
"contributors": [ | ||
"Ethan Brown <[email protected]>" | ||
], | ||
"version": "0.0.1beta", | ||
"license": "GPL", | ||
"dependencies": { | ||
"nomnom": "~1.6" | ||
}, | ||
"keywords": [ | ||
"awk", | ||
"data-driven", | ||
"interpreter", | ||
"language" | ||
], | ||
"repository": "https://github.com/Jammerwoch/jawk.git", | ||
"main": "jawk", | ||
"bin" : { | ||
"jawk" : "./jawk.js" | ||
} | ||
} | ||
|