-
Notifications
You must be signed in to change notification settings - Fork 1
/
harnocode.js
323 lines (272 loc) · 9.53 KB
/
harnocode.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
const esprima = require("esprima");
const escodegen = require("escodegen");
const fs = require("fs");
/**
* Format JavaScript code as an ASCII-art image.
*
* @param {String} code JavaScript code to format
* @param {String} mask Image mask containing " " and "x" chars
* @param {object} options Additional options
*
* Options:
*
* splitStrings: If true, long string literals could be splitted into
* concatenation of substrings for nicer image.
* For example, "hello world" could be split into
* "hello" + " world". This lets code fit
* the image more nicely. However, the resulting
* AST will be different from the original, so there
* will be no way to validate that the resulting
* code matches original.
*
*
* @return {String} formatted code
*/
exports.harnocode = function (code, mask, options) {
options = options || {};
// Regenerating code from AST will inserting omitted semicolons.
// We need it for safer manipulations later on
const format = {
safeConcatenation: true,
escapeless: true,
semicolons: true,
renumber: true,
quotes: "single",
}
code = escodegen.generate(esprima.parse(code), {format: format});
const tokens = tokenize(code);
const lines = splitMaskToGroups(mask);
let tokenIndex = 0;
let groupIndex = 0;
let result = [];
let splitStrings = (options.splitStrings === undefined) ? false : options.splitStrings;
let safe = (options.safe === undefined) ? true : options.safe;
function processMask() {
return lines.map(groups => {
let lineResult = [];
let offset = 0;
if (tokenIndex >= tokens.length)
return;
groups.forEach((group, i) => {
let groupWidth = group[0].length;
let isBeforeNewline = (i == groups.length-1);
if (group.index < offset) {
// Happens when we overflow preceeding groups
// In this case we should shrink current group
// or skip it entirely
groupWidth -= (offset - group.index);
if (groupWidth <= 0)
return;
}
let groupTokens = takeTokens(tokens, groupWidth, tokenIndex,
{isBeforeNewline, safe, splitStrings});
// TODO: remove dummy space tokens
let groupTokensJustified = justify(groupTokens, groupWidth);
let padding = " ".repeat(Math.max(0, group.index - offset));
offset += padding.length + groupTokensJustified.length;
tokenIndex += groupTokens.length;
lineResult.push(padding);
lineResult.push(groupTokensJustified);
});
return lineResult.join("");
});
}
do {
result = result.concat(processMask());
} while (tokenIndex < tokens.length && options.repeat);
// Add remaining tokens if mask is shorter than code
let remainder = tokens.slice(tokenIndex).join("");
result.push(remainder);
let resultStr = result.join("\n").trimEnd();
return resultStr;
}
/**
 * Turns source code into a flat list of token strings.
 *
 * The code is tokenized with esprima; each token contributes its `value`.
 * A single " " entry is inserted between two neighbouring tokens whenever
 * needSpace() reports that they cannot be written back to back.
 *
 * @param {String} code JavaScript source
 * @return {String[]} token values interleaved with " " separators
 */
function tokenize(code) {
  const tokens = esprima.tokenize(code);
  const result = [];

  tokens.forEach((token, i) => {
    // Separator goes between the previous token and this one.
    if (i > 0 && needSpace(tokens[i - 1], token))
      result.push(" ");
    result.push(token.value);
  });

  return result;
}
/**
 * Splits a string literal into a concatenation of two literals.
 *
 * @param {String} str     the literal INCLUDING its surrounding quotes
 * @param {Number} size    number of characters (of the unquoted content) to
 *                         keep in the first piece; falsy values mean 5
 * @param {object} options `safe`: if truthy, the result is wrapped in
 *                         "(" ... ")" tokens
 * @return {String[]} `[first, '+', second]` (optionally parenthesised), or
 *                    `[str]` unchanged when the literal is too short or no
 *                    safe split point exists
 */
function splitStringLiteral(str, size, options) {
  const opts = options || {};
  let splitAt = size || 5;

  if (str.length < splitAt + 3)
    return [str];

  // Make sure we don't split the string in the middle of an escape sequence.
  // This implementation is naive: assume that the longest escape has the
  // form `\u1234` and keep moving the split point right while any of the
  // last 5 characters before it is a backslash.
  // The window start is clamped to 0: a negative start would make slice()
  // count from the END of the string and silently skip the check.
  while (splitAt < str.length &&
         str.slice(Math.max(0, splitAt - 4), splitAt + 1).includes('\\')) {
    splitAt++;
  }

  const quote = str[0];
  const body = str.slice(1, -1); // remove quotes

  // The split point ran off the end: nothing sensible to split.
  if (splitAt >= body.length)
    return [str];

  const result = [
    quote + body.slice(0, splitAt) + quote,
    '+',
    quote + body.slice(splitAt) + quote,
  ];

  if (opts.safe) {
    // TODO: do not add nested parenthesis
    result.unshift('(');
    result.push(')');
  }

  return result;
}
/**
 * Tells whether a space has to be kept between two adjacent tokens.
 *
 * The two token values are glued together and run through the tokenizer
 * again. A space is needed unless that yields exactly the same two tokens;
 * a tokenizer error also counts as "space needed".
 *
 * This is an easy (but slow) way to ensure we don't break tokenization by
 * omitting a space. If it's too slow, replace it with a bunch of rules like
 * in harnocode v1 (e.g. rules based on whether each side is a Punctuator).
 *
 * @param {object} token1 left token (its `value` is used)
 * @param {object} token2 right token (its `value` is used)
 * @return {boolean} true if the tokens must be separated by a space
 */
function needSpace(token1, token2) {
  let glued;
  try {
    glued = esprima.tokenize(token1.value + token2.value);
  } catch (error) {
    return true;
  }

  // TODO: tokenization should not change tokens
  const unchanged = glued.length === 2 &&
                    glued[0].value == token1.value &&
                    glued[1].value == token2.value;
  return !unchanged;
}
/**
 * Checks that two pieces of source code are equivalent once normalized.
 *
 * Both inputs are parsed and regenerated; the regenerated texts must be
 * identical. Problems are reported on stderr (a parse error, or a line diff
 * of the two regenerated texts).
 *
 * @param {String} code1 original code
 * @param {String} code2 harnified code
 * @return {boolean} true if both regenerate to the same text
 */
function validate(code1, code2) {
  let original;
  let harnified;

  try {
    original = escodegen.generate(esprima.parse(code1));
    harnified = escodegen.generate(esprima.parse(code2));
  } catch (error) {
    process.stderr.write(`Error parsing harnified code: \n ${error}`);
    return false;
  }

  if (original === harnified)
    return true;

  process.stderr.write(`Broken AST!`);
  // Loaded lazily: only needed on the failure path.
  const LineDiff = require("line-diff");
  process.stderr.write(new LineDiff(original, harnified).toString());
  return false;
}
/**
 * Breaks a mask into per-line lists of groups.
 *
 * @param {String} mask multi-line mask text
 * @return {Array[]} one entry per mask line (as produced by
 *                   splitLineToGroups); an empty array if the mask is
 *                   nothing but whitespace
 */
function splitMaskToGroups(mask) {
  const isBlank = mask.trim() === "";
  return isBlank
    ? []
    : mask.split("\n").map((line) => splitLineToGroups(line));
}
/**
 * Finds every run of consecutive "x" characters in one mask line.
 *
 * @param {String} line a single line of the mask
 * @return {Array} regex match objects, in order; `match[0]` is the run and
 *                 `match.index` its starting column
 */
function splitLineToGroups(line) {
  const groups = [];
  for (const match of line.matchAll(/x+/g)) {
    groups.push(match);
  }
  return groups;
}
/**
 * Picks the consecutive tokens, starting at `tokenIndex`, that go into one
 * mask group of width `groupLength`.
 *
 * Tokens are taken while their total length still fits into the group. The
 * first token is always taken, even if it is too long, so the result may be
 * wider than `groupLength`.
 *
 * @param {String[]} tokens      token list; MODIFIED IN PLACE when a string
 *                               literal gets split (the literal is replaced
 *                               by the pieces from splitStringLiteral)
 * @param {Number}   groupLength width of the group in characters
 * @param {Number}   tokenIndex  index of the first token to consider
 * @param {object}   options     isBeforeNewline: this group is the last one
 *                               on its line, so extra tokens may be pulled
 *                               in to avoid a line break at an unsafe spot;
 *                               splitStrings: allow splitting string literals;
 *                               the whole object is also forwarded to
 *                               splitStringLiteral (which reads `safe`)
 * @return {String[]} the tokens taken for this group (empty if no tokens are
 *                    left)
 */
function takeTokens(tokens, groupLength, tokenIndex, options)
{
options = options || {};
let toTake = [];
let toTakeLength = 0;
// Keywords that must not be directly followed by a line break (see below).
let specialTokens = ["var", "do", "while", "continue", "break", "return", "throw"];
// Tokens that must not be the first thing on a new line.
let cantStartWith = ["++", "--", "=>"]; // hacky
// Only single-quoted literals are recognised.
const isStringLiteral = (s) => (s.startsWith("'") && s.endsWith("'"));
const isLongStringLiteral = (s) => isStringLiteral(s) && s.length >= 10;
while (tokenIndex < tokens.length) {
let token = tokens[tokenIndex];
let canTake = (toTakeLength + token.length) <= groupLength;
// Always take the first token, whatever its length...
if (toTakeLength == 0)
canTake = true;
// ...and also the token following a lone leading space.
if (toTakeLength == 1 && toTake[0] == ' ')
canTake = true;
// Number of tokens to take in this iteration (0 means stop).
let toTakeNext = canTake? 1 : 0;
if (options.isBeforeNewline && !canTake) {
// prevent inserting newline after special tokens as it may affect AST
if (specialTokens.includes(toTake.at(-1)))
toTakeNext = 1;
if (toTake.at(-1) == ' ' && specialTokens.includes(toTake.at(-2)))
toTakeNext = 1;
// Can't start newline with the following tokens (TODO more? refactor!)
// Look up to two tokens ahead; if one of them is such a token, take
// everything up to and including it. Later checks override earlier ones.
if (cantStartWith.includes(tokens.at(tokenIndex + 0)))
toTakeNext = 1;
if (cantStartWith.includes(tokens.at(tokenIndex + 1)))
toTakeNext = 2;
if (cantStartWith.includes(tokens.at(tokenIndex + 2)))
toTakeNext = 3;
}
// If we haven't reached the desired group width yet
// and we're about to discard a long string literal,
// don't do it. Take the string literal and split it later.
if (!toTakeNext && options.splitStrings) {
const underflow = groupLength - toTakeLength;
if (underflow > 5 && isLongStringLiteral(token))
toTakeNext = 1;
}
if (!toTakeNext)
break;
// Move the chosen number of tokens into the group.
while (toTakeNext) {
toTake.push(tokens[tokenIndex]);
toTakeLength += tokens[tokenIndex].length;
tokenIndex += 1;
toTakeNext -= 1;
}
}
// If the group ends with a long dangling string literal, split it
const lastToken = toTake.at(-1);
const overflow = toTakeLength - groupLength;
if (options.splitStrings && overflow > 5 && isStringLiteral(lastToken) && lastToken.length > 5) {
let splitSize = lastToken.length - overflow - 1 - 2; // account for quotes and '('
splitSize = Math.max(2, splitSize);
const split = splitStringLiteral(lastToken, splitSize, options);
// Only the first two pieces of the split stay in this group...
toTake.splice(-1, 1, ...split.slice(0, 2));
// ...while the shared token list gets all pieces in place of the literal
// (tokenIndex - 1 is the literal's position, as it was taken last).
tokens.splice(tokenIndex - 1, 1, ...split);
}
return toTake;
}
/**
 * Joins tokens into one string, spreading extra spaces between them so the
 * result is `width` characters wide where possible.
 *
 * Spaces are only ever added BETWEEN tokens: a single token is returned
 * as is, and tokens that are already too wide are simply concatenated.
 * With no tokens at all, a string of `width` spaces is returned.
 *
 * @param {String[]} tokens tokens to join
 * @param {Number}   width  target width in characters
 * @return {String} the justified text
 */
function justify(tokens, width) {
  if (tokens.length === 0)
    return " ".repeat(width);

  const gapCount = tokens.length - 1;
  const contentWidth = tokens.reduce((total, token) => total + token.length, 0);
  const slack = width - contentWidth;

  // Give each gap what is needed to reach its rounded share of the running
  // total of spaces.
  const gaps = [];
  let placed = 0;
  for (let g = 0; g < gapCount; g++) {
    const target = Math.round(slack * (g / gapCount));
    const extra = Math.max(0, target - placed);
    gaps.push(extra);
    placed += extra;
  }

  // Whatever has not been handed out yet goes into the first gap.
  if (gapCount > 0)
    gaps[0] += slack - placed;

  let text = "";
  tokens.forEach((token, i) => {
    text += token;
    if (i < gapCount && gaps[i] > 0)
      text += " ".repeat(gaps[i]);
  });
  return text;
}
exports.formatFile = function (path, mask, options) {
var program = fs.readFileSync(path).toString();
var rest = "";
if (options.bryntum) {
options['skip-validation'] = true;
let index = program.indexOf(';');
rest = program.slice(index);
program = program.slice(0, index);
}
let result = exports.harnocode(program, mask, options);
if (!options['skip-validation'])
if (!validate(program, result))
process.exit(1);
if (options.bryntum) {
result = result + "\n" + rest;
}
process.stdout.write("// harnocode \\\\\n");
process.stdout.write(result);
}