From 241c7b619dd6ea7c72df59c19896655d38f51016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antti=20Korpim=C3=A4ki?= Date: Thu, 24 Nov 2022 00:58:09 +0100 Subject: [PATCH] Feature: Make control characters visible Inspired by issue #10. This makes outputs a little noisier, but adds a lot of clarity in situations with invisible characters, which are typically very hard to understand otherwise. --- readme.markdown | 32 ++++++++++- src/main.js | 146 ++++++++++++++++++++++++++++++++++++++++-------- test.ls | 121 ++++++++++++++++++++++++++++++++++----- 3 files changed, 260 insertions(+), 39 deletions(-) diff --git a/readme.markdown b/readme.markdown index 61fe3d8..532cba4 100644 --- a/readme.markdown +++ b/readme.markdown @@ -13,8 +13,8 @@ preceding code blocks, runs them, and checks that the outputs match.
You choose the shell command(s). Many languages in the same doc are OK. - __Helpful failure diagnostics__ -
Colours (optional), diffs, line numbers, exit code and stderr, - etc. +
Colours (optional), diffs, line numbers, exit code, stderr, + invisible characters, etc. - __Parallel tests on multi-core machines__
Configurable. Result output ordering remains constant. - __[TAP][tap-spec] format output__ @@ -362,6 +362,34 @@ comment for each annotation. `txm` exits `0` if and only if all tests pass. +### Invisible characters + +In diff additions and deletions, [C0 Control Characters][control-chars] (such +as Null or Line Feed) and the Space character, which are ordinarily invisible, +are shown as the corresponding [Unicode Control Picture][control-picture]. +These take the form of small diagonally arranged letters, so Null becomes ␀, +Line Feed becomes ␊, and Space becomes ␠. This is the standard way to show +this set of invisible characters. + +Whenever such characters are used, an index will be present in the accompanying +test data, listing what original character each picture corresponds to, with +its name, C escape, and Unicode code point. This is intended to give as much +information as possible, because bugs relating to invisible characters are +awkward to debug. + +If an invisible character is not part of the diff, it is shown normally +(without a Control Picture replacement). + +To maintain line breaks, the Line Feed character is kept as-is, with its +Control Picture (␊) added at the end of the line for clarity. + +Other invisible characters (outside the C0 set and Space) are shown as-is. Examples +include the zero-width space and the right-to-left mark. 
+ + +[control-chars]: https://en.wikipedia.org/wiki/C0_and_C1_control_codes#C0_controls +[control-picture]: https://en.wikipedia.org/wiki/Unicode_control_characters#Control_pictures + ### Colour (color, for Americans grepping) Coloured output is automatically enabled when outputting directly to a diff --git a/src/main.js b/src/main.js index 8ad7814..bad3626 100644 --- a/src/main.js +++ b/src/main.js @@ -110,13 +110,35 @@ const runTests = (queue, options) => { } const makeColouredDiff = (expected, actual) => { - if (!color.enabled) return { expected, actual } const diff = dmp.diff_main(expected, actual) dmp.diff_cleanupSemantic(diff); - const withVisibleNewlines = (text) => - text.replace(new RegExp(os.EOL, 'g'), (x) => `↵${x}`) + // We intend to replace invisible control characters with standard + // Unicode Control Pictures. This set will be part of the return value. + // It will contain the Control Pictures that were used, so that they can + // be listed for easy reference by the user. + const controlPicturesUsed = new Set() + + // Debugging issues with control characters is very painful, so let's + // make it easier. + const withVisibleControlCharacters = text => { + + const controlPictures = [] + const resultingText = text.replace(/[\x00-\x1f\x20]/g, x => { + const pic = String.fromCharCode(x.charCodeAt(0) + 0x2400) + controlPictures.push(pic) + // For line feeds, also retain the actual line feed. Else everything + // that's part of a diff would be forced onto one line. 
+ if (x === '\n') { + return pic + '\n' + } else { + return pic + } + }) + + return { text: resultingText, controlPictures } + } const changeType = { NONE: 0, ADDED: 1, REMOVED: -1 } @@ -127,9 +149,12 @@ const runTests = (queue, options) => { return textSoFar + text case changeType.ADDED: return textSoFar - case changeType.REMOVED: + case changeType.REMOVED: { + const { text: resultingText, controlPictures } = withVisibleControlCharacters(text) + controlPictures.forEach(x => controlPicturesUsed.add(x)) return textSoFar + color.strikethrough(color.inverse(color.red( - withVisibleNewlines(text)))) + resultingText))) + } } }, '') @@ -138,9 +163,11 @@ const runTests = (queue, options) => { switch (change) { case changeType.NONE: return textSoFar + text - case changeType.ADDED: - return textSoFar + color.inverse(color.green( - withVisibleNewlines(text))) + case changeType.ADDED: { + const { text: resultingText, controlPictures } = withVisibleControlCharacters(text) + controlPictures.forEach(x => controlPicturesUsed.add(x)) + return textSoFar + color.inverse(color.green(resultingText)) + } case changeType.REMOVED: return textSoFar } @@ -148,6 +175,7 @@ const runTests = (queue, options) => { return { expected: highlightedExpected, actual: highlightedActual, + controlPicturesUsed, } } @@ -327,27 +355,50 @@ const runTests = (queue, options) => { return cb() } + // A helper function to generate the test-result note-text that lists + // invisible control characters replaced for clarity. 
+ function controlPicturesNote(type, controlPicturesUsed) { + return [...controlPicturesUsed.values()].map(picture => { + const name = nameOfControlCharacterForControlPicture(picture) + return `${picture} represents ${name}` + }).join('\n') + } + if (('output' in test) && stdout !== test.output.text) { - const {expected, actual} = makeColouredDiff(test.output.text, stdout) - fail(index, test.name, 'output mismatch', - Object.assign({ - 'expected stdout': expected, - 'actual stdout': actual, - program: test.program.code, - 'stderr': stderr, - }, collectAnnotationLocations(test))) + const {expected, actual, controlPicturesUsed} = + makeColouredDiff(test.output.text, stdout) + + const notes = {} + notes['expected stdout'] = expected + notes['actual stdout'] = actual + if (controlPicturesUsed.size > 0) { + notes['invisible characters in diff'] = + controlPicturesNote('stdout', controlPicturesUsed) + } + notes['program'] = test.program.code + notes['stderr'] = stderr + Object.assign(notes, collectAnnotationLocations(test)) + + fail(index, test.name, 'output mismatch', notes) return cb() } if (('error' in test) && stderr !== test.error.text) { - const {expected, actual} = makeColouredDiff(test.error.text, stderr) - fail(index, test.name, 'error mismatch', - Object.assign({ - 'expected stderr': expected, - 'actual stderr': actual, - program: test.program.code, - 'stdout': stdout, - }, collectAnnotationLocations(test))) + const {expected, actual, controlPicturesUsed} = + makeColouredDiff(test.error.text, stderr) + + const notes = {} + notes['expected stderr'] = expected + notes['actual stderr'] = actual + if (controlPicturesUsed.size > 0) { + notes['invisible characters in diff'] = + controlPicturesNote('stderr', controlPicturesUsed) + } + notes['program'] = test.program.code + notes['stdout'] = stdout + Object.assign(notes, collectAnnotationLocations(test)) + + fail(index, test.name, 'error mismatch', notes) return cb() } @@ -782,4 +833,51 @@ const parsingError = (name, 
failureReason, properties) => { process.exit(exitCode.FORMAT_ERROR) } +// Since the Unicode Control Pictures are not always self-explanatory, this +// function is for listing the names, C escapes, and Unicode code points of the +// characters they stand for. This is a lot of information, but bugs relating +// to invisible characters can be correspondingly hairy. +function nameOfControlCharacterForControlPicture(c) { + const codepoint = c.charCodeAt(0) - 0x2400 + const hex = codepoint.toString(16) + const unicodeCodepoint = "U+" + "0000".substring(0, 4 - hex.length) + hex + + return ({ + 0x00: 'Null ("\\0")', + 0x01: 'Start of Heading', + 0x02: 'Start of Text', + 0x03: 'End of Text', + 0x04: 'End of Transmission', + 0x05: 'Enquiry', + 0x06: 'Acknowledge', + 0x07: 'Bell ("\\a")', + 0x08: 'Backspace ("\\b")', + 0x09: 'Horizontal Tabulation ("\\t")', + 0x0A: 'Line Feed ("\\n")', + 0x0B: 'Vertical Tabulation ("\\v")', + 0x0C: 'Form Feed ("\\f")', + 0x0D: 'Carriage Return ("\\r")', + 0x0E: 'Shift Out', + 0x0F: 'Shift In', + 0x10: 'Data Link Escape', + 0x11: 'Device Control One', + 0x12: 'Device Control Two', + 0x13: 'Device Control Three', + 0x14: 'Device Control Four', + 0x15: 'Negative Acknowledge', + 0x16: 'Synchronous Idle', + 0x17: 'End of Transmission Block', + 0x18: 'Cancel', + 0x19: 'End of Medium', + 0x1A: 'Substitute', + 0x1B: 'Escape ("\\e")', + 0x1C: 'File Separator', + 0x1D: 'Group Separator', + 0x1E: 'Record Separator', + 0x1F: 'Unit Separator', + 0x20: 'Space (" ")', + 0x23: 'Space (" ")', + })[codepoint] + ` [${unicodeCodepoint}]` +} + export default parseAndRunTests diff --git a/test.ls b/test.ls index 197a662..ba8203a 100644 --- a/test.ls +++ b/test.ls @@ -114,12 +114,10 @@ txm-expect do hello - there hello - there """ expect-exit: 1 @@ -130,7 +128,6 @@ txm-expect do --- expected stdout: | hello - there actual stdout: | hi @@ -141,9 +138,9 @@ txm-expect do input location: | line 4 output location: | - lines 8-9 + line 8 error location: | - lines 13-14 + 
line 12 program location: | line 1 --- @@ -156,15 +153,15 @@ txm-expect do txm-expect do name: "stderr mismatch" input: """ - + - hi + console.error('hi') + console.log('hi') hello - there """ expect-exit: 1 @@ -175,18 +172,19 @@ txm-expect do --- expected stderr: | hello - there - actual stderr: '' + actual stderr: | + hi + program: | - cat + node stdout: | hi input location: | - line 4 + lines 4-5 error location: | - lines 8-9 + line 9 program location: | line 1 --- @@ -1698,6 +1696,103 @@ txm-expect do """ +txm-expect do + name: "stdout diff containing control characters" + input: """ + + + + + process.stdout.write("line 1\\r\\0\\nline 2") + + + + line 1 + line 2 + + """ + expect-exit: 1 + expect-stdout: """ + TAP version 13 + 1..1 + not ok 1 name: output mismatch + --- + expected stdout: | + line 1 + line 2␊ + + actual stdout: | + line 1␍␀ + line 2 + invisible characters in diff: | + ␊ represents Line Feed ("\\n") [U+000a] + ␍ represents Carriage Return ("\\r") [U+000d] + ␀ represents Null ("\\0") [U+0000] + program: | + node + stderr: '' + input location: | + line 5 + output location: | + lines 9-10 + program location: | + line 1 + --- + + # 0/1 passed + # FAILED 1 + + """ + +txm-expect do + name: "stderr diff containing control characters" + input: """ + + + + + process.stderr.write("line 1\\r\\0\\nline 2") + + + + line 1 + line 2 + + """ + expect-exit: 1 + expect-stdout: """ + TAP version 13 + 1..1 + not ok 1 name: error mismatch + --- + expected stderr: | + line 1 + line 2␊ + + actual stderr: | + line 1␍␀ + line 2 + invisible characters in diff: | + ␊ represents Line Feed ("\\n") [U+000a] + ␍ represents Carriage Return ("\\r") [U+000d] + ␀ represents Null ("\\0") [U+0000] + program: | + node + stdout: '' + input location: | + line 5 + error location: | + lines 9-10 + program location: | + line 1 + --- + + # 0/1 passed + # FAILED 1 + + """ + + txm-expect do name: "success colours work" force-color: true