Skip to content

Commit

Permalink
Feature: Make control characters visible
Browse files Browse the repository at this point in the history
Inspired by issue #10.

This makes outputs a little noisier, but adds a lot of clarity in
situations with invisible characters, which are typically very hard to
understand otherwise.
  • Loading branch information
anko committed Nov 23, 2022
1 parent 7b7e394 commit 241c7b6
Show file tree
Hide file tree
Showing 3 changed files with 260 additions and 39 deletions.
32 changes: 30 additions & 2 deletions readme.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ preceding code blocks, runs them, and checks that the outputs match.
<br><sup>You choose the shell command(s). Many languages in the same doc
are OK.</sup>
- __Helpful failure diagnostics__
<br><sup>Colours (optional), diffs, line numbers, exit code and stderr,
etc.</sup>
<br><sup>Colours (optional), diffs, line numbers, exit code, stderr,
invisible characters, etc.</sup>
- __Parallel tests on multi-core machines__
<br><sup>Configurable. Result output ordering remains constant.</sup>
- __[TAP][tap-spec] format output__
Expand Down Expand Up @@ -362,6 +362,34 @@ comment for each annotation.
`txm` exits `0` if and only if all tests pass.
### Invisible characters
In diff additions and deletions, [C0 Control Characters][control-chars] (such
as Null or Line Feed) and the Space character, which are ordinarily invisible, are shown as the
corresponding [Unicode Control Picture][control-picture]. These take the form
of small diagonally arranged letters, so Null becomes ␀, Line Feed becomes ␊,
and Space becomes ␠. This is the standard way to show this set of invisible
characters.
Whenever such characters are used, an index will be present in the accompanying
test data, listing what original character each picture corresponds to, with
its name, C escape, and Unicode code point. This is intended to give as much
information as possible, because bugs relating to invisible characters are
awkward to debug.
If an invisible character is not part of the diff, it is shown normally
(without a Control Picture replacement).
To maintain line breaks, the Line Feed character is kept as-is, with its
Control Picture (␊) added at the end of the line for clarity.
Invisible characters that aren't part of the C0 set are shown as-is. Examples
include the zero-width space, or right-to-left text flow marker.
[control-chars]: https://en.wikipedia.org/wiki/C0_and_C1_control_codes#C0_controls
[control-picture]: https://en.wikipedia.org/wiki/Unicode_control_characters#Control_pictures
### Colour <sub>(color, for Americans grepping)</sub>
Coloured output is automatically enabled when outputting directly to a
Expand Down
146 changes: 122 additions & 24 deletions src/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,35 @@ const runTests = (queue, options) => {
}

const makeColouredDiff = (expected, actual) => {
if (!color.enabled) return { expected, actual }

const diff = dmp.diff_main(expected, actual)
dmp.diff_cleanupSemantic(diff);

const withVisibleNewlines = (text) =>
text.replace(new RegExp(os.EOL, 'g'), (x) => `↵${x}`)
// We intend to replace invisible control characters with standard
// Unicode Control Pictures. This set will be part of the return value.
// It will contain the Control Pictures that were used, so that they can
// be listed for easy reference by the user.
const controlPicturesUsed = new Set()

// Invisible characters are miserable to debug, so swap each one for its
// visible Unicode Control Picture (the character 0x2400 code points higher).
//
// Takes the text to process.  Returns an object with:
//   text            - the text with every character in the range
//                     U+0000..U+0020 replaced by its picture
//   controlPictures - the pictures that were substituted, in order of
//                     appearance (duplicates included)
const withVisibleControlCharacters = text => {
  const toControlPicture = character =>
    String.fromCharCode(0x2400 + character.charCodeAt(0))

  const picturesSeen = []

  const substitute = match => {
    const picture = toControlPicture(match)
    picturesSeen.push(picture)
    // A line feed keeps its real line break after the picture; without it,
    // a multi-line diff segment would collapse onto a single line.
    return match === '\n' ? `${picture}\n` : picture
  }

  return {
    text: text.replace(/[\x00-\x1f\x20]/g, substitute),
    controlPictures: picturesSeen,
  }
}

const changeType = { NONE: 0, ADDED: 1, REMOVED: -1 }

Expand All @@ -127,9 +149,12 @@ const runTests = (queue, options) => {
return textSoFar + text
case changeType.ADDED:
return textSoFar
case changeType.REMOVED:
case changeType.REMOVED: {
const { text: resultingText, controlPictures } = withVisibleControlCharacters(text)
controlPictures.forEach(x => controlPicturesUsed.add(x))
return textSoFar + color.strikethrough(color.inverse(color.red(
withVisibleNewlines(text))))
resultingText)))
}
}
}, '')

Expand All @@ -138,16 +163,19 @@ const runTests = (queue, options) => {
switch (change) {
case changeType.NONE:
return textSoFar + text
case changeType.ADDED:
return textSoFar + color.inverse(color.green(
withVisibleNewlines(text)))
case changeType.ADDED: {
const { text: resultingText, controlPictures } = withVisibleControlCharacters(text)
controlPictures.forEach(x => controlPicturesUsed.add(x))
return textSoFar + color.inverse(color.green(resultingText))
}
case changeType.REMOVED:
return textSoFar
}
}, '')
return {
expected: highlightedExpected,
actual: highlightedActual,
controlPicturesUsed,
}
}

Expand Down Expand Up @@ -327,27 +355,50 @@ const runTests = (queue, options) => {
return cb()
}

// Builds the text of the test-result note that explains which invisible
// control characters were swapped for Control Pictures.
//
// `type` is accepted but not used by this function.
// `controlPicturesUsed` is a collection whose values() yields the pictures.
// Returns one "<picture> represents <name>" line per picture, joined by
// line feeds (an empty string when there are no pictures).
function controlPicturesNote(type, controlPicturesUsed) {
  const lines = []
  for (const picture of controlPicturesUsed.values()) {
    const name = nameOfControlCharacterForControlPicture(picture)
    lines.push(`${picture} represents ${name}`)
  }
  return lines.join('\n')
}

if (('output' in test) && stdout !== test.output.text) {
const {expected, actual} = makeColouredDiff(test.output.text, stdout)
fail(index, test.name, 'output mismatch',
Object.assign({
'expected stdout': expected,
'actual stdout': actual,
program: test.program.code,
'stderr': stderr,
}, collectAnnotationLocations(test)))
const {expected, actual, controlPicturesUsed} =
makeColouredDiff(test.output.text, stdout)

const notes = {}
notes['expected stdout'] = expected
notes['actual stdout'] = actual
if (controlPicturesUsed.size > 0) {
notes['invisible characters in diff'] =
controlPicturesNote('stdout', controlPicturesUsed)
}
notes['program'] = test.program.code
notes['stderr'] = stderr
Object.assign(notes, collectAnnotationLocations(test))

fail(index, test.name, 'output mismatch', notes)
return cb()
}

if (('error' in test) && stderr !== test.error.text) {
const {expected, actual} = makeColouredDiff(test.error.text, stderr)
fail(index, test.name, 'error mismatch',
Object.assign({
'expected stderr': expected,
'actual stderr': actual,
program: test.program.code,
'stdout': stdout,
}, collectAnnotationLocations(test)))
const {expected, actual, controlPicturesUsed} =
makeColouredDiff(test.error.text, stderr)

const notes = {}
notes['expected stderr'] = expected
notes['actual stderr'] = actual
if (controlPicturesUsed.size > 0) {
notes['invisible characters in diff'] =
controlPicturesNote('stderr', controlPicturesUsed)
}
notes['program'] = test.program.code
notes['stdout'] = stdout
Object.assign(notes, collectAnnotationLocations(test))

fail(index, test.name, 'error mismatch', notes)
return cb()
}

Expand Down Expand Up @@ -782,4 +833,51 @@ const parsingError = (name, failureReason, properties) => {
process.exit(exitCode.FORMAT_ERROR)
}

// Since the Unicode Control Pictures are not always self-explanatory, this
// function is for listing the names, C escapes, and Unicode code points of the
// characters they stand for. This is a lot of information, but bugs relating
// to invisible characters can be correspondingly hairy.
//
// Parameter:
//   c - a string whose first UTF-16 code unit is a Control Picture
//       (U+2400..U+2421, or U+2423).
//
// Returns a string of the form `<name> [U+xxxx]`, where the code point is
// that of the character the picture REPRESENTS (not of the picture itself),
// in lowercase hex padded to at least 4 digits.  If the input is not a
// recognised Control Picture, returns `Unknown character [U+xxxx]` with the
// input's own code point, rather than a misleading "undefined".
function nameOfControlCharacterForControlPicture(c) {
  // Keyed by the picture's offset from U+2400.
  const names = {
    0x00: 'Null ("\\0")',
    0x01: 'Start of Heading',
    0x02: 'Start of Text',
    0x03: 'End of Text',
    0x04: 'End of Transmission',
    0x05: 'Enquiry',
    0x06: 'Acknowledge',
    0x07: 'Bell ("\\a")',
    0x08: 'Backspace ("\\b")',
    0x09: 'Horizontal Tabulation ("\\t")',
    0x0A: 'Line Feed ("\\n")',
    0x0B: 'Vertical Tabulation ("\\v")',
    0x0C: 'Form Feed ("\\f")',
    0x0D: 'Carriage Return ("\\r")',
    0x0E: 'Shift Out',
    0x0F: 'Shift In',
    0x10: 'Data Link Escape',
    0x11: 'Device Control One',
    0x12: 'Device Control Two',
    0x13: 'Device Control Three',
    0x14: 'Device Control Four',
    0x15: 'Negative Acknowledge',
    0x16: 'Synchronous Idle',
    0x17: 'End of Transmission Block',
    0x18: 'Cancel',
    0x19: 'End of Medium',
    0x1A: 'Substitute',
    0x1B: 'Escape ("\\e")',
    0x1C: 'File Separator',
    0x1D: 'Group Separator',
    0x1E: 'Record Separator',
    0x1F: 'Unit Separator',
    0x20: 'Space (" ")',
    0x21: 'Delete',
    0x23: 'Space (" ")',
  }

  // Most pictures sit exactly 0x2400 above the character they represent.
  // These are the exceptions: U+2421 stands for Delete (U+007F), and U+2423
  // (open box) is an alternative picture for Space (U+0020).
  const irregularPictures = { 0x21: 0x7f, 0x23: 0x20 }

  const formatCodepoint = n => 'U+' + n.toString(16).padStart(4, '0')

  const pictureCodepoint = c.charCodeAt(0)
  const offset = pictureCodepoint - 0x2400
  const name = names[offset]

  if (name === undefined) {
    // Empty input has no code unit at all (charCodeAt gives NaN).
    return Number.isNaN(pictureCodepoint)
      ? 'Unknown character'
      : `Unknown character [${formatCodepoint(pictureCodepoint)}]`
  }

  const codepoint = offset in irregularPictures
    ? irregularPictures[offset]
    : offset

  return `${name} [${formatCodepoint(codepoint)}]`
}

export default parseAndRunTests
Loading

0 comments on commit 241c7b6

Please sign in to comment.