diff --git a/scripts/lib/parse-config.js b/scripts/lib/parse-config.js index c4b45550..d24dad17 100644 --- a/scripts/lib/parse-config.js +++ b/scripts/lib/parse-config.js @@ -8,6 +8,48 @@ import { capitalize, PROJECT_ROOT } from './utils.js' // These messages are embedded in others and do not define Mapeo data types const EMBEDDED_MESSAGES = ['tags', 'common'] +// We avoid creating data type IDs that match these, since blobs (e.g. icons) +// can be stored in Mapeo hypercores, and we want to avoid trying to parse a +// file blob as a Mapeo datatype. This just minimizes cases where the Mapeo +// indexer might try to parse (and fail) a document that is not actually a Mapeo +// doc. +const KNOWN_FILE_SIGNATURE_PREFIXES = [ + [0xef, 0xbb, 0xbf], // UTF-8 BOM + [0xfe, 0xff], // UTF-16 BOM + [0x3c, 0x3f, 0x78, 0x6d, 0x6c], // ` { + let doesMatch = true + for (let i = 0; i < Math.min(prefix.length, 6); i++) { + if (prefix[i] !== buf[i]) { + doesMatch = false + } + } + return doesMatch + } + ) + if (matchingKnownFileSignature) { + throw new Error( + 'This datatype ID (' + + dataTypeId + + ') matches a known file signature, please choose a different one' + ) + } +} + /** * Parse the proto message types and check: * @@ -65,6 +107,7 @@ export function parseConfig() { throw new Error('Duplicate dataTypeId in ' + filepath) } duplicateIdCheck.set(dataTypeId, schemaName) + validateDatatypeId(dataTypeId) dataTypeIds[schemaName] = dataTypeId