From 012eb399ecda937a8e7f32c31a96a111cdcac934 Mon Sep 17 00:00:00 2001 From: Philip McGrath Date: Tue, 14 Nov 2017 20:44:51 -0600 Subject: [PATCH] path-utils: Uniform maximum length on all platforms. When necessary, replace some bytes from the middle rather than hashing the path. (This means it is still human-readable, mostly.) --- gui-lib/framework/private/path-utils.rkt | 101 ++++++++++++++--------- 1 file changed, 64 insertions(+), 37 deletions(-) diff --git a/gui-lib/framework/private/path-utils.rkt b/gui-lib/framework/private/path-utils.rkt index 497a413be..b4abaea10 100644 --- a/gui-lib/framework/private/path-utils.rkt +++ b/gui-lib/framework/private/path-utils.rkt @@ -2,7 +2,6 @@ (require "sig.rkt" racket/list - openssl/md5 "../preferences.rkt") (import) @@ -97,57 +96,85 @@ (build-path base name-element)])) +(define candidate-separators + `(#"!" #"%" #"_" #"|" #":" #">" #"^" #"$" #"@" #"*" #"?")) + +(define separator-regexps + (map (compose1 byte-regexp regexp-quote) candidate-separators)) ; encode-as-path-element : dir-path path-element -> path-element ; N.B. generate-backup-name may supply a relative directory, but ; we should always use a complete one. -; Using simplify-path does that and ensures no 'up or 'same -; Using ! is not completely robust, but works well enough for Emacs. +; That is handled by simplify+explode-path->bytes. ; Windows has limitations on path lengths. Racket handles MAX_PATH ; by using "\\?\" paths when necessary, but individual elements must -; be shorter than lpMaximumComponentLength. If necessary, we avoid -; this by hashing the path. +; be shorter than lpMaximumComponentLength. +; We respect this limit (on all platforms, for consistency) +; by replacing some bytes from the middle if necessary. (define (encode-as-path-element base-maybe-relative name) - (define windows? - (eq? 'windows (system-path-convention-type))) (define illegal-rx - (if windows? - #rx#"\\\\" - #rx#"/")) - (define pth - (simplify-path (build-path base-maybe-relative name))) + (case (system-path-convention-type) + [(windows) #rx#"\\\\"] + [else #rx#"/"])) + (define l-bytes + (simplify+explode-path->bytes (build-path base-maybe-relative name))) + (define separator-byte + (or (let ([all-components (apply bytes-append l-bytes)]) + (for/first ([sep (in-list candidate-separators)] + [rx (in-list separator-regexps)] + #:unless (regexp-match? rx all-components)) + sep)) + #"!")) (define legible-name-bytes - (let ([elements (explode-path pth)]) - (apply - bytes-append - (add-between - (cons (regexp-replace* illegal-rx - (path->bytes (car elements)) - #"!") - (for/list ([elem (in-list (cdr elements))]) - (regexp-replace* illegal-rx - (path-element->bytes elem) - #"!"))) - #"!")))) - (cond - [(or (not windows?) - (< (bytes-length legible-name-bytes) - (lpMaximumComponentLength))) - (bytes->path-element legible-name-bytes)] - [else - (string->path-element - (regexp-replace* - #rx"\\\\" ; NOT illegal-rx : this is a string regexp - (md5 (open-input-bytes (path->bytes pth))) - "!"))])) - - + (apply + bytes-append + separator-byte + (add-between + (for/list ([elem (in-list l-bytes)]) + (regexp-replace* illegal-rx + (path-element->bytes elem) + separator-byte)) + separator-byte))) + (define num-legible-bytes + (bytes-length legible-name-bytes)) + (bytes->path-element + (cond + [(< num-legible-bytes + (lpMaximumComponentLength)) + legible-name-bytes] + [else + (define replacement + (bytes-append separator-byte #"..." separator-byte)) + (define num-excess-bytes + (+ (- num-legible-bytes + (lpMaximumComponentLength)) + 5 ; extra margin of safety + (bytes-length replacement))) + (define num-bytes-to-keep-per-side + (floor (/ (- num-legible-bytes num-excess-bytes) + 2))) + (bytes-append + (subbytes legible-name-bytes 0 num-bytes-to-keep-per-side) + replacement + (subbytes legible-name-bytes (- num-legible-bytes + num-bytes-to-keep-per-side)))]))) + + +;; simplify+explode-path->bytes : path? -> (listof bytes?) +;; Useful because path-element->bytes doesn't work on root paths. +;; Using simplify-path ensures no 'up or 'same. +(define (simplify+explode-path->bytes pth) + (define elems + (explode-path (simplify-path pth))) + (cons (path->bytes (car elems)) + (map path-element->bytes (cdr elems)))) ;; lpMaximumComponentLength : -> real? ;; Returns the maximum length of an element of a "\\?\" path on Windows. ;; For now, assuming 255, but really this should be ;; "the value returned in the lpMaximumComponentLength parameter ;; of the GetVolumeInformation function". +;; See https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath (define (lpMaximumComponentLength) 255)