From ce36d505d226839c7afd7212d7205ff1b9b96ed8 Mon Sep 17 00:00:00 2001 From: Tom Sherman Date: Fri, 8 Nov 2024 14:19:05 +0000 Subject: [PATCH] Comprehensive punycode handling --- .../[identifier]/[collection]/[rkey]/page.tsx | 2 +- .../app/at/[identifier]/[collection]/page.tsx | 2 +- .../app/at/[identifier]/page.tsx | 2 +- .../app/at/_lib/did-components.tsx | 6 +- .../atproto-browser/lib/atproto-server.ts | 7 +- .../atproto-browser/lib/navigation.test.ts | 22 ++--- packages/atproto-browser/lib/navigation.ts | 84 ++++++++++++++++--- packages/atproto-browser/lib/util.ts | 8 +- 8 files changed, 98 insertions(+), 35 deletions(-) diff --git a/packages/atproto-browser/app/at/[identifier]/[collection]/[rkey]/page.tsx b/packages/atproto-browser/app/at/[identifier]/[collection]/[rkey]/page.tsx index 28e06e34..8a8ccafd 100644 --- a/packages/atproto-browser/app/at/[identifier]/[collection]/[rkey]/page.tsx +++ b/packages/atproto-browser/app/at/[identifier]/[collection]/[rkey]/page.tsx @@ -54,7 +54,7 @@ export default async function RkeyPage({ <>

Record diff --git a/packages/atproto-browser/app/at/[identifier]/[collection]/page.tsx b/packages/atproto-browser/app/at/[identifier]/[collection]/page.tsx index ead7dc87..85cae394 100644 --- a/packages/atproto-browser/app/at/[identifier]/[collection]/page.tsx +++ b/packages/atproto-browser/app/at/[identifier]/[collection]/page.tsx @@ -27,7 +27,7 @@ export default async function CollectionPage({

{params.collection} records{" "} diff --git a/packages/atproto-browser/app/at/[identifier]/page.tsx b/packages/atproto-browser/app/at/[identifier]/page.tsx index 1f64e2fa..c5f4b238 100644 --- a/packages/atproto-browser/app/at/[identifier]/page.tsx +++ b/packages/atproto-browser/app/at/[identifier]/page.tsx @@ -28,7 +28,7 @@ export default async function IdentifierPage({ return ( <> - +

diff --git a/packages/atproto-browser/app/at/_lib/did-components.tsx b/packages/atproto-browser/app/at/_lib/did-components.tsx index 5ca09904..7bd746a7 100644 --- a/packages/atproto-browser/app/at/_lib/did-components.tsx +++ b/packages/atproto-browser/app/at/_lib/did-components.tsx @@ -4,6 +4,7 @@ import { resolveIdentity } from "@/lib/atproto-server"; import { DidCollections } from "./collection-server"; import { Suspense } from "react"; import Link from "@/lib/link"; +import { domainToUnicode } from "url"; export function CollapsedDidSummary({ did }: { did: string }) { return ( @@ -47,7 +48,10 @@ export async function DidHandle({ did }: { did: string }) { return ( <> {handle ? ( - {handle} + // WARN: There is potential for homograph attacks here, in the future we should consider punycode encoding ambiguous characters as per (for example) https://chromium.googlesource.com/chromium/src/+/main/docs/idn.md. + + {domainToUnicode(handle)} + ) : ( "⚠️ Invalid Handle" )}{" "} diff --git a/packages/atproto-browser/lib/atproto-server.ts b/packages/atproto-browser/lib/atproto-server.ts index 02dbe52c..ac671817 100644 --- a/packages/atproto-browser/lib/atproto-server.ts +++ b/packages/atproto-browser/lib/atproto-server.ts @@ -9,6 +9,7 @@ import { cache } from "react"; import { unstable_cache as nextCache } from "next/cache"; import { isValidHandle } from "@atproto/syntax"; import { isDid } from "@atproto/did"; +import { domainToASCII } from "url"; function timeoutWith( timeout: number, @@ -57,8 +58,10 @@ export async function resolveIdentity( const decoded = decodeURIComponent(didOrHandle); let didStr; let didFromHandle = null; - if (isValidHandle(decoded)) { - didFromHandle = await resolveHandle(decoded).catch(() => undefined); + if (isValidHandle(domainToASCII(decoded))) { + didFromHandle = await resolveHandle(domainToASCII(decoded)).catch( + () => undefined, + ); didStr = didFromHandle; if (!didStr) { return { diff --git a/packages/atproto-browser/lib/navigation.test.ts b/packages/atproto-browser/lib/navigation.test.ts index f4e13543..2db70ab6 100644 --- a/packages/atproto-browser/lib/navigation.test.ts +++ b/packages/atproto-browser/lib/navigation.test.ts @@ -36,20 +36,17 @@ const makeValidCases = (authority: string) => }); const VALID_CASES = [ - ...makeValidCases("valid-handle.com"), + ...makeValidCases("example.com"), ...makeValidCases("did:plc:hello"), ...makeValidCases("did:web:hello"), + // Unicode should be preserved, we handle punycode transformation within the fetch of the page not on navigation + ...makeValidCases("mañana.com"), - // punycode - ...PATH_SUFFIXES.flatMap((suffix) => { - const result = `/at/xn--maana-pta.com${suffix}`; - return [ - [`mañana.com${suffix}`, result], - [`at://mañana.com${suffix}`, result], - ]; - }), + ["@example.com", "/at/example.com"], + ["@mañana.com", "/at/mañana.com"], - ["@valid-handle.com", "/at/valid-handle.com"], + // Not sure about this case. Are bare hosts supported in the spec? For now we allow it to error out at a later stage + ["host", "/at/host"], ]; describe("navigates valid input", () => { @@ -73,9 +70,8 @@ describe("strips whitespace and zero-width characters from valid input", () => { describe("shows error on invalid input", () => { test.each([ - ["@", "Invalid URI: @"], - ["@invalid", "Invalid URI: @invalid"], - // ["invalid", "Invalid URI: invalid"], + ["@", "Invalid handle: @"], + ["@invalid", "Invalid handle: @invalid"], ])('"%s" -> "%s"', async (input, expectedError) => { expect((await navigateAtUri(input)).error).toMatch(expectedError); }); diff --git a/packages/atproto-browser/lib/navigation.ts b/packages/atproto-browser/lib/navigation.ts index b69f38f3..72f09d1d 100644 --- a/packages/atproto-browser/lib/navigation.ts +++ b/packages/atproto-browser/lib/navigation.ts @@ -1,11 +1,12 @@ import "server-only"; import { getAtUriPath } from "./util"; -import { AtUri, isValidHandle } from "@atproto/syntax"; +import { isValidHandle } from "@atproto/syntax"; import { redirect } from "next/navigation"; import { parse as parseHtml } from "node-html-parser"; import { parse as parseLinkHeader } from "http-link-header"; import { domainToASCII } from "url"; +import { isDid } from "@atproto/did"; export async function navigateAtUri(input: string) { // Remove all zero-width characters and weird control codes from the input @@ -13,18 +14,30 @@ export async function navigateAtUri(input: string) { .replace(/[\u200B-\u200D\uFEFF\u202C]/g, "") .trim(); - // Try punycode encoding the input as a domain name and parse it as a handle - const handle = parseHandle(domainToASCII(sanitizedInput) || sanitizedInput); + const handle = parseHandle(sanitizedInput); if (handle) { - redirect(getAtUriPath(new AtUri(`at://${handle}`))); + redirect( + getAtUriPath({ + host: handle, + }), + ); + } else if (sanitizedInput.startsWith("@")) { + return { + error: `Invalid handle: ${sanitizedInput}`, + }; } const result = sanitizedInput.startsWith("http://") || sanitizedInput.startsWith("https://") ? await getAtUriFromHttp(sanitizedInput) - : parseUri(sanitizedInput); + : parseUri( + // Add at:// to start if it's missing + sanitizedInput.startsWith("at://") + ? sanitizedInput + : `at://${sanitizedInput}`, + ); if ("error" in result) { return result; @@ -33,11 +46,20 @@ export async function navigateAtUri(input: string) { redirect(getAtUriPath(result.uri)); } +/** + * Using our own type to allow for unicode handles/hosts which is not currently supported by the ATProto library + */ +type MinimalAtUri = { + host: string; + collection?: string; + rkey?: string; +}; + type UriParseResult = | { error: string; } - | { uri: AtUri }; + | { uri: MinimalAtUri }; async function getAtUriFromHttp(url: string): Promise { const controller = new AbortController(); @@ -60,7 +82,7 @@ async function getAtUriFromHttp(url: string): Promise { const result = ref ? parseUri(ref.uri) : null; if (result && "uri" in result) { controller.abort(); - redirect(getAtUriPath(result.uri)); + return result; } } @@ -80,7 +102,6 @@ async function getAtUriFromHttp(url: string): Promise { link.getAttribute("href")?.startsWith("at://"), ); if (atUriAlternate) { - console.log(atUriAlternate.getAttribute("href")); const result = parseUri( decodeURIComponent(atUriAlternate.getAttribute("href")!), ); @@ -94,19 +115,56 @@ async function getAtUriFromHttp(url: string): Promise { }; } +export const ATP_URI_REGEX = + // proto- --did-------------- name --path---- --query-- --hash-- + /^(at:\/\/)?((?:did:[a-z0-9:%-]+)|(?:.*))(\/[^?#\s]*)?(\?[^#\s]+)?(#[^\s]+)?$/i; + +/** + * Parses an AT URI but allows the host to be a unicode handle. + * + * Unicode handles are preserved and not punycode encoded so that they can be displayed as-is in eg. the URL bar and URI form. + * + * There is potential for homograph attacks here, in the future we should consider punycode encoding ambiguous characters as per (for example) https://chromium.googlesource.com/chromium/src/+/main/docs/idn.md. This also applies to + */ function parseUri(input: string): UriParseResult { - try { - return { uri: new AtUri(input) }; - } catch (_) { + const match = ATP_URI_REGEX.exec(input); + if (match) { + if (!match[2]) { + return { + error: `Invalid URI: ${input}`, + }; + } + + const host = match[2]; + + if (host.startsWith("did:") && !isDid(host)) { + return { + error: `Invalid DID in URI: ${input}`, + }; + } + + const pathname = match[3]; return { - error: `Invalid URI: ${input}`, + uri: { + host, + collection: pathname?.split("/").filter(Boolean)[0], + rkey: pathname?.split("/").filter(Boolean)[1], + }, }; } + + return { + error: `Invalid URI: ${input}`, + }; } function parseHandle(input: string): string | null { // Remove the leading @ const handle = input.replace(/^@/, ""); - if (!isValidHandle(handle)) return null; + + if (!isValidHandle(handle) && !isValidHandle(domainToASCII(handle))) { + return null; + } + // We check for the punycode encoded version of the handle but always return the preserved input so that we can display the original handle return handle; } diff --git a/packages/atproto-browser/lib/util.ts b/packages/atproto-browser/lib/util.ts index 3a92be65..2b4e40bc 100644 --- a/packages/atproto-browser/lib/util.ts +++ b/packages/atproto-browser/lib/util.ts @@ -1,5 +1,3 @@ -import { AtUri } from "@atproto/syntax"; - export const utcDateFormatter = new Intl.DateTimeFormat("en-US", { dateStyle: "medium", timeStyle: "short", @@ -10,7 +8,11 @@ export function isNotNull(x: T | null): x is T { return x !== null; } -export function getAtUriPath(uri: AtUri): string { +export function getAtUriPath(uri: { + host: string; + collection?: string; + rkey?: string; +}): string { return `/at/${[uri.host, uri.collection, uri.rkey] .filter(Boolean) .map((c) => c && decodeURIComponent(c))