Skip to content

Commit

Permalink
Comprehensive punycode handling
Browse files Browse the repository at this point in the history
  • Loading branch information
tom-sherman committed Nov 8, 2024
1 parent 9bffe65 commit ce36d50
Show file tree
Hide file tree
Showing 8 changed files with 98 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ export default async function RkeyPage({
<>
<link
rel="alternate"
href={`at://${params.identifier}/${params.collection}/${params.rkey}`}
href={`at://${identityResult.didDocument.id}/${params.collection}/${params.rkey}`}
/>
<h2>
Record
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ export default async function CollectionPage({
<div>
<link
rel="alternate"
href={`at://${params.identifier}/${params.collection}`}
href={`at://${identityResult.didDocument.id}/${params.collection}`}
/>
<h1>
{params.collection} records{" "}
Expand Down
2 changes: 1 addition & 1 deletion packages/atproto-browser/app/at/[identifier]/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export default async function IdentifierPage({

return (
<>
<link rel="alternate" href={`at://${params.identifier}`} />
<link rel="alternate" href={`at://${identityResult.didDocument.id}`} />
<h1>
<DidHandle did={identityResult.didDocument.id} />
</h1>
Expand Down
6 changes: 5 additions & 1 deletion packages/atproto-browser/app/at/_lib/did-components.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { resolveIdentity } from "@/lib/atproto-server";
import { DidCollections } from "./collection-server";
import { Suspense } from "react";
import Link from "@/lib/link";
import { domainToUnicode } from "url";

export function CollapsedDidSummary({ did }: { did: string }) {
return (
Expand Down Expand Up @@ -47,7 +48,10 @@ export async function DidHandle({ did }: { did: string }) {
return (
<>
{handle ? (
<Link href={`/at/${handle}`}>{handle}</Link>
// WARN: There is potential for homograph attacks here. In the future we should consider punycode-encoding ambiguous characters as per (for example) https://chromium.googlesource.com/chromium/src/+/main/docs/idn.md.
<Link href={`/at/${domainToUnicode(handle)}`}>
{domainToUnicode(handle)}
</Link>
) : (
"⚠️ Invalid Handle"
)}{" "}
Expand Down
7 changes: 5 additions & 2 deletions packages/atproto-browser/lib/atproto-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { cache } from "react";
import { unstable_cache as nextCache } from "next/cache";
import { isValidHandle } from "@atproto/syntax";
import { isDid } from "@atproto/did";
import { domainToASCII } from "url";

function timeoutWith<T>(
timeout: number,
Expand Down Expand Up @@ -57,8 +58,10 @@ export async function resolveIdentity(
const decoded = decodeURIComponent(didOrHandle);
let didStr;
let didFromHandle = null;
if (isValidHandle(decoded)) {
didFromHandle = await resolveHandle(decoded).catch(() => undefined);
if (isValidHandle(domainToASCII(decoded))) {
didFromHandle = await resolveHandle(domainToASCII(decoded)).catch(
() => undefined,
);
didStr = didFromHandle;
if (!didStr) {
return {
Expand Down
22 changes: 9 additions & 13 deletions packages/atproto-browser/lib/navigation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,17 @@ const makeValidCases = (authority: string) =>
});

const VALID_CASES = [
...makeValidCases("valid-handle.com"),
...makeValidCases("example.com"),
...makeValidCases("did:plc:hello"),
...makeValidCases("did:web:hello"),
// Unicode should be preserved: punycode transformation happens when the page is fetched, not on navigation.
...makeValidCases("mañana.com"),

// punycode
...PATH_SUFFIXES.flatMap((suffix) => {
const result = `/at/xn--maana-pta.com${suffix}`;
return [
[`mañana.com${suffix}`, result],
[`at://mañana.com${suffix}`, result],
];
}),
["@example.com", "/at/example.com"],
["@mañana.com", "/at/mañana.com"],

["@valid-handle.com", "/at/valid-handle.com"],
// Not sure about this case. Are bare hosts supported in the spec? For now we allow it to error out at a later stage
["host", "/at/host"],
];

describe("navigates valid input", () => {
Expand All @@ -73,9 +70,8 @@ describe("strips whitespace and zero-width characters from valid input", () => {

describe("shows error on invalid input", () => {
test.each([
["@", "Invalid URI: @"],
["@invalid", "Invalid URI: @invalid"],
// ["invalid", "Invalid URI: invalid"],
["@", "Invalid handle: @"],
["@invalid", "Invalid handle: @invalid"],
])('"%s" -> "%s"', async (input, expectedError) => {
expect((await navigateAtUri(input)).error).toMatch(expectedError);
});
Expand Down
84 changes: 71 additions & 13 deletions packages/atproto-browser/lib/navigation.ts
Original file line number Diff line number Diff line change
@@ -1,30 +1,43 @@
import "server-only";

import { getAtUriPath } from "./util";
import { AtUri, isValidHandle } from "@atproto/syntax";
import { isValidHandle } from "@atproto/syntax";
import { redirect } from "next/navigation";
import { parse as parseHtml } from "node-html-parser";
import { parse as parseLinkHeader } from "http-link-header";
import { domainToASCII } from "url";
import { isDid } from "@atproto/did";

export async function navigateAtUri(input: string) {
// Remove all zero-width characters and weird control codes from the input
const sanitizedInput = input
.replace(/[\u200B-\u200D\uFEFF\u202C]/g, "")
.trim();

// Try punycode encoding the input as a domain name and parse it as a handle
const handle = parseHandle(domainToASCII(sanitizedInput) || sanitizedInput);
const handle = parseHandle(sanitizedInput);

if (handle) {
redirect(getAtUriPath(new AtUri(`at://${handle}`)));
redirect(
getAtUriPath({
host: handle,
}),
);
} else if (sanitizedInput.startsWith("@")) {
return {
error: `Invalid handle: ${sanitizedInput}`,
};
}

const result =
sanitizedInput.startsWith("http://") ||
sanitizedInput.startsWith("https://")
? await getAtUriFromHttp(sanitizedInput)
: parseUri(sanitizedInput);
: parseUri(
// Add at:// to start if it's missing
sanitizedInput.startsWith("at://")
? sanitizedInput
: `at://${sanitizedInput}`,
);

if ("error" in result) {
return result;
Expand All @@ -33,11 +46,20 @@ export async function navigateAtUri(input: string) {
redirect(getAtUriPath(result.uri));
}

/**
 * Minimal structural stand-in for an AT URI.
 *
 * We keep our own shape (rather than the ATProto library's URI class) because
 * the library does not currently support unicode handles/hosts.
 */
interface MinimalAtUri {
  /** Authority part of the URI: a DID or a handle, which may contain unicode. */
  host: string;
  /** First path segment, when the URI has one. */
  collection?: string;
  /** Second path segment, when the URI has one. */
  rkey?: string;
}

type UriParseResult =
| {
error: string;
}
| { uri: AtUri };
| { uri: MinimalAtUri };

async function getAtUriFromHttp(url: string): Promise<UriParseResult> {
const controller = new AbortController();
Expand All @@ -60,7 +82,7 @@ async function getAtUriFromHttp(url: string): Promise<UriParseResult> {
const result = ref ? parseUri(ref.uri) : null;
if (result && "uri" in result) {
controller.abort();
redirect(getAtUriPath(result.uri));
return result;
}
}

Expand All @@ -80,7 +102,6 @@ async function getAtUriFromHttp(url: string): Promise<UriParseResult> {
link.getAttribute("href")?.startsWith("at://"),
);
if (atUriAlternate) {
console.log(atUriAlternate.getAttribute("href"));
const result = parseUri(
decodeURIComponent(atUriAlternate.getAttribute("href")!),
);
Expand All @@ -94,19 +115,56 @@ async function getAtUriFromHttp(url: string): Promise<UriParseResult> {
};
}

/**
 * Lenient AT URI matcher that accepts unicode hosts.
 *
 * Capture groups:
 *   1. optional `at://` protocol prefix
 *   2. host: a DID, or any run of characters that is not `/`, `?`, `#` or whitespace
 *   3. optional path (starts with `/`)
 *   4. optional query (starts with `?`)
 *   5. optional fragment (starts with `#`)
 *
 * The host alternative is deliberately bounded at the first `/`, `?` or `#`.
 * An unbounded `.*` would greedily consume the path, query and fragment, leaving
 * groups 3-5 empty and making hosts such as `did:web:example.com` (which fall
 * through to the second alternative because of the `.`) capture their path too.
 * The host may still match the empty string so that callers can report a
 * missing host themselves.
 */
export const ATP_URI_REGEX =
  // proto-    --did--------------   --name--------   --path----   --query--   --hash--
  /^(at:\/\/)?((?:did:[a-z0-9:%-]+)|(?:[^\/?#\s]*))(\/[^?#\s]*)?(\?[^#\s]+)?(#[^\s]+)?$/i;

/**
* Parses an AT URI but allows the host to be a unicode handle.
*
* Unicode handles are preserved and not punycode-encoded so that they can be displayed as-is in, e.g., the URL bar and URI form.
*
* There is potential for homograph attacks here. In the future we should consider punycode-encoding ambiguous characters as per (for example) https://chromium.googlesource.com/chromium/src/+/main/docs/idn.md. This also applies to <DidHandle>.
*/
function parseUri(input: string): UriParseResult {
try {
return { uri: new AtUri(input) };
} catch (_) {
const match = ATP_URI_REGEX.exec(input);
if (match) {
if (!match[2]) {
return {
error: `Invalid URI: ${input}`,
};
}

const host = match[2];

if (host.startsWith("did:") && !isDid(host)) {
return {
error: `Invalid DID in URI: ${input}`,
};
}

const pathname = match[3];
return {
error: `Invalid URI: ${input}`,
uri: {
host,
collection: pathname?.split("/").filter(Boolean)[0],
rkey: pathname?.split("/").filter(Boolean)[1],
},
};
}

return {
error: `Invalid URI: ${input}`,
};
}

function parseHandle(input: string): string | null {
// Remove the leading @
const handle = input.replace(/^@/, "");
if (!isValidHandle(handle)) return null;

if (!isValidHandle(handle) && !isValidHandle(domainToASCII(handle))) {
return null;
}
// We check for the punycode encoded version of the handle but always return the preserved input so that we can display the original handle
return handle;
}
8 changes: 5 additions & 3 deletions packages/atproto-browser/lib/util.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import { AtUri } from "@atproto/syntax";

export const utcDateFormatter = new Intl.DateTimeFormat("en-US", {
dateStyle: "medium",
timeStyle: "short",
Expand All @@ -10,7 +8,11 @@ export function isNotNull<T>(x: T | null): x is T {
return x !== null;
}

export function getAtUriPath(uri: AtUri): string {
export function getAtUriPath(uri: {
host: string;
collection?: string;
rkey?: string;
}): string {
return `/at/${[uri.host, uri.collection, uri.rkey]
.filter(Boolean)
.map((c) => c && decodeURIComponent(c))
Expand Down

0 comments on commit ce36d50

Please sign in to comment.