Skip to content

Commit

Permalink
Generate sub result anchor links with safer URL handling
Browse files Browse the repository at this point in the history
  • Loading branch information
bglw committed Sep 5, 2023
1 parent 61409e8 commit 0a06adf
Showing 1 changed file with 81 additions and 43 deletions.
124 changes: 81 additions & 43 deletions pagefind_web_js/lib/sub_results.ts
Original file line number Diff line number Diff line change
@@ -1,55 +1,93 @@
import { build_excerpt, calculate_excerpt_region } from "./excerpt";

/**
 * Throwaway origin used only so that `URL` can parse a root-relative path.
 * It is sliced off again before the URL is returned.
 */
const PLACEHOLDER_ORIGIN = "https://example.com";

/**
 * Returns `url` with its fragment (hash) set to `anchor_id`, letting the
 * `URL` parser do the encoding rather than concatenating strings.
 *
 * - `http://` / `https://` URLs are parsed as-is.
 * - Protocol-relative URLs (`//host/path`) are parsed with a borrowed `https:`
 *   scheme, which is stripped afterwards. `new URL("//host/path")` without a
 *   base throws, so these cannot go through the fully-qualified path.
 * - Anything else is treated as a site path: a leading `/` is added when
 *   missing, and the result is returned without an origin.
 *
 * If parsing throws, the failure is logged and the URL is returned without
 * the anchor appended.
 */
const build_anchored_url = (url: string, anchor_id: string): string => {
  let anchored_url = url;
  try {
    if (/^https?:\/\//.test(anchored_url)) {
      const fq_url = new URL(anchored_url);
      fq_url.hash = anchor_id;
      anchored_url = fq_url.toString();
    } else if (anchored_url.startsWith("//")) {
      const borrowed_scheme = "https:";
      const fq_url = new URL(`${borrowed_scheme}${anchored_url}`);
      fq_url.hash = anchor_id;
      anchored_url = fq_url.toString().slice(borrowed_scheme.length);
    } else {
      if (!anchored_url.startsWith("/")) {
        anchored_url = `/${anchored_url}`;
      }
      const fq_url = new URL(`${PLACEHOLDER_ORIGIN}${anchored_url}`);
      fq_url.hash = anchor_id;
      anchored_url = fq_url.toString().slice(PLACEHOLDER_ORIGIN.length);
    }
  } catch {
    console.error(
      `Pagefind: Couldn't process ${anchored_url} for a search result`
    );
  }
  return anchored_url;
};

/**
 * Splits the matched word locations of a search fragment into sub results,
 * one per heading section that contains at least one match.
 *
 * Only anchors whose element matches `/h\d/i` and whose text contains a word
 * character are used as section boundaries. Matches located before the first
 * such heading are grouped under the page's own title and URL; every other
 * group takes its title from the heading text and its URL from the page URL
 * with the heading's id as the hash.
 *
 * @param fragment Search fragment supplying `url`, `meta`, `anchors` and
 *   `locations`.
 * @param desired_excerpt_length Excerpt length passed to the excerpt helpers;
 *   an excerpt is shortened so that it does not run past the next heading.
 * @returns The sub results, in the order their sections are reached while
 *   walking `fragment.locations`.
 */
export const calculate_sub_results = (
  fragment: PagefindSearchFragment,
  desired_excerpt_length: number
): PagefindSubResult[] => {
  // `filter` returns a fresh array, so sorting and shifting below never
  // touches `fragment.anchors` itself.
  const anchors = fragment.anchors
    .filter(
      (a) => /h\d/i.test(a.element) && a.text?.length && /\w/.test(a.text)
    )
    .sort((a, b) => a.location - b.location);
  const results: PagefindSubResult[] = [];

  // The section currently being filled. It starts as the page itself, which
  // collects matches that appear before the first heading.
  let current_anchor_position = 0;
  let current_anchor: PagefindSubResult = {
    title: fragment.meta["title"],
    url: fragment.url,
    locations: [],
    excerpt: "",
  };

  // Finalises the current section (if it has any matches) by building its
  // excerpt and pushing it to `results`. `end_range` is the location of the
  // heading that closes the section, when there is one.
  const add_result = (end_range?: number) => {
    if (current_anchor.locations.length) {
      // The excerpt region is calculated relative to the section start and
      // then shifted back into fragment coordinates.
      const relative_locations = current_anchor.locations.map(
        (l) => l - current_anchor_position
      );
      const excerpt_start =
        calculate_excerpt_region(relative_locations, desired_excerpt_length) +
        current_anchor_position;
      const excerpt_length = end_range
        ? Math.min(end_range - excerpt_start, desired_excerpt_length)
        : desired_excerpt_length;
      current_anchor.excerpt = build_excerpt(
        fragment,
        excerpt_start,
        excerpt_length,
        current_anchor.locations
      );

      results.push(current_anchor);
    }
  };

  for (const word of fragment.locations) {
    const upcoming_anchor = anchors[0];
    if (!upcoming_anchor || word < upcoming_anchor.location) {
      current_anchor.locations.push(word);
    } else {
      let next_anchor = anchors.shift()!;

      // Word is in a new sub result, track the previous one.
      add_result(next_anchor.location);

      // Skip headings that have no matches before the word: the section the
      // word belongs to is the last heading at or before it.
      while (anchors.length && word >= anchors[0]!.location) {
        next_anchor = anchors.shift()!;
      }

      current_anchor_position = next_anchor.location;
      current_anchor = {
        title: next_anchor.text!,
        url: build_anchored_url(fragment.url, next_anchor.id),
        anchor: next_anchor,
        locations: [word],
        excerpt: "",
      };
    }
  }
  // Flush the final section, bounded by the next unused heading if any.
  add_result(anchors[0]?.location);

  return results;
};

0 comments on commit 0a06adf

Please sign in to comment.