Skip to content

Commit

Permalink
Add global search, share functions
Browse files Browse the repository at this point in the history
  • Loading branch information
tdraier committed Mar 5, 2025
1 parent 4805f8c commit 4c9bd79
Show file tree
Hide file tree
Showing 7 changed files with 230 additions and 170 deletions.
6 changes: 2 additions & 4 deletions front/lib/api/content_nodes.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type {
ContentNodesViewType,
ContentNodeWithParent,
CoreAPIContentNode,
DataSourceViewContentNode,
DataSourceViewType,
} from "@dust-tt/types";
import { assertNever, MIME_TYPES } from "@dust-tt/types";
Expand Down Expand Up @@ -84,10 +84,9 @@ function isExpandable(
}

export function getContentNodeFromCoreNode(
dataSourceView: DataSourceViewType,
coreNode: CoreAPIContentNode,
viewType: ContentNodesViewType
): DataSourceViewContentNode {
): ContentNodeWithParent {
return {
internalId: coreNode.node_id,
parentInternalId: coreNode.parent_id ?? null,
Expand All @@ -108,6 +107,5 @@ export function getContentNodeFromCoreNode(
FOLDERS_SELECTION_PREVENTED_MIME_TYPES.includes(coreNode.mime_type) ||
(viewType === "table" && coreNode.node_type !== "table"),
parentTitle: coreNode.parent_title,
dataSourceView,
};
}
20 changes: 8 additions & 12 deletions front/lib/api/data_source_view.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,15 +113,13 @@ export async function getFlattenedContentNodesOfViewTypeForDataSourceView(
const resultNodes: CoreAPIContentNode[] = coreRes.value.nodes;
nextPageCursor = coreRes.value.next_page_cursor;

const nodes = resultNodes.map((node) =>
getContentNodeFromCoreNode(
const nodes = resultNodes.map((node) => ({
...getContentNodeFromCoreNode(node, viewType),
dataSourceView:
dataSourceView instanceof DataSourceViewResource
? dataSourceView.toJSON()
: dataSourceView,
node,
viewType
)
);
}));

return new Ok({
nodes,
Expand Down Expand Up @@ -207,15 +205,13 @@ export async function getContentNodesForDataSourceView(
nextPageCursor = coreRes.value.next_page_cursor;
} while (resultNodes.length < limit && nextPageCursor);

const nodes = resultNodes.map((node) =>
getContentNodeFromCoreNode(
const nodes = resultNodes.map((node) => ({
...getContentNodeFromCoreNode(node, viewType),
dataSourceView:
dataSourceView instanceof DataSourceViewResource
? dataSourceView.toJSON()
: dataSourceView,
node,
viewType
)
);
}));
const sortedNodes = !internalIds
? nodes
: internalIds.flatMap((id) =>
Expand Down
77 changes: 19 additions & 58 deletions front/lib/api/spaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@ import type {
ContentNodesViewType,
CoreAPIError,
CoreAPISearchOptions,
CoreAPISearchScope,
DataSourceViewContentNode,
DataSourceWithAgentsUsageType,
Result,
SearchWarningCode,
} from "@dust-tt/types";
import { assertNever, CoreAPI, Err, Ok, removeNulls } from "@dust-tt/types";
import { CoreAPI, Err, Ok, removeNulls } from "@dust-tt/types";
import assert from "assert";
import { uniq } from "lodash";

Expand All @@ -25,6 +24,7 @@ import { KeyResource } from "@app/lib/resources/key_resource";
import { SpaceResource } from "@app/lib/resources/space_resource";
import { frontSequelize } from "@app/lib/resources/storage";
import { UserResource } from "@app/lib/resources/user_resource";
import { getSearchFilterFromDataSourceViews } from "@app/lib/search";
import { isPrivateSpacesLimitReached } from "@app/lib/spaces";
import logger from "@app/logger/logger";
import { launchScrubSpaceWorkflow } from "@app/poke/temporal/client";
Expand Down Expand Up @@ -246,46 +246,6 @@ export async function createRegularSpaceAndGroup(
return new Ok(space);
}

/**
 * Translates a content-node view type into the list of core node types to
 * search for. Folders are part of the result for every view type.
 *
 * @param viewType - The view type requested by the caller.
 * @returns The node types to pass as `node_types` in the search filter.
 */
function getCoreViewTypeFilter(viewType: ContentNodesViewType) {
  // Folders are always included, whatever the view type.
  const alwaysIncluded = ["folder"];

  switch (viewType) {
    case "all": {
      return [...alwaysIncluded, "table", "document"];
    }
    case "table": {
      return [...alwaysIncluded, "table"];
    }
    case "document": {
      return [...alwaysIncluded, "document"];
    }
    default: {
      // Exhaustiveness guard: `viewType` is `never` here.
      assertNever(viewType);
    }
  }
}

/**
 * Picks the search scope to use for one data source view.
 *
 * @param dsv - The data source view being searched.
 * @param includeDataSources - Whether data source names may be matched.
 * @param isSingleDsv - Whether the search targets exactly one data source view.
 * @returns `"nodes_titles"`, `"data_source_name"` or `"both"`.
 */
function searchScopeForDsv({
  dsv,
  includeDataSources,
  isSingleDsv,
}: {
  dsv: DataSourceViewResource;
  includeDataSources: boolean;
  isSingleDsv: boolean;
}): CoreAPISearchScope {
  // The data source name is never matched when searching a single data
  // source view, nor when the caller did not ask to include data sources.
  if (isSingleDsv || !includeDataSources) {
    return "nodes_titles";
  }

  // For webcrawler data sources only the data source name is searched, not
  // the node titles; every other provider matches on both.
  const isWebcrawler = dsv.dataSource.connectorProvider === "webcrawler";
  return isWebcrawler ? "data_source_name" : "both";
}

export async function searchContenNodesInSpace(
auth: Authenticator,
space: SpaceResource,
Expand Down Expand Up @@ -320,22 +280,21 @@ export async function searchContenNodesInSpace(

const coreAPI = new CoreAPI(config.getCoreAPIConfig(), logger);

const isSingleDsv = dataSourceViews.length === 1;
const searchFilterResult = getSearchFilterFromDataSourceViews(
auth.getNonNullableWorkspace(),
dataSourceViews,
{
excludedNodeMimeTypes,
includeDataSources,
viewType,
}
);
if (searchFilterResult.isErr()) {
return searchFilterResult;
}
const searchRes = await coreAPI.searchNodes({
query,
filter: {
data_source_views: dataSourceViews.map((dsv) => ({
data_source_id: dsv.dataSource.dustAPIDataSourceId,
view_filter: dsv.parentsIn ?? [],
search_scope: searchScopeForDsv({
dsv,
includeDataSources,
isSingleDsv,
}),
})),
excluded_node_mime_types: excludedNodeMimeTypes,
node_types: getCoreViewTypeFilter(viewType),
},
filter: searchFilterResult.value,
options,
});

Expand All @@ -361,8 +320,10 @@ export async function searchContenNodesInSpace(

return [];
}

return getContentNodeFromCoreNode(dataSourceView.toJSON(), node, viewType);
return {
...getContentNodeFromCoreNode(node, viewType),
dataSourceView: dataSourceView.toJSON(),
};
});

return new Ok({
Expand Down
128 changes: 128 additions & 0 deletions front/lib/search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import type {
ContentNodesViewType,
CoreAPISearchScope,
LightWorkspaceType,
} from "@dust-tt/types";
import { assertNever, Err, Ok } from "@dust-tt/types";

import { DustError } from "@app/lib/error";
import type { DataSourceResource } from "@app/lib/resources/data_source_resource";
import type { DataSourceViewResource } from "@app/lib/resources/data_source_view_resource";
import logger from "@app/logger/logger";

/**
 * Returns the core node types to search for a given content-node view type.
 * `"folder"` is included for every view type.
 *
 * @param viewType - The requested view type (`"document"`, `"table"` or `"all"`).
 * @returns The list of node types used as `node_types` in the search filter.
 */
export function getCoreViewTypeFilter(viewType: ContentNodesViewType) {
  if (viewType === "document") {
    return ["folder", "document"];
  }
  if (viewType === "table") {
    return ["folder", "table"];
  }
  if (viewType === "all") {
    return ["folder", "table", "document"];
  }

  // Exhaustiveness guard: every known view type has been handled above.
  assertNever(viewType);
}

/**
 * Decides which fields a search should match against for one data source.
 *
 * @param dataSource - The data source being searched.
 * @param includeDataSources - Whether data source names may be matched.
 * @param isSingleDataSource - Whether the search targets exactly one data source.
 * @returns `"nodes_titles"`, `"data_source_name"` or `"both"`.
 */
export function searchScopeForDataSource({
  dataSource,
  includeDataSources,
  isSingleDataSource,
}: {
  dataSource: DataSourceResource;
  includeDataSources: boolean;
  isSingleDataSource: boolean;
}): CoreAPISearchScope {
  // With a single data source we never want to match the data source name,
  // even when the caller asked to include data sources.
  const canMatchDataSourceName = includeDataSources && !isSingleDataSource;
  if (!canMatchDataSourceName) {
    return "nodes_titles";
  }

  // For webcrawler data sources we search only the data source name, not the
  // node titles.
  return dataSource.connectorProvider === "webcrawler"
    ? "data_source_name"
    : "both";
}

/**
 * Builds the search filter for a set of data source views.
 *
 * Views are grouped by the `dustAPIDataSourceId` of their data source, so the
 * resulting filter holds one entry per data source. Within a group the
 * `parentsIn` of all views are merged and de-duplicated; as soon as one view
 * of the group has a falsy `parentsIn`, the group's `view_filter` is emitted
 * as an empty array.
 *
 * @param workspace - Workspace, used only for its `sId` in the warning log.
 * @param dataSourceViews - The views to search in.
 * @param excludedNodeMimeTypes - Passed through as `excluded_node_mime_types`.
 * @param includeDataSources - Whether data source names may be matched.
 * @param viewType - View type, converted to `node_types`.
 * @returns An `Ok` wrapping the filter, or an `Err` holding a `DustError`
 * (`"data_source_error"`) when no view was given.
 */
export function getSearchFilterFromDataSourceViews(
  workspace: LightWorkspaceType,
  dataSourceViews: DataSourceViewResource[],
  {
    excludedNodeMimeTypes,
    includeDataSources,
    viewType,
  }: {
    excludedNodeMimeTypes: readonly string[];
    includeDataSources: boolean;
    viewType: ContentNodesViewType;
  }
) {
  type DataSourceGroup = {
    dataSource: DataSourceResource;
    dataSourceViews: DataSourceViewResource[];
    // `null` once any view of the group has no `parentsIn`.
    parentsIn: string[] | null;
  };

  // A Map keeps insertion order, so groups come out in the order their data
  // source was first encountered.
  const groups = new Map<string, DataSourceGroup>();

  for (const view of dataSourceViews) {
    const key = view.dataSource.dustAPIDataSourceId;
    const known = groups.get(key);
    const group: DataSourceGroup = known ?? {
      dataSource: view.dataSource,
      dataSourceViews: [],
      parentsIn: [],
    };
    if (!known) {
      groups.set(key, group);
    }

    group.dataSourceViews.push(view);

    if (view.parentsIn && group.parentsIn !== null) {
      group.parentsIn.push(...view.parentsIn);
    } else {
      // Sticky: once reset to null, later views cannot add parents back.
      group.parentsIn = null;
    }
  }

  const groupedEntries = Array.from(groups);

  if (groupedEntries.length === 0) {
    return new Err(new DustError("data_source_error", "No datasources found."));
  }

  if (groupedEntries.length > 1024) {
    logger.warn(
      {
        workspaceId: workspace.sId,
        filterLength: groupedEntries.length,
      },
      "Filter length is greater than 1024, truncating"
    );
    // Keep only the first 1024 data sources.
    groupedEntries.splice(1024);
  }

  // Evaluated after the truncation above, like the per-entry check it replaces.
  const isSingleDataSource = groupedEntries.length === 1;

  const dataSourceViewFilters = groupedEntries.map(([dataSourceId, group]) => {
    const viewFilter =
      group.parentsIn === null ? [] : Array.from(new Set(group.parentsIn));

    return {
      data_source_id: dataSourceId,
      view_filter: viewFilter,
      search_scope: searchScopeForDataSource({
        dataSource: group.dataSource,
        includeDataSources,
        isSingleDataSource,
      }),
    };
  });

  return new Ok({
    data_source_views: dataSourceViewFilters,
    excluded_node_mime_types: excludedNodeMimeTypes,
    node_types: getCoreViewTypeFilter(viewType),
  });
}
Loading

0 comments on commit 4c9bd79

Please sign in to comment.