diff --git a/web/blueprint/src/lib/components/commands/CommandCreateConcept.svelte b/web/blueprint/src/lib/components/commands/CommandCreateConcept.svelte index 3295e3ec1..472a5f679 100644 --- a/web/blueprint/src/lib/components/commands/CommandCreateConcept.svelte +++ b/web/blueprint/src/lib/components/commands/CommandCreateConcept.svelte @@ -56,7 +56,7 @@ $: schema = schemaQuery && $schemaQuery?.data; $: stringFields = schema - ? childFields(schema).filter(f => f.dtype === 'string' && !isSignalField(f, schema!)) + ? childFields(schema).filter(f => f.dtype === 'string' && !isSignalField(f)) : []; $: fields = stringFields.sort((a, b) => { const aIsIndexed = childFields(a).some( @@ -80,7 +80,7 @@ $: { // Auto-select the first path. if (fields && fields.length > 0 && path === undefined) { - path = deserializePath(fields[0].path); + path = fields[0].path; } } diff --git a/web/blueprint/src/lib/components/commands/selectors/FieldSelect.svelte b/web/blueprint/src/lib/components/commands/selectors/FieldSelect.svelte index 9fbf2faa8..9204717df 100644 --- a/web/blueprint/src/lib/components/commands/selectors/FieldSelect.svelte +++ b/web/blueprint/src/lib/components/commands/selectors/FieldSelect.svelte @@ -28,7 +28,7 @@ .filter(field => (filter ? 
filter(field) : true)) : null; - $: sourceFields = fields?.filter(f => $schema.data && !isSignalField(f, $schema.data)); + $: sourceFields = fields?.filter(f => $schema.data && !isSignalField(f)); function formatField(field: LilacField): string { return `${field.path.join('.')} (${field.dtype})`; diff --git a/web/blueprint/src/lib/components/concepts/DatasetFieldEmbeddingSelector.svelte b/web/blueprint/src/lib/components/concepts/DatasetFieldEmbeddingSelector.svelte index 9dd984b07..ebe8f3a49 100644 --- a/web/blueprint/src/lib/components/concepts/DatasetFieldEmbeddingSelector.svelte +++ b/web/blueprint/src/lib/components/concepts/DatasetFieldEmbeddingSelector.svelte @@ -37,7 +37,7 @@ $: schema = $schemaQuery.data; $: pathId = path ? serializePath(path) : undefined; $: sourceFields = schema - ? childFields(schema).filter(f => !isSignalField(f, schema!) && f.dtype != null) + ? childFields(schema).filter(f => !isSignalField(f) && f.dtype != null) : []; $: indexedFields = sourceFields.filter(f => childFields(f).some(f => f.signal != null && childFields(f).some(f => f.dtype === 'embedding')) diff --git a/web/blueprint/src/lib/components/contextMenu/SchemaFieldMenu.svelte b/web/blueprint/src/lib/components/contextMenu/SchemaFieldMenu.svelte index f4942fec1..bbc2c5f1f 100644 --- a/web/blueprint/src/lib/components/contextMenu/SchemaFieldMenu.svelte +++ b/web/blueprint/src/lib/components/contextMenu/SchemaFieldMenu.svelte @@ -10,8 +10,7 @@ isSignalRootField, isSortableField, serializePath, - type LilacField, - type LilacSchema + type LilacField } from '$lilac'; import {Modal, OverflowMenu, OverflowMenuItem} from 'carbon-components-svelte'; import {InProgress} from 'carbon-icons-svelte'; @@ -19,7 +18,6 @@ import {hoverTooltip} from '../common/HoverTooltip'; export let field: LilacField; - export let schema: LilacSchema; let deleteSignalOpen = false; @@ -31,7 +29,7 @@ const datasetStore = getDatasetContext(); const deleteSignal = deleteSignalMutation(); - $: isSignal = 
isSignalField(field, schema); + $: isSignal = isSignalField(field); $: isSignalRoot = isSignalRootField(field); $: isPreview = isPreviewSignal($datasetStore.selectRowsSchema?.data || null, field.path); diff --git a/web/blueprint/src/lib/components/datasetView/DatasetSettingsModal.svelte b/web/blueprint/src/lib/components/datasetView/DatasetSettingsModal.svelte index 24de6d147..409a4b3c5 100644 --- a/web/blueprint/src/lib/components/datasetView/DatasetSettingsModal.svelte +++ b/web/blueprint/src/lib/components/datasetView/DatasetSettingsModal.svelte @@ -52,7 +52,7 @@ let preferredEmbedding: string | undefined = $appSettings.embedding; $: mediaFields = petals(schema).filter( - f => f.dtype === 'string' && !pathIsEqual(f.path, [ROWID]) && !isSignalField(f, schema) + f => f.dtype === 'string' && !pathIsEqual(f.path, [ROWID]) && !isSignalField(f) ); $: { diff --git a/web/blueprint/src/lib/components/datasetView/DownloadFieldList.svelte b/web/blueprint/src/lib/components/datasetView/DownloadFieldList.svelte index fe49e959c..4bb199afb 100644 --- a/web/blueprint/src/lib/components/datasetView/DownloadFieldList.svelte +++ b/web/blueprint/src/lib/components/datasetView/DownloadFieldList.svelte @@ -12,7 +12,7 @@ checkedFields.push(field); checkedFields = checkedFields; } else { - checkedFields = checkedFields.filter(f => f !== field); + checkedFields = checkedFields.filter(f => !pathIsEqual(f.path, field.path)); } } diff --git a/web/blueprint/src/lib/components/datasetView/DownloadModal.svelte b/web/blueprint/src/lib/components/datasetView/DownloadModal.svelte index c336fd4d7..81c9b16fd 100644 --- a/web/blueprint/src/lib/components/datasetView/DownloadModal.svelte +++ b/web/blueprint/src/lib/components/datasetView/DownloadModal.svelte @@ -48,7 +48,7 @@ return {sourceFields: null, enrichedFields: null}; } const petalFields = petals(schema).filter(field => ['embedding'].indexOf(field.dtype!) 
=== -1); - const sourceFields = petalFields.filter(f => !isSignalField(f, schema)); + const sourceFields = petalFields.filter(f => !isSignalField(f)); const enrichedFields = childFields(schema) .filter(f => isSignalRootField(f)) .filter(f => !childFields(f).some(f => f.dtype === 'embedding')); diff --git a/web/blueprint/src/lib/components/datasetView/FilterPanel.svelte b/web/blueprint/src/lib/components/datasetView/FilterPanel.svelte index 9f25a77c9..ca3535fea 100644 --- a/web/blueprint/src/lib/components/datasetView/FilterPanel.svelte +++ b/web/blueprint/src/lib/components/datasetView/FilterPanel.svelte @@ -2,7 +2,7 @@ import {queryConcept} from '$lib/queries/conceptQueries'; import {getDatasetContext} from '$lib/stores/datasetStore'; import {getDatasetViewContext} from '$lib/stores/datasetViewStore'; - import {getSearches, getSort} from '$lib/view_utils'; + import {displayPath, getSearches, getSort} from '$lib/view_utils'; import { deserializePath, formatValue, @@ -86,7 +86,7 @@ .map(field => { return { path: field.path, - text: serializePath(field.path.slice(1)), + text: displayPath(field.path.slice(1)), disabled: false }; }) diff --git a/web/blueprint/src/lib/components/datasetView/FilterPill.svelte b/web/blueprint/src/lib/components/datasetView/FilterPill.svelte index 4cf7a1911..45f89bcd0 100644 --- a/web/blueprint/src/lib/components/datasetView/FilterPill.svelte +++ b/web/blueprint/src/lib/components/datasetView/FilterPill.svelte @@ -1,9 +1,9 @@
@@ -48,9 +51,13 @@ })} on:remove={() => datasetViewStore.removeFilter(filter)} > - {hidePath ? '' : shortenPath} - {FILTER_SHORTHANDS[filter.op]} - {formattedValue} + {#if filter.op === 'exists'} + has {hidePath ? '' : shortenPath} + {:else} + {hidePath ? '' : shortenPath} + {FILTER_SHORTHANDS[filter.op]} + {formattedValue} + {/if}
@@ -62,4 +69,7 @@ :global(.filter-pill .bx--tooltip__content) { @apply flex flex-col items-center; } + :global(.search-container .bx--list-box__menu) { + max-height: 26rem !important; + } diff --git a/web/blueprint/src/lib/components/datasetView/SearchPanel.svelte b/web/blueprint/src/lib/components/datasetView/SearchPanel.svelte index ea5f9803e..5fdd360e0 100644 --- a/web/blueprint/src/lib/components/datasetView/SearchPanel.svelte +++ b/web/blueprint/src/lib/components/datasetView/SearchPanel.svelte @@ -9,13 +9,28 @@ import {getSettingsContext} from '$lib/stores/settingsStore'; import { conceptDisplayName, + displayPath, getComputedEmbeddings, getSearchEmbedding, getSearchPath, getSearches, - getSortedConcepts + getSortedConcepts, + shortFieldName } from '$lib/view_utils'; - import {deserializePath, serializePath, type Path} from '$lilac'; + import { + childFields, + deserializePath, + getSignalInfo, + isNumeric, + pathIncludes, + serializePath, + type LilacSchema, + type Op, + type Path, + type SignalInfoWithTypedSchema, + type StatsResult, + type UnaryFilter + } from '$lilac'; import {Button, ComboBox, InlineLoading, Select, SelectItem, Tag} from 'carbon-components-svelte'; import {Add, Checkmark, Chip, SearchAdvanced} from 'carbon-icons-svelte'; import {Command, triggerCommand} from '../commands/Commands.svelte'; @@ -33,6 +48,74 @@ $: searches = getSearches($datasetViewStore, searchPath); + function getFieldSearchItems( + stats: StatsResult[] | null, + schema: LilacSchema | null, + embeddings: SignalInfoWithTypedSchema[] | undefined + ): SearchItem[] { + if (schema == null || stats == null || searchPath == null) { + return []; + } + const allFields = schema ? childFields(schema) : []; + const items: SearchItem[] = []; + for (const field of allFields) { + if (field.dtype == null) { + // Ignore non-petals. + continue; + } + if (field.dtype === 'embedding' || field.dtype === 'binary') { + // Ignore special dtypes. 
+ continue; + } + if (!pathIncludes(field.path, searchPath)) { + // Ignore any fields unrelated to the current search path. + continue; + } + const signal = getSignalInfo(field); + if (signal?.signal_name === 'concept_score') { + // Ignore any concept scores since they are handled separately via preview. + continue; + } + const isEmbedding = embeddings?.some(e => e.name === signal?.signal_name); + if (isEmbedding) { + // Ignore any embeddings since they are special "index" fields. + continue; + } + const shortName = shortFieldName(field.path); + const text = displayPath(field.path.slice(searchPath.length)); + // Suggest sorting for numeric fields. + if (isNumeric(field.dtype)) { + items.push({ + id: {type: 'field', path: field.path, sort: 'DESC', isSignal: signal != null} as FieldId, + text, + description: `Sort descending by ${shortName}` + }); + continue; + } + + // Suggest "exists" for signal string fields such as PII. + if (field.dtype === 'string' || field.dtype === 'string_span') { + if (signal == null) { + // Skip filtering source fields by EXISTS. + continue; + } + items.push({ + id: {type: 'field', path: field.path, op: 'exists', isSignal: signal != null} as FieldId, + text, + description: `Find documents with ${shortName}` + }); + continue; + } + } + return items; + } + + $: fieldSearchItems = getFieldSearchItems( + $datasetStore.stats, + $datasetStore.schema, + $embeddings.data + ); + const signalMutation = computeSignalMutation(); // Only show the visible string fields in the dropdown. @@ -65,8 +148,8 @@ !isEmbeddingComputed && isWaitingForIndexing[indexingKey(searchPath, selectedEmbedding)]; $: placeholderText = isEmbeddingComputed - ? 'Search by concept or keyword.' - : 'Search by keyword. Click "compute embedding" to search by concept.'; + ? 'Search by keyword, field or concept.' + : 'Search by keyword or field. 
Compute embedding to enable concept search.'; const concepts = queryConcepts(); const authInfo = queryAuthInfo(); @@ -74,18 +157,26 @@ $: namespaceConcepts = getSortedConcepts($concepts.data || [], userId); interface ConceptId { + type: 'concept'; namespace: string; name: string; } - interface SearchSelectItem { - id: ConceptId | 'new-concept' | 'keyword-search' | 'semantic-search'; + interface FieldId { + type: 'field'; + path: Path; + isSignal: boolean; + op?: Op; + sort?: 'ASC' | 'DESC'; + } + interface SearchItem { + id: ConceptId | FieldId | 'new-concept' | 'keyword-search' | 'semantic-search'; text: string; description?: string; } - let conceptSelectItems: SearchSelectItem[] = []; + let searchItems: SearchItem[] = []; let searchText = ''; - let newConceptItem: SearchSelectItem; + let newConceptItem: SearchItem; $: newConceptItem = { id: 'new-concept', text: searchText, @@ -94,20 +185,21 @@ $: keywordSearchItem = { id: 'keyword-search', text: searchText - } as SearchSelectItem; + } as SearchItem; $: semanticSearchItem = { id: 'semantic-search', text: searchText, disabled: !isEmbeddingComputed - } as SearchSelectItem; - $: conceptSelectItems = $concepts?.data + } as SearchItem; + $: searchItems = $concepts?.data ? [ - newConceptItem, ...(searchText != '' ? [keywordSearchItem] : []), ...(searchText != '' && selectedEmbedding ? 
[semanticSearchItem] : []), + newConceptItem, + ...fieldSearchItems, ...namespaceConcepts.flatMap(namespaceConcept => namespaceConcept.concepts.map(c => ({ - id: {namespace: c.namespace, name: c.name}, + id: {namespace: c.namespace, name: c.name, type: 'concept'} as ConceptId, text: conceptDisplayName(c.namespace, c.name, $authInfo.data), description: c.description, disabled: @@ -157,8 +249,8 @@ const selectSearchItem = ( e: CustomEvent<{ - selectedId: SearchSelectItem['id']; - selectedItem: SearchSelectItem; + selectedId: SearchItem['id']; + selectedItem: SearchItem; }> ) => { if (searchPath == null) return; @@ -180,7 +272,6 @@ path: searchPath, onCreate: e => searchConcept(e.detail.namespace, e.detail.name) }); - conceptComboBox.clear(); } else if (e.detail.selectedId === 'keyword-search') { if (searchText == '') { return; @@ -190,7 +281,6 @@ type: 'keyword', query: searchText }); - conceptComboBox.clear(); } else if (e.detail.selectedId == 'semantic-search') { if (searchText == '' || selectedEmbedding == null) { return; @@ -201,10 +291,25 @@ query: searchText, embedding: selectedEmbedding }); - conceptComboBox.clear(); - } else { + } else if (e.detail.selectedId.type === 'concept') { searchConcept(e.detail.selectedId.namespace, e.detail.selectedId.name); + } else if (e.detail.selectedId.type === 'field') { + const searchItem = e.detail.selectedId as FieldId; + if (searchItem.sort != null) { + datasetViewStore.setSortBy(searchItem.path); + datasetViewStore.setSortOrder(searchItem.sort); + } else if (searchItem.op != null) { + datasetViewStore.addFilter({ + path: searchItem.path, + op: searchItem.op + } as UnaryFilter); + } else { + throw new Error(`Unknown search type ${e.detail.selectedId}`); + } + } else { + throw new Error(`Unknown search type ${e.detail.selectedId}`); } + conceptComboBox.clear(); }; const selectField = (e: Event) => { @@ -234,7 +339,7 @@ @@ -242,7 +347,14 @@ placeholder={placeholderText} let:item={it} > - {@const item = 
conceptSelectItems.find(p => p.id === it.id)} + {@const item = searchItems.find(p => p.id === it.id)} + {@const isSignal = + item != null && + typeof item.id === 'object' && + item.id.type === 'field' && + item.id.isSignal} + {@const isConcept = + item != null && typeof item.id === 'object' && item.id.type === 'concept'} {#if item == null}
{:else if item.id === 'new-concept'} @@ -270,7 +382,7 @@
{:else} -
+
{item.text}
{#if item.description}
{item.description}
@@ -334,4 +446,12 @@ :global(.new-concept, .new-keyword) { @apply h-full; } + + /* Style the combobox item's parent div with a background color depending on type of search. */ + :global(.bx--list-box__menu-item:not(.bx--list-box__menu-item--highlighted):has(.isSignal)) { + @apply bg-blue-50; + } + :global(.bx--list-box__menu-item:not(.bx--list-box__menu-item--highlighted):has(.isConcept)) { + @apply bg-emerald-100; + } diff --git a/web/blueprint/src/lib/components/schemaView/SchemaField.svelte b/web/blueprint/src/lib/components/schemaView/SchemaField.svelte index b5e5d2fc3..82c342941 100644 --- a/web/blueprint/src/lib/components/schemaView/SchemaField.svelte +++ b/web/blueprint/src/lib/components/schemaView/SchemaField.svelte @@ -35,7 +35,7 @@ export let sourceField: LilacField | undefined = undefined; export let indent = 0; - $: isSignal = isSignalField(field, schema); + $: isSignal = isSignalField(field); $: isSignalRoot = isSignalRootField(field); $: isSourceField = !isSignal; @@ -269,7 +269,7 @@
{/if} - +
{#if expandedDetails} @@ -285,7 +285,7 @@ {schema} field={childField} indent={indent + 1} - sourceField={isSourceField && isSignalField(childField, schema) ? field : sourceField} + sourceField={isSourceField && isSignalField(childField) ? field : sourceField} /> {/each} {/if} diff --git a/web/blueprint/src/lib/view_utils.ts b/web/blueprint/src/lib/view_utils.ts index 846231256..ff07c757c 100644 --- a/web/blueprint/src/lib/view_utils.ts +++ b/web/blueprint/src/lib/view_utils.ts @@ -1,5 +1,6 @@ import { L, + PATH_WILDCARD, VALUE_KEY, childFields, deserializePath, @@ -177,7 +178,7 @@ export function isPathVisible( // Signal columns are not visible by default. Because children inherit from parents, we only need // need to check for the parent. const field = getField(schema, pathArray); - const isSignal = isSignalField(field!, schema); + const isSignal = isSignalField(field!); if (isSignal) { return false; @@ -746,3 +747,16 @@ export function getChars(text: string): string[] { export function stringLength(text: string): number { return getChars(text).length; } + +/** Returns a short-form field name for displaying. 
*/ +export function shortFieldName(path: Path): string { + if (path.length === 0) { + throw new Error('Cannot get short name for empty path'); + } + return [...path].reverse().find(p => p !== PATH_WILDCARD)!; +} + +export function displayPath(path: Path): string { + const result = path.join('.'); + return result.replaceAll(`.${PATH_WILDCARD}`, '[]'); +} diff --git a/web/lib/src/lilac.spec.ts b/web/lib/src/lilac.spec.ts index f46044910..fdf1cabd8 100644 --- a/web/lib/src/lilac.spec.ts +++ b/web/lib/src/lilac.spec.ts @@ -303,8 +303,8 @@ describe('lilac', () => { describe('utilities', () => { it('isSignalField', () => { - expect(isSignalField(schema.fields!.comment_text!, schema)).toEqual(false); - expect(isSignalField(schema.fields!.comment_text.fields!.pii, schema)).toEqual(true); + expect(isSignalField(schema.fields!.comment_text!)).toEqual(false); + expect(isSignalField(schema.fields!.comment_text.fields!.pii)).toEqual(true); }); }); diff --git a/web/lib/src/lilac.ts b/web/lib/src/lilac.ts index 3fc4e99ad..6debaab1b 100644 --- a/web/lib/src/lilac.ts +++ b/web/lib/src/lilac.ts @@ -37,6 +37,7 @@ export type Op = BinaryFilter['op'] | UnaryFilter['op'] | ListFilter['op']; export type LilacField = Field & { path: Path; + parent?: LilacField; // Overwrite the fields and repeated_field properties to be LilacField repeated_field?: LilacField; fields?: Record; @@ -202,25 +203,20 @@ export function getValueNodes(row: LilacValueNode, path: Path): LilacValueNode[] return list.filter(value => pathIsMatching(path, L.path(value))); } -/** - * Determine if field is produced by a signal. We do this by walking the schema from the root to the - * field, and checking if a parent has a signal. - */ -export function isSignalField( - field: LilacField, - schema: LilacField, - hasSignalRootParent = false -): boolean { - if (isSignalRootField(schema)) { - hasSignalRootParent = true; +/** Determine if a field is produced by a signal. 
*/ +export function isSignalField(field: LilacField): boolean { + return getSignalInfo(field) != null; +} + +/** If a field is produced by a signal, it returns the signal information. Otherwise returns null. */ +export function getSignalInfo(field: LilacField): Signal | null { + if (field.signal) { + return field.signal; } - if (schema === field) return hasSignalRootParent; - if (schema.fields != null) { - return Object.values(schema.fields).some(f => isSignalField(field, f, hasSignalRootParent)); - } else if (schema.repeated_field != null) { - return isSignalField(field, schema.repeated_field, hasSignalRootParent); + if (field.parent) { + return getSignalInfo(field.parent); } - return false; + return null; } /** True if the field was the root field produced by a signal. */ @@ -262,12 +258,14 @@ export function deserializeField(field: Field, path: Path = []): LilacField { for (const [fieldName, field] of Object.entries(fields)) { const lilacChildField = deserializeField(field, [...path, fieldName]); lilacChildField.path = [...path, fieldName]; + lilacChildField.parent = lilacField; lilacField.fields[fieldName] = lilacChildField; } } if (repeated_field != null) { const lilacChildField = deserializeField(repeated_field, [...path, PATH_WILDCARD]); lilacChildField.path = [...path, PATH_WILDCARD]; + lilacChildField.parent = lilacField; lilacField.repeated_field = lilacChildField; } return lilacField;