Skip to content

Commit

Permalink
feat: improve API types
Browse files Browse the repository at this point in the history
  • Loading branch information
DerYeger committed Nov 4, 2024
1 parent 7827c6d commit 997f4e0
Show file tree
Hide file tree
Showing 12 changed files with 165 additions and 63 deletions.
19 changes: 14 additions & 5 deletions packages/encoder/bag-of-paths-encoder/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import type { GraphModel } from '@cm2ml/ir'
import type { StructuredOutput } from '@cm2ml/plugin'
import { ExecutionError, batchTryCatch, compose, definePlugin, defineStructuredBatchPlugin, getFirstNonError } from '@cm2ml/plugin'
import { Stream } from '@yeger/streams'

import type { CompiledTemplates } from './bop-types'
import { pathWeightTypes, sortOrders } from './bop-types'
import type { EncodedPath } from './encoding'
import { encodePaths } from './encoding'
import { collectPaths } from './paths'
import type { PruneMethod } from './prune'
Expand All @@ -14,11 +16,13 @@ export type { PathWeight } from './bop-types'
export { pathWeightTypes }
export type { EncodedModelMember, EncodedPath } from './encoding'

/**
 * Metadata attached to bag-of-paths output.
 *
 * Used as the type of `PrecomputedMetadata.metamodelData` and as the metadata
 * type of the structured output returned by `PathBuilder`.
 */
export interface BagOfPathsMetadata {
// NOTE(review): presumably the name of the metamodel's id attribute — confirm against the producer.
idAttribute: string | undefined
// NOTE(review): presumably the names of the metamodel's type attributes — confirm against the producer.
typeAttributes: string[] | undefined
}

/**
 * Metadata handed to `PathBuilder` together with each model.
 */
interface PrecomputedMetadata {
  /**
   * Metamodel attribute information.
   * Declared once via the shared `BagOfPathsMetadata` type instead of also
   * repeating an identically shaped inline object type.
   */
  metamodelData: BagOfPathsMetadata
  /** Compiled templates (type imported from './bop-types'). */
  compiledTemplates: CompiledTemplates
}

Expand Down Expand Up @@ -72,6 +76,11 @@ const TemplateCompiler = defineStructuredBatchPlugin({
},
})

/**
 * Data payload of the structured output returned by `PathBuilder`.
 */
export interface BagOfPathsData {
// The encoded paths (see `EncodedPath` in './encoding').
paths: EncodedPath[]
// NOTE(review): meaning of the entries is not visible here — presumably node ids indexed by position; confirm.
mapping: string[]
}

const PathBuilder = definePlugin({
name: 'path-builder',
parameters: {
Expand Down Expand Up @@ -134,7 +143,7 @@ const PathBuilder = definePlugin({
group: 'Paths',
},
},
invoke: ({ data, metadata }: { data: GraphModel | ExecutionError, metadata: PrecomputedMetadata }, parameters) => {
invoke: ({ data, metadata }: { data: GraphModel | ExecutionError, metadata: PrecomputedMetadata }, parameters): StructuredOutput<BagOfPathsData, BagOfPathsMetadata> | ExecutionError => {
if (data instanceof ExecutionError) {
return data
}
Expand Down
37 changes: 28 additions & 9 deletions packages/encoder/feature-encoder/src/features.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,24 @@ export interface FeatureDeriverSettings extends FeatureEncoderProviderSettings {
edgeFeatureOverride: FeatureMetadata | null
}

/**
 * Shape of the object returned by `deriveFeatures`.
 *
 * Declared as an explicit interface only; declaring the same name a second
 * time as a `ReturnType<typeof deriveFeatures>` alias is a duplicate identifier.
 */
export interface FeatureContext {
  /** Feature metadata and metamodel attribute names assembled by `deriveFeatures`. */
  staticData: StaticFeatureData
  /** Whether the node encoder provider can encode the given attribute. */
  canEncodeNodeAttribute: (attribute: Attribute) => boolean
  /** Whether the edge encoder provider can encode the given attribute. */
  canEncodeEdgeAttribute: (attribute: Attribute) => boolean
  /** Maps a node attribute to a number or string, or `null`. */
  mapNodeAttribute: (attribute: Attribute) => number | string | null
  /** Maps an edge attribute to a number or string, or `null`. */
  mapEdgeAttribute: (attribute: Attribute) => number | string | null
  /** Returns the feature vector for a node. */
  getNodeFeatureVector: (node: GraphNode) => FeatureVector
  /** Returns the feature vector for an edge. */
  getEdgeFeatureVector: (edge: GraphEdge) => FeatureVector
}

/**
 * Plain-data part of a feature context, assembled in `deriveFeatures`.
 */
export interface StaticFeatureData {
// Exported metadata of the node features.
nodeFeatures: FeatureMetadata
// Exported metadata of the edge features.
edgeFeatures: FeatureMetadata
// Copied from `settings.onlyEncodedFeatures`.
onlyEncodedFeatures: boolean
// The three attribute fields are read from the metamodel of the first model (`models[0]?.metamodel`),
// so they are undefined when there is no first model.
idAttribute: string | undefined
typeAttributes: string[] | undefined
nameAttribute: string | undefined
}

export function deriveFeatures(models: GraphModel[], settings: FeatureDeriverSettings) {
const nodes = Stream.from(models).flatMap(({ nodes }) => nodes).cache()
Expand All @@ -70,15 +87,17 @@ export function deriveFeatures(models: GraphModel[], settings: FeatureDeriverSet
const edgeFeatures: FeatureMetadata = internalEdgeFeatures.map(([name, type, encoder]) => [name, type, encoder?.export?.() ?? null] as const)

const metamodel = models[0]?.metamodel

const staticData: StaticFeatureData = {
edgeFeatures,
nodeFeatures,
onlyEncodedFeatures: settings.onlyEncodedFeatures,
idAttribute: metamodel?.idAttribute,
typeAttributes: metamodel?.typeAttributes,
nameAttribute: metamodel?.nameAttribute,
}
return {
staticData: {
edgeFeatures,
nodeFeatures,
onlyEncodedFeatures: settings.onlyEncodedFeatures,
idAttribute: metamodel?.idAttribute,
typeAttributes: metamodel?.typeAttributes,
nameAttribute: metamodel?.nameAttribute,
},
staticData,
canEncodeNodeAttribute: (attribute: Attribute) => nodeEncoderProvider.canEncodeAttribute(attribute),
canEncodeEdgeAttribute: (attribute: Attribute) => edgeEncoderProvider.canEncodeAttribute(attribute),
mapNodeAttribute: createAttributeMapper(nodeEncoderProvider),
Expand Down
18 changes: 14 additions & 4 deletions packages/encoder/feature-encoder/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import { GraphModel } from '@cm2ml/ir'
import type { StructuredOutput } from '@cm2ml/plugin'
import { ExecutionError, ValidationError, compose, definePlugin, defineStructuredBatchPlugin, getFirstNonError } from '@cm2ml/plugin'
import { lazy } from '@cm2ml/utils'
import { Stream } from '@yeger/streams'
import { ZodError } from 'zod'

import { getFeatureMetadataFromFile } from './feature-metadata-extractor'
import type { FeatureContext, FeatureVector, StaticFeatureData } from './features'
import { FeatureMetadataSchema, deriveFeatures } from './features'

export type { FeatureContext, FeatureMetadata, FeatureName, FeatureType, FeatureVector } from './features'
export type { FeatureContext, FeatureMetadata, FeatureName, FeatureType, FeatureVector, StaticFeatureData } from './features'

export const FeatureEncoder = defineStructuredBatchPlugin({
name: 'feature-encoder',
Expand Down Expand Up @@ -63,12 +65,12 @@ export const FeatureEncoder = defineStructuredBatchPlugin({
group: 'features',
},
},
invoke(input: (GraphModel | ExecutionError)[], parameters) {
invoke(input: (GraphModel | ExecutionError)[], parameters): (StructuredOutput<GraphModel, FeatureContext> | ExecutionError)[] {
try {
const models = input.filter((item) => item instanceof GraphModel)
const nodeFeatureOverride = parameters.nodeFeatures !== '' ? FeatureMetadataSchema.parse(JSON.parse(parameters.nodeFeatures)) : null
const edgeFeatureOverride = parameters.edgeFeatures !== '' ? FeatureMetadataSchema.parse(JSON.parse(parameters.edgeFeatures)) : null
const features = lazy(() => deriveFeatures(models, { ...parameters, nodeFeatureOverride, edgeFeatureOverride }))
const features: FeatureContext = lazy(() => deriveFeatures(models, { ...parameters, nodeFeatureOverride, edgeFeatureOverride }))
return input.map((item) => {
if (item instanceof ExecutionError) {
return item
Expand All @@ -87,10 +89,18 @@ export const FeatureEncoder = defineStructuredBatchPlugin({
},
})

/**
 * Data payload of the structured output produced by `StandaloneFeatureEncoder`.
 */
export interface EncodedFeatures {
// Feature vectors for nodes.
nodes: FeatureVector[]
// Feature vectors for edges.
edges: FeatureVector[]
}

/**
* Encodes a graph model with feature vectors.
*/
export const StandaloneFeatureEncoder = compose(FeatureEncoder, definePlugin({
name: 'feature-vector-generator',
parameters: {},
invoke(batch, _parameters) {
invoke(batch, _parameters): (StructuredOutput<EncodedFeatures, StaticFeatureData | undefined> | ExecutionError)[] {
const firstNonError = getFirstNonError(batch)
const metadata = firstNonError?.metadata.staticData
return batch.map((item) => {
Expand Down
32 changes: 26 additions & 6 deletions packages/encoder/graph-encoder/src/edge-encoder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ import { Stream } from '@yeger/streams'
/**
 * Names of the available encoding formats; they match the `format` tags of
 * `AdjacencyListEncoding` ('list') and `AdjacencyMatrixEncoding` ('matrix').
 */
export const formats = ['list', 'matrix'] as const
/** Union of the entries of `formats`, i.e. `'list' | 'matrix'`. */
export type Format = typeof formats[number]

/**
 * Data returned by `EdgeEncoder`: either adjacency encoding (list or matrix),
 * extended with the feature vectors of the nodes.
 */
export type AdjacencyEncoding = (AdjacencyListEncoding | AdjacencyMatrixEncoding) & {
// Obtained by mapping nodes through `getNodeFeatureVector`.
nodeFeatureVectors: FeatureVector[]
}

export const EdgeEncoder = defineStructuredPlugin({
name: 'edge-encoder',
parameters: {
Expand Down Expand Up @@ -39,11 +43,13 @@ export const EdgeEncoder = defineStructuredPlugin({
.map(getNodeFeatureVector)
.toArray()

const encodedData: AdjacencyEncoding = {
...edgeEncoding,
nodeFeatureVectors,
}

return {
data: {
...edgeEncoding,
nodeFeatureVectors,
},
data: encodedData,
metadata: staticData,
}
},
Expand All @@ -59,12 +65,19 @@ function getSortedIds(model: GraphModel) {

/**
 * Adjacency list made of either 2-tuples or 3-tuples of numbers.
 * NOTE(review): presumably [source, target] and [source, target, weight] — confirm in `encodeAsAdjacencyList`.
 */
export type AdjacencyList = [number, number][] | [number, number, number][]

/**
 * Return type of `encodeAsAdjacencyList`; discriminated by `format: 'list'`.
 */
export interface AdjacencyListEncoding {
format: 'list'
list: AdjacencyList
// NOTE(review): presumably the sorted node ids that the list entries index into — confirm.
nodes: string[]
edgeFeatureVectors: FeatureVector[]
}

function encodeAsAdjacencyList(
edges: ReadonlySet<GraphEdge>,
sortedIds: string[],
weighted: boolean,
getEdgeFeatureVector: (edge: GraphEdge) => FeatureVector,
) {
): AdjacencyListEncoding {
const list = new Array<
readonly [number, number] | readonly [number, number, number]
>()
Expand Down Expand Up @@ -124,11 +137,18 @@ function sortAdjacencyList(list: AdjacencyList) {

/**
 * Adjacency matrix as nested number arrays.
 * NOTE(review): created from `sortedIds.length`, so presumably square with one row/column per node — confirm.
 */
export type AdjacencyMatrix = number[][]

/**
 * Return type of `encodeAsAdjacencyMatrix`; discriminated by `format: 'matrix'`.
 */
export interface AdjacencyMatrixEncoding {
format: 'matrix'
matrix: AdjacencyMatrix
// Set to the sorted node ids passed to `encodeAsAdjacencyMatrix`.
nodes: string[]
// `encodeAsAdjacencyMatrix` returns an empty array here.
edgeFeatureVectors: FeatureVector[]
}

function encodeAsAdjacencyMatrix(
edges: ReadonlySet<GraphEdge>,
sortedIds: string[],
weighted: boolean,
) {
): AdjacencyMatrixEncoding {
const matrix = createAdjacencyMatrix(sortedIds.length)
fillAdjacencyMatrix(matrix, edges, sortedIds, weighted)
return { format: 'matrix' as const, matrix, nodes: sortedIds, edgeFeatureVectors: [] }
Expand Down
8 changes: 7 additions & 1 deletion packages/encoder/graph-encoder/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ import { batchTryCatch, compose } from '@cm2ml/plugin'

import { EdgeEncoder } from './edge-encoder'

export type { AdjacencyList, AdjacencyMatrix } from './edge-encoder'
export type { AdjacencyEncoding, AdjacencyList, AdjacencyListEncoding, AdjacencyMatrix, AdjacencyMatrixEncoding } from './edge-encoder'

/**
 * Encodes a graph model as a raw graph with feature vectors and adjacency data.
 *
 * Built by composing `FeatureEncoder` with `EdgeEncoder` (wrapped in `batchTryCatch`).
 *
 * **Requirements:**
 * - Each node must have a unique id.
 */
export const GraphEncoder = compose(FeatureEncoder, batchTryCatch(EdgeEncoder), 'raw-graph')
4 changes: 3 additions & 1 deletion packages/encoder/pattern-miner/src/frequency.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ import type { MinedPattern } from './mining'
import type { LabeledEdge, SerializedLabeledEdge } from './normalization'
import type { FrequencyParameters, PatternOrder } from './pattern-types'

/** A pattern as a list of serialized labeled edges; the type of `PatternWithFrequency.pattern`. */
export type PatternData = SerializedLabeledEdge[]

export interface PatternWithFrequency {
/**
* The pattern
*/
pattern: SerializedLabeledEdge[]
pattern: PatternData
/**
* A DOT-notation graph of the pattern.
*/
Expand Down
29 changes: 23 additions & 6 deletions packages/encoder/pattern-miner/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
import type { GraphModel } from '@cm2ml/ir'
import type { InferOut } from '@cm2ml/plugin'
import type { InferOut, StructuredOutput } from '@cm2ml/plugin'
import { ExecutionError, batchTryCatch, compose, definePlugin } from '@cm2ml/plugin'
import { Stream } from '@yeger/streams'

import { embedPartitions } from './embedding'
import type { PatternWithFrequency } from './frequency'
import { calculateFrequencies } from './frequency'
import type { MinedPattern } from './mining'
import { minePatterns } from './mining'
import type { PatternMapping } from './normalization'
import { normalizePartitions } from './normalization'
import { partitionNodes } from './partitioning'
import { restorePartitionEdges } from './restoration'

export type { PatternWithFrequency } from './frequency'
export type { PatternData, PatternWithFrequency } from './frequency'
export type { MinedPattern } from './mining'
export type { PatternMapping, SerializedLabeledEdge } from './normalization'

/**
 * Data payload of the structured output returned by `ModelPatternMiner`.
 *
 * NOTE(review): this module also re-exports a different type named `PatternData`
 * from './frequency' (an array of serialized labeled edges). The two share a name
 * but not a shape — consider renaming this local interface to avoid confusion.
 */
interface PatternData {
// Patterns returned by `minePatterns`.
patterns: MinedPattern[]
// Mapping returned by `normalizePartitions`.
mapping: PatternMapping
}

const ModelPatternMiner = batchTryCatch(definePlugin({
name: 'patterns',
Expand Down Expand Up @@ -65,15 +75,16 @@ const ModelPatternMiner = batchTryCatch(definePlugin({
group: 'mining',
},
},
invoke(model: GraphModel, parameters) {
invoke(model: GraphModel, parameters): StructuredOutput<PatternData, null> {
const partitions = partitionNodes(model, parameters)
.map(restorePartitionEdges)
const { normalizedPartitions, mapping } = normalizePartitions(partitions, parameters)
const embedding = embedPartitions(normalizedPartitions)
const patterns = minePatterns(embedding, parameters)
const data: PatternData = { patterns, mapping }
return {
data: { patterns, mapping },
metadata: {},
data,
metadata: null,
}
},
}))
Expand Down Expand Up @@ -107,7 +118,7 @@ const PatternFrequencyMiner = definePlugin({
group: 'filter',
},
},
invoke(batch: InferOut<typeof ModelPatternMiner>, parameters) {
invoke(batch: InferOut<typeof ModelPatternMiner>, parameters): (StructuredOutput<PatternMapping | ExecutionError, PatternWithFrequency[]>)[] {
const patterns = Stream
.from(batch)
.map((result) => result instanceof ExecutionError ? [] : result.data.patterns)
Expand All @@ -120,4 +131,10 @@ const PatternFrequencyMiner = definePlugin({
},
})

/**
 * Detects patterns in graph models.
 *
 * Built by composing `ModelPatternMiner` with `PatternFrequencyMiner`.
 *
 * **Requirements:**
 * - Each node must have a unique id.
 */
export const PatternMiner = compose(ModelPatternMiner, PatternFrequencyMiner, 'pattern-miner')
10 changes: 9 additions & 1 deletion packages/encoder/pattern-miner/src/normalization.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ import type { GraphEdge, GraphNode } from '@cm2ml/ir'

import type { NormalizationParameters } from './pattern-types'

/**
 * Maps the ID of a labeled node to the IDs of the nodes in the original graph.
 *
 * Produced by `normalizePartitions`, which collects graph node ids per labeled
 * node id in sets and converts each set to an array.
 */
export type PatternMapping = Record<string, string[]>

export function normalizePartitions(partitions: Set<GraphNode>[], parameters: NormalizationParameters) {
const normalizedLabeledNodes: LabeledNode[][] = []
const crossPartitionMapping: Record<string, Set<string>> = {}
Expand All @@ -18,9 +23,12 @@ export function normalizePartitions(partitions: Set<GraphNode>[], parameters: No
crossPartitionMapping[labeledNodeId].add(graphNodeId)
})
})
const mapping: PatternMapping = Object
.fromEntries(Object.entries(crossPartitionMapping)
.map(([key, value]) => [key, Array.from(value)]))
return {
normalizedPartitions: normalizedLabeledNodes,
mapping: Object.fromEntries(Object.entries(crossPartitionMapping).map(([key, value]) => [key, Array.from(value)])),
mapping,
}
}

Expand Down
Loading

0 comments on commit 997f4e0

Please sign in to comment.