From c8e6bc761383768fa8ddec09e05a0cb67c69b22b Mon Sep 17 00:00:00 2001 From: Alexandr Burdiyan Date: Wed, 20 Mar 2024 12:36:59 +0100 Subject: [PATCH] refactor(backend): optimize database indices and queries for blob metadata --- .../daemon/api/entities/v1alpha/entities.go | 2 +- backend/daemon/api/groups/v1alpha/db.go | 2 +- backend/daemon/api/groups/v1alpha/groups.go | 2 +- backend/daemon/storage/migrations.go | 28 +++++ backend/daemon/storage/schema.gensum | 2 +- backend/daemon/storage/schema.sql | 4 +- backend/hyper/hypersql/queries.gen.go | 102 ++---------------- backend/hyper/hypersql/queries.gensum | 4 +- backend/hyper/hypersql/queries.go | 44 ++------ backend/mttnet/list_blobs.go | 2 +- backend/mttnet/providing.go | 11 -- backend/syncing/syncing.go | 8 +- 12 files changed, 56 insertions(+), 155 deletions(-) diff --git a/backend/daemon/api/entities/v1alpha/entities.go b/backend/daemon/api/entities/v1alpha/entities.go index 7be3fba2e..eda39d649 100644 --- a/backend/daemon/api/entities/v1alpha/entities.go +++ b/backend/daemon/api/entities/v1alpha/entities.go @@ -272,7 +272,7 @@ var qGetEntityTimeline = dqb.Str(` public_keys.principal AS author, group_concat(change_deps.parent, ' ') AS deps FROM structural_blobs - JOIN blobs ON blobs.id = structural_blobs.id + JOIN blobs INDEXED BY blobs_metadata ON blobs.id = structural_blobs.id JOIN public_keys ON public_keys.id = structural_blobs.author LEFT JOIN change_deps ON change_deps.child = structural_blobs.id LEFT JOIN drafts ON (drafts.resource, drafts.blob) = (structural_blobs.resource, structural_blobs.id) diff --git a/backend/daemon/api/groups/v1alpha/db.go b/backend/daemon/api/groups/v1alpha/db.go index 15f31cd4e..05eae1318 100644 --- a/backend/daemon/api/groups/v1alpha/db.go +++ b/backend/daemon/api/groups/v1alpha/db.go @@ -176,7 +176,7 @@ var QCollectBlobs = dqb.Str(` blobs.codec, blobs.multihash FROM selected - JOIN blobs ON blobs.id = selected.id AND selected.kind = 1 + JOIN blobs INDEXED BY blobs_metadata ON blobs.id = selected.id AND selected.kind = 1 ORDER BY blobs.id; `) diff --git a/backend/daemon/api/groups/v1alpha/groups.go b/backend/daemon/api/groups/v1alpha/groups.go index 55995557c..17cf9ee9a 100644 --- a/backend/daemon/api/groups/v1alpha/groups.go +++ b/backend/daemon/api/groups/v1alpha/groups.go @@ -847,7 +847,7 @@ var qListDocumentGroups = dqb.Str(` structural_blobs.ts AS ts FROM resource_links JOIN structural_blobs ON structural_blobs.id = resource_links.source - JOIN blobs ON blobs.id = structural_blobs.id + JOIN blobs INDEXED BY blobs_metadata ON blobs.id = structural_blobs.id JOIN resources ON resources.id = structural_blobs.resource WHERE resource_links.type = 'group/content' AND resource_links.target = :document diff --git a/backend/daemon/storage/migrations.go b/backend/daemon/storage/migrations.go index e6b91d269..94417e355 100644 --- a/backend/daemon/storage/migrations.go +++ b/backend/daemon/storage/migrations.go @@ -386,6 +386,34 @@ var migrations = []migration{ DELETE FROM kv WHERE key = 'last_reindex_time'; `)) }}, + {Version: "2024-03-19.01", Run: func(_ *Dir, conn *sqlite.Conn) error { + return sqlitex.ExecScript(conn, sqlfmt(` + DROP VIEW IF EXISTS key_delegations_view; + CREATE VIEW IF NOT EXISTS key_delegations_view AS + SELECT + kd.id AS blob, + blobs.codec AS blob_codec, + blobs.multihash AS blob_multihash, + iss.principal AS issuer, + del.principal AS delegate + FROM key_delegations kd + JOIN blobs INDEXED BY blobs_metadata ON blobs.id = kd.id + JOIN public_keys iss ON iss.id = kd.issuer + JOIN public_keys del ON del.id = kd.delegate; + + DROP VIEW IF EXISTS drafts_view; + CREATE VIEW IF NOT EXISTS drafts_view AS + SELECT + drafts.resource AS resource_id, + drafts.blob AS blob_id, + resources.iri AS resource, + blobs.codec AS codec, + blobs.multihash AS multihash + FROM drafts + JOIN resources ON resources.id = drafts.resource + JOIN blobs INDEXED BY blobs_metadata ON blobs.id = drafts.blob; + `)) + }}, } const ( diff --git a/backend/daemon/storage/schema.gensum b/backend/daemon/storage/schema.gensum index 3bc987f36..6f124c386 100644 --- a/backend/daemon/storage/schema.gensum +++ b/backend/daemon/storage/schema.gensum @@ -1,2 +1,2 @@ -srcs: 73de8491dac69809249c20522cd0a451 +srcs: c0bb41a9dd9db2130a02aa83b977269f outs: 0efc08608cead13c5b8756e4beee2ecd diff --git a/backend/daemon/storage/schema.sql b/backend/daemon/storage/schema.sql index 5b55b3251..4e2f4741d 100644 --- a/backend/daemon/storage/schema.sql +++ b/backend/daemon/storage/schema.sql @@ -99,7 +99,7 @@ SELECT iss.principal AS issuer, del.principal AS delegate FROM key_delegations kd -JOIN blobs ON blobs.id = kd.id +JOIN blobs INDEXED BY blobs_metadata ON blobs.id = kd.id JOIN public_keys iss ON iss.id = kd.issuer JOIN public_keys del ON del.id = kd.delegate; @@ -176,7 +176,7 @@ SELECT blobs.multihash AS multihash FROM drafts JOIN resources ON resources.id = drafts.resource -JOIN blobs ON blobs.id = drafts.blob; +JOIN blobs INDEXED BY blobs_metadata ON blobs.id = drafts.blob; -- View for dependency links between changes. CREATE VIEW change_deps AS diff --git a/backend/hyper/hypersql/queries.gen.go b/backend/hyper/hypersql/queries.gen.go index 2835a7975..9673c27d4 100644 --- a/backend/hyper/hypersql/queries.gen.go +++ b/backend/hyper/hypersql/queries.gen.go @@ -18,7 +18,7 @@ type BlobsHaveResult struct { func BlobsHave(conn *sqlite.Conn, blobsMultihash []byte) (BlobsHaveResult, error) { const query = `SELECT 1 AS have -FROM blobs +FROM blobs INDEXED BY blobs_metadata_by_hash WHERE blobs.multihash = :blobsMultihash AND blobs.size >= 0` @@ -92,7 +92,7 @@ type BlobsGetSizeResult struct { func BlobsGetSize(conn *sqlite.Conn, blobsMultihash []byte) (BlobsGetSizeResult, error) { const query = `SELECT blobs.id, blobs.size -FROM blobs +FROM blobs INDEXED BY blobs_metadata_by_hash WHERE blobs.multihash = :blobsMultihash` var out BlobsGetSizeResult @@ -195,7 +195,7 @@ type BlobsListKnownResult struct { func BlobsListKnown(conn *sqlite.Conn) ([]BlobsListKnownResult, error) { const query = `SELECT blobs.id, blobs.multihash, blobs.codec -FROM blobs +FROM blobs INDEXED BY blobs_metadata LEFT JOIN drafts ON drafts.blob = blobs.id WHERE blobs.size >= 0 AND drafts.blob IS NULL @@ -538,7 +538,7 @@ type KeyDelegationsGetIssuerResult struct { func KeyDelegationsGetIssuer(conn *sqlite.Conn, blobsMultihash []byte) (KeyDelegationsGetIssuerResult, error) { const query = `SELECT key_delegations.issuer FROM key_delegations -JOIN blobs ON blobs.id = key_delegations.id +JOIN blobs INDEXED BY blobs_metadata_by_hash ON blobs.id = key_delegations.id WHERE blobs.multihash = :blobsMultihash LIMIT 1` @@ -780,52 +780,6 @@ ORDER BY structural_blobs.ts` return out, err } -type ChangesListPublicNoDataResult struct { - StructuralBlobsViewBlobID int64 - StructuralBlobsViewCodec int64 - StructuralBlobsViewResourceID int64 - StructuralBlobsViewTs int64 - StructuralBlobsViewMultihash []byte - StructuralBlobsViewSize int64 - StructuralBlobsViewResource string - DraftsBlob int64 -} - -func ChangesListPublicNoData(conn *sqlite.Conn) ([]ChangesListPublicNoDataResult, error) { - const query = `SELECT structural_blobs_view.blob_id, structural_blobs_view.codec, structural_blobs_view.resource_id, structural_blobs_view.ts, structural_blobs_view.multihash, structural_blobs_view.size, structural_blobs_view.resource, drafts.blob -FROM structural_blobs_view -LEFT JOIN drafts ON drafts.resource = structural_blobs_view.resource_id -WHERE drafts.blob IS NULL -AND structural_blobs_view.blob_type = 'Change' ORDER BY structural_blobs_view.resource, structural_blobs_view.ts` - - var out []ChangesListPublicNoDataResult - - before := func(stmt *sqlite.Stmt) { - } - - onStep := func(i int, stmt *sqlite.Stmt) error { - out = append(out, ChangesListPublicNoDataResult{ - StructuralBlobsViewBlobID: stmt.ColumnInt64(0), - StructuralBlobsViewCodec: stmt.ColumnInt64(1), - StructuralBlobsViewResourceID: stmt.ColumnInt64(2), - StructuralBlobsViewTs: stmt.ColumnInt64(3), - StructuralBlobsViewMultihash: stmt.ColumnBytes(4), - StructuralBlobsViewSize: stmt.ColumnInt64(5), - StructuralBlobsViewResource: stmt.ColumnText(6), - DraftsBlob: stmt.ColumnInt64(7), - }) - - return nil - } - - err := sqlitegen.ExecStmt(conn, query, before, onStep) - if err != nil { - err = fmt.Errorf("failed query: ChangesListPublicNoData: %w", err) - } - - return out, err -} - type ChangesResolveHeadsResult struct { ResolvedJSON []byte } @@ -974,7 +928,7 @@ type ChangesGetDepsResult struct { func ChangesGetDeps(conn *sqlite.Conn, changeDepsChild int64) ([]ChangesGetDepsResult, error) { const query = `SELECT blobs.codec, blobs.multihash FROM change_deps -JOIN blobs ON blobs.id = change_deps.parent +JOIN blobs INDEXED BY blobs_metadata ON blobs.id = change_deps.parent WHERE change_deps.child = :changeDepsChild` var out []ChangesGetDepsResult @@ -1000,50 +954,6 @@ WHERE change_deps.child = :changeDepsChild` return out, err } -type ChangesInfoForEntityResult struct { - BlobsCodec int64 - BlobsMultihash []byte - StructuralBlobsID int64 - StructuralBlobsTs int64 - PublicKeysPrincipal []byte - IsTrusted int64 -} - -func ChangesInfoForEntity(conn *sqlite.Conn, structuralBlobsResource int64) ([]ChangesInfoForEntityResult, error) { - const query = `SELECT blobs.codec, blobs.multihash, structural_blobs.id, structural_blobs.ts, public_keys.principal, trusted_accounts.id > 0 AS is_trusted -FROM structural_blobs -JOIN blobs ON blobs.id = structural_blobs.id -JOIN public_keys ON public_keys.id = structural_blobs.author -LEFT JOIN trusted_accounts ON trusted_accounts.id = structural_blobs.author -WHERE structural_blobs.resource = :structuralBlobsResource` - - var out []ChangesInfoForEntityResult - - before := func(stmt *sqlite.Stmt) { - stmt.SetInt64(":structuralBlobsResource", structuralBlobsResource) - } - - onStep := func(i int, stmt *sqlite.Stmt) error { - out = append(out, ChangesInfoForEntityResult{ - BlobsCodec: stmt.ColumnInt64(0), - BlobsMultihash: stmt.ColumnBytes(1), - StructuralBlobsID: stmt.ColumnInt64(2), - StructuralBlobsTs: stmt.ColumnInt64(3), - PublicKeysPrincipal: stmt.ColumnBytes(4), - IsTrusted: stmt.ColumnInt64(5), - }) - - return nil - } - - err := sqlitegen.ExecStmt(conn, query, before, onStep) - if err != nil { - err = fmt.Errorf("failed query: ChangesInfoForEntity: %w", err) - } - - return out, err -} - type BacklinksForDocumentResult struct { ResourcesID int64 ResourcesIRI string @@ -1060,7 +970,7 @@ func BacklinksForDocument(conn *sqlite.Conn, resourceLinksTarget int64) ([]Backl FROM resource_links JOIN structural_blobs ON structural_blobs.id = resource_links.source JOIN resources ON resources.id = structural_blobs.resource -JOIN blobs ON blobs.id = structural_blobs.id +JOIN blobs INDEXED BY blobs_metadata ON blobs.id = structural_blobs.id WHERE resource_links.type GLOB 'doc/*' AND resource_links.target = :resourceLinksTarget` diff --git a/backend/hyper/hypersql/queries.gensum b/backend/hyper/hypersql/queries.gensum index 041a16c7d..c97ff6fa9 100644 --- a/backend/hyper/hypersql/queries.gensum +++ b/backend/hyper/hypersql/queries.gensum @@ -1,2 +1,2 @@ -srcs: 800af070fac742f44f90f3142c9ffcdd -outs: ed8e2acc2115513302d723aab1d251e1 +srcs: 2e1bd3b9f4e7baaf51f43553662842dc +outs: 63faeb2bdfe750e843383133579a0550 diff --git a/backend/hyper/hypersql/queries.go b/backend/hyper/hypersql/queries.go index 0d9f5b9d0..163592b72 100644 --- a/backend/hyper/hypersql/queries.go +++ b/backend/hyper/hypersql/queries.go @@ -14,7 +14,7 @@ func generateQueries() error { code, err := sgen.CodegenQueries("hypersql", qb.MakeQuery(s.Schema, "BlobsHave", sgen.QueryKindSingle, "SELECT", qb.Results(qb.ResultExpr("1", "have", sgen.TypeInt)), '\n', - "FROM", s.Blobs, '\n', + "FROM", s.Blobs, "INDEXED BY blobs_metadata_by_hash", '\n', "WHERE", s.BlobsMultihash, "=", qb.VarCol(s.BlobsMultihash), '\n', "AND", s.BlobsSize, ">=", "0", ), @@ -35,7 +35,7 @@ func generateQueries() error { s.BlobsID, s.BlobsSize, ), '\n', - "FROM", s.Blobs, '\n', + "FROM", s.Blobs, "INDEXED BY blobs_metadata_by_hash", '\n', "WHERE", s.BlobsMultihash, "=", qb.VarCol(s.BlobsMultihash), ), qb.MakeQuery(s.Schema, "BlobsInsert", sgen.QueryKindSingle, @@ -66,7 +66,7 @@ func generateQueries() error { s.BlobsMultihash, s.BlobsCodec, ), '\n', - "FROM", s.Blobs, '\n', + "FROM", s.Blobs, "INDEXED BY blobs_metadata", '\n', "LEFT JOIN", s.Drafts, "ON", s.DraftsBlob, "=", s.BlobsID, '\n', "WHERE", s.BlobsSize, ">=", "0", '\n', "AND", s.DraftsBlob, "IS NULL", '\n', @@ -173,7 +173,7 @@ func generateQueries() error { s.KeyDelegationsIssuer, ), '\n', "FROM", s.KeyDelegations, '\n', - "JOIN", s.Blobs, "ON", s.BlobsID, "=", s.KeyDelegationsID, '\n', + "JOIN", s.Blobs, "INDEXED BY blobs_metadata_by_hash ON", s.BlobsID, "=", s.KeyDelegationsID, '\n', "WHERE", s.BlobsMultihash, "=", qb.VarCol(s.BlobsMultihash), '\n', "LIMIT 1", ), @@ -242,23 +242,6 @@ func generateQueries() error { "WHERE", s.ResourcesIRI, "=", qb.VarCol(s.ResourcesIRI), '\n', "ORDER BY", s.StructuralBlobsTs, ), - qb.MakeQuery(s.Schema, "ChangesListPublicNoData", sgen.QueryKindMany, - "SELECT", qb.Results( - s.StructuralBlobsViewBlobID, - s.StructuralBlobsViewCodec, - s.StructuralBlobsViewResourceID, - s.StructuralBlobsViewTs, - s.StructuralBlobsViewMultihash, - s.StructuralBlobsViewSize, - s.StructuralBlobsViewResource, - s.DraftsBlob, - ), '\n', - "FROM", s.StructuralBlobsView, '\n', - "LEFT JOIN", s.Drafts, "ON", s.DraftsResource, "=", s.StructuralBlobsViewResourceID, '\n', - "WHERE", s.DraftsBlob, "IS NULL", '\n', - "AND", s.StructuralBlobsViewBlobType, "= 'Change'", - "ORDER BY", qb.Enumeration(s.StructuralBlobsViewResource, s.StructuralBlobsViewTs), - ), qb.MakeQuery(s.Schema, "ChangesResolveHeads", sgen.QueryKindSingle, "WITH RECURSIVE changeset (change) AS", qb.SubQuery( "SELECT value", @@ -328,24 +311,9 @@ WHERE blob NOT IN deps`, s.BlobsMultihash, ), '\n', "FROM", s.ChangeDeps, '\n', - "JOIN", s.Blobs, "ON", s.BlobsID, "=", s.ChangeDepsParent, '\n', + "JOIN", s.Blobs, "INDEXED BY blobs_metadata ON", s.BlobsID, "=", s.ChangeDepsParent, '\n', "WHERE", s.ChangeDepsChild, "=", qb.VarCol(s.ChangeDepsChild), ), - qb.MakeQuery(s.Schema, "ChangesInfoForEntity", sgen.QueryKindMany, - "SELECT", qb.Results( - s.BlobsCodec, - s.BlobsMultihash, - s.StructuralBlobsID, - s.StructuralBlobsTs, - s.PublicKeysPrincipal, - qb.ResultExpr(s.C_TrustedAccountsID+" > 0", "is_trusted", sgen.TypeInt), - ), '\n', - "FROM", s.StructuralBlobs, '\n', - "JOIN", s.Blobs, "ON", s.BlobsID, "=", s.StructuralBlobsID, '\n', - "JOIN", s.PublicKeys, "ON", s.PublicKeysID, "=", s.StructuralBlobsAuthor, '\n', - "LEFT JOIN", s.TrustedAccounts, "ON", s.TrustedAccountsID, "=", s.StructuralBlobsAuthor, '\n', - "WHERE", s.StructuralBlobsResource, "=", qb.VarCol(s.StructuralBlobsResource), - ), qb.MakeQuery(s.Schema, "BacklinksForDocument", sgen.QueryKindMany, "SELECT", qb.Results( @@ -361,7 +329,7 @@ WHERE blob NOT IN deps`, "FROM", s.ResourceLinks, '\n', "JOIN", s.StructuralBlobs, "ON", s.StructuralBlobsID, "=", s.ResourceLinksSource, '\n', "JOIN", s.Resources, "ON", s.ResourcesID, "=", s.StructuralBlobsResource, '\n', - "JOIN", s.Blobs, "ON", s.BlobsID, "=", s.StructuralBlobsID, '\n', + "JOIN", s.Blobs, "INDEXED BY blobs_metadata ON", s.BlobsID, "=", s.StructuralBlobsID, '\n', "WHERE", s.ResourceLinksType, "GLOB 'doc/*'", '\n', "AND", s.ResourceLinksTarget, "=", qb.VarCol(s.ResourceLinksTarget), ), diff --git a/backend/mttnet/list_blobs.go b/backend/mttnet/list_blobs.go index a579d19ca..1fe07f19d 100644 --- a/backend/mttnet/list_blobs.go +++ b/backend/mttnet/list_blobs.go @@ -75,7 +75,7 @@ var qListBlobs = dqb.Str(` blobs.id, blobs.codec, blobs.multihash - FROM blobs + FROM blobs INDEXED BY blobs_metadata LEFT OUTER JOIN drafts ON drafts.blob = blobs.id WHERE blobs.size >= 0 AND drafts.blob IS NULL diff --git a/backend/mttnet/providing.go b/backend/mttnet/providing.go index dce1393cc..5eba7c8f7 100644 --- a/backend/mttnet/providing.go +++ b/backend/mttnet/providing.go @@ -6,7 +6,6 @@ import ( "mintter/backend/hyper" "mintter/backend/hyper/hypersql" "mintter/backend/logging" - "mintter/backend/pkg/dqb" "time" "crawshaw.io/sqlite/sqlitex" @@ -15,16 +14,6 @@ import ( "go.uber.org/zap" ) -var qAllPublicBlobs = dqb.Str(` - SELECT - blobs.codec, - blobs.multihash - FROM blobs - LEFT OUTER JOIN drafts ON drafts.blob = blobs.id - WHERE blobs.size >= 0 - AND drafts.blob IS NULL; -`) - var randSrc = rand.NewSource(time.Now().UnixNano()) func makeProvidingStrategy(db *sqlitex.Pool, logLevel string) provider.KeyChanFunc { diff --git a/backend/syncing/syncing.go b/backend/syncing/syncing.go index e3decdb56..fbb0ed2ae 100644 --- a/backend/syncing/syncing.go +++ b/backend/syncing/syncing.go @@ -37,6 +37,11 @@ var ( Help: "Number of blobs we want to sync at this time. Same blob may be counted multiple times if it's wanted from multiple peers.", }, []string{"package"}) + mWantedBlobsTotal = promauto.NewCounter(prometheus.CounterOpts{ + Name: "mintter_syncing_wanted_blobs_total", + Help: "The total number of blobs we wanted to sync from a single peer sync. Same blob may be counted multiple times if it's wanted from multiple peers.", + }) + mSyncsTotal = promauto.NewCounter(prometheus.CounterOpts{ Name: "mintter_syncing_periodic_operations_total", Help: "The total number of periodic sync operations performed with peers (groups don't count).", @@ -66,8 +71,8 @@ var ( Name: "mintter_syncing_worker_tick_duration_seconds", Help: "Duration of a single worker tick.", Objectives: map[float64]float64{ - 0.25: 0.05, 0.5: 0.05, + 0.75: 0.02, 0.9: 0.01, 0.99: 0.001, }, @@ -407,6 +412,7 @@ func syncPeer( if !ok { want = append(want, wantBlob{cid: c, cursor: obj.Cursor}) + mWantedBlobsTotal.Inc() } }