Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add batching logic when we pull windows profiles to install or remove #26964

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions changes/26918-placeholders-windows
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- Fixed an issue with assigning Windows MDM profiles to large numbers (> 65k) of hosts by batching
the relevant database queries.
14 changes: 14 additions & 0 deletions server/datastore/mysql/mdm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ func TestMDMShared(t *testing.T) {
{"TestIsHostConnectedToFleetMDM", testIsHostConnectedToFleetMDM},
{"TestAreHostsConnectedToFleetMDM", testAreHostsConnectedToFleetMDM},
{"TestBulkSetPendingMDMHostProfilesExcludeAny", testBulkSetPendingMDMHostProfilesExcludeAny},
{"TestBulkSetPendingMDMHostProfilesLotsOfHosts", testBulkSetPendingMDMWindowsHostProfilesLotsOfHosts},
}

for _, c := range cases {
Expand Down Expand Up @@ -7614,3 +7615,16 @@ func testBulkSetPendingMDMHostProfilesExcludeAny(t *testing.T, ds *Datastore) {
winHost2: {},
})
}

func testBulkSetPendingMDMWindowsHostProfilesLotsOfHosts(t *testing.T, ds *Datastore) {
ctx := context.Background()

var hostUUIDs []string
// The bug this test was built to reproduce is visible down to ~16400 hosts; keeping this at 66k for scale testing
for range 66000 {
hostUUIDs = append(hostUUIDs, uuid.NewString())
}

_, err := ds.bulkSetPendingMDMWindowsHostProfilesDB(ctx, ds.writer(ctx), hostUUIDs, nil)
require.NoError(t, err)
}
268 changes: 163 additions & 105 deletions server/datastore/mysql/microsoft_mdm.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/xml"
"errors"
"fmt"
"math"
"strings"

"github.com/fleetdm/fleet/v4/server/contexts/ctxerr"
Expand Down Expand Up @@ -1247,146 +1248,158 @@ func (ds *Datastore) ListMDMWindowsProfilesToInstall(ctx context.Context) ([]*fl
// be without and use the reader replica?
err := ds.withTx(ctx, func(tx sqlx.ExtContext) error {
var err error
result, err = listMDMWindowsProfilesToInstallDB(ctx, tx, nil, nil)
result, err = ds.listAllMDMWindowsProfilesToInstallDB(ctx, tx)
return err
})
return result, err
}

func listMDMWindowsProfilesToInstallDB(
ctx context.Context,
tx sqlx.ExtContext,
hostUUIDs []string,
onlyProfileUUIDs []string,
) ([]*fleet.MDMWindowsProfilePayload, error) {
// The query below is a set difference between:
//
// - Set A (ds), the "desired state", can be obtained from a JOIN between
// mdm_windows_configuration_profiles and hosts.
//
// - Set B, the "current state" given by host_mdm_windows_profiles.
//
// A - B gives us the profiles that need to be installed:
//
// - profiles that are in A but not in B
//
// - profiles that are in A and in B, with an operation type of "install"
// and a NULL status. Other statuses mean that the operation is already in
// flight (pending), the operation has been completed but is still subject
// to independent verification by Fleet (verifying), or has reached a terminal
// state (failed or verified). If the profile's content is edited, all relevant hosts will
// be marked as status NULL so that it gets re-installed.
//
// Note that for label-based profiles, only fully-satisfied profiles are
// considered for installation. This means that a broken label-based profile,
// where one of the labels does not exist anymore, will not be considered for
// installation.

query := fmt.Sprintf(`
// The query below is a set difference between:
//
// - Set A (ds), the "desired state", can be obtained from a JOIN between
// mdm_windows_configuration_profiles and hosts.
//
// - Set B, the "current state" given by host_mdm_windows_profiles.
//
// A - B gives us the profiles that need to be installed:
//
// - profiles that are in A but not in B
//
// - profiles that are in A and in B, with an operation type of "install"
// and a NULL status. Other statuses mean that the operation is already in
// flight (pending), the operation has been completed but is still subject
// to independent verification by Fleet (verifying), or has reached a terminal
// state (failed or verified). If the profile's content is edited, all relevant hosts will
// be marked as status NULL so that it gets re-installed.
//
// Note that for label-based profiles, only fully-satisfied profiles are
// considered for installation. This means that a broken label-based profile,
// where one of the labels does not exist anymore, will not be considered for
// installation.
const windowsProfilesToInstallQuery = `
SELECT
ds.profile_uuid,
ds.host_uuid,
ds.name as profile_name
FROM ( %s ) as ds
FROM ( ` + windowsMDMProfilesDesiredStateQuery + ` ) as ds
LEFT JOIN host_mdm_windows_profiles hmwp
ON hmwp.profile_uuid = ds.profile_uuid AND hmwp.host_uuid = ds.host_uuid
WHERE
-- profiles in A but not in B
( hmwp.profile_uuid IS NULL AND hmwp.host_uuid IS NULL ) OR
-- profiles in A and B with operation type "install" and NULL status
( hmwp.host_uuid IS NOT NULL AND hmwp.operation_type = ? AND hmwp.status IS NULL )
`, windowsMDMProfilesDesiredStateQuery)
`

hostFilter := "TRUE"
if len(hostUUIDs) > 0 {
if len(onlyProfileUUIDs) > 0 {
hostFilter = "mwcp.profile_uuid IN (?) AND h.uuid IN (?)"
} else {
hostFilter = "h.uuid IN (?)"
}
func (ds *Datastore) listAllMDMWindowsProfilesToInstallDB(ctx context.Context, tx sqlx.ExtContext) ([]*fleet.MDMWindowsProfilePayload, error) {
var profiles []*fleet.MDMWindowsProfilePayload
err := sqlx.SelectContext(ctx, tx, &profiles, fmt.Sprintf(windowsProfilesToInstallQuery, "TRUE", "TRUE", "TRUE", "TRUE"), fleet.MDMOperationTypeInstall)
if err != nil {
return nil, ctxerr.Wrapf(ctx, err, "selecting windows MDM profiles to install")
}

var err error
args := []any{fleet.MDMOperationTypeInstall}
query = fmt.Sprintf(query, hostFilter, hostFilter, hostFilter, hostFilter)
if len(hostUUIDs) > 0 {
return profiles, nil
}

func (ds *Datastore) listMDMWindowsProfilesToInstallDB(
ctx context.Context,
tx sqlx.ExtContext,
hostUUIDs []string,
onlyProfileUUIDs []string,
) (profiles []*fleet.MDMWindowsProfilePayload, err error) {
if len(hostUUIDs) == 0 {
return profiles, nil
}

hostFilter := "h.uuid IN (?)"
if len(onlyProfileUUIDs) > 0 {
hostFilter = "mwcp.profile_uuid IN (?) AND h.uuid IN (?)"
}

toInstallQuery := fmt.Sprintf(windowsProfilesToInstallQuery, hostFilter, hostFilter, hostFilter, hostFilter)

// use a 10k host batch size to match what we do on the macOS side.
selectProfilesBatchSize := 10_000
if ds.testSelectMDMProfilesBatchSize > 0 {
selectProfilesBatchSize = ds.testSelectMDMProfilesBatchSize
}
selectProfilesTotalBatches := int(math.Ceil(float64(len(hostUUIDs)) / float64(selectProfilesBatchSize)))

for i := range selectProfilesTotalBatches {
start := i * selectProfilesBatchSize
end := min(start+selectProfilesBatchSize, len(hostUUIDs))

batchUUIDs := hostUUIDs[start:end]

var args []any
var stmt string
if len(onlyProfileUUIDs) > 0 {
query, args, err = sqlx.In(
query,
onlyProfileUUIDs, hostUUIDs,
onlyProfileUUIDs, hostUUIDs,
onlyProfileUUIDs, hostUUIDs,
onlyProfileUUIDs, hostUUIDs,
stmt, args, err = sqlx.In(
toInstallQuery,
onlyProfileUUIDs, batchUUIDs,
onlyProfileUUIDs, batchUUIDs,
onlyProfileUUIDs, batchUUIDs,
onlyProfileUUIDs, batchUUIDs,
fleet.MDMOperationTypeInstall,
)
} else {
query, args, err = sqlx.In(query, hostUUIDs, hostUUIDs, hostUUIDs, hostUUIDs, fleet.MDMOperationTypeInstall)
stmt, args, err = sqlx.In(toInstallQuery, batchUUIDs, batchUUIDs, batchUUIDs, batchUUIDs, fleet.MDMOperationTypeInstall)
}
if err != nil {
return nil, ctxerr.Wrap(ctx, err, "building sqlx.In")
return nil, ctxerr.Wrapf(ctx, err, "building sqlx.In for list MDM windows profiles to install, batch %d of %d", i, selectProfilesTotalBatches)
}

var partialResult []*fleet.MDMWindowsProfilePayload
err = sqlx.SelectContext(ctx, tx, &partialResult, stmt, args...)
if err != nil {
return nil, ctxerr.Wrapf(ctx, err, "selecting windows MDM profiles to install, batch %d of %d", i, selectProfilesTotalBatches)
}

profiles = append(profiles, partialResult...)
}

var profiles []*fleet.MDMWindowsProfilePayload
err = sqlx.SelectContext(ctx, tx, &profiles, query, args...)
return profiles, err
return profiles, nil
}

func (ds *Datastore) ListMDMWindowsProfilesToRemove(ctx context.Context) ([]*fleet.MDMWindowsProfilePayload, error) {
var result []*fleet.MDMWindowsProfilePayload
err := ds.withTx(ctx, func(tx sqlx.ExtContext) error {
var err error
result, err = listMDMWindowsProfilesToRemoveDB(ctx, tx, nil, nil)
result, err = ds.listAllMDMWindowsProfilesToRemoveDB(ctx, tx)
return err
})

return result, err
}

func listMDMWindowsProfilesToRemoveDB(
ctx context.Context,
tx sqlx.ExtContext,
hostUUIDs []string,
onlyProfileUUIDs []string,
) ([]*fleet.MDMWindowsProfilePayload, error) {
// The query below is a set difference between:
//
// - Set A (ds), the desired state, can be obtained from a JOIN between
// mdm_windows_configuration_profiles and hosts.
// - Set B, the current state given by host_mdm_windows_profiles.
//
// B - A gives us the profiles that need to be removed
//
// Any other case are profiles that are in both B and A, and as such are
// processed by the ListMDMWindowsProfilesToInstall method (since they are
// in both, their desired state is necessarily to be installed).
//
// Note that for label-based profiles, only those that are fully-sastisfied
// by the host are considered for install (are part of the desired state used
// to compute the ones to remove). However, as a special case, a broken
// label-based profile will NOT be removed from a host where it was
// previously installed. However, if a host used to satisfy a label-based
// profile but no longer does (and that label-based profile is not "broken"),
// the profile will be removed from the host.

hostFilter := "TRUE"
if len(hostUUIDs) > 0 {
if len(onlyProfileUUIDs) > 0 {
hostFilter = "hmwp.profile_uuid IN (?) AND hmwp.host_uuid IN (?)"
} else {
hostFilter = "hmwp.host_uuid IN (?)"
}
}

query := fmt.Sprintf(`
// The query below is a set difference between:
//
// - Set A (ds), the desired state, can be obtained from a JOIN between
// mdm_windows_configuration_profiles and hosts.
// - Set B, the current state given by host_mdm_windows_profiles.
//
// # B - A gives us the profiles that need to be removed
//
// Any other case are profiles that are in both B and A, and as such are
// processed by the ListMDMWindowsProfilesToInstall method (since they are
// in both, their desired state is necessarily to be installed).
//
// Note that for label-based profiles, only those that are fully-satisfied
// by the host are considered for install (are part of the desired state used
// to compute the ones to remove). However, as a special case, a broken
// label-based profile will NOT be removed from a host where it was
// previously installed. However, if a host used to satisfy a label-based
// profile but no longer does (and that label-based profile is not "broken"),
// the profile will be removed from the host.
const windowsProfilesToRemoveQuery = `
SELECT
hmwp.profile_uuid,
hmwp.host_uuid,
hmwp.operation_type,
COALESCE(hmwp.detail, '') as detail,
hmwp.status,
hmwp.command_uuid
FROM ( %s ) as ds
FROM ( ` + windowsMDMProfilesDesiredStateQuery + ` ) as ds
RIGHT JOIN host_mdm_windows_profiles hmwp
ON hmwp.profile_uuid = ds.profile_uuid AND hmwp.host_uuid = ds.host_uuid
WHERE
Expand All @@ -1404,24 +1417,69 @@ func listMDMWindowsProfilesToRemoveDB(
mcpl.label_id IS NULL
) AND
(%s)
`, fmt.Sprintf(windowsMDMProfilesDesiredStateQuery, "TRUE", "TRUE", "TRUE", "TRUE"), hostFilter)
`

func (ds *Datastore) listAllMDMWindowsProfilesToRemoveDB(ctx context.Context, tx sqlx.ExtContext) (profiles []*fleet.MDMWindowsProfilePayload, err error) {
err = sqlx.SelectContext(ctx, tx, &profiles, fmt.Sprintf(windowsProfilesToRemoveQuery, "TRUE", "TRUE", "TRUE", "TRUE", "TRUE"))
if err != nil {
return nil, ctxerr.Wrapf(ctx, err, "selecting windows MDM profiles to remove")
}

return profiles, nil
}

var err error
var args []any
if len(hostUUIDs) > 0 {
func (ds *Datastore) listMDMWindowsProfilesToRemoveDB(
ctx context.Context,
tx sqlx.ExtContext,
hostUUIDs []string,
onlyProfileUUIDs []string,
) (profiles []*fleet.MDMWindowsProfilePayload, err error) {
if len(hostUUIDs) == 0 {
return profiles, nil
}

hostFilter := "hmwp.host_uuid IN (?)"
if len(onlyProfileUUIDs) > 0 {
hostFilter = "hmwp.profile_uuid IN (?) AND hmwp.host_uuid IN (?)"
}

toRemoveQuery := fmt.Sprintf(windowsProfilesToRemoveQuery, "TRUE", "TRUE", "TRUE", "TRUE", hostFilter)

// use a 10k host batch size to match what we do on the macOS side.
selectProfilesBatchSize := 10_000
if ds.testSelectMDMProfilesBatchSize > 0 {
selectProfilesBatchSize = ds.testSelectMDMProfilesBatchSize
}
selectProfilesTotalBatches := int(math.Ceil(float64(len(hostUUIDs)) / float64(selectProfilesBatchSize)))

for i := range selectProfilesTotalBatches {
start := i * selectProfilesBatchSize
end := min(start+selectProfilesBatchSize, len(hostUUIDs))

batchUUIDs := hostUUIDs[start:end]

var err error
var args []any
var stmt string
if len(onlyProfileUUIDs) > 0 {
query, args, err = sqlx.In(query, onlyProfileUUIDs, hostUUIDs)
stmt, args, err = sqlx.In(toRemoveQuery, onlyProfileUUIDs, batchUUIDs)
} else {
query, args, err = sqlx.In(query, hostUUIDs)
stmt, args, err = sqlx.In(toRemoveQuery, batchUUIDs)
}
if err != nil {
return nil, ctxerr.Wrapf(ctx, err, "building sqlx.In for list MDM windows profiles to remove, batch %d of %d", i, selectProfilesTotalBatches)
}

var partialResult []*fleet.MDMWindowsProfilePayload
err = sqlx.SelectContext(ctx, tx, &partialResult, stmt, args...)
if err != nil {
return nil, err
return nil, ctxerr.Wrapf(ctx, err, "selecting windows MDM profiles to remove, batch %d of %d", i, selectProfilesTotalBatches)
}

profiles = append(profiles, partialResult...)
}

var profiles []*fleet.MDMWindowsProfilePayload
err = sqlx.SelectContext(ctx, tx, &profiles, query, args...)
return profiles, err
return profiles, nil
}

func (ds *Datastore) BulkUpsertMDMWindowsHostProfiles(ctx context.Context, payload []*fleet.MDMWindowsBulkUpsertHostProfilePayload) error {
Expand Down Expand Up @@ -1930,12 +1988,12 @@ func (ds *Datastore) bulkSetPendingMDMWindowsHostProfilesDB(
return false, nil
}

profilesToInstall, err := listMDMWindowsProfilesToInstallDB(ctx, tx, hostUUIDs, onlyProfileUUIDs)
profilesToInstall, err := ds.listMDMWindowsProfilesToInstallDB(ctx, tx, hostUUIDs, onlyProfileUUIDs)
if err != nil {
return false, ctxerr.Wrap(ctx, err, "list profiles to install")
}

profilesToRemove, err := listMDMWindowsProfilesToRemoveDB(ctx, tx, hostUUIDs, onlyProfileUUIDs)
profilesToRemove, err := ds.listMDMWindowsProfilesToRemoveDB(ctx, tx, hostUUIDs, onlyProfileUUIDs)
if err != nil {
return false, ctxerr.Wrap(ctx, err, "list profiles to remove")
}
Expand Down
Loading