Skip to content

Commit

Permalink
Synchronize VCS repos concurrently
Browse files Browse the repository at this point in the history
Cloning/synchronising VCS repos can be unnecessarily slow if done
serially. By synchronizing the repos concurrently we make much better
use of time.

Introduces rerunConcurrentlyIfChanged, a Rebuild monad function that
runs, from multiple actions, the actions that need rebuilding concurrently.
  • Loading branch information
alt-romes committed Nov 16, 2024
1 parent b6c28ee commit c89ab54
Show file tree
Hide file tree
Showing 10 changed files with 188 additions and 74 deletions.
1 change: 1 addition & 0 deletions cabal-install/src/Distribution/Client/CmdInstall.hs
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,7 @@ installAction flags@NixStyleFlags{extraFlags, configFlags, installFlags, project
fetchAndReadSourcePackages
verbosity
distDirLayout
compiler
(projectConfigShared config)
(projectConfigBuildOnly config)
[ProjectPackageRemoteTarball uri | uri <- uris]
Expand Down
4 changes: 2 additions & 2 deletions cabal-install/src/Distribution/Client/Install.hs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ import Distribution.Solver.Types.PkgConfigDb
import Distribution.Solver.Types.Settings
import Distribution.Solver.Types.SourcePackage as SourcePackage

import Distribution.Client.ProjectConfig
import Distribution.Client.Utils
( MergeResult (..)
, ProgressPhase (..)
Expand Down Expand Up @@ -1443,7 +1444,7 @@ performInstallations
if parallelInstall
then newParallelJobControl numJobs
else newSerialJobControl
fetchLimit <- newJobLimit (min numJobs numFetchJobs)
fetchLimit <- newJobLimit (min numJobs maxNumFetchJobs)
installLock <- newLock -- serialise installation
cacheLock <- newLock -- serialise access to setup exe cache
executeInstallPlan
Expand Down Expand Up @@ -1486,7 +1487,6 @@ performInstallations
cinfo = compilerInfo comp

numJobs = determineNumJobs (installNumJobs installFlags)
numFetchJobs = 2
parallelInstall = numJobs >= 2
keepGoing = fromFlag (installKeepGoing installFlags)
distPref =
Expand Down
45 changes: 44 additions & 1 deletion cabal-install/src/Distribution/Client/JobControl.hs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ module Distribution.Client.JobControl
, Lock
, newLock
, criticalSection

-- * Higher level utils
, newJobControlFromParStrat
, withJobControl
, mapConcurrentWithJobs
) where

import Distribution.Client.Compat.Prelude
Expand All @@ -40,11 +45,14 @@ import Control.Concurrent (forkIO, forkIOWithUnmask, threadDelay)
import Control.Concurrent.MVar
import Control.Concurrent.STM (STM, TVar, atomically, modifyTVar', newTVarIO, readTVar)
import Control.Concurrent.STM.TChan
import Control.Exception (bracket_, mask_, try)
import Control.Exception (bracket, bracket_, mask_, try)
import Control.Monad (forever, replicateM_)
import Distribution.Client.Compat.Semaphore
import Distribution.Client.Utils (numberOfProcessors)
import Distribution.Compat.Stack
import Distribution.Simple.Compiler
import Distribution.Simple.Utils
import Distribution.Types.ParStrat
import System.Semaphore

-- | A simple concurrency abstraction. Jobs can be spawned and can complete
Expand Down Expand Up @@ -262,3 +270,38 @@ newLock = fmap Lock $ newMVar ()

criticalSection :: Lock -> IO a -> IO a
criticalSection (Lock lck) act = bracket_ (takeMVar lck) (putMVar lck ()) act

--------------------------------------------------------------------------------
-- More high level utils
--------------------------------------------------------------------------------

newJobControlFromParStrat
:: Verbosity
-> Compiler
-> ParStratInstall
-- ^ The parallel strategy
-> Maybe Int
-- ^ A cap on the number of jobs (e.g. to force a maximum of 2 concurrent downloads despite a -j8 parallel strategy)
-> IO (JobControl IO a)
newJobControlFromParStrat verbosity compiler parStrat numJobsCap = case parStrat of
Serial -> newSerialJobControl
NumJobs n -> newParallelJobControl (capJobs (fromMaybe numberOfProcessors n))
UseSem n ->
if jsemSupported compiler
then newSemaphoreJobControl verbosity (capJobs n)
else do
warn verbosity "-jsem is not supported by the selected compiler, falling back to normal parallelism control."
newParallelJobControl (capJobs n)
where
capJobs n = min (fromMaybe maxBound numJobsCap) n

withJobControl :: IO (JobControl IO a) -> (JobControl IO a -> IO b) -> IO b
withJobControl mkJC = bracket mkJC cleanupJobControl

-- | Concurrently execute actions on a list using the given JobControl.
-- The maximum number of concurrent jobs is tied to the JobControl instance.
-- The resulting list does /not/ preserve the original order!
mapConcurrentWithJobs :: JobControl IO b -> (a -> IO b) -> [a] -> IO [b]
mapConcurrentWithJobs jobControl f xs = do
traverse_ (spawnJob jobControl . f) xs
traverse (const $ collectJob jobControl) xs
20 changes: 5 additions & 15 deletions cabal-install/src/Distribution/Client/ProjectBuilding.hs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ import qualified Data.Set as Set

import qualified Text.PrettyPrint as Disp

import Control.Exception (assert, bracket, handle)
import Control.Exception (assert, handle)
import System.Directory (doesDirectoryExist, doesFileExist, renameDirectory)
import System.FilePath (makeRelative, normalise, takeDirectory, (<.>), (</>))
import System.Semaphore (SemaphoreName (..))
Expand All @@ -98,7 +98,6 @@ import Distribution.Simple.Flag (fromFlagOrDefault)

import Distribution.Client.ProjectBuilding.PackageFileMonitor
import Distribution.Client.ProjectBuilding.UnpackedPackage (annotateFailureNoLog, buildAndInstallUnpackedPackage, buildInplaceUnpackedPackage)
import Distribution.Client.Utils (numberOfProcessors)

------------------------------------------------------------------------------

Expand Down Expand Up @@ -355,17 +354,6 @@ rebuildTargets
}
| fromFlagOrDefault False (projectConfigOfflineMode config) && not (null packagesToDownload) = return offlineError
| otherwise = do
-- Concurrency control: create the job controller and concurrency limits
-- for downloading, building and installing.
mkJobControl <- case buildSettingNumJobs of
Serial -> newSerialJobControl
NumJobs n -> newParallelJobControl (fromMaybe numberOfProcessors n)
UseSem n ->
if jsemSupported compiler
then newSemaphoreJobControl verbosity n
else do
warn verbosity "-jsem is not supported by the selected compiler, falling back to normal parallelism control."
newParallelJobControl n
registerLock <- newLock -- serialise registration
cacheLock <- newLock -- serialise access to setup exe cache
-- TODO: [code cleanup] eliminate setup exe cache
Expand All @@ -380,7 +368,9 @@ rebuildTargets
createDirectoryIfMissingVerbose verbosity True distTempDirectory
traverse_ (createPackageDBIfMissing verbosity compiler progdb) packageDBsToUse

bracket (pure mkJobControl) cleanupJobControl $ \jobControl -> do
-- Concurrency control: create the job controller and concurrency limits
-- for downloading, building and installing.
withJobControl (newJobControlFromParStrat verbosity compiler buildSettingNumJobs Nothing) $ \jobControl -> do
-- Before traversing the install plan, preemptively find all packages that
-- will need to be downloaded and start downloading them.
asyncDownloadPackages
Expand All @@ -391,7 +381,7 @@ rebuildTargets
$ \downloadMap ->
-- For each package in the plan, in dependency order, but in parallel...
InstallPlan.execute
mkJobControl
jobControl
keepGoing
(BuildFailure Nothing . DependentFailed . packageId)
installPlan
Expand Down
58 changes: 48 additions & 10 deletions cabal-install/src/Distribution/Client/ProjectConfig.hs
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,14 @@ module Distribution.Client.ProjectConfig
, resolveSolverSettings
, BuildTimeSettings (..)
, resolveBuildTimeSettings
, resolveNumJobsSetting

-- * Checking configuration
, checkBadPerPackageCompilerPaths
, BadPerPackageCompilerPaths (..)

-- * Globals
, maxNumFetchJobs
) where

import Distribution.Client.Compat.Prelude
Expand All @@ -68,6 +72,7 @@ import Prelude ()
import Distribution.Client.Glob
( isTrivialRootedGlob
)
import Distribution.Client.JobControl
import Distribution.Client.ProjectConfig.Legacy
import Distribution.Client.ProjectConfig.Types
import Distribution.Client.RebuildMonad
Expand Down Expand Up @@ -434,12 +439,7 @@ resolveBuildTimeSettings
-- buildSettingLogVerbosity -- defined below, more complicated
buildSettingBuildReports = fromFlag projectConfigBuildReports
buildSettingSymlinkBinDir = flagToList projectConfigSymlinkBinDir
buildSettingNumJobs =
if fromFlag projectConfigUseSemaphore
then UseSem (determineNumJobs projectConfigNumJobs)
else case (determineNumJobs projectConfigNumJobs) of
1 -> Serial
n -> NumJobs (Just n)
buildSettingNumJobs = resolveNumJobsSetting projectConfigUseSemaphore projectConfigNumJobs
buildSettingKeepGoing = fromFlag projectConfigKeepGoing
buildSettingOfflineMode = fromFlag projectConfigOfflineMode
buildSettingKeepTempFiles = fromFlag projectConfigKeepTempFiles
Expand Down Expand Up @@ -535,6 +535,20 @@ resolveBuildTimeSettings
| isParallelBuild buildSettingNumJobs = False
| otherwise = False

-- | Determine the number of jobs (ParStrat) from the project config
resolveNumJobsSetting
:: Flag Bool
-- ^ Whether to use a semaphore (-jsem)
-> Flag (Maybe Int)
-- ^ The number of jobs to run concurrently
-> ParStratX Int
resolveNumJobsSetting projectConfigUseSemaphore projectConfigNumJobs =
if fromFlag projectConfigUseSemaphore
then UseSem (determineNumJobs projectConfigNumJobs)
else case (determineNumJobs projectConfigNumJobs) of
1 -> Serial
n -> NumJobs (Just n)

---------------------------------------------
-- Reading and writing project config files
--
Expand Down Expand Up @@ -1213,13 +1227,15 @@ mplusMaybeT ma mb = do
fetchAndReadSourcePackages
:: Verbosity
-> DistDirLayout
-> Compiler
-> ProjectConfigShared
-> ProjectConfigBuildOnly
-> [ProjectPackageLocation]
-> Rebuild [PackageSpecifier (SourcePackage UnresolvedPkgLoc)]
fetchAndReadSourcePackages
verbosity
distDirLayout
compiler
projectConfigShared
projectConfigBuildOnly
pkgLocations = do
Expand Down Expand Up @@ -1256,7 +1272,9 @@ fetchAndReadSourcePackages
syncAndReadSourcePackagesRemoteRepos
verbosity
distDirLayout
compiler
projectConfigShared
projectConfigBuildOnly
(fromFlag (projectConfigOfflineMode projectConfigBuildOnly))
[repo | ProjectPackageRemoteRepo repo <- pkgLocations]

Expand Down Expand Up @@ -1373,16 +1391,23 @@ fetchAndReadSourcePackageRemoteTarball
syncAndReadSourcePackagesRemoteRepos
:: Verbosity
-> DistDirLayout
-> Compiler
-> ProjectConfigShared
-> ProjectConfigBuildOnly
-> Bool
-> [SourceRepoList]
-> Rebuild [PackageSpecifier (SourcePackage UnresolvedPkgLoc)]
syncAndReadSourcePackagesRemoteRepos
verbosity
DistDirLayout{distDownloadSrcDirectory}
compiler
ProjectConfigShared
{ projectConfigProgPathExtra
}
ProjectConfigBuildOnly
{ projectConfigUseSemaphore
, projectConfigNumJobs
}
offlineMode
repos = do
repos' <-
Expand All @@ -1408,10 +1433,15 @@ syncAndReadSourcePackagesRemoteRepos
in configureVCS verbosity progPathExtra vcs

concat
<$> sequenceA
[ rerunIfChanged verbosity monitor repoGroup' $ do
vcs' <- getConfiguredVCS repoType
syncRepoGroupAndReadSourcePackages vcs' pathStem repoGroup'
<$> rerunConcurrentlyIfChanged
verbosity
(newJobControlFromParStrat verbosity compiler parStrat (Just maxNumFetchJobs))
[ ( monitor
, repoGroup'
, do
vcs' <- getConfiguredVCS repoType
syncRepoGroupAndReadSourcePackages vcs' pathStem repoGroup'
)
| repoGroup@((primaryRepo, repoType) : _) <- Map.elems reposByLocation
, let repoGroup' = map fst repoGroup
pathStem =
Expand All @@ -1424,6 +1454,7 @@ syncAndReadSourcePackagesRemoteRepos
monitor = newFileMonitor (pathStem <.> "cache")
]
where
parStrat = resolveNumJobsSetting projectConfigUseSemaphore projectConfigNumJobs
syncRepoGroupAndReadSourcePackages
:: VCS ConfiguredProgram
-> FilePath
Expand Down Expand Up @@ -1760,3 +1791,10 @@ onlyTopLevelProvenance :: Set ProjectConfigProvenance -> Set ProjectConfigProven
onlyTopLevelProvenance = Set.filter $ \case
Implicit -> False
Explicit ps -> isTopLevelConfigPath ps

-- | The maximum amount of fetch jobs that can run concurrently.
-- For instance, this is used to limit the amount of concurrent downloads from
-- hackage, or the amount of concurrent git clones for
-- source-repository-package stanzas.
maxNumFetchJobs :: Int
maxNumFetchJobs = 2
10 changes: 4 additions & 6 deletions cabal-install/src/Distribution/Client/ProjectConfig/Legacy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -206,12 +206,10 @@ type ProjectConfigSkeleton = CondTree ConfVar [ProjectConfigPath] ProjectConfig
singletonProjectConfigSkeleton :: ProjectConfig -> ProjectConfigSkeleton
singletonProjectConfigSkeleton x = CondNode x mempty mempty

instantiateProjectConfigSkeletonFetchingCompiler :: Monad m => m (OS, Arch, CompilerInfo) -> FlagAssignment -> ProjectConfigSkeleton -> m ProjectConfig
instantiateProjectConfigSkeletonFetchingCompiler fetch flags skel
| null (toListOf traverseCondTreeV skel) = pure $ fst (ignoreConditions skel)
| otherwise = do
(os, arch, impl) <- fetch
pure $ instantiateProjectConfigSkeletonWithCompiler os arch impl flags skel
instantiateProjectConfigSkeletonFetchingCompiler :: (OS, Arch, CompilerInfo) -> FlagAssignment -> ProjectConfigSkeleton -> ProjectConfig
instantiateProjectConfigSkeletonFetchingCompiler (os, arch, impl) flags skel
| null (toListOf traverseCondTreeV skel) = fst (ignoreConditions skel)
| otherwise = instantiateProjectConfigSkeletonWithCompiler os arch impl flags skel

instantiateProjectConfigSkeletonWithCompiler :: OS -> Arch -> CompilerInfo -> FlagAssignment -> ProjectConfigSkeleton -> ProjectConfig
instantiateProjectConfigSkeletonWithCompiler os arch impl _flags skel = go $ mapTreeConds (fst . simplifyWithSysParams os arch impl) skel
Expand Down
18 changes: 10 additions & 8 deletions cabal-install/src/Distribution/Client/ProjectPlanning.hs
Original file line number Diff line number Diff line change
Expand Up @@ -385,17 +385,16 @@ rebuildProjectConfig
$ do
liftIO $ info verbosity "Project settings changed, reconfiguring..."
projectConfigSkeleton <- phaseReadProjectConfig
let fetchCompiler = do
-- have to create the cache directory before configuring the compiler
liftIO $ createDirectoryIfMissingVerbose verbosity True distProjectCacheDirectory
(compiler, Platform arch os, _) <- configureCompiler verbosity distDirLayout (fst (PD.ignoreConditions projectConfigSkeleton) <> cliConfig)
pure (os, arch, compilerInfo compiler)

projectConfig <- instantiateProjectConfigSkeletonFetchingCompiler fetchCompiler mempty projectConfigSkeleton
-- have to create the cache directory before configuring the compiler
liftIO $ createDirectoryIfMissingVerbose verbosity True distProjectCacheDirectory
(compiler, Platform arch os, _) <- configureCompiler verbosity distDirLayout (fst (PD.ignoreConditions projectConfigSkeleton) <> cliConfig)

let projectConfig = instantiateProjectConfigSkeletonFetchingCompiler (os, arch, compilerInfo compiler) mempty projectConfigSkeleton
when (projectConfigDistDir (projectConfigShared $ projectConfig) /= NoFlag) $
liftIO $
warn verbosity "The builddir option is not supported in project and config files. It will be ignored."
localPackages <- phaseReadLocalPackages (projectConfig <> cliConfig)
localPackages <- phaseReadLocalPackages compiler (projectConfig <> cliConfig)
return (projectConfig, localPackages)

let configfiles =
Expand Down Expand Up @@ -427,9 +426,11 @@ rebuildProjectConfig
-- NOTE: These are all packages mentioned in the project configuration.
-- Whether or not they will be considered local to the project will be decided by `shouldBeLocal`.
phaseReadLocalPackages
:: ProjectConfig
:: Compiler
-> ProjectConfig
-> Rebuild [PackageSpecifier UnresolvedSourcePackage]
phaseReadLocalPackages
compiler
projectConfig@ProjectConfig
{ projectConfigShared
, projectConfigBuildOnly
Expand All @@ -444,6 +445,7 @@ rebuildProjectConfig
fetchAndReadSourcePackages
verbosity
distDirLayout
compiler
projectConfigShared
projectConfigBuildOnly
pkgLocations
Expand Down
Loading

0 comments on commit c89ab54

Please sign in to comment.