Skip to content

Commit

Permalink
Random batch
Browse files (browse the repository at this point in the history)
  • Loading branch information
TonyCTHsu committed Jan 31, 2025
1 parent fa3c7c0 commit 9db18c7
Showing 1 changed file with 7 additions and 46 deletions.
53 changes: 7 additions & 46 deletions tasks/github.rake
Original file line number | Diff line number | Diff line change
@@ -287,59 +287,20 @@ namespace :github do
end
end

# Random!
matching_tasks.shuffle!

batch_count = 7
batch_count *= 2 if RUBY_PLATFORM == 'java'

groups = []
objects = matching_tasks.shuffle
remaining = objects.dup

while remaining.any?
group = Set.new
queue = [remaining.first]

# Use BFS to find all connected objects
while queue.any?
current = queue.shift
next if group.include?(current)

group.add(current)
tasks_per_job = (matching_tasks.size.to_f / batch_count).ceil

# Find all objects that share the same task or gemfile
connected = remaining.select do |obj|
next if group.include?(obj)

obj[:task] == current[:task] || obj[:gemfile] == current[:gemfile]
end

queue.concat(connected)
end
batched_matrix = { 'include' => [] }

# Add the connected group and remove its objects from remaining
groups << group.to_a
remaining.reject! { |obj| group.include?(obj) }
matching_tasks.each_slice(tasks_per_job).with_index do |task_group, index|
batched_matrix['include'] << { 'batch' => index.to_s, 'tasks' => task_group }
end

# Sort groups by size in descending order
groups.sort_by!(&:size).reverse!

# Initialize batches
batches = Array.new(batch_count) { [] }

# Distribute groups to minimize size differences between batches
groups.each do |g|
# Find the batch with the minimum current size
target_batch = batches.min_by(&:size)
target_batch.concat(g)
end

# Create the final structure
batched_matrix = {
'include' => batches.map.with_index do |tasks, index|
{ 'batch' => index, 'tasks' => tasks }
end
}

# Output the JSON
puts JSON.dump(batched_matrix)
end
Expand Down

0 comments on commit 9db18c7

Please sign in to comment.