forked from rte-france/or-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
perfect_matching.h
502 lines (419 loc) · 19.6 KB
/
perfect_matching.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
// Copyright 2010-2022 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Implementation of the Blossom V min-cost perfect matching algorithm. The
// main source for the algo is the paper: "Blossom V: A new implementation
// of a minimum cost perfect matching algorithm", Vladimir Kolmogorov.
//
// The Algorithm is a primal-dual algorithm. It always maintains a dual-feasible
// solution. We recall some notations here, but see the paper for more details
// as it is well written.
//
// TODO(user): This is a work in progress. The algo is not fully implemented
// yet. The initial version is closer to Blossom IV since we update the dual
// values for all trees at once with the same delta.
#ifndef OR_TOOLS_GRAPH_PERFECT_MATCHING_H_
#define OR_TOOLS_GRAPH_PERFECT_MATCHING_H_
#include <cstdint>
#include <functional>
#include <limits>
#include <memory>
#include <string>
#include <vector>
#include "absl/base/attributes.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "ortools/base/adjustable_priority_queue-inl.h"
#include "ortools/base/adjustable_priority_queue.h"
#include "ortools/base/int_type.h"
#include "ortools/base/integral_types.h"
#include "ortools/base/logging.h"
#include "ortools/base/macros.h"
#include "ortools/base/strong_vector.h"
namespace operations_research {
class BlossomGraph;
// Given an undirected graph with costs on each edges, this class allows to
// compute a perfect matching with minimum cost. A matching is a set of disjoint
// pairs of nodes connected by an edge. The matching is perfect if all nodes are
// matched to each others.
class MinCostPerfectMatching {
public:
// TODO(user): For now we ask the number of nodes at construction, but we
// could automatically infer it from the added edges if needed.
MinCostPerfectMatching() {}
explicit MinCostPerfectMatching(int num_nodes) { Reset(num_nodes); }
// Resets the class for a new graph.
//
// TODO(user): Eventually, we may support incremental Solves(). Or at least
// memory reuse if one wants to solve many problems in a row.
void Reset(int num_nodes);
// Adds an undirected edges between the two given nodes.
//
// For now we only accept non-negative cost.
// TODO(user): We can easily shift all costs if negative costs are needed.
//
// Important: The algorithm supports multi-edges, but it will be slower. So it
// is better to only add one edge with a minimum cost between two nodes. In
// particular, do not add both AddEdge(a, b, cost) and AddEdge(b, a, cost).
// TODO(user): We could just presolve them away.
void AddEdgeWithCost(int tail, int head, int64_t cost);
// Solves the min-cost perfect matching problem on the given graph.
//
// NOTE(user): If needed we could support a time limit. Aborting early will
// not provide a perfect matching, but the algorithm does maintain a valid
// lower bound on the optimal cost that gets better and better during
// execution until it reaches the optimal value. Similarly, it is easy to
// support an early stop if this bound crosses a preset threshold.
enum Status {
// A perfect matching with min-cost has been found.
OPTIMAL = 0,
// There is no perfect matching in this graph.
INFEASIBLE = 1,
// The costs are too large and caused an overflow during the algorithm
// execution.
INTEGER_OVERFLOW = 2,
// Advanced usage: the matching is OPTIMAL and was computed without
// overflow, but its OptimalCost() does not fit on an int64_t. Note that
// Match() still work and you can re-compute the cost in double for
// instance.
COST_OVERFLOW = 3,
};
ABSL_MUST_USE_RESULT Status Solve();
// Returns the cost of the perfect matching. Only valid when the last solve
// status was OPTIMAL.
int64_t OptimalCost() const {
DCHECK(optimal_solution_found_);
return optimal_cost_;
}
// Returns the node matched to the given node. In a perfect matching all nodes
// have a match. Only valid when the last solve status was OPTIMAL.
int Match(int node) const {
DCHECK(optimal_solution_found_);
return matches_[node];
}
const std::vector<int>& Matches() const {
DCHECK(optimal_solution_found_);
return matches_;
}
private:
std::unique_ptr<BlossomGraph> graph_;
// Fields used to report the optimal solution. Most of it could be read on
// the fly from BlossomGraph, but we prefer to copy them here. This allows to
// reclaim the memory of graph_ early or allows to still query the last
// solution if we later allow re-solve with incremental changes to the graph.
bool optimal_solution_found_ = false;
int64_t optimal_cost_ = 0;
int64_t maximum_edge_cost_ = 0;
std::vector<int> matches_;
};
// Class containing the main data structure used by the Blossom algorithm.
//
// At the core is the original undirected graph. During the algorithm execution
// we might collapse nodes into so-called Blossoms. A Blossom is a cycle of
// external nodes (which can be blossom nodes) of odd length (>= 3). The edges
// of the cycle are called blossom-forming eges and will always be tight
// (i.e. have a slack of zero). Once a Blossom is created, its nodes become
// "internal" and are basically considered merged into the blossom node for the
// rest of the algorithm (except if we later re-expand the blossom).
//
// Moreover, external nodes of the graph will have 3 possible types ([+], [-]
// and free [0]). Free nodes will always be matched together in pairs. Nodes of
// type [+] and [-] are arranged in a forest of alternating [+]/[-] disjoint
// trees. Each unmatched node is the root of a tree, and of type [+]. Nodes [-]
// will always have exactly one child to witch they are matched. [+] nodes can
// have any number of [-] children, to which they are not matched. All the edges
// of the trees will always be tight. Some examples below, double edges are used
// for matched nodes:
//
// A matched pair of free nodes: [0] === [0]
//
// A possible rooted tree: [+] -- [-] ==== [+]
// \
// [-] ==== [+] ---- [-] === [+]
// \
// [-] === [+]
//
// A single unmatched node is also a tree: [+]
//
// TODO(user): For now this class does not maintain a second graph of edges
// between the trees nor does it maintains priority queue of edges.
//
// TODO(user): For now we use CHECKs in many places to facilitate development.
// Switch them to DCHECKs for speed once the code is more stable.
class BlossomGraph {
public:
// Typed index used by this class.
DEFINE_INT_TYPE(NodeIndex, int);
DEFINE_INT_TYPE(EdgeIndex, int);
DEFINE_INT_TYPE(CostValue, int64_t);
// Basic constants.
// NOTE(user): Those can't be constexpr because of the or-tools export,
// which complains for constexpr DEFINE_INT_TYPE.
static const NodeIndex kNoNodeIndex;
static const EdgeIndex kNoEdgeIndex;
static const CostValue kMaxCostValue;
// Node related data.
// We store the edges incident to a node separately in the graph_ member.
struct Node {
explicit Node(NodeIndex n) : parent(n), match(n), root(n) {}
// A node can be in one of these 4 exclusive states. Internal nodes are part
// of a Blossom and should be ignored until this Blossom is expanded. All
// the other nodes are "external". A free node is always matched to another
// free node. All the other external node are in alternating [+]/[-] trees
// rooted at the only unmatched node of the tree (always of type [+]).
bool IsInternal() const {
DCHECK(!is_internal || type == 0);
return is_internal;
}
bool IsFree() const { return type == 0 && !is_internal; }
bool IsPlus() const { return type == 1; }
bool IsMinus() const { return type == -1; }
// Is this node a blossom? if yes, it was formed by merging the node.blossom
// nodes together. Note that we reuse the index of node.blossom[0] for this
// blossom node. A blossom node can be of any type.
bool IsBlossom() const { return !blossom.empty(); }
// The type of this node. We use an int for convenience in the update
// formulas. This is 1 for [+] nodes, -1 for [-] nodes and 0 for all the
// others.
//
// Internal node also have a type of zero so the dual formula are correct.
int type = 0;
// Whether this node is part of a blossom.
bool is_internal = false;
// The parent of this node in its tree or itself otherwise.
// Unused for internal nodes.
NodeIndex parent;
// Itself if not matched, or this node match otherwise.
// Unused for internal nodes.
NodeIndex match;
// The root of this tree which never changes until a tree is disassambled by
// an Augment(). Unused for internal nodes.
NodeIndex root;
// The "delta" to apply to get the dual for nodes of this tree.
// This is only filled for root nodes (i.e unmatched nodes).
CostValue tree_dual_delta = CostValue(0);
// See the formula in Dual() used to derive the true dual of this node.
// This is the equal to the "true" dual for free exterior node and internal
// node.
CostValue pseudo_dual = CostValue(0);
#ifndef NDEBUG
// The true dual of this node. We only maintain this in debug mode.
CostValue dual = CostValue(0);
#endif
// Non-empty for Blossom only. The odd-cycle of blossom nodes that form this
// blossom. The first element should always be the current blossom node, and
// all the other nodes are internal nodes.
std::vector<NodeIndex> blossom;
// This allows to store information about a new blossom node created by
// Shrink() so that we can properly restore it on Expand(). Note that we
// store the saved information on the second node of a blossom cycle (and
// not the blossom node itself) because that node will be "hidden" until the
// blossom is expanded so this way, we do not need more than one set of
// saved information per node.
#ifndef NDEBUG
CostValue saved_dual;
#endif
CostValue saved_pseudo_dual;
std::vector<NodeIndex> saved_blossom;
};
// An undirected edge between two nodes: tail <-> head.
struct Edge {
Edge(NodeIndex t, NodeIndex h, CostValue c)
: pseudo_slack(c),
#ifndef NDEBUG
slack(c),
#endif
tail(t),
head(h) {
}
// Returns the "other" end of this edge.
NodeIndex OtherEnd(NodeIndex n) const {
DCHECK(n == tail || n == head);
return NodeIndex(tail.value() ^ head.value() ^ n.value());
}
// AdjustablePriorityQueue interface. Note that we use std::greater<> in
// our queues since we want the lowest pseudo_slack first.
void SetHeapIndex(int index) { pq_position = index; }
int GetHeapIndex() const { return pq_position; }
bool operator>(const Edge& other) const {
return pseudo_slack > other.pseudo_slack;
}
// See the formula is Slack() used to derive the true slack of this edge.
CostValue pseudo_slack;
#ifndef NDEBUG
// We only maintain this in debug mode.
CostValue slack;
#endif
// These are the current tail/head of this edges. These are changed when
// creating or expanding blossoms. The order do not matter.
//
// TODO(user): Consider using node_a/node_b instead to remove the "directed"
// meaning. I do need to think a bit more about it though.
NodeIndex tail;
NodeIndex head;
// Position of this Edge in the underlying std::vector<> used to encode the
// heap of one priority queue. An edge can be in at most one priority queue
// which allow us to share this amongst queues.
int pq_position = -1;
};
// Creates a BlossomGraph on the given number of nodes.
explicit BlossomGraph(int num_nodes);
// Same comment as MinCostPerfectMatching::AddEdgeWithCost() applies.
void AddEdge(NodeIndex tail, NodeIndex head, CostValue cost);
// Heuristic to start with a dual-feasible solution and some matched edges.
// To be called once all edges are added. Returns false if the problem is
// detected to be INFEASIBLE.
ABSL_MUST_USE_RESULT bool Initialize();
// Enters a loop that perform one of Grow()/Augment()/Shrink()/Expand() until
// a fixed point is reached.
void PrimalUpdates();
// Computes the maximum possible delta for UpdateAllTrees() that keeps the
// dual feasibility. Dual update approach (2) from the paper. This also fills
// primal_update_edge_queue_.
CostValue ComputeMaxCommonTreeDualDeltaAndResetPrimalEdgeQueue();
// Applies the same dual delta to all trees. Dual update approach (2) from the
// paper.
void UpdateAllTrees(CostValue delta);
// Returns true iff this node is matched and is thus not a tree root.
// This cannot live in the Node class because we need to know the NodeIndex.
bool NodeIsMatched(NodeIndex n) const;
// Returns the node matched to the given one, or n if this node is not
// currently matched.
NodeIndex Match(NodeIndex n) const;
// Adds to the tree of tail the free matched pair(head, Match(head)).
// The edge is only used in DCHECKs. We duplicate tail/head because the
// order matter here.
void Grow(EdgeIndex e, NodeIndex tail, NodeIndex head);
// Merges two tree and augment the number of matched nodes by 1. This is
// the only functions that change the current matching.
void Augment(EdgeIndex e);
// Creates a Blossom using the given [+] -- [+] edge between two nodes of the
// same tree.
void Shrink(EdgeIndex e);
// Expands a Blossom into its component.
void Expand(NodeIndex to_expand);
// Returns the current number of matched nodes.
int NumMatched() const { return nodes_.size() - unmatched_nodes_.size(); }
// Returns the current dual objective which is always a valid lower-bound on
// the min-cost matching. Note that this is capped to kint64max in case of
// overflow. Because all of our cost are positive, this starts at zero.
CostValue DualObjective() const;
// This must be called at the end of the algorithm to recover the matching.
void ExpandAllBlossoms();
// Return the "slack" of the given edge.
CostValue Slack(const Edge& edge) const;
// Returns the dual value of the given node (which might be a pseudo-node).
CostValue Dual(const Node& node) const;
// Display to VLOG(1) some statistic about the solve.
void DisplayStats() const;
// Checks that there is no possible primal update in the current
// configuration.
void DebugCheckNoPossiblePrimalUpdates();
// Tests that the dual values are currently feasible.
// This should ALWAYS be the case.
bool DebugDualsAreFeasible() const;
// In debug mode, we maintain the real slack of each edges and the real dual
// of each node via this function. Both Slack() and Dual() checks in debug
// mode that the value computed is the correct one.
void DebugUpdateNodeDual(NodeIndex n, CostValue delta);
// Returns true iff this is an external edge with a slack of zero.
// An external edge is an edge between two external nodes.
bool DebugEdgeIsTightAndExternal(const Edge& edge) const;
// Getters to access node/edges from outside the class.
// Only used in tests.
const Edge& GetEdge(int e) const { return edges_[EdgeIndex(e)]; }
const Node& GetNode(int n) const { return nodes_[NodeIndex(n)]; }
// Display information for debugging.
std::string NodeDebugString(NodeIndex n) const;
std::string EdgeDebugString(EdgeIndex e) const;
std::string DebugString() const;
private:
// Returns the index of a tight edge between the two given external nodes.
// Returns kNoEdgeIndex if none could be found.
//
// TODO(user): Store edges for match/parent/blossom instead and remove the
// need for this function that can take around 10% of the running time on
// some problems.
EdgeIndex FindTightExternalEdgeBetweenNodes(NodeIndex tail, NodeIndex head);
// Appends the path from n to the root of its tree. Used by Augment().
void AppendNodePathToRoot(NodeIndex n, std::vector<NodeIndex>* path) const;
// Returns the depth of a node in its tree. Used by Shrink().
int GetDepth(NodeIndex n) const;
// Adds positive delta to dual_objective_ and cap at kint64max on overflow.
void AddToDualObjective(CostValue delta);
// In the presence of blossoms, the original tail/head of an arc might not be
// up to date anymore. It is important to use these functions instead in all
// the places where this can happen. That is basically everywhere except in
// the initialization.
NodeIndex Tail(const Edge& edge) const {
return root_blossom_node_[edge.tail];
}
NodeIndex Head(const Edge& edge) const {
return root_blossom_node_[edge.head];
}
// Returns the Head() or Tail() that does not correspond to node. Node that
// node must be one of the original index in the given edge, this is DCHECKed
// by edge.OtherEnd().
NodeIndex OtherEnd(const Edge& edge, NodeIndex node) const {
return root_blossom_node_[edge.OtherEnd(node)];
}
// Same as OtherEnd() but the given node should either be Tail(edge) or
// Head(edge) and do not need to be one of the original node of this edge.
NodeIndex OtherEndFromExternalNode(const Edge& edge, NodeIndex node) const {
const NodeIndex head = Head(edge);
if (head != node) {
DCHECK_EQ(node, Tail(edge));
return head;
}
return Tail(edge);
}
// Returns the given node and if this node is a blossom, all its internal
// nodes (recursively). Note that any call to SubNodes() invalidate the
// previously returned reference.
const std::vector<NodeIndex>& SubNodes(NodeIndex n);
// Just used to check that initialized is called exactly once.
bool is_initialized_ = false;
// The set of all edges/nodes of the graph.
absl::StrongVector<EdgeIndex, Edge> edges_;
absl::StrongVector<NodeIndex, Node> nodes_;
// Identity for a non-blossom node, and its top blossom node (in case of many
// nested blossom) for an internal node.
absl::StrongVector<NodeIndex, NodeIndex> root_blossom_node_;
// The current graph incidence. Note that one EdgeIndex should appear in
// exactly two places (on its tail and head incidence list).
absl::StrongVector<NodeIndex, std::vector<EdgeIndex>> graph_;
// Used by SubNodes().
std::vector<NodeIndex> subnodes_;
// The unmatched nodes are exactly the root of the trees. After
// initialization, this is only modified by Augment() which removes two nodes
// from this list each time. Note that during Shrink()/Expand() we never
// change the indexing of the root nodes.
std::vector<NodeIndex> unmatched_nodes_;
// List of tight_edges and possible shrink to check in PrimalUpdates().
std::vector<EdgeIndex> primal_update_edge_queue_;
std::vector<EdgeIndex> possible_shrink_;
// Priority queues of edges of a certain types.
AdjustablePriorityQueue<Edge, std::greater<Edge>> plus_plus_pq_;
AdjustablePriorityQueue<Edge, std::greater<Edge>> plus_free_pq_;
std::vector<Edge*> tmp_all_tops_;
// The dual objective. Increase as the algorithm progress. This is a lower
// bound on the min-cost of a perfect matching.
CostValue dual_objective_ = CostValue(0);
// Statistics on the main operations.
int64_t num_grows_ = 0;
int64_t num_augments_ = 0;
int64_t num_shrinks_ = 0;
int64_t num_expands_ = 0;
int64_t num_dual_updates_ = 0;
};
} // namespace operations_research
#endif // OR_TOOLS_GRAPH_PERFECT_MATCHING_H_