-
Notifications
You must be signed in to change notification settings - Fork 751
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[GOBBLIN-2137]merged dagNodeStateStore and failedDagNodeStateStore tables #4032
base: master
Are you sure you want to change the base?
Changes from 6 commits
719052f
68f6ea2
8614a0d
577e4bf
dcf6e1a
7883009
7e3e556
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -77,22 +77,11 @@ public interface DagManagementStateStore { | |
|
||
/** | ||
* This marks the dag as a failed one. | ||
* Failed dags are queried using {@link DagManagementStateStore#getFailedDag(DagManager.DagId)} ()} later to be retried. | ||
* Failed dags are queried using {@link DagManagementStateStore#getDag(DagManager.DagId)} ()} later to be retried. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does it remain useful to both retrieve the DAG while also asserting that it's failed? |
||
* @param dagId failing dag's dagId | ||
*/ | ||
void markDagFailed(DagManager.DagId dagId) throws IOException; | ||
|
||
/** | ||
* Returns the failed dag. | ||
* If the dag is not found because it was never marked as failed through | ||
* {@link DagManagementStateStore#markDagFailed(org.apache.gobblin.service.modules.orchestration.DagManager.DagId)}, | ||
* it returns Optional.absent. | ||
* @param dagId dag id of the failed dag | ||
*/ | ||
Optional<Dag<JobExecutionPlan>> getFailedDag(DagManager.DagId dagId) throws IOException; | ||
|
||
void deleteFailedDag(DagManager.DagId dagId) throws IOException; | ||
|
||
/** | ||
* Adds state of a {@link org.apache.gobblin.service.modules.flowgraph.Dag.DagNode} to the store. | ||
* Note that a DagNode is a part of a Dag and must already be present in the store through | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,15 +61,12 @@ | |
@Slf4j | ||
@Singleton | ||
public class MySqlDagManagementStateStore implements DagManagementStateStore { | ||
// todo - these two stores should merge | ||
private DagStateStoreWithDagNodes dagStateStore; | ||
private DagStateStoreWithDagNodes failedDagStateStore; | ||
private final JobStatusRetriever jobStatusRetriever; | ||
private boolean dagStoresInitialized = false; | ||
private final UserQuotaManager quotaManager; | ||
Map<URI, TopologySpec> topologySpecMap; | ||
private final Config config; | ||
public static final String FAILED_DAG_STATESTORE_PREFIX = "failedDagStateStore"; | ||
public static final String DAG_STATESTORE_CLASS_KEY = DagManager.DAG_MANAGER_PREFIX + "dagStateStoreClass"; | ||
FlowCatalog flowCatalog; | ||
@Getter | ||
|
@@ -91,8 +88,6 @@ public MySqlDagManagementStateStore(Config config, FlowCatalog flowCatalog, User | |
private synchronized void start() { | ||
if (!dagStoresInitialized) { | ||
this.dagStateStore = createDagStateStore(config, topologySpecMap); | ||
this.failedDagStateStore = createDagStateStore(ConfigUtils.getConfigOrEmpty(config, FAILED_DAG_STATESTORE_PREFIX).withFallback(config), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. any ideas on handling migration when we roll this out (presuming the failed DagStateStore was not empty)? |
||
topologySpecMap); | ||
// This implementation does not need to update quota usage when the service restarts or when its leadership status | ||
// changes because quota usage are persisted in mysql table. For the same reason, there is no need to call getDags also. | ||
// Also, calling getDags during startUp may fail, because the topologies that are required to deserialize dags may | ||
|
@@ -134,10 +129,8 @@ public void addDag(Dag<JobExecutionPlan> dag) throws IOException { | |
@Override | ||
public void markDagFailed(DagManager.DagId dagId) throws IOException { | ||
Dag<JobExecutionPlan> dag = this.dagStateStore.getDag(dagId); | ||
this.failedDagStateStore.writeCheckpoint(dag); | ||
this.dagStateStore.cleanUp(dagId); | ||
// todo - updated failedDagStateStore iff cleanup returned 1 | ||
// or merge dagStateStore and failedDagStateStore and change the flag that marks a dag `failed` | ||
dag.setFailedDag(true); | ||
this.dagStateStore.writeCheckpoint(dag); | ||
log.info("Marked dag failed {}", dagId); | ||
} | ||
|
||
|
@@ -147,21 +140,10 @@ public void deleteDag(DagManager.DagId dagId) throws IOException { | |
log.info("Deleted dag {}", dagId); | ||
} | ||
|
||
@Override | ||
public void deleteFailedDag(DagManager.DagId dagId) throws IOException { | ||
this.failedDagStateStore.cleanUp(dagId); | ||
log.info("Deleted failed dag {}", dagId); | ||
} | ||
|
||
@Override | ||
public Optional<Dag<JobExecutionPlan>> getFailedDag(DagManager.DagId dagId) throws IOException { | ||
return Optional.of(this.failedDagStateStore.getDag(dagId)); | ||
} | ||
|
||
@Override | ||
public synchronized void addDagNodeState(Dag.DagNode<JobExecutionPlan> dagNode, DagManager.DagId dagId) | ||
throws IOException { | ||
this.dagStateStore.updateDagNode(dagId, dagNode); | ||
this.dagStateStore.updateDagNode(dagId, dagNode, false);// isFailedDag is set as false because addDagNodeState adds a new DagNode, doesn't update an existing dagNode as failed. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: space before starting a comment. also more brevity; e.g.:
|
||
} | ||
|
||
@Override | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,7 +54,7 @@ public ResumeDagProc(ResumeDagTask resumeDagTask, Config config) { | |
@Override | ||
protected Optional<Dag<JobExecutionPlan>> initialize(DagManagementStateStore dagManagementStateStore) | ||
throws IOException { | ||
return dagManagementStateStore.getFailedDag(getDagId()); | ||
return dagManagementStateStore.getDag(getDagId()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shall we verify the one returned is actually failed? |
||
} | ||
|
||
@Override | ||
|
@@ -92,7 +92,7 @@ protected void act(DagManagementStateStore dagManagementStateStore, Optional<Dag | |
dagManagementStateStore.addDag(failedDag.get()); | ||
|
||
// if it fails here, it will check point the failed dag in the (running) dag store again, which is idempotent | ||
dagManagementStateStore.deleteFailedDag(getDagId()); | ||
dagManagementStateStore.deleteDag(getDagId()); | ||
|
||
DagProcUtils.submitNextNodes(dagManagementStateStore, failedDag.get(), getDagId()); | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
because we do not persist dag level field in mysql, adding fields to Dag will not be much useful and may lead to bugs
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We want to have this field here as we didn't want to add additional parameters in all the methods to pass on is_failed value.