forked from Texera/texera
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathQ3_Final.json
1 lines (1 loc) · 9 KB
/
Q3_Final.json
1
{"operators":[{"operatorID":"ScalaUDF-operator-62b8ecc5-9699-4c0f-860d-32bc9586c908","operatorType":"ScalaUDF","operatorVersion":"bfcdd448e9d4ff69a973c29ccf008ec9b5baed71","operatorProperties":{"code":"import edu.uci.ics.amber.engine.common.{CheckpointState, CheckpointSupport}\nimport edu.uci.ics.amber.engine.common.tuple.amber.TupleLike\nimport edu.uci.ics.texera.workflow.common.tuple.Tuple\nimport edu.uci.ics.texera.workflow.common.operators.OperatorExecutor\nimport scala.collection.mutable\nimport java.time.{Instant, LocalDate, LocalDateTime, ZoneId}\nimport java.time.format.DateTimeFormatter\nimport edu.uci.ics.amber.engine.common.workflow.PortIdentity\n\nclass ScalaUDFOpExec extends OperatorExecutor with CheckpointSupport {\n\n private var currentMonth: String = \"\"\n private var currentMonthSum: Double = 0.0\n\n // Process tuples coming from a prior operator or UDF\n override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = {\n val amount = tuple.getFields(1).asInstanceOf[Int]\n val timestamp = tuple.getFields(2).asInstanceOf[Long]\n\n // Convert the timestamp to LocalDateTime to extract year and month\n val dateTime = LocalDateTime.ofInstant(Instant.ofEpochMilli(timestamp), ZoneId.systemDefault())\n \n // Use \"yyyy-MM\" format for the month (e.g., \"2023-01\" for January 2023)\n val monthKey = dateTime.format(DateTimeFormatter.ofPattern(\"yyyy-MM\"))\n\n // If this is a new month, output the sum for the previous month and reset the sum\n var returnIter:Iterator[TupleLike] = Iterator.empty\n if (monthKey != currentMonth) {\n if (currentMonth.nonEmpty) {\n // Output the sum for the previous month\n returnIter = Iterator(TupleLike(currentMonth, currentMonthSum))\n }\n // Reset the sum for the new month\n currentMonth = monthKey\n currentMonthSum = 0.0\n }\n\n // Update the sum for the current month\n currentMonthSum += amount\n returnIter\n }\n\n// Serialize the state (current month and current month's sum) to a checkpoint\n override def 
serializeState(\n currentIteratorState: Iterator[(TupleLike, Option[PortIdentity])],\n checkpoint: CheckpointState\n ): Iterator[(TupleLike, Option[PortIdentity])] = {\n checkpoint.save(\"currentMonth\", currentMonth)\n checkpoint.save(\"currentMonthSum\", currentMonthSum)\n currentIteratorState\n }\n\n // Deserialize the state (current month and current month's sum) from a checkpoint\n override def deserializeState(\n checkpoint: CheckpointState\n ): Iterator[(TupleLike, Option[PortIdentity])] = {\n currentMonth = checkpoint.load(\"currentMonth\").asInstanceOf[String]\n currentMonthSum = checkpoint.load(\"currentMonthSum\").asInstanceOf[Double]\n Iterator.empty\n }\n\n // Display the current state, showing the cumulative sum for the current month\n override def getState: String = {\n s\"Current Month: $currentMonth, Current Month Sum: $currentMonthSum\"\n }\n\n // Estimate the cost of checkpointing (simple cost estimation)\n override def getEstimatedCheckpointCost: Long = 0L\n}\n","workers":1,"retainInputColumns":false,"outputColumns":[{"attributeName":"month","attributeType":"string"},{"attributeName":"amount","attributeType":"double"}]},"inputPorts":[{"portID":"input-0","displayName":"","allowMultiInputs":true,"isDynamicPort":false,"dependencies":[]}],"outputPorts":[{"portID":"output-0","displayName":"","allowMultiInputs":false,"isDynamicPort":false}],"showAdvanced":false,"isDisabled":false,"customDisplayName":"Scala UDF","dynamicInputPorts":true,"dynamicOutputPorts":true},{"operatorID":"ScalaUDFSource-operator-2e26f979-cbf8-40ce-8452-fc46b26cea3d","operatorType":"ScalaUDFSource","operatorVersion":"bfcdd448e9d4ff69a973c29ccf008ec9b5baed71","operatorProperties":{"code":"import edu.uci.ics.amber.engine.common.{CheckpointState, CheckpointSupport}\nimport edu.uci.ics.amber.engine.common.SourceOperatorExecutor\nimport edu.uci.ics.amber.engine.common.tuple.amber.TupleLike\nimport edu.uci.ics.amber.engine.common.workflow.PortIdentity\nimport 
scala.concurrent.duration._\nimport java.time.{Instant, LocalDate, LocalDateTime, ZoneId}\nimport java.time.format.DateTimeFormatter\nimport java.time.Instant\nimport scala.util.Random\n\nclass ScalaUDFOpExec extends SourceOperatorExecutor with CheckpointSupport {\n\n // Variable to maintain the current index of tuple production\n var currentIndex: Int = 0\n val totalTuplesToProduce: Int = 360 // Total number of tuples to produce\n val errorFrequency: Int = 15 // Introduce an error tuple every 15 tuples\n val startDate: LocalDate = LocalDate.of(LocalDate.now().getYear, 1, 1)\n\n // Set a specific seed for random number generation to make it deterministic\n var random = new Random(12345L) // 12345L is the seed\n\n // Produce tuples lazily using an iterator, wrapping all computation within the iterator\n override def produceTuple(): Iterator[TupleLike] = new Iterator[TupleLike] {\n \n // Return true if more tuples need to be produced\n override def hasNext: Boolean = currentIndex < totalTuplesToProduce\n\n // Generate the next tuple\n override def next(): TupleLike = {\n val tupleLike = if (currentIndex % errorFrequency == 0 && currentIndex != 0) {\n // Create an error tuple every `errorFrequency` tuples\n createErrorTransaction()\n } else {\n // Create a valid transaction tuple\n val validTuple = createValidTransaction()\n validTuple\n }\n\n // Update the current index\n currentIndex += 1\n\n // Introduce a 0.5-second delay between each tuple\n Thread.sleep(500)\n\n // Return the generated tuple\n tupleLike\n }\n }\n\n // Method to create a valid transaction and return it as a TupleLike\n private def createValidTransaction(): TupleLike = {\n val transactionID = s\"txn-${currentIndex}\"\n val amount = random.between(3,150) // Random amount in [3, 150)\n // Simulate the date by adding (currentIndex - 1) days to January 1st of the current year\n val date = startDate.plusDays(currentIndex - 1) // For currentIndex = 1, this will be Jan 1st; currentIndex = 0 maps to Dec 31st of the previous year\n // Convert the LocalDate to 
a timestamp in milliseconds\n val timestamp = date.atStartOfDay(ZoneId.systemDefault()).toInstant.toEpochMilli\n TupleLike(transactionID, amount, timestamp)\n }\n\n\n // Method to create an error transaction and return it as a TupleLike\n private def createErrorTransaction(): TupleLike = {\n // Pick the date by adding a random number of days to January 1st of the current year\n val date = startDate.plusDays(random.between(1,360)) // Random offset of 1 to 359 days, independent of currentIndex\n // Convert the LocalDate to a timestamp in milliseconds\n val timestamp = date.atStartOfDay(ZoneId.systemDefault()).toInstant.toEpochMilli\n TupleLike(\"txn-${currentIndex}\", 189, timestamp)\n }\n\n // Serialize the state (saving the current index and the random generator to the checkpoint)\n override def serializeState(\n currentIteratorState: Iterator[(TupleLike, Option[PortIdentity])],\n checkpoint: CheckpointState\n ): Iterator[(TupleLike, Option[PortIdentity])] = {\n // Save important states to the checkpoint\n checkpoint.save(\"currentIndex\", currentIndex)\n checkpoint.save(\"random\", random)\n\n currentIteratorState\n }\n\n // Deserialize the state (restoring the current index and the random generator from the checkpoint)\n override def deserializeState(\n checkpoint: CheckpointState\n ): Iterator[(TupleLike, Option[PortIdentity])] = {\n // Restore the saved states from the checkpoint\n currentIndex = checkpoint.load(\"currentIndex\").asInstanceOf[Int]\n random = checkpoint.load(\"random\").asInstanceOf[Random]\n \n // Continue producing tuples starting from the restored index\n produceTuple().map(tuple => (tuple, Option.empty))\n }\n\n // Estimate the cost of checkpointing (you can customize this based on the use case)\n override def getEstimatedCheckpointCost: Long = 0L\n\n // Override getState; this source operator reports an empty state string\n override def getState: String = 
\"\"\n}\n","workers":1,"columns":[{"attributeName":"transactionID","attributeType":"string"},{"attributeName":"amount","attributeType":"integer"},{"attributeName":"timestamp","attributeType":"long"}]},"inputPorts":[],"outputPorts":[{"portID":"output-0","displayName":"","allowMultiInputs":false,"isDynamicPort":false}],"showAdvanced":false,"isDisabled":false,"customDisplayName":"1-out Scala UDF","dynamicInputPorts":false,"dynamicOutputPorts":false}],"operatorPositions":{"ScalaUDF-operator-62b8ecc5-9699-4c0f-860d-32bc9586c908":{"x":-33,"y":-89},"ScalaUDFSource-operator-2e26f979-cbf8-40ce-8452-fc46b26cea3d":{"x":-259,"y":-89}},"links":[{"linkID":"link-10c70aeb-13cf-4c46-80d9-482f95b84b30","source":{"operatorID":"ScalaUDFSource-operator-2e26f979-cbf8-40ce-8452-fc46b26cea3d","portID":"output-0"},"target":{"operatorID":"ScalaUDF-operator-62b8ecc5-9699-4c0f-860d-32bc9586c908","portID":"input-0"}}],"groups":[],"commentBoxes":[]}