DeloitteDigitalUK · miohtama · Oct 14, 2020 · Oct 14, 2020 · Oct 15, 2020 · Oct 15, 2020
diff --git a/jira_agile_metrics/calculators/ageinghistory.py b/jira_agile_metrics/calculators/ageinghistory.py
@@ -0,0 +1,92 @@
+import logging
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+from ..calculator import Calculator
+from ..utils import set_chart_style
+
+from .cycletime import CycleTimeCalculator
+
+logger = logging.getLogger(__name__)
+
+
+class AgeingHistoryChartCalculator(Calculator):
+    """Draw a chart showing the breakdown of states where issues spent time.
+
+    Unlike Ageing WIP, this considers done items as well, so you get
+    historical reference data to better understand the status of the
+    current pipeline.
+    """
+
+    def run(self, today=None):
+        """Build the long-format data behind the ageing history chart.
+
+        Returns ``None`` when the ``ageing_history_chart`` setting is empty.
+        Otherwise returns a DataFrame with one row per (issue, active state)
+        pair and two columns: ``status`` (the state name) and ``age`` (the
+        value of the issue's ``"<state> duration"`` column converted to days
+        as a float).
+
+        ``today`` is accepted for interface compatibility but is not used.
+        """
+
+        # short circuit relatively expensive calculation if it won't be used
+        if not self.settings['ageing_history_chart']:
+            return None
+
+        cycle_data = self.get_result(CycleTimeCalculator)
+        cycle_names = [s['name'] for s in self.settings['cycle']]
+
+        # All states between the first ("Backlog") and the last ("Done") one
+        active_cycle_names = cycle_names[1:-1]
+
+        seconds_per_day = 24 * 3600
+        statuses = []
+        ages = []
+
+        # For each issue create one row per active state, holding the time
+        # spent in that state. The "<state> duration" columns are expected to
+        # hold pandas Timedelta values filled in by the cycle time calculator.
+        for _, row in cycle_data.iterrows():
+            for state in active_cycle_names:
+                duration = row[f"{state} duration"].total_seconds() / seconds_per_day
+                statuses.append(state)
+                ages.append(duration)
+
+        return pd.DataFrame(
+            {
+                'status': pd.Series(statuses, dtype='str'),
+                'age': pd.Series(ages, dtype='float'),
+            },
+            columns=['status', 'age'],
+        )
+
+    def write(self):
+        """Render the data from ``run()`` as a swarm plot and save it.
+
+        Does nothing (apart from logging) when no output file is configured
+        or when there are no rows to plot.
+        """
+        output_file = self.settings['ageing_history_chart']
+        if not output_file:
+            logger.debug("No output file specified for ageing history chart")
+            return
+
+        chart_data = self.get_result()
+
+        if chart_data.empty:
+            logger.warning("Unable to draw ageing history chart with zero items")
+            return
+
+        fig, ax = plt.subplots()
+
+        if self.settings['ageing_history_chart_title']:
+            ax.set_title(self.settings['ageing_history_chart_title'])
+
+        sns.swarmplot(x='status', y='age', data=chart_data, ax=ax)
+
+        ax.set_xlabel("Status")
+        ax.set_ylabel("Age (days)")
+
+        # State names can be long; rotate them so they do not overlap
+        ax.set_xticklabels(ax.xaxis.get_majorticklabels(), rotation=90)
+
+        # Anchor the y axis at zero while keeping the automatic upper limit
+        _, top = ax.get_ylim()
+        ax.set_ylim(0, top)
+
+        set_chart_style()
+
+        # Write file
+        logger.info("Writing ageing history chart to %s", output_file)
+        fig.savefig(output_file, bbox_inches='tight', dpi=300)
+        plt.close(fig)
diff --git a/jira_agile_metrics/calculators/ageingwip.py b/jira_agile_metrics/calculators/ageingwip.py
@@ -55,11 +55,11 @@ def extract_age(row):
         ageing_wip_data.dropna(how='any', inplace=True, subset=['status', 'age'])
 
         # reorder columns so we get key, summary, status, age, and then all the cycle stages
+        logger.debug("Ageing WIP data is for columns %s-%s", committed_column, last_active_column)
         ageing_wip_data = pd.concat((
             ageing_wip_data[['key', 'summary', 'status', 'age']],
             ageing_wip_data.loc[:, committed_column:last_active_column]
         ), axis=1)
-
         return ageing_wip_data
 
     def write(self):

diff --git a/jira_agile_metrics/calculators/cycleflow.py b/jira_agile_metrics/calculators/cycleflow.py
@@ -0,0 +1,122 @@
+import logging
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+
+from ..calculator import Calculator
+from ..utils import get_extension, set_chart_style
+
+from .cycletime import CycleTimeCalculator
+
+logger = logging.getLogger(__name__)
+
+
+class CycleFlowCalculator(Calculator):
+    """Produce the data for a cycle flow diagram and draw it.
+
+    The result is whatever `calculate_cycle_flow_data()` returns for the
+    active steps of the configured cycle (everything except the first and
+    the last step).
+    """
+
+    def run(self):
+        """Return the cycle flow data for the active cycle steps."""
+        cycle_data = self.get_result(CycleTimeCalculator)
+
+        # Drop the first (backlog) and last (done) steps of the cycle
+        step_names = [step['name'] for step in self.settings["cycle"][1:-1]]
+
+        return calculate_cycle_flow_data(cycle_data, step_names)
+
+    def write(self):
+        """Write the chart if an output file is configured and data exists."""
+        data = self.get_result()
+
+        if not self.settings['cycle_flow_chart']:
+            logger.debug("No output file specified for cycle flow chart")
+            return
+
+        if data is None:
+            logger.info("Did not match any entries for cycle flow chart")
+            return
+
+        self.write_chart(data, self.settings['cycle_flow_chart'])
+
+    def write_chart(self, data, output_file):
+        """Draw `data` as a stacked area chart and save it to `output_file`."""
+
+        if not len(data.index):
+            logger.warning("Cannot draw cycle flow without data")
+            return
+
+        fig, ax = plt.subplots()
+        ax.set_title("Cycle flow")
+
+        # Plot without the default legend; a custom one is attached below
+        data.plot.area(ax=ax, stacked=True, legend=False)
+
+        ax.set_xlabel("Period of issue complete")
+        ax.set_ylabel("Time spent (days)")
+
+        # Place the legend outside the plot area, to the right
+        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
+
+        set_chart_style()
+
+        # Write file
+        logger.info("Writing cycle flow chart to %s", output_file)
+        fig.savefig(output_file, bbox_inches='tight', dpi=300)
+        plt.close(fig)
+
+
+def calculate_cycle_flow_data(cycle_data, cycle_names, frequency="1M", resample_on="completed_timestamp"):
+    """Calculate diagram data for the time spent in different cycle steps.
+
+    :param cycle_data: Cycle time calculator output. Must contain the
+        ``resample_on`` column and one ``"<name> duration"`` column (pandas
+        Timedelta values) for every entry of ``cycle_names``.
+
+    :param cycle_names: List of cycle steps included in the flow chart
+
+    :param frequency: Resampling frequency: weekly, monthly, etc.
+
+    :param resample_on: Column that is used as the base for frequency - you
+        can switch between start and completed timestamps
+
+    :return: DataFrame indexed by resampling period with one float column
+        per duration column holding the summed time in days, or ``None``
+        when no row has a ``resample_on`` value.
+    """
+
+    # Build a dataframe of just the "duration" columns
+    duration_cols = [f"{cycle} duration" for cycle in cycle_names]
+    cfd_data = cycle_data[[resample_on] + duration_cols]
+
+    # Remove issues that lack the timestamp we resample on
+    # https://stackoverflow.com/a/55066805/315168
+    cfd_data = cfd_data[cfd_data[resample_on].notnull()]
+
+    # We did not have any issues with a resample timestamp,
+    # cannot do resample
+    if cfd_data.empty:
+        return None
+
+    # Zero out missing data for cycles which issue skipped
+    cfd_data = cfd_data.fillna(pd.Timedelta(seconds=0))
+
+    # Sum the durations per period. `.sum()` gives the same result as
+    # `.agg(np.sum)` without the deprecation warning newer pandas emits
+    # for NumPy callables passed to `agg`.
+    sampled = cfd_data.resample(frequency, on=resample_on).sum()
+
+    #
+    # Sample output
+    #                         Development duration          Fixes duration         Review duration             QA duration
+    # completed_timestamp
+    # 2020-02-29           0 days 00:02:14.829000  0 days 01:21:01.586000  0 days 06:21:59.009000  1 days 13:19:26.173000
+    # 2020-03-31           4 days 04:53:44.114000  0 days 19:13:43.590000  1 days 00:51:11.272000  2 days 01:54:57.958000
+    # 2020-04-30           6 days 11:48:55.864000  1 days 15:48:23.789000  3 days 17:51:01.561000 10 days 11:54:59.661000
+
+    # Convert pandas Timedeltas to days as float
+    # https://stackoverflow.com/a/54535619/315168
+    sampled[duration_cols] = sampled[duration_cols] / np.timedelta64(1, 'D')
+
+    # Fill missing values with zero duration
+    sampled = sampled.fillna(0)
+
+    # Make sure we always return stacked charts in the same order:
+    # an ordered categorical index sorts by the configured cycle order
+    # rather than alphabetically.
+    sampled.columns = pd.CategoricalIndex(
+        sampled.columns.values,
+        ordered=True,
+        categories=duration_cols,
+    )
+
+    # Sort the columns (axis=1) by the new categorical ordering
+    sampled = sampled.sort_index(axis=1)
+
+    return sampled
diff --git a/jira_agile_metrics/calculators/cycleflow100.py b/jira_agile_metrics/calculators/cycleflow100.py
@@ -0,0 +1,69 @@
+import logging
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+
+from ..calculator import Calculator
+from ..utils import get_extension, set_chart_style
+
+from .cycletime import CycleTimeCalculator
+from .cycleflow import calculate_cycle_flow_data
+
+
+logger = logging.getLogger(__name__)
+
+
+class CycleFlow100Calculator(Calculator):
+    """Same as cycle flow chart, but uses a 100% stacked area graph instead.
+
+    https://stackoverflow.com/questions/29940382/100-area-plot-of-a-pandas-dataframe
+    """
+
+    def run(self):
+        """Return each period's time per active cycle step as a share of the total.
+
+        The values are fractions in the range 0..1 (each row sums to 1), or
+        ``None`` when `calculate_cycle_flow_data()` returns no data.
+        """
+        cycle_data = self.get_result(CycleTimeCalculator)
+        # Exclude backlog and done
+        active_cycles = self.settings["cycle"][1:-1]
+        cycle_names = [s['name'] for s in active_cycles]
+        data = calculate_cycle_flow_data(cycle_data, cycle_names)
+        if data is not None:
+            # Stack cols to 100%: divide every row by its own total.
+            # NOTE: a period whose total is zero yields NaN (0/0) here.
+            data = data.divide(data.sum(axis=1), axis=0)
+        return data
+
+    def write(self):
+        """Write the chart if an output file is configured and data exists."""
+        data = self.get_result()
+
+        if self.settings['cycle_flow_100_chart']:
+            if data is not None:
+                self.write_chart(data, self.settings['cycle_flow_100_chart'])
+            else:
+                logger.info("Did not match any entries for Cycle flow 100% chart")
+        else:
+            logger.debug("No output file specified for cycle flow 100% chart")
+
+    def write_chart(self, data, output_file):
+        """Draw `data` as a 100% stacked area chart and save it to `output_file`."""
+        # Local import: only this method needs the tick formatter
+        from matplotlib.ticker import PercentFormatter
+
+        if len(data.index) == 0:
+            logger.warning("Cannot draw cycle flow without data")
+            return
+
+        fig, ax = plt.subplots()
+
+        ax.set_title("Cycle flow")
+        data.plot.area(ax=ax, stacked=True, legend=False)
+        ax.set_xlabel("Period of issue complete")
+        ax.set_ylabel("Time spent (%)")
+
+        # The data holds fractions (0..1); show the ticks as percentages so
+        # they match the "(%)" axis label.
+        ax.yaxis.set_major_formatter(PercentFormatter(xmax=1.0))
+
+        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
+
+        set_chart_style()
+
+        # Write file
+        logger.info("Writing cycle flow 100%% chart to %s", output_file)
+        fig.savefig(output_file, bbox_inches='tight', dpi=300)
+        plt.close(fig)
+
+