diff --git a/CHANGELOG.md b/CHANGELOG.md index d5614bb56..3b99f0d44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Renamed variable in pipeline.finalise to better reflect what the dataframe represents (sources_df -> associations_df) [#787](https://github.com/askap-vast/vast-pipeline/pull/787) - Fixed typo in variable name ("assoications") [#787](https://github.com/askap-vast/vast-pipeline/pull/787) +- Fix partition calculation from wrong DataFrame object [#783](https://github.com/askap-vast/vast-pipeline/pull/783) - Fix processing config parameters not displaying in web interface [#782](https://github.com/askap-vast/vast-pipeline/pull/782) - Fix incorrect zenodo links [#780](https://github.com/askap-vast/vast-pipeline/pull/780) @@ -30,6 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [#788](https://github.com/askap-vast/vast-pipeline/pull/788): feat, fix: Speed up forced fitting code by using numba-fied forced_phot code and reordering some calculations - [#787](https://github.com/askap-vast/vast-pipeline/pull/787): fix: Optimise associations merge and minor variable name updates +- [#783](https://github.com/askap-vast/vast-pipeline/pull/783): fix: Fix partition calculation from wrong DataFrame object - [#782](https://github.com/askap-vast/vast-pipeline/pull/782): fix: Fix processing config parameters not displaying in web interface - [#780](https://github.com/askap-vast/vast-pipeline/pull/780): docs: Fix incorrect zenodo links diff --git a/vast_pipeline/pipeline/new_sources.py b/vast_pipeline/pipeline/new_sources.py index 4ef9dd08a..23879a650 100644 --- a/vast_pipeline/pipeline/new_sources.py +++ b/vast_pipeline/pipeline/new_sources.py @@ -236,7 +236,7 @@ def parallel_get_rms_measurements( } n_workers, n_partitions = calculate_workers_and_partitions( - df, + out, n_cpu=n_cpu, max_partition_mb=max_partition_mb ) diff --git a/vast_pipeline/pipeline/pairs.py b/vast_pipeline/pipeline/pairs.py index 8d678cc11..2ca17c17c 100644 --- a/vast_pipeline/pipeline/pairs.py +++ b/vast_pipeline/pipeline/pairs.py @@ -72,7 +72,7 @@ def calculate_measurement_pair_metrics( """ n_workers, n_partitions = calculate_workers_and_partitions( - df, + df.set_index('source'), n_cpu=n_cpu, max_partition_mb=max_partition_mb ) diff --git a/vast_pipeline/utils/utils.py b/vast_pipeline/utils/utils.py index a57dd0c74..e06e95029 100644 --- a/vast_pipeline/utils/utils.py +++ b/vast_pipeline/utils/utils.py @@ -422,7 +422,7 @@ def calculate_n_partitions(df, n_cpu, partition_size_mb=15): partition_size_mb = int(np.ceil(mem_usage_mb / n_partitions)) - logger.debug("Using %d partions of %dMB", n_partitions, partition_size_mb) + logger.debug("Using %d partitions of %dMB", n_partitions, partition_size_mb) return n_partitions