Skip to content

Commit

Permalink
Memory Optimizations (#225)
Browse files Browse the repository at this point in the history
* Some memory optimizations

* Additional memory optimizations for dense eigenvalues

* Lint
  • Loading branch information
william-dawson authored Dec 11, 2023
1 parent b2adba2 commit 6050810
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 43 deletions.
6 changes: 3 additions & 3 deletions Source/Fortran/EigenSolversModule.F90
Original file line number Diff line number Diff line change
Expand Up @@ -122,25 +122,25 @@ END FUNCTION func

!! Convert to a triplet list, map the triplet list, fill.
CALL GetMatrixTripletList(vals, tlist)
CALL DestructMatrix(vals)
DO II = 1, tlist%CurrentSize
tlist%DATA(II)%point_value = func(tlist%DATA(II)%point_value)
END DO

!! Fill
CALL ConstructEmptyMatrix(ResultMat, this)
CALL FillMatrixFromTripletList(ResultMat, tlist, preduplicated_in = .TRUE.)
CALL DestructTripletList(tlist)

!! Multiply Back Together
CALL MatrixMultiply(vecs, ResultMat, temp, threshold_in = params%threshold)
CALL TransposeMatrix(vecs, vecsT)
CALL DestructMatrix(vecs)
CALL ConjugateMatrix(vecsT)
CALL MatrixMultiply(temp, vecsT, ResultMat, threshold_in = params%threshold)

!! Cleanup
CALL DestructMatrix(vecs)
CALL DestructMatrix(temp)
CALL DestructMatrix(vals)
CALL DestructTripletList(tlist)
CALL DestructSolverParameters(params)

END SUBROUTINE DenseMatrixFunction
Expand Down
4 changes: 2 additions & 2 deletions Source/Fortran/dense_includes/ConstructMatrixDFromS.f90
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
INTEGER :: KK ! Total element counter
INTEGER :: elements_per_inner

CALL ConstructEmptyMatrix(dense_matrix, sparse_matrix%rows, &
& sparse_matrix%columns)
CALL ConstructEmptyMatrix(dense_matrix, &
& sparse_matrix%rows, sparse_matrix%columns)
dense_matrix%DATA = 0

!! Loop over elements.
Expand Down
48 changes: 30 additions & 18 deletions Source/Fortran/distributed_algebra_includes/MatrixMultiply.f90
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
TYPE(ReduceHelper_t), DIMENSION(:), ALLOCATABLE :: column_helper
TYPE(ReduceHelper_t), DIMENSION(:, :), ALLOCATABLE :: slice_helper
!! For Iterating Over Local Blocks
INTEGER :: II, II2
INTEGER :: JJ, JJ2
INTEGER :: II, II2, II2_range
INTEGER :: JJ, JJ2, JJ2_range
INTEGER :: duplicate_start_column, duplicate_offset_column
INTEGER :: duplicate_start_row, duplicate_offset_row
REAL(NTREAL) :: working_threshold
Expand Down Expand Up @@ -93,19 +93,22 @@
SELECT CASE (ATasks(II))
CASE(LocalGatherA)
ATasks(II) = TaskRunningA
!$OMP TASK DEFAULT(SHARED), PRIVATE(JJ2), FIRSTPRIVATE(II)
!$OMP TASK DEFAULT(SHARED), PRIVATE(JJ2, JJ2_range), FIRSTPRIVATE(II)
!! First Align The Data We Are Working With
DO JJ2 = 1, &
& matAB%process_grid%number_of_blocks_columns / &
JJ2_range = matAB%process_grid%number_of_blocks_columns / &
& matAB%process_grid%num_process_slices
DO JJ2 = 1, JJ2_range
CALL CopyMatrix(matA%LMAT(II, &
& duplicate_start_column + &
& duplicate_offset_column * (JJ2 - 1)),&
& AdjacentABlocks(II, JJ2))
END DO
!! Then Do A Local Gather
!! Then Do A Local Gather and Cleanup
CALL ComposeMatrixColumns(AdjacentABlocks(II, :), &
& LocalRowContribution(II))
DO JJ2 = 1, JJ2_range
CALL DestructMatrix(AdjacentABlocks(II, JJ2))
END DO
ATasks(II) = SendSizeA
!$OMP END TASK
CASE(SendSizeA)
Expand Down Expand Up @@ -134,8 +137,10 @@
!$OMP TASK DEFAULT(SHARED), FIRSTPRIVATE(II)
CALL ReduceAndComposeMatrixCleanup(LocalRowContribution(II), &
& GatheredRowContribution(II), row_helper(II))
CALL DestructMatrix(LocalRowContribution(II))
CALL TransposeMatrix(GatheredRowContribution(II), &
& GatheredRowContributionT(II))
CALL DestructMatrix(GatheredRowContribution(II))
ATasks(II) = CleanupA
!$OMP END TASK
CASE(CleanupA)
Expand All @@ -148,17 +153,21 @@
SELECT CASE (BTasks(JJ))
CASE(LocalGatherB)
BTasks(JJ) = TaskRunningB
!$OMP TASK DEFAULT(SHARED), PRIVATE(II2), FIRSTPRIVATE(JJ)
!$OMP TASK DEFAULT(SHARED), PRIVATE(II2, II2_range), FIRSTPRIVATE(JJ)
!! First Transpose The Data We Are Working With
DO II2 = 1, matAB%process_grid%number_of_blocks_rows / &
II2_range = matAB%process_grid%number_of_blocks_rows / &
& matAB%process_grid%num_process_slices
DO II2 = 1, II2_range
CALL TransposeMatrix(matB%LMAT(duplicate_start_row + &
& duplicate_offset_row * (II2 - 1), JJ), &
& TransposedBBlocks(II2, JJ))
END DO
!! Then Do A Local Gather
!! Then Do A Local Gather and Cleanup
CALL ComposeMatrixColumns(TransposedBBlocks(:, JJ), &
& LocalColumnContribution(JJ))
DO II2 = 1, II2_range
CALL DestructMatrix(TransposedBBlocks(II2, JJ))
END DO
BTasks(JJ) = SendSizeB
!$OMP END TASK
CASE(SendSizeB)
Expand Down Expand Up @@ -187,6 +196,7 @@
!$OMP TASK DEFAULT(SHARED), FIRSTPRIVATE(JJ)
CALL ReduceAndComposeMatrixCleanup(LocalColumnContribution(JJ), &
& GatheredColumnContribution(JJ), column_helper(JJ))
CALL DestructMatrix(LocalColumnContribution(JJ))
BTasks(JJ) = CleanupB
!$OMP END TASK
CASE(CleanupB)
Expand Down Expand Up @@ -216,6 +226,7 @@
IF (matAB%process_grid%num_process_slices .EQ. 1) THEN
ABTasks(II,JJ) = CleanupAB
CALL CopyMatrix(SliceContribution(II, JJ), matAB%LMAT(II, JJ))
CALL DestructMatrix(SliceContribution(II, JJ))
ELSE
ABTasks(II, JJ) = SendSizeAB
END IF
Expand Down Expand Up @@ -245,6 +256,7 @@
!$OMP TASK DEFAULT(SHARED), FIRSTPRIVATE(II, JJ)
CALL ReduceAndSumMatrixCleanup(SliceContribution(II, JJ), &
& matAB%LMAT(II, JJ), threshold, slice_helper(II, JJ))
CALL DestructMatrix(SliceContribution(II, JJ))
ABTasks(II, JJ) = CleanupAB
!$OMP END TASK
CASE(CleanupAB)
Expand All @@ -261,16 +273,7 @@
!$OMP END MASTER
!$OMP END PARALLEL

!! Copy to output matrix.
IF (ABS(beta) .LT. TINY(beta)) THEN
CALL CopyMatrix(matAB, matC)
ELSE
CALL ScaleMatrix(MatC, beta)
CALL IncrementMatrix(MatAB, MatC)
END IF

!! Cleanup
CALL DestructMatrix(matAB)
DEALLOCATE(row_helper)
DEALLOCATE(column_helper)
DEALLOCATE(slice_helper)
Expand Down Expand Up @@ -316,3 +319,12 @@
END DO
END DO
DEALLOCATE(SliceContribution)

!! Copy to output matrix.
IF (ABS(beta) .LT. TINY(beta)) THEN
CALL CopyMatrix(matAB, matC)
ELSE
CALL ScaleMatrix(MatC, beta)
CALL IncrementMatrix(MatAB, MatC)
END IF
CALL DestructMatrix(matAB)
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
CALL ShiftTripletList(shifted, 1 - this%start_row, 1 - this%start_column)
CALL SortTripletList(shifted, this%local_columns, &
& this%local_rows, sorted_tlist)
CALL DestructTripletList(shifted)
!! Build
CALL ConstructMatrixFromTripletList(local_matrix, sorted_tlist, &
& this%local_rows, this%local_columns)
Expand Down
30 changes: 22 additions & 8 deletions Source/Fortran/distributed_includes/TransposeMatrix.f90
Original file line number Diff line number Diff line change
@@ -1,22 +1,36 @@
!! Local Data
INTEGER :: II

CALL ConstructTripletList(new_list)
INTEGER :: II, KK

CALL GetMatrixTripletList(AMat, tlist)

!! Determine the size.
!! these loops could be merged using append but I choose not to for the
!! sake of memory.
KK = 0
DO II = 1, tlist%CurrentSize
IF (MOD(II, AMat%process_grid%num_process_slices) .EQ. &
& AMat%process_grid%my_slice) THEN
KK = KK + 1
END IF
END DO
CALL ConstructTripletList(new_list, KK)

!! Fill the Triplet List
KK = 0
DO II = 1, tlist%CurrentSize
IF (MOD(II, AMat%process_grid%num_process_slices) .EQ. &
& AMat%process_grid%my_slice) THEN
KK = KK + 1
CALL GetTripletAt(tlist, II, trip)
trip_t%index_row = trip%index_column
trip_t%index_column = trip%index_row
trip_t%point_value = trip%point_value
CALL AppendToTripletList(new_list, trip_t)
new_list%DATA(KK)%index_row = trip%index_column
new_list%DATA(KK)%index_column = trip%index_row
new_list%DATA(KK)%point_value = trip%point_value
END IF
END DO
CALL DestructTripletList(tlist)

!! Create the matrix
CALL DestructMatrix(TransMat)
CALL ConstructEmptyMatrix(TransMat, AMat)
CALL FillMatrixFromTripletList(TransMat, new_list)
CALL DestructTripletList(new_list)
CALL DestructTripletList(tlist)
4 changes: 2 additions & 2 deletions Source/Fortran/eigenexa_includes/EigenSerial.f90
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
!! Gather as dense
CALL GatherMatrixToProcess(this, local_s)
CALL ConstructMatrixDFromS(local_s, local_d)
CALL DestructMatrix(local_s)

!! Decompose
CALL EigenDecomposition(local_d, V, W)
CALL DestructMatrix(local_d)

!! Filter if necessary
IF (nvals+1 .LE. V%rows) THEN
Expand Down Expand Up @@ -35,7 +37,5 @@
END IF

!! Cleanup
CALL DestructMatrix(local_s)
CALL DestructMatrix(local_d)
CALL DestructTripletList(V_t)
CALL DestructTripletList(W_t)
33 changes: 25 additions & 8 deletions Source/Fortran/eigenexa_includes/EigenToNT.f90
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
!! Local Variables
INTEGER :: row_start, row_end, col_start, col_end
INTEGER :: II, JJ, ilookup, jlookup
INTEGER :: II, JJ, KK, ilookup, jlookup
INTEGER :: ind

!! Get The Eigenvectors
Expand All @@ -13,23 +13,40 @@
ALLOCATE(VD1(SIZE(VD, DIM = 1)*SIZE(VD, DIM = 2)))
VD1(:) = PACK(VD, .TRUE.)

CALL ConstructTripletList(triplet_v)
!! First loop: count how many elements are needed
!! originally I used Append to eliminate this loop but we need to save
!! memory here.
KK = 0
ind = 1
DO JJ = col_start, col_end
DO II = row_start, row_end
IF (ABS(VD1(ind + II -1)) .GT. params%threshold) THEN
KK = KK + 1
END IF
END DO
ind = ind + exa%offset
END DO

!! Construct The Triplet List
CALL ConstructTripletList(triplet_v, KK)

!! Reset indices and do the actual filling
ind = 1
KK = 0
DO JJ = col_start, col_end
jlookup = eigen_translate_l2g(JJ, exa%proc_cols, exa%colid)
DO II = row_start, row_end
IF (ABS(VD1(ind + II -1)) .GT. params%threshold) THEN
KK = KK + 1
ilookup = eigen_translate_l2g(II, exa%proc_rows, exa%rowid)
CALL SetTriplet(trip, jlookup, ilookup, VD1(ind + II -1))
CALL AppendToTripletList(triplet_v, trip)
CALL SetTriplet(triplet_v%DATA(KK), jlookup, ilookup, &
& VD1(ind + II -1))
END IF
END DO
ind = ind + exa%offset
END DO
DEALLOCATE(VD1)

!! Fill and Clean Up
CALL FillMatrixFromTripletList(V, triplet_v)

!! Cleanup
CALL DestructTripletList(triplet_v)

DEALLOCATE(VD1)
6 changes: 4 additions & 2 deletions Source/Fortran/sparse_includes/DenseBranch.f90
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
CALL MultiplyMatrix(DenseA, DenseB, DenseC, &
& IsATransposed_in = IsATransposed, IsBTransposed_in = IsBTransposed)

!! Cleanup Intermediate
CALL DestructMatrix(DenseA)
CALL DestructMatrix(DenseB)

!! Convert Back
CALL ConstructMatrixSFromD(DenseC, matC, threshold)
CALL ScaleMatrix(matC, alpha)

!! Cleanup
CALL DestructMatrix(DenseA)
CALL DestructMatrix(DenseB)
CALL DestructMatrix(DenseC)

0 comments on commit 6050810

Please sign in to comment.