From e0c94973ad2e015eee746f7a68bd6c7ee2f8ac55 Mon Sep 17 00:00:00 2001
From: wissambouymedj
Date: Thu, 13 Jun 2024 17:34:06 +0200
Subject: [PATCH 1/9] Replaced MPI_Gatherv with MPI_Send/Recv

---
 src/mergemesh_pmmg.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/mergemesh_pmmg.c b/src/mergemesh_pmmg.c
index 444c98b1..72efa925 100644
--- a/src/mergemesh_pmmg.c
+++ b/src/mergemesh_pmmg.c
@@ -1170,10 +1170,22 @@ int PMMG_gather_parmesh( PMMG_pParMesh parmesh,
 
   /* Gather the packed parmeshes */
   ier = MG_MIN ( ier, ier_pack );
-  MPI_CHECK( MPI_Gatherv ( ptr,pack_size,MPI_CHAR,
+
+  // Replace MPI_Gatherv with a Send/Recv loop
+  if (parmesh->myrank == root) {
+    for (int i = 0; i < nprocs; ++i) {
+      if (i != root) {
+        MPI_CHECK(MPI_Recv(rcv_buffer + displs[i], rcv_pack_size[i], MPI_CHAR, i, 0, parmesh->comm, MPI_STATUS_IGNORE), ier = 0);
+      }
+    }
+  } else {
+    MPI_CHECK(MPI_Send(ptr, pack_size, MPI_CHAR, root, 0, parmesh->comm), ier = 0);
+  }
+  /*MPI_CHECK( MPI_Gatherv ( ptr,pack_size,MPI_CHAR,
                            rcv_buffer,rcv_pack_size,
-                           displs,MPI_CHAR,root,parmesh->comm ),ier=0 );
-
+                           displs,MPI_CHAR,root,parmesh->comm ),ier=0 );*/
+
+
   PMMG_DEL_MEM(parmesh,ptr,char,"buffer to send");
 
   /** 4: Unpack parmeshes */

From cc8b30d83f578e00586bb3cb999a21f269b27ad6 Mon Sep 17 00:00:00 2001
From: Corentin Prigent
Date: Fri, 26 Jul 2024 14:45:33 +0200
Subject: [PATCH 2/9] modified edge ownership rules

---
 src/analys_pmmg.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/analys_pmmg.c b/src/analys_pmmg.c
index 55726673..c84a137c 100644
--- a/src/analys_pmmg.c
+++ b/src/analys_pmmg.c
@@ -1224,6 +1224,7 @@ int PMMG_set_edge_owners( PMMG_pParMesh parmesh,MMG5_HGeom *hpar,MPI_Comm comm )
     if( !MG_EOK(pt) || !pt->xt ) continue;
     pxt = &mesh->xtetra[pt->xt];
     for( ifac = 0; ifac < 4; ifac++ ) {
+      if( !MG_GET(pxt->ori,ifac) ) continue;
       tag = pxt->ftag[ifac];
       /* Skip non-boundary faces */
       if( !(tag & MG_BDY) || ( (tag & MG_PARBDY) && !(tag & MG_PARBDYBDY) ) )

From b97159d4708f69bc041dade6ee211490848ce1e7 Mon Sep 17 00:00:00 2001
From: Corentin Prigent
Date: Mon, 29 Jul 2024 15:33:17 +0200
Subject: [PATCH 3/9] add check function for edge owners

---
 src/analys_pmmg.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/src/analys_pmmg.c b/src/analys_pmmg.c
index c84a137c..a55bbe93 100644
--- a/src/analys_pmmg.c
+++ b/src/analys_pmmg.c
@@ -1287,6 +1287,80 @@ int PMMG_set_edge_owners( PMMG_pParMesh parmesh,MMG5_HGeom *hpar,MPI_Comm comm )
   return 1;
 }
 
+/**
+ * \param parmesh pointer toward the parmesh structure
+ * \param hpar hash table of parallel edges
+ * \param comm pointer toward the MPI communicator to use: when called before
+ * the first mesh balancing (at preprocessing stage) we have to use the
+ * read_comm communicator (i.e. the communicator used to provide the inputs).
+ * For all other calls, comm has to be the communicator to use for computations.
+ *
+ * \return 0 if failure, 1 if success.
+ *
+ * Check that every edge has one and only one owner.
+ */
+int PMMG_check_edge_owners( PMMG_pParMesh parmesh,MMG5_HGeom *hpar,MPI_Comm comm ) {
+  PMMG_pInt_comm int_edge_comm;
+  PMMG_pExt_comm ext_edge_comm;
+  MMG5_pMesh     mesh;
+  MMG5_pEdge     pa;
+  int            *intvalues, *itosend, *itorecv;
+  int            i, idx, k, nitem, color, ia;
+  MPI_Status     status;
+
+  assert( parmesh->ngrp == 1 );
+  mesh = parmesh->listgrp[0].mesh;
+
+  int_edge_comm = parmesh->int_edge_comm;
+  intvalues = int_edge_comm->intvalues;
+
+  /** Store list of parallel edge owners */
+  for (ia=1;ia<=mesh->na;ia++) {
+    pa = &mesh->edge[ia];
+    intvalues[ia-1] = pa->base;
+    if (pa->base == parmesh->nprocs) return 0;
+  }
+
+  /** Exchange values on the interfaces among procs */
+  for ( k = 0; k < parmesh->next_edge_comm; ++k ) {
+    ext_edge_comm = &parmesh->ext_edge_comm[k];
+    nitem = ext_edge_comm->nitem;
+    color = ext_edge_comm->color_out;
+
+    itosend = ext_edge_comm->itosend;
+    itorecv = ext_edge_comm->itorecv;
+
+    /* Fill buffers */
+    for ( i=0; i<nitem; ++i ) {
+      idx = ext_edge_comm->int_comm_index[i];
+      itosend[i] = intvalues[idx];
+    }
+
+    /* Communication */
+    MPI_CHECK(
+      MPI_Sendrecv(itosend,nitem,MPI_INT,color,MPI_ANALYS_TAG+2,
+                   itorecv,nitem,MPI_INT,color,MPI_ANALYS_TAG+2,
+                   comm,&status),return 0 );
+  }
+
+  /* Check that all edges have the same owner over the whole mesh */
+  for ( k = 0; k < parmesh->next_edge_comm; ++k ) {
+    ext_edge_comm = &parmesh->ext_edge_comm[k];
+
+    itorecv = ext_edge_comm->itorecv;
+
+    for ( i=0; i<ext_edge_comm->nitem; ++i ) {
+      idx = ext_edge_comm->int_comm_index[i];
+      if (!(intvalues[idx] == itorecv[i])) {
+        fprintf(stderr,"Parallel edge has two different owners. \n");
+        return 0;
+      }
+    }
+  }
+
+  return 1;
+}
+
 /**
  * \param parmesh pointer toward the parmesh structure
  * \param mesh pointer toward the mesh structure
@@ -2794,6 +2868,13 @@ int PMMG_analys(PMMG_pParMesh parmesh,MMG5_pMesh mesh,MPI_Comm comm) {
     return 0;
   }
 
+#ifndef NDEBUG
+  if (!PMMG_check_edge_owners(parmesh,&hpar,comm)) {
+    fprintf(stderr,"\n  ## Parallel edge has no owner or too many owners. Exit program. \n");
+    return 0;
+  }
+#endif
+
   /* identify singularities on parallel points.
    * No need to call a *_setVertexNmTag function, as it already takes into
    * account non-manifold configurations. */

From ae90fa1f230040e26e7d60cd25fb866e38ce248c Mon Sep 17 00:00:00 2001
From: Corentin Prigent
Date: Tue, 30 Jul 2024 15:17:46 +0200
Subject: [PATCH 4/9] modified edge owner check function

---
 src/analys_pmmg.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/analys_pmmg.c b/src/analys_pmmg.c
index a55bbe93..5e084eb0 100644
--- a/src/analys_pmmg.c
+++ b/src/analys_pmmg.c
@@ -1318,6 +1318,7 @@ int PMMG_check_edge_owners( PMMG_pParMesh parmesh,MMG5_HGeom *hpar,MPI_Comm comm
   for (ia=1;ia<=mesh->na;ia++) {
     pa = &mesh->edge[ia];
     intvalues[ia-1] = pa->base;
+    if (!(pa->tag & MG_PARBDYBDY)) continue;
    if (pa->base == parmesh->nprocs) return 0;
   }
 

From c5a7b52a1fb34a62d6d748ad2ac21ee4aea42c56 Mon Sep 17 00:00:00 2001
From: Mark Potse
Date: Wed, 8 May 2024 17:16:50 +0200
Subject: [PATCH 5/9] my configuration, using my own branch of mmg

From 7012a66e16008cd8f75530514686e79309acc587 Mon Sep 17 00:00:00 2001
From: Mark Potse
Date: Sat, 18 May 2024 17:25:15 +0200
Subject: [PATCH 6/9] write a specific error message when the number of
 elements to gather in PMMG_gather_parmesh exceeds INT_MAX

This problem occurs when the total size of the first np-1 compressed mesh
parts exceeds INT_MAX. This commit introduces an error message for this
circumstance, which replaces an error on a negative allocation size.
It also shows the allocation size in the error message for allocation
failure, so that this kind of problem is easier to diagnose.
---
 src/mergemesh_pmmg.c | 14 ++++++++++++--
 src/parmmg.h         |  8 +++++---
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/mergemesh_pmmg.c b/src/mergemesh_pmmg.c
index 72efa925..7c31f5ed 100644
--- a/src/mergemesh_pmmg.c
+++ b/src/mergemesh_pmmg.c
@@ -1098,7 +1098,7 @@ int PMMG_gather_parmesh( PMMG_pParMesh parmesh,
                         int **rcv_next_node_comm,
                         PMMG_pExt_comm **rcv_ext_node_comm ) {
 
-  size_t pack_size_tot;
+  size_t pack_size_tot,next_disp;
   int *rcv_pack_size,ier,ier_glob,k,*displs,ier_pack;
   int nprocs,root,pack_size;
   char *rcv_buffer,*buffer,*ptr;
@@ -1145,7 +1145,17 @@ int PMMG_gather_parmesh( PMMG_pParMesh parmesh,
 
     displs[0] = 0;
     for ( k=1; k<nprocs; ++k ) {
-      displs[k] = displs[k-1]+rcv_pack_size[k-1];
+      next_disp = displs[k-1]+rcv_pack_size[k-1];
+      if(next_disp>INT_MAX){
+        /* The displacements argument to MPI_Gatherv() is an array of int
+         * (signed) so the number of elements must be smaller than 2^31.
+         * To get around this we must pack more data in a single element
+         * or use multiple messages.
+         */
+        fprintf(stderr, " ## Error: too many elements for MPI_Gatherv()\n");
+        MPI_Abort(parmesh->comm, 1); /* error detected only on root */
+      }
+      displs[k] = next_disp;
     }
     pack_size_tot = (size_t)(displs[nprocs-1])+(size_t)(rcv_pack_size[nprocs-1]);
     assert ( pack_size_tot < SIZE_MAX && "SIZE_MAX overflow" );
diff --git a/src/parmmg.h b/src/parmmg.h
index 88941a4b..57c6dd39 100644
--- a/src/parmmg.h
+++ b/src/parmmg.h
@@ -289,12 +289,14 @@ static const int PMMG_MVIFCS_NLAYERS = 2;
 
 
 #define ERROR_AT(msg1,msg2) \
-  fprintf( stderr, msg1 msg2 " function: %s, file: %s, line: %d \n", \
-           __func__, __FILE__, __LINE__ )
+  fprintf( stderr, "%s %s function: %s, file: %s, line: %d \n", \
+           msg1, msg2, __func__, __FILE__, __LINE__ )
 
 #define MEM_CHK_AVAIL(mesh,bytes,msg) do { \
   if ( (mesh)->memCur + (bytes) > (mesh)->memMax ) { \
-    ERROR_AT(msg," Exceeded max memory allowed: "); \
+    char diag[1024]; \
+    snprintf(diag, 1024, " Allocation of %ld bytes exceeds max %ld: ", bytes, (mesh)->memMax); \
+    ERROR_AT(msg, diag); \
     stat = PMMG_FAILURE; \
   } else if ( (mesh)->memCur + (bytes) < 0 ) { \
     ERROR_AT(msg," Tried to free more mem than allocated: " ); \

From 4c2c2c773025b773be4612847090e0a0b97144f2 Mon Sep 17 00:00:00 2001
From: Algiane Froehly
Date: Mon, 5 Aug 2024 11:47:49 +0200
Subject: [PATCH 7/9] Attempt to fix the PR

- If run using 1 MPI process, the rcv_buffer variable has to be filled by
  the packed parmesh;
- the pointer toward the buffer provided to the mpipack_parmesh function is
  modified by the function, so it cannot be used anymore after the function
  call.
---
 src/mergemesh_pmmg.c | 67 +++++++++++++++++++++++++++++++-------------
 src/mpi_pmmg.h       |  2 +-
 src/mpipack_pmmg.c   |  2 ++
 src/mpiunpack_pmmg.c |  3 ++
 4 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/src/mergemesh_pmmg.c b/src/mergemesh_pmmg.c
index 7c31f5ed..a9d07c4c 100644
--- a/src/mergemesh_pmmg.c
+++ b/src/mergemesh_pmmg.c
@@ -1100,8 +1100,8 @@ int PMMG_gather_parmesh( PMMG_pParMesh parmesh,
 
   size_t pack_size_tot,next_disp;
   int *rcv_pack_size,ier,ier_glob,k,*displs,ier_pack;
-  int nprocs,root,pack_size;
-  char *rcv_buffer,*buffer,*ptr;
+  int nprocs,root,pack_size,buf_idx;
+  char *rcv_buffer,*ptr_to_free,*buffer;
 
   nprocs = parmesh->nprocs;
   root = parmesh->info.root;
@@ -1157,13 +1157,30 @@ int PMMG_gather_parmesh( PMMG_pParMesh parmesh,
       }
       displs[k] = next_disp;
     }
+
+    /* On root, we will gather all the meshes in rcv_buffer so we have to
+     * compute the total pack size */
     pack_size_tot = (size_t)(displs[nprocs-1])+(size_t)(rcv_pack_size[nprocs-1]);
     assert ( pack_size_tot < SIZE_MAX && "SIZE_MAX overflow" );
-    PMMG_MALLOC( parmesh,rcv_buffer,pack_size_tot,char,"rcv_buffer",ier=0);
+
+    /* root will write directly in the suitable position of rcv_buffer */
+    buf_idx = displs[root];
   }
+  else {
+    /* on ranks other than root we just need to store the local mesh so buffer
+     * will be of size pack_size */
+    pack_size_tot = pack_size;
+    /* we will write the mesh at the starting position */
+    buf_idx = 0;
+  }
+
+  PMMG_MALLOC( parmesh,rcv_buffer,pack_size_tot,char,"rcv_buffer",ier=0);
 
   /* Parmesh compression */
-  PMMG_MALLOC ( parmesh,buffer,pack_size,char,"buffer to send",ier=0 );
+  buffer = &rcv_buffer[buf_idx];
+
+  /* Save the input allocated address to avoid errors at unalloc */
+  ptr_to_free = rcv_buffer;
 
 #ifndef NDEBUG
   /* Remark: in release mode, a non allocated buffer used in gatherv creates a
@@ -1174,29 +1191,40 @@ int PMMG_gather_parmesh( PMMG_pParMesh parmesh,
   }
 #endif
 
-  ptr = buffer;
+  /* /!\ mpipack_parmesh and mpiunpack_parmesh modify the buffer pointer,
+   * making it invalid for realloc / unalloc */
+
+  /* Save the address of buffer because it will be set to the end of the char
+     array by the \a PMMG_mpipack_parmesh function */
+  char *buffer_to_send = buffer;
   ier_pack = PMMG_mpipack_parmesh ( parmesh ,&buffer );
+
+  /* Do not use the \a buffer pointer after this call: it points toward the
+   * end of the packed array, which is useless */
+  buffer = NULL;
+
   assert ( ier_pack );
 
   /* Gather the packed parmeshes */
   ier = MG_MIN ( ier, ier_pack );
-
-  // Replace MPI_Gatherv with a Send/Recv loop
+
+  /* Here the gatherv call has been replaced by a send/recv to avoid errors
+   * when displacements overflow the INT_MAX value */
   if (parmesh->myrank == root) {
-    for (int i = 0; i < nprocs; ++i) {
-      if (i != root) {
-        MPI_CHECK(MPI_Recv(rcv_buffer + displs[i], rcv_pack_size[i], MPI_CHAR, i, 0, parmesh->comm, MPI_STATUS_IGNORE), ier = 0);
+    int i;
+    for ( i = 0; i < nprocs; ++i ) {
+      if ( i != root ) {
+        MPI_CHECK(
+          MPI_Recv(rcv_buffer + displs[i], rcv_pack_size[i], MPI_CHAR, i,
+                   MPI_MERGEMESH_TAG, parmesh->comm, MPI_STATUS_IGNORE),
+          ier = 0);
       }
     }
   } else {
-    MPI_CHECK(MPI_Send(ptr, pack_size, MPI_CHAR, root, 0, parmesh->comm), ier = 0);
+    MPI_CHECK(
+      MPI_Send(buffer_to_send, pack_size, MPI_CHAR, root, MPI_MERGEMESH_TAG,parmesh->comm),
+      ier = 0);
   }
-  /*MPI_CHECK( MPI_Gatherv ( ptr,pack_size,MPI_CHAR,
-                           rcv_buffer,rcv_pack_size,
-                           displs,MPI_CHAR,root,parmesh->comm ),ier=0 );*/
-
-
-  PMMG_DEL_MEM(parmesh,ptr,char,"buffer to send");
 
   /** 4: Unpack parmeshes */
 #ifndef NDEBUG
@@ -1207,7 +1235,6 @@ int PMMG_gather_parmesh( PMMG_pParMesh parmesh,
 #endif
 
   if ( parmesh->myrank == root ) {
-    ptr = rcv_buffer;
     for ( k=0; k<nprocs; ++k ) {

Date: Tue, 6 Aug 2024 11:51:34 +0200
Subject: [PATCH 8/9] Remove warning about int overflow before fixing Send/Recv
 for very large size.

---
 src/mergemesh_pmmg.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/src/mergemesh_pmmg.c b/src/mergemesh_pmmg.c
index a9d07c4c..cdec4502 100644
--- a/src/mergemesh_pmmg.c
+++ b/src/mergemesh_pmmg.c
@@ -1144,17 +1144,7 @@ int PMMG_gather_parmesh( PMMG_pParMesh parmesh,
   if ( parmesh->myrank == root ) {
     displs[0] = 0;
     for ( k=1; k<nprocs; ++k ) {
       next_disp = displs[k-1]+rcv_pack_size[k-1];
-      if(next_disp>INT_MAX){
-        /* The displacements argument to MPI_Gatherv() is an array of int
-         * (signed) so the number of elements must be smaller than 2^31.
-         * To get around this we must pack more data in a single element
-         * or use multiple messages.
-         */
-        fprintf(stderr, " ## Error: too many elements for MPI_Gatherv()\n");
-        MPI_Abort(parmesh->comm, 1); /* error detected only on root */
-      }
       displs[k] = next_disp;
     }

From 84be939f43b5d6a722c56780f41826e0047952e3 Mon Sep 17 00:00:00 2001
From: Algiane Froehly
Date: Tue, 6 Aug 2024 11:53:20 +0200
Subject: [PATCH 9/9] Fix integer size to allow Send/Receive of arrays with
 cumulative size larger than INT_MAX.

---
 src/mergemesh_pmmg.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mergemesh_pmmg.c b/src/mergemesh_pmmg.c
index cdec4502..252422f5 100644
--- a/src/mergemesh_pmmg.c
+++ b/src/mergemesh_pmmg.c
@@ -1098,9 +1098,9 @@ int PMMG_gather_parmesh( PMMG_pParMesh parmesh,
                         int **rcv_next_node_comm,
                         PMMG_pExt_comm **rcv_ext_node_comm ) {
 
-  size_t pack_size_tot,next_disp;
-  int *rcv_pack_size,ier,ier_glob,k,*displs,ier_pack;
-  int nprocs,root,pack_size,buf_idx;
+  size_t pack_size_tot,next_disp,*displs,buf_idx;
+  int *rcv_pack_size,ier,ier_glob,k,ier_pack;
+  int nprocs,root,pack_size;
   char *rcv_buffer,*ptr_to_free,*buffer;
 
   nprocs = parmesh->nprocs;
   root = parmesh->info.root;
@@ -1120,7 +1120,7 @@ int PMMG_gather_parmesh( PMMG_pParMesh parmesh,
   /** 1: Memory alloc */
   if ( parmesh->myrank == root ) {
     PMMG_MALLOC( parmesh, rcv_pack_size ,nprocs,int,"rcv_pack_size",ier=0);
-    PMMG_MALLOC( parmesh, displs ,nprocs,int,"displs for gatherv",ier=0);
+    PMMG_MALLOC( parmesh, displs ,nprocs,size_t,"displs for gatherv",ier=0);
     PMMG_CALLOC( parmesh, (*rcv_grps) ,nprocs,PMMG_Grp,"rcv_grps",ier=0);
     PMMG_MALLOC( parmesh, (*rcv_int_node_comm) ,nprocs,PMMG_Int_comm,"rcv_int_comm" ,ier=0);
     PMMG_MALLOC( parmesh, (*rcv_next_node_comm),nprocs,int,"rcv_next_comm" ,ier=0);
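
For reference, the gather pattern that this series converges on can be
summarized in a short standalone sketch: per-rank message sizes stay int (an
MPI count requirement), while the offsets into the receive buffer are size_t,
so the cumulative gathered size may exceed INT_MAX. This is a minimal
illustration only; the function name gather_packed_buffers, the GATHER_TAG
value, and the local memcpy of the root contribution are assumptions of the
sketch, not code from the patches (which instead pack the root mesh directly
at rcv_buffer[displs[root]] and use MPI_MERGEMESH_TAG).

    #include <mpi.h>
    #include <stddef.h>
    #include <string.h>

    #define GATHER_TAG 7  /* illustrative tag, not the one used by ParMmg */

    /* Gather variable-sized char buffers on root without MPI_Gatherv: each
     * individual message is still limited to INT_MAX bytes by its int count,
     * but the size_t displacements let the total exceed INT_MAX. */
    static int gather_packed_buffers( char *sendbuf, int sendcount,
                                      char *recvbuf, const int *recvcounts,
                                      const size_t *displs, int root,
                                      MPI_Comm comm )
    {
      int rank, nprocs, i;

      MPI_Comm_rank( comm, &rank );
      MPI_Comm_size( comm, &nprocs );

      if ( rank == root ) {
        /* Copy the root contribution locally (the patches avoid this copy
         * by packing straight into the receive buffer at displs[root]). */
        memcpy( recvbuf + displs[root], sendbuf, (size_t)sendcount );

        /* Receive every other rank at its size_t displacement. */
        for ( i = 0; i < nprocs; ++i ) {
          if ( i == root ) continue;
          if ( MPI_Recv( recvbuf + displs[i], recvcounts[i], MPI_CHAR, i,
                         GATHER_TAG, comm, MPI_STATUS_IGNORE ) != MPI_SUCCESS )
            return 0;
        }
      }
      else {
        if ( MPI_Send( sendbuf, sendcount, MPI_CHAR, root,
                       GATHER_TAG, comm ) != MPI_SUCCESS )
          return 0;
      }
      return 1;
    }

The point-to-point form trades the collective's internal optimizations for
freedom from the int-typed displacement array of MPI_Gatherv; a further
refinement, not needed here, would be to split any single message larger than
INT_MAX into several Send/Recv rounds.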