-
Notifications
You must be signed in to change notification settings - Fork 1
/
lulesh.h
902 lines (727 loc) · 30 KB
/
lulesh.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
#ifndef MY_LULESH_H
#define MY_LULESH_H
#if !defined(USE_MPI)
# error "You should specify USE_MPI=0 or USE_MPI=1 on the compile line"
#endif
// OpenMP will be compiled in if this flag is set to 1 AND the compiler beging
// used supports it (i.e. the _OPENMP symbol is defined)
#define USE_OMP 1
#if USE_MPI
#include <mpi.h>
/*
define one of these three symbols:
SEDOV_SYNC_POS_VEL_NONE
SEDOV_SYNC_POS_VEL_EARLY
SEDOV_SYNC_POS_VEL_LATE
*/
#define SEDOV_SYNC_POS_VEL_NONE 1
#endif
#include <math.h>
#include <vector>
#include <laik_vector.h>
#include "laik_vector_comm_exclusive_halo.h"
#include "laik_vector_comm_overlapping_overlapping.h"
#include "laik_vector_repart_exclusive.h"
#include "laik_vector_repart_overlapping.h"
//**************************************************
// Allow flexibility for arithmetic representations
//**************************************************
#define MAX(a, b) ( ((a) > (b)) ? (a) : (b))
// Precision specification
typedef float real4 ;
typedef double real8 ;
typedef long double real10 ; // 10 bytes on x86
typedef int Index_t ; // array subscript and loop index
typedef real8 Real_t ; // floating point representation
typedef int Int_t ; // integer representation
enum { VolumeError = -1, QStopError = -2 } ;
inline real4 SQRT(real4 arg) { return sqrtf(arg) ; }
inline real8 SQRT(real8 arg) { return sqrt(arg) ; }
inline real10 SQRT(real10 arg) { return sqrtl(arg) ; }
inline real4 CBRT(real4 arg) { return cbrtf(arg) ; }
inline real8 CBRT(real8 arg) { return cbrt(arg) ; }
inline real10 CBRT(real10 arg) { return cbrtl(arg) ; }
inline real4 FABS(real4 arg) { return fabsf(arg) ; }
inline real8 FABS(real8 arg) { return fabs(arg) ; }
inline real10 FABS(real10 arg) { return fabsl(arg) ; }
// Stuff needed for boundary conditions
// 2 BCs on each of 6 hexahedral faces (12 bits)
#define XI_M 0x00007
#define XI_M_SYMM 0x00001
#define XI_M_FREE 0x00002
#define XI_M_COMM 0x00004
#define XI_P 0x00038
#define XI_P_SYMM 0x00008
#define XI_P_FREE 0x00010
#define XI_P_COMM 0x00020
#define ETA_M 0x001c0
#define ETA_M_SYMM 0x00040
#define ETA_M_FREE 0x00080
#define ETA_M_COMM 0x00100
#define ETA_P 0x00e00
#define ETA_P_SYMM 0x00200
#define ETA_P_FREE 0x00400
#define ETA_P_COMM 0x00800
#define ZETA_M 0x07000
#define ZETA_M_SYMM 0x01000
#define ZETA_M_FREE 0x02000
#define ZETA_M_COMM 0x04000
#define ZETA_P 0x38000
#define ZETA_P_SYMM 0x08000
#define ZETA_P_FREE 0x10000
#define ZETA_P_COMM 0x20000
// MPI Message Tags
#define MSG_COMM_SBN 1024
#define MSG_SYNC_POS_VEL 2048
#define MSG_MONOQ 3072
#define MAX_FIELDS_PER_MPI_COMM 6
// Assume 128 byte coherence
// Assume Real_t is an "integral power of 2" bytes wide
#define CACHE_COHERENCE_PAD_REAL (128 / sizeof(Real_t))
#define CACHE_ALIGN_REAL(n) \
(((n) + (CACHE_COHERENCE_PAD_REAL - 1)) & ~(CACHE_COHERENCE_PAD_REAL-1))
//////////////////////////////////////////////////////
// Primary data structure
//////////////////////////////////////////////////////
/*
* The implementation of the data abstraction used for lulesh
* resides entirely in the Domain class below. You can change
* grouping and interleaving of fields here to maximize data layout
* efficiency for your underlying architecture or compiler.
*
* For example, fields can be implemented as STL objects or
* raw array pointers. As another example, individual fields
* m_x, m_y, m_z could be budled into
*
* struct { Real_t x, y, z ; } *m_coord ;
*
* allowing accessor functions such as
*
* "Real_t &x(Index_t idx) { return m_coord[idx].x ; }"
* "Real_t &y(Index_t idx) { return m_coord[idx].y ; }"
* "Real_t &z(Index_t idx) { return m_coord[idx].z ; }"
*/
class Domain {
public:
/**
* @brief Constructor Domain updated to use additional
* parameters. The partitionings and transitions
* are needed to construct the laik_vectors used
*/
Domain(Int_t numRanks, Index_t colLoc,
Index_t rowLoc, Index_t planeLoc,
Index_t nx, Int_t tp, Int_t nr, Int_t balance, Int_t cost,
Laik_Instance *inst, Laik_Group* world,
Laik_Space* elems, Laik_Space* nodes,
Laik_Partitioning *exclusive, Laik_Partitioning *halo, Laik_Partitioning *overlapping,
Laik_Transition *transitionToExclusive, Laik_Transition *transitionToHalo, Laik_Transition *transitionToOverlappingInit,Laik_Transition * transitionToOverlappingReduce);
/**
* @brief this method is extracted from the constructor of Domain
* The constructor only calls this and it does the initialization
* The reason for this change is that after repartitioning
* some parts of the constructor has to be re-executed (
* of course not the zero initialization of data) to update
* some parameters
* @param numRanks
* @param colLoc
* @param rowLoc
* @param planeLoc
* @param nx
* @param tp
* @param nr
* @param balance
* @param cost
*/
void init_domain(Int_t numRanks, Index_t colLoc,
Index_t rowLoc, Index_t planeLoc,
Index_t nx, Int_t tp, Int_t nr, Int_t balance, Int_t cost);
/**
* @brief this method initializes the parameters that are
* set in the controctor of Domain
* similar to init_domain but it is only used for
* repartitioning as it does not include all the
* codes in the constructor of Domain in the ref-
* erence implementation. For example it skips
* initializing the fields to zero since this
* initialization would remove the data and
* leads to incorrect results after repartitioning.
* @param numRanks
* @param colLoc
* @param rowLoc
* @param planeLoc
* @param nx
* @param tp
* @param nr
* @param balance
* @param cost
*/
void re_init_domain(Int_t numRanks, Index_t colLoc,
Index_t rowLoc, Index_t planeLoc,
Index_t nx, Int_t tp, Int_t nr, Int_t balance, Int_t cost);
//
// ALLOCATION
//
void AllocateNodePersistent(Int_t numNode, Int_t numRanks) // Node-centered
{
int edgeElem = (int)(cbrt(numNode)+0.1)-1;
int side = cbrt (numRanks);
int globalNumNode=(edgeElem*side+1)*(edgeElem*side+1)*(edgeElem*side+1);
#ifdef REPARTITIONING
m_x.resize(globalNumNode); // coordinates
m_y.resize(globalNumNode);
m_z.resize(globalNumNode);
m_xd.resize(globalNumNode); // velocities
m_yd.resize(globalNumNode);
m_zd.resize(globalNumNode);
m_xdd.resize(globalNumNode); // accelerations
m_ydd.resize(globalNumNode);
m_zdd.resize(globalNumNode);
#endif
#ifdef PERFORMANCE
m_x.resize(numNode); // coordinates
m_y.resize(numNode);
m_z.resize(numNode);
m_xd.resize(numNode); // velocities
m_yd.resize(numNode);
m_zd.resize(numNode);
m_xdd.resize(numNode); // accelerations
m_ydd.resize(numNode);
m_zdd.resize(numNode);
#endif
m_fx.resize(globalNumNode); // forces
m_fy.resize(globalNumNode);
m_fz.resize(globalNumNode);
m_nodalMass.resize(globalNumNode); // mass
}
void AllocateElemPersistent(Int_t numElem, Int_t numRanks) // Elem-centered
{
m_nodelist.resize(8*numElem);
// elem connectivities through face
m_lxim.resize(numElem);
m_lxip.resize(numElem);
m_letam.resize(numElem);
m_letap.resize(numElem);
m_lzetam.resize(numElem);
m_lzetap.resize(numElem);
m_elemBC.resize(numElem);
#ifdef REPARTITIONING
m_e.resize(numRanks*numElem);
m_p.resize(numRanks*numElem);
m_q.resize(numRanks*numElem);
m_ql.resize(numRanks*numElem);
m_qq.resize(numRanks*numElem);
m_v.resize(numRanks*numElem);
m_volo.resize(numRanks*numElem);
m_delv.resize(numRanks*numElem);
m_vdov.resize(numRanks*numElem);
m_arealg.resize(numRanks*numElem);
m_ss.resize(numRanks*numElem);
m_elemMass.resize(numRanks*numElem);
#endif
#ifdef PERFORMANCE
m_e.resize(numElem);
m_p.resize(numElem);
m_q.resize(numElem);
m_ql.resize(numElem);
m_qq.resize(numElem);
m_v.resize(numElem);
m_volo.resize(numElem);
m_delv.resize(numElem);
m_vdov.resize(numElem);
m_arealg.resize(numElem);
m_ss.resize(numElem);
m_elemMass.resize(numElem);
#endif
}
void AllocateGradients(Int_t numElem, Int_t numRanks, Index_t allElem)
{
#ifdef REPARTITIONING
// Position gradients
m_delx_xi.resize(numRanks*numElem) ;
m_delx_eta.resize(numRanks*numElem) ;
m_delx_zeta.resize(numRanks*numElem) ;
#endif
#ifdef PERFORMANCE
m_delx_xi.resize(numElem) ;
m_delx_eta.resize(numElem) ;
m_delx_zeta.resize(numElem) ;
#endif
// Velocity gradients
m_delv_xi.resize(numRanks*numElem) ;
m_delv_eta.resize(numRanks*numElem);
m_delv_zeta.resize(numRanks*numElem) ;
}
void DeallocateGradients()
{
m_delx_zeta.clear() ;
m_delx_eta.clear() ;
m_delx_xi.clear() ;
m_delv_zeta.clear() ;
m_delv_eta.clear() ;
m_delv_xi.clear() ;
}
void AllocateStrains(Int_t numElem)
{
#ifdef REPARTITIONING
m_dxx.resize(numRanks()*numElem) ;
m_dyy.resize(numRanks()*numElem) ;
m_dzz.resize(numRanks()*numElem) ;
#endif
#ifdef PERFORMANCE
m_dxx.resize(numElem) ;
m_dyy.resize(numElem) ;
m_dzz.resize(numElem) ;
#endif
}
void DeallocateStrains()
{
m_dzz.clear() ;
m_dyy.clear() ;
m_dxx.clear() ;
}
// GETTERS
// for accessing laik_vectors out of Domain
// this is not needed by lulesh as lulesh implements
// accessors to the data independent of the container.
// (only for debugging)
//
laik_vector_comm_overlapping_overlapping<double>& get_fx() { return m_fx;}
laik_vector_comm_overlapping_overlapping<double>& get_fy() { return m_fy;}
laik_vector_comm_overlapping_overlapping<double>& get_fz() { return m_fz;}
laik_vector_comm_overlapping_overlapping<double>& get_nodalMass() { return m_nodalMass;}
laik_vector_comm_exclusive_halo<double>& get_delv_xi() { return m_delv_xi;}
laik_vector_comm_exclusive_halo<double>& get_delv_eta() { return m_delv_eta;}
laik_vector_comm_exclusive_halo<double>& get_delv_zeta() { return m_delv_zeta;}
// ACCESSORS
//
// Node-centered
// Nodal coordinates
Real_t& x(Index_t idx) { return m_x[idx] ; }
Real_t& y(Index_t idx) { return m_y[idx] ; }
Real_t& z(Index_t idx) { return m_z[idx] ; }
// Nodal velocities
Real_t& xd(Index_t idx) { return m_xd[idx] ; }
Real_t& yd(Index_t idx) { return m_yd[idx] ; }
Real_t& zd(Index_t idx) { return m_zd[idx] ; }
// Nodal accelerations
Real_t& xdd(Index_t idx) { return m_xdd[idx] ; }
Real_t& ydd(Index_t idx) { return m_ydd[idx] ; }
Real_t& zdd(Index_t idx) { return m_zdd[idx] ; }
// Nodal forces
Real_t& fx(Index_t idx) { return m_fx[idx] ; }
Real_t& fy(Index_t idx) { return m_fy[idx] ; }
Real_t& fz(Index_t idx) { return m_fz[idx] ; }
// Nodal mass
Real_t& nodalMass(Index_t idx) { return m_nodalMass[idx] ; }
// Nodes on symmertry planes
Index_t symmX(Index_t idx) { return m_symmX[idx] ; }
Index_t symmY(Index_t idx) { return m_symmY[idx] ; }
Index_t symmZ(Index_t idx) { return m_symmZ[idx] ; }
bool symmXempty() { return m_symmX.empty(); }
bool symmYempty() { return m_symmY.empty(); }
bool symmZempty() { return m_symmZ.empty(); }
//
// Element-centered
//
Index_t& regElemSize(Index_t idx) { return m_regElemSize[idx] ; }
Index_t& regNumList(Index_t idx) { return m_regNumList[idx] ; }
Index_t* regNumList() { return &m_regNumList[0] ; }
std::vector<Index_t> regElemlist(Int_t r) { return m_regElemlist[r] ; }
Index_t& regElemlist(Int_t r, Index_t idx) { return m_regElemlist[r][idx] ; }
Index_t* nodelist(Index_t idx) { return &m_nodelist[Index_t(8)*idx] ; }
// elem connectivities through face
Index_t& lxim(Index_t idx) { return m_lxim[idx] ; }
Index_t& lxip(Index_t idx) { return m_lxip[idx] ; }
Index_t& letam(Index_t idx) { return m_letam[idx] ; }
Index_t& letap(Index_t idx) { return m_letap[idx] ; }
Index_t& lzetam(Index_t idx) { return m_lzetam[idx] ; }
Index_t& lzetap(Index_t idx) { return m_lzetap[idx] ; }
// elem face symm/free-surface flag
Int_t& elemBC(Index_t idx) { return m_elemBC[idx] ; }
// Principal strains - temporary
Real_t& dxx(Index_t idx) { return m_dxx[idx] ; }
Real_t& dyy(Index_t idx) { return m_dyy[idx] ; }
Real_t& dzz(Index_t idx) { return m_dzz[idx] ; }
// Velocity gradient - temporary
Real_t& delv_xi(Index_t idx) { return m_delv_xi[idx] ; }
Real_t& delv_eta(Index_t idx) { return m_delv_eta[idx] ; }
Real_t& delv_zeta(Index_t idx) { return m_delv_zeta[idx] ; }
// Position gradient - temporary
Real_t& delx_xi(Index_t idx) { return m_delx_xi[idx] ; }
Real_t& delx_eta(Index_t idx) { return m_delx_eta[idx] ; }
Real_t& delx_zeta(Index_t idx) { return m_delx_zeta[idx] ; }
// Energy
Real_t& e(Index_t idx) { return m_e[idx] ; }
// Pressure
Real_t& p(Index_t idx) { return m_p[idx] ; }
// Artificial viscosity
Real_t& q(Index_t idx) { return m_q[idx] ; }
// Linear term for q
Real_t& ql(Index_t idx) { return m_ql[idx] ; }
// Quadratic term for q
Real_t& qq(Index_t idx) { return m_qq[idx] ; }
// Relative volume
Real_t& v(Index_t idx) { return m_v[idx] ; }
Real_t& delv(Index_t idx) { return m_delv[idx] ; }
// Reference volume
Real_t& volo(Index_t idx) { return m_volo[idx] ; }
// volume derivative over volume
Real_t& vdov(Index_t idx) { return m_vdov[idx] ; }
// Element characteristic length
Real_t& arealg(Index_t idx) { return m_arealg[idx] ; }
// Sound speed
Real_t& ss(Index_t idx) { return m_ss[idx] ; }
// Element mass
Real_t& elemMass(Index_t idx) { return m_elemMass[idx] ; }
Index_t nodeElemCount(Index_t idx)
{ return m_nodeElemStart[idx+1] - m_nodeElemStart[idx] ; }
Index_t *nodeElemCornerList(Index_t idx)
{ return &m_nodeElemCornerList[m_nodeElemStart[idx]] ; }
// Parameters
// Cutoffs
Real_t u_cut() const { return m_u_cut ; }
Real_t e_cut() const { return m_e_cut ; }
Real_t p_cut() const { return m_p_cut ; }
Real_t q_cut() const { return m_q_cut ; }
Real_t v_cut() const { return m_v_cut ; }
// Other constants (usually are settable via input file in real codes)
Real_t hgcoef() const { return m_hgcoef ; }
Real_t qstop() const { return m_qstop ; }
Real_t monoq_max_slope() const { return m_monoq_max_slope ; }
Real_t monoq_limiter_mult() const { return m_monoq_limiter_mult ; }
Real_t ss4o3() const { return m_ss4o3 ; }
Real_t qlc_monoq() const { return m_qlc_monoq ; }
Real_t qqc_monoq() const { return m_qqc_monoq ; }
Real_t qqc() const { return m_qqc ; }
Real_t eosvmax() const { return m_eosvmax ; }
Real_t eosvmin() const { return m_eosvmin ; }
Real_t pmin() const { return m_pmin ; }
Real_t emin() const { return m_emin ; }
Real_t dvovmax() const { return m_dvovmax ; }
Real_t refdens() const { return m_refdens ; }
// Timestep controls, etc...
Real_t& time() { return m_time ; }
Real_t& deltatime() { return m_deltatime ; }
Real_t& deltatimemultlb() { return m_deltatimemultlb ; }
Real_t& deltatimemultub() { return m_deltatimemultub ; }
Real_t& stoptime() { return m_stoptime ; }
Real_t& dtcourant() { return m_dtcourant ; }
Real_t& dthydro() { return m_dthydro ; }
Real_t& dtmax() { return m_dtmax ; }
Real_t& dtfixed() { return m_dtfixed ; }
Int_t& cycle() { return m_cycle ; }
Index_t& numRanks() { return m_numRanks ; }
Index_t& colLoc() { return m_colLoc ; }
Index_t& rowLoc() { return m_rowLoc ; }
Index_t& planeLoc() { return m_planeLoc ; }
Index_t& tp() { return m_tp ; }
Index_t& sizeX() { return m_sizeX ; }
Index_t& sizeY() { return m_sizeY ; }
Index_t& sizeZ() { return m_sizeZ ; }
Index_t& numReg() { return m_numReg ; }
Int_t& cost() { return m_cost ; }
Index_t& numElem() { return m_numElem ; }
Index_t& numNode() { return m_numNode ; }
Index_t& maxPlaneSize() { return m_maxPlaneSize ; }
Index_t& maxEdgeSize() { return m_maxEdgeSize ; }
//
// MPI-Related additional data
//
#if USE_MPI
// Communication Work space
Real_t *commDataSend ;
Real_t *commDataRecv ;
// Maximum number of block neighbors
MPI_Request recvRequest[26] ; // 6 faces + 12 edges + 8 corners
MPI_Request sendRequest[26] ; // 6 faces + 12 edges + 8 corners
//laik communication
void communicateNodalMass(){
m_nodalMass.switch_to_p1();
m_nodalMass.switch_to_p2();
}
#endif
// LAIK-related additional data
// the domain needs to
// know about the laik instance
// and laik group
Laik_Instance *inst;
Laik_Group *world;
// LAIK-related method
/**
* @brief helper function for calling migration and data re-distribution
* in all laik_vectors so they perfor re-distribution.
* @param new_group
* @param p_exclusive
* @param p_halo
* @param p_overlapping
* @param t_to_exclusive
* @param t_to_halo
* @param t_to_overlapping_init
* @param t_to_overlapping_reduce
*/
void re_distribute_data_structures(Laik_Group* new_group, Laik_Partitioning* p_exclusive, Laik_Partitioning* p_halo, Laik_Partitioning* p_overlapping, Laik_Transition *t_to_exclusive, Laik_Transition *t_to_halo, Laik_Transition *t_to_overlapping_init, Laik_Transition *t_to_overlapping_reduce);
private:
void BuildMesh(Int_t nx, Int_t edgeNodes, Int_t edgeElems);
void SetupThreadSupportStructures();
void CreateRegionIndexSets(Int_t nreg, Int_t balance);
void SetupCommBuffers(Int_t edgeNodes);
void SetupSymmetryPlanes(Int_t edgeNodes);
void SetupElementConnectivities(Int_t edgeElems);
void SetupBoundaryConditions(Int_t edgeElems);
//
// IMPLEMENTATION
//
/* Node-centered */
#ifdef REPARTITIONING
laik_vector_repart_overlapping<double> m_x ; /* coordinates */
laik_vector_repart_overlapping<double> m_y ;
laik_vector_repart_overlapping<double> m_z ;
laik_vector_repart_overlapping<double> m_xd ; /* velocities */
laik_vector_repart_overlapping<double> m_yd ;
laik_vector_repart_overlapping<double> m_zd ;
laik_vector_repart_overlapping<double> m_xdd ; /* accelerations */
laik_vector_repart_overlapping<double> m_ydd ;
laik_vector_repart_overlapping<double> m_zdd ;
#endif
#ifdef PERFORMANCE
std::vector<Real_t> m_x ; /* coordinates */
std::vector<Real_t> m_y ;
std::vector<Real_t> m_z ;
std::vector<Real_t> m_xd ; /* velocities */
std::vector<Real_t> m_yd ;
std::vector<Real_t> m_zd ;
std::vector<Real_t> m_xdd ; /* accelerations */
std::vector<Real_t> m_ydd ;
std::vector<Real_t> m_zdd ;
#endif
laik_vector_comm_overlapping_overlapping<double> m_fx ; /* forces */
laik_vector_comm_overlapping_overlapping<double> m_fy ;
laik_vector_comm_overlapping_overlapping<double> m_fz ;
laik_vector_comm_overlapping_overlapping<double> m_nodalMass ; /* mass */
std::vector<Index_t> m_symmX ; /* symmetry plane nodesets */
std::vector<Index_t> m_symmY ;
std::vector<Index_t> m_symmZ ;
// Element-centered
// Region information
Int_t m_numReg ;
Int_t m_cost; //imbalance cost
//Index_t *m_regElemSize ; // Size of region sets
//Index_t *m_regNumList ; // Region number per domain element
//Index_t **m_regElemlist ; // region indexset
std::vector<Index_t> m_regElemSize; // Size of region sets
std::vector<Index_t> m_regNumList; // Region number per domain element
std::vector <std::vector <Index_t> > m_regElemlist; // region indexset
std::vector<Index_t> m_nodelist ; /* elemToNode connectivity */
std::vector<Index_t> m_lxim ; /* element connectivity across each face */
std::vector<Index_t> m_lxip ;
std::vector<Index_t> m_letam ;
std::vector<Index_t> m_letap ;
std::vector<Index_t> m_lzetam ;
std::vector<Index_t> m_lzetap ;
std::vector<Int_t> m_elemBC ; /* symmetry/free-surface flags for each elem face */
#ifdef REPARTITIONING
laik_vector_repart_exclusive<double> m_dxx ; /* principal strains -- temporary */
laik_vector_repart_exclusive<double> m_dyy ;
laik_vector_repart_exclusive<double> m_dzz ;
#endif
#ifdef PERFORMANCE
std::vector<Real_t> m_dxx ; /* principal strains -- temporary */
std::vector<Real_t> m_dyy ;
std::vector<Real_t> m_dzz ;
#endif
laik_vector_comm_exclusive_halo<double> m_delv_xi ; /* velocity gradient -- temporary */
laik_vector_comm_exclusive_halo<double> m_delv_eta ;
laik_vector_comm_exclusive_halo<double> m_delv_zeta ;
#ifdef REPARTITIONING
laik_vector_repart_exclusive<double> m_delx_xi ; /* coordinate gradient -- temporary */
laik_vector_repart_exclusive<double> m_delx_eta ;
laik_vector_repart_exclusive<double> m_delx_zeta ;
laik_vector_repart_exclusive<double> m_e ; /* energy */
laik_vector_repart_exclusive<double> m_p ; /* pressure */
laik_vector_repart_exclusive<double> m_q ; /* q */
laik_vector_repart_exclusive<double> m_ql ; /* linear term for q */
laik_vector_repart_exclusive<double> m_qq ; /* quadratic term for q */
laik_vector_repart_exclusive<double> m_v ; /* relative volume */
laik_vector_repart_exclusive<double> m_volo ; /* reference volume */
std::vector<Real_t> m_vnew ; /* new relative volume -- temporary */
laik_vector_repart_exclusive<double> m_delv ; /* m_vnew - m_v */
laik_vector_repart_exclusive<double> m_vdov ; /* volume derivative over volume */
laik_vector_repart_exclusive<double> m_arealg ; /* characteristic length of an element */
laik_vector_repart_exclusive<double> m_ss ; /* "sound speed" */
laik_vector_repart_exclusive<double> m_elemMass ; /* mass */
#endif
#ifdef PERFORMANCE
std::vector<Real_t> m_delx_xi ; /* coordinate gradient -- temporary */
std::vector<Real_t> m_delx_eta ;
std::vector<Real_t> m_delx_zeta ;
std::vector<Real_t> m_e ; /* energy */
std::vector<Real_t> m_p ; /* pressure */
std::vector<Real_t> m_q ; /* q */
std::vector<Real_t> m_ql ; /* linear term for q */
std::vector<Real_t> m_qq ; /* quadratic term for q */
std::vector<Real_t> m_v ; /* relative volume */
std::vector<Real_t> m_volo ; /* reference volume */
std::vector<Real_t> m_vnew ; /* new relative volume -- temporary */
std::vector<Real_t> m_delv ; /* m_vnew - m_v */
std::vector<Real_t> m_vdov ; /* volume derivative over volume */
std::vector<Real_t> m_arealg ; /* characteristic length of an element */
std::vector<Real_t> m_ss ; /* "sound speed" */
std::vector<Real_t> m_elemMass ; /* mass */
#endif
// Cutoffs (treat as constants)
const Real_t m_e_cut ; // energy tolerance
const Real_t m_p_cut ; // pressure tolerance
const Real_t m_q_cut ; // q tolerance
const Real_t m_v_cut ; // relative volume tolerance
const Real_t m_u_cut ; // velocity tolerance
// Other constants (usually setable, but hardcoded in this proxy app)
const Real_t m_hgcoef ; // hourglass control
const Real_t m_ss4o3 ;
const Real_t m_qstop ; // excessive q indicator
const Real_t m_monoq_max_slope ;
const Real_t m_monoq_limiter_mult ;
const Real_t m_qlc_monoq ; // linear term coef for q
const Real_t m_qqc_monoq ; // quadratic term coef for q
const Real_t m_qqc ;
const Real_t m_eosvmax ;
const Real_t m_eosvmin ;
const Real_t m_pmin ; // pressure floor
const Real_t m_emin ; // energy floor
const Real_t m_dvovmax ; // maximum allowable volume change
const Real_t m_refdens ; // reference density
// Variables to keep track of timestep, simulation time, and cycle
Real_t m_dtcourant ; // courant constraint
Real_t m_dthydro ; // volume change constraint
Int_t m_cycle ; // iteration count for simulation
Real_t m_dtfixed ; // fixed time increment
Real_t m_time ; // current time
Real_t m_deltatime ; // variable time increment
Real_t m_deltatimemultlb ;
Real_t m_deltatimemultub ;
Real_t m_dtmax ; // maximum allowable time increment
Real_t m_stoptime ; // end time for simulation
Int_t m_numRanks ;
Index_t m_colLoc ;
Index_t m_rowLoc ;
Index_t m_planeLoc ;
Index_t m_tp ;
Index_t m_sizeX ;
Index_t m_sizeY ;
Index_t m_sizeZ ;
Index_t m_numElem ;
Index_t m_numNode ;
Index_t m_maxPlaneSize ;
Index_t m_maxEdgeSize ;
// OMP hack
Index_t *m_nodeElemStart ;
Index_t *m_nodeElemCornerList ;
// Used in setup
Index_t m_rowMin, m_rowMax;
Index_t m_colMin, m_colMax;
Index_t m_planeMin, m_planeMax ;
} ;
typedef Real_t &(Domain::* Domain_member )(Index_t) ;
struct cmdLineOpts {
Int_t its; // -i
Int_t nx; // -s
Int_t numReg; // -r
Int_t numFiles; // -f
Int_t showProg; // -p
Int_t quiet; // -q
Int_t viz; // -v
Int_t cost; // -c
Int_t balance; // -b
Int_t repart; // -repart
Int_t cycle; // -repart_cycle
};
// Function Prototypes
// lulesh-par
Real_t CalcElemVolume( const Real_t x[8],
const Real_t y[8],
const Real_t z[8]);
// lulesh-util
void ParseCommandLineOptions(int argc, char *argv[],
Int_t myRank, struct cmdLineOpts *opts);
void VerifyAndWriteFinalOutput(Real_t elapsed_time,
Domain& locDom,
Int_t nx,
Int_t numRanks);
// lulesh-viz
void DumpToVisit(Domain& domain, int numFiles, int myRank, int numRanks);
// lulesh-comm
void CommRecv(Domain& domain, Int_t msgType, Index_t xferFields,
Index_t dx, Index_t dy, Index_t dz,
bool doRecv, bool planeOnly);
void CommSend(Domain& domain, Int_t msgType,
Index_t xferFields, Domain_member *fieldData,
Index_t dx, Index_t dy, Index_t dz,
bool doSend, bool planeOnly);
void CommSBN(Domain& domain, Int_t xferFields, Domain_member *fieldData);
void CommSyncPosVel(Domain& domain);
void CommMonoQ(Domain& domain);
// lulesh-init
void InitMeshDecomp(Int_t numRanks, Int_t myRank,
Int_t *col, Int_t *row, Int_t *plane, Int_t *side);
// helper function for laik implementation
// this function set six flags based to identify
// if a domain locates on edges of the global domain
void init_config_params(Laik_Group* group, int& b ,int& f, int& d,int& u, int& l, int& r);
/**
* @brief helper functions for facilitating calculation of
* partitionings and transitions
* @param world
* @param indexSpaceElements
* @param indexSpaceNodes
* @param indexSapceDt
* @param exclusivePartitioning
* @param haloPartitioning
* @param overlapingPartitioning
* @param allPartitioning
* @param transitionToExclusive
* @param transitionToHalo
* @param transitionToOverlappingInit
* @param transitionToOverlappingReduce
*/
void create_partitionings_and_transitions( Laik_Group *&world,
Laik_Space *&indexSpaceElements,
Laik_Space *&indexSpaceNodes,
Laik_Space *&indexSapceDt,
Laik_Partitioning *&exclusivePartitioning,
Laik_Partitioning *&haloPartitioning,
Laik_Partitioning *&overlapingPartitioning,
Laik_Partitioning *&allPartitioning,
Laik_Transition *&transitionToExclusive,
Laik_Transition *&transitionToHalo,
Laik_Transition *&transitionToOverlappingInit,
Laik_Transition *&transitionToOverlappingReduce);
/**
* @brief helper function to remove unusing
* partitioning and transitions
* @param exclusivePartitioning
* @param haloPartitioning
* @param overlapingPartitioning
* @param allPartitioning
* @param transitionToExclusive
* @param transitionToHalo
* @param transitionToOverlappingInit
* @param transitionToOverlappingReduce
*/
void remove_partitionings_and_transitions(Laik_Partitioning *&exclusivePartitioning,
Laik_Partitioning *&haloPartitioning,
Laik_Partitioning *&overlapingPartitioning,
Laik_Partitioning *&allPartitioning,
Laik_Transition *&transitionToExclusive,
Laik_Transition *&transitionToHalo,
Laik_Transition *&transitionToOverlappingInit,
Laik_Transition *&transitionToOverlappingReduce);
/**
* @brief helper function for calculation of removing list of processes from the process group
* @param world
* @param opts
* @param side
* @param newside
* @param diffsize
* @param removeList
*/
void calculate_removing_list(Laik_Group* world, cmdLineOpts& opts, double side, double& newside, int& diffsize, int *&removeList);
#endif