-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathzarms2.c
386 lines (377 loc) · 16.6 KB
/
zarms2.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <complex.h>
#define PERMTOL 0.99 /* 0 --> no permutation 0.01 to 0.1 good */
#include "./LIB/zheads.h"
#include "./LIB/zprotos.h"
#include "./LIB/zdefs.h"
void *Malloc( int, char * );
int zarms2(csptr Amat, int *ipar, double *droptol, int *lfil,
double tolind, arms PreMat, FILE *ft)
{
/*--------------------------------------------------------------------------
| MULTI-LEVEL BLOCK ILUT PRECONDITIONER.
| ealier version: June 23, 1999 BJS --
| version2: Dec. 07th, 2000, YS [reorganized ]
| version3: Aug. 2005. [reorganized + includes ddpq]
| Slight modifications for compatibility with complex package: Nov. 2005 DOK
+----------------------------------------------------------------------------
| ON ENTRY:
| =========
| ( Amat ) = original matrix A stored in C-style Compressed Sparse
| Row (CSR) format --
| see LIB/heads.h for the formal definition of this format.
|
| ipar[0:17] = integer array to store parameters for
| arms construction (arms2)
|
| ipar[0]:=nlev. number of levels (reduction processes).
| see also "on return" below.
|
| ipar[1]:= level-reordering option to be used.
| if ipar[1]==0 ARMS uses a block independent set ordering
| with a sort of reverse cutill Mc Kee ordering to build
| the blocks. This yields a symmetric ordering.
| in this case the reordering routine called is indsetC
| if ipar[1] == 1, then a nonsymmetric ordering is used.
| In this case, the B matrix is constructed to be as
| diagonally dominant as possible and as sparse as possble.
| in this case the reordering routine called is ddPQ.
|
| ipar[2]:=bsize. for indset Dimension of the blocks.
| bsize is only a target block size. The size of
| each block can vary and is >= bsize.
| for ddPQ: this is only the smallest size of the
| last level. arms will stop when either the number
| of levels reaches nlev (ipar[0]) or the size of the
| next level (C block) is less than bsize.
|
| ipar[3]:=iout if (iout > 0) statistics on the run are
| printed to FILE *ft
|
| ipar[4-9] NOT used [reserved for later use] - set to zero.
|
| The following set method options for arms2. Their default values can
| all be set to zero if desired.
|
| ipar[10-13] == meth[0:3] = method flags for interlevel blocks
| ipar[14-17] == meth[0:3] = method flags for last level block -
| with the following meaning in both cases:
| meth[0] nonsymmetric permutations of 1: yes. affects rperm
| USED FOR LAST SCHUR COMPLEMENT
| meth[1] permutations of columns 0:no 1: yes. So far this is
| USED ONLY FOR LAST BLOCK [ILUTP instead of ILUT].
| (so ipar[11] does no matter - enter zero). If
| ipar[15] is one then ILUTP will be used instead
| of ILUT. Permutation data stored in: perm2.
| meth[2] diag. row scaling. 0:no 1:yes. Data: D1
| meth[3] diag. column scaling. 0:no 1:yes. Data: D2
| all transformations related to parametres in meth[*] (permutation,
| scaling,..) are applied to the matrix before processing it
|
| ft = file for printing statistics on run
|
| droptol = Threshold parameters for dropping elements in ILU
| factorization.
| droptol[0:4] = related to the multilevel block factorization
| droptol[5:6] = related to ILU factorization of last block.
| This flexibility is more than is really needed. one can use
| a single parameter for all. it is preferable to use one value
| for droptol[0:4] and another (smaller) for droptol[5:6]
| droptol[0] = threshold for dropping in L [B]. See piluNEW.c:
| droptol[1] = threshold for dropping in U [B].
| droptol[2] = threshold for dropping in L^{-1} F
| droptol[3] = threshold for dropping in E U^{-1}
| droptol[4] = threshold for dropping in Schur complement
| droptol[5] = threshold for dropping in L in last block
| [see ilutpC.c]
| droptol[6] = threshold for dropping in U in last block
| [see ilutpC.c]
| This provides a rich selection - though in practice only 4
| parameters are needed [which can be set to be the same
actually] -- indeed it makes sense to take
| droptol[0] = droptol[1], droptol[2] = droptol[3],
| and droptol[4] = droptol[5]
|
| lfil = lfil[0:6] is an array containing the fill-in parameters.
| similar explanations as above, namely:
| lfil[0] = amount of fill-in kept in L [B].
| lfil[1] = amount of fill-in kept in U [B].
| lfil[2] = amount of fill-in kept in E L\inv
| lfil[3] = amount of fill-in kept in U \inv F
| lfil[4] = amount of fill-in kept in S .
| lfil[5] = amount of fill-in kept in L_S .
| lfil[6] = amount of fill-in kept in U_S
|
| tolind = tolerance parameter used by the indset function.
| a row is not accepted into the independent set if
| the *relative* diagonal tolerance is below tolind.
| see indset function for details. Good values are
| between 0.05 and 0.5 -- larger values tend to be better
| for harder problems.
|
| ON RETURN:
|=============
|
| (PreMat) = arms data structure which consists of two parts:
| levmat and ilsch.
|
| ++(levmat)= permuted and sorted matrices for each level of the block
| factorization stored in PerMat4 struct. Specifically
| each level in levmat contains the 4 matrices in:
|
|
| |\ | |
| | \ U | |
| | \ | F |
| | L \ | |
| | \ | |
| |----------+-------|
| | | |
| | E | |
| | | |
|
| plus a few other things. See LIB/heads.h for details.
|
| ++(ilsch) = IluSpar struct. If the block of the last level is:
|
| | B F |
| A_l = | |
| | E C |
|
| then IluSpar contains the block C and an ILU
| factorization (matrices L and U) for the last
| Schur complement [S ~ C - E inv(B) F ]
| (modulo dropping) see LIB/heads.h for details.
|
| ipar[0] = number of levels found (may differ from input value)
|
+---------------------------------------------------------------------*/
/*-------------------- function prototyping done in LIB/protos.h */
/*-------------------- move above to protos.h */
p4ptr levp, levc, levn, levmat = PreMat->levmat;
csptr schur, B=NULL, F=NULL, E=NULL, C=NULL;
ilutptr ilsch = PreMat->ilus;
/*-------------------- local variables (initialized) */
double *dd1, *dd2;
int nlev = ipar[0], bsize = ipar[2], iout = ipar[3], ierr = 0;
int methL[4], methS[4];
/*-------------------- local variables (not initialized) */
int nA, nB, nC, j, n, ilev, symperm;
/*-------------------- work arrays: */
int *iwork, *uwork;
/* timer arrays: */
/* double *symtime, *unstime, *factime, *tottime;*/
/*---------------------------BEGIN ARMS-------------------------------*/
/* schur matrix starts being original A */
int zsetupCS(csptr, int);
int zprtC(csptr, int);
/*-------------------- begin */
schur = (csptr) Malloc(sizeof(zSparMat), "arms2:1" );
/*---------------------------------------------------------------------
| The matrix (a,ja,ia) plays role of Schur compl. from the 0th level.
+--------------------------------------------------------------------*/
nC = nA = n = Amat->n;
if (bsize >= n) bsize = n-1;
levmat->n = n; levmat->nB = 0;
zsetupCS(schur, n);
zcscpy(Amat,schur);
levc = levmat;
/*--------------------------------------- */
levc->prev = levc->next = levp = NULL;
levc->n = 0;
memcpy(methL, &ipar[10], 4*sizeof(int));
memcpy(methS, &ipar[14], 4*sizeof(int));
/*---------------------------------------------------------------------
| The preconditioner construction is divided into two parts:
| 1st part: construct and store multi-level L and U factors;
| 2nd part: construct the ILUT factorization for the coarsest level
+--------------------------------------------------------------------*/
if ( (iout > 0) && (nlev > 0) ) {
fprintf(ft," \n");
fprintf(ft,"Level Total Unknowns B-block Coarse set\n");
fprintf(ft,"===== ============== ======= ==========\n");
}
/*---------------------------------------------------------------------
| main loop to construct multi-level LU preconditioner. Loop is on the
| level ilev. nA is the dimension of matrix A_l at each level.
+--------------------------------------------------------------------*/
for (ilev = 0; ilev < nlev; ilev++) {
/*-------------------- new nA is old nC -- from previous level */
nA = nC;
if ( nA <= bsize ) goto label1000;
/*-------------------- allocate work space */
iwork = (int *) Malloc(nA*sizeof(int), "arms2:2.5" );
symperm = 0; /* 0nly needed in cleanP4 */
if (ipar[1] == 1)
uwork = (int *) Malloc(nA*sizeof(int), "arms2:2.5" );
else{
symperm = 1;
uwork = iwork;
}
/*-------------------- SCALING*/
dd1 = NULL;
dd2 = NULL;
if (methL[2]) {
dd1 = (double *) Malloc(nA*sizeof(double), "arms2:3" );
j=zroscalC(schur, dd1,1);
if (j) printf("ERROR in roscalC - row %d is a zero row\n",j);
}
if (methL[3]) {
dd2 = (double *) Malloc(nA*sizeof(double), "arms2:4" );
j=zcoscalC(schur, dd2,1);
if (j) printf("ERROR in coscalC - column %d is a zero column\n",j);
}
/*--------------------independent-sets-permutation-------------------
| do reordering -- The matrix and its transpose are used.
+--------------------------------------------------------------------*/
/* if (SHIFTTOL > 0.0) shiftsD(schur,SHIFTTOL); */
if (ipar[1] == 1)
zPQperm(schur, bsize, uwork, iwork, &nB, tolind) ;
else
zindsetC (schur, bsize, iwork, &nB, tolind) ;
/*---------------------------------------------------------------------
| nB is the total number of nodes in the independent set.
| nC : nA - nB = the size of the reduced system.
+--------------------------------------------------------------------*/
nC = nA - nB;
/* if the size of B or C is zero , exit the main loop */
/* printf (" nB %d nC %d \n",nB, nC); */
if ( nB == 0 || nC == 0 ) goto label1000;
/*---------------------------------------------------------------------
| The matrix for the current level is in (schur).
| The permutations arrays are in iwork and uwork (row).
| The routines rpermC, cpermC permute the matrix in place.
*-----------------------------------------------------------------------*/
/* DEBUG : SHOULD THIS GO BEFORE GOTO LABEL1000 ?? */
zrpermC(schur,uwork);
zcpermC(schur,iwork);
/* zprtC(schur, ilev) ; print matrix - debugging */
/*-----------------------------------------------------------------------
| If this is the first level, the permuted matrix is stored in
| (levc) = (levmat). Otherwise, the next level is created in (levc).
+--------------------------------------------------------------------*/
if (ilev > 0) {
/*- delete C matrix of any level except last one (no longer needed) */
zcleanCS(C);
levn = (p4ptr) Malloc(sizeof(zPer4Mat), "arms2:6" );
levc->next = levn;
levp = levc;
levc = levn;
levc->prev = levp;
}
/*-------------------- p4ptr struct for current schur complement */
B = (csptr) Malloc(sizeof(zSparMat), "arms2:7" );
E = (csptr) Malloc(sizeof(zSparMat), "arms2:8" );
F = (csptr) Malloc(sizeof(zSparMat), "arms2:9" );
C = (csptr) Malloc(sizeof(zSparMat), "arms2:10" );
zcsSplit4(schur, nB, nC, B, F, E, C);
zsetupP4(levc, nB, nC, F, E);
/*-------------------- copy a few pointers ---- */
levc->perm = iwork;
levc->rperm = uwork;
levc->symperm = symperm;
levc->D1=dd1;
levc->D2=dd2;
/*---------------------------------------------------------------------
| a copy of the matrix (schur) has been permuted. Now perform the
| block factorization:
|
| | B F | | L 0 | | U L^-1 F |
| | | = | | X | | = L x U
| | E C | | E U^-1 I | | 0 A1 |
|
| The factors E U^-1 and L^-1 F are discarded after the factorization.
|
+--------------------------------------------------------------------*/
if (iout > 0)
fprintf(ft,"%3d %13d %13d %10d\n", ilev+1,nA,nB,nC);
/*---------------------------------------------------------------------
| PILUT constructs one level of the block ILU fact. The permuted matrix
| is in (levc). The L and U factors will be stored in the p4mat struct.
| destroy current Schur complement - no longer needed - and set-up new
| one for next level...
+--------------------------------------------------------------------*/
zcleanCS(schur);
schur = (csptr) Malloc(sizeof(zSparMat), "arms2:11" );
zsetupCS(schur, nC);
/*----------------------------------------------------------------------
| calling PILU to construct this level block factorization
| ! core dump in extreme case of empty matrices.
+----------------------------------------------------------------------*/
ierr = zpilu(levc, B, C, droptol, lfil, schur) ;
if (ierr) {
fprintf(ft," ERROR IN PILU -- IERR = %d\n", ierr);
return(1);
}
zcleanCS(B);
}
/*---------------------------------------------------------------------
| done with the reduction. Record the number of levels in ipar[0]
|**********************************************************************
+--------------------------------------------------------------------*/
label1000:
/* printf (" nnz_Schur %d \n",cs_nnz (schur)); */
levc->next = NULL;
ipar[0] = ilev;
PreMat->nlev = ilev;
PreMat->n = n;
nC = schur->n;
zsetupILUT(ilsch,nC);
/*--------------------------------------------------------------------*/
/* define C-matrix (member of ilsch) to be last C matrix */
if (ilev > 0) ilsch->C=C;
/*-------------------- for ilut fact of schur matrix */
/* SCALING */
ilsch->D1 = NULL;
if (methS[2]) {
ilsch->D1 = (double *) Malloc(nC*sizeof(double), "arms2:iluschD1" );
j=zroscalC(schur, ilsch->D1, 1);
if (j) printf("ERROR in roscalC - row %d is a zero row\n",j);
}
ilsch->D2 = NULL;
if (methS[3]) {
ilsch->D2 = (double *) Malloc(nC*sizeof(double), "arms2:iluschD1" );
j =zcoscalC(schur, ilsch->D2, 1);
if (j) printf("ERROR in coscalC - column %d is a zero column\n",j);
}
/*---------------------------------------------------------------------
| get ILUT factorization for the last reduced system.
+--------------------------------------------------------------------*/
uwork = NULL;
iwork = NULL;
if (methS[0]) {
iwork = (int *) Malloc(nC*sizeof(int), "arms2:3" );
uwork = (int *) Malloc(nC*sizeof(int), "arms2:3.5" );
tolind = 0.0;
zPQperm(schur, bsize, uwork, iwork, &nB, tolind) ;
zrpermC(schur,uwork);
zcpermC(schur,iwork);
}
ilsch->rperm = uwork;
ilsch->perm = iwork;
/* printf(" lf : %d %d %d %d %d %d %d \n",lfil[0],
lfil[1], lfil[2], lfil[3], lfil[4], lfil[5], lfil[6]) ; */
ilsch->perm2 = NULL;
if (methS[1] == 0)
ierr = zilutD(schur, droptol, lfil, ilsch);
else {
ilsch->perm2 = (int *) Malloc(nC*sizeof(int), "arms2:ilutpC" );
for (j=0; j<nC; j++)
ilsch->perm2[j] = j;
ierr = zilutpC(schur, droptol, lfil, PERMTOL, nC, ilsch, 0);
}
/*---------- OPTIMIZATION: NEED TO COMPOUND THE TWO
RIGHT PERMUTATIONS -- CHANGES HERE AND IN
USCHUR SOLVE == compound permutations */
if (ierr) {
fprintf(ft," ERROR IN ILUT -- IERR = %d\n", ierr);
return(1);
}
/* Last Schur complement no longer needed */
zcleanCS(schur);
return 0;
}/*-----end-of-ARMS2----------------------------------------------------
+--------------------------------------------------------------------*/