Actual source code: ex18.c

static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.\n\
Contributed-by: Stephan Kramer <s.kramer@imperial.ac.uk>\n\n";

#include <petscmat.h>

int main(int argc,char **args)
{
  Mat            A;
  Vec            x, rhs, y;
  PetscInt       i,j,k,b,m = 3,n,nlocal=2,bs=1,Ii,J;
  PetscInt       *boundary_nodes, nboundary_nodes, *boundary_indices;
  PetscMPIInt    rank,size;
  PetscScalar    v,v0,v1,v2,a0=0.1,a,rhsval, *boundary_values,diag = 1.0;
  PetscReal      norm;
  char           convname[64];
  PetscBool      upwind = PETSC_FALSE, nonlocalBC = PETSC_FALSE, zerorhs = PETSC_TRUE, convert = PETSC_FALSE;

  PetscInitialize(&argc,&args,(char*)0,help);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  n = nlocal*size;

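  /* Runtime options: -bs sets the number of unknowns per grid node, -nonlocal_bc lets
     non-owning ranks list boundary nodes, -diag is the value MatZeroRowsColumns() places
     on the zeroed diagonal, -convname converts the assembled matrix to another type, and
     -zerorhs (default true) replaces the assembled right-hand side with zeros. */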
  PetscOptionsGetInt(NULL,NULL, "-bs", &bs, NULL);
  PetscOptionsGetBool(NULL,NULL, "-nonlocal_bc", &nonlocalBC, NULL);
  PetscOptionsGetScalar(NULL,NULL, "-diag", &diag, NULL);
  PetscOptionsGetString(NULL,NULL,"-convname",convname,sizeof(convname),&convert);
  PetscOptionsGetBool(NULL,NULL, "-zerorhs", &zerorhs, NULL);

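  /* Create the (m*n*bs) x (m*n*bs) matrix with a PETSC_DECIDE row distribution; the
     matrix type can be overridden on the command line with -mat_type. */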
  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n*bs,m*n*bs);
  MatSetFromOptions(A);
  MatSetUp(A);

  MatCreateVecs(A, NULL, &rhs);
  VecSetFromOptions(rhs);
  VecSetUp(rhs);

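  /* Assemble a discretization on an m x n grid of nodes with bs unknowns per node.
     Each rank generates the entries for its slice of nlocal grid columns, which includes
     matrix rows it does not own; MatSetValues()/VecSetValue() communicate those
     off-process entries during assembly. */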
  rhsval = 0.0;
  for (i=0; i<m; i++) {
    for (j=nlocal*rank; j<nlocal*(rank+1); j++) {
      a = a0;
      for (b=0; b<bs; b++) {
        /* let's start with a 5-point stencil diffusion term */
        v = -1.0;  Ii = (j + n*i)*bs + b;
        if (i>0)   {J = Ii - n*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (i<m-1) {J = Ii + n*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (j>0)   {J = Ii - 1*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (j<n-1) {J = Ii + 1*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        v = 4.0; MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES);
        if (upwind) {
          /* now add a 2nd order upwind advection term to add a little asymmetry */
          if (j>2) {
            J = Ii-2*bs; v2 = 0.5*a; v1 = -2.0*a; v0 = 1.5*a;
            MatSetValues(A,1,&Ii,1,&J,&v2,ADD_VALUES);
          } else {
            /* fall back to 1st order upwind */
            v1 = -1.0*a; v0 = 1.0*a;
          }
          if (j>1) {J = Ii-1*bs; MatSetValues(A,1,&Ii,1,&J,&v1,ADD_VALUES);}
          MatSetValues(A,1,&Ii,1,&Ii,&v0,ADD_VALUES);
          a /= 10.; /* use a different velocity for the next component */
          /* add a coupling to the previous and next components */
          v = 0.5;
          if (b>0) {J = Ii - 1; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
          if (b<bs-1) {J = Ii + 1; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        }
        /* make up some rhs */
        VecSetValue(rhs, Ii, rhsval, INSERT_VALUES);
        rhsval += 1.0;
      }
    }
  }
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

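  /* MatConvert() lets the same MatZeroRowsColumns() test run against other matrix
     implementations (e.g. the shell, dense and cuSPARSE types exercised in the TEST block below). */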
  if (convert) { /* Test different Mat implementations */
    Mat B;

    MatConvert(A,convname,MAT_INITIAL_MATRIX,&B);
    MatDestroy(&A);
    A    = B;
  }

  VecAssemblyBegin(rhs);
  VecAssemblyEnd(rhs);
  /* set rhs to zero to simplify */
  if (zerorhs) {
    VecZeroEntries(rhs);
  }

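  /* The Dirichlet "boundary" is the first grid row (i=0) plus the first grid column (j=0).
     With -nonlocal_bc the boundary nodes are listed by ranks that do not necessarily own
     them; otherwise each rank lists only the boundary nodes it owns. */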
  if (nonlocalBC) {
    /* version where boundary conditions are set by processes that don't necessarily own the nodes */
    if (rank == 0) {
      nboundary_nodes = size>m ? nlocal : m-size+nlocal;
      PetscMalloc1(nboundary_nodes,&boundary_nodes);
      k = 0;
      for (i=size; i<m; i++,k++) {boundary_nodes[k] = n*i;}
    } else if (rank < m) {
      nboundary_nodes = nlocal+1;
      PetscMalloc1(nboundary_nodes,&boundary_nodes);
      boundary_nodes[0] = rank*n;
      k = 1;
    } else {
      nboundary_nodes = nlocal;
      PetscMalloc1(nboundary_nodes,&boundary_nodes);
      k = 0;
    }
    for (j=nlocal*rank; j<nlocal*(rank+1); j++,k++) {boundary_nodes[k] = j;}
  } else {
    /* version where boundary conditions are set by the node owners only */
    PetscMalloc1(m*n,&boundary_nodes);
    k=0;
    for (j=0; j<n; j++) {
      Ii = j;
      if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii;
    }
    for (i=1; i<m; i++) {
      Ii = n*i;
      if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii;
    }
    nboundary_nodes = k;
  }

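  /* Prescribe a distinct value for every component of every boundary node (so mistakes in
     the index handling show up in the output) and insert those values into the vector x. */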
  VecDuplicate(rhs, &x);
  VecZeroEntries(x);
  PetscMalloc2(nboundary_nodes*bs,&boundary_indices,nboundary_nodes*bs,&boundary_values);
  for (k=0; k<nboundary_nodes; k++) {
    Ii = boundary_nodes[k]*bs;
    v = 1.0*boundary_nodes[k];
    for (b=0; b<bs; b++, Ii++) {
      boundary_indices[k*bs+b] = Ii;
      boundary_values[k*bs+b] = v;
      PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%d %" PetscInt_FMT " %f\n", rank, Ii, (double)PetscRealPart(v));
      v += 0.1;
    }
  }
  PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL);
  VecSetValues(x, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES);
  VecAssemblyBegin(x);
  VecAssemblyEnd(x);

  /* We can check the rhs returned by MatZeroRowsColumns() by computing y=rhs-A*x and overwriting the boundary entries with diag times the boundary values */
  VecDuplicate(x, &y);
  MatMult(A, x, y);
  VecAYPX(y, -1.0, rhs);
  for (k=0; k<nboundary_nodes*bs; k++) boundary_values[k] *= diag;
  VecSetValues(y, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES);
  VecAssemblyBegin(y);
  VecAssemblyEnd(y);

  PetscPrintf(PETSC_COMM_WORLD, "*** Matrix A and vector x:\n");
  MatView(A, PETSC_VIEWER_STDOUT_WORLD);
  VecView(x,PETSC_VIEWER_STDOUT_WORLD);

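  /* MatZeroRowsColumns() zeros the listed rows and columns, puts diag on their diagonal
     entries, and adjusts rhs so the eliminated unknowns keep the values given in x:
     boundary rows of rhs become diag*x, and the A*x contributions of the zeroed columns
     are subtracted from the remaining rows. */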
  MatZeroRowsColumns(A, nboundary_nodes*bs, boundary_indices, diag, x, rhs);
  PetscPrintf(PETSC_COMM_WORLD, "*** Vector rhs returned by MatZeroRowsColumns\n");
  VecView(rhs,PETSC_VIEWER_STDOUT_WORLD);
  VecAXPY(y, -1.0, rhs);
  VecNorm(y, NORM_INFINITY, &norm);
  if (norm > 1.0e-10) {
    PetscPrintf(PETSC_COMM_WORLD, "*** Difference between rhs and y, inf-norm: %f\n", (double)norm);
    VecView(y,PETSC_VIEWER_STDOUT_WORLD);
    SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Bug in MatZeroRowsColumns");
  }

  PetscFree(boundary_nodes);
  PetscFree2(boundary_indices,boundary_values);
  VecDestroy(&x);
  VecDestroy(&y);
  VecDestroy(&rhs);
  MatDestroy(&A);

  PetscFinalize();
  return 0;
}

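/* The block below drives the PETSc test harness: each "test:" entry reruns this example
   with the given number of MPI ranks (nsize) and command-line arguments (args), and its
   output is diffed against the stored reference output for that suffix. */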
/*TEST

   test:
      diff_args: -j
      suffix: 0

   test:
      diff_args: -j
      suffix: 1
      nsize: 2

   test:
      diff_args: -j
      suffix: 10
      nsize: 2
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 11
      nsize: 7
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 12
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 13
      nsize: 2
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 14
      nsize: 7
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 2
      nsize: 7

   test:
      diff_args: -j
      suffix: 3
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 4
      nsize: 2
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 5
      nsize: 7
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 6
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 7
      nsize: 2
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 8
      nsize: 7
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 9
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 15
      args: -bs 2 -nonlocal_bc -convname shell

   test:
      diff_args: -j
      suffix: 16
      nsize: 2
      args: -bs 2 -nonlocal_bc -convname shell

   test:
      diff_args: -j
      suffix: 17
      args: -bs 2 -nonlocal_bc -convname dense

   testset:
      diff_args: -j
      suffix: full
      nsize: {{1 3}separate output}
      args: -diag {{0.12 -0.13}separate output} -convname {{aij shell baij}separate output} -zerorhs 0

   test:
      diff_args: -j
      requires: cuda
      suffix: cusparse_1
      nsize: 1
      args: -diag {{0.12 -0.13}separate output} -convname {{seqaijcusparse mpiaijcusparse}separate output} -zerorhs 0 -mat_type {{seqaijcusparse mpiaijcusparse}separate output}

   test:
      diff_args: -j
      requires: cuda
      suffix: cusparse_3
      nsize: 3
      args: -diag {{0.12 -0.13}separate output} -convname mpiaijcusparse -zerorhs 0 -mat_type mpiaijcusparse

TEST*/