Actual source code: vechip.hip.cpp

  1: /*
  2:  Implementation of the sequential hip vectors.

  4:  This file contains the code that can be compiled with a C
  5:  compiler.  The companion file vechip2.hip.cpp contains the code that
  6:  must be compiled with hipcc compiler.
  7:  */

  9: #define PETSC_SKIP_SPINLOCK

 11: #include <petscconf.h>
 12: #include <petsc/private/vecimpl.h>
 13: #include <../src/vec/vec/impls/dvecimpl.h>
 14: #include <petsc/private/hipvecimpl.h>

 16: PetscErrorCode VecHIPGetArrays_Private(Vec v,const PetscScalar** x,const PetscScalar** x_d,PetscOffloadMask* flg)
 17: {
 19:   if (x) {
 20:     Vec_Seq *h = (Vec_Seq*)v->data;

 22:     *x = h->array;
 23:   }
 24:   if (x_d) {
 25:     Vec_HIP *d = (Vec_HIP*)v->spptr;

 27:     *x_d = d ? d->GPUarray : NULL;
 28:   }
 29:   if (flg) *flg = v->offloadmask;
 30:   return 0;
 31: }

 33: /*
 34:     Allocates space for the vector array on the Host if it does not exist.
 35:     Does NOT change the PetscHIPFlag for the vector
 36:     Does NOT zero the HIP array
 37:  */
 38: PetscErrorCode VecHIPAllocateCheckHost(Vec v)
 39: {
 40:   PetscScalar *array;
 41:   Vec_Seq     *s = (Vec_Seq*)v->data;
 42:   PetscInt     n = v->map->n;

 44:   if (!s) {
 45:     PetscNewLog((PetscObject)v,&s);
 46:     v->data = s;
 47:   }
 48:   if (!s->array) {
 49:     if (n*sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) {
 50:       PetscMallocSetHIPHost();
 51:       v->pinned_memory = PETSC_TRUE;
 52:     }
 53:     PetscMalloc1(n,&array);
 54:     PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
 55:     s->array           = array;
 56:     s->array_allocated = array;
 57:     if (n*sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) {
 58:       PetscMallocResetHIPHost();
 59:     }
 60:     if (v->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
 61:       v->offloadmask = PETSC_OFFLOAD_CPU;
 62:     }
 63:   }
 64:   return 0;
 65: }

 67: PetscErrorCode VecCopy_SeqHIP_Private(Vec xin,Vec yin)
 68: {
 69:   PetscScalar       *ya;
 70:   const PetscScalar *xa;

 72:   VecHIPAllocateCheckHost(xin);
 73:   VecHIPAllocateCheckHost(yin);
 74:   if (xin != yin) {
 75:     VecGetArrayRead(xin,&xa);
 76:     VecGetArray(yin,&ya);
 77:     PetscArraycpy(ya,xa,xin->map->n);
 78:     VecRestoreArrayRead(xin,&xa);
 79:     VecRestoreArray(yin,&ya);
 80:   }
 81:   return 0;
 82: }

 84: PetscErrorCode VecSetRandom_SeqHIP(Vec xin,PetscRandom r)
 85: {
 86:   PetscInt     n = xin->map->n;
 87:   PetscScalar *xx;

 89:   VecGetArrayWrite(xin,&xx);
 90:   PetscRandomGetValues(r,n,xx);
 91:   VecRestoreArrayWrite(xin,&xx);
 92:   return 0;
 93: }

 95: PetscErrorCode VecDestroy_SeqHIP_Private(Vec v)
 96: {
 97:   Vec_Seq *vs = (Vec_Seq*)v->data;

 99:   PetscObjectSAWsViewOff(v);
100: #if defined(PETSC_USE_LOG)
101:   PetscLogObjectState((PetscObject)v,"Length=%" PetscInt_FMT,v->map->n);
102: #endif
103:   if (vs) {
104:     if (vs->array_allocated) {
105:       if (v->pinned_memory) {
106:         PetscMallocSetHIPHost();
107:       }
108:       PetscFree(vs->array_allocated);
109:       if (v->pinned_memory) {
110:         PetscMallocResetHIPHost();
111:         v->pinned_memory = PETSC_FALSE;
112:       }
113:     }
114:     PetscFree(vs);
115:   }
116:   return 0;
117: }

119: PetscErrorCode VecResetArray_SeqHIP_Private(Vec vin)
120: {
121:   Vec_Seq *v = (Vec_Seq*)vin->data;

123:   v->array         = v->unplacedarray;
124:   v->unplacedarray = 0;
125:   return 0;
126: }

128: PetscErrorCode VecResetArray_SeqHIP(Vec vin)
129: {
130:   VecHIPCopyFromGPU(vin);
131:   VecResetArray_SeqHIP_Private(vin);
132:   vin->offloadmask = PETSC_OFFLOAD_CPU;
133:   return 0;
134: }

136: PetscErrorCode VecPlaceArray_SeqHIP(Vec vin,const PetscScalar *a)
137: {
138:   VecHIPCopyFromGPU(vin);
139:   VecPlaceArray_Seq(vin,a);
140:   vin->offloadmask = PETSC_OFFLOAD_CPU;
141:   return 0;
142: }

144: PetscErrorCode VecReplaceArray_SeqHIP(Vec vin,const PetscScalar *a)
145: {
146:   Vec_Seq *vs = (Vec_Seq*)vin->data;

148:   if (vs->array != vs->array_allocated) {
149:     /* make sure the users array has the latest values */
150:     VecHIPCopyFromGPU(vin);
151:   }
152:   if (vs->array_allocated) {
153:     if (vin->pinned_memory) {
154:       PetscMallocSetHIPHost();
155:     }
156:     PetscFree(vs->array_allocated);
157:     if (vin->pinned_memory) {
158:       PetscMallocResetHIPHost();
159:     }
160:   }
161:   vin->pinned_memory = PETSC_FALSE;
162:   vs->array_allocated = vs->array = (PetscScalar*)a;
163:   vin->offloadmask = PETSC_OFFLOAD_CPU;
164:   return 0;
165: }

167: /*@
168:  VecCreateSeqHIP - Creates a standard, sequential array-style vector.

170:  Collective

172:  Input Parameter:
173:  +  comm - the communicator, should be PETSC_COMM_SELF
174:  -  n - the vector length

176:  Output Parameter:
177:  .  v - the vector

179:  Notes:
180:  Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
181:  same type as an existing vector.

183:  Level: intermediate

185:  .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
186:  @*/
187: PetscErrorCode VecCreateSeqHIP(MPI_Comm comm,PetscInt n,Vec *v)
188: {
189:   VecCreate(comm,v);
190:   VecSetSizes(*v,n,n);
191:   VecSetType(*v,VECSEQHIP);
192:   return 0;
193: }

195: PetscErrorCode VecDuplicate_SeqHIP(Vec win,Vec *V)
196: {
197:   VecCreateSeqHIP(PetscObjectComm((PetscObject)win),win->map->n,V);
198:   PetscLayoutReference(win->map,&(*V)->map);
199:   PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
200:   PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
201:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
202:   return 0;
203: }

205: PetscErrorCode VecCreate_SeqHIP(Vec V)
206: {
207:   PetscDeviceInitialize(PETSC_DEVICE_HIP);
208:   PetscLayoutSetUp(V->map);
209:   VecHIPAllocateCheck(V);
210:   VecCreate_SeqHIP_Private(V,((Vec_HIP*)V->spptr)->GPUarray_allocated);
211:   VecSet_SeqHIP(V,0.0);
212:   return 0;
213: }

215: /*@C
216:    VecCreateSeqHIPWithArray - Creates a HIP sequential array-style vector,
217:    where the user provides the array space to store the vector values. The array
218:    provided must be a GPU array.

220:    Collective

222:    Input Parameters:
223: +  comm - the communicator, should be PETSC_COMM_SELF
224: .  bs - the block size
225: .  n - the vector length
226: -  array - GPU memory where the vector elements are to be stored.

228:    Output Parameter:
229: .  V - the vector

231:    Notes:
232:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
233:    same type as an existing vector.

235:    If the user-provided array is NULL, then VecHIPPlaceArray() can be used
236:    at a later stage to SET the array for storing the vector values.

238:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
239:    The user should not free the array until the vector is destroyed.

241:    Level: intermediate

243: .seealso: VecCreateMPIHIPWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
244:           VecCreateGhost(), VecCreateSeq(), VecHIPPlaceArray(), VecCreateSeqWithArray(),
245:           VecCreateMPIWithArray()
246: @*/
247: PetscErrorCode  VecCreateSeqHIPWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar array[],Vec *V)
248: {
249:   PetscDeviceInitialize(PETSC_DEVICE_HIP);
250:   VecCreate(comm,V);
251:   VecSetSizes(*V,n,n);
252:   VecSetBlockSize(*V,bs);
253:   VecCreate_SeqHIP_Private(*V,array);
254:   return 0;
255: }

257: /*@C
258:    VecCreateSeqHIPWithArrays - Creates a HIP sequential array-style vector,
259:    where the user provides the array space to store the vector values.

261:    Collective

263:    Input Parameters:
264: +  comm - the communicator, should be PETSC_COMM_SELF
265: .  bs - the block size
266: .  n - the vector length
267: -  cpuarray - CPU memory where the vector elements are to be stored.
268: -  gpuarray - GPU memory where the vector elements are to be stored.

270:    Output Parameter:
271: .  V - the vector

273:    Notes:
274:    If both cpuarray and gpuarray are provided, the caller must ensure that
275:    the provided arrays have identical values.

277:    PETSc does NOT free the provided arrays when the vector is destroyed via
278:    VecDestroy(). The user should not free the array until the vector is
279:    destroyed.

281:    Level: intermediate

283: .seealso: VecCreateMPIHIPWithArrays(), VecCreate(), VecCreateSeqWithArray(),
284:           VecHIPPlaceArray(), VecCreateSeqHIPWithArray(),
285:           VecHIPAllocateCheckHost()
286: @*/
287: PetscErrorCode  VecCreateSeqHIPWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const PetscScalar gpuarray[],Vec *V)
288: {
289:   // set V's gpuarray to be gpuarray, do not allocate memory on host yet.
290:   VecCreateSeqHIPWithArray(comm,bs,n,gpuarray,V);

292:   if (cpuarray && gpuarray) {
293:     Vec_Seq *s = (Vec_Seq*)((*V)->data);
294:     s->array = (PetscScalar*)cpuarray;
295:     (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
296:   } else if (cpuarray) {
297:     Vec_Seq *s = (Vec_Seq*)((*V)->data);
298:     s->array = (PetscScalar*)cpuarray;
299:     (*V)->offloadmask = PETSC_OFFLOAD_CPU;
300:   } else if (gpuarray) {
301:     (*V)->offloadmask = PETSC_OFFLOAD_GPU;
302:   } else {
303:     (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
304:   }

306:   return 0;
307: }

309: PetscErrorCode VecGetArray_SeqHIP(Vec v,PetscScalar **a)
310: {
311:   VecHIPCopyFromGPU(v);
312:   *a   = *((PetscScalar**)v->data);
313:   return 0;
314: }

316: PetscErrorCode VecRestoreArray_SeqHIP(Vec v,PetscScalar **a)
317: {
318:   v->offloadmask = PETSC_OFFLOAD_CPU;
319:   return 0;
320: }

322: PetscErrorCode VecGetArrayWrite_SeqHIP(Vec v,PetscScalar **a)
323: {
324:   VecHIPAllocateCheckHost(v);
325:   *a   = *((PetscScalar**)v->data);
326:   return 0;
327: }

329: PetscErrorCode VecGetArrayAndMemType_SeqHIP(Vec v,PetscScalar** a,PetscMemType *mtype)
330: {
331:   VecHIPCopyToGPU(v);
332:   *a   = ((Vec_HIP*)v->spptr)->GPUarray;
333:   if (mtype) *mtype = PETSC_MEMTYPE_HIP;
334:   return 0;
335: }

337: PetscErrorCode VecRestoreArrayAndMemType_SeqHIP(Vec v,PetscScalar** a)
338: {
339:   v->offloadmask = PETSC_OFFLOAD_GPU;
340:   return 0;
341: }

343: PetscErrorCode VecGetArrayWriteAndMemType_SeqHIP(Vec v,PetscScalar** a,PetscMemType *mtype)
344: {
345:   /* Allocate memory (not zeroed) on device if not yet, but no need to sync data from host to device */
346:   VecHIPAllocateCheck(v);
347:   *a   = ((Vec_HIP*)v->spptr)->GPUarray;
348:   if (mtype) *mtype = PETSC_MEMTYPE_HIP;
349:   return 0;
350: }

352: PetscErrorCode VecBindToCPU_SeqHIP(Vec V,PetscBool bind)
353: {
354:   V->boundtocpu = bind;
355:   if (bind) {
356:     VecHIPCopyFromGPU(V);
357:     V->offloadmask                 = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
358:     V->ops->dot                    = VecDot_Seq;
359:     V->ops->norm                   = VecNorm_Seq;
360:     V->ops->tdot                   = VecTDot_Seq;
361:     V->ops->scale                  = VecScale_Seq;
362:     V->ops->copy                   = VecCopy_Seq;
363:     V->ops->set                    = VecSet_Seq;
364:     V->ops->swap                   = VecSwap_Seq;
365:     V->ops->axpy                   = VecAXPY_Seq;
366:     V->ops->axpby                  = VecAXPBY_Seq;
367:     V->ops->axpbypcz               = VecAXPBYPCZ_Seq;
368:     V->ops->pointwisemult          = VecPointwiseMult_Seq;
369:     V->ops->pointwisedivide        = VecPointwiseDivide_Seq;
370:     V->ops->setrandom              = VecSetRandom_Seq;
371:     V->ops->dot_local              = VecDot_Seq;
372:     V->ops->tdot_local             = VecTDot_Seq;
373:     V->ops->norm_local             = VecNorm_Seq;
374:     V->ops->mdot_local             = VecMDot_Seq;
375:     V->ops->mtdot_local            = VecMTDot_Seq;
376:     V->ops->maxpy                  = VecMAXPY_Seq;
377:     V->ops->mdot                   = VecMDot_Seq;
378:     V->ops->mtdot                  = VecMTDot_Seq;
379:     V->ops->aypx                   = VecAYPX_Seq;
380:     V->ops->waxpy                  = VecWAXPY_Seq;
381:     V->ops->dotnorm2               = NULL;
382:     V->ops->placearray             = VecPlaceArray_Seq;
383:     V->ops->replacearray           = VecReplaceArray_SeqHIP;
384:     V->ops->resetarray             = VecResetArray_Seq;
385:     V->ops->duplicate              = VecDuplicate_Seq;
386:     V->ops->conjugate              = VecConjugate_Seq;
387:     V->ops->getlocalvector         = NULL;
388:     V->ops->restorelocalvector     = NULL;
389:     V->ops->getlocalvectorread     = NULL;
390:     V->ops->restorelocalvectorread = NULL;
391:     V->ops->getarraywrite          = NULL;
392:     V->ops->getarrayandmemtype     = NULL;
393:     V->ops->restorearrayandmemtype = NULL;
394:     V->ops->getarraywriteandmemtype= NULL;
395:     V->ops->max                    = VecMax_Seq;
396:     V->ops->min                    = VecMin_Seq;
397:     V->ops->reciprocal             = VecReciprocal_Default;
398:     V->ops->sum                    = NULL;
399:     V->ops->shift                  = NULL;
400:   } else {
401:     V->ops->dot                    = VecDot_SeqHIP;
402:     V->ops->norm                   = VecNorm_SeqHIP;
403:     V->ops->tdot                   = VecTDot_SeqHIP;
404:     V->ops->scale                  = VecScale_SeqHIP;
405:     V->ops->copy                   = VecCopy_SeqHIP;
406:     V->ops->set                    = VecSet_SeqHIP;
407:     V->ops->swap                   = VecSwap_SeqHIP;
408:     V->ops->axpy                   = VecAXPY_SeqHIP;
409:     V->ops->axpby                  = VecAXPBY_SeqHIP;
410:     V->ops->axpbypcz               = VecAXPBYPCZ_SeqHIP;
411:     V->ops->pointwisemult          = VecPointwiseMult_SeqHIP;
412:     V->ops->pointwisedivide        = VecPointwiseDivide_SeqHIP;
413:     V->ops->setrandom              = VecSetRandom_SeqHIP;
414:     V->ops->dot_local              = VecDot_SeqHIP;
415:     V->ops->tdot_local             = VecTDot_SeqHIP;
416:     V->ops->norm_local             = VecNorm_SeqHIP;
417:     V->ops->mdot_local             = VecMDot_SeqHIP;
418:     V->ops->maxpy                  = VecMAXPY_SeqHIP;
419:     V->ops->mdot                   = VecMDot_SeqHIP;
420:     V->ops->aypx                   = VecAYPX_SeqHIP;
421:     V->ops->waxpy                  = VecWAXPY_SeqHIP;
422:     V->ops->dotnorm2               = VecDotNorm2_SeqHIP;
423:     V->ops->placearray             = VecPlaceArray_SeqHIP;
424:     V->ops->replacearray           = VecReplaceArray_SeqHIP;
425:     V->ops->resetarray             = VecResetArray_SeqHIP;
426:     V->ops->destroy                = VecDestroy_SeqHIP;
427:     V->ops->duplicate              = VecDuplicate_SeqHIP;
428:     V->ops->conjugate              = VecConjugate_SeqHIP;
429:     V->ops->getlocalvector         = VecGetLocalVector_SeqHIP;
430:     V->ops->restorelocalvector     = VecRestoreLocalVector_SeqHIP;
431:     V->ops->getlocalvectorread     = VecGetLocalVectorRead_SeqHIP;
432:     V->ops->restorelocalvectorread = VecRestoreLocalVectorRead_SeqHIP;
433:     V->ops->getarraywrite          = VecGetArrayWrite_SeqHIP;
434:     V->ops->getarray               = VecGetArray_SeqHIP;
435:     V->ops->restorearray           = VecRestoreArray_SeqHIP;
436:     V->ops->getarrayandmemtype     = VecGetArrayAndMemType_SeqHIP;
437:     V->ops->restorearrayandmemtype = VecRestoreArrayAndMemType_SeqHIP;
438:     V->ops->getarraywriteandmemtype= VecGetArrayWriteAndMemType_SeqHIP;
439:     V->ops->max                    = VecMax_SeqHIP;
440:     V->ops->min                    = VecMin_SeqHIP;
441:     V->ops->reciprocal             = VecReciprocal_SeqHIP;
442:     V->ops->sum                    = VecSum_SeqHIP;
443:     V->ops->shift                  = VecShift_SeqHIP;
444:   }
445:   return 0;
446: }

448: PetscErrorCode VecCreate_SeqHIP_Private(Vec V,const PetscScalar *array)
449: {
450:   Vec_HIP       *vechip;
451:   PetscMPIInt    size;
452:   PetscBool      option_set;

454:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
456:   VecCreate_Seq_Private(V,0);
457:   PetscObjectChangeTypeName((PetscObject)V,VECSEQHIP);
458:   VecBindToCPU_SeqHIP(V,PETSC_FALSE);
459:   V->ops->bindtocpu = VecBindToCPU_SeqHIP;

461:   /* Later, functions check for the Vec_HIP structure existence, so do not create it without array */
462:   if (array) {
463:     if (!V->spptr) {
464:       PetscReal      pinned_memory_min;

467:       PetscCalloc(sizeof(Vec_HIP),&V->spptr);
468:       vechip = (Vec_HIP*)V->spptr;
469:       V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

471:       pinned_memory_min = 0;
472:       /* Need to parse command line for minimum size to use for pinned memory allocations on host here.
473:          Note: This same code duplicated in VecHIPAllocateCheck() and VecCreate_MPIHIP_Private(). Is there a good way to avoid this? */
474:       PetscOptionsBegin(PetscObjectComm((PetscObject)V),((PetscObject)V)->prefix,"VECHIP Options","Vec");
475:       PetscOptionsReal("-vec_pinned_memory_min","Minimum size (in bytes) for an allocation to use pinned memory on host","VecSetPinnedMemoryMin",pinned_memory_min,&pinned_memory_min,&option_set);
476:       if (option_set) V->minimum_bytes_pinned_memory = pinned_memory_min;
477:       PetscOptionsEnd();
478:     }
479:     vechip = (Vec_HIP*)V->spptr;
480:     vechip->GPUarray = (PetscScalar*)array;
481:     V->offloadmask = PETSC_OFFLOAD_GPU;

483:   }
484:   return 0;
485: }