Actual source code: bvec1.c


  2: /*
  3:    Defines the BLAS based vector operations. Code shared by parallel
  4:   and sequential vectors.
  5: */

  7: #include <../src/vec/vec/impls/dvecimpl.h>
  8: #include <petscblaslapack.h>

 10: PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
 11: {
 12:   const PetscScalar *ya,*xa;
 13:   PetscBLASInt      one = 1,bn = 0;

 15:   PetscBLASIntCast(xin->map->n,&bn);
 16:   VecGetArrayRead(xin,&xa);
 17:   VecGetArrayRead(yin,&ya);
 18:   /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
 19:   PetscStackCallBLAS("BLASdot",*z   = BLASdot_(&bn,ya,&one,xa,&one));
 20:   VecRestoreArrayRead(xin,&xa);
 21:   VecRestoreArrayRead(yin,&ya);
 22:   if (xin->map->n > 0) {
 23:     PetscLogFlops(2.0*xin->map->n-1);
 24:   }
 25:   return 0;
 26: }

 28: PetscErrorCode VecTDot_Seq(Vec xin,Vec yin,PetscScalar *z)
 29: {
 30:   const PetscScalar *ya,*xa;
 31:   PetscBLASInt      one = 1,bn = 0;

 33:   PetscBLASIntCast(xin->map->n,&bn);
 34:   VecGetArrayRead(xin,&xa);
 35:   VecGetArrayRead(yin,&ya);
 36:   PetscStackCallBLAS("BLASdot",*z   = BLASdotu_(&bn,xa,&one,ya,&one));
 37:   VecRestoreArrayRead(xin,&xa);
 38:   VecRestoreArrayRead(yin,&ya);
 39:   if (xin->map->n > 0) {
 40:     PetscLogFlops(2.0*xin->map->n-1);
 41:   }
 42:   return 0;
 43: }

 45: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
 46: {
 47:   PetscBLASInt   one = 1,bn;

 49:   PetscBLASIntCast(xin->map->n,&bn);
 50:   if (alpha == (PetscScalar)0.0) {
 51:     VecSet_Seq(xin,alpha);
 52:   } else if (alpha != (PetscScalar)1.0) {
 53:     PetscScalar a = alpha,*xarray;
 54:     VecGetArray(xin,&xarray);
 55:     PetscStackCallBLAS("BLASscal",BLASscal_(&bn,&a,xarray,&one));
 56:     VecRestoreArray(xin,&xarray);
 57:   }
 58:   PetscLogFlops(xin->map->n);
 59:   return 0;
 60: }

 62: PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
 63: {
 64:   const PetscScalar *xarray;
 65:   PetscScalar       *yarray;
 66:   PetscBLASInt      one = 1,bn;

 68:   PetscBLASIntCast(yin->map->n,&bn);
 69:   /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
 70:   if (alpha != (PetscScalar)0.0) {
 71:     VecGetArrayRead(xin,&xarray);
 72:     VecGetArray(yin,&yarray);
 73:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bn,&alpha,xarray,&one,yarray,&one));
 74:     VecRestoreArrayRead(xin,&xarray);
 75:     VecRestoreArray(yin,&yarray);
 76:     PetscLogFlops(2.0*yin->map->n);
 77:   }
 78:   return 0;
 79: }

 81: PetscErrorCode VecAXPBY_Seq(Vec yin,PetscScalar a,PetscScalar b,Vec xin)
 82: {
 83:   PetscInt          n = yin->map->n,i;
 84:   const PetscScalar *xx;
 85:   PetscScalar       *yy;

 87:   if (a == (PetscScalar)0.0) {
 88:     VecScale_Seq(yin,b);
 89:   } else if (b == (PetscScalar)1.0) {
 90:     VecAXPY_Seq(yin,a,xin);
 91:   } else if (a == (PetscScalar)1.0) {
 92:     VecAYPX_Seq(yin,b,xin);
 93:   } else if (b == (PetscScalar)0.0) {
 94:     VecGetArrayRead(xin,&xx);
 95:     VecGetArray(yin,(PetscScalar**)&yy);
 96:     for (i=0; i<n; i++) yy[i] = a*xx[i];
 97:     VecRestoreArrayRead(xin,&xx);
 98:     VecRestoreArray(yin,(PetscScalar**)&yy);
 99:     PetscLogFlops(xin->map->n);
100:   } else {
101:     VecGetArrayRead(xin,&xx);
102:     VecGetArray(yin,(PetscScalar**)&yy);
103:     for (i=0; i<n; i++) yy[i] = a*xx[i] + b*yy[i];
104:     VecRestoreArrayRead(xin,&xx);
105:     VecRestoreArray(yin,(PetscScalar**)&yy);
106:     PetscLogFlops(3.0*xin->map->n);
107:   }
108:   return 0;
109: }

111: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
112: {
113:   PetscInt          n = zin->map->n,i;
114:   const PetscScalar *yy,*xx;
115:   PetscScalar       *zz;

117:   VecGetArrayRead(xin,&xx);
118:   VecGetArrayRead(yin,&yy);
119:   VecGetArray(zin,&zz);
120:   if (alpha == (PetscScalar)1.0) {
121:     for (i=0; i<n; i++) zz[i] = xx[i] + beta*yy[i] + gamma*zz[i];
122:     PetscLogFlops(4.0*n);
123:   } else if (gamma == (PetscScalar)1.0) {
124:     for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i] + zz[i];
125:     PetscLogFlops(4.0*n);
126:   } else if (gamma == (PetscScalar)0.0) {
127:     for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i];
128:     PetscLogFlops(3.0*n);
129:   } else {
130:     for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i] + gamma*zz[i];
131:     PetscLogFlops(5.0*n);
132:   }
133:   VecRestoreArrayRead(xin,&xx);
134:   VecRestoreArrayRead(yin,&yy);
135:   VecRestoreArray(zin,&zz);
136:   return 0;
137: }