Actual source code: nepdefl.c

slepc-3.20.2 2024-03-15
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/nepimpl.h>
 12: #include <slepcblaslapack.h>
 13: #include "nepdefl.h"

 15: PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
 16: {
 17:   PetscFunctionBegin;
 18:   if (X) *X = extop->X;
 19:   if (H) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H));
 20:   PetscFunctionReturn(PETSC_SUCCESS);
 21: }

 23: static PetscErrorCode NEPDeflationExtendInvariantPair(NEP_EXT_OP extop,Vec u,PetscScalar lambda,PetscInt k)
 24: {
 25:   Vec            uu;
 26:   PetscInt       ld,i;
 27:   PetscMPIInt    np;
 28:   PetscReal      norm;

 30:   PetscFunctionBegin;
 31:   PetscCall(BVGetColumn(extop->X,k,&uu));
 32:   ld = extop->szd+1;
 33:   PetscCall(NEPDeflationCopyToExtendedVec(extop,uu,extop->H+k*ld,u,PETSC_TRUE));
 34:   PetscCall(BVRestoreColumn(extop->X,k,&uu));
 35:   PetscCall(BVNormColumn(extop->X,k,NORM_2,&norm));
 36:   PetscCall(BVScaleColumn(extop->X,k,1.0/norm));
 37:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)u),&np));
 38:   for (i=0;i<k;i++) extop->H[k*ld+i] *= PetscSqrtReal(np)/norm;
 39:   extop->H[k*(ld+1)] = lambda;
 40:   PetscFunctionReturn(PETSC_SUCCESS);
 41: }

 43: PetscErrorCode NEPDeflationExtractEigenpair(NEP_EXT_OP extop,PetscInt k,Vec u,PetscScalar lambda,DS ds)
 44: {
 45:   Mat            A,H;
 46:   PetscInt       ldh=extop->szd+1,ldds,k1=k+1;
 47:   PetscScalar    *eigr,*eigi,*t,*Z;
 48:   Vec            x;

 50:   PetscFunctionBegin;
 51:   PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,k));
 52:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k1,k1,extop->H,&H));
 53:   PetscCall(MatDenseSetLDA(H,ldh));
 54:   PetscCall(PetscCalloc3(k1,&eigr,k1,&eigi,extop->szd,&t));
 55:   PetscCall(DSReset(ds));
 56:   PetscCall(DSSetType(ds,DSNHEP));
 57:   PetscCall(DSAllocate(ds,ldh));
 58:   PetscCall(DSGetLeadingDimension(ds,&ldds));
 59:   PetscCall(DSSetDimensions(ds,k1,0,k1));
 60:   PetscCall(DSGetMat(ds,DS_MAT_A,&A));
 61:   PetscCall(MatCopy(H,A,SAME_NONZERO_PATTERN));
 62:   PetscCall(DSRestoreMat(ds,DS_MAT_A,&A));
 63:   PetscCall(MatDestroy(&H));
 64:   PetscCall(DSSolve(ds,eigr,eigi));
 65:   PetscCall(DSVectors(ds,DS_MAT_X,&k,NULL));
 66:   PetscCall(DSGetArray(ds,DS_MAT_X,&Z));
 67:   PetscCall(BVMultColumn(extop->X,1.0,Z[k*ldds+k],k,Z+k*ldds));
 68:   PetscCall(DSRestoreArray(ds,DS_MAT_X,&Z));
 69:   PetscCall(BVGetColumn(extop->X,k,&x));
 70:   PetscCall(NEPDeflationCopyToExtendedVec(extop,x,t,u,PETSC_FALSE));
 71:   PetscCall(BVRestoreColumn(extop->X,k,&x));
 72:   PetscCall(PetscFree3(eigr,eigi,t));
 73:   PetscFunctionReturn(PETSC_SUCCESS);
 74: }

 76: PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
 77: {
 78:   PetscMPIInt    np,rk,count;
 79:   PetscScalar    *array1,*array2;
 80:   PetscInt       nloc;

 82:   PetscFunctionBegin;
 83:   if (extop->szd) {
 84:     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)vex),&rk));
 85:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)vex),&np));
 86:     PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
 87:     if (v) {
 88:       PetscCall(VecGetArray(v,&array1));
 89:       PetscCall(VecGetArray(vex,&array2));
 90:       if (back) PetscCall(PetscArraycpy(array1,array2,nloc));
 91:       else PetscCall(PetscArraycpy(array2,array1,nloc));
 92:       PetscCall(VecRestoreArray(v,&array1));
 93:       PetscCall(VecRestoreArray(vex,&array2));
 94:     }
 95:     if (a) {
 96:       PetscCall(VecGetArray(vex,&array2));
 97:       if (back) {
 98:         PetscCall(PetscArraycpy(a,array2+nloc,extop->szd));
 99:         PetscCall(PetscMPIIntCast(extop->szd,&count));
100:         PetscCallMPI(MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
101:       } else {
102:         PetscCall(PetscArraycpy(array2+nloc,a,extop->szd));
103:         PetscCall(PetscMPIIntCast(extop->szd,&count));
104:         PetscCallMPI(MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
105:       }
106:       PetscCall(VecRestoreArray(vex,&array2));
107:     }
108:   } else {
109:     if (back) PetscCall(VecCopy(vex,v));
110:     else PetscCall(VecCopy(v,vex));
111:   }
112:   PetscFunctionReturn(PETSC_SUCCESS);
113: }

115: PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
116: {
117:   PetscInt       nloc;
118:   Vec            u;
119:   VecType        type;

121:   PetscFunctionBegin;
122:   if (extop->szd) {
123:     PetscCall(BVGetColumn(extop->nep->V,0,&u));
124:     PetscCall(VecGetType(u,&type));
125:     PetscCall(BVRestoreColumn(extop->nep->V,0,&u));
126:     PetscCall(VecCreate(PetscObjectComm((PetscObject)extop->nep),v));
127:     PetscCall(VecSetType(*v,type));
128:     PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
129:     nloc += extop->szd;
130:     PetscCall(VecSetSizes(*v,nloc,PETSC_DECIDE));
131:   } else PetscCall(BVCreateVec(extop->nep->V,v));
132:   PetscFunctionReturn(PETSC_SUCCESS);
133: }

135: PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
136: {
137:   PetscInt           nloc;
138:   BVType             type;
139:   BVOrthogType       otype;
140:   BVOrthogRefineType oref;
141:   PetscReal          oeta;
142:   BVOrthogBlockType  oblock;
143:   NEP                nep=extop->nep;

145:   PetscFunctionBegin;
146:   if (extop->szd) {
147:     PetscCall(BVGetSizes(nep->V,&nloc,NULL,NULL));
148:     PetscCall(BVCreate(PetscObjectComm((PetscObject)nep),V));
149:     PetscCall(BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz));
150:     PetscCall(BVGetType(nep->V,&type));
151:     PetscCall(BVSetType(*V,type));
152:     PetscCall(BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock));
153:     PetscCall(BVSetOrthogonalization(*V,otype,oref,oeta,oblock));
154:     PetscCall(PetscObjectStateIncrease((PetscObject)*V));
155:   } else PetscCall(BVDuplicateResize(nep->V,sz,V));
156:   PetscFunctionReturn(PETSC_SUCCESS);
157: }

159: PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
160: {
161:   PetscInt       n,next,i;
162:   PetscRandom    rand;
163:   PetscScalar    *array;
164:   PetscMPIInt    nn,np;

166:   PetscFunctionBegin;
167:   PetscCall(BVGetRandomContext(extop->nep->V,&rand));
168:   PetscCall(VecSetRandom(v,rand));
169:   if (extop->szd) {
170:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v),&np));
171:     PetscCall(BVGetSizes(extop->nep->V,&n,NULL,NULL));
172:     PetscCall(VecGetLocalSize(v,&next));
173:     PetscCall(VecGetArray(v,&array));
174:     for (i=n+extop->n;i<next;i++) array[i] = 0.0;
175:     for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
176:     PetscCall(PetscMPIIntCast(extop->n,&nn));
177:     PetscCallMPI(MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PetscObjectComm((PetscObject)v)));
178:     PetscCall(VecRestoreArray(v,&array));
179:   }
180:   PetscFunctionReturn(PETSC_SUCCESS);
181: }

183: static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
184: {
185:   PetscInt       i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
186:   PetscScalar    sone=1.0,zero=0.0;
187:   PetscBLASInt   ldh_,ldhj_,n_;

189:   PetscFunctionBegin;
190:   i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
191:   PetscCall(PetscArrayzero(Hj,i));
192:   PetscCall(PetscBLASIntCast(ldhj+1,&ldh_));
193:   PetscCall(PetscBLASIntCast(ldhj,&ldhj_));
194:   PetscCall(PetscBLASIntCast(n,&n_));
195:   if (idx<1) {
196:     if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
197:     else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
198:   } else {
199:       for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
200:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
201:       for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
202:       if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
203:   }
204:   if (idx<0) {
205:     idx = -idx;
206:     for (k=1;k<idx;k++) {
207:       for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
208:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
209:       for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
210:       if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
211:     }
212:   }
213:   PetscFunctionReturn(PETSC_SUCCESS);
214: }

216: PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
217: {
218:   PetscInt       i;

220:   PetscFunctionBegin;
221:   PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,extop->n));
222:   extop->n++;
223:   PetscCall(BVSetActiveColumns(extop->X,0,extop->n));
224:   if (extop->n <= extop->szd) {
225:     /* update XpX */
226:     PetscCall(BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd));
227:     extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
228:     for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
229:     /* determine minimality index */
230:     extop->midx = PetscMin(extop->max_midx,extop->n);
231:     /* polynominal basis coefficients */
232:     for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
233:     /* evaluate the polynomial basis in H */
234:     PetscCall(NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL));
235:   }
236:   PetscFunctionReturn(PETSC_SUCCESS);
237: }

239: static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
240: {
241:   PetscInt          i,j,k,off,ini,fin,sz,ldh,n=extop->n;
242:   Mat               A,B;
243:   PetscScalar       *array;
244:   const PetscScalar *barray;

246:   PetscFunctionBegin;
247:   if (idx<0) {ini = 0; fin = extop->nep->nt;}
248:   else {ini = idx; fin = idx+1;}
249:   if (y) sz = hfjp?n+2:n+1;
250:   else sz = hfjp?3*n:2*n;
251:   ldh = extop->szd+1;
252:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A));
253:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B));
254:   PetscCall(MatDenseGetArray(A,&array));
255:   for (j=0;j<n;j++)
256:     for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
257:   PetscCall(MatDenseRestoreArrayWrite(A,&array));
258:   if (y) {
259:     PetscCall(MatDenseGetArray(A,&array));
260:     array[extop->n*(sz+1)] = lambda;
261:     if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
262:     for (i=0;i<n;i++) array[n*sz+i] = y[i];
263:     PetscCall(MatDenseRestoreArrayWrite(A,&array));
264:     for (j=ini;j<fin;j++) {
265:       PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
266:       PetscCall(MatDenseGetArrayRead(B,&barray));
267:       for (i=0;i<n;i++) hfj[j*ld+i] = barray[n*sz+i];
268:       if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = barray[(n+1)*sz+i];
269:       PetscCall(MatDenseRestoreArrayRead(B,&barray));
270:     }
271:   } else {
272:     off = idx<0?ld*n:0;
273:     PetscCall(MatDenseGetArray(A,&array));
274:     for (k=0;k<n;k++) {
275:       array[(n+k)*sz+k] = 1.0;
276:       array[(n+k)*sz+n+k] = lambda;
277:     }
278:     if (hfjp) for (k=0;k<n;k++) {
279:       array[(2*n+k)*sz+n+k] = 1.0;
280:       array[(2*n+k)*sz+2*n+k] = lambda;
281:     }
282:     PetscCall(MatDenseRestoreArray(A,&array));
283:     for (j=ini;j<fin;j++) {
284:       PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
285:       PetscCall(MatDenseGetArrayRead(B,&barray));
286:       for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = barray[n*sz+i*sz+k];
287:       if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = barray[2*n*sz+i*sz+k];
288:       PetscCall(MatDenseRestoreArrayRead(B,&barray));
289:     }
290:   }
291:   PetscCall(MatDestroy(&A));
292:   PetscCall(MatDestroy(&B));
293:   PetscFunctionReturn(PETSC_SUCCESS);
294: }

296: static PetscErrorCode MatMult_NEPDeflation(Mat M,Vec x,Vec y)
297: {
298:   NEP_DEF_MATSHELL  *matctx;
299:   NEP_EXT_OP        extop;
300:   Vec               x1,y1;
301:   PetscScalar       *yy,sone=1.0,zero=0.0;
302:   const PetscScalar *xx;
303:   PetscInt          nloc,i;
304:   PetscMPIInt       np;
305:   PetscBLASInt      n_,one=1,szd_;

307:   PetscFunctionBegin;
308:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&np));
309:   PetscCall(MatShellGetContext(M,&matctx));
310:   extop = matctx->extop;
311:   if (extop->ref) PetscCall(VecZeroEntries(y));
312:   if (extop->szd) {
313:     x1 = matctx->w[0]; y1 = matctx->w[1];
314:     PetscCall(VecGetArrayRead(x,&xx));
315:     PetscCall(VecPlaceArray(x1,xx));
316:     PetscCall(VecGetArray(y,&yy));
317:     PetscCall(VecPlaceArray(y1,yy));
318:     PetscCall(MatMult(matctx->T,x1,y1));
319:     if (!extop->ref && extop->n) {
320:       PetscCall(VecGetLocalSize(x1,&nloc));
321:       /* copy for avoiding warning of constant array xx */
322:       for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
323:       PetscCall(BVMultVec(matctx->U,1.0,1.0,y1,matctx->work));
324:       PetscCall(BVDotVec(extop->X,x1,matctx->work));
325:       PetscCall(PetscBLASIntCast(extop->n,&n_));
326:       PetscCall(PetscBLASIntCast(extop->szd,&szd_));
327:       PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
328:       PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
329:       for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
330:     }
331:     PetscCall(VecResetArray(x1));
332:     PetscCall(VecRestoreArrayRead(x,&xx));
333:     PetscCall(VecResetArray(y1));
334:     PetscCall(VecRestoreArray(y,&yy));
335:   } else PetscCall(MatMult(matctx->T,x,y));
336:   PetscFunctionReturn(PETSC_SUCCESS);
337: }

339: static PetscErrorCode MatCreateVecs_NEPDeflation(Mat M,Vec *right,Vec *left)
340: {
341:   NEP_DEF_MATSHELL *matctx;

343:   PetscFunctionBegin;
344:   PetscCall(MatShellGetContext(M,&matctx));
345:   if (right) PetscCall(VecDuplicate(matctx->w[0],right));
346:   if (left) PetscCall(VecDuplicate(matctx->w[0],left));
347:   PetscFunctionReturn(PETSC_SUCCESS);
348: }

350: static PetscErrorCode MatDestroy_NEPDeflation(Mat M)
351: {
352:   NEP_DEF_MATSHELL *matctx;

354:   PetscFunctionBegin;
355:   PetscCall(MatShellGetContext(M,&matctx));
356:   if (matctx->extop->szd) {
357:     PetscCall(BVDestroy(&matctx->U));
358:     PetscCall(PetscFree2(matctx->hfj,matctx->work));
359:     PetscCall(PetscFree2(matctx->A,matctx->B));
360:     PetscCall(VecDestroy(&matctx->w[0]));
361:     PetscCall(VecDestroy(&matctx->w[1]));
362:   }
363:   if (matctx->P != matctx->T) PetscCall(MatDestroy(&matctx->P));
364:   PetscCall(MatDestroy(&matctx->T));
365:   PetscCall(PetscFree(matctx));
366:   PetscFunctionReturn(PETSC_SUCCESS);
367: }

369: static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
370: {
371:   PetscScalar p;
372:   PetscInt    i;

374:   PetscFunctionBegin;
375:   if (!jacobian) {
376:     val[0] = 1.0;
377:     for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
378:   } else {
379:     val[0] = 0.0;
380:     p = 1.0;
381:     for (i=1;i<extop->n;i++) {
382:       val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
383:       p *= (lambda-extop->bc[i-1]);
384:     }
385:   }
386:   PetscFunctionReturn(PETSC_SUCCESS);
387: }

389: static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
390: {
391:   NEP_DEF_MATSHELL *matctx,*matctxC;
392:   PetscInt         nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
393:   Mat              F,Mshell,Mcomp;
394:   PetscBool        ini=PETSC_FALSE;
395:   PetscScalar      *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
396:   PetscBLASInt     n_,info,szd_;

398:   PetscFunctionBegin;
399:   if (!M) Mshell = jacobian?extop->MJ:extop->MF;
400:   else Mshell = *M;
401:   Mcomp = jacobian?extop->MF:extop->MJ;
402:   if (!Mshell) {
403:     ini = PETSC_TRUE;
404:     PetscCall(PetscNew(&matctx));
405:     PetscCall(MatGetLocalSize(extop->nep->function,&mloc,&nloc));
406:     nloc += szd; mloc += szd;
407:     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell));
408:     PetscCall(MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))MatMult_NEPDeflation));
409:     PetscCall(MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))MatCreateVecs_NEPDeflation));
410:     PetscCall(MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))MatDestroy_NEPDeflation));
411:     matctx->nep = extop->nep;
412:     matctx->extop = extop;
413:     if (!M) {
414:       if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
415:       else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
416:       PetscCall(PetscObjectReference((PetscObject)matctx->T));
417:       if (!jacobian) {
418:         if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) {
419:           matctx->P = extop->nep->function_pre;
420:           PetscCall(PetscObjectReference((PetscObject)matctx->P));
421:         } else matctx->P = matctx->T;
422:       }
423:     } else {
424:       matctx->jacob = jacobian;
425:       PetscCall(MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES,&matctx->T));
426:       *M = Mshell;
427:       if (!jacobian) {
428:         if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) PetscCall(MatDuplicate(extop->nep->function_pre,MAT_DO_NOT_COPY_VALUES,&matctx->P));
429:         else matctx->P = matctx->T;
430:       }
431:     }
432:     if (szd) {
433:       PetscCall(BVCreateVec(extop->nep->V,matctx->w));
434:       PetscCall(VecDuplicate(matctx->w[0],matctx->w+1));
435:       PetscCall(BVDuplicateResize(extop->nep->V,szd,&matctx->U));
436:       PetscCall(PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work));
437:       PetscCall(PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B));
438:     }
439:   } else PetscCall(MatShellGetContext(Mshell,&matctx));
440:   if (ini || matctx->theta != lambda || matctx->n != extop->n) {
441:     if (ini || matctx->theta != lambda) {
442:       if (jacobian) PetscCall(NEPComputeJacobian(extop->nep,lambda,matctx->T));
443:       else PetscCall(NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->P));
444:     }
445:     if (n) {
446:       matctx->hfjset = PETSC_FALSE;
447:       if (!extop->simpU) {
448:         /* likely hfjp has been already computed */
449:         if (Mcomp) {
450:           PetscCall(MatShellGetContext(Mcomp,&matctxC));
451:           if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
452:             PetscCall(PetscArraycpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt));
453:             matctx->hfjset = PETSC_TRUE;
454:           }
455:         }
456:         hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
457:         if (!matctx->hfjset) {
458:           PetscCall(NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n));
459:           matctx->hfjset = PETSC_TRUE;
460:         }
461:         PetscCall(BVSetActiveColumns(matctx->U,0,n));
462:         hf = jacobian?hfjp:hfj;
463:         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F));
464:         PetscCall(BVMatMult(extop->X,extop->nep->A[0],matctx->U));
465:         PetscCall(BVMultInPlace(matctx->U,F,0,n));
466:         PetscCall(BVSetActiveColumns(extop->W,0,extop->n));
467:         for (j=1;j<extop->nep->nt;j++) {
468:           PetscCall(BVMatMult(extop->X,extop->nep->A[j],extop->W));
469:           PetscCall(MatDensePlaceArray(F,hf+j*n*n));
470:           PetscCall(BVMult(matctx->U,1.0,1.0,extop->W,F));
471:           PetscCall(MatDenseResetArray(F));
472:         }
473:         PetscCall(MatDestroy(&F));
474:       } else {
475:         hfj = matctx->hfj;
476:         PetscCall(BVSetActiveColumns(matctx->U,0,n));
477:         PetscCall(BVMatMult(extop->X,matctx->T,matctx->U));
478:         for (j=0;j<n;j++) {
479:           for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
480:           hfj[j*(n+1)] += lambda;
481:         }
482:         PetscCall(PetscBLASIntCast(n,&n_));
483:         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
484:         PetscCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
485:         PetscCall(PetscFPTrapPop());
486:         SlepcCheckLapackInfo("trtri",info);
487:         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F));
488:         PetscCall(BVMultInPlace(matctx->U,F,0,n));
489:         if (jacobian) {
490:           PetscCall(NEPDeflationComputeFunction(extop,lambda,NULL));
491:           PetscCall(MatShellGetContext(extop->MF,&matctxC));
492:           PetscCall(BVMult(matctx->U,-1.0,1.0,matctxC->U,F));
493:         }
494:         PetscCall(MatDestroy(&F));
495:       }
496:       PetscCall(PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev));
497:       PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian));
498:       A = matctx->A;
499:       PetscCall(PetscArrayzero(A,szd*szd));
500:       if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
501:       for (j=0;j<n;j++)
502:         for (i=0;i<n;i++)
503:           for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
504:       PetscCall(PetscBLASIntCast(n,&n_));
505:       PetscCall(PetscBLASIntCast(szd,&szd_));
506:       B = matctx->B;
507:       PetscCall(PetscArrayzero(B,szd*szd));
508:       for (i=1;i<extop->midx;i++) {
509:         PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
510:         PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
511:         PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
512:         pts = hHprev; hHprev = hH; hH = pts;
513:       }
514:       PetscCall(PetscFree3(basisv,hH,hHprev));
515:     }
516:     matctx->theta = lambda;
517:     matctx->n = extop->n;
518:   }
519:   PetscFunctionReturn(PETSC_SUCCESS);
520: }

522: PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
523: {
524:   PetscFunctionBegin;
525:   PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL));
526:   if (F) *F = extop->MF;
527:   PetscFunctionReturn(PETSC_SUCCESS);
528: }

530: PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
531: {
532:   PetscFunctionBegin;
533:   PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL));
534:   if (J) *J = extop->MJ;
535:   PetscFunctionReturn(PETSC_SUCCESS);
536: }

538: PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
539: {
540:   NEP_DEF_MATSHELL  *matctx;
541:   NEP_DEF_FUN_SOLVE solve;
542:   PetscInt          i,j,n=extop->n;
543:   Vec               u,tu;
544:   Mat               F;
545:   PetscScalar       snone=-1.0,sone=1.0;
546:   PetscBLASInt      n_,szd_,ldh_,*p,info;
547:   Mat               Mshell;

549:   PetscFunctionBegin;
550:   solve = extop->solve;
551:   if (lambda!=solve->theta || n!=solve->n) {
552:     PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T));
553:     Mshell = (solve->sincf)?extop->MF:solve->T;
554:     PetscCall(MatShellGetContext(Mshell,&matctx));
555:     PetscCall(NEP_KSPSetOperators(solve->ksp,matctx->T,matctx->P));
556:     if (!extop->ref && n) {
557:       PetscCall(PetscBLASIntCast(n,&n_));
558:       PetscCall(PetscBLASIntCast(extop->szd,&szd_));
559:       PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
560:       if (!extop->simpU) {
561:         PetscCall(BVSetActiveColumns(solve->T_1U,0,n));
562:         for (i=0;i<n;i++) {
563:           PetscCall(BVGetColumn(matctx->U,i,&u));
564:           PetscCall(BVGetColumn(solve->T_1U,i,&tu));
565:           PetscCall(KSPSolve(solve->ksp,u,tu));
566:           PetscCall(BVRestoreColumn(solve->T_1U,i,&tu));
567:           PetscCall(BVRestoreColumn(matctx->U,i,&u));
568:         }
569:         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F));
570:         PetscCall(BVDot(solve->T_1U,extop->X,F));
571:         PetscCall(MatDestroy(&F));
572:       } else {
573:         for (j=0;j<n;j++)
574:           for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
575:         for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
576:         PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
577:         for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
578:       }
579:       PetscCall(PetscArraycpy(solve->M,matctx->B,extop->szd*extop->szd));
580:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
581:       PetscCall(PetscMalloc1(n,&p));
582:       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
583:       PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
584:       SlepcCheckLapackInfo("getrf",info);
585:       PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
586:       SlepcCheckLapackInfo("getri",info);
587:       PetscCall(PetscFPTrapPop());
588:       PetscCall(PetscFree(p));
589:     }
590:     solve->theta = lambda;
591:     solve->n = n;
592:   }
593:   PetscFunctionReturn(PETSC_SUCCESS);
594: }

596: PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
597: {
598:   Vec               b1,x1;
599:   PetscScalar       *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
600:   NEP_DEF_MATSHELL  *matctx;
601:   NEP_DEF_FUN_SOLVE solve=extop->solve;
602:   PetscBLASInt      one=1,szd_,n_,ldh_;
603:   PetscInt          nloc,i;
604:   PetscMPIInt       np,count;

606:   PetscFunctionBegin;
607:   if (extop->ref) PetscCall(VecZeroEntries(x));
608:   if (extop->szd) {
609:     x1 = solve->w[0]; b1 = solve->w[1];
610:     PetscCall(VecGetArray(x,&xx));
611:     PetscCall(VecPlaceArray(x1,xx));
612:     PetscCall(VecGetArray(b,&bb));
613:     PetscCall(VecPlaceArray(b1,bb));
614:   } else {
615:     b1 = b; x1 = x;
616:   }
617:   PetscCall(KSPSolve(extop->solve->ksp,b1,x1));
618:   if (!extop->ref && extop->n && extop->szd) {
619:     PetscCall(PetscBLASIntCast(extop->szd,&szd_));
620:     PetscCall(PetscBLASIntCast(extop->n,&n_));
621:     PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
622:     PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
623:     PetscCall(PetscMalloc2(extop->n,&b2,extop->n,&x2));
624:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)b),&np));
625:     for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np);
626:     w = solve->work; w2 = solve->work+extop->n;
627:     PetscCall(MatShellGetContext(solve->sincf?extop->MF:solve->T,&matctx));
628:     PetscCall(PetscArraycpy(w2,b2,extop->n));
629:     PetscCall(BVDotVec(extop->X,x1,w));
630:     PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
631:     PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
632:     if (extop->simpU) {
633:       for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
634:       for (i=0;i<extop->n;i++) w[i] = x2[i];
635:       PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
636:       for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
637:       PetscCall(BVMultVec(extop->X,-1.0,1.0,x1,w));
638:     } else PetscCall(BVMultVec(solve->T_1U,-1.0,1.0,x1,x2));
639:     for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
640:     PetscCall(PetscMPIIntCast(extop->n,&count));
641:     PetscCallMPI(MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b)));
642:   }
643:   if (extop->szd) {
644:     PetscCall(VecResetArray(x1));
645:     PetscCall(VecRestoreArray(x,&xx));
646:     PetscCall(VecResetArray(b1));
647:     PetscCall(VecRestoreArray(b,&bb));
648:     if (!extop->ref && extop->n) PetscCall(PetscFree2(b2,x2));
649:   }
650:   PetscFunctionReturn(PETSC_SUCCESS);
651: }

653: PetscErrorCode NEPDeflationSetRefine(NEP_EXT_OP extop,PetscBool ref)
654: {
655:   PetscFunctionBegin;
656:   extop->ref = ref;
657:   PetscFunctionReturn(PETSC_SUCCESS);
658: }

660: PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
661: {
662:   PetscInt          j;
663:   NEP_DEF_FUN_SOLVE solve;

665:   PetscFunctionBegin;
666:   if (!extop) PetscFunctionReturn(PETSC_SUCCESS);
667:   PetscCall(PetscFree(extop->H));
668:   PetscCall(BVDestroy(&extop->X));
669:   if (extop->szd) {
670:     PetscCall(VecDestroy(&extop->w));
671:     PetscCall(PetscFree3(extop->Hj,extop->XpX,extop->bc));
672:     PetscCall(BVDestroy(&extop->W));
673:   }
674:   PetscCall(MatDestroy(&extop->MF));
675:   PetscCall(MatDestroy(&extop->MJ));
676:   if (extop->solve) {
677:     solve = extop->solve;
678:     if (extop->szd) {
679:       if (!extop->simpU) PetscCall(BVDestroy(&solve->T_1U));
680:       PetscCall(PetscFree2(solve->M,solve->work));
681:       PetscCall(VecDestroy(&solve->w[0]));
682:       PetscCall(VecDestroy(&solve->w[1]));
683:     }
684:     if (!solve->sincf) PetscCall(MatDestroy(&solve->T));
685:     PetscCall(PetscFree(extop->solve));
686:   }
687:   if (extop->proj) {
688:     if (extop->szd) {
689:       for (j=0;j<extop->nep->nt;j++) PetscCall(MatDestroy(&extop->proj->V1pApX[j]));
690:       PetscCall(MatDestroy(&extop->proj->XpV1));
691:       PetscCall(PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work));
692:       PetscCall(VecDestroy(&extop->proj->w));
693:       PetscCall(BVDestroy(&extop->proj->V1));
694:     }
695:     PetscCall(PetscFree(extop->proj));
696:   }
697:   PetscCall(PetscFree(extop));
698:   PetscFunctionReturn(PETSC_SUCCESS);
699: }

701: PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
702: {
703:   NEP_EXT_OP        op;
704:   NEP_DEF_FUN_SOLVE solve;
705:   PetscInt          szd;
706:   Vec               x;

708:   PetscFunctionBegin;
709:   PetscCall(NEPDeflationReset(*extop));
710:   PetscCall(PetscNew(&op));
711:   *extop  = op;
712:   op->nep = nep;
713:   op->n   = 0;
714:   op->szd = szd = sz-1;
715:   op->max_midx = PetscMin(MAX_MINIDX,szd);
716:   op->X = X;
717:   if (!X) PetscCall(BVDuplicateResize(nep->V,sz,&op->X));
718:   else PetscCall(PetscObjectReference((PetscObject)X));
719:   PetscCall(PetscCalloc1(sz*sz,&(op)->H));
720:   if (op->szd) {
721:     PetscCall(BVGetColumn(op->X,0,&x));
722:     PetscCall(VecDuplicate(x,&op->w));
723:     PetscCall(BVRestoreColumn(op->X,0,&x));
724:     op->simpU = PETSC_FALSE;
725:     if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
726:       /* undocumented option to use the simple expression for U = T*X*inv(lambda-H) */
727:       PetscCall(PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL));
728:     } else {
729:       op->simpU = PETSC_TRUE;
730:     }
731:     PetscCall(PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc));
732:     PetscCall(BVDuplicateResize(op->X,op->szd,&op->W));
733:   }
734:   if (ksp) {
735:     PetscCall(PetscNew(&solve));
736:     op->solve    = solve;
737:     solve->ksp   = ksp;
738:     solve->sincf = sincfun;
739:     solve->n     = -1;
740:     if (op->szd) {
741:       if (!op->simpU) PetscCall(BVDuplicateResize(nep->V,szd,&solve->T_1U));
742:       PetscCall(PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work));
743:       PetscCall(BVCreateVec(nep->V,&solve->w[0]));
744:       PetscCall(VecDuplicate(solve->w[0],&solve->w[1]));
745:     }
746:   }
747:   PetscFunctionReturn(PETSC_SUCCESS);
748: }

750: static PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
751: {
752:   Mat               A,Ei;
753:   PetscScalar       *T,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
754:   PetscScalar       alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
755:   const PetscScalar *E;
756:   PetscInt          i,ldds,nwork=0,szd,nv,j,k,n;
757:   PetscBLASInt      inc=1,nv_,ldds_,dim_,szdk,szd_,n_,ldh_;
758:   PetscMPIInt       np;
759:   NEP_DEF_PROJECT   proj=(NEP_DEF_PROJECT)ctx;
760:   NEP_EXT_OP        extop=proj->extop;
761:   NEP               nep=extop->nep;

763:   PetscFunctionBegin;
764:   PetscCall(DSGetDimensions(ds,&nv,NULL,NULL,NULL));
765:   PetscCall(DSGetLeadingDimension(ds,&ldds));
766:   PetscCall(DSGetMat(ds,mat,&A));
767:   PetscCall(MatZeroEntries(A));
768:   /* mat = V1^*T(lambda)V1 */
769:   for (i=0;i<nep->nt;i++) {
770:     if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
771:     else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
772:     PetscCall(DSGetMat(ds,DSMatExtra[i],&Ei));
773:     PetscCall(MatAXPY(A,alpha,Ei,SAME_NONZERO_PATTERN));
774:     PetscCall(DSRestoreMat(ds,DSMatExtra[i],&Ei));
775:   }
776:   PetscCall(DSRestoreMat(ds,mat,&A));
777:   if (!extop->ref && extop->n) {
778:     PetscCall(DSGetArray(ds,mat,&T));
779:     n = extop->n;
780:     szd = extop->szd;
781:     PetscCall(PetscArrayzero(proj->work,proj->lwork));
782:     PetscCall(PetscBLASIntCast(nv,&nv_));
783:     PetscCall(PetscBLASIntCast(n,&n_));
784:     PetscCall(PetscBLASIntCast(ldds,&ldds_));
785:     PetscCall(PetscBLASIntCast(szd,&szd_));
786:     PetscCall(PetscBLASIntCast(proj->dim,&dim_));
787:     PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
788:     w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
789:     nwork += 2*proj->dim*proj->dim;

791:     /* mat = mat + V1^*U(lambda)V2 */
792:     for (i=0;i<nep->nt;i++) {
793:       if (extop->simpU) {
794:         if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
795:         else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
796:         ww = w1; w = w2;
797:         PetscCall(PetscArraycpy(ww,proj->V2,szd*nv));
798:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
799:         for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
800:         for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
801:         alpha = -alpha;
802:         PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
803:         if (deriv) {
804:           PetscCall(PetscBLASIntCast(szd*nv,&szdk));
805:           PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha2));
806:           PetscCall(PetscArraycpy(w,proj->V2,szd*nv));
807:           for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
808:           alpha2 = -alpha2;
809:           PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
810:           alpha2 = 1.0;
811:           PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
812:           PetscCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
813:         }
814:         for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
815:       } else {
816:         PetscCall(NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd));
817:         w = deriv?w2:w1; ww = deriv?w1:w2;
818:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
819:         s = PetscSqrtReal(np);
820:         PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
821:       }
822:       PetscCall(MatDenseGetArrayRead(proj->V1pApX[i],&E));
823:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
824:       PetscCall(MatDenseRestoreArrayRead(proj->V1pApX[i],&E));
825:     }

827:     /* mat = mat + V2^*A(lambda)V1 */
828:     basisv = proj->work+nwork; nwork += szd;
829:     hH     = proj->work+nwork; nwork += szd*szd;
830:     hHprev = proj->work+nwork; nwork += szd*szd;
831:     AB     = proj->work+nwork;
832:     PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv));
833:     if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
834:     for (j=0;j<n;j++)
835:       for (i=0;i<n;i++)
836:         for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
837:     PetscCall(MatDenseGetArrayRead(proj->XpV1,&E));
838:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
839:     PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
840:     PetscCall(MatDenseRestoreArrayRead(proj->XpV1,&E));

842:     /* mat = mat + V2^*B(lambda)V2 */
843:     PetscCall(PetscArrayzero(AB,szd*szd));
844:     for (i=1;i<extop->midx;i++) {
845:       PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
846:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
847:       PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
848:       pts = hHprev; hHprev = hH; hH = pts;
849:     }
850:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
851:     PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
852:     PetscCall(DSRestoreArray(ds,mat,&T));
853:   }
854:   PetscFunctionReturn(PETSC_SUCCESS);
855: }

857: PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
858: {
859:   PetscInt        k,j,n=extop->n,dim;
860:   Vec             v,ve;
861:   BV              V1;
862:   Mat             G;
863:   NEP             nep=extop->nep;
864:   NEP_DEF_PROJECT proj;

866:   PetscFunctionBegin;
867:   NEPCheckSplit(extop->nep,1);
868:   proj = extop->proj;
869:   if (!proj) {
870:     /* Initialize the projection data structure */
871:     PetscCall(PetscNew(&proj));
872:     extop->proj = proj;
873:     proj->extop = extop;
874:     PetscCall(BVGetSizes(Vext,NULL,NULL,&dim));
875:     proj->dim = dim;
876:     if (extop->szd) {
877:       proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
878:       PetscCall(PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work));
879:       for (j=0;j<nep->nt;j++) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]));
880:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1));
881:       PetscCall(BVCreateVec(extop->X,&proj->w));
882:       PetscCall(BVDuplicateResize(extop->X,proj->dim,&proj->V1));
883:     }
884:     PetscCall(DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj));
885:   }

887:   /* Split Vext in V1 and V2 */
888:   if (extop->szd) {
889:     for (j=j0;j<j1;j++) {
890:       PetscCall(BVGetColumn(Vext,j,&ve));
891:       PetscCall(BVGetColumn(proj->V1,j,&v));
892:       PetscCall(NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE));
893:       PetscCall(BVRestoreColumn(proj->V1,j,&v));
894:       PetscCall(BVRestoreColumn(Vext,j,&ve));
895:     }
896:     V1 = proj->V1;
897:   } else V1 = Vext;

899:   /* Compute matrices V1^* A_i V1 */
900:   PetscCall(BVSetActiveColumns(V1,j0,j1));
901:   for (k=0;k<nep->nt;k++) {
902:     PetscCall(DSGetMat(ds,DSMatExtra[k],&G));
903:     PetscCall(BVMatProject(V1,nep->A[k],V1,G));
904:     PetscCall(DSRestoreMat(ds,DSMatExtra[k],&G));
905:   }

907:   if (extop->n) {
908:     if (extop->szd) {
909:       /* Compute matrices V1^* A_i X  and V1^* X */
910:       PetscCall(BVSetActiveColumns(extop->W,0,n));
911:       for (k=0;k<nep->nt;k++) {
912:         PetscCall(BVMatMult(extop->X,nep->A[k],extop->W));
913:         PetscCall(BVDot(extop->W,V1,proj->V1pApX[k]));
914:       }
915:       PetscCall(BVDot(V1,extop->X,proj->XpV1));
916:     }
917:   }
918:   PetscFunctionReturn(PETSC_SUCCESS);
919: }