Actual source code: dvec2.c
2: /*
3: Defines some vector operation functions that are shared by
4: sequential and parallel vectors.
5: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <petsc/private/kernels/petscaxpy.h>
9: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
10: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
11: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
12: {
13: PetscInt i,nv_rem,n = xin->map->n;
14: PetscScalar sum0,sum1,sum2,sum3;
15: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
16: Vec *yy;
18: sum0 = 0.0;
19: sum1 = 0.0;
20: sum2 = 0.0;
22: i = nv;
23: nv_rem = nv&0x3;
24: yy = (Vec*)yin;
25: VecGetArrayRead(xin,&x);
27: switch (nv_rem) {
28: case 3:
29: VecGetArrayRead(yy[0],&yy0);
30: VecGetArrayRead(yy[1],&yy1);
31: VecGetArrayRead(yy[2],&yy2);
32: fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
33: VecRestoreArrayRead(yy[0],&yy0);
34: VecRestoreArrayRead(yy[1],&yy1);
35: VecRestoreArrayRead(yy[2],&yy2);
36: z[0] = sum0;
37: z[1] = sum1;
38: z[2] = sum2;
39: break;
40: case 2:
41: VecGetArrayRead(yy[0],&yy0);
42: VecGetArrayRead(yy[1],&yy1);
43: fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
44: VecRestoreArrayRead(yy[0],&yy0);
45: VecRestoreArrayRead(yy[1],&yy1);
46: z[0] = sum0;
47: z[1] = sum1;
48: break;
49: case 1:
50: VecGetArrayRead(yy[0],&yy0);
51: fortranmdot1_(x,yy0,&n,&sum0);
52: VecRestoreArrayRead(yy[0],&yy0);
53: z[0] = sum0;
54: break;
55: case 0:
56: break;
57: }
58: z += nv_rem;
59: i -= nv_rem;
60: yy += nv_rem;
62: while (i >0) {
63: sum0 = 0.;
64: sum1 = 0.;
65: sum2 = 0.;
66: sum3 = 0.;
67: VecGetArrayRead(yy[0],&yy0);
68: VecGetArrayRead(yy[1],&yy1);
69: VecGetArrayRead(yy[2],&yy2);
70: VecGetArrayRead(yy[3],&yy3);
71: fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
72: VecRestoreArrayRead(yy[0],&yy0);
73: VecRestoreArrayRead(yy[1],&yy1);
74: VecRestoreArrayRead(yy[2],&yy2);
75: VecRestoreArrayRead(yy[3],&yy3);
76: yy += 4;
77: z[0] = sum0;
78: z[1] = sum1;
79: z[2] = sum2;
80: z[3] = sum3;
81: z += 4;
82: i -= 4;
83: }
84: VecRestoreArrayRead(xin,&x);
85: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
86: return 0;
87: }
89: #else
90: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
91: {
92: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
93: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
94: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
95: Vec *yy;
97: sum0 = 0.;
98: sum1 = 0.;
99: sum2 = 0.;
101: i = nv;
102: nv_rem = nv&0x3;
103: yy = (Vec*)yin;
104: j = n;
105: VecGetArrayRead(xin,&xbase);
106: x = xbase;
108: switch (nv_rem) {
109: case 3:
110: VecGetArrayRead(yy[0],&yy0);
111: VecGetArrayRead(yy[1],&yy1);
112: VecGetArrayRead(yy[2],&yy2);
113: switch (j_rem=j&0x3) {
114: case 3:
115: x2 = x[2];
116: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
117: sum2 += x2*PetscConj(yy2[2]);
118: case 2:
119: x1 = x[1];
120: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
121: sum2 += x1*PetscConj(yy2[1]);
122: case 1:
123: x0 = x[0];
124: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
125: sum2 += x0*PetscConj(yy2[0]);
126: case 0:
127: x += j_rem;
128: yy0 += j_rem;
129: yy1 += j_rem;
130: yy2 += j_rem;
131: j -= j_rem;
132: break;
133: }
134: while (j>0) {
135: x0 = x[0];
136: x1 = x[1];
137: x2 = x[2];
138: x3 = x[3];
139: x += 4;
141: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
142: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
143: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
144: j -= 4;
145: }
146: z[0] = sum0;
147: z[1] = sum1;
148: z[2] = sum2;
149: VecRestoreArrayRead(yy[0],&yy0);
150: VecRestoreArrayRead(yy[1],&yy1);
151: VecRestoreArrayRead(yy[2],&yy2);
152: break;
153: case 2:
154: VecGetArrayRead(yy[0],&yy0);
155: VecGetArrayRead(yy[1],&yy1);
156: switch (j_rem=j&0x3) {
157: case 3:
158: x2 = x[2];
159: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
160: case 2:
161: x1 = x[1];
162: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
163: case 1:
164: x0 = x[0];
165: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
166: case 0:
167: x += j_rem;
168: yy0 += j_rem;
169: yy1 += j_rem;
170: j -= j_rem;
171: break;
172: }
173: while (j>0) {
174: x0 = x[0];
175: x1 = x[1];
176: x2 = x[2];
177: x3 = x[3];
178: x += 4;
180: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
181: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
182: j -= 4;
183: }
184: z[0] = sum0;
185: z[1] = sum1;
187: VecRestoreArrayRead(yy[0],&yy0);
188: VecRestoreArrayRead(yy[1],&yy1);
189: break;
190: case 1:
191: VecGetArrayRead(yy[0],&yy0);
192: switch (j_rem=j&0x3) {
193: case 3:
194: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
195: case 2:
196: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
197: case 1:
198: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
199: case 0:
200: x += j_rem;
201: yy0 += j_rem;
202: j -= j_rem;
203: break;
204: }
205: while (j>0) {
206: sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
207: + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
208: yy0 +=4;
209: j -= 4; x+=4;
210: }
211: z[0] = sum0;
213: VecRestoreArrayRead(yy[0],&yy0);
214: break;
215: case 0:
216: break;
217: }
218: z += nv_rem;
219: i -= nv_rem;
220: yy += nv_rem;
222: while (i >0) {
223: sum0 = 0.;
224: sum1 = 0.;
225: sum2 = 0.;
226: sum3 = 0.;
227: VecGetArrayRead(yy[0],&yy0);
228: VecGetArrayRead(yy[1],&yy1);
229: VecGetArrayRead(yy[2],&yy2);
230: VecGetArrayRead(yy[3],&yy3);
232: j = n;
233: x = xbase;
234: switch (j_rem=j&0x3) {
235: case 3:
236: x2 = x[2];
237: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
238: sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
239: case 2:
240: x1 = x[1];
241: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
242: sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
243: case 1:
244: x0 = x[0];
245: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
246: sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
247: case 0:
248: x += j_rem;
249: yy0 += j_rem;
250: yy1 += j_rem;
251: yy2 += j_rem;
252: yy3 += j_rem;
253: j -= j_rem;
254: break;
255: }
256: while (j>0) {
257: x0 = x[0];
258: x1 = x[1];
259: x2 = x[2];
260: x3 = x[3];
261: x += 4;
263: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
264: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
265: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
266: sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
267: j -= 4;
268: }
269: z[0] = sum0;
270: z[1] = sum1;
271: z[2] = sum2;
272: z[3] = sum3;
273: z += 4;
274: i -= 4;
275: VecRestoreArrayRead(yy[0],&yy0);
276: VecRestoreArrayRead(yy[1],&yy1);
277: VecRestoreArrayRead(yy[2],&yy2);
278: VecRestoreArrayRead(yy[3],&yy3);
279: yy += 4;
280: }
281: VecRestoreArrayRead(xin,&xbase);
282: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
283: return 0;
284: }
285: #endif
287: /* ----------------------------------------------------------------------------*/
288: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
289: {
290: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
291: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
292: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
293: Vec *yy;
295: sum0 = 0.;
296: sum1 = 0.;
297: sum2 = 0.;
299: i = nv;
300: nv_rem = nv&0x3;
301: yy = (Vec*)yin;
302: j = n;
303: VecGetArrayRead(xin,&xbase);
304: x = xbase;
306: switch (nv_rem) {
307: case 3:
308: VecGetArrayRead(yy[0],&yy0);
309: VecGetArrayRead(yy[1],&yy1);
310: VecGetArrayRead(yy[2],&yy2);
311: switch (j_rem=j&0x3) {
312: case 3:
313: x2 = x[2];
314: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
315: sum2 += x2*yy2[2];
316: case 2:
317: x1 = x[1];
318: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
319: sum2 += x1*yy2[1];
320: case 1:
321: x0 = x[0];
322: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
323: sum2 += x0*yy2[0];
324: case 0:
325: x += j_rem;
326: yy0 += j_rem;
327: yy1 += j_rem;
328: yy2 += j_rem;
329: j -= j_rem;
330: break;
331: }
332: while (j>0) {
333: x0 = x[0];
334: x1 = x[1];
335: x2 = x[2];
336: x3 = x[3];
337: x += 4;
339: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
340: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
341: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
342: j -= 4;
343: }
344: z[0] = sum0;
345: z[1] = sum1;
346: z[2] = sum2;
347: VecRestoreArrayRead(yy[0],&yy0);
348: VecRestoreArrayRead(yy[1],&yy1);
349: VecRestoreArrayRead(yy[2],&yy2);
350: break;
351: case 2:
352: VecGetArrayRead(yy[0],&yy0);
353: VecGetArrayRead(yy[1],&yy1);
354: switch (j_rem=j&0x3) {
355: case 3:
356: x2 = x[2];
357: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
358: case 2:
359: x1 = x[1];
360: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
361: case 1:
362: x0 = x[0];
363: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
364: case 0:
365: x += j_rem;
366: yy0 += j_rem;
367: yy1 += j_rem;
368: j -= j_rem;
369: break;
370: }
371: while (j>0) {
372: x0 = x[0];
373: x1 = x[1];
374: x2 = x[2];
375: x3 = x[3];
376: x += 4;
378: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
379: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
380: j -= 4;
381: }
382: z[0] = sum0;
383: z[1] = sum1;
385: VecRestoreArrayRead(yy[0],&yy0);
386: VecRestoreArrayRead(yy[1],&yy1);
387: break;
388: case 1:
389: VecGetArrayRead(yy[0],&yy0);
390: switch (j_rem=j&0x3) {
391: case 3:
392: x2 = x[2]; sum0 += x2*yy0[2];
393: case 2:
394: x1 = x[1]; sum0 += x1*yy0[1];
395: case 1:
396: x0 = x[0]; sum0 += x0*yy0[0];
397: case 0:
398: x += j_rem;
399: yy0 += j_rem;
400: j -= j_rem;
401: break;
402: }
403: while (j>0) {
404: sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
405: j -= 4; x+=4;
406: }
407: z[0] = sum0;
409: VecRestoreArrayRead(yy[0],&yy0);
410: break;
411: case 0:
412: break;
413: }
414: z += nv_rem;
415: i -= nv_rem;
416: yy += nv_rem;
418: while (i >0) {
419: sum0 = 0.;
420: sum1 = 0.;
421: sum2 = 0.;
422: sum3 = 0.;
423: VecGetArrayRead(yy[0],&yy0);
424: VecGetArrayRead(yy[1],&yy1);
425: VecGetArrayRead(yy[2],&yy2);
426: VecGetArrayRead(yy[3],&yy3);
427: x = xbase;
429: j = n;
430: switch (j_rem=j&0x3) {
431: case 3:
432: x2 = x[2];
433: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
434: sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
435: case 2:
436: x1 = x[1];
437: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
438: sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
439: case 1:
440: x0 = x[0];
441: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
442: sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
443: case 0:
444: x += j_rem;
445: yy0 += j_rem;
446: yy1 += j_rem;
447: yy2 += j_rem;
448: yy3 += j_rem;
449: j -= j_rem;
450: break;
451: }
452: while (j>0) {
453: x0 = x[0];
454: x1 = x[1];
455: x2 = x[2];
456: x3 = x[3];
457: x += 4;
459: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
460: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
461: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
462: sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
463: j -= 4;
464: }
465: z[0] = sum0;
466: z[1] = sum1;
467: z[2] = sum2;
468: z[3] = sum3;
469: z += 4;
470: i -= 4;
471: VecRestoreArrayRead(yy[0],&yy0);
472: VecRestoreArrayRead(yy[1],&yy1);
473: VecRestoreArrayRead(yy[2],&yy2);
474: VecRestoreArrayRead(yy[3],&yy3);
475: yy += 4;
476: }
477: VecRestoreArrayRead(xin,&xbase);
478: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
479: return 0;
480: }
482: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
483: {
484: PetscInt i,j=0,n = xin->map->n;
485: PetscReal max,tmp;
486: const PetscScalar *xx;
488: VecGetArrayRead(xin,&xx);
489: if (!n) {
490: max = PETSC_MIN_REAL;
491: j = -1;
492: } else {
493: max = PetscRealPart(*xx++); j = 0;
494: for (i=1; i<n; i++) {
495: if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
496: }
497: }
498: *z = max;
499: if (idx) *idx = j;
500: VecRestoreArrayRead(xin,&xx);
501: return 0;
502: }
504: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
505: {
506: PetscInt i,j=0,n = xin->map->n;
507: PetscReal min,tmp;
508: const PetscScalar *xx;
510: VecGetArrayRead(xin,&xx);
511: if (!n) {
512: min = PETSC_MAX_REAL;
513: j = -1;
514: } else {
515: min = PetscRealPart(*xx++); j = 0;
516: for (i=1; i<n; i++) {
517: if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
518: }
519: }
520: *z = min;
521: if (idx) *idx = j;
522: VecRestoreArrayRead(xin,&xx);
523: return 0;
524: }
526: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
527: {
528: PetscInt i,n = xin->map->n;
529: PetscScalar *xx;
531: VecGetArrayWrite(xin,&xx);
532: if (alpha == (PetscScalar)0.0) {
533: PetscArrayzero(xx,n);
534: } else {
535: for (i=0; i<n; i++) xx[i] = alpha;
536: }
537: VecRestoreArrayWrite(xin,&xx);
538: return 0;
539: }
541: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
542: {
543: PetscInt n = xin->map->n,j,j_rem;
544: const PetscScalar *yy0,*yy1,*yy2,*yy3;
545: PetscScalar *xx,alpha0,alpha1,alpha2,alpha3;
547: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
548: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
549: #endif
551: PetscLogFlops(nv*2.0*n);
552: VecGetArray(xin,&xx);
553: switch (j_rem=nv&0x3) {
554: case 3:
555: VecGetArrayRead(y[0],&yy0);
556: VecGetArrayRead(y[1],&yy1);
557: VecGetArrayRead(y[2],&yy2);
558: alpha0 = alpha[0];
559: alpha1 = alpha[1];
560: alpha2 = alpha[2];
561: alpha += 3;
562: PetscKernelAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
563: VecRestoreArrayRead(y[0],&yy0);
564: VecRestoreArrayRead(y[1],&yy1);
565: VecRestoreArrayRead(y[2],&yy2);
566: y += 3;
567: break;
568: case 2:
569: VecGetArrayRead(y[0],&yy0);
570: VecGetArrayRead(y[1],&yy1);
571: alpha0 = alpha[0];
572: alpha1 = alpha[1];
573: alpha +=2;
574: PetscKernelAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
575: VecRestoreArrayRead(y[0],&yy0);
576: VecRestoreArrayRead(y[1],&yy1);
577: y +=2;
578: break;
579: case 1:
580: VecGetArrayRead(y[0],&yy0);
581: alpha0 = *alpha++;
582: PetscKernelAXPY(xx,alpha0,yy0,n);
583: VecRestoreArrayRead(y[0],&yy0);
584: y +=1;
585: break;
586: }
587: for (j=j_rem; j<nv; j+=4) {
588: VecGetArrayRead(y[0],&yy0);
589: VecGetArrayRead(y[1],&yy1);
590: VecGetArrayRead(y[2],&yy2);
591: VecGetArrayRead(y[3],&yy3);
592: alpha0 = alpha[0];
593: alpha1 = alpha[1];
594: alpha2 = alpha[2];
595: alpha3 = alpha[3];
596: alpha += 4;
598: PetscKernelAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
599: VecRestoreArrayRead(y[0],&yy0);
600: VecRestoreArrayRead(y[1],&yy1);
601: VecRestoreArrayRead(y[2],&yy2);
602: VecRestoreArrayRead(y[3],&yy3);
603: y += 4;
604: }
605: VecRestoreArray(xin,&xx);
606: return 0;
607: }
609: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
611: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
612: {
613: PetscInt n = yin->map->n;
614: PetscScalar *yy;
615: const PetscScalar *xx;
617: if (alpha == (PetscScalar)0.0) {
618: VecCopy(xin,yin);
619: } else if (alpha == (PetscScalar)1.0) {
620: VecAXPY_Seq(yin,alpha,xin);
621: } else if (alpha == (PetscScalar)-1.0) {
622: PetscInt i;
623: VecGetArrayRead(xin,&xx);
624: VecGetArray(yin,&yy);
626: for (i=0; i<n; i++) yy[i] = xx[i] - yy[i];
628: VecRestoreArrayRead(xin,&xx);
629: VecRestoreArray(yin,&yy);
630: PetscLogFlops(1.0*n);
631: } else {
632: VecGetArrayRead(xin,&xx);
633: VecGetArray(yin,&yy);
634: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
635: {
636: PetscScalar oalpha = alpha;
637: fortranaypx_(&n,&oalpha,xx,yy);
638: }
639: #else
640: {
641: PetscInt i;
643: for (i=0; i<n; i++) yy[i] = xx[i] + alpha*yy[i];
644: }
645: #endif
646: VecRestoreArrayRead(xin,&xx);
647: VecRestoreArray(yin,&yy);
648: PetscLogFlops(2.0*n);
649: }
650: return 0;
651: }
653: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
654: /*
655: IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
656: to be slower than a regular C loop. Hence,we do not include it.
657: void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
658: */
660: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
661: {
662: PetscInt i,n = win->map->n;
663: PetscScalar *ww;
664: const PetscScalar *yy,*xx;
666: VecGetArrayRead(xin,&xx);
667: VecGetArrayRead(yin,&yy);
668: VecGetArray(win,&ww);
669: if (alpha == (PetscScalar)1.0) {
670: PetscLogFlops(n);
671: /* could call BLAS axpy after call to memcopy, but may be slower */
672: for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
673: } else if (alpha == (PetscScalar)-1.0) {
674: PetscLogFlops(n);
675: for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
676: } else if (alpha == (PetscScalar)0.0) {
677: PetscArraycpy(ww,yy,n);
678: } else {
679: PetscScalar oalpha = alpha;
680: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
681: fortranwaxpy_(&n,&oalpha,xx,yy,ww);
682: #else
683: for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
684: #endif
685: PetscLogFlops(2.0*n);
686: }
687: VecRestoreArrayRead(xin,&xx);
688: VecRestoreArrayRead(yin,&yy);
689: VecRestoreArray(win,&ww);
690: return 0;
691: }
693: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
694: {
695: PetscInt n = xin->map->n,i;
696: const PetscScalar *xx,*yy;
697: PetscReal m = 0.0;
699: VecGetArrayRead(xin,&xx);
700: VecGetArrayRead(yin,&yy);
701: for (i = 0; i < n; i++) {
702: if (yy[i] != (PetscScalar)0.0) {
703: m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
704: } else {
705: m = PetscMax(PetscAbsScalar(xx[i]), m);
706: }
707: }
708: VecRestoreArrayRead(xin,&xx);
709: VecRestoreArrayRead(yin,&yy);
710: MPIU_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
711: PetscLogFlops(n);
712: return 0;
713: }
715: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
716: {
717: Vec_Seq *v = (Vec_Seq*)vin->data;
720: v->unplacedarray = v->array; /* save previous array so reset can bring it back */
721: v->array = (PetscScalar*)a;
722: return 0;
723: }
725: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
726: {
727: Vec_Seq *v = (Vec_Seq*)vin->data;
729: PetscFree(v->array_allocated);
730: v->array_allocated = v->array = (PetscScalar*)a;
731: return 0;
732: }