static double sv2[2*LOC]; static double rv2[2*LOC]; static double sv4[4*LOC]; static double rv4[4*LOC]; /********************************************** * FUNCTION: sub2 -- subtract two 2-vectors * * INPUTS: r, a, b (r = a-b) * **********************************************/ void sub2 (r, a, b) struct v2 *r, *a, *b; { r->el[0] = a->el[0] - b->el[0]; r->el[1] = a->el[1] - b->el[1]; } /* end of function sub2 */ /************************************************* * FUNCTION: sub2_4 -- subtract two 4-vectors * * INPUTS: k, t, u = k-t * *************************************************/ void sub2_4 (u, k, t) struct v4 *u, *k, *t; { sub2(&u->el[0], &k->el[0], &t->el[0]); sub2(&u->el[1], &k->el[1], &t->el[1]); } /* end of function sub2_4 */ /*********************************************** * FUNCTION: subkv2 -- subtract two kvec2's * * INPUTS: U(=A-B), A, B * ***********************************************/ void subkv2 (U, A, B, xm) struct kvec2 *U, *A, *B; int xm; { int i; for (i = 0; i < xm; i++) sub2(&U->el[i], &A->el[i], &B->el[i]); } /* end of function subkv2 */ /*********************************************** * FUNCTION: subkv4 -- subtract two kvec4's * * INPUTS: U(=A-B), A, B * ***********************************************/ void subkv4 (U, A, B, xm) struct kvec4 *U, *A, *B; int xm; { int i; for (i = 0; i < xm; i++) sub2_4(&U->el[i], &A->el[i], &B->el[i]); } /* end of function subkv4 */ /************************************************************ * FUNCTION: bmult2 -- multiply a b-matrix by a 2-vector * * INPUTS: B = b-matrix * * v = 2-vector * * r = Bv * ************************************************************/ void bmult2 (r, B, v) struct v2 *r, *B, *v; { r->el[0] = B->el[0] * v->el[0]; r->el[1] = B->el[1] * v->el[0]; } /* end of function bmult2 */ /************************************************************ * FUNCTION: cmult2 -- multiply a c-matrix by a 2-vector * * INPUTS: C = c-matrix * * v = 2-vector * * r = Cv * ************************************************************/ void cmult2 (r, C, v) struct v2 *r, *C, *v; { r->el[0] = C->el[0] * v->el[1]; r->el[1] = C->el[1] * v->el[1]; } /* end of function cmult2 */ /*************************************************************** * FUNCTION: mult22_2 -- multiply a 2x2 matrix by a 2-vector * * INPUTS: A = 2x2 matrix * * x = 2-vector * * r = Ax * ***************************************************************/ void mult22_2 (r, A, x) struct m22 *A; struct v2 *r, *x; { r->el[0] = A->el[0][0]*x->el[0] + A->el[0][1]*x->el[1]; r->el[1] = A->el[1][0]*x->el[0] + A->el[1][1]*x->el[1]; } /* end of function mult22_2 */ /***************************************** * FUNCTION: add2 -- add two 2-vectors * * INPUTS: A, B * * OUTPUT: R = A + B * *****************************************/ void add2 (R, A, B) struct v2 *R, *A, *B; { R->el[0] = A->el[0] + B->el[0]; R->el[1] = A->el[1] + B->el[1]; } /* end of function add2 */ /**************************************************************** * FUNCTION: mult_M24 -- multiplies a 24-matrix by a u-vector * * INPUTS: M = 24-matrix * * u = u-vector * * r = Mu * ****************************************************************/ void mult_M24 (r, M, u, xm) struct triblock24 *M; struct kvec2 *r; struct kvec4 *u; int xm; { struct v2 temp1, temp2, temp3; int i, im1, xmm1=xm-1, xmm2=xm-2; mult22_2(&temp1, &M->a[0].el[0], &u->el[0].el[0]); mult22_2(&temp2, &M->a[0].el[1], &u->el[0].el[1]); add2(&temp1, &temp1, &temp2); mult22_2(&temp2, &M->b[0], &u->el[1].el[0]); add2(&r->el[0], &temp1, &temp2); for (i = 1; i <= xmm2; i++) { im1 = i-1; mult22_2(&temp1, &M->c[im1], &u->el[im1].el[1]); mult22_2(&temp2, &M->a[i].el[0], &u->el[i].el[0]); add2(&temp3, &temp1, &temp2); mult22_2(&temp1, &M->a[i].el[1], &u->el[i].el[1]); mult22_2(&temp2, &M->b[i], &u->el[i+1].el[0]); add2(&temp1, &temp1, &temp2); add2(&r->el[i], &temp1, &temp3); } mult22_2(&temp1, &M->c[xmm2], &u->el[xmm2].el[1]); mult22_2(&temp2, &M->a[xmm1].el[0], &u->el[xmm1].el[0]); add2(&temp1, &temp1, &temp2); mult22_2(&temp2, &M->a[xmm1].el[1], &u->el[xmm1].el[1]); add2(&r->el[xmm1], &temp1, &temp2); } /* end of function mult_M24 */ /**************************************************************** * FUNCTION: mult44_4 -- multiply a 4x4 matrix by a 4-vector * * INPUTS: M = 4x4 matrix * * v = 4-vector * * u = Mv * ****************************************************************/ void mult44_4 (u, M, v) struct m44 *M; struct v4 *u, *v; { struct v2 temp1, temp2; mult22_2(&temp1, &M->A, &v->el[0]); mult22_2(&temp2, &M->B, &v->el[1]); add2(&u->el[0], &temp1, &temp2); mult22_2(&temp1, &M->C, &v->el[0]); mult22_2(&temp2, &M->D, &v->el[1]); add2(&u->el[1], &temp1, &temp2); } /* end of function mult44_4 */ /****************************************************************** * FUNCTION: bmult2_4 -- multiply a 4x4 b-matrix by a 4-vector * * INPUTS: B = 4x4 c matrix * * v = 4-vector * * u = Cv * ******************************************************************/ void bmult2_4 (u, B, v) struct v4 *u, *v; struct m42 *B; { mult22_2(&u->el[0], &B->el[0], &v->el[0]); mult22_2(&u->el[1], &B->el[1], &v->el[0]); } /* end of function bmult2_4 */ /***************************************** * FUNCTION: add4 -- add two 4-vectors * * INPUTS: A, B * * OUTPUT: R = A + B * *****************************************/ void add4 (R, A, B) struct v4 *R, *A, *B; { add2(&R->el[0], &A->el[0], &B->el[0]); add2(&R->el[1], &A->el[1], &B->el[1]); } /* end of function add4 */ /****************************************************************** * FUNCTION: cmult2_4 -- multiply a 4x4 c-matrix by a 4-vector * * INPUTS: C = 4x4 c matrix * * v = 4-vector * * u = Cv * ******************************************************************/ void cmult2_4 (u, C, v) struct v4 *u, *v; struct m42 *C; { mult22_2(&u->el[0], &C->el[0], &v->el[1]); mult22_2(&u->el[1], &C->el[1], &v->el[1]); } /* end of function cmult2_4 */ /**************************************************** * FUNCTION: seam -- "adds" together two vectors * * INPUTS: C = A+B * ****************************************************/ void seam (C, A, B, xm) struct kvec4 *C; struct kvec2 *A, *B; int xm; { int i, j; for (i = 0; i < xm; i++) for (j = 0; j < 2; j++) { C->el[i].el[j].el[0] = A->el[i].el[j]; C->el[i].el[j].el[1] = B->el[i].el[j]; } } /* end of function seam */ /********************************************************************* * FUNCTION: mult_M44 -- multiplies a triblock44 matrix by a kvec4 * * INPUTS: M = triblock44 matrix * * u = kvec4 vector * * r = Mu * *********************************************************************/ void mult_M44 (r, M, u, xm) struct triblock44 *M; struct kvec4 *r, *u; int xm; { struct v4 temp1, temp2, temp3; int i, im1, xmm1=xm-1, xmm2=xm-2; mult44_4(&temp1, &M->a[0], &u->el[0]); bmult2_4(&temp2, &M->b[0], &u->el[1]); add4(&r->el[0], &temp1, &temp2); for (i = 1; i <= xmm2; i++) { im1 = i-1; cmult2_4(&temp1, &M->c[im1], &u->el[im1]); mult44_4(&temp2, &M->a[i], &u->el[i]); add4(&temp3, &temp1, &temp2); bmult2_4(&temp2, &M->b[i], &u->el[i+1]); add4(&r->el[i], &temp3, &temp2); } cmult2_4(&temp1, &M->c[xmm2], &u->el[xmm2]); mult44_4(&temp2, &M->a[xmm1], &u->el[xmm1]); add4(&r->el[xmm1], &temp1, &temp2); } /* end of function mult_M44 */ /****************************************** * FUNCTION: addkv4 -- add two kvec4's * * INPUTS: U(=A+B), A, B * ******************************************/ void addkv4 (U, A, B, xm) struct kvec4 *U, *A, *B; int xm; { int i, j; for (i = 0; i < xm; i++) for (j = 0; j < 2; j++) add2(&U->el[i].el[j], &A->el[i].el[j], &B->el[i].el[j]); } /* end of function addkv4 */ /********************************************************************************* * FUNCTION: mult_tri2xu -- multiplies a triblock22x matrix by a kvec2x vector * * INPUTS: M = triblock22x matrix * * u = kvec2x vector * * r = Mu * *********************************************************************************/ void mult_tri2xu (r, M, u, xm) struct triblock22 *M; struct kvec2 *r, *u; int xm; { struct v2 temp1, temp2, temp3; int i, im1, xmm1=xm-1, xmm2=xm-2; mult22_2(&temp1, &M->a[0], &u->el[0]); bmult2(&temp2, &M->b[0], &u->el[1]); add2(&r->el[0], &temp1, &temp2); for (i = 1; i <= xmm2; i++) { im1 = i-1; cmult2(&temp1, &M->c[im1], &u->el[im1]); mult22_2(&temp2, &M->a[i], &u->el[i]); add2(&temp3, &temp1, &temp2); bmult2(&temp2, &M->b[i], &u->el[i+1]); add2(&r->el[i], &temp3, &temp2); } cmult2(&temp1, &M->c[xmm2], &u->el[xmm2]); mult22_2(&temp2, &M->a[xmm1], &u->el[xmm1]); add2(&r->el[xmm1], &temp1, &temp2); } /* end of function mult_tri2xu */ /****************************************** * FUNCTION: addkv2 -- add two kvec2's * * INPUTS: U(=A+B), A, B * ******************************************/ void addkv2 (U, A, B, xm) struct kvec2 *U, *A, *B; int xm; { int i; for (i = 0; i < xm; i++) add2(&U->el[i], &A->el[i], &B->el[i]); } /* end of function addkv2 */ /**************************************************************** * FUNCTION: mult_M42 -- multiplies a 42-matrix by a u-vector * * INPUTS: M = 42-matrix * * u = u-vector * * r = Mu * ****************************************************************/ void mult_M42 (r, M, u, xm) struct triblock22 *M; struct kvec2 *r, *u; int xm; { struct v2 temp1, temp2, temp3; int i, im1, xmm1=xm-1, xmm2=xm-2; mult22_2(&temp1, &M->a[0], &u->el[0]); bmult2(&temp2, &M->b[0], &u->el[1]); add2(&r->el[0], &temp1, &temp2); for (i = 1; i <= xmm2; i++) { im1 = i-1; cmult2(&temp1, &M->c[im1], &u->el[im1]); mult22_2(&temp2, &M->a[i], &u->el[i]); add2(&temp3, &temp1, &temp2); bmult2(&temp2, &M->b[i], &u->el[i+1]); add2(&r->el[i], &temp3, &temp2); } cmult2(&temp1, &M->c[xmm2], &u->el[xmm2]); mult22_2(&temp2, &M->a[xmm1], &u->el[xmm1]); add2(&r->el[xmm1], &temp1, &temp2); } /* end of function mult_M42 */ /************************************************************************ * FUNCTION: dot_collvec --determine the dot product of two collvecs * * INPUTS: A, B * * OUTPUT: A (dot) B * ************************************************************************/ double dot_collvec (A, B, xm, ym) struct collvec *A, *B; int xm, ym; { int i, j, k, l; double sum = 0; for (i = 0; i < xm; i++) for (j = 0; j < 2; j++) { sum += A->F[0].el[i].el[j] * B->F[0].el[i].el[j]; sum += A->L[0].el[i].el[j] * B->L[0].el[i].el[j]; } for (i = 0; i <= ym-2; i++) for (j = 0; j < xm; j++) for (k = 0; k < 2; k++) for (l = 0; l < 2; l++) sum += A->M[i].el[j].el[k].el[l] * B->M[i].el[j].el[k].el[l]; return(sum); } /* end of function dot_collvec */ /***************************************************************************** * FUNCTION: dot_mat_kvec4 --determine the dot product of two mat_kvec4's * * INPUTS: A, B * * OUTPUT: A (dot) B * *****************************************************************************/ double dot_mat_kvec4 (A, B, xm, ym) struct mat_kvec4 *A, *B; int xm, ym; { int i, j, k, l; double sum = 0; for (i = 0; i < ym; i++) for (j = 0; j < xm; j++) for (k = 0; k < 2; k++) for (l = 0; l < 2; l++) sum += A->el[i].el[j].el[k].el[l] * B->el[i].el[j].el[k].el[l]; return(sum); } /* end of function dot_mat_kvec4 */ /************************************************* * FUNCTION: saxpy_mat_kvec4 --determine ax+y * * INPUTS: A, B * * OUTPUT: A (dot) B * *************************************************/ void saxpy_mat_kvec4 (r, a, x, y, xm, ym) struct mat_kvec4 *r, *x, *y; double a; int xm, ym; { int i, j, k, l; double sum = 0; for (i = 0; i < ym; i++) for (j = 0; j < xm; j++) for (k = 0; k < 2; k++) for (l = 0; l < 2; l++) r->el[i].el[j].el[k].el[l] = a*x->el[i].el[j].el[k].el[l] + y->el[i].el[j].el[k].el[l]; } /* end of function saxpy_mat_kvec4 */ /********************************************************** * FUNCTION: scmultkv2 -- multiply a kvec2 by a scalar * * INPUTS: U(=kA), k, A * **********************************************************/ void scmultkv2 (U, k, A, xm) struct kvec2 *U, *A; double k; int xm; { int i, j; for (i = 0; i < xm; i++) for (j = 0; j < 2; j++) U->el[i].el[j] = k * A->el[i].el[j]; } /* end of function scmultkv2 */ /********************************************************** * FUNCTION: scmultkv4 -- multiply a kvec4 by a scalar * * INPUTS: U(=kA), c, A * **********************************************************/ void scmultkv4 (U, c, A, xm) struct kvec4 *U, *A; double c; int xm; { int i, j, k; for (i = 0; i < xm; i++) for (j = 0; j < 2; j++) for (k = 0; k < 2; k++) U->el[i].el[j].el[k] = c * A->el[i].el[j].el[k]; } /* end of function scmultkv4 */ /***************************************************************** * FUNCTION: scmult_collvec -- multiply a collvec by a scalar * * INPUTS: A,B = collvecs (B = cA) * * c = scalar * *****************************************************************/ void scmult_collvec (B, A, c, xm, ym) struct collvec *B, *A; double c; int ym; { int i; scmultkv2(&B->F[0], c, &A->F[0], xm); scmultkv2(&B->L[0], c, &A->L[0], xm); for (i = 0; i <= ym-2; i++) scmultkv4(&B->M[i], c, &A->M[i], xm); } /* end of function scmult_collvec */ /********************************************************* * FUNCTION: add_collvec -- add two collvecs together * * INPUTS: C = A+B * *********************************************************/ void add_collvec (C, A, B, xm, ym) struct collvec *A, *B, *C; int xm, ym; { int i; addkv2(&C->F[0], &A->F[0], &B->F[0], xm); addkv2(&C->L[0], &A->L[0], &B->L[0], xm); for (i = 0; i <= ym-2; i++) addkv4(&C->M[i], &A->M[i], &B->M[i], xm); } /* end of function add_collvec */ /******************************************************* * FUNCTION: saxpy_collvec -- compute r=ax+y, where * * x and y are collvecs * * a = scalar * *******************************************************/ void saxpy_collvec (r, a, x, y, xm, ym) struct collvec *r, *x, *y; double a; int xm, ym; { struct collvec tmp; dcollvec(&tmp, xm, ym); scmult_collvec(&tmp, x, a, xm, ym); add_collvec(r, &tmp, y, xm, ym); free_collvec(&tmp, ym); } /* end of function saxpy_collvec */ /******************************************** * FUNCTION: blank -- prints blank lines * * INPUT: n = number of blank lines * ********************************************/ void blank(n) int n; { int i; for (i = 0; i < n; i++) printf("\n"); } /* end of function blank */ /************************************************************* * FUNCTION: print22 -- prints a 2x2 matrix to the screen * * INPUT: A = 2x2 matrix to be printed * *************************************************************/ void print22 (A) struct m22 *A; { int i, j; for (i = 0; i < 2; i++) { blank(1); for (j = 0; j < 2; j++) printf("%e ", A->el[i][j]); } blank(2); } /* end of function print22 */ /************************************************************ * FUNCTION: inv22 -- determines inverse of a 2x2 matrix * * INPUTS: A = 2x2 matrix whose inverse we compute * * B = 2x2 matrix computed s.t. B = inv(A) * ************************************************************/ void inv22 (B, A) struct m22 *B, *A; { double det; det = A->el[0][0]*A->el[1][1] - A->el[1][0]*A->el[0][1]; if (det == 0.0) { printf("\n\nSingular 2x2 matrix\n"); print22(A); printf("exiting from function inv22 ...\n\n"); exit(0); } B->el[0][0] = A->el[1][1] / det; B->el[1][1] = A->el[0][0] / det; B->el[0][1] = -A->el[0][1] / det; B->el[1][0] = -A->el[1][0] / det; } /* end of function inv22 */ /************************************************************ * FUNCTION: mult22 -- multiply two 2x2 matrices together * * INPUTS: A, B * * OUTPUT: R = AB * ************************************************************/ void mult22 (R, A, B) struct m22 *R, *A, *B; { R->el[0][0] = A->el[0][0]*B->el[0][0] + A->el[0][1]*B->el[1][0]; R->el[0][1] = A->el[0][0]*B->el[0][1] + A->el[0][1]*B->el[1][1]; R->el[1][0] = A->el[1][0]*B->el[0][0] + A->el[1][1]*B->el[1][0]; R->el[1][1] = A->el[1][0]*B->el[0][1] + A->el[1][1]*B->el[1][1]; } /* end of function mult22 */ /************************************************** * FUNCTION: sub22 -- subtract two 2x2 matrices * * INPUTS: A, B * * OUTPUT: R = A - B * **************************************************/ void sub22 (R, A, B) struct m22 *R, *A, *B; { R->el[0][0] = A->el[0][0] - B->el[0][0]; R->el[0][1] = A->el[0][1] - B->el[0][1]; R->el[1][0] = A->el[1][0] - B->el[1][0]; R->el[1][1] = A->el[1][1] - B->el[1][1]; } /* end of function sub22 */ /*************************************************************** * FUNCTION: opp22 -- produces the opposite of a 2x2 matrix * * INPUTS: A = given 2x2 matrix * * B = -A * ***************************************************************/ void opp22 (B, A) struct m22 *B, *A; { int i, j; for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) B->el[i][j] = -A->el[i][j]; } /* end of function opp22 */ /********************************************* * FUNCTION: add22 -- add two 2x2 matrices * * INPUTS: A, B * * OUTPUT: R = A + B * *********************************************/ void add22 (R, A, B) struct m22 *R, *A, *B; { R->el[0][0] = A->el[0][0] + B->el[0][0]; R->el[0][1] = A->el[0][1] + B->el[0][1]; R->el[1][0] = A->el[1][0] + B->el[1][0]; R->el[1][1] = A->el[1][1] + B->el[1][1]; } /* end of function add22 */ /********************************************************* * FUNCTION: solve22 -- solves the 2x2 matrix equation * * Au = k * * INPUTS: A, u, k * *********************************************************/ void solve22 (u, A, k) struct m22 *A; struct v2 *u, *k; { double a,b,c,d,e,f, det; a = A->el[0][0]; b = A->el[0][1]; c = A->el[1][0]; d = A->el[1][1]; e = k->el[0]; f = k->el[1]; det = a*d - b*c; u->el[0] = (d*e - b*f) / det; u->el[1] = (a*f - c*e) / det; } /* end of function solve22 */ /*********************************************************** * FUNCTION: cmultb -- multiply a c-matrix by a b-matrix * * INPUTS: C = c-matrix * * B = b-matrix * * R = c*b * ***********************************************************/ void cmultb (R, C, B) struct v2 *R, *C, *B; { R->el[0] = C->el[0] * B->el[1]; R->el[1] = C->el[1] * B->el[1]; } /* end of function cmultb */ /**************************************************************** * FUNCTION: subb -- subtract a b-matrix from a dense matrix * * INPUTS: A = 2x2 matrix * * B = 2-vector (matrix) * * OUTPUT: R = 2x2 matrix A-B * ****************************************************************/ void subb (R, A, B) struct m22 *R, *A; struct v2 *B; { R->el[0][0] = A->el[0][0] - B->el[0]; R->el[0][1] = A->el[0][1]; R->el[1][0] = A->el[1][0] - B->el[1]; R->el[1][1] = A->el[1][1]; } /* end of function subb */ /********************************************** * FUNCTION: inv44 -- inverts a 4x4 matrix * * INPUTS: M = given matrix to be inverted * * V = inv(M) (i.e., we compute V) * **********************************************/ void inv44 (V, M) struct m44 *V, *M; { struct m22 Ainv, Binv, Cinv, Dinv, temp1, temp2, Astar, Bstar, Cstar, Dstar, Astarinv, Bstarinv, Cstarinv, Dstarinv; /* get Ainv and Dinv */ inv22(&Ainv, &M->A); inv22(&Dinv, &M->D); /* get Schur complements */ mult22(&temp1, &M->B, &Dinv); mult22(&temp2, &temp1, &M->C); sub22(&temp1, &M->A, &temp2); inv22(&V->A, &temp1); mult22(&temp1, &M->C, &Ainv); mult22(&temp2, &temp1, &M->B); sub22(&temp1, &M->D, &temp2); inv22(&V->D, &temp1); /* get rest of V */ mult22(&temp1, &Dinv, &M->C); opp22(&temp2, &temp1); mult22(&V->C, &temp2, &V->A); mult22(&temp1, &Ainv, &M->B); opp22(&temp2, &temp1); mult22(&V->B, &temp2, &V->D); } /* end of function inv44 */ /************************************************************** * FUNCTION: multb4 -- multiply a 4x4 matrix by a b matrix * * INPUTS: M = 4x4 dense matrix * * b = 4x4 b matrix * * u = Mb * **************************************************************/ void multb4 (u, M, b) struct m44 *M; struct m42 *u, *b; { struct m22 temp1, temp2; mult22(&temp1, &M->A, &b->el[0]); mult22(&temp2, &M->B, &b->el[1]); add22(&u->el[0], &temp1, &temp2); mult22(&temp1, &M->C, &b->el[0]); mult22(&temp2, &M->D, &b->el[1]); add22(&u->el[1], &temp1, &temp2); } /* end of function multb4 */ /********************************************************************* * FUNCTION: cmultb4 -- multiply a 4x4 c matrix by a 4x4 b matrix * * INPUTS: c, b, u = cb * *********************************************************************/ void cmultb4 (u, c, b) struct m42 *u, *c, *b; { mult22(&u->el[0], &c->el[0], &b->el[1]); mult22(&u->el[1], &c->el[1], &b->el[1]); } /* end of function cmultb4 */ /************************************************************************* * FUNCTION: subb4 -- subtract a 4x4 b matrix from a 4x4 dense matrix * * INPUTS: M = 4x4 dense matrix * * b = 4x4 b matrix * * U = M-b * *************************************************************************/ void subb4 (U, M, b) struct m42 *b; struct m44 *M, *U; { sub22(&U->A, &M->A, &b->el[0]); sub22(&U->C, &M->C, &b->el[1]); U->B = M->B; U->D = M->D; } /* end of function subb4 */ /*********************************************************************** * FUNCTION: tridiag2 -- solves the 1-d collocation matrix equation * * Au = k * * INPUTS: A = block tridiagonal matrix; each block is a 2x2 matrix * * k = known RHS vector * * u = solution vector * ***********************************************************************/ void tridiag2 (u, A, k, xm) struct triblock22 *A; struct kvec2 *u, *k; int xm; { struct v2 t2; struct m22 bet, inv; struct gvec2 g; int j, jp1, xmm2=xm-2; dgvec2(&g, xm); /************************* * forward elimination * *************************/ bet = A->a[0]; solve22(&u->el[0], &bet, &k->el[0]); for (j = 0; j <= xmm2; j++) { jp1 = j+1; solve22(&g.el[j], &bet, &A->b[j]); cmultb(&t2, &A->c[j], &g.el[j]); subb(&bet, &A->a[jp1], &t2); cmult2(&t2, &A->c[j], &u->el[j]); sub2(&t2, &k->el[jp1], &t2); solve22(&u->el[jp1], &bet, &t2); } /*********************** * back substitution * ***********************/ for (j = xmm2; j >= 0; j--) { bmult2(&t2, &g.el[j], &u->el[j+1]); sub2(&u->el[j], &u->el[j], &t2); } free_gvec2(&g); } /* end of function tridiag2 */ /*********************************************************************** * FUNCTION: tridiag4 -- solves the matrix equation * * Au = k * * INPUTS: A = block tridiagonal matrix; each block is a 4x4 matrix * * k = known RHS vector * * u = solution vector * ***********************************************************************/ void tridiag4 (u, A, k, xm) struct triblock44 *A; struct kvec4 *u, *k; int xm; { struct m44 bet, inv; struct gvec4 g; struct m42 temp42; struct v4 temp4; int j, jp1, xmm2=xm-2; dgvec4(&g, xm); /************************* * forward elimination * *************************/ inv44(&inv, &A->a[0]); mult44_4(&u->el[0], &inv, &k->el[0]); for (j = 0; j <= xmm2; j++) { jp1 = j+1; multb4(&g.el[j], &inv, &A->b[j]); cmultb4(&temp42, &A->c[j], &g.el[j]); subb4(&bet, &A->a[jp1], &temp42); inv44(&inv, &bet); cmult2_4(&temp4, &A->c[j], &u->el[j]); sub2_4(&temp4, &k->el[jp1], &temp4); mult44_4(&u->el[jp1], &inv, &temp4); } /*********************** * back substitution * ***********************/ for (j = xmm2; j >= 0; j--) { bmult2_4(&temp4, &g.el[j], &u->el[j+1]); sub2_4(&u->el[j], &u->el[j], &temp4); } free_gvec4(&g); } /* end of function tridiag4 */ /********************************************************************** * mult_CU_send -- multiply a Cblock by a kvec4 and send the result * * to the next processor * **********************************************************************/ void mult_CU_send (C, U, loc, tp1) struct triblock24 *C; struct kvec4 *U; int loc, tp1; { struct kvec2 t; int i, j; dkvec2(&t, loc); mult_M24(&t, C, U, loc); pvm_initsend(code); for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) pvm_pkdouble(&t.el[i].el[j], 1, 1); pvm_send(tp1, 3); free_kvec2(&t); } /* end of function mult_CU_send */ /****************************************************************** * FUNCTION: mult_CU_AU_BU -- * * compute R(i) = C(i-1)*U(i-1) + A(i)*U(i) + B(i)*U(i+1) * ******************************************************************/ void mult_CU_AU_BU (R, A, B, C, Um, U, Up, loc) struct kvec4 *R, *Um, *U, *Up; struct triblock24 *B, *C; struct triblock44 *A; int loc; { struct kvec2 t21, t22; struct kvec4 t41, t42; dkvec2(&t21, loc); dkvec2(&t22, loc); dkvec4(&t41, loc); dkvec4(&t42, loc); mult_M24(&t21, C, Um, loc); mult_M24(&t22, B, Up, loc); seam(&t41, &t21, &t22, loc); mult_M44(&t42, A, U, loc); addkv4(R, &t41, &t42, loc); free_kvec2(&t21); free_kvec2(&t22); free_kvec4(&t41); free_kvec4(&t42); } /* end of fucntion mult_CU_AU_BU */ /****************************************************** * FUNCTION: mult_CU_BU -- * * compute R(i) = C(i-1)*U(i-1) + B(i)*U(i+1) * ******************************************************/ void mult_CU_BU (R, B, C, Um, Up, loc) struct kvec4 *R, *Um, *Up; struct triblock24 *B, *C; int loc; { struct kvec2 t21, t22; dkvec2(&t21, loc); dkvec2(&t22, loc); mult_M24(&t21, C, Um, loc); mult_M24(&t22, B, Up, loc); seam(R, &t21, &t22, loc); free_kvec2(&t21); free_kvec2(&t22); } /* end of fucntion mult_CU_BU */ /********************************************** * FUNCTION: mult_CFUF_AU_BU -- * * compute R0 = CF*UF + A0*U0 + B0*U1 * **********************************************/ void mult_CFUF_AU_BU (R, A, B, CF, UF, U0, U1, loc) struct kvec4 *R, *U0, *U1; struct kvec2 *UF; struct triblock24 *B; struct triblock44 *A; struct triblock22 *CF; int loc; { struct kvec2 t21, t22; struct kvec4 t41, t42; dkvec2(&t21, loc); dkvec2(&t22, loc); dkvec4(&t41, loc); dkvec4(&t42, loc); mult_M42(&t21, CF, UF, loc); mult_M24(&t22, B, U1, loc); seam(&t41, &t21, &t22, loc); mult_M44(&t42, A, U0, loc); addkv4(R, &t41, &t42, loc); free_kvec2(&t21); free_kvec2(&t22); free_kvec4(&t41); free_kvec4(&t42); } /* end of fucntion mult_CFUF_AU_BU */ /************************************** * FUNCTION: mult_AFUF_BFU0 -- * * compute RF = AF*UF + BF*U0 * **************************************/ void mult_AFUF_BFU0 (R, AF, BF, UF, U0, loc) struct kvec2 *R, *UF; struct kvec4 *U0; struct triblock22 *AF; struct triblock24 *BF; int loc; { struct kvec2 t21, t22; dkvec2(&t21, loc); dkvec2(&t22, loc); mult_M24(&t21, BF, U0, loc); mult_tri2xu(&t22, AF, UF, loc); addkv2(R, &t21, &t22, loc); free_kvec2(&t21); free_kvec2(&t22); } /* end of function mult_AFUF_BFU0 */ /******************************** * FUNCTION: mult_CU_AU_BLUL * ********************************/ void mult_CU_AU_BLUL (R, A, BL, C, Um1, Um2, UL, loc) struct kvec4 *R, *Um1, *Um2; struct kvec2 *UL; struct triblock44 *A; struct triblock22 *BL; struct triblock24 *C; int loc; { struct kvec2 t21, t22; struct kvec4 t41, t42; dkvec2(&t21, loc); dkvec2(&t22, loc); dkvec4(&t41, loc); dkvec4(&t42, loc); mult_M24(&t21, C, Um2, loc); mult_M42(&t22, BL, UL, loc); seam(&t41, &t21, &t22, loc); mult_M44(&t42, A, Um1, loc); addkv4(R, &t41, &t42, loc); free_kvec2(&t21); free_kvec2(&t22); free_kvec4(&t41); free_kvec4(&t42); } /* end of function mult_CU_AU_BLUL */ /*************************************************************** * FUNCTION: mult_ALUL_recv -- add AL*UL to received vector * ***************************************************************/ void mult_ALUL_recv (R, AL, UL, loc, tm1) struct kvec2 *R, *UL; struct triblock22 *AL; int loc, tm1; { struct kvec2 t21, t22; int i, j; dkvec2(&t21, loc); dkvec2(&t22, loc); mult_tri2xu(&t22, AL, UL, loc); pvm_recv(tm1, 3); for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) pvm_upkdouble(&t21.el[i].el[j], 1, 1); addkv2(R, &t21, &t22, loc); free_kvec2(&t21); free_kvec2(&t22); } /* end of function mult_ALUL_recv */ /*********************************************************************** * FUNCTION: mult_AU_BU_recv -- add A0*U0 + B0*U1 to received vector * ***********************************************************************/ void mult_AU_BU_recv (R, A, B, U0, U1, loc, tm1) struct kvec4 *R, *U0, *U1; struct triblock44 *A; struct triblock24 *B; int loc, tm1; { int i, j; struct kvec2 t21, t22; struct kvec4 t41, t42; dkvec2(&t21, loc); dkvec2(&t22, loc); dkvec4(&t41, loc); dkvec4(&t42, loc); mult_M44(&t42, A, U0, loc); mult_M24(&t22, B, U1, loc); pvm_recv(tm1, 3); for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) pvm_upkdouble(&t21.el[i].el[j], 1, 1); seam(&t41, &t21, &t22, loc); addkv4(R, &t41, &t42, loc); free_kvec2(&t21); free_kvec2(&t22); free_kvec4(&t41); free_kvec4(&t42); } /* end of function mult_AU_BU_recv */ /************************************************************* * FUNCTION: mult_BU_recv -- add B0*U1 to received vector * *************************************************************/ void mult_BU_recv (R, B, U1, loc, tm1) struct kvec4 *R, *U1; struct triblock24 *B; int loc, tm1; { int i, j; struct kvec2 t21, t22; dkvec2(&t21, loc); dkvec2(&t22, loc); mult_M24(&t22, B, U1, loc); pvm_recv(tm1, 3); for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) pvm_upkdouble(&t21.el[i].el[j], 1, 1); seam(R, &t21, &t22, loc); free_kvec2(&t21); free_kvec2(&t22); } /* end of function mult_BU_recv */ /********************************************** * FUNCTION: copy_kvec4 -- copies A into B * **********************************************/ void copy_kvec4 (B, A, loc) struct kvec4 *B, *A; int loc; { int i, j, k; for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) for (k = 0; k < 2; k++) B->el[i].el[j].el[k] = A->el[i].el[j].el[k]; } /* end of function copy_kvec4 */ /********************************************** * FUNCTION: copy_kvec2 -- copies A into B * **********************************************/ void copy_kvec2 (B, A, loc) struct kvec2 *B, *A; int loc; { int i, j; for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) B->el[i].el[j] = A->el[i].el[j]; } /* end of function copy_kvec2 */ /************************* * FUNCTION: psolve_M * *************************/ void psolve_M (Y, M, P, bpp, loc, tm1, tp1) struct collvec *Y, *P; struct first_proc_collmat *M; int bpp, loc, tm1, tp1; { int i, im1, ii, j, k, ctr, bppm3 = bpp-3; struct kvec2 t21, t22; struct kvec4 t41, t42; dkvec2(&t21, loc); dkvec2(&t22, loc); dkvec4(&t41, loc); dkvec4(&t42, loc); /********* * RED * *********/ tridiag2(&Y->L[0], &M->AL[0], &P->L[0], loc); ctr = -1; for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) sv2[++ctr] = Y->L[0].el[i].el[j]; /*cshift_S2(&Y->L[0], loc, tm1);*/ tridiag2(&Y->F[0], &M->AF[0], &P->F[0], loc); for (i = 1; i <= bppm3; i += 2) tridiag4(&Y->M[i], &M->A[i], &P->M[i], loc); /*********** * BLACK * ***********/ mult_M42(&t21, &M->CF[0], &Y->F[0], loc); mult_M24(&t22, &M->B[0], &Y->M[1], loc); seam(&t41, &t21, &t22, loc); subkv4(&t42, &P->M[0], &t41, loc); tridiag4(&Y->M[0], &M->A[0], &t42, loc); for (i = 2; i <= bpp-4; i += 2) { im1 = i-1; mult_M24(&t21, &M->C[im1], &Y->M[im1], loc); mult_M24(&t22, &M->B[i], &Y->M[i+1], loc); seam(&t41, &t21, &t22, loc); subkv4(&t42, &P->M[i], &t41, loc); tridiag4(&Y->M[i], &M->A[i], &t42, loc); } i = bpp-2; im1 = bppm3; mult_M24(&t21, &M->C[im1], &Y->M[im1], loc); barrier(); shmem_get(rv4, sv4, 4*loc, tp1); ctr = -1; for (ii = 0; ii < loc; ii++) for (j = 0; j < 2; j++) for (k = 0; k < 2; k++) Y->M[bpp-1].el[ii].el[j].el[k] = rv4[++ctr]; /*cshift_R4(&Y->M[bpp-1], loc, tp1);*/ mult_M24(&t22, &M->B[i], &Y->M[i+1], loc); seam(&t41, &t21, &t22, loc); subkv4(&t42, &P->M[i], &t41, loc); tridiag4(&Y->M[i], &M->A[i], &t42, loc); free_kvec2(&t21); free_kvec2(&t22); free_kvec4(&t41); free_kvec4(&t42); } /* end of function psolve_M */ /************************* * FUNCTION: psolve_S * *************************/ void psolve_S (Y, M, P, bpp, loc, tm1, tp1) struct mat_kvec4 *Y, *P; struct proc_collmat *M; int bpp, loc, tm1, tp1; { int i, im1, ii, j, k, bppm2=bpp-2, ctr; struct kvec2 t21, t22; struct kvec4 t41, t42; dkvec2(&t21, loc); dkvec2(&t22, loc); dkvec4(&t41, loc); dkvec4(&t42, loc); /********* * RED * *********/ tridiag4(&Y->el[0], &M->A[0], &P->el[0], loc); ctr = -1; for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) for (k = 0; k < 2; k++) sv4[++ctr] = Y->el[0].el[i].el[j].el[k]; /*cshift_S4(&Y->el[0], loc, tm1);*/ for (i = 2; i <= bppm2; i += 2) tridiag4(&Y->el[i], &M->A[i], &P->el[i], loc); /*********** * BLACK * ***********/ for (i = 1; i <= bpp-3; i += 2) { im1 = i-1; mult_M24(&t21, &M->C[im1], &Y->el[im1], loc); mult_M24(&t22, &M->B[i], &Y->el[i+1], loc); seam(&t41, &t21, &t22, loc); subkv4(&t42, &P->el[i], &t41, loc); tridiag4(&Y->el[i], &M->A[i], &t42, loc); } i = bpp-1; im1 = bppm2; mult_M24(&t21, &M->C[im1], &Y->el[im1], loc); barrier(); shmem_get(rv4, sv4, 4*loc, tp1); ctr = -1; for (ii = 0; ii < loc; ii++) for (j = 0; j < 2; j++) for (k = 0; k < 2; k++) Y->el[bpp].el[ii].el[j].el[k] = rv4[++ctr]; /*cshift_R4(&Y->el[bpp], loc, tp1);*/ mult_M24(&t22, &M->B[i], &Y->el[i+1], loc); seam(&t41, &t21, &t22, loc); subkv4(&t42, &P->el[i], &t41, loc); tridiag4(&Y->el[i], &M->A[i], &t42, loc); free_kvec2(&t21); free_kvec2(&t22); free_kvec4(&t41); free_kvec4(&t42); } /* end of function psolve_S */ /************************* * FUNCTION: psolve_LS * *************************/ void psolve_LS (Y, YL, LM, P, bpp, loc, tm1, tp1) struct mat_kvec4 *Y, *P; struct kvec2 *YL; struct last_proc_collmat *LM; int bpp, loc, tm1, tp1; { int i, im1, ii, j, k, bppm2=bpp-2, ctr; struct kvec2 t21, t22; struct kvec4 t41, t42; dkvec2(&t21, loc); dkvec2(&t22, loc); dkvec4(&t41, loc); dkvec4(&t42, loc); /********* * RED * *********/ tridiag4(&Y->el[0], &LM->A[0], &P->el[0], loc); ctr = -1; for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) for (k = 0; k < 2; k++) sv4[++ctr] = Y->el[0].el[i].el[j].el[k]; /*cshift_S4(&Y->el[0], loc, tm1);*/ for (i = 2; i <= bppm2; i += 2) tridiag4(&Y->el[i], &LM->A[i], &P->el[i], loc); /*********** * BLACK * ***********/ for (i = 1; i <= bpp-3; i += 2) { im1 = i-1; mult_M24(&t21, &LM->C[im1], &Y->el[im1], loc); mult_M24(&t22, &LM->B[i], &Y->el[i+1], loc); seam(&t41, &t21, &t22, loc); subkv4(&t42, &P->el[i], &t41, loc); tridiag4(&Y->el[i], &LM->A[i], &t42, loc); } i = bpp-1; im1 = bppm2; mult_M24(&t21, &LM->C[im1], &Y->el[im1], loc); barrier(); shmem_get(rv2, sv2, 2*loc, tp1); ctr = -1; for (ii = 0; ii < loc; ii++) for (j = 0; j < 2; j++) YL->el[ii].el[j] = rv2[++ctr]; /*cshift_R2(YL, loc, tp1);*/ mult_M42(&t22, &LM->BL[0], YL, loc); seam(&t41, &t21, &t22, loc); subkv4(&t42, &P->el[i], &t41, loc); tridiag4(&Y->el[i], &LM->A[i], &t42, loc); free_kvec2(&t21); free_kvec2(&t22); free_kvec4(&t41); free_kvec4(&t42); } /* end of function psolve_LS */ /************************************************************************************ * FUNCTION: mult_collmat_collvec_trick_M -- computes v=M*y (using p) for master * ************************************************************************************/ void mult_collmat_collvec_trick_M (V, M, Y, P, bpp, loc, tm1, tp1) struct collvec *V, *Y, *P; struct first_proc_collmat *M; int bpp, loc, tm1, tp1; { int i, j, im1, bppm2=bpp-2, ctr; struct kvec2 kv21; dkvec2(&kv21, loc); mult_M24(&kv21, &M->C[bppm2], &Y->M[bppm2], loc); ctr = -1; for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) sv2[++ctr] = kv21.el[i].el[j]; /*mult_CU_send(&M->C[bppm2], &Y->M[bppm2], loc, tp1);*/ for (i = 0; i <= bppm2; i += 2) copy_kvec4(&V->M[i], &P->M[i], loc); for (i = bpp-3; i >= 1; i -= 2) { im1 = i - 1; mult_CU_BU(&V->M[i], &M->B[i], &M->C[im1], &Y->M[im1], &Y->M[i+1], loc); addkv4(&V->M[i], &V->M[i], &P->M[i], loc); } mult_M24(&V->F[0], &M->BF[0], &Y->M[0], loc); addkv2(&V->F[0], &V->F[0], &P->F[0], loc); barrier(); shmem_get(rv2, sv2, 2*loc, tm1); ctr = -1; for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) V->L[0].el[i].el[j] = rv2[++ctr]; /* pvm_recv(tm1, 3); for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) pvm_upkdouble(&V->L[0].el[i].el[j], 1, 1); */ addkv2(&V->L[0], &V->L[0], &P->L[0], loc); free_kvec2(&kv21); } /* end of function mult_collmat_collvec_trick_M */ /*********************************************************************************** * FUNCTION: mult_collmat_collvec_trick_S -- computes v=M*y (using p) for slave * ***********************************************************************************/ void mult_collmat_collvec_trick_S (V, M, Y, P, bpp, loc, tm1, tp1) struct mat_kvec4 *V, *Y, *P; struct proc_collmat *M; int bpp, loc, tm1, tp1; { int i, im1, j, bppm1=bpp-1, ctr; struct kvec2 kv21, kv22; dkvec2(&kv21, loc); dkvec2(&kv22, loc); mult_M24(&kv21, &M->C[bppm1], &Y->el[bppm1], loc); ctr = -1; for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) sv2[++ctr] = kv21.el[i].el[j]; /*mult_CU_send(&M->C[bppm1], &Y->el[bppm1], loc, tp1);*/ for (i = 1; i <= bppm1; i += 2) copy_kvec4(&V->el[i], &P->el[i], loc); for (i = bpp-2; i >= 1; i -= 2) { im1 = i - 1; mult_CU_BU(&V->el[i], &M->B[i], &M->C[im1], &Y->el[im1], &Y->el[i+1], loc); addkv4(&V->el[i], &V->el[i], &P->el[i], loc); } mult_M24(&kv22, &M->B[0], &Y->el[1], loc); barrier(); shmem_get(rv2, sv2, 2*loc, tm1); ctr = -1; for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) kv21.el[i].el[j] = rv2[++ctr]; seam(&V->el[0], &kv21, &kv22, loc); /*mult_BU_recv(&V->el[0], &M->B[0], &Y->el[1], loc, tm1);*/ addkv4(&V->el[0], &V->el[0], &P->el[0], loc); free_kvec2(&kv21); free_kvec2(&kv22); } /* end of fucntion mult_collmat_collvec_trick_S */ /************************************************************************************* * FUNCTION: mult_collmat_collvec_trick_LS -- computes v=M*y (using p) for lslave * *************************************************************************************/ void mult_collmat_collvec_trick_LS (V, LM, Y, P, bpp, loc, tm1, tp1) struct mat_kvec4 *V, *Y, *P; struct last_proc_collmat *LM; int bpp, loc, tm1, tp1; { int i, im1, j, bppm1=bpp-1, ctr; struct kvec2 kv21, kv22; dkvec2(&kv21, loc); dkvec2(&kv22, loc); mult_M24(&kv21, &LM->CL[0], &Y->el[bppm1], loc); ctr = -1; for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) sv2[++ctr] = kv21.el[i].el[j]; /*mult_CU_send(&LM->CL[0], &Y->el[bppm1], loc, tp1);*/ for (i = 1; i <= bppm1; i += 2) copy_kvec4(&V->el[i], &P->el[i], loc); for (i = bpp-2; i >= 1; i -= 2) { im1 = i - 1; mult_CU_BU(&V->el[i], &LM->B[i], &LM->C[im1], &Y->el[im1], &Y->el[i+1], loc); addkv4(&V->el[i], &V->el[i], &P->el[i], loc); } mult_M24(&kv22, &LM->B[0], &Y->el[1], loc); barrier(); shmem_get(rv2, sv2, 2*loc, tm1); ctr = -1; for (i = 0; i < loc; i++) for (j = 0; j < 2; j++) kv21.el[i].el[j] = rv2[++ctr]; seam(&V->el[0], &kv21, &kv22, loc); /*mult_BU_recv(&V->el[0], &LM->B[0], &Y->el[1], loc, tm1);*/ addkv4(&V->el[0], &V->el[0], &P->el[0], loc); free_kvec2(&kv21); free_kvec2(&kv22); } /* end of fucntion mult_collmat_collvec_trick_LS */