/* *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * * * CitcomS by Louis Moresi, Shijie Zhong, Lijie Han, Eh Tan, * Clint Conrad, Michael Gurnis, and Eun-seo Choi. * Copyright (C) 1994-2005, California Institute of Technology. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * * *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ /* Functions which solve for the velocity and pressure fields using Uzawa-type iteration loop. */ #include #include #include "element_definitions.h" #include "global_defs.h" #include static float solve_Ahat_p_fhat(struct All_variables *E, double **V, double **P, double **F, double imp, int *steps_max); static float solve_Ahat_p_fhat_CG(struct All_variables *E, double **V, double **P, double **F, double imp, int *steps_max); static float solve_Ahat_p_fhat_BiCG(struct All_variables *E, double **V, double **P, double **F, double imp, int *steps_max); static float solve_Ahat_p_fhat_iterCG(struct All_variables *E, double **V, double **P, double **F, double imp, int *steps_max); static double initial_vel_residual(struct All_variables *E, double **V, double **P, double **F, double imp); static double incompressibility_residual(struct All_variables *E, double **V, double **r); /* Master loop for pressure and (hence) velocity field */ void solve_constrained_flow_iterative(E) struct All_variables *E; { void v_from_vector(); void p_to_nodes(); int cycles; cycles=E->control.p_iterations; /* Solve for velocity and pressure, correct for bc's */ solve_Ahat_p_fhat(E,E->U,E->P,E->F,E->control.accuracy,&cycles); v_from_vector(E); p_to_nodes(E,E->P,E->NP,E->mesh.levmax); return; } void solve_constrained_flow_iterative_pseudo_surf(E) struct All_variables *E; { void v_from_vector_pseudo_surf(); void p_to_nodes(); int cycles; cycles=E->control.p_iterations; /* Solve for velocity and pressure, correct for bc's */ solve_Ahat_p_fhat(E,E->U,E->P,E->F,E->control.accuracy,&cycles); v_from_vector_pseudo_surf(E); p_to_nodes(E,E->P,E->NP,E->mesh.levmax); return; } /* ========================================================================= */ static float solve_Ahat_p_fhat(struct All_variables *E, double **V, double **P, double **F, double imp, int *steps_max) { float residual; if(E->control.inv_gruneisen == 0) residual = solve_Ahat_p_fhat_CG(E, V, P, F, imp, steps_max); else { //residual = solve_Ahat_p_fhat_BiCG(E, V, P, F, imp, steps_max); residual = solve_Ahat_p_fhat_iterCG(E, V, P, F, imp, steps_max); } return(residual); } /* Solve incompressible Stokes flow using * conjugate gradient (CG) iterations */ static float solve_Ahat_p_fhat_CG(struct All_variables *E, double **V, double **P, double **F, double imp, int *steps_max) { int m, j, count, valid, lev, npno, neq; int gnpno, gneq; double *r1[NCS], *r2[NCS], *z1[NCS], *s1[NCS], *s2[NCS]; double *shuffle[NCS]; double alpha, delta, r0dotz0, r1dotz1; double residual, v_res; double global_vdot(), global_pdot(); double time0, CPU_time0(); float dpressure, dvelocity; void assemble_c_u(); void assemble_div_u(); void assemble_del2_u(); void assemble_grad_p(); void strip_bcs_from_residual(); int solve_del2_u(); void parallel_process_termination(); gnpno = E->mesh.npno; gneq = E->mesh.neq; npno = E->lmesh.npno; neq = E->lmesh.neq; lev = E->mesh.levmax; for (m=1; m<=E->sphere.caps_per_proc; m++) { r1[m] = (double *)malloc((npno+1)*sizeof(double)); r2[m] = (double *)malloc((npno+1)*sizeof(double)); z1[m] = (double *)malloc((npno+1)*sizeof(double)); s1[m] = (double *)malloc((npno+1)*sizeof(double)); s2[m] = (double *)malloc((npno+1)*sizeof(double)); } time0 = CPU_time0(); count = 0; if(E->control.inv_gruneisen != 0) { for(m=1;m<=E->sphere.caps_per_proc;m++) for(j=1;j<=npno;j++) r2[m][j] = 0.0; assemble_c_u(E, V, r2, lev); } /* calculate the initial velocity residual */ v_res = initial_vel_residual(E, V, P, F, imp); /* initial residual r1 = div(V) or r1 = div(rho*V) if compressible */ assemble_div_u(E, V, r1, lev); if(E->control.inv_gruneisen != 0) for(m=1;m<=E->sphere.caps_per_proc;m++) for(j=1;j<=npno;j++) { r1[m][j] += r2[m][j]; } residual = incompressibility_residual(E, V, r1); if (E->control.print_convergence && E->parallel.me==0) { fprintf(E->fp,"AhatP (%03d) after %g seconds with div/v=%.3e " "dv/v=%.3e and dp/p=%.3e for step %d\n", count, CPU_time0()-time0, E->monitor.incompressibility, 0.0, 0.0, E->monitor.solution_cycles); fflush(E->fp); fprintf(stderr,"AhatP (%03d) after %g seconds with div/v=%.3e " "dv/v=%.3e and dp/p=%.3e for step %d\n", count, CPU_time0()-time0, E->monitor.incompressibility, 0.0, 0.0, E->monitor.solution_cycles); } /* pressure and velocity corrections */ dpressure = 1.0; dvelocity = 1.0; valid = 1; r0dotz0 = 0; while( (valid) && (count < *steps_max) && (E->monitor.incompressibility >= E->control.tole_comp) && (dpressure >= imp) && (dvelocity >= imp) ) { /* preconditioner BPI ~= inv(K), z1 = BPI*r1 */ for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=1; j<=npno; j++) z1[m][j] = E->BPI[lev][m][j] * r1[m][j]; /* r1dotz1 = */ r1dotz1 = global_pdot(E, r1, z1, lev); assert(r1dotz1 != 0.0 /* Division by zero in head of incompressibility iteration */); /* update search direction */ if(count == 0) for (m=1; m<=E->sphere.caps_per_proc; m++) for(j=1; j<=npno; j++) s2[m][j] = z1[m][j]; else { /* s2 = z1 + s1 * / */ delta = r1dotz1 / r0dotz0; for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=1; j<=npno; j++) s2[m][j] = z1[m][j] + delta * s1[m][j]; } /* solve K*u1 = grad(s2) for u1 */ assemble_grad_p(E, s2, F, lev); valid = solve_del2_u(E, E->u1, F, imp*v_res, lev); strip_bcs_from_residual(E, E->u1, lev); /* F = div(u1) */ assemble_div_u(E, E->u1, F, lev); /* alpha = / */ if(valid) /* alpha defined this way is the same as R&W */ alpha = r1dotz1 / global_pdot(E, s2, F, lev); else alpha = 0.0; /* r2 = r1 - alpha * div(u1) */ for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=1; j<=npno; j++) r2[m][j] = r1[m][j] - alpha * F[m][j]; /* P = P + alpha * s2 */ for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=1; j<=npno; j++) P[m][j] += alpha * s2[m][j]; /* V = V - alpha * u1 */ for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=0; ju1[m][j]; /* compute velocity and incompressibility residual */ assemble_div_u(E, V, F, lev); incompressibility_residual(E, V, F); /* compute velocity and pressure corrections */ dpressure = alpha * sqrt(global_pdot(E, s2, s2, lev) / (1.0e-32 + global_pdot(E, P, P, lev))); dvelocity = alpha * sqrt(global_vdot(E, E->u1, E->u1, lev) / (1.0e-32 + E->monitor.vdotv)); count++; if(E->control.print_convergence && E->parallel.me==0) { fprintf(E->fp, "AhatP (%03d) after %g seconds with div/v=%.3e " "dv/v=%.3e and dp/p=%.3e for step %d\n", count, CPU_time0()-time0, E->monitor.incompressibility, dvelocity, dpressure, E->monitor.solution_cycles); fprintf(stderr, "AhatP (%03d) after %g seconds with div/v=%.3e " "dv/v=%.3e and dp/p=%.3e for step %d\n", count, CPU_time0()-time0, E->monitor.incompressibility, dvelocity, dpressure, E->monitor.solution_cycles); } /* shift array pointers */ for(m=1; m<=E->sphere.caps_per_proc; m++) { shuffle[m] = s1[m]; s1[m] = s2[m]; s2[m] = shuffle[m]; shuffle[m] = r1[m]; r1[m] = r2[m]; r2[m] = shuffle[m]; } /* shift = */ r0dotz0 = r1dotz1; } /* end loop for conjugate gradient */ for(m=1; m<=E->sphere.caps_per_proc; m++) { free((void *) r1[m]); free((void *) r2[m]); free((void *) z1[m]); free((void *) s1[m]); free((void *) s2[m]); } *steps_max=count; return(residual); } /* Solve compressible Stokes flow using * bi-conjugate gradient stablized (BiCG-stab) iterations */ static float solve_Ahat_p_fhat_BiCG(struct All_variables *E, double **V, double **P, double **F, double imp, int *steps_max) { void assemble_div_rho_u(); void assemble_del2_u(); void assemble_grad_p(); void strip_bcs_from_residual(); int solve_del2_u(); void parallel_process_termination(); double global_vdot(), global_pdot(); double CPU_time0(); int gnpno, gneq; int npno, neq; int m, j, count, lev; int valid; double alpha, beta, omega; double r0dotrt, r1dotrt; double residual, dpressure, dvelocity; double *r1[NCS], *r2[NCS], *pt[NCS], *p1[NCS], *p2[NCS]; double *rt[NCS], *v0[NCS], *s0[NCS], *st[NCS], *t0[NCS]; double *u0[NCS]; double *shuffle[NCS]; double time0, v_res; gnpno = E->mesh.npno; gneq = E->mesh.neq; npno = E->lmesh.npno; neq = E->lmesh.neq; lev = E->mesh.levmax; for (m=1; m<=E->sphere.caps_per_proc; m++) { r1[m] = (double *)malloc((npno+1)*sizeof(double)); r2[m] = (double *)malloc((npno+1)*sizeof(double)); pt[m] = (double *)malloc((npno+1)*sizeof(double)); p1[m] = (double *)malloc((npno+1)*sizeof(double)); p2[m] = (double *)malloc((npno+1)*sizeof(double)); rt[m] = (double *)malloc((npno+1)*sizeof(double)); v0[m] = (double *)malloc((npno+1)*sizeof(double)); s0[m] = (double *)malloc((npno+1)*sizeof(double)); st[m] = (double *)malloc((npno+1)*sizeof(double)); t0[m] = (double *)malloc((npno+1)*sizeof(double)); u0[m] = (double *)malloc((neq+1)*sizeof(double)); } time0 = CPU_time0(); count = 0; /* calculate the initial velocity residual */ v_res = initial_vel_residual(E, V, P, F, imp); /* initial residual r1 = div(rho_ref*V) */ assemble_div_rho_u(E, V, r1, lev); residual = incompressibility_residual(E, V, r1); /* initial conjugate residual rt = r1 */ for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=1; j<=npno; j++) rt[m][j] = r1[m][j]; if (E->control.print_convergence && E->parallel.me==0) { fprintf(E->fp,"AhatP (%03d) after %g seconds with div/v=%.3e " "dv/v=%.3e and dp/p=%.3e for step %d\n", count, CPU_time0()-time0, E->monitor.incompressibility, 0.0, 0.0, E->monitor.solution_cycles); fflush(E->fp); fprintf(stderr,"AhatP (%03d) after %g seconds with div/v=%.3e " "dv/v=%.3e and dp/p=%.3e for step %d\n", count, CPU_time0()-time0, E->monitor.incompressibility, 0.0, 0.0, E->monitor.solution_cycles); } /* pressure and velocity corrections */ dpressure = 1.0; dvelocity = 1.0; valid = 1; r0dotrt = alpha = omega = 0; while( (valid) && (count < *steps_max) && ((E->monitor.incompressibility >= E->control.tole_comp) && (dpressure >= imp) && (dvelocity >= imp)) ) { /* r1dotrt = */ r1dotrt = global_pdot(E, r1, rt, lev); if(r1dotrt == 0.0) { /* TODO: can we resume the computation even when BiCGstab failed? */ fprintf(E->fp, "BiCGstab method failed!!\n"); fprintf(stderr, "BiCGstab method failed!!\n"); parallel_process_termination(); } /* update search direction */ if(count == 0) for (m=1; m<=E->sphere.caps_per_proc; m++) for(j=1; j<=npno; j++) p2[m][j] = r1[m][j]; else { /* p2 = r1 + / * alpha/omega * (p1 - omega*v0) */ beta = (r1dotrt / r0dotrt) * (alpha / omega); for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=1; j<=npno; j++) p2[m][j] = r1[m][j] + beta * (p1[m][j] - omega * v0[m][j]); } /* preconditioner BPI ~= inv(K), pt = BPI*p2 */ for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=1; j<=npno; j++) pt[m][j] = E->BPI[lev][m][j] * p2[m][j]; /* solve K*u0 = grad(pt) for u1 */ assemble_grad_p(E, pt, F, lev); valid = solve_del2_u(E, u0, F, imp*v_res, lev); if(!valid) fprintf(stderr, "not valid 1\n"); strip_bcs_from_residual(E, u0, lev); /* v0 = div(rho_ref*u0) */ assemble_div_rho_u(E, u0, v0, lev); /* alpha = r1dotrt / */ alpha = r1dotrt / global_pdot(E, rt, v0, lev); /* s0 = r1 - alpha * v0 */ for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=1; j<=npno; j++) s0[m][j] = r1[m][j] - alpha * v0[m][j]; /* stop iteration if norm(s) is small enough */ if(global_pdot(E, s0, s0, lev) < imp*gnpno) { // is the check correct? // update solution, TODO //break; } /* preconditioner BPI ~= inv(K), st = BPI*s0 */ for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=1; j<=npno; j++) st[m][j] = E->BPI[lev][m][j] * s0[m][j]; /* solve K*u1 = grad(st) for u1 */ assemble_grad_p(E, st, F, lev); valid = solve_del2_u(E, E->u1, F, imp*v_res, lev); if(!valid) fprintf(stderr, "not valid 2\n"); strip_bcs_from_residual(E, E->u1, lev); /* t0 = div(rho_ref * u1) */ assemble_div_rho_u(E, E->u1, t0, lev); /* omega = / */ omega = global_pdot(E, t0, s0, lev) / global_pdot(E, t0, t0, lev); /* r2 = s0 - omega * t0 */ for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=1; j<=npno; j++) r2[m][j] = s0[m][j] - omega * t0[m][j]; /* P = P + alpha * pt + omega * st */ for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=1; j<=npno; j++) s0[m][j] = alpha * pt[m][j] + omega * st[m][j]; for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=1; j<=npno; j++) P[m][j] += s0[m][j]; /* V = V - alpha * u0 - omega * u1 */ for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=0; ju1[m][j]; for(m=1; m<=E->sphere.caps_per_proc; m++) for(j=0; jmonitor.vdotv) ); count++; if(E->control.print_convergence && E->parallel.me==0) { fprintf(E->fp, "AhatP (%03d) after %g seconds with div/v=%.3e " "dv/v=%.3e and dp/p=%.3e for step %d\n", count, CPU_time0()-time0, E->monitor.incompressibility, dvelocity, dpressure, E->monitor.solution_cycles); fprintf(stderr, "AhatP (%03d) after %g seconds with div/v=%.3e " "dv/v=%.3e and dp/p=%.3e for step %d\n", count, CPU_time0()-time0, E->monitor.incompressibility, dvelocity, dpressure, E->monitor.solution_cycles); } /* shift array pointers */ for(m=1; m<=E->sphere.caps_per_proc; m++) { shuffle[m] = p1[m]; p1[m] = p2[m]; p2[m] = shuffle[m]; shuffle[m] = r1[m]; r1[m] = r2[m]; r2[m] = shuffle[m]; } /* shift = */ r0dotrt = r1dotrt; } /* end loop for conjugate gradient */ for(m=1; m<=E->sphere.caps_per_proc; m++) { free((void *) r1[m]); free((void *) r2[m]); free((void *) pt[m]); free((void *) p1[m]); free((void *) p2[m]); free((void *) rt[m]); free((void *) v0[m]); free((void *) s0[m]); free((void *) st[m]); free((void *) t0[m]); free((void *) u0[m]); } *steps_max=count; return(residual); } /* Solve compressible Stokes flow using * conjugate gradient (CG) iterations with an outer iteration */ static float solve_Ahat_p_fhat_iterCG(struct All_variables *E, double **V, double **P, double **FF, double imp, int *steps_max) { int m, i; int cycles, num_of_loop; double residual; double relative_err_v, relative_err_p; double *F[NCS],*old_v[NCS], *old_p[NCS],*diff_v[NCS],*diff_p[NCS]; const int npno = E->lmesh.npno; const int neq = E->lmesh.neq; const int lev = E->mesh.levmax; double global_vdot(),global_pdot(); for (m=1;m<=E->sphere.caps_per_proc;m++) { F[m] = (double *)malloc((neq+1)*sizeof(double)); old_v[m] = (double *)malloc((neq+1)*sizeof(double)); diff_v[m] = (double *)malloc((neq+1)*sizeof(double)); old_p[m] = (double *)malloc((npno+1)*sizeof(double)); diff_p[m] = (double *)malloc((npno+1)*sizeof(double)); } cycles = E->control.p_iterations; residual = 1.0; relative_err_v = 1.0; relative_err_p = 1.0; num_of_loop = 0; while((relative_err_v >= E->control.relative_err_accuracy || relative_err_p >= E->control.relative_err_accuracy) && num_of_loop <= E->control.compress_iter_maxstep) { for (m=1;m<=E->sphere.caps_per_proc;m++) { /* copy the original force vector since we need to keep it intact between iterations */ for(i=0;icontrol.accuracy,&cycles); for (m=1;m<=E->sphere.caps_per_proc;m++) for(i=0;isphere.caps_per_proc;m++) for(i=1;i<=npno;i++) diff_p[m][i] = P[m][i] - old_p[m][i]; relative_err_p = sqrt( global_pdot(E,diff_p,diff_p,lev) / (1.0e-32 + global_pdot(E,P,P,lev)) ); num_of_loop++; if(E->parallel.me == 0) { fprintf(stderr, "Relative error err_v / v = %e and err_p / p = %e after %d loops\n\n", relative_err_v, relative_err_p, num_of_loop); fprintf(E->fp, "Relative error err_v / v = %e and err_p / p = %e after %d loops\n\n", relative_err_v, relative_err_p, num_of_loop); } } /* end of while */ for (m=1;m<=E->sphere.caps_per_proc;m++) { free((void *) F[m]); free((void *) old_v[m]); free((void *) old_p[m]); free((void *) diff_v[m]); free((void *) diff_p[m]); } return(residual); } static double initial_vel_residual(struct All_variables *E, double **V, double **P, double **F, double imp) { void assemble_del2_u(); void assemble_grad_p(); void strip_bcs_from_residual(); int solve_del2_u(); double global_vdot(); int neq = E->lmesh.neq; int gneq = E->mesh.neq; int lev = E->mesh.levmax; int i, m; double v_res; v_res = sqrt(global_vdot(E, F, F, lev) / gneq); if (E->parallel.me==0) { fprintf(E->fp, "initial residue of momentum equation F %.9e %d\n", v_res, gneq); fprintf(stderr, "initial residue of momentum equation F %.9e %d\n", v_res, gneq); } /* F = F - grad(P) - K*V */ assemble_grad_p(E, P, E->u1, lev); for(m=1; m<=E->sphere.caps_per_proc; m++) for(i=0; iu1[m][i]; assemble_del2_u(E, V, E->u1, lev, 1); for(m=1; m<=E->sphere.caps_per_proc; m++) for(i=0; iu1[m][i]; strip_bcs_from_residual(E, F, lev); /* solve K*u1 = F for u1 */ solve_del2_u(E, E->u1, F, imp*v_res, lev); strip_bcs_from_residual(E, E->u1, lev); /* V = V + u1 */ for(m=1; m<=E->sphere.caps_per_proc; m++) for(i=0; iu1[m][i]; return(v_res); } static double incompressibility_residual(struct All_variables *E, double **V, double **r) { double global_pdot(); double global_vdot(); int gnpno = E->mesh.npno; int gneq = E->mesh.neq; int lev = E->mesh.levmax; double tmp1, tmp2; /* incompressiblity residual = norm(F) / norm(V) */ tmp1 = global_vdot(E, V, V, lev); tmp2 = global_pdot(E, r, r, lev); E->monitor.incompressibility = sqrt((gneq / gnpno) *( (1.0e-32 + tmp2) / (1.0e-32 + tmp1) )); E->monitor.vdotv = tmp1; return(sqrt(tmp2/gnpno));; }