/* *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * *===================================================================== * * CitcomS * --------------------------------- * * Authors: * Louis Moresi, Shijie Zhong, Lijie Han, Eh Tan, * Clint Conrad, Michael Gurnis, and Eun-seo Choi * (c) California Institute of Technology 1994-2005 * * By downloading and/or installing this software you have * agreed to the CitcomS.py-LICENSE bundled with this software. * Free for non-commercial academic research ONLY. * This program is distributed WITHOUT ANY WARRANTY whatsoever. * *===================================================================== * * Copyright June 2005, by the California Institute of Technology. * ALL RIGHTS RESERVED. United States Government Sponsorship Acknowledged. * * Any commercial use must be negotiated with the Office of Technology * Transfer at the California Institute of Technology. This software * may be subject to U.S. export control laws and regulations. By * accepting this software, the user agrees to comply with all * applicable U.S. export laws and regulations, including the * International Traffic and Arms Regulations, 22 C.F.R. 120-130 and * the Export Administration Regulations, 15 C.F.R. 730-744. User has * the responsibility to obtain export licenses, or other export * authority as may be required before exporting such information to * foreign countries or providing access to foreign nationals. In no * event shall the California Institute of Technology be liable to any * party for direct, indirect, special, incidental or consequential * damages, including lost profits, arising out of the use of this * software and its documentation, even if the California Institute of * Technology has been advised of the possibility of such damage. * * The California Institute of Technology specifically disclaims any * warranties, including the implied warranties or merchantability and * fitness for a particular purpose. The software and documentation * provided hereunder is on an "as is" basis, and the California * Institute of Technology has no obligations to provide maintenance, * support, updates, enhancements or modifications. * *===================================================================== * * *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ /* Functions which solve for the velocity and pressure fields using Uzawa-type iteration loop. */ #include #include #include "element_definitions.h" #include "global_defs.h" #include extern int Emergency_stop; /* Master loop for pressure and (hence) velocity field */ void solve_constrained_flow_iterative(E) struct All_variables *E; { double *D1; double *u; double *R,*Bp; double residual_ddash; double vmag; double global_vdot(),global_pdot(); float solve_Ahat_p_fhat(); void assemble_del2_u(); void assemble_grad_p(); void assemble_div_u(); void v_from_vector(); void p_to_nodes(); void strip_bcs_from_residual(); void velocities_conform_bcs(); int steps,cycles; int i,j,k,doff,vel_cycles_previous,vel_calls_previous; double time,CPU_time0(); const int npno = E->lmesh.npno; const int gnpno = E->mesh.npno; const int nno = E->lmesh.nno; const int dims = E->mesh.nsd; const int neq = E->lmesh.neq; const int gneq = E->mesh.neq; const int addi_dof = additional_dof[dims]; time=CPU_time0(); cycles=E->control.p_iterations; /* Solve for velocity and pressure, correct for bc's */ residual_ddash=solve_Ahat_p_fhat(E,E->U,E->P,E->F,E->control.accuracy,&cycles); v_from_vector(E); p_to_nodes(E,E->P,E->NP,E->mesh.levmax); /* */ return; } void solve_constrained_flow_iterative_pseudo_surf(E) struct All_variables *E; { double *D1; double *u; double *R,*Bp; double residual_ddash; double vmag; double global_vdot(),global_pdot(); float solve_Ahat_p_fhat(); void v_from_vector_pseudo_surf(); void p_to_nodes(); int steps,cycles; int i,j,k,doff,vel_cycles_previous,vel_calls_previous; double time,CPU_time0(); const int npno = E->lmesh.npno; const int gnpno = E->mesh.npno; const int nno = E->lmesh.nno; const int dims = E->mesh.nsd; const int neq = E->lmesh.neq; const int gneq = E->mesh.neq; const int addi_dof = additional_dof[dims]; time=CPU_time0(); cycles=E->control.p_iterations; /* Solve for velocity and pressure, correct for bc's */ residual_ddash=solve_Ahat_p_fhat(E,E->U,E->P,E->F,E->control.accuracy,&cycles); v_from_vector_pseudo_surf(E); p_to_nodes(E,E->P,E->NP,E->mesh.levmax); /* */ return; } /* ========================================================================== */ float solve_Ahat_p_fhat(E,V,P,F,imp,steps_max) struct All_variables *E; double **V,**P,**F; double imp; int *steps_max; { int m,i,j,k,ii,count,convergent,valid,problems,lev,lev_low,npno,neq,steps; int gnpno,gneq; double *r1[NCS],*R[NCS]; double *r0[NCS],*r2[NCS],*z0[NCS],*z1[NCS],*s1[NCS],*s2[NCS],*Ah[NCS]; double *shuffle[NCS]; double alpha,delta,s2dotAhat,r0dotr0,r1dotz1; double residual, initial_residual, last_residual,v_res; double global_vdot(),global_pdot(); double *dvector(); double time0,time,CPU_time0(); void assemble_div_u(); void assemble_del2_u(); void assemble_grad_p(); void strip_bcs_from_residual(); int solve_del2_u(); void parallel_process_termination(); const int dims=E->mesh.nsd; const int n=loc_mat_size[E->mesh.nsd]; gnpno=E->mesh.npno; gneq=E->mesh.neq; for (m=1;m<=E->sphere.caps_per_proc;m++) { npno=E->lmesh.npno; neq=E->lmesh.neq; r0[m] = (double *)malloc((npno+1)*sizeof(double)); r1[m] = (double *)malloc((npno+1)*sizeof(double)); r2[m] = (double *)malloc((npno+1)*sizeof(double)); z0[m] = (double *)malloc((npno+1)*sizeof(double)); z1[m] = (double *)malloc((npno+1)*sizeof(double)); s1[m] = (double *)malloc((npno+1)*sizeof(double)); s2[m] = (double *)malloc((npno+1)*sizeof(double)); } problems=0; time0=time=CPU_time0(); /* calculate the velocity residual, note there are tricks involved here */ lev=E->mesh.levmax; v_res=sqrt(global_vdot(E,F,F,lev)/gneq); if (E->parallel.me==0) { fprintf(E->fp,"initial residue of momentum equation F %.9e %d\n",v_res,gneq); fprintf(stderr,"initial residue of momentum equation F %.9e %d\n",v_res,gneq); } assemble_grad_p(E,P,E->u1,lev); for (m=1;m<=E->sphere.caps_per_proc;m++) for(i=0;iu1[m][i]; assemble_del2_u(E,V,E->u1,lev,1); for (m=1;m<=E->sphere.caps_per_proc;m++) for(i=0;iu1[m][i]; strip_bcs_from_residual(E,F,lev); valid=solve_del2_u(E,E->u1,F,imp*v_res,E->mesh.levmax); strip_bcs_from_residual(E,E->u1,lev); for (m=1;m<=E->sphere.caps_per_proc;m++) for(i=0;iu1[m][i]; assemble_div_u(E,V,r1,lev); residual = initial_residual = sqrt(global_pdot(E,r1,r1,lev)/gnpno); E->monitor.vdotv = sqrt(global_vdot(E,V,V,lev)/gneq); E->monitor.incompressibility = residual/E->monitor.vdotv; count = 0; convergent=0; if (E->control.print_convergence && E->parallel.me==0) { fprintf(E->fp,"AhatP (%03d) after %g seconds with div/v=%.3e for step %d\n",count,CPU_time0()-time0,E->monitor.incompressibility,E->monitor.solution_cycles); /**/ fflush(E->fp); fprintf(stderr,"AhatP (%03d) after %g seconds with div/v=%.3e for step %d\n",count,CPU_time0()-time0,E->monitor.incompressibility,E->monitor.solution_cycles); /**/ } while( (valid) && (count < *steps_max) && ( E->monitor.incompressibility >= E->control.tole_comp ) ) { for (m=1;m<=E->sphere.caps_per_proc;m++) for(j=1;j<=npno;j++) z1[m][j] = E->BPI[lev][m][j]*r1[m][j]; r1dotz1 = global_pdot(E,r1,z1,lev); if ((count == 0)) for (m=1;m<=E->sphere.caps_per_proc;m++) for(j=1;j<=npno;j++) s2[m][j] = z1[m][j]; else { r0dotr0=global_pdot(E,r0,z0,lev); assert(r0dotr0 != 0.0 /* Division by zero in head of incompressibility iteration */); delta = r1dotz1/r0dotr0; for (m=1;m<=E->sphere.caps_per_proc;m++) for(j=1;j<=npno;j++) s2[m][j] = z1[m][j] + delta * s1[m][j]; } assemble_grad_p(E,s2,F,lev); valid=solve_del2_u(E,E->u1,F,imp*v_res,lev); strip_bcs_from_residual(E,E->u1,lev); assemble_div_u(E,E->u1,F,lev); s2dotAhat=global_pdot(E,s2,F,lev); if (valid) /* alpha defined this way is the same as R&W */ alpha = r1dotz1/s2dotAhat; else alpha = 0.0; for (m=1;m<=E->sphere.caps_per_proc;m++) for(j=1;j<=npno;j++) { r2[m][j] = r1[m][j] - alpha * F[m][j]; P[m][j] += alpha * s2[m][j]; } for (m=1;m<=E->sphere.caps_per_proc;m++) for(j=0;ju1[m][j]; assemble_div_u(E,V,F,lev); E->monitor.vdotv = global_vdot(E,V,V,E->mesh.levmax); E->monitor.incompressibility = sqrt((gneq/gnpno)*(1.0e-32+global_pdot(E,F,F,lev)/(1.0e-32+E->monitor.vdotv))); count++; if (E->control.print_convergence && E->parallel.me==0) { fprintf(E->fp,"AhatP (%03d) after %g seconds with div/v=%.3e for step %d\n",count,CPU_time0()-time0,E->monitor.incompressibility,E->monitor.solution_cycles); /**/ fflush(E->fp); fprintf(stderr,"AhatP (%03d) after %g seconds with div/v=%.3e for step %d\n",count,CPU_time0()-time0,E->monitor.incompressibility,E->monitor.solution_cycles); /**/ } for (m=1;m<=E->sphere.caps_per_proc;m++) { shuffle[m]=s1[m];s1[m]=s2[m];s2[m]=shuffle[m]; shuffle[m]=r0[m];r0[m]=r1[m];r1[m]=r2[m];r2[m]=shuffle[m]; shuffle[m]=z0[m];z0[m]=z1[m];z1[m]=shuffle[m]; } } /* end loop for conjugate gradient */ if(problems) { fprintf(E->fp,"Convergence of velocity solver may affect continuity\n"); fprintf(E->fp,"Consider running with the `see_convergence=on' option\n"); fprintf(E->fp,"To evaluate the performance of the current relaxation parameters\n"); fflush(E->fp); } for (m=1;m<=E->sphere.caps_per_proc;m++) { free((void *) r0[m]); free((void *) r1[m]); free((void *) r2[m]); free((void *) z0[m]); free((void *) z1[m]); free((void *) s1[m]); free((void *) s2[m]); } *steps_max=count; return(residual); } /* ========================================================================== */ void v_from_vector(E) struct All_variables *E; { int i,eqn1,eqn2,eqn3,m,node,d; unsigned int type; float sint,cost,sinf,cosf; const int nno = E->lmesh.nno; const int dofs = E->mesh.dof; const int level=E->mesh.levmax; double sum_V = 0.0, sum_dV = 0.0, rel_error = 0.0, global_max_error = 0.0; double tol_error = 1.0e-03; for (m=1;m<=E->sphere.caps_per_proc;m++) { for(node=1;node<=nno;node++) { E->sphere.cap[m].V[1][node] = E->U[m][E->id[m][node].doff[1]]; E->sphere.cap[m].V[2][node] = E->U[m][E->id[m][node].doff[2]]; E->sphere.cap[m].V[3][node] = E->U[m][E->id[m][node].doff[3]]; if (E->node[m][node] & VBX) E->sphere.cap[m].V[1][node] = E->sphere.cap[m].VB[1][node]; if (E->node[m][node] & VBY) E->sphere.cap[m].V[2][node] = E->sphere.cap[m].VB[2][node]; if (E->node[m][node] & VBZ) E->sphere.cap[m].V[3][node] = E->sphere.cap[m].VB[3][node]; } for (i=1;i<=E->lmesh.nno;i++) { eqn1 = E->id[m][i].doff[1]; eqn2 = E->id[m][i].doff[2]; eqn3 = E->id[m][i].doff[3]; sint = E->SinCos[level][m][0][i]; sinf = E->SinCos[level][m][1][i]; cost = E->SinCos[level][m][2][i]; cosf = E->SinCos[level][m][3][i]; E->temp[m][eqn1] = E->sphere.cap[m].V[1][i]*cost*cosf - E->sphere.cap[m].V[2][i]*sinf + E->sphere.cap[m].V[3][i]*sint*cosf; E->temp[m][eqn2] = E->sphere.cap[m].V[1][i]*cost*sinf + E->sphere.cap[m].V[2][i]*cosf + E->sphere.cap[m].V[3][i]*sint*sinf; E->temp[m][eqn3] = -E->sphere.cap[m].V[1][i]*sint + E->sphere.cap[m].V[3][i]*cost; } } return; } void v_from_vector_pseudo_surf(E) struct All_variables *E; { int i,eqn1,eqn2,eqn3,m,node,d; unsigned int type; float sint,cost,sinf,cosf; const int nno = E->lmesh.nno; const int dofs = E->mesh.dof; const int level=E->mesh.levmax; double sum_V = 0.0, sum_dV = 0.0, rel_error = 0.0, global_max_error = 0.0; double tol_error = 1.0e-03; for (m=1;m<=E->sphere.caps_per_proc;m++) { for(node=1;node<=nno;node++) { E->sphere.cap[m].Vprev[1][node] = E->sphere.cap[m].V[1][node]; E->sphere.cap[m].Vprev[2][node] = E->sphere.cap[m].V[2][node]; E->sphere.cap[m].Vprev[3][node] = E->sphere.cap[m].V[3][node]; E->sphere.cap[m].V[1][node] = E->U[m][E->id[m][node].doff[1]]; E->sphere.cap[m].V[2][node] = E->U[m][E->id[m][node].doff[2]]; E->sphere.cap[m].V[3][node] = E->U[m][E->id[m][node].doff[3]]; if (E->node[m][node] & VBX) E->sphere.cap[m].V[1][node] = E->sphere.cap[m].VB[1][node]; if (E->node[m][node] & VBY) E->sphere.cap[m].V[2][node] = E->sphere.cap[m].VB[2][node]; if (E->node[m][node] & VBZ) E->sphere.cap[m].V[3][node] = E->sphere.cap[m].VB[3][node]; sum_dV += (E->sphere.cap[m].V[1][node] - E->sphere.cap[m].Vprev[1][node])*(E->sphere.cap[m].V[1][node] - E->sphere.cap[m].Vprev[1][node]) + (E->sphere.cap[m].V[2][node] - E->sphere.cap[m].Vprev[2][node])*(E->sphere.cap[m].V[2][node] - E->sphere.cap[m].Vprev[2][node]) + (E->sphere.cap[m].V[3][node] - E->sphere.cap[m].Vprev[3][node])*(E->sphere.cap[m].V[3][node] - E->sphere.cap[m].Vprev[3][node]); sum_V += E->sphere.cap[m].V[1][node]*E->sphere.cap[m].V[1][node] + E->sphere.cap[m].V[2][node]*E->sphere.cap[m].V[2][node] + E->sphere.cap[m].V[3][node]*E->sphere.cap[m].V[3][node]; } rel_error = sqrt(sum_dV)/sqrt(sum_V); MPI_Allreduce(&rel_error,&global_max_error,1,MPI_DOUBLE,MPI_MAX,E->parallel.world); if(global_max_error <= tol_error) E->monitor.stop_topo_loop = 1; if(E->parallel.me==0) fprintf(stderr,"global_max_error=%e stop_topo_loop=%d\n",global_max_error,E->monitor.stop_topo_loop); for (i=1;i<=E->lmesh.nno;i++) { eqn1 = E->id[m][i].doff[1]; eqn2 = E->id[m][i].doff[2]; eqn3 = E->id[m][i].doff[3]; sint = E->SinCos[level][m][0][i]; sinf = E->SinCos[level][m][1][i]; cost = E->SinCos[level][m][2][i]; cosf = E->SinCos[level][m][3][i]; E->temp[m][eqn1] = E->sphere.cap[m].V[1][i]*cost*cosf - E->sphere.cap[m].V[2][i]*sinf + E->sphere.cap[m].V[3][i]*sint*cosf; E->temp[m][eqn2] = E->sphere.cap[m].V[1][i]*cost*sinf + E->sphere.cap[m].V[2][i]*cosf + E->sphere.cap[m].V[3][i]*sint*sinf; E->temp[m][eqn3] = -E->sphere.cap[m].V[1][i]*sint + E->sphere.cap[m].V[3][i]*cost; } } return; } void velo_from_element(E,VV,m,el,sphere_key) struct All_variables *E; float VV[4][9]; int el,m,sphere_key; { int a, node; const int dims=E->mesh.nsd; const int ends=enodes[E->mesh.nsd]; const int nno=E->lmesh.nno; const int lev=E->mesh.levmax; if (sphere_key) for(a=1;a<=ends;a++) { node = E->ien[m][el].node[a]; VV[1][a] = E->sphere.cap[m].V[1][node]; VV[2][a] = E->sphere.cap[m].V[2][node]; VV[3][a] = E->sphere.cap[m].V[3][node]; } else for(a=1;a<=ends;a++) { node = E->ien[m][el].node[a]; VV[1][a] = E->temp[m][E->id[m][node].doff[1]]; VV[2][a] = E->temp[m][E->id[m][node].doff[2]]; VV[3][a] = E->temp[m][E->id[m][node].doff[3]]; } return; }