https://github.com/penn-graphics-research/ziran2019
Raw File
Tip revision: 8d3d27cd17bbceab18c317820dbe595178f6312a authored by fangy14 on 06 November 2019, 07:20:57 UTC
open source
Tip revision: 8d3d27c
AdmmFormula.h
#ifndef ADMM_FORMULA_H
#define ADMM_FORMULA_H

#include <Ziran/CS/Util/Forward.h>
#include <Eigen/Eigenvalues>

namespace ZIRAN {

#define TV Vector<T, dim>
#define TM Matrix<T, dim, dim>

template <class T, int dim>
TM makePD(const TM& symMtr)
{
    Eigen::SelfAdjointEigenSolver<TM> eigenSolver(symMtr);
    TV D(eigenSolver.eigenvalues());
    for (int i = 0; i < dim; i++) {
        if (D[i] < 0.0) {
            D[i] = 0.0;
        }
    }
    return eigenSolver.eigenvectors() * D.asDiagonal() * eigenSolver.eigenvectors().transpose();
};

template <class T>
Matrix<T, 2, 2> makePD2D(const Matrix<T, 2, 2>& symMtr)
{
    Eigen::SelfAdjointEigenSolver<Matrix<T, 2, 2>> eigenSolver(symMtr);
    Vector<T, 2> D(eigenSolver.eigenvalues());
    for (int i = 0; i < 2; i++) {
        if (D[i] < 0.0)
            D[i] = 0.0;
    }
    return eigenSolver.eigenvectors() * D.asDiagonal() * eigenSolver.eigenvectors().transpose();
};

void outputResidual(double a, double b)
{
    static bool first_line_output = true;
    if (first_line_output) {
        FILE* f = fopen("output/residual.txt", "w");
        fclose(f);
        first_line_output = false;
    }
    FILE* f = fopen("output/residual.txt", "a");
    fprintf(f, "%.10lf %.10lf\n", a, b);
    fclose(f);
}

#undef TV
#undef TM

} // namespace ZIRAN

#endif

/*
void outputGlobal()
{
    FILE* f = fopen("output/result_original.txt", "w");
    TVStack x = dv, y = dv;
    for (int i = 0; i < Base::num_nodes; ++i)
        for (int j = 0; j < dim; ++j) {
            printf("%d %d %d %d\n", i, j, Base::num_nodes, dim);
            x.setZero();
            x.col(i)(j) = 1;
            cg_objective.multiply(x, y);
            for (int p = 0; p < Base::num_nodes; ++p)
                for (int q = 0; q < dim; ++q)
                    fprintf(f, "%.30f ", y.col(p)(q));
            fprintf(f, "\n");
        }
    fclose(f);
}

void outputGlobalOOT()
{
    auto multiply = [&](const TStack& x, TStack& b) {
        b.setZero();
        // P2G
        auto grid_array = grid.grid->Get_Array();
        grid.iterateGrid([&](IV node, GridState<T, dim>& g) {
            g.new_v = TV::Zero();
        });
        for (int i = 0; i < (int)div_nodes.size(); ++i) {
            uint64_t offset00 = div_nodes[i];
            uint64_t offset10 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(1, 0));
            uint64_t offset01 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(0, 1));
            uint64_t offset11 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(1, 1));
            auto& g00 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset00));
            auto& g10 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset10));
            auto& g01 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset01));
            auto& g11 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset11));
            T tmp = x(i) / (T)2 / dx;
            g00.new_v += TV(-tmp, -tmp).cwiseProduct(Q[g00.idx]);
            g10.new_v += TV(tmp, -tmp).cwiseProduct(Q[g10.idx]);
            g01.new_v += TV(-tmp, tmp).cwiseProduct(Q[g01.idx]);
            g11.new_v += TV(tmp, tmp).cwiseProduct(Q[g11.idx]);
        }

        tbb::parallel_for(0, (int)div_nodes.size(), [&](int i) {
            uint64_t offset00 = div_nodes[i];
            uint64_t offset10 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(1, 0));
            uint64_t offset01 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(0, 1));
            uint64_t offset11 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(1, 1));
            auto& g00 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset00));
            auto& g10 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset10));
            auto& g01 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset01));
            auto& g11 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset11));
            TV v00 = g00.new_v.cwiseProduct(Q[g00.idx]);
            TV v10 = g10.new_v.cwiseProduct(Q[g10.idx]);
            TV v01 = g01.new_v.cwiseProduct(Q[g01.idx]);
            TV v11 = g11.new_v.cwiseProduct(Q[g11.idx]);
            b(i) = rho_scale * omegain[i] * Rin[i] * Rin[i] * omegain[i] * get_div(v00, v10, v01, v11);
        });
    };

    FILE* f = fopen("output/result_oot.txt", "w");
    TStack x = TStack::Zero(div_nodes.size());
    TStack y = TStack::Zero(div_nodes.size());
    for (int i = 0; i < (int)div_nodes.size(); ++i) {
        x.setZero();
        x(i) = 1;
        multiply(x, y);
        for (int p = 0; p < (int)div_nodes.size(); ++p)
            fprintf(f, "%.30f ", y(p));
        fprintf(f, "\n");
    }
    fclose(f);
}

void outputGlobalMass()
{
    auto& Xarray = particles.X.array;
    grid.iterateGrid([&](IV node, GridState<T, dim>& g) {
        g.new_v = g.m * (Q[g.idx].cwiseProduct(Q[g.idx]));
    });
    TVStack pre = dv;
    grid.iterateGrid([&](IV node, GridState<T, dim>& g) {
        pre.col(g.idx) = g.new_v;
    });
    FILE* f = fopen("output/result_mass.txt", "w");
    TVStack x = dv, y = dv;
    for (int i = 0; i < Base::num_nodes; ++i)
        for (int j = 0; j < dim; ++j)
            fprintf(f, "%.30f ", pre.col(i)(j));
    fclose(f);
}

void outputGlobalPreconditioner()
{
    auto& Xarray = particles.X.array;
    grid.iterateGrid([&](IV node, GridState<T, dim>& g) {
        g.new_v = TV::Zero();
    });
//        for (uint64_t color = 0; color < (1 << dim); ++color) {
//            tbb::parallel_for(0, (int)particle_group.size(), [&](int group_idx) {
//                if ((block_offset[group_idx] & ((1 << dim) - 1)) != color)
//                    return;
//                for (int idx = particle_group[group_idx].first; idx <= particle_group[group_idx].second; ++idx) {
//                    int i = particle_order[idx];
//                    TV& Xp = Xarray[i];
//                    TM& Fn = (*Fn_pointer)[i];
//                    BSplineWeights<T, dim> spline(Xp, dx);
//                    grid.iterateKernel(spline, particle_base_offset[i], [&](IV node, T w, TV dw, GridState<T, dim>& g) {
//                        TM tmp = dt * dt * rho_scale * Q[g.idx] * dw.transpose() * Fn;
//                        TM tmpooRR = tmp.cwiseProduct(omega[i]).cwiseProduct(R[i]).cwiseProduct(R[i]).cwiseProduct(omega[i]);
//                        TV opt = tmpooRR * Fn.transpose() * dw;
//                        g.new_v += opt.cwiseProduct(Q[g.idx]);
//                    });
//                }
//            });
//        }
    if constexpr (use_incompressibility && dim == 2) {
        auto grid_array = grid.grid->Get_Array();
        tbb::parallel_for(0, (int)div_nodes.size(), [&](int i) {
            uint64_t offset00 = div_nodes[i];
            uint64_t offset10 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(1, 0));
            uint64_t offset01 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(0, 1));
            uint64_t offset11 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(1, 1));
            auto& g00 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset00));
            auto& g10 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset10));
            auto& g01 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset01));
            auto& g11 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset11));
            mtx.lock();
            g00.new_v += (rho_scale * Q[g00.idx] * omegain[i] * Rin[i] * Rin[i] * omegain[i]).cwiseProduct(Q[g00.idx]) / (T)4 / dx / dx;
            g10.new_v += (rho_scale * Q[g10.idx] * omegain[i] * Rin[i] * Rin[i] * omegain[i]).cwiseProduct(Q[g10.idx]) / (T)4 / dx / dx;
            g01.new_v += (rho_scale * Q[g01.idx] * omegain[i] * Rin[i] * Rin[i] * omegain[i]).cwiseProduct(Q[g01.idx]) / (T)4 / dx / dx;
            g11.new_v += (rho_scale * Q[g11.idx] * omegain[i] * Rin[i] * Rin[i] * omegain[i]).cwiseProduct(Q[g11.idx]) / (T)4 / dx / dx;
            mtx.unlock();
        });
    }
    TVStack pre = dv;
    grid.iterateGrid([&](IV node, GridState<T, dim>& g) {
        pre.col(g.idx) = g.new_v;
    });
    FILE* f = fopen("output/result_diagonal.txt", "w");
    TVStack x = dv, y = dv;
    for (int i = 0; i < Base::num_nodes; ++i)
        for (int j = 0; j < dim; ++j)
            fprintf(f, "%.30f ", pre.col(i)(j));
    fclose(f);
}

 void updateRhoScaleAbsolute()
{
    auto& Xarray = particles.X.array;
    // use new_v to store dv
    // G2P
    T prime_residual = 0;
    grid.iterateTouchedGrid([&](IV node, GridState<T, dim>& g) {
        g.new_v = TV::Zero();
    });
    grid.iterateGrid([&](IV node, GridState<T, dim>& g) {
        g.new_v = dv.col(g.idx);
    });
    tbb::parallel_for(0, (int)particle_group.size(), [&](int group_idx) {
        for (int idx = particle_group[group_idx].first; idx <= particle_group[group_idx].second; ++idx) {
            int i = particle_order[idx];
            TV& Xp = Xarray[i];
            TM& Fn = (*Fn_pointer)[i];
            BSplineWeights<T, dim> spline(Xp, dx);
            TM tmp = Fn - F[i];
            grid.iterateKernel(spline, particle_base_offset[i], [&](IV node, T w, TV dw, GridState<T, dim>& g) {
                tmp += dt * g.v * dw.transpose() * Fn;
                tmp += dt * g.new_v * dw.transpose() * Fn;
            });
            mtx.lock();
            prime_residual += (tmp.cwiseProduct(omega[i]).cwiseProduct(R[i])).squaredNorm();
            mtx.unlock();
        }
    });
    if constexpr (use_incompressibility && dim == 2) {
        auto grid_array = grid.grid->Get_Array();
        tbb::parallel_for(0, (int)div_nodes.size(), [&](int i) {
            uint64_t offset00 = div_nodes[i];
            uint64_t offset10 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(1, 0));
            uint64_t offset01 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(0, 1));
            uint64_t offset11 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(1, 1));
            auto& g00 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset00));
            auto& g10 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset10));
            auto& g01 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset01));
            auto& g11 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset11));
            TV v00 = g00.v + dv.col(g00.idx);
            TV v10 = g10.v + dv.col(g10.idx);
            TV v01 = g01.v + dv.col(g01.idx);
            TV v11 = g11.v + dv.col(g11.idx);
            mtx.lock();
            T tmp = Rin[i] * omegain[i] * get_div(v00, v10, v01, v11);
            prime_residual += tmp * tmp;
            mtx.unlock();
        });
    }
    prime_residual = std::sqrt(prime_residual);

    T dual_residual = 0;
    grid.iterateGrid([&](IV node, GridState<T, dim>& g) {
        g.new_v -= old_dv.col(g.idx);
    });
    tbb::parallel_for(0, (int)particle_group.size(), [&](int group_idx) {
        for (int idx = particle_group[group_idx].first; idx <= particle_group[group_idx].second; ++idx) {
            int i = particle_order[idx];
            TV& Xp = Xarray[i];
            TM& Fn = (*Fn_pointer)[i];
            BSplineWeights<T, dim> spline(Xp, dx);
            TM tmp = TM::Zero();
            grid.iterateKernel(spline, particle_base_offset[i], [&](IV node, T w, TV dw, GridState<T, dim>& g) {
                tmp += dt * g.new_v * dw.transpose() * Fn;
            });
            mtx.lock();
            dual_residual += (rho_scale * tmp.cwiseProduct(omega[i]).cwiseProduct(R[i]).cwiseProduct(R[i]).cwiseProduct(omega[i])).squaredNorm();
            mtx.unlock();
        }
    });
    dual_residual = std::sqrt(dual_residual);

    if (prime_residual > 10 * dual_residual) {
        rho_scale *= 2;
    }
    else if (10 * prime_residual < dual_residual) {
        rho_scale *= 0.5;
    }
}

void updateRhoScaleRelative()
{
    auto& Xarray = particles.X.array;
    // use new_v to store dv
    // G2P
    T prime_residual = 0;
    T prime_invidual = 0;
    grid.iterateTouchedGrid([&](IV node, GridState<T, dim>& g) {
        g.new_v = TV::Zero();
    });
    grid.iterateGrid([&](IV node, GridState<T, dim>& g) {
        g.new_v = dv.col(g.idx);
    });
    tbb::parallel_for(0, (int)particle_group.size(), [&](int group_idx) {
        for (int idx = particle_group[group_idx].first; idx <= particle_group[group_idx].second; ++idx) {
            int i = particle_order[idx];
            TV& Xp = Xarray[i];
            TM& Fn = (*Fn_pointer)[i];
            BSplineWeights<T, dim> spline(Xp, dx);
            TM tmp1 = Fn, tmp2 = F[i];
            grid.iterateKernel(spline, particle_base_offset[i], [&](IV node, T w, TV dw, GridState<T, dim>& g) {
                tmp2 += dt * g.v * dw.transpose() * Fn;
                tmp2 += dt * g.new_v * dw.transpose() * Fn;
            });
            TM tmp = tmp1 - tmp2;
            mtx.lock();
            prime_residual = std::max(prime_residual, (tmp.cwiseProduct(omega[i]).cwiseProduct(R[i])).cwiseAbs().maxCoeff());
            prime_invidual = std::max(prime_invidual, (tmp1.cwiseProduct(omega[i]).cwiseProduct(R[i])).cwiseAbs().maxCoeff());
            prime_invidual = std::max(prime_invidual, (tmp2.cwiseProduct(omega[i]).cwiseProduct(R[i])).cwiseAbs().maxCoeff());
            //prime_invidual = std::max(prime_invidual, (tmp3.cwiseProduct(omega[i]).cwiseProduct(R[i])).cwiseAbs().maxCoeff());
            mtx.unlock();
        }
    });
    prime_residual = std::sqrt(prime_residual / prime_invidual);
    T dual_residual = 0;
    T dual_invidual = 0;

    auto ce_name = constitutive_model_name<CorotatedElasticity<T, dim>>;
    auto ranges = particles.X.ranges;
    tbb::parallel_for(ranges, [&](DisjointRanges& subrange) {
        DisjointRanges subset(subrange, particles.commonRanges(ce_name(), element_measure_name<T>(), F_name()));
        for (auto iter = particles.subsetIter(subset, ce_name(), element_measure_name<T>(), F_name()); iter; ++iter) {
            auto& constitutive_model = iter.template get<0>();
            auto& vol = iter.template get<1>();
            auto& Fn = iter.template get<2>();
            int i = iter.entryId();
            TV& Xp = Xarray[i];
            BSplineWeights<T, dim> spline(Xp, dx);
            TM tmp1 = TM::Zero(), tmp2 = TM::Zero();
            grid.iterateKernel(spline, particle_base_offset[i], [&](IV node, T w, TV dw, GridState<T, dim>& g) {
                if (g.idx >= 0) {
                    tmp1 += dt * g.new_v * dw.transpose() * Fn;
                    tmp2 += dt * old_dv.col(g.idx) * dw.transpose() * Fn;
                }
            });
            TM tmp = tmp1 - tmp2;
            mtx.lock();
            TM u_bar = -y[i].cwiseQuotient(omega[i]) / rho_scale;
            dual_residual = std::max(dual_residual, (rho_scale * tmp.cwiseProduct(omega[i]).cwiseProduct(R[i]).cwiseProduct(R[i]).cwiseProduct(omega[i])).cwiseAbs().maxCoeff());
            dual_invidual = std::max(dual_invidual, (rho_scale * (u_bar.cwiseProduct(omega[i]).cwiseProduct(omega[i]))).cwiseAbs().maxCoeff());

            typename CorotatedElasticity<T, dim>::Scratch s;
            constitutive_model.updateScratch(F[i], s);
            TM firstPiola;
            constitutive_model.firstPiola(s, firstPiola);
            TM VP = vol * firstPiola;

            dual_invidual = std::max(dual_invidual, VP.cwiseAbs().maxCoeff());
            mtx.unlock();
        }
    });

    dual_residual = std::sqrt(dual_residual / dual_invidual);
    if (prime_residual > 10 * dual_residual) {
        rho_scale *= 2;
    }
    else if (10 * prime_residual < dual_residual) {
        rho_scale *= 0.5;
    }
}

void calcDivergence()
{
    T result = 0;
    auto grid_array = grid.grid->Get_Array();
    tbb::parallel_for(0, (int)div_nodes.size(), [&](int i) {
        uint64_t offset00 = div_nodes[i];
        uint64_t offset10 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(1, 0));
        uint64_t offset01 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(0, 1));
        uint64_t offset11 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(1, 1));
        auto& g00 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset00));
        auto& g10 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset10));
        auto& g01 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset01));
        auto& g11 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset11));
        TV v00 = g00.idx < 0 ? TV::Zero() : (g00.v + dv.col(g00.idx)).eval();
        TV v10 = g10.idx < 0 ? TV::Zero() : (g10.v + dv.col(g10.idx)).eval();
        TV v01 = g01.idx < 0 ? TV::Zero() : (g01.v + dv.col(g01.idx)).eval();
        TV v11 = g11.idx < 0 ? TV::Zero() : (g11.v + dv.col(g11.idx)).eval();
        mtx.lock();
        T tmp = get_div(v00, v10, v01, v11);
        result += tmp * tmp;
        mtx.unlock();
    });
    printf("=========== %.20lf\n", result);
}

void FStep(T localTol)
{
    ZIRAN_TIMER();
    auto& Xarray = particles.X.array;
    grid.iterateTouchedGrid([&](IV node, GridState<T, dim>& g) {
        g.new_v = TV::Zero();
    });
    grid.iterateGrid([&](IV node, GridState<T, dim>& g) {
        g.new_v = dv.col(g.idx);
    });

    auto ce_name = constitutive_model_name<CorotatedElasticity<T, dim>>;
    auto ranges = particles.X.ranges;
    tbb::parallel_for(ranges, [&](DisjointRanges& subrange) {
        DisjointRanges subset(subrange, particles.commonRanges(ce_name(), element_measure_name<T>(), F_name()));
        for (auto iter = particles.subsetIter(subset, ce_name(), element_measure_name<T>(), F_name()); iter; ++iter) {
            auto& constitutive_model = iter.template get<0>();
            auto& vol = iter.template get<1>();
            auto& Fn = iter.template get<2>();
            int i = iter.entryId();
            TV& Xp = Xarray[i];
            // build rhs
            TM u_bar = -y[i].cwiseQuotient(omega[i]) / rho_scale;
            TM FDb = Fn + u_bar.cwiseQuotient(R[i]).cwiseQuotient(R[i]);
            BSplineWeights<T, dim> spline(Xp, dx);
            grid.iterateKernel(spline, particle_base_offset[i], [&](IV node, T w, TV dw, GridState<T, dim>& g) {
                FDb += dt * g.v * dw.transpose() * Fn;
                FDb += dt * g.new_v * dw.transpose() * Fn;
            });
            typedef typename CorotatedElasticity<T, dim>::Hessian Hessian;
            // SVD rhs_raw
            auto computeEnergyVal_zUpdate = [&](const FlattenTM& flatten_F, T& E) {
                TM F = TM(flatten_F.data());
                typename CorotatedElasticity<T, dim>::Scratch s;
                constitutive_model.updateScratch(F, s);
                E = vol * constitutive_model.psi(s);
                for (int col = 0; col < dim; ++col)
                    for (int row = 0; row < dim; ++row) {
                        T ooRR = omega[i](row, col) * omega[i](row, col) * R[i](row, col) * R[i](row, col);
                        E += 0.5 * rho_scale * ooRR * (F(row, col) - FDb(row, col)) * (F(row, col) - FDb(row, col));
                    }
                // E = vol * constitutive_model.psi(s) + 0.5 * rho_scale * ((F - FDb).cwiseProduct(omega[i]).cwiseProduct(R[i])).squaredNorm();
            };
            auto computeGradient_zUpdate = [&](const FlattenTM& flatten_F, FlattenTM& g) {
                TM F = TM(flatten_F.data());
                typename CorotatedElasticity<T, dim>::Scratch s;
                constitutive_model.updateScratch(F, s);
                TM firstPiola;
                constitutive_model.firstPiola(s, firstPiola);
                g = vol * FlattenTM(firstPiola.data());
                for (int col = 0; col < dim; ++col)
                    for (int row = 0; row < dim; ++row) {
                        int idx = col * dim + row;
                        T ooRR = omega[i](row, col) * omega[i](row, col) * R[i](row, col) * R[i](row, col);
                        g(idx) += rho_scale * ooRR * (F(row, col) - FDb(row, col));
                    }
                // g = FlattenTM((vol * firstPiola + rho_scale * (F - FDb).cwiseProduct(omega[i]).cwiseProduct(omega[i]).cwiseProduct(R[i]).cwiseProduct(R[i])).eval().data());
            };
            auto computeHessianProxy_zUpdate = [&](const FlattenTM& flatten_F, Hessian& P) {
                TM F = TM(flatten_F.data());
                typename CorotatedElasticity<T, dim>::Scratch s;
                constitutive_model.updateScratch(F, s);
                Hessian firstPiolaDerivative;
                constitutive_model.firstPiolaDerivative(s, firstPiolaDerivative);
                // TODO: diagonal for this
                makePD(firstPiolaDerivative);
                P = vol * firstPiolaDerivative;
                for (int col = 0; col < dim; ++col)
                    for (int row = 0; row < dim; ++row) {
                        int idx = col * dim + row;
                        T ooRR = omega[i](row, col) * omega[i](row, col) * R[i](row, col) * R[i](row, col);
                        P(idx, idx) += rho_scale * ooRR;
                    }
                // P = vol * firstPiolaDerivative + rho_scale * Hessian(FlattenTM((omega[i].cwiseProduct(omega[i]).cwiseProduct(R[i]).cwiseProduct(R[i])).eval().data()).asDiagonal());
            };
            FlattenTM Fi(F[i].data());
            FlattenTM g;
            for (int j = 0;; j++) {
                computeGradient_zUpdate(Fi, g);
                if (g.norm() < localTol) {
                    break;
                }
                Hessian P;
                computeHessianProxy_zUpdate(Fi, P);
                FlattenTM p = P.ldlt().solve(-g);
                T alpha = 1.0;
                FlattenTM Fi0 = Fi;
                T E0;
                computeEnergyVal_zUpdate(Fi0, E0);
                Fi = Fi0 + alpha * p;
                T E;
                computeEnergyVal_zUpdate(Fi, E);
                while (E > E0) {
                    alpha /= 2.0;
                    Fi = Fi0 + alpha * p;
                    computeEnergyVal_zUpdate(Fi, E);
                }
                if (E0 - E < 1e-6 * E0) {
                    break;
                }
            }
            F[i] = TM(Fi.data());
            //                F[i] = TM::Identity();
        }
    });
}

void ruizEqulibration()
{
    ZIRAN_TIMER();
    int Np = particles.count;
    int Nn = Base::num_nodes;
    rGeps = TStack::Zero(Np * dim * dim);
    rQeps = TStack::Zero(Nn * dim);
    rReps = TStack::Zero(Np * dim * dim);
    rRineps = TStack::Zero((int)div_nodes.size());
    T eps = 1e-12;
    for (int tim = 0;; ++tim) {
        T Gresidual = (TStack::Ones(Np * dim * dim) - rGeps).cwiseAbs().maxCoeff();
        T Qresidual = (TStack::Ones(Nn * dim) - rQeps).cwiseAbs().maxCoeff();
        T Rresidual = (TStack::Ones(Np * dim * dim) - rReps).cwiseAbs().maxCoeff();
        T Rinresidual = div_nodes.size() == 0 ? (T)0 : (TStack::Ones((int)div_nodes.size()) - rRineps).cwiseAbs().maxCoeff();
        if (std::max(std::max(Gresidual, Qresidual), std::max(Rresidual, Rinresidual)) < eps) {
            ZIRAN_WARN("Residuals : ", Gresidual, " ", Qresidual, " ", Rresidual, " ", Rinresidual, ". Convergence number : ", tim);
            break;
        }
        // rGeps
        DEFINE_CE_NAME
        //auto ce_name = constitutive_model_name<CorotatedElasticity<T, dim>>;
        auto ranges = particles.X.ranges;
        tbb::parallel_for(ranges, [&](DisjointRanges& subrange) {
            DisjointRanges subset(subrange, particles.commonRanges(ce_name(), element_measure_name<T>(), F_name()));
            for (auto iter = particles.subsetIter(subset, ce_name(), element_measure_name<T>(), F_name()); iter; ++iter) {
                auto& constitutive_model = iter.template get<0>();
                auto& vol = iter.template get<1>();
                auto& Fn = iter.template get<2>();
                int i = iter.entryId();
                typename CorotatedElasticity<T, dim>::Scratch s;
                constitutive_model.updateScratch(Fn, s);
                typename CorotatedElasticity<T, dim>::Hessian dPdF;
                constitutive_model.firstPiolaDerivative(s, dPdF);
                for (int col = 0; col < dim; ++col)
                    for (int row = 0; row < dim; ++row) {
                        int idx = col * dim + row;
                        FlattenTM cp = (vol * dPdF.row(idx).transpose()).cwiseProduct(FlattenTM(G[i].data()));
                        T Zcom = std::abs(cp.cwiseAbs().maxCoeff() * G[i](row, col));
                        T WTcom = std::abs(omega[i](row, col) * R[i](row, col) * G[i](row, col));
                        rGeps(i * dim * dim + idx) = std::max(Zcom, WTcom);
                    }
            }
        });
        // rQeps
        grid.iterateGrid([&](IV node, GridState<T, dim>& g) {
            for (int d = 0; d < dim; ++d)
                rQeps(g.idx * dim + d) = std::abs(g.m * Q[g.idx](d) * Q[g.idx](d));
        });
        auto& Xarray = particles.X.array;
        for (uint64_t color = 0; color < (1 << dim); ++color) {
            tbb::parallel_for(0, (int)particle_group.size(), [&](int group_idx) {
                if ((block_offset[group_idx] & ((1 << dim) - 1)) != color)
                    return;
                for (int idx = particle_group[group_idx].first; idx <= particle_group[group_idx].second; ++idx) {
                    int i = particle_order[idx];
                    TV& Xp = Xarray[i];
                    TM& Fn = (*Fn_pointer)[i];
                    BSplineWeights<T, dim> spline(Xp, dx);
                    grid.iterateKernel(spline, particle_base_offset[i], [&](const IV& node, T w, const TV& dw, GridState<T, dim>& g) {
                        for (int d = 0; d < dim; ++d) {
                            TV ts = ((R[i].row(d)).cwiseProduct(omega[i].row(d))).transpose();
                            T DTWTcom = std::abs(ts.cwiseProduct(dt * Fn.transpose() * dw).cwiseAbs().maxCoeff() * Q[g.idx](d));
                            rQeps(g.idx * dim + d) = std::max(rQeps(g.idx * dim + d), DTWTcom);
                        }
                    });
                }
            });
        }
        if constexpr (use_incompressibility && dim == 2) {
            auto grid_array = grid.grid->Get_Array();
            tbb::parallel_for(0, (int)div_nodes.size(), [&](int i) {
                uint64_t offset00 = div_nodes[i];
                uint64_t offset10 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(1, 0));
                uint64_t offset01 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(0, 1));
                uint64_t offset11 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(1, 1));
                auto& g00 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset00));
                auto& g10 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset10));
                auto& g01 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset01));
                auto& g11 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset11));
                rQeps(g00.idx * dim + 0) = std::max(rQeps(g00.idx * dim + 0), std::abs(Q[g00.idx](0) * omegain[i] * Rin[i] / (T)2 / dx));
                rQeps(g00.idx * dim + 1) = std::max(rQeps(g00.idx * dim + 1), std::abs(Q[g00.idx](1) * omegain[i] * Rin[i] / (T)2 / dx));
                rQeps(g10.idx * dim + 0) = std::max(rQeps(g10.idx * dim + 0), std::abs(Q[g10.idx](0) * omegain[i] * Rin[i] / (T)2 / dx));
                rQeps(g10.idx * dim + 1) = std::max(rQeps(g10.idx * dim + 1), std::abs(Q[g10.idx](1) * omegain[i] * Rin[i] / (T)2 / dx));
                rQeps(g01.idx * dim + 0) = std::max(rQeps(g01.idx * dim + 0), std::abs(Q[g01.idx](0) * omegain[i] * Rin[i] / (T)2 / dx));
                rQeps(g01.idx * dim + 1) = std::max(rQeps(g01.idx * dim + 1), std::abs(Q[g01.idx](1) * omegain[i] * Rin[i] / (T)2 / dx));
                rQeps(g11.idx * dim + 0) = std::max(rQeps(g11.idx * dim + 0), std::abs(Q[g11.idx](0) * omegain[i] * Rin[i] / (T)2 / dx));
                rQeps(g11.idx * dim + 1) = std::max(rQeps(g11.idx * dim + 1), std::abs(Q[g11.idx](1) * omegain[i] * Rin[i] / (T)2 / dx));
            });
        }
        // rReps
        tbb::parallel_for(0, (int)particle_group.size(), [&](int group_idx) {
            for (int idx = particle_group[group_idx].first; idx <= particle_group[group_idx].second; ++idx) {
                int i = particle_order[idx];
                TV& Xp = Xarray[i];
                TM& Fn = (*Fn_pointer)[i];
                for (int col = 0; col < dim; ++col)
                    for (int row = 0; row < dim; ++row) {
                        int idx = col * dim + row;
                        rReps(i * dim * dim + idx) = std::abs(omega[i](row, col) * R[i](row, col) * G[i](row, col));
                    }
                BSplineWeights<T, dim> spline(Xp, dx);
                grid.iterateKernel(spline, particle_base_offset[i], [&](IV node, T w, TV dw, GridState<T, dim>& g) {
                    for (int col = 0; col < dim; ++col)
                        for (int row = 0; row < dim; ++row) {
                            int idx = col * dim + row;
                            T WDcom = std::abs(Q[g.idx](row) * (dt * dw.transpose() * Fn).transpose()(col) * omega[i](row, col) * R[i](row, col));
                            rReps(i * dim * dim + idx) = std::max(rReps(i * dim * dim + idx), WDcom);
                        }
                });
            }
        });
        // rRineps
        if constexpr (use_incompressibility && dim == 2) {
            auto grid_array = grid.grid->Get_Array();
            tbb::parallel_for(0, (int)div_nodes.size(), [&](int i) {
                uint64_t offset00 = div_nodes[i];
                uint64_t offset10 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(1, 0));
                uint64_t offset01 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(0, 1));
                uint64_t offset11 = SparseMask::Packed_Add(offset00, SparseMask::Linear_Offset(1, 1));
                auto& g00 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset00));
                auto& g10 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset10));
                auto& g01 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset01));
                auto& g11 = reinterpret_cast<GridState<T, dim>&>(grid_array(offset11));
                T Q00 = Q[g00.idx].cwiseAbs().maxCoeff();
                T Q10 = Q[g10.idx].cwiseAbs().maxCoeff();
                T Q01 = Q[g01.idx].cwiseAbs().maxCoeff();
                T Q11 = Q[g11.idx].cwiseAbs().maxCoeff();
                rRineps(i) = std::abs(Rin[i] * omegain[i] * std::max(std::max(Q00, Q10), std::max(Q01, Q11)) / (T)2 / dx);
            });
        }
        // update
        StdVector<TM> GG(G);
        StdVector<TV> QQ(Q);
        StdVector<TM> RR(R);
        StdVector<T> RRin(Rin);
        for (int i = 0; i < Np; ++i)
            G[i] = GG[i].cwiseQuotient(TM(rGeps.template block<dim * dim, 1>(i * dim * dim, 0).data()).cwiseSqrt());
        for (int i = 0; i < Nn; ++i)
            Q[i] = QQ[i].cwiseQuotient(TV(rQeps.template block<dim, 1>(i * dim, 0).data()).cwiseSqrt());
        for (int i = 0; i < Np; ++i)
            R[i] = RR[i].cwiseQuotient(TM(rReps.template block<dim * dim, 1>(i * dim * dim, 0).data()).cwiseSqrt());
        for (int i = 0; i < (int)div_nodes.size(); ++i)
            Rin[i] = RRin[i] / std::sqrt(rRineps(i));
    }
}

void writeState(std::ostream& out)
{
    Base::writeState(out);

    std::string filename = SimulationBase::output_dir.absolutePath(SimulationBase::outputFileName("residual", ".bgeo"));

    Partio::ParticlesDataMutable* parts = Partio::create();
    Partio::ParticleAttribute posH, typeH, residualH;
    posH = parts->addAttribute("position", Partio::VECTOR, 3);
    typeH = parts->addAttribute("type", Partio::VECTOR, 1);
    residualH = parts->addAttribute("residual", Partio::VECTOR, 1);

    // write to partio structure
    for (int k = 0; k < particles.count; k++) {
        int idx = parts->addParticle();
        float* posP = parts->dataWrite<float>(posH, idx);
        float* typeP = parts->dataWrite<float>(typeH, idx);
        float* residualP = parts->dataWrite<float>(residualH, idx);
        for (int d = 0; d < 3; ++d)
            posP[d] = 0;
        for (int d = 0; d < dim; ++d)
            posP[d] = (float)particles.X.array[k](d);
        typeP[0] = 0;
        residualP[0] = 0;
    }

    StdVector<TV> grid_pos;
    grid_pos.resize(Base::num_nodes);
    grid.iterateGrid([&](IV node, GridState<T, dim>& g) {
        grid_pos[g.idx] = node.template cast<T>() * dx;
    });
    for (int k = 0; k < Base::num_nodes; k++) {
        int idx = parts->addParticle();
        float* posP = parts->dataWrite<float>(posH, idx);
        float* typeP = parts->dataWrite<float>(typeH, idx);
        float* residualP = parts->dataWrite<float>(residualH, idx);
        for (int d = 0; d < 3; ++d)
            posP[d] = 0;
        for (int d = 0; d < dim; ++d)
            posP[d] = (float)grid_pos[k](d);
        typeP[0] = 1;
        residualP[0] = _residual.col(k).norm();
    }

    Partio::write(filename.c_str(), *parts);
    parts->release();
}

*/
back to top