https://github.com/krdav/SPURF
Tip revision: e200c01944917bdb63c3f7bd5656e64ce90ec439 authored by Amrit Dhar on 02 August 2018, 19:46:12 UTC
Update liao_dataset.fasta
Update liao_dataset.fasta
Tip revision: e200c01
model_fitting.cpp
#include <RcppArmadillo.h>
using namespace Rcpp;
using namespace arma;
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::plugins(cpp11)]]
mat logistic(const mat& t, double M, double B) {
return 1 / (1 + exp(-B * (t - M)));
}
// [[Rcpp::export]]
mat compute_kmeans_clust_profs(const mat& dff_kmeans_centers,
const mat& dff2_subsamp) {
// 1) compute distances from subsampled profiles to all cluster centroids
// 2) assign cluster profile estimates to subsampled profiles
mat clust_profs(size(dff2_subsamp), fill::none);
for (unsigned int i = 0; i < dff2_subsamp.n_rows; i++) {
vec dist_vec(dff_kmeans_centers.n_rows, fill::none);
for (unsigned int j = 0; j < dff_kmeans_centers.n_rows; j++) {
dist_vec(j) = norm(dff2_subsamp.row(i) - dff_kmeans_centers.row(j), 1);
}
clust_profs.row(i) =
dff_kmeans_centers.rows(find(dist_vec == min(dist_vec)));
}
return clust_profs;
}
///////////////////////// PREDICT PROFILES
// [[Rcpp::export]]
mat predict_profs(const rowvec& alpha, const urowvec& alpha_cols_assign,
const mat& dff2_subsamp, const List& dff2_profs,
unsigned int alpha_groups) {
mat alpha_mat(alpha.begin(), alpha_groups, dff2_profs.size());
mat pred_profs(size(dff2_subsamp), fill::zeros);
for (uword i = 0; i < dff2_profs.size(); i++) {
mat prof_i = dff2_profs[i];
pred_profs +=
prof_i.each_row() % alpha_mat(alpha_cols_assign - 1, uvec({i})).t();
}
pred_profs += dff2_subsamp.each_row() %
(1 - sum(alpha_mat.rows(alpha_cols_assign - 1), 1).t());
return pred_profs;
}
///////////////// F
// [[Rcpp::export]]
double f(const rowvec& alpha, const urowvec& alpha_cols_assign,
const mat& dff2_subsamp, const mat& dff2_full, const List& dff2_profs,
unsigned int alpha_groups) {
mat pred_profs = predict_profs(alpha, alpha_cols_assign, dff2_subsamp,
dff2_profs, alpha_groups);
return accu(square(dff2_full - pred_profs)) / (149 * dff2_full.n_rows);
}
///////////////// PENALIZED F
// [[Rcpp::export]]
double f_penal(const rowvec& alpha, const urowvec& alpha_cols_assign,
const mat& dff2_subsamp, const mat& dff2_full,
const List& dff2_profs, unsigned int alpha_groups, int d,
double lambda1, double lambda2) {
double err = f(alpha, alpha_cols_assign, dff2_subsamp, dff2_full, dff2_profs,
alpha_groups);
double coef_penal = norm(alpha, 1);
double fusion_penal = 0;
for (uword i = 0; i < dff2_profs.size(); i++) {
fusion_penal += norm(
diff(alpha(span(i * alpha_groups, (i + 1) * alpha_groups - 1)), d), 1);
}
return err + lambda1 * coef_penal + lambda2 * fusion_penal;
}
///////////// JACCARD F
// [[Rcpp::export]]
mat calculate_jacc(const mat& pred_profs, const mat& dff2_full, double cutoff,
double B) {
mat pred_sets = logistic(pred_profs, cutoff, B);
mat full_sets = logistic(dff2_full, cutoff, B);
mat jacc_mat(dff2_full.n_rows, 149, fill::none);
for (uword i = 0; i < dff2_full.n_rows; i++) {
for (uword j = 0; j < 149; j++) {
if (all(dff2_full(uvec({i}), regspace<uvec>(21 * j, 21 * (j + 1) - 1)) ==
rowvec({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1}))) {
jacc_mat(i, j) = 0;
continue;
}
const rowvec& pred_set =
pred_sets(uvec({i}), regspace<uvec>(21 * j, 21 * (j + 1) - 1));
const rowvec& full_set =
full_sets(uvec({i}), regspace<uvec>(21 * j, 21 * (j + 1) - 1));
double num = dot(pred_set, full_set);
rowvec denom_vec = pred_set + full_set;
denom_vec(find(denom_vec > 1)).ones();
double denom = sum(denom_vec);
if (num == 0 && denom == 0) {
jacc_mat(i, j) = 1;
} else {
jacc_mat(i, j) = num / denom;
}
}
}
return -jacc_mat;
}
// [[Rcpp::export]]
double f_jacc(const rowvec& alpha, const urowvec& alpha_cols_assign,
const mat& dff2_subsamp, const mat& dff2_full,
const List& dff2_profs, unsigned int alpha_groups, double cutoff,
double B) {
mat pred_profs = predict_profs(alpha, alpha_cols_assign, dff2_subsamp,
dff2_profs, alpha_groups);
return accu(calculate_jacc(pred_profs, dff2_full, cutoff, B)) /
(149 * dff2_full.n_rows);
}
/////////////// PENALIZED JACCARD F
// [[Rcpp::export]]
double f_penal_jacc(const rowvec& alpha, const urowvec& alpha_cols_assign,
const mat& dff2_subsamp, const mat& dff2_full,
const List& dff2_profs, unsigned int alpha_groups,
double cutoff, double B, int d, double lambda1,
double lambda2) {
double err = f_jacc(alpha, alpha_cols_assign, dff2_subsamp, dff2_full,
dff2_profs, alpha_groups, cutoff, B);
double coef_penal = norm(alpha, 1);
double fusion_penal = 0;
for (uword i = 0; i < dff2_profs.size(); i++) {
fusion_penal += norm(
diff(alpha(span(i * alpha_groups, (i + 1) * alpha_groups - 1)), d), 1);
}
return err + lambda1 * coef_penal + lambda2 * fusion_penal;
}
///////////// PREDICT NOGAP PROFILES
// [[Rcpp::export]]
mat predict_nogap_profs(const rowvec& alpha, const urowvec& alpha_cols_assign,
const mat& dff2_subsamp, const List& dff2_profs,
const umat& dff2_nogap, unsigned int alpha_groups) {
mat alpha_mat(alpha.begin(), alpha_groups, dff2_profs.size());
mat pred_profs(size(dff2_subsamp), fill::zeros);
pred_profs.cols(regspace<uvec>(20, 21, pred_profs.n_cols - 1)).fill(1);
for (unsigned int i = 0; i < dff2_subsamp.n_rows; i++) {
uvec col_inds = find(dff2_nogap.row(i));
pred_profs(uvec({i}), col_inds).zeros();
for (uword j = 0; j < dff2_profs.size(); j++) {
mat prof_j = dff2_profs[j];
pred_profs(uvec({i}), col_inds) +=
prof_j(uvec({i}), col_inds) %
alpha_mat(alpha_cols_assign - 1, uvec({j})).eval()(col_inds).t();
}
pred_profs(uvec({i}), col_inds) +=
dff2_subsamp(uvec({i}), col_inds) %
(1 - sum(alpha_mat.rows(alpha_cols_assign - 1).eval().rows(col_inds), 1)
.t());
}
return pred_profs;
}
//////////////// NOGAP F
// [[Rcpp::export]]
double f_nogap(const rowvec& alpha, const urowvec& alpha_cols_assign,
const mat& dff2_subsamp, const mat& dff2_full,
const List& dff2_profs, const umat& dff2_nogap,
unsigned int alpha_groups) {
mat pred_profs = predict_nogap_profs(alpha, alpha_cols_assign, dff2_subsamp,
dff2_profs, dff2_nogap, alpha_groups);
return accu(square(dff2_full - pred_profs)) / (accu(dff2_nogap) / 21);
}
////////////////// NOGAP JACCARD F
// [[Rcpp::export]]
double f_nogap_jacc(const rowvec& alpha, const urowvec& alpha_cols_assign,
const mat& dff2_subsamp, const mat& dff2_full,
const List& dff2_profs, const umat& dff2_nogap,
unsigned int alpha_groups, double cutoff, double B) {
mat pred_profs = predict_nogap_profs(alpha, alpha_cols_assign, dff2_subsamp,
dff2_profs, dff2_nogap, alpha_groups);
return accu(calculate_jacc(pred_profs, dff2_full, cutoff, B)) /
(accu(dff2_nogap) / 21);
}