https://github.com/MSanderlab/Pancreatic-progenitor-epigenome-maps-prioritize-type-2-diabetes-risk-genes-with-roles-in-development
Raw File
Tip revision: ba79c687523c2696ea0ef30d8476e28a0d860f18 authored by MSanderlab on 16 January 2021, 09:01:57 UTC
adding files
Tip revision: ba79c68
HMM_calls.m
clear all	
addpath(genpath('required_modules/HMMall/')); 
addpath(genpath('required_modules/pmtk3-2april2011/')); 
addpath(genpath('required_modules/order'));
for i=1:23
	clearvars -except i;
	i
		
	if i==23
		chr='chrX';
	else
		chr=['chr' num2str(i)];
	end

	filename=['DI.' chr '.10000.2000000'];

	minaic = 100000000;
	data = load(filename);
	prior0    = [0.33 0.33 0.33];
	transmat0 = [0.33 0.33 0.33
        	     0.33 0.33 0.33
             	     0.33 0.33 0.33];
	testX = data(:,4)';
	endm  = 15;
	aic   = ones(1,endm)*10000000;    
	inim  = 1;

	for M=inim:endm
		M
       
		try
	    		Q = 3; O = 1;
	    		[mixModel] = mixGaussFit(testX',Q*M,'maxIter', 500);
	    		mu0     = mixModel.cpd.mu;
	    		Sigma0  = bsxfun(@plus, eye(1), mixModel.cpd.Sigma);
	    		mixmat0 = mixModel.mixWeight;
	    		mu0     = reshape(mu0, [O Q M]);
	    		Sigma0  = reshape(Sigma0, [O O Q M]);
	    		mixmat0 = reshape(mixmat0, [Q,M]);
        
	    		[LL, prior1, transmat1, mu1, Sigma1, mixmat1]     = mhmm_em(testX, prior0, transmat0, mu0, Sigma0, mixmat0,'max_iter', 500,'thresh',1e-5);

	    		loglik = mhmm_logprob(testX, prior1, transmat1, mu1, Sigma1, mixmat1);
	    		B = mixgauss_prob(testX, mu1, Sigma1, mixmat1);
	    		path = viterbi_path(prior1, transmat1, B);
	    		[gamma, alpha, beta, logp] = hmmFwdBack(prior1, transmat1, B);
	    		decodedFromEMmaxMarg = maxidx(gamma);

	    		s_p = size(prior1);s_t = size(transmat1);s_m = size(mu1);s_v = size(Sigma1);s_i = size(mixmat1);s_te = size(testX);
        
	    		if (M==1)
        			num_p = (s_p(1)*s_p(2)) + (s_t(1)*s_t(2)) + (s_m(1)*s_m(2)) + (s_v(1)*s_v(2)) + (s_i(1)*s_i(2));
	    		else
	        		num_p = (s_p(1)*s_p(2)) + (s_t(1)*s_t(2)) + (s_m(1)*s_m(2)*s_m(3)) + (s_v(1)*s_v(2)*s_v(3)*s_v(4)) + (s_i(1)*s_i(2));
	    		end     
        
	    		aic(M)    = (-2*loglik) + (2*num_p);
	    		ll(M)     = loglik;
	    		best_p(M,:) = path';
	    		best_d(M,:) = decodedFromEMmaxMarg';
	    		best_g1(M,:) = gamma(1,:)';
	    		best_g2(M,:) = gamma(2,:)';
	    		best_g3(M,:) = gamma(3,:)';
		catch exception
			break
		end
	end
          
	ord = order(min(aic)) - 1;
	div = power(10,ord);

	for k=inim:endm
	     min(aic);
	     paic(k) = exp((min(aic)-aic(k))/(div*2));
	     if ((paic(k)) >= 0.9)
	         ind = k;
	         break;
	     end
	end    
        
	final_p      = best_p(ind,:)';
	final_d      = best_d(ind,:)';
	final_g(:,1) = best_g1(ind,:)';
	final_g(:,2) = best_g2(ind,:)';
	final_g(:,3) = best_g3(ind,:)';
	final_aic = aic(ind); minaic = min(aic);

	states = [final_p final_d final_g];
	file =  ['DI.' chr '.10000.2000000.output'];
	save (file,'aic','ind','states','-ASCII');      
end
back to top