Content - 3aa758aab6ee34937f864faa1180854ddc2794b4 - 3726bc7/CLL077_TDS.m

visit type:
https://github.com/moyanre/smcgenedeconv

05 April 2024, 22:41:07 UTC
Tip revision: 28901d7a08d52e76361554e802fc97aa26c0981a authored by moyanre on 13 June 2019, 11:34:51 UTC
Update README.md
Tip revision: 28901d7
CLL077_TDS.m

clear all;   clc;

 rng('default');
 rng(3000); %300



load('TDS_n_matrix')
load('TDS_NN_matrix')

     
 
Data1 = TDS_n_matrix;
N_st_Matrix1 = TDS_NN_matrix;





rep = 50; % 20...number of replicates
Data = repmat(Data1,rep,1);
N_st_Matrix = repmat(N_st_Matrix1,rep,1);


S = size(Data,1);
T = size(Data,2);
%%%%% initialization

        % alpha of IBP
          alpha = 0.2; % 0.08
          betas = [2 2];

        % number of particles
          N = 300; %500
        
        % Initialize p_o
          a_00 = 1; % 1
          b_00 = 100; % 100
          p_o_particles = betarnd(a_00,b_00,[1,N]);
          
          
        % Initialize theta_ot, t = 1,...,T
          a_0 = 0.4; %0.4 
          b_general = 1;
          theta_o_particles = gamrnd(a_0,b_general,[T,N]);
          
        % parameters of theta_ct
          a = 6; %6
          
        % Initialize matrix Z, t = 1,...,T
          Z_init = zeros(1,5,N);
        
        para_frac = 0.05; %0.05
          
for s = 1:S
    
    s
    
    Data_s = Data(s,:);
    N_st_s = N_st_Matrix(s,:);
    
   
    
    if s == 1
    
    
         %PROB = zeros(1,N);
         
            for n = 1:N

                Z_lana_n = Z_init(:,:,n);

                Z_loni_n = fn_trinary_cIBP_Tran(Z_lana_n,alpha,betas,s);
                
                Z_loni{n} = Z_loni_n;
     
                %%% Get the number of columns of Z, then get the number of rows of theta matrix and sample them 
                
                 C_here_n = size(Z_loni_n,2);

                 no_theta_n = C_here_n*T;
                 theta_ct_n = gamrnd(a,b_general,[no_theta_n,1]);
                 
                 %%% let the sampled parameters pass through state equation
                 
                 theta_ct_n = normrnd(theta_ct_n,para_frac*theta_ct_n);
                 
                 theta_ct_ALL{n} = theta_ct_n;  %%% all theta_ct
                 
                 
                 p_o_particles(n) = normrnd(p_o_particles(n),para_frac*p_o_particles(n));
                   po_n = p_o_particles(n);
                   
                   theta_o_particles(:,n) = normrnd(theta_o_particles(:,n),para_frac*theta_o_particles(:,n));
                   theta_o_n_all_T = theta_o_particles(:,n);
                     
                   
                   %%% Calculate the weight for particle n
                   
                    prob_n = vpa(zeros(1,T));
                    
                       for t = 1:T
                           n_st = Data_s(t); N_st = N_st_s(t);
                           
                           theta_o_t = theta_o_n_all_T(t);
                           
                         %%% take out the theta_ct
                           bee = (t-1)*C_here_n + 1;  
                           eed = t*C_here_n; 
                           theta_ct = theta_ct_n(bee:eed);
                           
                           all_theta = [theta_o_t; theta_ct]; %column vector
                           w_ct = all_theta/sum(all_theta);    %column vector
                           
                           po_and_z = [po_n Z_loni_n(s,:)]; % row vector
                           
                           p_st = po_and_z*w_ct;
                           
                         
                           n_choose_k = nchoosek(vpa(N_st),n_st);
                           pp1 = vpa(p_st)^(n_st);
                           pp2 = (1 - vpa(p_st))^(N_st - n_st);
                           lad = n_choose_k * pp1 * pp2;
                           prob_n(t) = lad;
                           
                           %prob_n(t) = binopdf(n_st,N_st,p_st);
                          
                            
                       end
                       
                       prob_n;
                       
%                           inter1 = sum(log(prob_n));
%                           PROB(n) = exp(vpa(inter1));

                         
                          inter1 = vpa(sum(log(prob_n)));
                          inter2 = vpa(exp(inter1));
                          PROB(n) = inter2;

                          
                  
            end
            
            %%% normalize weights
            
                 wt = PROB/sum(PROB); clear PROB;
                 wt = double(wt);
            
            %%% resample  %%% rr = randsample([1:N],N,true,wt), new = old{rr}
            
                 rr = randsample([1:N],N,true,wt);
                 
                 p_o_particles = p_o_particles(rr);
                 theta_o_particles = theta_o_particles(:,rr);
                 
                 Z_loni = Z_loni(rr);
                 theta_ct_ALL = theta_ct_ALL(rr);
                 
%                  for m = 1:N
%                      theta_ct_ALL{m} = theta_ct_ALL{rr(m)};
%                      Z_loni{m} = Z_loni{rr(m)};   
%                  end
                 
                 
                 
                 
      
    else  %%%%% other time steps
        
        Z_lana = Z_loni;
           
         %PROB = zeros(1,N);
         
            for n = 1:N

                Z_lana_n = Z_lana{n};

                Z_loni_n = fn_trinary_cIBP_Tran(Z_lana_n,alpha,betas,s);
                
                Z_loni{n} = Z_loni_n;
     
                
                %%% see if a new column is created for the new and then sample W matrix to compliment this 
                
                 C_lana = size(Z_lana_n,2);
                 C_loni = size(Z_loni_n,2);
                 diff = C_loni - C_lana;
                 
                 
                 
                %%% Calculate the weight for particle n
                  
                   p_o_particles(n) = normrnd(p_o_particles(n),para_frac*p_o_particles(n));
                   po_n = p_o_particles(n);
                   
                   theta_o_particles(:,n) = normrnd(theta_o_particles(:,n),para_frac*theta_o_particles(:,n));
                   theta_o_n_all_T = theta_o_particles(:,n);
                     
                  
                   theta_ct_ALL{n} = normrnd(theta_ct_ALL{n},para_frac*theta_ct_ALL{n});
                   theta_ct_n = theta_ct_ALL{n};  %%% all theta_ct
                   
                   
                   
                  if diff == 0  %%% new column(s) of Z is/are not created.
                   
                            prob_n = vpa(zeros(1,T));
                               for t = 1:T
                                   
                                   n_st = Data_s(t);
                                   N_st = N_st_s(t);

                                   theta_o_t = theta_o_n_all_T(t);

                                 %%% take out the theta_ct

                                   

                                       bee = (t-1)*C_lana + 1;  
                                       eed = t*C_lana; 
                                       theta_ct = theta_ct_n(bee:eed);
                                       all_theta = [theta_o_t; theta_ct]; %column vector

                                       w_ct = all_theta/sum(all_theta);    %column vector

                                       po_and_z = [po_n Z_loni_n(s,:)]; % row vector

                                       p_st = po_and_z*w_ct;
                                       
                                       n_choose_k = nchoosek(vpa(N_st),n_st);
                                        pp1 = vpa(p_st)^(n_st);
                                        pp2 = (1 - vpa(p_st))^(N_st - n_st);
                                        lad = n_choose_k * pp1 * pp2;
                                        prob_n(t) = lad;

                                       %prob_n(t) = binopdf(n_st,N_st,p_st);

                               end
                       
                  else
                      
                      
                                 prob_n = vpa(zeros(1,T));
                                 
                                 theta_ct_new_matrix = zeros(C_loni,T);
                                 
                               for t = 1:T
                                   n_st = Data_s(t); N_st = N_st_s(t);

                                   theta_o_t = theta_o_n_all_T(t);

                                 %%% take out the theta_ct

                                       bee = (t-1)*C_lana + 1;  
                                       eed = t*C_lana; 
                                       theta_ct = theta_ct_n(bee:eed);
                                  
                                       theta_ct_loni = [theta_ct_n(bee:eed); gamrnd(a,b_general,[diff,1])];
                                       theta_ct_new_matrix(:,t) = theta_ct_loni;
                                       
                                       all_theta = [theta_o_t; theta_ct_loni]; %column vector
                                   

                                       w_ct = all_theta/sum(all_theta);    %column vector

                                       po_and_z = [po_n Z_loni_n(s,:)]; % row vector

                                       p_st = po_and_z*w_ct;
                                       
                                        n_choose_k = nchoosek(vpa(N_st),n_st);
                                        pp1 = vpa(p_st)^(n_st);
                                        pp2 = (1 - vpa(p_st))^(N_st - n_st);
                                        lad = n_choose_k * pp1 * pp2;
                                        prob_n(t) = lad;

                                       
                                       

                                       %prob_n(t) = binopdf(n_st,N_st,p_st);

                               end
                       
                                       theta_ct_ALL{n} = theta_ct_new_matrix(:);
                           
                  end
                  
%                                         inter2 = sum(log(prob_n));
%                                         PROB(n) = exp(vpa(inter2));
                                        
                                        inter3 = vpa(sum(log(prob_n)));
                                        inter4 = vpa(exp(inter3));
                                        PROB(n) = inter4;
                                        
                  
            end
            
            
            
            
            %%% normalize weights
            
                 wt = PROB/sum(PROB); clear PROB;
                 wt = double(wt);
                 
            
            %%% resample  %%% rr = randsample([1:N],N,true,wt), new = old{rr}
            
                 rr = randsample([1:N],N,true,wt);
                 
                 p_o_particles = p_o_particles(rr);
                 theta_o_particles = theta_o_particles(:,rr);
                 
                 theta_ct_ALL = theta_ct_ALL(rr);
                 Z_loni = Z_loni(rr);   
                 
                 
%                  for m = 1:N
%                      theta_ct_ALL{m} = theta_ct_ALL{rr(m)};
%                      Z_loni{m} = Z_loni{rr(m)};   
%                  end
                 
        
    end
       
       
       
end
          
   

wt = (1/N)*ones(1,N);

S_orig = S/rep;
for n = 1:N
Z_loni_now{n} = Z_loni{1}(S - S_orig + 1:S,:);
end




%%%%%%%  POINT ESTIMATES %%%%%%%%%%
   
    
 %%%%% determine point estimate of C...i.e C_est
 
        ALL_C = zeros(1,N);
        for n = 1:N
            ALL_C(n) = size(Z_loni_now{n},2); 
        end
        
       ALL_C
        
       unique_C = unique(ALL_C);
       
       prob_C = zeros(1,length(unique_C));
       len_Cs = zeros(1,length(unique_C));
       for c = 1:length(unique_C);
           
           prr = unique_C(c);
           ind_c = find(ALL_C == prr);
           prob_C(c) = length(ind_c)/N;
           len_Cs(c) = length(ind_c);
       end
 
       [vaa, indd] = max(len_Cs);
       
      
       C_est = unique_C(indd)
       
       
       %%%% Plot of C
             
             uppe = max(unique_C) + 1;
             lowe = min(unique_C) - 1;
             
             x_min = 0;
             x_max = uppe + 5;
             
             pre_x = 0:lowe;
             post_x = uppe:x_max;
             xx = [pre_x unique_C post_x]; 
             yy = [zeros(1,lowe+1) prob_C zeros(1,x_max-max(unique_C))]; 
             
       figure;
            plot(xx,yy)
            xlabel('C')
            ylabel('P(C|Y)')
            xlim([x_min x_max])
            ylim([0 1])
            %vline(C_est);
            h = vline(C_est,'g','The Answer');
      
  
 
 
 %%%%% pick out all particles (Z,W,p) that belong to this C-star
     ind_C_est = find(ALL_C == C_est);
     
    picked_Z_loni = Z_loni_now(ind_C_est);
    picked_p_o_particles = p_o_particles(ind_C_est); 
    picked_theta_o_particles = theta_o_particles(:,ind_C_est);
    picked_theta_ct_ALL = theta_ct_ALL(ind_C_est);
    picked_wt = wt(ind_C_est);
 
    
    
    
    
    
    
    
   %     
%  %%%%% determine point estimate of Z   i.e Z_est
%  
%    NNN = length(ind_C_est);
%    
%    arg_vector = zeros(1,NNN);
%    all_permutations = perms(1:C_est);
% 
%      for g = 1:NNN
%          
%          Z_prime = picked_Z_loni{g};
%          
%            dist_Z_l_prime = zeros(1,NNN);
%          for l = 1:NNN
%              
%              Z_l = picked_Z_loni{l};
%               
%                    d_min_vec = zeros(1,size(all_permutations,1)); 
%                 for permu = 1:size(all_permutations,1)
%                     perm_here = all_permutations(permu,:);
%                     
%                     permuted_Z_prime = Z_prime(:,perm_here);
%                     
%                     d_min_vec(permu) = sum(sum(abs(Z_l - permuted_Z_prime)));
%                     
%                 end
%                 
%              dist_Z_l_prime(l) = min(d_min_vec);
%          end
%          
%   arg_vector(g) = sum(picked_wt.*dist_Z_l_prime);
% 
%      end
%      
%      
%      
%      [val,indx] = min(arg_vector);
%      
%      %%%% point estimate of Z
%      Z_est = picked_Z_loni{indx}
%      
%      %%%% point estimate of p_0
%      p_o_est = picked_p_o_particles(indx)
%      
%      %%%% point estimate of W
%      
%      theta_o_est = picked_theta_o_particles(:,indx);
%      theta_ct_ALL_est = picked_theta_ct_ALL{indx};
    
     
     
     
     
     
    indx = 2;
     %%%% point estimate of Z
     Z_est = picked_Z_loni{indx};
     Z_est_binarized = Z_est > 0.4
     
     %%%% point estimate of p_0
     p_o_est = picked_p_o_particles(indx)
     
     %%%% point estimate of W
     
     theta_o_est = picked_theta_o_particles(:,indx);
     theta_ct_ALL_est = picked_theta_ct_ALL{indx};
     
     olu = vec2mat(theta_ct_ALL_est,C_est)';
     ade = [theta_o_est'; olu]; 
     sum_ade = sum(ade);
     
     W_est = ade./repmat(sum_ade,C_est+1,1)
     
     
     %%% SAVE RESULTS 
             TDS003_WGS_Z = Z_est;
             TDS003_WGS_W = W_est;
             TDS003_WGS_p_o = p_o_est;
             TDS003_WGS_C = C_est;

             save('TDS003_WGS_Z','TDS003_WGS_Z')
             save('TDS003_WGS_W','TDS003_WGS_W')
             save('TDS003_WGS_p_o','TDS003_WGS_p_o')
             save('TDS003_WGS_C','TDS003_WGS_C')
     
     
     
     
     
  
     
%  %%%%%%% COMPUTE Z_error and W_error 
%      
% if size(Z_est,2) == size(Z_true,2)
%    
%    all_perms = perms(1:size(Z_est,2));
%    error_z = zeros(size(all_perms,1),1);
%    
%    dim_Z = size(Z_true);
%    dim_W = size(W_est);
%    
%         for g = 1:size(all_perms,1)
%             
%             perm_g = all_perms(g,:);
%             z_perm = Z_est(:,perm_g);
%             
%             error_z(g) = sum(sum(abs(z_perm - Z_true)))/prod(dim_Z);
%             
%             
%         end
%     
%         
%         [chosen_Z_error, chosen_perm_ind] = min(error_z);
%         
%         Z_error = chosen_Z_error
%         
%         
%         
%         
%         WWWW = W_est(2:end,:);
%         
%         chosen_perm = all_perms(chosen_perm_ind,:);
%         W_est_arranged = [W_est(1,:); WWWW(chosen_perm',:)];
%             
%         W_error = sum(sum(abs(W_est_arranged - W)))/prod(dim_W) 
%         
%         
%         
%         Z_true_sum = sum(Z_true)
%         
%         ZZZ_rearranged = Z_est(:,chosen_perm);
%         ZZZ_rearranged_sum = sum(ZZZ_rearranged)
%         
%     
% end
% 
% 



% 
% %%%% Error bbetween the p_st_true and the p_st_est
% 
%    p_o_true = 0.02; 
%    
%    p_st_true = [p_o_true*ones(S_orig,1) Z_true]*W;
%    p_st_est = [p_o_est*ones(S_orig,1) ZZZ_rearranged]*W_est_arranged;
%    
%    error_p_st = p_st_true - p_st_est;
%    
%    figure;
%    hist(error_p_st(:));
%    
%    abs_error_pst = sum(sum(abs(error_p_st)))/(S_orig*T)
%    
% 
% 

     
     
     
     
     

%  %%%% HEATMAPS:  
%  
% %%% 1.   True Z
%     
%     for h = 1:4
%         figure;
%         colormap('parula')
%         imagesc( Z_true(:,h))
%         colorbar
%     end  
%     
%     
% %%%. 2. Estimated Z    
%     
%     for h = 1:4
%         figure;
%         colormap('parula')
%         imagesc(ZZZ(:,h))
%         colorbar
%     end
% 
% 
%     %%% 3.   True W
%     %%% 4.   Estimated W