### function node_plot plots the data assigned to a specified node, projected into a ### two dimsional subspace, to illustrate the split at that node (or the would-be split for a leaf node) ## arguments: # sol = cluster solution arising from any of the clustering algorithms in the package # node = either the node number to be viewed (nodes are listed in the order # they are added to the model), or a vector of length 2 specifying the # depth and position of the node within the hierarchy. # labels = vector of length n. If class labels are given then the # performance at the specified node is given. node_plot <- function(sol, node, labels = NULL){ op <- par(no.readonly = TRUE) # if node location is given by its position (rather than number), then determine its number if(length(node)>1){ for(i in 1:length(sol$Nodes)){ if(sum(node==sol$Nodes[[i]]$location)==2){ node = i break } } if(length(node)>1) stop('You must specify a node location within the given hierarchy') } X <- sol$data # if labels are given, then ensure they are integer valued for the purpose of colour plots if(!is.null(labels)){ lab_new <- numeric(length(labels)) u <- unique(labels) for(i in 1:length(u)) lab_new[which(labels==u[i])] = i labels <- lab_new } # determine dimensions and layout for the plot d <- max(sol$model[,1]) w <- subtree_width(sol, 1) l.mat <- matrix(0, 4, 3) l.mat[1,] <- c(2, 2, 1) l.mat[2,] <- c(2, 2, 1) l.mat[3,] <- c(2, 2, 3) l.mat[4,] <- c(2, 2, 3) layout(l.mat) # plot the full hierarchical structure in the upper corner par(mar = c(0, 0, 0, 0)) plot(0, cex = 0, ylim = c(0, 1), xlim = c(0, 1), xaxt = 'n', yaxt = 'n', bty = 'n') for(i in 1:d){ ixs = which(sol$model[,1]==i) for(j in ixs){ loc <- sol$model[j,] is.leaf = 1-sum((sol$model[,1]==(i+1))*(sol$model[,2]==(2*loc[2]))) if(is.leaf){ y1 <- 1-(i-1)/d y2 <- 1-i/d x1 <- (2*loc[2]-1)/2^loc[1] x2 <- (2*loc[2]-1)/2^loc[1] segments(x1, y1, x2, y2) } else{ y1 <- 1-(i-1)/d y2 <- 1-i/d x1 <- (2*loc[2]-1)/2^loc[1] x2 <- (2*loc[2]-1)/2^loc[1] segments(x1, y1, x2, y2) x1 <- (2*(2*loc[2]-1)-1)/2^(loc[1]+1) x2 <- (2*(2*loc[2])-1)/2^(loc[1]+1) segments(x1, y2, x2, y2) } if(j==node) points((2*loc[2]-1)/2^loc[1], 1-i/d, col = rgb(1, 0, 0, .5), pch = 16, cex = 3) } } # plot the projected data in the main body of the plot par(mar = c(2, 2, 0, 2)) is.leaf = 1-sum((sol$model[,1]==(sol$model[node,1]+1))*(sol$model[,2]==(2*sol$model[node,2]))) if(ncol(X)>2) v2 <- rARPACK::eigs_sym(cov(X[sol$Nodes[[node]]$ixs,]-X[sol$Nodes[[node]]$ixs,]%*%sol$Nodes[[node]]$v%*%t(sol$Nodes[[node]]$v)), 1)$vectors[,1] else v2 <- eigen(cov(X[sol$Nodes[[node]]$ixs,]-X[sol$Nodes[[node]]$ixs,]%*%sol$Nodes[[node]]$v%*%t(sol$Nodes[[node]]$v)))$vectors[,1] Xp <- X[sol$Nodes[[node]]$ixs,]%*%cbind(sol$Nodes[[node]]$v, v2) if(sol$method=='MDH') den <- density(Xp[,1], bw = sol$Nodes[[node]]$params$h) else den <- density(Xp[,1]) if(is.null(labels)){ if(is.leaf){ if(sol$model[node,2]%%2) plot(Xp, col = 4, tck = .02, yaxt = 'n', bty = 'n') else plot(Xp, col = 2, tck = .02, yaxt = 'n', bty = 'n') } else plot(Xp, col = (Xp[,1]2) v2 <- rARPACK::eigs_sym(cov(X-X%*%sol$v%*%t(sol$v)), 1)$vectors[,1] else v2 <- eigen(cov(X-X%*%sol$v%*%t(sol$v)))$vectors[,1] Xp <- X%*%cbind(sol$v, v2) # compute the external quality of the split through success ratio (if labels are provided) if(!is.null(labels)){ prf <- success_ratio((Xp[,1]