PLIN1 CorShrink analysis

Kushal K Dey

3/2/2018

We perform adaptive correlation shrinkage analysis using the CorShrink R package on the subject by tissue expression data for the PLIN1 gene.

library(CorShrink)
library(corrplot)
## corrplot 0.84 loaded
library(gridExtra)
library(ggplot2)
library(ashr)
data("sample_by_feature_data")

This data contains many NA values.We now run CorShrink on this data.

out <- CorShrinkData(sample_by_feature_data, sd_boot = FALSE, image = "null", 
              ash.control = list(mixcompdist = "halfuniform", control = list(maxiter = 1000)),                report_model = TRUE)
order_index <- get(load("../shared_output/order_index.rda"))
col2 <- c("blue", "white", "red")
corrplot(out$cor[order_index, order_index], diag = FALSE,
         col = colorRampPalette(col2)(200),
         tl.pos = "td", tl.cex = 0.9, tl.col = "black",
         rect.col = "white",na.label.col = "white",
         method = "color", type = "upper") 

The empirical correlation matrix

cor_mat <- cor(sample_by_feature_data, use = "pairwise.complete.obs")
corrplot(cor_mat[order_index, order_index], diag = FALSE,
         col = colorRampPalette(col2)(200),
         tl.pos = "td", tl.cex = 0.9, tl.col = "black",
         rect.col = "white",na.label.col = "white",
         method = "color", type = "upper") 

nsamp_mat <- get(load("../shared_output/common_samples.rda"))
df <- data.frame("original" = cor_mat[lower.tri(cor_mat)],
                 "corshrink" = out$cor[lower.tri(out$cor)],
                 "nsamp" = nsamp_mat[lower.tri(nsamp_mat)])

ggplot(df, aes(original, corshrink)) + geom_point(aes(colour = nsamp)) +
scale_colour_gradient(low = "lightcyan", high = "darkcyan") + coord_fixed(ratio=1) + xlim(-0.5, 1) + ylim(-0.5, 1)

nsamp_mat <- get(load("../shared_output/common_samples.rda"))
df <- data.frame("original" = log((1+cor_mat[lower.tri(cor_mat)])/(1-cor_mat[lower.tri(cor_mat)])),
                 "corshrink" = log((1+out$cor[lower.tri(out$cor)])/(1-out$cor[lower.tri(out$cor)])),
                 "nsamp" = nsamp_mat[lower.tri(nsamp_mat)])

ggplot(df, aes(original, corshrink)) + geom_point(aes(colour = nsamp)) +
scale_colour_gradient(low = "lightcyan", high = "darkcyan") + coord_fixed(ratio=1) + xlim(-0.5, 1) + ylim(-0.5, 1)

The density plot of the estimated \(g\) from te CorShrink model.

phalfuniform <- function(x){
  df <- cbind(out$model$fitted_g$a, out$model$fitted_g$b)
  z <- sum(out$model$fitted_g$pi[-1] * apply(df[-1,], 1, function(y) return (punif(x, y[1], y[2]))))
  if(x >= 0){
     z <- z + out$model$fitted_g$pi[1]
  }
  return(z)
}

dhalfuniform <- function(x){
  df <- cbind(out$model$fitted_g$a, out$model$fitted_g$b)
  z <- sum(out$model$fitted_g$pi[-1] * apply(df[-1,], 1, function(y) return (dunif(x, y[1], y[2]))))
  return(z)
}
xout <- sort(c(0, seq(-1,1, length.out = 10000)))
pdf_vals_2 <- sapply(xout, function(x) return(dhalfuniform(x)))
plot(xout, pdf_vals_2, type = "l", col = "red", xlab = "values", ylab = "pdf", cex.axis = 1, cex.lab = 1.5, ylim = c(0, 3))
abline(v=0, col = "black", lty = 5)

cdf_vals_2 <- sapply(xout, function(x) return(phalfuniform(x)))
plot(xout, cdf_vals_2, type = "l", col = "red", xlab = "values", ylab = "cdf", cex.axis = 1,
     cex.lab = 1.5)


This R Markdown site was created with workflowr