# This script prepares eQTL results for mashr.
# 1/28/2020 JR
# Based on the mashr vignette: https://stephenslab.github.io/mashr/articles/intro_mash.html
# Last edited and run 10/7/2020 JR: cleaned up.

library(ashr)
library(mashr)
library(data.table)
library(dplyr)
library(doParallel)
library(RhpcBLASctl)

# use the number of tasks allocated by SLURM for the parallel workers and BLAS threads
cores <- as.integer(Sys.getenv("SLURM_STEP_TASKS_PER_NODE"))
registerDoParallel(cores = cores)
blas_set_num_threads(cores)

folder = "SCAIP1-6"
load(paste0("./mashr_input_", folder, ".Rd"))

# subset to T cells only:
SEs <- SEs[, grep("Tcell", colnames(SEs))]
slopes <- slopes[, grep("Tcell", colnames(slopes))]
pvalues <- pvalues[, grep("Tcell", colnames(pvalues))]

# let's remove all NAs as well as Inf from SEs, keeping slopes and pvalues in sync:
keep <- rowSums(!is.finite(as.matrix(SEs))) == 0
SEs <- SEs[keep, ]
slopes <- slopes[keep, ]
pvalues <- pvalues[keep, ]
# sanity checks: number of remaining rows with any NA (should be 0)
sum(rowSums(is.na(as.matrix(SEs))) > 0)
sum(rowSums(is.na(as.matrix(pvalues))) > 0)

stopifnot(identical(rownames(slopes), rownames(SEs)))
stopifnot(identical(rownames(slopes), rownames(pvalues)))
# dim(SEs) # 4019906 5

# 1. Set up the mash data object from effect sizes and their standard errors
data = mash_set_data(as.matrix(slopes), as.matrix(SEs))

# 2. Set up the DATA-DRIVEN covariance matrices
# 2.1. Select strong signals
m.1by1 = mash_1by1(data)
strong = get_significant_results(m.1by1, 0.05)
# 2.2. Obtain initial data-driven covariance matrices
# this step estimates the covariances of the observed data
U.pca = cov_pca(data, 5, subset = strong)
print(names(U.pca))
# canonical covariance matrices:
U.c = cov_canonical(data)
# 2.3. Apply Extreme Deconvolution
# this step estimates the covariances of the actual underlying effects
U.ed = cov_ed(data, U.pca, subset = strong)

# save the mashr input data:
save(data, U.ed, U.c, file = "Tcell_mash-ready-data.Rd")

sessionInfo()
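
## Downstream usage (illustrative sketch, not part of this prep script): based on
## the mashr vignette cited above, the saved objects could later be used to fit the
## mash model with the combined canonical and data-driven covariances. The output
## filename "Tcell_mash-fit.rds" is hypothetical. Wrapped in `if (FALSE)` so it is
## never executed when this script is run.
if (FALSE) {
  load("Tcell_mash-ready-data.Rd")   # restores `data`, `U.c`, and `U.ed`
  m <- mash(data, c(U.c, U.ed))      # fit mash using both covariance sets
  head(get_lfsr(m))                  # local false sign rate per effect and condition
  saveRDS(m, "Tcell_mash-fit.rds")   # hypothetical output file
}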