#source("https://bioconductor.org/biocLite.R") #biocLite("tximport") library("tximport") library("tximportData") library("dplyr") library("tidyr") library("DESeq2") sample_id <- dir(file.path("/data/projects/roni_rnaseq_de_august2018/", "data_quant")) itsv <- file.path("/data/projects/roni_rnaseq_de_august2018", "data_quant", sample_id[1], "quantifications/kallisto/hg38_mmtv/paired_end/abundance.tsv") target_mapping <- read.delim(itsv) # extract information from target_id target_mapping$target_id_raw <- target_mapping$target_id target_id_names <- c('transcript_id', 'gene_id', 'ott1', 'ott2', 'transcript_name', 'gene_name', 'transcript_length', 'gene_type', 'empty') target_mapping <- target_mapping %>% separate(target_id_raw, target_id_names, "\\|") # select specific columns target_mapping <- target_mapping[, c("target_id", "gene_id", 'gene_name')] tx2gene<-target_mapping[,c("target_id","gene_id")] kal_dirs <- file.path("/data/projects/roni_rnaseq_de_august2018", "data_quant", sample_id, "quantifications/kallisto/hg38_mmtv/paired_end/abundance.h5") names(kal_dirs)<-sample_id s2c <- read.table(file.path("/data/projects/roni_rnaseq_de_august2018", "metadata", "hiseq_info.txt"), header = TRUE, stringsAsFactors=FALSE) s2c <- dplyr::select(s2c, sample = sample, condition) s2c <- dplyr::mutate(s2c, path = kal_dirs) print(s2c) #Full table row.names(s2c)<-sample_id names(kal_dirs) <- sample_id ''' Import kallisto abundances kallisto abundance.h5 files can be imported by setting type to "kallisto". Note that this requires that you have the Bioconductor package rhdf5 installed. ''' #First comparison 2D_wt vs 2D_shx condA <- which(s2c$condition == "2D_wt") condB <- which(s2c$condition == "2D_sh") s2c_AvsB <- s2c[c(condA,condB),] print(s2c_AvsB) #2D_wt vs 2D_shx txi.kallisto <- tximport::tximport(s2c_AvsB$path, type = "kallisto", tx2gene=tx2gene) head(txi.kallisto$counts) dds <- DESeqDataSetFromTximport(txi.kallisto , colData = s2c_AvsB, design = ~ condition) keep <- rowSums(counts(dds)) >= 5 #Wald test dds <- dds[keep,] dds <- DESeq(dds) res <- results(dds) res nrow(subset(res,padj<0.05)) #LRT dds.LRT <- DESeq(dds,betaPrior=FALSE, test="LRT", full= ~ condition, reduced=~1) res.LRT <- results(dds.LRT) nrow(subset(res.LRT ,padj<0.05)) #First comparison 3D_wt vs 3D_shx condA <- which(s2c$condition == "3D_wt") condB <- which(s2c$condition == "3D_sh") s2c_AvsB <- s2c[c(condA,condB),] print(s2c_AvsB) #3D_wt vs 3D_shx txi.kallisto <- tximport::tximport(s2c_AvsB$path, type = "kallisto", tx2gene=tx2gene) head(txi.kallisto$counts) dds <- DESeqDataSetFromTximport(txi.kallisto , colData = s2c_AvsB, design = ~ condition) keep <- rowSums(counts(dds)) >= 5 #Wald test dds <- dds[keep,] dds <- DESeq(dds) res <- results(dds) res nrow(subset(res,padj<0.05)) #LRT dds.LRT <- DESeq(dds,betaPrior=FALSE, test="LRT", full= ~ condition, reduced=~1) res.LRT <- results(dds.LRT) nrow(subset(res.LRT ,padj<0.05))