#source("https://bioconductor.org/biocLite.R") #biocLite("tximport") library("tximport") library("tximportData") library("dplyr") library("tidyr") library("DESeq2") sample_id <- dir(file.path("/data/projects/roni_rnaseq_de_august2018/", "data_quant2")) itsv <- file.path("/data/projects/roni_rnaseq_de_august2018", "data_quant2", sample_id[1], "quantifications/kallisto/hg38_mmtv/paired_end/abundance.tsv") target_mapping <- read.delim(itsv) # extract information from target_id target_mapping$target_id_raw <- target_mapping$target_id target_id_names <- c('transcript_id', 'gene_id', 'ott1', 'ott2', 'transcript_name', 'gene_name', 'transcript_length', 'gene_type', 'empty') target_mapping <- target_mapping %>% separate(target_id_raw, target_id_names, "\\|") # select specific columns target_mapping <- target_mapping[, c("target_id", "gene_id", 'gene_name')] tx2gene<-target_mapping[,c("target_id","gene_id")] kal_dirs <- file.path("/data/projects/roni_rnaseq_de_august2018", "data_quant2", sample_id, "quantifications/kallisto/hg38_mmtv/paired_end/abundance.h5") names(kal_dirs)<-sample_id s2c <- read.table(file.path("/data/projects/roni_rnaseq_de_august2018", "metadata", "hiseq_info2.txt"), header = TRUE, stringsAsFactors=FALSE) s2c <- dplyr::select(s2c, sample = sample, condition) s2c <- dplyr::mutate(s2c, path = kal_dirs) print(s2c) #Full table row.names(s2c)<-sample_id names(kal_dirs) <- sample_id ''' Import kallisto abundances kallisto abundance.h5 files can be imported by setting type to "kallisto". Note that this requires that you have the Bioconductor package rhdf5 installed. ''' txi.kallisto <- tximport::tximport(s2c$path, type = "kallisto", tx2gene=tx2gene) head(txi.kallisto$counts) write.csv(txi.kallisto$counts, file = "/data/projects/roni_rnaseq_de_august2018/tables/rw34-rw38_kallisto_counts.csv", quote=FALSE,row.names=TRUE)