library(ggplot2)
library(cowplot)
library(dplyr)
library(scales)
library(gridExtra)
library(grid)
library(ggplotify)
library(data.table)

palette <- c('GENCODE_protein_coding' = '#009900', 'CLS_TMs' = '#cc9966', 'CLS_FL_TMs' = '#cc00cc')
dat<-fread('output/statsFiles/all.min2reads.matureRNALength.stats.tsv.gz', header=T, sep='\t')
dat <- subset(dat, seqTech=='ont-Crg-CapTrap')

dat <- subset(dat, capDesign=='HpreCap')


dat <- subset(dat, sizeFrac=='0+')

dat <- subset(dat, sampleRep=='HEK293T01Rep2')


dat$seqTech <- gsub('-', '\n', dat$seqTech)

dat$sampleRep <- gsub('HpreCap_', '', dat$sampleRep)



horizCats <- length(unique(dat$capDesign)) * length(unique(dat$sampleRep))
vertCats <- length(unique(dat$seqTech))

wXyPlot = (horizCats * 0.9) +1.7
hXyPlot = (vertCats * 0.6) + 1.7

geom_textSize=1.4 
themeSize = (14/5) * geom_textSize
# https://stackoverflow.com/questions/25061822/ggplot-geom-text-font-size-control/25062509
lineSize=geom_textSize/8
minorLineSize=lineSize/2


dat <- subset(dat, spliced==1)


wXyPlot = wXyPlot * 1.2

dat %>%
  group_by(seqTech, sizeFrac, capDesign, sampleRep, category) %>%
  summarise(med=median(mature_RNA_length)) -> datSumm

summaryStats = transform(datSumm, LabelM = comma(round(med, digits = 0))) 

geom_textSize = geom_textSize + 1


plotBase <- "p <- ggplot(dat, aes(x=mature_RNA_length, fill=category)) +
geom_histogram(binwidth=100, alpha=0.35, position='identity') +
geom_vline(data=summaryStats, aes(xintercept=med, color=category), size=lineSize, show.legend=FALSE) +
geom_text(data = summaryStats[summaryStats$category=='CLS_TMs',], aes(label = LabelM, x = Inf, y = Inf, color=category), hjust=1.2, vjust=1,  size=geom_textSize, show.legend=FALSE) +
geom_text(data = summaryStats[summaryStats$category=='CLS_FL_TMs',], aes(label = LabelM, x = Inf, y = Inf, color=category), hjust=1.2, vjust=2.1,  size=geom_textSize, show.legend=FALSE) +

coord_cartesian(xlim=c(0, 3000)) +
scale_x_continuous(labels=comma)+
scale_color_manual(values=palette, name='Category', labels = c('GENCODE_protein_coding' = 'GENCODE
protein-coding', 'CLS_TMs'='TMs', 'CLS_FL_TMs'='FL TMs')) +
scale_fill_manual(values=palette, name='Category', labels = c('GENCODE_protein_coding' = 'GENCODE
protein-coding', 'CLS_TMs'='TMs', 'CLS_FL_TMs'='FL TMs')) +

xlab('Mature RNA length') +
theme(axis.text= element_text(size=themeSize*1.8), axis.ticks = element_line(size=lineSize), axis.line = element_line(colour = '#595959', size=lineSize), axis.title=element_text(size = themeSize*2), panel.grid.major = element_line(colour='#d9d9d9', size=lineSize),panel.grid.minor = element_line(colour='#e6e6e6', size=minorLineSize),panel.border = element_blank(),panel.background = element_blank(), strip.background = element_rect(colour='#737373',fill='white'), legend.key.size=unit(0.5,'line'), legend.title=element_text(size=themeSize*1.2), legend.text=element_text(size=themeSize), strip.text = element_text(size = themeSize)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) + "


plotFacetXy <- parse(text =paste(plotBase, "facet_grid( seqTech ~ capDesign + sampleRep, scales='free_y')"))
plotFacetYx <- parse(text=paste(plotBase, "facet_grid( capDesign + sampleRep ~ seqTech, scales='free_y')"))

plotFacetXy <- parse(text =paste(plotFacetXy, " + theme(strip.text.x = element_blank(), strip.text.y = element_blank())"))
plotFacetYx <- parse(text=paste(plotFacetYx, " + theme(strip.text.x = element_blank(), strip.text.y = element_blank())"))

pXy <- eval(plotFacetXy)
pYx <- eval(plotFacetYx)

legend <- get_legend(pXy)

pXyNoLegend <- pXy + theme(legend.position='none')
pYxNoLegend <- pYx + theme(legend.position='none')

legendOnly <- grid.arrange(legend)
pXyGrob <- as.grob(pXy)
pYxGrob <- as.grob(pYx)
pXyNoLegendGrob <- as.grob(pXyNoLegend)
pYxNoLegendGrob <- as.grob(pYxNoLegend)


hLegendOnly <- convertUnit(sum(legend$heights), 'in', valueOnly=TRUE)
wLegendOnly <- convertUnit(sum(legend$widths), 'in', valueOnly=TRUE)


hYxPlot <- wXyPlot
wYxPlot <- hXyPlot 

hXyNoLegendPlot<- hXyPlot 
wXyNoLegendPlot<- wXyPlot - wLegendOnly

hYxNoLegendPlot<- hYxPlot
wYxNoLegendPlot<- wYxPlot - wLegendOnly


wYxPlot = wYxPlot * 1.2
wYxNoLegendPlot<- wYxPlot - wLegendOnly

save_plot('output/plots/matureRNALength.stats/ont-Crg-CapTrap/HpreCap/ont-Crg-CapTrap_HpreCap_0+_HEK293T01Rep2.min2reads.splicing_status-spliced.matureRNALength.hist.stats.legendOnly.png', legendOnly, base_width=wLegendOnly, base_height=hLegendOnly)
save_plot('output/plots/matureRNALength.stats/ont-Crg-CapTrap/HpreCap/ont-Crg-CapTrap_HpreCap_0+_HEK293T01Rep2.min2reads.splicing_status-spliced.matureRNALength.hist.stats.xy.wLegend.png', pXy, base_width=wXyPlot, base_height=hXyPlot)

save_plot('output/plots/matureRNALength.stats/ont-Crg-CapTrap/HpreCap/ont-Crg-CapTrap_HpreCap_0+_HEK293T01Rep2.min2reads.splicing_status-spliced.matureRNALength.hist.stats.xy.woLegend.png', pXyNoLegend, base_width=wXyNoLegendPlot, base_height=hXyNoLegendPlot)
save_plot('output/plots/matureRNALength.stats/ont-Crg-CapTrap/HpreCap/ont-Crg-CapTrap_HpreCap_0+_HEK293T01Rep2.min2reads.splicing_status-spliced.matureRNALength.hist.stats.yx.wLegend.png', pYx, base_width=wYxPlot, base_height=hYxPlot)

save_plot('output/plots/matureRNALength.stats/ont-Crg-CapTrap/HpreCap/ont-Crg-CapTrap_HpreCap_0+_HEK293T01Rep2.min2reads.splicing_status-spliced.matureRNALength.hist.stats.yx.woLegend.png', pYxNoLegend, base_width=wYxNoLegendPlot, base_height=hYxNoLegendPlot)