##### Load the data ##########
setwd("/Users/sDocuments/5.Blog_Video/RNAseq_R")
Data <- read.table("RNAseq_Result.txt",header=T,skip=1)

head(Data)

CountData <- Data[,seq(from=7,to=ncol(Data))]
head(CountData)

GeneIDs <- Data[,1]
rownames(CountData) <- GeneIDs
head(CountData)

#############################################################################
######### MDS plot ###############################################
#############################################################################
library(edgeR)

Condition <- c("Control","Control","Control"
               ,"Cancer","Cancer","Cancer")

SampleID <- colnames(CountData)
MetaData <- data.frame(SampleID ,Condition)
str(MetaData)

targets <- MetaData ##Metadata
targets$Condition <- factor(targets$Condition)
str(targets)
levels(targets$Condition)
Condition <-factor(Condition,levels=c("Control","Cancer"))
targets$Condition <- factor(targets$Condition,levels=c("Control","Cancer"))

str(Condition)
head(CountData)
targets

design <- model.matrix(~Condition, data=targets)
y <- DGEList(counts=CountData, gene=GeneIDs)
y <- calcNormFactors(y)
y <- estimateGLMRobustDisp(y,design)
#plotBCV(y)
#fit <- glmFit(y, design)

#pdf(paste(Tissue,"_MDSplot.pdf",sep=""))
MDS_data <- plotMDS(y, top=20000)
#dev.off()
?plotMDS
head(MDS_data)
plot_data <- data.frame(Condition = targets$Condition, X=MDS_data$x, Y=MDS_data$y)
head(plot_data)


##############################################
######## DEG Analysis
##############################################

## Normal vs Cancer
fit <- glmFit(y, design)
lrt <- glmLRT(fit,coef = 2)
Result_table <- topTags(lrt, n=dim(CountData)[1], sort.by="none")$table
head(Result_table)
nrow(CountData)[1]
print(sum(Result_table$PValue < 0.01))
print(sum(Result_table$FDR < 0.01))
print(sum(Result_table$FDR < 0.05))

TMM <- cpm(y, normalized.lib.sizes=TRUE,log=T)
head(TMM)
TMM_ColName <- paste("TMMValue_",gsub(".bam", "",colnames(TMM)),sep="")
colnames(TMM) <- TMM_ColName

head(CountData)
RawCount_ColName <- paste("RawCount_",gsub(".bam", "",colnames(CountData)),sep="")
colnames(CountData) <- RawCount_ColName

Result <- cbind(Result_table,TMM,CountData)
head(Result)
tail(Result)

write.table(Result, "Result.txt", sep="\t", quote=F, row.names=F)


### DrawPlot###
library(ggplot2)
Index <- as.numeric(order(Result$FDR)[1:10])

plot_data <- data.frame(Gene_expression = TMM[Index[1],],
                        Condition,
                        Gene_ID = rep(GeneIDs[Index[1]], length(Condition)))

for(i in 2:10){
  temp <- data.frame(Gene_expression = TMM[Index[i],],
                     Condition,
                     Gene_ID =rep(GeneIDs[Index[i]], length(Condition)))
  
  plot_data <- rbind(plot_data, temp)
}


ggplot(data=plot_data, aes(x=Condition, y=Gene_expression)) +
  geom_boxplot() +
  geom_boxplot(aes(fill = Condition)) +
  facet_wrap(~ Gene_ID, nrow=3, ncol=4, scale = "free") +
  guides(fill=FALSE) +
  ylab("log2 TMM normalized values") +
  xlab("") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))


ggsave("TopPvalue_BoxPlot.tiff", width=8, height=6)