##### Load the data ##########
setwd("/Users/sDocuments/5.Blog_Video/RNAseq_R")
Data <- read.table("RNAseq_Result.txt",header=T,skip=1)
head(Data)
CountData <- Data[,seq(from=7,to=ncol(Data))]
head(CountData)
GeneIDs <- Data[,1]
rownames(CountData) <- GeneIDs
head(CountData)
#############################################################################
######### MDS plot ###############################################
#############################################################################
library(edgeR)
Condition <- c("Control","Control","Control"
,"Cancer","Cancer","Cancer")
SampleID <- colnames(CountData)
MetaData <- data.frame(SampleID ,Condition)
str(MetaData)
targets <- MetaData ##Metadata
targets$Condition <- factor(targets$Condition)
str(targets)
levels(targets$Condition)
Condition <-factor(Condition,levels=c("Control","Cancer"))
targets$Condition <- factor(targets$Condition,levels=c("Control","Cancer"))
str(Condition)
head(CountData)
targets
design <- model.matrix(~Condition, data=targets)
y <- DGEList(counts=CountData, gene=GeneIDs)
y <- calcNormFactors(y)
y <- estimateGLMRobustDisp(y,design)
#plotBCV(y)
#fit <- glmFit(y, design)
#pdf(paste(Tissue,"_MDSplot.pdf",sep=""))
MDS_data <- plotMDS(y, top=20000)
#dev.off()
?plotMDS
head(MDS_data)
plot_data <- data.frame(Condition = targets$Condition, X=MDS_data$x, Y=MDS_data$y)
head(plot_data)
##############################################
######## DEG Analysis
##############################################
## Normal vs Cancer
fit <- glmFit(y, design)
lrt <- glmLRT(fit,coef = 2)
Result_table <- topTags(lrt, n=dim(CountData)[1], sort.by="none")$table
head(Result_table)
nrow(CountData)[1]
print(sum(Result_table$PValue < 0.01))
print(sum(Result_table$FDR < 0.01))
print(sum(Result_table$FDR < 0.05))
TMM <- cpm(y, normalized.lib.sizes=TRUE,log=T)
head(TMM)
TMM_ColName <- paste("TMMValue_",gsub(".bam", "",colnames(TMM)),sep="")
colnames(TMM) <- TMM_ColName
head(CountData)
RawCount_ColName <- paste("RawCount_",gsub(".bam", "",colnames(CountData)),sep="")
colnames(CountData) <- RawCount_ColName
Result <- cbind(Result_table,TMM,CountData)
head(Result)
tail(Result)
write.table(Result, "Result.txt", sep="\t", quote=F, row.names=F)
### DrawPlot###
library(ggplot2)
Index <- as.numeric(order(Result$FDR)[1:10])
plot_data <- data.frame(Gene_expression = TMM[Index[1],],
Condition,
Gene_ID = rep(GeneIDs[Index[1]], length(Condition)))
for(i in 2:10){
temp <- data.frame(Gene_expression = TMM[Index[i],],
Condition,
Gene_ID =rep(GeneIDs[Index[i]], length(Condition)))
plot_data <- rbind(plot_data, temp)
}
ggplot(data=plot_data, aes(x=Condition, y=Gene_expression)) +
geom_boxplot() +
geom_boxplot(aes(fill = Condition)) +
facet_wrap(~ Gene_ID, nrow=3, ncol=4, scale = "free") +
guides(fill=FALSE) +
ylab("log2 TMM normalized values") +
xlab("") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave("TopPvalue_BoxPlot.tiff", width=8, height=6)