[AppleScript] 纯文本查看 复制代码
# ###Typically, we recommend users to run samples from all groups together, and then
# use thecontrast argument of the results function to extract comparisons of
# interest after fitting the model using DESeq.
setwd("C:/Users/lixia/Desktop/learn/DESeq2/")
rm(list=ls())
library(DESeq2)
########
source("http://bioconductor.org/biocLite.R")
options(BioC_mirror="http://mirrors.ustc.edu.cn/bioc/")
biocLite("airway")
biocLite("IHW")
library(airway)
data("airway")
se <- airway
se
ddsSE <- DESeqDataSet(se, design = ~ cell + dex)
ddsSE
library("pasilla")
pasCts <- system.file("extdata", "pasilla_gene_counts.tsv",
package="pasilla", mustWork=TRUE)
pasAnno <- system.file("extdata", "pasilla_sample_annotation.csv",
package="pasilla", mustWork=TRUE)
countData <- as.matrix(read.csv(pasCts,sep="\t",row.names="gene_id"))
###表达矩阵,如countData中相关reads数没有,记作0,不是NA,切记!!!
colData <- read.csv(pasAnno, row.names=1)
colData <- colData[,c("condition","type")] ###select two column ###分组信息
head(countData)
head(colData)
rownames(colData) <- sub("fb","",rownames(colData)) ###去除rownames(colData)中的“fb”,以匹配header的信息。。。
all(rownames(colData) %in% colnames(countData)) ###判断两者是否相等
######表达矩阵的列(样品),样本分组信息的行,必须一致
countData <- countData[, rownames(colData)]
all(rownames(colData) == colnames(countData))
dds <- DESeqDataSetFromMatrix(countData = countData,
colData = colData,
design = ~ condition)
dds
featureData <- data.frame(gene=rownames(countData)) ####添加一些额外的信息。。。gene
(mcols(dds) <- DataFrame(mcols(dds), featureData)) ###mcols,...???
dds <- DESeq(dds) ##第二步,直接用DESeq函数即可 此步的目的!!!
dds
res <- results(dds)
res
write.table(res, file="ddseq2.xls", sep="\t",quote=F)
library("BiocParallel")
register(MulticoreParam(4))
# ##???
#These steps should take less than 30 seconds for most analyses. For experiments
# with many samples (e.g. 100 samples), one can take advantage of parallelized
# computation. Both of the above functions have an argument parallel which
# if set to TRUE can be used to distribute computation across cores specified by
# the register function of BiocParallel. For example, the following chunk (not
# evaluated here), would register 4 cores, and then the two functions above, with
# parallel=TRUE, would split computation over these cores.
res <- results(dds)
resOrdered <- res[order(res$padj),]
summary(res)
sum(res$padj < 0.1, na.rm=TRUE)
res05 <- results(dds, alpha=0.05) ###alpha默认值(结合了P值+FDR)为0.1,如果需要更改则需要将其另外赋值,与上述比较
summary(res05)
sum(res05$padj < 0.05, na.rm=TRUE)
library("IHW")
resIHW <- results(dds, filterFun=ihw)
summary(resIHW)
###A new Bioconductor package,IHW, is now available that implements the
#method ofIndependent Hypothesis Weighting
sum(resIHW$padj < 0.1, na.rm=TRUE)
metadata(resIHW)$ihwResult
######
plotMA(res, main="DESeq2", ylim=c(-2,2)) ###log2FC,normalize的图 具体作用是啥???
###
idx <- identify(res$baseMean, res$log2FoldChange)
rownames(res)[idx]
###????
plotCounts(dds, gene=which.min(res$padj), intgroup="condition")
d <- plotCounts(dds, gene=which.min(res$padj), intgroup="condition",
returnData=TRUE)
###d为啥?? condition啥时候引入的
library("ggplot2")
ggplot(d, aes(x=condition, y=count)) +
geom_point(position=position_jitter(w=0.1,h=0)) +
scale_y_log10(breaks=c(25,100,400))
mcols(res)$description
mcols(res)
res
write.csv(as.data.frame(resOrdered),file="condition_treated_results.csv")
resSig <- subset(resOrdered, padj < 0.1)
resSig
write.csv(as.data.frame(resSig),file="condition_treated_results resSig.csv")
######上述为一个因子的比较
### design = ~ condition
# dds <- DESeq(dds)
# dds
# res <- results(dds)
# res
###下面讲解多个因子的比较
ddsMF <- dds
design(ddsMF) <- formula(~ type + condition)
###不知道如何组合的,2与2的组合应该有四种。。。
##解答,默认情况下是后面的(本例中为conditon)进行比较,
##除非额外指定具体的contrast进行比较,
##如resMFType <- results(ddsMF,contrast=c("type", "single-read", "paired-end"))
ddsMF <- DESeq(ddsMF)
colData(dds)
##res的值是
head(res)
##condition untreated vs treated
resMF <- results(ddsMF)
head(resMF)
### 注意同head(res)输出结果的比较,是有差别的;condition untreated vs treated
resMFType <- results(ddsMF,contrast=c("type", "single-read", "paired-end"))
head(resMFType)
###transformations of the form
##y= log2(n+ 1) or more generally, y= log2(n+n0);
#不知道两者的区别!!!