082-python/R-wiwi
[AppleScript] 纯文本查看 复制代码 #对gtf文件进行探究
#gtf/gff:[url=http://asia.ensembl.org/info/website/upload/gff.html]http://asia.ensembl.org/info/website/upload/gff.html[/url]
#安装packages:GenomicFeatures\ TxDb.Hsapiens.UCSC.hg19.knownGene
source("https://bioconductor.org/biocLite.R")
biocLite("AnnotationDbi")
source("https://bioconductor.org/biocLite.R")
biocLite("GenomicFeatures")
library("GenomicFeatures")
source("http://bioconductor.org/biocLite.R")
biocLite("TxDb.Hsapiens.UCSC.hg19.knownGene")
library("TxDb.Hsapiens.UCSC.hg19.knownGene")
#txdb
GRCh38_txdb <- makeTxDbFromGFF("Homo_sapiens.GRCh38.87.chr.gtf.gz",dataSource ="ENSEMBL",organism="Homo sapiens",format = "gtf")
saveDb(GRCh38_txdb,file="GRCh38.87.sqlite")
GRCh38_txdb <- loadDb("GRCh38.87.sqlite")
columns(GRCh38_txdb)
seqlevels(GRCh38_txdb)
#每条染色体基因个数的分布?
grch38_genes <- genes(GRCh38_txdb,columns="gene_id")
head(as.data.frame(grch38_genes))
grch38_genes_df <- as.data.frame(grch38_genes)
head(grch38_genes_df)
nrow(grch38_genes_df)
library(ggplot2)
ggplot(grch38_genes_df,aes(x=factor(seqnames)))+geom_bar()
[AppleScript] 纯文本查看 复制代码 #转录本
grch38_tx <- transcripts(GRCh38_txdb,columns=c("tx_name"))
head(as.data.frame(grch38_tx))
grch38_tx_df <- as.data.frame(grch38_tx)
nrow(grch38_tx_df)
ggplot(grch38_tx_df,aes(x=factor(seqnames)))+geom_bar()
[AppleScript] 纯文本查看 复制代码 #所有基因平均有多少个转录本
grch38_tx_by_genes <- transcriptsBy(GRCh38_txdb,by=c("gene"))
head(as.data.frame(grch38_tx_by_genes))
nrow(as.data.frame(grch38_tx_by_genes))
grch38_tx_by_genes <- as.data.frame(grch38_tx_by_genes)
library(dplyr)
grch38_tx_by_genes <- grch38_tx_by_genes %>% group_by(group_name) %>% mutate(count=n())
head(grch38_tx_by_genes)
sort(as.numeric(unique(grch38_tx_by_genes$count)))
summary(as.numeric(grch38_tx_by_genes$count))
df <- unique(grch38_tx_by_genes[,c("group_name","count")])
ggplot(df,aes(x=factor(count)))+geom_bar()
[AppleScript] 纯文本查看 复制代码 #基因上的外显子分布情况?
grch38_exons <- transcriptsBy(GRCh38_txdb,by=c("exon"))
head(as.data.frame((grch38_exons)),10)
grch38_exons <- as.data.frame(grch38_exons)
nrow(grch38_exons)
grch38_exons <- grch38_exons %>% group_by(tx_name) %>% mutate(count=n())
sort(as.numeric(unique(grch38_exons$count)))
summary(as.numeric(grch38_exons$count))
df <- unique(grch38_exons[,c("group_name","count")])
ggplot(df,aes(x=factor(count)))+geom_bar()
|