搜索
查看: 2860|回复: 0

肿瘤异质性/克隆分析

[复制链接]

6

主题

23

帖子

201

积分

中级会员

Rank: 3Rank: 3

积分
201
发表于 2017-2-27 17:03:30 | 显示全部楼层 |阅读模式
本帖最后由 qin_qinyang 于 2017-2-28 11:21 编辑

主要使用R包expands
1.安装install.packages("expands")
注意:默认安装的版本时1.72,但是我在测试数据的时候发现新的包里缺少部分demo数据,所以又下载了1.5版本的vignettes,放到了安装文件里
2.测试数据
[AppleScript] 纯文本查看 复制代码
library(expands)
##导入突变数据
data(snv)
head(snv)
#如果是自己的数据的话
#data <- read.table("snv",header = T,stringsAsFactors = T,sep = "\t")
#snv <- as.matrix(data)
#str(snv)
# chr [1:780, 1:7] "1" "1" "1" "1" "1" "1" "1" "1" "1" ...
# - attr(*, "dimnames")=List of 2
# ..$ : NULL
# ..$ : chr [1:7] "chr" "startpos" "endpos" "REF" ...
#PN_B - ploidy of B-allele in normal cells.
# chr  startpos    endpos REF ALT  AF_Tumor PN_B
# [1,]   1  15888817  15888817  65  67 0.6546392    0
# [2,]   1  17046613  17046613  67  84 0.9216301    1
# [3,]   1  22199882  22199882  67  71 0.1639344    0
# [4,]   1 109735398 109735398  65  67 0.1940928    0
# [5,]   1 144854412 144854412  65  67 0.9278351    1
# [6,]   1 144854597 144854597  84  67 0.9200710    1
#REF - ASCII code of the reference nucleotide (in hg18/hg19) 
#ALT - ASCII code of the B-allele nucleotide 
#A 65
#T 84
#C 67
#G 71
## 只取部分数据测试
idx=sample(1:nrow(snv), 130, replace=FALSE); snv=snv[idx,];
##导入拷贝数数据
data(cbs);
head(cbs)
# chr startpos    endpos CN_Estimate
# [1,]  22 26290824  51237893    1.280513
# [2,]  17    54279  81188538    1.723031
# [3,]   8   162897 146279898    1.865630
# [4,]   5 98191824 110460104    1.740130
# [5,]  14 19114214 107283536    2.016270
# [6,]  15 20087214  56658198    2.011064
##确定每个突变位点的质量??
dm=assignQuantityToMutation(snv,cbs,"CN_Estimate");
head(dm)
##parameters
max_PM=6; maxScore=1.25; precision=0.018;
plotF=1;
##the name of the sample
snvF="TCGA-06-0152-01";
##计算细胞纯度分布
cfd=computeCellFrequencyDistributions(dm, max_PM, precision)
##对突变进行分类
toUseIdx=which(apply(is.finite(cfd$densities),1,all) )
SPs=clusterCellFrequencies(cfd$densities[toUseIdx,], precision)
## exclude SPs detected at high noise levels
SPs=SPs[SPs[,"score"]<=maxScore,]; 
print(SPs)
# Mean Weighted     score precision nMutations
# [1,]         0.262 0.5820134     0.018         12
# [2,]         0.316 0.5862493     0.018         10
# [3,]         0.982 0.4716710     0.018         10
##assign mutations to subpopulations
aM= assignMutations( dm, SPs)
plotSPs(aM$dm, snvF,cex=1)
##assigning copy number to subpopulations
aQ=assignQuantityToSP(cbs, aM$dm)
##building phylogeny
spPhylo=buildPhylo(aQ$ploidy,snvF)
plot(spPhylo$tree,cex=2.5)
##多组样本之间的比较
#Patient and sample labels
patient='ID_MRD_001';
samples=c('_primPancreas','_metKidney','_metLung');
output=patient;
cbs=as.list(paste(patient, samples,'.cbs',sep=""));
#The SP files for each sample (previously calculated via runExPANdS-function):
sps <- list.files(system.file("vignettes", package="expands"),
             pattern = ".sps", full.names = TRUE)
cbs <- list.files(system.file("vignettes", package="expands"),
                  pattern = ".cbs", full.names = TRUE)
sampleGroup=list(cbs=cbs,sps=sps,labels=samples)
tr=buildMultiSamplePhylo(sampleGroup,output,ambigSg = F, plotF=0);
##Tree tip color labels according to sample origin of SPs:
jet <- colorRampPalette(c("#00007F", "blue", "#007FFF","cyan", "#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000"))
colmap = jet( length(sampleGroup$labels) )
colors <- rep(colmap[1], each = length(tr$tip.label))
for (i in 1: length(sampleGroup$labels) ) {
  ii = grep(sampleGroup$labels[], tr$tip.label)
  colors[ii] = colmap
}
plot(tr, tip.col = colors, cex = 0.8)










上一篇:大家好,有研究过多线染色体的吗?求助如何给染色体分段
下一篇:illumina的基因型芯片是一种全新的数据表示方式
回复

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

QQ|手机版|小黑屋|生信技能树 ( 粤ICP备15016384号  

GMT+8, 2019-10-21 01:46 , Processed in 0.039687 second(s), 30 queries .

Powered by Discuz! X3.2

© 2001-2013 Comsenz Inc.