搜索
楼主: Jimmy

生信编程直播第二题-hg19基因组序列的一些探究

  [复制链接]

10

主题

52

帖子

559

积分

版主

Rank: 7Rank: 7Rank: 7

积分
559
QQ
发表于 2017-7-12 15:58:17 | 显示全部楼层
芋头 发表于 2017-7-12 01:59
请问层主是在哪里看的视频,我找不到链接啊.....

视频是要购买的,可以联系版主询问。
回复 支持 反对

使用道具 举报

0

主题

3

帖子

39

积分

新手上路

Rank: 1

积分
39
发表于 2017-7-13 03:41:32 | 显示全部楼层
旭日早升 发表于 2017-7-12 15:58
视频是要购买的,可以联系版主询问。

好的,谢啦
回复 支持 反对

使用道具 举报

0

主题

4

帖子

109

积分

注册会员

Rank: 2

积分
109
发表于 2017-9-1 17:18:41 | 显示全部楼层
#!/usr/bin/perl
#
# Base composition of every record in a FASTA file (written for hg19).
#
# Usage:  perl script.pl genome.fa
#
# Prints one tab-separated row per FASTA record, sorted by header line:
#   chr     the full header line (including the leading '>')
#   A, T    number of A/a and T/t bases
#   GC      number of G/g/C/c bases
#   N       number of N/n bases
#   length  total number of sequence characters in the record
#   GC(%)   100 * GC / (A + T + GC)   -- GC content of the called bases
#   N(%)    100 * N / length          -- share of the record that is N
#
# The file is processed line by line and bases are counted with tr///, so
# memory use stays constant no matter how large the chromosomes are.
use strict;
use warnings;

my $fasta = $ARGV[0];
die "Usage: $0 <genome.fa>\n" unless defined $fasta;

open my $in, '<', $fasta or die "Cannot open '$fasta': $!";

print "chr\tA\tT\tGC\tN\tlength\tGC(%)\tN(%)\n";

my %stat;    # header line => { A, T, GC, N, len } running totals
my $id;      # header of the record currently being read

while ( my $line = <$in> ) {
    $line =~ s/\r?\n\z//;    # strip LF or CRLF so "\r" is never counted as sequence

    if ( $line =~ /^>/ ) {
        $id = $line;
        next;
    }

    # Sequence text that appears before the first header belongs to no record.
    next unless defined $id;

    # A record is only created once it has at least one sequence line;
    # repeated headers accumulate into the same record.
    my $s = $stat{$id} ||= { A => 0, T => 0, GC => 0, N => 0, len => 0 };

    # tr/// with an empty replacement list only counts; the line is not modified.
    $s->{A}   += ( $line =~ tr/Aa// );
    $s->{T}   += ( $line =~ tr/Tt// );
    $s->{GC}  += ( $line =~ tr/GCgc// );
    $s->{N}   += ( $line =~ tr/Nn// );
    $s->{len} += length $line;
}
close $in;

foreach my $name ( sort keys %stat ) {
    my $s    = $stat{$name};
    my $acgt = $s->{A} + $s->{T} + $s->{GC};

    # Guard both ratios: a record may consist solely of N (or be empty).
    my $percent_GC = $acgt     ? 100 * $s->{GC} / $acgt    : 0;
    my $percent_N  = $s->{len} ? 100 * $s->{N} / $s->{len} : 0;

    print join( "\t",
        $name, @{$s}{qw(A T GC N len)}, $percent_GC, $percent_N ), "\n";
}

E:\资料\perl\生信技能树\第二题
回复 支持 反对

使用道具 举报

3

主题

8

帖子

197

积分

注册会员

Rank: 2

积分
197
发表于 2017-12-27 19:30:50 | 显示全部楼层
[Python] 纯文本查看 复制代码
 	
#!/usr/bin/env python3
"""Print the N and GC percentage of every sequence in an indexed FASTA file.

Usage:  script.py [genome.fa]

Without an argument the hg19 file the script was originally written for is
used. One line is printed per reference sequence:

    <name> N% <percent of bases that are N> GC% <percent GC among non-N bases>
"""
import os
import sys

# Location the script originally changed into before opening "hg19.fa".
DEFAULT_FASTA = os.path.join("/home/liuzh/gpfs/zhangxt/practice", "hg19.fa")


def base_percentages(seq):
    """Return ``(n_percent, gc_percent)`` for the sequence string *seq*.

    ``n_percent`` is 100 * N / len(seq), counting both ``N`` and ``n``.
    ``gc_percent`` is 100 * (G + C) / (len(seq) - N), case-insensitive, i.e.
    the GC content of the bases that are not N.

    Both values are 0.0 when their denominator would be zero (empty
    sequence, or a sequence made only of N), so the caller never sees a
    ZeroDivisionError.
    """
    seq_len = len(seq)
    if seq_len == 0:
        return 0.0, 0.0

    n_count = seq.count("N") + seq.count("n")
    gc_count = sum(seq.count(base) for base in "GCgc")
    called = seq_len - n_count

    n_percent = 100.0 * n_count / seq_len
    gc_percent = 100.0 * gc_count / called if called else 0.0
    return n_percent, gc_percent


def main(fasta_path=DEFAULT_FASTA):
    """Print the N%/GC% report for every reference in *fasta_path*."""
    # Imported here so base_percentages() stays usable without pysam installed.
    import pysam

    # The context manager closes the FASTA/index handles even on error.
    with pysam.FastaFile(fasta_path) as fasta:
        for name in fasta.references:
            n_percent, gc_percent = base_percentages(fasta.fetch(name))
            print(name, "N%", "%.2f" % n_percent, "GC%", "%.2f" % gc_percent)


if __name__ == "__main__":
    # Pass along at most one optional command-line argument: the FASTA path.
    main(*sys.argv[1:2])


回复 支持 反对

使用道具 举报

0

主题

19

帖子

68

积分

注册会员

Rank: 2

积分
68
发表于 2020-6-15 14:39:05 | 显示全部楼层
"""Tabulate length, base counts and GC content for single-record FASTA files.

Every file in ``FASTA_DIR`` is expected to hold exactly one FASTA record
(one chromosome). The per-chromosome statistics are printed and then
written to ``OUTPUT_CSV``.
"""
import os, time, csv

FASTA_DIR = r'D:\bioinformatics\生物信息练习\hg19-test-untar'
OUTPUT_CSV = r'D:\bioinformatics\生物信息练习\hg19_test.csv'
HEADERS = ['染色体', '长度', 'A数量', 'T数量', 'C数量', 'G数量', 'N数量', 'CG含量']


def base_composition(sequence):
    """Return ``[length, A, T, C, G, N, gc_percent]`` for *sequence*.

    *sequence* may be a ``str`` or any object whose ``str()`` is the
    sequence text. Counting is case-insensitive; the sequence is
    upper-cased once rather than once per base.

    ``gc_percent`` is 100 * (G + C + S) / length, with the ambiguity code
    ``S`` (G or C) counted as well, and 0.0 for an empty sequence. It is
    computed here instead of with ``Bio.SeqUtils.GC`` so the script does
    not depend on which Biopython release is installed.
    """
    upper = str(sequence).upper()
    length = len(upper)
    a, t, c, g, n = (upper.count(base) for base in 'ATCGN')
    strong = c + g + upper.count('S')
    gc_percent = 100.0 * strong / length if length else 0.0
    return [length, a, t, c, g, n, gc_percent]


def main():
    """Scan ``FASTA_DIR``, print each file's statistics, write ``OUTPUT_CSV``."""
    # Imported here so base_composition() stays usable without Biopython.
    from Bio import SeqIO

    start = time.time()
    chr_info = []

    # sorted(): os.listdir() returns entries in arbitrary order.
    for file_name in sorted(os.listdir(FASTA_DIR)):
        path = os.path.join(FASTA_DIR, file_name)
        if not os.path.isfile(path):
            continue  # sub-directories cannot be opened as FASTA files
        with open(path, 'r') as handle:
            # SeqIO.read raises ValueError unless the file holds one record.
            record = SeqIO.read(handle, 'fasta')
        stats = base_composition(record.seq)
        print(*stats)
        chr_info.append([record.id] + stats)

    print(chr_info)
    end = time.time()
    print('running time:%.2f second.' % (end - start))

    # utf-8-sig: the header row is Chinese; an explicit encoding makes the
    # write independent of the machine's locale, and the BOM lets Excel
    # detect UTF-8.
    with open(OUTPUT_CSV, 'w', newline='', encoding='utf-8-sig') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(HEADERS)
        f_csv.writerows(chr_info)


if __name__ == '__main__':
    main()
回复 支持 反对

使用道具 举报

0

主题

19

帖子

68

积分

注册会员

Rank: 2

积分
68
发表于 2020-6-15 14:44:43 | 显示全部楼层

染色体,长度,A数量,T数量,C数量,G数量,N数量,CG含量
chr1,249250621,65570891,65668756,47024412,47016562,23970000,37.72948433
chr10,135534747,38330752,38376915,27308648,27298423,4220009,40.2900896
chr11,135006516,38307244,38317436,27236798,27268038,3877000,40.37200397
chr11_gl000202_random,40103,9226,8978,11254,10645,0,54.60688727
chr12,133851895,38604831,38624517,26634995,26617050,3370502,39.78430414
chr13,115169878,29336945,29425459,18412698,18414776,19580000,31.97665452
chr14,107349540,25992966,26197495,18027132,18071947,19060000,33.62760474
chr15,102531392,23620876,23597921,17247582,17228387,20836626,33.62479366
chr16,90354753,21724083,21828642,17630040,17701988,11470000,39.10367394
chr17,81195210,21159933,21206981,17727956,17700340,3400000,43.63348035
chr17_ctg5_hap1,1680828,429214,432922,355909,362783,100000,42.75821202
chr17_gl000203_random,37498,12564,12452,6074,6408,0,33.28710865
chr17_gl000204_random,81310,19702,17322,22701,21585,0,54.46562538
chr17_gl000205_random,174588,49255,52471,36716,36146,0,41.73368158
chr17_gl000206_random,41001,8770,10463,10483,11285,0,53.09138801
chr18,78077248,22465380,22489493,14838685,14863671,3420019,38.04226809
chr18_gl000207_random,4262,1004,1385,588,1285,0,43.94650399
chr19,59128983,14390632,14428951,13478255,13511145,3320000,45.64495892
chr19_gl000208_random,92689,29179,28602,18369,16539,0,37.66142692
chr19_gl000209_random,159169,43978,41183,35443,38565,0,46.49649115
chr1_gl000191_random,106433,27971,31264,23785,23413,0,44.34526885
chr1_gl000192_random,547496,163078,157247,112730,114441,0,41.49272323
chr2,243199373,71102632,71239379,47915465,47947042,4994855,39.41725088
chr20,63025520,16523053,16725227,13107828,13149412,3520000,41.6612826
chr21,48129895,10422924,10348785,7160212,7174721,13023253,29.78384432
chr21_gl000210_random,27682,6288,6317,7726,7251,100,54.10374973
chr22,51304566,9094775,9054551,8375984,8369235,16410021,32.63884739
chr3,198022430,58713343,58760485,38653197,38670110,3225295,39.04775181
chr4,191154276,57932980,57952068,35885806,35890822,3492600,37.54905697
chr4_ctg9_hap1,590426,185171,190487,107556,107212,0,36.37509188
chr4_gl000193_random,189789,53509,55056,40616,40608,0,42.79700088
chr4_gl000194_random,191469,52949,55693,41521,41306,0,43.25869984
chr5,180915260,53672554,53804137,35089383,35129186,3220000,38.81296083
chr6,171115067,50554433,50533923,33143287,33163423,3720001,38.74977883
chr6_apd_hap1,4622290,660350,640840,509167,510390,2301543,22.05740012
chr6_cox_hap2,4795371,1341236,1311570,1069806,1072759,0,44.67985897
chr6_dbb_hap3,4610396,1172535,1142951,943550,945266,406094,40.96862829
chr6_mann_hap4,4683263,1156084,1134193,903022,907442,582522,38.65817487
chr6_mcf_hap5,4833398,1050968,1024227,858581,861135,1038487,35.579855
chr6_qbl_hap6,4611984,1189669,1168711,967043,969902,316659,41.99808586
chr6_ssto_hap7,4928567,1172219,1156168,918220,926944,755016,37.43814378
chr7,159138663,45997757,46047257,31671670,31636979,3785000,39.78206666
chr7_gl000195_random,182896,53469,55057,37021,37349,0,40.66245298
chr8,146364022,42767293,42715025,28703983,28702621,3475100,39.22180001
chr8_gl000196_random,38914,13843,9642,7910,7519,0,39.64896952
chr8_gl000197_random,37175,8644,8408,9883,10140,100,53.86146604
chr9,141213431,35260078,35243882,24826212,24813259,21070000,35.15208904
chr9_gl000198_random,90085,27843,28140,15617,18485,0,37.85535883
chr9_gl000199_random,169874,54702,50765,34981,29426,0,37.91457198
chr9_gl000200_random,187035,55353,56966,37202,37514,0,39.94760339
chr9_gl000201_random,36148,7101,7560,10373,11114,0,59.44173952
chrM,16571,5113,4086,5192,2180,0,44.48735743
chrUn_gl000211,166566,50926,51165,31968,32507,0,38.70837986
chrUn_gl000212,186858,52729,51193,42454,40482,0,44.38450588
chrUn_gl000213,164239,48047,49015,33831,33346,0,40.90197821
chrUn_gl000214,137718,40645,39891,27484,29698,0,41.52107931
chrUn_gl000215,172545,50334,49738,36250,36223,0,42.00237619
chrUn_gl000216,172294,41409,58566,46717,25602,0,41.97418366
chrUn_gl000217,172149,51945,55495,32578,32131,0,37.58894911
chrUn_gl000218,161147,46030,48047,33296,33774,0,41.62038387
chrUn_gl000219,179198,54530,53059,35501,36108,0,39.96082546
chrUn_gl000220,161802,37230,46155,40720,37697,0,48.4647903
chrUn_gl000221,155397,47915,47444,29886,30152,0,38.63523749
chrUn_gl000222,186861,51533,53158,40866,41304,0,43.97386292
chrUn_gl000223,180455,52931,49568,38849,39107,0,43.19968967
chrUn_gl000224,179693,50248,51660,37430,40355,0,43.2877185
chrUn_gl000225,211173,56943,53599,48931,51700,0,47.65334583
chrUn_gl000226,15008,4502,4649,2626,3231,0,39.02585288
chrUn_gl000227,128374,41076,34652,26250,26396,0,41.00986181
chrUn_gl000228,129120,30512,28967,35495,34146,0,53.93509913
chrUn_gl000229,19913,3983,5944,5385,4601,0,50.14814443
chrUn_gl000230,43691,12678,12789,9338,8886,0,41.71110755
chrUn_gl000231,27386,7100,8054,6203,6029,0,44.66515738
chrUn_gl000232,40652,11534,12116,8490,8512,0,41.82328053
chrUn_gl000233,45941,12192,14260,9504,9985,0,42.42180188
chrUn_gl000234,40531,10797,12277,8725,8732,0,43.07073598
chrUn_gl000235,34474,11845,9530,6585,6514,0,37.99675117
chrUn_gl000236,41934,13006,11470,8432,9026,0,41.63208852
chrUn_gl000237,45867,12273,12191,10241,11162,0,46.66317832
chrUn_gl000238,39939,10404,13559,7805,8171,0,40.00100153
chrUn_gl000239,33824,9221,9246,7268,8089,0,45.40267266
chrUn_gl000240,41933,13601,10490,8962,8880,0,42.54882789
chrUn_gl000241,42152,13620,12815,7864,7853,0,37.286487
chrUn_gl000242,43523,10591,11869,10033,11030,0,48.39510144
chrUn_gl000243,43341,10728,12673,10304,9636,0,46.00724487
chrUn_gl000244,39929,10948,11560,8564,8857,0,43.62994315
chrUn_gl000245,36651,12875,10467,6707,6602,0,36.31278819
chrUn_gl000246,38154,10440,12968,7354,7392,0,38.64863448
chrUn_gl000247,36422,11002,9540,7794,8086,0,43.60002196
chrUn_gl000248,39786,10114,11507,9265,8900,0,45.65676369
chrUn_gl000249,38502,10793,9698,8978,9033,0,46.77938808
chrX,155270560,45648952,45772424,29813353,29865831,4170000,38.43560814
chrY,59373566,7667625,7733482,5099171,5153288,33720000,17.26771641
D:\bioinformatics\生物信息练习\hg19-test.png
回复 支持 反对

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

QQ|手机版|小黑屋|生信技能树 ( 粤ICP备15016384号  

GMT+8, 2020-8-15 10:06 , Processed in 0.037942 second(s), 27 queries .

Powered by Discuz! X3.2

© 2001-2013 Comsenz Inc.