ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/contig_to_chr_coords.py
Revision: 29
Committed: Tue Aug 2 21:24:54 2011 UTC (13 years, 1 month ago) by gpertea
File size: 4631 byte(s)
Log Message:
adding tophat source work

Line File contents
1 #!/usr/bin/env python
2 # encoding: utf-8
3 """
4 contig_to_chr_coords.py
5
6 Created by Cole Trapnell on 2008-09-05.
7 Copyright (c) 2008 Cole Trapnell. All rights reserved.
8 """
9
10 import sys
11 import getopt
12
13
14 help_message = '''
15 Takes the NCBI seq_contig file and maps contig coords to whole chromosome
16 coords in a GTF, GFF, or BED file
17
18 contig_to_chr_coords.py <format_flag> <seq_contig.md> <junctions.bed|transcripts.gff|transcripts.gtf>
19 '''
20
21
22 class Usage(Exception):
23 def __init__(self, msg):
24 self.msg = msg
25
26
27 def main(argv=None):
28 if argv is None:
29 argv = sys.argv
30 try:
31 try:
32 opts, args = getopt.getopt(argv[1:], "ho:vbg", ["help", "output=", "bed", "gff"])
33 except getopt.error, msg:
34 raise Usage(msg)
35
36 arg_is_splice = False
37 arg_is_gff = False
38 # option processing
39 for option, value in opts:
40 if option == "-v":
41 verbose = True
42 if option in ("-h", "--help"):
43 raise Usage(help_message)
44 if option in ("-o", "--output"):
45 output = value
46 if option in ("-b", "--bed"):
47 arg_is_splice = True
48 if option in ("-g", "--gff"):
49 arg_is_gff = True
50
51 if (arg_is_splice == False and arg_is_gff == False) or (arg_is_splice == True and arg_is_gff == True):
52 print >> sys.stderr, "Error: please specify either -b or -g, but not both"
53 raise Usage(help_message)
54
55 if len(args) < 1:
56 raise Usage(help_message)
57 contig_to_chr_file = open(args[0])
58
59 contigs = {}
60
61 for line in contig_to_chr_file.readlines():
62 if line[0] == "#":
63 continue
64 line = line.strip()
65 cols = line.split('\t')
66 if len(cols) < 9:
67 continue
68 chromosome = cols[1]
69 group = cols[8]
70 feature_name = cols[5]
71 if not feature_name in ["start", "end"]:
72 contigs[feature_name] = (chromosome, int(cols[2]))
73 #print feature_name, chromosome, int(cols[2])
74
75 if arg_is_gff:
76 gff_file = open(args[1])
77
78 lines = gff_file.readlines()
79 print lines[0],
80 for line in lines[1:]:
81 line = line.strip()
82 cols = line.split('\t')
83 if len(cols) < 8:
84 continue
85
86 contig = cols[0]
87 chr_fields = contig.split('|')
88 refseq_id = chr_fields[3]
89 contig = contigs.get(refseq_id)
90 chr_name = contig[0]
91 pipe_idx = chr_name.find('|')
92
93 if pipe_idx != -1:
94 chr_name = chr_name[:pipe_idx]
95 if contig != None:
96 #print line
97 left_pos = contig[1] + int(cols[3])
98 right_pos = contig[1] + int(cols[4])
99
100 print "chr%s\tTopHat\tisland\t%d\t%d\t%s\t.\t.\t%s" % (chr_name, left_pos, right_pos, cols[5],cols[8])
101 #print >>sys.stderr, "%s\t%d\t%d\t%s\t%s\t%s\t%s" % (contig[0], left_pos, right_pos,cols[3],cols[6],cols[0],cols[1])
102
103
104 if arg_is_splice:
105 splice_file = open(args[1])
106
107 lines = splice_file.readlines()
108 print lines[0],
109 for line in lines[1:]:
110 line = line.strip()
111 cols = line.split('\t')
112 contig = cols[0]
113 chr_fields = contig.split('|')
114 refseq_id = chr_fields[3]
115 contig = contigs.get(refseq_id)
116 chr_name = contig[0]
117 pipe_idx = chr_name.find('|')
118
119 if pipe_idx != -1:
120 chr_name = chr_name[:pipe_idx]
121 if contig != None:
122 #print line
123 left_pos = contig[1] + int(cols[1])
124 right_pos = contig[1] + int(cols[2])
125
126 print "chr%s\t%d\t%d\t%s\t0\t%s\t%s\t%s\t255,0,0\t2\t1,1\t%s" % (chr_name, left_pos, right_pos, cols[3],cols[5],left_pos, right_pos,cols[11])
127 #print >>sys.stderr, "%s\t%d\t%d\t%s\t%s\t%s\t%s" % (contig[0], left_pos, right_pos,cols[3],cols[6],cols[0],cols[1])
128 except Usage, err:
129 print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
130 print >> sys.stderr, "\t for help use --help"
131 return 2
132
133
134 if __name__ == "__main__":
135 sys.exit(main())

Properties

Name Value
svn:executable *