ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/mirfind_common.pl
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (13 years, 1 month ago) by gpertea
File size: 5532 byte(s)
Log Message:
Line File contents
1 #!/usr/bin/perl
2 use strict;
3 use Getopt::Std;
4
5 my $usage = q/Usage:
6 mirfind_common.pl [-t total1:total2] <file1.mirfind> <file2.mirfind>
7 Takes output files of mirfind_score.pl and reports potential miRNAs
8 that have the same location of the mature sequence;
9 A coordinate variation of 1 for either ends is allowed.
10
11 (input files are assumed sorted by region start coordinate,
12 as they are when they are created by mirfind_score.pl)
13
14 /;
15 umask 0002;
16 getopts('t:o:') || die($usage."\n");
17 my $outfile=$Getopt::Std::opt_o;
18 if ($outfile) {
19 open(OUTF, '>'.$outfile) || die("Error creating output file $outfile\n");
20 select(OUTF);
21 }
22 # --
23 die ($usage."\n") unless @ARGV==2;
24 my @alines;
25 my @blines;
26 my $totals=$Getopt::Std::opt_t;
27 my @total=split(/[\:\,\/]/,$totals);
28 my $ratio=$total[1]/$total[0] if $total[0]>0;
29 open(FILA, $ARGV[0]) || die ("Error opening file $ARGV[0]!\n");
30 open(FILB, $ARGV[1]) || die ("Error opening file $ARGV[1]!\n");
31 while (<FILA>) {
32 chomp;
33 my @t=split(/\t/);
34 push(@alines, [@t,0]);
35 }
36 close(FILA);
37 @alines = sort { ($$a[1] eq $$b[1]) ? $$a[3]<=>$$b[3] : $$a[1] cmp $$b[1] } @alines;
38 while (<FILB>) {
39 chomp;
40 my @t=split(/\t/);
41 push(@blines, [@t, 0]);
42 }
43 close(FILB);
44 @blines = sort { ($$a[1] eq $$b[1]) ? $$a[3]<=>$$b[3] : $$a[1] cmp $$b[1] } @blines;
45
46 my ($a_reg, $a_start,$a_chr, $a_strand, $a_end, $a_mstart, $a_mend, $acov,
47 $b_reg, $b_start,$b_chr, $b_strand, $b_end, $b_mstart, $b_mend, $bcov);
48 my $anext;
49 my @ta; #ta[13] has the intron info, if any
50 my @tb;
51 my ($ad, $ai, $bd, $bi);
52 $bi=-1;
53 for ($ai=0; $ai<@alines; $ai++) {
54 $ad=$alines[$ai];
55 @ta=@$ad;
56 if ($a_start && $a_chr eq $ta[1] && $a_start>$ta[3]) {
57 die("Wrong order for $ARGV[0] lines?! ($a_reg vs $ta[0])!\n");
58 }
59 #($a_start, $a_end)=($ta[0]=~m/\|(\d+)\-(\d+)$/);
60 ($a_start, $a_end)=($ta[3],$ta[4]);
61 ($a_reg, $a_chr, $a_strand)=@ta[0..2];
62 ($a_mstart, $a_mend)=($ta[8]=~m/m\|(\d+)\-(\d+)/);
63 ($acov)=($ta[6]=~m/(\d+)$/);
64 $ad->[6]=$acov;
65 $ta[6]=$acov;
66 die ("Error parsing mstar/mend!\n") unless $a_mend>$a_mstart && $a_mstart>1;
67 goto AB_CMP if ($anext);
68 NEXT_B:
69 $bi++;
70 last unless $bi<@blines;
71 $bd=$blines[$bi];
72 @tb=@$bd;
73 if ($b_start && $b_chr eq $tb[1] && $b_start>$tb[3]) {
74 die("Wrong order for $ARGV[1] lines?! ($b_reg vs $tb[0])!\n");
75 }
76 ($b_start, $b_end)=($tb[3],$tb[4]);
77 ($b_reg, $b_chr, $b_strand)=@tb[0..2];
78 ($b_mstart, $b_mend)=($tb[8]=~m/m\|(\d+)\-(\d+)/);
79 ($bcov)=($tb[6]=~m/(\d+)$/);
80 $bd->[6]=$bcov;
81 $tb[6]=$bcov;
82 AB_CMP:
83 $anext=0;
84 if ($b_chr gt $a_chr) { # a lags behind
85 $anext=1;
86 next;
87 }
88 elsif ($a_chr gt $b_chr) { # b lags behind
89 goto NEXT_B;
90 }
91 # same chromosome:
92 if ($b_start>$a_end) { # a lags behind
93 $anext=1;
94 next;
95 }
96 if ($a_start>$b_end) { # b lags behind
97 goto NEXT_B;
98 }
99 # - hairpin overlap here, check for mature miRNA overlap
100 if (abs($a_mstart-$b_mstart)<5 && abs($a_mend-$b_mend)<5) { # &&
101 #abs($a_start-$b_start)<7 && abs($a_end-$b_end)<7) {
102 # common miRNA prediction here
103 #print join("\t", $ta[0], @ta[5..8], join('|',@ta[10..12]),
104 # $tb[0], @tb[5..8], join('|',@tb[10..12]), $ta[14])."\n";
105 my ($hstart, $hend, $mseq, $mcoords) = ($ta[6]>$tb[6]) ? (@ta[3..4], @ta[7..8]) : (@tb[3..4], @tb[7..8]);
106 my ($mstart, $mend)=($mcoords=~m/m\|(\d+)\-(\d+)/);
107 my $foldchanges='';
108 if ($ratio) {
109 my ($v1,$v2)=($ta[6]*$ratio, $tb[6]);
110 my $fc=($v1>$v2)? -($v1/$v2) : $v2/$v1;
111 $foldchanges="\t".sprintf('%.2f',$fc)."\t".sprintf('%.2f',abs($fc));
112
113 }
114 print join("\t", join('|',@ta[1..2], $hstart.'_'.$hend), $mseq, $mstart, $mend, @ta[5..6], $ta[10].'|'.$ta[11], $ta[12],
115 @tb[5..6], $tb[10].'|'.$tb[11], $tb[12]).$foldchanges."\n";
116
117 $bd->[15]=1;
118 $ad->[15]=1;
119 }
120 if ($b_end>$a_end) {
121 $anext=1;
122 next;
123 }
124 else {
125 goto NEXT_B;
126 }
127 }
128
129 # -- now also add those entries only expressed in one sample but not the other
130 foreach my $d (@alines) {
131 next if $d->[15]==1 || $d->[6]<15;
132 my @t=@$d;
133 my ($hstart, $hend, $mseq, $mcoords) = (@t[3..4], @t[7..8]);
134 my ($mstart, $mend)=($mcoords=~m/m\|(\d+)\-(\d+)/);
135 my $foldchanges='';
136 if ($ratio) {
137 my ($v1,$v2)=($t[6]*$ratio, 1);
138 my $fc=($v1>$v2)? -($v1/$v2) : $v2/$v1;
139 $foldchanges="\t".sprintf('%.2f',$fc)."\t".sprintf('%.2f',abs($fc));
140 }
141
142 print join("\t", join('|',@t[1..2], $hstart.'_'.$hend), $mseq, $mstart, $mend, @t[5..6], $t[10].'|'.$t[11], $t[12],
143 '-','-', '-', '-' ).$foldchanges."\n";
144
145 #print join("\t", $t[0], $t[5], 'x'.$t[6], @t[7..8], join('|',@t[10..12]),
146 # '-', '-','-','-','-', '-', $t[14])."\n";
147 }
148
149 foreach my $d (@blines) {
150 next if $d->[15]==1 || $d->[6]<15;
151 my @t=@$d;
152 my ($hstart, $hend, $mseq, $mcoords) = (@t[3..4], @t[7..8]);
153 my ($mstart, $mend)=($mcoords=~m/m\|(\d+)\-(\d+)/);
154 my $foldchanges='';
155 if ($ratio) {
156 my ($v1,$v2)=(1*$ratio, $t[6]);
157 my $fc=($v1>$v2)? -($v1/$v2) : $v2/$v1;
158 $foldchanges="\t".sprintf('%.2f',$fc)."\t".sprintf('%.2f',abs($fc));
159 }
160 print join("\t", join('|',@t[1..2], $hstart.'_'.$hend), $mseq, $mstart, $mend, '-','-', '-', '-',
161 @t[5..6], $t[10].'|'.$t[11], $t[12]).$foldchanges."\n";
162 #print join("\t", $t[0], '-','-','-','-', '-',
163 # $t[0], $t[5], 'x'.$t[6], @t[7..8], join('|',@t[10..12]), $t[14])."\n";
164 }
165
166 # --
167 if ($outfile) {
168 select(STDOUT);
169 close(OUTF);
170 }
171
172 #************ Subroutines **************

Properties

Name Value
svn:executable *