1 |
gpertea |
23 |
#!/usr/bin/perl |
2 |
|
|
use strict; |
3 |
|
|
use FindBin; |
4 |
|
|
|
5 |
|
|
umask 0002; |
6 |
|
|
#the line below is needed if pvmsx is used |
7 |
|
|
# also, the error condition is set only by the presence of $file |
8 |
|
|
$ENV{'PATH'}=$FindBin::Bin.':'.$ENV{'PATH'}; |
9 |
|
|
|
10 |
|
|
#so for pvmsx to consider the task was successful, $file must be deleted! |
11 |
|
|
#============== |
12 |
|
|
# 1 is the name of the fasta sequence input file |
13 |
|
|
# 2 is the # of sequences in ${1} should = 1 for this script |
14 |
|
|
# 3 is the slice no. being processed by sx |
15 |
|
|
# 4 is 0 if not the last file, 1 if the last file |
16 |
|
|
# 5 is the # of sequences skipped initially |
17 |
|
|
# 6 is the # of sequences to be processed (-1 = ALL) |
18 |
|
|
# 7 user parameter |
19 |
|
|
# 1 2 3 4 5 6 |
20 |
|
|
my ($file, $numpass, $slice_num, $last, $skipped, $total, $gfa)=@ARGV; |
21 |
|
|
#print STDERR "running: $0 ".join(' ',@ARGV)."\n"; |
22 |
|
|
my $usage=q{ |
23 |
|
|
Slice processing script for exonerate protein2dna model client option |
24 |
|
|
Never use without a parent controllers (e.g. gridx). |
25 |
|
|
Must start the server prior to running this gridx command. |
26 |
|
|
Usage example: |
27 |
|
|
gridx -q -N -O logs -g condor -p 20 -n 3000 -i proteindb.fa exonerate_p2g.psx /full/path/to/genomicDNA.fa |
28 |
|
|
}; |
29 |
|
|
|
30 |
|
|
die $usage."\n (genome file='$gfa', qslice='$file')\n" unless (-f $gfa) && (-s $file); |
31 |
|
|
my $fout=$file.'.gff3'; |
32 |
|
|
my $fexout=$file.'.exo'; |
33 |
|
|
my $log_file='log_std'; |
34 |
|
|
my $err_file='err_log'; |
35 |
|
|
open(STDERR, '>>'.$err_file); |
36 |
|
|
open(STDOUT, '>>'.$log_file); |
37 |
|
|
|
38 |
|
|
my $cmd="exonerate.icc $file $gfa -V 0 --model p2g --percent 20 -n 11 --showalignment no --saturatethreshold 80 ". |
39 |
|
|
"--ryo '".'%ti\t%qi\tmap_end\t%r\t%ql|%qab-%qae\t%ps\t%g\t%qd\n'."' --geneseed 60 -x 50 --proteinwordlen 4 ". |
40 |
|
|
"--seedrepeat 2 --maxintron 400000 -M 512 --showtargetgff yes --showvulgar no > $fexout"; |
41 |
|
|
|
42 |
|
|
#my $cmd="gmap -D $dbdir -d $gmapdb -B 2 -f 1 $file > $gmap_res"; |
43 |
|
|
my $slno=sprintf("slice:%09d",$slice_num); |
44 |
|
|
print STDERR ">>$slno: $cmd\n"; |
45 |
|
|
&runCmd($cmd, $fexout); |
46 |
|
|
if (-s $fexout) { |
47 |
|
|
$cmd="fltexonerate -p70 -c70 < $fexout > $fout"; |
48 |
|
|
&runCmd($cmd, $fout); |
49 |
|
|
} |
50 |
|
|
print STDERR "<<$slno: done.\n"; |
51 |
|
|
unlink($file); |
52 |
|
|
exit 0; |
53 |
|
|
|
54 |
|
|
sub runCmd { |
55 |
|
|
my ($docmd, @todel) = @_; |
56 |
|
|
my $errmsg = `( $docmd ) 2>&1`; |
57 |
|
|
my $errcode=$?; |
58 |
|
|
my @cores=<core.*>; |
59 |
|
|
if ($errcode || @cores>0 || ($errmsg=~/error|segmentation|violation/si) || ($errmsg=~/\bfail/si)) { |
60 |
|
|
print STDERR "!Error at:\n$docmd\n"; |
61 |
|
|
print STDERR "$errmsg\n"; |
62 |
|
|
foreach (@todel) { |
63 |
|
|
unlink($_); |
64 |
|
|
} |
65 |
|
|
exit(1); |
66 |
|
|
} |
67 |
|
|
} |