1 |
gpertea |
23 |
#!/usr/bin/perl
# Slice worker meant to be launched by a parent controller (e.g. gridx/pvmsx):
# runs gblat in protein-vs-translated-DNA mode on one slice file and writes
# the hits to "<slice file>.gff3".
use strict;
use warnings;
use FindBin;

umask 0002;

# The line below is needed if pvmsx is used: it puts the directory holding
# this script at the front of PATH.
$ENV{'PATH'}=$FindBin::Bin.':'.$ENV{'PATH'};

# Also, for pvmsx the error condition is set only by the presence of $file,
# so for pvmsx to consider the task successful, $file must be deleted!
#==============
# Positional arguments:
# 1 is the name of the fasta sequence input file
# 2 is the # of sequences in ${1}; should = 1 for this script
# 3 is the slice no. being processed by sx
# 4 is 0 if not the last file, 1 if the last file
# 5 is the # of sequences skipped initially
# 6 is the # of sequences to be processed (-1 = ALL)
# 7 user parameter (the protein FASTA file, per the usage example below)
#    1       2          3         4       5        6       7
my ($file, $numpass, $slice_num, $last, $skipped, $total, $prots)=@ARGV;
#print STDERR "running: $0 ".join(' ',@ARGV)."\n";
my $usage=q{
Slice processing script for blat's protein-to-dnax mode;
Never use without a parent controllers (e.g. gridx).
Usage example:
gridx -q -N -O logs -g condor -p 20 -n 2000 -i contigdb.fa blat_protdb.psx /fs/fullpath/to/proteins.fa
}; #'

# Refuse to run without a non-empty slice file, a sequence count and the
# user parameter. The defined() guard avoids a file test on an undefined
# value when the script is started with no arguments at all.
die $usage."\n" unless defined($file) && -s $file && $total && $prots;
my $fout=$file.'.gff3';

my $log_file='log_std';
my $err_file='err_log';
# From here on all diagnostics are appended to the log files in the current
# directory. Fail loudly if a redirect cannot be set up, otherwise every
# later message would be lost silently.
open(STDERR, '>>', $err_file) or die "Cannot append to $err_file: $!\n";
open(STDOUT, '>>', $log_file) or die "Cannot append to $log_file: $!\n";

#my $toskip=($file =~ m/_\@(\d+)_v\d+\.\d+/) ? $1 : $skipped+$numpass*($slice_num-1);
#my ($srvhost, $srvport)=split(/\:/,$srv);
#$srvport=8080 unless $srvport;
# NOTE(review): $file and $prots are interpolated unquoted into a shell
# command line, so paths containing whitespace or shell metacharacters
# will not work.
my $cmd="gblat -minScore=50 -minIdentity=70 -t=dnax -q=prot -out=gff -noHead $file $prots $fout";

#my $cmd="gmap -D $dbdir -d $gmapdb -B 2 -f 1 $file > $gmap_res";
my $slno=sprintf("slice:%09d",$slice_num);
print STDERR ">>$slno: $cmd\n";
# runCmd() deletes $fout and exits with status 1 if the command fails.
runCmd($cmd, $fout);

print STDERR "<<$slno: done.\n";
# Success is signalled by removing the slice file (see the pvmsx note
# above), so a failed delete must be reported instead of exiting with 0.
unless (unlink($file)) {
    print STDERR "!Error: cannot delete $file: $!\n";
    exit(1);
}
exit 0;
50 |
|
|
|
51 |
|
|
# Run a shell command, capturing its stdout and stderr together.
#
# Arguments:
#   1    - the command line, executed through the shell
#   rest - files to delete if the command is considered to have failed
#
# The command counts as failed when $? is non-zero or when its output
# contains "ERROR" or "Segmentation" (any case), or "Failed" or "Invalid"
# (exact case). On failure the command and its output are written to
# STDERR, the listed files are removed and the script exits with status 1.
# On success the sub simply falls through with a false value.
sub runCmd {
    my ($command, @cleanup) = @_;
    my $output = `($command) 2>&1`;
    my $failed = $?
        || $output =~ /ERROR/si
        || $output =~ /Segmentation/si
        || $output =~ /Failed/s
        || $output =~ /Invalid/s;
    if ($failed) {
        print STDERR "!Error at:\n$command\n";
        print STDERR "$output\n";
        unlink($_) for @cleanup;
        exit(1);
    }
}