#!/usr/bin/perl
use strict;
use warnings;
use FindBin;

# Slice worker: runs gfClient (protein query vs. translated DNA) on one slice
# file handed over by the parent controller and writes <slice>.gff3 next to it.

umask 0002;

# Put the directory holding this script first on PATH; this is needed if
# pvmsx is used. An unset PATH must not leave a trailing empty element
# behind, because an empty PATH element means "current directory".
$ENV{'PATH'} = defined $ENV{'PATH'} && length $ENV{'PATH'}
    ? $FindBin::Bin.':'.$ENV{'PATH'}
    : $FindBin::Bin;

# The error condition is set only by the presence of $file,
# so for pvmsx to consider the task was successful, $file must be deleted!
#==============
# Positional arguments:
# 1 is the name of the fasta sequence input file
# 2 is the # of sequences in ${1} should = 1 for this script
# 3 is the slice no. being processed by sx
# 4 is 0 if not the last file, 1 if the last file
# 5 is the # of sequences skipped initially
# 6 is the # of sequences to be processed (-1 = ALL)
# 7 user parameter: server:port[:srv_count]
# 8 user parameter: sequence directory handed to gfClient
my ($file, $numpass, $slice_num, $last, $skipped, $total, $serverport, $dbdir)=@ARGV;
#print STDERR "running: $0 ".join(' ',@ARGV)."\n";
my $usage=q{
Slice processing script for blat protein-to-dnax mode, cross-species settings;
Never use without a parent controllers (e.g. gridx).
Must start the server prior to running this gridx command.
Usage example:
gridx -q -N -O logs -p 20 -n 2000 -i proteins.fa blat_pxclient.psx server:port[:srv_count] /full/path/to/seqdir

srv_count can be given when multiple gfServers have been launched
on a multi-CPU <server> host, so the servers are assumed at
consecutive ports on the same host, starting at the given port
};

die $usage."\n" unless defined $file && -s $file && $total && $dbdir;
my $fout=$file.'.gff3';

# server:port[:srv_count] -- the port must be a plain number above 255.
my ($server, $port, $srvcount)=split(/:/, $serverport);
die $usage."\n"
    unless defined $server && length $server
        && defined $port && $port =~ /^\d+\z/ && $port > 255;

# With several servers on consecutive ports, spread the slices over them
# by slice number.
if (defined $srvcount && length $srvcount) {
    die $usage."\n" unless $srvcount =~ /^\d+\z/;
    $port += ( $slice_num % $srvcount ) if $srvcount > 1;
}
die $usage."\nInvalid data directory $dbdir" unless -d $dbdir;

# From here on all output is appended to the log files in the current
# directory. err_log is opened on a separate handle first, so that a failure
# to open it can still be reported on the original STDERR.
my $log_file='log_std';
my $err_file='err_log';
open(my $err_fh, '>>', $err_file) or die "Cannot append to $err_file: $!\n";
open(STDOUT, '>>', $log_file) or die "Cannot append to $log_file: $!\n";
open(STDERR, '>&', $err_fh) or die "Cannot redirect STDERR to $err_file: $!\n";
close($err_fh);

#my $toskip=($file =~ m/_\@(\d+)_v\d+\.\d+/) ? $1 : $skipped+$numpass*($slice_num-1);
#my ($srvhost, $srvport)=split(/\:/,$srv);
#$srvport=8080 unless $srvport;
my $cmd="gfClient -minScore=30 -minIdentity=50 -t=dnax -q=prot -out=gff -nohead $server $port $dbdir $file $fout";

#my $cmd="gmap -D $dbdir -d $gmapdb -B 2 -f 1 $file > $gmap_res";
my $slno=sprintf("slice:%09d",$slice_num);
print STDERR ">>$slno: $cmd\n";
# runCmd exits with status 1 (after removing $fout) if the command fails,
# so everything below runs only on success.
runCmd($cmd, $fout);

print STDERR "<<$slno: done.\n";
# Removing the input slice is what signals success to the parent; if that
# fails, leave a trace explaining why the slice file is still present.
unlink($file) or print STDERR "!Could not remove $file: $!\n";
exit 0;

# runCmd($shell_command, @files_to_remove_on_failure)
#
# Runs the command through the shell with its stderr folded into the
# captured output. The run counts as failed when the exit status is
# non-zero or when the captured text contains one of the failure markers
# listed below. On failure the command and its output are written to
# STDERR, the listed files are removed and the whole process exits with
# status 1. On success the sub simply returns.
sub runCmd {
    my ($shell_cmd, @cleanup) = @_;

    my $captured = `($shell_cmd) 2>&1`;
    my $status   = $?;    # grab it before anything else can overwrite it

    # ERROR and Segmentation match in any letter case; Failed and Invalid
    # must match exactly as spelled.
    my @failure_markers = (qr/ERROR/si, qr/Segmentation/si, qr/Failed/s, qr/Invalid/s);

    # A non-zero status short-circuits, so the output is only scanned when
    # the command itself reported success.
    my $failed = $status || grep { $captured =~ $_ } @failure_markers;
    return unless $failed;

    print STDERR "!Error at:\n$shell_cmd\n", "$captured\n";
    unlink(@cleanup);
    exit(1);
}