ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/pexo_srvrun.psx
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (13 years, 1 month ago) by gpertea
File size: 5004 byte(s)
Log Message:
Line File contents
1 #!/usr/bin/perl
2 use strict;
3 use Getopt::Std;
4 use Cwd qw(realpath cwd chdir);
5 use File::Basename;
6
7 my $usage = q/Usage:
8
9 gridx [other_gridx_opts] -U -i <proteins.mfa> pexo_srvrun.psx <fastadb_esi_fullpath> [<port>]
10
11 Start a local exonerate server and run exonerate for each protein sequence
12 in <proteins.mfa>, one sequence at a time. Default port is 3834.
13 /;
14 umask 0002;
15 #so for pvmsx to consider the task was successful, $file must be deleted!
16 #==============
17 # 1 is the name of the fasta sequence input file
18 # 2 is the # of sequences in ${1} should = 1 for this script
19 # 3 is the slice no. being processed by sx
20 # 4 is 0 if not the last file, 1 if the last file
21 # 5 is the # of sequences skipped initially
22 # 6 is the # of sequences to be processed (-1 = ALL)
23 # 7.. user parameters
24 # 1 2 3 4 5 6 7 8
25 my ($qmfa, $numpass, $slice_num, $last, $skipped, $total, $esidb, $port )=@ARGV;
26
27 my $cwd=cwd();
28 $cwd=~s/\/$//;
29 die("$usage\n") unless $esidb;
30 die ($usage."Error locating file $esidb from current directory $cwd\n")
31 unless -f $esidb;
32 $port=3834 unless $port;
33 my $srv="localhost:$port";
34 $esidb=realpath($esidb);
35 my $esidir=getFDir($esidb);
36 $esidir=~s/\/$//;
37 my $esifile=getFName($esidb);
38 $esidir='' if (-f $esifile) && ($esidir eq $cwd);
39 my $srvpid=0;
40
41 startServer();
42
43 open(MFA, $qmfa) || die ($usage."Error opening query file!\n");
44 #my $esibase=$esifile;
45 #$esibase=~s/\.esi$//i;
46 #$esibase=~s/\.[trans]+$//i;
47 #$esibase=~s/\.[mp]*fa*$//i;
48 my $outfile=$qmfa.'.exout';
49 #open(FOUT, '>'.$outfile) || die ("Error creating output file $outfile !\n");
50
51 my $params = '-V 0 --model p2g --percent 20 -n 11 --showalignment no --showtargetgff yes --showvulgar no'.
52 ' --maxintron 450000 --seedrepeat 2 --saturatethreshold 80 --geneseed 60 -x 50 --proteinwordlen 4 '.
53 q/--ryo '%ti\t%qi\tmap_end\t%r\t%ql|%qab-%qae\t%ps\t%g\t%qd\n' /;
54 my $host=$ENV{'HOST'} || $ENV{'HOSTNAME'};
55 ($host)=($host=~m/^(\w+)/);
56 my $username=$ENV{'USER'} || $ENV{'LOGNAME'};
57
58 unless ($username) { $username = $ENV{'USERNAME'} || 'user'; }
59 my $tmpqn=$host.'_'.$username.'_'.$$;
60 my $qno=1;
61 my ($qdefline, $qseq, $qseqlen, $qname);
62 while (<MFA>) {
63 if (m/^>/) {
64 my $d=$_;
65 qSeqFa(); #uses $qseq, $qdefline, $qno, $outfile
66 $qdefline=$d;
67 ($qname)=($d=~m/^>(\S+)/);
68 $qseqlen=0;
69 }
70 else {
71 $qseq.=$_;
72 chomp;
73 $qseqlen+=length($_);
74 }
75 }
76 qSeqFa();
77 close(MFA);
78 kill(15, $srvpid); # sending kill signal to server
79 exit 0;
80
81
82 #================ SUBROUTINES ============
83
84 sub getFName {
85 return basename($_[0]);
86 }
87
88 sub getFDir {
89 return dirname($_[0]);
90 }
91
92 sub qSeqFa {
93 return unless $qdefline && $qseqlen>3;
94 my $qnstr=sprintf('%06d', $qno);
95 my $qfname=$tmpqn.'_'.$qnstr.'.qfa';
96 open(QFA, '>'.$qfname) || die("Error creating tmp qry file $qfname !\n");
97 print QFA $qdefline.$qseq;
98 close(QFA);
99 my $qfout=$qfname.'.exout';
100 my $s=int($qseqlen/10);
101 if ($s<150) { $s=60; }
102 elsif ($s<250) { $s=100 }
103 elsif ($s>1000) { $s=1000 }
104 my $rparams=$params;
105 $rparams=~s/geneseed\s+\d+/geneseed $s/;
106 my $ecmd="exonerate $rparams $qfname $srv > $qfout";
107 print STDERR ">q_$qnstr [$qname, $qseqlen] : $ecmd\n";
108 my $retry=0;
109 QRETRY:
110 if (runClient($ecmd, $qfname, $qfout)) {
111 print STDERR " ..ran ok, wait for connection cleanup..\n";
112 my $sl;
113 do { $sl=<SRVLOG> } until ($sl=~m/cleaning up connection/);
114 print STDERR "<q_$qnstr done.\n";
115 # append qfout to outfile
116 if (-s $qfout) {
117 system("cat $qfout >> $outfile") &&
118 die("Error at appending $qfout to $outfile !\n");
119 }
120 unlink($qfname);
121 # read the server output, otherwise it gets suspended!
122 } #no error
123 else { #error detected
124 unless (srvRunning()) {
125 #server not alive, restart it
126 #close(SRVLOG);
127 startServer();
128 }
129 $retry++;
130 if ($retry<2) {
131 print STDERR " ..retrying $ecmd\n";
132 sleep(5);
133 goto QRETRY;
134 }
135 else { #giving up on this slice:
136 print STDERR "Failed ($qno): $qfname vs $esifile\n";
137 }
138 }
139 $qno++;
140 $qseq='';
141 $qseqlen=0;
142 $qdefline='';
143 unlink($qfout);
144 }
145
146 sub srvRunning() {
147 my $s=`ps -h -o pid,ppid,cmd -p $srvpid`;
148 my ($pid, $ppid, $cmd)=split(' ',$s,3);
149 return ($pid==$srvpid && $ppid==$$);
150 }
151
152 sub runClient {
153 my ($docmd, $qname, $qdel) = @_;
154 my $errmsg = `($docmd) 2>&1`;
155 if ($? || ($errmsg=~/Error|Segmentation|Failed|Fatal|Invalid|Abort/si)) {
156 #print STDERR "!Error at: $docmd\n";
157 print STDERR " Error detected for $qname vs $esifile\n";
158 print STDERR " Error msg : $errmsg\n";
159 unlink($qdel);
160 return 0;
161 }
162 return 1;
163 }
164
165
166 sub startServer {
167 if ($esidir) {
168 chdir($esidir);
169 }
170 my $cmd='exonerate-server --proteinwordlen 4 --port '.$port.' '.$esifile;
171 print STDERR " Starting server (in $cwd): $cmd \n";
172 $srvpid=open(SRVLOG, $cmd.' 2>&1 |');
173 do {
174 $_=<SRVLOG>;
175 print STDERR $_;
176 } until m/listening/i;
177 #close(SRVLOG); -- no good, this will wait for the process to close!
178 if ($esidir) {
179 chdir($cwd);
180 }
181 }

Properties

Name Value
svn:executable *