package tinker;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Formatter;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class TinkerRunner { |
17 |
|
18 |
private static final String PROTEIN_PROG = "protein"; |
19 |
private static final String DISTGEOM_PROG = "distgeom"; |
20 |
private static final String PDBXYZ_PROG = "pdbxyz"; |
21 |
private static final String XYZPDB_PROG = "xyzpdb"; |
22 |
private static final String CYCLISE_PROTEIN_STR = "N"; |
23 |
private static final String DGEOM_PARAMS = "Y N Y Y N N A"; |
24 |
private static final String TINKER_ERROR_STR = " TINKER is Unable to Continue"; |
25 |
|
26 |
private String tinkerBinDir; |
27 |
|
28 |
private String forceFieldFileName; |
29 |
|
30 |
private String proteinProg; |
31 |
private String distgeomProg; |
32 |
private String pdbxyzProg; |
33 |
private String xyzpdbProg; |
34 |
|
35 |
private File logFile; |
36 |
private PrintWriter log; |
37 |
|
38 |
// arrays for storing distgeom output data |
39 |
private int[] numUpperBoundViol; |
40 |
private int[] numLowerBoundViol; |
41 |
private double[] maxUpperBoundViol; |
42 |
private double[] maxLowerBoundViol; |
43 |
private double[] rmsBoundViol; |
44 |
private int[] numUpperViol; |
45 |
private int[] numLowerViol; |
46 |
private double[] maxUpperViol; |
47 |
private double[] maxLowerViol; |
48 |
private double[] rmsRestViol; |
49 |
|
50 |
|
51 |
/** |
52 |
* Constructs a TinkerRunner object by passing initial parameters |
53 |
* @param tinkerBinDir The directory where the tinker executables are |
54 |
* @param forceFieldFileName The force field file |
55 |
* @param logFile File where all tinker output will be logged to |
56 |
* @throws FileNotFoundException If logFile can't be written |
57 |
*/ |
58 |
public TinkerRunner(String tinkerBinDir, String forceFieldFileName, File logFile) throws FileNotFoundException { |
59 |
this.tinkerBinDir = tinkerBinDir; |
60 |
this.forceFieldFileName = forceFieldFileName; |
61 |
this.proteinProg = new File(this.tinkerBinDir,PROTEIN_PROG).getAbsolutePath(); |
62 |
this.distgeomProg = new File(this.tinkerBinDir,DISTGEOM_PROG).getAbsolutePath(); |
63 |
this.pdbxyzProg = new File(this.tinkerBinDir,PDBXYZ_PROG).getAbsolutePath(); |
64 |
this.xyzpdbProg = new File(this.tinkerBinDir,XYZPDB_PROG).getAbsolutePath(); |
65 |
|
66 |
this.logFile = logFile; |
67 |
this.log = new PrintWriter(new FileOutputStream(logFile)); |
68 |
} |
69 |
|
70 |
/** |
71 |
* To get the expected File that a tinker program will output given an input file and an extension for the output files |
72 |
* The directory where the input file is will be scanned to see if it contains files of the form basename.ext, basename.ext_2, basename.ext_3 etc. |
73 |
* @param file |
74 |
* @param ext |
75 |
* @return |
76 |
*/ |
77 |
private File getTinkerOutputFileName(File file, String ext){ |
78 |
String basename = file.getName(); |
79 |
basename = basename.substring(0, basename.lastIndexOf(".")); |
80 |
String dirname = file.getParent(); |
81 |
|
82 |
String tinkerOutFileName = basename + "." + ext; |
83 |
|
84 |
if (new File(dirname,tinkerOutFileName).exists()) { |
85 |
int i = 2; |
86 |
tinkerOutFileName = basename + "." + ext + "_" + i; |
87 |
while (new File(dirname,tinkerOutFileName).exists()) { |
88 |
i++; |
89 |
tinkerOutFileName = basename + "." + ext + "_" + i; |
90 |
} |
91 |
} |
92 |
return new File(dirname,tinkerOutFileName); |
93 |
} |
94 |
|
95 |
/** |
96 |
* Runs tinker's protein program to generate an elongated protein structure given a sequence |
97 |
* @param sequence |
98 |
* @param outPath The directory where output files will be written |
99 |
* @param outBasename The base name for the output files |
100 |
* @throws IOException |
101 |
* @throws TinkerError |
102 |
*/ |
103 |
public void runProtein(String sequence, String outPath, String outBasename) throws IOException, TinkerError { |
104 |
boolean tinkerError = false; // to store the exit state of the tinker program |
105 |
|
106 |
if (!new File(outPath).exists()) { |
107 |
throw new FileNotFoundException("Specified directory "+outPath+" does not exist"); |
108 |
} |
109 |
File tinkerxyzout = getTinkerOutputFileName(new File(outPath,outBasename+".xyz"),"xyz"); |
110 |
File tinkerintout = getTinkerOutputFileName(new File(outPath,outBasename+".int"),"int"); |
111 |
tinkerintout.deleteOnExit(); |
112 |
File tinkerseqout = getTinkerOutputFileName(new File(outPath,outBasename+".seq"),"seq"); |
113 |
|
114 |
// running protein program in outPath dir (so that output files are written to outPath) |
115 |
Process protProc = Runtime.getRuntime().exec(proteinProg, null, new File(outPath)); |
116 |
// piping input |
117 |
PrintWriter protInput = new PrintWriter(protProc.getOutputStream()); |
118 |
protInput.println(outBasename); |
119 |
protInput.println("Unfolded chain created by tinker's protein program"); |
120 |
protInput.println(forceFieldFileName); |
121 |
for (int i=0;i<sequence.length();i++) { |
122 |
// we've got to use 3 letter code for CYS, otherwise tinker takes the default CYX (which means cystein with disulfide bridge) |
123 |
if (sequence.charAt(i)=='C') { |
124 |
protInput.println("CYS"); |
125 |
} else { |
126 |
protInput.println(sequence.charAt(i)); |
127 |
} |
128 |
} |
129 |
protInput.println(); |
130 |
protInput.println(CYCLISE_PROTEIN_STR); |
131 |
protInput.close(); |
132 |
|
133 |
// logging output |
134 |
BufferedReader protOutput = new BufferedReader(new InputStreamReader(protProc.getInputStream())); |
135 |
String line; |
136 |
while((line = protOutput.readLine()) != null) { |
137 |
log.println(line); |
138 |
if (line.startsWith(TINKER_ERROR_STR)) { |
139 |
tinkerError = true; |
140 |
} |
141 |
} |
142 |
|
143 |
tinkerxyzout.renameTo(new File(outPath,outBasename+".xyz")); |
144 |
tinkerseqout.renameTo(new File(outPath,outBasename+".seq")); |
145 |
|
146 |
if (tinkerError) { |
147 |
log.close(); |
148 |
throw new TinkerError("Tinker error, revise log file "+logFile.getAbsolutePath()); |
149 |
} |
150 |
} |
151 |
|
152 |
/** |
153 |
* Runs tinker's distgeom program capturing output with restrain violation statistics into member variable arrays |
154 |
* that can be retrieved using the getters: getMaxLowerBoundViol, getMaxUpperBoundViol, getMaxLowerViol etc... |
155 |
* Two files are needed as input for distgeom: an xyz file and a key file, the latter is not passed but instead implicitely |
156 |
* defined by xyzFile: must be in same directory and must have same basename with extension .key |
157 |
* @param xyzFile |
158 |
* @param outPath Directory where output files will be written |
159 |
* @param outBasename Base name of the output files |
160 |
* @param n Number of models that we want distgeom to produce |
161 |
* @throws TinkerError If an error seen in tinker's output |
162 |
* @throws IOException |
163 |
*/ |
164 |
public void runDistgeom(File xyzFile, String outPath, String outBasename, int n) throws TinkerError, IOException, InterruptedException { |
165 |
boolean tinkerError = false; // to store the exit state of the tinker program |
166 |
if (!new File(outPath).exists()) { |
167 |
throw new FileNotFoundException("Specified directory "+outPath+" does not exist"); |
168 |
} |
169 |
if (!xyzFile.exists()){ |
170 |
throw new FileNotFoundException("Specified xyz file "+xyzFile.getAbsolutePath()+" does not exist"); |
171 |
} |
172 |
String basename = xyzFile.getName(); |
173 |
basename = basename.substring(0, basename.lastIndexOf(".")); |
174 |
File keyFile = new File(xyzFile.getParent(),basename+".key"); |
175 |
if (! keyFile.exists()) { |
176 |
throw new FileNotFoundException("Key file "+keyFile.getAbsolutePath()+" not present in input directory "+xyzFile.getParent()); |
177 |
} |
178 |
// getting names of tinker output files |
179 |
File[] tinkerout = new File[n+1]; |
180 |
for (int i=1;i<=n;i++) { |
181 |
String ext = new Formatter().format("%03d", i).toString(); |
182 |
tinkerout[i] = getTinkerOutputFileName(xyzFile, ext); |
183 |
} |
184 |
// initialising arrays were we store captured output data |
185 |
numUpperBoundViol = new int[n+1]; |
186 |
numLowerBoundViol = new int[n+1]; |
187 |
maxUpperBoundViol = new double[n+1]; |
188 |
maxLowerBoundViol = new double[n+1]; |
189 |
rmsBoundViol = new double[n+1]; |
190 |
numUpperViol = new int[n+1]; |
191 |
numLowerViol = new int[n+1]; |
192 |
maxUpperViol = new double[n+1]; |
193 |
maxLowerViol = new double[n+1]; |
194 |
rmsRestViol = new double[n+1]; |
195 |
// running distgeom program |
196 |
String cmdLine = distgeomProg+" "+xyzFile.getAbsolutePath()+" "+n+" "+DGEOM_PARAMS; |
197 |
Process dgeomProc = Runtime.getRuntime().exec(cmdLine); |
198 |
// logging and capturing output |
199 |
BufferedReader dgeomOutput = new BufferedReader(new InputStreamReader(dgeomProc.getInputStream())); |
200 |
String line; |
201 |
int i=1; |
202 |
while((line = dgeomOutput.readLine()) != null) { |
203 |
log.println(line); |
204 |
if (line.startsWith(TINKER_ERROR_STR)) { |
205 |
tinkerError = true; |
206 |
} |
207 |
Pattern p = Pattern.compile("^ Num Upper Bound Violations :\\s+(\\d+)"); |
208 |
Matcher m = p.matcher(line); |
209 |
if (m.find()) { |
210 |
numUpperBoundViol[i]=Integer.parseInt(m.group(1)); |
211 |
} |
212 |
p = Pattern.compile("^ Num Lower Bound Violations :\\s+(\\d+)"); |
213 |
m = p.matcher(line); |
214 |
if (m.find()) { |
215 |
numLowerBoundViol[i]=Integer.parseInt(m.group(1)); |
216 |
} |
217 |
p = Pattern.compile("^ Max Upper Bound Violation :\\s+(\\d+\\.\\d\\d\\d\\d)"); |
218 |
m = p.matcher(line); |
219 |
if (m.find()) { |
220 |
maxUpperBoundViol[i]=Double.parseDouble(m.group(1)); |
221 |
} |
222 |
p = Pattern.compile("^ Max Lower Bound Violation :\\s+(\\d+\\.\\d\\d\\d\\d)"); |
223 |
m = p.matcher(line); |
224 |
if (m.find()) { |
225 |
maxLowerBoundViol[i]=Double.parseDouble(m.group(1)); |
226 |
} |
227 |
p = Pattern.compile("^ RMS Deviation from Bounds :\\s+(\\d+\\.\\d\\d\\d\\d)"); |
228 |
m = p.matcher(line); |
229 |
if (m.find()) { |
230 |
rmsBoundViol[i]=Double.parseDouble(m.group(1)); |
231 |
} |
232 |
p = Pattern.compile("^ Num Upper Restraint Violations :\\s+(\\d+)"); |
233 |
m = p.matcher(line); |
234 |
if (m.find()) { |
235 |
numUpperViol[i]=Integer.parseInt(m.group(1)); |
236 |
} |
237 |
p = Pattern.compile("^ Num Lower Restraint Violations :\\s+(\\d+)"); |
238 |
m = p.matcher(line); |
239 |
if (m.find()) { |
240 |
numLowerViol[i]=Integer.parseInt(m.group(1)); |
241 |
} |
242 |
p = Pattern.compile("^ Max Upper Restraint Violation :\\s+(\\d+\\.\\d\\d\\d\\d)"); |
243 |
m = p.matcher(line); |
244 |
if (m.find()) { |
245 |
maxUpperViol[i]=Double.parseDouble(m.group(1)); |
246 |
} |
247 |
p = Pattern.compile("^ Max Lower Restraint Violation :\\s+(\\d+\\.\\d\\d\\d\\d)"); |
248 |
m = p.matcher(line); |
249 |
if (m.find()) { |
250 |
maxLowerViol[i]=Double.parseDouble(m.group(1)); |
251 |
} |
252 |
p = Pattern.compile("^ RMS Restraint Dist Violation :\\s+(\\d+\\.\\d\\d\\d\\d)"); |
253 |
m = p.matcher(line); |
254 |
if (m.find()) { |
255 |
rmsRestViol[i]=Double.parseDouble(m.group(1)); |
256 |
System.out.println("Done model "+i+". Violations: "+numUpperViol[i]+" upper, "+numLowerViol[i]+" lower"); |
257 |
i++; |
258 |
|
259 |
} |
260 |
} |
261 |
//renaming files to our chosen outBasename+ext |
262 |
for (i=1;i<=n;i++) { |
263 |
String ext = new Formatter().format("%03d", i).toString(); |
264 |
tinkerout[i].renameTo(new File(outPath,outBasename+"."+ext)); |
265 |
} |
266 |
// throwing exception if error string was caught in output |
267 |
if (tinkerError) { |
268 |
log.close(); |
269 |
throw new TinkerError("Tinker error, revise log file "+logFile.getAbsolutePath()); |
270 |
} |
271 |
int exitValue = dgeomProc.waitFor(); |
272 |
// throwing exception if exit state is 137: happens in Linux when another instance of distgeom is running in same machine, the OS kills it with exit state 137 |
273 |
if (exitValue==137) { |
274 |
log.close(); |
275 |
throw new TinkerError("Distgeom was killed by OS, probably another instance of distgeom is running in this computer"); |
276 |
} |
277 |
else if (exitValue==139) { |
278 |
log.close(); |
279 |
throw new TinkerError("Distgeom was killed with exit code 139. Not enough memory."); |
280 |
|
281 |
} |
282 |
// this is to catch all other possible errors not caught already by the parse of the error string in output |
283 |
else if (exitValue!=0) { |
284 |
log.close(); |
285 |
throw new TinkerError("Distgeom exited with a non 0 exit code: "+exitValue+". Unknown error."); |
286 |
} |
287 |
|
288 |
} |
289 |
|
290 |
/** |
291 |
* Runs tinker's xyzpdb program to convert a given xyzFile (needing also a seqFile) to a pdbFile |
292 |
* @param xyzFile |
293 |
* @param seqFile |
294 |
* @param pdbFile |
295 |
* @throws IOException |
296 |
* @throws TinkerError If an error seen in tinker's output |
297 |
*/ |
298 |
public void runXyzpdb(File xyzFile, File seqFile, File pdbFile) throws IOException, TinkerError { |
299 |
boolean tinkerError = false; // to store the exit state of the tinker program |
300 |
if (!xyzFile.exists()){ |
301 |
throw new FileNotFoundException("Specified xyz file "+xyzFile.getAbsolutePath()+" does not exist"); |
302 |
} |
303 |
if (!seqFile.exists()){ |
304 |
throw new FileNotFoundException("Specified seq file "+seqFile.getAbsolutePath()+" does not exist"); |
305 |
} |
306 |
|
307 |
String basename = xyzFile.getName(); |
308 |
basename = basename.substring(0, basename.lastIndexOf(".")); |
309 |
File tmpSeqFile = new File(seqFile.getParent(),basename+".seq"); |
310 |
|
311 |
// if seqFile doesn't follow the naming convention (basename of xyzFile+seq extension) that tinker expects, we copy it to tmpSeqFile (which has right name) |
312 |
if (!tmpSeqFile.equals(seqFile)) { |
313 |
FileChannel srcChannel = new FileInputStream(seqFile).getChannel(); |
314 |
FileChannel dstChannel = new FileOutputStream(tmpSeqFile).getChannel(); |
315 |
dstChannel.transferFrom(srcChannel, 0, srcChannel.size()); |
316 |
srcChannel.close(); |
317 |
dstChannel.close(); |
318 |
// if we copied then that means tmpSeqFile is different from seqFile and thus we want to delete the tmp file on exit |
319 |
tmpSeqFile.deleteOnExit(); |
320 |
} |
321 |
|
322 |
File tinkerpdbout = getTinkerOutputFileName(xyzFile, "pdb"); |
323 |
|
324 |
// running tinker's xyzpdb |
325 |
// beware: it takes as a silent input the seq file seqFile (or tmpSeqFile if the input seqFile didn't have the right name) |
326 |
Process xyzpdbProc = Runtime.getRuntime().exec(xyzpdbProg+" "+xyzFile.getAbsolutePath()+" "+forceFieldFileName); |
327 |
|
328 |
// logging output |
329 |
BufferedReader xyzpdbOutput = new BufferedReader(new InputStreamReader(xyzpdbProc.getInputStream())); |
330 |
String line; |
331 |
while((line = xyzpdbOutput.readLine()) != null) { |
332 |
log.println(line); |
333 |
if (line.startsWith(TINKER_ERROR_STR)) { |
334 |
tinkerError = true; |
335 |
} |
336 |
} |
337 |
|
338 |
tinkerpdbout.renameTo(pdbFile); |
339 |
|
340 |
if (tinkerError) { |
341 |
log.close(); |
342 |
throw new TinkerError("Tinker error, revise log file "+logFile.getAbsolutePath()); |
343 |
} |
344 |
} |
345 |
|
346 |
/** |
347 |
* Runs tinker's pdbxyz program to convert a pdbFile to a xyzFile |
348 |
* @param pdbFile |
349 |
* @param xyzFile |
350 |
* @throws IOException |
351 |
* @throws TinkerError If an error seen in tinker's output |
352 |
*/ |
353 |
public void runPdbxyz(File pdbFile, File xyzFile) throws IOException, TinkerError{ |
354 |
boolean tinkerError = false; // to store the exit state of the tinker program |
355 |
if (!pdbFile.exists()){ |
356 |
throw new FileNotFoundException("Specified pdb file "+pdbFile.getAbsolutePath()+" does not exist"); |
357 |
} |
358 |
File tinkerxyzout = getTinkerOutputFileName(pdbFile, "xyz"); |
359 |
// running tinker's pdbxyz |
360 |
Process pdbxyzProc = Runtime.getRuntime().exec(pdbxyzProg+" "+pdbFile.getAbsolutePath()+" "+forceFieldFileName); |
361 |
|
362 |
// logging output |
363 |
BufferedReader pdbxyzOutput = new BufferedReader(new InputStreamReader(pdbxyzProc.getInputStream())); |
364 |
String line; |
365 |
while((line = pdbxyzOutput.readLine()) != null) { |
366 |
log.println(line); |
367 |
if (line.startsWith(TINKER_ERROR_STR)) { |
368 |
tinkerError = true; |
369 |
} |
370 |
} |
371 |
tinkerxyzout.renameTo(xyzFile); |
372 |
if (tinkerError) { |
373 |
log.close(); |
374 |
throw new TinkerError("Tinker error, revise log file "+logFile.getAbsolutePath()); |
375 |
} |
376 |
} |
377 |
|
378 |
/** |
379 |
* Closes log stream, must be called after no other tinker program will be run with this TinkerRunner object |
380 |
* (otherwise log is not flushed to file) |
381 |
*/ |
382 |
public void closeLog() { |
383 |
log.close(); |
384 |
} |
385 |
|
386 |
public double[] getMaxLowerBoundViol() { |
387 |
return maxLowerBoundViol; |
388 |
} |
389 |
|
390 |
public double[] getMaxLowerViol() { |
391 |
return maxLowerViol; |
392 |
} |
393 |
|
394 |
public double[] getMaxUpperBoundViol() { |
395 |
return maxUpperBoundViol; |
396 |
} |
397 |
|
398 |
public double[] getMaxUpperViol() { |
399 |
return maxUpperViol; |
400 |
} |
401 |
|
402 |
public int[] getNumLowerBoundViol() { |
403 |
return numLowerBoundViol; |
404 |
} |
405 |
|
406 |
public int[] getNumLowerViol() { |
407 |
return numLowerViol; |
408 |
} |
409 |
|
410 |
public int[] getNumUpperBoundViol() { |
411 |
return numUpperBoundViol; |
412 |
} |
413 |
|
414 |
public int[] getNumUpperViol() { |
415 |
return numUpperViol; |
416 |
} |
417 |
|
418 |
public double[] getRmsBoundViol() { |
419 |
return rmsBoundViol; |
420 |
} |
421 |
|
422 |
public double[] getRmsRestViol() { |
423 |
return rmsRestViol; |
424 |
} |
425 |
} |