ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/branches/aglappe-jung/reconstruct.java
Revision: 368
Committed: Thu Oct 25 09:38:02 2007 UTC (16 years, 11 months ago) by duarte
Original Path: trunk/reconstruct.java
File size: 11157 byte(s)
Log Message:
Now parsing tinker's error function value, changed report file format (again!!)
Line File contents
1 import gnu.getopt.Getopt;
2
3 import java.io.File;
4 import java.io.FileNotFoundException;
5 import java.io.FileOutputStream;
6 import java.io.IOException;
7 import java.io.PrintWriter;
8 import java.sql.SQLException;
9 import java.util.Formatter;
10 import java.util.Locale;
11
12 import proteinstructure.AAinfo;
13 import proteinstructure.ConformationsNotSameSizeError;
14 import proteinstructure.Graph;
15 import proteinstructure.Pdb;
16 import proteinstructure.PdbChainCodeNotFoundError;
17 import proteinstructure.PdbCodeNotFoundError;
18 import proteinstructure.PdbaseInconsistencyError;
19 import proteinstructure.PdbasePdb;
20 import proteinstructure.PdbfileFormatError;
21 import proteinstructure.PdbfilePdb;
22
23 import tinker.ConstraintsMaker;
24 import tinker.TinkerError;
25 import tinker.TinkerRunner;
26
27
28 public class reconstruct {
29
30
31 private static final String TINKERBINDIR = "/project/StruPPi/Software/tinker/bin";
32 private static final String PRMFILE = "/project/StruPPi/Software/tinker/amber/amber99.prm";
33 private static final String PRMTYPE = "amber";
34
35 public static void main(String[] args) {
36
37 String programName = reconstruct.class.getName();
38 String help = "Usage:\n" +
39 programName+" -p <pdb code> -c <pdb chain code> -t <contact_type> [-r] -d <distance cutoff 1> -D <distance cutoff 2> -i <distance cutoff 3> -b <base name> -o <output dir> [-n <number of models>]\n";
40
41 String pdbCode = "";
42 String pdbChainCode = "";
43 String ct = "";
44 double cutoff1 = 0.0;
45 double cutoff2 = 0.0;
46 double cutoff3 = 0.0;
47 String outputDir = "";
48 String baseName = "";
49 boolean cross = false;
50 int n = 1;
51
52 Getopt g = new Getopt(programName, args, "p:c:d:t:rb:o:d:D:i:n:h?");
53 int c;
54 while ((c = g.getopt()) != -1) {
55 switch(c){
56 case 'p':
57 pdbCode = g.getOptarg();
58 break;
59 case 'c':
60 pdbChainCode = g.getOptarg();
61 break;
62 case 't':
63 ct = g.getOptarg();
64 break;
65 case 'r':
66 cross = true;
67 break;
68 case 'd':
69 cutoff1 = Double.valueOf(g.getOptarg());
70 break;
71 case 'D':
72 cutoff2 = Double.valueOf(g.getOptarg());
73 break;
74 case 'i':
75 cutoff3 = Double.valueOf(g.getOptarg());
76 break;
77 case 'b':
78 baseName = g.getOptarg();
79 break;
80 case 'o':
81 outputDir = g.getOptarg();
82 break;
83 case 'n':
84 n = Integer.parseInt(g.getOptarg());
85 break;
86 case 'h':
87 case '?':
88 System.out.println(help);
89 System.exit(0);
90 break; // getopt() already printed an error
91 }
92 }
93
94 if (pdbCode.equals("") || pdbChainCode.equals("") || ct.equals("") || cutoff1==0.0 || outputDir.equals("") || baseName.equals("")){
95 System.err.println("Must specify at least -p, -c, -t, -d, -o and -b");
96 System.err.println(help);
97 System.exit(1);
98 }
99
100 if (baseName.contains(".")) {
101 System.err.println("Basename can't contain a dot (not allowed by tinker). Exiting");
102 System.exit(1);
103 }
104
105 if (!AAinfo.isValidContactType(ct) || ct.contains("ALL")) {
106 System.err.println("Invalid contact type specified. Exiting");
107 System.exit(1);
108 }
109
110 if (n>999) {
111 System.err.println("Maximum number of models is 999. Specify a lower value. Exiting");
112 System.exit(1);
113 }
114
115 boolean doublecm = false;
116 String ct1 = ct;
117 String ct2 = ct;
118 String ct3 = null;
119 if (ct.contains("_")) {
120 ct1 = ct.split("_")[0];
121 ct2 = ct.split("_")[1];
122 doublecm = true;
123 }
124 if (cross) {
125 ct3 = ct1+"/"+ct2;
126 }
127
128 if (cutoff2==0.0) {
129 cutoff2 = cutoff1;
130 }
131 if (cutoff3==0.0) {
132 cutoff3 = cutoff1;
133 }
134
135 Pdb pdb = null;
136 try {
137 pdb = new PdbasePdb(pdbCode,pdbChainCode);
138 } catch (PdbaseInconsistencyError e) {
139 System.err.println("Pdbase inconsistency for structure "+pdbCode+". Can't continue, exiting");
140 System.exit(1);
141 } catch (PdbCodeNotFoundError e) {
142 System.err.println("Given pdb code "+pdbCode+" couldn't be found in pdbase. Exiting");
143 System.exit(1);
144 } catch (SQLException e) {
145 System.err.println("Problems connecting to database for getting pdb data for "+pdbCode+". Exiting");
146 System.exit(1);
147 } catch (PdbChainCodeNotFoundError e) {
148 System.err.println("Given pdb chain code "+pdbChainCode+" couldn't be found for pdb code "+pdbCode+". Exiting");
149 System.exit(1);
150 }
151 // we also write the file to the out dir so it can be used later for clustering rmsds etc.
152 File origPdbFile = new File (outputDir,baseName+".native.pdb");
153 try {
154 pdb.dump2pdbfile(origPdbFile.getAbsolutePath());
155 } catch (IOException e4) {
156 System.err.println("Couldn't write original pdb file "+origPdbFile.getAbsolutePath());
157 System.err.println("Continuing without it, this is not needed for the rest of the reconstruction process but only for post processing (e.g. comparing rmsds to original)");
158 }
159
160 String sequence = pdb.getSequence();
161
162 Graph graph1 = pdb.get_graph(ct1, cutoff1);
163 Graph graph2 = null;
164 Graph graph3 = null;
165 if (doublecm) {
166 graph2 = pdb.get_graph(ct2, cutoff2);
167 }
168 if (cross) {
169 graph3 = pdb.get_graph(ct3, cutoff3);
170 }
171
172
173 // defining files
174 File logFile = new File(outputDir,baseName+".tinker.log");
175 File prmFile = new File(PRMFILE);
176 File xyzFile = new File(outputDir,baseName+".xyz");
177 File seqFile = new File(outputDir,baseName+".seq");
178 File pdbFile = new File(outputDir,baseName+".pdb");
179 File keyFile = new File(outputDir,baseName+".key");
180 File reportFile = new File(outputDir,baseName+".report");
181
182 // creating TinkerRunner object
183 TinkerRunner tr = null;
184 try {
185 tr = new TinkerRunner(TINKERBINDIR,PRMFILE,logFile);
186 } catch (FileNotFoundException e3) {
187 System.err.println("Couldn't find tinker bin dir "+TINKERBINDIR+". Exiting");
188 System.exit(1);
189 }
190
191 // 1. run tinker's protein program
192 try {
193 tr.runProtein(sequence, outputDir, baseName );
194 } catch (IOException e2) {
195 System.err.println("Couldn't read file to run 'protein' "+seqFile.getAbsolutePath());
196 System.err.println("Exiting");
197 System.exit(1);
198 } catch (TinkerError e2) {
199 System.err.println("Tinker error while running 'protein', check log file "+logFile.getAbsolutePath()+". Exiting");
200 System.exit(1);
201 }
202
203 // 1a. convert xyz file to pdb to be able to map atom serials after
204 try {
205 tr.runXyzpdb(xyzFile, seqFile, pdbFile);
206 } catch (IOException e1) {
207 System.err.println("Couldn't read files "+xyzFile.getAbsolutePath()+" or "+seqFile.getAbsolutePath()+" or write to "+pdbFile.getAbsolutePath()+" for running 'xyzpdb'");
208 System.err.println("Exiting");
209 System.exit(1);
210 } catch (TinkerError e1) {
211 System.err.println("Tinker error while running xyzpdb, check log file "+logFile.getAbsolutePath()+". Exiting");
212 System.exit(1);
213 }
214
215 // 2. creating constraints into key file
216 ConstraintsMaker cm = null;
217 try {
218 cm = new ConstraintsMaker(pdbFile,xyzFile,prmFile,PRMTYPE,keyFile);
219 } catch (IOException e3) {
220 System.err.println("Couldn't read files "+xyzFile.getAbsolutePath()+", "+pdbFile.getAbsolutePath()+" or, "+prmFile.getAbsolutePath()+" write to "+keyFile.getAbsolutePath()+" for creating distance constraints");
221 System.err.println("Exiting");
222 System.exit(1);
223 } catch (PdbfileFormatError e3) {
224 System.err.println("pdb file "+pdbFile.getAbsolutePath()+" converted from "+xyzFile.getAbsolutePath()+" doesn't seem to be in the right format. Check log? ("+logFile.getAbsolutePath()+"). Exiting");
225 System.exit(1);
226 }
227
228 cm.createConstraints(graph1);
229 if (doublecm) cm.createConstraints(graph2);
230 if (cross) cm.createConstraints(graph3);
231
232 cm.closeKeyFile();
233
234 // 3. run tinker's distgeom
235 try {
236 System.out.println("Running distgeom...");
237 tr.runDistgeom(xyzFile, outputDir, baseName, n);
238 } catch (TinkerError e1) {
239 System.err.println(e1.getMessage());
240 System.err.println("Exiting");
241 System.exit(1);
242 } catch (IOException e1) {
243 System.err.println("Couldn't read files "+xyzFile.getAbsolutePath()+" or write 'distgeom' output files to output dir "+outputDir);
244 System.err.println("Exiting");
245 System.exit(1);
246 } catch(InterruptedException e) {
247 System.err.println("Distgeom was interrupted:" + e.getMessage());
248 System.err.println("Exiting.");
249 System.exit(1);
250 }
251
252 double[] err = tr.getErrorFunctionVal();
253 double[] mubv = tr.getMaxUpperBoundViol();
254 double[] mlbv = tr.getMaxLowerBoundViol();
255 double[] muv = tr.getMaxUpperViol();
256 double[] mlv = tr.getMaxLowerViol();
257 int[] nubv = tr.getNumUpperBoundViol();
258 int[] nlbv = tr.getNumLowerBoundViol();
259 int[] nuv = tr.getNumUpperViol();
260 int[] nlv = tr.getNumLowerViol();
261 double[] rbv = tr.getRmsBoundViol();
262 double[] rrv = tr.getRmsRestViol();
263
264
265 // 4. converting xyz output files to pdb files and calculating rmsds
266
267 double[] rmsds = new double[n+1];
268
269 for (int i = 1; i<=n; i++) {
270 String ext = new Formatter().format(".%03d",i).toString();
271 File outputXyzFile = new File(outputDir, baseName+ext);
272 File outputPdbFile = new File(outputDir, baseName+ext+".pdb");
273 try {
274 tr.runXyzpdb(outputXyzFile, seqFile, outputPdbFile);
275
276 Pdb outputPdb = new PdbfilePdb(outputPdbFile.getAbsolutePath(),"NULL");
277 rmsds[i] = pdb.rmsd(outputPdb, "Ca");
278
279 } catch (IOException e) {
280 System.err.println("Couldn't read file "+outputXyzFile.getAbsolutePath()+", or "+seqFile.getAbsolutePath()+", or write to "+outputPdbFile.getAbsolutePath()+" while converting with 'xyzpdb'");
281 System.err.println("Can't calculate rmsd for it");
282 } catch (TinkerError e) {
283 System.err.println("Tinker error while running 'xyzpdb' to convert"+outputXyzFile.getAbsolutePath()+", check log file "+logFile.getAbsolutePath());
284 System.err.println("Can't calculate rmsd for it");
285 }
286 catch (PdbfileFormatError e) {
287 System.err.println("Output pdb file "+outputPdbFile.getAbsolutePath()+" doesn't seem to be in the correcet format. Can't calculate rmsd for it");
288 } catch (PdbChainCodeNotFoundError e) {
289 // this shouldn't happen, chain code is hard coded, we throw stack trace and continue if it happens
290 e.printStackTrace();
291 } catch (ConformationsNotSameSizeError e) {
292 System.err.println(pdbFile.getAbsolutePath()+" and "+outputPdbFile.getAbsolutePath()+" don't have the same conformation size, can't calculate rmsd for them.");
293 }
294
295 tr.closeLog();
296 }
297
298
299 // 6. report
300 try {
301 PrintWriter reportOut = new PrintWriter(new FileOutputStream(reportFile));
302 reportOut.println("run_id\tcutoff\tcutoff2\tcutoff3\tct\tct2\tct3" +
303 "\tresult_id\terror_val\tup_bound_viol\tlow_bound_viol\tmax_bound_up\tmax_bound_low\trms_bound" +
304 "\tup_viol\tlow_viol\tmax_up\tmax_low\trms_viol\trmsd_to_orig");
305
306 for (int i=1;i<=n;i++){
307 String rmsd = String.format(Locale.US,"%6.3f",rmsds[i]);
308 String errStr = String.format(Locale.US, "%6.3f", err[i]);
309 reportOut.println(pdbCode+"_"+pdbChainCode+"\t"+cutoff1+"\t"+cutoff2+"\t"+cutoff3+"\t"+ct1+"\t"+ct2+"\t"+ct3+"\t"+
310 i+"\t"+errStr+"\t"+nubv[i]+"\t"+nlbv[i]+"\t"+mubv[i]+"\t"+mlbv[i]+"\t"+muv[i]+"\t"+mlv[i]+"\t"+rbv[i]+"\t"+
311 nuv[i]+"\t"+nlv[i]+"\t"+rrv[i]+"\t"+rmsd);
312 }
313 reportOut.close();
314 } catch (FileNotFoundException e) {
315 System.err.println("Couldn't write to report file "+reportFile.getAbsolutePath() +". Error: "+e.getMessage());
316 }
317
318 }
319
320 }