ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/testGraphAverager.java
Revision: 492
Committed: Wed Jan 2 13:18:57 2008 UTC (16 years, 9 months ago) by duarte
File size: 13347 byte(s)
Log Message:
Copied the aglappe-jung branch into trunk.

Line File contents
1 import graphAveraging.GraphAverager;
2
3 import java.io.*;
4 import java.util.*;
5 import proteinstructure.*;
6 import tinker.TinkerError;
7 import tinker.TinkerRunner;
8
9 public class testGraphAverager {
10
11 /**
12 * @param args
13 */
14 public static void main(String[] args) {
15 // Plan:
16 // use getopt to parse the following parameters
17 // - original structure (optional, for benchmarking)
18 // - file with list of structures for averaging (required)
19 // - contact type & distance cutoff (both required)
20 // - consensus edge threshold (defaults to 0.5?)
21 // - run tinker or not
22 // - parameters for tinker model picking (optional)
23 // - number of tinker models to output (only if run tinker is set, defaults to one)
24 // - output benchmark result (graph comparison or structure comparison, only if original structure is specified)
25 // - output file for resulting graph (optional)
26
27 // take pdb files from command line (later: use getopt)
28 // create trivial alignment (check alignment)
29 // create graphAverager
30 // create consensus graph
31 // compare with graph for native structure (first argument)
32
33 // do tinker reconstruction
34 // compare with native structure
35
36 // benchmarking:
37 // - use score of best model vs. native (either graph based or structure based)
38 // - use matlab to optimize input parameters
39 // - create matlab executable for optimizing command line parameters:
40 // * specify command line to be run and constant parameters
41 // * specify parameter ranges to be optimized (discrete numerical, continuous numerical with steps, discrete set)
42 // * specify regexp to parse result from output (stdout or file)
43 // * specify optimization procedure (use default matlab ones, e.g. each parameter separate, genetic algorithm, simul. annealing, ...)
44 // * specify convergence threshold or timeout
45 // * run on cluster?
46
47 // Input options:
48 // - edgeType
49 // - distCutoff
50 // - targetPdb
51 // - targetChainCode
52 // - list of predictions files
53 // - edge threshold
54 // - min seq separation (for benchmarking and reconstruction)
55
56 // Output options:
57 // -c write consensus graph
58 // -w write weighted average graph
59 // -t write target graph
60 // -p write result pdb file
61 // -a write accuracy of result vs. native
62 // -o write coverage of results vs. native
63 // -r write rmsd of result vs. native
64 // -b write table of benchmarking results
65 // -v verbose output
66
67 // read command line parameters
68 if(args.length < 5) {
69 System.out.println("Usage: testGraphAverager <targetPdbFile> <ChainCode> <ListOfPredictionFiles> <edgeThreshold> <minSeqSep>");
70 System.exit(1);
71 }
72
73 // constants
74 String maxClusterExecutable = "/project/StruPPi/bin/maxcluster";
75 String TINKERBINDIR = "/project/StruPPi/Software/tinker/bin";
76 String PRMFILE = "/project/StruPPi/Software/tinker/amber/amber99.prm";
77
78 // input variables
79 File targetFile = new File(args[0]);
80 File listFile = new File(args[2]);
81 String chainCode = args[1];
82 String thresholdStr = args[3];
83 String minSeqSepStr = args[4];
84
85 // parameters (should all become command line parameters eventually)
86 String contactType = "Cb"; // edge type for graph generation (and reconstruction)
87 double distCutoff = 8.0; // distance cutuff for graph generation (and reconstruction)
88 double graphAveragingThreshold = Double.parseDouble(thresholdStr); // take edges which occur in at least this fraction of models
89 int minSeqSep = Integer.parseInt(minSeqSepStr); // consider only edges with at least this sequence separation
90 String outConsensusGraphFile = "consensusGraph.cm"; // name of consensus graph file
91 String outAverageGraphFile = "averageGraph.cm"; // name of average graph file
92 String outTargetGraphFile = "targetGraph.cm"; // name of target graph file
93 String outStructFile = "predictedStructure.pdb"; // name of structure file
94 int numberOfTinkerModels = 100; // number of models to be reconstructed by Tinker
95
96 boolean verbose = false; // if false, only results will be written to stdout
97 boolean outputConsensusGraph = false; // whether to write the resulting consensus graph to a file
98 boolean outputAverageGraph = false; // whether to write the weighted average graph to a file
99 boolean outputTargetGraph = false; // whether to write the target graph to a file
100 boolean outputSingleLineResult = false; // whether to output graph measures of consensus graph
101 boolean outputFullResultTable = false; // whether to output a table with graph measures for all inputs
102 boolean doReconstruct = true; // whether to use tinker to create a 3D prediction
103 boolean outputSingleLine3dResult = true;
104 boolean outputPredictedStructure = false; // whether to write reconstructed structure to a file
105
106 // local variables
107 String targetFileName = targetFile.getAbsolutePath(); // target file name
108 Pdb target = null; // target structure
109 RIGraph targetGraph = null; // target graph
110 Vector<String> templateFileNames = new Vector<String>(); // vector of template file names
111 Vector<RIGraph> models = new Vector<RIGraph>(); // vector of template graphs
112 RIGraph averageGraph; // weighted graph with fraction of occurrance for each edge
113 RIGraph consensusGraph; // consensus graph after applying threshold
114
115 if(!targetFile.canRead()) {
116 System.err.println("Can not read from file " + targetFileName);
117 System.exit(1);
118 }
119 if(!listFile.canRead()) {
120 System.err.println("Can not read from file " + listFile.getAbsolutePath());
121 System.exit(1);
122 }
123
124 // read target graph
125 if(verbose) System.out.println("Reading input files...");
126 try {
127 target = new PdbfilePdb(targetFile.getAbsolutePath());
128 target.load(chainCode);
129 targetGraph = target.get_graph(contactType, distCutoff);
130 } catch(PdbLoadError e) {
131 System.err.println("Error while trying to load pdb data from file " + targetFile.getAbsolutePath()+", specific error: "+e.getMessage());
132 System.exit(1);
133 }
134
135 // read list of predictions
136 try {
137 BufferedReader in = new BufferedReader(new FileReader(listFile));
138 String line;
139 File file;
140 Pdb pdb;
141 RIGraph graph;
142 while ((line = in.readLine() ) != null) {
143 file = new File(line);
144 if(!file.canRead()) {
145 System.err.println("File " + line + " not found.");
146 System.exit(1);
147 } else {
148 templateFileNames.add(line);
149 try {
150 pdb = new PdbfilePdb(file.getAbsolutePath());
151 pdb.load(Pdb.NULL_CHAIN_CODE);
152 graph = pdb.get_graph(contactType, distCutoff);
153 models.add(graph);
154 if(verbose) System.out.print(".");
155 } catch(PdbLoadError e) {
156 System.err.println("Error while trying to load pdb data from file " + file.getAbsolutePath()+", specific error: "+e.getMessage());
157 System.exit(1);
158 }
159 }
160 }
161 in.close();
162
163 } catch(FileNotFoundException e) {
164 System.err.println("File " + listFile.getAbsolutePath() + " not found.");
165 System.exit(1);
166 } catch(IOException e) {
167 System.err.println("Error reading from file " + listFile.getAbsolutePath());
168 System.exit(1);
169 }
170 if(verbose) System.out.println();
171
172 // create trivial alignment and template graphs
173 TreeMap<String,String> sequences = new TreeMap<String, String>();
174 TreeMap<String, RIGraph> templateGraphs = new TreeMap<String, RIGraph>();
175 sequences.put(targetFile.getAbsolutePath(), targetGraph.getSequence());
176 for(int i=0; i < models.size(); i++) {
177 sequences.put(templateFileNames.get(i), models.get(i).getSequence());
178 templateGraphs.put(templateFileNames.get(i), models.get(i));
179 }
180 Alignment al = null;
181 try {
182 al = new Alignment(sequences);
183 } catch (AlignmentConstructionError e) {
184 System.err.println("Could not create alignment: " + e.getMessage());
185 }
186
187 // create GraphAverager
188 GraphAverager grav = new GraphAverager(targetGraph.getSequence(), al, templateGraphs, targetFile.getAbsolutePath());
189 if(verbose) System.out.println("Calculating average...");
190 consensusGraph = grav.getConsensusGraph(graphAveragingThreshold);
191 averageGraph = grav.getAverageGraph();
192
193 // compare consensus graph with target
194 PredEval eval = consensusGraph.evaluatePrediction(targetGraph,minSeqSep);
195
196 // generate result table (consensus vs. individual models)
197 ArrayList<PredEval> evals = new ArrayList<PredEval>();
198 String targetTitle = "Average";
199 eval.title = targetTitle;
200 evals.add(eval);
201 for (int i = 0; i < models.size(); i++) {
202 RIGraph g = models.get(i);
203 String title = templateFileNames.get(i).split("/")[templateFileNames.get(i).split("/").length-1];
204 eval = g.evaluatePrediction(targetGraph,minSeqSep);
205 eval.title = title;
206 evals.add(eval);
207 }
208 // sort results
209 Collections.sort(evals, new Comparator<PredEval>() {
210 public int compare(PredEval e, PredEval e2) {
211 return -((new Double(e.accuracy+e.coverage)).compareTo(new Double(e2.accuracy+e2.coverage)));
212 }
213 });
214 // find best model accuracy and coverage (excluding average graph)
215 double bestAcc;
216 double bestCov;
217 if(evals.get(1).title!=targetTitle) {
218 bestAcc = evals.get(1).accuracy;
219 bestCov = evals.get(1).coverage;
220 } else {
221 bestAcc = evals.get(2).accuracy;
222 bestCov = evals.get(2).coverage;
223 }
224
225 // print full result table
226 if(outputFullResultTable) {
227 // TODO: add gdt to native order by gdt (and number of models in header)
228 System.out.printf("Results (ContactType=%s DistCutoff=%.1f AveragingThreshold=%s MinSeqSep=%d):\n", contactType, distCutoff, thresholdStr,minSeqSep);
229 System.out.println();
230 System.out.print(" # "); PredEval.printHeaders();
231 int c = 1;
232 for(PredEval e:evals) {
233 if(e.title == targetTitle) System.out.println();
234 System.out.printf("%2d ",c++);
235 e.printRow();
236 if(e.title == targetTitle) System.out.println();
237 }
238 }
239
240 // print accuracy and coverage of consensus graph and best model vs. target graph
241 if(outputSingleLineResult) {
242 System.out.printf("%.2f\t%.2f\t%.2f\t%.2f\n",eval.accuracy, eval.coverage, bestAcc, bestCov); // picked_gdt, best_pred_gdt, best_templ_gdt
243 }
244
245 // write resulting consensus graph to file
246 if(outputConsensusGraph) {
247 try {
248 consensusGraph.write_graph_to_file(outConsensusGraphFile);
249 } catch (IOException e) {
250 // TODO Auto-generated catch block
251 e.printStackTrace();
252 }
253 }
254
255 // write weighted average graph to file
256 if(outputAverageGraph) {
257 try {
258 averageGraph.write_graph_to_file(outAverageGraphFile);
259 } catch (IOException e) {
260 // TODO Auto-generated catch block
261 e.printStackTrace();
262 }
263 }
264
265 // write target graph to file
266 if(outputTargetGraph) {
267 try{
268 targetGraph.write_graph_to_file(outTargetGraphFile);
269
270 } catch (IOException e) {
271 // TODO Auto-generated catch block
272 e.printStackTrace();
273 }
274 }
275
276 // run reconstruction for consensus graph
277 if(doReconstruct) {
278
279 RIGraph[] graphs = {consensusGraph};
280 String sequence = targetGraph.getSequence();
281
282 TinkerRunner tr = null;
283 try {
284 tr = new TinkerRunner(TINKERBINDIR, PRMFILE);
285 } catch (FileNotFoundException e3) {
286 System.err.println("Couldn't find tinker bin dir "+TINKERBINDIR+". Exiting");
287 System.exit(1);
288 }
289
290 try {
291 Pdb resultPdb = tr.reconstruct(sequence, graphs, numberOfTinkerModels);
292 if(outputPredictedStructure) {
293 resultPdb.dump2pdbfile(outStructFile);
294 System.err.println("Output of reconstruction written to " + outStructFile);
295 }
296 } catch (IOException e) {
297 System.err.println("Error while running Tinker reconstruction: " + e.getMessage());
298 } catch (TinkerError e) {
299 System.err.println("Error while running Tinker reconstruction: " + e.getMessage());
300 }
301
302 // benchmark 3D prediction
303
304 // compare all tinker models with real
305 double[] tinkerModelGdts = null;
306 try {
307 tinkerModelGdts = tr.getGdtsToNative(targetFileName, maxClusterExecutable);
308 } catch(IOException e) {
309 System.err.println("Error while calculating GDT scores: " + e.getMessage());
310 }
311 // find best gdt from all tinker models
312 double bestGdt = 0;
313 //int bestModel = 0;
314 for (int i = 0; i < tinkerModelGdts.length; i++) {
315 if(tinkerModelGdts[i] > bestGdt) {
316 //bestModel = i;
317 bestGdt = tinkerModelGdts[i];
318 }
319 }
320 // get gdt of picked tinker model (picked by least bound violations)
321 int pickedIdx = tr.pickByLeastBoundViols();
322 double pickedGdt = tinkerModelGdts[pickedIdx-1];
323
324 // get template gdt scores
325 //double[] templateGdtScores = new double[templateFileNames.size()];
326 double bestTemplateGdt = 0;
327 MaxClusterRunner maxCluster = null;
328 try {
329 maxCluster = new MaxClusterRunner(maxClusterExecutable);
330 for(String tpl:templateFileNames) {
331 double gdtScore = maxCluster.calculatePairwiseScore(tpl, targetFileName,MaxClusterRunner.ScoreType.GDT);
332 if(gdtScore > bestTemplateGdt) {
333 bestTemplateGdt = gdtScore;
334 }
335 }
336 } catch(IOException e) {
337 System.err.println("Error running maxcluster:" + e.getMessage());
338 }
339 if(outputSingleLine3dResult) {
340 // row output: best template gdt, best predicted gdt, picked predicted gdt
341 System.out.printf("%6.3f\t%6.3f\t%6.3f\n", bestGdt, pickedGdt, bestTemplateGdt);
342 }
343
344 }
345 }
346 }