1 |
duarte |
338 |
import gnu.getopt.Getopt; |
2 |
|
|
|
3 |
|
|
import java.io.File; |
4 |
|
|
import java.io.FileNotFoundException; |
5 |
duarte |
347 |
import java.io.FileOutputStream; |
6 |
duarte |
338 |
import java.io.IOException; |
7 |
duarte |
347 |
import java.io.PrintWriter; |
8 |
duarte |
338 |
import java.sql.SQLException; |
9 |
|
|
import java.util.Formatter; |
10 |
duarte |
349 |
import java.util.Locale; |
11 |
duarte |
338 |
|
12 |
duarte |
346 |
import proteinstructure.AAinfo; |
13 |
duarte |
338 |
import proteinstructure.ConformationsNotSameSizeError; |
14 |
duarte |
441 |
import proteinstructure.PdbLoadError; |
15 |
duarte |
420 |
import proteinstructure.RIGraph; |
16 |
duarte |
338 |
import proteinstructure.Pdb; |
17 |
|
|
import proteinstructure.PdbCodeNotFoundError; |
18 |
|
|
import proteinstructure.PdbasePdb; |
19 |
|
|
|
20 |
|
|
import tinker.TinkerError; |
21 |
|
|
import tinker.TinkerRunner; |
22 |
|
|
|
23 |
|
|
|
24 |
|
|
public class reconstruct { |
25 |
|
|
|
26 |
|
|
private static final String TINKERBINDIR = "/project/StruPPi/Software/tinker/bin"; |
27 |
|
|
private static final String PRMFILE = "/project/StruPPi/Software/tinker/amber/amber99.prm"; |
28 |
|
|
|
29 |
|
|
public static void main(String[] args) { |
30 |
|
|
|
31 |
|
|
String programName = reconstruct.class.getName(); |
32 |
|
|
String help = "Usage:\n" + |
33 |
duarte |
471 |
programName+" -p <pdb code> -c <pdb chain code> -t <contact_type> [-r] -d <distance cutoff 1> -D <distance cutoff 2> -i <distance cutoff 3> -b <base name> -o <output dir> [-n <number of models>] [-m <min range>] [-M <max range>] \n"; |
34 |
duarte |
338 |
|
35 |
|
|
String pdbCode = ""; |
36 |
|
|
String pdbChainCode = ""; |
37 |
|
|
String ct = ""; |
38 |
|
|
double cutoff1 = 0.0; |
39 |
|
|
double cutoff2 = 0.0; |
40 |
|
|
double cutoff3 = 0.0; |
41 |
|
|
String outputDir = ""; |
42 |
|
|
String baseName = ""; |
43 |
|
|
boolean cross = false; |
44 |
|
|
int n = 1; |
45 |
duarte |
393 |
//double forceConstant = 100.0; |
46 |
duarte |
471 |
int minRange = 0; |
47 |
|
|
int maxRange = 0; |
48 |
duarte |
393 |
|
49 |
duarte |
471 |
Getopt g = new Getopt(programName, args, "p:c:d:t:rb:o:d:D:i:n:m:M:h?"); |
50 |
duarte |
338 |
int c; |
51 |
|
|
while ((c = g.getopt()) != -1) { |
52 |
|
|
switch(c){ |
53 |
|
|
case 'p': |
54 |
|
|
pdbCode = g.getOptarg(); |
55 |
|
|
break; |
56 |
|
|
case 'c': |
57 |
|
|
pdbChainCode = g.getOptarg(); |
58 |
|
|
break; |
59 |
|
|
case 't': |
60 |
|
|
ct = g.getOptarg(); |
61 |
|
|
break; |
62 |
|
|
case 'r': |
63 |
|
|
cross = true; |
64 |
|
|
break; |
65 |
|
|
case 'd': |
66 |
|
|
cutoff1 = Double.valueOf(g.getOptarg()); |
67 |
|
|
break; |
68 |
|
|
case 'D': |
69 |
|
|
cutoff2 = Double.valueOf(g.getOptarg()); |
70 |
|
|
break; |
71 |
|
|
case 'i': |
72 |
|
|
cutoff3 = Double.valueOf(g.getOptarg()); |
73 |
|
|
break; |
74 |
|
|
case 'b': |
75 |
|
|
baseName = g.getOptarg(); |
76 |
|
|
break; |
77 |
|
|
case 'o': |
78 |
|
|
outputDir = g.getOptarg(); |
79 |
|
|
break; |
80 |
|
|
case 'n': |
81 |
|
|
n = Integer.parseInt(g.getOptarg()); |
82 |
duarte |
393 |
break; |
83 |
duarte |
471 |
case 'm': |
84 |
|
|
minRange = Integer.parseInt(g.getOptarg()); |
85 |
|
|
break; |
86 |
|
|
case 'M': |
87 |
|
|
maxRange = Integer.parseInt(g.getOptarg()); |
88 |
|
|
break; |
89 |
duarte |
393 |
//case 'f': |
90 |
|
|
// forceConstant = Double.valueOf(g.getOptarg()); |
91 |
|
|
// break; |
92 |
duarte |
338 |
case 'h': |
93 |
|
|
case '?': |
94 |
|
|
System.out.println(help); |
95 |
|
|
System.exit(0); |
96 |
|
|
break; // getopt() already printed an error |
97 |
|
|
} |
98 |
|
|
} |
99 |
|
|
|
100 |
|
|
if (pdbCode.equals("") || pdbChainCode.equals("") || ct.equals("") || cutoff1==0.0 || outputDir.equals("") || baseName.equals("")){ |
101 |
|
|
System.err.println("Must specify at least -p, -c, -t, -d, -o and -b"); |
102 |
|
|
System.err.println(help); |
103 |
|
|
System.exit(1); |
104 |
|
|
} |
105 |
|
|
|
106 |
duarte |
346 |
if (baseName.contains(".")) { |
107 |
|
|
System.err.println("Basename can't contain a dot (not allowed by tinker). Exiting"); |
108 |
|
|
System.exit(1); |
109 |
|
|
} |
110 |
|
|
|
111 |
|
|
if (!AAinfo.isValidContactType(ct) || ct.contains("ALL")) { |
112 |
|
|
System.err.println("Invalid contact type specified. Exiting"); |
113 |
|
|
System.exit(1); |
114 |
|
|
} |
115 |
|
|
|
116 |
|
|
if (n>999) { |
117 |
|
|
System.err.println("Maximum number of models is 999. Specify a lower value. Exiting"); |
118 |
|
|
System.exit(1); |
119 |
|
|
} |
120 |
|
|
|
121 |
duarte |
338 |
boolean doublecm = false; |
122 |
|
|
String ct1 = ct; |
123 |
|
|
String ct2 = ct; |
124 |
|
|
String ct3 = null; |
125 |
|
|
if (ct.contains("_")) { |
126 |
|
|
ct1 = ct.split("_")[0]; |
127 |
|
|
ct2 = ct.split("_")[1]; |
128 |
|
|
doublecm = true; |
129 |
|
|
} |
130 |
|
|
if (cross) { |
131 |
|
|
ct3 = ct1+"/"+ct2; |
132 |
|
|
} |
133 |
|
|
|
134 |
|
|
if (cutoff2==0.0) { |
135 |
|
|
cutoff2 = cutoff1; |
136 |
|
|
} |
137 |
|
|
if (cutoff3==0.0) { |
138 |
|
|
cutoff3 = cutoff1; |
139 |
|
|
} |
140 |
|
|
|
141 |
|
|
Pdb pdb = null; |
142 |
|
|
try { |
143 |
duarte |
441 |
pdb = new PdbasePdb(pdbCode); |
144 |
|
|
pdb.load(pdbChainCode); |
145 |
|
|
} catch (PdbLoadError e) { |
146 |
|
|
System.err.println("Error while loading pdb data. Specific error "+e.getMessage()); |
147 |
duarte |
338 |
System.exit(1); |
148 |
|
|
} catch (PdbCodeNotFoundError e) { |
149 |
|
|
System.err.println("Given pdb code "+pdbCode+" couldn't be found in pdbase. Exiting"); |
150 |
|
|
System.exit(1); |
151 |
|
|
} catch (SQLException e) { |
152 |
|
|
System.err.println("Problems connecting to database for getting pdb data for "+pdbCode+". Exiting"); |
153 |
|
|
System.exit(1); |
154 |
|
|
} |
155 |
duarte |
349 |
// we also write the file to the out dir so it can be used later for clustering rmsds etc. |
156 |
duarte |
367 |
File origPdbFile = new File (outputDir,baseName+".native.pdb"); |
157 |
duarte |
349 |
try { |
158 |
|
|
pdb.dump2pdbfile(origPdbFile.getAbsolutePath()); |
159 |
|
|
} catch (IOException e4) { |
160 |
|
|
System.err.println("Couldn't write original pdb file "+origPdbFile.getAbsolutePath()); |
161 |
|
|
System.err.println("Continuing without it, this is not needed for the rest of the reconstruction process but only for post processing (e.g. comparing rmsds to original)"); |
162 |
|
|
} |
163 |
|
|
|
164 |
duarte |
338 |
String sequence = pdb.getSequence(); |
165 |
|
|
|
166 |
duarte |
420 |
RIGraph[] graphs =null; |
167 |
|
|
RIGraph graph1 = pdb.get_graph(ct1, cutoff1); |
168 |
duarte |
471 |
if (maxRange>0) graph1.restrictContactsToMaxRange(maxRange); |
169 |
|
|
if (minRange>0) graph1.restrictContactsToMinRange(minRange); |
170 |
duarte |
420 |
RIGraph graph2 = null; |
171 |
|
|
RIGraph graph3 = null; |
172 |
duarte |
338 |
if (doublecm) { |
173 |
duarte |
471 |
graph2 = pdb.get_graph(ct2, cutoff2); |
174 |
|
|
if (maxRange>0) graph2.restrictContactsToMaxRange(maxRange); |
175 |
|
|
if (minRange>0) graph2.restrictContactsToMinRange(minRange); |
176 |
stehr |
379 |
if (cross) { |
177 |
|
|
graph3 = pdb.get_graph(ct3, cutoff3); |
178 |
duarte |
471 |
if (maxRange>0) graph3.restrictContactsToMaxRange(maxRange); |
179 |
|
|
if (minRange>0) graph3.restrictContactsToMinRange(minRange); |
180 |
duarte |
420 |
graphs = new RIGraph[3]; |
181 |
stehr |
379 |
graphs[0] = graph1; |
182 |
|
|
graphs[1] = graph2; |
183 |
|
|
graphs[2] = graph3; |
184 |
|
|
} else { |
185 |
duarte |
420 |
graphs = new RIGraph[2]; |
186 |
stehr |
379 |
graphs[0] = graph1; |
187 |
|
|
graphs[1] = graph2; |
188 |
|
|
} |
189 |
|
|
} else { |
190 |
duarte |
420 |
graphs = new RIGraph[1]; |
191 |
stehr |
379 |
graphs[0] = graph1; |
192 |
duarte |
338 |
} |
193 |
|
|
|
194 |
|
|
// defining files |
195 |
duarte |
347 |
File reportFile = new File(outputDir,baseName+".report"); |
196 |
duarte |
338 |
|
197 |
|
|
// creating TinkerRunner object |
198 |
stehr |
379 |
|
199 |
duarte |
338 |
TinkerRunner tr = null; |
200 |
|
|
try { |
201 |
stehr |
379 |
tr = new TinkerRunner(TINKERBINDIR, PRMFILE); |
202 |
duarte |
338 |
} catch (FileNotFoundException e3) { |
203 |
|
|
System.err.println("Couldn't find tinker bin dir "+TINKERBINDIR+". Exiting"); |
204 |
|
|
System.exit(1); |
205 |
|
|
} |
206 |
|
|
|
207 |
stehr |
379 |
// call reconstruction |
208 |
|
|
|
209 |
duarte |
338 |
try { |
210 |
stehr |
379 |
tr.reconstruct(sequence, graphs, n, outputDir, baseName, false); |
211 |
|
|
} catch (IOException e) { |
212 |
|
|
System.err.println("Error while running Tinker reconstruction: " + e.getMessage()); |
213 |
duarte |
485 |
System.exit(1); |
214 |
stehr |
379 |
} catch (TinkerError e) { |
215 |
|
|
System.err.println("Error while running Tinker reconstruction: " + e.getMessage()); |
216 |
duarte |
485 |
System.exit(1); |
217 |
duarte |
338 |
} |
218 |
stehr |
379 |
|
219 |
duarte |
368 |
double[] err = tr.getErrorFunctionVal(); |
220 |
duarte |
338 |
double[] mubv = tr.getMaxUpperBoundViol(); |
221 |
|
|
double[] mlbv = tr.getMaxLowerBoundViol(); |
222 |
|
|
double[] muv = tr.getMaxUpperViol(); |
223 |
|
|
double[] mlv = tr.getMaxLowerViol(); |
224 |
|
|
int[] nubv = tr.getNumUpperBoundViol(); |
225 |
|
|
int[] nlbv = tr.getNumLowerBoundViol(); |
226 |
|
|
int[] nuv = tr.getNumUpperViol(); |
227 |
|
|
int[] nlv = tr.getNumLowerViol(); |
228 |
|
|
double[] rbv = tr.getRmsBoundViol(); |
229 |
|
|
double[] rrv = tr.getRmsRestViol(); |
230 |
|
|
|
231 |
|
|
|
232 |
stehr |
379 |
// calculate rmsds |
233 |
duarte |
338 |
|
234 |
|
|
double[] rmsds = new double[n+1]; |
235 |
|
|
|
236 |
duarte |
347 |
for (int i = 1; i<=n; i++) { |
237 |
|
|
String ext = new Formatter().format(".%03d",i).toString(); |
238 |
|
|
File outputPdbFile = new File(outputDir, baseName+ext+".pdb"); |
239 |
|
|
try { |
240 |
stehr |
379 |
Pdb outputPdb = tr.getStructure(i); |
241 |
duarte |
347 |
rmsds[i] = pdb.rmsd(outputPdb, "Ca"); |
242 |
|
|
} |
243 |
stehr |
379 |
catch (TinkerError e) { |
244 |
duarte |
471 |
System.err.println("Error while trying to retrieve results from Tinker: "+ e.getMessage()); |
245 |
duarte |
347 |
} catch (ConformationsNotSameSizeError e) { |
246 |
stehr |
379 |
System.err.println(origPdbFile.getAbsolutePath()+" and "+outputPdbFile.getAbsolutePath()+" don't have the same conformation size, can't calculate rmsd for them."); |
247 |
duarte |
347 |
} |
248 |
|
|
} |
249 |
|
|
|
250 |
stehr |
379 |
// write report file |
251 |
duarte |
338 |
|
252 |
duarte |
347 |
try { |
253 |
|
|
PrintWriter reportOut = new PrintWriter(new FileOutputStream(reportFile)); |
254 |
duarte |
373 |
reportOut.println("run_id\tcutoff\tcutoff2\tcutoff3\tct\tct2\tct3\tnum_res" + |
255 |
|
|
"\tresult_id\terror_val" + |
256 |
|
|
"\tup_bound_viol\tlow_bound_viol\tmax_bound_up\tmax_bound_low\trms_bound" + |
257 |
duarte |
347 |
"\tup_viol\tlow_viol\tmax_up\tmax_low\trms_viol\trmsd_to_orig"); |
258 |
|
|
|
259 |
|
|
for (int i=1;i<=n;i++){ |
260 |
duarte |
368 |
String rmsd = String.format(Locale.US,"%6.3f",rmsds[i]); |
261 |
|
|
String errStr = String.format(Locale.US, "%6.3f", err[i]); |
262 |
duarte |
373 |
// run_id cutoff cutoff2 cutoff3 ct1 ct2 ct3 num_res |
263 |
|
|
reportOut.println(pdbCode+"_"+pdbChainCode+"\t"+cutoff1+"\t"+cutoff2+"\t"+cutoff3+"\t"+ct1+"\t"+ct2+"\t"+ct3+"\t"+sequence.length()+"\t"+ |
264 |
|
|
// result_id error_val |
265 |
|
|
i + "\t" + errStr + "\t" + |
266 |
|
|
// up_bound_viol low_bound_viol max_bound_up max_bound_low rms_bound |
267 |
|
|
nubv[i] + "\t" + nlbv[i] + "\t" + mubv[i] + "\t" + mlbv[i] + "\t" + rbv[i] + "\t" + |
268 |
|
|
// up_viol low_viol max_up max_low rms_viol |
269 |
|
|
nuv[i] + "\t" + nlv[i] + "\t" + muv[i] + "\t" + mlv[i] + "\t"+ rrv[i] + "\t"+ |
270 |
|
|
// rmsd_to_orig |
271 |
|
|
rmsd); |
272 |
duarte |
347 |
} |
273 |
|
|
reportOut.close(); |
274 |
|
|
} catch (FileNotFoundException e) { |
275 |
|
|
System.err.println("Couldn't write to report file "+reportFile.getAbsolutePath() +". Error: "+e.getMessage()); |
276 |
duarte |
338 |
} |
277 |
|
|
|
278 |
|
|
} |
279 |
|
|
|
280 |
|
|
} |