1 |
duarte |
338 |
import gnu.getopt.Getopt; |
2 |
|
|
|
3 |
|
|
import java.io.File; |
4 |
|
|
import java.io.FileNotFoundException; |
5 |
|
|
import java.io.IOException; |
6 |
|
|
import java.sql.SQLException; |
7 |
|
|
import java.util.Formatter; |
8 |
|
|
|
9 |
|
|
import proteinstructure.ConformationsNotSameSizeError; |
10 |
|
|
import proteinstructure.Graph; |
11 |
|
|
import proteinstructure.Pdb; |
12 |
|
|
import proteinstructure.PdbChainCodeNotFoundError; |
13 |
|
|
import proteinstructure.PdbCodeNotFoundError; |
14 |
|
|
import proteinstructure.PdbaseInconsistencyError; |
15 |
|
|
import proteinstructure.PdbasePdb; |
16 |
|
|
import proteinstructure.PdbfileFormatError; |
17 |
|
|
import proteinstructure.PdbfilePdb; |
18 |
|
|
|
19 |
|
|
import tinker.ConstraintsMaker; |
20 |
|
|
import tinker.TinkerError; |
21 |
|
|
import tinker.TinkerRunner; |
22 |
|
|
|
23 |
|
|
|
24 |
|
|
public class reconstruct { |
25 |
|
|
|
26 |
|
|
|
27 |
|
|
private static final String TINKERBINDIR = "/project/StruPPi/Software/tinker/bin"; |
28 |
|
|
private static final String PRMFILE = "/project/StruPPi/Software/tinker/amber/amber99.prm"; |
29 |
|
|
private static final String PRMTYPE = "amber"; |
30 |
|
|
|
31 |
|
|
public static void main(String[] args) { |
32 |
|
|
|
33 |
|
|
String programName = reconstruct.class.getName(); |
34 |
|
|
String help = "Usage:\n" + |
35 |
|
|
programName+" -p <pdb code> -c <pdb chain code> -t <contact_type> [-r] -d <distance cutoff 1> -D <distance cutoff 2> -i <distance cutoff 3> -b <base name> -o <output dir> [-n <number of models>]\n"; |
36 |
|
|
|
37 |
|
|
String pdbCode = ""; |
38 |
|
|
String pdbChainCode = ""; |
39 |
|
|
String ct = ""; |
40 |
|
|
double cutoff1 = 0.0; |
41 |
|
|
double cutoff2 = 0.0; |
42 |
|
|
double cutoff3 = 0.0; |
43 |
|
|
String outputDir = ""; |
44 |
|
|
String baseName = ""; |
45 |
|
|
boolean cross = false; |
46 |
|
|
int n = 1; |
47 |
|
|
|
48 |
|
|
Getopt g = new Getopt(programName, args, "p:c:d:t:rb:o:d:D:i:n:h?"); |
49 |
|
|
int c; |
50 |
|
|
while ((c = g.getopt()) != -1) { |
51 |
|
|
switch(c){ |
52 |
|
|
case 'p': |
53 |
|
|
pdbCode = g.getOptarg(); |
54 |
|
|
break; |
55 |
|
|
case 'c': |
56 |
|
|
pdbChainCode = g.getOptarg(); |
57 |
|
|
break; |
58 |
|
|
case 't': |
59 |
|
|
ct = g.getOptarg(); |
60 |
|
|
break; |
61 |
|
|
case 'r': |
62 |
|
|
cross = true; |
63 |
|
|
break; |
64 |
|
|
case 'd': |
65 |
|
|
cutoff1 = Double.valueOf(g.getOptarg()); |
66 |
|
|
break; |
67 |
|
|
case 'D': |
68 |
|
|
cutoff2 = Double.valueOf(g.getOptarg()); |
69 |
|
|
break; |
70 |
|
|
case 'i': |
71 |
|
|
cutoff3 = Double.valueOf(g.getOptarg()); |
72 |
|
|
break; |
73 |
|
|
case 'b': |
74 |
|
|
baseName = g.getOptarg(); |
75 |
|
|
break; |
76 |
|
|
case 'o': |
77 |
|
|
outputDir = g.getOptarg(); |
78 |
|
|
break; |
79 |
|
|
case 'n': |
80 |
|
|
n = Integer.parseInt(g.getOptarg()); |
81 |
|
|
break; |
82 |
|
|
case 'h': |
83 |
|
|
case '?': |
84 |
|
|
System.out.println(help); |
85 |
|
|
System.exit(0); |
86 |
|
|
break; // getopt() already printed an error |
87 |
|
|
} |
88 |
|
|
} |
89 |
|
|
|
90 |
|
|
if (pdbCode.equals("") || pdbChainCode.equals("") || ct.equals("") || cutoff1==0.0 || outputDir.equals("") || baseName.equals("")){ |
91 |
|
|
System.err.println("Must specify at least -p, -c, -t, -d, -o and -b"); |
92 |
|
|
System.err.println(help); |
93 |
|
|
System.exit(1); |
94 |
|
|
} |
95 |
|
|
|
96 |
|
|
boolean doublecm = false; |
97 |
|
|
String ct1 = ct; |
98 |
|
|
String ct2 = ct; |
99 |
|
|
String ct3 = null; |
100 |
|
|
if (ct.contains("_")) { |
101 |
|
|
ct1 = ct.split("_")[0]; |
102 |
|
|
ct2 = ct.split("_")[1]; |
103 |
|
|
doublecm = true; |
104 |
|
|
} |
105 |
|
|
if (cross) { |
106 |
|
|
ct3 = ct1+"/"+ct2; |
107 |
|
|
} |
108 |
|
|
|
109 |
|
|
if (cutoff2==0.0) { |
110 |
|
|
cutoff2 = cutoff1; |
111 |
|
|
} |
112 |
|
|
if (cutoff3==0.0) { |
113 |
|
|
cutoff3 = cutoff1; |
114 |
|
|
} |
115 |
|
|
|
116 |
|
|
|
117 |
|
|
Pdb pdb = null; |
118 |
|
|
try { |
119 |
|
|
pdb = new PdbasePdb(pdbCode,pdbChainCode); |
120 |
|
|
} catch (PdbaseInconsistencyError e) { |
121 |
|
|
System.err.println("Pdbase inconsistency for structure "+pdbCode+". Can't continue, exiting"); |
122 |
|
|
System.exit(1); |
123 |
|
|
} catch (PdbCodeNotFoundError e) { |
124 |
|
|
System.err.println("Given pdb code "+pdbCode+" couldn't be found in pdbase. Exiting"); |
125 |
|
|
System.exit(1); |
126 |
|
|
} catch (SQLException e) { |
127 |
|
|
System.err.println("Problems connecting to database for getting pdb data for "+pdbCode+". Exiting"); |
128 |
|
|
System.exit(1); |
129 |
|
|
} catch (PdbChainCodeNotFoundError e) { |
130 |
|
|
System.err.println("Given pdb chain code "+pdbChainCode+" couldn't be found for pdb code "+pdbCode+". Exiting"); |
131 |
|
|
System.exit(1); |
132 |
|
|
} |
133 |
|
|
String sequence = pdb.getSequence(); |
134 |
|
|
|
135 |
|
|
Graph graph1 = pdb.get_graph(ct1, cutoff1); |
136 |
|
|
Graph graph2 = null; |
137 |
|
|
Graph graph3 = null; |
138 |
|
|
if (doublecm) { |
139 |
|
|
graph2 = pdb.get_graph(ct2, cutoff2); |
140 |
|
|
} |
141 |
|
|
if (cross) { |
142 |
|
|
graph3 = pdb.get_graph(ct3, cutoff3); |
143 |
|
|
} |
144 |
|
|
|
145 |
|
|
|
146 |
|
|
// defining files |
147 |
|
|
File logFile = new File(outputDir,"tinker.log"); |
148 |
|
|
File prmFile = new File(PRMFILE); |
149 |
|
|
File xyzFile = new File(outputDir,baseName+".xyz"); |
150 |
|
|
File seqFile = new File(outputDir,baseName+".seq"); |
151 |
|
|
File pdbFile = new File(outputDir,baseName+".pdb"); |
152 |
|
|
File keyFile = new File(outputDir,baseName+".key"); |
153 |
|
|
|
154 |
|
|
// creating TinkerRunner object |
155 |
|
|
TinkerRunner tr = null; |
156 |
|
|
try { |
157 |
|
|
tr = new TinkerRunner(TINKERBINDIR,PRMFILE,logFile); |
158 |
|
|
} catch (FileNotFoundException e3) { |
159 |
|
|
System.err.println("Couldn't find tinker bin dir "+TINKERBINDIR+". Exiting"); |
160 |
|
|
System.exit(1); |
161 |
|
|
} |
162 |
|
|
|
163 |
|
|
// 1. run tinker's protein program |
164 |
|
|
try { |
165 |
|
|
tr.runProtein(sequence, outputDir, baseName ); |
166 |
|
|
} catch (IOException e2) { |
167 |
|
|
System.err.println("Couldn't read file to run 'protein' "+seqFile.getAbsolutePath()); |
168 |
|
|
System.err.println("Exiting"); |
169 |
|
|
System.exit(1); |
170 |
|
|
} catch (TinkerError e2) { |
171 |
|
|
System.err.println("Tinker error while running 'protein', check log file "+logFile.getAbsolutePath()+". Exiting"); |
172 |
|
|
System.exit(1); |
173 |
|
|
} |
174 |
|
|
|
175 |
|
|
// 1a. convert xyz file to pdb to be able to map atom serials after |
176 |
|
|
try { |
177 |
|
|
tr.runXyzpdb(xyzFile, seqFile, pdbFile); |
178 |
|
|
} catch (IOException e1) { |
179 |
|
|
System.err.println("Couldn't read files "+xyzFile.getAbsolutePath()+" or "+seqFile.getAbsolutePath()+" or write to "+pdbFile.getAbsolutePath()+" for running 'xyzpdb'"); |
180 |
|
|
System.err.println("Exiting"); |
181 |
|
|
System.exit(1); |
182 |
|
|
} catch (TinkerError e1) { |
183 |
|
|
System.err.println("Tinker error while running xyzpdb, check log file "+logFile.getAbsolutePath()+". Exiting"); |
184 |
|
|
System.exit(1); |
185 |
|
|
} |
186 |
|
|
|
187 |
|
|
// 2. creating constraints into key file |
188 |
|
|
ConstraintsMaker cm = null; |
189 |
|
|
try { |
190 |
|
|
cm = new ConstraintsMaker(pdbFile,xyzFile,prmFile,PRMTYPE,keyFile); |
191 |
|
|
} catch (IOException e3) { |
192 |
|
|
System.err.println("Couldn't read files "+xyzFile.getAbsolutePath()+", "+pdbFile.getAbsolutePath()+" or, "+prmFile.getAbsolutePath()+" write to "+keyFile.getAbsolutePath()+" for creating distance constraints"); |
193 |
|
|
System.err.println("Exiting"); |
194 |
|
|
System.exit(1); |
195 |
|
|
} catch (PdbfileFormatError e3) { |
196 |
|
|
System.err.println("pdb file "+pdbFile.getAbsolutePath()+" converted from "+xyzFile.getAbsolutePath()+" doesn't seem to be in the right format. Check log? ("+logFile.getAbsolutePath()+"). Exiting"); |
197 |
|
|
System.exit(1); |
198 |
|
|
} |
199 |
|
|
try { |
200 |
|
|
cm.createConstraints(graph1); |
201 |
|
|
if (doublecm) cm.createConstraints(graph2); |
202 |
|
|
if (cross) cm.createConstraints(graph3); |
203 |
|
|
} catch (Exception e2) { |
204 |
|
|
System.err.println("Invalid contact type for writing constraints."); |
205 |
|
|
System.err.println("Error: "+e2.getMessage()); |
206 |
|
|
System.err.println("Exiting"); |
207 |
|
|
System.exit(1); |
208 |
|
|
} |
209 |
|
|
cm.closeKeyFile(); |
210 |
|
|
|
211 |
|
|
// 3. run tinker's distgeom |
212 |
|
|
try { |
213 |
|
|
System.out.println("Running distgeom..."); |
214 |
|
|
tr.runDistgeom(xyzFile, outputDir, baseName, n); |
215 |
|
|
} catch (TinkerError e1) { |
216 |
duarte |
340 |
System.err.println(e1.getMessage()); |
217 |
|
|
System.err.println("Exiting"); |
218 |
duarte |
338 |
System.exit(1); |
219 |
|
|
} catch (IOException e1) { |
220 |
|
|
System.err.println("Couldn't read files "+xyzFile.getAbsolutePath()+" or write 'distgeom' output files to output dir "+outputDir); |
221 |
|
|
System.err.println("Exiting"); |
222 |
|
|
System.exit(1); |
223 |
stehr |
344 |
} catch(InterruptedException e) { |
224 |
|
|
System.err.println("Distgeom was interrupted:" + e.getMessage()); |
225 |
|
|
System.err.println("Exiting."); |
226 |
|
|
System.exit(1); |
227 |
duarte |
338 |
} |
228 |
|
|
|
229 |
|
|
double[] mubv = tr.getMaxUpperBoundViol(); |
230 |
|
|
double[] mlbv = tr.getMaxLowerBoundViol(); |
231 |
|
|
double[] muv = tr.getMaxUpperViol(); |
232 |
|
|
double[] mlv = tr.getMaxLowerViol(); |
233 |
|
|
int[] nubv = tr.getNumUpperBoundViol(); |
234 |
|
|
int[] nlbv = tr.getNumLowerBoundViol(); |
235 |
|
|
int[] nuv = tr.getNumUpperViol(); |
236 |
|
|
int[] nlv = tr.getNumLowerViol(); |
237 |
|
|
double[] rbv = tr.getRmsBoundViol(); |
238 |
|
|
double[] rrv = tr.getRmsRestViol(); |
239 |
|
|
|
240 |
|
|
|
241 |
|
|
// 4. converting xyz output files to pdb files and calculating rmsds |
242 |
|
|
|
243 |
|
|
double[] rmsds = new double[n+1]; |
244 |
|
|
|
245 |
|
|
for (int i = 1; i<=n; i++) { |
246 |
|
|
String ext = new Formatter().format(".%03d",i).toString(); |
247 |
|
|
File outputXyzFile = new File(outputDir, baseName+ext); |
248 |
|
|
File outputPdbFile = new File(outputDir, baseName+ext+".pdb"); |
249 |
|
|
try { |
250 |
|
|
tr.runXyzpdb(outputXyzFile, seqFile, outputPdbFile); |
251 |
|
|
|
252 |
|
|
Pdb outputPdb = new PdbfilePdb(outputPdbFile.getAbsolutePath(),"NULL"); |
253 |
|
|
rmsds[i] = pdb.rmsd(outputPdb, "Ca"); |
254 |
|
|
|
255 |
|
|
} catch (IOException e) { |
256 |
|
|
System.err.println("Couldn't read file "+outputXyzFile.getAbsolutePath()+", or "+seqFile.getAbsolutePath()+", or write to "+outputPdbFile.getAbsolutePath()+" while converting with 'xyzpdb'"); |
257 |
|
|
System.err.println("Can't calculate rmsd for it"); |
258 |
|
|
} catch (TinkerError e) { |
259 |
|
|
System.err.println("Tinker error while running 'xyzpdb' to convert"+outputXyzFile.getAbsolutePath()+", check log file "+logFile.getAbsolutePath()); |
260 |
|
|
System.err.println("Can't calculate rmsd for it"); |
261 |
|
|
} |
262 |
|
|
catch (PdbfileFormatError e) { |
263 |
|
|
System.err.println("Output pdb file "+outputPdbFile.getAbsolutePath()+" doesn't seem to be in the correcet format. Can't calculate rmsd for it"); |
264 |
|
|
} catch (PdbChainCodeNotFoundError e) { |
265 |
|
|
// this shouldn't happen, chain code is hard coded, we throw stack trace and continue if it happens |
266 |
|
|
e.printStackTrace(); |
267 |
|
|
} catch (ConformationsNotSameSizeError e) { |
268 |
|
|
System.err.println(pdbFile.getAbsolutePath()+" and "+outputPdbFile.getAbsolutePath()+" don't have the same conformation size, can't calculate rmsd for them."); |
269 |
|
|
} |
270 |
|
|
|
271 |
|
|
tr.closeLog(); |
272 |
|
|
} |
273 |
|
|
|
274 |
|
|
|
275 |
|
|
// 6. report |
276 |
|
|
System.out.println("accession_code\tchain_pdb_code\tcutoff\tcutoff2\tcutoff3\tct\tct2\tct3" + |
277 |
|
|
"\tresult_id\tup_bound_viol\tlow_bound_viol\tmax_bound_up\tmax_bound_low\trms_bound" + |
278 |
|
|
"\tup_viol\tlow_viol\tmax_up\tmax_low\trms_viol\trmsd_to_orig"); |
279 |
|
|
|
280 |
|
|
for (int i=1;i<=n;i++){ |
281 |
|
|
System.out.println(pdbCode+"\t"+pdbChainCode+"\t"+cutoff1+"\t"+cutoff2+"\t"+cutoff3+"\t"+ct1+"\t"+ct2+"\t"+ct3+"\t"+ |
282 |
duarte |
340 |
i+"\t"+nubv[i]+"\t"+nlbv[i]+"\t"+mubv[i]+"\t"+mlbv[i]+"\t"+muv[i]+"\t"+mlv[i]+"\t"+rbv[i]+"\t"+ |
283 |
duarte |
338 |
"\t"+nuv[i]+"\t"+nlv[i]+"\t"+rrv[i]+"\t"+rmsds[i]); |
284 |
|
|
} |
285 |
|
|
|
286 |
|
|
} |
287 |
|
|
|
288 |
|
|
} |