1 |
import gnu.getopt.Getopt;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

//import proteinstructure.CiffilePdb;
import proteinstructure.PdbLoadError;
import proteinstructure.RIGraph;
import proteinstructure.Pdb;
import proteinstructure.PdbCodeNotFoundError;
import proteinstructure.PdbasePdb;
import proteinstructure.PdbfilePdb;
import proteinstructure.SecStrucElement;
import tools.MySQLConnection;
18 |
|
19 |
|
20 |
public class genDbGraph { |
21 |
/*------------------------------ constants ------------------------------*/ |
22 |
|
23 |
public static final String PDB_DB = "pdbase"; |
24 |
public static final String DB_HOST = "white"; |
25 |
public static final String DB_USER = getUserName(); |
26 |
public static final String DB_PWD = "nieve"; |
27 |
public static final String DSSP_EXE = "/project/StruPPi/bin/dssp"; |
28 |
public static final String DSSP_PARAMS = "--"; |
29 |
public static final String NACCESS_EXE = "/project/StruPPi/bin/naccess"; |
30 |
public static final String NACCESS_PARAMS = ""; |
31 |
|
32 |
//public static double cutoff = 4.2; |
33 |
//public static String edgeType = "ALL"; |
34 |
|
35 |
/*---------------------------- private methods --------------------------*/ |
36 |
/** |
37 |
* Get user name from operating system (for use as database username). |
38 |
* */ |
39 |
private static String getUserName() { |
40 |
String user = null; |
41 |
user = System.getProperty("user.name"); |
42 |
if(user == null) { |
43 |
System.err.println("Could not get user name from operating system."); |
44 |
} |
45 |
return user; |
46 |
} |
47 |
|
48 |
public static void main(String[] args) throws IOException { |
49 |
|
50 |
|
51 |
String help = "Usage, 3 options:\n" + |
52 |
"1) genDbGraph -i <listfile> -d <distance_cutoff> -t <contact_type> [-r directed] -s <seq_sep> -o <output_db> [-D <pdbase_db>] [-m <mode>] \n" + |
53 |
"2) genDbGraph -p <pdb_code> -c <chain_pdb_code> -d <distance_cutoff> -t <contact_type> [-r directed] -s <seq_sep> -o <output_db> [-D <pdbase_db>] [-m <mode>] \n" + |
54 |
"3) genDbGraph -f <pdbfile> -c <chain_pdb_code> -d <distance_cutoff> -t <contact_type> [-r directed] -s <seq_sep> -o <output_db> [-m <mode>] \n" + |
55 |
"\nA comma separated list of contact types and distance cutoffs can be given instead of just 1, e.g. -d 8.0,8.5 -t Ca,Cb will generate the graphs for Ca at 8.0 and for Cb at 8.5\n" + |
56 |
"If only 1 contact type given and multiple cutoffs, graphs will be generated at all the cutoffs for the one contact type\n"+ |
57 |
"\nIn case 2) also a list of comma separated pdb codes and chain codes can be specified, e.g. -p 1bxy,1jos -c A,A\n" + |
58 |
"\nIf pdbase_db not specified, the default pdbase will be used\n" + |
59 |
"\nSecondary structure will be taken from pdbase database. If reading from pdb file and the pdb file is missing the secondary structure, then it will be assigned using dssp\n"; |
60 |
|
61 |
String listfile = ""; |
62 |
String[] pdbCodes = null; |
63 |
String[] pdbChainCodes = null; |
64 |
String pdbfile = ""; |
65 |
String pdbaseDb = PDB_DB; |
66 |
String[] edgeTypes = null; |
67 |
double[] cutoffs = null; |
68 |
int[] seqseps = null; |
69 |
boolean[] directed = null; |
70 |
String outputDb = ""; |
71 |
String mode = "GRAPH"; |
72 |
|
73 |
Getopt g = new Getopt("genDbGraph", args, "i:p:c:f:d:t:r:s:o:D:m:h?"); |
74 |
int c; |
75 |
while ((c = g.getopt()) != -1) { |
76 |
switch(c){ |
77 |
case 'i': |
78 |
listfile = g.getOptarg(); |
79 |
break; |
80 |
case 'p': |
81 |
pdbCodes = g.getOptarg().split(","); |
82 |
break; |
83 |
case 'c': |
84 |
pdbChainCodes = g.getOptarg().split(","); |
85 |
break; |
86 |
case 'f': |
87 |
pdbfile = g.getOptarg(); |
88 |
break; |
89 |
case 'd': |
90 |
String[] cutoffsStr = g.getOptarg().split(","); |
91 |
cutoffs = new double[cutoffsStr.length]; |
92 |
for (int i =0;i<cutoffs.length;i++) { |
93 |
cutoffs[i] = Double.valueOf(cutoffsStr[i]); |
94 |
} |
95 |
break; |
96 |
case 't': |
97 |
edgeTypes = g.getOptarg().split(","); |
98 |
break; |
99 |
case 'r': |
100 |
String[] directedStr = g.getOptarg().split(","); |
101 |
directed = new boolean[directedStr.length]; |
102 |
for (int i =0;i<directed.length;i++) { |
103 |
directed[i] = Boolean.valueOf(directedStr[i]); |
104 |
} |
105 |
break; |
106 |
case 's': |
107 |
String[] seqsepsStr = g.getOptarg().split(","); |
108 |
seqseps = new int[seqsepsStr.length]; |
109 |
for (int i=0;i<seqseps.length;i++) { |
110 |
seqseps[i] = Integer.valueOf(seqsepsStr[i]); |
111 |
} |
112 |
break; |
113 |
case 'o': |
114 |
outputDb = g.getOptarg(); |
115 |
break; |
116 |
case 'D': |
117 |
pdbaseDb = g.getOptarg(); |
118 |
break; |
119 |
case 'm': |
120 |
mode = g.getOptarg(); |
121 |
break; |
122 |
case 'h': |
123 |
case '?': |
124 |
System.out.println(help); |
125 |
System.exit(0); |
126 |
break; // getopt() already printed an error |
127 |
} |
128 |
} |
129 |
|
130 |
if (directed==null) { |
131 |
// we set by default all directed to false |
132 |
directed = new boolean[edgeTypes.length]; |
133 |
} |
134 |
|
135 |
if (outputDb.equals("") || edgeTypes==null || cutoffs==null) { |
136 |
System.err.println("Some missing option\n"); |
137 |
System.err.println(help); |
138 |
System.exit(1); |
139 |
} |
140 |
if (edgeTypes.length!=cutoffs.length && edgeTypes.length!=1) { |
141 |
System.err.println("Not same number of contact types as cutoffs given\n"); |
142 |
System.err.println(help); |
143 |
System.exit(1); |
144 |
} |
145 |
if (seqseps != null && edgeTypes.length!=seqseps.length) { |
146 |
System.err.println("Not same number of contact types as sequence separations given\n"); |
147 |
System.err.println(help); |
148 |
System.exit(1); |
149 |
} |
150 |
if (directed != null && edgeTypes.length!=directed.length) { |
151 |
System.err.println("Not same number of contact types as directionalities given\n"); |
152 |
System.err.println(help); |
153 |
System.exit(1); |
154 |
} |
155 |
if (listfile.equals("") && pdbCodes==null && pdbfile.equals("")){ |
156 |
System.err.println("Either a listfile, some pdb codes/chain codes or a pdbfile must be given\n"); |
157 |
System.err.println(help); |
158 |
System.exit(1); |
159 |
} |
160 |
if ((!listfile.equals("") && pdbCodes!=null) || (!listfile.equals("") && !pdbfile.equals("")) || (pdbCodes!=null && !pdbfile.equals(""))) { |
161 |
System.err.println("Options -p/-c, -i and -f/-c are exclusive. Use only one of them\n"); |
162 |
System.err.println(help); |
163 |
System.exit(1); |
164 |
} |
165 |
if (!(mode.equals("GRAPH") || mode.equals("PDB") || mode.equals("BOTH"))) { |
166 |
System.err.println("Allowed values for mode:GRAPH,PDB,BOTH."); |
167 |
System.err.println(help); |
168 |
System.exit(1); |
169 |
} |
170 |
|
171 |
// setting edgeTypes in case only 1 was given with multiple cutoffs |
172 |
if (edgeTypes.length==1 && cutoffs.length>1) { |
173 |
String edgeType = edgeTypes[0]; |
174 |
edgeTypes = new String[cutoffs.length]; |
175 |
for (int i=0;i<cutoffs.length;i++){ |
176 |
edgeTypes[i] = edgeType; |
177 |
} |
178 |
} |
179 |
|
180 |
|
181 |
MySQLConnection conn = null; |
182 |
|
183 |
try{ |
184 |
conn = new MySQLConnection(DB_HOST, DB_USER, DB_PWD); |
185 |
conn.setSqlMode("NO_UNSIGNED_SUBTRACTION,TRADITIONAL"); |
186 |
} catch (Exception e) { |
187 |
System.err.println("Error opening database connection. Exiting"); |
188 |
System.exit(1); |
189 |
} |
190 |
|
191 |
|
192 |
if (pdbfile.equals("")){ |
193 |
|
194 |
if (!listfile.equals("")) { |
195 |
BufferedReader fpdb = new BufferedReader(new FileReader(listfile)); |
196 |
String line = ""; |
197 |
int numLines = 0; |
198 |
fpdb.mark(100000); |
199 |
while ((line = fpdb.readLine() ) != null ) { |
200 |
if (line.length()>0) numLines++; |
201 |
} |
202 |
fpdb.reset(); |
203 |
pdbCodes = new String[numLines]; |
204 |
pdbChainCodes = new String[numLines]; |
205 |
numLines = 0; |
206 |
while ((line = fpdb.readLine() ) != null ) { |
207 |
pdbCodes[numLines] = line.split("\\s+")[0].toLowerCase(); |
208 |
pdbChainCodes[numLines] = line.split("\\s+")[1]; |
209 |
numLines++; |
210 |
} |
211 |
fpdb.close(); |
212 |
} |
213 |
|
214 |
int numPdbs = 0; |
215 |
|
216 |
for (int i=0;i<pdbCodes.length;i++) { |
217 |
String pdbCode = pdbCodes[i]; |
218 |
String pdbChainCode = pdbChainCodes[i]; |
219 |
|
220 |
boolean dssp = false, scop = false, naccess = false, consurf = false, ec = false, csa = false; |
221 |
int numGraphs = 0; |
222 |
|
223 |
try { |
224 |
|
225 |
System.out.println("Getting pdb data for "+pdbCode+"_"+pdbChainCode); |
226 |
|
227 |
Pdb pdb = new PdbasePdb(pdbCode, pdbaseDb, conn); |
228 |
//Pdb pdb = new CiffilePdb(new File("/project/StruPPi/BiO/DBd/PDB-REMEDIATED/data/structures/unzipped/all/mmCIF/"+pdbCode+".cif"), pdbChainCode); |
229 |
pdb.load(pdbChainCode); |
230 |
try { |
231 |
pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE); |
232 |
//pdb.runDssp(DSSP_EXE, DSSP_PARAMS); |
233 |
dssp = true; |
234 |
} catch (Exception e) { |
235 |
System.err.println(e.getMessage()); |
236 |
} |
237 |
if (!mode.equals("GRAPH")) { |
238 |
try { |
239 |
pdb.checkScop("1.71", false); |
240 |
scop = true; |
241 |
} catch (Exception e) { |
242 |
System.err.println(e.getMessage()); |
243 |
} |
244 |
try { |
245 |
int mistakes = pdb.checkConsurfHssp(false); |
246 |
System.out.println("ConsurfHssp Mistakes:"+mistakes); |
247 |
if (mistakes == 0) consurf = true; |
248 |
} catch (Exception e) { |
249 |
System.err.println(e.getMessage()); |
250 |
} |
251 |
try { |
252 |
pdb.checkEC(false); |
253 |
ec = true; |
254 |
} catch (Exception e) { |
255 |
System.err.println(e.getMessage()); |
256 |
} |
257 |
try { |
258 |
int mistakes = pdb.checkCSA("2.2.5", false); |
259 |
System.out.println("CSA Mistakes:"+mistakes); |
260 |
if (mistakes == 0) csa = true; |
261 |
} catch (Exception e) { |
262 |
System.err.println(e.getMessage()); |
263 |
}/* |
264 |
pdb.restrictToScopDomain("d1pjua2"); |
265 |
try { |
266 |
pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE); |
267 |
//pdb.runDssp(DSSP_EXE, DSSP_PARAMS); |
268 |
dssp = true; |
269 |
} catch (Exception e) { |
270 |
System.err.println(e.getMessage()); |
271 |
}*/ |
272 |
try { |
273 |
pdb.runNaccess(NACCESS_EXE, NACCESS_PARAMS); |
274 |
naccess = true; |
275 |
} catch (Exception e) { |
276 |
System.err.println(e.getMessage()); |
277 |
} |
278 |
|
279 |
//pdb.writeToDb(conn,outputDb); |
280 |
pdb.writeToDbFast(conn, outputDb); |
281 |
} |
282 |
// get graphs |
283 |
if (!mode.equals("PDB")) { |
284 |
for (int j = 0; j<edgeTypes.length; j++) { |
285 |
System.out.print("--> "+(directed[j]?"directed":"")+" graph "+edgeTypes[j]+" for cutoff "+cutoffs[j]); |
286 |
|
287 |
RIGraph graph = pdb.get_graph(edgeTypes[j], cutoffs[j], directed[j]); |
288 |
//graph.restrictContactsBetweenSs(); |
289 |
if (seqseps != null) { |
290 |
if (seqseps[j] > 1) { |
291 |
System.out.print(" and sequence separation >= "+seqseps[j]); |
292 |
graph.restrictContactsToMinRange(seqseps[j]); |
293 |
} |
294 |
} |
295 |
//graph.writeToDb(conn,outputDb); |
296 |
graph.write_graph_to_db_fast(conn,outputDb); |
297 |
|
298 |
System.out.println(); |
299 |
numPdbs++; |
300 |
numGraphs++; |
301 |
} |
302 |
} |
303 |
|
304 |
} catch (PdbLoadError e) { |
305 |
System.err.println("Error loading pdb data for " + pdbCode + pdbChainCode+", specific error: "+e.getMessage()); |
306 |
} catch (PdbCodeNotFoundError e) { |
307 |
System.err.println("Couldn't find pdb code "+pdbCode); |
308 |
} catch (SQLException e) { |
309 |
System.err.println("SQL error for structure "+pdbCode+"_"+pdbChainCode+", error: "+e.getMessage()); |
310 |
} |
311 |
/* catch (CiffileFormatError e) { |
312 |
System.err.println(e.getMessage()); |
313 |
}*/ |
314 |
|
315 |
System.out.println("SUMMARY:"+pdbCode+"_"+pdbChainCode+" dssp:"+dssp+" scop:"+scop+" naccess:"+naccess+" consurf:"+consurf+" ec:"+ec+" csa:"+csa+ " graphs:"+numGraphs); |
316 |
|
317 |
} |
318 |
|
319 |
// output results |
320 |
System.out.println("Number of graphs loaded successfully: " + numPdbs); |
321 |
|
322 |
|
323 |
} else { |
324 |
String pdbChainCode = pdbChainCodes[0]; |
325 |
boolean dssp = false, scop = false, naccess = false, consurf = false, ec = false, csa = false; |
326 |
int numGraphs = 0; |
327 |
|
328 |
try { |
329 |
|
330 |
System.out.println("Getting chain "+pdbChainCode+" from pdb file "+pdbfile); |
331 |
|
332 |
Pdb pdb = new PdbfilePdb(pdbfile); |
333 |
pdb.load(pdbChainCode); |
334 |
if (!pdb.hasSecondaryStructure()) { |
335 |
pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE); |
336 |
//pdb.runDssp(DSSP_EXE, DSSP_PARAMS); |
337 |
} |
338 |
if (!mode.equals("GRAPH")) { |
339 |
try { |
340 |
pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE); |
341 |
//pdb.runDssp(DSSP_EXE, DSSP_PARAMS); |
342 |
dssp = true; |
343 |
} catch (Exception e) { |
344 |
System.err.println(e.getMessage()); |
345 |
} |
346 |
try { |
347 |
pdb.checkScop("1.71", false); |
348 |
scop = true; |
349 |
} catch (Exception e) { |
350 |
System.err.println(e.getMessage()); |
351 |
} |
352 |
try { |
353 |
int mistakes = pdb.checkConsurfHssp(false); |
354 |
System.out.println("ConsurfHssp Mistakes:"+mistakes); |
355 |
if (mistakes == 0) consurf = true; |
356 |
} catch (Exception e) { |
357 |
System.err.println(e.getMessage()); |
358 |
} |
359 |
try { |
360 |
pdb.checkEC(false); |
361 |
ec = true; |
362 |
} catch (Exception e) { |
363 |
System.err.println(e.getMessage()); |
364 |
} |
365 |
try { |
366 |
int mistakes = pdb.checkCSA("2.2.5", false); |
367 |
System.out.println("CSA Mistakes:"+mistakes); |
368 |
if (mistakes == 0) csa = true; |
369 |
} catch (Exception e) { |
370 |
System.err.println(e.getMessage()); |
371 |
} |
372 |
/*pdb.restrictToScopDomain("d1eaka3"); |
373 |
try { |
374 |
pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE); |
375 |
//pdb.runDssp(DSSP_EXE, DSSP_PARAMS); |
376 |
dssp = true; |
377 |
} catch (Exception e) { |
378 |
System.err.println(e.getMessage()); |
379 |
}*/ |
380 |
try { |
381 |
pdb.runNaccess(NACCESS_EXE, NACCESS_PARAMS); |
382 |
naccess = true; |
383 |
} catch (Exception e) { |
384 |
System.err.println(e.getMessage()); |
385 |
} |
386 |
|
387 |
//pdb.writeToDb(conn,outputDb); |
388 |
pdb.writeToDbFast(conn, outputDb); |
389 |
} |
390 |
|
391 |
// get graphs |
392 |
if (!mode.equals("PDB")) { |
393 |
for (int j = 0; j<edgeTypes.length; j++) { |
394 |
System.out.print("--> "+(directed[j]?"directed":"")+" graph "+edgeTypes[j]+" for cutoff "+cutoffs[j]); |
395 |
|
396 |
RIGraph graph = pdb.get_graph(edgeTypes[j], cutoffs[j], directed[j]); |
397 |
//graph.restrictContactsBetweenSs(); |
398 |
if (seqseps != null) { |
399 |
if (seqseps[j] > 1) { |
400 |
System.out.print(" and sequence separation >= "+seqseps[j]); |
401 |
graph.restrictContactsToMinRange(seqseps[j]); |
402 |
} |
403 |
} |
404 |
//graph.writeToDb(conn,outputDb); |
405 |
graph.write_graph_to_db_fast(conn,outputDb); |
406 |
|
407 |
System.out.println(); |
408 |
numGraphs++; |
409 |
} |
410 |
} |
411 |
|
412 |
} catch (SQLException e) { |
413 |
System.err.println("Couldn't write graph to db, error: "+e.getMessage()); |
414 |
} catch (PdbLoadError e) { |
415 |
System.err.println("Error loading from pdb file "+pdbfile+", specific error: "+e.getMessage()); |
416 |
} |
417 |
|
418 |
System.out.println("SUMMARY:"+pdbfile+"_"+pdbChainCode+" dssp:"+dssp+" scop:"+scop+" naccess:"+naccess+" consurf:"+consurf+" ec:"+ec+" csa:"+csa+ " graphs:"+numGraphs); |
419 |
|
420 |
} |
421 |
|
422 |
// closing db connection |
423 |
conn.close(); |
424 |
} |
425 |
|
426 |
} |