ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/genDbGraph.java
Revision: 514
Committed: Fri Jan 11 14:59:43 2008 UTC (16 years, 8 months ago) by filippis
File size: 14412 byte(s)
Log Message:
Latest scop and csa versions updated.
Line User Rev File contents
1 duarte 264 import gnu.getopt.Getopt;
2    
3     import java.io.BufferedReader;
4     import java.io.FileReader;
5     import java.io.IOException;
6     import java.sql.SQLException;
7    
8    
9 filippis 398 //import proteinstructure.CiffilePdb;
10 duarte 441 import proteinstructure.PdbLoadError;
11 duarte 420 import proteinstructure.RIGraph;
12 duarte 264 import proteinstructure.Pdb;
13     import proteinstructure.PdbCodeNotFoundError;
14     import proteinstructure.PdbasePdb;
15     import proteinstructure.PdbfilePdb;
16 filippis 412 import proteinstructure.SecStrucElement;
17 duarte 264 import tools.MySQLConnection;
18    
19    
20     public class genDbGraph {
21     /*------------------------------ constants ------------------------------*/
22    
23     public static final String PDB_DB = "pdbase";
24     public static final String DB_HOST = "white";
25     public static final String DB_USER = getUserName();
26     public static final String DB_PWD = "nieve";
27     public static final String DSSP_EXE = "/project/StruPPi/bin/dssp";
28     public static final String DSSP_PARAMS = "--";
29 filippis 396 public static final String NACCESS_EXE = "/project/StruPPi/bin/naccess";
30     public static final String NACCESS_PARAMS = "";
31 duarte 264
32     //public static double cutoff = 4.2;
33     //public static String edgeType = "ALL";
34    
35     /*---------------------------- private methods --------------------------*/
36     /**
37     * Get user name from operating system (for use as database username).
38     * */
39     private static String getUserName() {
40     String user = null;
41     user = System.getProperty("user.name");
42     if(user == null) {
43     System.err.println("Could not get user name from operating system.");
44     }
45     return user;
46     }
47    
48     public static void main(String[] args) throws IOException {
49    
50    
51     String help = "Usage, 3 options:\n" +
52 filippis 438 "1) genDbGraph -i <listfile> -d <distance_cutoff> -t <contact_type> [-r directed] -s <seq_sep> -o <output_db> [-D <pdbase_db>] [-m <mode>] \n" +
53     "2) genDbGraph -p <pdb_code> -c <chain_pdb_code> -d <distance_cutoff> -t <contact_type> [-r directed] -s <seq_sep> -o <output_db> [-D <pdbase_db>] [-m <mode>] \n" +
54     "3) genDbGraph -f <pdbfile> -c <chain_pdb_code> -d <distance_cutoff> -t <contact_type> [-r directed] -s <seq_sep> -o <output_db> [-m <mode>] \n" +
55 duarte 359 "\nA comma separated list of contact types and distance cutoffs can be given instead of just 1, e.g. -d 8.0,8.5 -t Ca,Cb will generate the graphs for Ca at 8.0 and for Cb at 8.5\n" +
56     "If only 1 contact type given and multiple cutoffs, graphs will be generated at all the cutoffs for the one contact type\n"+
57     "\nIn case 2) also a list of comma separated pdb codes and chain codes can be specified, e.g. -p 1bxy,1jos -c A,A\n" +
58 duarte 358 "\nIf pdbase_db not specified, the default pdbase will be used\n" +
59     "\nSecondary structure will be taken from pdbase database. If reading from pdb file and the pdb file is missing the secondary structure, then it will be assigned using dssp\n";
60 duarte 264
61     String listfile = "";
62     String[] pdbCodes = null;
63     String[] pdbChainCodes = null;
64     String pdbfile = "";
65     String pdbaseDb = PDB_DB;
66 duarte 359 String[] edgeTypes = null;
67     double[] cutoffs = null;
68 filippis 407 int[] seqseps = null;
69 filippis 438 boolean[] directed = null;
70 duarte 264 String outputDb = "";
71 filippis 396 String mode = "GRAPH";
72 duarte 264
73 filippis 438 Getopt g = new Getopt("genDbGraph", args, "i:p:c:f:d:t:r:s:o:D:m:h?");
74 duarte 264 int c;
75     while ((c = g.getopt()) != -1) {
76     switch(c){
77     case 'i':
78     listfile = g.getOptarg();
79     break;
80     case 'p':
81     pdbCodes = g.getOptarg().split(",");
82     break;
83     case 'c':
84     pdbChainCodes = g.getOptarg().split(",");
85     break;
86     case 'f':
87     pdbfile = g.getOptarg();
88     break;
89     case 'd':
90 duarte 359 String[] cutoffsStr = g.getOptarg().split(",");
91     cutoffs = new double[cutoffsStr.length];
92     for (int i =0;i<cutoffs.length;i++) {
93     cutoffs[i] = Double.valueOf(cutoffsStr[i]);
94     }
95 duarte 264 break;
96     case 't':
97 duarte 359 edgeTypes = g.getOptarg().split(",");
98 duarte 264 break;
99 filippis 438 case 'r':
100     String[] directedStr = g.getOptarg().split(",");
101     directed = new boolean[directedStr.length];
102     for (int i =0;i<directed.length;i++) {
103     directed[i] = Boolean.valueOf(directedStr[i]);
104     }
105     break;
106 filippis 407 case 's':
107     String[] seqsepsStr = g.getOptarg().split(",");
108     seqseps = new int[seqsepsStr.length];
109 filippis 503 for (int i=0;i<seqseps.length;i++) {
110 filippis 407 seqseps[i] = Integer.valueOf(seqsepsStr[i]);
111     }
112     break;
113 duarte 264 case 'o':
114     outputDb = g.getOptarg();
115     break;
116     case 'D':
117     pdbaseDb = g.getOptarg();
118     break;
119 filippis 396 case 'm':
120     mode = g.getOptarg();
121 filippis 398 break;
122 duarte 264 case 'h':
123     case '?':
124     System.out.println(help);
125     System.exit(0);
126     break; // getopt() already printed an error
127     }
128     }
129    
130 duarte 453 if (directed==null) {
131     // we set by default all directed to false
132     directed = new boolean[edgeTypes.length];
133     }
134    
135 duarte 359 if (outputDb.equals("") || edgeTypes==null || cutoffs==null) {
136     System.err.println("Some missing option\n");
137 duarte 264 System.err.println(help);
138     System.exit(1);
139     }
140 duarte 359 if (edgeTypes.length!=cutoffs.length && edgeTypes.length!=1) {
141     System.err.println("Not same number of contact types as cutoffs given\n");
142     System.err.println(help);
143     System.exit(1);
144     }
145 filippis 407 if (seqseps != null && edgeTypes.length!=seqseps.length) {
146     System.err.println("Not same number of contact types as sequence separations given\n");
147     System.err.println(help);
148     System.exit(1);
149     }
150 filippis 438 if (directed != null && edgeTypes.length!=directed.length) {
151     System.err.println("Not same number of contact types as directionalities given\n");
152     System.err.println(help);
153     System.exit(1);
154     }
155 duarte 264 if (listfile.equals("") && pdbCodes==null && pdbfile.equals("")){
156 duarte 359 System.err.println("Either a listfile, some pdb codes/chain codes or a pdbfile must be given\n");
157 duarte 264 System.err.println(help);
158     System.exit(1);
159     }
160     if ((!listfile.equals("") && pdbCodes!=null) || (!listfile.equals("") && !pdbfile.equals("")) || (pdbCodes!=null && !pdbfile.equals(""))) {
161 duarte 359 System.err.println("Options -p/-c, -i and -f/-c are exclusive. Use only one of them\n");
162 duarte 264 System.err.println(help);
163     System.exit(1);
164     }
165 filippis 396 if (!(mode.equals("GRAPH") || mode.equals("PDB") || mode.equals("BOTH"))) {
166     System.err.println("Allowed values for mode:GRAPH,PDB,BOTH.");
167     System.err.println(help);
168     System.exit(1);
169     }
170 duarte 359
171     // setting edgeTypes in case only 1 was given with multiple cutoffs
172     if (edgeTypes.length==1 && cutoffs.length>1) {
173     String edgeType = edgeTypes[0];
174     edgeTypes = new String[cutoffs.length];
175     for (int i=0;i<cutoffs.length;i++){
176     edgeTypes[i] = edgeType;
177     }
178     }
179 duarte 264
180    
181     MySQLConnection conn = null;
182    
183     try{
184     conn = new MySQLConnection(DB_HOST, DB_USER, DB_PWD);
185 filippis 407 conn.setSqlMode("NO_UNSIGNED_SUBTRACTION,TRADITIONAL");
186 duarte 264 } catch (Exception e) {
187     System.err.println("Error opening database connection. Exiting");
188     System.exit(1);
189     }
190    
191    
192     if (pdbfile.equals("")){
193    
194     if (!listfile.equals("")) {
195     BufferedReader fpdb = new BufferedReader(new FileReader(listfile));
196     String line = "";
197     int numLines = 0;
198     fpdb.mark(100000);
199     while ((line = fpdb.readLine() ) != null ) {
200 duarte 285 if (line.length()>0) numLines++;
201 duarte 264 }
202     fpdb.reset();
203     pdbCodes = new String[numLines];
204     pdbChainCodes = new String[numLines];
205     numLines = 0;
206     while ((line = fpdb.readLine() ) != null ) {
207 duarte 285 pdbCodes[numLines] = line.split("\\s+")[0].toLowerCase();
208     pdbChainCodes[numLines] = line.split("\\s+")[1];
209 duarte 264 numLines++;
210     }
211 duarte 358 fpdb.close();
212 duarte 264 }
213    
214     int numPdbs = 0;
215    
216     for (int i=0;i<pdbCodes.length;i++) {
217     String pdbCode = pdbCodes[i];
218     String pdbChainCode = pdbChainCodes[i];
219 filippis 412
220     boolean dssp = false, scop = false, naccess = false, consurf = false, ec = false, csa = false;
221     int numGraphs = 0;
222    
223 duarte 264 try {
224 filippis 412
225 duarte 359 System.out.println("Getting pdb data for "+pdbCode+"_"+pdbChainCode);
226 duarte 285
227 filippis 503 Pdb pdb = new PdbasePdb(pdbCode, pdbaseDb, conn);
228     //Pdb pdb = new CiffilePdb(new File("/project/StruPPi/BiO/DBd/PDB-REMEDIATED/data/structures/unzipped/all/mmCIF/"+pdbCode+".cif"), pdbChainCode);
229     pdb.load(pdbChainCode);
230     try {
231     pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE);
232     //pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
233     dssp = true;
234     } catch (Exception e) {
235     System.err.println(e.getMessage());
236     }
237 filippis 396 if (!mode.equals("GRAPH")) {
238 filippis 398 try {
239 filippis 514 pdb.checkScop("1.73", false);
240 filippis 412 scop = true;
241 filippis 398 } catch (Exception e) {
242     System.err.println(e.getMessage());
243     }
244     try {
245     int mistakes = pdb.checkConsurfHssp(false);
246     System.out.println("ConsurfHssp Mistakes:"+mistakes);
247 filippis 412 if (mistakes == 0) consurf = true;
248 filippis 398 } catch (Exception e) {
249     System.err.println(e.getMessage());
250 filippis 503 }
251 filippis 398 try {
252     pdb.checkEC(false);
253 filippis 412 ec = true;
254 filippis 398 } catch (Exception e) {
255     System.err.println(e.getMessage());
256     }
257     try {
258 filippis 514 int mistakes = pdb.checkCSA("2.2.7", false);
259 filippis 398 System.out.println("CSA Mistakes:"+mistakes);
260 filippis 412 if (mistakes == 0) csa = true;
261 filippis 398 } catch (Exception e) {
262     System.err.println(e.getMessage());
263 filippis 503 }/*
264     pdb.restrictToScopDomain("d1pjua2");
265     try {
266     pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE);
267     //pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
268     dssp = true;
269     } catch (Exception e) {
270     System.err.println(e.getMessage());
271     }*/
272     try {
273     pdb.runNaccess(NACCESS_EXE, NACCESS_PARAMS);
274     naccess = true;
275     } catch (Exception e) {
276     System.err.println(e.getMessage());
277 filippis 398 }
278 filippis 503
279 filippis 407 //pdb.writeToDb(conn,outputDb);
280     pdb.writeToDbFast(conn, outputDb);
281 filippis 396 }
282 duarte 359 // get graphs
283 filippis 396 if (!mode.equals("PDB")) {
284     for (int j = 0; j<edgeTypes.length; j++) {
285 filippis 438 System.out.print("--> "+(directed[j]?"directed":"")+" graph "+edgeTypes[j]+" for cutoff "+cutoffs[j]);
286 filippis 396
287 filippis 438 RIGraph graph = pdb.get_graph(edgeTypes[j], cutoffs[j], directed[j]);
288 filippis 503 //graph.restrictContactsBetweenSs();
289 filippis 407 if (seqseps != null) {
290     if (seqseps[j] > 1) {
291 filippis 411 System.out.print(" and sequence separation >= "+seqseps[j]);
292 filippis 407 graph.restrictContactsToMinRange(seqseps[j]);
293     }
294     }
295 duarte 420 //graph.writeToDb(conn,outputDb);
296 filippis 407 graph.write_graph_to_db_fast(conn,outputDb);
297 filippis 396
298 duarte 420 System.out.println();
299 filippis 396 numPdbs++;
300 filippis 412 numGraphs++;
301 filippis 396 }
302 duarte 359 }
303 duarte 285
304 duarte 441 } catch (PdbLoadError e) {
305     System.err.println("Error loading pdb data for " + pdbCode + pdbChainCode+", specific error: "+e.getMessage());
306 duarte 264 } catch (PdbCodeNotFoundError e) {
307 duarte 277 System.err.println("Couldn't find pdb code "+pdbCode);
308 duarte 264 } catch (SQLException e) {
309 duarte 285 System.err.println("SQL error for structure "+pdbCode+"_"+pdbChainCode+", error: "+e.getMessage());
310 duarte 441 }
311     /* catch (CiffileFormatError e) {
312 filippis 398 System.err.println(e.getMessage());
313     }*/
314 filippis 412
315     System.out.println("SUMMARY:"+pdbCode+"_"+pdbChainCode+" dssp:"+dssp+" scop:"+scop+" naccess:"+naccess+" consurf:"+consurf+" ec:"+ec+" csa:"+csa+ " graphs:"+numGraphs);
316 duarte 264
317     }
318    
319     // output results
320 duarte 359 System.out.println("Number of graphs loaded successfully: " + numPdbs);
321 duarte 264
322    
323     } else {
324     String pdbChainCode = pdbChainCodes[0];
325 filippis 412 boolean dssp = false, scop = false, naccess = false, consurf = false, ec = false, csa = false;
326     int numGraphs = 0;
327    
328 duarte 264 try {
329 duarte 359
330     System.out.println("Getting chain "+pdbChainCode+" from pdb file "+pdbfile);
331    
332 duarte 441 Pdb pdb = new PdbfilePdb(pdbfile);
333     pdb.load(pdbChainCode);
334 duarte 264 if (!pdb.hasSecondaryStructure()) {
335 filippis 503 pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE);
336     //pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
337 duarte 264 }
338 filippis 396 if (!mode.equals("GRAPH")) {
339 filippis 398 try {
340 filippis 412 pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE);
341     //pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
342     dssp = true;
343 filippis 398 } catch (Exception e) {
344     System.err.println(e.getMessage());
345     }
346     try {
347 filippis 514 pdb.checkScop("1.73", false);
348 filippis 412 scop = true;
349 filippis 398 } catch (Exception e) {
350     System.err.println(e.getMessage());
351     }
352     try {
353     int mistakes = pdb.checkConsurfHssp(false);
354     System.out.println("ConsurfHssp Mistakes:"+mistakes);
355 filippis 412 if (mistakes == 0) consurf = true;
356 filippis 398 } catch (Exception e) {
357     System.err.println(e.getMessage());
358     }
359     try {
360     pdb.checkEC(false);
361 filippis 412 ec = true;
362 filippis 398 } catch (Exception e) {
363     System.err.println(e.getMessage());
364     }
365     try {
366 filippis 514 int mistakes = pdb.checkCSA("2.2.7", false);
367 filippis 398 System.out.println("CSA Mistakes:"+mistakes);
368 filippis 412 if (mistakes == 0) csa = true;
369 filippis 398 } catch (Exception e) {
370     System.err.println(e.getMessage());
371     }
372 filippis 503 /*pdb.restrictToScopDomain("d1eaka3");
373     try {
374     pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE);
375     //pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
376     dssp = true;
377     } catch (Exception e) {
378     System.err.println(e.getMessage());
379     }*/
380     try {
381     pdb.runNaccess(NACCESS_EXE, NACCESS_PARAMS);
382     naccess = true;
383     } catch (Exception e) {
384     System.err.println(e.getMessage());
385     }
386    
387 filippis 412 //pdb.writeToDb(conn,outputDb);
388 filippis 411 pdb.writeToDbFast(conn, outputDb);
389 filippis 396 }
390 duarte 359
391     // get graphs
392 filippis 396 if (!mode.equals("PDB")) {
393     for (int j = 0; j<edgeTypes.length; j++) {
394 filippis 438 System.out.print("--> "+(directed[j]?"directed":"")+" graph "+edgeTypes[j]+" for cutoff "+cutoffs[j]);
395 filippis 396
396 filippis 438 RIGraph graph = pdb.get_graph(edgeTypes[j], cutoffs[j], directed[j]);
397 filippis 503 //graph.restrictContactsBetweenSs();
398 filippis 407 if (seqseps != null) {
399     if (seqseps[j] > 1) {
400 filippis 411 System.out.print(" and sequence separation >= "+seqseps[j]);
401 filippis 407 graph.restrictContactsToMinRange(seqseps[j]);
402     }
403     }
404 duarte 420 //graph.writeToDb(conn,outputDb);
405 filippis 407 graph.write_graph_to_db_fast(conn,outputDb);
406 filippis 411
407 filippis 412 System.out.println();
408     numGraphs++;
409 filippis 396 }
410 duarte 264 }
411    
412 duarte 359 } catch (SQLException e) {
413     System.err.println("Couldn't write graph to db, error: "+e.getMessage());
414 duarte 441 } catch (PdbLoadError e) {
415     System.err.println("Error loading from pdb file "+pdbfile+", specific error: "+e.getMessage());
416     }
417 filippis 412
418     System.out.println("SUMMARY:"+pdbfile+"_"+pdbChainCode+" dssp:"+dssp+" scop:"+scop+" naccess:"+naccess+" consurf:"+consurf+" ec:"+ec+" csa:"+csa+ " graphs:"+numGraphs);
419    
420 duarte 264 }
421 duarte 358
422     // closing db connection
423     conn.close();
424 duarte 264 }
425    
426     }