ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/genDbGraph.java
Revision: 492
Committed: Wed Jan 2 13:18:57 2008 UTC (16 years, 8 months ago) by duarte
File size: 13527 byte(s)
Log Message:
Copied the aglappe-jung branch into trunk.

Line User Rev File contents
1 duarte 264 import gnu.getopt.Getopt;
2    
3     import java.io.BufferedReader;
4     import java.io.FileReader;
5     import java.io.IOException;
6     import java.sql.SQLException;
7    
8    
9 filippis 398 //import proteinstructure.CiffilePdb;
10 duarte 441 import proteinstructure.PdbLoadError;
11 duarte 420 import proteinstructure.RIGraph;
12 duarte 264 import proteinstructure.Pdb;
13     import proteinstructure.PdbCodeNotFoundError;
14     import proteinstructure.PdbasePdb;
15     import proteinstructure.PdbfilePdb;
16 filippis 412 import proteinstructure.SecStrucElement;
17 duarte 264 import tools.MySQLConnection;
18    
19    
20     public class genDbGraph {
21     /*------------------------------ constants ------------------------------*/
22    
23     public static final String PDB_DB = "pdbase";
24     public static final String DB_HOST = "white";
25     public static final String DB_USER = getUserName();
26     public static final String DB_PWD = "nieve";
27     public static final String DSSP_EXE = "/project/StruPPi/bin/dssp";
28     public static final String DSSP_PARAMS = "--";
29 filippis 396 public static final String NACCESS_EXE = "/project/StruPPi/bin/naccess";
30     public static final String NACCESS_PARAMS = "";
31 duarte 264
32     //public static double cutoff = 4.2;
33     //public static String edgeType = "ALL";
34    
35     /*---------------------------- private methods --------------------------*/
36     /**
37     * Get user name from operating system (for use as database username).
38     * */
39     private static String getUserName() {
40     String user = null;
41     user = System.getProperty("user.name");
42     if(user == null) {
43     System.err.println("Could not get user name from operating system.");
44     }
45     return user;
46     }
47    
48     public static void main(String[] args) throws IOException {
49    
50    
51     String help = "Usage, 3 options:\n" +
52 filippis 438 "1) genDbGraph -i <listfile> -d <distance_cutoff> -t <contact_type> [-r directed] -s <seq_sep> -o <output_db> [-D <pdbase_db>] [-m <mode>] \n" +
53     "2) genDbGraph -p <pdb_code> -c <chain_pdb_code> -d <distance_cutoff> -t <contact_type> [-r directed] -s <seq_sep> -o <output_db> [-D <pdbase_db>] [-m <mode>] \n" +
54     "3) genDbGraph -f <pdbfile> -c <chain_pdb_code> -d <distance_cutoff> -t <contact_type> [-r directed] -s <seq_sep> -o <output_db> [-m <mode>] \n" +
55 duarte 359 "\nA comma separated list of contact types and distance cutoffs can be given instead of just 1, e.g. -d 8.0,8.5 -t Ca,Cb will generate the graphs for Ca at 8.0 and for Cb at 8.5\n" +
56     "If only 1 contact type given and multiple cutoffs, graphs will be generated at all the cutoffs for the one contact type\n"+
57     "\nIn case 2) also a list of comma separated pdb codes and chain codes can be specified, e.g. -p 1bxy,1jos -c A,A\n" +
58 duarte 358 "\nIf pdbase_db not specified, the default pdbase will be used\n" +
59     "\nSecondary structure will be taken from pdbase database. If reading from pdb file and the pdb file is missing the secondary structure, then it will be assigned using dssp\n";
60 duarte 264
61     String listfile = "";
62     String[] pdbCodes = null;
63     String[] pdbChainCodes = null;
64     String pdbfile = "";
65     String pdbaseDb = PDB_DB;
66 duarte 359 String[] edgeTypes = null;
67     double[] cutoffs = null;
68 filippis 407 int[] seqseps = null;
69 filippis 438 boolean[] directed = null;
70 duarte 264 String outputDb = "";
71 filippis 396 String mode = "GRAPH";
72 duarte 264
73 filippis 438 Getopt g = new Getopt("genDbGraph", args, "i:p:c:f:d:t:r:s:o:D:m:h?");
74 duarte 264 int c;
75     while ((c = g.getopt()) != -1) {
76     switch(c){
77     case 'i':
78     listfile = g.getOptarg();
79     break;
80     case 'p':
81     pdbCodes = g.getOptarg().split(",");
82     break;
83     case 'c':
84     pdbChainCodes = g.getOptarg().split(",");
85     break;
86     case 'f':
87     pdbfile = g.getOptarg();
88     break;
89     case 'd':
90 duarte 359 String[] cutoffsStr = g.getOptarg().split(",");
91     cutoffs = new double[cutoffsStr.length];
92     for (int i =0;i<cutoffs.length;i++) {
93     cutoffs[i] = Double.valueOf(cutoffsStr[i]);
94     }
95 duarte 264 break;
96     case 't':
97 duarte 359 edgeTypes = g.getOptarg().split(",");
98 duarte 264 break;
99 filippis 438 case 'r':
100     String[] directedStr = g.getOptarg().split(",");
101     directed = new boolean[directedStr.length];
102     for (int i =0;i<directed.length;i++) {
103     directed[i] = Boolean.valueOf(directedStr[i]);
104     }
105     break;
106 filippis 407 case 's':
107     String[] seqsepsStr = g.getOptarg().split(",");
108     seqseps = new int[seqsepsStr.length];
109     for (int i =0;i<seqseps.length;i++) {
110     seqseps[i] = Integer.valueOf(seqsepsStr[i]);
111     }
112     break;
113 duarte 264 case 'o':
114     outputDb = g.getOptarg();
115     break;
116     case 'D':
117     pdbaseDb = g.getOptarg();
118     break;
119 filippis 396 case 'm':
120     mode = g.getOptarg();
121 filippis 398 break;
122 duarte 264 case 'h':
123     case '?':
124     System.out.println(help);
125     System.exit(0);
126     break; // getopt() already printed an error
127     }
128     }
129    
130 duarte 453 if (directed==null) {
131     // we set by default all directed to false
132     directed = new boolean[edgeTypes.length];
133     }
134    
135 duarte 359 if (outputDb.equals("") || edgeTypes==null || cutoffs==null) {
136     System.err.println("Some missing option\n");
137 duarte 264 System.err.println(help);
138     System.exit(1);
139     }
140 duarte 359 if (edgeTypes.length!=cutoffs.length && edgeTypes.length!=1) {
141     System.err.println("Not same number of contact types as cutoffs given\n");
142     System.err.println(help);
143     System.exit(1);
144     }
145 filippis 407 if (seqseps != null && edgeTypes.length!=seqseps.length) {
146     System.err.println("Not same number of contact types as sequence separations given\n");
147     System.err.println(help);
148     System.exit(1);
149     }
150 filippis 438 if (directed != null && edgeTypes.length!=directed.length) {
151     System.err.println("Not same number of contact types as directionalities given\n");
152     System.err.println(help);
153     System.exit(1);
154     }
155 duarte 264 if (listfile.equals("") && pdbCodes==null && pdbfile.equals("")){
156 duarte 359 System.err.println("Either a listfile, some pdb codes/chain codes or a pdbfile must be given\n");
157 duarte 264 System.err.println(help);
158     System.exit(1);
159     }
160     if ((!listfile.equals("") && pdbCodes!=null) || (!listfile.equals("") && !pdbfile.equals("")) || (pdbCodes!=null && !pdbfile.equals(""))) {
161 duarte 359 System.err.println("Options -p/-c, -i and -f/-c are exclusive. Use only one of them\n");
162 duarte 264 System.err.println(help);
163     System.exit(1);
164     }
165 filippis 396 if (!(mode.equals("GRAPH") || mode.equals("PDB") || mode.equals("BOTH"))) {
166     System.err.println("Allowed values for mode:GRAPH,PDB,BOTH.");
167     System.err.println(help);
168     System.exit(1);
169     }
170 duarte 359
171     // setting edgeTypes in case only 1 was given with multiple cutoffs
172     if (edgeTypes.length==1 && cutoffs.length>1) {
173     String edgeType = edgeTypes[0];
174     edgeTypes = new String[cutoffs.length];
175     for (int i=0;i<cutoffs.length;i++){
176     edgeTypes[i] = edgeType;
177     }
178     }
179 duarte 264
180    
181     MySQLConnection conn = null;
182    
183     try{
184     conn = new MySQLConnection(DB_HOST, DB_USER, DB_PWD);
185 filippis 407 conn.setSqlMode("NO_UNSIGNED_SUBTRACTION,TRADITIONAL");
186 duarte 264 } catch (Exception e) {
187     System.err.println("Error opening database connection. Exiting");
188     System.exit(1);
189     }
190    
191    
192     if (pdbfile.equals("")){
193    
194     if (!listfile.equals("")) {
195     BufferedReader fpdb = new BufferedReader(new FileReader(listfile));
196     String line = "";
197     int numLines = 0;
198     fpdb.mark(100000);
199     while ((line = fpdb.readLine() ) != null ) {
200 duarte 285 if (line.length()>0) numLines++;
201 duarte 264 }
202     fpdb.reset();
203     pdbCodes = new String[numLines];
204     pdbChainCodes = new String[numLines];
205     numLines = 0;
206     while ((line = fpdb.readLine() ) != null ) {
207 duarte 285 pdbCodes[numLines] = line.split("\\s+")[0].toLowerCase();
208     pdbChainCodes[numLines] = line.split("\\s+")[1];
209 duarte 264 numLines++;
210     }
211 duarte 358 fpdb.close();
212 duarte 264 }
213    
214     int numPdbs = 0;
215    
216     for (int i=0;i<pdbCodes.length;i++) {
217     String pdbCode = pdbCodes[i];
218     String pdbChainCode = pdbChainCodes[i];
219 filippis 412
220     boolean dssp = false, scop = false, naccess = false, consurf = false, ec = false, csa = false;
221     int numGraphs = 0;
222    
223 duarte 264 try {
224 filippis 412
225 duarte 359 System.out.println("Getting pdb data for "+pdbCode+"_"+pdbChainCode);
226 duarte 285
227 duarte 441 Pdb pdb = new PdbasePdb(pdbCode, pdbaseDb, conn);
228     pdb.load(pdbChainCode);
229 filippis 398 //Pdb pdb = new CiffilePdb(new File("/project/StruPPi/BiO/DBd/PDB-REMEDIATED/data/structures/unzipped/all/mmCIF/"+pdbCode+".cif"), pdbChainCode);
230 filippis 396 if (!mode.equals("GRAPH")) {
231 filippis 398 try {
232 filippis 412 pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE);
233     //pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
234     dssp = true;
235 filippis 398 } catch (Exception e) {
236     System.err.println(e.getMessage());
237     }
238     try {
239     pdb.checkScop("1.71", false);
240 filippis 412 scop = true;
241 filippis 398 } catch (Exception e) {
242     System.err.println(e.getMessage());
243     }
244     try {
245     pdb.runNaccess(NACCESS_EXE, NACCESS_PARAMS);
246 filippis 412 naccess = true;
247 filippis 398 } catch (Exception e) {
248     System.err.println(e.getMessage());
249     }
250     try {
251     int mistakes = pdb.checkConsurfHssp(false);
252     System.out.println("ConsurfHssp Mistakes:"+mistakes);
253 filippis 412 if (mistakes == 0) consurf = true;
254 filippis 398 } catch (Exception e) {
255     System.err.println(e.getMessage());
256     }
257     try {
258     pdb.checkEC(false);
259 filippis 412 ec = true;
260 filippis 398 } catch (Exception e) {
261     System.err.println(e.getMessage());
262     }
263     try {
264     int mistakes = pdb.checkCSA("2.2.5", false);
265     System.out.println("CSA Mistakes:"+mistakes);
266 filippis 412 if (mistakes == 0) csa = true;
267 filippis 398 } catch (Exception e) {
268     System.err.println(e.getMessage());
269     }
270 filippis 407 //pdb.writeToDb(conn,outputDb);
271     pdb.writeToDbFast(conn, outputDb);
272 filippis 396 }
273 duarte 359 // get graphs
274 filippis 396 if (!mode.equals("PDB")) {
275     for (int j = 0; j<edgeTypes.length; j++) {
276 filippis 438 System.out.print("--> "+(directed[j]?"directed":"")+" graph "+edgeTypes[j]+" for cutoff "+cutoffs[j]);
277 filippis 396
278 filippis 438 RIGraph graph = pdb.get_graph(edgeTypes[j], cutoffs[j], directed[j]);
279 filippis 407 if (seqseps != null) {
280     if (seqseps[j] > 1) {
281 filippis 411 System.out.print(" and sequence separation >= "+seqseps[j]);
282 filippis 407 graph.restrictContactsToMinRange(seqseps[j]);
283     }
284     }
285 duarte 420 //graph.writeToDb(conn,outputDb);
286 filippis 407 graph.write_graph_to_db_fast(conn,outputDb);
287 filippis 396
288 duarte 420 System.out.println();
289 filippis 396 numPdbs++;
290 filippis 412 numGraphs++;
291 filippis 396 }
292 duarte 359 }
293 duarte 285
294 duarte 441 } catch (PdbLoadError e) {
295     System.err.println("Error loading pdb data for " + pdbCode + pdbChainCode+", specific error: "+e.getMessage());
296 duarte 264 } catch (PdbCodeNotFoundError e) {
297 duarte 277 System.err.println("Couldn't find pdb code "+pdbCode);
298 duarte 264 } catch (SQLException e) {
299 duarte 285 System.err.println("SQL error for structure "+pdbCode+"_"+pdbChainCode+", error: "+e.getMessage());
300 duarte 441 }
301     /* catch (CiffileFormatError e) {
302 filippis 398 System.err.println(e.getMessage());
303     }*/
304 filippis 412
305     System.out.println("SUMMARY:"+pdbCode+"_"+pdbChainCode+" dssp:"+dssp+" scop:"+scop+" naccess:"+naccess+" consurf:"+consurf+" ec:"+ec+" csa:"+csa+ " graphs:"+numGraphs);
306 duarte 264
307     }
308    
309     // output results
310 duarte 359 System.out.println("Number of graphs loaded successfully: " + numPdbs);
311 duarte 264
312    
313     } else {
314     String pdbChainCode = pdbChainCodes[0];
315 filippis 412 boolean dssp = false, scop = false, naccess = false, consurf = false, ec = false, csa = false;
316     int numGraphs = 0;
317    
318 duarte 264 try {
319 duarte 359
320     System.out.println("Getting chain "+pdbChainCode+" from pdb file "+pdbfile);
321    
322 duarte 441 Pdb pdb = new PdbfilePdb(pdbfile);
323     pdb.load(pdbChainCode);
324 duarte 264 if (!pdb.hasSecondaryStructure()) {
325     pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
326     }
327 filippis 396 if (!mode.equals("GRAPH")) {
328 filippis 398 try {
329 filippis 412 pdb.runDssp(DSSP_EXE, DSSP_PARAMS, SecStrucElement.ReducedState.THREESTATE, SecStrucElement.ReducedState.THREESTATE);
330     //pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
331     dssp = true;
332 filippis 398 } catch (Exception e) {
333     System.err.println(e.getMessage());
334     }
335     try {
336     pdb.checkScop("1.71", false);
337 filippis 412 scop = true;
338 filippis 398 } catch (Exception e) {
339     System.err.println(e.getMessage());
340     }
341     try {
342     pdb.runNaccess(NACCESS_EXE, NACCESS_PARAMS);
343 filippis 412 naccess = true;
344 filippis 398 } catch (Exception e) {
345     System.err.println(e.getMessage());
346     }
347     try {
348     int mistakes = pdb.checkConsurfHssp(false);
349     System.out.println("ConsurfHssp Mistakes:"+mistakes);
350 filippis 412 if (mistakes == 0) consurf = true;
351 filippis 398 } catch (Exception e) {
352     System.err.println(e.getMessage());
353     }
354     try {
355     pdb.checkEC(false);
356 filippis 412 ec = true;
357 filippis 398 } catch (Exception e) {
358     System.err.println(e.getMessage());
359     }
360     try {
361     int mistakes = pdb.checkCSA("2.2.5", false);
362     System.out.println("CSA Mistakes:"+mistakes);
363 filippis 412 if (mistakes == 0) csa = true;
364 filippis 398 } catch (Exception e) {
365     System.err.println(e.getMessage());
366     }
367 filippis 412 //pdb.writeToDb(conn,outputDb);
368 filippis 411 pdb.writeToDbFast(conn, outputDb);
369 filippis 396 }
370 duarte 359
371     // get graphs
372 filippis 396 if (!mode.equals("PDB")) {
373     for (int j = 0; j<edgeTypes.length; j++) {
374 filippis 438 System.out.print("--> "+(directed[j]?"directed":"")+" graph "+edgeTypes[j]+" for cutoff "+cutoffs[j]);
375 filippis 396
376 filippis 438 RIGraph graph = pdb.get_graph(edgeTypes[j], cutoffs[j], directed[j]);
377 filippis 407 if (seqseps != null) {
378     if (seqseps[j] > 1) {
379 filippis 411 System.out.print(" and sequence separation >= "+seqseps[j]);
380 filippis 407 graph.restrictContactsToMinRange(seqseps[j]);
381     }
382     }
383 duarte 420 //graph.writeToDb(conn,outputDb);
384 filippis 407 graph.write_graph_to_db_fast(conn,outputDb);
385 filippis 411
386 filippis 412 System.out.println();
387     numGraphs++;
388 filippis 396 }
389 duarte 264 }
390    
391 duarte 359 } catch (SQLException e) {
392     System.err.println("Couldn't write graph to db, error: "+e.getMessage());
393 duarte 441 } catch (PdbLoadError e) {
394     System.err.println("Error loading from pdb file "+pdbfile+", specific error: "+e.getMessage());
395     }
396 filippis 412
397     System.out.println("SUMMARY:"+pdbfile+"_"+pdbChainCode+" dssp:"+dssp+" scop:"+scop+" naccess:"+naccess+" consurf:"+consurf+" ec:"+ec+" csa:"+csa+ " graphs:"+numGraphs);
398    
399 duarte 264 }
400 duarte 358
401     // closing db connection
402     conn.close();
403 duarte 264 }
404    
405     }