ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/genDbGraph.java
Revision: 411
Committed: Tue Nov 20 19:41:16 2007 UTC (16 years, 10 months ago) by filippis
File size: 11869 byte(s)
Log Message:
minor changes
Line User Rev File contents
1 duarte 264 import gnu.getopt.Getopt;
2    
3     import java.io.BufferedReader;
4     import java.io.FileReader;
5     import java.io.IOException;
6     import java.sql.SQLException;
7    
8    
9 filippis 398 //import proteinstructure.CiffilePdb;
10     //import proteinstructure.CiffileFormatError;
11 duarte 264 import proteinstructure.Graph;
12     import proteinstructure.Pdb;
13     import proteinstructure.PdbChainCodeNotFoundError;
14     import proteinstructure.PdbCodeNotFoundError;
15     import proteinstructure.PdbaseInconsistencyError;
16     import proteinstructure.PdbasePdb;
17     import proteinstructure.PdbfileFormatError;
18     import proteinstructure.PdbfilePdb;
19     import tools.MySQLConnection;
20    
21    
22     public class genDbGraph {
23     /*------------------------------ constants ------------------------------*/
24    
25     public static final String PDB_DB = "pdbase";
26     public static final String DB_HOST = "white";
27     public static final String DB_USER = getUserName();
28     public static final String DB_PWD = "nieve";
29     public static final String DSSP_EXE = "/project/StruPPi/bin/dssp";
30     public static final String DSSP_PARAMS = "--";
31 filippis 396 public static final String NACCESS_EXE = "/project/StruPPi/bin/naccess";
32     public static final String NACCESS_PARAMS = "";
33 duarte 264
34     //public static double cutoff = 4.2;
35     //public static String edgeType = "ALL";
36    
37     /*---------------------------- private methods --------------------------*/
38     /**
39     * Get user name from operating system (for use as database username).
40     * */
41     private static String getUserName() {
42     String user = null;
43     user = System.getProperty("user.name");
44     if(user == null) {
45     System.err.println("Could not get user name from operating system.");
46     }
47     return user;
48     }
49    
50     public static void main(String[] args) throws IOException {
51    
52    
53     String help = "Usage, 3 options:\n" +
54 filippis 407 "1) genDbGraph -i <listfile> -d <distance_cutoff> -t <contact_type> -s <seq_sep> -o <output_db> [-D <pdbase_db>] [-m <mode>] \n" +
55     "2) genDbGraph -p <pdb_code> -c <chain_pdb_code> -d <distance_cutoff> -t <contact_type> -s <seq_sep> -o <output_db> [-D <pdbase_db>] [-m <mode>] \n" +
56     "3) genDbGraph -f <pdbfile> -c <chain_pdb_code> -d <distance_cutoff> -t <contact_type> -s <seq_sep> -o <output_db> [-m <mode>] \n" +
57 duarte 359 "\nA comma separated list of contact types and distance cutoffs can be given instead of just 1, e.g. -d 8.0,8.5 -t Ca,Cb will generate the graphs for Ca at 8.0 and for Cb at 8.5\n" +
58     "If only 1 contact type given and multiple cutoffs, graphs will be generated at all the cutoffs for the one contact type\n"+
59     "\nIn case 2) also a list of comma separated pdb codes and chain codes can be specified, e.g. -p 1bxy,1jos -c A,A\n" +
60 duarte 358 "\nIf pdbase_db not specified, the default pdbase will be used\n" +
61     "\nSecondary structure will be taken from pdbase database. If reading from pdb file and the pdb file is missing the secondary structure, then it will be assigned using dssp\n";
62 duarte 264
63     String listfile = "";
64     String[] pdbCodes = null;
65     String[] pdbChainCodes = null;
66     String pdbfile = "";
67     String pdbaseDb = PDB_DB;
68 duarte 359 String[] edgeTypes = null;
69     double[] cutoffs = null;
70 filippis 407 int[] seqseps = null;
71 duarte 264 String outputDb = "";
72 filippis 396 String mode = "GRAPH";
73 duarte 264
74 filippis 407 Getopt g = new Getopt("genDbGraph", args, "i:p:c:f:d:t:s:o:D:m:h?");
75 duarte 264 int c;
76     while ((c = g.getopt()) != -1) {
77     switch(c){
78     case 'i':
79     listfile = g.getOptarg();
80     break;
81     case 'p':
82     pdbCodes = g.getOptarg().split(",");
83     break;
84     case 'c':
85     pdbChainCodes = g.getOptarg().split(",");
86     break;
87     case 'f':
88     pdbfile = g.getOptarg();
89     break;
90     case 'd':
91 duarte 359 String[] cutoffsStr = g.getOptarg().split(",");
92     cutoffs = new double[cutoffsStr.length];
93     for (int i =0;i<cutoffs.length;i++) {
94     cutoffs[i] = Double.valueOf(cutoffsStr[i]);
95     }
96 duarte 264 break;
97     case 't':
98 duarte 359 edgeTypes = g.getOptarg().split(",");
99 duarte 264 break;
100 filippis 407 case 's':
101     String[] seqsepsStr = g.getOptarg().split(",");
102     seqseps = new int[seqsepsStr.length];
103     for (int i =0;i<seqseps.length;i++) {
104     seqseps[i] = Integer.valueOf(seqsepsStr[i]);
105     }
106     break;
107 duarte 264 case 'o':
108     outputDb = g.getOptarg();
109     break;
110     case 'D':
111     pdbaseDb = g.getOptarg();
112     break;
113 filippis 396 case 'm':
114     mode = g.getOptarg();
115 filippis 398 break;
116 duarte 264 case 'h':
117     case '?':
118     System.out.println(help);
119     System.exit(0);
120     break; // getopt() already printed an error
121     }
122     }
123    
124 duarte 359 if (outputDb.equals("") || edgeTypes==null || cutoffs==null) {
125     System.err.println("Some missing option\n");
126 duarte 264 System.err.println(help);
127     System.exit(1);
128     }
129 duarte 359 if (edgeTypes.length!=cutoffs.length && edgeTypes.length!=1) {
130     System.err.println("Not same number of contact types as cutoffs given\n");
131     System.err.println(help);
132     System.exit(1);
133     }
134 filippis 407 if (seqseps != null && edgeTypes.length!=seqseps.length) {
135     System.err.println("Not same number of contact types as sequence separations given\n");
136     System.err.println(help);
137     System.exit(1);
138     }
139 duarte 264 if (listfile.equals("") && pdbCodes==null && pdbfile.equals("")){
140 duarte 359 System.err.println("Either a listfile, some pdb codes/chain codes or a pdbfile must be given\n");
141 duarte 264 System.err.println(help);
142     System.exit(1);
143     }
144     if ((!listfile.equals("") && pdbCodes!=null) || (!listfile.equals("") && !pdbfile.equals("")) || (pdbCodes!=null && !pdbfile.equals(""))) {
145 duarte 359 System.err.println("Options -p/-c, -i and -f/-c are exclusive. Use only one of them\n");
146 duarte 264 System.err.println(help);
147     System.exit(1);
148     }
149 filippis 396 if (!(mode.equals("GRAPH") || mode.equals("PDB") || mode.equals("BOTH"))) {
150     System.err.println("Allowed values for mode:GRAPH,PDB,BOTH.");
151     System.err.println(help);
152     System.exit(1);
153     }
154 duarte 359
155     // setting edgeTypes in case only 1 was given with multiple cutoffs
156     if (edgeTypes.length==1 && cutoffs.length>1) {
157     String edgeType = edgeTypes[0];
158     edgeTypes = new String[cutoffs.length];
159     for (int i=0;i<cutoffs.length;i++){
160     edgeTypes[i] = edgeType;
161     }
162     }
163 duarte 264
164    
165     MySQLConnection conn = null;
166    
167     try{
168     conn = new MySQLConnection(DB_HOST, DB_USER, DB_PWD);
169 filippis 407 conn.setSqlMode("NO_UNSIGNED_SUBTRACTION,TRADITIONAL");
170 duarte 264 } catch (Exception e) {
171     System.err.println("Error opening database connection. Exiting");
172     System.exit(1);
173     }
174    
175    
176     if (pdbfile.equals("")){
177    
178     if (!listfile.equals("")) {
179     BufferedReader fpdb = new BufferedReader(new FileReader(listfile));
180     String line = "";
181     int numLines = 0;
182     fpdb.mark(100000);
183     while ((line = fpdb.readLine() ) != null ) {
184 duarte 285 if (line.length()>0) numLines++;
185 duarte 264 }
186     fpdb.reset();
187     pdbCodes = new String[numLines];
188     pdbChainCodes = new String[numLines];
189     numLines = 0;
190     while ((line = fpdb.readLine() ) != null ) {
191 duarte 285 pdbCodes[numLines] = line.split("\\s+")[0].toLowerCase();
192     pdbChainCodes[numLines] = line.split("\\s+")[1];
193 duarte 264 numLines++;
194     }
195 duarte 358 fpdb.close();
196 duarte 264 }
197    
198     int numPdbs = 0;
199    
200     for (int i=0;i<pdbCodes.length;i++) {
201     String pdbCode = pdbCodes[i];
202     String pdbChainCode = pdbChainCodes[i];
203    
204     try {
205 duarte 359
206     System.out.println("Getting pdb data for "+pdbCode+"_"+pdbChainCode);
207 duarte 285
208 duarte 277 Pdb pdb = new PdbasePdb(pdbCode, pdbChainCode, pdbaseDb, conn);
209 filippis 398 //Pdb pdb = new CiffilePdb(new File("/project/StruPPi/BiO/DBd/PDB-REMEDIATED/data/structures/unzipped/all/mmCIF/"+pdbCode+".cif"), pdbChainCode);
210 filippis 396 if (!mode.equals("GRAPH")) {
211 filippis 398 try {
212     pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
213     } catch (Exception e) {
214     System.err.println(e.getMessage());
215     }
216     try {
217     pdb.checkScop("1.71", false);
218     } catch (Exception e) {
219     System.err.println(e.getMessage());
220     }
221     try {
222     pdb.runNaccess(NACCESS_EXE, NACCESS_PARAMS);
223     } catch (Exception e) {
224     System.err.println(e.getMessage());
225     }
226     try {
227     int mistakes = pdb.checkConsurfHssp(false);
228     System.out.println("ConsurfHssp Mistakes:"+mistakes);
229     } catch (Exception e) {
230     System.err.println(e.getMessage());
231     }
232     try {
233     pdb.checkEC(false);
234     } catch (Exception e) {
235     System.err.println(e.getMessage());
236     }
237     try {
238     int mistakes = pdb.checkCSA("2.2.5", false);
239     System.out.println("CSA Mistakes:"+mistakes);
240     } catch (Exception e) {
241     System.err.println(e.getMessage());
242     }
243 filippis 407 //pdb.writeToDb(conn,outputDb);
244     pdb.writeToDbFast(conn, outputDb);
245 filippis 396 }
246 duarte 359 // get graphs
247 filippis 396 if (!mode.equals("PDB")) {
248     for (int j = 0; j<edgeTypes.length; j++) {
249 filippis 411 System.out.print("--> graph "+edgeTypes[j]+" for cutoff "+cutoffs[j]);
250 filippis 396
251     Graph graph = pdb.get_graph(edgeTypes[j], cutoffs[j]);
252 filippis 407 if (seqseps != null) {
253     if (seqseps[j] > 1) {
254 filippis 411 System.out.print(" and sequence separation >= "+seqseps[j]);
255 filippis 407 graph.restrictContactsToMinRange(seqseps[j]);
256     }
257     }
258     //graph.write_graph_to_db(conn,outputDb);
259     graph.write_graph_to_db_fast(conn,outputDb);
260 filippis 396
261 filippis 411 System.out.println();
262 filippis 396 numPdbs++;
263     }
264 duarte 359 }
265 duarte 285
266 duarte 264 } catch (PdbaseInconsistencyError e) {
267 duarte 277 System.err.println("Inconsistency in " + pdbCode + pdbChainCode);
268 duarte 264 } catch (PdbCodeNotFoundError e) {
269 duarte 277 System.err.println("Couldn't find pdb code "+pdbCode);
270 duarte 264 } catch (SQLException e) {
271 duarte 285 System.err.println("SQL error for structure "+pdbCode+"_"+pdbChainCode+", error: "+e.getMessage());
272 duarte 264 } catch (PdbChainCodeNotFoundError e) {
273 duarte 277 System.err.println("Couldn't find pdb chain code "+pdbChainCode+" for pdb code "+pdbCode);
274 filippis 398 }/* catch (CiffileFormatError e) {
275     System.err.println(e.getMessage());
276     }*/
277 duarte 264
278     }
279    
280     // output results
281 duarte 359 System.out.println("Number of graphs loaded successfully: " + numPdbs);
282 duarte 264
283    
284     } else {
285     String pdbChainCode = pdbChainCodes[0];
286     try {
287 duarte 359
288     System.out.println("Getting chain "+pdbChainCode+" from pdb file "+pdbfile);
289    
290 duarte 264 Pdb pdb = new PdbfilePdb(pdbfile,pdbChainCode);
291     if (!pdb.hasSecondaryStructure()) {
292     pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
293     }
294 filippis 396 if (!mode.equals("GRAPH")) {
295 filippis 398 try {
296     pdb.runDssp(DSSP_EXE, DSSP_PARAMS);
297     } catch (Exception e) {
298     System.err.println(e.getMessage());
299     }
300     try {
301     pdb.checkScop("1.71", false);
302     } catch (Exception e) {
303     System.err.println(e.getMessage());
304     }
305     try {
306     pdb.runNaccess(NACCESS_EXE, NACCESS_PARAMS);
307     } catch (Exception e) {
308     System.err.println(e.getMessage());
309     }
310     try {
311     int mistakes = pdb.checkConsurfHssp(false);
312     System.out.println("ConsurfHssp Mistakes:"+mistakes);
313     } catch (Exception e) {
314     System.err.println(e.getMessage());
315     }
316     try {
317     pdb.checkEC(false);
318     } catch (Exception e) {
319     System.err.println(e.getMessage());
320     }
321     try {
322     int mistakes = pdb.checkCSA("2.2.5", false);
323     System.out.println("CSA Mistakes:"+mistakes);
324     } catch (Exception e) {
325     System.err.println(e.getMessage());
326     }
327 filippis 396 pdb.writeToDb(conn,outputDb);
328 filippis 411 pdb.writeToDbFast(conn, outputDb);
329 filippis 396 }
330 duarte 359
331     // get graphs
332 filippis 396 if (!mode.equals("PDB")) {
333     for (int j = 0; j<edgeTypes.length; j++) {
334 filippis 411 System.out.print("--> graph "+edgeTypes[j]+" for cutoff "+cutoffs[j]);
335 filippis 396
336     Graph graph = pdb.get_graph(edgeTypes[j], cutoffs[j]);
337 filippis 407 if (seqseps != null) {
338     if (seqseps[j] > 1) {
339 filippis 411 System.out.print(" and sequence separation >= "+seqseps[j]);
340 filippis 407 graph.restrictContactsToMinRange(seqseps[j]);
341     }
342     }
343     //graph.write_graph_to_db(conn,outputDb);
344     graph.write_graph_to_db_fast(conn,outputDb);
345 filippis 411
346     System.out.println();
347 filippis 396 }
348 duarte 264 }
349    
350 duarte 359 } catch (SQLException e) {
351     System.err.println("Couldn't write graph to db, error: "+e.getMessage());
352 duarte 264 } catch (PdbfileFormatError e) {
353     System.err.println("pdb file "+pdbfile+" doesn't have right format");
354     } catch (PdbChainCodeNotFoundError e) {
355     System.err.println("chain code "+pdbChainCode+" wasn't found in file "+pdbfile);
356     }
357     }
358 duarte 358
359     // closing db connection
360     conn.close();
361 duarte 264 }
362    
363     }