ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/makeNbhTbl.java
Revision: 492
Committed: Wed Jan 2 13:18:57 2008 UTC (16 years, 8 months ago) by duarte
File size: 16108 byte(s)
Log Message:
Copied the aglappe-jung branch into trunk.

Line User Rev File contents
1 stehr 24 /****************************************************************************
2     * Make neighbourhood table
3     * Author: Henning Stehr
4     * Date: 21/12/2005
5     * Usage: makenbh [start_index] [end_index]
6     * Description: Simple command line program to generate the 'neighbourhoods'
7     * table in the 'newmsdgraph' database. The program will
8     * generate rows in the neighbourhood table for each node in
9     * each graph with start_index <= graph_id <= end_index.
10     * Contact: stehr@molgen.mpg.de
11     *****************************************************************************/
12    
13     import java.sql.*;
14    
15     public class makeNbhTbl {
16    
17     /* -------------------------------- constants ------------------------------------------*/
18    
19     // constants for database connection
20     static final String dbDriver = "org.gjt.mm.mysql.Driver";
21     static final String dbServer = "jdbc:mysql://fliederlila.molgen.mpg.de:3036/newmsdgraph";
22     static final String dbUserName = "henning";
23     static final String dbPassword = "das4ke";
24    
25     // constants for graph database
26     //static final String listTable = "henning.myfamily_list";
27     //static final String nodeTable = "henning.myfamily_nodes";
28     //static final String edgeTable = "henning.myfamily_edges";
29     static final String listTable = "list";
30     static final String nodeTable = "nodes";
31     static final String edgeTable = "edges";
32    
33    
34     // constants for neighbourhood table
35     //static final String nbhTable = "henning.myfamily_nbhs";
36     static final String nbhTable = "neighbourhoods_sc_sc";
37     static final String edgeType = "SC_SC";
38     static final String tempNodes = "temp_nodes";
39     static final String tempEdges = "temp_edges";
40     static final String nbhTableDefinition = "(`graph_id` int(10) unsigned default NULL," +
41     " `cid` varchar(6) default NULL," +
42     " `num` int(5) unsigned default NULL," +
43     " `res` char(3) default NULL," +
44     " `entry_id` int(5) unsigned default NULL," +
45     " `size` int(3) unsigned default NULL, " +
46     " `hash` char(20) default NULL, " +
47     " `ALA` int(3) unsigned default NULL," +
48     " `ARG` int(3) unsigned default NULL," +
49     " `ASN` int(3) unsigned default NULL," +
50     " `ASP` int(3) unsigned default NULL," +
51     " `CYS` int(3) unsigned default NULL," +
52     " `GLN` int(3) unsigned default NULL," +
53     " `GLU` int(3) unsigned default NULL," +
54     " `GLY` int(3) unsigned default NULL," +
55     " `HIS` int(3) unsigned default NULL," +
56     " `ILE` int(3) unsigned default NULL," +
57     " `LEU` int(3) unsigned default NULL," +
58     " `LYS` int(3) unsigned default NULL," +
59     " `MET` int(3) unsigned default NULL," +
60     " `PHE` int(3) unsigned default NULL," +
61     " `PRO` int(3) unsigned default NULL," +
62     " `SER` int(3) unsigned default NULL," +
63     " `THR` int(3) unsigned default NULL," +
64     " `TRP` int(3) unsigned default NULL," +
65     " `TYR` int(3) unsigned default NULL," +
66     " `VAL` int(3) unsigned default NULL," +
67     " CONSTRAINT PRIMARY KEY (graph_id, cid, num));";
68    
69     // This array is used to convert an amino acid number to its three letter code.
70     // The numbering is from 1 to 20 to ensure compatibility with function aa_three2num().
71     static final String[] aa_num2three = {"???", "ALA", "ARG", "ASN", "ASP", "CYS",
72     "GLN", "GLU", "GLY", "HIS", "ILE",
73     "LEU", "LYS", "MET", "PHE", "PRO",
74     "SER", "THR", "TRP", "TYR", "VAL"};
75    
76     /* ------------------------------ class variables -------------------------------------*/
77    
78     static Connection conn; // global database connection
79    
80     /* -------------------------------- functions -----------------------------------------*/
81    
82     // return the number of an amino acid by its three letter code
83     public static int aa_three2num(String threeLetterCode) {
84     if(threeLetterCode.equalsIgnoreCase("ALA")) return 1;
85     if(threeLetterCode.equalsIgnoreCase("ARG")) return 2;
86     if(threeLetterCode.equalsIgnoreCase("ASN")) return 3;
87     if(threeLetterCode.equalsIgnoreCase("ASP")) return 4;
88     if(threeLetterCode.equalsIgnoreCase("CYS")) return 5;
89     if(threeLetterCode.equalsIgnoreCase("GLN")) return 6;
90     if(threeLetterCode.equalsIgnoreCase("GLU")) return 7;
91     if(threeLetterCode.equalsIgnoreCase("GLY")) return 8;
92     if(threeLetterCode.equalsIgnoreCase("HIS")) return 9;
93     if(threeLetterCode.equalsIgnoreCase("ILE")) return 10;
94     if(threeLetterCode.equalsIgnoreCase("LEU")) return 11;
95     if(threeLetterCode.equalsIgnoreCase("LYS")) return 12;
96     if(threeLetterCode.equalsIgnoreCase("MET")) return 13;
97     if(threeLetterCode.equalsIgnoreCase("PHE")) return 14;
98     if(threeLetterCode.equalsIgnoreCase("PRO")) return 15;
99     if(threeLetterCode.equalsIgnoreCase("SER")) return 16;
100     if(threeLetterCode.equalsIgnoreCase("THR")) return 17;
101     if(threeLetterCode.equalsIgnoreCase("TRP")) return 18;
102     if(threeLetterCode.equalsIgnoreCase("TYR")) return 19;
103     if(threeLetterCode.equalsIgnoreCase("VAL")) return 20;
104     return -1;
105     }
106    
107     public static String nbhGetHashValue(int[] nbh) {
108    
109     char currentChar;
110     StringBuffer resultBuffer = new StringBuffer("????????????????????");
111    
112     // nbh[i] is defined for i=1..20
113     for(int i = 1; i <= 20; i++) {
114     currentChar = '?';
115     if(nbh[i] < 0)
116     System.err.println("Error in function nbhGetHashValue: " +
117     "array entry nbh[" + i + "] = " + nbh[i] +" is negative.");
118     //else if(nbh[i] <= 9) currentChar = '0' + nbh[i];
119     //else if(nbh[i] <= 9 + 26) currentChar = 'A' + nbh[i];
120     else if(nbh[i] > 26 + 9)
121     System.err.println("Error in function nbhGetHashValue: " +
122     "array entry nbh[" + i + "] = " + nbh[i] + " is too big.");
123     else currentChar = Character.forDigit(nbh[i], 26 + 10);
124     resultBuffer.setCharAt(i-1, currentChar);
125     }
126     return resultBuffer.toString();
127     }
128    
129    
130     // open connection to database
131     public static void openDBConnection() {
132    
133     try { // try to load driver
134     Class.forName(dbDriver).newInstance();
135     try {
136     conn = DriverManager.getConnection(dbServer, dbUserName, dbPassword);
137     } catch (SQLException E) {
138     System.err.println("Error: Unable to open database connection.");
139     System.err.println("SQLException: " + E.getMessage());
140     System.err.println("SQLState: " + E.getSQLState());
141     System.err.println("VendorError: " + E.getErrorCode());
142     } // end try/catch connection
143     } // end try load Driver
144     catch (Exception E) {
145     System.err.println("Error: Unable to load database driver.");
146     E.printStackTrace();
147     } // end catch
148    
149     return;
150     }
151    
152     // close database connection
153     public static void closeDBConnection() {
154    
155     try {
156     conn.close();
157     } catch (SQLException E) {
158     System.out.println("Error: Unable to close connection.");
159     System.out.println("SQLException: " + E.getMessage());
160     System.out.println("SQLState: " + E.getSQLState());
161     System.out.println("VendorError: " + E.getErrorCode());
162     System.exit(1);
163     } // end try/catch connection
164    
165     return;
166     }
167    
168     // create new neighbourhood table if not exists
169     public static void createNeighbourhoodTableIfNotExists() {
170     Statement stmt;
171     String query;
172     try {
173     query = "CREATE TABLE IF NOT EXISTS " + nbhTable + " " + nbhTableDefinition + ";";
174     stmt = conn.createStatement();
175     stmt.execute(query);
176     } catch (SQLException E) {
177     System.err.println("Error: Unable to create neighbourhood table.");
178     System.err.println("SQLException: " + E.getMessage());
179     System.err.println("SQLState: " + E.getSQLState());
180     System.err.println("VendorError: " + E.getErrorCode());
181     } // end catch
182    
183     return;
184     }
185    
186     // insert neighbourhood entries for each graph with from <= graph_id <= to
187     public static void insertNeighbourhoods(int fromIdx, int toIdx) {
188     Statement graphsStmt,
189     nodesStmt,
190     edgesStmt;
191     ResultSet graphsRs,
192     nodesRs,
193     edgesRs;
194     String query,
195     chainId = "?",
196     resType,
197     nbhResType,
198     hash;
199     int graphId = 0,
200     entryId,
201     resNum = 0,
202     numGraphs = 0,
203     numNodes = 0,
204     // existingNodes,
205     resTypeNum,
206     nbhResCount,
207     nbhResTypeNum,
208     nbhSize;
209     int[] nbh;
210    
211     try {
212     // get number of graphs
213     graphsStmt = conn.createStatement();
214     query = "SELECT COUNT(*) FROM " + listTable +
215     " WHERE graph_id >= " + fromIdx + " AND graph_id <= " + toIdx + ";";
216     graphsRs = graphsStmt.executeQuery(query);
217     if(graphsRs.next()) {
218     numGraphs = graphsRs.getInt(1);
219     }
220     graphsRs.close();
221     graphsStmt.close();
222     System.out.println("Processing " + numGraphs + (numGraphs==1?" graph...":" graphs..."));
223    
224     // iterate over all graphs
225     graphsStmt = conn.createStatement();
226     query = "SELECT graph_id, entry_id FROM " + listTable +
227     " WHERE graph_id >= " + fromIdx + " AND graph_id <= " + toIdx + ";";
228     graphsRs = graphsStmt.executeQuery(query);
229     numGraphs = 0;
230     while(graphsRs.next()) {
231     graphId = graphsRs.getInt(1);
232     entryId = graphsRs.getInt(2);
233    
234     // check whether neighbourhoods for this graph already exist
235     // nodesStmt = conn.createStatement();
236     // query = "SELECT COUNT(*) FROM " + nbhTable + " WHERE graph_id = " + graphId + ";";
237     // nodesRs = nodesStmt.executeQuery(query);
238     // if(nodesRs.next()) {
239     // existingNodes = nodesRs.getInt(1);
240     // if(existingNodes > 0) {
241     // System.out.println("Warning: Neighbourhoods for the graph with graph_id = "
242     // + graphId + " already exist.");
243     // }
244     // }
245     // nodesRs.close();
246     // nodesStmt.close();
247    
248     // extract all nodes and edges for this graph into temporary tables
249     nodesStmt = conn.createStatement();
250     nodesStmt.addBatch("DROP TEMPORARY TABLE IF EXISTS " + tempNodes + ";");
251     nodesStmt.addBatch("DROP TEMPORARY TABLE IF EXISTS " + tempEdges + ";");
252     nodesStmt.addBatch("CREATE TEMPORARY TABLE " + tempNodes +
253     " SELECT * FROM " + nodeTable + " WHERE graph_id = " + graphId + ";");
254     nodesStmt.addBatch("CREATE TEMPORARY TABLE " + tempEdges +
255     " SELECT * FROM " + edgeTable + " WHERE graph_id = " + graphId +
256     " AND " + edgeType + " > 0;"); // take only relevant edges
257     // if necessary create indices to speed up queries
258     nodesStmt.executeBatch();
259     nodesStmt.close();
260    
261     // iterate over all nodes for this graph
262     nodesStmt = conn.createStatement();
263     query = "SELECT cid, num, res FROM "+ tempNodes + ";"; // graph_id matches automatically
264     nodesRs = nodesStmt.executeQuery(query);
265     while(nodesRs.next()) {
266     chainId = nodesRs.getString(1);
267     resNum = nodesRs.getInt(2);
268     resType = nodesRs.getString(3);
269     resTypeNum = aa_three2num(resType);
270     if(resTypeNum < 1 || resTypeNum > 20) {
271     System.err.println("Warning: Non standard amino acid " + resType +
272     " at position " + resNum + " in chain " + chainId +
273     " of graph " + graphId + ".");
274     }
275    
276     // get residue counts from edges table
277     edgesStmt = conn.createStatement();
278     query = "SELECT j_res AS res, COUNT(*) AS count FROM "+ tempEdges +
279     " WHERE i_cid = '" + chainId + "' AND i_num = " + resNum +
280     " GROUP BY j_res;";
281     //System.out.println(query);
282     edgesRs = edgesStmt.executeQuery(query);
283    
284     nbh = new int[21]; // set all residue counts to zero
285     nbhSize = 0;
286     while(edgesRs.next()) {
287     nbhResType = edgesRs.getString(1);
288     nbhResCount = edgesRs.getInt(2);
289     nbhResTypeNum = aa_three2num(nbhResType);
290     if(nbhResTypeNum >= 1 && nbhResTypeNum <= 20) {
291     nbh[nbhResTypeNum] = nbhResCount;
292     nbhSize += nbhResCount;
293     }
294     }
295     edgesRs.close();
296     edgesStmt.close();
297    
298     // update nbh table
299     hash = nbhGetHashValue(nbh);
300     query = "INSERT INTO " + nbhTable + " (graph_id, cid, num, res, entry_id, size, hash, "
301     + "ALA, ARG, ASN, ASP, CYS, GLN, GLU, GLY, HIS, ILE, "
302     + "LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR, VAL) " +
303     "VALUES (" + graphId + ",'" + chainId + "'," + resNum + ",'"
304     + resType + "'," + entryId + "," + nbhSize + ",'" + hash + "',"
305     + nbh[1] + "," + nbh[2] + "," + nbh[3] + "," + nbh[4] + "," + nbh[5] + ","
306     + nbh[6] + "," + nbh[7] + "," + nbh[8] + "," + nbh[9] + "," + nbh[10] + ","
307     + nbh[11] + "," + nbh[12] + "," + nbh[13] + "," + nbh[14] + "," + nbh[15] + ","
308     + nbh[16] + "," + nbh[17] + "," + nbh[18] + "," + nbh[19] + "," + nbh[20] + ");";
309     edgesStmt = conn.createStatement();
310     if(edgesStmt.executeUpdate(query) != 1) {
311     System.err.println("Error: Insert into table " + nbhTable + " failed.");
312     System.exit(1);
313     }
314     edgesStmt.close();
315     numNodes++;
316     } // end of while over nodes
317     nodesRs.close();
318     nodesStmt.close();
319     numGraphs++;
320     if(numGraphs % 1000 == 0) {
321     System.out.print(numGraphs + " ");
322     System.out.flush();
323     }
324     System.gc(); // clean up
325     } // end while over all graphs
326     graphsRs.close();
327     graphsStmt.close();
328     } catch (SQLException E) {
329     System.err.println("Error in function insertNeighbourhoods:");
330     System.err.println("graphId = "+ graphId +", chainId = "+ chainId +", resNum = "+ resNum);
331     System.err.println("SQLException: " + E.getMessage());
332     System.err.println("SQLState: " + E.getSQLState());
333     System.err.println("VendorError: " + E.getErrorCode());
334     } // end catch
335    
336     System.out.println();
337     System.out.println("Extracted "+ numNodes + " neighbourhood" + (numNodes==1?"":"s")
338     + " from " + numGraphs + " graph" + (numGraphs==1?"":"s") + ".");
339     return;
340     }
341    
342     public static void createIndicesOnNeighbourhoodTable() {
343     // index on graph_id
344     // index on graph_id, cid, num
345     // index on res
346     // index on size
347     // index on hash
348     }
349    
350     /* ----------------------------------- main -------------------------------------------*/
351    
352     public static void main(String[] args) {
353    
354     int from = 0,
355     to = 0;
356    
357     // read command line parameters
358     if ( args.length != 2) {
359     System.err.println("Usage: makeNbhTbl [start_index] [end_index]");
360     System.exit(1);
361     } else {
362    
363     // read parameter 1
364    
365     try {
366     from = Integer.parseInt(args[0]);
367     if(from <= 0) {
368     System.err.println("Parameter 1 has to be positive");
369     System.exit(1);
370     }
371     }
372     catch(NumberFormatException e) {
373     System.err.println("Parameter 1 has to be an integer");
374     System.exit(1);
375     }
376    
377     // read parameter 2
378    
379     try {
380     to = Integer.parseInt(args[1]);
381     if(to <= 0) {
382     System.err.println("Parameter 2 has to be positive");
383     System.exit(1);
384     } else
385     if(from > to) {
386     System.err.println("Start index has to be smaller or equal than end index");
387     System.exit(1);
388     }
389     }
390     catch(NumberFormatException e) {
391     System.err.println("Parameter 2 has to be an integer");
392     System.exit(1);
393     }
394    
395     }
396    
397     // execute
398     openDBConnection();
399     createNeighbourhoodTableIfNotExists();
400     insertNeighbourhoods(from, to);
401     createIndicesOnNeighbourhoodTable();
402     closeDBConnection();
403    
404     } // end of main
405    
406     } // end of class makeNbhTbl