ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/hashIterNbhood.java
Revision: 267
Committed: Tue Aug 14 09:10:58 2007 UTC (17 years, 1 month ago) by lappe
File size: 20279 byte(s)
Log Message:
Line File contents
1 import tools.MySQLConnection;
2
3 import java.sql.SQLException;
4 import java.sql.Statement;
5 import java.sql.ResultSet;
6
7 public class hashIterNbhood {
8
/**
 * Iterates over all single-step moves, each exchanging one neighbor at a time
 * for a common neighbor, and scores every resulting neighborhood.
 *
 * @author lappe
 */
15 static int maxRank = 21; // value to replace for non-existence of central redue in the resultvector (rank=0)
16 // higher values should penalize non-existence more
17 static String user = "lappe" ; // change user name!!
18 static MySQLConnection conn;
19 static double lastEntropy=0.0, lastFreq, lastAUC, lastavgk, lastdevk;
20 static int lastRank, lastTotal;
21 static int graphid=0, resnr=0;
22 static int size1=0, size2=0; // the dimensions of the matrices = |shell1|x|shell2| = nr of direct(1) x indirect(2) nbs
23 static String rsideline[], rheadline[]={" -->\t(-)1st shell\t0"," |\t \t-"," |\t(+) \t0"," V\t2nd shell\t-"};
24 static String sideline[], headline[]={"\t\t0","\t\t-","\t\t0","\t\t-","\t\tX"};
25 static String restype="?", ressec="?", newnbhood="";
26 static int cn1[], cn2[];
27 static int sumdelta[][], rank[][], total[][], cnsize[][];
28 static double entropy[][], freq[][], AUC[][];
29 static String newnbs[][], nbstring[][], moveset[][];
30
31 static int printRank=1,
32 printTotal = 2,
33 printEntropy = 3,
34 printFreq = 4,
35 printAUC = 5,
36 printNbstring= 6,
37 printMoveset = 7,
38 printdeltaRank=8,
39 printCNSize =9,
40 printCNSxdelta=10;
41
42 public static void main(String[] args) {
43
44 if (args.length<2){
45 System.err.println("The graph_id and residue-nr. needs to be given .... i.e. 7 110");
46 System.exit(1);
47 }
48 graphid = Integer.parseInt( args[0]);
49 resnr = Integer.parseInt( args[1]);
50 int j_num=0, oj_num=0, oj_shell, oj_cnsize, i, j, x, oi, oj, o1, o2, score=0;
51 boolean overx = false;
52 String sql, oj_res, oj_sec;
53 Statement mstmt, mjst, nstmt;
54 ResultSet mrsst, nrsst;
55
56 try {
57 conn = new MySQLConnection("white",user,"nieve","pdb_reps_graph_4_2"); // the UPPERCASE DB!
58 System.out.println("Hashed Iterating / Evaluating neighborhood v.0.2. ");
59
60 sql = "select num, res, sstype from single_model_node where graph_id="+graphid+" and num="+resnr+";";
61 mstmt = conn.createStatement();
62 mrsst = mstmt.executeQuery(sql);
63 if (mrsst.next()) {
64 // this is the central node -> get type and secondary structure
65 restype = mrsst.getString( 2).toUpperCase();
66 ressec = mrsst.getString( 3).toUpperCase();
67 } // end if central residue
68 mrsst.close();
69 mstmt.close();
70 System.out.println("GraphID "+graphid+" Central residue is "+restype+":"+resnr+":"+ressec);
71
72 // retrieve the original nbhood into orig_shell
73 System.out.println("retrieving original first shell ... ");
74 mstmt = conn.createStatement();
75 mstmt.executeUpdate("drop table if exists orig_shell;");
76 mstmt.close();
77
78 mstmt = conn.createStatement();
79 mstmt.executeUpdate("create table orig_shell as select i_num, i_res, j_num, j_res, j_sstype, 1 as shell from single_model_edge where graph_id="+graphid+" and i_num="+resnr+";");
80 mstmt.close();
81
82 System.out.println("adding the original 2nd shell ...");
83 sql = "select j_num from orig_shell where shell=1;";
84 mstmt = conn.createStatement();
85 mrsst = mstmt.executeQuery(sql);
86 i=0;
87 while (mrsst.next()) {
88 i++;
89 oj_num = mrsst.getInt(1);
90 System.out.println(i+":"+oj_num);
91 mjst = conn.createStatement();
92 sql = "insert into orig_shell select i_num, i_res, j_num, j_res, j_sstype, 2 as shell from single_model_edge where graph_id="+graphid+" and i_num="+oj_num+";";
93 // System.out.println(">"+sql);
94 mjst.executeUpdate( sql);
95 mjst.close();
96 } // end while
97 mrsst.close();
98 mstmt.close();
99
100 System.out.println("gathering the original 1st and 2nd shell nbs.");
101 sql = "select j_num, j_res, j_sstype, min(shell) as shell, count(*) as cn from orig_shell where j_num!="+resnr+" group by j_num order by j_num;";
102 mstmt = conn.createStatement();
103 mrsst = mstmt.executeQuery(sql);
104 o1=0;
105 o2=0;
106 while (mrsst.next()) {
107 if ( mrsst.getInt( 4)==1) { // count 1st shell entry
108 o1++;
109 System.out.print("1#"+o1);
110 rheadline[0]+="\t"+o1;
111 rheadline[1]+="\t"+mrsst.getString(2); // res
112 rheadline[2]+="\t"+mrsst.getInt(1); // resnum
113 rheadline[3]+="\t"+mrsst.getString(3); // SStype
114 } // end if 2st shell
115 if ( mrsst.getInt( 4)==2) { // count 2nd shell entry
116 o2++;
117 System.out.print("2#"+o2);
118 } // end if 2nd shell
119 System.out.println(" :\t"+mrsst.getInt( 1)+"\t"+mrsst.getString( 2)+"\t"+mrsst.getString( 3)+"\t"+mrsst.getInt( 4)+"\t"+mrsst.getInt( 5));
120 } // end while
121 System.out.println("Orig.SIZE 1st shell "+o1);
122 System.out.println("Orig.SIZE 2nd shell "+o2);
123 rsideline = new String[o2+1];
124 rsideline[0]="+0\tRnum:S(cn)";
125 sumdelta = new int[(o1+1)][(o2+1)];
126 newnbs = new String[(o1+1)][(o2+1)];
127 // creating the perturbed version of shell 1 into temp_shell
128 for (j=0; j<=o2; j++) { // <=o2 outer loop through all originally indirect contacts
129
130 for (i=0; i<=o1; i++) { // inner loop through all originally direct contacts
131 System.out.println("---------------------------------------------");
132 System.out.println("Creating perturbed nbhood ("+i+","+j+")\t");
133 // clear first
134 nstmt = conn.createStatement();
135 nstmt.executeUpdate("drop table if exists temp_shell;");
136 nstmt.close();
137 nstmt = conn.createStatement();
138 nstmt.executeUpdate("create table temp_shell select * from orig_shell limit 0;");
139 nstmt.close();
140 oi = 0;
141 oj = 0;
142 mrsst.beforeFirst();
143 newnbhood="";
144 overx = false;
145 while (mrsst.next()) {
146 oj_num = mrsst.getInt( 1);
147 oj_res = mrsst.getString(2);
148 oj_sec = mrsst.getString(3);
149 oj_shell = mrsst.getInt( 4);
150 oj_cnsize = mrsst.getInt( 5);
151 if (oj_num>resnr) { // we are over x
152 if (!overx) {
153 newnbhood+="x";
154 overx=true;
155 } // end if over x
156 } // END IF J > X
157 if (oj_shell==1) { // a direct 1st shell neighbour
158 oi++;
159 if (oi!=i) {// if this is NOT the one direct nb 2B dropped
160 // include as 1st shell nbor into temp_shell
161 nstmt = conn.createStatement();
162 sql = "insert into temp_shell values("+resnr+",\'"+restype+"\',"+oj_num+",\'"+oj_res+"\',\'"+oj_sec+"\', 1);";
163 // System.out.println("oi>"+ sql);
164 nstmt.executeUpdate(sql);
165 nstmt.close();
166 newnbhood+=oj_res;
167 } // end if ni!=i
168 } else { // 2nd shell neighbour
169 oj++;
170 if (oj==j) { // this is the 2nd shell nb 2B included
171 // put as new 1st shell nbor
172 nstmt = conn.createStatement();
173 sql = "insert into temp_shell values("+resnr+",\'"+restype+"\',"+oj_num+",\'"+oj_res+"\',\'"+oj_sec+"\', 1);";
174 // System.out.println("oj>"+ sql);
175 nstmt.executeUpdate(sql);
176 nstmt.close();
177 newnbhood+=oj_res;
178 } // end if
179 if (j==0) { // creating the sideline ruler array for the output
180 rsideline[oj] = "+"+oj+"\t"+oj_res+""+oj_num+":"+oj_sec+"("+oj_cnsize+")";
181 } // end if j==0
182 } // end if 1st/2nd shell
183
184 } // end while through the entire nbhood
185 if (!overx) { // we haven't seen a nb > x yet
186 newnbhood+="x"; // x sits at the end of the nbhoodstring
187 overx=true;
188 } // end if over x
189 System.out.println("new direct nbhood "+newnbhood);
190 // Now the "updated" / perturbed version of shell 1 is in temp_shell
191 // we can build 2nd shell accordingly.
192 // System.out.println("building the 2nd shell");
193 sql = "select j_num, j_res, j_sstype from temp_shell where shell=1;";
194 nstmt = conn.createStatement();
195 nrsst = nstmt.executeQuery(sql);
196 x = 0;
197 while (nrsst.next()) {
198 x++;
199 j_num = nrsst.getInt( 1);
200 // System.out.println(x+":"+nrsst.getString( 2)+" "+j_num+" "+nrsst.getString( 3));
201 mjst = conn.createStatement();
202 sql = "insert into temp_shell select i_num, i_res, j_num, j_res, j_sstype, 2 as shell from single_model_edge where graph_id="+graphid+" and i_num="+j_num+";";
203 // System.out.println(">"+sql);
204 mjst.executeUpdate( sql);
205 mjst.close();
206 } // end while
207 nrsst.close();
208 nstmt.close();
209 // and score this move
210 newnbs[i][j]= newnbhood;
211 System.out.println("Scoring ["+i+"]["+j+"] "+newnbs[i][j]);
212 score = scoreCurrentNbhood();
213 sumdelta[i][j] = score;
214 reportMatrix( printCNSize );
215 reportMatrix( printdeltaRank );
216 reportMatrix( printCNSxdelta );
217 System.out.println("SumDeltaRank Score = \t"+score);
218 // reportMatrix( printNbstring );
219 } // next i
220 System.out.println("\t");
221 } // next j
222 // report total matrix sumdelta
223 reportResults( o1, o2, printRank);
224 reportResults( o1, o2, printNbstring);
225 // Cleanup ...
226 mrsst.close();
227 mstmt.close();
228
229 } catch (SQLException e) {
230 e.printStackTrace();
231 System.err.println("SQLException: " + e.getMessage());
232 System.err.println("SQLState: " + e.getSQLState());
233 } // end try/catch
234 System.out.println("fin.");
235 } // end main
236
237
238
239 public static void reportResults( int o1, int o2, int what2print) {
240 System.out.println("Printing "+what2print);
241 if (what2print==printRank) System.out.println("Overall resulting SumDeltaRank Matrix" );
242 if (what2print==printNbstring) System.out.println("Overall tested new nbhoodStrings" );
243 /*if (what2print==printTotal) System.out.print("total[i][j]" );
244 if (what2print==printEntropy) System.out.print("entropy[i][j]" );
245 if (what2print==printFreq ) System.out.print("freq[i][j]" );
246 if (what2print==printAUC) System.out.print("AUC[i][j]" );
247
248 if (what2print==printMoveset) System.out.print("moveset[i][j]" );
249 if (what2print==printdeltaRank) System.out.print("rank[i][j]-rank[0][0]" );) */
250
251 // print headerline(s)
252 System.out.println(rheadline[0]);
253 System.out.println(rheadline[1]);
254 System.out.println(rheadline[2]);
255 System.out.println(rheadline[3]);
256 for (int j=0; j<=o2; j++) {
257 // print rsideline
258 System.out.print( rsideline[j]+"\t");
259 for ( int i=0; i<=o1; i++) {
260 if (what2print==printRank) System.out.print( sumdelta[i][j] );
261 if (what2print==printNbstring) System.out.print( newnbs[i][j] );
262 System.out.print("\t");
263 } // next i
264 System.out.println("");
265 } // next j
266 } // end of reportResults
267
268
269
270 public static int scoreCurrentNbhood() {
271 int n1=0, n2=0, ni, nj, i, j, j_num, j_shell, j_cnsize, sumdeltarank=0;
272 String sql, j_res, j_sec, nbs, mymove, precol;
273 boolean overx = false;
274 Statement stmt;
275 ResultSet rsst;
276
277 try {
278 headline[0]="\t\t0";
279 headline[1]="\t\t-";
280 headline[2]="\t\t0";
281 headline[3]="\t\t-";
282 headline[4]="\t\tX";
283
284 // System.out.println("retrieving the entire nbhood (1st and 2nd shell)");
285 sql = "select j_num, j_res, j_sstype, min(shell) as shell, count(*) as cn from temp_shell where j_num!="+resnr+" group by j_num order by j_num;";
286 stmt = conn.createStatement();
287 rsst = stmt.executeQuery(sql);
288 // counting shell2
289 n2=0;
290 while (rsst.next()) {
291 if ( rsst.getInt( 4)==1) { // count 1st shell entry
292 n1++;
293 System.out.print("1#"+n1);
294 headline[0]+="\t"+n1;
295 headline[1]+="\t"+rsst.getString(2); // res
296 headline[2]+="\t"+rsst.getInt(1); // resnum
297 headline[3]+="\t"+rsst.getString(3); // SStype
298 headline[4]+="\t"+rsst.getInt(5); // CNSize
299 } // end if 2st shell
300 if ( rsst.getInt( 4)==2) { // count 2nd shell entry
301 n2++;
302 System.out.print("2#"+n2);
303 } // end if 2nd shell
304 System.out.println(" :\t"+rsst.getInt( 1)+"\t"+rsst.getString( 2)+"\t"+rsst.getString( 3)+"\t"+rsst.getInt( 4)+"\t"+rsst.getInt( 5));
305 } // end while
306 size1 = n1;
307 size2 = n2;
308 System.out.println("SIZE 1st shell "+size1);
309 System.out.println("SIZE 2nd shell "+size2);
310 headline[4]=headline[4].replace("X",(""+size1));
311
312 // n1 and n2 are known, initialise matrices accordingly.
313 // nbhood, move, rank, entropy, freq, AUC etc. (evtl.+ degree(?))
314 rank = new int[(n1+1)][(n2+1)];
315 rank[0][0]=maxRank;
316 total = new int[(n1+1)][(n2+1)];
317 entropy = new double[(n1+1)][(n2+1)];
318 freq = new double[(n1+1)][(n2+1)];
319 AUC = new double[(n1+1)][(n2+1)];
320 nbstring = new String[(n1+1)][(n2+1)];
321 moveset = new String[(n1+1)][(n2+1)];
322 sideline = new String[n2+1];
323 cn1 = new int[n1+1];
324 cn2 = new int[n2+1];
325 cnsize = new int[(n1+1)][(n2+1)];
326
327 for (j=0; j<=n2; j++) { // outer loop through all indirect contacts
328 for (i=0; i<=n1; i++) { // inner loop through all direct contacts
329 mymove = "";
330 overx = false;
331 System.out.print("("+i+","+j+")\t");
332 ni = 0;
333 nj = 0;
334 sideline[0]="+0\tRnum:S("+n1+")";
335 cn1[0]=n1;
336 cn2[0]=n1;
337 nbs="%";
338 rsst.beforeFirst();
339 while (rsst.next()) {
340 j_num = rsst.getInt(1);
341 j_res = rsst.getString(2);
342 j_sec = rsst.getString(3);
343 j_shell = rsst.getInt(4);
344 j_cnsize = rsst.getInt(5);
345 if (j_num>resnr) { // we are over x
346 if (!overx) {
347 nbs+="x%";
348 overx=true;
349 } // end if over x
350 } // END IF J > X
351 if (j_shell==1) { // a direct 1st shell neighbour
352 ni++;
353 if (ni!=i) {// if this is NOT the one direct nb 2B dropped
354 nbs+=j_res.toUpperCase()+"%"; // it is included
355 } else { // this one IS dropped
356 mymove += "(-"+j_res+":"+j_num+":"+j_sec+"/"+j_cnsize+")";
357 cn1[ni]=j_cnsize;
358 } // end if ni!=i
359 } else { // 2nd shell neighbour
360 nj++;
361 if (nj==j) { // this is the 2nd shell nb 2B included
362 nbs+=j_res.toUpperCase()+"%";
363 mymove += "(+"+j_res+":"+j_num+":"+j_sec+"/"+j_cnsize+")";
364
365 } // end if
366 // // only once for building the sidelines
367 if (j==0) {
368 sideline[nj] = "+"+nj+"\t"+j_res+""+j_num+":"+j_sec+"("+j_cnsize+")";
369 cn2[nj] = j_cnsize;
370 } // end if sideline
371 } // end if 1st/2nd shell
372 } // end while through the entire nbhood
373 if (!overx) { // in case x is the very last we haven't seen it yet
374 nbs+="x%"; // add it in the end
375 overx=true;
376 } // end if over x
377 System.out.print("("+nbs+")\t");
378 nbstring[i][j] = nbs;
379 moveset[i][j] = mymove;
380 precol = nbstring[i][0];
381 getEntropy( nbs, restype, precol);
382 if (lastRank==0) lastRank = rank[0][0];
383 rank[i][j] = lastRank;
384 entropy[i][j] = lastEntropy;
385 freq[i][j] = lastFreq;
386 AUC[i][j] = lastAUC;
387 total[i][j]= lastTotal;
388 cnsize[i][j]=cn1[i]*cn2[j];
389 if (lastRank > 0) {
390 sumdeltarank += ((lastRank-(rank[0][0]))*(cnsize[i][j]));
391 } else {
392 sumdeltarank += ((maxRank-(rank[0][0]))*(cnsize[i][j]));
393 } // end if lastRank was defined
394 } // close inner loop (i)
395 System.out.println(".");
396 } // next outerloop (j)
397 rsst.close();
398 stmt.close();
399
400 } catch (SQLException e) {
401 e.printStackTrace();
402 System.err.println("SQLException: " + e.getMessage());
403 System.err.println("SQLState: " + e.getSQLState());
404 } // end try/catch
405 // System.out.println("fin.");
406 return sumdeltarank;
407 } // end scoreCurrentNbhood
408
409
410 public static void reportMatrix( int what2print) {
411 System.out.println("Printing "+what2print);
412 if (what2print==printRank) System.out.print("rank[i][j]" );
413 if (what2print==printTotal) System.out.print("total[i][j]" );
414 if (what2print==printEntropy) System.out.print("entropy[i][j]" );
415 if (what2print==printFreq ) System.out.print("freq[i][j]" );
416 if (what2print==printAUC) System.out.print("AUC[i][j]" );
417 if (what2print==printNbstring) System.out.print("nbstring[i][j]" );
418 if (what2print==printMoveset) System.out.print("moveset[i][j]" );
419 if (what2print==printdeltaRank) System.out.print("rank[i][j]-rank[0][0]" );
420 if (what2print==printCNSize) System.out.print("cnsize[i][j]" );
421 if (what2print==printCNSxdelta) System.out.print("cnsize[i][j]*(rank[i][j]-rank[0][0])" );
422
423 System.out.println("...");
424 // print headerline(s)
425 System.out.println(headline[0]);
426 System.out.println(headline[1]);
427 System.out.println(headline[2]);
428 System.out.println(headline[3]);
429 System.out.println(headline[4]);
430 for (int j=0; j<=size2; j++) {
431 // print sideline
432 System.out.print( sideline[j]+"\t");
433 for ( int i=0; i<=size1; i++) {
434 if (what2print==printRank) System.out.print( rank[i][j] );
435 if (what2print==printTotal) System.out.print( total[i][j] );
436 if (what2print==printEntropy) System.out.print( entropy[i][j] );
437 if (what2print==printFreq ) System.out.print( freq[i][j] );
438 if (what2print==printAUC) System.out.print( AUC[i][j] );
439 if (what2print==printNbstring) System.out.print( nbstring[i][j] );
440 if (what2print==printMoveset) System.out.print( moveset[i][j] );
441 if (what2print==printdeltaRank) System.out.print( rank[i][j]-rank[0][0] );
442 if (what2print==printCNSize) System.out.print( cnsize[i][j] );
443 if (what2print==printCNSxdelta) System.out.print( cnsize[i][j]*(rank[i][j]-rank[0][0]) );
444 System.out.print("\t");
445 } // next i
446 System.out.println("");
447 } // next j
448 } // end of report
449
450 public static void getEntropy( String nbs, String centRes, String predec) {
451 String sql, res, this_n, prec_n;
452 Statement stmt;
453 ResultSet rsst;
454 double p, psum=0.0, logp, plogp, plogpsum=0.0;
455 try {
456 // Hashing first row tables comes first
457 // System.out.println("nbs : "+nbs);
458 // System.out.println("predec: "+predec);
459 this_n = nbs.replace("%","");
460 prec_n = predec.replace("%","");
461 // System.out.println("this_n: ["+this_n+"]");
462 // System.out.println("prec_n: ["+prec_n+"]");
463 if (prec_n.equals(this_n)) {
464 // System.out.println("have to create db for this "+prec_n);
465 sql = "create table IF NOT EXISTS nbhashtables."+prec_n+" as select res, n, k from single_model_node where n like '"+nbs+"';";
466 // System.out.println(" >> "+sql);
467 stmt = conn.createStatement();
468 stmt.executeUpdate( sql);
469 stmt.close();
470 } // else System.out.println("using preceding db of "+prec_n);
471
472 // now we can safely derive the estimates from the hashtable
473 sql = "select count(*) from nbhashtables."+prec_n+" where n like '"+nbs+"';";
474 // System.out.println( sql);
475 stmt = conn.createStatement();
476 rsst = stmt.executeQuery(sql);
477 if (rsst.next()) lastTotal = rsst.getInt( 1);
478 rsst.close();
479 stmt.close();
480
481 sql = "select res, count(*) as t, count(*)/"+lastTotal+" as p, avg( k), stddev( k) from nbhashtables."+prec_n+" where n like '"+nbs+"' group by res order by p DESC;";
482 stmt = conn.createStatement();
483 rsst = stmt.executeQuery(sql);
484 // System.out.println("rank : res : total t : fraction p : log2(p) : -p*log2(p)");
485 int rank = 0;
486 boolean seenCentRes = false;
487 lastAUC = 0.0;
488 lastRank = 0;
489 lastFreq = 0.0;
490 lastavgk = 0.0;
491 lastdevk = 0.0;
492 while (rsst.next()) {
493 rank ++;
494 res = rsst.getString(1); // 1st column -- res
495 p = rsst.getDouble(3); // 3rd: fraction p
496 // System.out.print(rank+ " : " + res+" : "+num+ " : " + p);
497 logp = Math.log(p)/Math.log(2.0); // to basis 2 for info in bits
498 // System.out.print(" : " + logp);
499 plogp = -1.0 * p * logp;
500 // System.out.print(" : " + plogp);
501 plogpsum += plogp;
502 psum += p;
503
504 if (res.equals(centRes)) {
505 // System.out.print(" <==" + centRes);
506 seenCentRes = true;
507 lastFreq = p;
508 lastRank = rank;
509 lastavgk = rsst.getDouble(4);
510 lastdevk = rsst.getDouble(5);
511 }
512 if (seenCentRes) lastAUC += p;
513 // System.out.println("");
514 }
515 // System.out.println("Sum :"+lastTotal+" : "+psum+" : "+plogpsum);
516 rsst.close();
517 stmt.close();
518 lastEntropy = plogpsum;
519 if (lastRank==0) lastRank = maxRank;
520 } catch (SQLException e) {
521 e.printStackTrace();
522 System.err.println("SQLException: " + e.getMessage());
523 System.err.println("SQLState: " + e.getSQLState());
524 }
525
526 } // end of getEntropy
527
528 } // end class