ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/cnCoarseNbhood.java
Revision: 492
Committed: Wed Jan 2 13:18:57 2008 UTC (16 years, 8 months ago) by duarte
File size: 23007 byte(s)
Log Message:
Copied the aglappe-jung branch into trunk.

Line File contents
1 import tools.MySQLConnection;
2
3 import java.sql.SQLException;
4 import java.sql.Statement;
5 import java.sql.ResultSet;
6
7 public class cnCoarseNbhood {
8
9 /**
10 * CN2 size x sumdelta
11 * iterating over all steps by exchange of one neighbor at a time by a common neighbor
12 * with subsequent scoring
13 * @author lappe
14 */
15 static int maxRank = 21; // value to replace for non-existence of central redue in the resultvector (rank=0)
16 // higher values should penalize non-existence more
17 static int VL=2; // Verbosity Level : 0=silent ... 3=very talkative
18 static String user = "lappe" ; // change user name!!
19 static MySQLConnection conn;
20 static String backgrndDB = "pdb_reps_graph_4_2";
21 static String targetNodes = "target_node";
22 static String targetEdges = "target_edge";
23 static double lastEntropy=0.0, lastFreq, lastAUC, lastavgk, lastdevk;
24 static int lastRank, lastTotal, lastxcn=0;
25 static int graphid=0, resnr=0;
26 static int size1=0, size2=0; // the dimensions of the matrices = |shell1|x|shell2| = nr of direct(1) x indirect(2) nbs
27 static String rsideline[], rheadline[]={" -->\t(-)1st shell\t0"," |\t \t-"," |\t(+) \t0"," V\t2nd shell\t-", " \t \tX"};
28 static String sideline[], headline[]={"\t\t0","\t\t-","\t\t0","\t\t-","\t\tX"};
29 static String restype="?", ressec="?", newnbhood="";
30 static int cn1[], cn2[];
31 static int sumdelta[][], rank[][], total[][], cnsize[][], cnall[][];
32 static double entropy[][], freq[][], AUC[][];
33 static String newnbs[][], nbstring[][], moveset[][];
34
35 static int printRank=1,
36 printTotal = 2,
37 printEntropy = 3,
38 printFreq = 4,
39 printAUC = 5,
40 printNbstring= 6,
41 printMoveset = 7,
42 printdeltaRank=8,
43 printCNSize =9,
44 printCNSxdelta=10;
45
46 public static void main(String[] args) {
47
48 if (args.length<2){
49 System.err.println("Coarse NBHood : graph_id and residue-nr. need to be given .... i.e. 7 110");
50 System.exit(1);
51 }
52 graphid = Integer.parseInt( args[0]);
53 resnr = Integer.parseInt( args[1]);
54 int j_num=0, oj_num=0, oj_shell, oj_cnsize, i, j, x, oi, oj, o1, o2, ixnum, jxnum, score=0;
55 boolean overx = false;
56 String sql, oj_res, oj_sec;
57 Statement mstmt, mjst, nstmt;
58 ResultSet mrsst, nrsst;
59
60 try {
61 conn = new MySQLConnection("white",user,"nieve", backgrndDB); // the UPPERCASE DB!
62 System.out.println("Coarse Scoring Target neighborhoods v.0.5. ");
63
64 sql = "select num, res, sstype from "+targetNodes+" where graph_id="+graphid+" and num="+resnr+";";
65 mstmt = conn.createStatement();
66 mrsst = mstmt.executeQuery(sql);
67 if (mrsst.next()) {
68 // this is the central node -> get type and secondary structure
69 restype = mrsst.getString( 2).toUpperCase();
70 ressec = mrsst.getString( 3).toUpperCase();
71 } // end if central residue
72 mrsst.close();
73 mstmt.close();
74 System.out.println("GraphID "+graphid+" Central residue is "+restype+":"+resnr+":"+ressec);
75
76 // retrieve the original nbhood into orig_shell
77 System.out.println("retrieving original first shell ... ");
78 mstmt = conn.createStatement();
79 mstmt.executeUpdate("drop table if exists orig_shell;");
80 mstmt.close();
81
82 mstmt = conn.createStatement();
83 mstmt.executeUpdate("create table orig_shell as " +
84 "select graph_id,i_node_id,i_cid,i_num,i_res,i_sstype,j_node_id,j_cid,j_num,j_res,j_sstype,BB,SC, 1 as shell " +
85 "from "+targetEdges+
86 " where graph_id="+graphid+" and i_num="+resnr+";");
87 mstmt.close();
88
89 System.out.println("adding the original 2nd shell ...");
90 sql = "select j_cid, j_num, j_res, j_sstype, BB, SC from orig_shell where shell=1 and graph_id="+graphid+" and i_num="+resnr+";";
91 mstmt = conn.createStatement();
92 mrsst = mstmt.executeQuery(sql);
93 i=0;
94 while (mrsst.next()) {
95 i++;
96 oj_num = mrsst.getInt(2);
97 System.out.print(i+":"+mrsst.getString(1));
98 System.out.print("\t"+oj_num);
99 System.out.print("\t"+mrsst.getString(3));
100 System.out.print("\t"+mrsst.getString(4));
101 System.out.print("\t"+mrsst.getInt(5));
102 System.out.print("\t"+mrsst.getInt(6));
103 System.out.println();
104 mjst = conn.createStatement();
105 sql = "insert into orig_shell select graph_id,i_node_id,i_cid,i_num,i_res,i_sstype,j_node_id,j_cid,j_num,j_res,j_sstype,BB,SC, 2 as shell " +
106 "from "+targetEdges+" " +
107 "where graph_id="+graphid+" and i_num="+oj_num+";";
108 // System.out.println(">"+sql);
109 mjst.executeUpdate( sql);
110 mjst.close();
111 } // end while
112 mrsst.close();
113 mstmt.close();
114
115 System.out.println("gathering all direct and indirect neighbours.");
116 sql = "select j_num, j_res, j_sstype, min(shell) as shell, count(*) as cn " +
117 "from orig_shell where j_num!="+resnr+" group by j_num order by j_num;";
118 mstmt = conn.createStatement();
119 mrsst = mstmt.executeQuery(sql);
120 o1=0;
121 o2=0;
122 while (mrsst.next()) {
123 if ( mrsst.getInt( 4)==1) { // count 1st shell entry
124 o1++;
125 System.out.print("1#"+o1);
126 rheadline[0]+="\t"+o1;
127 rheadline[1]+="\t"+mrsst.getString(2); // res
128 rheadline[2]+="\t"+mrsst.getInt(1); // resnum
129 rheadline[3]+="\t"+mrsst.getString(3); // SStype
130 rheadline[4]+="\t("+mrsst.getInt(5)+")"; // CN
131 } // end if 2st shell
132 if ( mrsst.getInt( 4)==2) { // count 2nd shell entry
133 o2++;
134 System.out.print("2#"+o2);
135 } // end if 2nd shell
136 System.out.println(" :\t"+mrsst.getInt( 1)+"\t"+mrsst.getString( 2)+"\t"+mrsst.getString( 3)+"\t"+mrsst.getInt( 4)+"\t"+mrsst.getInt( 5));
137 } // end while
138 System.out.println("Orig.SIZE 1st shell "+o1);
139 System.out.println("Orig.SIZE 2nd shell "+o2);
140 rheadline[4] = rheadline[4].replace("X", ("("+o1)+")");
141 rsideline = new String[o2+1];
142 rsideline[0]="+0\tRnum:S(cn)";
143 sumdelta = new int[(o1+1)][(o2+1)];
144 newnbs = new String[(o1+1)][(o2+1)];
145 cnall = new int[(o1+1)][(o2+1)];
146 // creating the perturbed version of shell 1 into temp_shell
147 for (j=0; j<=o2; j++) { // <=o2 outer loop through all originally indirect contacts
148
149 for (i=0; i<=o1; i++) { // inner loop through all originally direct contacts
150 if (VL>=1) {
151 System.out.println("---------------------------------------------");
152 System.out.println("Creating perturbed nbhood ("+i+","+j+")\t");
153 }
154 // clear first
155 nstmt = conn.createStatement();
156 nstmt.executeUpdate("drop table if exists temp_shell;");
157 nstmt.close();
158 nstmt = conn.createStatement();
159 nstmt.executeUpdate("create table temp_shell select * from orig_shell limit 0;");
160 nstmt.close();
161 oi = 0;
162 oj = 0;
163 mrsst.beforeFirst();
164 newnbhood="";
165 overx = false;
166 ixnum=0;
167 jxnum=0;
168 while (mrsst.next()) {
169 oj_num = mrsst.getInt( 1);
170 oj_res = mrsst.getString(2);
171 oj_sec = mrsst.getString(3);
172 oj_shell = mrsst.getInt( 4);
173 oj_cnsize = mrsst.getInt( 5);
174 if (oj_num>resnr) { // we are over x
175 if (!overx) {
176 newnbhood+="x";
177 overx=true;
178 } // end if over x
179 } // END IF J > X
180 if (oj_shell==1) { // a direct 1st shell neighbour
181 oi++;
182 if (oi!=i) {// if this is NOT the one direct nb 2B dropped
183 // include as 1st shell nbor into temp_shell
184 nstmt = conn.createStatement();
185 sql = "insert into temp_shell values("+resnr+",\'"+restype+"\',"+oj_num+",\'"+oj_res+"\',\'"+oj_sec+"\', 1);";
186 // System.out.println("oi>"+ sql);
187 nstmt.executeUpdate(sql);
188 nstmt.close();
189 newnbhood+=oj_res;
190 } else {
191 ixnum=oj_num;
192 } // end if ni!=i
193 } else { // 2nd shell neighbour
194 oj++;
195 if (oj==j) { // this is the 2nd shell nb 2B included
196 // put as new 1st shell nbor
197 nstmt = conn.createStatement();
198 sql = "insert into temp_shell values("+resnr+",\'"+restype+"\',"+oj_num+",\'"+oj_res+"\',\'"+oj_sec+"\', 1);";
199 // System.out.println("oj>"+ sql);
200 nstmt.executeUpdate(sql);
201 nstmt.close();
202 newnbhood+=oj_res;
203 jxnum=oj_num;
204 } // end if
205 if (j==0) { // creating the sideline ruler array for the output
206 rsideline[oj] = "+"+oj+"\t"+oj_res+""+oj_num+":"+oj_sec+"("+oj_cnsize+")";
207 } // end if j==0
208 } // end if 1st/2nd shell
209
210 } // end while through the entire nbhood
211 if (!overx) { // we haven't seen a nb > x yet
212 newnbhood+="x"; // x sits at the end of the nbhoodstring
213 overx=true;
214 } // end if over x
215 // System.out.println("new direct nbhood "+newnbhood);
216 // Now the "updated" / perturbed version of shell 1 is in temp_shell
217 // we can build 2nd shell accordingly.
218 // System.out.println("building the 2nd shell");
219 sql = "select j_num, j_res, j_sstype from temp_shell where shell=1;";
220 nstmt = conn.createStatement();
221 nrsst = nstmt.executeQuery(sql);
222 x = 0;
223 while (nrsst.next()) {
224 x++;
225 j_num = nrsst.getInt( 1);
226 // System.out.println(x+":"+nrsst.getString( 2)+" "+j_num+" "+nrsst.getString( 3));
227 mjst = conn.createStatement();
228 sql = "insert into temp_shell select i_num, i_res, j_num, j_res, j_sstype, 2 as shell from "+targetEdges+" where graph_id="+graphid+" and i_num="+j_num+";";
229 // System.out.println(">"+sql);
230 mjst.executeUpdate( sql);
231 mjst.close();
232 } // end while
233 nrsst.close();
234 nstmt.close();
235 // and score this move
236 lastxcn=0;
237 newnbs[i][j]= newnbhood;
238 System.out.print("\n["+i+"]["+j+"]\t-"+ixnum+"/+"+jxnum+"\t"+newnbs[i][j]+"\t ");
239 score = scoreCurrentNbhood( ixnum, jxnum);
240 sumdelta[i][j] = score;
241 cnall[i][j] = lastxcn;
242 if (VL>=1) {
243 reportMatrix( printCNSize );
244 reportMatrix( printdeltaRank );
245 System.out.println("SumDeltaRank Score = \t"+score);
246 System.out.println("CN1 x CN2 product = \t"+cnall[i][j]);
247 } else {
248 System.out.print("\t"+score+"*"+cnall[i][j]+"\t= "+(score*cnall[i][j]));
249 }
250 // reportMatrix( printNbstring );
251 } // next i
252 System.out.println("\t");
253 } // next j
254 // report total matrix sumdelta
255 // if (VL>=1) {
256 System.out.println("GraphID "+graphid+" Central residue is "+restype+":"+resnr+":"+ressec);
257 System.out.println("backgroundDB"+backgrndDB+" \t maxRank : "+maxRank);
258 reportResults( o1, o2, printRank);
259 reportResults( o1, o2, printCNSize);
260 //}
261 reportResults( o1, o2, printCNSxdelta );
262 // Cleanup ...
263 mrsst.close();
264 mstmt.close();
265
266 } catch (SQLException e) {
267 e.printStackTrace();
268 System.err.println("SQLException: " + e.getMessage());
269 System.err.println("SQLState: " + e.getSQLState());
270 } // end try/catch
271 System.out.println("fin.");
272 } // end main
273
274
275
276 public static void reportResults( int o1, int o2, int what2print) {
277 System.out.println("Printing "+what2print);
278 if (what2print==printRank) System.out.println("Overall resulting SumDeltaRank Matrix" );
279 if (what2print==printNbstring) System.out.println("Overall tested new nbhoodStrings" );
280 if (what2print==printCNSize) System.out.println("Overall cnsize" );
281 if (what2print==printCNSxdelta) System.out.println("Overall (cnsize*sumDeltaRank)" );
282 /*if (what2print==printTotal) System.out.print("total[i][j]" );
283 if (what2print==printEntropy) System.out.print("entropy[i][j]" );
284 if (what2print==printFreq ) System.out.print("freq[i][j]" );
285 if (what2print==printAUC) System.out.print("AUC[i][j]" );
286
287 if (what2print==printMoveset) System.out.print("moveset[i][j]" );
288 if (what2print==printdeltaRank) System.out.print("rank[i][j]-rank[0][0]" );) */
289
290 // print headerline(s)
291 System.out.println(rheadline[0]);
292 System.out.println(rheadline[1]);
293 System.out.println(rheadline[2]);
294 System.out.println(rheadline[3]);
295 System.out.println(rheadline[4]);
296 for (int j=0; j<=o2; j++) {
297 // print rsideline
298 System.out.print( rsideline[j]+"\t");
299 for ( int i=0; i<=o1; i++) {
300 if (what2print==printRank) System.out.print( sumdelta[i][j] );
301 if (what2print==printNbstring) System.out.print( newnbs[i][j] );
302 if (what2print==printCNSize) System.out.print( cnall[i][j] );
303 if (what2print==printCNSxdelta) System.out.print( sumdelta[i][j]*cnall[i][j] );
304 System.out.print("\t");
305 } // next i
306 System.out.println("");
307 } // next j
308 } // end of reportResults
309
310
311
312 public static int scoreCurrentNbhood( int ixnum, int jxnum) {
313 int ixcn=0, jxcn=0, n1=0, n2=0, ni, nj, i, j, j_num, j_shell, j_cnsize, sumdeltarank=0;
314 String sql, j_res, j_sec, nbs, mymove, precol;
315 boolean overx = false;
316 Statement stmt;
317 ResultSet rsst;
318
319 try {
320 headline[0]="\t\t\t0";
321 headline[1]="\t\t\t-";
322 headline[2]="\t\t\t0";
323 headline[3]="\t\t\t-";
324 headline[4]="\t\t\tX";
325
326 // System.out.println("retrieving the entire nbhood (1st and 2nd shell)");
327 sql = "select j_num, j_res, j_sstype, min(shell) as shell, count(*) as cn from temp_shell where j_num!="+resnr+" group by j_num order by j_num;";
328 stmt = conn.createStatement();
329 rsst = stmt.executeQuery(sql);
330 // counting shell2
331 n2=0;
332 while (rsst.next()) {
333 if ( rsst.getInt( 4)==1) { // count 1st shell entry
334 n1++;
335 // System.out.print("1#"+n1);
336 headline[0]+="\t"+n1;
337 headline[1]+="\t"+rsst.getString(2); // res
338 headline[2]+="\t"+rsst.getInt(1); // resnum
339 headline[3]+="\t"+rsst.getString(3); // SStype
340 headline[4]+="\t("+rsst.getInt(5)+")"; // CNSize
341 } // end if 2st shell
342 if ( rsst.getInt( 4)==2) { // count 2nd shell entry
343 n2++;
344 // System.out.print("2#"+n2);
345 } // end if 2nd shell
346 // System.out.println(" :\t"+rsst.getInt( 1)+"\t"+rsst.getString( 2)+"\t"+rsst.getString( 3)+"\t"+rsst.getInt( 4)+"\t"+rsst.getInt( 5));
347 } // end while
348 size1 = n1;
349 size2 = n2;
350 ixcn = n1;
351 jxcn = n1;
352 if (VL>=1) {
353 System.out.println("|1st shell|="+size1+" \tx\t |2nd shell|="+size2);
354 }
355 headline[4]=headline[4].replace("X",("("+size1+")"));
356
357 // n1 and n2 are known, initialise matrices accordingly.
358 // nbhood, move, rank, entropy, freq, AUC etc. (evtl.+ degree(?))
359 rank = new int[(n1+1)][(n2+1)];
360 rank[0][0]=maxRank;
361 total = new int[(n1+1)][(n2+1)];
362 entropy = new double[(n1+1)][(n2+1)];
363 freq = new double[(n1+1)][(n2+1)];
364 AUC = new double[(n1+1)][(n2+1)];
365 nbstring = new String[(n1+1)][(n2+1)];
366 moveset = new String[(n1+1)][(n2+1)];
367 sideline = new String[n2+1];
368 cn1 = new int[n1+1];
369 cn2 = new int[n2+1];
370 cnsize = new int[(n1+1)][(n2+1)];
371
372 for (j=0; j<=n2; j++) { // outer loop through all indirect contacts
373 for (i=0; i<=n1; i++) { // inner loop through all direct contacts
374 mymove = "";
375 overx = false;
376 if (VL>=1) {
377 System.out.print("("+i+","+j+")\t");
378 }
379 ni = 0;
380 nj = 0;
381 sideline[0]="+0\tRnum:S("+n1+")";
382 cn1[0]=n1;
383 cn2[0]=n1;
384 nbs="%";
385 rsst.beforeFirst();
386 while (rsst.next()) {
387 j_num = rsst.getInt(1);
388 j_res = rsst.getString(2);
389 j_sec = rsst.getString(3);
390 j_shell = rsst.getInt(4);
391 j_cnsize = rsst.getInt(5);
392 if (j_num>resnr) { // we are over x
393 if (!overx) {
394 nbs+="x%";
395 overx=true;
396 } // end if over x
397 } // END IF J > X
398 if (j_shell==1) { // a direct 1st shell neighbour
399 ni++;
400 if (ni!=i) {// if this is NOT the one direct nb 2B dropped
401 nbs+=j_res.toUpperCase()+"%"; // it is included
402 if ( j_num==jxnum && j==0) { // This is the direct nb dropped
403 jxcn=j_cnsize;
404 if (VL>=2) System.out.print("(j"+jxnum+":"+jxcn+")");
405 }
406 } else { // this one IS dropped
407 mymove += "(-"+j_res+":"+j_num+":"+j_sec+"/"+j_cnsize+")";
408 cn1[ni]=j_cnsize;
409 } // end if ni!=i
410
411 } else { // 2nd shell neighbour
412 nj++;
413 if (nj==j) { // this is the 2nd shell nb 2B included
414 nbs+=j_res.toUpperCase()+"%";
415 mymove += "(+"+j_res+":"+j_num+":"+j_sec+"/"+j_cnsize+")";
416 if ( j_num==ixnum && i==0) { // This is the dropped direct nb, no 2b found in 2ns shell
417 ixcn=j_cnsize;
418 if (VL>=2) System.out.print("(i"+ixnum+":"+ixcn+")");
419 }
420 } // end if
421
422 // // only once for building the sidelines
423 if (j==0) {
424 sideline[nj] = "+"+nj+"\t"+j_res+""+j_num+":"+j_sec+"("+j_cnsize+")";
425 cn2[nj] = j_cnsize;
426 } // end if sideline
427 } // end if 1st/2nd shell
428
429 } // end while through the entire nbhood
430 if (!overx) { // in case x is the very last we haven't seen it yet
431 nbs+="x%"; // add it in the end
432 overx=true;
433 } // end if over x
434 if (VL>=1) {
435 System.out.print("("+nbs+")\t");
436 }
437 nbstring[i][j] = nbs;
438 moveset[i][j] = mymove;
439 precol = nbstring[i][0];
440 getEntropy( nbs, restype, precol);
441 if (lastRank==0) lastRank = maxRank;
442 rank[i][j] = lastRank;
443 entropy[i][j] = lastEntropy;
444 freq[i][j] = lastFreq;
445 AUC[i][j] = lastAUC;
446 total[i][j]= lastTotal;
447 cnsize[i][j]=cn1[i]*cn2[j];
448 if (VL>=1) System.out.print(""+cnsize[i][j]+"\t");
449 if (lastRank > 0) {
450 sumdeltarank += ( (lastRank-rank[0][0]) );
451 // sumdeltarank += ( (lastRank-rank[0][0]) * (cnsize[i][j]) );
452 } else {
453 sumdeltarank += ( (maxRank-rank[0][0]) );
454 // sumdeltarank += ( (maxRank-rank[0][0]) * (cnsize[i][j]) );
455 } // end if lastRank was defined
456 } // close inner loop (i)
457 if (VL>=1) {
458 System.out.println(".");
459 } else {
460 System.out.print(".");
461 }
462 } // next outerloop (j)
463 lastxcn=(ixcn*jxcn);
464 if (VL>=1) {
465 System.out.println("lastxcn=(ixcn*jxcn)=("+ixcn+"*"+jxcn+")="+lastxcn);
466 }
467 rsst.close();
468 stmt.close();
469
470 } catch (SQLException e) {
471 e.printStackTrace();
472 System.err.println("SQLException: " + e.getMessage());
473 System.err.println("SQLState: " + e.getSQLState());
474 } // end try/catch
475 // System.out.println("fin.");
476 return sumdeltarank;
477 } // end scoreCurrentNbhood
478
479
480 public static void reportMatrix( int what2print) {
481 System.out.println("\nPrinting "+what2print);
482 if (what2print==printRank) System.out.print("rank[i][j]" );
483 if (what2print==printTotal) System.out.print("total[i][j]" );
484 if (what2print==printEntropy) System.out.print("entropy[i][j]" );
485 if (what2print==printFreq ) System.out.print("freq[i][j]" );
486 if (what2print==printAUC) System.out.print("AUC[i][j]" );
487 if (what2print==printNbstring) System.out.print("nbstring[i][j]" );
488 if (what2print==printMoveset) System.out.print("moveset[i][j]" );
489 if (what2print==printdeltaRank) System.out.print("rank[i][j]-rank[0][0]" );
490 if (what2print==printCNSize) System.out.print("cnsize[i][j]" );
491 if (what2print==printCNSxdelta) System.out.print("cnsize[i][j]*(rank[i][j]-rank[0][0])" );
492
493 System.out.println("...");
494 // print headerline(s)
495 System.out.println(headline[0]);
496 System.out.println(headline[1]);
497 System.out.println(headline[2]);
498 System.out.println(headline[3]);
499 System.out.println(headline[4]);
500 for (int j=0; j<=size2; j++) {
501 // print sideline
502 System.out.print( sideline[j]+"\t");
503 for ( int i=0; i<=size1; i++) {
504 if (what2print==printRank) System.out.print( rank[i][j] );
505 if (what2print==printTotal) System.out.print( total[i][j] );
506 if (what2print==printEntropy) System.out.print( entropy[i][j] );
507 if (what2print==printFreq ) System.out.print( freq[i][j] );
508 if (what2print==printAUC) System.out.print( AUC[i][j] );
509 if (what2print==printNbstring) System.out.print( nbstring[i][j] );
510 if (what2print==printMoveset) System.out.print( moveset[i][j] );
511 if (what2print==printdeltaRank) System.out.print( rank[i][j]-rank[0][0] );
512 if (what2print==printCNSize) System.out.print( cnsize[i][j] );
513 if (what2print==printCNSxdelta) System.out.print( cnsize[i][j]*(rank[i][j]-rank[0][0]) );
514 System.out.print("\t");
515 } // next i
516 System.out.println("");
517 } // next j
518 } // end of report
519
520 public static void getEntropy( String nbs, String centRes, String predec) { // pass centSS as well ???
521 String sql, res, this_n, prec_n;
522 Statement stmt;
523 ResultSet rsst;
524 double p, psum=0.0, logp, plogp, plogpsum=0.0;
525 try {
526 // Hashing first row tables comes first
527 System.out.println("nbs : "+nbs);
528 System.out.println("predec: "+predec);
529 this_n = nbs.replace("%","_");
530 prec_n = predec.replace("%","_");
531 System.out.println("this_n: ["+this_n+"]");
532 System.out.println("prec_n: ["+prec_n+"]");
533 if (prec_n.equals(this_n)) {
534 // System.out.println("have to create db for this "+prec_n);
535 sql = "create table IF NOT EXISTS nbhashtables."+prec_n+" as select res, n, k from nbstrings where nwg like '%"+nbs+"%';";
536 if (VL>=2) System.out.println(" >> "+sql);
537 stmt = conn.createStatement();
538 stmt.executeUpdate( sql);
539 stmt.close();
540 } // else System.out.println("using preceding db of "+prec_n);
541
542 // now we can safely derive the estimates from the hashtable
543 sql = "select count(*) from nbhashtables."+prec_n+" where n like '"+nbs+"';";
544 // System.out.println( sql);
545 stmt = conn.createStatement();
546 rsst = stmt.executeQuery(sql);
547 if (rsst.next()) lastTotal = rsst.getInt( 1);
548 rsst.close();
549 stmt.close();
550
551 sql = "select res, count(*) as t, count(*)/"+lastTotal+" as p, avg( k), stddev( k) from nbhashtables."+prec_n+" where n like '"+nbs+"' group by res order by p DESC;";
552 stmt = conn.createStatement();
553 rsst = stmt.executeQuery(sql);
554 // System.out.println("rank : res : total t : fraction p : log2(p) : -p*log2(p)");
555 int rank = 0;
556 boolean seenCentRes = false;
557 lastAUC = 0.0;
558 lastRank = 0;
559 lastFreq = 0.0;
560 lastavgk = 0.0;
561 lastdevk = 0.0;
562 while (rsst.next()) {
563 rank ++;
564 res = rsst.getString(1); // 1st column -- res
565 p = rsst.getDouble(3); // 3rd: fraction p
566 // System.out.print(rank+ " : " + res+" : "+num+ " : " + p);
567 logp = Math.log(p)/Math.log(2.0); // to basis 2 for info in bits
568 // System.out.print(" : " + logp);
569 plogp = -1.0 * p * logp;
570 // System.out.print(" : " + plogp);
571 plogpsum += plogp;
572 psum += p;
573
574 if (res.equals(centRes)) {
575 // System.out.print(" <==" + centRes);
576 seenCentRes = true;
577 lastFreq = p;
578 lastRank = rank;
579 lastavgk = rsst.getDouble(4);
580 lastdevk = rsst.getDouble(5);
581 }
582 if (seenCentRes) lastAUC += p;
583 // System.out.println("");
584 }
585 // System.out.println("Sum :"+lastTotal+" : "+psum+" : "+plogpsum);
586 rsst.close();
587 stmt.close();
588 lastEntropy = plogpsum;
589 if (lastRank==0) lastRank = maxRank;
590 } catch (SQLException e) {
591 e.printStackTrace();
592 System.err.println("SQLException: " + e.getMessage());
593 System.err.println("SQLState: " + e.getSQLState());
594 }
595
596 } // end of getEntropy
597
598 } // end class