ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/cnCoarseNbhood.java
Revision: 492
Committed: Wed Jan 2 13:18:57 2008 UTC (16 years, 8 months ago) by duarte
File size: 23007 byte(s)
Log Message:
Copied the aglappe-jung branch into trunk.

Line User Rev File contents
1 lappe 272 import tools.MySQLConnection;
2    
3     import java.sql.SQLException;
4     import java.sql.Statement;
5     import java.sql.ResultSet;
6    
7     public class cnCoarseNbhood {
8    
9     /**
10     * CN2 size x sumdelta
11     * iterating over all steps by exchange of one neighbor at a time by a common neighbor
12     * with subsequent scoring
13     * @author lappe
14     */
15     static int maxRank = 21; // value to replace for non-existence of central redue in the resultvector (rank=0)
16     // higher values should penalize non-existence more
17     static int VL=2; // Verbosity Level : 0=silent ... 3=very talkative
18     static String user = "lappe" ; // change user name!!
19     static MySQLConnection conn;
20     static String backgrndDB = "pdb_reps_graph_4_2";
21     static String targetNodes = "target_node";
22     static String targetEdges = "target_edge";
23     static double lastEntropy=0.0, lastFreq, lastAUC, lastavgk, lastdevk;
24     static int lastRank, lastTotal, lastxcn=0;
25     static int graphid=0, resnr=0;
26     static int size1=0, size2=0; // the dimensions of the matrices = |shell1|x|shell2| = nr of direct(1) x indirect(2) nbs
27     static String rsideline[], rheadline[]={" -->\t(-)1st shell\t0"," |\t \t-"," |\t(+) \t0"," V\t2nd shell\t-", " \t \tX"};
28     static String sideline[], headline[]={"\t\t0","\t\t-","\t\t0","\t\t-","\t\tX"};
29     static String restype="?", ressec="?", newnbhood="";
30     static int cn1[], cn2[];
31     static int sumdelta[][], rank[][], total[][], cnsize[][], cnall[][];
32     static double entropy[][], freq[][], AUC[][];
33     static String newnbs[][], nbstring[][], moveset[][];
34    
35     static int printRank=1,
36     printTotal = 2,
37     printEntropy = 3,
38     printFreq = 4,
39     printAUC = 5,
40     printNbstring= 6,
41     printMoveset = 7,
42     printdeltaRank=8,
43     printCNSize =9,
44     printCNSxdelta=10;
45    
46     public static void main(String[] args) {
47    
48     if (args.length<2){
49     System.err.println("Coarse NBHood : graph_id and residue-nr. need to be given .... i.e. 7 110");
50     System.exit(1);
51     }
52     graphid = Integer.parseInt( args[0]);
53     resnr = Integer.parseInt( args[1]);
54     int j_num=0, oj_num=0, oj_shell, oj_cnsize, i, j, x, oi, oj, o1, o2, ixnum, jxnum, score=0;
55     boolean overx = false;
56     String sql, oj_res, oj_sec;
57     Statement mstmt, mjst, nstmt;
58     ResultSet mrsst, nrsst;
59    
60     try {
61     conn = new MySQLConnection("white",user,"nieve", backgrndDB); // the UPPERCASE DB!
62     System.out.println("Coarse Scoring Target neighborhoods v.0.5. ");
63    
64     sql = "select num, res, sstype from "+targetNodes+" where graph_id="+graphid+" and num="+resnr+";";
65     mstmt = conn.createStatement();
66     mrsst = mstmt.executeQuery(sql);
67     if (mrsst.next()) {
68     // this is the central node -> get type and secondary structure
69     restype = mrsst.getString( 2).toUpperCase();
70     ressec = mrsst.getString( 3).toUpperCase();
71     } // end if central residue
72     mrsst.close();
73     mstmt.close();
74     System.out.println("GraphID "+graphid+" Central residue is "+restype+":"+resnr+":"+ressec);
75    
76     // retrieve the original nbhood into orig_shell
77     System.out.println("retrieving original first shell ... ");
78     mstmt = conn.createStatement();
79     mstmt.executeUpdate("drop table if exists orig_shell;");
80     mstmt.close();
81    
82     mstmt = conn.createStatement();
83     mstmt.executeUpdate("create table orig_shell as " +
84     "select graph_id,i_node_id,i_cid,i_num,i_res,i_sstype,j_node_id,j_cid,j_num,j_res,j_sstype,BB,SC, 1 as shell " +
85     "from "+targetEdges+
86     " where graph_id="+graphid+" and i_num="+resnr+";");
87     mstmt.close();
88    
89     System.out.println("adding the original 2nd shell ...");
90     sql = "select j_cid, j_num, j_res, j_sstype, BB, SC from orig_shell where shell=1 and graph_id="+graphid+" and i_num="+resnr+";";
91     mstmt = conn.createStatement();
92     mrsst = mstmt.executeQuery(sql);
93     i=0;
94     while (mrsst.next()) {
95     i++;
96     oj_num = mrsst.getInt(2);
97     System.out.print(i+":"+mrsst.getString(1));
98     System.out.print("\t"+oj_num);
99     System.out.print("\t"+mrsst.getString(3));
100     System.out.print("\t"+mrsst.getString(4));
101     System.out.print("\t"+mrsst.getInt(5));
102     System.out.print("\t"+mrsst.getInt(6));
103     System.out.println();
104     mjst = conn.createStatement();
105     sql = "insert into orig_shell select graph_id,i_node_id,i_cid,i_num,i_res,i_sstype,j_node_id,j_cid,j_num,j_res,j_sstype,BB,SC, 2 as shell " +
106     "from "+targetEdges+" " +
107     "where graph_id="+graphid+" and i_num="+oj_num+";";
108     // System.out.println(">"+sql);
109     mjst.executeUpdate( sql);
110     mjst.close();
111     } // end while
112     mrsst.close();
113     mstmt.close();
114    
115     System.out.println("gathering all direct and indirect neighbours.");
116     sql = "select j_num, j_res, j_sstype, min(shell) as shell, count(*) as cn " +
117     "from orig_shell where j_num!="+resnr+" group by j_num order by j_num;";
118     mstmt = conn.createStatement();
119     mrsst = mstmt.executeQuery(sql);
120     o1=0;
121     o2=0;
122     while (mrsst.next()) {
123     if ( mrsst.getInt( 4)==1) { // count 1st shell entry
124     o1++;
125     System.out.print("1#"+o1);
126     rheadline[0]+="\t"+o1;
127     rheadline[1]+="\t"+mrsst.getString(2); // res
128     rheadline[2]+="\t"+mrsst.getInt(1); // resnum
129     rheadline[3]+="\t"+mrsst.getString(3); // SStype
130     rheadline[4]+="\t("+mrsst.getInt(5)+")"; // CN
131     } // end if 2st shell
132     if ( mrsst.getInt( 4)==2) { // count 2nd shell entry
133     o2++;
134     System.out.print("2#"+o2);
135     } // end if 2nd shell
136     System.out.println(" :\t"+mrsst.getInt( 1)+"\t"+mrsst.getString( 2)+"\t"+mrsst.getString( 3)+"\t"+mrsst.getInt( 4)+"\t"+mrsst.getInt( 5));
137     } // end while
138     System.out.println("Orig.SIZE 1st shell "+o1);
139     System.out.println("Orig.SIZE 2nd shell "+o2);
140     rheadline[4] = rheadline[4].replace("X", ("("+o1)+")");
141     rsideline = new String[o2+1];
142     rsideline[0]="+0\tRnum:S(cn)";
143     sumdelta = new int[(o1+1)][(o2+1)];
144     newnbs = new String[(o1+1)][(o2+1)];
145     cnall = new int[(o1+1)][(o2+1)];
146     // creating the perturbed version of shell 1 into temp_shell
147     for (j=0; j<=o2; j++) { // <=o2 outer loop through all originally indirect contacts
148    
149     for (i=0; i<=o1; i++) { // inner loop through all originally direct contacts
150     if (VL>=1) {
151     System.out.println("---------------------------------------------");
152     System.out.println("Creating perturbed nbhood ("+i+","+j+")\t");
153     }
154     // clear first
155     nstmt = conn.createStatement();
156     nstmt.executeUpdate("drop table if exists temp_shell;");
157     nstmt.close();
158     nstmt = conn.createStatement();
159     nstmt.executeUpdate("create table temp_shell select * from orig_shell limit 0;");
160     nstmt.close();
161     oi = 0;
162     oj = 0;
163     mrsst.beforeFirst();
164     newnbhood="";
165     overx = false;
166     ixnum=0;
167     jxnum=0;
168     while (mrsst.next()) {
169     oj_num = mrsst.getInt( 1);
170     oj_res = mrsst.getString(2);
171     oj_sec = mrsst.getString(3);
172     oj_shell = mrsst.getInt( 4);
173     oj_cnsize = mrsst.getInt( 5);
174     if (oj_num>resnr) { // we are over x
175     if (!overx) {
176     newnbhood+="x";
177     overx=true;
178     } // end if over x
179     } // END IF J > X
180     if (oj_shell==1) { // a direct 1st shell neighbour
181     oi++;
182     if (oi!=i) {// if this is NOT the one direct nb 2B dropped
183     // include as 1st shell nbor into temp_shell
184     nstmt = conn.createStatement();
185     sql = "insert into temp_shell values("+resnr+",\'"+restype+"\',"+oj_num+",\'"+oj_res+"\',\'"+oj_sec+"\', 1);";
186     // System.out.println("oi>"+ sql);
187     nstmt.executeUpdate(sql);
188     nstmt.close();
189     newnbhood+=oj_res;
190     } else {
191     ixnum=oj_num;
192     } // end if ni!=i
193     } else { // 2nd shell neighbour
194     oj++;
195     if (oj==j) { // this is the 2nd shell nb 2B included
196     // put as new 1st shell nbor
197     nstmt = conn.createStatement();
198     sql = "insert into temp_shell values("+resnr+",\'"+restype+"\',"+oj_num+",\'"+oj_res+"\',\'"+oj_sec+"\', 1);";
199     // System.out.println("oj>"+ sql);
200     nstmt.executeUpdate(sql);
201     nstmt.close();
202     newnbhood+=oj_res;
203     jxnum=oj_num;
204     } // end if
205     if (j==0) { // creating the sideline ruler array for the output
206     rsideline[oj] = "+"+oj+"\t"+oj_res+""+oj_num+":"+oj_sec+"("+oj_cnsize+")";
207     } // end if j==0
208     } // end if 1st/2nd shell
209    
210     } // end while through the entire nbhood
211     if (!overx) { // we haven't seen a nb > x yet
212     newnbhood+="x"; // x sits at the end of the nbhoodstring
213     overx=true;
214     } // end if over x
215     // System.out.println("new direct nbhood "+newnbhood);
216     // Now the "updated" / perturbed version of shell 1 is in temp_shell
217     // we can build 2nd shell accordingly.
218     // System.out.println("building the 2nd shell");
219     sql = "select j_num, j_res, j_sstype from temp_shell where shell=1;";
220     nstmt = conn.createStatement();
221     nrsst = nstmt.executeQuery(sql);
222     x = 0;
223     while (nrsst.next()) {
224     x++;
225     j_num = nrsst.getInt( 1);
226     // System.out.println(x+":"+nrsst.getString( 2)+" "+j_num+" "+nrsst.getString( 3));
227     mjst = conn.createStatement();
228     sql = "insert into temp_shell select i_num, i_res, j_num, j_res, j_sstype, 2 as shell from "+targetEdges+" where graph_id="+graphid+" and i_num="+j_num+";";
229     // System.out.println(">"+sql);
230     mjst.executeUpdate( sql);
231     mjst.close();
232     } // end while
233     nrsst.close();
234     nstmt.close();
235     // and score this move
236     lastxcn=0;
237     newnbs[i][j]= newnbhood;
238     System.out.print("\n["+i+"]["+j+"]\t-"+ixnum+"/+"+jxnum+"\t"+newnbs[i][j]+"\t ");
239     score = scoreCurrentNbhood( ixnum, jxnum);
240     sumdelta[i][j] = score;
241     cnall[i][j] = lastxcn;
242     if (VL>=1) {
243     reportMatrix( printCNSize );
244     reportMatrix( printdeltaRank );
245     System.out.println("SumDeltaRank Score = \t"+score);
246     System.out.println("CN1 x CN2 product = \t"+cnall[i][j]);
247     } else {
248     System.out.print("\t"+score+"*"+cnall[i][j]+"\t= "+(score*cnall[i][j]));
249     }
250     // reportMatrix( printNbstring );
251     } // next i
252     System.out.println("\t");
253     } // next j
254     // report total matrix sumdelta
255     // if (VL>=1) {
256     System.out.println("GraphID "+graphid+" Central residue is "+restype+":"+resnr+":"+ressec);
257     System.out.println("backgroundDB"+backgrndDB+" \t maxRank : "+maxRank);
258     reportResults( o1, o2, printRank);
259     reportResults( o1, o2, printCNSize);
260     //}
261     reportResults( o1, o2, printCNSxdelta );
262     // Cleanup ...
263     mrsst.close();
264     mstmt.close();
265    
266     } catch (SQLException e) {
267     e.printStackTrace();
268     System.err.println("SQLException: " + e.getMessage());
269     System.err.println("SQLState: " + e.getSQLState());
270     } // end try/catch
271     System.out.println("fin.");
272     } // end main
273    
274    
275    
276     public static void reportResults( int o1, int o2, int what2print) {
277     System.out.println("Printing "+what2print);
278     if (what2print==printRank) System.out.println("Overall resulting SumDeltaRank Matrix" );
279     if (what2print==printNbstring) System.out.println("Overall tested new nbhoodStrings" );
280     if (what2print==printCNSize) System.out.println("Overall cnsize" );
281     if (what2print==printCNSxdelta) System.out.println("Overall (cnsize*sumDeltaRank)" );
282     /*if (what2print==printTotal) System.out.print("total[i][j]" );
283     if (what2print==printEntropy) System.out.print("entropy[i][j]" );
284     if (what2print==printFreq ) System.out.print("freq[i][j]" );
285     if (what2print==printAUC) System.out.print("AUC[i][j]" );
286    
287     if (what2print==printMoveset) System.out.print("moveset[i][j]" );
288     if (what2print==printdeltaRank) System.out.print("rank[i][j]-rank[0][0]" );) */
289    
290     // print headerline(s)
291     System.out.println(rheadline[0]);
292     System.out.println(rheadline[1]);
293     System.out.println(rheadline[2]);
294     System.out.println(rheadline[3]);
295     System.out.println(rheadline[4]);
296     for (int j=0; j<=o2; j++) {
297     // print rsideline
298     System.out.print( rsideline[j]+"\t");
299     for ( int i=0; i<=o1; i++) {
300     if (what2print==printRank) System.out.print( sumdelta[i][j] );
301     if (what2print==printNbstring) System.out.print( newnbs[i][j] );
302     if (what2print==printCNSize) System.out.print( cnall[i][j] );
303     if (what2print==printCNSxdelta) System.out.print( sumdelta[i][j]*cnall[i][j] );
304     System.out.print("\t");
305     } // next i
306     System.out.println("");
307     } // next j
308     } // end of reportResults
309    
310    
311    
312     public static int scoreCurrentNbhood( int ixnum, int jxnum) {
313     int ixcn=0, jxcn=0, n1=0, n2=0, ni, nj, i, j, j_num, j_shell, j_cnsize, sumdeltarank=0;
314     String sql, j_res, j_sec, nbs, mymove, precol;
315     boolean overx = false;
316     Statement stmt;
317     ResultSet rsst;
318    
319     try {
320     headline[0]="\t\t\t0";
321     headline[1]="\t\t\t-";
322     headline[2]="\t\t\t0";
323     headline[3]="\t\t\t-";
324     headline[4]="\t\t\tX";
325    
326     // System.out.println("retrieving the entire nbhood (1st and 2nd shell)");
327     sql = "select j_num, j_res, j_sstype, min(shell) as shell, count(*) as cn from temp_shell where j_num!="+resnr+" group by j_num order by j_num;";
328     stmt = conn.createStatement();
329     rsst = stmt.executeQuery(sql);
330     // counting shell2
331     n2=0;
332     while (rsst.next()) {
333     if ( rsst.getInt( 4)==1) { // count 1st shell entry
334     n1++;
335     // System.out.print("1#"+n1);
336     headline[0]+="\t"+n1;
337     headline[1]+="\t"+rsst.getString(2); // res
338     headline[2]+="\t"+rsst.getInt(1); // resnum
339     headline[3]+="\t"+rsst.getString(3); // SStype
340     headline[4]+="\t("+rsst.getInt(5)+")"; // CNSize
341     } // end if 2st shell
342     if ( rsst.getInt( 4)==2) { // count 2nd shell entry
343     n2++;
344     // System.out.print("2#"+n2);
345     } // end if 2nd shell
346     // System.out.println(" :\t"+rsst.getInt( 1)+"\t"+rsst.getString( 2)+"\t"+rsst.getString( 3)+"\t"+rsst.getInt( 4)+"\t"+rsst.getInt( 5));
347     } // end while
348     size1 = n1;
349     size2 = n2;
350     ixcn = n1;
351     jxcn = n1;
352     if (VL>=1) {
353     System.out.println("|1st shell|="+size1+" \tx\t |2nd shell|="+size2);
354     }
355     headline[4]=headline[4].replace("X",("("+size1+")"));
356    
357     // n1 and n2 are known, initialise matrices accordingly.
358     // nbhood, move, rank, entropy, freq, AUC etc. (evtl.+ degree(?))
359     rank = new int[(n1+1)][(n2+1)];
360     rank[0][0]=maxRank;
361     total = new int[(n1+1)][(n2+1)];
362     entropy = new double[(n1+1)][(n2+1)];
363     freq = new double[(n1+1)][(n2+1)];
364     AUC = new double[(n1+1)][(n2+1)];
365     nbstring = new String[(n1+1)][(n2+1)];
366     moveset = new String[(n1+1)][(n2+1)];
367     sideline = new String[n2+1];
368     cn1 = new int[n1+1];
369     cn2 = new int[n2+1];
370     cnsize = new int[(n1+1)][(n2+1)];
371    
372     for (j=0; j<=n2; j++) { // outer loop through all indirect contacts
373     for (i=0; i<=n1; i++) { // inner loop through all direct contacts
374     mymove = "";
375     overx = false;
376     if (VL>=1) {
377     System.out.print("("+i+","+j+")\t");
378     }
379     ni = 0;
380     nj = 0;
381     sideline[0]="+0\tRnum:S("+n1+")";
382     cn1[0]=n1;
383     cn2[0]=n1;
384     nbs="%";
385     rsst.beforeFirst();
386     while (rsst.next()) {
387     j_num = rsst.getInt(1);
388     j_res = rsst.getString(2);
389     j_sec = rsst.getString(3);
390     j_shell = rsst.getInt(4);
391     j_cnsize = rsst.getInt(5);
392     if (j_num>resnr) { // we are over x
393     if (!overx) {
394     nbs+="x%";
395     overx=true;
396     } // end if over x
397     } // END IF J > X
398     if (j_shell==1) { // a direct 1st shell neighbour
399     ni++;
400     if (ni!=i) {// if this is NOT the one direct nb 2B dropped
401     nbs+=j_res.toUpperCase()+"%"; // it is included
402     if ( j_num==jxnum && j==0) { // This is the direct nb dropped
403     jxcn=j_cnsize;
404     if (VL>=2) System.out.print("(j"+jxnum+":"+jxcn+")");
405     }
406     } else { // this one IS dropped
407     mymove += "(-"+j_res+":"+j_num+":"+j_sec+"/"+j_cnsize+")";
408     cn1[ni]=j_cnsize;
409     } // end if ni!=i
410    
411     } else { // 2nd shell neighbour
412     nj++;
413     if (nj==j) { // this is the 2nd shell nb 2B included
414     nbs+=j_res.toUpperCase()+"%";
415     mymove += "(+"+j_res+":"+j_num+":"+j_sec+"/"+j_cnsize+")";
416     if ( j_num==ixnum && i==0) { // This is the dropped direct nb, no 2b found in 2ns shell
417     ixcn=j_cnsize;
418     if (VL>=2) System.out.print("(i"+ixnum+":"+ixcn+")");
419     }
420     } // end if
421    
422     // // only once for building the sidelines
423     if (j==0) {
424     sideline[nj] = "+"+nj+"\t"+j_res+""+j_num+":"+j_sec+"("+j_cnsize+")";
425     cn2[nj] = j_cnsize;
426     } // end if sideline
427     } // end if 1st/2nd shell
428    
429     } // end while through the entire nbhood
430     if (!overx) { // in case x is the very last we haven't seen it yet
431     nbs+="x%"; // add it in the end
432     overx=true;
433     } // end if over x
434     if (VL>=1) {
435     System.out.print("("+nbs+")\t");
436     }
437     nbstring[i][j] = nbs;
438     moveset[i][j] = mymove;
439     precol = nbstring[i][0];
440     getEntropy( nbs, restype, precol);
441     if (lastRank==0) lastRank = maxRank;
442     rank[i][j] = lastRank;
443     entropy[i][j] = lastEntropy;
444     freq[i][j] = lastFreq;
445     AUC[i][j] = lastAUC;
446     total[i][j]= lastTotal;
447     cnsize[i][j]=cn1[i]*cn2[j];
448     if (VL>=1) System.out.print(""+cnsize[i][j]+"\t");
449     if (lastRank > 0) {
450     sumdeltarank += ( (lastRank-rank[0][0]) );
451     // sumdeltarank += ( (lastRank-rank[0][0]) * (cnsize[i][j]) );
452     } else {
453     sumdeltarank += ( (maxRank-rank[0][0]) );
454     // sumdeltarank += ( (maxRank-rank[0][0]) * (cnsize[i][j]) );
455     } // end if lastRank was defined
456     } // close inner loop (i)
457     if (VL>=1) {
458     System.out.println(".");
459     } else {
460     System.out.print(".");
461     }
462     } // next outerloop (j)
463     lastxcn=(ixcn*jxcn);
464     if (VL>=1) {
465     System.out.println("lastxcn=(ixcn*jxcn)=("+ixcn+"*"+jxcn+")="+lastxcn);
466     }
467     rsst.close();
468     stmt.close();
469    
470     } catch (SQLException e) {
471     e.printStackTrace();
472     System.err.println("SQLException: " + e.getMessage());
473     System.err.println("SQLState: " + e.getSQLState());
474     } // end try/catch
475     // System.out.println("fin.");
476     return sumdeltarank;
477     } // end scoreCurrentNbhood
478    
479    
480     public static void reportMatrix( int what2print) {
481     System.out.println("\nPrinting "+what2print);
482     if (what2print==printRank) System.out.print("rank[i][j]" );
483     if (what2print==printTotal) System.out.print("total[i][j]" );
484     if (what2print==printEntropy) System.out.print("entropy[i][j]" );
485     if (what2print==printFreq ) System.out.print("freq[i][j]" );
486     if (what2print==printAUC) System.out.print("AUC[i][j]" );
487     if (what2print==printNbstring) System.out.print("nbstring[i][j]" );
488     if (what2print==printMoveset) System.out.print("moveset[i][j]" );
489     if (what2print==printdeltaRank) System.out.print("rank[i][j]-rank[0][0]" );
490     if (what2print==printCNSize) System.out.print("cnsize[i][j]" );
491     if (what2print==printCNSxdelta) System.out.print("cnsize[i][j]*(rank[i][j]-rank[0][0])" );
492    
493     System.out.println("...");
494     // print headerline(s)
495     System.out.println(headline[0]);
496     System.out.println(headline[1]);
497     System.out.println(headline[2]);
498     System.out.println(headline[3]);
499     System.out.println(headline[4]);
500     for (int j=0; j<=size2; j++) {
501     // print sideline
502     System.out.print( sideline[j]+"\t");
503     for ( int i=0; i<=size1; i++) {
504     if (what2print==printRank) System.out.print( rank[i][j] );
505     if (what2print==printTotal) System.out.print( total[i][j] );
506     if (what2print==printEntropy) System.out.print( entropy[i][j] );
507     if (what2print==printFreq ) System.out.print( freq[i][j] );
508     if (what2print==printAUC) System.out.print( AUC[i][j] );
509     if (what2print==printNbstring) System.out.print( nbstring[i][j] );
510     if (what2print==printMoveset) System.out.print( moveset[i][j] );
511     if (what2print==printdeltaRank) System.out.print( rank[i][j]-rank[0][0] );
512     if (what2print==printCNSize) System.out.print( cnsize[i][j] );
513     if (what2print==printCNSxdelta) System.out.print( cnsize[i][j]*(rank[i][j]-rank[0][0]) );
514     System.out.print("\t");
515     } // next i
516     System.out.println("");
517     } // next j
518     } // end of report
519    
520     public static void getEntropy( String nbs, String centRes, String predec) { // pass centSS as well ???
521     String sql, res, this_n, prec_n;
522     Statement stmt;
523     ResultSet rsst;
524     double p, psum=0.0, logp, plogp, plogpsum=0.0;
525     try {
526     // Hashing first row tables comes first
527     System.out.println("nbs : "+nbs);
528     System.out.println("predec: "+predec);
529     this_n = nbs.replace("%","_");
530     prec_n = predec.replace("%","_");
531     System.out.println("this_n: ["+this_n+"]");
532     System.out.println("prec_n: ["+prec_n+"]");
533     if (prec_n.equals(this_n)) {
534     // System.out.println("have to create db for this "+prec_n);
535     sql = "create table IF NOT EXISTS nbhashtables."+prec_n+" as select res, n, k from nbstrings where nwg like '%"+nbs+"%';";
536     if (VL>=2) System.out.println(" >> "+sql);
537     stmt = conn.createStatement();
538     stmt.executeUpdate( sql);
539     stmt.close();
540     } // else System.out.println("using preceding db of "+prec_n);
541    
542     // now we can safely derive the estimates from the hashtable
543     sql = "select count(*) from nbhashtables."+prec_n+" where n like '"+nbs+"';";
544     // System.out.println( sql);
545     stmt = conn.createStatement();
546     rsst = stmt.executeQuery(sql);
547     if (rsst.next()) lastTotal = rsst.getInt( 1);
548     rsst.close();
549     stmt.close();
550    
551     sql = "select res, count(*) as t, count(*)/"+lastTotal+" as p, avg( k), stddev( k) from nbhashtables."+prec_n+" where n like '"+nbs+"' group by res order by p DESC;";
552     stmt = conn.createStatement();
553     rsst = stmt.executeQuery(sql);
554     // System.out.println("rank : res : total t : fraction p : log2(p) : -p*log2(p)");
555     int rank = 0;
556     boolean seenCentRes = false;
557     lastAUC = 0.0;
558     lastRank = 0;
559     lastFreq = 0.0;
560     lastavgk = 0.0;
561     lastdevk = 0.0;
562     while (rsst.next()) {
563     rank ++;
564     res = rsst.getString(1); // 1st column -- res
565     p = rsst.getDouble(3); // 3rd: fraction p
566     // System.out.print(rank+ " : " + res+" : "+num+ " : " + p);
567     logp = Math.log(p)/Math.log(2.0); // to basis 2 for info in bits
568     // System.out.print(" : " + logp);
569     plogp = -1.0 * p * logp;
570     // System.out.print(" : " + plogp);
571     plogpsum += plogp;
572     psum += p;
573    
574     if (res.equals(centRes)) {
575     // System.out.print(" <==" + centRes);
576     seenCentRes = true;
577     lastFreq = p;
578     lastRank = rank;
579     lastavgk = rsst.getDouble(4);
580     lastdevk = rsst.getDouble(5);
581     }
582     if (seenCentRes) lastAUC += p;
583     // System.out.println("");
584     }
585     // System.out.println("Sum :"+lastTotal+" : "+psum+" : "+plogpsum);
586     rsst.close();
587     stmt.close();
588     lastEntropy = plogpsum;
589     if (lastRank==0) lastRank = maxRank;
590     } catch (SQLException e) {
591     e.printStackTrace();
592     System.err.println("SQLException: " + e.getMessage());
593     System.err.println("SQLState: " + e.getSQLState());
594     }
595    
596     } // end of getEntropy
597    
598     } // end class