1 |
lappe |
169 |
import tools.MySQLConnection; |
2 |
|
|
|
3 |
|
|
import java.sql.SQLException; |
4 |
|
|
import java.sql.Statement; |
5 |
|
|
import java.sql.ResultSet; |
6 |
|
|
|
7 |
|
|
public class maxInfoGain { |
8 |
|
|
|
9 |
|
|
/** |
10 |
|
|
* "Hello World" for entropy calculations given a nbhoodstring |
11 |
|
|
* |
12 |
|
|
* @author lappe |
13 |
|
|
*/ |
14 |
|
|
|
15 |
|
|
static String user = "lappe" ; // change user name!! |
16 |
|
|
static MySQLConnection conn; |
17 |
|
|
|
18 |
duarte |
202 |
public static void main(String[] args) throws SQLException { |
19 |
lappe |
169 |
double entropy = 0.0, newentropy=0.0, gain, maxgain=0.0; |
20 |
|
|
String nbhood = "", front, middle, tail, newhood=""; |
21 |
|
|
if (args.length<1){ |
22 |
|
|
System.err.println("The starting neighborhood-string needs to be given .... i.e. %K%D%L%I%x%D%C%"); |
23 |
|
|
System.exit(1); |
24 |
|
|
} |
25 |
|
|
nbhood = args[0]; |
26 |
|
|
|
27 |
|
|
conn = new MySQLConnection("white",user,"nieve","pdb_reps_graph_4_2"); |
28 |
|
|
int l = (int)((nbhood.length()-1)/2); |
29 |
|
|
|
30 |
|
|
while (l>=2) { |
31 |
|
|
System.out.println("MaxInfoGain"); |
32 |
|
|
System.out.print("0 - ("+nbhood+")("+l+") "); |
33 |
|
|
entropy = getEntropy( nbhood); |
34 |
|
|
System.out.println( entropy + " bits."); |
35 |
|
|
System.out.println("Symbols in nbhood :"+l); |
36 |
|
|
maxgain = 0.0; |
37 |
|
|
for (int i = 0; i<l; i++) { |
38 |
|
|
front = nbhood.substring(0,i*2); |
39 |
|
|
middle = nbhood.substring(i*2, i*2+2); |
40 |
|
|
tail = nbhood.substring(i*2+2); |
41 |
|
|
System.out.print((i+1)+" - "+front+"("+middle+")"+tail); |
42 |
|
|
newentropy = getEntropy( (front+tail)); |
43 |
|
|
gain = newentropy-entropy; |
44 |
|
|
System.out.print( " : "+newentropy +"bits, gain="+gain); |
45 |
|
|
if (gain>maxgain && !(middle.equals("%x"))) { |
46 |
|
|
maxgain = gain; |
47 |
|
|
newhood = front+tail; |
48 |
|
|
System.out.println(" <<-"); |
49 |
|
|
} else { |
50 |
|
|
System.out.println(" ."); |
51 |
|
|
} |
52 |
|
|
} // next symbol in nbhood; |
53 |
|
|
if (newhood.equals(nbhood)) { // in case we got stcuk with the same string |
54 |
|
|
l=0; // - exit loop |
55 |
|
|
} else { // continue with new nbhoodstring |
56 |
|
|
nbhood = newhood; |
57 |
|
|
l = (int)((nbhood.length()-1)/2); |
58 |
|
|
} |
59 |
|
|
System.out.println("-->> "+nbhood+"("+l+")"); |
60 |
|
|
} // |
61 |
|
|
System.out.println("fin."); |
62 |
|
|
} |
63 |
|
|
|
64 |
|
|
|
65 |
|
|
|
66 |
|
|
public static double getEntropy( String nbs) { |
67 |
duarte |
202 |
int total = 0; |
68 |
|
|
String sql; |
69 |
lappe |
169 |
Statement stmt; |
70 |
|
|
ResultSet rsst; |
71 |
|
|
double p, psum=0.0, logp, plogp, plogpsum=0.0; |
72 |
|
|
|
73 |
|
|
try { |
74 |
|
|
sql = "select count(*) from single_model_node where n like '"+nbs+"';"; |
75 |
|
|
// System.out.println( sql); |
76 |
|
|
stmt = conn.createStatement(); |
77 |
|
|
rsst = stmt.executeQuery(sql); |
78 |
|
|
if (rsst.next()) total = rsst.getInt( 1); |
79 |
|
|
rsst.close(); |
80 |
|
|
stmt.close(); |
81 |
|
|
|
82 |
|
|
sql = "select res, count(*) as t, count(*)/"+total+" as p from single_model_node where n like '"+nbs+"' group by res order by res;"; |
83 |
|
|
stmt = conn.createStatement(); |
84 |
|
|
rsst = stmt.executeQuery(sql); |
85 |
|
|
// System.out.println("res : total t : fraction p : log2(p) : -p*log2(p)"); |
86 |
|
|
while (rsst.next()) { |
87 |
duarte |
202 |
//res = rsst.getString(1); // 1st column -- res |
88 |
|
|
//num = rsst.getInt(2); // 2nd column -- num |
89 |
lappe |
169 |
p = rsst.getDouble(3); // 3rd: fraction p |
90 |
|
|
// System.out.print(res+" : "+num+ " : " + p); |
91 |
|
|
logp = Math.log(p)/Math.log(2.0); // to basis 2 for info in bits |
92 |
|
|
// System.out.print(" : " + logp); |
93 |
|
|
plogp = -1.0 * p * logp; |
94 |
|
|
// System.out.print(" : " + plogp); |
95 |
|
|
plogpsum += plogp; |
96 |
|
|
psum += p; |
97 |
|
|
// System.out.println(""); |
98 |
|
|
} |
99 |
|
|
// System.out.println("Sum :"+total+" : "+psum+" : "+plogpsum); |
100 |
|
|
rsst.close(); |
101 |
|
|
stmt.close(); |
102 |
|
|
|
103 |
|
|
} catch (SQLException e) { |
104 |
|
|
e.printStackTrace(); |
105 |
|
|
System.err.println("SQLException: " + e.getMessage()); |
106 |
|
|
System.err.println("SQLState: " + e.getSQLState()); |
107 |
|
|
} |
108 |
|
|
return plogpsum; |
109 |
|
|
} // end of getEntropy |
110 |
|
|
|
111 |
|
|
} |