import tools.MySQLConnection;

import java.sql.SQLException;
import java.sql.Statement;
import java.sql.ResultSet;

public class minInfoGain { |
8 |
|
9 |
/** |
10 |
* "Hello World" for entropy calculations given a nbhoodstring |
11 |
* |
12 |
* @author lappe |
13 |
*/ |
14 |
|
15 |
static String user = "lappe" ; // change user name!! |
16 |
static MySQLConnection conn; |
17 |
|
18 |
public static void main(String[] args) throws SQLException { |
19 |
double entropy = 0.0, newentropy=0.0, gain, mingain=99999999.9; |
20 |
String nbhood = "", front, middle, tail, newhood=""; |
21 |
if (args.length<1){ |
22 |
System.err.println("The starting neighborhood-string needs to be given .... i.e. %K%D%L%I%x%D%C%"); |
23 |
System.exit(1); |
24 |
} |
25 |
nbhood = args[0]; |
26 |
|
27 |
conn = new MySQLConnection("white",user,"nieve","pdb_reps_graph_4_2"); |
28 |
int l = (int)((nbhood.length()-1)/2); |
29 |
|
30 |
while (l>=2) { |
31 |
System.out.println("MinInfoGain"); |
32 |
System.out.print("0 - ("+nbhood+")("+l+") "); |
33 |
entropy = getEntropy( nbhood); |
34 |
System.out.println( entropy + " bits."); |
35 |
System.out.println("Symbols in nbhood :"+l); |
36 |
mingain = 99999999.9; |
37 |
for (int i = 0; i<l; i++) { |
38 |
front = nbhood.substring(0,i*2); |
39 |
middle = nbhood.substring(i*2, i*2+2); |
40 |
tail = nbhood.substring(i*2+2); |
41 |
System.out.print((i+1)+" - "+front+"("+middle+")"+tail); |
42 |
newentropy = getEntropy( (front+tail)); |
43 |
gain = newentropy-entropy; |
44 |
System.out.print( " : "+newentropy +"bits, gain="+gain); |
45 |
if (gain<mingain && !(middle.equals("%x"))) { |
46 |
mingain = gain; |
47 |
newhood = front+tail; |
48 |
System.out.println(" <<-"); |
49 |
} else { |
50 |
System.out.println(" ."); |
51 |
} |
52 |
} // next symbol in nbhood; |
53 |
|
54 |
if (newhood.equals(nbhood)) { // in case we got stcuk with the same string |
55 |
l=0; // - exit loop |
56 |
} else { // continue with new nbhoodstring |
57 |
nbhood = newhood; |
58 |
l = (int)((nbhood.length()-1)/2); |
59 |
} |
60 |
System.out.println("-->> "+nbhood+"("+l+")"); |
61 |
|
62 |
} // |
63 |
System.out.println("fin."); |
64 |
} |
65 |
|
66 |
|
67 |
|
68 |
public static double getEntropy( String nbs) { |
69 |
int total = 0; |
70 |
String sql; |
71 |
Statement stmt; |
72 |
ResultSet rsst; |
73 |
double p, psum=0.0, logp, plogp, plogpsum=0.0; |
74 |
|
75 |
try { |
76 |
sql = "select count(*) from single_model_node where n like '"+nbs+"';"; |
77 |
// System.out.println( sql); |
78 |
stmt = conn.createStatement(); |
79 |
rsst = stmt.executeQuery(sql); |
80 |
if (rsst.next()) total = rsst.getInt( 1); |
81 |
rsst.close(); |
82 |
stmt.close(); |
83 |
|
84 |
sql = "select res, count(*) as t, count(*)/"+total+" as p from single_model_node where n like '"+nbs+"' group by res order by res;"; |
85 |
stmt = conn.createStatement(); |
86 |
rsst = stmt.executeQuery(sql); |
87 |
// System.out.println("res : total t : fraction p : log2(p) : -p*log2(p)"); |
88 |
while (rsst.next()) { |
89 |
//res = rsst.getString(1); // 1st column -- res |
90 |
//num = rsst.getInt(2); // 2nd column -- num |
91 |
p = rsst.getDouble(3); // 3rd: fraction p |
92 |
// System.out.print(res+" : "+num+ " : " + p); |
93 |
logp = Math.log(p)/Math.log(2.0); // to basis 2 for info in bits |
94 |
// System.out.print(" : " + logp); |
95 |
plogp = -1.0 * p * logp; |
96 |
// System.out.print(" : " + plogp); |
97 |
plogpsum += plogp; |
98 |
psum += p; |
99 |
// System.out.println(""); |
100 |
} |
101 |
// System.out.println("Sum :"+total+" : "+psum+" : "+plogpsum); |
102 |
rsst.close(); |
103 |
stmt.close(); |
104 |
|
105 |
} catch (SQLException e) { |
106 |
e.printStackTrace(); |
107 |
System.err.println("SQLException: " + e.getMessage()); |
108 |
System.err.println("SQLState: " + e.getSQLState()); |
109 |
} |
110 |
return plogpsum; |
111 |
} // end of getEntropy |
112 |
|
113 |
} |