1 |
lappe |
148 |
import tools.MySQLConnection; |
2 |
|
|
|
3 |
|
|
import java.sql.SQLException; |
4 |
|
|
import java.sql.Statement; |
5 |
|
|
import java.sql.ResultSet; |
6 |
|
|
|
7 |
|
|
public class listInfoGain { |
8 |
|
|
|
9 |
|
|
/** |
10 |
|
|
* "Hello World" for entropy calculations given a nbhoodstring |
11 |
|
|
* |
12 |
|
|
* @author lappe |
13 |
|
|
*/ |
14 |
|
|
|
15 |
|
|
static String user = "lappe" ; // change user name!! |
16 |
|
|
static MySQLConnection conn; |
17 |
lappe |
167 |
static double orgFreq, lastFreq, orgAUC, lastAUC; |
18 |
|
|
static int orgRank, lastRank; |
19 |
lappe |
148 |
|
20 |
|
|
public static void main(String[] args) { |
21 |
lappe |
167 |
double entropy = 0.0, newentropy=0.0, gain; |
22 |
|
|
String nbhood = "", front, middle, tail, newhood="", central ="I"; |
23 |
|
|
if (args.length<2){ |
24 |
|
|
System.err.println("The starting neighborhood-string and central residue type needs to be given .... i.e. %K%D%L%I%x%D%C% I"); |
25 |
lappe |
148 |
System.exit(1); |
26 |
|
|
} |
27 |
|
|
nbhood = args[0]; |
28 |
lappe |
167 |
central = args[1]; |
29 |
lappe |
148 |
|
30 |
|
|
conn = new MySQLConnection("white",user,"nieve","pdb_reps_graph_4_2"); |
31 |
|
|
int l = (int)((nbhood.length()-1)/2); |
32 |
|
|
int N = 1; |
33 |
|
|
System.out.println("ListInfoGain"); |
34 |
lappe |
151 |
System.out.print("0 - (%x%)("+l+") "); |
35 |
lappe |
167 |
entropy = getEntropy( "%x%", central); |
36 |
|
|
System.out.print( entropy + " bits."); |
37 |
|
|
// System.out.println("Symbols in nbhood :"+l); |
38 |
|
|
orgFreq = lastFreq; |
39 |
|
|
orgAUC = lastAUC; |
40 |
|
|
orgRank = lastRank; |
41 |
|
|
System.out.println("central Residue "+central+ " rank#"+lastRank+" p="+lastFreq+" AUC="+String.format("%.5f",lastAUC)); |
42 |
|
|
// System.out.println("Symbols in nbhood :"+l); |
43 |
lappe |
148 |
|
44 |
|
|
for (int i = 0; i<l; i++) { |
45 |
|
|
front = nbhood.substring(0,i*2); |
46 |
|
|
middle = nbhood.substring(i*2, i*2+2); |
47 |
|
|
tail = nbhood.substring(i*2+2); |
48 |
|
|
System.out.print((i+1)+" - "+front+"("+middle+")"+tail); |
49 |
|
|
|
50 |
|
|
if (middle.equals("%x")) { // switch from N to C of X |
51 |
|
|
N = -1; |
52 |
lappe |
151 |
newhood = "%x%"; |
53 |
lappe |
148 |
} else { |
54 |
|
|
if (N < 0) // we are in the C-terminal section |
55 |
|
|
newhood = "%x"+middle+"%"; |
56 |
|
|
else // N terminal (before X) |
57 |
|
|
newhood = middle+"%x%"; |
58 |
|
|
} // end if |
59 |
lappe |
151 |
System.out.print( " -> " + newhood); |
60 |
lappe |
167 |
newentropy = getEntropy( newhood, central); |
61 |
lappe |
148 |
gain = newentropy-entropy; |
62 |
lappe |
167 |
System.out.print( " : "+String.format("%.5f", newentropy) +"bits ("+String.format("%.5f", gain)+")"); |
63 |
|
|
System.out.print( " : #"+lastRank +" ("+(lastRank-orgRank)+")"); |
64 |
|
|
System.out.print( " : "+String.format("%.5f", lastFreq) +"("+String.format("%.5f", (lastFreq-orgFreq))+")"); |
65 |
|
|
System.out.print( " : "+String.format("%.5f", lastAUC) +"("+String.format("%.5f", (lastAUC-orgAUC))+")"); |
66 |
lappe |
148 |
|
67 |
lappe |
167 |
System.out.println( "."); |
68 |
|
|
} // next symbol in nbhood; |
69 |
lappe |
148 |
|
70 |
|
|
System.out.println("fin."); |
71 |
|
|
} |
72 |
|
|
|
73 |
|
|
|
74 |
|
|
|
75 |
lappe |
167 |
public static double getEntropy( String nbs, String centRes) { |
76 |
lappe |
148 |
int total = 0, num; |
77 |
|
|
String sql, res; |
78 |
|
|
Statement stmt; |
79 |
|
|
ResultSet rsst; |
80 |
|
|
double p, psum=0.0, logp, plogp, plogpsum=0.0; |
81 |
|
|
|
82 |
|
|
try { |
83 |
|
|
sql = "select count(*) from single_model_node where n like '"+nbs+"';"; |
84 |
|
|
// System.out.println( sql); |
85 |
|
|
stmt = conn.createStatement(); |
86 |
|
|
rsst = stmt.executeQuery(sql); |
87 |
|
|
if (rsst.next()) total = rsst.getInt( 1); |
88 |
|
|
rsst.close(); |
89 |
|
|
stmt.close(); |
90 |
|
|
|
91 |
lappe |
167 |
sql = "select res, count(*) as t, count(*)/"+total+" as p from single_model_node where n like '"+nbs+"' group by res order by p DESC;"; |
92 |
lappe |
148 |
stmt = conn.createStatement(); |
93 |
|
|
rsst = stmt.executeQuery(sql); |
94 |
lappe |
167 |
// System.out.println("rank : res : total t : fraction p : log2(p) : -p*log2(p)"); |
95 |
|
|
int rank = 0; |
96 |
|
|
boolean seenCentRes = false; |
97 |
|
|
lastAUC= 0; |
98 |
|
|
while (rsst.next()) { |
99 |
|
|
rank ++; |
100 |
lappe |
148 |
res = rsst.getString(1); // 1st column -- res |
101 |
|
|
num = rsst.getInt(2); // 2nd column -- num |
102 |
|
|
p = rsst.getDouble(3); // 3rd: fraction p |
103 |
lappe |
167 |
// System.out.print(rank+ " : " + res+" : "+num+ " : " + p); |
104 |
lappe |
148 |
logp = Math.log(p)/Math.log(2.0); // to basis 2 for info in bits |
105 |
|
|
// System.out.print(" : " + logp); |
106 |
|
|
plogp = -1.0 * p * logp; |
107 |
|
|
// System.out.print(" : " + plogp); |
108 |
|
|
plogpsum += plogp; |
109 |
|
|
psum += p; |
110 |
lappe |
167 |
|
111 |
|
|
if (res.equals(centRes)) { |
112 |
|
|
// System.out.print(" <==" + centRes); |
113 |
|
|
seenCentRes = true; |
114 |
|
|
lastFreq = p; |
115 |
|
|
lastRank = rank; |
116 |
|
|
} |
117 |
|
|
if (seenCentRes) lastAUC += p; |
118 |
lappe |
148 |
// System.out.println(""); |
119 |
|
|
} |
120 |
|
|
// System.out.println("Sum :"+total+" : "+psum+" : "+plogpsum); |
121 |
|
|
rsst.close(); |
122 |
|
|
stmt.close(); |
123 |
|
|
|
124 |
|
|
} catch (SQLException e) { |
125 |
|
|
e.printStackTrace(); |
126 |
|
|
System.err.println("SQLException: " + e.getMessage()); |
127 |
|
|
System.err.println("SQLState: " + e.getSQLState()); |
128 |
|
|
} |
129 |
|
|
return plogpsum; |
130 |
|
|
} // end of getEntropy |
131 |
|
|
|
132 |
|
|
} |