1 |
import proteinstructure.*; |
2 |
import java.util.*; |
3 |
|
4 |
/** |
5 |
* Perform a connected-component clustering for contact graphs (like the one implemented in matlab) |
6 |
* @author stehr |
7 |
* |
8 |
*/ |
9 |
public class testGraphClustering { |
10 |
|
11 |
// TODO: This whole thing does currently not work |
12 |
|
13 |
HashMap<Edge,Integer> e2c; |
14 |
TreeSet<Integer> clusters; |
15 |
HashMap<Integer,EdgeSet> members; |
16 |
EdgeSet edges; |
17 |
int seqLen; |
18 |
|
19 |
/* constructor */ |
20 |
public testGraphClustering() { |
21 |
try { |
22 |
e2c = new HashMap<Edge,Integer>(); // map from edge to cluster |
23 |
clusters = new TreeSet<Integer>(); // set of all non-empty clusters |
24 |
members = new HashMap<Integer,EdgeSet>(); // cluster members |
25 |
|
26 |
Pdb pdb = new PdbasePdb("3eca","A"); |
27 |
Graph g = pdb.get_graph("Ca", 8.0); |
28 |
edges = g.getContacts(); |
29 |
seqLen = g.getFullLength(); |
30 |
|
31 |
System.out.println("Sequence length: " + seqLen); |
32 |
System.out.println("Number of contacts: " + edges.size()); |
33 |
|
34 |
} catch(Exception e) { |
35 |
e.printStackTrace(); |
36 |
} |
37 |
} |
38 |
|
39 |
private int dist(Edge e, Edge f) { |
40 |
return Math.abs(e.i - f.i) + Math.abs(e.j - f.j); |
41 |
} |
42 |
|
43 |
private void merge(Edge e1, Edge e2) { |
44 |
int cid1 = e2c.get(e1); |
45 |
int cid2 = e2c.get(e2); |
46 |
EdgeSet cluster1 = members.get(cid1); |
47 |
EdgeSet cluster2 = members.get(cid2); |
48 |
if(cluster1.size() > cluster1.size()) { // swap clusters |
49 |
EdgeSet temp = cluster1; |
50 |
cluster1 = cluster2; |
51 |
cluster2 = temp; |
52 |
int tmp = cid1; |
53 |
cid1 = cid2; |
54 |
cid2 = tmp; |
55 |
} |
56 |
//System.out.println("Merging clusters " + cluster1 + " and " + cluster2); |
57 |
for(Edge e3:cluster2) { |
58 |
e2c.put(e3, cid1); // point all edges to cluster1 |
59 |
} |
60 |
cluster1.addAll(cluster2); // put all edges to cluster1 |
61 |
cluster2.removeAll(cluster2); // empty cluster2 |
62 |
if(!clusters.remove(cid2)) { |
63 |
System.err.println("Error!"); // trash cluster2 |
64 |
} |
65 |
//System.out.println("Result: " + cluster1 + " and " + cluster2); |
66 |
} |
67 |
|
68 |
public static void main(String[] args) { |
69 |
|
70 |
testGraphClustering testGC = new testGraphClustering(); |
71 |
testGC.doGridClustering(4,3,15); |
72 |
|
73 |
} |
74 |
|
75 |
public void doGridClustering(int maxManhDist, int minClustSize, int minSeqSep) { |
76 |
|
77 |
int size = (seqLen / maxManhDist) + 1; |
78 |
System.out.println("Grid size: " + size); |
79 |
|
80 |
EdgeSet[][] grid = new EdgeSet[size][size]; // grid of clusters |
81 |
|
82 |
int cid = 1; |
83 |
for(Edge e:edges) { |
84 |
EdgeSet newCluster = new EdgeSet(); |
85 |
newCluster.add(e); |
86 |
clusters.add(cid); // add cluster to clusters |
87 |
members.put(cid, newCluster); |
88 |
e2c.put(e, cid); |
89 |
cid++; |
90 |
} |
91 |
|
92 |
// put edges in grid cells |
93 |
for(Edge e:edges) { |
94 |
int x = e.i / maxManhDist; |
95 |
int y = e.j / maxManhDist; |
96 |
if(grid[x][y] == null) { |
97 |
EdgeSet gridCluster = new EdgeSet(); |
98 |
gridCluster.add(e); |
99 |
grid[x][y] = gridCluster; |
100 |
} else { |
101 |
grid[x][y].add(e); |
102 |
} |
103 |
} |
104 |
|
105 |
System.out.println("Before clustering: Number of clusters:" + clusters.size()); |
106 |
|
107 |
boolean change = true; |
108 |
while(change) { |
109 |
change = false; |
110 |
System.out.println("While clustering: Number of clusters:" + clusters.size()); |
111 |
for(Edge e:edges) { |
112 |
int x = e.i / maxManhDist; |
113 |
int y = e.j / maxManhDist; |
114 |
// search my own bin |
115 |
for(Edge e2:grid[x][y]) { |
116 |
if(dist(e,e2) <= maxManhDist) { |
117 |
int cid1 = e2c.get(e); |
118 |
int cid2 = e2c.get(e2); |
119 |
if(cid1 != cid2) { |
120 |
merge(e,e2); |
121 |
change=true; |
122 |
} |
123 |
} |
124 |
} |
125 |
// search bin to the right |
126 |
if(x+1 < size && grid[x+1][y] != null) { |
127 |
for(Edge e2:grid[x+1][y]) { |
128 |
if(dist(e,e2) <= maxManhDist) { |
129 |
int cid1 = e2c.get(e); |
130 |
int cid2 = e2c.get(e2); |
131 |
if(cid1 != cid2) { |
132 |
merge(e,e2); |
133 |
change=true; |
134 |
} |
135 |
} |
136 |
} |
137 |
} |
138 |
// search bin below |
139 |
if(y+1 < size && grid[x][y+1] != null) { |
140 |
for(Edge e2:grid[x][y+1]) { |
141 |
if(dist(e,e2) <= maxManhDist) { |
142 |
int cid1 = e2c.get(e); |
143 |
int cid2 = e2c.get(e2); |
144 |
if(cid1 != cid2) { |
145 |
merge(e,e2); |
146 |
change=true; |
147 |
} |
148 |
} |
149 |
} |
150 |
} |
151 |
// search bin diagonal |
152 |
if(y+1 < size && x+1 < size && grid[x+1][y+1] != null) { |
153 |
for(Edge e2:grid[x+1][y+1]) { |
154 |
if(dist(e,e2) <= maxManhDist) { |
155 |
int cid1 = e2c.get(e); |
156 |
int cid2 = e2c.get(e2); |
157 |
if(cid1 != cid2) { |
158 |
merge(e,e2); |
159 |
change=true; |
160 |
} |
161 |
} |
162 |
} |
163 |
} |
164 |
} |
165 |
} |
166 |
|
167 |
// Verify clustering |
168 |
for(Integer cId1:clusters) { |
169 |
for(Integer cId2:clusters) { |
170 |
if(cId1 != cId2) { |
171 |
for(Edge e1: members.get(cId1)) { |
172 |
for(Edge e2: members.get(cId2)) { |
173 |
if(dist(e1,e2) <= maxManhDist) System.err.println("Error: " + e1 + "," + e2 + " in " + cId1 + " and " + cId2); |
174 |
} |
175 |
} |
176 |
} |
177 |
} |
178 |
} |
179 |
|
180 |
System.out.println("Finished clustering. Number of clusters:" + clusters.size()); |
181 |
int numEdges = 0; |
182 |
for(Integer cId:clusters) { |
183 |
numEdges += members.get(cId).size(); |
184 |
} |
185 |
System.out.println("Number of edges in clusters: " + numEdges); |
186 |
|
187 |
TreeSet<Integer> filteredClusters = new TreeSet<Integer>(); |
188 |
|
189 |
// delete small clusters |
190 |
for(Integer c:clusters) { |
191 |
EdgeSet clust = members.get(c); |
192 |
if(clust.size() >= minClustSize) filteredClusters.add(c); |
193 |
} |
194 |
// delete short range clusters |
195 |
for(Edge e:edges) { |
196 |
if(Math.abs(e.i - e.j) < minSeqSep) { |
197 |
filteredClusters.remove(e2c.get(e)); |
198 |
} |
199 |
} |
200 |
|
201 |
System.out.println("Finished filtering. Number of clusters:" + filteredClusters.size()); |
202 |
numEdges = 0; |
203 |
for(Integer cId:filteredClusters) { |
204 |
numEdges += members.get(cId).size(); |
205 |
} |
206 |
System.out.println("Number of edges in clusters: " + numEdges); |
207 |
|
208 |
int i = 1; |
209 |
for(Integer cId:filteredClusters) { |
210 |
EdgeSet c = members.get(cId); |
211 |
System.out.println(i++ + " (" + c.size() + "):" + c); |
212 |
} |
213 |
|
214 |
|
215 |
} |
216 |
} |