ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/testGraphClustering.java
Revision: 351
Committed: Thu Oct 11 12:43:54 2007 UTC (16 years, 11 months ago) by stehr
File size: 5783 byte(s)
Log Message:
added some test programs to default package
Line File contents
1 import proteinstructure.*;
2 import java.util.*;
3
4 /**
5 * Perform a connected-component clustering for contact graphs (like the one implemented in matlab)
6 * @author stehr
7 *
8 */
9 public class testGraphClustering {
10
11 // TODO: This whole thing does currently not work
12
13 HashMap<Edge,Integer> e2c;
14 TreeSet<Integer> clusters;
15 HashMap<Integer,EdgeSet> members;
16 EdgeSet edges;
17 int seqLen;
18
19 /* constructor */
20 public testGraphClustering() {
21 try {
22 e2c = new HashMap<Edge,Integer>(); // map from edge to cluster
23 clusters = new TreeSet<Integer>(); // set of all non-empty clusters
24 members = new HashMap<Integer,EdgeSet>(); // cluster members
25
26 Pdb pdb = new PdbasePdb("3eca","A");
27 Graph g = pdb.get_graph("Ca", 8.0);
28 edges = g.getContacts();
29 seqLen = g.getFullLength();
30
31 System.out.println("Sequence length: " + seqLen);
32 System.out.println("Number of contacts: " + edges.size());
33
34 } catch(Exception e) {
35 e.printStackTrace();
36 }
37 }
38
39 private int dist(Edge e, Edge f) {
40 return Math.abs(e.i - f.i) + Math.abs(e.j - f.j);
41 }
42
43 private void merge(Edge e1, Edge e2) {
44 int cid1 = e2c.get(e1);
45 int cid2 = e2c.get(e2);
46 EdgeSet cluster1 = members.get(cid1);
47 EdgeSet cluster2 = members.get(cid2);
48 if(cluster1.size() > cluster1.size()) { // swap clusters
49 EdgeSet temp = cluster1;
50 cluster1 = cluster2;
51 cluster2 = temp;
52 int tmp = cid1;
53 cid1 = cid2;
54 cid2 = tmp;
55 }
56 //System.out.println("Merging clusters " + cluster1 + " and " + cluster2);
57 for(Edge e3:cluster2) {
58 e2c.put(e3, cid1); // point all edges to cluster1
59 }
60 cluster1.addAll(cluster2); // put all edges to cluster1
61 cluster2.removeAll(cluster2); // empty cluster2
62 if(!clusters.remove(cid2)) {
63 System.err.println("Error!"); // trash cluster2
64 }
65 //System.out.println("Result: " + cluster1 + " and " + cluster2);
66 }
67
68 public static void main(String[] args) {
69
70 testGraphClustering testGC = new testGraphClustering();
71 testGC.doGridClustering(4,3,15);
72
73 }
74
75 public void doGridClustering(int maxManhDist, int minClustSize, int minSeqSep) {
76
77 int size = (seqLen / maxManhDist) + 1;
78 System.out.println("Grid size: " + size);
79
80 EdgeSet[][] grid = new EdgeSet[size][size]; // grid of clusters
81
82 int cid = 1;
83 for(Edge e:edges) {
84 EdgeSet newCluster = new EdgeSet();
85 newCluster.add(e);
86 clusters.add(cid); // add cluster to clusters
87 members.put(cid, newCluster);
88 e2c.put(e, cid);
89 cid++;
90 }
91
92 // put edges in grid cells
93 for(Edge e:edges) {
94 int x = e.i / maxManhDist;
95 int y = e.j / maxManhDist;
96 if(grid[x][y] == null) {
97 EdgeSet gridCluster = new EdgeSet();
98 gridCluster.add(e);
99 grid[x][y] = gridCluster;
100 } else {
101 grid[x][y].add(e);
102 }
103 }
104
105 System.out.println("Before clustering: Number of clusters:" + clusters.size());
106
107 boolean change = true;
108 while(change) {
109 change = false;
110 System.out.println("While clustering: Number of clusters:" + clusters.size());
111 for(Edge e:edges) {
112 int x = e.i / maxManhDist;
113 int y = e.j / maxManhDist;
114 // search my own bin
115 for(Edge e2:grid[x][y]) {
116 if(dist(e,e2) <= maxManhDist) {
117 int cid1 = e2c.get(e);
118 int cid2 = e2c.get(e2);
119 if(cid1 != cid2) {
120 merge(e,e2);
121 change=true;
122 }
123 }
124 }
125 // search bin to the right
126 if(x+1 < size && grid[x+1][y] != null) {
127 for(Edge e2:grid[x+1][y]) {
128 if(dist(e,e2) <= maxManhDist) {
129 int cid1 = e2c.get(e);
130 int cid2 = e2c.get(e2);
131 if(cid1 != cid2) {
132 merge(e,e2);
133 change=true;
134 }
135 }
136 }
137 }
138 // search bin below
139 if(y+1 < size && grid[x][y+1] != null) {
140 for(Edge e2:grid[x][y+1]) {
141 if(dist(e,e2) <= maxManhDist) {
142 int cid1 = e2c.get(e);
143 int cid2 = e2c.get(e2);
144 if(cid1 != cid2) {
145 merge(e,e2);
146 change=true;
147 }
148 }
149 }
150 }
151 // search bin diagonal
152 if(y+1 < size && x+1 < size && grid[x+1][y+1] != null) {
153 for(Edge e2:grid[x+1][y+1]) {
154 if(dist(e,e2) <= maxManhDist) {
155 int cid1 = e2c.get(e);
156 int cid2 = e2c.get(e2);
157 if(cid1 != cid2) {
158 merge(e,e2);
159 change=true;
160 }
161 }
162 }
163 }
164 }
165 }
166
167 // Verify clustering
168 for(Integer cId1:clusters) {
169 for(Integer cId2:clusters) {
170 if(cId1 != cId2) {
171 for(Edge e1: members.get(cId1)) {
172 for(Edge e2: members.get(cId2)) {
173 if(dist(e1,e2) <= maxManhDist) System.err.println("Error: " + e1 + "," + e2 + " in " + cId1 + " and " + cId2);
174 }
175 }
176 }
177 }
178 }
179
180 System.out.println("Finished clustering. Number of clusters:" + clusters.size());
181 int numEdges = 0;
182 for(Integer cId:clusters) {
183 numEdges += members.get(cId).size();
184 }
185 System.out.println("Number of edges in clusters: " + numEdges);
186
187 TreeSet<Integer> filteredClusters = new TreeSet<Integer>();
188
189 // delete small clusters
190 for(Integer c:clusters) {
191 EdgeSet clust = members.get(c);
192 if(clust.size() >= minClustSize) filteredClusters.add(c);
193 }
194 // delete short range clusters
195 for(Edge e:edges) {
196 if(Math.abs(e.i - e.j) < minSeqSep) {
197 filteredClusters.remove(e2c.get(e));
198 }
199 }
200
201 System.out.println("Finished filtering. Number of clusters:" + filteredClusters.size());
202 numEdges = 0;
203 for(Integer cId:filteredClusters) {
204 numEdges += members.get(cId).size();
205 }
206 System.out.println("Number of edges in clusters: " + numEdges);
207
208 int i = 1;
209 for(Integer cId:filteredClusters) {
210 EdgeSet c = members.get(cId);
211 System.out.println(i++ + " (" + c.size() + "):" + c);
212 }
213
214
215 }
216 }