ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Graph.java
Revision: 234
Committed: Tue Jul 10 14:14:58 2007 UTC (17 years, 7 months ago) by duarte
File size: 10826 byte(s)
Log Message:
Made ContactList a TreeSet instead of an ArrayList, should improve performance (log(n) rather than linear)
REFACTORING: 
ContactList -> EdgeSet
Contact -> Edge
Line File contents
1 package proteinstructure;
2
3 import java.io.FileOutputStream;
4 import java.io.PrintStream;
5 import java.io.IOException;
6 import java.util.TreeMap;
7 import java.util.HashMap;
8
9 /**
10 * A residue interaction graph derived from a single chain pdb protein structure
11 *
12 * @author Jose Duarte
13 * Class: Graph
14 * Package: proteinstructure
15 */
16 public class Graph {
17
18 public final static String GRAPHFILEFORMATVERSION = "1.0";
19
20 public EdgeSet contacts; // we keep it public to be able to re-reference the object directly (getContacts() copies it)
21
22 protected TreeMap<Integer,String> nodes; // nodes is a TreeMap of residue serials to residue types (3 letter code)
23 protected String sequence; // the full sequence (with unobserved residues and non-standard aas ='X')
24 protected String pdbCode;
25 protected String chainCode;
26 protected String pdbChainCode;
27 protected double cutoff;
28 protected String ct; // the contact type
29 protected boolean directed;
30
31 // fullLength is length of full sequence or:
32 // -if sequence not provided (when reading from db): length of everything except possible unobserved residues at end of chain
33 // -if sequence and nodes not provided (when reading from file and sequence field missing): length except possible unobserved residues at end of chain and possible nodes without contacts at end of chain
34 protected int fullLength;
35 protected int obsLength; // length without unobserved, non standard aas
36
37 protected int numContacts;
38
39 protected boolean modified;
40
41 public Graph() {
42
43 }
44
45 /**
46 * Constructs Graph object by passing ArrayList with contacts and TreeMap with nodes (res serials and types)
47 * Must also pass contact type, cutoff, pdbCode and chainCode
48 * @param contacts
49 * @param nodes
50 * @param sequence
51 * @param cutoff
52 * @param ct
53 * @param pdbCode
54 * @param chainCode
55 */
56 protected Graph (EdgeSet contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String pdbCode, String chainCode, String pdbChainCode) {
57 this.contacts=contacts;
58 this.cutoff=cutoff;
59 this.nodes=nodes;
60 this.sequence=sequence;
61 this.pdbCode=pdbCode;
62 this.chainCode=chainCode;
63 this.pdbChainCode=pdbChainCode;
64 this.ct=ct;
65 this.fullLength=sequence.length();
66 this.obsLength=nodes.size();
67 this.numContacts=contacts.size();
68 this.modified=false;
69 this.directed=false;
70 if (ct.contains("/")){
71 directed=true;
72 }
73
74 assert(this.pdbCode.equals(this.pdbCode.toLowerCase())); // pdb codes should be always lower case
75 assert(this.pdbChainCode.equals(this.pdbChainCode.toUpperCase())); // pdb chain codes should be always upper case
76 }
77
78
79 //TODO implement (from python) write_graph_to_db, do we really need it here??
80
81 public void write_graph_to_file (String outfile) throws IOException {
82 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
83 Out.println("#AGLAPPE GRAPH FILE ver: "+GRAPHFILEFORMATVERSION);
84 Out.println("#SEQUENCE: "+sequence);
85 Out.println("#PDB: "+pdbCode);
86 Out.println("#PDB CHAIN CODE: "+pdbChainCode);
87 Out.println("#CHAIN: "+chainCode);
88 Out.println("#CT: "+ct);
89 Out.println("#CUTOFF: "+cutoff);
90 for (Edge pair:contacts){
91 int i_resser=pair.i;
92 int j_resser=pair.j;
93 Out.println(i_resser+"\t"+j_resser);
94 }
95 Out.close();
96 }
97
98 /**
99 * Gets list of contacts as a new EdgeSet (deep copied)
100 *
101 */
102 public EdgeSet getContacts(){
103 EdgeSet newContacts = new EdgeSet();
104 for (Edge cont:contacts){
105 newContacts.add(new Edge(cont.i,cont.j));
106 }
107 return newContacts;
108 }
109
110 /**
111 * Gets TreeMap of nodes, deep copying
112 *
113 */
114 public TreeMap<Integer,String> getNodes(){
115 TreeMap<Integer,String> newNodes = new TreeMap<Integer,String>();
116 for (int resser:nodes.keySet()){
117 newNodes.put(resser, nodes.get(resser));
118 }
119 return newNodes;
120 }
121
122 /**
123 * Deep copies this Graph object returning new one
124 * @return
125 */
126 public Graph copy(){
127 return new Graph(getContacts(),getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
128 }
129
130 /**
131 * Gets a reference to this Graph deep copying contacts but re-referencing nodes
132 * @return
133 */
134 public Graph copyKeepingNodes(){
135 return new Graph(getContacts(),nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
136 }
137
138 /**
139 * Returns an int matrix with 1s for contacts and 0s for non contacts, i.e. the contact map
140 * In non-crossed cases this should give us the upper half matrix (contacts are only j>i)
141 * In crossed cases this gives us a full matrix (contacts are both j>i and i>j since they are directed)
142 * @return
143 */
144 public int[][] getIntMatrix(){
145 // this initialises the matrix to 0 (i.e. no contact)
146 int[][] cm = new int[fullLength][fullLength];
147 // we put a 1 for all given contacts
148 for (Edge cont:contacts){
149 int i_resser = cont.i;
150 int j_resser = cont.j;
151 cm[i_resser-1][j_resser-1]=1;
152 }
153 return cm;
154 }
155
156 /**
157 * Gets a node's residue type given the residue serial
158 * @param resser
159 * @return
160 */
161 public String getResType(int resser){
162 return nodes.get(resser);
163 }
164
165 /**
166 * Gets node neighbourhood given a residue serial
167 * @param resser
168 * @return
169 */
170 public NodeNbh getNodeNbh(int resser){
171 NodeNbh nbh = new NodeNbh(resser, getResType(resser));
172 //this could be implemented using the contact map matrix and scanning through 1 column/row
173 //it would be just slightly faster, here we do 2*numContacts iterations, using matrix would be only fullLength iterations
174 //however we would then have the overhead of creating the matrix
175 for (Edge cont:contacts){
176 if (cont.i==resser) nbh.put(cont.j, nodes.get(cont.j));
177 if (cont.j==resser) nbh.put(cont.i, nodes.get(cont.i));
178 }
179 return nbh;
180 }
181
182 /**
183 * Gets edge neighbourhood (common neighbourhood) given a residue serial pair
184 * @param i_resser
185 * @param j_resser
186 * @return
187 */
188 public EdgeNbh getEdgeNbh(int i_resser, int j_resser){
189 EdgeNbh nbh = new EdgeNbh(i_resser, getResType(i_resser), j_resser, getResType(j_resser));
190 NodeNbh i_nbhd = getNodeNbh(i_resser);
191 NodeNbh j_nbhd = getNodeNbh(j_resser);
192 if (j_nbhd.size()>=i_nbhd.size()) { //with this we will be slightly faster, always iterating through smallest TreeMap
193 for (int resser:i_nbhd.keySet()) {
194 if (j_nbhd.containsKey(resser)) nbh.put(resser, i_nbhd.get(resser));
195 }
196 } else {
197 for (int resser:j_nbhd.keySet()) {
198 if (i_nbhd.containsKey(resser)) nbh.put(resser, j_nbhd.get(resser));
199 }
200 }
201 return nbh;
202 }
203
204 /**
205 * Gets 2nd shell node neighbourhood
206 * @param resser
207 */
208 public NodeNbh get2ndshellNodeNbh(int resser){
209 // first we create a NodeNbh object for the second shell, central residue is given resser
210 NodeNbh nbh2ndshell = new NodeNbh(resser,getResType(resser));
211 // we get 1st neighbourhood
212 NodeNbh nbh = this.getNodeNbh(resser);
213 for (int nb:nbh.keySet()){
214 NodeNbh nbh2 = this.getNodeNbh(nb); // for each first neighbour we take its neighbourhood
215 for (int nb2:nbh2.keySet()){
216 if (nb2!=resser && !nbh.containsKey(nb2)){ // if the 2nd neighbour nb2 is not the given resser or is not a 1st neighbour
217 nbh2ndshell.put(nb2, getResType(nb2));
218 }
219 }
220 }
221 return nbh2ndshell;
222 }
223
224 public void addEdge(Edge cont){
225 contacts.add(cont); // contacts is a TreeSet and thus takes care of duplicates
226 int oldNumContacts = numContacts;
227 numContacts=getNumContacts();
228 // if number of contacts changed that means we actually added a new contact and thus we modified the graph
229 if (numContacts!=oldNumContacts) modified=true;
230
231 }
232
233 public void delEdge(Edge cont){
234 contacts.remove(cont);
235 numContacts--;
236 modified=true;
237 }
238
239 public void restrictContactsToMaxRange(int range){
240 EdgeSet edgesToDelete = new EdgeSet();
241 for (Edge cont:contacts){
242 if (cont.getRange()>range) edgesToDelete.add(cont);
243 }
244 for (Edge cont:edgesToDelete){
245 delEdge(cont);
246 }
247 }
248
249 public void restrictContactsToMinRange(int range){
250 EdgeSet edgesToDelete = new EdgeSet();
251 for (Edge cont:contacts){
252 if (cont.getRange()<range) edgesToDelete.add(cont);
253 }
254 for (Edge cont:edgesToDelete){
255 delEdge(cont);
256 }
257 }
258
259 /**
260 * Returns a HashMap with all edge neighbourhood sizes (if they are >0) for each cell in the contact map
261 * @return
262 */
263 public HashMap<Edge,Integer> getAllEdgeNbhSizes() {
264 HashMap<Edge,Integer> sizes = new HashMap<Edge, Integer>();
265 if (!directed) {
266 for (int i=1; i<fullLength;i++){
267 for (int j=i+1; j<fullLength;j++){
268 int size = getEdgeNbh(i, j).size();
269 if (size>0) sizes.put(new Edge(i,j), size);
270 }
271 }
272 } else {
273 for (int i=1; i<fullLength;i++){
274 for (int j=1; j<fullLength;j++){
275 if (i!=j){
276 int size = getEdgeNbh(i, j).size();
277 if (size>0) sizes.put(new Edge(i,j), size);
278 }
279 }
280 }
281 }
282 return sizes;
283 }
284
285 //TODO not sure what kind of return we want, for now is a HashMap with three graph objects
286 public HashMap<String,Graph> compare(Graph other) throws Exception{
287 //first check that other has same sequence than this, otherwise throw exception
288 if (!this.sequence.equals(other.sequence)){
289 //TODO throw specific exception
290 throw new Exception("Sequence of 2 graphs to compare differ, can't compare them.");
291 }
292 EdgeSet common = new EdgeSet();
293 EdgeSet onlythis = new EdgeSet();
294 EdgeSet onlyother = new EdgeSet();
295 for (Edge cont:this.contacts){
296 if (other.contacts.contains(cont)) {
297 common.add(cont);
298 } else{
299 onlythis.add(cont);
300 }
301 }
302 for (Edge cont:other.contacts){
303 if (!this.contacts.contains(cont)){
304 onlyother.add(cont);
305 }
306 }
307 Graph commongraph = new Graph (common,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
308 Graph onlythisgraph = new Graph (onlythis,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
309 Graph onlyothergraph = new Graph (onlyother,getNodes(),sequence,cutoff,ct,other.pdbCode,other.chainCode,other.pdbChainCode);
310 HashMap<String,Graph> result = new HashMap<String,Graph>();
311 result.put("common", commongraph);
312 result.put("onlythis", onlythisgraph);
313 result.put("onlyother",onlyothergraph);
314 return result;
315 }
316
317 public boolean isModified(){
318 return modified;
319 }
320
321 public boolean isDirected(){
322 return directed;
323 }
324
325 public String getPdbCode() {
326 return pdbCode;
327 }
328
329 public String getPdbChainCode(){
330 return pdbChainCode;
331 }
332
333 public String getChainCode(){
334 return chainCode;
335 }
336
337 public String getSequence(){
338 return sequence;
339 }
340
341 public int getFullLength(){
342 return fullLength;
343 }
344
345 public int getObsLength(){
346 return obsLength;
347 }
348
349 public int getNumContacts(){
350 // in theory we could return just numContacts, because we have taken care of updating it every time contacts changed
351 // however we call directly contacts.size() as I feel is safer
352 return contacts.size();
353 }
354
355 public String getContactType() {
356 return ct;
357 }
358
359 public double getCutoff(){
360 return cutoff;
361 }
362 }