ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Graph.java
Revision: 240
Committed: Tue Jul 31 13:51:59 2007 UTC (17 years, 6 months ago) by duarte
File size: 11617 byte(s)
Log Message:
Fixed bug in delEdge. Wasn't taking care of if contact was given as i>j in undirected case. Also recount of contacts and set of modified to true was wrong
Line File contents
1 package proteinstructure;
2
3 import java.io.FileOutputStream;
4 import java.io.PrintStream;
5 import java.io.IOException;
6 import java.util.TreeMap;
7 import java.util.HashMap;
8
9 /**
10 * A residue interaction graph derived from a single chain pdb protein structure
11 *
12 * @author Jose Duarte
13 * Class: Graph
14 * Package: proteinstructure
15 */
16 public class Graph {
17
18 public final static String GRAPHFILEFORMATVERSION = "1.0";
19
20 public EdgeSet contacts; // we keep it public to be able to re-reference the object directly (getContacts() copies it)
21
22 protected TreeMap<Integer,String> nodes; // nodes is a TreeMap of residue serials to residue types (3 letter code)
23 protected String sequence; // the full sequence (with unobserved residues and non-standard aas ='X')
24 protected String pdbCode;
25 protected String chainCode;
26 protected String pdbChainCode;
27 protected double cutoff;
28 protected String ct; // the contact type
29 protected boolean directed;
30
31 // fullLength is length of full sequence or:
32 // -if sequence not provided (when reading from db): length of everything except possible unobserved residues at end of chain
33 // -if sequence and nodes not provided (when reading from file and sequence field missing): length except possible unobserved residues at end of chain and possible nodes without contacts at end of chain
34 protected int fullLength;
35 protected int obsLength; // length without unobserved, non standard aas
36
37 protected int numContacts;
38
39 protected boolean modified;
40
41 public Graph() {
42
43 }
44
45 /**
46 * Constructs Graph object by passing ArrayList with contacts and TreeMap with nodes (res serials and types)
47 * Must also pass contact type, cutoff, pdbCode and chainCode
48 * @param contacts
49 * @param nodes
50 * @param sequence
51 * @param cutoff
52 * @param ct
53 * @param pdbCode
54 * @param chainCode
55 */
56 protected Graph (EdgeSet contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String pdbCode, String chainCode, String pdbChainCode) {
57 this.contacts=contacts;
58 this.cutoff=cutoff;
59 this.nodes=nodes;
60 this.sequence=sequence;
61 this.pdbCode=pdbCode;
62 this.chainCode=chainCode;
63 this.pdbChainCode=pdbChainCode;
64 this.ct=ct;
65 this.fullLength=sequence.length();
66 this.obsLength=nodes.size();
67 this.numContacts=contacts.size();
68 this.modified=false;
69 this.directed=false;
70 if (ct.contains("/")){
71 directed=true;
72 }
73
74 assert(this.pdbCode.equals(this.pdbCode.toLowerCase())); // pdb codes should be always lower case
75 assert(this.pdbChainCode.equals(this.pdbChainCode.toUpperCase())); // pdb chain codes should be always upper case
76 }
77
78
79 //TODO implement (from python) write_graph_to_db, do we really need it here??
80
81 public void write_graph_to_file (String outfile) throws IOException {
82 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
83 Out.println("#AGLAPPE GRAPH FILE ver: "+GRAPHFILEFORMATVERSION);
84 Out.println("#SEQUENCE: "+sequence);
85 Out.println("#PDB: "+pdbCode);
86 Out.println("#PDB CHAIN CODE: "+pdbChainCode);
87 Out.println("#CHAIN: "+chainCode);
88 Out.println("#CT: "+ct);
89 Out.println("#CUTOFF: "+cutoff);
90 for (Edge pair:contacts){
91 int i_resser=pair.i;
92 int j_resser=pair.j;
93 Out.println(i_resser+"\t"+j_resser);
94 }
95 Out.close();
96 }
97
98 /**
99 * Gets list of contacts as a new EdgeSet (deep copied)
100 *
101 */
102 public EdgeSet getContacts(){
103 EdgeSet newContacts = new EdgeSet();
104 for (Edge cont:contacts){
105 newContacts.add(new Edge(cont.i,cont.j));
106 }
107 return newContacts;
108 }
109
110 /**
111 * Gets TreeMap of nodes, deep copying
112 *
113 */
114 public TreeMap<Integer,String> getNodes(){
115 TreeMap<Integer,String> newNodes = new TreeMap<Integer,String>();
116 for (int resser:nodes.keySet()){
117 newNodes.put(resser, nodes.get(resser));
118 }
119 return newNodes;
120 }
121
122 /**
123 * Deep copies this Graph object returning new one
124 * @return
125 */
126 public Graph copy(){
127 return new Graph(getContacts(),getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
128 }
129
130 /**
131 * Gets a reference to this Graph deep copying contacts but re-referencing nodes
132 * @return
133 */
134 public Graph copyKeepingNodes(){
135 return new Graph(getContacts(),nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
136 }
137
138 /**
139 * Returns an int matrix with 1s for contacts and 0s for non contacts, i.e. the contact map
140 * In non-crossed cases this should give us the upper half matrix (contacts are only j>i)
141 * In crossed cases this gives us a full matrix (contacts are both j>i and i>j since they are directed)
142 * @return
143 */
144 public int[][] getIntMatrix(){
145 // this initialises the matrix to 0 (i.e. no contact)
146 int[][] cm = new int[fullLength][fullLength];
147 // we put a 1 for all given contacts
148 for (Edge cont:contacts){
149 int i_resser = cont.i;
150 int j_resser = cont.j;
151 cm[i_resser-1][j_resser-1]=1;
152 }
153 return cm;
154 }
155
156 /**
157 * Gets a node's residue type given the residue serial
158 * @param resser
159 * @return
160 */
161 public String getResType(int resser){
162 return nodes.get(resser);
163 }
164
165 /**
166 * Gets node neighbourhood given a residue serial
167 * @param resser
168 * @return
169 */
170 public NodeNbh getNodeNbh(int resser){
171 NodeNbh nbh = new NodeNbh(resser, getResType(resser));
172 //this could be implemented using the contact map matrix and scanning through 1 column/row
173 //it would be just slightly faster, here we do 2*numContacts iterations, using matrix would be only fullLength iterations
174 //however we would then have the overhead of creating the matrix
175 for (Edge cont:contacts){
176 if (cont.i==resser) nbh.put(cont.j, nodes.get(cont.j));
177 if (cont.j==resser) nbh.put(cont.i, nodes.get(cont.i));
178 }
179 return nbh;
180 }
181
182 /**
183 * Gets edge neighbourhood (common neighbourhood) given a residue serial pair
184 * @param i_resser
185 * @param j_resser
186 * @return
187 */
188 public EdgeNbh getEdgeNbh(int i_resser, int j_resser){
189 EdgeNbh nbh = new EdgeNbh(i_resser, getResType(i_resser), j_resser, getResType(j_resser));
190 NodeNbh i_nbhd = getNodeNbh(i_resser);
191 NodeNbh j_nbhd = getNodeNbh(j_resser);
192 if (j_nbhd.size()>=i_nbhd.size()) { //with this we will be slightly faster, always iterating through smallest TreeMap
193 for (int resser:i_nbhd.keySet()) {
194 if (j_nbhd.containsKey(resser)) nbh.put(resser, i_nbhd.get(resser));
195 }
196 } else {
197 for (int resser:j_nbhd.keySet()) {
198 if (i_nbhd.containsKey(resser)) nbh.put(resser, j_nbhd.get(resser));
199 }
200 }
201 return nbh;
202 }
203
204 /**
205 * Gets 2nd shell node neighbourhood
206 * @param resser
207 */
208 public NodeNbh get2ndshellNodeNbh(int resser){
209 // first we create a NodeNbh object for the second shell, central residue is given resser
210 NodeNbh nbh2ndshell = new NodeNbh(resser,getResType(resser));
211 // we get 1st neighbourhood
212 NodeNbh nbh = this.getNodeNbh(resser);
213 for (int nb:nbh.keySet()){
214 NodeNbh nbh2 = this.getNodeNbh(nb); // for each first neighbour we take its neighbourhood
215 for (int nb2:nbh2.keySet()){
216 if (nb2!=resser && !nbh.containsKey(nb2)){ // if the 2nd neighbour nb2 is not the given resser or is not a 1st neighbour
217 nbh2ndshell.put(nb2, getResType(nb2));
218 }
219 }
220 }
221 return nbh2ndshell;
222 }
223
224 public void addEdge(Edge cont){
225 if (!directed && cont.i>cont.j){
226 // we invert in case of undirected and i>j because in undirected we have only the half of the matrix j>i
227 // if we added an edge i>j it could happen that the edge was already there but inverted and wouldn't be detected as a duplicate
228 cont = new Edge(cont.j,cont.i);
229 }
230 contacts.add(cont); // contacts is a TreeSet and thus takes care of duplicates
231 int oldNumContacts = numContacts;
232 numContacts=getNumContacts();
233 // if number of contacts changed that means we actually added a new contact and thus we modified the graph
234 if (numContacts!=oldNumContacts) modified=true;
235
236 }
237
238 public void delEdge(Edge cont){
239 if (!directed && cont.i>cont.j){
240 // we invert in case of undirected and i>j because in undirected we have only the half of the matrix j>i
241 // if we try to delete an edge i>j it won't be there, we have to invert it and then try to delete
242 cont = new Edge(cont.j,cont.i);
243 }
244 contacts.remove(cont);
245 int oldNumContacts = numContacts;
246 numContacts=getNumContacts();
247 // if number of contacts changed that means we actually added a new contact and thus we modified the graph
248 if (numContacts!=oldNumContacts) modified=true;
249 }
250
251 public void restrictContactsToMaxRange(int range){
252 EdgeSet edgesToDelete = new EdgeSet();
253 for (Edge cont:contacts){
254 if (cont.getRange()>range) edgesToDelete.add(cont);
255 }
256 for (Edge cont:edgesToDelete){
257 delEdge(cont);
258 }
259 }
260
261 public void restrictContactsToMinRange(int range){
262 EdgeSet edgesToDelete = new EdgeSet();
263 for (Edge cont:contacts){
264 if (cont.getRange()<range) edgesToDelete.add(cont);
265 }
266 for (Edge cont:edgesToDelete){
267 delEdge(cont);
268 }
269 }
270
271 /**
272 * Returns a HashMap with all edge neighbourhood sizes (if they are >0) for each cell in the contact map
273 * @return
274 */
275 public HashMap<Edge,Integer> getAllEdgeNbhSizes() {
276 HashMap<Edge,Integer> sizes = new HashMap<Edge, Integer>();
277 if (!directed) {
278 for (int i=1; i<fullLength;i++){
279 for (int j=i+1; j<fullLength;j++){
280 int size = getEdgeNbh(i, j).size();
281 if (size>0) sizes.put(new Edge(i,j), size);
282 }
283 }
284 } else {
285 for (int i=1; i<fullLength;i++){
286 for (int j=1; j<fullLength;j++){
287 if (i!=j){
288 int size = getEdgeNbh(i, j).size();
289 if (size>0) sizes.put(new Edge(i,j), size);
290 }
291 }
292 }
293 }
294 return sizes;
295 }
296
297 //TODO not sure what kind of return we want, for now is a HashMap with three graph objects
298 public HashMap<String,Graph> compare(Graph other) throws Exception{
299 //first check that other has same sequence than this, otherwise throw exception
300 if (!this.sequence.equals(other.sequence)){
301 //TODO throw specific exception
302 throw new Exception("Sequence of 2 graphs to compare differ, can't compare them.");
303 }
304 EdgeSet common = new EdgeSet();
305 EdgeSet onlythis = new EdgeSet();
306 EdgeSet onlyother = new EdgeSet();
307 for (Edge cont:this.contacts){
308 if (other.contacts.contains(cont)) {
309 common.add(cont);
310 } else{
311 onlythis.add(cont);
312 }
313 }
314 for (Edge cont:other.contacts){
315 if (!this.contacts.contains(cont)){
316 onlyother.add(cont);
317 }
318 }
319 Graph commongraph = new Graph (common,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
320 Graph onlythisgraph = new Graph (onlythis,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
321 Graph onlyothergraph = new Graph (onlyother,getNodes(),sequence,cutoff,ct,other.pdbCode,other.chainCode,other.pdbChainCode);
322 HashMap<String,Graph> result = new HashMap<String,Graph>();
323 result.put("common", commongraph);
324 result.put("onlythis", onlythisgraph);
325 result.put("onlyother",onlyothergraph);
326 return result;
327 }
328
329 public boolean isModified(){
330 return modified;
331 }
332
333 public boolean isDirected(){
334 return directed;
335 }
336
337 public String getPdbCode() {
338 return pdbCode;
339 }
340
341 public String getPdbChainCode(){
342 return pdbChainCode;
343 }
344
345 public String getChainCode(){
346 return chainCode;
347 }
348
349 public String getSequence(){
350 return sequence;
351 }
352
353 public int getFullLength(){
354 return fullLength;
355 }
356
357 public int getObsLength(){
358 return obsLength;
359 }
360
361 public int getNumContacts(){
362 // in theory we could return just numContacts, because we have taken care of updating it every time contacts changed
363 // however we call directly contacts.size() as I feel is safer
364 return contacts.size();
365 }
366
367 public String getContactType() {
368 return ct;
369 }
370
371 public double getCutoff(){
372 return cutoff;
373 }
374 }