ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Graph.java
Revision: 252
Committed: Thu Aug 9 10:37:14 2007 UTC (17 years, 6 months ago) by duarte
File size: 11994 byte(s)
Log Message:
Made constructor public
Line User Rev File contents
1 duarte 123 package proteinstructure;
2 duarte 191
3 duarte 123 import java.io.FileOutputStream;
4     import java.io.PrintStream;
5     import java.io.IOException;
6 duarte 129 import java.util.TreeMap;
7 duarte 189 import java.util.HashMap;
8 duarte 123
9 duarte 207 /**
10     * A residue interaction graph derived from a single chain pdb protein structure
11     *
12     * @author Jose Duarte
13     * Class: Graph
14     * Package: proteinstructure
15     */
16 duarte 123 public class Graph {
17    
18 duarte 144 public final static String GRAPHFILEFORMATVERSION = "1.0";
19 duarte 206
20 duarte 234 public EdgeSet contacts; // we keep it public to be able to re-reference the object directly (getContacts() copies it)
21 duarte 123
22 duarte 207 protected TreeMap<Integer,String> nodes; // nodes is a TreeMap of residue serials to residue types (3 letter code)
23     protected String sequence; // the full sequence (with unobserved residues and non-standard aas ='X')
24     protected String pdbCode;
25     protected String chainCode;
26 duarte 208 protected String pdbChainCode;
27 duarte 207 protected double cutoff;
28     protected String ct; // the contact type
29 duarte 208 protected boolean directed;
30 duarte 207
31 duarte 159 // fullLength is length of full sequence or:
32     // -if sequence not provided (when reading from db): length of everything except possible unobserved residues at end of chain
33     // -if sequence and nodes not provided (when reading from file and sequence field missing): length except possible unobserved residues at end of chain and possible nodes without contacts at end of chain
34 duarte 207 protected int fullLength;
35     protected int obsLength; // length without unobserved, non standard aas
36 duarte 159
37 duarte 207 protected int numContacts;
38 duarte 159
39 duarte 207 protected boolean modified;
40 duarte 175
41 duarte 207 public Graph() {
42    
43     }
44    
45 duarte 134 /**
46     * Constructs Graph object by passing ArrayList with contacts and TreeMap with nodes (res serials and types)
47 duarte 206 * Must also pass contact type, cutoff, pdbCode and chainCode
48 duarte 134 * @param contacts
49     * @param nodes
50     * @param sequence
51     * @param cutoff
52     * @param ct
53 duarte 206 * @param pdbCode
54     * @param chainCode
55 duarte 134 */
56 duarte 252 public Graph (EdgeSet contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String pdbCode, String chainCode, String pdbChainCode) {
57 duarte 123 this.contacts=contacts;
58     this.cutoff=cutoff;
59 duarte 129 this.nodes=nodes;
60     this.sequence=sequence;
61 duarte 206 this.pdbCode=pdbCode;
62     this.chainCode=chainCode;
63     this.pdbChainCode=pdbChainCode;
64 duarte 123 this.ct=ct;
65 duarte 159 this.fullLength=sequence.length();
66     this.obsLength=nodes.size();
67     this.numContacts=contacts.size();
68 duarte 175 this.modified=false;
69 duarte 208 this.directed=false;
70 duarte 129 if (ct.contains("/")){
71     directed=true;
72     }
73 stehr 217
74     assert(this.pdbCode.equals(this.pdbCode.toLowerCase())); // pdb codes should be always lower case
75     assert(this.pdbChainCode.equals(this.pdbChainCode.toUpperCase())); // pdb chain codes should be always upper case
76 duarte 123 }
77 duarte 135
78 duarte 129
79 duarte 135 //TODO implement (from python) write_graph_to_db, do we really need it here??
80    
81 duarte 144 public void write_graph_to_file (String outfile) throws IOException {
82     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
83 duarte 208 Out.println("#AGLAPPE GRAPH FILE ver: "+GRAPHFILEFORMATVERSION);
84 duarte 144 Out.println("#SEQUENCE: "+sequence);
85 duarte 206 Out.println("#PDB: "+pdbCode);
86     Out.println("#PDB CHAIN CODE: "+pdbChainCode);
87     Out.println("#CHAIN: "+chainCode);
88 duarte 144 Out.println("#CT: "+ct);
89     Out.println("#CUTOFF: "+cutoff);
90 duarte 234 for (Edge pair:contacts){
91 duarte 144 int i_resser=pair.i;
92     int j_resser=pair.j;
93     Out.println(i_resser+"\t"+j_resser);
94     }
95     Out.close();
96     }
97 duarte 175
98 duarte 159 /**
99 duarte 234 * Gets list of contacts as a new EdgeSet (deep copied)
100 duarte 175 *
101     */
102 duarte 234 public EdgeSet getContacts(){
103     EdgeSet newContacts = new EdgeSet();
104     for (Edge cont:contacts){
105     newContacts.add(new Edge(cont.i,cont.j));
106 duarte 175 }
107     return newContacts;
108     }
109    
110     /**
111     * Gets TreeMap of nodes, deep copying
112     *
113     */
114     public TreeMap<Integer,String> getNodes(){
115     TreeMap<Integer,String> newNodes = new TreeMap<Integer,String>();
116     for (int resser:nodes.keySet()){
117     newNodes.put(resser, nodes.get(resser));
118     }
119     return newNodes;
120     }
121    
122     /**
123     * Deep copies this Graph object returning new one
124     * @return
125     */
126     public Graph copy(){
127 duarte 206 return new Graph(getContacts(),getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
128 duarte 175 }
129    
130     /**
131 duarte 232 * Gets a reference to this Graph deep copying contacts but re-referencing nodes
132     * @return
133     */
134     public Graph copyKeepingNodes(){
135     return new Graph(getContacts(),nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
136     }
137    
138     /**
139 duarte 159 * Returns an int matrix with 1s for contacts and 0s for non contacts, i.e. the contact map
140     * In non-crossed cases this should give us the upper half matrix (contacts are only j>i)
141     * In crossed cases this gives us a full matrix (contacts are both j>i and i>j since they are directed)
142     * @return
143     */
144     public int[][] getIntMatrix(){
145     // this initialises the matrix to 0 (i.e. no contact)
146     int[][] cm = new int[fullLength][fullLength];
147     // we put a 1 for all given contacts
148 duarte 234 for (Edge cont:contacts){
149 duarte 159 int i_resser = cont.i;
150     int j_resser = cont.j;
151     cm[i_resser-1][j_resser-1]=1;
152 duarte 129 }
153     return cm;
154     }
155 duarte 159
156 duarte 165 /**
157 duarte 179 * Gets a node's residue type given the residue serial
158     * @param resser
159     * @return
160     */
161     public String getResType(int resser){
162     return nodes.get(resser);
163     }
164    
165     /**
166 duarte 165 * Gets node neighbourhood given a residue serial
167     * @param resser
168     * @return
169     */
170 duarte 179 public NodeNbh getNodeNbh(int resser){
171     NodeNbh nbh = new NodeNbh(resser, getResType(resser));
172 duarte 165 //this could be implemented using the contact map matrix and scanning through 1 column/row
173     //it would be just slightly faster, here we do 2*numContacts iterations, using matrix would be only fullLength iterations
174 duarte 179 //however we would then have the overhead of creating the matrix
175 duarte 234 for (Edge cont:contacts){
176 duarte 165 if (cont.i==resser) nbh.put(cont.j, nodes.get(cont.j));
177     if (cont.j==resser) nbh.put(cont.i, nodes.get(cont.i));
178     }
179     return nbh;
180     }
181    
182     /**
183     * Gets edge neighbourhood (common neighbourhood) given a residue serial pair
184     * @param i_resser
185     * @param j_resser
186     * @return
187     */
188 duarte 179 public EdgeNbh getEdgeNbh(int i_resser, int j_resser){
189     EdgeNbh nbh = new EdgeNbh(i_resser, getResType(i_resser), j_resser, getResType(j_resser));
190     NodeNbh i_nbhd = getNodeNbh(i_resser);
191     NodeNbh j_nbhd = getNodeNbh(j_resser);
192 duarte 175 if (j_nbhd.size()>=i_nbhd.size()) { //with this we will be slightly faster, always iterating through smallest TreeMap
193     for (int resser:i_nbhd.keySet()) {
194     if (j_nbhd.containsKey(resser)) nbh.put(resser, i_nbhd.get(resser));
195     }
196     } else {
197     for (int resser:j_nbhd.keySet()) {
198     if (i_nbhd.containsKey(resser)) nbh.put(resser, j_nbhd.get(resser));
199     }
200 duarte 165 }
201     return nbh;
202     }
203 duarte 232
204     /**
205     * Gets 2nd shell node neighbourhood
206     * @param resser
207     */
208     public NodeNbh get2ndshellNodeNbh(int resser){
209     // first we create a NodeNbh object for the second shell, central residue is given resser
210     NodeNbh nbh2ndshell = new NodeNbh(resser,getResType(resser));
211     // we get 1st neighbourhood
212     NodeNbh nbh = this.getNodeNbh(resser);
213     for (int nb:nbh.keySet()){
214     NodeNbh nbh2 = this.getNodeNbh(nb); // for each first neighbour we take its neighbourhood
215     for (int nb2:nbh2.keySet()){
216     if (nb2!=resser && !nbh.containsKey(nb2)){ // if the 2nd neighbour nb2 is not the given resser or is not a 1st neighbour
217     nbh2ndshell.put(nb2, getResType(nb2));
218     }
219     }
220     }
221     return nbh2ndshell;
222     }
223    
224 duarte 234 public void addEdge(Edge cont){
225 duarte 240 if (!directed && cont.i>cont.j){
226 duarte 239 // we invert in case of undirected and i>j because in undirected we have only the half of the matrix j>i
227     // if we added an edge i>j it could happen that the edge was already there but inverted and wouldn't be detected as a duplicate
228 duarte 240 cont = new Edge(cont.j,cont.i);
229 duarte 239 }
230 duarte 234 contacts.add(cont); // contacts is a TreeSet and thus takes care of duplicates
231     int oldNumContacts = numContacts;
232     numContacts=getNumContacts();
233     // if number of contacts changed that means we actually added a new contact and thus we modified the graph
234     if (numContacts!=oldNumContacts) modified=true;
235    
236 duarte 175 }
237    
238 duarte 234 public void delEdge(Edge cont){
239 duarte 240 if (!directed && cont.i>cont.j){
240     // we invert in case of undirected and i>j because in undirected we have only the half of the matrix j>i
241     // if we try to delete an edge i>j it won't be there, we have to invert it and then try to delete
242     cont = new Edge(cont.j,cont.i);
243     }
244 duarte 175 contacts.remove(cont);
245 duarte 240 int oldNumContacts = numContacts;
246     numContacts=getNumContacts();
247     // if number of contacts changed that means we actually added a new contact and thus we modified the graph
248     if (numContacts!=oldNumContacts) modified=true;
249 duarte 175 }
250    
251     public void restrictContactsToMaxRange(int range){
252 duarte 234 EdgeSet edgesToDelete = new EdgeSet();
253     for (Edge cont:contacts){
254 duarte 179 if (cont.getRange()>range) edgesToDelete.add(cont);
255 duarte 175 }
256 duarte 234 for (Edge cont:edgesToDelete){
257 duarte 179 delEdge(cont);
258     }
259 duarte 175 }
260    
261     public void restrictContactsToMinRange(int range){
262 duarte 234 EdgeSet edgesToDelete = new EdgeSet();
263     for (Edge cont:contacts){
264 duarte 179 if (cont.getRange()<range) edgesToDelete.add(cont);
265 duarte 175 }
266 duarte 234 for (Edge cont:edgesToDelete){
267 duarte 179 delEdge(cont);
268     }
269 duarte 175 }
270 duarte 189
271 duarte 191 /**
272     * Returns a HashMap with all edge neighbourhood sizes (if they are >0) for each cell in the contact map
273     * @return
274     */
275 duarte 234 public HashMap<Edge,Integer> getAllEdgeNbhSizes() {
276     HashMap<Edge,Integer> sizes = new HashMap<Edge, Integer>();
277 duarte 191 if (!directed) {
278     for (int i=1; i<fullLength;i++){
279     for (int j=i+1; j<fullLength;j++){
280     int size = getEdgeNbh(i, j).size();
281 duarte 234 if (size>0) sizes.put(new Edge(i,j), size);
282 duarte 191 }
283     }
284     } else {
285     for (int i=1; i<fullLength;i++){
286     for (int j=1; j<fullLength;j++){
287     if (i!=j){
288     int size = getEdgeNbh(i, j).size();
289 duarte 234 if (size>0) sizes.put(new Edge(i,j), size);
290 duarte 191 }
291     }
292     }
293     }
294     return sizes;
295     }
296    
297 duarte 189 //TODO not sure what kind of return we want, for now is a HashMap with three graph objects
298     public HashMap<String,Graph> compare(Graph other) throws Exception{
299     //first check that other has same sequence than this, otherwise throw exception
300     if (!this.sequence.equals(other.sequence)){
301     //TODO throw specific exception
302     throw new Exception("Sequence of 2 graphs to compare differ, can't compare them.");
303     }
304 duarte 234 EdgeSet common = new EdgeSet();
305     EdgeSet onlythis = new EdgeSet();
306     EdgeSet onlyother = new EdgeSet();
307     for (Edge cont:this.contacts){
308 duarte 189 if (other.contacts.contains(cont)) {
309     common.add(cont);
310     } else{
311     onlythis.add(cont);
312     }
313     }
314 duarte 234 for (Edge cont:other.contacts){
315 duarte 189 if (!this.contacts.contains(cont)){
316     onlyother.add(cont);
317     }
318     }
319 duarte 206 Graph commongraph = new Graph (common,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
320     Graph onlythisgraph = new Graph (onlythis,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
321     Graph onlyothergraph = new Graph (onlyother,getNodes(),sequence,cutoff,ct,other.pdbCode,other.chainCode,other.pdbChainCode);
322 duarte 189 HashMap<String,Graph> result = new HashMap<String,Graph>();
323     result.put("common", commongraph);
324     result.put("onlythis", onlythisgraph);
325     result.put("onlyother",onlyothergraph);
326     return result;
327     }
328 duarte 206
329     public boolean isModified(){
330     return modified;
331     }
332    
333     public boolean isDirected(){
334     return directed;
335     }
336    
337     public String getPdbCode() {
338     return pdbCode;
339     }
340    
341     public String getPdbChainCode(){
342     return pdbChainCode;
343     }
344    
345     public String getChainCode(){
346     return chainCode;
347     }
348    
349     public String getSequence(){
350     return sequence;
351     }
352    
353     public int getFullLength(){
354     return fullLength;
355     }
356    
357     public int getObsLength(){
358     return obsLength;
359     }
360    
361     public int getNumContacts(){
362     // in theory we could return just numContacts, because we have taken care of updating it every time contacts changed
363     // however we call directly contacts.size() as I feel is safer
364     return contacts.size();
365     }
366    
367     public String getContactType() {
368     return ct;
369     }
370    
371     public double getCutoff(){
372     return cutoff;
373     }
374 duarte 249
375     public boolean containsContact(Edge cont){
376     // be careful with order, this checks strictly whether the cont.i, cont.j is given, strictly in that order!
377     // in undirected case contacts are stored only in 1 direction (j>i) and thus if wrong order given it won't be found
378     return contacts.contains(cont);
379     }
380    
381     public void resetContacts(){
382     this.contacts = new EdgeSet();
383     }
384 duarte 123 }