ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Graph.java
Revision: 208
Committed: Wed Jun 27 14:42:12 2007 UTC (17 years, 3 months ago) by duarte
File size: 9518 byte(s)
Log Message:
FIXED BUGS:
- directed was not set when reading from cm file
- pdbChainCode not set when reading from db given pdbCode
- filling pdbresser2resser and resser2pdbresser hashmaps also in reading from pdb file
- using "A" as chainCode when reading from pdb file
- some error handling for file formats: new exception classes GraphFileFormatError and PdbfileFormatError
Line User Rev File contents
1 duarte 123 package proteinstructure;
2 duarte 191
3 duarte 123 import java.io.FileOutputStream;
4     import java.io.PrintStream;
5     import java.io.IOException;
6 duarte 129 import java.util.TreeMap;
7 duarte 189 import java.util.HashMap;
8 duarte 123
9 duarte 207 /**
10     * A residue interaction graph derived from a single chain pdb protein structure
11     *
12     * @author Jose Duarte
13     * Class: Graph
14     * Package: proteinstructure
15     */
16 duarte 123 public class Graph {
17    
18 duarte 144 public final static String GRAPHFILEFORMATVERSION = "1.0";
19 duarte 206
20 duarte 207 public ContactList contacts; // we keep it public to be able to re-reference the object directly (getContacts() copies it)
21 duarte 123
22 duarte 207 protected TreeMap<Integer,String> nodes; // nodes is a TreeMap of residue serials to residue types (3 letter code)
23     protected String sequence; // the full sequence (with unobserved residues and non-standard aas ='X')
24     protected String pdbCode;
25     protected String chainCode;
26 duarte 208 protected String pdbChainCode;
27 duarte 207 protected double cutoff;
28     protected String ct; // the contact type
29 duarte 208 protected boolean directed;
30 duarte 207
31 duarte 159 // fullLength is length of full sequence or:
32     // -if sequence not provided (when reading from db): length of everything except possible unobserved residues at end of chain
33     // -if sequence and nodes not provided (when reading from file and sequence field missing): length except possible unobserved residues at end of chain and possible nodes without contacts at end of chain
34 duarte 207 protected int fullLength;
35     protected int obsLength; // length without unobserved, non standard aas
36 duarte 159
37 duarte 207 protected int numContacts;
38 duarte 159
39 duarte 207 protected boolean modified;
40 duarte 175
41 duarte 207 public Graph() {
42    
43     }
44    
/**
 * Constructs a Graph from a ready-made contact list and node map.
 * The contact list and node map are stored by reference (they are not copied).
 * Full length, observed length and number of contacts are derived from the
 * sequence, the nodes and the contacts respectively. The new graph starts out
 * unmodified and is flagged as directed when the contact type contains "/".
 * @param contacts the contacts (edges) of the graph
 * @param nodes residue serials mapped to residue types
 * @param sequence the full sequence, its length becomes the full length of the graph
 * @param cutoff the cutoff
 * @param ct the contact type
 * @param pdbCode the pdb code
 * @param chainCode the chain code
 * @param pdbChainCode the pdb chain code
 */
public Graph (ContactList contacts, TreeMap<Integer,String> nodes, String sequence, double cutoff,String ct, String pdbCode, String chainCode, String pdbChainCode) {
    // values taken over as given
    this.contacts = contacts;
    this.cutoff = cutoff;
    this.nodes = nodes;
    this.sequence = sequence;
    this.pdbCode = pdbCode;
    this.chainCode = chainCode;
    this.pdbChainCode = pdbChainCode;
    this.ct = ct;

    // values derived from the arguments
    this.fullLength = sequence.length();
    this.obsLength = nodes.size();
    this.numContacts = contacts.size();
    this.modified = false;
    this.directed = ct.contains("/");
}
74 duarte 135
75 duarte 129
76 duarte 135 //TODO implement (from python) write_graph_to_db, do we really need it here??
77    
78 duarte 144 public void write_graph_to_file (String outfile) throws IOException {
79     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
80 duarte 208 Out.println("#AGLAPPE GRAPH FILE ver: "+GRAPHFILEFORMATVERSION);
81 duarte 144 Out.println("#SEQUENCE: "+sequence);
82 duarte 206 Out.println("#PDB: "+pdbCode);
83     Out.println("#PDB CHAIN CODE: "+pdbChainCode);
84     Out.println("#CHAIN: "+chainCode);
85 duarte 144 Out.println("#CT: "+ct);
86     Out.println("#CUTOFF: "+cutoff);
87     for (Contact pair:contacts){
88     int i_resser=pair.i;
89     int j_resser=pair.j;
90     Out.println(i_resser+"\t"+j_resser);
91     }
92     Out.close();
93     }
94 duarte 175
95 duarte 159 /**
96 duarte 175 * Gets list of contacts as a new ContactList (deep copied)
97     *
98     */
99     public ContactList getContacts(){
100     ContactList newContacts = new ContactList();
101     for (Contact cont:contacts){
102     newContacts.add(new Contact(cont.i,cont.j));
103     }
104     return newContacts;
105     }
106    
107     /**
108     * Gets TreeMap of nodes, deep copying
109     *
110     */
111     public TreeMap<Integer,String> getNodes(){
112     TreeMap<Integer,String> newNodes = new TreeMap<Integer,String>();
113     for (int resser:nodes.keySet()){
114     newNodes.put(resser, nodes.get(resser));
115     }
116     return newNodes;
117     }
118    
119     /**
120     * Deep copies this Graph object returning new one
121     * @return
122     */
123     public Graph copy(){
124 duarte 206 return new Graph(getContacts(),getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
125 duarte 175 }
126    
127     /**
128 duarte 159 * Returns an int matrix with 1s for contacts and 0s for non contacts, i.e. the contact map
129     * In non-crossed cases this should give us the upper half matrix (contacts are only j>i)
130     * In crossed cases this gives us a full matrix (contacts are both j>i and i>j since they are directed)
131     * @return
132     */
133     public int[][] getIntMatrix(){
134     // this initialises the matrix to 0 (i.e. no contact)
135     int[][] cm = new int[fullLength][fullLength];
136     // we put a 1 for all given contacts
137     for (Contact cont:contacts){
138     int i_resser = cont.i;
139     int j_resser = cont.j;
140     cm[i_resser-1][j_resser-1]=1;
141 duarte 129 }
142     return cm;
143     }
144 duarte 159
145 duarte 165 /**
146 duarte 179 * Gets a node's residue type given the residue serial
147     * @param resser
148     * @return
149     */
150     public String getResType(int resser){
151     return nodes.get(resser);
152     }
153    
154     /**
155 duarte 165 * Gets node neighbourhood given a residue serial
156     * @param resser
157     * @return
158     */
159 duarte 179 public NodeNbh getNodeNbh(int resser){
160     NodeNbh nbh = new NodeNbh(resser, getResType(resser));
161 duarte 165 //this could be implemented using the contact map matrix and scanning through 1 column/row
162     //it would be just slightly faster, here we do 2*numContacts iterations, using matrix would be only fullLength iterations
163 duarte 179 //however we would then have the overhead of creating the matrix
164 duarte 165 for (Contact cont:contacts){
165     if (cont.i==resser) nbh.put(cont.j, nodes.get(cont.j));
166     if (cont.j==resser) nbh.put(cont.i, nodes.get(cont.i));
167     }
168     return nbh;
169     }
170    
171     /**
172     * Gets edge neighbourhood (common neighbourhood) given a residue serial pair
173     * @param i_resser
174     * @param j_resser
175     * @return
176     */
177 duarte 179 public EdgeNbh getEdgeNbh(int i_resser, int j_resser){
178     EdgeNbh nbh = new EdgeNbh(i_resser, getResType(i_resser), j_resser, getResType(j_resser));
179     NodeNbh i_nbhd = getNodeNbh(i_resser);
180     NodeNbh j_nbhd = getNodeNbh(j_resser);
181 duarte 175 if (j_nbhd.size()>=i_nbhd.size()) { //with this we will be slightly faster, always iterating through smallest TreeMap
182     for (int resser:i_nbhd.keySet()) {
183     if (j_nbhd.containsKey(resser)) nbh.put(resser, i_nbhd.get(resser));
184     }
185     } else {
186     for (int resser:j_nbhd.keySet()) {
187     if (i_nbhd.containsKey(resser)) nbh.put(resser, j_nbhd.get(resser));
188     }
189 duarte 165 }
190     return nbh;
191     }
192    
193 duarte 175 public void addEdge(Contact cont){
194     contacts.add(cont);
195     numContacts++;
196     modified=true;
197     }
198    
199     public void delEdge(Contact cont){
200     contacts.remove(cont);
201     numContacts--;
202     modified=true;
203     }
204    
205     public void restrictContactsToMaxRange(int range){
206 duarte 179 ContactList edgesToDelete = new ContactList();
207 duarte 175 for (Contact cont:contacts){
208 duarte 179 if (cont.getRange()>range) edgesToDelete.add(cont);
209 duarte 175 }
210 duarte 179 for (Contact cont:edgesToDelete){
211     delEdge(cont);
212     }
213 duarte 175 }
214    
215     public void restrictContactsToMinRange(int range){
216 duarte 179 ContactList edgesToDelete = new ContactList();
217 duarte 175 for (Contact cont:contacts){
218 duarte 179 if (cont.getRange()<range) edgesToDelete.add(cont);
219 duarte 175 }
220 duarte 179 for (Contact cont:edgesToDelete){
221     delEdge(cont);
222     }
223 duarte 175 }
224 duarte 189
225 duarte 191 /**
226     * Returns a HashMap with all edge neighbourhood sizes (if they are >0) for each cell in the contact map
227     * @return
228     */
229     public HashMap<Contact,Integer> getAllEdgeNbhSizes() {
230     HashMap<Contact,Integer> sizes = new HashMap<Contact, Integer>();
231     if (!directed) {
232     for (int i=1; i<fullLength;i++){
233     for (int j=i+1; j<fullLength;j++){
234     int size = getEdgeNbh(i, j).size();
235     if (size>0) sizes.put(new Contact(i,j), size);
236     }
237     }
238     } else {
239     for (int i=1; i<fullLength;i++){
240     for (int j=1; j<fullLength;j++){
241     if (i!=j){
242     int size = getEdgeNbh(i, j).size();
243     if (size>0) sizes.put(new Contact(i,j), size);
244     }
245     }
246     }
247     }
248     return sizes;
249     }
250    
251 duarte 189 //TODO not sure what kind of return we want, for now is a HashMap with three graph objects
252     public HashMap<String,Graph> compare(Graph other) throws Exception{
253     //first check that other has same sequence than this, otherwise throw exception
254     if (!this.sequence.equals(other.sequence)){
255     //TODO throw specific exception
256     throw new Exception("Sequence of 2 graphs to compare differ, can't compare them.");
257     }
258     ContactList common = new ContactList();
259     ContactList onlythis = new ContactList();
260     ContactList onlyother = new ContactList();
261     for (Contact cont:this.contacts){
262     if (other.contacts.contains(cont)) {
263     common.add(cont);
264     } else{
265     onlythis.add(cont);
266     }
267     }
268     for (Contact cont:other.contacts){
269     if (!this.contacts.contains(cont)){
270     onlyother.add(cont);
271     }
272     }
273 duarte 206 Graph commongraph = new Graph (common,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
274     Graph onlythisgraph = new Graph (onlythis,getNodes(),sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
275     Graph onlyothergraph = new Graph (onlyother,getNodes(),sequence,cutoff,ct,other.pdbCode,other.chainCode,other.pdbChainCode);
276 duarte 189 HashMap<String,Graph> result = new HashMap<String,Graph>();
277     result.put("common", commongraph);
278     result.put("onlythis", onlythisgraph);
279     result.put("onlyother",onlyothergraph);
280     return result;
281     }
282 duarte 206
283     public boolean isModified(){
284     return modified;
285     }
286    
287     public boolean isDirected(){
288     return directed;
289     }
290    
291     public String getPdbCode() {
292     return pdbCode;
293     }
294    
295     public String getPdbChainCode(){
296     return pdbChainCode;
297     }
298    
299     public String getChainCode(){
300     return chainCode;
301     }
302    
303     public String getSequence(){
304     return sequence;
305     }
306    
307     public int getFullLength(){
308     return fullLength;
309     }
310    
311     public int getObsLength(){
312     return obsLength;
313     }
314    
315     public int getNumContacts(){
316     // in theory we could return just numContacts, because we have taken care of updating it every time contacts changed
317     // however we call directly contacts.size() as I feel is safer
318     return contacts.size();
319     }
320    
321     public String getContactType() {
322     return ct;
323     }
324    
325     public double getCutoff(){
326     return cutoff;
327     }
328 duarte 123 }