ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 228
Committed: Mon Jul 9 14:32:47 2007 UTC (17 years, 2 months ago) by duarte
File size: 11960 byte(s)
Log Message:
Now can do crossed contacts also with geometric hashing
Renamed getGraphGeometricHashing to get_graph (so interface remains as it was before we introduced the geometric hashing)
Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3     import java.io.FileOutputStream;
4     import java.io.PrintStream;
5     import java.io.IOException;
6     import java.util.HashMap;
7 duarte 216 import java.util.Locale;
8 duarte 123 import java.util.TreeMap;
9     import java.util.ArrayList;
10     import java.util.Collections;
11    
12 duarte 226 import javax.vecmath.Point3d;
13     import javax.vecmath.Point3i;
14    
15 duarte 207 /**
16     * A single chain pdb protein structure
17     *
18     * @author Jose Duarte
19     * Class: Pdb
20     * Package: proteinstructure
21     */
22     public abstract class Pdb {
23 duarte 123
24 duarte 208 protected final static int DEFAULT_MODEL=1; // default model serial (NMR structures)
25     public final static String NONSTANDARD_AA_LETTER="X"; // letter we assign to nonstandard aas to use in sequence
26 duarte 153
27 duarte 208 protected HashMap<String,Integer> resser_atom2atomserial; // residue serial+atom name (separated by underscore) to atom serials
28     protected HashMap<Integer,String> resser2restype; // residue serial to 3 letter residue type
29 duarte 226 protected HashMap<Integer,Point3d> atomser2coord; // atom serials to 3D coordinates
30 duarte 208 protected HashMap<Integer,Integer> atomser2resser; // atom serials to residue serials
31     protected HashMap<String,Integer> pdbresser2resser; // pdb (author) residue serials (can include insetion codes so they are strings) to internal residue serials
32     protected HashMap<Integer,String> resser2pdbresser; // internal residue serials to pdb (author) residue serials (can include insertion codes so they are strings)
33 duarte 190
34 duarte 222 protected HashMap<Integer,String> resser2secstruct; // residue serials to secondary structure
35     protected TreeMap<String,Interval> secstruct2resinterval;// secondary structure element to residue serial intervals
36 duarte 219
37 duarte 208 protected HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms(); // contains atom names for each aminoacid
38 duarte 135
39 duarte 208 protected String sequence; // full sequence as it appears in SEQRES field
40     protected String pdbCode;
41     // given "external" pdb chain code, i.e. the classic (author's) pdb code ("NULL" if it is blank in original pdb file)
42     protected String pdbChainCode;
43     // Our internal chain identifier:
44 duarte 135 // - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd)
45 duarte 208 // - in reading from pdb file it coincides with pdbChainCode except for "NULL" where we use "A"
46 duarte 207 protected String chainCode;
47 duarte 208 protected int model; // the model serial for NMR structures
48 duarte 123
49 duarte 208 protected String db; // the db from which we have taken the data (if applies)
50 duarte 153
51 duarte 208
52    
53 duarte 133
54 duarte 153 /**
55     * Dumps coordinate data into a file in pdb format (ATOM lines only)
56 duarte 206 * The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects
57 duarte 153 * @param outfile
58     * @throws IOException
59     */
60 duarte 123 public void dump2pdbfile(String outfile) throws IOException {
61 duarte 130 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
62 duarte 123 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
63 duarte 206 Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'");
64 duarte 123 for (String resser_atom:resser_atom2atomserial.keySet()){
65     int atomserial = resser_atom2atomserial.get(resser_atom);
66     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
67     String atom = resser_atom.split("_")[1];
68     String res_type = resser2restype.get(res_serial);
69 duarte 226 Point3d coords = atomser2coord.get(atomserial);
70     Object[] fields = {atomserial, atom, res_type, chainCode, res_serial, coords.x, coords.y, coords.z};
71 duarte 130 lines.put(atomserial, fields);
72 duarte 123 }
73 duarte 130 for (int atomserial:lines.keySet()){
74 duarte 216 // Local.US is necessary, otherwise java prints the doubles locale-dependant (i.e. with ',' for some locales)
75     Out.printf(Locale.US,"ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",lines.get(atomserial));
76 duarte 130 }
77 duarte 123 Out.println("END");
78     Out.close();
79     }
80    
81     public void dumpseq(String seqfile) throws IOException {
82     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
83 duarte 206 Out.println(">"+pdbCode+"_"+pdbChainCode);
84 duarte 123 Out.println(sequence);
85     Out.close();
86     }
87    
88     public int get_length(){
89     return resser2restype.size();
90     }
91    
92 duarte 226 public TreeMap<Integer,Point3d> get_coords_for_ct(String ct) {
93     TreeMap<Integer,Point3d> coords = new TreeMap<Integer,Point3d>();
94 duarte 123 HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
95     for (int resser:resser2restype.keySet()){
96     String[] atoms = restype2atoms.get(resser2restype.get(resser));
97     for (String atom:atoms){
98     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
99     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
100 duarte 226 Point3d coord = atomser2coord.get(atomser);
101 duarte 123 coords.put(atomser, coord);
102     }
103     else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
104     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
105 duarte 226 Point3d coord = atomser2coord.get(atomser);
106 duarte 123 coords.put(atomser, coord);
107     }
108     else {
109     System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
110     }
111     }
112     }
113     return coords;
114     }
115    
116 duarte 224 public HashMap<Contact, Double> calculate_dist_matrix(String ct){
117     HashMap<Contact,Double> dist_matrix = new HashMap<Contact,Double>();
118 duarte 123 if (!ct.contains("/")){
119 duarte 226 TreeMap<Integer,Point3d> coords = get_coords_for_ct(ct);
120 duarte 123 for (int i_atomser:coords.keySet()){
121     for (int j_atomser:coords.keySet()){
122     if (j_atomser>i_atomser) {
123     Contact pair = new Contact(i_atomser,j_atomser);
124 duarte 226 dist_matrix.put(pair, coords.get(i_atomser).distance(coords.get(j_atomser)));
125 duarte 123 }
126     }
127     }
128     } else {
129     String i_ct = ct.split("/")[0];
130     String j_ct = ct.split("/")[1];
131 duarte 226 TreeMap<Integer,Point3d> i_coords = get_coords_for_ct(i_ct);
132     TreeMap<Integer,Point3d> j_coords = get_coords_for_ct(j_ct);
133 duarte 123 for (int i_atomser:i_coords.keySet()){
134     for (int j_atomser:j_coords.keySet()){
135     if (j_atomser!=i_atomser){
136     Contact pair = new Contact(i_atomser,j_atomser);
137 duarte 226 dist_matrix.put(pair, i_coords.get(i_atomser).distance(j_coords.get(j_atomser)));
138 duarte 123 }
139     }
140     }
141     }
142     return dist_matrix;
143     }
144    
145 duarte 129 /**
146 duarte 226 * Get the graph for given contact type and cutoff for this Pdb object.
147     * Returns a Graph object with the contacts
148     * We do geometric hashing for fast contact computation (without needing to calculate full distance matrix)
149     * @param ct
150     * @param cutoff
151     * @return
152     */
153     @SuppressWarnings("unchecked")
154 duarte 228 public Graph get_graph(String ct, double cutoff){
155     TreeMap<Integer,Point3d> i_coords = null;
156     TreeMap<Integer,Point3d> j_coords = null;
157     boolean directed = false;
158     if (!ct.contains("/")){
159     i_coords = get_coords_for_ct(ct);
160     directed = false;
161     } else {
162     String i_ct = ct.split("/")[0];
163     String j_ct = ct.split("/")[1];
164     i_coords = get_coords_for_ct(i_ct);
165     j_coords = get_coords_for_ct(j_ct);
166     directed = true;
167     }
168     int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials
169     int[] j_atomserials = null;
170 duarte 226
171     int SCALE=100; // i.e. we use units of hundredths of Amstrongs (thus cutoffs can be specified with a maximum precission of 0.01A)
172    
173     int boxSize = (int) Math.floor(cutoff*SCALE);
174    
175     HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>();
176     int i=0;
177 duarte 228 for (int i_atomser:i_coords.keySet()){
178 duarte 226 //coordinates for atom serial atomser, we will use i as its identifier below
179 duarte 228 Point3d coord = i_coords.get(i_atomser);
180 duarte 226 int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
181     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
182     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
183     Point3i floor = new Point3i(floorX,floorY,floorZ);
184     if (boxes.containsKey(floor)){
185     // we put the coords for atom i in its corresponding box (identified by floor)
186 duarte 228 boxes.get(floor).put_i_Point(i, coord);
187     if (!directed){
188     boxes.get(floor).put_j_Point(i, coord);
189     }
190 duarte 226 } else {
191     Box box = new Box(floor);
192 duarte 228 box.put_i_Point(i, coord);
193     if (!directed){
194     box.put_j_Point(i, coord);
195     }
196 duarte 226 boxes.put(floor,box);
197     }
198 duarte 228 i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
199 duarte 226 i++;
200     }
201 duarte 228 int j=0;
202     if (directed) {
203     j_atomserials = new int[j_coords.size()];
204     for (int j_atomser:j_coords.keySet()){
205     //coordinates for atom serial atomser, we will use j as its identifier below
206     Point3d coord = j_coords.get(j_atomser);
207     int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
208     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
209     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
210     Point3i floor = new Point3i(floorX,floorY,floorZ);
211     if (boxes.containsKey(floor)){
212     // we put the coords for atom j in its corresponding box (identified by floor)
213     boxes.get(floor).put_j_Point(j, coord);
214     } else {
215     Box box = new Box(floor);
216     box.put_j_Point(j, coord);
217     boxes.put(floor,box);
218     }
219     j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
220     j++;
221     }
222     } else {
223     j_atomserials = i_atomserials;
224     }
225    
226 duarte 226
227 duarte 228 double[][]distMatrix = new double[i_atomserials.length][j_atomserials.length];
228 duarte 226
229     for (Point3i floor:boxes.keySet()){ // for each box
230     // distances of points within this box
231 duarte 228 boxes.get(floor).getDistancesWithinBox(distMatrix,directed);
232 duarte 226
233 duarte 228 //TODO should iterate only through half of the neighbours here
234 duarte 226 // distances of points from this box to all neighbouring boxes: 26 iterations (26 neighbouring boxes)
235     for (int x=floor.x-boxSize;x<=floor.x+boxSize;x+=boxSize){
236     for (int y=floor.y-boxSize;y<=floor.y+boxSize;y+=boxSize){
237     for (int z=floor.z-boxSize;z<=floor.z+boxSize;z+=boxSize){
238     if (!((x==floor.x)&&(y==floor.y)&&(z==floor.z))) { // skip this box
239     Point3i neighbor = new Point3i(x,y,z);
240     if (boxes.containsKey(neighbor)){
241 duarte 228 boxes.get(floor).getDistancesToNeighborBox(boxes.get(neighbor),distMatrix,directed);
242 duarte 226 }
243     }
244     }
245     }
246     }
247     }
248    
249 duarte 228 // getting the contacts (in residue serials) from the atom serials (partial) distance matrix
250 duarte 226 ContactList contacts = new ContactList();
251     for (i=0;i<distMatrix.length;i++){
252 duarte 228 for (j=0;j<distMatrix[i].length;j++){
253     // the condition distMatrix[i][j]!=0.0 takes care of skipping several things:
254     // - diagonal of the matrix in case of undirected
255     // - lower half of matrix in case of undirected
256     // - cells for which we didn't calculate a distance as the 2 points were not in same or neighbouring boxes (i.e. too far apart)
257     if (distMatrix[i][j]!=0.0 && distMatrix[i][j]<=cutoff){
258     int i_resser = atomser2resser.get(i_atomserials[i]);
259     int j_resser = atomser2resser.get(j_atomserials[j]);
260 duarte 226 Contact resser_pair = new Contact(i_resser,j_resser);
261 duarte 228 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
262 duarte 226 if (i_resser!=j_resser && (! contacts.contains(resser_pair))){
263     contacts.add(resser_pair);
264     }
265     }
266 duarte 228
267 duarte 226 }
268     }
269 duarte 228
270     // creating and returning the graph object
271 duarte 226 Collections.sort(contacts);
272     TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
273     for (int resser:resser2restype.keySet()){
274     nodes.put(resser,resser2restype.get(resser));
275     }
276     Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
277    
278     return graph;
279     }
280    
281 duarte 190 public int get_resser_from_pdbresser (String pdbresser){
282     return pdbresser2resser.get(pdbresser);
283     }
284    
285     public String get_pdbresser_from_resser (int resser){
286     return resser2pdbresser.get(resser);
287     }
288    
289 duarte 200 public int get_resser_from_atomser(int atomser){
290     return atomser2resser.get(atomser);
291     }
292 duarte 206
293     public String getChainCode(){
294     return this.chainCode;
295     }
296    
297     public String getPdbChainCode(){
298     return this.pdbChainCode;
299     }
300 duarte 219
301     public String getSecStructure(int resser){
302     return this.resser2secstruct.get(resser);
303     }
304 duarte 222
305     public TreeMap<String,Interval> getAllSecStructElements(){
306     return secstruct2resinterval;
307     }
308 duarte 123 }