ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 229
Committed: Mon Jul 9 15:02:24 2007 UTC (17 years, 2 months ago) by duarte
File size: 12639 byte(s)
Log Message:
Now calculate_dist_matrix returns the matrix with residue serials as indices (rather than atom serials)
Now AA.isValidSingleCT and AA.isValidMultiCT check also for crossed cases
Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3     import java.io.FileOutputStream;
4     import java.io.PrintStream;
5     import java.io.IOException;
6     import java.util.HashMap;
7 duarte 216 import java.util.Locale;
8 duarte 123 import java.util.TreeMap;
9     import java.util.ArrayList;
10     import java.util.Collections;
11    
12 duarte 226 import javax.vecmath.Point3d;
13     import javax.vecmath.Point3i;
14    
15 duarte 207 /**
16     * A single chain pdb protein structure
17     *
18     * @author Jose Duarte
19     * Class: Pdb
20     * Package: proteinstructure
21     */
22     public abstract class Pdb {
23 duarte 123
24 duarte 208 protected final static int DEFAULT_MODEL=1; // default model serial (NMR structures)
25     public final static String NONSTANDARD_AA_LETTER="X"; // letter we assign to nonstandard aas to use in sequence
26 duarte 153
27 duarte 208 protected HashMap<String,Integer> resser_atom2atomserial; // residue serial+atom name (separated by underscore) to atom serials
28     protected HashMap<Integer,String> resser2restype; // residue serial to 3 letter residue type
29 duarte 226 protected HashMap<Integer,Point3d> atomser2coord; // atom serials to 3D coordinates
30 duarte 208 protected HashMap<Integer,Integer> atomser2resser; // atom serials to residue serials
31     protected HashMap<String,Integer> pdbresser2resser; // pdb (author) residue serials (can include insetion codes so they are strings) to internal residue serials
32     protected HashMap<Integer,String> resser2pdbresser; // internal residue serials to pdb (author) residue serials (can include insertion codes so they are strings)
33 duarte 190
34 duarte 222 protected HashMap<Integer,String> resser2secstruct; // residue serials to secondary structure
35     protected TreeMap<String,Interval> secstruct2resinterval;// secondary structure element to residue serial intervals
36 duarte 219
37 duarte 208 protected HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms(); // contains atom names for each aminoacid
38 duarte 135
39 duarte 208 protected String sequence; // full sequence as it appears in SEQRES field
40     protected String pdbCode;
41     // given "external" pdb chain code, i.e. the classic (author's) pdb code ("NULL" if it is blank in original pdb file)
42     protected String pdbChainCode;
43     // Our internal chain identifier:
44 duarte 135 // - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd)
45 duarte 208 // - in reading from pdb file it coincides with pdbChainCode except for "NULL" where we use "A"
46 duarte 207 protected String chainCode;
47 duarte 208 protected int model; // the model serial for NMR structures
48 duarte 123
49 duarte 208 protected String db; // the db from which we have taken the data (if applies)
50 duarte 153
51 duarte 208
52    
53 duarte 133
54 duarte 153 /**
55     * Dumps coordinate data into a file in pdb format (ATOM lines only)
56 duarte 206 * The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects
57 duarte 153 * @param outfile
58     * @throws IOException
59     */
60 duarte 123 public void dump2pdbfile(String outfile) throws IOException {
61 duarte 130 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
62 duarte 123 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
63 duarte 206 Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'");
64 duarte 123 for (String resser_atom:resser_atom2atomserial.keySet()){
65     int atomserial = resser_atom2atomserial.get(resser_atom);
66     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
67     String atom = resser_atom.split("_")[1];
68     String res_type = resser2restype.get(res_serial);
69 duarte 226 Point3d coords = atomser2coord.get(atomserial);
70     Object[] fields = {atomserial, atom, res_type, chainCode, res_serial, coords.x, coords.y, coords.z};
71 duarte 130 lines.put(atomserial, fields);
72 duarte 123 }
73 duarte 130 for (int atomserial:lines.keySet()){
74 duarte 216 // Local.US is necessary, otherwise java prints the doubles locale-dependant (i.e. with ',' for some locales)
75     Out.printf(Locale.US,"ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",lines.get(atomserial));
76 duarte 130 }
77 duarte 123 Out.println("END");
78     Out.close();
79     }
80    
81     public void dumpseq(String seqfile) throws IOException {
82     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
83 duarte 206 Out.println(">"+pdbCode+"_"+pdbChainCode);
84 duarte 123 Out.println(sequence);
85     Out.close();
86     }
87    
88     public int get_length(){
89     return resser2restype.size();
90     }
91    
92 duarte 226 public TreeMap<Integer,Point3d> get_coords_for_ct(String ct) {
93     TreeMap<Integer,Point3d> coords = new TreeMap<Integer,Point3d>();
94 duarte 123 HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
95     for (int resser:resser2restype.keySet()){
96     String[] atoms = restype2atoms.get(resser2restype.get(resser));
97     for (String atom:atoms){
98     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
99     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
100 duarte 226 Point3d coord = atomser2coord.get(atomser);
101 duarte 123 coords.put(atomser, coord);
102     }
103     else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
104     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
105 duarte 226 Point3d coord = atomser2coord.get(atomser);
106 duarte 123 coords.put(atomser, coord);
107     }
108     else {
109     System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
110     }
111     }
112     }
113     return coords;
114     }
115 duarte 229
116     /**
117     * Returns the distance matrix as a HashMap with Contacts (residue serial pairs) as keys
118     * It doesn't make sense to call this method for multi atom contact
119     * types (for each residue serial pair there's more than 1 distance)
120     * Thus before calling this we should check AA.isValidSingleAtomCT(ct)
121     * @param ct the contact type
122     * @return
123     */
124 duarte 224 public HashMap<Contact, Double> calculate_dist_matrix(String ct){
125 duarte 229 HashMap<Contact,Double> distMatrixAtoms = new HashMap<Contact,Double>();
126 duarte 123 if (!ct.contains("/")){
127 duarte 226 TreeMap<Integer,Point3d> coords = get_coords_for_ct(ct);
128 duarte 123 for (int i_atomser:coords.keySet()){
129     for (int j_atomser:coords.keySet()){
130     if (j_atomser>i_atomser) {
131     Contact pair = new Contact(i_atomser,j_atomser);
132 duarte 229 distMatrixAtoms.put(pair, coords.get(i_atomser).distance(coords.get(j_atomser)));
133 duarte 123 }
134     }
135     }
136     } else {
137     String i_ct = ct.split("/")[0];
138     String j_ct = ct.split("/")[1];
139 duarte 226 TreeMap<Integer,Point3d> i_coords = get_coords_for_ct(i_ct);
140     TreeMap<Integer,Point3d> j_coords = get_coords_for_ct(j_ct);
141 duarte 123 for (int i_atomser:i_coords.keySet()){
142     for (int j_atomser:j_coords.keySet()){
143     if (j_atomser!=i_atomser){
144     Contact pair = new Contact(i_atomser,j_atomser);
145 duarte 229 distMatrixAtoms.put(pair, i_coords.get(i_atomser).distance(j_coords.get(j_atomser)));
146 duarte 123 }
147     }
148     }
149     }
150 duarte 229
151     HashMap<Contact,Double> distMatrixRes = new HashMap<Contact, Double>();
152     for (Contact cont: distMatrixAtoms.keySet()){
153     int i_resser = get_resser_from_atomser(cont.i);
154     int j_resser = get_resser_from_atomser(cont.j);
155     distMatrixRes.put(new Contact(i_resser,j_resser), distMatrixAtoms.get(cont));
156     }
157    
158     return distMatrixRes;
159 duarte 123 }
160    
161 duarte 129 /**
162 duarte 226 * Get the graph for given contact type and cutoff for this Pdb object.
163     * Returns a Graph object with the contacts
164     * We do geometric hashing for fast contact computation (without needing to calculate full distance matrix)
165     * @param ct
166     * @param cutoff
167     * @return
168     */
169     @SuppressWarnings("unchecked")
170 duarte 228 public Graph get_graph(String ct, double cutoff){
171     TreeMap<Integer,Point3d> i_coords = null;
172     TreeMap<Integer,Point3d> j_coords = null;
173     boolean directed = false;
174     if (!ct.contains("/")){
175     i_coords = get_coords_for_ct(ct);
176     directed = false;
177     } else {
178     String i_ct = ct.split("/")[0];
179     String j_ct = ct.split("/")[1];
180     i_coords = get_coords_for_ct(i_ct);
181     j_coords = get_coords_for_ct(j_ct);
182     directed = true;
183     }
184     int[] i_atomserials = new int[i_coords.size()]; // map from matrix indices to atomserials
185     int[] j_atomserials = null;
186 duarte 226
187     int SCALE=100; // i.e. we use units of hundredths of Amstrongs (thus cutoffs can be specified with a maximum precission of 0.01A)
188    
189     int boxSize = (int) Math.floor(cutoff*SCALE);
190    
191     HashMap<Point3i,Box> boxes = new HashMap<Point3i,Box>();
192     int i=0;
193 duarte 228 for (int i_atomser:i_coords.keySet()){
194 duarte 226 //coordinates for atom serial atomser, we will use i as its identifier below
195 duarte 228 Point3d coord = i_coords.get(i_atomser);
196 duarte 226 int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
197     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
198     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
199     Point3i floor = new Point3i(floorX,floorY,floorZ);
200     if (boxes.containsKey(floor)){
201     // we put the coords for atom i in its corresponding box (identified by floor)
202 duarte 228 boxes.get(floor).put_i_Point(i, coord);
203     if (!directed){
204     boxes.get(floor).put_j_Point(i, coord);
205     }
206 duarte 226 } else {
207     Box box = new Box(floor);
208 duarte 228 box.put_i_Point(i, coord);
209     if (!directed){
210     box.put_j_Point(i, coord);
211     }
212 duarte 226 boxes.put(floor,box);
213     }
214 duarte 228 i_atomserials[i]=i_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
215 duarte 226 i++;
216     }
217 duarte 228 int j=0;
218     if (directed) {
219     j_atomserials = new int[j_coords.size()];
220     for (int j_atomser:j_coords.keySet()){
221     //coordinates for atom serial atomser, we will use j as its identifier below
222     Point3d coord = j_coords.get(j_atomser);
223     int floorX = boxSize*((int)Math.floor(coord.x*SCALE/boxSize));
224     int floorY = boxSize*((int)Math.floor(coord.y*SCALE/boxSize));
225     int floorZ = boxSize*((int)Math.floor(coord.z*SCALE/boxSize));
226     Point3i floor = new Point3i(floorX,floorY,floorZ);
227     if (boxes.containsKey(floor)){
228     // we put the coords for atom j in its corresponding box (identified by floor)
229     boxes.get(floor).put_j_Point(j, coord);
230     } else {
231     Box box = new Box(floor);
232     box.put_j_Point(j, coord);
233     boxes.put(floor,box);
234     }
235     j_atomserials[j]=j_atomser; //as atomserials in coords were ordered (TreeMap) the new indexing will still be ordered
236     j++;
237     }
238     } else {
239     j_atomserials = i_atomserials;
240     }
241    
242 duarte 226
243 duarte 228 double[][]distMatrix = new double[i_atomserials.length][j_atomserials.length];
244 duarte 226
245     for (Point3i floor:boxes.keySet()){ // for each box
246     // distances of points within this box
247 duarte 228 boxes.get(floor).getDistancesWithinBox(distMatrix,directed);
248 duarte 226
249 duarte 228 //TODO should iterate only through half of the neighbours here
250 duarte 226 // distances of points from this box to all neighbouring boxes: 26 iterations (26 neighbouring boxes)
251     for (int x=floor.x-boxSize;x<=floor.x+boxSize;x+=boxSize){
252     for (int y=floor.y-boxSize;y<=floor.y+boxSize;y+=boxSize){
253     for (int z=floor.z-boxSize;z<=floor.z+boxSize;z+=boxSize){
254     if (!((x==floor.x)&&(y==floor.y)&&(z==floor.z))) { // skip this box
255     Point3i neighbor = new Point3i(x,y,z);
256     if (boxes.containsKey(neighbor)){
257 duarte 228 boxes.get(floor).getDistancesToNeighborBox(boxes.get(neighbor),distMatrix,directed);
258 duarte 226 }
259     }
260     }
261     }
262     }
263     }
264    
265 duarte 228 // getting the contacts (in residue serials) from the atom serials (partial) distance matrix
266 duarte 226 ContactList contacts = new ContactList();
267     for (i=0;i<distMatrix.length;i++){
268 duarte 228 for (j=0;j<distMatrix[i].length;j++){
269     // the condition distMatrix[i][j]!=0.0 takes care of skipping several things:
270     // - diagonal of the matrix in case of undirected
271     // - lower half of matrix in case of undirected
272     // - cells for which we didn't calculate a distance as the 2 points were not in same or neighbouring boxes (i.e. too far apart)
273     if (distMatrix[i][j]!=0.0 && distMatrix[i][j]<=cutoff){
274     int i_resser = atomser2resser.get(i_atomserials[i]);
275     int j_resser = atomser2resser.get(j_atomserials[j]);
276 duarte 226 Contact resser_pair = new Contact(i_resser,j_resser);
277 duarte 228 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
278 duarte 226 if (i_resser!=j_resser && (! contacts.contains(resser_pair))){
279     contacts.add(resser_pair);
280     }
281     }
282 duarte 228
283 duarte 226 }
284     }
285 duarte 228
286     // creating and returning the graph object
287 duarte 226 Collections.sort(contacts);
288     TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
289     for (int resser:resser2restype.keySet()){
290     nodes.put(resser,resser2restype.get(resser));
291     }
292     Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
293    
294     return graph;
295     }
296    
297 duarte 190 public int get_resser_from_pdbresser (String pdbresser){
298     return pdbresser2resser.get(pdbresser);
299     }
300    
301     public String get_pdbresser_from_resser (int resser){
302     return resser2pdbresser.get(resser);
303     }
304    
305 duarte 200 public int get_resser_from_atomser(int atomser){
306     return atomser2resser.get(atomser);
307     }
308 duarte 206
309     public String getChainCode(){
310     return this.chainCode;
311     }
312    
313     public String getPdbChainCode(){
314     return this.pdbChainCode;
315     }
316 duarte 219
317     public String getSecStructure(int resser){
318     return this.resser2secstruct.get(resser);
319     }
320 duarte 222
321     public TreeMap<String,Interval> getAllSecStructElements(){
322     return secstruct2resinterval;
323     }
324 duarte 123 }