ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 206
Committed: Tue Jun 26 17:01:48 2007 UTC (17 years, 3 months ago) by duarte
File size: 18280 byte(s)
Log Message:
Major refactoring:
 accode -> pdbCode
 chaincode -> pdbChainCode
 chain -> chainCode
Major change of signatures (publics to privates)
Implemented getters for needed member variables
Major clean up
Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3 duarte 127 import java.io.BufferedReader;
4     import java.io.File;
5 duarte 123 import java.io.FileOutputStream;
6 duarte 127 import java.io.FileReader;
7 duarte 123 import java.io.PrintStream;
8 duarte 127 import java.io.FileNotFoundException;
9 duarte 123 import java.io.IOException;
10 duarte 202 import java.sql.SQLException;
11 duarte 123 import java.util.HashMap;
12     import java.util.TreeMap;
13     import java.util.ArrayList;
14     import java.util.Collections;
15 duarte 127 import java.util.regex.Matcher;
16     import java.util.regex.Pattern;
17 duarte 123
18     public class Pdb {
19    
20 duarte 153 public final static int DEFAULT_MODEL=1;
21    
22 duarte 206 private HashMap<String,Integer> resser_atom2atomserial;
23     private HashMap<Integer,String> resser2restype;
24     private HashMap<Integer,Double[]> atomser2coord;
25     private HashMap<Integer,Integer> atomser2resser;
26     private HashMap<String,Integer> pdbresser2resser; // only used when reading from database
27     private HashMap<Integer,String> resser2pdbresser; // only used when reading from database
28 duarte 190
29 duarte 206 private HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms();
30     private String sequence="";
31     private String pdbCode="";
32 duarte 135 // given "external" pdb chain code, i.e. the classic pdb code ("NULL" if it is blank in original pdb file)
33 duarte 206 private String pdbChainCode="";
34     private int model=DEFAULT_MODEL;
35     private String db;
36 duarte 135
37     // Our internal chain identifier (taken from dbs or pdb):
38     // - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd)
39 duarte 153 // - in reading from pdb file it gets set to whatever parsed from the pdb file (i.e. can be also ' ')
40 duarte 206 private String chainCode;
41 duarte 123
42 duarte 153 /**
43 duarte 206 * Constructs Pdb object given pdb code and pdb chain code. Model will be DEFAULT_MODEL
44 duarte 153 * Takes data from default pdbase-like database: PdbaseInfo.pdbaseDB
45 duarte 206 * @param pdbCode
46     * @param pdbChainCode
47 duarte 153 * @throws PdbaseInconsistencyError
48     * @throws PdbaseAcCodeNotFoundError
49     * @throws MsdsdAcCodeNotFoundError
50     * @throws MsdsdInconsistentResidueNumbersError
51 duarte 202 * @throws SQLException
52 duarte 153 */
53 duarte 206 public Pdb (String pdbCode, String pdbChainCode) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
54     this(pdbCode,pdbChainCode,DEFAULT_MODEL,PdbaseInfo.DEFAULT_PDBASE_DB);
55 duarte 123 }
56 duarte 133
57 duarte 153 /**
58 duarte 206 * Constructs Pdb object given pdb code, pdb chain code and a model serial
59 duarte 153 * Takes data from default pdbase-like database: PdbaseInfo.pdbaseDB
60 duarte 206 * @param pdbCode
61     * @param pdbChainCode
62 duarte 153 * @param model_serial
63     * @throws PdbaseInconsistencyError
64     * @throws PdbaseAcCodeNotFoundError
65     * @throws MsdsdAcCodeNotFoundError
66     * @throws MsdsdInconsistentResidueNumbersError
67 duarte 202 * @throws SQLException
68 duarte 153 */
69 duarte 206 public Pdb (String pdbCode, String pdbChainCode, int model_serial) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
70     this(pdbCode,pdbChainCode,model_serial,PdbaseInfo.DEFAULT_PDBASE_DB);
71 duarte 133 }
72 duarte 153
73     /**
74 duarte 206 * Constructs Pdb object given pdb code, pdb chain code and a source db for the data. Model will be DEFAULT_MODEL
75 duarte 153 * The db can be pdbase-like or msdsd-like
76 duarte 206 * @param pdbCode
77     * @param pdbChainCode
78 duarte 153 * @param db
79     * @throws PdbaseInconsistencyError
80     * @throws PdbaseAcCodeNotFoundError
81     * @throws MsdsdAcCodeNotFoundError
82     * @throws MsdsdInconsistentResidueNumbersError
83 duarte 202 * @throws SQLException
84 duarte 153 */
85 duarte 206 public Pdb (String pdbCode, String pdbChainCode, String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
86     this(pdbCode,pdbChainCode,DEFAULT_MODEL,db);
87 duarte 133 }
88    
89 duarte 153 /**
90 duarte 206 * Constructs Pdb object given pdb code, pdb chain code, model serial and a source db for the data.
91 duarte 153 * The db can be pdbase-like or msdsd-like
92 duarte 206 * @param pdbCode
93     * @param pdbChainCode
94 duarte 153 * @param model_serial
95     * @param db
96     * @throws PdbaseInconsistencyError
97     * @throws PdbaseAcCodeNotFoundError
98     * @throws MsdsdAcCodeNotFoundError
99     * @throws MsdsdInconsistentResidueNumbersError
100 duarte 202 * @throws SQLException
101 duarte 153 */
102 duarte 206 public Pdb (String pdbCode, String pdbChainCode, int model_serial, String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
103     this.pdbCode=pdbCode;
104     this.pdbChainCode=pdbChainCode;
105 duarte 133 this.model=model_serial;
106 duarte 123 this.db=db;
107 duarte 206 // we initialise it to pdbChainCode, in read_pdb_data_from_pdbase gets reset to the right internal chain id. If reading msdsd it stays as pdbChainCode
108     this.chainCode=pdbChainCode;
109 duarte 130 if (db.contains("msdsd")){
110     read_pdb_data_from_msdsd(db);
111     } else if (db.contains("pdbase")){
112     read_pdb_data_from_pdbase(db);
113     }
114 duarte 190 // we initialise resser2pdbresser from the pdbresser2resser HashMap
115     resser2pdbresser = new HashMap<Integer, String>();
116     for (String pdbresser:pdbresser2resser.keySet()){
117     resser2pdbresser.put(pdbresser2resser.get(pdbresser), pdbresser);
118     }
119 duarte 123 }
120    
121 duarte 153 /**
122     * Constructs Pdb object reading from pdb file given pdb chain code. Model will be DEFAULT_MODEL
123     * @param pdbfile
124 duarte 206 * @param pdbChainCode
125 duarte 153 * @param dummy dummy parameter to distinguish the method from other with the same signature
126     * TODO dummy parameter is a dirty hack, must solve it in other way: subclassing
127     * @throws FileNotFoundException
128     * @throws IOException
129     */
130 duarte 206 public Pdb (String pdbfile, String pdbChainCode, boolean dummy) throws FileNotFoundException, IOException {
131     this(pdbfile,pdbChainCode,DEFAULT_MODEL,dummy);
132 duarte 133 }
133 duarte 153
134     /**
135     * Constructs Pdb object reading from pdb file given pdb chain code and model serial
136     * @param pdbfile
137 duarte 206 * @param pdbChainCode
138 duarte 153 * @param model_serial
139     * @param dummy dummy parameter to distinguish the method from other with the same signature
140     * TODO dummy parameter is a dirty hack, must solve it in other way: subclassing
141     * @throws FileNotFoundException
142     * @throws IOException
143     */
144 duarte 206 public Pdb (String pdbfile, String pdbChainCode, int model_serial, boolean dummy) throws FileNotFoundException, IOException {
145 duarte 133 this.model=model_serial;
146 duarte 206 this.pdbChainCode=pdbChainCode;
147 duarte 127 read_pdb_data_from_file(pdbfile);
148 duarte 123 }
149 duarte 133
150 duarte 206 private void read_pdb_data_from_pdbase(String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, SQLException{
151 duarte 123 resser_atom2atomserial = new HashMap<String,Integer>();
152     resser2restype = new HashMap<Integer,String>();
153     atomser2coord = new HashMap<Integer,Double[]>();
154     atomser2resser = new HashMap<Integer,Integer>();
155    
156 duarte 206 PdbaseInfo mypdbaseinfo = new PdbaseInfo(pdbCode,pdbChainCode,model,db);
157 duarte 123 ArrayList<ArrayList> resultset = mypdbaseinfo.read_atomData();
158 duarte 206 chainCode = mypdbaseinfo.getChainCode();
159 duarte 123 sequence = mypdbaseinfo.read_seq();
160 duarte 190 pdbresser2resser = mypdbaseinfo.get_ressers_mapping();
161 duarte 123 mypdbaseinfo.close();
162    
163     for (ArrayList result:resultset){
164     int atomserial = (Integer) result.get(0);
165     String atom = (String) result.get(1);
166     String res_type = (String) result.get(2);
167 duarte 135 int res_serial = (Integer) result.get(3);
168     double x = (Double) result.get(4);
169     double y = (Double) result.get(5);
170     double z = (Double) result.get(6);
171 duarte 123 Double[] coords = {x, y, z};
172     ArrayList<String> aalist=AA.aas();
173     if (aalist.contains(res_type)) {
174     atomser2coord.put(atomserial, coords);
175     atomser2resser.put(atomserial, res_serial);
176     resser2restype.put(res_serial, res_type);
177     ArrayList<String> atomlist = aas2atoms.get(res_type);
178     if (atomlist.contains(atom)){
179     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
180     }
181     }
182     }
183     }
184    
185 duarte 206 private void read_pdb_data_from_msdsd(String db) throws MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError, SQLException {
186 duarte 130 resser_atom2atomserial = new HashMap<String,Integer>();
187     resser2restype = new HashMap<Integer,String>();
188     atomser2coord = new HashMap<Integer,Double[]>();
189     atomser2resser = new HashMap<Integer,Integer>();
190    
191 duarte 206 MsdsdInfo mymsdsdinfo = new MsdsdInfo(pdbCode,pdbChainCode,model,db);
192 duarte 130 ArrayList<ArrayList> resultset = mymsdsdinfo.read_atomData();
193     sequence = mymsdsdinfo.read_seq();
194 duarte 206 chainCode = mymsdsdinfo.getChainCode();
195 duarte 190 pdbresser2resser = mymsdsdinfo.get_ressers_mapping();
196 duarte 130 mymsdsdinfo.close();
197    
198     for (ArrayList result:resultset){
199     int atomserial = (Integer) result.get(0);
200     String atom = (String) result.get(1);
201     String res_type = (String) result.get(2);
202     int res_serial = (Integer) result.get(3);
203     double x = (Double) result.get(4);
204     double y = (Double) result.get(5);
205     double z = (Double) result.get(6);
206     Double[] coords = {x, y, z};
207     ArrayList<String> aalist=AA.aas();
208     if (aalist.contains(res_type)) {
209     atomser2coord.put(atomserial, coords);
210     atomser2resser.put(atomserial, res_serial);
211     resser2restype.put(res_serial, res_type);
212     ArrayList<String> atomlist = aas2atoms.get(res_type);
213     if (atomlist.contains(atom)){
214     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
215     }
216     }
217     }
218     }
219    
220 duarte 133 /**
221     * To read the pdb data (atom coordinates, residue serials, atom serials) from file
222 duarte 206 * chainCode gets set to internal identifier: if input chain code NULL then chainCode will be ' '
223     * pdbCode gets set to the one parsed in HEADER or to 'Unknown' if not found
224 duarte 153 * sequence gets set to the sequence read from ATOM lines (i.e. observed resdiues only)
225 duarte 133 * @param pdbfile
226     * @throws FileNotFoundException
227     * @throws IOException
228     */
229 duarte 206 private void read_pdb_data_from_file(String pdbfile) throws FileNotFoundException, IOException{
230 duarte 127 resser_atom2atomserial = new HashMap<String,Integer>();
231     resser2restype = new HashMap<Integer,String>();
232     atomser2coord = new HashMap<Integer,Double[]>();
233     atomser2resser = new HashMap<Integer,Integer>();
234 duarte 206 boolean empty = true; // controls whether we don't find any atom line for given pdbChainCode and model
235     // we set chainCode to pdbChainCode except for case NULL where we use " " (NULL is a blank chain code in pdb files)
236     chainCode=pdbChainCode;
237     if (pdbChainCode.equals("NULL")) chainCode=" ";
238 duarte 153 int thismodel=DEFAULT_MODEL; // we initialise to DEFAULT_MODEL, in case file doesn't have MODEL lines
239 duarte 127 BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile)));
240     String line;
241     while ((line = fpdb.readLine() ) != null ) {
242 duarte 153 Pattern p = Pattern.compile("^HEADER");
243 duarte 127 Matcher m = p.matcher(line);
244     if (m.find()){
245 duarte 153 Pattern ph = Pattern.compile("^HEADER.{56}(\\d\\w{3})");
246     Matcher mh = ph.matcher(line);
247     if (mh.find()) {
248 duarte 206 pdbCode=mh.group(1).toLowerCase();
249 duarte 153 } else {
250 duarte 206 pdbCode="Unknown";
251 duarte 153 }
252     }
253     p = Pattern.compile("^MODEL\\s+(\\d+)");
254     m = p.matcher(line);
255     if (m.find()){
256     thismodel=Integer.parseInt(m.group(1));
257     }
258     if (thismodel!=model) continue; // we skip reading of atom lines if we are not in the desired model
259     p = Pattern.compile("^ATOM");
260     m = p.matcher(line);
261     if (m.find()){
262 duarte 206 // serial atom res_type chain res_ser x y z
263     Pattern pl = Pattern.compile(".{6}(.....).{2}(...).{1}(...).{1}"+chainCode+"(.{4}).{4}(.{8})(.{8})(.{8})",Pattern.CASE_INSENSITIVE);
264 duarte 127 Matcher ml = pl.matcher(line);
265     if (ml.find()) {
266 duarte 153 empty=false;
267 duarte 127 int atomserial=Integer.parseInt(ml.group(1).trim());
268     String atom = ml.group(2).trim();
269     String res_type = ml.group(3).trim();
270 duarte 154 int res_serial = Integer.parseInt(ml.group(4).trim());
271     double x = Double.parseDouble(ml.group(5).trim());
272     double y = Double.parseDouble(ml.group(6).trim());
273     double z = Double.parseDouble(ml.group(7).trim());
274 duarte 127 Double[] coords = {x, y, z};
275     ArrayList<String> aalist=AA.aas();
276     if (aalist.contains(res_type)) {
277     atomser2coord.put(atomserial, coords);
278     atomser2resser.put(atomserial, res_serial);
279     resser2restype.put(res_serial, res_type);
280     ArrayList<String> atomlist = aas2atoms.get(res_type);
281     if (atomlist.contains(atom)){
282     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
283     }
284 duarte 153 }
285     }
286 duarte 127 }
287     }
288     fpdb.close();
289 duarte 206 if (empty) System.err.println("Couldn't find any atom line for given pdbChainCode: "+pdbChainCode+", model: "+model);
290 duarte 127 // now we read the sequence from the resser2restype HashMap
291     // NOTE: we must make sure elsewhere that there are no unobserved residues, we can't check that here!
292     ArrayList<Integer> ressers = new ArrayList<Integer>();
293     for (int resser:resser2restype.keySet()) {
294     ressers.add(resser);
295     }
296     Collections.sort(ressers);
297     sequence="";
298     for (int resser:ressers){
299     String oneletter = AA.threeletter2oneletter(resser2restype.get(resser));
300     sequence += oneletter;
301     }
302     }
303    
304 duarte 153 /**
305     * Dumps coordinate data into a file in pdb format (ATOM lines only)
306 duarte 206 * The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects
307 duarte 153 * @param outfile
308     * @throws IOException
309     */
310 duarte 123 public void dump2pdbfile(String outfile) throws IOException {
311 duarte 130 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
312 duarte 123 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
313 duarte 206 Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'");
314 duarte 123 for (String resser_atom:resser_atom2atomserial.keySet()){
315     int atomserial = resser_atom2atomserial.get(resser_atom);
316     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
317     String atom = resser_atom.split("_")[1];
318     String res_type = resser2restype.get(res_serial);
319     Double[] coords = atomser2coord.get(atomserial);
320 duarte 206 Object[] fields = {atomserial, atom, res_type, chainCode, res_serial, coords[0], coords[1], coords[2]};
321 duarte 130 lines.put(atomserial, fields);
322 duarte 123 }
323 duarte 130 for (int atomserial:lines.keySet()){
324     Out.printf("ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",lines.get(atomserial));
325     }
326 duarte 123 Out.println("END");
327     Out.close();
328     }
329    
330     public void dumpseq(String seqfile) throws IOException {
331     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
332 duarte 206 Out.println(">"+pdbCode+"_"+pdbChainCode);
333 duarte 123 Out.println(sequence);
334     Out.close();
335     }
336    
337     public int get_length(){
338     return resser2restype.size();
339     }
340    
341     public HashMap<Integer,Double[]> get_coords_for_ct(String ct) {
342     HashMap<Integer,Double[]> coords = new HashMap<Integer,Double[]>();
343     HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
344     for (int resser:resser2restype.keySet()){
345     String[] atoms = restype2atoms.get(resser2restype.get(resser));
346     for (String atom:atoms){
347     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
348     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
349     Double[] coord = atomser2coord.get(atomser);
350     coords.put(atomser, coord);
351     }
352     else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
353     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
354     Double[] coord = atomser2coord.get(atomser);
355     coords.put(atomser, coord);
356     }
357     else {
358     System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
359     }
360     }
361     }
362     return coords;
363     }
364    
365     public TreeMap<Contact, Double> calculate_dist_matrix(String ct){
366     TreeMap<Contact,Double> dist_matrix = new TreeMap<Contact,Double>();
367     if (!ct.contains("/")){
368     HashMap<Integer,Double[]> coords = get_coords_for_ct(ct);
369     for (int i_atomser:coords.keySet()){
370     for (int j_atomser:coords.keySet()){
371     if (j_atomser>i_atomser) {
372     Contact pair = new Contact(i_atomser,j_atomser);
373     dist_matrix.put(pair, distance(coords.get(i_atomser),coords.get(j_atomser)));
374     }
375     }
376     }
377     } else {
378     String i_ct = ct.split("/")[0];
379     String j_ct = ct.split("/")[1];
380     HashMap<Integer,Double[]> i_coords = get_coords_for_ct(i_ct);
381     HashMap<Integer,Double[]> j_coords = get_coords_for_ct(j_ct);
382     for (int i_atomser:i_coords.keySet()){
383     for (int j_atomser:j_coords.keySet()){
384     if (j_atomser!=i_atomser){
385     Contact pair = new Contact(i_atomser,j_atomser);
386     dist_matrix.put(pair, distance(i_coords.get(i_atomser),j_coords.get(j_atomser)));
387     }
388     }
389     }
390     }
391     return dist_matrix;
392     }
393    
394 duarte 129 /**
395     * Get the contacts for given contact type and cutoff for this Pdb object.
396     * Returns a Graph object with the contacts
397     * The graph is always directional, i.e. in crossed cases xx/yy: i corresponds to xx and j to yy
398     * ct here can be single contact type (e.g. Ca, BB) or crossed (e.g. BB/SC or Ca/Cb)
399     * @param ct
400     * @param cutoff
401     * @return
402     */
403 duarte 190 @SuppressWarnings("unchecked")
404 duarte 123 public Graph get_graph(String ct, double cutoff){
405     TreeMap<Contact,Double> dist_matrix = calculate_dist_matrix(ct);
406 duarte 175 ContactList contacts = new ContactList();
407 duarte 129 // we loop here over all indices of dist_matrix,
408     // we took care already that in symmetric cases (i.e. single contact type, not crossed) we have only one side of the matrix and
409     // in asymmetrical cases (i.e. crossed cases) we have both sides of the matrix
410 duarte 123 for (Contact pair:dist_matrix.keySet()){
411     int i_atomser=pair.i;
412     int j_atomser=pair.j;
413     if (dist_matrix.get(pair)<=cutoff){
414     int i_resser = atomser2resser.get(i_atomser);
415     int j_resser = atomser2resser.get(j_atomser);
416     Contact resser_pair = new Contact(i_resser,j_resser);
417 duarte 129 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
418 duarte 123 if (i_resser!=j_resser && (! contacts.contains(resser_pair))){
419     contacts.add(resser_pair);
420     }
421     }
422     }
423     Collections.sort(contacts);
424 duarte 129 TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
425     for (int resser:resser2restype.keySet()){
426     nodes.put(resser,resser2restype.get(resser));
427     }
428 duarte 206 Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
429 duarte 123 return graph;
430     }
431    
432 duarte 206 private Double distance(Double[] coords1, Double[] coords2){
433 duarte 123 return Math.sqrt(Math.pow((coords1[0]-coords2[0]),2)+Math.pow((coords1[1]-coords2[1]),2)+Math.pow((coords1[2]-coords2[2]),2));
434     }
435 duarte 190
436     public int get_resser_from_pdbresser (String pdbresser){
437     return pdbresser2resser.get(pdbresser);
438     }
439    
440     public String get_pdbresser_from_resser (int resser){
441     return resser2pdbresser.get(resser);
442     }
443    
444 duarte 200 public int get_resser_from_atomser(int atomser){
445     return atomser2resser.get(atomser);
446     }
447 duarte 206
448     public String getChainCode(){
449     return this.chainCode;
450     }
451    
452     public String getPdbChainCode(){
453     return this.pdbChainCode;
454     }
455 duarte 123 }