ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/PdbfilePdb.java
Revision: 217
Committed: Fri Jun 29 09:04:00 2007 UTC (17 years, 3 months ago) by stehr
File size: 6777 byte(s)
Log Message:
now casting pdb codes to lower and pdb chain codes to upper in all constructors
Line User Rev File contents
1 duarte 207 package proteinstructure;
2    
3     import java.io.BufferedReader;
4     import java.io.File;
5     import java.io.FileNotFoundException;
6     import java.io.FileReader;
7     import java.io.IOException;
8     import java.util.ArrayList;
9     import java.util.Collections;
10     import java.util.HashMap;
11     import java.util.regex.Matcher;
12     import java.util.regex.Pattern;
13    
14     public class PdbfilePdb extends Pdb {
15    
16 duarte 208 private static final String UNKNOWN_STRING ="Unknown";
17     private static final String NULL_chainCode = "A";
18    
19 duarte 207 private String pdbfile;
20    
21     /**
22     * Constructs Pdb object reading from pdb file given pdb chain code. Model will be DEFAULT_MODEL
23     * @param pdbfile
24     * @param pdbChainCode
25     * @throws FileNotFoundException
26     * @throws IOException
27 duarte 208 * @throws PdbfileFormatError
28 stehr 215 * @throws PdbChainCodeNotFoundError
29 duarte 207 */
30 stehr 215 public PdbfilePdb (String pdbfile, String pdbChainCode) throws FileNotFoundException, IOException, PdbfileFormatError, PdbChainCodeNotFoundError {
31 duarte 207 this(pdbfile,pdbChainCode,DEFAULT_MODEL);
32     }
33    
34     /**
35     * Constructs Pdb object reading from pdb file given pdb chain code and model serial
36     * @param pdbfile
37     * @param pdbChainCode
38     * @param model_serial
39     * @throws FileNotFoundException
40     * @throws IOException
41 duarte 208 * @throws PdbfileFormatError
42 stehr 215 * @throws PdbChainCodeNotFoundError
43 duarte 207 */
44 stehr 215 public PdbfilePdb (String pdbfile, String pdbChainCode, int model_serial) throws FileNotFoundException, IOException, PdbfileFormatError, PdbChainCodeNotFoundError {
45 duarte 207 this.pdbfile = pdbfile;
46     this.model=model_serial;
47 duarte 208 this.pdbCode=UNKNOWN_STRING; // we initialise to unknown in case we don't find it in pdb file
48 stehr 217 this.pdbChainCode=pdbChainCode.toUpperCase(); // our convention: chain codes are upper case
49 duarte 208 // we set chainCode to pdbChainCode except for case NULL where we use "A"
50     this.chainCode=pdbChainCode;
51     if (pdbChainCode.equals("NULL")) this.chainCode=NULL_chainCode;
52    
53     this.sequence=""; // we initialise it to empty string, then is set inread_pdb_data_from_file
54    
55 duarte 207 read_pdb_data_from_file();
56 duarte 208
57     // when reading from pdb file we have no information of residue numbers or author's (original) pdb residue number, so we fill the mapping with the residue numbers we know
58     //TODO eventually we could assign our own internal residue numbers when reading from pdb and thus this map would be used
59     this.resser2pdbresser = new HashMap<Integer, String>();
60     this.pdbresser2resser = new HashMap<String, Integer>();
61     for (int resser:resser2restype.keySet()){
62     resser2pdbresser.put(resser, String.valueOf(resser));
63     pdbresser2resser.put(String.valueOf(resser), resser);
64     }
65 duarte 207 }
66    
67    
68    
69     /**
70     * To read the pdb data (atom coordinates, residue serials, atom serials) from file
71     * chainCode gets set to internal identifier: if input chain code NULL then chainCode will be ' '
72     * pdbCode gets set to the one parsed in HEADER or to 'Unknown' if not found
73     * sequence gets set to the sequence read from ATOM lines (i.e. observed resdiues only)
74     * @param pdbfile
75     * @throws FileNotFoundException
76     * @throws IOException
77 duarte 208 * @throws PdbfileFormatError
78 stehr 215 * @throws PdbChainCodeNotFoundError
79 duarte 207 */
80 stehr 215 private void read_pdb_data_from_file() throws FileNotFoundException, IOException, PdbfileFormatError, PdbChainCodeNotFoundError {
81 duarte 207 resser_atom2atomserial = new HashMap<String,Integer>();
82     resser2restype = new HashMap<Integer,String>();
83     atomser2coord = new HashMap<Integer,Double[]>();
84     atomser2resser = new HashMap<Integer,Integer>();
85     boolean empty = true; // controls whether we don't find any atom line for given pdbChainCode and model
86 duarte 208 // we set chainCodeStr (for regex) to pdbChainCode except for case NULL where we use " " (NULL is a blank chain code in pdb files)
87     String chainCodeStr=pdbChainCode;
88     if (pdbChainCode.equals("NULL")) chainCodeStr=" ";
89    
90 duarte 207 int thismodel=DEFAULT_MODEL; // we initialise to DEFAULT_MODEL, in case file doesn't have MODEL lines
91     BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile)));
92 duarte 208 int linecount=0;
93 duarte 207 String line;
94     while ((line = fpdb.readLine() ) != null ) {
95 duarte 208 linecount++;
96 duarte 207 Pattern p = Pattern.compile("^HEADER");
97     Matcher m = p.matcher(line);
98     if (m.find()){
99     Pattern ph = Pattern.compile("^HEADER.{56}(\\d\\w{3})");
100     Matcher mh = ph.matcher(line);
101     if (mh.find()) {
102     pdbCode=mh.group(1).toLowerCase();
103     }
104 duarte 208 } else if (linecount==1) { // header not found and we are in line 1
105     // a HEADER is the minimum we ask at the moment for a pdb file to have, if we don't find it in line 1 we throw an exception
106     throw new PdbfileFormatError("The pdb file "+pdbfile+" doesn't seem to have the right format");
107 duarte 207 }
108     p = Pattern.compile("^MODEL\\s+(\\d+)");
109     m = p.matcher(line);
110     if (m.find()){
111     thismodel=Integer.parseInt(m.group(1));
112     }
113     if (thismodel!=model) continue; // we skip reading of atom lines if we are not in the desired model
114     p = Pattern.compile("^ATOM");
115     m = p.matcher(line);
116     if (m.find()){
117 duarte 208 // serial atom res_type chain res_ser x y z
118     Pattern pl = Pattern.compile(".{6}(.....).{2}(...).{1}(...).{1}"+chainCodeStr+"(.{4}).{4}(.{8})(.{8})(.{8})",Pattern.CASE_INSENSITIVE);
119 duarte 207 Matcher ml = pl.matcher(line);
120     if (ml.find()) {
121     empty=false;
122     int atomserial=Integer.parseInt(ml.group(1).trim());
123     String atom = ml.group(2).trim();
124     String res_type = ml.group(3).trim();
125     int res_serial = Integer.parseInt(ml.group(4).trim());
126     double x = Double.parseDouble(ml.group(5).trim());
127     double y = Double.parseDouble(ml.group(6).trim());
128     double z = Double.parseDouble(ml.group(7).trim());
129     Double[] coords = {x, y, z};
130     ArrayList<String> aalist=AA.aas();
131     if (aalist.contains(res_type)) {
132     atomser2coord.put(atomserial, coords);
133     atomser2resser.put(atomserial, res_serial);
134     resser2restype.put(res_serial, res_type);
135     ArrayList<String> atomlist = aas2atoms.get(res_type);
136     if (atomlist.contains(atom)){
137     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
138     }
139     }
140     }
141     }
142     }
143     fpdb.close();
144 stehr 215 if (empty) {
145     //System.err.println("Couldn't find any atom line for given pdbChainCode: "+pdbChainCode+", model: "+model);
146     throw new PdbChainCodeNotFoundError("Couldn't find any ATOM line for given pdbChainCode: "+pdbChainCode+", model: "+model);
147     }
148 duarte 207 // now we read the sequence from the resser2restype HashMap
149     // NOTE: we must make sure elsewhere that there are no unobserved residues, we can't check that here!
150     ArrayList<Integer> ressers = new ArrayList<Integer>();
151     for (int resser:resser2restype.keySet()) {
152     ressers.add(resser);
153     }
154     Collections.sort(ressers);
155     for (int resser:ressers){
156     String oneletter = AA.threeletter2oneletter(resser2restype.get(resser));
157     sequence += oneletter;
158     }
159     }
160    
161     }