ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 175
Committed: Fri May 25 17:31:58 2007 UTC (17 years, 9 months ago) by duarte
File size: 16818 byte(s)
Log Message:
FIXED BUG in initialisation of fullLength when reading from file. In case of no sequence and nodes provided, was not getting correctly the maximum value for contacts. Now using the method getMaxContact from new class ContactList
NEW FUNCTIONALITY in Graph:
-New member variable modified
-New methods addEdge, delEdge, restrictContactsToMaxRange, restrictContactsToMinRange, getContacts, getNodes, copy
-Improved slightly the implementation of getEdgeNbh 
FIXED BUG in initialisation of fullLenght when reading from file. In case of no sequence and nodes provided, was not getting correctly the maximum value for contacts. Now using the method getMaxContact from ContactList
New class ContactList
NEW FUNCTIONALITY in Graph:
-New member variable modified
-New methods addEdge, delEdge, restrictContactsToMaxRange, restrictContactsToMinRange, getContacts, getNodes, copy
-Improved slightly the implementation of getEdgeNbh 
New method getRange in Contact
Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3 duarte 127 import java.io.BufferedReader;
4     import java.io.File;
5 duarte 123 import java.io.FileOutputStream;
6 duarte 127 import java.io.FileReader;
7 duarte 123 import java.io.PrintStream;
8 duarte 127 import java.io.FileNotFoundException;
9 duarte 123 import java.io.IOException;
10     import java.util.HashMap;
11     import java.util.TreeMap;
12     import java.util.ArrayList;
13     import java.util.Collections;
14 duarte 127 import java.util.regex.Matcher;
15     import java.util.regex.Pattern;
16 duarte 123
17     public class Pdb {
18    
19 duarte 153 public final static int DEFAULT_MODEL=1;
20    
21 duarte 123 HashMap<String,Integer> resser_atom2atomserial;
22     HashMap<Integer,String> resser2restype;
23     HashMap<Integer,Double[]> atomser2coord;
24     HashMap<Integer,Integer> atomser2resser;
25     HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms();
26 duarte 145 public String sequence="";
27 stehr 140 public String accode="";
28 duarte 135 // given "external" pdb chain code, i.e. the classic pdb code ("NULL" if it is blank in original pdb file)
29 duarte 145 public String chaincode="";
30     public int model=DEFAULT_MODEL;
31 duarte 123 String db;
32 duarte 135
33     // Our internal chain identifier (taken from dbs or pdb):
34     // - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd)
35 duarte 153 // - in reading from pdb file it gets set to whatever parsed from the pdb file (i.e. can be also ' ')
36 stehr 137 public String chain;
37 duarte 123
38 duarte 153 /**
39     * Constructs Pdb object given accession code and pdb chain code. Model will be DEFAULT_MODEL
40     * Takes data from default pdbase-like database: PdbaseInfo.pdbaseDB
41     * @param accode
42     * @param chaincode
43     * @throws PdbaseInconsistencyError
44     * @throws PdbaseAcCodeNotFoundError
45     * @throws MsdsdAcCodeNotFoundError
46     * @throws MsdsdInconsistentResidueNumbersError
47     */
48 duarte 130 public Pdb (String accode, String chaincode) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
49 duarte 133 this(accode,chaincode,DEFAULT_MODEL,PdbaseInfo.pdbaseDB);
50 duarte 123 }
51 duarte 133
52 duarte 153 /**
53     * Constructs Pdb object given accession code, pdb chain code and a model serial
54     * Takes data from default pdbase-like database: PdbaseInfo.pdbaseDB
55     * @param accode
56     * @param chaincode
57     * @param model_serial
58     * @throws PdbaseInconsistencyError
59     * @throws PdbaseAcCodeNotFoundError
60     * @throws MsdsdAcCodeNotFoundError
61     * @throws MsdsdInconsistentResidueNumbersError
62     */
63 duarte 133 public Pdb (String accode, String chaincode, int model_serial) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
64     this(accode,chaincode,model_serial,PdbaseInfo.pdbaseDB);
65     }
66 duarte 153
67     /**
68     * Constructs Pdb object given accession code, pdb chain code and a source db for the data. Model will be DEFAULT_MODEL
69     * The db can be pdbase-like or msdsd-like
70     * @param accode
71     * @param chaincode
72     * @param db
73     * @throws PdbaseInconsistencyError
74     * @throws PdbaseAcCodeNotFoundError
75     * @throws MsdsdAcCodeNotFoundError
76     * @throws MsdsdInconsistentResidueNumbersError
77     */
78 duarte 130 public Pdb (String accode, String chaincode, String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
79 duarte 133 this(accode,chaincode,DEFAULT_MODEL,db);
80     }
81    
82 duarte 153 /**
83     * Constructs Pdb object given accession code, pdb chain code, model serial and a source db for the data.
84     * The db can be pdbase-like or msdsd-like
85     * @param accode
86     * @param chaincode
87     * @param model_serial
88     * @param db
89     * @throws PdbaseInconsistencyError
90     * @throws PdbaseAcCodeNotFoundError
91     * @throws MsdsdAcCodeNotFoundError
92     * @throws MsdsdInconsistentResidueNumbersError
93     */
94 duarte 133 public Pdb (String accode, String chaincode, int model_serial, String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
95 duarte 123 this.accode=accode;
96     this.chaincode=chaincode;
97 duarte 133 this.model=model_serial;
98 duarte 123 this.db=db;
99 duarte 130 // we initialise it to chaincode, in read_pdb_data_from_pdbase gets reset to the right internal chain id. If reading msdsd it stays as chaincode
100     this.chain=chaincode;
101     if (db.contains("msdsd")){
102     read_pdb_data_from_msdsd(db);
103     } else if (db.contains("pdbase")){
104     read_pdb_data_from_pdbase(db);
105     }
106 duarte 123 }
107    
108 duarte 153 /**
109     * Constructs Pdb object reading from pdb file given pdb chain code. Model will be DEFAULT_MODEL
110     * @param pdbfile
111     * @param chaincode
112     * @param dummy dummy parameter to distinguish the method from other with the same signature
113     * TODO dummy parameter is a dirty hack, must solve it in other way: subclassing
114     * @throws FileNotFoundException
115     * @throws IOException
116     */
117     public Pdb (String pdbfile, String chaincode, boolean dummy) throws FileNotFoundException, IOException {
118     this(pdbfile,chaincode,DEFAULT_MODEL,dummy);
119 duarte 133 }
120 duarte 153
121     /**
122     * Constructs Pdb object reading from pdb file given pdb chain code and model serial
123     * @param pdbfile
124     * @param chaincode
125     * @param model_serial
126     * @param dummy dummy parameter to distinguish the method from other with the same signature
127     * TODO dummy parameter is a dirty hack, must solve it in other way: subclassing
128     * @throws FileNotFoundException
129     * @throws IOException
130     */
131     public Pdb (String pdbfile, String chaincode, int model_serial, boolean dummy) throws FileNotFoundException, IOException {
132 duarte 133 this.model=model_serial;
133 duarte 153 this.chaincode=chaincode;
134 duarte 127 read_pdb_data_from_file(pdbfile);
135 duarte 123 }
136 duarte 133
137 duarte 124 public void read_pdb_data_from_pdbase(String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError{
138 duarte 123 resser_atom2atomserial = new HashMap<String,Integer>();
139     resser2restype = new HashMap<Integer,String>();
140     atomser2coord = new HashMap<Integer,Double[]>();
141     atomser2resser = new HashMap<Integer,Integer>();
142    
143 duarte 133 PdbaseInfo mypdbaseinfo = new PdbaseInfo(accode,chaincode,model,db);
144 duarte 123 ArrayList<ArrayList> resultset = mypdbaseinfo.read_atomData();
145 duarte 135 chain = mypdbaseinfo.chain;
146 duarte 123 sequence = mypdbaseinfo.read_seq();
147     mypdbaseinfo.close();
148    
149     for (ArrayList result:resultset){
150     int atomserial = (Integer) result.get(0);
151     String atom = (String) result.get(1);
152     String res_type = (String) result.get(2);
153 duarte 135 int res_serial = (Integer) result.get(3);
154     double x = (Double) result.get(4);
155     double y = (Double) result.get(5);
156     double z = (Double) result.get(6);
157 duarte 123 Double[] coords = {x, y, z};
158     ArrayList<String> aalist=AA.aas();
159     if (aalist.contains(res_type)) {
160     atomser2coord.put(atomserial, coords);
161     atomser2resser.put(atomserial, res_serial);
162     resser2restype.put(res_serial, res_type);
163     ArrayList<String> atomlist = aas2atoms.get(res_type);
164     if (atomlist.contains(atom)){
165     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
166     }
167     }
168     }
169     }
170    
171 duarte 130 public void read_pdb_data_from_msdsd(String db) throws MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
172     resser_atom2atomserial = new HashMap<String,Integer>();
173     resser2restype = new HashMap<Integer,String>();
174     atomser2coord = new HashMap<Integer,Double[]>();
175     atomser2resser = new HashMap<Integer,Integer>();
176    
177 duarte 133 MsdsdInfo mymsdsdinfo = new MsdsdInfo(accode,chaincode,model,db);
178 duarte 130 ArrayList<ArrayList> resultset = mymsdsdinfo.read_atomData();
179     sequence = mymsdsdinfo.read_seq();
180 duarte 138 chain = mymsdsdinfo.chain;
181 duarte 130 mymsdsdinfo.close();
182    
183     for (ArrayList result:resultset){
184     int atomserial = (Integer) result.get(0);
185     String atom = (String) result.get(1);
186     String res_type = (String) result.get(2);
187     int res_serial = (Integer) result.get(3);
188     double x = (Double) result.get(4);
189     double y = (Double) result.get(5);
190     double z = (Double) result.get(6);
191     Double[] coords = {x, y, z};
192     ArrayList<String> aalist=AA.aas();
193     if (aalist.contains(res_type)) {
194     atomser2coord.put(atomserial, coords);
195     atomser2resser.put(atomserial, res_serial);
196     resser2restype.put(res_serial, res_type);
197     ArrayList<String> atomlist = aas2atoms.get(res_type);
198     if (atomlist.contains(atom)){
199     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
200     }
201     }
202     }
203     }
204    
205 duarte 133 /**
206     * To read the pdb data (atom coordinates, residue serials, atom serials) from file
207 duarte 153 * chain gets set to internal identifier: if input chain code NULL then chain will be ' '
208     * accode gets set to the one parsed in HEADER or to 'Unknown' if not found
209     * sequence gets set to the sequence read from ATOM lines (i.e. observed resdiues only)
210 duarte 133 * @param pdbfile
211     * @throws FileNotFoundException
212     * @throws IOException
213     */
214 duarte 127 public void read_pdb_data_from_file(String pdbfile) throws FileNotFoundException, IOException{
215     resser_atom2atomserial = new HashMap<String,Integer>();
216     resser2restype = new HashMap<Integer,String>();
217     atomser2coord = new HashMap<Integer,Double[]>();
218     atomser2resser = new HashMap<Integer,Integer>();
219 duarte 153 boolean empty = true; // controls whether we don't find any atom line for given chaincode and model
220 duarte 154 // we set chain to chaincode except for case NULL where we use " " (NULL is a blank chain code in pdb files)
221     chain=chaincode;
222     if (chaincode.equals("NULL")) chain=" ";
223 duarte 153 int thismodel=DEFAULT_MODEL; // we initialise to DEFAULT_MODEL, in case file doesn't have MODEL lines
224 duarte 127 BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile)));
225     String line;
226     while ((line = fpdb.readLine() ) != null ) {
227 duarte 153 Pattern p = Pattern.compile("^HEADER");
228 duarte 127 Matcher m = p.matcher(line);
229     if (m.find()){
230 duarte 153 Pattern ph = Pattern.compile("^HEADER.{56}(\\d\\w{3})");
231     Matcher mh = ph.matcher(line);
232     if (mh.find()) {
233     accode=mh.group(1).toLowerCase();
234     } else {
235     accode="Unknown";
236     }
237     }
238     p = Pattern.compile("^MODEL\\s+(\\d+)");
239     m = p.matcher(line);
240     if (m.find()){
241     thismodel=Integer.parseInt(m.group(1));
242     }
243     if (thismodel!=model) continue; // we skip reading of atom lines if we are not in the desired model
244     p = Pattern.compile("^ATOM");
245     m = p.matcher(line);
246     if (m.find()){
247 duarte 154 // serial atom res_type chain res_ser x y z
248     Pattern pl = Pattern.compile(".{6}(.....).{2}(...).{1}(...).{1}"+chain+"(.{4}).{4}(.{8})(.{8})(.{8})",Pattern.CASE_INSENSITIVE);
249 duarte 127 Matcher ml = pl.matcher(line);
250     if (ml.find()) {
251 duarte 153 empty=false;
252 duarte 127 int atomserial=Integer.parseInt(ml.group(1).trim());
253     String atom = ml.group(2).trim();
254     String res_type = ml.group(3).trim();
255 duarte 154 int res_serial = Integer.parseInt(ml.group(4).trim());
256     double x = Double.parseDouble(ml.group(5).trim());
257     double y = Double.parseDouble(ml.group(6).trim());
258     double z = Double.parseDouble(ml.group(7).trim());
259 duarte 127 Double[] coords = {x, y, z};
260     ArrayList<String> aalist=AA.aas();
261     if (aalist.contains(res_type)) {
262     atomser2coord.put(atomserial, coords);
263     atomser2resser.put(atomserial, res_serial);
264     resser2restype.put(res_serial, res_type);
265     ArrayList<String> atomlist = aas2atoms.get(res_type);
266     if (atomlist.contains(atom)){
267     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
268     }
269 duarte 153 }
270     }
271 duarte 127 }
272     }
273     fpdb.close();
274 duarte 153 if (empty) System.err.println("Couldn't find any atom line for given chaincode: "+chaincode+", model: "+model);
275 duarte 127 // now we read the sequence from the resser2restype HashMap
276     // NOTE: we must make sure elsewhere that there are no unobserved residues, we can't check that here!
277     ArrayList<Integer> ressers = new ArrayList<Integer>();
278     for (int resser:resser2restype.keySet()) {
279     ressers.add(resser);
280     }
281     Collections.sort(ressers);
282     sequence="";
283     for (int resser:ressers){
284     String oneletter = AA.threeletter2oneletter(resser2restype.get(resser));
285     sequence += oneletter;
286     }
287     }
288    
289 duarte 153 /**
290     * Dumps coordinate data into a file in pdb format (ATOM lines only)
291     * The chain dumped is the value of the chain field, i.e. our internal chain identifier for Pdb objects
292     * @param outfile
293     * @throws IOException
294     */
295 duarte 123 public void dump2pdbfile(String outfile) throws IOException {
296 duarte 130 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
297 duarte 123 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
298 duarte 153 Out.println("HEADER Dumped from "+db+". pdb accession code="+accode+", chain='"+chain+"'");
299 duarte 123 for (String resser_atom:resser_atom2atomserial.keySet()){
300     int atomserial = resser_atom2atomserial.get(resser_atom);
301     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
302     String atom = resser_atom.split("_")[1];
303     String res_type = resser2restype.get(res_serial);
304     Double[] coords = atomser2coord.get(atomserial);
305 duarte 153 Object[] fields = {atomserial, atom, res_type, chain, res_serial, coords[0], coords[1], coords[2]};
306 duarte 130 lines.put(atomserial, fields);
307 duarte 123 }
308 duarte 130 for (int atomserial:lines.keySet()){
309     Out.printf("ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",lines.get(atomserial));
310     }
311 duarte 123 Out.println("END");
312     Out.close();
313     }
314    
315     public void dumpseq(String seqfile) throws IOException {
316     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
317     Out.println(">"+accode+"_"+chaincode);
318     Out.println(sequence);
319     Out.close();
320     }
321    
322     public int get_length(){
323     return resser2restype.size();
324     }
325    
326     public HashMap<Integer,Double[]> get_coords_for_ct(String ct) {
327     HashMap<Integer,Double[]> coords = new HashMap<Integer,Double[]>();
328     HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
329     for (int resser:resser2restype.keySet()){
330     String[] atoms = restype2atoms.get(resser2restype.get(resser));
331     for (String atom:atoms){
332     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
333     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
334     Double[] coord = atomser2coord.get(atomser);
335     coords.put(atomser, coord);
336     }
337     else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
338     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
339     Double[] coord = atomser2coord.get(atomser);
340     coords.put(atomser, coord);
341     }
342     else {
343     System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
344     }
345     }
346     }
347     return coords;
348     }
349    
350     public TreeMap<Contact, Double> calculate_dist_matrix(String ct){
351     TreeMap<Contact,Double> dist_matrix = new TreeMap<Contact,Double>();
352     if (!ct.contains("/")){
353     HashMap<Integer,Double[]> coords = get_coords_for_ct(ct);
354     for (int i_atomser:coords.keySet()){
355     for (int j_atomser:coords.keySet()){
356     if (j_atomser>i_atomser) {
357     Contact pair = new Contact(i_atomser,j_atomser);
358     dist_matrix.put(pair, distance(coords.get(i_atomser),coords.get(j_atomser)));
359     }
360     }
361     }
362     } else {
363     String i_ct = ct.split("/")[0];
364     String j_ct = ct.split("/")[1];
365     HashMap<Integer,Double[]> i_coords = get_coords_for_ct(i_ct);
366     HashMap<Integer,Double[]> j_coords = get_coords_for_ct(j_ct);
367     for (int i_atomser:i_coords.keySet()){
368     for (int j_atomser:j_coords.keySet()){
369     if (j_atomser!=i_atomser){
370     Contact pair = new Contact(i_atomser,j_atomser);
371     dist_matrix.put(pair, distance(i_coords.get(i_atomser),j_coords.get(j_atomser)));
372     }
373     }
374     }
375     }
376     return dist_matrix;
377     }
378    
379 duarte 129 /**
380     * Get the contacts for given contact type and cutoff for this Pdb object.
381     * Returns a Graph object with the contacts
382     * The graph is always directional, i.e. in crossed cases xx/yy: i corresponds to xx and j to yy
383     * ct here can be single contact type (e.g. Ca, BB) or crossed (e.g. BB/SC or Ca/Cb)
384     * @param ct
385     * @param cutoff
386     * @return
387     */
388 duarte 123 public Graph get_graph(String ct, double cutoff){
389     TreeMap<Contact,Double> dist_matrix = calculate_dist_matrix(ct);
390 duarte 175 ContactList contacts = new ContactList();
391 duarte 129 // we loop here over all indices of dist_matrix,
392     // we took care already that in symmetric cases (i.e. single contact type, not crossed) we have only one side of the matrix and
393     // in asymmetrical cases (i.e. crossed cases) we have both sides of the matrix
394 duarte 123 for (Contact pair:dist_matrix.keySet()){
395     int i_atomser=pair.i;
396     int j_atomser=pair.j;
397     if (dist_matrix.get(pair)<=cutoff){
398     int i_resser = atomser2resser.get(i_atomser);
399     int j_resser = atomser2resser.get(j_atomser);
400     Contact resser_pair = new Contact(i_resser,j_resser);
401 duarte 129 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
402 duarte 123 if (i_resser!=j_resser && (! contacts.contains(resser_pair))){
403     contacts.add(resser_pair);
404     }
405     }
406     }
407     Collections.sort(contacts);
408 duarte 129 TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
409     for (int resser:resser2restype.keySet()){
410     nodes.put(resser,resser2restype.get(resser));
411     }
412 duarte 156 Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,accode,chain,chaincode);
413 duarte 123 return graph;
414     }
415    
416     public Double distance(Double[] coords1, Double[] coords2){
417     return Math.sqrt(Math.pow((coords1[0]-coords2[0]),2)+Math.pow((coords1[1]-coords2[1]),2)+Math.pow((coords1[2]-coords2[2]),2));
418     }
419     }