ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 140
Committed: Mon May 14 15:34:21 2007 UTC (17 years, 5 months ago) by stehr
File size: 13465 byte(s)
Log Message:
Made Pdb.acccode public
Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3 duarte 127 import java.io.BufferedReader;
4     import java.io.File;
5 duarte 123 import java.io.FileOutputStream;
6 duarte 127 import java.io.FileReader;
7 duarte 123 import java.io.PrintStream;
8 duarte 127 import java.io.FileNotFoundException;
9 duarte 123 import java.io.IOException;
10     import java.util.HashMap;
11     import java.util.TreeMap;
12     import java.util.ArrayList;
13     import java.util.Collections;
14 duarte 127 import java.util.regex.Matcher;
15     import java.util.regex.Pattern;
16 duarte 123
17     public class Pdb {
18    
19     HashMap<String,Integer> resser_atom2atomserial;
20     HashMap<Integer,String> resser2restype;
21     HashMap<Integer,Double[]> atomser2coord;
22     HashMap<Integer,Integer> atomser2resser;
23     HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms();
24     String sequence="";
25 stehr 140 public String accode="";
26 duarte 135 // given "external" pdb chain code, i.e. the classic pdb code ("NULL" if it is blank in original pdb file)
27 duarte 123 String chaincode="";
28 duarte 133 int model=DEFAULT_MODEL;
29 duarte 123 String db;
30 duarte 135
31     // Our internal chain identifier (taken from dbs or pdb):
32     // - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd)
33     // - in reading from pdb file it gets set to whatever parsed from the pdb file
34 stehr 137 public String chain;
35 duarte 123
36 duarte 133 public static int DEFAULT_MODEL=1;
37    
38 duarte 130 public Pdb (String accode, String chaincode) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
39 duarte 133 this(accode,chaincode,DEFAULT_MODEL,PdbaseInfo.pdbaseDB);
40 duarte 123 }
41 duarte 133
42     public Pdb (String accode, String chaincode, int model_serial) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
43     this(accode,chaincode,model_serial,PdbaseInfo.pdbaseDB);
44     }
45    
46 duarte 130 public Pdb (String accode, String chaincode, String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
47 duarte 133 this(accode,chaincode,DEFAULT_MODEL,db);
48     }
49    
50     public Pdb (String accode, String chaincode, int model_serial, String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
51 duarte 123 this.accode=accode;
52     this.chaincode=chaincode;
53 duarte 133 this.model=model_serial;
54 duarte 123 this.db=db;
55 duarte 130 // we initialise it to chaincode, in read_pdb_data_from_pdbase gets reset to the right internal chain id. If reading msdsd it stays as chaincode
56     this.chain=chaincode;
57     if (db.contains("msdsd")){
58     read_pdb_data_from_msdsd(db);
59     } else if (db.contains("pdbase")){
60     read_pdb_data_from_pdbase(db);
61     }
62 duarte 123 }
63    
64 duarte 127 public Pdb (String pdbfile) throws FileNotFoundException, IOException {
65 duarte 133 this(pdbfile,DEFAULT_MODEL);
66     }
67    
68 duarte 139 //TODO implement read of pdb file given model
69 duarte 133 public Pdb (String pdbfile, int model_serial) throws FileNotFoundException, IOException {
70 duarte 127 this.accode="";
71 duarte 133 this.model=model_serial;
72 duarte 127 read_pdb_data_from_file(pdbfile);
73 duarte 123 }
74 duarte 133
75 duarte 124 public void read_pdb_data_from_pdbase(String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError{
76 duarte 123 resser_atom2atomserial = new HashMap<String,Integer>();
77     resser2restype = new HashMap<Integer,String>();
78     atomser2coord = new HashMap<Integer,Double[]>();
79     atomser2resser = new HashMap<Integer,Integer>();
80    
81 duarte 133 PdbaseInfo mypdbaseinfo = new PdbaseInfo(accode,chaincode,model,db);
82 duarte 123 ArrayList<ArrayList> resultset = mypdbaseinfo.read_atomData();
83 duarte 135 chain = mypdbaseinfo.chain;
84 duarte 123 sequence = mypdbaseinfo.read_seq();
85     mypdbaseinfo.close();
86    
87     for (ArrayList result:resultset){
88     int atomserial = (Integer) result.get(0);
89     String atom = (String) result.get(1);
90     String res_type = (String) result.get(2);
91 duarte 135 int res_serial = (Integer) result.get(3);
92     double x = (Double) result.get(4);
93     double y = (Double) result.get(5);
94     double z = (Double) result.get(6);
95 duarte 123 Double[] coords = {x, y, z};
96     ArrayList<String> aalist=AA.aas();
97     if (aalist.contains(res_type)) {
98     atomser2coord.put(atomserial, coords);
99     atomser2resser.put(atomserial, res_serial);
100     resser2restype.put(res_serial, res_type);
101     ArrayList<String> atomlist = aas2atoms.get(res_type);
102     if (atomlist.contains(atom)){
103     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
104     }
105     }
106     }
107     }
108    
109 duarte 130 public void read_pdb_data_from_msdsd(String db) throws MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
110     resser_atom2atomserial = new HashMap<String,Integer>();
111     resser2restype = new HashMap<Integer,String>();
112     atomser2coord = new HashMap<Integer,Double[]>();
113     atomser2resser = new HashMap<Integer,Integer>();
114    
115 duarte 133 MsdsdInfo mymsdsdinfo = new MsdsdInfo(accode,chaincode,model,db);
116 duarte 130 ArrayList<ArrayList> resultset = mymsdsdinfo.read_atomData();
117     sequence = mymsdsdinfo.read_seq();
118 duarte 138 chain = mymsdsdinfo.chain;
119 duarte 130 mymsdsdinfo.close();
120    
121     for (ArrayList result:resultset){
122     int atomserial = (Integer) result.get(0);
123     String atom = (String) result.get(1);
124     String res_type = (String) result.get(2);
125     int res_serial = (Integer) result.get(3);
126     double x = (Double) result.get(4);
127     double y = (Double) result.get(5);
128     double z = (Double) result.get(6);
129     Double[] coords = {x, y, z};
130     ArrayList<String> aalist=AA.aas();
131     if (aalist.contains(res_type)) {
132     atomser2coord.put(atomserial, coords);
133     atomser2resser.put(atomserial, res_serial);
134     resser2restype.put(res_serial, res_type);
135     ArrayList<String> atomlist = aas2atoms.get(res_type);
136     if (atomlist.contains(atom)){
137     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
138     }
139     }
140     }
141     }
142    
143 duarte 133 /**
144     * To read the pdb data (atom coordinates, residue serials, atom serials) from file
145     * NOTE: for now they must be single model, single chain files!! Nothing else implemented!
146     * @param pdbfile
147     * @throws FileNotFoundException
148     * @throws IOException
149     */
150 duarte 127 public void read_pdb_data_from_file(String pdbfile) throws FileNotFoundException, IOException{
151     resser_atom2atomserial = new HashMap<String,Integer>();
152     resser2restype = new HashMap<Integer,String>();
153     atomser2coord = new HashMap<Integer,Double[]>();
154     atomser2resser = new HashMap<Integer,Integer>();
155    
156     BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile)));
157     String line;
158     while ((line = fpdb.readLine() ) != null ) {
159     Pattern p = Pattern.compile("^ATOM");
160     Matcher m = p.matcher(line);
161     if (m.find()){
162 duarte 135 // serial atom res_type chain res_ser x y z
163     Pattern pl = Pattern.compile(".{6}(.....).{2}(...).{1}(...).{1}(.)(.{4}).{4}(.{8})(.{8})(.{8})",Pattern.CASE_INSENSITIVE);
164 duarte 127 Matcher ml = pl.matcher(line);
165     if (ml.find()) {
166     int atomserial=Integer.parseInt(ml.group(1).trim());
167     String atom = ml.group(2).trim();
168     String res_type = ml.group(3).trim();
169 duarte 135 chain = ml.group(4).trim();
170     int res_serial = Integer.parseInt(ml.group(5).trim());
171     double x = Double.parseDouble(ml.group(6).trim());
172     double y = Double.parseDouble(ml.group(7).trim());
173     double z = Double.parseDouble(ml.group(8).trim());
174 duarte 127 Double[] coords = {x, y, z};
175     ArrayList<String> aalist=AA.aas();
176     if (aalist.contains(res_type)) {
177     atomser2coord.put(atomserial, coords);
178     atomser2resser.put(atomserial, res_serial);
179     resser2restype.put(res_serial, res_type);
180     ArrayList<String> atomlist = aas2atoms.get(res_type);
181     if (atomlist.contains(atom)){
182     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
183     }
184     }
185     }
186     }
187     }
188     fpdb.close();
189     // now we read the sequence from the resser2restype HashMap
190     // NOTE: we must make sure elsewhere that there are no unobserved residues, we can't check that here!
191     ArrayList<Integer> ressers = new ArrayList<Integer>();
192     for (int resser:resser2restype.keySet()) {
193     ressers.add(resser);
194     }
195     Collections.sort(ressers);
196     sequence="";
197     for (int resser:ressers){
198     String oneletter = AA.threeletter2oneletter(resser2restype.get(resser));
199     sequence += oneletter;
200     }
201     // finally we set accode and chaincode to unknown
202     //TODO: we should parse accode and chaincode from appropriate fields in pdb file,
203     // problem: in case of a non-original pdb file there won't be accession code
204 duarte 139 accode="Unknown";
205     chaincode="Unknown";
206 duarte 127 }
207    
208 duarte 123 public void dump2pdbfile(String outfile) throws IOException {
209     String chainstr=chain;
210     if (chain.equals("NULL")){
211     chainstr="A";
212     }
213 duarte 130 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
214 duarte 123 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
215     Out.println("HEADER Dumped from "+db+". pdb accession code="+accode+", pdb chain code="+chaincode);
216     for (String resser_atom:resser_atom2atomserial.keySet()){
217     int atomserial = resser_atom2atomserial.get(resser_atom);
218     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
219     String atom = resser_atom.split("_")[1];
220     String res_type = resser2restype.get(res_serial);
221     Double[] coords = atomser2coord.get(atomserial);
222     Object[] fields = {atomserial, atom, res_type, chainstr, res_serial, coords[0], coords[1], coords[2]};
223 duarte 130 lines.put(atomserial, fields);
224 duarte 123 }
225 duarte 130 for (int atomserial:lines.keySet()){
226     Out.printf("ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",lines.get(atomserial));
227     }
228 duarte 123 Out.println("END");
229     Out.close();
230     }
231    
232     public void dumpseq(String seqfile) throws IOException {
233     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
234     Out.println(">"+accode+"_"+chaincode);
235     Out.println(sequence);
236     Out.close();
237     }
238    
239     public int get_length(){
240     return resser2restype.size();
241     }
242    
243     public HashMap<Integer,Double[]> get_coords_for_ct(String ct) {
244     HashMap<Integer,Double[]> coords = new HashMap<Integer,Double[]>();
245     HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
246     for (int resser:resser2restype.keySet()){
247     String[] atoms = restype2atoms.get(resser2restype.get(resser));
248     for (String atom:atoms){
249     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
250     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
251     Double[] coord = atomser2coord.get(atomser);
252     coords.put(atomser, coord);
253     }
254     else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
255     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
256     Double[] coord = atomser2coord.get(atomser);
257     coords.put(atomser, coord);
258     }
259     else {
260     System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
261     }
262     }
263     }
264     return coords;
265     }
266    
267     public TreeMap<Contact, Double> calculate_dist_matrix(String ct){
268     TreeMap<Contact,Double> dist_matrix = new TreeMap<Contact,Double>();
269     if (!ct.contains("/")){
270     HashMap<Integer,Double[]> coords = get_coords_for_ct(ct);
271     for (int i_atomser:coords.keySet()){
272     for (int j_atomser:coords.keySet()){
273     if (j_atomser>i_atomser) {
274     Contact pair = new Contact(i_atomser,j_atomser);
275     dist_matrix.put(pair, distance(coords.get(i_atomser),coords.get(j_atomser)));
276     }
277     }
278     }
279     } else {
280     String i_ct = ct.split("/")[0];
281     String j_ct = ct.split("/")[1];
282     HashMap<Integer,Double[]> i_coords = get_coords_for_ct(i_ct);
283     HashMap<Integer,Double[]> j_coords = get_coords_for_ct(j_ct);
284     for (int i_atomser:i_coords.keySet()){
285     for (int j_atomser:j_coords.keySet()){
286     if (j_atomser!=i_atomser){
287     Contact pair = new Contact(i_atomser,j_atomser);
288     dist_matrix.put(pair, distance(i_coords.get(i_atomser),j_coords.get(j_atomser)));
289     }
290     }
291     }
292     }
293     return dist_matrix;
294     }
295    
296 duarte 129 /**
297     * Get the contacts for given contact type and cutoff for this Pdb object.
298     * Returns a Graph object with the contacts
299     * The graph is always directional, i.e. in crossed cases xx/yy: i corresponds to xx and j to yy
300     * ct here can be single contact type (e.g. Ca, BB) or crossed (e.g. BB/SC or Ca/Cb)
301     * @param ct
302     * @param cutoff
303     * @return
304     */
305 duarte 123 public Graph get_graph(String ct, double cutoff){
306     TreeMap<Contact,Double> dist_matrix = calculate_dist_matrix(ct);
307     ArrayList<Contact> contacts = new ArrayList<Contact>();
308 duarte 129 // we loop here over all indices of dist_matrix,
309     // we took care already that in symmetric cases (i.e. single contact type, not crossed) we have only one side of the matrix and
310     // in asymmetrical cases (i.e. crossed cases) we have both sides of the matrix
311 duarte 123 for (Contact pair:dist_matrix.keySet()){
312     int i_atomser=pair.i;
313     int j_atomser=pair.j;
314     if (dist_matrix.get(pair)<=cutoff){
315     int i_resser = atomser2resser.get(i_atomser);
316     int j_resser = atomser2resser.get(j_atomser);
317     Contact resser_pair = new Contact(i_resser,j_resser);
318 duarte 129 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
319 duarte 123 if (i_resser!=j_resser && (! contacts.contains(resser_pair))){
320     contacts.add(resser_pair);
321     }
322     }
323     }
324     Collections.sort(contacts);
325 duarte 129 TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
326     for (int resser:resser2restype.keySet()){
327     nodes.put(resser,resser2restype.get(resser));
328     }
329 duarte 135 // NOTE: we pass to the graph object the chain (internal chain identifier) not the pdb chain code!
330 duarte 129 Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,accode,chain);
331 duarte 123 return graph;
332     }
333    
334     public Double distance(Double[] coords1, Double[] coords2){
335     return Math.sqrt(Math.pow((coords1[0]-coords2[0]),2)+Math.pow((coords1[1]-coords2[1]),2)+Math.pow((coords1[2]-coords2[2]),2));
336     }
337     }