ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 133
Committed: Fri May 11 09:37:38 2007 UTC (17 years, 4 months ago) by duarte
File size: 12833 byte(s)
Log Message:
New functionality: now can create a Pdb object passing a model serial for NMR structures. Not implemented when reading from pdb files!
Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3 duarte 127 import java.io.BufferedReader;
4     import java.io.File;
5 duarte 123 import java.io.FileOutputStream;
6 duarte 127 import java.io.FileReader;
7 duarte 123 import java.io.PrintStream;
8 duarte 127 import java.io.FileNotFoundException;
9 duarte 123 import java.io.IOException;
10     import java.util.HashMap;
11     import java.util.TreeMap;
12     import java.util.ArrayList;
13     import java.util.Collections;
14 duarte 127 import java.util.regex.Matcher;
15     import java.util.regex.Pattern;
16 duarte 123
17     public class Pdb {
18    
19     HashMap<String,Integer> resser_atom2atomserial;
20     HashMap<Integer,String> resser2restype;
21     HashMap<Integer,Double[]> atomser2coord;
22     HashMap<Integer,Integer> atomser2resser;
23     HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms();
24     String sequence="";
25     String accode="";
26     String chaincode="";
27 duarte 133 int model=DEFAULT_MODEL;
28 duarte 123 String db;
29     String chain;
30    
31 duarte 133 public static int DEFAULT_MODEL=1;
32    
33 duarte 130 public Pdb (String accode, String chaincode) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
34 duarte 133 this(accode,chaincode,DEFAULT_MODEL,PdbaseInfo.pdbaseDB);
35 duarte 123 }
36 duarte 133
37     public Pdb (String accode, String chaincode, int model_serial) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
38     this(accode,chaincode,model_serial,PdbaseInfo.pdbaseDB);
39     }
40    
41 duarte 130 public Pdb (String accode, String chaincode, String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
42 duarte 133 this(accode,chaincode,DEFAULT_MODEL,db);
43     }
44    
45     public Pdb (String accode, String chaincode, int model_serial, String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
46 duarte 123 this.accode=accode;
47     this.chaincode=chaincode;
48 duarte 133 this.model=model_serial;
49 duarte 123 this.db=db;
50 duarte 130 // we initialise it to chaincode, in read_pdb_data_from_pdbase gets reset to the right internal chain id. If reading msdsd it stays as chaincode
51     this.chain=chaincode;
52     if (db.contains("msdsd")){
53     read_pdb_data_from_msdsd(db);
54     } else if (db.contains("pdbase")){
55     read_pdb_data_from_pdbase(db);
56     }
57 duarte 123 }
58    
59 duarte 127 public Pdb (String pdbfile) throws FileNotFoundException, IOException {
60 duarte 133 this(pdbfile,DEFAULT_MODEL);
61     }
62    
63     public Pdb (String pdbfile, int model_serial) throws FileNotFoundException, IOException {
64 duarte 127 this.accode="";
65 duarte 133 this.model=model_serial;
66 duarte 127 read_pdb_data_from_file(pdbfile);
67 duarte 123 }
68 duarte 133
69 duarte 124 public void read_pdb_data_from_pdbase(String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError{
70 duarte 123 resser_atom2atomserial = new HashMap<String,Integer>();
71     resser2restype = new HashMap<Integer,String>();
72     atomser2coord = new HashMap<Integer,Double[]>();
73     atomser2resser = new HashMap<Integer,Integer>();
74    
75 duarte 133 PdbaseInfo mypdbaseinfo = new PdbaseInfo(accode,chaincode,model,db);
76 duarte 123 ArrayList<ArrayList> resultset = mypdbaseinfo.read_atomData();
77     sequence = mypdbaseinfo.read_seq();
78     mypdbaseinfo.close();
79    
80     for (ArrayList result:resultset){
81     int atomserial = (Integer) result.get(0);
82     String atom = (String) result.get(1);
83     String res_type = (String) result.get(2);
84     chain=(String) result.get(3);
85     int res_serial = (Integer) result.get(4);
86     double x = (Double) result.get(5);
87     double y = (Double) result.get(6);
88     double z = (Double) result.get(7);
89     Double[] coords = {x, y, z};
90     ArrayList<String> aalist=AA.aas();
91     if (aalist.contains(res_type)) {
92     atomser2coord.put(atomserial, coords);
93     atomser2resser.put(atomserial, res_serial);
94     resser2restype.put(res_serial, res_type);
95     ArrayList<String> atomlist = aas2atoms.get(res_type);
96     if (atomlist.contains(atom)){
97     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
98     }
99     }
100     }
101     }
102    
103 duarte 130 public void read_pdb_data_from_msdsd(String db) throws MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
104     resser_atom2atomserial = new HashMap<String,Integer>();
105     resser2restype = new HashMap<Integer,String>();
106     atomser2coord = new HashMap<Integer,Double[]>();
107     atomser2resser = new HashMap<Integer,Integer>();
108    
109 duarte 133 MsdsdInfo mymsdsdinfo = new MsdsdInfo(accode,chaincode,model,db);
110 duarte 130 ArrayList<ArrayList> resultset = mymsdsdinfo.read_atomData();
111     sequence = mymsdsdinfo.read_seq();
112     mymsdsdinfo.close();
113    
114     for (ArrayList result:resultset){
115     int atomserial = (Integer) result.get(0);
116     String atom = (String) result.get(1);
117     String res_type = (String) result.get(2);
118     int res_serial = (Integer) result.get(3);
119     double x = (Double) result.get(4);
120     double y = (Double) result.get(5);
121     double z = (Double) result.get(6);
122     Double[] coords = {x, y, z};
123     ArrayList<String> aalist=AA.aas();
124     if (aalist.contains(res_type)) {
125     atomser2coord.put(atomserial, coords);
126     atomser2resser.put(atomserial, res_serial);
127     resser2restype.put(res_serial, res_type);
128     ArrayList<String> atomlist = aas2atoms.get(res_type);
129     if (atomlist.contains(atom)){
130     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
131     }
132     }
133     }
134     }
135    
136 duarte 133 /**
137     * To read the pdb data (atom coordinates, residue serials, atom serials) from file
138     * NOTE: for now they must be single model, single chain files!! Nothing else implemented!
139     * @param pdbfile
140     * @throws FileNotFoundException
141     * @throws IOException
142     */
143 duarte 127 public void read_pdb_data_from_file(String pdbfile) throws FileNotFoundException, IOException{
144     resser_atom2atomserial = new HashMap<String,Integer>();
145     resser2restype = new HashMap<Integer,String>();
146     atomser2coord = new HashMap<Integer,Double[]>();
147     atomser2resser = new HashMap<Integer,Integer>();
148    
149     BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile)));
150     String line;
151     while ((line = fpdb.readLine() ) != null ) {
152     Pattern p = Pattern.compile("^ATOM");
153     Matcher m = p.matcher(line);
154     if (m.find()){
155     Pattern pl = Pattern.compile(".{6}(.....).{2}(...).{1}(...).{2}(.{4}).{4}(.{8})(.{8})(.{8})",Pattern.CASE_INSENSITIVE);
156     Matcher ml = pl.matcher(line);
157     if (ml.find()) {
158     int atomserial=Integer.parseInt(ml.group(1).trim());
159     String atom = ml.group(2).trim();
160     String res_type = ml.group(3).trim();
161     int res_serial = Integer.parseInt(ml.group(4).trim());
162     double x = Double.parseDouble(ml.group(5).trim());
163     double y = Double.parseDouble(ml.group(6).trim());
164     double z = Double.parseDouble(ml.group(7).trim());
165     Double[] coords = {x, y, z};
166     ArrayList<String> aalist=AA.aas();
167     if (aalist.contains(res_type)) {
168     atomser2coord.put(atomserial, coords);
169     atomser2resser.put(atomserial, res_serial);
170     resser2restype.put(res_serial, res_type);
171     ArrayList<String> atomlist = aas2atoms.get(res_type);
172     if (atomlist.contains(atom)){
173     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
174     }
175     }
176     }
177     }
178     }
179     fpdb.close();
180     // now we read the sequence from the resser2restype HashMap
181     // NOTE: we must make sure elsewhere that there are no unobserved residues, we can't check that here!
182     ArrayList<Integer> ressers = new ArrayList<Integer>();
183     for (int resser:resser2restype.keySet()) {
184     ressers.add(resser);
185     }
186     Collections.sort(ressers);
187     sequence="";
188     for (int resser:ressers){
189     String oneletter = AA.threeletter2oneletter(resser2restype.get(resser));
190     sequence += oneletter;
191     }
192     // finally we set accode and chaincode to unknown
193     //TODO: we should parse accode and chaincode from appropriate fields in pdb file,
194     // problem: in case of a non-original pdb file there won't be accession code
195     accode="?";
196     chaincode="?";
197     }
198    
199 duarte 123 public void dump2pdbfile(String outfile) throws IOException {
200     String chainstr=chain;
201     if (chain.equals("NULL")){
202     chainstr="A";
203     }
204 duarte 130 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
205 duarte 123 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
206     Out.println("HEADER Dumped from "+db+". pdb accession code="+accode+", pdb chain code="+chaincode);
207     for (String resser_atom:resser_atom2atomserial.keySet()){
208     int atomserial = resser_atom2atomserial.get(resser_atom);
209     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
210     String atom = resser_atom.split("_")[1];
211     String res_type = resser2restype.get(res_serial);
212     Double[] coords = atomser2coord.get(atomserial);
213     Object[] fields = {atomserial, atom, res_type, chainstr, res_serial, coords[0], coords[1], coords[2]};
214 duarte 130 lines.put(atomserial, fields);
215 duarte 123 }
216 duarte 130 for (int atomserial:lines.keySet()){
217     Out.printf("ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",lines.get(atomserial));
218     }
219 duarte 123 Out.println("END");
220     Out.close();
221     }
222    
223     public void dumpseq(String seqfile) throws IOException {
224     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
225     Out.println(">"+accode+"_"+chaincode);
226     Out.println(sequence);
227     Out.close();
228     }
229    
230     public int get_length(){
231     return resser2restype.size();
232     }
233    
234     public HashMap<Integer,Double[]> get_coords_for_ct(String ct) {
235     HashMap<Integer,Double[]> coords = new HashMap<Integer,Double[]>();
236     HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
237     for (int resser:resser2restype.keySet()){
238     String[] atoms = restype2atoms.get(resser2restype.get(resser));
239     for (String atom:atoms){
240     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
241     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
242     Double[] coord = atomser2coord.get(atomser);
243     coords.put(atomser, coord);
244     }
245     else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
246     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
247     Double[] coord = atomser2coord.get(atomser);
248     coords.put(atomser, coord);
249     }
250     else {
251     System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
252     }
253     }
254     }
255     return coords;
256     }
257    
258     public TreeMap<Contact, Double> calculate_dist_matrix(String ct){
259     TreeMap<Contact,Double> dist_matrix = new TreeMap<Contact,Double>();
260     if (!ct.contains("/")){
261     HashMap<Integer,Double[]> coords = get_coords_for_ct(ct);
262     for (int i_atomser:coords.keySet()){
263     for (int j_atomser:coords.keySet()){
264     if (j_atomser>i_atomser) {
265     Contact pair = new Contact(i_atomser,j_atomser);
266     dist_matrix.put(pair, distance(coords.get(i_atomser),coords.get(j_atomser)));
267     }
268     }
269     }
270     } else {
271     String i_ct = ct.split("/")[0];
272     String j_ct = ct.split("/")[1];
273     HashMap<Integer,Double[]> i_coords = get_coords_for_ct(i_ct);
274     HashMap<Integer,Double[]> j_coords = get_coords_for_ct(j_ct);
275     for (int i_atomser:i_coords.keySet()){
276     for (int j_atomser:j_coords.keySet()){
277     if (j_atomser!=i_atomser){
278     Contact pair = new Contact(i_atomser,j_atomser);
279     dist_matrix.put(pair, distance(i_coords.get(i_atomser),j_coords.get(j_atomser)));
280     }
281     }
282     }
283     }
284     return dist_matrix;
285     }
286    
287 duarte 129 /**
288     * Get the contacts for given contact type and cutoff for this Pdb object.
289     * Returns a Graph object with the contacts
290     * The graph is always directional, i.e. in crossed cases xx/yy: i corresponds to xx and j to yy
291     * ct here can be single contact type (e.g. Ca, BB) or crossed (e.g. BB/SC or Ca/Cb)
292     * @param ct
293     * @param cutoff
294     * @return
295     */
296 duarte 123 public Graph get_graph(String ct, double cutoff){
297     TreeMap<Contact,Double> dist_matrix = calculate_dist_matrix(ct);
298     ArrayList<Contact> contacts = new ArrayList<Contact>();
299 duarte 129 // we loop here over all indices of dist_matrix,
300     // we took care already that in symmetric cases (i.e. single contact type, not crossed) we have only one side of the matrix and
301     // in asymmetrical cases (i.e. crossed cases) we have both sides of the matrix
302 duarte 123 for (Contact pair:dist_matrix.keySet()){
303     int i_atomser=pair.i;
304     int j_atomser=pair.j;
305     if (dist_matrix.get(pair)<=cutoff){
306     int i_resser = atomser2resser.get(i_atomser);
307     int j_resser = atomser2resser.get(j_atomser);
308     Contact resser_pair = new Contact(i_resser,j_resser);
309 duarte 129 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
310 duarte 123 if (i_resser!=j_resser && (! contacts.contains(resser_pair))){
311     contacts.add(resser_pair);
312     }
313     }
314     }
315     Collections.sort(contacts);
316 duarte 129 TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
317     for (int resser:resser2restype.keySet()){
318     nodes.put(resser,resser2restype.get(resser));
319     }
320     // NOTE: we pass to the graph object the chain (internal pdbase id) not the pdb chain code!
321     Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,accode,chain);
322 duarte 123 return graph;
323     }
324    
325     public Double distance(Double[] coords1, Double[] coords2){
326     return Math.sqrt(Math.pow((coords1[0]-coords2[0]),2)+Math.pow((coords1[1]-coords2[1]),2)+Math.pow((coords1[2]-coords2[2]),2));
327     }
328     }