ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 127
Committed: Wed May 9 14:54:02 2007 UTC (17 years, 4 months ago) by duarte
File size: 8991 byte(s)
Log Message:
Implemented read of pdb data from pdb file.
FIXED BUG in dumping of pdb file. Was missing end of line in atom lines
Added more tests in testPdb

Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3 duarte 127 import java.io.BufferedReader;
4     import java.io.File;
5 duarte 123 import java.io.FileOutputStream;
6 duarte 127 import java.io.FileReader;
7 duarte 123 import java.io.PrintStream;
8 duarte 127 import java.io.FileNotFoundException;
9 duarte 123 import java.io.IOException;
10     import java.util.HashMap;
11     import java.util.TreeMap;
12     import java.util.ArrayList;
13     import java.util.Collections;
14 duarte 127 import java.util.regex.Matcher;
15     import java.util.regex.Pattern;
16 duarte 123
17     public class Pdb {
18    
19     HashMap<String,Integer> resser_atom2atomserial;
20     HashMap<Integer,String> resser2restype;
21     HashMap<Integer,Double[]> atomser2coord;
22     HashMap<Integer,Integer> atomser2resser;
23     HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms();
24     String sequence="";
25     String accode="";
26     String chaincode="";
27     String db;
28     String chain;
29    
30 duarte 124 public Pdb (String accode, String chaincode) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError {
31 duarte 123 this(accode,chaincode,PdbaseInfo.pdbaseDB);
32    
33     }
34    
35 duarte 124 public Pdb (String accode, String chaincode, String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError {
36 duarte 123 this.accode=accode;
37     this.chaincode=chaincode;
38     this.db=db;
39     this.chain=chaincode; // we initialise it to chaincode, in read_pdb_data_from_pdbase gets reset to the right internal chain id
40     read_pdb_data_from_pdbase(db);
41     }
42    
43 duarte 127 public Pdb (String pdbfile) throws FileNotFoundException, IOException {
44     this.accode="";
45     read_pdb_data_from_file(pdbfile);
46 duarte 123 }
47    
48 duarte 124 public void read_pdb_data_from_pdbase(String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError{
49 duarte 123 resser_atom2atomserial = new HashMap<String,Integer>();
50     resser2restype = new HashMap<Integer,String>();
51     atomser2coord = new HashMap<Integer,Double[]>();
52     atomser2resser = new HashMap<Integer,Integer>();
53    
54     PdbaseInfo mypdbaseinfo = new PdbaseInfo(accode,chaincode,db);
55     ArrayList<ArrayList> resultset = mypdbaseinfo.read_atomData();
56     sequence = mypdbaseinfo.read_seq();
57     mypdbaseinfo.close();
58    
59     for (ArrayList result:resultset){
60     int atomserial = (Integer) result.get(0);
61     String atom = (String) result.get(1);
62     String res_type = (String) result.get(2);
63     chain=(String) result.get(3);
64     int res_serial = (Integer) result.get(4);
65     double x = (Double) result.get(5);
66     double y = (Double) result.get(6);
67     double z = (Double) result.get(7);
68     Double[] coords = {x, y, z};
69     ArrayList<String> aalist=AA.aas();
70     if (aalist.contains(res_type)) {
71     atomser2coord.put(atomserial, coords);
72     atomser2resser.put(atomserial, res_serial);
73     resser2restype.put(res_serial, res_type);
74     ArrayList<String> atomlist = aas2atoms.get(res_type);
75     if (atomlist.contains(atom)){
76     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
77     }
78     }
79     }
80     }
81    
82 duarte 127 public void read_pdb_data_from_file(String pdbfile) throws FileNotFoundException, IOException{
83     resser_atom2atomserial = new HashMap<String,Integer>();
84     resser2restype = new HashMap<Integer,String>();
85     atomser2coord = new HashMap<Integer,Double[]>();
86     atomser2resser = new HashMap<Integer,Integer>();
87    
88     BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile)));
89     String line;
90     while ((line = fpdb.readLine() ) != null ) {
91     Pattern p = Pattern.compile("^ATOM");
92     Matcher m = p.matcher(line);
93     if (m.find()){
94     Pattern pl = Pattern.compile(".{6}(.....).{2}(...).{1}(...).{2}(.{4}).{4}(.{8})(.{8})(.{8})",Pattern.CASE_INSENSITIVE);
95     Matcher ml = pl.matcher(line);
96     if (ml.find()) {
97     int atomserial=Integer.parseInt(ml.group(1).trim());
98     String atom = ml.group(2).trim();
99     String res_type = ml.group(3).trim();
100     int res_serial = Integer.parseInt(ml.group(4).trim());
101     double x = Double.parseDouble(ml.group(5).trim());
102     double y = Double.parseDouble(ml.group(6).trim());
103     double z = Double.parseDouble(ml.group(7).trim());
104     Double[] coords = {x, y, z};
105     ArrayList<String> aalist=AA.aas();
106     if (aalist.contains(res_type)) {
107     atomser2coord.put(atomserial, coords);
108     atomser2resser.put(atomserial, res_serial);
109     resser2restype.put(res_serial, res_type);
110     ArrayList<String> atomlist = aas2atoms.get(res_type);
111     if (atomlist.contains(atom)){
112     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
113     }
114     }
115     }
116     }
117     }
118     fpdb.close();
119     // now we read the sequence from the resser2restype HashMap
120     // NOTE: we must make sure elsewhere that there are no unobserved residues, we can't check that here!
121     ArrayList<Integer> ressers = new ArrayList<Integer>();
122     for (int resser:resser2restype.keySet()) {
123     ressers.add(resser);
124     }
125     Collections.sort(ressers);
126     sequence="";
127     for (int resser:ressers){
128     String oneletter = AA.threeletter2oneletter(resser2restype.get(resser));
129     sequence += oneletter;
130     }
131     // finally we set accode and chaincode to unknown
132     //TODO: we should parse accode and chaincode from appropriate fields in pdb file,
133     // problem: in case of a non-original pdb file there won't be accession code
134     accode="?";
135     chaincode="?";
136     }
137    
138 duarte 123 public void dump2pdbfile(String outfile) throws IOException {
139     String chainstr=chain;
140     if (chain.equals("NULL")){
141     chainstr="A";
142     }
143     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
144     Out.println("HEADER Dumped from "+db+". pdb accession code="+accode+", pdb chain code="+chaincode);
145     for (String resser_atom:resser_atom2atomserial.keySet()){
146     int atomserial = resser_atom2atomserial.get(resser_atom);
147     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
148     String atom = resser_atom.split("_")[1];
149     String res_type = resser2restype.get(res_serial);
150     Double[] coords = atomser2coord.get(atomserial);
151     Object[] fields = {atomserial, atom, res_type, chainstr, res_serial, coords[0], coords[1], coords[2]};
152 duarte 127 Out.printf("ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",fields);
153 duarte 123 }
154     Out.println("END");
155     Out.close();
156     }
157    
158     public void dumpseq(String seqfile) throws IOException {
159     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
160     Out.println(">"+accode+"_"+chaincode);
161     Out.println(sequence);
162     Out.close();
163     }
164    
165     public int get_length(){
166     return resser2restype.size();
167     }
168    
169     public HashMap<Integer,Double[]> get_coords_for_ct(String ct) {
170     HashMap<Integer,Double[]> coords = new HashMap<Integer,Double[]>();
171     HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
172     for (int resser:resser2restype.keySet()){
173     String[] atoms = restype2atoms.get(resser2restype.get(resser));
174     for (String atom:atoms){
175     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
176     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
177     Double[] coord = atomser2coord.get(atomser);
178     coords.put(atomser, coord);
179     }
180     else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
181     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
182     Double[] coord = atomser2coord.get(atomser);
183     coords.put(atomser, coord);
184     }
185     else {
186     System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
187     }
188     }
189     }
190     return coords;
191     }
192    
193     public TreeMap<Contact, Double> calculate_dist_matrix(String ct){
194     TreeMap<Contact,Double> dist_matrix = new TreeMap<Contact,Double>();
195     if (!ct.contains("/")){
196     HashMap<Integer,Double[]> coords = get_coords_for_ct(ct);
197     for (int i_atomser:coords.keySet()){
198     for (int j_atomser:coords.keySet()){
199     if (j_atomser>i_atomser) {
200     Contact pair = new Contact(i_atomser,j_atomser);
201     dist_matrix.put(pair, distance(coords.get(i_atomser),coords.get(j_atomser)));
202     }
203     }
204     }
205     } else {
206     String i_ct = ct.split("/")[0];
207     String j_ct = ct.split("/")[1];
208     HashMap<Integer,Double[]> i_coords = get_coords_for_ct(i_ct);
209     HashMap<Integer,Double[]> j_coords = get_coords_for_ct(j_ct);
210     for (int i_atomser:i_coords.keySet()){
211     for (int j_atomser:j_coords.keySet()){
212     if (j_atomser!=i_atomser){
213     Contact pair = new Contact(i_atomser,j_atomser);
214     dist_matrix.put(pair, distance(i_coords.get(i_atomser),j_coords.get(j_atomser)));
215     }
216     }
217     }
218     }
219     return dist_matrix;
220     }
221    
222     public Graph get_graph(String ct, double cutoff){
223     TreeMap<Contact,Double> dist_matrix = calculate_dist_matrix(ct);
224     ArrayList<Contact> contacts = new ArrayList<Contact>();
225     for (Contact pair:dist_matrix.keySet()){
226     int i_atomser=pair.i;
227     int j_atomser=pair.j;
228     if (dist_matrix.get(pair)<=cutoff){
229     int i_resser = atomser2resser.get(i_atomser);
230     int j_resser = atomser2resser.get(j_atomser);
231     Contact resser_pair = new Contact(i_resser,j_resser);
232     if (i_resser!=j_resser && (! contacts.contains(resser_pair))){
233     contacts.add(resser_pair);
234     }
235     }
236     }
237     Collections.sort(contacts);
238     Graph graph = new Graph (contacts,cutoff,ct);
239     return graph;
240     }
241    
242     public Double distance(Double[] coords1, Double[] coords2){
243     return Math.sqrt(Math.pow((coords1[0]-coords2[0]),2)+Math.pow((coords1[1]-coords2[1]),2)+Math.pow((coords1[2]-coords2[2]),2));
244     }
245     }