ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 129
Committed: Thu May 10 13:12:32 2007 UTC (17 years, 4 months ago) by duarte
File size: 10095 byte(s)
Log Message:
NEW FUNCTIONALITY: can retrieve now a ContactMap object from a Graph object
New method getCM in Graph. New member variables: sequence, accode, chain, nodes, directed
Passing more data from Pdb to Graph in get_graph
New constructor in ContactMap
Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3 duarte 127 import java.io.BufferedReader;
4     import java.io.File;
5 duarte 123 import java.io.FileOutputStream;
6 duarte 127 import java.io.FileReader;
7 duarte 123 import java.io.PrintStream;
8 duarte 127 import java.io.FileNotFoundException;
9 duarte 123 import java.io.IOException;
10     import java.util.HashMap;
11     import java.util.TreeMap;
12     import java.util.ArrayList;
13     import java.util.Collections;
14 duarte 127 import java.util.regex.Matcher;
15     import java.util.regex.Pattern;
16 duarte 123
17     public class Pdb {
18    
19     HashMap<String,Integer> resser_atom2atomserial;
20     HashMap<Integer,String> resser2restype;
21     HashMap<Integer,Double[]> atomser2coord;
22     HashMap<Integer,Integer> atomser2resser;
23     HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms();
24     String sequence="";
25     String accode="";
26     String chaincode="";
27     String db;
28     String chain;
29    
30 duarte 124 public Pdb (String accode, String chaincode) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError {
31 duarte 123 this(accode,chaincode,PdbaseInfo.pdbaseDB);
32    
33     }
34    
35 duarte 124 public Pdb (String accode, String chaincode, String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError {
36 duarte 123 this.accode=accode;
37     this.chaincode=chaincode;
38     this.db=db;
39     this.chain=chaincode; // we initialise it to chaincode, in read_pdb_data_from_pdbase gets reset to the right internal chain id
40     read_pdb_data_from_pdbase(db);
41     }
42    
43 duarte 127 public Pdb (String pdbfile) throws FileNotFoundException, IOException {
44     this.accode="";
45     read_pdb_data_from_file(pdbfile);
46 duarte 123 }
47    
48 duarte 124 public void read_pdb_data_from_pdbase(String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError{
49 duarte 123 resser_atom2atomserial = new HashMap<String,Integer>();
50     resser2restype = new HashMap<Integer,String>();
51     atomser2coord = new HashMap<Integer,Double[]>();
52     atomser2resser = new HashMap<Integer,Integer>();
53    
54     PdbaseInfo mypdbaseinfo = new PdbaseInfo(accode,chaincode,db);
55     ArrayList<ArrayList> resultset = mypdbaseinfo.read_atomData();
56     sequence = mypdbaseinfo.read_seq();
57     mypdbaseinfo.close();
58    
59     for (ArrayList result:resultset){
60     int atomserial = (Integer) result.get(0);
61     String atom = (String) result.get(1);
62     String res_type = (String) result.get(2);
63     chain=(String) result.get(3);
64     int res_serial = (Integer) result.get(4);
65     double x = (Double) result.get(5);
66     double y = (Double) result.get(6);
67     double z = (Double) result.get(7);
68     Double[] coords = {x, y, z};
69     ArrayList<String> aalist=AA.aas();
70     if (aalist.contains(res_type)) {
71     atomser2coord.put(atomserial, coords);
72     atomser2resser.put(atomserial, res_serial);
73     resser2restype.put(res_serial, res_type);
74     ArrayList<String> atomlist = aas2atoms.get(res_type);
75     if (atomlist.contains(atom)){
76     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
77     }
78     }
79     }
80     }
81    
82 duarte 127 public void read_pdb_data_from_file(String pdbfile) throws FileNotFoundException, IOException{
83     resser_atom2atomserial = new HashMap<String,Integer>();
84     resser2restype = new HashMap<Integer,String>();
85     atomser2coord = new HashMap<Integer,Double[]>();
86     atomser2resser = new HashMap<Integer,Integer>();
87    
88     BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile)));
89     String line;
90     while ((line = fpdb.readLine() ) != null ) {
91     Pattern p = Pattern.compile("^ATOM");
92     Matcher m = p.matcher(line);
93     if (m.find()){
94     Pattern pl = Pattern.compile(".{6}(.....).{2}(...).{1}(...).{2}(.{4}).{4}(.{8})(.{8})(.{8})",Pattern.CASE_INSENSITIVE);
95     Matcher ml = pl.matcher(line);
96     if (ml.find()) {
97     int atomserial=Integer.parseInt(ml.group(1).trim());
98     String atom = ml.group(2).trim();
99     String res_type = ml.group(3).trim();
100     int res_serial = Integer.parseInt(ml.group(4).trim());
101     double x = Double.parseDouble(ml.group(5).trim());
102     double y = Double.parseDouble(ml.group(6).trim());
103     double z = Double.parseDouble(ml.group(7).trim());
104     Double[] coords = {x, y, z};
105     ArrayList<String> aalist=AA.aas();
106     if (aalist.contains(res_type)) {
107     atomser2coord.put(atomserial, coords);
108     atomser2resser.put(atomserial, res_serial);
109     resser2restype.put(res_serial, res_type);
110     ArrayList<String> atomlist = aas2atoms.get(res_type);
111     if (atomlist.contains(atom)){
112     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
113     }
114     }
115     }
116     }
117     }
118     fpdb.close();
119     // now we read the sequence from the resser2restype HashMap
120     // NOTE: we must make sure elsewhere that there are no unobserved residues, we can't check that here!
121     ArrayList<Integer> ressers = new ArrayList<Integer>();
122     for (int resser:resser2restype.keySet()) {
123     ressers.add(resser);
124     }
125     Collections.sort(ressers);
126     sequence="";
127     for (int resser:ressers){
128     String oneletter = AA.threeletter2oneletter(resser2restype.get(resser));
129     sequence += oneletter;
130     }
131     // finally we set accode and chaincode to unknown
132     //TODO: we should parse accode and chaincode from appropriate fields in pdb file,
133     // problem: in case of a non-original pdb file there won't be accession code
134     accode="?";
135     chaincode="?";
136     }
137    
138 duarte 123 public void dump2pdbfile(String outfile) throws IOException {
139     String chainstr=chain;
140     if (chain.equals("NULL")){
141     chainstr="A";
142     }
143     PrintStream Out = new PrintStream(new FileOutputStream(outfile));
144     Out.println("HEADER Dumped from "+db+". pdb accession code="+accode+", pdb chain code="+chaincode);
145     for (String resser_atom:resser_atom2atomserial.keySet()){
146     int atomserial = resser_atom2atomserial.get(resser_atom);
147     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
148     String atom = resser_atom.split("_")[1];
149     String res_type = resser2restype.get(res_serial);
150     Double[] coords = atomser2coord.get(atomserial);
151     Object[] fields = {atomserial, atom, res_type, chainstr, res_serial, coords[0], coords[1], coords[2]};
152 duarte 127 Out.printf("ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",fields);
153 duarte 123 }
154     Out.println("END");
155     Out.close();
156     }
157    
158     public void dumpseq(String seqfile) throws IOException {
159     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
160     Out.println(">"+accode+"_"+chaincode);
161     Out.println(sequence);
162     Out.close();
163     }
164    
165     public int get_length(){
166     return resser2restype.size();
167     }
168    
169     public HashMap<Integer,Double[]> get_coords_for_ct(String ct) {
170     HashMap<Integer,Double[]> coords = new HashMap<Integer,Double[]>();
171     HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
172     for (int resser:resser2restype.keySet()){
173     String[] atoms = restype2atoms.get(resser2restype.get(resser));
174     for (String atom:atoms){
175     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
176     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
177     Double[] coord = atomser2coord.get(atomser);
178     coords.put(atomser, coord);
179     }
180     else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
181     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
182     Double[] coord = atomser2coord.get(atomser);
183     coords.put(atomser, coord);
184     }
185     else {
186     System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
187     }
188     }
189     }
190     return coords;
191     }
192    
193     public TreeMap<Contact, Double> calculate_dist_matrix(String ct){
194     TreeMap<Contact,Double> dist_matrix = new TreeMap<Contact,Double>();
195     if (!ct.contains("/")){
196     HashMap<Integer,Double[]> coords = get_coords_for_ct(ct);
197     for (int i_atomser:coords.keySet()){
198     for (int j_atomser:coords.keySet()){
199     if (j_atomser>i_atomser) {
200     Contact pair = new Contact(i_atomser,j_atomser);
201     dist_matrix.put(pair, distance(coords.get(i_atomser),coords.get(j_atomser)));
202     }
203     }
204     }
205     } else {
206     String i_ct = ct.split("/")[0];
207     String j_ct = ct.split("/")[1];
208     HashMap<Integer,Double[]> i_coords = get_coords_for_ct(i_ct);
209     HashMap<Integer,Double[]> j_coords = get_coords_for_ct(j_ct);
210     for (int i_atomser:i_coords.keySet()){
211     for (int j_atomser:j_coords.keySet()){
212     if (j_atomser!=i_atomser){
213     Contact pair = new Contact(i_atomser,j_atomser);
214     dist_matrix.put(pair, distance(i_coords.get(i_atomser),j_coords.get(j_atomser)));
215     }
216     }
217     }
218     }
219     return dist_matrix;
220     }
221    
222 duarte 129 /**
223     * Get the contacts for given contact type and cutoff for this Pdb object.
224     * Returns a Graph object with the contacts
225     * The graph is always directional, i.e. in crossed cases xx/yy: i corresponds to xx and j to yy
226     * ct here can be single contact type (e.g. Ca, BB) or crossed (e.g. BB/SC or Ca/Cb)
227     * @param ct
228     * @param cutoff
229     * @return
230     */
231 duarte 123 public Graph get_graph(String ct, double cutoff){
232     TreeMap<Contact,Double> dist_matrix = calculate_dist_matrix(ct);
233     ArrayList<Contact> contacts = new ArrayList<Contact>();
234 duarte 129 // we loop here over all indices of dist_matrix,
235     // we took care already that in symmetric cases (i.e. single contact type, not crossed) we have only one side of the matrix and
236     // in asymmetrical cases (i.e. crossed cases) we have both sides of the matrix
237 duarte 123 for (Contact pair:dist_matrix.keySet()){
238     int i_atomser=pair.i;
239     int j_atomser=pair.j;
240     if (dist_matrix.get(pair)<=cutoff){
241     int i_resser = atomser2resser.get(i_atomser);
242     int j_resser = atomser2resser.get(j_atomser);
243     Contact resser_pair = new Contact(i_resser,j_resser);
244 duarte 129 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
245 duarte 123 if (i_resser!=j_resser && (! contacts.contains(resser_pair))){
246     contacts.add(resser_pair);
247     }
248     }
249     }
250     Collections.sort(contacts);
251 duarte 129 TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
252     for (int resser:resser2restype.keySet()){
253     nodes.put(resser,resser2restype.get(resser));
254     }
255     // NOTE: we pass to the graph object the chain (internal pdbase id) not the pdb chain code!
256     Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,accode,chain);
257 duarte 123 return graph;
258     }
259    
260     public Double distance(Double[] coords1, Double[] coords2){
261     return Math.sqrt(Math.pow((coords1[0]-coords2[0]),2)+Math.pow((coords1[1]-coords2[1]),2)+Math.pow((coords1[2]-coords2[2]),2));
262     }
263     }