ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 135
Committed: Mon May 14 09:49:04 2007 UTC (17 years, 4 months ago) by duarte
File size: 13363 byte(s)
Log Message:
NEW FUNCTIONALITY: reading of graph from db is fully implemented for all cases

New chain member variable in the Info classes, read in get_asym_id (Pdbase) and in get_chain_id (Msdsd)
Reading chain also in Pdb in read_pdb_data_from_file
Not reading chain anymore in read_atomData of PdbaseInfo
Added oneletter2threeletter and getoneletter2threeletter to AA class
Changes in Graph:
- added db static vars and getUserName method
- new member variables graphid and sm_id
- new method read_graph_from_db to read contacts, nodes (and sequence from nodes) from db
- new method getgraphid
New Exception class GraphIdNotFoundError
Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3 duarte 127 import java.io.BufferedReader;
4     import java.io.File;
5 duarte 123 import java.io.FileOutputStream;
6 duarte 127 import java.io.FileReader;
7 duarte 123 import java.io.PrintStream;
8 duarte 127 import java.io.FileNotFoundException;
9 duarte 123 import java.io.IOException;
10     import java.util.HashMap;
11     import java.util.TreeMap;
12     import java.util.ArrayList;
13     import java.util.Collections;
14 duarte 127 import java.util.regex.Matcher;
15     import java.util.regex.Pattern;
16 duarte 123
17     public class Pdb {
18    
19     HashMap<String,Integer> resser_atom2atomserial;
20     HashMap<Integer,String> resser2restype;
21     HashMap<Integer,Double[]> atomser2coord;
22     HashMap<Integer,Integer> atomser2resser;
23     HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms();
24     String sequence="";
25     String accode="";
26 duarte 135 // given "external" pdb chain code, i.e. the classic pdb code ("NULL" if it is blank in original pdb file)
27 duarte 123 String chaincode="";
28 duarte 133 int model=DEFAULT_MODEL;
29 duarte 123 String db;
30 duarte 135
31     // Our internal chain identifier (taken from dbs or pdb):
32     // - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd)
33     // - in reading from pdb file it gets set to whatever parsed from the pdb file
34 duarte 123 String chain;
35    
36 duarte 133 public static int DEFAULT_MODEL=1;
37    
38 duarte 130 public Pdb (String accode, String chaincode) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
39 duarte 133 this(accode,chaincode,DEFAULT_MODEL,PdbaseInfo.pdbaseDB);
40 duarte 123 }
41 duarte 133
42     public Pdb (String accode, String chaincode, int model_serial) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
43     this(accode,chaincode,model_serial,PdbaseInfo.pdbaseDB);
44     }
45    
46 duarte 130 public Pdb (String accode, String chaincode, String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
47 duarte 133 this(accode,chaincode,DEFAULT_MODEL,db);
48     }
49    
50     public Pdb (String accode, String chaincode, int model_serial, String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError, MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
51 duarte 123 this.accode=accode;
52     this.chaincode=chaincode;
53 duarte 133 this.model=model_serial;
54 duarte 123 this.db=db;
55 duarte 130 // we initialise it to chaincode, in read_pdb_data_from_pdbase gets reset to the right internal chain id. If reading msdsd it stays as chaincode
56     this.chain=chaincode;
57     if (db.contains("msdsd")){
58     read_pdb_data_from_msdsd(db);
59     } else if (db.contains("pdbase")){
60     read_pdb_data_from_pdbase(db);
61     }
62 duarte 123 }
63    
64 duarte 127 public Pdb (String pdbfile) throws FileNotFoundException, IOException {
65 duarte 133 this(pdbfile,DEFAULT_MODEL);
66     }
67    
68     public Pdb (String pdbfile, int model_serial) throws FileNotFoundException, IOException {
69 duarte 127 this.accode="";
70 duarte 133 this.model=model_serial;
71 duarte 127 read_pdb_data_from_file(pdbfile);
72 duarte 123 }
73 duarte 133
74 duarte 124 public void read_pdb_data_from_pdbase(String db) throws PdbaseInconsistencyError, PdbaseAcCodeNotFoundError{
75 duarte 123 resser_atom2atomserial = new HashMap<String,Integer>();
76     resser2restype = new HashMap<Integer,String>();
77     atomser2coord = new HashMap<Integer,Double[]>();
78     atomser2resser = new HashMap<Integer,Integer>();
79    
80 duarte 133 PdbaseInfo mypdbaseinfo = new PdbaseInfo(accode,chaincode,model,db);
81 duarte 123 ArrayList<ArrayList> resultset = mypdbaseinfo.read_atomData();
82 duarte 135 chain = mypdbaseinfo.chain;
83 duarte 123 sequence = mypdbaseinfo.read_seq();
84     mypdbaseinfo.close();
85    
86     for (ArrayList result:resultset){
87     int atomserial = (Integer) result.get(0);
88     String atom = (String) result.get(1);
89     String res_type = (String) result.get(2);
90 duarte 135 int res_serial = (Integer) result.get(3);
91     double x = (Double) result.get(4);
92     double y = (Double) result.get(5);
93     double z = (Double) result.get(6);
94 duarte 123 Double[] coords = {x, y, z};
95     ArrayList<String> aalist=AA.aas();
96     if (aalist.contains(res_type)) {
97     atomser2coord.put(atomserial, coords);
98     atomser2resser.put(atomserial, res_serial);
99     resser2restype.put(res_serial, res_type);
100     ArrayList<String> atomlist = aas2atoms.get(res_type);
101     if (atomlist.contains(atom)){
102     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
103     }
104     }
105     }
106     }
107    
108 duarte 130 public void read_pdb_data_from_msdsd(String db) throws MsdsdAcCodeNotFoundError, MsdsdInconsistentResidueNumbersError {
109     resser_atom2atomserial = new HashMap<String,Integer>();
110     resser2restype = new HashMap<Integer,String>();
111     atomser2coord = new HashMap<Integer,Double[]>();
112     atomser2resser = new HashMap<Integer,Integer>();
113    
114 duarte 133 MsdsdInfo mymsdsdinfo = new MsdsdInfo(accode,chaincode,model,db);
115 duarte 130 ArrayList<ArrayList> resultset = mymsdsdinfo.read_atomData();
116     sequence = mymsdsdinfo.read_seq();
117     mymsdsdinfo.close();
118    
119     for (ArrayList result:resultset){
120     int atomserial = (Integer) result.get(0);
121     String atom = (String) result.get(1);
122     String res_type = (String) result.get(2);
123     int res_serial = (Integer) result.get(3);
124     double x = (Double) result.get(4);
125     double y = (Double) result.get(5);
126     double z = (Double) result.get(6);
127     Double[] coords = {x, y, z};
128     ArrayList<String> aalist=AA.aas();
129     if (aalist.contains(res_type)) {
130     atomser2coord.put(atomserial, coords);
131     atomser2resser.put(atomserial, res_serial);
132     resser2restype.put(res_serial, res_type);
133     ArrayList<String> atomlist = aas2atoms.get(res_type);
134     if (atomlist.contains(atom)){
135     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
136     }
137     }
138     }
139     }
140    
141 duarte 133 /**
142     * To read the pdb data (atom coordinates, residue serials, atom serials) from file
143     * NOTE: for now they must be single model, single chain files!! Nothing else implemented!
144     * @param pdbfile
145     * @throws FileNotFoundException
146     * @throws IOException
147     */
148 duarte 127 public void read_pdb_data_from_file(String pdbfile) throws FileNotFoundException, IOException{
149     resser_atom2atomserial = new HashMap<String,Integer>();
150     resser2restype = new HashMap<Integer,String>();
151     atomser2coord = new HashMap<Integer,Double[]>();
152     atomser2resser = new HashMap<Integer,Integer>();
153    
154     BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile)));
155     String line;
156     while ((line = fpdb.readLine() ) != null ) {
157     Pattern p = Pattern.compile("^ATOM");
158     Matcher m = p.matcher(line);
159     if (m.find()){
160 duarte 135 // serial atom res_type chain res_ser x y z
161     Pattern pl = Pattern.compile(".{6}(.....).{2}(...).{1}(...).{1}(.)(.{4}).{4}(.{8})(.{8})(.{8})",Pattern.CASE_INSENSITIVE);
162 duarte 127 Matcher ml = pl.matcher(line);
163     if (ml.find()) {
164     int atomserial=Integer.parseInt(ml.group(1).trim());
165     String atom = ml.group(2).trim();
166     String res_type = ml.group(3).trim();
167 duarte 135 chain = ml.group(4).trim();
168     int res_serial = Integer.parseInt(ml.group(5).trim());
169     double x = Double.parseDouble(ml.group(6).trim());
170     double y = Double.parseDouble(ml.group(7).trim());
171     double z = Double.parseDouble(ml.group(8).trim());
172 duarte 127 Double[] coords = {x, y, z};
173     ArrayList<String> aalist=AA.aas();
174     if (aalist.contains(res_type)) {
175     atomser2coord.put(atomserial, coords);
176     atomser2resser.put(atomserial, res_serial);
177     resser2restype.put(res_serial, res_type);
178     ArrayList<String> atomlist = aas2atoms.get(res_type);
179     if (atomlist.contains(atom)){
180     resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
181     }
182     }
183     }
184     }
185     }
186     fpdb.close();
187     // now we read the sequence from the resser2restype HashMap
188     // NOTE: we must make sure elsewhere that there are no unobserved residues, we can't check that here!
189     ArrayList<Integer> ressers = new ArrayList<Integer>();
190     for (int resser:resser2restype.keySet()) {
191     ressers.add(resser);
192     }
193     Collections.sort(ressers);
194     sequence="";
195     for (int resser:ressers){
196     String oneletter = AA.threeletter2oneletter(resser2restype.get(resser));
197     sequence += oneletter;
198     }
199     // finally we set accode and chaincode to unknown
200     //TODO: we should parse accode and chaincode from appropriate fields in pdb file,
201     // problem: in case of a non-original pdb file there won't be accession code
202     accode="?";
203     chaincode="?";
204     }
205    
206 duarte 123 public void dump2pdbfile(String outfile) throws IOException {
207     String chainstr=chain;
208     if (chain.equals("NULL")){
209     chainstr="A";
210     }
211 duarte 130 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
212 duarte 123 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
213     Out.println("HEADER Dumped from "+db+". pdb accession code="+accode+", pdb chain code="+chaincode);
214     for (String resser_atom:resser_atom2atomserial.keySet()){
215     int atomserial = resser_atom2atomserial.get(resser_atom);
216     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
217     String atom = resser_atom.split("_")[1];
218     String res_type = resser2restype.get(res_serial);
219     Double[] coords = atomser2coord.get(atomserial);
220     Object[] fields = {atomserial, atom, res_type, chainstr, res_serial, coords[0], coords[1], coords[2]};
221 duarte 130 lines.put(atomserial, fields);
222 duarte 123 }
223 duarte 130 for (int atomserial:lines.keySet()){
224     Out.printf("ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",lines.get(atomserial));
225     }
226 duarte 123 Out.println("END");
227     Out.close();
228     }
229    
230     public void dumpseq(String seqfile) throws IOException {
231     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
232     Out.println(">"+accode+"_"+chaincode);
233     Out.println(sequence);
234     Out.close();
235     }
236    
237     public int get_length(){
238     return resser2restype.size();
239     }
240    
241     public HashMap<Integer,Double[]> get_coords_for_ct(String ct) {
242     HashMap<Integer,Double[]> coords = new HashMap<Integer,Double[]>();
243     HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
244     for (int resser:resser2restype.keySet()){
245     String[] atoms = restype2atoms.get(resser2restype.get(resser));
246     for (String atom:atoms){
247     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
248     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
249     Double[] coord = atomser2coord.get(atomser);
250     coords.put(atomser, coord);
251     }
252     else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
253     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
254     Double[] coord = atomser2coord.get(atomser);
255     coords.put(atomser, coord);
256     }
257     else {
258     System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
259     }
260     }
261     }
262     return coords;
263     }
264    
265     public TreeMap<Contact, Double> calculate_dist_matrix(String ct){
266     TreeMap<Contact,Double> dist_matrix = new TreeMap<Contact,Double>();
267     if (!ct.contains("/")){
268     HashMap<Integer,Double[]> coords = get_coords_for_ct(ct);
269     for (int i_atomser:coords.keySet()){
270     for (int j_atomser:coords.keySet()){
271     if (j_atomser>i_atomser) {
272     Contact pair = new Contact(i_atomser,j_atomser);
273     dist_matrix.put(pair, distance(coords.get(i_atomser),coords.get(j_atomser)));
274     }
275     }
276     }
277     } else {
278     String i_ct = ct.split("/")[0];
279     String j_ct = ct.split("/")[1];
280     HashMap<Integer,Double[]> i_coords = get_coords_for_ct(i_ct);
281     HashMap<Integer,Double[]> j_coords = get_coords_for_ct(j_ct);
282     for (int i_atomser:i_coords.keySet()){
283     for (int j_atomser:j_coords.keySet()){
284     if (j_atomser!=i_atomser){
285     Contact pair = new Contact(i_atomser,j_atomser);
286     dist_matrix.put(pair, distance(i_coords.get(i_atomser),j_coords.get(j_atomser)));
287     }
288     }
289     }
290     }
291     return dist_matrix;
292     }
293    
294 duarte 129 /**
295     * Get the contacts for given contact type and cutoff for this Pdb object.
296     * Returns a Graph object with the contacts
297     * The graph is always directional, i.e. in crossed cases xx/yy: i corresponds to xx and j to yy
298     * ct here can be single contact type (e.g. Ca, BB) or crossed (e.g. BB/SC or Ca/Cb)
299     * @param ct
300     * @param cutoff
301     * @return
302     */
303 duarte 123 public Graph get_graph(String ct, double cutoff){
304     TreeMap<Contact,Double> dist_matrix = calculate_dist_matrix(ct);
305     ArrayList<Contact> contacts = new ArrayList<Contact>();
306 duarte 129 // we loop here over all indices of dist_matrix,
307     // we took care already that in symmetric cases (i.e. single contact type, not crossed) we have only one side of the matrix and
308     // in asymmetrical cases (i.e. crossed cases) we have both sides of the matrix
309 duarte 123 for (Contact pair:dist_matrix.keySet()){
310     int i_atomser=pair.i;
311     int j_atomser=pair.j;
312     if (dist_matrix.get(pair)<=cutoff){
313     int i_resser = atomser2resser.get(i_atomser);
314     int j_resser = atomser2resser.get(j_atomser);
315     Contact resser_pair = new Contact(i_resser,j_resser);
316 duarte 129 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
317 duarte 123 if (i_resser!=j_resser && (! contacts.contains(resser_pair))){
318     contacts.add(resser_pair);
319     }
320     }
321     }
322     Collections.sort(contacts);
323 duarte 129 TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
324     for (int resser:resser2restype.keySet()){
325     nodes.put(resser,resser2restype.get(resser));
326     }
327 duarte 135 // NOTE: we pass to the graph object the chain (internal chain identifier) not the pdb chain code!
328 duarte 129 Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,accode,chain);
329 duarte 123 return graph;
330     }
331    
332     public Double distance(Double[] coords1, Double[] coords2){
333     return Math.sqrt(Math.pow((coords1[0]-coords2[0]),2)+Math.pow((coords1[1]-coords2[1]),2)+Math.pow((coords1[2]-coords2[2]),2));
334     }
335     }