ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 208
Committed: Wed Jun 27 14:42:12 2007 UTC (17 years, 3 months ago) by duarte
File size: 7922 byte(s)
Log Message:
FIXED BUGS:
- directed was not set when reading from cm file
- pdbChainCode not set when reading from db given pdbCode
- filling pdbresser2resser and resser2pdbresser hashmaps also in reading from pdb file
- using "A" as chainCode when reading from pdb file
- some error handling for file formats: new exception classes GraphFileFormatError and PdbfileFormatError
Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3     import java.io.FileOutputStream;
4     import java.io.PrintStream;
5     import java.io.IOException;
6     import java.util.HashMap;
7     import java.util.TreeMap;
8     import java.util.ArrayList;
9     import java.util.Collections;
10    
11 duarte 207 /**
12     * A single chain pdb protein structure
13     *
14     * @author Jose Duarte
15     * Class: Pdb
16     * Package: proteinstructure
17     */
18     public abstract class Pdb {
19 duarte 123
20 duarte 208 protected final static int DEFAULT_MODEL=1; // default model serial (NMR structures)
21     public final static String NONSTANDARD_AA_LETTER="X"; // letter we assign to nonstandard aas to use in sequence
22 duarte 153
23 duarte 208 protected HashMap<String,Integer> resser_atom2atomserial; // residue serial+atom name (separated by underscore) to atom serials
24     protected HashMap<Integer,String> resser2restype; // residue serial to 3 letter residue type
25     protected HashMap<Integer,Double[]> atomser2coord; // atom serials to 3D coordinates
26     protected HashMap<Integer,Integer> atomser2resser; // atom serials to residue serials
27     protected HashMap<String,Integer> pdbresser2resser; // pdb (author) residue serials (can include insetion codes so they are strings) to internal residue serials
28     protected HashMap<Integer,String> resser2pdbresser; // internal residue serials to pdb (author) residue serials (can include insertion codes so they are strings)
29 duarte 190
30 duarte 208 protected HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms(); // contains atom names for each aminoacid
31 duarte 135
32 duarte 208 protected String sequence; // full sequence as it appears in SEQRES field
33     protected String pdbCode;
34     // given "external" pdb chain code, i.e. the classic (author's) pdb code ("NULL" if it is blank in original pdb file)
35     protected String pdbChainCode;
36     // Our internal chain identifier:
37 duarte 135 // - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd)
38 duarte 208 // - in reading from pdb file it coincides with pdbChainCode except for "NULL" where we use "A"
39 duarte 207 protected String chainCode;
40 duarte 208 protected int model; // the model serial for NMR structures
41 duarte 123
42 duarte 208 protected String db; // the db from which we have taken the data (if applies)
43 duarte 153
44 duarte 208
45    
46 duarte 133
47 duarte 153 /**
48     * Dumps coordinate data into a file in pdb format (ATOM lines only)
49 duarte 206 * The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects
50 duarte 153 * @param outfile
51     * @throws IOException
52     */
53 duarte 123 public void dump2pdbfile(String outfile) throws IOException {
54 duarte 130 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
55 duarte 123 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
56 duarte 206 Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'");
57 duarte 123 for (String resser_atom:resser_atom2atomserial.keySet()){
58     int atomserial = resser_atom2atomserial.get(resser_atom);
59     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
60     String atom = resser_atom.split("_")[1];
61     String res_type = resser2restype.get(res_serial);
62     Double[] coords = atomser2coord.get(atomserial);
63 duarte 206 Object[] fields = {atomserial, atom, res_type, chainCode, res_serial, coords[0], coords[1], coords[2]};
64 duarte 130 lines.put(atomserial, fields);
65 duarte 123 }
66 duarte 130 for (int atomserial:lines.keySet()){
67     Out.printf("ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",lines.get(atomserial));
68     }
69 duarte 123 Out.println("END");
70     Out.close();
71     }
72    
73     public void dumpseq(String seqfile) throws IOException {
74     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
75 duarte 206 Out.println(">"+pdbCode+"_"+pdbChainCode);
76 duarte 123 Out.println(sequence);
77     Out.close();
78     }
79    
80     public int get_length(){
81     return resser2restype.size();
82     }
83    
84     public HashMap<Integer,Double[]> get_coords_for_ct(String ct) {
85     HashMap<Integer,Double[]> coords = new HashMap<Integer,Double[]>();
86     HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
87     for (int resser:resser2restype.keySet()){
88     String[] atoms = restype2atoms.get(resser2restype.get(resser));
89     for (String atom:atoms){
90     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
91     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
92     Double[] coord = atomser2coord.get(atomser);
93     coords.put(atomser, coord);
94     }
95     else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
96     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
97     Double[] coord = atomser2coord.get(atomser);
98     coords.put(atomser, coord);
99     }
100     else {
101     System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
102     }
103     }
104     }
105     return coords;
106     }
107    
108     public TreeMap<Contact, Double> calculate_dist_matrix(String ct){
109     TreeMap<Contact,Double> dist_matrix = new TreeMap<Contact,Double>();
110     if (!ct.contains("/")){
111     HashMap<Integer,Double[]> coords = get_coords_for_ct(ct);
112     for (int i_atomser:coords.keySet()){
113     for (int j_atomser:coords.keySet()){
114     if (j_atomser>i_atomser) {
115     Contact pair = new Contact(i_atomser,j_atomser);
116     dist_matrix.put(pair, distance(coords.get(i_atomser),coords.get(j_atomser)));
117     }
118     }
119     }
120     } else {
121     String i_ct = ct.split("/")[0];
122     String j_ct = ct.split("/")[1];
123     HashMap<Integer,Double[]> i_coords = get_coords_for_ct(i_ct);
124     HashMap<Integer,Double[]> j_coords = get_coords_for_ct(j_ct);
125     for (int i_atomser:i_coords.keySet()){
126     for (int j_atomser:j_coords.keySet()){
127     if (j_atomser!=i_atomser){
128     Contact pair = new Contact(i_atomser,j_atomser);
129     dist_matrix.put(pair, distance(i_coords.get(i_atomser),j_coords.get(j_atomser)));
130     }
131     }
132     }
133     }
134     return dist_matrix;
135     }
136    
137 duarte 129 /**
138     * Get the contacts for given contact type and cutoff for this Pdb object.
139     * Returns a Graph object with the contacts
140     * The graph is always directional, i.e. in crossed cases xx/yy: i corresponds to xx and j to yy
141     * ct here can be single contact type (e.g. Ca, BB) or crossed (e.g. BB/SC or Ca/Cb)
142     * @param ct
143     * @param cutoff
144     * @return
145     */
146 duarte 190 @SuppressWarnings("unchecked")
147 duarte 123 public Graph get_graph(String ct, double cutoff){
148     TreeMap<Contact,Double> dist_matrix = calculate_dist_matrix(ct);
149 duarte 175 ContactList contacts = new ContactList();
150 duarte 129 // we loop here over all indices of dist_matrix,
151     // we took care already that in symmetric cases (i.e. single contact type, not crossed) we have only one side of the matrix and
152     // in asymmetrical cases (i.e. crossed cases) we have both sides of the matrix
153 duarte 123 for (Contact pair:dist_matrix.keySet()){
154     int i_atomser=pair.i;
155     int j_atomser=pair.j;
156     if (dist_matrix.get(pair)<=cutoff){
157     int i_resser = atomser2resser.get(i_atomser);
158     int j_resser = atomser2resser.get(j_atomser);
159     Contact resser_pair = new Contact(i_resser,j_resser);
160 duarte 129 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
161 duarte 123 if (i_resser!=j_resser && (! contacts.contains(resser_pair))){
162     contacts.add(resser_pair);
163     }
164     }
165     }
166     Collections.sort(contacts);
167 duarte 129 TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
168     for (int resser:resser2restype.keySet()){
169     nodes.put(resser,resser2restype.get(resser));
170     }
171 duarte 206 Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
172 duarte 123 return graph;
173     }
174    
175 duarte 206 private Double distance(Double[] coords1, Double[] coords2){
176 duarte 123 return Math.sqrt(Math.pow((coords1[0]-coords2[0]),2)+Math.pow((coords1[1]-coords2[1]),2)+Math.pow((coords1[2]-coords2[2]),2));
177     }
178 duarte 190
179     public int get_resser_from_pdbresser (String pdbresser){
180     return pdbresser2resser.get(pdbresser);
181     }
182    
183     public String get_pdbresser_from_resser (int resser){
184     return resser2pdbresser.get(resser);
185     }
186    
187 duarte 200 public int get_resser_from_atomser(int atomser){
188     return atomser2resser.get(atomser);
189     }
190 duarte 206
191     public String getChainCode(){
192     return this.chainCode;
193     }
194    
195     public String getPdbChainCode(){
196     return this.pdbChainCode;
197     }
198 duarte 123 }