ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 222
Committed: Wed Jul 4 15:39:01 2007 UTC (17 years, 2 months ago) by duarte
File size: 8476 byte(s)
Log Message:
New secstruct2resinterval TreeMap to store secondary structure elements as a map of ss string ids to intervals
New class Interval
Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3     import java.io.FileOutputStream;
4     import java.io.PrintStream;
5     import java.io.IOException;
6     import java.util.HashMap;
7 duarte 216 import java.util.Locale;
8 duarte 123 import java.util.TreeMap;
9     import java.util.ArrayList;
10     import java.util.Collections;
11    
12 duarte 207 /**
13     * A single chain pdb protein structure
14     *
15     * @author Jose Duarte
16     * Class: Pdb
17     * Package: proteinstructure
18     */
19     public abstract class Pdb {
20 duarte 123
21 duarte 208 protected final static int DEFAULT_MODEL=1; // default model serial (NMR structures)
22     public final static String NONSTANDARD_AA_LETTER="X"; // letter we assign to nonstandard aas to use in sequence
23 duarte 153
24 duarte 208 protected HashMap<String,Integer> resser_atom2atomserial; // residue serial+atom name (separated by underscore) to atom serials
25     protected HashMap<Integer,String> resser2restype; // residue serial to 3 letter residue type
26     protected HashMap<Integer,Double[]> atomser2coord; // atom serials to 3D coordinates
27     protected HashMap<Integer,Integer> atomser2resser; // atom serials to residue serials
28     protected HashMap<String,Integer> pdbresser2resser; // pdb (author) residue serials (can include insetion codes so they are strings) to internal residue serials
29     protected HashMap<Integer,String> resser2pdbresser; // internal residue serials to pdb (author) residue serials (can include insertion codes so they are strings)
30 duarte 190
31 duarte 222 protected HashMap<Integer,String> resser2secstruct; // residue serials to secondary structure
32     protected TreeMap<String,Interval> secstruct2resinterval;// secondary structure element to residue serial intervals
33 duarte 219
34 duarte 208 protected HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms(); // contains atom names for each aminoacid
35 duarte 135
36 duarte 208 protected String sequence; // full sequence as it appears in SEQRES field
37     protected String pdbCode;
38     // given "external" pdb chain code, i.e. the classic (author's) pdb code ("NULL" if it is blank in original pdb file)
39     protected String pdbChainCode;
40     // Our internal chain identifier:
41 duarte 135 // - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd)
42 duarte 208 // - in reading from pdb file it coincides with pdbChainCode except for "NULL" where we use "A"
43 duarte 207 protected String chainCode;
44 duarte 208 protected int model; // the model serial for NMR structures
45 duarte 123
46 duarte 208 protected String db; // the db from which we have taken the data (if applies)
47 duarte 153
48 duarte 208
49    
50 duarte 133
51 duarte 153 /**
52     * Dumps coordinate data into a file in pdb format (ATOM lines only)
53 duarte 206 * The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects
54 duarte 153 * @param outfile
55     * @throws IOException
56     */
57 duarte 123 public void dump2pdbfile(String outfile) throws IOException {
58 duarte 130 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
59 duarte 123 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
60 duarte 206 Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'");
61 duarte 123 for (String resser_atom:resser_atom2atomserial.keySet()){
62     int atomserial = resser_atom2atomserial.get(resser_atom);
63     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
64     String atom = resser_atom.split("_")[1];
65     String res_type = resser2restype.get(res_serial);
66     Double[] coords = atomser2coord.get(atomserial);
67 duarte 206 Object[] fields = {atomserial, atom, res_type, chainCode, res_serial, coords[0], coords[1], coords[2]};
68 duarte 130 lines.put(atomserial, fields);
69 duarte 123 }
70 duarte 130 for (int atomserial:lines.keySet()){
71 duarte 216 // Local.US is necessary, otherwise java prints the doubles locale-dependant (i.e. with ',' for some locales)
72     Out.printf(Locale.US,"ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",lines.get(atomserial));
73 duarte 130 }
74 duarte 123 Out.println("END");
75     Out.close();
76     }
77    
78     public void dumpseq(String seqfile) throws IOException {
79     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
80 duarte 206 Out.println(">"+pdbCode+"_"+pdbChainCode);
81 duarte 123 Out.println(sequence);
82     Out.close();
83     }
84    
85     public int get_length(){
86     return resser2restype.size();
87     }
88    
89     public HashMap<Integer,Double[]> get_coords_for_ct(String ct) {
90     HashMap<Integer,Double[]> coords = new HashMap<Integer,Double[]>();
91     HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
92     for (int resser:resser2restype.keySet()){
93     String[] atoms = restype2atoms.get(resser2restype.get(resser));
94     for (String atom:atoms){
95     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
96     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
97     Double[] coord = atomser2coord.get(atomser);
98     coords.put(atomser, coord);
99     }
100     else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
101     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
102     Double[] coord = atomser2coord.get(atomser);
103     coords.put(atomser, coord);
104     }
105     else {
106     System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
107     }
108     }
109     }
110     return coords;
111     }
112    
113     public TreeMap<Contact, Double> calculate_dist_matrix(String ct){
114     TreeMap<Contact,Double> dist_matrix = new TreeMap<Contact,Double>();
115     if (!ct.contains("/")){
116     HashMap<Integer,Double[]> coords = get_coords_for_ct(ct);
117     for (int i_atomser:coords.keySet()){
118     for (int j_atomser:coords.keySet()){
119     if (j_atomser>i_atomser) {
120     Contact pair = new Contact(i_atomser,j_atomser);
121     dist_matrix.put(pair, distance(coords.get(i_atomser),coords.get(j_atomser)));
122     }
123     }
124     }
125     } else {
126     String i_ct = ct.split("/")[0];
127     String j_ct = ct.split("/")[1];
128     HashMap<Integer,Double[]> i_coords = get_coords_for_ct(i_ct);
129     HashMap<Integer,Double[]> j_coords = get_coords_for_ct(j_ct);
130     for (int i_atomser:i_coords.keySet()){
131     for (int j_atomser:j_coords.keySet()){
132     if (j_atomser!=i_atomser){
133     Contact pair = new Contact(i_atomser,j_atomser);
134     dist_matrix.put(pair, distance(i_coords.get(i_atomser),j_coords.get(j_atomser)));
135     }
136     }
137     }
138     }
139     return dist_matrix;
140     }
141    
142 duarte 129 /**
143     * Get the contacts for given contact type and cutoff for this Pdb object.
144     * Returns a Graph object with the contacts
145     * The graph is always directional, i.e. in crossed cases xx/yy: i corresponds to xx and j to yy
146     * ct here can be single contact type (e.g. Ca, BB) or crossed (e.g. BB/SC or Ca/Cb)
147     * @param ct
148     * @param cutoff
149     * @return
150     */
151 duarte 190 @SuppressWarnings("unchecked")
152 duarte 123 public Graph get_graph(String ct, double cutoff){
153     TreeMap<Contact,Double> dist_matrix = calculate_dist_matrix(ct);
154 duarte 175 ContactList contacts = new ContactList();
155 duarte 129 // we loop here over all indices of dist_matrix,
156     // we took care already that in symmetric cases (i.e. single contact type, not crossed) we have only one side of the matrix and
157     // in asymmetrical cases (i.e. crossed cases) we have both sides of the matrix
158 duarte 123 for (Contact pair:dist_matrix.keySet()){
159     int i_atomser=pair.i;
160     int j_atomser=pair.j;
161     if (dist_matrix.get(pair)<=cutoff){
162     int i_resser = atomser2resser.get(i_atomser);
163     int j_resser = atomser2resser.get(j_atomser);
164     Contact resser_pair = new Contact(i_resser,j_resser);
165 duarte 129 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
166 duarte 123 if (i_resser!=j_resser && (! contacts.contains(resser_pair))){
167     contacts.add(resser_pair);
168     }
169     }
170     }
171     Collections.sort(contacts);
172 duarte 129 TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
173     for (int resser:resser2restype.keySet()){
174     nodes.put(resser,resser2restype.get(resser));
175     }
176 duarte 206 Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
177 duarte 123 return graph;
178     }
179    
180 duarte 206 private Double distance(Double[] coords1, Double[] coords2){
181 duarte 123 return Math.sqrt(Math.pow((coords1[0]-coords2[0]),2)+Math.pow((coords1[1]-coords2[1]),2)+Math.pow((coords1[2]-coords2[2]),2));
182     }
183 duarte 190
184     public int get_resser_from_pdbresser (String pdbresser){
185     return pdbresser2resser.get(pdbresser);
186     }
187    
188     public String get_pdbresser_from_resser (int resser){
189     return resser2pdbresser.get(resser);
190     }
191    
192 duarte 200 public int get_resser_from_atomser(int atomser){
193     return atomser2resser.get(atomser);
194     }
195 duarte 206
196     public String getChainCode(){
197     return this.chainCode;
198     }
199    
200     public String getPdbChainCode(){
201     return this.pdbChainCode;
202     }
203 duarte 219
204     public String getSecStructure(int resser){
205     return this.resser2secstruct.get(resser);
206     }
207 duarte 222
208     public TreeMap<String,Interval> getAllSecStructElements(){
209     return secstruct2resinterval;
210     }
211 duarte 123 }