ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 224
Committed: Thu Jul 5 13:42:37 2007 UTC (17 years, 3 months ago) by duarte
File size: 8476 byte(s)
Log Message:
Now calculate distance matrix uses HashMap instead of TreeMap
Needed to change the hashCode function in Contact to make it faster
Line File contents
1 package proteinstructure;
2
3 import java.io.FileOutputStream;
4 import java.io.PrintStream;
5 import java.io.IOException;
6 import java.util.HashMap;
7 import java.util.Locale;
8 import java.util.TreeMap;
9 import java.util.ArrayList;
10 import java.util.Collections;
11
12 /**
13 * A single chain pdb protein structure
14 *
15 * @author Jose Duarte
16 * Class: Pdb
17 * Package: proteinstructure
18 */
19 public abstract class Pdb {
20
21 protected final static int DEFAULT_MODEL=1; // default model serial (NMR structures)
22 public final static String NONSTANDARD_AA_LETTER="X"; // letter we assign to nonstandard aas to use in sequence
23
24 protected HashMap<String,Integer> resser_atom2atomserial; // residue serial+atom name (separated by underscore) to atom serials
25 protected HashMap<Integer,String> resser2restype; // residue serial to 3 letter residue type
26 protected HashMap<Integer,Double[]> atomser2coord; // atom serials to 3D coordinates
27 protected HashMap<Integer,Integer> atomser2resser; // atom serials to residue serials
28 protected HashMap<String,Integer> pdbresser2resser; // pdb (author) residue serials (can include insetion codes so they are strings) to internal residue serials
29 protected HashMap<Integer,String> resser2pdbresser; // internal residue serials to pdb (author) residue serials (can include insertion codes so they are strings)
30
31 protected HashMap<Integer,String> resser2secstruct; // residue serials to secondary structure
32 protected TreeMap<String,Interval> secstruct2resinterval;// secondary structure element to residue serial intervals
33
34 protected HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms(); // contains atom names for each aminoacid
35
36 protected String sequence; // full sequence as it appears in SEQRES field
37 protected String pdbCode;
38 // given "external" pdb chain code, i.e. the classic (author's) pdb code ("NULL" if it is blank in original pdb file)
39 protected String pdbChainCode;
40 // Our internal chain identifier:
41 // - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd)
42 // - in reading from pdb file it coincides with pdbChainCode except for "NULL" where we use "A"
43 protected String chainCode;
44 protected int model; // the model serial for NMR structures
45
46 protected String db; // the db from which we have taken the data (if applies)
47
48
49
50
51 /**
52 * Dumps coordinate data into a file in pdb format (ATOM lines only)
53 * The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects
54 * @param outfile
55 * @throws IOException
56 */
57 public void dump2pdbfile(String outfile) throws IOException {
58 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
59 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
60 Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'");
61 for (String resser_atom:resser_atom2atomserial.keySet()){
62 int atomserial = resser_atom2atomserial.get(resser_atom);
63 int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
64 String atom = resser_atom.split("_")[1];
65 String res_type = resser2restype.get(res_serial);
66 Double[] coords = atomser2coord.get(atomserial);
67 Object[] fields = {atomserial, atom, res_type, chainCode, res_serial, coords[0], coords[1], coords[2]};
68 lines.put(atomserial, fields);
69 }
70 for (int atomserial:lines.keySet()){
71 // Local.US is necessary, otherwise java prints the doubles locale-dependant (i.e. with ',' for some locales)
72 Out.printf(Locale.US,"ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",lines.get(atomserial));
73 }
74 Out.println("END");
75 Out.close();
76 }
77
78 public void dumpseq(String seqfile) throws IOException {
79 PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
80 Out.println(">"+pdbCode+"_"+pdbChainCode);
81 Out.println(sequence);
82 Out.close();
83 }
84
85 public int get_length(){
86 return resser2restype.size();
87 }
88
89 public HashMap<Integer,Double[]> get_coords_for_ct(String ct) {
90 HashMap<Integer,Double[]> coords = new HashMap<Integer,Double[]>();
91 HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
92 for (int resser:resser2restype.keySet()){
93 String[] atoms = restype2atoms.get(resser2restype.get(resser));
94 for (String atom:atoms){
95 if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
96 int atomser = resser_atom2atomserial.get(resser+"_"+atom);
97 Double[] coord = atomser2coord.get(atomser);
98 coords.put(atomser, coord);
99 }
100 else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
101 int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
102 Double[] coord = atomser2coord.get(atomser);
103 coords.put(atomser, coord);
104 }
105 else {
106 System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
107 }
108 }
109 }
110 return coords;
111 }
112
113 public HashMap<Contact, Double> calculate_dist_matrix(String ct){
114 HashMap<Contact,Double> dist_matrix = new HashMap<Contact,Double>();
115 if (!ct.contains("/")){
116 HashMap<Integer,Double[]> coords = get_coords_for_ct(ct);
117 for (int i_atomser:coords.keySet()){
118 for (int j_atomser:coords.keySet()){
119 if (j_atomser>i_atomser) {
120 Contact pair = new Contact(i_atomser,j_atomser);
121 dist_matrix.put(pair, distance(coords.get(i_atomser),coords.get(j_atomser)));
122 }
123 }
124 }
125 } else {
126 String i_ct = ct.split("/")[0];
127 String j_ct = ct.split("/")[1];
128 HashMap<Integer,Double[]> i_coords = get_coords_for_ct(i_ct);
129 HashMap<Integer,Double[]> j_coords = get_coords_for_ct(j_ct);
130 for (int i_atomser:i_coords.keySet()){
131 for (int j_atomser:j_coords.keySet()){
132 if (j_atomser!=i_atomser){
133 Contact pair = new Contact(i_atomser,j_atomser);
134 dist_matrix.put(pair, distance(i_coords.get(i_atomser),j_coords.get(j_atomser)));
135 }
136 }
137 }
138 }
139 return dist_matrix;
140 }
141
142 /**
143 * Get the contacts for given contact type and cutoff for this Pdb object.
144 * Returns a Graph object with the contacts
145 * The graph is always directional, i.e. in crossed cases xx/yy: i corresponds to xx and j to yy
146 * ct here can be single contact type (e.g. Ca, BB) or crossed (e.g. BB/SC or Ca/Cb)
147 * @param ct
148 * @param cutoff
149 * @return
150 */
151 @SuppressWarnings("unchecked")
152 public Graph get_graph(String ct, double cutoff){
153 HashMap<Contact,Double> dist_matrix = calculate_dist_matrix(ct);
154 ContactList contacts = new ContactList();
155 // we loop here over all indices of dist_matrix,
156 // we took care already that in symmetric cases (i.e. single contact type, not crossed) we have only one side of the matrix and
157 // in asymmetrical cases (i.e. crossed cases) we have both sides of the matrix
158 for (Contact pair:dist_matrix.keySet()){
159 int i_atomser=pair.i;
160 int j_atomser=pair.j;
161 if (dist_matrix.get(pair)<=cutoff){
162 int i_resser = atomser2resser.get(i_atomser);
163 int j_resser = atomser2resser.get(j_atomser);
164 Contact resser_pair = new Contact(i_resser,j_resser);
165 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
166 if (i_resser!=j_resser && (! contacts.contains(resser_pair))){
167 contacts.add(resser_pair);
168 }
169 }
170 }
171 Collections.sort(contacts);
172 TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
173 for (int resser:resser2restype.keySet()){
174 nodes.put(resser,resser2restype.get(resser));
175 }
176 Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
177 return graph;
178 }
179
180 private Double distance(Double[] coords1, Double[] coords2){
181 return Math.sqrt(Math.pow((coords1[0]-coords2[0]),2)+Math.pow((coords1[1]-coords2[1]),2)+Math.pow((coords1[2]-coords2[2]),2));
182 }
183
184 public int get_resser_from_pdbresser (String pdbresser){
185 return pdbresser2resser.get(pdbresser);
186 }
187
188 public String get_pdbresser_from_resser (int resser){
189 return resser2pdbresser.get(resser);
190 }
191
192 public int get_resser_from_atomser(int atomser){
193 return atomser2resser.get(atomser);
194 }
195
196 public String getChainCode(){
197 return this.chainCode;
198 }
199
200 public String getPdbChainCode(){
201 return this.pdbChainCode;
202 }
203
204 public String getSecStructure(int resser){
205 return this.resser2secstruct.get(resser);
206 }
207
208 public TreeMap<String,Interval> getAllSecStructElements(){
209 return secstruct2resinterval;
210 }
211 }