ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/Pdb.java
Revision: 216
Committed: Fri Jun 29 08:30:55 2007 UTC (17 years, 3 months ago) by duarte
File size: 8070 byte(s)
Log Message:
Fixed bug: ATOM lines printing was locale dependant, thus was printing ',' for decimal separators in some locales
Line User Rev File contents
1 duarte 123 package proteinstructure;
2    
3     import java.io.FileOutputStream;
4     import java.io.PrintStream;
5     import java.io.IOException;
6     import java.util.HashMap;
7 duarte 216 import java.util.Locale;
8 duarte 123 import java.util.TreeMap;
9     import java.util.ArrayList;
10     import java.util.Collections;
11    
12 duarte 207 /**
13     * A single chain pdb protein structure
14     *
15     * @author Jose Duarte
16     * Class: Pdb
17     * Package: proteinstructure
18     */
19     public abstract class Pdb {
20 duarte 123
21 duarte 208 protected final static int DEFAULT_MODEL=1; // default model serial (NMR structures)
22     public final static String NONSTANDARD_AA_LETTER="X"; // letter we assign to nonstandard aas to use in sequence
23 duarte 153
24 duarte 208 protected HashMap<String,Integer> resser_atom2atomserial; // residue serial+atom name (separated by underscore) to atom serials
25     protected HashMap<Integer,String> resser2restype; // residue serial to 3 letter residue type
26     protected HashMap<Integer,Double[]> atomser2coord; // atom serials to 3D coordinates
27     protected HashMap<Integer,Integer> atomser2resser; // atom serials to residue serials
28     protected HashMap<String,Integer> pdbresser2resser; // pdb (author) residue serials (can include insetion codes so they are strings) to internal residue serials
29     protected HashMap<Integer,String> resser2pdbresser; // internal residue serials to pdb (author) residue serials (can include insertion codes so they are strings)
30 duarte 190
31 duarte 208 protected HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms(); // contains atom names for each aminoacid
32 duarte 135
33 duarte 208 protected String sequence; // full sequence as it appears in SEQRES field
34     protected String pdbCode;
35     // given "external" pdb chain code, i.e. the classic (author's) pdb code ("NULL" if it is blank in original pdb file)
36     protected String pdbChainCode;
37     // Our internal chain identifier:
38 duarte 135 // - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd)
39 duarte 208 // - in reading from pdb file it coincides with pdbChainCode except for "NULL" where we use "A"
40 duarte 207 protected String chainCode;
41 duarte 208 protected int model; // the model serial for NMR structures
42 duarte 123
43 duarte 208 protected String db; // the db from which we have taken the data (if applies)
44 duarte 153
45 duarte 208
46    
47 duarte 133
48 duarte 153 /**
49     * Dumps coordinate data into a file in pdb format (ATOM lines only)
50 duarte 206 * The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects
51 duarte 153 * @param outfile
52     * @throws IOException
53     */
54 duarte 123 public void dump2pdbfile(String outfile) throws IOException {
55 duarte 130 TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>();
56 duarte 123 PrintStream Out = new PrintStream(new FileOutputStream(outfile));
57 duarte 206 Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'");
58 duarte 123 for (String resser_atom:resser_atom2atomserial.keySet()){
59     int atomserial = resser_atom2atomserial.get(resser_atom);
60     int res_serial = Integer.parseInt(resser_atom.split("_")[0]);
61     String atom = resser_atom.split("_")[1];
62     String res_type = resser2restype.get(res_serial);
63     Double[] coords = atomser2coord.get(atomserial);
64 duarte 206 Object[] fields = {atomserial, atom, res_type, chainCode, res_serial, coords[0], coords[1], coords[2]};
65 duarte 130 lines.put(atomserial, fields);
66 duarte 123 }
67 duarte 130 for (int atomserial:lines.keySet()){
68 duarte 216 // Local.US is necessary, otherwise java prints the doubles locale-dependant (i.e. with ',' for some locales)
69     Out.printf(Locale.US,"ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",lines.get(atomserial));
70 duarte 130 }
71 duarte 123 Out.println("END");
72     Out.close();
73     }
74    
75     public void dumpseq(String seqfile) throws IOException {
76     PrintStream Out = new PrintStream(new FileOutputStream(seqfile));
77 duarte 206 Out.println(">"+pdbCode+"_"+pdbChainCode);
78 duarte 123 Out.println(sequence);
79     Out.close();
80     }
81    
82     public int get_length(){
83     return resser2restype.size();
84     }
85    
86     public HashMap<Integer,Double[]> get_coords_for_ct(String ct) {
87     HashMap<Integer,Double[]> coords = new HashMap<Integer,Double[]>();
88     HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct);
89     for (int resser:resser2restype.keySet()){
90     String[] atoms = restype2atoms.get(resser2restype.get(resser));
91     for (String atom:atoms){
92     if (resser_atom2atomserial.containsKey(resser+"_"+atom)){
93     int atomser = resser_atom2atomserial.get(resser+"_"+atom);
94     Double[] coord = atomser2coord.get(atomser);
95     coords.put(atomser, coord);
96     }
97     else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){
98     int atomser = resser_atom2atomserial.get(resser+"_"+"OXT");
99     Double[] coord = atomser2coord.get(atomser);
100     coords.put(atomser, coord);
101     }
102     else {
103     System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser.");
104     }
105     }
106     }
107     return coords;
108     }
109    
110     public TreeMap<Contact, Double> calculate_dist_matrix(String ct){
111     TreeMap<Contact,Double> dist_matrix = new TreeMap<Contact,Double>();
112     if (!ct.contains("/")){
113     HashMap<Integer,Double[]> coords = get_coords_for_ct(ct);
114     for (int i_atomser:coords.keySet()){
115     for (int j_atomser:coords.keySet()){
116     if (j_atomser>i_atomser) {
117     Contact pair = new Contact(i_atomser,j_atomser);
118     dist_matrix.put(pair, distance(coords.get(i_atomser),coords.get(j_atomser)));
119     }
120     }
121     }
122     } else {
123     String i_ct = ct.split("/")[0];
124     String j_ct = ct.split("/")[1];
125     HashMap<Integer,Double[]> i_coords = get_coords_for_ct(i_ct);
126     HashMap<Integer,Double[]> j_coords = get_coords_for_ct(j_ct);
127     for (int i_atomser:i_coords.keySet()){
128     for (int j_atomser:j_coords.keySet()){
129     if (j_atomser!=i_atomser){
130     Contact pair = new Contact(i_atomser,j_atomser);
131     dist_matrix.put(pair, distance(i_coords.get(i_atomser),j_coords.get(j_atomser)));
132     }
133     }
134     }
135     }
136     return dist_matrix;
137     }
138    
139 duarte 129 /**
140     * Get the contacts for given contact type and cutoff for this Pdb object.
141     * Returns a Graph object with the contacts
142     * The graph is always directional, i.e. in crossed cases xx/yy: i corresponds to xx and j to yy
143     * ct here can be single contact type (e.g. Ca, BB) or crossed (e.g. BB/SC or Ca/Cb)
144     * @param ct
145     * @param cutoff
146     * @return
147     */
148 duarte 190 @SuppressWarnings("unchecked")
149 duarte 123 public Graph get_graph(String ct, double cutoff){
150     TreeMap<Contact,Double> dist_matrix = calculate_dist_matrix(ct);
151 duarte 175 ContactList contacts = new ContactList();
152 duarte 129 // we loop here over all indices of dist_matrix,
153     // we took care already that in symmetric cases (i.e. single contact type, not crossed) we have only one side of the matrix and
154     // in asymmetrical cases (i.e. crossed cases) we have both sides of the matrix
155 duarte 123 for (Contact pair:dist_matrix.keySet()){
156     int i_atomser=pair.i;
157     int j_atomser=pair.j;
158     if (dist_matrix.get(pair)<=cutoff){
159     int i_resser = atomser2resser.get(i_atomser);
160     int j_resser = atomser2resser.get(j_atomser);
161     Contact resser_pair = new Contact(i_resser,j_resser);
162 duarte 129 // for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates
163 duarte 123 if (i_resser!=j_resser && (! contacts.contains(resser_pair))){
164     contacts.add(resser_pair);
165     }
166     }
167     }
168     Collections.sort(contacts);
169 duarte 129 TreeMap<Integer,String> nodes = new TreeMap<Integer,String>();
170     for (int resser:resser2restype.keySet()){
171     nodes.put(resser,resser2restype.get(resser));
172     }
173 duarte 206 Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode);
174 duarte 123 return graph;
175     }
176    
177 duarte 206 private Double distance(Double[] coords1, Double[] coords2){
178 duarte 123 return Math.sqrt(Math.pow((coords1[0]-coords2[0]),2)+Math.pow((coords1[1]-coords2[1]),2)+Math.pow((coords1[2]-coords2[2]),2));
179     }
180 duarte 190
181     public int get_resser_from_pdbresser (String pdbresser){
182     return pdbresser2resser.get(pdbresser);
183     }
184    
185     public String get_pdbresser_from_resser (int resser){
186     return resser2pdbresser.get(resser);
187     }
188    
189 duarte 200 public int get_resser_from_atomser(int atomser){
190     return atomser2resser.get(atomser);
191     }
192 duarte 206
193     public String getChainCode(){
194     return this.chainCode;
195     }
196    
197     public String getPdbChainCode(){
198     return this.pdbChainCode;
199     }
200 duarte 123 }