1 |
package proteinstructure; |
2 |
|
3 |
import java.io.FileOutputStream; |
4 |
import java.io.PrintStream; |
5 |
import java.io.IOException; |
6 |
import java.util.HashMap; |
7 |
import java.util.Locale; |
8 |
import java.util.TreeMap; |
9 |
import java.util.ArrayList; |
10 |
import java.util.Collections; |
11 |
|
12 |
/** |
13 |
* A single chain pdb protein structure |
14 |
* |
15 |
* @author Jose Duarte |
16 |
* Class: Pdb |
17 |
* Package: proteinstructure |
18 |
*/ |
19 |
public abstract class Pdb { |
20 |
|
21 |
protected final static int DEFAULT_MODEL=1; // default model serial (NMR structures) |
22 |
public final static String NONSTANDARD_AA_LETTER="X"; // letter we assign to nonstandard aas to use in sequence |
23 |
|
24 |
protected HashMap<String,Integer> resser_atom2atomserial; // residue serial+atom name (separated by underscore) to atom serials |
25 |
protected HashMap<Integer,String> resser2restype; // residue serial to 3 letter residue type |
26 |
protected HashMap<Integer,Double[]> atomser2coord; // atom serials to 3D coordinates |
27 |
protected HashMap<Integer,Integer> atomser2resser; // atom serials to residue serials |
28 |
protected HashMap<String,Integer> pdbresser2resser; // pdb (author) residue serials (can include insetion codes so they are strings) to internal residue serials |
29 |
protected HashMap<Integer,String> resser2pdbresser; // internal residue serials to pdb (author) residue serials (can include insertion codes so they are strings) |
30 |
|
31 |
protected HashMap<Integer,String> resser2secstruct; // residue serials to secondary structure |
32 |
protected TreeMap<String,Interval> secstruct2resinterval;// secondary structure element to residue serial intervals |
33 |
|
34 |
protected HashMap<String,ArrayList<String>> aas2atoms = AA.getaas2atoms(); // contains atom names for each aminoacid |
35 |
|
36 |
protected String sequence; // full sequence as it appears in SEQRES field |
37 |
protected String pdbCode; |
38 |
// given "external" pdb chain code, i.e. the classic (author's) pdb code ("NULL" if it is blank in original pdb file) |
39 |
protected String pdbChainCode; |
40 |
// Our internal chain identifier: |
41 |
// - in reading from pdbase or from msdsd it will be set to the internal chain id (asym_id field for pdbase, pchain_id for msdsd) |
42 |
// - in reading from pdb file it coincides with pdbChainCode except for "NULL" where we use "A" |
43 |
protected String chainCode; |
44 |
protected int model; // the model serial for NMR structures |
45 |
|
46 |
protected String db; // the db from which we have taken the data (if applies) |
47 |
|
48 |
|
49 |
|
50 |
|
51 |
/** |
52 |
* Dumps coordinate data into a file in pdb format (ATOM lines only) |
53 |
* The chain dumped is the value of the chainCode variable, i.e. our internal chain identifier for Pdb objects |
54 |
* @param outfile |
55 |
* @throws IOException |
56 |
*/ |
57 |
public void dump2pdbfile(String outfile) throws IOException { |
58 |
TreeMap<Integer,Object[]> lines = new TreeMap<Integer,Object[]>(); |
59 |
PrintStream Out = new PrintStream(new FileOutputStream(outfile)); |
60 |
Out.println("HEADER Dumped from "+db+". pdb code="+pdbCode+", chain='"+chainCode+"'"); |
61 |
for (String resser_atom:resser_atom2atomserial.keySet()){ |
62 |
int atomserial = resser_atom2atomserial.get(resser_atom); |
63 |
int res_serial = Integer.parseInt(resser_atom.split("_")[0]); |
64 |
String atom = resser_atom.split("_")[1]; |
65 |
String res_type = resser2restype.get(res_serial); |
66 |
Double[] coords = atomser2coord.get(atomserial); |
67 |
Object[] fields = {atomserial, atom, res_type, chainCode, res_serial, coords[0], coords[1], coords[2]}; |
68 |
lines.put(atomserial, fields); |
69 |
} |
70 |
for (int atomserial:lines.keySet()){ |
71 |
// Local.US is necessary, otherwise java prints the doubles locale-dependant (i.e. with ',' for some locales) |
72 |
Out.printf(Locale.US,"ATOM %5d %3s %3s %1s%4d %8.3f%8.3f%8.3f\n",lines.get(atomserial)); |
73 |
} |
74 |
Out.println("END"); |
75 |
Out.close(); |
76 |
} |
77 |
|
78 |
public void dumpseq(String seqfile) throws IOException { |
79 |
PrintStream Out = new PrintStream(new FileOutputStream(seqfile)); |
80 |
Out.println(">"+pdbCode+"_"+pdbChainCode); |
81 |
Out.println(sequence); |
82 |
Out.close(); |
83 |
} |
84 |
|
85 |
public int get_length(){ |
86 |
return resser2restype.size(); |
87 |
} |
88 |
|
89 |
public HashMap<Integer,Double[]> get_coords_for_ct(String ct) { |
90 |
HashMap<Integer,Double[]> coords = new HashMap<Integer,Double[]>(); |
91 |
HashMap<String,String[]> restype2atoms = AA.ct2atoms(ct); |
92 |
for (int resser:resser2restype.keySet()){ |
93 |
String[] atoms = restype2atoms.get(resser2restype.get(resser)); |
94 |
for (String atom:atoms){ |
95 |
if (resser_atom2atomserial.containsKey(resser+"_"+atom)){ |
96 |
int atomser = resser_atom2atomserial.get(resser+"_"+atom); |
97 |
Double[] coord = atomser2coord.get(atomser); |
98 |
coords.put(atomser, coord); |
99 |
} |
100 |
else if (atom.equals("O") && resser_atom2atomserial.containsKey(resser+"_"+"OXT")){ |
101 |
int atomser = resser_atom2atomserial.get(resser+"_"+"OXT"); |
102 |
Double[] coord = atomser2coord.get(atomser); |
103 |
coords.put(atomser, coord); |
104 |
} |
105 |
else { |
106 |
System.err.println("Couldn't find "+atom+" atom for resser="+resser+". Continuing without that atom for this resser."); |
107 |
} |
108 |
} |
109 |
} |
110 |
return coords; |
111 |
} |
112 |
|
113 |
public HashMap<Contact, Double> calculate_dist_matrix(String ct){ |
114 |
HashMap<Contact,Double> dist_matrix = new HashMap<Contact,Double>(); |
115 |
if (!ct.contains("/")){ |
116 |
HashMap<Integer,Double[]> coords = get_coords_for_ct(ct); |
117 |
for (int i_atomser:coords.keySet()){ |
118 |
for (int j_atomser:coords.keySet()){ |
119 |
if (j_atomser>i_atomser) { |
120 |
Contact pair = new Contact(i_atomser,j_atomser); |
121 |
dist_matrix.put(pair, distance(coords.get(i_atomser),coords.get(j_atomser))); |
122 |
} |
123 |
} |
124 |
} |
125 |
} else { |
126 |
String i_ct = ct.split("/")[0]; |
127 |
String j_ct = ct.split("/")[1]; |
128 |
HashMap<Integer,Double[]> i_coords = get_coords_for_ct(i_ct); |
129 |
HashMap<Integer,Double[]> j_coords = get_coords_for_ct(j_ct); |
130 |
for (int i_atomser:i_coords.keySet()){ |
131 |
for (int j_atomser:j_coords.keySet()){ |
132 |
if (j_atomser!=i_atomser){ |
133 |
Contact pair = new Contact(i_atomser,j_atomser); |
134 |
dist_matrix.put(pair, distance(i_coords.get(i_atomser),j_coords.get(j_atomser))); |
135 |
} |
136 |
} |
137 |
} |
138 |
} |
139 |
return dist_matrix; |
140 |
} |
141 |
|
142 |
/** |
143 |
* Get the contacts for given contact type and cutoff for this Pdb object. |
144 |
* Returns a Graph object with the contacts |
145 |
* The graph is always directional, i.e. in crossed cases xx/yy: i corresponds to xx and j to yy |
146 |
* ct here can be single contact type (e.g. Ca, BB) or crossed (e.g. BB/SC or Ca/Cb) |
147 |
* @param ct |
148 |
* @param cutoff |
149 |
* @return |
150 |
*/ |
151 |
@SuppressWarnings("unchecked") |
152 |
public Graph get_graph(String ct, double cutoff){ |
153 |
HashMap<Contact,Double> dist_matrix = calculate_dist_matrix(ct); |
154 |
ContactList contacts = new ContactList(); |
155 |
// we loop here over all indices of dist_matrix, |
156 |
// we took care already that in symmetric cases (i.e. single contact type, not crossed) we have only one side of the matrix and |
157 |
// in asymmetrical cases (i.e. crossed cases) we have both sides of the matrix |
158 |
for (Contact pair:dist_matrix.keySet()){ |
159 |
int i_atomser=pair.i; |
160 |
int j_atomser=pair.j; |
161 |
if (dist_matrix.get(pair)<=cutoff){ |
162 |
int i_resser = atomser2resser.get(i_atomser); |
163 |
int j_resser = atomser2resser.get(j_atomser); |
164 |
Contact resser_pair = new Contact(i_resser,j_resser); |
165 |
// for multi-atom models (BB, SC, ALL or BB/SC) we need to make sure that we don't have contacts from residue to itself or that we don't have duplicates |
166 |
if (i_resser!=j_resser && (! contacts.contains(resser_pair))){ |
167 |
contacts.add(resser_pair); |
168 |
} |
169 |
} |
170 |
} |
171 |
Collections.sort(contacts); |
172 |
TreeMap<Integer,String> nodes = new TreeMap<Integer,String>(); |
173 |
for (int resser:resser2restype.keySet()){ |
174 |
nodes.put(resser,resser2restype.get(resser)); |
175 |
} |
176 |
Graph graph = new Graph (contacts,nodes,sequence,cutoff,ct,pdbCode,chainCode,pdbChainCode); |
177 |
return graph; |
178 |
} |
179 |
|
180 |
private Double distance(Double[] coords1, Double[] coords2){ |
181 |
return Math.sqrt(Math.pow((coords1[0]-coords2[0]),2)+Math.pow((coords1[1]-coords2[1]),2)+Math.pow((coords1[2]-coords2[2]),2)); |
182 |
} |
183 |
|
184 |
public int get_resser_from_pdbresser (String pdbresser){ |
185 |
return pdbresser2resser.get(pdbresser); |
186 |
} |
187 |
|
188 |
public String get_pdbresser_from_resser (int resser){ |
189 |
return resser2pdbresser.get(resser); |
190 |
} |
191 |
|
192 |
public int get_resser_from_atomser(int atomser){ |
193 |
return atomser2resser.get(atomser); |
194 |
} |
195 |
|
196 |
public String getChainCode(){ |
197 |
return this.chainCode; |
198 |
} |
199 |
|
200 |
public String getPdbChainCode(){ |
201 |
return this.pdbChainCode; |
202 |
} |
203 |
|
204 |
public String getSecStructure(int resser){ |
205 |
return this.resser2secstruct.get(resser); |
206 |
} |
207 |
|
208 |
public TreeMap<String,Interval> getAllSecStructElements(){ |
209 |
return secstruct2resinterval; |
210 |
} |
211 |
} |