1 |
duarte |
306 |
package proteinstructure; |
2 |
|
|
|
3 |
|
|
import java.io.BufferedReader; |
4 |
|
|
import java.io.File; |
5 |
|
|
|
6 |
|
|
import java.io.FileReader; |
7 |
|
|
import java.io.IOException; |
8 |
|
|
import java.util.ArrayList; |
9 |
|
|
import java.util.Collections; |
10 |
|
|
import java.util.HashMap; |
11 |
|
|
import java.util.TreeMap; |
12 |
|
|
import java.util.TreeSet; |
13 |
|
|
import java.util.regex.Matcher; |
14 |
|
|
import java.util.regex.Pattern; |
15 |
|
|
|
16 |
|
|
import javax.vecmath.Point3d; |
17 |
|
|
|
18 |
|
|
|
19 |
|
|
/** |
20 |
|
|
* A single chain pdb protein structure loaded from a mmCIF file |
21 |
|
|
* |
22 |
|
|
* @author Jose Duarte |
23 |
|
|
* Class: CiffilePdb |
24 |
|
|
* Package: proteinstructure |
25 |
|
|
*/ |
26 |
|
|
public class CiffilePdb extends Pdb { |
27 |
|
|
|
28 |
|
|
|
29 |
|
|
private String ciffile; |
30 |
duarte |
307 |
|
31 |
|
|
// fields we will read |
32 |
|
|
private static final String entryId = "_entry"; |
33 |
|
|
private static final String atomSiteId = "_atom_site"; |
34 |
|
|
private static final String atomSitesAltId = "_atom_sites_alt"; |
35 |
|
|
private static final String pdbxPolySeqId = "_pdbx_poly_seq_scheme"; |
36 |
|
|
private static final String structConfId = "_struct_conf"; |
37 |
|
|
private static final String structSheetId = "_struct_sheet_range"; |
38 |
|
|
private static final String[] ids = {entryId,atomSitesAltId,atomSiteId,pdbxPolySeqId,structConfId,structSheetId}; |
39 |
duarte |
306 |
|
40 |
|
|
private TreeMap<String,Integer> ids2elements; // map of ids to element serials |
41 |
duarte |
307 |
private TreeMap<String,String> fields2values; // map of field names (id.field) to values (for non-loop elements) |
42 |
|
|
private TreeMap<String,Integer> fields2indices; // map of field names (id.field) to index (for loop elements) |
43 |
duarte |
306 |
private TreeSet<Integer> loopElements; // contains list of elements that are of loop type |
44 |
|
|
private TreeMap<Integer,Interval> loopelements2contentIndex; // begin and end line index of each loop element |
45 |
duarte |
309 |
|
46 |
|
|
private String altLoc; |
47 |
duarte |
307 |
|
48 |
duarte |
306 |
/** |
49 |
|
|
* Constructs Pdb object given pdb code and pdb chain code. |
50 |
|
|
* Model will be DEFAULT_MODEL |
51 |
|
|
* @param ciffile |
52 |
|
|
* @param pdbChainCode |
53 |
|
|
* @throws PdbChainCodeNotFoundError |
54 |
|
|
* @throws IOException |
55 |
|
|
*/ |
56 |
|
|
public CiffilePdb (String ciffile, String pdbChainCode) throws PdbChainCodeNotFoundError, IOException { |
57 |
|
|
this(ciffile, pdbChainCode, DEFAULT_MODEL); |
58 |
|
|
} |
59 |
|
|
|
60 |
|
|
/** |
61 |
|
|
* Constructs Pdb object given pdb code, pdb chain code, model serial, source db and a MySQLConnection. |
62 |
|
|
* The db must be a pdbase database |
63 |
|
|
* @param ciffile |
64 |
|
|
* @param pdbChainCode |
65 |
|
|
* @param model_serial |
66 |
|
|
* @throws PdbChainCodeNotFoundError |
67 |
|
|
* @throws IOException |
68 |
|
|
*/ |
69 |
|
|
public CiffilePdb (String ciffile, String pdbChainCode, int model_serial) throws PdbChainCodeNotFoundError, IOException { |
70 |
|
|
this.ciffile = ciffile; |
71 |
|
|
this.pdbChainCode=pdbChainCode.toUpperCase(); // our convention: chain codes are upper case |
72 |
|
|
this.model=model_serial; |
73 |
|
|
|
74 |
|
|
parseCifFile(); |
75 |
|
|
|
76 |
|
|
this.pdbCode = readPdbCode(); |
77 |
|
|
|
78 |
duarte |
309 |
readAtomAltLocs(); // sets altLoc String (needed in parseLoopElements to get the right alt atom locations in reading atom_site) |
79 |
|
|
|
80 |
duarte |
306 |
secondaryStructure = new SecondaryStructure(); // create empty secondary structure first to make sure object is not null |
81 |
|
|
|
82 |
|
|
parseLoopElements(); // populates resser_atom2atomserial, resser2restype, atomser2coord, atomser2resser, sequence, pdbresser2resser, secondaryStructure |
83 |
|
|
|
84 |
|
|
this.fullLength = sequence.length(); |
85 |
|
|
|
86 |
|
|
this.obsLength = resser2restype.size(); |
87 |
|
|
|
88 |
|
|
if(!secondaryStructure.isEmpty()) { |
89 |
|
|
secondaryStructure.setComment("CIFfile"); |
90 |
|
|
} |
91 |
|
|
|
92 |
|
|
// we initialise resser2pdbresser from the pdbresser2resser HashMap |
93 |
|
|
this.resser2pdbresser = new HashMap<Integer, String>(); |
94 |
|
|
for (String pdbresser:pdbresser2resser.keySet()){ |
95 |
|
|
resser2pdbresser.put(pdbresser2resser.get(pdbresser), pdbresser); |
96 |
|
|
} |
97 |
|
|
|
98 |
|
|
// initialising atomser2atom from resser_atom2atomserial |
99 |
|
|
atomser2atom = new HashMap<Integer, String>(); |
100 |
|
|
for (String resser_atom:resser_atom2atomserial.keySet()){ |
101 |
|
|
int atomserial = resser_atom2atomserial.get(resser_atom); |
102 |
|
|
String atom = resser_atom.split("_")[1]; |
103 |
|
|
atomser2atom.put(atomserial,atom); |
104 |
|
|
} |
105 |
|
|
} |
106 |
|
|
|
107 |
|
|
private void parseCifFile() throws IOException{ |
108 |
|
|
// data structures to store the parsed fields |
109 |
|
|
ids2elements = new TreeMap<String, Integer>(); |
110 |
duarte |
307 |
fields2indices = new TreeMap<String,Integer>(); |
111 |
duarte |
306 |
fields2values = new TreeMap<String, String>(); |
112 |
|
|
loopElements = new TreeSet<Integer>(); // contains list of elements that are of loop type |
113 |
|
|
loopelements2contentIndex = new TreeMap<Integer,Interval>(); |
114 |
duarte |
307 |
TreeMap<String,Integer> fieldsIdx = new TreeMap<String,Integer>(); // this map holds the field index counters for each element id |
115 |
duarte |
306 |
|
116 |
|
|
BufferedReader fcif = new BufferedReader(new FileReader(new File(ciffile))); |
117 |
|
|
int element = 0; |
118 |
|
|
String line; |
119 |
|
|
line = fcif.readLine(); // skip first line |
120 |
|
|
int linecount = 1; // we have read one line already, we initialise count to 1 |
121 |
|
|
while((line = fcif.readLine()) != null ) { |
122 |
|
|
linecount++; |
123 |
|
|
if (line.startsWith("#")) { |
124 |
|
|
element++; |
125 |
|
|
continue; |
126 |
|
|
} |
127 |
|
|
if (line.startsWith("loop_")) { |
128 |
|
|
loopElements.add(element); |
129 |
|
|
continue; |
130 |
|
|
} |
131 |
|
|
|
132 |
|
|
for (String id:ids){ |
133 |
duarte |
307 |
if (!fieldsIdx.containsKey(id)) fieldsIdx.put(id,0); |
134 |
duarte |
306 |
Pattern p = Pattern.compile("^"+id+"\\.(\\w+)(?:\\s+(.*))?$"); |
135 |
|
|
Matcher m = p.matcher(line); |
136 |
|
|
if (m.find()){ |
137 |
|
|
ids2elements.put(id,element); |
138 |
duarte |
307 |
String field = id + "." + m.group(1); |
139 |
duarte |
306 |
if (!loopElements.contains(element)) { // if not a loop element |
140 |
duarte |
307 |
fields2values.put(field, m.group(2)); // 2nd capture group only matches for non-loops where the value of the field is in same line as field name |
141 |
|
|
} else { // for loop elements we fill the fields2indices TreeMap |
142 |
|
|
fields2indices.put(field,fieldsIdx.get(id)); |
143 |
duarte |
306 |
} |
144 |
duarte |
307 |
fieldsIdx.put(id,fieldsIdx.get(id)+1); |
145 |
duarte |
306 |
continue; |
146 |
|
|
} |
147 |
|
|
} |
148 |
|
|
if (!line.startsWith("_") && !line.startsWith("#")){ // not in field definition, we are in values of a loop element |
149 |
|
|
if (ids2elements.containsValue(element)) { // if this is one of the fields we want to parse (members of String[] ids) |
150 |
|
|
if (!loopelements2contentIndex.containsKey(element)) { |
151 |
|
|
//loopelements2content.put(element,line+"\n"); |
152 |
|
|
Interval interval = new Interval(linecount,linecount); |
153 |
|
|
loopelements2contentIndex.put(element,interval); |
154 |
|
|
} else { |
155 |
|
|
//loopelements2content.put(element,loopelements2content.get(element)+line+"\n"); |
156 |
|
|
loopelements2contentIndex.get(element).end=linecount; |
157 |
|
|
} |
158 |
|
|
} |
159 |
|
|
} |
160 |
|
|
} // end scanning lines |
161 |
|
|
|
162 |
|
|
fcif.close(); |
163 |
|
|
} |
164 |
|
|
|
165 |
|
|
private String readPdbCode(){ |
166 |
|
|
return fields2values.get("_entry.id").trim(); |
167 |
|
|
} |
168 |
|
|
|
169 |
duarte |
309 |
private void readAtomAltLocs() throws IOException { |
170 |
|
|
// The read of the atom_sites_alt element must be done in a separate scan of the file, previous to scanning the atom_site element |
171 |
|
|
// This is because the order of the different elements in the cif files is not guaranteed, so atom_sites_alt can come before or after atom_site |
172 |
|
|
// (and altLoc needs to be set before starting reading the atom_site element) |
173 |
|
|
|
174 |
|
|
ArrayList<String> altLocs = new ArrayList<String>(); |
175 |
|
|
// we initialise to ".", this is the default value in the cif files for the alt loc field. If no atom_sites_alt is present it's ok to stay with this value |
176 |
|
|
altLoc = "."; |
177 |
|
|
|
178 |
|
|
// atom_sites_alt element is optional |
179 |
|
|
Interval intAtomSitesAlt = null; |
180 |
|
|
if (ids2elements.containsKey(atomSitesAltId)){ |
181 |
|
|
intAtomSitesAlt = loopelements2contentIndex.get(ids2elements.get(atomSitesAltId)); |
182 |
|
|
} |
183 |
|
|
|
184 |
|
|
BufferedReader fcif = new BufferedReader(new FileReader(new File(ciffile))); |
185 |
|
|
String line; |
186 |
|
|
int linecount=0; |
187 |
|
|
while((line = fcif.readLine()) != null ) { |
188 |
|
|
linecount++; |
189 |
|
|
// atom_sites_alt (optional element) |
190 |
|
|
if (intAtomSitesAlt!=null && linecount>=intAtomSitesAlt.beg && linecount<=intAtomSitesAlt.end){ |
191 |
|
|
int idIdx = fields2indices.get(atomSitesAltId+".id"); |
192 |
|
|
// id=0 |
193 |
|
|
// A ? |
194 |
|
|
String[] tokens = line.split("\\s+"); |
195 |
|
|
if (!tokens[idIdx].equals(".")) { |
196 |
|
|
altLocs.add(tokens[idIdx]); |
197 |
|
|
} |
198 |
|
|
} |
199 |
|
|
} |
200 |
|
|
fcif.close(); |
201 |
|
|
if (!altLocs.isEmpty()){ |
202 |
|
|
altLoc = Collections.min(altLocs); |
203 |
|
|
} |
204 |
|
|
} |
205 |
|
|
|
206 |
duarte |
306 |
private void parseLoopElements() throws IOException, PdbChainCodeNotFoundError { |
207 |
|
|
resser_atom2atomserial = new HashMap<String,Integer>(); |
208 |
|
|
resser2restype = new HashMap<Integer,String>(); |
209 |
|
|
atomser2coord = new HashMap<Integer,Point3d>(); |
210 |
|
|
atomser2resser = new HashMap<Integer,Integer>(); |
211 |
|
|
pdbresser2resser = new HashMap<String, Integer>(); |
212 |
|
|
sequence = ""; |
213 |
|
|
secondaryStructure = new SecondaryStructure(); |
214 |
|
|
|
215 |
|
|
ArrayList<String> aalist=AA.aas(); // list of standard 3 letter code aminoacids |
216 |
|
|
|
217 |
|
|
String chainCodeStr=pdbChainCode; |
218 |
|
|
if (pdbChainCode.equals("NULL")) chainCodeStr="A"; |
219 |
|
|
|
220 |
duarte |
307 |
Interval intAtomSite = loopelements2contentIndex.get(ids2elements.get(atomSiteId)); |
221 |
|
|
Interval intPdbxPoly = loopelements2contentIndex.get(ids2elements.get(pdbxPolySeqId)); |
222 |
duarte |
306 |
// struct_conf element is optional |
223 |
|
|
Interval intStructConf = null; |
224 |
duarte |
307 |
if (ids2elements.containsKey(structConfId)){ |
225 |
|
|
intStructConf = loopelements2contentIndex.get(ids2elements.get(structConfId)); |
226 |
duarte |
306 |
} |
227 |
|
|
// struct_sheet_range element is optional |
228 |
|
|
Interval intStructSheet = null; |
229 |
duarte |
307 |
if (ids2elements.containsKey(structSheetId)) { |
230 |
|
|
intStructSheet = loopelements2contentIndex.get(ids2elements.get(structSheetId)); |
231 |
duarte |
306 |
} |
232 |
|
|
|
233 |
|
|
boolean empty = true; |
234 |
|
|
BufferedReader fcif = new BufferedReader(new FileReader(new File(ciffile))); |
235 |
|
|
String line; |
236 |
|
|
int linecount=0; |
237 |
|
|
while((line = fcif.readLine()) != null ) { |
238 |
|
|
linecount++; |
239 |
|
|
// atom_site |
240 |
duarte |
307 |
if (linecount>=intAtomSite.beg && linecount<=intAtomSite.end){ |
241 |
duarte |
308 |
int groupPdbIdx = fields2indices.get(atomSiteId+".group_PDB"); |
242 |
duarte |
307 |
int idIdx = fields2indices.get(atomSiteId+".id"); |
243 |
|
|
int labelAtomIdIdx = fields2indices.get(atomSiteId+".label_atom_id"); |
244 |
|
|
int labelAltIdIdx = fields2indices.get(atomSiteId+".label_alt_id"); |
245 |
|
|
int labelCompIdIdx = fields2indices.get(atomSiteId+".label_comp_id"); |
246 |
|
|
int labelAsymIdIdx = fields2indices.get(atomSiteId+".label_asym_id"); |
247 |
|
|
int labelSeqIdIdx = fields2indices.get(atomSiteId+".label_seq_id"); |
248 |
|
|
int cartnXIdx = fields2indices.get(atomSiteId+".Cartn_x"); |
249 |
|
|
int cartnYIdx = fields2indices.get(atomSiteId+".Cartn_y"); |
250 |
|
|
int cartnZIdx = fields2indices.get(atomSiteId+".Cartn_z"); |
251 |
|
|
int authAsymIdIdx = fields2indices.get(atomSiteId+".auth_asym_id"); |
252 |
|
|
int pdbxPDBModelNumIdx = fields2indices.get(atomSiteId+".pdbx_PDB_model_num"); |
253 |
duarte |
308 |
// group_PDB=0, auth_asym_id=22, pdbx_PDB_model_num=24, label_alt_id=4, id=1, label_atom_id=3, label_comp_id=5, label_asym_id=6, label_seq_id=8, Cartn_x=10, Cartn_y=11, Cartn_z=12 |
254 |
duarte |
306 |
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 151617181920 2122 23 24 |
255 |
|
|
//ATOM 2 C CA . MET A 1 1 ? 38.591 8.543 15.660 1.00 77.79 ? ? ? ? ? 1 MET A CA 1 |
256 |
|
|
String[] tokens = line.split("\\s+"); |
257 |
duarte |
308 |
if (tokens[groupPdbIdx].equals("ATOM") && tokens[authAsymIdIdx].equals(chainCodeStr) && Integer.parseInt(tokens[pdbxPDBModelNumIdx])==model) { // match our given chain and model |
258 |
duarte |
306 |
empty = false; |
259 |
duarte |
307 |
if (tokens[labelAltIdIdx].equals(".") || tokens[labelAltIdIdx].equals(altLoc)) { // don't read lines with something else as "." or altLoc |
260 |
|
|
int atomserial=Integer.parseInt(tokens[idIdx]); // id |
261 |
|
|
String atom = tokens[labelAtomIdIdx]; // label_atom_id |
262 |
|
|
String res_type = tokens[labelCompIdIdx]; // label_comp_id |
263 |
|
|
chainCode = tokens[labelAsymIdIdx]; // label_asym_id |
264 |
|
|
int res_serial = Integer.parseInt(tokens[labelSeqIdIdx]); // label_seq_id |
265 |
|
|
double x = Double.parseDouble(tokens[cartnXIdx]); // Cartn_x |
266 |
|
|
double y = Double.parseDouble(tokens[cartnYIdx]); // Cartn_y |
267 |
|
|
double z = Double.parseDouble(tokens[cartnZIdx]); // Cartn_z |
268 |
duarte |
306 |
Point3d coords = new Point3d(x,y,z); |
269 |
|
|
if (aalist.contains(res_type)) { |
270 |
|
|
atomser2coord.put(atomserial, coords); |
271 |
|
|
atomser2resser.put(atomserial, res_serial); |
272 |
|
|
resser2restype.put(res_serial, res_type); |
273 |
|
|
ArrayList<String> atomlist = aas2atoms.get(res_type); |
274 |
|
|
atomlist.add("OXT"); // the extra atom OXT is there in the last residue of the chain |
275 |
|
|
if (atomlist.contains(atom)){ |
276 |
|
|
resser_atom2atomserial.put(res_serial+"_"+atom, atomserial); |
277 |
|
|
} |
278 |
|
|
} |
279 |
|
|
} |
280 |
|
|
} |
281 |
|
|
continue; |
282 |
|
|
} |
283 |
|
|
// pdbx_poly_seq_scheme |
284 |
|
|
if (linecount>=intPdbxPoly.beg && linecount<=intPdbxPoly.end){ |
285 |
duarte |
307 |
int seqIdIdx = fields2indices.get(pdbxPolySeqId+".seq_id"); |
286 |
|
|
int authSeqNumIdx = fields2indices.get(pdbxPolySeqId+".auth_seq_num"); |
287 |
|
|
int pdbInsCodeIdx = fields2indices.get(pdbxPolySeqId+".pdb_ins_code"); |
288 |
|
|
int monIdIdx = fields2indices.get(pdbxPolySeqId+".mon_id"); |
289 |
duarte |
308 |
int pdbStrandIdIdx = fields2indices.get(pdbxPolySeqId+".pdb_strand_id"); |
290 |
duarte |
306 |
// asym_id=0, seq_id=2, auth_seq_num=6, pdb_ins_code=10, mon_id=3 |
291 |
|
|
// 0 1 2 3 4 5 6 7 8 910 |
292 |
|
|
// A 1 1 ASP 1 1 1 ASP ASP A . |
293 |
|
|
String[] tokens = line.split("\\s+"); |
294 |
duarte |
308 |
if (tokens[pdbStrandIdIdx].equals(chainCodeStr)) { // we can't rely on using chainCode, because the order of elements is not guranteed (pdbx_poly_seq_scheme doesn't always come after atom_site) |
295 |
duarte |
307 |
int res_serial = Integer.parseInt(tokens[seqIdIdx]); // seq_id |
296 |
|
|
String pdb_res_serial = tokens[authSeqNumIdx]; // auth_seq_num |
297 |
|
|
String pdb_ins_code = tokens[pdbInsCodeIdx]; // pdb_ins_code |
298 |
duarte |
306 |
String pdb_res_serial_with_icode = pdb_res_serial; |
299 |
|
|
if (!pdb_ins_code.equals(".")) { |
300 |
|
|
pdb_res_serial_with_icode=pdb_res_serial+pdb_ins_code; |
301 |
|
|
} |
302 |
duarte |
307 |
String res_type = tokens[monIdIdx]; // mon_id |
303 |
duarte |
306 |
// sequence |
304 |
|
|
if (aalist.contains(res_type)){ |
305 |
|
|
sequence+=AA.threeletter2oneletter(res_type); |
306 |
|
|
} else { |
307 |
|
|
sequence+=NONSTANDARD_AA_LETTER; |
308 |
|
|
} |
309 |
|
|
// pdbresser2resser |
310 |
duarte |
308 |
if (!pdb_res_serial_with_icode.equals("?")) { // question marks are author missing serials, we don't want them in the map |
311 |
|
|
pdbresser2resser.put(pdb_res_serial_with_icode,res_serial); |
312 |
|
|
} |
313 |
duarte |
306 |
} |
314 |
|
|
continue; |
315 |
|
|
} |
316 |
|
|
// struct_conf (optional element), HELIX and TURN secondary structure |
317 |
|
|
if (intStructConf!=null && linecount>=intStructConf.beg && linecount<=intStructConf.end){ |
318 |
duarte |
307 |
int idIdx = fields2indices.get(structConfId+".id"); |
319 |
duarte |
308 |
int begAuthAsymIdIdx = fields2indices.get(structConfId+".beg_auth_asym_id"); |
320 |
duarte |
307 |
int begLabelSeqIdIdx = fields2indices.get(structConfId+".beg_label_seq_id"); |
321 |
|
|
int endLabelSeqIdIdx = fields2indices.get(structConfId+".end_label_seq_id"); |
322 |
duarte |
306 |
//id=1, beg_label_seq_id=5, end_label_seq_id=9, beg_label_asym_id=4 |
323 |
|
|
// 0 1 2 3 4 5 6 7 8 9 10 111213 1415 16 1718 19 |
324 |
|
|
//HELX_P HELX_P1 1 ASN A 2 ? GLY A 12 ? ASN A 2 GLY A 12 1 ? 11 |
325 |
|
|
String[] tokens = line.split("\\s+"); |
326 |
duarte |
308 |
if (tokens[begAuthAsymIdIdx].equals(chainCodeStr)) { // we don't have yet chainCode (normally struct_conf appears before atom_site in cif file), so we need to use the auth_asym_id |
327 |
duarte |
307 |
String id = tokens[idIdx]; |
328 |
duarte |
306 |
Pattern p = Pattern.compile("^(\\w).+_P(\\d)+$"); |
329 |
|
|
Matcher m = p.matcher(id); |
330 |
|
|
String ssId="Unknown"; |
331 |
|
|
if (m.find()){ |
332 |
|
|
ssId = m.group(1)+m.group(2); // e.g.: Hnn (helices) or Tnn (turns) |
333 |
|
|
} |
334 |
duarte |
307 |
int beg = Integer.parseInt(tokens[begLabelSeqIdIdx]); |
335 |
|
|
int end = Integer.parseInt(tokens[endLabelSeqIdIdx]); |
336 |
duarte |
306 |
char ssType = SecStrucElement.OTHER; |
337 |
|
|
if(id.startsWith("H")) { |
338 |
|
|
ssType = SecStrucElement.HELIX; |
339 |
|
|
} else if(id.startsWith("T")) { |
340 |
|
|
ssType = SecStrucElement.TURN; |
341 |
|
|
} else { |
342 |
|
|
System.err.println("Unknown secondary structure type " + id + " encountered when reading from ciffile. Skipping."); |
343 |
|
|
} |
344 |
|
|
if(ssType != SecStrucElement.OTHER) { |
345 |
|
|
SecStrucElement ssElem = new SecStrucElement(ssType, beg, end, ssId); |
346 |
|
|
secondaryStructure.add(ssElem); |
347 |
|
|
} |
348 |
|
|
} |
349 |
|
|
continue; |
350 |
|
|
} |
351 |
|
|
// struct_sheet_range (optional element), SHEETs |
352 |
|
|
if (intStructSheet!=null && linecount>=intStructSheet.beg && linecount<=intStructSheet.end){ |
353 |
duarte |
307 |
int sheetIdIdx = fields2indices.get(structSheetId+".sheet_id"); |
354 |
|
|
int idIdx = fields2indices.get(structSheetId+".id"); |
355 |
duarte |
308 |
int begAuthAsymIdIdx = fields2indices.get(structSheetId+".beg_auth_asym_id"); |
356 |
duarte |
307 |
int begLabelSeqIdIdx = fields2indices.get(structSheetId+".beg_label_seq_id"); |
357 |
|
|
int endLabelSeqIdIdx = fields2indices.get(structSheetId+".end_label_seq_id"); |
358 |
duarte |
306 |
//sheet_id=0, id=1, beg_label_seq_id=4, end_label_seq_id=8, beg_label_asym_id=3 |
359 |
|
|
//0 1 2 3 4 5 6 7 8 910 1112 13 1415 16 |
360 |
|
|
//A 1 ARG A 14 ? LYS A 19 ? ? ARG A 14 LYS A 19 |
361 |
|
|
String[] tokens = line.split("\\s+"); |
362 |
duarte |
308 |
if (tokens[begAuthAsymIdIdx].equals(chainCodeStr)){ // we don't have yet chainCode (normally struct_sheet_range appears before atom_site in cif file), so we need to use the auth_asym_id |
363 |
duarte |
307 |
String sheetid = tokens[sheetIdIdx]; |
364 |
|
|
int id = Integer.parseInt(tokens[idIdx]); |
365 |
|
|
int beg = Integer.parseInt(tokens[begLabelSeqIdIdx]); |
366 |
|
|
int end = Integer.parseInt(tokens[endLabelSeqIdIdx]); |
367 |
duarte |
306 |
String ssId=SecStrucElement.STRAND+sheetid+id; // e.g.: SA1, SA2..., SB1, SB2,... |
368 |
|
|
SecStrucElement ssElem = new SecStrucElement(SecStrucElement.STRAND, beg, end, ssId); |
369 |
|
|
secondaryStructure.add(ssElem); |
370 |
|
|
} |
371 |
|
|
continue; |
372 |
|
|
} |
373 |
|
|
} |
374 |
|
|
fcif.close(); |
375 |
|
|
if (empty) { // no atom data was found for given pdb chain code and model |
376 |
|
|
throw new PdbChainCodeNotFoundError("Couldn't find _atom_site data for given pdbChainCode: "+pdbChainCode+", model: "+model); |
377 |
|
|
} |
378 |
|
|
} |
379 |
|
|
} |