53 |
|
this.chainCode=pdbChainCode; |
54 |
|
if (pdbChainCode.equals("NULL")) this.chainCode=NULL_chainCode; |
55 |
|
|
56 |
< |
this.sequence=""; // we initialise it to empty string, then is set inread_pdb_data_from_file |
56 |
> |
this.sequence=""; // we initialise it to empty string, then is set in read_pdb_data_from_file |
57 |
|
|
58 |
|
// we initialise the resser2secstruct and secstruct2resinterval Maps to empty, if no sec structure info is found then it remains empty |
59 |
|
this.resser2secstruct = new HashMap<Integer, String>(); |
60 |
|
this.secstruct2resinterval = new TreeMap<String, Interval>(); |
61 |
|
|
62 |
|
read_pdb_data_from_file(); |
63 |
+ |
if(!resser2secstruct.isEmpty()) { |
64 |
+ |
hasSecondaryStructure = true; |
65 |
+ |
secondaryStructureSource = "Author"; |
66 |
+ |
} |
67 |
|
|
68 |
|
// when reading from pdb file we have no information of residue numbers or author's (original) pdb residue number, so we fill the mapping with the residue numbers we know |
69 |
|
//TODO eventually we could assign our own internal residue numbers when reading from pdb and thus this map would be used |
89 |
|
* To read the pdb data (atom coordinates, residue serials, atom serials) from file. |
90 |
|
* chainCode gets set to same as pdbChainCode, except if input chain code NULL then chainCode will be 'A' |
91 |
|
* pdbCode gets set to the one parsed in HEADER or to 'Unknown' if not found |
92 |
< |
* sequence gets set to the sequence read from ATOM lines (i.e. observed resdiues only) |
92 |
> |
* sequence gets set to the sequence read from ATOM lines (i.e. observed residues only) |
93 |
|
* No insertion codes are parsed or taken into account at the moment. Thus files with |
94 |
|
* insertion codes will be incorrectly read |
95 |
|
* @param pdbfile |
144 |
|
// read further lines |
145 |
|
while ((line = fpdb.readLine() ) != null ) { |
146 |
|
linecount++; |
147 |
+ |
// SEQRES |
148 |
+ |
//SEQRES 1 A 348 VAL ASN ILE LYS THR ASN PRO PHE LYS ALA VAL SER PHE |
149 |
+ |
p = Pattern.compile("^SEQRES.{5}"+chainCodeStr); |
150 |
+ |
m = p.matcher(line); |
151 |
+ |
if (m.find()){ |
152 |
+ |
for (int i=19;i<=67;i+=4) { |
153 |
+ |
if (!line.substring(i, i+3).equals(" ")) { |
154 |
+ |
sequence+= AA.threeletter2oneletter(line.substring(i, i+3)); |
155 |
+ |
} |
156 |
+ |
} |
157 |
+ |
} |
158 |
|
// SECONDARY STRUCTURE |
159 |
|
// helix |
160 |
|
//HELIX 1 1 LYS A 17 LEU A 26 1 |
258 |
|
//System.err.println("Couldn't find any atom line for given pdbChainCode: "+pdbChainCode+", model: "+model); |
259 |
|
throw new PdbChainCodeNotFoundError("Couldn't find any ATOM line for given pdbChainCode: "+pdbChainCode+", model: "+model); |
260 |
|
} |
261 |
< |
// now we read the sequence from the resser2restype HashMap |
262 |
< |
// NOTE: we must make sure elsewhere that there are no unobserved residues, we can't check that here! |
263 |
< |
ArrayList<Integer> ressers = new ArrayList<Integer>(); |
264 |
< |
for (int resser:resser2restype.keySet()) { |
265 |
< |
ressers.add(resser); |
266 |
< |
} |
267 |
< |
Collections.sort(ressers); |
268 |
< |
for (int resser:ressers){ |
269 |
< |
String oneletter = AA.threeletter2oneletter(resser2restype.get(resser)); |
270 |
< |
sequence += oneletter; |
261 |
> |
if (sequence.equals("")){ |
262 |
> |
// if we couldn't read anything from SEQRES then we read it from the resser2restype HashMap |
263 |
> |
// NOTE: we must make sure elsewhere that there are no unobserved residues, we can't check that here! |
264 |
> |
ArrayList<Integer> ressers = new ArrayList<Integer>(); |
265 |
> |
for (int resser:resser2restype.keySet()) { |
266 |
> |
ressers.add(resser); |
267 |
> |
} |
268 |
> |
Collections.sort(ressers); |
269 |
> |
for (int resser:ressers){ |
270 |
> |
String oneletter = AA.threeletter2oneletter(resser2restype.get(resser)); |
271 |
> |
sequence += oneletter; |
272 |
> |
} |
273 |
|
} |
274 |
|
} |
275 |
|
|