trunk/proteinstructure/PdbfilePdb.java

package proteinstructure;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.vecmath.Point3d;

/**
 * A Pdb object whose data is read from a PDB-format text file.
 * The object is created empty; data is read on {@link #load(String, int)}.
 * Insertion codes and alternate locations are not interpreted by this reader.
 */
public class PdbfilePdb extends Pdb {
        
        private static final String UNKNOWN_STRING ="XXXX";
        private static final String NULL_chainCode = "A";
        
        // record patterns that do not depend on the chain code, compiled only once
        private static final Pattern HEADER_PDBCODE_PATTERN = Pattern.compile("^HEADER.{56}(\\d\\w{3})");
        private static final Pattern CASP_PATTERN = Pattern.compile("^PFRMAT\\s+TS");
        private static final Pattern MODEL_PATTERN = Pattern.compile("^MODEL\\s+(\\d+)");
        
        private String pdbfile;

        /**
         * Constructs an empty Pdb object given a pdbfile name
         * Data will be loaded from pdb file upon call of load(pdbChainCode, modelSerial) 
         * @param pdbfile path of the pdb file to read from
         */
        public PdbfilePdb (String pdbfile) {
                this.pdbfile = pdbfile;
                this.pdbCode=UNKNOWN_STRING; // we initialise to unknown in case we don't find it in pdb file
                this.dataLoaded = false;
                
                this.sequence=""; // we initialise it to empty string, then is set in read_pdb_data_from_file 
                
                // we initialise the secondary structure to empty, if no sec structure info is found then they remain empty
                this.secondaryStructure = new SecondaryStructure();             
                
        }

        /**
         * Reads the data of the given chain and model from the pdb file into this object.
         * May be called more than once: every call starts from a clean state.
         * @param pdbChainCode the chain code as in the pdb file (case sensitive), or 
         * Pdb.NULL_CHAIN_CODE for files with a blank chain code
         * @param modelSerial the serial of the model to read
         * @throws PdbLoadError if the file can't be read, is not in the expected format or 
         * doesn't contain ATOM lines for the given chain code and model
         */
        public void load(String pdbChainCode, int modelSerial) throws PdbLoadError {
                // a failed (re)load must not leave the object flagged as loaded
                this.dataLoaded = false;
                try {
                        this.model=modelSerial;
                        this.pdbChainCode=pdbChainCode;                 // NOTE! pdb chain codes are case sensitive!
                        // we set chainCode to pdbChainCode except for case NULL where we use "A"
                        this.chainCode=pdbChainCode;
                        if (pdbChainCode.equals(Pdb.NULL_CHAIN_CODE)) this.chainCode=NULL_chainCode;

                        read_pdb_data_from_file();
                        
                        this.obsLength = resser2restype.size();
                        
                        if(!secondaryStructure.isEmpty()) {
                                secondaryStructure.setComment("Author");
                        }
                        
                        // the residue serials read from the file are used both as internal and as pdb residue serials, 
                        // so the two mappings are simply the identity
                        //TODO eventually we could assign our own internal residue numbers when reading from pdb and thus this map would be used
                        this.resser2pdbresser = new HashMap<Integer, String>();
                        this.pdbresser2resser = new HashMap<String, Integer>();
                        for (int resser:resser2restype.keySet()){
                                String pdbresser = String.valueOf(resser);
                                resser2pdbresser.put(resser, pdbresser);
                                pdbresser2resser.put(pdbresser, resser);
                        }
                        
                        // initialising atomser2atom from resser_atom2atomserial (keys have the form resser_atom)
                        atomser2atom = new HashMap<Integer, String>();
                        for (String resser_atom:resser_atom2atomserial.keySet()){
                                int atomserial = resser_atom2atomserial.get(resser_atom);
                                String atom = resser_atom.split("_")[1];
                                atomser2atom.put(atomserial,atom);
                        }
                        
                        dataLoaded = true;
                        
                } catch (IOException e) { // includes FileNotFoundException
                        throw new PdbLoadError(e);
                } catch (PdbfileFormatError e) {
                        throw new PdbLoadError(e);
                } catch (PdbChainCodeNotFoundError e) {
                        throw new PdbLoadError(e);
                }
        }
        
        /**
         * Scans the ATOM lines of the pdb file and collects the chain codes found in them.
         * A blank chain code is reported as "NULL". ATOM lines too short to contain a 
         * chain code column are ignored.
         * @return the sorted distinct chain codes, or null if no chain code was found
         * @throws PdbLoadError if the file can't be read
         */
        public String[] getChains() throws PdbLoadError {
                TreeSet<String> chains = new TreeSet<String>();
                try {
                        BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile)));
                        try {
                                String line;
                                while ((line=fpdb.readLine())!=null) {
                                        // the chain code is the 22nd character, truncated ATOM lines don't have one
                                        if (line.startsWith("ATOM") && line.length()>=22) {
                                                String chain = line.substring(21, 22);
                                                if (chain.equals(" ")) chain="NULL";
                                                chains.add(chain);
                                        }
                                }
                        } finally {
                                fpdb.close(); // also closes the file if reading fails
                        }
                } catch (IOException e) {
                        throw new PdbLoadError(e);
                }
                
                if (chains.isEmpty()) return null;
                
                return chains.toArray(new String[chains.size()]);
        }
        
        /**
         * Scans the MODEL lines of the pdb file and collects the model serials found in them.
         * @return the sorted distinct model serials; if the file has no MODEL lines an array 
         * containing only DEFAULT_MODEL
         * @throws PdbLoadError if the file can't be read or a MODEL line doesn't contain a valid serial
         */
        public Integer[] getModels() throws PdbLoadError {
                TreeSet<Integer> models = new TreeSet<Integer>();
                try {
                        BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile)));
                        try {
                                String line;
                                while ((line=fpdb.readLine())!=null) {
                                        if (line.startsWith("MODEL")) {
                                                // trim: lines may be padded with blanks up to 80 columns
                                                models.add(Integer.parseInt(line.substring(5).trim()));
                                        }
                                }
                        } finally {
                                fpdb.close(); // also closes the file if reading or parsing fails
                        }
                } catch (IOException e) {
                        throw new PdbLoadError(e);
                } catch (NumberFormatException e) {
                        throw new PdbLoadError("Wrong format for MODEL lines!");
                }
                
                if (models.isEmpty()) models.add(DEFAULT_MODEL);
                return models.toArray(new Integer[models.size()]);
        }
        
        /**
         * To read the pdb data (atom coordinates, residue serials, atom serials) from file.
         * pdbCode gets set to the one parsed in HEADER, to "CASP" for Casp prediction files (first line 
         * PFRMAT TS) or is left untouched if the HEADER line contains no pdb code
         * sequence gets set to the sequence read from the SEQRES lines of the chain or, if there are 
         * none, to the sequence of the residues present in ATOM lines (i.e. observed residues only)
         * secondaryStructure gets filled from the HELIX, SHEET and TURN lines of the chain
         * sequence and secondaryStructure are reset first, so that calling load more than once 
         * doesn't accumulate data of previous calls
         * Only ATOM lines of the model set in the model field are read
         * No insertion codes are parsed or taken into account at the moment. Thus files with 
         * insertion codes will be incorrectly read
         * @throws IOException if the file can't be found or read
         * @throws PdbfileFormatError if the file is empty, its first line is neither a HEADER nor a 
         * Casp PFRMAT line, a numeric field can't be parsed, the chain has no standard amino acid in 
         * its ATOM lines or the SEQRES sequence is shorter than the last residue serial in ATOM lines
         * @throws PdbChainCodeNotFoundError if no ATOM line is found for the chain code and model
         */
        private void read_pdb_data_from_file() throws IOException, PdbfileFormatError, PdbChainCodeNotFoundError {
                resser_atom2atomserial = new HashMap<String,Integer>();
                resser2restype = new HashMap<Integer,String>();
                atomser2coord = new HashMap<Integer,Point3d>();
                atomser2resser = new HashMap<Integer,Integer>();
                // both are appended to while reading: start clean so that a second load doesn't duplicate data
                sequence = "";
                secondaryStructure = new SecondaryStructure();
                
                // we set chainCodeStr (for regex) to pdbChainCode except for case NULL where we use " " (NULL is a blank chain code in pdb files)
                String chainCodeStr=pdbChainCode;
                if (pdbChainCode.equals(Pdb.NULL_CHAIN_CODE)) chainCodeStr=" ";
                // quoted so that a chain code is always matched literally, whatever character it is
                String chainRegex = Pattern.quote(chainCodeStr);
                
                // patterns depending on the chain code, compiled once per call instead of once per line
                //SEQRES   1 A  348  VAL ASN ILE LYS THR ASN PRO PHE LYS ALA VAL SER PHE
                Pattern seqresPattern = Pattern.compile("^SEQRES.{5}"+chainRegex);
                //HELIX    1   1 LYS A   17  LEU A   26  1
                // groups: helix serial, beg res serial, end res serial
                Pattern helixPattern = Pattern.compile("^HELIX..(...).{9}"+chainRegex+".(....).{6}"+chainRegex+".(....)");
                //SHEET    2   A 5 ILE A  96  THR A  99 -1  N  LYS A  98   O  THR A 107
                // groups: strand serial, sheet id, beg res serial, end res serial
                Pattern sheetPattern = Pattern.compile("^SHEET..(...).(...).{7}"+chainRegex+"(....).{6}"+chainRegex+"(....)");
                //TURN     1 S1A GLY A  16  GLN A  18     SURFACE
                // groups: turn serial, beg res serial, end res serial
                Pattern turnPattern = Pattern.compile("^TURN...(...).{9}"+chainRegex+"(....).{6}"+chainRegex+"(....)");
                // groups: atom serial, atom, res type, res serial, x, y, z
                // NOTE: matched case sensitively, chain codes "a" and "A" are different chains
                Pattern atomPattern = Pattern.compile("^.{6}(.....).{2}(...).{1}(...).{1}"+chainRegex+"(.{4}).{4}(.{8})(.{8})(.{8})");
                
                StringBuilder seqres = new StringBuilder();
                boolean empty = true; // controls whether we don't find any atom line for given pdbChainCode and model
                int thismodel=DEFAULT_MODEL; // we initialise to DEFAULT_MODEL, in case file doesn't have MODEL lines 
                int linecount=0;
                
                BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile)));
                try {
                        String line = fpdb.readLine();
                        if (line==null) {
                                throw new PdbfileFormatError("The file "+pdbfile+" is empty.");
                        }
                        linecount = 1;
                        parseFirstLine(line);
                        
                        // read further lines
                        while ((line = fpdb.readLine() ) != null ) {
                                linecount++;
                                try {
                                        Matcher m;
                                        // SEQRES
                                        if (seqresPattern.matcher(line).find()) {
                                                appendSeqresResidues(line, seqres);
                                        }
                                        // SECONDARY STRUCTURE
                                        // helix
                                        m = helixPattern.matcher(line);
                                        if (m.find()){
                                                int serial = Integer.parseInt(m.group(1).trim());
                                                int beg = Integer.parseInt(m.group(2).trim());
                                                int end = Integer.parseInt(m.group(3).trim());
                                                String ssId = String.valueOf(SecStrucElement.HELIX)+serial;
                                                secondaryStructure.add(new SecStrucElement(SecStrucElement.HELIX,beg,end,ssId));
                                        }
                                        // sheet
                                        m = sheetPattern.matcher(line);
                                        if (m.find()){
                                                int strandSerial = Integer.parseInt(m.group(1).trim());
                                                String sheetId = m.group(2).trim();
                                                int beg = Integer.parseInt(m.group(3).trim());
                                                int end = Integer.parseInt(m.group(4).trim());
                                                String ssId = String.valueOf(SecStrucElement.STRAND)+sheetId+strandSerial;
                                                secondaryStructure.add(new SecStrucElement(SecStrucElement.STRAND,beg,end,ssId));
                                        }
                                        // turn
                                        m = turnPattern.matcher(line);
                                        if (m.find()){
                                                int serial = Integer.parseInt(m.group(1).trim());
                                                int beg = Integer.parseInt(m.group(2).trim());
                                                int end = Integer.parseInt(m.group(3).trim());
                                                String ssId = String.valueOf(SecStrucElement.TURN)+serial;
                                                secondaryStructure.add(new SecStrucElement(SecStrucElement.TURN,beg,end,ssId));
                                        }
                                        // MODEL
                                        m = MODEL_PATTERN.matcher(line);
                                        if (m.find()){
                                                thismodel=Integer.parseInt(m.group(1));
                                        }
                                        if (thismodel!=model) continue; // we skip reading of atom lines if we are not in the desired model
                                        // ATOM
                                        if (!line.startsWith("ATOM")) continue;
                                        m = atomPattern.matcher(line);
                                        if (!m.find()) continue; // ATOM line of another chain (or truncated line)
                                        empty=false;
                                        int atomserial=Integer.parseInt(m.group(1).trim());
                                        String atom = m.group(2).trim();
                                        String res_type = m.group(3).trim();
                                        int res_serial = Integer.parseInt(m.group(4).trim());
                                        double x = Double.parseDouble(m.group(5).trim());
                                        double y = Double.parseDouble(m.group(6).trim());
                                        double z = Double.parseDouble(m.group(7).trim());
                                        if (AAinfo.isValidAA(res_type)) {
                                                atomser2coord.put(atomserial, new Point3d(x,y,z));
                                                atomser2resser.put(atomserial, res_serial);
                                                resser2restype.put(res_serial, res_type);
                                                if (AAinfo.isValidAtomWithOXT(res_type,atom)){
                                                        resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
                                                }
                                        }
                                } catch (NumberFormatException e) {
                                        // reported as a format error of the file instead of escaping as an unchecked exception
                                        throw new PdbfileFormatError("Couldn't parse a numeric field in line "+linecount+" of pdb file "+pdbfile);
                                }
                        }
                } finally {
                        fpdb.close(); // also closes the file when any of the exceptions above is thrown
                }
                
                if (empty) {
                        throw new PdbChainCodeNotFoundError("Couldn't find any ATOM line for given pdbChainCode: "+pdbChainCode+", model: "+model);
                }
                if (resser2restype.isEmpty()) {
                        // ATOM lines were found but none belongs to a standard amino acid: there is nothing to compute lengths from
                        throw new PdbfileFormatError("No standard amino acid found in ATOM lines of pdb file "+pdbfile+" for pdbChainCode: "+pdbChainCode+", model: "+model);
                }
                
                int lastResser = Collections.max(resser2restype.keySet());
                sequence = seqres.toString();
                if (sequence.equals("")){
                        // if we couldn't read anything from SEQRES then we read it from the resser2restype HashMap
                        // NOTE: we must make sure elsewhere that there are no unobserved residues, we can't check that here!
                        ArrayList<Integer> ressers = new ArrayList<Integer>(resser2restype.keySet());
                        Collections.sort(ressers);
                        StringBuilder observed = new StringBuilder();
                        for (int resser:ressers){
                                observed.append(AAinfo.threeletter2oneletter(resser2restype.get(resser)));
                        }
                        sequence = observed.toString();
                        // not size but maximum: if residue numbering in pdb file is correct, then this takes care of non-observed except for possible non-observed at end of chain
                        fullLength = lastResser; 
                } else { // we read the sequence from SEQRES
                        if( sequence.length() < lastResser) {
                                throw new PdbfileFormatError("Last residue serial in ATOM lines is bigger than SEQRES length!");
                        }
                        fullLength = sequence.length();
                }
        }
        
        /**
         * Checks the first line of the pdb file and sets pdbCode from it.
         * @param line the first line of the file
         * @throws PdbfileFormatError if the line is neither a HEADER line nor a Casp PFRMAT TS line
         */
        private void parseFirstLine(String line) throws PdbfileFormatError {
                if (line.startsWith("HEADER")) {
                        // if the HEADER line contains no pdb code, pdbCode is left as it is
                        Matcher mh = HEADER_PDBCODE_PATTERN.matcher(line);
                        if (mh.find()) {
                                pdbCode=mh.group(1).toLowerCase();
                        }
                } else if (CASP_PATTERN.matcher(line).find()) {
                        // a Casp prediction file
                        pdbCode = "CASP";
                } else {
                        // a HEADER is the minimum we ask at the moment for a pdb file to have, if we don't find it in line 1 we throw an exception
                        throw new PdbfileFormatError("The pdb file "+pdbfile+" does not have a HEADER record");
                }
        }
        
        /**
         * Appends the one letter codes of the residues of a SEQRES line to the given buffer.
         * Residues that are not valid amino acids are appended as NONSTANDARD_AA_LETTER.
         * @param line a SEQRES line
         * @param seqres the buffer to append to
         */
        private void appendSeqresResidues(String line, StringBuilder seqres) {
                // up to 13 residue names per line, 4 columns apart; lines may be shorter than 
                // 70 characters if trailing blanks were stripped
                for (int i=19;i<=67 && i+3<=line.length();i+=4) {
                        String res = line.substring(i, i+3);
                        if (res.equals("   ")) continue;
                        if (AAinfo.isValidAA(res)) {
                                seqres.append(AAinfo.threeletter2oneletter(res));
                        } else { // for non-standard aas
                                seqres.append(NONSTANDARD_AA_LETTER);
                        }
                }
        }
        
}
Revision:	492
Committed:	Wed Jan 2 13:18:57 2008 UTC (16 years, 9 months ago) by duarte
File size:	12219 byte(s)
Log Message:	Copied the aglappe-jung branch into trunk.
Line	File contents
1	package proteinstructure;
2
3	import java.io.BufferedReader;
4	import java.io.File;
5	import java.io.FileNotFoundException;
6	import java.io.FileReader;
7	import java.io.IOException;
8	import java.util.ArrayList;
9	import java.util.Collections;
10	import java.util.HashMap;
11	import java.util.TreeSet;
12	import java.util.regex.Matcher;
13	import java.util.regex.Pattern;
14
15	import javax.vecmath.Point3d;
16
17	public class PdbfilePdb extends Pdb {
18
19	private static final String UNKNOWN_STRING ="XXXX";
20	private static final String NULL_chainCode = "A";
21
22	private String pdbfile;
23
24	/**
25	* Constructs an empty Pdb object given a pdbfile name
26	* Data will be loaded from pdb file upon call of load(pdbChainCode, modelSerial)
27	* @param pdbfile
28	*/
29	public PdbfilePdb (String pdbfile) {
30	this.pdbfile = pdbfile;
31	this.pdbCode=UNKNOWN_STRING; // we initialise to unknown in case we don't find it in pdb file
32	this.dataLoaded = false;
33
34	this.sequence=""; // we initialise it to empty string, then is set in read_pdb_data_from_file
35
36	// we initialise the secondary structure to empty, if no sec structure info is found then they remain empty
37	this.secondaryStructure = new SecondaryStructure();
38
39	}
40
41	public void load(String pdbChainCode, int modelSerial) throws PdbLoadError {
42	try {
43	this.model=modelSerial;
44	this.pdbChainCode=pdbChainCode; // NOTE! pdb chain codes are case sensitive!
45	// we set chainCode to pdbChainCode except for case NULL where we use "A"
46	this.chainCode=pdbChainCode;
47	if (pdbChainCode.equals(Pdb.NULL_CHAIN_CODE)) this.chainCode=NULL_chainCode;
48
49	read_pdb_data_from_file();
50
51	this.obsLength = resser2restype.size();
52
53	if(!secondaryStructure.isEmpty()) {
54	secondaryStructure.setComment("Author");
55	}
56
57	// when reading from pdb file we have no information of residue numbers or author's (original) pdb residue number, so we fill the mapping with the residue numbers we know
58	//TODO eventually we could assign our own internal residue numbers when reading from pdb and thus this map would be used
59	this.resser2pdbresser = new HashMap<Integer, String>();
60	this.pdbresser2resser = new HashMap<String, Integer>();
61	for (int resser:resser2restype.keySet()){
62	resser2pdbresser.put(resser, String.valueOf(resser));
63	pdbresser2resser.put(String.valueOf(resser), resser);
64	}
65
66	// initialising atomser2atom from resser_atom2atomserial
67	atomser2atom = new HashMap<Integer, String>();
68	for (String resser_atom:resser_atom2atomserial.keySet()){
69	int atomserial = resser_atom2atomserial.get(resser_atom);
70	String atom = resser_atom.split("_")[1];
71	atomser2atom.put(atomserial,atom);
72	}
73
74	dataLoaded = true;
75
76	} catch (FileNotFoundException e) {
77	throw new PdbLoadError(e);
78	} catch (PdbfileFormatError e) {
79	throw new PdbLoadError(e);
80	} catch (IOException e) {
81	throw new PdbLoadError(e);
82	} catch (PdbChainCodeNotFoundError e) {
83	throw new PdbLoadError(e);
84	}
85	}
86
87	public String[] getChains() throws PdbLoadError {
88	TreeSet<String> chains = new TreeSet<String>();
89	try {
90	BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile)));
91	String line;
92	while ((line=fpdb.readLine())!=null) {
93	if (line.startsWith("ATOM")) {
94	String chain = line.substring(21, 22);
95	if (chain.equals(" ")) chain="NULL";
96	chains.add(chain);
97	}
98	}
99	fpdb.close();
100	} catch (IOException e) {
101	throw new PdbLoadError(e);
102	}
103
104	if (chains.isEmpty()) return null;
105
106	String[] chainsArray = new String[chains.size()];
107	chains.toArray(chainsArray);
108	return chainsArray;
109	}
110
111	public Integer[] getModels() throws PdbLoadError {
112	TreeSet<Integer> models = new TreeSet<Integer>();
113	try {
114	BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile)));
115	String line;
116	while ((line=fpdb.readLine())!=null) {
117	if (line.startsWith("MODEL")) {
118	int model = Integer.parseInt(line.substring(6,line.length()));
119	models.add(model);
120	}
121	}
122	fpdb.close();
123	} catch (IOException e) {
124	throw new PdbLoadError(e);
125	} catch (NumberFormatException e) {
126	throw new PdbLoadError("Wrong format for MODEL lines!");
127	}
128
129	if (models.isEmpty()) models.add(DEFAULT_MODEL);//return null;
130	Integer[] modelsArray = new Integer[models.size()];
131	models.toArray(modelsArray);
132	return modelsArray;
133	}
134
135	/**
136	* To read the pdb data (atom coordinates, residue serials, atom serials) from file.
137	* chainCode gets set to same as pdbChainCode, except if input chain code NULL then chainCode will be 'A'
138	* pdbCode gets set to the one parsed in HEADER or to 'Unknown' if not found
139	* sequence gets set to the sequence read from ATOM lines (i.e. observed residues only)
140	* No insertion codes are parsed or taken into account at the moment. Thus files with
141	* insertion codes will be incorrectly read
142	* @param pdbfile
143	* @throws FileNotFoundException
144	* @throws IOException
145	* @throws PdbfileFormatError
146	* @throws PdbChainCodeNotFoundError
147	*/
148	private void read_pdb_data_from_file() throws FileNotFoundException, IOException, PdbfileFormatError, PdbChainCodeNotFoundError {
149	resser_atom2atomserial = new HashMap<String,Integer>();
150	resser2restype = new HashMap<Integer,String>();
151	atomser2coord = new HashMap<Integer,Point3d>();
152	atomser2resser = new HashMap<Integer,Integer>();
153	Pattern p;
154	Matcher m;
155	boolean empty = true; // controls whether we don't find any atom line for given pdbChainCode and model
156	// we set chainCodeStr (for regex) to pdbChainCode except for case NULL where we use " " (NULL is a blank chain code in pdb files)
157	String chainCodeStr=pdbChainCode;
158	if (pdbChainCode.equals(Pdb.NULL_CHAIN_CODE)) chainCodeStr=" ";
159
160	int thismodel=DEFAULT_MODEL; // we initialise to DEFAULT_MODEL, in case file doesn't have MODEL lines
161	BufferedReader fpdb = new BufferedReader(new FileReader(new File(pdbfile)));
162	int linecount=0;
163	String line;
164	// read first line
165	if((line = fpdb.readLine()) != null ) {
166	linecount = 1;
167	// HEADER
168	p = Pattern.compile("^HEADER");
169	m = p.matcher(line);
170	if (m.find()){
171	Pattern ph = Pattern.compile("^HEADER.{56}(\\d\\w{3})");
172	Matcher mh = ph.matcher(line);
173	if (mh.find()) {
174	pdbCode=mh.group(1).toLowerCase();
175	}
176	} else { // header not found
177	// check whether this is a Casp prediction file
178	p = Pattern.compile("^PFRMAT\\s+TS");
179	m = p.matcher(line);
180	if(m.find()) {
181	// ok, it is
182	pdbCode = "CASP";
183	} else {
184	// a HEADER is the minimum we ask at the moment for a pdb file to have, if we don't find it in line 1 we throw an exception
185	throw new PdbfileFormatError("The pdb file "+pdbfile+" does not have a HEADER record");
186	}
187	}
188	} else {
189	throw new PdbfileFormatError("The file "+pdbfile+" is empty.");
190	}
191	// read further lines
192	while ((line = fpdb.readLine() ) != null ) {
193	linecount++;
194	// SEQRES
195	//SEQRES 1 A 348 VAL ASN ILE LYS THR ASN PRO PHE LYS ALA VAL SER PHE
196	p = Pattern.compile("^SEQRES.{5}"+chainCodeStr);
197	m = p.matcher(line);
198	if (m.find()){
199	for (int i=19;i<=67;i+=4) {
200	if (!line.substring(i, i+3).equals(" ")) {
201	if (AAinfo.isValidAA(line.substring(i, i+3))) { // for non-standard aas
202	sequence+= AAinfo.threeletter2oneletter(line.substring(i, i+3));
203	} else {
204	sequence+= NONSTANDARD_AA_LETTER;
205	}
206	}
207	}
208	}
209	// SECONDARY STRUCTURE
210	// helix
211	//HELIX 1 1 LYS A 17 LEU A 26 1
212	// helix ser beg res ser end res ser
213	p = Pattern.compile("^HELIX..(...).{9}"+chainCodeStr+".(....).{6}"+chainCodeStr+".(....)");
214	m = p.matcher(line);
215	if (m.find()){
216	int serial = Integer.valueOf(m.group(1).trim());
217	int beg = Integer.valueOf(m.group(2).trim());
218	int end = Integer.valueOf(m.group(3).trim());
219	String ssId = new Character(SecStrucElement.HELIX).toString()+serial;
220	SecStrucElement ssElem = new SecStrucElement(SecStrucElement.HELIX,beg,end,ssId);
221	secondaryStructure.add(ssElem);
222	}
223	// sheet
224	//SHEET 2 A 5 ILE A 96 THR A 99 -1 N LYS A 98 O THR A 107
225	// strand ser sheet id beg res ser end res ser
226	p = Pattern.compile("^SHEET..(...).(...).{7}"+chainCodeStr+"(....).{6}"+chainCodeStr+"(....)");
227	m = p.matcher(line);
228	if (m.find()){
229	int strandSerial = Integer.valueOf(m.group(1).trim());
230	String sheetId = m.group(2).trim();
231	int beg = Integer.valueOf(m.group(3).trim());
232	int end = Integer.valueOf(m.group(4).trim());
233	String ssId = new Character(SecStrucElement.STRAND).toString()+sheetId+strandSerial;
234	SecStrucElement ssElem = new SecStrucElement(SecStrucElement.STRAND,beg,end,ssId);
235	secondaryStructure.add(ssElem);
236	}
237	// we've stored the sec structure info in the strands2begEnd and sheets2strands maps.
238	// the assignment to resser2secstruct is done when we reach the ATOM lines, see below
239	// turn
240	//TURN 1 S1A GLY A 16 GLN A 18 SURFACE
241	// turn ser beg res ser end res ser
242	p = Pattern.compile("^TURN...(...).{9}"+chainCodeStr+"(....).{6}"+chainCodeStr+"(....)");
243	m = p.matcher(line);
244	if (m.find()){
245	int serial = Integer.valueOf(m.group(1).trim());
246	int beg = Integer.valueOf(m.group(2).trim());
247	int end = Integer.valueOf(m.group(3).trim());
248	String ssId = new Character(SecStrucElement.TURN).toString()+serial;
249	SecStrucElement ssElem = new SecStrucElement(SecStrucElement.TURN,beg,end,ssId);
250	secondaryStructure.add(ssElem);
251	}
252	// MODEL
253	p = Pattern.compile("^MODEL\\s+(\\d+)");
254	m = p.matcher(line);
255	if (m.find()){
256	thismodel=Integer.parseInt(m.group(1));
257	}
258	if (thismodel!=model) continue; // we skip reading of atom lines if we are not in the desired model
259	// ATOM
260	p = Pattern.compile("^ATOM");
261	m = p.matcher(line);
262	if (m.find()){
263	// serial atom res_type chain res_ser x y z
264	Pattern pl = Pattern.compile("^.{6}(.....).{2}(...).{1}(...).{1}"+chainCodeStr+"(.{4}).{4}(.{8})(.{8})(.{8})",Pattern.CASE_INSENSITIVE);
265	Matcher ml = pl.matcher(line);
266	if (ml.find()) {
267	empty=false;
268	int atomserial=Integer.parseInt(ml.group(1).trim());
269	String atom = ml.group(2).trim();
270	String res_type = ml.group(3).trim();
271	int res_serial = Integer.parseInt(ml.group(4).trim());
272	double x = Double.parseDouble(ml.group(5).trim());
273	double y = Double.parseDouble(ml.group(6).trim());
274	double z = Double.parseDouble(ml.group(7).trim());
275	Point3d coords = new Point3d(x,y,z);
276	if (AAinfo.isValidAA(res_type)) {
277	atomser2coord.put(atomserial, coords);
278	atomser2resser.put(atomserial, res_serial);
279	resser2restype.put(res_serial, res_type);
280	if (AAinfo.isValidAtomWithOXT(res_type,atom)){
281	resser_atom2atomserial.put(res_serial+"_"+atom, atomserial);
282	}
283	}
284	}
285	}
286	}
287	fpdb.close();
288	if (empty) {
289	//System.err.println("Couldn't find any atom line for given pdbChainCode: "+pdbChainCode+", model: "+model);
290	throw new PdbChainCodeNotFoundError("Couldn't find any ATOM line for given pdbChainCode: "+pdbChainCode+", model: "+model);
291	}
292	if (sequence.equals("")){
293	// if we couldn't read anything from SEQRES then we read it from the resser2restype HashMap
294	// NOTE: we must make sure elsewhere that there are no unobserved residues, we can't check that here!
295	ArrayList<Integer> ressers = new ArrayList<Integer>();
296	for (int resser:resser2restype.keySet()) {
297	ressers.add(resser);
298	}
299	Collections.sort(ressers);
300	for (int resser:ressers){
301	String oneletter = AAinfo.threeletter2oneletter(resser2restype.get(resser));
302	sequence += oneletter;
303	}
304	// not size but maximum: if residue numbering in pdb file is correct, then this takes care of non-observed except for possible non-observed at end of chain
305	fullLength = Collections.max(resser2restype.keySet());
306	} else { // we read the sequence from SEQRES
307	if( sequence.length() < Collections.max(resser2restype.keySet())) {
308	throw new PdbfileFormatError("Last residue serial in ATOM lines is bigger than SEQRES length!");
309	}
310	fullLength = sequence.length();
311	}
312	}
313
314	}