ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/FileGraph.java
Revision: 208
Committed: Wed Jun 27 14:42:12 2007 UTC (17 years, 3 months ago) by duarte
File size: 4300 byte(s)
Log Message:
FIXED BUGS:
- directed was not set when reading from cm file
- pdbChainCode not set when reading from db given pdbCode
- filling pdbresser2resser and resser2pdbresser hashmaps also in reading from pdb file
- using "A" as chainCode when reading from pdb file
- some error handling for file formats: new exception classes GraphFileFormatError and PdbfileFormatError
Line User Rev File contents
1 duarte 207 package proteinstructure;
2    
3     import java.io.BufferedReader;
4     import java.io.File;
5     import java.io.FileNotFoundException;
6     import java.io.FileReader;
7     import java.io.IOException;
8     import java.util.TreeMap;
9     import java.util.regex.Matcher;
10     import java.util.regex.Pattern;
11    
12     /**
13     * A residue interaction graph derived from a single chain pdb protein structure loaded from a graph file in aglappe's format
14     *
15     * @author Jose Duarte
16     * Class: FileGraph
17     * Package: proteinstructure
18     */
19     public class FileGraph extends Graph {
20    
21    
22     /**
23     * Constructs Graph object by reading a file with contacts
24     * If the contacts file doesn't have the sequence then the graph object won't have sequence or nodes
25     * That means it won't be possible to get a ContactMap from it using getCM because CM needs both sequence and nodes
26     * @param contactsfile
27     * @throws IOException
28     * @throws FileNotFoundException
29 duarte 208 * @throws GraphFileFormatError
30 duarte 207 */
31 duarte 208 public FileGraph (String contactsfile) throws IOException, FileNotFoundException, GraphFileFormatError{
32 duarte 207 // we set the sequence to blank when we read from file as we don't have the full sequence
33     // if sequence is present in contactsfile then is read from there
34     this.sequence="";
35     this.ct="";
36     this.cutoff=0.0;
37     // we initialise pdbCode, chainCode and pdbChainCode to empty strings in case the file doesn't specify then
38     this.pdbCode="";
39     this.chainCode="";
40     this.pdbChainCode="";
41 duarte 208 this.directed=false;
42    
43     read_graph_from_file(contactsfile); // initialises contacts, and nodes (only if sequence is given)
44    
45 duarte 207 if (ct.contains("/")){
46     directed=true;
47     }
48 duarte 208
49 duarte 207 if (!sequence.equals("")){
50     this.fullLength=sequence.length();
51     this.obsLength=nodes.size();
52     } else {
53     // if contacts have correct residue numbering then this should get the right full length up to the maximum node that makes a contact,
54     // we will miss: nodes without contacts at the end of sequence and gaps (unobserved residues) at the end of the sequence.
55     // We don't know more without nodes and sequence
56     this.fullLength=contacts.getMaxNode();
57     // in this case nodes has not been initialised so we set obsLength=fullLength as we don't have the information
58     this.obsLength=fullLength;
59     }
60     this.numContacts=contacts.size();
61     this.modified=false;
62     }
63    
64 duarte 208 private void read_graph_from_file (String contactsfile) throws FileNotFoundException, IOException, GraphFileFormatError {
65 duarte 207 contacts = new ContactList();
66     //System.out.println("Reading contacts from file "+contactsfile);
67     BufferedReader fcont = new BufferedReader(new FileReader(new File(contactsfile)));
68     String line;
69     while ((line = fcont.readLine() ) != null ) {
70     Pattern p = Pattern.compile("^#");
71     Matcher m = p.matcher(line);
72     if (m.find()){
73 duarte 208 Pattern ps = Pattern.compile("^#AGLAPPE.*ver: (\\d\\.\\d)");
74 duarte 207 Matcher ms = ps.matcher(line);
75     if (ms.find()){
76 duarte 208 if (!ms.group(1).equals(GRAPHFILEFORMATVERSION)){
77     throw new GraphFileFormatError("The graph file "+contactsfile+" can't be read, wrong file format version");
78     }
79     }
80     ps = Pattern.compile("^#SEQUENCE:\\s*(\\w+)$");
81     ms = ps.matcher(line);
82     if (ms.find()){
83 duarte 207 sequence=ms.group(1);
84     }
85     ps = Pattern.compile("^#PDB:\\s*(\\w+)");
86     ms = ps.matcher(line);
87     if (ms.find()){
88     pdbCode=ms.group(1);
89     }
90     ps = Pattern.compile("^#PDB CHAIN CODE:\\s*(\\w)");
91     ms = ps.matcher(line);
92     if (ms.find()){
93     pdbChainCode=ms.group(1);
94     }
95     ps = Pattern.compile("^#CHAIN:\\s*(\\w)");
96     ms = ps.matcher(line);
97     if (ms.find()){
98     chainCode=ms.group(1);
99     }
100     ps = Pattern.compile("^#CT:\\s*([a-zA-Z/]+)");
101     ms = ps.matcher(line);
102     if (ms.find()){
103     ct=ms.group(1);
104     }
105     ps = Pattern.compile("^#CUTOFF:\\s*(\\d+\\.\\d+)");
106     ms = ps.matcher(line);
107     if (ms.find()){
108     cutoff=Double.parseDouble(ms.group(1));
109     }
110     }
111     else{
112     int i = Integer.parseInt(line.split("\\s+")[0]);
113     int j = Integer.parseInt(line.split("\\s+")[1]);
114     contacts.add(new Contact(i,j));
115     }
116     }
117     fcont.close();
118     // if sequence was given we take nodes from it
119     nodes = new TreeMap<Integer, String>();
120     for (int i=0;i<sequence.length();i++){
121     String letter = String.valueOf(sequence.charAt(i));
122     nodes.put(i+1, AA.oneletter2threeletter(letter));
123     }
124    
125     }
126    
127     }