ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/FileGraph.java
Revision: 209
Committed: Wed Jun 27 15:14:31 2007 UTC (17 years, 3 months ago) by duarte
File size: 4511 byte(s)
Log Message:
Throwing exception also in case graph file not in right format at all (no #AGLAPPE in 1st line)
Fixed bug: was not reading correctly #PDB CHAIN CODE
Line User Rev File contents
1 duarte 207 package proteinstructure;
2    
3     import java.io.BufferedReader;
4     import java.io.File;
5     import java.io.FileNotFoundException;
6     import java.io.FileReader;
7     import java.io.IOException;
8     import java.util.TreeMap;
9     import java.util.regex.Matcher;
10     import java.util.regex.Pattern;
11    
12     /**
13     * A residue interaction graph derived from a single chain pdb protein structure loaded from a graph file in aglappe's format
14     *
15     * @author Jose Duarte
16     * Class: FileGraph
17     * Package: proteinstructure
18     */
19     public class FileGraph extends Graph {
20    
21    
22     /**
23     * Constructs Graph object by reading a file with contacts
24     * If the contacts file doesn't have the sequence then the graph object won't have sequence or nodes
25     * That means it won't be possible to get a ContactMap from it using getCM because CM needs both sequence and nodes
26     * @param contactsfile
27     * @throws IOException
28     * @throws FileNotFoundException
29 duarte 208 * @throws GraphFileFormatError
30 duarte 207 */
31 duarte 208 public FileGraph (String contactsfile) throws IOException, FileNotFoundException, GraphFileFormatError{
32 duarte 207 // we set the sequence to blank when we read from file as we don't have the full sequence
33     // if sequence is present in contactsfile then is read from there
34     this.sequence="";
35     this.ct="";
36     this.cutoff=0.0;
37     // we initialise pdbCode, chainCode and pdbChainCode to empty strings in case the file doesn't specify then
38     this.pdbCode="";
39     this.chainCode="";
40     this.pdbChainCode="";
41 duarte 208 this.directed=false;
42    
43     read_graph_from_file(contactsfile); // initialises contacts, and nodes (only if sequence is given)
44    
45 duarte 207 if (ct.contains("/")){
46     directed=true;
47     }
48 duarte 208
49 duarte 207 if (!sequence.equals("")){
50     this.fullLength=sequence.length();
51     this.obsLength=nodes.size();
52     } else {
53     // if contacts have correct residue numbering then this should get the right full length up to the maximum node that makes a contact,
54     // we will miss: nodes without contacts at the end of sequence and gaps (unobserved residues) at the end of the sequence.
55     // We don't know more without nodes and sequence
56     this.fullLength=contacts.getMaxNode();
57     // in this case nodes has not been initialised so we set obsLength=fullLength as we don't have the information
58     this.obsLength=fullLength;
59     }
60     this.numContacts=contacts.size();
61     this.modified=false;
62     }
63    
64 duarte 208 private void read_graph_from_file (String contactsfile) throws FileNotFoundException, IOException, GraphFileFormatError {
65 duarte 207 contacts = new ContactList();
66     //System.out.println("Reading contacts from file "+contactsfile);
67     BufferedReader fcont = new BufferedReader(new FileReader(new File(contactsfile)));
68 duarte 209 int linecount=0;
69 duarte 207 String line;
70     while ((line = fcont.readLine() ) != null ) {
71 duarte 209 linecount++;
72 duarte 207 Pattern p = Pattern.compile("^#");
73     Matcher m = p.matcher(line);
74     if (m.find()){
75 duarte 208 Pattern ps = Pattern.compile("^#AGLAPPE.*ver: (\\d\\.\\d)");
76 duarte 207 Matcher ms = ps.matcher(line);
77     if (ms.find()){
78 duarte 208 if (!ms.group(1).equals(GRAPHFILEFORMATVERSION)){
79     throw new GraphFileFormatError("The graph file "+contactsfile+" can't be read, wrong file format version");
80     }
81 duarte 209 } else if (linecount==1){ // #AGLAPPE not found and in first line
82     throw new GraphFileFormatError("The graph file "+contactsfile+" can't be read, wrong file format");
83 duarte 208 }
84     ps = Pattern.compile("^#SEQUENCE:\\s*(\\w+)$");
85     ms = ps.matcher(line);
86     if (ms.find()){
87 duarte 207 sequence=ms.group(1);
88     }
89     ps = Pattern.compile("^#PDB:\\s*(\\w+)");
90     ms = ps.matcher(line);
91     if (ms.find()){
92     pdbCode=ms.group(1);
93     }
94 duarte 209 ps = Pattern.compile("^#PDB CHAIN CODE:\\s*(\\w+)");
95 duarte 207 ms = ps.matcher(line);
96     if (ms.find()){
97     pdbChainCode=ms.group(1);
98     }
99     ps = Pattern.compile("^#CHAIN:\\s*(\\w)");
100     ms = ps.matcher(line);
101     if (ms.find()){
102     chainCode=ms.group(1);
103     }
104     ps = Pattern.compile("^#CT:\\s*([a-zA-Z/]+)");
105     ms = ps.matcher(line);
106     if (ms.find()){
107     ct=ms.group(1);
108     }
109     ps = Pattern.compile("^#CUTOFF:\\s*(\\d+\\.\\d+)");
110     ms = ps.matcher(line);
111     if (ms.find()){
112     cutoff=Double.parseDouble(ms.group(1));
113     }
114     }
115     else{
116     int i = Integer.parseInt(line.split("\\s+")[0]);
117     int j = Integer.parseInt(line.split("\\s+")[1]);
118     contacts.add(new Contact(i,j));
119     }
120     }
121     fcont.close();
122     // if sequence was given we take nodes from it
123     nodes = new TreeMap<Integer, String>();
124     for (int i=0;i<sequence.length();i++){
125     String letter = String.valueOf(sequence.charAt(i));
126     nodes.put(i+1, AA.oneletter2threeletter(letter));
127     }
128    
129     }
130    
131     }