ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/FileGraph.java
Revision: 234
Committed: Tue Jul 10 14:14:58 2007 UTC (17 years, 2 months ago) by duarte
File size: 4592 byte(s)
Log Message:
Made ContactList a TreeSet instead of an ArrayList, should improve performance (log(n) rather than linear)
REFACTORING: 
ContactList -> EdgeSet
Contact -> Edge
Line User Rev File contents
1 duarte 207 package proteinstructure;
2    
3     import java.io.BufferedReader;
4     import java.io.File;
5     import java.io.FileNotFoundException;
6     import java.io.FileReader;
7     import java.io.IOException;
8     import java.util.TreeMap;
9     import java.util.regex.Matcher;
10     import java.util.regex.Pattern;
11    
12     /**
13     * A residue interaction graph derived from a single chain pdb protein structure loaded from a graph file in aglappe's format
14     *
15     * @author Jose Duarte
16     * Class: FileGraph
17     * Package: proteinstructure
18     */
19     public class FileGraph extends Graph {
20    
21    
22     /**
23     * Constructs Graph object by reading a file with contacts
24     * If the contacts file doesn't have the sequence then the graph object won't have sequence or nodes
25     * That means it won't be possible to get a ContactMap from it using getCM because CM needs both sequence and nodes
26     * @param contactsfile
27     * @throws IOException
28     * @throws FileNotFoundException
29 duarte 208 * @throws GraphFileFormatError
30 duarte 207 */
31 duarte 208 public FileGraph (String contactsfile) throws IOException, FileNotFoundException, GraphFileFormatError{
32 duarte 207 // we set the sequence to blank when we read from file as we don't have the full sequence
33     // if sequence is present in contactsfile then is read from there
34     this.sequence="";
35     this.ct="";
36     this.cutoff=0.0;
37     // we initialise pdbCode, chainCode and pdbChainCode to empty strings in case the file doesn't specify then
38     this.pdbCode="";
39     this.chainCode="";
40     this.pdbChainCode="";
41 duarte 208 this.directed=false;
42    
43     read_graph_from_file(contactsfile); // initialises contacts, and nodes (only if sequence is given)
44    
45 duarte 207 if (ct.contains("/")){
46     directed=true;
47     }
48 duarte 208
49 duarte 207 if (!sequence.equals("")){
50     this.fullLength=sequence.length();
51     this.obsLength=nodes.size();
52     } else {
53     // if contacts have correct residue numbering then this should get the right full length up to the maximum node that makes a contact,
54     // we will miss: nodes without contacts at the end of sequence and gaps (unobserved residues) at the end of the sequence.
55     // We don't know more without nodes and sequence
56     this.fullLength=contacts.getMaxNode();
57     // in this case nodes has not been initialised so we set obsLength=fullLength as we don't have the information
58     this.obsLength=fullLength;
59     }
60     this.numContacts=contacts.size();
61     this.modified=false;
62     }
63    
64 duarte 208 private void read_graph_from_file (String contactsfile) throws FileNotFoundException, IOException, GraphFileFormatError {
65 duarte 234 contacts = new EdgeSet();
66 duarte 207 //System.out.println("Reading contacts from file "+contactsfile);
67     BufferedReader fcont = new BufferedReader(new FileReader(new File(contactsfile)));
68 duarte 209 int linecount=0;
69 duarte 207 String line;
70     while ((line = fcont.readLine() ) != null ) {
71 duarte 209 linecount++;
72 duarte 211 Pattern p = Pattern.compile("^#AGLAPPE.*ver: (\\d\\.\\d)");
73 duarte 207 Matcher m = p.matcher(line);
74     if (m.find()){
75 duarte 211 if (!m.group(1).equals(GRAPHFILEFORMATVERSION)){
76     throw new GraphFileFormatError("The graph file "+contactsfile+" can't be read, wrong file format version. Supported version is "+GRAPHFILEFORMATVERSION+" and found version was "+m.group(1));
77 duarte 208 }
78 duarte 211 } else if (linecount==1){ // #AGLAPPE not found and in first line
79     throw new GraphFileFormatError("The graph file "+contactsfile+" can't be read, wrong file format");
80 duarte 207 }
81 duarte 211 Pattern ps = Pattern.compile("^#SEQUENCE:\\s*(\\w+)$");
82     Matcher ms = ps.matcher(line);
83     if (ms.find()){
84     sequence=ms.group(1);
85     }
86     ps = Pattern.compile("^#PDB:\\s*(\\w+)");
87     ms = ps.matcher(line);
88     if (ms.find()){
89     pdbCode=ms.group(1);
90     }
91     ps = Pattern.compile("^#PDB CHAIN CODE:\\s*(\\w+)");
92     ms = ps.matcher(line);
93     if (ms.find()){
94     pdbChainCode=ms.group(1);
95     }
96     ps = Pattern.compile("^#CHAIN:\\s*(\\w)");
97     ms = ps.matcher(line);
98     if (ms.find()){
99     chainCode=ms.group(1);
100     }
101     ps = Pattern.compile("^#CT:\\s*([a-zA-Z/]+)");
102     ms = ps.matcher(line);
103     if (ms.find()){
104     ct=ms.group(1);
105     }
106     ps = Pattern.compile("^#CUTOFF:\\s*(\\d+\\.\\d+)");
107     ms = ps.matcher(line);
108     if (ms.find()){
109     cutoff=Double.parseDouble(ms.group(1));
110     }
111    
112 duarte 212 Pattern pcontact = Pattern.compile("^\\s*\\d+\\s+\\d+\\s*$");
113 duarte 211 Matcher mcontact = pcontact.matcher(line);
114     if (mcontact.find()){
115 duarte 207 int i = Integer.parseInt(line.split("\\s+")[0]);
116     int j = Integer.parseInt(line.split("\\s+")[1]);
117 duarte 234 contacts.add(new Edge(i,j));
118 duarte 207 }
119 duarte 211
120 duarte 207 }
121     fcont.close();
122     // if sequence was given we take nodes from it
123     nodes = new TreeMap<Integer, String>();
124     for (int i=0;i<sequence.length();i++){
125     String letter = String.valueOf(sequence.charAt(i));
126     nodes.put(i+1, AA.oneletter2threeletter(letter));
127     }
128    
129     }
130    
131     }