ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/proteinstructure/FileGraph.java
Revision: 326
Committed: Thu Sep 20 14:49:55 2007 UTC (17 years ago) by duarte
File size: 4876 byte(s)
Log Message:
Removed class AA and replaced it with AAinfo, which reads contact types from a separate file, contactTypes.dat
New class ContactType which contains atoms for each contact type and residue. A static object for each contact type is loaded into AAinfo upon reading the contactTypes.dat file
Changed all references accordingly
Line User Rev File contents
1 duarte 207 package proteinstructure;
2    
3     import java.io.BufferedReader;
4     import java.io.File;
5     import java.io.FileNotFoundException;
6     import java.io.FileReader;
7     import java.io.IOException;
8     import java.util.TreeMap;
9     import java.util.regex.Matcher;
10     import java.util.regex.Pattern;
11    
12     /**
13     * A residue interaction graph derived from a single chain pdb protein structure loaded from a graph file in aglappe's format
14     *
15     * @author Jose Duarte
16     * Class: FileGraph
17     * Package: proteinstructure
18     */
19     public class FileGraph extends Graph {
20    
21 duarte 284 private static double DEFAULT_WEIGHT = 1.0;
22 duarte 207
23     /**
24     * Constructs Graph object by reading a file with contacts
25     * If the contacts file doesn't have the sequence then the graph object won't have sequence or nodes
26     * That means it won't be possible to get a ContactMap from it using getCM because CM needs both sequence and nodes
27     * @param contactsfile
28     * @throws IOException
29     * @throws FileNotFoundException
30 duarte 208 * @throws GraphFileFormatError
31 duarte 207 */
32 duarte 208 public FileGraph (String contactsfile) throws IOException, FileNotFoundException, GraphFileFormatError{
33 duarte 207 // we set the sequence to blank when we read from file as we don't have the full sequence
34     // if sequence is present in contactsfile then is read from there
35     this.sequence="";
36     this.ct="";
37     this.cutoff=0.0;
38     // we initialise pdbCode, chainCode and pdbChainCode to empty strings in case the file doesn't specify then
39     this.pdbCode="";
40     this.chainCode="";
41     this.pdbChainCode="";
42 duarte 208 this.directed=false;
43    
44     read_graph_from_file(contactsfile); // initialises contacts, and nodes (only if sequence is given)
45    
46 duarte 207 if (ct.contains("/")){
47     directed=true;
48     }
49 duarte 208
50 duarte 207 if (!sequence.equals("")){
51     this.fullLength=sequence.length();
52     this.obsLength=nodes.size();
53     } else {
54     // if contacts have correct residue numbering then this should get the right full length up to the maximum node that makes a contact,
55     // we will miss: nodes without contacts at the end of sequence and gaps (unobserved residues) at the end of the sequence.
56     // We don't know more without nodes and sequence
57     this.fullLength=contacts.getMaxNode();
58     // in this case nodes has not been initialised so we set obsLength=fullLength as we don't have the information
59     this.obsLength=fullLength;
60     }
61     this.numContacts=contacts.size();
62     this.modified=false;
63     }
64    
65 duarte 208 private void read_graph_from_file (String contactsfile) throws FileNotFoundException, IOException, GraphFileFormatError {
66 duarte 234 contacts = new EdgeSet();
67 duarte 284 weights = new TreeMap<Edge, Double>();
68 duarte 207 //System.out.println("Reading contacts from file "+contactsfile);
69     BufferedReader fcont = new BufferedReader(new FileReader(new File(contactsfile)));
70 duarte 209 int linecount=0;
71 duarte 207 String line;
72     while ((line = fcont.readLine() ) != null ) {
73 duarte 209 linecount++;
74 duarte 211 Pattern p = Pattern.compile("^#AGLAPPE.*ver: (\\d\\.\\d)");
75 duarte 207 Matcher m = p.matcher(line);
76     if (m.find()){
77 duarte 211 if (!m.group(1).equals(GRAPHFILEFORMATVERSION)){
78     throw new GraphFileFormatError("The graph file "+contactsfile+" can't be read, wrong file format version. Supported version is "+GRAPHFILEFORMATVERSION+" and found version was "+m.group(1));
79 duarte 208 }
80 duarte 211 } else if (linecount==1){ // #AGLAPPE not found and in first line
81     throw new GraphFileFormatError("The graph file "+contactsfile+" can't be read, wrong file format");
82 duarte 207 }
83 duarte 211 Pattern ps = Pattern.compile("^#SEQUENCE:\\s*(\\w+)$");
84     Matcher ms = ps.matcher(line);
85     if (ms.find()){
86     sequence=ms.group(1);
87     }
88     ps = Pattern.compile("^#PDB:\\s*(\\w+)");
89     ms = ps.matcher(line);
90     if (ms.find()){
91     pdbCode=ms.group(1);
92     }
93     ps = Pattern.compile("^#PDB CHAIN CODE:\\s*(\\w+)");
94     ms = ps.matcher(line);
95     if (ms.find()){
96     pdbChainCode=ms.group(1);
97     }
98     ps = Pattern.compile("^#CHAIN:\\s*(\\w)");
99     ms = ps.matcher(line);
100     if (ms.find()){
101     chainCode=ms.group(1);
102     }
103     ps = Pattern.compile("^#CT:\\s*([a-zA-Z/]+)");
104     ms = ps.matcher(line);
105     if (ms.find()){
106     ct=ms.group(1);
107     }
108     ps = Pattern.compile("^#CUTOFF:\\s*(\\d+\\.\\d+)");
109     ms = ps.matcher(line);
110     if (ms.find()){
111     cutoff=Double.parseDouble(ms.group(1));
112     }
113    
114 duarte 284 Pattern pcontact = Pattern.compile("^\\s*(\\d+)\\s+(\\d+)(?:\\s+(\\d+\\.\\d+))?\\s*$");
115 duarte 211 Matcher mcontact = pcontact.matcher(line);
116     if (mcontact.find()){
117 duarte 284 int i = Integer.valueOf(mcontact.group(1));
118     int j = Integer.valueOf(mcontact.group(2));
119     double weight = DEFAULT_WEIGHT;
120     if (mcontact.group(3)!=null) {
121     weight = Double.valueOf(mcontact.group(3));
122     }
123     Edge cont = new Edge(i,j);
124     contacts.add(cont);
125     weights.put(cont,weight);
126 duarte 207 }
127 duarte 211
128 duarte 207 }
129     fcont.close();
130     // if sequence was given we take nodes from it
131     nodes = new TreeMap<Integer, String>();
132     for (int i=0;i<sequence.length();i++){
133     String letter = String.valueOf(sequence.charAt(i));
134 duarte 326 nodes.put(i+1, AAinfo.oneletter2threeletter(letter));
135 duarte 207 }
136    
137     }
138    
139     }