1 |
duarte |
207 |
package proteinstructure; |
2 |
|
|
|
3 |
|
|
import java.io.BufferedReader; |
4 |
|
|
import java.io.File; |
5 |
|
|
import java.io.FileNotFoundException; |
6 |
|
|
import java.io.FileReader; |
7 |
|
|
import java.io.IOException; |
8 |
|
|
import java.util.TreeMap; |
9 |
|
|
import java.util.regex.Matcher; |
10 |
|
|
import java.util.regex.Pattern; |
11 |
|
|
|
12 |
|
|
/** |
13 |
|
|
* A residue interaction graph derived from a single chain pdb protein structure loaded from a graph file in aglappe's format |
14 |
|
|
* |
15 |
|
|
* @author Jose Duarte |
16 |
|
|
* Class: FileGraph |
17 |
|
|
* Package: proteinstructure |
18 |
|
|
*/ |
19 |
|
|
public class FileGraph extends Graph { |
20 |
|
|
|
21 |
|
|
|
22 |
|
|
/** |
23 |
|
|
* Constructs Graph object by reading a file with contacts |
24 |
|
|
* If the contacts file doesn't have the sequence then the graph object won't have sequence or nodes |
25 |
|
|
* That means it won't be possible to get a ContactMap from it using getCM because CM needs both sequence and nodes |
26 |
|
|
* @param contactsfile |
27 |
|
|
* @throws IOException |
28 |
|
|
* @throws FileNotFoundException |
29 |
duarte |
208 |
* @throws GraphFileFormatError |
30 |
duarte |
207 |
*/ |
31 |
duarte |
208 |
public FileGraph (String contactsfile) throws IOException, FileNotFoundException, GraphFileFormatError{ |
32 |
duarte |
207 |
// we set the sequence to blank when we read from file as we don't have the full sequence |
33 |
|
|
// if sequence is present in contactsfile then is read from there |
34 |
|
|
this.sequence=""; |
35 |
|
|
this.ct=""; |
36 |
|
|
this.cutoff=0.0; |
37 |
|
|
// we initialise pdbCode, chainCode and pdbChainCode to empty strings in case the file doesn't specify then |
38 |
|
|
this.pdbCode=""; |
39 |
|
|
this.chainCode=""; |
40 |
|
|
this.pdbChainCode=""; |
41 |
duarte |
208 |
this.directed=false; |
42 |
|
|
|
43 |
|
|
read_graph_from_file(contactsfile); // initialises contacts, and nodes (only if sequence is given) |
44 |
|
|
|
45 |
duarte |
207 |
if (ct.contains("/")){ |
46 |
|
|
directed=true; |
47 |
|
|
} |
48 |
duarte |
208 |
|
49 |
duarte |
207 |
if (!sequence.equals("")){ |
50 |
|
|
this.fullLength=sequence.length(); |
51 |
|
|
this.obsLength=nodes.size(); |
52 |
|
|
} else { |
53 |
|
|
// if contacts have correct residue numbering then this should get the right full length up to the maximum node that makes a contact, |
54 |
|
|
// we will miss: nodes without contacts at the end of sequence and gaps (unobserved residues) at the end of the sequence. |
55 |
|
|
// We don't know more without nodes and sequence |
56 |
|
|
this.fullLength=contacts.getMaxNode(); |
57 |
|
|
// in this case nodes has not been initialised so we set obsLength=fullLength as we don't have the information |
58 |
|
|
this.obsLength=fullLength; |
59 |
|
|
} |
60 |
|
|
this.numContacts=contacts.size(); |
61 |
|
|
this.modified=false; |
62 |
|
|
} |
63 |
|
|
|
64 |
duarte |
208 |
private void read_graph_from_file (String contactsfile) throws FileNotFoundException, IOException, GraphFileFormatError { |
65 |
duarte |
234 |
contacts = new EdgeSet(); |
66 |
duarte |
207 |
//System.out.println("Reading contacts from file "+contactsfile); |
67 |
|
|
BufferedReader fcont = new BufferedReader(new FileReader(new File(contactsfile))); |
68 |
duarte |
209 |
int linecount=0; |
69 |
duarte |
207 |
String line; |
70 |
|
|
while ((line = fcont.readLine() ) != null ) { |
71 |
duarte |
209 |
linecount++; |
72 |
duarte |
211 |
Pattern p = Pattern.compile("^#AGLAPPE.*ver: (\\d\\.\\d)"); |
73 |
duarte |
207 |
Matcher m = p.matcher(line); |
74 |
|
|
if (m.find()){ |
75 |
duarte |
211 |
if (!m.group(1).equals(GRAPHFILEFORMATVERSION)){ |
76 |
|
|
throw new GraphFileFormatError("The graph file "+contactsfile+" can't be read, wrong file format version. Supported version is "+GRAPHFILEFORMATVERSION+" and found version was "+m.group(1)); |
77 |
duarte |
208 |
} |
78 |
duarte |
211 |
} else if (linecount==1){ // #AGLAPPE not found and in first line |
79 |
|
|
throw new GraphFileFormatError("The graph file "+contactsfile+" can't be read, wrong file format"); |
80 |
duarte |
207 |
} |
81 |
duarte |
211 |
Pattern ps = Pattern.compile("^#SEQUENCE:\\s*(\\w+)$"); |
82 |
|
|
Matcher ms = ps.matcher(line); |
83 |
|
|
if (ms.find()){ |
84 |
|
|
sequence=ms.group(1); |
85 |
|
|
} |
86 |
|
|
ps = Pattern.compile("^#PDB:\\s*(\\w+)"); |
87 |
|
|
ms = ps.matcher(line); |
88 |
|
|
if (ms.find()){ |
89 |
|
|
pdbCode=ms.group(1); |
90 |
|
|
} |
91 |
|
|
ps = Pattern.compile("^#PDB CHAIN CODE:\\s*(\\w+)"); |
92 |
|
|
ms = ps.matcher(line); |
93 |
|
|
if (ms.find()){ |
94 |
|
|
pdbChainCode=ms.group(1); |
95 |
|
|
} |
96 |
|
|
ps = Pattern.compile("^#CHAIN:\\s*(\\w)"); |
97 |
|
|
ms = ps.matcher(line); |
98 |
|
|
if (ms.find()){ |
99 |
|
|
chainCode=ms.group(1); |
100 |
|
|
} |
101 |
|
|
ps = Pattern.compile("^#CT:\\s*([a-zA-Z/]+)"); |
102 |
|
|
ms = ps.matcher(line); |
103 |
|
|
if (ms.find()){ |
104 |
|
|
ct=ms.group(1); |
105 |
|
|
} |
106 |
|
|
ps = Pattern.compile("^#CUTOFF:\\s*(\\d+\\.\\d+)"); |
107 |
|
|
ms = ps.matcher(line); |
108 |
|
|
if (ms.find()){ |
109 |
|
|
cutoff=Double.parseDouble(ms.group(1)); |
110 |
|
|
} |
111 |
|
|
|
112 |
duarte |
212 |
Pattern pcontact = Pattern.compile("^\\s*\\d+\\s+\\d+\\s*$"); |
113 |
duarte |
211 |
Matcher mcontact = pcontact.matcher(line); |
114 |
|
|
if (mcontact.find()){ |
115 |
duarte |
207 |
int i = Integer.parseInt(line.split("\\s+")[0]); |
116 |
|
|
int j = Integer.parseInt(line.split("\\s+")[1]); |
117 |
duarte |
234 |
contacts.add(new Edge(i,j)); |
118 |
duarte |
207 |
} |
119 |
duarte |
211 |
|
120 |
duarte |
207 |
} |
121 |
|
|
fcont.close(); |
122 |
|
|
// if sequence was given we take nodes from it |
123 |
|
|
nodes = new TreeMap<Integer, String>(); |
124 |
|
|
for (int i=0;i<sequence.length();i++){ |
125 |
|
|
String letter = String.valueOf(sequence.charAt(i)); |
126 |
|
|
nodes.put(i+1, AA.oneletter2threeletter(letter)); |
127 |
|
|
} |
128 |
|
|
|
129 |
|
|
} |
130 |
|
|
|
131 |
|
|
} |