# ------------------------------------------------------------------------------
# Copyright (C) 2007 Author: Martha Nason
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# ------------------------------------------------------------------------------

#' Write one file of random nucleotide sequences.
#'
#' For each of the first `nGenes` entries of `m`, a sequence of that many
#' letters is drawn: the per-letter counts come from a multinomial draw with
#' probabilities `pACTG`, and the letters are then shuffled. Each sequence is
#' written as a header line `>seq<i>` followed by one line holding the letters,
#' right-padded with spaces to `length` characters.
#'
#' @param m Numeric vector of sequence lengths (non-negative).
#' @param nGenes Number of sequences to write; at most `length(m)`.
#' @param pACTG Numeric vector of four probabilities, in the order A, C, T, G.
#'   A matrix of probabilities is not supported yet and raises an error.
#' @param length Width every sequence line is padded to; must be at least as
#'   large as every sequence length that is written.
#' @param filename Path of the file to create (an existing file is replaced).
#' @return `filename`, invisibly.
Gene.cluster.null <- function(m, nGenes = length(m), pACTG, length = max(m),
                              filename = "c:\\searchpattool\\sampletest.txt") {
  # Reject the unimplemented matrix case before touching the file, so that no
  # truncated output is left behind.
  if (is.matrix(pACTG)) {
    stop("UNDER CONSTRUCTION-- use vector of probabilities for now",
         call. = FALSE)
  }
  # `length` is also an argument of this function, so the base function is
  # always called with an explicit namespace below.
  stopifnot(
    is.numeric(m),
    is.numeric(nGenes), base::length(nGenes) == 1L, !is.na(nGenes),
    nGenes >= 0, nGenes <= base::length(m),
    is.numeric(pACTG), base::length(pACTG) == 4L
  )

  gene_ids <- seq_len(nGenes)
  seq_lengths <- m[gene_ids]
  out_lines <- character(2L * base::length(gene_ids))

  if (base::length(gene_ids) > 0L) {
    # Only evaluated when there is something to write: the default, max(m),
    # is meaningless for an empty `m`.
    line_width <- as.integer(length)
    if (anyNA(seq_lengths) || any(seq_lengths < 0)) {
      stop("sequence lengths in 'm' must be non-negative and not NA",
           call. = FALSE)
    }
    if (is.na(line_width) || any(seq_lengths > line_width)) {
      stop("'length' must be at least as large as every sequence length",
           call. = FALSE)
    }

    alphabet <- c("A", "C", "T", "G")
    for (i in gene_ids) {
      # Same random draws, in the same order, as the historical implementation:
      # multinomial letter counts first, then a shuffle of the letters.
      letter_counts <- rmultinom(1, seq_lengths[i], pACTG)[, 1]
      letters_i <- sample(rep(alphabet, times = letter_counts))
      padding <- strrep(" ", line_width - base::length(letters_i))
      out_lines[2L * i - 1L] <- paste0(">seq", i)
      out_lines[2L * i] <- paste0(paste0(letters_i, collapse = ""), padding)
    }
  }

  # One write for the whole file instead of three appends per sequence.
  writeLines(out_lines, con = filename)
  invisible(filename)
}

# Example call
# Gene.cluster.null(m = c(10, 8, 6, 5, 12), pACTG = c(.4, .1, .3, .2))

#' Create several random sequence files.
#'
#' Calls `Gene.cluster.null()` once per file; file number `j` is written to
#' `<filename.root><j>.txt`.
#'
#' @param nFiles Number of files to create; 0 creates none.
#' @param m Numeric vector of sequence lengths, passed to `Gene.cluster.null()`.
#' @param pACTG Numeric vector of four probabilities (A, C, T, G), passed to
#'   `Gene.cluster.null()`.
#' @param filename.root Path prefix shared by all generated files.
#' @return Character vector of the paths written, invisibly.
Create.multiple.Files <- function(nFiles = 1000, m, pACTG, filename.root) {
  stopifnot(
    is.numeric(nFiles), length(nFiles) == 1L, !is.na(nFiles), nFiles >= 0
  )
  # sprintf() yields character(0) for zero files, unlike paste0().
  paths <- sprintf("%s%d.txt", filename.root, seq_len(nFiles))
  for (path in paths) {
    Gene.cluster.null(m = m, pACTG = pACTG, filename = path)
  }
  invisible(paths)
}

# Example calls (kept commented out so that sourcing this file writes nothing)
#
# 1. Create 3 random files: sample1.txt, sample2.txt and sample3.txt. Each file
#    has 5 sequences with the background probabilities 0.4, 0.1, 0.3 and 0.2.
#    The lengths of the sequences are respectively 10, 8, 6, 5 and 12.
# Create.multiple.Files(3, m = c(10, 8, 6, 5, 12), pACTG = c(.4, .1, .3, .2),
#                       filename.root = "C:\\searchpattool\\sample")
#
# 2. Create 1000 random files: sample1.txt, sample2.txt, ... and
#    sample1000.txt. Each file has 21 sequences with the background
#    probabilities 0.328008, 0.167476, 0.193845 and 0.310671. The lengths of
#    the sequences are respectively 197, 264, 195, 218, 206, 451, 332, 245,
#    245, 448, 230, 451, 434, 241, 172, 267, 171, 207, 451, 451 and 161.
# Create.multiple.Files(
#   1000,
#   m = c(197, 264, 195, 218, 206, 451, 332, 245, 245, 448, 230, 451, 434,
#         241, 172, 267, 171, 207, 451, 451, 161),
#   pACTG = c(.328008, .167476, .193845, .310671),
#   filename.root = "C:\\searchpattool\\sample"
# )