#!/usr/bin/python

## All this code is copyright Ramon Diaz-Uriarte. For security reasons, this is for
## now confidential. No license is granted to copy, distribute, or modify it.
## Once everything is OK, it will be distributed under the GPL.

## CGI script that is polled (through an auto-refreshing page) while the R
## process of an ADaCGH run is working.  Depending on what the run directory
## contains it builds results.html (success, R error, or run killed for
## exceeding R_MAX_time) and redirects to it, or re-emits the refreshing page.
##
## NOTE(review): the copy of this file that was reviewed had lost every HTML
## tag inside its string literals (and any Python expression embedded inside
## a tag).  The markup below is a reconstruction: the visible text is the
## original one, the tags are plausible but must be checked against a pristine
## copy.  Where a link target could not be derived from the surviving code
## (help pages, diagnostic pdfs, the ACE re-run form) the text is emitted
## without a link and flagged with NOTE(review).

import sys
import os
import cgi
import types
import time
import shutil
import string
import signal
import re
import glob
import subprocess
import tarfile
import cgitb
cgitb.enable() ## zz: remove for real work?
sys.stderr = sys.stdout ## remove?

R_MAX_time = 12 * 3600 ## 12 hours is max duration allowed for any process

RUNNING_PROCS_PREFIX = "/http/adacgh/www/R.running.procs/R."
## NOTE(review): PaLS base URL and the "datafile" query key were inside
## stripped markup; confirm against Pomelo II's Send_to_Pals.cgi.
PALS_URL = "http://pals.bioinfo.cnio.es"
## NOTE(review): the refresh interval was inside stripped markup; confirm.
REFRESH_SECONDS = 30


## Small private helpers

def _read_file(path):
    """ Return the whole content of *path*; the file is always closed. """
    f = open(path)
    try:
        return f.read()
    finally:
        f.close()


def _read_optional(path, default = ''):
    """ Like _read_file, but return *default* if the file cannot be read. """
    try:
        return _read_file(path)
    except IOError:
        return default


def _remove_quietly(path):
    """ Delete *path*; a missing or undeletable file is not an error. """
    try:
        os.remove(path)
    except OSError:
        pass


def _rename_quietly(src, dst):
    """ Rename *src* to *dst*; failure is ignored (best effort). """
    try:
        os.rename(src, dst)
    except OSError:
        pass


def _run(argv, cwd = None, stdout = None, env = None):
    """ Run *argv* without a shell.

    Returns the exit status, or -1 if the program could not be started
    (os.system, which this replaces, never raised either)."""
    try:
        return subprocess.call(argv, cwd = cwd, stdout = stdout, env = env)
    except OSError:
        return -1


def _halt_lam():
    """ Best-effort shutdown of the LAM/MPI universe of this run.

    The session suffix is read from <tmpDir>/lamSuffix; if that file is
    missing nothing is done."""
    try:
        suffix = _read_file(tmpDir + "/lamSuffix").split('\n')[0].strip()
    except IOError:
        return
    env = os.environ.copy()
    env['LAM_MPI_SESSION_SUFFIX'] = suffix
    for command in ('lamhalt', 'lamwipe'):
        _run([command, '-H'], env = env)


def _redirect_to_results():
    """ Emit the CGI redirect to the results page of the current run. """
    print('Location: http://adacgh.bioinfo.cnio.es/tmp/' + newDir + '/results.html \n\n')


def _publish_page(outf):
    """ Close the html document in *outf* and copy it to results.html. """
    outf.write("</body></html>")
    outf.close()
    shutil.copyfile(tmpDir + "/pre-results.html", tmpDir + "/results.html")


def _fatal(paragraphs):
    """ Print an html error page with the given paragraphs and exit. """
    commonOutput()
    print("<h1> ERROR </h1>")
    for text in paragraphs:
        print("<p> " + text + " </p>")
    print("</body></html>")
    sys.exit()


def _array_names():
    """ Return the tab-separated names on the first line of arrayNames. """
    return _read_file(tmpDir + '/arrayNames').split('\n')[0].split('\t')


def printPalsURLADaCGH(newDir, application_url = "http://adacgh.bioinfo.cnio.es"):
    """ Based on Pomelo II's Send_to_Pals.cgi.

    Return an html fragment with three links that send the lost, gained,
    and gained-or-lost gene lists of run *newDir* to PaLS.  The files
    'idtype' and 'organism' are read from the current working directory;
    if either contains "None" the organism/id-type query part is omitted."""
    idtype = _read_file("idtype").strip()
    organism = _read_file("organism").strip()
    if (idtype != "None" and organism != "None"):
        url_org_id = "org=" + organism + "&idtype=" + idtype + "&"
    else:
        url_org_id = ""
    gl_base = application_url + '/tmp/' + newDir + '/'
    targets = [
        (gl_base + 'Lost_for_PaLS.txt',
         'Send set of genes with copy number LOSS to PaLS'),
        (gl_base + 'Gained_for_PaLS.txt',
         'Send set of genes with copy number GAIN to PaLS'),
        (gl_base + 'Gained_or_Lost_for_PaLS.txt',
         'Send set of genes with copy number ALTERATION (either gain or loss) to PaLS'),
    ]
    ## NOTE(review): reconstructed markup; the original heading probably
    ## carried a PaLS logo image instead of the text link.
    pieces = ['<br /> <hr> ',
              '<h3>Send results to <a href="' + PALS_URL + '">PaLS</a></h3>']
    for datafile, label in targets:
        pieces.append('<p><a href="' + PALS_URL + '?' + url_org_id +
                      'datafile=' + datafile + '">' + label + '</a></p>')
    return ''.join(pieces)


def pdf2html(rootname, tmpDir, outf, compressedFile, maxsthumb = 350):
    """ From a multipage pdf obtain jpegs; the thumbnails are inserted in
    the webpage, the large jpeg viewed on click.

    rootname is all the stuff before the `pdf', tmpDir is the directory
    where the files live, maxsthumb max size of thumbnail and outf the html
    file.  The full-size jpeg is also shrunk (to at most 1600 pixels) for
    showing.  compressedFile is accepted for compatibility; it is not used
    (the code that added the jpegs to it was already commented out)."""
    mst = str(maxsthumb)
    mst2 = str(1600)
    _run(['/usr/bin/pdftoppm', rootname + '.pdf', 'tmpppms'], cwd = tmpDir)
    ## glob order is arbitrary: sort so that figure N really is page N
    ## (pdftoppm zero-pads the page number in the file name).
    pages = glob.glob(os.path.join(tmpDir, 'tmpppms*.ppm'))
    pages.sort()
    for fignum, ppm in enumerate(pages):
        jpeg = rootname + '.' + str(fignum + 1) + '.jpeg'
        thumbnail = 'thumb.' + jpeg
        jpeg_file = open(os.path.join(tmpDir, jpeg), 'wb')
        try:
            _run(['/usr/bin/ppmtojpeg', ppm], stdout = jpeg_file)
        finally:
            jpeg_file.close()
        _run(['/usr/bin/convert', '-size', mst + 'x' + mst, jpeg,
              '-resize', mst + 'x' + mst, thumbnail], cwd = tmpDir)
        _run(['/usr/bin/convert', jpeg,
              '-resize', mst2 + 'x' + mst2, jpeg], cwd = tmpDir)
        ## NOTE(review): reconstructed markup.
        outf.write('<a href="' + jpeg + '"> <img src="' + thumbnail + '"></a>\n')


def thumb(tmpDir, fnames, outf, maxsthumb = 350):
    """ From a set of pngs, obtain thumbnails and add links to html.

    The thumbnails are inserted in the webpage, the large png viewed on
    click.  tmpDir is the directory where the files live, fnames is a list
    with the base file names to process, maxsthumb max size of thumbnail
    and outf the html file.  Empty names are skipped."""
    mst = str(maxsthumb)
    for bname in fnames:
        if not bname:
            continue
        ## Names come from a data file: no shell, and escape them for html.
        _run(['/usr/bin/convert', bname + '.png', '-resize', mst + 'x' + mst,
              'thumb.' + bname + '.jpeg'], cwd = tmpDir)
        safe = cgi.escape(bname, True)
        ## NOTE(review): reconstructed markup.
        outf.write(''.join(['<a href="', safe, '.png"> <img src="thumb.',
                            safe, '.jpeg">', safe, '</a>']))


## For redirections, from Python Cookbook

def getQualifiedURL(uri = None):
    """ Return a full URL starting with schema, servername and port.

        *uri* -- append this server-rooted uri (must start with a slash)
    """
    schema, stdport = ('http', '80')
    host = os.environ.get('HTTP_HOST')
    if not host:
        host = os.environ.get('SERVER_NAME')
        port = os.environ.get('SERVER_PORT', '80')
        if port != stdport:
            host = host + ":" + port
    result = "%s://%s" % (schema, host)
    if uri:
        result = result + uri
    return result


def getScriptname():
    """ Return the scriptname part of the URL."""
    return os.environ.get('SCRIPT_NAME', '')


def getBaseURL():
    """ Return a fully qualified URL to this script. """
    return getQualifiedURL(getScriptname())


def commonOutput():
    """ Print the CGI header and the opening html of an error page. """
    print("Content-type: text/html\n\n")
    print("""
    <html>
    <head>
    <title>ADaCGH results</title>
    </head>
    <body>
    """)


## to keep executing myself:
def relaunchCGI():
    """ Print a page that refreshes itself by calling this script again. """
    ## NOTE(review): reconstructed markup (refresh target and interval).
    selfURL = getBaseURL() + '?newDir=' + newDir
    resultsURL = 'http://adacgh.bioinfo.cnio.es/tmp/' + newDir + '/results.html'
    print("Content-type: text/html\n\n")
    print("""
    <html>
    <head>
    """)
    print('<meta http-equiv="Refresh" content="' + str(REFRESH_SECONDS) +
          '; URL=' + selfURL + '">')
    print('<title>ADaCGH results</title>')
    print('</head> <body>')
    print('<p> This is an autorefreshing page; your results will eventually be displayed here.\n')
    print('If your browser does not autorefresh, the results will be kept for five days at</p>')
    print('<p><a href="' + resultsURL + '">' + ' ' + resultsURL + '</a>.')
    print('</p> </body> </html>')


## Output-generating functions

def printErrorRun():
    """ Build results.html for a run whose R code halted with an error.

    Shows results.txt and error.msg (either may be missing, in which case
    it is shown empty)."""
    resultsFile = _read_optional(tmpDir + "/results.txt")
    errormsg = _read_optional(tmpDir + "/error.msg")
    outf = open(tmpDir + "/pre-results.html", mode = "w")
    outf.write("<html><head><title>ADaCGH results </title></head><body>\n")
    outf.write("<h1> ERROR: There was a problem with the R code </h1>\n")
    outf.write("<p> This could be a bug on our code, or a problem ")
    outf.write("with your data (that we hadn't thought of). Below is all the output from the execution ")
    outf.write("of the run. Unless it is obvious to you that this is a fault of your data ")
    outf.write("(and that there is no way we could have avoided the crash) ")
    outf.write("please let us know so we can fix the problem. ")
    outf.write("Please send us this URL and the output below</p>")
    outf.write("<p> This is the results file:</p>")
    outf.write("<pre>")
    outf.write(cgi.escape(resultsFile))
    outf.write("</pre>")
    outf.write("<p> And this is the error message:</p>")
    outf.write("<pre>")
    outf.write(cgi.escape(errormsg))
    outf.write("</pre>")
    _publish_page(outf)


## (heading markup, file with the html table) in page order.
## NOTE(review): heading levels and separators are reconstructed.
_SUMMARY_SECTIONS = [
    ('<hr><h3>Genes/clones per chromosome</h3>', 'clones.per.chrom.html'),
    ('<h3>Summary statistics before centering</h3>',
     'stats.before.centering.html'),
    ('<h3>Summary statistics: subject/array by chromosome before centering</h3>'
     '<h4>Mean</h4>', 'stats.subj.by.chrom.mean.BEFORE.html'),
    ('<h4>Median</h4>', 'stats.subj.by.chrom.median.BEFORE.html'),
    ('<h4>MAD</h4>', 'stats.subj.by.chrom.mad.BEFORE.html'),
    ('<br /><h3>Summary statistics after centering</h3>',
     'stats.after.centering.html'),
    ('<h3>Summary statistics: subject/array by chromosome after centering</h3>'
     '<h4>Mean</h4>', 'stats.subj.by.chrom.mean.AFTER.html'),
    ('<h4>Median</h4>', 'stats.subj.by.chrom.median.AFTER.html'),
    ('<h4>MAD</h4>', 'stats.subj.by.chrom.mad.AFTER.html'),
]

_STALE_DEFAULT = ['f1.R', 'ace-figs.R', 'f1.Rout']
_STALE_ACE = ['rerunACE.Rout', 'f1.R', 'rerunACE.R', 'f1.Rout']


def _write_segmented_thumbs(outf):
    """ Thumbnails of every array plus the 'All_arrays' figure. """
    thumb(tmpDir, _array_names(), outf, maxsthumb = 350)
    thumb(tmpDir, ['All_arrays'], outf, maxsthumb = 350)


def _write_cbs(outf, allResults):
    """ CBS section; returns (files to delete, whether to add PaLS links). """
    ## NOTE(review): the two "View/save" lines were links to pdfs whose
    ## names were lost, and "(help)" was a link to the help page.
    outf.write('<h2>Diagnostic plots (help)</h2>\n')
    outf.write('<h3>One plot per array/sample</h3>')
    outf.write('View/save the (multipage) pdf')
    outf.write('<h3>One plot per array/sample and chromosome</h3>')
    outf.write('View/save the (multipage) pdf')
    outf.write('<p>Click on thumbnails to expand.</p>')
    outf.write('<h2>Segmented data plots (help)</h2>\n')
    _write_segmented_thumbs(outf)
    outf.write('<p>Smoothed values for all genes/clones are available from file' +
               ' <a href="CBS.results.txt">"CBS.results.txt".</a></p>')
    outf.write('<br />')
    ## strip(): the file may or may not end with a newline.
    merged = (_read_file(tmpDir + '/DNA.merge').strip() == "Yes")
    outf.write('<h2>Plateau plots (help)</h2>\n')
    if merged:
        outf.write('Plateau plots are not available (do not make sense) with merge levels.\n')
    else:
        pdf2html('CBS.plateau.plots', tmpDir, outf, allResults, 150)
    outf.write('<br />')
    if os.path.exists(tmpDir + "/mcr.results.html"):
        outf.write('<h2>Minimal common regions</h2>\n')
        outf.write(_read_file(tmpDir + "/mcr.results.html"))
        outf.write('<br />')
    return _STALE_DEFAULT, merged


def _write_ws(outf, allResults):
    """ Wavelets section; returns (files to delete, add PaLS links). """
    outf.write('<h2>Diagnostic autocorrelation plots (help)</h2>\n')
    ## NOTE(review): was a link to a pdf whose name was lost.
    outf.write('View/save the (multipage) pdf') ##zz: later, provide thumbnails
    ## and true images
    outf.write('<br />')
    outf.write('<p>Click on thumbnails to expand.</p>')
    outf.write('<h2>Segmented data plots (help)</h2>\n')
    _write_segmented_thumbs(outf)
    outf.write('<p>Smoothed values for all genes/clones are available from file' +
               ' <a href="Wavelets.results.txt">"Wavelets.results.txt".</a></p>')
    outf.write('<br />')
    outf.write('<h2>Plateau plots (help)</h2>\n')
    pdf2html('WS.plateau.plots', tmpDir, outf, allResults, 150)
    outf.write('<br />')
    return _STALE_DEFAULT, False


def _write_psw(outf, allResults):
    """ Price-Smith-Waterman section; returns (files to delete, add PaLS). """
    arrayNames = _array_names()
    outf.write('<h2>Island plots, gains (help)</h2>\n')
    outf.write('<p>Click on thumbnails to expand.</p>')
    thumb(tmpDir, ['Gains.' + aname for aname in arrayNames if aname],
          outf, maxsthumb = 350)
    outf.write('<br />')
    outf.write('<h2>Island plots, losses (help)</h2>\n')
    outf.write('<p>Click on thumbnails to expand.</p>')
    thumb(tmpDir, ['Losses.' + aname for aname in arrayNames if aname],
          outf, maxsthumb = 350)
    outf.write('<br />')
    outf.write('<p>Smith-Waterman results for all genes/clones are available from files ' +
               '<a href="Gains.Price.Smith.Waterman.results.txt">"Gains.Price.Smith.Waterman.results.txt"</a>' +
               ' <a href="Losses.Price.Smith.Waterman.results.txt">"Losses.Price.Smith.Waterman.results.txt."</a></p>')
    return _STALE_DEFAULT, True


def _write_ace(outf, allResults):
    """ ACE section; returns (files to delete, add PaLS links). """
    outf.write('<h2>FDR table</h2>')
    outf.write(_read_file(tmpDir + "/ace.fdrtable.html"))
    outf.write('<br />\n')
    currentfdr = _read_file(tmpDir + '/aceFDR').split('\n')[0].strip()
    ## NOTE(review): here the original emitted a <form> (action, hidden
    ## newDir field, FDR text input pre-filled with currentfdr, submit
    ## button) that re-runs ACE; its markup was lost and is NOT recreated.
    outf.write(' (Change the desired FDR and Press "Submit" to obtain figures with new FDR)')
    outf.write('<h2>Segmented plots</h2><p>Click on thumbnails to expand.</p>')
    _write_segmented_thumbs(outf)
    fdrfile = 'ACE.results.FDR=' + currentfdr + '.txt'
    outf.write('<p>Inferred gains and losses available from file' +
               ' <a href="' + cgi.escape(fdrfile, True) + '">' +
               '"' + cgi.escape(fdrfile) + '"</a></p>')
    return _STALE_ACE, True


_METHOD_WRITERS = {'CBS': _write_cbs, 'WS': _write_ws,
                   'PSW': _write_psw, 'ACE': _write_ace}


def _finish_results_page(outf, allResults, stale_files, with_pals):
    """ Delete scratch files, fill the tarball, add the download (and
    optionally PaLS) links, and publish the page.

    Leaves the working directory at tmpDir."""
    for name in stale_files:
        _remove_quietly(tmpDir + '/' + name)
    #_remove_quietly(tmpDir + '/.RData')
    os.chdir(tmpDir)
    for logname in glob.glob('*.log'):
        _remove_quietly(logname)
    for flname in glob.glob('*'):
        try:
            allResults.add(flname)
        except (IOError, OSError, tarfile.TarError):
            ## best effort: one unreadable file must not lose the page
            pass
    allResults.close()
    outf.write('<br /><a href="all.results.tar.gz">Download all figures and text results.</a>')
    if with_pals:
        ## reads 'idtype' and 'organism' from the cwd, i.e. tmpDir
        outf.write(printPalsURLADaCGH(newDir))
    _publish_page(outf)


def printOKRun():
    """ Build results.html for a run that terminated normally.

    If R left an ErrorFigure.png only that figure and results.txt are
    shown.  Otherwise the summary tables are written, followed by the
    section of the method named in <tmpDir>/methodaCGH (CBS, WS, PSW or
    ACE), and all files of the run directory are packed in
    all.results.tar.gz."""
    resultsFile = _read_file(tmpDir + "/results.txt")
    outf = open(tmpDir + "/pre-results.html", mode = "w")
    outf.write("<html><head><title>ADaCGH results</title></head><body>\n")

    if os.path.exists(tmpDir + "/ErrorFigure.png"):
        outf.write('<img border="0" src="ErrorFigure.png">')
        outf.write("<br /><br /><hr>")
        outf.write('<h2>Results (help)</h2>\n')
        outf.write("<br /><br /><hr>")
        outf.write("<pre>")
        outf.write(cgi.escape(resultsFile))
        outf.write("</pre>")
        _publish_page(outf)
        return

    outf.write('<h2>ADaCGH Results (help)</h2>\n')
    outf.write(_read_file(tmpDir + "/results.for.html"))
    outf.write('<br />')
    for heading, fname in _SUMMARY_SECTIONS:
        outf.write(heading)
        outf.write(_read_file(tmpDir + '/' + fname))
        outf.write('<br />')

    ## The following is common to all.  The archive is opened exactly once:
    ## re-opening it in 'w:gz' mode would leak this handle and silently
    ## discard the summary.statistics.txt entry.
    allResults = tarfile.open(tmpDir + '/all.results.tar.gz', 'w:gz')
    allResults.add(tmpDir + '/results.txt', 'summary.statistics.txt')

    methodUsed = _read_file(tmpDir + '/methodaCGH').strip()
    writer = _METHOD_WRITERS.get(methodUsed)
    if writer is None:
        ## Unknown method: still publish what we have instead of leaving
        ## the user on the refreshing page forever.
        stale_files, with_pals = _STALE_DEFAULT, False
    else:
        stale_files, with_pals = writer(outf, allResults)
    _finish_results_page(outf, allResults, stale_files, with_pals)


def printRKilled():
    """ Build results.html for a run killed for exceeding R_MAX_time. """
    resultsFile = _read_optional(tmpDir + "/results.txt")
    outf = open(tmpDir + "/pre-results.html", mode = "w")
    outf.write("<html><head><title>ADaCGH results </title></head><body>\n")
    outf.write("<h1> ERROR: R process killed </h1>\n")
    outf.write("<p> The R process lasted longer than the maximum allowed time, ")
    outf.write(str(R_MAX_time))
    outf.write(" seconds, and was killed.")
    outf.write("<p> This is the results file:</p>")
    outf.write("<pre>")
    outf.write(cgi.escape(resultsFile))
    outf.write("</pre>")
    _publish_page(outf)


if __name__ == "__main__":
    ## Changing to the appropriate directory
    form = cgi.FieldStorage()
    if 'newDir' not in form:
        _fatal(["newDir is empty."])
    value = form['newDir']
    if isinstance(value, list):
        _fatal(["newDir should not be a list.",
                "Anyone trying to mess with it?"])
    newDir = value.value

    if (not str(newDir)) or re.search(r'[^0-9]', str(newDir)):
        ## newDir can ONLY contain digits.
        _fatal(["newDir does not have a valid format.",
                "Anyone trying to mess with it?"])

    tmpDir = "/http/adacgh/www/tmp/" + newDir
    if not os.path.isdir(tmpDir):
        _fatal(["newDir is not a valid directory.",
                "Anyone trying to mess with it?"])

    ## Were we already done in a previous execution?
    ## No need to reopen files or check anything else. Return url with
    ## results and bail out.
    if os.path.exists(tmpDir + "/natural.death.pid.txt") or \
       os.path.exists(tmpDir + "/killed.pid.txt"):
        _redirect_to_results()
        sys.exit()

    ## No, we were not done. Need to examine R output
    ## (f1.Rout may not exist yet if R has not started writing).
    soFar = _read_optional(tmpDir + "/f1.Rout")
    finishedOK = soFar.endswith("Normal termination\n")
    errorRun = soFar.endswith("Execution halted\n")

    if os.path.exists(tmpDir + '/RterminatedOK'):
        finishedOK = True

    ## zz: refactor; change. Enter here only if not OK?
    ## Either file reporting a halt means the run failed.
    if _read_optional(tmpDir + "/error.msg").endswith("Execution halted\n"):
        errorRun = True

    pidFile = tmpDir + "/pid.txt"
    markerFile = RUNNING_PROCS_PREFIX + newDir

    ## zzz: change in the others too. Refactor!!
    if (not finishedOK) and (not errorRun) and os.path.exists(pidFile):
        ## do we need to kill an R process?
        if (time.time() - os.path.getmtime(pidFile)) > R_MAX_time:
            _halt_lam()
            printRKilled()
            _rename_quietly(pidFile, tmpDir + '/killed.pid.txt')
            _remove_quietly(tmpDir + '/f1.R')
            ## os.remove does not expand wildcards; expand them ourselves.
            ## NOTE(review): this also matches markers of any run whose id
            ## starts with newDir; confirm ids cannot be prefixes.
            for marker in glob.glob(markerFile + "*"):
                _remove_quietly(marker)
            _redirect_to_results()
            sys.exit()

    if errorRun:
        printErrorRun()
        _rename_quietly(pidFile, tmpDir + '/natural.death.pid.txt')
        _remove_quietly(tmpDir + '/f1.R')
        _remove_quietly(markerFile)
        _halt_lam()
        _redirect_to_results()
    elif finishedOK:
        _halt_lam()
        printOKRun()
        _rename_quietly(pidFile, tmpDir + '/natural.death.pid.txt')
        _remove_quietly(tmpDir + '/f1.R')
        _remove_quietly(markerFile)
        _redirect_to_results()
    else:
        ## we only end up here if: we were not done in a previous run AND
        ## no process was overtime AND we did not just finish. So we must
        ## continue.
        relaunchCGI()