<?php
/*
rTxt2htm 1.2.1, 22 January 2019
Copyright Santosh Patnaik
GPL v3 license
A PHP Labware internal utility - www.bioinformatics.org/phplabware/internal_utilities

See rTxt2htm_README.txt or rTxt2htm_README.htm for more

rTxt2htm creates HTML files from text files with special but simple and unobtrusive markup. It is intended for generating HTML versions of plain-text documentation (like 'readme' files).

Formatters (processing done in shown order) are:

- block of text with '+-----(5 or more)+' at top and at bottom is rendered as plain, mono-spaced text for tables, ASCII diagrams, etc.; rest of formatters don't apply to its content
- block of text with '== Content ==(any number of)' at top and >1 empty lines at bottom is considered a table of content (TOC); rest of formatters except `, * and ' don't apply to its content
- block of text flanked with a line with '/*'-style PHP comment markers at top and at bottom = a 'subtle' div
- lines like '@@title:...' are used to identify title, language, encoding, keywords and description, and removed
- 4 spaces before sentence = sentence shown as code (a tab is 4 spaces)
- 'term' = term given different appearance using 'span class="term"'; `, * and : are neutralized
- `term` = italics
- *term* = bold
- one_word1:- 1 space one_word2 = one_word1 hyperlinked to one_word2; use #one_word2 for anchors
- term with http:, https:, mailto:, ftp:, sftp:, file: = link created
- empty line and == (optional number) (text) ==(any no. of times)(optional o's) on next line = section div start; text in h2; no. of o's dictate number of previous opened divs to be closed; number like '1' and '3.2.1'; the headings get an anchor named same as the number but prefixed with 's', like 's1' and 's3.2.1'
- empty line and -- (optional number) (text) --(any no. of times)(optional o's) on next line = like above but for sub-section and h3
- empty line and .. (optional number) (text) ..(any no. of times)(optional o's) on next line = like above but for sub-sub-section and h4
- empty line and _____ (5 or more underscores) on next line by themselves = <hr>; optional o's at end for div closures

To try, use following:

== Content ===========================

1 Check section
  1.1 `Check` *sub-section*
    1.1.1 Check sub-sub-section
  1.2 Check section named 'bløf Charlène'
2 Check another section
  (2A) Alphanumeric identifier


== 1 Check section ==================

Check 'this' & '<this>'

+--------------------------+
Simple table
============

ID  Name  Age
..  ....  ...
1   John  23
2   Ram   8
+--------------------------+

-- 1.1 `Check` *sub-section* -------------

Check `this` and *this* and `*this*`!

.. 1.1.1 Check sub-sub-section .......

Check these:

* for URLs, see section:- #2
* here is CNN:- http://cnn.com (here:- http://abc.com is ABC)
* send me mail:- mailto:hello@me.com

-- 1.2 Check section named 'bløf Charlène' ------------------------oo

    Check code with 'this' `this` *this*

Check a horizontal rule

______

== 2 Check another section =========oo

* http://www.cnn.com
* mailto:someone@somewhere.com
* file://a_file

Check some non-English characters:

* 'Bløf Charlène', *bløf Charlène*, `bløf Charlène`, `bløf Charlène`
* Charlène:- where? Charlène:- where?
* *¥ · £ · € · $ · ¢ · ₡*

-- (2A) Alphanumeric identifier -------

_____________________________________oo

@@title: example text
*/

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Legacy pass-through kept so that the name stays available.
 *
 * It used to undo quote-slashing done by the 'e' modifier of preg_replace.
 * With preg_replace_callback no slashing happens, so the value is returned
 * unchanged.
 *
 * @param string $w Text captured by a regular expression.
 * @return string The same text.
 */
function unslash($w){
    return $w;
}

/**
 * Replace formatter characters with HTML character references so that later
 * formatters (and the section/TOC regexes) no longer see them.
 *
 * @param string $s     Text to protect.
 * @param string $chars Single-byte characters to neutralize; a space becomes
 *                      a non-breaking space, anything else a numeric reference.
 * @return string Text with the given characters replaced.
 */
function rtxt2htm_neutralize($s, $chars){
    $map = array();
    $len = strlen($chars);
    for($i = 0; $i < $len; ++$i){
        $c = $chars[$i];
        $map[$c] = ($c === ' ') ? '&nbsp;' : '&#'. ord($c). ';';
    }
    // strtr works in one pass, so text that was just inserted is never re-scanned
    return $map ? strtr($s, $map) : $s;
}

/**
 * Apply the inline formatters: 'term', `italics` and *bold*.
 *
 * Used for both the TOC and the main text; some trailing punctuation is
 * allowed after the closing marker.
 *
 * @param string $s Text in which <, > and & are already hidden.
 * @return string Text with span/em/strong markup added.
 */
function rtxt2htm_inline($s){
    // inside 'term' the characters `, * and : are neutralized
    $s = preg_replace_callback('`(^|\s[\([]?)\'([^\s][^\'\n\r]*)\'(?=[:;!?,.)\]]*\s|$)`', function($p){
        return $p[1]. '<span class="term">'. rtxt2htm_neutralize($p[2], '*`:'). '</span>';
    }, $s);
    $s = preg_replace('/(^|\s[\([]?)`([^\s][^`\n\r]*)`(?=[:;!?,.)\]]*\s|$)/', '$1<em>$2</em>', $s);
    $s = preg_replace('`(^|\s[\([]?|<em>)\*([^\s][^*\n\r]*)\*(?=[:;!?,.)\]]*\s|</em>|$)`', '$1<strong>$2</strong>', $s);
    return $s;
}

/**
 * First processing stage: normalize the text and take out the blocks that the
 * remaining formatters must not touch.
 *
 * The characters <, > and & are hidden as \x01, \x02 and \x03; "\r" is used
 * from here on for line-breaks that must stay real line-breaks in the output
 * (every "\n" later becomes a <br />).
 *
 * @param string $t Raw plain text.
 * @return array array(remaining text, TOC HTML or '').
 */
function rtxt2htm_prepare($t){
    // hide special chars; a tab is 4 spaces; standardize line-breaks
    $t = str_replace(array('<', '>', '&', "\t", "\r\n", "\r"), array("\x01", "\x02", "\x03", '    ', "\n", "\n"), $t);
    // spaces before line-breaks are usually unintended and hamper regex matching
    $t = preg_replace('` *\n`m', "\n", $t);

    // non-formatted (pre) blocks; every formatter character is neutralized
    $t = preg_replace_callback('`(?:^|\n) *\+-{5,}\+\n(.*?)\n *\+-{5,}\+\n(?=\n|\r|$)`sm', function($p){
        return "\r\n". '<div class="monospace">'. rtxt2htm_neutralize($p[1], "*'`-:.=_ "). '</div>'. "\r";
    }, $t);

    // TOC goes into its own variable so that other formatters don't apply to it;
    // items have numeric identifiers, or alphanumeric ones inside round brackets
    $toc = '';
    if(preg_match('`(?:^|\n)== *Content *==*\n*(.*?)\n\n`sm', $t, $m)){
        $t = preg_replace('`(?:^|\n)== *Content *==*\n* *(.*?)\n\n`sm', "\n", $t);
        $items = preg_replace_callback('`^( *)(\([a-zA-Z\d.]+\)|[\d.]+)?(.*)`m', function($p){
            $linked = strlen($p[2]) > 0;
            return $p[1]. '<span class="toc-item">'
                . ($linked ? '<a href="#s'. str_replace(array('(', ')'), '_', $p[2]). '">' : '')
                . '<span class="item-no">'. str_replace(array('(', ')'), '', $p[2]). '</span>'
                . $p[3]
                . ($linked ? '</a>' : '')
                . '</span>';
        }, rtxt2htm_inline($m[1]));
        $toc = "\r". '<div id="toc">'. $items. "</div><!-- ended div toc -->\r";
    }

    // blocks between C-style comment marker lines go into a subtle div; regular
    // links stay allowed; the extra space before '</div>' keeps the regular-link
    // regex working when the last word of the block is a link
    $t = preg_replace_callback('`(?:^|\n) */\*\n(.+?)\n *\*/\n(?=\n|\r|$)`ms', function($p){
        return "\r\n". '<div class="comment">'. rtxt2htm_neutralize($p[1], "*'`-@"). ' </div>'. "\r";
    }, $t);

    return array($t, $toc);
}

/**
 * Pull '@@name: value' lines out of the text.
 *
 * @param string $t Text as returned by rtxt2htm_prepare().
 * @return array array(text without the directive lines, array name => value);
 *               only directives that are present appear in the second array.
 *               Values have the hidden <, > and & restored as plain characters
 *               so that they can be escaped normally on output.
 */
function rtxt2htm_directives($t){
    $found = array();
    foreach(array('encoding', 'language', 'description', 'keywords', 'title') as $name){
        $re = '`\n@@'. $name. ':(.*)`m';
        if(preg_match($re, $t, $m)){
            $found[$name] = strtr(trim($m[1]), array("\x01" => '<', "\x02" => '>', "\x03" => '&'));
            $t = preg_replace($re, '', $t);
        }
    }
    return array($t, $found);
}

/**
 * Second processing stage: turn the prepared text into the HTML that follows
 * the page heading (TOC div plus the opened 'body' div).
 *
 * The 'body' div is left open; the page template closes it after the footer.
 *
 * @param string $t   Text as left by rtxt2htm_prepare()/rtxt2htm_directives().
 * @param string $toc TOC HTML from rtxt2htm_prepare(), or ''.
 * @return string HTML fragment.
 */
function rtxt2htm_render($t, $toc){
    // line with 4 leading spaces is code; `, ', * and : are neutralized
    $t = preg_replace_callback('`(?<=^|\n|\r)    (?:[^\n]+?)(?=\n|\r|$)`m', function($p){
        return "\r". '<code class="code">'. rtxt2htm_neutralize($p[0], "*'`:"). '</code>'. "\r";
    }, $t);

    // 'special' text, italics and bold
    $t = rtxt2htm_inline($t);

    // links of type 'word:- resource'; '#x' points to the anchor of section x
    $t = preg_replace_callback('`(^|\s[\([]?)(\S+?):- ([#a-zA-Z_\-0-9./](?:\S*)[a-zA-Z_\-0-9/])(?=[:;!?,.)\]]*\s|$)`', function($p){
        // a double quote would end the href attribute early
        $target = str_replace('"', '&quot;', $p[3]);
        if(substr($target, 0, 1) == '#'){
            $section = substr($target, 1);
            return $p[1]. '<a href="#s'. $section. '">'. $p[2]. ' '. $section. '</a>';
        }
        return $p[1]. '<a href="'. $target. '">'. $p[2]. '</a>';
    }, $t);

    // regular links
    $t = preg_replace_callback('`(^|\s[\([]?)((?:http|https|mailto|ftp|sftp|file):(?:[#a-zA-Z_\-0-9./](?:\S+)[a-zA-Z_\-0-9/]))(?=[:;!?,.)\]]*\s|$)`', function($p){
        return $p[1]. '<a href="'. str_replace('"', '&quot;', $p[2]). '">'. $p[2]. '</a>';
    }, $t);

    // sections, sub-sections and sub-sub-sections, in that order; anchors are the
    // section number with an 's' prefix (XHTML IDs start with a letter); a 'to top'
    // link is added; terminating o's close that many open divs
    $levels = array(
        '=' => array('section', 'h2'),
        '-' => array('sub-section', 'h3'),
        '\.' => array('sub-sub-section', 'h4'),
    );
    foreach($levels as $mark => $level){
        $class = $level[0];
        $tag = $level[1];
        $t = preg_replace_callback('`(?:\r|\n)\n'. $mark. $mark. '+ *(\([a-zA-Z\d.]+\)|[\d.]+)?(.*?) *'. $mark. '+(o*)\n(\n(?=\n))?`', function($p) use ($class, $tag){
            $anchor = str_replace(array('(', ')'), '_', $p[1]);
            return str_repeat("\r</div>", strlen($p[3]))
                . "\r". '<div class="'. $class. '"><'. $tag. '>'. "\r"
                . (strlen($p[1]) ? '<a name="s'. $anchor. '" id="s'. $anchor. '"></a>' : '')
                . '<span class="item-no">'. str_replace(array('(', ')'), '', $p[1]). '</span>'
                . $p[2]. "\r"
                . '</'. $tag. '><span class="totop"><a href="#peak">(to top)</a></span><br style="clear: both;" />'. "\r";
        }, $t);
    }

    // put non-TOC content into 'body' div; also care for <hr>
    $t = $toc. "\r". '<div id="body">'. preg_replace_callback('`(?:\r|\n)\n *__{4,}(o*)(?=\r|\n|$)`', function($p){
        return str_repeat("\r</div>", strlen($p[1])). "\r\n". '<hr />';
    }, $t);

    // preserving spaces: a leading space and every second space of a run become
    // non-breaking, as browsers collapse ordinary runs of spaces
    $t = str_replace('  ', ' &nbsp;', preg_replace('`(?<=^|\r|\n|>) `', '&nbsp;', $t));

    // \n to <br>
    $t = str_replace("\n", "<br />\n", rtrim($t));

    // entitify <, > and & and bring them out (hidden so far as special characters)
    $t = str_replace(array("\x01", "\x02", "\x03"), array('&lt;', '&gt;', '&amp;'), $t);

    // intended real line-breaks; \r proxying so far
    return str_replace("\r", "\n", $t);
}

/**
 * Read a non-empty string field from the submitted form.
 *
 * @param string $key     Name of the POST field.
 * @param string $default Value to use when the field is missing, empty or not a string.
 * @return string
 */
function rtxt2htm_post($key, $default){
    return (isset($_POST[$key]) && is_string($_POST[$key]) && $_POST[$key] !== '') ? $_POST[$key] : $default;
}

/**
 * Name offered to the browser for a directly downloaded result.
 *
 * @param string $text_file Name of the uploaded text file; '' for pasted text.
 * @return string File name ending in 'htm'; a txt, text or doc extension is
 *                replaced, anything else gets '.htm' appended.
 */
function rtxt2htm_download_name($text_file){
    // quotes, slashes and control characters would break the header value
    $name = preg_replace('`[\x00-\x1f\x7f"/\\\\]`', '', (string)$text_file);
    if($name === ''){
        $name = 'readme';
    }
    $extn = substr((string)strrchr($name, '.'), 1);
    if($extn == 'txt' or $extn == 'text' or $extn == 'doc'){
        return substr($name, 0, -strlen($extn)). 'htm';
    }
    return $name. '.htm';
}

// ---------------------------------------------------------------------------
// Script
// ---------------------------------------------------------------------------

// time-limit
set_time_limit(360);

// errors; E_ALL covers strict notices on PHP 5.4 and later
error_reporting(E_ALL);
ini_set('display_errors', 1); // 1 to debug

// defaults
$title = 'rTxt2htm : convert text to HTML';
$text_file = '';
$maxsize = 500000;
$meta_kword = 'rTxt2htm, text, HTM/HTML, converter, conversion, convert, PHP, Labware, rst, reStructured';
$meta_desc = 'rTxt2htm from PHP Labware converts plain text to HTML';
$lang = 'en'; // IANA-recognized language
$enc = 'utf-8'; // best if same as plain-text file's encoding; IANA-recognized charset encoding
$action = $direct = 0;
$toc = '';
$css = "
a {text-decoration:none; color: blue;}
a:hover {color: red;}
a:visited {color: blue;}
body {margin: 0; padding: 0;}
body, div, html, p {font-family: Georgia, 'Times new roman', Times;}
code.code {font-family: 'Bitstream vera sans mono', 'Courier New', 'Courier', monospace;}
div.comment {padding: 5px; color: #999999; font-size: 80%;}
div.comment a {color: #6699cc;}
div#body {width: 70%; margin: 5px; padding: 5px;} /* holds non-toc content */
div#toc {position: fixed; top: 5px; left: 73%; z-index: 2; margin-top: 5px; margin-left: 5px; border: 1px solid gray; padding: 5px; background-color: #ededed; width: 23%; overflow: auto; max-height:94%; font-size: 90%;} /* holds content table (toc) */
div#top {font-size: 14px; margin: 5px; padding: 5px;} /* holds all content */
div.monospace {overflow: auto; font-family: 'Bitstream vera sans mono', 'Courier New', 'Courier', monospace;}
div.sub-section {padding-left: 15px;}
div.sub-sub-section {padding-left: 30px;}
h1 {font-size: 22px; margin-top: 5px; margin-bottom: 5px;}
h2 {font-size: 20px; float: left; margin-top: 15px; margin-bottom: 5px;}
h3 {font-size: 18px; float: left; margin-top: 15px; margin-bottom: 5px;}
h4 {font-size: 16px; float: left; margin-top: 15px; margin-bottom: 5px;}
hr {margin-top: 15px; margin-bottom: 5px;}
input, textarea {font-family: 'Bitstream vera sans mono', 'Courier New', 'Courier', monospace;}
p.subtle {color: gray; padding: 0; padding-top: 10px; margin: 0;}
p.subtle a, p.subtle a:visited {color: #6699cc;}
span.item-no {color: black;}
span.subtle {color: gray; margin: 0; padding:0;}
span.subtle a, span.subtle a:visited {color: #6699cc;}
span.term {font-family: 'Bitstream vera sans mono', 'Courier New', 'Courier', monospace;}
span.toc-item {color: black;}
span.totop {float: right; margin-top: 15px; margin-bottom: 5px;}
span.totop a, span.totop a:visited {color: #6699cc;}
@media screen { /* fixes for old IE */
* html, * html body {overflow-y: auto!important; height: 100%; margin: 0; padding: 0;}
* html div#body {height: 100%; overflow-y: auto; position: relative;}
* html div#toc {position: absolute;}
}
";

// form values
if(isset($_POST['action']) && $_POST['action'] == 1){
    $action = 1;

    // magic quotes exist only before PHP 5.4; the function itself is gone in PHP 8
    if(version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc()){
        foreach($_POST as $k => $v){
            if(is_string($v)){
                $_POST[$k] = stripslashes($v);
            }
        }
    }

    $css = rtxt2htm_post('css', $css);
    $direct = !empty($_POST['direct']);
    $enc = rtxt2htm_post('enc', $enc);
    $lang = rtxt2htm_post('lang', $lang);
    $meta_desc = rtxt2htm_post('meta_desc', $meta_desc);
    $meta_kword = rtxt2htm_post('meta_kword', $meta_kword);
    $title = rtxt2htm_post('title', $title);

    // an uploaded file takes precedence over pasted text
    $upload = (isset($_FILES['text_file']) && is_array($_FILES['text_file'])) ? $_FILES['text_file'] : array();
    $size = (isset($upload['size']) && is_scalar($upload['size'])) ? (int)$upload['size'] : 0;
    if($size > 0 && $size < $maxsize && isset($upload['tmp_name']) && is_string($upload['tmp_name']) && is_uploaded_file($upload['tmp_name'])){
        $read = file_get_contents($upload['tmp_name']);
        if($read !== false){
            $name = (isset($upload['name']) && is_string($upload['name'])) ? str_replace(array("\0", '\\', ':'), '', $upload['name']) : '';
            $text_file = empty($name) ? 'readme' : $name;
            $t = $read;
        }
    }
    if(!isset($t)){
        $pasted = rtxt2htm_post('text_in', '');
        if($pasted !== ''){
            $t = substr($pasted, 0, $maxsize);
        }else{
            $action = 0; // nothing to convert; show the form again
        }
    }
}

if(isset($t)){
    // speed is not of concern
    list($t, $toc) = rtxt2htm_prepare($t);

    // auto-identify title, etc.; discovered values overwrite the form values
    list($t, $meta) = rtxt2htm_directives($t);
    if(isset($meta['encoding'])){ $enc = $meta['encoding']; }
    if(isset($meta['language'])){ $lang = $meta['language']; }
    if(isset($meta['description'])){ $meta_desc = $meta['description']; }
    if(isset($meta['keywords'])){ $meta_kword = $meta['keywords']; }
    if(isset($meta['title'])){ $title = $meta['title']; }
}

// for a direct download the page is captured and sent as an attachment at the end
if($direct){
    ob_start();
}
?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="<?php echo htmlspecialchars($lang); ?>" lang="<?php echo htmlspecialchars($lang); ?>">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=<?php echo htmlspecialchars($enc); ?>" />
<meta http-equiv="Content-Language" content="<?php echo htmlspecialchars($lang); ?>" />
<meta name="description" content="<?php echo htmlspecialchars($meta_desc. ' - '. $text_file); ?> - presented with rTxt2htm, a PHP Labware utility" />
<meta name="keywords" content="<?php echo htmlspecialchars($meta_kword. ', '. $text_file); ?>, rTxt2htm, PHP Labware" />
<style type="text/css" media="all">
<!--/*--><![CDATA[/*><!--*/
<?php echo htmlspecialchars($css, ENT_NOQUOTES); /* quotes stay literal: style content is not entity-decoded by HTML parsers */ ?>

/*]]>*/-->
</style>
<title><?php echo htmlspecialchars($title. ' | '. $meta_desc); ?></title>
</head>
<body>
<div id="top">
<h1><a id="peak" name="peak"></a><?php echo htmlspecialchars($title); ?></h1>
<?php
if($action){
    echo rtxt2htm_render($t, $toc);
?>

<br /><br /><span class="subtle"><small>HTM version of <?php echo '<em><a href="', htmlspecialchars($text_file), '">', htmlspecialchars($text_file), '</a></em> generated on ', gmdate('d M, Y'); ?> using <a href="http://www.bioinformatics.org/phplabware/internal_utilities">rTxt2htm</a> from PHP Labware</small></span>
</div><!-- ended div body -->
<?php
}else{ // show form
    echo '<hr />
<br />
<form id="form" enctype="multipart/form-data" action="rTxt2htm.php" method="post">
<div><input type="hidden" name="action" value="1" id="action" />
<table summary="form" cellpadding="2">
<tr><td style="text-align: right">File:</td><td style="text-align: left"><input type="hidden" name="MAX_FILE_SIZE" id="MAX_FILE_SIZE" value="', htmlspecialchars((string)$maxsize), '" /><input type="file" name="text_file" id="text_file" /></td><td style="text-align: left"><span class="subtle">upload the text-file; upto ', htmlspecialchars((string)($maxsize/1000)), ' KB</span></td></tr>
<tr><td style="text-align: right">Or type/paste:</td><td style="text-align: left"><textarea name="text_in" id="text_in" rows="5" cols="50"></textarea></td><td style="text-align: left"><span class="subtle">text input in right format; upto ', htmlspecialchars((string)$maxsize), ' chars</span></td></tr>
<tr><td style="text-align: right">Character encoding:</td><td style="text-align: left"><input type="text" size="50" name="enc" value="', htmlspecialchars($enc), '" id="enc" /></td><td style="text-align: left"><span class="subtle">best if same as file\'s; <a href="http://www.iana.org/assignments/character-sets">IANA-recognized</a> value; * (any auto-discovered value will overwrite it)</span></td></tr>
<tr><td style="text-align: right">CSS style:</td><td style="text-align: left"><textarea rows="5" cols="50" name="css" id="css">', htmlspecialchars($css), '</textarea></td><td style="text-align: left"><span class="subtle">need not be changed</span></td></tr>
<tr><td style="text-align: right">Description:</td><td style="text-align: left"><input type="text" size="50" name="meta_desc" value="', htmlspecialchars($meta_desc), '" id="meta_desc" /></td><td style="text-align: left"><span class="subtle">*</span></td></tr>
<tr><td style="text-align: right">Keywords:</td><td style="text-align: left"><input type="text" size="50" name="meta_kword" value="', htmlspecialchars($meta_kword), '" id="meta_kword" /></td><td style="text-align: left"><span class="subtle">*</span></td></tr>
<tr><td style="text-align: right">Language:</td><td style="text-align: left"><input type="text" size="50" name="lang" value="', htmlspecialchars($lang), '" id="lang" /></td><td style="text-align: left"><span class="subtle"><a href="http://www.iana.org/assignments/language-subtag-registry">IANA-recognized</a> value; *</span><br /></td></tr>
<tr><td style="text-align: right">Title:</td><td style="text-align: left"><input type="text" size="50" name="title" value="', htmlspecialchars($title), '" id="title" /></td><td style="text-align: left"><span class="subtle">*</span><br /></td></tr>
<tr><td> </td><td colspan="2" style="text-align: left;"><input type="submit" value="Convert" /> <input type="checkbox" id="direct" name="direct" value="1" />Direct download</td></tr>
</table>
</div>
</form>
<span class="subtle" style="float: right;"><a href="rTxt2htm_README.txt">txt</a> / <a href="rTxt2htm_README.htm">htm</a> documentation | <a href="http://www.bioinformatics.org/phplabware/internal_utilities">rTxt2htm</a> from PHP Labware</span><hr style="clear: both;" />';
}
?></div><!-- ended div top -->
</body>
</html><?php
if($action && $direct){
    // send the captured page as a file instead of showing it
    $out = ob_get_contents();
    ob_end_clean();
    header('Accept-Ranges: bytes');
    // only characters that occur in charset names may reach the header
    header('Content-Type: text/html; charset='. preg_replace('`[^A-Za-z0-9._:+-]`', '', $enc));
    header('Content-Transfer-Encoding: binary');
    header('Content-Disposition: attachment; filename="'. rtxt2htm_download_name($text_file). '"');
    echo $out;
    exit;
}