and PHPMarkdown . file type you should use for ReMarkable files is '.rem' or '.remark' bugs / suggestions → kroccamen@gmail.com */ /* ----------------------------------------------------------------------------------------------------------------------- */ //allow ReMarkable usage from the command line: //use `php ./path/to/remarkable.php ./path/to/file.rem ` if ($_SERVER['argc'] > 1) exit (reMarkable ( file_get_contents ($_SERVER['argv'][1]), //read in specififed file @$_SERVER['argv'][2] ? $_SERVER['argv'][2] : 0, //indent (optional) @$_SERVER['argv'][3] ? $_SERVER['argv'][3] : 125, //margin (optional) @$_SERVER['argv'][4] ? $_SERVER['argv'][4] : '.' //base path (optional) )); /* ----------------------------------------------------------------------------------------------------------------------- */ function reMarkable ( $source_text, /* source text to process, UTF-8 only */ $indent=0, /* indent the resulting HTML by n tabs */ $margin=125, /* word-wrap paragraphs at this character limit. use `false` for none */ $base_path='.' /* relative or absolute path to serve as a base path from this script to images in $source_text */ ) { /* the reason 125 is used as the wrap margin is because Firefox’s view->source window maxmized at 1024x768 is 125 chars wide and seems like a modern enough standard for code compared to the behaviour of writing readme files at 77 chars wide because that’s the viewport of a maximised Notepad window on a 640x480 screen. tabs of 8 are used because that is what Firefox & Notepad use and it maintains the same rendering in both the editor, and the browser */ //if an esentially empty string is given, return blank if (!strlen (trim ($source_text))) return ''; //unify carriage returns (if you happen to be processing ReMarkable files written on Windows/Linux/Mac &c.) //a blank line is added to the end to allow lists and blockquotes to convert if the user leaves no trailing line //we don’t left-trim inline whitespace in case the source text starts with a purely typographical tab indent $source_text = trim (rtrim (preg_replace ('/\r\n?/', "\n", $source_text)), "\n")."\n\n"; /* 1. remove chunks not to be processed =============================================================================================================== */ /* ReMarkable markup is *not* processed inside ReMarkable code/pre spans, they are HTML encoded and then removed from the source text until the end where they are re-inserted */ //this will be used to store the HTML until the end. it’ll expand for each HTML tag as it’s met $placeholders = array ('@' => array (), 'PRE' => array (), 'CODE' => array (), '#' => array ()); foreach (array ( //placeholders already in the text (e.g. documentation) '@' => '/\xA1[@#A-Z1-6]+%*!/u', /* ---
 --------------------------------------------------------------------------------------------- */
		//e.g.	~~~>			or	~~~ PHP ~~~>		(with optional language)
		//	code goes here			code goes here
		//	<~~~				<~~~
		'PRE'	=> '/~~~(?: ([a-z]+) ~~~)?>\n((?>(?R)|(?>.))*?)\n(\t*) /  ----------------------------------------------------------------------------------- */
		//e.g.	Using `_emphasis_` will generate ``emphasis``.
		'CODE'	=> '/(``|(`))((?(2)(?:``|[^`]+)+?|.+?))\1(?!`)/',
		//HTML comments could contain ReMarkable syntax. the TOC marker is stored here as the “&” must not be
		//encoded by ReMarkable (allowing for the TOC marker in CODE/PRE) and musn’t be wrapped in `

` '#' => '/|&__TOC__;/s' ) as $tag => $regx) if (!$offset=0) while ( preg_match ($regx, $source_text, $m, PREG_OFFSET_CAPTURE, (int) $offset) ) { switch ($tag) { case 'PRE': //if language paramter given, wrap in a code span too $text = (strlen ($m[1][0]) ? '

' : '
').
					//HTML-encode the preformatted block (HTML code examples, &c.)
					htmlspecialchars (
						//if the PRE block was indented (inside a list), unindent accordingly
						preg_replace ('/^\t{'.strlen ($m[3][0]).'}/m', '', $m[2][0]),
						ENT_NOQUOTES, 'UTF-8'
					).
					(strlen ($m[1][0]) ? '
' : '
') ; break; case 'CODE': $text = (strlen ($m[2][0]) ? '' : ''). htmlspecialchars ($m[3][0], ENT_NOQUOTES, 'UTF-8'). (strlen ($m[2][0]) ? '' : '') ; break; default: $text = $m[0][0]; } //capture the element array_push ($placeholders[$tag], $text); //replace with placeholder tag $source_text = substr_replace ($source_text, //make the placeholder tag the same size of the content being replaced, for word wrapping to work "¡$tag".str_repeat ('%', max (0, strlen ($text) - (strlen ($tag) + 3)))."!", $m[0][1], strlen ($m[0][0]) ); //continue searching from after this placeholder $offset = $m[0][1] + strlen ($m[0][0]); } /* 2. hyperlinks =============================================================================================================== */ /* ReMarkable hyperlinks have to be processed before removing the remaining HTML tags because: a. they look like HTML tags, and b. accidental inline markup could break the URL. for example, two underscores in a URL would add ``s */ //list of mime-types for hyperlinks pointing directly to a file: //-------------------------------------------------------------- //note: this is absolutely not supposed to be a comprehensive list; quite the opposite in fact. this list is just //my idea of the most important files that are directly hyperlinked to in articles--that user’s may want to be //warned about beforehand via CSS mime-type icons &c. $link_mimes = array ( //images 'jpg' => 'image/jpeg', 'jpeg' => 'image/jpeg', 'png' => 'image/png', 'gif' => 'image/gif', 'psd' => 'image/vnd.adobe.photoshop', 'ai' => 'application/postscript', 'eps' => 'application/postscript', 'svg' => 'image/svg+xml', 'svgz' => 'image/svg+xml', //documents 'txt' => 'text/plain', 'pdf' => 'application/pdf', 'doc' => 'application/msword', 'odt' => 'application/vnd.oasis.opendocument.text', 'xls' => 'application/vnd.ms-excel', 'ods' => 'application/vnd.oasis.opendocument.spreadsheet', 'ppt' => 'application/vnd.ms-powerpoint', 'odp' => 'application/vnd.oasis.opendocument.presentation', 'csv' => 'text/csv', //code 'css' => 'text/css', 'js' => 'application/javascript', //downloads 'exe' => 'application/octet-stream', 'dmg' => 'application/octet-stream', 'iso' => 'application/octet-stream', 'rar' => 'application/x-rar-compressed', 'zip' => 'application/zip', 'tar' => 'application/x-tar', 'gz' => 'application/x-gzip', 'torrent' => 'application/x-bittorrent', //audio 'oga' => 'audo/ogg', 'wav' => 'audio/wav', 'mp3' => 'audio/mpeg', 'm4a' => 'audio/mp4a-latm', 'midi' => 'audio/midi', //video 'mp4' => 'video/mp4', 'm4v' => 'video/mp4', 'mpeg' => 'video/mpeg', 'mpg' => 'video/mpeg', 'mov' => 'video/quicktime', 'avi' => 'video/x-msvideo', 'ogv' => 'video/ogg' ); //regular expression to identify the various forms of a ReMarkable hyperlink: $regx = //e.g. Click (with description) // Click . (relative, with description) // Visit (without description. protocol is optional in this style) '/<(?:([^<>]+?)[ ]\()? # $1 = optional description (\^)? # $2 = optional no-follow marker ( # $3 = whole URL ((?:[a-z]{3,10}:)?\/{0,3})? # $4 = protocol (?:www\.)? # ignore www ( # $5 = friendly URL (no protocol) ([a-z0-9._%+-]+@[a-z0-9.-]+)? # $6 = email address (?(4)[a-z0-9\.-]{2,}(?:\.[a-z]{2,4})+ | # domain name (mandatory if protocol given) (?(1)|[a-z0-9\.-]{2,}(?:\.[a-z]{2,4})+)) # domain name (mandatory if no description) ( # $7 = folders and filename, relative URL (?(4)\/|(?(1)|\/)) # slash required after full domain [\/a-z0-9_!~\*\'\(\)\.;\?:@&=\+\$,%-]* # folders and filename )? (?(7)(?:\#[a-z0-9_\.-]+)?) # #bookmark ) ) (?(1)\))>/xi'; # closing ) if description present //step through each hyperlink found… while (preg_match ($regx, $source_text, $m)) $source_text = preg_replace ($regx, ''. //link text: either the description, or the friendly URL (strlen ($m[1]) ? $m[1] : $m[5]). '', //replace only one link so that the loop steps through one link at a time (for `$m` in `preg_match`) $source_text, 1 ); /* 3. images =============================================================================================================== */ /* --- inline images --------------------------------------------------------------------------------------------- */ //e.g. <"alt text" /url/to/image.png> with alt-text // <"alt text" /url/to/image.png "optional title"> -or- with alt-text and title $regx = '/<("[^"]*")(?: |\n\s*)(\S+?\.(?:png|gif|jpe?g|svgz?))(?:(?: |\n\s*)("[^"]+"))?>/i'; while (preg_match ($regx, $source_text, $m)) { //get the image size $info = getimagesize ($base_path.$m[2]); //construct the img tag $source_text = preg_replace ($regx, '.$m[1].($m[3] ? ', $source_text, 1); } /* 4. remove HTML tags =============================================================================================================== */ //the negative before and after avoids accidentally detecting inline quotes `<>` while (preg_match ('/(?|(?: [^>]+)?>(?!>))/', $source_text, $m, PREG_OFFSET_CAPTURE)) { //get the placeholder name $tag = strtoupper ($m[2][0]); //if this type of tag has not yet been recoreded, add a slot for it if (!isset ($placeholders[$tag])) $placeholders[$tag] = array (); //capture the tag array_push ($placeholders[$tag], $m[0][0]); //replace with placeholder tag $source_text = substr_replace ($source_text, //make the placeholder tag the same size of the content being replaced for word wrapping to work "¡$tag".str_repeat('%', max (0, strlen ($m[0][0]) - (strlen ($tag) + 3))).'!', $m[0][1], strlen ($m[0][0]) ); } //encode essential HTML entities not already encoded (`&`, `<`, `>`), //we do not double encode, so that entities that you’ve already written in the source text are kept $source_text = htmlspecialchars ($source_text, ENT_NOQUOTES, 'UTF-8', false); /* 5. auto-correction: replace some ASCII conventions with unicode / HTML =============================================================================================================== */ foreach (array ( //prime: 5′ (ft) //double prime: 15″ (in) '/(\d)\'/' => '$1′', '/([\d½¼])"/u' => '$1″', //smart single quotes: ‘ ’ //smart double quotes: “ ” '/(\B)\'(.*?)\'(\B)/' => '$1‘$2’$3', '/(\B)"(.*?)"(\B)/' => '$1“$2”$3', //apostrophes: “won’t can’t” and “’till the ’90s” &c. '/(\B(?:\w+)?)\'(\w+\b)/' => '$1’$2', //em-dash: “--” //en-dash “-to-” (don’t allow inside H* id) '/(? '—', '/-to-(?!\S+\)\n[=-])/' => '–', //copyright: “(C)” //all rights reserved: “(R)” '/\(C\)/i' => '©', '/\(R\)/' => '®', //trademark: “^tm” '/\^tm/i' => '™', //ordinals / superscript '/(\d)(st|nd|rd|th)/' => '$1$2', '/([\w])\^([\w]+)/u' => '$1$2', //multiplication (use `x` to avoid) '/([\d ])x([\d ])?\b/' => '$1×$2', //fractions '/ 1\/2/' => '½', '/ 1\/4/' => '¼', //plus-minus: “+/-” -> "±" //“:therefore:” '/\+\/-/' => '±', '/:therefore:|:ergo:/' => '∴' ) as $regx => $replace) $source_text = preg_replace ($regx, $replace, $source_text); /* 6. process main markup =============================================================================================================== */ //I hope you’re fluent in regex $source_text = preg_replace (array ( /* ---
-------------------------------------------------------------------------------------------- */ //e.g. The quick brown fox _ // jumps over¬the lazy dog '/¬| _(?=$)/m', /* ---
-------------------------------------------------------------------------------------------- */ //i.e. * * * '/^\s*\* \* \*$/m', /* --- / ----------------------------------------------------------------------------------- */ //e.g. I’m _emphasising_ this point *strongly*. '/(?:^|\b)_(.+?)_(?:\b|$)/', '/\*(?!\t)(.+?)\*(?!\*)/', /* --- / ------------------------------------------------------------------------------------- */ //e.g. ---This statement is false --- [This statement is true]. '/\[(.+?)\]/', '/---(?!-+)(.+?)(? -------------------------------------------------------------------------------------------- */ //e.g. I’ve finished reading ~The Lion, the Witch and the Wardrobe~. '/~(.+?)~/', /* --- ----------------------------------------------------------------------------------------------- */ //e.g. He said «turn left here», but she said <>. '/(?:(\xAB)|(?:<){2})(.*?)(?(1)\xBB|(?:>){2})/u', /* --- -------------------------------------------------------------------------------------------- */ //e.g. My {CSS|style sheet} is tweaked almost daily. (with title) // The {FBI} are like the British {MI5}. (without title) '/\{([^\|}]+)(?:\|([^}]+))?}/e', /* --- ------------------------------------------------------------------------------------------- */ //e.g. ((legalese goes here)) (inline, block version handled later on) '/\({2}(.*?)\){2}(?!\))/' ), array ( /*
*/ '
', /*
*/ "\n
", /**/ '$1', /**/ '$1', /**/ '$1', /**/ '$1', /**/ '$1', /**/ '$2', /**/ '"".stripslashes("$1").""', /**/ '$1' ), $source_text); /* 7. headings =============================================================================================================== */ //capture the headings in the source text while (preg_match ( //e.g. ### title ### (#id) (atx-style, `# h1 #`, `## h2 ##`…, id is optional) //or- Title (#id) (H2, id is optional) // =========== // Title (#id) (H3, id is optional) // ----------- '/^(#{1,6})?(?(1) )(.*?)(?(1) \1)(?: \(#([0-9a-z_-]+)\))?(?(1)|\n([=-]+))(?:\n|$)/mi', $source_text, $m1, PREG_OFFSET_CAPTURE )) { //detect heading level (number of #’s or ‘=’ bar for H2 / ‘-’ bar for H3) $h = strlen ($m1[1][0]) ? strlen ($m1[1][0]) : (substr ($m1[4][0], 0, 1) == "=" ? 2 : 3); $title = &$m1[2][0]; $hid = &$m1[3][0]; /* title case the heading: ------------------------------------------------------------------------------------------------------- */ /* original Title Case script © John Gruber javascript port © David Gouch */ //remove HTML, storing it for later // placeholders | tags | entities $regx = '/\xA1[@#A-Z]+%*!|<\/?[^>]+>|&\S+;/u'; preg_match_all ($regx, $title, $html, PREG_OFFSET_CAPTURE); $title = preg_replace ($regx, '', $title); //find each word (including punctuation attached) preg_match_all ('/[\w&`\'‘’"“\.@:\/\{\(\[<>_]+-? */u', $title, $m2, PREG_OFFSET_CAPTURE); foreach ($m2[0] as $m3) { //shorthand these- "match" and "index" list ($m, $i) = $m3; //correct offsets for multi-byte characters (`PREG_OFFSET_CAPTURE` returns *byte*-offset) //we fix this by recounting the text before the offset using multi-byte aware `strlen` $i = mb_strlen (substr ($title, 0, $i), 'UTF-8'); //find words that should always be lowercase… //(never on the first word, and never if preceded by a colon) $m = $i>0 && mb_substr ($title, max (0, $i-2), 1, 'UTF-8') !== ':' && preg_match ( '/^(a(nd?|s|t)?|b(ut|y)|en|for|i[fn]|o[fnr]|t(he|o)|vs?\.?|via)[ \-]/i', $m ) ? //…and convert them to lowercase mb_strtolower ($m, 'UTF-8') //else: brackets and other wrappers : ( preg_match ('/[\'"_{(\[‘“]/u', mb_substr ($title, max (0, $i-1), 3, 'UTF-8')) ? //convert first letter within wrapper to uppercase mb_substr ($m, 0, 1, 'UTF-8'). mb_strtoupper (mb_substr ($m, 1, 1, 'UTF-8'), 'UTF-8'). mb_substr ($m, 2, mb_strlen ($m, 'UTF-8')-2, 'UTF-8') //else: do not uppercase these cases : ( preg_match ('/[\])}]/', mb_substr ($title, max (0, $i-1), 3, 'UTF-8')) || preg_match ('/[A-Z]+|&|\w+[._]\w+/u', mb_substr ($m, 1, mb_strlen ($m, 'UTF-8')-1, 'UTF-8')) ? $m //if all else fails, then no more fringe-cases; uppercase the word : mb_strtoupper (mb_substr ($m, 0, 1, 'UTF-8'), 'UTF-8'). mb_substr ($m, 1, mb_strlen ($m, 'UTF-8'), 'UTF-8') )); //resplice the title with the change (`substr_replace` is not multi-byte aware) $title = mb_substr ($title, 0, $i, 'UTF-8').$m. mb_substr ($title, $i+mb_strlen ($m, 'UTF-8'), mb_strlen ($title, 'UTF-8'), 'UTF-8') ; } //restore the HTML foreach ($html[0] as &$tag) $title = substr_replace ($title, $tag[0], $tag[1], 0); /* ------------------------------------------------------------------------------------------------------- */ //replace heading with HTML $source_text = substr_replace ($source_text, "$title\n\n", $m1[0][1], strlen ($m1[0][0]) ); } /* 8. blocks - lists / blockquotes =============================================================================================================== */ //see documentation (or read regex) for full list of supported bullet types. note that this has a capturing group $bullet = '(?:([\x{2022}*+-])|(?:[a-zA-Z]\.|#|(?:\d+\.){1,6}))'; //capture, convert and unindent lists and blockquotes, recursively: do $source_text = preg_replace (array ( /* --- «whitespace» -------------------------------------------------------------------------------------- */ //remove white space on empty lines - simplifies regexes dealing with multiple lines '/^\s+\n/m', /* --- ------------------------------------------------------------------------------------------- */ //e.g. (( // small used as a paragraph // )) '/^\({2}\n((?:.*\n)+?)\){2}/me', /* ---
-------------------------------------------------------------------------------------- */ //e.g. | blockquote text '/^(?:\|(?:\t.*?)?\n)+\n/me', /* ---
    /
      --------------------------------------------------------------------------------------- */ //i.e. a number of li’s, see below "/^((?:$bullet(?:\\t+.*\\n{1,2})+)+)/emu", /* ---
    1. ---------------------------------------------------------------------------------------------- */ //e.g. • text "/(?:(?<=(?)(\n\n)))?^$bullet((?:\\t+.*(\n))+|(?:\\t+.*(?:\n|(\n\n)))+)(?=$bullet|\n<\/[uo]l>)/emu", /* ---
      ---------------------------------------------------------------------------------------------- */ '/^(:: .*\n{1,2}(?:(?:\t+.*\n{1,2})+)?)+/me', /* --
      /
      ---------------------------------------------------------------------------------------- */ //e.g. :: definition term // description… '/^:: (.*)\n?((?:\t+.*\n)+|(?:\t+.*(?:\n|(\n)\n)?)+)?\n(?=\n::|<\/dl>)/me' ), array ( /*«whitespace»*/"\n", /**/ '"\n".preg_replace("/^\\t/m","",trim(stripslashes("$1")))."\n\n"', /*
      */'"\n
      \n\n".preg_replace("/^\|\\t?/m","",stripslashes("$0"))."
      \n\n"', /*
        /
          */ '"\n<".("$2"?"u":"o")."l>\n\n".trim(stripslashes("$1"))."\n\n\n\n"', /*
        1. */ '"
        2. $1$4$5".preg_replace("/^\\t/m","",trim(stripslashes("$3")))."$1$4$5
        3. \n\n"', /*
          */ '"
          \n\n".trim(stripslashes("$0"))."\n
          \n\n"', /*
          /
      */ '"
      ".stripslashes("$1")."
      \n\n".'.'("$2"?"
      \n".stripslashes("$3").'. '(preg_replace("/^\\t/m","",stripslashes("$2"))).stripslashes("$3")."\n
      \n\n":"")' ), $source_text, -1, $continue); //because a list can contain another list / blockquote, once one is converted we loop again to catch the next level while ($continue); /* 9. indent and word-wrap =============================================================================================================== */ //start indenting at the base level for the whole document $depth = $indent; //the regex section above places blank lines either side of paragraphs in lists and either side of any tag that //begins / ends an indent. this section steps through these blank lines assessing the content inbetween: foreach (preg_split ('/\n{2,}/', $source_text, -1, PREG_SPLIT_NO_EMPTY) as $chunk) { //indent according to the current level if ($depth) $chunk = preg_replace ('/^/m', str_repeat ("\t", $depth), $chunk); //check each condition… foreach (array ( //PRE blocks (will always have no indent regardless if they are inside an indented block) 'pre' => '/^\s*(\xA1PRE%*!)/u', //list item without paragraphs 'li' => '/^(\s*)
    2. \n\1(?P

      .*)\n\1<\/li>/s', //`

      ` without any paragraphs 'dd' => '/^(\s*)
      \n(?P

      (?:\t+.*\n?)+)\1<\/dd>/m', //`` block paragraph 'small' => '/^(\s*)\n(?P

      (?:.*\n)+)\1<\/small>/m', //opening indent 'open' => '/(.*?)^(\s*)<([uo]l|li|d[ld]|blockquote)>$(?P

      .*)/ms', //closing indent 'close' => '/(.*?)^(\s*?)\t<(\/)([uo]l|li|d[ld]|blockquote)>$(?P

      .*)/ms', //block level elements that should not be wrapped in P tags 'p' => '/^\s*(?:<\/?|(\xA1))(?: # tags alone on the line that should not be wrapped (img|small) | # elements that start a line that should not be wrapped (?:article|aside|audio|blockquote|canvas|caption|col|colgroup|dialog|div|d[ltd]|embed |fieldset|figure|footer|form|h[1-6r]|header|input|label|legend|li|nav|noscript|object|[ou]l |optgroup|option|p|param|pre|script|section|select|source|table|t(?:body|foot|head)|t[dhr] |textarea|video # don’t wrap HTML comments or TOC markers |\# ) )(?(1)%*!|[^>]*>)(?(2)(?:$|\n))/xui' ) as $tag => $regx) if ( //once a match is found, capture the regex results in `$m` and stop searching preg_match ($regx, $chunk, $m) ) break; //note: ReMarkable does not wrap paragraphs around block elements. the “p” condition therefore works in //reverse and we know that an actual paragraph is matched when the regex doesn’t match and drops out of the //list of conditions -- leaving `$m` as empty //the “li”, “dd”, “small” and not “p” conditions contain a paragraph of text that has to be word-wrapped. //this text is stored in the regex named capture group “p” -> `$m['p']`. if no match is made `(!$m)` then //the whole chunk is a paragraph to be wrapped $p = rtrim (!$m ? $chunk : @$m['p']); //as explained above, word-wrap these conditions: if (($tag == 'li' || $tag == 'dd' || $tag == 'small' || !$m) && $margin>0) { //collapse whitespace in paragraphs. this removes HTML newlines (except before or after a `
      `) //so that the paragraph can be wrapped cleanly by ReMarkable $p = rtrim (preg_replace ('/(?)\n\t*+(?!
      )/', ' ', $p)); //word-wrap: //calculate the current loss of margin due to the indent level $width = $margin - (8 * ($depth+1)); //keep finding oversized lines until none are left… do $p = preg_replace ( //find i. any line that’s longer than the margin cut-off point // ii. the last space before the margin, as long as it’s not within an HTML tag -or- // iii. the first space after the margin (for lines with very long URLs for example) '/^(?=.{'.($width+4).',})(.{1,'.$width.'}|.{'.$width.',}?) (?![^<]*?>)/m', //and chop "$1\n".str_repeat ("\t", $depth), $p, -1, $continue ); while ($continue); } //reconstruct the chunk switch ($tag) { case 'pre' : $chunk = $m[1]; break; case 'li' : $chunk = $m[1]."

    3. $p
    4. "; break; case 'dd' : $chunk = $m[1]."
      \n".preg_replace ('/^/m', "\t", $p."\n").$m[1]."
      "; break; case 'small' : $chunk = $m[1]."\n".preg_replace ('/^/m',"\t",$p."\n").$m[1].""; break; case 'open' : $chunk = $m[1].$m[2]."<${m[3]}>".preg_replace ('/\n/', "\n\t", $p); $depth++; break; case 'close' : $chunk = $m[1].$m[2]."<${m[3]}${m[4]}>".$p; $depth--; break; default: //wrap paragraph if (!$m) $chunk = str_repeat ("\t", $depth)."

      \n". preg_replace ('/^/m', "\t", $p)."\n". str_repeat ("\t", $depth)."

      " ; } $source_text = @$result .= "\n$chunk"; }; /* 10. finalise =============================================================================================================== */ //tidy up the HTML foreach (array ( //pair `

      ` tags together '/<\/p>\n\t*

      /' => '

      ', //flatten a single line paragraph in a `

    5. ` -> `
    6. ...

    7. ` '/
    8. \n(\t*)\t

      \n\t+(.*)\n\t+<\/p>\n\t+<\/li>/' => "

    9. \n$1\t

      $2

      \n$1
    10. ", //pair `
    11. ` tags together (except single-line ones) '/\n(\t*)<\/li>\n\t*
    12. \n/' => "\n$1
    13. \n", //add double blank lines above H2,3 (easy to see headings when scrolling) '/^(\t*)(]*>.*)$/m' => "$1\n$1\n$1$2", //but not when one immediately proceeds another '/(<\/h[23]>)(?:\n(\t*)){3}(]*>)/' => "$1\n$2$3", //blank line either side of `
      ` '/^(\t*)
      /ms' => "$1\n$0\n$1", //blank line either side of PRE blocks (have no indent themselves, so it has to be borrowed) '/^\xA1PRE%*!\n(\t*)/mu' => "$1\n$1$0\n$1", //remove tripple blank lines caused by combinations of the above '/^(?:(\t*)\n){3}/m' => "$1\n$1\n" ) as $regx => $replace) $source_text = preg_replace ($regx, $replace, $source_text); /* restore placeholders --------------------------------------------------------------------------------------------------------------- */ //restore in reverse order so that pre and code spans that contain placeholders [documentation] don’t conflict $placeholders = array_reverse ($placeholders, true); //restore each saved chunk of HTML, for each type of tag foreach ($placeholders as $tag => &$tags) foreach ($tags as &$html) if ( preg_match ("/\\xA1$tag%*!/u", $source_text, $m, PREG_OFFSET_CAPTURE) ) $source_text = substr_replace ($source_text, $html, $m[0][1], strlen ($m[0][0])); /* auto table of contents --------------------------------------------------------------------------------------------------------------- */ //creates a table of contents from headings with IDs. this has to be done last because `` spans in headings //would be duplicated in the TOC and the HTML would not be restored correctly above. the offset is captured so that //only headings *after* the TOC marker are included in the table of contents if (preg_match ('/^(\t*)&__TOC__;/m', $source_text, $i, PREG_OFFSET_CAPTURE)) { preg_match_all ('/(.*?)<\/h\1>/i', $source_text, $h, PREG_SET_ORDER, $i[0][1]); //the simplest way to create a nested list is to let ReMarkable do it! foreach ($h as &$m) $toc .= str_repeat ("\t", (int) $m[1]-2)."#\t${m[3]}\n"; $source_text = str_replace ('&__TOC__;', reMarkable ($toc, strlen ($i[1][0]), $margin), $source_text); } //a trailing line break is never given so that ReMarkable can be used for short inline strings in your HTML return trim ($source_text, "\n"); } /* ==================================================================================================== code is art === */ ?>