--------------------------------------------------------------------------------------------- */
//e.g. ~~~> or ~~~ PHP ~~~> (with optional language)
// code goes here code goes here
// <~~~ <~~~
'PRE' => '/~~~(?: ([a-z]+) ~~~)?>\n((?>(?R)|(?>.))*?)\n(\t*)~~~$/msi',
/* --- / ----------------------------------------------------------------------------------- */
//e.g. Using `_emphasis_` will generate ``emphasis``.
'CODE' => '/(``|(`))((?(2)(?:``|[^`]+)+?|.+?))\1(?!`)/',
//HTML comments could contain ReMarkable syntax. the TOC marker is stored here as the “&” must not be
//encoded by ReMarkable (allowing for the TOC marker in CODE/PRE) and mustn’t be wrapped in ``
'#' => '/|&__TOC__;/s'
) as $tag => $regx) if (!$offset=0) while (
preg_match ($regx, $source_text, $m, PREG_OFFSET_CAPTURE, (int) $offset)
) {
switch ($tag) {
case 'PRE':
//if language parameter given, wrap in a code span too
$text = (strlen ($m[1][0]) ? '
' : '').
//HTML-encode the preformatted block (HTML code examples, &c.)
htmlspecialchars (
//if the PRE block was indented (inside a list), unindent accordingly
preg_replace ('/^\t{'.strlen ($m[3][0]).'}/m', '', $m[2][0]),
ENT_NOQUOTES, 'UTF-8'
).
(strlen ($m[1][0]) ? '
' : '')
;
break;
case 'CODE':
$text = (strlen ($m[2][0]) ? '' : '').
htmlspecialchars ($m[3][0], ENT_NOQUOTES, 'UTF-8').
(strlen ($m[2][0]) ? '' : '')
;
break;
default:
$text = $m[0][0];
}
//capture the element
array_push ($placeholders[$tag], $text);
//replace with placeholder tag
$source_text = substr_replace ($source_text,
//make the placeholder tag the same size of the content being replaced, for word wrapping to work
"¡$tag".str_repeat ('%', max (0, strlen ($text) - (strlen ($tag) + 3)))."!",
$m[0][1], strlen ($m[0][0])
);
//continue searching from after this placeholder
$offset = $m[0][1] + strlen ($m[0][0]);
}
/* 2. hyperlinks
=============================================================================================================== */
/* ReMarkable hyperlinks have to be processed before removing the remaining HTML tags because:
a. they look like HTML tags, and
b. accidental inline markup could break the URL. for example, two underscores in a URL would add ``s
*/
//list of mime-types for hyperlinks pointing directly to a file:
//--------------------------------------------------------------
//note: this is absolutely not supposed to be a comprehensive list; quite the opposite in fact. this list is just
//my idea of the most important files that are directly hyperlinked to in articles--that users may want to be
//warned about beforehand via CSS mime-type icons &c.
//lookup table: lowercase file extension => mime-type, for hyperlinks that point directly to a file.
//deliberately short (see note above); only the extensions listed here are known to this table
$link_mimes = array (
	//images
	'jpg'     => 'image/jpeg',                    'jpeg' => 'image/jpeg',
	'png'     => 'image/png',                     'gif'  => 'image/gif',
	'psd'     => 'image/vnd.adobe.photoshop',
	'ai'      => 'application/postscript',        'eps'  => 'application/postscript',
	'svg'     => 'image/svg+xml',                 'svgz' => 'image/svg+xml',
	//documents
	'txt'     => 'text/plain',                    'pdf'  => 'application/pdf',
	'doc'     => 'application/msword',            'odt'  => 'application/vnd.oasis.opendocument.text',
	'xls'     => 'application/vnd.ms-excel',      'ods'  => 'application/vnd.oasis.opendocument.spreadsheet',
	'ppt'     => 'application/vnd.ms-powerpoint', 'odp'  => 'application/vnd.oasis.opendocument.presentation',
	'csv'     => 'text/csv',
	//code
	'css'     => 'text/css',
	'js'      => 'application/javascript',
	//downloads
	'exe'     => 'application/octet-stream',
	'dmg'     => 'application/octet-stream',      'iso'  => 'application/octet-stream',
	'rar'     => 'application/x-rar-compressed',  'zip'  => 'application/zip',
	'tar'     => 'application/x-tar',             'gz'   => 'application/x-gzip',
	'torrent' => 'application/x-bittorrent',
	//audio
	//(fixed: 'oga' was previously mapped to the misspelt, invalid type 'audo/ogg')
	'oga'     => 'audio/ogg',                     'wav'  => 'audio/wav',
	'mp3'     => 'audio/mpeg',                    'm4a'  => 'audio/mp4a-latm',
	'midi'    => 'audio/midi',
	//video
	'mp4'     => 'video/mp4',                     'm4v'  => 'video/mp4',
	'mpeg'    => 'video/mpeg',                    'mpg'  => 'video/mpeg',
	'mov'     => 'video/quicktime',               'avi'  => 'video/x-msvideo',
	'ogv'     => 'video/ogg'
);
//regular expression to identify the various forms of a ReMarkable hyperlink:
$regx =
//e.g. Click ----------------------------------------------------------------------------------------------- */ //e.g. He said «turn left here», but she said <>. '/(?:(\xAB)|(?:<){2})(.*?)(?(1)\xBB|(?:>){2})/u', /* --- -------------------------------------------------------------------------------------------- */ //e.g. My {CSS|style sheet} is tweaked almost daily. (with title) // The {FBI} are like the British {MI5}. (without title) '/\{([^\|}]+)(?:\|([^}]+))?}/e', /* --- ------------------------------------------------------------------------------------------- */ //e.g. ((legalese goes here)) (inline, block version handled later on) '/\({2}(.*?)\){2}(?!\))/' ), array ( /*
*/ '
', /*
*/ "\n
", /**/ '$1', /**/ '$1', /**/ '$1', /**/ '$1', /**/ '$1', /**/ '$2', /**/ '"".stripslashes("$1").""', /**/ '$1' ), $source_text); /* 7. headings =============================================================================================================== */ //capture the headings in the source text while (preg_match ( //e.g. ### title ### (#id) (atx-style, `# h1 #`, `## h2 ##`…, id is optional) //or- Title (#id) (H2, id is optional) // =========== // Title (#id) (H3, id is optional) // ----------- '/^(#{1,6})?(?(1) )(.*?)(?(1) \1)(?: \(#([0-9a-z_-]+)\))?(?(1)|\n([=-]+))(?:\n|$)/mi', $source_text, $m1, PREG_OFFSET_CAPTURE )) { //detect heading level (number of #’s or ‘=’ bar for H2 / ‘-’ bar for H3) $h = strlen ($m1[1][0]) ? strlen ($m1[1][0]) : (substr ($m1[4][0], 0, 1) == "=" ? 2 : 3); $title = &$m1[2][0]; $hid = &$m1[3][0]; /* title case the heading: ------------------------------------------------------------------------------------------------------- */ /* original Title Case script © John Gruberjavascript port © David Gouch */ //remove HTML, storing it for later // placeholders | tags | entities $regx = '/\xA1[@#A-Z]+%*!|<\/?[^>]+>|&\S+;/u'; preg_match_all ($regx, $title, $html, PREG_OFFSET_CAPTURE); $title = preg_replace ($regx, '', $title); //find each word (including punctuation attached) preg_match_all ('/[\w&`\'‘’"“\.@:\/\{\(\[<>_]+-? */u', $title, $m2, PREG_OFFSET_CAPTURE); foreach ($m2[0] as $m3) { //shorthand these- "match" and "index" list ($m, $i) = $m3; //correct offsets for multi-byte characters (`PREG_OFFSET_CAPTURE` returns *byte*-offset) //we fix this by recounting the text before the offset using multi-byte aware `strlen` $i = mb_strlen (substr ($title, 0, $i), 'UTF-8'); //find words that should always be lowercase… //(never on the first word, and never if preceded by a colon) $m = $i>0 && mb_substr ($title, max (0, $i-2), 1, 'UTF-8') !== ':' && preg_match ( '/^(a(nd?|s|t)?|b(ut|y)|en|for|i[fn]|o[fnr]|t(he|o)|vs?\.?|via)[ \-]/i', $m ) ? 
//…and convert them to lowercase mb_strtolower ($m, 'UTF-8') //else: brackets and other wrappers : ( preg_match ('/[\'"_{(\[‘“]/u', mb_substr ($title, max (0, $i-1), 3, 'UTF-8')) ? //convert first letter within wrapper to uppercase mb_substr ($m, 0, 1, 'UTF-8'). mb_strtoupper (mb_substr ($m, 1, 1, 'UTF-8'), 'UTF-8'). mb_substr ($m, 2, mb_strlen ($m, 'UTF-8')-2, 'UTF-8') //else: do not uppercase these cases : ( preg_match ('/[\])}]/', mb_substr ($title, max (0, $i-1), 3, 'UTF-8')) || preg_match ('/[A-Z]+|&|\w+[._]\w+/u', mb_substr ($m, 1, mb_strlen ($m, 'UTF-8')-1, 'UTF-8')) ? $m //if all else fails, then no more fringe-cases; uppercase the word : mb_strtoupper (mb_substr ($m, 0, 1, 'UTF-8'), 'UTF-8'). mb_substr ($m, 1, mb_strlen ($m, 'UTF-8'), 'UTF-8') )); //resplice the title with the change (`substr_replace` is not multi-byte aware) $title = mb_substr ($title, 0, $i, 'UTF-8').$m. mb_substr ($title, $i+mb_strlen ($m, 'UTF-8'), mb_strlen ($title, 'UTF-8'), 'UTF-8') ; } //restore the HTML foreach ($html[0] as &$tag) $title = substr_replace ($title, $tag[0], $tag[1], 0); /* ------------------------------------------------------------------------------------------------------- */ //replace heading with HTML $source_text = substr_replace ($source_text, " $title \n\n", $m1[0][1], strlen ($m1[0][0]) ); } /* 8. blocks - lists / blockquotes =============================================================================================================== */ //see documentation (or read regex) for full list of supported bullet types. 
note that this has a capturing group $bullet = '(?:([\x{2022}*+-])|(?:[a-zA-Z]\.|#|(?:\d+\.){1,6}))'; //capture, convert and unindent lists and blockquotes, recursively: do $source_text = preg_replace (array ( /* --- «whitespace» -------------------------------------------------------------------------------------- */ //remove white space on empty lines - simplifies regexes dealing with multiple lines '/^\s+\n/m', /* --- ------------------------------------------------------------------------------------------- */ //e.g. (( // small used as a paragraph // )) '/^\({2}\n((?:.*\n)+?)\){2}/me', /* ----------------------------------------------------------------------------------------- */ //e.g. | blockquote text '/^(?:\|(?:\t.*?)?\n)+\n/me', /* ---/
--------------------------------------------------------------------------------------- */ //i.e. a number of li’s, see below "/^((?:$bullet(?:\\t+.*\\n{1,2})+)+)/emu", /* ---
- ---------------------------------------------------------------------------------------------- */ //e.g. • text "/(?:(?<=(?)(\n\n)))?^$bullet((?:\\t+.*(\n))+|(?:\\t+.*(?:\n|(\n\n)))+)(?=$bullet|\n<\/[uo]l>)/emu", /* ---
---------------------------------------------------------------------------------------------- */ '/^(:: .*\n{1,2}(?:(?:\t+.*\n{1,2})+)?)+/me', /* --
- /
- ---------------------------------------------------------------------------------------- */ //e.g. :: definition term // description… '/^:: (.*)\n?((?:\t+.*\n)+|(?:\t+.*(?:\n|(\n)\n)?)+)?\n(?=\n::|<\/dl>)/me' ), array ( /*«whitespace»*/"\n", /**/ '"\n".preg_replace("/^\\t/m","",trim(stripslashes("$1")))."\n\n"', /*
*/ '"*/'"\n\n\n".preg_replace("/^\|\\t?/m","",stripslashes("$0"))."\n\n"', /*/
*/ '"\n<".("$2"?"u":"o")."l>\n\n".trim(stripslashes("$1"))."\n\n".("$2"?"u":"o")."l>\n\n"', /*
- */ '"
- $1$4$5".preg_replace("/^\\t/m","",trim(stripslashes("$3")))."$1$4$5
\n\n"', /**/ '"
\n\n".trim(stripslashes("$0"))."\n
\n\n"', /*- /
- ".stripslashes("$1")."
\n\n".'.'("$2"?"- \n".stripslashes("$3").'. '(preg_replace("/^\\t/m","",stripslashes("$2"))).stripslashes("$3")."\n
\n\n":"")' ), $source_text, -1, $continue); //because a list can contain another list / blockquote, once one is converted we loop again to catch the next level while ($continue); /* 9. indent and word-wrap =============================================================================================================== */ //start indenting at the base level for the whole document $depth = $indent; //the regex section above places blank lines either side of paragraphs in lists and either side of any tag that //begins / ends an indent. this section steps through these blank lines assessing the content inbetween: foreach (preg_split ('/\n{2,}/', $source_text, -1, PREG_SPLIT_NO_EMPTY) as $chunk) { //indent according to the current level if ($depth) $chunk = preg_replace ('/^/m', str_repeat ("\t", $depth), $chunk); //check each condition… foreach (array ( //PRE blocks (will always have no indent regardless if they are inside an indented block) 'pre' => '/^\s*(\xA1PRE%*!)/u', //list item without paragraphs 'li' => '/^(\s*)- \n\1(?P
.*)\n\1<\/li>/s', //`
- ` without any paragraphs 'dd' => '/^(\s*)
- \n(?P
(?:\t+.*\n?)+)\1<\/dd>/m', //`` block paragraph 'small' => '/^(\s*)\n(?P
(?:.*\n)+)\1<\/small>/m', //opening indent 'open' => '/(.*?)^(\s*)<([uo]l|li|d[ld]|blockquote)>$(?P
.*)/ms', //closing indent 'close' => '/(.*?)^(\s*?)\t<(\/)([uo]l|li|d[ld]|blockquote)>$(?P
.*)/ms', //block level elements that should not be wrapped in P tags 'p' => '/^\s*(?:<\/?|(\xA1))(?: # tags alone on the line that should not be wrapped (img|small) | # elements that start a line that should not be wrapped (?:article|aside|audio|blockquote|canvas|caption|col|colgroup|dialog|div|d[ltd]|embed |fieldset|figure|footer|form|h[1-6r]|header|input|label|legend|li|nav|noscript|object|[ou]l |optgroup|option|p|param|pre|script|section|select|source|table|t(?:body|foot|head)|t[dhr] |textarea|video # don’t wrap HTML comments or TOC markers |\# ) )(?(1)%*!|[^>]*>)(?(2)(?:$|\n))/xui' ) as $tag => $regx) if ( //once a match is found, capture the regex results in `$m` and stop searching preg_match ($regx, $chunk, $m) ) break; //note: ReMarkable does not wrap paragraphs around block elements. the “p” condition therefore works in //reverse and we know that an actual paragraph is matched when the regex doesn’t match and drops out of the //list of conditions -- leaving `$m` as empty //the “li”, “dd”, “small” and not “p” conditions contain a paragraph of text that has to be word-wrapped. //this text is stored in the regex named capture group “p” -> `$m['p']`. if no match is made `(!$m)` then //the whole chunk is a paragraph to be wrapped $p = rtrim (!$m ? $chunk : @$m['p']); //as explained above, word-wrap these conditions: if (($tag == 'li' || $tag == 'dd' || $tag == 'small' || !$m) && $margin>0) { //collapse whitespace in paragraphs. this removes HTML newlines (except before or after a `
`) //so that the paragraph can be wrapped cleanly by ReMarkable $p = rtrim (preg_replace ('/(?)\n\t*+(?!
)/', ' ', $p)); //word-wrap: //calculate the current loss of margin due to the indent level $width = $margin - (8 * ($depth+1)); //keep finding oversized lines until none are left… do $p = preg_replace ( //find i. any line that’s longer than the margin cut-off point // ii. the last space before the margin, as long as it’s not within an HTML tag -or- // iii. the first space after the margin (for lines with very long URLs for example) '/^(?=.{'.($width+4).',})(.{1,'.$width.'}|.{'.$width.',}?) (?![^<]*?>)/m', //and chop "$1\n".str_repeat ("\t", $depth), $p, -1, $continue ); while ($continue); } //reconstruct the chunk switch ($tag) { case 'pre' : $chunk = $m[1]; break; case 'li' : $chunk = $m[1]."- $p
"; break; case 'dd' : $chunk = $m[1]."- \n".preg_replace ('/^/m', "\t", $p."\n").$m[1]."
"; break; case 'small' : $chunk = $m[1]."\n".preg_replace ('/^/m',"\t",$p."\n").$m[1].""; break; case 'open' : $chunk = $m[1].$m[2]."<${m[3]}>".preg_replace ('/\n/', "\n\t", $p); $depth++; break; case 'close' : $chunk = $m[1].$m[2]."<${m[3]}${m[4]}>".$p; $depth--; break; default: //wrap paragraph if (!$m) $chunk = str_repeat ("\t", $depth)."\n". preg_replace ('/^/m', "\t", $p)."\n". str_repeat ("\t", $depth)."
" ; } $source_text = @$result .= "\n$chunk"; }; /* 10. finalise =============================================================================================================== */ //tidy up the HTML foreach (array ( //pair `` tags together '/<\/p>\n\t*
/' => '
', //flatten a single line paragraph in a `
- ` -> `
- ` '/
...
- \n(\t*)\t
\n\t+(.*)\n\t+<\/p>\n\t+<\/li>/' => "
- \n$1\t
", //pair `$2
\n$1- ` tags together (except single-line ones) '/\n(\t*)<\/li>\n\t*
- \n/' => "\n$1
- \n", //add double blank lines above H2,3 (easy to see headings when scrolling) '/^(\t*)(
]*>.*)$/m' => "$1\n$1\n$1$2", //but not when one immediately proceeds another '/(<\/h[23]>)(?:\n(\t*)){3}( ]*>)/' => "$1\n$2$3", //blank line either side of `
` '/^(\t*)
/ms' => "$1\n$0\n$1", //blank line either side of PRE blocks (have no indent themselves, so it has to be borrowed) '/^\xA1PRE%*!\n(\t*)/mu' => "$1\n$1$0\n$1", //remove tripple blank lines caused by combinations of the above '/^(?:(\t*)\n){3}/m' => "$1\n$1\n" ) as $regx => $replace) $source_text = preg_replace ($regx, $replace, $source_text); /* restore placeholders --------------------------------------------------------------------------------------------------------------- */ //restore in reverse order so that pre and code spans that contain placeholders [documentation] don’t conflict $placeholders = array_reverse ($placeholders, true); //restore each saved chunk of HTML, for each type of tag foreach ($placeholders as $tag => &$tags) foreach ($tags as &$html) if ( preg_match ("/\\xA1$tag%*!/u", $source_text, $m, PREG_OFFSET_CAPTURE) ) $source_text = substr_replace ($source_text, $html, $m[0][1], strlen ($m[0][0])); /* auto table of contents --------------------------------------------------------------------------------------------------------------- */ //creates a table of contents from headings with IDs. this has to be done last because `` spans in headings //would be duplicated in the TOC and the HTML would not be restored correctly above. the offset is captured so that //only headings *after* the TOC marker are included in the table of contents if (preg_match ('/^(\t*)&__TOC__;/m', $source_text, $i, PREG_OFFSET_CAPTURE)) { preg_match_all ('/(.*?)<\/h\1>/i', $source_text, $h, PREG_SET_ORDER, $i[0][1]); //the simplest way to create a nested list is to let ReMarkable do it! 
foreach ($h as &$m) $toc .= str_repeat ("\t", (int) $m[1]-2)."#\t${m[3]}\n"; $source_text = str_replace ('&__TOC__;', reMarkable ($toc, strlen ($i[1][0]), $margin), $source_text); } //a trailing line break is never given so that ReMarkable can be used for short inline strings in your HTML return trim ($source_text, "\n"); } /* ==================================================================================================== code is art === */ ?>