Home | History | Annotate | Download | only in PHP-Markdown-Extra-1.2.3
      1 <?php
      2 #
      3 # Markdown Extra  -  A text-to-HTML conversion tool for web writers
      4 #
      5 # PHP Markdown & Extra
      6 # Copyright (c) 2004-2008 Michel Fortin
      7 # <http://www.michelf.com/projects/php-markdown/>
      8 #
      9 # Original Markdown
     10 # Copyright (c) 2004-2006 John Gruber
     11 # <http://daringfireball.net/projects/markdown/>
     12 #
     13 
     14 
     15 define( 'MARKDOWN_VERSION',  "1.0.1m" ); # Sat 21 Jun 2008
     16 define( 'MARKDOWNEXTRA_VERSION',  "1.2.3" ); # Wed 31 Dec 2008
     17 
     18 
     19 #
     20 # Global default settings:
     21 #
     22 
     23 # Change to ">" for HTML output
     24 @define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");
     25 
     26 # Define the width of a tab for code blocks.
     27 @define( 'MARKDOWN_TAB_WIDTH',     4 );
     28 
     29 # Optional title attribute for footnote links and backlinks.
     30 @define( 'MARKDOWN_FN_LINK_TITLE',         "" );
     31 @define( 'MARKDOWN_FN_BACKLINK_TITLE',     "" );
     32 
     33 # Optional class attribute for footnote links and backlinks.
     34 @define( 'MARKDOWN_FN_LINK_CLASS',         "" );
     35 @define( 'MARKDOWN_FN_BACKLINK_CLASS',     "" );
     36 
     37 
     38 #
     39 # WordPress settings:
     40 #
     41 
     42 # Change to false to remove Markdown from posts and/or comments.
     43 @define( 'MARKDOWN_WP_POSTS',      true );
     44 @define( 'MARKDOWN_WP_COMMENTS',   true );
     45 
     46 
     47 
     48 ### Standard Function Interface ###
     49 
     50 @define( 'MARKDOWN_PARSER_CLASS',  'MarkdownExtra_Parser' );
     51 
     52 function Markdown($text) {
     53 #
     54 # Initialize the parser and return the result of its transform method.
     55 #
     56     # Setup static parser variable.
     57     static $parser;
     58     if (!isset($parser)) {
     59         $parser_class = MARKDOWN_PARSER_CLASS;
     60         $parser = new $parser_class;
     61     }
     62 
     63     # Transform text using parser.
     64     return $parser->transform($text);
     65 }
     66 
     67 
     68 ### WordPress Plugin Interface ###
     69 
     70 /*
     71 Plugin Name: Markdown Extra
     72 Plugin URI: http://www.michelf.com/projects/php-markdown/
     73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
Version: 1.2.3
     75 Author: Michel Fortin
     76 Author URI: http://www.michelf.com/
     77 */
     78 
     79 if (isset($wp_version)) {
     80     # More details about how it works here:
     81     # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
     82 
     83     # Post content and excerpts
     84     # - Remove WordPress paragraph generator.
     85     # - Run Markdown on excerpt, then remove all tags.
     86     # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
     87     if (MARKDOWN_WP_POSTS) {
     88         remove_filter('the_content',     'wpautop');
     89         remove_filter('the_content_rss', 'wpautop');
     90         remove_filter('the_excerpt',     'wpautop');
     91         add_filter('the_content',     'mdwp_MarkdownPost', 6);
     92         add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);
     93         add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
     94         add_filter('get_the_excerpt', 'trim', 7);
     95         add_filter('the_excerpt',     'mdwp_add_p');
     96         add_filter('the_excerpt_rss', 'mdwp_strip_p');
     97 
     98         remove_filter('content_save_pre',  'balanceTags', 50);
     99         remove_filter('excerpt_save_pre',  'balanceTags', 50);
    100         add_filter('the_content',     'balanceTags', 50);
    101         add_filter('get_the_excerpt', 'balanceTags', 9);
    102     }
    103 
    # Run Markdown on a post. Footnote ids get the post id as a prefix,
    # except when a single post, a page or a feed is being rendered, in
    # which case the prefix is empty.
    function mdwp_MarkdownPost($text) {
        static $parser;
        if (!$parser) {
            $class_name = MARKDOWN_PARSER_CLASS;
            $parser = new $class_name;
        }

        $standalone = is_single() || is_page() || is_feed();
        $parser->fn_id_prefix = $standalone ? "" : get_the_ID() . ".";

        return $parser->transform($text);
    }
    118 
    # Comments
    # - The tag/placeholder tables used by mdwp_hide_tags and
    #   mdwp_show_tags are stored in two globals.
    # - The WordPress paragraph and auto-link generators are removed.
    # - Markdown runs on incoming comments; the listed tags are scrambled
    #   (priority 8) and restored (priority 12) around the kses filter.
    # - Markdown runs on comment text and excerpts; paragraph tags are
    #   removed from excerpts.
    if (MARKDOWN_WP_COMMENTS) {
        global $mdwp_hidden_tags, $mdwp_placeholders;
        $mdwp_hidden_tags = explode(' ',
            '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
        $mdwp_placeholders = explode(' ', str_rot13(
            'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
            'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));

        # comment_text
        remove_filter('comment_text', 'wpautop', 30);
        remove_filter('comment_text', 'make_clickable');

        # pre_comment_content
        add_filter('pre_comment_content', 'Markdown', 6);
        add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
        add_filter('pre_comment_content', 'mdwp_show_tags', 12);

        # get_comment_text / get_comment_excerpt
        add_filter('get_comment_text', 'Markdown', 6);
        add_filter('get_comment_excerpt', 'Markdown', 6);
        add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
    }
    141 
    # Wrap $text in paragraph tags, starting a new paragraph at every run
    # of two or more newlines. Empty text, and text that already opens
    # with a <p>, <ul>, <ol>, <dl>, <pre> or <blockquote> tag, is returned
    # unchanged.
    function mdwp_add_p($text) {
        $already_block = preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text);
        if ($already_block) {
            return $text;
        }

        $wrapped = '<p>'.$text.'</p>';
        return preg_replace('{\n{2,}}', "</p>\n\n<p>", $wrapped);
    }
    149 
    # Remove every attribute-less <p> and </p> tag (in any letter case)
    # from $t.
    function mdwp_strip_p($t) {
        $without_p = preg_replace('{</?p>}i', '', $t);
        return $without_p;
    }
    151 
    # Replace each tag listed in the global $mdwp_hidden_tags by the
    # placeholder at the same position in $mdwp_placeholders.
    function mdwp_hide_tags($text) {
        global $mdwp_hidden_tags, $mdwp_placeholders;
        $scrambled = str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
        return $scrambled;
    }
    # Reverse of mdwp_hide_tags: turn each placeholder listed in the
    # global $mdwp_placeholders back into the tag at the same position in
    # $mdwp_hidden_tags.
    function mdwp_show_tags($text) {
        global $mdwp_hidden_tags, $mdwp_placeholders;
        $restored = str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
        return $restored;
    }
    160 }
    161 
    162 
    163 ### bBlog Plugin Info ###
    164 
    165 function identify_modifier_markdown() {
    166     return array(
    167         'name' => 'markdown',
    168         'type' => 'modifier',
    169         'nicename' => 'PHP Markdown Extra',
    170         'description' => 'A text-to-HTML conversion tool for web writers',
    171         'authors' => 'Michel Fortin and John Gruber',
    172         'licence' => 'GPL',
    173         'version' => MARKDOWNEXTRA_VERSION,
    174         'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>',
    175         );
    176 }
    177 
    178 
    179 ### Smarty Modifier Interface ###
    180 
    181 function smarty_modifier_markdown($text) {
    182     return Markdown($text);
    183 }
    184 
    185 
    186 ### Textile Compatibility Mode ###
    187 
    188 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
    189 
    190 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
    191     # Try to include PHP SmartyPants. Should be in the same directory.
    192     @include_once 'smartypants.php';
    193     # Fake Textile class. It calls Markdown instead.
    194     class Textile {
    195         function TextileThis($text, $lite='', $encode='') {
    196             if ($lite == '' && $encode == '')    $text = Markdown($text);
    197             if (function_exists('SmartyPants'))  $text = SmartyPants($text);
    198             return $text;
    199         }
    200         # Fake restricted version: restrictions are not supported for now.
    201         function TextileRestricted($text, $lite='', $noimage='') {
    202             return $this->TextileThis($text, $lite);
    203         }
    204         # Workaround to ensure compatibility with TextPattern 4.0.3.
    205         function blockLite($text) { return $text; }
    206     }
    207 }
    208 
    209 
    210 
    211 #
    212 # Markdown Parser Class
    213 #
    214 
    215 class Markdown_Parser {
    216 
    # Maximum nesting depth of balanced [brackets] and the regular
    # expression matching them. A maximum bracket depth is needed because
    # the expression is generated by repetition in the constructor.
    var $nested_brackets_depth = 6, $nested_brackets_re;

    # Same pair for parentheses: nesting depth and generated expression
    # (both are set up by the constructor).
    var $nested_url_parenthesis_depth = 4, $nested_url_parenthesis_re;

    # Characters for the escape table; the constructor turns this list
    # into the character class stored in $escape_chars_re.
    var $escape_chars = '\`*_{}[]()>#+-.!', $escape_chars_re;

    # Change to ">" for HTML output.
    var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;

    # Width of a tab, taken from the global setting.
    var $tab_width = MARKDOWN_TAB_WIDTH;

    # Change to `true` to disallow markup or entities.
    var $no_markup = false, $no_entities = false;

    # Predefined urls and titles for reference links and images.
    var $predef_urls = array(), $predef_titles = array();
    240 
    241 
    function Markdown_Parser() {
    #
    # Constructor. Runs the detab and emphasis initializers, generates the
    # nested-bracket, nested-parenthesis and escape-character expressions,
    # and sorts the three gamuts.
    #
        $this->_initDetab();
        $this->prepareItalicsAndBold();

        # Balanced brackets: one opening fragment and one closing fragment
        # per supported nesting level.
        $depth = $this->nested_brackets_depth;
        $this->nested_brackets_re =
            str_repeat('(?>[^\[\]]+|\[', $depth).
            str_repeat('\])*', $depth);

        # Balanced parentheses, built the same way.
        $depth = $this->nested_url_parenthesis_depth;
        $this->nested_url_parenthesis_re =
            str_repeat('(?>[^()\s]+|\(', $depth).
            str_repeat('(?>\)))*', $depth);

        # Character class matching any one of the escape characters.
        $quoted_chars = preg_quote($this->escape_chars);
        $this->escape_chars_re = '['.$quoted_chars.']';

        # Sort document, block, and span gamut in ascending priority order.
        foreach (array('document_gamut', 'block_gamut', 'span_gamut') as $gamut) {
            asort($this->$gamut);
        }
    }
    264 
    265 
    # Internal hashes used during transformation: link urls and titles
    # keyed by link id, and the text behind each hash token.
    var $urls = array(), $titles = array(), $html_hashes = array();

    # Status flag to avoid invalid nesting: true while doAnchors is
    # processing, so that a nested call returns its input untouched.
    var $in_anchor = false;
    273 
    274 
    function setup() {
    #
    # Called before the transformation process starts to set up the
    # parser state.
    #
        # Start from the predefined reference links and an empty hash table.
        $this->urls = $this->predef_urls;
        $this->titles = $this->predef_titles;
        $this->html_hashes = array();

        # Reset the nesting flag on the instance itself. Assigning a plain
        # local variable here would have no effect, and a flag left at true
        # by an interrupted run would make doAnchors skip every link.
        $this->in_anchor = false;
    }
    287 
    function teardown() {
    #
    # Called after the transformation process: empties the url, title and
    # hash tables so they do not take up memory unnecessarily.
    #
        $this->urls = $this->titles = $this->html_hashes = array();
    }
    297 
    298 
    function transform($text) {
    #
    # Main function. Normalizes the input text, hashes block-level HTML,
    # passes the text through every document gamut method and returns the
    # result followed by a newline.
    #
        $this->setup();

        # Remove a leading UTF-8 BOM and any \x1A character from the input
        # (\x1A is the marker hashPart builds its tokens with).
        $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

        # Standardize line endings: DOS ("\r\n") and Mac ("\r") to Unix.
        $text = preg_replace('{\r\n?}', "\n", $text);

        # Make sure the text ends with a couple of newlines, then convert
        # all tabs to spaces.
        $text = $this->detab($text . "\n\n");

        # Turn block-level HTML blocks into hash entries.
        $text = $this->hashHTMLBlocks($text);

        # Empty every line consisting only of spaces. Later expressions can
        # then match consecutive blank lines with /\n+/ instead of something
        # contorted like /[ ]*\n+/ .
        $text = preg_replace('/^[ ]+$/m', '', $text);

        # Run document gamut methods, in the order the gamut is stored.
        foreach (array_keys($this->document_gamut) as $method) {
            $text = $this->$method($text);
        }

        $this->teardown();

        return "$text\n";
    }
    337 
    # Methods applied to the whole document by transform(), mapped to
    # their priority (the constructor sorts the table by priority).
    var $document_gamut = array(
        # Strip link definitions, store in hashes.
        'stripLinkDefinitions' => 20,

        # Block-level transformations, HTML blocks being already hashed.
        'runBasicBlockGamut'   => 30,
    );
    344 
    345 
    function stripLinkDefinitions($text) {
    #
    # Remove link definitions from $text and return what is left. Each
    # definition found is handed to _stripLinkDefinitions_callback, which
    # records its url and title.
    #
        # A definition may be indented by less than one tab width.
        $less_than_tab = $this->tab_width - 1;

        # Link defs are in the form: ^[id]: url "optional title"
        $definition_re = '{
                            ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
                              [ ]*
                              \n?               # maybe *one* newline
                              [ ]*
                            <?(\S+?)>?          # url = $2
                              [ ]*
                              \n?               # maybe one newline
                              [ ]*
                            (?:
                                (?<=\s)         # lookbehind for whitespace
                                ["(]
                                (.*?)           # title = $3
                                [")]
                                [ ]*
                            )?  # title is optional
                            (?:\n+|\Z)
            }xm';

        return preg_replace_callback(
            $definition_re,
            array(&$this, '_stripLinkDefinitions_callback'),
            $text);
    }
    function _stripLinkDefinitions_callback($matches) {
    #
    # Record one link definition under its lower-cased id and return the
    # empty string that replaces the definition in the text. The title
    # entry is null when the definition has no title.
    #
        $id = strtolower($matches[1]);

        $this->urls[$id]   = $matches[2];
        $this->titles[$id] = isset($matches[3]) ? $matches[3] : null;

        return '';
    }
    382 
    383 
    function hashHTMLBlocks($text) {
    #
    # Replace block-level HTML found in $text by hash tokens and return the
    # resulting text. Each matched block is handed to
    # _hashHTMLBlocks_callback. Nothing is done when $no_markup is set.
    #
    # Five kinds of block are recognized: always-block tags (group b),
    # tags that may be inline or block (group a), <hr>, standalone HTML
    # comments, and PHP/ASP-style processing instructions.
    #
        if ($this->no_markup)  return $text;

        $less_than_tab = $this->tab_width - 1;

        # Hashify HTML blocks:
        # We only want to do this for block-level HTML tags, such as headers,
        # lists, and tables. That's because we still want to wrap <p>s around
        # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
        # phrase emphasis, and spans. The list of tags we're looking for is
        # hard-coded:
        #
        # *  List "a" is made of tags which can be both inline or block-level.
        #    These will be treated block-level when the start tag is alone on
        #    its line, otherwise they're not matched here and will be taken as
        #    inline later.
        # *  List "b" is made of tags which are always block-level;
        #
        $block_tags_a_re = 'ins|del';
        $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                           'script|noscript|form|fieldset|iframe|math';

        # Regular expression for the content of a block tag.
        $nested_tags_level = 4;
        $attr = '
            (?>             # optional tag attributes
              \s            # starts with whitespace
              (?>
                [^>"/]+     # text outside quotes
              |
                /+(?!>)     # slash not followed by ">"
              |
                "[^"]*"     # text inside double quotes (tolerate ">")
              |
                \'[^\']*\'  # text inside single quotes (tolerate ">")
              )*
            )?
            ';
        $content =
            str_repeat('
                (?>
                  [^<]+         # content without tag
                |
                  <\2           # nested opening tag
                    '.$attr.'   # attributes
                    (?>
                      />
                    |
                      >', $nested_tags_level).  # end of opening tag
                      '.*?'.                    # last level nested tag content
            str_repeat('
                      </\2\s*>  # closing nested tag
                    )
                  |
                    <(?!/\2\s*> # other tags with a different name
                  )
                )*',
                $nested_tags_level);
        # Same content expression for group "a", whose tag name is captured
        # in $3 instead of $2.
        $content2 = str_replace('\2', '\3', $content);

        # First, look for nested blocks, e.g.:
        #   <div>
        #       <div>
        #       tags for inner block must be indented.
        #       </div>
        #   </div>
        #
        # The outermost tags must start at the left margin for this to match, and
        # the inner nested divs must be indented.
        # We need to do this before the next, more liberal match, because the next
        # match will start at the first `<div>` and stop at the first `</div>`.
        #
        # Capture groups are numbered across the whole expression: $1 is the
        # block, $2 the group "b" tag, $3 the group "a" tag, $4 "hr" and $5
        # the "?" or "%" of a processing instruction. The closing delimiter
        # of a processing instruction must therefore be matched with \5: a
        # back reference to $2, which is unset in that branch, never matches.
        $text = preg_replace_callback('{(?>
            (?>
                (?<=\n\n)       # Starting after a blank line
                |               # or
                \A\n?           # the beginning of the doc
            )
            (                       # save in $1

              # Match from `\n<tag>` to `</tag>\n`, handling nested tags
              # in between.

                        [ ]{0,'.$less_than_tab.'}
                        <('.$block_tags_b_re.')# start tag = $2
                        '.$attr.'>          # attributes followed by > and \n
                        '.$content.'        # content, support nesting
                        </\2>               # the matching end tag
                        [ ]*                # trailing spaces/tabs
                        (?=\n+|\Z)  # followed by a newline or end of document

            | # Special version for tags of group a.

                        [ ]{0,'.$less_than_tab.'}
                        <('.$block_tags_a_re.')# start tag = $3
                        '.$attr.'>[ ]*\n    # attributes followed by >
                        '.$content2.'       # content, support nesting
                        </\3>               # the matching end tag
                        [ ]*                # trailing spaces/tabs
                        (?=\n+|\Z)  # followed by a newline or end of document

            | # Special case just for <hr />. It was easier to make a special
              # case than to make the other regex more complicated.

                        [ ]{0,'.$less_than_tab.'}
                        <(hr)               # start tag = $4
                        '.$attr.'           # attributes
                        /?>                 # the matching end tag
                        [ ]*
                        (?=\n{2,}|\Z)       # followed by a blank line or end of document

            | # Special case for standalone HTML comments:

                    [ ]{0,'.$less_than_tab.'}
                    (?s:
                        <!-- .*? -->
                    )
                    [ ]*
                    (?=\n{2,}|\Z)       # followed by a blank line or end of document

            | # PHP and ASP-style processor instructions (<? and <%)

                    [ ]{0,'.$less_than_tab.'}
                    (?s:
                        <([?%])         # $5
                        .*?
                        \5>
                    )
                    [ ]*
                    (?=\n{2,}|\Z)       # followed by a blank line or end of document

            )
            )}Sxmi',
            array(&$this, '_hashHTMLBlocks_callback'),
            $text);

        return $text;
    }
    function _hashHTMLBlocks_callback($matches) {
    #
    # Hash the HTML block captured in $1 and return its token surrounded
    # by blank lines.
    #
        return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
    }
    526 
    527 
    function hashPart($text, $boundary = 'X') {
    #
    # Store $text in the hash table and return the unique token that
    # stands for it in the text stream. Called whenever a function inserts
    # an atomic element; the token is reverted back when calling unhash.
    #
    # $boundary is the character surrounding the token. By convention, "B"
    # is used for block elements that need not be wrapped into paragraph
    # tags at the end, ":" for elements that are word separators and "X"
    # in the general case.
    #
        static $counter = 0;

        # Swap back any token already present in $text, so that unhashing
        # does not have to be repeated at the end.
        $plain = $this->unhash($text);

        # Token layout: boundary, \x1A, sequence number, boundary.
        $counter++;
        $key = $boundary . "\x1A" . $counter . $boundary;

        $this->html_hashes[$key] = $plain;
        return $key; # String that will replace the tag.
    }
    549 
    550 
    function hashBlock($text) {
    #
    # Hash $text as a block-level element: same as hashPart with the "B"
    # boundary character.
    #
        $block_boundary = 'B';
        return $this->hashPart($text, $block_boundary);
    }
    557 
    558 
    #
    # Transformations that form block-level tags like paragraphs, headers,
    # and list items, mapped to their priority (the constructor sorts the
    # table by priority).
    #
    var $block_gamut = array(
        'doHeaders'         => 10,
        'doHorizontalRules' => 20,

        'doLists'           => 40,
        'doCodeBlocks'      => 50,
        'doBlockQuotes'     => 60,
    );
    571 
    function runBlockGamut($text) {
    #
    # Run block gamut transformations, hashing raw HTML blocks first.
    #
    # The hashing has to be repeated for each block, and not only at the
    # beginning of the transformation: HTML blocks can be part of list
    # items and may have been indented, in which case an earlier pass of
    # hashHTMLBlocks would have seen them as code blocks.
    #
        $hashed = $this->hashHTMLBlocks($text);
        return $this->runBasicBlockGamut($hashed);
    }
    585 
    function runBasicBlockGamut($text) {
    #
    # Run block gamut transformations without hashing HTML blocks first.
    # Useful when the HTML blocks are known to be hashed already, as in
    # the first whole-document pass.
    #
        foreach (array_keys($this->block_gamut) as $method) {
            $text = $this->$method($text);
        }

        # Finally form paragraphs and restore hashed blocks.
        return $this->formParagraphs($text);
    }
    601 
    602 
    function doHorizontalRules($text) {
    #
    # Replace every horizontal rule line (three or more "-", "*" or "_"
    # markers, optionally separated by spaces) with a hashed <hr> tag on
    # its own line.
    #
        $rule_re = '{
                ^[ ]{0,3}   # Leading space
                ([-*_])     # $1: First marker
                (?>         # Repeated marker group
                    [ ]{0,2}    # Zero, one, or two spaces.
                    \1          # Marker character
                ){2,}       # Group repeated at least twice
                [ ]*        # Tailing spaces
                $           # End of line.
            }mx';

        # One token is created per call and reused for every rule found.
        $hr_token = $this->hashBlock('<hr' . $this->empty_element_suffix);

        return preg_replace($rule_re, "\n".$hr_token."\n", $text);
    }
    619 
    620 
    #
    # Transformations that occur *within* block-level tags like
    # paragraphs, headers, and list items, mapped to their priority (the
    # constructor sorts the table by priority).
    #
    var $span_gamut = array(
        # Character escapes, code spans, and inline HTML, in one shot.
        'parseSpan'           => -30,

        # Images must come before anchors, because ![foo][f] looks like
        # an anchor.
        'doImages'            =>  10,
        'doAnchors'           =>  20,

        # Links made out of things like `<http://example.com/>`. Must come
        # after doAnchors, because < and > can be used as delimiters in
        # inline links like [this](<url>).
        'doAutoLinks'         =>  30,
        'encodeAmpsAndAngles' =>  40,

        'doItalicsAndBold'    =>  50,
        'doHardBreaks'        =>  60,
    );
    644 
    function runSpanGamut($text) {
    #
    # Run span gamut transformations: apply every method of $span_gamut
    # to $text, in the order the gamut is stored, and return the result.
    #
        foreach (array_keys($this->span_gamut) as $method) {
            $text = $this->$method($text);
        }
        return $text;
    }
    655 
    656 
    function doHardBreaks($text) {
    #
    # Do hard breaks: two or more spaces followed by a newline are
    # replaced by whatever _doHardBreaks_callback returns.
    #
        $callback = array(&$this, '_doHardBreaks_callback');
        return preg_replace_callback('/ {2,}\n/', $callback, $text);
    }
    function _doHardBreaks_callback($matches) {
    #
    # Return the hash token of a <br> tag followed by a newline.
    #
        $br = '<br' . $this->empty_element_suffix . "\n";
        return $this->hashPart($br);
    }
    665 
    666 
    function doAnchors($text) {
    #
    # Turn Markdown link shortcuts into XHTML <a> tags.
    #
    # The $in_anchor flag is raised for the duration of the call, and a
    # call made while it is raised returns $text untouched.
    #
        if ($this->in_anchor) return $text;
        $this->in_anchor = true;

        #
        # First, handle reference-style links: [link text] [id]
        #
        $reference_re = '{
            (                   # wrap whole match in $1
              \[
                ('.$this->nested_brackets_re.') # link text = $2
              \]

              [ ]?              # one optional space
              (?:\n[ ]*)?       # one optional newline followed by spaces

              \[
                (.*?)       # id = $3
              \]
            )
            }xs';
        $text = preg_replace_callback($reference_re,
            array(&$this, '_doAnchors_reference_callback'), $text);

        #
        # Next, inline-style links: [link text](url "optional title")
        #
        $inline_re = '{
            (               # wrap whole match in $1
              \[
                ('.$this->nested_brackets_re.') # link text = $2
              \]
              \(            # literal paren
                [ ]*
                (?:
                    <(\S*)> # href = $3
                |
                    ('.$this->nested_url_parenthesis_re.')  # href = $4
                )
                [ ]*
                (           # $5
                  ([\'"])   # quote char = $6
                  (.*?)     # Title = $7
                  \6        # matching quote
                  [ ]*  # ignore any spaces/tabs between closing quote and )
                )?          # title is optional
              \)
            )
            }xs';
        $text = preg_replace_callback($inline_re,
            array(&$this, '_DoAnchors_inline_callback'), $text);

        #
        # Last would come reference-style shortcuts: [link text]
        # These must come last in case you've also got [link test][1]
        # or [link test](/foo). This pass is disabled:
        #
//      $text = preg_replace_callback('{
//          (                   # wrap whole match in $1
//            \[
//              ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
//            \]
//          )
//          }xs',
//          array(&$this, '_doAnchors_reference_callback'), $text);

        $this->in_anchor = false;
        return $text;
    }
    function _doAnchors_reference_callback($matches) {
    #
    # Build the <a> tag for one reference-style link and return its hash
    # token. When no url is known for the link id, the matched text is
    # returned unchanged.
    #
        $link_text = $matches[2];
        $link_id   = isset($matches[3]) ? $matches[3] : '';

        # For shortcut links like [this][] the link text is the id.
        if ($link_id == "") {
            $link_id = $link_text;
        }

        # Lower-case and turn embedded newlines into spaces.
        $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

        if (!isset($this->urls[$link_id])) {
            return $matches[1];
        }

        $url = $this->encodeAttribute($this->urls[$link_id]);
        $tag = "<a href=\"$url\"";

        if (isset($this->titles[$link_id])) {
            $title = $this->encodeAttribute($this->titles[$link_id]);
            $tag .= " title=\"$title\"";
        }

        $link_text = $this->runSpanGamut($link_text);
        $tag .= ">$link_text</a>";

        return $this->hashPart($tag);
    }
    function _doAnchors_inline_callback($matches) {
    #
    # preg_replace_callback handler for inline links: [text](url "title").
    # Returns the hashed <a> element.
    #
        $label   =  $this->runSpanGamut($matches[2]);
        # The URL comes from the <bracketed> form ($3) when present,
        # otherwise from the bare form ($4).
        $href    =  $matches[3] == '' ? $matches[4] : $matches[3];
        $tooltip =& $matches[7];

        $href = $this->encodeAttribute($href);
        $html = "<a href=\"$href\"";

        # The title group is optional; it is unset when no title was given.
        if (isset($tooltip)) {
            $tooltip = $this->encodeAttribute($tooltip);
            $html .= " title=\"$tooltip\"";
        }

        # NOTE(review): the link text is passed through the span gamut a
        # second time here, exactly as before; kept to preserve output.
        $label = $this->runSpanGamut($label);
        $html .= ">$label</a>";

        return $this->hashPart($html);
    }
    790 
    791 
    function doImages($text) {
    #
    # Convert Markdown image syntax into <img> tags. Reference-style
    # images are processed first, inline images second.
    #
        # Reference-style labeled images: ![alt text][id]
        $reference_re = '{
            (               # wrap whole match in $1
              !\[
                ('.$this->nested_brackets_re.')     # alt text = $2
              \]

              [ ]?              # one optional space
              (?:\n[ ]*)?       # one optional newline followed by spaces

              \[
                (.*?)       # id = $3
              \]

            )
            }xs';
        $text = preg_replace_callback(
            $reference_re,
            array(&$this, '_doImages_reference_callback'),
            $text
        );

        # Inline images: ![alt text](url "optional title")
        $inline_re = '{
            (               # wrap whole match in $1
              !\[
                ('.$this->nested_brackets_re.')     # alt text = $2
              \]
              \s?           # One optional whitespace character
              \(            # literal paren
                [ ]*
                (?:
                    <(\S*)> # src url = $3
                |
                    ('.$this->nested_url_parenthesis_re.')  # src url = $4
                )
                [ ]*
                (           # $5
                  ([\'"])   # quote char = $6
                  (.*?)     # title = $7
                  \6        # matching quote
                  [ ]*
                )?          # title is optional
              \)
            )
            }xs';
        $text = preg_replace_callback(
            $inline_re,
            array(&$this, '_doImages_inline_callback'),
            $text
        );

        return $text;
    }
    function _doImages_reference_callback($matches) {
    #
    # preg_replace_callback handler for reference-style images:
    # ![alt text][id]. Returns a hashed <img> tag when the id is present
    # in $this->urls, and the untouched source text otherwise.
    #
        $original = $matches[1];
        $alt      = $matches[2];
        $ref      = strtolower($matches[3]);

        # Shortcut form ![this][] uses the alt text as the id.
        if ($ref == "") {
            $ref = strtolower($alt);
        }

        $alt = $this->encodeAttribute($alt);

        if (!isset($this->urls[$ref])) {
            # If there's no such link ID, leave intact.
            return $original;
        }

        $src  = $this->encodeAttribute($this->urls[$ref]);
        $html = "<img src=\"$src\" alt=\"$alt\"";

        if (isset($this->titles[$ref])) {
            $tooltip = $this->encodeAttribute($this->titles[$ref]);
            $html .= " title=\"$tooltip\"";
        }

        $html .= $this->empty_element_suffix;
        return $this->hashPart($html);
    }
    function _doImages_inline_callback($matches) {
    #
    # preg_replace_callback handler for inline images:
    # ![alt text](url "optional title"). Returns the hashed <img> tag.
    #
        $alt     = $matches[2];
        # The URL comes from the <bracketed> form ($3) when present,
        # otherwise from the bare form ($4).
        $src     = $matches[3] == '' ? $matches[4] : $matches[3];
        $tooltip =& $matches[7];

        $alt  = $this->encodeAttribute($alt);
        $src  = $this->encodeAttribute($src);
        $html = "<img src=\"$src\" alt=\"$alt\"";

        # The title group is optional; encode it for a double-quoted
        # attribute only when one was supplied.
        if (isset($tooltip)) {
            $tooltip = $this->encodeAttribute($tooltip);
            $html .= " title=\"$tooltip\"";
        }

        $html .= $this->empty_element_suffix;
        return $this->hashPart($html);
    }
    892 
    893 
    function doHeaders($text) {
    #
    # Convert both header syntaxes into <h1>..<h6> blocks.
    #
        # Setext-style headers are underlined with = (level 1) or - (level 2):
        #     Header 1
        #     ========
        #
        #     Header 2
        #     --------
        #
        $setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';
        $text = preg_replace_callback(
            $setext_re,
            array(&$this, '_doHeaders_callback_setext'),
            $text
        );

        # atx-style headers are prefixed with one to six hashes:
        #   # Header 1
        #   ## Header 2
        #   ## Header 2 with closing hashes ##
        #   ...
        #   ###### Header 6
        #
        $atx_re = '{
                ^(\#{1,6})  # $1 = string of #\'s
                [ ]*
                (.+?)       # $2 = Header text
                [ ]*
                \#*         # optional closing #\'s (not counted)
                \n+
            }xm';
        $text = preg_replace_callback(
            $atx_re,
            array(&$this, '_doHeaders_callback_atx'),
            $text
        );

        return $text;
    }
    function _doHeaders_callback_setext($matches) {
    #
    # preg_replace_callback handler for Setext-style headers.
    #
    #   $matches[1] - header text
    #   $matches[2] - underline made of "=" (level 1) or "-" (level 2)
    #
    # Returns the hashed <h1>/<h2> block surrounded by newlines, or the
    # untouched match when the "header" is really an empty list item.
    #
        # Terrible hack to check we haven't found an empty list item:
        # a line starting with "- " (or a lone "-") followed by a single "-".
        if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
            return $matches[0];

        # Square-bracket string offset: the curly-brace form ($str{0}) is
        # deprecated since PHP 7.4 and is a parse error in PHP 8.
        $level = $matches[2][0] == '=' ? 1 : 2;
        $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
        return "\n" . $this->hashBlock($block) . "\n\n";
    }
    function _doHeaders_callback_atx($matches) {
    #
    # preg_replace_callback handler for atx-style headers. The header
    # level is the number of leading hashes captured in $matches[1];
    # $matches[2] is the header text.
    #
        $depth   = strlen($matches[1]);
        $content = $this->runSpanGamut($matches[2]);
        $hashed  = $this->hashBlock("<h$depth>" . $content . "</h$depth>");
        return "\n" . $hashed . "\n\n";
    }
    938 
    939 
    function doLists($text) {
    #
    # Form HTML ordered (numbered) and unordered (bulleted) lists.
    # Unordered lists are processed first, then ordered lists.
    #
        $less_than_tab = $this->tab_width - 1;

        # Patterns matching a bullet marker and a number marker.
        $marker_ul_re = '[*+-]';
        $marker_ol_re = '\d+[.]';

        foreach (array($marker_ul_re, $marker_ol_re) as $marker_re) {
            # Re-usable pattern to match an entire ul or ol list:
            $whole_list_re = '
                (                               # $1 = whole list
                  (                             # $2
                    [ ]{0,'.$less_than_tab.'}
                    ('.$marker_re.')            # $3 = first list item marker
                    [ ]+
                  )
                  (?s:.+?)
                  (                             # $4
                      \z
                    |
                      \n{2,}
                      (?=\S)
                      (?!                       # Negative lookahead for another list item marker
                        [ ]*
                        '.$marker_re.'[ ]+
                      )
                  )
                )
            '; // mx

            # Nested lists may start at any line start; top-level lists
            # must be preceded by a blank line or the start of the text.
            # See the extended comment in processListItems().
            if ($this->list_level) {
                $list_re = '{
                        ^
                        '.$whole_list_re.'
                    }mx';
            }
            else {
                $list_re = '{
                        (?:(?<=\n)\n|\A\n?) # Must eat the newline
                        '.$whole_list_re.'
                    }mx';
            }

            $text = preg_replace_callback(
                $list_re,
                array(&$this, '_doLists_callback'),
                $text
            );
        }

        return $text;
    }
    function _doLists_callback($matches) {
    #
    # preg_replace_callback handler for a whole list. $matches[1] is the
    # list source and $matches[3] its first item marker, which decides
    # whether a <ul> or an <ol> is produced.
    #
        $marker_ul_re = '[*+-]';
        $marker_ol_re = '\d+[.]';

        if (preg_match("/$marker_ul_re/", $matches[3])) {
            $list_type      = "ul";
            $item_marker_re = $marker_ul_re;
        }
        else {
            $list_type      = "ol";
            $item_marker_re = $marker_ol_re;
        }

        $items = $this->processListItems($matches[1] . "\n", $item_marker_re);
        $block = $this->hashBlock("<$list_type>\n" . $items . "</$list_type>");

        return "\n". $block ."\n\n";
    }
   1014 
   1015     var $list_level = 0;
   1016 
   1017     function processListItems($list_str, $marker_any_re) {
   1018     #
   1019     #   Process the contents of a single ordered or unordered list, splitting it
   1020     #   into individual list items.
   1021     #
   1022         # The $this->list_level global keeps track of when we're inside a list.
   1023         # Each time we enter a list, we increment it; when we leave a list,
   1024         # we decrement. If it's zero, we're not in a list anymore.
   1025         #
   1026         # We do this because when we're not inside a list, we want to treat
   1027         # something like this:
   1028         #
   1029         #       I recommend upgrading to version
   1030         #       8. Oops, now this line is treated
   1031         #       as a sub-list.
   1032         #
   1033         # As a single paragraph, despite the fact that the second line starts
   1034         # with a digit-period-space sequence.
   1035         #
   1036         # Whereas when we're inside a list (or sub-list), that line will be
   1037         # treated as the start of a sub-list. What a kludge, huh? This is
   1038         # an aspect of Markdown's syntax that's hard to parse perfectly
   1039         # without resorting to mind-reading. Perhaps the solution is to
   1040         # change the syntax rules such that sub-lists must start with a
   1041         # starting cardinal number; e.g. "1." or "a.".
   1042 
   1043         $this->list_level++;
   1044 
   1045         # trim trailing blank lines:
   1046         $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
   1047 
   1048         $list_str = preg_replace_callback('{
   1049             (\n)?                           # leading line = $1
   1050             (^[ ]*)                         # leading whitespace = $2
   1051             ('.$marker_any_re.'             # list marker and space = $3
   1052                 (?:[ ]+|(?=\n)) # space only required if item is not empty
   1053             )
   1054             ((?s:.*?))                      # list item text   = $4
   1055             (?:(\n+(?=\n))|\n)              # tailing blank line = $5
   1056             (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
   1057             }xm',
   1058             array(&$this, '_processListItems_callback'), $list_str);
   1059 
   1060         $this->list_level--;
   1061         return $list_str;
   1062     }
   1063     function _processListItems_callback($matches) {
   1064         $item = $matches[4];
   1065         $leading_line =& $matches[1];
   1066         $leading_space =& $matches[2];
   1067         $marker_space = $matches[3];
   1068         $tailing_blank_line =& $matches[5];
   1069 
   1070         if ($leading_line || $tailing_blank_line ||
   1071             preg_match('/\n{2,}/', $item))
   1072         {
   1073             # Replace marker with the appropriate whitespace indentation
   1074             $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
   1075             $item = $this->runBlockGamut($this->outdent($item)."\n");
   1076         }
   1077         else {
   1078             # Recursion for sub-lists:
   1079             $item = $this->doLists($this->outdent($item));
   1080             $item = preg_replace('/\n+$/', '', $item);
   1081             $item = $this->runSpanGamut($item);
   1082         }
   1083 
   1084         return "<li>" . $item . "</li>\n";
   1085     }
   1086 
   1087 
   1088     function doCodeBlocks($text) {
   1089     #
   1090     #   Process Markdown `<pre><code>` blocks.
   1091     #
   1092         $text = preg_replace_callback('{
   1093                 (?:\n\n|\A\n?)
   1094                 (               # $1 = the code block -- one or more lines, starting with a space/tab
   1095                   (?>
   1096                     [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
   1097                     .*\n+
   1098                   )+
   1099                 )
   1100                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
   1101             }xm',
   1102             array(&$this, '_doCodeBlocks_callback'), $text);
   1103 
   1104         return $text;
   1105     }
   1106     function _doCodeBlocks_callback($matches) {
   1107         $codeblock = $matches[1];
   1108 
   1109         $codeblock = $this->outdent($codeblock);
   1110         $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
   1111 
   1112         # trim leading newlines and trailing newlines
   1113         $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
   1114 
   1115         $codeblock = "<pre><code>$codeblock\n</code></pre>";
   1116         return "\n\n".$this->hashBlock($codeblock)."\n\n";
   1117     }
   1118 
   1119 
   1120     function makeCodeSpan($code) {
   1121     #
   1122     # Create a code span markup for $code. Called from handleSpanToken.
   1123     #
   1124         $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
   1125         return $this->hashPart("<code>$code</code>");
   1126     }
   1127 
   1128 
   1129     var $em_relist = array(
   1130         ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)',
   1131         '*' => '(?<=\S)(?<!\*)\*(?!\*)',
   1132         '_' => '(?<=\S)(?<!_)_(?!_)',
   1133         );
   1134     var $strong_relist = array(
   1135         ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)',
   1136         '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
   1137         '__' => '(?<=\S)(?<!_)__(?!_)',
   1138         );
   1139     var $em_strong_relist = array(
   1140         ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)',
   1141         '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
   1142         '___' => '(?<=\S)(?<!_)___(?!_)',
   1143         );
   1144     var $em_strong_prepared_relist;
   1145 
   1146     function prepareItalicsAndBold() {
   1147     #
   1148     # Prepare regular expressions for seraching emphasis tokens in any
   1149     # context.
   1150     #
   1151         foreach ($this->em_relist as $em => $em_re) {
   1152             foreach ($this->strong_relist as $strong => $strong_re) {
   1153                 # Construct list of allowed token expressions.
   1154                 $token_relist = array();
   1155                 if (isset($this->em_strong_relist["$em$strong"])) {
   1156                     $token_relist[] = $this->em_strong_relist["$em$strong"];
   1157                 }
   1158                 $token_relist[] = $em_re;
   1159                 $token_relist[] = $strong_re;
   1160 
   1161                 # Construct master expression from list.
   1162                 $token_re = '{('. implode('|', $token_relist) .')}';
   1163                 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
   1164             }
   1165         }
   1166     }
   1167 
   1168     function doItalicsAndBold($text) {
   1169         $token_stack = array('');
   1170         $text_stack = array('');
   1171         $em = '';
   1172         $strong = '';
   1173         $tree_char_em = false;
   1174 
   1175         while (1) {
   1176             #
   1177             # Get prepared regular expression for seraching emphasis tokens
   1178             # in current context.
   1179             #
   1180             $token_re = $this->em_strong_prepared_relist["$em$strong"];
   1181 
   1182             #
   1183             # Each loop iteration seach for the next emphasis token.
   1184             # Each token is then passed to handleSpanToken.
   1185             #
   1186             $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
   1187             $text_stack[0] .= $parts[0];
   1188             $token =& $parts[1];
   1189             $text =& $parts[2];
   1190 
   1191             if (empty($token)) {
   1192                 # Reached end of text span: empty stack without emitting.
   1193                 # any more emphasis.
   1194                 while ($token_stack[0]) {
   1195                     $text_stack[1] .= array_shift($token_stack);
   1196                     $text_stack[0] .= array_shift($text_stack);
   1197                 }
   1198                 break;
   1199             }
   1200 
   1201             $token_len = strlen($token);
   1202             if ($tree_char_em) {
   1203                 # Reached closing marker while inside a three-char emphasis.
   1204                 if ($token_len == 3) {
   1205                     # Three-char closing marker, close em and strong.
   1206                     array_shift($token_stack);
   1207                     $span = array_shift($text_stack);
   1208                     $span = $this->runSpanGamut($span);
   1209                     $span = "<strong><em>$span</em></strong>";
   1210                     $text_stack[0] .= $this->hashPart($span);
   1211                     $em = '';
   1212                     $strong = '';
   1213                 } else {
   1214                     # Other closing marker: close one em or strong and
   1215                     # change current token state to match the other
   1216                     $token_stack[0] = str_repeat($token{0}, 3-$token_len);
   1217                     $tag = $token_len == 2 ? "strong" : "em";
   1218                     $span = $text_stack[0];
   1219                     $span = $this->runSpanGamut($span);
   1220                     $span = "<$tag>$span</$tag>";
   1221                     $text_stack[0] = $this->hashPart($span);
   1222                     $$tag = ''; # $$tag stands for $em or $strong
   1223                 }
   1224                 $tree_char_em = false;
   1225             } else if ($token_len == 3) {
   1226                 if ($em) {
   1227                     # Reached closing marker for both em and strong.
   1228                     # Closing strong marker:
   1229                     for ($i = 0; $i < 2; ++$i) {
   1230                         $shifted_token = array_shift($token_stack);
   1231                         $tag = strlen($shifted_token) == 2 ? "strong" : "em";
   1232                         $span = array_shift($text_stack);
   1233                         $span = $this->runSpanGamut($span);
   1234                         $span = "<$tag>$span</$tag>";
   1235                         $text_stack[0] .= $this->hashPart($span);
   1236                         $$tag = ''; # $$tag stands for $em or $strong
   1237                     }
   1238                 } else {
   1239                     # Reached opening three-char emphasis marker. Push on token
   1240                     # stack; will be handled by the special condition above.
   1241                     $em = $token{0};
   1242                     $strong = "$em$em";
   1243                     array_unshift($token_stack, $token);
   1244                     array_unshift($text_stack, '');
   1245                     $tree_char_em = true;
   1246                 }
   1247             } else if ($token_len == 2) {
   1248                 if ($strong) {
   1249                     # Unwind any dangling emphasis marker:
   1250                     if (strlen($token_stack[0]) == 1) {
   1251                         $text_stack[1] .= array_shift($token_stack);
   1252                         $text_stack[0] .= array_shift($text_stack);
   1253                     }
   1254                     # Closing strong marker:
   1255                     array_shift($token_stack);
   1256                     $span = array_shift($text_stack);
   1257                     $span = $this->runSpanGamut($span);
   1258                     $span = "<strong>$span</strong>";
   1259                     $text_stack[0] .= $this->hashPart($span);
   1260                     $strong = '';
   1261                 } else {
   1262                     array_unshift($token_stack, $token);
   1263                     array_unshift($text_stack, '');
   1264                     $strong = $token;
   1265                 }
   1266             } else {
   1267                 # Here $token_len == 1
   1268                 if ($em) {
   1269                     if (strlen($token_stack[0]) == 1) {
   1270                         # Closing emphasis marker:
   1271                         array_shift($token_stack);
   1272                         $span = array_shift($text_stack);
   1273                         $span = $this->runSpanGamut($span);
   1274                         $span = "<em>$span</em>";
   1275                         $text_stack[0] .= $this->hashPart($span);
   1276                         $em = '';
   1277                     } else {
   1278                         $text_stack[0] .= $token;
   1279                     }
   1280                 } else {
   1281                     array_unshift($token_stack, $token);
   1282                     array_unshift($text_stack, '');
   1283                     $em = $token;
   1284                 }
   1285             }
   1286         }
   1287         return $text_stack[0];
   1288     }
   1289 
   1290 
   1291     function doBlockQuotes($text) {
   1292         $text = preg_replace_callback('/
   1293               (                             # Wrap whole match in $1
   1294                 (?>
   1295                   ^[ ]*>[ ]?            # ">" at the start of a line
   1296                     .+\n                    # rest of the first line
   1297                   (.+\n)*                   # subsequent consecutive lines
   1298                   \n*                       # blanks
   1299                 )+
   1300               )
   1301             /xm',
   1302             array(&$this, '_doBlockQuotes_callback'), $text);
   1303 
   1304         return $text;
   1305     }
   1306     function _doBlockQuotes_callback($matches) {
   1307         $bq = $matches[1];
   1308         # trim one level of quoting - trim whitespace-only lines
   1309         $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
   1310         $bq = $this->runBlockGamut($bq);        # recurse
   1311 
   1312         $bq = preg_replace('/^/m', "  ", $bq);
   1313         # These leading spaces cause problem with <pre> content,
   1314         # so we need to fix that:
   1315         $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
   1316             array(&$this, '_DoBlockQuotes_callback2'), $bq);
   1317 
   1318         return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
   1319     }
   1320     function _doBlockQuotes_callback2($matches) {
   1321         $pre = $matches[1];
   1322         $pre = preg_replace('/^  /m', '', $pre);
   1323         return $pre;
   1324     }
   1325 
   1326 
   1327     function formParagraphs($text) {
   1328     #
   1329     #   Params:
   1330     #       $text - string to process with html <p> tags
   1331     #
   1332         # Strip leading and trailing lines:
   1333         $text = preg_replace('/\A\n+|\n+\z/', '', $text);
   1334 
   1335         $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
   1336 
   1337         #
   1338         # Wrap <p> tags and unhashify HTML blocks
   1339         #
   1340         foreach ($grafs as $key => $value) {
   1341             if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
   1342                 # Is a paragraph.
   1343                 $value = $this->runSpanGamut($value);
   1344                 $value = preg_replace('/^([ ]*)/', "<p>", $value);
   1345                 $value .= "</p>";
   1346                 $grafs[$key] = $this->unhash($value);
   1347             }
   1348             else {
   1349                 # Is a block.
   1350                 # Modify elements of @grafs in-place...
   1351                 $graf = $value;
   1352                 $block = $this->html_hashes[$graf];
   1353                 $graf = $block;
   1354 //              if (preg_match('{
   1355 //                  \A
   1356 //                  (                           # $1 = <div> tag
   1357 //                    <div  \s+
   1358 //                    [^>]*
   1359 //                    \b
   1360 //                    markdown\s*=\s*  ([\'"])  #   $2 = attr quote char
   1361 //                    1
   1362 //                    \2
   1363 //                    [^>]*
   1364 //                    >
   1365 //                  )
   1366 //                  (                           # $3 = contents
   1367 //                  .*
   1368 //                  )
   1369 //                  (</div>)                    # $4 = closing tag
   1370 //                  \z
   1371 //                  }xs', $block, $matches))
   1372 //              {
   1373 //                  list(, $div_open, , $div_content, $div_close) = $matches;
   1374 //
   1375 //                  # We can't call Markdown(), because that resets the hash;
   1376 //                  # that initialization code should be pulled into its own sub, though.
   1377 //                  $div_content = $this->hashHTMLBlocks($div_content);
   1378 //
   1379 //                  # Run document gamut methods on the content.
   1380 //                  foreach ($this->document_gamut as $method => $priority) {
   1381 //                      $div_content = $this->$method($div_content);
   1382 //                  }
   1383 //
   1384 //                  $div_open = preg_replace(
   1385 //                      '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
   1386 //
   1387 //                  $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
   1388 //              }
   1389                 $grafs[$key] = $graf;
   1390             }
   1391         }
   1392 
   1393         return implode("\n\n", $grafs);
   1394     }
   1395 
   1396 
   1397     function encodeAttribute($text) {
   1398     #
   1399     # Encode text for a double-quoted HTML attribute. This function
   1400     # is *not* suitable for attributes enclosed in single quotes.
   1401     #
   1402         $text = $this->encodeAmpsAndAngles($text);
   1403         $text = str_replace('"', '&quot;', $text);
   1404         return $text;
   1405     }
   1406 
   1407 
   1408     function encodeAmpsAndAngles($text) {
   1409     #
   1410     # Smart processing for ampersands and angle brackets that need to
   1411     # be encoded. Valid character entities are left alone unless the
   1412     # no-entities mode is set.
   1413     #
   1414         if ($this->no_entities) {
   1415             $text = str_replace('&', '&amp;', $text);
   1416         } else {
   1417             # Ampersand-encoding based entirely on Nat Irons's Amputator
   1418             # MT plugin: <http://bumppo.net/projects/amputator/>
   1419             $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
   1420                                 '&amp;', $text);;
   1421         }
   1422         # Encode remaining <'s
   1423         $text = str_replace('<', '&lt;', $text);
   1424 
   1425         return $text;
   1426     }
   1427 
   1428 
   1429     function doAutoLinks($text) {
   1430         $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
   1431             array(&$this, '_doAutoLinks_url_callback'), $text);
   1432 
   1433         # Email addresses: <address@domain.foo>
   1434         $text = preg_replace_callback('{
   1435             <
   1436             (?:mailto:)?
   1437             (
   1438                 [-.\w\x80-\xFF]+
   1439                 \@
   1440                 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
   1441             )
   1442             >
   1443             }xi',
   1444             array(&$this, '_doAutoLinks_email_callback'), $text);
   1445 
   1446         return $text;
   1447     }
   1448     function _doAutoLinks_url_callback($matches) {
   1449         $url = $this->encodeAttribute($matches[1]);
   1450         $link = "<a href=\"$url\">$url</a>";
   1451         return $this->hashPart($link);
   1452     }
   1453     function _doAutoLinks_email_callback($matches) {
   1454         $address = $matches[1];
   1455         $link = $this->encodeEmailAddress($address);
   1456         return $this->hashPart($link);
   1457     }
   1458 
   1459 
   function encodeEmailAddress($addr) {
   #
   #   Input: an email address, e.g. "foo@example.com"
   #
   #   Output: a `mailto:` anchor for that address in which the ASCII
   #       characters are mostly written as numeric character references,
   #       in the hopes of foiling most address harvesting spam bots.
   #
   #   Each ASCII byte is either left raw, written as a hexadecimal
   #   reference (&#x..;) or written as a decimal reference (&#..;). The
   #   choice is driven by a value computed from the address itself, so the
   #   same address always produces the same markup. '@' is never left raw.
   #   Bytes with a value of 128 or more are copied through untouched.
   #
   #   Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
   #   With some optimizations by Milian Wolff.
   #
       $addr = "mailto:" . $addr;
       $bytes = preg_split('/(?<!^)(?!$)/', $addr);

       # Deterministic seed derived from the full `mailto:` string.
       $seed = (int)abs(crc32($addr) / strlen($addr));

       $encoded = array();
       foreach ($bytes as $pos => $byte) {
           $code = ord($byte);

           # Non-ASCII bytes are kept as they are.
           if ($code >= 128) {
               $encoded[$pos] = $byte;
               continue;
           }

           # Pseudo-random value for this position:
           # roughly 10% raw, 45% hex, 45% dec.
           $roll = ($seed * (1 + $pos)) % 100;

           if ($roll > 90 && $byte != '@') {
               # Left raw. '@' never takes this branch.
               $encoded[$pos] = $byte;
           }
           else if ($roll < 45) {
               $encoded[$pos] = '&#x'.dechex($code).';';
           }
           else {
               $encoded[$pos] = '&#'.$code.';';
           }
       }

       $href  = implode('', $encoded);
       $label = implode('', array_slice($encoded, 7)); # text without `mailto:`

       return "<a href=\"$href\">$label</a>";
   }
   1499 
   1500 
   function parseSpan($str) {
   #
   # Walk through $str token by token. Text between tokens is copied to the
   # output as is; each token (a backslash escape, a code span marker or,
   # unless `no_markup` is set, an inline HTML comment, processing
   # instruction or tag) is passed to handleSpanToken and replaced by
   # whatever that method returns.
   #
       # Alternatives that only apply when inline markup is allowed.
       $markup_re = $this->no_markup ? '' : '
                |
                    <!--    .*?     -->     # comment
                |
                    <\?.*?\?> | <%.*?%>     # processing instruction
                |
                    <[/!$]?[-a-zA-Z0-9:]+   # regular tags
                    (?>
                        \s
                        (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                    )?
                    >
            ';

       $span_re = '{
                (
                    \\\\'.$this->escape_chars_re.'
                |
                    (?<![`\\\\])
                    `+                      # code span marker
            '.$markup_re.'
                )
                }xs';

       $output = '';
       for (;;) {
           # Split into: text before the next token, the token itself, and
           # everything that follows it.
           $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

           # Text preceding the token (appending an empty string is a no-op).
           $output .= $parts[0];

           # No token found: the end of the string has been reached.
           if (!isset($parts[1])) break;

           # handleSpanToken receives the remaining text by reference and may
           # shorten it, so the next iteration continues from $parts[2].
           $output .= $this->handleSpanToken($parts[1], $parts[2]);
           $str = $parts[2];
       }

       return $output;
   }
   1555 
   1556 
   function handleSpanToken($token, &$str) {
   #
   # Handle $token provided by parseSpan by determining its nature and
   # returning the corresponding value that should replace it.
   #
   # $str is the text following the token. It is taken by reference: when a
   # complete code span is found, the span content and its closing marker
   # are removed from $str so the caller resumes after the span.
   #
       # String offsets use square brackets: the `$s{0}` form was deprecated
       # in PHP 7.4 and is a parse error from PHP 8.0 on.
       switch ($token[0]) {
           case "\\":
               # Backslash escape: emit the escaped character as a decimal
               # character reference.
               return $this->hashPart("&#". ord($token[1]). ";");
           case "`":
               # Search for end marker in remaining text.
               if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
                   $str, $matches))
               {
                   $str = $matches[2];
                   $codespan = $this->makeCodeSpan($matches[1]);
                   return $this->hashPart($codespan);
               }
               return $token; // return as text since no ending marker found.
           default:
               # Anything else matched by parseSpan (tags, comments,
               # processing instructions) is hashed unchanged.
               return $this->hashPart($token);
       }
   }
   1579 
   1580 
   function outdent($text) {
   #
   # Strip one level of indentation from every line of $text: either a
   # single leading tab or between one and `tab_width` leading spaces.
   #
       $indent_re = '/^(\t|[ ]{1,'.$this->tab_width.'})/m';
       return preg_replace($indent_re, '', $text);
   }
   1587 
   1588 
   # String length function for detab. `_initDetab` will create a function to
   # handle UTF-8 if the default function does not exist.
   1591     var $utf8_strlen = 'mb_strlen';
   1592 
   function detab($text) {
   #
   # Replace tabs with the appropriate amount of space.
   #
   # Only lines that actually contain a tab are touched: each of them is
   # passed to _detab_callback, which rebuilds the line with spaces in place
   # of the tabs.
   #
       return preg_replace_callback('/^.*\t.*$/m',
           array(&$this, '_detab_callback'), $text);
   }
   function _detab_callback($matches) {
   #
   # Rebuild one line (in $matches[0]) with its tabs expanded. The line is
   # cut at every tab; each piece after the first is preceded by as many
   # spaces as needed to reach the next multiple of `tab_width`, measured
   # with the length function named in the `utf8_strlen` property.
   #
       $measure = $this->utf8_strlen; # strlen function for UTF-8.

       $pieces  = explode("\t", $matches[0]);
       $rebuilt = array_shift($pieces); # Text before the first tab.

       foreach ($pieces as $piece) {
           # Distance from the current length to the next tab stop.
           $padding = $this->tab_width -
               $measure($rebuilt, 'UTF-8') % $this->tab_width;
           $rebuilt .= str_repeat(" ", $padding) . $piece;
       }
       return $rebuilt;
   }
   function _initDetab() {
   #
   # Check for the availability of the function named in the `utf8_strlen`
   # property (initially `mb_strlen`). If that function is not available,
   # point the property at a fallback that loosely counts the number of
   # UTF-8 characters with a regular expression.
   #
   # The fallback is a conditionally declared named function rather than
   # the result of create_function(): create_function() was deprecated in
   # PHP 7.2 and removed in PHP 8.0, whereas a named function works on every
   # PHP version and can still be invoked through a variable holding its
   # name, which is how `utf8_strlen` is used.
   #
       if (function_exists($this->utf8_strlen)) return;

       if (!function_exists('_markdown_utf8_strlen')) {
           function _markdown_utf8_strlen($text) {
               # Counts single bytes below 0xC0 and lead bytes followed by
               # their continuation bytes. Callers also pass an encoding
               # name as a second argument; it is ignored here.
               return preg_match_all(
                   '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
                   $text, $m);
           }
       }
       $this->utf8_strlen = '_markdown_utf8_strlen';
   }
   1635 
   1636 
   function unhash($text) {
   #
   # Swap back in everything that was replaced by a hash token. A token is
   # recognized as: one character, the \x1A byte, one or more digits, then
   # the same character again. Each token found is resolved by
   # _unhash_callback.
   #
       $token_re = '/(.)\x1A[0-9]+\1/';
       return preg_replace_callback($token_re,
           array(&$this, '_unhash_callback'), $text);
   }
   function _unhash_callback($matches) {
   #
   # Look up the matched hash token in the `html_hashes` table and return
   # the text stored for it.
   #
       $token = $matches[0];
       return $this->html_hashes[$token];
   }
   1647 
   1648 }
   1649 
   1650 
   1651 #
   1652 # Markdown Extra Parser Class
   1653 #
   1654 
   1655 class MarkdownExtra_Parser extends Markdown_Parser {
   1656 
   1657     # Prefix for footnote ids.
   1658     var $fn_id_prefix = "";
   1659 
   1660     # Optional title attribute for footnote links and backlinks.
   1661     var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
   1662     var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
   1663 
   1664     # Optional class attribute for footnote links and backlinks.
   1665     var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
   1666     var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
   1667 
   1668     # Predefined abbreviations.
   1669     var $predef_abbr = array();
   1670 
   1671 
   function MarkdownExtra_Parser() {
   #
   # PHP 4 style constructor, kept so that PHP 4 and any subclass calling
   # parent::MarkdownExtra_Parser() keep working. The actual work is done in
   # __construct(). (Declared before __construct() on purpose: some PHP 5
   # releases emit a strict-standards notice for the opposite order.)
   #
       $this->__construct();
   }

   function __construct() {
   #
   # Constructor function. Initialize the parser object.
   #
   # From PHP 8.0 on, a method named after its class is no longer treated
   # as a constructor, so the initialization lives here; on PHP 5 and 7
   # this method takes precedence over the class-named one.
   #
       # Add extra escapable characters before parent constructor
       # initialize the table.
       $this->escape_chars .= ':|';

       # Insert extra document, block, and span transformations.
       # Parent constructor will do the sorting.
       $this->document_gamut += array(
           "doFencedCodeBlocks" => 5,
           "stripFootnotes"     => 15,
           "stripAbbreviations" => 25,
           "appendFootnotes"    => 50,
           );
       $this->block_gamut += array(
           "doFencedCodeBlocks" => 5,
           "doTables"           => 15,
           "doDefLists"         => 45,
           );
       $this->span_gamut += array(
           "doFootnotes"        => 5,
           "doAbbreviations"    => 70,
           );

       # Called by name, so it works whether or not the PHP version in use
       # regards this parent method as a constructor.
       parent::Markdown_Parser();
   }
   1700 
   1701 
   1702     # Extra variables used during extra transformations.
   1703     var $footnotes = array();
   1704     var $footnotes_ordered = array();
   1705     var $abbr_desciptions = array();
   1706     var $abbr_word_re = '';
   1707 
   1708     # Give the current footnote number.
   1709     var $footnote_counter = 1;
   1710 
   1711 
   function setup() {
   #
   # Setting up Extra-specific variables.
   #
   # After the parent setup, the footnote and abbreviation state is reset
   # and the predefined abbreviations (`predef_abbr`, word => description)
   # are loaded: each word is quoted and added as an alternative of
   # `abbr_word_re`, and its trimmed description is stored in
   # `abbr_desciptions`.
   #
       parent::setup();

       $this->footnotes = array();
       $this->footnotes_ordered = array();
       $this->abbr_desciptions = array();
       $this->abbr_word_re = '';
       $this->footnote_counter = 1;

       foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
           # Compare against the empty string explicitly: a plain truthiness
           # test treats the pattern "0" as empty and would then glue the
           # next word onto it without the `|` separator.
           if ($this->abbr_word_re !== '')
               $this->abbr_word_re .= '|';
           $this->abbr_word_re .= preg_quote($abbr_word);
           $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
       }
   }
   1731 
   function teardown() {
   #
   # Reset the Extra-specific footnote and abbreviation variables to their
   # empty values, then run the parent teardown.
   #
       $this->abbr_word_re = '';
       $this->abbr_desciptions = array();
       $this->footnotes_ordered = array();
       $this->footnotes = array();

       parent::teardown();
   }
   1743 
   1744 
   1745     ### HTML Block Parser ###
   1746 
   1747     # Tags that are always treated as block tags:
   1748     var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
   1749 
   # Tags treated as block tags only if the opening tag is alone on its line:
   1751     var $context_block_tags_re = 'script|noscript|math|ins|del';
   1752 
   1753     # Tags where markdown="1" default to span mode:
   1754     var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
   1755 
   1756     # Tags which must not have their contents modified, no matter where
   1757     # they appear:
   1758     var $clean_tags_re = 'script|math';
   1759 
   1760     # Tags that do not need to be closed.
   1761     var $auto_close_tags_re = 'hr|img';
   1762 
   1763 
   function hashHTMLBlocks($text) {
   #
   # Hashify HTML blocks and "clean tags".
   #
   # Only block-level HTML tags (headers, lists, tables, ...) are meant to be
   # handled here, because "paragraphs" wrapped in non-block-level tags such
   # as anchors, phrase emphasis and spans must still get their <p>s. The
   # list of tags looked for is hard-coded.
   #
   # The work is done by two mutually recursive methods:
   # _hashHTMLBlocks_inMarkdown, which calls _hashHTMLBlocks_inHTML when it
   # encounters a block tag, and _hashHTMLBlocks_inHTML, which calls back
   # _hashHTMLBlocks_inMarkdown when it finds a markdown="1" attribute
   # within a tag.
   #
       # The HTML-in-Markdown hasher returns (processed text, remaining
       # text); only the processed text is of interest here.
       $result = $this->_hashHTMLBlocks_inMarkdown($text);
       return $result[0];
   }
   function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
                                       $enclosing_tag_re = '', $span = false)
   {
   #
   # Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
   #
   # *   $indent is the number of spaces to be ignored when checking for code
   #     blocks. This is important because if we don't take the indent into
   #     account, something like this (which looks right) won't work as expected:
   #
   #     <div>
   #         <div markdown="1">
   #         Hello World.  <-- Is this a Markdown code block or text?
   #         </div>  <-- Is this a Markdown code block or a real tag?
   #     <div>
   #
   #     If you don't like this, just don't indent the tag on which
   #     you apply the markdown="1" attribute.
   #
   # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
   #     tag with that name. Nested tags supported.
   #
   # *   If $span is true, text inside must be treated as span. So any double
   #     newline will be replaced by a single newline so that it does not create
   #     paragraphs.
   #
   # Returns an array of that form: ( processed text , remaining text )
   #
   # Note: string offsets are written with square brackets throughout; the
   # `$s{0}` form was deprecated in PHP 7.4 and is a parse error from
   # PHP 8.0 on.
   #
       if ($text === '') return array('', '');

       # Regex to check for the presence of newlines around a block tag.
       $newline_before_re = '/(?:^\n?|\n\n)*$/';
       $newline_after_re =
           '{
                ^                       # Start of text following the tag.
                (?>[ ]*<!--.*?-->)?     # Optional comment.
                [ ]*\n                  # Must be followed by newline.
            }xs';

       # Regex to match any tag.
       $block_tag_re =
           '{
                (                   # $2: Capture hole tag.
                    </?                 # Any opening or closing tag.
                        (?>             # Tag name.
                            '.$this->block_tags_re.'            |
                            '.$this->context_block_tags_re.'    |
                            '.$this->clean_tags_re.'            |
                            (?!\s)'.$enclosing_tag_re.'
                        )
                        (?:
                            (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
                            (?>
                                ".*?"       |   # Double quotes (can contain `>`)
                                \'.*?\'     |   # Single quotes (can contain `>`)
                                .+?             # Anything but quotes and `>`.
                            )*?
                        )?
                    >                   # End of tag.
                |
                    <!--    .*?     --> # HTML Comment
                |
                    <\?.*?\?> | <%.*?%> # Processing instruction
                |
                    <!\[CDATA\[.*?\]\]> # CData Block
                |
                    # Code span marker
                    `+
                '. ( !$span ? ' # If not in span.
                |
                    # Indented code block
                    (?> ^[ ]*\n? | \n[ ]*\n )
                    [ ]{'.($indent+4).'}[^\n]* \n
                    (?>
                        (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
                    )*
                |
                    # Fenced code block marker
                    (?> ^ | \n )
                    [ ]{'.($indent).'}~~~+[ ]*\n
                ' : '' ). ' # End (if not is span).
                )
            }xs';


       $depth = 0;     # Current depth inside the tag tree.
       $parsed = "";   # Parsed text that will be returned.

       #
       # Loop through every tag until we find the closing tag of the parent
       # or loop until reaching the end of text if no parent tag specified.
       #
       do {
           #
           # Split the text using the first $block_tag_re match found.
           # Text before the match will be first in the array, text after
           # it will be at the end, and between will be what the pattern
           # captured.
           #
           $parts = preg_split($block_tag_re, $text, 2,
                               PREG_SPLIT_DELIM_CAPTURE);

           # If in Markdown span mode, add an empty-string span-level hash
           # after each newline to prevent triggering any block element.
           if ($span) {
               $void = $this->hashPart("", ':');
               $newline = "$void\n";
               $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
           }

           $parsed .= $parts[0]; # Text before current tag.

           # If end of $text has been reached. Stop loop.
           if (count($parts) < 3) {
               $text = "";
               break;
           }

           $tag  = $parts[1]; # Tag to handle.
           $text = $parts[2]; # Remaining text after current tag.

           #
           # Check for: Code span marker
           #
           if ($tag[0] == "`") {
               # Find corresponding end marker.
               $tag_re = preg_quote($tag);
               if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
                   $text, $matches))
               {
                   # End marker found: pass text unchanged until marker.
                   $parsed .= $tag . $matches[0];
                   $text = substr($text, strlen($matches[0]));
               }
               else {
                   # Unmatched marker: just skip it.
                   $parsed .= $tag;
               }
           }
           #
           # Check for: Indented code block or fenced code block marker.
           #
           else if ($tag[0] == "\n" || $tag[0] == "~") {
               if ($tag[1] == "\n" || $tag[1] == " ") {
                   # Indented code block: pass it unchanged, will be handled
                   # later.
                   $parsed .= $tag;
               }
               else {
                   # Fenced code block marker: find matching end marker.
                   $tag_re = preg_quote(trim($tag));
                   if (preg_match('{^(?>.*\n)+?'.$tag_re.' *\n}', $text,
                       $matches))
                   {
                       # End marker found: pass text unchanged until marker.
                       $parsed .= $tag . $matches[0];
                       $text = substr($text, strlen($matches[0]));
                   }
                   else {
                       # No end marker: just skip it.
                       $parsed .= $tag;
                   }
               }
           }
           #
           # Check for: Opening Block level tag or
           #            Opening Context Block tag (like ins and del)
           #               used as a block tag (tag is alone on its line).
           #
           else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
               (   preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
                   preg_match($newline_before_re, $parsed) &&
                   preg_match($newline_after_re, $text)    )
               )
           {
               # Need to parse tag and following text using the HTML parser.
               list($block_text, $text) =
                   $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

               # Make sure it stays outside of any paragraph by adding newlines.
               $parsed .= "\n\n$block_text\n\n";
           }
           #
           # Check for: Clean tag (like script, math)
           #            HTML Comments, processing instructions.
           #
           else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
               $tag[1] == '!' || $tag[1] == '?')
           {
               # Need to parse tag and following text using the HTML parser.
               # (don't check for markdown attribute)
               list($block_text, $text) =
                   $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

               $parsed .= $block_text;
           }
           #
           # Check for: Tag with same name as enclosing tag.
           #
           else if ($enclosing_tag_re !== '' &&
               # Same name as enclosing tag.
               preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
           {
               #
               # Increase/decrease nested tag count. A tag ending in `/>`
               # is self-closing and leaves the depth unchanged.
               #
               if ($tag[1] == '/')                     $depth--;
               else if ($tag[strlen($tag)-2] != '/')   $depth++;

               if ($depth < 0) {
                   #
                   # Going out of parent element. Clean up and break so we
                   # return to the calling function.
                   #
                   $text = $tag . $text;
                   break;
               }

               $parsed .= $tag;
           }
           else {
               $parsed .= $tag;
           }
       } while ($depth >= 0);

       return array($parsed, $text);
   }
   function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
   #
   # Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
   #
   # *   Calls $hash_method to convert any blocks.
   # *   Stops when the first opening tag closes.
   # *   $md_attr indicates if the use of the `markdown="1"` attribute is
   #     allowed. (it is not inside clean tags)
   #
   # Returns an array of that form: ( processed text , remaining text )
   #
   # Note: string offsets are written with square brackets throughout; the
   # `$s{0}` form was deprecated in PHP 7.4 and is a parse error from
   # PHP 8.0 on.
   #
       if ($text === '') return array('', '');

       # Regex to match `markdown` attribute inside of a tag.
       $markdown_attr_re = '
            {
                \s*         # Eat whitespace before the `markdown` attribute
                markdown
                \s*=\s*
                (?>
                    (["\'])     # $1: quote delimiter
                    (.*?)       # $2: attribute value
                    \1          # matching delimiter
                |
                    ([^\s>]*)   # $3: unquoted attribute value
                )
                ()              # $4: make $3 always defined (avoid warnings)
            }xs';

       # Regex to match any tag.
       $tag_re = '{
                (                   # $2: Capture hole tag.
                    </?                 # Any opening or closing tag.
                        [\w:$]+         # Tag name.
                        (?:
                            (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
                            (?>
                                ".*?"       |   # Double quotes (can contain `>`)
                                \'.*?\'     |   # Single quotes (can contain `>`)
                                .+?             # Anything but quotes and `>`.
                            )*?
                        )?
                    >                   # End of tag.
                |
                    <!--    .*?     --> # HTML Comment
                |
                    <\?.*?\?> | <%.*?%> # Processing instruction
                |
                    <!\[CDATA\[.*?\]\]> # CData Block
                )
            }xs';

       $original_text = $text;     # Save original text in case of failure.

       $depth      = 0;    # Current depth inside the tag tree.
       $block_text = "";   # Temporary text holder for current text.
       $parsed     = "";   # Parsed text that will be returned.

       #
       # Get the name of the starting tag.
       # (This pattern makes $base_tag_name_re safe without quoting.)
       #
       # When $text does not start with a named tag (a comment or processing
       # instruction, for instance) no name is captured; `(?!)` never
       # matches, so the depth test below stays well defined instead of
       # reading an undefined variable.
       #
       $base_tag_name_re = '(?!)';
       if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
           $base_tag_name_re = $matches[1];

       #
       # Loop through every tag until we find the corresponding closing tag.
       #
       do {
           #
           # Split the text using the first $tag_re match found.
           # Text before the match will be first in the array, text after
           # it will be at the end, and between will be what the pattern
           # captured.
           #
           $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

           if (count($parts) < 3) {
               #
               # End of $text reached with unbalanced tag(s).
               # In that case, we return original text unchanged and pass the
               # first character as filtered to prevent an infinite loop in the
               # parent function.
               #
               return array($original_text[0], substr($original_text, 1));
           }

           $block_text .= $parts[0]; # Text before current tag.
           $tag         = $parts[1]; # Tag to handle.
           $text        = $parts[2]; # Remaining text after current tag.

           #
           # Check for: Auto-close tag (like <hr/>)
           #            Comments and Processing Instructions.
           #
           if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
               $tag[1] == '!' || $tag[1] == '?')
           {
               # Just add the tag to the block as if it was text.
               $block_text .= $tag;
           }
           else {
               #
               # Increase/decrease nested tag count. Only do so if
               # the tag's name matches the base tag's. A tag ending in
               # `/>` is self-closing and leaves the depth unchanged.
               #
               if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
                   if ($tag[1] == '/')                     $depth--;
                   else if ($tag[strlen($tag)-2] != '/')   $depth++;
               }

               #
               # Check for `markdown="1"` attribute and handle it.
               #
               # The alternatives are grouped so that the anchors apply to
               # all of them: without the group, `^` only binds to "1" and
               # `$` only to "span", which lets values such as "10" or
               # "xblockx" through.
               #
               if ($md_attr &&
                   preg_match($markdown_attr_re, $tag, $attr_m) &&
                   preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
               {
                   # Remove `markdown` attribute from opening tag.
                   $tag = preg_replace($markdown_attr_re, '', $tag);

                   # Check if text inside this tag must be parsed in span mode:
                   # either explicitly requested, or not explicitly "block"
                   # and the tag is one that defaults to span content.
                   $this->mode = $attr_m[2] . $attr_m[3];
                   $span_mode = $this->mode == 'span' ||
                       ($this->mode != 'block' &&
                       preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag));

                   # Calculate indent before tag.
                   if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                       $strlen = $this->utf8_strlen;
                       $indent = $strlen($matches[1], 'UTF-8');
                   } else {
                       $indent = 0;
                   }

                   # End preceding block with this tag.
                   $block_text .= $tag;
                   $parsed .= $this->$hash_method($block_text);

                   # Get enclosing tag name for the Markdown parser.
                   # (This pattern makes $tag_name_re safe without quoting.)
                   preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                   $tag_name_re = $matches[1];

                   # Parse the content using the HTML-in-Markdown parser.
                   list ($block_text, $text)
                       = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                           $tag_name_re, $span_mode);

                   # Outdent markdown text.
                   if ($indent > 0) {
                       $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                                   $block_text);
                   }

                   # Append tag content to parsed text.
                   if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
                   else                $parsed .= "$block_text";

                   # Start over a new block.
                   $block_text = "";
               }
               else $block_text .= $tag;
           }

       } while ($depth > 0);

       #
       # Hash last block text that wasn't processed inside the loop.
       #
       $parsed .= $this->$hash_method($block_text);

       return array($parsed, $text);
   }
   2188 
   2189 
   2190     function hashClean($text) {
   2191     #
   2192     # Called whenever a tag must be hashed when a function insert a "clean" tag
   2193     # in $text, it pass through this function and is automaticaly escaped,
   2194     # blocking invalid nested overlap.
   2195     #
   2196         return $this->hashPart($text, 'C');
   2197     }
   2198 
   2199 
   2200     function doHeaders($text) {
   2201     #
   2202     # Redefined to add id attribute support.
   2203     #
   2204         # Setext-style headers:
   2205         #     Header 1  {#header1}
   2206         #     ========
   2207         #
   2208         #     Header 2  {#header2}
   2209         #     --------
   2210         #
   2211         $text = preg_replace_callback(
   2212             '{
   2213                 (^.+?)                              # $1: Header text
   2214                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?    # $2: Id attribute
   2215                 [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
   2216             }mx',
   2217             array(&$this, '_doHeaders_callback_setext'), $text);
   2218 
   2219         # atx-style headers:
   2220         #   # Header 1        {#header1}
   2221         #   ## Header 2       {#header2}
   2222         #   ## Header 2 with closing hashes ##  {#header3}
   2223         #   ...
   2224         #   ###### Header 6   {#header2}
   2225         #
   2226         $text = preg_replace_callback('{
   2227                 ^(\#{1,6})  # $1 = string of #\'s
   2228                 [ ]*
   2229                 (.+?)       # $2 = Header text
   2230                 [ ]*
   2231                 \#*         # optional closing #\'s (not counted)
   2232                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
   2233                 [ ]*
   2234                 \n+
   2235             }xm',
   2236             array(&$this, '_doHeaders_callback_atx'), $text);
   2237 
   2238         return $text;
   2239     }
   2240     function _doHeaders_attr($attr) {
   2241         if (empty($attr))  return "";
   2242         return " id=\"$attr\"";
   2243     }
   2244     function _doHeaders_callback_setext($matches) {
   2245         if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
   2246             return $matches[0];
   2247         $level = $matches[3]{0} == '=' ? 1 : 2;
   2248         $attr  = $this->_doHeaders_attr($id =& $matches[2]);
   2249         $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
   2250         return "\n" . $this->hashBlock($block) . "\n\n";
   2251     }
   2252     function _doHeaders_callback_atx($matches) {
   2253         $level = strlen($matches[1]);
   2254         $attr  = $this->_doHeaders_attr($id =& $matches[3]);
   2255         $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
   2256         return "\n" . $this->hashBlock($block) . "\n\n";
   2257     }
   2258 
   2259 
   2260     function doTables($text) {
   2261     #
   2262     # Form HTML tables.
   2263     #
   2264         $less_than_tab = $this->tab_width - 1;
   2265         #
   2266         # Find tables with leading pipe.
   2267         #
   2268         #   | Header 1 | Header 2
   2269         #   | -------- | --------
   2270         #   | Cell 1   | Cell 2
   2271         #   | Cell 3   | Cell 4
   2272         #
   2273         $text = preg_replace_callback('
   2274             {
   2275                 ^                           # Start of a line
   2276                 [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
   2277                 [|]                         # Optional leading pipe (present)
   2278                 (.+) \n                     # $1: Header row (at least one pipe)
   2279 
   2280                 [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
   2281                 [|] ([ ]*[-:]+[-| :]*) \n   # $2: Header underline
   2282 
   2283                 (                           # $3: Cells
   2284                     (?>
   2285                         [ ]*                # Allowed whitespace.
   2286                         [|] .* \n           # Row content.
   2287                     )*
   2288                 )
   2289                 (?=\n|\Z)                   # Stop at final double newline.
   2290             }xm',
   2291             array(&$this, '_doTable_leadingPipe_callback'), $text);
   2292 
   2293         #
   2294         # Find tables without leading pipe.
   2295         #
   2296         #   Header 1 | Header 2
   2297         #   -------- | --------
   2298         #   Cell 1   | Cell 2
   2299         #   Cell 3   | Cell 4
   2300         #
   2301         $text = preg_replace_callback('
   2302             {
   2303                 ^                           # Start of a line
   2304                 [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
   2305                 (\S.*[|].*) \n              # $1: Header row (at least one pipe)
   2306 
   2307                 [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
   2308                 ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline
   2309 
   2310                 (                           # $3: Cells
   2311                     (?>
   2312                         .* [|] .* \n        # Row content
   2313                     )*
   2314                 )
   2315                 (?=\n|\Z)                   # Stop at final double newline.
   2316             }xm',
   2317             array(&$this, '_DoTable_callback'), $text);
   2318 
   2319         return $text;
   2320     }
   2321     function _doTable_leadingPipe_callback($matches) {
   2322         $head       = $matches[1];
   2323         $underline  = $matches[2];
   2324         $content    = $matches[3];
   2325 
   2326         # Remove leading pipe for each row.
   2327         $content    = preg_replace('/^ *[|]/m', '', $content);
   2328 
   2329         return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
   2330     }
   2331     function _doTable_callback($matches) {
   2332         $head       = $matches[1];
   2333         $underline  = $matches[2];
   2334         $content    = $matches[3];
   2335 
   2336         # Remove any tailing pipes for each line.
   2337         $head       = preg_replace('/[|] *$/m', '', $head);
   2338         $underline  = preg_replace('/[|] *$/m', '', $underline);
   2339         $content    = preg_replace('/[|] *$/m', '', $content);
   2340 
   2341         # Reading alignement from header underline.
   2342         $separators = preg_split('/ *[|] */', $underline);
   2343         foreach ($separators as $n => $s) {
   2344             if (preg_match('/^ *-+: *$/', $s))      $attr[$n] = ' align="right"';
   2345             else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
   2346             else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
   2347             else                                    $attr[$n] = '';
   2348         }
   2349 
   2350         # Parsing span elements, including code spans, character escapes,
   2351         # and inline HTML tags, so that pipes inside those gets ignored.
   2352         $head       = $this->parseSpan($head);
   2353         $headers    = preg_split('/ *[|] */', $head);
   2354         $col_count  = count($headers);
   2355 
   2356         # Write column headers.
   2357         $text = "<table>\n";
   2358         $text .= "<thead>\n";
   2359         $text .= "<tr>\n";
   2360         foreach ($headers as $n => $header)
   2361             $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
   2362         $text .= "</tr>\n";
   2363         $text .= "</thead>\n";
   2364 
   2365         # Split content by row.
   2366         $rows = explode("\n", trim($content, "\n"));
   2367 
   2368         $text .= "<tbody>\n";
   2369         foreach ($rows as $row) {
   2370             # Parsing span elements, including code spans, character escapes,
   2371             # and inline HTML tags, so that pipes inside those gets ignored.
   2372             $row = $this->parseSpan($row);
   2373 
   2374             # Split row by cell.
   2375             $row_cells = preg_split('/ *[|] */', $row, $col_count);
   2376             $row_cells = array_pad($row_cells, $col_count, '');
   2377 
   2378             $text .= "<tr>\n";
   2379             foreach ($row_cells as $n => $cell)
   2380                 $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
   2381             $text .= "</tr>\n";
   2382         }
   2383         $text .= "</tbody>\n";
   2384         $text .= "</table>";
   2385 
   2386         return $this->hashBlock($text) . "\n";
   2387     }
   2388 
   2389 
   2390     function doDefLists($text) {
   2391     #
   2392     # Form HTML definition lists.
   2393     #
   2394         $less_than_tab = $this->tab_width - 1;
   2395 
   2396         # Re-usable pattern to match any entire dl list:
   2397         $whole_list_re = '(?>
   2398             (                               # $1 = whole list
   2399               (                             # $2
   2400                 [ ]{0,'.$less_than_tab.'}
   2401                 ((?>.*\S.*\n)+)             # $3 = defined term
   2402                 \n?
   2403                 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
   2404               )
   2405               (?s:.+?)
   2406               (                             # $4
   2407                   \z
   2408                 |
   2409                   \n{2,}
   2410                   (?=\S)
   2411                   (?!                       # Negative lookahead for another term
   2412                     [ ]{0,'.$less_than_tab.'}
   2413                     (?: \S.*\n )+?          # defined term
   2414                     \n?
   2415                     [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
   2416                   )
   2417                   (?!                       # Negative lookahead for another definition
   2418                     [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
   2419                   )
   2420               )
   2421             )
   2422         )'; // mx
   2423 
   2424         $text = preg_replace_callback('{
   2425                 (?>\A\n?|(?<=\n\n))
   2426                 '.$whole_list_re.'
   2427             }mx',
   2428             array(&$this, '_doDefLists_callback'), $text);
   2429 
   2430         return $text;
   2431     }
   2432     function _doDefLists_callback($matches) {
   2433         # Re-usable patterns to match list item bullets and number markers:
   2434         $list = $matches[1];
   2435 
   2436         # Turn double returns into triple returns, so that we can make a
   2437         # paragraph for the last item in a list, if necessary:
   2438         $result = trim($this->processDefListItems($list));
   2439         $result = "<dl>\n" . $result . "\n</dl>";
   2440         return $this->hashBlock($result) . "\n\n";
   2441     }
   2442 
   2443 
   2444     function processDefListItems($list_str) {
   2445     #
   2446     #   Process the contents of a single definition list, splitting it
   2447     #   into individual term and definition list items.
   2448     #
   2449         $less_than_tab = $this->tab_width - 1;
   2450 
   2451         # trim trailing blank lines:
   2452         $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
   2453 
   2454         # Process definition terms.
   2455         $list_str = preg_replace_callback('{
   2456             (?>\A\n?|\n\n+)                 # leading line
   2457             (                               # definition terms = $1
   2458                 [ ]{0,'.$less_than_tab.'}   # leading whitespace
   2459                 (?![:][ ]|[ ])              # negative lookahead for a definition
   2460                                             #   mark (colon) or more whitespace.
   2461                 (?> \S.* \n)+?              # actual term (not whitespace).
   2462             )
   2463             (?=\n?[ ]{0,3}:[ ])             # lookahead for following line feed
   2464                                             #   with a definition mark.
   2465             }xm',
   2466             array(&$this, '_processDefListItems_callback_dt'), $list_str);
   2467 
   2468         # Process actual definitions.
   2469         $list_str = preg_replace_callback('{
   2470             \n(\n+)?                        # leading line = $1
   2471             (                               # marker space = $2
   2472                 [ ]{0,'.$less_than_tab.'}   # whitespace before colon
   2473                 [:][ ]+                     # definition mark (colon)
   2474             )
   2475             ((?s:.+?))                      # definition text = $3
   2476             (?= \n+                         # stop at next definition mark,
   2477                 (?:                         # next term or end of text
   2478                     [ ]{0,'.$less_than_tab.'} [:][ ]    |
   2479                     <dt> | \z
   2480                 )
   2481             )
   2482             }xm',
   2483             array(&$this, '_processDefListItems_callback_dd'), $list_str);
   2484 
   2485         return $list_str;
   2486     }
   2487     function _processDefListItems_callback_dt($matches) {
   2488         $terms = explode("\n", trim($matches[1]));
   2489         $text = '';
   2490         foreach ($terms as $term) {
   2491             $term = $this->runSpanGamut(trim($term));
   2492             $text .= "\n<dt>" . $term . "</dt>";
   2493         }
   2494         return $text . "\n";
   2495     }
   2496     function _processDefListItems_callback_dd($matches) {
   2497         $leading_line   = $matches[1];
   2498         $marker_space   = $matches[2];
   2499         $def            = $matches[3];
   2500 
   2501         if ($leading_line || preg_match('/\n{2,}/', $def)) {
   2502             # Replace marker with the appropriate whitespace indentation
   2503             $def = str_repeat(' ', strlen($marker_space)) . $def;
   2504             $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
   2505             $def = "\n". $def ."\n";
   2506         }
   2507         else {
   2508             $def = rtrim($def);
   2509             $def = $this->runSpanGamut($this->outdent($def));
   2510         }
   2511 
   2512         return "\n<dd>" . $def . "</dd>\n";
   2513     }
   2514 
   2515 
   2516     function doFencedCodeBlocks($text) {
   2517     #
   2518     # Adding the fenced code block syntax to regular Markdown:
   2519     #
   2520     # ~~~
   2521     # Code block
   2522     # ~~~
   2523     #
   2524         $less_than_tab = $this->tab_width;
   2525 
   2526         $text = preg_replace_callback('{
   2527                 (?:\n|\A)
   2528                 # 1: Opening marker
   2529                 (
   2530                     ~{3,} # Marker: three tilde or more.
   2531                 )
   2532                 [ ]* \n # Whitespace and newline following marker.
   2533 
   2534                 # 2: Content
   2535                 (
   2536                     (?>
   2537                         (?!\1 [ ]* \n)  # Not a closing marker.
   2538                         .*\n+
   2539                     )+
   2540                 )
   2541 
   2542                 # Closing marker.
   2543                 \1 [ ]* \n
   2544             }xm',
   2545             array(&$this, '_doFencedCodeBlocks_callback'), $text);
   2546 
   2547         return $text;
   2548     }
   2549     function _doFencedCodeBlocks_callback($matches) {
   2550         $codeblock = $matches[2];
   2551         $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
   2552         $codeblock = preg_replace_callback('/^\n+/',
   2553             array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock);
   2554         $codeblock = "<pre><code>$codeblock</code></pre>";
   2555         return "\n\n".$this->hashBlock($codeblock)."\n\n";
   2556     }
   2557     function _doFencedCodeBlocks_newlines($matches) {
   2558         return str_repeat("<br$this->empty_element_suffix",
   2559             strlen($matches[0]));
   2560     }
   2561 
   2562 
    #
    # Redefining emphasis markers so that emphasis by underscore does not
    # work in the middle of a word.
    #
    # Each list maps a marker string to a regular expression fragment.
    # NOTE(review): the '' entry looks like the pattern for an opening
    # marker and the other entries like the closing pattern for the marker
    # named by the key -- confirm against the code that consumes these
    # lists (not visible in this part of the file).
    #
    # In the '' entries an underscore marker must not be preceded by a
    # letter, digit or underscore; in the '_', '__' and '___' entries it
    # must not be followed by one. Asterisk markers have no such
    # restriction.
    #
    var $em_relist = array(
        ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S)(?![.,:;]\s)',
        '*' => '(?<=\S)(?<!\*)\*(?!\*)',
        '_' => '(?<=\S)(?<!_)_(?![a-zA-Z0-9_])',
        );
    # Same rules for the two-character markers "**" and "__".
    var $strong_relist = array(
        ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S)(?![.,:;]\s)',
        '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
        '__' => '(?<=\S)(?<!_)__(?![a-zA-Z0-9_])',
        );
    # Same rules for the three-character markers "***" and "___".
    var $em_strong_relist = array(
        ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S)(?![.,:;]\s)',
        '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
        '___' => '(?<=\S)(?<!_)___(?![a-zA-Z0-9_])',
        );
   2582 
   2583 
   2584     function formParagraphs($text) {
   2585     #
   2586     #   Params:
   2587     #       $text - string to process with html <p> tags
   2588     #
   2589         # Strip leading and trailing lines:
   2590         $text = preg_replace('/\A\n+|\n+\z/', '', $text);
   2591 
   2592         $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
   2593 
   2594         #
   2595         # Wrap <p> tags and unhashify HTML blocks
   2596         #
   2597         foreach ($grafs as $key => $value) {
   2598             $value = trim($this->runSpanGamut($value));
   2599 
   2600             # Check if this should be enclosed in a paragraph.
   2601             # Clean tag hashes & block tag hashes are left alone.
   2602             $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
   2603 
   2604             if ($is_p) {
   2605                 $value = "<p>$value</p>";
   2606             }
   2607             $grafs[$key] = $value;
   2608         }
   2609 
   2610         # Join grafs in one text, then unhash HTML tags.
   2611         $text = implode("\n\n", $grafs);
   2612 
   2613         # Finish by removing any tag hashes still present in $text.
   2614         $text = $this->unhash($text);
   2615 
   2616         return $text;
   2617     }
   2618 
   2619 
   2620     ### Footnotes
   2621 
   2622     function stripFootnotes($text) {
   2623     #
   2624     # Strips link definitions from text, stores the URLs and titles in
   2625     # hash references.
   2626     #
   2627         $less_than_tab = $this->tab_width - 1;
   2628 
   2629         # Link defs are in the form: [^id]: url "optional title"
   2630         $text = preg_replace_callback('{
   2631             ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:  # note_id = $1
   2632               [ ]*
   2633               \n?                   # maybe *one* newline
   2634             (                       # text = $2 (no blank lines allowed)
   2635                 (?:
   2636                     .+              # actual text
   2637                 |
   2638                     \n              # newlines but
   2639                     (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
   2640                     (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
   2641                                     # by non-indented content
   2642                 )*
   2643             )
   2644             }xm',
   2645             array(&$this, '_stripFootnotes_callback'),
   2646             $text);
   2647         return $text;
   2648     }
   2649     function _stripFootnotes_callback($matches) {
   2650         $note_id = $this->fn_id_prefix . $matches[1];
   2651         $this->footnotes[$note_id] = $this->outdent($matches[2]);
   2652         return ''; # String that will replace the block
   2653     }
   2654 
   2655 
   2656     function doFootnotes($text) {
   2657     #
   2658     # Replace footnote references in $text [^id] with a special text-token
   2659     # which will be replaced by the actual footnote marker in appendFootnotes.
   2660     #
   2661         if (!$this->in_anchor) {
   2662             $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
   2663         }
   2664         return $text;
   2665     }
   2666 
   2667 
   2668     function appendFootnotes($text) {
   2669     #
   2670     # Append footnote list to text.
   2671     #
   2672         $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
   2673             array(&$this, '_appendFootnotes_callback'), $text);
   2674 
   2675         if (!empty($this->footnotes_ordered)) {
   2676             $text .= "\n\n";
   2677             $text .= "<div class=\"footnotes\">\n";
   2678             $text .= "<hr". MARKDOWN_EMPTY_ELEMENT_SUFFIX ."\n";
   2679             $text .= "<ol>\n\n";
   2680 
   2681             $attr = " rev=\"footnote\"";
   2682             if ($this->fn_backlink_class != "") {
   2683                 $class = $this->fn_backlink_class;
   2684                 $class = $this->encodeAttribute($class);
   2685                 $attr .= " class=\"$class\"";
   2686             }
   2687             if ($this->fn_backlink_title != "") {
   2688                 $title = $this->fn_backlink_title;
   2689                 $title = $this->encodeAttribute($title);
   2690                 $attr .= " title=\"$title\"";
   2691             }
   2692             $num = 0;
   2693 
   2694             while (!empty($this->footnotes_ordered)) {
   2695                 $footnote = reset($this->footnotes_ordered);
   2696                 $note_id = key($this->footnotes_ordered);
   2697                 unset($this->footnotes_ordered[$note_id]);
   2698 
   2699                 $footnote .= "\n"; # Need to append newline before parsing.
   2700                 $footnote = $this->runBlockGamut("$footnote\n");
   2701                 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
   2702                     array(&$this, '_appendFootnotes_callback'), $footnote);
   2703 
   2704                 $attr = str_replace("%%", ++$num, $attr);
   2705                 $note_id = $this->encodeAttribute($note_id);
   2706 
   2707                 # Add backlink to last paragraph; create new paragraph if needed.
   2708                 $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
   2709                 if (preg_match('{</p>$}', $footnote)) {
   2710                     $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
   2711                 } else {
   2712                     $footnote .= "\n\n<p>$backlink</p>";
   2713                 }
   2714 
   2715                 $text .= "<li id=\"fn:$note_id\">\n";
   2716                 $text .= $footnote . "\n";
   2717                 $text .= "</li>\n\n";
   2718             }
   2719 
   2720             $text .= "</ol>\n";
   2721             $text .= "</div>";
   2722         }
   2723         return $text;
   2724     }
   2725     function _appendFootnotes_callback($matches) {
   2726         $node_id = $this->fn_id_prefix . $matches[1];
   2727 
   2728         # Create footnote marker only if it has a corresponding footnote *and*
   2729         # the footnote hasn't been used by another marker.
   2730         if (isset($this->footnotes[$node_id])) {
   2731             # Transfert footnote content to the ordered list.
   2732             $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
   2733             unset($this->footnotes[$node_id]);
   2734 
   2735             $num = $this->footnote_counter++;
   2736             $attr = " rel=\"footnote\"";
   2737             if ($this->fn_link_class != "") {
   2738                 $class = $this->fn_link_class;
   2739                 $class = $this->encodeAttribute($class);
   2740                 $attr .= " class=\"$class\"";
   2741             }
   2742             if ($this->fn_link_title != "") {
   2743                 $title = $this->fn_link_title;
   2744                 $title = $this->encodeAttribute($title);
   2745                 $attr .= " title=\"$title\"";
   2746             }
   2747 
   2748             $attr = str_replace("%%", $num, $attr);
   2749             $node_id = $this->encodeAttribute($node_id);
   2750 
   2751             return
   2752                 "<sup id=\"fnref:$node_id\">".
   2753                 "<a href=\"#fn:$node_id\"$attr>$num</a>".
   2754                 "</sup>";
   2755         }
   2756 
   2757         return "[^".$matches[1]."]";
   2758     }
   2759 
   2760 
   2761     ### Abbreviations ###
   2762 
   2763     function stripAbbreviations($text) {
   2764     #
   2765     # Strips abbreviations from text, stores titles in hash references.
   2766     #
   2767         $less_than_tab = $this->tab_width - 1;
   2768 
   2769         # Link defs are in the form: [id]*: url "optional title"
   2770         $text = preg_replace_callback('{
   2771             ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:  # abbr_id = $1
   2772             (.*)                    # text = $2 (no blank lines allowed)
   2773             }xm',
   2774             array(&$this, '_stripAbbreviations_callback'),
   2775             $text);
   2776         return $text;
   2777     }
   2778     function _stripAbbreviations_callback($matches) {
   2779         $abbr_word = $matches[1];
   2780         $abbr_desc = $matches[2];
   2781         if ($this->abbr_word_re)
   2782             $this->abbr_word_re .= '|';
   2783         $this->abbr_word_re .= preg_quote($abbr_word);
   2784         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
   2785         return ''; # String that will replace the block
   2786     }
   2787 
   2788 
   2789     function doAbbreviations($text) {
   2790     #
   2791     # Find defined abbreviations in text and wrap them in <abbr> elements.
   2792     #
   2793         if ($this->abbr_word_re) {
   2794             // cannot use the /x modifier because abbr_word_re may
   2795             // contain significant spaces:
   2796             $text = preg_replace_callback('{'.
   2797                 '(?<![\w\x1A])'.
   2798                 '(?:'.$this->abbr_word_re.')'.
   2799                 '(?![\w\x1A])'.
   2800                 '}',
   2801                 array(&$this, '_doAbbreviations_callback'), $text);
   2802         }
   2803         return $text;
   2804     }
   2805     function _doAbbreviations_callback($matches) {
   2806         $abbr = $matches[0];
   2807         if (isset($this->abbr_desciptions[$abbr])) {
   2808             $desc = $this->abbr_desciptions[$abbr];
   2809             if (empty($desc)) {
   2810                 return $this->hashPart("<abbr>$abbr</abbr>");
   2811             } else {
   2812                 $desc = $this->encodeAttribute($desc);
   2813                 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
   2814             }
   2815         } else {
   2816             return $matches[0];
   2817         }
   2818     }
   2819 
   2820 }
   2821 
   2822 
   2823 /*
   2824 
   2825 PHP Markdown Extra
   2826 ==================
   2827 
   2828 Description
   2829 -----------
   2830 
   2831 This is a PHP port of the original Markdown formatter written in Perl
   2832 by John Gruber. This special "Extra" version of PHP Markdown features
   2833 further enhancements to the syntax for making additional constructs
such as tables and definition lists.
   2835 
   2836 Markdown is a text-to-HTML filter; it translates an easy-to-read /
   2837 easy-to-write structured text format into HTML. Markdown's text format
   2838 is most similar to that of plain text email, and supports features such
   2839 as headers, *emphasis*, code blocks, blockquotes, and links.
   2840 
   2841 Markdown's syntax is designed not as a generic markup language, but
   2842 specifically to serve as a front-end to (X)HTML. You can use span-level
   2843 HTML tags anywhere in a Markdown document, and you can use block level
HTML tags (like <div> and <table>) as well.
   2845 
   2846 For more information about Markdown's syntax, see:
   2847 
   2848 <http://daringfireball.net/projects/markdown/>
   2849 
   2850 
   2851 Bugs
   2852 ----
   2853 
   2854 To file bug reports please send email to:
   2855 
   2856 <michel.fortin@michelf.com>
   2857 
   2858 Please include with your report: (1) the example input; (2) the output you
   2859 expected; (3) the output Markdown actually produced.
   2860 
   2861 
   2862 Version History
   2863 ---------------
   2864 
   2865 See the readme file for detailed release notes for this version.
   2866 
   2867 
   2868 Copyright and License
   2869 ---------------------
   2870 
   2871 PHP Markdown & Extra
   2872 Copyright (c) 2004-2008 Michel Fortin
   2873 <http://www.michelf.com/>
   2874 All rights reserved.
   2875 
   2876 Based on Markdown
   2877 Copyright (c) 2003-2006 John Gruber
   2878 <http://daringfireball.net/>
   2879 All rights reserved.
   2880 
   2881 Redistribution and use in source and binary forms, with or without
   2882 modification, are permitted provided that the following conditions are
   2883 met:
   2884 
   2885 *   Redistributions of source code must retain the above copyright notice,
   2886     this list of conditions and the following disclaimer.
   2887 
   2888 *   Redistributions in binary form must reproduce the above copyright
   2889     notice, this list of conditions and the following disclaimer in the
   2890     documentation and/or other materials provided with the distribution.
   2891 
   2892 *   Neither the name "Markdown" nor the names of its contributors may
   2893     be used to endorse or promote products derived from this software
   2894     without specific prior written permission.
   2895 
   2896 This software is provided by the copyright holders and contributors "as
   2897 is" and any express or implied warranties, including, but not limited
   2898 to, the implied warranties of merchantability and fitness for a
   2899 particular purpose are disclaimed. In no event shall the copyright owner
   2900 or contributors be liable for any direct, indirect, incidental, special,
   2901 exemplary, or consequential damages (including, but not limited to,
   2902 procurement of substitute goods or services; loss of use, data, or
   2903 profits; or business interruption) however caused and on any theory of
   2904 liability, whether in contract, strict liability, or tort (including
   2905 negligence or otherwise) arising in any way out of the use of this
   2906 software, even if advised of the possibility of such damage.
   2907 
   2908 */
   2909 ?>