Home | History | Annotate | Download | only in PHP-SmartyPants-1.5.1e
      1 <?php
      2 
      3 #
      4 # SmartyPants  -  Smart punctuation for web sites
      5 #
      6 # by John Gruber
      7 # <http://daringfireball.net>
      8 #
      9 # PHP port by Michel Fortin
     10 # <http://www.michelf.com/>
     11 #
     12 # Copyright (c) 2003-2004 John Gruber
     13 # Copyright (c) 2004-2005 Michel Fortin
     14 #
     15 
     16 
     17 global  $SmartyPantsPHPVersion, $SmartyPantsSyntaxVersion,
     18         $smartypants_attr, $sp_tags_to_skip;
     19 
     20 $SmartyPantsPHPVersion    = '1.5.1e'; # Fru 9 Dec 2005
     21 $SmartyPantsSyntaxVersion = '1.5.1';  # Fri 12 Mar 2004
     22 
     23 
     24 # Configurable variables:
     25 $smartypants_attr = "1";  # Change this to configure.
     26                           #  1 =>  "--" for em-dashes; no en-dash support
     27                           #  2 =>  "---" for em-dashes; "--" for en-dashes
     28                           #  3 =>  "--" for em-dashes; "---" for en-dashes
     29                           #  See docs for more configuration options.
     30 
     31 # Globals:
     32 $sp_tags_to_skip = '<(/?)(?:pre|code|kbd|script|math)[\s>]';
     33 
     34 
     35 # -- WordPress plugin interface -----------------------------------------------
     36 /*
     37 Plugin Name: SmartyPants
     38 Plugin URI: http://www.michelf.com/projects/php-smartypants/
     39 Description: SmartyPants is a web publishing utility that translates plain ASCII punctuation characters into &#8220;smart&#8221; typographic punctuation HTML entities. This plugin <strong>replace the default WordPress Texturize algorithm</strong> for the content and the title of your posts, the comments body and author name, and everywhere else Texturize normally apply. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>.
     40 Version: 1.5.1e
     41 Author: Michel Fortin
     42 Author URI: http://www.michelf.com/
     43 */
     44 if (isset($wp_version)) {
     45     # Remove default Texturize filter that would conflict with SmartyPants.
     46     remove_filter('category_description', 'wptexturize');
     47     remove_filter('list_cats', 'wptexturize');
     48     remove_filter('comment_author', 'wptexturize');
     49     remove_filter('comment_text', 'wptexturize');
     50     remove_filter('single_post_title', 'wptexturize');
     51     remove_filter('the_title', 'wptexturize');
     52     remove_filter('the_content', 'wptexturize');
     53     remove_filter('the_excerpt', 'wptexturize');
     54     # Add SmartyPants filter with priority 10 (same as Texturize).
     55     add_filter('category_description', 'SmartyPants', 10);
     56     add_filter('list_cats', 'SmartyPants', 10);
     57     add_filter('comment_author', 'SmartyPants', 10);
     58     add_filter('comment_text', 'SmartyPants', 10);
     59     add_filter('single_post_title', 'SmartyPants', 10);
     60     add_filter('the_title', 'SmartyPants', 10);
     61     add_filter('the_content', 'SmartyPants', 10);
     62     add_filter('the_excerpt', 'SmartyPants', 10);
     63 }
     64 
     65 # -- Smarty Modifier Interface ------------------------------------------------
     66 function smarty_modifier_smartypants($text, $attr = NULL) {
     67     return SmartyPants($text, $attr);
     68 }
     69 
     70 
     71 
     72 function SmartyPants($text, $attr = NULL, $ctx = NULL) {
     73     global $smartypants_attr, $sp_tags_to_skip;
     74     # Paramaters:
     75     $text;   # text to be parsed
     76     $attr;   # value of the smart_quotes="" attribute
     77     $ctx;    # MT context object (unused)
     78     if ($attr == NULL) $attr = $smartypants_attr;
     79 
     80     # Options to specify which transformations to make:
     81     $do_stupefy = FALSE;
     82     $convert_quot = 0;  # should we translate &quot; entities into normal quotes?
     83 
     84     # Parse attributes:
     85     # 0 : do nothing
     86     # 1 : set all
     87     # 2 : set all, using old school en- and em- dash shortcuts
     88     # 3 : set all, using inverted old school en and em- dash shortcuts
     89     #
     90     # q : quotes
     91     # b : backtick quotes (``double'' only)
     92     # B : backtick quotes (``double'' and `single')
     93     # d : dashes
     94     # D : old school dashes
     95     # i : inverted old school dashes
     96     # e : ellipses
     97     # w : convert &quot; entities to " for Dreamweaver users
     98 
     99     if ($attr == "0") {
    100         # Do nothing.
    101         return $text;
    102     }
    103     else if ($attr == "1") {
    104         # Do everything, turn all options on.
    105         $do_quotes    = 1;
    106         $do_backticks = 1;
    107         $do_dashes    = 1;
    108         $do_ellipses  = 1;
    109     }
    110     else if ($attr == "2") {
    111         # Do everything, turn all options on, use old school dash shorthand.
    112         $do_quotes    = 1;
    113         $do_backticks = 1;
    114         $do_dashes    = 2;
    115         $do_ellipses  = 1;
    116     }
    117     else if ($attr == "3") {
    118         # Do everything, turn all options on, use inverted old school dash shorthand.
    119         $do_quotes    = 1;
    120         $do_backticks = 1;
    121         $do_dashes    = 3;
    122         $do_ellipses  = 1;
    123     }
    124     else if ($attr == "-1") {
    125         # Special "stupefy" mode.
    126         $do_stupefy   = 1;
    127     }
    128     else {
    129         $chars = preg_split('//', $attr);
    130         foreach ($chars as $c){
    131             if      ($c == "q") { $do_quotes    = 1; }
    132             else if ($c == "b") { $do_backticks = 1; }
    133             else if ($c == "B") { $do_backticks = 2; }
    134             else if ($c == "d") { $do_dashes    = 1; }
    135             else if ($c == "D") { $do_dashes    = 2; }
    136             else if ($c == "i") { $do_dashes    = 3; }
    137             else if ($c == "e") { $do_ellipses  = 1; }
    138             else if ($c == "w") { $convert_quot = 1; }
    139             else {
    140                 # Unknown attribute option, ignore.
    141             }
    142         }
    143     }
    144 
    145     $tokens = _TokenizeHTML($text);
    146     $result = '';
    147     $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags.
    148 
    149     $prev_token_last_char = "";     # This is a cheat, used to get some context
    150                                     # for one-character tokens that consist of
    151                                     # just a quote char. What we do is remember
    152                                     # the last character of the previous text
    153                                     # token, to use as context to curl single-
    154                                     # character quote tokens correctly.
    155 
    156     foreach ($tokens as $cur_token) {
    157         if ($cur_token[0] == "tag") {
    158             # Don't mess with quotes inside tags.
    159             $result .= $cur_token[1];
    160             if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
    161                 $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
    162             }
    163         } else {
    164             $t = $cur_token[1];
    165             $last_char = substr($t, -1); # Remember last char of this token before processing.
    166             if (! $in_pre) {
    167                 $t = ProcessEscapes($t);
    168 
    169                 if ($convert_quot) {
    170                     $t = preg_replace('/&quot;/', '"', $t);
    171                 }
    172 
    173                 if ($do_dashes) {
    174                     if ($do_dashes == 1) $t = EducateDashes($t);
    175                     if ($do_dashes == 2) $t = EducateDashesOldSchool($t);
    176                     if ($do_dashes == 3) $t = EducateDashesOldSchoolInverted($t);
    177                 }
    178 
    179                 if ($do_ellipses) $t = EducateEllipses($t);
    180 
    181                 # Note: backticks need to be processed before quotes.
    182                 if ($do_backticks) {
    183                     $t = EducateBackticks($t);
    184                     if ($do_backticks == 2) $t = EducateSingleBackticks($t);
    185                 }
    186 
    187                 if ($do_quotes) {
    188                     if ($t == "'") {
    189                         # Special case: single-character ' token
    190                         if (preg_match('/\S/', $prev_token_last_char)) {
    191                             $t = "&#8217;";
    192                         }
    193                         else {
    194                             $t = "&#8216;";
    195                         }
    196                     }
    197                     else if ($t == '"') {
    198                         # Special case: single-character " token
    199                         if (preg_match('/\S/', $prev_token_last_char)) {
    200                             $t = "&#8221;";
    201                         }
    202                         else {
    203                             $t = "&#8220;";
    204                         }
    205                     }
    206                     else {
    207                         # Normal case:
    208                         $t = EducateQuotes($t);
    209                     }
    210                 }
    211 
    212                 if ($do_stupefy) $t = StupefyEntities($t);
    213             }
    214             $prev_token_last_char = $last_char;
    215             $result .= $t;
    216         }
    217     }
    218 
    219     return $result;
    220 }
    221 
    222 
    223 function SmartQuotes($text, $attr = NULL, $ctx = NULL) {
    224     global $smartypants_attr, $sp_tags_to_skip;
    225     # Paramaters:
    226     $text;   # text to be parsed
    227     $attr;   # value of the smart_quotes="" attribute
    228     $ctx;    # MT context object (unused)
    229     if ($attr == NULL) $attr = $smartypants_attr;
    230 
    231     $do_backticks;   # should we educate ``backticks'' -style quotes?
    232 
    233     if ($attr == 0) {
    234         # do nothing;
    235         return $text;
    236     }
    237     else if ($attr == 2) {
    238         # smarten ``backticks'' -style quotes
    239         $do_backticks = 1;
    240     }
    241     else {
    242         $do_backticks = 0;
    243     }
    244 
    245     # Special case to handle quotes at the very end of $text when preceded by
    246     # an HTML tag. Add a space to give the quote education algorithm a bit of
    247     # context, so that it can guess correctly that it's a closing quote:
    248     $add_extra_space = 0;
    249     if (preg_match("/>['\"]\\z/", $text)) {
    250         $add_extra_space = 1; # Remember, so we can trim the extra space later.
    251         $text .= " ";
    252     }
    253 
    254     $tokens = _TokenizeHTML($text);
    255     $result = '';
    256     $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags
    257 
    258     $prev_token_last_char = "";     # This is a cheat, used to get some context
    259                                     # for one-character tokens that consist of
    260                                     # just a quote char. What we do is remember
    261                                     # the last character of the previous text
    262                                     # token, to use as context to curl single-
    263                                     # character quote tokens correctly.
    264 
    265     foreach ($tokens as $cur_token) {
    266         if ($cur_token[0] == "tag") {
    267             # Don't mess with quotes inside tags
    268             $result .= $cur_token[1];
    269             if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
    270                 $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
    271             }
    272         } else {
    273             $t = $cur_token[1];
    274             $last_char = substr($t, -1); # Remember last char of this token before processing.
    275             if (! $in_pre) {
    276                 $t = ProcessEscapes($t);
    277                 if ($do_backticks) {
    278                     $t = EducateBackticks($t);
    279                 }
    280 
    281                 if ($t == "'") {
    282                     # Special case: single-character ' token
    283                     if (preg_match('/\S/', $prev_token_last_char)) {
    284                         $t = "&#8217;";
    285                     }
    286                     else {
    287                         $t = "&#8216;";
    288                     }
    289                 }
    290                 else if ($t == '"') {
    291                     # Special case: single-character " token
    292                     if (preg_match('/\S/', $prev_token_last_char)) {
    293                         $t = "&#8221;";
    294                     }
    295                     else {
    296                         $t = "&#8220;";
    297                     }
    298                 }
    299                 else {
    300                     # Normal case:
    301                     $t = EducateQuotes($t);
    302                 }
    303 
    304             }
    305             $prev_token_last_char = $last_char;
    306             $result .= $t;
    307         }
    308     }
    309 
    310     if ($add_extra_space) {
    311         preg_replace('/ \z/', '', $result);  # Trim trailing space if we added one earlier.
    312     }
    313     return $result;
    314 }
    315 
    316 
    317 function SmartDashes($text, $attr = NULL, $ctx = NULL) {
    318     global $smartypants_attr, $sp_tags_to_skip;
    319     # Paramaters:
    320     $text;   # text to be parsed
    321     $attr;   # value of the smart_dashes="" attribute
    322     $ctx;    # MT context object (unused)
    323     if ($attr == NULL) $attr = $smartypants_attr;
    324 
    325     # reference to the subroutine to use for dash education, default to EducateDashes:
    326     $dash_sub_ref = 'EducateDashes';
    327 
    328     if ($attr == 0) {
    329         # do nothing;
    330         return $text;
    331     }
    332     else if ($attr == 2) {
    333         # use old smart dash shortcuts, "--" for en, "---" for em
    334         $dash_sub_ref = 'EducateDashesOldSchool';
    335     }
    336     else if ($attr == 3) {
    337         # inverse of 2, "--" for em, "---" for en
    338         $dash_sub_ref = 'EducateDashesOldSchoolInverted';
    339     }
    340 
    341     $tokens;
    342     $tokens = _TokenizeHTML($text);
    343 
    344     $result = '';
    345     $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags
    346     foreach ($tokens as $cur_token) {
    347         if ($cur_token[0] == "tag") {
    348             # Don't mess with quotes inside tags
    349             $result .= $cur_token[1];
    350             if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
    351                 $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
    352             }
    353         } else {
    354             $t = $cur_token[1];
    355             if (! $in_pre) {
    356                 $t = ProcessEscapes($t);
    357                 $t = $dash_sub_ref($t);
    358             }
    359             $result .= $t;
    360         }
    361     }
    362     return $result;
    363 }
    364 
    365 
    366 function SmartEllipses($text, $attr = NULL, $ctx = NULL) {
    367     # Paramaters:
    368     $text;   # text to be parsed
    369     $attr;   # value of the smart_ellipses="" attribute
    370     $ctx;    # MT context object (unused)
    371     if ($attr == NULL) $attr = $smartypants_attr;
    372 
    373     if ($attr == 0) {
    374         # do nothing;
    375         return $text;
    376     }
    377 
    378     $tokens;
    379     $tokens = _TokenizeHTML($text);
    380 
    381     $result = '';
    382     $in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags
    383     foreach ($tokens as $cur_token) {
    384         if ($cur_token[0] == "tag") {
    385             # Don't mess with quotes inside tags
    386             $result .= $cur_token[1];
    387             if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
    388                 $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
    389             }
    390         } else {
    391             $t = $cur_token[1];
    392             if (! $in_pre) {
    393                 $t = ProcessEscapes($t);
    394                 $t = EducateEllipses($t);
    395             }
    396             $result .= $t;
    397         }
    398     }
    399     return $result;
    400 }
    401 
    402 
    403 function EducateQuotes($_) {
    404 #
    405 #   Parameter:  String.
    406 #
    407 #   Returns:    The string, with "educated" curly quote HTML entities.
    408 #
    409 #   Example input:  "Isn't this fun?"
    410 #   Example output: &#8220;Isn&#8217;t this fun?&#8221;
    411 #
    412     # Make our own "punctuation" character class, because the POSIX-style
    413     # [:PUNCT:] is only available in Perl 5.6 or later:
    414     $punct_class = "[!\"#\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\]\\^_`{|}~]";
    415 
    416     # Special case if the very first character is a quote
    417     # followed by punctuation at a non-word-break. Close the quotes by brute force:
    418     $_ = preg_replace(
    419         array("/^'(?=$punct_class\\B)/", "/^\"(?=$punct_class\\B)/"),
    420         array('&#8217;',                 '&#8221;'), $_);
    421 
    422 
    423     # Special case for double sets of quotes, e.g.:
    424     #   <p>He said, "'Quoted' words in a larger quote."</p>
    425     $_ = preg_replace(
    426         array("/\"'(?=\w)/",    "/'\"(?=\w)/"),
    427         array('&#8220;&#8216;', '&#8216;&#8220;'), $_);
    428 
    429     # Special case for decade abbreviations (the '80s):
    430     $_ = preg_replace("/'(?=\\d{2}s)/", '&#8217;', $_);
    431 
    432     $close_class = '[^\ \t\r\n\[\{\(\-]';
    433     $dec_dashes = '&\#8211;|&\#8212;';
    434 
    435     # Get most opening single quotes:
    436     $_ = preg_replace("{
    437         (
    438             \\s          |   # a whitespace char, or
    439             &nbsp;      |   # a non-breaking space entity, or
    440             --          |   # dashes, or
    441             &[mn]dash;  |   # named dash entities
    442             $dec_dashes |   # or decimal entities
    443             &\\#x201[34];    # or hex
    444         )
    445         '                   # the quote
    446         (?=\\w)              # followed by a word character
    447         }x", '\1&#8216;', $_);
    448     # Single closing quotes:
    449     $_ = preg_replace("{
    450         ($close_class)?
    451         '
    452         (?(1)|          # If $1 captured, then do nothing;
    453           (?=\\s | s\\b)  # otherwise, positive lookahead for a whitespace
    454         )               # char or an 's' at a word ending position. This
    455                         # is a special case to handle something like:
    456                         # \"<i>Custer</i>'s Last Stand.\"
    457         }xi", '\1&#8217;', $_);
    458 
    459     # Any remaining single quotes should be opening ones:
    460     $_ = str_replace("'", '&#8216;', $_);
    461 
    462 
    463     # Get most opening double quotes:
    464     $_ = preg_replace("{
    465         (
    466             \\s          |   # a whitespace char, or
    467             &nbsp;      |   # a non-breaking space entity, or
    468             --          |   # dashes, or
    469             &[mn]dash;  |   # named dash entities
    470             $dec_dashes |   # or decimal entities
    471             &\\#x201[34];    # or hex
    472         )
    473         \"                   # the quote
    474         (?=\\w)              # followed by a word character
    475         }x", '\1&#8220;', $_);
    476 
    477     # Double closing quotes:
    478     $_ = preg_replace("{
    479         ($close_class)?
    480         \"
    481         (?(1)|(?=\\s))   # If $1 captured, then do nothing;
    482                            # if not, then make sure the next char is whitespace.
    483         }x", '\1&#8221;', $_);
    484 
    485     # Any remaining quotes should be opening ones.
    486     $_ = str_replace('"', '&#8220;', $_);
    487 
    488     return $_;
    489 }
    490 
    491 
    492 function EducateBackticks($_) {
    493 #
    494 #   Parameter:  String.
    495 #   Returns:    The string, with ``backticks'' -style double quotes
    496 #               translated into HTML curly quote entities.
    497 #
    498 #   Example input:  ``Isn't this fun?''
    499 #   Example output: &#8220;Isn't this fun?&#8221;
    500 #
    501 
    502     $_ = str_replace(array("``",       "''",),
    503                      array('&#8220;', '&#8221;'), $_);
    504     return $_;
    505 }
    506 
    507 
    508 function EducateSingleBackticks($_) {
    509 #
    510 #   Parameter:  String.
    511 #   Returns:    The string, with `backticks' -style single quotes
    512 #               translated into HTML curly quote entities.
    513 #
    514 #   Example input:  `Isn't this fun?'
    515 #   Example output: &#8216;Isn&#8217;t this fun?&#8217;
    516 #
    517 
    518     $_ = str_replace(array("`",       "'",),
    519                      array('&#8216;', '&#8217;'), $_);
    520     return $_;
    521 }
    522 
    523 
    524 function EducateDashes($_) {
    525 #
    526 #   Parameter:  String.
    527 #
    528 #   Returns:    The string, with each instance of "--" translated to
    529 #               an em-dash HTML entity.
    530 #
    531 
    532     $_ = str_replace('--', '&#8212;', $_);
    533     return $_;
    534 }
    535 
    536 
    537 function EducateDashesOldSchool($_) {
    538 #
    539 #   Parameter:  String.
    540 #
    541 #   Returns:    The string, with each instance of "--" translated to
    542 #               an en-dash HTML entity, and each "---" translated to
    543 #               an em-dash HTML entity.
    544 #
    545 
    546     #                      em         en
    547     $_ = str_replace(array("---",     "--",),
    548                      array('&#8212;', '&#8211;'), $_);
    549     return $_;
    550 }
    551 
    552 
    553 function EducateDashesOldSchoolInverted($_) {
    554 #
    555 #   Parameter:  String.
    556 #
    557 #   Returns:    The string, with each instance of "--" translated to
    558 #               an em-dash HTML entity, and each "---" translated to
    559 #               an en-dash HTML entity. Two reasons why: First, unlike the
    560 #               en- and em-dash syntax supported by
    561 #               EducateDashesOldSchool(), it's compatible with existing
    562 #               entries written before SmartyPants 1.1, back when "--" was
    563 #               only used for em-dashes.  Second, em-dashes are more
    564 #               common than en-dashes, and so it sort of makes sense that
    565 #               the shortcut should be shorter to type. (Thanks to Aaron
    566 #               Swartz for the idea.)
    567 #
    568 
    569     #                      en         em
    570     $_ = str_replace(array("---",     "--",),
    571                      array('&#8211;', '&#8212;'), $_);
    572     return $_;
    573 }
    574 
    575 
    576 function EducateEllipses($_) {
    577 #
    578 #   Parameter:  String.
    579 #   Returns:    The string, with each instance of "..." translated to
    580 #               an ellipsis HTML entity. Also converts the case where
    581 #               there are spaces between the dots.
    582 #
    583 #   Example input:  Huh...?
    584 #   Example output: Huh&#8230;?
    585 #
    586 
    587     $_ = str_replace(array("...",     ". . .",), '&#8230;', $_);
    588     return $_;
    589 }
    590 
    591 
    592 function StupefyEntities($_) {
    593 #
    594 #   Parameter:  String.
    595 #   Returns:    The string, with each SmartyPants HTML entity translated to
    596 #               its ASCII counterpart.
    597 #
    598 #   Example input:  &#8220;Hello &#8212; world.&#8221;
    599 #   Example output: "Hello -- world."
    600 #
    601 
    602                         #  en-dash    em-dash
    603     $_ = str_replace(array('&#8211;', '&#8212;'),
    604                      array('-',       '--'), $_);
    605 
    606     # single quote         open       close
    607     $_ = str_replace(array('&#8216;', '&#8217;'), "'", $_);
    608 
    609     # double quote         open       close
    610     $_ = str_replace(array('&#8220;', '&#8221;'), '"', $_);
    611 
    612     $_ = str_replace('&#8230;', '...', $_); # ellipsis
    613 
    614     return $_;
    615 }
    616 
    617 
    618 function ProcessEscapes($_) {
    619 #
    620 #   Parameter:  String.
    621 #   Returns:    The string, with after processing the following backslash
    622 #               escape sequences. This is useful if you want to force a "dumb"
    623 #               quote or other character to appear.
    624 #
    625 #               Escape  Value
    626 #               ------  -----
    627 #               \\      &#92;
    628 #               \"      &#34;
    629 #               \'      &#39;
    630 #               \.      &#46;
    631 #               \-      &#45;
    632 #               \`      &#96;
    633 #
    634     $_ = str_replace(
    635         array('\\\\',  '\"',    "\'",    '\.',    '\-',    '\`'),
    636         array('&#92;', '&#34;', '&#39;', '&#46;', '&#45;', '&#96;'), $_);
    637 
    638     return $_;
    639 }
    640 
    641 
    642 # _TokenizeHTML is shared between PHP SmartyPants and PHP Markdown.
    643 # We only define it if it is not already defined.
    644 if (!function_exists('_TokenizeHTML')) :
    645 function _TokenizeHTML($str) {
    646 #
    647 #   Parameter:  String containing HTML markup.
    648 #   Returns:    An array of the tokens comprising the input
    649 #               string. Each token is either a tag (possibly with nested,
    650 #               tags contained therein, such as <a href="<MTFoo>">, or a
    651 #               run of text between tags. Each element of the array is a
    652 #               two-element array; the first is either 'tag' or 'text';
    653 #               the second is the actual value.
    654 #
    655 #
    656 #   Regular expression derived from the _tokenize() subroutine in
    657 #   Brad Choate's MTRegex plugin.
    658 #   <http://www.bradchoate.com/past/mtregex.php>
    659 #
    660     $index = 0;
    661     $tokens = array();
    662 
    663     $match = '(?s:<!(?:--.*?--\s*)+>)|'.    # comment
    664              '(?s:<\?.*?\?>)|'.             # processing instruction
    665                                             # regular tags
    666              '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';
    667 
    668     $parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    669 
    670     foreach ($parts as $part) {
    671         if (++$index % 2 && $part != '')
    672             $tokens[] = array('text', $part);
    673         else
    674             $tokens[] = array('tag', $part);
    675     }
    676     return $tokens;
    677 }
    678 endif;
    679 
    680 
    681 /*
    682 
    683 PHP SmartyPants
    684 ===============
    685 
    686 Description
    687 -----------
    688 
    689 This is a PHP translation of the original SmartyPants quote educator written in
    690 Perl by John Gruber.
    691 
    692 SmartyPants is a web publishing utility that translates plain ASCII
    693 punctuation characters into "smart" typographic punctuation HTML
    694 entities. SmartyPants can perform the following transformations:
    695 
    696 *   Straight quotes (`"` and `'`) into "curly" quote HTML entities
    697 *   Backticks-style quotes (` ``like this'' `) into "curly" quote HTML
    698     entities
    699 *   Dashes (`--` and `---`) into en- and em-dash entities
    700 *   Three consecutive dots (`...`) into an ellipsis entity
    701 
    702 SmartyPants does not modify characters within `<pre>`, `<code>`, `<kbd>`,
    703 `<script>`, or `<math>` tag blocks. Typically, these tags are used to
    704 display text where smart quotes and other "smart punctuation" would not
    705 be appropriate, such as source code or example markup.
    706 
    707 
    708 ### Backslash Escapes ###
    709 
    710 If you need to use literal straight quotes (or plain hyphens and
    711 periods), SmartyPants accepts the following backslash escape sequences
    712 to force non-smart punctuation. It does so by transforming the escape
    713 sequence into a decimal-encoded HTML entity:
    714 
    715     Escape  Value  Character
    716     ------  -----  ---------
    717       \\    &#92;    \
    718       \"    &#34;    "
    719       \'    &#39;    '
    720       \.    &#46;    .
    721       \-    &#45;    -
    722       \`    &#96;    `
    723 
    724 This is useful, for example, when you want to use straight quotes as
    725 foot and inch marks: 6'2" tall; a 17" iMac.
    726 
    727 
    728 Bugs
    729 ----
    730 
    731 To file bug reports or feature requests (other than topics listed in the
    732 Caveats section above) please send email to:
    733 
    734 <michel.fortin (at) michelf.com>
    735 
    736 If the bug involves quotes being curled the wrong way, please send example
    737 text to illustrate.
    738 
    739 
    740 ### Algorithmic Shortcomings ###
    741 
    742 One situation in which quotes will get curled the wrong way is when
    743 apostrophes are used at the start of leading contractions. For example:
    744 
    745     'Twas the night before Christmas.
    746 
    747 In the case above, SmartyPants will turn the apostrophe into an opening
    748 single-quote, when in fact it should be a closing one. I don't think
    749 this problem can be solved in the general case -- every word processor
    750 I've tried gets this wrong as well. In such cases, it's best to use the
    751 proper HTML entity for closing single-quotes (`&#8217;`) by hand.
    752 
    753 
    754 Version History
    755 ---------------
    756 
    757 1.5.1e (9 Dec 2005)
    758 
    759 *   Corrected a bug that prevented special characters from being
    760     escaped.
    761 
    762 
    763 1.5.1d (25 May 2005)
    764 
    765 *   Corrected a small bug in `_TokenizeHTML` where a Doctype declaration
    766     was not seen as HTML (smart quotes were applied inside).
    767 
    768 
    769 1.5.1c (13 Dec 2004)
    770 
    771 *   Changed a regular expression in `_TokenizeHTML` that could lead to
    772     a segmentation fault with PHP 4.3.8 on Linux.
    773 
    774 
    775 1.5.1b (6 Sep 2004)
    776 
    777 *   Corrected a problem with quotes immediately following a dash
    778     with no space between: `Text--"quoted text"--text.`
    779 
    780 *   PHP SmartyPants can now be used as a modifier by the Smarty
    781     template engine. Rename the file to "modifier.smartypants.php"
    782     and put it in your smarty plugins folder.
    783 
    784 *   Replaced a lot of space characters by tabs, saving about 4 KB.
    785 
    786 
    787 1.5.1a (30 Jun 2004)
    788 
    789 *   PHP Markdown and PHP Smartypants now share the same `_TokenizeHTML`
    790     function when loaded simultaneously.
    791 
    792 *   Changed the internals of `_TokenizeHTML` to lower the PHP version
    793     requirement to PHP 4.0.5.
    794 
    795 
    796 1.5.1 (6 Jun 2004)
    797 
    798 *   Initial release of PHP SmartyPants, based on version 1.5.1 of the
    799     original SmartyPants written in Perl.
    800 
    801 
    802 Author
    803 ------
    804 
    805 John Gruber
    806 <http://daringfireball.net/>
    807 
    808 Ported to PHP by Michel Fortin
    809 <http://www.michelf.com/>
    810 
    811 
    812 Additional Credits
    813 ------------------
    814 
    815 Portions of this plug-in are based on Brad Choate's nifty MTRegex plug-in.
    816 Brad Choate also contributed a few bits of source code to this plug-in.
    817 Brad Choate is a fine hacker indeed. (<http://bradchoate.com/>)
    818 
    819 Jeremy Hedley (<http://antipixel.com/>) and Charles Wiltgen
    820 (<http://playbacktime.com/>) deserve mention for exemplary beta testing.
    821 
    822 
    823 Copyright and License
    824 ---------------------
    825 
    826 Copyright (c) 2003 John Gruber
    827 <http://daringfireball.net/>
    828 All rights reserved.
    829 
    830 Copyright (c) 2004-2005 Michel Fortin
    831 <http://www.michelf.com>
    832 
    833 Redistribution and use in source and binary forms, with or without
    834 modification, are permitted provided that the following conditions are met:
    835 
    836 *   Redistributions of source code must retain the above copyright
    837     notice, this list of conditions and the following disclaimer.
    838 
    839 *   Redistributions in binary form must reproduce the above copyright
    840     notice, this list of conditions and the following disclaimer in the
    841     documentation and/or other materials provided with the distribution.
    842 
    843 *   Neither the name "SmartyPants" nor the names of its contributors may
    844     be used to endorse or promote products derived from this software
    845     without specific prior written permission.
    846 
    847 This software is provided by the copyright holders and contributors "as is"
    848 and any express or implied warranties, including, but not limited to, the
    849 implied warranties of merchantability and fitness for a particular purpose
    850 are disclaimed. In no event shall the copyright owner or contributors be
    851 liable for any direct, indirect, incidental, special, exemplary, or
    852 consequential damages (including, but not limited to, procurement of
    853 substitute goods or services; loss of use, data, or profits; or business
    854 interruption) however caused and on any theory of liability, whether in
    855 contract, strict liability, or tort (including negligence or otherwise)
    856 arising in any way out of the use of this software, even if advised of the
    857 possibility of such damage.
    858 
    859 */
    860 ?>