Home | History | Annotate | Download | only in geshi
      1 <?php
      2 /**
      3  * GeSHi - Generic Syntax Highlighter
      4  *
      5  * The GeSHi class for Generic Syntax Highlighting. Please refer to the
      6  * documentation at http://qbnz.com/highlighter/documentation.php for more
      7  * information about how to use this class.
      8  *
      9  * For changes, release notes, TODOs etc, see the relevant files in the docs/
     10  * directory.
     11  *
     12  *   This file is part of GeSHi.
     13  *
     14  *  GeSHi is free software; you can redistribute it and/or modify
     15  *  it under the terms of the GNU General Public License as published by
     16  *  the Free Software Foundation; either version 2 of the License, or
     17  *  (at your option) any later version.
     18  *
     19  *  GeSHi is distributed in the hope that it will be useful,
     20  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     21  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     22  *  GNU General Public License for more details.
     23  *
     24  *  You should have received a copy of the GNU General Public License
     25  *  along with GeSHi; if not, write to the Free Software
     26  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     27  *
     28  * @package    geshi
     29  * @subpackage core
     30  * @author     Nigel McNie <nigel (at) geshi.org>, Benny Baumann <BenBE (at) omorphia.de>
     31  * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
     32  * @license    http://gnu.org/copyleft/gpl.html GNU GPL
     33  *
     34  */
     35 
     36 //
     37 // GeSHi Constants
     38 // You should use these constant names in your programs instead of
     39 // their values - you never know when a value may change in a future
     40 // version
     41 //
     42 
     43 /** The version of this GeSHi file */
     44 define('GESHI_VERSION', '1.0.8.3');
     45 
     46 // Define the root directory for the GeSHi code tree
     47 if (!defined('GESHI_ROOT')) {
     48     /** The root directory for GeSHi */
     49     define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
     50 }
     51 /** The language file directory for GeSHi
     52     @access private */
     53 define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
     54 
     55 // Define if GeSHi should be paranoid about security
     56 if (!defined('GESHI_SECURITY_PARANOID')) {
     57     /** Tells GeSHi to be paranoid about security settings */
     58     define('GESHI_SECURITY_PARANOID', false);
     59 }
     60 
// Line numbers - use with enable_line_numbers()
/** Use no line numbers when building the result */
define('GESHI_NO_LINE_NUMBERS', 0);
/** Use normal line numbers when building the result */
define('GESHI_NORMAL_LINE_NUMBERS', 1);
/** Use fancy line numbers when building the result */
define('GESHI_FANCY_LINE_NUMBERS', 2);

// Container HTML type - use with set_header_type()
/** Use nothing to surround the source */
define('GESHI_HEADER_NONE', 0);
/** Use a "div" to surround the source */
define('GESHI_HEADER_DIV', 1);
/** Use a "pre" to surround the source */
define('GESHI_HEADER_PRE', 2);
/** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
define('GESHI_HEADER_PRE_VALID', 3);
/**
 * Use a "table" to surround the source:
 *
 *  <table>
 *    <thead><tr><td colspan="2">$header</td></tr></thead>
 *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
 *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
 *  </table>
 *
 * This is essentially only a workaround for Firefox, see sf#1651996 or take a look at
 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
 * @note when line numbers are disabled this is essentially the same as GESHI_HEADER_PRE
 */
define('GESHI_HEADER_PRE_TABLE', 4);

// Capitalisation constants
/** Leave keywords found as the case that they are */
define('GESHI_CAPS_NO_CHANGE', 0);
/** Uppercase keywords found */
define('GESHI_CAPS_UPPER', 1);
/** Lowercase keywords found */
define('GESHI_CAPS_LOWER', 2);

// Link style constants
/** Links in the source in the :link state */
define('GESHI_LINK', 0);
/** Links in the source in the :hover state */
define('GESHI_HOVER', 1);
/** Links in the source in the :active state */
define('GESHI_ACTIVE', 2);
/** Links in the source in the :visited state */
define('GESHI_VISITED', 3);

// Important string starter/finisher
// Note that if you change these, they should be written as-is, i.e. don't
// write them as if they had been run through htmlentities()
/** The starter for important parts of the source */
define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
/** The ender for important parts of the source */
define('GESHI_END_IMPORTANT', '<END GeSHi>');
    118 
/**#@+
 *  @access private
 */
// When strict mode applies for a language
/** Strict mode never applies (this is the most common) */
define('GESHI_NEVER', 0);
/** Strict mode *might* apply, and can be enabled or
    disabled by {@link GeSHi->enable_strict_mode()} */
define('GESHI_MAYBE', 1);
/** Strict mode always applies */
define('GESHI_ALWAYS', 2);

// Advanced regexp handling constants, used in language files.
// Each constant is a key into the array that describes one regular expression.
/** The key of the regex array defining what to search for */
define('GESHI_SEARCH', 0);
/** The key of the regex array defining what bracket group in a
    matched search to use as a replacement */
define('GESHI_REPLACE', 1);
/** The key of the regex array defining any modifiers to the regular expression */
define('GESHI_MODIFIERS', 2);
/** The key of the regex array defining what bracket group in a
    matched search to put before the replacement */
define('GESHI_BEFORE', 3);
/** The key of the regex array defining what bracket group in a
    matched search to put after the replacement */
define('GESHI_AFTER', 4);
/** The key of the regex array defining a custom keyword to use
    for this regexp's html tag class */
define('GESHI_CLASS', 5);

/** Used in language files to mark comments */
define('GESHI_COMMENTS', 0);
    151 
    152 /** Used to work around missing PHP features **/
    153 define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));
    154 
    155 /** make sure we can call stripos **/
    156 if (!function_exists('stripos')) {
    157     // the offset param of preg_match is not supported below PHP 4.3.3
    158     if (GESHI_PHP_PRE_433) {
    159         /**
    160          * @ignore
    161          */
    162         function stripos($haystack, $needle, $offset = null) {
    163             if (!is_null($offset)) {
    164                 $haystack = substr($haystack, $offset);
    165             }
    166             if (preg_match('/'. preg_quote($needle, '/') . '/', $haystack, $match, PREG_OFFSET_CAPTURE)) {
    167                 return $match[0][1];
    168             }
    169             return false;
    170         }
    171     }
    172     else {
    173         /**
    174          * @ignore
    175          */
    176         function stripos($haystack, $needle, $offset = null) {
    177             if (preg_match('/'. preg_quote($needle, '/') . '/', $haystack, $match, PREG_OFFSET_CAPTURE, $offset)) {
    178                 return $match[0][1];
    179             }
    180             return false;
    181         }
    182     }
    183 }
    184 
/** Some old PHP / PCRE versions only support a limited number of subpatterns
    in a single regular expression. Set this to false if your PCRE lib is up to date
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
/** It's also important not to generate regular expressions that are too long.
    Be generous here... but keep in mind that when reaching this limit we
    still have to close open patterns. 12k should do just fine on a 16k limit.
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_LENGTH', 12288);
    196 
//Number format specification
//Every value below is a distinct power of two
/** Basic number format for integers */
define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
/** Enhanced number format for integers like seen in C */
define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
/** Number format to highlight binary numbers with a suffix "b" */
define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
/** Number format to highlight binary numbers with a prefix % */
define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
/** Number format to highlight binary numbers with a prefix 0b (C) */
define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
/** Number format to highlight octal numbers with a leading zero */
define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
/** Number format to highlight octal numbers with a suffix of o */
define('GESHI_NUMBER_OCT_SUFFIX', 512);           //[0-7]+[oO]
/** Number format to highlight hex numbers with a prefix 0x */
define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
/** Number format to highlight hex numbers with a suffix of h */
define('GESHI_NUMBER_HEX_SUFFIX', 8192);           //[0-9][0-9a-fA-F]*h
/** Number format to highlight floating-point numbers without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
/** Number format to highlight floating-point numbers with a trailing "f", without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
/** Number format to highlight floating-point numbers in scientific notation (E) that start directly with the decimal point */
define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
//Custom formats are passed by RX array
    225 
// Error detection - use these to analyse faults.
// These are the codes stored in GeSHi->error; GeSHi->error() maps them to messages.
/** No sourcecode to highlight was specified
 * @deprecated
 */
define('GESHI_ERROR_NO_INPUT', 1);
/** The language specified does not exist */
define('GESHI_ERROR_NO_SUCH_LANG', 2);
/** GeSHi could not open a file for reading (generally a language file) */
define('GESHI_ERROR_FILE_NOT_READABLE', 3);
/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
/**#@-*/
    240 
    241 
    242 /**
    243  * The GeSHi Class.
    244  *
    245  * Please refer to the documentation for GeSHi 1.0.X that is available
    246  * at http://qbnz.com/highlighter/documentation.php for more information
    247  * about how to use this class.
    248  *
    249  * @package   geshi
    250  * @author    Nigel McNie <nigel (at) geshi.org>, Benny Baumann <BenBE (at) omorphia.de>
    251  * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
    252  */
    253 class GeSHi {
    254     /**#@+
    255      * @access private
    256      */
    257     /**
    258      * The source code to highlight
    259      * @var string
    260      */
    261     var $source = '';
    262 
    263     /**
    264      * The language to use when highlighting
    265      * @var string
    266      */
    267     var $language = '';
    268 
    269     /**
    270      * The data for the language used
    271      * @var array
    272      */
    273     var $language_data = array();
    274 
    275     /**
    276      * The path to the language files
    277      * @var string
    278      */
    279     var $language_path = GESHI_LANG_ROOT;
    280 
    /**
     * The code of the last error (one of the GESHI_ERROR_* constants), or
     * false if no error has occurred. error() turns it into a message.
     * @var int|false
     * @todo check err reporting works
     */
    286     var $error = false;
    287 
    288     /**
    289      * Possible error messages
    290      * @var array
    291      */
    292     var $error_messages = array(
    293         GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
    294         GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
    295         GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
    296         GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
    297     );
    298 
    299     /**
    300      * Whether highlighting is strict or not
    301      * @var boolean
    302      */
    303     var $strict_mode = false;
    304 
    305     /**
    306      * Whether to use CSS classes in output
    307      * @var boolean
    308      */
    309     var $use_classes = false;
    310 
    311     /**
    312      * The type of header to use. Can be one of the following
    313      * values:
    314      *
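     * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
     * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
     * - GESHI_HEADER_NONE: No header is outputted.
     * - GESHI_HEADER_PRE_VALID: A "pre" wraps the lines when line numbers
     *   are enabled, or the whole code otherwise.
     * - GESHI_HEADER_PRE_TABLE: Source is outputted in a "table" HTML element.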
    318      *
    319      * @var int
    320      */
    321     var $header_type = GESHI_HEADER_PRE;
    322 
    323     /**
    324      * Array of permissions for which lexics should be highlighted
    325      * @var array
    326      */
    327     var $lexic_permissions = array(
    328         'KEYWORDS' =>    array(),
    329         'COMMENTS' =>    array('MULTI' => true),
    330         'REGEXPS' =>     array(),
    331         'ESCAPE_CHAR' => true,
    332         'BRACKETS' =>    true,
    333         'SYMBOLS' =>     false,
    334         'STRINGS' =>     true,
    335         'NUMBERS' =>     true,
    336         'METHODS' =>     true,
    337         'SCRIPT' =>      true
    338     );
    339 
    340     /**
    341      * The time it took to parse the code
    342      * @var double
    343      */
    344     var $time = 0;
    345 
    346     /**
    347      * The content of the header block
    348      * @var string
    349      */
    350     var $header_content = '';
    351 
    352     /**
    353      * The content of the footer block
    354      * @var string
    355      */
    356     var $footer_content = '';
    357 
    358     /**
    359      * The style of the header block
    360      * @var string
    361      */
    362     var $header_content_style = '';
    363 
    364     /**
    365      * The style of the footer block
    366      * @var string
    367      */
    368     var $footer_content_style = '';
    369 
    370     /**
    371      * Tells if a block around the highlighted source should be forced
    372      * if not using line numbering
    373      * @var boolean
    374      */
    375     var $force_code_block = false;
    376 
    377     /**
    378      * The styles for hyperlinks in the code
    379      * @var array
    380      */
    381     var $link_styles = array();
    382 
    383     /**
    384      * Whether important blocks should be recognised or not
    385      * @var boolean
    386      * @deprecated
    387      * @todo REMOVE THIS FUNCTIONALITY!
    388      */
    389     var $enable_important_blocks = false;
    390 
    391     /**
    392      * Styles for important parts of the code
    393      * @var string
    394      * @deprecated
    395      * @todo As above - rethink the whole idea of important blocks as it is buggy and
    396      * will be hard to implement in 1.2
    397      */
    398     var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
    399 
    400     /**
    401      * Whether CSS IDs should be added to the code
    402      * @var boolean
    403      */
    404     var $add_ids = false;
    405 
    406     /**
    407      * Lines that should be highlighted extra
    408      * @var array
    409      */
    410     var $highlight_extra_lines = array();
    411 
    412     /**
    413      * Styles of lines that should be highlighted extra
    414      * @var array
    415      */
    416     var $highlight_extra_lines_styles = array();
    417 
    418     /**
    419      * Styles of extra-highlighted lines
    420      * @var string
    421      */
    422     var $highlight_extra_lines_style = 'background-color: #ffc;';
    423 
    424     /**
    425      * The line ending
    426      * If null, nl2br() will be used on the result string.
    427      * Otherwise, all instances of \n will be replaced with $line_ending
    428      * @var string
    429      */
    430     var $line_ending = null;
    431 
    432     /**
    433      * Number at which line numbers should start at
    434      * @var int
    435      */
    436     var $line_numbers_start = 1;
    437 
    438     /**
    439      * The overall style for this code block
    440      * @var string
    441      */
    442     var $overall_style = 'font-family:monospace;';
    443 
    444     /**
    445      *  The style for the actual code
    446      * @var string
    447      */
    448     var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
    449 
    450     /**
    451      * The overall class for this code block
    452      * @var string
    453      */
    454     var $overall_class = '';
    455 
    456     /**
    457      * The overall ID for this code block
    458      * @var string
    459      */
    460     var $overall_id = '';
    461 
    462     /**
    463      * Line number styles
    464      * @var string
    465      */
    466     var $line_style1 = 'font-weight: normal; vertical-align:top;';
    467 
    468     /**
    469      * Line number styles for fancy lines
    470      * @var string
    471      */
    472     var $line_style2 = 'font-weight: bold; vertical-align:top;';
    473 
    474     /**
    475      * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
    476      * @var string
    477      */
    478     var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
    479 
    480     /**
    481      * Flag for how line numbers are displayed
     * @var int One of the GESHI_NO_LINE_NUMBERS, GESHI_NORMAL_LINE_NUMBERS
     *          or GESHI_FANCY_LINE_NUMBERS constants
    483      */
    484     var $line_numbers = GESHI_NO_LINE_NUMBERS;
    485 
    486     /**
    487      * Flag to decide if multi line spans are allowed. Set it to false to make sure
    488      * each tag is closed before and reopened after each linefeed.
    489      * @var boolean
    490      */
    491     var $allow_multiline_span = true;
    492 
    493     /**
    494      * The "nth" value for fancy line highlighting
    495      * @var int
    496      */
    497     var $line_nth_row = 0;
    498 
    499     /**
    500      * The size of tab stops
    501      * @var int
    502      */
    503     var $tab_width = 8;
    504 
    505     /**
    506      * Should we use language-defined tab stop widths?
     * @var boolean
    508      */
    509     var $use_language_tab_width = false;
    510 
    511     /**
    512      * Default target for keyword links
    513      * @var string
    514      */
    515     var $link_target = '';
    516 
    517     /**
    518      * The encoding to use for entity encoding
    519      * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
    520      * @var string
    521      */
    522     var $encoding = 'utf-8';
    523 
    524     /**
    525      * Should keywords be linked?
    526      * @var boolean
    527      */
    528     var $keyword_links = true;
    529 
    530     /**
    531      * Currently loaded language file
    532      * @var string
    533      * @since 1.0.7.22
    534      */
    535     var $loaded_language = '';
    536 
    537     /**
     * Whether the caches needed for parsing are built or not
    539      *
    540      * @var bool
    541      * @since 1.0.8
    542      */
    543     var $parse_cache_built = false;
    544 
    545     /**
    546      * Work around for Suhosin Patch with disabled /e modifier
    547      *
    548      * Note from suhosins author in config file:
    549      * <blockquote>
    550      *   The /e modifier inside <code>preg_replace()</code> allows code execution.
    551      *   Often it is the cause for remote code execution exploits. It is wise to
    552      *   deactivate this feature and test where in the application it is used.
    553      *   The developer using the /e modifier should be made aware that he should
    554      *   use <code>preg_replace_callback()</code> instead
    555      * </blockquote>
    556      *
     * @var int
    558      * @since 1.0.8
    559      */
    560     var $_kw_replace_group = 0;
    561     var $_rx_key = 0;
    562 
    563     /**
    564      * some "callback parameters" for handle_multiline_regexps
    565      *
    566      * @since 1.0.8
    567      * @access private
    568      * @var string
    569      */
    570     var $_hmr_before = '';
    571     var $_hmr_replace = '';
    572     var $_hmr_after = '';
    573     var $_hmr_key = 0;
    574 
    575     /**#@-*/
    576 
    577     /**
    578      * Creates a new GeSHi object, with source and language
    579      *
    580      * @param string The source code to highlight
    581      * @param string The language to highlight the source with
    582      * @param string The path to the language file directory. <b>This
    583      *               is deprecated!</b> I've backported the auto path
    584      *               detection from the 1.1.X dev branch, so now it
    585      *               should be automatically set correctly. If you have
    586      *               renamed the language directory however, you will
    587      *               still need to set the path using this parameter or
    588      *               {@link GeSHi->set_language_path()}
    589      * @since 1.0.0
    590      */
    function __construct($source = '', $language = '', $path = '') {
        // empty() also treats the string "0" as empty, which would silently
        // drop a source consisting of just "0"; accept that case explicitly.
        if (!empty($source) || $source === '0') {
            $this->set_source($source);
        }
        if (!empty($language)) {
            $this->set_language($language);
        }
        // set_language_path() validates the path and ignores an empty one
        $this->set_language_path($path);
    }

    /**
     * PHP 4 style constructor, kept for backwards compatibility.
     *
     * PHP 8 no longer treats a method named after the class as the
     * constructor, so the real work lives in __construct(). PHP 4 only knows
     * this form and reaches __construct() through the call below. Code that
     * calls GeSHi() explicitly (e.g. from a subclass) keeps working.
     *
     * @param string The source code to highlight
     * @param string The language to highlight the source with
     * @param string The path to the language file directory (deprecated)
     * @since 1.0.0
     */
    function GeSHi($source = '', $language = '', $path = '') {
        $this->__construct($source, $language, $path);
    }
    600 
    601     /**
    602      * Returns an error message associated with the last GeSHi operation,
     * or false if no error has occurred
    604      *
    605      * @return string|false An error message if there has been an error, else false
    606      * @since  1.0.0
    607      */
    function error() {
        // Nothing went wrong: report that with false rather than a string
        if (!$this->error) {
            return false;
        }

        // Placeholders that an error message template may contain, and the
        // current values they are replaced with (useful when debugging)
        $placeholders = array('{LANGUAGE}', '{PATH}');
        $values = array($this->language, $this->language_path);

        // Look up the message template for the stored error code
        $template = $this->error_messages[$this->error];
        $msg = str_replace($placeholders, $values, $template);

        return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
    }
    624 
    625     /**
    626      * Gets a human-readable language name (thanks to Simon Patterson
    627      * for the idea :))
    628      *
    629      * @return string The name for the current language
    630      * @since  1.0.2
    631      */
    function get_language_name() {
        // language_data defaults to an empty array, so LANG_NAME can be
        // missing (for instance when the requested language was not found).
        // Fall back to an empty name instead of reading an undefined index.
        $name = isset($this->language_data['LANG_NAME'])
            ? $this->language_data['LANG_NAME']
            : '';

        // Flag the name when the last set_language() call could not find
        // the requested language file
        if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
            return $name . ' (Unknown Language)';
        }
        return $name;
    }
    638 
    639     /**
    640      * Sets the source code for this object
    641      *
    642      * @param string The source code to highlight
    643      * @since 1.0.0
    644      */
    function set_source($source) {
        // Replacing the source also clears the list of extra-highlighted lines
        $this->highlight_extra_lines = array();
        $this->source = $source;
    }
    649 
    650     /**
    651      * Sets the language for this object
    652      *
    653      * @note since 1.0.8 this function won't reset language-settings by default anymore!
    654      *       if you need this set $force_reset = true
    655      *
     * @param string The name of the language to use
     * @param boolean Whether to reload the language file even if it is
     *                the one that is already loaded
    657      * @since 1.0.0
    658      */
    function set_language($language, $force_reset = false) {
        // Forgetting the loaded file makes the "already loaded" check below fail
        if ($force_reset) {
            $this->loaded_language = false;
        }

        // Keep only letters, digits, "-" and "_" (lowercased) so the name
        // cannot inject anything malicious into the file name built from it
        $language = strtolower(preg_replace('#[^a-zA-Z0-9\-_]#', '', $language));

        // Full name of the language file to load
        $file_name = $this->language_path . $language . '.php';

        // Only do work when this is not the language file that is already loaded
        if ($file_name != $this->loaded_language) {
            $this->language = $language;

            // Start from a clean state for the new language
            $this->error = false;
            $this->strict_mode = GESHI_NEVER;

            if (is_readable($file_name)) {
                // Load the language for parsing
                $this->load_language($file_name);
            } else {
                $this->error = GESHI_ERROR_NO_SUCH_LANG;
            }
        }
    }
    690 
    691     /**
    692      * Sets the path to the directory containing the language files. Note
    693      * that this path is relative to the directory of the script that included
    694      * geshi.php, NOT geshi.php itself.
    695      *
    696      * @param string The path to the language directory
    697      * @since 1.0.0
    698      * @deprecated The path to the language files should now be automatically
    699      *             detected, so this method should no longer be needed. The
    700      *             1.1.X branch handles manual setting of the path differently
    701      *             so this method will disappear in 1.2.0.
    702      */
    function set_language_path($path) {
        // strpos() returns 0, not false, for a colon in first position, so
        // the result has to be compared against false explicitly; a plain
        // truth test would let a path starting with ":" skip this check.
        if (false !== strpos($path, ':')) {
            // Security fix to prevent external directories using fopen wrappers.
            // A colon is only acceptable as part of a drive letter ("C:..."),
            // and only where the directory separator is a backslash.
            if (DIRECTORY_SEPARATOR != "\\") {
                return;
            }
            if (!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
                return;
            }
        }

        // Reject every path containing a character outside the whitelist.
        // NOTE(review): inside this single-quoted string "\\\s" reaches PCRE
        // as "\\s", i.e. a literal backslash plus the letter "s", so whitespace
        // is not whitelisted - confirm whether that is intended.
        if (preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
            return;
        }

        // In paranoid mode also refuse dot-directories ("/.") and any ".."
        if (GESHI_SECURITY_PARANOID
            && (false !== strpos($path, '/.') || false !== strpos($path, '..'))) {
            return;
        }

        // An empty path leaves the current language path untouched
        if (!$path) {
            return;
        }

        // Make sure the stored path ends with a slash
        $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
        $this->set_language($this->language); // otherwise set_language_path has no effect
    }
    731 
    732     /**
    733      * Sets the type of header to be used.
    734      *
     * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
    736      * means more source code but more control over tab width and line-wrapping.
    737      * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
    738      * control. Default is GESHI_HEADER_PRE.
    739      *
    740      * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
    741      * should be outputted.
    742      *
    743      * @param int The type of header to be used
    744      * @since 1.0.0
    745      */
    function set_header_type($type) {
        // Every header type this class knows about
        $valid_types = array(
            GESHI_HEADER_NONE,
            GESHI_HEADER_DIV,
            GESHI_HEADER_PRE,
            GESHI_HEADER_PRE_VALID,
            GESHI_HEADER_PRE_TABLE
        );

        if (in_array($type, $valid_types)) {
            // Set that new header type
            $this->header_type = $type;
        } else {
            // Unknown type: keep the current header type and record the error
            $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
        }
    }
    757 
    758     /**
    759      * Sets the styles for the code that will be outputted
    760      * when this object is parsed. The style should be a
    761      * string of valid stylesheet declarations
    762      *
    763      * @param string  The overall style for the outputted code block
    764      * @param boolean Whether to merge the styles with the current styles or not
    765      * @since 1.0.0
    766      */
    function set_overall_style($style, $preserve_defaults = false) {
        // When merging, the new declarations are appended to the current ones
        if ($preserve_defaults) {
            $style = $this->overall_style . $style;
        }
        $this->overall_style = $style;
    }
    774 
    775     /**
    776      * Sets the overall classname for this block of code. This
    777      * class can then be used in a stylesheet to style this object's
    778      * output
    779      *
    780      * @param string The class name to use for this block of code
    781      * @since 1.0.0
    782      */
    function set_overall_class($class) {
        // Stored as given; no validation or escaping is done in this method
        $this->overall_class = $class;
    }
    786 
    787     /**
    788      * Sets the overall id for this block of code. This id can then
    789      * be used in a stylesheet to style this object's output
    790      *
    791      * @param string The ID to use for this block of code
    792      * @since 1.0.0
    793      */
    function set_overall_id($id) {
        // Stored as given; no validation or escaping is done in this method
        $this->overall_id = $id;
    }
    797 
    798     /**
    799      * Sets whether CSS classes should be used to highlight the source. Default
    800      * is off, calling this method with no arguments will turn it on
    801      *
    802      * @param boolean Whether to turn classes on or not
    803      * @since 1.0.0
    804      */
    function enable_classes($flag = true) {
        // Normalise any truthy/falsy argument to a real boolean.
        $this->use_classes = (bool) $flag;
    }
    808 
    809     /**
    810      * Sets the style for the actual code. This should be a string
    811      * containing valid stylesheet declarations. If $preserve_defaults is
    812      * true, then styles are merged with the default styles, with the
    813      * user defined styles having priority
    814      *
    815      * Note: Use this method to override any style changes you made to
    816      * the line numbers if you are using line numbers, else the line of
    817      * code will have the same style as the line number! Consult the
    818      * GeSHi documentation for more information about this.
    819      *
    820      * @param string  The style to use for actual code
    821      * @param boolean Whether to merge the current styles with the new styles
    822      * @since 1.0.2
    823      */
    function set_code_style($style, $preserve_defaults = false) {
        // Merge mode: append the new declarations to the current code style.
        if ($preserve_defaults) {
            $this->code_style .= $style;
            return;
        }

        // Default mode: overwrite the current code style.
        $this->code_style = $style;
    }
    831 
    832     /**
    833      * Sets the styles for the line numbers.
    834      *
    835      * @param string The style for the line numbers that are "normal"
    836      * @param string|boolean If a string, this is the style of the line
    837      *        numbers that are "fancy", otherwise if boolean then this
    838      *        defines whether the normal styles should be merged with the
    839      *        new normal styles or not
    840      * @param boolean If set, is the flag for whether to merge the "fancy"
    841      *        styles with the current styles or not
    842      * @since 1.0.2
    843      */
    function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
        // Two-argument form (style, bool): the boolean in second position is
        // really the merge flag, and no "fancy" style was supplied.
        if (is_bool($style2)) {
            $preserve_defaults = $style2;
            $style2 = '';
        }

        // Merge mode: append to both the normal and the fancy line styles.
        if ($preserve_defaults) {
            $this->line_style1 .= $style1;
            $this->line_style2 .= $style2;
            return;
        }

        // Default mode: replace both line styles.
        $this->line_style1 = $style1;
        $this->line_style2 = $style2;
    }
    860 
    861     /**
    862      * Sets whether line numbers should be displayed.
    863      *
    864      * Valid values for the first parameter are:
    865      *
    866      *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
    867      *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
    868      *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
    869      *
    870      * For fancy line numbers, the second parameter is used to signal which lines
    871      * are to be fancy. For example, if the value of this parameter is 5 then every
    872      * 5th line will be fancy.
    873      *
    874      * @param int How line numbers should be displayed
    875      * @param int Defines which lines are fancy
    876      * @since 1.0.0
    877      */
    function enable_line_numbers($flag, $nth_row = 5) {
        // Only the three GESHI_*_LINE_NUMBERS modes are accepted. For anything
        // else, record the error and leave the current settings untouched
        // instead of storing a mode the rest of the class does not know about
        // (set_header_type handles an invalid header type the same way).
        if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
            && GESHI_FANCY_LINE_NUMBERS != $flag) {
            $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
            return;
        }

        $this->line_numbers = $flag;
        // Every $nth_row-th line is rendered "fancy" in fancy mode.
        $this->line_nth_row = $nth_row;
    }
    886 
    887     /**
    888      * Sets whether spans and other HTML markup generated by GeSHi can
    889      * span over multiple lines or not. Defaults to true to reduce overhead.
    890      * Set it to false if you want to manipulate the output or manually display
    891      * the code in an ordered list.
    892      *
    893      * @param boolean Whether multiline spans are allowed or not
    894      * @since 1.0.7.22
    895      */
    function enable_multiline_span($flag) {
        // Normalise any truthy/falsy argument to a real boolean.
        $this->allow_multiline_span = $flag ? true : false;
    }
    899 
    900     /**
    901      * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
    902      *
    903      * @see enable_multiline_span
    904      * @return bool
    905      */
    function get_multiline_span() {
        // Plain accessor for the flag written by enable_multiline_span().
        $allowed = $this->allow_multiline_span;
        return $allowed;
    }
    909 
    910     /**
    911      * Sets the style for a keyword group. If $preserve_defaults is
    912      * true, then styles are merged with the default styles, with the
    913      * user defined styles having priority
    914      *
    915      * @param int     The key of the keyword group to change the styles of
    916      * @param string  The style to make the keywords
    917      * @param boolean Whether to merge the new styles with the old or just
    918      *                to overwrite them
    919      * @since 1.0.0
    920      */
    function set_keyword_group_style($key, $style, $preserve_defaults = false) {
        // Either append to the group's current style or replace it outright.
        if ($preserve_defaults) {
            $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
        } else {
            $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
        }

        // A group that has no highlighting permission yet gets switched on;
        // an existing permission (on or off) is left as it is.
        if (isset($this->lexic_permissions['KEYWORDS'][$key])) {
            return;
        }
        $this->lexic_permissions['KEYWORDS'][$key] = true;
    }
    934 
    935     /**
    936      * Turns highlighting on/off for a keyword group
    937      *
    938      * @param int     The key of the keyword group to turn on or off
    939      * @param boolean Whether to turn highlighting for that group on or off
    940      * @since 1.0.0
    941      */
    function set_keyword_group_highlighting($key, $flag = true) {
        // Store a real boolean for this keyword group's permission.
        $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
    }
    945 
    946     /**
    947      * Sets the styles for comment groups.  If $preserve_defaults is
    948      * true, then styles are merged with the default styles, with the
    949      * user defined styles having priority
    950      *
    951      * @param int     The key of the comment group to change the styles of
    952      * @param string  The style to make the comments
    953      * @param boolean Whether to merge the new styles with the old or just
    954      *                to overwrite them
    955      * @since 1.0.0
    956      */
    function set_comments_style($key, $style, $preserve_defaults = false) {
        // Merge mode: append to the style of comment group $key.
        if ($preserve_defaults) {
            $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
            return;
        }

        // Default mode: replace the style of comment group $key.
        $this->language_data['STYLES']['COMMENTS'][$key] = $style;
    }
    964 
    965     /**
    966      * Turns highlighting on/off for comment groups
    967      *
    968      * @param int     The key of the comment group to turn on or off
    969      * @param boolean Whether to turn highlighting for that group on or off
    970      * @since 1.0.0
    971      */
    function set_comments_highlighting($key, $flag = true) {
        // Store a real boolean for this comment group's permission.
        $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
    }
    975 
    976     /**
    977      * Sets the styles for escaped characters. If $preserve_defaults is
    978      * true, then styles are merged with the default styles, with the
    979      * user defined styles having priority
    980      *
    981      * @param string  The style to make the escape characters
    982      * @param boolean Whether to merge the new styles with the old or just
    983      *                to overwrite them
    984      * @since 1.0.0
    985      */
    function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
        // $group selects which escape-character style slot is changed
        // (slot 0 when omitted).
        if ($preserve_defaults) {
            // Merge mode: append to the slot's current style.
            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
            return;
        }

        // Default mode: replace the slot's style.
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
    }
    993 
    994     /**
    995      * Turns highlighting on/off for escaped characters
    996      *
    997      * @param boolean Whether to turn highlighting for escape characters on or off
    998      * @since 1.0.0
    999      */
   function set_escape_characters_highlighting($flag = true) {
       // Store a real boolean as the escape-character permission.
       $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
   }
   1003 
   1004     /**
   1005      * Sets the styles for brackets. If $preserve_defaults is
   1006      * true, then styles are merged with the default styles, with the
   1007      * user defined styles having priority
   1008      *
   1009      * This method is DEPRECATED: use set_symbols_style instead.
   1010      * This method will be removed in 1.2.X
   1011      *
   1012      * @param string  The style to make the brackets
   1013      * @param boolean Whether to merge the new styles with the old or just
   1014      *                to overwrite them
   1015      * @since 1.0.0
   1016      * @deprecated In favour of set_symbols_style
   1017      */
   function set_brackets_style($style, $preserve_defaults = false) {
       // Merge mode: append to the (single) bracket style slot.
       if ($preserve_defaults) {
           $this->language_data['STYLES']['BRACKETS'][0] .= $style;
           return;
       }

       // Default mode: replace the bracket style.
       $this->language_data['STYLES']['BRACKETS'][0] = $style;
   }
   1025 
   1026     /**
   1027      * Turns highlighting on/off for brackets
   1028      *
   1029      * This method is DEPRECATED: use set_symbols_highlighting instead.
   1030      * This method will be removed in 1.2.X
   1031      *
   1032      * @param boolean Whether to turn highlighting for brackets on or off
   1033      * @since 1.0.0
   1034      * @deprecated In favour of set_symbols_highlighting
   1035      */
   function set_brackets_highlighting($flag) {
       // Store a real boolean as the bracket permission.
       $this->lexic_permissions['BRACKETS'] = (bool) $flag;
   }
   1039 
   1040     /**
   1041      * Sets the styles for symbols. If $preserve_defaults is
   1042      * true, then styles are merged with the default styles, with the
   1043      * user defined styles having priority
   1044      *
   1045      * @param string  The style to make the symbols
   1046      * @param boolean Whether to merge the new styles with the old or just
   1047      *                to overwrite them
   1048      * @param int     Tells the group of symbols for which style should be set.
   1049      * @since 1.0.1
   1050      */
   function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
       // Either append to the symbol group's style or replace it.
       if ($preserve_defaults) {
           $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
       } else {
           $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
       }

       // Backward compatibility: symbol group 0 is mirrored into the
       // deprecated bracket style. Other groups do not touch it.
       if ($group != 0) {
           return;
       }
       $this->set_brackets_style($style, $preserve_defaults);
   }
   1064 
   1065     /**
   1066      * Turns highlighting on/off for symbols
   1067      *
   1068      * @param boolean Whether to turn highlighting for symbols on or off
   1069      * @since 1.0.0
   1070      */
   function set_symbols_highlighting($flag) {
       // Store a real boolean as the symbol permission.
       $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

       // Backward compatibility: keep the deprecated bracket permission
       // in step with the symbol permission.
       $this->set_brackets_highlighting($flag);
   }
   1078 
   1079     /**
   1080      * Sets the styles for strings. If $preserve_defaults is
   1081      * true, then styles are merged with the default styles, with the
   1082      * user defined styles having priority
   1083      *
   1084      * @param string  The style to make the strings
   1085      * @param boolean Whether to merge the new styles with the old or just
   1086      *                to overwrite them
   1087      * @since 1.0.0
   1088      */
   function set_strings_style($style, $preserve_defaults = false) {
       // Merge mode: append to string style slot 0.
       if ($preserve_defaults) {
           $this->language_data['STYLES']['STRINGS'][0] .= $style;
           return;
       }

       // Default mode: replace string style slot 0.
       $this->language_data['STYLES']['STRINGS'][0] = $style;
   }
   1096 
   1097     /**
   1098      * Turns highlighting on/off for strings
   1099      *
   1100      * @param boolean Whether to turn highlighting for strings on or off
   1101      * @since 1.0.0
   1102      */
   function set_strings_highlighting($flag) {
       // Store a real boolean as the string permission.
       $this->lexic_permissions['STRINGS'] = (bool) $flag;
   }
   1106 
   1107     /**
   1108      * Sets the styles for numbers. If $preserve_defaults is
   1109      * true, then styles are merged with the default styles, with the
   1110      * user defined styles having priority
   1111      *
   1112      * @param string  The style to make the numbers
   1113      * @param boolean Whether to merge the new styles with the old or just
   1114      *                to overwrite them
   1115      * @since 1.0.0
   1116      */
   function set_numbers_style($style, $preserve_defaults = false) {
       // Merge mode: append to number style slot 0.
       if ($preserve_defaults) {
           $this->language_data['STYLES']['NUMBERS'][0] .= $style;
           return;
       }

       // Default mode: replace number style slot 0.
       $this->language_data['STYLES']['NUMBERS'][0] = $style;
   }
   1124 
   1125     /**
   1126      * Turns highlighting on/off for numbers
   1127      *
   1128      * @param boolean Whether to turn highlighting for numbers on or off
   1129      * @since 1.0.0
   1130      */
   function set_numbers_highlighting($flag) {
       // Store a real boolean as the number permission.
       $this->lexic_permissions['NUMBERS'] = (bool) $flag;
   }
   1134 
   1135     /**
   1136      * Sets the styles for methods. $key is a number that references the
   1137      * appropriate "object splitter" - see the language file for the language
   1138      * you are highlighting to get this number. If $preserve_defaults is
   1139      * true, then styles are merged with the default styles, with the
   1140      * user defined styles having priority
   1141      *
   1142      * @param int     The key of the object splitter to change the styles of
   1143      * @param string  The style to make the methods
   1144      * @param boolean Whether to merge the new styles with the old or just
   1145      *                to overwrite them
   1146      * @since 1.0.0
   1147      */
   function set_methods_style($key, $style, $preserve_defaults = false) {
       // Merge mode: append to the style of object splitter $key.
       if ($preserve_defaults) {
           $this->language_data['STYLES']['METHODS'][$key] .= $style;
           return;
       }

       // Default mode: replace the style of object splitter $key.
       $this->language_data['STYLES']['METHODS'][$key] = $style;
   }
   1155 
   1156     /**
   1157      * Turns highlighting on/off for methods
   1158      *
   1159      * @param boolean Whether to turn highlighting for methods on or off
   1160      * @since 1.0.0
   1161      */
   function set_methods_highlighting($flag) {
       // Store a real boolean as the method permission.
       $this->lexic_permissions['METHODS'] = (bool) $flag;
   }
   1165 
   1166     /**
   1167      * Sets the styles for regexps. If $preserve_defaults is
   1168      * true, then styles are merged with the default styles, with the
   1169      * user defined styles having priority
   1170      *
   1171      * @param string  The style to make the regular expression matches
   1172      * @param boolean Whether to merge the new styles with the old or just
   1173      *                to overwrite them
   1174      * @since 1.0.0
   1175      */
   function set_regexps_style($key, $style, $preserve_defaults = false) {
       // $key selects which regular-expression style slot is changed.
       if ($preserve_defaults) {
           // Merge mode: append to the slot's current style.
           $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
           return;
       }

       // Default mode: replace the slot's style.
       $this->language_data['STYLES']['REGEXPS'][$key] = $style;
   }
   1183 
   1184     /**
   1185      * Turns highlighting on/off for regexps
   1186      *
   1187      * @param int     The key of the regular expression group to turn on or off
   1188      * @param boolean Whether to turn highlighting for the regular expression group on or off
   1189      * @since 1.0.0
   1190      */
   function set_regexps_highlighting($key, $flag) {
       // Store a real boolean for this regexp group's permission.
       $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
   }
   1194 
   1195     /**
   1196      * Sets whether a set of keywords are checked for in a case sensitive manner
   1197      *
   1198      * @param int The key of the keyword group to change the case sensitivity of
   1199      * @param boolean Whether to check in a case sensitive manner or not
   1200      * @since 1.0.0
   1201      */
   function set_case_sensitivity($key, $case) {
       // Store a real boolean as the case-sensitivity flag of group $key.
       $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
   }
   1205 
   1206     /**
   1207      * Sets the case that keywords should use when found. Use the constants:
   1208      *
   1209      *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
   1210      *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
   1211      *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
   1212      *
   1213      * @param int A constant specifying what to do with matched keywords
   1214      * @since 1.0.1
   1215      */
   function set_case_keywords($case) {
       // Anything other than the three GESHI_CAPS_* modes is silently
       // ignored and the current setting is kept.
       $allowed_modes = array(GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER);
       if (!in_array($case, $allowed_modes)) {
           return;
       }

       $this->language_data['CASE_KEYWORDS'] = $case;
   }
   1222 
   1223     /**
   1224      * Sets how many spaces a tab is substituted for
   1225      *
   1226      * Widths below one are reset to the default of 8
   1227      *
   1228      * @param int The tab width
   1229      * @since 1.0.0
   1230      */
   function set_tab_width($width) {
       $requested = intval($width);

       // Widths below 1 do not fit the constraints; fall back to the
       // default of 8 instead of storing them.
       $this->tab_width = ($requested < 1) ? 8 : $requested;
   }
   1240 
   1241     /**
   1242      * Sets whether or not to use tab-stop width specified by language
   1243      *
   1244      * @param boolean Whether to use language-specific tab-stop widths
   1245      * @since 1.0.7.20
   1246      */
   function set_use_language_tab_width($use) {
       // Normalise any truthy/falsy argument to a real boolean.
       $this->use_language_tab_width = $use ? true : false;
   }
   1250 
   1251     /**
   1252      * Returns the tab width to use, based on the current language and user
   1253      * preference
   1254      *
   1255      * @return int Tab width
   1256      * @since 1.0.7.20
   1257      */
   function get_real_tab_width() {
       // The language's own width wins only when the user opted in AND the
       // language data actually defines one.
       if ($this->use_language_tab_width
           && isset($this->language_data['TAB_WIDTH'])) {
           return $this->language_data['TAB_WIDTH'];
       }

       // Otherwise use the width configured through set_tab_width().
       return $this->tab_width;
   }
   1266 
   1267     /**
   1268      * Enables/disables strict highlighting. Default is off, calling this
   1269      * method without parameters will turn it on. See documentation
   1270      * for more details on strict mode and where to use it.
   1271      *
   1272      * @param boolean Whether to enable strict mode or not
   1273      * @since 1.0.0
   1274      */
   function enable_strict_mode($mode = true) {
       // The setting is only changeable for languages that declare strict
       // mode as GESHI_MAYBE; for every other language this is a no-op.
       if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
           return;
       }

       if ($mode) {
           $this->strict_mode = GESHI_ALWAYS;
       } else {
           $this->strict_mode = GESHI_NEVER;
       }
   }
   1280 
   1281     /**
   1282      * Disables all highlighting
   1283      *
   1284      * @since 1.0.0
   1285      * @todo  Rewrite with array traversal
   1286      * @deprecated In favour of enable_highlighting
   1287      */
   function disable_highlighting() {
       // Thin wrapper kept for old callers: delegates to
       // enable_highlighting() with the flag switched off.
       $this->enable_highlighting(false);
   }
   1291 
   1292     /**
   1293      * Enables all highlighting
   1294      *
   1295      * The optional flag parameter was added in version 1.0.7.21 and can be used
   1296      * to enable (true) or disable (false) all highlighting.
   1297      *
   1298      * @since 1.0.0
   1299      * @param boolean A flag specifying whether to enable or disable all highlighting
   1300      * @todo  Rewrite with array traversal
   1301      */
   function enable_highlighting($flag = true) {
       $enabled = (bool) $flag;

       // lexic_permissions mixes plain flags with per-group arrays of flags
       // (one level deep); set every one of them to the same value.
       foreach (array_keys($this->lexic_permissions) as $category) {
           if (!is_array($this->lexic_permissions[$category])) {
               $this->lexic_permissions[$category] = $enabled;
               continue;
           }
           foreach (array_keys($this->lexic_permissions[$category]) as $group) {
               $this->lexic_permissions[$category][$group] = $enabled;
           }
       }

       // Context ("important") blocks follow the same switch.
       $this->enable_important_blocks = $enabled;
   }
   1317 
   1318     /**
   1319      * Given a file extension, this method returns either a valid geshi language
   1320      * name, or the empty string if it couldn't be found
   1321      *
   1322      * @param string The extension to get a language name for
   1323      * @param array  A lookup array to use instead of the default one
   1324      * @since 1.0.5
   1325      * @todo Re-think about how this method works (maybe make it private and/or make it
   1326      *       a extension->lang lookup?)
   1327      * @todo static?
   1328      */
   function get_language_name_from_extension( $extension, $lookup = array() ) {
       // Returns the first language in the lookup table that lists
       // $extension, or '' when none does. Table order matters: an
       // extension listed under several languages (e.g. 'pl', 'pas', 'asm')
       // resolves to the first language that lists it.

       // Fall back to the built-in table when no usable lookup was supplied.
       if ( !is_array($lookup) || empty($lookup)) {
           $lookup = array(
               'actionscript' => array('as'),
               'ada' => array('a', 'ada', 'adb', 'ads'),
               'apache' => array('conf'),
               'asm' => array('ash', 'asm', 'inc'),
               'asp' => array('asp'),
               'bash' => array('sh'),
               'bf' => array('bf'),
               'c' => array('c', 'h'),
               'c_mac' => array('c', 'h'),
               'caddcl' => array(),
               'cadlisp' => array(),
               'cdfg' => array('cdfg'),
               'cobol' => array('cbl'),
               'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
               'csharp' => array('cs'),
               'css' => array('css'),
               'd' => array('d'),
               'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
               'diff' => array('diff', 'patch'),
               'dos' => array('bat', 'cmd'),
               'gettext' => array('po', 'pot'),
               'gml' => array('gml'),
               'gnuplot' => array('plt'),
               'groovy' => array('groovy'),
               'haskell' => array('hs'),
               'html4strict' => array('html', 'htm'),
               'ini' => array('ini', 'desktop'),
               'java' => array('java'),
               'javascript' => array('js'),
               'klonec' => array('kl1'),
               'klonecpp' => array('klx'),
               'latex' => array('tex'),
               'lisp' => array('lisp'),
               'lua' => array('lua'),
               'matlab' => array('m'),
               'mpasm' => array(),
               'mysql' => array('sql'),
               'nsis' => array(),
               'objc' => array(),
               'oobas' => array(),
               'oracle8' => array(),
               'oracle10' => array(),
               'pascal' => array('pas'),
               'perl' => array('pl', 'pm'),
               'php' => array('php', 'php5', 'phtml', 'phps'),
               'povray' => array('pov'),
               'providex' => array('pvc', 'pvx'),
               'prolog' => array('pl'),
               'python' => array('py'),
               'qbasic' => array('bi'),
               'reg' => array('reg'),
               'ruby' => array('rb'),
               'sas' => array('sas'),
               'scala' => array('scala'),
               'scheme' => array('scm'),
               'scilab' => array('sci'),
               'smalltalk' => array('st'),
               'smarty' => array(),
               'tcl' => array('tcl'),
               'vb' => array('bas'),
               'vbnet' => array(),
               'visualfoxpro' => array(),
               'whitespace' => array('ws'),
               'xml' => array('xml', 'svg'),
               'z80' => array('z80', 'asm', 'inc')
           );
       }

       // Compare as strings, strictly. A loose in_array() lets values such
       // as true (or 0 on older PHP versions) "match" the first language
       // in the table even though no such extension is listed.
       if (!is_string($extension)) {
           $extension = is_scalar($extension) ? (string) $extension : '';
       }

       foreach ($lookup as $lang => $extensions) {
           // (array) tolerates a custom lookup that maps a language to a
           // single extension instead of a list.
           foreach ((array) $extensions as $candidate) {
               if (is_scalar($candidate) && (string) $candidate === $extension) {
                   return $lang;
               }
           }
       }

       // Unknown extension.
       return '';
   }
   1407 
   1408     /**
   1409      * Given a file name, this method loads its contents in, and attempts
   1410      * to set the language automatically. An optional lookup table can be
   1411      * passed for looking up the language name. If not specified a default
   1412      * table is used
   1413      *
   1414      * The language table is in the form
   1415      * <pre>array(
   1416      *   'lang_name' => array('extension', 'extension', ...),
   1417      *   'lang_name' ...
   1418      * );</pre>
   1419      *
   1420      * @param string The filename to load the source from
   1421      * @param array  A lookup array to use instead of the default one
   1422      * @todo Complete rethink of this and above method
   1423      * @since 1.0.5
   1424      */
   function load_from_file($file_name, $lookup = array()) {
       // Loads $file_name as the source and picks the language from the
       // file's extension, using $lookup (or the default table when empty).
       if (!is_readable($file_name)) {
           $this->error = GESHI_ERROR_FILE_NOT_READABLE;
           return;
       }

       // is_readable() can succeed where the read itself still fails;
       // report that as an unreadable file instead of passing false on as
       // the source.
       $contents = file_get_contents($file_name);
       if (false === $contents) {
           $this->error = GESHI_ERROR_FILE_NOT_READABLE;
           return;
       }
       $this->set_source($contents);

       // pathinfo() only looks at the last path component, so a dot in a
       // directory name ("/some.dir/README") is not mistaken for the start
       // of an extension. Files without an extension yield ''.
       $extension = pathinfo($file_name, PATHINFO_EXTENSION);
       $this->set_language($this->get_language_name_from_extension($extension, $lookup));
   }
   1433 
   1434     /**
   1435      * Adds a keyword to a keyword group for highlighting
   1436      *
   1437      * @param int    The key of the keyword group to add the keyword to
   1438      * @param string The word to add to the keyword group
   1439      * @since 1.0.0
   1440      */
   function add_keyword($key, $word) {
       // Adding to a group that does not exist yet starts a new, empty
       // group instead of handing a non-array to in_array().
       if (!isset($this->language_data['KEYWORDS'][$key])
           || !is_array($this->language_data['KEYWORDS'][$key])) {
           $this->language_data['KEYWORDS'][$key] = array();
       }

       // Nothing to do when the word is already part of the group.
       if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
           return;
       }
       $this->language_data['KEYWORDS'][$key][] = $word;

       // The regexp cache only needs maintaining once it has been built.
       if (!$this->parse_cache_built) {
           return;
       }

       // No cached list to extend for this group: compile it from scratch
       // rather than appending to a non-existent entry (index -1).
       if (empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
           $this->optimize_keyword_group($key);
           return;
       }

       //NEW in 1.0.8 don't recompile the whole optimized regexp, simply
       //append the new word as one more alternative of the last entry
       $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
       $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
   }
   1452 
   1453     /**
   1454      * Removes a keyword from a keyword group
   1455      *
   1456      * @param int    The key of the keyword group to remove the keyword from
   1457      * @param string The word to remove from the keyword group
   1458      * @param bool   Whether to automatically recompile the optimized regexp list or not.
   1459      *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
   1460      *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
   1461      *               or the removed keyword will stay in cache and still be highlighted! On the other hand
   1462      *               it might be too expensive to recompile the regexp list for every removal if you want to
   1463      *               remove a lot of keywords.
   1464      * @since 1.0.0
   1465      */
   function remove_keyword($key, $word, $recompile = true) {
       // Locate the word inside the group; nothing happens if it is absent.
       $position = array_search($word, $this->language_data['KEYWORDS'][$key]);
       if (false === $position) {
           return;
       }

       unset($this->language_data['KEYWORDS'][$key][$position]);

       //NEW in 1.0.8, optionally recompile keyword group. Only needed once
       //the cache exists; with $recompile off the caller must refresh the
       //cache via optimize_keyword_group() themselves.
       if (!$recompile || !$this->parse_cache_built) {
           return;
       }
       $this->optimize_keyword_group($key);
   }
   1477 
   1478     /**
   1479      * Creates a new keyword group
   1480      *
   1481      * @param int    The key of the keyword group to create
   1482      * @param string The styles for the keyword group
   1483      * @param boolean Whether the keyword group is case sensitive or not
   1484      * @param array  The words to use for the keyword group
   1485      * @since 1.0.0
   1486      */
   function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
       // Returns false when no words were given; otherwise registers the
       // group and returns nothing.
       $words = (array) $words;
       if (0 === count($words)) {
           // empty word lists mess up highlighting
           return false;
       }

       // Register the group: word list, style, case handling, and
       // highlighting switched on.
       $this->language_data['KEYWORDS'][$key] = $words;
       $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
       $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
       $this->lexic_permissions['KEYWORDS'][$key] = true;

       //NEW in 1.0.8, cache keyword regexp (only once the cache exists)
       if (!$this->parse_cache_built) {
           return;
       }
       $this->optimize_keyword_group($key);
   }
   1505 
   1506     /**
   1507      * Removes a keyword group
   1508      *
   1509      * @param int    The key of the keyword group to remove
   1510      * @since 1.0.0
   1511      */
   function remove_keyword_group ($key) {
       // Drop every trace of the group: its words, its highlighting
       // permission, its case flag, its style and (NEW in 1.0.8) its
       // cached regexp list.
       unset(
           $this->language_data['KEYWORDS'][$key],
           $this->lexic_permissions['KEYWORDS'][$key],
           $this->language_data['CASE_SENSITIVE'][$key],
           $this->language_data['STYLES']['KEYWORDS'][$key],
           $this->language_data['CACHED_KEYWORD_LISTS'][$key]
       );
   }
   1522 
   1523     /**
   1524      * compile optimized regexp list for keyword group
   1525      *
   1526      * @param int   The key of the keyword group to compile & optimize
   1527      * @since 1.0.8
   1528      */
   1529     function optimize_keyword_group($key) {
   1530         $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
   1531             $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
   1532         $space_as_whitespace = false;
   1533         if(isset($this->language_data['PARSER_CONTROL'])) {
   1534             if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
   1535                 if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
   1536                     $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
   1537                 }
   1538                 if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
   1539                     if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
   1540                         $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
   1541                     }
   1542                 }
   1543             }
   1544         }
   1545         if($space_as_whitespace) {
   1546             foreach($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
   1547                 $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
   1548                     str_replace(" ", "\\s+", $rxv);
   1549             }
   1550         }
   1551     }
   1552 
   1553     /**
   1554      * Sets the content of the header block
   1555      *
   1556      * @param string The content of the header block
   1557      * @since 1.0.2
   1558      */
   1559     function set_header_content($content) {
   1560         $this->header_content = $content;
   1561     }
   1562 
   1563     /**
   1564      * Sets the content of the footer block
   1565      *
   1566      * @param string The content of the footer block
   1567      * @since 1.0.2
   1568      */
   1569     function set_footer_content($content) {
   1570         $this->footer_content = $content;
   1571     }
   1572 
   1573     /**
   1574      * Sets the style for the header content
   1575      *
   1576      * @param string The style for the header content
   1577      * @since 1.0.2
   1578      */
   1579     function set_header_content_style($style) {
   1580         $this->header_content_style = $style;
   1581     }
   1582 
   1583     /**
   1584      * Sets the style for the footer content
   1585      *
   1586      * @param string The style for the footer content
   1587      * @since 1.0.2
   1588      */
   1589     function set_footer_content_style($style) {
   1590         $this->footer_content_style = $style;
   1591     }
   1592 
   1593     /**
   1594      * Sets whether to force a surrounding block around
   1595      * the highlighted code or not
   1596      *
   1597      * @param boolean Tells whether to enable or disable this feature
   1598      * @since 1.0.7.20
   1599      */
   1600     function enable_inner_code_block($flag) {
   1601         $this->force_code_block = (bool)$flag;
   1602     }
   1603 
   1604     /**
   1605      * Sets the base URL to be used for keywords
   1606      *
   1607      * @param int The key of the keyword group to set the URL for
   1608      * @param string The URL to set for the group. If {FNAME} is in
   1609      *               the url somewhere, it is replaced by the keyword
   1610      *               that the URL is being made for
   1611      * @since 1.0.2
   1612      */
   1613     function set_url_for_keyword_group($group, $url) {
   1614         $this->language_data['URLS'][$group] = $url;
   1615     }
   1616 
   1617     /**
   1618      * Sets styles for links in code
   1619      *
   1620      * @param int A constant that specifies what state the style is being
   1621      *            set for - e.g. :hover or :visited
   1622      * @param string The styles to use for that state
   1623      * @since 1.0.2
   1624      */
   1625     function set_link_styles($type, $styles) {
   1626         $this->link_styles[$type] = $styles;
   1627     }
   1628 
   1629     /**
   1630      * Sets the target for links in code
   1631      *
   1632      * @param string The target for links in the code, e.g. _blank
   1633      * @since 1.0.3
   1634      */
   1635     function set_link_target($target) {
   1636         if (!$target) {
   1637             $this->link_target = '';
   1638         } else {
   1639             $this->link_target = ' target="' . $target . '"';
   1640         }
   1641     }
   1642 
   1643     /**
   1644      * Sets styles for important parts of the code
   1645      *
   1646      * @param string The styles to use on important parts of the code
   1647      * @since 1.0.2
   1648      */
   1649     function set_important_styles($styles) {
   1650         $this->important_styles = $styles;
   1651     }
   1652 
   1653     /**
   1654      * Sets whether context-important blocks are highlighted
   1655      *
   1656      * @param boolean Tells whether to enable or disable highlighting of important blocks
   1657      * @todo REMOVE THIS SHIZ FROM GESHI!
   1658      * @deprecated
   1659      * @since 1.0.2
   1660      */
   1661     function enable_important_blocks($flag) {
   1662         $this->enable_important_blocks = ( $flag ) ? true : false;
   1663     }
   1664 
   1665     /**
   1666      * Whether CSS IDs should be added to each line
   1667      *
   1668      * @param boolean If true, IDs will be added to each line.
   1669      * @since 1.0.2
   1670      */
   1671     function enable_ids($flag = true) {
   1672         $this->add_ids = ($flag) ? true : false;
   1673     }
   1674 
   1675     /**
   1676      * Specifies which lines to highlight extra
   1677      *
   1678      * The extra style parameter was added in 1.0.7.21.
   1679      *
   1680      * @param mixed An array of line numbers to highlight, or just a line
   1681      *              number on its own.
   1682      * @param string A string specifying the style to use for this line.
   1683      *              If null is specified, the default style is used.
   1684      *              If false is specified, the line will be removed from
   1685      *              special highlighting
   1686      * @since 1.0.2
   1687      * @todo  Some data replication here that could be cut down on
   1688      */
   1689     function highlight_lines_extra($lines, $style = null) {
   1690         if (is_array($lines)) {
   1691             //Split up the job using single lines at a time
   1692             foreach ($lines as $line) {
   1693                 $this->highlight_lines_extra($line, $style);
   1694             }
   1695         } else {
   1696             //Mark the line as being highlighted specially
   1697             $lines = intval($lines);
   1698             $this->highlight_extra_lines[$lines] = $lines;
   1699 
   1700             //Decide on which style to use
   1701             if ($style === null) { //Check if we should use default style
   1702                 unset($this->highlight_extra_lines_styles[$lines]);
   1703             } else if ($style === false) { //Check if to remove this line
   1704                 unset($this->highlight_extra_lines[$lines]);
   1705                 unset($this->highlight_extra_lines_styles[$lines]);
   1706             } else {
   1707                 $this->highlight_extra_lines_styles[$lines] = $style;
   1708             }
   1709         }
   1710     }
   1711 
   1712     /**
   1713      * Sets the style for extra-highlighted lines
   1714      *
   1715      * @param string The style for extra-highlighted lines
   1716      * @since 1.0.2
   1717      */
   1718     function set_highlight_lines_extra_style($styles) {
   1719         $this->highlight_extra_lines_style = $styles;
   1720     }
   1721 
   1722     /**
   1723      * Sets the line-ending
   1724      *
   1725      * @param string The new line-ending
   1726      * @since 1.0.2
   1727      */
   1728     function set_line_ending($line_ending) {
   1729         $this->line_ending = (string)$line_ending;
   1730     }
   1731 
   1732     /**
   1733      * Sets what number line numbers should start at. Should
   1734      * be a positive integer, and will be converted to one.
   1735      *
   1736      * <b>Warning:</b> Using this method will add the "start"
   1737      * attribute to the &lt;ol&gt; that is used for line numbering.
   1738      * This is <b>not</b> valid XHTML strict, so if that's what you
   1739      * care about then don't use this method. Firefox is getting
   1740      * support for the CSS method of doing this in 1.1 and Opera
   1741      * has support for the CSS method, but (of course) IE doesn't
   1742      * so it's not worth doing it the CSS way yet.
   1743      *
   1744      * @param int The number to start line numbers at
   1745      * @since 1.0.2
   1746      */
   1747     function start_line_numbers_at($number) {
   1748         $this->line_numbers_start = abs(intval($number));
   1749     }
   1750 
   1751     /**
   1752      * Sets the encoding used for htmlspecialchars(), for international
   1753      * support.
   1754      *
   1755      * NOTE: This is not needed for now because htmlspecialchars() is not
   1756      * being used (it has a security hole in PHP4 that has not been patched).
   1757      * Maybe in a future version it may make a return for speed reasons, but
   1758      * I doubt it.
   1759      *
   1760      * @param string The encoding to use for the source
   1761      * @since 1.0.3
   1762      */
   1763     function set_encoding($encoding) {
   1764         if ($encoding) {
   1765           $this->encoding = strtolower($encoding);
   1766         }
   1767     }
   1768 
   1769     /**
   1770      * Turns linking of keywords on or off.
   1771      *
   1772      * @param boolean If true, links will be added to keywords
   1773      * @since 1.0.2
   1774      */
   1775     function enable_keyword_links($enable = true) {
   1776         $this->keyword_links = (bool) $enable;
   1777     }
   1778 
   1779     /**
   1780      * Setup caches needed for styling. This is automatically called in
   1781      * parse_code() and get_stylesheet() when appropriate. This function helps
   1782      * stylesheet generators as they rely on some style information being
   1783      * preprocessed
   1784      *
   1785      * @since 1.0.8
   1786      * @access private
   1787      */
   1788     function build_style_cache() {
   1789         //Build the style cache needed to highlight numbers appropriate
   1790         if($this->lexic_permissions['NUMBERS']) {
   1791             //First check what way highlighting information for numbers are given
   1792             if(!isset($this->language_data['NUMBERS'])) {
   1793                 $this->language_data['NUMBERS'] = 0;
   1794             }
   1795 
   1796             if(is_array($this->language_data['NUMBERS'])) {
   1797                 $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
   1798             } else {
   1799                 $this->language_data['NUMBERS_CACHE'] = array();
   1800                 if(!$this->language_data['NUMBERS']) {
   1801                     $this->language_data['NUMBERS'] =
   1802                         GESHI_NUMBER_INT_BASIC |
   1803                         GESHI_NUMBER_FLT_NONSCI;
   1804                 }
   1805 
   1806                 for($i = 0, $j = $this->language_data['NUMBERS']; $j > 0; ++$i, $j>>=1) {
   1807                     //Rearrange style indices if required ...
   1808                     if(isset($this->language_data['STYLES']['NUMBERS'][1<<$i])) {
   1809                         $this->language_data['STYLES']['NUMBERS'][$i] =
   1810                             $this->language_data['STYLES']['NUMBERS'][1<<$i];
   1811                         unset($this->language_data['STYLES']['NUMBERS'][1<<$i]);
   1812                     }
   1813 
   1814                     //Check if this bit is set for highlighting
   1815                     if($j&1) {
   1816                         //So this bit is set ...
   1817                         //Check if it belongs to group 0 or the actual stylegroup
   1818                         if(isset($this->language_data['STYLES']['NUMBERS'][$i])) {
   1819                             $this->language_data['NUMBERS_CACHE'][$i] = 1 << $i;
   1820                         } else {
   1821                             if(!isset($this->language_data['NUMBERS_CACHE'][0])) {
   1822                                 $this->language_data['NUMBERS_CACHE'][0] = 0;
   1823                             }
   1824                             $this->language_data['NUMBERS_CACHE'][0] |= 1 << $i;
   1825                         }
   1826                     }
   1827                 }
   1828             }
   1829         }
   1830     }
   1831 
   1832     /**
   1833      * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
   1834      * This function makes stylesheet generators much faster as they do not need these caches.
   1835      *
   1836      * @since 1.0.8
   1837      * @access private
   1838      */
   1839     function build_parse_cache() {
   1840         // cache symbol regexp
   1841         //As this is a costy operation, we avoid doing it for multiple groups ...
   1842         //Instead we perform it for all symbols at once.
   1843         //
   1844         //For this to work, we need to reorganize the data arrays.
   1845         if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
   1846             $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;
   1847 
   1848             $this->language_data['SYMBOL_DATA'] = array();
   1849             $symbol_preg_multi = array(); // multi char symbols
   1850             $symbol_preg_single = array(); // single char symbols
   1851             foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
   1852                 if (is_array($symbols)) {
   1853                     foreach ($symbols as $sym) {
   1854                         $sym = $this->hsc($sym);
   1855                         if (!isset($this->language_data['SYMBOL_DATA'][$sym])) {
   1856                             $this->language_data['SYMBOL_DATA'][$sym] = $key;
   1857                             if (isset($sym[1])) { // multiple chars
   1858                                 $symbol_preg_multi[] = preg_quote($sym, '/');
   1859                             } else { // single char
   1860                                 if ($sym == '-') {
   1861                                     // don't trigger range out of order error
   1862                                     $symbol_preg_single[] = '\-';
   1863                                 } else {
   1864                                     $symbol_preg_single[] = preg_quote($sym, '/');
   1865                                 }
   1866                             }
   1867                         }
   1868                     }
   1869                 } else {
   1870                     $symbols = $this->hsc($symbols);
   1871                     if (!isset($this->language_data['SYMBOL_DATA'][$symbols])) {
   1872                         $this->language_data['SYMBOL_DATA'][$symbols] = 0;
   1873                         if (isset($symbols[1])) { // multiple chars
   1874                             $symbol_preg_multi[] = preg_quote($symbols, '/');
   1875                         } else if ($symbols == '-') {
   1876                             // don't trigger range out of order error
   1877                             $symbol_preg_single[] = '\-';
   1878                         } else { // single char
   1879                             $symbol_preg_single[] = preg_quote($symbols, '/');
   1880                         }
   1881                     }
   1882                 }
   1883             }
   1884 
   1885             //Now we have an array with each possible symbol as the key and the style as the actual data.
   1886             //This way we can set the correct style just the moment we highlight ...
   1887             //
   1888             //Now we need to rewrite our array to get a search string that
   1889             $symbol_preg = array();
   1890             if (!empty($symbol_preg_multi)) {
   1891                 rsort($symbol_preg_multi);
   1892                 $symbol_preg[] = implode('|', $symbol_preg_multi);
   1893             }
   1894             if (!empty($symbol_preg_single)) {
   1895                 rsort($symbol_preg_single);
   1896                 $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
   1897             }
   1898             $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
   1899         }
   1900 
   1901         // cache optimized regexp for keyword matching
   1902         // remove old cache
   1903         $this->language_data['CACHED_KEYWORD_LISTS'] = array();
   1904         foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
   1905             if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
   1906                     $this->lexic_permissions['KEYWORDS'][$key]) {
   1907                 $this->optimize_keyword_group($key);
   1908             }
   1909         }
   1910 
   1911         // brackets
   1912         if ($this->lexic_permissions['BRACKETS']) {
   1913             $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
   1914             if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
   1915                 $this->language_data['CACHE_BRACKET_REPLACE'] = array(
   1916                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#91;|>',
   1917                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#93;|>',
   1918                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#40;|>',
   1919                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#41;|>',
   1920                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#123;|>',
   1921                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#125;|>',
   1922                 );
   1923             }
   1924             else {
   1925                 $this->language_data['CACHE_BRACKET_REPLACE'] = array(
   1926                     '<| class="br0">&#91;|>',
   1927                     '<| class="br0">&#93;|>',
   1928                     '<| class="br0">&#40;|>',
   1929                     '<| class="br0">&#41;|>',
   1930                     '<| class="br0">&#123;|>',
   1931                     '<| class="br0">&#125;|>',
   1932                 );
   1933             }
   1934         }
   1935 
   1936         //Build the parse cache needed to highlight numbers appropriate
   1937         if($this->lexic_permissions['NUMBERS']) {
   1938             //Check if the style rearrangements have been processed ...
   1939             //This also does some preprocessing to check which style groups are useable ...
   1940             if(!isset($this->language_data['NUMBERS_CACHE'])) {
   1941                 $this->build_style_cache();
   1942             }
   1943 
   1944             //Number format specification
   1945             //All this formats are matched case-insensitively!
   1946             static $numbers_format = array(
   1947                 GESHI_NUMBER_INT_BASIC =>
   1948                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z\.])',
   1949                 GESHI_NUMBER_INT_CSTYLE =>
   1950                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z\.])',
   1951                 GESHI_NUMBER_BIN_SUFFIX =>
   1952                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?b(?![0-9a-z\.])',
   1953                 GESHI_NUMBER_BIN_PREFIX_PERCENT =>
   1954                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z\.])',
   1955                 GESHI_NUMBER_BIN_PREFIX_0B =>
   1956                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z\.])',
   1957                 GESHI_NUMBER_OCT_PREFIX =>
   1958                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z\.])',
   1959                 GESHI_NUMBER_OCT_SUFFIX =>
   1960                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z\.])',
   1961                 GESHI_NUMBER_HEX_PREFIX =>
   1962                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-f]+?(?![0-9a-z\.])',
   1963                 GESHI_NUMBER_HEX_SUFFIX =>
   1964                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-f]*?h(?![0-9a-z\.])',
   1965                 GESHI_NUMBER_FLT_NONSCI =>
   1966                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z\.])',
   1967                 GESHI_NUMBER_FLT_NONSCI_F =>
   1968                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z\.])',
   1969                 GESHI_NUMBER_FLT_SCI_SHORT =>
   1970                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z\.])',
   1971                 GESHI_NUMBER_FLT_SCI_ZERO =>
   1972                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z\.])'
   1973                 );
   1974 
   1975             //At this step we have an associative array with flag groups for a
   1976             //specific style or an string denoting a regexp given its index.
   1977             $this->language_data['NUMBERS_RXCACHE'] = array();
   1978             foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
   1979                 if(is_string($rxdata)) {
   1980                     $regexp = $rxdata;
   1981                 } else {
   1982                     //This is a bitfield of number flags to highlight:
   1983                     //Build an array, implode them together and make this the actual RX
   1984                     $rxuse = array();
   1985                     for($i = 1; $i <= $rxdata; $i<<=1) {
   1986                         if($rxdata & $i) {
   1987                             $rxuse[] = $numbers_format[$i];
   1988                         }
   1989                     }
   1990                     $regexp = implode("|", $rxuse);
   1991                 }
   1992 
   1993                 $this->language_data['NUMBERS_RXCACHE'][$key] =
   1994                     "/(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!\|>)/i";
   1995             }
   1996         }
   1997 
   1998         $this->parse_cache_built = true;
   1999     }
   2000 
   2001     /**
   2002      * Returns the code in $this->source, highlighted and surrounded by the
     * necessary HTML.
   2004      *
   2005      * This should only be called ONCE, cos it's SLOW! If you want to highlight
   2006      * the same source multiple times, you're better off doing a whole lot of
   2007      * str_replaces to replace the &lt;span&gt;s
   2008      *
   2009      * @since 1.0.0
   2010      */
   2011     function parse_code () {
   2012         // Start the timer
   2013         $start_time = microtime();
   2014 
   2015         // Firstly, if there is an error, we won't highlight
   2016         if ($this->error) {
   2017             //Escape the source for output
   2018             $result = $this->hsc($this->source);
   2019 
   2020             //This fix is related to SF#1923020, but has to be applied regardless of
   2021             //actually highlighting symbols.
   2022             $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
   2023 
   2024             // Timing is irrelevant
   2025             $this->set_time($start_time, $start_time);
   2026             $this->finalise($result);
   2027             return $result;
   2028         }
   2029 
   2030         // make sure the parse cache is up2date
   2031         if (!$this->parse_cache_built) {
   2032             $this->build_parse_cache();
   2033         }
   2034 
   2035         // Replace all newlines to a common form.
   2036         $code = str_replace("\r\n", "\n", $this->source);
   2037         $code = str_replace("\r", "\n", $code);
   2038 
   2039         // Add spaces for regular expression matching and line numbers
   2040 //        $code = "\n" . $code . "\n";
   2041 
   2042         // Initialise various stuff
   2043         $length           = strlen($code);
   2044         $COMMENT_MATCHED  = false;
   2045         $stuff_to_parse   = '';
   2046         $endresult        = '';
   2047 
   2048         // "Important" selections are handled like multiline comments
   2049         // @todo GET RID OF THIS SHIZ
   2050         if ($this->enable_important_blocks) {
   2051             $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
   2052         }
   2053 
   2054         if ($this->strict_mode) {
   2055             // Break the source into bits. Each bit will be a portion of the code
   2056             // within script delimiters - for example, HTML between < and >
   2057             $k = 0;
   2058             $parts = array();
   2059             $matches = array();
   2060             $next_match_pointer = null;
   2061             // we use a copy to unset delimiters on demand (when they are not found)
   2062             $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
   2063             $i = 0;
   2064             while ($i < $length) {
   2065                 $next_match_pos = $length + 1; // never true
   2066                 foreach ($delim_copy as $dk => $delimiters) {
   2067                     if(is_array($delimiters)) {
   2068                         foreach ($delimiters as $open => $close) {
   2069                             // make sure the cache is setup properly
   2070                             if (!isset($matches[$dk][$open])) {
   2071                                 $matches[$dk][$open] = array(
   2072                                     'next_match' => -1,
   2073                                     'dk' => $dk,
   2074 
   2075                                     'open' => $open, // needed for grouping of adjacent code blocks (see below)
   2076                                     'open_strlen' => strlen($open),
   2077 
   2078                                     'close' => $close,
   2079                                     'close_strlen' => strlen($close),
   2080                                 );
   2081                             }
   2082                             // Get the next little bit for this opening string
   2083                             if ($matches[$dk][$open]['next_match'] < $i) {
   2084                                 // only find the next pos if it was not already cached
   2085                                 $open_pos = strpos($code, $open, $i);
   2086                                 if ($open_pos === false) {
   2087                                     // no match for this delimiter ever
   2088                                     unset($delim_copy[$dk][$open]);
   2089                                     continue;
   2090                                 }
   2091                                 $matches[$dk][$open]['next_match'] = $open_pos;
   2092                             }
   2093                             if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
   2094                                 //So we got a new match, update the close_pos
   2095                                 $matches[$dk][$open]['close_pos'] =
   2096                                     strpos($code, $close, $matches[$dk][$open]['next_match']+1);
   2097 
   2098                                 $next_match_pointer =& $matches[$dk][$open];
   2099                                 $next_match_pos = $matches[$dk][$open]['next_match'];
   2100                             }
   2101                         }
   2102                     } else {
   2103                         //So we should match an RegExp as Strict Block ...
   2104                         /**
   2105                          * The value in $delimiters is expected to be an RegExp
   2106                          * containing exactly 2 matching groups:
   2107                          *  - Group 1 is the opener
   2108                          *  - Group 2 is the closer
   2109                          */
   2110                         if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
   2111                             preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
   2112                             //We got a match ...
   2113                             $matches[$dk] = array(
   2114                                 'next_match' => $matches_rx[1][1],
   2115                                 'dk' => $dk,
   2116 
   2117                                 'close_strlen' => strlen($matches_rx[2][0]),
   2118                                 'close_pos' => $matches_rx[2][1],
   2119                                 );
   2120                         } else {
   2121                             // no match for this delimiter ever
   2122                             unset($delim_copy[$dk]);
   2123                             continue;
   2124                         }
   2125 
   2126                         if ($matches[$dk]['next_match'] <= $next_match_pos) {
   2127                             $next_match_pointer =& $matches[$dk];
   2128                             $next_match_pos = $matches[$dk]['next_match'];
   2129                         }
   2130                     }
   2131                 }
   2132                 // non-highlightable text
   2133                 $parts[$k] = array(
   2134                     1 => substr($code, $i, $next_match_pos - $i)
   2135                 );
   2136                 ++$k;
   2137 
   2138                 if ($next_match_pos > $length) {
   2139                     // out of bounds means no next match was found
   2140                     break;
   2141                 }
   2142 
   2143                 // highlightable code
   2144                 $parts[$k][0] = $next_match_pointer['dk'];
   2145 
   2146                 //Only combine for non-rx script blocks
   2147                 if(is_array($delim_copy[$next_match_pointer['dk']])) {
   2148                     // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
   2149                     $i = $next_match_pos + $next_match_pointer['open_strlen'];
   2150                     while (true) {
   2151                         $close_pos = strpos($code, $next_match_pointer['close'], $i);
   2152                         if ($close_pos == false) {
   2153                             break;
   2154                         }
   2155                         $i = $close_pos + $next_match_pointer['close_strlen'];
   2156                         if ($i == $length) {
   2157                             break;
   2158                         }
   2159                         if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
   2160                             substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
   2161                             // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
   2162                             foreach ($matches as $submatches) {
   2163                                 foreach ($submatches as $match) {
   2164                                     if ($match['next_match'] == $i) {
   2165                                         // a different block already matches here!
   2166                                         break 3;
   2167                                     }
   2168                                 }
   2169                             }
   2170                         } else {
   2171                             break;
   2172                         }
   2173                     }
   2174                 } else {
   2175                     $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
   2176                     $i = $close_pos;
   2177                 }
   2178 
   2179                 if ($close_pos === false) {
   2180                     // no closing delimiter found!
   2181                     $parts[$k][1] = substr($code, $next_match_pos);
   2182                     ++$k;
   2183                     break;
   2184                 } else {
   2185                     $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
   2186                     ++$k;
   2187                 }
   2188             }
   2189             unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
   2190             $num_parts = $k;
   2191 
   2192             if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
   2193                 // when we have only one part, we don't have anything to highlight at all.
   2194                 // if we have a "maybe" strict language, this should be handled as highlightable code
   2195                 $parts = array(
   2196                     0 => array(
   2197                         0 => '',
   2198                         1 => ''
   2199                     ),
   2200                     1 => array(
   2201                         0 => null,
   2202                         1 => $parts[0][1]
   2203                     )
   2204                 );
   2205                 $num_parts = 2;
   2206             }
   2207 
   2208         } else {
   2209             // Not strict mode - simply dump the source into
   2210             // the array at index 1 (the first highlightable block)
   2211             $parts = array(
   2212                 0 => array(
   2213                     0 => '',
   2214                     1 => ''
   2215                 ),
   2216                 1 => array(
   2217                     0 => null,
   2218                     1 => $code
   2219                 )
   2220             );
   2221             $num_parts = 2;
   2222         }
   2223 
   2224         //Unset variables we won't need any longer
   2225         unset($code);
   2226 
   2227         //Preload some repeatedly used values regarding hardquotes ...
   2228         $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
   2229         $hq_strlen = strlen($hq);
   2230 
   2231         //Preload if line numbers are to be generated afterwards
   2232         //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
   2233         $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
   2234             !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
   2235 
   2236         //preload the escape char for faster checking ...
   2237         $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
   2238 
   2239         // this is used for single-line comments
   2240         $sc_disallowed_before = "";
   2241         $sc_disallowed_after = "";
   2242 
   2243         if (isset($this->language_data['PARSER_CONTROL'])) {
   2244             if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
   2245                 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
   2246                     $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
   2247                 }
   2248                 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
   2249                     $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
   2250                 }
   2251             }
   2252         }
   2253 
   2254         //Fix for SF#1932083: Multichar Quotemarks unsupported
   2255         $is_string_starter = array();
   2256         if ($this->lexic_permissions['STRINGS']) {
   2257             foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
   2258                 if (!isset($is_string_starter[$quotemark[0]])) {
   2259                     $is_string_starter[$quotemark[0]] = (string)$quotemark;
   2260                 } else if (is_string($is_string_starter[$quotemark[0]])) {
   2261                     $is_string_starter[$quotemark[0]] = array(
   2262                         $is_string_starter[$quotemark[0]],
   2263                         $quotemark);
   2264                 } else {
   2265                     $is_string_starter[$quotemark[0]][] = $quotemark;
   2266                 }
   2267             }
   2268         }
   2269 
   2270         // Now we go through each part. We know that even-indexed parts are
   2271         // code that shouldn't be highlighted, and odd-indexed parts should
   2272         // be highlighted
   2273         for ($key = 0; $key < $num_parts; ++$key) {
   2274             $STRICTATTRS = '';
   2275 
   2276             // If this block should NOT be highlighted (even index)...
   2277             if (!($key & 1)) {
   2278                 // Not a block to highlight: just escape it and append it to the output
   2279                 $endresult .= $this->hsc($parts[$key][1]);
   2280                 unset($parts[$key]);
   2281                 continue;
   2282             }
   2283 
   2284             $result = '';
   2285             $part = $parts[$key][1];
   2286 
   2287             $highlight_part = true;
   2288             if ($this->strict_mode && !is_null($parts[$key][0])) {
   2289                 // get the class key for this block of code
   2290                 $script_key = $parts[$key][0];
   2291                 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
   2292                 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
   2293                     $this->lexic_permissions['SCRIPT']) {
   2294                     // Add a span element around the source to
   2295                     // highlight the overall source block
   2296                     if (!$this->use_classes &&
   2297                         $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
   2298                         $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
   2299                     } else {
   2300                         $attributes = ' class="sc' . $script_key . '"';
   2301                     }
   2302                     $result .= "<span$attributes>";
   2303                     $STRICTATTRS = $attributes;
   2304                 }
   2305             }
   2306 
   2307             if ($highlight_part) {
   2308                 // Now, highlight the code in this block. This code
   2309                 // is really the engine of GeSHi (along with the method
   2310                 // parse_non_string_part).
   2311 
   2312                 // cache comment regexps incrementally
   2313                 $next_comment_regexp_key = '';
   2314                 $next_comment_regexp_pos = -1;
   2315                 $next_comment_multi_pos = -1;
   2316                 $next_comment_single_pos = -1;
   2317                 $comment_regexp_cache_per_key = array();
   2318                 $comment_multi_cache_per_key = array();
   2319                 $comment_single_cache_per_key = array();
   2320                 $next_open_comment_multi = '';
   2321                 $next_comment_single_key = '';
   2322                 $escape_regexp_cache_per_key = array();
   2323                 $next_escape_regexp_key = '';
   2324                 $next_escape_regexp_pos = -1;
   2325 
   2326                 $length = strlen($part);
   2327                 for ($i = 0; $i < $length; ++$i) {
   2328                     // Get the next char
   2329                     $char = $part[$i];
   2330                     $char_len = 1;
   2331 
   2332                     // update regexp comment cache if needed
   2333                     if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
   2334                         $next_comment_regexp_pos = $length;
   2335                         foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
   2336                             $match_i = false;
   2337                             if (isset($comment_regexp_cache_per_key[$comment_key]) &&
   2338                                 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
   2339                                  $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
   2340                                 // we have already matched something
   2341                                 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
   2342                                     // this comment is never matched
   2343                                     continue;
   2344                                 }
   2345                                 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
   2346                             } else if (
   2347                                 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
   2348                                 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
   2349                                 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
   2350                                 ) {
   2351                                 $match_i = $match[0][1];
   2352                                 if (GESHI_PHP_PRE_433) {
   2353                                     $match_i += $i;
   2354                                 }
   2355 
   2356                                 $comment_regexp_cache_per_key[$comment_key] = array(
   2357                                     'key' => $comment_key,
   2358                                     'length' => strlen($match[0][0]),
   2359                                     'pos' => $match_i
   2360                                 );
   2361                             } else {
   2362                                 $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
   2363                                 continue;
   2364                             }
   2365 
   2366                             if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
   2367                                 $next_comment_regexp_pos = $match_i;
   2368                                 $next_comment_regexp_key = $comment_key;
   2369                                 if ($match_i === $i) {
   2370                                     break;
   2371                                 }
   2372                             }
   2373                         }
   2374                     }
   2375 
   2376                     $string_started = false;
   2377 
   2378                     if (isset($is_string_starter[$char])) {
   2379                         // Possibly the start of a new string ...
   2380 
   2381                         //Check which starter it was ...
   2382                         //Fix for SF#1932083: Multichar Quotemarks unsupported
   2383                         if (is_array($is_string_starter[$char])) {
   2384                             $char_new = '';
   2385                             foreach ($is_string_starter[$char] as $testchar) {
   2386                                 if ($testchar === substr($part, $i, strlen($testchar)) &&
   2387                                     strlen($testchar) > strlen($char_new)) {
   2388                                     $char_new = $testchar;
   2389                                     $string_started = true;
   2390                                 }
   2391                             }
   2392                             if ($string_started) {
   2393                                 $char = $char_new;
   2394                             }
   2395                         } else {
   2396                             $testchar = $is_string_starter[$char];
   2397                             if ($testchar === substr($part, $i, strlen($testchar))) {
   2398                                 $char = $testchar;
   2399                                 $string_started = true;
   2400                             }
   2401                         }
   2402                         $char_len = strlen($char);
   2403                     }
   2404 
   2405                     if ($string_started && $i != $next_comment_regexp_pos) {
   2406                         // Hand out the correct style information for this string
   2407                         $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
   2408                         if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
   2409                             !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
   2410                             $string_key = 0;
   2411                         }
   2412 
   2413                         // parse the stuff before this
   2414                         $result .= $this->parse_non_string_part($stuff_to_parse);
   2415                         $stuff_to_parse = '';
   2416 
   2417                         if (!$this->use_classes) {
   2418                             $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
   2419                         } else {
   2420                             $string_attributes = ' class="st'.$string_key.'"';
   2421                         }
   2422 
   2423                         // now handle the string
   2424                         $string = "<span$string_attributes>" . GeSHi::hsc($char);
   2425                         $start = $i + $char_len;
   2426                         $string_open = true;
   2427 
   2428                         if(empty($this->language_data['ESCAPE_REGEXP'])) {
   2429                             $next_escape_regexp_pos = $length;
   2430                         }
   2431 
   2432                         do {
   2433                             //Get the regular ending pos ...
   2434                             $close_pos = strpos($part, $char, $start);
   2435                             if(false === $close_pos) {
   2436                                 $close_pos = $length;
   2437                             }
   2438 
   2439                             if($this->lexic_permissions['ESCAPE_CHAR']) {
   2440                                 // update escape regexp cache if needed
   2441                                 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
   2442                                     $next_escape_regexp_pos = $length;
   2443                                     foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
   2444                                         $match_i = false;
   2445                                         if (isset($escape_regexp_cache_per_key[$escape_key]) &&
   2446                                             ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
   2447                                              $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
   2448                                             // we have already matched something
   2449                                             if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
   2450                                                 // this escape regexp is never matched
   2451                                                 continue;
   2452                                             }
   2453                                             $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
   2454                                         } else if (
   2455                                             //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
   2456                                             (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
   2457                                             (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
   2458                                             ) {
   2459                                             $match_i = $match[0][1];
   2460                                             if (GESHI_PHP_PRE_433) {
   2461                                                 $match_i += $start;
   2462                                             }
   2463 
   2464                                             $escape_regexp_cache_per_key[$escape_key] = array(
   2465                                                 'key' => $escape_key,
   2466                                                 'length' => strlen($match[0][0]),
   2467                                                 'pos' => $match_i
   2468                                             );
   2469                                         } else {
   2470                                             $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
   2471                                             continue;
   2472                                         }
   2473 
   2474                                         if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
   2475                                             $next_escape_regexp_pos = $match_i;
   2476                                             $next_escape_regexp_key = $escape_key;
   2477                                             if ($match_i === $start) {
   2478                                                 break;
   2479                                             }
   2480                                         }
   2481                                     }
   2482                                 }
   2483 
   2484                                 //Find the next simple escape position
   2485                                 if('' != $this->language_data['ESCAPE_CHAR']) {
   2486                                     $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
   2487                                     if(false === $simple_escape) {
   2488                                         $simple_escape = $length;
   2489                                     }
   2490                                 } else {
   2491                                     $simple_escape = $length;
   2492                                 }
   2493                             } else {
   2494                                 $next_escape_regexp_pos = $length;
   2495                                 $simple_escape = $length;
   2496                             }
   2497 
   2498                             if($simple_escape < $next_escape_regexp_pos &&
   2499                                 $simple_escape < $length &&
   2500                                 $simple_escape < $close_pos) {
   2501                                 //The next escape sequence is a simple one ...
   2502                                 $es_pos = $simple_escape;
   2503 
   2504                                 //Add the stuff not in the string yet ...
   2505                                 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
   2506 
   2507                                 //Get the style for this escaped char ...
   2508                                 if (!$this->use_classes) {
   2509                                     $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
   2510                                 } else {
   2511                                     $escape_char_attributes = ' class="es0"';
   2512                                 }
   2513 
   2514                                 //Add the style for the escape char ...
   2515                                 $string .= "<span$escape_char_attributes>" .
   2516                                     GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
   2517 
   2518                                 //Get the byte AFTER the ESCAPE_CHAR we just found
   2519                                 $es_char = $part[$es_pos + 1];
   2520                                 if ($es_char == "\n") {
   2521                                     // don't put a newline around newlines
   2522                                     $string .= "</span>\n";
   2523                                     $start = $es_pos + 2;
   2524                                 } else if (ord($es_char) >= 128) {
   2525                                     //This is a non-ASCII char (UTF8 or single byte)
   2526                                     //This code tries to work around SF#2037598 ...
   2527                                     if(function_exists('mb_substr')) {
   2528                                         $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
   2529                                         $string .= $es_char_m . '</span>';
   2530                                     } else if (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
   2531                                         if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
   2532                                             "|\xE0[\xA0-\xBF][\x80-\xBF]".
   2533                                             "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
   2534                                             "|\xED[\x80-\x9F][\x80-\xBF]".
   2535                                             "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
   2536                                             "|[\xF1-\xF3][\x80-\xBF]{3}".
   2537                                             "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
   2538                                             $part, $es_char_m, null, $es_pos + 1)) {
   2539                                             $es_char_m = $es_char_m[0];
   2540                                         } else {
   2541                                             $es_char_m = $es_char;
   2542                                         }
   2543                                         $string .= $this->hsc($es_char_m) . '</span>';
   2544                                     } else {
   2545                                         $es_char_m = $this->hsc($es_char);
   2546                                     }
   2547                                     $start = $es_pos + strlen($es_char_m) + 1;
   2548                                 } else {
   2549                                     $string .= $this->hsc($es_char) . '</span>';
   2550                                     $start = $es_pos + 2;
   2551                                 }
   2552                             } else if ($next_escape_regexp_pos < $length &&
   2553                                 $next_escape_regexp_pos < $close_pos) {
   2554                                 $es_pos = $next_escape_regexp_pos;
   2555                                 //Add the stuff not in the string yet ...
   2556                                 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
   2557 
   2558                                 //Get the key and length of this match ...
   2559                                 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
   2560                                 $escape_str = substr($part, $es_pos, $escape['length']);
   2561                                 $escape_key = $escape['key'];
   2562 
   2563                                 //Get the style for this escaped char ...
   2564                                 if (!$this->use_classes) {
   2565                                     $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
   2566                                 } else {
   2567                                     $escape_char_attributes = ' class="es' . $escape_key . '"';
   2568                                 }
   2569 
   2570                                 //Add the style for the escape char ...
   2571                                 $string .= "<span$escape_char_attributes>" .
   2572                                     $this->hsc($escape_str) . '</span>';
   2573 
   2574                                 $start = $es_pos + $escape['length'];
   2575                             } else {
   2576                                 //Copy the remainder of the string ...
   2577                                 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
   2578                                 $start = $close_pos + $char_len;
   2579                                 $string_open = false;
   2580                             }
   2581                         } while($string_open);
   2582 
   2583                         if ($check_linenumbers) {
   2584                             // Are line numbers used? If so, we should end the string before
   2585                             // the newline and begin it again (so that when <li>s are put in, the
   2586                             // source remains XHTML compliant)
   2587                             // note to self: This opens up possibility of config files specifying
   2588                             // that languages can/cannot have multiline strings???
   2589                             $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
   2590                         }
   2591 
   2592                         $result .= $string;
   2593                         $string = '';
   2594                         $i = $start - 1;
   2595                         continue;
   2596                     } else if ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
   2597                         substr($part, $i, $hq_strlen) == $hq) {
   2598                         // The start of a hard quoted string
   2599                         if (!$this->use_classes) {
   2600                             $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
   2601                             $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
   2602                         } else {
   2603                             $string_attributes = ' class="st_h"';
   2604                             $escape_char_attributes = ' class="es_h"';
   2605                         }
   2606                         // parse the stuff before this
   2607                         $result .= $this->parse_non_string_part($stuff_to_parse);
   2608                         $stuff_to_parse = '';
   2609 
   2610                         // now handle the string
   2611                         $string = '';
   2612 
   2613                         // look for closing quote
   2614                         $start = $i + $hq_strlen;
   2615                         while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
   2616                             $start = $close_pos + 1;
   2617                             if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR']) {
   2618                                 // make sure this quote is not escaped
   2619                                 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
   2620                                     if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
   2621                                         // check whether this quote is escaped or if it is something like '\\'
   2622                                         $escape_char_pos = $close_pos - 1;
   2623                                         while ($escape_char_pos > 0
   2624                                                 && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
   2625                                             --$escape_char_pos;
   2626                                         }
   2627                                         if (($close_pos - $escape_char_pos) & 1) {
   2628                                             // uneven number of escape chars => this quote is escaped
   2629                                             continue 2;
   2630                                         }
   2631                                     }
   2632                                 }
   2633                             }
   2634 
   2635                             // found closing quote
   2636                             break;
   2637                         }
   2638 
   2639                         //Found the closing delimiter?
   2640                         if (!$close_pos) {
   2641                             // span till the end of this $part when no closing delimiter is found
   2642                             $close_pos = $length;
   2643                         }
   2644 
   2645                         //Get the actual string
   2646                         $string = substr($part, $i, $close_pos - $i + 1);
   2647                         $i = $close_pos;
   2648 
   2649                         // handle escape chars and encode html chars
   2650                         // (special because when we have escape chars within our string they may not be escaped)
   2651                         if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
   2652                             $start = 0;
   2653                             $new_string = '';
   2654                             while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
   2655                                 // html escape stuff before
   2656                                 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
   2657                                 // check if this is a hard escape
   2658                                 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
   2659                                     if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
   2660                                         // indeed, this is a hardescape
   2661                                         $new_string .= "<span$escape_char_attributes>" .
   2662                                             $this->hsc($hardescape) . '</span>';
   2663                                         $start = $es_pos + strlen($hardescape);
   2664                                         continue 2;
   2665                                     }
   2666                                 }
   2667                                 // not a hard escape, but a normal escape
   2668                                 // they come in pairs of two
   2669                                 $c = 0;
   2670                                 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
   2671                                     && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
   2672                                     && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
   2673                                     $c += 2;
   2674                                 }
   2675                                 if ($c) {
   2676                                     $new_string .= "<span$escape_char_attributes>" .
   2677                                         str_repeat($escaped_escape_char, $c) .
   2678                                         '</span>';
   2679                                     $start = $es_pos + $c;
   2680                                 } else {
   2681                                     // this is just a single lonely escape char...
   2682                                     $new_string .= $escaped_escape_char;
   2683                                     $start = $es_pos + 1;
   2684                                 }
   2685                             }
   2686                             $string = $new_string . $this->hsc(substr($string, $start));
   2687                         } else {
   2688                             $string = $this->hsc($string);
   2689                         }
   2690 
   2691                         if ($check_linenumbers) {
   2692                             // Are line numbers used? If, we should end the string before
   2693                             // the newline and begin it again (so when <li>s are put in the source
   2694                             // remains XHTML compliant)
   2695                             // note to self: This opens up possibility of config files specifying
   2696                             // that languages can/cannot have multiline strings???
   2697                             $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
   2698                         }
   2699 
   2700                         $result .= "<span$string_attributes>" . $string . '</span>';
   2701                         $string = '';
   2702                         continue;
   2703                     } else {
   2704                         //Have a look for regexp comments
   2705                         if ($i == $next_comment_regexp_pos) {
   2706                             $COMMENT_MATCHED = true;
   2707                             $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
   2708                             $test_str = $this->hsc(substr($part, $i, $comment['length']));
   2709 
   2710                             //@todo If remove important do remove here
   2711                             if ($this->lexic_permissions['COMMENTS']['MULTI']) {
   2712                                 if (!$this->use_classes) {
   2713                                     $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
   2714                                 } else {
   2715                                     $attributes = ' class="co' . $comment['key'] . '"';
   2716                                 }
   2717 
   2718                                 $test_str = "<span$attributes>" . $test_str . "</span>";
   2719 
   2720                                 // Short-cut through all the multiline code
   2721                                 if ($check_linenumbers) {
   2722                                     // strreplace to put close span and open span around multiline newlines
   2723                                     $test_str = str_replace(
   2724                                         "\n", "</span>\n<span$attributes>",
   2725                                         str_replace("\n ", "\n&nbsp;", $test_str)
   2726                                     );
   2727                                 }
   2728                             }
   2729 
   2730                             $i += $comment['length'] - 1;
   2731 
   2732                             // parse the rest
   2733                             $result .= $this->parse_non_string_part($stuff_to_parse);
   2734                             $stuff_to_parse = '';
   2735                         }
   2736 
   2737                         // If we haven't matched a regexp comment, try multi-line comments
   2738                         if (!$COMMENT_MATCHED) {
   2739                             // Is this a multiline comment?
   2740                             if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
   2741                                 $next_comment_multi_pos = $length;
   2742                                 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
   2743                                     $match_i = false;
   2744                                     if (isset($comment_multi_cache_per_key[$open]) &&
   2745                                         ($comment_multi_cache_per_key[$open] >= $i ||
   2746                                          $comment_multi_cache_per_key[$open] === false)) {
   2747                                         // we have already matched something
   2748                                         if ($comment_multi_cache_per_key[$open] === false) {
   2749                                             // this comment is never matched
   2750                                             continue;
   2751                                         }
   2752                                         $match_i = $comment_multi_cache_per_key[$open];
   2753                                     } else if (($match_i = stripos($part, $open, $i)) !== false) {
   2754                                         $comment_multi_cache_per_key[$open] = $match_i;
   2755                                     } else {
   2756                                         $comment_multi_cache_per_key[$open] = false;
   2757                                         continue;
   2758                                     }
   2759                                     if ($match_i !== false && $match_i < $next_comment_multi_pos) {
   2760                                         $next_comment_multi_pos = $match_i;
   2761                                         $next_open_comment_multi = $open;
   2762                                         if ($match_i === $i) {
   2763                                             break;
   2764                                         }
   2765                                     }
   2766                                 }
   2767                             }
   2768                             if ($i == $next_comment_multi_pos) {
   2769                                 $open = $next_open_comment_multi;
   2770                                 $close = $this->language_data['COMMENT_MULTI'][$open];
   2771                                 $open_strlen = strlen($open);
   2772                                 $close_strlen = strlen($close);
   2773                                 $COMMENT_MATCHED = true;
   2774                                 $test_str_match = $open;
   2775                                 //@todo If remove important do remove here
   2776                                 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
   2777                                     $open == GESHI_START_IMPORTANT) {
   2778                                     if ($open != GESHI_START_IMPORTANT) {
   2779                                         if (!$this->use_classes) {
   2780                                             $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
   2781                                         } else {
   2782                                             $attributes = ' class="coMULTI"';
   2783                                         }
   2784                                         $test_str = "<span$attributes>" . $this->hsc($open);
   2785                                     } else {
   2786                                         if (!$this->use_classes) {
   2787                                             $attributes = ' style="' . $this->important_styles . '"';
   2788                                         } else {
   2789                                             $attributes = ' class="imp"';
   2790                                         }
   2791 
   2792                                         // We don't include the start of the comment if it's an
   2793                                         // "important" part
   2794                                         $test_str = "<span$attributes>";
   2795                                     }
   2796                                 } else {
   2797                                     $test_str = $this->hsc($open);
   2798                                 }
   2799 
   2800                                 $close_pos = strpos( $part, $close, $i + $open_strlen );
   2801 
   2802                                 if ($close_pos === false) {
   2803                                     $close_pos = $length;
   2804                                 }
   2805 
   2806                                 // Short-cut through all the multiline code
   2807                                 $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
   2808                                 if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
   2809                                     $test_str_match == GESHI_START_IMPORTANT) &&
   2810                                     $check_linenumbers) {
   2811 
   2812                                     // strreplace to put close span and open span around multiline newlines
   2813                                     $test_str .= str_replace(
   2814                                         "\n", "</span>\n<span$attributes>",
   2815                                         str_replace("\n ", "\n&nbsp;", $rest_of_comment)
   2816                                     );
   2817                                 } else {
   2818                                     $test_str .= $rest_of_comment;
   2819                                 }
   2820 
   2821                                 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
   2822                                     $test_str_match == GESHI_START_IMPORTANT) {
   2823                                     $test_str .= '</span>';
   2824                                 }
   2825 
   2826                                 $i = $close_pos + $close_strlen - 1;
   2827 
   2828                                 // parse the rest
   2829                                 $result .= $this->parse_non_string_part($stuff_to_parse);
   2830                                 $stuff_to_parse = '';
   2831                             }
   2832                         }
   2833 
   2834                         // If we haven't matched a multiline comment, try single-line comments
   2835                         if (!$COMMENT_MATCHED) {
   2836                             // cache potential single line comment occurances
   2837                             if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
   2838                                 $next_comment_single_pos = $length;
   2839                                 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
   2840                                     $match_i = false;
   2841                                     if (isset($comment_single_cache_per_key[$comment_key]) &&
   2842                                         ($comment_single_cache_per_key[$comment_key] >= $i ||
   2843                                          $comment_single_cache_per_key[$comment_key] === false)) {
   2844                                         // we have already matched something
   2845                                         if ($comment_single_cache_per_key[$comment_key] === false) {
   2846                                             // this comment is never matched
   2847                                             continue;
   2848                                         }
   2849                                         $match_i = $comment_single_cache_per_key[$comment_key];
   2850                                     } else if (
   2851                                         // case sensitive comments
   2852                                         ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
   2853                                         ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
   2854                                         // non case sensitive
   2855                                         (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
   2856                                           (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
   2857                                         $comment_single_cache_per_key[$comment_key] = $match_i;
   2858                                     } else {
   2859                                         $comment_single_cache_per_key[$comment_key] = false;
   2860                                         continue;
   2861                                     }
   2862                                     if ($match_i !== false && $match_i < $next_comment_single_pos) {
   2863                                         $next_comment_single_pos = $match_i;
   2864                                         $next_comment_single_key = $comment_key;
   2865                                         if ($match_i === $i) {
   2866                                             break;
   2867                                         }
   2868                                     }
   2869                                 }
   2870                             }
   2871                             if ($next_comment_single_pos == $i) {
   2872                                 $comment_key = $next_comment_single_key;
   2873                                 $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
   2874                                 $com_len = strlen($comment_mark);
   2875 
   2876                                 // This check will find special variables like $# in bash
   2877                                 // or compiler directives of Delphi beginning {$
   2878                                 if ((empty($sc_disallowed_before) || ($i == 0) ||
   2879                                     (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
   2880                                     (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
   2881                                     (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
   2882                                 {
   2883                                     // this is a valid comment
   2884                                     $COMMENT_MATCHED = true;
   2885                                     if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
   2886                                         if (!$this->use_classes) {
   2887                                             $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
   2888                                         } else {
   2889                                             $attributes = ' class="co' . $comment_key . '"';
   2890                                         }
   2891                                         $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
   2892                                     } else {
   2893                                         $test_str = $this->hsc($comment_mark);
   2894                                     }
   2895 
   2896                                     //Check if this comment is the last in the source
   2897                                     $close_pos = strpos($part, "\n", $i);
   2898                                     $oops = false;
   2899                                     if ($close_pos === false) {
   2900                                         $close_pos = $length;
   2901                                         $oops = true;
   2902                                     }
   2903                                     $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
   2904                                     if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
   2905                                         $test_str .= "</span>";
   2906                                     }
   2907 
   2908                                     // Take into account that the comment might be the last in the source
   2909                                     if (!$oops) {
   2910                                       $test_str .= "\n";
   2911                                     }
   2912 
   2913                                     $i = $close_pos;
   2914 
   2915                                     // parse the rest
   2916                                     $result .= $this->parse_non_string_part($stuff_to_parse);
   2917                                     $stuff_to_parse = '';
   2918                                 }
   2919                             }
   2920                         }
   2921                     }
   2922 
   2923                     // Where are we adding this char?
   2924                     if (!$COMMENT_MATCHED) {
   2925                         $stuff_to_parse .= $char;
   2926                     } else {
   2927                         $result .= $test_str;
   2928                         unset($test_str);
   2929                         $COMMENT_MATCHED = false;
   2930                     }
   2931                 }
   2932                 // Parse the last bit
   2933                 $result .= $this->parse_non_string_part($stuff_to_parse);
   2934                 $stuff_to_parse = '';
   2935             } else {
   2936                 $result .= $this->hsc($part);
   2937             }
   2938             // Close the <span> that surrounds the block
   2939             if ($STRICTATTRS != '') {
   2940                 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
   2941                 $result .= '</span>';
   2942             }
   2943 
   2944             $endresult .= $result;
   2945             unset($part, $parts[$key], $result);
   2946         }
   2947 
   2948         //This fix is related to SF#1923020, but has to be applied regardless of
   2949         //actually highlighting symbols.
   2950         /** NOTE: memorypeak #3 */
   2951         $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
   2952 
   2953 //        // Parse the last stuff (redundant?)
   2954 //        $result .= $this->parse_non_string_part($stuff_to_parse);
   2955 
   2956         // Lop off the very first and last spaces
   2957 //        $result = substr($result, 1, -1);
   2958 
   2959         // We're finished: stop timing
   2960         $this->set_time($start_time, microtime());
   2961 
   2962         $this->finalise($endresult);
   2963         return $endresult;
   2964     }
   2965 
   2966     /**
   2967      * Swaps out spaces and tabs for HTML indentation. Not needed if
   2968      * the code is in a pre block...
   2969      *
   2970      * @param  string The source to indent (reference!)
   2971      * @since  1.0.0
   2972      * @access private
   2973      */
   2974     function indent(&$result) {
   2975         /// Replace tabs with the correct number of spaces
   2976         if (false !== strpos($result, "\t")) {
   2977             $lines = explode("\n", $result);
   2978             $result = null;//Save memory while we process the lines individually
   2979             $tab_width = $this->get_real_tab_width();
   2980             $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);
   2981 
   2982             for ($key = 0, $n = count($lines); $key < $n; $key++) {
   2983                 $line = $lines[$key];
   2984                 if (false === strpos($line, "\t")) {
   2985                     continue;
   2986                 }
   2987 
   2988                 $pos = 0;
   2989                 $length = strlen($line);
   2990                 $lines[$key] = ''; // reduce memory
   2991 
   2992                 $IN_TAG = false;
   2993                 for ($i = 0; $i < $length; ++$i) {
   2994                     $char = $line[$i];
   2995                     // Simple engine to work out whether we're in a tag.
   2996                     // If we are we modify $pos. This is so we ignore HTML
   2997                     // in the line and only workout the tab replacement
   2998                     // via the actual content of the string
   2999                     // This test could be improved to include strings in the
   3000                     // html so that < or > would be allowed in user's styles
   3001                     // (e.g. quotes: '<' '>'; or similar)
   3002                     if ($IN_TAG) {
   3003                         if ('>' == $char) {
   3004                             $IN_TAG = false;
   3005                         }
   3006                         $lines[$key] .= $char;
   3007                     } else if ('<' == $char) {
   3008                         $IN_TAG = true;
   3009                         $lines[$key] .= '<';
   3010                     } else if ('&' == $char) {
   3011                         $substr = substr($line, $i + 3, 5);
   3012                         $posi = strpos($substr, ';');
   3013                         if (false === $posi) {
   3014                             ++$pos;
   3015                         } else {
   3016                             $pos -= $posi+2;
   3017                         }
   3018                         $lines[$key] .= $char;
   3019                     } else if ("\t" == $char) {
   3020                         $str = '';
   3021                         // OPTIMISE - move $strs out. Make an array:
   3022                         // $tabs = array(
   3023                         //  1 => '&nbsp;',
   3024                         //  2 => '&nbsp; ',
   3025                         //  3 => '&nbsp; &nbsp;' etc etc
   3026                         // to use instead of building a string every time
   3027                         $tab_end_width = $tab_width - ($pos % $tab_width); //Moved out of the look as it doesn't change within the loop
   3028                         if (($pos & 1) || 1 == $tab_end_width) {
   3029                             $str .= substr($tab_string, 6, $tab_end_width);
   3030                         } else {
   3031                             $str .= substr($tab_string, 0, $tab_end_width+5);
   3032                         }
   3033                         $lines[$key] .= $str;
   3034                         $pos += $tab_end_width;
   3035 
   3036                         if (false === strpos($line, "\t", $i + 1)) {
   3037                             $lines[$key] .= substr($line, $i + 1);
   3038                             break;
   3039                         }
   3040                     } else if (0 == $pos && ' ' == $char) {
   3041                         $lines[$key] .= '&nbsp;';
   3042                         ++$pos;
   3043                     } else {
   3044                         $lines[$key] .= $char;
   3045                         ++$pos;
   3046                     }
   3047                 }
   3048             }
   3049             $result = implode("\n", $lines);
   3050             unset($lines);//We don't need the lines separated beyond this --- free them!
   3051         }
   3052         // Other whitespace
   3053         // BenBE: Fix to reduce the number of replacements to be done
   3054         $result = preg_replace('/^ /m', '&nbsp;', $result);
   3055         $result = str_replace('  ', ' &nbsp;', $result);
   3056 
   3057         if ($this->line_numbers == GESHI_NO_LINE_NUMBERS) {
   3058             if ($this->line_ending === null) {
   3059                 $result = nl2br($result);
   3060             } else {
   3061                 $result = str_replace("\n", $this->line_ending, $result);
   3062             }
   3063         }
   3064     }
   3065 
   3066     /**
   3067      * Changes the case of a keyword for those languages where a change is asked for
   3068      *
   3069      * @param  string The keyword to change the case of
   3070      * @return string The keyword with its case changed
   3071      * @since  1.0.0
   3072      * @access private
   3073      */
   3074     function change_case($instr) {
   3075         switch ($this->language_data['CASE_KEYWORDS']) {
   3076             case GESHI_CAPS_UPPER:
   3077                 return strtoupper($instr);
   3078             case GESHI_CAPS_LOWER:
   3079                 return strtolower($instr);
   3080             default:
   3081                 return $instr;
   3082         }
   3083     }
   3084 
   3085     /**
   3086      * Handles replacements of keywords to include markup and links if requested
   3087      *
   3088      * @param  string The keyword to add the Markup to
   3089      * @return The HTML for the match found
   3090      * @since  1.0.8
   3091      * @access private
   3092      *
   3093      * @todo   Get rid of ender in keyword links
   3094      */
   3095     function handle_keyword_replace($match) {
   3096         $k = $this->_kw_replace_group;
   3097         $keyword = $match[0];
   3098 
   3099         $before = '';
   3100         $after = '';
   3101 
   3102         if ($this->keyword_links) {
   3103             // Keyword links have been ebabled
   3104 
   3105             if (isset($this->language_data['URLS'][$k]) &&
   3106                 $this->language_data['URLS'][$k] != '') {
   3107                 // There is a base group for this keyword
   3108 
   3109                 // Old system: strtolower
   3110                 //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
   3111                 // New system: get keyword from language file to get correct case
   3112                 if (!$this->language_data['CASE_SENSITIVE'][$k] &&
   3113                     strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
   3114                     foreach ($this->language_data['KEYWORDS'][$k] as $word) {
   3115                         if (strcasecmp($word, $keyword) == 0) {
   3116                             break;
   3117                         }
   3118                     }
   3119                 } else {
   3120                     $word = $keyword;
   3121                 }
   3122 
   3123                 $before = '<|UR1|"' .
   3124                     str_replace(
   3125                         array(
   3126                             '{FNAME}',
   3127                             '{FNAMEL}',
   3128                             '{FNAMEU}',
   3129                             '.'),
   3130                         array(
   3131                             str_replace('+', '%20', urlencode($this->hsc($word))),
   3132                             str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
   3133                             str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
   3134                             '<DOT>'),
   3135                         $this->language_data['URLS'][$k]
   3136                     ) . '">';
   3137                 $after = '</a>';
   3138             }
   3139         }
   3140 
   3141         return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
   3142     }
   3143 
   3144     /**
   3145      * handles regular expressions highlighting-definitions with callback functions
   3146      *
   3147      * @note this is a callback, don't use it directly
   3148      *
   3149      * @param array the matches array
   3150      * @return The highlighted string
   3151      * @since 1.0.8
   3152      * @access private
   3153      */
   3154     function handle_regexps_callback($matches) {
   3155         // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
   3156         return  ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>';
   3157     }
   3158 
   3159     /**
   3160      * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
   3161      *
   3162      * @note this is a callback, don't use it directly
   3163      *
   3164      * @param array the matches array
   3165      * @return string
   3166      * @since 1.0.8
   3167      * @access private
   3168      */
   3169     function handle_multiline_regexps($matches) {
   3170         $before = $this->_hmr_before;
   3171         $after = $this->_hmr_after;
   3172         if ($this->_hmr_replace) {
   3173             $replace = $this->_hmr_replace;
   3174             $search = array();
   3175 
   3176             foreach (array_keys($matches) as $k) {
   3177                 $search[] = '\\' . $k;
   3178             }
   3179 
   3180             $before = str_replace($search, $matches, $before);
   3181             $after = str_replace($search, $matches, $after);
   3182             $replace = str_replace($search, $matches, $replace);
   3183         } else {
   3184             $replace = $matches[0];
   3185         }
   3186         return $before
   3187                     . '<|!REG3XP' . $this->_hmr_key .'!>'
   3188                         . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
   3189                     . '|>'
   3190               . $after;
   3191     }
   3192 
   3193     /**
   3194      * Takes a string that has no strings or comments in it, and highlights
   3195      * stuff like keywords, numbers and methods.
   3196      *
   3197      * @param string The string to parse for keyword, numbers etc.
   3198      * @since 1.0.0
   3199      * @access private
   3200      * @todo BUGGY! Why? Why not build string and return?
             *
             * Processing order: language regexps, numbers, keywords, keyword and
             * number styles, object methods, brackets, symbols, regexp styles and
             * finally URL placeholders. Matches are wrapped in "<|...>" and "|>"
             * placeholders, which are only converted to "<span...>" and "</span>"
             * as the very last step.
             *
             * @return string The highlighted text
   3201      */
   3202     function parse_non_string_part($stuff_to_parse) {
                // The input is run through $this->hsc() (presumably HTML escaping - not
                // shown here) and gets one leading space; that space is removed again by
                // the substr(..., 1) in the return statement.
   3203         $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
   3204 
   3205         // Regular expressions
   3206         foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
   3207             if ($this->lexic_permissions['REGEXPS'][$key]) {
   3208                 if (is_array($regexp)) {
                            // With line numbers enabled the match is handed to
                            // handle_multiline_regexps(), which reads the _hmr_* members set here.
   3209                     if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   3210                         // produce valid HTML when we match multiple lines
   3211                         $this->_hmr_replace = $regexp[GESHI_REPLACE];
   3212                         $this->_hmr_before = $regexp[GESHI_BEFORE];
   3213                         $this->_hmr_key = $key;
   3214                         $this->_hmr_after = $regexp[GESHI_AFTER];
   3215                         $stuff_to_parse = preg_replace_callback(
   3216                             "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
   3217                             array($this, 'handle_multiline_regexps'),
   3218                             $stuff_to_parse);
                                // NOTE(review): _hmr_key is not reset in this branch, unlike in the
                                // plain-string branch below.
   3219                         $this->_hmr_replace = false;
   3220                         $this->_hmr_before = '';
   3221                         $this->_hmr_after = '';
   3222                     } else {
   3223                         $stuff_to_parse = preg_replace(
   3224                             '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
   3225                             $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
   3226                             $stuff_to_parse);
   3227                     }
   3228                 } else {
                            // A plain string regexp: the whole pattern is wrapped in one capture group.
   3229                     if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   3230                         // produce valid HTML when we match multiple lines
   3231                         $this->_hmr_key = $key;
   3232                         $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
   3233                                               array($this, 'handle_multiline_regexps'), $stuff_to_parse);
   3234                         $this->_hmr_key = '';
   3235                     } else {
   3236                         $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
   3237                     }
   3238                 }
   3239             }
   3240         }
   3241 
   3242         // Highlight numbers. As of 1.0.8 we support diffent types of numbers
   3243         $numbers_found = false;
                // The number patterns are skipped entirely when the text contains no digit.
   3244         if ($this->lexic_permissions['NUMBERS'] && preg_match('#\d#', $stuff_to_parse )) {
   3245             $numbers_found = true;
   3246 
   3247             //For each of the formats ...
   3248             foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
   3249                 //Check if it should be highlighted ...
                        // "/NUM!$id/" is a placeholder; it is swapped for the real
                        // attributes further down, after the keywords have been handled.
   3250                 $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
   3251             }
   3252         }
   3253 
   3254         // Highlight keywords
                // Default look-behind / look-ahead character classes. They are left open
                // here and closed with "])" below, after the quotemarks have optionally
                // been appended.
   3255         $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#;>|^&";
   3256         $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
   3257         if ($this->lexic_permissions['STRINGS']) {
   3258             $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
   3259             $disallowed_before .= $quotemarks;
   3260             $disallowed_after .= $quotemarks;
   3261         }
   3262         $disallowed_before .= "])";
   3263         $disallowed_after .= "])";
   3264 
   3265         $parser_control_pergroup = false;
   3266         if (isset($this->language_data['PARSER_CONTROL'])) {
   3267             if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
   3268                 $x = 0; // check wether per-keyword-group parser_control is enabled
                        // A language-level override replaces the whole expression built above,
                        // not just the character class.
   3269                 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
   3270                     $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
   3271                     ++$x;
   3272                 }
   3273                 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
   3274                     $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
   3275                     ++$x;
   3276                 }
                        // Any entry besides the two global overrides counted in $x is taken to
                        // mean that per-group settings exist.
   3277                 $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
   3278             }
   3279         }
   3280 
   3281         // if this is changed, don't forget to change it below
   3282 //        if (!empty($disallowed_before)) {
   3283 //            $disallowed_before = "(?<![$disallowed_before])";
   3284 //        }
   3285 //        if (!empty($disallowed_after)) {
   3286 //            $disallowed_after = "(?![$disallowed_after])";
   3287 //        }
   3288 
   3289         foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
                    // A group without a permission entry is treated as enabled.
   3290             if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
   3291                 $this->lexic_permissions['KEYWORDS'][$k]) {
   3292 
   3293                 $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
   3294                 $modifiers = $case_sensitive ? '' : 'i';
   3295 
   3296                 // NEW in 1.0.8 - per-keyword-group parser control
   3297                 $disallowed_before_local = $disallowed_before;
   3298                 $disallowed_after_local = $disallowed_after;
   3299                 if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
   3300                     if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
   3301                         $disallowed_before_local =
   3302                             $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
   3303                     }
   3304 
   3305                     if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
   3306                         $disallowed_after_local =
   3307                             $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
   3308                     }
   3309                 }
   3310 
                        // The callback receives only the match, so the current group is
                        // stored on the object before the replacement runs.
   3311                 $this->_kw_replace_group = $k;
   3312 
   3313                 //NEW in 1.0.8, the cached regexp list
   3314                 // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
   3315                 for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set <  $set_length; ++$set) {
   3316                     $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
   3317                     // Might make a more unique string for putting the number in soon
   3318                     // Basically, we don't put the styles in yet because then the styles themselves will
   3319                     // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
                            // The (?!\<DOT\>(?:htm|php)) part rejects a keyword that is directly
                            // followed by "<DOT>htm" or "<DOT>php"; <DOT> is turned back into "."
                            // near the end of this method.
   3320                     $stuff_to_parse = preg_replace_callback(
   3321                         "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php))$disallowed_after_local/$modifiers",
   3322                         array($this, 'handle_keyword_replace'),
   3323                         $stuff_to_parse
   3324                         );
   3325                 }
   3326             }
   3327         }
   3328 
   3329         //
   3330         // Now that's all done, replace /[number]/ with the correct styles
   3331         //
   3332         foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
   3333             if (!$this->use_classes) {
   3334                 $attributes = ' style="' .
   3335                     (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
   3336                     $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
   3337             } else {
   3338                 $attributes = ' class="kw' . $k . '"';
   3339             }
                    // The "<|/$k/>" markers are presumably emitted by handle_keyword_replace()
                    // (not visible here) - TODO confirm.
   3340             $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
   3341         }
   3342 
   3343         if ($numbers_found) {
   3344             // Put number styles in
   3345             foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
   3346 //Commented out for now, as this needs some review ...
   3347 //                if ($numbers_permissions & $id) {
   3348                     //Get the appropriate style ...
   3349                         //Checking for unset styles is done by the style cache builder ...
   3350                     if (!$this->use_classes) {
   3351                         $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
   3352                     } else {
   3353                         $attributes = ' class="nu'.$id.'"';
   3354                     }
   3355 
   3356                     //Set in the correct styles ...
   3357                     $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
   3358 //                }
   3359             }
   3360         }
   3361 
   3362         // Highlight methods and fields in objects
   3363         if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
                    // Defaults; each can be overridden through PARSER_CONTROL['OOLANG'].
   3364             $oolang_spaces = "[\s]*";
   3365             $oolang_before = "";
   3366             $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
   3367             if (isset($this->language_data['PARSER_CONTROL'])) {
   3368                 if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
   3369                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
   3370                         $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
   3371                     }
   3372                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
   3373                         $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
   3374                     }
   3375                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
   3376                         $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
   3377                     }
   3378                 }
   3379             }
   3380 
   3381             foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
                        // Cheap substring test before running the regexp for this splitter.
   3382                 if (false !== strpos($stuff_to_parse, $splitter)) {
   3383                     if (!$this->use_classes) {
   3384                         $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
   3385                     } else {
   3386                         $attributes = ' class="me' . $key . '"';
   3387                     }
                            // Only the member name (group 4) is wrapped; prefix, splitter and
                            // spacing are emitted unchanged.
   3388                     $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
   3389                 }
   3390             }
   3391         }
   3392 
   3393         //
   3394         // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
   3395         // You try it, and see what happens ;)
   3396         // TODO: Fix lexic permissions not converting entities if shouldn't
   3397         // be highlighting regardless
   3398         //
   3399         if ($this->lexic_permissions['BRACKETS']) {
   3400             $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
   3401                               $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
   3402         }
   3403 
   3404 
   3405         //FIX for symbol highlighting ...
   3406         if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
   3407             //Get all matches and throw away those witin a block that is already highlighted... (i.e. matched by a regexp)
                    // One pass collects, in order of appearance: already highlighted
                    // "<|...>...|>" blocks, "</a>" and runs of symbols. Offsets are captured
                    // for the in-place replacement below.
   3408             $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
   3409             $global_offset = 0;
   3410             for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
   3411                 $symbol_match = $pot_symbols[$s_id][0][0];
   3412                 if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
   3413                     // already highlighted blocks _must_ include either < or >
   3414                     // so if this conditional applies, we have to skip this match
   3415                     // BenBE: UNLESS the block contains <SEMI> or <PIPE>
   3416                     if(strpos($symbol_match, '<SEMI>') === false &&
   3417                         strpos($symbol_match, '<PIPE>') === false) {
   3418                         continue;
   3419                     }
   3420                 }
   3421 
   3422                 // if we reach this point, we have a valid match which needs to be highlighted
   3423 
   3424                 $symbol_length = strlen($symbol_match);
   3425                 $symbol_offset = $pot_symbols[$s_id][0][1];
   3426                 unset($pot_symbols[$s_id]);
   3427                 $symbol_end = $symbol_length + $symbol_offset;
   3428                 $symbol_hl = "";
   3429 
   3430                 // if we have multiple styles, we have to handle them properly
   3431                 if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
                            // -1 means that no span has been opened yet for this run of symbols.
   3432                     $old_sym = -1;
   3433                     // Split the current stuff to replace into its atomic symbols ...
   3434                     preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
   3435                     foreach ($sym_match_syms[0] as $sym_ms) {
   3436                         //Check if consequtive symbols belong to the same group to save output ...
   3437                         if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
   3438                             && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
   3439                             if (-1 != $old_sym) {
   3440                                 $symbol_hl .= "|>";
   3441                             }
   3442                             $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
   3443                             if (!$this->use_classes) {
   3444                                 $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
   3445                             } else {
   3446                                 $symbol_hl .= '<| class="sy' . $old_sym . '">';
   3447                             }
   3448                         }
   3449                         $symbol_hl .= $sym_ms;
   3450                     }
   3451                     unset($sym_match_syms);
   3452 
   3453                     //Close remaining tags and insert the replacement at the right position ...
   3454                     //Take caution if symbol_hl is empty to avoid doubled closing spans.
   3455                     if (-1 != $old_sym) {
   3456                         $symbol_hl .= "|>";
   3457                     }
   3458                 } else {
   3459                     if (!$this->use_classes) {
   3460                         $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
   3461                     } else {
   3462                         $symbol_hl = '<| class="sy0">';
   3463                     }
   3464                     $symbol_hl .= $symbol_match . '|>';
   3465                 }
   3466 
                        // The captured offsets refer to the string as it was when
                        // preg_match_all() ran, hence the $global_offset correction.
   3467                 $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
   3468 
   3469                 // since we replace old text with something of different size,
   3470                 // we'll have to keep track of the differences
   3471                 $global_offset += strlen($symbol_hl) - $symbol_length;
   3472             }
   3473         }
   3474         //FIX for symbol highlighting ...
   3475 
   3476         // Add class/style for regexps
   3477         foreach (array_keys($this->language_data['REGEXPS']) as $key) {
   3478             if ($this->lexic_permissions['REGEXPS'][$key]) {
                        // A callable style is applied per match through
                        // handle_regexps_callback(); otherwise the "!REG3XP$key!" marker is
                        // replaced by one fixed attribute string.
   3479                 if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
   3480                     $this->_rx_key = $key;
   3481                     $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
   3482                         array($this, 'handle_regexps_callback'),
   3483                         $stuff_to_parse);
   3484                 } else {
   3485                     if (!$this->use_classes) {
   3486                         $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
   3487                     } else {
                                // An array regexp may name its own CSS class via GESHI_CLASS.
   3488                         if (is_array($this->language_data['REGEXPS'][$key]) &&
   3489                             array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
   3490                             $attributes = ' class="' .
   3491                                 $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
   3492                         } else {
   3493                            $attributes = ' class="re' . $key . '"';
   3494                         }
   3495                     }
   3496                     $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
   3497                 }
   3498             }
   3499         }
   3500 
   3501         // Replace <DOT> with . for urls
   3502         $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
   3503         // Replace <|UR1| with <a href= for urls also
                // NOTE(review): the use_classes branch and the outer else branch produce
                // the same replacement; only the inline-style case differs.
   3504         if (isset($this->link_styles[GESHI_LINK])) {
   3505             if ($this->use_classes) {
   3506                 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
   3507             } else {
   3508                 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
   3509             }
   3510         } else {
   3511             $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
   3512         }
   3513 
   3514         //
   3515         // NOW we add the span thingy ;)
   3516         //
   3517 
   3518         $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
   3519         $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
                // Drop the space that was prepended at the top of the method.
   3520         return substr($stuff_to_parse, 1);
   3521     }
   3522 
   /**
    * Sets the time taken to parse the code
    *
    * Both arguments are expected in the "usec sec" string form returned by
    * microtime(): the fractional part first, the whole seconds second. A value
    * that contains no space (for example the float from microtime(true)) is
    * treated as a plain number of seconds.
    *
    * The result, in seconds, is stored in $this->time.
    *
    * @param string The time when parsing started
    * @param string The time when parsing ended
    * @since 1.0.2
    * @access private
    */
   function set_time($start_time, $end_time) {
       // A missing second component (no space in the input) defaults to 0
       $start = explode(' ', $start_time) + array(1 => 0);
       $end = explode(' ', $end_time) + array(1 => 0);

       // Subtract like components first: adding the small fractional part to the
       // large epoch second count before subtracting would throw away precision.
       $this->time = ((float) $end[1] - (float) $start[1])
           + ((float) $end[0] - (float) $start[0]);
   }
   3536 
   /**
    * Returns the parse duration that was stored by set_time()
    *
    * @return double The time taken to parse the code
    * @since  1.0.2
    */
   function get_time() {
       $elapsed = $this->time;
       return $elapsed;
   }
   3546 
   /**
    * Merges arrays recursively, overwriting values of the first array with values of later arrays
    *
    * Takes any number of arrays. Keys of later arrays win over keys of earlier
    * ones. When a later value is itself an array and the key already exists in
    * the result, the two values are merged recursively; if the existing value
    * is not an array, that recursive call triggers an E_USER_WARNING and the
    * entry becomes false.
    *
    * @return array|false The merged array, an empty array when called without
    *                     arguments, or false when an argument is not an array
    * @since 1.0.8
    * @access private
    */
   function merge_arrays() {
       $arrays = func_get_args();
       $narrays = count($arrays);

       // Nothing to merge: avoid reading the non-existent first argument below
       if ($narrays == 0) {
           return array();
       }

       // check arguments
       // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
       for ($i = 0; $i < $narrays; ++$i) {
           if (!is_array($arrays[$i])) {
               // also array_merge_recursive returns nothing in this case
               trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
               return false;
           }
       }

       // the first array is in the output set in every case
       $ret = $arrays[0];

       // merge $ret with the remaining arrays
       for ($i = 1; $i < $narrays; ++$i) {
           foreach ($arrays[$i] as $key => $value) {
               if (is_array($value) && isset($ret[$key])) {
                   // if $ret[$key] is not an array this merges a scalar with an array;
                   // the nested call then warns and $ret[$key] becomes false.
                   $ret[$key] = $this->merge_arrays($ret[$key], $value);
               } else {
                   $ret[$key] = $value;
               }
           }
       }

       return $ret;
   }
   3585 
   /**
    * Gets language information and stores it for later use
    *
    * The language file is pulled in with require and is expected to fill the
    * local variable $language_data. Afterwards default lexic permissions are
    * set for keyword groups, single-line comments and regexps, the language's
    * PARSER_CONTROL/ENABLE_FLAGS (if any) are applied, and HARDCHAR falls back
    * to ESCAPE_CHAR when it is not defined.
    *
    * Finally a style file is looked up whose name is the language file name
    * with its last four characters replaced by ".style.php". If it is readable
    * and defines an array $style_data, that array is merged over the
    * language's STYLES.
    *
    * Calling this method with the file that is already loaded does nothing.
    *
    * @param string The filename of the language file you want to load
    * @since 1.0.0
    * @access private
    * @todo Needs to load keys for lexic permissions for keywords, regexps etc
    */
   function load_language($file_name) {
       if ($file_name == $this->loaded_language) {
           // this file is already loaded!
           return;
       }

       //Prepare some stuff before actually loading the language file
       $this->loaded_language = $file_name;
       $this->parse_cache_built = false;
       $this->enable_highlighting();
       $language_data = array();

       //Load the language file (it assigns $language_data in this scope)
       require $file_name;

       // Perhaps some checking might be added here later to check that
       // $language data is a valid thing but maybe not
       $this->language_data = $language_data;

       // Set strict mode if should be set
       $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];

       // Keyword groups are highlighted by default, unless the group is empty
       foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
           $this->lexic_permissions['KEYWORDS'][$key] =
               !empty($this->language_data['KEYWORDS'][$key]);
       }

       // Single-line comments and regexps are always enabled by default
       foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
           $this->lexic_permissions['COMMENTS'][$key] = true;
       }
       foreach (array_keys($this->language_data['REGEXPS']) as $key) {
           $this->lexic_permissions['REGEXPS'][$key] = true;
       }

       // empty() is used on purpose: it raises no notice when PARSER_CONTROL
       // or ENABLE_FLAGS is missing, false or null, and skips an empty array.
       if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
           foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
               // Every value other than GESHI_NEVER enables the lexic
               $perm = $value !== GESHI_NEVER;

               // Strict comparison: with "==" an integer key 0 would be treated
               // as 'ALL' on PHP versions before 8.
               if ($flag === 'ALL') {
                   $this->enable_highlighting($perm);
                   continue;
               }
               if (!isset($this->lexic_permissions[$flag])) {
                   // unknown lexic permission
                   continue;
               }
               if (is_array($this->lexic_permissions[$flag])) {
                   foreach (array_keys($this->lexic_permissions[$flag]) as $key) {
                       $this->lexic_permissions[$flag][$key] = $perm;
                   }
               } else {
                   $this->lexic_permissions[$flag] = $perm;
               }
           }
           unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
       }

       //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
       //You need to set one for HARDESCAPES only in this case.
       if (!isset($this->language_data['HARDCHAR'])) {
           $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
       }

       //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
       $style_filename = substr($file_name, 0, -4) . '.style.php';
       if (is_readable($style_filename)) {
           //Clear any style_data that could have been set before ...
           if (isset($style_data)) {
               unset($style_data);
           }

           //Read the Style Information from the style file
           include $style_filename;

           //Apply the new styles to our current language styles
           if (isset($style_data) && is_array($style_data)) {
               $this->language_data['STYLES'] =
                   $this->merge_arrays($this->language_data['STYLES'], $style_data);
           }
       }
   }
   3684 
   3685     /**
   3686      * Takes the parsed code and various options, and creates the HTML
   3687      * surrounding it to make it look nice.
   3688      *
   3689      * @param  string The code already parsed (reference!)
   3690      * @since  1.0.0
   3691      * @access private
   3692      */
   3693     function finalise(&$parsed_code) {
   3694         // Remove end parts of important declarations
   3695         // This is BUGGY!! My fault for bad code: fix coming in 1.2
   3696         // @todo Remove this crap
   3697         if ($this->enable_important_blocks &&
   3698             (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
   3699             $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
   3700         }
   3701 
   3702         // Add HTML whitespace stuff if we're using the <div> header
   3703         if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
   3704             $this->indent($parsed_code);
   3705         }
   3706 
   3707         // purge some unnecessary stuff
   3708         /** NOTE: memorypeak #1 */
   3709         $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
   3710 
   3711         // If we are using IDs for line numbers, there needs to be an overall
   3712         // ID set to prevent collisions.
   3713         if ($this->add_ids && !$this->overall_id) {
   3714             $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
   3715         }
   3716 
   3717         // Get code into lines
   3718         /** NOTE: memorypeak #2 */
   3719         $code = explode("\n", $parsed_code);
   3720         $parsed_code = $this->header();
   3721 
   3722         // If we're using line numbers, we insert <li>s and appropriate
   3723         // markup to style them (otherwise we don't need to do anything)
   3724         if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
   3725             // If we're using the <pre> header, we shouldn't add newlines because
   3726             // the <pre> will line-break them (and the <li>s already do this for us)
   3727             $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';
   3728 
   3729             // Set vars to defaults for following loop
   3730             $i = 0;
   3731 
   3732             // Foreach line...
   3733             for ($i = 0, $n = count($code); $i < $n;) {
   3734                 //Reset the attributes for a new line ...
   3735                 $attrs = array();
   3736 
   3737                 // Make lines have at least one space in them if they're empty
   3738                 // BenBE: Checking emptiness using trim instead of relying on blanks
   3739                 if ('' == trim($code[$i])) {
   3740                     $code[$i] = '&nbsp;';
   3741                 }
   3742 
   3743                 // If this is a "special line"...
   3744                 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
   3745                     $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
   3746                     // Set the attributes to style the line
   3747                     if ($this->use_classes) {
   3748                         //$attr = ' class="li2"';
   3749                         $attrs['class'][] = 'li2';
   3750                         $def_attr = ' class="de2"';
   3751                     } else {
   3752                         //$attr = ' style="' . $this->line_style2 . '"';
   3753                         $attrs['style'][] = $this->line_style2;
   3754                         // This style "covers up" the special styles set for special lines
   3755                         // so that styles applied to special lines don't apply to the actual
   3756                         // code on that line
   3757                         $def_attr = ' style="' . $this->code_style . '"';
   3758                     }
   3759                 } else {
   3760                     if ($this->use_classes) {
   3761                         //$attr = ' class="li1"';
   3762                         $attrs['class'][] = 'li1';
   3763                         $def_attr = ' class="de1"';
   3764                     } else {
   3765                         //$attr = ' style="' . $this->line_style1 . '"';
   3766                         $attrs['style'][] = $this->line_style1;
   3767                         $def_attr = ' style="' . $this->code_style . '"';
   3768                     }
   3769                 }
   3770 
   3771                 //Check which type of tag to insert for this line
   3772                 if ($this->header_type == GESHI_HEADER_PRE_VALID) {
   3773                     $start = "<pre$def_attr>";
   3774                     $end = '</pre>';
   3775                 } else {
   3776                     // Span or div?
   3777                     $start = "<div$def_attr>";
   3778                     $end = '</div>';
   3779                 }
   3780 
   3781                 ++$i;
   3782 
   3783                 // Are we supposed to use ids? If so, add them
   3784                 if ($this->add_ids) {
   3785                     $attrs['id'][] = "$this->overall_id-$i";
   3786                 }
   3787 
   3788                 //Is this some line with extra styles???
   3789                 if (in_array($i, $this->highlight_extra_lines)) {
   3790                     if ($this->use_classes) {
   3791                         if (isset($this->highlight_extra_lines_styles[$i])) {
   3792                             $attrs['class'][] = "lx$i";
   3793                         } else {
   3794                             $attrs['class'][] = "ln-xtra";
   3795                         }
   3796                     } else {
   3797                         array_push($attrs['style'], $this->get_line_style($i));
   3798                     }
   3799                 }
   3800 
   3801                 // Add in the line surrounded by appropriate list HTML
   3802                 $attr_string = '';
   3803                 foreach ($attrs as $key => $attr) {
   3804                     $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
   3805                 }
   3806 
   3807                 $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
   3808                 unset($code[$i - 1]);
   3809             }
   3810         } else {
   3811             $n = count($code);
   3812             if ($this->use_classes) {
   3813                 $attributes = ' class="de1"';
   3814             } else {
   3815                 $attributes = ' style="'. $this->code_style .'"';
   3816             }
   3817             if ($this->header_type == GESHI_HEADER_PRE_VALID) {
   3818                 $parsed_code .= '<pre'. $attributes .'>';
   3819             } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
   3820                 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   3821                     if ($this->use_classes) {
   3822                         $attrs = ' class="ln"';
   3823                     } else {
   3824                         $attrs = ' style="'. $this->table_linenumber_style .'"';
   3825                     }
   3826                     $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
   3827                     // get linenumbers
   3828                     // we don't merge it with the for below, since it should be better for
   3829                     // memory consumption this way
   3830                     // @todo: but... actually it would still be somewhat nice to merge the two loops
   3831                     //        the mem peaks are at different positions
   3832                     for ($i = 0; $i < $n; ++$i) {
   3833                         $close = 0;
   3834                         // fancy lines
   3835                         if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
   3836                             $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
   3837                             // Set the attributes to style the line
   3838                             if ($this->use_classes) {
   3839                                 $parsed_code .= '<span class="xtra li2"><span class="de2">';
   3840                             } else {
   3841                                 // This style "covers up" the special styles set for special lines
   3842                                 // so that styles applied to special lines don't apply to the actual
   3843                                 // code on that line
   3844                                 $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
   3845                                                   .'<span style="' . $this->code_style .'">';
   3846                             }
   3847                             $close += 2;
   3848                         }
   3849                         //Is this some line with extra styles???
   3850                         if (in_array($i + 1, $this->highlight_extra_lines)) {
   3851                             if ($this->use_classes) {
   3852                                 if (isset($this->highlight_extra_lines_styles[$i])) {
   3853                                     $parsed_code .= "<span class=\"xtra lx$i\">";
   3854                                 } else {
   3855                                     $parsed_code .= "<span class=\"xtra ln-xtra\">";
   3856                                 }
   3857                             } else {
   3858                                 $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
   3859                             }
   3860                             ++$close;
   3861                         }
   3862                         $parsed_code .= $this->line_numbers_start + $i;
   3863                         if ($close) {
   3864                             $parsed_code .= str_repeat('</span>', $close);
   3865                         } else if ($i != $n) {
   3866                             $parsed_code .= "\n";
   3867                         }
   3868                     }
   3869                     $parsed_code .= '</pre></td><td'.$attributes.'>';
   3870                 }
   3871                 $parsed_code .= '<pre'. $attributes .'>';
   3872             }
   3873             // No line numbers, but still need to handle highlighting lines extra.
   3874             // Have to use divs so the full width of the code is highlighted
   3875             $close = 0;
   3876             for ($i = 0; $i < $n; ++$i) {
   3877                 // Make lines have at least one space in them if they're empty
   3878                 // BenBE: Checking emptiness using trim instead of relying on blanks
   3879                 if ('' == trim($code[$i])) {
   3880                     $code[$i] = '&nbsp;';
   3881                 }
   3882                 // fancy lines
   3883                 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
   3884                     $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
   3885                     // Set the attributes to style the line
   3886                     if ($this->use_classes) {
   3887                         $parsed_code .= '<span class="xtra li2"><span class="de2">';
   3888                     } else {
   3889                         // This style "covers up" the special styles set for special lines
   3890                         // so that styles applied to special lines don't apply to the actual
   3891                         // code on that line
   3892                         $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
   3893                                           .'<span style="' . $this->code_style .'">';
   3894                     }
   3895                     $close += 2;
   3896                 }
   3897                 //Is this some line with extra styles???
   3898                 if (in_array($i + 1, $this->highlight_extra_lines)) {
   3899                     if ($this->use_classes) {
   3900                         if (isset($this->highlight_extra_lines_styles[$i])) {
   3901                             $parsed_code .= "<span class=\"xtra lx$i\">";
   3902                         } else {
   3903                             $parsed_code .= "<span class=\"xtra ln-xtra\">";
   3904                         }
   3905                     } else {
   3906                         $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
   3907                     }
   3908                     ++$close;
   3909                 }
   3910 
   3911                 $parsed_code .= $code[$i];
   3912 
   3913                 if ($close) {
   3914                   $parsed_code .= str_repeat('</span>', $close);
   3915                   $close = 0;
   3916                 }
   3917                 elseif ($i + 1 < $n) {
   3918                     $parsed_code .= "\n";
   3919                 }
   3920                 unset($code[$i]);
   3921             }
   3922 
   3923             if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
   3924                 $parsed_code .= '</pre>';
   3925             }
   3926             if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   3927                 $parsed_code .= '</td>';
   3928             }
   3929         }
   3930 
   3931         $parsed_code .= $this->footer();
   3932     }
   3933 
   3934     /**
   3935      * Creates the header for the code block (with correct attributes)
   3936      *
   3937      * @return string The header for the code block
   3938      * @since  1.0.0
   3939      * @access private
   3940      */
   3941     function header() {
   3942         // Get attributes needed
   3943         /**
   3944          * @todo   Document behaviour change - class is outputted regardless of whether
   3945          *         we're using classes or not. Same with style
   3946          */
   3947         $attributes = ' class="' . $this->language;
   3948         if ($this->overall_class != '') {
   3949             $attributes .= " ".$this->overall_class;
   3950         }
   3951         $attributes .= '"';
   3952 
   3953         if ($this->overall_id != '') {
   3954             $attributes .= " id=\"{$this->overall_id}\"";
   3955         }
   3956         if ($this->overall_style != '') {
   3957             $attributes .= ' style="' . $this->overall_style . '"';
   3958         }
   3959 
   3960         $ol_attributes = '';
   3961 
   3962         if ($this->line_numbers_start != 1) {
   3963             $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
   3964         }
   3965 
   3966         // Get the header HTML
   3967         $header = $this->header_content;
   3968         if ($header) {
   3969             if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
   3970                 $header = str_replace("\n", '', $header);
   3971             }
   3972             $header = $this->replace_keywords($header);
   3973 
   3974             if ($this->use_classes) {
   3975                 $attr = ' class="head"';
   3976             } else {
   3977                 $attr = " style=\"{$this->header_content_style}\"";
   3978             }
   3979             if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   3980                 $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
   3981             } else {
   3982                 $header = "<div$attr>$header</div>";
   3983             }
   3984         }
   3985 
   3986         if (GESHI_HEADER_NONE == $this->header_type) {
   3987             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   3988                 return "$header<ol$attributes$ol_attributes>";
   3989             }
   3990             return $header . ($this->force_code_block ? '<div>' : '');
   3991         }
   3992 
   3993         // Work out what to return and do it
   3994         if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   3995             if ($this->header_type == GESHI_HEADER_PRE) {
   3996                 return "<pre$attributes>$header<ol$ol_attributes>";
   3997             } else if ($this->header_type == GESHI_HEADER_DIV ||
   3998                 $this->header_type == GESHI_HEADER_PRE_VALID) {
   3999                 return "<div$attributes>$header<ol$ol_attributes>";
   4000             } else if ($this->header_type == GESHI_HEADER_PRE_TABLE) {
   4001                 return "<table$attributes>$header<tbody><tr class=\"li1\">";
   4002             }
   4003         } else {
   4004             if ($this->header_type == GESHI_HEADER_PRE) {
   4005                 return "<pre$attributes>$header"  .
   4006                     ($this->force_code_block ? '<div>' : '');
   4007             } else {
   4008                 return "<div$attributes>$header" .
   4009                     ($this->force_code_block ? '<div>' : '');
   4010             }
   4011         }
   4012     }
   4013 
   4014     /**
   4015      * Returns the footer for the code block.
   4016      *
   4017      * @return string The footer for the code block
   4018      * @since  1.0.0
   4019      * @access private
   4020      */
   4021     function footer() {
   4022         $footer = $this->footer_content;
   4023         if ($footer) {
   4024             if ($this->header_type == GESHI_HEADER_PRE) {
   4025                 $footer = str_replace("\n", '', $footer);;
   4026             }
   4027             $footer = $this->replace_keywords($footer);
   4028 
   4029             if ($this->use_classes) {
   4030                 $attr = ' class="foot"';
   4031             } else {
   4032                 $attr = " style=\"{$this->footer_content_style}\"";
   4033             }
   4034             if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   4035                 $footer = "<tfoot><tr><td colspan=\"2\">$footer</td></tr></tfoot>";
   4036             } else {
   4037                 $footer = "<div$attr>$footer</div>";
   4038             }
   4039         }
   4040 
   4041         if (GESHI_HEADER_NONE == $this->header_type) {
   4042             return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
   4043         }
   4044 
   4045         if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
   4046             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   4047                 return "</ol>$footer</div>";
   4048             }
   4049             return ($this->force_code_block ? '</div>' : '') .
   4050                 "$footer</div>";
   4051         }
   4052         elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
   4053             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   4054                 return "</tr></tbody>$footer</table>";
   4055             }
   4056             return ($this->force_code_block ? '</div>' : '') .
   4057                 "$footer</div>";
   4058         }
   4059         else {
   4060             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   4061                 return "</ol>$footer</pre>";
   4062             }
   4063             return ($this->force_code_block ? '</div>' : '') .
   4064                 "$footer</pre>";
   4065         }
   4066     }
   4067 
   4068     /**
   4069      * Replaces certain keywords in the header and footer with
   4070      * certain configuration values
   4071      *
   4072      * @param  string The header or footer content to do replacement on
   4073      * @return string The header or footer with replaced keywords
   4074      * @since  1.0.2
   4075      * @access private
   4076      */
   4077     function replace_keywords($instr) {
   4078         $keywords = $replacements = array();
   4079 
   4080         $keywords[] = '<TIME>';
   4081         $keywords[] = '{TIME}';
   4082         $replacements[] = $replacements[] = number_format($time = $this->get_time(), 3);
   4083 
   4084         $keywords[] = '<LANGUAGE>';
   4085         $keywords[] = '{LANGUAGE}';
   4086         $replacements[] = $replacements[] = $this->language_data['LANG_NAME'];
   4087 
   4088         $keywords[] = '<VERSION>';
   4089         $keywords[] = '{VERSION}';
   4090         $replacements[] = $replacements[] = GESHI_VERSION;
   4091 
   4092         $keywords[] = '<SPEED>';
   4093         $keywords[] = '{SPEED}';
   4094         if ($time <= 0) {
   4095             $speed = 'N/A';
   4096         } else {
   4097             $speed = strlen($this->source) / $time;
   4098             if ($speed >= 1024) {
   4099                 $speed = sprintf("%.2f KB/s", $speed / 1024.0);
   4100             } else {
   4101                 $speed = sprintf("%.0f B/s", $speed);
   4102             }
   4103         }
   4104         $replacements[] = $replacements[] = $speed;
   4105 
   4106         return str_replace($keywords, $replacements, $instr);
   4107     }
   4108 
   4109     /**
   4110      * Secure replacement for PHP built-in function htmlspecialchars().
   4111      *
   4112      * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
   4113      * for this replacement function.
   4114      *
   4115      * The INTERFACE for this function is almost the same as that for
   4116      * htmlspecialchars(), with the same default for quote style; however, there
   4117      * is no 'charset' parameter. The reason for this is as follows:
   4118      *
   4119      * The PHP docs say:
   4120      *      "The third argument charset defines character set used in conversion."
   4121      *
   4122      * I suspect PHP's htmlspecialchars() is working at the byte-value level and
   4123      * thus _needs_ to know (or asssume) a character set because the special
   4124      * characters to be replaced could exist at different code points in
   4125      * different character sets. (If indeed htmlspecialchars() works at
   4126      * byte-value level that goes some  way towards explaining why the
   4127      * vulnerability would exist in this function, too, and not only in
   4128      * htmlentities() which certainly is working at byte-value level.)
   4129      *
   4130      * This replacement function however works at character level and should
   4131      * therefore be "immune" to character set differences - so no charset
   4132      * parameter is needed or provided. If a third parameter is passed, it will
   4133      * be silently ignored.
   4134      *
   4135      * In the OUTPUT there is a minor difference in that we use '&#39;' instead
   4136      * of PHP's '&#039;' for a single quote: this provides compatibility with
   4137      *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
   4138      * (see comment by mikiwoz at yahoo dot co dot uk on
   4139      * http://php.net/htmlspecialchars); it also matches the entity definition
   4140      * for XML 1.0
   4141      * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
   4142      * Like PHP we use a numeric character reference instead of '&apos;' for the
   4143      * single quote. For the other special characters we use the named entity
   4144      * references, as PHP is doing.
   4145      *
   4146      * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
   4147      *
   4148      * @license     http://www.gnu.org/copyleft/lgpl.html
   4149      *              GNU Lesser General Public License
   4150      * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
   4151      *              Wikka Development Team}
   4152      *
   4153      * @access      private
   4154      * @param       string  $string string to be converted
   4155      * @param       integer $quote_style
   4156      *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
   4157      *                      - ENT_NOQUOTES: escapes only &, < and >
   4158      *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
   4159      * @return      string  converted string
   4160      * @since       1.0.7.18
   4161      */
   4162     function hsc($string, $quote_style = ENT_COMPAT) {
   4163         // init
   4164         static $aTransSpecchar = array(
   4165             '&' => '&amp;',
   4166             '"' => '&quot;',
   4167             '<' => '&lt;',
   4168             '>' => '&gt;',
   4169 
   4170             //This fix is related to SF#1923020, but has to be applied
   4171             //regardless of actually highlighting symbols.
   4172 
   4173             //Circumvent a bug with symbol highlighting
   4174             //This is required as ; would produce undesirable side-effects if it
   4175             //was not to be processed as an entity.
   4176             ';' => '<SEMI>', // Force ; to be processed as entity
   4177             '|' => '<PIPE>' // Force | to be processed as entity
   4178             );                      // ENT_COMPAT set
   4179 
   4180         switch ($quote_style) {
   4181             case ENT_NOQUOTES: // don't convert double quotes
   4182                 unset($aTransSpecchar['"']);
   4183                 break;
   4184             case ENT_QUOTES: // convert single quotes as well
   4185                 $aTransSpecchar["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
   4186                 break;
   4187         }
   4188 
   4189         // return translated string
   4190         return strtr($string, $aTransSpecchar);
   4191     }
   4192 
   4193     /**
   4194      * Returns a stylesheet for the highlighted code. If $economy mode
   4195      * is true, we only return the stylesheet declarations that matter for
   4196      * this code block instead of the whole thing
   4197      *
   4198      * @param  boolean Whether to use economy mode or not
   4199      * @return string A stylesheet built on the data for the current language
   4200      * @since  1.0.0
   4201      */
   4202     function get_stylesheet($economy_mode = true) {
   4203         // If there's an error, chances are that the language file
   4204         // won't have populated the language data file, so we can't
   4205         // risk getting a stylesheet...
   4206         if ($this->error) {
   4207             return '';
   4208         }
   4209 
   4210         //Check if the style rearrangements have been processed ...
   4211         //This also does some preprocessing to check which style groups are useable ...
   4212         if(!isset($this->language_data['NUMBERS_CACHE'])) {
   4213             $this->build_style_cache();
   4214         }
   4215 
   4216         // First, work out what the selector should be. If there's an ID,
   4217         // that should be used, the same for a class. Otherwise, a selector
   4218         // of '' means that these styles will be applied anywhere
   4219         if ($this->overall_id) {
   4220             $selector = '#' . $this->overall_id;
   4221         } else {
   4222             $selector = '.' . $this->language;
   4223             if ($this->overall_class) {
   4224                 $selector .= '.' . $this->overall_class;
   4225             }
   4226         }
   4227         $selector .= ' ';
   4228 
   4229         // Header of the stylesheet
   4230         if (!$economy_mode) {
   4231             $stylesheet = "/**\n".
   4232                 " * GeSHi Dynamically Generated Stylesheet\n".
   4233                 " * --------------------------------------\n".
   4234                 " * Dynamically generated stylesheet for {$this->language}\n".
   4235                 " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
   4236                 " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
   4237                 " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
   4238                 " * --------------------------------------\n".
   4239                 " */\n";
   4240         } else {
   4241             $stylesheet = "/**\n".
   4242                 " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
   4243                 " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
   4244                 " */\n";
   4245         }
   4246 
   4247         // Set the <ol> to have no effect at all if there are line numbers
   4248         // (<ol>s have margins that should be destroyed so all layout is
   4249         // controlled by the set_overall_style method, which works on the
   4250         // <pre> or <div> container). Additionally, set default styles for lines
   4251         if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   4252             //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
   4253             $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
   4254         }
   4255 
   4256         // Add overall styles
   4257         // note: neglect economy_mode, empty styles are meaningless
   4258         if ($this->overall_style != '') {
   4259             $stylesheet .= "$selector {{$this->overall_style}}\n";
   4260         }
   4261 
   4262         // Add styles for links
   4263         // note: economy mode does not make _any_ sense here
   4264         //       either the style is empty and thus no selector is needed
   4265         //       or the appropriate key is given.
   4266         foreach ($this->link_styles as $key => $style) {
   4267             if ($style != '') {
   4268                 switch ($key) {
   4269                     case GESHI_LINK:
   4270                         $stylesheet .= "{$selector}a:link {{$style}}\n";
   4271                         break;
   4272                     case GESHI_HOVER:
   4273                         $stylesheet .= "{$selector}a:hover {{$style}}\n";
   4274                         break;
   4275                     case GESHI_ACTIVE:
   4276                         $stylesheet .= "{$selector}a:active {{$style}}\n";
   4277                         break;
   4278                     case GESHI_VISITED:
   4279                         $stylesheet .= "{$selector}a:visited {{$style}}\n";
   4280                         break;
   4281                 }
   4282             }
   4283         }
   4284 
   4285         // Header and footer
   4286         // note: neglect economy_mode, empty styles are meaningless
   4287         if ($this->header_content_style != '') {
   4288             $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
   4289         }
   4290         if ($this->footer_content_style != '') {
   4291             $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
   4292         }
   4293 
   4294         // Styles for important stuff
   4295         // note: neglect economy_mode, empty styles are meaningless
   4296         if ($this->important_styles != '') {
   4297             $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
   4298         }
   4299 
   4300         // Simple line number styles
   4301         if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
   4302             $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
   4303         }
   4304         if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
   4305             $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
   4306         }
   4307         // If there is a style set for fancy line numbers, echo it out
   4308         if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
   4309             $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
   4310         }
   4311 
   4312         // note: empty styles are meaningless
   4313         foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
   4314             if ($styles != '' && (!$economy_mode ||
   4315                 (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
   4316                 $this->lexic_permissions['KEYWORDS'][$group]))) {
   4317                 $stylesheet .= "$selector.kw$group {{$styles}}\n";
   4318             }
   4319         }
   4320         foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
   4321             if ($styles != '' && (!$economy_mode ||
   4322                 (isset($this->lexic_permissions['COMMENTS'][$group]) &&
   4323                 $this->lexic_permissions['COMMENTS'][$group]) ||
   4324                 (!empty($this->language_data['COMMENT_REGEXP']) &&
   4325                 !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
   4326                 $stylesheet .= "$selector.co$group {{$styles}}\n";
   4327             }
   4328         }
   4329         foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
   4330             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
   4331                 // NEW: since 1.0.8 we have to handle hardescapes
   4332                 if ($group === 'HARD') {
   4333                     $group = '_h';
   4334                 }
   4335                 $stylesheet .= "$selector.es$group {{$styles}}\n";
   4336             }
   4337         }
   4338         foreach ($this->language_data['STYLES']['BRACKETS'] as $group => $styles) {
   4339             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['BRACKETS'])) {
   4340                 $stylesheet .= "$selector.br$group {{$styles}}\n";
   4341             }
   4342         }
   4343         foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
   4344             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['SYMBOLS'])) {
   4345                 $stylesheet .= "$selector.sy$group {{$styles}}\n";
   4346             }
   4347         }
   4348         foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
   4349             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['STRINGS'])) {
   4350                 // NEW: since 1.0.8 we have to handle hardquotes
   4351                 if ($group === 'HARD') {
   4352                     $group = '_h';
   4353                 }
   4354                 $stylesheet .= "$selector.st$group {{$styles}}\n";
   4355             }
   4356         }
   4357         foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
   4358             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['NUMBERS'])) {
   4359                 $stylesheet .= "$selector.nu$group {{$styles}}\n";
   4360             }
   4361         }
   4362         foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
   4363             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['METHODS'])) {
   4364                 $stylesheet .= "$selector.me$group {{$styles}}\n";
   4365             }
   4366         }
   4367         // note: neglect economy_mode, empty styles are meaningless
   4368         foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
   4369             if ($styles != '') {
   4370                 $stylesheet .= "$selector.sc$group {{$styles}}\n";
   4371             }
   4372         }
   4373         foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
   4374             if ($styles != '' && (!$economy_mode ||
   4375                 (isset($this->lexic_permissions['REGEXPS'][$group]) &&
   4376                 $this->lexic_permissions['REGEXPS'][$group]))) {
   4377                 if (is_array($this->language_data['REGEXPS'][$group]) &&
   4378                     array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
   4379                     $stylesheet .= "$selector.";
   4380                     $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
   4381                     $stylesheet .= " {{$styles}}\n";
   4382                 } else {
   4383                     $stylesheet .= "$selector.re$group {{$styles}}\n";
   4384                 }
   4385             }
   4386         }
   4387         // Styles for lines being highlighted extra
   4388         if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
   4389             $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
   4390         }
   4391         $stylesheet .= "{$selector}span.xtra { display:block; }\n";
   4392         foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
   4393             $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
   4394         }
   4395 
   4396         return $stylesheet;
   4397     }
   4398 
   4399     /**
   4400      * Get's the style that is used for the specified line
   4401      *
   4402      * @param int The line number information is requested for
   4403      * @access private
   4404      * @since 1.0.7.21
   4405      */
   4406     function get_line_style($line) {
   4407         //$style = null;
   4408         $style = null;
   4409         if (isset($this->highlight_extra_lines_styles[$line])) {
   4410             $style = $this->highlight_extra_lines_styles[$line];
   4411         } else { // if no "extra" style assigned
   4412             $style = $this->highlight_extra_lines_style;
   4413         }
   4414 
   4415         return $style;
   4416     }
   4417 
    /**
    * This function creates an optimized list of regular expressions
    * from an array of strings.
    *
    * Example:
    * <code>$list = array('faa', 'foo', 'foobar');
    *          => array('f(?:aa|oo(?:bar)?)')</code>
    *
    * @param $list array of (unquoted) strings
    * @param $regexp_delimiter your regular expression delimiter, @see preg_quote()
    * @return array list of regular expression strings
    * @author Milian Wolff <mail (at) milianw.de>
    * @since 1.0.8
    * @access private
    */
   4433     function optimize_regexp_list($list, $regexp_delimiter = '/') {
   4434         $regex_chars = array('.', '\\', '+', '*', '?', '[', '^', ']', '$',
   4435             '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
   4436         sort($list);
   4437         $regexp_list = array('');
   4438         $num_subpatterns = 0;
   4439         $list_key = 0;
   4440 
   4441         // the tokens which we will use to generate the regexp list
   4442         $tokens = array();
   4443         $prev_keys = array();
   4444         // go through all entries of the list and generate the token list
   4445         $cur_len = 0;
   4446         for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
   4447             if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
   4448                 // seems like the length of this pcre is growing exorbitantly
   4449                 $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
   4450                 $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
   4451                 $tokens = array();
   4452                 $cur_len = 0;
   4453             }
   4454             $level = 0;
   4455             $entry = preg_quote((string) $list[$i], $regexp_delimiter);
   4456             $pointer = &$tokens;
   4457             // properly assign the new entry to the correct position in the token array
   4458             // possibly generate smaller common denominator keys
   4459             while (true) {
   4460                 // get the common denominator
   4461                 if (isset($prev_keys[$level])) {
   4462                     if ($prev_keys[$level] == $entry) {
   4463                         // this is a duplicate entry, skip it
   4464                         continue 2;
   4465                     }
   4466                     $char = 0;
   4467                     while (isset($entry[$char]) && isset($prev_keys[$level][$char])
   4468                             && $entry[$char] == $prev_keys[$level][$char]) {
   4469                         ++$char;
   4470                     }
   4471                     if ($char > 0) {
   4472                         // this entry has at least some chars in common with the current key
   4473                         if ($char == strlen($prev_keys[$level])) {
   4474                             // current key is totally matched, i.e. this entry has just some bits appended
   4475                             $pointer = &$pointer[$prev_keys[$level]];
   4476                         } else {
   4477                             // only part of the keys match
   4478                             $new_key_part1 = substr($prev_keys[$level], 0, $char);
   4479                             $new_key_part2 = substr($prev_keys[$level], $char);
   4480 
   4481                             if (in_array($new_key_part1[0], $regex_chars)
   4482                                 || in_array($new_key_part2[0], $regex_chars)) {
   4483                                 // this is bad, a regex char as first character
   4484                                 $pointer[$entry] = array('' => true);
   4485                                 array_splice($prev_keys, $level, count($prev_keys), $entry);
   4486                                 $cur_len += strlen($entry);
   4487                                 continue;
   4488                             } else {
   4489                                 // relocate previous tokens
   4490                                 $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
   4491                                 unset($pointer[$prev_keys[$level]]);
   4492                                 $pointer = &$pointer[$new_key_part1];
   4493                                 // recreate key index
   4494                                 array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
   4495                                 $cur_len += strlen($new_key_part2);
   4496                             }
   4497                         }
   4498                         ++$level;
   4499                         $entry = substr($entry, $char);
   4500                         continue;
   4501                     }
   4502                     // else: fall trough, i.e. no common denominator was found
   4503                 }
   4504                 if ($level == 0 && !empty($tokens)) {
   4505                     // we can dump current tokens into the string and throw them away afterwards
   4506                     $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
   4507                     $new_subpatterns = substr_count($new_entry, '(?:');
   4508                     if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
   4509                         $regexp_list[++$list_key] = $new_entry;
   4510                         $num_subpatterns = $new_subpatterns;
   4511                     } else {
   4512                         if (!empty($regexp_list[$list_key])) {
   4513                             $new_entry = '|' . $new_entry;
   4514                         }
   4515                         $regexp_list[$list_key] .= $new_entry;
   4516                         $num_subpatterns += $new_subpatterns;
   4517                     }
   4518                     $tokens = array();
   4519                     $cur_len = 0;
   4520                 }
   4521                 // no further common denominator found
   4522                 $pointer[$entry] = array('' => true);
   4523                 array_splice($prev_keys, $level, count($prev_keys), $entry);
   4524 
   4525                 $cur_len += strlen($entry);
   4526                 break;
   4527             }
   4528             unset($list[$i]);
   4529         }
   4530         // make sure the last tokens get converted as well
   4531         $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
   4532         if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
   4533             $regexp_list[++$list_key] = $new_entry;
   4534         } else {
   4535             if (!empty($regexp_list[$list_key])) {
   4536                 $new_entry = '|' . $new_entry;
   4537             }
   4538             $regexp_list[$list_key] .= $new_entry;
   4539         }
   4540         return $regexp_list;
   4541     }
    /**
    * This function creates the appropriate regexp string from a token array.
    * You should not call this function directly, @see $this->optimize_regexp_list().
    *
    * @param &$tokens array of tokens
    * @param $recursed bool to know whether we recursed or not
    * @return string
    * @author Milian Wolff <mail (at) milianw.de>
    * @since 1.0.8
    * @access private
    */
   4553     function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
   4554         $list = '';
   4555         foreach ($tokens as $token => $sub_tokens) {
   4556             $list .= $token;
   4557             $close_entry = isset($sub_tokens['']);
   4558             unset($sub_tokens['']);
   4559             if (!empty($sub_tokens)) {
   4560                 $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
   4561                 if ($close_entry) {
   4562                     // make sub_tokens optional
   4563                     $list .= '?';
   4564                 }
   4565             }
   4566             $list .= '|';
   4567         }
   4568         if (!$recursed) {
   4569             // do some optimizations
   4570             // common trailing strings
   4571             // BUGGY!
   4572             //$list = preg_replace_callback('#(?<=^|\:|\|)\w+?(\w+)(?:\|.+\1)+(?=\|)#', create_function(
   4573             //    '$matches', 'return "(?:" . preg_replace("#" . preg_quote($matches[1], "#") . "(?=\||$)#", "", $matches[0]) . ")" . $matches[1];'), $list);
   4574             // (?:p)? => p?
   4575             $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
   4576             // (?:a|b|c|d|...)? => [abcd...]?
   4577             // TODO: a|bb|c => [ac]|bb
   4578             static $callback_2;
   4579             if (!isset($callback_2)) {
   4580                 $callback_2 = create_function('$matches', 'return "[" . str_replace("|", "", $matches[1]) . "]";');
   4581             }
   4582             $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#', $callback_2, $list);
   4583         }
   4584         // return $list without trailing pipe
   4585         return substr($list, 0, -1);
   4586     }
   4587 } // End Class GeSHi
   4588 
   4589 
   4590 if (!function_exists('geshi_highlight')) {
   4591     /**
   4592      * Easy way to highlight stuff. Behaves just like highlight_string
   4593      *
   4594      * @param string The code to highlight
   4595      * @param string The language to highlight the code in
   4596      * @param string The path to the language files. You can leave this blank if you need
   4597      *               as from version 1.0.7 the path should be automatically detected
   4598      * @param boolean Whether to return the result or to echo
   4599      * @return string The code highlighted (if $return is true)
   4600      * @since 1.0.2
   4601      */
   4602     function geshi_highlight($string, $language, $path = null, $return = false) {
   4603         $geshi = new GeSHi($string, $language, $path);
   4604         $geshi->set_header_type(GESHI_HEADER_NONE);
   4605 
   4606         if ($return) {
   4607             return '<code>' . $geshi->parse_code() . '</code>';
   4608         }
   4609 
   4610         echo '<code>' . $geshi->parse_code() . '</code>';
   4611 
   4612         if ($geshi->error()) {
   4613             return false;
   4614         }
   4615         return true;
   4616     }
   4617 }
   4618 
   4619 ?>