| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643 |
- <?php
-
- /* Copyright (c)
- * - 2006-2013, Ivan Sagalaev (maniac@softwaremaniacs.org), highlight.js
- * (original author)
- * - 2013-2019, Geert Bergman (geert@scrivo.nl), highlight.php
- * - 2014 Daniel Lynge, highlight.php (contributor)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- * 3. Neither the name of "highlight.js", "highlight.php", nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
- namespace Highlight;
-
- class Highlighter
- {
/** Closing tag appended for every span the highlighter leaves to be closed later. */
const SPAN_END_TAG = "</span>";

/** @var array Per-instance options: classPrefix, tabReplace, useBR and languages. */
private $options;

/** @var string Text gathered for the current mode that has not been rendered yet. */
private $modeBuffer = "";

/** @var string HTML generated so far by the running highlight() call. */
private $result = "";

/** @var object|null Mode on top of the mode stack (linked to its predecessors via ->parent). */
private $top = null;

/** @var Language|null Language definition used by the running highlight() call. */
private $language = null;

private $keywordCount = 0;

/** @var int Relevance score accumulated by the running highlight() call. */
private $relevance = 0;

/** @var bool When true, illegal lexemes do not abort highlighting. */
private $ignoreIllegals = false;

/**
 * Last mode reached per explicitly named sub-language, so that a later
 * buffer in the same sub-language can continue where the previous one ended.
 *
 * Declared explicitly because highlight() and processSubLanguage() assign it;
 * creating undeclared (dynamic) properties is deprecated as of PHP 8.2.
 *
 * @var array
 */
private $continuations = array();

/** @var array Registered Language objects keyed by language id (shared by all instances). */
private static $classMap = array();

/** @var string[]|null Ids of all registered languages; filled by registerLanguages(). */
private static $languages = null;

/** @var array|null Map of alias => language id; stays null until the first alias is registered. */
private static $aliases = null;

/** @var string[] Languages probed by highlightAuto() when no subset is passed. */
private $autodetectSet = array(
    "xml", "json", "javascript", "css", "php", "http",
);
-
/**
 * Initialise the instance options with their defaults and trigger
 * registration of the language definitions.
 */
public function __construct()
{
    $defaults = array();
    $defaults['classPrefix'] = 'hljs-';
    $defaults['tabReplace'] = null;
    $defaults['useBR'] = false;
    $defaults['languages'] = null;

    $this->options = $defaults;

    self::registerLanguages();
}
-
/**
 * Register every language definition found in the "languages" directory next
 * to this file and refresh the list of known language ids.
 *
 * This method runs on every constructor call and on every
 * setAutodetectLanguages() call. Registration never overwrites an existing
 * entry, so rescanning the directory each time only repeats filesystem work;
 * the directory is therefore scanned once per process. The id list is still
 * rebuilt on every call so that languages added through registerLanguage()
 * in the meantime are picked up.
 */
private static function registerLanguages()
{
    // Set once the directory has been read successfully; a failed attempt
    // (directory missing or unreadable) is retried on the next call.
    static $directoryScanned = false;

    if (!$directoryScanned) {
        // Languages that take precedence in the classMap array.
        $languagePath = __DIR__ . DIRECTORY_SEPARATOR . "languages" . DIRECTORY_SEPARATOR;
        foreach (array("xml", "django", "javascript", "matlab", "cpp") as $languageId) {
            $filePath = $languagePath . $languageId . ".json";
            if (is_readable($filePath)) {
                self::registerLanguage($languageId, $filePath);
            }
        }

        $d = @dir($languagePath);
        if ($d) {
            while (($entry = $d->read()) !== false) {
                if (substr($entry, -5) === ".json") {
                    $languageId = substr($entry, 0, -5);
                    $filePath = $languagePath . $entry;
                    if (is_readable($filePath)) {
                        self::registerLanguage($languageId, $filePath);
                    }
                }
            }
            $d->close();
            $directoryScanned = true;
        }
    }

    self::$languages = array_keys(self::$classMap);
}
-
/**
 * Add a language definition to the storage shared by all Highlighter
 * instances. Because the storage is static, registering a language once
 * makes it available everywhere.
 *
 * An id that is already registered is left untouched unless $overwrite is
 * set; in that case the stored definition is replaced. Aliases declared by a
 * newly stored definition are mapped to the language id.
 *
 * @param string $languageId The unique name of a language
 * @param string $filePath   The file path to the language definition
 * @param bool   $overwrite  Overwrite language if it already exists
 *
 * @return Language The definition stored under $languageId
 */
public static function registerLanguage($languageId, $filePath, $overwrite = false)
{
    $alreadyRegistered = isset(self::$classMap[$languageId]);
    if ($alreadyRegistered && !$overwrite) {
        return self::$classMap[$languageId];
    }

    $definition = new Language($languageId, $filePath);
    self::$classMap[$languageId] = $definition;

    if (isset($definition->mode->aliases)) {
        foreach ($definition->mode->aliases as $alias) {
            self::$aliases[$alias] = $languageId;
        }
    }

    return self::$classMap[$languageId];
}
-
/**
 * Check whether a regular expression matches at the very start of a lexeme.
 *
 * @param string $re     PCRE pattern including delimiters; an empty value never matches
 * @param string $lexeme Text to test
 *
 * @throws \Exception if preg_match() reports a failure for the pattern
 *
 * @return bool True when the first match begins at offset 0
 */
private function testRe($re, $lexeme)
{
    if (!$re) {
        return false;
    }

    $outcome = preg_match($re, $lexeme, $groups, PREG_OFFSET_CAPTURE);
    if ($outcome === false) {
        throw new \Exception("Invalid regexp: " . var_export($re, true));
    }

    // No match at all leaves the capture array empty.
    if (!count($groups)) {
        return false;
    }

    return $groups[0][1] == 0;
}
-
/**
 * Turn a literal string into a multi-line PCRE pattern that matches exactly
 * that string.
 *
 * The delimiter is passed to preg_quote() so that a "/" inside $value is
 * escaped as well; without it a value containing "/" would terminate the
 * pattern early and produce an invalid regular expression.
 *
 * @param string $value Literal text to match
 *
 * @return string Pattern of the form /<quoted value>/m
 */
private function escapeRe($value)
{
    return sprintf('/%s/m', preg_quote($value, '/'));
}
-
/**
 * Find the first child mode of $mode whose begin pattern matches at the
 * start of the lexeme.
 *
 * For a child flagged endSameAsBegin the end pattern is rewritten to the
 * literal text its begin pattern just matched.
 *
 * @param string $lexeme Text found at the current position
 * @param object $mode   Mode whose ->contains list is searched
 *
 * @return object|null The matching child mode, or null when none matches
 */
private function subMode($lexeme, $mode)
{
    for ($position = 0; $position < count($mode->contains); ++$position) {
        $candidate = $mode->contains[$position];

        if (!$this->testRe($candidate->beginRe, $lexeme)) {
            continue;
        }

        if ($candidate->endSameAsBegin) {
            $opening = array();
            preg_match($candidate->beginRe, $lexeme, $opening);

            $candidate->endRe = $this->escapeRe($opening[0]);
        }

        return $candidate;
    }

    return null;
}
-
/**
 * Determine which mode, if any, is terminated by the lexeme.
 *
 * Starting at $mode, the end pattern is tested; when it does not match and
 * the mode is flagged endsWithParent, the test moves on to the parent. Once a
 * mode matches, the result climbs further up for as long as the mode is
 * flagged endsParent and has a parent.
 *
 * @param object $mode   Mode to start from
 * @param string $lexeme Text found at the current position
 *
 * @return object|null The mode that ends here, or null when nothing ends
 */
private function endOfMode($mode, $lexeme)
{
    while (true) {
        if ($this->testRe($mode->endRe, $lexeme)) {
            while ($mode->endsParent && $mode->parent) {
                $mode = $mode->parent;
            }

            return $mode;
        }

        if (!$mode->endsWithParent) {
            return null;
        }

        $mode = $mode->parent;
    }
}
-
/**
 * Tell whether the lexeme is illegal in the given mode.
 *
 * @param string $lexeme Text found at the current position
 * @param object $mode   Mode whose illegal pattern is consulted
 *
 * @return bool Always false while illegal lexemes are being ignored
 */
private function isIllegal($lexeme, $mode)
{
    if ($this->ignoreIllegals) {
        return false;
    }

    return $this->testRe($mode->illegalRe, $lexeme);
}
-
/**
 * Look up a matched word in the keyword table of a mode.
 *
 * The word is lower-cased first when the current language is case
 * insensitive.
 *
 * @param object $mode  Mode providing the ->keywords table
 * @param array  $match Match entry whose element 0 is the matched text
 *
 * @return mixed|null The keyword table entry, or null for an unknown word
 */
private function keywordMatch($mode, $match)
{
    $word = $match[0];
    if ($this->language->caseInsensitive) {
        $word = mb_strtolower($word, "UTF-8");
    }

    if (isset($mode->keywords[$word])) {
        return $mode->keywords[$word];
    }

    return null;
}
-
/**
 * Wrap already escaped content in a span carrying the given class.
 *
 * @param string $classname  Class name; an empty value returns the content unwrapped
 * @param string $insideSpan Content placed inside the span
 * @param bool   $leaveOpen  Omit the closing tag so that the caller closes it later
 * @param bool   $noPrefix   Do not prepend the configured class prefix
 *
 * @return string The resulting markup
 */
private function buildSpan($classname, $insideSpan, $leaveOpen = false, $noPrefix = false)
{
    $prefix = $this->options['classPrefix'];
    if ($noPrefix) {
        $prefix = "";
    }

    if (!$classname) {
        return $insideSpan;
    }

    $markup = "<span class=\"" . $prefix . $classname . "\">" . $insideSpan;
    if (!$leaveOpen) {
        $markup .= self::SPAN_END_TAG;
    }

    return $markup;
}
-
/**
 * Escape text for inclusion in the generated HTML. Quotes are left as they
 * are; "&", "<" and ">" are converted to entities.
 *
 * ENT_SUBSTITUTE is set because htmlspecialchars() otherwise returns an
 * empty string when the input contains a byte sequence that is invalid in
 * the active encoding, which would silently drop the code being highlighted.
 * With the flag, only the offending bytes are replaced.
 *
 * @param string $value Raw text
 *
 * @return string Escaped text
 */
private function escape($value)
{
    return htmlspecialchars($value, ENT_NOQUOTES | ENT_SUBSTITUTE);
}
-
/**
 * Render the buffered text of the current mode, wrapping every lexeme that
 * is a keyword of that mode in a span and adding the keyword's relevance to
 * the running score. All other text is escaped and copied through.
 *
 * @return string Markup for the buffered text
 */
private function processKeywords()
{
    if (empty($this->top->keywords)) {
        return $this->escape($this->modeBuffer);
    }

    $markup = "";
    $cursor = 0;

    /* TODO: when using the crystal language file on django and twigs code
     * the values of $this->top->lexemesRe can become "" (empty). Check
     * if this behaviour is consistent with highlight.js.
     */
    if ($this->top->lexemesRe) {
        while (preg_match($this->top->lexemesRe, $this->modeBuffer, $hit, PREG_OFFSET_CAPTURE, $cursor)) {
            $word = $hit[0][0];
            $offset = $hit[0][1];

            // Text between the previous lexeme and this one.
            $markup .= $this->escape(substr($this->modeBuffer, $cursor, $offset - $cursor));

            $entry = $this->keywordMatch($this->top, $hit[0]);
            if ($entry) {
                $this->relevance += $entry[1];
                $markup .= $this->buildSpan($entry[0], $this->escape($word));
            } else {
                $markup .= $this->escape($word);
            }

            $cursor = strlen($word) + $offset;
        }
    }

    $tail = substr($this->modeBuffer, $cursor);

    return $markup . $this->escape($tail);
}
-
/**
 * Render the buffered text with an embedded language, using a separate
 * Highlighter instance.
 *
 * When the current mode names its sub-language as a string, that language is
 * used (continuing from the mode reached by the previous buffer in the same
 * sub-language); an unregistered name falls back to plain escaping. Otherwise
 * the sub-language is auto-detected, optionally restricted to the list given
 * by the mode.
 *
 * The nested highlighter is given this instance's class prefix so that
 * embedded markup uses the same CSS class names as the surrounding markup; a
 * freshly constructed Highlighter would otherwise fall back to the default
 * prefix.
 *
 * @return string Markup for the buffered text
 */
private function processSubLanguage()
{
    try {
        $hl = new Highlighter();
        $hl->setAutodetectLanguages($this->autodetectSet);
        $hl->setClassPrefix($this->options['classPrefix']);

        $explicit = is_string($this->top->subLanguage);
        if ($explicit && !in_array($this->top->subLanguage, self::$languages)) {
            return $this->escape($this->modeBuffer);
        }

        if ($explicit) {
            $res = $hl->highlight(
                $this->top->subLanguage,
                $this->modeBuffer,
                true,
                isset($this->continuations[$this->top->subLanguage]) ? $this->continuations[$this->top->subLanguage] : null
            );
        } else {
            $res = $hl->highlightAuto(
                $this->modeBuffer,
                count($this->top->subLanguage) ? $this->top->subLanguage : null
            );
        }
        // Counting embedded language score towards the host language may
        // be disabled with zeroing the containing mode relevance. Usecase
        // in point is Markdown that allows XML everywhere and makes every
        // XML snippet to have a much larger Markdown score.
        if ($this->top->relevance > 0) {
            $this->relevance += $res->relevance;
        }
        if ($explicit) {
            $this->continuations[$this->top->subLanguage] = $res->top;
        }

        // The wrapper span carries the bare language name, without prefix.
        return $this->buildSpan($res->language, $res->value, false, true);
    } catch (\Exception $e) {
        error_log("TODO, is this a relevant catch?");
        error_log($e);

        return $this->escape($this->modeBuffer);
    }
}
-
/**
 * Render the buffered text, either as an embedded sub-language or with
 * keyword highlighting, append it to the result and empty the buffer.
 */
private function processBuffer()
{
    $hasSubLanguage = is_object($this->top) && $this->top->subLanguage;

    if ($hasSubLanguage) {
        $rendered = $this->processSubLanguage();
    } else {
        $rendered = $this->processKeywords();
    }

    $this->result .= $rendered;
    $this->modeBuffer = '';
}
-
/**
 * Enter a mode: open its span (when it has a class name) and push a copy of
 * the mode onto the mode stack.
 *
 * A clone is pushed so that the parent link is set on the copy rather than
 * on the mode object that was passed in.
 *
 * @param object $mode Mode to enter
 */
private function startNewMode($mode)
{
    if ($mode->className) {
        $this->result .= $this->buildSpan($mode->className, "", true);
    } else {
        $this->result .= "";
    }

    $frame = clone $mode;
    $frame->parent = $this->top;

    $this->top = $frame;
}
-
/**
 * Process one step of the scan: the plain text skipped since the previous
 * step plus the lexeme found at the current position.
 *
 * The lexeme either starts a child mode, ends the current mode (and possibly
 * some of its ancestors), is illegal in the current mode, or is ordinary
 * text. Calling the method without a lexeme flushes the buffer.
 *
 * @param string      $buffer Text preceding the lexeme
 * @param string|null $lexeme Matched text, or null to flush only
 *
 * @throws \Exception if the lexeme is illegal for the current mode and
 *                    illegal lexemes are not being ignored
 *
 * @return int Number of bytes of the lexeme consumed by this step
 */
private function processLexeme($buffer, $lexeme = null)
{
    $this->modeBuffer .= $buffer;

    if ($lexeme === null) {
        $this->processBuffer();

        return 0;
    }

    $new_mode = $this->subMode($lexeme, $this->top);
    if ($new_mode) {
        if ($new_mode->skip) {
            $this->modeBuffer .= $lexeme;
        } else {
            if ($new_mode->excludeBegin) {
                $this->modeBuffer .= $lexeme;
            }
            $this->processBuffer();
            if (!$new_mode->returnBegin && !$new_mode->excludeBegin) {
                $this->modeBuffer = $lexeme;
            }
        }
        $this->startNewMode($new_mode);

        return $new_mode->returnBegin ? 0 : strlen($lexeme);
    }

    $end_mode = $this->endOfMode($this->top, $lexeme);
    if ($end_mode) {
        $origin = $this->top;
        if ($origin->skip) {
            $this->modeBuffer .= $lexeme;
        } else {
            if (!($origin->returnEnd || $origin->excludeEnd)) {
                $this->modeBuffer .= $lexeme;
            }
            $this->processBuffer();
            if ($origin->excludeEnd) {
                $this->modeBuffer = $lexeme;
            }
        }
        // Pop modes until the parent of the ended mode is on top. The stack
        // entries are linked by object handles, so the stop condition is an
        // identity check; a loose comparison would compare property values
        // and could stop at a different mode that merely looks the same.
        do {
            if ($this->top->className) {
                $this->result .= self::SPAN_END_TAG;
            }
            if (!$this->top->skip && !$this->top->subLanguage) {
                $this->relevance += $this->top->relevance;
            }
            $this->top = $this->top->parent;
        } while ($this->top !== $end_mode->parent);
        if ($end_mode->starts) {
            if ($end_mode->endSameAsBegin) {
                $end_mode->starts->endRe = $end_mode->endRe;
            }
            $this->startNewMode($end_mode->starts);
        }

        return $origin->returnEnd ? 0 : strlen($lexeme);
    }

    if ($this->isIllegal($lexeme, $this->top)) {
        $className = $this->top->className ? $this->top->className : "unnamed";
        $err = "Illegal lexeme \"{$lexeme}\" for mode \"{$className}\"";

        throw new \Exception($err);
    }

    // Parser should not reach this point as all types of lexemes should
    // be caught earlier, but if it does due to some bug make sure it
    // advances at least one character forward to prevent infinite looping.

    $this->modeBuffer .= $lexeme;
    $l = strlen($lexeme);

    return $l ? $l : 1;
}
-
/**
 * Substitute the configured replacement for every tab character. The text is
 * returned unchanged while no replacement is configured (option is null).
 *
 * @param string $code Text to process
 *
 * @return string Text with tabs replaced
 */
private function replaceTabs($code)
{
    if ($this->options['tabReplace'] === null) {
        return $code;
    }

    return str_replace("\t", $this->options['tabReplace'], $code);
}
-
/**
 * Define which languages are tried during autodetection. The code to
 * highlight is probed with every language in this set, so keeping the set
 * limited to the languages you actually need makes autodetection
 * considerably faster.
 *
 * @param array $set An array of language names to use for autodetection.
 *                   Duplicate entries are dropped.
 */
public function setAutodetectLanguages(array $set)
{
    $withoutDuplicates = array_unique($set);
    $this->autodetectSet = $withoutDuplicates;

    self::registerLanguages();
}
-
/**
 * Read the string that tabs are replaced with.
 *
 * @return string|null The tab replacement string, or null when tabs are left alone
 */
public function getTabReplace()
{
    $current = $this->options['tabReplace'];

    return $current;
}
-
/**
 * Choose the string that replaces each tab in the highlighted output. The
 * option starts out as NULL, which leaves tabs untouched.
 *
 * @param string $tabReplace The tab replacement string
 */
public function setTabReplace($tabReplace)
{
    $updated = $this->options;
    $updated['tabReplace'] = $tabReplace;

    $this->options = $updated;
}
-
/**
 * Read the prefix that is put in front of every generated CSS class name.
 *
 * @return string The class prefix string
 */
public function getClassPrefix()
{
    $current = $this->options['classPrefix'];

    return $current;
}
-
/**
 * Choose the prefix that is put in front of every generated CSS class name.
 *
 * @param string $classPrefix The class prefix string
 */
public function setClassPrefix($classPrefix)
{
    $updated = $this->options;
    $updated['classPrefix'] = $classPrefix;

    $this->options = $updated;
}
-
/**
 * Resolve a language name or alias to its registered definition. A direct
 * name match wins over an alias.
 *
 * @param string $name Language name or alias
 *
 * @throws \DomainException if the requested language was not in this
 *                          Highlighter's language set
 *
 * @return Language The registered definition
 */
private function getLanguage($name)
{
    if (isset(self::$classMap[$name])) {
        return self::$classMap[$name];
    }

    if (isset(self::$aliases[$name])) {
        $target = self::$aliases[$name];
        if (isset(self::$classMap[$target])) {
            return self::$classMap[$target];
        }
    }

    throw new \DomainException("Unknown language: $name");
}
-
/**
 * Report whether a language may take part in auto detection, i.e. whether
 * its definition does not set disableAutodetect.
 *
 * @param string $name Language name
 *
 * @throws \DomainException if the language is not registered
 *
 * @return bool
 */
private function autoDetection($name)
{
    $definition = $this->getLanguage($name);

    return !$definition->disableAutodetect;
}
-
/**
 * Core highlighting function. Accepts a language name, or an alias, and a
 * string with the code to highlight. Returns an object with the following
 * properties:
 * - relevance (int)
 * - value (an HTML string with highlighting markup)
 * - language (name of the language used)
 * - top (the mode that was current when the code ended).
 *
 * When an illegal lexeme stops the run, the returned object carries the
 * plainly escaped code as value, relevance 0, an empty language and no top.
 *
 * @param string      $language       Language name or alias
 * @param string      $code           Code to highlight
 * @param bool        $ignoreIllegals Keep going when an illegal lexeme is met
 * @param object|null $continuation   Mode to resume from instead of the language's root mode
 *
 * @throws \DomainException if the requested language was not in this
 *                          Highlighter's language set
 * @throws \Exception       if an invalid regex was given in a language file
 *
 * @return \stdClass
 */
public function highlight($language, $code, $ignoreIllegals = true, $continuation = null)
{
    $this->language = $this->getLanguage($language);
    $this->language->compile();
    $this->top = $continuation ? $continuation : $this->language->mode;
    $this->continuations = array();
    $this->result = "";

    // When resuming, re-open the spans of every mode that is still active.
    $walker = $this->top;
    while ($walker != $this->language->mode) {
        if ($walker->className) {
            $this->result = $this->buildSpan($walker->className, '', true) . $this->result;
        }
        $walker = $walker->parent;
    }

    $this->modeBuffer = "";
    $this->relevance = 0;
    $this->ignoreIllegals = $ignoreIllegals;

    $res = new \stdClass();
    $res->relevance = 0;
    $res->value = "";
    $res->language = "";

    try {
        $hit = null;
        $consumed = 0;
        $cursor = 0;

        while ($this->top && $this->top->terminators) {
            $status = @preg_match($this->top->terminators, $code, $hit, PREG_OFFSET_CAPTURE, $cursor);
            if ($status === false) {
                throw new \Exception("Invalid " . $this->language->name . " regExp " . var_export($this->top->terminators, true));
            }
            if ($status === 0) {
                break;
            }

            $skipped = substr($code, $cursor, $hit[0][1] - $cursor);
            $consumed = $this->processLexeme($skipped, $hit[0][0]);
            $cursor = $hit[0][1] + $consumed;
        }
        // Flush whatever follows the last lexeme.
        $this->processLexeme(substr($code, $cursor));

        // Close the spans of all modes that are still open.
        $open = $this->top;
        while (isset($open->parent)) {
            if ($open->className) {
                $this->result .= self::SPAN_END_TAG;
            }
            $open = $open->parent;
        }

        $res->relevance = $this->relevance;
        $res->value = $this->replaceTabs($this->result);
        $res->language = $this->language->name;
        $res->top = $this->top;

        return $res;
    } catch (\Exception $e) {
        if (strpos($e->getMessage(), "Illegal") === false) {
            throw $e;
        }

        $res->value = $this->escape($code);

        return $res;
    }
}
-
/**
 * Highlight the code with each candidate language and keep the result with
 * the highest relevance. The runner-up, when there is one, is attached to the
 * returned object as secondBest.
 *
 * Languages that are not registered, or that opt out of auto detection, are
 * skipped. When no candidate scores above zero, the returned object carries
 * the plainly escaped code and an empty language.
 *
 * @param string        $code
 * @param string[]|null $languageSubset Languages to try. When empty or null,
 *                                      the set configured through
 *                                      setAutodetectLanguages() is used.
 *
 * @throws \Exception if an invalid regex was given in a language file
 *
 * @return \stdClass
 */
public function highlightAuto($code, $languageSubset = null)
{
    $best = new \stdClass();
    $best->relevance = 0;
    $best->value = $this->escape($code);
    $best->language = "";
    $runnerUp = clone $best;

    $candidates = $this->autodetectSet;
    if ($languageSubset) {
        $candidates = $languageSubset;
    }

    foreach ($candidates as $name) {
        // don't fail if we run into a non-existent language
        try {
            // skip any languages that don't support auto detection
            if (!$this->autoDetection($name)) {
                continue;
            }

            $attempt = $this->highlight($name, $code, false);
        } catch (\DomainException $e) {
            continue;
        }

        if ($attempt->relevance > $runnerUp->relevance) {
            $runnerUp = $attempt;
        }
        if ($attempt->relevance > $best->relevance) {
            $runnerUp = $best;
            $best = $attempt;
        }
    }

    if ($runnerUp->language) {
        $best->secondBest = $runnerUp;
    }

    return $best;
}
-
/**
 * Return a list of all supported languages. Using this list in
 * setAutodetectLanguages will turn on autodetection for all supported
 * languages.
 *
 * The alias map stays null until a registered language declares an alias, so
 * it is treated as empty in that case instead of being handed to
 * array_keys() as null.
 *
 * @param bool $include_aliases specify whether language aliases
 *                              should be included as well
 *
 * @return string[] An array of language names
 */
public function listLanguages($include_aliases = false)
{
    if ($include_aliases === true) {
        $aliasNames = self::$aliases === null ? array() : array_keys(self::$aliases);

        return array_merge(self::$languages, $aliasNames);
    }

    return self::$languages;
}
-
/**
 * List every name under which a language can be requested.
 *
 * @param string $language name or alias of language to look-up
 *
 * @throws \DomainException if the requested language was not in this
 *                          Highlighter's language set
 *
 * @return string[] The language's own name followed by its aliases; only the
 *                  name when the language has no aliases
 */
public function getAliasesForLanguage($language)
{
    $definition = $this->getLanguage($language);
    $names = array($definition->name);

    if ($definition->aliases === null) {
        return $names;
    }

    return array_merge($names, $definition->aliases);
}
- }
|