"; private $options; private $modeBuffer = ""; private $result = ""; private $top = null; private $language = null; private $keywordCount = 0; private $relevance = 0; private $ignoreIllegals = false; private static $classMap = array(); private static $languages = null; private static $aliases = null; private $autodetectSet = array( "xml", "json", "javascript", "css", "php", "http", ); public function __construct() { $this->options = array( 'classPrefix' => 'hljs-', 'tabReplace' => null, 'useBR' => false, 'languages' => null, ); self::registerLanguages(); } private static function registerLanguages() { // Languages that take precedence in the classMap array. $languagePath = __DIR__ . DIRECTORY_SEPARATOR . "languages" . DIRECTORY_SEPARATOR; foreach (array("xml", "django", "javascript", "matlab", "cpp") as $languageId) { $filePath = $languagePath . $languageId . ".json"; if (is_readable($filePath)) { self::registerLanguage($languageId, $filePath); } } $d = @dir($languagePath); if ($d) { while (($entry = $d->read()) !== false) { if (substr($entry, -5) === ".json") { $languageId = substr($entry, 0, -5); $filePath = $languagePath . $entry; if (is_readable($filePath)) { self::registerLanguage($languageId, $filePath); } } } $d->close(); } self::$languages = array_keys(self::$classMap); } /** * Register a language definition with the Highlighter's internal language * storage. Languages are stored in a static variable, so they'll be available * across all instances. You only need to register a language once. * * @param string $languageId The unique name of a language * @param string $filePath The file path to the language definition * @param bool $overwrite Overwrite language if it already exists * * @return Language The object containing the definition for a language's markup */ public static function registerLanguage($languageId, $filePath, $overwrite = false) { if (!isset(self::$classMap[$languageId]) || $overwrite) { $lang = new Language($languageId, $filePath); self::$classMap[$languageId] = $lang; if (isset($lang->mode->aliases)) { foreach ($lang->mode->aliases as $alias) { self::$aliases[$alias] = $languageId; } } } return self::$classMap[$languageId]; } private function testRe($re, $lexeme) { if (!$re) { return false; } $test = preg_match($re, $lexeme, $match, PREG_OFFSET_CAPTURE); if ($test === false) { throw new \Exception("Invalid regexp: " . var_export($re, true)); } return count($match) && ($match[0][1] == 0); } private function escapeRe($value) { return sprintf('/%s/m', preg_quote($value)); } private function subMode($lexeme, $mode) { for ($i = 0; $i < count($mode->contains); ++$i) { if ($this->testRe($mode->contains[$i]->beginRe, $lexeme)) { if ($mode->contains[$i]->endSameAsBegin) { $matches = array(); preg_match($mode->contains[$i]->beginRe, $lexeme, $matches); $mode->contains[$i]->endRe = $this->escapeRe($matches[0]); } return $mode->contains[$i]; } } } private function endOfMode($mode, $lexeme) { if ($this->testRe($mode->endRe, $lexeme)) { while ($mode->endsParent && $mode->parent) { $mode = $mode->parent; } return $mode; } if ($mode->endsWithParent) { return $this->endOfMode($mode->parent, $lexeme); } } private function isIllegal($lexeme, $mode) { return !$this->ignoreIllegals && $this->testRe($mode->illegalRe, $lexeme); } private function keywordMatch($mode, $match) { $kwd = $this->language->caseInsensitive ? mb_strtolower($match[0], "UTF-8") : $match[0]; return isset($mode->keywords[$kwd]) ? $mode->keywords[$kwd] : null; } private function buildSpan($classname, $insideSpan, $leaveOpen = false, $noPrefix = false) { $classPrefix = $noPrefix ? "" : $this->options['classPrefix']; $openSpan = ""; if (!$classname) { return $insideSpan; } return $openSpan . $insideSpan . $closeSpan; } private function escape($value) { return htmlspecialchars($value, ENT_NOQUOTES); } private function processKeywords() { if (empty($this->top->keywords)) { return $this->escape($this->modeBuffer); } $result = ""; $lastIndex = 0; /* TODO: when using the crystal language file on django and twigs code * the values of $this->top->lexemesRe can become "" (empty). Check * if this behaviour is consistent with highlight.js. */ if ($this->top->lexemesRe) { while (preg_match($this->top->lexemesRe, $this->modeBuffer, $match, PREG_OFFSET_CAPTURE, $lastIndex)) { $result .= $this->escape(substr($this->modeBuffer, $lastIndex, $match[0][1] - $lastIndex)); $keyword_match = $this->keywordMatch($this->top, $match[0]); if ($keyword_match) { $this->relevance += $keyword_match[1]; $result .= $this->buildSpan($keyword_match[0], $this->escape($match[0][0])); } else { $result .= $this->escape($match[0][0]); } $lastIndex = strlen($match[0][0]) + $match[0][1]; } } return $result . $this->escape(substr($this->modeBuffer, $lastIndex)); } private function processSubLanguage() { try { $hl = new Highlighter(); $hl->setAutodetectLanguages($this->autodetectSet); $explicit = is_string($this->top->subLanguage); if ($explicit && !in_array($this->top->subLanguage, self::$languages)) { return $this->escape($this->modeBuffer); } if ($explicit) { $res = $hl->highlight( $this->top->subLanguage, $this->modeBuffer, true, isset($this->continuations[$this->top->subLanguage]) ? $this->continuations[$this->top->subLanguage] : null ); } else { $res = $hl->highlightAuto( $this->modeBuffer, count($this->top->subLanguage) ? $this->top->subLanguage : null ); } // Counting embedded language score towards the host language may // be disabled with zeroing the containing mode relevance. Usecase // in point is Markdown that allows XML everywhere and makes every // XML snippet to have a much larger Markdown score. if ($this->top->relevance > 0) { $this->relevance += $res->relevance; } if ($explicit) { $this->continuations[$this->top->subLanguage] = $res->top; } return $this->buildSpan($res->language, $res->value, false, true); } catch (\Exception $e) { error_log("TODO, is this a relevant catch?"); error_log($e); return $this->escape($this->modeBuffer); } } private function processBuffer() { if (is_object($this->top) && $this->top->subLanguage) { $this->result .= $this->processSubLanguage(); } else { $this->result .= $this->processKeywords(); } $this->modeBuffer = ''; } private function startNewMode($mode) { $this->result .= $mode->className ? $this->buildSpan($mode->className, "", true) : ""; $t = clone $mode; $t->parent = $this->top; $this->top = $t; } private function processLexeme($buffer, $lexeme = null) { $this->modeBuffer .= $buffer; if ($lexeme === null) { $this->processBuffer(); return 0; } $new_mode = $this->subMode($lexeme, $this->top); if ($new_mode) { if ($new_mode->skip) { $this->modeBuffer .= $lexeme; } else { if ($new_mode->excludeBegin) { $this->modeBuffer .= $lexeme; } $this->processBuffer(); if (!$new_mode->returnBegin && !$new_mode->excludeBegin) { $this->modeBuffer = $lexeme; } } $this->startNewMode($new_mode, $lexeme); return $new_mode->returnBegin ? 0 : strlen($lexeme); } $end_mode = $this->endOfMode($this->top, $lexeme); if ($end_mode) { $origin = $this->top; if ($origin->skip) { $this->modeBuffer .= $lexeme; } else { if (!($origin->returnEnd || $origin->excludeEnd)) { $this->modeBuffer .= $lexeme; } $this->processBuffer(); if ($origin->excludeEnd) { $this->modeBuffer = $lexeme; } } do { if ($this->top->className) { $this->result .= self::SPAN_END_TAG; } if (!$this->top->skip && !$this->top->subLanguage) { $this->relevance += $this->top->relevance; } $this->top = $this->top->parent; } while ($this->top != $end_mode->parent); if ($end_mode->starts) { if ($end_mode->endSameAsBegin) { $end_mode->starts->endRe = $end_mode->endRe; } $this->startNewMode($end_mode->starts, ""); } return $origin->returnEnd ? 0 : strlen($lexeme); } if ($this->isIllegal($lexeme, $this->top)) { $className = $this->top->className ? $this->top->className : "unnamed"; $err = "Illegal lexeme \"{$lexeme}\" for mode \"{$className}\""; throw new \Exception($err); } // Parser should not reach this point as all types of lexemes should // be caught earlier, but if it does due to some bug make sure it // advances at least one character forward to prevent infinite looping. $this->modeBuffer .= $lexeme; $l = strlen($lexeme); return $l ? $l : 1; } /** * Replace tabs for something more usable. */ private function replaceTabs($code) { if ($this->options['tabReplace'] !== null) { return str_replace("\t", $this->options['tabReplace'], $code); } return $code; } /** * Set the set of languages used for autodetection. When using * autodetection the code to highlight will be probed for every language * in this set. Limiting this set to only the languages you want to use * will greatly improve highlighting speed. * * @param array $set An array of language games to use for autodetection. This defaults * to a typical set Web development languages. */ public function setAutodetectLanguages(array $set) { $this->autodetectSet = array_unique($set); self::registerLanguages(); } /** * Get the tab replacement string. * * @return string The tab replacement string */ public function getTabReplace() { return $this->options['tabReplace']; } /** * Set the tab replacement string. This defaults to NULL: no tabs * will be replaced. * * @param string $tabReplace The tab replacement string */ public function setTabReplace($tabReplace) { $this->options['tabReplace'] = $tabReplace; } /** * Get the class prefix string. * * @return string * The class prefix string */ public function getClassPrefix() { return $this->options['classPrefix']; } /** * Set the class prefix string. * * @param string $classPrefix The class prefix string */ public function setClassPrefix($classPrefix) { $this->options['classPrefix'] = $classPrefix; } /** * @throws \DomainException if the requested language was not in this * Highlighter's language set */ private function getLanguage($name) { if (isset(self::$classMap[$name])) { return self::$classMap[$name]; } elseif (isset(self::$aliases[$name]) && isset(self::$classMap[self::$aliases[$name]])) { return self::$classMap[self::$aliases[$name]]; } throw new \DomainException("Unknown language: $name"); } /** * Determine whether or not a language definition supports auto detection. * * @param string $name Language name * * @return bool */ private function autoDetection($name) { return !$this->getLanguage($name)->disableAutodetect; } /** * Core highlighting function. Accepts a language name, or an alias, and a * string with the code to highlight. Returns an object with the following * properties: * - relevance (int) * - value (an HTML string with highlighting markup). * * @throws \DomainException if the requested language was not in this * Highlighter's language set * @throws \Exception if an invalid regex was given in a language file */ public function highlight($language, $code, $ignoreIllegals = true, $continuation = null) { $this->language = $this->getLanguage($language); $this->language->compile(); $this->top = $continuation ? $continuation : $this->language->mode; $this->continuations = array(); $this->result = ""; for ($current = $this->top; $current != $this->language->mode; $current = $current->parent) { if ($current->className) { $this->result = $this->buildSpan($current->className, '', true) . $this->result; } } $this->modeBuffer = ""; $this->relevance = 0; $this->ignoreIllegals = $ignoreIllegals; $res = new \stdClass(); $res->relevance = 0; $res->value = ""; $res->language = ""; try { $match = null; $count = 0; $index = 0; while ($this->top && $this->top->terminators) { $test = @preg_match($this->top->terminators, $code, $match, PREG_OFFSET_CAPTURE, $index); if ($test === false) { throw new \Exception("Invalid " . $this->language->name . " regExp " . var_export($this->top->terminators, true)); } elseif ($test === 0) { break; } $count = $this->processLexeme(substr($code, $index, $match[0][1] - $index), $match[0][0]); $index = $match[0][1] + $count; } $this->processLexeme(substr($code, $index)); for ($current = $this->top; isset($current->parent); $current = $current->parent) { if ($current->className) { $this->result .= self::SPAN_END_TAG; } } $res->relevance = $this->relevance; $res->value = $this->replaceTabs($this->result); $res->language = $this->language->name; $res->top = $this->top; return $res; } catch (\Exception $e) { if (strpos($e->getMessage(), "Illegal") !== false) { $res->value = $this->escape($code); return $res; } throw $e; } } /** * Highlight the given code by highlighting the given code with each * registered language and then finding the match with highest accuracy. * * @param string $code * @param string[]|null $languageSubset When set to null, this method will * attempt to highlight $code with each language (170+). Set this to * an array of languages of your choice to limit the amount of languages * to try. * * @throws \DomainException if the attempted language to check does not exist * @throws \Exception if an invalid regex was given in a language file * * @return \stdClass */ public function highlightAuto($code, $languageSubset = null) { $res = new \stdClass(); $res->relevance = 0; $res->value = $this->escape($code); $res->language = ""; $scnd = clone $res; $tmp = $languageSubset ? $languageSubset : $this->autodetectSet; foreach ($tmp as $l) { // don't fail if we run into a non-existent language try { // skip any languages that don't support auto detection if (!$this->autoDetection($l)) { continue; } $current = $this->highlight($l, $code, false); } catch (\DomainException $e) { continue; } if ($current->relevance > $scnd->relevance) { $scnd = $current; } if ($current->relevance > $res->relevance) { $scnd = $res; $res = $current; } } if ($scnd->language) { $res->secondBest = $scnd; } return $res; } /** * Return a list of all supported languages. Using this list in * setAutodetectLanguages will turn on autodetection for all supported * languages. * * @param bool $include_aliases specify whether language aliases * should be included as well * * @return string[] An array of language names */ public function listLanguages($include_aliases = false) { if ($include_aliases === true) { return array_merge(self::$languages, array_keys(self::$aliases)); } return self::$languages; } /** * Returns list of all available aliases for given language name. * * @param string $language name or alias of language to look-up * * @throws \DomainException if the requested language was not in this * Highlighter's language set * * @return string[] An array of all aliases associated with the requested * language name language. Passed-in name is included as * well. */ public function getAliasesForLanguage($language) { $language = self::getLanguage($language); if ($language->aliases === null) { return array($language->name); } return array_merge(array($language->name), $language->aliases); } }