Dashboard sipadu mbip
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

Highlighter.php 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. <?php
  /* Copyright (c)
   * - 2006-2013, Ivan Sagalaev (maniac@softwaremaniacs.org), highlight.js
   *   (original author)
   * - 2013-2019, Geert Bergman (geert@scrivo.nl), highlight.php
   * - 2014 Daniel Lynge, highlight.php (contributor)
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
   *
   * 1. Redistributions of source code must retain the above copyright notice,
   *    this list of conditions and the following disclaimer.
   * 2. Redistributions in binary form must reproduce the above copyright notice,
   *    this list of conditions and the following disclaimer in the documentation
   *    and/or other materials provided with the distribution.
   * 3. Neither the name of "highlight.js", "highlight.php", nor the names of its
   *    contributors may be used to endorse or promote products derived from this
   *    software without specific prior written permission.
   *
   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   * POSSIBILITY OF SUCH DAMAGE.
   */
  32. namespace Highlight;
  /**
   * Syntax highlighter that turns source code into HTML with <span> markup.
   *
   * Language definitions are loaded from JSON files and kept in static storage
   * shared by every instance. A single instance holds the parser state of one
   * highlight() run (current mode, buffered text, accumulated relevance).
   */
  class Highlighter
  {
      const SPAN_END_TAG = "</span>";

      /** @var array Rendering options: classPrefix, tabReplace, useBR, languages. */
      private $options;

      /** @var string Text collected for the current mode, not yet emitted. */
      private $modeBuffer = "";

      /** @var string HTML produced so far by the current highlight() run. */
      private $result = "";

      /** @var object|null The mode currently being parsed (top of the mode stack). */
      private $top = null;

      /** @var Language|null The language used by the current highlight() run. */
      private $language = null;

      private $keywordCount = 0;

      /** @var int Relevance score accumulated by the current highlight() run. */
      private $relevance = 0;

      /** @var bool When true, illegal lexemes do not abort highlighting. */
      private $ignoreIllegals = false;

      /**
       * @var array Last mode reached per explicit sub-language, so the next
       *            embedded chunk of that language continues where it stopped.
       */
      private $continuations = array();

      /** @var array Language id => Language, shared by all instances. */
      private static $classMap = array();

      /** @var string[]|null Ids of all registered languages. */
      private static $languages = null;

      /** @var array Alias => language id. */
      private static $aliases = array();

      /** @var bool Whether the bundled "languages" directory was already scanned. */
      private static $bundledLanguagesLoaded = false;

      /** @var string[] Languages probed by highlightAuto() when no subset is given. */
      private $autodetectSet = array(
          "xml", "json", "javascript", "css", "php", "http",
      );

      /**
       * Set the default options and make sure the bundled languages are
       * registered.
       */
      public function __construct()
      {
          $this->options = array(
              'classPrefix' => 'hljs-',
              'tabReplace' => null,
              'useBR' => false,
              'languages' => null,
          );

          self::registerLanguages();
      }

      /**
       * Register every *.json definition found in the "languages" directory
       * next to this file and refresh the list of known language ids.
       *
       * The directory is scanned only once per process; later calls merely
       * refresh the id list so that languages added through
       * registerLanguage() in the meantime are picked up.
       */
      private static function registerLanguages()
      {
          if (!self::$bundledLanguagesLoaded) {
              $languagePath = __DIR__ . DIRECTORY_SEPARATOR . "languages" . DIRECTORY_SEPARATOR;

              // Languages that take precedence in the classMap array.
              foreach (array("xml", "django", "javascript", "matlab", "cpp") as $languageId) {
                  $filePath = $languagePath . $languageId . ".json";
                  if (is_readable($filePath)) {
                      self::registerLanguage($languageId, $filePath);
                  }
              }

              $d = @dir($languagePath);
              if ($d) {
                  while (($entry = $d->read()) !== false) {
                      if (substr($entry, -5) !== ".json") {
                          continue;
                      }

                      $languageId = substr($entry, 0, -5);
                      $filePath = $languagePath . $entry;
                      if (is_readable($filePath)) {
                          self::registerLanguage($languageId, $filePath);
                      }
                  }
                  $d->close();

                  // Only remember a scan that could actually read the directory.
                  self::$bundledLanguagesLoaded = true;
              }
          }

          self::$languages = array_keys(self::$classMap);
      }

      /**
       * Register a language definition with the Highlighter's internal language
       * storage. Languages are stored in a static variable, so they'll be available
       * across all instances. You only need to register a language once.
       *
       * @param string $languageId The unique name of a language
       * @param string $filePath   The file path to the language definition
       * @param bool   $overwrite  Overwrite language if it already exists
       *
       * @return Language The object containing the definition for a language's markup
       */
      public static function registerLanguage($languageId, $filePath, $overwrite = false)
      {
          if (!isset(self::$classMap[$languageId]) || $overwrite) {
              $lang = new Language($languageId, $filePath);
              self::$classMap[$languageId] = $lang;

              if (isset($lang->mode->aliases)) {
                  foreach ($lang->mode->aliases as $alias) {
                      self::$aliases[$alias] = $languageId;
                  }
              }
          }

          return self::$classMap[$languageId];
      }

      /**
       * Test whether a regular expression matches at the very start of a lexeme.
       *
       * @param string|null $re     The regular expression; an empty value never matches
       * @param string      $lexeme The text to test
       *
       * @throws \Exception if the regular expression is invalid
       *
       * @return bool
       */
      private function testRe($re, $lexeme)
      {
          if (!$re) {
              return false;
          }

          $test = preg_match($re, $lexeme, $match, PREG_OFFSET_CAPTURE);
          if ($test === false) {
              throw new \Exception("Invalid regexp: " . var_export($re, true));
          }

          // A match only counts when it starts at offset 0.
          return count($match) && ($match[0][1] === 0);
      }

      /**
       * Build a multi-line regular expression that matches a literal string.
       *
       * @param string $value The literal text to match
       *
       * @return string
       */
      private function escapeRe($value)
      {
          // The delimiter must be quoted as well, otherwise a "/" inside
          // $value would terminate the pattern early.
          return sprintf('/%s/m', preg_quote($value, '/'));
      }

      /**
       * Find the first mode contained in $mode whose begin expression matches
       * the lexeme.
       *
       * @param string $lexeme
       * @param object $mode
       *
       * @return object|null The matching sub mode, or null if none matches
       */
      private function subMode($lexeme, $mode)
      {
          $total = count($mode->contains);

          for ($i = 0; $i < $total; ++$i) {
              $candidate = $mode->contains[$i];
              if (!$this->testRe($candidate->beginRe, $lexeme)) {
                  continue;
              }

              if ($candidate->endSameAsBegin) {
                  // The mode ends with the exact text it started with.
                  $matches = array();
                  preg_match($candidate->beginRe, $lexeme, $matches);
                  $candidate->endRe = $this->escapeRe($matches[0]);
              }

              return $candidate;
          }

          return null;
      }

      /**
       * Determine which mode, if any, is ended by the lexeme.
       *
       * @param object $mode
       * @param string $lexeme
       *
       * @return object|null The mode that ends, or null if the lexeme ends nothing
       */
      private function endOfMode($mode, $lexeme)
      {
          if ($this->testRe($mode->endRe, $lexeme)) {
              while ($mode->endsParent && $mode->parent) {
                  $mode = $mode->parent;
              }

              return $mode;
          }

          if ($mode->endsWithParent && $mode->parent) {
              return $this->endOfMode($mode->parent, $lexeme);
          }

          return null;
      }

      /**
       * Tell whether the lexeme is illegal in the given mode. Always false
       * when illegal lexemes are being ignored.
       *
       * @param string $lexeme
       * @param object $mode
       *
       * @return bool
       */
      private function isIllegal($lexeme, $mode)
      {
          return !$this->ignoreIllegals && $this->testRe($mode->illegalRe, $lexeme);
      }

      /**
       * Look up a matched lexeme in the keyword table of a mode.
       *
       * @param object $mode
       * @param array  $match A PREG_OFFSET_CAPTURE entry: [text, offset]
       *
       * @return mixed|null The keyword entry, or null if the text is no keyword
       */
      private function keywordMatch($mode, $match)
      {
          $kwd = $this->language->caseInsensitive ? mb_strtolower($match[0], "UTF-8") : $match[0];

          return isset($mode->keywords[$kwd]) ? $mode->keywords[$kwd] : null;
      }

      /**
       * Wrap content in a <span> carrying the given class name.
       *
       * @param string $classname  Class name; when empty the content is returned as is
       * @param string $insideSpan Already escaped content
       * @param bool   $leaveOpen  Omit the closing tag
       * @param bool   $noPrefix   Do not prepend the configured class prefix
       *
       * @return string
       */
      private function buildSpan($classname, $insideSpan, $leaveOpen = false, $noPrefix = false)
      {
          if (!$classname) {
              return $insideSpan;
          }

          $classPrefix = $noPrefix ? "" : $this->options['classPrefix'];
          $closeSpan = $leaveOpen ? "" : self::SPAN_END_TAG;

          return "<span class=\"" . $classPrefix . $classname . "\">" . $insideSpan . $closeSpan;
      }

      /**
       * Escape text for use as HTML element content (quotes are left alone).
       *
       * @param string $value
       *
       * @return string
       */
      private function escape($value)
      {
          return htmlspecialchars($value, ENT_NOQUOTES);
      }

      /**
       * Escape the buffered text and wrap every keyword of the current mode
       * in a span, adding the keyword's relevance to the running score.
       *
       * @return string
       */
      private function processKeywords()
      {
          if (empty($this->top->keywords)) {
              return $this->escape($this->modeBuffer);
          }

          $result = "";
          $lastIndex = 0;

          /* TODO: when using the crystal language file on django and twigs code
           * the values of $this->top->lexemesRe can become "" (empty). Check
           * if this behaviour is consistent with highlight.js.
           */
          if ($this->top->lexemesRe) {
              while (preg_match($this->top->lexemesRe, $this->modeBuffer, $match, PREG_OFFSET_CAPTURE, $lastIndex)) {
                  // Text between the previous lexeme and this one.
                  $result .= $this->escape(substr($this->modeBuffer, $lastIndex, $match[0][1] - $lastIndex));

                  $keyword_match = $this->keywordMatch($this->top, $match[0]);
                  if ($keyword_match) {
                      // Keyword entries are [class name, relevance].
                      $this->relevance += $keyword_match[1];
                      $result .= $this->buildSpan($keyword_match[0], $this->escape($match[0][0]));
                  } else {
                      $result .= $this->escape($match[0][0]);
                  }

                  $lastIndex = strlen($match[0][0]) + $match[0][1];
              }
          }

          return $result . $this->escape(substr($this->modeBuffer, $lastIndex));
      }

      /**
       * Highlight the buffered text with the sub-language of the current mode,
       * using a separate Highlighter instance.
       *
       * A string sub-language is highlighted explicitly (and continued from the
       * previous chunk of that language); otherwise the language is auto
       * detected, optionally limited to the listed languages.
       *
       * @return string The highlighted markup, or the escaped buffer on failure
       */
      private function processSubLanguage()
      {
          try {
              $hl = new self();
              $hl->setAutodetectLanguages($this->autodetectSet);

              $subLanguage = $this->top->subLanguage;
              $explicit = is_string($subLanguage);

              if ($explicit && !in_array($subLanguage, self::$languages)) {
                  return $this->escape($this->modeBuffer);
              }

              if ($explicit) {
                  $res = $hl->highlight(
                      $subLanguage,
                      $this->modeBuffer,
                      true,
                      isset($this->continuations[$subLanguage]) ? $this->continuations[$subLanguage] : null
                  );
              } else {
                  // count() on a non-countable value is a TypeError on PHP 8,
                  // so only a non-empty array is used as a subset.
                  $subset = (is_array($subLanguage) && count($subLanguage)) ? $subLanguage : null;
                  $res = $hl->highlightAuto($this->modeBuffer, $subset);
              }

              // Counting embedded language score towards the host language may
              // be disabled with zeroing the containing mode relevance. Usecase
              // in point is Markdown that allows XML everywhere and makes every
              // XML snippet to have a much larger Markdown score.
              if ($this->top->relevance > 0) {
                  $this->relevance += $res->relevance;
              }

              if ($explicit) {
                  $this->continuations[$subLanguage] = isset($res->top) ? $res->top : null;
              }

              return $this->buildSpan($res->language, $res->value, false, true);
          } catch (\Exception $e) {
              // Best effort: log the failure and fall back to plain escaped text.
              error_log($e);

              return $this->escape($this->modeBuffer);
          }
      }

      /**
       * Emit the buffered text to the result (as sub-language or keyword
       * markup) and clear the buffer.
       */
      private function processBuffer()
      {
          if (is_object($this->top) && $this->top->subLanguage) {
              $this->result .= $this->processSubLanguage();
          } else {
              $this->result .= $this->processKeywords();
          }

          $this->modeBuffer = '';
      }

      /**
       * Open the span of a mode (if it has a class name) and push a copy of
       * the mode on the mode stack.
       *
       * @param object $mode
       */
      private function startNewMode($mode)
      {
          $this->result .= $mode->className ? $this->buildSpan($mode->className, "", true) : "";

          // Work on a clone so the parent link does not leak into the
          // shared language definition.
          $t = clone $mode;
          $t->parent = $this->top;
          $this->top = $t;
      }

      /**
       * Process the text preceding a lexeme and the lexeme itself: start a sub
       * mode, end one or more modes, or report an illegal lexeme.
       *
       * @param string      $buffer Text found before the lexeme
       * @param string|null $lexeme The matched lexeme; null flushes the buffer
       *
       * @throws \Exception if the lexeme is illegal in the current mode
       *
       * @return int Number of bytes of the lexeme that were consumed
       */
      private function processLexeme($buffer, $lexeme = null)
      {
          $this->modeBuffer .= $buffer;

          if ($lexeme === null) {
              $this->processBuffer();

              return 0;
          }

          // Case 1: the lexeme begins a mode contained in the current one.
          $new_mode = $this->subMode($lexeme, $this->top);
          if ($new_mode) {
              if ($new_mode->skip) {
                  $this->modeBuffer .= $lexeme;
              } else {
                  if ($new_mode->excludeBegin) {
                      $this->modeBuffer .= $lexeme;
                  }
                  $this->processBuffer();
                  if (!$new_mode->returnBegin && !$new_mode->excludeBegin) {
                      $this->modeBuffer = $lexeme;
                  }
              }
              $this->startNewMode($new_mode);

              return $new_mode->returnBegin ? 0 : strlen($lexeme);
          }

          // Case 2: the lexeme ends the current mode (and possibly its parents).
          $end_mode = $this->endOfMode($this->top, $lexeme);
          if ($end_mode) {
              $origin = $this->top;
              if ($origin->skip) {
                  $this->modeBuffer .= $lexeme;
              } else {
                  if (!($origin->returnEnd || $origin->excludeEnd)) {
                      $this->modeBuffer .= $lexeme;
                  }
                  $this->processBuffer();
                  if ($origin->excludeEnd) {
                      $this->modeBuffer = $lexeme;
                  }
              }

              // Close every mode up to and including the one that ended.
              do {
                  if ($this->top->className) {
                      $this->result .= self::SPAN_END_TAG;
                  }
                  if (!$this->top->skip && !$this->top->subLanguage) {
                      $this->relevance += $this->top->relevance;
                  }
                  $this->top = $this->top->parent;
              } while ($this->top != $end_mode->parent);

              if ($end_mode->starts) {
                  if ($end_mode->endSameAsBegin) {
                      $end_mode->starts->endRe = $end_mode->endRe;
                  }
                  $this->startNewMode($end_mode->starts);
              }

              return $origin->returnEnd ? 0 : strlen($lexeme);
          }

          // Case 3: the lexeme is not allowed in the current mode.
          if ($this->isIllegal($lexeme, $this->top)) {
              $className = $this->top->className ? $this->top->className : "unnamed";
              $err = "Illegal lexeme \"{$lexeme}\" for mode \"{$className}\"";

              throw new \Exception($err);
          }

          // Parser should not reach this point as all types of lexemes should
          // be caught earlier, but if it does due to some bug make sure it
          // advances at least one character forward to prevent infinite looping.
          $this->modeBuffer .= $lexeme;
          $l = strlen($lexeme);

          return $l ? $l : 1;
      }

      /**
       * Replace tabs for something more usable.
       *
       * @param string $code
       *
       * @return string The code, with tabs replaced when a replacement is set
       */
      private function replaceTabs($code)
      {
          if ($this->options['tabReplace'] !== null) {
              return str_replace("\t", $this->options['tabReplace'], $code);
          }

          return $code;
      }

      /**
       * Set the set of languages used for autodetection. When using
       * autodetection the code to highlight will be probed for every language
       * in this set. Limiting this set to only the languages you want to use
       * will greatly improve highlighting speed.
       *
       * @param array $set An array of language names to use for autodetection. This defaults
       *                   to a typical set Web development languages.
       */
      public function setAutodetectLanguages(array $set)
      {
          $this->autodetectSet = array_unique($set);
          self::registerLanguages();
      }

      /**
       * Get the tab replacement string.
       *
       * @return string The tab replacement string
       */
      public function getTabReplace()
      {
          return $this->options['tabReplace'];
      }

      /**
       * Set the tab replacement string. This defaults to NULL: no tabs
       * will be replaced.
       *
       * @param string $tabReplace The tab replacement string
       */
      public function setTabReplace($tabReplace)
      {
          $this->options['tabReplace'] = $tabReplace;
      }

      /**
       * Get the class prefix string.
       *
       * @return string The class prefix string
       */
      public function getClassPrefix()
      {
          return $this->options['classPrefix'];
      }

      /**
       * Set the class prefix string.
       *
       * @param string $classPrefix The class prefix string
       */
      public function setClassPrefix($classPrefix)
      {
          $this->options['classPrefix'] = $classPrefix;
      }

      /**
       * Resolve a language name or alias to its registered definition.
       *
       * @param string $name Language name or alias
       *
       * @throws \DomainException if the requested language was not in this
       *                          Highlighter's language set
       *
       * @return Language
       */
      private function getLanguage($name)
      {
          if (isset(self::$classMap[$name])) {
              return self::$classMap[$name];
          }

          if (isset(self::$aliases[$name]) && isset(self::$classMap[self::$aliases[$name]])) {
              return self::$classMap[self::$aliases[$name]];
          }

          throw new \DomainException("Unknown language: $name");
      }

      /**
       * Determine whether or not a language definition supports auto detection.
       *
       * @param string $name Language name
       *
       * @throws \DomainException if the language is unknown
       *
       * @return bool
       */
      private function autoDetection($name)
      {
          return !$this->getLanguage($name)->disableAutodetect;
      }

      /**
       * Core highlighting function. Accepts a language name, or an alias, and a
       * string with the code to highlight. Returns an object with the following
       * properties:
       * - relevance (int)
       * - value (an HTML string with highlighting markup)
       * - language (the name of the language used)
       * - top (the mode the parser stopped in; not set when an illegal lexeme
       *   aborted highlighting).
       *
       * @param string      $language       Language name or alias
       * @param string      $code           The code to highlight
       * @param bool        $ignoreIllegals When false, an illegal lexeme aborts
       *                                    highlighting and the escaped code is
       *                                    returned with relevance 0
       * @param object|null $continuation   Mode to resume from, as returned in
       *                                    the "top" property of an earlier result
       *
       * @throws \DomainException if the requested language was not in this
       *                          Highlighter's language set
       * @throws \Exception       if an invalid regex was given in a language file
       *
       * @return \stdClass
       */
      public function highlight($language, $code, $ignoreIllegals = true, $continuation = null)
      {
          $this->language = $this->getLanguage($language);
          $this->language->compile();
          $this->top = $continuation ? $continuation : $this->language->mode;
          $this->continuations = array();
          $this->result = "";

          // Re-open the spans of every mode that was still open in the continuation.
          for ($current = $this->top; $current != $this->language->mode; $current = $current->parent) {
              if ($current->className) {
                  $this->result = $this->buildSpan($current->className, '', true) . $this->result;
              }
          }

          $this->modeBuffer = "";
          $this->relevance = 0;
          $this->ignoreIllegals = $ignoreIllegals;

          $res = new \stdClass();
          $res->relevance = 0;
          $res->value = "";
          $res->language = "";

          try {
              $match = null;
              $count = 0;
              $index = 0;

              while ($this->top && $this->top->terminators) {
                  $test = @preg_match($this->top->terminators, $code, $match, PREG_OFFSET_CAPTURE, $index);
                  if ($test === false) {
                      throw new \Exception("Invalid " . $this->language->name . " regExp " . var_export($this->top->terminators, true));
                  } elseif ($test === 0) {
                      break;
                  }

                  $count = $this->processLexeme(substr($code, $index, $match[0][1] - $index), $match[0][0]);
                  $index = $match[0][1] + $count;
              }

              // Flush whatever follows the last lexeme.
              $this->processLexeme(substr($code, $index));

              // Close the spans of all modes that are still open.
              for ($current = $this->top; isset($current->parent); $current = $current->parent) {
                  if ($current->className) {
                      $this->result .= self::SPAN_END_TAG;
                  }
              }

              $res->relevance = $this->relevance;
              $res->value = $this->replaceTabs($this->result);
              $res->language = $this->language->name;
              $res->top = $this->top;

              return $res;
          } catch (\Exception $e) {
              // processLexeme() reports illegal lexemes with a message that
              // contains "Illegal"; those degrade to plain escaped output.
              if (strpos($e->getMessage(), "Illegal") !== false) {
                  $res->value = $this->escape($code);

                  return $res;
              }

              throw $e;
          }
      }

      /**
       * Highlight the given code by highlighting the given code with each
       * registered language and then finding the match with highest accuracy.
       *
       * The result carries a "secondBest" property with the runner-up result
       * when there is one.
       *
       * @param string        $code
       * @param string[]|null $languageSubset When set to null, this method will
       *                                      probe the languages configured with
       *                                      setAutodetectLanguages(). Set this to
       *                                      an array of languages of your choice
       *                                      to limit the amount of languages to try.
       *
       * @throws \Exception if an invalid regex was given in a language file
       *
       * @return \stdClass
       */
      public function highlightAuto($code, $languageSubset = null)
      {
          $res = new \stdClass();
          $res->relevance = 0;
          $res->value = $this->escape($code);
          $res->language = "";
          $scnd = clone $res;

          $tmp = $languageSubset ? $languageSubset : $this->autodetectSet;

          foreach ($tmp as $l) {
              // don't fail if we run into a non-existent language
              try {
                  // skip any languages that don't support auto detection
                  if (!$this->autoDetection($l)) {
                      continue;
                  }

                  $current = $this->highlight($l, $code, false);
              } catch (\DomainException $e) {
                  continue;
              }

              if ($current->relevance > $scnd->relevance) {
                  $scnd = $current;
              }
              if ($current->relevance > $res->relevance) {
                  $scnd = $res;
                  $res = $current;
              }
          }

          if ($scnd->language) {
              $res->secondBest = $scnd;
          }

          return $res;
      }

      /**
       * Return a list of all supported languages. Using this list in
       * setAutodetectLanguages will turn on autodetection for all supported
       * languages.
       *
       * @param bool $include_aliases specify whether language aliases
       *                              should be included as well
       *
       * @return string[] An array of language names
       */
      public function listLanguages($include_aliases = false)
      {
          $languages = is_array(self::$languages) ? self::$languages : array();

          if ($include_aliases === true) {
              // No registered language may have declared an alias.
              $aliases = is_array(self::$aliases) ? array_keys(self::$aliases) : array();

              return array_merge($languages, $aliases);
          }

          return $languages;
      }

      /**
       * Returns list of all available aliases for given language name.
       *
       * @param string $language name or alias of language to look-up
       *
       * @throws \DomainException if the requested language was not in this
       *                          Highlighter's language set
       *
       * @return string[] An array of all aliases associated with the requested
       *                  language name language. Passed-in name is included as
       *                  well.
       */
      public function getAliasesForLanguage($language)
      {
          $language = $this->getLanguage($language);

          if ($language->aliases === null) {
              return array($language->name);
          }

          return array_merge(array($language->name), $language->aliases);
      }
  }