Dashboard sipadu mbip
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

Language.php 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. <?php
  2. /* Copyright (c)
  3. * - 2006-2013, Ivan Sagalaev (maniac@softwaremaniacs.org), highlight.js
  4. * (original author)
  5. * - 2013-2019, Geert Bergman (geert@scrivo.nl), highlight.php
  6. * - 2014 Daniel Lynge, highlight.php (contributor)
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions are met:
  10. *
  11. * 1. Redistributions of source code must retain the above copyright notice,
  12. * this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright notice,
  14. * this list of conditions and the following disclaimer in the documentation
  15. * and/or other materials provided with the distribution.
  16. * 3. Neither the name of "highlight.js", "highlight.php", nor the names of its
  17. * contributors may be used to endorse or promote products derived from this
  18. * software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  21. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  24. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  25. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  26. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  27. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  29. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  30. * POSSIBILITY OF SUCH DAMAGE.
  31. */
  32. namespace Highlight;
  /**
   * A single language definition loaded from a JSON file.
   *
   * The decoded definition is kept in $mode as a tree of "mode" objects.
   * compile() walks that tree once and adds the derived fields (PCRE pattern
   * strings, keyword lookup tables, terminator patterns) to each mode.
   */
  class Language
  {
      /** @var bool Copied from the definition's "disableAutodetect" flag (false when absent). */
      public $disableAutodetect = false;

      /** @var bool Copied from the definition's "case_insensitive" flag (false when absent). */
      public $caseInsensitive = false;

      /** @var array|null Copied from the definition's "aliases" entry (null when absent). */
      public $aliases = null;

      /** @var string|null Language name as passed to the constructor. */
      public $name = null;

      /**
       * Root mode of the language definition.
       *
       * Declared explicitly: the constructor and compile() assign it, and
       * creating it dynamically is deprecated as of PHP 8.2.
       *
       * @var object|null
       */
      public $mode = null;

      /**
       * Normalises a mode object in place.
       *
       * A null/unset argument is replaced by an empty \stdClass. On objects,
       * forward slashes in the "begin", "end", "lexemes" and "illegal"
       * patterns are escaped (they are later wrapped in "/" delimiters by
       * langRe()), and every known mode field that is not set gets its
       * default value. Non-object values are left untouched.
       *
       * @param mixed $e mode to complete (by reference)
       *
       * @return void
       */
      public function complete(&$e)
      {
          if (!isset($e)) {
              $e = new \stdClass();
          }

          // Strings such as 'self' can show up where a mode is expected;
          // there is nothing to patch or default on those.
          if (!is_object($e)) {
              return;
          }

          $patch = array("begin", "end", "lexemes", "illegal");

          $def = array(
              "begin" => "",
              "beginRe" => "",
              "beginKeywords" => "",
              "excludeBegin" => "",
              "returnBegin" => "",
              "end" => "",
              "endRe" => "",
              "endSameAsBegin" => "",
              "endsParent" => "",
              "endsWithParent" => "",
              "excludeEnd" => "",
              "returnEnd" => "",
              "starts" => "",
              "terminators" => "",
              "terminatorEnd" => "",
              "lexemes" => "",
              "lexemesRe" => "",
              "illegal" => "",
              "illegalRe" => "",
              "className" => "",
              "contains" => array(),
              "keywords" => null,
              "subLanguage" => null,
              "subLanguageMode" => "",
              "compiled" => false,
              "relevance" => 1,
              "skip" => false,
          );

          foreach ($patch as $k) {
              if (isset($e->$k)) {
                  // Un-escape first so already escaped slashes are not
                  // escaped a second time.
                  $e->$k = str_replace("\\/", "/", $e->$k);
                  $e->$k = str_replace("/", "\\/", $e->$k);
              }
          }

          foreach ($def as $k => $v) {
              if (!isset($e->$k)) {
                  $e->$k = $v;
              }
          }
      }

      /**
       * Reads and decodes the JSON definition and copies its top-level flags.
       *
       * Note: read or decode failures are not reported here; in that case
       * $mode ends up null and the flags keep their defaults.
       *
       * @param string $lang     language name
       * @param string $filePath path of the JSON definition file
       */
      public function __construct($lang, $filePath)
      {
          $json = file_get_contents($filePath);

          $this->mode = json_decode($json);
          $this->name = $lang;
          $this->aliases = isset($this->mode->aliases) ? $this->mode->aliases : null;
          $this->caseInsensitive = isset($this->mode->case_insensitive) ? $this->mode->case_insensitive : false;
          $this->disableAutodetect = isset($this->mode->disableAutodetect) ? $this->mode->disableAutodetect : false;
      }

      /**
       * Wraps a pattern body into a complete PCRE pattern string.
       *
       * @param string $value  pattern body
       * @param bool   $global unused; kept so existing call sites stay valid
       *
       * @return string
       */
      private function langRe($value, $global = false)
      {
          // PCRE allows us to change the definition of "new line." The
          // `(*ANYCRLF)` matches `\r`, `\n`, and `\r\n` for `$`
          //
          // https://www.pcre.org/original/doc/html/pcrepattern.html
          return "/(*ANYCRLF){$value}/um" . ($this->caseInsensitive ? "i" : "");
      }

      /**
       * Splits space separated keyword strings into arrays.
       *
       * A plain string becomes array("keyword" => [...]). For an object,
       * every non-array property is split in place. Keywords are lower-cased
       * first when the language is case insensitive.
       *
       * @param string|object $kw
       *
       * @return array|object
       */
      private function processKeyWords($kw)
      {
          if (is_string($kw)) {
              if ($this->caseInsensitive) {
                  $kw = mb_strtolower($kw, "UTF-8");
              }

              return array("keyword" => explode(" ", $kw));
          }

          foreach ($kw as $cls => $vl) {
              if (is_array($vl)) {
                  continue;
              }
              if ($this->caseInsensitive) {
                  $vl = mb_strtolower($vl, "UTF-8");
              }
              $kw->$cls = explode(" ", $vl);
          }

          return $kw;
      }

      /**
       * Builds a new \stdClass from the properties of all arguments.
       *
       * Arguments (objects or arrays) are applied left to right, so later
       * ones override earlier ones. The copy is shallow.
       *
       * @return \stdClass
       */
      private function inherit()
      {
          $result = new \stdClass();

          foreach (func_get_args() as $source) {
              foreach ($source as $key => $value) {
                  $result->{$key} = $value;
              }
          }

          return $result;
      }

      /**
       * Expands a mode into the list of modes that replace it in a parent's
       * "contains" list.
       *
       * Modes with variants yield one merged mode per variant (cached on the
       * mode in "cachedVariants"); modes with endsWithParent yield a shallow
       * copy; any other mode is returned as is.
       *
       * @param object $mode
       *
       * @return array
       */
      private function expandMode($mode)
      {
          if (isset($mode->variants) && !isset($mode->cachedVariants)) {
              $mode->cachedVariants = array();
              foreach ($mode->variants as $variant) {
                  $mode->cachedVariants[] = $this->inherit($mode, array('variants' => null), $variant);
              }
          }

          if (isset($mode->cachedVariants)) {
              return $mode->cachedVariants;
          }

          if (isset($mode->endsWithParent) && $mode->endsWithParent) {
              return array($this->inherit($mode));
          }

          return array($mode);
      }

      /**
       * joinRe logically computes regexps.join(separator), but fixes the
       * backreferences so they continue to match.
       *
       * @param array  $regexps
       * @param string $separator
       *
       * @return string
       */
      private function joinRe($regexps, $separator)
      {
          // backreferenceRe matches an open parenthesis or backreference. To avoid
          // an incorrect parse, it additionally matches the following:
          // - [...] elements, where the meaning of parentheses and escapes change
          // - other escape sequences, so we do not misparse escape sequences as
          //   interesting elements
          // - non-matching or lookahead parentheses, which do not capture. These
          //   follow the '(' with a '?'.
          $backreferenceRe = '#\[(?:[^\\\\\]]|\\\.)*\]|\(\??|\\\([1-9][0-9]*)|\\\.#';
          $numCaptures = 0;
          $ret = '';

          $count = count($regexps);
          for ($i = 0; $i < $count; ++$i) {
              // Capture groups seen in earlier patterns shift the
              // backreference numbers of this one.
              $offset = $numCaptures;
              $re = $regexps[$i];

              if ($i > 0) {
                  $ret .= $separator;
              }

              while (strlen($re) > 0) {
                  $matches = array();
                  $matchFound = preg_match($backreferenceRe, $re, $matches, PREG_OFFSET_CAPTURE);

                  // preg_match() yields 0 for "no match" and false on a PCRE
                  // error. In both cases nothing is left to rewrite; checking
                  // for 0 only would loop forever on an error because $re
                  // would never get shorter.
                  if (!$matchFound) {
                      $ret .= $re;
                      break;
                  }

                  // PHP aliases to match the JS naming conventions
                  $match = $matches[0];
                  $index = $match[1];

                  $ret .= substr($re, 0, $index);
                  $re = substr($re, $index + strlen($match[0]));

                  if (substr($match[0], 0, 1) === '\\' && isset($matches[1])) {
                      // Adjust the backreference.
                      $ret .= "\\" . strval(intval($matches[1][0]) + $offset);
                  } else {
                      $ret .= $match[0];
                      if ($match[0] == "(") {
                          ++$numCaptures;
                      }
                  }
              }
          }

          return $ret;
      }

      /**
       * Compiles a mode and, recursively, its sub modes.
       *
       * Fills in defaults, turns keyword definitions into a lookup table
       * (keyword => array(class name, relevance)), builds the begin/end/
       * illegal pattern strings, expands and compiles the contained modes
       * and finally builds the combined "terminators" pattern. Modes that
       * already carry a "compiled" property are skipped.
       *
       * @param object      $mode   mode to compile
       * @param object|null $parent parent mode, null for the root mode
       *
       * @return void
       */
      private function compileMode($mode, $parent = null)
      {
          if (isset($mode->compiled)) {
              return;
          }

          $this->complete($mode);
          $mode->compiled = true;

          $mode->keywords = $mode->keywords ? $mode->keywords : $mode->beginKeywords;

          /* Note: JsonRef method creates different references as those in the
           * original source files. Two modes may refer to the same keywords
           * set, so only testing if the mode has keywords is not enough: the
           * mode's keywords might be compiled already, so it is necessary
           * to do an 'is_array' check.
           */
          if ($mode->keywords && !is_array($mode->keywords)) {
              $compiledKeywords = array();

              $mode->lexemesRe = $this->langRe($mode->lexemes ? $mode->lexemes : "\w+", true);

              foreach ($this->processKeyWords($mode->keywords) as $clsNm => $dat) {
                  if (!is_array($dat)) {
                      $dat = array($dat);
                  }
                  foreach ($dat as $kw) {
                      // A keyword may carry its relevance as "word|N".
                      $pair = explode("|", $kw);
                      $compiledKeywords[$pair[0]] = array($clsNm, isset($pair[1]) ? intval($pair[1]) : 1);
                  }
              }

              $mode->keywords = $compiledKeywords;
          }

          if ($parent) {
              if ($mode->beginKeywords) {
                  $mode->begin = "\\b(" . implode("|", explode(" ", $mode->beginKeywords)) . ")\\b";
              }
              if (!$mode->begin) {
                  $mode->begin = "\B|\b";
              }
              $mode->beginRe = $this->langRe($mode->begin);

              if ($mode->endSameAsBegin) {
                  $mode->end = $mode->begin;
              }
              if (!$mode->end && !$mode->endsWithParent) {
                  $mode->end = "\B|\b";
              }
              if ($mode->end) {
                  $mode->endRe = $this->langRe($mode->end);
              }

              $mode->terminatorEnd = $mode->end;
              if ($mode->endsWithParent && $parent->terminatorEnd) {
                  $mode->terminatorEnd .= ($mode->end ? "|" : "") . $parent->terminatorEnd;
              }
          }

          if ($mode->illegal) {
              $mode->illegalRe = $this->langRe($mode->illegal);
          }

          // Replace 'self' references and expand variants before compiling
          // the children.
          $expandedContains = array();
          foreach ($mode->contains as $c) {
              $expandedContains = array_merge($expandedContains, $this->expandMode(
                  $c === 'self' ? $mode : $c
              ));
          }
          $mode->contains = $expandedContains;

          foreach ($mode->contains as $child) {
              $this->compileMode($child, $mode);
          }

          if ($mode->starts) {
              $this->compileMode($mode->starts, $parent);
          }

          $terminators = array();
          foreach ($mode->contains as $child) {
              $terminators[] = $child->beginKeywords
                  ? "\.?(?:" . $child->begin . ")\.?"
                  : $child->begin;
          }
          if ($mode->terminatorEnd) {
              $terminators[] = $mode->terminatorEnd;
          }
          if ($mode->illegal) {
              $terminators[] = $mode->illegal;
          }

          $mode->terminators = count($terminators) ? $this->langRe($this->joinRe($terminators, "|"), true) : null;
      }

      /**
       * Resolves the JSON references in the definition and compiles the
       * mode tree. Does nothing when the root mode is already compiled.
       *
       * @return void
       */
      public function compile()
      {
          if (!isset($this->mode->compiled)) {
              $jr = new JsonRef();
              $this->mode = $jr->decode($this->mode);
              $this->compileMode($this->mode);
          }
      }
  }