true, "end" => true, "lexemes" => true, "illegal" => true, ); $def = array( "begin" => "", "beginRe" => "", "beginKeywords" => "", "excludeBegin" => "", "returnBegin" => "", "end" => "", "endRe" => "", "endSameAsBegin" => "", "endsParent" => "", "endsWithParent" => "", "excludeEnd" => "", "returnEnd" => "", "starts" => "", "terminators" => "", "terminatorEnd" => "", "lexemes" => "", "lexemesRe" => "", "illegal" => "", "illegalRe" => "", "className" => "", "contains" => array(), "keywords" => null, "subLanguage" => null, "subLanguageMode" => "", "compiled" => false, "relevance" => 1, "skip" => false, ); foreach ($patch as $k => $v) { if (isset($e->$k)) { $e->$k = str_replace("\\/", "/", $e->$k); $e->$k = str_replace("/", "\\/", $e->$k); } } foreach ($def as $k => $v) { if (!isset($e->$k) && is_object($e)) { $e->$k = $v; } } }

    /**
     * Reads a JSON language definition and records its top-level settings.
     *
     * The decoded definition is only stored here; it is compiled later, when
     * compile() is called.
     *
     * NOTE(review): the results of file_get_contents() and json_decode() are
     * not checked, so an unreadable file or invalid JSON is not reported by
     * this constructor.
     *
     * @param string $lang     Name stored for this language
     * @param string $filePath Path of the JSON file holding the definition
     */
    public function __construct($lang, $filePath)
    {
        $json = file_get_contents($filePath);

        $this->mode = json_decode($json);
        $this->name = $lang;
        $this->aliases = isset($this->mode->aliases) ? $this->mode->aliases : null;
        $this->caseInsensitive = isset($this->mode->case_insensitive) ? $this->mode->case_insensitive : false;
        $this->disableAutodetect = isset($this->mode->disableAutodetect) ? $this->mode->disableAutodetect : false;
    }

    /**
     * Wraps a pattern source in PCRE delimiters and modifiers.
     *
     * The `u` and `m` modifiers are always applied; `i` is appended when the
     * language is case insensitive.
     *
     * @param string $value  Pattern source, with "/" already escaped
     * @param bool   $global Not used by this method
     *
     * @return string
     */
    private function langRe($value, $global = false)
    {
        // PCRE allows us to change the definition of "new line." The
        // `(*ANYCRLF)` matches `\r`, `\n`, and `\r\n` for `$`
        //
        // https://www.pcre.org/original/doc/html/pcrepattern.html
        return "/(*ANYCRLF){$value}/um" . ($this->caseInsensitive ? "i" : "");
    }

    /**
     * Splits space separated keyword strings into lists of keywords.
     *
     * A plain string becomes array("keyword" => list). For any other input
     * each non-array property is replaced, in place, by the list obtained
     * from splitting its value on spaces. Keywords are lower-cased first when
     * the language is case insensitive.
     *
     * @param string|object $kw
     *
     * @return array|object
     */
    private function processKeyWords($kw)
    {
        if (is_string($kw)) {
            if ($this->caseInsensitive) {
                $kw = mb_strtolower($kw, "UTF-8");
            }

            $kw = array("keyword" => explode(" ", $kw));
        } else {
            foreach ($kw as $cls => $vl) {
                if (!is_array($vl)) {
                    if ($this->caseInsensitive) {
                        $vl = mb_strtolower($vl, "UTF-8");
                    }

                    $kw->$cls = explode(" ", $vl);
                }
            }
        }

        return $kw;
    }

    /**
     * Builds a new object holding the merged properties of all arguments.
     *
     * Properties are copied from the first argument, then from each following
     * argument in order, so later arguments override earlier ones. The copy is
     * shallow and the arguments themselves are left untouched.
     *
     * @return \stdClass
     */
    private function inherit()
    {
        $result = new \stdClass();
        $objects = func_get_args();
        $parent = array_shift($objects);

        foreach ($parent as $key => $value) {
            $result->{$key} = $value;
        }

        foreach ($objects as $object) {
            foreach ($object as $key => $value) {
                $result->{$key} = $value;
            }
        }

        return $result;
    }

    /**
     * Expands a mode into the list of modes that should be compiled for it.
     *
     * A mode with variants yields one merged copy per variant; the copies are
     * cached on the mode in `cachedVariants`. A mode that ends with its parent
     * yields a single copy of itself. Any other mode is returned as is.
     *
     * @param object $mode
     *
     * @return array
     */
    private function expandMode($mode)
    {
        if (isset($mode->variants) && !isset($mode->cachedVariants)) {
            $mode->cachedVariants = array();

            foreach ($mode->variants as $variant) {
                // 'variants' is reset to null so the copy is not expanded again.
                $mode->cachedVariants[] = $this->inherit($mode, array('variants' => null), $variant);
            }
        }

        if (isset($mode->cachedVariants)) {
            return $mode->cachedVariants;
        }

        if (isset($mode->endsWithParent) && $mode->endsWithParent) {
            return array($this->inherit($mode));
        }

        return array($mode);
    }

    /**
     * joinRe logically computes regexps.join(separator), but fixes the
     * backreferences so they continue to match.
     *
     * @param array  $regexps
     * @param string $separator
     *
     * @return string
     */
    private function joinRe($regexps, $separator)
    {
        // backreferenceRe matches an open parenthesis or backreference. To avoid
        // an incorrect parse, it additionally matches the following:
        // - [...] elements, where the meaning of parentheses and escapes change
        // - other escape sequences, so we do not misparse escape sequences as
        //   interesting elements
        // - non-matching or lookahead parentheses, which do not capture. These
        //   follow the '(' with a '?'.
        $backreferenceRe = '#\[(?:[^\\\\\]]|\\\.)*\]|\(\??|\\\([1-9][0-9]*)|\\\.#';
        $numCaptures = 0;
        $ret = '';

        $regexpCount = count($regexps);
        for ($i = 0; $i < $regexpCount; ++$i) {
            // Capture groups of all previous expressions shift the
            // backreference numbers of this one.
            $offset = $numCaptures;
            $re = $regexps[$i];

            if ($i > 0) {
                $ret .= $separator;
            }

            while (strlen($re) > 0) {
                $matches = array();
                $matchFound = preg_match($backreferenceRe, $re, $matches, PREG_OFFSET_CAPTURE);

                // preg_match() returns 0 when nothing matched and false when
                // PCRE failed. In both cases $matches is empty, so append the
                // remainder verbatim and stop; otherwise $re would never
                // shrink and this loop would not terminate.
                if (!$matchFound) {
                    $ret .= $re;
                    break;
                }

                // PHP aliases to match the JS naming conventions
                $match = $matches[0];
                $index = $match[1];

                $ret .= substr($re, 0, $index);
                $re = substr($re, $index + strlen($match[0]));

                if (substr($match[0], 0, 1) === '\\' && isset($matches[1])) {
                    // Adjust the backreference.
                    $ret .= "\\" . strval(intval($matches[1][0]) + $offset);
                } else {
                    $ret .= $match[0];

                    // Only a bare "(" opens a capturing group; "(?" does not.
                    if ($match[0] === "(") {
                        ++$numCaptures;
                    }
                }
            }
        }

        return $ret;
    }

    /**
     * Compiles a mode and, recursively, the modes it contains or starts.
     *
     * Fills in the derived fields of the mode (keywords table, `lexemesRe`,
     * `beginRe`, `endRe`, `terminatorEnd`, `illegalRe`, expanded `contains`
     * and `terminators`). A mode that already has `compiled` set is skipped.
     *
     * @param object      $mode   Mode to compile; modified in place
     * @param object|null $parent Mode containing $mode, null for the top mode
     */
    private function compileMode($mode, $parent = null)
    {
        if (isset($mode->compiled)) {
            return;
        }

        $this->complete($mode);
        $mode->compiled = true;

        $mode->keywords = $mode->keywords ? $mode->keywords : $mode->beginKeywords;

        /* Note: JsonRef method creates different references as those in the
         * original source files. Two modes may refer to the same keywords
         * set, so only testing if the mode has keywords is not enough: the
         * mode's keywords might be compiled already, so it is necessary
         * to do an 'is_array' check.
         */
        if ($mode->keywords && !is_array($mode->keywords)) {
            $compiledKeywords = array();

            $mode->lexemesRe = $this->langRe($mode->lexemes ? $mode->lexemes : "\w+", true);

            foreach ($this->processKeyWords($mode->keywords) as $clsNm => $dat) {
                if (!is_array($dat)) {
                    $dat = array($dat);
                }

                foreach ($dat as $kw) {
                    // A keyword may carry its relevance as "name|relevance";
                    // the relevance defaults to 1.
                    $pair = explode("|", $kw);
                    $compiledKeywords[$pair[0]] = array($clsNm, isset($pair[1]) ? intval($pair[1]) : 1);
                }
            }

            $mode->keywords = $compiledKeywords;
        }

        if ($parent) {
            if ($mode->beginKeywords) {
                $mode->begin = "\\b(" . implode("|", explode(" ", $mode->beginKeywords)) . ")\\b";
            }

            if (!$mode->begin) {
                $mode->begin = "\B|\b";
            }

            $mode->beginRe = $this->langRe($mode->begin);

            if ($mode->endSameAsBegin) {
                $mode->end = $mode->begin;
            }

            if (!$mode->end && !$mode->endsWithParent) {
                $mode->end = "\B|\b";
            }

            if ($mode->end) {
                $mode->endRe = $this->langRe($mode->end);
            }

            $mode->terminatorEnd = $mode->end;

            if ($mode->endsWithParent && $parent->terminatorEnd) {
                $mode->terminatorEnd .= ($mode->end ? "|" : "") . $parent->terminatorEnd;
            }
        }

        if ($mode->illegal) {
            $mode->illegalRe = $this->langRe($mode->illegal);
        }

        $expandedContains = array();
        foreach ($mode->contains as $c) {
            $expandedContains = array_merge($expandedContains, $this->expandMode(
                $c === 'self' ? $mode : $c
            ));
        }
        $mode->contains = $expandedContains;

        foreach ($mode->contains as $contained) {
            $this->compileMode($contained, $mode);
        }

        if ($mode->starts) {
            $this->compileMode($mode->starts, $parent);
        }

        // Collect every pattern that can end the current run of text in this
        // mode: the begin of each contained mode, the end of this mode and
        // its illegal pattern.
        $terminators = array();
        foreach ($mode->contains as $contained) {
            $terminators[] = $contained->beginKeywords
                ? "\.?(?:" . $contained->begin . ")\.?"
                : $contained->begin;
        }

        if ($mode->terminatorEnd) {
            $terminators[] = $mode->terminatorEnd;
        }

        if ($mode->illegal) {
            $terminators[] = $mode->illegal;
        }

        $mode->terminators = count($terminators) ? $this->langRe($this->joinRe($terminators, "|"), true) : null;
    }

    /**
     * Compiles the language definition, unless that was done already.
     *
     * The definition is first passed through JsonRef::decode() and then
     * compiled as the top-level mode.
     */
    public function compile()
    {
        if (!isset($this->mode->compiled)) {
            $jr = new JsonRef();
            $this->mode = $jr->decode($this->mode);
            $this->compileMode($this->mode);
        }
    }
}