123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250 |
- <?php declare(strict_types=1);
-
- namespace PhpParser\Lexer;
-
- use PhpParser\Error;
- use PhpParser\ErrorHandler;
- use PhpParser\Lexer;
- use PhpParser\Lexer\TokenEmulator\CoaleseEqualTokenEmulator;
- use PhpParser\Lexer\TokenEmulator\FnTokenEmulator;
- use PhpParser\Lexer\TokenEmulator\NumericLiteralSeparatorEmulator;
- use PhpParser\Lexer\TokenEmulator\TokenEmulatorInterface;
- use PhpParser\Parser\Tokens;
-
/**
 * Lexer that emulates syntax of newer PHP versions when running on an older runtime.
 *
 * Two mechanisms are combined:
 *  - Heredoc/nowdoc strings with an indented closing label, or without a line break after
 *    the closing label, are rewritten before lexing. The rewrite is recorded as a list of
 *    patches, which are afterwards replayed on the tokens and on error positions so that
 *    both refer to the code as it was originally passed in.
 *  - The registered token emulators post-process the token stream.
 */
class Emulative extends Lexer
{
    const PHP_7_3 = '7.3.0dev';
    const PHP_7_4 = '7.4.0dev';

    // IDs used for emulated tokens; the constructor maps them to the parser's token IDs.
    const T_COALESCE_EQUAL = 1007;
    const T_FN = 1008;

    // Matches a whole heredoc/nowdoc: the (optionally quoted) opening label, the body lines,
    // and the closing label together with its leading indentation and trailing separator.
    const FLEXIBLE_DOC_STRING_REGEX = <<<'REGEX'
/<<<[ \t]*(['"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\1\r?\n
(?:.*\r?\n)*?
(?<indentation>\h*)\2(?![a-zA-Z_\x80-\xff])(?<separator>(?:;?[\r\n])?)/x
REGEX;

    /**
     * @var mixed[] Patches used to reverse changes introduced in the code. Each entry is
     *              [position in the modified code, 'add'|'remove', text].
     */
    private $patches = [];

    /** @var TokenEmulatorInterface[] */
    private $tokenEmulators = [];

    /**
     * Registers the token emulators and maps the emulated token IDs to parser tokens.
     *
     * @param mixed[] $options Options forwarded unchanged to the parent lexer
     */
    public function __construct(array $options = [])
    {
        parent::__construct($options);

        $this->tokenEmulators[] = new FnTokenEmulator();
        $this->tokenEmulators[] = new CoaleseEqualTokenEmulator();
        $this->tokenEmulators[] = new NumericLiteralSeparatorEmulator();

        $this->tokenMap[self::T_COALESCE_EQUAL] = Tokens::T_COALESCE_EQUAL;
        $this->tokenMap[self::T_FN] = Tokens::T_FN;
    }

    /**
     * Starts lexing the given code, applying emulation where the code requires it.
     *
     * @param string            $code         Code to lex
     * @param ErrorHandler|null $errorHandler Receiver for lexing errors. When omitted and
     *                                        emulation is required, a throwing handler is
     *                                        used so that errors are never silently lost.
     */
    public function startLexing(string $code, ErrorHandler $errorHandler = null)
    {
        $this->patches = [];

        if (!$this->isEmulationNeeded($code)) {
            // Nothing to emulate, the parent lexer can handle the code as-is
            parent::startLexing($code, $errorHandler);
            return;
        }

        if ($errorHandler === null) {
            // Errors are forwarded to the handler explicitly below, so a null handler must
            // be replaced up front instead of being dereferenced.
            $errorHandler = new ErrorHandler\Throwing();
        }

        // Errors are collected first, because their positions refer to the rewritten code
        // and have to be corrected before they are reported.
        $collector = new ErrorHandler\Collecting();

        // 1. Emulation of the new heredoc and nowdoc syntax
        $preparedCode = $this->processHeredocNowdoc($code);
        parent::startLexing($preparedCode, $collector);
        $this->fixupTokens();

        $errors = $collector->getErrors();
        if (!empty($errors)) {
            $this->fixupErrors($errors);
            foreach ($errors as $error) {
                $errorHandler->handleError($error);
            }
        }

        // 2. Token emulation, based on the original (unmodified) code
        foreach ($this->tokenEmulators as $emulator) {
            if ($emulator->isEmulationNeeded($code)) {
                $this->tokens = $emulator->emulate($code, $this->tokens);
            }
        }
    }

    /**
     * Whether the code may contain heredoc/nowdoc syntax that the running PHP version
     * cannot lex natively.
     */
    private function isHeredocNowdocEmulationNeeded(string $code): bool
    {
        // Skip versions where this works without emulation
        if (version_compare(\PHP_VERSION, self::PHP_7_3, '>=')) {
            return false;
        }

        return strpos($code, '<<<') !== false;
    }

    /**
     * Rewrites heredoc/nowdoc strings so that the closing label is unindented and followed
     * by a line break, recording in $this->patches how to undo each modification.
     *
     * @param string $code Original code
     *
     * @return string Code with rewritten heredoc/nowdoc strings, or the unchanged code if
     *                no emulation is needed or nothing matched
     */
    private function processHeredocNowdoc(string $code): string
    {
        if (!$this->isHeredocNowdocEmulationNeeded($code)) {
            return $code;
        }

        $flags = PREG_SET_ORDER | PREG_OFFSET_CAPTURE;
        if (!preg_match_all(self::FLEXIBLE_DOC_STRING_REGEX, $code, $matches, $flags)) {
            // No heredoc/nowdoc found
            return $code;
        }

        // Keep track of how much we need to adjust string offsets due to the modifications
        // we already made. Match offsets refer to the original code.
        $posDelta = 0;
        foreach ($matches as $match) {
            list($indentation, $indentationStart) = $match['indentation'];
            list($separator, $separatorStart) = $match['separator'];

            if ($indentation === '' && $separator !== '') {
                // Ordinary heredoc/nowdoc
                continue;
            }

            if ($indentation !== '') {
                // Remove the indentation in front of the closing label; undoing this
                // means adding it back.
                $indentationLen = strlen($indentation);
                $code = substr_replace($code, '', $indentationStart + $posDelta, $indentationLen);
                $this->patches[] = [$indentationStart + $posDelta, 'add', $indentation];
                $posDelta -= $indentationLen;
            }

            if ($separator === '') {
                // Insert a newline as separator; undoing this means removing it again.
                $code = substr_replace($code, "\n", $separatorStart + $posDelta, 0);
                $this->patches[] = [$separatorStart + $posDelta, 'remove', "\n"];
                $posDelta += 1;
            }
        }

        return $code;
    }

    /**
     * Whether any token emulator or the heredoc/nowdoc emulation applies to the code.
     */
    private function isEmulationNeeded(string $code): bool
    {
        foreach ($this->tokenEmulators as $emulator) {
            if ($emulator->isEmulationNeeded($code)) {
                return true;
            }
        }

        return $this->isHeredocNowdocEmulationNeeded($code);
    }

    /**
     * Replays the recorded patches on the lexed tokens, so that the token texts match the
     * original code again.
     */
    private function fixupTokens()
    {
        if (\count($this->patches) === 0) {
            return;
        }

        // Load first patch
        $patchIdx = 0;

        list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx];

        // We use a manual loop over the tokens, because we modify the array on the fly.
        // $pos is the offset of the current token inside the modified code.
        $pos = 0;
        for ($i = 0, $c = \count($this->tokens); $i < $c; $i++) {
            $token = $this->tokens[$i];
            if (\is_string($token)) {
                // We assume that patches don't apply to string tokens
                $pos += \strlen($token);
                continue;
            }

            // $len stays the length in the modified code, as patch positions refer to it.
            // $posDelta tracks how far earlier patches shifted the text of this token.
            $len = \strlen($token[1]);
            $posDelta = 0;
            while ($patchPos >= $pos && $patchPos < $pos + $len) {
                $patchTextLen = \strlen($patchText);
                if ($patchType === 'remove') {
                    if ($patchPos === $pos && $patchTextLen === $len) {
                        // Remove token entirely
                        array_splice($this->tokens, $i, 1, []);
                        $i--;
                        $c--;
                    } else {
                        // Remove from token string
                        $this->tokens[$i][1] = substr_replace(
                            $token[1], '', $patchPos - $pos + $posDelta, $patchTextLen
                        );
                        $posDelta -= $patchTextLen;
                    }
                } elseif ($patchType === 'add') {
                    // Insert into the token string
                    $this->tokens[$i][1] = substr_replace(
                        $token[1], $patchText, $patchPos - $pos + $posDelta, 0
                    );
                    $posDelta += $patchTextLen;
                } else {
                    assert(false);
                }

                // Fetch the next patch
                $patchIdx++;
                if ($patchIdx >= \count($this->patches)) {
                    // No more patches, we're done
                    return;
                }

                list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx];

                // Multiple patches may apply to the same token. Reload the current one to
                // check if the new patch applies.
                $token = $this->tokens[$i];
            }

            $pos += $len;
        }

        // A patch did not apply
        assert(false);
    }

    /**
     * Fixup line and position information in errors.
     *
     * Errors are reported relative to the modified code. For every patch located before
     * the start of an error, the position and line attributes are shifted by the amount of
     * text the patch adds or removes.
     *
     * @param Error[] $errors Errors to adjust in place
     */
    private function fixupErrors(array $errors)
    {
        foreach ($errors as $error) {
            $attrs = $error->getAttributes();

            $posDelta = 0;
            $lineDelta = 0;
            foreach ($this->patches as $patch) {
                list($patchPos, $patchType, $patchText) = $patch;
                if ($patchPos >= $attrs['startFilePos']) {
                    // No longer relevant
                    break;
                }

                $textLen = strlen($patchText);
                $textLines = substr_count($patchText, "\n");
                if ($patchType === 'add') {
                    $posDelta += $textLen;
                    $lineDelta += $textLines;
                } else {
                    $posDelta -= $textLen;
                    $lineDelta -= $textLines;
                }
            }

            $attrs['startFilePos'] += $posDelta;
            $attrs['endFilePos'] += $posDelta;
            $attrs['startLine'] += $lineDelta;
            $attrs['endLine'] += $lineDelta;
            $error->setAttributes($attrs);
        }
    }
}
|