javascript - 读取代码点时出现偏移问题-6ren

javascript - 读取代码点时出现偏移问题

转载作者：行者123 更新时间：2023-11-30 21:15:38

简历:我目前正在编写一个将源代码转换为标记的 ActionScript 3 词法分析器。我选择通过代码点来解释输入，一个带有可选代理项对的字符串，该字符串包含在 UString 类中。在后台，我使用 UStringPos 类缓存最后读取的位置。

我测试了它如何扫描标识符 "huehuehue" 与...

'use strict';

import {Lexer}      from 'core/Lexer';
import {UString}    from 'utils/UString';
import ErrorHandler from 'core/ErrorHandler';

const errorHandler = new ErrorHandler(true);

// Tell the length to the `Lexer` manually.
const lexer = new Lexer(
  new UString('huehuehue'), 9, errorHandler);

// Scan first token
lexer.next();

const id = lexer.lookahead.value;

console.log(
    id,
    id.length
);

它应该记录 "huehuehue", 9，但这是另一个故事...

为什么它缺少最后一个 'e'？与扫描相关的最内层方法是 Lexer#getCommonIdentifier。顺便说一句，我已经测试了我的 UString 部分，它工作正常。

词法分析器相关定义

/*
 * Class that turns AS3 code into tokens.
 */
export class Lexer
{
  /*
   * @param {UString} source
   * @param {Number} length
   * @param {ErrorHandler} errorHandler
   */
  constructor(source, length, errorHandler)
  {
    this.source = source;
    this.length = length;
    this.index = 0;
    this.lineStart = 0;
    this.lineNumber = 1;
    this.comments = [];

    this.errorHandler = errorHandler;

    this.previousToken = null;
    this.token         = null;
    this.lookahead     = null;

    this._special = [];
  }

  /*
   * Verifies the end of file.
   */
  eof()
  {
    return this.index >= this.length;
  }

  /*
   * Advance the previous, current and lookahead tokens.
   * The lexer however does not depend on these tokens.
   */
  next()
  {
    this.previousToken = this.token;
    this.token         = this.lookahead;
    this.lookahead     = this.lex();
  }

  /*
   * Consumes the next token and return it.
   */
  lex()
  {
    this.consumeWhiteSpaces();

    while (this.consumeComment())
      this.consumeWhiteSpaces();

    let cp = this.source.codePointAt(this.index);

    let pureIdentifier =
      Character.isIdentifierStart(cp);

    if (pureIdentifier || (cp === 0x5C))
      return this.scanIdentifierOrKeyword(!pureIdentifier);

    if (this.eof())
    {
      let loc = [ this.index, this.lineNumber ];
      return new Token(TokenType.EOF, loc, loc, '<end>');
    }
  }

  /*
   * Scan an identifier, keyword or boolean literal.
   */
  scanIdentifierOrKeyword(usingEscape)
  {
    const start = this.index;
    let id;

    /* Like Esprima does: only identifiers containing
     * escapes need some overheads. */
    if (usingEscape)
    {
      id = this.getEscapedIdentifier(
        String.fromCodePoint(this.scanUnicodeEscapeSequence()));
    }
    else
      id = this.getCommonIdentifier();

    return new Token(
      TokenType.IDENTIFIER,
      [ start     , this.lineNumber ],
      [ this.index, this.lineNumber ],
      id
    );
  }

  /*
   * Interprets an identifier. If any escape appears, switches to
   * getEscapedIdentifier().
   */
  getCommonIdentifier()
  {
    const start = this.source.position.offset;
    let cp = 0;

    // Jump the starting symbol.
    ++this.index;

    while (!this.eof())
    {
      cp = this.source.codePointAt(this.index);

      if (Character.isIdentifierPart(cp))
        ++this.index;

      // Switches to escape-minded task...
      else if (cp === 0x5C)
        return this.getUnicodeEscapedIdentifier(
          this.source.string.slice(
            start, this.source.position.offset
          )
        );

      else break;
    }
    return this.source.string.slice(
      start, this.source.position.offset
    );
  }

  /* ... */
}

utils/UString.js

'use strict';

/*
 * String wrapper with methods _based_ on code points.
 */
export class UString
{
  /*
   * Constructs the {UString}.
   *
   * @param {String} s String to be wrapped.
   */
  constructor(s)
  {
    /*
     * @type {String}
     */
    this.string = s;

    /*
     * Tracks the last accessed position.
     *
     * @type {UStringPos}
     */
    this.position = new UStringPos(0, 0);
  }

  /*
   * Reads a code point at specific index.
   *
   * @param {Number} index
   * @return {Number}
   */
  codePointAt(index)
  {
    this.position.walk(this.string, index);
    return this.string.codePointAt(this.position.offset);
  }

  /*
   * Slices the internal string by code point indices.
   *
   * @param {Number} i
   * @param {Number} j
   * @return {String}
   */
  slice(i, j)
  {
    this.position.walk(this.string, i);
    i = this.position.offset;

    this.position.walk(this.string, j);
    j = this.position.offset;

    return this.string.slice(i, j);
  }
};

/*
 * Class that tracks the position of a code point on a string.
 */
export class UStringPos
{
  /*
   * Constructs the {UStringPos}.
   *
   * @param {Number} index The initial index.
   * @param {Number} offset The initial offset.
   */
  constructor(index, offset)
  {
    /*
     * @type {Number}
     */
    this.index = index;

    /*
     * @type {Number}
     */
    this.offset = offset;
  }

  /*
   * Walks to the given index.
   *
   * @param {String} s
   * @param {Number} index
   * @note No backward. Track the previous position instead.
   * @return {void}
   */
  walk(s, index)
  {
    for (; this.index < index; ++this.index)
      this.offset += (
        this._usingSurrogates(
          s.charCodeAt(this.offset)
        ) ? 2 : 1
      );
  }

  /*
   * @private
   */
  _usingSurrogates(ch)
  {
    return (ch >= 0xD800) && (ch <= 0xDBFF);
  }
};

有什么吗？

最佳答案

好的。所以这是 this.source.position.offset 的问题:当我执行 ++this.index 时，我的 UStringPos 的偏移量没有'更新。问题出在切片上。

    this.source.string.slice(
      start, this.source.position.offset
    );

这个切片是基于偏移量的，因为我必须跟踪标识符开始的前一个偏移量。

解决方案

我可以使用我自己的 UString 类的切片，并将第一个参数用作偏移量，将最后一个参数用作普通索引。

'use strict';

export class UString
{
  // ...

  /*
   * Slices the internal string by using a pair of
   * offset and code point indices.
   *
   * @param {Number} i Offset
   * @param {Number} j
   * @return {String}
   */
  slice(i, j)
  {
    this.position.walk(this.string, j);
    j = this.position.offset;

    return this.string.slice(i, j);
  }

};

关于javascript - 读取代码点时出现偏移问题，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/45721257/

文章推荐： c# - 如何处理用户代码未处理的索引超出范围异常？

文章推荐： c# - LINQ 和各种连接示例

文章推荐： c# - LINQ 返回条件对象

javascript - 执行存在于输入值字段中的 javascript - JavaScript
我有一个 html 格式的表单: 我需要得到 JavaScript在value input 字段执行，但只能通过表单的 submit .原因是页面是一个模板所以我不控制它(不能有
javascript - JavaScript 代码片段正在破坏其他 JavaScript
我管理的论坛是托管软件，因此我无法访问源代码，我只能向页面添加 JavaScript 来实现我需要完成的任务。我正在尝试用超链接替换所有页面上某些文本关键字的第一个实例。我还根据国家/地区代码对这些
javascript - JavaScript 如何使新页面包含更多 JavaScript？
我正在使用 JS 打开新页面并将 HTML 代码写入其中，但是当我尝试使用 document.write() 在新页面中编写 JS 时功能不起作用。显然，一旦看到，主 JS 就会关闭。用于即将打开的
javascript - Javascript 引擎如何在浏览器中执行 Javascript？
提问不是为了解决问题，提问是为了更好地理解系统专家!我知道每当你将 javascript 代码输入 javascript 引擎时，它会立即由 javascript 引擎执行。由于没有看过Engi
javascript - 如何将 JavaScript 变量从一个 JavaScript 文件传递到另一个 JavaScript 文件？
我在一个文件夹中有两个 javascript 文件。我想将一个变量的 javascript 文件传递到另一个。我应该使用什么程序？最佳答案 window.postMessage用于跨文档消息。使
javascript - javascript 中的 javascript 输入问题
我有一个练习，我需要输入两个输入并检查它们是否都等于一个。如果是 console.log 正则 console.log false 我试过这样的事情: function isPositive(fir
javascript - 在加载其他 javascript 库的页面上嵌入 Javascript？
我正在做一个Web应用程序，计划允许其他网站(客户端)在其页面上嵌入以下javascript: 我的网络应用程序位于 http://example.org 。我不能假设客户端网站的页面有 JQue
javascript - 从 Javascript 内部调用 Javascript
目前我正在使用三个外部 JS 文件。我喜欢将所有三个 JS 文件合而为一。尽一切可能。我创建 aio.js 并在 aio.js 中 src="https://code.jquery.com/
javascript - AngularJS/javascript javascript 对象的特殊排序顺序
我有例如像这样的数组: var myArray = []; var item1 = { start: '08:00', end: '09:30' } var item2 = {
javascript - 在 Javascript 内部执行 Javascript？
所以我正在制作一个 Chrome 扩展，它使用我制作的一些 TamperMonkey 脚本。我想要一个“主”javascript 文件，您可以在其中包含并执行其他脚本。我很擅长使用以下行将其他 jav
javascript - 如何将变量从 javascript 移动到 javascript？
我有 A、B html 和 A、B javascript 文件。并且，如何将 A JavaScript 中使用的全局变量直接移动到 B JavaScript 中？示例 JavaScript) va
javascript - 从 javascript 调用 javascript
我需要将以下整个代码放入名为 activate.js 的 JavaScript 中。你能告诉我怎么做吗？ var int = new int({ seconds: 30, mark
javascript - 将变量值从一个 JavaScript 传递到另一个 JavaScript
我已经为我的 .net Web 应用程序创建了母版页 EXAMPLE1.Master。他们的 I 将值存储在 JavaScript 变量中。我想在另一个 JS 文件中检索该变量。示例1.大师:-
javascript - 有没有开源库可以在浏览器中使用 Javascript 来整理 Javascript？
是否有任何库可以用来转换这样的代码: function () { var a = 1; } 像这样的代码: function () { var a = 1; } 在我的浏览器中。因为我在 Gi
javascript - javascript 中的参数列表后缺少语法 javascript 错误 )
我收到语法缺失 ) 错误 $(document).ready(function changeText() { var p = document.getElementById('bidp
javascript - 第一个 JavaScript 完成后启动第二个 JavaScript
我正在制作进度条。它有一个标签。我想调整某个脚本完成的标签。在找到可能的解决方案的一些答案后，我想出了以下脚本。第一个启动并按预期工作。然而，第二个却没有。它出什么问题了？代码如下: HTML:
javascript - 为什么外部 javascript 库会阻止我页面上的 javascript？
这里有一个很简单的问题，我简单的头脑无法回答:为什么我在外部库中加载时，下面的匿名和onload函数没有运行？我错过了一些非常非常基本的东西。 Library.js 只有一行:console.log(
javascript - 如何区分代码内 javascript 和客户端 javascript
我知道 javascript 是一种客户端语言，但如果实际代码中嵌入的 javascript 代码以某种方式与在控制台上运行的代码不同，我会尝试找到答案。让我用一个例子来解释它: 我想创建一个像 Mi
javascript - 将 Javascript 内联到不显眼的 JavaScript？
我如何将这个内联 javascript 更改为 Unobtrusive JavaScript？谢谢! 感谢您的回答，但它不起作用。我的代码是: PHP js文件 document.getElem
javascript - 如何将 JavaScript 对象导出为 JavaScript？
我正在寻找将简单的 JavaScript 对象“转储”到动态生成的 JavaScript 源代码中的最优雅的方法。目的:假设我们有 node.js 服务器生成 HTML。我们在服务器端有一个对象x。

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

javascript - 读取代码点时出现偏移问题