gpt4 book ai didi

javascript - 如何检测分号是否用于终止 Esprima 生成的 Mozilla 解析器 AST 中的表达式?

转载 作者:行者123 更新时间:2023-12-03 12:43:50 26 4
gpt4 key购买 nike

开发人员创建了一个非常简单的程序:

var a = 6;
var b = 7
console.log(a * b);

我想确保开发人员使用分号,因为我不相信所有开发人员都了解全部 ASI(自动分号插入)规则。由于我还会添加其他代码质量检查,因此我想使用 Esprima 为待检查的代码生成 AST。使用 Esprima online parser 解析上面的简单程序时(选中“基于行和列”选项),会生成如下结构:

{
"loc": {
"start": {
"line": 1,
"column": 0
},
"end": {
"line": 3,
"column": 19
}
},
"type": "Program",
"body": [
{
"loc": {
"start": {
"line": 1,
"column": 0
},
"end": {
"line": 1,
"column": 10
}
},
"type": "VariableDeclaration",
"declarations": [
{
"loc": {
"start": {
"line": 1,
"column": 4
},
"end": {
"line": 1,
"column": 9
}
},
"type": "VariableDeclarator",
"id": {
"loc": {
"start": {
"line": 1,
"column": 4
},
"end": {
"line": 1,
"column": 5
}
},
"type": "Identifier",
"name": "a"
},
"init": {
"loc": {
"start": {
"line": 1,
"column": 8
},
"end": {
"line": 1,
"column": 9
}
},
"type": "Literal",
"value": 6,
"raw": "6"
}
}
],
"kind": "var"
},
{
"loc": {
"start": {
"line": 2,
"column": 0
},
"end": {
"line": 3,
"column": 0
}
},
"type": "VariableDeclaration",
"declarations": [
{
"loc": {
"start": {
"line": 2,
"column": 4
},
"end": {
"line": 2,
"column": 9
}
},
"type": "VariableDeclarator",
"id": {
"loc": {
"start": {
"line": 2,
"column": 4
},
"end": {
"line": 2,
"column": 5
}
},
"type": "Identifier",
"name": "b"
},
"init": {
"loc": {
"start": {
"line": 2,
"column": 8
},
"end": {
"line": 2,
"column": 9
}
},
"type": "Literal",
"value": 7,
"raw": "7"
}
}
],
"kind": "var"
},
{
"loc": {
"start": {
"line": 3,
"column": 0
},
"end": {
"line": 3,
"column": 19
}
},
"type": "ExpressionStatement",
"expression": {
"loc": {
"start": {
"line": 3,
"column": 0
},
"end": {
"line": 3,
"column": 18
}
},
"type": "CallExpression",
"callee": {
"loc": {
"start": {
"line": 3,
"column": 0
},
"end": {
"line": 3,
"column": 11
}
},
"type": "MemberExpression",
"computed": false,
"object": {
"loc": {
"start": {
"line": 3,
"column": 0
},
"end": {
"line": 3,
"column": 7
}
},
"type": "Identifier",
"name": "console"
},
"property": {
"loc": {
"start": {
"line": 3,
"column": 8
},
"end": {
"line": 3,
"column": 11
}
},
"type": "Identifier",
"name": "log"
}
},
"arguments": [
{
"loc": {
"start": {
"line": 3,
"column": 12
},
"end": {
"line": 3,
"column": 17
}
},
"type": "BinaryExpression",
"operator": "*",
"left": {
"loc": {
"start": {
"line": 3,
"column": 12
},
"end": {
"line": 3,
"column": 13
}
},
"type": "Identifier",
"name": "a"
},
"right": {
"loc": {
"start": {
"line": 3,
"column": 16
},
"end": {
"line": 3,
"column": 17
}
},
"type": "Identifier",
"name": "b"
}
}
]
}
}
]
}

我该如何检查是否使用了分号?我可以推断第二行可能没有使用分号,因为 AST 中的第二个 VariableDeclaration 显示它结束于 {line: 3, column: 0},如下所示。

The 2nd VariableDeclaration's location ends on line 3

其他使用 Esprima 的工具也是这样做的吗?检查 \r\n\n 行结尾怎么样? Esprima 不是执行此任务的正确工具吗?

编辑

一位和我讨论过这个问题的同事告诉我,我“可能需要解析树”,这样我就可以获得标记(token)列表。这解决了我的部分问题。以下是 Esprima 提供的标记(token)列表:

[
{
"type": "Keyword",
"value": "var"
},
{
"type": "Identifier",
"value": "a"
},
{
"type": "Punctuator",
"value": "="
},
{
"type": "Numeric",
"value": "6"
},
{
"type": "Punctuator",
"value": ";"
},
{
"type": "Keyword",
"value": "var"
},
{
"type": "Identifier",
"value": "b"
},
{
"type": "Punctuator",
"value": "="
},
{
"type": "Numeric",
"value": "7"
},
{
"type": "Identifier",
"value": "console"
},
{
"type": "Punctuator",
"value": "."
},
{
"type": "Identifier",
"value": "log"
},
{
"type": "Punctuator",
"value": "("
},
{
"type": "Identifier",
"value": "a"
},
{
"type": "Punctuator",
"value": "*"
},
{
"type": "Identifier",
"value": "b"
},
{
"type": "Punctuator",
"value": ")"
},
{
"type": "Punctuator",
"value": ";"
}
]

现在我需要弄清楚如何将此标记列表与 AST 结合使用,以告诉我应该在第 2 行有一个分号。

最佳答案

要捕获 JavaScript 解释器无法捕获的逻辑或协议(protocol)错误(例如,始终以分号终止语句),您应该编写自己的状态机来对语法进行建模。对于您给出的示例,这是 CoffeeScript + Node.js 中的一种方法:

esprima = require 'esprima'

# Predicate builders: each returns a one-argument function yielding a verdict.

# Token predicate that applies `is_valid` to the token's `type` field.
p_type = (is_valid) -> ({type}) -> is_valid type
# Token predicate that applies `is_valid` to the token's `value` field.
p_value = (is_valid) -> ({value}) -> is_valid value

# Matches a value strictly equal to `target`.
p_is = (target) -> (candidate) -> candidate is target
# Matches a value that is one of `targets`.
p_in = (targets...) -> (candidate) -> candidate in targets
# Accepts anything.
p_tautology = -> true

# Builds a predicate that is the logical OR of the given predicates.
#
# fs... - zero or more one-argument predicates.
#
# The returned predicate evaluates the predicates left to right and
# short-circuits: it yields the first truthy result, otherwise the last
# (falsy) result. With no predicates it always yields false, because an empty
# disjunction has no alternative that could match (false is the identity of
# OR); the previous implementation returned a tautology in that case.
p_disjoin = (fs...) ->
  (value) ->
    result = false
    for f in fs
      result = f(value)
      return result if result
    result

# Builds a predicate that is the logical AND of the given predicates.
#
# fs... - zero or more one-argument predicates.
#
# The returned predicate runs the predicates left to right and stops at the
# first falsy result, which it yields; if every predicate passes it yields the
# last result. With no predicates it yields true.
p_conjoin = (fs...) ->
  (value) ->
    verdict = true
    for check in fs
      verdict = check(value)
      break unless verdict
    verdict

# Small function builders used to compute the next state label from a token.

# Reads a token's `type` field.
f_type = ({type}) -> type
# Reads a token's `value` field.
f_value = ({value}) -> value
# Returns a function that ignores its arguments and yields `value`.
f_constant = (value) -> -> value
# Returns its argument unchanged.
f_identity = (x) -> x
# Wraps `fn` as a one-argument function of a token.
f_token = (fn) -> (token) -> fn token
# Looks up, in the `transition` table, the key that `dispatch` extracts from
# the token; yields undefined when the table has no such key.
f_transition = (dispatch, transition) -> (token) ->
  key = dispatch token
  transition[key]
# Falls back to `default_value` whenever `transition_fn` yields a falsy result.
f_default = (default_value, transition_fn) -> (token) ->
  transition_fn(token) or default_value

# Renders a value for use in diagnostic messages.
#
# value - any value.
#
# Returns a string:
#   * null / undefined            -> 'null' / 'undefined'
#   * strings                     -> the text wrapped in double quotes
#   * arrays and array-likes      -> '[e1, e2, ...]', elements rendered recursively
#   * plain objects               -> '{k: v, ...}' over own enumerable keys
#   * everything else             -> value.toString()
#
# Functions are excluded from the array-like branch: they carry a numeric
# `length` (their arity) and were previously printed as arrays of `undefined`.
# Objects that have no `toString` at all (e.g. created with
# `Object.create(null)`) are rendered as plain objects instead of throwing.
to_string = (value) ->
  return String(value) unless value?
  return '"' + value + '"' if typeof value is 'string'
  is_array_like = typeof value isnt 'function' and typeof value.length is 'number' and value.length >= 0
  if is_array_like
    return '[' + (to_string e for e in value).join(', ') + ']'
  if typeof value is 'object'
    is_plain = typeof value.toString isnt 'function' or value.toString is Object::toString
    if is_plain
      attrs = ("#{k}: #{to_string v}" for own k, v of value)
      return '{' + attrs.join(', ') + '}'
  value.toString()

# Builds a `handle_error` callback for a state that expected `expected` to
# follow the previous token. The callback writes a diagnostic positioned at the
# end of the previous token to stderr and terminates the process with status 1.
#
# expected - human-readable description of what the state accepts.
#
# The returned callback takes (tokens, index), where `index` is the position of
# the offending token and must be >= 1. Tokens must carry `loc` information.
expected_after = (expected) -> (tokens, index) ->
  [prev_token, curr_token] = tokens.slice(index - 1, index + 1)
  {line, column} = prev_token.loc.end
  process.stderr.write(
    "[Error] line #{line}, column #{1 + column}: Expected #{expected} " +
    "after #{to_string prev_token.value}, but received " +
    "#{to_string curr_token.value}\n")
  process.exit(1)

# State machine describing the token sequences the linter accepts.
#
# Every state is an object with four members:
#   is_valid(token)            - whether `token` may appear in this state
#   next_label(token)          - key into `transition` for an accepted token
#   handle_error(tokens, index) - reports the offending token and exits
#   transition                 - map of label -> thunk producing the next state
#
# `root` is the state at the start of a statement. Two statement shapes are
# modelled:
#   var <id> [= <literal-or-id>] {, <id> [= <literal-or-id>]} ;
#   <id> . <id> ( <id> * <id> ) ;
# Both return to `root` only through a ';' token, which is what makes a
# missing semicolon an error.
root =
  is_valid: p_disjoin(
    p_conjoin(p_type(p_is 'Keyword'), p_value(p_is 'var')),
    p_type(p_is 'Identifier'))
  next_label: f_transition f_type, {'Keyword': 'variable_declaration', 'Identifier': 'identifier'}
  handle_error: (tokens, index) ->
    if index > 0
      expected_after('variable declaration')(tokens, index)
    else
      # No previous token exists, so report at the start of the current one.
      curr_token = tokens[index]
      {line, column} = curr_token.loc.start
      process.stderr.write(
        "[Error] line #{line}, column #{1 + column}: Expected variable " +
        "declaration but received #{to_string curr_token.value}\n")
      process.exit(1)
  transition:
    # Statement starting with an identifier: <id> . <id> ( <id> * <id> ) ;
    identifier: () ->
      is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_in '.')
      next_label: f_transition f_value, {'.': 'membership'}
      handle_error: expected_after "'.'"
      transition:
        membership: () ->
          is_valid: p_type(p_is 'Identifier')
          next_label: f_constant 'invocation'
          handle_error: expected_after 'an identifier'
          transition:
            invocation: () ->
              is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_is '(')
              next_label: f_constant 'identifier'
              handle_error: expected_after "'('"
              transition:
                identifier: () ->
                  is_valid: p_type(p_in 'Identifier')
                  next_label: f_constant 'punctuator'
                  handle_error: expected_after 'an identifier'
                  transition:
                    punctuator: () ->
                      is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_in '*')
                      next_label: f_transition f_value, {'*': 'identifier'}
                      handle_error: expected_after 'a binary operator'
                      transition:
                        identifier: () ->
                          is_valid: p_conjoin p_type(p_is 'Identifier')
                          next_label: f_constant 'punctuator'
                          handle_error: expected_after 'an identifier'
                          transition:
                            punctuator: () ->
                              is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_is ')')
                              next_label: f_constant 'punctuator'
                              handle_error: expected_after "')'"
                              transition:
                                punctuator: () ->
                                  # Must be a real conjunction: wrapping the
                                  # predicate in f_constant made this check
                                  # always truthy, so a missing ';' slipped
                                  # through and crashed on the lookup below.
                                  is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_is ';')
                                  next_label: f_transition f_value, {';': 'terminator'}
                                  handle_error: expected_after "';'"
                                  transition:
                                    terminator: () -> root
    # Statement starting with `var`.
    variable_declaration: () ->
      is_valid: p_type(p_is 'Identifier')
      next_label: f_constant 'punctuator'
      handle_error: expected_after 'an identifier'
      transition:
        punctuator: () ->
          is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_in '=', ',', ';')
          next_label: f_transition f_value, {'=': 'assignment', ',': 'separator', ';': 'terminator'}
          handle_error: expected_after "'=', ',', or ';'"
          transition:
            assignment: () ->
              is_valid: p_type(p_in 'Boolean', 'Identifier', 'Null', 'Numeric', 'String', 'RegularExpression')
              next_label: f_constant 'punctuator'
              handle_error: expected_after 'a literal or an identifier'
              transition:
                punctuator: () ->
                  # Only ',' and ';' have transitions below, so only those are
                  # accepted; other punctuators are reported as errors.
                  is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_in ',', ';')
                  next_label: f_transition f_value, {',': 'identifier', ';': 'terminator'}
                  handle_error: expected_after "',' or ';'"
                  transition:
                    identifier: () -> root.transition.variable_declaration()
                    terminator: () -> root
            separator: () -> root.transition.variable_declaration()
            terminator: () -> root

# Runs the token list through the state machine rooted at `root`.
#
# tokens - array of Esprima tokens; each must carry `loc` information because
#          the error reporters read `token.loc`.
#
# For each token the current state either accepts it and moves to the state
# produced by the matching transition, or reports it via `handle_error`. A
# token is also reported when the state accepts it but has no transition for
# the computed label, instead of failing with a TypeError on the lookup.
#
# After the last token the machine must be back in `root`; otherwise the input
# stopped in the middle of a statement (for example a final statement with no
# terminating ';'), which is reported on stderr and ends the process with
# status 1.
#
# Returns nothing.
lint = (tokens) ->
  state = root
  for token, index in tokens
    advance = if state.is_valid(token) then state.transition[state.next_label token] else null
    if typeof advance is 'function'
      state = advance()
    else
      state.handle_error(tokens, index)
  if state isnt root and tokens.length > 0
    last_token = tokens[tokens.length - 1]
    {line, column} = last_token.loc.end
    process.stderr.write(
      "[Error] line #{line}, column #{1 + column}: Unexpected end of " +
      "input after #{to_string last_token.value}\n")
    process.exit(1)
  return

# Sample program to lint; its second line has no terminating semicolon.
text = [
  'var a = 6;'
  'var b = 7'
  'console.log(a * b);'
].join '\n'

# Request location data: the state machine's error reporters read `token.loc`.
tokens = esprima.tokenize text, {loc: true}
lint tokens

关于javascript - 如何检测分号是否用于终止 Esprima 生成的 Mozilla 解析器 AST 中的表达式?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/23413531/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com