前言

之前写了几篇编译原理学习笔记, 断了一段时间后重读以前的笔记, 才发现写得过于晦涩难懂, 有点影响阅读, 甚至连写作脉络都难寻. 所以还是重开一个篇章, 以单条语句为切入点, 力争让每一篇读完后都能理解对应的内容.

变量赋值

变量赋值在任何编程入门教材中, 都是除了hello world之外最先学习的语法, 以此作为切入点会是一个很好的开始.

源码

const num = 123; var str = 'string'

AST(抽象语法树)

{
  "type": "Program",
  "start": 0,
  "end": 35,
  "body": [
    {
      "type": "VariableDeclaration",
      "start": 0,
      "end": 16,
      "declarations": [
        {
          "type": "VariableDeclarator",
          "start": 6,
          "end": 15,
          "id": {
            "type": "Identifier",
            "start": 6,
            "end": 9,
            "name": "num"
          },
          "init": {
            "type": "Literal",
            "start": 12,
            "end": 15,
            "value": 123,
            "raw": "123"
          }
        }
      ],
      "kind": "const"
    },
    {
      "type": "VariableDeclaration",
      "start": 17,
      "end": 35,
      "declarations": [
        {
          "type": "VariableDeclarator",
          "start": 21,
          "end": 35,
          "id": {
            "type": "Identifier",
            "start": 21,
            "end": 24,
            "name": "str"
          },
          "init": {
            "type": "Literal",
            "start": 27,
            "end": 35,
            "value": "string",
            "raw": "'string'"
          }
        }
      ],
      "kind": "var"
    }
  ],
  "sourceType": "module"
}

入口

从下面的代码可以看出, 解析的要点其实就是循环调用parseStatement, 直到遇到type === tt.eof, 此时就完成了解析.

然后再检查未定义的导出(undefinedExports), 并进行Directive处理(adaptDirectivePrologue).

/**
 * Parser entry point: fills in the top-level Program node.
 *
 * Statements are read one at a time through `parseStatement` until the
 * current token type equals `tt.eof`; each one is appended to `node.body`.
 * @param {Node} node Program node to populate; `body` is created when missing.
 * @returns {Node} The value `finishNode` returns for `node` as a Program.
 */
pp.parseTopLevel = function(node) {
  const exportedNames = Object.create(null)
  if (!node.body) {
    node.body = []
  }
  // Core loop: keep consuming statements until the end-of-file token.
  while (this.type !== tt.eof) {
    const statement = this.parseStatement(null, true, exportedNames)
    node.body.push(statement)
  }
  if (this.inModule) {
    // Anything still listed in undefinedExports is reported as an export of a
    // binding that was never defined (per the original note, the entries are
    // added by checkLocalExport, which is not visible here).
    for (const name of Object.keys(this.undefinedExports)) {
      this.raiseRecoverable(this.undefinedExports[name].start, `Export '${name}' is not defined`)
    }
  }
  this.adaptDirectivePrologue(node.body)
  this.next()
  node.sourceType = this.options.sourceType
  const program = this.finishNode(node, NodeTypes.Program)
  return program
}

parseVarStatement

parseStatement中遇到const, let, var后, 调用parseVarStatement, 并返回VariableDeclaration类型的节点.

/**
 * Parses a declaration statement such as `let a = 1`.
 * @param {Node} node Node that is finished as the VariableDeclaration.
 * @param {string} kind Declaration keyword; may be "let", "const" or "var".
 * @returns {Node} The value `finishNode` returns for `node`.
 */
pp.parseVarStatement = function(node, kind) {
  // Advance one token (presumably past the declaration keyword — confirm against the caller).
  this.next()

  // parseVar handles the declarators, i.e. both sides of each `=`.
  this.parseVar(node, false, kind)

  // Handle the statement terminator (the original note says semicolon insertion is attempted here).
  this.semicolon()

  const declaration = this.finishNode(node, NodeTypes.VariableDeclaration)
  return declaration
}

parseVar

解析变量定义, 语句类似num = 123, 也可以是num = 123, str = 'abc'这种连续写法. 在parseMaybeAssign的解析中, 兼容了num = n = 123这样的连续赋值写法.

/**
 * Parses one or more comma-separated declarators and stores them on
 * `node.declarations`; `node.kind` is set to `kind`.
 * @param {Node} node Declaration node being filled in.
 * @param {boolean} isFor Forwarded to `parseMaybeAssign`; when truthy, a
 *   pattern binding may be followed by `in` / `of` instead of an initializer.
 * @param {string} kind 'let', 'var' or 'const'.
 * @returns {Node} The same `node`.
 */
pp.parseVar = function(node, isFor, kind) {
  node.declarations = []
  node.kind = kind
  do {
    const declarator = this.startNode()
    this.parseVarId(declarator, kind)
    if (this.eat(tt.eq)) {
      declarator.init = this.parseMaybeAssign(isFor)
    } else if (kind === "const" && this.type !== tt._in && !(this.options.ecmaVersion >= 6 && this.isContextual("of"))) {
      // A `const` binding without an initializer is rejected unless `in` / `of` follows.
      this.unexpected()
    } else if (declarator.id.type !== "Identifier" && (!isFor || (this.type !== tt._in && !this.isContextual("of")))) {
      this.raise(this.lastTokEnd, "Complex binding patterns require an initialization value")
    } else {
      declarator.init = null
    }
    node.declarations.push(this.finishNode(declarator, NodeTypes.VariableDeclarator))
    // Another declarator follows only when a comma is consumed.
  } while (this.eat(tt.comma))
  return node
}

等号左边表达式解读

parseVarId

读出变量名

/**
 * Reads the binding target of a declarator and stores it on `decl.id`,
 * then validates it with `checkLValPattern`.
 * @param {Node} decl Declarator node receiving the `id`.
 * @param {string} kind Declaration keyword; "var" selects BIND_VAR, anything else BIND_LEXICAL.
 */
pp.parseVarId = function(decl, kind) {
  decl.id = this.parseBindingAtom()

  let bindingType
  if (kind === "var") {
    bindingType = BIND_VAR
  } else {
    bindingType = BIND_LEXICAL
  }
  this.checkLValPattern(decl.id, bindingType, false)
}

parseBindingAtom

解析单个变量或表达式

根据当前token区分不同情况: 如果遇到[, 则调用parseBindingList解析为ArrayPattern节点; 如果遇到{, 则调用parseObj解析; 否则默认调用parseIdent解析.

/**
 * Parses a single binding target, e.g. the `[a, b]`, `{c, d}` or `e` part
 * that follows `let`.
 * @returns {Node} An ArrayPattern node, the result of `parseObj(true)`, or
 *   the result of `parseIdent()`.
 */
pp.parseBindingAtom = function() {
  if (this.options.ecmaVersion >= 6) {
    // Capture the token type before any token is consumed.
    const tokenType = this.type
    if (tokenType === tt.bracketL) {
      // Array pattern: [a, b]
      const pattern = this.startNode()
      this.next()
      pattern.elements = this.parseBindingList(tt.bracketR, true, true)
      return this.finishNode(pattern, NodeTypes.ArrayPattern)
    }
    if (tokenType === tt.braceL) {
      // Object pattern: {c, d}
      return this.parseObj(true)
    }
  }
  // Plain identifier: e
  return this.parseIdent()
}

parseBindingList

解析列表形式表达的变量, 依次调用parseMaybeDefault读出多个变量, 同时兼容...rest语法.

/**
 * Parses a comma-separated list of binding elements until `close` is
 * consumed. Per the original note it serves `let [a, b] = c` as well as
 * parameter lists such as `function (a, b) {}`.
 * @param {TokenType} close Token type that terminates the list.
 * @param {boolean} allowEmpty When truthy, holes like `[a,,b]` are recorded as `null`.
 * @param {boolean} allowTrailingComma When truthy, a comma right before `close` is accepted.
 * @returns {Array} The parsed elements (`null` for holes).
 */
pp.parseBindingList = function(close, allowEmpty, allowTrailingComma) {
  const items = []
  let needsSeparator = false
  while (!this.eat(close)) {
    // Every element except the first must be preceded by a comma.
    if (needsSeparator) this.expect(tt.comma)
    else needsSeparator = true

    if (allowEmpty && this.type === tt.comma) {
      // Hole in the list, as in [a,,b].
      items.push(null)
      continue
    }
    if (allowTrailingComma && this.afterTrailingComma(close)) {
      // Trailing comma followed by the closing token, as in [a,b,].
      break
    }
    if (this.type === tt.ellipsis) {
      // Rest element (`...name`).
      const restElement = this.parseRestBinding()
      this.parseBindingListItem(restElement)
      items.push(restElement)
      // No comma may follow the rest element.
      if (this.type === tt.comma) {
        this.raise(this.start, "Comma is not permitted after the rest element")
      }
      this.expect(close)
      break
    }
    // Ordinary element, optionally with a default: let [a = 1] = [2].
    const item = this.parseMaybeDefault(this.start, this.startLoc)
    this.parseBindingListItem(item)
    items.push(item)
  }
  return items
}

parseMaybeDefault

解析单个变量, 同时允许带默认值. 这里与parseBindingAtom构成了递归调用, 归根到底是使用parseIdent解析.

/**
 * Parses a single binding target that may carry a default value (`a = 1`).
 * @param {*} startPos Start position passed to `startNodeAt` for the AssignmentPattern.
 * @param {*} startLoc Start location passed to `startNodeAt` for the AssignmentPattern.
 * @param {*} left Already-parsed target; when falsy, `parseBindingAtom` is called to obtain one.
 * @returns {Node} The bare target, or an AssignmentPattern wrapping it when `=` follows.
 */
pp.parseMaybeDefault = function(startPos, startLoc, left) {
  const target = left || this.parseBindingAtom()

  // Below ecmaVersion 6 the target is returned without looking for `=`.
  if (this.options.ecmaVersion < 6) return target
  // Without a consumed `=` the target is returned as-is.
  if (!this.eat(tt.eq)) return target

  const pattern = this.startNodeAt(startPos, startLoc)
  pattern.left = target
  pattern.right = this.parseMaybeAssign()
  return this.finishNode(pattern, NodeTypes.AssignmentPattern)
}

parseRestBinding

解析...rest形式的rest语法, 与parseBindingAtom构成递归.

/**
 * Parses a rest element in a binding pattern, e.g. `let [...aaa] = bbb;`.
 * @returns {Node} The value `finishNode` returns for the RestElement node.
 */
pp.parseRestBinding = function() {
  const restNode = this.startNode()
  // Advance one token (presumably the `...` — the visible caller checks for tt.ellipsis first).
  this.next()

  // RestElement inside of a function parameter must be an identifier
  const identifierRequired = this.options.ecmaVersion === 6
  if (identifierRequired && this.type !== tt.name) {
    this.unexpected()
  }

  restNode.argument = this.parseBindingAtom()
  return this.finishNode(restNode, NodeTypes.RestElement)
}

parseIdent

解析单个token并作为变量名返回, 这里不需要兼容任何情况, 是最小的子类型.

/**
 * Parses the current token as an Identifier node.
 *
 * A name token supplies `this.value` as the name; a keyword token supplies
 * its keyword text; any other token is reported through `unexpected()`.
 * @param {boolean} liberal When truthy, the reserved-word check is skipped;
 *   the flag is also forwarded (as a boolean) to `next`.
 * @returns {Node} The finished Identifier node.
 */
pp.parseIdent = function(liberal) {
  let node = this.startNode()
  if (this.type === tt.name) {
    node.name = this.value
  } else if (this.type.keyword) {
    node.name = this.type.keyword
    // To fix https://github.com/acornjs/acorn/issues/575
    // `class` and `function` keywords push new context into this.context.
    // The context is popped again unless the previous token is a single
    // character that is a dot (char code 46), i.e. the `xxx.class` form.
    // Fix: the `+` in `lastTokStart + 1` was missing, which made the
    // expression a syntax error.
    if ((node.name === "class" || node.name === "function") &&
        (this.lastTokEnd !== this.lastTokStart + 1 || this.input.charCodeAt(this.lastTokStart) !== 46)) {
      this.context.pop()
    }
  } else {
    this.unexpected()
  }
  this.next(!!liberal)
  this.finishNode(node, "Identifier")
  if (!liberal) {
    // Validate that the name is not a reserved word.
    this.checkUnreserved(node)
    // Remember the position of the first `await` used as an identifier.
    if (node.name === "await" && !this.awaitIdentPos)
      this.awaitIdentPos = node.start
  }
  return node
}

结果示例

{
    "type": "Identifier",
    "start": 21,
    "end": 24,
    "name": "str"
}