在计算机科学中,抽象语法树(Abstract Syntax Tree,AST),或简称语法树(Syntax tree),是源代码语法结构的一种抽象表示。它以树状的形式表现编程语言的语法结构,树上的每个节点都表示源代码中的一种结构。之所以说语法是“抽象”的,是因为这里的语法并不会表示出真实语法中出现的每个细节。比如,嵌套括号被隐含在树的结构中,并没有以节点的形式呈现;而类似于 if-condition-then 这样的条件跳转语句,可以使用带有两个分支的节点来表示。

AST的作用:分析语法,解析程序。目前常见的JS代码高亮脚本一般有2种原理,第一种是用正则匹配出来,第二种就是利用AST分析一遍后再来处理。Babel将ES6代码转为ES5代码的第一步解析就是将代码字符串解析成抽象语法树。

指导:https://github.com/starkwang/the-super-tiny-compiler-cn/blob/master/super-tiny-compiler-chinese.js

执行:

Mathast.run("sin(90-30) + pow2 2")

输出:

{
    "ast": {
        "type": "mathExp",
        "body": [
            {
                "type": "nameLi",
                "value": "sin",
                "params": [
                    {
                        "type": "callExp",
                        "arguments": [
                            {
                                "type": "numberLi",
                                "value": "90"
                            },
                            {
                                "type": "operatorLi",
                                "value": "-"
                            },
                            {
                                "type": "numberLi",
                                "value": "30"
                            }
                        ]
                    }
                ]
            },
            {
                "type": "operatorLi",
                "value": "+"
            },
            {
                "type": "nameLi",
                "value": "pow",
                "params": [
                    {
                        "type": "numberLi",
                        "value": "2"
                    },
                    {
                        "type": "numberLi",
                        "value": "2"
                    }
                ]
            }
        ]
    },
    "result": {
        "type": "numberLi",
        "value": 3.6951893788977834,
        "jscode": "Math.sin(60)+ Math.pow(2, 2)"
    }
}

说明:ln/cos/sin/tan后面的第一个表达式会被读取为它们的唯一参数,其他如pow/log/A/C等会向下读取2个表达式作为第一和第二参数,而E/PI会被直接替换成对应的值(Math.E / Math.PI)。同时,若要使用logE E/logE E,应该写成log E E/log E E,否则会识别成logE,同理,5^(-(2^5))应该写成pow(5) (-(pow2 5))。sqr为开平方根,squ为平方。

JS源码:

var Mathast = {
  tokenizer: function tokenizer(exp) {
    function isNumber(c) {
      if (/[0-9\.\-e]/.test(c)) return true;
      return false;
    }

    function isLetter(c) {
      if (/[a-z]/i.test(c)) return true;
      return false;
    }

    function isOperator(c) {
      if (/[\+\-\*\/]/.test(c)) return true;
      return false;
    }

    exp = exp.split('');
    var index = 0;
    var tokens = [];
    var char, tmp;

    while (index < exp.length) {
      char = exp[index];
      tmp = "";

      if (/\(|\)/.test(char)) {
        tokens.push({
          type: 'paren',
          value: char
        });
        index++;
        continue;
      }

      if (/\s/.test(char)) {
        index++;
        continue;
      }

      if (isNumber(char)) {
        while (isNumber(char)) {
          tmp += String(char);
          char = exp[++index];
          if (index >= exp.length) break;
        }

        tokens.push({
          type: 'number',
          value: tmp
        });
        continue;
      }

      if (isLetter(char)) {
        while (isLetter(char)) {
          tmp += char;
          char = exp[++index];
          if (index >= exp.length) break;
        }

        tokens.push({
          type: 'name',
          value: tmp
        });
        continue;
      }

      if (isOperator(char)) {
        while (isOperator(char)) {
          tmp += char;
          char = exp[++index];
          if (index >= exp.length) break;
        }

        tokens.push({
          type: 'operator',
          value: tmp
        });
        continue;
      }

      throw new TypeError('Unknown: ' + char);
    }

    return tokens;
  },
  parser: function parser(tokens) {
    var index = 0;

    function walk() {
      var token = tokens[index];
      var temp_params;

      if (/number|operator/i.test(token.type)) {
        index++;
        return {
          type: token.type + "Li",
          value: token.value
        };
      }

      if (token.type == 'name') {
        index++;

        if (['ln', 'sin', 'cos', 'lg', 'sqr', 'tan', 'squ', 'abs'].indexOf(token.value) > -1) {
          // only one param
          temp_params = [walk()];
        } else if (['log', 'pow', 'C', 'A'].indexOf(token.value) > -1) {
          // two params
          temp_params = [walk(), walk()];
        } else if (['E', 'PI'].indexOf(token.value) > -1) {
          switch (token.value) {
            case 'E':
              var nameValue = 'Math.E';
              break;

            case 'PI':
              var nameValue = 'Math.PI';
              break;

            default:
              throw new TypeError('Unknown: ' + token.value);
          }

          return {
            type: 'numberLi',
            value: nameValue
          };
        } else {
          throw new TypeError("Unknown: " + token.value);
        }

        return {
          type: token.type + "Li",
          value: token.value,
          params: temp_params
        };
      }

      if (token.type == 'paren' && token.value == '(') {
        var node = {
          type: 'callExp',
          arguments: []
        };
        token = tokens[++index];

        while (token.type != 'paren' || token.type == 'paren' && token.value != ')') {
          node.arguments.push(walk());
          token = tokens[index];
        }

        index++;
        return node;
      }

      throw new TypeError(token.type);
    }

    var AST = {
      type: 'mathExp',
      body: []
    };

    while (index < tokens.length) {
      AST.body.push(walk());
    }

    return AST;
  },
  traverser: function traverser(AST, visior) {
    function traverseArray(array, parent) {
      array.forEach(function (child) {
        traverseNode(child, parent);
      });
    }

    function traverseNode(node, parent) {
      var method = visior[node.type];

      if (method) {
        method(node, parent);
      }

      switch (node.type) {
        case 'mathExp':
          traverseArray(node.body, node);
          break;

        case 'callExp':
          if (typeof node.params == 'undefined') traverseArray(node.arguments, node);else traverseArray(node.params, node);
          break;

        case 'numberLi':
          break;

        case 'nameLi':
          break;

        case 'operatorLi':
          break;

        default:
          throw new TypeError(node.type);
      }
    }

    traverseNode(AST, null);
  },
  transformer: function transformer(AST) {
    var newAST = {
      type: 'mathExp',
      body: []
    };
    AST._context = newAST.body;
    this.traverser(AST, {
      numberLi: function numberLi(node, parent) {
        parent._context.push({
          type: 'numberLi',
          value: node.value
        });
      },
      nameLi: function nameLi(node, parent) {
        parent._context.push({
          type: 'nameLi',
          value: node.value,
          params: node.params
        });
      },
      operatorLi: function operatorLi(node, parent) {
        parent._context.push({
          type: 'operatorLi',
          value: node.value
        });
      },
      callExp: function callExp(node, parent) {
        var expression = {
          type: 'callExp',
          arguments: []
        };
        node._context = expression.arguments;

        parent._context.push(expression);
      }
    });
    return newAST;
  },
  run: function run(exp) {
    var node = this.transformer(this.parser(this.tokenizer(exp)));

    function solve(node) {
      var result = "",
          index = 0,
          rc,
          _result;

      while (index < node.length) {
        rc = node[index];

        if (rc.type == 'nameLi') {
          var firstP = rc.params[0].value;
          if (typeof rc.params[1] != 'undefined') var secondP = rc.params[1].value;

          switch (rc.value) {
            case 'sin':
              result += 'Math.sin('+firstP+')';
              break;

            case 'cos':
              result += 'Math.cos('+firstP+')';
              break;

            case 'tan':
              result += 'Math.tan('+firstP+')';
              break;

            case 'log':
              result += 'Math.log('+secondP+', '+firstP+')';
              break;

            case 'ln':
              result += 'Math.log('+firstP+', Math.E)';
              break;

            case 'lg':
              result += 'Math.log('+firstP+', 10)';
              break;

            case 'pow':
              result += 'Math.pow('+firstP+', '+secondP+')';
              break;

            case 'sqr':
              result += 'Math.sqrt('+firstP+')';
              break;

            case 'squ':
              result += 'Math.pow('+firstP+', 2)';
              break;
            case 'abs':
              result += 'Math.abs('+firstP+')';
              break;
            case 'A':
              var tmp = 1;

              for (var i = 0; i < Number(secondP); i++) {
                tmp = tmp * (Number(firstP) - i);
              }

              result += tmp;
              break;

            case 'C':
              var tmp = 1,
                  _tmp = 1;

              for (var i = 0; i < Number(secondP); i++) {
                tmp = tmp * (Number(firstP) - i);
              }

              for (var i = 0; i < Number(secondP); i++) {
                _tmp = _tmp * (Number(secondP) - i);
              }

              result += tmp / _tmp;
              break;

            default:
              throw new TypeError("Unknown: " + rc.value);
          }
        } else if (typeof rc.type == 'undefined' && rc.length > 0) {
          result += rc.map(function (v) {
            return v.value;
          });
        } else {
          result += rc.value + (index != node.length - 1 ? ' ' : '');
        }

        index++;
      }
      try{
          result = eval(result);
      }catch(e){
          console.log(e, "Code:" + result);
      }
      return {
        type: 'numberLi',
        value: result,
      };
    }

    function hasExp(node) {
      if (typeof node == 'undefined') return false;

      if (node[0] != 'undefined') {
        var index = 0,
            rc;

        while (index < node.length) {
          rc = node[index];
          if (rc.type == 'callExp') return true;
          if (rc.type == 'nameLi') if (hasExp(rc.params)) return true;
          index++;
        }

        return false;
      }

      return false;
    }

    function anaNode(node) {
      var foo = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : true;
      var index = 0,
          result = [],
          rc;

      if (typeof node[0] != 'undefined') {
        while (index < node.length) {
          rc = node[index];

          if (rc.type == 'callExp') {
            result.push(anaNode(rc.arguments));
          } else if (rc.type == 'nameLi' && hasExp(rc.params)) {
            result.push({
              type: 'nameLi',
              value: rc.value,
              params: anaNode(rc.params, false)
            });
          } else {
            result.push(rc);
          }

          index++;
        }

        if (foo)
            result = solve(result);
        return result;
      } else {
        return node;
      }
    }

    return {
      ast: node,
      result: anaNode(node.body)
    };
  }
};