用JS+AST写数学表达式解析器
评论 0 热度 874
在计算机科学中,抽象语法树(Abstract Syntax Tree,AST),或简称语法树(Syntax tree),是源代码语法结构的一种抽象表示。它以树状的形式表现编程语言的语法结构,树上的每个节点都表示源代码中的一种结构。之所以说语法是“抽象”的,是因为这里的语法并不会表示出真实语法中出现的每个细节。比如,嵌套括号被隐含在树的结构中,并没有以节点的形式呈现;而类似于 if-condition-then 这样的条件跳转语句,可以使用带有两个分支的节点来表示。
AST的作用:分析语法,解析程序。目前常见的JS代码高亮脚本一般有2种原理,第一种是用正则匹配出来,第二种就是利用AST分析一遍后再来处理。Babel将ES6代码转为ES5代码的第一步解析就是将代码字符串解析成抽象语法树。
指导:https://github.com/starkwang/the-super-tiny-compiler-cn/blob/master/super-tiny-compiler-chinese.js
执行:
Mathast.run("sin(90-30) + pow2 2")
输出:
{
"ast": {
"type": "mathExp",
"body": [
{
"type": "nameLi",
"value": "sin",
"params": [
{
"type": "callExp",
"arguments": [
{
"type": "numberLi",
"value": "90"
},
{
"type": "operatorLi",
"value": "-"
},
{
"type": "numberLi",
"value": "30"
}
]
}
]
},
{
"type": "operatorLi",
"value": "+"
},
{
"type": "nameLi",
"value": "pow",
"params": [
{
"type": "numberLi",
"value": "2"
},
{
"type": "numberLi",
"value": "2"
}
]
}
]
},
"result": {
"type": "numberLi",
"value": 3.6951893788977834,
"jscode": "Math.sin(60)+ Math.pow(2, 2)"
}
}
说明:ln/cos/sin/tan后面的第一个表达式会被读取为它们的唯一参数,其他如pow/log/A/C等会向下读取2个表达式作为第一和第二参数,而E/PI会被直接替换成对应的值(Math.E / Math.PI
)。同时,若要使用logE E/logE E
,应该写成log E E/log E E
,否则会识别成logE,同理,5^(-(2^5))
应该写成pow(5) (-(pow2 5))
。sqr为开平方根,squ为平方。
JS源码:
var Mathast = {
tokenizer: function tokenizer(exp) {
function isNumber(c) {
if (/[0-9\.\-e]/.test(c)) return true;
return false;
}
function isLetter(c) {
if (/[a-z]/i.test(c)) return true;
return false;
}
function isOperator(c) {
if (/[\+\-\*\/]/.test(c)) return true;
return false;
}
exp = exp.split('');
var index = 0;
var tokens = [];
var char, tmp;
while (index < exp.length) {
char = exp[index];
tmp = "";
if (/\(|\)/.test(char)) {
tokens.push({
type: 'paren',
value: char
});
index++;
continue;
}
if (/\s/.test(char)) {
index++;
continue;
}
if (isNumber(char)) {
while (isNumber(char)) {
tmp += String(char);
char = exp[++index];
if (index >= exp.length) break;
}
tokens.push({
type: 'number',
value: tmp
});
continue;
}
if (isLetter(char)) {
while (isLetter(char)) {
tmp += char;
char = exp[++index];
if (index >= exp.length) break;
}
tokens.push({
type: 'name',
value: tmp
});
continue;
}
if (isOperator(char)) {
while (isOperator(char)) {
tmp += char;
char = exp[++index];
if (index >= exp.length) break;
}
tokens.push({
type: 'operator',
value: tmp
});
continue;
}
throw new TypeError('Unknown: ' + char);
}
return tokens;
},
parser: function parser(tokens) {
var index = 0;
function walk() {
var token = tokens[index];
var temp_params;
if (/number|operator/i.test(token.type)) {
index++;
return {
type: token.type + "Li",
value: token.value
};
}
if (token.type == 'name') {
index++;
if (['ln', 'sin', 'cos', 'lg', 'sqr', 'tan', 'squ', 'abs'].indexOf(token.value) > -1) {
// only one param
temp_params = [walk()];
} else if (['log', 'pow', 'C', 'A'].indexOf(token.value) > -1) {
// two params
temp_params = [walk(), walk()];
} else if (['E', 'PI'].indexOf(token.value) > -1) {
switch (token.value) {
case 'E':
var nameValue = 'Math.E';
break;
case 'PI':
var nameValue = 'Math.PI';
break;
default:
throw new TypeError('Unknown: ' + token.value);
}
return {
type: 'numberLi',
value: nameValue
};
} else {
throw new TypeError("Unknown: " + token.value);
}
return {
type: token.type + "Li",
value: token.value,
params: temp_params
};
}
if (token.type == 'paren' && token.value == '(') {
var node = {
type: 'callExp',
arguments: []
};
token = tokens[++index];
while (token.type != 'paren' || token.type == 'paren' && token.value != ')') {
node.arguments.push(walk());
token = tokens[index];
}
index++;
return node;
}
throw new TypeError(token.type);
}
var AST = {
type: 'mathExp',
body: []
};
while (index < tokens.length) {
AST.body.push(walk());
}
return AST;
},
traverser: function traverser(AST, visior) {
function traverseArray(array, parent) {
array.forEach(function (child) {
traverseNode(child, parent);
});
}
function traverseNode(node, parent) {
var method = visior[node.type];
if (method) {
method(node, parent);
}
switch (node.type) {
case 'mathExp':
traverseArray(node.body, node);
break;
case 'callExp':
if (typeof node.params == 'undefined') traverseArray(node.arguments, node);else traverseArray(node.params, node);
break;
case 'numberLi':
break;
case 'nameLi':
break;
case 'operatorLi':
break;
default:
throw new TypeError(node.type);
}
}
traverseNode(AST, null);
},
transformer: function transformer(AST) {
var newAST = {
type: 'mathExp',
body: []
};
AST._context = newAST.body;
this.traverser(AST, {
numberLi: function numberLi(node, parent) {
parent._context.push({
type: 'numberLi',
value: node.value
});
},
nameLi: function nameLi(node, parent) {
parent._context.push({
type: 'nameLi',
value: node.value,
params: node.params
});
},
operatorLi: function operatorLi(node, parent) {
parent._context.push({
type: 'operatorLi',
value: node.value
});
},
callExp: function callExp(node, parent) {
var expression = {
type: 'callExp',
arguments: []
};
node._context = expression.arguments;
parent._context.push(expression);
}
});
return newAST;
},
run: function run(exp) {
var node = this.transformer(this.parser(this.tokenizer(exp)));
function solve(node) {
var result = "",
index = 0,
rc,
_result;
while (index < node.length) {
rc = node[index];
if (rc.type == 'nameLi') {
var firstP = rc.params[0].value;
if (typeof rc.params[1] != 'undefined') var secondP = rc.params[1].value;
switch (rc.value) {
case 'sin':
result += 'Math.sin('+firstP+')';
break;
case 'cos':
result += 'Math.cos('+firstP+')';
break;
case 'tan':
result += 'Math.tan('+firstP+')';
break;
case 'log':
result += 'Math.log('+secondP+', '+firstP+')';
break;
case 'ln':
result += 'Math.log('+firstP+', Math.E)';
break;
case 'lg':
result += 'Math.log('+firstP+', 10)';
break;
case 'pow':
result += 'Math.pow('+firstP+', '+secondP+')';
break;
case 'sqr':
result += 'Math.sqrt('+firstP+')';
break;
case 'squ':
result += 'Math.pow('+firstP+', 2)';
break;
case 'abs':
result += 'Math.abs('+firstP+')';
break;
case 'A':
var tmp = 1;
for (var i = 0; i < Number(secondP); i++) {
tmp = tmp * (Number(firstP) - i);
}
result += tmp;
break;
case 'C':
var tmp = 1,
_tmp = 1;
for (var i = 0; i < Number(secondP); i++) {
tmp = tmp * (Number(firstP) - i);
}
for (var i = 0; i < Number(secondP); i++) {
_tmp = _tmp * (Number(secondP) - i);
}
result += tmp / _tmp;
break;
default:
throw new TypeError("Unknown: " + rc.value);
}
} else if (typeof rc.type == 'undefined' && rc.length > 0) {
result += rc.map(function (v) {
return v.value;
});
} else {
result += rc.value + (index != node.length - 1 ? ' ' : '');
}
index++;
}
try{
result = eval(result);
}catch(e){
console.log(e, "Code:" + result);
}
return {
type: 'numberLi',
value: result,
};
}
function hasExp(node) {
if (typeof node == 'undefined') return false;
if (node[0] != 'undefined') {
var index = 0,
rc;
while (index < node.length) {
rc = node[index];
if (rc.type == 'callExp') return true;
if (rc.type == 'nameLi') if (hasExp(rc.params)) return true;
index++;
}
return false;
}
return false;
}
function anaNode(node) {
var foo = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : true;
var index = 0,
result = [],
rc;
if (typeof node[0] != 'undefined') {
while (index < node.length) {
rc = node[index];
if (rc.type == 'callExp') {
result.push(anaNode(rc.arguments));
} else if (rc.type == 'nameLi' && hasExp(rc.params)) {
result.push({
type: 'nameLi',
value: rc.value,
params: anaNode(rc.params, false)
});
} else {
result.push(rc);
}
index++;
}
if (foo)
result = solve(result);
return result;
} else {
return node;
}
}
return {
ast: node,
result: anaNode(node.body)
};
}
};