mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-14 23:16:06 +09:00
parser wip
This commit is contained in:
@@ -139,8 +139,8 @@ basicFunctions._isSeparator = function(code) {
|
|||||||
};
|
};
|
||||||
basicFunctions._operatorPrecedence = {
|
basicFunctions._operatorPrecedence = {
|
||||||
// function call in itself has highest precedence
|
// function call in itself has highest precedence
|
||||||
"^":13,
|
"NOT":13,
|
||||||
"UNARYPLUS":12,"UNARYMINUS":12,"NOT":12,
|
"^":12,
|
||||||
"*":11,"/":11,
|
"*":11,"/":11,
|
||||||
"MOD":10,
|
"MOD":10,
|
||||||
"+":9,"-":9,
|
"+":9,"-":9,
|
||||||
@@ -153,6 +153,9 @@ basicFunctions._operatorPrecedence = {
|
|||||||
"OR":2,
|
"OR":2,
|
||||||
"=":1
|
"=":1
|
||||||
};
|
};
|
||||||
|
basicFunctions._isUnaryOp = function(word) {
|
||||||
|
return 13 == basicFunctions._operatorPrecedence[word];
|
||||||
|
};
|
||||||
basicFunctions._isOperatorWord = function(word) {
|
basicFunctions._isOperatorWord = function(word) {
|
||||||
return (basicFunctions._operatorPrecedence[word] !== undefined) // force the return type to be a boolean
|
return (basicFunctions._operatorPrecedence[word] !== undefined) // force the return type to be a boolean
|
||||||
};
|
};
|
||||||
@@ -545,21 +548,20 @@ basicFunctions._parseTokens = function(lnum, tokens, states, recDepth) {
|
|||||||
// at this point you can't (and shouldn't) distinguish whether or not defuns/variables are previously declared
|
// at this point you can't (and shouldn't) distinguish whether or not defuns/variables are previously declared
|
||||||
|
|
||||||
// a line has one of these forms:
|
// a line has one of these forms:
|
||||||
// VARIABLE = LITERAL
|
// EXPRESSION -> LITERAL
|
||||||
// VARIABLE = FUNCTION ARGUMENTS
|
// BINARY_OP
|
||||||
// FUNCTION
|
// UNARY_OP
|
||||||
// FUNCTION ARGUMENTS --arguments may contain another function call
|
// FOR_LOOP
|
||||||
// "FOR" VARIABLE "=" ARGUMENT "TO" ARGUMENT
|
// IF_STMT
|
||||||
// "FOR" VARIABLE "=" ARGUMENT "TO" ARGUMENT "STEP" ARGUMENT
|
// WHILE_LOOP
|
||||||
// "IF" EXPRESSION "THEN" EXPRESSION
|
// FUNCTION_CALL
|
||||||
// "IF" EXPRESSION "THEN" EXPRESSION "ELSE" EXPRESSION
|
// GROUPING
|
||||||
// "IF" EXPRESSION "GOTO" ARGUMENT
|
//
|
||||||
// "IF" EXPRESSION "GOTO" ARGUMENT "ELSE" EXPRESSION
|
// LITERAL -> NUMBERS | FUNCTION_OR_VARIABLE_NAME | BOOLS | QUOTES
|
||||||
// "WHILE" EXPRESSION
|
// BINARY_OP -> EXPRSSION OPERATOR EXPRESSION
|
||||||
// additionally, sub-line also has one of these:
|
// UNARY_OP -> OPERATOR EXPRESSION
|
||||||
// LITERAL (leaf node)
|
// FUNCTION_CALL -> LITERAL GROUPING
|
||||||
// VARIABLE (leaf node)
|
// GROUPING -> "(" EXPRESSION ")"
|
||||||
// {VARIABLE, LITERAL} COMPARISON_OP {VARIABLE, LITERAL}
|
|
||||||
|
|
||||||
// THIS FUNCTION CANNOT PARSE ANY OPERATORS, THEY MUST BE CONVERTED TO POLISH NOTATION BEFOREHAND!
|
// THIS FUNCTION CANNOT PARSE ANY OPERATORS, THEY MUST BE CONVERTED TO POLISH NOTATION BEFOREHAND!
|
||||||
// providing a test string:
|
// providing a test string:
|
||||||
@@ -568,6 +570,10 @@ basicFunctions._parseTokens = function(lnum, tokens, states, recDepth) {
|
|||||||
// plus(cin(tan(minus(2,5)),plus(4,sin(32))),cin(unaryMinus(2)))
|
// plus(cin(tan(minus(2,5)),plus(4,sin(32))),cin(unaryMinus(2)))
|
||||||
// prior to the calling of this function
|
// prior to the calling of this function
|
||||||
|
|
||||||
|
function isSemanticLiteral(state) {
|
||||||
|
return "quote" == state || "number" == state || "bool" == state || "literal" == state; // technically, closing quote also counts
|
||||||
|
}
|
||||||
|
|
||||||
var _debugSyntaxAnalysis = true;
|
var _debugSyntaxAnalysis = true;
|
||||||
|
|
||||||
if (_debugSyntaxAnalysis) println("@@ SYNTAX ANALYSIS @@");
|
if (_debugSyntaxAnalysis) println("@@ SYNTAX ANALYSIS @@");
|
||||||
@@ -593,17 +599,12 @@ basicFunctions._parseTokens = function(lnum, tokens, states, recDepth) {
|
|||||||
// test string: print((minus(plus(3,2),times(8,7))))
|
// test string: print((minus(plus(3,2),times(8,7))))
|
||||||
// ^ ^ these extra parens break your parser
|
// ^ ^ these extra parens break your parser
|
||||||
|
|
||||||
// IF statement
|
// LITERAL
|
||||||
if ("IF" == tokens[0].toUpperCase()) {
|
if (tokens.length == 1 && (isSemanticLiteral(states[0]))) {
|
||||||
throw "TODO";
|
|
||||||
}
|
|
||||||
// LEAF: is this a literal?
|
|
||||||
else if (tokens.length == 1 && ("quote" == states[0] || "number" == states[0] || "bool" == states[0])) {
|
|
||||||
if (_debugSyntaxAnalysis) println("literal/number: "+tokens[0]);
|
if (_debugSyntaxAnalysis) println("literal/number: "+tokens[0]);
|
||||||
treeHead.value = ("quote" == states[0]) ? tokens[0] : tokens[0].toUpperCase();
|
treeHead.value = ("quote" == states[0]) ? tokens[0] : tokens[0].toUpperCase();
|
||||||
treeHead.type = "literal";
|
treeHead.type = "literal";
|
||||||
}
|
}
|
||||||
// is this a function/operators?
|
|
||||||
else {
|
else {
|
||||||
// scan for operators with highest precedence, use rightmost one if multiple were found
|
// scan for operators with highest precedence, use rightmost one if multiple were found
|
||||||
var topmostOp;
|
var topmostOp;
|
||||||
@@ -630,28 +631,46 @@ basicFunctions._parseTokens = function(lnum, tokens, states, recDepth) {
|
|||||||
if (parenDepth == 1 && states[k] == "sep") {
|
if (parenDepth == 1 && states[k] == "sep") {
|
||||||
separators.push(k);
|
separators.push(k);
|
||||||
}
|
}
|
||||||
if (parenDepth == 0 && states[k] == "operator" && basicFunctions._operatorPrecedence[tokens[k].toUpperCase()] >= topmostOpPrc) {
|
if (parenDepth == 0) {
|
||||||
topmostOp = tokens[k].toUpperCase();
|
if (states[k] == "operator" && isSemanticLiteral(states[k - 1]) && basicFunctions._operatorPrecedence[tokens[k].toUpperCase()] > topmostOpPrc) {
|
||||||
topmostOpPrc = basicFunctions._operatorPrecedence[tokens[k]];
|
topmostOp = tokens[k].toUpperCase();
|
||||||
operatorPos = k;
|
topmostOpPrc = basicFunctions._operatorPrecedence[tokens[k]];
|
||||||
|
operatorPos = k;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (parenDepth != 0) throw "Unmatched brackets";
|
if (parenDepth != 0) throw "Unmatched brackets";
|
||||||
|
|
||||||
// if there is an operator, split using it
|
// BINARY_OP/UNARY_OP
|
||||||
if (topmostOp !== undefined) {
|
if (topmostOp !== undefined) {
|
||||||
if (_debugSyntaxAnalysis) println("operator: "+topmostOp+", pos: "+operatorPos);
|
if (_debugSyntaxAnalysis) println("operator: "+topmostOp+", pos: "+operatorPos);
|
||||||
|
|
||||||
treeHead.value = topmostOp;
|
var subtknL = tokens.slice(0, operatorPos);
|
||||||
treeHead.type = "operator";
|
var subtknR = tokens.slice(operatorPos + 1, tokens.length);
|
||||||
treeHead.leaves[0] = basicFunctions._parseTokens(lnum, tokens.slice(0, operatorPos), states.slice(0, operatorPos), recDepth + 1);
|
var substaL = states.slice(0, operatorPos);
|
||||||
treeHead.leaves[1] = basicFunctions._parseTokens(lnum, tokens.slice(operatorPos + 1, tokens.length), states.slice(operatorPos + 1, tokens.length), recDepth + 1);
|
var substaR = states.slice(operatorPos + 1, tokens.length);
|
||||||
|
|
||||||
|
// BINARY_OP?
|
||||||
|
if (operatorPos > 0) {
|
||||||
|
treeHead.value = topmostOp;
|
||||||
|
treeHead.type = "operator";
|
||||||
|
treeHead.leaves[0] = basicFunctions._parseTokens(lnum, subtknL, substaL, recDepth + 1);
|
||||||
|
treeHead.leaves[1] = basicFunctions._parseTokens(lnum, subtknR, substaR, recDepth + 1);
|
||||||
|
}
|
||||||
|
else { // TODO do I ever reach this branch?
|
||||||
|
// this also takes care of nested unary ops (e.g. "- NOT 43")
|
||||||
|
treeHead.value = (topmostOp == "+") ? "UNARYPLUS" : (topmostOp == "-") ? "UNARYMINUS" : topmostOp;
|
||||||
|
treeHead.type = "operator";
|
||||||
|
treeHead.leaves[0] = basicFunctions._parseTokens(lnum, subtknR, substaR, recDepth + 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// FUNCTION CALL
|
||||||
else {
|
else {
|
||||||
if (_debugSyntaxAnalysis) println("function call");
|
if (_debugSyntaxAnalysis) println("function call");
|
||||||
var currentFunction = (states[0] == "paren") ? undefined : tokens[0];
|
var currentFunction = (states[0] == "paren") ? undefined : tokens[0];
|
||||||
treeHead.value = currentFunction;
|
treeHead.value = currentFunction;
|
||||||
|
|
||||||
treeHead.type = (currentFunction === undefined) ? "null" : "function";
|
treeHead.type = (currentFunction === undefined) ? "null" : "function";
|
||||||
var leaves = [];
|
var leaves = [];
|
||||||
|
|
||||||
@@ -685,6 +704,10 @@ basicFunctions._parseTokens = function(lnum, tokens, states, recDepth) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
treeHead.leaves = leaves;//.filter(function(__v) { return __v !== undefined; });
|
treeHead.leaves = leaves;//.filter(function(__v) { return __v !== undefined; });
|
||||||
|
|
||||||
|
// after-the-fact fix for some unary ops
|
||||||
|
if (treeHead.value == "-" && treeHead.leaves.length == 1) treeHead.value = "UNARYMINUS";
|
||||||
|
else if (treeHead.value == "+" && treeHead.leaves.length == 1) treeHead.value = "UNARYPLUS";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user