mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-17 00:16:04 +09:00
basic: new parser wip
This commit is contained in:
197
assets/tbas/parser_wip.js
Normal file
197
assets/tbas/parser_wip.js
Normal file
@@ -0,0 +1,197 @@
|
||||
class ParserError extends Error {
|
||||
constructor(...args) {
|
||||
super(...args);
|
||||
Error.captureStackTrace(this, ParserError);
|
||||
}
|
||||
}
|
||||
|
||||
/** Parses following EBNF rule:
|
||||
* stmt =
|
||||
* "IF" , equation , "THEN" , stmt , ["ELSE" , stmt]
|
||||
* | "DEFUN" , [lit] , "(" , [lit , {" , " , lit}] , ")" , "=" , stmt
|
||||
* | "ON" , lit , function , equation , [{"," , equation}]
|
||||
* | function , [equation , {argsep , equation}]
|
||||
* | function , "(" , [equation , {argsep , equation}] , ")"
|
||||
* | equation
|
||||
* | "(" , stmt , ")" ;
|
||||
* @return: BasicAST
|
||||
*/
|
||||
bF._parseStmt = function(lnum, tokens, states, recDepth) {
|
||||
let headTkn = tokens[0].toUpperCase();
|
||||
let headSta = states[0];
|
||||
|
||||
let treeHead = new BasicAST();
|
||||
treeHead.astDepth = recDepth;
|
||||
treeHead.astLnum = lnum;
|
||||
|
||||
if ("IF" == headTkn && "lit" == headSta) {
|
||||
// find nearest THEN and ELSE but also take parens into account
|
||||
let thenPos = -1;
|
||||
let elsePos = -1;
|
||||
let parenDepth = 0;
|
||||
let parenStart = -1;
|
||||
let parenEnd = -1;
|
||||
|
||||
// Scan for unmatched parens and mark off the right operator we must deal with
|
||||
for (k = 0; k < tokens.length; k++) {
|
||||
// increase paren depth and mark paren start position
|
||||
if (tokens[k] == "(" && states[k] != "qot") {
|
||||
parenDepth += 1;
|
||||
if (parenStart == -1 && parenDepth == 1) parenStart = k;
|
||||
}
|
||||
// decrease paren depth
|
||||
else if (tokens[k] == ")" && states[k] != "qot") {
|
||||
if (parenEnd == -1 && parenDepth == 1) parenEnd = k;
|
||||
parenDepth -= 1;
|
||||
}
|
||||
|
||||
if (parenDepth == 0) {
|
||||
if (-1 == thenPos && "THEN" == tokens[k].toUpperCase() && "lit" == states[k])
|
||||
thenPos = k;
|
||||
else if (-1 == elsePos && "ELSE" == tokens[k].toUpperCase() && "lit" == states[k])
|
||||
elsePos = k;
|
||||
}
|
||||
}
|
||||
|
||||
// unmatched brackets, duh!
|
||||
if (parenDepth != 0) throw lang.syntaxfehler(lnum, lang.unmatchedBrackets);
|
||||
|
||||
// "THEN" not found, raise error!
|
||||
if (thenPos == -1) throw ParserError("IF without THEN in " + lnum);
|
||||
|
||||
// TODO gotta go home :)
|
||||
}
|
||||
}
|
||||
/** Parses following EBNF rule:
|
||||
* lit (* which is parsed by the tokeniser already *)
|
||||
* @return: BasicAST
|
||||
*/
|
||||
bF._parseLit = function(lnum, tokens, states, recDepth) {
|
||||
let treeHead = new BasicAST();
|
||||
treeHead.astDepth = recDepth;
|
||||
treeHead.astLnum = lnum;
|
||||
|
||||
// special case where there /were only one word
|
||||
if (recDepth == 0) {
|
||||
// if that word is literal (e.g. "10 CLEAR"), interpret it as a function
|
||||
if (states[0] == "lit") {
|
||||
treeHead.astValue = tokens[0];
|
||||
treeHead.astType = "function";
|
||||
|
||||
return treeHead;
|
||||
}
|
||||
// else, screw it
|
||||
else {
|
||||
throw lang.syntaxfehler(lnum, "TRAP_LITERALLY_LITERAL");
|
||||
}
|
||||
}
|
||||
|
||||
if (_debugSyntaxAnalysis) serial.println("literal/number: "+tokens[0]);
|
||||
treeHead.astValue = ("qot" == states[0]) ? tokens[0] : tokens[0].toUpperCase();
|
||||
treeHead.astType = ("qot" == states[0]) ? "string" : ("num" == states[0]) ? "num" : "lit";
|
||||
}
|
||||
/** Parses following EBNF rule:
|
||||
* equation = equation , op , equation
|
||||
* | op_uni , equation
|
||||
* | lit
|
||||
* | "(" , equation , ")"
|
||||
* @return: BasicAST
|
||||
*/
|
||||
bF._EquationIllegalTokens = ["IF","THEN","ELSE","DEFUN","ON"];
|
||||
bF._parseEquation = function(lnum, tokens, states, recDepth) {
|
||||
// scan for operators with highest precedence, use rightmost one if multiple were found
|
||||
let topmostOp;
|
||||
let topmostOpPrc = 0;
|
||||
let operatorPos = -1;
|
||||
|
||||
// find and mark position of parentheses
|
||||
// properly deal with the nested function calls
|
||||
let parenDepth = 0;
|
||||
let parenStart = -1;
|
||||
let parenEnd = -1;
|
||||
|
||||
// Scan for unmatched parens and mark off the right operator we must deal with
|
||||
for (k = 0; k < tokens.length; k++) {
|
||||
// increase paren depth and mark paren start position
|
||||
if (tokens[k] == "(" && states[k] != "qot") {
|
||||
parenDepth += 1;
|
||||
if (parenStart == -1 && parenDepth == 1) parenStart = k;
|
||||
}
|
||||
// decrease paren depth
|
||||
else if (tokens[k] == ")" && states[k] != "qot") {
|
||||
if (parenEnd == -1 && parenDepth == 1) parenEnd = k;
|
||||
parenDepth -= 1;
|
||||
}
|
||||
|
||||
// determine the right operator to deal with
|
||||
if (parenDepth == 0) {
|
||||
if (states[k] == "op" && isSemanticLiteral(tokens[k-1], states[k-1]) &&
|
||||
((bF._opPrc[tokens[k].toUpperCase()] > topmostOpPrc) ||
|
||||
(!bF._opRh[tokens[k].toUpperCase()] && bF._opPrc[tokens[k].toUpperCase()] == topmostOpPrc))
|
||||
) {
|
||||
topmostOp = tokens[k].toUpperCase();
|
||||
topmostOpPrc = bF._opPrc[tokens[k].toUpperCase()];
|
||||
operatorPos = k;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// unmatched brackets, duh!
|
||||
if (parenDepth != 0) throw lang.syntaxfehler(lnum, lang.unmatchedBrackets);
|
||||
if (_debugSyntaxAnalysis) serial.println("Equation NEW Paren position: "+parenStart+", "+parenEnd);
|
||||
|
||||
// ## case for:
|
||||
// "(" , equation , ")"
|
||||
if (parenStart == 0 && parenEnd == tokens.length - 1) {
|
||||
return bF._parseEquation(lnum,
|
||||
tokens.slice(parenStart + 1, parenEnd),
|
||||
states.slice(parenStart + 1, parenEnd),
|
||||
recDepth + 1
|
||||
);
|
||||
}
|
||||
// ## case for:
|
||||
// lit , op, lit
|
||||
// | op_uni , lit
|
||||
// if operator is found, split by the operator and recursively parse the LH and RH
|
||||
if (topmostOp !== undefined) {
|
||||
if (_debugSyntaxAnalysis) serial.println("operator: "+topmostOp+", pos: "+operatorPos);
|
||||
|
||||
// this is the AST we're going to build up and return
|
||||
// (other IF clauses don't use this)
|
||||
let treeHead = new BasicAST();
|
||||
treeHead.astDepth = recDepth;
|
||||
treeHead.astLnum = lnum;
|
||||
treeHead.astValue = topmostOp;
|
||||
treeHead.astType = "op";
|
||||
|
||||
// BINARY_OP?
|
||||
if (operatorPos > 0) {
|
||||
let subtknL = tokens.slice(0, operatorPos);
|
||||
let substaL = states.slice(0, operatorPos);
|
||||
let subtknR = tokens.slice(operatorPos + 1, tokens.length);
|
||||
let substaR = states.slice(operatorPos + 1, tokens.length);
|
||||
|
||||
treeHead.astLeaves[0] = bF._parseEquation(lnum, subtknL, substaL, recDepth + 1);
|
||||
treeHead.astLeaves[1] = bF._parseEquation(lnum, subtknR, substaR, recDepth + 1);
|
||||
}
|
||||
else {
|
||||
treeHead.astValue = (topmostOp === "-") ? "UNARYMINUS" : "UNARYPLUS";
|
||||
treeHead.astLeaves[0] = bF._parseEquation(lnum,
|
||||
tokens.slice(operatorPos + 1, tokens.length),
|
||||
states.slice(operatorPos + 1, states.length),
|
||||
recDepth + 1
|
||||
);
|
||||
}
|
||||
|
||||
return treeHead;
|
||||
}
|
||||
// ## case for:
|
||||
// lit
|
||||
let headTkn = tokens[0].toUpperCase();
|
||||
if (!bF._EquationIllegalTokens.includes(headTkn)) {
|
||||
return bF._parseLit(lnum, tokens, states, recDepth + 1);
|
||||
}
|
||||
|
||||
throw ParserError(`Equation - illegal token "${headTkn}" in ${lnum}`);
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user