mostly working parser

This commit is contained in:
minjaesong
2020-06-11 10:15:41 +09:00
parent b2fea56a9f
commit b22797e942

View File

@@ -28,7 +28,7 @@ function getUsedMemSize() {
} }
var reLineNum = /^[0-9]+ +[^0-9]/; var reLineNum = /^[0-9]+ /;
//var reFloat = /^([\-+]?[0-9]*[.][0-9]+[eE]*[\-+0-9]*[fF]*|[\-+]?[0-9]+[.eEfF][0-9+\-]*[fF]?)$/; //var reFloat = /^([\-+]?[0-9]*[.][0-9]+[eE]*[\-+0-9]*[fF]*|[\-+]?[0-9]+[.eEfF][0-9+\-]*[fF]?)$/;
//var reDec = /^([\-+]?[0-9_]+)$/; //var reDec = /^([\-+]?[0-9_]+)$/;
//var reHex = /^(0[Xx][0-9A-Fa-f_]+)$/; //var reHex = /^(0[Xx][0-9A-Fa-f_]+)$/;
@@ -504,6 +504,13 @@ basicFunctions._tokenise = function(lnum, cmd) {
if (sb.length > 0) { if (sb.length > 0) {
tokens.push(sb); states.push(mode); tokens.push(sb); states.push(mode);
} }
// filter out the initial empty token, which appears when the statement does NOT start with a literal (e.g. "-3+5")
if (tokens[0].length == 0) {
tokens = tokens.slice(1, tokens.length);
states = states.slice(1, states.length);
}
if (tokens.length != states.length) throw "InternalError: size of tokens and states does not match (line: "+lnum+")"; if (tokens.length != states.length) throw "InternalError: size of tokens and states does not match (line: "+lnum+")";
return { "tokens": tokens, "states": states }; return { "tokens": tokens, "states": states };
@@ -525,23 +532,6 @@ basicFunctions._parserElaboration = function(lnum, tokens, states) {
k += 1; k += 1;
} }
};
basicFunctions._unaryToBinary = function(lnum, tokens, states) {
// turn unary + and - into binary ops by giving them an explicit zero left operand
// before:
// + 2
// 5 * + 2
// + 7 - - 4
//
// after:
// ( 0 + 2 )
// 5 * ( 0 + 2 )
// ( 0 + 7 ) - ( 0 - 4 )
var _debugprintLuka = true;
if (_debugprintLuka) println("@@ UNARY-TO-BINARY @@")
}; };
basicFunctions._parseTokens = function(lnum, tokens, states, recDepth) { basicFunctions._parseTokens = function(lnum, tokens, states, recDepth) {
// DO NOT PERFORM SEMANTIC ANALYSIS HERE // DO NOT PERFORM SEMANTIC ANALYSIS HERE
@@ -570,8 +560,9 @@ basicFunctions._parseTokens = function(lnum, tokens, states, recDepth) {
// plus(cin(tan(minus(2,5)),plus(4,sin(32))),cin(unaryMinus(2))) // plus(cin(tan(minus(2,5)),plus(4,sin(32))),cin(unaryMinus(2)))
// prior to the calling of this function // prior to the calling of this function
function isSemanticLiteral(state) { function isSemanticLiteral(token, state) {
return "quote" == state || "number" == state || "bool" == state || "literal" == state; // technically, closing quote also counts return "]" == token || ")" == token ||
"quote" == state || "number" == state || "bool" == state || "literal" == state;
} }
var _debugSyntaxAnalysis = true; var _debugSyntaxAnalysis = true;
@@ -600,7 +591,7 @@ basicFunctions._parseTokens = function(lnum, tokens, states, recDepth) {
// ^ ^ these extra parens break your parser // ^ ^ these extra parens break your parser
// LITERAL // LITERAL
if (tokens.length == 1 && (isSemanticLiteral(states[0]))) { if (tokens.length == 1 && (isSemanticLiteral(tokens[0], states[0]))) {
if (_debugSyntaxAnalysis) println("literal/number: "+tokens[0]); if (_debugSyntaxAnalysis) println("literal/number: "+tokens[0]);
treeHead.value = ("quote" == states[0]) ? tokens[0] : tokens[0].toUpperCase(); treeHead.value = ("quote" == states[0]) ? tokens[0] : tokens[0].toUpperCase();
treeHead.type = "literal"; treeHead.type = "literal";
@@ -632,7 +623,7 @@ basicFunctions._parseTokens = function(lnum, tokens, states, recDepth) {
separators.push(k); separators.push(k);
} }
if (parenDepth == 0) { if (parenDepth == 0) {
if (states[k] == "operator" && isSemanticLiteral(states[k - 1]) && basicFunctions._operatorPrecedence[tokens[k].toUpperCase()] > topmostOpPrc) { if (states[k] == "operator" && isSemanticLiteral(tokens[k-1], states[k-1]) && basicFunctions._operatorPrecedence[tokens[k].toUpperCase()] > topmostOpPrc) {
topmostOp = tokens[k].toUpperCase(); topmostOp = tokens[k].toUpperCase();
topmostOpPrc = basicFunctions._operatorPrecedence[tokens[k]]; topmostOpPrc = basicFunctions._operatorPrecedence[tokens[k]];
operatorPos = k; operatorPos = k;
@@ -685,6 +676,7 @@ basicFunctions._parseTokens = function(lnum, tokens, states, recDepth) {
} }
else if (parenEnd > parenStart) { else if (parenEnd > parenStart) {
separators = [parenStart].concat(separators, [parenEnd]); separators = [parenStart].concat(separators, [parenEnd]);
if (_debugSyntaxAnalysis) println("separators: "+separators.join(","));
// recursively parse comma-separated arguments // recursively parse comma-separated arguments
// print ( plus ( 3 , 2 ) , times ( 8 , 7 ) ) // print ( plus ( 3 , 2 ) , times ( 8 , 7 ) )
@@ -736,14 +728,6 @@ basicFunctions._interpretLine = function(lnum, cmd) {
if (_debugprintHighestLevel) println(states.join(" ")); if (_debugprintHighestLevel) println(states.join(" "));
// ŁUKASIEWICZATION : turn infix notation into Polish notation
basicFunctions._unaryToBinary(lnum, tokens, states);
if (_debugprintHighestLevel) println(tokens.join("~"));
if (_debugprintHighestLevel) println(states.join(" "));
// PARSING (SYNTAX ANALYSIS) // PARSING (SYNTAX ANALYSIS)
var syntaxTree = basicFunctions._parseTokens(lnum, tokens, states, 0); var syntaxTree = basicFunctions._parseTokens(lnum, tokens, states, 0);