Skip to content

Commit

Permalink
[ruby] Reduce Array Parser Ambiguity (#4938)
Browse files Browse the repository at this point in the history
Profiled `ArrayTests` to detect ambiguities and decisions with high lookahead, and modified the test fixture to print the profiler logs when profiling is enabled. This led to converting certain array rules to use more specific rules and to fall back to more general rules less often.

Some small improvements on `railsgoat`, measured by running `joern-parse` under the `time` command:
```
// With ambiguity
75.58s user 1.98s system 356% cpu 21.762 total
73.56s user 2.61s system 492% cpu 15.452 total
66.52s user 2.01s system 387% cpu 17.667 total

// With reduced ambiguity
65.42s user 1.94s system 443% cpu 15.189 total
74.58s user 2.01s system 557% cpu 13.744 total
74.39s user 1.75s system 560% cpu 13.595 total
 ```
  • Loading branch information
DavidBakerEffendi authored Sep 20, 2024
1 parent 226f441 commit 2101f5a
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -173,18 +173,16 @@ bracketedArrayElementList
;

bracketedArrayElement
: operatorExpressionList
: indexingArgument
| command
| indexingArgument
| associationList
| hashLiteral
| splattingArgument
| indexingArgumentList
;

indexingArgumentList
: operatorExpressionList COMMA?
# operatorExpressionListIndexingArgumentList
| command
# commandIndexingArgumentList
| operatorExpressionList COMMA splattingArgument
# operatorExpressionListWithSplattingArgumentIndexingArgumentList
| indexingArgument (COMMA? NL* indexingArgument)*
Expand Down Expand Up @@ -298,6 +296,10 @@ primary
# primaryValuePrimary
;

// Hash literal: an LCURLY token and an RCURLY token enclosing an optional
// associationList (which may be followed by one trailing COMMA). Any number of
// NL tokens may appear directly after LCURLY and directly before RCURLY.
hashLiteral
: LCURLY NL* (associationList COMMA?)? NL* RCURLY
;

primaryValue
: // Assignment expressions
lhs=variable assignmentOperator NL* rhs=operatorExpression
Expand Down Expand Up @@ -361,8 +363,8 @@ primaryValue
# quotedExpandedStringArrayLiteral
| QUOTED_EXPANDED_SYMBOL_ARRAY_LITERAL_START quotedExpandedArrayElementList? QUOTED_EXPANDED_SYMBOL_ARRAY_LITERAL_END
# quotedExpandedSymbolArrayLiteral
| LCURLY NL* (associationList COMMA?)? NL* RCURLY
# hashLiteral
| hashLiteral
# primaryValueHashLiteral
| sign=(PLUS | MINUS)? unsignedNumericLiteral
# numericLiteral
| singleQuotedString singleOrDoubleQuotedString*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,8 @@ object AntlrContextHelpers {
case x: AssociationListContext => x.associations
case x: SplattingArgumentContext => x :: Nil
case x: IndexingArgumentContext => x :: Nil
case x: IndexingArgumentListContext => x.arguments
case x: HashLiteralContext => x :: Nil
}
.toList
.flatten
Expand All @@ -280,7 +282,6 @@ object AntlrContextHelpers {

sealed implicit class IndexingArgumentListContextHelper(ctx: IndexingArgumentListContext) {
def arguments: List[ParserRuleContext] = ctx match
case ctx: CommandIndexingArgumentListContext => List(ctx.command())
case ctx: OperatorExpressionListIndexingArgumentListContext =>
ctx.operatorExpressionList().operatorExpression().asScala.toList
case ctx: AssociationListIndexingArgumentListContext => ctx.associationList().associations
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class AntlrParser(inputDir: File, filename: String, withDebugging: Boolean = fal
val stopToken = recognizer.getTokenStream.get(stopIndex)

warnings.append(
s"Parser ambiguity detected for rule '$ruleName' from token '${startToken.getText}' to '${stopToken.getText}', alternatives: ${ambigAlts.toString}"
s"Parser ambiguity detected for rule '$ruleName' (decision ${dfa.decision}) from token '${startToken.getText}' [startIndex=$startIndex] to '${stopToken.getText}' [stopIndex=$stopIndex], alternatives: ${ambigAlts.toString}"
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@ import io.joern.dataflowengineoss.language.Path
import io.joern.dataflowengineoss.semanticsloader.FlowSemantic
import io.joern.dataflowengineoss.testfixtures.{SemanticCpgTestFixture, SemanticTestCpg}
import io.joern.rubysrc2cpg.{Config, RubySrc2Cpg}
import io.joern.x2cpg.testfixtures.*
import io.joern.x2cpg.ValidationMode
import io.joern.x2cpg.testfixtures.*
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.semanticcpg.language.{ICallResolver, NoResolve}
import org.scalatest.Tag
import org.scalatest.Inside

import java.io.File
import org.scalatest.Inside
import java.nio.file.Files
import scala.jdk.CollectionConverters.*

trait RubyFrontend(
withDownloadDependencies: Boolean,
Expand All @@ -31,7 +32,28 @@ trait RubyFrontend(
.withAntlrProfiling(antlrProfiling)

/** Creates the CPG for `sourceCodeFile` and, when `antlrProfiling` is enabled, prints the ANTLR profiler logs.
  *
  * If `sourceCodeFile` is a directory, every file beneath it whose name ends in `.log` is passed to
  * `printAntlrProfilingInfo` (which prints the file and deletes it). Otherwise `sourceCodeFile` itself is passed.
  *
  * @param sourceCodeFile
  *   the file or directory handed to `RubySrc2Cpg.createCpg`.
  * @return
  *   the value obtained from `createCpg(...).get`.
  */
override def execute(sourceCodeFile: File): Cpg = {
  val cpg = new RubySrc2Cpg().createCpg(sourceCodeFile.getAbsolutePath).get
  if (antlrProfiling) {
    if (sourceCodeFile.isDirectory) {
      // `Files.walk` keeps directory handles open until the stream is closed, so collect the matching
      // files eagerly and close the stream before any of them is printed and deleted.
      val walk = Files.walk(sourceCodeFile.toPath)
      val logFiles =
        try {
          walk
            .iterator()
            .asScala
            .filter(_.getFileName.toString.endsWith(".log"))
            .map(_.toFile)
            .toList
        } finally {
          walk.close()
        }
      logFiles.foreach(printAntlrProfilingInfo)
    } else {
      // NOTE(review): this prints and then deletes `sourceCodeFile` itself rather than a `.log` file
      // derived from it — confirm this is the intended path for single-file inputs.
      printAntlrProfilingInfo(sourceCodeFile)
    }
  }
  cpg
}

/** Prints the contents of `logfile` to standard output and then deletes the file. Does nothing when the file does
  * not exist.
  *
  * @param logfile
  *   the file whose contents are printed.
  */
private def printAntlrProfilingInfo(logfile: File): Unit =
  Option.when(logfile.exists())(logfile.toPath).foreach { logPath =>
    val contents = Files.readString(logPath)
    println(contents)
    logfile.delete() // cleanup; the Boolean result is intentionally ignored
  }

}
Expand All @@ -40,7 +62,7 @@ class DefaultTestCpgWithRuby(
downloadDependencies: Boolean = false,
disableFileContent: Boolean = true,
antlrDebugging: Boolean = false,
antlrProfiling: Boolean = false
antlrProfiling: Boolean
) extends DefaultTestCpg
with RubyFrontend(downloadDependencies, disableFileContent, antlrDebugging, antlrProfiling)
with SemanticTestCpg {
Expand Down

0 comments on commit 2101f5a

Please sign in to comment.