cargo: Fix cfg() parsing

Add cfg token to be able to parse the top level cfg() function. Fix parser which was creating a new iterator when recursing which causer the caller function's iterator to not have advanced when recursing function returns.
xclaesse · Oct 11, 2024 · df6cae5 · df6cae5
1 parent db26175
commit df6cae5
Show file tree

Hide file tree

Showing 2 changed files with 69 additions and 50 deletions.
diff --git a/mesonbuild/cargo/cfg.py b/mesonbuild/cargo/cfg.py
@@ -4,6 +4,7 @@
 """Rust CFG parser.
 
 Rust uses its `cfg()` format in cargo.
+https://doc.rust-lang.org/reference/conditional-compilation.html
 
 This may have the following functions:
  - all()
@@ -33,7 +34,7 @@
 if T.TYPE_CHECKING:
     _T = T.TypeVar('_T')
     _LEX_TOKEN = T.Tuple['TokenType', T.Optional[str]]
-    _LEX_STREAM = T.Iterable[_LEX_TOKEN]
+    _LEX_STREAM = T.Iterator[_LEX_TOKEN]
     _LEX_STREAM_AH = T.Iterator[T.Tuple[_LEX_TOKEN, T.Optional[_LEX_TOKEN]]]
 
 
@@ -48,6 +49,7 @@ class TokenType(enum.Enum):
     NOT = enum.auto()
     COMMA = enum.auto()
     EQUAL = enum.auto()
+    CFG = enum.auto()
 
 
 def lexer(raw: str) -> _LEX_STREAM:
@@ -56,45 +58,41 @@ def lexer(raw: str) -> _LEX_STREAM:
     :param raw: The raw cfg() expression
     :return: An iterable of tokens
     """
-    buffer: T.List[str] = []
+    start: int = 0
     is_string: bool = False
-    for s in raw:
-        if s.isspace() or s in {')', '(', ',', '='} or (s == '"' and buffer):
-            val = ''.join(buffer)
-            buffer.clear()
-            if is_string:
+    for i, s in enumerate(raw):
+        if s.isspace() or s in {')', '(', ',', '=', '"'}:
+            val = raw[start:i]
+            start = i + 1
+            if s == '"' and is_string:
                 yield (TokenType.STRING, val)
+                is_string = False
+                continue
             elif val == 'any':
                 yield (TokenType.ANY, None)
             elif val == 'all':
                 yield (TokenType.ALL, None)
             elif val == 'not':
                 yield (TokenType.NOT, None)
+            elif val == 'cfg':
+                yield (TokenType.CFG, None)
             elif val:
                 yield (TokenType.IDENTIFIER, val)
 
             if s == '(':
                 yield (TokenType.LPAREN, None)
-                continue
             elif s == ')':
                 yield (TokenType.RPAREN, None)
-                continue
             elif s == ',':
                 yield (TokenType.COMMA, None)
-                continue
             elif s == '=':
                 yield (TokenType.EQUAL, None)
-                continue
-            elif s.isspace():
-                continue
-
-        if s == '"':
-            is_string = not is_string
-        else:
-            buffer.append(s)
-    if buffer:
+            elif s == '"':
+                is_string = True
+    val = raw[start:]
+    if val:
         # This should always be an identifier
-        yield (TokenType.IDENTIFIER, ''.join(buffer))
+        yield (TokenType.IDENTIFIER, val)
 
 
 def lookahead(iter: T.Iterator[_T]) -> T.Iterator[T.Tuple[_T, T.Optional[_T]]]:
@@ -175,41 +173,40 @@ def _parse(ast: _LEX_STREAM_AH) -> IR:
     else:
         ntoken, _ = (None, None)
 
-    stream: T.List[_LEX_TOKEN]
     if token is TokenType.IDENTIFIER:
+        assert value
+        id_ = Identifier(value)
         if ntoken is TokenType.EQUAL:
-            return Equal(Identifier(value), _parse(ast))
-    if token is TokenType.STRING:
-        return String(value)
-    if token is TokenType.EQUAL:
-        # In this case the previous caller already has handled the equal
-        return _parse(ast)
-    if token in {TokenType.ANY, TokenType.ALL}:
+            next(ast)
+            (token, value), _ = next(ast)
+            assert token is TokenType.STRING
+            assert value is not None
+            return Equal(id_, String(value))
+        return id_
+    elif token in {TokenType.ANY, TokenType.ALL}:
         type_ = All if token is TokenType.ALL else Any
-        assert ntoken is TokenType.LPAREN
-        next(ast)  # advance the iterator to get rid of the LPAREN
-        stream = []
         args: T.List[IR] = []
-        while token is not TokenType.RPAREN:
+        (token, value), n_stream = next(ast)
+        assert token is TokenType.LPAREN
+        if n_stream and n_stream[0] == TokenType.RPAREN:
+            return type_(args)
+        while True:
+            args.append(_parse(ast))
             (token, value), _ = next(ast)
-            if token is TokenType.COMMA:
-                args.append(_parse(lookahead(iter(stream))))
-                stream.clear()
-            else:
-                stream.append((token, value))
-        if stream:
-            args.append(_parse(lookahead(iter(stream))))
+            if token is TokenType.RPAREN:
+                break
+            assert token is TokenType.COMMA
         return type_(args)
-    if token is TokenType.NOT:
-        next(ast)  # advance the iterator to get rid of the LPAREN
-        stream = []
-        # Mypy can't figure out that token is overridden inside the while loop
-        while token is not TokenType.RPAREN:  # type: ignore
-            (token, value), _ = next(ast)
-            stream.append((token, value))
-        return Not(_parse(lookahead(iter(stream))))
-
-    raise MesonBugException(f'Unhandled Cargo token: {token}')
+    elif token in {TokenType.NOT, TokenType.CFG}:
+        is_not = token is TokenType.NOT
+        (token, value), _ = next(ast)
+        assert token is TokenType.LPAREN
+        arg = _parse(ast)
+        (token, value), _ = next(ast)
+        assert token is TokenType.RPAREN
+        return Not(arg) if is_not else arg
+    else:
+        raise MesonBugException(f'Unhandled Cargo token:{token} {value}')
 
 
 def parse(ast: _LEX_STREAM) -> IR:
@@ -218,7 +215,7 @@ def parse(ast: _LEX_STREAM) -> IR:
     :param ast: An iterable of Tokens
     :return: An mparser Node to be used as a conditional
     """
-    ast_i: _LEX_STREAM_AH = lookahead(iter(ast))
+    ast_i: _LEX_STREAM_AH = lookahead(ast)
     return _parse(ast_i)
 
 
@@ -256,6 +253,8 @@ def _(ir: Not, build: builder.Builder) -> mparser.BaseNode:
 
 @ir_to_meson.register
 def _(ir: Any, build: builder.Builder) -> mparser.BaseNode:
+    if not ir.args:
+        return build.bool(False)
     args = iter(reversed(ir.args))
     last = next(args)
     cur = build.or_(ir_to_meson(next(args), build), ir_to_meson(last, build))
@@ -266,6 +265,8 @@ def _(ir: Any, build: builder.Builder) -> mparser.BaseNode:
 
 @ir_to_meson.register
 def _(ir: All, build: builder.Builder) -> mparser.BaseNode:
+    if not ir.args:
+        return build.bool(True)
     args = iter(reversed(ir.args))
     last = next(args)
     cur = build.and_(ir_to_meson(next(args), build), ir_to_meson(last, build))

diff --git a/unittests/cargotests.py b/unittests/cargotests.py
@@ -101,6 +101,12 @@ def test_lex(self) -> None:
                 (TokenType.IDENTIFIER, 'unix'),
                 (TokenType.RPAREN, None),
             ]),
+            ('cfg(windows)', [
+                (TokenType.CFG, None),
+                (TokenType.LPAREN, None),
+                (TokenType.IDENTIFIER, 'windows'),
+                (TokenType.RPAREN, None),
+            ]),
         ]
         for data, expected in cases:
             with self.subTest():
@@ -130,6 +136,16 @@ def test_parse(self) -> None:
                             cfg.Equal(cfg.Identifier("target_arch"), cfg.String("x86")),
                             cfg.Equal(cfg.Identifier("target_os"), cfg.String("linux")),
                         ]))),
+            ('cfg(all(any(target_os = "android", target_os = "linux"), any(custom_cfg)))',
+                cfg.All([
+                    cfg.Any([
+                        cfg.Equal(cfg.Identifier("target_os"), cfg.String("android")),
+                        cfg.Equal(cfg.Identifier("target_os"), cfg.String("linux")),
+                    ]),
+                    cfg.Any([
+                        cfg.Identifier("custom_cfg"),
+                    ]),
+                ])),
         ]
         for data, expected in cases:
             with self.subTest():
@@ -183,6 +199,8 @@ def test_ir_to_meson(self) -> None:
                                 build.string('x86_64')),
                     build.equal(build.method('cpu_family', HOST_MACHINE),
                                 build.string('aarch64'))))),
+            ('all()', build.bool(True)),
+            ('any()', build.bool(False)),
         ]
         for data, expected in cases:
             with self.subTest():