From 79df0af5f8bde5c5808248ee433996cb79cf0f16 Mon Sep 17 00:00:00 2001 From: Andreas Lauser Date: Mon, 8 Jun 2026 13:23:38 +0200 Subject: [PATCH 1/3] add type annotations to the module and the unit tests For some of these I am not 100% sure if they are correct. A lot of the code churn of this patch stems from the fact that some classes had to be reordered because Python bails out if annotations use classes that are only defined later in the file. This patch features a few minor functional changes which were necessitated by type annotations: - Support for Python 2 is no longer declared in `setup.py` - The `Parser.grammar()` methods are supposed to return `Grammar` objects instead of a `Pattern` object which gets implicitly converted to `Grammar` in `Parser.parse()`. IMO this makes the whole affair much less confusing and user code only needs to return `Grammar(x)` instead of `x`. If `grammar()` returns something else, the old behavior is used as a fallback, but this will lead to `mypy` complaints in user code... - The `Token` class is converted from `namedtuple` to a dataclass because named tuples do not play nicely with type annotations and the performance is basically identical. - The internal `_String` class now uses `Pattern` as its base class. (It already had the same API as `Pattern`, but not deriving `_String` from `Pattern` would necessitate handling `_String` separately anyway.) 
Signed-off-by: Andreas Lauser Approved-by: Christian Hackenbeck --- py.typed | 0 tests/test_textparser.py | 323 ++++++++++++++++++--------------- textparser.py | 379 ++++++++++++++++++++------------------- 3 files changed, 372 insertions(+), 330 deletions(-) create mode 100644 py.typed diff --git a/py.typed b/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_textparser.py b/tests/test_textparser.py index 0af6c8b..408c131 100644 --- a/tests/test_textparser.py +++ b/tests/test_textparser.py @@ -1,9 +1,15 @@ +import collections import pickle import unittest +import typing from collections import namedtuple +from typing import cast + import textparser +from textparser import MatchObject from textparser import Grammar +from textparser import Pattern from textparser import Sequence from textparser import Choice from textparser import choice @@ -14,8 +20,10 @@ from textparser import OneOrMoreDict from textparser import DelimitedList from textparser import Token +from textparser import _Tokens from textparser import TokenizeError from textparser import tokenize_init +from textparser import _Mismatch from textparser import Any from textparser import AnyUntil from textparser import Optional @@ -27,8 +35,19 @@ from textparser import markup_line from textparser import replace_blocks +# list of tuples containing the arguments for the Token class. Used to +# create a list of Token objects. 
+TokenizeItems = list[tuple[str,str]|tuple[str,str,int]] + +# Specify the tree of tokens and the expected match result for a given +# grammar +GrammarMatchSpec = tuple[TokenizeItems, MatchObject] + +# Specify the tree of tokens and the line number where the grammar +# is supposed to not match the token tree +GrammarMismatchSpec = tuple[TokenizeItems, int] -def tokenize(items, add_eof_token=True): +def tokenize(items: TokenizeItems, add_eof_token: bool=True) -> list[Token]: tokens = [] for item in items: @@ -47,30 +66,30 @@ def tokenize(items, add_eof_token=True): class TextParserTest(unittest.TestCase): - def parse_and_assert_tree(self, grammar, datas): - for tokens, expected_tree in datas: - tree = grammar.parse(tokenize(tokens)) - self.assertEqual(tree, expected_tree) + def parse_and_assert_tree(self, grammar: Grammar, test_specs: list[GrammarMatchSpec]) -> None: + for token_items, expected_tree in test_specs: + token_tree = grammar.parse(tokenize(token_items)) + self.assertEqual(token_tree, expected_tree) - def parse_and_assert_mismatch(self, grammar, datas): - for tokens, line in datas: - tokens = tokenize(tokens) + def parse_and_assert_mismatch(self, grammar: Grammar, test_specs: list[GrammarMismatchSpec]) -> None: + for token_items, line in test_specs: + token_tree = tokenize(token_items) with self.assertRaises(textparser.GrammarError) as cm: - grammar.parse(tokens) + grammar.parse(token_tree) self.assertEqual(cm.exception.offset, line) - def test_grammar_sequence(self): + def test_grammar_sequence(self) -> None: grammar = Grammar(Sequence('NUMBER', 'WORD')) tokens = tokenize([ ('NUMBER', '1.45'), ('WORD', 'm') ]) - tree = grammar.parse(tokens) - self.assertEqual(tree, ['1.45', 'm']) + match_object = grammar.parse(tokens) + self.assertEqual(match_object, ['1.45', 'm']) - def test_grammar_sequence_mismatch(self): + def test_grammar_sequence_mismatch(self) -> None: grammar = Grammar(Sequence('NUMBER', 'WORD')) tokens = tokenize([('NUMBER', '1.45')]) @@ -79,10 
+98,10 @@ def test_grammar_sequence_mismatch(self): self.assertEqual(cm.exception.offset, -1) - def test_grammar_choice(self): + def test_grammar_choice(self) -> None: grammar = Grammar(Choice('NUMBER', 'WORD')) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [('WORD', 'm')], 'm' @@ -95,18 +114,18 @@ def test_grammar_choice(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_choice_mismatch(self): + def test_grammar_choice_mismatch(self) -> None: grammar = Grammar(Choice(Sequence('NUMBER', 'WORD'), 'WORD')) - datas = [ + datas: list[GrammarMismatchSpec] = [ ([('NUMBER', '1', 5)], -1), ([('NUMBER', '1', 5), ('NUMBER', '2', 7)], 7) ] self.parse_and_assert_mismatch(grammar, datas) - def test_grammar_choice_dict(self): + def test_grammar_choice_dict(self) -> None: number = Forward() number <<= Sequence('NUMBER') grammar = Grammar(ChoiceDict(number, @@ -114,10 +133,13 @@ def test_grammar_choice_dict(self): ChoiceDict('BAR'), 'FIE')) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [('WORD', 'm')], - ('foo', ['m']) + # the cast is necessary because mypy does not + # recognize (str, MatchObject) tuples as MatchObject, + # even though it should... + cast(MatchObject, ('foo', ['m'])) ), ( [('NUMBER', '5')], @@ -135,18 +157,18 @@ def test_grammar_choice_dict(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_choice_dict_mismatch(self): + def test_grammar_choice_dict_mismatch(self) -> None: grammar = Grammar(ChoiceDict(Sequence('NUMBER'), Sequence('WORD'))) tokens = tokenize([(',', ',', 3)]) - with self.assertRaises(textparser.Error) as cm: + with self.assertRaises(textparser.GrammarError) as cm: grammar.parse(tokens) self.assertEqual(cm.exception.offset, 3) - def test_grammar_choice_dict_init(self): - datas = [ + def test_grammar_choice_dict_init(self) -> None: + datas: list[tuple[collections.abc.Sequence[Pattern|str], str]] = [ ( ('WORD', 'WORD'), "First token kind must be unique, but WORD isn't." 
@@ -167,10 +189,10 @@ def test_grammar_choice_dict_init(self): self.assertEqual(str(cm.exception), message) - def test_grammar_delimited_list(self): + def test_grammar_delimited_list(self) -> None: grammar = Grammar(Sequence(DelimitedList('WORD'), Optional('.'))) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [('WORD', 'foo')], [['foo'], []] @@ -187,10 +209,10 @@ def test_grammar_delimited_list(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_delimited_list_mismatch(self): + def test_grammar_delimited_list_mismatch(self) -> None: grammar = Grammar(Sequence(DelimitedList('WORD'), Optional('.'))) - datas = [ + datas: list[GrammarMismatchSpec] = [ ( [ ('WORD', 'foo', 1), @@ -212,10 +234,10 @@ def test_grammar_delimited_list_mismatch(self): self.parse_and_assert_mismatch(grammar, datas) - def test_grammar_zero_or_more(self): + def test_grammar_zero_or_more(self) -> None: grammar = Grammar(ZeroOrMore('WORD')) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [], [] @@ -232,11 +254,11 @@ def test_grammar_zero_or_more(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_zero_or_more_partial_element_match(self): + def test_grammar_zero_or_more_partial_element_match(self) -> None: grammar = Grammar(Sequence( ZeroOrMore(Sequence('WORD', 'NUMBER')), 'WORD')) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [ ('WORD', 'foo'), @@ -250,10 +272,10 @@ def test_grammar_zero_or_more_partial_element_match(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_zero_or_more_dict(self): + def test_grammar_zero_or_more_dict(self) -> None: grammar = Grammar(ZeroOrMoreDict(Sequence('WORD', 'NUMBER'))) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [], {} @@ -271,10 +293,10 @@ def test_grammar_zero_or_more_dict(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_one_or_more(self): + def test_grammar_one_or_more(self) -> None: grammar = Grammar(OneOrMore('WORD')) - datas = [ + datas: list[GrammarMatchSpec] = [ ( 
[('WORD', 'foo')], ['foo'] @@ -287,10 +309,10 @@ def test_grammar_one_or_more(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_one_or_more_mismatch(self): + def test_grammar_one_or_more_mismatch(self) -> None: grammar = Grammar(OneOrMore('WORD')) - datas = [ + datas = cast(list[GrammarMismatchSpec], [ ( [] , -1 @@ -299,14 +321,14 @@ def test_grammar_one_or_more_mismatch(self): [('NUMBER', 'foo', 2)], 2 ) - ] + ]) self.parse_and_assert_mismatch(grammar, datas) - def test_grammar_one_or_more_dict(self): + def test_grammar_one_or_more_dict(self) -> None: grammar = Grammar(OneOrMoreDict(Sequence('WORD', 'NUMBER'))) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [('WORD', 'foo'), ('NUMBER', '1')], { @@ -326,10 +348,10 @@ def test_grammar_one_or_more_dict(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_one_or_more_dict_mismatch(self): + def test_grammar_one_or_more_dict_mismatch(self) -> None: grammar = Grammar(OneOrMoreDict(Sequence('WORD', 'NUMBER'))) - datas = [ + datas = cast(list[GrammarMismatchSpec], [ ( [('WORD', 'foo', 5)], -1 @@ -350,14 +372,14 @@ def test_grammar_one_or_more_dict_mismatch(self): ], 8 ) - ] + ]) self.parse_and_assert_mismatch(grammar, datas) - def test_grammar_any(self): + def test_grammar_any(self) -> None: grammar = Grammar(Any()) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [('A', r'a')], 'a' @@ -370,10 +392,10 @@ def test_grammar_any(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_any_until(self): + def test_grammar_any_until(self) -> None: grammar = Grammar(Sequence(AnyUntil('STRING'), 'STRING')) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [('NUMBER', '1'), ('WORD', 'a'), @@ -384,12 +406,12 @@ def test_grammar_any_until(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_any_until_sequence(self): + def test_grammar_any_until_sequence(self) -> None: grammar = Grammar(Sequence(AnyUntil(Sequence('WORD', 'STRING')), 'WORD', 'STRING')) - datas = [ + 
datas: list[GrammarMatchSpec] = [ ( [('NUMBER', '1'), ('WORD', 'a'), @@ -401,7 +423,7 @@ def test_grammar_any_until_sequence(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_1(self): + def test_grammar_1(self) -> None: grammar = Grammar(Sequence( 'IF', choice(Sequence(choice('A', 'B'), 'STRING'), @@ -412,7 +434,7 @@ def test_grammar_1(self): choice(DelimitedList('STRING'), ZeroOrMore('NUMBER')), '.'), '.'))) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [ ('IF', 'IF'), @@ -437,7 +459,7 @@ def test_grammar_1(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_1_mismatch(self): + def test_grammar_1_mismatch(self) -> None: grammar = Grammar(Sequence( 'IF', choice(Sequence(choice('A', 'B'), 'STRING'), @@ -448,7 +470,7 @@ def test_grammar_1_mismatch(self): choice(DelimitedList('STRING'), ZeroOrMore('NUMBER')), '.'), '.'))) - datas = [ + datas = cast(list[GrammarMismatchSpec], [ ( [ ('IF', 'IF', 1), @@ -483,16 +505,16 @@ def test_grammar_1_mismatch(self): ], 5 ) - ] + ]) self.parse_and_assert_mismatch(grammar, datas) - def test_grammar_forward(self): + def test_grammar_forward(self) -> None: foo = Forward() foo <<= Sequence('FOO') grammar = Grammar(foo) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [('FOO', 'foo')], ['foo'] @@ -501,12 +523,12 @@ def test_grammar_forward(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_forward_text(self): + def test_grammar_forward_text(self) -> None: foo = Forward() foo <<= 'FOO' grammar = Grammar(foo) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [('FOO', 'foo')], 'foo' @@ -515,12 +537,12 @@ def test_grammar_forward_text(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_optional(self): + def test_grammar_optional(self) -> None: grammar = Grammar(Sequence(Optional('WORD'), Optional('WORD'), Optional('NUMBER'))) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [], [[], [], []] @@ -545,33 +567,33 @@ def test_grammar_optional(self): 
self.parse_and_assert_tree(grammar, datas) - def test_grammar_tag(self): + def test_grammar_tag(self) -> None: grammar = Grammar(Tag('a', Tag('b', choice(Tag('c', 'WORD'), Tag('d', Optional('NUMBER')))))) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [('WORD', 'bar')], - ('a', ('b', ('c', 'bar'))) + cast(MatchObject, ('a', ('b', ('c', 'bar')))) ), ( [('NUMBER', '1')], - ('a', ('b', ('d', ['1']))) + cast(MatchObject, ('a', ('b', ('d', ['1'])))) ), ( [], - ('a', ('b', ('d', []))) + cast(MatchObject, ('a', ('b', ('d', [])))) ) ] self.parse_and_assert_tree(grammar, datas) - def test_grammar_tag_mismatch(self): + def test_grammar_tag_mismatch(self) -> None: grammar = Grammar(Tag('a', 'WORD')) - datas = [ + datas: list[GrammarMismatchSpec] = [ ( [('NUMBER', 'bar')], 1 @@ -580,10 +602,10 @@ def test_grammar_tag_mismatch(self): self.parse_and_assert_mismatch(grammar, datas) - def test_grammar_and(self): + def test_grammar_and(self) -> None: grammar = Grammar(Sequence(And('NUMBER'), 'NUMBER')) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [('NUMBER', '1')], [[], '1'] @@ -592,10 +614,10 @@ def test_grammar_and(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_and_mismatch(self): + def test_grammar_and_mismatch(self) -> None: grammar = Grammar(Sequence(And('NUMBER'), 'NUMBER')) - datas = [ + datas: list[GrammarMismatchSpec] = [ ( [('WORD', 'foo', 3), ('NUMBER', '1', 4)], 3 @@ -604,10 +626,10 @@ def test_grammar_and_mismatch(self): self.parse_and_assert_mismatch(grammar, datas) - def test_grammar_not(self): + def test_grammar_not(self) -> None: grammar = Grammar(Sequence(Not('WORD'), 'NUMBER')) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [('NUMBER', '1')], [[], '1'] @@ -616,10 +638,10 @@ def test_grammar_not(self): self.parse_and_assert_tree(grammar, datas) - def test_grammar_not_mismatch(self): + def test_grammar_not_mismatch(self) -> None: grammar = Grammar(Sequence(Not('WORD'), 'NUMBER')) - datas = [ + datas: list[GrammarMismatchSpec] = [ ( 
[('WORD', 'foo', 3), ('NUMBER', '1', 4)], 3 @@ -628,10 +650,10 @@ def test_grammar_not_mismatch(self): self.parse_and_assert_mismatch(grammar, datas) - def test_grammar_no_match(self): + def test_grammar_no_match(self) -> None: grammar = Grammar(NoMatch()) - datas = [ + datas: list[GrammarMismatchSpec] = [ ( [('NUMBER', '1', 3)], 3 @@ -644,20 +666,20 @@ def test_grammar_no_match(self): self.parse_and_assert_mismatch(grammar, datas) - def test_parse_start_and_end_of_file(self): + def test_parse_start_and_end_of_file(self) -> None: class Parser(textparser.Parser): - def grammar(self): - return Sequence('__SOF__', '__EOF__') + def grammar(self) -> Grammar: + return Grammar(Sequence('__SOF__', '__EOF__')) self.assertEqual(Parser().parse('', match_sof=True), ['__SOF__', '__EOF__']) - def test_parse_start_of_file_mismatch(self): + def test_parse_start_of_file_mismatch(self) -> None: class Parser(textparser.Parser): - def grammar(self): - return Sequence('__EOF__') + def grammar(self) -> Grammar: + return Grammar(Sequence('__EOF__')) with self.assertRaises(textparser.ParseError) as cm: Parser().parse('123', match_sof=True) @@ -665,43 +687,46 @@ def grammar(self): self.assertEqual(str(cm.exception), 'Invalid syntax at line 1, column 1: ">>!<<123"') - def test_parse_end_of_file(self): + def test_parse_end_of_file(self) -> None: class Parser(textparser.Parser): - def grammar(self): - return '__EOF__' + def grammar(self) -> Grammar: + return Grammar('__EOF__') self.assertEqual(Parser().parse('', match_sof=False), '__EOF__') - def test_grammar_none(self): + def test_grammar_none(self) -> None: class AnyAsNone(textparser.Pattern): - def match(self, tokens): + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: tokens.get_value() - return None + # the cast is a bit hacky because Pattern.match() is + # not supposed to return None. 
(this should possibly + # return textparser.MISMATCH) + return cast(MatchObject, None) grammar = Grammar(AnyAsNone()) - datas = [ + datas: list[GrammarMatchSpec] = [ ( [('NUMBER', '1')], - None + cast(MatchObject, None) ) ] self.parse_and_assert_tree(grammar, datas) - def test_grammar_error(self): + def test_grammar_error(self) -> None: grammar = Grammar(NoMatch()) - datas = [ + datas: list[list[tuple[str, str]|tuple[str, str, int]]] = [ [('NUMBER', '1', 3)], [('WORD', 'foo', 3)] ] - for tokens in datas: - tokens = tokenize(tokens) + for token_args in datas: + tokens = tokenize(token_args) with self.assertRaises(textparser.GrammarError) as cm: grammar.parse(tokens) @@ -710,8 +735,9 @@ def test_grammar_error(self): self.assertEqual(str(cm.exception), 'Invalid syntax at offset 3.') - def test_tokenize_error(self): - datas = [ + def test_tokenize_error(self) -> None: + # list of (offset, text, message) tuples + datas: list[tuple[int, str, str]] = [ (2, 'hej', 'Invalid syntax at line 1, column 3: "he>>!<>!<>!<<"'), @@ -726,8 +752,9 @@ def test_tokenize_error(self): self.assertEqual(cm.exception.offset, offset) self.assertEqual(str(cm.exception), message) - def test_create_token_re(self): - datas = [ + def test_create_token_re(self) -> None: + # list of (TokenTree, expected_regex) tuples + datas: list[tuple[TokenizeItems, str]] = [ ( [('A', r'a')], '(?Pa)' @@ -744,17 +771,17 @@ def test_create_token_re(self): [Token(kind='__SOF__', value='__SOF__', offset=0)]) self.assertEqual(re_token, expected_re_token) - def test_parser(self): + def test_parser(self) -> None: class Parser(textparser.Parser): - def keywords(self): + def keywords(self) -> set[str]: return set([ 'IF', 'A', 'B' ]) - def token_specs(self): + def token_specs(self) -> list[tuple[str, str]|tuple[str,str,str]]: return [ ('SKIP', r'[ \r\n\t]+'), ('NUMBER', r'-?\d+(\.\d+)?([eE][+-]?\d+)?'), @@ -764,17 +791,17 @@ def token_specs(self): ('MISMATCH', r'.') ] - def grammar(self): - return Sequence( + def 
grammar(self) -> Grammar: + return Grammar(Sequence( 'IF', Optional(choice('A', 'B')), 'ESCAPED_STRING', 'WORD', Optional(choice(DelimitedList('ESCAPED_STRING'), ZeroOrMore('NUMBER'))), - '.') + '.')) - datas = [ + datas: list[tuple[str, MatchObject, MatchObject]] = [ ( 'IF "foo" bar .', ['IF', [], '"foo"', 'bar', [[]], '.'], @@ -814,10 +841,10 @@ def grammar(self): tree = Parser().parse(text, token_tree=True) self.assertEqual(tree, expected_token_tree) - def test_parser_default_keywords(self): + def test_parser_default_keywords(self) -> None: class Parser(textparser.Parser): - def token_specs(self): + def token_specs(self) -> list[tuple[str, str]|tuple[str,str,str]]: return [ ('SKIP', r'[ \r\n\t]+'), ('NUMBER', r'-?\d+(\.\d+)?([eE][+-]?\d+)?'), @@ -827,17 +854,18 @@ def token_specs(self): ('MISMATCH', r'.') ] - def grammar(self): - return Sequence( + def grammar(self) -> Grammar: + return Grammar(Sequence( 'WORD', Optional('WORD'), 'ESCAPED_STRING', 'WORD', Optional(choice(DelimitedList('ESCAPED_STRING'), ZeroOrMore('NUMBER'))), - '.') + '.')) - datas = [ + # list of (input_string, expected_flat_match, expected_tree_match) tuples + datas: list[tuple[str, MatchObject, MatchObject]] = [ ( 'IF "foo" bar .', ['IF', [], '"foo"', 'bar', [[]], '.'], @@ -877,7 +905,7 @@ def grammar(self): tree = Parser().parse(text, token_tree=True) self.assertEqual(tree, expected_token_tree) - def test_parser_bare(self): + def test_parser_bare(self) -> None: class Parser(textparser.Parser): pass @@ -887,26 +915,26 @@ class Parser(textparser.Parser): self.assertEqual(str(cm.exception), 'No grammar defined.') - def test_parser_default_token_specs(self): + def test_parser_default_token_specs(self) -> None: class Parser(textparser.Parser): - def grammar(self): - return 'WORD' + def grammar(self) -> Grammar: + return Grammar('WORD') tree = Parser().parse('foo') self.assertEqual(tree, 'foo') - def test_parser_tokenize_mismatch(self): + def test_parser_tokenize_mismatch(self) -> None: class 
Parser(textparser.Parser): - def token_specs(self): + def token_specs(self) -> list[tuple[str, str]|tuple[str,str,str]]: return [ ('SKIP', r'[ \r\n\t]+'), ('NUMBER', r'-?\d+(\.\d+)?([eE][+-]?\d+)?'), ('MISMATCH', r'.') ] - def grammar(self): + def grammar(self) -> Grammar: return Grammar('NUMBER') with self.assertRaises(textparser.ParseError) as cm: @@ -918,17 +946,17 @@ def grammar(self): self.assertEqual(str(cm.exception), 'Invalid syntax at line 2, column 3: "34>>!< None: class Parser(textparser.Parser): - def tokenize(self, _text): + def tokenize(self, _text: str) -> list[Token]: return tokenize([ ('NUMBER', '1.45', 0), ('NUMBER', '2', 5) ]) - def grammar(self): - return Sequence('NUMBER', 'WORD') + def grammar(self) -> Grammar: + return Grammar(Sequence('NUMBER', 'WORD')) with self.assertRaises(textparser.ParseError) as cm: Parser().parse('1.45 2') @@ -939,18 +967,18 @@ def grammar(self): self.assertEqual(str(cm.exception), 'Invalid syntax at line 1, column 6: "1.45 >>!<<2"') - def test_parser_grammar_mismatch_choice_max(self): + def test_parser_grammar_mismatch_choice_max(self) -> None: class Parser(textparser.Parser): - def __init__(self, tokens): + def __init__(self, tokens: TokenizeItems) -> None: self._tokens = tokens - def tokenize(self, _text): + def tokenize(self, _text: str) -> list[Token]: return tokenize(self._tokens, add_eof_token=False) - def grammar(self): - return Choice(Sequence('NUMBER', 'WORD'), - 'WORD') + def grammar(self) -> Grammar: + return Grammar(Choice(Sequence('NUMBER', 'WORD'), + 'WORD')) Data = namedtuple('Data', [ @@ -995,13 +1023,13 @@ def grammar(self): self.assertEqual(cm.exception.column, column) self.assertEqual(str(cm.exception), message) - def test_parse_error(self): + def test_parse_error(self) -> None: class Parser(textparser.Parser): - def tokenize(self, text): + def tokenize(self, text: str) -> list[Token]: raise TokenizeError(text, 5) - def grammar(self): + def grammar(self) -> Grammar: return 
Grammar(Sequence('NUMBER', 'WORD')) with self.assertRaises(textparser.ParseError) as cm: @@ -1014,7 +1042,7 @@ def grammar(self): self.assertEqual(str(cm.exception), 'Invalid syntax at line 2, column 3: "34>>!<<56"') - def test_markup_line(self): + def test_markup_line(self) -> None: datas = [ (0, '>>!<<0', None), (1, '0>>!<<', None), @@ -1037,7 +1065,7 @@ def test_markup_line(self): self.assertEqual(text, line) - def test_replace_blocks(self): + def test_replace_blocks(self) -> None: datas = [ ('{}', '{}'), ('{{}}', '{ }'), @@ -1049,7 +1077,7 @@ def test_replace_blocks(self): new = replace_blocks(old) self.assertEqual(new, expected) - def test_replace_blocks_start_end(self): + def test_replace_blocks_start_end(self) -> None: datas = [ ('1[a]2[b]3', '1[ ]2[ ]3', '[', ']'), ('1{a}2{b}3', '1{ }2{ }3', '{', '}'), @@ -1061,13 +1089,13 @@ def test_replace_blocks_start_end(self): new = replace_blocks(old, start, end) self.assertEqual(new, expected) - def test_any_zero_or_more(self): + def test_any_zero_or_more(self) -> None: class Parser(textparser.Parser): - def keywords(self): - return ['interesting_group'] + def keywords(self) -> set[str]: + return set(['interesting_group']) - def token_specs(self): + def token_specs(self) -> list[tuple[str,str]|tuple[str,str,str]]: return [ ('SKIP', r'[ \r\n\t]+'), ('WORD', r'[A-Za-z0-9_]+'), @@ -1077,16 +1105,16 @@ def token_specs(self): ('EQUAL', '=', r'='), ] - def grammar(self): + def grammar(self) -> Grammar: interesting_group = textparser.Sequence( 'interesting_group', '{', ZeroOrMore(Sequence('WORD', '=', 'WORD', ';')), '}', ';') - return Sequence(AnyUntil('interesting_group'), - interesting_group, - ZeroOrMore(Any())) + return Grammar(Sequence(AnyUntil('interesting_group'), + interesting_group, + ZeroOrMore(Any()))) text = ''' @@ -1105,6 +1133,7 @@ def grammar(self): ''' tree = Parser().parse(text) + assert isinstance(tree, list) self.assertEqual(tree[1], [ 'interesting_group', @@ -1116,11 +1145,11 @@ def grammar(self): '}', 
';']) - def test_error_picklable(self): + def test_error_picklable(self) -> None: class Parser(textparser.Parser): - def grammar(self): - return Sequence('__EOF__') + def grammar(self) -> Grammar: + return Grammar(Sequence('__EOF__')) try: Parser().parse('123', match_sof=True) diff --git a/textparser.py b/textparser.py index 8d76d72..ead0d58 100644 --- a/textparser.py +++ b/textparser.py @@ -1,9 +1,11 @@ # A text parser. import re -from collections import namedtuple -from operator import itemgetter +import collections.abc +import typing +from dataclasses import dataclass +from operator import itemgetter __author__ = 'Erik Moqvist' __version__ = '0.24.0' @@ -18,40 +20,30 @@ class _Mismatch(object): """ - -class _String(object): - """Matches a specific token kind. - - """ - - def __init__(self, kind): - self.kind = kind - - def match(self, tokens): - if self.kind == tokens.peek().kind: - return tokens.get_value() - else: - return MISMATCH - +@dataclass(slots=True) +class Token: + kind: str + value: str|None + offset: int class _Tokens(object): - def __init__(self, tokens): + def __init__(self, tokens: list[Token]): self._tokens = tokens self._pos = 0 self._max_pos = -1 - self._stack = [] + self._stack: list[int] = [] - def get_value(self): + def get_value(self) -> Token|str: pos = self._pos self._pos += 1 return self._tokens[pos] - def peek(self): + def peek(self) -> Token: return self._tokens[self._pos] - def peek_max(self): + def peek_max(self) -> Token: pos = self._pos if self._max_pos > pos: @@ -62,55 +54,82 @@ def peek_max(self): else: return self._tokens[pos] - def save(self): + def save(self) -> None: self._stack.append(self._pos) - def restore(self): + def restore(self) -> None: self._pos = self._stack.pop() - def update(self): + def update(self) -> None: self._stack[-1] = self._pos - def mark_max_restore(self): + def mark_max_restore(self) -> None: if self._pos > self._max_pos: self._max_pos = self._pos self._pos = self._stack.pop() - def 
mark_max_load(self): + def mark_max_load(self) -> None: if self._pos > self._max_pos: self._max_pos = self._pos self._pos = self._stack[-1] - def drop(self): + def drop(self) -> None: self._stack.pop() - def __repr__(self): + def __repr__(self) -> str: return str(self._tokens[self._pos:self._pos + 2]) +MatchObject = list["MatchObject"]|dict[str, list["MatchObject"]]|tuple[str,"MatchObject"]|Token|str + +class Pattern(object): + """Base class of all patterns. + + """ + + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: + """Returns :data:`~textparser.MISMATCH` on mismatch, and anything else + on match. + + """ + + raise NotImplementedError('To be implemented by subclasses.') + +class _String(Pattern): + """Matches a specific token kind. + + """ + + def __init__(self, kind: str) -> None: + self.kind = kind + + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: + if self.kind == tokens.peek().kind: + return tokens.get_value() + else: + return MISMATCH class _StringTokens(_Tokens): - def get_value(self): + def get_value(self) -> Token|str: pos = self._pos self._pos += 1 - return self._tokens[pos].value + return typing.cast(str, self._tokens[pos].value) -def _wrap_string(item): +def _wrap_string(item: Pattern|str) -> Pattern: if isinstance(item, str): item = _String(item) return item - -def _wrap_strings(items): +def _wrap_strings(items: collections.abc.Sequence[Pattern|str]) -> list[Pattern]: return [_wrap_string(item) for item in items] -def _format_invalid_syntax(text, offset): +def _format_invalid_syntax(text: str, offset: int) -> str: return 'Invalid syntax at line {}, column {}: "{}"'.format( line(text, offset), column(text, offset), @@ -131,14 +150,14 @@ class TokenizeError(Error): """ - def __init__(self, text, offset): + def __init__(self, text: str, offset: int) -> None: self._text = text self._offset = offset message = _format_invalid_syntax(text, offset) super(TokenizeError, self).__init__(message) @property - def text(self): + def 
text(self) -> str: """The input text to the tokenizer. """ @@ -146,7 +165,7 @@ def text(self): return self._text @property - def offset(self): + def offset(self) -> int: """Offset into the text where the tokenizer failed. """ @@ -160,13 +179,13 @@ class GrammarError(Error): """ - def __init__(self, offset): + def __init__(self, offset: int) -> None: self._offset = offset message = 'Invalid syntax at offset {}.'.format(offset) super(GrammarError, self).__init__(message) @property - def offset(self): + def offset(self) -> int: """Offset into the text where the parser failed. """ @@ -179,7 +198,7 @@ class ParseError(Error): """ - def __init__(self, text, offset): + def __init__(self, text: str, offset: int): self._text = text self._offset = offset self._line = line(text, offset) @@ -188,7 +207,7 @@ def __init__(self, text, offset): super(ParseError, self).__init__(message) @property - def text(self): + def text(self) -> str: """The input text to the parser. """ @@ -196,7 +215,7 @@ def text(self): return self._text @property - def offset(self): + def offset(self) -> int: """Offset into the text where the parser failed. """ @@ -204,7 +223,7 @@ def offset(self): return self._offset @property - def line(self): + def line(self) -> int: """Line where the parser failed. """ @@ -212,50 +231,32 @@ def line(self): return self._line @property - def column(self): + def column(self) -> int: """Column where the parser failed. """ return self._column - def __reduce__(self): + def __reduce__(self) -> tuple[typing.Any, ...]: """Adds pickling support.""" return type(self), (self._text, self._offset), {} - -Token = namedtuple('Token', ['kind', 'value', 'offset']) - - -class Pattern(object): - """Base class of all patterns. - - """ - - def match(self, tokens): - """Returns :data:`~textparser.MISMATCH` on mismatch, and anything else - on match. - - """ - - raise NotImplementedError('To be implemented by subclasses.') - - class Sequence(Pattern): """Matches a sequence of patterns. 
Becomes a list in the parse tree. """ - def __init__(self, *patterns): + def __init__(self, *patterns: Pattern|str) -> None: self.patterns = _wrap_strings(patterns) - def match(self, tokens): - matched = [] + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: + matched: list[MatchObject] = [] for pattern in self.patterns: mo = pattern.match(tokens) - if mo is MISMATCH: + if isinstance(mo, _Mismatch): return MISMATCH matched.append(mo) @@ -269,17 +270,17 @@ class Choice(Pattern): """ - def __init__(self, *patterns): + def __init__(self, *patterns: Pattern|str) -> None: self._patterns = _wrap_strings(patterns) - def match(self, tokens): + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: tokens.save() for pattern in self._patterns: tokens.mark_max_load() mo = pattern.match(tokens) - if mo is not MISMATCH: + if not isinstance(mo, _Mismatch): tokens.drop() return mo @@ -288,6 +289,55 @@ def match(self, tokens): return MISMATCH +class Tag(Pattern): + """Tags any matched `pattern` with name `name`. Becomes a two-tuple of + `name` and match in the parse tree. + + """ + + def __init__(self, name: str, pattern: Pattern|str) -> None: + self._name = name + self._pattern = _wrap_string(pattern) + + @property + def pattern(self) -> Pattern: + return self._pattern + + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: + mo = self._pattern.match(tokens) + + if not isinstance(mo, _Mismatch): + return (self._name, mo) + else: + return MISMATCH + + +class Forward(Pattern): + """Forward declaration of a pattern. + + .. 
code-block:: python + + >>> foo = Forward() + >>> foo <<= Sequence('NUMBER') + + """ + + def __init__(self) -> None: + self._pattern: Pattern|None = None + + @property + def pattern(self) -> Pattern|None: + return self._pattern + + def __ilshift__(self, other: Pattern|str) -> "Forward": + self._pattern = _wrap_string(other) + + return self + + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: + if self._pattern is not None: + return self._pattern.match(tokens) + return MISMATCH class ChoiceDict(Pattern): """Matches any of given patterns. The first token kind of all patterns @@ -299,23 +349,26 @@ class ChoiceDict(Pattern): """ - def __init__(self, *patterns): - self._patterns_map = {} - patterns = _wrap_strings(patterns) + def __init__(self, *patterns: Pattern|str) -> None: + self._patterns_map: dict[str, Pattern] = {} + wrapped_patterns = _wrap_strings(patterns) - for pattern in patterns: + for pattern in wrapped_patterns: self._check_pattern(pattern, pattern) @property - def patterns_map(self): + def patterns_map(self) -> dict[str, Pattern]: return self._patterns_map - def _check_pattern(self, inner, outer): + def _check_pattern(self, inner: Pattern, outer: Pattern) -> None: if isinstance(inner, _String): self._add_pattern(inner.kind, outer) elif isinstance(inner, Sequence): self._check_pattern(inner.patterns[0], outer) elif isinstance(inner, (Tag, Forward)): + if inner.pattern is None: + raise Error( + 'No inner pattern defined for {}.'.format(type(inner))) self._check_pattern(inner.pattern, outer) elif isinstance(inner, ChoiceDict): for pattern in inner.patterns_map.values(): @@ -324,7 +377,7 @@ def _check_pattern(self, inner, outer): raise Error( 'Unsupported pattern type {}.'.format(type(inner))) - def _add_pattern(self, kind, pattern): + def _add_pattern(self, kind: str, pattern: Pattern) -> None: if kind in self._patterns_map: raise Error( "First token kind must be unique, but {} isn't.".format( @@ -332,7 +385,7 @@ def _add_pattern(self, kind, 
pattern): self._patterns_map[kind] = pattern - def match(self, tokens): + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: kind = tokens.peek().kind if kind in self._patterns_map: @@ -347,18 +400,18 @@ class Repeated(Pattern): """ - def __init__(self, pattern, minimum=0): + def __init__(self, pattern: Pattern|str, minimum: int=0) -> None: self._pattern = _wrap_string(pattern) self._minimum = minimum - def match(self, tokens): + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: matched = [] tokens.save() while True: mo = self._pattern.match(tokens) - if mo is MISMATCH: + if isinstance(mo, _Mismatch): tokens.mark_max_restore() break @@ -381,22 +434,22 @@ class RepeatedDict(Repeated): """ - def __init__(self, pattern, minimum=0, key=None): + def __init__(self, pattern: Pattern|str, minimum: int=0, key: typing.Callable[[MatchObject], str]|None=None) -> None: super(RepeatedDict, self).__init__(pattern, minimum) if key is None: - key = itemgetter(0) + key = typing.cast(typing.Callable[[MatchObject], str], itemgetter(0)) self._key = key - def match(self, tokens): - matched = {} + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: + matched: dict[str, list[MatchObject]] = {} tokens.save() while True: mo = self._pattern.match(tokens) - if mo is MISMATCH: + if isinstance(mo, _Mismatch): tokens.mark_max_restore() break @@ -422,7 +475,7 @@ class ZeroOrMore(Repeated): """ - def __init__(self, pattern): + def __init__(self, pattern: Pattern|str) -> None: super(ZeroOrMore, self).__init__(pattern, 0) @@ -433,7 +486,7 @@ class ZeroOrMoreDict(RepeatedDict): """ - def __init__(self, pattern, key=None): + def __init__(self, pattern: Pattern|str, key: typing.Callable[[MatchObject], str]|None=None) -> None: super(ZeroOrMoreDict, self).__init__(pattern, 0, key) @@ -444,7 +497,7 @@ class OneOrMore(Repeated): """ - def __init__(self, pattern): + def __init__(self, pattern: Pattern|str) -> None: super(OneOrMore, self).__init__(pattern, 1) @@ -455,7 +508,7 @@ class 
OneOrMoreDict(RepeatedDict): """ - def __init__(self, pattern, key=None): + def __init__(self, pattern: Pattern|str, key: typing.Callable[[MatchObject], str]|None=None) -> None: super(OneOrMoreDict, self).__init__(pattern, 1, key) @@ -466,15 +519,15 @@ class DelimitedList(Pattern): """ - def __init__(self, pattern, delim=','): + def __init__(self, pattern: Pattern|str, delim: str=',') -> None: self._pattern = _wrap_string(pattern) self._delim = _wrap_string(delim) - def match(self, tokens): + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: # First pattern. mo = self._pattern.match(tokens) - if mo is MISMATCH: + if isinstance(mo, _Mismatch): return MISMATCH matched = [mo] @@ -484,13 +537,13 @@ def match(self, tokens): # Discard the delimiter. mo = self._delim.match(tokens) - if mo is MISMATCH: + if isinstance(mo, _Mismatch): break # Pattern. mo = self._pattern.match(tokens) - if mo is MISMATCH: + if isinstance(mo, _Mismatch): break matched.append(mo) @@ -507,14 +560,14 @@ class Optional(Pattern): """ - def __init__(self, pattern): + def __init__(self, pattern: Pattern|str) -> None: self._pattern = _wrap_string(pattern) - def match(self, tokens): + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: tokens.save() mo = self._pattern.match(tokens) - if mo is MISMATCH: + if isinstance(mo, _Mismatch): tokens.mark_max_restore() return [] @@ -529,7 +582,7 @@ class Any(Pattern): """ - def match(self, tokens): + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: if tokens.peek().kind == '__EOF__': return MISMATCH else: @@ -542,17 +595,17 @@ class AnyUntil(Pattern): """ - def __init__(self, pattern): + def __init__(self, pattern: Pattern|str) -> None: self._pattern = _wrap_string(pattern) - def match(self, tokens): - matched = [] + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: + matched: list[MatchObject] = [] while True: tokens.save() mo = self._pattern.match(tokens) - if mo is not MISMATCH: + if not isinstance(mo, _Mismatch): break 
tokens.restore() @@ -569,15 +622,15 @@ class And(Pattern): """ - def __init__(self, pattern): + def __init__(self, pattern: Pattern|str) -> None: self._pattern = _wrap_string(pattern) - def match(self, tokens): + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: tokens.save() mo = self._pattern.match(tokens) tokens.restore() - if mo is MISMATCH: + if isinstance(mo, _Mismatch): return MISMATCH else: return [] @@ -591,15 +644,15 @@ class Not(Pattern): """ - def __init__(self, pattern): + def __init__(self, pattern: Pattern|str) -> None: self._pattern = _wrap_string(pattern) - def match(self, tokens): + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: tokens.save() mo = self._pattern.match(tokens) tokens.restore() - if mo is MISMATCH: + if isinstance(mo, _Mismatch): return [] else: return MISMATCH @@ -610,85 +663,36 @@ class NoMatch(Pattern): """ - def match(self, tokens): + def match(self, tokens: _Tokens) -> MatchObject|_Mismatch: return MISMATCH - -class Tag(Pattern): - """Tags any matched `pattern` with name `name`. Becomes a two-tuple of - `name` and match in the parse tree. - - """ - - def __init__(self, name, pattern): - self._name = name - self._pattern = _wrap_string(pattern) - - @property - def pattern(self): - return self._pattern - - def match(self, tokens): - mo = self._pattern.match(tokens) - - if mo is not MISMATCH: - return (self._name, mo) - else: - return MISMATCH - - -class Forward(Pattern): - """Forward declaration of a pattern. - - .. code-block:: python - - >>> foo = Forward() - >>> foo <<= Sequence('NUMBER') - - """ - - def __init__(self): - self._pattern = None - - @property - def pattern(self): - return self._pattern - - def __ilshift__(self, other): - self._pattern = _wrap_string(other) - - return self - - def match(self, tokens): - return self._pattern.match(tokens) - - class Grammar(object): """Creates a tree of given tokens using the grammar `grammar`. 
""" - def __init__(self, grammar): + def __init__(self, grammar: Pattern|str) -> None: + self._root: Pattern if isinstance(grammar, str): - grammar = _wrap_string(grammar) - - self._root = grammar + self._root = _wrap_string(grammar) + else: + self._root = grammar - def parse(self, tokens, token_tree=False): + def parse(self, token_list: list[Token], token_tree: bool=False) -> MatchObject: if token_tree: - tokens = _Tokens(tokens) + tokens = _Tokens(token_list) else: - tokens = _StringTokens(tokens) + tokens = _StringTokens(token_list) parsed = self._root.match(tokens) - if parsed is not MISMATCH and tokens.peek_max().kind == '__EOF__': + if not isinstance(parsed, _Mismatch) and tokens.peek_max().kind == '__EOF__': return parsed else: raise GrammarError(tokens.peek_max().offset) -def choice(*patterns): +def choice(*patterns: Pattern|str) -> Choice|ChoiceDict: """Returns an instance of the fastest choice class for given patterns `patterns`. It is recommended to use this function instead of instantiate :class:`~textparser.Choice` or @@ -702,7 +706,7 @@ def choice(*patterns): return Choice(*patterns) -def markup_line(text, offset, marker='>>!<<'): +def markup_line(text: str, offset: int, marker: str='>>!<<') -> str: """Insert `marker` at `offset` into `text`, and return the marked line. @@ -724,17 +728,17 @@ def markup_line(text, offset, marker='>>!<<'): return text[begin:offset] + marker + text[offset:end] -def line(text, offset): +def line(text: str, offset: int) -> int: return text[:offset].count('\n') + 1 -def column(text, offset): +def column(text: str, offset: int) -> int: line_start = text.rfind('\n', 0, offset) return offset - line_start -def tokenize_init(spec): +def tokenize_init(spec: collections.abc.Sequence[tuple[str, str]|tuple[str, str, int]]) -> tuple[list[Token], str]: """Initialize a tokenizer. Should only be called by the :func:`~textparser.Parser.tokenize` method in the parser. 
@@ -742,7 +746,7 @@ def tokenize_init(spec): tokens = [Token('__SOF__', '__SOF__', 0)] re_token = '|'.join([ - '(?P<{}>{})'.format(name, regex) for name, regex in spec + '(?P<{}>{})'.format(token_spec[0], token_spec[1]) for token_spec in spec ]) return tokens, re_token @@ -768,7 +772,8 @@ class Parser(object): """ - def _unpack_token_specs(self): + def _unpack_token_specs(self) -> tuple[dict[str, str], + list[tuple[str,str]]]: names = {} specs = [] @@ -781,7 +786,7 @@ def _unpack_token_specs(self): return names, specs - def keywords(self): + def keywords(self) -> set[str]: """A set of keywords in the text. .. code-block:: python @@ -793,7 +798,7 @@ def keywords(self): return set() - def token_specs(self): + def token_specs(self) -> list[tuple[str, str]|tuple[str, str, str]]: """The token specifications with token name, regular expression, and optionally a user friendly name. @@ -813,7 +818,7 @@ def token_specs(self): ('MISMATCH', r'.') ] - def tokenize(self, text): + def tokenize(self, text: str) -> list[Token]: """Tokenize given string `text`, and return a list of tokens. Raises :class:`~textparser.TokenizeError` on failure. @@ -830,6 +835,7 @@ def tokenize(self, text): for mo in re.finditer(re_token, text, re.DOTALL): kind = mo.lastgroup + assert isinstance(kind, str) if kind == 'SKIP': pass @@ -848,7 +854,7 @@ def tokenize(self, text): return tokens - def grammar(self): + def grammar(self) -> Grammar: """The text grammar is used to create a parse tree out of a list of tokens. @@ -858,7 +864,7 @@ def grammar(self): raise NotImplementedError('No grammar defined.') - def parse(self, text, token_tree=False, match_sof=False): + def parse(self, text: str, token_tree: bool=False, match_sof:bool=False) -> _Mismatch|MatchObject: """Parse given string `text` and return the parse tree. Raises :class:`~textparser.ParseError` on failure. 
@@ -888,12 +894,19 @@ def parse(self, text, token_tree=False, match_sof=False): if len(tokens) > 0 and tokens[0].kind == '__SOF__': del tokens[0] - return Grammar(self.grammar()).parse(tokens, token_tree) + grammar = self.grammar() + if isinstance(grammar, Grammar): + return grammar.parse(tokens, token_tree) + else: + # used for compatibility with old user code from the + # pre-type hints era... + return Grammar(grammar).parse(tokens, token_tree) + except (TokenizeError, GrammarError) as e: raise ParseError(text, e.offset) -def replace_blocks(string, start='{', end='}'): +def replace_blocks(string: str, start: str='{', end: str='}') -> str: """Replace all blocks starting with `start` and ending with `end` with spaces (not including `start` and `end`). From 0cd09fe8abfb1733177b9990a693cdfb02a73751 Mon Sep 17 00:00:00 2001 From: Andreas Lauser Date: Tue, 9 Jun 2026 17:08:17 +0200 Subject: [PATCH 2/3] github actions: run the mypy type checker in CI thanks to [at]zariiii9003 for the suggestion! Signed-off-by: Andreas Lauser --- .github/workflows/pythonpackage.yml | 8 ++++++++ setup.py | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index bd6a462..f89cc27 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -17,9 +17,17 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + - name: Install textparser development and testing dependencies + run: | + pip install --upgrade pip + pip install . 
+ pip install .[test] - name: Test run: | python -m unittest + - name: Static type checking (mypy) + run: | + python -m mypy --strict textparser.py tests release: needs: [test] diff --git a/setup.py b/setup.py index e7ed7a5..2248563 100755 --- a/setup.py +++ b/setup.py @@ -26,4 +26,9 @@ def find_version(): url='https://github.com/eerimoq/textparser', py_modules=['textparser'], python_requires='>=3.10', + extras_require={ + "test": [ + "mypy >= 2.1", + ], + }, test_suite="tests") From 64b420458a14f7a121d4067704e563a62cb78389 Mon Sep 17 00:00:00 2001 From: Andreas Lauser Date: Tue, 9 Jun 2026 17:10:33 +0200 Subject: [PATCH 3/3] run ruff in the CI system also fix the complaint about the unused import of the `typing` module. Signed-off-by: Andreas Lauser --- .github/workflows/pythonpackage.yml | 3 +++ setup.py | 1 + tests/test_textparser.py | 1 - 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index f89cc27..f8c9dda 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -28,6 +28,9 @@ jobs: - name: Static type checking (mypy) run: | python -m mypy --strict textparser.py tests + - name: Linting (ruff) + run: | + ruff check textparser.py tests release: needs: [test] diff --git a/setup.py b/setup.py index 2248563..8f321e8 100755 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ def find_version(): extras_require={ "test": [ "mypy >= 2.1", + "ruff >= 0.15.12", ], }, test_suite="tests") diff --git a/tests/test_textparser.py b/tests/test_textparser.py index 408c131..d4d2f4c 100644 --- a/tests/test_textparser.py +++ b/tests/test_textparser.py @@ -1,7 +1,6 @@ import collections import pickle import unittest -import typing from collections import namedtuple from typing import cast