# Source code for openpyxl.formula.tests.test_tokenizer

from __future__ import absolute_import

import pytest

@pytest.fixture
def tokenizer():
    """Return ``tokenizer`` as imported from the parent package.

    The import runs inside the fixture body, so it only happens when a
    test requests the fixture.
    """
    from .. import tokenizer as tokenizer_under_test
    return tokenizer_under_test
# Constants from tokenizer.Token:
# Local copies of the type/subtype strings that the tests below compare
# against ``token.type`` and ``token.subtype``.

# Token types
LITERAL = "LITERAL"
OPERAND = "OPERAND"
FUNC = "FUNC"
ARRAY = "ARRAY"
PAREN = "PAREN"
SEP = "SEP"
OP_PRE = "OPERATOR-PREFIX"
OP_IN = "OPERATOR-INFIX"
OP_POST = "OPERATOR-POSTFIX"
WSPACE = "WHITE-SPACE"

# Token subtypes
TEXT = 'TEXT'
NUMBER = 'NUMBER'
LOGICAL = 'LOGICAL'
ERROR = 'ERROR'
RANGE = 'RANGE'
OPEN = "OPEN"
CLOSE = "CLOSE"
ARG = "ARG"
ROW = "ROW"
class TestTokenizerRegexes(object):
    """Checks for the compiled regexes stored on ``tokenizer.Tokenizer``."""

    @pytest.mark.parametrize("string, success", [
        ('1.0E', True),
        ('1.53321E', True),
        ('9.999E', True),
        ('3E', True),
        ('12E', False),
        ('0.1E', False),
        ('0E', False),
        ('', False),
        ('E', False),
    ])
    def test_scientific_re(self, tokenizer, string, success):
        """``SN_RE.match(string)`` succeeds exactly when ``success`` is True."""
        matched = tokenizer.Tokenizer.SN_RE.match(string) is not None
        assert matched is success

    @pytest.mark.parametrize('string, expected', [
        (' ', ' '),
        (' *', ' '),
        (' ', ' '),
        (' a', ' '),
        (' ', ' '),
        (' +', ' '),
        ('', None),
        ('*', None),
    ])
    def test_whitespace_re(self, tokenizer, string, expected):
        """``WSPACE_RE`` matches ``expected`` at the start of ``string``.

        ``expected`` of ``None`` means the regex must not match at all.
        """
        found = tokenizer.Tokenizer.WSPACE_RE.match(string)
        if expected is None:
            assert not found
            return
        assert found
        assert found.group(0) == expected

    @pytest.mark.parametrize('string, expected', [
        ('"spamspamspam"', '"spamspamspam"'),
        ('"this is "" a test "" "', '"this is "" a test "" "'),
        ('""', '""'),
        ('"spam and ""cheese"""+"ignore"', ('"spam and ""cheese"""')),
        ('\'"spam and ""cheese"""+"ignore"', None),
        ('"oops ""', None),
    ])
    def test_string_re(self, tokenizer, string, expected):
        """The ``'"'`` entry of ``STRING_REGEXES`` matches ``expected``.

        ``expected`` of ``None`` means the regex must not match at all.
        """
        found = tokenizer.Tokenizer.STRING_REGEXES['"'].match(string)
        if expected is None:
            assert not found
            return
        assert found
        assert found.group(0) == expected
@pytest.mark.parametrize('string, expected', [ ("'spam and ham'", "'spam and ham'"), ("'double'' triple''' quadruple ''''", "'double'' triple'''"), ("'sextuple '''''' and septuple''''''' and more", "'sextuple '''''' and septuple'''''''",), ("''", "''"), ("'oops ''", None), ("gunk'hello world'", None), ])
class TestTokenizer(object):
    """Tests for ``tokenizer.Tokenizer``: full parsing and the parse helpers."""

    def test_init(self, tokenizer):
        """The ``formula`` attribute holds the string given to the constructor."""
        for text in ("abcdefg", "=abcdefg"):
            assert tokenizer.Tokenizer(text).formula == text

    @pytest.mark.parametrize('formula, tokens', [
        ('=IF(A$3<40%,"",INDEX(Pipeline!B$4:B$138,#REF!))',
         [('IF(', FUNC, OPEN),
          ('A$3', OPERAND, RANGE),
          ('<', OP_IN, ""),
          ('40', OPERAND, NUMBER),
          ('%', OP_POST, ""),
          (',', SEP, ARG),
          ('""', OPERAND, TEXT),
          (',', SEP, ARG),
          ('INDEX(', FUNC, OPEN),
          ('Pipeline!B$4:B$138', OPERAND, RANGE),
          (',', SEP, ARG),
          ('#REF!', OPERAND, ERROR),
          (')', FUNC, CLOSE),
          (')', FUNC, CLOSE)]),
        ("='Summary slices'!$C$3",
         [("'Summary slices'!$C$3", OPERAND, RANGE)]),
        ('=-MAX(Pipeline!AA4:AA138)',
         [("-", OP_PRE, ""),
          ('MAX(', FUNC, OPEN),
          ('Pipeline!AA4:AA138', OPERAND, RANGE),
          (')', FUNC, CLOSE)]),
        ('=TEXT(-S7/1000,"$#,##0""M""")',
         [('TEXT(', FUNC, OPEN),
          ('-', OP_PRE, ""),
          ('S7', OPERAND, RANGE),
          ('/', OP_IN, ""),
          ('1000', OPERAND, NUMBER),
          (',', SEP, ARG),
          ('"$#,##0""M"""', OPERAND, TEXT),
          (')', FUNC, CLOSE)]),
        ("=IF(A$3<1.3E-8,\"\",IF(ISNA('External Ref'!K7)," +
         '"N/A",TEXT(K7*1E+12,"0")&"bp"',
         [('IF(', FUNC, OPEN),
          ('A$3', OPERAND, RANGE),
          ('<', OP_IN, ""),
          ('1.3E-8', OPERAND, NUMBER),
          (',', SEP, ARG),
          ('""', OPERAND, TEXT),
          (',', SEP, ARG),
          ('IF(', FUNC, OPEN),
          ('ISNA(', FUNC, OPEN),
          ("'External Ref'!K7", OPERAND, RANGE),
          (')', FUNC, CLOSE),
          (',', SEP, ARG),
          ('"N/A"', OPERAND, TEXT),
          (',', SEP, ARG),
          ('TEXT(', FUNC, OPEN),
          ('K7', OPERAND, RANGE),
          ('*', OP_IN, ""),
          ('1E+12', OPERAND, NUMBER),
          (',', SEP, ARG),
          ('"0"', OPERAND, TEXT),
          (')', FUNC, CLOSE),
          ('&', OP_IN, ""),
          ('"bp"', OPERAND, TEXT)]),
        ('=+IF(A$3<>$B7,"",(MIN(IF({TRUE, FALSE;1,2},A6:B6,$S7))>=' +
         'LOWER_BOUND)*($BR6>$S72123))',
         [("+", OP_PRE, ""),
          ('IF(', FUNC, OPEN),
          ('A$3', OPERAND, RANGE),
          ('<>', OP_IN, ""),
          ('$B7', OPERAND, RANGE),
          (',', SEP, ARG),
          ('""', OPERAND, TEXT),
          (',', SEP, ARG),
          ('(', PAREN, OPEN),
          ('MIN(', FUNC, OPEN),
          ('IF(', FUNC, OPEN),
          ('{', ARRAY, OPEN),
          ('TRUE', OPERAND, LOGICAL),
          (',', SEP, ARG),
          (' ', WSPACE, ''),
          ('FALSE', OPERAND, LOGICAL),
          (';', SEP, ROW),
          ('1', OPERAND, NUMBER),
          (',', SEP, ARG),
          ('2', OPERAND, NUMBER),
          ('}', ARRAY, CLOSE),
          (',', SEP, ARG),
          ('A6:B6', OPERAND, RANGE),
          (',', SEP, ARG),
          ('$S7', OPERAND, RANGE),
          (')', FUNC, CLOSE),
          (')', FUNC, CLOSE),
          ('>=', OP_IN, ''),
          ('LOWER_BOUND', OPERAND, RANGE),
          (')', PAREN, CLOSE),
          ('*', OP_IN, ''),
          ('(', PAREN, OPEN),
          ('$BR6', OPERAND, RANGE),
          ('>', OP_IN, ''),
          ('$S72123', OPERAND, RANGE),
          (')', PAREN, CLOSE),
          (')', FUNC, CLOSE)]),
        ('=(AW$4=$D7)+0%',
         [('(', PAREN, OPEN),
          ('AW$4', OPERAND, RANGE),
          ('=', OP_IN, ''),
          ('$D7', OPERAND, RANGE),
          (')', PAREN, CLOSE),
          ('+', OP_IN, ''),
          ('0', OPERAND, NUMBER),
          ('%', OP_POST, '')]),
        ('=$A:$A,$C:$C',
         [('$A:$A', OPERAND, RANGE),
          (',', OP_IN, ""),
          ('$C:$C', OPERAND, RANGE)]),
        ("Just text", [("Just text", LITERAL, "")]),
        ("123.456", [("123.456", LITERAL, "")]),
        ("31/12/1999", [("31/12/1999", LITERAL, "")]),
        ("", []),
    ])
    def test_parse(self, tokenizer, formula, tokens):
        """Constructing a Tokenizer fills ``items`` with the expected tokens.

        Each item is compared as a ``(value, type, subtype)`` triple.
        """
        lexer = tokenizer.Tokenizer(formula)
        triples = [(item.value, item.type, item.subtype)
                   for item in lexer.items]
        assert triples == tokens

    @pytest.mark.parametrize('formula, offset, result', [
        ('"spamspamspam"spam', 0, '"spamspamspam"'),
        ('"this is "" a test "" "test', 0, '"this is "" a test "" "'),
        ('""', 0, '""'),
        ('a"bcd""efg"hijk', 1, '"bcd""efg"'),
        ('"oops ""', 0, None),
        ("'spam and ham'", 0, "'spam and ham'"),
        ("'double'' triple''' quad ''''", 0, "'double'' triple'''"),
        ("123'sextuple '''''' and septuple''''''' and more", 3,
         "'sextuple '''''' and septuple'''''''"),
        ("''", 0, "''"),
        ("'oops ''", 0, None),
    ])
    def test_parse_string(self, tokenizer, formula, offset, result):
        """``_parse_string`` consumes ``result`` starting at ``offset``.

        A ``result`` of ``None`` means ``TokenizerError`` is expected.
        """
        lexer = tokenizer.Tokenizer(formula)
        lexer.offset = offset
        del lexer.items[:]

        if result is None:
            with pytest.raises(tokenizer.TokenizerError):
                lexer._parse_string()
            return

        consumed = lexer._parse_string()
        assert consumed == len(result)

        if formula[offset] != '"':
            # Not opened by a double quote: no item is emitted and the
            # text is expected as the single entry of the pending token.
            assert not lexer.items
            assert lexer.token[0] == result
            assert len(lexer.token) == 1
            return

        # Opened by a double quote: a TEXT operand item is expected.
        emitted = lexer.items[0]
        assert emitted.value == result
        assert emitted.type == OPERAND
        assert emitted.subtype == TEXT
        assert not lexer.token

    @pytest.mark.parametrize('formula, offset, result', [
        ('[abc]def', 0, '[abc]'),
        ('[]abcdef', 0, '[]'),
        ('[abcdef]', 0, '[abcdef]'),
        ('a[bcd]ef', 1, '[bcd]'),
        ('ab[cde]f', 2, '[cde]'),
    ])
    def test_parse_brackets(self, tokenizer, formula, offset, result):
        """``_parse_brackets`` puts the bracketed text on the pending token."""
        lexer = tokenizer.Tokenizer(formula)
        lexer.offset = offset
        del lexer.items[:]

        consumed = lexer._parse_brackets()

        assert consumed == len(result)
        assert not lexer.items
        assert lexer.token[0] == result
        assert len(lexer.token) == 1

    def test_parse_brackets_error(self, tokenizer):
        """An unclosed ``[`` makes ``_parse_brackets`` raise TokenizerError."""
        lexer = tokenizer.Tokenizer('[unfinished business')
        with pytest.raises(tokenizer.TokenizerError):
            lexer._parse_brackets()

    @pytest.mark.parametrize('error', [
        "#NULL!",
        "#DIV/0!",
        "#VALUE!",
        "#REF!",
        "#NAME?",
        "#NUM!",
        "#N/A",
        "#GETTING_DATA",
    ])
    def test_parse_error(self, tokenizer, error):
        """``_parse_error`` emits one ERROR operand holding the error text."""
        lexer = tokenizer.Tokenizer(error)
        del lexer.items[:]
        lexer.offset = 0

        consumed = lexer._parse_error()

        assert consumed == len(error)
        assert len(lexer.items) == 1
        assert not lexer.token
        emitted = lexer.items[0]
        assert emitted.value == error
        assert emitted.type == OPERAND
        assert emitted.subtype == ERROR

    def test_parse_error_error(self, tokenizer):
        """``_parse_error`` raises TokenizerError for ``#NotAnError``."""
        lexer = tokenizer.Tokenizer("#NotAnError")
        del lexer.items[:]
        lexer.offset = 0
        with pytest.raises(tokenizer.TokenizerError):
            lexer._parse_error()

    @pytest.mark.parametrize('formula', [' ' * i for i in range(1, 10)])
    def test_parse_whitespace(self, tokenizer, formula):
        """``_parse_whitespace`` emits a single WSPACE item valued ``" "``."""
        lexer = tokenizer.Tokenizer(formula)
        del lexer.items[:]
        lexer.offset = 0

        consumed = lexer._parse_whitespace()

        assert consumed == len(formula)
        assert len(lexer.items) == 1
        emitted = lexer.items[0]
        assert emitted.value == " "
        assert emitted.type == WSPACE
        assert emitted.subtype == ""
        assert not lexer.token

    @pytest.mark.parametrize('formula, result, type_', [
        ('>=', '>=', OP_IN),
        ('<=', '<=', OP_IN),
        ('<>', '<>', OP_IN),
        ('%', '%', OP_POST),
        ('*', '*', OP_IN),
        ('/', '/', OP_IN),
        ('^', '^', OP_IN),
        ('&', '&', OP_IN),
        ('=', '=', OP_IN),
        ('>', '>', OP_IN),
        ('<', '<', OP_IN),
        ('+', '+', OP_PRE),
        ('-', '-', OP_PRE),
        ('=<', '=', OP_IN),
        ('><', '>', OP_IN),
        ('<<', '<', OP_IN),
        ('>>', '>', OP_IN),
    ])
    def test_parse_operator(self, tokenizer, formula, result, type_):
        """``_parse_operator`` emits one item of ``type_`` valued ``result``."""
        lexer = tokenizer.Tokenizer(formula)
        del lexer.items[:]
        lexer.offset = 0

        consumed = lexer._parse_operator()

        assert consumed == len(result)
        assert len(lexer.items) == 1
        assert not lexer.token
        emitted = lexer.items[0]
        assert emitted.value == result
        assert emitted.type == type_
        assert emitted.subtype == ''

    @pytest.mark.parametrize('prefix, char, type_', [
        ('name', '(', FUNC),
        ('', '(', PAREN),
        ('', '{', ARRAY),
    ])
    def test_parse_opener(self, tokenizer, prefix, char, type_):
        """``_parse_opener`` emits an OPEN item and pushes it on the stack."""
        text = prefix + char
        lexer = tokenizer.Tokenizer(text)
        lexer.offset = len(prefix)
        del lexer.items[:]
        if prefix:
            # Seed the pending token with the text preceding the opener.
            lexer.token.append(prefix)

        consumed = lexer._parse_opener()

        assert consumed == 1
        assert not lexer.token
        assert len(lexer.items) == 1
        emitted = lexer.items[0]
        assert emitted.value == text
        assert emitted.type == type_
        assert emitted.subtype == OPEN
        assert len(lexer.token_stack) == 1
        assert lexer.token_stack[0] is emitted

    def test_parse_opener_error(self, tokenizer):
        """``{`` after a pending ``name`` token raises TokenizerError."""
        lexer = tokenizer.Tokenizer('name{')
        lexer.token[:] = ('name',)
        lexer.offset = 4
        with pytest.raises(tokenizer.TokenizerError):
            lexer._parse_opener()

    @pytest.mark.parametrize('formula, offset, opener', [
        ('func(a)', 6, ('func(', FUNC, OPEN)),
        ('(a)', 2, ('(', PAREN, OPEN)),
        ('{a,b,c}', 6, ('{', ARRAY, OPEN)),
    ])
    def test_parse_closer(self, tokenizer, formula, offset, opener):
        """``_parse_closer`` emits a CLOSE item typed like the stacked opener."""
        lexer = tokenizer.Tokenizer(formula)
        lexer.offset = offset
        del lexer.items[:]
        lexer.token_stack.append(tokenizer.Token(*opener))

        consumed = lexer._parse_closer()

        assert consumed == 1
        assert len(lexer.items) == 1
        emitted = lexer.items[0]
        assert emitted.value == formula[offset]
        assert emitted.type == opener[1]
        assert emitted.subtype == CLOSE

    @pytest.mark.parametrize('formula, offset, opener', [
        ('func(a}', 6, ('func(', FUNC, OPEN)),
        ('(a}', 2, ('(', PAREN, OPEN)),
        ('{a,b,c)', 6, ('{', ARRAY, OPEN)),
    ])
    def test_parse_closer_error(self, tokenizer, formula, offset, opener):
        """A closer that mismatches the stacked opener raises TokenizerError."""
        lexer = tokenizer.Tokenizer(formula)
        lexer.offset = offset
        del lexer.items[:]
        lexer.token_stack.append(tokenizer.Token(*opener))
        with pytest.raises(tokenizer.TokenizerError):
            lexer._parse_closer()

    @pytest.mark.parametrize('formula, offset, opener, type_, subtype', [
        ("{a;b}", 2, ('{', ARRAY, OPEN), SEP, ROW),
        ("{a,b}", 2, ('{', ARRAY, OPEN), SEP, ARG),
        ("(a,b)", 2, ('(', PAREN, OPEN), OP_IN, ''),
        ("FUNC(a,b)", 6, ('FUNC(', FUNC, OPEN), SEP, ARG),
        ("$A$15:$B$20,$A$1:$B$5", 11, None, OP_IN, ""),
    ])
    def test_parse_separator(self, tokenizer, formula, offset, opener,
                             type_, subtype):
        """``_parse_separator`` emits one item of ``type_`` / ``subtype``.

        ``opener``, when given, is pushed on the token stack first.
        """
        lexer = tokenizer.Tokenizer(formula)
        lexer.offset = offset
        del lexer.items[:]
        if opener:
            lexer.token_stack.append(tokenizer.Token(*opener))

        consumed = lexer._parse_separator()

        assert consumed == 1
        assert len(lexer.items) == 1
        emitted = lexer.items[0]
        assert emitted.value == formula[offset]
        assert emitted.type == type_
        assert emitted.subtype == subtype

    @pytest.mark.parametrize('formula, offset, token, ret', [
        ('1.0E-5', 4, ['1', '.', '0', 'E'], True),
        ('1.53321E+3', 8, ['1.53321', 'E'], True),
        ('9.9E+12', 4, ['9.', '9E'], True),
        ('3E+155', 2, ['9.', '9', 'E'], True),
        ('12E+15', 3, ['12', 'E'], False),
        ('0.1E-5', 4, ['0', '.1', 'E'], False),
        ('0E+7', 2, ['0', 'E'], False),
        ('12+', 2, ['1', '2'], False),
        ('13-E+', 4, ['E'], False),
        ('+', 0, [], False),
        ('1.0e-5', 4, ['1', '.', '0', 'e'], True),
        ('1.53321e+3', 8, ['1.53321', 'e'], True),
        ('9.9e+12', 4, ['9.', '9e'], True),
        ('3e+155', 2, ['9.', '9', 'e'], True),
        ('12e+15', 3, ['12', 'e'], False),
        ('0.1e-5', 4, ['0', '.1', 'e'], False),
        ('0e+7', 2, ['0', 'e'], False),
        ('12+', 2, ['1', '2'], False),
        ('13-e+', 4, ['e'], False),
        ('+', 0, [], False),
    ])
    def test_check_scientific_notation(self, tokenizer, formula, offset,
                                       token, ret):
        """``check_scientific_notation`` returns ``ret`` for the given state.

        When it returns True the offset is expected to move on by one and
        the character at ``offset`` to be appended to the pending token;
        otherwise offset and pending token are expected to be unchanged.
        """
        lexer = tokenizer.Tokenizer(formula)
        lexer.offset = offset
        del lexer.items[:]
        lexer.token[:] = token

        assert ret is lexer.check_scientific_notation()

        if not ret:
            assert offset == lexer.offset
            assert token == lexer.token
            return

        assert offset + 1 == lexer.offset
        assert token == lexer.token[:-1]
        assert lexer.token[-1] == formula[offset]

    def test_assert_empty_token(self, tokenizer):
        """``assert_empty_token`` raises only when a token is pending."""
        lexer = tokenizer.Tokenizer("")

        # Nothing pending: the call must not raise.
        try:
            lexer.assert_empty_token()
        except tokenizer.TokenizerError:
            pytest.fail(
                "assert_empty_token raised TokenizerError incorrectly")

        # Something pending: the call must raise.
        lexer.token.append("test")
        with pytest.raises(tokenizer.TokenizerError):
            lexer.assert_empty_token()

    def test_save_token(self, tokenizer):
        """``save_token`` emits the pending token as an OPERAND item."""
        lexer = tokenizer.Tokenizer("")

        # With nothing pending, no item is added.
        lexer.save_token()
        assert not lexer.items

        lexer.token.append("test")
        lexer.save_token()
        assert len(lexer.items) == 1
        saved = lexer.items[0]
        assert saved.value == "test"
        assert saved.type == OPERAND

    @pytest.mark.parametrize('formula', [
        '=IF(A$3<40%,"",INDEX(Pipeline!B$4:B$138,#REF!))',
        "='Summary slices'!$C$3",
        '=-MAX(Pipeline!AA4:AA138)',
        '=TEXT(-S7/1000,"$#,##0""M""")',
        ("=IF(A$3<1.3E-8,\"\",IF(ISNA('External Ref'!K7),"
         '"N/A",TEXT(K7*1E+12,"0")&"bp"'),
        ('=+IF(A$3<>$B7,"",(MIN(IF({TRUE, FALSE;1,2},A6:B6,$S7))>=' +
         'LOWER_BOUND)*($BR6>$S72123))'),
        '=(AW$4=$D7)+0%',
        "Just text",
        "123.456",
        "31/12/1999",
        "",
    ])
    def test_render(self, tokenizer, formula):
        """``render()`` reproduces the formula the Tokenizer was built from."""
        rendered = tokenizer.Tokenizer(formula).render()
        assert rendered == formula
class TestToken(object):
    """Tests for ``tokenizer.Token`` and its factory helpers."""

    def test_init(self, tokenizer):
        """A Token can be constructed from value, type and subtype."""
        tokenizer.Token('val', 'type', 'subtype')

    @pytest.mark.parametrize('value, subtype', [
        ('"text"', TEXT),
        ('#REF!', ERROR),
        ('123', NUMBER),
        ('0', NUMBER),
        ('0.123', NUMBER),
        ('.123', NUMBER),
        ('1.234E5', NUMBER),
        ('1E+5', NUMBER),
        ('1.13E-55', NUMBER),
        ('TRUE', LOGICAL),
        ('FALSE', LOGICAL),
        ('A1', RANGE),
        ('ABCD12345', RANGE),
        ("'Hello world'!R123C[-12]", RANGE),
        ("[outside-workbook.xlsx]'A sheet name'!$AB$122", RANGE),
    ])
    def test_make_operand(self, tokenizer, value, subtype):
        """``make_operand`` returns an OPERAND with the expected subtype."""
        made = tokenizer.Token.make_operand(value)
        assert made.value == value
        assert made.type == OPERAND
        assert made.subtype == subtype

    @pytest.mark.parametrize('value, type_, subtype', [
        ('{', ARRAY, OPEN),
        ('}', ARRAY, CLOSE),
        ('(', PAREN, OPEN),
        (')', PAREN, CLOSE),
        ('FUNC(', FUNC, OPEN),
    ])
    def test_make_subexp(self, tokenizer, value, type_, subtype):
        """``make_subexp`` derives type and subtype from ``value``."""
        made = tokenizer.Token.make_subexp(value)
        assert made.value == value
        assert made.type == type_
        assert made.subtype == subtype

    def test_make_subexp_func(self, tokenizer):
        """``make_subexp(value, True)`` yields FUNC-typed tokens."""
        cases = (
            (')', CLOSE),
            ('TEST(', OPEN),
        )
        for value, expected_subtype in cases:
            made = tokenizer.Token.make_subexp(value, True)
            assert made.value == value
            assert made.type == FUNC
            assert made.subtype == expected_subtype

    @pytest.mark.parametrize('token, close_val', [
        (('(', PAREN, OPEN), ')'),
        (('{', ARRAY, OPEN), '}'),
        (('FUNC(', FUNC, OPEN), ')'),
    ])
    def test_get_closer(self, tokenizer, token, close_val):
        """``get_closer`` returns a CLOSE token of the opener's type."""
        opening = tokenizer.Token(*token)
        closing = opening.get_closer()
        assert closing.value == close_val
        assert closing.type == token[1]
        assert closing.subtype == CLOSE

    def test_make_separator(self, tokenizer):
        """``make_separator`` maps ``,`` to ARG and ``;`` to ROW."""
        cases = (
            (',', ARG),
            (';', ROW),
        )
        for value, expected_subtype in cases:
            made = tokenizer.Token.make_separator(value)
            assert made.value == value
            assert made.type == SEP
            assert made.subtype == expected_subtype