Coverage for src / mafw / tools / regexp.py: 100%
25 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-09 09:08 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-09 09:08 +0000
1# Copyright 2025 European Union
2# Author: Bulgheroni Antonio (antonio.bulgheroni@ec.europa.eu)
3# SPDX-License-Identifier: EUPL-1.2
4"""
5Module implements some basic functions involving regular expressions.
6"""
8import logging
9import re
11from mafw.mafw_errors import UnknownProcessor
13log = logging.getLogger(__name__)
16def extract_protocol(url: str) -> str | None:
17 """
18 Extract the protocol portion from a database connection URL.
20 The extract_protocol function takes a database connection URL string as input and extracts the protocol portion
21 (the part before "://"). This function is useful for identifying the database type from connection strings.
23 :param url: The url from which the protocol will be extracted.
24 :type url: str
25 :return: The protocol or None, if the extraction failed
26 :rtype: str | None
27 """
28 pattern = r'^([a-z0-9_\-+.]+)://'
29 match = re.match(pattern, url)
30 if match:
31 return match.group(1)
32 return None
def normalize_sql_spaces(sql_string: str) -> str:
    """
    Collapse every run of consecutive space characters in a SQL string into one space.

    Leading and trailing whitespace (of any kind) is stripped first. Inside the string only the plain space
    character is collapsed; tabs, newlines and other whitespace characters are left as they are.

    :param sql_string: The SQL string for space normalization.
    :type sql_string: str
    :return: The normalized SQL command.
    :rtype: str
    """
    trimmed = sql_string.strip()
    return re.sub(r' +', ' ', trimmed)
48def parse_processor_name(processor_string: str) -> tuple[str, str | None]:
49 """
50 Parse a processor name string into name and replica identifier components.
52 Given a string in the form 'MyProcessorName#156a', returns a tuple ('MyProcessorName', '156a').
53 If the input string is 'MyProcessorName' only, then it returns ('MyProcessorName', None).
54 If it gets 'MyProcessorName#', it returns ('MyProcessorName', None) but emits a warning
55 informing of a possible malformed name.
57 The processor name must be a valid Python identifier (class name).
59 :param processor_string: The processor name string to parse.
60 :type processor_string: str
61 :return: A tuple of (name, replica_id) where replica_id can be None.
62 :rtype: tuple[str, str | None]
63 :raise UnknownProcessor: if the name part is empty or not a valid Python identifier
64 """
65 # Split on '#' character
66 parts = processor_string.strip().split('#', 1)
68 # Get the name part (always exists)
69 name = parts[0]
71 if len(name) == 0:
72 raise UnknownProcessor('Invalid processor name (empty)')
74 # Validate that the name is a valid Python identifier
75 if not name.isidentifier():
76 raise UnknownProcessor(f'Invalid processor name "{name}" - not a valid Python identifier')
78 # Check if there's a replica part
79 if len(parts) == 1 or parts[1] == '':
80 # No or empty replica id part
81 if len(parts) == 2 and parts[1] == '':
82 # Warn about malformed input like "Name#"
83 log.warning(
84 f"Malformed processor name '{processor_string}': empty replica part after '#'",
85 )
86 return name, None
87 else:
88 replica_id = parts[1]
89 return name, replica_id