Skip to content

mapper.base.matcher_base

Common base class for all matcher widgets.

Provides

• Qt signals updated / removed
• build_result() – combines a statistics row and a geo row
• Placeholder methods that subclasses must override

BaseMatcher

Bases: QWidget

Source code in src/mapper/base/matcher_base.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
class BaseMatcher(QWidget):
    """
    Common base class for all matcher widgets.

    Offers two Qt signals, the `build_result()` helper, and placeholder
    methods that concrete matchers are expected to override.
    """

    # --------------------------------------------------------------------------
    #  Qt signals exposed to the outside
    # --------------------------------------------------------------------------
    updated = Signal()  # Emitted when configuration changes
    removed = Signal()  # Emitted to request removal of this matcher

    # --------------------------------------------------------------------------
    #  Construction
    # --------------------------------------------------------------------------
    def __init__(self, nr: int, stats_cols: List[str], geo_cols: List[str], parent=None) -> None:
        """
        Set up the state shared by every matcher widget.

        Steps:
          1. Forward `parent` to the superclass constructor.
          2. Remember the identifier `nr` plus the available statistics and geo column lists.
        """
        super().__init__(parent)
        # The lists are kept by reference (no copy is made) so subclasses can read them later
        self._nr, self._stats_cols, self._geo_cols = nr, stats_cols, geo_cols

    # --------------------------------------------------------------------------
    #  Shared helper for subclasses
    # --------------------------------------------------------------------------
    @staticmethod
    def build_result(stats_df: pd.DataFrame, geo_df: pd.DataFrame, label: str | None = None) -> pd.DataFrame:
        """
        Merge a statistics frame and a geo frame into one side-by-side result frame.

        Steps:
          1. Work on index-reset copies so the inputs stay untouched.
          2. Tag every statistics column with the suffix `_stats`.
          3. Tag every geo column with the suffix `_geodata`.
          4. Join both frames horizontally (axis=1).
          5. When `label` is given, add a `matcher` column carrying it.
        """

        def _suffixed(frame: pd.DataFrame, suffix: str) -> pd.DataFrame:
            # Fresh 0..n-1 index so the two frames line up row by row in concat
            renamed = frame.reset_index(drop=True).copy()
            renamed.columns = [f"{col}{suffix}" for col in renamed.columns]
            return renamed

        combined = pd.concat(
            [_suffixed(stats_df, "_stats"), _suffixed(geo_df, "_geodata")],
            axis=1,
        )
        if label is None:
            return combined
        # Record which matcher produced the row(s)
        combined["matcher"] = label
        return combined

    # --------------------------------------------------------------------------
    #  Abstract API – every subclass has to provide these
    # --------------------------------------------------------------------------
    def match(
        self,
        stats_df: pd.DataFrame,
        geo_df: pd.DataFrame,
    ) -> Tuple[Optional[pd.DataFrame], Optional[List[int]], Optional[List[int]]]:
        """
        Run the matching between statistics and geo data (placeholder).

        Steps expected from a subclass:
          1. Work out which rows of `stats_df` and `geo_df` belong together.
          2. Assemble the combined rows with `build_result()`.
          3. Hand back the combined DataFrame together with the used row indices of both tables.

        The base implementation always raises NotImplementedError.
        """
        message = "match() must be implemented in the subclass"
        raise NotImplementedError(message)

    def description(self) -> str:
        """
        Describe the current matcher configuration in a short text (placeholder).

        Steps expected from a subclass:
          1. Return a string that summarizes the columns or criteria in use.

        The base implementation always raises NotImplementedError.
        """
        message = "description() must be implemented in the subclass"
        raise NotImplementedError(message)

    # --------------------------------------------------------------------------
    #  Optional hooks – subclasses may override these
    # --------------------------------------------------------------------------
    def update_stats_columns(self, cols: List[str]) -> None:
        """
        Swap in a new list of available statistics columns.

        Steps:
          1. Point `_stats_cols` at `cols` (the list is stored as-is, not copied).
        """
        self._stats_cols = cols

    def update_geo_columns(self, cols: List[str]) -> None:
        """
        Swap in a new list of available geo columns.

        Steps:
          1. Point `_geo_cols` at `cols` (the list is stored as-is, not copied).
        """
        self._geo_cols = cols

__init__(nr, stats_cols, geo_cols, parent=None)

Initialize the base matcher widget.

Steps
  1. Call the superclass constructor with the given parent.
  2. Store the unique identifier nr and lists of available statistics and geo columns.
Source code in src/mapper/base/matcher_base.py
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(self, nr: int, stats_cols: List[str], geo_cols: List[str], parent=None) -> None:
    """
    Set up the state shared by every matcher widget.

    Steps:
      1. Forward `parent` to the superclass constructor.
      2. Remember the identifier `nr` plus the available statistics and geo column lists.
    """
    super().__init__(parent)
    # The lists are kept by reference (no copy is made) so subclasses can read them later
    self._nr, self._stats_cols, self._geo_cols = nr, stats_cols, geo_cols

build_result(stats_df, geo_df, label=None) staticmethod

Combine exactly one statistics row and one geo row into a single result record.

Steps
  1. Reset indices on both DataFrames and make copies to avoid side effects.
  2. Rename statistics columns to have suffix _stats.
  3. Rename geo columns to have suffix _geodata.
  4. Concatenate the two side by side (axis=1).
  5. If a label is provided, append a column matcher with that label.
Source code in src/mapper/base/matcher_base.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
@staticmethod
def build_result(stats_df: pd.DataFrame, geo_df: pd.DataFrame, label: str | None = None) -> pd.DataFrame:
    """
    Combine exactly one statistics row and one geo row into a single result record.

    Steps:
      1. Reset indices on both DataFrames and make copies to avoid side effects.
      2. Rename statistics columns to have suffix `_stats`.
      3. Rename geo columns to have suffix `_geodata`.
      4. Concatenate the two side by side (axis=1).
      5. If a label is provided, append a column `matcher` with that label.
    """
    # Prepare statistics DataFrame
    st = stats_df.reset_index(drop=True).copy()
    st.columns = [f"{c}_stats" for c in st.columns]

    # Prepare geo DataFrame
    ge = geo_df.reset_index(drop=True).copy()
    ge.columns = [f"{c}_geodata" for c in ge.columns]

    # Concatenate results horizontally
    res = pd.concat([st, ge], axis=1)
    # Optionally record which matcher produced this row
    if label is not None:
        res["matcher"] = label
    return res

description()

Provide a brief text description of the current matcher configuration.

Steps
  1. Subclass should return a string summarizing which columns or criteria are used.
Source code in src/mapper/base/matcher_base.py
90
91
92
93
94
95
96
97
def description(self) -> str:
    """
    Describe the current matcher configuration in a short text (placeholder).

    Steps expected from a subclass:
      1. Return a string that summarizes the columns or criteria in use.

    The base implementation always raises NotImplementedError.
    """
    message = "description() must be implemented in the subclass"
    raise NotImplementedError(message)

match(stats_df, geo_df)

Execute the actual matching logic between statistics and geo data.

Steps
  1. Subclass should identify which rows from stats_df and geo_df match.
  2. Build a DataFrame of combined rows using build_result().
  3. Return the combined DataFrame and lists of used row indices from both tables.
Source code in src/mapper/base/matcher_base.py
79
80
81
82
83
84
85
86
87
88
def match(
    self,
    stats_df: pd.DataFrame,
    geo_df: pd.DataFrame,
) -> Tuple[Optional[pd.DataFrame], Optional[List[int]], Optional[List[int]]]:
    """
    Run the matching between statistics and geo data (placeholder).

    Steps expected from a subclass:
      1. Work out which rows of `stats_df` and `geo_df` belong together.
      2. Assemble the combined rows with `build_result()`.
      3. Hand back the combined DataFrame together with the used row indices of both tables.

    The base implementation always raises NotImplementedError.
    """
    message = "match() must be implemented in the subclass"
    raise NotImplementedError(message)

update_geo_columns(cols)

Update the list of available geo columns.

Steps
  1. Replace the internal _geo_cols list with the new cols.
Source code in src/mapper/base/matcher_base.py
111
112
113
114
115
116
117
118
def update_geo_columns(self, cols: List[str]) -> None:
    """
    Swap in a new list of available geo columns.

    Steps:
      1. Point `_geo_cols` at `cols` (the list is stored as-is, not copied).
    """
    self._geo_cols = cols

update_stats_columns(cols)

Update the list of available statistics columns.

Steps
  1. Replace the internal _stats_cols list with the new cols.
Source code in src/mapper/base/matcher_base.py
102
103
104
105
106
107
108
109
def update_stats_columns(self, cols: List[str]) -> None:
    """
    Swap in a new list of available statistics columns.

    Steps:
      1. Point `_stats_cols` at `cols` (the list is stored as-is, not copied).
    """
    self._stats_cols = cols