Common base class for all matcher widgets.
Provides
• Qt signals updated / removed
• build_result() – combines a statistics row and a geo row
• Placeholder methods that subclasses must override
BaseMatcher
Bases: QWidget
Source code in src/mapper/base/matcher_base.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
class BaseMatcher(QWidget):
    """
    Common base class for all matcher widgets.

    Provides the ``updated`` / ``removed`` Qt signals, the ``build_result()``
    helper, and placeholder methods (``match()``, ``description()``) that
    subclasses must override.
    """

    # --------------------------------------------------------------------------
    # Public Qt signals
    # --------------------------------------------------------------------------
    updated = Signal()  # Emitted when configuration changes
    removed = Signal()  # Emitted to request removal of this matcher

    # --------------------------------------------------------------------------
    # Constructor
    # --------------------------------------------------------------------------
    def __init__(self, nr: int, stats_cols: List[str], geo_cols: List[str], parent=None) -> None:
        """
        Initialize the base matcher widget.

        Args:
            nr: Unique identifier of this matcher.
            stats_cols: Names of the available statistics columns.
            geo_cols: Names of the available geo columns.
            parent: Optional parent object, forwarded to ``QWidget``.

        Note:
            Both column lists are stored by reference (no copy is made).
        """
        super().__init__(parent)
        # Store identifier and column lists for use by subclasses
        self._nr = nr
        self._stats_cols = stats_cols
        self._geo_cols = geo_cols

    # --------------------------------------------------------------------------
    # Helper routine used by subclasses
    # --------------------------------------------------------------------------
    @staticmethod
    def build_result(stats_df: pd.DataFrame, geo_df: pd.DataFrame, label: Optional[str] = None) -> pd.DataFrame:
        """
        Combine statistics rows and geo rows side by side into one result frame.

        The inputs are aligned purely by position: both indices are reset, so
        row 0 of ``stats_df`` ends up next to row 0 of ``geo_df``. Callers are
        expected to pass frames with the same number of rows (typically one
        each). No length check is made here; if the lengths differ, pandas
        pads the shorter side with NaN.

        Args:
            stats_df: Statistics rows; every column gets the suffix ``_stats``.
            geo_df: Geo rows; every column gets the suffix ``_geodata``.
            label: If not ``None``, written to an extra ``matcher`` column.

        Returns:
            A new DataFrame holding the suffixed statistics columns, then the
            suffixed geo columns, then (optionally) ``matcher``. The inputs
            are left unmodified.
        """
        # reset_index() already returns a new object, so renaming the columns
        # below cannot leak back into the caller's frames.
        st = stats_df.reset_index(drop=True)
        st.columns = [f"{c}_stats" for c in st.columns]

        ge = geo_df.reset_index(drop=True)
        ge.columns = [f"{c}_geodata" for c in ge.columns]

        # Concatenate results horizontally
        res = pd.concat([st, ge], axis=1)

        # Optionally record which matcher produced these rows
        if label is not None:
            res["matcher"] = label
        return res

    # --------------------------------------------------------------------------
    # Abstract API – SUBCLASSES MUST OVERRIDE
    # --------------------------------------------------------------------------
    def match(self, stats_df: pd.DataFrame, geo_df: pd.DataFrame) -> Tuple[Optional[pd.DataFrame], Optional[List[int]], Optional[List[int]]]:
        """
        Execute the actual matching logic between statistics and geo data.

        Subclasses are expected to identify which rows of ``stats_df`` and
        ``geo_df`` match, combine them with ``build_result()``, and return the
        combined DataFrame together with the lists of used row indices from
        both tables.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("match() must be implemented in the subclass")

    def description(self) -> str:
        """
        Provide a brief text description of the current matcher configuration.

        Subclasses should return a string summarizing which columns or
        criteria are used.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("description() must be implemented in the subclass")

    # --------------------------------------------------------------------------
    # Optional hooks – SUBCLASSES CAN OVERRIDE
    # --------------------------------------------------------------------------
    def update_stats_columns(self, cols: List[str]) -> None:
        """
        Replace the list of available statistics columns.

        Args:
            cols: New list of statistics column names (stored by reference).
        """
        self._stats_cols = cols

    def update_geo_columns(self, cols: List[str]) -> None:
        """
        Replace the list of available geo columns.

        Args:
            cols: New list of geo column names (stored by reference).
        """
        self._geo_cols = cols
|
__init__(nr, stats_cols, geo_cols, parent=None)
Initialize the base matcher widget.
Steps
- Call the superclass constructor with the given parent.
- Store the unique identifier `nr` and the lists of available statistics and geo columns.
Source code in src/mapper/base/matcher_base.py
32
33
34
35
36
37
38
39
40
41
42
43
def __init__(self, nr: int, stats_cols: List[str], geo_cols: List[str], parent=None) -> None:
    """
    Set up the state shared by every matcher widget.

    Args:
        nr: Unique identifier of this matcher.
        stats_cols: Names of the available statistics columns.
        geo_cols: Names of the available geo columns.
        parent: Optional parent, handed to the superclass constructor.
    """
    super().__init__(parent)
    # Remember the identifier; subclasses read it via ``_nr``
    self._nr = nr
    # The lists are kept as the very objects the caller passed in (no copy)
    self._stats_cols, self._geo_cols = stats_cols, geo_cols
|
build_result(stats_df, geo_df, label=None)
staticmethod
Combine exactly one statistics row and one geo row into a single result record.
Steps
- Reset indices on both DataFrames and make copies to avoid side effects.
- Rename statistics columns to have suffix `_stats`.
- Rename geo columns to have suffix `_geodata`.
- Concatenate the two side by side (axis=1).
- If a label is provided, append a column `matcher` with that label.
Source code in src/mapper/base/matcher_base.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74 | @staticmethod
def build_result(stats_df: pd.DataFrame, geo_df: pd.DataFrame, label: str | None = None) -> pd.DataFrame:
"""
Combine exactly one statistics row and one geo row into a single result record.
Steps:
1. Reset indices on both DataFrames and make copies to avoid side effects.
2. Rename statistics columns to have suffix `_stats`.
3. Rename geo columns to have suffix `_geodata`.
4. Concatenate the two side by side (axis=1).
5. If a label is provided, append a column `matcher` with that label.
"""
# Prepare statistics DataFrame
st = stats_df.reset_index(drop=True).copy()
st.columns = [f"{c}_stats" for c in st.columns]
# Prepare geo DataFrame
ge = geo_df.reset_index(drop=True).copy()
ge.columns = [f"{c}_geodata" for c in ge.columns]
# Concatenate results horizontally
res = pd.concat([st, ge], axis=1)
# Optionally record which matcher produced this row
if label is not None:
res["matcher"] = label
return res
|
description()
Provide a brief text description of the current matcher configuration.
Steps
- Subclass should return a string summarizing which columns or criteria are used.
Source code in src/mapper/base/matcher_base.py
def description(self) -> str:
    """
    Return a short, human-readable summary of this matcher's configuration.

    This base implementation is only a placeholder: subclasses are expected
    to override it and return a string naming the columns or criteria used.

    Raises:
        NotImplementedError: Always, when the subclass has not overridden it.
    """
    message = "description() must be implemented in the subclass"
    raise NotImplementedError(message)
|
match(stats_df, geo_df)
Execute the actual matching logic between statistics and geo data.
Steps
- Subclass should identify which rows from `stats_df` and `geo_df` match.
- Build a DataFrame of combined rows using `build_result()`.
- Return the combined DataFrame and lists of used row indices from both tables.
Source code in src/mapper/base/matcher_base.py
79
80
81
82
83
84
85
86
87
def match(self, stats_df: pd.DataFrame, geo_df: pd.DataFrame) -> Tuple[Optional[pd.DataFrame], Optional[List[int]], Optional[List[int]]]:
    """
    Run the matching between the statistics table and the geo table.

    This base implementation is only a placeholder. An overriding subclass
    is expected to:
      - work out which rows of ``stats_df`` and ``geo_df`` belong together,
      - combine them via ``build_result()``,
      - return the combined DataFrame plus the lists of used row indices
        from both tables.

    Raises:
        NotImplementedError: Always, when the subclass has not overridden it.
    """
    message = "match() must be implemented in the subclass"
    raise NotImplementedError(message)
|
update_geo_columns(cols)
Update the list of available geo columns.
Steps
- Replace the internal `_geo_cols` list with the new `cols`.
Source code in src/mapper/base/matcher_base.py
111
112
113
114
115
116
117
def update_geo_columns(self, cols: List[str]) -> None:
    """
    Swap in a new list of available geo columns.

    Args:
        cols: The new geo column names. The list object itself is stored
            (no copy is made), replacing the previous ``_geo_cols``.
    """
    # Plain rebind; subclasses may override this hook
    self._geo_cols = cols
|
update_stats_columns(cols)
Update the list of available statistics columns.
Steps
- Replace the internal `_stats_cols` list with the new `cols`.
Source code in src/mapper/base/matcher_base.py
102
103
104
105
106
107
108
def update_stats_columns(self, cols: List[str]) -> None:
    """
    Swap in a new list of available statistics columns.

    Args:
        cols: The new statistics column names. The list object itself is
            stored (no copy is made), replacing the previous ``_stats_cols``.
    """
    # Plain rebind; subclasses may override this hook
    self._stats_cols = cols
|