Coverage for src / mafw / processor_library / abstract_plotter.py: 98%

101 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-30 16:10 +0000

1# Copyright 2025–2026 European Union 

2# Author: Bulgheroni Antonio (antonio.bulgheroni@ec.europa.eu) 

3# SPDX-License-Identifier: EUPL-1.2 

4""" 

5Module implements the abstract base interface to a processor to generate plots. 

6 

7This abstract interface is needed because MAFw does not force the user to select a specific plot and data manipulation 

8library. 

9 

10The basic idea is to have a :class:`basic processor class <.GenericPlotter>` featuring a modified 

11:meth:`~.GenericPlotter.process` method where a skeleton of the standard operations required to generate a graphical 

12representation of a dataset is provided. 

13 

14The user has the possibility to compose the :class:`~.GenericPlotter` by mixing it with one :class:`~.DataRetriever` 

15and a :class:`~.FigurePlotter`. 

16 

17For a specific implementation based on :link:`seaborn`, please refer to :mod:`.sns_plotter`. 

18""" 

19 

20import logging 

21import typing 

22from abc import ABC, abstractmethod 

23from pathlib import Path 

24from typing import Any, Protocol 

25 

26import peewee 

27 

28from mafw.db.std_tables import PlotterOutput, TriggerDisabler 

29from mafw.enumerators import LoopingStatus 

30from mafw.processor import ActiveParameter, Processor, ProcessorMeta 

31from mafw.tools.file_tools import file_checksum 

32 

33log = logging.getLogger(__name__) 

34 

35 

class PlotterMeta(type(Protocol), ProcessorMeta):  # type: ignore[misc]
    """
    Metaclass for the plotter mixed classes.

    It combines the metaclass of :class:`typing.Protocol` with :class:`.ProcessorMeta`, so that a class can derive
    from both hierarchies without a metaclass conflict.
    """

40 

41 

class DataRetriever(ABC):
    """Base mixin class to retrieve a data frame from an external source"""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """
        Forward every argument to the next class in the method resolution order.

        :param args: Positional arguments passed on unchanged.
        :param kwargs: Keyword arguments passed on unchanged.
        """
        # Keep this explicit constructor: without it the Protocol init does not call the main class init.
        # The reason is not fully understood, but having it here costs nothing.
        super().__init__(*args, **kwargs)

    @abstractmethod
    def get_data_frame(self) -> None:
        """Retrieve the data frame. Mixin counterpart of the method shared with the base plotter class."""

    @abstractmethod
    def patch_data_frame(self) -> None:
        """
        Patch the data frame. Mixin counterpart of the method shared with the base plotter class.

        When the instance exposes a ``_mark_super_call`` attribute, it is invoked with the name of this method.
        """
        if not hasattr(self, '_mark_super_call'):
            return
        self._mark_super_call('patch_data_frame')

    @abstractmethod
    def _attributes_valid(self) -> bool:
        """Tell whether the attributes of the mixin are valid. To be implemented by the concrete mixin."""

66 

67 

class FigurePlotter(ABC):
    """Base mixin class to generate the figure"""

    @abstractmethod
    def plot(self) -> None:
        """Generate the plot. To be implemented by the concrete mixin."""

    @abstractmethod
    def _attributes_valid(self) -> bool:
        """Tell whether the attributes of the mixin are valid. To be implemented by the concrete mixin."""

76 

77 

class GenericPlotter(Processor, metaclass=PlotterMeta):
    """
    The Generic Plotter processor.

    This is a subclass of a Processor with advanced functionality to fetch data in the form of a dataframe and to
    produce plots. When mentioning dataframe in the context of the generic plotter, we do not have in mind any
    specific dataframe implementation.

    The GenericPlotter is actually a kind of abstract class: since MAFw is not forcing you to use any specific
    plotting and data manipulation library, you need to subclass the GenericPlotter in your code, be sure that the
    required dependencies are available for import and use it as a normal processor.

    If you are ok with using :link:`seaborn` (with :link:`matplotlib` as a graphical backend and :link:`pandas` for
    data storage and manipulation), then be sure to install mafw with the optional feature `seaborn` (``pip install
    mafw[seaborn]``) and have a look at the :mod:`~.sns_plotter` for an already prepared implementation of a Plotter.

    The key difference with respect to a normal processor is its :meth:`.process` method that has been already
    implemented as follows:

    .. literalinclude:: ../../../src/mafw/processor_library/abstract_plotter.py
        :pyobject: GenericPlotter.process
        :dedent:

    This actually means that when you are subclassing a GenericPlotter you do not have to implement the process method
    as you would do for a normal Processor, but you will have to implement the following methods:

        * :meth:`~.in_loop_customization`.

            The processor execution workflow (LoopType) can be any of the available, so
            actually the process method might be invoked only once, or multiple times inside a loop structure
            (for or while).
            If the execution is cyclic, then you may want to have the possibility to do some customisation for each
            iteration, for example, changing the plot title, or modifying the data selection, or the filename where the
            plots will be saved.

            You can use this method also in case of a single loop processor, in this case you will not have access to
            the loop parameters.

        * :meth:`~.get_data_frame`.

            This method has the task to get the data to be plotted. Since the GenericPlotter is an almost abstract
            class, you need to implement it either directly in your subclass or via a :class:`~.DataRetriever`
            mixin class.

        * :meth:`~.patch_data_frame`.

            A convenient method to apply data frame manipulation to the data just retrieved. A typical use case is for
            conversion of unit of measurement. Imagine you saved the data in the S.I. units, but for the visualization
            you prefer to use practical units, so you can subclass this method to add a new column containing the same
            converted values of the original one.

        * :meth:`~.slice_data_frame`.

            Slicing a dataframe is similar to applying a where clause in a SQL query. Implement this method to select
            which rows should be used in the generation of your plot.

        * :meth:`~.group_and_aggregate_data_frame`.

            In this method, you can manipulate your data frame to perform row grouping and aggregation.

        * :meth:`~.is_data_frame_empty`.

            A simple method to test if the dataframe contains any data to be plotted. In fact, after the slicing,
            grouping and aggregation operations, it is possible that the dataframe is now left without any row. In
            this case, it makes no sense to waste time in plotting an empty graph.

        * :meth:`~.plot`.

            This method is where the actual plotting occurs.

        * :meth:`~.customize_plot`.

            This method can be optionally used to customize the appearance of the facet grid produced by the
            :meth:`~.plot` method. It is particularly useful when the user is mixing this class with one of the
            :class:`~.FigurePlotter` mixin, thus not having direct access to the plot method.

        * :meth:`~.save`.

            This method is where the produced plot is saved in a file. Remember to append the output file name to the
            :attr:`list of produced outputs <.output_filename_list>` so that the :meth:`~._update_plotter_db` method
            will automatically store this file in the database during the :meth:`~.finish` execution.

        * :meth:`~.update_db`.

            If the user wants to update a specific table in the database, they can use this method.

            It is worth reminding that all plotters are saving all generated files in the standard table PlotterOutput.
            This is automatically done by the :meth:`~._update_plotter_db` method that is called in the
            :meth:`~.finish` method.

    """

    # NOTE: Path.cwd() is evaluated only once, when the class body is executed (i.e. when the module is imported).
    output_folder = ActiveParameter(
        'output_folder', default=Path.cwd(), help_doc='The path where the output file will be saved'
    )
    """The path where the output files will be saved."""

    force_replot = ActiveParameter(
        'force_replot', default=False, help_doc='Whether to force re-plotting even if the output file already exists'
    )
    """Flag to force the regeneration of the output file even if it is already existing."""

    @typing.no_type_check
    def is_output_existing(self) -> bool:
        """
        Check for plotter output existence.

        Generally, plotter subclasses do not have a real output that can be saved to a database. This class is meant to
        generate one or more graphical output files.

        One of the biggest advantages of having the output of a processor stored in the database is the ability to
        conditionally execute the processor if, and only if, the output is missing or changed.

        In order to allow also plotter processor to benefit from this feature, a :class:`dedicated table
        <.PlotterOutput>` is available among the :ref:`standard tables <std_tables>`.

        If a connection to the database is provided, then this method is invoked at the beginning of the
        :meth:`~.process` and a select query over the :class:`~.PlotterOutput` model is executed filtering by
        processor name. All files in the filename lists are checked for existence and also the checksum is verified.
        If at least one file is missing or changed, the whole row is removed from the table.

        Especially during debugging phase of the processor, it is often needed to generate the plot several times, for
        this reason the user can switch the :attr:`.force_replot` parameter to True in the steering file and the output
        file will be generated even if it is already existing.

        This method will return True, if the output of the processor is already existing and valid, False, otherwise.

        .. versionchanged:: v2.0.0
            Using :attr:`.Processor.replica_name` instead of :attr:`.Processor.name` for storage in the
            :class:`.PlotterOutput`

        :return: True if the processor output exists and it is valid.
        :rtype: bool
        """
        if self.force_replot:
            return False

        if self._database is None:
            # no active database connection. it makes no sense to continue. inform the user and return
            log.warning('No database connection available. Impossible to check for existing output')
            return False

        # only the lookup can signal a missing row, so keep the try body as small as possible.
        try:
            stored_output = PlotterOutput.get(PlotterOutput.plotter_name == self.replica_name)
        except peewee.DoesNotExist:
            # no output for this plotter processor found in the DB.
            return False

        filenames = stored_output.filename_list
        # the checksum is compared only when all files exist, thanks to the short-circuit evaluation.
        if all(f.exists() for f in filenames) and stored_output.checksum == file_checksum(filenames):
            # all files exist and the checksum is the same.
            # we stop it here
            return True

        # at least one file is missing or changed.
        # delete the whole row, with the triggers disabled, and continue
        with TriggerDisabler(trigger_type_id=4):
            PlotterOutput.delete().where(PlotterOutput.plotter_name == self.replica_name).execute()
        return False

    def process(self) -> None:
        """
        Process method overload.

        In the case of a plotter subclass, the process method is already implemented and the user should not overload
        it. On the contrary, the user must overload the other implementation methods described in the general
        :class:`class description <.GenericPlotter>`.
        """
        # when only new items are requested, skip the whole generation if a valid output is already there.
        if self.filter_register.new_only and self.is_output_existing():
            return

        self.in_loop_customization()
        self.get_data_frame()
        self.patch_data_frame()
        self.slice_data_frame()
        self.group_and_aggregate_data_frame()
        if not self.is_data_frame_empty():
            self.plot()
            self.customize_plot()
            self.save()
            self.update_db()

    def is_data_frame_empty(self) -> bool:
        """
        Check if the data frame is empty.

        The base implementation always returns False; subclasses are expected to overload it.

        :return: True if there is nothing to be plotted.
        :rtype: bool
        """
        return False

    def in_loop_customization(self) -> None:
        """
        Customize the parameters for the output or input data for each execution iteration.
        """
        pass

    def get_data_frame(self) -> None:
        """
        Get the data frame with the data to be plotted.

        This method can be either implemented in the plotter subclass or via a :class:`.DataRetriever` mixin
        class.
        """
        # it must be overloaded.
        pass

    def format_progress_message(self) -> None:
        """Set the progress message to a generic text including the processor name."""
        self.progress_message = f'{self.name} is working'

    def plot(self) -> None:
        """
        The plot method.

        This is where the user has to implement the real plot generation
        """
        pass

    def customize_plot(self) -> None:
        """
        The customize plot method.

        The user can overload this method to customize the output produced by the :meth:`~.plot` method, like, for
        example, adding meaningful axis titles, changing format, and so on.

        As usual, it is possible to use the :attr:`~.Processor.item`, :attr:`~.Processor.i_item` and
        :attr:`~.Processor.n_item` to access the loop parameters.
        """
        pass

    def save(self) -> None:
        """
        The save method.

        This is where the user has to implement the saving of the plot on disc.
        """
        pass

    def update_db(self) -> None:
        """
        The update database method.

        This is where the user has to implement the optional update of the database.

        .. seealso::

            The plotter output table is automatically updated by :meth:`~._update_plotter_db`.
        """
        pass

    def slice_data_frame(self) -> None:
        """
        Slice the data frame.

        The user can overload this method to select which rows should be used in the generation of the plot.
        The base implementation does nothing.
        """
        pass

    def group_and_aggregate_data_frame(self) -> None:
        """
        Group and aggregate the data frame.

        The user can overload this method to perform row grouping and aggregation.
        The base implementation does nothing.
        """
        pass

    def finish(self) -> None:
        """
        Finish method overload.

        The plotter output table is updated via :meth:`~._update_plotter_db` only if the looping status is
        :attr:`.LoopingStatus.Continue`, then the finish method of the parent class is invoked.
        """
        if self.looping_status == LoopingStatus.Continue:
            self._update_plotter_db()
        super().finish()

    def patch_data_frame(self) -> None:
        """
        Modify the data frame

        This method can be used to perform operation on the data frame, like adding new columns.
        It can be either implemented in the plotter processor subclasses or via a mixin class.
        """
        # NOTE(review): presumably used by the framework to verify that overloads call the super implementation.
        # To be confirmed against the Processor class.
        self._mark_super_call('patch_data_frame')

    @typing.no_type_check
    def _update_plotter_db(self) -> None:
        """
        Updates the Plotter DB.

        A plotter subclass primarily generates plots as output in most cases, which means that no additional information
        needs to be stored in the database. This is sufficient to prevent unnecessary execution of the processor
        when it is not required.

        This method is actually protected against execution without a valid database instance.

        .. versionchanged:: v2.0.0
            Using the :attr:`.Processor.replica_name` instead of the :attr:`.Processor.name` as plotter_name in the
            :class:`.PlotterOutput` Model.

        """
        if self._database is None:
            # there is no active database connection. No need to continue. Inform the user and return
            log.warning('No database connection available. Impossible to update the plotter output')
            return

        if not self.output_filename_list:
            # there is no need to make an entry because there are no saved files
            return

        PlotterOutput.std_upsert(
            {
                'plotter_name': self.replica_name,
                'filename_list': self.output_filename_list,
                # NOTE(review): the filename list is passed as checksum value as well. Presumably the checksum
                # field of PlotterOutput computes the digest from the filenames when storing. To be confirmed.
                'checksum': self.output_filename_list,
            }
        ).execute()

382 ).execute()