Coverage for src/mafw/processor_library/abstract

2# Author: Bulgheroni Antonio (antonio.bulgheroni@ec.europa.eu)

3# SPDX-License-Identifier: EUPL-1.2

4"""

5Module implements the abstract base interface to a processor to generate plots.

7This abstract interface is needed because MAFw does not force the user to select a specific plot and data manipulation

8library.

10The basic idea is to have a :class:`basic processor class <.GenericPlotter>` featuring a modified

11:meth:`~.GenericPlotter.process` method where a skeleton of the standard operations required to generate a graphical

12representation of a dataset is provided.

14The user has the possibility to compose the :class:`~.GenericPlotter` by mixing it with one :class:`~.DataRetriever`

15and a :class:`~.FigurePlotter`.

17For a specific implementation based on :link:`seaborn`, please refer to :mod:`.sns_plotter`.

18"""

20import logging

21import typing

22from abc import ABC, abstractmethod

23from pathlib import Path

24from typing import Any, Protocol

26import peewee

28from mafw.db.std_tables import PlotterOutput, TriggerDisabler

29from mafw.enumerators import LoopingStatus

30from mafw.processor import ActiveParameter, Processor, ProcessorMeta

31from mafw.tools.file_tools import file_checksum

# Module-level logger, named after this module, used by the plotter classes below to report warnings.
log = logging.getLogger(__name__)

class PlotterMeta(type(Protocol), ProcessorMeta):  # type: ignore[misc]
    """
    Metaclass for the plotter mixed classes.

    It inherits from both the metaclass of :class:`typing.Protocol` and from :class:`.ProcessorMeta`, and it adds
    no behaviour of its own.
    """

class DataRetriever(ABC):
    """Abstract mixin class in charge of retrieving a data frame from an external source."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """
        Constructor forwarding every argument to the next class in the method resolution order.

        :param args: Positional arguments passed on unchanged.
        :param kwargs: Keyword arguments passed on unchanged.
        """
        # Do not remove this constructor: without it, the Protocol init does not call the main class init.
        # The reason is not fully understood, but keeping the explicit forwarding costs nothing.
        super().__init__(*args, **kwargs)

    @abstractmethod
    def get_data_frame(self) -> None:
        """The mixin implementation of the data retrieval method shared with the base class."""
        pass  # pragma: no cover

    @abstractmethod
    def patch_data_frame(self) -> None:
        """The mixin implementation of the data patching method shared with the base class."""
        pass  # pragma: no cover

    @abstractmethod
    def _attributes_valid(self) -> bool:
        """
        Tell whether the attributes of the mixin are valid.

        The validity criteria are left to the concrete implementation.

        :return: True if the attributes are valid.
        :rtype: bool
        """
        pass  # pragma: no cover

class FigurePlotter(ABC):
    """Abstract mixin class in charge of generating the figure."""

    @abstractmethod
    def plot(self) -> None:
        """The mixin implementation of the plot method shared with the base class."""
        pass  # pragma: no cover

    @abstractmethod
    def _attributes_valid(self) -> bool:
        """
        Tell whether the attributes of the mixin are valid.

        The validity criteria are left to the concrete implementation.

        :return: True if the attributes are valid.
        :rtype: bool
        """
        pass  # pragma: no cover

class GenericPlotter(Processor, metaclass=PlotterMeta):
    """
    The Generic Plotter processor.

    This is a subclass of a Processor with advanced functionality to fetch data in the form of a dataframe and to
    produce plots. When mentioning dataframe in the context of the generic plotter, we do not have in mind any
    specific dataframe implementation.

    The GenericPlotter is actually a kind of abstract class: since MAFw is not forcing you to use any specific
    plotting and data manipulation library, you need to subclass the GenericPlotter in your code, be sure that the
    required dependencies are available for import and use it as a normal processor.

    If you are ok with using :link:`seaborn` (with :link:`matplotlib` as a graphical backend and :link:`pandas` for
    data storage and manipulation), then be sure to install mafw with the optional feature `seaborn` (``pip install
    mafw[seaborn]``) and have a look at the :mod:`~.sns_plotter` for an already prepared implementation of a Plotter.

    The key difference with respect to a normal processor is its :meth:`.process` method that has been already
    implemented as follows:

    .. literalinclude:: ../../../src/mafw/processor_library/abstract_plotter.py
        :pyobject: GenericPlotter.process
        :dedent:

    This actually means that when you are subclassing a GenericPlotter you do not have to implement the process method
    as you would do for a normal Processor, but you will have to implement the following methods:

        * :meth:`~.in_loop_customization`.

            The processor execution workflow (LoopType) can be any of the available, so
            actually the process method might be invoked only once, or multiple times inside a loop structure
            (for or while).
            If the execution is cyclic, then you may want to have the possibility to do some customisation for each
            iteration, for example, changing the plot title, or modifying the data selection, or the filename where
            the plots will be saved.

            You can use this method also in case of a single loop processor, in this case you will not have access to
            the loop parameters.

        * :meth:`~.get_data_frame`.

            This method has the task to get the data to be plotted. Since it is an almost abstract class, you need to
            implement it either in your subclass or via a :class:`~.DataRetriever` mixin class.

        * :meth:`~.patch_data_frame`.

            A convenient method to apply data frame manipulation to the data just retrieved. A typical use case is for
            conversion of unit of measurement. Imagine you saved the data in the S.I. units, but for the visualization
            you prefer to use practical units, so you can subclass this method to add a new column containing the same
            converted values of the original one.

        * :meth:`~.slice_data_frame`.

            Slicing a dataframe is similar as applying a where clause in a SQL query. Implement this method to select
            which row should be used in the generation of your plot.

        * :meth:`~.group_and_aggregate_data_frame`.

            In this method, you can manipulate your data frame to perform row grouping and aggregation.

        * :meth:`~.is_data_frame_empty`.

            A simple method to test if the dataframe contains any data to be plotted. In fact, after the slicing,
            grouping and aggregation operations, it is possible that the dataframe is now left without any row.
            In this case, it makes no sense to waste time in plotting an empty graph.

        * :meth:`~.plot`.

            This method is where the actual plotting occurs.

        * :meth:`~.customize_plot`.

            This method can be optionally used to customize the appearance of the facet grid produced by the
            :meth:`~plot` method. It is particularly useful when the user is mixing this class with one of the
            :class:`~.FigurePlotter` mixin, thus not having direct access to the plot method.

        * :meth:`~.save`.

            This method is where the produced plot is saved in a file. Remember to append the output file name to the
            :attr:`list of produced outputs <.output_filename_list>` so that the :meth:`~._update_plotter_db` method
            will automatically store this file in the database during the :meth:`~.finish` execution.

        * :meth:`~.update_db`.

            If the user wants to update a specific table in the database, they can use this method.

            It is worth reminding that all plotters are saving all generated files in the standard table
            PlotterOutput. This is automatically done by the :meth:`~._update_plotter_db` method that is called in the
            :meth:`~.finish` method.

    """

    # NOTE: the default is evaluated once, when the class body is executed, so it is the working directory at that
    # moment and not the one at processor execution time.
    output_folder = ActiveParameter(
        'output_folder', default=Path.cwd(), help_doc='The path where the output file will be saved'
    )

    force_replot = ActiveParameter(
        'force_replot', default=False, help_doc='Whether to force re-plotting even if the output file already exists'
    )
    """Flag to force the regeneration of the output file even if it is already existing."""

    @typing.no_type_check
    def is_output_existing(self) -> bool:
        """
        Check for plotter output existence.

        Generally, plotter subclasses do not have a real output that can be saved to a database. This class is meant to
        generate one or more graphical output files.

        One of the biggest advantages of having the output of a processor stored in the database is the ability to
        conditionally execute the processor if, and only if, the output is missing or changed.

        In order to allow also plotter processor to benefit from this feature, a :class:`dedicated table
        <.PlotterOutput>` is available among the :ref:`standard tables <std_tables>`.

        If a connection to the database is provided, then this method is invoked at the beginning of the
        :meth:`~.process` and a select query over the :class:`~.PlotterOutput` model is executed filtering by
        processor replica name. All files in the filename lists are checked for existence and also the checksum is
        verified. When a file is missing or the checksum differs, the stored row is removed.

        Especially during debugging phase of the processor, it is often needed to generate the plot several times, for
        this reason the user can switch the :attr:`.force_replot` parameter to True in the steering file and the output
        file will be generated even if it is already existing.

        This method will return True, if the output of the processor is already existing and valid, False, otherwise.

        .. versionchanged:: v2.0.0
            Using :attr:`.Processor.replica_name` instead of :attr:`.Processor.name` for storage in the
            :class:`.PlotterOutput`

        :return: True if the processor output exists and it is valid.
        :rtype: bool
        """
        if self.force_replot:
            return False

        if self._database is None:
            # no active database connection. it makes no sense to continue. inform the user and return
            log.warning('No database connection available. Impossible to check for existing output')
            return False

        try:
            stored_output = PlotterOutput.get(PlotterOutput.plotter_name == self.replica_name)
        except peewee.DoesNotExist:
            # no output for this plotter processor found in the DB.
            return False

        # the stored output is valid only if every file still exists and the files are unchanged.
        # the checksum is computed only when all files are present.
        all_files_exist = all(f.exists() for f in stored_output.filename_list)
        if all_files_exist and stored_output.checksum == file_checksum(stored_output.filename_list):
            # all files exist and the checksum is the same.
            # we stop it here
            return True

        # at least one file is missing or changed: delete the whole row and let the processor run again.
        # The row is identified by the replica name, exactly as in the lookup above and in _update_plotter_db.
        # NOTE(review): TriggerDisabler presumably suspends the database triggers of the given type while the row
        # is removed -- confirm the meaning of trigger_type_id=4 against mafw.db.std_tables.
        with TriggerDisabler(trigger_type_id=4):
            PlotterOutput.delete().where(PlotterOutput.plotter_name == self.replica_name).execute()
        return False

    def process(self) -> None:
        """
        Process method overload.

        In the case of a plotter subclass, the process method is already implemented and the user should not overload
        it. On the contrary, the user must overload the other implementation methods described in the general
        :class:`class description <.GenericPlotter>`.
        """
        # when only new items are requested, skip the whole sequence if a valid output is already available
        if self.filter_register.new_only and self.is_output_existing():
            return

        self.in_loop_customization()
        self.get_data_frame()
        self.patch_data_frame()
        self.slice_data_frame()
        self.group_and_aggregate_data_frame()
        if not self.is_data_frame_empty():
            self.plot()
            self.customize_plot()
            self.save()
            self.update_db()

    def is_data_frame_empty(self) -> bool:
        """
        Check if the data frame is empty.

        The base implementation always returns False, so the plot is always generated.

        :return: True if there are no data to be plotted.
        :rtype: bool
        """
        return False

    def in_loop_customization(self) -> None:
        """
        Customize the parameters for the output or input data for each execution iteration.
        """
        pass

    def get_data_frame(self) -> None:
        """
        Get the data frame with the data to be plotted.

        This method can be either implemented in the plotter processor subclass or via a :class:`.DataRetriever`
        mixin class.
        """
        # it must be overloaded.
        pass

    def format_progress_message(self) -> None:
        """Set the progress message to a generic text containing the processor name."""
        self.progress_message = f'{self.name} is working'

    def plot(self) -> None:
        """
        The plot method.

        This is where the user has to implement the real plot generation
        """
        pass

    def customize_plot(self) -> None:
        """
        The customize plot method.

        The user can overload this method to customize the output produced by the :meth:`~.plot` method, like, for
        example, adding meaningful axis titles, changing format, and so on.

        As usual, it is possible to use the :attr:`~.Processor.item`, :attr:`~.Processor.i_item` and
        :attr:`~.Processor.n_item` to access the loop parameters.
        """
        pass

    def save(self) -> None:
        """
        The save method.

        This is where the user has to implement the saving of the plot on disc.
        """
        pass

    def update_db(self) -> None:
        """
        The update database method.

        This is where the user has to implement the optional update of the database.

        .. seealso::

            The plotter output table is automatically updated by :meth:`~._update_plotter_db`.
        """
        pass

    def slice_data_frame(self) -> None:
        """
        Slice the data frame.

        Implement this method to select which rows should be used in the generation of the plot.
        """
        pass

    def group_and_aggregate_data_frame(self) -> None:
        """
        Group and aggregate the data frame.

        Implement this method to perform row grouping and aggregation before plotting.
        """
        pass

    def finish(self) -> None:
        """
        Finish method overload.

        If the looping status is :attr:`.LoopingStatus.Continue`, the produced output files are stored in the
        database via :meth:`~._update_plotter_db`; then the finish method of the parent class is invoked.
        """
        if self.looping_status == LoopingStatus.Continue:
            self._update_plotter_db()  # type: ignore[no-untyped-call]
        super().finish()

    def patch_data_frame(self) -> None:
        """
        Modify the data frame

        This method can be used to perform operation on the data frame, like adding new columns.
        It can be either implemented in the plotter processor subclasses or via a mixin class.
        """
        pass

    @typing.no_type_check
    def _update_plotter_db(self) -> None:
        """
        Updates the Plotter DB.

        A plotter subclass primarily generates plots as output in most cases, which means that no additional information
        needs to be stored in the database. This is sufficient to prevent unnecessary execution of the processor
        when it is not required.

        This method is actually protected against execution without a valid database instance.

        .. versionchanged:: v2.0.0
            Using the :attr:`.Processor.replica_name` instead of the :attr:`.Processor.name` as plotter_name in the
            :class:`.PlotterOutput` Model.

        """
        if self._database is None:
            # there is no active database connection. No need to continue. Inform the user and return
            log.warning('No database connection available. Impossible to update the plotter output')
            return

        if len(self.output_filename_list) == 0:
            # there is no need to make an entry because there are no saved files
            return

        # NOTE(review): the filename list is passed for the checksum column as well; presumably the field of the
        # PlotterOutput model converts it into the actual checksum -- confirm against the model definition.
        PlotterOutput.std_upsert(
            {
                'plotter_name': self.replica_name,
                'filename_list': self.output_filename_list,
                'checksum': self.output_filename_list,
            }
        ).execute()

Coverage for src / mafw / processor_library / abstract_plotter.py: 98%

99 statements