Source code for health_tracking.workouts

import pandas as pd
import seaborn as sns

from . import AppleHealthParser, constants


class Workouts(object):
    """
    Parse and give access to the ``Workout`` data of an Apple Health App data dump.

    Provides plotting functionalities. After construction the instance exposes
    ``workouts`` (all workouts), ``workout_types`` and one attribute per workout
    type named ``"<workout_type.lower()>s"``. All of them can also be reached via
    subscription, e.g. ``instance["workouts"]``.

    Args:
        zip_dump_path (str, optional): Path to the zipped data dump. Defaults to constants.ZIP_PATH.
        unzip_path (str, optional): Path to the unzipped data dump. Defaults to constants.UNZIP_PATH.
        force_unzip (bool, optional): Flag to force unzipping the data again. Can be useful for new data. Defaults to False.
    """

    def __init__(
        self,
        zip_dump_path: str = constants.ZIP_PATH,
        unzip_path: str = constants.UNZIP_PATH,
        force_unzip: bool = False
    ) -> None:
        def create_offset_column(data_frame: pd.DataFrame) -> None:
            """
            Helper that adds, in place, the column with the offset in days since the first row's workout.

            Args:
                data_frame (pd.DataFrame): Workouts ``pd.DataFrame``
            """
            dates = data_frame[constants.WORKOUT_COLUMN_DATE]
            if dates.empty:
                # nothing to measure against; still create the column so later lookups do not fail
                data_frame[constants.WORKOUT_COLUMN_OFFSET] = pd.Series(index=data_frame.index, dtype="int64")
                return

            # positional access: the first row is the reference, whatever the index labels are
            first_workout = dates.iloc[0]
            data_frame[constants.WORKOUT_COLUMN_OFFSET] = dates.apply(lambda date: (date - first_workout).days)

        self._parser = AppleHealthParser(zip_dump_path, unzip_path, force_unzip)
        self.workouts, self.workout_types = self._parser.extract_workouts()

        # Names accepted by ``__getitem__`` and ``plot``. They have to be lower-cased
        # because the per-type attributes below are created with lower-cased names.
        self._valid_data_frames = [
            *[f"{workout_type.lower()}s" for workout_type in self.workout_types],
            "workouts"
        ]

        # new column with the offset in days since the first workout
        create_offset_column(self.workouts)

        # create data frames for each workout type
        for workout_type in self.workout_types:
            type_df = self.workouts[self.workouts[constants.WORKOUT_TYPE] == workout_type].copy().reset_index(drop=True)
            type_df[constants.WORKOUT_COLUMN_MINUTES_PER_KM] = type_df.apply(calc_minutes_per_km, axis=1)

            # new column with the offset in days since the first workout of ``workout_type``
            create_offset_column(type_df)

            setattr(self, f"{workout_type.lower()}s", type_df)

    def __getitem__(self, workout_type: str) -> pd.DataFrame:
        """
        Get one of the ``DataFrame``s via subscription.

        Args:
            workout_type (str): One of the existing ``workout_types`` (lower-cased, with a trailing ``s``)
                or ``workouts`` for the whole data

        Raises:
            ValueError: If an incorrect ``workout_type`` is given

        Returns:
            pd.DataFrame: The ``DataFrame`` of type ``workout_type``
        """
        if workout_type not in self._valid_data_frames:
            raise ValueError(f"'workout_type' need to be one of: {self._valid_data_frames}\n\tGiven: {workout_type}")

        return getattr(self, workout_type)

    def plot(
        self,
        x: str,
        y: str,
        plot_type: str,
        workout_type: str = "runnings",
        outlier: (int, int) = None,
        z: str = None,
        kind: str = "reg",
        xlim: (int, int) = 0.01,
        show_new_years: bool = True,
        legend: str = "brief"
    ):
        """
        Plot two columns of a workout ``DataFrame`` against each other with seaborn.

        Args:
            x (str): Column for the x axis. Must be one of ``constants.WORKOUT_PLOTTING_CLOUMN``.
            y (str): Column for the y axis. Must be one of ``constants.WORKOUT_PLOTTING_CLOUMN``.
            plot_type (str): ``"joint"`` or ``"scatter"`` (a few alternative spellings are accepted).
            workout_type (str, optional): Name of the data frame to plot, see ``__getitem__``. Defaults to "runnings".
            outlier ((int, int), optional): Inclusive ``(lower, upper)`` bounds for ``y``; rows outside are dropped.
                Defaults to None (no filtering).
            z (str, optional): Column used as ``hue`` of the scatter plot. Defaults to None.
            kind (str, optional): Passed on as ``kind`` to ``sns.jointplot``. Defaults to "reg".
            xlim (optional): ``None`` uses the data range of ``x``, a ``tuple`` is used as is and a ``float < 1``
                widens the data range by that fraction on both sides. Only the joint plot uses it. Defaults to 0.01.
            show_new_years (bool, optional): Draw a vertical line per new year into the scatter plot. Defaults to True.
            legend (str, optional): Passed on as ``legend`` to ``sns.scatterplot``. Defaults to "brief".

        Raises:
            ValueError: If one of the parameters is invalid or the outlier filter leaves no data.

        Returns:
            The object returned by ``sns.jointplot`` or ``sns.scatterplot`` respectively.
        """
        # check x, y, z parameter
        plottable = constants.WORKOUT_PLOTTING_CLOUMN
        if x not in plottable or y not in plottable or (z is not None and z not in plottable):
            raise ValueError(
                f"Parameter 'x', 'y', and 'z' must be one of: {plottable}\n\tGiven: \tx: {x}\n\t\ty: {y}\n\t\tz: {z}"
            )

        # check workout_type parameter
        if workout_type not in self._valid_data_frames:
            raise ValueError(f"Parameter 'workout_type' must be one of: {self._valid_data_frames}\n\tGiven: {workout_type}")
        data = self[workout_type]

        # check outlier parameter
        if outlier is not None:
            data = data[(data[y] <= outlier[1]) & (data[y] >= outlier[0])]
            if data.empty:
                raise ValueError(f"Choose 'outlier' such that the resulting plotting data is not empty!\n\tGiven: {outlier}")

        # check xlim parameter
        if xlim is None:
            x_limits = (data[x].min(), data[x].max())
        elif isinstance(xlim, tuple):
            x_limits = xlim
        elif isinstance(xlim, float) and xlim < 1:
            # ``xlim`` is interpreted as percentage value:
            # subtract/add this percentage of the data range to create the limits
            upper = data[x].max()
            lower = data[x].min()
            x_range = upper - lower
            x_limits = (lower - xlim * x_range, upper + xlim * x_range)
        else:
            raise ValueError(f"Value of 'xlim' is invalid!\n\tGiven: {xlim}")

        # Plot or raise Exception
        # ``x``/``y`` are passed by keyword: recent seaborn versions only accept ``data`` positionally.
        if plot_type in ("joint", "jointplot", "joint_plot", "joint-plot", "joint plot"):
            return sns.jointplot(x=x, y=y, data=data, kind=kind, xlim=x_limits)

        if plot_type in ("scatter", "scatterplot", "scatter_plot", "scatter-plot", "scatter plot"):
            scatter_plot = sns.scatterplot(x=x, y=y, data=data, hue=z, legend=legend)

            if show_new_years:
                # The outlier filter keeps the original index labels; renumber the rows so that
                # the first/last row lookups of the helper are well-defined.
                # NOTE(review): the offsets are in days, so the lines presumably only make sense
                # when ``x`` is the offset column - confirm.
                for new_year_offset in get_new_years_offsets(data.reset_index(drop=True)):
                    # ``ymin``/``ymax`` of ``axvline`` are axes fractions (0..1), not data values;
                    # the defaults span the full height of the plot.
                    scatter_plot.axvline(new_year_offset)

            return scatter_plot

        raise ValueError(f"Parameter 'plot_type' is invalid!\n\tGiven: {plot_type}")
def calc_minutes_per_km(row: pd.DataFrame) -> pd.Series:
    """
    Helper function that calculates the pace of a single workout as duration divided by distance.

    Apply via: ``data_frame.apply(calc_minutes_per_km, axis=1)``. Used that way, ``row`` is
    one row of the workouts ``pd.DataFrame`` handed over as ``pd.Series`` and the returned
    scalars form the new column.

    Args:
        row (pd.DataFrame): Row of workouts ``pd.DataFrame`` as ``pd.Series``

    Returns:
        pd.Series: Value of the new column for this row; ``0`` if duration or distance is ``0``
    """
    duration_column = constants.WORKOUT_COLUMN_DURATION
    distance_column = constants.WORKOUT_COLUMN_DISTANCE

    # A zero operand would give a pace of 0 or a division by zero (inf),
    # which breaks downstream computations - report 0 instead.
    if row[duration_column] == 0 or row[distance_column] == 0:
        return 0

    return row[duration_column] / row[distance_column]
def get_new_years_offsets(workout_data_frame: pd.DataFrame) -> list:
    """
    Helper function that computes the offsets for new years since the first workout, in days.

    The first and the last row of ``workout_data_frame`` are taken as first and last workout;
    one offset is produced for every 1st of January that lies between their years.

    Args:
        workout_data_frame (pd.DataFrame): Workouts ``pd.DataFrame``

    Returns:
        list: elements are the offsets for new years in days; empty if the frame has no rows
    """
    workout_dates = workout_data_frame[constants.WORKOUT_COLUMN_DATE]
    if workout_dates.empty:
        return []

    # Positional access: after filtering rows the index labels are no longer 0..n-1,
    # so a label lookup of ``0`` / ``n - 1`` could fail or pick the wrong row.
    first_workout = workout_dates.iloc[0]
    last_workout = workout_dates.iloc[-1]

    # ISO dates are parsed unambiguously; the time zone of the first workout is reused
    # so that the subtraction does not mix naive and aware timestamps.
    return [
        (pd.Timestamp(f"{year}-01-01", tz=first_workout.tz) - first_workout).days
        for year in range(first_workout.year + 1, last_workout.year + 1)
    ]