# Source code for health_tracking

# -*- coding: utf-8 -*-
from pkg_resources import get_distribution, DistributionNotFound

# Resolve the installed package version once at import time; when the
# distribution cannot be found, fall back to the placeholder 'unknown'.
try:
    # Change here if project is renamed and does not equal the package name
    dist_name = 'health-tracking'
    __version__ = get_distribution(dist_name).version
except DistributionNotFound:
    __version__ = 'unknown'
finally:
    # The helpers are only needed for the lookup above, so they are removed
    # from the module namespace again (runs on success and on failure).
    del get_distribution, DistributionNotFound


##########################################


import re
import os
import io
import shutil
import zipfile

import pandas as pd
import xml.etree.ElementTree as ET

from . import constants


class Singleton(type):
    """
    Is used as `metaclass` to achieve a singleton pattern.

    The first call of a class using this metaclass creates the instance and
    stores it; every later call returns that stored instance and silently
    ignores the arguments it was given.
    """

    # Maps each class using this metaclass to its single instance. The dict
    # lives on the metaclass, so it is shared by all singleton classes.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """
        Create the instance of ``cls`` on first use, afterwards reuse it.

        Args:
            *args: Positional arguments forwarded to the constructor on first use only.
            **kwargs: Keyword arguments forwarded to the constructor on first use only.

        Returns:
            The one and only instance of ``cls``.
        """

        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]


class AppleHealthParser(metaclass=Singleton):
    """
    Parse and gives access to Apple Health App dump data.

    The export XML is parsed once when the parser is created. Each
    ``extract_*`` method turns the matching XML elements into a ``DataFrame``
    and keeps a non-empty result so that later calls do not parse again.

    Args:
        zip_dump_path (str, optional): Path to the zipped data dump. Defaults to constants.ZIP_PATH.
        unzip_path (str, optional): Path to the unzipped data dump. Defaults to constants.UNZIP_PATH.
        force_unzip (bool, optional): Flag to force unzipping the data again. Can be useful for new data. Defaults to False.
    """

    def __init__(
        self,
        zip_dump_path: str = constants.ZIP_PATH,
        unzip_path: str = constants.UNZIP_PATH,
        force_unzip: bool = False
    ) -> None:

        # give information about may changing Version
        print("AppleHealthParser is tested for HealthKit Export Version: 11")

        # A forced re-extraction must also work on the very first run, when
        # nothing has been unzipped yet and there is nothing to remove.
        if force_unzip and os.path.exists(unzip_path):
            shutil.rmtree(unzip_path)

        if not os.path.exists(unzip_path):
            # The archive is extracted into the parent directory of
            # ``unzip_path``. The context manager closes the archive again.
            with zipfile.ZipFile(zip_dump_path) as zipped_export:
                zipped_export.extractall(os.path.dirname(unzip_path))

        # NOTE(review): the XML is always read from constants.XML_PATH, even
        # when a custom ``unzip_path`` is passed - confirm both stay in sync.
        self._tree = ET.parse(constants.XML_PATH)
        self._health_data = self._tree.getroot()

        # element types (filled lazily by the extract_* / get_* methods)
        self._export_date = None
        self._me = None
        self._workouts = None
        self._workout_types = None
        self._activity_summaries = None
        self._records = None
        self._correlations = None
        self._clinical_records = None

    @staticmethod
    def _to_numeric_or_keep(column: pd.Series) -> pd.Series:
        """
        Convert ``column`` to a numeric dtype or return it unchanged.

        Replaces ``pd.to_numeric(..., errors='ignore')``, which is deprecated
        in recent pandas versions.

        Args:
            column (pd.Series): Column to convert

        Returns:
            pd.Series: Numeric column if every value could be parsed, else the input
        """

        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    def _fix_data_types(self, data_frame: pd.DataFrame) -> pd.DataFrame:
        """
        Fix the data types of a extracted ``DataFrame``.

        Columns that parse as numbers become numeric. Columns whose name
        contains ``date`` are converted to datetimes on a best-effort basis.

        Args:
            data_frame (pd.DataFrame): Extracted ``DataFrame``

        Returns:
            pd.DataFrame: ``DataFrame`` with fixed data types
        """

        result = data_frame.apply(self._to_numeric_or_keep)

        for column in result.columns:
            if "date" in column.lower():
                try:
                    result[column] = pd.to_datetime(result[column])
                except Exception:
                    # Best effort: a column that cannot be parsed keeps its
                    # original values. ``Exception`` instead of a bare except
                    # lets KeyboardInterrupt / SystemExit pass through.
                    pass

        return result

    def _extract_elements_of_type(self, element_type: str) -> pd.DataFrame:
        """
        Returns a ``DataFrame`` with the elements of ``element_type``. Do not use by your own!

        Args:
            element_type (str): Need to fit one of ``constants.ELEMENT_TAGS``

        Raises:
            ValueError: If wrong ``element_type`` is given

        Returns:
            pd.DataFrame: of given ``element_type`` or ``None`` if empty
        """

        if element_type not in constants.ELEMENT_TAGS:
            raise ValueError(f"'element_type' need to be one of: {constants.ELEMENT_TAGS}")

        # one row per matching element, one column per XML attribute
        elements = self._tree.findall(element_type)
        result = pd.DataFrame([element.attrib for element in elements])

        result = self._fix_data_types(result)

        return None if result.empty else result

    def _cached_elements(self, cache_attribute: str, element_type: str) -> pd.DataFrame:
        """
        Returns the elements of ``element_type`` and remembers them on the instance.

        Args:
            cache_attribute (str): Name of the instance attribute holding the cached result
            element_type (str): Need to fit one of ``constants.ELEMENT_TAGS``

        Returns:
            pd.DataFrame: of given ``element_type`` or ``None`` if empty
        """

        cached = getattr(self, cache_attribute)

        # increase performance by not parsing again. An empty result is
        # ``None`` and is therefore looked up again on the next call.
        if cached is None:
            cached = self._extract_elements_of_type(element_type)
            setattr(self, cache_attribute, cached)

        return cached

    def extract_workouts(self) -> (pd.DataFrame, set):
        """
        Returns ``Workout`` elements and ``set`` of all workout existing types. Shortens the workout types.

        Returns:
            (pd.DataFrame, set): of type ``Workout`` or ``None`` if empty and set of available workout types
            (an empty set if there are no workouts)
        """

        # increase performance by not parsing again.
        if self._workouts is None and self._workout_types is None:

            workouts = self._extract_elements_of_type(constants.WORKOUT_TAG)

            if workouts is None:
                # no workouts in the export: nothing to shorten
                self._workout_types = set()
            else:
                # keep only the first regex group of the type, lower-cased
                workouts[constants.WORKOUT_TYPE] = workouts[constants.WORKOUT_TYPE].map(
                    lambda workout_type: re.match(constants.WORKOUT_REGEX, workout_type).group(1).lower()
                )
                self._workout_types = set(workouts[constants.WORKOUT_TYPE])

            # stored last, so a failure above does not leave a half-processed cache
            self._workouts = workouts

        return self._workouts, self._workout_types

    def extract_me(self) -> pd.DataFrame:
        """
        Returns ``Me`` elements.

        Returns:
            pd.DataFrame: of type ``Me`` or ``None`` if empty
        """

        return self._cached_elements("_me", constants.ME_TAG)

    def extract_records(self) -> pd.DataFrame:
        """
        Returns ``Record`` elements.

        Returns:
            pd.DataFrame: of type ``Record`` or ``None`` if empty
        """

        return self._cached_elements("_records", constants.RECORD_TAG)

    def extract_correlations(self) -> pd.DataFrame:
        """
        Returns ``Correlation`` elements.

        Returns:
            pd.DataFrame: of type ``Correlation`` or ``None`` if empty
        """

        return self._cached_elements("_correlations", constants.CORRELATION_TAG)

    def extract_activity_summaries(self) -> pd.DataFrame:
        """
        Returns ``ActivitySummary`` elements.

        Returns:
            pd.DataFrame: of type ``ActivitySummary`` or ``None`` if empty
        """

        return self._cached_elements("_activity_summaries", constants.ACTIVITY_SUMMARY_TAG)

    def extract_clinical_records(self) -> pd.DataFrame:
        """
        Returns ``ClinicalRecord`` elements.

        Returns:
            pd.DataFrame: of type ``ClinicalRecord`` or ``None`` if empty
        """

        return self._cached_elements("_clinical_records", constants.CLINICAL_RECORD_TAG)

    def get_export_date(self) -> pd.Timestamp:
        """
        Returns the ``pd.Timestamp`` of exporting.

        Returns:
            pd.Timestamp: Export timestamp or ``None`` if the export contains no export date element
        """

        # increase performance by not parsing again.
        if self._export_date is None:
            data_frame = self._extract_elements_of_type(constants.EXPORT_DATE_TAG)

            if data_frame is None:
                # same convention as the extract_* methods: nothing found -> None
                return None

            self._export_date = pd.to_datetime(data_frame["value"]).iloc[0]

        return self._export_date