Source code for health_tracking

# -*- coding: utf-8 -*-
from pkg_resources import get_distribution, DistributionNotFound

# Resolve the installed distribution's version once, at import time.
# Change here if project is renamed and does not equal the package name
dist_name = 'health-tracking'

try:
    # Only the metadata lookup can raise, so it is the only guarded statement.
    __version__ = get_distribution(dist_name).version
except DistributionNotFound:
    # The lookup found no installed distribution under ``dist_name``.
    __version__ = 'unknown'
finally:
    # The helpers are only needed for the lookup above; drop them from the
    # package namespace whether or not the lookup succeeded.
    del get_distribution, DistributionNotFound


##########################################


import re
import os
import io
import shutil
import zipfile

import pandas as pd
import xml.etree.ElementTree as ET

from . import constants


class Singleton(type):
    """
    Metaclass that gives every class using it a single shared instance.

    The first call of such a class builds the instance as usual; every later
    call returns that same object and ignores the arguments it was given.
    """

    # Maps each class to the one instance that was created for it.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        registry = cls._instances

        # Already constructed once: hand back the stored instance.
        if cls in registry:
            return registry[cls]

        instance = super().__call__(*args, **kwargs)
        registry[cls] = instance
        return instance
class AppleHealthParser(metaclass=Singleton):
    """
    Parse and give access to Apple Health App dump data.

    The class uses ``Singleton`` as its metaclass, so only the first
    construction runs ``__init__``; later calls return that same instance.

    Args:
        zip_dump_path (str, optional): Path to the zipped data dump.
            Defaults to constants.ZIP_PATH.
        unzip_path (str, optional): Path to the unzipped data dump.
            Defaults to constants.UNZIP_PATH.
        force_unzip (bool, optional): Flag to force unzipping the data again.
            Can be useful for new data. Defaults to False.
    """

    def __init__(
        self,
        zip_dump_path: str = constants.ZIP_PATH,
        unzip_path: str = constants.UNZIP_PATH,
        force_unzip: bool = False
    ) -> None:
        # Inform the user which export version the parser was tested with.
        print("AppleHealthParser is tested for HealthKit Export Version: 11")

        # Remove a previous extraction when a fresh one is requested. The
        # existence check keeps ``force_unzip=True`` from failing when nothing
        # has been extracted yet.
        if force_unzip and os.path.exists(unzip_path):
            shutil.rmtree(unzip_path)

        if not os.path.exists(unzip_path):
            # Open the archive straight from disk and close it again once the
            # extraction is done.
            with zipfile.ZipFile(zip_dump_path) as zipped_export:
                # Extract into the parent directory of ``unzip_path``.
                zipped_export.extractall(os.path.split(unzip_path)[0])

        self._tree = ET.parse(constants.XML_PATH)
        self._health_data = self._tree.getroot()

        # Lazily filled caches, one per element type.
        self._export_date = None
        self._me = None
        self._workouts = None
        self._workout_types = None
        self._activity_summaries = None
        self._records = None
        self._correlations = None
        self._clinical_records = None

    def _fix_data_types(self, data_frame: pd.DataFrame) -> pd.DataFrame:
        """
        Fix the data types of an extracted ``DataFrame``.

        Every column is converted to a numeric dtype when all of its values
        allow it. Afterwards every column whose name contains ``date``
        (case-insensitive) is converted to datetimes when possible. Columns
        that cannot be converted are kept unchanged.

        Args:
            data_frame (pd.DataFrame): Extracted ``DataFrame``

        Returns:
            pd.DataFrame: New ``DataFrame`` with fixed data types
        """
        result = data_frame.copy()

        for column in result.columns:
            # Explicit replacement for ``pd.to_numeric(..., errors='ignore')``,
            # which recent pandas versions deprecate.
            try:
                result[column] = pd.to_numeric(result[column])
            except (ValueError, TypeError):
                pass  # not a numeric column, keep the raw values

            if "date" in column.lower():
                try:
                    result[column] = pd.to_datetime(result[column])
                except (ValueError, TypeError, OverflowError):
                    pass  # best effort: keep the column unparsed

        return result

    def _extract_elements_of_type(self, element_type: str) -> pd.DataFrame:
        """
        Returns a ``DataFrame`` with the elements of ``element_type``.
        Do not use by your own!

        Each matching XML element becomes one row built from the element's
        attributes.

        Args:
            element_type (str): Need to fit one of ``constants.ELEMENT_TAGS``

        Raises:
            ValueError: If wrong ``element_type`` is given

        Returns:
            pd.DataFrame: of given ``element_type`` or ``None`` if empty
        """
        if element_type not in constants.ELEMENT_TAGS:
            raise ValueError(f"'element_type' need to be one of: {constants.ELEMENT_TAGS}")

        elements = self._tree.findall(element_type)
        result = pd.DataFrame([element.attrib for element in elements])
        result = self._fix_data_types(result)

        return None if result.empty else result

    def extract_workouts(self) -> (pd.DataFrame, set):
        """
        Returns ``Workout`` elements and ``set`` of all existing workout types.
        Shortens the workout types.

        A type is shortened to the lower-cased first group of
        ``constants.WORKOUT_REGEX``; a value the pattern does not match is
        kept unchanged.

        Returns:
            (pd.DataFrame, set): of type ``Workout`` or ``None`` if empty and
            set of available workout types (empty set if there are none)
        """
        # Increase performance by not parsing again.
        if self._workouts is None and self._workout_types is None:
            workouts = self._extract_elements_of_type(constants.WORKOUT_TAG)

            if workouts is None:
                # No workouts in the export: remember the empty type set so
                # the XML is not searched again on the next call.
                self._workout_types = set()
                return self._workouts, self._workout_types

            pattern = re.compile(constants.WORKOUT_REGEX)

            def shorten(workout_type):
                match = pattern.match(workout_type)
                return match.group(1).lower() if match else workout_type

            workouts[constants.WORKOUT_TYPE] = workouts[constants.WORKOUT_TYPE].map(shorten)

            self._workouts = workouts
            self._workout_types = set(workouts[constants.WORKOUT_TYPE])

        return self._workouts, self._workout_types

    def extract_me(self) -> pd.DataFrame:
        """
        Returns ``Me`` elements.

        Returns:
            pd.DataFrame: of type ``Me`` or ``None`` if empty
        """
        # Increase performance by not parsing again.
        if self._me is None:
            self._me = self._extract_elements_of_type(constants.ME_TAG)

        return self._me

    def extract_records(self) -> pd.DataFrame:
        """
        Returns ``Record`` elements.

        Returns:
            pd.DataFrame: of type ``Record`` or ``None`` if empty
        """
        # Increase performance by not parsing again.
        if self._records is None:
            self._records = self._extract_elements_of_type(constants.RECORD_TAG)

        return self._records

    def extract_correlations(self) -> pd.DataFrame:
        """
        Returns ``Correlation`` elements.

        Returns:
            pd.DataFrame: of type ``Correlation`` or ``None`` if empty
        """
        # Increase performance by not parsing again.
        if self._correlations is None:
            self._correlations = self._extract_elements_of_type(constants.CORRELATION_TAG)

        return self._correlations

    def extract_activity_summaries(self) -> pd.DataFrame:
        """
        Returns ``ActivitySummary`` elements.

        Returns:
            pd.DataFrame: of type ``ActivitySummary`` or ``None`` if empty
        """
        # Increase performance by not parsing again.
        if self._activity_summaries is None:
            self._activity_summaries = self._extract_elements_of_type(constants.ACTIVITY_SUMMARY_TAG)

        return self._activity_summaries

    def extract_clinical_records(self) -> pd.DataFrame:
        """
        Returns ``ClinicalRecord`` elements.

        Returns:
            pd.DataFrame: of type ``ClinicalRecord`` or ``None`` if empty
        """
        # Increase performance by not parsing again.
        if self._clinical_records is None:
            self._clinical_records = self._extract_elements_of_type(constants.CLINICAL_RECORD_TAG)

        return self._clinical_records

    def get_export_date(self) -> pd.Timestamp:
        """
        Returns the ``pd.Timestamp`` of exporting.

        Returns:
            pd.Timestamp: Export timestamp, or ``None`` if the export
            contains no export-date element
        """
        # Increase performance by not parsing again.
        if self._export_date is None:
            data_frame = self._extract_elements_of_type(constants.EXPORT_DATE_TAG)

            if data_frame is None:
                # Nothing to parse; report the absence the same way the
                # other accessors do.
                return None

            self._export_date = pd.to_datetime(data_frame["value"]).iloc[0]

        return self._export_date