Tabular Classification
Scikit-learn
English
hierarchical
healthcare
ehr
copd
clinical-risk
tabular
scikit-learn
clustering
unsupervised
Instructions to use stormid/copd-model-e with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Scikit-learn
How to use stormid/copd-model-e with Scikit-learn:
from huggingface_hub import hf_hub_download
import joblib

model = joblib.load(
    hf_hub_download("stormid/copd-model-e", "sklearn_model.joblib")
)
# only load pickle files from sources you trust
# read more about it here https://skops.readthedocs.io/en/stable/persistence.html
- Notebooks
- Google Colab
- Kaggle
| """ | |
| Script for preprocessing pharmacy data | |
| -------- | |
| Process pharmacy data and track inhaler prescriptions and rescue meds | |
| """ | |
| import json | |
| import pandas as pd | |
| from datetime import date | |
| from dateutil.relativedelta import relativedelta | |
| from utils.common import (add_hist_adm_presc, correct_column_names, | |
| first_patient_appearance) | |
| from utils.presc_common import initialize_presc_data, track_medication | |
def add_inhaler_mappings(df):
    """
    Load inhaler prescription mappings and track where they appear in the data
    --------
    :param df: dataframe with a PI_Approved_Name column; the indicator
        columns are written onto this dataframe before two of them are dropped
    :return: dataframe with a 0/1 column added for each inhaler type
    """
    print('Mapping inhaler prescriptions')

    # Load in inhaler mapping (each key maps to a list of name patterns)
    with open('mappings/inhaler_mapping.json') as json_file:
        inhaler_mapping = json.load(json_file)

    for k, v in inhaler_mapping.items():
        # The patterns for one inhaler type are OR-ed into a single regex.
        # na=False treats a missing approved name as "no match"; without it
        # the result holds NaN and the cast to int raises.
        df[k + '_inhaler'] = df.PI_Approved_Name.str.contains(
            '|'.join(v), na=False).astype(int)

    # Remove for now as empty
    df = df.drop(['LABA-LAMA-ICS_inhaler', 'Ignore_inhaler'], axis=1)

    return df
def add_eoy_column(df, dt_col, eoy_date):
    """
    Add EOY relative to user-specified end date
    --------
    Each row gets the first occurrence of the configured end-of-year
    month/day that is on or after the row's own date.

    :param df: dataframe
    :param dt_col: date column in dataframe (must support the .dt accessor)
    :param eoy_date: EOY date from config; only its month and day are used
    :return: updated df with a datetime 'eoy' column added; the index is
        reset and the input dataframe is left untouched
    """
    # reset_index returns a new frame, so the caller's df is not modified and
    # the new column aligns on a clean 0..n-1 index
    df = df.reset_index(drop=True)

    # Only the month and day of the user-specified date matter
    end_date = pd.to_datetime(eoy_date)

    # Build the EOY date within each row's own calendar year as real
    # datetimes, so the comparison below is datetime-vs-datetime rather than
    # datetime-vs-datetime.date (which newer pandas versions reject)
    eoy = pd.to_datetime(pd.DataFrame({
        'year': df[dt_col].dt.year,
        'month': end_date.month,
        'day': end_date.day,
    }))

    # Rows dated after that year's EOY belong to the following year's EOY
    late = df[dt_col] > eoy
    eoy.loc[late] = eoy.loc[late] + pd.DateOffset(years=1)

    df['eoy'] = eoy

    return df
def calc_presc_per_year(df):
    """
    Collapse the data to one row per SafeHavenID and eoy
    --------
    :param df: dataframe to be reduced
    :return: dataframe indexed by (SafeHavenID, eoy) holding the last value
        in each group for the end-of-year columns, followed by the group sums
        of the count columns renamed with a '_per_year' suffix
    """
    print('Reducing to 1 row per year')

    by_patient_year = df.groupby(['SafeHavenID', 'eoy'])

    # Columns where the last value within the group is kept
    snapshot_cols = [
        'presc_to_date',
        'days_since_rescue',
        'rescue_to_date',
        'anxiety_depression_presc_to_date',
        'rescue_date',
    ]
    snapshot = by_patient_year[snapshot_cols].last()

    # Columns that are summed over the group
    counted_cols = [
        'SALBUTAMOL',
        'SABA_inhaler',
        'LABA_inhaler',
        'LAMA_inhaler',
        'SAMA_inhaler',
        'ICS_inhaler',
        'LABA-ICS_inhaler',
        'LAMA +LABA-ICS_inhaler',
        'SABA + SAMA_inhaler',
        'MCS_inhaler',
        'rescue_meds',
        'presc',
        'anxiety_depression_presc',
    ]
    yearly_totals = by_patient_year[counted_cols].sum()
    yearly_totals.columns = [
        '{}_per_year'.format(name) for name in counted_cols]

    # Index-aligned join on (SafeHavenID, eoy)
    return snapshot.join(yearly_totals)
def main():
    """
    Run the pharmacy preprocessing pipeline end to end
    --------
    Reads config.json, loads the pharmacy extract, adds inhaler and
    medication tracking columns, attaches the EOY date, then pickles both
    the per-event dataset and the one-row-per-year dataset to the model
    data path.
    """
    # Load in config items
    with open('../../../config.json') as json_config_file:
        settings = json.load(json_config_file)

    # Load in data
    events = initialize_presc_data(
        settings['extract_data_path'] + 'Pharmacy_Cohort3R.csv')

    # Save first date in dataset
    out_dir = settings['model_data_path']
    first_patient_appearance(events, 'PRESC_DATE', 'presc', out_dir)

    # Add inhaler mapping, then track salbutamol and rescue meds
    events = track_medication(add_inhaler_mappings(events))

    # Drop columns that are not used after this point
    events = events.drop(
        ['PI_Approved_Name', 'PI_BNF_Item_Code', 'code'], axis=1)

    # Add column relative to user-specified date
    events = add_eoy_column(events, 'PRESC_DATE', settings['date'])

    # Flag every row as one prescription event
    events['presc'] = 1

    # Add any historical count columns, computed per patient
    events = (
        events.groupby('SafeHavenID')
        .apply(add_hist_adm_presc, 'presc', 'PRESC_DATE')
        .reset_index(drop=True)
    )

    # Save per event dataset
    events.to_pickle(out_dir + 'validation_presc_proc.pkl')

    # Reduce data to 1 row per year
    yearly = calc_presc_per_year(events)

    # Correct column names
    yearly.columns = correct_column_names(yearly.columns, 'presc')

    # Save data
    yearly.to_pickle(out_dir + 'presc_proc.pkl')


# Runs the pipeline at module level; there is no __main__ guard, so importing
# this file also triggers it.
main()