Tabular Classification
Scikit-learn
English
hierarchical
healthcare
ehr
copd
clinical-risk
tabular
scikit-learn
clustering
unsupervised
Instructions to use stormid/copd-model-e with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Scikit-learn
How to use stormid/copd-model-e with Scikit-learn:
```python
from huggingface_hub import hf_hub_download
import joblib

model = joblib.load(
    hf_hub_download("stormid/copd-model-e", "sklearn_model.joblib")
)
# only load pickle files from sources you trust
# read more about it here https://skops.readthedocs.io/en/stable/persistence.html
```
- Notebooks
- Google Colab
- Kaggle
| """ | |
| Map GOLD standard COPD groupings from REC/SUP IDs to SafeHavenIDs. | |
| -------- | |
| NB: Data contained within 'RC_SU1_spirometry_data.csv' has been created | |
| from data within the teams space. | |
| """ | |
| import pandas as pd | |
# Base directory, CSV input directory, and destination of the output pickle
file_path = '<YOUR_DATA_PATH>/copd.model-e/'
input_file_path = f'{file_path}training/src/data/'
output_file_path = (
    '<YOUR_DATA_PATH>/Model_E_Extracts/rec_sup_spirometry_data.pkl'
)
def read_data(file):
    """
    Load a CSV source into a dataframe
    --------
    :param file: string filename (passed straight to pandas.read_csv)
    :return: dataframe holding the parsed CSV contents
    """
    return pd.read_csv(file)
def calc_gold_grade(data):
    """
    Derive the GOLD grade for COPD classification from a FEV1% reading
    --------
    :param data: row-like object that can be indexed with 'FEV1%'
    :return: 'GOLD 1' (>= 80), 'GOLD 2' (50 to < 80), 'GOLD 3' (30 to < 50),
        'GOLD 4' (< 30), or '' when no comparison holds (e.g. a NaN reading)
    """
    pct = data['FEV1%']

    # Thresholds are checked from highest to lowest, so each test only needs
    # its lower bound: the upper bound was already ruled out above it.
    if pct >= 80:
        return 'GOLD 1'
    if pct >= 50:
        return 'GOLD 2'
    if pct >= 30:
        return 'GOLD 3'
    if pct < 30:
        return 'GOLD 4'

    # Only reachable when every comparison is False, e.g. for NaN
    return ''
def add_SH_mappings_for_RC_and_SU1(RC_IDs, SU1_IDs, spirometry_data):
    """
    Attach SH ID mappings to the RC and SU1 spirometry data
    --------
    :param RC_IDs: dataframe containing RECEIVER - SH ID mappings, keyed by
        an 'RNo' column
    :param SU1_IDs: dataframe containing SU1 - SH ID mappings, keyed by a
        'Study_Number' column
    :param spirometry_data: spirometry data for RC and SU1 with a 'StudyId'
        column
    :return: spirometry rows joined to their SH ID mapping columns; rows with
        any missing value after the join are removed, and 'FEV1%' and
        'SafeHavenID' are cast to int32
    """
    # Give both lookups the same key column so they can be stacked
    id_frames = [
        RC_IDs.rename(columns={'RNo': 'StudyId'}),
        SU1_IDs.rename(columns={'Study_Number': 'StudyId'}),
    ]
    id_lookup = pd.concat(id_frames, ignore_index=True)

    # Left join keeps every spirometry row; unmatched rows get NaN IDs and
    # are removed by the dropna that follows
    joined = spirometry_data.merge(id_lookup, on="StudyId", how="left")
    complete_rows = joined.dropna()

    return complete_rows.astype({'FEV1%': 'int32', 'SafeHavenID': 'int32'})
def main():
    """
    Build the GOLD-graded spirometry extract and pickle it
    --------
    Reads the RC/SU1 spirometry CSV from input_file_path, drops rows with
    missing values, adds a 'GOLD grade' column, joins the SafeHaven ID
    lookups and writes the result to output_file_path.
    :return: None
    """
    # Load the spirometry readings, discarding incomplete rows
    spiro_csv = input_file_path + "RC_SU1_spirometry_data.csv"
    spiro = read_data(spiro_csv).dropna()

    # Grade every participant row by row from its FEV1% value
    spiro['GOLD grade'] = spiro.apply(calc_gold_grade, axis=1)

    # Load the RC and SU1 SafeHaven ID lookup tables
    receiver_lookup = read_data(
        "<YOUR_DATA_PATH>/EXAMPLE_STUDY_DATA/Cohort3Rand.csv")
    scaleup_lookup = read_data(
        "<YOUR_DATA_PATH>/SU_IDs/Scale_Up_lookup.csv")

    # Attach the SH IDs to the graded spirometry rows and persist the result
    with_sh_ids = add_SH_mappings_for_RC_and_SU1(
        receiver_lookup, scaleup_lookup, spiro)
    with_sh_ids.to_pickle(output_file_path)
# Run the extraction only when this file is executed as a script; an
# unguarded call would read and write the data files on every import.
if __name__ == "__main__":
    main()