Page 177 - FULL REPORT 30012024
P. 177
APPENDIX B: TRAINED_MODEL.PY CODES
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from imblearn.over_sampling import SMOTE
from joblib import dump
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
# Load data from cleaned_dataset.csv (path is relative to the working directory).
data = pd.read_csv('cleaned_dataset.csv')

# Data preprocessing: narrow the frame to the ten columns listed below, in
# this order; any other column present in the CSV is dropped.
data = data[[
    'gender',
    'age',
    'hypertension',
    'heart_disease',
    'ever_married',
    'work_type',
    'Residence_type',
    'bmi',
    'smoking_status',
    'stroke',
]]
# Feature Engineering for Age and BMI
def categorize_age(age):
    """Map a numeric age to a coarse age-band label.

    Bands (upper bounds are inclusive):
        age <= 14  -> 'Child'
        age <= 24  -> 'Youth'
        age <= 64  -> 'Adult'
        otherwise  -> 'Senior'

    Args:
        age: The age to classify, as an int or float. May be missing
            (``None``, NaN, or ``pd.NA``).

    Returns:
        One of 'Child', 'Youth', 'Adult', 'Senior', or ``None`` when
        ``age`` is missing.
    """
    # Every comparison against NaN evaluates to False, so without this guard
    # a missing age would fall through all branches and be labelled 'Senior'.
    # Returning None keeps the value missing instead of inventing a category.
    if pd.isna(age):
        return None
    if age <= 14:
        return 'Child'
    if age <= 24:
        return 'Youth'
    if age <= 64:
        return 'Adult'
    return 'Senior'
def categorize_bmi(bmi):
    """Map a numeric BMI value to a weight-class label.

    Bands (upper bounds are exclusive):
        bmi < 18.5  -> 'Underweight'
        bmi < 25    -> 'Normal weight'
        bmi < 30    -> 'Overweight'
        otherwise   -> 'Obesity'

    Args:
        bmi: The BMI to classify, as an int or float. May be missing
            (``None``, NaN, or ``pd.NA``).

    Returns:
        One of 'Underweight', 'Normal weight', 'Overweight', 'Obesity',
        or ``None`` when ``bmi`` is missing.
    """
    # Every comparison against NaN evaluates to False, so without this guard
    # a missing BMI would fall through all branches and be labelled 'Obesity'.
    # Returning None keeps the value missing instead of inventing a category.
    if pd.isna(bmi):
        return None
    if bmi < 18.5:
        return 'Underweight'
    if bmi < 25:
        return 'Normal weight'
    if bmi < 30:
        return 'Overweight'
    return 'Obesity'
# Add an 'age_category' column by running categorize_age on each value of
# the 'age' column, one element at a time.
data['age_category'] = data['age'].map(categorize_age)
160