Page 178 - FULL REPORT 30012024
P. 178
# Derive a categorical BMI feature from the numeric 'bmi' column.
data['bmi_category'] = data['bmi'].apply(categorize_bmi)

# Encoding categorical variables.
# One LabelEncoder is kept per column, under its own name, so each fitted
# mapping stays available after this step (presumably for decoding or for
# encoding new inputs later -- confirm against the rest of the file).
le_gender = LabelEncoder()
le_smoking_status = LabelEncoder()
le_ever_married = LabelEncoder()
le_work_type = LabelEncoder()
le_Residence_type = LabelEncoder()
le_age_category = LabelEncoder()
le_bmi_category = LabelEncoder()

# Each column is overwritten in place with its integer codes. Long
# assignments are wrapped inside the call parentheses; breaking the line
# right after '=' is a SyntaxError.
data['gender'] = le_gender.fit_transform(data['gender'])
data['smoking_status'] = le_smoking_status.fit_transform(
    data['smoking_status']
)
data['ever_married'] = le_ever_married.fit_transform(
    data['ever_married']
)
data['work_type'] = le_work_type.fit_transform(data['work_type'])
data['Residence_type'] = le_Residence_type.fit_transform(
    data['Residence_type']
)
data['age_category'] = le_age_category.fit_transform(
    data['age_category']
)
data['bmi_category'] = le_bmi_category.fit_transform(
    data['bmi_category']
)

# Drop original age and bmi columns; the encoded *_category columns above
# are the ones kept as features.
data.drop(['age', 'bmi'], axis=1, inplace=True)
# Prepare data for modeling: every remaining column except the target is a
# feature.
X = data.drop('stroke', axis=1)
y = data['stroke']

# Train-test split.
# The split happens BEFORE any resampling so the held-out set contains only
# original rows. Oversampling first would leak information: SMOTE builds
# synthetic samples from neighbouring minority rows, so rows that end up in
# the test set would influence the training data and the scores would be
# over-optimistic. stratify=y keeps the class ratio of 'stroke' the same in
# both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Balancing using SMOTE -- applied to the training partition only.
smote = SMOTE(random_state=42)
X_balanced, y_balanced = smote.fit_resample(X_train, y_train)

# Keep X_train / y_train referring to the data the model is actually fitted
# on, as they did before this reordering.
X_train, y_train = X_balanced, y_balanced

# Model training
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Model Evaluation
# NOTE: X_test keeps the original class distribution, so accuracy alone can
# look good for a model that rarely predicts the minority class; per-class
# precision/recall are more informative here.
y_pred = model.predict(X_test)
# evaluation metrics
161