Page 178 - FULL REPORT 30012024
P. 178

data['bmi_category'] = data['bmi'].apply(categorize_bmi)

# Encoding categorical variables.
# Each column keeps its own named encoder so the fitted mapping for that
# column remains available after this step.
le_gender = LabelEncoder()
le_smoking_status = LabelEncoder()
le_ever_married = LabelEncoder()
le_work_type = LabelEncoder()
le_Residence_type = LabelEncoder()
le_age_category = LabelEncoder()
le_bmi_category = LabelEncoder()

column_encoders = {
    'gender': le_gender,
    'smoking_status': le_smoking_status,
    'ever_married': le_ever_married,
    'work_type': le_work_type,
    'Residence_type': le_Residence_type,
    'age_category': le_age_category,
    'bmi_category': le_bmi_category,
}
for column, encoder in column_encoders.items():
    data[column] = encoder.fit_transform(data[column])

# Drop original age and bmi columns (replaced by their *_category versions)
data.drop(['age', 'bmi'], axis=1, inplace=True)

# Prepare data for modeling
X = data.drop('stroke', axis=1)
y = data['stroke']

# Train-test split.
# Split BEFORE oversampling: fitting SMOTE on the full dataset lets synthetic
# points derived from test rows leak into training and puts synthetic rows in
# the test set, which inflates the evaluation scores. Stratifying keeps the
# original class ratio in both partitions.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Balancing using SMOTE (training partition only; the test set stays untouched)
# NOTE(review): SMOTE interpolates between samples, so label-encoded columns
# can receive non-integer values; confirm whether that is acceptable here.
smote = SMOTE(random_state=42)
X_balanced, y_balanced = smote.fit_resample(X_train_raw, y_train_raw)

# Keep the names used by the rest of the script: X_train / y_train are the
# (balanced) data the model is actually fitted on.
X_train, y_train = X_balanced, y_balanced

# Model training
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Model Evaluation
y_pred = model.predict(X_test)

# evaluation metrics
                                                               161
   173   174   175   176   177   178   179   180   181   182   183