import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.datasets import make_classification

# Build a reproducible synthetic binary-classification problem:
# 5000 rows, 10 features (6 informative, 2 redundant; the rest are noise).
X, y = make_classification(
    random_state=42,
    n_samples=5000,
    n_features=10,
    n_informative=6,
    n_redundant=2,
)

# Wrap the arrays in a DataFrame (feature columns are named 0..9) and
# append the labels as a "target" column.
df = pd.DataFrame(X).assign(target=y)

# Preview of the first rows; only displayed when run as a notebook cell.
df.head()

# Split the frame back into the feature matrix and the label vector.
y = df["target"]
X = df.drop(columns="target")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both partitions so no test information leaks in.
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# AdaBoost over 100 depth-1 trees (decision stumps) with a 0.5 learning rate.
# `fit` returns the estimator itself, so the fitted model is bound directly.
ada_model = AdaBoostClassifier(
    random_state=42,
    learning_rate=0.5,
    n_estimators=100,
    estimator=DecisionTreeClassifier(max_depth=1),  # parameter name
).fit(X_train, y_train)

AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=1),
                   learning_rate=0.5, n_estimators=100, random_state=42)

AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=1),
                   learning_rate=0.5, n_estimators=100, random_state=42)

DecisionTreeClassifier(max_depth=1)

DecisionTreeClassifier(max_depth=1)

# Predict labels for the held-out rows with the fitted AdaBoost model.
y_pred_ada = ada_model.predict(X_test)

# Fraction of held-out rows whose predicted label matches the true label.
print(
    "AdaBoost Accuracy:",
    accuracy_score(y_true=y_test, y_pred=y_pred_ada),
)

AdaBoost Accuracy: 0.778

# `pd` and `classification_report` are already imported at the top of the
# file, so they are not imported again here.

# output_dict=True returns the report as a nested dict (one entry per class
# plus the aggregate rows) instead of a preformatted string.
report_dict = classification_report(y_test, y_pred_ada, output_dict=True)

# Transpose so that each class / aggregate becomes a row and each metric
# (precision, recall, f1-score, support) becomes a column.
df_report = pd.DataFrame(report_dict).transpose()

# The "accuracy" entry is a single scalar, so pandas broadcasts it across
# every column, which leaves the accuracy value in the "support" cell.
# Replace that cell with the real sample count so the column stays meaningful.
df_report.loc["accuracy", "support"] = df_report.loc["macro avg", "support"]

print(df_report.round(3))

              precision  recall  f1-score   support
0                 0.783   0.774     0.778   504.000
1                 0.773   0.782     0.778   496.000
accuracy          0.778   0.778     0.778     0.778
macro avg         0.778   0.778     0.778  1000.000
weighted avg      0.778   0.778     0.778  1000.000

# Gradient boosting: 100 depth-3 trees, each contribution shrunk by a 0.1
# learning rate. `fit` returns the estimator itself, so the fitted model is
# bound directly.
gb_model = GradientBoostingClassifier(
    random_state=42,
    max_depth=3,
    learning_rate=0.1,
    n_estimators=100,
).fit(X_train, y_train)

GradientBoostingClassifier(random_state=42)

GradientBoostingClassifier(random_state=42)

# Predict labels for the held-out rows with the fitted gradient-boosting model.
y_pred_gb = gb_model.predict(X_test)

# Headline accuracy first, then the per-class precision/recall/F1 breakdown.
print(
    "Gradient Boosting Accuracy:",
    accuracy_score(y_true=y_test, y_pred=y_pred_gb),
)
print(classification_report(y_true=y_test, y_pred=y_pred_gb))

Gradient Boosting Accuracy: 0.895
              precision    recall  f1-score   support

           0       0.90      0.89      0.89       504
           1       0.89      0.90      0.90       496

    accuracy                           0.90      1000
   macro avg       0.90      0.90      0.89      1000
weighted avg       0.90      0.90      0.89      1000

# Side-by-side test accuracy of the two boosted models, one line per model.
for model_label, model_preds in (
    ("AdaBoost:", y_pred_ada),
    ("Gradient Boosting:", y_pred_gb),
):
    print(model_label, accuracy_score(y_test, model_preds))

AdaBoost: 0.778
Gradient Boosting: 0.895

# Per-feature importance scores exposed by each fitted ensemble.
importances_ada = ada_model.feature_importances_
importances_gb = gb_model.feature_importances_

# Feature indices are unordered categories, so draw grouped bars rather than
# connected lines (a line would suggest a trend between adjacent features).
feature_positions = np.arange(len(importances_ada))
bar_width = 0.4

plt.figure(figsize=(10, 5))
plt.bar(
    feature_positions - bar_width / 2,
    importances_ada,
    width=bar_width,
    label="AdaBoost",
)
plt.bar(
    feature_positions + bar_width / 2,
    importances_gb,
    width=bar_width,
    label="Gradient Boosting",
)
plt.xticks(feature_positions)  # one tick per feature column
plt.xlabel("Feature index")
plt.ylabel("Importance")
plt.legend()
plt.title("Feature Importance Comparison")
plt.tight_layout()
plt.show()

import seaborn as sns

# Counts of (true label, predicted label) pairs for the gradient-boosting
# model: rows are the true classes, columns are the predicted classes.
cm = confusion_matrix(y_test, y_pred_gb)

# annot=True writes the count in each cell; fmt="d" renders it as an integer.
sns.heatmap(cm, annot=True, fmt="d")
# Label the axes so the reader can tell true from predicted: heatmap columns
# run along x (predicted) and rows along y (true).
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.title("Gradient Boosting Confusion Matrix")
plt.show()

# Hyperparameter starting points for tuning. Each assignment holds the lower
# end of a suggested search range; the trailing comment gives the full range.
# NOTE(review): none of these names are passed to a model in the visible code.

# First group: presumably the AdaBoost settings (per the notebook's section
# order) - TODO confirm.
n_estimators = 50 # suggested range: 50–200
learning_rate = 0.01 # suggested range: 0.01–1

# Second group: presumably the Gradient Boosting settings - TODO confirm.
# These rebind n_estimators and learning_rate, overwriting the values above.
n_estimators = 100 # suggested range: 100–500
learning_rate = 0.01 # suggested range: 0.01–0.1
max_depth = 3 # suggested range: 3–6
subsample = 0.6 # suggested range: 0.6–1.0

	0	1	2	3	4	5	6	7	8	9	target
0	3.316690	-2.029674	2.355629	-0.215829	-1.419279	0.914109	-5.980685	1.518092	-0.042540	-3.397662	0
1	4.161930	-3.898558	0.174588	-0.091221	-1.360185	-0.606968	-4.648657	-2.454777	-0.667688	-0.629240	0
2	-0.477509	3.103860	2.694822	1.562327	4.191341	1.230190	3.491643	0.827346	0.635312	-2.593035	0
3	-1.894990	2.218811	-0.062834	-2.495924	-1.738007	-1.256467	-1.842087	1.668228	0.494795	-0.994083	0
4	-0.101203	0.444587	-0.793103	-2.204839	3.360553	0.922288	0.683308	-1.889515	-1.772399	1.312052	1

© Arun Kumar Pandey (2025)

🚀 Project: Classification with AdaBoost & Gradient Boosting¶

🎯 Problem Statement¶

1️⃣ Setup¶

2️⃣ Load Dataset¶

3️⃣ Train-Test Split¶

4️⃣ Feature Scaling (optional here: tree-based boosters are insensitive to feature scale)¶

5️⃣ AdaBoost Model¶

Train¶

Predict¶

Evaluate¶

6️⃣ Gradient Boosting Model¶

Model Definition¶

Train¶

Predict¶

Evaluate¶

7️⃣ Compare Models¶

📊 Expected Insight¶

8️⃣ Feature Importance¶

🧠 Insight¶

9️⃣ Confusion Matrix¶

🔟 Deep Understanding (Important)¶

AdaBoost Learning Behavior¶

Gradient Boosting Learning Behavior¶

1️⃣1️⃣ Hyperparameter Tuning (Critical in Practice)¶

AdaBoost¶

Gradient Boosting¶