[ ]:
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from blocksnet.machine_learning.strategy.sklearn.ensemble.voting.classification_strategy import (
    SKLearnVotingClassificationStrategy
)

BASE_PARAMS = {"random_state": 42, "n_jobs": -1}
MODEL_PARAMS = {
    "rf": {"n_estimators": 200, "max_depth": 7, "class_weight": "balanced", **BASE_PARAMS},
    "xgb": {"n_estimators": 200, "max_depth": 7, "learning_rate": 0.05,
            "scale_pos_weight": 1, **BASE_PARAMS},
    "lgb": {"n_estimators": 200, "max_depth": 7, "learning_rate": 0.05,
            "class_weight": "balanced", **BASE_PARAMS},
    "cb": {"iterations": 200, "depth": 7, "learning_rate": 0.05,
           "thread_count": -1, "auto_class_weights": "Balanced", "random_seed": 42},
    "hgb": {"max_iter": 200, "max_depth": 7, "learning_rate": 0.05, "random_state": 42},
}
estimators = [
    ("rf",  RandomForestClassifier(**MODEL_PARAMS["rf"])),
    ("xgb", XGBClassifier(**MODEL_PARAMS["xgb"])),
    ("lgb", LGBMClassifier(**MODEL_PARAMS["lgb"])),
    ("hgb", HistGradientBoostingClassifier(**MODEL_PARAMS["hgb"])),
]
voting_params = {"voting": "soft", "n_jobs": -1}
strategy = SKLearnVotingClassificationStrategy(estimators, voting_params)
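MODEL_PARAMS also contains a "cb" entry with CatBoost parameters that is not used in the ensemble above. If the catboost package is installed, it can be added as a fifth estimator (a sketch, assuming CatBoostClassifier is available; verbose=0 only silences per-iteration logging):

[ ]:
from catboost import CatBoostClassifier

estimators.append(("cb", CatBoostClassifier(**MODEL_PARAMS["cb"], verbose=0)))
strategy = SKLearnVotingClassificationStrategy(estimators, voting_params)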

[7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Simulated source data
df = pd.DataFrame({
    "floor": [1, 2, 3, 4, 5, 6, 7, 8],
    "area": [40.5, 50.0, 60.2, 55.1, 70.0, 45.0, 66.6, 52.3],
    "rooms": [1, 2, 2, 3, 3, 1, 2, 2],
    "is_living": [1, 1, 1, 0, 0, 1, 0, 0]
})

# Split features and target variable
X = df.drop(columns=["is_living"])
y = df["is_living"]

# Split into training and test sets
x_train_df, x_test_df, y_train_series, y_test_series = train_test_split(X, y, test_size=0.25, random_state=42)

# Convert to NumPy arrays
x_train = x_train_df.to_numpy()
x_test = x_test_df.to_numpy()
y_train = y_train_series.to_numpy()
y_test = y_test_series.to_numpy()
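With only eight rows and a balanced target, an unstratified split can leave a single class in the test set. Re-running the split with stratify=y (a sketch using the same scikit-learn call) keeps the class proportions in both subsets:

[ ]:
x_train_df, x_test_df, y_train_series, y_test_series = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)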

[8]:
score = strategy.train(x_train, y_train, x_test, y_test)
e:\Github\blocksnet\.venv\lib\site-packages\sklearn\utils\validation.py:2749: UserWarning: X does not have valid feature names, but LGBMClassifier was fitted with feature names
  warnings.warn(
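The warning indicates that LGBMClassifier was fitted with pandas feature names but is now receiving a plain NumPy array; it does not affect the results. One way to silence it (a sketch using the standard warnings module) is to filter the message before training or predicting:

[ ]:
import warnings

warnings.filterwarnings(
    "ignore",
    message="X does not have valid feature names",
    category=UserWarning,
)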
[9]:
strategy.save("artifacts/voting_model")
[10]:
loaded = SKLearnVotingClassificationStrategy([], None)  # placeholders; parameters are restored from the saved metadata
loaded.load("artifacts/voting_model")
[12]:
y_pred  = loaded.predict(x_test)
y_proba = loaded.predict_proba(x_test)

print(f'predicted: {y_pred}')
print(f'probabilities: {y_proba}')
predicted: [0 0]
probabilities: [[0.56583332 0.43416667]
 [0.60083332 0.39916667]]
e:\Github\blocksnet\.venv\lib\site-packages\sklearn\utils\validation.py:2749: UserWarning: X does not have valid feature names, but LGBMClassifier was fitted with feature names
  warnings.warn(
e:\Github\blocksnet\.venv\lib\site-packages\sklearn\utils\validation.py:2749: UserWarning: X does not have valid feature names, but LGBMClassifier was fitted with feature names
  warnings.warn(
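To put a number on the loaded model, standard scikit-learn metrics can be applied to the predictions above (a sketch; with only two test samples the values are illustrative at best):

[ ]:
from sklearn.metrics import accuracy_score, log_loss

print(f"accuracy: {accuracy_score(y_test, y_pred):.3f}")
print(f"log loss: {log_loss(y_test, y_proba, labels=[0, 1]):.3f}")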