lightgbm cheat sheet
Installation
linux
$ python3 -m pip install lightgbm
osx
$ brew install lightgbm # make sure to install via brew
$ python3 -m pip install lightgbm -U
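A quick way to verify the install is to import the package and print its version:

import lightgbm
print(lightgbm.__version__)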
Hold-out template
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

import lightgbm as lgb
from loguru import logger  # logger is used inside shot_train below

param = {
    "objective": "binary",
    "metric": "auc",
    "max_depth": 5,
    "num_leaves": 5,
    "learning_rate": 0.01,
    "bagging_fraction": 0.5,
    "feature_fraction": 0.5,
    "lambda_l1": 0.1,
    "lambda_l2": 0.1,
    "bagging_seed": 777,
    "verbosity": -1,
    "seed": 777,
    # 'max_bin': 512,
    "n_jobs": -1,  # number of parallel threads
}
def shot_train(X_train, y_train, category, param, eval_func, verbose, early_stopping_rounds, n_estimators):
    # Simple hold-out: the last 25% of the rows becomes the validation set
    X_trn, X_val, y_trn, y_val = train_test_split(X_train, y_train, test_size=0.25, shuffle=False)
    logger.info("now start to train")
    trn_data = lgb.Dataset(X_trn, label=y_trn, categorical_feature=category)
    val_data = lgb.Dataset(X_val, label=y_val, categorical_feature=category)
    num_round = n_estimators
    # NOTE: verbose_eval / early_stopping_rounds are the LightGBM 3.x API;
    # in 4.x they moved to callbacks=[lgb.early_stopping(...), lgb.log_evaluation(...)]
    clf = lgb.train(
        param,
        trn_data,
        num_round,
        valid_sets=[trn_data, val_data],
        verbose_eval=verbose,
        early_stopping_rounds=early_stopping_rounds)
    eval_loss = eval_func(y_val, clf.predict(X_val))
    logger.info(f'end eval_loss={eval_loss}')
    return {'eval_loss': eval_loss,
            'model': clf}
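A minimal call sketch for shot_train; the synthetic data from make_classification, the f0..f9 column names, and the argument values below are illustrative assumptions, not part of the original notes:

import pandas as pd
from sklearn.datasets import make_classification
from sklearn.metrics import roc_auc_score

X, y = make_classification(n_samples=1000, n_features=10, random_state=0)
X = pd.DataFrame(X, columns=[f"f{i}" for i in range(10)])

result = shot_train(
    X_train=X, y_train=y,
    category=[],                # no categorical columns in this dummy data
    param=param,
    eval_func=roc_auc_score,    # score computed on the hold-out predictions
    verbose=100,
    early_stopping_rounds=100,
    n_estimators=10000)
print(result["eval_loss"], result["model"].best_iteration)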
CV template
import sys

import lightgbm as lgb
import numpy as np
from loguru import logger
from sklearn.metrics import roc_auc_score

logger.remove()
logger.add(sys.stdout, level="INFO")

param = {
    "objective": "binary",
    "metric": "auc",
    "max_depth": 5,
    "num_leaves": 5,
    "learning_rate": 0.1,
    # "bagging_fraction": 0.0,
    # "feature_fraction": 0.0,
    "lambda_l1": 0.0,
    "lambda_l2": 0.0,
    "is_unbalance": True,  # for imbalanced labels with the binary objective
    "bagging_seed": 777,
    "verbosity": -1,
    "seed": 777,
    # 'max_bin': 512,
    "n_jobs": -1,  # number of parallel threads
}
def cv_train(X_train, y_train, category, param, eval_func, verbose, early_stopping_rounds, n_estimators):
    logger.info("now start to train")
    trn_data = lgb.Dataset(X_train, label=y_train, categorical_feature=category)
    num_round = n_estimators
    ret = lgb.cv(
        params=param,
        train_set=trn_data,
        nfold=3,  # number of folds
        num_boost_round=num_round,
        verbose_eval=verbose,
        early_stopping_rounds=early_stopping_rounds,
        return_cvbooster=True)  # also return the trained boosters
    logger.info(f"auc-mean {ret['auc-mean'][-1]}")
    cvclf = ret["cvbooster"]
    # Average the fold boosters' predictions for a rough sanity check on the training data
    preds = np.vstack([clf.predict(X_train) for clf in cvclf.boosters]).mean(axis=0)
    eval_loss = eval_func(y_train, preds)
    logger.info(f'end eval_loss={eval_loss}')
    return {'eval_loss': eval_loss,
            'models': cvclf.boosters}  # at inference time, average over these models
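The same kind of call sketch for cv_train (the dataset and argument values are illustrative assumptions); note that inference with the returned boosters is an average over the folds:

import pandas as pd
from sklearn.datasets import make_classification
from sklearn.metrics import roc_auc_score

X, y = make_classification(n_samples=1000, n_features=10, random_state=0)
X = pd.DataFrame(X, columns=[f"f{i}" for i in range(10)])

ret = cv_train(
    X_train=X, y_train=y,
    category=[],
    param=param,
    eval_func=roc_auc_score,
    verbose=100,
    early_stopping_rounds=100,
    n_estimators=10000)

# Prediction on new data: average over the per-fold boosters
preds = np.vstack([m.predict(X) for m in ret["models"]]).mean(axis=0)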
Limiting the CPUs used at inference time
df["pred"] = model.predict(df, n_jobs=1)
Weighting the outputs (for targets/metrics such as RMSPE)
Pass the weight argument to lgb.Dataset:
trn_data = lgb.Dataset(X_trn, label=y_trn, weight=1/np.square(y_trn), categorical_feature=category)
val_data = lgb.Dataset(X_val, label=y_val, weight=1/np.square(y_val), categorical_feature=category)
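The 1/y² weight lines up with RMSPE: RMSPE squares the relative error (y - ŷ) / y, which is the same as an ordinary squared error weighted by 1/y² per sample. A small helper to evaluate it (the function name rmspe is my own, not from the original notes):

import numpy as np

def rmspe(y_true, y_pred):
    # Root Mean Squared Percentage Error
    return np.sqrt(np.mean(np.square((y_true - y_pred) / y_true)))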
Can lightgbm do multimodal (multi-peaked) inference?
- A. Yes, it can
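One concrete reading of "multimodal" here (my assumption, not something stated in the original notes) is approximating a multi-peaked predictive distribution. A common trick is to bin the target and train a multiclass model, so predict() returns a per-row histogram that can have several peaks; the snippet below is only a sketch on synthetic data:

import lightgbm as lgb
import pandas as pd
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=1000, n_features=10, random_state=0)
bins = pd.qcut(y, q=20, labels=False, duplicates="drop")  # discretize the target into 20 bins
train_data = lgb.Dataset(X, label=bins)
booster = lgb.train(
    {"objective": "multiclass", "num_class": int(bins.max()) + 1, "verbosity": -1},
    train_data,
    num_boost_round=100)
hist = booster.predict(X)  # shape (n_samples, n_bins): a per-row distribution that may be multi-peaked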
Error cases and how to handle them
1
if self.handle is not None and feature_name is not None and feature_name != 'auto':
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
This happens when building a LightGBM model with feature_name: the length of feature_name does not match the number of columns in the lgb.Dataset.
model = lgb.train(
    params,
    tr_data,
    num_boost_round=5000,
    valid_sets=[tr_data, va_data],
    early_stopping_rounds=50,
    feature_name=features_tmp,
    categorical_feature=categorical_columns,
    verbose_eval=50
)
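A typical fix (sketch; X_tr is my placeholder name for the DataFrame that tr_data was built from) is to rebuild feature_name directly from the columns that actually went into the Dataset, so the lengths cannot diverge:

features_tmp = list(X_tr.columns)          # X_tr: the DataFrame passed to lgb.Dataset (assumed name)
assert len(features_tmp) == X_tr.shape[1]  # feature_name length must match the Dataset's column count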
Getting feature importance
- Two types are available: gain and split count
- In general, gain is considered the better one to look at
import pandas as pd

def lgb_imps(models=[], train_columns=[]):
    # Sum the gain importance per column over all given models
    c_i = {}
    for idx, model in enumerate(models):
        for c, i in zip(train_columns, model.feature_importance(importance_type='gain')):
            if c_i.get(c) is None:
                c_i[c] = 0
            c_i[c] += i
    imps = pd.DataFrame({'cs': list(c_i.keys()), 'is': list(c_i.values())})
    imps = imps.sort_values(by=['is'], ascending=False).reset_index()
    imps = imps.drop(['index'], axis=1)
    return imps
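Usage sketch, assuming the result / ret objects and the DataFrame X from the template sketches above:

imps = lgb_imps(models=[result["model"]], train_columns=list(X.columns))   # hold-out model
imps_cv = lgb_imps(models=ret["models"], train_columns=list(X.columns))    # CV boosters
print(imps.head(10))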
Troubleshooting
Python crashes on mac osx
- Log
OMP: Error #131: Thread identifier invalid.
- Cause
  - A bug in Apple's libomp
- Fix
  - Uninstall libomp and install an older version
$ wget https://raw.githubusercontent.com/Homebrew/homebrew-core/fb8323f2b170bd4ae97e1bac9bf3e2983af3fdb0/Formula/libomp.rb
$ brew unlink libomp
$ brew install libomp.rb