Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/LightGBM/cuda/cuda_tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class CUDATree : public Tree {

double* cuda_leaf_value_ref() const { return cuda_leaf_value_; }

int host_leaf_depth(int leaf_index) {
int host_leaf_depth(int leaf_index) {
if (leaf_index >= 0 && leaf_index < num_leaves_) {
return host_leaf_depth_[leaf_index];
} else {
Expand Down
6 changes: 3 additions & 3 deletions src/treelearner/cuda/cuda_best_split_finder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,18 +331,18 @@ void CUDABestSplitFinder::FindBestSplitsForLeaf(
const data_size_t num_data_in_larger_leaf,
const double sum_hessians_in_smaller_leaf,
const double sum_hessians_in_larger_leaf,
const int small_leaf_depth,
const int smaller_leaf_depth,
const int larger_leaf_depth,
const score_t* grad_scale,
const score_t* hess_scale,
const uint8_t smaller_num_bits_in_histogram_bins,
const uint8_t larger_num_bits_in_histogram_bins) {
const bool is_smaller_leaf_valid = (num_data_in_smaller_leaf > min_data_in_leaf_ &&
sum_hessians_in_smaller_leaf > min_sum_hessian_in_leaf_ &&
(max_depth > 0 && smaller_leaf_depth > 0 && smaller_leaf_depth < max_depth));
((max_depth_ > 0 && smaller_leaf_depth > 0 && smaller_leaf_depth < max_depth_) || (max_depth_ <= 0)));
const bool is_larger_leaf_valid = (num_data_in_larger_leaf > min_data_in_leaf_ &&
sum_hessians_in_larger_leaf > min_sum_hessian_in_leaf_ && larger_leaf_index >= 0 &&
(max_depth > 0 && larger_leaf_depth > 0 && larger_leaf_depth < max_depth));
((max_depth_ > 0 && larger_leaf_depth > 0 && larger_leaf_depth < max_depth_) || (max_depth_ <= 0)));
if (grad_scale != nullptr && hess_scale != nullptr) {
LaunchFindBestSplitsDiscretizedForLeafKernel(smaller_leaf_splits, larger_leaf_splits,
smaller_leaf_index, larger_leaf_index, is_smaller_leaf_valid, is_larger_leaf_valid,
Expand Down
2 changes: 2 additions & 0 deletions src/treelearner/cuda/cuda_best_split_finder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class CUDABestSplitFinder {
const data_size_t num_data_in_larger_leaf,
const double sum_hessians_in_smaller_leaf,
const double sum_hessians_in_larger_leaf,
const int smaller_leaf_depth,
const int larger_leaf_depth,
const score_t* grad_scale,
const score_t* hess_scale,
const uint8_t smaller_num_bits_in_histogram_bins,
Expand Down
35 changes: 35 additions & 0 deletions tests/python_package_test/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,41 @@ def test_feature_names_are_set_correctly_when_no_feature_names_passed_into_Datas
assert ds.construct().feature_name == ["Column_0", "Column_1", "Column_2"]


def test_max_depth_is_enforced(capsys):
    """Check that a constraining ``max_depth`` bounds the depth of the fitted trees.

    An unconstrained model is fitted first and its deepest tree is asserted to
    have depth 9. The same configuration is then refitted with ``max_depth=6``
    and the deepest tree is asserted to have depth at most 6.

    The ``capsys`` fixture is accepted but not used in this test body.
    """

    def _deepest_node_depth(fitted_model):
        # Take the maximum "node_depth" per tree, then the largest of those maxima.
        tree_table = fitted_model.booster_.trees_to_dataframe()
        per_tree_max = tree_table.groupby("tree_index")["node_depth"].max()
        return per_tree_max.value_counts().index.max()

    params = {
        "objective": "binary",
        "min_data": 10,
        "num_leaves": 15,
        "verbose": -1,
        "num_threads": 1,
        "max_bin": 255,
        "gpu_use_dp": True,
        "deterministic": True,
        "random_state": 2,
    }
    X, y = make_blobs(n_samples=1_000, n_features=1, centers=2, random_state=2)

    # Baseline: no max_depth given, so depth is limited only by the other params.
    model = lgb.LGBMRegressor(**params)
    model.fit(X, y)
    fitted_max_depth = _deepest_node_depth(model)
    assert fitted_max_depth == 9, (
        "This data generation and model fitting procedure should be deterministic within backends. "
        "Both cpu and cuda should result in models with maximal tree depth 9."
    )

    # Refit with a max_depth below the baseline depth of 9 so the limit actually binds.
    constrained_model = lgb.LGBMRegressor(max_depth=6, **params)
    constrained_model.fit(X, y)
    constrained_max_depth = _deepest_node_depth(constrained_model)
    assert constrained_max_depth <= 6, "Trained model contains trees deeper than max_depth = 6"


# NOTE: this intentionally contains values where num_leaves <, ==, and > (max_depth^2)
@pytest.mark.parametrize(("max_depth", "num_leaves"), [(-1, 3), (-1, 50), (5, 3), (5, 31), (5, 32), (8, 3), (8, 31)])
def test_max_depth_warning_is_not_raised_if_num_leaves_is_also_provided(capsys, num_leaves, max_depth):
Expand Down