I am working on a dataset where I have to make two predictions, i.e. y has 2 columns, and each column is also multiclass. So I am using XGBoost with MultiOutputClassifier, and to tune it I want to use GridSearchCV.
xgb_clf = xgb.XGBClassifier(learning_rate=0.1,
                            n_estimators=3000,
                            max_depth=3,
                            min_child_weight=1,
                            subsample=0.8,
                            colsample_bytree=0.8,
                            objective='multi:softmax',
                            nthread=4,
                            num_class=9,
                            seed=27)

model = MultiOutputClassifier(estimator=xgb_clf)

param_test1 = {'estimator__max_depth': [3], 'estimator__min_child_weight': [4]}

gsearch1 = GridSearchCV(estimator=model,
                        param_grid=param_test1,
                        scoring='roc_auc',
                        n_jobs=4, iid=False, cv=5)
gsearch1.fit(X_train_split, y_train_split)
gsearch1.grid_scores_, gsearch1.best_params_, gsearch1.best_score_
But when I do so, I get an error:
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/joblib/externals/loky/process_executor.py", line 431, in _process_worker
r = call_item()
File "/usr/local/lib/python3.6/dist-packages/joblib/externals/loky/process_executor.py", line 285, in __call__
return self.fn(*self.args, **self.kwargs)
File "/usr/local/lib/python3.6/dist-packages/joblib/_parallel_backends.py", line 595, in __call__
return self.func(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/joblib/parallel.py", line 253, in __call__
for func, args, kwargs in self.items]
File "/usr/local/lib/python3.6/dist-packages/joblib/parallel.py", line 253, in <listcomp>
for func, args, kwargs in self.items]
File "/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py", line 544, in _fit_and_score
test_scores = _score(estimator, X_test, y_test, scorer)
File "/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py", line 591, in _score
scores = scorer(estimator, X_test, y_test)
File "/usr/local/lib/python3.6/dist-packages/sklearn/metrics/_scorer.py", line 87, in __call__
*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/sklearn/metrics/_scorer.py", line 300, in _score
raise ValueError("{0} format is not supported".format(y_type))
ValueError: multiclass-multioutput format is not supported
"""
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
<ipython-input-42-e53fdaaedf6b> in <module>()
5 gsearch1 = GridSearchCV(estimator =model,
6 param_grid = param_test1, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
----> 7 gsearch1.fit(X_train_split,y_train_split)
8 gsearch1.grid_scores_, gsearch1.best_params_, gsearch1.best_score_
7 frames
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params)
708 return results
709
--> 710 self._run_search(evaluate_candidates)
711
712 # For multi-metric evaluation, store the best_index_, best_params_ and
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py in _run_search(self, evaluate_candidates)
1149 def _run_search(self, evaluate_candidates):
1150 """Search all candidates in param_grid"""
-> 1151 evaluate_candidates(ParameterGrid(self.param_grid))
1152
1153
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py in evaluate_candidates(candidate_params)
687 for parameters, (train, test)
688 in product(candidate_params,
--> 689 cv.split(X, y, groups)))
690
691 if len(out) < 1:
/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in __call__(self, iterable)
1040
1041 with self._backend.retrieval_context():
-> 1042 self.retrieve()
1043 # Make sure that we get a last message telling us we are done
1044 elapsed_time = time.time() - self._start_time
/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in retrieve(self)
919 try:
920 if getattr(self._backend, 'supports_timeout', False):
--> 921 self._output.extend(job.get(timeout=self.timeout))
922 else:
923 self._output.extend(job.get())
/usr/local/lib/python3.6/dist-packages/joblib/_parallel_backends.py in wrap_future_result(future, timeout)
540 AsyncResults.get from multiprocessing."""
541 try:
--> 542 return future.result(timeout=timeout)
543 except CfTimeoutError as e:
544 raise TimeoutError from e
/usr/lib/python3.6/concurrent/futures/_base.py in result(self, timeout)
430 raise CancelledError()
431 elif self._state == FINISHED:
--> 432 return self.__get_result()
433 else:
434 raise TimeoutError()
/usr/lib/python3.6/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
ValueError: multiclass-multioutput format is not supported
I think the error occurs because I am using roc_auc as my scoring method, but I don't know how to fix it. Should I use a different scoring method?
Yes, your guess is right. The issue comes from the fact that the built-in roc_auc scorer only handles the binary classification case, so it fails on your multiclass(-multioutput) targets. Instead, you can compute a one-vs-rest ROC AUC for each class and use the average of those scores across all classes.
# from https://stackoverflow.com/questions/39685740/calculate-sklearn-roc-auc-score-for-multi-class
from sklearn.metrics import roc_auc_score
import numpy as np

def roc_auc_score_multiclass(actual_class, pred_class, average="macro"):
    # creating a set of all the unique classes using the actual class list
    unique_class = set(actual_class)
    roc_auc_dict = {}
    for per_class in unique_class:
        # creating a list of all the classes except the current class
        other_class = [x for x in unique_class if x != per_class]

        # marking the current class as 1 and all other classes as 0
        new_actual_class = [0 if x in other_class else 1 for x in actual_class]
        new_pred_class = [0 if x in other_class else 1 for x in pred_class]

        # using the sklearn metrics method to calculate the roc_auc_score
        roc_auc = roc_auc_score(new_actual_class, new_pred_class, average=average)
        roc_auc_dict[per_class] = roc_auc

    # average the one-vs-rest scores over all classes
    return np.mean(list(roc_auc_dict.values()))
Using this function you get the ROC AUC score for each class against all the others, averaged into a single value. To use it with GridSearchCV you need to convert the function into a scorer object with make_scorer (https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html).
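As a minimal sketch of how this could be wired into your grid search: since your y has two output columns, one option is to score each column separately with the function above and average the results (the multioutput_roc_auc wrapper and the variable names below are just illustrative, not part of scikit-learn or the original function):

from sklearn.metrics import make_scorer

def multioutput_roc_auc(y_true, y_pred):
    # y_true and y_pred each have one column per output of the MultiOutputClassifier;
    # score every column separately and average the results
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    column_scores = [roc_auc_score_multiclass(list(y_true[:, i]), list(y_pred[:, i]))
                     for i in range(y_true.shape[1])]
    return np.mean(column_scores)

# make_scorer turns the metric into a scorer object that GridSearchCV can call
multi_roc_auc_scorer = make_scorer(multioutput_roc_auc)

gsearch1 = GridSearchCV(estimator=model,
                        param_grid=param_test1,
                        scoring=multi_roc_auc_scorer,
                        n_jobs=4, cv=5)
gsearch1.fit(X_train_split, y_train_split)

You could also average the two output columns differently (e.g. weight them) if one prediction matters more than the other; the only requirement is that the scorer returns a single number.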