Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Getting KeyError: 0 when plotting a boxplot of a filtered dataset

I am working on the Abalone dataset (from Kaggle) and I am running into a strange problem when plotting a boxplot.

import pandas as pd
import seaborn as sns

df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data', header=None)
df.columns = ['sex', 'Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight', 'rings']

If I run:

plt.figure(figsize=(16,6))
plt.subplot(121)
sns.boxplot(data=df['rings'])

it works perfectly!

If I filter the dataset by sex like this:

df_f = df[df['sex']=='F']
df_m = df[df['sex']=='M']
df_i = df[df['sex']=='I']

The resulting shapes are df_f: (1307, 9), df_m: (1528, 9) and df_i: (1342, 9).

And I run:

plt.figure(figsize=(16,6))
plt.subplot(121)
sns.boxplot(data=df_m['rings'])

it also works perfectly!

But if I run the same code for the df_f and df_i datasets, I get an error:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3360             try:
-> 3361                 return self._engine.get_loc(casted_key)
   3362             except KeyError as err:

~/anaconda3/lib/python3.9/site-packages/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

~/anaconda3/lib/python3.9/site-packages/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

KeyError: 0

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
/tmp/ipykernel_434828/3363262611.py in <module>
----> 1 sns.boxplot(data=df_f['Rings'])

~/anaconda3/lib/python3.9/site-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
     44             )
     45         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46         return f(**kwargs)
     47     return inner_f
     48 

~/anaconda3/lib/python3.9/site-packages/seaborn/categorical.py in boxplot(x, y, hue, data, order, hue_order, orient, color, palette, saturation, width, dodge, fliersize, linewidth, whis, ax, **kwargs)
   2241 ):
   2242 
-> 2243     plotter = _BoxPlotter(x, y, hue, data, order, hue_order,
   2244                           orient, color, palette, saturation,
   2245                           width, dodge, fliersize, linewidth)

~/anaconda3/lib/python3.9/site-packages/seaborn/categorical.py in __init__(self, x, y, hue, data, order, hue_order, orient, color, palette, saturation, width, dodge, fliersize, linewidth)
    404                  width, dodge, fliersize, linewidth):
    405 
--> 406         self.establish_variables(x, y, hue, data, orient, order, hue_order)
    407         self.establish_colors(color, palette, saturation)
    408 

~/anaconda3/lib/python3.9/site-packages/seaborn/categorical.py in establish_variables(self, x, y, hue, data, orient, order, hue_order, units)
     96                 if hasattr(data, "shape"):
     97                     if len(data.shape) == 1:
---> 98                         if np.isscalar(data[0]):
     99                             plot_data = [data]
    100                         else:

~/anaconda3/lib/python3.9/site-packages/pandas/core/series.py in __getitem__(self, key)
    940 
    941         elif key_is_scalar:
--> 942             return self._get_value(key)
    943 
    944         if is_hashable(key):

~/anaconda3/lib/python3.9/site-packages/pandas/core/series.py in _get_value(self, label, takeable)
   1049 
   1050         # Similar to Index.get_value, but we do not fall back to positional
-> 1051         loc = self.index.get_loc(label)
   1052         return self.index._get_values_for_loc(self, loc, label)
   1053 

~/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3361                 return self._engine.get_loc(casted_key)
   3362             except KeyError as err:
-> 3363                 raise KeyError(key) from err
   3364 
   3365         if is_scalar(key) and isna(key) and not self.hasnans:

KeyError: 0

There are no missing values, and all values are integers.

What am I missing here?

like image 411
pouchewar Avatar asked Oct 11 '25 18:10

pouchewar


1 Answer

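The error occurs because seaborn evaluates data[0] on the Series you pass in (see establish_variables in the traceback), and pandas treats that as a lookup of the index label 0 rather than the first position. After filtering, df_f and df_i keep their original row labels and evidently do not contain the label 0, while df_m does. Resetting the index first, e.g. df_f['rings'].reset_index(drop=True), avoids the KeyError. If you want a box plot per value of a categorical column, I suggest: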

sns.boxplot(data=df, x='rings', y='sex')