Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to make mixed statistical subplots using plotly in python?

I have some datasets in CSV files (3 in total) and need to represent them in different ways: specifically line charts, box plots, and histograms with KDE (kernel density estimation).

I know how to plot them individually, but to make it more convenient I need to merge them into a single output. After consulting the reference I wrote some code, but it didn't run.

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import numpy as np

y1 = np.random.randn(200) - 1
y2 = np.random.randn(200)
y3 = np.random.randn(200) + 1
x = np.linspace(0, 1, 200)

fig = make_subplots(
    rows=3, cols=2,
    column_widths=[0.6, 0.4],
    row_heights=[0.3, 0.6],
    specs=[[{"type": "scatter"}, {"type": "box"}],
           [{"type": "scatter"}, {"type": "dist", "rowspan": 2}]
           [{"type": "scatter"},            None           ]])

fig.add_trace(
    go.Scatter(x = x, 
                y = y1,
                hoverinfo = 'x+y',
                mode='lines',
                line=dict(color='rgb(0, 0, 0)',
                width=1),
                showlegend=False,
                )
    row=1, col=1
)

fig.add_trace(
    go.Scatter(x = x, 
                y = y2,
                hoverinfo = 'x+y',
                mode='lines',
                line=dict(color='rgb(246, 52, 16)',
                width=1),
                showlegend=False,
                )
    row=2, col=1
)

fig.add_trace(
    go.Scatter(x = x, 
                y = y3,
                hoverinfo = 'x+y',
                mode='lines',
                line=dict(color='rgb(16, 154, 246)',
                width=1),
                showlegend=False,
                )
    row=3, col=1
)

fig.add_trace(
    go.Box(x=y1)
    go.Box(x=y2)
    go.Box(x=y3)
    row=1, col=2
)

hist_data = [y1, y2, y3]

fig.add_trace(
    ff.create_distplot(hist_data,
                         bin_size=.02, show_rug=False)

    row=2, col=2
)

fig.show()

What is wrong with the code above, or how can I plot these charts in a single output?

P.S. the line charts need to be separated for better visualization.

like image 414
Gastyr Avatar asked Oct 30 '25 12:10

Gastyr


2 Answers

I posted this same question on the Plotly forum, and the user empet graciously answered.

As I suspected, make_subplots() can't handle a figure object; the way to go is to "add figure data as a single trace at a time".

Plot: mixed-statistical-subplots Code:

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import numpy as np

# Three synthetic series of 200 standard-normal samples, shifted by -1, 0 and +1.
y1 = np.random.randn(200) - 1
y2 = np.random.randn(200)
y3 = np.random.randn(200) + 1
# Shared x axis for the line charts: 200 evenly spaced points on [0, 1].
x = np.linspace(0, 1, 200)

# One colour per series (y1, y2, y3), also handed to create_distplot below.
colors = ['#3f3f3f', '#00bfff', '#ff7f00']

# 3x2 grid: the left column holds one line chart per row; the right column
# holds one cell in row 1 and a second cell that spans rows 2-3.
fig = make_subplots(
    rows=3, cols=2,
    column_widths=[0.55, 0.45],
    row_heights=[1., 1., 1.],
    specs=[[{"type": "scatter"}, {"type": "xy"}],
           [{"type": "scatter"}, {"type": "xy", "rowspan": 2}],
           # None: this grid position is occupied by the rowspan cell above.
           [{"type": "scatter"},            None           ]])

# One line chart per series, stacked in the left column (rows 1 to 3).
# Each series is paired with its entry in `colors`.
for row_idx, (series, shade) in enumerate(zip((y1, y2, y3), colors), start=1):
    fig.add_trace(
        go.Scatter(
            x=x,
            y=series,
            hoverinfo='x+y',
            mode='lines',
            line=dict(color=shade, width=1),
            showlegend=False,
        ),
        row=row_idx, col=1,
    )

# Build the three notched, horizontal box traces in a throwaway figure.
# The traces for y1, y2, y3 are named '3', '2', '1' respectively.
boxfig = go.Figure(data=[
    go.Box(x=y1, showlegend=False, notched=True, marker_color="#3f3f3f", name='3'),
    go.Box(x=y2, showlegend=False, notched=True, marker_color="#00bfff", name='2'),
    go.Box(x=y3, showlegend=False, notched=True, marker_color="#ff7f00", name='1'),
])

# A subplot cell takes traces, not a whole figure, so copy the traces over
# one at a time into the top-right cell.
for box_trace in boxfig.data:
    fig.add_trace(box_trace, row=1, col=2)

group_labels = ['Group 1', 'Group 2', 'Group 3']
hist_data = [y1, y2, y3]

# create_distplot returns a complete figure; only its traces are reused here.
distplfig = ff.create_distplot(hist_data, group_labels, colors=colors,
                               bin_size=.2, show_rug=False)

# Add each distplot trace to the cell that starts at row 2, col 2
# (declared with rowspan=2 in the subplot specs).
for dist_trace in distplfig.data:
    fig.add_trace(dist_trace, row=2, col=2)

# Draw the histograms over one another.
fig.update_layout(barmode='overlay')
fig.show()
like image 125
Gastyr Avatar answered Nov 01 '25 01:11

Gastyr


Second attempt after dialogue in the comments.

The following is the best I could do. It's an approach where I build an ff.create_distplot like in your sample code, and then "steal" the data and use it in a combination of go.Histogram, go.Scatter and go.Box objects to emulate the distribution and rug plots from the former.

Plot:

enter image description here

Code:

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import numpy as np
import pandas as pd

# data: three series of 200 standard-normal samples shifted by -1, 0 and +1,
# plus a shared x axis of 200 evenly spaced points on [0, 1]
y1 = np.random.randn(200) - 1
y2 = np.random.randn(200)
y3 = np.random.randn(200) + 1
x = np.linspace(0, 1, 200)

# subplot setup: a 3x2 grid with no per-cell specs
fig = make_subplots(
    rows=3, cols=2
)

# Line [1,1]
fig.add_trace(go.Scatter(x = x, 
                        y = y1,
                        hoverinfo = 'x+y',
                        mode='lines',
                        line=dict(color='rgb(0, 0, 0)',width=1),
                        showlegend=False,
                        name = 'series 1'
                        ),
             row=1, col=1
)

# Line [2,1]
fig.add_trace(
    go.Scatter(x = x, 
                y = y2,
                hoverinfo = 'x+y',
                mode='lines',
                line=dict(color='rgb(246, 52, 16)',
                width=1),
                showlegend=False,
                ),
    row=2, col=1
)

# Line [3,1]
fig.add_trace(
    go.Scatter(x = x, 
                y = y3,
                hoverinfo = 'x+y',
                mode='lines',
                line=dict(color='rgb(16, 154, 246)',
                width=1),
                showlegend=False,
                ),
    row=3, col=1
)

# Turn the legend off at the layout level.
fig.update_layout(showlegend=False)

# box plots [1,2]
# All three horizontal boxes share the top-right cell; they are added in the
# order y3, y2, y1, each with the colour of its line chart.
fig.add_trace(go.Box(x=y3, marker_color='rgb(16, 154, 246)'),
    row=1, col=2
)

fig.add_trace(go.Box(x=y2, marker_color='rgb(246, 52, 16)'),
    row=1, col=2
)

fig.add_trace(go.Box(x=y1, marker_color='rgb(0, 0, 0)'),
    row=1, col=2
)

# density plots [2,2]
hist_data = [y1, y2, y3]
group_labels = ['Group 1', 'Group 2', 'Group 3']

# fig 2 is only there to produce the numbers to fill
# in later go.Histogram and go.Scatter
fig2 = ff.create_distplot(hist_data, group_labels)

# Traces 0-2 of fig2 are rebuilt as go.Histogram objects, one per series,
# recoloured to match the line charts.
# NOTE(review): assumes create_distplot emits the histogram traces first, in
# group order -- confirm against the plotly version in use.
fig.add_trace(go.Histogram(fig2['data'][0],
                           marker_color='rgb(0, 0, 0)',
                           showlegend=False
                          ), row=2, col=2)

fig.add_trace(go.Histogram(fig2['data'][1],
                           marker_color='rgb(246, 52, 16)'
                          ), row=2, col=2)

fig.add_trace(go.Histogram(fig2['data'][2],
                           marker_color='rgb(16, 154, 246)'
                          ), row=2, col=2)

# Traces 3-5 of fig2 are rebuilt as go.Scatter objects with thin lines in the
# matching colours (presumably the KDE curves -- see the note above).
fig.add_trace(go.Scatter(fig2['data'][3],
                         line=dict(color='rgb(0, 0, 0)', width=0.5)
                        ), row=2, col=2)

fig.add_trace(go.Scatter(fig2['data'][4],
                         line=dict(color='rgb(246, 52, 16)', width=0.5)
                        ), row=2, col=2)

fig.add_trace(go.Scatter(fig2['data'][5],
                         line=dict(color='rgb(16, 154, 246)', width=0.5)
                        ), row=2, col=2)

# sorry excuse for a rugplot [3,2]
# Each series is drawn as 'line-ns-open' markers at its own constant y level
# (1.2, 1.1 and 1), so the three rugs sit on separate rows.
df = pd.DataFrame({'y1': y1, 'y2': y2, 'y3': y3}, index=x)
df['rug1'] = 1.2
df['rug2'] = 1.1
df['rug3'] = 1

# (value column, y-level column, marker colour) for each rug row.
rug_specs = [
    ('y1', 'rug1', 'rgb(0, 0, 0)'),
    ('y2', 'rug2', 'rgb(246, 52, 16)'),
    ('y3', 'rug3', 'rgb(16, 154, 246)'),
]
for value_col, level_col, rug_color in rug_specs:
    fig.add_trace(go.Scatter(x=df[value_col], y=df[level_col],
                             mode='markers',
                             marker=dict(color=rug_color, symbol='line-ns-open')
                             ), row=3, col=2)

# some manual adjustments on the rugplot: fix the y range around the three
# rug levels and make the y tick labels fully transparent
fig.update_yaxes(range=[0.9,1.3], tickfont=dict(color='rgba(0,0,0,0)', size=14), row=3, col=2)
fig.update_layout(title ='Mixed statistical subplots', showlegend=False)

fig.show()

EDIT - First attempt:

We'll start with this:

What is wrong with the code above [...]?

That depends on what you're aiming to do here. But first of all, you are missing a lot of commas in places like this:

fig.add_trace(
    go.Scatter(x = x, 
                y = y3,
                hoverinfo = 'x+y',
                mode='lines',
                line=dict(color='rgb(16, 154, 246)',
                width=1),
                showlegend=False,
                ) # <==================== insert missing comma !!!
    row=3, col=1
)

The next snippet leaves me puzzled:

fig.add_trace(
    go.Box(x=y1)
    go.Box(x=y2)
    go.Box(x=y3)
    row=1, col=2
)

There's at least one comma missing here, but it's still not very clear to me what you're trying to do. It seems to me that you'd like to put all boxes within the same chart and plot it on top of the second column, but I don't think it would make much sense with the rest of the setup. And there are more worries ahead, because it does not seem that you'll be able to include your ff.create_distplot() in your setup at all.

The best I can do for you at the time being is to set up a plot for each series, with a go.Scatter() in the first column and the corresponding go.Box() in the right column, like this:

Plot 1:

enter image description here

This is perhaps not 100% what you're looking for, but at least I think it makes sense to look at your data this way.

Code 1:

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import numpy as np

# data: three series of 200 standard-normal samples shifted by -1, 0 and +1,
# plus a shared x axis of 200 evenly spaced points on [0, 1]
y1 = np.random.randn(200) - 1
y2 = np.random.randn(200)
y3 = np.random.randn(200) + 1
x = np.linspace(0, 1, 200)

# subplot setup: a 3x2 grid with no per-cell specs
fig = make_subplots(
    rows=3, cols=2
)

# raw data with go.Scatter: one line chart per series in the left column,
# each series paired with its own line colour
line_specs = (
    (y1, 'rgb(0, 0, 0)'),
    (y2, 'rgb(246, 52, 16)'),
    (y3, 'rgb(16, 154, 246)'),
)
for row_idx, (series, line_color) in enumerate(line_specs, start=1):
    fig.add_trace(
        go.Scatter(
            x=x,
            y=series,
            hoverinfo='x+y',
            mode='lines',
            line=dict(color=line_color, width=1),
            showlegend=False,
        ),
        row=row_idx, col=1,
    )

# box plots: one horizontal box per series in the right column, on the same
# row as that series' line chart (the original plotted y1 in all three rows)
for row_idx, series in enumerate((y1, y2, y3), start=1):
    fig.add_trace(go.Box(x=series),
        row=row_idx, col=2
    )

fig.show()

And then you can show the distribution of all the series together like this:

Plot 2:

enter image description here

Code 2:

# Standalone distribution plot of all three series together.
# Reuses y1, y2, y3 and the `ff` import from Code 1.
hist_data = [y1, y2, y3]
group_labels = ['Group 1', 'Group 2', 'Group 3']
fig2 = ff.create_distplot(hist_data, group_labels)
fig2.show()
like image 27
vestland Avatar answered Nov 01 '25 03:11

vestland



Donate For Us

If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow. Thank you!