Volcano plots are a common way to visualize both effect size and statistical significance in a single chart. Altair works well for this because it lets you express the chart structure declaratively, making it straightforward to layer points, threshold lines, colors, and tooltips. This tutorial shows how to build a volcano plot in Altair, add fold-change and significance cutoffs, highlight meaningful groups, and create a more practical interactive example.

### Basic Volcano Plot

Let's start with a simple volcano plot built from simulated data.
import numpy as np
import pandas as pd
import altair as alt
# Seed NumPy's global generator so the simulated data is the same on every run.
np.random.seed(0)

# Simulated inputs: one fold-change draw and one p-value draw per observation.
n_points = 200
log_fold_change = np.random.normal(loc=0, scale=1.2, size=n_points)
p_values = np.random.uniform(low=0.001, high=1.0, size=n_points)

# One row per observation; p-values are stored on the -log10 scale used by the y-axis.
data = pd.DataFrame({"log_fold_change": log_fold_change}).assign(
    neg_log_p=-np.log10(p_values)
)

# Axis encodings are named separately so the chart definition reads top to bottom.
x_axis = alt.X("log_fold_change", title="Log2 Fold Change")
y_axis = alt.Y("neg_log_p", title="-log10(p-value)")

scatter = alt.Chart(data).mark_circle(size=60, opacity=0.5).encode(x=x_axis, y=y_axis)
chart = scatter.properties(
    title="Basic Volcano Plot",
    width=600,
    height=400,
).interactive()
chart

- The x-axis shows the size and direction of the change.
- The y-axis shows significance, so smaller p-values appear higher in the chart.

### Adding Threshold Lines

Threshold rules help show which observations pass the cutoffs you care about.
import numpy as np
import pandas as pd
import altair as alt
# Seed NumPy's global generator so the simulated data is the same on every run.
np.random.seed(1)

# Simulated inputs: one fold-change draw and one p-value draw per observation.
n_points = 250
log_fold_change = np.random.normal(loc=0, scale=1.1, size=n_points)
p_values = np.random.uniform(low=0.001, high=1.0, size=n_points)

# One row per observation; p-values are stored on the -log10 scale used by the y-axis.
data = pd.DataFrame({"log_fold_change": log_fold_change}).assign(
    neg_log_p=-np.log10(p_values)
)

# Cutoffs that position the reference rules below.
p_cutoff = 0.05
fc_cutoff = 1.0

# Scatter layer: every observation drawn as a muted gray circle.
x_axis = alt.X("log_fold_change", title="Log2 Fold Change")
y_axis = alt.Y("neg_log_p", title="-log10(p-value)")
points = alt.Chart(data).mark_circle(size=50, color="gray", opacity=0.5).encode(
    x=x_axis,
    y=y_axis,
)

# Significance rule: a single dashed horizontal line at -log10(p_cutoff).
significance_level = pd.DataFrame({"y": [-np.log10(p_cutoff)]})
horizontal_rule = (
    alt.Chart(significance_level)
    .mark_rule(color="red", strokeDash=[6, 4])
    .encode(y="y:Q")
)

# Fold-change rules: one dashed vertical line at -fc_cutoff and one at +fc_cutoff.
fold_change_bounds = pd.DataFrame({"x": [-fc_cutoff, fc_cutoff]})
vertical_rules = (
    alt.Chart(fold_change_bounds)
    .mark_rule(color="black", strokeDash=[6, 4])
    .encode(x="x:Q")
)
(points + horizontal_rule + vertical_rules).properties(
title="Volcano Plot with Thresholds",
width=600,
height=400,
).interactive()

- The horizontal line marks the significance cutoff.
- The two vertical lines mark the fold-change boundaries on either side of zero.

### Highlighting Significant Groups

Coloring the points by status makes the chart easier to read.
import numpy as np
import pandas as pd
import altair as alt
# Seed NumPy's global generator so the simulated data is the same on every run.
np.random.seed(2)

# Sample data
log_fold_change = np.random.normal(0, 1.3, 300)
p_values = np.random.uniform(0.001, 1.0, 300)
data = pd.DataFrame(
    {
        "log_fold_change": log_fold_change,
        "p_value": p_values,
    }
)
data["neg_log_p"] = -np.log10(data["p_value"])

# Name the cutoffs once instead of repeating the literals 1 and 0.05 in each
# condition, so the thresholds can be changed in a single place.
p_cutoff = 0.05
fc_cutoff = 1.0

# Classify every row. The two conditions cannot both be true for the same row,
# so their order does not matter; rows matching neither fall back to the default.
is_significant = data["p_value"] < p_cutoff
data["status"] = np.select(
    [
        is_significant & (data["log_fold_change"] >= fc_cutoff),
        is_significant & (data["log_fold_change"] <= -fc_cutoff),
    ],
    ["Upregulated", "Downregulated"],
    default="Background",
)

chart = (
    alt.Chart(data)
    .mark_circle(size=55, opacity=0.7)
    .encode(
        x=alt.X("log_fold_change", title="Log2 Fold Change"),
        y=alt.Y("neg_log_p", title="-log10(p-value)"),
        # Explicit domain/range pairs each status with a fixed color.
        color=alt.Color(
            "status:N",
            scale=alt.Scale(
                domain=["Background", "Upregulated", "Downregulated"],
                range=["#BDBDBD", "#D62728", "#1F77B4"],
            ),
        ),
    )
    .properties(title="Volcano Plot with Highlighted Groups", width=600, height=400)
    .interactive()
)
chart

- Background points remain muted.
- Upregulated and downregulated points are separated by color so the main findings stand out immediately.

### Adding Interactive Tooltips

Tooltips help you inspect individual points without cluttering the chart with labels.
import numpy as np
import pandas as pd
import altair as alt
# Seed NumPy's global generator so the simulated data is the same on every run.
np.random.seed(3)

# Simulated inputs: a label, a fold-change draw and a p-value draw per observation.
n_points = 100
labels = [f"Gene {i}" for i in range(1, n_points + 1)]
log_fold_change = np.random.normal(loc=0, scale=1.4, size=n_points)
p_values = np.random.uniform(low=0.001, high=1.0, size=n_points)

data = pd.DataFrame(
    {"label": labels, "log_fold_change": log_fold_change, "p_value": p_values}
)
# Keep the raw p-value for the tooltip and add the -log10 scale for the y-axis.
data = data.assign(neg_log_p=-np.log10(data["p_value"]))

# Fields shown when hovering a point, in display order.
tooltip_fields = [
    alt.Tooltip("label:N", title="Label"),
    alt.Tooltip("log_fold_change:Q", title="Log2 fold change", format=".2f"),
    alt.Tooltip("p_value:Q", title="p-value", format=".4f"),
    alt.Tooltip("neg_log_p:Q", title="-log10(p)", format=".2f"),
]

scatter = alt.Chart(data).mark_circle(size=65, color="darkslategray", opacity=0.65)
chart = scatter.encode(
    x=alt.X("log_fold_change", title="Log2 Fold Change"),
    y=alt.Y("neg_log_p", title="-log10(p-value)"),
    tooltip=tooltip_fields,
).properties(
    title="Interactive Volcano Plot",
    width=600,
    height=400,
).interactive()
chart

- Altair handles tooltip configuration directly in the encoding.
- `.interactive()` enables zooming and panning in supported notebook/front-end renderers.

### Practical Example: Differential Expression Style Volcano Plot

Here is a more realistic example combining thresholds, colors, and tooltips in one chart.
import numpy as np
import pandas as pd
import altair as alt
# Seed NumPy's global generator so the simulated data is the same on every run.
np.random.seed(4)

# Simulated inputs: a label, a fold-change draw and a p-value draw per feature.
n = 400
labels = [f"Feature {i}" for i in range(1, n + 1)]
log_fold_change = np.random.normal(0, 1.35, n)
p_values = np.random.uniform(0.0001, 1.0, n)
data = pd.DataFrame(
    {
        "label": labels,
        "log_fold_change": log_fold_change,
        "p_value": p_values,
    }
)
data["neg_log_p"] = -np.log10(data["p_value"])

# Cutoffs shared by the status classification and the reference rules.
p_cutoff = 0.05
fc_cutoff = 1.0

# Classify every row. The two conditions cannot both be true for the same row,
# so their order does not matter; rows matching neither fall back to the default.
is_significant = data["p_value"] < p_cutoff
data["status"] = np.select(
    [
        is_significant & (data["log_fold_change"] >= fc_cutoff),
        is_significant & (data["log_fold_change"] <= -fc_cutoff),
    ],
    ["Upregulated", "Downregulated"],
    default="Background",
)

# Scatter layer: points colored by status, with per-point tooltips.
points = (
    alt.Chart(data)
    .mark_circle(size=55, opacity=0.7)
    .encode(
        x=alt.X("log_fold_change", title="Log2 Fold Change"),
        y=alt.Y("neg_log_p", title="-log10(p-value)"),
        color=alt.Color(
            "status:N",
            scale=alt.Scale(
                domain=["Background", "Upregulated", "Downregulated"],
                range=["#CFCFCF", "#D62728", "#1F77B4"],
            ),
        ),
        tooltip=[
            alt.Tooltip("label:N", title="Label"),
            alt.Tooltip("status:N", title="Status"),
            alt.Tooltip("log_fold_change:Q", title="Log2 fold change", format=".2f"),
            # Three significant digits rather than four fixed decimals: ".4f"
            # would show any p-value below 0.00005 as 0.0000.
            alt.Tooltip("p_value:Q", title="p-value", format=".3g"),
        ],
    )
)

# Significance rule: a single dashed horizontal line at -log10(p_cutoff).
horizontal_rule = alt.Chart(pd.DataFrame({"y": [-np.log10(p_cutoff)]})).mark_rule(
    color="black",
    strokeDash=[6, 4],
).encode(y="y:Q")

# Fold-change rules: one dashed vertical line at -fc_cutoff and one at +fc_cutoff.
vertical_rules = alt.Chart(
    pd.DataFrame({"x": [-fc_cutoff, fc_cutoff]})
).mark_rule(color="black", strokeDash=[6, 4]).encode(x="x:Q")
(points + horizontal_rule + vertical_rules).properties(
title="Differential Expression Style Volcano Plot",
width=650,
height=420,
).interactive()

- This version is close to what you would use for exploratory analysis or reporting.
- Layering makes it easy to combine raw points and threshold references in one chart.

### Conclusion

Altair is a strong choice for volcano plots when you want concise chart definitions with layering, categorical color encoding, and interactive inspection. By combining scatter points, rules, and tooltips, you can build a clean and informative volcano plot with relatively little code.