Protein Design Explorer
Explore synthesized proteins generated via RFDiffusion. "Minibinders" are small proteins that bind to a specific protein target. When designing a minibinder, a researcher inputs the structure of the target protein and other parameters into the AI diffusion model. Often, a single, promising (parent) version can be run through the model again to produce additional, similar designs to better sample the design space.
The pipeline generates tens of thousands of protein designs. The metric pAE (predicted alignment error) measures how accurate a model was at predicting the minibinder shape, whereas pLDDT (predicted local distance difference test) measures a model's confidence in minibinder structure prediction. For pAE lower is better, for pLDDT higher is better.
Additional parameters include partial t to set the time steps used by the model, noise to create more diversity of designs, gradient decay function and gradient scale to guide prioritizing different positions at different time points, and movement to denote whether the minibinder was left in its original position ("og") or moved to a desirable position ("moved").
The dashboard below enables exploration of the results to identify promising protein designs and assess the effects of process parameters.
Credit: Adapted from a UW CSE 512 project by Christina Savvides, Alexander Shida, Riti Biswas, and Nora McNamara-Bordewick. Data from the UW Institute for Protein Design.
Specification
import * as vg from "@uwdata/vgplot";
// Run the Parquet load for the "proteins" table. The top-level await holds
// the rest of this module until exec() has resolved.
const loadProteins = vg.loadParquet("proteins", "data/protein-design.parquet");
await vg.coordinator().exec([loadProteins]);

// Shared selections and params.
// $query: written by the menus, the color legend and the XY brush; used as
//         the filter for both histograms, the raster layer and the table.
// $point: written by the table; used as the filter for the dot overlay.
const $query = vg.Selection.crossfilter();
const $point = vg.Selection.intersect({empty: true});
const $plddt_domain = vg.Param.array([67, 94.5]);
const $pae_domain = vg.Param.array([5, 29]);
const $scheme = vg.Param.value("observable10");

// Fresh "proteins" source filtered by $query (one new instance per call).
const filteredProteins = () => vg.from("proteins", {filterBy: $query});

// Menu over one "proteins" column that publishes its choice to $query.
const queryMenu = (column, label) =>
  vg.menu({from: "proteins", column, label, as: $query});

// Channel options common to both histograms: series and fill by version.
const versionChannels = {
  z: "version",
  fill: "version",
  order: "z",
  reverse: true
};

// Row of process-parameter menus.
const menuRow = vg.hconcat(
  queryMenu("partial_t", "Partial t"),
  queryMenu("noise", "Noise"),
  queryMenu("gradient_decay_function", "Gradient Decay"),
  queryMenu("gradient_scale", "Gradient Scale")
);

// Top histogram: counts of plddt_total in bins, sharing the scatter x-domain.
const plddtHistogram = vg.plot(
  vg.rectY(filteredProteins(), {
    x: vg.bin("plddt_total", {steps: 60}),
    y: vg.count(),
    ...versionChannels,
    insetLeft: 0.5,
    insetRight: 0.5
  }),
  vg.width(600),
  vg.height(55),
  vg.xAxis(null),
  vg.yAxis(null),
  vg.xDomain($plddt_domain),
  vg.colorDomain(vg.Fixed),
  vg.colorScheme($scheme),
  vg.marginLeft(40),
  vg.marginRight(0),
  vg.marginTop(0),
  vg.marginBottom(0)
);

// Color legend bound to the plot named "scatter"; it also writes to $query.
const versionLegend = vg.colorLegend({for: "scatter", columns: 1, as: $query});

// Main view: raster of plddt_total vs. pae_interaction, an XY interval brush,
// and a dot layer restricted to the rows selected through $point.
const scatter = vg.plot(
  vg.frame({stroke: "#ccc"}),
  vg.raster(filteredProteins(), {
    x: "plddt_total",
    y: "pae_interaction",
    fill: "version",
    pad: 0
  }),
  vg.intervalXY({
    as: $query,
    brush: {stroke: "currentColor", fill: "transparent"}
  }),
  vg.dot(vg.from("proteins", {filterBy: $point}), {
    x: "plddt_total",
    y: "pae_interaction",
    fill: "version",
    stroke: "currentColor",
    strokeWidth: 0.5
  }),
  vg.name("scatter"),
  vg.opacityDomain([0, 2]),
  vg.opacityClamp(true),
  vg.colorDomain(vg.Fixed),
  vg.colorScheme($scheme),
  vg.xDomain($plddt_domain),
  vg.yDomain($pae_domain),
  vg.xLabelAnchor("center"),
  vg.yLabelAnchor("center"),
  vg.marginTop(0),
  vg.marginLeft(40),
  vg.marginRight(0),
  vg.width(600),
  vg.height(450)
);

// Side histogram: counts of pae_interaction in bins, sharing the scatter
// y-domain.
const paeHistogram = vg.plot(
  vg.rectX(filteredProteins(), {
    x: vg.count(),
    y: vg.bin("pae_interaction", {steps: 60}),
    ...versionChannels,
    insetTop: 0.5,
    insetBottom: 0.5
  }),
  vg.width(55),
  vg.height(450),
  vg.xAxis(null),
  vg.yAxis(null),
  vg.marginTop(0),
  vg.marginLeft(0),
  vg.marginRight(0),
  vg.yDomain($pae_domain),
  vg.colorDomain(vg.Fixed),
  vg.colorScheme($scheme)
);

// Table of the rows passing $query; its own selection is written to $point.
const detailTable = vg.table({
  as: $point,
  filterBy: $query,
  from: "proteins",
  columns: [
    "version",
    "pae_interaction",
    "plddt_total",
    "noise",
    "gradient_decay_function",
    "gradient_scale",
    "movement"
  ],
  width: 680,
  height: 215
});

// Dashboard layout, top to bottom: menus, top histogram + legend,
// scatter + side histogram, table.
export default vg.vconcat(
  menuRow,
  vg.vspace("1.5em"),
  vg.hconcat(plddtHistogram, vg.hspace(5), versionLegend),
  vg.hconcat(scatter, paeHistogram),
  vg.vspace("1em"),
  detailTable
);
# Declarative spec for the Protein Design Explorer dashboard.
# Strings beginning with "$" refer to the entries declared under `params`.
meta:
  title: Protein Design Explorer
  description: |
    Explore synthesized proteins generated via
    [RFDiffusion](https://www.bakerlab.org/2023/07/11/diffusion-model-for-protein-design/).
    "Minibinders" are small proteins that bind to a specific protein target.
    When designing a minibinder, a researcher inputs the structure of the
    target protein and other parameters into the AI diffusion model. Often, a
    single, promising (parent) _version_ can be run through the model again to
    produce additional, similar designs to better sample the design space.

    The pipeline generates tens of thousands of protein designs. The metric
    _pAE_ (predicted alignment error) measures how accurate a model was at
    predicting the minibinder shape, whereas _pLDDT_ (predicted local distance
    difference test) measures a model's confidence in minibinder structure
    prediction. For _pAE_ lower is better, for _pLDDT_ higher is better.

    Additional parameters include _partial t_ to set the time steps used by
    the model, _noise_ to create more diversity of designs, _gradient decay
    function_ and _gradient scale_ to guide prioritizing different positions
    at different time points, and _movement_ to denote whether the minibinder
    was left in its original position ("og") or moved to a desirable position
    ("moved").

    The dashboard below enables exploration of the results to identify
    promising protein designs and assess the effects of process parameters.
  credit: >
    Adapted from a [UW CSE 512](https://courses.cs.washington.edu/courses/cse512/24sp/)
    project by Christina Savvides, Alexander Shida, Riti Biswas, and
    Nora McNamara-Bordewick. Data from the
    [UW Institute for Protein Design](https://www.ipd.uw.edu/).
data:
  proteins: { file: data/protein-design.parquet }
params:
  # query: written by the menus, the legend and the XY brush; filters both
  #        histograms, the raster layer and the table.
  # point: written by the table; filters the dot overlay.
  query: { select: crossfilter }
  point: { select: intersect, empty: true }
  plddt_domain: [67, 94.5]
  pae_domain: [5, 29]
  scheme: observable10
vconcat:
# Row of process-parameter menus.
- hconcat:
  - input: menu
    from: proteins
    column: partial_t
    label: Partial t
    as: $query
  - input: menu
    from: proteins
    column: noise
    label: Noise
    as: $query
  - input: menu
    from: proteins
    column: gradient_decay_function
    label: Gradient Decay
    as: $query
  - input: menu
    from: proteins
    column: gradient_scale
    label: Gradient Scale
    as: $query
- vspace: 1.5em
# Top histogram (binned counts of plddt_total) and the color legend.
- hconcat:
  - plot:
    - mark: rectY
      data: { from: proteins, filterBy: $query }
      x: { bin: plddt_total, steps: 60 }
      y: { count: null }
      z: version
      fill: version
      order: z
      reverse: true
      insetLeft: 0.5
      insetRight: 0.5
    width: 600
    height: 55
    xAxis: null
    yAxis: null
    xDomain: $plddt_domain
    colorDomain: Fixed
    colorScheme: $scheme
    marginLeft: 40
    marginRight: 0
    marginTop: 0
    marginBottom: 0
  - hspace: 5
  - legend: color
    for: scatter
    columns: 1
    as: $query
# Main raster view with brush and selected-point overlay, plus the side
# histogram (binned counts of pae_interaction).
- hconcat:
  - name: scatter
    plot:
    - mark: frame
      stroke: "#ccc"
    - mark: raster
      data: { from: proteins, filterBy: $query }
      x: plddt_total
      y: pae_interaction
      fill: version
      pad: 0
    - select: intervalXY
      as: $query
      brush: { stroke: currentColor, fill: transparent }
    - mark: dot
      data: { from: proteins, filterBy: $point }
      x: plddt_total
      y: pae_interaction
      fill: version
      stroke: currentColor
      strokeWidth: 0.5
    opacityDomain: [0, 2]
    opacityClamp: true
    colorDomain: Fixed
    colorScheme: $scheme
    xDomain: $plddt_domain
    yDomain: $pae_domain
    xLabelAnchor: center
    yLabelAnchor: center
    marginTop: 0
    marginLeft: 40
    marginRight: 0
    width: 600
    height: 450
  - plot:
    - mark: rectX
      data: { from: proteins, filterBy: $query }
      x: { count: null }
      y: { bin: pae_interaction, steps: 60 }
      z: version
      fill: version
      order: z
      reverse: true
      insetTop: 0.5
      insetBottom: 0.5
    width: 55
    height: 450
    xAxis: null
    yAxis: null
    marginTop: 0
    marginLeft: 0
    marginRight: 0
    yDomain: $pae_domain
    colorDomain: Fixed
    colorScheme: $scheme
- vspace: 1em
# Table of rows passing $query; its selection is written to $point.
- input: table
  as: $point
  filterBy: $query
  from: proteins
  columns:
  - version
  - pae_interaction
  - plddt_total
  - noise
  - gradient_decay_function
  - gradient_scale
  - movement
  width: 680
  height: 215
{
"meta": {
"title": "Protein Design Explorer",
"description": "Explore synthesized proteins generated via\n[RFDiffusion](https://www.bakerlab.org/2023/07/11/diffusion-model-for-protein-design/).\n\"Minibinders\" are small proteins that bind to a specific protein target.\nWhen designing a minibinder, a researcher inputs the structure of the\ntarget protein and other parameters into the AI diffusion model. Often, a\nsingle, promising (parent) _version_ can be run through the model again to\nproduce additional, similar designs to better sample the design space.\n\nThe pipeline generates tens of thousands of protein designs. The metric\n_pAE_ (predicted alignment error) measures how accurate a model was at\npredicting the minibinder shape, whereas _pLDDT_ (predicted local distance\ndifference test) measures a model's confidence in minibinder structure\nprediction. For _pAE_ lower is better, for _pLDDT_ higher is better.\n\nAdditional parameters include _partial t_ to set the time steps used by\nthe model, _noise_ to create more diversity of designs, _gradient decay\nfunction_ and _gradient scale_ to guide prioritizing different positions\nat different time points, and _movement_ to denote whether the minibinder\nwas left in its original position (\"og\") or moved to a desirable position\n(\"moved\").\n\nThe dashboard below enables exploration of the results to identify\npromising protein designs and assess the effects of process parameters.\n",
"credit": "Adapted from a [UW CSE 512](https://courses.cs.washington.edu/courses/cse512/24sp/) project by Christina Savvides, Alexander Shida, Riti Biswas, and Nora McNamara-Bordewick. Data from the [UW Institute for Protein Design](https://www.ipd.uw.edu/).\n"
},
"data": {
"proteins": {
"file": "data/protein-design.parquet"
}
},
"params": {
"query": {
"select": "crossfilter"
},
"point": {
"select": "intersect",
"empty": true
},
"plddt_domain": [
67,
94.5
],
"pae_domain": [
5,
29
],
"scheme": "observable10"
},
"vconcat": [
{
"hconcat": [
{
"input": "menu",
"from": "proteins",
"column": "partial_t",
"label": "Partial t",
"as": "$query"
},
{
"input": "menu",
"from": "proteins",
"column": "noise",
"label": "Noise",
"as": "$query"
},
{
"input": "menu",
"from": "proteins",
"column": "gradient_decay_function",
"label": "Gradient Decay",
"as": "$query"
},
{
"input": "menu",
"from": "proteins",
"column": "gradient_scale",
"label": "Gradient Scale",
"as": "$query"
}
]
},
{
"vspace": "1.5em"
},
{
"hconcat": [
{
"plot": [
{
"mark": "rectY",
"data": {
"from": "proteins",
"filterBy": "$query"
},
"x": {
"bin": "plddt_total",
"steps": 60
},
"y": {
"count": null
},
"z": "version",
"fill": "version",
"order": "z",
"reverse": true,
"insetLeft": 0.5,
"insetRight": 0.5
}
],
"width": 600,
"height": 55,
"xAxis": null,
"yAxis": null,
"xDomain": "$plddt_domain",
"colorDomain": "Fixed",
"colorScheme": "$scheme",
"marginLeft": 40,
"marginRight": 0,
"marginTop": 0,
"marginBottom": 0
},
{
"hspace": 5
},
{
"legend": "color",
"for": "scatter",
"columns": 1,
"as": "$query"
}
]
},
{
"hconcat": [
{
"name": "scatter",
"plot": [
{
"mark": "frame",
"stroke": "#ccc"
},
{
"mark": "raster",
"data": {
"from": "proteins",
"filterBy": "$query"
},
"x": "plddt_total",
"y": "pae_interaction",
"fill": "version",
"pad": 0
},
{
"select": "intervalXY",
"as": "$query",
"brush": {
"stroke": "currentColor",
"fill": "transparent"
}
},
{
"mark": "dot",
"data": {
"from": "proteins",
"filterBy": "$point"
},
"x": "plddt_total",
"y": "pae_interaction",
"fill": "version",
"stroke": "currentColor",
"strokeWidth": 0.5
}
],
"opacityDomain": [
0,
2
],
"opacityClamp": true,
"colorDomain": "Fixed",
"colorScheme": "$scheme",
"xDomain": "$plddt_domain",
"yDomain": "$pae_domain",
"xLabelAnchor": "center",
"yLabelAnchor": "center",
"marginTop": 0,
"marginLeft": 40,
"marginRight": 0,
"width": 600,
"height": 450
},
{
"plot": [
{
"mark": "rectX",
"data": {
"from": "proteins",
"filterBy": "$query"
},
"x": {
"count": null
},
"y": {
"bin": "pae_interaction",
"steps": 60
},
"z": "version",
"fill": "version",
"order": "z",
"reverse": true,
"insetTop": 0.5,
"insetBottom": 0.5
}
],
"width": 55,
"height": 450,
"xAxis": null,
"yAxis": null,
"marginTop": 0,
"marginLeft": 0,
"marginRight": 0,
"yDomain": "$pae_domain",
"colorDomain": "Fixed",
"colorScheme": "$scheme"
}
]
},
{
"vspace": "1em"
},
{
"input": "table",
"as": "$point",
"filterBy": "$query",
"from": "proteins",
"columns": [
"version",
"pae_interaction",
"plddt_total",
"noise",
"gradient_decay_function",
"gradient_scale",
"movement"
],
"width": 680,
"height": 215
}
]
}