Skip to content

Protein Design Explorer

Explore synthesized proteins generated via RFDiffusion. "Minibinders" are small proteins that bind to a specific protein target. When designing a minibinder, a researcher inputs the structure of the target protein and other parameters into the AI diffusion model. Often, a single, promising (parent) version can be run through the model again to produce additional, similar designs to better sample the design space.

The pipeline generates tens of thousands of protein designs. The metric pAE (predicted alignment error) measures how accurate a model was at predicting the minibinder shape, whereas pLDDT (predicted local distance difference test) measures a model's confidence in minibinder structure prediction. For pAE, lower is better; for pLDDT, higher is better.

Additional parameters include partial t to set the time steps used by the model, noise to create more diversity of designs, gradient decay function and gradient scale to guide prioritizing different positions at different time points, and movement to denote whether the minibinder was left in its original position ("og") or moved to a desirable position ("moved").

The dashboard below enables exploration of the results to identify promising protein designs and assess the effects of process parameters.

Loading Example...

Credit: Adapted from a UW CSE 512 project by Christina Savvides, Alexander Shida, Riti Biswas, and Nora McNamara-Bordewick. Data from the UW Institute for Protein Design.

Specification

js
import * as vg from "@uwdata/vgplot";

const TABLE = "proteins";

// Register the parquet file as the table that every view below queries.
await vg.coordinator().exec([
  vg.loadParquet(TABLE, "data/protein-design.parquet")
]);

// Shared state. $query is written by the menus, the color legend and the
// interval brush, and filters the histograms, the raster and the table.
// $point is written by the table and filters the dot overlay.
const $query = vg.Selection.crossfilter();
const $point = vg.Selection.intersect({empty: true});
// Axis domains shared by the scatter plot and its two marginal histograms.
const $plddt_domain = vg.Param.array([67, 94.5]);
const $pae_domain = vg.Param.array([5, 29]);
const $scheme = vg.Param.value("observable10");

/** Data source over the proteins table, filtered by the given selection. */
const rowsFilteredBy = (selection) => vg.from(TABLE, {filterBy: selection});

/** Menu over one column of the proteins table that writes to $query. */
const parameterMenu = (column, label) =>
  vg.menu({from: TABLE, column, label, as: $query});

/** Channels common to both marginal histograms (grouped and filled by version). */
const stackedByVersion = {
  z: "version",
  fill: "version",
  order: "z",
  reverse: true
};

/** Color directives repeated on all three plots. */
const versionColors = () => [
  vg.colorDomain(vg.Fixed),
  vg.colorScheme($scheme)
];

export default vg.vconcat(
  // Row of process-parameter menus.
  vg.hconcat(
    parameterMenu("partial_t", "Partial t"),
    parameterMenu("noise", "Noise"),
    parameterMenu("gradient_decay_function", "Gradient Decay"),
    parameterMenu("gradient_scale", "Gradient Scale")
  ),
  vg.vspace("1.5em"),
  // Histogram of plddt_total (no axes), followed by the color legend.
  vg.hconcat(
    vg.plot(
      vg.rectY(rowsFilteredBy($query), {
        x: vg.bin("plddt_total", {steps: 60}),
        y: vg.count(),
        ...stackedByVersion,
        insetLeft: 0.5,
        insetRight: 0.5
      }),
      vg.width(600),
      vg.height(55),
      vg.xAxis(null),
      vg.yAxis(null),
      vg.xDomain($plddt_domain),
      ...versionColors(),
      vg.marginLeft(40),
      vg.marginRight(0),
      vg.marginTop(0),
      vg.marginBottom(0)
    ),
    vg.hspace(5),
    // The legend is attached to the plot named "scatter" defined below.
    vg.colorLegend({for: "scatter", columns: 1, as: $query})
  ),
  // Main plddt_total vs. pae_interaction plot with the pae_interaction
  // histogram on its right.
  vg.hconcat(
    vg.plot(
      vg.frame({stroke: "#ccc"}),
      vg.raster(rowsFilteredBy($query), {
        x: "plddt_total",
        y: "pae_interaction",
        fill: "version",
        pad: 0
      }),
      vg.intervalXY({
        as: $query,
        brush: {stroke: "currentColor", fill: "transparent"}
      }),
      // Dots for the rows selected through $point (written by the table).
      vg.dot(rowsFilteredBy($point), {
        x: "plddt_total",
        y: "pae_interaction",
        fill: "version",
        stroke: "currentColor",
        strokeWidth: 0.5
      }),
      vg.name("scatter"),
      vg.opacityDomain([0, 2]),
      vg.opacityClamp(true),
      ...versionColors(),
      vg.xDomain($plddt_domain),
      vg.yDomain($pae_domain),
      vg.xLabelAnchor("center"),
      vg.yLabelAnchor("center"),
      vg.marginTop(0),
      vg.marginLeft(40),
      vg.marginRight(0),
      vg.width(600),
      vg.height(450)
    ),
    vg.plot(
      vg.rectX(rowsFilteredBy($query), {
        x: vg.count(),
        y: vg.bin("pae_interaction", {steps: 60}),
        ...stackedByVersion,
        insetTop: 0.5,
        insetBottom: 0.5
      }),
      vg.width(55),
      vg.height(450),
      vg.xAxis(null),
      vg.yAxis(null),
      vg.marginTop(0),
      vg.marginLeft(0),
      vg.marginRight(0),
      vg.yDomain($pae_domain),
      ...versionColors()
    )
  ),
  vg.vspace("1em"),
  // Table of the rows passing $query; it writes to $point.
  vg.table({
    as: $point,
    filterBy: $query,
    from: TABLE,
    columns: [
      "version",
      "pae_interaction",
      "plddt_total",
      "noise",
      "gradient_decay_function",
      "gradient_scale",
      "movement"
    ],
    width: 680,
    height: 215
  })
);
yaml
meta:
  title: Protein Design Explorer
  description: |
    Explore synthesized proteins generated via
    [RFDiffusion](https://www.bakerlab.org/2023/07/11/diffusion-model-for-protein-design/).
    "Minibinders" are small proteins that bind to a specific protein target.
    When designing a minibinder, a researcher inputs the structure of the
    target protein and other parameters into the AI diffusion model. Often, a
    single, promising (parent) _version_ can be run through the model again to
    produce additional, similar designs to better sample the design space.

    The pipeline generates tens of thousands of protein designs. The metric
    _pAE_ (predicted alignment error) measures how accurate a model was at
    predicting the minibinder shape, whereas _pLDDT_ (predicted local distance
    difference test) measures a model's confidence in minibinder structure
    prediction. For _pAE_ lower is better, for _pLDDT_ higher is better.

    Additional parameters include _partial t_ to set the time steps used by
    the model, _noise_ to create more diversity of designs, _gradient decay
    function_ and _gradient scale_ to guide prioritizing different positions
    at different time points, and _movement_ to denote whether the minibinder
    was left in its original position ("og") or moved to a desirable position
    ("moved").

    The dashboard below enables exploration of the results to identify
    promising protein designs and assess the effects of process parameters.
  credit: >
    Adapted from a [UW CSE 512](https://courses.cs.washington.edu/courses/cse512/24sp/)
    project by Christina Savvides, Alexander Shida, Riti Biswas, and
    Nora McNamara-Bordewick. Data from the
    [UW Institute for Protein Design](https://www.ipd.uw.edu/).
# Data sources: the parquet file is registered under the table name `proteins`.
data:
  proteins: { file: data/protein-design.parquet }
# Named params and selections; the views below reference them with a `$` prefix.
params:
  # Written by the menus, the color legend and the intervalXY brush (`as: $query`);
  # read by the histograms, the raster mark and the table (`filterBy: $query`).
  query: { select: crossfilter }
  # Written by the table (`as: $point`); read by the dot mark (`filterBy: $point`).
  point: { select: intersect, empty: true }
  # xDomain of the top histogram and of the scatter plot.
  plddt_domain: [67, 94.5]
  # yDomain of the scatter plot and of the right-hand histogram.
  pae_domain: [5, 29]
  # colorScheme used by all three plots.
  scheme: observable10
vconcat:
  # Row of menus, one per process-parameter column of `proteins`.
  # Every menu writes to the shared $query selection.
  - hconcat:
    - input: menu
      from: proteins
      column: partial_t
      label: Partial t
      as: $query
    - input: menu
      from: proteins
      column: noise
      label: Noise
      as: $query
    - input: menu
      from: proteins
      column: gradient_decay_function
      label: Gradient Decay
      as: $query
    - input: menu
      from: proteins
      column: gradient_scale
      label: Gradient Scale
      as: $query
  - vspace: 1.5em
  # Histogram of plddt_total (filtered by $query) followed by the color legend.
  - hconcat:
    - plot:
      # Counts per plddt_total bin, grouped (z) and filled by version.
      - mark: rectY
        data: { from: proteins, filterBy: $query }
        x: { bin: plddt_total, steps: 60 }
        y: { count: null }
        z: version
        fill: version
        order: z
        reverse: true
        insetLeft: 0.5
        insetRight: 0.5
      # Same width, marginLeft, marginRight and xDomain as the plot named `scatter`.
      width: 600
      height: 55
      # Both axes are disabled for this plot.
      xAxis: null
      yAxis: null
      xDomain: $plddt_domain
      colorDomain: Fixed
      colorScheme: $scheme
      marginLeft: 40
      marginRight: 0
      marginTop: 0
      marginBottom: 0
    - hspace: 5
    # Color legend bound to the plot named `scatter`; it writes to $query.
    - legend: color
      for: scatter
      columns: 1
      as: $query
  # Main plddt_total vs. pae_interaction plot; the next list item places a
  # pae_interaction histogram to its right.
  - hconcat:
    # The name lets the color legend refer to this plot (`for: scatter`).
    - name: scatter
      plot:
      - mark: frame
        stroke: "#ccc"
      # Raster of the rows passing $query, filled by version.
      - mark: raster
        data: { from: proteins, filterBy: $query }
        x: plddt_total
        y: pae_interaction
        fill: version
        pad: 0
      # 2D interval brush that writes to $query.
      - select: intervalXY
        as: $query
        brush: { stroke: currentColor, fill: transparent }
      # Dots for the rows selected through $point (written by the table input).
      - mark: dot
        data: { from: proteins, filterBy: $point }
        x: plddt_total
        y: pae_interaction
        fill: version
        stroke: currentColor
        strokeWidth: 0.5
      opacityDomain: [0, 2]
      opacityClamp: true
      colorDomain: Fixed
      colorScheme: $scheme
      # Axis domains come from the params shared with the two histograms.
      xDomain: $plddt_domain
      yDomain: $pae_domain
      xLabelAnchor: center
      yLabelAnchor: center
      marginTop: 0
      marginLeft: 40
      marginRight: 0
      width: 600
      height: 450
    # Histogram of pae_interaction (filtered by $query), drawn with horizontal bars.
    - plot:
      # Counts per pae_interaction bin, grouped (z) and filled by version.
      - mark: rectX
        data: { from: proteins, filterBy: $query }
        # Explicit null, matching the `y: { count: null }` form used by the rectY histogram.
        x: { count: null }
        y: { bin: pae_interaction, steps: 60 }
        z: version
        fill: version
        order: z
        reverse: true
        insetTop: 0.5
        insetBottom: 0.5
      # Same height, marginTop and yDomain as the plot named `scatter`.
      width: 55
      height: 450
      # Both axes are disabled for this plot.
      xAxis: null
      yAxis: null
      marginTop: 0
      marginLeft: 0
      marginRight: 0
      yDomain: $pae_domain
      colorDomain: Fixed
      colorScheme: $scheme
  - vspace: 1em
  # Table of the `proteins` rows passing $query; it writes to $point,
  # which the dot mark of the `scatter` plot filters by.
  - input: table
    as: $point
    filterBy: $query
    from: proteins
    # Columns shown in the table.
    columns:
      - version
      - pae_interaction
      - plddt_total
      - noise
      - gradient_decay_function
      - gradient_scale
      - movement
    width: 680
    height: 215
json
{
  "meta": {
    "title": "Protein Design Explorer",
    "description": "Explore synthesized proteins generated via\n[RFDiffusion](https://www.bakerlab.org/2023/07/11/diffusion-model-for-protein-design/).\n\"Minibinders\" are small proteins that bind to a specific protein target.\nWhen designing a minibinder, a researcher inputs the structure of the\ntarget protein and other parameters into the AI diffusion model. Often, a\nsingle, promising (parent) _version_ can be run through the model again to\nproduce additional, similar designs to better sample the design space.\n\nThe pipeline generates tens of thousands of protein designs. The metric\n_pAE_ (predicted alignment error) measures how accurate a model was at\npredicting the minibinder shape, whereas _pLDDT_ (predicted local distance\ndifference test) measures a model's confidence in minibinder structure\nprediction. For _pAE_ lower is better, for _pLDDT_ higher is better.\n\nAdditional parameters include _partial t_ to set the time steps used by\nthe model, _noise_ to create more diversity of designs, _gradient decay\nfunction_ and _gradient scale_ to guide prioritizing different positions\nat different time points, and _movement_ to denote whether the minibinder\nwas left in its original position (\"og\") or moved to a desirable position\n(\"moved\").\n\nThe dashboard below enables exploration of the results to identify\npromising protein designs and assess the effects of process parameters.\n",
    "credit": "Adapted from a [UW CSE 512](https://courses.cs.washington.edu/courses/cse512/24sp/) project by Christina Savvides, Alexander Shida, Riti Biswas, and Nora McNamara-Bordewick. Data from the [UW Institute for Protein Design](https://www.ipd.uw.edu/).\n"
  },
  "data": {
    "proteins": {
      "file": "data/protein-design.parquet"
    }
  },
  "params": {
    "query": {
      "select": "crossfilter"
    },
    "point": {
      "select": "intersect",
      "empty": true
    },
    "plddt_domain": [
      67,
      94.5
    ],
    "pae_domain": [
      5,
      29
    ],
    "scheme": "observable10"
  },
  "vconcat": [
    {
      "hconcat": [
        {
          "input": "menu",
          "from": "proteins",
          "column": "partial_t",
          "label": "Partial t",
          "as": "$query"
        },
        {
          "input": "menu",
          "from": "proteins",
          "column": "noise",
          "label": "Noise",
          "as": "$query"
        },
        {
          "input": "menu",
          "from": "proteins",
          "column": "gradient_decay_function",
          "label": "Gradient Decay",
          "as": "$query"
        },
        {
          "input": "menu",
          "from": "proteins",
          "column": "gradient_scale",
          "label": "Gradient Scale",
          "as": "$query"
        }
      ]
    },
    {
      "vspace": "1.5em"
    },
    {
      "hconcat": [
        {
          "plot": [
            {
              "mark": "rectY",
              "data": {
                "from": "proteins",
                "filterBy": "$query"
              },
              "x": {
                "bin": "plddt_total",
                "steps": 60
              },
              "y": {
                "count": null
              },
              "z": "version",
              "fill": "version",
              "order": "z",
              "reverse": true,
              "insetLeft": 0.5,
              "insetRight": 0.5
            }
          ],
          "width": 600,
          "height": 55,
          "xAxis": null,
          "yAxis": null,
          "xDomain": "$plddt_domain",
          "colorDomain": "Fixed",
          "colorScheme": "$scheme",
          "marginLeft": 40,
          "marginRight": 0,
          "marginTop": 0,
          "marginBottom": 0
        },
        {
          "hspace": 5
        },
        {
          "legend": "color",
          "for": "scatter",
          "columns": 1,
          "as": "$query"
        }
      ]
    },
    {
      "hconcat": [
        {
          "name": "scatter",
          "plot": [
            {
              "mark": "frame",
              "stroke": "#ccc"
            },
            {
              "mark": "raster",
              "data": {
                "from": "proteins",
                "filterBy": "$query"
              },
              "x": "plddt_total",
              "y": "pae_interaction",
              "fill": "version",
              "pad": 0
            },
            {
              "select": "intervalXY",
              "as": "$query",
              "brush": {
                "stroke": "currentColor",
                "fill": "transparent"
              }
            },
            {
              "mark": "dot",
              "data": {
                "from": "proteins",
                "filterBy": "$point"
              },
              "x": "plddt_total",
              "y": "pae_interaction",
              "fill": "version",
              "stroke": "currentColor",
              "strokeWidth": 0.5
            }
          ],
          "opacityDomain": [
            0,
            2
          ],
          "opacityClamp": true,
          "colorDomain": "Fixed",
          "colorScheme": "$scheme",
          "xDomain": "$plddt_domain",
          "yDomain": "$pae_domain",
          "xLabelAnchor": "center",
          "yLabelAnchor": "center",
          "marginTop": 0,
          "marginLeft": 40,
          "marginRight": 0,
          "width": 600,
          "height": 450
        },
        {
          "plot": [
            {
              "mark": "rectX",
              "data": {
                "from": "proteins",
                "filterBy": "$query"
              },
              "x": {
                "count": null
              },
              "y": {
                "bin": "pae_interaction",
                "steps": 60
              },
              "z": "version",
              "fill": "version",
              "order": "z",
              "reverse": true,
              "insetTop": 0.5,
              "insetBottom": 0.5
            }
          ],
          "width": 55,
          "height": 450,
          "xAxis": null,
          "yAxis": null,
          "marginTop": 0,
          "marginLeft": 0,
          "marginRight": 0,
          "yDomain": "$pae_domain",
          "colorDomain": "Fixed",
          "colorScheme": "$scheme"
        }
      ]
    },
    {
      "vspace": "1em"
    },
    {
      "input": "table",
      "as": "$point",
      "filterBy": "$query",
      "from": "proteins",
      "columns": [
        "version",
        "pae_interaction",
        "plddt_total",
        "noise",
        "gradient_decay_function",
        "gradient_scale",
        "movement"
      ],
      "width": 680,
      "height": 215
    }
  ]
}