Skip to content

Linear Regression

A linear regression plot predicting athletes' heights based on their weights. Regression computation is performed in the database. The area around a regression line shows a 95% confidence interval. Select a region to view regression results for a data subset.

Loading Example...

Specification

js
import * as vg from "@uwdata/vgplot";

// Run the load statement for data/athletes.parquet under the table name
// "athletes" before any mark below queries that table.
const coordinator = vg.coordinator();
const loadAthletes = vg.loadParquet("athletes", "data/athletes.parquet");
await coordinator.exec([loadAthletes]);

// Selection shared by the brush (which writes to it via `as`) and the
// regression mark (which reads it via `filterBy`).
const $query = vg.Selection.intersect();

// Layer 1: one faint dot per row of the full table (no filterBy here),
// colored by sex.
const allAthletes = vg.from("athletes");
const scatter = vg.dot(allAthletes, {
  x: "weight",
  y: "height",
  fill: "sex",
  r: 2,
  opacity: 0.05
});

// Layer 2: regression of height on weight with one stroke color per sex,
// computed only over the rows that pass the current $query selection.
const selectedAthletes = vg.from("athletes", {filterBy: $query});
const trend = vg.regressionY(selectedAthletes, {
  x: "weight",
  y: "height",
  stroke: "sex"
});

// 2D interval brush that publishes its extent to $query. The brush rectangle
// is drawn as an outline only (zero fill opacity, currentColor stroke).
const brush = vg.intervalXY({
  as: $query,
  brush: {fillOpacity: 0, stroke: "currentColor"}
});

// vg.Fixed domains for x/y and color.
// NOTE(review): presumably this stops the scales from rescaling when the
// selection filters the data — confirm against the Mosaic docs.
const fixedXY = vg.xyDomain(vg.Fixed);
const fixedColor = vg.colorDomain(vg.Fixed);

// Directives are passed in the same order as they were created above.
export default vg.plot(scatter, trend, brush, fixedXY, fixedColor);
yaml
# Declarative spec for the linear regression example: a dot layer, a
# regression layer filtered by a selection, and an interval brush.
meta:
  title: Linear Regression
  description: >
    A linear regression plot predicting athletes' heights based on their weights.
    Regression computation is performed in the database.
    The area around a regression line shows a 95% confidence interval.
    Select a region to view regression results for a data subset.
# Named datasets; `athletes` is backed by a Parquet file.
data:
  athletes: { file: data/athletes.parquet }
# Plot entries are listed in order: two marks, then one interactor.
plot:
# Faint dots for every row of `athletes` (no filterBy), colored by sex.
- mark: dot
  data: { from: athletes }
  x: weight
  y: height
  fill: sex
  r: 2
  opacity: 0.05
# Regression of height on weight, one stroke color per sex, restricted to
# rows that pass the $query selection.
- mark: regressionY
  data: { from: athletes, filterBy: $query }
  x: weight
  y: height
  stroke: sex
# 2D interval brush that writes to $query; outline-only brush rectangle.
- select: intervalXY
  as: $query
  brush: { fillOpacity: 0, stroke: currentColor }
# NOTE(review): Fixed presumably keeps the x/y and color scale domains from
# changing as the selection filters the data — confirm against Mosaic docs.
xyDomain: Fixed
colorDomain: Fixed
json
{
  "meta": {
    "title": "Linear Regression",
    "description": "A linear regression plot predicting athletes' heights based on their weights. Regression computation is performed in the database. The area around a regression line shows a 95% confidence interval. Select a region to view regression results for a data subset.\n"
  },
  "data": {
    "athletes": {
      "file": "data/athletes.parquet"
    }
  },
  "plot": [
    {
      "mark": "dot",
      "data": {
        "from": "athletes"
      },
      "x": "weight",
      "y": "height",
      "fill": "sex",
      "r": 2,
      "opacity": 0.05
    },
    {
      "mark": "regressionY",
      "data": {
        "from": "athletes",
        "filterBy": "$query"
      },
      "x": "weight",
      "y": "height",
      "stroke": "sex"
    },
    {
      "select": "intervalXY",
      "as": "$query",
      "brush": {
        "fillOpacity": 0,
        "stroke": "currentColor"
      }
    }
  ],
  "xyDomain": "Fixed",
  "colorDomain": "Fixed"
}