Skip to content

Athlete Height Intervals

Confidence intervals of Olympic athlete heights, in meters. Data are batched into groups of 10 samples per sport. Use the samples slider to see how the intervals update as the sample size increases (as in online aggregation). For each sport, the numbers on the right show the maximum number of athletes in the full dataset.

Loading Example...

Specification

js
import * as vg from "@uwdata/vgplot";

// Extra column added at load time: each row's per-sport row number rounded up
// to the next multiple of 10 (rows 1-10 -> 10, rows 11-20 -> 20, ...).
const batchColumn = "10 * CEIL(ROW_NUMBER() OVER (PARTITION BY sport) / 10) AS batch";

// Load the parquet file as table "athletesBatched", dropping rows whose
// height is missing, before any view is constructed.
const coordinator = vg.coordinator();
const loadAthletes = vg.loadParquet("athletesBatched", "data/athletes.parquet", {
  select: ["*", batchColumn],
  where: "height IS NOT NULL",
});
await coordinator.exec([loadAthletes]);

// Confidence level shared by the "Conf. Level" slider and the error bars.
const $ci = vg.Param.value(0.95);
// Selection written by the "Max Samples" slider; it filters the error bar data.
const $query = vg.Selection.single();

// --- Input widgets ---------------------------------------------------------

// Slider over the `batch` column, stepping by 10 to match the batch size.
const maxSamplesSlider = vg.slider({
  select: "interval",
  as: $query,
  column: "batch",
  from: "athletesBatched",
  step: 10,
  value: 20,
  label: "Max Samples",
});

// Slider that updates the confidence level param directly.
const confidenceSlider = vg.slider({
  as: $ci,
  min: 0.5,
  max: 0.999,
  step: 0.001,
  label: "Conf. Level",
});

const controls = vg.hconcat(maxSamplesSlider, confidenceSlider);

// --- Plot ------------------------------------------------------------------

// Error bars of height per sport, one stroke color per sex, restricted to the
// rows picked by the samples slider.
const heightIntervals = vg.errorbarX(
  vg.from("athletesBatched", { filterBy: $query }),
  {
    ci: $ci,
    x: "height",
    y: "sport",
    stroke: "sex",
    strokeWidth: 1,
    marker: "tick",
    sort: { y: "-x" },
  },
);

// Right-hand counts per sport. This source has no `filterBy`, so the counts
// come from the full table rather than the slider-selected subset.
const athleteCounts = vg.text(vg.from("athletesBatched"), {
  frameAnchor: "right",
  fontSize: 8,
  fill: "#999",
  dx: 25,
  text: vg.count(),
  y: "sport",
});

// Plot-level attributes; the name "heights" is what the legend refers to.
const plotAttributes = [
  vg.name("heights"),
  vg.xDomain([1.5, 2.1]),
  vg.yDomain(vg.Fixed),
  vg.yGrid(true),
  vg.yLabel(null),
  vg.marginTop(5),
  vg.marginLeft(105),
  vg.marginRight(30),
  vg.height(420),
];

const heightsPlot = vg.plot(heightIntervals, athleteCounts, ...plotAttributes);

// Built after the plot so the "heights" name already exists when it is used.
const legend = vg.colorLegend({ for: "heights" });

// Layout: sliders on top, then the plot, then the legend.
export default vg.hconcat(vg.vconcat(controls, heightsPlot, legend));
yaml
# Declarative spec for the "Athlete Height Intervals" example: two sliders,
# a plot of per-sport height intervals with counts, and a color legend.
meta:
  title: Athlete Height Intervals
  description: >
    Confidence intervals of Olympic athlete heights, in meters. Data are
    batched into groups of 10 samples per sport. Use the samples slider to see
    how the intervals update as the sample size increases (as in
    [online aggregation](https://en.wikipedia.org/wiki/Online_aggregation)).
    For each sport, the numbers on the right show the maximum number of
    athletes in the full dataset.
# Table definition: every original column plus a derived `batch` column, which
# is the row's per-sport row number rounded up to the next multiple of 10.
# Rows without a height are dropped.
# NOTE(review): the window has no ORDER BY, so which rows fall into which
# batch presumably follows the file's row order - confirm if that matters.
data:
  athletesBatched:
    file: data/athletes.parquet
    select:
      - '*'
      - 10 * CEIL(ROW_NUMBER() OVER (PARTITION BY sport) / 10) AS batch
    where: height IS NOT NULL
# ci: confidence level, referenced below as $ci by a slider and the error bars.
# query: selection, referenced below as $query by a slider and the error bars.
params:
  ci: 0.95
  query: { select: single }
hconcat:
- vconcat:
  - hconcat:
    # Slider over the `batch` column; it writes to $query. The step of 10
    # matches the batch size used in the `batch` expression above.
    - input: slider
      select: interval
      as: $query
      column: batch
      from: athletesBatched
      step: 10
      value: 20
      label: Max Samples
    # Slider that sets the confidence level param $ci.
    - input: slider
      as: $ci
      min: 0.5
      max: 0.999
      step: 0.001
      label: Conf. Level
  # Named so the legend at the bottom can refer to this plot.
  - name: heights
    plot:
    # Error bars of height per sport, colored by sex; the data is filtered by
    # the $query selection that the Max Samples slider updates.
    - mark: errorbarX
      data: { from: athletesBatched, filterBy: $query }
      ci: $ci
      x: height
      y: sport
      stroke: sex
      strokeWidth: 1
      marker: tick
      sort: { y: -x }
    # Count labels anchored at the right of the frame. There is no filterBy
    # on this data source, so the counts are not affected by the slider.
    - mark: text
      data: { from: athletesBatched }
      frameAnchor: right
      fontSize: 8
      fill: "#999"
      dx: 25
      text: { count: }
      y: sport
    xDomain: [1.5, 2.1]
    # NOTE(review): presumably keeps the sport axis stable while the slider
    # changes the filtered data - confirm against the vgplot documentation.
    yDomain: Fixed
    yGrid: true
    yLabel: null
    marginTop: 5
    marginLeft: 105
    marginRight: 30
    height: 420
  - legend: color
    for: heights
json
{
  "meta": {
    "title": "Athlete Height Intervals",
    "description": "Confidence intervals of Olympic athlete heights, in meters. Data are batched into groups of 10 samples per sport. Use the samples slider to see how the intervals update as the sample size increases (as in [online aggregation](https://en.wikipedia.org/wiki/Online_aggregation)). For each sport, the numbers on the right show the maximum number of athletes in the full dataset.\n"
  },
  "data": {
    "athletesBatched": {
      "file": "data/athletes.parquet",
      "select": [
        "*",
        "10 * CEIL(ROW_NUMBER() OVER (PARTITION BY sport) / 10) AS batch"
      ],
      "where": "height IS NOT NULL"
    }
  },
  "params": {
    "ci": 0.95,
    "query": {
      "select": "single"
    }
  },
  "hconcat": [
    {
      "vconcat": [
        {
          "hconcat": [
            {
              "input": "slider",
              "select": "interval",
              "as": "$query",
              "column": "batch",
              "from": "athletesBatched",
              "step": 10,
              "value": 20,
              "label": "Max Samples"
            },
            {
              "input": "slider",
              "as": "$ci",
              "min": 0.5,
              "max": 0.999,
              "step": 0.001,
              "label": "Conf. Level"
            }
          ]
        },
        {
          "name": "heights",
          "plot": [
            {
              "mark": "errorbarX",
              "data": {
                "from": "athletesBatched",
                "filterBy": "$query"
              },
              "ci": "$ci",
              "x": "height",
              "y": "sport",
              "stroke": "sex",
              "strokeWidth": 1,
              "marker": "tick",
              "sort": {
                "y": "-x"
              }
            },
            {
              "mark": "text",
              "data": {
                "from": "athletesBatched"
              },
              "frameAnchor": "right",
              "fontSize": 8,
              "fill": "#999",
              "dx": 25,
              "text": {
                "count": null
              },
              "y": "sport"
            }
          ],
          "xDomain": [
            1.5,
            2.1
          ],
          "yDomain": "Fixed",
          "yGrid": true,
          "yLabel": null,
          "marginTop": 5,
          "marginLeft": 105,
          "marginRight": 30,
          "height": 420
        },
        {
          "legend": "color",
          "for": "heights"
        }
      ]
    }
  ]
}