Skip to content

NYC Taxi Rides

Pickup and dropoff points for 1M NYC taxi rides on Jan 1-3, 2010. This example projects lon/lat coordinates in the database upon load. Select a region in one plot to filter the other. What spatial patterns can you find? Requires the DuckDB spatial extension.

You may need to wait a few seconds for the dataset to load.

Loading Example...

Specification

js
import * as vg from "@uwdata/vgplot";

// Remote Parquet file containing the raw ride records.
const ridesParquetUrl = "https://idl.uw.edu/mosaic-datasets/data/nyc-rides-2010.parquet";

// Column expressions evaluated while the Parquet file is loaded into "rides":
// the pickup time cast to a timestamp, plus pickup/dropoff points transformed
// from 'EPSG:4326' to 'ESRI:102718'.
const ridesColumns = [
  "pickup_datetime::TIMESTAMP AS datetime",
  "ST_Transform(ST_Point(pickup_latitude, pickup_longitude), 'EPSG:4326', 'ESRI:102718') AS pick",
  "ST_Transform(ST_Point(dropoff_latitude, dropoff_longitude), 'EPSG:4326', 'ESRI:102718') AS drop"
];

// Derives the "trips" table from "rides": a fractional hour-of-day value and
// the x/y components of the projected pickup and dropoff points.
const createTripsTable = `CREATE TABLE IF NOT EXISTS trips AS SELECT
  (HOUR(datetime) + MINUTE(datetime)/60) AS time,
  ST_X(pick) AS px, ST_Y(pick) AS py,
  ST_X(drop) AS dx, ST_Y(drop) AS dy
FROM rides`;

// Run the setup steps in order: load the spatial extension (the ST_* calls
// above depend on it), load the rides, then build the trips table.
const setupCoordinator = vg.coordinator();
await setupCoordinator.exec([
  vg.loadExtension("spatial"),
  vg.loadParquet("rides", ridesParquetUrl, {select: ridesColumns}),
  createTripsTable
]);

// Crossfilter selection shared by the whole layout below: all three interval
// interactors write into it, and both raster maps read it via `filterBy`.
const $filter = vg.Selection.crossfilter();

/**
 * Build one brushable raster map over the "trips" table.
 *
 * The pickup and dropoff maps share every setting except the title, the
 * plotted columns and the color scheme, so those are the only parameters.
 *
 * @param {string} title - Text drawn in the top-left corner of the plot.
 * @param {string} xColumn - "trips" column used for the x channel.
 * @param {string} yColumn - "trips" column used for the y channel.
 * @param {string} scheme - Color scheme name handed to `vg.colorScheme`.
 * @returns {*} Whatever `vg.plot` returns for this specification.
 */
function rideLocationMap(title, xColumn, yColumn, scheme) {
  return vg.plot(
    // Raster of ride locations, restricted by the shared selection.
    vg.raster(
      vg.from("trips", {filterBy: $filter}),
      {x: xColumn, y: yColumn, bandwidth: 0}
    ),
    // 2D brush that publishes its extent into the shared selection.
    vg.intervalXY({as: $filter}),
    // Static title label rendered from a one-row inline dataset.
    vg.text(
      [{label: title}],
      {
        dx: 10,
        dy: 10,
        text: "label",
        fill: "black",
        fontSize: "1.2em",
        frameAnchor: "top-left"
      }
    ),
    vg.width(335),
    vg.height(550),
    vg.margin(0),
    vg.xAxis(null),
    vg.yAxis(null),
    // Fixed domains so that both maps cover the same coordinate window.
    vg.xDomain([975000, 1005000]),
    vg.yDomain([190000, 240000]),
    vg.colorScale("symlog"),
    vg.colorScheme(scheme)
  );
}

/**
 * Build the histogram of ride counts binned on the "time" column.
 *
 * Its bars read "trips" without `filterBy`, exactly as in the original
 * specification; its 1D brush still writes into the shared selection.
 *
 * @returns {*} Whatever `vg.plot` returns for this specification.
 */
function pickupHourHistogram() {
  return vg.plot(
    vg.rectY(
      vg.from("trips"),
      {x: vg.bin("time"), y: vg.count(), fill: "steelblue", inset: 0.5}
    ),
    vg.intervalX({as: $filter}),
    vg.yTickFormat("s"),
    vg.xLabel("Pickup Hour →"),
    vg.width(680),
    vg.height(100)
  );
}

// Layout: pickup and dropoff maps side by side, with the hourly histogram
// underneath. Plots are created in the same order as before (pickups,
// dropoffs, histogram).
export default vg.vconcat(
  vg.hconcat(
    rideLocationMap("Taxi Pickups", "px", "py", "blues"),
    vg.hspace(10),
    rideLocationMap("Taxi Dropoffs", "dx", "dy", "oranges")
  ),
  vg.vspace(10),
  pickupHourHistogram()
);
yaml
# Descriptive metadata for this example: a title and a multi-line description.
meta:
  title: NYC Taxi Rides
  description: |
    Pickup and dropoff points for 1M NYC taxi rides on Jan 1-3, 2010.
    This example projects lon/lat coordinates in the database upon load.
    Select a region in one plot to filter the other.
    What spatial patterns can you find?
    Requires the DuckDB `spatial` extension.

    _You may need to wait a few seconds for the dataset to load._
config:
  # The ST_* functions used in the data definitions need the spatial extension.
  extensions: spatial
data:
  # Raw rides loaded from a remote Parquet file. The select expressions cast the
  # pickup time to a timestamp and transform the pickup/dropoff points from
  # 'EPSG:4326' to 'ESRI:102718'.
  rides:
    file: https://idl.uw.edu/mosaic-datasets/data/nyc-rides-2010.parquet
    select:
      - pickup_datetime::TIMESTAMP AS datetime
      - ST_Transform(ST_Point(pickup_latitude, pickup_longitude), 'EPSG:4326', 'ESRI:102718') AS pick
      - ST_Transform(ST_Point(dropoff_latitude, dropoff_longitude), 'EPSG:4326', 'ESRI:102718') AS drop
  # Table derived from rides: fractional hour of day plus the x/y components of
  # the projected pickup (px, py) and dropoff (dx, dy) points.
  trips: >
    SELECT
      (HOUR(datetime) + MINUTE(datetime)/60) AS time,
      ST_X(pick) AS px, ST_Y(pick) AS py,
      ST_X(drop) AS dx, ST_Y(drop) AS dy
    FROM rides
params:
  # Crossfilter selection referenced as $filter by the plots in the layout.
  filter: { select: crossfilter }
# Layout: two maps side by side (pickups, dropoffs) above an hourly histogram.
vconcat:
- hconcat:
  # Pickup map: raster of (px, py) from trips, filtered by $filter.
  - plot:
    - mark: raster
      data: { from: trips, filterBy: $filter }
      x: px
      y: py
      bandwidth: 0
    # 2D brush that writes into the shared $filter selection.
    - select: intervalXY
      as: $filter
    # Title label drawn from a one-row inline dataset.
    - mark: text
      data: [{ label: Taxi Pickups }]
      dx: 10
      dy: 10
      text: label
      fill: black
      fontSize: 1.2em
      frameAnchor: top-left
    width: 335
    height: 550
    margin: 0
    xAxis: null
    yAxis: null
    # Same fixed domains as the dropoff map, so both show the same window.
    xDomain: [9.75e5, 1.005e6]
    yDomain: [1.9e5, 2.4e5]
    colorScale: symlog
    colorScheme: blues
  - hspace: 10
  # Dropoff map: same settings as the pickup map, but plotting (dx, dy) with
  # the oranges color scheme.
  - plot:
    - mark: raster
      data: { from: trips, filterBy: $filter }
      x: dx
      y: dy
      bandwidth: 0
    - select: intervalXY
      as: $filter
    - mark: text
      data: [{ label: Taxi Dropoffs }]
      dx: 10
      dy: 10
      text: label
      fill: black
      fontSize: 1.2em
      frameAnchor: top-left
    width: 335
    height: 550
    margin: 0
    xAxis: null
    yAxis: null
    xDomain: [9.75e5, 1.005e6]
    yDomain: [1.9e5, 2.4e5]
    colorScale: symlog
    colorScheme: oranges
- vspace: 10
# Histogram of ride counts binned on the time column. The mark reads trips
# without filterBy; its 1D brush still writes into $filter.
- plot:
  - mark: rectY
    data: { from: trips }
    x: { bin: time }
    y: { count: }
    fill: steelblue
    inset: 0.5
  - select: intervalX
    as: $filter
  yTickFormat: s
  xLabel: Pickup Hour →
  width: 680
  height: 100
json
{
  "meta": {
    "title": "NYC Taxi Rides",
    "description": "Pickup and dropoff points for 1M NYC taxi rides on Jan 1-3, 2010.\nThis example projects lon/lat coordinates in the database upon load.\nSelect a region in one plot to filter the other.\nWhat spatial patterns can you find?\nRequires the DuckDB `spatial` extension.\n\n_You may need to wait a few seconds for the dataset to load._\n"
  },
  "config": {
    "extensions": "spatial"
  },
  "data": {
    "rides": {
      "file": "https://idl.uw.edu/mosaic-datasets/data/nyc-rides-2010.parquet",
      "select": [
        "pickup_datetime::TIMESTAMP AS datetime",
        "ST_Transform(ST_Point(pickup_latitude, pickup_longitude), 'EPSG:4326', 'ESRI:102718') AS pick",
        "ST_Transform(ST_Point(dropoff_latitude, dropoff_longitude), 'EPSG:4326', 'ESRI:102718') AS drop"
      ]
    },
    "trips": "SELECT\n  (HOUR(datetime) + MINUTE(datetime)/60) AS time,\n  ST_X(pick) AS px, ST_Y(pick) AS py,\n  ST_X(drop) AS dx, ST_Y(drop) AS dy\nFROM rides\n"
  },
  "params": {
    "filter": {
      "select": "crossfilter"
    }
  },
  "vconcat": [
    {
      "hconcat": [
        {
          "plot": [
            {
              "mark": "raster",
              "data": {
                "from": "trips",
                "filterBy": "$filter"
              },
              "x": "px",
              "y": "py",
              "bandwidth": 0
            },
            {
              "select": "intervalXY",
              "as": "$filter"
            },
            {
              "mark": "text",
              "data": [
                {
                  "label": "Taxi Pickups"
                }
              ],
              "dx": 10,
              "dy": 10,
              "text": "label",
              "fill": "black",
              "fontSize": "1.2em",
              "frameAnchor": "top-left"
            }
          ],
          "width": 335,
          "height": 550,
          "margin": 0,
          "xAxis": null,
          "yAxis": null,
          "xDomain": [
            975000,
            1005000
          ],
          "yDomain": [
            190000,
            240000
          ],
          "colorScale": "symlog",
          "colorScheme": "blues"
        },
        {
          "hspace": 10
        },
        {
          "plot": [
            {
              "mark": "raster",
              "data": {
                "from": "trips",
                "filterBy": "$filter"
              },
              "x": "dx",
              "y": "dy",
              "bandwidth": 0
            },
            {
              "select": "intervalXY",
              "as": "$filter"
            },
            {
              "mark": "text",
              "data": [
                {
                  "label": "Taxi Dropoffs"
                }
              ],
              "dx": 10,
              "dy": 10,
              "text": "label",
              "fill": "black",
              "fontSize": "1.2em",
              "frameAnchor": "top-left"
            }
          ],
          "width": 335,
          "height": 550,
          "margin": 0,
          "xAxis": null,
          "yAxis": null,
          "xDomain": [
            975000,
            1005000
          ],
          "yDomain": [
            190000,
            240000
          ],
          "colorScale": "symlog",
          "colorScheme": "oranges"
        }
      ]
    },
    {
      "vspace": 10
    },
    {
      "plot": [
        {
          "mark": "rectY",
          "data": {
            "from": "trips"
          },
          "x": {
            "bin": "time"
          },
          "y": {
            "count": null
          },
          "fill": "steelblue",
          "inset": 0.5
        },
        {
          "select": "intervalX",
          "as": "$filter"
        }
      ],
      "yTickFormat": "s",
      "xLabel": "Pickup Hour →",
      "width": 680,
      "height": 100
    }
  ]
}