NCDC vignette

About the package

rnoaa is an R wrapper for many NOAA data types, including National Climatic Data Center (NCDC).

Install rnoaa

Install and load rnoaa into the R session.

install.packages("devtools")
devtools::install_github("ropensci/rnoaa")
library('rnoaa')
library('plyr')

Get info on a station by specifying a datasetid, locationid, and stationid

ncdc_stations(datasetid='GHCND', locationid='FIPS:12017', stationid='GHCND:USC00084289')
#> $meta
#> NULL
#> 
#> $data
#>                  id elevation                  name elevationUnit
#> 1 GHCND:USC00084289      12.2 INVERNESS 3 SE, FL US        METERS
#>   datacoverage longitude    mindate latitude    maxdate
#> 1            1    -82.31 1899-02-01     28.8 2014-10-14
#> 
#> attr(,"class")
#> [1] "ncdc_stations"

Search for data and get a data.frame

out <- ncdc(datasetid='NORMAL_DLY', datatypeid='dly-tmax-normal', startdate = '2010-05-01', enddate = '2010-05-10')
out$data
#>              station value        datatype                date fl_c
#> 1  GHCND:AQW00061705   869 DLY-TMAX-NORMAL 2010-05-01T00:00:00    C
#> 2  GHCND:CAW00064757   607 DLY-TMAX-NORMAL 2010-05-01T00:00:00    Q
#> 3  GHCND:CQC00914080   840 DLY-TMAX-NORMAL 2010-05-01T00:00:00    R
#> 4  GHCND:CQC00914801   858 DLY-TMAX-NORMAL 2010-05-01T00:00:00    R
#> 5  GHCND:FMC00914395   876 DLY-TMAX-NORMAL 2010-05-01T00:00:00    P
#> 6  GHCND:FMC00914419   885 DLY-TMAX-NORMAL 2010-05-01T00:00:00    P
#> 7  GHCND:FMC00914446   885 DLY-TMAX-NORMAL 2010-05-01T00:00:00    P
#> 8  GHCND:FMC00914482   868 DLY-TMAX-NORMAL 2010-05-01T00:00:00    R
#> 9  GHCND:FMC00914720   899 DLY-TMAX-NORMAL 2010-05-01T00:00:00    R
#> 10 GHCND:FMC00914761   897 DLY-TMAX-NORMAL 2010-05-01T00:00:00    P
#> 11 GHCND:FMC00914831   870 DLY-TMAX-NORMAL 2010-05-01T00:00:00    P
#> 12 GHCND:FMC00914892   883 DLY-TMAX-NORMAL 2010-05-01T00:00:00    P
#> 13 GHCND:FMC00914898   875 DLY-TMAX-NORMAL 2010-05-01T00:00:00    P
#> 14 GHCND:FMC00914911   885 DLY-TMAX-NORMAL 2010-05-01T00:00:00    P
#> 15 GHCND:FMW00040308   888 DLY-TMAX-NORMAL 2010-05-01T00:00:00    S
#> 16 GHCND:FMW00040504   879 DLY-TMAX-NORMAL 2010-05-01T00:00:00    C
#> 17 GHCND:FMW00040505   867 DLY-TMAX-NORMAL 2010-05-01T00:00:00    S
#> 18 GHCND:GQC00914025   852 DLY-TMAX-NORMAL 2010-05-01T00:00:00    P
#> 19 GHCND:GQW00041415   877 DLY-TMAX-NORMAL 2010-05-01T00:00:00    C
#> 20 GHCND:JQW00021603   852 DLY-TMAX-NORMAL 2010-05-01T00:00:00    P
#> 21 GHCND:PSC00914519   883 DLY-TMAX-NORMAL 2010-05-01T00:00:00    P
#> 22 GHCND:PSC00914712   840 DLY-TMAX-NORMAL 2010-05-01T00:00:00    P
#> 23 GHCND:PSW00040309   879 DLY-TMAX-NORMAL 2010-05-01T00:00:00    S
#> 24 GHCND:RMW00040604   867 DLY-TMAX-NORMAL 2010-05-01T00:00:00    S
#> 25 GHCND:RMW00040710   863 DLY-TMAX-NORMAL 2010-05-01T00:00:00    C

Plot data, super simple, but it's a start

out <- ncdc(datasetid='NORMAL_DLY', stationid='GHCND:USW00014895', datatypeid='dly-tmax-normal', startdate = '2010-01-01', enddate = '2010-12-10', limit = 300)
ncdc_plot(out)

plot of chunk six

More on plotting

Example 1

Search for data first, then plot

out <- ncdc(datasetid='GHCND', stationid='GHCND:USW00014895', datatypeid='PRCP', startdate = '2010-05-01', enddate = '2010-10-31', limit=500)

Default plot

ncdc_plot(out)

plot of chunk unnamed-chunk-3

Create 14 day breaks

ncdc_plot(out, breaks="14 days")

plot of chunk unnamed-chunk-4

One month breaks

ncdc_plot(out, breaks="1 month", dateformat="%d/%m")

plot of chunk unnamed-chunk-5

Example 2

Search for data

out2 <- ncdc(datasetid='GHCND', stationid='GHCND:USW00014895', datatypeid='PRCP', startdate = '2010-05-01', enddate = '2010-05-03', limit=100)

Make a plot, with 6 hour breaks, and date format with only hour

ncdc_plot(out2, breaks="6 hours", dateformat="%H")

plot of chunk unnamed-chunk-7

Combine many calls to noaa function

Search for two sets of data

out1 <- ncdc(datasetid='GHCND', stationid='GHCND:USW00014895', datatypeid='PRCP', startdate = '2010-03-01', enddate = '2010-05-31', limit=500)

out2 <- ncdc(datasetid='GHCND', stationid='GHCND:USW00014895', datatypeid='PRCP', startdate = '2010-09-01', enddate = '2010-10-31', limit=500)

Then combine with a call to ncdc_combine

df <- ncdc_combine(out1, out2)
head(df[[1]]); tail(df[[1]])
#>             station value datatype                date fl_m fl_q fl_so
#> 1 GHCND:USW00014895     0     PRCP 2010-03-01T00:00:00    T          0
#> 2 GHCND:USW00014895     0     PRCP 2010-03-02T00:00:00    T          0
#> 3 GHCND:USW00014895     0     PRCP 2010-03-03T00:00:00    T          0
#> 4 GHCND:USW00014895     0     PRCP 2010-03-04T00:00:00               0
#> 5 GHCND:USW00014895     0     PRCP 2010-03-05T00:00:00               0
#> 6 GHCND:USW00014895     0     PRCP 2010-03-06T00:00:00               0
#>   fl_t
#> 1 2400
#> 2 2400
#> 3 2400
#> 4 2400
#> 5 2400
#> 6 2400
#>               station value datatype                date fl_m fl_q fl_so
#> 148 GHCND:USW00014895   221     PRCP 2010-10-26T00:00:00               0
#> 149 GHCND:USW00014895     0     PRCP 2010-10-27T00:00:00               0
#> 150 GHCND:USW00014895     0     PRCP 2010-10-28T00:00:00    T          0
#> 151 GHCND:USW00014895     0     PRCP 2010-10-29T00:00:00    T          0
#> 152 GHCND:USW00014895     0     PRCP 2010-10-30T00:00:00               0
#> 153 GHCND:USW00014895     0     PRCP 2010-10-31T00:00:00               0
#>     fl_t
#> 148 2400
#> 149 2400
#> 150 2400
#> 151 2400
#> 152 2400
#> 153 2400

Then plot - the default passing in the combined plot plots the data together. In this case it looks kind of weird since a straight line combines two distant dates.

ncdc_plot(df)

plot of chunk unnamed-chunk-10

But we can pass in each separately, which uses facet_wrap in ggplot2 to plot each set of data in its own panel.

ncdc_plot(out1, out2, breaks="45 days")

plot of chunk unnamed-chunk-11