isdparser
is a parser for ISD/ISH NOAA files
Code liberated from rnoaa
to focus on ISD parsing since it’s sorta complicated. Has minimal dependencies, so you can parse your ISD/ISH files without needing the deps that rnoaa
needs. Will be used by rnoaa
once on CRAN.
Documentation at ftp://ftp.ncdc.noaa.gov/pub/data/noaa/ish-format-document.pdf
Package API:
- isd_parse() - parse all lines in a file, with parallel option
- isd_parse_line() - parse a single line - you choose which lines to parse and how to apply the function to your lines
- isd_transform() - transform ISD data variables

Stable from CRAN
install.packages("isdparser")
Dev version
devtools::install_github(c("ropensci/isdparser"))
library("isdparser")
path <- system.file('extdata/024130-99999-2016.gz', package = "isdparser")
lns <- readLines(path, encoding = "latin1")
isd_parse_line(lns[1])
#> # A tibble: 1 x 38
#> total_chars usaf_station wban_station date time date_flag latitude
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 0054 024130 99999 2016… 0000 4 +60750
#> # ... with 31 more variables: longitude <chr>, type_code <chr>,
#> # elevation <chr>, call_letter <chr>, quality <chr>,
#> # wind_direction <chr>, wind_direction_quality <chr>, wind_code <chr>,
#> # wind_speed <chr>, wind_speed_quality <chr>, ceiling_height <chr>,
#> # ceiling_height_quality <chr>, ceiling_height_determination <chr>,
#> # ceiling_height_cavok <chr>, visibility_distance <chr>,
#> # visibility_distance_quality <chr>, visibility_code <chr>,
#> # visibility_code_quality <chr>, temperature <chr>,
#> # temperature_quality <chr>, temperature_dewpoint <chr>,
#> # temperature_dewpoint_quality <chr>, air_pressure <chr>,
#> # air_pressure_quality <chr>,
#> # AW1_present_weather_observation_identifier <chr>,
#> # AW1_automated_atmospheric_condition_code <chr>,
#> # AW1_quality_automated_atmospheric_condition_code <chr>,
#> # REM_remarks <chr>, REM_identifier <chr>, REM_length_quantity <chr>,
#> # REM_comment <chr>
Or, give back a list
head(
isd_parse_line(lns[1], as_data_frame = FALSE)
)
#> $total_chars
#> [1] "0054"
#>
#> $usaf_station
#> [1] "024130"
#>
#> $wban_station
#> [1] "99999"
#>
#> $date
#> [1] "20160101"
#>
#> $time
#> [1] "0000"
#>
#> $date_flag
#> [1] "4"
Optionally don’t include “Additional” and “Remarks” sections in parsed output.
isd_parse_line(lns[1], additional = FALSE)
#> # A tibble: 1 x 31
#> total_chars usaf_station wban_station date time date_flag latitude
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 0054 024130 99999 2016… 0000 4 +60750
#> # ... with 24 more variables: longitude <chr>, type_code <chr>,
#> # elevation <chr>, call_letter <chr>, quality <chr>,
#> # wind_direction <chr>, wind_direction_quality <chr>, wind_code <chr>,
#> # wind_speed <chr>, wind_speed_quality <chr>, ceiling_height <chr>,
#> # ceiling_height_quality <chr>, ceiling_height_determination <chr>,
#> # ceiling_height_cavok <chr>, visibility_distance <chr>,
#> # visibility_distance_quality <chr>, visibility_code <chr>,
#> # visibility_code_quality <chr>, temperature <chr>,
#> # temperature_quality <chr>, temperature_dewpoint <chr>,
#> # temperature_dewpoint_quality <chr>, air_pressure <chr>,
#> # air_pressure_quality <chr>
isd_parse(path)
#> # A tibble: 2,601 x 38
#> total_chars usaf_station wban_station date time date_flag latitude
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 0054 024130 99999 2016… 0000 4 +60750
#> 2 0054 024130 99999 2016… 0100 4 +60750
#> 3 0054 024130 99999 2016… 0200 4 +60750
#> 4 0054 024130 99999 2016… 0300 4 +60750
#> 5 0054 024130 99999 2016… 0400 4 +60750
#> 6 0039 024130 99999 2016… 0500 4 +60750
#> 7 0054 024130 99999 2016… 0600 4 +60750
#> 8 0039 024130 99999 2016… 0700 4 +60750
#> 9 0054 024130 99999 2016… 0800 4 +60750
#> 10 0054 024130 99999 2016… 0900 4 +60750
#> # ... with 2,591 more rows, and 31 more variables: longitude <chr>,
#> # type_code <chr>, elevation <chr>, call_letter <chr>, quality <chr>,
#> # wind_direction <chr>, wind_direction_quality <chr>, wind_code <chr>,
#> # wind_speed <chr>, wind_speed_quality <chr>, ceiling_height <chr>,
#> # ceiling_height_quality <chr>, ceiling_height_determination <chr>,
#> # ceiling_height_cavok <chr>, visibility_distance <chr>,
#> # visibility_distance_quality <chr>, visibility_code <chr>,
#> # visibility_code_quality <chr>, temperature <chr>,
#> # temperature_quality <chr>, temperature_dewpoint <chr>,
#> # temperature_dewpoint_quality <chr>, air_pressure <chr>,
#> # air_pressure_quality <chr>,
#> # AW1_present_weather_observation_identifier <chr>,
#> # AW1_automated_atmospheric_condition_code <chr>,
#> # AW1_quality_automated_atmospheric_condition_code <chr>,
#> # REM_remarks <chr>, REM_identifier <chr>, REM_length_quantity <chr>,
#> # REM_comment <chr>
isd_parse(path, parallel = TRUE)
Note: progress is not printed when
parallel = TRUE
isd_parse(path, progress = TRUE)
#>
#> |========================================================================================| 100%
#> # A tibble: 2,601 × 42
#> total_chars usaf_station wban_station date time date_flag latitude longitude type_code
#> <dbl> <chr> <chr> <date> <chr> <chr> <dbl> <dbl> <chr>
#> 1 54 024130 99999 2016-01-01 0000 4 60.75 12.767 FM-12
#> 2 54 024130 99999 2016-01-01 0100 4 60.75 12.767 FM-12
#> 3 54 024130 99999 2016-01-01 0200 4 60.75 12.767 FM-12
#> 4 54 024130 99999 2016-01-01 0300 4 60.75 12.767 FM-12
#> 5 54 024130 99999 2016-01-01 0400 4 60.75 12.767 FM-12
#> 6 39 024130 99999 2016-01-01 0500 4 60.75 12.767 FM-12
#> 7 54 024130 99999 2016-01-01 0600 4 60.75 12.767 FM-12
#> 8 39 024130 99999 2016-01-01 0700 4 60.75 12.767 FM-12
#> 9 54 024130 99999 2016-01-01 0800 4 60.75 12.767 FM-12
#> 10 54 024130 99999 2016-01-01 0900 4 60.75 12.767 FM-12
#> # ... with 2,591 more rows, and 33 more variables: elevation <dbl>, call_letter <chr>, quality <chr>,
#> # wind_direction <dbl>, wind_direction_quality <chr>, wind_code <chr>, wind_speed <dbl>,
#> # wind_speed_quality <chr>, ceiling_height <chr>, ceiling_height_quality <chr>,
#> # ceiling_height_determination <chr>, ceiling_height_cavok <chr>, visibility_distance <chr>,
#> # visibility_distance_quality <chr>, visibility_code <chr>, visibility_code_quality <chr>,
#> # temperature <dbl>, temperature_quality <chr>, temperature_dewpoint <dbl>,
#> # temperature_dewpoint_quality <chr>, air_pressure <dbl>, air_pressure_quality <chr>,
#> # AW1_present_weather_observation_identifier <chr>, AW1_automated_atmospheric_condition_code <chr>,
#> # AW1_quality_automated_atmospheric_condition_code <chr>, N03_original_observation <chr>,
#> # N03_original_value_text <chr>, N03_units_code <chr>, N03_parameter_code <chr>, REM_remarks <chr>,
#> # REM_identifier <chr>, REM_length_quantity <chr>, REM_comment <chr>
Optionally don’t include “Additional” and “Remarks” sections in parsed output.
isd_parse(path, additional = FALSE)
#> # A tibble: 2,601 x 31
#> total_chars usaf_station wban_station date time date_flag latitude
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 0054 024130 99999 2016… 0000 4 +60750
#> 2 0054 024130 99999 2016… 0100 4 +60750
#> 3 0054 024130 99999 2016… 0200 4 +60750
#> 4 0054 024130 99999 2016… 0300 4 +60750
#> 5 0054 024130 99999 2016… 0400 4 +60750
#> 6 0039 024130 99999 2016… 0500 4 +60750
#> 7 0054 024130 99999 2016… 0600 4 +60750
#> 8 0039 024130 99999 2016… 0700 4 +60750
#> 9 0054 024130 99999 2016… 0800 4 +60750
#> 10 0054 024130 99999 2016… 0900 4 +60750
#> # ... with 2,591 more rows, and 24 more variables: longitude <chr>,
#> # type_code <chr>, elevation <chr>, call_letter <chr>, quality <chr>,
#> # wind_direction <chr>, wind_direction_quality <chr>, wind_code <chr>,
#> # wind_speed <chr>, wind_speed_quality <chr>, ceiling_height <chr>,
#> # ceiling_height_quality <chr>, ceiling_height_determination <chr>,
#> # ceiling_height_cavok <chr>, visibility_distance <chr>,
#> # visibility_distance_quality <chr>, visibility_code <chr>,
#> # visibility_code_quality <chr>, temperature <chr>,
#> # temperature_quality <chr>, temperature_dewpoint <chr>,
#> # temperature_dewpoint_quality <chr>, air_pressure <chr>,
#> # air_pressure_quality <chr>