rdwd use cases

Berry Boessenkool, berry-b@gmx.de

2019-03-17

Vignette Rmd source code (Not on CRAN to reduce load on DWD server through daily new builds and checks of the vignette)

main rdwd vignette

Interactive map vignette

Get all hourly rainfall data 2014:2016

Step 1: get the URLS of data to be downloaded

library(rdwd)
links <- selectDWD(res="daily", var="more_precip", per="hist")
length(links) # 5583 stations - would take very long to download
## [1] 4635
# select only the relevant files:
data("metaIndex")
myIndex <- metaIndex[
  metaIndex$von_datum < 20140101 &
  metaIndex$bis_datum > 20161231 & metaIndex$hasfile   ,  ]
data("fileIndex")    
links <- fileIndex[
  suppressWarnings(as.numeric(fileIndex$id)) %in% myIndex$Stations_id &
  fileIndex$res=="daily" &
  fileIndex$var=="more_precip" &
  fileIndex$per=="historical"         , "path" ]  
links <- paste0("ftp://ftp-cdc.dwd.de/pub/CDC/observations_germany/climate/", links)

length(links) # 2001 elements - much better
## [1] 2006

Step 2: download the data

If some downloads fail (mostly because you’ll get kicked off the FTP server), you can just run the same code again and only the missing files will be downloaded.

If you really want to download 2k historical (large!) datasets, you definitely want to set sleep to a much higher value.

For speed, we’ll only work with the first 3 urls.

localfiles <- dataDWD(links[1:3], sleep=0.2, read=FALSE)

Step 3: read the data

2k large datasets probably is way too much for memory, so we’ll use a custom reading function. It will only select the relevant time section and rainfall column. The latter will be named with the id extracted from the filename.

readVars(localfiles[1])[,-3] # we want the RS column
##           Par               Kurz          Einheit
## RS         RS Niederschlagshoehe               mm
## RSF       RSF  Niederschlagsform numerischer Code
## SH_TAG SH_TAG        Schneehoehe               cm
read2014_2016 <- function(file, fread=TRUE, ...)
{
 out <- readDWD(file, fread=fread, ...)
 out <- out[out$MESS_DATUM > as.POSIXct(as.Date("2014-01-01")) & 
            out$MESS_DATUM < as.POSIXct(as.Date("2016-12-31"))    , ]
 out <- out[ , c("MESS_DATUM", "RS")]
 out$MESS_DATUM <- as.Date(out$MESS_DATUM) # might save some memory space...
 # Station id as column name:
 idstringloc <- unlist(gregexpr(pattern="tageswerte_RR_", file))
 idstring <- substring(file, idstringloc+14, idstringloc+18)
 colnames(out) <- c("date",  idstring)
 return(out)
}
str(read2014_2016(localfiles[1])) # test looks good
## 'data.frame':    1090 obs. of  2 variables:
##  $ date : Date, format: "2014-01-02" "2014-01-03" ...
##  $ 00006: num  1.8 0.4 2.3 0.7 0.2 0 0 8.3 0 4 ...

Now let’s apply this to all our files and merge the result.

library(pbapply) # progress bar for lapply loop

rain_list <- pblapply(localfiles, read2014_2016)
rain_df <- Reduce(function(...) merge(..., all=T), rain_list)
str(rain_df) # looks nice!
## 'data.frame':    1094 obs. of  4 variables:
##  $ date : Date, format: "2014-01-02" "2014-01-03" ...
##  $ 00006: num  1.8 0.4 2.3 0.7 0.2 0 0 8.3 0 4 ...
##  $ 00015: num  1.2 0.2 1.5 1.5 0 0 0 5.1 0.3 0.6 ...
##  $ 00019: num  3.3 0.4 2.9 0 0.2 0.1 0 6.3 0.2 3.1 ...
summary(rain_df) # 9 NAs in station 00006
##       date                00006            00015            00019       
##  Min.   :2014-01-02   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:2014-10-02   1st Qu.: 0.000   1st Qu.: 0.000   1st Qu.: 0.000  
##  Median :2015-07-02   Median : 0.100   Median : 0.000   Median : 0.100  
##  Mean   :2015-07-02   Mean   : 2.142   Mean   : 1.647   Mean   : 2.152  
##  3rd Qu.:2016-03-31   3rd Qu.: 1.900   3rd Qu.: 1.500   3rd Qu.: 1.900  
##  Max.   :2016-12-30   Max.   :68.400   Max.   :54.000   Max.   :53.600  
##                       NA's   :5

TOC

step 4: visual data checks

plot(rain_df$date, rain_df[,2], type="n", ylim=range(rain_df[,-1], na.rm=T), 
     las=1, xaxt="n", xlab="Date", ylab="Daily rainfall sum  [mm]")
berryFunctions::monthAxis()
for(i in 2:ncol(rain_df)) lines(rain_df$date, rain_df[,i], col=sample(colours(), size=1))

plot(rain_df[,2:4]) # correlation plot only works for a few columns!

Let’s see the locations of our stations in an interactive map.

data(geoIndex)  ;  library(leaflet) 
mygeoIndex <- geoIndex[geoIndex$id %in% as.numeric(colnames(rain_df)[-1]),]

leaflet(data=mygeoIndex) %>% addTiles() %>%
        addCircleMarkers(~lon, ~lat, popup=~display, stroke=T)

For a static map with scaleBar, OSMscale works nicely but currently still has a Java dependency, see https://github.com/brry/OSMscale#installation

library(OSMscale)
pointsMap("lat", "lon", mygeoIndex, fx=2, fy=1, pargs=list(lwd=3), 
                    col="blue", zoom=5)

TOC

plot all rainfall values around a given point

1. Find meteo stations around a given point

m <- nearbyStations(49.211784, 9.812475, radius=30,
    res=c("daily","hourly"), var=c("precipitation","more_precip","kl"),
    mindate=20160530, statname="Braunsbach catchment center")
# Remove duplicates. if kl and more_precip are both available, keep only more_precip:
library("berryFunctions")
m <- sortDF(m, "var")
m <- m[!duplicated(paste0(m$Stations_id, m$res)),]
m <- sortDF(m, "res")
m <- sortDF(m, "dist", decreasing=FALSE)
rownames(m) <- NULL
head(m[,-14]) # don't show url column with long urls
##   Stations_id von_datum bis_datum Stationshoehe geoBreite geoLaenge
## 1          NA        NA        NA            NA  49.21178  9.812475
## 2        2848  19310101  20190309           458  49.25750  9.859400
## 3        2787  20050101  20190309           354  49.24280  9.678600
## 4        2787  19410101  20190309           354  49.24280  9.678600
## 5        5206  19510101  20161109           396  49.11750  9.897200
## 6        2575  20060901  20190309           426  49.18040  9.980000
##                     Stationsname         Bundesland    res           var
## 1    Braunsbach catchment center               <NA>      z          <NA>
## 2            Langenburg-Atzenrod Baden-Wuerttemberg  daily   more_precip
## 3            Kupferzell-Rechbach Baden-Wuerttemberg hourly precipitation
## 4            Kupferzell-Rechbach Baden-Wuerttemberg  daily   more_precip
## 5          Vellberg-Kleinaltdorf Baden-Wuerttemberg  daily   more_precip
## 6 Kirchberg/Jagst-Herboldshausen Baden-Wuerttemberg hourly precipitation
##          per hasfile     dist
## 1       <NA>      NA  0.00000
## 2 historical    TRUE  6.11954
## 3 historical    TRUE 10.31522
## 4 historical    TRUE 10.31522
## 5 historical    TRUE 12.15981
## 6 historical    TRUE 12.66318

Interactive map of just the meteo station locations:

library(leaflet)
m$col <- "red" ; m$col[1] <- "blue"
leaflet(m) %>% addTiles() %>%
  addCircles(lng=9.812475, lat=49.211784, radius=30e3) %>%
  addCircleMarkers(~geoLaenge, ~geoBreite, col=~col, popup=~Stationsname)

TOC

2. Download and process data

Download and process data for the stations, get the rainfall sums of a particular day (Braunsbach flood May 2016):

prec <- dataDWD(m$url)
names(prec) <- m$Stations_id[-1]
prec29 <- sapply(prec[m$res[-1]=="daily"], function(x)
         {
         col <- "RS"
         if(!col %in% colnames(x)) col <- "R1"
         if(!col %in% colnames(x)) col <- "RSK"
         x[x$MESS_DATUM==as.POSIXct(as.Date("2016-05-29")), col]
         })
prec29 <- data.frame(Stations_id=names(prec29), precsum=unname(prec29))
prec29 <- merge(prec29, m[m$res=="daily",c(1,4:7,14)], sort=FALSE)
head(prec29[,-7]) # don't show url column with long urls
##   Stations_id precsum Stationshoehe geoBreite geoLaenge
## 1        2848   105.0           458   49.2575    9.8594
## 2        2787    72.0           354   49.2428    9.6786
## 3        5206    82.2           396   49.1175    9.8972
## 4        2575    94.0           426   49.1804    9.9800
## 5        3416    82.5           294   49.3423    9.8073
## 6        6260    76.6           385   49.3328    9.7040
##                     Stationsname
## 1            Langenburg-Atzenrod
## 2            Kupferzell-Rechbach
## 3          Vellberg-Kleinaltdorf
## 4 Kirchberg/Jagst-Herboldshausen
## 5                Mulfingen/Jagst
## 6      Ingelfingen-Stachenhausen

TOC

3. Plot rainfall sum on map

For a quick look without a map, this works:

plot(geoBreite~geoLaenge, data=m, asp=1)
textField(prec29$geoLaenge, prec29$geoBreite, prec29$precsum, col=2)

But it’s nicer to have an actual map. If OSMscale installation fails, go to https://github.com/brry/OSMscale#installation

library(OSMscale)
map <- pointsMap(geoBreite,geoLaenge, data=m, type="osm", plot=FALSE)
pp <- projectPoints("geoBreite", "geoLaenge", data=prec29, to=map$tiles[[1]]$projection)
prec29 <- cbind(prec29,pp) ; rm(pp)
plot(map)
scaleBar(map, cex=1.5, type="line", y=0.82)
title(main="Rainfall sum  2016-05-29  7AM-7AM  [mm]", line=-1)
textField(prec29$x, prec29$y, round(prec29$precsum), font=2, cex=1.5)

TOC

Any feedback on this package (or this vignette) is very welcome via github or berry-b@gmx.de!