Population of the Czech Republic as per the latest census in 2011, per district (okres).
library(RCzechia)
library(dplyr)
library(readxl)
library(httr)
library(tmap)
library(sf)
# Download the census spreadsheet into a temporary file; `tf` holds its path
# and is what the reading step below opens.
tf <- tempfile(fileext = ".xls")
resp <- GET(
  "https://raw.githubusercontent.com/jlacko/RCzechia/master/data-raw/zvcr034.xls",
  write_disk(tf)
)
# Fail right here on an HTTP error instead of letting the Excel reader choke
# on an error page saved to disk.
stop_for_status(resp)
resp # print the response summary
## Response [https://raw.githubusercontent.com/jlacko/RCzechia/master/data-raw/zvcr034.xls]
## Date: 2019-04-05 08:07
## Status: 200
## Content-Type: application/octet-stream
## Size: 44.5 kB
## <ON DISK> /tmp/RtmpVcGE4Z/filec41cfd46e5.xls
# Census table: district name and population.
src <- read_excel(tf, range = "Data!B5:C97") # read in with original column names
colnames(src) <- c("NAZ_LAU1", "obyvatel") # meaningful names instead of the original ones

src <- src %>%
  mutate(
    # convert from text to number
    obyvatel = as.double(obyvatel),
    # rename Prague (from The Capital to a regular city)
    NAZ_LAU1 = ifelse(NAZ_LAU1 == "Hlavní město Praha", "Praha", NAZ_LAU1)
  )

# District polygons joined to the census data by district name.
# Note the use of an inner (i.e. filtering) join: districts without a match
# in `src` are dropped.
okresni_data <- RCzechia::okresy("low") %>% # data shapefile
  inner_join(src, by = "NAZ_LAU1") # key for data connection

# Choropleth: fill and district borders share one shape, the national border
# is drawn on top from a separate shape.
vystup <- tm_shape(okresni_data) +
  tm_fill(
    col = "obyvatel", title = "Population",
    palette = "Blues", style = "quantile", n = 5
  ) +
  tm_borders("grey40", lwd = 0.5) + # thin edges of districts
  tm_shape(republika("low")) +
  tm_borders("grey30", lwd = 1.5) + # thick national borders
  tm_layout(frame = FALSE) # clean does it; FALSE spelled out, `F` can be reassigned

print(vystup)
Drawing a map: three semi-random landmarks on a map, with rivers shown for better orientation.
The geocoded data frame is obtained using the function RCzechia::geocode().
library(RCzechia)
library(dplyr)
library(tmap)
library(sf)
borders <- RCzechia::republika("low")

# Keep only the rivers flagged in the `Major` column.
rivers <- RCzechia::reky() %>%
  filter(Major == TRUE)

# Landmarks: a display name and the street address that gets geocoded.
mista <- data.frame(
  misto = c(
    "Kramářova vila",
    "Arcibiskupské zahrady v Kroměříži",
    "Hrad Bečov nad Teplou"
  ),
  adresa = c(
    "Gogolova 1, Praha 1",
    "Sněmovní náměstí 1, Kroměříž",
    "nám. 5. května 1, Bečov nad Teplou"
  ),
  # keep the strings as character (not factor) regardless of the R version
  stringsAsFactors = FALSE
)

# from a string vector to sf spatial points object
POI <- RCzechia::geocode(mista$adresa)

tm_plot <- tm_shape(borders) +
  tm_borders("grey30", lwd = 1) +
  tm_shape(POI) +
  tm_symbols(col = "firebrick3", shape = 20, size = 0.5) +
  tm_shape(rivers) +
  tm_lines(col = "steelblue", lwd = 1.5, alpha = 0.5) +
  tm_legend(title = "Very Special Places") + # ... or whatever :)
  tm_layout(frame = FALSE)

print(tm_plot)
A visualization problem: unemployment in the Czech Republic is low in general, but not uniformly so.
What are the hotspots?
library(dplyr)
library(RCzechia)
library(tmap)
library(sf)
# open data on unemployment from Czech Statistical Office - https://www.czso.cz/csu/czso/otevrena_data
# lightly edited for size (rows filtered)
src <- read.csv(
  url("https://raw.githubusercontent.com/jlacko/RCzechia/master/data-raw/unempl.csv"),
  stringsAsFactors = FALSE
)

src <- src %>%
  mutate(KOD_OBEC = as.character(uzemi_kod)) # keys in RCzechia are of type character

podklad <- RCzechia::obce_polygony() %>% # obce_polygony = municipalities in RCzechia package
  inner_join(src, by = "KOD_OBEC") # linking by key

# National border as backdrop, municipalities filled by the `hodnota` column;
# legend labels are rendered as whole numbers followed by " %".
vystup <- tm_shape(republika()) +
  tm_borders(col = "grey40") +
  tm_shape(podklad) +
  tm_fill(col = "hodnota", title = "Unemployment", palette = "YlOrRd") +
  tm_legend(
    legend.format = list(
      fun = function(x) paste0(formatC(x, digits = 0, format = "f"), " %")
    )
  ) +
  tm_layout(frame = FALSE)

print(vystup)
Calculate distance between two spatial objects; the sf
package supports (via gdal) point to point, point to polygon and polygon to polygon distances.
Calculating distance from Prague (#1 Czech city) to Brno (#2 Czech city).
library(dplyr)
library(RCzechia)
library(sf)
library(units)
# All municipalities; the two cities are picked out by name.
obce <- RCzechia::obce_polygony()
praha <- filter(obce, NAZ_OBEC == "Praha")
brno <- filter(obce, NAZ_OBEC == "Brno")

# Distance between the two polygons, expressed in kilometers - these read
# more naturally than meters, miles or decimal degrees.
vzdalenost <- units::set_units(sf::st_distance(praha, brno), "kilometers")

print(vzdalenost)
## Units: [kilometers]
## [,1]
## [1,] 152.8073
The metaphysical center of the Brno City is well known. But where is the geographical center?
The center is calculated using sf::st_centroid() and reverse geocoded via RCzechia::revgeo().
library(dplyr)
library(RCzechia)
library(tmap)
library(sf)
# Polygon of the city of Brno, selected by municipality name.
brno <- RCzechia::obce_polygony() %>%
  filter(NAZ_OBEC == "Brno")

# NOTE(review): the warnings echoed below say the centroid is computed on
# longitude/latitude data; projecting first would presumably avoid that -
# confirm whether the precision matters here.
pupek_brna <- sf::st_centroid(brno) # calculate central point of a polygon
## Warning in st_centroid.sf(brno): st_centroid assumes attributes are
## constant over geometries of x
## Warning in st_centroid.sfc(st_geometry(x), of_largest_polygon =
## of_largest_polygon): st_centroid does not give correct centroids for
## longitude/latitude data
# the revgeo() function takes a sf points data frame and returns it back
# with address data in the "revgeocoded" column
adresa_pupku <- RCzechia::revgeo(pupek_brna)$revgeocoded

# City outline with the centroid marked by a red cross.
tm_plot <- tm_shape(brno) +
  tm_borders(col = "grey40") +
  tm_shape(pupek_brna) +
  tm_dots(size = 1/3, col = "red", shape = 4) +
  tm_legend(title = "Center of Brno") +
  tm_layout(frame = FALSE)

print(adresa_pupku)
## [1] "Žižkova 513/22, Veveří, 61600 Brno"
print(tm_plot)
Interactive maps are powerful tools for data visualization. They are easy to produce with the tmap package.
I found the Stamen Toner basemap a good companion for interactive choropleths - it gives enough context without distracting from the story of your data.
A map of the whole Czech Republic in original resolution (the accuracy is about 1 meter) would be rather sizeable, and I found it better policy to either:
- limit the map to a single region (as the code below does), or
- reduce the size by applying sf::st_simplify() to the shapefile.

Note that RCzechia uses EPSG:4326 projection, with decimal degrees as unit. To simplify to a given tolerance in meters you need to first sf::st_transform() it to a different projection, e.g. EPSG:5513 (ing. Křovák).

Note: it is technically impossible to make html in vignette interactive. As a consequence the result of the code shown has been replaced by a static screenshot; the code itself is legit.
library(dplyr)
library(RCzechia)
library(tmap)
library(sf)
# open data on unemployment from Czech Statistical Office - https://www.czso.cz/csu/czso/otevrena_data
# lightly edited for size (rows filtered)
src <- read.csv(
  url("https://raw.githubusercontent.com/jlacko/RCzechia/master/data-raw/unempl.csv"),
  stringsAsFactors = FALSE
)

src <- src %>%
  mutate(KOD_OBEC = as.character(uzemi_kod)) # keys in RCzechia are of type character

podklad <- RCzechia::obce_polygony() %>% # obce_polygony = municipalities in RCzechia package
  inner_join(src, by = "KOD_OBEC") %>% # linking by key
  filter(KOD_CZNUTS3 == "CZ071") # Olomoucký kraj

# Switch tmap to its interactive ("view") mode for the map below.
tmap_mode("view")

# Municipalities filled by the `hodnota` column, identified by name via `id`;
# legend labels are rendered as whole numbers followed by " %".
vystup <- tm_shape(podklad) +
  tm_fill(
    col = "hodnota", title = "Unemployment",
    palette = "YlOrRd", id = "NAZ_OBEC"
  ) +
  tm_legend(
    legend.format = list(
      fun = function(x) paste0(formatC(x, digits = 0, format = "f"), " %")
    )
  ) +
  tm_view(basemaps = "Stamen.Toner")

print(vystup)
sf Polygons
Creating custom polygons by aggregating administrative units is a common use case in sales reporting and analysis. Function RCzechia::union_sf() makes this task easier by dissolving polygons according to a value of a data column.
In this demonstration the Czech LAU1 units are grouped into two categories: those with odd lettered names, and those with even letters. They are then dissolved into two multipolygons.
library(RCzechia)
library(dplyr)
library(sf)
# Label every Czech LAU1 district by the parity of its name length, then
# dissolve the districts into one geometry per label.
poly <- RCzechia::union_sf(
  mutate(
    RCzechia::okresy("low"), # Czech LAU1 regions as sf data frame
    oddeven = ifelse(nchar(NAZ_LAU1) %% 2 == 0, "even", "odd")
  ),
  "oddeven"
)

plot(poly, key.pos = 1)