Spatial and Geographical data

While single geographical locations can be identified with a single pair of coordinates, the geographical extent of trade routes or countries are series of such coordinate values and cannot easily accommodated with the observation-by-variable format of data frames. Instead such geographical data constitutes its own data type, which is discussed in this chapter. The chapter also discusses how such spatial data can be connected with other data about geographical entities, e.g. the population density or GDP per capita of countries. Further it discusses the import of geographical data from shape files and Google Maps KML files and the definition of and conversion between cartographic projections.

Below is the supporting material for the various sections of the chapter.

The Structure of Spatial Data

## The structure of objects of classes defined in the 'sp' package ###############################
library(spatial)
library(cshapes)
cshape.sp <- cshp()
# Obtaining the data for the Gambia from the Cshapes data base
Gambia <- subset(cshape.sp,CNTRY_NAME=="The Gambia")

plot(Gambia)

class(Gambia)

## The structure of objects of classes defined in the 'sf' package ###############################

# The data used here is available from https://ucdp.uu.se/downloads/index.html#ged_global
unzip("ged191-RData.zip")
yload("ged191.RData")

class(ged191)

# Number of observations
nrow(ged191)
# Number of variables
ncol(ged191)

# Using the adapted 'print()' method from the 'sf' package
library(sf)
ged191

# Examining the structure of an "sf" object with 'str()'
str(ged191)

# Obtaining the geometry contained in 'ged191'
gged191 <- st_geometry(ged191)
gged191

# Accessing an individual shape
gged191[[1]]

# The class of a shape object
class(gged191[[1]])

# Converting "sp" objects into "sf" objects

library(sf)
library(cshapes)

cshapes.1990 <- cshp(as.Date("1990-01-01"))
cshapes.1990 <- as(cshapes.1990,"sf")
cshapes.1990

cshapes.1990 <- as(cshapes.1990,"sf")
partial_out(cshapes.1990,.head=19,.tail=11)

SthAmCntry.names <- c(
    "Argentina",
    "Bolivia",
    "Brazil",
    "Chile",
    "Colombia",
    "Ecuador",
    "Guyana",
    "Paraguay",
    "Peru",
    "Suriname",
    "Uruguay",
    "Venezuela")

SthAmCountries <-
    subset(cshapes.1990,
           CNTRY_NAME %in% SthAmCntry.names)

Brazil <- subset(cshapes.1990,CNTRY_NAME=="Brazil")
Chile <-  subset(cshapes.1990,CNTRY_NAME=="Chile")
Colombia <-  subset(cshapes.1990,CNTRY_NAME=="Colombia")

cap.latlong <- with(cshapes.1990,cbind(CAPLONG,CAPLAT))

cap.latlong <- lapply(1:nrow(cap.latlong),
                      function(i)cap.latlong[i,])

cap.latlong <- lapply(cap.latlong,st_point)
cap.latlong <- st_sfc(cap.latlong)

cshapes.capitals.1990 <- cshapes.1990
st_geometry(cshapes.capitals.1990) <- cap.latlong

st_crs(cshapes.capitals.1990) <- st_crs(cshapes.1990)

cshapes.capitals.1990

Brasilia <- subset(cshape.capitals.1990,CNTRY_NAME=="Brazil")
Santiago <-  subset(cshape.capitals.1990,CNTRY_NAME=="Chile")
Bogota <-  subset(cshape.capitals.1990,CNTRY_NAME=="Colombia")


graypal <- function(n)gray.colors(n,start=.2,end=.9,alpha=.5)
plot(SthAmCountries,pal=graypal)

plot(st_geometry(SthAmCountries))

Script file: structure-spatial-data.R

Data file: ged191-RData.zip available from https://ucdp.uu.se/downloads/index.html#ged_global

Add-on packages:

Spatial Relations and Operations

library(sf)

# The following code builds on the results created by running the
# code in file "Structure Spatial Data.R"

st_contains(Brazil,Brasilia,sparse=FALSE)
st_contains(Brazil,Bogota,sparse=FALSE)

ThreeCountries <-
    subset(cshape.1990,
           CNTRY_NAME %in% c("Brazil","Chile","Colombia"))
rownames(ThreeCountries) <- ThreeCountries$CNTRY_NAME
ThreeCapitals <-
    subset(cshape.capitals.1990,
           CNTRY_NAME %in% c("Brazil","Chile","Colombia"))
rownames(ThreeCapitals) <- ThreeCapitals$CAPNAME
st_contains(ThreeCountries,ThreeCapitals,sparse=FALSE)

structure(
   st_contains(ThreeCountries,ThreeCapitals,sparse=FALSE),
   dimnames=list(rownames(ThreeCountries),rownames(ThreeCapitals))
)

st_touches(Brazil,Colombia,sparse=FALSE)
st_touches(Brazil,Chile,sparse=FALSE)

structure(
   st_touches(ThreeCountries,sparse=FALSE),
   dimnames=list(rownames(ThreeCountries),rownames(ThreeCountries))
)

st_area(Brazil)

in_km2 <- function(x) units::set_units(x,"km^2")
in_km2(st_area(Brazil))

in_km2(st_area(SthAmCountries))

structure(in_km2(st_area(SthAmCountries)),
          names=as.character(SthAmCountries$CNTRY_NAME))

st_distance(Brasilia,Bogota)

st_distance(Chile,Bogota)

st_distance(Chile,Colombia)

in_km <- function(x) units::set_units(x,"km")
in_km(st_distance(Brasilia,Bogota))

in_km(st_distance(ThreeCapitals))

## Unions and intersections of artificial shapes

library(sf)
A <- rbind(
    c(0,0),
    c(2,0),
    c(0,2),
    c(0,0)
)
A <- st_polygon(list(A))
B <- rbind(
    c( 1, 1),
    c( 1,-1),
    c(-1,-1),
    c(-1, 1),
    c( 1, 1)
)
B <- st_polygon(list(B))

op <- par(mai=c(0,0,0,0),mfrow=c(1,3),xpd=NA)
plot(A,xlim=c(-1,2),ylim=c(-1,2))
plot(B,lty=2,add=TRUE)
text(0,1.5,"A",pos=2)
text(1,-.5,"B",pos=4)
text(.5,-1.5,"Two shapes A and B",pos=1)
plot(st_union(A,B),col="gray70",xlim=c(-1,2),ylim=c(-1,2))
plot(A,lty=3,add=TRUE)
plot(B,lty=3,add=TRUE)
text(.5,-1.5,"st_union(A,B)",pos=1)
plot(A,lty=3,xlim=c(-1,2),ylim=c(-1,2))
plot(B,lty=3,add=TRUE)
plot(st_intersection(A,B),add=TRUE,col="gray70")
text(.5,-1.5,"st_intersection(A,B)",pos=1)
par(op)

op <- par(mai=c(0,0,0,0),mfrow=c(1,3),xpd=NA)
plot(st_difference(A,B),col="gray70",,xlim=c(-1,2),ylim=c(-1,2))
plot(A,lty=3,add=TRUE)
plot(B,lty=3,add=TRUE)
text(.5,-1.5,"st_difference(A,B)",pos=1)
plot(st_difference(B,A),col="gray70",,xlim=c(-1,2),ylim=c(-1,2))
plot(A,lty=3,add=TRUE)
plot(B,lty=3,add=TRUE)
text(.5,-1.5,"st_difference(B,A)",pos=1)
plot(st_sym_difference(A,B),col="gray70",,xlim=c(-1,2),ylim=c(-1,2))
text(.5,-1.5,"st_sym_difference(A,B)",pos=1)
par(op)

## Unions and intersections of geographical shapes

SouthAmerica <- st_union(SthAmCountries)

plot(st_geometry(SthAmCountries))
plot(st_geometry(SouthAmerica))

library(cshp)
cshapes.1990 <- cshp(as.Date("1990-01-01"))
cshapes.1990 <- as(cshapes.1990,"sf")
SthAmCntry.names <- c(
    "Argentina",
    "Bolivia",
    "Brazil",
    "Chile",
    "Colombia",
    "Ecuador",
    "Guyana",
    "Paraguay",
    "Peru",
    "Suriname",
    "Uruguay",
    "Venezuela")
SthAmCountries <-
    subset(cshapes.1990,
           CNTRY_NAME %in% SthAmCntry.names)
Colombia <-  subset(cshapes.1990,CNTRY_NAME=="Colombia")
ged191_ellips <- st_transform(ged191,st_crs(cshapes.1990))

aggregate(ged191_ellips["deaths_civilians"],by=SthAmCountries,sum)

within(
    aggregate(ged191_ellips["deaths_civilians"],by=SthAmCountries,sum),
    country <- SthAmCountries$CNTRY_NAME)

st_circ <- function(x,dist.km){
    dist.degr <- 360*dist.km/40007.863
    st_buffer(st_geometry(x),dist=dist.degr)
}

Bogota.region <- st_circ(Bogota,dist.km=200)
Columbia.rest <- st_difference(st_geometry(Colombia),Bogota.region)

c(Bogota.region,Columbia.rest)

aggregate(ged191_ellips["deaths_civilians"],
          by=c(Bogota.region,Columbia.rest),
          sum)

Script file: spatial-relations-operations.R

Add-on packages:

Cartographic Projections

## Geographical projections of a mainland USA map (with apologies to Alaskans and Hawaiians)

load("US_flat.RData")
US_flat

plot(st_geometry(US_flat),
     graticule=TRUE,axes=TRUE)

bbox_US <- st_bbox(US_flat)

c(xcenter = mean(bbox_US[c("xmin","xmax")]),
  ycenter = mean(bbox_US[c("ymin","ymax")]))

laea <- st_crs("+proj=laea +lon_0=-95.8 +lat_0=37.3")
US_proj <- st_transform(US_flat,laea)
plot(st_geometry(US_proj),
     graticule=TRUE,axes=TRUE)

Script file: cartographic-projections.R

Data file: US_flat.RData (This processed originall from the package maps - available from https://cran.r-project.org/package=maps.)

Add-on package: sf available from https://cran.r-project.org/package=sf

Geographical Data Files

## Importing a shapefile from the Upsala conflict data project ##############################
# The data used here are available from https://ucdp.uu.se/downloads/index.html#ged_polygons

unzip("ucdp-ged-poly-v-1-1-shape.zip",list=TRUE)

unzip("ucdp-ged-poly-v-1-1-shape.zip",
      files=c(
          "UCDPGEDpoly.shx",
          "UCDPGEDpoly.shp",
          "UCDPGEDpoly.dbf"
          ))

# All three of the following lines are equivalent:
UCDPGEDpoly <- st_read("UCDPGEDpoly.shx")
UCDPGEDpoly <- st_read("UCDPGEDpoly.shp")
UCDPGEDpoly <- st_read("UCDPGEDpoly.dbf")

UCDPGEDpoly

## Importing OpenStreatMap data ############################################################

# The file used here was extracted by hand from http://www.openstreetmap.org
st_layers("stpauls.osm")

stpauls_lines <- st_read("stpauls.osm",layer="lines")
stpauls_polygons <- st_read("stpauls.osm",layer="multipolygons")

# Plotting the polygons ...
plot(st_geometry(stpauls_polygons),
     col="gray80",
     xlim=c(-0.1,-0.097),
     ylim=c(51.5135,51.514)
     )
# and adding the lines
plot(st_geometry(stpauls_lines),add=TRUE)

Script file: geographical-data-files.R

Data files: