Dates, Times, and Time Series

Temporal data, consisting of dates and times, pose their own challenges. Time is measured in non-metric units: hours, minutes, and seconds. Dates can be recorded according to various calendar systems and are complicated by leap days and leap seconds. R provides facilities to convert times and dates between different calendar systems, to format temporal data, and to import temporal data recorded in different formats. This is one topic of this chapter. The other topic is time series and similar data structures (such as panels). Basic time series consist of measurements taken at regular temporal intervals, but besides these basic variants, R also supports irregular time series. The chapter therefore also discusses the construction and manipulation of regular and irregular time series.

Below is the supporting material for the various sections of the chapter.

Dates and Times

## Date objects and date formatting ##########################################################

# A number is interpreted as a count of days relative to 'origin':
# 20 days after 1970-01-01.
as.Date(20, origin = "1970-01-01")

# A string in the standard format (YYYY-MM-DD) needs no format specification.
d <- as.Date("1990-11-09")

# Day of the month, full month name, four-digit year.
# The year specification is "%Y" -- a blank between "%" and "Y" does not
# form a documented conversion specification.
format(d, "%e %B %Y")

# Abbreviated month name, two-digit day of the month, two-digit year.
format(d, "%b %d, %y")

# Back to the standard format.
format(d, "%Y-%m-%d")

# The same date, parsed from a month/day/two-digit-year string.
as.Date("11/09/90", format = "%m/%d/%y")

## Date arithmetic ###########################################################################

# Adding a number to a "Date" object moves it forward by that many days;
# month lengths and leap years are respected.

# March has 31 days, so this arrives at the first of April:
d0 <- as.Date("1968-03-01")
d0 + 31

# 1968 was a leap year, so February 28 is followed by February 29:
d1 <- as.Date("1968-02-28")
d1 + 1

# 1900 was not a leap year, so here the next day is March 1:
d2 <- as.Date("1900-02-28")
d2 + 1

# 2000 was a leap year again:
d3 <- as.Date("2000-02-28")
d3 + 1

# The twelve months following 2000-02-28 include a leap day, so it takes
# 366 days to reach the same calendar date in the next year:
d3 + 366

## POSIXct time objects ######################################################################

# A number is interpreted as a count of seconds relative to 'origin'.
# Without 'tz' no time zone is attached to the result.
as.POSIXct(7200, origin = "1970-01-01")

# Two ways to obtain a time object with time zone "GMT":
# (1) pass the time zone when the object is created,
t0 <- as.POSIXct(7200, origin = "1970-01-01", tz = "GMT")
# (2) create the object first and set its "tzone" attribute afterwards.
t0 <- as.POSIXct(7200, origin = "1970-01-01")
attr(t0, "tzone") <- "GMT"

# Strings in a non-standard layout are parsed with an explicit 'format':
# two-digit year/month/day, followed by hours:minutes.
as.POSIXct(
    c("97/11/12 12:45", "98/01/23 14:20"),
    format = "%y/%m/%d %H:%M",
    tz = "GMT"
)

## Time arithmetic #####################################################################

# A string in the standard format can be converted without a format
# specification.
t0 <- as.POSIXct("2020-02-01 00:00", tz = "GMT")
t0

# Numbers added to a time object are seconds, so this is one hour later:
t0 + 3600

# One second before midnight lies on the previous day:
t0 - 1

# The length of a day in seconds
day <- 24 * 3600
t0 + day

# The vector 1:7 is multiplied by 'day' first and the result is added
# to 't0', giving the seven following days:
t0 + (1:7) * day

## POSIXlt time objects ######################################################################

# "POSIXlt" objects store a time point as a list of calendar components.
t0 <- as.POSIXlt(0, origin = "2020-02-01", tz = "GMT")

# One hour and thirty seconds later
t1 <- as.POSIXlt(t0 + 3630)
t1

# Seconds
t1$sec

# Minutes
t1$min

# Hours
t1$hour

# Day of the month
t1$mday

# Month, counted from 0 (January) to 11 (December)
t1$mon

# Year, counted from 1900
t1$year

# Day of the week, counted from 0 (Sunday) to 6 (Saturday)
t1$wday

## Creation of date and time data from year, month, day numbers etc. ##########################

# The first day of each month of the year 2000.
# The hour defaults to 12, i.e. noon.
ISOdate(year = 2000, month = 1:12, day = 1)

# Setting the hour to 0 gives the start of each day (midnight) instead:
ISOdate(year = 2000, month = 1:12, day = 1, hour = 0)

# A sequence of days works the same way -- all days of February 2000:
ISOdate(year = 2000, month = 2, day = 1:29, hour = 0)

# Dates that do not exist (February 30 and 31) result in NA:
ISOdate(year = 2000, month = 2, day = 29:31, hour = 0)

## Time differences ###########################################################################

# Differences between time points can be computed for "POSIXct" and
# "POSIXlt" objects alike, and the two classes can be mixed.
t0 <- as.POSIXlt(0, origin = "2020-02-01", tz = "GMT")
t1 <- as.POSIXct(0, origin = "2020-02-01 3:00", tz = "GMT")
t2 <- as.POSIXlt(0, origin = "2020-02-01 3:45", tz = "GMT")
t3 <- as.POSIXct(0, origin = "2020-02-01 3:45:06", tz = "GMT")

# The unit of measurement of a difference is chosen automatically;
# usually it is the largest sensible unit.
t1 - t0
t2 - t1
t3 - t2
t3 - t0

# The last difference is reported in (fractional) hours. Seconds may be
# the more sensible unit of measurement here, so we switch the unit:
diff.t <- t3 - t0
units(diff.t) <- "secs"
diff.t

# Differences can be computed between dates as well:
d0 <- as.Date("2020-01-31")
d1 <- as.Date("2020-02-28")
d2 <- as.Date("2020-03-31")

# These differences are usually in days:
d1 - d0
d2 - d0

# ... but they can be converted, e.g. into hours:
diff.d <- d1 - d0
units(diff.d) <- "hours"
diff.d

# Time durations can also be created from scratch,
# either from strings:
as.difftime("0:30:00")
# or from numbers, in which case the unit of measurement has to be given:
as.difftime(30, units = "mins")

Script file: dates-times.R

Time Series

## Regular time series ###############################################################

# The presidents time series

# This call is not strictly needed; it only serves as a reminder that
# 'presidents' is a pre-installed example data set.
data(presidents)

# The series holds quarterly measurements of presidents' popularity.
# 'tsp()' returns the time series properties: the starting point, the end
# point, and the frequency with which popularity is measured within a year.
tsp(presidents)

# 'start()', 'end()' and 'frequency()' return each of these properties
# separately.
start(presidents)
end(presidents)
frequency(presidents)

# The first twelve observations, selected by position
presidents[1:12]

# Observations selected by time: from the start of 1945 up to the fourth
# quarter of 1947
window(presidents, start = 1945, end = c(1947, 4))

# The stretch from the start of 1969 up to the second quarter of 1974
nixon <- window(presidents, start = 1969, end = c(1974, 2))
nixon
plot(nixon)

# The time points of the observations
time(nixon)

# OECD unemployment data

# Import the data from the CSV file in the working directory.
unemployment <- read.csv(file = "unemployment.csv")

# Columns 2 to 5 become a multivariate time series that starts in 1970
# (with the default frequency of one observation per time unit).
unemployment.ts <- ts(unemployment[2:5], start = 1970)

plot(unemployment.ts)

# The observations from 1980 to 1989
window(unemployment.ts, start = 1980, end = 1989)

# Changes between successive time points
delta.unemployment.ts <- diff(unemployment.ts)
plot(delta.unemployment.ts)


# Artificial time series data

# Twelve random numbers, rounded to one decimal place.
# (The first argument of rnorm() is the number of values to draw.)
x <- round(rnorm(12), 1)

# The same twelve values interpreted as quarterly data (three years) ...
ts(x, start = 2000, frequency = 4)

# ... and as monthly data (one year).
ts(x, start = 2000, frequency = 12)

# The unemployment columns, here declared to be quarterly data
# (requires the data frame 'unemployment' read from "unemployment.csv").
ts(unemployment[2:5], start = 1970, frequency = 4)

## Irregular time series ###############################################################

# Strip the time series attributes from 'presidents', keeping only the numbers.
npresidents <- as.numeric(presidents)

# Creating "zoo" objects
library(zoo)

# Build the time index by hand: year plus 0, 1/4, 2/4, 3/4 for the
# four quarters.
years <- 1945:1974
quarters <- 1:4
presi.times <- yearqtr(
    rep(years, each = length(quarters)) +            # every year once per quarter
    rep((quarters - 1) / 4, times = length(years))   # the quarters once per year
)

# A "zoo" object combines the data with the index that orders them.
zpresidents <- zoo(npresidents, order.by = presi.times)
zpresidents
str(zpresidents)

# The data and the index can be extracted separately.
# To save space, only the first 15 elements are shown.
coredata(zpresidents)[1:15]
index(zpresidents)[1:15]
time(zpresidents)[1:15]

# The first eight observations, selected by position
zpresidents[1:8]

# Subsetting "zoo" objects

# A string can be converted into a year-quarter value ...
as.yearqtr("1945 Q2")

# ... which can then be used to select an observation by its time point.
zpresidents[as.yearqtr("1945 Q2")]

# The third quarters of the years 1960 to 1969
qtrs3 <- as.yearqtr(paste(1960:1969, "Q3"))
zpresidents[qtrs3]

# All quarters of 1960 to 1964, within each year in reverse order.
# There are five years, so the quarter pattern is repeated five times
# to match the 20 year labels without relying on recycling.
qtrs <- paste(rep(1960:1964, each = 4), rep(4:1, 5), sep = "-")
qtrs
zpresidents[as.yearqtr(qtrs)]

# NOTE(review): 'unemployment.z' is not created anywhere in the visible part
# of this script (the import section further below creates 'unemployment_z').
# It is presumably a "zoo" object with a "Date" index built from
# "unemployment.csv" -- confirm and define it before this point.
unemployment.z[as.Date("1997-12-31")]

# window() selects a range of time points, here given as year-quarter values
window(zpresidents,
       start = as.yearqtr("1969-1"),
       end   = as.yearqtr("1974-2"))

# ... and here given as dates.
window(unemployment.z,
       start = as.Date("1980-12-31"),
       end   = as.Date("1989-12-31"))

# Handling missing data

# Dropping the missing values from the regular time series leads to an error.
# The call is wrapped in try() so that the error message is still shown,
# but a script run with source() continues with the next statement.
try(presidents.o <- na.omit(presidents))

# With a "zoo" object the missing values can be dropped:
zpresidents.o <- na.omit(zpresidents)
c("Original length" = length(zpresidents),
  "Length after dropping NAs"  = length(zpresidents.o))

# The complete series (dotted) and, on top of it, the longest stretch
# without missing values (thick line)
plot(zpresidents, lty = 3)
lines(na.contiguous(zpresidents), lwd = 2)

# The observed series (thick line) with the gaps filled in by linear
# interpolation (dashed) and by spline interpolation (dotted)
plot(zpresidents, lwd = 2)
lines(na.approx(zpresidents), lty = 2)
lines(na.spline(zpresidents), lty = 3)

# Rolling statistics

zpresidents.o <- na.omit(zpresidents)

# Rolling means over windows of seven observations, computed on the first
# eight observations only. 'align' determines which position in the window
# the resulting mean is attached to.
zpresidents.o8 <- zpresidents.o[1:8]
rollmean(zpresidents.o8, k = 7)
rollmean(zpresidents.o8, k = 7, align = "left")
rollmean(zpresidents.o8, k = 7, align = "right")

# Number of observations in each rolling window used below
roll.width <- 9

# Rolling mean of the spline-interpolated series
zpresidents.s <- na.spline(zpresidents)
plot(zpresidents.s, lty = 3)
zpresidents.m <- rollmean(zpresidents.s, k = roll.width)
lines(zpresidents.m, lwd = 2)

# Rolling standard deviation over the same windows
zpresidents.sd <- rollapply(zpresidents.s,
                            width = roll.width,
                            FUN = sd)

# Band of +/- t * (standard error of the mean) around the rolling mean.
# Each window contains 'roll.width' observations, so the t quantile has
# roll.width - 1 degrees of freedom and the standard error of the window
# mean is sd / sqrt(roll.width).
tv <- qt(.975, df = roll.width - 1)
zpresidents.u <- zpresidents.m + tv * zpresidents.sd / sqrt(roll.width)
zpresidents.l <- zpresidents.m - tv * zpresidents.sd / sqrt(roll.width)

plot(zpresidents.m, ylim = c(20, 80))
lines(zpresidents.u, lty = 2)
lines(zpresidents.l, lty = 2)

# Time arithmetics with "zoo" objects

# Two overlapping pieces of the "zoo" series: observations 1 to 4 and
# observations 3 to 6. Their sum is formed for matching time points.
zpresidents_1 <- zpresidents[1:4]
zpresidents_2 <- zpresidents[3:6]
zpresidents_1 + zpresidents_2

# The same positions taken from the regular time series, for comparison.
presidents_1 <- presidents[1:4]
presidents_2 <- presidents[3:6]
presidents_1 + presidents_2

# Merging (multivariate) time series

# NOTE(review): 'unemployment.z' is not created anywhere in the visible part
# of this script -- confirm where it is defined before running this section.

# Columns 4, 5 and 6 are extracted as separate series; missing values are
# dropped from the third one only.
Netherlands <- unemployment.z[, 4]
Belgium     <- unemployment.z[, 5]
Luxembourg  <- na.omit(unemployment.z[, 6])

# The lengths of the three series
length(Netherlands)
length(Belgium)
length(Luxembourg)

# Combine the three series into one multivariate object
unemployment.benelux <- merge(Netherlands,
                              Belgium,
                              Luxembourg)
head(unemployment.benelux, n = 10)

# Importing data into "zoo" objects

# A CSV file can be read directly into a "zoo" object.
unemployment_z <- read.csv.zoo("unemployment.csv")
str(unemployment_z)

# Data can also be read from a text string: one observation per line,
# with the date followed by the value.
Text <- "2012/1/6 20
2012/1/7 30
2012/1/8 40
"
# First without ...
read.zoo(text = Text)

# ... and then with an explicit format for the dates.
read.zoo(text = Text, format = "%Y/%m/%d")

# Here date and time are held in two separate columns; both are named
# as index columns.
Text <- "date,time,x,y
2011-05-08,22:45:21,4,41
2011-05-08,22:45:22,5,42
2011-05-08,22:45:23,5,42
2011-05-08,22:45:24,6,43
"
zobj <- read.csv.zoo(
    text = Text,
    index.column = 1:2
)
zobj

Script file: time-series.R

Data file: unemployment.csv (originally downloaded from https://data.oecd.org)

Add-on package: zoo available from https://cran.r-project.org/package=zoo