Building Blocks of Data

This chapter describes the most basic data types on which all other data structures build. It starts with simple numeric vectors, which may, for example, contain series of measurements. It further discusses character vectors, i.e. sequences of character strings, logical vectors, i.e. sequences of TRUE/FALSE data, and finally lists. The chapter also covers how simple computations on such data can be conducted and how simple summaries can be obtained from elementary data types. Finally, the chapter discusses how data can be stored on disk in an R-specific format.

Below is the supporting material for the various sections of the chapter.

Basic Data Types

Numeric vectors

  • Interactive notebook: https://mybinder.org/badge_logo.svg

  • Script file: numeric-vectors.R

    ## Combine individual numbers into a numeric vector
    c(1.2, 3.5, 5.0, 6.7, 1.09e-3)
    
    ## Store the vector under a name and ask for its number of elements
    x <- c(1.2, 3.5, 5.0, 6.7, 1.09e-3)
    length(x)
    
    ## The colon operator creates the sequence of integers from 1 to 100
    1:100
    
    ## Arithmetic on two vectors of equal length works element by element
    x <- c(1, 2, 3, 4, 5)
    y <- c(3, 2, 3, 2, 3)
    z <- x + y
    print(z)
    
    ## A single number is recycled to the length of the other operand ...
    x <- c(3, 2, 4, 8, 7)
    y <- x + 1
    print(y)
    
    ## ... so the result is the same as when a vector of ones is added
    x <- c(3, 2, 4, 8, 7)
    y <- x + c(1, 1, 1, 1, 1)
    print(y)
    
    ## Missing values propagate through arithmetic: the result is NA
    1 + NA
    
    ## Division by zero gives Inf, -Inf, or NaN (for 0/0) instead of an error
    x <- c(-2, -1, 0, 1, 2)
    1 / x
    x / 0
    

Logical vectors

  • Interactive notebook: https://mybinder.org/badge_logo.svg

  • Script file: logical-vectors.R

    ## Comparisons
    ## A comparison operator applied to a vector yields a logical vector
    x <- -3:3
    x
    
    ## The single value 0 is compared with every element of x
    x == 0
    
    ## Two vectors of equal length are compared element by element
    x <- -3:3
    y <- c(1:3, 0, 1:3)
    x == y
    
    ## Logical operators
    ## a and b together cover all four combinations of TRUE and FALSE
    a <- c(TRUE, FALSE, TRUE, FALSE)
    b <- c(TRUE, TRUE, FALSE, FALSE)
    
    ## Element-wise "and"
    a & b
    
    ## Element-wise "or"
    a | b
    
    ## Negation
    !a
    
    ## Operators can be combined; parentheses control the evaluation order
    a & !b
    
    !(a | b)
    
    x <- -3:3
    
    ## No number is both greater than 1 and less than -1: all FALSE
    x > 1 & x < -1
    
    ## TRUE for the elements that are greater than 1 or less than -1
    x > 1 | x < -1
    
    ## All nine combinations of TRUE, FALSE, and NA: the result is NA only
    ## where it cannot be determined without knowing the missing value
    a <- c(TRUE, FALSE, NA, TRUE, FALSE, NA, TRUE, FALSE, NA)
    b <- c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, NA, NA, NA)
    
    a & b
    
    a | b
    

Character vectors

  • Interactive notebook: https://mybinder.org/badge_logo.svg

  • Script file: character-vectors.R

    ## A character vector is a sequence of character strings
    Beatles <- c("John", "Paul", "George", "Ringo")
    Beatles
    
    ## paste() joins its arguments into one string, separated by blanks;
    ## 'collapse' joins the elements of a single vector instead
    paste("one", "and", "only")
    paste(Beatles, collapse = " & ")
    
    ## With several vectors paste() works element by element
    First <- c("Mick", "Keith", "Ronnie", "Charlie")
    Last <- c("Jagger", "Richards", "Wood", "Watts")
    paste(First, Last)
    
    ## 'sep' sets the string that is put between the pasted pieces
    paste(First, Last, sep = "_")
    
    ## Extract the characters from position 1 to position 2 of each string
    substr(Beatles, 1, 2)
    
    ## Start and stop positions may differ from element to element
    substr(Beatles, 1:4, 2:5)
    
    ## sub() replaces the first match of a pattern within a string
    Led.Zeppelin.song <- "Whole Lotta Love"
    ACDC.song <- sub("Love", "Rosie", Led.Zeppelin.song)
    print(ACDC.song)
    
    ## The names of a vector's elements are themselves a character vector
    onetofour <- 1:4
    names(onetofour) <- c("first", "second", "third", "fourth")
    names(onetofour)
    onetofour
    

Basic Data Manipulation

Extracting and replacing elements of a vector

  • Interactive notebook: https://mybinder.org/badge_logo.svg

  • Script file: extracting-and-replacing-elements.R

    x <- c(10, 12, 30, 14, 50)
    
    ## A positive index selects the element at that position
    x[1]
    
    x[5]
    
    ## Several positions at once; position 6 does not exist and yields NA
    x[c(2, 4, 6)]
    
    ## Positions may be repeated
    x[c(1, 1, 1, 2, 2)]
    
    ## Negative indices drop the elements at the given positions
    x[-c(1, 3, 5)]
    
    ## A logical index keeps the elements where it is TRUE
    x[c(FALSE, TRUE, FALSE, TRUE, FALSE)]
    
    ## The logical index can be the result of a comparison
    x[x >= 20]
    
    ## Once the elements have names, they can be selected by name
    names(x) <- c("a", "b", "c", "d", "e")
    
    x[c("a", "c")]
    
    ## Fix the state of the random number generator for reproducible results
    set.seed(231)
    
    y <- rnorm(n = 12)
    
    ## Replace the first four elements by zero
    y[1:4] <- 0
    y
    
    ## A fresh draw of twelve random numbers
    y <- rnorm(n = 12)
    
    ## Replace all negative elements by zero
    y[y < 0] <- 0
    y
    

Reordering and sorting elements of a vector

  • Interactive notebook: https://mybinder.org/badge_logo.svg

  • Script file: reordering-and-sorting.R

    ## Fix the state of the random number generator for reproducible results
    set.seed(231)
    
    x <- rnorm(n = 10)
    x
    
    ## sort() returns the elements in increasing order ...
    x.srt <- sort(x)
    x.srt
    
    ## ... or in decreasing order if asked to
    sort(x, decreasing = TRUE)
    
    ## Character strings can be sorted too; the resulting order follows the
    ## collation rules of the current locale
    stex <- c("1", "11", "A", "a", "Ab", "AB", "ab", "aB", "B", "b", "bb")
    sort(stex)
    
    set.seed(2134)
    x <- rnorm(6)
    x
    y <- rnorm(6)
    y
    ## order() returns the positions that bring x into increasing order
    ii <- order(x)
    
    ## Using the same index vector for x and y keeps their elements paired
    x.ordered <- x[ii]
    y.ordered <- y[ii]
    x.ordered
    y.ordered
    
    ## Applying order() to the index vector gives the positions that undo
    ## the reordering
    jj <- order(ii)
    
    ## Check that the original order is restored
    all(x.ordered[jj] == x)
    all(y.ordered[jj] == y)
    

Regular sequences and repetitions

Sampling from a vector

  • Interactive notebook: https://mybinder.org/badge_logo.svg

  • Script file: sampling-from-vectors.R

    ## Fix the state of the random number generator for reproducible results
    set.seed(143)
    
    ## Without further arguments sample() returns a random permutation
    sample(1:9)
    
    ## Draw 20 of the numbers from 1 to 1000 without replacement
    sample(1:1000, size = 20)
    
    ## A single number n stands for 1:n; with replacement the same value
    ## can be drawn more than once
    sample(6, size = 10, replace = TRUE)
    

Complex Data Types

Lists

  • Interactive notebook: https://mybinder.org/badge_logo.svg

  • Script file: lists.R

    ## A list can hold elements of different types and lengths
    AList <- list(1:5,
                  letters[1:6],
                  c(TRUE, FALSE, FALSE, TRUE))
    AList
    
    ## Single brackets return a list, even if only one element is selected
    AList[1:2]
    AList[1]
    
    ## Double brackets return the selected element itself
    AList[[2]]
    
    ## A vector inside double brackets indexes recursively:
    ## this is the second element of the first list element
    AList[[1:2]]
    
    ## The second element of AList[[1]] is a single number without a third
    ## element, so this fails with "subscript out of bounds". try() shows
    ## the error message but lets the rest of the script run when sourced.
    try(AList[[1:3]])
    
    length(AList)
    
    ## Names can be attached to the elements of an existing list ...
    FDR <- list(c("Franklin", "Delano"),
                c("Roosevelt"))
    names(FDR) <- c("first.name", "last.name")
    
    ## ... or be given directly when the list is created
    FDR <- list(first.name = c("Franklin", "Delano"),
                last.name = c("Roosevelt"))
    FDR
    
    ## Named elements can be extracted with $ or with [[ ]] and the name
    FDR$last.name
    
    FDR[["last.name"]]
    
    ## A list of vectors of equal length, one element per variable
    UK <- list(
        country.name = c("England", "Northern Ireland", "Scotland",
                         "Wales"),
        population   = c(54786300, 1851600, 5373000, 3099100),
        area.sq.km   = c(130279, 13562, 77933, 20735),
        GVA.cap      = c(26159, 18584, 23685, 18002))
    UK
    

Attributes

  • Interactive notebook: https://mybinder.org/badge_logo.svg

  • Script file: attributes.R

    ## Names given when a vector is created are stored as an attribute
    onetofour <- c(first = 1, second = 2, third = 3, fourth = 4)
    
    attributes(onetofour)
    
    ## Fix the state of the random number generator for reproducible results
    set.seed(42)
    
    ## Twenty random codes from 1 to 4, turned into an ordered factor
    ## with a label for each code
    satisfaction <- sample(1:4, size = 20, replace = TRUE)
    satisfaction <- ordered(
        satisfaction,
        levels = 1:4,
        labels = c("not at all", "low", "medium", "high")
    )
    attributes(satisfaction)
    
    ## A single attribute can be obtained with attr() or, for levels,
    ## with the dedicated function levels()
    attr(satisfaction, "levels")
    levels(satisfaction)
    
    ## Likewise for the class attribute
    attr(satisfaction, "class")
    class(satisfaction)