Attributes attach metadata to an object (vector or list). You can get and set individual attributes using attr(), get all attributes using attributes(), and set all attributes using structure().
# create an integer vector
a <- 1:3
str(a)
## int [1:3] 1 2 3
# set an attribute "x" to the vector "a" with value "abc", and retrieve it afterwards
attr(a, "x") <- "abc"
attr(a, "x")
## [1] "abc"
# set another attribute "y" to the vector "a" with value (4, 5, 6), and retrieve all attributes of "a" afterwards
attr(a, "y") <- 4:6
attributes(a)
## $x
## [1] "abc"
##
## $y
## [1] 4 5 6
# set attributes in another way
a <- structure(
1:3,
x = "abc",
y = 4:6
)
attributes(a)
## $x
## [1] "abc"
##
## $y
## [1] 4 5 6
In fact, the attributes of an object are stored as a named list. We will talk about the names attribute shortly.
# note that the attributes are elements of a 'named' list
print(typeof(attributes(a)))
## [1] "list"
str(attributes(a))
## List of 2
## $ x: chr "abc"
## $ y: int [1:3] 4 5 6
The names attribute is a character vector. It gives each element in an object a name.
# create a named vector
x <- c(a = 1, b = 2, c = 3)
str(x)
## Named num [1:3] 1 2 3
## - attr(*, "names")= chr [1:3] "a" "b" "c"
# you can refer to the element by name
x["a"]
## a
## 1
# you can set the names attribute using the names() function as well
x <- 1:3
names(x) <- c("a", "b", "c")
print(names(x))
## [1] "a" "b" "c"
str(x)
## Named int [1:3] 1 2 3
## - attr(*, "names")= chr [1:3] "a" "b" "c"
# a named list
l_n <- list(a = c(1, 2), b = c("hello", "world"))
print(l_n)
## $a
## [1] 1 2
##
## $b
## [1] "hello" "world"
print(names(l_n))
## [1] "a" "b"
print(l_n["a"])
## $a
## [1] 1 2
print(l_n[["a"]])
## [1] 1 2
Adding a dim attribute to a vector turns the vector into a matrix or array. You can use dim() to assign dimensions to a vector or to find out the dimensions of a matrix.
In practice, matrices and arrays are usually created using the matrix() and array() functions.
x <- 1:6
# assign dim attribute to a vector
# another way to do it, dim(x) <- c(2, 3)
attr(x, "dim") <- c(2, 3)
x
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
attributes(x)
## $dim
## [1] 2 3
str(x)
## int [1:2, 1:3] 1 2 3 4 5 6
print(typeof(x))
## [1] "integer"
print(dim(x))
## [1] 2 3
print(is.matrix(x))
## [1] TRUE
# use the matrix() function to create the same matrix
y <- matrix(1:6, nrow = 2, ncol = 3)
print(y)
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
attributes(y)
## $dim
## [1] 2 3
str(y)
## int [1:2, 1:3] 1 2 3 4 5 6
print(typeof(y))
## [1] "integer"
print(dim(y))
## [1] 2 3
print(is.matrix(y))
## [1] TRUE
Subsetting a matrix is similar to subsetting a vector.
print(y[1:2, c(1,3)])
## [,1] [,2]
## [1,] 1 5
## [2,] 2 6
print(y[1:2, -2])
## [,1] [,2]
## [1,] 1 5
## [2,] 2 6
print(y[1:2, 1:2])
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
Note that [] by default simplifies the subsetting result to the lowest possible dimension. Use drop = FALSE to keep the result a matrix.
# y[1, 1:2] is not a matrix any more
print(attributes(y[1, 1:2]))
## NULL
print(is.matrix(y[1, 1:2]))
## [1] FALSE
Since a matrix is just a vector with a dim attribute, you can also subset a matrix the same way you subset a vector.
print(y[5])
## [1] 5
print(y[1:3])
## [1] 1 2 3
Matrix algebra is easy. See here for a list of operations, https://www.statmethods.net/advstats/matrix.html.
m1 <- matrix(1:4, nrow = 2)
m2 <- matrix(5:8, nrow = 2)
print(m1)
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
print(m2)
## [,1] [,2]
## [1,] 5 7
## [2,] 6 8
# element-wise multiplication
print(m1 * m2)
## [,1] [,2]
## [1,] 5 21
## [2,] 12 32
# matrix multiplication
print(m1 %*% m2)
## [,1] [,2]
## [1,] 23 31
## [2,] 34 46
# transpose
print(t(m1))
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
# solve Ax = b problem
b <- matrix(7:8, nrow = 2)
print(b)
## [,1]
## [1,] 7
## [2,] 8
print(solve(m1, b))
## [,1]
## [1,] -2
## [2,] 3
An array can have more than two dimensions.
# array
# create a vector first
a <- 1:12
# set dim attribute
# you can set dim attribute using dim() function too
dim(a) <- c(2, 3, 2)
print(a)
## , , 1
##
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] 7 9 11
## [2,] 8 10 12
str(a)
## int [1:2, 1:3, 1:2] 1 2 3 4 5 6 7 8 9 10 ...
# create an array using array() function
b <- array(1:12, c(2, 3, 2))
b
## , , 1
##
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] 7 9 11
## [2,] 8 10 12
Exercise: what does dim() return when it is applied to a 1D vector?
# test out here
dim(c(2, 3))
## NULL
The class attribute is a special attribute that defines an S3 object. S3 objects are objects that behave differently when passed to a generic function.
This section covers three important S3 atomic vectors: factors, dates, and date-times.
# create a factor or categorical vector
x <- factor(c("a", "b", "b", "a"))
str(x)
## Factor w/ 2 levels "a","b": 1 2 2 1
str(attributes(x))
## List of 2
## $ levels: chr [1:2] "a" "b"
## $ class : chr "factor"
print(class(x))
## [1] "factor"
print(typeof(x))
## [1] "integer"
print(is.factor(x))
## [1] TRUE
# counts at each level
table(x)
## x
## a b
## 2 2
Date vectors are built on double vectors. (Dates are represented as the number of days since 1970-01-01, with negative values for earlier dates.)
d1 <- 1
# assign class "Date" to the double vector d1
class(d1) = "Date"
print(d1)
## [1] "1970-01-02"
print(typeof(d1))
## [1] "double"
print(attributes(d1))
## $class
## [1] "Date"
In practice, you usually create dates in other ways.
today <- Sys.Date()
print(today)
## [1] "2019-01-21"
print(typeof(today))
## [1] "double"
print(str(today))
## Date[1:1], format: "2019-01-21"
## NULL
print(class(today))
## [1] "Date"
attributes(today)
## $class
## [1] "Date"
Create dates from character vectors using as.Date().
d2 <- as.Date("2019-01-22")
print(d2)
## [1] "2019-01-22"
class(d2)
## [1] "Date"
Exercise: create a character vector (“2019-01-22”, “2019-02-11”). Convert it to dates using as.Date(). Find the number of days between the two dates.
# Your code here
# use as.Date( ) to convert strings to dates
date_start_end <- as.Date(c("2019-01-22", "2019-02-11"))
print(class(date_start_end))
## [1] "Date"
days <- date_start_end[2] - date_start_end[1]
print(days)
## Time difference of 20 days
R has two ways to store date-time information: POSIXct and POSIXlt.
One of the best R packages to work with date/time is lubridate. You can learn it on your own.
# UTC - Coordinated Universal Time
now_ct <- as.POSIXct("2018-11-11 8:00", tz = "UTC")
now_ct
## [1] "2018-11-11 08:00:00 UTC"
print(typeof(now_ct))
## [1] "double"
class(now_ct)
## [1] "POSIXct" "POSIXt"
attributes(now_ct)
## $class
## [1] "POSIXct" "POSIXt"
##
## $tzone
## [1] "UTC"
ct1 <- structure(now_ct, tzone = "Asia/Tokyo")
print(ct1)
## [1] "2018-11-11 17:00:00 JST"
attributes(ct1)
## $class
## [1] "POSIXct" "POSIXt"
##
## $tzone
## [1] "Asia/Tokyo"
A data frame is like a 2-D table in Excel. More precisely, it is a list of equal-length vectors (columns). It has a class attribute, “data.frame”, plus two further attributes: 1) (column) names; and 2) row.names.
df1 <- data.frame(x = 1:3, y = letters[1:3], z = c(1.1, 2.2, 3.3))
df1
## x y z
## 1 1 a 1.1
## 2 2 b 2.2
## 3 3 c 3.3
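# Note that in R versions before 4.0.0, strings are converted to factors by default
# (since R 4.0.0 the default is stringsAsFactors = FALSE, and y would be chr)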
str(df1)
## 'data.frame': 3 obs. of 3 variables:
## $ x: int 1 2 3
## $ y: Factor w/ 3 levels "a","b","c": 1 2 3
## $ z: num 1.1 2.2 3.3
# use 'stringsAsFactors = FALSE' to keep strings as they are
df2 <- data.frame(
x = 1:3,
y = c("a", "b", "c"),
stringsAsFactors = FALSE
)
str(df2)
## 'data.frame': 3 obs. of 2 variables:
## $ x: int 1 2 3
## $ y: chr "a" "b" "c"
print(typeof(df1))
## [1] "list"
print(class(df1))
## [1] "data.frame"
# a dataframe has three attributes
print(attributes(df1))
## $names
## [1] "x" "y" "z"
##
## $row.names
## [1] 1 2 3
##
## $class
## [1] "data.frame"
is.data.frame(df1)
## [1] TRUE
Functions to find out column and row names.
# find out column names using names() or colnames()
print(names(df1))
## [1] "x" "y" "z"
print(colnames(df1))
## [1] "x" "y" "z"
# find out row names
print(rownames(df1))
## [1] "1" "2" "3"
Functions to find out number of columns and rows.
# find out number of columns
print(length(df1))
## [1] 3
print(ncol(df1))
## [1] 3
# find out number of rows
print(nrow(df1))
## [1] 3
Subsetting a dataframe is similar to that of a list or matrix.
# select a single column using []
print(df1['x'])
## x
## 1 1
## 2 2
## 3 3
# note the result is still a dataframe
class(df1['x'])
## [1] "data.frame"
# select a single column using [[]]
print(df1[['x']])
## [1] 1 2 3
# note the result is NOT a dataframe any more. It's a vector.
class(df1[['x']])
## [1] "integer"
# select multiple columns
print(df1[c('x', 'z')])
## x z
## 1 1 1.1
## 2 2 2.2
## 3 3 3.3
# note the subsetting result is still a dataframe
class(df1[c('x', 'z')])
## [1] "data.frame"
# select a single column
print(df1[, 'x'])
## [1] 1 2 3
print(df1[1:2, 'x'])
## [1] 1 2
print(df1[c(TRUE, TRUE, FALSE), 'x'])
## [1] 1 2
# note the result is NOT a dataframe any more
class(df1[, 'x'])
## [1] "integer"
# to preserve the dataframe structure
df1[, 'x', drop = FALSE]
## x
## 1 1
## 2 2
## 3 3
class(df1[, 'x', drop = FALSE])
## [1] "data.frame"
# select multiple columns
# subsetting like a matrix
df1[, c('x', 'z')]
## x z
## 1 1 1.1
## 2 2 2.2
## 3 3 3.3
df1[1:2, c('x', 'z')]
## x z
## 1 1 1.1
## 2 2 2.2
# note the result is still a dataframe
class(df1[, c('x', 'z')])
## [1] "data.frame"
You can also select a single column using $.
# select a single column
print(df1$x)
## [1] 1 2 3
# note the result is NOT a dataframe any more
class(df1$x)
## [1] "integer"
Exercise: create a data frame with df_ex <- data.frame(x = 1:5, y = letters[1:5], z = c(1.1, 2.2, 3.3, 2.2, 5.5)). Select the rows in df_ex where the z column values are greater than 2.2.
# your code here
df_ex <- data.frame(x = 1:5, y = letters[1:5], z = c(1.1, 2.2, 3.3, 2.2, 5.5))
print(df_ex)
## x y z
## 1 1 a 1.1
## 2 2 b 2.2
## 3 3 c 3.3
## 4 4 d 2.2
## 5 5 e 5.5
print(df_ex[["z"]] > 2.2)
## [1] FALSE FALSE TRUE FALSE TRUE
df_ex[df_ex[["z"]] > 2.2, ]
## x y z
## 3 3 c 3.3
## 5 5 e 5.5
Tibbles are data frames with minor tweaks. They change some behaviors of traditional R data frames to make them easier to work with. To learn more about tibbles, see References 2 and 3 below.
# load the tibble library
library(tibble)
# create a tibble
df3 <- tibble(
x = 1:3,
y = x * 2,
z = c("a", "b", "c")
)
print(df3)
## # A tibble: 3 x 3
## x y z
## <int> <dbl> <chr>
## 1 1 2. a
## 2 2 4. b
## 3 3 6. c
print(is_tibble(df3))
## [1] TRUE
str(df3)
## Classes 'tbl_df', 'tbl' and 'data.frame': 3 obs. of 3 variables:
## $ x: int 1 2 3
## $ y: num 2 4 6
## $ z: chr "a" "b" "c"
# turn a dataframe to tibble
df4 <- as_tibble(df1)
is_tibble(df4)
## [1] TRUE
Vectors and subsetting chapters in Advanced R.
Data frames and tibbles chapter in Advanced R.
Tibbles chapter in R for Data Science