Search notes:

R function: data.frame

data.frame() constructs data frames.
#
#    A data frame is like a matrix, except that all elements of a
#    matrix share one data type, whereas each column of a data frame
#    can have its own data type.
#


# Three vectors of equal length; each one becomes a column.
alphabeth <- c("Alpha", "Beta", "Gamma")
numbers <- c(1.1, 2.02, 3.003)
blabla <- c("Foo", "Bar", "Baz")

# The column names are spelled out here; they are the same names that
# data.frame() would derive from the variable names on its own.
dataFrame <- data.frame(
  alphabeth = alphabeth,
  numbers = numbers,
  blabla = blabla
)

show(dataFrame)
#   alphabeth numbers blabla
# 1     Alpha   1.100    Foo
# 2      Beta   2.020    Bar
# 3     Gamma   3.003    Baz

# Columns can be named directly in the call to data.frame().
dataFrame_2 <- data.frame(
  col_1 = c("Foo", "Bar", "Baz"),
  col_2 = c(11, 22, 33),
  col_3 = c("abc", "def", "ghi")
)

show(dataFrame_2)
#   col_1 col_2 col_3
# 1   Foo    11   abc
# 2   Bar    22   def
# 3   Baz    33   ghi

# dim() reports the number of rows, then the number of columns.
dim(dataFrame_2)
# [1] 3 3

# -----------------------------------------------
#
#        Data Frame with «default value» for
#        all elements in a column:

# col_1 and col_4 are given a single value each; as the printed result
# shows, that value is repeated for every row.
dataFrame_3 <- data.frame(
  col_1 = TRUE,
  col_2 = c("one", "two", "three"),
  col_3 = c("Foo", "Bar", "Baz"),
  col_4 = NA
)
dataFrame_3
#   col_1 col_2 col_3 col_4
# 1  TRUE   one   Foo    NA
# 2  TRUE   two   Bar    NA
# 3  TRUE three   Baz    NA

# Three columns of ten normally distributed random numbers each.
# No seed is set, so the values differ from run to run; the listing
# below is one sample run.
dataFrame_4 <- data.frame(
  s1 = rnorm(n = 10, mean = 5, sd = 1),
  s2 = rnorm(n = 10, mean = 9, sd = 9),
  s3 = rnorm(n = 10, mean = 0, sd = 5)
)

show(dataFrame_4)
#          s1         s2         s3
# 1  6.331567 24.8329281  0.3696351
# 2  5.923443 -0.9340891 12.3321180
# 3  5.157972 12.2860296 -2.1322848
# 4  5.967498 10.3329538 -0.2394767
# 5  4.507737  6.3054715  1.4312542
# 6  5.668018  8.7391802  1.0235536
# 7  4.866247  5.4963913  2.3345568
# 8  5.946127  1.3370167 -7.4614434
# 9  3.989125 10.4986192  1.0924467
# 10 5.301544 11.5762866 -3.0578400

# A data frame carries its column names, row names and class as attributes.
attributes(dataFrame_4)
# $names
# [1] "s1" "s2" "s3"
#
# $row.names
#  [1]  1  2  3  4  5  6  7  8  9 10
#
# $class
# [1] "data.frame"



# -----------------------------------------------
#
#        Subscripts
#

# Sample data used by the subscript and sorting examples below.
dataFrame_5 <- data.frame(
  col_1 = c(1, 2, 3, 4, 5, 6),
  col_2 = c('foo', 'bar', 'baz', 'more-foo', 'more-bar', 'more-baz'),
  col_3 = c("a", "b", "c", "d", "e", "f"),
  col_4 = c(22, 38, 17, 65, 72, 48),
  col_5 = c("ABC", "DEF", "GHI", "JKL", "MNO", "PQR")
)

# All rows, second to fourth column.
dataFrame_5[, c("col_2", "col_3", "col_4")]
#      col_2 col_3 col_4
# 1      foo     a    22
# 2      bar     b    38
# 3      baz     c    17
# 4 more-foo     d    65
# 5 more-bar     e    72
# 6 more-baz     f    48

cat("\n\n")

# Rows three to five, second and third column.
dataFrame_5[3:5, c("col_2", "col_3")]
#      col_2 col_3
# 3      baz     c
# 4 more-foo     d
# 5 more-bar     e

cat("\n\n")

#
#         Subscripts with logical tests:
#         only rows for which the condition is TRUE are returned.
#         The SQL equivalent would be «where».
#

dataFrame_5[with(dataFrame_5, col_1 < 5 & col_4 > 20), c("col_1", "col_4")]
#   col_1 col_4
# 1     1    22
# 2     2    38
# 4     4    65

# -----------------------------------------------
#
#        Sorting: order() returns the row permutation that sorts
#        the second column; using it as row subscript reorders the rows.
#        The SQL equivalent would be «order by».
#
dataFrame_5[order(dataFrame_5$col_2), ]

# { Parameter stringsAsFactors
#   Controls whether character strings inside a data frame
#   are converted to factors.
#   Before R 4.0.0 the default was TRUE; since R 4.0.0 it is FALSE.
#   Both examples pass the argument explicitly, so that the output
#   shown below is the same on every R version.

cat("\n\n")

# With stringsAsFactors = TRUE, column c becomes a factor.
df <- data.frame(
        n = c(   1 ,    2 ,    3 ),
        c = c('foo', 'bar', 'baz'),
        stringsAsFactors = TRUE
)

str(df)
# 'data.frame':   3 obs. of  2 variables:
#  $ n: num  1 2 3
#  $ c: Factor w/ 3 levels "bar","baz","foo": 3 1 2

cat ("\n\n")

# With stringsAsFactors = FALSE, column c stays a character vector.
df <- data.frame(
        n = c(   1 ,    2 ,    3 ),
        c = c('foo', 'bar', 'baz'),
        stringsAsFactors = FALSE
)

str(df)
# 'data.frame':   3 obs. of  2 variables:
#  $ n: num  1 2 3
#  $ c: chr  "foo" "bar" "baz"

# }

# { Determine number of rows and columns
#   dim() returns c(rows, columns); its second element is the
#   column count and its first element is the row count.

cat("\n\n")

dim(df)[2L]
# [1] 2

dim(df)[1L]
# [1] 3

# }
Github repository about-r, path: /functions/data.frame.R

See also

Index to (some) R functions

Index