# is-vaje/v1/lab1_introduction.R

# 355 lines
# 5.6 KiB
# R

###############################################################################
#
# INTRODUCTION TO R
#
###############################################################################
# calculator: type an expression and run it; the result is printed to the console
(50 + 1.45)/12.5 # (run the line with CTRL + ENTER)
# Clear the console screen with CTRL + L
# assignment operators: `=`, `<-` and `->` all bind a value to a name
x = 945
y <- sin(0.47)^2 * sqrt(5)
# `->` assigns from left to right: the value of y^2 is stored in z
y^2 -> z
# to inspect the value of a variable simply type its name
x
y
z
# listing and deleting objects
# ls() returns the names of the objects in the current environment
ls()
# rm() removes the objects whose names are given
rm(y)
rm(x,z)
# remove (almost) everything in the working environment
# (ls() does not list objects whose names start with a dot, so those survive)
rm(list=ls())
#
# Vectors (the most basic data objects in R)
#
# creating vectors: c() combines its arguments into a single vector
v <- c(14,7,23.5,76.2)
v
# generating a regular sequence of numbers
# a:b yields the integers from a to b in steps of 1
v <- 1:10
v
# seq() allows a custom step; the result here is 5 7 9 (the next value, 11,
# would exceed `to`)
v <- seq(from=5, to=10, by=2)
v
# rep() repeats the whole vector `times` times: 5 7 9 5 7 9
w <- rep(v, times = 2)
w
# scalars are vectors with a single element
w <- 45.0
# vectors can be created using other vectors
# (c() flattens its arguments, so z is 5 7 9 2.5 45)
z <- c(v, 2.5, w)
z
#
# Useful functions
#
v <- c(8, 4, 2, 3, 6, 9, 1)
# number of elements
length(v)
# largest and smallest value
max(v)
min(v)
# index (position) of the smallest / largest value, not the value itself
which.min(v)
which.max(v)
# sum, arithmetic mean and sample standard deviation of the elements
sum(v)
mean(v)
sd(v)
# elements in reversed order
rev(v)
# elements sorted in increasing order (v itself is not modified)
sort(v)
# sorted in decreasing order; T is a predefined shorthand for TRUE
sort(v, decreasing=T)
# indices that would sort v, i.e. v[order(v)] gives the same result as sort(v)
order(v)
# types of vectors
# mode() reports the storage mode of a vector; "numeric" here
mode(v)
# logical vector - has logical constants as elements
# (F and T are predefined shorthands for FALSE and TRUE)
b <- c(TRUE, FALSE, F, T)
b
mode(b)
# the result of a comparison is a logical value (TRUE here)
x <- 5 > 3
x
mode(x)
# string vector - has strings as elements
s <- c("character", "logical", "numeric", "complex")
mode(s)
# type coercion (all elements must be of the same type)
# mixing logicals, a number and a string converts everything to strings:
# "FALSE" "TRUE" "34.56" "aaa"
x <- c(F, T, 34.56, 'aaa')
x
mode(x)
#
# Vectorization
#
# vector arithmetic (operations are performed element-wise)
v1 <- c(10,20,30,40)
v2 <- 1:4
# 11 22 33 44
v1 + v2
# 10 40 90 160
v1 * v2
# functions operate directly on each element of a vector
# square, square root, e^x and base-2 logarithm of every element
v1^2
sqrt(v1)
exp(v1)
log2(v1)
# the recycling rule (if lengths are different the elements of the shorter vector are repeated)
# a single number is recycled to every element: 100 200 300 400
v1 * 10
# 11 21 31 41
v1 + 1
# c(100, 200) is repeated twice to match the length of v1: 110 220 130 240
v1 + c(100, 200)
#
# Indexing
#
x <- c(-10,20,-30,40,-50,60,-70,80)
x
# individual elements can be addressed using an integer index vector
# (indexing starts with 1)
# third element: -30
x[3]
# first, fourth and fifth element: -10 40 -50
x[c(1,4,5)]
# first three elements
x[1:3]
# an empty index selects all elements
x[]
# negative integer indices address all elements but those stated
# everything except the first element
x[-1]
# everything except the fourth and sixth element
x[-c(4,6)]
# everything except the first three elements; the parentheses are needed
# because -1:3 would mean the sequence from -1 to 3
x[-(1:3)]
# vector elements can be addressed using logical vectors
# (elements corresponding to constants TRUE are selected)
# logical vector
# one TRUE/FALSE per element of x
x > 0
# logical vector indexing
# only the positive elements: 20 40 60 80
x[x>0]
# `|` is the element-wise OR: -30 -50 60 -70 80
x[x <= -20 | x > 50]
# `&` is the element-wise AND: 60 80
x[x > 40 & x < 100]
# equality operator is ==
# inequality operator is !=
# the which() function returns indices corresponding to constants TRUE
# positions of the positive elements: 2 4 6 8
which(x > 0)
# character string index vector
point <- c(4.7, 3.6, 2.5)
# names() attaches a label to every element
names(point) <- c('x', 'y', 'z')
point
# elements can now be selected by name
point['x']
point[c('x','z')]
# empty indices
# assigning through an empty index overwrites every element but keeps the
# length and the names of the vector
point[] <- 0
point
# not the same as
# plain assignment replaces the whole object with the single number 0
point <- 0
point
#
# Vector editing
#
x <- c("a", "b", "c", "d")
# replacing an element
x[2] <- "BBBBB"
x
# replacing several elements at once (first and third)
x[c(1,3)] <- c("AAAAA", "CCCCC")
x
# adding new element
# assigning to the position one past the end extends the vector by one
x[length(x)+1] = "EEEEE"
x
# what happens if we do not define all elements in the vector?
# x has 5 elements here, so positions 6 to 9 are filled with NA
x[10] <- "FFFFF"
x
# which elements are not defined
# is.na() returns TRUE for every NA element
is.na(x)
# removing elements
# drop the first and third element and store the shortened vector back in x
x <- x[-c(1,3)]
x
# keep only the second and third of the remaining elements: "d" "EEEEE"
x <- c(x[2],x[3])
x
#
# Flow control
#
# for loops: the loop variable takes each value of the sequence in turn
for (x in seq_len(10)) {
  print(x)
}
# while loops: the body is repeated for as long as the condition holds
x <- 0
while (x < 10) {
  print(x)
  x <- x + 1
}
# if statements: conditions are tested top to bottom and only the first
# branch whose condition is TRUE is executed
x <- 1
if (x == 0) {
  print("Condition 1")
} else if (x == 1) {
  print("Condition 2")
} else {
  print("Condition 3")
}
#
# Factors
#
color <- c("blue","red","red","red","blue","red","blue")
color
# factors are useful when modelling nominal variables
# factor() stores each distinct value once as a level ("blue", "red")
color <- factor(color)
color
# argument "levels" defines all possible elements' values
# (levels may be listed even if they do not occur in the data)
# NOTE: the name `dir` shadows the base function dir(); calls such as dir()
# still work because R skips non-function objects when looking up a function
dir <- factor(c('left','left','up'), levels = c('left','right','up','down'))
dir
# all possible elements' values
levels(dir)
# if no match is found
# "diagonal" is not one of the levels: R issues a warning and stores NA
dir[1] <- "diagonal"
dir
# valid assignment
dir[1] <- "down"
dir
# frequency tables for factors
# table() counts the occurrences of every level, including levels with count 0
table(color)
table(dir)
#
# Lists (an ordered collection of objects - components)
#
# creating a list
# components may have different types and lengths: a number, a string and
# a numeric vector
student <- list(id=12345,name="Marko",marks=c(10,9,10,9,8,10))
student
# extracting elements of a list (using named components)
student$id
student$name
student$marks
# extracting elements of a list (using indexing)
# [[i]] returns the i-th component itself
student[[1]]
student[[2]]
student[[3]]
# extending lists
# assigning to a name that does not exist yet appends a new component
student$parents <- c("Ana", "Tomaz")
student
#
# Data frames
#
# creating a data frame: every column is a vector and all columns have the
# same length (one entry per observation)
height <- c(179, 185, 183, 172, 174, 185, 193, 169, 173, 168)
weight <- c(95, 89, 70, 80, 92, 86, 100, 63, 72, 70)
gender <- factor(c("f", "m", "m", "m", "f", "m", "f", "f", "m", "f"))
student <- c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE)
# NOTE: the name `df` shadows the function stats::df(); function calls are
# unaffected because R skips non-function objects when resolving a call
df <- data.frame(
  gender = gender,
  height = height,
  weight = weight,
  student = student
)
df
# some important functions: per-column summary, column names, dimensions
# and the first rows
summary(df)
names(df)
nrow(df)
ncol(df)
head(df)
# accessing elements of data frames: df[rows, columns]
# a single row, then a range of rows
df[5, ]
df[1:5, ]
# a single column (returned as a vector), then several columns
df[, 1]
df[, c(1, 3, 4)]
# a single cell, then the first row without its third column
df[1, 3]
df[1, -3]
# a column by name
df$height
# rows selected by a logical condition on a column
df[df$height < 180, ]
df[df$gender == "m", ]
# adding columns to a data frame
# cbind() returns a new data frame with the extra column appended
df <- cbind(df, age = c(20, 21, 30, 25, 27, 19, 24, 27, 28, 24))
df
# assigning to a column name that does not exist yet also appends a column
df$name <- c(
  "Joan", "Tom", "John", "Mike", "Anna",
  "Bill", "Tina", "Beth", "Steve", "Kim"
)
df
summary(df)
#
# User defined functions
#
# addFunction: adds its two arguments.
#   a, b  - values to add; `+` works element-wise, so vectors are accepted
#           and the shorter one is recycled
#   value - a + b (a function returns the value of its last expression)
addFunction <- function(a, b) {
  a + b
}
# Load in-built datasets with data()
data(iris)
# Get mean of each column with mean()
# iris[,1:3] selects the first three columns; mean is applied to each column
lapply(iris[,1:3], mean) ## lapply returns a list!
sapply(iris[,1:3] ,mean) ## sapply returns a vector!
# note: the next line is not a column mean - it computes the SUM of each ROW,
# returning one value per row
apply(iris[,1:3], 1, sum) ## apply operates across a given dimension (1 = row-wise)
# Or using native methods
# column sums divided by the number of rows give the column means again
colSums(iris[,1:3])/nrow(iris[,1:3])
## This is additional material, just to show you how things can also be done.
# dplyr supplies the pipe operator %>% and the verbs used below.
# NOTE: attaching dplyr masks stats::filter() and stats::lag().
library(dplyr)
data(iris)
# Let's do some groupings: one output row per species, holding the largest
# sepal width (msw) and the mean petal width (mpw) of that species
summarizedCustom <- iris %>%
  group_by(Species) %>%
  summarise(msw = max(Sepal.Width), mpw = mean(Petal.Width))
summarizedCustom
# How to create new features? mutate() appends a column computed from
# existing columns
newFeature <- iris %>%
  mutate(newFeature = Petal.Width + Sepal.Length)
head(newFeature)
# How about some filtering? Conditions separated by commas must all hold.
# Compare against a single value with `==`; to match several values use
# %in%, because `== c("a", "b")` silently recycles the right-hand side
# instead of testing set membership.
filteredDF <- iris %>%
  filter(Species == "setosa", Petal.Width >= 0.4)
head(filteredDF)