first commit
commit
b4c1542846
|
@ -0,0 +1,112 @@
|
||||||
|
################################################################################
|
||||||
|
#
|
||||||
|
# PROBLEMS:
|
||||||
|
#
|
||||||
|
# 1. Construct a vector that contains elements: 1,2,3,...,19,20.
|
||||||
|
v <- c(1:20)
|
||||||
|
#v
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# 2. Construct a vector that contains elements: 1,2,3,...,19,20,19,...,3,2,1.
|
||||||
|
v1 <- 1:20
|
||||||
|
v2 <- 20:1
|
||||||
|
j <- c(v1, v2)
|
||||||
|
#j
|
||||||
|
#
|
||||||
|
# 3. Construct a vector that contains elements: 1,3,5,1,3,5,...,1,3,5
|
||||||
|
# where there are 10 occurrences of element 5.
|
||||||
|
# Each repetition of (1, 3, 5) contributes exactly one 5, so 10 repetitions
# are needed to get 10 occurrences of element 5.
h <- rep(seq(from=1, to=5, by=2), times=10)
|
||||||
|
#h
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# 4. Calculate the values of sin(x) at 0, 0.1, 0.2, 0.3, ..., 1.0
|
||||||
|
s <- seq(from=0.0, to=1.0, by=0.1)
|
||||||
|
s <- sin(s)
|
||||||
|
#s
|
||||||
|
#
|
||||||
|
# 5. Suppose we have measured the heights and weights of ten individuals:
|
||||||
|
#
|
||||||
|
# the vector of heights in 'cm'
|
||||||
|
height <- c(179, 185, 183, 172, 174, 185, 193, 169, 173, 168)
|
||||||
|
|
||||||
|
# the vector of weights in 'kg'
|
||||||
|
weight <- c(95, 89, 70, 80, 92, 86, 100, 63, 72, 70)
|
||||||
|
|
||||||
|
# Calculate the body mass index (bmi) for each individual using the formula:
|
||||||
|
# bmi = weight_in_kg / (height_in_m)^2
|
||||||
|
#
|
||||||
|
# HINT: first convert heights from 'cm' to 'm', then use the formula above.
|
||||||
|
height <- height / 100
|
||||||
|
bmi <- weight / (height ^ 2)
|
||||||
|
#bmi
|
||||||
|
#
|
||||||
|
# 6. Consider a vector:
|
||||||
|
#
|
||||||
|
x <- c(1, -2, 3, -4, 5, -6, 7, -8)
|
||||||
|
# Clamp negative elements to 0, then scale by 10: zeros stay 0 and the
# positive elements are multiplied by 10, as the exercise requires.
x <- pmax(x, 0) * 10
|
||||||
|
#x
|
||||||
|
|
||||||
|
# Edit the vector x as follows. Replace all elements with a negative value
|
||||||
|
# with 0. Multiply the elements with a positive value by 10.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# 7. Without using R, determine the result of the following computation:
|
||||||
|
#
|
||||||
|
x <- c(1,2,3) # x = [1, 2, 3]
|
||||||
|
# 1 / 2^2 - 1 + 2 * 3 - 2 -> 1/4 - 1 + 6 -2 -> 1/4 + 3 -> 3.25
|
||||||
|
x[1]/x[2]^2-1+2*x[3]-x[1+1]
|
||||||
|
#x
|
||||||
|
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# 8. Consider a vector:
|
||||||
|
#
|
||||||
|
x <- 1:200
|
||||||
|
length(x[x %% 11 == 0])
|
||||||
|
# Determine how many elements in the vector are exactly divisible by 11.
|
||||||
|
#
|
||||||
|
# HINT: the integer division operator is %/%
|
||||||
|
# the modulus operator is %%
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# 9. Consider a data frame:
|
||||||
|
#
|
||||||
|
height <- c(179, 185, 183, 172, 174, 185, 193, 169, 173, 168)
|
||||||
|
weight <- c(95, 89, 70, 80, 92, 86, 100, 63, 72, 70)
|
||||||
|
gender <- factor(c("f","m","m","m","f","m","f","f","m","f"))
|
||||||
|
student <- c(T, T, F, F, T, T, F, F, F, T)
|
||||||
|
age = c(20, 21, 30, 25, 27, 19, 24, 27, 28, 24)
|
||||||
|
name = c("Joan","Tom","John","Mike","Anna","Bill","Tina","Beth","Steve","Kim")
|
||||||
|
|
||||||
|
df <- data.frame(name, gender, age, height, weight, student)
|
||||||
|
|
||||||
|
#
|
||||||
|
# - calculate the average age of persons in our dataset.
|
||||||
|
# (HINT: use the mean() function)
|
||||||
|
mean(age)
|
||||||
|
#
|
||||||
|
# - calculate the average age of students in our dataset.
|
||||||
|
mean(df$age[df$student == T])
|
||||||
|
#
|
||||||
|
# - how many males and females are in our dataset?
|
||||||
|
# (HINT: use the table() function)
|
||||||
|
table(df$gender)
|
||||||
|
#
|
||||||
|
# - print persons that are students.
|
||||||
|
df$name[df$student == T]
|
||||||
|
#
|
||||||
|
# - print persons who are between 1.8m and 1.9m tall (inclusive).
|
||||||
|
df$name[df$height >= 180 & df$height <= 190]
|
||||||
|
#
|
||||||
|
# - print students who are above average height
|
||||||
|
# (considering all persons in the dataset).
|
||||||
|
# Keep only students; the average height is still computed over all persons.
df$name[df$student & df$height > mean(df$height)]
|
||||||
|
#
|
||||||
|
# - arrange persons by their age.
|
||||||
|
# (HINT: use the order function)
|
||||||
|
# order(df$age, decreasing=TRUE)
|
||||||
|
df[order(df$age, decreasing=TRUE), ]
|
||||||
|
df
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
|
|
@ -0,0 +1,354 @@
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# INTRODUCTION TO R
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# calculator
|
||||||
|
(50 + 1.45)/12.5 # (CTRL + ENTER)
|
||||||
|
|
||||||
|
# Clean screen with CTRL + L
|
||||||
|
|
||||||
|
# assignment operators
|
||||||
|
x = 945
|
||||||
|
y <- sin(0.47)^2 * sqrt(5)
|
||||||
|
y^2 -> z
|
||||||
|
|
||||||
|
# to inspect the value of a variable simply type its name
|
||||||
|
x
|
||||||
|
y
|
||||||
|
z
|
||||||
|
|
||||||
|
# listing and deleting objects
|
||||||
|
ls()
|
||||||
|
rm(y)
|
||||||
|
rm(x,z)
|
||||||
|
|
||||||
|
# remove (almost) everything in the working environment
|
||||||
|
rm(list=ls())
|
||||||
|
|
||||||
|
#
|
||||||
|
# Vectors (the most basic data objects in R)
|
||||||
|
#
|
||||||
|
|
||||||
|
# creating vectors
|
||||||
|
v <- c(14,7,23.5,76.2)
|
||||||
|
v
|
||||||
|
|
||||||
|
# generating a regular sequence of numbers
|
||||||
|
v <- 1:10
|
||||||
|
v
|
||||||
|
|
||||||
|
v <- seq(from=5, to=10, by=2)
|
||||||
|
v
|
||||||
|
|
||||||
|
w <- rep(v, times = 2)
|
||||||
|
w
|
||||||
|
|
||||||
|
# scalars are vectors with a single element
|
||||||
|
w <- 45.0
|
||||||
|
|
||||||
|
# vectors can be created using other vectors
|
||||||
|
z <- c(v, 2.5, w)
|
||||||
|
z
|
||||||
|
|
||||||
|
#
|
||||||
|
# Useful functions
|
||||||
|
#
|
||||||
|
|
||||||
|
v <- c(8, 4, 2, 3, 6, 9, 1)
|
||||||
|
|
||||||
|
length(v)
|
||||||
|
max(v)
|
||||||
|
min(v)
|
||||||
|
which.min(v)
|
||||||
|
which.max(v)
|
||||||
|
sum(v)
|
||||||
|
mean(v)
|
||||||
|
sd(v)
|
||||||
|
rev(v)
|
||||||
|
sort(v)
|
||||||
|
sort(v, decreasing=T)
|
||||||
|
order(v)
|
||||||
|
|
||||||
|
# types of vectors
|
||||||
|
mode(v)
|
||||||
|
|
||||||
|
# logical vector - has logical constants as elements
|
||||||
|
b <- c(TRUE, FALSE, F, T)
|
||||||
|
b
|
||||||
|
mode(b)
|
||||||
|
|
||||||
|
x <- 5 > 3
|
||||||
|
x
|
||||||
|
mode(x)
|
||||||
|
|
||||||
|
# string vector - has strings as elements
|
||||||
|
s <- c("character", "logical", "numeric", "complex")
|
||||||
|
mode(s)
|
||||||
|
|
||||||
|
# type coercion (all elements must be of the same type)
|
||||||
|
x <- c(F, T, 34.56, 'aaa')
|
||||||
|
x
|
||||||
|
mode(x)
|
||||||
|
|
||||||
|
#
|
||||||
|
# Vectorization
|
||||||
|
#
|
||||||
|
|
||||||
|
# vector arithmetic (operations are performed element-wise)
|
||||||
|
v1 <- c(10,20,30,40)
|
||||||
|
v2 <- 1:4
|
||||||
|
v1 + v2
|
||||||
|
v1 * v2
|
||||||
|
|
||||||
|
# functions operate directly on each element of a vector
|
||||||
|
v1^2
|
||||||
|
sqrt(v1)
|
||||||
|
exp(v1)
|
||||||
|
log2(v1)
|
||||||
|
|
||||||
|
# the recycling rule (if lengths are different the elements of the shorter vector are repeated)
|
||||||
|
v1 * 10
|
||||||
|
v1 + 1
|
||||||
|
v1 + c(100, 200)
|
||||||
|
|
||||||
|
#
|
||||||
|
# Indexing
|
||||||
|
#
|
||||||
|
|
||||||
|
x <- c(-10,20,-30,40,-50,60,-70,80)
|
||||||
|
x
|
||||||
|
|
||||||
|
# individual elements can be addressed using an integer index vector
|
||||||
|
# (indexing starts with 1)
|
||||||
|
x[3]
|
||||||
|
x[c(1,4,5)]
|
||||||
|
x[1:3]
|
||||||
|
x[]
|
||||||
|
|
||||||
|
# negative integer indices address all elements but those stated
|
||||||
|
x[-1]
|
||||||
|
x[-c(4,6)]
|
||||||
|
x[-(1:3)]
|
||||||
|
|
||||||
|
# vector elements can be addressed using logical vectors
|
||||||
|
# (elements corresponding to constants TRUE are selected)
|
||||||
|
|
||||||
|
# logical vector
|
||||||
|
x > 0
|
||||||
|
|
||||||
|
# logical vector indexing
|
||||||
|
x[x>0]
|
||||||
|
x[x <= -20 | x > 50]
|
||||||
|
x[x > 40 & x < 100]
|
||||||
|
|
||||||
|
# equality operator is ==
|
||||||
|
# inequality operator is !=
|
||||||
|
|
||||||
|
# the which() function returns indices corresponding to constants TRUE
|
||||||
|
which(x > 0)
|
||||||
|
|
||||||
|
# character string index vector
|
||||||
|
point <- c(4.7, 3.6, 2.5)
|
||||||
|
names(point) <- c('x', 'y', 'z')
|
||||||
|
point
|
||||||
|
|
||||||
|
point['x']
|
||||||
|
point[c('x','z')]
|
||||||
|
|
||||||
|
# empty indices
|
||||||
|
point[] <- 0
|
||||||
|
point
|
||||||
|
|
||||||
|
# not the same as
|
||||||
|
point <- 0
|
||||||
|
point
|
||||||
|
|
||||||
|
#
|
||||||
|
# Vector editing
|
||||||
|
#
|
||||||
|
|
||||||
|
x <- c("a", "b", "c", "d")
|
||||||
|
|
||||||
|
# replacing an element
|
||||||
|
x[2] <- "BBBBB"
|
||||||
|
x
|
||||||
|
|
||||||
|
x[c(1,3)] <- c("AAAAA", "CCCCC")
|
||||||
|
x
|
||||||
|
|
||||||
|
# adding new element
|
||||||
|
x[length(x)+1] = "EEEEE"
|
||||||
|
x
|
||||||
|
|
||||||
|
# what happens if we do not define all elements in the vector?
|
||||||
|
x[10] <- "FFFFF"
|
||||||
|
x
|
||||||
|
|
||||||
|
# which elements are not defined
|
||||||
|
is.na(x)
|
||||||
|
|
||||||
|
|
||||||
|
# removing elements
|
||||||
|
x <- x[-c(1,3)]
|
||||||
|
x
|
||||||
|
|
||||||
|
x <- c(x[2],x[3])
|
||||||
|
x
|
||||||
|
|
||||||
|
#
|
||||||
|
# Flow control
|
||||||
|
#
|
||||||
|
|
||||||
|
# for loops
|
||||||
|
for (x in 1:10) {
|
||||||
|
print(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
# while loops
|
||||||
|
x <- 0
|
||||||
|
while (x < 10) {
|
||||||
|
print(x)
|
||||||
|
x <- x+1
|
||||||
|
}
|
||||||
|
|
||||||
|
# if statements
|
||||||
|
x <- 1
|
||||||
|
if (x == 0) {
|
||||||
|
print('Condition 1')
|
||||||
|
} else if (x == 1){
|
||||||
|
print('Condition 2')
|
||||||
|
} else {
|
||||||
|
print('Condition 3')
|
||||||
|
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Factors
|
||||||
|
#
|
||||||
|
|
||||||
|
color <- c("blue","red","red","red","blue","red","blue")
|
||||||
|
color
|
||||||
|
|
||||||
|
# factors are useful when modelling nominal variables
|
||||||
|
color <- factor(color)
|
||||||
|
color
|
||||||
|
|
||||||
|
# argument "levels" defines all possible elements' values
|
||||||
|
dir <- factor(c('left','left','up'), levels = c('left','right','up','down'))
|
||||||
|
dir
|
||||||
|
|
||||||
|
# all possible elements' values
|
||||||
|
levels(dir)
|
||||||
|
|
||||||
|
# if no match is found
|
||||||
|
dir[1] <- "diagonal"
|
||||||
|
dir
|
||||||
|
|
||||||
|
# valid assignment
|
||||||
|
dir[1] <- "down"
|
||||||
|
dir
|
||||||
|
|
||||||
|
# frequency tables for factors
|
||||||
|
table(color)
|
||||||
|
table(dir)
|
||||||
|
|
||||||
|
#
|
||||||
|
# Lists (an ordered collection of objects - components)
|
||||||
|
#
|
||||||
|
|
||||||
|
# creating a list
|
||||||
|
student <- list(id=12345,name="Marko",marks=c(10,9,10,9,8,10))
|
||||||
|
student
|
||||||
|
|
||||||
|
# extracting elements of a list (using named components)
|
||||||
|
student$id
|
||||||
|
student$name
|
||||||
|
student$marks
|
||||||
|
|
||||||
|
# extracting elements of a list (using indexing)
|
||||||
|
student[[1]]
|
||||||
|
student[[2]]
|
||||||
|
student[[3]]
|
||||||
|
|
||||||
|
# extending lists
|
||||||
|
student$parents <- c("Ana", "Tomaz")
|
||||||
|
student
|
||||||
|
|
||||||
|
#
|
||||||
|
# Data frames
|
||||||
|
#
|
||||||
|
|
||||||
|
# creating a data frame
|
||||||
|
height <- c(179, 185, 183, 172, 174, 185, 193, 169, 173, 168)
|
||||||
|
weight <- c(95, 89, 70, 80, 92, 86, 100, 63, 72, 70)
|
||||||
|
gender <- factor(c("f","m","m","m","f","m","f","f","m","f"))
|
||||||
|
student <- c(T, T, F, F, T, T, F, F, F, T)
|
||||||
|
|
||||||
|
df <- data.frame(gender, height, weight, student)
|
||||||
|
df
|
||||||
|
|
||||||
|
# some important functions
|
||||||
|
summary(df)
|
||||||
|
names(df)
|
||||||
|
nrow(df)
|
||||||
|
ncol(df)
|
||||||
|
head(df)
|
||||||
|
|
||||||
|
# accessing elements of data frames
|
||||||
|
df[5,]
|
||||||
|
df[1:5,]
|
||||||
|
df[,1]
|
||||||
|
df[,c(1,3,4)]
|
||||||
|
df[1,3]
|
||||||
|
df[1,-3]
|
||||||
|
|
||||||
|
df$height
|
||||||
|
|
||||||
|
df[df$height < 180,]
|
||||||
|
df[df$gender == "m",]
|
||||||
|
|
||||||
|
# adding columns to a data frame
|
||||||
|
df <- cbind(df, age = c(20, 21, 30, 25, 27, 19, 24, 27, 28, 24))
|
||||||
|
df
|
||||||
|
|
||||||
|
df$name = c("Joan","Tom","John","Mike","Anna","Bill","Tina","Beth","Steve","Kim")
|
||||||
|
df
|
||||||
|
|
||||||
|
summary(df)
|
||||||
|
|
||||||
|
#
|
||||||
|
# User defined functions
|
||||||
|
#
|
||||||
|
|
||||||
|
# Add two values and return the result of `a + b`.
#   a, b: operands for `+`.
addFunction <- function(a, b) {
  a + b
}
|
||||||
|
|
||||||
|
# Load built-in datasets with data()
|
||||||
|
data(iris)
|
||||||
|
|
||||||
|
# Get mean of each column with mean()
|
||||||
|
lapply(iris[,1:3], mean) ## lapply returns a list!
|
||||||
|
sapply(iris[,1:3] ,mean) ## sapply returns a vector!
|
||||||
|
apply(iris[,1:3], 1, sum) ## apply operates across a given dimension (1 = row-wise)
|
||||||
|
|
||||||
|
# Or using native methods
|
||||||
|
colSums(iris[,1:3])/nrow(iris[,1:3])
|
||||||
|
|
||||||
|
|
||||||
|
## This is additional material, just to show you how things can also be done.
|
||||||
|
library(dplyr)
|
||||||
|
data(iris)
|
||||||
|
|
||||||
|
# Let's do some groupings
|
||||||
|
summarizedCustom <- iris %>% group_by(Species) %>% summarise(msw = max(Sepal.Width), mpw = mean(Petal.Width))
|
||||||
|
summarizedCustom
|
||||||
|
|
||||||
|
# How to create new features?
|
||||||
|
newFeature <- iris %>% mutate(newFeature = Petal.Width + Sepal.Length)
|
||||||
|
head(newFeature)
|
||||||
|
|
||||||
|
# How about some filtering?
|
||||||
|
filteredDF <- iris %>% filter(Species == c("setosa"), Petal.Width >= 0.4)
|
||||||
|
head(filteredDF)
|
Loading…
Reference in New Issue