From b4c1542846aee71cec5d909e75801517c3fe634f Mon Sep 17 00:00:00 2001
From: Gasper Spagnolo
Date: Mon, 10 Oct 2022 16:06:42 +0200
Subject: [PATCH] first commit

---
 v1/lab 1 - problems.R  | 112 +++++++++++++
 v1/lab1_introduction.R | 354 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 466 insertions(+)
 create mode 100644 v1/lab 1 - problems.R
 create mode 100644 v1/lab1_introduction.R

diff --git a/v1/lab 1 - problems.R b/v1/lab 1 - problems.R
new file mode 100644
index 0000000..e77feb3
--- /dev/null
+++ b/v1/lab 1 - problems.R
@@ -0,0 +1,112 @@
+################################################################################
+#
+# PROBLEMS:
+#
+# 1. Construct a vector that contains elements: 1,2,3,...,19,20.
+    v <- 1:20  # the colon operator already yields the whole sequence
+    #v
+#
+#
+# 2. Construct a vector that contains elements: 1,2,3,...,19,20,19,...,3,2,1.
+    v1 <- 1:20
+    v2 <- 19:1  # start the descent at 19 so that 20 appears only once
+    j <- c(v1, v2)
+    #j
+#
+# 3. Construct a vector that contains elements: 1,3,5,1,3,5,...,1,3,5
+#    where there are 10 occurrences of element 5.
+    h <- rep(seq(from = 1, to = 5, by = 2), times = 10)  # one 5 per repetition
+    #h
+#
+#
+# 4. Calculate the values of sin(x) at 0, 0.1, 0.2, 0.3, ..., 1.0
+    s <- seq(from = 0.0, to = 1.0, by = 0.1)
+    s <- sin(s)
+    #s
+#
+# 5. Suppose we have measured the heights and weights of ten individuals:
+#
+    # the vector of heights in 'cm'
+    height <- c(179, 185, 183, 172, 174, 185, 193, 169, 173, 168)
+
+    # the vector of weights in 'kg'
+    weight <- c(95, 89, 70, 80, 92, 86, 100, 63, 72, 70)
+
+# Calculate the body mass index (bmi) for each individual using the formula:
+#    bmi = weight_in_kg / (height_in_m)^2
+#
+# HINT: first convert heights from 'cm' to 'm', then use the formula above.
+    height <- height / 100
+    bmi <- weight / (height ^ 2)
+    #bmi
+#
+# 6. Consider a vector:
+#
+    x <- c(1, -2, 3, -4, 5, -6, 7, -8)
+    x <- ifelse(x < 0, 0, x * 10)  # negatives become 0, positives are scaled by 10
+    #x
+
+# Edit the vector x as follows. Replace all elements with a negative value
+# with 0. Multiply the elements with a positive value by 10.
+#
+#
+# 7. Without using R, determine the result of the following computation:
+#
+    x <- c(1, 2, 3) # x = [1, 2, 3]
+    # 1 / 2^2 - 1 + 2 * 3 - 2 -> 1/4 - 1 + 6 - 2 -> 1/4 + 3 -> 3.25
+    x[1] / x[2]^2 - 1 + 2 * x[3] - x[1 + 1]
+    #x
+
+#
+#
+# 8. Consider a vector:
+#
+    x <- 1:200
+    sum(x %% 11 == 0)  # each TRUE counts as 1, so the sum is the number of hits
+# Determine how many elements in the vector are exactly divisible by 11.
+#
+# HINT: the integer division operator is %/%
+#       the modulus operator is %%
+#
+#
+# 9. Consider a data frame:
+#
+    height <- c(179, 185, 183, 172, 174, 185, 193, 169, 173, 168)
+    weight <- c(95, 89, 70, 80, 92, 86, 100, 63, 72, 70)
+    gender <- factor(c("f", "m", "m", "m", "f", "m", "f", "f", "m", "f"))
+    student <- c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE)
+    age <- c(20, 21, 30, 25, 27, 19, 24, 27, 28, 24)
+    name <- c("Joan", "Tom", "John", "Mike", "Anna", "Bill", "Tina", "Beth", "Steve", "Kim")
+
+    df <- data.frame(name, gender, age, height, weight, student)
+
+#
+# - calculate the average age of persons in our dataset.
+#   (HINT: use the mean() function)
+    mean(df$age)
+#
+# - calculate the average age of students in our dataset.
+    mean(df$age[df$student])  # the logical column is used directly as the mask
+#
+# - how many males and females are in our dataset?
+#   (HINT: use the table() function)
+    table(df$gender)
+#
+# - print persons that are students.
+    df$name[df$student]
+#
+# - print persons who are between 1.8m and 1.9m tall (inclusive).
+    df$name[df$height >= 180 & df$height <= 190]  # heights are stored in cm here
+#
+# - print students who are above average height
+#   (considering all persons in the dataset).
+    df$name[df$student & df$height > mean(df$height)]  # mean over all persons
+#
+# - arrange persons by their age.
+#   (HINT: use the order function)
+    # order() only returns the row permutation; assign the result to keep it
+    df <- df[order(df$age, decreasing = TRUE), ]
+    df
+#
+###############################################################################
+
diff --git a/v1/lab1_introduction.R b/v1/lab1_introduction.R
new file mode 100644
index 0000000..c4c1cec
--- /dev/null
+++ b/v1/lab1_introduction.R
@@ -0,0 +1,354 @@
+###############################################################################
+#
+# INTRODUCTION TO R
+#
+###############################################################################
+
+# calculator
+(50 + 1.45) / 12.5 # (CTRL + ENTER)
+
+# Clean screen with CTRL + L
+
+# assignment operators (`<-` is the idiomatic one; `=` and `->` also assign)
+x = 945
+y <- sin(0.47)^2 * sqrt(5)
+y^2 -> z
+
+# to inspect the value of a variable simply type its name
+x
+y
+z
+
+# listing and deleting objects
+ls()
+rm(y)
+rm(x, z)
+
+# remove (almost) everything in the working environment (interactive use only)
+rm(list = ls())
+
+#
+# Vectors (the most basic data objects in R)
+#
+
+# creating vectors
+v <- c(14, 7, 23.5, 76.2)
+v
+
+# generating a regular sequence of numbers
+v <- 1:10
+v
+
+v <- seq(from = 5, to = 10, by = 2)
+v
+
+w <- rep(v, times = 2)
+w
+
+# scalars are vectors with a single element
+w <- 45.0
+
+# vectors can be created using other vectors
+z <- c(v, 2.5, w)
+z
+
+#
+# Useful functions
+#
+
+v <- c(8, 4, 2, 3, 6, 9, 1)
+
+length(v)
+max(v)
+min(v)
+which.min(v)
+which.max(v)
+sum(v)
+mean(v)
+sd(v)
+rev(v)
+sort(v)
+sort(v, decreasing = TRUE)
+order(v)
+
+# types of vectors
+mode(v)
+
+# logical vector - has logical constants as elements
+b <- c(TRUE, FALSE, F, T) # T and F are reassignable aliases; prefer TRUE/FALSE
+b
+mode(b)
+
+x <- 5 > 3
+x
+mode(x)
+
+# string vector - has strings as elements
+s <- c("character", "logical", "numeric", "complex")
+mode(s)
+
+# type coercion (all elements must be of the same type)
+x <- c(FALSE, TRUE, 34.56, "aaa")
+x
+mode(x)
+
+#
+# Vectorization
+#
+
+# vector arithmetic (operations are performed element-wise)
+v1 <- c(10, 20, 30, 40)
+v2 <- 1:4
+v1 + v2
+v1 * v2
+
+# functions operate directly on each element of a vector
+v1^2
+sqrt(v1)
+exp(v1)
+log2(v1)
+
+# the recycling rule (if lengths are different the elements of the shorter vector are repeated)
+v1 * 10
+v1 + 1
+v1 + c(100, 200)
+
+#
+# Indexing
+#
+
+x <- c(-10, 20, -30, 40, -50, 60, -70, 80)
+x
+
+# individual elements can be addressed using an integer index vector
+# (indexing starts with 1)
+x[3]
+x[c(1, 4, 5)]
+x[1:3]
+x[]
+
+# negative integer indices address all elements but those stated
+x[-1]
+x[-c(4, 6)]
+x[-(1:3)]
+
+# vector elements can be addressed using logical vectors
+# (elements corresponding to constants TRUE are selected)
+
+# logical vector
+x > 0
+
+# logical vector indexing
+x[x > 0]
+x[x <= -20 | x > 50]
+x[x > 40 & x < 100]
+
+# equality operator is ==
+# inequality operator is !=
+
+# the which() function returns indices corresponding to constants TRUE
+which(x > 0)
+
+# character string index vector
+point <- c(4.7, 3.6, 2.5)
+names(point) <- c("x", "y", "z")
+point
+
+point["x"]
+point[c("x", "z")]
+
+# empty indices (every element is overwritten, the names are kept)
+point[] <- 0
+point
+
+# not the same as (this replaces the whole object with a single 0)
+point <- 0
+point
+
+#
+# Vector editing
+#
+
+x <- c("a", "b", "c", "d")
+
+# replacing an element
+x[2] <- "BBBBB"
+x
+
+x[c(1, 3)] <- c("AAAAA", "CCCCC")
+x
+
+# adding new element
+x[length(x) + 1] <- "EEEEE"
+x
+
+# what happens if we do not define all elements in the vector?
+x[10] <- "FFFFF"
+x
+
+# which elements are not defined
+is.na(x)
+
+
+# removing elements
+x <- x[-c(1, 3)]
+x
+
+x <- c(x[2], x[3])
+x
+
+#
+# Flow control
+#
+
+# for loops
+for (x in 1:10) {
+  print(x)
+}
+
+# while loops
+x <- 0
+while (x < 10) {
+  print(x)
+  x <- x + 1
+}
+
+# if statements
+x <- 1
+if (x == 0) {
+  print('Condition 1')
+} else if (x == 1) {
+  print('Condition 2')
+} else {
+  print('Condition 3')
+}
+
+#
+# Factors
+#
+
+color <- c("blue", "red", "red", "red", "blue", "red", "blue")
+color
+
+# factors are useful when modelling nominal variables
+color <- factor(color)
+color
+
+# argument "levels" defines all possible elements' values
+dir <- factor(c("left", "left", "up"), levels = c("left", "right", "up", "down"))
+dir
+
+# all possible elements' values
+levels(dir)
+
+# if no match is found ("diagonal" is not one of the levels declared above)
+dir[1] <- "diagonal"
+dir
+
+# valid assignment
+dir[1] <- "down"
+dir
+
+# frequency tables for factors
+table(color)
+table(dir)
+
+#
+# Lists (an ordered collection of objects - components)
+#
+
+# creating a list
+student <- list(id = 12345, name = "Marko", marks = c(10, 9, 10, 9, 8, 10))
+student
+
+# extracting elements of a list (using named components)
+student$id
+student$name
+student$marks
+
+# extracting elements of a list (using indexing)
+student[[1]]
+student[[2]]
+student[[3]]
+
+# extending lists
+student$parents <- c("Ana", "Tomaz")
+student
+
+#
+# Data frames
+#
+
+# creating a data frame
+height <- c(179, 185, 183, 172, 174, 185, 193, 169, 173, 168)
+weight <- c(95, 89, 70, 80, 92, 86, 100, 63, 72, 70)
+gender <- factor(c("f", "m", "m", "m", "f", "m", "f", "f", "m", "f"))
+student <- c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE)
+
+df <- data.frame(gender, height, weight, student)
+df
+
+# some important functions
+summary(df)
+names(df)
+nrow(df)
+ncol(df)
+head(df)
+
+# accessing elements of data frames
+df[5, ]
+df[1:5, ]
+df[, 1]
+df[, c(1, 3, 4)]
+df[1, 3]
+df[1, -3]
+
+df$height
+
+df[df$height < 180, ]
+df[df$gender == "m", ]
+
+# adding columns to a data frame
+df <- cbind(df, age = c(20, 21, 30, 25, 27, 19, 24, 27, 28, 24))
+df
+
+df$name <- c("Joan", "Tom", "John", "Mike", "Anna", "Bill", "Tina", "Beth", "Steve", "Kim")
+df
+
+summary(df)
+
+#
+# User defined functions (addFunction returns a + b; the last value is returned)
+#
+
+addFunction <- function(a, b) {
+  a + b
+}
+
+# Load in-built datasets with data()
+data(iris)
+
+# Apply a function over the columns (lapply/sapply) or over the rows (apply)
+lapply(iris[, 1:3], mean) ## lapply returns a list!
+sapply(iris[, 1:3], mean) ## sapply returns a vector!
+apply(iris[, 1:3], 1, sum) ## apply operates across a given dimension (1 = row-wise)
+
+# Or using native methods (column means in a single call)
+colMeans(iris[, 1:3])
+
+
+## This is additional material, just to show you how things can also be done.
+library(dplyr)
+data(iris)
+
+# Let's do some groupings
+summarizedCustom <- iris %>% group_by(Species) %>% summarise(msw = max(Sepal.Width), mpw = mean(Petal.Width))
+summarizedCustom
+
+# How to create new features?
+newFeature <- iris %>% mutate(newFeature = Petal.Width + Sepal.Length)
+head(newFeature)
+
+# How about some filtering?
+filteredDF <- iris %>% filter(Species == "setosa", Petal.Width >= 0.4)
+head(filteredDF)