first commit

2022-10-10 16:06:42 +02:00 · 2022-10-10 16:06:42 +02:00 · b4c1542846
commit b4c1542846
2 changed files with 466 additions and 0 deletions
--- a/problems.R
+++ b/problems.R
@ -0,0 +1,112 @@
+################################################################################
+#
+# PROBLEMS:
+#
+# 1.  Construct a vector that contains elements: 1,2,3,...,19,20.
+      v <- seq_len(20)  # integer vector 1, 2, ..., 20
+      #v
+#
+#
+# 2.  Construct a vector that contains elements: 1,2,3,...,19,20,19,...,3,2,1.
+      v1 <- 1:20      # ascending part: 1, 2, ..., 20
+      v2 <- 19:1      # descending part starts at 19 so that 20 occurs exactly once
+      j <- c(v1, v2)  # 39 elements: 1, ..., 19, 20, 19, ..., 1
+      #j
+#
+# 3.  Construct a vector that contains elements: 1,3,5,1,3,5,...,1,3,5 
+#     where there are 10 occurrences of element 5.
+      h <- rep(seq(from = 1, to = 5, by = 2), times = 10)  # pattern 1,3,5 repeated 10x -> ten 5s
+      #h
+#
+#
+# 4.  Calculate the values of sin(x) at 0, 0.1, 0.2, 0.3, ..., 1.0
+      # sin() is vectorised, so it is applied to the whole grid 0, 0.1, ..., 1.0
+      s <- sin(seq(0, 1, by = 0.1))
+      #s
+#
+# 5.  Suppose we have measured the heights and weights of ten individuals:
+#
+      # heights of the ten individuals, recorded in centimetres
+      height <- c(179, 185, 183, 172, 174, 185, 193, 169, 173, 168)
+
+      # weights of the same ten individuals, recorded in kilograms
+      weight <- c(95, 89, 70, 80, 92, 86, 100, 63, 72, 70)
+
+#     Calculate the body mass index (bmi) for each individual using the formula:
+#     bmi = weight_in_kg / (height_in_m)^2
+#
+#     HINT: first convert heights from 'cm' to 'm', then use the formula above.
+      height <- height / 100    # cm -> m; note that this overwrites 'height'
+      bmi <- weight / height^2  # element-wise: one bmi value per individual
+      #bmi
+#
+# 6.  Consider a vector:
+# 
+      x <- c(1, -2, 3, -4, 5, -6, 7, -8)
+      x <- ifelse(x < 0, 0, x * 10)  # negatives become 0, positives are multiplied by 10
+      #x
+
+#     Edit the vector x as follows. Replace all elements with a negative value 
+#     with 0. Multiply the elements with a positive value by 10.
+#
+#
+# 7.  Without using R, determine the result of the following computation:
+#
+      x <- c(1, 2, 3)
+      # by hand: ^ binds tightest, then / and *, then + and - from left to right:
+      #   1 / 2^2 - 1 + 2 * 3 - 2  =  0.25 - 1 + 6 - 2  =  3.25
+      x[1] / x[2]^2 - 1 + 2 * x[3] - x[1 + 1]
+
+#
+#
+# 8.  Consider a vector:
+#
+      x <- 1:200
+      sum(x %% 11 == 0)  # each TRUE counts as 1, so this is the number of multiples of 11
+#     Determine how many elements in the vector are exactly divisible by 11.
+#
+#     HINT: the integer division operator is %/%
+#           the modulus operator is %%              
+#
+#
+# 9.  Consider a data frame:
+#
+      height <- c(179, 185, 183, 172, 174, 185, 193, 169, 173, 168)  # cm
+      weight <- c(95, 89, 70, 80, 92, 86, 100, 63, 72, 70)           # kg
+      gender <- factor(c("f","m","m","m","f","m","f","f","m","f"))
+      student <- c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE)
+      age <- c(20, 21, 30, 25, 27, 19, 24, 27, 28, 24)
+      name <- c("Joan","Tom","John","Mike","Anna","Bill","Tina","Beth","Steve","Kim")
+
+      df <- data.frame(name, gender, age, height, weight, student)
+
+#
+#     - calculate the average age of persons in our dataset.
+#       (HINT: use the mean() function)
+        mean(df$age)
+#
+#     - calculate the average age of students in our dataset.
+        mean(df$age[df$student])  # 'student' is already logical, so it indexes directly
+#
+#     - how many males and females are in our dataset?
+#       (HINT: use the table() function)
+        table(df$gender)
+#
+#     - print persons that are students.
+        df$name[df$student]
+#
+#     - print persons who are between 1.8m and 1.9m tall (inclusive).
+        df$name[df$height >= 180 & df$height <= 190]  # heights are stored in cm
+#
+#     - print students who are above average height
+#       (considering all persons in the dataset).
+        # the mean is taken over everyone, but only students are reported
+        df$name[df$student & df$height > mean(df$height)]
+#
+#     - arrange persons by their age.
+#       (HINT: use the order function)
+        # order() returns the row permutation (oldest first here); the result is
+        # stored so that the final 'df' really shows the arranged data frame
+        df <- df[order(df$age, decreasing = TRUE), ]
+        df
+#
+###############################################################################
+
--- a/v1/lab1_introduction.R
+++ b/v1/lab1_introduction.R
@ -0,0 +1,354 @@
+###############################################################################
+#
+# INTRODUCTION TO R
+#
+###############################################################################
+
+# calculator: an expression typed at top level is evaluated and its value printed
+(50 + 1.45)/12.5 # (CTRL + ENTER runs the line; presumably the editor shortcut)
+
+# Clean screen with CTRL + L
+
+# assignment operators: three spellings that all bind a value to a name
+x = 945                      # '=' assigns when used at top level
+y <- sin(0.47)^2 * sqrt(5)   # '<-' is the conventional assignment arrow
+y^2 -> z                     # '->' assigns rightwards: z receives y^2
+
+# to inspect the value of a variable simply type its name
+x
+y
+z
+
+# listing and deleting objects
+ls()      # names of the objects in the current environment
+rm(y)     # remove a single object
+rm(x,z)   # remove several objects in one call
+
+# remove (almost) everything in the working environment
+rm(list=ls())  # ls() skips names starting with a dot by default, hence "almost"
+
+#
+# Vectors (the most basic data objects in R)
+#
+
+# creating vectors: c() combines its arguments into a single vector
+v <- c(14,7,23.5,76.2)
+v
+
+# generating a regular sequence of numbers
+v <- 1:10  # colon operator: 1, 2, ..., 10
+v
+
+v <- seq(from=5, to=10, by=2)  # 5, 7, 9 (stops before exceeding 'to')
+v
+
+w <- rep(v, times = 2)  # the whole vector repeated twice: 5, 7, 9, 5, 7, 9
+w
+
+# scalars are vectors with a single element
+w <- 45.0
+
+# vectors can be created using other vectors (c() flattens its arguments)
+z <- c(v, 2.5, w)  # 5, 7, 9, 2.5, 45
+z
+
+#
+# Useful functions
+#
+
+v <- c(8, 4, 2, 3, 6, 9, 1)
+
+length(v)             # number of elements: 7
+max(v)                # largest value: 9
+min(v)                # smallest value: 1
+which.min(v)          # position of the smallest value: 7
+which.max(v)          # position of the largest value: 6
+sum(v)
+mean(v)
+sd(v)                 # sample standard deviation
+rev(v)                # elements in reverse order
+sort(v)               # ascending by default
+sort(v, decreasing=T) # T is shorthand for TRUE
+order(v)              # permutation that sorts v: 7 3 4 2 5 1 6
+
+# types of vectors
+mode(v)  # "numeric"
+
+# logical vector - has logical constants as elements
+b <- c(TRUE, FALSE, F, T)  # F and T are abbreviations of FALSE and TRUE
+b
+mode(b)  # "logical"
+
+x <- 5 > 3  # a comparison yields a logical value
+x
+mode(x)
+
+# string vector - has strings as elements
+s <- c("character", "logical", "numeric", "complex")
+mode(s)  # "character"
+
+# type coercion (all elements must be of the same type)
+x <- c(F, T, 34.56, 'aaa')  # everything is converted to character
+x
+mode(x)
+
+#
+# Vectorization
+#
+
+# vector arithmetic (operations are performed element-wise)
+v1 <- c(10,20,30,40)
+v2 <- 1:4
+v1 + v2  # 11 22 33 44
+v1 * v2  # 10 40 90 160
+
+# functions operate directly on each element of a vector
+v1^2
+sqrt(v1)
+exp(v1)
+log2(v1)
+
+# the recycling rule (if lengths are different the elements of the shorter vector are repeated)
+v1 * 10           # the single value 10 is reused for every element
+v1 + 1
+v1 + c(100, 200)  # c(100, 200) is reused twice: 110 220 130 240
+
+#
+# Indexing
+#
+
+x <- c(-10,20,-30,40,-50,60,-70,80)
+x
+
+# individual elements can be addressed using an integer index vector
+# (indexing starts with 1)
+x[3]         # third element: -30
+x[c(1,4,5)]  # elements 1, 4 and 5: -10 40 -50
+x[1:3]       # the first three elements
+x[]          # an empty index selects every element
+
+# negative integer indices address all elements but those stated
+x[-1]        # everything except the first element
+x[-c(4,6)]   # everything except elements 4 and 6
+x[-(1:3)]    # parentheses are required: -1:3 would be the sequence -1, 0, 1, 2, 3
+
+# vector elements can be addressed using logical vectors
+# (elements corresponding to constants TRUE are selected)
+
+# logical vector (one TRUE/FALSE per element of x)
+x > 0
+
+# logical vector indexing
+x[x>0]                # the positive elements
+x[x <= -20 | x > 50]  # '|' is element-wise OR: -30 -50 60 -70 80
+x[x > 40 & x < 100]   # '&' is element-wise AND: 60 80
+
+# equality operator is ==
+# inequality operator is !=
+
+# the which() function returns indices corresponding to constants TRUE
+which(x > 0)  # 2 4 6 8
+
+# character string index vector (requires named elements)
+point <- c(4.7, 3.6, 2.5)
+names(point) <- c('x', 'y', 'z')
+point
+
+point['x']
+point[c('x','z')]
+
+# empty indices: assign to every element, keeping length and names
+point[] <- 0
+point
+
+# not the same as: this replaces the whole object with a single unnamed 0
+point <- 0
+point
+
+#
+# Vector editing
+#
+
+x <- c("a", "b", "c", "d")
+
+# replacing an element
+x[2] <- "BBBBB"
+x
+
+x[c(1,3)] <- c("AAAAA", "CCCCC")  # several positions replaced in one assignment
+x
+
+# adding new element (index one past the current end)
+x[length(x)+1] = "EEEEE"
+x
+
+# what happens if we do not define all elements in the vector?
+x[10] <- "FFFFF"  # the vector grows to length 10; positions 6 to 9 are filled with NA
+x
+
+# which elements are not defined (TRUE marks an NA)
+is.na(x)
+
+
+# removing elements
+x <- x[-c(1,3)]  # drop positions 1 and 3; 8 elements remain
+x
+
+x <- c(x[2],x[3])  # keep only the current 2nd and 3rd elements: "d" "EEEEE"
+x
+
+#
+# Flow control
+#
+
+# for loops: x takes each value of 1:10 in turn, so this prints 1 to 10
+for (x in 1:10) {
+  print(x)
+}
+
+# while loops: the body repeats while the condition holds, so this prints 0 to 9
+x <- 0
+while (x < 10) {
+  print(x)
+  x <- x+1
+}
+
+# if statements: branches are tested in order; with x == 1 'Condition 2' is printed
+x <- 1
+if (x == 0) {
+  print('Condition 1')
+} else if (x == 1){
+  print('Condition 2')
+} else {
+  print('Condition 3')
+}
+
+#
+# Factors
+#
+
+color <- c("blue","red","red","red","blue","red","blue")
+color
+
+# factors are useful when modelling nominal variables
+color <- factor(color)  # levels are taken from the distinct values: blue, red
+color
+
+# argument "levels" defines all possible elements' values
+dir <- factor(c('left','left','up'), levels = c('left','right','up','down'))
+dir
+
+# all possible elements' values (including levels that do not occur in the data)
+levels(dir)
+
+# if no match is found: R issues a warning and stores NA instead
+dir[1] <- "diagonal"
+dir
+
+# valid assignment ("down" is one of the declared levels)
+dir[1] <- "down"
+dir
+
+# frequency tables for factors (levels with no occurrences are reported with count 0)
+table(color)
+table(dir)
+
+#
+# Lists (an ordered collection of objects - components)
+#
+
+# creating a list (components may have different types and lengths)
+student <- list(id=12345,name="Marko",marks=c(10,9,10,9,8,10))
+student
+
+# extracting elements of a list (using named components)
+student$id
+student$name
+student$marks
+
+# extracting elements of a list (using indexing; [[i]] returns the i-th component itself)
+student[[1]]
+student[[2]]
+student[[3]]
+
+# extending lists (assigning to a new name appends a component)
+student$parents <- c("Ana", "Tomaz")
+student
+
+#
+# Data frames
+#
+
+# creating a data frame (each vector becomes a column; all have length 10)
+height <- c(179, 185, 183, 172, 174, 185, 193, 169, 173, 168)
+weight <- c(95, 89, 70, 80, 92, 86, 100, 63, 72, 70)
+gender <- factor(c("f","m","m","m","f","m","f","f","m","f"))
+student <- c(T, T, F, F, T, T, F, F, F, T)
+
+df <- data.frame(gender, height, weight, student)
+df
+
+# some important functions
+summary(df)  # per-column summary
+names(df)    # column names
+nrow(df)     # number of rows
+ncol(df)     # number of columns
+head(df)     # the first rows
+
+# accessing elements of data frames: df[rows, columns]
+df[5,]         # fifth row, all columns
+df[1:5,]       # rows 1 to 5
+df[,1]         # first column, returned as a plain vector (here the gender factor)
+df[,c(1,3,4)]  # columns 1, 3 and 4
+df[1,3]        # single cell: row 1, column 3
+df[1,-3]       # row 1 without column 3
+
+df$height      # a column selected by name
+
+df[df$height < 180,]   # rows selected by a logical condition
+df[df$gender == "m",]
+
+# adding columns to a data frame
+df <- cbind(df, age = c(20, 21, 30, 25, 27, 19, 24, 27, 28, 24))
+df
+
+df$name = c("Joan","Tom","John","Mike","Anna","Bill","Tina","Beth","Steve","Kim")  # assigning to a new name also adds a column
+df
+
+summary(df)
+
+#
+# User defined functions
+#
+
+addFunction <- function(a, b) {
+  a + b  # the value of the last expression is returned; `+` works element-wise
+}
+
+# Load a built-in dataset into the workspace with data()
+data(iris)
+
+# Means of the first three columns, computed with the apply family
+lapply(iris[, 1:3], mean)  ## lapply always returns a list
+sapply(iris[, 1:3], mean)  ## sapply simplifies the result to a named vector
+apply(iris[, 1:3], 1, sum) ## apply works along a dimension; 1 = rows, so these are row sums
+
+# Or using native methods: column totals divided by the number of rows
+colSums(iris[, 1:3]) / nrow(iris[, 1:3])
+
+
+## This is additional material, just to show you how things can also be done.
+library(dplyr)
+data(iris)
+
+# Let's do some grouping: maximum sepal width and mean petal width per species
+summarizedCustom <- iris %>%
+  group_by(Species) %>%
+  summarise(msw = max(Sepal.Width), mpw = mean(Petal.Width))
+summarizedCustom
+
+# How to create new features? mutate() appends a column derived from existing ones
+newFeature <- iris %>%
+  mutate(newFeature = Petal.Width + Sepal.Length)
+head(newFeature)
+
+# How about some filtering? Conditions separated by commas must all hold
+filteredDF <- iris %>%
+  filter(Species == "setosa", Petal.Width >= 0.4)
+head(filteredDF)