jebesh dva

main
Gasper Spagnolo 2022-10-25 14:28:35 +02:00
parent 02a2f06e03
commit 7d334030e2
2 changed files with 525 additions and 1 deletions

512
v3/.Rhistory Normal file
View File

@ -0,0 +1,512 @@
# Build a logical vector. Elements must be combined with c(); a bare
# parenthesised, comma-separated list is a syntax error.
# TRUE/FALSE are spelled out because T and F are ordinary variables that
# can be reassigned.
a <- c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE)
# Storage mode of the vector.
mode(a)
# Install the required packages only when they are not available yet, so
# that re-running these lines does not download them again.
for (pkg in c("ggplot2", "dplyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Set the working directory; relative file paths are resolved against it.
# The function is setwd() (setcwd() does not exist).
# for example:
# setwd("c:\\labs\\data\\")
setwd('/home/gasperspagnolo/Documents/faks/3-letnik/1sem/is/vaje/v3')

# NOTE(review): a bare `ls` only prints the source of the ls function.
# Presumably a directory listing was intended here - confirm; use ls() to
# list the objects in the workspace instead.
list.files()

library(ggplot2)
library(dplyr)
# To read data from a text file, use the "read.table" command.
# The parameter header=TRUE indicates that the file to be read includes a
# first line with the column names.
md <- read.table(file = "movies.txt", sep = ",", header = TRUE)

# First look at the data: leading rows, per-column summary, structure and
# column names.
head(md)
summary(md)
str(md)
names(md)

# Convert the genre indicator columns to factors.
md$Action <- as.factor(md$Action)
md$Animation <- as.factor(md$Animation)
# The remaining columns are converted in one vectorised step.
# NOTE(review): assumes columns 20-24 are the remaining indicator columns -
# confirm against names(md).
md[20:24] <- lapply(md[20:24], as.factor)
str(md)

# Indexing: a whole row, a single cell by column position, a single cell by
# column name, a whole column by position and a whole column by name.
md[30, ]
md[30, 3]
md[30, "length"]
md[, 3]
md$length

# Base-graphics views of a numeric column ...
plot(md$length)
hist(md$length)
plot(density(md$length))
boxplot(md$length)
# ... and of categorical columns, via their frequency tables.
barplot(table(md$Drama))
pie(table(md$mpaa))
## nicer plots with ggplot2 + dplyr
# Histogram of movie length with 40 bins.
md %>%
  ggplot(aes(length)) +
  geom_histogram(bins = 40) +
  ggtitle("A genetic histogram") +
  xlab("Length")

## plotting w.r.t. multiple mpaa categories
# One semi-transparent density curve per mpaa value.
md %>%
  ggplot(aes(length, fill = mpaa)) +
  geom_density(alpha = 0.2)

## What about a nicer boxplot w.r.t mpaa?
## theme_bw() is more neutral theme
md %>%
  ggplot(aes(Drama, rating, color = mpaa)) +
  geom_boxplot() +
  theme_bw()
# Frequency of the Comedy flag, first with the raw level names.
barplot(table(md$Comedy))
pie(table(md$Comedy))

# The same table with readable labels.
# NOTE(review): the labels assume exactly two levels, sorted as "0" then
# "1" - confirm with levels(md$Comedy).
tab <- table(md$Comedy)
names(tab) <- c("Other genres", "Comedies")
tab
sum(tab)

barplot(tab, ylab = "Number of titles",
        main = "Proportion of comedies to other genres")
barplot(tab / sum(tab) * 100, ylab = "Percentage of titles",
        main = "The proportion of comedies to other genres")
pie(tab, main = "Proportion of comedies to other genres")

# Row mask for comedies, computed once and reused below.
comedy <- md$Comedy == "1"

# Plot the rating distribution for comedies
hist(md[comedy, "rating"], xlab = "Rating", ylab = "Frequency",
     main = "Histogram of ratings for comedies")
boxplot(md[comedy, "rating"], ylab = "Rating",
        main = "Boxplot of ratings for comedies")
quantile(md$rating[comedy])

# Calculate the mean rating value for comedies and non-comedies
mean(md[comedy, "rating"])
mean(md[!comedy, "rating"])

# Side-by-side comparison of the two groups.
boxplot(rating ~ Comedy, data = md)
boxplot(rating ~ Comedy, data = md, names = c("Other genres", "Comedies"),
        ylab = "Rating",
        main = "Comparison of ratings between comedies and non-comedies")

# The same group means with dplyr. summarise() already keeps the grouping
# column, so no select() is needed in between.
md %>%
  group_by(Comedy) %>%
  summarise(mean_rating = mean(rating))
# Restrict the analysis to movies released in 1990 or later.
sel <- md$year >= 1990

# Two-way contingency table (Comedy flag x year) and the yearly totals.
tabcomedy <- table(md$Comedy[sel], md$year[sel])
tabyear <- table(md$year[sel])
tabcomedy
tabyear

# Share of comedies per year. The comedy row is selected by its level name
# instead of its position, so the result does not depend on row order.
ratio <- tabcomedy["1", ] / tabyear
ratio

barplot(ratio, xlab = "Year", ylab = "Relative frequency",
        main = "Proportion of comedies")
# The years are stored as the (character) names of `ratio`; convert them so
# that the x axis is numeric.
plot(x = as.integer(names(ratio)), y = as.vector(ratio), type = "l",
     xlab = "Year", ylab = "Relative frequency",
     main = "Proportion of comedies, 1990-2005")

## or with dplyr directly
md %>%
  filter(year >= 1990) %>%
  group_by(year, Comedy) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  # Still grouped by year here, so sum(n) is the total for that year.
  mutate(freq = n / sum(n)) %>%
  ungroup() %>%
  filter(Comedy == 1) %>%
  select(year, freq) %>%
  ggplot(aes(year, freq)) +
  geom_point() +
  ggtitle("Frequency of comedies") +
  ylab("Frequency") +
  xlab("Year")
# Inspect the budget column and count its missing values.
summary(md$budget)
table(is.na(md$budget))

# select complete observations only
# `sel` marks the rows whose budget is missing, hence the negation below.
sel <- is.na(md$budget)
mdsub <- md[!sel, ]
nrow(mdsub)
summary(mdsub$budget)

plot(mdsub$budget, mdsub$rating, xlab = "Budget in $", ylab = "Rating",
     main = "Movie rating vs budget")
# NOTE(review): read the relationship off the plot. Points concentrated in
# the upper left part would mean low budgets with high ratings; that alone
# does not show that a higher budget leads to a higher rating.

# Utilization of the budget in terms of rating
ratio <- mdsub$budget / mdsub$rating
hist(ratio)
# Which movie has the worst budget utilization?
mdsub[which.max(ratio), ]

# Let's discretize these budgets to:
# low (up to 1M), mid (above 1M, up to 50M) and big (more than 50M)
# include.lowest = TRUE keeps a budget of exactly 0 in "low", and the open
# upper bound keeps budgets above 500M in "big", instead of both becoming NA.
disbudget <- cut(mdsub$budget, c(0, 1000000, 50000000, Inf),
                 labels = c("low", "mid", "big"), include.lowest = TRUE)
barplot(table(disbudget) / length(disbudget), xlab = "Budget",
        ylab = "Relative frequency", main = "Proportion of movies vs budget")
# Side-by-side boxplots of ratings grouped by budget values
boxplot(mdsub$rating ~ disbudget, xlab = "Budget", ylab = "Rating",
        main = "Boxplot of movie rating vs budget")

## Is this dependent on the mpaa?
## or with dplyr + ggplot + adding votes
md %>%
  select(budget, rating, votes, mpaa) %>%
  na.omit() %>%
  ggplot(aes(budget, rating, color = votes, fill = mpaa)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  theme_bw()

## or with dplyr
# Cumulative budget over the years. as.numeric() is applied before summing;
# presumably to avoid integer overflow in the yearly totals - confirm.
md %>%
  select(budget, year) %>%
  na.omit() %>%
  group_by(year) %>%
  summarise(budget2 = sum(as.numeric(budget))) %>%
  arrange(year) %>%
  mutate(csum = cumsum(budget2)) %>%
  ggplot(aes(year, csum)) +
  geom_bar(stat = "identity") +
  theme_bw()
#######################################
md <- read.table("movies.txt", sep = ",", header = TRUE)
# We will transform binary attributes into nominal variables with a fixed
# number of possible values (factors)
md$Action <- as.factor(md$Action)
md$Animation <- as.factor(md$Animation)
# The remaining columns are converted in one vectorised step.
# NOTE(review): assumes columns 20-24 are the remaining indicator columns -
# confirm against names(md).
md[20:24] <- lapply(md[20:24], as.factor)

# - Are there more movies shorter than 100 min or longer than (or equal to)
#   100 minutes? (show your answer numerically and graphically)
# NOTE: despite its name, this mask is TRUE for movies of 100 minutes or
# more; the name is kept unchanged.
movies_shorter_or_equal_100_min <- md$length >= 100

# Fixing the levels guarantees that both categories are present and in this
# order, so the labels below cannot end up on the wrong count.
tab <- table(factor(movies_shorter_or_equal_100_min, levels = c(FALSE, TRUE)))
names(tab) <- c("Less than 100", "More or equal to 100")

# Numerical answer ...
tab
# ... and graphical answer, built from the two counts rather than from one
# bar per movie.
pie(tab)
barplot(tab, ylab = "Number of titles",
        main = "Movies shorter vs. longer than 100 minutes")
# Number of movies flagged as both Romance and Comedy. Counting
# md$Romance == 1 alone would include every romance, comedy or not.
romantic_comedies <- sum(md$Romance == 1 & md$Comedy == 1)
romantic_comedies

## or with dplyr directly
# Yearly share of comedies from 1990 on, drawn as points joined by a line.
md %>%
  filter(year >= 1990) %>%
  group_by(year, Comedy) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  # Still grouped by year here, so sum(n) is the total for that year.
  mutate(freq = n / sum(n)) %>%
  ungroup() %>%
  filter(Comedy == 1) %>%
  select(year, freq) %>%
  ggplot(aes(year, freq)) +
  geom_point() +
  ggtitle("Frequency of comedies") +
  ylab("Frequency") +
  xlab("Year") +
  geom_line() +
  theme_bw()

######################################################
# - Are there more action comedies or romantic comedies?
# Keep the comedies only and count, in a single summary row, how many of
# them are also action movies and how many are also romances. The genre
# column is called Romance; there is no Romantic column.
md %>%
  filter(Comedy == 1) %>%
  summarise(
    number_of_action_movies = sum(Action == 1),
    number_of_Romances = sum(Romance == 1)
  )
# - Plot a histogram of the ratings for drama movies.
head(md)

# hist() needs a numeric vector.
# NOTE(review): mpaa is tabulated and used as a fill category elsewhere in
# this session, so it is presumably not numeric; its distribution is shown
# with a bar chart of the frequency table instead - confirm.
barplot(table(md$mpaa))
hist(md$r1)

# ggplot version: keep the dramas, map rating to x and add the histogram
# layer with `+`. Layers are not closed with an extra parenthesis.
md %>%
  filter(Drama == 1) %>%
  ggplot(aes(rating)) +
  geom_histogram(bins = 10)

# Base-graphics version: hist() is given the rating vector itself, not a
# data frame.
hist(md$rating[md$Drama == 1], xlab = "Rating", ylab = "Frequency",
     main = "Histogram of ratings for drama movies")
# For every drama movie, flag whether its rating is above the mean rating
# of the drama movies. mean(rating) is evaluated after the filter, so it is
# the drama mean and not the overall mean.
lol <- md %>%
  filter(Drama == 1) %>%
  mutate(higher_than_avg_rating = rating > mean(rating)) %>%
  select(higher_than_avg_rating)

# Numerical answer: number of dramas above, and not above, that mean.
lol %>%
  summarise(
    is_higher = sum(higher_than_avg_rating),
    is_lower = sum(!higher_than_avg_rating)
  )

# Graphical answer: pie() needs a vector of non-negative numbers, so the
# logical column is tabulated first instead of passing the data frame.
pie(table(lol$higher_than_avg_rating))
# - Plot the number of animated movies being produced every year for the
#   period 1995-2005.
# A row mask needs the element-wise `&`. `&&` is scalar-only (an error for
# longer vectors in current R) and `and` is not an R operator. The lower
# bound is 1995, as stated in the task.
sel <- md$year >= 1995 & md$year <= 2005
sel

# Contingency table: Animation flag (rows) x year (columns). The flag
# levels are fixed so that the "1" row exists even if the period contains
# no animated movie.
tt <- table(factor(md$Animation[sel], levels = c(0, 1)), md$year[sel])
tt

# Number of animated movies per year; the row is selected by level name,
# not by position.
tt["1", ]
#####################
# The bars show counts, so the axis and title say so.
barplot(tt["1", ], xlab = "Year", ylab = "Number of titles",
        main = "Animated movies per year, 1995-2005")
#######################

View File

@ -70,7 +70,7 @@ pie(table(lol))
# - Plot the number of animated movies being produced every year for the period 1995-2005.
# Element-wise mask over the stated period (lower bound 1995).
sel <- md$year >= 1995 & md$year <= 2005
# Animation flag (rows) x year (columns); fixing the levels guarantees that
# the "1" row exists even if the period contains no animated movie.
tt <- table(factor(md$Animation[sel], levels = c(0, 1)), md$year[sel])
# The function is barplot(); the bars show counts, so they are labelled as
# such.
barplot(tt["1", ], xlab = "Year", ylab = "Number of titles",
        main = "Animated movies per year, 1995-2005")
# - Is there a clear boundary between short and feature movies (according to their length)?
@ -93,3 +93,15 @@ barplot(tt[2,], xlab="Year", ylab="Relative frequency", main="Proportion of anim
#
#######################################################################################################################
# Read the players data; the first line of the file holds the column names.
players <- read.table("players.txt", sep = ",", header = TRUE)
# - Plot the proportion of players according to playing positions.
# Frequency table of the position column; pie() scales the counts to
# shares of the circle, so no grouping step is needed beforehand.
position_proportion <- players %>%
  select(position) %>%
  table()
pie(position_proportion)
# - Compare career rebounds (the "reb" attribute) with respect to playing position
boxplot(reb ~ position, data = players, ylab = "Career rebounds",
        main = "Comparison of career rebounds with respect to playing position")
# - Show the distribution of free throw percentages.
# The percentage is determined by dividing the number of shots made ("ftm") by the total number of shots attempted ("fta").
# NOTE(review): if a player has fta equal to 0, the division yields NaN (or
# Inf) for that row; decide whether such rows should be dropped before
# plotting.
ftp <- players %>%
  mutate(free_throw_percentage = ftm / fta * 100) %>%
  select(free_throw_percentage)