Jebes to stari

main
Gasper Spagnolo 2022-10-24 17:35:25 +02:00
parent a1083289a3
commit 02a2f06e03
1 changed files with 95 additions and 46 deletions

View File

@ -1,46 +1,95 @@
#######################################################################################################################
#
# PROBLEMS
#
#######################################################################################################################
#
# Load the Movies dataset using the command:
#
# md <- read.table("movies.txt", sep=",", header=TRUE)
#
# Answer the following questions:
#
# - Are there more movies shorter than 100 min or longer than (or equal to) 100 minutes?
# (show your answer numerically and graphically)
#
# - Are there more action comedies or romantic comedies?
#
# - Plot a histogram of the ratings for drama movies.
#
# - Is the average rating of dramas higher than the average rating of non-dramas?
# (show your answer numerically and graphically)
#
# - Plot the number of animated movies being produced every year for the period 1995-2005.
#
# - Is there a clear boundary between short and feature movies (according to their length)?
#
#
#######################################################################################################################
#
# Load the Players dataset using the command:
#
# players <- read.table("players.txt", sep=",", header = T)
#
# - Plot the proportion of players according to playing positions.
#
# - Compare career rebounds (the "reb" attribute) with respect to playing position.
#
# - Show the distribution of free throw percentages.
# The percentage is determined by dividing the number of shots made ("ftm") by the total number of shots attempted ("fta").
#
# - Compare career 3-pointers made for the players active between 1990 and 2007, with respect to playing position.
#
# - How does the average career length of retired players vary from year to year?
#
#######################################################################################################################
#######################################################################################################################
#
# PROBLEMS
#
#######################################################################################################################
#
# Load the Movies dataset using the command:
#
# md <- read.table("movies.txt", sep=",", header=TRUE)
#
# Answer the following questions:
#
# - Are there more movies shorter than 100 min or longer than (or equal to) 100 minutes?
# (show your answer numerically and graphically)
#
# - Are there more action comedies or romantic comedies?
#
# - Plot a histogram of the ratings for drama movies.
#
# - Is the average rating of dramas higher than the average rating of non-dramas?
# (show your answer numerically and graphically)
#
# - Plot the number of animated movies being produced every year for the period 1995-2005.
#
# - Is there a clear boundary between short and feature movies (according to their length)?
#
#
#######################################################################################################################
library(ggplot2)
library(dplyr)
# Read the comma-separated movies data; the first line holds the column names.
md <- read.table("movies.txt", sep = ",", header = TRUE)
# Binary attributes are recoded as factors, i.e. nominal variables with a
# fixed set of possible values.
md$Action <- as.factor(md$Action)
md$Animation <- as.factor(md$Animation)
# Columns 20 through 24 receive the same conversion in a single pass.
md[20:24] <- lapply(md[20:24], as.factor)
# - Are there more movies shorter than 100 min or longer than (or equal to) 100 minutes?
# (show your answer numerically and graphically)
# TRUE marks movies lasting 100 minutes or more, FALSE the shorter ones.
is_at_least_100_min <- md$length >= 100
# Explicit levels keep every label attached to the right group, even when one
# of the two groups turns out to be empty.
length_group <- factor(
  is_at_least_100_min,
  levels = c(FALSE, TRUE),
  labels = c("Less than 100", "More or equal to 100")
)
tab <- table(length_group)
# Numerical answer: number of movies in each group.
print(tab)
# Graphical answer: share of each group.
pie(tab)
# - Are there more action comedies or romantic comedies?
# Keep only the comedies, then count how many of them are also flagged as
# action and how many as romance. summarise() always yields exactly one row
# of counts, so no helper columns or de-duplication are needed.
md %>%
  filter(Comedy == 1) %>%
  summarise(
    number_of_action_movies = sum(Action == 1),
    number_of_Romances = sum(Romance == 1)
  )
# - Plot a histogram of the ratings for drama movies.
# Select the rows flagged as drama first, then draw their ratings in 10 bins.
drama_movies <- filter(md, Drama == 1)
ggplot(drama_movies, aes(rating)) +
  geom_histogram(bins = 10)
# - Is the average rating of dramas higher than the average rating of non-dramas?
# (show your answer numerically and graphically)
# Numerical answer: mean rating for each value of the Drama indicator
# (Drama == 1 marks the dramas, as in the histogram question).
avg_rating_by_drama <- tapply(md$rating, md$Drama, mean)
print(avg_rating_by_drama)
# Graphical answer: rating distribution per group. A boxplot marks the median,
# so the group means are overlaid as filled points for a direct comparison.
boxplot(
  rating ~ Drama,
  data = md,
  xlab = "Drama",
  ylab = "Rating",
  main = "Ratings of dramas and non-dramas"
)
points(seq_along(avg_rating_by_drama), avg_rating_by_drama, pch = 19)
# - Plot the number of animated movies being produced every year for the period 1995-2005.
# The period is inclusive on both ends, as the task states.
in_period <- md$year >= 1995 & md$year <= 2005
animated_in_period <- in_period & md$Animation == 1
# Fixing the levels to 1995:2005 keeps a (zero-height) bar for any year in
# which no animated movie was produced.
animated_per_year <- table(
  factor(md$year[animated_in_period], levels = 1995:2005)
)
# The bars show absolute counts, so the labels say so.
barplot(
  animated_per_year,
  xlab = "Year",
  ylab = "Number of animated movies",
  main = "Animated movies per year, 1995-2005"
)
# - Is there a clear boundary between short and feature movies (according to their length)?
#######################################################################################################################
#
# Load the Players dataset using the command:
#
# players <- read.table("players.txt", sep=",", header = T)
#
# - Plot the proportion of players according to playing positions.
#
# - Compare career rebounds (the "reb" attribute) with respect to playing position.
#
# - Show the distribution of free throw percentages.
# The percentage is determined by dividing the number of shots made ("ftm") by the total number of shots attempted ("fta").
#
# - Compare career 3-pointers made for the players active between 1990 and 2007, with respect to playing position.
#
# - How does the average career length of retired players vary from year to year?
#
#######################################################################################################################