Jebes to stari
parent
a1083289a3
commit
02a2f06e03
|
@ -1,46 +1,95 @@
|
|||
#######################################################################################################################
|
||||
#
|
||||
# PROBLEMS
|
||||
#
|
||||
#######################################################################################################################
|
||||
#
|
||||
# Load the Movies dataset using the command:
|
||||
#
|
||||
# md <- read.table("movies.txt", sep=",", header=TRUE)
|
||||
#
|
||||
# Answer the following questions:
|
||||
#
|
||||
# - Are there more movies shorter than 100 min or longer than (or equal to) 100 minutes?
|
||||
# (show your answer numerically and graphically)
|
||||
#
|
||||
# - Are there more action comedies or romantic comedies?
|
||||
#
|
||||
# - Plot a histogram of the ratings for drama movies.
|
||||
#
|
||||
# - Is the average rating of dramas higher than the average rating of non-dramas?
|
||||
# (show your answer numerically and graphically)
|
||||
#
|
||||
# - Plot the number of animated movies being produced every year for the period 1995-2005.
|
||||
#
|
||||
# - Is there a clear boundary between short and feature movies (according to their length)?
|
||||
#
|
||||
#
|
||||
#######################################################################################################################
|
||||
#
|
||||
# Load the Players dataset using the command:
|
||||
#
|
||||
# players <- read.table("players.txt", sep=",", header=TRUE)
|
||||
#
|
||||
# - Plot the proportion of players according to playing positions.
|
||||
#
|
||||
# - Compare career rebounds (the "reb" attribute) with respect to playing position.
|
||||
#
|
||||
# - Show the distribution of free throw percentages.
|
||||
# The percentage is determined by dividing the number of shots made ("ftm") by the total number of shots attempted ("fta").
|
||||
#
|
||||
# - Compare career 3-pointers made for the players active between 1990 and 2007, with respect to playing position.
|
||||
#
|
||||
# - How does the average career length of retired players vary from year to year?
|
||||
#
|
||||
#######################################################################################################################
|
||||
|
||||
#######################################################################################################################
|
||||
#
|
||||
# PROBLEMS
|
||||
#
|
||||
#######################################################################################################################
|
||||
#
|
||||
# Load the Movies dataset using the command:
|
||||
#
|
||||
# md <- read.table("movies.txt", sep=",", header=TRUE)
|
||||
#
|
||||
# Answer the following questions:
|
||||
#
|
||||
# - Are there more movies shorter than 100 min or longer than (or equal to) 100 minutes?
|
||||
# (show your answer numerically and graphically)
|
||||
#
|
||||
# - Are there more action comedies or romantic comedies?
|
||||
#
|
||||
# - Plot a histogram of the ratings for drama movies.
|
||||
#
|
||||
# - Is the average rating of dramas higher than the average rating of non-dramas?
|
||||
# (show your answer numerically and graphically)
|
||||
#
|
||||
# - Plot the number of animated movies being produced every year for the period 1995-2005.
|
||||
#
|
||||
# - Is there a clear boundary between short and feature movies (according to their length)?
|
||||
#
|
||||
#
|
||||
#######################################################################################################################
|
||||
# Read the comma-separated movies file; the first line supplies column names.
md <- read.table(file = "movies.txt", header = TRUE, sep = ",")
|
||||
library(ggplot2)
|
||||
library(dplyr)
|
||||
|
||||
|
||||
# We will transform binary attributes into nominal variables with a fixed number of possible values (factors)
|
||||
# Recode the Action and Animation indicator columns as factors so they are
# handled as categorical variables.
md[c("Action", "Animation")] <- lapply(md[c("Action", "Animation")], as.factor)

# Apply the same recoding to columns 20 through 24, selected by position.
# NOTE(review): presumably these are the remaining genre indicators - confirm
# against the column order of movies.txt.
md[20:24] <- lapply(md[20:24], as.factor)
|
||||
|
||||
|
||||
# - Are there more movies shorter than 100 min or longer than (or equal to) 100 minutes?
|
||||
# (show your answer numerically and graphically)
|
||||
# TRUE for movies that run 100 minutes or longer, FALSE for shorter ones.
is_100_min_or_longer <- md$length >= 100

# Tabulate on a factor with fixed levels and labels, so each label is tied to
# its own category and both categories appear even when one of them is empty.
tab <- table(factor(
  is_100_min_or_longer,
  levels = c(FALSE, TRUE),
  labels = c("Less than 100", "More or equal to 100")
))

# Numerical answer: number of movies in each category.
print(tab)

# Graphical answer: share of each category.
pie(tab)
|
||||
|
||||
# - Are there more action comedies or romantic comedies?
|
||||
# Keep only comedies, then count how many of them are also action movies and
# how many are also romances. summarise() collapses the comedies straight to a
# single row of counts.
md %>%
  filter(Comedy == 1) %>%
  summarise(
    number_of_action_movies = sum(Action == 1),
    number_of_Romances = sum(Romance == 1)
  )
|
||||
|
||||
|
||||
# - Plot a histogram of the ratings for drama movies.
|
||||
# Histogram (10 bins) of the ratings of movies flagged as drama.
ggplot(data = filter(md, Drama == 1), mapping = aes(x = rating)) +
  geom_histogram(bins = 10)
|
||||
|
||||
# - Is the average rating of dramas higher than the average rating of non-dramas?
|
||||
# (show your answer numerically and graphically)
|
||||
|
||||
# Split all movies into dramas (Drama == 1) and everything else, so the two
# groups can be compared with each other.
drama_group <- factor(
  md$Drama == 1,
  levels = c(FALSE, TRUE),
  labels = c("Non-drama", "Drama")
)

# Numerical answer: average rating within each group.
mean_rating <- tapply(md$rating, drama_group, mean, na.rm = TRUE)
print(mean_rating)

# Graphical answer: rating distribution per group, with the group means
# overlaid as filled points.
boxplot(
  md$rating ~ drama_group,
  xlab = "",
  ylab = "Rating",
  main = "Ratings of dramas vs. non-dramas"
)
points(seq_along(mean_rating), mean_rating, pch = 19)
|
||||
|
||||
# - Plot the number of animated movies being produced every year for the period 1995-2005.
|
||||
# Restrict to the period asked for in the task: 1995 through 2005 inclusive.
sel <- md$year >= 1995 & md$year <= 2005

# Cross-tabulate the Animation flag against the production year.
tt <- table(md$Animation[sel], md$year[sel])

# Plot the second row of the table as one bar per year. These are raw counts,
# so the axis label and title say so.
# NOTE(review): assumes the second level of Animation marks animated movies
# (0/1 coding) - confirm.
barplot(
  tt[2, ],
  xlab = "Year",
  ylab = "Number of animated movies",
  main = "Animated movies produced per year, 1995-2005"
)
|
||||
|
||||
# - Is there a clear boundary between short and feature movies (according to their length)?
|
||||
|
||||
#######################################################################################################################
|
||||
#
|
||||
# Load the Players dataset using the command:
|
||||
#
|
||||
# players <- read.table("players.txt", sep=",", header=TRUE)
|
||||
#
|
||||
# - Plot the proportion of players according to playing positions.
|
||||
#
|
||||
# - Compare career rebounds (the "reb" attribute) with respect to playing position.
|
||||
#
|
||||
# - Show the distribution of free throw percentages.
|
||||
# The percentage is determined by dividing the number of shots made ("ftm") by the total number of shots attempted ("fta").
|
||||
#
|
||||
# - Compare career 3-pointers made for the players active between 1990 and 2007, with respect to playing position.
|
||||
#
|
||||
# - How does the average career length of retired players vary from year to year?
|
||||
#
|
||||
#######################################################################################################################
|
||||
|
||||
|
|
Loading…
Reference in New Issue