Jebes to stari
parent
a1083289a3
commit
02a2f06e03
|
@ -26,6 +26,55 @@
|
|||
#
|
||||
#
|
||||
#######################################################################################################################
|
||||
# Load the movies data from the comma-separated file "movies.txt"; the first
# row of the file supplies the column names.
md <- read.table("movies.txt", sep=",", header=TRUE)
|
||||
library(ggplot2)
|
||||
library(dplyr)
|
||||
|
||||
|
||||
# Binary attributes are recoded as factors, i.e. nominal variables with a
# fixed set of possible values.
md[c("Action", "Animation")] <- lapply(md[c("Action", "Animation")], as.factor)

# Columns 20 to 24 get the same conversion, applied to all of them in one step.
md[20:24] <- lapply(md[20:24], as.factor)
|
||||
|
||||
|
||||
# - Are there more movies shorter than 100 min or longer than (or equal to) 100 minutes?
#   (show your answer numerically and graphically)
# Label every movie explicitly instead of renaming the table positionally:
# with fixed factor levels both categories are always present (possibly with a
# count of 0) and each count is guaranteed to carry the right label.
length_group <- factor(
  ifelse(md$length >= 100, "More or equal to 100", "Less than 100"),
  levels = c("Less than 100", "More or equal to 100")
)
tab <- table(length_group)

# Numerical answer: number of movies in each group.
print(tab)

# Graphical answer.
pie(tab)
|
||||
|
||||
# - Are there more action comedies or romantic comedies?
# Keep only the comedies, then count how many of them are also action movies
# and how many are also romances. summarise() collapses the data to a single
# row holding the two counts.
md %>%
  filter(Comedy == 1) %>%
  summarise(
    number_of_action_movies = sum(Action == 1),
    number_of_Romances = sum(Romance == 1)
  )
|
||||
|
||||
|
||||
# - Plot a histogram of the ratings for drama movies.
# Restrict the data to dramas and bin their ratings into 10 bars.
ggplot(filter(md, Drama == 1), aes(x = rating)) +
  geom_histogram(bins = 10)
|
||||
|
||||
# - Is the average rating of dramas higher than the average rating of non-dramas?
#   (show your answer numerically and graphically)

# Numerical answer: one row per value of Drama with the mean rating of that
# group, so the drama and non-drama averages can be compared directly.
avg_rating_by_drama <- md %>%
  group_by(Drama) %>%
  summarise(avg_rating = mean(rating, na.rm = TRUE))
print(avg_rating_by_drama)

# Graphical answer: one bar per group, bar height = average rating.
ggplot(avg_rating_by_drama, aes(x = Drama, y = avg_rating)) +
  geom_col() +
  labs(x = "Drama", y = "Average rating")
|
||||
|
||||
# - Plot the number of animated movies being produced every year for the period 1995-2005.
# Select the movies released in the requested period (both bounds inclusive).
sel <- md$year >= 1995 & md$year <= 2005

# Cross-tabulate Animation (rows) against year (columns) for those movies.
tt <- table(md$Animation[sel], md$year[sel])

# Row 2 is the second Animation level (presumably "1" = animated; verify
# against levels(md$Animation)). The values are raw counts per year, so the
# axis label and title say "number", not "proportion".
barplot(
  tt[2, ],
  xlab = "Year",
  ylab = "Number of movies",
  main = "Number of animated movies per year"
)
|
||||
|
||||
# - Is there a clear boundary between short and feature movies (according to their length)?
|
||||
|
||||
#######################################################################################################################
|
||||
#
|
||||
# Load the Players dataset using the command:
|
||||
#
|
||||
|
|
Loading…
Reference in New Issue