Jebes to stari
parent
a1083289a3
commit
02a2f06e03
|
@ -26,6 +26,55 @@
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
#######################################################################################################################
|
#######################################################################################################################
|
||||||
|
# Read the comma-separated movies file (first row holds the column names) into md.
md <- read.table(file = "movies.txt", header = TRUE, sep = ",")
|
||||||
|
library(ggplot2)
|
||||||
|
library(dplyr)
|
||||||
|
|
||||||
|
|
||||||
|
# We will transform binary attributes into nominal variables with a fixed number of possible values (factors)
|
||||||
|
# Store the Action column as a factor.
md[["Action"]] <- as.factor(md[["Action"]])
|
||||||
|
# Store the Animation column as a factor.
md[["Animation"]] <- as.factor(md[["Animation"]])
|
||||||
|
|
||||||
|
# The remaining columns will be transformed using the for loop
|
||||||
|
# Convert columns 20 through 24 of md to factors, one column at a time.
for (i in 20:24) {
  md[[i]] <- as.factor(md[[i]])
}
|
||||||
|
|
||||||
|
|
||||||
|
# - Are there more movies shorter than 100 min or longer than (or equal to) 100 minutes?
|
||||||
|
# (show your answer numerically and graphically)
|
||||||
|
# Logical vector: TRUE where md$length is 100 or more, FALSE where it is below 100.
# NOTE(review): the variable name says "shorter or equal", but the test is `>= 100`,
# i.e. TRUE marks the *longer-or-equal* movies. The name is kept because the
# following statements refer to it.
movies_shorter_or_equal_100_min <- md$length >= 100
|
||||||
|
tab <- table(movies_shorter_or_equal_100_min)
|
||||||
|
# Relabel the two counts. table() orders logical values as FALSE, TRUE, so the
# first label goes to FALSE (length < 100) and the second to TRUE (length >= 100).
# NOTE(review): assumes both FALSE and TRUE occur in the data; with only one
# value present `tab` has a single entry and two labels do not fit.
names(tab) <- c("Less than 100", "More or equal to 100")
|
||||||
|
pie(tab)
|
||||||
|
|
||||||
|
# - Are there more action comedies or romantic comedies?
|
||||||
|
# Among comedies (Comedy == 1), count how many are also action movies and how
# many are also romances. summarise() yields the single-row result directly,
# instead of repeating the totals on every row and de-duplicating afterwards.
md %>%
  filter(Comedy == 1) %>%
  summarise(
    number_of_action_movies = sum(Action == 1),
    number_of_Romances = sum(Romance == 1)
  )
|
||||||
|
|
||||||
|
|
||||||
|
# - Plot a histogram of the ratings for drama movies.
|
||||||
|
# Histogram (10 bins) of the rating column, restricted to rows with Drama == 1.
md %>%
  filter(Drama == 1) %>%
  ggplot(aes(x = rating)) +
  geom_histogram(bins = 10)
|
||||||
|
|
||||||
|
# - Is the average rating of dramas higher than the average rating of non-dramas?
|
||||||
|
# (show your answer numerically and graphically)
|
||||||
|
|
||||||
|
# Numerical answer: mean rating of non-dramas (Drama == 0) and dramas
# (Drama == 1), one row per group, so the two averages can be compared directly.
avg_rating_by_drama <- md %>%
  group_by(Drama) %>%
  summarise(avg_rating = mean(rating, na.rm = TRUE))
avg_rating_by_drama

# Graphical answer: rating distributions of the two groups side by side.
# Group labels follow the level order of Drama ("0" first, then "1").
boxplot(rating ~ Drama, data = md,
        names = c("Non-drama", "Drama"),
        xlab = "", ylab = "Rating",
        main = "Ratings of dramas vs. non-dramas")
|
||||||
|
|
||||||
|
# - Plot the number of animated movies being produced every year for the period 1995-2005.
|
||||||
|
# Select movies released in 1995-2005 (both bounds inclusive), the period the
# task asks for.
sel <- md$year >= 1995 & md$year <= 2005
|
||||||
|
# Cross-tabulate the selected movies: rows are the Animation values, columns are
# the release years, cells are movie counts.
tt <- table(md$Animation[sel], md$year[sel])
|
||||||
|
# Row "1" of tt holds the per-year counts of animated movies (Animation == 1).
# The values are counts from table(), so the axis label and title say
# "number", not "proportion" / "relative frequency".
barplot(tt["1", ], xlab = "Year", ylab = "Number of movies",
        main = "Number of animated movies per year")
|
||||||
|
|
||||||
|
# - Is there a clear boundary between short and feature movies (according to their length)?
|
||||||
|
|
||||||
|
#######################################################################################################################
|
||||||
#
|
#
|
||||||
# Load the Players dataset using the command:
|
# Load the Players dataset using the command:
|
||||||
#
|
#
|
||||||
|
|
Loading…
Reference in New Issue