Zacetek na v3
parent
b4c1542846
commit
a1083289a3
|
@ -44,7 +44,8 @@
|
||||||
#
|
#
|
||||||
x <- c(1, -2, 3, -4, 5, -6, 7, -8)
|
x <- c(1, -2, 3, -4, 5, -6, 7, -8)
|
||||||
x[x < 0] <- 0
|
x[x < 0] <- 0
|
||||||
#x
|
# Multiply only the strictly positive elements by 10. The same mask is used on
# both sides so the replacement values line up with the selected positions
# (the previous right-hand side was the whole vector and relied on recycling).
x[x > 0] <- x[x > 0] * 10
|
||||||
|
x
|
||||||
|
|
||||||
# Edit the vector x as follows. Replace all elements with a negative value
|
# Edit the vector x as follows. Replace all elements with a negative value
|
||||||
# with 0. Multiply the elements with a positive value by 10.
|
# with 0. Multiply the elements with a positive value by 10.
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
# Load the algae training and test sets; header = TRUE takes the column names
# from the first line of each file. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
train.data <- read.table("./data/AlgaeLearn.txt", header = TRUE)
test.data <- read.table("./data/AlgaeTest.txt", header = TRUE)

# Linear model predicting a1 from every other column of the training data
lm.model <- lm(a1 ~ ., train.data)
|
Binary file not shown.
|
@ -0,0 +1,185 @@
|
||||||
|
"season" "size" "speed" "mxPH" "mnO2" "Cl" "NO3" "NH4" "oPO4" "PO4" "Chla" "a1"
|
||||||
|
"winter" "small" "medium" 8 9.8 60.8 6.238 578 105 170 50 0
|
||||||
|
"spring" "small" "medium" 8.35 8 57.75 1.288 370 428.75 558.75 1.3 1.4
|
||||||
|
"autumn" "small" "medium" 8.1 11.4 40.02 5.33 346.66699 125.667 187.05701 15.6 3.3
|
||||||
|
"spring" "small" "medium" 8.07 4.8 77.364 2.302 98.182 61.182 138.7 1.4 3.1
|
||||||
|
"autumn" "small" "medium" 8.06 9 55.35 10.416 233.7 58.222 97.58 10.5 9.2
|
||||||
|
"winter" "small" "high" 8.25 13.1 65.75 9.248 430 18.25 56.667 28.4 15.1
|
||||||
|
"summer" "small" "high" 8.15 10.3 73.25 1.535 110 61.25 111.75 3.2 2.4
|
||||||
|
"autumn" "small" "high" 8.05 10.6 59.067 4.99 205.66701 44.667 77.434 6.9 18.2
|
||||||
|
"winter" "small" "medium" 8.7 3.4 21.95 0.886 102.75 36.3 71 5.544 25.4
|
||||||
|
"winter" "small" "high" 7.93 9.9 8 1.39 5.8 27.25 46.6 0.8 17
|
||||||
|
"spring" "small" "high" 7.7 10.2 8 1.527 21.571 12.75 20.75 0.8 16.6
|
||||||
|
"summer" "small" "high" 7.45 11.7 8.69 1.588 18.429 10.667 19 0.6 32.1
|
||||||
|
"winter" "small" "high" 7.74 9.6 5 1.223 27.286 12 17 41 43.5
|
||||||
|
"summer" "small" "high" 7.72 11.8 6.3 1.47 8 16 15 0.5 31.1
|
||||||
|
"winter" "small" "high" 7.9 9.6 3 1.448 46.2 13 61.6 0.3 52.2
|
||||||
|
"autumn" "small" "high" 7.55 11.5 4.7 1.32 14.75 4.25 98.25 1.1 69.9
|
||||||
|
"winter" "small" "high" 7.78 12 7 1.42 34.333 18.667 50 1.1 46.2
|
||||||
|
"spring" "small" "high" 7.61 9.8 7 1.443 31.333 20 57.833 0.4 31.8
|
||||||
|
"summer" "small" "high" 7.35 10.4 7 1.718 49 41.5 61.5 0.8 50.6
|
||||||
|
"spring" "small" "medium" 7.79 3.2 64 2.822 8777.59961 564.59998 771.59998 4.5 0
|
||||||
|
"winter" "small" "medium" 7.83 10.7 88 4.825 1729 467.5 586 16 0
|
||||||
|
"spring" "small" "high" 7.2 9.2 0.8 0.642 81 15.6 18 0.5 15.5
|
||||||
|
"autumn" "small" "high" 7.75 10.3 32.92 2.942 42 16 40 7.6 23.2
|
||||||
|
"winter" "small" "high" 7.62 8.5 11.867 1.715 208.33299 3 27.5 1.7 74.2
|
||||||
|
"spring" "small" "high" 7.84 9.4 10.975 1.51 12.5 3 11.5 1.5 13
|
||||||
|
"summer" "small" "high" 7.77 10.7 12.536 3.976 58.5 9 44.136 3 4.1
|
||||||
|
"winter" "small" "high" 7.09 8.4 10.5 1.572 28 4 13.6 0.5 29.7
|
||||||
|
"winter" "small" "high" 8 9.8 16 0.73 20 26 45 0.8 17.1
|
||||||
|
"spring" "small" "high" 7.2 11.3 9 0.23 120 12 19 0.5 33.9
|
||||||
|
"autumn" "small" "high" 7.4 12.5 13 3.33 60 72 142 4.9 3.4
|
||||||
|
"winter" "small" "high" 8.1 10.3 26 3.78 60 246 304 2.8 6.9
|
||||||
|
"summer" "small" "high" 7.8 11.3 20.083 3.02 49.5 53 130.75 5.8 0
|
||||||
|
"autumn" "small" "medium" 8.4 9.9 34.5 2.818 3515 20 47 2.3 13.6
|
||||||
|
"winter" "small" "medium" 8.27 7.8 29.2 0.05 6400 7.4 23 0.9 5.3
|
||||||
|
"summer" "small" "medium" 8.66 8.4 30.523 3.444 1911 58.875 84.46 3.6 18.3
|
||||||
|
"winter" "small" "high" 8.3 10.9 1.17 0.735 13.5 1.625 3 0.2 66
|
||||||
|
"winter" "small" "medium" 8.3 8.9 20.625 3.414 228.75 196.62 253.25 12.32 2
|
||||||
|
"spring" "small" "medium" 8.1 10.5 22.286 4.071 178.57001 182.42 255.28 8.957 2.2
|
||||||
|
"winter" "small" "medium" 8 5.5 77 6.096 122.85 143.71001 296 3.7 0
|
||||||
|
"summer" "small" "medium" 8.15 7.1 54.19 3.829 647.57001 59.429 175.04601 13.2 0
|
||||||
|
"winter" "small" "high" 8.3 7.7 50 8.543 76 264.89999 344.60001 22.5 0
|
||||||
|
"spring" "small" "high" 8.3 8.8 54.143 7.83 51.429 276.85001 326.85699 11.84 4.1
|
||||||
|
"winter" "small" "high" 8.4 13.4 69.75 4.555 37.5 10 40.667 3.9 51.8
|
||||||
|
"spring" "small" "high" 8.3 12.5 87 4.87 22.5 27 43.5 3.3 29.5
|
||||||
|
"autumn" "small" "high" 8 12.1 66.3 4.535 39 16 39 0.8 54.4
|
||||||
|
"spring" "small" "medium" 7.6 9.6 15 3.02 40 27 121 2.8 89.8
|
||||||
|
"autumn" "small" "medium" 7.29 11.21 17.75 3.07 35 13 20.812 12.1 24.8
|
||||||
|
"winter" "small" "medium" 7.6 10.2 32.3 4.508 192.5 12.75 49.333 7.9 0
|
||||||
|
"summer" "small" "medium" 8 7.9 27.233 1.651 28.333 7.3 22.9 4.5 39.1
|
||||||
|
"winter" "small" "high" 7.9 11 6.167 1.172 18.333 7.75 11.8 0.5 81.9
|
||||||
|
"spring" "small" "high" 7.9 9 5.273 0.91 33.636 9 11.818 0.8 54
|
||||||
|
"spring" "small" "high" 7.57 10.8 4.575 1.203 27.5 2 6.75 1 20.3
|
||||||
|
"summer" "small" "high" 7.19 11.7 4.326 1.474 160 2.5 7.2 0.3 15.8
|
||||||
|
"winter" "small" "high" 7.44 10.1 2.933 0.77 15 1.333 6 0.6 55.5
|
||||||
|
"spring" "small" "high" 7.14 9.8 3.275 0.923 15 1.25 10.75 2.5 10.3
|
||||||
|
"summer" "small" "high" 7 12.1 3.136 1.208 16.2 1.8 2.5 0.5 64.2
|
||||||
|
"winter" "small" "medium" 7.5 1.5 32.4 0.921 1386.25 220.75 351.60001 10 0
|
||||||
|
"spring" "small" "medium" 7.5 1.8 29.775 1.051 2082.8501 209.85699 313.60001 1 1.9
|
||||||
|
"summer" "small" "medium" 7.8 7.1 32.54 1.72 2167.37012 151.125 279.06601 13.1 25.5
|
||||||
|
"autumn" "medium" "medium" 8.5 8.1 38.125 3.85 225 45 152.33299 5.2 11.3
|
||||||
|
"summer" "medium" "medium" 7.925 10.2 34.037 9.08 109 55 58.623 11.6 4.4
|
||||||
|
"winter" "medium" "medium" 8.1 8.1 136 3.773 245 136.75 249.25 20.87 1.9
|
||||||
|
"spring" "medium" "medium" 8.2 6.8 129.375 3.316 271.25 100 233.5 13 1.6
|
||||||
|
"spring" "medium" "high" 9.1 9.4 35.75 5.164 32.5 85.5 215.5 18.37 2.2
|
||||||
|
"autumn" "medium" "medium" 8.1 9.8 29.5 1.287 224.286 25.167 102.333 3.6 64.9
|
||||||
|
"winter" "medium" "medium" 8 5.9 27.4 0.735 133.636 36 105.727 3 15.1
|
||||||
|
"spring" "medium" "medium" 8 3.3 26.76 0.658 165 37.375 111.375 3 14.4
|
||||||
|
"winter" "medium" "high" 7.5 9.2 11 3.31 101 26.6 108 1.3 6.7
|
||||||
|
"spring" "medium" "high" 7.4 9.8 11 3.235 255 38.75 56.667 2 10.8
|
||||||
|
"autumn" "medium" "high" 7.3 11.7 10.4 4.93 130 10.8 60 4.3 1.2
|
||||||
|
"winter" "medium" "high" 7.4 8.9 13.5 5.442 123.333 27.667 104 21 12.6
|
||||||
|
"summer" "medium" "high" 7.4 11.17 12.146 6.188 89.6 32 69.93 3.1 14.7
|
||||||
|
"autumn" "medium" "medium" 7.5 10.8 31 4.408 737.5 111.25 214 2.9 3.3
|
||||||
|
"winter" "medium" "medium" 7.6 6 53 3.734 914 137.60001 254.60001 4.3 0
|
||||||
|
"summer" "medium" "medium" 7.4 10.77 36.248 3.73 429.20001 57.6 169.00101 3.2 2.8
|
||||||
|
"winter" "medium" "medium" 7.8 3.6 48.667 4.03 5738.33008 412.33301 607.16699 4.3 0
|
||||||
|
"summer" "medium" "medium" 7.6 9.7 53.102 7.16 4073.33008 282.16699 624.73297 6.8 0
|
||||||
|
"winter" "medium" "medium" 8.5 8.6 125.6 3.778 124.167 197.83299 303.33301 40 0
|
||||||
|
"spring" "medium" "medium" 8.7 9.4 173.75 3.318 101.25 267.75 391.75 3.5 0
|
||||||
|
"summer" "medium" "medium" 8.1 10.7 94.405 4.698 153 191.75 265.25 7.3 0
|
||||||
|
"winter" "medium" "high" 8.8 8.5 53.333 5.132 96.667 120.5 232.83299 31 1.2
|
||||||
|
"spring" "medium" "high" 7.8 10.5 70 2.443 98.333 144.66701 244 9 0
|
||||||
|
"summer" "medium" "high" 7.9 11.8 63.51 4.94 137 159.5 218 6.5 0
|
||||||
|
"autumn" "medium" "low" 8.5 10.5 56.717 0.33 215.714 23 138.5 20.829 5.7
|
||||||
|
"winter" "medium" "low" 9.1 5.4 61.05 0.308 105.556 104.222 239 72.478 3.6
|
||||||
|
"spring" "medium" "low" 8.9 4.5 57.75 0.267 155 97.333 235.66701 98.817 1.2
|
||||||
|
"winter" "medium" "high" 7.9 6.3 101.875 3.978 153.75 51.75 205.875 2 4
|
||||||
|
"summer" "medium" "high" 7.8 8.2 85.982 6.2 421.66699 31.333 211.66701 21.9 5.9
|
||||||
|
"winter" "medium" "medium" 7.7 7.1 63.625 3.14 122.5 28.625 186.5 30 16.5
|
||||||
|
"spring" "medium" "medium" 7.8 6.5 82.111 2.603 215.556 12.889 154.125 5.2 7
|
||||||
|
"winter" "medium" "low" 7.7 5.3 65.333 2.899 371.11099 51.111 183.66701 17.2 58.7
|
||||||
|
"summer" "medium" "low" 7.5 8.8 58.331 8.688 758.75 104.5 292.625 3 8.7
|
||||||
|
"autumn" "medium" "low" 7.6 10 49.625 5.456 308.75 38.625 285.71399 75 17
|
||||||
|
"winter" "medium" "low" 8.7 7.4 47.778 2.316 38.111 24.667 201.778 3 12.3
|
||||||
|
"summer" "medium" "low" 7.7 11.1 47.229 8.759 239 54 275.14301 65.7 8.8
|
||||||
|
"autumn" "medium" "high" 8.3 11.1 41.5 4.665 931.83301 39 124.2 13.1 23.7
|
||||||
|
"winter" "medium" "high" 8.43 6 40.167 2.67 723.66699 60.833 141.83299 25 0
|
||||||
|
"summer" "medium" "high" 8.16 11.1 32.056 5.694 461.875 71 132.54601 15 3.6
|
||||||
|
"winter" "medium" "high" 8.7 9.8 5.889 1.534 51.111 9.667 17.333 1 64.3
|
||||||
|
"spring" "medium" "high" 8.2 11.3 7.25 1.875 25 6.5 26 0.3 46.6
|
||||||
|
"summer" "medium" "high" 8.5 11.8 7.838 1.732 206.53799 8.692 16.662 2.1 24
|
||||||
|
"spring" "medium" "medium" 7.8 6 53.425 0.381 118.571 37.857 102.571 1.2 3.7
|
||||||
|
"summer" "medium" "medium" 8 9.7 57.848 0.461 217.75 37 86.997 3 18.1
|
||||||
|
"summer" "medium" "high" 8.6 11.62 1.549 0.445 25.833 16.833 18.293 1.4 43.7
|
||||||
|
"autumn" "medium" "medium" 8.3 11.6 5.83 0.701 12.727 3.545 13.2 3.2 86.6
|
||||||
|
"spring" "medium" "low" 8.4 5.3 74.667 3.9 131.66701 261.60001 432.909 24.917 1.9
|
||||||
|
"summer" "medium" "low" 8.2 6.6 131.39999 4.188 92 238.2 320.39999 6.8 1.2
|
||||||
|
"winter" "medium" "medium" 8.2 9.4 45.273 7.195 345.45499 144 287 9.882 1.4
|
||||||
|
"spring" "medium" "medium" 8.1 7.1 42.636 5.078 56.364 166.72701 262.72699 17.2 1.6
|
||||||
|
"summer" "medium" "medium" 8.1 9 48.429 6.64 128.571 181 222.286 6.429 3.3
|
||||||
|
"winter" "medium" "high" 7.4 10.7 11.818 2.163 170.909 36.909 122 5.555 14.6
|
||||||
|
"spring" "medium" "high" 8.3 9.7 10.556 1.921 65.556 61.556 127.222 5.233 1.7
|
||||||
|
"summer" "medium" "high" 8.6 10.7 12 2.231 43.75 62.625 89.625 2.15 3.3
|
||||||
|
"winter" "medium" "medium" 9.1 11.6 31.091 5.099 246.364 55 284 88.255 0
|
||||||
|
"spring" "medium" "medium" 9 6.9 28.333 2.954 76.667 102.333 277.33301 110.456 0
|
||||||
|
"summer" "medium" "medium" 8.3 10 30.125 3.726 102.5 75.875 177.625 50.225 1.5
|
||||||
|
"winter" "medium" "high" 8.5 10.1 10.936 1.335 236 34.636 72.9 11.1 4.2
|
||||||
|
"spring" "medium" "high" 8.3 7.7 10.078 1.212 103.333 48.667 82.444 2 4.1
|
||||||
|
"summer" "medium" "high" 7.3 10.5 11.088 1.374 92.375 48.625 66.75 3.3 1.2
|
||||||
|
"winter" "medium" "medium" 7.9 9.8 194.75 6.513 3466.65991 23 173.75 15.3 0
|
||||||
|
"spring" "medium" "medium" 7.9 8.3 391.5 6.045 380 173 317 5.5 2.4
|
||||||
|
"autumn" "medium" "medium" 8 11.9 130.67 6.54 196 75 84 4.5 7.8
|
||||||
|
"spring" "medium" "medium" 8 9.2 39 4.86 120 187 213 2 10.3
|
||||||
|
"autumn" "medium" "medium" 8.1 11.7 35.66 5.13 46.5 49 88.5 2.5 1.5
|
||||||
|
"winter" "medium" "low" 8.43 9.9 37.6 0.826 124 32.5 115 11.7 9.2
|
||||||
|
"summer" "medium" "low" 8.1 6.2 39 0.673 112.857 60 98.143 2 28.1
|
||||||
|
"winter" "medium" "medium" 7.9 11.2 49.9 9.773 505 67.5 143.75 5.45 2.1
|
||||||
|
"summer" "medium" "medium" 8.1 6.2 51.113 5.099 175 132.5 197.14301 6.4 1.4
|
||||||
|
"spring" "medium" "high" 7.8 9.5 8.3 1.67 34 16.8 35.2 1 19
|
||||||
|
"autumn" "medium" "high" 7.9 10.5 10.207 2.304 132.25 10.583 23.485 2 42.5
|
||||||
|
"winter" "medium" "low" 8 4.5 79.077 8.984 920 70 200.231 19.4 2.5
|
||||||
|
"spring" "medium" "low" 7.6 6.3 81.333 9.715 196.66701 77.333 147.83299 3 4.4
|
||||||
|
"autumn" "medium" "low" 7.8 6.5 64.093 7.74 1990.16003 47.5 276 8.1 6.5
|
||||||
|
"winter" "medium" "high" 8.22 8.1 41.25 1.415 172.5 46.667 123.333 30.4 39.7
|
||||||
|
"autumn" "medium" "high" 8.3 9.9 40.226 1.587 235 33.8 75.207 23.8 32.8
|
||||||
|
"winter" "medium" "high" 8.47 9 46.167 2.102 84.667 48 116.2 7.3 12.2
|
||||||
|
"spring" "medium" "high" 8.4 4.9 47 0.536 91.833 109 188.66701 32 1.9
|
||||||
|
"autumn" "medium" "high" 8.87 11 41.163 2.273 54.75 39 72.696 22.7 0
|
||||||
|
"summer" "medium" "high" 7.7 4.4 53 2.31 90 22.2 116.2 16 0
|
||||||
|
"autumn" "medium" "high" 7.3 11.8 44.205 45.65 24064 44 34 53.1 2.2
|
||||||
|
"spring" "medium" "medium" 7.9 6 127.833 2.68 176.66701 27.5 76.333 2.1 3.4
|
||||||
|
"autumn" "medium" "medium" 7.8 10.53 100.83 5.41 486.5 24 58.374 27.5 2.8
|
||||||
|
"spring" "large" "low" 7.8 3.2 94 4.908 1131.66003 175.66701 361 28.567 24.8
|
||||||
|
"summer" "large" "low" 7.6 4.9 69 3.685 1495 234.5 236 22.5 32.5
|
||||||
|
"spring" "large" "low" 8.6 3.6 50 0.376 134 54.1 125.8 26.8 0
|
||||||
|
"autumn" "large" "low" 8.4 10.6 19.22 1.655 96.833 20.667 54.916 20.6 0
|
||||||
|
"winter" "large" "low" 8.3 11.5 26 1.87 62.5 30.75 75.333 34.75 0
|
||||||
|
"spring" "large" "low" 9.5 5.7 44 0.102 146.66701 151.33299 252.5 93.683 12.3
|
||||||
|
"summer" "large" "low" 8.8 8.8 43 0.13 103.333 180.66701 269.66699 92.667 7.2
|
||||||
|
"autumn" "large" "low" 8.84 12.9 43.09 0.846 52.2 8.6 46.438 81.54 3.4
|
||||||
|
"winter" "large" "high" 7.3 9.9 16 4.82 101.667 14.667 85 2 0
|
||||||
|
"autumn" "large" "high" 7.4 10.68 22.35 5.414 244.60001 66.4 171.272 3.8 1.1
|
||||||
|
"spring" "large" "low" 9.1 4.3 82.857 0.86 137.27299 102.364 232.89999 54.367 0
|
||||||
|
"autumn" "large" "low" 8.53 11.1 63.292 1.726 227.60001 84.3 146.452 21.22 1.4
|
||||||
|
"winter" "large" "low" 8.56 8.7 43.97 4.053 643 221.89999 246.66701 14.7 12.5
|
||||||
|
"autumn" "large" "low" 8.06 8.3 38.902 3.678 627.27301 205.636 219.909 6.209 0
|
||||||
|
"winter" "large" "medium" 8.24 6.1 95.367 3.561 1168 236.39999 272.22198 20.578 2.5
|
||||||
|
"summer" "large" "medium" 7.91 6.2 151.83299 3.923 1081.66003 346.16699 388.16699 5.083 1.7
|
||||||
|
"winter" "large" "medium" 8.21 9.3 104.818 3.908 124.364 82.222 167.89999 5.609 1.4
|
||||||
|
"spring" "large" "medium" 8.5 7.3 71.444 2.512 66.667 64.389 137.778 9.384 0
|
||||||
|
"spring" "large" "medium" 8.6 10.6 208.364 4.459 197.909 87.333 194.10001 27.618 0
|
||||||
|
"winter" "large" "medium" 9.06 6.35 187.183 3.351 54.778 159.16701 221.278 20.8 0
|
||||||
|
"autumn" "large" "high" 8.7 10.7 4.545 0.941 32.727 16 21.3 1.1 39.7
|
||||||
|
"spring" "large" "high" 8.1 10.7 3.5 1.013 12.5 12.75 11 0.6 37.3
|
||||||
|
"summer" "large" "high" 8.4 10.29 5.326 0.996 53.846 7.667 14.354 0.8 52.4
|
||||||
|
"spring" "large" "medium" 8.6 10.1 2.111 0.663 11.111 3.222 7 1.3 48.3
|
||||||
|
"summer" "large" "medium" 8.2 9.5 2.2 0.672 10 3.8 6.2 0.8 50.4
|
||||||
|
"winter" "large" "medium" 8.5 10.5 2.75 0.758 10.5 4 7.654 4 56.8
|
||||||
|
"summer" "large" "medium" 8.3 10 3.86 0.866 32 6 16 2.86 17.3
|
||||||
|
"summer" "large" "high" 8.1 10.2 7.613 0.699 32.5 26.625 52.875 2 18.1
|
||||||
|
"winter" "large" "low" 8.7 10.8 39.109 6.225 161.81799 104.727 228.364 46.075 1.1
|
||||||
|
"winter" "large" "low" 8.7 11.7 22.455 3.765 88.182 41.3 85.4 17.491 0
|
||||||
|
"summer" "large" "low" 8.4 8.2 23.25 2.805 43.75 51.125 87.125 14.775 0
|
||||||
|
"autumn" "large" "low" 8.55 11 22.32 3.14 82.1 45.9 101.455 18.33 1.7
|
||||||
|
"spring" "large" "medium" 8.5 7.6 12.778 1.873 17.778 50.889 127 24.556 0
|
||||||
|
"autumn" "large" "medium" 8.7 11.4 15.541 2.323 103 34.5 81.558 5.62 7.6
|
||||||
|
"winter" "large" "medium" 8.4 10.5 12.182 1.519 65.455 19.727 50.455 8.155 2.9
|
||||||
|
"spring" "large" "medium" 8.2 8.2 7.333 1.003 37.778 19.111 120.889 5.111 2.2
|
||||||
|
"autumn" "large" "medium" 8.58 11.1 23.825 3.617 72.6 51.111 91.111 22.9 3.8
|
||||||
|
"summer" "large" "medium" 8.5 7.9 12.444 2.586 96.667 19.111 61.444 6.167 18.9
|
||||||
|
"autumn" "large" "medium" 8.4 8.4 17.375 3.833 83.75 53.625 79.75 2.338 12.7
|
||||||
|
"spring" "large" "medium" 8.3 10.6 14.32 3.2 125.333 35.333 75.904 4.667 18
|
||||||
|
"autumn" "large" "medium" 8.2 7 139.989 2.978 60.11 78.333 140.22 31.738 0
|
||||||
|
"summer" "large" "medium" 8.5 6.7 82.852 2.8 27.069 64 140.517 18.3 2.4
|
|
@ -0,0 +1,123 @@
|
||||||
|
"season" "size" "speed" "mxPH" "mnO2" "Cl" "NO3" "NH4" "oPO4" "PO4" "Chla" "a1"
|
||||||
|
"winter" "small" "medium" 7.98 8.8 59.333 7.392 286.66699 33.333 138 7.1 1.2
|
||||||
|
"summer" "small" "medium" 8 7.2 80 1.957 174.286 47.857 113.714 4.5 7
|
||||||
|
"spring" "small" "high" 8.35 8.4 68 3.026 458 45.2 111.8 3.2 1.4
|
||||||
|
"spring" "small" "medium" 8.1 13.2 19 0 130 6 40 2 3.9
|
||||||
|
"summer" "small" "medium" 8.37 12.1 12.85 0.84 15 5 10.507 13.8 28.4
|
||||||
|
"spring" "small" "high" 7.31 9.9 6 1.395 58.75 6 16 0.8 11.4
|
||||||
|
"autumn" "small" "high" 7.91 11.2 5 1.383 6 24.333 30 32 29.7
|
||||||
|
"summer" "small" "high" 7.99 10.7 4 1.368 117 17.25 44.75 0.8 74.3
|
||||||
|
"autumn" "small" "high" 7.82 11.5 8.18 1.488 39 16 139.5 0.4 0
|
||||||
|
"summer" "small" "medium" 7.9 6 63 1.053 11160.59961 1435 1690 4.5 0
|
||||||
|
"autumn" "small" "medium" 8.02 9.4 18.74 1.598 6249.6001 455.79999 690.59998 2 0
|
||||||
|
"summer" "small" "high" 6.6 10.8 4 1.18 80 2 59 0.6 62.5
|
||||||
|
"autumn" "small" "high" 6.79 9.4 11.42 1.966 42 3 15 0.6 21.6
|
||||||
|
"summer" "small" "high" 6.78 10.2 10.704 1.46 46 3 13.714 0.7 41.9
|
||||||
|
"summer" "small" "high" 7.8 10.8 14.568 1.228 61.25 34.5 62 1.1 35.4
|
||||||
|
"spring" "small" "high" 8.3 12.7 27 4.04 10 363 482 6 0
|
||||||
|
"spring" "small" "medium" 7.97 2.5 32.125 1.034 7912.5 132.625 164.62 1 10.1
|
||||||
|
"summer" "small" "high" 8.2 10.4 3.577 0.788 10.583 1.667 2.088 0.8 30.6
|
||||||
|
"autumn" "small" "medium" 8.1 6.4 21.2 3.222 44 54.8 155 61.52 19.4
|
||||||
|
"summer" "small" "medium" 8.54 12.83 22.545 4 170.5 68 116.069 41.6 19.2
|
||||||
|
"spring" "small" "medium" 7.7 6.8 65 1.833 782.5 77.25 340 9 0
|
||||||
|
"autumn" "small" "high" 8.4 10.5 50.6 10.494 334 209.10001 276.66699 20.72 2.3
|
||||||
|
"summer" "small" "high" 8.5 11.5 57.292 10.526 312.60001 261.39999 299.39999 23.5 0
|
||||||
|
"summer" "small" "high" 8.1 12.2 66 4.08 10 26 70 1.8 31.8
|
||||||
|
"autumn" "small" "low" 6.13 11.23 8.87 0.62 36 3 14.741 2.1 11.9
|
||||||
|
"summer" "small" "medium" 7.2 10.4 18 2.42 80 11 44 2.5 7.2
|
||||||
|
"autumn" "small" "medium" 7.8 9.1 36.124 5.974 169 13.091 71.057 3.3 4
|
||||||
|
"summer" "small" "high" 7.8 9.4 5.714 0.807 22.143 6 18.714 1.5 42.8
|
||||||
|
"autumn" "small" "high" 7.8 11.35 5.343 1.363 19.75 5.818 8.846 1.9 66.2
|
||||||
|
"winter" "small" "high" 7.66 10.8 4 0.997 15 1.5 7.333 1 17.9
|
||||||
|
"autumn" "small" "medium" 7.8 6.9 31.375 0.933 2138.57007 152.429 317.5 15.4 30.6
|
||||||
|
"winter" "medium" "medium" 8 7 37.091 2.237 146.364 84.091 172.778 2.3 7.6
|
||||||
|
"spring" "medium" "medium" 8.2 7.8 37.625 1.453 105.714 66.714 143.39999 2.6 0
|
||||||
|
"autumn" "medium" "medium" 8.2 10.7 134.66701 4.504 617.77802 49.444 164.778 19.2 2.8
|
||||||
|
"summer" "medium" "medium" 8 8.5 131.46899 3.454 792 63.1 286.60001 8.2 2.1
|
||||||
|
"autumn" "medium" "high" 8.9 10.5 34.8 6 122.556 41.111 144.11099 27.03 3.2
|
||||||
|
"summer" "medium" "high" 8.2 9.2 30.037 5.184 174.8 86.6 130.8 3.45 2.4
|
||||||
|
"summer" "medium" "medium" 7.8 8.8 29.078 2.823 263.556 27 95.12 11.5 35.2
|
||||||
|
"summer" "medium" "high" 7.5 10.8 10.357 3.35 127.667 22 34.321 1.2 7.8
|
||||||
|
"spring" "medium" "high" 7.4 9 13.75 5.268 58.75 56.25 64 2.5 2.9
|
||||||
|
"spring" "medium" "medium" 7.5 8.9 55.8 4.408 389 127.4 206.2 5 0
|
||||||
|
"spring" "medium" "medium" 7.8 10.4 49 7.557 6433.33008 170.66701 341 2.3 0
|
||||||
|
"autumn" "medium" "medium" 9.1 8 101.2 4.306 273.75 152.875 290.31299 10.7 0
|
||||||
|
"autumn" "medium" "high" 8.9 8 60.2 4.033 306.47101 136 242.94099 18.4 0
|
||||||
|
"summer" "medium" "low" 8.5 10.74 56.292 0.694 264.79999 43.4 124.942 30.48 13.7
|
||||||
|
"autumn" "medium" "high" 8.3 8.6 75 5.18 560 30.5 170 16.7 1.2
|
||||||
|
"spring" "medium" "high" 7.8 6.3 136.66701 3.734 154.444 35.556 175.33299 2.7 2.9
|
||||||
|
"autumn" "medium" "medium" 7.6 9.2 64.778 6.164 720 21.778 242.5 54.2 11.2
|
||||||
|
"summer" "medium" "medium" 7.5 9.2 61.557 7.035 558.33301 24.5 257.33301 19.5 3.8
|
||||||
|
"autumn" "medium" "low" 7.5 8.6 57.5 7.368 577 67.3 254.444 22 14.2
|
||||||
|
"spring" "medium" "low" 7.7 4.8 88.909 1.714 669.091 38.182 205.18201 2.8 9.7
|
||||||
|
"spring" "medium" "low" 7.9 7.2 55.25 2.235 89.375 17.5 141.5 17 8.7
|
||||||
|
"spring" "medium" "high" 8.06 2.2 39 2.085 773.125 90.75 163.25 26 2.7
|
||||||
|
"autumn" "medium" "high" 8.5 7.5 9.3 1.557 260 9.6 18.1 3.9 14.5
|
||||||
|
"autumn" "medium" "medium" 8.2 10.4 63.3 0.389 217.14301 24.333 114 2.7 19.8
|
||||||
|
"winter" "medium" "medium" 8 4.8 58.767 0.308 93.75 33.375 110.875 2.7 7.6
|
||||||
|
"autumn" "medium" "high" 8.7 10.8 1.118 0.534 26.364 14.818 20.9 1.4 40.7
|
||||||
|
"spring" "medium" "high" 8.4 11.2 0.5 0.32 10 21.6 27.6 0.6 37.1
|
||||||
|
"winter" "medium" "low" 8.5 8.3 36.583 5.632 440.83301 149 266.36401 19.827 2.5
|
||||||
|
"autumn" "medium" "low" 8.3 8.8 64.768 6.272 357.16699 219 302.5 8.267 3.5
|
||||||
|
"autumn" "medium" "medium" 8.4 10.8 47.304 7.773 258.909 145.091 223.04401 13.36 1
|
||||||
|
"autumn" "medium" "high" 7.9 11.9 11.862 2.209 128.636 48.091 69.079 2.755 0
|
||||||
|
"autumn" "medium" "medium" 9.13 12 30.496 4.971 99.6 64.6 146.265 54.13 1
|
||||||
|
"autumn" "medium" "high" 7.4 11.4 12.031 1.621 176.8 36.3 58.599 36.1 4.1
|
||||||
|
"summer" "medium" "medium" 8.3 8.9 271.5 6.315 375 169 313.5 2.8 16.5
|
||||||
|
"winter" "medium" "medium" 8.2 10.4 41 5.16 410 38 61 6 4.9
|
||||||
|
"summer" "medium" "medium" 8.2 11.2 36 4.4 32.5 108 155.5 3 12.4
|
||||||
|
"spring" "medium" "low" 8.17 6.3 37.3 0.527 82 62 133.10001 1.4 5.9
|
||||||
|
"autumn" "medium" "low" 8.33 10.6 36.156 1.137 119.444 92.889 112.855 10.5 74.8
|
||||||
|
"spring" "medium" "medium" 8.5 6.7 45.609 4.411 160 88.364 180.364 32.833 1.9
|
||||||
|
"autumn" "medium" "medium" 8.1 9.1 47.267 9.367 169.091 75 127.778 3.667 1.9
|
||||||
|
"winter" "medium" "high" 8.2 11.9 12.25 2.348 121.875 14 27.5 4.6 2.1
|
||||||
|
"summer" "medium" "high" 8.1 9.4 11 2.251 48.75 17.375 66.875 2.5 28.7
|
||||||
|
"summer" "medium" "low" 7.8 7.9 87 12.13 652.5 93.25 209 6 1.4
|
||||||
|
"spring" "medium" "high" 8.26 5 44.818 0.526 97.273 105.455 181.636 20.6 0
|
||||||
|
"summer" "medium" "high" 8.11 6.6 49.857 0.993 194.28 77 197.571 13 4.2
|
||||||
|
"summer" "medium" "high" 7.87 1.8 49.25 0.611 357.125 128.25 185.125 4.5 1.2
|
||||||
|
"winter" "medium" "high" 7.2 10.1 49.5 3.955 55 18 138 49 2.4
|
||||||
|
"spring" "medium" "high" 7.8 8.3 51.5 2.098 30.2 24.6 184.39999 31.3 1.9
|
||||||
|
"winter" "medium" "medium" 7.9 11.3 82.5 6.283 300 12.333 53.333 13.7 12.3
|
||||||
|
"summer" "medium" "medium" 8 8.8 176.25 0.618 440 16.25 79.25 3.5 8
|
||||||
|
"autumn" "large" "low" 8.1 9.07 71.39 2.904 1768.80005 27.6 123.06 41.54 5.8
|
||||||
|
"winter" "large" "low" 8.7 5.4 48 1.139 144.286 36.714 66.833 22.017 1.2
|
||||||
|
"summer" "large" "low" 7.9 5.3 48 0.513 138.33299 61.333 89.167 4 0
|
||||||
|
"autumn" "large" "low" 8.7 12.2 32.23 1.887 233.5 17.5 66.167 39.333 11.8
|
||||||
|
"winter" "large" "low" 8.6 6.5 43 0.668 95 10.5 74.667 63.5 30.1
|
||||||
|
"spring" "large" "high" 7.4 7.3 19 4.39 120 74.857 166.286 5.3 0
|
||||||
|
"summer" "large" "high" 7.8 10.4 22.5 4.72 178.75 116.5 201 2.7 0
|
||||||
|
"winter" "large" "low" 8.5 9.8 70.25 1.644 285 68.714 132 16.028 4.6
|
||||||
|
"spring" "large" "low" 7.98 5.6 47.06 3.088 357 311.39999 342.29999 18.53 2.4
|
||||||
|
"summer" "large" "low" 7.95 7.2 57.286 3.746 425.71399 291.14301 330 4.714 0
|
||||||
|
"spring" "large" "medium" 7.96 5.5 131.364 3.313 810.90002 311.45499 349.81799 20.47 2.7
|
||||||
|
"autumn" "large" "medium" 8.03 7.83 83.023 4.065 1222.81006 240.545 269.091 6.809 5
|
||||||
|
"winter" "large" "medium" 8.35 2.75 97.733 3.681 137.444 91 155.556 2.744 0
|
||||||
|
"summer" "large" "medium" 8.15 10.4 189.567 5.011 162.944 135.778 219.278 2.859 0
|
||||||
|
"winter" "large" "high" 8.5 10.1 3 0.851 37.778 10.778 23.889 0.5 37.5
|
||||||
|
"winter" "large" "medium" 8.5 11.4 3 0.774 10.909 3.727 8.091 3.6 35.9
|
||||||
|
"spring" "large" "medium" 8.5 8.5 4.025 0.825 23.636 5.583 31.091 2.4 38.6
|
||||||
|
"autumn" "large" "medium" 8.4 11.43 4.966 0.969 24.111 6 18.167 2.133 34.7
|
||||||
|
"spring" "large" "high" 8.2 9.9 6.4 0.553 21.429 12 76.286 1.3 16
|
||||||
|
"autumn" "large" "high" 8 10.98 9.7 0.874 67.7 26.6 51.034 2.2 26.7
|
||||||
|
"autumn" "large" "low" 8.3 8.9 42.058 5.922 116.727 150.58299 220.72301 6.7 0
|
||||||
|
"spring" "large" "low" 8.7 6.8 16.889 2.139 30 37.111 85.444 23.033 2.2
|
||||||
|
"winter" "large" "medium" 8.6 10.4 15.182 2.502 140.909 31.909 77.7 15.318 4.8
|
||||||
|
"summer" "large" "medium" 8 9.1 15.375 2.118 43.75 48.875 86.5 8.125 0
|
||||||
|
"summer" "large" "medium" 8.2 9.5 17.875 2.363 63.75 44 77 8.463 1.5
|
||||||
|
"spring" "large" "medium" 8.5 9.6 16.545 3.849 103.273 34.273 63.4 14.682 18.8
|
||||||
|
"spring" "large" "medium" 8.04 9.3 130.263 3.776 131.008 97.5 152.966 6.15 0
|
||||||
|
"autumn" "large" "medium" 7.95 9.1 76.886 3.461 93.827 68.333 146.049 3.95 1.2
|
||||||
|
"autumn" "small" "high" 7.64 10.3 34.235 2.942 41.43 17 41.567 7.43 23.2
|
||||||
|
"winter" "small" "high" 7.92 8.5 10.867 1.715 199.54 3.222 27.2 1.9 74.2
|
||||||
|
"spring" "small" "high" 7.62 9.4 11.055 1.51 13.56 4 12.65 1.456 13
|
||||||
|
"summer" "medium" "high" 7.75 10.7 15.5 3.976 57.64 10.5 43.169 3.12 4.1
|
||||||
|
"winter" "small" "high" 7.08 8.4 9.45 1.572 26.54 4 13.6 0.675 29.7
|
||||||
|
"winter" "small" "high" 8.1 9.8 14.34 0.73 22.5 23 45.5 0.85 17.1
|
||||||
|
"spring" "large" "medium" 8.61 10.1 3.518 0.663 12.22 3.222 7 1.3 48.3
|
||||||
|
"summer" "large" "medium" 8.22 9.5 2.3 0.672 9.87 4 6.123 0.8 50.4
|
||||||
|
"summer" "large" "medium" 8.4 10 3.51 0.866 29.65 5.8 15 2.86 17.3
|
||||||
|
"summer" "medium" "high" 8.12 10.2 7.613 0.699 33.56 28.034 49.658 2.2 18.1
|
||||||
|
"winter" "large" "low" 8.7 11.7 21.4656 3.765 91.45 38 83 17 0
|
||||||
|
"summer" "large" "low" 8.1 8.2 26.54 2.805 42.75 48.5 88.125 13.98 0
|
||||||
|
"autumn" "large" "low" 8.35 11.1 22.56 3.14 76.2 41 98.665 17.456 1.7
|
|
@ -0,0 +1,114 @@
|
||||||
|
##########################################################################################################################
|
||||||
|
#
|
||||||
|
# PROBLEMS
|
||||||
|
#
|
||||||
|
##########################################################################################################################
|
||||||
|
#
|
||||||
|
# - Use GA search (using the ga() function in the GA package) to find the minimum of the real-valued function
|
||||||
|
# f(x) = abs(x) + cos(x). Restrict the search interval to [-20, 20]. Carefully define the fitness function,
|
||||||
|
# since the ga() can only maximize it!
|
||||||
|
#
|
||||||
|
##########################################################################################################################
|
||||||
|
#
|
||||||
|
# - Use GA search to find the minimum of the real-valued two-dimensional function
|
||||||
|
# f(x1, x2) = 20 + x1^2 + x2^2 - 10*(cos(2*pi*x1) + cos(2*pi*x2)), where x1 and x2 are from the interval [-5.12, 5.12].
|
||||||
|
#
|
||||||
|
##########################################################################################################################
|
||||||
|
#
|
||||||
|
# - We are given the following data:
|
||||||
|
#
|
||||||
|
# Substrate <- c(1.73, 2.06, 2.20, 4.28, 4.44, 5.53, 6.32, 6.68, 7.28, 7.90, 8.80, 9.14, 9.18, 9.40, 9.88)
|
||||||
|
# Velocity <- c(12.48, 13.97, 14.59, 21.25, 21.66, 21.97, 25.36, 22.93, 24.81, 25.63, 24.68, 29.04, 28.08, 27.32, 27.77)
|
||||||
|
#
|
||||||
|
# Use GA search to fit the data to the model:
|
||||||
|
# Velocity = (M * Substrate) / (K + Substrate), where M and K are the model parameters. Restrict the search interval
|
||||||
|
# for M to [40.0, 50.0] and for K to [3.0, 5.0].
|
||||||
|
#
|
||||||
|
##########################################################################################################################
|
||||||
|
#
|
||||||
|
# - Use a binary GA to select (sub)optimal attribute subset for a linear model:
|
||||||
|
#
|
||||||
|
# train.data <- read.table("AlgaeLearn.txt", header = T)
|
||||||
|
# test.data <- read.table("AlgaeTest.txt", header = T)
|
||||||
|
# lm.model <- lm(a1 ~., train.data)
|
||||||
|
#
|
||||||
|
##########################################################################################################################
|
||||||
|
library(GA)
|
||||||
|
|
||||||
|
# - Use GA search (using the ga() function in the GA package) to find the minimum of the real-valued function
|
||||||
|
# f(x) = abs(x) + cos(x). Restrict the search interval to [-20, 20]. Carefully define the fitness function,
|
||||||
|
# since the ga() can only maximize it!
|
||||||
|
# Objective for the first exercise: f(x) = |x| + cos(x), vectorised over x.
f <- function(x) {
  magnitude <- abs(x)
  magnitude + cos(x)
}
|
||||||
|
|
||||||
|
curve(f, from=-20, to=20, n=1000)
|
||||||
|
|
||||||
|
# ga() can only maximise its fitness function, so to find the MINIMUM of f
# the fitness is the negated objective. The fitness value reported by
# summary(GA) is therefore -f at the solution.
GA <- ga(type = "real-valued", fitness = function(x) -f(x), lower = -20, upper = 20)
|
||||||
|
|
||||||
|
# The object returned can be plotted
|
||||||
|
plot(GA)
|
||||||
|
summary(GA)
|
||||||
|
|
||||||
|
# plot the solution
|
||||||
|
curve(f, from = -20, to = 20, n = 1000)
|
||||||
|
points(GA@solution, f(GA@solution), col="red")
|
||||||
|
|
||||||
|
# - Use GA search to find the minimum of the real-valued two-dimensional function
|
||||||
|
# f(x1, x2) = 20 + x1^2 + x2^2 - 10*(cos(2*pi*x1) + cos(2*pi*x2)), where x1 and x2 are from the interval [-5.12, 5.12].
|
||||||
|
# https://stackoverflow.com/a/68635397
|
||||||
|
|
||||||
|
# Two-dimensional objective from the exercise statement:
# f(x1, x2) = 20 + x1^2 + x2^2 - 10 * (cos(2*pi*x1) + cos(2*pi*x2))
fitness_f <- function(x1, x2) {
  bowl <- 20 + x1^2 + x2^2
  ripple <- cos(2 * pi * x1) + cos(2 * pi * x2)
  bowl - 10 * ripple
}
|
||||||
|
|
||||||
|
# ga() maximises its fitness, so the objective is negated in the call below in order to minimise it
|
||||||
|
GA <- ga(type = "real-valued", fitness = function(x) -fitness_f(x[1], x[2]), lower = c(-5.12, -5.12), upper = c(5.12, 5.12), maxiter=200)
|
||||||
|
|
||||||
|
# The object returned can be plotted
|
||||||
|
plot(GA)
|
||||||
|
summary(GA)
|
||||||
|
|
||||||
|
# - We are given the following data:
|
||||||
|
#
|
||||||
|
# Substrate <- c(1.73, 2.06, 2.20, 4.28, 4.44, 5.53, 6.32, 6.68, 7.28, 7.90, 8.80, 9.14, 9.18, 9.40, 9.88)
|
||||||
|
# Velocity <- c(12.48, 13.97, 14.59, 21.25, 21.66, 21.97, 25.36, 22.93, 24.81, 25.63, 24.68, 29.04, 28.08, 27.32, 27.77)
|
||||||
|
#
|
||||||
|
# Use GA search to fit the data to the model:
|
||||||
|
# Velocity = (M * Substrate) / (K + Substrate), where M and K are the model parameters. Restrict the search interval
|
||||||
|
# for M to [40.0, 50.0] and for K to [3.0, 5.0].
|
||||||
|
|
||||||
|
Substrate <- c(1.73, 2.06, 2.20, 4.28, 4.44, 5.53, 6.32, 6.68, 7.28, 7.90, 8.80, 9.14, 9.18, 9.40, 9.88)
|
||||||
|
Velocity <- c(12.48, 13.97, 14.59, 21.25, 21.66, 21.97, 25.36, 22.93, 24.81, 25.63, 24.68, 29.04, 28.08, 27.32, 27.77)
|
||||||
|
|
||||||
|
# params[1] = M, params[2] = K
|
||||||
|
# Predicted velocity for every value in the global Substrate vector, using
# Velocity = (M * Substrate) / (K + Substrate) with params = c(M, K).
model <- function(params) {
  m <- params[1]
  k <- params[2]
  (m * Substrate) / (k + Substrate)
}
|
||||||
|
|
||||||
|
# GA fitness for the curve fit: the negated sum of squared residuals between
# the observed Velocity and the model prediction for params = c(M, K).
# Negated because ga() maximises, so the best fit has the fitness closest to 0.
# The residuals must be taken against Velocity (the response); comparing the
# prediction with Substrate (the predictor) fits the wrong target.
fitness_f <- function(params) {
  residuals <- Velocity - model(params)
  -sum(residuals^2)
}
|
||||||
|
|
||||||
|
GA2 <- ga(type = "real-valued", fitness = fitness_f, lower = c(40.0, 3.0), upper = c(50.0, 5.0),
|
||||||
|
popSize = 500, crossover = gareal_blxCrossover, maxiter = 5000, run = 200, names = c("M", "K"))
|
||||||
|
|
||||||
|
summary(GA2)
|
||||||
|
|
||||||
|
# Let's plot our solution
|
||||||
|
plot(Substrate, Velocity)
|
||||||
|
lines(Substrate, model(GA2@solution))
|
||||||
|
|
||||||
|
|
||||||
|
# - Use a binary GA to select (sub)optimal attribute subset for a linear model:
|
||||||
|
#
|
||||||
|
# train.data <- read.table("AlgaeLearn.txt", header = T)
|
||||||
|
# test.data <- read.table("AlgaeTest.txt", header = T)
|
||||||
|
# lm.model <- lm(a1 ~., train.data)
|
||||||
|
|
||||||
|
# Load the algae training and test sets; header = TRUE takes the column names
# from the first line of each file. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
train.data <- read.table("./data/AlgaeLearn.txt", header = TRUE)
test.data <- read.table("./data/AlgaeTest.txt", header = TRUE)

# Linear model on all attributes: predicts a1 from every other column
lm.model <- lm(a1 ~ ., train.data)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,377 @@
|
||||||
|
# We are going to use the GA package
|
||||||
|
# Make sure that the package is installed.
|
||||||
|
# You install a package in R with the function install.packages():
|
||||||
|
#
|
||||||
|
# install.packages("GA")
|
||||||
|
library(GA)
|
||||||
|
#
|
||||||
|
# To install packages without root access:
|
||||||
|
#
|
||||||
|
# install.packages("GA", lib="/mylibs/Rpackages/") ## or some other path, e.g., C:\yourFolder
|
||||||
|
# library(GA, lib.loc="/mylibs/Rpackages/")
|
||||||
|
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# EXAMPLE 1: One-dimensional function optimization
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
# The asymmetric double claw is difficult to maximize because there are many local solutions.
|
||||||
|
# Standard derivative-based optimizers would simply climb up the hill closest to the starting value.
|
||||||
|
|
||||||
|
# Asymmetric double claw: a weighted mixture of normal densities evaluated
# at x (vectorised over x).
#   - two wide components (sd 2/3) centred at -1 and 1, weight 0.46 each
#   - three very narrow spikes (sd 0.01) at -0.5, -1 and -1.5, weight 1/300
#   - three narrow bumps (sd 0.07) at 0.5, 1 and 1.5, weight 7/300
f <- function(x) {
  wide <- dnorm(x, -1, 2/3) + dnorm(x, 1, 2/3)
  spikes <- dnorm(x, -0.5, 0.01) + dnorm(x, -1, 0.01) + dnorm(x, -1.5, 0.01)
  bumps <- dnorm(x, 0.5, 0.07) + dnorm(x, 1, 0.07) + dnorm(x, 1.5, 0.07)

  0.46 * wide + (1/300) * spikes + (7/300) * bumps
}
|
||||||
|
|
||||||
|
# Plot the double claw
|
||||||
|
curve(f, from = -3, to = 3, n = 1000)
|
||||||
|
|
||||||
|
# For the maximization of this function we may use f directly as the fitness function
|
||||||
|
GA <- ga(type = "real-valued", fitness = f, lower = -3, upper = 3)
|
||||||
|
|
||||||
|
# The object returned can be plotted
|
||||||
|
plot(GA)
|
||||||
|
summary(GA)
|
||||||
|
|
||||||
|
# plot the solution
|
||||||
|
curve(f, from = -3, to = 3, n = 1000)
|
||||||
|
points(GA@solution, f(GA@solution), col="red")
|
||||||
|
|
||||||
|
# The evolution of the population units and the corresponding functions values at each
|
||||||
|
# generation can be obtained by defining a new monitor function and then passing this
|
||||||
|
# function as an optional argument to ga
|
||||||
|
|
||||||
|
# Monitor function for ga(): redraws the objective f over the search
# interval and overlays the current population, then pauses for one second
# so each redraw stays on screen.
#
# obj: the object ga() passes to its monitor; this function reads its
#      lower, upper, iter, population and fitness slots.
myMonitor <- function(obj) {
  heading <- paste("iteration =", obj@iter)
  curve(f, obj@lower, obj@upper, n = 1000, main = heading)

  # individuals as red dots at their fitness, plus tick marks on the x axis
  individuals <- obj@population
  points(individuals, obj@fitness, pch = 20, col = 2)
  rug(individuals, col = 2)

  Sys.sleep(1)
}
|
||||||
|
|
||||||
|
GA <- ga(type = "real-valued", fitness = f, lower = -3, upper = 3, monitor = myMonitor)
|
||||||
|
|
||||||
|
## Inspect fitness across generations
|
||||||
|
plot(GA)
|
||||||
|
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# EXAMPLE 2: Model fitting
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
# We consider a data on the growth of trees
|
||||||
|
|
||||||
|
# The age at which the tree was measured
|
||||||
|
Age <- c(2.44, 12.44, 22.44, 32.44, 42.44, 52.44, 62.44, 72.44, 82.44, 92.44, 102.44, 112.44)
|
||||||
|
|
||||||
|
# The bole volume of the tree
|
||||||
|
Vol <- c(2.2, 20.0, 93.0, 262.0, 476.0, 705.0, 967.0, 1203.0, 1409.0, 1659.0, 1898.0, 2106.0)
|
||||||
|
|
||||||
|
plot(Age, Vol)
|
||||||
|
|
||||||
|
# An ecological model for the plant size (measured by volume) as a function of age is the Richards curve:
|
||||||
|
# f(x) = a*(1-exp(-b*x))^c, where a, b, and c are the model parameters
|
||||||
|
|
||||||
|
# Let's fit the Richards curve using genetic algorithms
|
||||||
|
|
||||||
|
# We first define our model function (argument params represents a vector of the parameters a, b, and c)
|
||||||
|
# Richards growth curve evaluated at every value of the global Age vector:
#   a * (1 - exp(-b * Age))^c
#
# params: numeric vector with params[1] = a, params[2] = b, params[3] = c.
# Returns a numeric vector with one predicted volume per Age value.
model <- function(params) {
  asymptote <- params[1]
  rate <- params[2]
  shape <- params[3]

  saturation <- 1 - exp(-rate * Age)
  asymptote * saturation^shape
}
|
||||||
|
|
||||||
|
# We define the fitness function as the negative sum of squares of the differences between estimated and observed data
|
||||||
|
# GA fitness for the Richards curve fit: the negated sum of squared
# differences between the observed volumes (Vol) and the volumes predicted
# by model(params). Larger (closer to zero) means a better fit.
#
# params: numeric vector c(a, b, c) of model parameters.
myFitness2 <- function(params) {
  residuals <- Vol - model(params)
  -sum(residuals^2)
}
|
||||||
|
|
||||||
|
# The fitness function needs to be maximized with respect to the model's parameters, given the observed data in Age and Vol.
|
||||||
|
# A blend crossover is used for improving the search over the parameter space: for two parents x1 and x2 (assume x1 < x2)
|
||||||
|
# it randomly picks a solution in the range [x1 - k*(x2-x1), x2 + k*(x2-x1)], where k represents a constant between 0 and 1.
|
||||||
|
|
||||||
|
|
||||||
|
# We restrict the search interval for a,b, and c to [1000.0, 5000.0], [0.0, 5.0], and [0.0, 5.0], respectively.
|
||||||
|
|
||||||
|
|
||||||
|
GA2 <- ga(type = "real-valued", fitness = myFitness2, lower = c(1000, 0, 0), upper = c(5000, 5, 5),
|
||||||
|
popSize = 500, crossover = gareal_blxCrossover, maxiter = 5000, run = 200, names = c("a", "b", "c"))
|
||||||
|
|
||||||
|
summary(GA2)
|
||||||
|
|
||||||
|
# Let's plot our solution
|
||||||
|
|
||||||
|
# Scatter plot of the observations with the fitted Richards curve on top.
# GA2@solution is a matrix with one row per best solution found; take the
# first row explicitly so model() always receives a plain c(a, b, c) vector,
# even when several solutions tie for the best fitness.
plot(Age, Vol)
lines(Age, model(GA2@solution[1, ]))
|
||||||
|
|
||||||
|
|
||||||
|
# we can use a monitor function to plot the current solution
|
||||||
|
|
||||||
|
# Monitor function for ga(): plots the observed data together with the curve
# given by the fittest individual of the current population, titles the plot
# with the iteration number, then pauses for one second.
#
# obj: the object ga() passes to its monitor; this function reads its
#      fitness, population and iter slots.
myMonitor2 <- function(obj) {
  best <- which.max(obj@fitness)
  best_params <- obj@population[best, ]

  plot(Age, Vol)
  lines(Age, model(best_params), col = "red")
  title(paste("iteration =", obj@iter), font.main = 1)

  Sys.sleep(1)
}
|
||||||
|
|
||||||
|
GA2 <- ga(type = "real-valued", fitness = myFitness2, lower = c(1000, 0, 0), upper = c(5000, 5, 5),
|
||||||
|
popSize = 500, crossover = gareal_blxCrossover, maxiter = 5000, run = 200, names = c("a", "b", "c"), monitor=myMonitor2)
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# EXAMPLE 3: The Knapsack problem
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
# The Knapsack problem is defined as follows: given a set of items, each with a mass and a value, determine the subset
|
||||||
|
# of items to be included in a collection so that the total weight is less than or equal to a given limit and the total value
|
||||||
|
# is as large as possible.
|
||||||
|
|
||||||
|
# a vector of the items' values
|
||||||
|
values <- c(5, 8, 3, 4, 6, 5, 4, 3, 2)
|
||||||
|
|
||||||
|
# a vector of the items' weights
|
||||||
|
weights <- c(1, 3, 2, 4, 2, 1, 3, 4, 5)
|
||||||
|
|
||||||
|
# the knapsack capacity
|
||||||
|
Capacity <- 10
|
||||||
|
|
||||||
|
# A binary GA can be used to solve the knapsack problem. The solution to this problem is a binary string equal to the number
|
||||||
|
# of items where the ith bit is 1 if the ith item is in the subset and 0 otherwise. The fitness function should penalize
|
||||||
|
# unfeasible solutions.
|
||||||
|
|
||||||
|
# Fitness of a knapsack selection.
#
# x: 0/1 vector, one entry per item (1 = item is packed).
# Returns the total value of the packed items when their total weight fits
# into Capacity; otherwise returns the (negative) amount by which the
# capacity is exceeded, so that infeasible selections are penalized.
knapsack <- function(x) {
  total_weight <- sum(x * weights)

  # overweight: penalty grows with the excess weight
  if (total_weight > Capacity) {
    return(Capacity - total_weight)
  }

  sum(x * values)
}
|
||||||
|
|
||||||
|
GA3 <- ga(type = "binary", fitness = knapsack, nBits = length(weights), maxiter = 1000, run = 200, popSize = 100)
|
||||||
|
|
||||||
|
summary(GA3)
|
||||||
|
GA3@solution
|
||||||
|
|
||||||
|
#
|
||||||
|
# Example 4: ESTABLISHING A TIMETABLE
|
||||||
|
#
|
||||||
|
|
||||||
|
# A small football club has a youth team and a senior team. The player
|
||||||
|
# training program has seven components: stamina training, strength training,
|
||||||
|
# technique, tactics, psychological preparation, teamwork, and regeneration.
|
||||||
|
# Due to lack of funds, for each component, a single staff member is responsible
|
||||||
|
# for both the youth and the senior team, with the exceptions of tactics and
|
||||||
|
# stamina training, where two staff members are assigned, one to each team.
|
||||||
|
#
|
||||||
|
# The weekly training regime is summarized in the following table:
|
||||||
|
#
|
||||||
|
#+----------+---------------------+-----------------+-----------------+
|
||||||
|
#| Coach | Component | Senior team | Youth team |
|
||||||
|
#+----------+---------------------+-----------------+-----------------+
|
||||||
|
#| Anze | Strength training | 1 time a week | 1 time a week |
|
||||||
|
#| Bojan | Technique | 3 times a week | 3 times a week |
|
||||||
|
#| Ciril | Regeneration | 2 times a week | 2 times a week |
|
||||||
|
#| Dusan | Stamina training | doesn't conduct | 4 times a week |
|
||||||
|
#| Erik | Stamina training | 4 times a week | doesn't conduct |
|
||||||
|
#| Filip | Teamwork | 3 times a week | 3 times a week |
|
||||||
|
#| Gasper | Psychological prep. | 1 time a week | 1 time a week |
|
||||||
|
#| Hugo | Tactics | 1 time a week | doesn't conduct |
|
||||||
|
#| Iztok | Tactics | doesn't conduct | 1 time a week |
|
||||||
|
#+----------+---------------------+-----------------+-----------------+
|
||||||
|
#
|
||||||
|
# Training is performed from Monday to Friday in four different time slots:
|
||||||
|
# 8:00 - 10:00, 10:15 - 12:15, 14:00 - 16:00, and 16:15 - 18:15.
|
||||||
|
#
|
||||||
|
# Constraints:
|
||||||
|
#
|
||||||
|
# - each time slot can hold only one component for the youth team and one component
|
||||||
|
# for the senior team (the youth and senior teams train separately, so a single
|
||||||
|
# staff member can only train one of the two teams in a single time slot).
|
||||||
|
#
|
||||||
|
# - a team is not allowed to train the same component 2 or more times within one day.
|
||||||
|
#
|
||||||
|
# - the main purpose of the Tactics training component is to prepare the team for
|
||||||
|
# the upcoming match. Matches are usually played during the weekend, so Tactics
|
||||||
|
# training should be scheduled for Thursday in the 16:15 - 18:15 time slot.
|
||||||
|
#
|
||||||
|
# - after a match, the players need to rest. Therefore, there is no training in
|
||||||
|
# the Monday 8:00 - 10:00 time slot.
|
||||||
|
#
|
||||||
|
# - the stamina training coach Dusan is not available on Monday mornings
|
||||||
|
# (8:00 - 10:00 and 10:15 - 12:15 time slots)
|
||||||
|
#
|
||||||
|
# - there can be no Technique training on Wednesdays, because coach Bojan is
|
||||||
|
# not available.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Produce a training schedule that takes into account these two and all of
|
||||||
|
# the above restrictions!
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
# VARIABLES
|
||||||
|
#
|
||||||
|
# senior - number of sessions per component for the senior team
|
||||||
|
# youth - number of sessions per component for the youth team
|
||||||
|
# staff - coaching staff -> the staff's actual occupancy is what we solve for; how many sessions each coach runs is hard-coded in the senior and youth variables!
|
||||||
|
# slots - possible slots
|
||||||
|
|
||||||
|
senior = c(1, 3, 2, 0, 4, 3, 1, 1, 0)
|
||||||
|
youth = c(1, 3, 2, 4, 0, 3, 1, 0, 1)
|
||||||
|
slots = 4*5
|
||||||
|
|
||||||
|
# Fitness function for the timetable problem.
#
# timetable: 0/1 vector of length 5 * 4 * 9 * 2. It is reshaped into an array
#            indexed by [day, slot, staff, team], where
#              day   1..5 = Monday..Friday
#              slot  1..4 = 8:00-10:00, 10:15-12:15, 14:00-16:00, 16:15-18:15
#              staff 1..9 = coaches in the order of the table above
#              team  1 = senior, 2 = youth
#            A 1 means that coach trains that team in that day/slot.
# Returns the negated number of constraint violations (0 = valid timetable),
# so that ga() can maximize it. Reads the global vectors senior and youth.
valueBin <- function(timetable) {
  tt <- array(as.integer(timetable), c(5, 4, 9, 2))

  violations <- 0

  for (i in 1:9) {
    # each coach must run exactly the required number of sessions per team
    violations <- violations + abs(sum(tt[, , i, 1]) - senior[i])
    violations <- violations + abs(sum(tt[, , i, 2]) - youth[i])

    # a team must not train the same component 2 or more times within a day
    violations <- violations + sum(rowSums(tt[, , i, 1]) > 1)
    violations <- violations + sum(rowSums(tt[, , i, 2]) > 1)
  }

  # a single staff member can only train one of the two teams in a time slot
  violations <- violations + sum(tt[, , , 1] == tt[, , , 2] & tt[, , , 1] != 0)

  # each time slot can hold only one component per team: every session
  # beyond the first in a (day, slot) cell counts as one violation
  senior_load <- apply(tt[, , , 1], c(1, 2), sum)
  youth_load <- apply(tt[, , , 2], c(1, 2), sum)
  violations <- violations + sum(pmax(0, senior_load - 1))
  violations <- violations + sum(pmax(0, youth_load - 1))

  # Tactics (coach 8 for seniors, coach 9 for youth) must take place on
  # Thursday (day 4) in the 16:15 - 18:15 slot, which is slot 4
  violations <- violations + (tt[4, 4, 8, 1] != 1)
  violations <- violations + (tt[4, 4, 9, 2] != 1)

  # there is no training in the Monday 8:00 - 10:00 time slot
  violations <- violations + sum(tt[1, 1, , ])

  # the stamina coach Dusan (coach 4) is not available on Monday mornings
  violations <- violations + sum(tt[1, 1:2, 4, ] == 1)

  # there can be no Technique training (coach 2) on Wednesdays (day 3)
  violations <- violations + sum(tt[3, , 2, ] == 1)

  -violations
}

# Initial-population generator for ga(): starts from the default random
# binary population and forces every individual to satisfy the fixed
# constraints, using the same [day, slot, staff, team] layout as valueBin().
#
# object: the object ga() passes to its population function.
# Returns the population matrix, one individual per row.
myInitPopulation <- function(object) {
  p <- gabin_Population(object)

  for (i in seq_len(nrow(p))) {
    tt <- array(p[i, ], c(5, 4, 9, 2))

    # Tactics training on Thursdays in the 16:15 - 18:15 time slot (slot 4)
    tt[4, 4, 8, 1] <- 1
    tt[4, 4, 9, 2] <- 1

    # there is no training in the Monday 8:00 - 10:00 time slot
    tt[1, 1, , ] <- 0

    # there is no Stamina training with Dusan on Monday mornings
    tt[1, 1:2, 4, ] <- 0

    # there is no Technique training on Wednesdays
    tt[3, , 2, ] <- 0

    p[i, ] <- as.vector(tt)
  }

  p
}
|
||||||
|
|
||||||
|
GA4 <- ga(type = "binary", fitness = valueBin, nBits = 4*5*9*2,
|
||||||
|
popSize = 500, maxiter = 10, run = 200, population = myInitPopulation)
|
||||||
|
|
||||||
|
|
||||||
|
# Extract the weekly schedule of one coach for one team from a GA solution.
#
# solution: vector of length 5 * 4 * 9 * 2 (one row of GA4@solution)
# coach:    staff index (1..9)
# team:     team index (1 or 2)
# Returns a 5 x 4 matrix (rows = days, columns = time slots).
timetable2 <- function(solution, coach, team) {
  layout <- c(5, 4, 9, 2)
  schedule <- array(solution, layout)
  schedule[, , coach, team]
}
|
||||||
|
|
||||||
|
## timetable of a coach 2 for team 1.
|
||||||
|
t <- timetable2(GA4@solution[1,],2,1)
|
||||||
|
t
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# EXAMPLE 5: Traveling salesman problem
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
# Given a list of cities and the distances between each pair of cities, what is the shortest possible route that visits
|
||||||
|
# each city exactly once and returns to the origin city?
|
||||||
|
|
||||||
|
data("eurodist", package = "datasets")
|
||||||
|
D <- as.matrix(eurodist)
|
||||||
|
D
|
||||||
|
|
||||||
|
# An individual round tour is represented as a permutation of a default numbering of the cities defining the current order
|
||||||
|
# in which the cities are to be visited
|
||||||
|
|
||||||
|
# Calculation of the tour length
|
||||||
|
# Length of a closed round tour.
#
# tour: vector of city indices (rows/columns of the global distance
#       matrix D) in visiting order.
# Returns the sum of the distances between consecutive cities plus the
# distance from the last city back to the first one. An empty tour has
# length 0, and a single-city tour has length D[city, city].
tourLength <- function(tour) {
  N <- length(tour)

  # guard: with no cities there are no legs to sum
  if (N == 0) {
    return(0)
  }

  # pair every city with its successor; the last city wraps around to the
  # first, which closes the tour (also correct when N == 1)
  successor <- c(tour[-1], tour[1])

  # a two-column index matrix picks D[tour[k], successor[k]] for every leg
  sum(D[cbind(tour, successor)])
}
|
||||||
|
|
||||||
|
# The fitness function to be maximized is defined as the reciprocal of the tour length.
|
||||||
|
# GA fitness for the traveling salesman problem: the reciprocal of the tour
# length, so that shorter tours receive a higher fitness.
#
# tour: permutation of city indices, as accepted by tourLength().
tspFitness <- function(tour) {
  len <- tourLength(tour)
  1 / len
}
|
||||||
|
|
||||||
|
GA5 <- ga(type = "permutation", fitness = tspFitness, lower = 1, upper = ncol(D), popSize = 50, maxiter = 5000, run = 500, pmutation = 0.2)
|
||||||
|
|
||||||
|
summary(GA5)
|
||||||
|
|
||||||
|
# Reconstruct the solution found
|
||||||
|
tour <- GA5@solution[1, ]
|
||||||
|
tour <- c(tour, tour[1])
|
||||||
|
|
||||||
|
tourLength(tour)
|
||||||
|
colnames(D)[tour]
|
||||||
|
|
|
@ -0,0 +1,46 @@
|
||||||
|
#######################################################################################################################
|
||||||
|
#
|
||||||
|
# PROBLEMS
|
||||||
|
#
|
||||||
|
#######################################################################################################################
|
||||||
|
#
|
||||||
|
# Load the Movies dataset using the command:
|
||||||
|
#
|
||||||
|
# md <- read.table("movies.txt", sep=",", header=TRUE)
|
||||||
|
#
|
||||||
|
# Answer the following questions:
|
||||||
|
#
|
||||||
|
# - Are there more movies shorter than 100 min or longer than (or equal to) 100 minutes?
|
||||||
|
# (show your answer numerically and graphically)
|
||||||
|
#
|
||||||
|
# - Are there more action comedies or romantic comedies?
|
||||||
|
#
|
||||||
|
# - Plot a histogram of the ratings for drama movies.
|
||||||
|
#
|
||||||
|
# - Is the average rating of dramas higher than the average rating of non-dramas?
|
||||||
|
# (show your answer numerically and graphically)
|
||||||
|
#
|
||||||
|
# - Plot the number of animated movies being produced every year for the period 1995-2005.
|
||||||
|
#
|
||||||
|
# - Is there a clear boundary between short and feature movies (according to their length)?
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#######################################################################################################################
|
||||||
|
#
|
||||||
|
# Load the Players dataset using the command:
|
||||||
|
#
|
||||||
|
# players <- read.table("players.txt", sep=",", header = T)
|
||||||
|
#
|
||||||
|
# - Plot the proportion of players according to playing positions.
|
||||||
|
#
|
||||||
|
# - Compare career rebounds (the "reb" attribute) with respect to playing position.
|
||||||
|
#
|
||||||
|
# - Show the distribution of free throw percentages.
|
||||||
|
# The percentage is determined by dividing the number of shots made ("ftm") by the total number of shots attempted ("fta").
|
||||||
|
#
|
||||||
|
# - Compare career 3-pointers made for the players active between 1990 and 2007, with respect to playing position.
|
||||||
|
#
|
||||||
|
# - How does the average career length of retired players vary from year to year?
|
||||||
|
#
|
||||||
|
#######################################################################################################################
|
||||||
|
|
|
@ -0,0 +1,326 @@
|
||||||
|
##############################################################################
|
||||||
|
#
|
||||||
|
# DATA VISUALIZATION
|
||||||
|
#
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
# Please download data files "movies.txt" and "players.txt" into a local directory
|
||||||
|
# then set that directory as the current working directory of R.
|
||||||
|
# You can achieve this using the "setwd" command or by selecting "File -> Change dir..."
|
||||||
|
|
||||||
|
# for example:
|
||||||
|
# setwd("c:\\labs\\data\\")
|
||||||
|
library(ggplot2)
|
||||||
|
library(dplyr)
|
||||||
|
|
||||||
|
# To read data from a text file, use the "read.table" command.
|
||||||
|
# The parameter header=TRUE indicates that the file to be read includes a first line with the column names
|
||||||
|
md <- read.table(file="movies.txt", sep=",", header=TRUE)
|
||||||
|
|
||||||
|
# To get more information on any specific named function, type "?" followed by the function name
|
||||||
|
?read.table
|
||||||
|
|
||||||
|
# Useful functions
|
||||||
|
head(md)
|
||||||
|
summary(md)
|
||||||
|
str(md)
|
||||||
|
names(md)
|
||||||
|
|
||||||
|
|
||||||
|
# We will transform binary attributes into nominal variables with a fixed number of possible values (factors)
|
||||||
|
md$Action <- as.factor(md$Action)
|
||||||
|
md$Animation <- as.factor(md$Animation)
|
||||||
|
|
||||||
|
# The remaining columns will be transformed using the for loop
|
||||||
|
for (i in 20:24)
|
||||||
|
md[,i] <- as.factor(md[,i])
|
||||||
|
|
||||||
|
#
|
||||||
|
# Type conversion functions:
|
||||||
|
#
|
||||||
|
# as.numeric
|
||||||
|
# as.integer
|
||||||
|
# as.character
|
||||||
|
# as.logical
|
||||||
|
# as.factor
|
||||||
|
# as.ordered
|
||||||
|
#
|
||||||
|
# values that cannot be converted to the specified type will be converted to a NA value
|
||||||
|
#
|
||||||
|
|
||||||
|
# Binary attributes are now represented as factors
|
||||||
|
summary(md)
|
||||||
|
|
||||||
|
# Accessing data frame elements...
|
||||||
|
md[30,]
|
||||||
|
md[30,3]
|
||||||
|
md[30,"length"]
|
||||||
|
md[,3]
|
||||||
|
md$length
|
||||||
|
|
||||||
|
# Useful data visualization functions
|
||||||
|
plot(md$length)
|
||||||
|
hist(md$length)
|
||||||
|
plot(density(md$length))
|
||||||
|
boxplot(md$length)
|
||||||
|
barplot(table(md$Drama))
|
||||||
|
pie(table(md$mpaa))
|
||||||
|
|
||||||
|
## nicer plots with ggplot2 + dplyr
|
||||||
|
md %>% ggplot(aes(length)) + geom_histogram(bins = 40) + ggtitle("A genetic histogram") + xlab("Length")
|
||||||
|
|
||||||
|
## plotting w.r.t. multiple mpaa categories
|
||||||
|
md %>% ggplot(aes(length,fill = mpaa)) + geom_density(alpha = 0.2)
|
||||||
|
|
||||||
|
## What about a nicer boxplot w.r.t mpaa?
|
||||||
|
## theme_bw() is more neutral theme
|
||||||
|
md %>% ggplot(aes(Drama, rating, color = mpaa)) + geom_boxplot() + theme_bw()
|
||||||
|
|
||||||
|
## show table view
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# EXAMPLE 1: What is the proportion of comedies to other genres in our data set?
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# the table() command gives the frequency of values in the vector
|
||||||
|
table(md$Comedy)
|
||||||
|
|
||||||
|
# the proportion of comedies can be plotted
|
||||||
|
barplot(table(md$Comedy))
|
||||||
|
pie(table(md$Comedy))
|
||||||
|
|
||||||
|
|
||||||
|
# it is important to always label graphs ...
|
||||||
|
|
||||||
|
tab <- table(md$Comedy)
|
||||||
|
names(tab) <- c("Other genres", "Comedies")
|
||||||
|
tab
|
||||||
|
pie(tab)
|
||||||
|
|
||||||
|
sum(tab)
|
||||||
|
|
||||||
|
barplot(tab, ylab="Number of titles", main="Proportion of comedies to other genres")
|
||||||
|
barplot(tab / sum(tab) * 100, ylab="Percentage of titles", main="The proportion of comedies to other genres")
|
||||||
|
|
||||||
|
pie(tab, main = "Proportion of comedies to other genres")
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# EXAMPLE 2: How are ratings distributed for comedies?
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# Plot the rating distribution for comedies
|
||||||
|
hist(md[md$Comedy == "1", "rating"], xlab="Rating", ylab="Frequency", main="Histogram of ratings for comedies")
|
||||||
|
|
||||||
|
# Box plots provide a visual display of the range and potential skewness of the data
|
||||||
|
boxplot(md[md$Comedy == "1", "rating"], ylab="Rating", main="Boxplot of ratings for comedies")
|
||||||
|
|
||||||
|
quantile(md$rating[md$Comedy == 1])
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# EXAMPLE 3: Are comedies on average better rated than non-comedies?
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# Select comedies
|
||||||
|
comedy <- md$Comedy == "1"
|
||||||
|
|
||||||
|
# Calculate the mean rating value for comedies and non-comedies
|
||||||
|
mean(md[comedy,"rating"])
|
||||||
|
mean(md[!comedy,"rating"])
|
||||||
|
|
||||||
|
# Comedies have, on average, higher ratings than non-comedies
|
||||||
|
|
||||||
|
# Side-by-side boxplots of ratings grouped by values of the attribute "Comedy"
|
||||||
|
boxplot(rating ~ Comedy, data=md)
|
||||||
|
boxplot(rating ~ Comedy, data=md, names=c("Other genres", "Comedies"), ylab="Rating", main="Comparison of ratings between comedies and non-comedies")
|
||||||
|
|
||||||
|
## or with dplyr directly
|
||||||
|
md %>% group_by(Comedy) %>% select(rating) %>% summarise(mean(rating))
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# EXAMPLE 4: What is the proportion of comedies (per year) from 1990 onwards?
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
sel <- md$year >= 1990
|
||||||
|
|
||||||
|
# the table() command can be used to get a two-way contingency table
|
||||||
|
table(md$Comedy[sel], md$year[sel])
|
||||||
|
|
||||||
|
table(md$year[sel])
|
||||||
|
|
||||||
|
tabcomedy <- table(md$Comedy[sel], md$year[sel])
|
||||||
|
tabyear <- table(md$year[sel])
|
||||||
|
tabcomedy[2,]/tabyear
|
||||||
|
|
||||||
|
ratio <- tabcomedy[2,]/tabyear
|
||||||
|
barplot(ratio, xlab="Year", ylab="Relative frequency", main="Proportion of comedies")
|
||||||
|
|
||||||
|
plot(x=names(ratio), y=as.vector(ratio), type="l", xlab="Year", ylab="Relative frequency", main="Proportion of comedies, 1990-2005")
|
||||||
|
|
||||||
|
## or with dplyr directly
|
||||||
|
md %>% filter(year >= 1990) %>%
|
||||||
|
group_by(year, Comedy) %>%
|
||||||
|
summarise(n = n()) %>% mutate(freq = n / sum(n)) %>%
|
||||||
|
filter(Comedy == 1) %>% select(year, freq) %>%
|
||||||
|
ggplot(aes(year, freq)) + geom_point() + ggtitle("Frequency of comedies") + ylab("Frequency") + xlab("Year") + geom_line() + theme_bw()
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# EXAMPLE 5: Are there more movies above or below the average rating?
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# the average rating
|
||||||
|
mean(md$rating)
|
||||||
|
|
||||||
|
# how many movies are above the average rating?
|
||||||
|
tab <- table(md$rating > mean(md$rating))
|
||||||
|
tab
|
||||||
|
|
||||||
|
names(tab) <- c("below", "above")
|
||||||
|
barplot(tab, ylab="Number of titles", main="Proportion of movies above and below the average rating")
|
||||||
|
pie(tab, main="Proportion of movies above and below the average rating")
|
||||||
|
|
||||||
|
|
||||||
|
# Box plots provide a summarization of the variable distribution
|
||||||
|
boxplot(md$rating, ylab="Rating", main="Boxplot of movie ratings")
|
||||||
|
|
||||||
|
# The horizontal line inside the box represents the median rating value
|
||||||
|
|
||||||
|
# Let's plot the mean value...
|
||||||
|
abline(h=mean(md$rating))
|
||||||
|
|
||||||
|
## or with dplyr + ggplot
|
||||||
|
md %>% mutate(mRate = mean(rating)) %>%
|
||||||
|
mutate(indicator = ifelse(rating - mRate > 0, "above", "below")) %>%
|
||||||
|
group_by(indicator) %>%
|
||||||
|
summarise(counts = n()) %>%
|
||||||
|
ggplot(aes(indicator, counts, fill = indicator))+ geom_bar(stat = "identity")
|
||||||
|
|
||||||
|
# The mean differs from the median so the distribution is skewed.
|
||||||
|
# We can conclude that there are more cases above the mean value.
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# EXAMPLE 6: Do movies with bigger budgets get higher ratings?
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# there are missing values in the budget attribute
|
||||||
|
summary(md$budget)
|
||||||
|
|
||||||
|
is.na(md$budget)
|
||||||
|
table(is.na(md$budget))
|
||||||
|
which(is.na(md$budget))
|
||||||
|
|
||||||
|
# select complete observations only
|
||||||
|
sel <- is.na(md$budget)
|
||||||
|
mdsub <- md[!sel,]
|
||||||
|
|
||||||
|
nrow(mdsub)
|
||||||
|
summary(mdsub$budget)
|
||||||
|
|
||||||
|
|
||||||
|
plot(mdsub$budget, mdsub$rating, xlab="Budget in $", ylab="Rating", main="Movie rating vs budget")
|
||||||
|
|
||||||
|
# Plotted points are mostly located in the upper left part of the diagram,
|
||||||
|
# which means that a higher budget usually leads to a higher rating
|
||||||
|
|
||||||
|
# Utilization of the budget in terms of rating
|
||||||
|
ratio <- mdsub$budget/mdsub$rating
|
||||||
|
hist(ratio)
|
||||||
|
|
||||||
|
# Which movie has the worst budget utilization?
|
||||||
|
mdsub[which.max(ratio),]
|
||||||
|
|
||||||
|
|
||||||
|
# Let's discretize these budgets to:
|
||||||
|
# low (less than 1M), mid (between 1M and 50M) and big (more than 50M)
|
||||||
|
|
||||||
|
disbudget <- cut(mdsub$budget, c(0, 1000000, 50000000, 500000000), labels=c("low", "mid", "big"))
|
||||||
|
barplot(table(disbudget)/length(disbudget), xlab="Budget", ylab="Relative frequency", main="Proportion of movies vs budget")
|
||||||
|
|
||||||
|
# Side-by-side boxplots of ratings grouped by budget values
|
||||||
|
boxplot(mdsub$rating ~ disbudget, xlab="Budget", ylab="Rating", main="Boxplot of movie rating vs budget")
|
||||||
|
|
||||||
|
## Is this dependent on the mpaa?
|
||||||
|
## or with dplyr + ggplot + adding votes
|
||||||
|
md %>% select(budget, rating, votes, mpaa) %>%
|
||||||
|
na.omit() %>%
|
||||||
|
ggplot(aes(budget, rating, color = votes, fill = mpaa)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x) + theme_bw()
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# EXAMPLE 7:
|
||||||
|
# What is the cumulative movie budget for each year from 1990 to 2000?
|
||||||
|
# What is the average movie budget for each year from 1990 to 2000?
|
||||||
|
# (consider only those movies for which information on the budget is available!)
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# Select the movies that contain information on their budgets
|
||||||
|
sel <- !is.na(md$budget) & md$year >= 1990 & md$year <= 2000
|
||||||
|
|
||||||
|
# We can calculate cumulative budget for each year using the "aggregate" function
|
||||||
|
|
||||||
|
# Data overflow problem!
|
||||||
|
aggregate(budget ~ year, data = md[sel,], sum)
|
||||||
|
|
||||||
|
# The budget values are represented as integers
|
||||||
|
typeof(md$budget)
|
||||||
|
|
||||||
|
# In order to avoid the overflow problem we have to convert
|
||||||
|
# the budget values into a double-precision representation (using the as.double() command)
|
||||||
|
aggregate(as.double(budget) ~ year, data = md[sel,], sum)
|
||||||
|
|
||||||
|
sum.budget <- aggregate(as.double(budget) ~ year, data = md[sel,], sum)
|
||||||
|
plot(sum.budget, type="l", xlab="Year", ylab="Cumulative budget in $", main="Cumulative movie budget per year")
|
||||||
|
|
||||||
|
avg.budget <- aggregate(as.double(budget) ~ year, data = md[sel,], mean)
|
||||||
|
plot(avg.budget, type="l", xlab="Year", ylab="Average budget in $", main="Average movie budget per year")
|
||||||
|
|
||||||
|
## or with dplyr
|
||||||
|
md %>% select(budget, year) %>% na.omit() %>%
|
||||||
|
group_by(year) %>% summarise(budget2 = sum(as.numeric(budget))) %>%
|
||||||
|
arrange(year) %>% mutate(csum = cumsum(budget2)) %>%
|
||||||
|
ggplot(aes(year, csum)) + geom_bar(stat = "identity") + theme_bw()
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
#
|
||||||
|
# EXAMPLE 8: (players dataset)
|
||||||
|
# What is the average height for each season in the period from 1970 to 2000?
|
||||||
|
#
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
# Load the Players dataset
|
||||||
|
players <- read.table("players.txt", sep=",", header = T)
|
||||||
|
summary(players)
|
||||||
|
|
||||||
|
# Create an empty vector
|
||||||
|
# Mean height of the players that were active in each season from 1970 to
# 2000. A player counts as active in year y when
# firstseason <= y <= lastseason.
#
# vapply() produces the whole result in one step instead of growing a vector
# with c() inside a loop, and guarantees exactly one numeric value per year.
h <- vapply(1970:2000, function(y) {
  # Select active players in that year
  active <- players$firstseason <= y & players$lastseason >= y

  # Mean height for the current year
  mean(players$height[active])
}, numeric(1))
|
||||||
|
|
||||||
|
# plot the resulting vector (use type="l" for lines)
|
||||||
|
plot(1970:2000, h, type="l", xlab="Year", ylab="Height in cm", main="Average height in NBA")
|
||||||
|
|
||||||
|
## or with dplyr
|
||||||
|
dfx <- data.frame(year = 1970:2000,mh = h)
|
||||||
|
dfx %>% ggplot(aes(year, mh)) + geom_point() + geom_smooth(method = "loess") + theme_bw() + xlab("Year") + ylab("Mean height")
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue