Zacetek na v3

main
Gasper Spagnolo 2022-10-24 15:50:54 +02:00
parent b4c1542846
commit a1083289a3
11 changed files with 9261 additions and 1 deletions

View File

@ -44,7 +44,8 @@
# #
x <- c(1, -2, 3, -4, 5, -6, 7, -8) x <- c(1, -2, 3, -4, 5, -6, 7, -8)
x[x < 0] <- 0 x[x < 0] <- 0
#x x[x >= 0] <- x[] * 10
x
# Edit the vector x as follows. Replace all elements with a negative value # Edit the vector x as follows. Replace all elements with a negative value
# with 0. Multiply the elements with a positive value by 10. # with 0. Multiply the elements with a positive value by 10.

3
v2/.Rhistory Normal file
View File

@ -0,0 +1,3 @@
# Load the algae training and test sets (first row holds the column names).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
train.data <- read.table("./data/AlgaeLearn.txt", header = TRUE)
test.data <- read.table("./data/AlgaeTest.txt", header = TRUE)
# Linear model of a1 on every other column of the training data.
lm.model <- lm(a1 ~ ., train.data)

BIN
v2/Rplots.pdf Normal file

Binary file not shown.

185
v2/data/AlgaeLearn.txt Normal file
View File

@ -0,0 +1,185 @@
"season" "size" "speed" "mxPH" "mnO2" "Cl" "NO3" "NH4" "oPO4" "PO4" "Chla" "a1"
"winter" "small" "medium" 8 9.8 60.8 6.238 578 105 170 50 0
"spring" "small" "medium" 8.35 8 57.75 1.288 370 428.75 558.75 1.3 1.4
"autumn" "small" "medium" 8.1 11.4 40.02 5.33 346.66699 125.667 187.05701 15.6 3.3
"spring" "small" "medium" 8.07 4.8 77.364 2.302 98.182 61.182 138.7 1.4 3.1
"autumn" "small" "medium" 8.06 9 55.35 10.416 233.7 58.222 97.58 10.5 9.2
"winter" "small" "high" 8.25 13.1 65.75 9.248 430 18.25 56.667 28.4 15.1
"summer" "small" "high" 8.15 10.3 73.25 1.535 110 61.25 111.75 3.2 2.4
"autumn" "small" "high" 8.05 10.6 59.067 4.99 205.66701 44.667 77.434 6.9 18.2
"winter" "small" "medium" 8.7 3.4 21.95 0.886 102.75 36.3 71 5.544 25.4
"winter" "small" "high" 7.93 9.9 8 1.39 5.8 27.25 46.6 0.8 17
"spring" "small" "high" 7.7 10.2 8 1.527 21.571 12.75 20.75 0.8 16.6
"summer" "small" "high" 7.45 11.7 8.69 1.588 18.429 10.667 19 0.6 32.1
"winter" "small" "high" 7.74 9.6 5 1.223 27.286 12 17 41 43.5
"summer" "small" "high" 7.72 11.8 6.3 1.47 8 16 15 0.5 31.1
"winter" "small" "high" 7.9 9.6 3 1.448 46.2 13 61.6 0.3 52.2
"autumn" "small" "high" 7.55 11.5 4.7 1.32 14.75 4.25 98.25 1.1 69.9
"winter" "small" "high" 7.78 12 7 1.42 34.333 18.667 50 1.1 46.2
"spring" "small" "high" 7.61 9.8 7 1.443 31.333 20 57.833 0.4 31.8
"summer" "small" "high" 7.35 10.4 7 1.718 49 41.5 61.5 0.8 50.6
"spring" "small" "medium" 7.79 3.2 64 2.822 8777.59961 564.59998 771.59998 4.5 0
"winter" "small" "medium" 7.83 10.7 88 4.825 1729 467.5 586 16 0
"spring" "small" "high" 7.2 9.2 0.8 0.642 81 15.6 18 0.5 15.5
"autumn" "small" "high" 7.75 10.3 32.92 2.942 42 16 40 7.6 23.2
"winter" "small" "high" 7.62 8.5 11.867 1.715 208.33299 3 27.5 1.7 74.2
"spring" "small" "high" 7.84 9.4 10.975 1.51 12.5 3 11.5 1.5 13
"summer" "small" "high" 7.77 10.7 12.536 3.976 58.5 9 44.136 3 4.1
"winter" "small" "high" 7.09 8.4 10.5 1.572 28 4 13.6 0.5 29.7
"winter" "small" "high" 8 9.8 16 0.73 20 26 45 0.8 17.1
"spring" "small" "high" 7.2 11.3 9 0.23 120 12 19 0.5 33.9
"autumn" "small" "high" 7.4 12.5 13 3.33 60 72 142 4.9 3.4
"winter" "small" "high" 8.1 10.3 26 3.78 60 246 304 2.8 6.9
"summer" "small" "high" 7.8 11.3 20.083 3.02 49.5 53 130.75 5.8 0
"autumn" "small" "medium" 8.4 9.9 34.5 2.818 3515 20 47 2.3 13.6
"winter" "small" "medium" 8.27 7.8 29.2 0.05 6400 7.4 23 0.9 5.3
"summer" "small" "medium" 8.66 8.4 30.523 3.444 1911 58.875 84.46 3.6 18.3
"winter" "small" "high" 8.3 10.9 1.17 0.735 13.5 1.625 3 0.2 66
"winter" "small" "medium" 8.3 8.9 20.625 3.414 228.75 196.62 253.25 12.32 2
"spring" "small" "medium" 8.1 10.5 22.286 4.071 178.57001 182.42 255.28 8.957 2.2
"winter" "small" "medium" 8 5.5 77 6.096 122.85 143.71001 296 3.7 0
"summer" "small" "medium" 8.15 7.1 54.19 3.829 647.57001 59.429 175.04601 13.2 0
"winter" "small" "high" 8.3 7.7 50 8.543 76 264.89999 344.60001 22.5 0
"spring" "small" "high" 8.3 8.8 54.143 7.83 51.429 276.85001 326.85699 11.84 4.1
"winter" "small" "high" 8.4 13.4 69.75 4.555 37.5 10 40.667 3.9 51.8
"spring" "small" "high" 8.3 12.5 87 4.87 22.5 27 43.5 3.3 29.5
"autumn" "small" "high" 8 12.1 66.3 4.535 39 16 39 0.8 54.4
"spring" "small" "medium" 7.6 9.6 15 3.02 40 27 121 2.8 89.8
"autumn" "small" "medium" 7.29 11.21 17.75 3.07 35 13 20.812 12.1 24.8
"winter" "small" "medium" 7.6 10.2 32.3 4.508 192.5 12.75 49.333 7.9 0
"summer" "small" "medium" 8 7.9 27.233 1.651 28.333 7.3 22.9 4.5 39.1
"winter" "small" "high" 7.9 11 6.167 1.172 18.333 7.75 11.8 0.5 81.9
"spring" "small" "high" 7.9 9 5.273 0.91 33.636 9 11.818 0.8 54
"spring" "small" "high" 7.57 10.8 4.575 1.203 27.5 2 6.75 1 20.3
"summer" "small" "high" 7.19 11.7 4.326 1.474 160 2.5 7.2 0.3 15.8
"winter" "small" "high" 7.44 10.1 2.933 0.77 15 1.333 6 0.6 55.5
"spring" "small" "high" 7.14 9.8 3.275 0.923 15 1.25 10.75 2.5 10.3
"summer" "small" "high" 7 12.1 3.136 1.208 16.2 1.8 2.5 0.5 64.2
"winter" "small" "medium" 7.5 1.5 32.4 0.921 1386.25 220.75 351.60001 10 0
"spring" "small" "medium" 7.5 1.8 29.775 1.051 2082.8501 209.85699 313.60001 1 1.9
"summer" "small" "medium" 7.8 7.1 32.54 1.72 2167.37012 151.125 279.06601 13.1 25.5
"autumn" "medium" "medium" 8.5 8.1 38.125 3.85 225 45 152.33299 5.2 11.3
"summer" "medium" "medium" 7.925 10.2 34.037 9.08 109 55 58.623 11.6 4.4
"winter" "medium" "medium" 8.1 8.1 136 3.773 245 136.75 249.25 20.87 1.9
"spring" "medium" "medium" 8.2 6.8 129.375 3.316 271.25 100 233.5 13 1.6
"spring" "medium" "high" 9.1 9.4 35.75 5.164 32.5 85.5 215.5 18.37 2.2
"autumn" "medium" "medium" 8.1 9.8 29.5 1.287 224.286 25.167 102.333 3.6 64.9
"winter" "medium" "medium" 8 5.9 27.4 0.735 133.636 36 105.727 3 15.1
"spring" "medium" "medium" 8 3.3 26.76 0.658 165 37.375 111.375 3 14.4
"winter" "medium" "high" 7.5 9.2 11 3.31 101 26.6 108 1.3 6.7
"spring" "medium" "high" 7.4 9.8 11 3.235 255 38.75 56.667 2 10.8
"autumn" "medium" "high" 7.3 11.7 10.4 4.93 130 10.8 60 4.3 1.2
"winter" "medium" "high" 7.4 8.9 13.5 5.442 123.333 27.667 104 21 12.6
"summer" "medium" "high" 7.4 11.17 12.146 6.188 89.6 32 69.93 3.1 14.7
"autumn" "medium" "medium" 7.5 10.8 31 4.408 737.5 111.25 214 2.9 3.3
"winter" "medium" "medium" 7.6 6 53 3.734 914 137.60001 254.60001 4.3 0
"summer" "medium" "medium" 7.4 10.77 36.248 3.73 429.20001 57.6 169.00101 3.2 2.8
"winter" "medium" "medium" 7.8 3.6 48.667 4.03 5738.33008 412.33301 607.16699 4.3 0
"summer" "medium" "medium" 7.6 9.7 53.102 7.16 4073.33008 282.16699 624.73297 6.8 0
"winter" "medium" "medium" 8.5 8.6 125.6 3.778 124.167 197.83299 303.33301 40 0
"spring" "medium" "medium" 8.7 9.4 173.75 3.318 101.25 267.75 391.75 3.5 0
"summer" "medium" "medium" 8.1 10.7 94.405 4.698 153 191.75 265.25 7.3 0
"winter" "medium" "high" 8.8 8.5 53.333 5.132 96.667 120.5 232.83299 31 1.2
"spring" "medium" "high" 7.8 10.5 70 2.443 98.333 144.66701 244 9 0
"summer" "medium" "high" 7.9 11.8 63.51 4.94 137 159.5 218 6.5 0
"autumn" "medium" "low" 8.5 10.5 56.717 0.33 215.714 23 138.5 20.829 5.7
"winter" "medium" "low" 9.1 5.4 61.05 0.308 105.556 104.222 239 72.478 3.6
"spring" "medium" "low" 8.9 4.5 57.75 0.267 155 97.333 235.66701 98.817 1.2
"winter" "medium" "high" 7.9 6.3 101.875 3.978 153.75 51.75 205.875 2 4
"summer" "medium" "high" 7.8 8.2 85.982 6.2 421.66699 31.333 211.66701 21.9 5.9
"winter" "medium" "medium" 7.7 7.1 63.625 3.14 122.5 28.625 186.5 30 16.5
"spring" "medium" "medium" 7.8 6.5 82.111 2.603 215.556 12.889 154.125 5.2 7
"winter" "medium" "low" 7.7 5.3 65.333 2.899 371.11099 51.111 183.66701 17.2 58.7
"summer" "medium" "low" 7.5 8.8 58.331 8.688 758.75 104.5 292.625 3 8.7
"autumn" "medium" "low" 7.6 10 49.625 5.456 308.75 38.625 285.71399 75 17
"winter" "medium" "low" 8.7 7.4 47.778 2.316 38.111 24.667 201.778 3 12.3
"summer" "medium" "low" 7.7 11.1 47.229 8.759 239 54 275.14301 65.7 8.8
"autumn" "medium" "high" 8.3 11.1 41.5 4.665 931.83301 39 124.2 13.1 23.7
"winter" "medium" "high" 8.43 6 40.167 2.67 723.66699 60.833 141.83299 25 0
"summer" "medium" "high" 8.16 11.1 32.056 5.694 461.875 71 132.54601 15 3.6
"winter" "medium" "high" 8.7 9.8 5.889 1.534 51.111 9.667 17.333 1 64.3
"spring" "medium" "high" 8.2 11.3 7.25 1.875 25 6.5 26 0.3 46.6
"summer" "medium" "high" 8.5 11.8 7.838 1.732 206.53799 8.692 16.662 2.1 24
"spring" "medium" "medium" 7.8 6 53.425 0.381 118.571 37.857 102.571 1.2 3.7
"summer" "medium" "medium" 8 9.7 57.848 0.461 217.75 37 86.997 3 18.1
"summer" "medium" "high" 8.6 11.62 1.549 0.445 25.833 16.833 18.293 1.4 43.7
"autumn" "medium" "medium" 8.3 11.6 5.83 0.701 12.727 3.545 13.2 3.2 86.6
"spring" "medium" "low" 8.4 5.3 74.667 3.9 131.66701 261.60001 432.909 24.917 1.9
"summer" "medium" "low" 8.2 6.6 131.39999 4.188 92 238.2 320.39999 6.8 1.2
"winter" "medium" "medium" 8.2 9.4 45.273 7.195 345.45499 144 287 9.882 1.4
"spring" "medium" "medium" 8.1 7.1 42.636 5.078 56.364 166.72701 262.72699 17.2 1.6
"summer" "medium" "medium" 8.1 9 48.429 6.64 128.571 181 222.286 6.429 3.3
"winter" "medium" "high" 7.4 10.7 11.818 2.163 170.909 36.909 122 5.555 14.6
"spring" "medium" "high" 8.3 9.7 10.556 1.921 65.556 61.556 127.222 5.233 1.7
"summer" "medium" "high" 8.6 10.7 12 2.231 43.75 62.625 89.625 2.15 3.3
"winter" "medium" "medium" 9.1 11.6 31.091 5.099 246.364 55 284 88.255 0
"spring" "medium" "medium" 9 6.9 28.333 2.954 76.667 102.333 277.33301 110.456 0
"summer" "medium" "medium" 8.3 10 30.125 3.726 102.5 75.875 177.625 50.225 1.5
"winter" "medium" "high" 8.5 10.1 10.936 1.335 236 34.636 72.9 11.1 4.2
"spring" "medium" "high" 8.3 7.7 10.078 1.212 103.333 48.667 82.444 2 4.1
"summer" "medium" "high" 7.3 10.5 11.088 1.374 92.375 48.625 66.75 3.3 1.2
"winter" "medium" "medium" 7.9 9.8 194.75 6.513 3466.65991 23 173.75 15.3 0
"spring" "medium" "medium" 7.9 8.3 391.5 6.045 380 173 317 5.5 2.4
"autumn" "medium" "medium" 8 11.9 130.67 6.54 196 75 84 4.5 7.8
"spring" "medium" "medium" 8 9.2 39 4.86 120 187 213 2 10.3
"autumn" "medium" "medium" 8.1 11.7 35.66 5.13 46.5 49 88.5 2.5 1.5
"winter" "medium" "low" 8.43 9.9 37.6 0.826 124 32.5 115 11.7 9.2
"summer" "medium" "low" 8.1 6.2 39 0.673 112.857 60 98.143 2 28.1
"winter" "medium" "medium" 7.9 11.2 49.9 9.773 505 67.5 143.75 5.45 2.1
"summer" "medium" "medium" 8.1 6.2 51.113 5.099 175 132.5 197.14301 6.4 1.4
"spring" "medium" "high" 7.8 9.5 8.3 1.67 34 16.8 35.2 1 19
"autumn" "medium" "high" 7.9 10.5 10.207 2.304 132.25 10.583 23.485 2 42.5
"winter" "medium" "low" 8 4.5 79.077 8.984 920 70 200.231 19.4 2.5
"spring" "medium" "low" 7.6 6.3 81.333 9.715 196.66701 77.333 147.83299 3 4.4
"autumn" "medium" "low" 7.8 6.5 64.093 7.74 1990.16003 47.5 276 8.1 6.5
"winter" "medium" "high" 8.22 8.1 41.25 1.415 172.5 46.667 123.333 30.4 39.7
"autumn" "medium" "high" 8.3 9.9 40.226 1.587 235 33.8 75.207 23.8 32.8
"winter" "medium" "high" 8.47 9 46.167 2.102 84.667 48 116.2 7.3 12.2
"spring" "medium" "high" 8.4 4.9 47 0.536 91.833 109 188.66701 32 1.9
"autumn" "medium" "high" 8.87 11 41.163 2.273 54.75 39 72.696 22.7 0
"summer" "medium" "high" 7.7 4.4 53 2.31 90 22.2 116.2 16 0
"autumn" "medium" "high" 7.3 11.8 44.205 45.65 24064 44 34 53.1 2.2
"spring" "medium" "medium" 7.9 6 127.833 2.68 176.66701 27.5 76.333 2.1 3.4
"autumn" "medium" "medium" 7.8 10.53 100.83 5.41 486.5 24 58.374 27.5 2.8
"spring" "large" "low" 7.8 3.2 94 4.908 1131.66003 175.66701 361 28.567 24.8
"summer" "large" "low" 7.6 4.9 69 3.685 1495 234.5 236 22.5 32.5
"spring" "large" "low" 8.6 3.6 50 0.376 134 54.1 125.8 26.8 0
"autumn" "large" "low" 8.4 10.6 19.22 1.655 96.833 20.667 54.916 20.6 0
"winter" "large" "low" 8.3 11.5 26 1.87 62.5 30.75 75.333 34.75 0
"spring" "large" "low" 9.5 5.7 44 0.102 146.66701 151.33299 252.5 93.683 12.3
"summer" "large" "low" 8.8 8.8 43 0.13 103.333 180.66701 269.66699 92.667 7.2
"autumn" "large" "low" 8.84 12.9 43.09 0.846 52.2 8.6 46.438 81.54 3.4
"winter" "large" "high" 7.3 9.9 16 4.82 101.667 14.667 85 2 0
"autumn" "large" "high" 7.4 10.68 22.35 5.414 244.60001 66.4 171.272 3.8 1.1
"spring" "large" "low" 9.1 4.3 82.857 0.86 137.27299 102.364 232.89999 54.367 0
"autumn" "large" "low" 8.53 11.1 63.292 1.726 227.60001 84.3 146.452 21.22 1.4
"winter" "large" "low" 8.56 8.7 43.97 4.053 643 221.89999 246.66701 14.7 12.5
"autumn" "large" "low" 8.06 8.3 38.902 3.678 627.27301 205.636 219.909 6.209 0
"winter" "large" "medium" 8.24 6.1 95.367 3.561 1168 236.39999 272.22198 20.578 2.5
"summer" "large" "medium" 7.91 6.2 151.83299 3.923 1081.66003 346.16699 388.16699 5.083 1.7
"winter" "large" "medium" 8.21 9.3 104.818 3.908 124.364 82.222 167.89999 5.609 1.4
"spring" "large" "medium" 8.5 7.3 71.444 2.512 66.667 64.389 137.778 9.384 0
"spring" "large" "medium" 8.6 10.6 208.364 4.459 197.909 87.333 194.10001 27.618 0
"winter" "large" "medium" 9.06 6.35 187.183 3.351 54.778 159.16701 221.278 20.8 0
"autumn" "large" "high" 8.7 10.7 4.545 0.941 32.727 16 21.3 1.1 39.7
"spring" "large" "high" 8.1 10.7 3.5 1.013 12.5 12.75 11 0.6 37.3
"summer" "large" "high" 8.4 10.29 5.326 0.996 53.846 7.667 14.354 0.8 52.4
"spring" "large" "medium" 8.6 10.1 2.111 0.663 11.111 3.222 7 1.3 48.3
"summer" "large" "medium" 8.2 9.5 2.2 0.672 10 3.8 6.2 0.8 50.4
"winter" "large" "medium" 8.5 10.5 2.75 0.758 10.5 4 7.654 4 56.8
"summer" "large" "medium" 8.3 10 3.86 0.866 32 6 16 2.86 17.3
"summer" "large" "high" 8.1 10.2 7.613 0.699 32.5 26.625 52.875 2 18.1
"winter" "large" "low" 8.7 10.8 39.109 6.225 161.81799 104.727 228.364 46.075 1.1
"winter" "large" "low" 8.7 11.7 22.455 3.765 88.182 41.3 85.4 17.491 0
"summer" "large" "low" 8.4 8.2 23.25 2.805 43.75 51.125 87.125 14.775 0
"autumn" "large" "low" 8.55 11 22.32 3.14 82.1 45.9 101.455 18.33 1.7
"spring" "large" "medium" 8.5 7.6 12.778 1.873 17.778 50.889 127 24.556 0
"autumn" "large" "medium" 8.7 11.4 15.541 2.323 103 34.5 81.558 5.62 7.6
"winter" "large" "medium" 8.4 10.5 12.182 1.519 65.455 19.727 50.455 8.155 2.9
"spring" "large" "medium" 8.2 8.2 7.333 1.003 37.778 19.111 120.889 5.111 2.2
"autumn" "large" "medium" 8.58 11.1 23.825 3.617 72.6 51.111 91.111 22.9 3.8
"summer" "large" "medium" 8.5 7.9 12.444 2.586 96.667 19.111 61.444 6.167 18.9
"autumn" "large" "medium" 8.4 8.4 17.375 3.833 83.75 53.625 79.75 2.338 12.7
"spring" "large" "medium" 8.3 10.6 14.32 3.2 125.333 35.333 75.904 4.667 18
"autumn" "large" "medium" 8.2 7 139.989 2.978 60.11 78.333 140.22 31.738 0
"summer" "large" "medium" 8.5 6.7 82.852 2.8 27.069 64 140.517 18.3 2.4

123
v2/data/AlgaeTest.txt Normal file
View File

@ -0,0 +1,123 @@
"season" "size" "speed" "mxPH" "mnO2" "Cl" "NO3" "NH4" "oPO4" "PO4" "Chla" "a1"
"winter" "small" "medium" 7.98 8.8 59.333 7.392 286.66699 33.333 138 7.1 1.2
"summer" "small" "medium" 8 7.2 80 1.957 174.286 47.857 113.714 4.5 7
"spring" "small" "high" 8.35 8.4 68 3.026 458 45.2 111.8 3.2 1.4
"spring" "small" "medium" 8.1 13.2 19 0 130 6 40 2 3.9
"summer" "small" "medium" 8.37 12.1 12.85 0.84 15 5 10.507 13.8 28.4
"spring" "small" "high" 7.31 9.9 6 1.395 58.75 6 16 0.8 11.4
"autumn" "small" "high" 7.91 11.2 5 1.383 6 24.333 30 32 29.7
"summer" "small" "high" 7.99 10.7 4 1.368 117 17.25 44.75 0.8 74.3
"autumn" "small" "high" 7.82 11.5 8.18 1.488 39 16 139.5 0.4 0
"summer" "small" "medium" 7.9 6 63 1.053 11160.59961 1435 1690 4.5 0
"autumn" "small" "medium" 8.02 9.4 18.74 1.598 6249.6001 455.79999 690.59998 2 0
"summer" "small" "high" 6.6 10.8 4 1.18 80 2 59 0.6 62.5
"autumn" "small" "high" 6.79 9.4 11.42 1.966 42 3 15 0.6 21.6
"summer" "small" "high" 6.78 10.2 10.704 1.46 46 3 13.714 0.7 41.9
"summer" "small" "high" 7.8 10.8 14.568 1.228 61.25 34.5 62 1.1 35.4
"spring" "small" "high" 8.3 12.7 27 4.04 10 363 482 6 0
"spring" "small" "medium" 7.97 2.5 32.125 1.034 7912.5 132.625 164.62 1 10.1
"summer" "small" "high" 8.2 10.4 3.577 0.788 10.583 1.667 2.088 0.8 30.6
"autumn" "small" "medium" 8.1 6.4 21.2 3.222 44 54.8 155 61.52 19.4
"summer" "small" "medium" 8.54 12.83 22.545 4 170.5 68 116.069 41.6 19.2
"spring" "small" "medium" 7.7 6.8 65 1.833 782.5 77.25 340 9 0
"autumn" "small" "high" 8.4 10.5 50.6 10.494 334 209.10001 276.66699 20.72 2.3
"summer" "small" "high" 8.5 11.5 57.292 10.526 312.60001 261.39999 299.39999 23.5 0
"summer" "small" "high" 8.1 12.2 66 4.08 10 26 70 1.8 31.8
"autumn" "small" "low" 6.13 11.23 8.87 0.62 36 3 14.741 2.1 11.9
"summer" "small" "medium" 7.2 10.4 18 2.42 80 11 44 2.5 7.2
"autumn" "small" "medium" 7.8 9.1 36.124 5.974 169 13.091 71.057 3.3 4
"summer" "small" "high" 7.8 9.4 5.714 0.807 22.143 6 18.714 1.5 42.8
"autumn" "small" "high" 7.8 11.35 5.343 1.363 19.75 5.818 8.846 1.9 66.2
"winter" "small" "high" 7.66 10.8 4 0.997 15 1.5 7.333 1 17.9
"autumn" "small" "medium" 7.8 6.9 31.375 0.933 2138.57007 152.429 317.5 15.4 30.6
"winter" "medium" "medium" 8 7 37.091 2.237 146.364 84.091 172.778 2.3 7.6
"spring" "medium" "medium" 8.2 7.8 37.625 1.453 105.714 66.714 143.39999 2.6 0
"autumn" "medium" "medium" 8.2 10.7 134.66701 4.504 617.77802 49.444 164.778 19.2 2.8
"summer" "medium" "medium" 8 8.5 131.46899 3.454 792 63.1 286.60001 8.2 2.1
"autumn" "medium" "high" 8.9 10.5 34.8 6 122.556 41.111 144.11099 27.03 3.2
"summer" "medium" "high" 8.2 9.2 30.037 5.184 174.8 86.6 130.8 3.45 2.4
"summer" "medium" "medium" 7.8 8.8 29.078 2.823 263.556 27 95.12 11.5 35.2
"summer" "medium" "high" 7.5 10.8 10.357 3.35 127.667 22 34.321 1.2 7.8
"spring" "medium" "high" 7.4 9 13.75 5.268 58.75 56.25 64 2.5 2.9
"spring" "medium" "medium" 7.5 8.9 55.8 4.408 389 127.4 206.2 5 0
"spring" "medium" "medium" 7.8 10.4 49 7.557 6433.33008 170.66701 341 2.3 0
"autumn" "medium" "medium" 9.1 8 101.2 4.306 273.75 152.875 290.31299 10.7 0
"autumn" "medium" "high" 8.9 8 60.2 4.033 306.47101 136 242.94099 18.4 0
"summer" "medium" "low" 8.5 10.74 56.292 0.694 264.79999 43.4 124.942 30.48 13.7
"autumn" "medium" "high" 8.3 8.6 75 5.18 560 30.5 170 16.7 1.2
"spring" "medium" "high" 7.8 6.3 136.66701 3.734 154.444 35.556 175.33299 2.7 2.9
"autumn" "medium" "medium" 7.6 9.2 64.778 6.164 720 21.778 242.5 54.2 11.2
"summer" "medium" "medium" 7.5 9.2 61.557 7.035 558.33301 24.5 257.33301 19.5 3.8
"autumn" "medium" "low" 7.5 8.6 57.5 7.368 577 67.3 254.444 22 14.2
"spring" "medium" "low" 7.7 4.8 88.909 1.714 669.091 38.182 205.18201 2.8 9.7
"spring" "medium" "low" 7.9 7.2 55.25 2.235 89.375 17.5 141.5 17 8.7
"spring" "medium" "high" 8.06 2.2 39 2.085 773.125 90.75 163.25 26 2.7
"autumn" "medium" "high" 8.5 7.5 9.3 1.557 260 9.6 18.1 3.9 14.5
"autumn" "medium" "medium" 8.2 10.4 63.3 0.389 217.14301 24.333 114 2.7 19.8
"winter" "medium" "medium" 8 4.8 58.767 0.308 93.75 33.375 110.875 2.7 7.6
"autumn" "medium" "high" 8.7 10.8 1.118 0.534 26.364 14.818 20.9 1.4 40.7
"spring" "medium" "high" 8.4 11.2 0.5 0.32 10 21.6 27.6 0.6 37.1
"winter" "medium" "low" 8.5 8.3 36.583 5.632 440.83301 149 266.36401 19.827 2.5
"autumn" "medium" "low" 8.3 8.8 64.768 6.272 357.16699 219 302.5 8.267 3.5
"autumn" "medium" "medium" 8.4 10.8 47.304 7.773 258.909 145.091 223.04401 13.36 1
"autumn" "medium" "high" 7.9 11.9 11.862 2.209 128.636 48.091 69.079 2.755 0
"autumn" "medium" "medium" 9.13 12 30.496 4.971 99.6 64.6 146.265 54.13 1
"autumn" "medium" "high" 7.4 11.4 12.031 1.621 176.8 36.3 58.599 36.1 4.1
"summer" "medium" "medium" 8.3 8.9 271.5 6.315 375 169 313.5 2.8 16.5
"winter" "medium" "medium" 8.2 10.4 41 5.16 410 38 61 6 4.9
"summer" "medium" "medium" 8.2 11.2 36 4.4 32.5 108 155.5 3 12.4
"spring" "medium" "low" 8.17 6.3 37.3 0.527 82 62 133.10001 1.4 5.9
"autumn" "medium" "low" 8.33 10.6 36.156 1.137 119.444 92.889 112.855 10.5 74.8
"spring" "medium" "medium" 8.5 6.7 45.609 4.411 160 88.364 180.364 32.833 1.9
"autumn" "medium" "medium" 8.1 9.1 47.267 9.367 169.091 75 127.778 3.667 1.9
"winter" "medium" "high" 8.2 11.9 12.25 2.348 121.875 14 27.5 4.6 2.1
"summer" "medium" "high" 8.1 9.4 11 2.251 48.75 17.375 66.875 2.5 28.7
"summer" "medium" "low" 7.8 7.9 87 12.13 652.5 93.25 209 6 1.4
"spring" "medium" "high" 8.26 5 44.818 0.526 97.273 105.455 181.636 20.6 0
"summer" "medium" "high" 8.11 6.6 49.857 0.993 194.28 77 197.571 13 4.2
"summer" "medium" "high" 7.87 1.8 49.25 0.611 357.125 128.25 185.125 4.5 1.2
"winter" "medium" "high" 7.2 10.1 49.5 3.955 55 18 138 49 2.4
"spring" "medium" "high" 7.8 8.3 51.5 2.098 30.2 24.6 184.39999 31.3 1.9
"winter" "medium" "medium" 7.9 11.3 82.5 6.283 300 12.333 53.333 13.7 12.3
"summer" "medium" "medium" 8 8.8 176.25 0.618 440 16.25 79.25 3.5 8
"autumn" "large" "low" 8.1 9.07 71.39 2.904 1768.80005 27.6 123.06 41.54 5.8
"winter" "large" "low" 8.7 5.4 48 1.139 144.286 36.714 66.833 22.017 1.2
"summer" "large" "low" 7.9 5.3 48 0.513 138.33299 61.333 89.167 4 0
"autumn" "large" "low" 8.7 12.2 32.23 1.887 233.5 17.5 66.167 39.333 11.8
"winter" "large" "low" 8.6 6.5 43 0.668 95 10.5 74.667 63.5 30.1
"spring" "large" "high" 7.4 7.3 19 4.39 120 74.857 166.286 5.3 0
"summer" "large" "high" 7.8 10.4 22.5 4.72 178.75 116.5 201 2.7 0
"winter" "large" "low" 8.5 9.8 70.25 1.644 285 68.714 132 16.028 4.6
"spring" "large" "low" 7.98 5.6 47.06 3.088 357 311.39999 342.29999 18.53 2.4
"summer" "large" "low" 7.95 7.2 57.286 3.746 425.71399 291.14301 330 4.714 0
"spring" "large" "medium" 7.96 5.5 131.364 3.313 810.90002 311.45499 349.81799 20.47 2.7
"autumn" "large" "medium" 8.03 7.83 83.023 4.065 1222.81006 240.545 269.091 6.809 5
"winter" "large" "medium" 8.35 2.75 97.733 3.681 137.444 91 155.556 2.744 0
"summer" "large" "medium" 8.15 10.4 189.567 5.011 162.944 135.778 219.278 2.859 0
"winter" "large" "high" 8.5 10.1 3 0.851 37.778 10.778 23.889 0.5 37.5
"winter" "large" "medium" 8.5 11.4 3 0.774 10.909 3.727 8.091 3.6 35.9
"spring" "large" "medium" 8.5 8.5 4.025 0.825 23.636 5.583 31.091 2.4 38.6
"autumn" "large" "medium" 8.4 11.43 4.966 0.969 24.111 6 18.167 2.133 34.7
"spring" "large" "high" 8.2 9.9 6.4 0.553 21.429 12 76.286 1.3 16
"autumn" "large" "high" 8 10.98 9.7 0.874 67.7 26.6 51.034 2.2 26.7
"autumn" "large" "low" 8.3 8.9 42.058 5.922 116.727 150.58299 220.72301 6.7 0
"spring" "large" "low" 8.7 6.8 16.889 2.139 30 37.111 85.444 23.033 2.2
"winter" "large" "medium" 8.6 10.4 15.182 2.502 140.909 31.909 77.7 15.318 4.8
"summer" "large" "medium" 8 9.1 15.375 2.118 43.75 48.875 86.5 8.125 0
"summer" "large" "medium" 8.2 9.5 17.875 2.363 63.75 44 77 8.463 1.5
"spring" "large" "medium" 8.5 9.6 16.545 3.849 103.273 34.273 63.4 14.682 18.8
"spring" "large" "medium" 8.04 9.3 130.263 3.776 131.008 97.5 152.966 6.15 0
"autumn" "large" "medium" 7.95 9.1 76.886 3.461 93.827 68.333 146.049 3.95 1.2
"autumn" "small" "high" 7.64 10.3 34.235 2.942 41.43 17 41.567 7.43 23.2
"winter" "small" "high" 7.92 8.5 10.867 1.715 199.54 3.222 27.2 1.9 74.2
"spring" "small" "high" 7.62 9.4 11.055 1.51 13.56 4 12.65 1.456 13
"summer" "medium" "high" 7.75 10.7 15.5 3.976 57.64 10.5 43.169 3.12 4.1
"winter" "small" "high" 7.08 8.4 9.45 1.572 26.54 4 13.6 0.675 29.7
"winter" "small" "high" 8.1 9.8 14.34 0.73 22.5 23 45.5 0.85 17.1
"spring" "large" "medium" 8.61 10.1 3.518 0.663 12.22 3.222 7 1.3 48.3
"summer" "large" "medium" 8.22 9.5 2.3 0.672 9.87 4 6.123 0.8 50.4
"summer" "large" "medium" 8.4 10 3.51 0.866 29.65 5.8 15 2.86 17.3
"summer" "medium" "high" 8.12 10.2 7.613 0.699 33.56 28.034 49.658 2.2 18.1
"winter" "large" "low" 8.7 11.7 21.4656 3.765 91.45 38 83 17 0
"summer" "large" "low" 8.1 8.2 26.54 2.805 42.75 48.5 88.125 13.98 0
"autumn" "large" "low" 8.35 11.1 22.56 3.14 76.2 41 98.665 17.456 1.7

114
v2/lab 10 - problems.R Normal file
View File

@ -0,0 +1,114 @@
##########################################################################################################################
#
# PROBLEMS
#
##########################################################################################################################
#
# - Use GA search (using the ga() function in the GA package) to find the minimum of the real-valued function
# f(x) = abs(x) + cos(x). Restrict the search interval to [-20, 20]. Carefully define the fitness function,
# since the ga() can only maximize it!
#
##########################################################################################################################
#
# - Use GA search to find the minimum of the real-valued two-dimensional function
# f(x1, x2) = 20 + x1^2 + x2^2 - 10*(cos(2*pi*x1) + cos(2*pi*x2)), where x1 and x2 are from the interval [-5.12, 5.12].
#
##########################################################################################################################
#
# - We are given the following data:
#
# Substrate <- c(1.73, 2.06, 2.20, 4.28, 4.44, 5.53, 6.32, 6.68, 7.28, 7.90, 8.80, 9.14, 9.18, 9.40, 9.88)
# Velocity <- c(12.48, 13.97, 14.59, 21.25, 21.66, 21.97, 25.36, 22.93, 24.81, 25.63, 24.68, 29.04, 28.08, 27.32, 27.77)
#
# Use GA search to fit the data to the model:
# Velocity = (M * Substrate) / (K + Substrate), where M and K are the model parameters. Restrict the search interval
# for M to [40.0, 50.0] and for K to [3.0, 5.0].
#
##########################################################################################################################
#
# - Use a binary GA to select (sub)optimal attribute subset for a linear model:
#
# train.data <- read.table("AlgaeLearn.txt", header = T)
# test.data <- read.table("AlgaeTest.txt", header = T)
# lm.model <- lm(a1 ~., train.data)
#
##########################################################################################################################
library(GA)
# - Use GA search (using the ga() function in the GA package) to find the minimum of the real-valued function
# f(x) = abs(x) + cos(x). Restrict the search interval to [-20, 20]. Carefully define the fitness function,
# since the ga() can only maximize it!
# Objective for the first problem: f(x) = |x| + cos(x).
# Works element-wise on a numeric vector, so it can be passed to curve().
f <- function(x) {
  magnitude <- abs(x)
  oscillation <- cos(x)
  magnitude + oscillation
}
# Visualise the objective over the search interval [-20, 20].
curve(f, from = -20, to = 20, n = 1000)
# ga() can only maximise its fitness function. The task asks for the MINIMUM
# of f, so the fitness is -f: the candidate with the largest -f(x) is the one
# with the smallest f(x).
GA <- ga(type = "real-valued", fitness = function(x) -f(x), lower = -20, upper = 20)
# The object returned can be plotted (the fitness shown is -f, not f)
plot(GA)
summary(GA)
# Plot the solution found by the GA on top of the original function f
curve(f, from = -20, to = 20, n = 1000)
points(GA@solution, f(GA@solution), col="red")
# - Use GA search to find the minimum of the real-valued two-dimensional function
# f(x1, x2) = 20 + x1^2 + x2^2 - 10*(cos(2*pi*x1) + cos(2*pi*x2)), where x1 and x2 are from the interval [-5.12, 5.12].
# https://stackoverflow.com/a/68635397
# Objective for the second problem (two-dimensional Rastrigin function):
#   f(x1, x2) = 20 + x1^2 + x2^2 - 10 * (cos(2*pi*x1) + cos(2*pi*x2))
# x1, x2: coordinates of the point at which the function is evaluated.
fitness_f <- function(x1, x2) {
  bowl <- 20 + x1^2 + x2^2
  ripple <- cos(2 * pi * x1) + cos(2 * pi * x2)
  bowl - 10 * ripple
}
# ga() maximises its fitness, so the objective is negated to search for the
# minimum. Each candidate arrives as one numeric vector and is unpacked into
# the two arguments of fitness_f.
GA <- ga(
  type = "real-valued",
  fitness = function(point) -fitness_f(point[1], point[2]),
  lower = c(-5.12, -5.12),
  upper = c(5.12, 5.12),
  maxiter = 200
)
# Fitness across generations, followed by a summary of the run
plot(GA)
summary(GA)
# - We are given the following data:
#
# Substrate <- c(1.73, 2.06, 2.20, 4.28, 4.44, 5.53, 6.32, 6.68, 7.28, 7.90, 8.80, 9.14, 9.18, 9.40, 9.88)
# Velocity <- c(12.48, 13.97, 14.59, 21.25, 21.66, 21.97, 25.36, 22.93, 24.81, 25.63, 24.68, 29.04, 28.08, 27.32, 27.77)
#
# Use GA search to fit the data to the model:
# Velocity = (M * Substrate) / (K + Substrate), where M and K are the model parameters. Restrict the search interval
# for M to [40.0, 50.0] and for K to [3.0, 5.0].
# Observed data, as given in the problem statement.
Substrate <- c(1.73, 2.06, 2.20, 4.28, 4.44, 5.53, 6.32, 6.68, 7.28, 7.90, 8.80, 9.14, 9.18, 9.40, 9.88)
Velocity <- c(12.48, 13.97, 14.59, 21.25, 21.66, 21.97, 25.36, 22.93, 24.81, 25.63, 24.68, 29.04, 28.08, 27.32, 27.77)

# Predicted Velocity for every observed Substrate value:
#   Velocity = (M * Substrate) / (K + Substrate)
# params: numeric vector with params[1] = M and params[2] = K.
# Returns a numeric vector of the same length as Substrate.
model <- function(params) {
  (params[1] * Substrate) / (params[2] + Substrate)
}

# GA fitness: negative sum of squared residuals between the OBSERVED Velocity
# and the model's prediction. The residual must be taken against Velocity
# (the quantity the model predicts), not against Substrate (its input).
# Negated because ga() maximises, while the squared error should be minimised.
fitness_f <- function(params) {
  -sum((Velocity - model(params))^2)
}
# Search M in [40, 50] and K in [3, 5]. The run stops after 5000 generations,
# or earlier once 200 consecutive generations bring no improvement (run = 200).
GA2 <- ga(type = "real-valued", fitness = fitness_f, lower = c(40.0, 3.0), upper = c(50.0, 5.0),
          popSize = 500, crossover = gareal_blxCrossover, maxiter = 5000, run = 200, names = c("M", "K"))
summary(GA2)
# Let's plot our solution
plot(Substrate, Velocity)
# GA2@solution is a matrix with one row per best solution. Take the first row
# explicitly so that params[1] = M and params[2] = K even when several tied
# solutions are returned.
lines(Substrate, model(GA2@solution[1, ]))
# - Use a binary GA to select (sub)optimal attribute subset for a linear model:
#
# train.data <- read.table("AlgaeLearn.txt", header = T)
# test.data <- read.table("AlgaeTest.txt", header = T)
# lm.model <- lm(a1 ~., train.data)
# Load the algae training and test sets (first row holds the column names).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
train.data <- read.table("./data/AlgaeLearn.txt", header = TRUE)
test.data <- read.table("./data/AlgaeTest.txt", header = TRUE)
# Baseline linear model: a1 regressed on every other column of the training data.
lm.model <- lm(a1 ~ ., train.data)

377
v2/lab2_code.R Normal file
View File

@ -0,0 +1,377 @@
# We are going to use the GA package
# Make sure that the package is installed.
# You install a package in R with the function install.packages():
#
# install.packages("GA")
library(GA)
#
# To install packages without root access:
#
# install.packages("GA", lib="/mylibs/Rpackages/") ## or some other path, e.g., C:\yourFolder
# library(GA, lib.loc="/mylibs/Rpackages/")
#
#
# EXAMPLE 1: One-dimensional function optimization
#
#
# The asymmetric double claw is difficult to maximize because there are many local solutions.
# Standard derivative-based optimizers would simply climb up the hill closest to the starting value.
# Asymmetric double claw: a mixture of two broad normal densities plus
# three very narrow spikes on the left and three wider spikes on the right.
# x: numeric vector; returns the mixture density evaluated at each x.
f <- function(x) {
  broad <- dnorm(x, -1, 2 / 3) + dnorm(x, 1, 2 / 3)
  left_spikes <- dnorm(x, -0.5, 0.01) + dnorm(x, -1, 0.01) +
    dnorm(x, -1.5, 0.01)
  right_spikes <- dnorm(x, 0.5, 0.07) + dnorm(x, 1, 0.07) +
    dnorm(x, 1.5, 0.07)
  0.46 * broad + (1 / 300) * left_spikes + (7 / 300) * right_spikes
}
# Plot the double claw
curve(f, from = -3, to = 3, n = 1000)
# For the maximization of this function we may use f directly as the fitness function
GA <- ga(type = "real-valued", fitness = f, lower = -3, upper = 3)
# The object returned can be plotted
plot(GA)
summary(GA)
# plot the solution
curve(f, from = -3, to = 3, n = 1000)
points(GA@solution, f(GA@solution), col="red")
# The evolution of the population units and the corresponding functions values at each
# generation can be obtained by defining a new monitor function and then passing this
# function as an optional argument to ga
# Monitor callback for ga(): redraws f over the search interval, overlays the
# current population with its fitness values, and pauses for one second.
# obj: the GA object passed in by ga(); its iter, lower, upper, population
# and fitness slots are read.
myMonitor <- function(obj) {
  heading <- paste("iteration =", obj@iter)
  curve(f, obj@lower, obj@upper, n = 1000, main = heading)
  points(obj@population, obj@fitness, pch = 20, col = 2)
  # mark the individuals' positions along the x axis
  rug(obj@population, col = 2)
  Sys.sleep(1)
}
GA <- ga(type = "real-valued", fitness = f, lower = -3, upper = 3, monitor = myMonitor)
## Inspect fitness across generations
plot(GA)
#
#
# EXAMPLE 2: Model fitting
#
#
# We consider a data on the growth of trees
# The age at which the tree was measured
Age <- c(2.44, 12.44, 22.44, 32.44, 42.44, 52.44, 62.44, 72.44, 82.44, 92.44, 102.44, 112.44)
# The bole volume of the tree
Vol <- c(2.2, 20.0, 93.0, 262.0, 476.0, 705.0, 967.0, 1203.0, 1409.0, 1659.0, 1898.0, 2106.0)
plot(Age, Vol)
# An ecological model for the plant size (measured by volume) as a function of age is the Richards curve:
# f(x) = a*(1-exp(-b*x))^c, where a, b, and c are the model parameters
# Let's fit the Richards curve using genetic algorithms
# We first define our model function (argument params represents a vector of the parameters a, b, and c)
# Richards curve a * (1 - exp(-b * Age))^c evaluated on the global `Age`.
# params: numeric vector c(a, b, c).
model <- function(params) {
  a_par <- params[1]
  b_par <- params[2]
  c_par <- params[3]
  a_par * (1 - exp(-b_par * Age))^c_par
}
# We define the fitness function as the sum of squares of the differences between estimated and observed data
# Fitness for ga(): the negated sum of squared differences between the
# observed volumes (global `Vol`) and the values predicted by model().
# params: parameter vector forwarded to model().
myFitness2 <- function(params) {
  residual <- Vol - model(params)
  -sum(residual^2)
}
# The fitness function needs to be maximized with respect to the model's parameters, given the observed data in Age and Vol.
# A blend crossover is used for improving the search over the parameter space: for two parents x1 and x2 (assume x1 < x2)
# it randomly picks a solution in the range [x1 - k*(x2-x1), x2 + k*(x2-x1)], where k represents a constant between 0 and 1.
# We restrict the search interval for a,b, and c to [1000.0, 5000.0], [0.0, 5.0], and [0.0, 5.0], respectively.
GA2 <- ga(type = "real-valued", fitness = myFitness2, lower = c(1000, 0, 0), upper = c(5000, 5, 5),
popSize = 500, crossover = gareal_blxCrossover, maxiter = 5000, run = 200, names = c("a", "b", "c"))
summary(GA2)
# Let's plot our solution
plot(Age, Vol)
lines(Age, model(GA2@solution))
# we can use a monitor function to plot the current solution
# Monitor callback for ga(): plots the observed data together with the curve
# of the fittest individual in the current population, then pauses one second.
# obj: the GA object passed in by ga(); its fitness, population and iter
# slots are read.
myMonitor2 <- function(obj) {
  best <- which.max(obj@fitness)
  plot(Age, Vol)
  lines(Age, model(obj@population[best, ]), col = "red")
  title(paste("iteration =", obj@iter), font.main = 1)
  Sys.sleep(1)
}
GA2 <- ga(type = "real-valued", fitness = myFitness2, lower = c(1000, 0, 0), upper = c(5000, 5, 5),
popSize = 500, crossover = gareal_blxCrossover, maxiter = 5000, run = 200, names = c("a", "b", "c"), monitor=myMonitor2)
#
#
# EXAMPLE 3: The Knapsack problem
#
#
# The Knapsack problem is defined as follows: given a set of items, each with a mass and a value, determine the subset
# of items to be included in a collection so that the total weight is less than or equal to a given limit and the total value
# is as large as possible.
# a vector of the items' values
values <- c(5, 8, 3, 4, 6, 5, 4, 3, 2)
# a vector of the items' weights
weights <- c(1, 3, 2, 4, 2, 1, 3, 4, 5)
# the knapsack capacity
Capacity <- 10
# A binary GA can be used to solve the knapsack problem. The solution to this problem is a binary string equal to the number
# of items where the ith bit is 1 if the ith item is in the subset and 0 otherwise. The fitness function should penalize
# unfeasible solutions.
# Fitness for the knapsack GA.
# x: binary vector, x[i] == 1 when item i is packed.
# Returns the total value of the packed items, or, when the total weight
# exceeds `Capacity`, the (negative) amount by which capacity is exceeded.
# Relies on the globals `values`, `weights` and `Capacity`.
knapsack <- function(x) {
  total_weight <- sum(x * weights)
  if (total_weight > Capacity) {
    # infeasible solution: penalize by the overweight
    return(Capacity - total_weight)
  }
  sum(x * values)
}
GA3 <- ga(type = "binary", fitness = knapsack, nBits = length(weights), maxiter = 1000, run = 200, popSize = 100)
summary(GA3)
GA3@solution
#
# Example 4: ESTABLISHING A TIMETABLE
#
# A small football club has a youth team and a senior team. The player
# training program has seven components: stamina training, strength training,
# technique, tactics, psychological preparation, teamwork, and regeneration.
# Due to lack of funds, for each component, a single staff member is responsible
# for both the youth and the senior team, with the exceptions of tactics and
# stamina training, where two staff members are assigned, one to each team.
#
# The weekly training regime is summarized in the following table:
#
#+----------+---------------------+-----------------+-----------------+
#| Coach | Component | Senior team | Youth team |
#+----------+---------------------+-----------------+-----------------+
#| Anze | Strength training | 1 time a week | 1 time a week |
#| Bojan | Technique | 3 times a week | 3 times a week |
#| Ciril | Regeneration | 2 times a week | 2 times a week |
#| Dusan | Stamina training | doesn't conduct | 4 times a week |
#| Erik | Stamina training | 4 times a week | doesn't conduct |
#| Filip | Teamwork | 3 times a week | 3 times a week |
#| Gasper | Psychological prep. | 1 time a week | 1 time a week |
#| Hugo | Tactics | 1 time a week | doesn't conduct |
#| Iztok | Tactics | doesn't conduct | 1 time a week |
#+----------+---------------------+-----------------+-----------------+
#
# Training is performed from Monday to Friday in four different time slots:
# 8:00 - 10:00, 10:15 - 12:15, 14:00 - 16:00, and 16:15 - 18:15.
#
# Constraints:
#
# - each time slot can hold only one component for the youth team and one component
# for the senior team (the youth and senior teams train separately, so a single
# staff member can only train one of the two teams in a single time slot).
#
# - a team is not allowed to train the same component 2 or more times within one day.
#
# - the main purpose of the Tactics training component is to prepare the team for
# the upcoming match. Matches are usually played during the weekend, so Tactics
# training should be scheduled for Thursday in the 16:15 - 18:15 time slot.
#
# - after a match, the players need to rest. Therefore, there is no training in
# the Monday 8:00 - 10:00 time slot.
#
# - the stamina training coach Dusan is not available on Monday mornings
# (8:00 - 10:00 and 10:15 - 12:15 time slots)
#
# - there can be no Technique training on Wednesdays, because coach Bojan is
# not available.
#
#
# Produce a training schedule that takes into account these two and all of
# the above restrictions!
#
#
# VARIABLES
#
# senior - number of sessions per component for the senior team
# youth - number of sessions per component for the youth team
# staff - coaching staff -> the staff's actual occupancy is solved for; how much a certain coach can handle is (hard) coded in the senior and youth variables!
# slots - possible slots
# Required weekly sessions per coach (coach order follows the table above)
senior <- c(1, 3, 2, 0, 4, 3, 1, 1, 0)
youth <- c(1, 3, 2, 4, 0, 3, 1, 0, 1)
# 5 days x 4 time slots per day
slots <- 5 * 4
# Fitness function for the timetable GA: returns minus the number of
# constraint violations, so a timetable without violations scores 0.
# timetable: binary vector of length 5 * 4 * 9 * 2
#            (days x time slots x staff x teams).
# Relies on the global vectors `senior` and `youth`
# (required weekly sessions per coach).
valueBin <- function(timetable) {
  n_days <- 5
  n_slots <- 4
  n_staff <- 9

  # organize data into a multi-dimensional array:
  # days, time slots, staff, teams (team 1 = senior, team 2 = youth)
  sched <- array(as.integer(timetable), c(n_days, n_slots, n_staff, 2))
  violations <- 0

  for (i in seq_len(n_staff)) {
    senior_i <- sched[, , i, 1]
    youth_i <- sched[, , i, 2]

    # the number of sessions per component must match the weekly plan
    violations <- violations + abs(sum(senior_i) - senior[i])
    violations <- violations + abs(sum(youth_i) - youth[i])

    # it is not allowed to train the same component 2 or more times
    # within one day (rows of the day x slot matrix are days)
    violations <- violations + sum(rowSums(senior_i) > 1)
    violations <- violations + sum(rowSums(youth_i) > 1)
  }

  # a single staff member can only train one of the two teams
  # in a single time slot
  violations <- violations +
    sum(sched[, , , 1] == sched[, , , 2] & sched[, , , 1] != 0)

  # each time slot can hold only one component for the youth team
  # and one component for the senior team
  for (team in 1:2) {
    sessions_per_slot <- apply(sched[, , , team], c(1, 2), sum)
    violations <- violations + sum(pmax(0, sessions_per_slot - 1))
  }

  # Tactics training should be scheduled for Thursday (day 4) in the
  # 16:15 - 18:15 time slot, which is the 4th slot of the day
  violations <- violations + (sched[4, 4, 8, 1] != 1)
  violations <- violations + (sched[4, 4, 9, 2] != 1)

  # there is no training in the Monday 8:00 - 10:00 time slot
  violations <- violations + sum(sched[1, 1, , ])

  # the stamina training coach Dusan (staff 4) is not available
  # on Monday mornings (slots 1 and 2)
  violations <- violations + sum(sched[1, 1:2, 4, ] == 1)

  # there can be no Technique training (staff 2) on Wednesdays (day 3)
  violations <- violations + sum(sched[3, , 2, ] == 1)

  -violations
}
# Custom initial population for the timetable GA: starts from the default
# binary population and forces every individual to satisfy the fixed
# constraints.
# object: the GA object handed over by ga(); passed on to gabin_Population().
# Returns the population matrix (one individual per row).
myInitPopulation <- function(object) {
  p <- gabin_Population(object)
  for (i in seq_len(nrow(p))) {
    # days x time slots x staff x teams
    sched <- array(p[i, ], c(5, 4, 9, 2))

    # Tactics training on Thursdays (day 4) in the 16:15 - 18:15 time slot,
    # which is the 4th slot of the day
    sched[4, 4, 8, 1] <- 1
    sched[4, 4, 9, 2] <- 1

    # there is no training in the Monday 8:00 - 10:00 time slot
    sched[1, 1, , ] <- 0

    # there is no Stamina training by Dusan (staff 4) on Monday mornings
    sched[1, 1:2, 4, ] <- 0

    # there is no Technique training (staff 2) on Wednesdays
    sched[3, , 2, ] <- 0

    p[i, ] <- as.vector(sched)
  }
  p
}
GA4 <- ga(type = "binary", fitness = valueBin, nBits = 4*5*9*2,
popSize = 500, maxiter = 10, run = 200, population = myInitPopulation)
# Extract the day x time-slot schedule of one coach for one team.
# solution: timetable vector laid out as days x slots x staff x teams (5, 4, 9, 2).
# coach:    staff index (third array dimension).
# team:     team index (fourth array dimension).
timetable2 <- function(solution, coach, team) {
  sched <- array(solution, dim = c(5, 4, 9, 2))
  sched[, , coach, team]
}
## timetable of a coach 2 for team 1.
t <- timetable2(GA4@solution[1,],2,1)
t
#
#
# EXAMPLE 5: Traveling salesman problem
#
#
# Given a list of cities and the distances between each pair of cities, what is the shortest possible route that visits
# each city exactly once and returns to the origin city?
data("eurodist", package = "datasets")
D <- as.matrix(eurodist)
D
# An individual round tour is represented as a permutation of a default numbering of the cities defining the current order
# in which the cities are to be visited
# Calculation of the tour length
# Length of a closed round tour through the cities in `tour`.
# tour: vector of city indices into the global distance matrix `D`,
#       in visiting order; the leg from the last city back to the first
#       is included.
# Returns the summed distance; 0 for an empty or single-city tour.
tourLength <- function(tour) {
  if (length(tour) == 0) {
    return(0)
  }
  # successor of every city; the last city wraps around to the first
  next_stop <- c(tour[-1], tour[1])
  # a two-column index matrix picks D[from, to] for every leg at once
  sum(D[cbind(tour, next_stop)])
}
# ga() maximizes the fitness, so shorter tours are rewarded by using the
# reciprocal of the tour length.
# tour: vector of city indices, forwarded to tourLength().
tspFitness <- function(tour) {
  len <- tourLength(tour)
  1 / len
}
GA5 <- ga(type = "permutation", fitness = tspFitness, lower = 1, upper = ncol(D), popSize = 50, maxiter = 5000, run = 500, pmutation = 0.2)
summary(GA5)
# Reconstruct the solution found
tour <- GA5@solution[1, ]
tour <- c(tour, tour[1])
tourLength(tour)
colnames(D)[tour]

46
v3/lab 2 - problems.R Normal file
View File

@ -0,0 +1,46 @@
#######################################################################################################################
#
# PROBLEMS
#
#######################################################################################################################
#
# Load the Movies dataset using the command:
#
# md <- read.table("movies.txt", sep=",", header=TRUE)
#
# Answer the following questions:
#
# - Are there more movies shorter than 100 min or longer than (or equal to) 100 minutes?
# (show your answer numerically and graphically)
#
# - Are there more action comedies or romantic comedies?
#
# - Plot a histogram of the ratings for drama movies.
#
# - Is the average rating of dramas higher than the average rating of non-dramas?
# (show your answer numerically and graphically)
#
# - Plot the number of animated movies being produced every year for the period 1995-2005.
#
# - Is there a clear boundary between short and feature movies (according to their length)?
#
#
#######################################################################################################################
#
# Load the Players dataset using the command:
#
# players <- read.table("players.txt", sep=",", header = T)
#
# - Plot the proportion of players according to playing positions.
#
# - Compare career rebounds (the "reb" attribute) with respect to playing position.
#
# - Show the distribution of free throw percentages.
# The percentage is determined by dividing the number of shots made ("ftm") by the total number of shots attempted ("fta").
#
# - Compare career 3-pointers made for the players active between 1990 and 2007, with respect to playing position.
#
# - How does the average career length of retired players vary from year to year?
#
#######################################################################################################################

326
v3/lab3_visualization.R Normal file
View File

@ -0,0 +1,326 @@
##############################################################################
#
# DATA VISUALIZATION
#
##############################################################################
# Please download data files "movies.txt" and "players.txt" into a local directory
# then set that directory as the current working directory of R.
# You can achieve this using the "setwd" command or by selecting "File -> Change dir..."
# for example:
# setwd("c:\\labs\\data\\")
library(ggplot2)
library(dplyr)
# To read data from a text file, use the "read.table" command.
# The parameter header=TRUE indicates that the file to be read includes a first line with the column names
md <- read.table(file="movies.txt", sep=",", header=TRUE)
# To get more information on any specific named function, type "?" followed by the function name
?read.table
# Useful functions
head(md)
summary(md)
str(md)
names(md)
# We will transform binary attributes into nominal variables with a fixed number of possible values (factors)
md$Action <- as.factor(md$Action)
md$Animation <- as.factor(md$Animation)
# The remaining columns will be transformed using the for loop
for (i in 20:24)
md[,i] <- as.factor(md[,i])
#
# Type conversion functions:
#
# as.numeric
# as.integer
# as.character
# as.logical
# as.factor
# as.ordered
#
# values that cannot be converted to the specified type will be converted to a NA value
#
# Binary attributes are now represented as factors
summary(md)
# Accessing data frame elements...
md[30,]
md[30,3]
md[30,"length"]
md[,3]
md$length
# Useful data visualization functions
plot(md$length)
hist(md$length)
plot(density(md$length))
boxplot(md$length)
barplot(table(md$Drama))
pie(table(md$mpaa))
## nicer plots with ggplot2 + dplyr
md %>% ggplot(aes(length)) + geom_histogram(bins = 40) + ggtitle("A genetic histogram") + xlab("Length")
## plotting w.r.t. multiple mpaa categories
md %>% ggplot(aes(length,fill = mpaa)) + geom_density(alpha = 0.2)
## What about a nicer boxplot w.r.t mpaa?
## theme_bw() is more neutral theme
md %>% ggplot(aes(Drama, rating, color = mpaa)) + geom_boxplot() + theme_bw()
## show table view
###############################################################################
#
# EXAMPLE 1: What is the proportion of comedies to other genres in our data set?
#
###############################################################################
# the table() command gives the frequency of values in the vector
table(md$Comedy)
# the proportion of comedies can be plotted
barplot(table(md$Comedy))
pie(table(md$Comedy))
# it is important to always label graphs ...
tab <- table(md$Comedy)
names(tab) <- c("Other genres", "Comedies")
tab
pie(tab)
sum(tab)
barplot(tab, ylab="Number of titles", main="Proportion of comedies to other genres")
barplot(tab / sum(tab) * 100, ylab="Percentage of titles", main="The proportion of comedies to other genres")
pie(tab, main = "Proportion of comedies to other genres")
###############################################################################
#
# EXAMPLE 2: How are ratings distributed for comedies?
#
###############################################################################
# Plot the rating distribution for comedies
hist(md[md$Comedy == "1", "rating"], xlab="Rating", ylab="Frequency", main="Histogram of ratings for comedies")
# Box plots provide a visual display of the range and potential skewness of the data
boxplot(md[md$Comedy == "1", "rating"], ylab="Rating", main="Boxplot of ratings for comedies")
quantile(md$rating[md$Comedy == 1])
###############################################################################
#
# EXAMPLE 3: Are comedies on average better rated than non-comedies?
#
###############################################################################
# Select comedies
comedy <- md$Comedy == "1"
# Calculate the mean rating value for comedies and non-comedies
mean(md[comedy,"rating"])
mean(md[!comedy,"rating"])
# Comedies have, on average, higher ratings than non-comedies
# Side-by-side boxplots of ratings grouped by values of the attribute "Comedy"
boxplot(rating ~ Comedy, data=md)
boxplot(rating ~ Comedy, data=md, names=c("Other genres", "Comedies"), ylab="Rating", main="Comparison of ratings between comedies and non-comedies")
## or with dplyr directly
md %>% group_by(Comedy) %>% select(rating) %>% summarise(mean(rating))
###############################################################################
#
# EXAMPLE 4: What is the proportion of comedies (per year) from 1990 onwards?
#
###############################################################################
sel <- md$year >= 1990
# the table() command can be used to get a two-way contingency table
table(md$Comedy[sel], md$year[sel])
table(md$year[sel])
tabcomedy <- table(md$Comedy[sel], md$year[sel])
tabyear <- table(md$year[sel])
tabcomedy[2,]/tabyear
ratio <- tabcomedy[2,]/tabyear
barplot(ratio, xlab="Year", ylab="Relative frequency", main="Proportion of comedies")
plot(x=names(ratio), y=as.vector(ratio), type="l", xlab="Year", ylab="Relative frequency", main="Proportion of comedies, 1990-2005")
## or with dplyr directly
md %>% filter(year >= 1990) %>%
group_by(year, Comedy) %>%
summarise(n = n()) %>% mutate(freq = n / sum(n)) %>%
filter(Comedy == 1) %>% select(year, freq) %>%
ggplot(aes(year, freq)) + geom_point() + ggtitle("Frequency of comedies") + ylab("Frequency") + xlab("Year") + geom_line() + theme_bw()
###############################################################################
#
# EXAMPLE 5: Are there more movies above or below the average rating?
#
###############################################################################
# the average rating
mean(md$rating)
# how many movies are above the average rating?
tab <- table(md$rating > mean(md$rating))
tab
names(tab) <- c("below", "above")
barplot(tab, ylab="Number of titles", main="Proportion of movies above and below the average rating")
pie(tab, main="Proportion of movies above and below the average rating")
# Box plots provide a summarization of the variable distribution
boxplot(md$rating, ylab="Rating", main="Boxplot of movie ratings")
# The horizontal line inside the box represents the median rating value
# Let's plot the mean value...
abline(h=mean(md$rating))
## or with dplyr + ggplot
md %>% mutate(mRate = mean(rating)) %>%
mutate(indicator = ifelse(rating - mRate > 0, "above", "below")) %>%
group_by(indicator) %>%
summarise(counts = n()) %>%
ggplot(aes(indicator, counts, fill = indicator))+ geom_bar(stat = "identity")
# The mean differs from the median so the distribution is skewed.
# We can conclude that there are more cases above the mean value.
###############################################################################
#
# EXAMPLE 6: Do movies with bigger budgets get higher ratings?
#
###############################################################################
# there are missing values in the budget attribute
summary(md$budget)
is.na(md$budget)
table(is.na(md$budget))
which(is.na(md$budget))
# select complete observations only
sel <- is.na(md$budget)
mdsub <- md[!sel,]
nrow(mdsub)
summary(mdsub$budget)
plot(mdsub$budget, mdsub$rating, xlab="Budget in $", ylab="Rating", main="Movie rating vs budget")
# Plotted points are mostly located in the upper left part of the diagram,
# which means that a higher budget usually leads to a higher rating
# Utilization of the budget in terms of rating
ratio <- mdsub$budget/mdsub$rating
hist(ratio)
# Which movie has the worst budget utilization?
mdsub[which.max(ratio),]
# Let's discretize these budgets to:
# low (less than 1M), mid (between 1M and 50M) and big (more than 50M)
disbudget <- cut(mdsub$budget, c(0, 1000000, 50000000, 500000000), labels=c("low", "mid", "big"))
barplot(table(disbudget)/length(disbudget), xlab="Budget", ylab="Relative frequency", main="Proportion of movies vs budget")
# Side-by-side boxplots of ratings grouped by budget values
boxplot(mdsub$rating ~ disbudget, xlab="Budget", ylab="Rating", main="Boxplot of movie rating vs budget")
## Is this dependent on the mpaa?
## or with dplyr + ggplot + adding votes
md %>% select(budget, rating, votes, mpaa) %>%
na.omit() %>%
ggplot(aes(budget, rating, color = votes, fill = mpaa)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x) + theme_bw()
###############################################################################
#
# EXAMPLE 7:
# What is the cumulative movie budget for each year from 1990 to 2000?
# What is the average movie budget for each year from 1990 to 2000?
# (consider only those movies for which information on the budget is available!)
#
###############################################################################
# Select the movies that contain information on their budgets
sel <- !is.na(md$budget) & md$year >= 1990 & md$year <= 2000
# We can calculate cumulative budget for each year using the "aggregate" function
# Data overflow problem!
aggregate(budget ~ year, data = md[sel,], sum)
# The budget values are represented as integers
typeof(md$budget)
# In order to avoid the overflow problem we have to convert
# the budget values into a double-precision representation (using the as.double() command)
aggregate(as.double(budget) ~ year, data = md[sel,], sum)
sum.budget <- aggregate(as.double(budget) ~ year, data = md[sel,], sum)
plot(sum.budget, type="l", xlab="Year", ylab="Cumulative budget in $", main="Cumulative movie budget per year")
avg.budget <- aggregate(as.double(budget) ~ year, data = md[sel,], mean)
plot(avg.budget, type="l", xlab="Year", ylab="Average budget in $", main="Average movie budget per year")
## or with dplyr
md %>% select(budget, year) %>% na.omit() %>%
group_by(year) %>% summarise(budget2 = sum(as.numeric(budget))) %>%
arrange(year) %>% mutate(csum = cumsum(budget2)) %>%
ggplot(aes(year, csum)) + geom_bar(stat = "identity") + theme_bw()
##############################################################################
#
# EXAMPLE 8: (players dataset)
# What is the average height for each season in the period from 1970 to 2000?
#
##############################################################################
# Load the Players dataset
players <- read.table("players.txt", sep=",", header = T)
summary(players)
# Mean height of the players active in each year from 1970 to 2000.
# vapply() builds the whole result at once instead of growing a vector
# with c() inside a loop, and guarantees one numeric value per year.
h <- vapply(1970:2000, function(y) {
  # Select the players whose first/last season span the year y
  sel <- players$firstseason <= y & players$lastseason >= y
  mean(players$height[sel])
}, numeric(1))
# plot the resulting vector (use type="l" for lines)
plot(1970:2000, h, type="l", xlab="Year", ylab="Height in cm", main="Average height in NBA")
## or with dplyr
dfx <- data.frame(year = 1970:2000,mh = h)
dfx %>% ggplot(aes(year, mh)) + geom_point() + geom_smooth(method = "loess") + theme_bw() + xlab("Year") + ylab("Mean height")

4925
v3/movies.txt Normal file

File diff suppressed because it is too large Load Diff

3160
v3/players.txt Normal file

File diff suppressed because it is too large Load Diff