library(ggplot2)
# Load the stickleback plate-count data. Strings are kept as character
# vectors rather than being converted to factors.
stickles <- read.csv(
  file = "Data/03e3SticklebackPlates.csv",
  stringsAsFactors = FALSE
)
# Frequency distribution of the number of plates per stickleback (a type of
# fish). geom_histogram() is called with no arguments, so the default binning
# is used.
ggplot(data = stickles, mapping = aes(x = no.plates)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Same histogram, split by genotype (homozygous recessive, heterozygous, and
# homozygous dominant): bars are filled by genotype and each genotype gets
# its own facet row.
ggplot(
  data = stickles,
  mapping = aes(x = no.plates, fill = genotype)
) +
  geom_histogram() +
  facet_grid(genotype ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Learning to use a new package, dplyr, to select subsets of data
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the rows whose genotype is "mm" and print them.
filter(.data = stickles, genotype == "mm")
## genotype no.plates
## 1 mm 11
## 2 mm 14
## 3 mm 11
## 4 mm 8
## 5 mm 12
## 6 mm 11
## 7 mm 12
## 8 mm 11
## 9 mm 9
## 10 mm 9
## 11 mm 9
## 12 mm 10
## 13 mm 9
## 14 mm 11
## 15 mm 14
## 16 mm 14
## 17 mm 14
## 18 mm 14
## 19 mm 13
## 20 mm 13
## 21 mm 10
## 22 mm 13
## 23 mm 11
## 24 mm 14
## 25 mm 14
## 26 mm 8
## 27 mm 37
## 28 mm 13
## 29 mm 10
## 30 mm 12
## 31 mm 8
## 32 mm 12
## 33 mm 10
## 34 mm 14
## 35 mm 8
## 36 mm 10
## 37 mm 13
## 38 mm 22
## 39 mm 9
## 40 mm 12
## 41 mm 8
## 42 mm 12
## 43 mm 11
## 44 mm 14
## 45 mm 12
## 46 mm 8
## 47 mm 12
## 48 mm 10
## 49 mm 9
## 50 mm 10
## 51 mm 14
## 52 mm 10
## 53 mm 8
## 54 mm 14
## 55 mm 10
## 56 mm 14
## 57 mm 12
## 58 mm 11
## 59 mm 12
## 60 mm 12
## 61 mm 14
## 62 mm 12
## 63 mm 12
## 64 mm 9
## 65 mm 11
## 66 mm 14
## 67 mm 9
## 68 mm 9
## 69 mm 11
## 70 mm 12
## 71 mm 10
## 72 mm 10
## 73 mm 11
## 74 mm 11
## 75 mm 14
## 76 mm 13
## 77 mm 11
## 78 mm 9
## 79 mm 12
## 80 mm 14
## 81 mm 11
## 82 mm 12
## 83 mm 11
## 84 mm 6
## 85 mm 14
## 86 mm 8
## 87 mm 14
## 88 mm 11
# Operators that allow you to test logical conditions (TRUE/FALSE)
# Each expression below compares two numbers and evaluates to a single
# TRUE or FALSE; the printed result follows each one.
# == : equal to
1 == 2
## [1] FALSE
1 == 1
## [1] TRUE
# >= : greater than or equal to
1 >= 2
## [1] FALSE
# <= : less than or equal to
1 <= 2
## [1] TRUE
# > : strictly greater than
1 > 2
## [1] FALSE
# < : strictly less than
1 < 2
## [1] TRUE
# != : not equal to
1 != 2
## [1] TRUE
# Histogram of plate counts for the "mm" genotype only: the data frame is
# filtered inline before being handed to ggplot().
ggplot(
  data = filter(stickles, genotype == "mm"),
  mapping = aes(x = no.plates)
) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Drop the heterozygote ("Mm") rows and print everything that remains.
filter(.data = stickles, genotype != "Mm")
## genotype no.plates
## 1 mm 11
## 2 mm 14
## 3 mm 11
## 4 mm 8
## 5 mm 12
## 6 mm 11
## 7 mm 12
## 8 MM 64
## 9 mm 11
## 10 MM 65
## 11 mm 9
## 12 MM 62
## 13 MM 63
## 14 mm 9
## 15 mm 9
## 16 mm 10
## 17 MM 60
## 18 mm 9
## 19 MM 64
## 20 mm 11
## 21 MM 63
## 22 mm 14
## 23 mm 14
## 24 MM 64
## 25 mm 14
## 26 mm 14
## 27 mm 13
## 28 MM 64
## 29 MM 65
## 30 MM 64
## 31 MM 64
## 32 MM 63
## 33 mm 13
## 34 mm 10
## 35 MM 49
## 36 MM 66
## 37 MM 61
## 38 MM 63
## 39 MM 66
## 40 MM 65
## 41 mm 13
## 42 MM 62
## 43 MM 64
## 44 mm 11
## 45 mm 14
## 46 MM 60
## 47 mm 14
## 48 MM 62
## 49 mm 8
## 50 mm 37
## 51 mm 13
## 52 mm 10
## 53 MM 64
## 54 mm 12
## 55 mm 8
## 56 MM 64
## 57 MM 65
## 58 mm 12
## 59 MM 64
## 60 MM 66
## 61 mm 10
## 62 MM 62
## 63 mm 14
## 64 MM 61
## 65 mm 8
## 66 mm 10
## 67 mm 13
## 68 mm 22
## 69 mm 9
## 70 MM 65
## 71 MM 63
## 72 MM 63
## 73 mm 12
## 74 MM 63
## 75 MM 65
## 76 MM 62
## 77 MM 63
## 78 mm 8
## 79 mm 12
## 80 MM 61
## 81 MM 64
## 82 mm 11
## 83 mm 14
## 84 mm 12
## 85 MM 61
## 86 MM 65
## 87 MM 64
## 88 MM 63
## 89 mm 8
## 90 MM 65
## 91 mm 12
## 92 mm 10
## 93 MM 63
## 94 MM 64
## 95 mm 9
## 96 MM 42
## 97 mm 10
## 98 MM 62
## 99 MM 64
## 100 mm 14
## 101 mm 10
## 102 mm 8
## 103 MM 63
## 104 mm 14
## 105 mm 10
## 106 mm 14
## 107 MM 65
## 108 MM 63
## 109 MM 69
## 110 mm 12
## 111 MM 62
## 112 MM 63
## 113 MM 62
## 114 MM 63
## 115 MM 64
## 116 MM 62
## 117 mm 11
## 118 mm 12
## 119 MM 67
## 120 mm 12
## 121 mm 14
## 122 MM 66
## 123 mm 12
## 124 MM 60
## 125 mm 12
## 126 mm 9
## 127 MM 66
## 128 mm 11
## 129 mm 14
## 130 mm 9
## 131 MM 56
## 132 mm 9
## 133 MM 62
## 134 mm 11
## 135 MM 62
## 136 mm 12
## 137 mm 10
## 138 MM 64
## 139 mm 10
## 140 MM 65
## 141 mm 11
## 142 MM 62
## 143 mm 11
## 144 MM 65
## 145 mm 14
## 146 MM 62
## 147 MM 62
## 148 mm 13
## 149 MM 63
## 150 MM 64
## 151 MM 64
## 152 MM 63
## 153 mm 11
## 154 mm 9
## 155 MM 64
## 156 MM 62
## 157 mm 12
## 158 MM 57
## 159 mm 14
## 160 MM 62
## 161 mm 11
## 162 mm 12
## 163 MM 61
## 164 mm 11
## 165 mm 6
## 166 mm 14
## 167 mm 8
## 168 MM 62
## 169 mm 14
## 170 mm 11
# Add a new column, diff: each fish's plate count minus the mean plate count
# over the whole data set. The result is printed, not stored.
mutate(
  .data = stickles,
  diff = no.plates - mean(no.plates)
)
## genotype no.plates diff
## 1 mm 11 -32.4331395
## 2 Mm 63 19.5668605
## 3 Mm 22 -21.4331395
## 4 Mm 10 -33.4331395
## 5 mm 14 -29.4331395
## 6 mm 11 -32.4331395
## 7 Mm 58 14.5668605
## 8 Mm 36 -7.4331395
## 9 Mm 31 -12.4331395
## 10 Mm 61 17.5668605
## 11 Mm 63 19.5668605
## 12 mm 8 -35.4331395
## 13 mm 12 -31.4331395
## 14 mm 11 -32.4331395
## 15 Mm 64 20.5668605
## 16 Mm 64 20.5668605
## 17 mm 12 -31.4331395
## 18 Mm 55 11.5668605
## 19 Mm 21 -22.4331395
## 20 MM 64 20.5668605
## 21 Mm 47 3.5668605
## 22 Mm 57 13.5668605
## 23 mm 11 -32.4331395
## 24 Mm 58 14.5668605
## 25 MM 65 21.5668605
## 26 Mm 63 19.5668605
## 27 mm 9 -34.4331395
## 28 MM 62 18.5668605
## 29 Mm 29 -14.4331395
## 30 Mm 10 -33.4331395
## 31 MM 63 19.5668605
## 32 mm 9 -34.4331395
## 33 Mm 61 17.5668605
## 34 Mm 39 -4.4331395
## 35 mm 9 -34.4331395
## 36 Mm 50 6.5668605
## 37 mm 10 -33.4331395
## 38 MM 60 16.5668605
## 39 Mm 60 16.5668605
## 40 mm 9 -34.4331395
## 41 Mm 63 19.5668605
## 42 MM 64 20.5668605
## 43 mm 11 -32.4331395
## 44 Mm 60 16.5668605
## 45 Mm 66 22.5668605
## 46 MM 63 19.5668605
## 47 Mm 63 19.5668605
## 48 mm 14 -29.4331395
## 49 mm 14 -29.4331395
## 50 Mm 59 15.5668605
## 51 Mm 30 -13.4331395
## 52 MM 64 20.5668605
## 53 mm 14 -29.4331395
## 54 mm 14 -29.4331395
## 55 mm 13 -30.4331395
## 56 Mm 62 18.5668605
## 57 MM 64 20.5668605
## 58 MM 65 21.5668605
## 59 Mm 34 -9.4331395
## 60 MM 64 20.5668605
## 61 MM 64 20.5668605
## 62 Mm 62 18.5668605
## 63 MM 63 19.5668605
## 64 Mm 30 -13.4331395
## 65 mm 13 -30.4331395
## 66 mm 10 -33.4331395
## 67 MM 49 5.5668605
## 68 Mm 62 18.5668605
## 69 Mm 43 -0.4331395
## 70 MM 66 22.5668605
## 71 MM 61 17.5668605
## 72 Mm 26 -17.4331395
## 73 MM 63 19.5668605
## 74 Mm 59 15.5668605
## 75 MM 66 22.5668605
## 76 MM 65 21.5668605
## 77 Mm 62 18.5668605
## 78 Mm 63 19.5668605
## 79 Mm 56 12.5668605
## 80 mm 13 -30.4331395
## 81 Mm 62 18.5668605
## 82 Mm 28 -15.4331395
## 83 Mm 61 17.5668605
## 84 Mm 56 12.5668605
## 85 MM 62 18.5668605
## 86 Mm 62 18.5668605
## 87 Mm 39 -4.4331395
## 88 Mm 25 -18.4331395
## 89 Mm 34 -9.4331395
## 90 MM 64 20.5668605
## 91 mm 11 -32.4331395
## 92 Mm 63 19.5668605
## 93 Mm 64 20.5668605
## 94 mm 14 -29.4331395
## 95 MM 60 16.5668605
## 96 Mm 61 17.5668605
## 97 mm 14 -29.4331395
## 98 MM 62 18.5668605
## 99 mm 8 -35.4331395
## 100 mm 37 -6.4331395
## 101 Mm 61 17.5668605
## 102 mm 13 -30.4331395
## 103 mm 10 -33.4331395
## 104 Mm 62 18.5668605
## 105 MM 64 20.5668605
## 106 Mm 63 19.5668605
## 107 mm 12 -31.4331395
## 108 mm 8 -35.4331395
## 109 Mm 58 14.5668605
## 110 Mm 51 7.5668605
## 111 MM 64 20.5668605
## 112 MM 65 21.5668605
## 113 Mm 62 18.5668605
## 114 mm 12 -31.4331395
## 115 Mm 61 17.5668605
## 116 MM 64 20.5668605
## 117 MM 66 22.5668605
## 118 Mm 53 9.5668605
## 119 mm 10 -33.4331395
## 120 Mm 32 -11.4331395
## 121 Mm 24 -19.4331395
## 122 MM 62 18.5668605
## 123 mm 14 -29.4331395
## 124 MM 61 17.5668605
## 125 mm 8 -35.4331395
## 126 mm 10 -33.4331395
## 127 Mm 63 19.5668605
## 128 Mm 62 18.5668605
## 129 Mm 52 8.5668605
## 130 mm 13 -30.4331395
## 131 Mm 32 -11.4331395
## 132 mm 22 -21.4331395
## 133 Mm 60 16.5668605
## 134 mm 9 -34.4331395
## 135 Mm 62 18.5668605
## 136 MM 65 21.5668605
## 137 Mm 61 17.5668605
## 138 Mm 52 8.5668605
## 139 Mm 53 9.5668605
## 140 Mm 51 7.5668605
## 141 MM 63 19.5668605
## 142 Mm 61 17.5668605
## 143 MM 63 19.5668605
## 144 Mm 56 12.5668605
## 145 Mm 57 13.5668605
## 146 Mm 54 10.5668605
## 147 mm 12 -31.4331395
## 148 MM 63 19.5668605
## 149 Mm 47 3.5668605
## 150 MM 65 21.5668605
## 151 MM 62 18.5668605
## 152 MM 63 19.5668605
## 153 Mm 48 4.5668605
## 154 Mm 59 15.5668605
## 155 mm 8 -35.4331395
## 156 mm 12 -31.4331395
## 157 Mm 62 18.5668605
## 158 MM 61 17.5668605
## 159 MM 64 20.5668605
## 160 Mm 61 17.5668605
## 161 Mm 31 -12.4331395
## 162 mm 11 -32.4331395
## 163 Mm 24 -19.4331395
## 164 Mm 26 -17.4331395
## 165 mm 14 -29.4331395
## 166 Mm 64 20.5668605
## 167 mm 12 -31.4331395
## 168 Mm 25 -18.4331395
## 169 Mm 49 5.5668605
## 170 MM 61 17.5668605
## 171 Mm 60 16.5668605
## 172 MM 65 21.5668605
## 173 Mm 61 17.5668605
## 174 MM 64 20.5668605
## 175 MM 63 19.5668605
## 176 Mm 62 18.5668605
## 177 mm 8 -35.4331395
## 178 MM 65 21.5668605
## 179 mm 12 -31.4331395
## 180 Mm 22 -21.4331395
## 181 Mm 48 4.5668605
## 182 Mm 68 24.5668605
## 183 Mm 42 -1.4331395
## 184 Mm 63 19.5668605
## 185 Mm 62 18.5668605
## 186 Mm 27 -16.4331395
## 187 mm 10 -33.4331395
## 188 Mm 61 17.5668605
## 189 MM 63 19.5668605
## 190 Mm 29 -14.4331395
## 191 Mm 62 18.5668605
## 192 MM 64 20.5668605
## 193 mm 9 -34.4331395
## 194 MM 42 -1.4331395
## 195 Mm 35 -8.4331395
## 196 mm 10 -33.4331395
## 197 Mm 62 18.5668605
## 198 Mm 61 17.5668605
## 199 Mm 62 18.5668605
## 200 Mm 51 7.5668605
## 201 Mm 59 15.5668605
## 202 Mm 45 1.5668605
## 203 Mm 62 18.5668605
## 204 Mm 60 16.5668605
## 205 Mm 63 19.5668605
## 206 Mm 60 16.5668605
## 207 MM 62 18.5668605
## 208 MM 64 20.5668605
## 209 Mm 29 -14.4331395
## 210 Mm 63 19.5668605
## 211 mm 14 -29.4331395
## 212 Mm 21 -22.4331395
## 213 mm 10 -33.4331395
## 214 mm 8 -35.4331395
## 215 Mm 64 20.5668605
## 216 MM 63 19.5668605
## 217 Mm 60 16.5668605
## 218 Mm 51 7.5668605
## 219 Mm 58 14.5668605
## 220 Mm 45 1.5668605
## 221 mm 14 -29.4331395
## 222 mm 10 -33.4331395
## 223 Mm 39 -4.4331395
## 224 Mm 49 5.5668605
## 225 Mm 13 -30.4331395
## 226 mm 14 -29.4331395
## 227 MM 65 21.5668605
## 228 MM 63 19.5668605
## 229 MM 69 25.5668605
## 230 Mm 61 17.5668605
## 231 Mm 62 18.5668605
## 232 mm 12 -31.4331395
## 233 MM 62 18.5668605
## 234 MM 63 19.5668605
## 235 MM 62 18.5668605
## 236 MM 63 19.5668605
## 237 Mm 54 10.5668605
## 238 Mm 60 16.5668605
## 239 MM 64 20.5668605
## 240 Mm 41 -2.4331395
## 241 MM 62 18.5668605
## 242 mm 11 -32.4331395
## 243 mm 12 -31.4331395
## 244 Mm 61 17.5668605
## 245 Mm 61 17.5668605
## 246 Mm 64 20.5668605
## 247 MM 67 23.5668605
## 248 mm 12 -31.4331395
## 249 mm 14 -29.4331395
## 250 MM 66 22.5668605
## 251 mm 12 -31.4331395
## 252 Mm 54 10.5668605
## 253 MM 60 16.5668605
## 254 Mm 64 20.5668605
## 255 Mm 40 -3.4331395
## 256 mm 12 -31.4331395
## 257 mm 9 -34.4331395
## 258 Mm 59 15.5668605
## 259 Mm 45 1.5668605
## 260 MM 66 22.5668605
## 261 mm 11 -32.4331395
## 262 Mm 34 -9.4331395
## 263 Mm 62 18.5668605
## 264 mm 14 -29.4331395
## 265 Mm 52 8.5668605
## 266 Mm 17 -26.4331395
## 267 Mm 64 20.5668605
## 268 Mm 51 7.5668605
## 269 mm 9 -34.4331395
## 270 MM 56 12.5668605
## 271 Mm 57 13.5668605
## 272 mm 9 -34.4331395
## 273 Mm 62 18.5668605
## 274 MM 62 18.5668605
## 275 Mm 60 16.5668605
## 276 mm 11 -32.4331395
## 277 Mm 61 17.5668605
## 278 Mm 24 -19.4331395
## 279 MM 62 18.5668605
## 280 mm 12 -31.4331395
## 281 mm 10 -33.4331395
## 282 Mm 60 16.5668605
## 283 MM 64 20.5668605
## 284 Mm 62 18.5668605
## 285 Mm 53 9.5668605
## 286 mm 10 -33.4331395
## 287 MM 65 21.5668605
## 288 mm 11 -32.4331395
## 289 Mm 64 20.5668605
## 290 Mm 37 -6.4331395
## 291 Mm 54 10.5668605
## 292 MM 62 18.5668605
## 293 mm 11 -32.4331395
## 294 MM 65 21.5668605
## 295 Mm 62 18.5668605
## 296 Mm 60 16.5668605
## 297 mm 14 -29.4331395
## 298 MM 62 18.5668605
## 299 MM 62 18.5668605
## 300 mm 13 -30.4331395
## 301 MM 63 19.5668605
## 302 MM 64 20.5668605
## 303 MM 64 20.5668605
## 304 Mm 24 -19.4331395
## 305 Mm 23 -20.4331395
## 306 MM 63 19.5668605
## 307 mm 11 -32.4331395
## 308 mm 9 -34.4331395
## 309 MM 64 20.5668605
## 310 MM 62 18.5668605
## 311 mm 12 -31.4331395
## 312 Mm 41 -2.4331395
## 313 Mm 61 17.5668605
## 314 Mm 59 15.5668605
## 315 Mm 59 15.5668605
## 316 MM 57 13.5668605
## 317 mm 14 -29.4331395
## 318 Mm 12 -31.4331395
## 319 Mm 20 -23.4331395
## 320 Mm 42 -1.4331395
## 321 Mm 10 -33.4331395
## 322 MM 62 18.5668605
## 323 Mm 47 3.5668605
## 324 mm 11 -32.4331395
## 325 Mm 64 20.5668605
## 326 Mm 64 20.5668605
## 327 Mm 42 -1.4331395
## 328 Mm 61 17.5668605
## 329 Mm 26 -17.4331395
## 330 Mm 27 -16.4331395
## 331 mm 12 -31.4331395
## 332 MM 61 17.5668605
## 333 mm 11 -32.4331395
## 334 mm 6 -37.4331395
## 335 Mm 64 20.5668605
## 336 Mm 62 18.5668605
## 337 Mm 61 17.5668605
## 338 mm 14 -29.4331395
## 339 Mm 60 16.5668605
## 340 mm 8 -35.4331395
## 341 MM 62 18.5668605
## 342 mm 14 -29.4331395
## 343 Mm 60 16.5668605
## 344 mm 11 -32.4331395
# Introducing the chain operator, %>%, which passes the result of one
# statement on to the next. Here: keep the "mm" fish, compute each fish's
# difference from the mean plate count of that subset, and plot a histogram
# of those differences.
stickles %>%
  filter(genotype == "mm") %>%
  mutate(diff = no.plates - mean(no.plates)) %>%
  ggplot(mapping = aes(x = diff)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of the difference between each fish's plate count and the mean
# plate count, computed over all fish.
stickles %>%
  mutate(diff = no.plates - mean(no.plates)) %>%
  ggplot(mapping = aes(x = diff)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Using group_by and summarise to get summary statistics for different groups.
# For comparison, start with the mean plate count over all fish.
mean(stickles[["no.plates"]])
## [1] 43.43314
# Store the "mm" subset in its own data frame and preview its first six rows.
mm <- stickles %>%
  filter(genotype == "mm")
head(mm, n = 6L)
## genotype no.plates
## 1 mm 11
## 2 mm 14
## 3 mm 11
## 4 mm 8
## 5 mm 12
## 6 mm 11
# Mean plate count within the "mm" subset only.
mean(mm[["no.plates"]])
## [1] 11.67045
# The same per-group mean for every genotype at once: group the rows by
# genotype, then collapse each group to its mean plate count.
stickles %>%
  group_by(genotype) %>%
  summarise(
    avg_plates = mean(no.plates)
  )
## Source: local data frame [3 x 2]
##
## genotype avg_plates
## (chr) (dbl)
## 1 mm 11.67045
## 2 Mm 50.37931
## 3 MM 62.78049
# Show the structure of the (min, max) pair of plate counts.
stickles[["no.plates"]] %>%
  range() %>%
  str()
## int [1:2] 6 69
# Lab write-up questions:
#
# Question 1:
# For each of the 3 genotype groups in the stickleback data, compare the mean
# and median values that you expect given the plots. For each plot, do you
# expect the mean and median to be similar, the mean to be larger than the
# median, or the median to be larger than the mean? Why? Which of the two,
# mean or median, is the better metric for each of the groups?
#
# Question 2:
# Calculate the squared difference from the mean (of the number of plates) for
# the homozygous dominant subpopulation. Plot a histogram of this squared
# difference value.
#
# Question 3:
# For each of the 3 genotypes in the stickleback data, calculate the mean,
# standard deviation, median, min, and max values.