library(ggplot2)

# Load the stickleback plate data; text columns stay as character vectors
# rather than being converted to factors.
stickles <- read.csv(
  file = "Data/03e3SticklebackPlates.csv",
  stringsAsFactors = FALSE
)

# Histogram of the number of plates per stickleback (a type of fish), i.e. the
# frequency distribution of no.plates across the whole data set.
ggplot(data = stickles, mapping = aes(x = no.plates)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram, filled by genotype and split into one panel row per genotype:
# homozygous recessive, heterozygous, and homozygous dominant.
ggplot(
  data = stickles,
  mapping = aes(x = no.plates, fill = genotype)
) +
  geom_histogram() +
  facet_grid(genotype ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Learning to use a new package, dplyr, to select subsets of data
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the rows whose genotype is mm
dplyr::filter(.data = stickles, genotype == "mm")
##    genotype no.plates
## 1        mm        11
## 2        mm        14
## 3        mm        11
## 4        mm         8
## 5        mm        12
## 6        mm        11
## 7        mm        12
## 8        mm        11
## 9        mm         9
## 10       mm         9
## 11       mm         9
## 12       mm        10
## 13       mm         9
## 14       mm        11
## 15       mm        14
## 16       mm        14
## 17       mm        14
## 18       mm        14
## 19       mm        13
## 20       mm        13
## 21       mm        10
## 22       mm        13
## 23       mm        11
## 24       mm        14
## 25       mm        14
## 26       mm         8
## 27       mm        37
## 28       mm        13
## 29       mm        10
## 30       mm        12
## 31       mm         8
## 32       mm        12
## 33       mm        10
## 34       mm        14
## 35       mm         8
## 36       mm        10
## 37       mm        13
## 38       mm        22
## 39       mm         9
## 40       mm        12
## 41       mm         8
## 42       mm        12
## 43       mm        11
## 44       mm        14
## 45       mm        12
## 46       mm         8
## 47       mm        12
## 48       mm        10
## 49       mm         9
## 50       mm        10
## 51       mm        14
## 52       mm        10
## 53       mm         8
## 54       mm        14
## 55       mm        10
## 56       mm        14
## 57       mm        12
## 58       mm        11
## 59       mm        12
## 60       mm        12
## 61       mm        14
## 62       mm        12
## 63       mm        12
## 64       mm         9
## 65       mm        11
## 66       mm        14
## 67       mm         9
## 68       mm         9
## 69       mm        11
## 70       mm        12
## 71       mm        10
## 72       mm        10
## 73       mm        11
## 74       mm        11
## 75       mm        14
## 76       mm        13
## 77       mm        11
## 78       mm         9
## 79       mm        12
## 80       mm        14
## 81       mm        11
## 82       mm        12
## 83       mm        11
## 84       mm         6
## 85       mm        14
## 86       mm         8
## 87       mm        14
## 88       mm        11
# Comparison operators test a logical condition and return TRUE or FALSE.
# Each expression below is followed by the result R prints for it.
1 == 2  # equal to
## [1] FALSE
1 == 1  # equal to
## [1] TRUE
1 >= 2  # greater than or equal to
## [1] FALSE
1 <= 2  # less than or equal to
## [1] TRUE
1 > 2  # strictly greater than
## [1] FALSE
1 < 2  # strictly less than
## [1] TRUE
1 != 2  # not equal to
## [1] TRUE
# Histogram of plate counts restricted to the mm genotype
ggplot(
  data = dplyr::filter(stickles, genotype == "mm"),
  mapping = aes(x = no.plates)
) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Drop the heterozygote (Mm) rows, keeping every other genotype
dplyr::filter(.data = stickles, genotype != "Mm")
##     genotype no.plates
## 1         mm        11
## 2         mm        14
## 3         mm        11
## 4         mm         8
## 5         mm        12
## 6         mm        11
## 7         mm        12
## 8         MM        64
## 9         mm        11
## 10        MM        65
## 11        mm         9
## 12        MM        62
## 13        MM        63
## 14        mm         9
## 15        mm         9
## 16        mm        10
## 17        MM        60
## 18        mm         9
## 19        MM        64
## 20        mm        11
## 21        MM        63
## 22        mm        14
## 23        mm        14
## 24        MM        64
## 25        mm        14
## 26        mm        14
## 27        mm        13
## 28        MM        64
## 29        MM        65
## 30        MM        64
## 31        MM        64
## 32        MM        63
## 33        mm        13
## 34        mm        10
## 35        MM        49
## 36        MM        66
## 37        MM        61
## 38        MM        63
## 39        MM        66
## 40        MM        65
## 41        mm        13
## 42        MM        62
## 43        MM        64
## 44        mm        11
## 45        mm        14
## 46        MM        60
## 47        mm        14
## 48        MM        62
## 49        mm         8
## 50        mm        37
## 51        mm        13
## 52        mm        10
## 53        MM        64
## 54        mm        12
## 55        mm         8
## 56        MM        64
## 57        MM        65
## 58        mm        12
## 59        MM        64
## 60        MM        66
## 61        mm        10
## 62        MM        62
## 63        mm        14
## 64        MM        61
## 65        mm         8
## 66        mm        10
## 67        mm        13
## 68        mm        22
## 69        mm         9
## 70        MM        65
## 71        MM        63
## 72        MM        63
## 73        mm        12
## 74        MM        63
## 75        MM        65
## 76        MM        62
## 77        MM        63
## 78        mm         8
## 79        mm        12
## 80        MM        61
## 81        MM        64
## 82        mm        11
## 83        mm        14
## 84        mm        12
## 85        MM        61
## 86        MM        65
## 87        MM        64
## 88        MM        63
## 89        mm         8
## 90        MM        65
## 91        mm        12
## 92        mm        10
## 93        MM        63
## 94        MM        64
## 95        mm         9
## 96        MM        42
## 97        mm        10
## 98        MM        62
## 99        MM        64
## 100       mm        14
## 101       mm        10
## 102       mm         8
## 103       MM        63
## 104       mm        14
## 105       mm        10
## 106       mm        14
## 107       MM        65
## 108       MM        63
## 109       MM        69
## 110       mm        12
## 111       MM        62
## 112       MM        63
## 113       MM        62
## 114       MM        63
## 115       MM        64
## 116       MM        62
## 117       mm        11
## 118       mm        12
## 119       MM        67
## 120       mm        12
## 121       mm        14
## 122       MM        66
## 123       mm        12
## 124       MM        60
## 125       mm        12
## 126       mm         9
## 127       MM        66
## 128       mm        11
## 129       mm        14
## 130       mm         9
## 131       MM        56
## 132       mm         9
## 133       MM        62
## 134       mm        11
## 135       MM        62
## 136       mm        12
## 137       mm        10
## 138       MM        64
## 139       mm        10
## 140       MM        65
## 141       mm        11
## 142       MM        62
## 143       mm        11
## 144       MM        65
## 145       mm        14
## 146       MM        62
## 147       MM        62
## 148       mm        13
## 149       MM        63
## 150       MM        64
## 151       MM        64
## 152       MM        63
## 153       mm        11
## 154       mm         9
## 155       MM        64
## 156       MM        62
## 157       mm        12
## 158       MM        57
## 159       mm        14
## 160       MM        62
## 161       mm        11
## 162       mm        12
## 163       MM        61
## 164       mm        11
## 165       mm         6
## 166       mm        14
## 167       mm         8
## 168       MM        62
## 169       mm        14
## 170       mm        11
# Add a column, diff: each fish's plate count minus the mean plate count
# taken over all rows of stickles.
dplyr::mutate(
  .data = stickles,
  diff = no.plates - mean(no.plates)
)
##     genotype no.plates        diff
## 1         mm        11 -32.4331395
## 2         Mm        63  19.5668605
## 3         Mm        22 -21.4331395
## 4         Mm        10 -33.4331395
## 5         mm        14 -29.4331395
## 6         mm        11 -32.4331395
## 7         Mm        58  14.5668605
## 8         Mm        36  -7.4331395
## 9         Mm        31 -12.4331395
## 10        Mm        61  17.5668605
## 11        Mm        63  19.5668605
## 12        mm         8 -35.4331395
## 13        mm        12 -31.4331395
## 14        mm        11 -32.4331395
## 15        Mm        64  20.5668605
## 16        Mm        64  20.5668605
## 17        mm        12 -31.4331395
## 18        Mm        55  11.5668605
## 19        Mm        21 -22.4331395
## 20        MM        64  20.5668605
## 21        Mm        47   3.5668605
## 22        Mm        57  13.5668605
## 23        mm        11 -32.4331395
## 24        Mm        58  14.5668605
## 25        MM        65  21.5668605
## 26        Mm        63  19.5668605
## 27        mm         9 -34.4331395
## 28        MM        62  18.5668605
## 29        Mm        29 -14.4331395
## 30        Mm        10 -33.4331395
## 31        MM        63  19.5668605
## 32        mm         9 -34.4331395
## 33        Mm        61  17.5668605
## 34        Mm        39  -4.4331395
## 35        mm         9 -34.4331395
## 36        Mm        50   6.5668605
## 37        mm        10 -33.4331395
## 38        MM        60  16.5668605
## 39        Mm        60  16.5668605
## 40        mm         9 -34.4331395
## 41        Mm        63  19.5668605
## 42        MM        64  20.5668605
## 43        mm        11 -32.4331395
## 44        Mm        60  16.5668605
## 45        Mm        66  22.5668605
## 46        MM        63  19.5668605
## 47        Mm        63  19.5668605
## 48        mm        14 -29.4331395
## 49        mm        14 -29.4331395
## 50        Mm        59  15.5668605
## 51        Mm        30 -13.4331395
## 52        MM        64  20.5668605
## 53        mm        14 -29.4331395
## 54        mm        14 -29.4331395
## 55        mm        13 -30.4331395
## 56        Mm        62  18.5668605
## 57        MM        64  20.5668605
## 58        MM        65  21.5668605
## 59        Mm        34  -9.4331395
## 60        MM        64  20.5668605
## 61        MM        64  20.5668605
## 62        Mm        62  18.5668605
## 63        MM        63  19.5668605
## 64        Mm        30 -13.4331395
## 65        mm        13 -30.4331395
## 66        mm        10 -33.4331395
## 67        MM        49   5.5668605
## 68        Mm        62  18.5668605
## 69        Mm        43  -0.4331395
## 70        MM        66  22.5668605
## 71        MM        61  17.5668605
## 72        Mm        26 -17.4331395
## 73        MM        63  19.5668605
## 74        Mm        59  15.5668605
## 75        MM        66  22.5668605
## 76        MM        65  21.5668605
## 77        Mm        62  18.5668605
## 78        Mm        63  19.5668605
## 79        Mm        56  12.5668605
## 80        mm        13 -30.4331395
## 81        Mm        62  18.5668605
## 82        Mm        28 -15.4331395
## 83        Mm        61  17.5668605
## 84        Mm        56  12.5668605
## 85        MM        62  18.5668605
## 86        Mm        62  18.5668605
## 87        Mm        39  -4.4331395
## 88        Mm        25 -18.4331395
## 89        Mm        34  -9.4331395
## 90        MM        64  20.5668605
## 91        mm        11 -32.4331395
## 92        Mm        63  19.5668605
## 93        Mm        64  20.5668605
## 94        mm        14 -29.4331395
## 95        MM        60  16.5668605
## 96        Mm        61  17.5668605
## 97        mm        14 -29.4331395
## 98        MM        62  18.5668605
## 99        mm         8 -35.4331395
## 100       mm        37  -6.4331395
## 101       Mm        61  17.5668605
## 102       mm        13 -30.4331395
## 103       mm        10 -33.4331395
## 104       Mm        62  18.5668605
## 105       MM        64  20.5668605
## 106       Mm        63  19.5668605
## 107       mm        12 -31.4331395
## 108       mm         8 -35.4331395
## 109       Mm        58  14.5668605
## 110       Mm        51   7.5668605
## 111       MM        64  20.5668605
## 112       MM        65  21.5668605
## 113       Mm        62  18.5668605
## 114       mm        12 -31.4331395
## 115       Mm        61  17.5668605
## 116       MM        64  20.5668605
## 117       MM        66  22.5668605
## 118       Mm        53   9.5668605
## 119       mm        10 -33.4331395
## 120       Mm        32 -11.4331395
## 121       Mm        24 -19.4331395
## 122       MM        62  18.5668605
## 123       mm        14 -29.4331395
## 124       MM        61  17.5668605
## 125       mm         8 -35.4331395
## 126       mm        10 -33.4331395
## 127       Mm        63  19.5668605
## 128       Mm        62  18.5668605
## 129       Mm        52   8.5668605
## 130       mm        13 -30.4331395
## 131       Mm        32 -11.4331395
## 132       mm        22 -21.4331395
## 133       Mm        60  16.5668605
## 134       mm         9 -34.4331395
## 135       Mm        62  18.5668605
## 136       MM        65  21.5668605
## 137       Mm        61  17.5668605
## 138       Mm        52   8.5668605
## 139       Mm        53   9.5668605
## 140       Mm        51   7.5668605
## 141       MM        63  19.5668605
## 142       Mm        61  17.5668605
## 143       MM        63  19.5668605
## 144       Mm        56  12.5668605
## 145       Mm        57  13.5668605
## 146       Mm        54  10.5668605
## 147       mm        12 -31.4331395
## 148       MM        63  19.5668605
## 149       Mm        47   3.5668605
## 150       MM        65  21.5668605
## 151       MM        62  18.5668605
## 152       MM        63  19.5668605
## 153       Mm        48   4.5668605
## 154       Mm        59  15.5668605
## 155       mm         8 -35.4331395
## 156       mm        12 -31.4331395
## 157       Mm        62  18.5668605
## 158       MM        61  17.5668605
## 159       MM        64  20.5668605
## 160       Mm        61  17.5668605
## 161       Mm        31 -12.4331395
## 162       mm        11 -32.4331395
## 163       Mm        24 -19.4331395
## 164       Mm        26 -17.4331395
## 165       mm        14 -29.4331395
## 166       Mm        64  20.5668605
## 167       mm        12 -31.4331395
## 168       Mm        25 -18.4331395
## 169       Mm        49   5.5668605
## 170       MM        61  17.5668605
## 171       Mm        60  16.5668605
## 172       MM        65  21.5668605
## 173       Mm        61  17.5668605
## 174       MM        64  20.5668605
## 175       MM        63  19.5668605
## 176       Mm        62  18.5668605
## 177       mm         8 -35.4331395
## 178       MM        65  21.5668605
## 179       mm        12 -31.4331395
## 180       Mm        22 -21.4331395
## 181       Mm        48   4.5668605
## 182       Mm        68  24.5668605
## 183       Mm        42  -1.4331395
## 184       Mm        63  19.5668605
## 185       Mm        62  18.5668605
## 186       Mm        27 -16.4331395
## 187       mm        10 -33.4331395
## 188       Mm        61  17.5668605
## 189       MM        63  19.5668605
## 190       Mm        29 -14.4331395
## 191       Mm        62  18.5668605
## 192       MM        64  20.5668605
## 193       mm         9 -34.4331395
## 194       MM        42  -1.4331395
## 195       Mm        35  -8.4331395
## 196       mm        10 -33.4331395
## 197       Mm        62  18.5668605
## 198       Mm        61  17.5668605
## 199       Mm        62  18.5668605
## 200       Mm        51   7.5668605
## 201       Mm        59  15.5668605
## 202       Mm        45   1.5668605
## 203       Mm        62  18.5668605
## 204       Mm        60  16.5668605
## 205       Mm        63  19.5668605
## 206       Mm        60  16.5668605
## 207       MM        62  18.5668605
## 208       MM        64  20.5668605
## 209       Mm        29 -14.4331395
## 210       Mm        63  19.5668605
## 211       mm        14 -29.4331395
## 212       Mm        21 -22.4331395
## 213       mm        10 -33.4331395
## 214       mm         8 -35.4331395
## 215       Mm        64  20.5668605
## 216       MM        63  19.5668605
## 217       Mm        60  16.5668605
## 218       Mm        51   7.5668605
## 219       Mm        58  14.5668605
## 220       Mm        45   1.5668605
## 221       mm        14 -29.4331395
## 222       mm        10 -33.4331395
## 223       Mm        39  -4.4331395
## 224       Mm        49   5.5668605
## 225       Mm        13 -30.4331395
## 226       mm        14 -29.4331395
## 227       MM        65  21.5668605
## 228       MM        63  19.5668605
## 229       MM        69  25.5668605
## 230       Mm        61  17.5668605
## 231       Mm        62  18.5668605
## 232       mm        12 -31.4331395
## 233       MM        62  18.5668605
## 234       MM        63  19.5668605
## 235       MM        62  18.5668605
## 236       MM        63  19.5668605
## 237       Mm        54  10.5668605
## 238       Mm        60  16.5668605
## 239       MM        64  20.5668605
## 240       Mm        41  -2.4331395
## 241       MM        62  18.5668605
## 242       mm        11 -32.4331395
## 243       mm        12 -31.4331395
## 244       Mm        61  17.5668605
## 245       Mm        61  17.5668605
## 246       Mm        64  20.5668605
## 247       MM        67  23.5668605
## 248       mm        12 -31.4331395
## 249       mm        14 -29.4331395
## 250       MM        66  22.5668605
## 251       mm        12 -31.4331395
## 252       Mm        54  10.5668605
## 253       MM        60  16.5668605
## 254       Mm        64  20.5668605
## 255       Mm        40  -3.4331395
## 256       mm        12 -31.4331395
## 257       mm         9 -34.4331395
## 258       Mm        59  15.5668605
## 259       Mm        45   1.5668605
## 260       MM        66  22.5668605
## 261       mm        11 -32.4331395
## 262       Mm        34  -9.4331395
## 263       Mm        62  18.5668605
## 264       mm        14 -29.4331395
## 265       Mm        52   8.5668605
## 266       Mm        17 -26.4331395
## 267       Mm        64  20.5668605
## 268       Mm        51   7.5668605
## 269       mm         9 -34.4331395
## 270       MM        56  12.5668605
## 271       Mm        57  13.5668605
## 272       mm         9 -34.4331395
## 273       Mm        62  18.5668605
## 274       MM        62  18.5668605
## 275       Mm        60  16.5668605
## 276       mm        11 -32.4331395
## 277       Mm        61  17.5668605
## 278       Mm        24 -19.4331395
## 279       MM        62  18.5668605
## 280       mm        12 -31.4331395
## 281       mm        10 -33.4331395
## 282       Mm        60  16.5668605
## 283       MM        64  20.5668605
## 284       Mm        62  18.5668605
## 285       Mm        53   9.5668605
## 286       mm        10 -33.4331395
## 287       MM        65  21.5668605
## 288       mm        11 -32.4331395
## 289       Mm        64  20.5668605
## 290       Mm        37  -6.4331395
## 291       Mm        54  10.5668605
## 292       MM        62  18.5668605
## 293       mm        11 -32.4331395
## 294       MM        65  21.5668605
## 295       Mm        62  18.5668605
## 296       Mm        60  16.5668605
## 297       mm        14 -29.4331395
## 298       MM        62  18.5668605
## 299       MM        62  18.5668605
## 300       mm        13 -30.4331395
## 301       MM        63  19.5668605
## 302       MM        64  20.5668605
## 303       MM        64  20.5668605
## 304       Mm        24 -19.4331395
## 305       Mm        23 -20.4331395
## 306       MM        63  19.5668605
## 307       mm        11 -32.4331395
## 308       mm         9 -34.4331395
## 309       MM        64  20.5668605
## 310       MM        62  18.5668605
## 311       mm        12 -31.4331395
## 312       Mm        41  -2.4331395
## 313       Mm        61  17.5668605
## 314       Mm        59  15.5668605
## 315       Mm        59  15.5668605
## 316       MM        57  13.5668605
## 317       mm        14 -29.4331395
## 318       Mm        12 -31.4331395
## 319       Mm        20 -23.4331395
## 320       Mm        42  -1.4331395
## 321       Mm        10 -33.4331395
## 322       MM        62  18.5668605
## 323       Mm        47   3.5668605
## 324       mm        11 -32.4331395
## 325       Mm        64  20.5668605
## 326       Mm        64  20.5668605
## 327       Mm        42  -1.4331395
## 328       Mm        61  17.5668605
## 329       Mm        26 -17.4331395
## 330       Mm        27 -16.4331395
## 331       mm        12 -31.4331395
## 332       MM        61  17.5668605
## 333       mm        11 -32.4331395
## 334       mm         6 -37.4331395
## 335       Mm        64  20.5668605
## 336       Mm        62  18.5668605
## 337       Mm        61  17.5668605
## 338       mm        14 -29.4331395
## 339       Mm        60  16.5668605
## 340       mm         8 -35.4331395
## 341       MM        62  18.5668605
## 342       mm        14 -29.4331395
## 343       Mm        60  16.5668605
## 344       mm        11 -32.4331395
# Introducing the chain (pipe) operator %>%: it hands the result of one
# statement to the next one. Here: keep the mm rows, compute each fish's
# difference from the mm mean, then plot a histogram of those differences.
stickles %>%
  filter(genotype == "mm") %>%
  mutate(diff = no.plates - mean(no.plates)) %>%
  ggplot(mapping = aes(x = diff)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of each fish's plate count minus the mean plate count, using all
# genotypes together.
stickles %>%
  mutate(diff = no.plates - mean(no.plates)) %>%
  ggplot(mapping = aes(x = diff)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Summary statistics for different groups, building up to group_by and
# summarise. First, the mean plate count over the whole data set.
mean(stickles[["no.plates"]])
## [1] 43.43314
# Store the mm-only subset and preview its first six rows.
mm <- stickles %>% filter(genotype == "mm")
head(mm, n = 6L)
##   genotype no.plates
## 1       mm        11
## 2       mm        14
## 3       mm        11
## 4       mm         8
## 5       mm        12
## 6       mm        11
# Mean plate count within the mm subset only.
mean(mm[["no.plates"]])
## [1] 11.67045
# Mean plate count for every genotype at once: group the rows by genotype,
# then collapse each group to a single avg_plates value.
summarise(
  group_by(stickles, genotype),
  avg_plates = mean(no.plates)
)
## Source: local data frame [3 x 2]
## 
##   genotype avg_plates
##      (chr)      (dbl)
## 1       mm   11.67045
## 2       Mm   50.37931
## 3       MM   62.78049
# Compact display of the smallest and largest plate counts in the data.
str(
  range(stickles[["no.plates"]])
)
##  int [1:2] 6 69

Lab write-up questions:

Question 1:

For each of the 3 groups in the stickleback data, compare the mean and median values that you expect given the plots. For each plot, do you expect the mean and median to be similar, the mean to be larger than the median, or the median to be larger than the mean? Why? Which of the mean or median is the best metric for each of the groups?

Question 2:

For the homozygous dominant (MM) subpopulation, calculate the squared difference between each fish's number of plates and the mean number of plates. Plot a histogram of these squared differences.

Question 3:

For each of the 3 genotypes in the stickleback data, calculate the mean, standard deviation, median, min, and max values.