rm(list = ls())
set.seed(2024)
library(here)
library(phyloseq)
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tibble' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
library(ggh4x)
theme_set(theme_bw())
Both pilot and following study data are loaded
ps <- read_rds(here('data','following_study','ps.rds'))
ps.pilot <- read_rds(here('data','pilot_study','ps.rds'))
sam <- data.frame(sample_data(ps))
Check basic information of the current study.
Does breed groups of dogs evenly distributed between IE and control group?
tb <- sam %>%
filter(!is.na(Breed.Group..1.), is.na(Breed.Group..2.)) %>%
xtabs(~Breed.Group..1. + Epileptic.or.Control, data = .)
tb
## Epileptic.or.Control
## Breed.Group..1. Control Epileptic
## Herder 8 9
## Pointer Spaniel 10 9
## Retriever 11 11
## Scent Hound 2 3
## Sight Hound 1 2
## Sled Dog 3 4
## Terrier 1 2
chisq.test(tb)
## Warning in chisq.test(tb): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: tb
## X-squared = 0.91298, df = 6, p-value = 0.9887
Get distribution of age between IE and control dogs.
sam %>%
group_by(Epileptic.or.Control) %>%
summarise(mean= mean(Age..months.), `standard deviation` = sd(Age..months.))
## # A tibble: 2 × 3
## Epileptic.or.Control mean `standard deviation`
## <fct> <dbl> <dbl>
## 1 Control 75.7 34.7
## 2 Epileptic 75.7 37.0
t.test(Age..months. ~ Epileptic.or.Control, data = sam)
##
## Welch Two Sample t-test
##
## data: Age..months. by Epileptic.or.Control
## t = -0.005633, df = 95.582, p-value = 0.9955
## alternative hypothesis: true difference in means between group Control and group Epileptic is not equal to 0
## 95 percent confidence interval:
## -14.42475 14.34312
## sample estimates:
## mean in group Control mean in group Epileptic
## 75.65306 75.69388
Does sex of dogs evenly distributed between IE and control group?
tb <- xtabs(~Sex + Epileptic.or.Control, data = sam)
tb
## Epileptic.or.Control
## Sex Control Epileptic
## F 33 25
## M 16 24
chisq.test(tb)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tb
## X-squared = 2.0698, df = 1, p-value = 0.1502
Distribution of phenobarbital administration in IE dogs
tb <- xtabs(~Pheno.Y.N + Epileptic.or.Control, data = sam)
tb
## Epileptic.or.Control
## Pheno.Y.N Control Epileptic
## No 49 30
## Yes 0 19
Does Phenobarbital Administration effective?
tb <- sam %>%
filter(Epileptic.or.Control == 'Epileptic') %>%
xtabs(~Pheno.Y.N + Seizure.Freedom..Y.N., data = .)
tb
## Seizure.Freedom..Y.N.
## Pheno.Y.N No Yes
## No 24 5
## Yes 8 11
chisq.test(tb)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tb
## X-squared = 6.8058, df = 1, p-value = 0.009086
16S rRNA sequencing taxonomy composition between pilot and current studies at phylum level.
sample_data(ps.pilot) <- sample_data(ps.pilot) |>
data.frame() |>
mutate(Epileptic.or.Control = if_else(Condition == 'Epilepsy',
'Epileptic', 'Control')) |>
sample_data()
ps.all <- merge_phyloseq(ps, ps.pilot) %>%
tax_glom('Phylum') %>%
transform_sample_counts(function(x) x / sum(x)) %>% # transform to proportional
psmelt()
ps.all$Phylum <- as.character(ps.all$Phylum)
ps.all$Phylum[ps.all$Abundance < 0.01] <- 'Others'
ps.all$study <- if_else(ps.all$study == 'present', 'present study', 'pilot study')
ps.all$study <- factor(ps.all$study, levels = c('pilot study', 'present study'))
ps.all <- ps.all |>
mutate(group = if_else(Epileptic.or.Control == 'Epileptic',
'IE','Ctrl'))
ggplot(data = ps.all) +
geom_col(aes(Sample, Abundance, fill = Phylum),position = "fill", width = 1) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
panel.spacing.x = unit(0, "lines")) +
scale_y_continuous(labels = scales::percent) +
facet_nested(. ~ study + group, scales = "free_x", space = "free_x") +
labs(y = 'Relative Abundance', x = 'Sample', fill = 'Phylum')
ggsave(here('figures','Figure1.png'), width = 170, units = "mm")
## Saving 170 x 127 mm image
ggsave(here('figures','Figure1.pdf'), width = 170, units = "mm")
## Saving 170 x 127 mm image
phylum.composition <- ps.all %>%
split(.$study) %>%
lapply(function(x) x %>%
group_by(Sample,Phylum) %>%
summarise(Abundance = sum(Abundance)) %>%
arrange(desc(Abundance)) %>%
pivot_wider(names_from = Phylum, values_from = Abundance, values_fill = 0))
## `summarise()` has regrouped the output.
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by Sample and Phylum.
## ℹ Output is grouped by Sample.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(Sample, Phylum))` for per-operation grouping
## (`?dplyr::dplyr_by`) instead.
phylum.composition %>% head()
## $`pilot study`
## # A tibble: 28 × 8
## # Groups: Sample [28]
## Sample Firmicutes Actinobacteriota Bacteroidota Fusobacteriota Proteobacteria
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 018Oa… 0.928 0 0.0602 0 0
## 2 020Pe… 0.917 0 0.0833 0 0
## 3 015Ea… 0.915 0 0.0580 0.0217 0
## 4 017Ha… 0.906 0 0.0852 0 0
## 5 021Fr… 0.884 0.0260 0.0661 0 0.0191
## 6 008To… 0.868 0.0301 0.0760 0.0259 0
## 7 019Jo… 0.856 0.0654 0.0668 0.0115 0
## 8 022Ar… 0.844 0.0670 0.0848 0 0
## 9 012Je… 0.837 0.0395 0.0946 0.0138 0.0141
## 10 001Lu… 0.836 0.124 0.0319 0 0
## # ℹ 18 more rows
## # ℹ 2 more variables: Campylobacterota <dbl>, Others <dbl>
##
## $`present study`
## # A tibble: 98 × 8
## # Groups: Sample [98]
## Sample Firmicutes Bacteroidota Actinobacteriota Fusobacteriota Proteobacteria
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Netti… 0.994 0 0 0 0
## 2 Netti… 0.976 0 0.0197 0 0
## 3 Netti… 0.975 0 0.0233 0 0
## 4 Netti… 0.957 0 0.0419 0 0
## 5 Netti… 0.956 0 0.0323 0 0.0116
## 6 Netti… 0.943 0 0.0519 0 0
## 7 Netti… 0.917 0 0.0729 0 0
## 8 Netti… 0.902 0 0.0294 0 0.0650
## 9 Netti… 0.901 0.0152 0.0693 0.0131 0
## 10 Netti… 0.881 0.0248 0.0809 0 0
## # ℹ 88 more rows
## # ℹ 2 more variables: Campylobacterota <dbl>, Others <dbl>
phylum.composition %>% lapply(summary)
## $`pilot study`
## Sample Firmicutes Actinobacteriota Bacteroidota
## Length:28 Min. :0.3244 Min. :0.00000 Min. :0.02085
## Class :character 1st Qu.:0.7173 1st Qu.:0.02589 1st Qu.:0.06452
## Mode :character Median :0.7719 Median :0.06968 Median :0.07679
## Mean :0.7632 Mean :0.09480 Mean :0.09150
## 3rd Qu.:0.8470 3rd Qu.:0.12981 3rd Qu.:0.11050
## Max. :0.9277 Max. :0.37715 Max. :0.24079
## Fusobacteriota Proteobacteria Campylobacterota Others
## Min. :0.00000 Min. :0.000000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.001151
## Median :0.01471 Median :0.000000 Median :0.000000 Median :0.004330
## Mean :0.03272 Mean :0.009954 Mean :0.003112 Mean :0.004756
## 3rd Qu.:0.06827 3rd Qu.:0.018931 3rd Qu.:0.000000 3rd Qu.:0.008200
## Max. :0.11172 Max. :0.064716 Max. :0.035429 Max. :0.012866
##
## $`present study`
## Sample Firmicutes Bacteroidota Actinobacteriota
## Length:98 Min. :0.1972 Min. :0.00000 Min. :0.00000
## Class :character 1st Qu.:0.7049 1st Qu.:0.00000 1st Qu.:0.07739
## Mode :character Median :0.7972 Median :0.02385 Median :0.11842
## Mean :0.7611 Mean :0.05750 Mean :0.12149
## 3rd Qu.:0.8463 3rd Qu.:0.05977 3rd Qu.:0.15228
## Max. :0.9943 Max. :0.48026 Max. :0.34529
## Fusobacteriota Proteobacteria Campylobacterota Others
## Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.001470
## Median :0.02206 Median :0.00000 Median :0.000000 Median :0.004220
## Mean :0.04054 Mean :0.01094 Mean :0.002935 Mean :0.005537
## 3rd Qu.:0.05942 3rd Qu.:0.01406 3rd Qu.:0.000000 3rd Qu.:0.008369
## Max. :0.24561 Max. :0.13216 Max. :0.108978 Max. :0.020336
Pilot study
genus.ps <- ps.pilot %>% subset_taxa(Phylum == 'Actinobacteriota') %>%
transform_sample_counts(function(x) x / sum(x)) %>%
psmelt()
genus.ps$Genus[genus.ps$Abundance < 0.01] <- 'Others'
genus.ps %>%
group_by(Sample, Genus) %>%
summarise(Abundance = sum(Abundance), .groups = 'drop') %>%
group_by(Genus) %>%
summarise(mean = sum(Abundance)/26, `standard deviation` = sd(Abundance)) %>%
arrange(desc(mean))
## # A tibble: 13 × 3
## Genus mean `standard deviation`
## <chr> <dbl> <dbl>
## 1 Collinsella 0.934 0.114
## 2 <NA> 0.0331 NA
## 3 Parvibacter 0.0253 0.227
## 4 Slackia 0.0243 0.0803
## 5 Corynebacterium 0.0158 0.275
## 6 Actinomyces 0.0154 NA
## 7 Others 0.0100 0.0101
## 8 Leucobacter 0.00769 NA
## 9 Bifidobacterium 0.00585 0.0786
## 10 Paeniglutamicibacter 0.00331 NA
## 11 Sanguibacter-Flavimobilis 0.00165 NA
## 12 Enterorhabdus 0.000479 NA
## 13 Libanicoccus 0.000469 NA
Current study
genus.ps <- ps %>% subset_taxa(Phylum == 'Actinobacteriota') %>%
transform_sample_counts(function(x) x / sum(x)) %>%
psmelt()
genus.ps$Genus[genus.ps$Abundance < 0.01] <- 'Others'
genus.ps %>%
group_by(Sample, Genus) %>%
summarise(Abundance = sum(Abundance), .groups = 'drop') %>%
group_by(Genus) %>%
summarise(mean = sum(Abundance)/98, `standard deviation` = sd(Abundance)) %>%
arrange(desc(mean))
## # A tibble: 13 × 3
## Genus mean `standard deviation`
## <chr> <dbl> <dbl>
## 1 Collinsella 0.803 0.156
## 2 Slackia 0.110 0.142
## 3 Bifidobacterium 0.0535 0.284
## 4 Parvibacter 0.0112 0.0431
## 5 Others 0.0109 0.0140
## 6 Actinomyces 0.00513 0.341
## 7 Leucobacter 0.00220 NA
## 8 <NA> 0.00205 0.0298
## 9 Corynebacterium 0.00129 0.0631
## 10 Raoultibacter 0.000325 NA
## 11 Pseudopropionibacterium 0.000157 NA
## 12 Eggerthella 0.000123 NA
## 13 Asaccharobacter 0.000113 NA
Pilot study
genus.ps <- ps.pilot %>% subset_taxa(Phylum == 'Firmicutes') %>%
transform_sample_counts(function(x) x / sum(x)) %>%
psmelt()
genus.ps$Genus[genus.ps$Abundance < 0.01] <- 'Others'
genus.ps %>%
group_by(Sample, Genus) %>%
summarise(Abundance = sum(Abundance), .groups = 'drop') %>%
group_by(Genus) %>%
summarise(mean = sum(Abundance)/26, `standard deviation` = sd(Abundance)) %>%
arrange(desc(mean))
## # A tibble: 39 × 3
## Genus mean `standard deviation`
## <chr> <dbl> <dbl>
## 1 Megamonas 0.255 0.185
## 2 Blautia 0.152 0.115
## 3 [Ruminococcus] gnavus group 0.124 0.0997
## 4 Others 0.100 0.0437
## 5 Lachnoclostridium 0.0541 0.0298
## 6 Faecalibacterium 0.0535 0.0999
## 7 Catenibacterium 0.0513 0.0803
## 8 Dorea 0.0453 0.0439
## 9 Holdemanella 0.0369 0.0695
## 10 Erysipelatoclostridium 0.0277 0.0484
## # ℹ 29 more rows
Current study
genus.ps <- ps %>% subset_taxa(Phylum == 'Firmicutes') %>%
transform_sample_counts(function(x) x / sum(x)) %>%
psmelt()
genus.ps$Genus[genus.ps$Abundance < 0.01] <- 'Others'
genus.ps %>%
group_by(Sample, Genus) %>%
summarise(Abundance = sum(Abundance), .groups = 'drop') %>%
group_by(Genus) %>%
summarise(mean = sum(Abundance)/98, `standard deviation` = sd(Abundance)) %>%
arrange(desc(mean))
## # A tibble: 51 × 3
## Genus mean `standard deviation`
## <chr> <dbl> <dbl>
## 1 Peptoclostridium 0.371 0.130
## 2 Blautia 0.215 0.102
## 3 Others 0.0973 0.0380
## 4 Streptococcus 0.0399 0.136
## 5 [Ruminococcus] gnavus group 0.0364 0.0394
## 6 Dorea 0.0316 0.0396
## 7 Allobaculum 0.0212 0.0335
## 8 Turicibacter 0.0193 0.0941
## 9 Holdemanella 0.0167 0.0260
## 10 Catenibacterium 0.0155 0.0230
## # ℹ 41 more rows
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.5.1 (2025-06-13)
## os macOS Tahoe 26.3
## system aarch64, darwin20
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2026-02-23
## pandoc 3.8.3 @ /opt/homebrew/bin/ (via rmarkdown)
## quarto 1.8.25 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/quarto
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## ade4 1.7-23 2025-02-14 [1] CRAN (R 4.5.0)
## ape 5.8-1 2024-12-16 [1] CRAN (R 4.5.0)
## Biobase 2.70.0 2025-10-29 [1] Bioconductor 3.22 (R 4.5.1)
## BiocGenerics 0.56.0 2025-10-29 [1] Bioconductor 3.22 (R 4.5.1)
## biomformat 1.38.0 2025-10-29 [1] Bioconductor 3.22 (R 4.5.1)
## Biostrings 2.78.0 2025-10-29 [1] Bioconductor 3.22 (R 4.5.1)
## bslib 0.10.0 2026-01-26 [1] CRAN (R 4.5.2)
## cachem 1.1.0 2024-05-16 [1] CRAN (R 4.5.0)
## cli 3.6.5 2025-04-23 [1] CRAN (R 4.5.0)
## cluster 2.1.8.2 2026-02-05 [1] CRAN (R 4.5.2)
## codetools 0.2-20 2024-03-31 [1] CRAN (R 4.5.1)
## crayon 1.5.3 2024-06-20 [1] CRAN (R 4.5.0)
## data.table 1.18.2.1 2026-01-27 [1] CRAN (R 4.5.2)
## digest 0.6.39 2025-11-19 [1] CRAN (R 4.5.2)
## dplyr * 1.2.0 2026-02-03 [1] CRAN (R 4.5.2)
## evaluate 1.0.5 2025-08-27 [1] CRAN (R 4.5.0)
## farver 2.1.2 2024-05-13 [1] CRAN (R 4.5.0)
## fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.5.0)
## forcats * 1.0.1 2025-09-25 [1] CRAN (R 4.5.0)
## foreach 1.5.2 2022-02-02 [1] CRAN (R 4.5.0)
## generics 0.1.4 2025-05-09 [1] CRAN (R 4.5.0)
## ggh4x * 0.3.1 2025-05-30 [1] CRAN (R 4.5.0)
## ggplot2 * 4.0.2 2026-02-03 [1] CRAN (R 4.5.2)
## glue 1.8.0 2024-09-30 [1] CRAN (R 4.5.0)
## gtable 0.3.6 2024-10-25 [1] CRAN (R 4.5.0)
## here * 1.0.2 2025-09-15 [1] CRAN (R 4.5.0)
## hms 1.1.4 2025-10-17 [1] CRAN (R 4.5.0)
## htmltools 0.5.9 2025-12-04 [1] CRAN (R 4.5.2)
## igraph 2.2.2 2026-02-12 [1] CRAN (R 4.5.2)
## IRanges 2.44.0 2025-10-29 [1] Bioconductor 3.22 (R 4.5.1)
## iterators 1.0.14 2022-02-05 [1] CRAN (R 4.5.0)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.5.0)
## jsonlite 2.0.0 2025-03-27 [1] CRAN (R 4.5.0)
## knitr 1.51 2025-12-20 [1] CRAN (R 4.5.2)
## labeling 0.4.3 2023-08-29 [1] CRAN (R 4.5.0)
## lattice 0.22-9 2026-02-09 [1] CRAN (R 4.5.2)
## lifecycle 1.0.5 2026-01-08 [1] CRAN (R 4.5.2)
## lubridate * 1.9.5 2026-02-04 [1] CRAN (R 4.5.2)
## magrittr 2.0.4 2025-09-12 [1] CRAN (R 4.5.0)
## MASS 7.3-65 2025-02-28 [1] CRAN (R 4.5.1)
## Matrix 1.7-4 2025-08-28 [1] CRAN (R 4.5.0)
## mgcv 1.9-4 2025-11-07 [1] CRAN (R 4.5.0)
## multtest 2.66.0 2025-10-29 [1] Bioconductor 3.22 (R 4.5.1)
## nlme 3.1-168 2025-03-31 [1] CRAN (R 4.5.1)
## otel 0.2.0 2025-08-29 [1] CRAN (R 4.5.0)
## permute 0.9-10 2026-02-06 [1] CRAN (R 4.5.2)
## phyloseq * 1.54.0 2025-10-29 [1] Bioconductor 3.22 (R 4.5.1)
## pillar 1.11.1 2025-09-17 [1] CRAN (R 4.5.0)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.5.0)
## plyr 1.8.9 2023-10-02 [1] CRAN (R 4.5.0)
## purrr * 1.2.1 2026-01-09 [1] CRAN (R 4.5.2)
## R6 2.6.1 2025-02-15 [1] CRAN (R 4.5.0)
## ragg 1.5.0 2025-09-02 [1] CRAN (R 4.5.0)
## RColorBrewer 1.1-3 2022-04-03 [1] CRAN (R 4.5.0)
## Rcpp 1.1.1 2026-01-10 [1] CRAN (R 4.5.2)
## readr * 2.2.0 2026-02-19 [1] CRAN (R 4.5.2)
## reshape2 1.4.5 2025-11-12 [1] CRAN (R 4.5.0)
## rhdf5 2.54.1 2025-12-02 [1] https://bioc-release.r-universe.dev (R 4.5.2)
## rhdf5filters 1.22.0 2025-10-29 [1] Bioconductor 3.22 (R 4.5.1)
## Rhdf5lib 1.32.0 2025-10-29 [1] Bioconductor 3.22 (R 4.5.1)
## rlang 1.1.7 2026-01-09 [1] CRAN (R 4.5.2)
## rmarkdown 2.30 2025-09-28 [1] CRAN (R 4.5.0)
## rprojroot 2.1.1 2025-08-26 [1] CRAN (R 4.5.0)
## rstudioapi 0.18.0 2026-01-16 [1] CRAN (R 4.5.2)
## S4Vectors 0.48.0 2025-10-29 [1] Bioconductor 3.22 (R 4.5.1)
## S7 0.2.1 2025-11-14 [1] CRAN (R 4.5.2)
## sass 0.4.10 2025-04-11 [1] CRAN (R 4.5.0)
## scales 1.4.0 2025-04-24 [1] CRAN (R 4.5.0)
## Seqinfo 1.0.0 2025-10-29 [1] Bioconductor 3.22 (R 4.5.1)
## sessioninfo 1.2.3 2025-02-05 [1] CRAN (R 4.5.0)
## stringi 1.8.7 2025-03-27 [1] CRAN (R 4.5.0)
## stringr * 1.6.0 2025-11-04 [1] CRAN (R 4.5.0)
## survival 3.8-6 2026-01-16 [1] CRAN (R 4.5.2)
## systemfonts 1.3.1 2025-10-01 [1] CRAN (R 4.5.0)
## textshaping 1.0.4 2025-10-10 [1] CRAN (R 4.5.0)
## tibble * 3.3.1 2026-01-11 [1] CRAN (R 4.5.2)
## tidyr * 1.3.2 2025-12-19 [1] CRAN (R 4.5.2)
## tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.5.0)
## tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.5.0)
## timechange 0.4.0 2026-01-29 [1] CRAN (R 4.5.2)
## tzdb 0.5.0 2025-03-15 [1] CRAN (R 4.5.0)
## utf8 1.2.6 2025-06-08 [1] CRAN (R 4.5.0)
## vctrs 0.7.1 2026-01-23 [1] CRAN (R 4.5.2)
## vegan 2.7-2 2025-10-08 [1] CRAN (R 4.5.0)
## withr 3.0.2 2024-10-28 [1] CRAN (R 4.5.0)
## xfun 0.56 2026-01-18 [1] CRAN (R 4.5.2)
## XVector 0.50.0 2025-10-29 [1] Bioconductor 3.22 (R 4.5.1)
## yaml 2.3.12 2025-12-10 [1] CRAN (R 4.5.2)
##
## [1] /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library
## * ── Packages attached to the search path.
##
## ──────────────────────────────────────────────────────────────────────────────