Set up

rm(list = ls())
set.seed(2024)
library(here)
library(phyloseq)
library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.5.2

## Warning: package 'tibble' was built under R version 4.5.2

## Warning: package 'tidyr' was built under R version 4.5.2

## Warning: package 'readr' was built under R version 4.5.2

## Warning: package 'purrr' was built under R version 4.5.2

## Warning: package 'dplyr' was built under R version 4.5.2

## Warning: package 'lubridate' was built under R version 4.5.2

library(ggh4x)
theme_set(theme_bw())

Read in data

Both pilot and following study data are loaded

ps <- read_rds(here('data','following_study','ps.rds'))
ps.pilot <- read_rds(here('data','pilot_study','ps.rds'))
sam <- data.frame(sample_data(ps))

Basic information

Check basic information of the current study.

Does breed groups of dogs evenly distributed between IE and control group?

tb <- sam %>% 
  filter(!is.na(Breed.Group..1.), is.na(Breed.Group..2.)) %>%
  xtabs(~Breed.Group..1. + Epileptic.or.Control, data = .)
tb

##                  Epileptic.or.Control
## Breed.Group..1.   Control Epileptic
##   Herder                8         9
##   Pointer Spaniel      10         9
##   Retriever            11        11
##   Scent Hound           2         3
##   Sight Hound           1         2
##   Sled Dog              3         4
##   Terrier               1         2

chisq.test(tb)

## Warning in chisq.test(tb): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  tb
## X-squared = 0.91298, df = 6, p-value = 0.9887

Get distribution of age between IE and control dogs.

sam %>% 
  group_by(Epileptic.or.Control) %>% 
  summarise(mean= mean(Age..months.), `standard deviation` = sd(Age..months.))

## # A tibble: 2 × 3
##   Epileptic.or.Control  mean `standard deviation`
##   <fct>                <dbl>                <dbl>
## 1 Control               75.7                 34.7
## 2 Epileptic             75.7                 37.0

t.test(Age..months. ~ Epileptic.or.Control, data = sam)

## 
##  Welch Two Sample t-test
## 
## data:  Age..months. by Epileptic.or.Control
## t = -0.005633, df = 95.582, p-value = 0.9955
## alternative hypothesis: true difference in means between group Control and group Epileptic is not equal to 0
## 95 percent confidence interval:
##  -14.42475  14.34312
## sample estimates:
##   mean in group Control mean in group Epileptic 
##                75.65306                75.69388

Does sex of dogs evenly distributed between IE and control group?

tb <- xtabs(~Sex + Epileptic.or.Control, data = sam)
tb

##    Epileptic.or.Control
## Sex Control Epileptic
##   F      33        25
##   M      16        24

chisq.test(tb)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tb
## X-squared = 2.0698, df = 1, p-value = 0.1502

Distribution of phenobarbital administration in IE dogs

tb <- xtabs(~Pheno.Y.N + Epileptic.or.Control, data = sam)
tb

##          Epileptic.or.Control
## Pheno.Y.N Control Epileptic
##       No       49        30
##       Yes       0        19

Does Phenobarbital Administration effective?

tb <- sam %>% 
  filter(Epileptic.or.Control == 'Epileptic') %>% 
  xtabs(~Pheno.Y.N + Seizure.Freedom..Y.N., data = .)
tb

##          Seizure.Freedom..Y.N.
## Pheno.Y.N No Yes
##       No  24   5
##       Yes  8  11

chisq.test(tb)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tb
## X-squared = 6.8058, df = 1, p-value = 0.009086

Taxonomy Composition between the Pilot and Current Study

Bar plot

16S rRNA sequencing taxonomy composition between pilot and current studies at phylum level.

sample_data(ps.pilot) <- sample_data(ps.pilot) |> 
    data.frame() |> 
    mutate(Epileptic.or.Control = if_else(Condition == 'Epilepsy',
                                          'Epileptic', 'Control')) |> 
    sample_data()
ps.all <- merge_phyloseq(ps, ps.pilot) %>% 
  tax_glom('Phylum') %>% 
  transform_sample_counts(function(x) x / sum(x)) %>% # transform to proportional
  psmelt()
ps.all$Phylum <- as.character(ps.all$Phylum)
ps.all$Phylum[ps.all$Abundance < 0.01] <- 'Others'
ps.all$study <- if_else(ps.all$study == 'present', 'present study', 'pilot study')
ps.all$study <- factor(ps.all$study, levels = c('pilot study', 'present study'))
ps.all <- ps.all |> 
    mutate(group = if_else(Epileptic.or.Control == 'Epileptic',
                           'IE','Ctrl'))

ggplot(data = ps.all) + 
  geom_col(aes(Sample, Abundance, fill = Phylum),position = "fill", width = 1) +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        panel.spacing.x = unit(0, "lines"))  +
  scale_y_continuous(labels = scales::percent) +
  facet_nested(. ~ study + group, scales = "free_x", space = "free_x") +
  labs(y = 'Relative Abundance', x = 'Sample', fill = 'Phylum')

ggsave(here('figures','Figure1.png'), width = 170, units = "mm")

## Saving 170 x 127 mm image

ggsave(here('figures','Figure1.pdf'), width = 170, units = "mm")

## Saving 170 x 127 mm image

Taxonomy distribution at phylum level

phylum.composition <- ps.all %>% 
  split(.$study) %>%
  lapply(function(x) x %>% 
           group_by(Sample,Phylum) %>%
           summarise(Abundance = sum(Abundance)) %>%
           arrange(desc(Abundance)) %>% 
           pivot_wider(names_from = Phylum, values_from = Abundance, values_fill = 0))

## `summarise()` has regrouped the output.
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by Sample and Phylum.
## ℹ Output is grouped by Sample.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(Sample, Phylum))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.

phylum.composition %>% head()

## $`pilot study`
## # A tibble: 28 × 8
## # Groups:   Sample [28]
##    Sample Firmicutes Actinobacteriota Bacteroidota Fusobacteriota Proteobacteria
##    <chr>       <dbl>            <dbl>        <dbl>          <dbl>          <dbl>
##  1 018Oa…      0.928           0            0.0602         0              0     
##  2 020Pe…      0.917           0            0.0833         0              0     
##  3 015Ea…      0.915           0            0.0580         0.0217         0     
##  4 017Ha…      0.906           0            0.0852         0              0     
##  5 021Fr…      0.884           0.0260       0.0661         0              0.0191
##  6 008To…      0.868           0.0301       0.0760         0.0259         0     
##  7 019Jo…      0.856           0.0654       0.0668         0.0115         0     
##  8 022Ar…      0.844           0.0670       0.0848         0              0     
##  9 012Je…      0.837           0.0395       0.0946         0.0138         0.0141
## 10 001Lu…      0.836           0.124        0.0319         0              0     
## # ℹ 18 more rows
## # ℹ 2 more variables: Campylobacterota <dbl>, Others <dbl>
## 
## $`present study`
## # A tibble: 98 × 8
## # Groups:   Sample [98]
##    Sample Firmicutes Bacteroidota Actinobacteriota Fusobacteriota Proteobacteria
##    <chr>       <dbl>        <dbl>            <dbl>          <dbl>          <dbl>
##  1 Netti…      0.994       0                0              0              0     
##  2 Netti…      0.976       0                0.0197         0              0     
##  3 Netti…      0.975       0                0.0233         0              0     
##  4 Netti…      0.957       0                0.0419         0              0     
##  5 Netti…      0.956       0                0.0323         0              0.0116
##  6 Netti…      0.943       0                0.0519         0              0     
##  7 Netti…      0.917       0                0.0729         0              0     
##  8 Netti…      0.902       0                0.0294         0              0.0650
##  9 Netti…      0.901       0.0152           0.0693         0.0131         0     
## 10 Netti…      0.881       0.0248           0.0809         0              0     
## # ℹ 88 more rows
## # ℹ 2 more variables: Campylobacterota <dbl>, Others <dbl>

phylum.composition %>% lapply(summary)

## $`pilot study`
##     Sample            Firmicutes     Actinobacteriota   Bacteroidota    
##  Length:28          Min.   :0.3244   Min.   :0.00000   Min.   :0.02085  
##  Class :character   1st Qu.:0.7173   1st Qu.:0.02589   1st Qu.:0.06452  
##  Mode  :character   Median :0.7719   Median :0.06968   Median :0.07679  
##                     Mean   :0.7632   Mean   :0.09480   Mean   :0.09150  
##                     3rd Qu.:0.8470   3rd Qu.:0.12981   3rd Qu.:0.11050  
##                     Max.   :0.9277   Max.   :0.37715   Max.   :0.24079  
##  Fusobacteriota    Proteobacteria     Campylobacterota       Others        
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.001151  
##  Median :0.01471   Median :0.000000   Median :0.000000   Median :0.004330  
##  Mean   :0.03272   Mean   :0.009954   Mean   :0.003112   Mean   :0.004756  
##  3rd Qu.:0.06827   3rd Qu.:0.018931   3rd Qu.:0.000000   3rd Qu.:0.008200  
##  Max.   :0.11172   Max.   :0.064716   Max.   :0.035429   Max.   :0.012866  
## 
## $`present study`
##     Sample            Firmicutes      Bacteroidota     Actinobacteriota 
##  Length:98          Min.   :0.1972   Min.   :0.00000   Min.   :0.00000  
##  Class :character   1st Qu.:0.7049   1st Qu.:0.00000   1st Qu.:0.07739  
##  Mode  :character   Median :0.7972   Median :0.02385   Median :0.11842  
##                     Mean   :0.7611   Mean   :0.05750   Mean   :0.12149  
##                     3rd Qu.:0.8463   3rd Qu.:0.05977   3rd Qu.:0.15228  
##                     Max.   :0.9943   Max.   :0.48026   Max.   :0.34529  
##  Fusobacteriota    Proteobacteria    Campylobacterota       Others        
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.001470  
##  Median :0.02206   Median :0.00000   Median :0.000000   Median :0.004220  
##  Mean   :0.04054   Mean   :0.01094   Mean   :0.002935   Mean   :0.005537  
##  3rd Qu.:0.05942   3rd Qu.:0.01406   3rd Qu.:0.000000   3rd Qu.:0.008369  
##  Max.   :0.24561   Max.   :0.13216   Max.   :0.108978   Max.   :0.020336

Taxonomy distribution at genus level

Actinobacteriota

Pilot study

genus.ps <- ps.pilot %>% subset_taxa(Phylum == 'Actinobacteriota') %>% 
  transform_sample_counts(function(x) x / sum(x)) %>% 
  psmelt()
genus.ps$Genus[genus.ps$Abundance < 0.01] <- 'Others'
genus.ps %>% 
  group_by(Sample, Genus) %>% 
  summarise(Abundance = sum(Abundance), .groups = 'drop') %>% 
  group_by(Genus) %>% 
  summarise(mean = sum(Abundance)/26, `standard deviation` = sd(Abundance)) %>% 
  arrange(desc(mean))

## # A tibble: 13 × 3
##    Genus                         mean `standard deviation`
##    <chr>                        <dbl>                <dbl>
##  1 Collinsella               0.934                  0.114 
##  2 <NA>                      0.0331                NA     
##  3 Parvibacter               0.0253                 0.227 
##  4 Slackia                   0.0243                 0.0803
##  5 Corynebacterium           0.0158                 0.275 
##  6 Actinomyces               0.0154                NA     
##  7 Others                    0.0100                 0.0101
##  8 Leucobacter               0.00769               NA     
##  9 Bifidobacterium           0.00585                0.0786
## 10 Paeniglutamicibacter      0.00331               NA     
## 11 Sanguibacter-Flavimobilis 0.00165               NA     
## 12 Enterorhabdus             0.000479              NA     
## 13 Libanicoccus              0.000469              NA

Current study

genus.ps <- ps %>% subset_taxa(Phylum == 'Actinobacteriota') %>% 
  transform_sample_counts(function(x) x / sum(x)) %>% 
  psmelt()
genus.ps$Genus[genus.ps$Abundance < 0.01] <- 'Others'
genus.ps %>% 
  group_by(Sample, Genus) %>% 
  summarise(Abundance = sum(Abundance), .groups = 'drop') %>% 
  group_by(Genus) %>% 
  summarise(mean = sum(Abundance)/98, `standard deviation` = sd(Abundance)) %>% 
  arrange(desc(mean))

## # A tibble: 13 × 3
##    Genus                       mean `standard deviation`
##    <chr>                      <dbl>                <dbl>
##  1 Collinsella             0.803                  0.156 
##  2 Slackia                 0.110                  0.142 
##  3 Bifidobacterium         0.0535                 0.284 
##  4 Parvibacter             0.0112                 0.0431
##  5 Others                  0.0109                 0.0140
##  6 Actinomyces             0.00513                0.341 
##  7 Leucobacter             0.00220               NA     
##  8 <NA>                    0.00205                0.0298
##  9 Corynebacterium         0.00129                0.0631
## 10 Raoultibacter           0.000325              NA     
## 11 Pseudopropionibacterium 0.000157              NA     
## 12 Eggerthella             0.000123              NA     
## 13 Asaccharobacter         0.000113              NA

Firmicutes