% Journal article: Dobra, Valdes, Ajdic, Clarke and Clarke (2019), Annals of
% Applied Statistics 13(2), pp. 931--957.
% The exporter had put scraped body text, funding and authorship footnotes,
% submission dates, a duplicate keyword list and a copyright line into the
% printable "note" field. That text is preserved verbatim below in
% "exported-note", a non-standard field name that bibliography styles do not
% read, so it is kept as data but no longer printed with the reference.
@article{4f8ad0760adf4c74bf2a44fb2a0450e2,
  author        = {Dobra, Adrian and Valdes, Camilo and Ajdic, Dragana and Clarke, Bertrand and Clarke, Jennifer},
  title         = {Modeling association in microbial communities with clique loglinear models},
  journal       = {Annals of Applied Statistics},
  year          = {2019},
  volume        = {13},
  number        = {2},
  pages         = {931--957},
  doi           = {10.1214/18-AOAS1229},
  issn          = {1932-6157},
  publisher     = {Institute of Mathematical Statistics},
  language      = {English (US)},
  keywords      = {Contingency tables, Graphical models, Microbiome, Model selection, Next generation sequencing},
  abstract      = {There is a growing awareness of the important roles that microbial communities play in complex biological processes. Modern investigation of these often uses next generation sequencing of metagenomic samples to determine community composition. We propose a statistical technique based on clique loglinear models and Bayes model averaging to identify microbial components in a metagenomic sample at various taxonomic levels that have significant associations. We describe the model class, a stochastic search technique for model selection, and the calculation of estimates of posterior probabilities of interest. We demonstrate our approach using data from the Human Microbiome Project and from a study of the skin microbiome in chronic wound healing. Our technique also identifies significant dependencies among microbial components as evidence of possible microbial syntrophy.},
  exported-note = {Funding Information: Human Microbiome Project (HMP) is an ongoing collaborative study funded by the U.S. National Institutes of Health (NIH) to provide data and tools for studying the role of human microbiomes in human health and disease. Started in 2007 it has generated ground-breaking publications [Fierer et al. (2010), Minot et al. (2013), Zhao et al. (2012)] and a plethora of metagenomic data on human microbiomes. Our method from Section 2.4 can represent the associations from an HMP sample with an independence graph so we can infer the bacterial taxa present and their associations. Funding Information: Received January 2018; revised November 2018. 1Supported in part by NSF Grants DMS/MPS-1737746 and DMS-1120255 to University of Washington, and Grants DMS-1410771 and DMS-1419754 to University of Nebraska-Lincoln. 2A. Dobra and C. Valdes contributed equally to this work and are joint first authors. Key words and phrases. Contingency tables, graphical models, model selection, microbiome, next generation sequencing. Publisher Copyright: {\textcopyright} Institute of Mathematical Statistics, 2019.},
}