% Pavlovikj, Gomes-Neto, Deogun, Benson (2022), Life 12(5), article 670:
% k-mer length optimization for stringMLST and its integration into ProkEvo.
% LaTeX specials in the note field are escaped so the entry typesets under classic BibTeX styles.
@article{e2aa657ae09f4d9db7e3c23d2b837b57,
title = "Systems-Based Approach for Optimization of Assembly-Free Bacterial {MLST} Mapping",
abstract = "Epidemiological surveillance of bacterial pathogens requires real-time data analysis with a fast turnaround, while aiming at generating two main outcomes: (1) species-level identification and (2) variant mapping at different levels of genotypic resolution for population-based tracking and surveillance, in addition to predicting traits such as antimicrobial resistance (AMR). Multi-locus sequence typing (MLST) aids this process by identifying sequence types (ST) based on seven ubiquitous genome-scattered loci. In this paper, we selected one assembly-dependent and one assembly-free method for ST mapping and applied them with the default settings and ST schemes they are distributed with, and systematically assessed their accuracy and scalability across a wide array of phylogenetically divergent Public Health-relevant bacterial pathogens with available MLST databases. Our data show that the optimal k-mer length for stringMLST is species-specific and that genome-intrinsic and -extrinsic features can affect the performance and accuracy of the program. Although suitable parameters could be identified for most organisms, there were instances where this program may not be directly deployable in its current format. Next, we integrated stringMLST into our freely available and scalable hierarchical-based population genomics platform, ProkEvo, and further demonstrated how the implementation facilitates automated, reproducible bacterial population analysis.",
keywords = "MLST, ProkEvo, genomic epidemiology, k-mer lengths, multi-locus sequence typing, parameter-tunning, public health, stringMLST, surveillance, whole-genome sequencing, zoonotic pathogens",
author = "Pavlovikj, Natasha and Gomes-Neto, {Joao Carlos} and Deogun, {Jitender S.} and Benson, {Andrew K.}",
note = "Funding Information: This work was completed by utilizing the Holland Computing Center of the University of Nebraska, which receives support from the Nebraska Research Initiative, and using resources provided by the Open Science Grid, which is supported by the National Science Foundation and the U.S. Department of Energy{\textquoteright}s Office of Science. This research used the Pegasus Workflow Management Software funded by the National Science Foundation under grant \#1664162. This publication made use of the PubMLST website (https://pubmlst.org/ (accessed on 20 February 2022)) developed by Keith Jolley (Jolley \& Maiden 2010, BMC Bioinformatics, 11:595) and sited at the University of Oxford. The development of that website was funded by the Wellcome Trust. We would like to greatly thank Mats Rynge for his extensive assistance and valuable suggestions while setting up and running ProkEvo on the Open Science Grid. We also thank Derek Weitzel and Karan Vahi for their technical support. Publisher Copyright: {\textcopyright} 2022 by the authors. Licensee MDPI, Basel, Switzerland.",
year = "2022",
month = may,
doi = "10.3390/life12050670",
language = "English (US)",
volume = "12",
number = "5",
pages = "670",
journal = "Life",
issn = "2075-1729",
publisher = "Multidisciplinary Digital Publishing Institute (MDPI)",
}