% Catazaro, Caprez, Swanson, Powers (2019): "Functional Evolution of Proteins", Proteins 87(6), 492--501.
% The note field holds only the funding statement and the publisher copyright line.
@article{3472dddc69ac4a63966970c42066f576,
  author    = {Catazaro, Jonathan and Caprez, Adam and Swanson, David and Powers, Robert},
  title     = {Functional Evolution of Proteins},
  journal   = {Proteins: Structure, Function and Bioinformatics},
  year      = {2019},
  month     = jun,
  volume    = {87},
  number    = {6},
  pages     = {492--501},
  doi       = {10.1002/prot.25670},
  issn      = {0887-3585},
  publisher = {Wiley-Liss Inc.},
  language  = {English (US)},
  keywords  = {CPASS, functional evolution, protein active-sites, proteins},
  abstract  = {The functional evolution of proteins advances through gene duplication followed by functional drift, whereas molecular evolution occurs through random mutational events. Over time, protein active-site structures or functional epitopes remain highly conserved, which enables relationships to be inferred between distant orthologs or paralogs. In this study, we present the first functional clustering and evolutionary analysis of the RCSB Protein Data Bank (RCSB PDB) based on similarities between active-site structures. All of the ligand-bound proteins within the RCSB PDB were scored using our Comparison of Protein Active-site Structures (CPASS) software and database (http://cpass.unl.edu/). Principal component analysis was then used to identify 4431 representative structures to construct a phylogenetic tree based on the CPASS comparative scores (http://itol.embl.de/shared/jcatazaro). The resulting phylogenetic tree identified a sequential, step-wise evolution of protein active-sites and provides novel insights into the emergence of protein function or changes in substrate specificity based on subtle changes in geometry and amino acid composition.},
  note      = {Funding Information: This work was supported by the National Institute of Allergy and Infectious Diseases (R21AI081154), The Redox Biology Center (P30GM103335, NIGMS); and The Nebraska Center for Integrated Biomolecular Communication (P20GM113126, NIGMS). The research was performed in facilities renovated with support from the National Institutes of Health (RR015468-01). This work was completed utilizing the Holland Computing Center of the University of Nebraska. Publisher Copyright: {\textcopyright} 2019 Wiley Periodicals, Inc.},
}