% Journal article record. The note field carries the funding acknowledgement
% (stored once) and the publisher copyright line; literal hash signs in it are
% escaped so the note typesets under LaTeX.
% NOTE(review): no pages / article-number field is present, and the month value
% comes from the export unchanged -- confirm both against the publisher record.
@article{ab8d72b89d1c4e45b6e3d3f48a09c238,
  author    = {Yu, Huihui and Li, Mu and Sandhu, Jaspreet and Sun, Guangchao and Schnable, James C. and Walia, Harkamal and Xie, Weibo and Yu, Bin and Mower, Jeffrey P. and Zhang, Chi},
  title     = {Pervasive misannotation of microexons that are evolutionarily conserved and crucial for gene function in plants},
  journal   = {Nature Communications},
  year      = {2022},
  month     = dec,
  volume    = {13},
  number    = {1},
  issn      = {2041-1723},
  doi       = {10.1038/s41467-022-28449-8},
  publisher = {Nature Publishing Group},
  language  = {English (US)},
  abstract  = {It is challenging to identify the smallest microexons ({$\leq$}15-nt) due to their small size. Consequently, these microexons are often misannotated or missed entirely during genome annotation. Here, we develop a pipeline to accurately identify 2,398 small microexons in 10 diverse plant species using 990 RNA-seq datasets, and most of them have not been annotated in the reference genomes. Analysis reveals that microexons tend to have increased detained flanking introns that require post-transcriptional splicing after polyadenylation. Examination of 45 conserved microexon clusters demonstrates that microexons and associated gene structures can be traced back to the origin of land plants. Based on these clusters, we develop an algorithm to genome-wide model coding microexons in 132 plants and find that microexons provide a strong phylogenetic signal for plant organismal relationships. Microexon modeling reveals diverse evolutionary trajectories, involving microexon gain and loss and alternative splicing. Our work provides a comprehensive view of microexons in plants.},
  note      = {Funding Information: We thank Dr. Jixian Zhai for sharing Arabidopsis PTS-intron list with us. We thank Hanh Nguyen and Thomas Clemente for providing Williams 82 soybean seedlings. We also thank all the research teams and individuals for sharing the genome sequences and RNA-seq datasets on public databases. The computations in the analysis of large-scale RNA-seq and genomic data were run on the bioinformatics computing platform of the National Key Laboratory of Crop Genetic Improvement, Huazhong Agricultural University. This project was supported by the National Science Foundation (Award \#: OIA-1557417 to C.Z. and B.Y., and Award \#: MCB-1818-82 to C.Z. and B.Y.) and the Nebraska Soybean Board (Award 20R-09-1/2 \#1739 to C.Z.). Publisher Copyright: {\textcopyright} 2022, The Author(s).},
}