% Conference paper: Deng, Geng and Ali (2005), profile-HMM-based prediction of
% yeast gene function classes, in the CATA 2005 proceedings (pp. 196--201).
% The citation key is the exporter-generated identifier; it is kept as-is so
% existing citations keep resolving.
% No `series` field on purpose: the export only repeated the proceedings title
% there, which is already carried by `booktitle`.
@inproceedings{567c4ca6b330467c96c024bcd7fb7935,
  author    = {Deng, Xutao and Geng, Huimin and Ali, Hesham},
  title     = {Predicting Yeast Gene Function Based on Hidden {Markov} Models},
  booktitle = {20th International Conference on Computers and Their Applications 2005, {CATA} 2005},
  year      = {2005},
  pages     = {196--201},
  isbn      = {9781618395528},
  language  = {English (US)},
  keywords  = {Function prediction, Gene expression, Hidden Markov model},
  note      = {Conference date: 16--18 March 2005},
  abstract  = {The prediction of function classes for unannotated genes or Open Reading Frames (ORFs) is important for understanding the function role of genes and gene networks. Existing data mining tools, such as Support Vector Machines (SVMs) and K-Nearest Neighbors (KNNs), can only achieve about 40\% precision. We developed a gene function prediction tool based on profile Hidden Markov Models (HMMs). HMMs have shown great successes in modeling noisy sequential data sets in speech recognition and protein sequence profiling. Results from contingency test showed significant Markov dependency in time-series expression data, and therefore HMMs would be especially appropriate for modeling gene expressions. Each function class is associated with a distinct HMM whose parameters are trained using yeast time-series gene expression data. The function annotations of the HMM training set were obtained from the Munich Information Centre for Protein Sequences (MIPS) data base. We designed two structural variants of HMMs (chain HMM, split HMM) and tested each of them on 40 function classes. The highest overall prediction precision achieved was 67\% using double-split HMM with n-fold cross-validation. We also attempted to generalize HMMs to Dynamic Bayesian Networks (DBNs) for gene function prediction using heterogeneous data sets.},
}