% CIKM'10 short paper on FST-K-Means (force-directed graph layout as a feature subspace transformation applied before K-Means).
@inproceedings{f020dddf6a5e4c308493f792cf6f4d78,
  author    = {Chatterjee, Anirban and Bhowmick, Sanjukta and Raghavan, Padma},
  title     = {Feature subspace transformations for enhancing {K-means} clustering},
  booktitle = {CIKM'10 - Proceedings of the 19th International Conference on Information and Knowledge Management and Co-located Workshops},
  series    = {International Conference on Information and Knowledge Management, Proceedings},
  pages     = {1801--1804},
  year      = {2010},
  doi       = {10.1145/1871437.1871733},
  isbn      = {9781450300995},
  language  = {English (US)},
  keywords  = {Feature subspace, Graph layout, K-means clustering},
  abstract  = {Unsupervised classification typically concerns identifying clusters of similar entities in an unlabeled dataset. Popular methods include clustering based on (i) distance-based metrics between the entities in the feature space (K-Means), and (ii) combinatorial properties in a weighted graph representation of the dataset (Multilevel K-Means). In this paper, we present a force-directed graph layout based feature subspace transformation (FST) scheme to transform the dataset before the application of K-Means. Our FST-K-Means method utilizes both distance-based and combinatorial attributes of the original dataset to seek improvements in the internal and external quality metrics of unsupervised classification. We demonstrate the effectiveness of FST-K-Means in improving classification quality relative to K-Means and Multilevel K-Means (GraClus). The quality of classification is measured by observing internal and external quality metrics on a test suite of datasets. Our results indicate that on average, the internal quality metric (cluster cohesiveness) is 20.2\% better than K-Means, and 6.6\% better than GraClus. More significantly, FST-K-Means improves the external quality metric (accuracy) of classification on average by 14.9\% relative to K-Means and 23.6\% relative to GraClus.},
  note      = {19th International Conference on Information and Knowledge Management and Co-located Workshops, CIKM'10 ; Conference date: 26-10-2010 Through 30-10-2010},
}