% Conference paper from the SDM 2013 proceedings (cleaned-up portal export).
% Changes relative to the raw export:
%   - author: dropped a stray trailing "y" on the second author's surname
%     (looks like a footnote marker picked up by the exporter).
%   - title: Greek kappa replaced with math-mode k, matching the abstract's
%     "k-means"; the algorithm name is written with two trailing hyphens
%     ("-{}-" stops LaTeX from fusing them into an en dash).
%     NOTE(review): confirm the exact title against the DOI landing page.
%   - series: removed; it duplicated booktitle word for word.
%   - publisher: spelled out instead of the exporter's "Siam Society".
%   - note: removed the exporter's "null" placeholder; the conference date
%     range is read as day-month-year (2--4 May 2013).
%   - abstract: "%" escaped so styles that print the abstract do not break.
@inproceedings{12e751c26fed424ebb0e99330220d69d,
  author    = {Chawla, Sanjay and Gionis, Aristides},
  title     = {{$k$-means-{}-}: A Unified Approach to Clustering and Outlier Detection},
  booktitle = {Proceedings of the 2013 {SIAM} International Conference on Data Mining, {SDM} 2013},
  editor    = {Ghosh, Joydeep and Obradovic, Zoran and Dy, Jennifer and Zhou, Zhi-Hua and Kamath, Chandrika and Parthasarathy, Srinivasan},
  publisher = {Society for Industrial and Applied Mathematics},
  year      = {2013},
  month     = may,
  pages     = {189--197},
  doi       = {10.1137/1.9781611972832.21},
  language  = {English},
  note      = {Publisher Copyright: Copyright {\textcopyright} SIAM. Conference date: 2--4 May 2013},
  abstract  = {We present a unified approach for simultaneously clustering and discovering outliers in data. Our approach is formalized as a generalization of the k-means problem. We prove that the problem is NP-hard and then present a practical polynomial time algorithm, which is guaranteed to converge to a local optimum. Furthermore we extend our approach to all distance measures that can be expressed in the form of a Bregman divergence. Experiments on synthetic and real datasets demonstrate the effectiveness of our approach and the utility of carrying out both clustering and outlier detection in a concurrent manner. In particular on the famous KDD cup network-intrusion dataset, we were able to increase the precision of the outlier detection task by nearly 100\% compared to the classical nearest-neighbor approach.},
}