% Conference paper in an SPIE proceedings series. The citation key is kept
% unchanged so existing citations still resolve. Proper nouns in the title are
% brace-protected so sentence-casing styles do not lowercase them.
@inproceedings{0375586c3eb040d798d1ff939a86d4e6,
  author    = {Smith, David C. and Kornelson, Keri A.},
  title     = {A comparison of {Fisher} vectors and {Gaussian} supervectors for document versus non-document image classification},
  booktitle = {Applications of Digital Image Processing XXXVI},
  series    = {Proceedings of SPIE - The International Society for Optical Engineering},
  year      = {2013},
  doi       = {10.1117/12.2023329},
  isbn      = {9780819497062},
  language  = {English},
  keywords  = {Fisher vector, GMM, Gaussian supervector, SURF descriptor, SVM, dimension reduction, document image classification, random projection, soft BOV, soft VLAD},
  abstract  = {This research addresses the document vs. non-document image classification problem. The ability to select images containing text from an OCR processing stream that also includes images of scenes, people, faces, etc., will eliminate unnecessary computation and free up valuable computer resources for other tasks. This is particularly true for high volume OCR systems. Fisher vectors represent images as gradients of a global generative Gaussian Mixture Model (GMM) of low level image descriptors, and exhibit state-of-the-art performance for object categorization. Gaussian supervectors represent images by soft clustering low level image descriptors according to posterior GMM mixture probabilities, optionally using MAP adaptation, and have demonstrated state-of-the-art performance for scene categorization. We compare results obtained by applying linear SVMs to Fisher vector and Gaussian supervector representations to categorize images as having only text, no text, or a mixture of text and non-text. We also report the performance of GMM-based soft versions of vectors of locally aggregated descriptors (VLAD) and Bag of Visual words (BOV).},
  note      = {Conference date: 26-08-2013 Through 29-08-2013},
}