% Conference paper from the 2014 IEEE High Performance Extreme Computing
% Conference (HPEC 2014).
% - Case-sensitive names in the title are brace-protected so that styles which
%   sentence-case titles keep them intact.
% - `month` follows the conference dates recorded in `note` (9--11 September 2014).
% - `abstract`, `keywords`, `day` and `language` are not used by the classic
%   BibTeX styles; they are retained as metadata.
@inproceedings{1391e5e576084ea99b98c3bcfc7aad3a,
  author    = {Kepner, Jeremy and Arcand, William and Bestor, David and Bergeron, Bill and Byun, Chansup and Gadepally, Vijay and Hubbell, Matthew and Michaleas, Peter and Mullen, Julie and Prout, Andrew and Reuther, Albert and Rosa, Antonio and Yee, Charles},
  title     = {Achieving 100,000,000 database inserts per second using {Accumulo} and {D4M}},
  booktitle = {2014 {IEEE} High Performance Extreme Computing Conference, {HPEC} 2014},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2014},
  month     = sep,
  day       = {11},
  doi       = {10.1109/HPEC.2014.7040945},
  note      = {Publisher copyright: {\textcopyright} 2014 IEEE. Conference held 9--11 September 2014},
  language  = {English},
  keywords  = {Accumulo, Big Data, D4M, Graph500, Hadoop, MIT SuperCloud},
  abstract  = {The Apache Accumulo database is an open source relaxed consistency database that is widely used for government applications. Accumulo is designed to deliver high performance on unstructured data such as graphs of network data. This paper tests the performance of Accumulo using data from the Graph500 benchmark. The Dynamic Distributed Dimensional Data Model (D4M) software is used to implement the benchmark on a 216-node cluster running the MIT SuperCloud software stack. A peak performance of over 100,000,000 database inserts per second was achieved which is 100$\times$ larger than the highest previously published value for any other database. The performance scales linearly with the number of ingest clients, number of database servers, and data size. The performance was achieved by adapting several supercomputing techniques to this application: distributed arrays, domain decomposition, adaptive load balancing, and single-program-multiple-data programming.},
}