% COP (Conflict Order Planning) paper from the EDBT 2017 proceedings.
% Names use the "Last, First" form; acronyms are brace-protected against style recasing.
@inproceedings{a61230b64b5849608411f08de2f6eec5,
  author    = {Nawab, Faisal and Agrawal, Divyakant and {El Abbadi}, Amr and Chawla, Sanjay},
  title     = {{COP}: Planning Conflicts for Faster Parallel Transactional Machine Learning},
  booktitle = {Advances in Database Technology - {EDBT} 2017},
  editor    = {Mitschang, Bernhard and Markl, Volker and Bress, Sebastian and Andritsos, Periklis and Sattler, Kai-Uwe and Orlando, Salvatore},
  series    = {Advances in Database Technology - {EDBT}},
  publisher = {OpenProceedings.org},
  year      = {2017},
  pages     = {132--143},
  doi       = {10.5441/002/edbt.2017.13},
  language  = {English},
  note      = {Funding Information: This work is partially funded by a gift grant from Oracle and a gift grant from NEC Labs America. We would also like to thank Amazon for access to Amazon EC2. Publisher Copyright: {\textcopyright} 2017, Copyright is with the authors. Conference date: 21-03-2017 Through 24-03-2017},
  abstract  = {Machine learning techniques are essential to extracting knowledge from data. The volume of data encourages the use of parallelization techniques to extract knowledge faster. However, schemes to parallelize machine learning tasks face the trade-off between obeying strict consistency constraints and performance. Existing consistency schemes require expensive coordination between worker threads to detect conflicts, leading to poor performance. In this work, we consider the problem of improving the performance of multi-core machine learning while preserving strong consistency guarantees. We propose Conflict Order Planning (COP), a consistency scheme that exploits special properties of machine learning workloads to reduce the overhead of coordination. What is special about machine learning workloads is that the dataset is often known prior to the execution of the machine learning algorithm and is reused multiple times with different settings. We exploit this prior knowledge of the dataset to plan a partial order for concurrent execution. This planning reduces the cost of consistency significantly because it allows the use of a light-weight conflict detection operation that we call ReadWait. We demonstrate the use of COP on a Stochastic Gradient Descent algorithm for Support Vector Machines and observe better scalability and a speedup factor between 2-6x when compared to other consistency schemes.},
}