% Using Simulation to Evaluate Error Detection Strategies: A Case Study of Cloud-Based Deployment Processes
% Journal article: Journal of Systems and Software, vol. 110, Dec. 2015.
% Text on these lines sits outside the entry and is ignored by BibTeX.
% Fields `venue` and `fundedBy` are not standard classic-BibTeX fields;
% standard styles skip them (presumably read by external tooling -- confirm).
% `note` repeats the DOI, presumably so that styles which do not print the
% `doi` field still show it; drop `note` if a doi-aware style prints it twice.
@article{Chen15,
  author   = {Jie Chen and Xiwei Xu and Leon J. Osterweil and Liming Zhu and
              Yuriy Brun and Len Bass and Junchao Xiao and Mingshu Li and
              Qing Wang},
  title    = {Using Simulation to Evaluate Error Detection Strategies: {A}
              Case Study of Cloud-Based Deployment Processes},
  journal  = {Journal of Systems and Software},
  venue    = {JSS},
  year     = {2015},
  volume   = {110},
  pages    = {205--221},
  month    = dec,
  doi      = {10.1016/j.jss.2015.08.043},
  note     = {DOI: 10.1016/j.jss.2015.08.043},
  abstract = {The processes for deploying systems in cloud environments can
              be the basis for studying strategies for detecting and
              correcting errors committed during complex process execution.
              These cloud-based processes encompass diverse activities, and
              entail complex interactions between cloud infrastructure,
              application software, tools, and humans. Many of these
              processes, such as those for making release decisions during
              continuous deployment and troubleshooting in system upgrades,
              are highly error-prone. Unlike the typically well-tested
              deployed software systems, these deployment processes are
              usually neither well understood nor well tested. Errors that
              occur during such processes may require time-consuming
              troubleshooting, undoing and redoing steps, and problem fixing.
              Consequently, these processes should ideally be guided by
              strategies for detecting errors that consider trade-offs
              between efficiency and reliability. This paper presents a
              framework for systematically exploring such trade-offs. To
              evaluate the framework and illustrate our approach, we use two
              representative cloud deployment processes: a continuous
              deployment process and a rolling upgrade process. We augment an
              existing process modeling language to represent these processes
              and model errors that may occur during process execution. We
              use a process-aware discrete-event simulator to evaluate
              strategies and empirically validate simulation results by
              comparing them to experiences in a production environment. Our
              evaluation demonstrates that our approach supports the study of
              how error-handling strategies affect how much time is taken for
              task-completion and error-fixing.},
  fundedBy = {NSF IIS-1239334, NSF CNS-1258588, NSF IIS-0705772,
              Natural Science Foundation of China 91318301,
              Natural Science Foundation of China 91218302},
}