% Work-in-progress paper from the 43rd IEEE Real-Time Systems Symposium (RTSS 2022).
% The citation key is a machine-generated identifier; keep it unchanged so that
% existing \cite commands continue to resolve.
% `funding` and `copyright` are not standard fields: they are ignored by
% bibliography styles and kept only as record-keeping annotations.
@inproceedings{e9411acac4af44439260cb7a2cfb3fa9,
  author     = {Zhang, Lin and Wang, Zifan and Kong, Fanxin},
  title      = {Work-in-Progress: Optimal Checkpointing Strategy for Real-time Systems with Both Logical and Timing Correctness},
  booktitle  = {Proceedings - 43rd {IEEE} Real-Time Systems Symposium, {RTSS} 2022},
  series     = {Proceedings - Real-Time Systems Symposium},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  year       = {2022},
  pages      = {515--518},
  doi        = {10.1109/RTSS55097.2022.00055},
  eventtitle = {43rd {IEEE} Real-Time Systems Symposium, {RTSS} 2022},
  eventdate  = {2022-12-05/2022-12-08},
  language   = {English (US)},
  keywords   = {checkpointing, fault resilience, logical consistency, Real-time systems, timing correctness},
  abstract   = {This paper proposes an optimal checkpoint scheme for fault resilience in real-time systems, in which we consider both logical consistency and timing correctness. First, we partition message-passing processes into a directed acyclic graph (DAG) considering their dependencies, where the logical consistency of checkpoints is guaranteed. Then, we find the critical path of the DAG, which is the longest path performed in sequence. Next, we analyze the optimal checkpoint strategy on the critical path where the overall execution time (including checkpointing overhead) is minimized. When a fault is detected, the system rolls back to the nearest valid checkpoint for recovery. The optimal number of checkpoints and their intervals are derived by the algorithm.},
  funding    = {This research was supported in part by NSF CNS-2143256. Any opinions, findings and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation (NSF).},
  copyright  = {{\textcopyright} 2022 IEEE.},
}