abstract={Data replication is a key technology in distributed systems that enables higher availability and performance. This article surveys optimistic replication algorithms. They allow replica contents to diverge in the short term to support concurrent work practices and tolerate failures in low-quality communication links. The importance of such techniques is increasing as collaboration through wide-area and mobile networks becomes popular. Optimistic replication deploys algorithms not seen in traditional “pessimistic” systems. Instead of synchronous replica coordination, an optimistic algorithm propagates changes in the background, discovers conflicts after they happen, and reaches agreement on the final contents incrementally. We explore the solution space for optimistic replication algorithms. This article identifies key challenges facing optimistic replication systems---ordering operations, detecting and resolving conflicts, propagating changes efficiently, and bounding replica divergence---and provides a comprehensive survey of techniques developed for addressing these challenges.},
abstract={Data replication is a key technology in distributed systems that enables higher availability and performance. This article surveys optimistic replication algorithms. They allow replica contents to diverge in the short term to support concurrent work practices and tolerate failures in low-quality communication links. The importance of such techniques is increasing as collaboration through wide-area and mobile networks becomes popular. Optimistic replication deploys algorithms not seen in traditional “pessimistic” systems. Instead of synchronous replica coordination, an optimistic algorithm propagates changes in the background, discovers conflicts after they happen, and reaches agreement on the final contents incrementally. We explore the solution space for optimistic replication algorithms. This article identifies key challenges facing optimistic replication systems---ordering operations, detecting and resolving conflicts, propagating changes efficiently, and bounding replica divergence---and provides a comprehensive survey of techniques developed for addressing these challenges.},
pages={42},
language={en},
number={1},
number={1},
journaltitle={{ACM} Computing Surveys},
author={Saito, Yasushi and Shapiro, Marc},
urldate={2023-06-09},
urldate={2023-06-09},
date={2005},
journal={ACM Computing Surveys},
langid={english},
author={Saito, Yasushi and Shapiro, Marc},
year={2005},
pages={42},
file={Saito et Shapiro - 2005 - Optimistic Replication.pdf:/home/amaury/Zotero/storage/4WJX5IAN/Saito et Shapiro - 2005 - Optimistic Replication.pdf:application/pdf},
file={Saito et Shapiro - 2005 - Optimistic Replication.pdf:/home/amaury/Zotero/storage/4WJX5IAN/Saito et Shapiro - 2005 - Optimistic Replication.pdf:application/pdf},
}
}
@article{singh_zeno_nodate,
@article{singh_zeno_2009,
title={Zeno: Eventually Consistent Byzantine-Fault Tolerance},
abstract={Many distributed services are hosted at large, shared, geographically diverse data centers, and they use replication to achieve high availability despite the unreachability of an entire data center. Recent events show that non-crash faults occur in these services and may lead to long outages. While Byzantine-Fault Tolerance ({BFT}) could be used to withstand these faults, current {BFT} protocols can become unavailable if a small fraction of their replicas are unreachable. This is because existing {BFT} protocols favor strong safety guarantees (consistency) over liveness (availability).},
abstract={Many distributed services are hosted at large, shared, geographically diverse data centers, and they use replication to achieve high availability despite the unreachability of an entire data center. Recent events show that non-crash faults occur in these services and may lead to long outages. While Byzantine-Fault Tolerance (BFT) could be used to withstand these faults, current BFT protocols can become unavailable if a small fraction of their replicas are unreachable. This is because existing BFT protocols favor strong safety guarantees (consistency) over liveness (availability).},
language={en},
author={Singh, Atul and Fonseca, Pedro and Kuznetsov, Petr and Rodrigues, Rodrigo and Maniatis, Petros},
author={Singh, Atul and Fonseca, Pedro and Kuznetsov, Petr and Rodrigues, Rodrigo and Maniatis, Petros},
langid={english},
year={2009},
file={Singh et al. - Zeno Eventually Consistent Byzantine-Fault Tolera.pdf:/home/amaury/Zotero/storage/K6J2UEBK/Singh et al. - Zeno Eventually Consistent Byzantine-Fault Tolera.pdf:application/pdf},
file={Singh et al. - Zeno Eventually Consistent Byzantine-Fault Tolera.pdf:/home/amaury/Zotero/storage/K6J2UEBK/Singh et al. - Zeno Eventually Consistent Byzantine-Fault Tolera.pdf:application/pdf},
}
}
@inproceedings{shakarami_refresh_2019,
@inproceedings{shakarami_refresh_2019,
title={Refresh Instead of Revoke Enhances Safety and Availability: A Formal Analysis},
title={Refresh {Instead} of {Revoke} {Enhances} {Safety} and {Availability}: {A} {Formal} {Analysis}},
volume={{LNCS}-11559},
volume={LNCS-11559},
shorttitle={Refresh {Instead} of {Revoke} {Enhances} {Safety} and {Availability}},
url={https://inria.hal.science/hal-02384596},
url={https://inria.hal.science/hal-02384596},
doi={10.1007/978-3-030-22479-0_16},
doi={10.1007/978-3-030-22479-0_16},
shorttitle={Refresh Instead of Revoke Enhances Safety and Availability},
abstract={Due to inherent delays and performance costs, the decision point in a distributed multi-authority Attribute-Based Access Control (ABAC) system is exposed to the risk of relying on outdated attribute values and policy; which is the safety and consistency problem. This paper formally characterizes three increasingly strong levels of consistency to restrict this exposure. Notably, we recognize the concept of refreshing attribute values rather than simply checking the revocation status, as in traditional approaches. Refresh replaces an older value with a newer one, while revoke simply invalidates the old value. Our lowest consistency level starts from the highest level in prior revocation-based work by Lee and Winslett (LW). Our two higher levels utilize the concept of request time which is absent in LW. For each of our levels we formally show that using refresh instead of revocation provides added safety and availability.},
abstract={Due to inherent delays and performance costs, the decision point in a distributed multi-authority Attribute-Based Access Control ({ABAC}) system is exposed to the risk of relying on outdated attribute values and policy; which is the safety and consistency problem. This paper formally characterizes three increasingly strong levels of consistency to restrict this exposure. Notably, we recognize the concept of refreshing attribute values rather than simply checking the revocation status, as in traditional approaches. Refresh replaces an older value with a newer one, while revoke simply invalidates the old value. Our lowest consistency level starts from the highest level in prior revocation-based work by Lee and Winslett ({LW}). Our two higher levels utilize the concept of request time which is absent in {LW}. For each of our levels we formally show that using refresh instead of revocation provides added safety and availability.},
language={en},
eventtitle={33rd {IFIP} Annual Conference on Data and Applications Security and Privacy ({DBSec})},
urldate={2023-06-09},
pages={301},
publisher={Springer International Publishing},
publisher={Springer International Publishing},
author={Shakarami, Mehrnoosh and Sandhu, Ravi},
author={Shakarami, Mehrnoosh and Sandhu, Ravi},
urldate={2023-06-09},
month=jul,
date={2019-07-15},
year={2019},
langid={english},
pages={301},
file={Shakarami et Sandhu - 2019 - Refresh Instead of Revoke Enhances Safety and Avai.pdf:/home/amaury/Zotero/storage/XQNWKF7H/Shakarami et Sandhu - 2019 - Refresh Instead of Revoke Enhances Safety and Avai.pdf:application/pdf},
file={Shakarami et Sandhu - 2019 - Refresh Instead of Revoke Enhances Safety and Avai.pdf:/home/amaury/Zotero/storage/XQNWKF7H/Shakarami et Sandhu - 2019 - Refresh Instead of Revoke Enhances Safety and Avai.pdf:application/pdf},
}
}
...
@@ -46,15 +47,15 @@
...
@@ -46,15 +47,15 @@
issn={0164-0925, 1558-4593},
issn={0164-0925, 1558-4593},
url={https://dl.acm.org/doi/10.1145/5001.5007},
url={https://dl.acm.org/doi/10.1145/5001.5007},
doi={10.1145/5001.5007},
doi={10.1145/5001.5007},
abstract={The problem of concurrent accesses to registers by asynchronous components is considered. A set of axioms about the values in a register during concurrent accesses is proposed. It is shown that if these axioms are met by a register, then concurrent accesses to it may be viewed as nonconcurrent, thus making it possible to analyze asynchronous algorithms without elaborate timing analysis of operations. These axioms are shown, in a certain sense, to be the weakest. Motivation for this work came from analyzing low-level hardware components in a {VLSI} chip which concurrently accesses a flip-flop.},
abstract={The problem of concurrent accesses to registers by asynchronous components is considered. A set of axioms about the values in a register during concurrent accesses is proposed. It is shown that if these axioms are met by a register, then concurrent accesses to it may be viewed as nonconcurrent, thus making it possible to analyze asynchronous algorithms without elaborate timing analysis of operations. These axioms are shown, in a certain sense, to be the weakest. Motivation for this work came from analyzing low-level hardware components in a VLSI chip which concurrently accesses a flip-flop.},
pages={142--153},
language={en},
number={1},
number={1},
journaltitle={{ACM} Transactions on Programming Languages and Systems},
shortjournal={{ACM} Trans. Program. Lang. Syst.},
author={Misra, J.},
urldate={2023-06-08},
urldate={2023-06-08},
date={1986-01-02},
journal={ACM Transactions on Programming Languages and Systems},
langid={english},
author={Misra, J.},
month=jan,
year={1986},
pages={142--153},
file={Misra - 1986 - Axioms for memory access in asynchronous hardware .pdf:/home/amaury/Zotero/storage/KZP2774N/Misra - 1986 - Axioms for memory access in asynchronous hardware .pdf:application/pdf},
file={Misra - 1986 - Axioms for memory access in asynchronous hardware .pdf:/home/amaury/Zotero/storage/KZP2774N/Misra - 1986 - Axioms for memory access in asynchronous hardware .pdf:application/pdf},
}
}
...
@@ -65,58 +66,59 @@
...
@@ -65,58 +66,59 @@
url={https://doi.org/10.1007/BF01786228},
url={https://doi.org/10.1007/BF01786228},
doi={10.1007/BF01786228},
doi={10.1007/BF01786228},
abstract={Interprocess communication is studied without assuming any lower-level communication primitives. Three classes of communication registers are considered, and several constructions are given for implementing one class of register with a weaker class. The formalism developed in Part I is used in proving the correctness of these constructions.},
abstract={Interprocess communication is studied without assuming any lower-level communication primitives. Three classes of communication registers are considered, and several constructions are given for implementing one class of register with a weaker class. The formalism developed in Part I is used in proving the correctness of these constructions.},
pages={86--101},
language={en},
number={2},
number={2},
journaltitle={Distributed Computing},
shortjournal={Distrib Comput},
author={Lamport, Leslie},
urldate={2023-06-08},
urldate={2023-06-08},
date={1986-06-01},
journal={Distributed Computing},
langid={english},
author={Lamport, Leslie},
month=jun,
year={1986},
keywords={Communication Network, Computer Hardware, Computer System, Operating System, System Organization},
keywords={Communication Network, Computer Hardware, Computer System, Operating System, System Organization},
pages={86--101},
file={Lamport - 1986 - On interprocess communication.pdf:/home/amaury/Zotero/storage/XV7AEARN/Lamport - 1986 - On interprocess communication.pdf:application/pdf},
file={Lamport - 1986 - On interprocess communication.pdf:/home/amaury/Zotero/storage/XV7AEARN/Lamport - 1986 - On interprocess communication.pdf:application/pdf},
}
}
@book{lipton_pram_1988,
@book{lipton_pram_1988,
title={{PRAM}: A Scalable Shared Memory},
title={{PRAM}: {A} {Scalable} {Shared} {Memory}},
shorttitle={{PRAM}},
shorttitle={{PRAM}},
pagetotal={13},
language={en},
publisher={Princeton University, Department of Computer Science},
publisher={Princeton University, Department of Computer Science},
author={Lipton, Richard J. and Sandberg, Jonathan S.},
author={Lipton, Richard J. and Sandberg, Jonathan S.},
date={1988},
year={1988},
langid={english},
note={Google-Books-ID: 962epwAACAAJ},
note={Google-Books-{ID}: 962epwAACAAJ},
file={Lipton et Sandberg - 1988 - PRAM A Scalable Shared Memory.pdf:/home/amaury/Zotero/storage/3ZYT3WT4/Lipton et Sandberg - 1988 - PRAM A Scalable Shared Memory.pdf:application/pdf},
file={Lipton et Sandberg - 1988 - PRAM A Scalable Shared Memory.pdf:/home/amaury/Zotero/storage/3ZYT3WT4/Lipton et Sandberg - 1988 - PRAM A Scalable Shared Memory.pdf:application/pdf},
}
}
@inproceedings{hutto_slow_1990,
@inproceedings{hutto_slow_1990,
title={Slow memory: weakening consistency to enhance concurrency in distributed shared memories},
title={Slow memory: weakening consistency to enhance concurrency in distributed shared memories},
abstract={The use of weakly consistent memories in distributed shared memory systems to combat unacceptable network delay and to allow such systems to scale is proposed. Proposed memory correctness conditions are surveyed, and how they are related by a weakness hierarchy is demonstrated. Multiversion and messaging interpretations of memory are introduced as means of systematically exploring the space of possible memories. Slow memory is presented as a memory that allows the effects of writes to propagate slowly through the system, eliminating the need for costly consistency maintenance protocols that limit concurrency. Slow memory possesses a valuable locality property and supports a reduction from traditional atomic memory. Thus slow memory is as expressive as atomic memory. This expressiveness is demonstrated by two exclusion algorithms and a solution to M.J. Fischer and A. Michael's (1982) dictionary problem on slow memory.},
abstract={The use of weakly consistent memories in distributed shared memory systems to combat unacceptable network delay and to allow such systems to scale is proposed. Proposed memory correctness conditions are surveyed, and how they are related by a weakness hierarchy is demonstrated. Multiversion and messaging interpretations of memory are introduced as means of systematically exploring the space of possible memories. Slow memory is presented as a memory that allows the effects of writes to propagate slowly through the system, eliminating the need for costly consistency maintenance protocols that limit concurrency. Slow memory possesses a valuable locality property and supports a reduction from traditional atomic memory. Thus slow memory is as expressive as atomic memory. This expressiveness is demonstrated by two exclusion algorithms and a solution to M.J. Fischer and A. Michael's (1982) dictionary problem on slow memory.},
eventtitle={Proceedings.,10th International Conference on Distributed Computing Systems},
file={Hutto et Ahamad - 1990 - Slow memory weakening consistency to enhance conc.pdf:/home/amaury/Téléchargements/Hutto et Ahamad - 1990 - Slow memory weakening consistency to enhance conc.pdf:application/pdf},
file={Hutto et Ahamad - 1990 - Slow memory weakening consistency to enhance conc.pdf:/home/amaury/Téléchargements/Hutto et Ahamad - 1990 - Slow memory weakening consistency to enhance conc.pdf:application/pdf},
}
}
@article{lamport_how_1979,
@article{lamport_how_1979,
title={How to Make a Multiprocessor Computer That Correctly Executes Multiprocess Programs},
title={How to {Make} a {Multiprocessor} {Computer} {That} {Correctly} {Executes} {Multiprocess} {Programs}},
volume={C-28},
volume={C-28},
issn={1557-9956},
issn={1557-9956},
doi={10.1109/TC.1979.1675439},
doi={10.1109/TC.1979.1675439},
abstract={Many large sequential computers execute operations in a different order than is specified by the program. A correct execution is achieved if the results produced are the same as would be produced by executing the program steps in order. For a multiprocessor computer, such a correct execution by each processor does not guarantee the correct execution of the entire program. Additional conditions are given which do guarantee that a computer correctly executes multiprocess programs.},
abstract={Many large sequential computers execute operations in a different order than is specified by the program. A correct execution is achieved if the results produced are the same as would be produced by executing the program steps in order. For a multiprocessor computer, such a correct execution by each processor does not guarantee the correct execution of the entire program. Additional conditions are given which do guarantee that a computer correctly executes multiprocess programs.},
pages={690--691},
number={9},
number={9},
journaltitle={{IEEE} Transactions on Computers},
journal={IEEE Transactions on Computers},
author={Lamport, Leslie},
author={Lamport, Leslie},
date={1979-09},
month=sep,
note={Conference Name: {IEEE} Transactions on Computers},
year={1979},
note={Conference Name: IEEE Transactions on Computers},
file={IEEE Xplore Abstract Record:/home/amaury/Zotero/storage/IVGSSPNE/1675439.html:text/html;Lamport - 1979 - How to Make a Multiprocessor Computer That Correct.pdf:/home/amaury/Zotero/storage/GY8CWGUV/Lamport - 1979 - How to Make a Multiprocessor Computer That Correct.pdf:application/pdf},
file={IEEE Xplore Abstract Record:/home/amaury/Zotero/storage/IVGSSPNE/1675439.html:text/html;Lamport - 1979 - How to Make a Multiprocessor Computer That Correct.pdf:/home/amaury/Zotero/storage/GY8CWGUV/Lamport - 1979 - How to Make a Multiprocessor Computer That Correct.pdf:application/pdf},
abstract={This paper discusses memory consistency models and their influence on software in the context of parallel machines. In the first part we review previous work on memory consistency models. The second part discusses the issues that arise due to weakening memory consistency. We are especially interested in the influence that weakened consistency models have on language, compiler, and runtime system design. We conclude that tighter interaction between those parts and the memory system might improve performance considerably.},
abstract={This paper discusses memory consistency models and their influence on software in the context of parallel machines. In the first part we review previous work on memory consistency models. The second part discusses the issues that arise due to weakening memory consistency. We are especially interested in the influence that weakened consistency models have on language, compiler, and runtime system design. We conclude that tighter interaction between those parts and the memory system might improve performance considerably.},
pages={18--26},
language={en},
number={1},
number={1},
journaltitle={{ACM} {SIGOPS} Operating Systems Review},
abstract={The use of weakly consistent memories in distributed shared memory systems to combat unacceptable network delay and to allow such systems to scale is proposed. Proposed memory correctness conditions are surveyed, and how they are related by a weakness hierarchy is demonstrated. Multiversion and messaging interpretations of memory are introduced as means of systematically exploring the space of possible memories. Slow memory is presented as a memory that allows the effects of writes to propagate slowly through the system, eliminating the need for costly consistency maintenance protocols that limit concurrency. Slow memory possesses a valuable locality property and supports a reduction from traditional atomic memory. Thus slow memory is as expressive as atomic memory. This expressiveness is demonstrated by two exclusion algorithms and a solution to M.J. Fischer and A. Michael's (1982) dictionary problem on slow memory.},
eventtitle={Proceedings.,10th International Conference on Distributed Computing Systems},
booktitle={Foundations of Software Technology and Theoretical Computer Science},
urldate={2023-06-06},
booktitle={Foundations of {Software} {Technology} and {Theoretical} {Computer} {Science}},
publisher={Springer Berlin Heidelberg},
publisher={Springer Berlin Heidelberg},
author={Raynal, Michel and Schiper, André},
author={Raynal, Michel and Schiper, André},
editor={Thiagarajan, P. S.},
editor={Goos, Gerhard and Hartmanis, Juris and Leeuwen, Jan and Thiagarajan, P. S.},
editorb={Goos, Gerhard and Hartmanis, Juris and Leeuwen, Jan},
year={1995},
editorbtype={redactor},
urldate={2023-06-06},
date={1995},
langid={english},
doi={10.1007/3-540-60692-0_48},
doi={10.1007/3-540-60692-0_48},
note={Series Title: Lecture Notes in Computer Science},
note={Series Title: Lecture Notes in Computer Science},
pages={180--194},
file={Raynal et Schiper - 1995 - From causal consistency to sequential consistency .pdf:/home/amaury/Zotero/storage/B8UNWUSA/Raynal et Schiper - 1995 - From causal consistency to sequential consistency .pdf:application/pdf},
file={Raynal et Schiper - 1995 - From causal consistency to sequential consistency .pdf:/home/amaury/Zotero/storage/B8UNWUSA/Raynal et Schiper - 1995 - From causal consistency to sequential consistency .pdf:application/pdf},
}
}
@thesis{kumar_fault-tolerant_2019,
@phdthesis{kumar_fault-tolerant_2019,
title={Fault-Tolerant Distributed Services in Message-Passing Systems},
type={{PhD} {Thesis}},
institution={Texas A\&M University},
title={Fault-{Tolerant} {Distributed} {Services} in {Message}-{Passing} {Systems}},
type={phdthesis},
school={Texas A\&M University},
author={Kumar, Saptaparni},
author={Kumar, Saptaparni},
date={2019},
year={2019},
file={Kumar - 2019 - Fault-Tolerant Distributed Services in Message-Pas.pdf:/home/amaury/Zotero/storage/Q9XK77W9/Kumar - 2019 - Fault-Tolerant Distributed Services in Message-Pas.pdf:application/pdf;Snapshot:/home/amaury/Zotero/storage/7JB26RAJ/1.html:text/html},
file={Kumar - 2019 - Fault-Tolerant Distributed Services in Message-Pas.pdf:/home/amaury/Zotero/storage/Q9XK77W9/Kumar - 2019 - Fault-Tolerant Distributed Services in Message-Pas.pdf:application/pdf;Snapshot:/home/amaury/Zotero/storage/7JB26RAJ/1.html:text/html},
}
}
@article{somasekaram_high-availability_2022,
@article{somasekaram_high-availability_2022,
title={High-Availability Clusters: A Taxonomy, Survey, and Future Directions},
title={High-{Availability} {Clusters}: {A} {Taxonomy}, {Survey}, and {Future} {Directions}},
volume={187},
volume={187},
issn={01641212},
issn={01641212},
shorttitle={High-{Availability} {Clusters}},
url={http://arxiv.org/abs/2109.15139},
url={http://arxiv.org/abs/2109.15139},
doi={10.1016/j.jss.2021.111208},
doi={10.1016/j.jss.2021.111208},
shorttitle={High-Availability Clusters},
abstract={The delivery of key services in domains ranging from finance and manufacturing to healthcare and transportation is underpinned by a rapidly growing number of mission-critical enterprise applications. Ensuring the continuity of these complex applications requires the use of software-managed infrastructures called high-availability clusters (HACs). HACs employ sophisticated techniques to monitor the health of key enterprise application layers and of the resources they use, and to seamlessly restart or relocate application components after failures. In this paper, we first describe the manifold uses of HACs to protect essential layers of a critical application and present the architecture of high availability clusters. We then propose a taxonomy that covers all key aspects of HACs -- deployment patterns, application areas, types of cluster, topology, cluster management, failure detection and recovery, consistency and integrity, and data synchronisation; and we use this taxonomy to provide a comprehensive survey of the end-to-end software solutions available for the HAC deployment of enterprise applications. Finally, we discuss the limitations and challenges of existing HAC solutions, and we identify opportunities for future research in the area.},
abstract={The delivery of key services in domains ranging from finance and manufacturing to healthcare and transportation is underpinned by a rapidly growing number of mission-critical enterprise applications. Ensuring the continuity of these complex applications requires the use of software-managed infrastructures called high-availability clusters ({HACs}). {HACs} employ sophisticated techniques to monitor the health of key enterprise application layers and of the resources they use, and to seamlessly restart or relocate application components after failures. In this paper, we first describe the manifold uses of {HACs} to protect essential layers of a critical application and present the architecture of high availability clusters. We then propose a taxonomy that covers all key aspects of {HACs} -- deployment patterns, application areas, types of cluster, topology, cluster management, failure detection and recovery, consistency and integrity, and data synchronisation; and we use this taxonomy to provide a comprehensive survey of the end-to-end software solutions available for the {HAC} deployment of enterprise applications. Finally, we discuss the limitations and challenges of existing {HAC} solutions, and we identify opportunities for future research in the area.},
pages={111208},
journaltitle={Journal of Systems and Software},
shortjournal={Journal of Systems and Software},
author={Somasekaram, Premathas and Calinescu, Radu and Buyya, Rajkumar},
urldate={2023-06-06},
urldate={2023-06-06},
date={2022-05},
journal={Journal of Systems and Software},
eprinttype={arxiv},
author={Somasekaram, Premathas and Calinescu, Radu and Buyya, Rajkumar},
eprint={2109.15139 [cs, eess]},
month=may,
year={2022},
note={arXiv:2109.15139 [cs, eess]},
keywords={Computer Science - Distributed, Parallel, and Cluster Computing, Computer Science - Networking and Internet Architecture, Electrical Engineering and Systems Science - Systems and Control},
keywords={Computer Science - Distributed, Parallel, and Cluster Computing, Computer Science - Networking and Internet Architecture, Electrical Engineering and Systems Science - Systems and Control},
pages={111208},
file={arXiv.org Snapshot:/home/amaury/Zotero/storage/B4KCP9BG/2109.html:text/html;Somasekaram et al. - 2022 - High-Availability Clusters A Taxonomy, Survey, an.pdf:/home/amaury/Zotero/storage/K3LQZLC8/Somasekaram et al. - 2022 - High-Availability Clusters A Taxonomy, Survey, an.pdf:application/pdf},
file={arXiv.org Snapshot:/home/amaury/Zotero/storage/B4KCP9BG/2109.html:text/html;Somasekaram et al. - 2022 - High-Availability Clusters A Taxonomy, Survey, an.pdf:/home/amaury/Zotero/storage/K3LQZLC8/Somasekaram et al. - 2022 - High-Availability Clusters A Taxonomy, Survey, an.pdf:application/pdf},
}
}
...
@@ -206,12 +191,12 @@
...
@@ -206,12 +191,12 @@
title={Concurrence et cohérence dans les systèmes répartis},
title={Concurrence et cohérence dans les systèmes répartis},
isbn={978-1-78405-295-9},
isbn={978-1-78405-295-9},
abstract={La société moderne est de plus en plus dominée par la société virtuelle, le nombre d’internautes dans le monde ayant dépassé les trois milliards en 2015. A la différence de leurs homologues séquentiels, les systèmes répartis sont beaucoup plus difficiles à concevoir, et sont donc sujets à de nombreux problèmes. La cohérence séquentielle fournit la même vue globale à tous les utilisateurs, mais le confort d’utilisation qu’elle apporte est trop coûteux, voire impossible, à mettre en oeuvre à grande échelle. Concurrence et cohérence dans les systèmes répartis examine les meilleures façons de spécifier les objets que l’on peut tout de même implémenter dans ces systèmes. Cet ouvrage explore la zone grise des systèmes répartis et dresse une carte des critères de cohérence faible, identifiant plusieurs familles et démontrant comment elles peuvent s’intégrer dans un langage de programmation.},
abstract={La société moderne est de plus en plus dominée par la société virtuelle, le nombre d’internautes dans le monde ayant dépassé les trois milliards en 2015. A la différence de leurs homologues séquentiels, les systèmes répartis sont beaucoup plus difficiles à concevoir, et sont donc sujets à de nombreux problèmes. La cohérence séquentielle fournit la même vue globale à tous les utilisateurs, mais le confort d’utilisation qu’elle apporte est trop coûteux, voire impossible, à mettre en oeuvre à grande échelle. Concurrence et cohérence dans les systèmes répartis examine les meilleures façons de spécifier les objets que l’on peut tout de même implémenter dans ces systèmes. Cet ouvrage explore la zone grise des systèmes répartis et dresse une carte des critères de cohérence faible, identifiant plusieurs familles et démontrant comment elles peuvent s’intégrer dans un langage de programmation.},
pagetotal={194},
language={fr},
publisher={{ISTE} Group},
publisher={ISTE Group},
author={Perrin, Matthieu},
author={Perrin, Matthieu},
date={2017-09-01},
month=sep,
langid={french},
year={2017},
note={Google-Books-{ID}: 6DRlDwAAQBAJ},
note={Google-Books-ID: 6DRlDwAAQBAJ},
file={Perrin - 2017 - Concurrence et cohérence dans les systèmes réparti.pdf:/home/amaury/Téléchargements/Perrin - 2017 - Concurrence et cohérence dans les systèmes réparti.pdf:application/pdf},
file={Perrin - 2017 - Concurrence et cohérence dans les systèmes réparti.pdf:/home/amaury/Téléchargements/Perrin - 2017 - Concurrence et cohérence dans les systèmes réparti.pdf:application/pdf},
}
}
...
@@ -219,55 +204,55 @@
...
@@ -219,55 +204,55 @@
title={Practical client-side replication: weak consistency semantics for insecure settings},
title={Practical client-side replication: weak consistency semantics for insecure settings},
abstract={Client-side replication and direct client-to-client synchronization can be used to create highly available, low-latency interactive applications. Causal consistency, the strongest available consistency model under network partitions, is an attractive consistency model for these applications.},
abstract={Client-side replication and direct client-to-client synchronization can be used to create highly available, low-latency interactive applications. Causal consistency, the strongest available consistency model under network partitions, is an attractive consistency model for these applications.},
pages={2590--2605},
language={en},
number={12},
number={12},
journaltitle={Proceedings of the {VLDB} Endowment},
shortjournal={Proc. {VLDB} Endow.},
author={Van Der Linde, Albert and Leitão, João and Preguiça, Nuno},
urldate={2023-06-06},
urldate={2023-06-06},
date={2020-08},
journal={Proceedings of the VLDB Endowment},
langid={english},
author={Van Der Linde, Albert and Leitão, João and Preguiça, Nuno},
month=aug,
year={2020},
pages={2590--2605},
file={Van Der Linde et al. - 2020 - Practical client-side replication weak consistenc.pdf:/home/amaury/Zotero/storage/5TJ3SA56/Van Der Linde et al. - 2020 - Practical client-side replication weak consistenc.pdf:application/pdf},
file={Van Der Linde et al. - 2020 - Practical client-side replication weak consistenc.pdf:/home/amaury/Zotero/storage/5TJ3SA56/Van Der Linde et al. - 2020 - Practical client-side replication weak consistenc.pdf:application/pdf},
abstract={Reliability at massive scale is one of the biggest challenges we face at Amazon.com, one of the largest e-commerce operations in the world; even the slightest outage has significant financial consequences and impacts customer trust. The Amazon.com platform, which provides services for many web sites worldwide, is implemented on top of an infrastructure of tens of thousands of servers and network components located in many datacenters around the world. At this scale, small and large components fail continuously and the way persistent state is managed in the face of these failures drives the reliability and scalability of the software systems.},
abstract={Reliability at massive scale is one of the biggest challenges we face at Amazon.com, one of the largest e-commerce operations in the world; even the slightest outage has significant financial consequences and impacts customer trust. The Amazon.com platform, which provides services for many web sites worldwide, is implemented on top of an infrastructure of tens of thousands of servers and network components located in many datacenters around the world. At this scale, small and large components fail continuously and the way persistent state is managed in the face of these failures drives the reliability and scalability of the software systems.},
author={{DeCandia}, Giuseppe and Hastorun, Deniz and Jampani, Madan and Kakulapati, Gunavardhan and Lakshman, Avinash and Pilchin, Alex and Sivasubramanian, Swaminathan and Vosshall, Peter and Vogels, Werner},
language={en},
langid={english},
author={DeCandia, Giuseppe and Hastorun, Deniz and Jampani, Madan and Kakulapati, Gunavardhan and Lakshman, Avinash and Pilchin, Alex and Sivasubramanian, Swaminathan and Vosshall, Peter and Vogels, Werner},
year={2007},
file={DeCandia et al. - Dynamo Amazon’s Highly Available Key-value Store.pdf:/home/amaury/Zotero/storage/KDHRPBGR/DeCandia et al. - Dynamo Amazon’s Highly Available Key-value Store.pdf:application/pdf},
file={DeCandia et al. - Dynamo Amazon’s Highly Available Key-value Store.pdf:/home/amaury/Zotero/storage/KDHRPBGR/DeCandia et al. - Dynamo Amazon’s Highly Available Key-value Store.pdf:application/pdf},
abstract={Causal ordering in an asynchronous system has many applications in distributed computing, including in replicated databases and real-time collaborative software. Previous work in the area focused on ordering point-to-point messages in a fault-free setting, and on ordering broadcasts under various fault models. To the best of our knowledge, Byzantine fault-tolerant causal ordering has not been attempted for point-to-point communication in an asynchronous setting. In this paper, we first show that existing algorithms for causal ordering of point-to-point communication fail under Byzantine faults. We then prove that it is impossible to causally order messages under point-to-point communication in an asynchronous system with one or more Byzantine failures. We then present two algorithms that can causally order messages under Byzantine failures, where the network provides an upper bound on the message transmission time. The proofs of correctness for these algorithms show that it is possible to achieve causal ordering for point-to-point communication under a stronger asynchrony model where the network provides an upper bound on message transmission time. We also give extensions of our two algorithms for Byzantine fault-tolerant causal ordering of multicasts.},
abstract={Causal ordering in an asynchronous system has many applications in distributed computing, including in replicated databases and real-time collaborative software. Previous work in the area focused on ordering point-to-point messages in a fault-free setting, and on ordering broadcasts under various fault models. To the best of our knowledge, Byzantine fault-tolerant causal ordering has not been attempted for point-to-point communication in an asynchronous setting. In this paper, we first show that existing algorithms for causal ordering of point-to-point communication fail under Byzantine faults. We then prove that it is impossible to causally order messages under point-to-point communication in an asynchronous system with one or more Byzantine failures. We then present two algorithms that can causally order messages under Byzantine failures, where the network provides an upper bound on the message transmission time. The proofs of correctness for these algorithms show that it is possible to achieve causal ordering for point-to-point communication under a stronger asynchrony model where the network provides an upper bound on message transmission time. We also give extensions of our two algorithms for Byzantine fault-tolerant causal ordering of multicasts.},
number={{arXiv}:2112.11337},
language={en},
publisher={{arXiv}},
author={Misra, Anshuman and Kshemkalyani, Ajay},
urldate={2023-07-12},
urldate={2023-07-12},
date={2021-12-21},
publisher={arXiv},
langid={english},
author={Misra, Anshuman and Kshemkalyani, Ajay},
eprinttype={arxiv},
month=dec,
eprint={2112.11337 [cs]},
year={2021},
note={arXiv:2112.11337 [cs]},
keywords={Computer Science - Distributed, Parallel, and Cluster Computing},
keywords={Computer Science - Distributed, Parallel, and Cluster Computing},
title={Distributed Causal Memory in the Presence of Byzantine Servers},
title={Distributed {Causal} {Memory} in the {Presence} of {Byzantine} {Servers}},
doi={10.1109/NCA.2019.8935059},
doi={10.1109/NCA.2019.8935059},
abstract={We study distributed causal shared memory (or distributed read/write objects) in the client-server model over asynchronous message-passing networks in which some servers may suffer Byzantine failures. Since Ahamad et al. proposed causal memory in 1994, there have been abundant research on causal storage. Lately, there is a renewed interest in enforcing causal consistency in large-scale distributed storage systems (e.g., {COPS}, Eiger, Bolt-on). However, to the best of our knowledge, the fault-tolerance aspect of causal memory is not well studied, especially on the tight resilience bound. In our prior work, we showed that 2 f+1 servers is the tight bound to emulate crash-tolerant causal shared memory when up to f servers may crash. In this paper, we adopt a typical model considered in many prior works on Byzantine-tolerant storage algorithms and quorum systems. In the system, up to f servers may suffer Byzantine failures and any number of clients may crash. We constructively present an emulation algorithm for Byzantine causal memory using 3 f+1 servers. We also prove that 3 f+1 is necessary for tolerating up to f Byzantine servers. In other words, we show that 3 f+1 is a tight bound. For evaluation, we implement our algorithm in Golang and compare their performance with two state-of-the-art fault-tolerant algorithms that ensure atomicity in the Google Cloud Platform.},
abstract={We study distributed causal shared memory (or distributed read/write objects) in the client-server model over asynchronous message-passing networks in which some servers may suffer Byzantine failures. Since Ahamad et al. proposed causal memory in 1994, there have been abundant research on causal storage. Lately, there is a renewed interest in enforcing causal consistency in large-scale distributed storage systems (e.g., COPS, Eiger, Bolt-on). However, to the best of our knowledge, the fault-tolerance aspect of causal memory is not well studied, especially on the tight resilience bound. In our prior work, we showed that 2 f+1 servers is the tight bound to emulate crash-tolerant causal shared memory when up to f servers may crash. In this paper, we adopt a typical model considered in many prior works on Byzantine-tolerant storage algorithms and quorum systems. In the system, up to f servers may suffer Byzantine failures and any number of clients may crash. We constructively present an emulation algorithm for Byzantine causal memory using 3 f+1 servers. We also prove that 3 f+1 is necessary for tolerating up to f Byzantine servers. In other words, we show that 3 f+1 is a tight bound. For evaluation, we implement our algorithm in Golang and compare their performance with two state-of-the-art fault-tolerant algorithms that ensure atomicity in the Google Cloud Platform.},
eventtitle={2019 {IEEE} 18th International Symposium on Network Computing and Applications ({NCA})},
booktitle={2019 {IEEE} 18th {International} {Symposium} on {Network} {Computing} and {Applications} ({NCA})},
pages={1--8},
booktitle={2019 {IEEE} 18th International Symposium on Network Computing and Applications ({NCA})},
author={Tseng, Lewis and Wang, Zezhi and Zhao, Yajie and Pan, Haochen},
author={Tseng, Lewis and Wang, Zezhi and Zhao, Yajie and Pan, Haochen},