@phdthesis{W2009DISS,
author = {Thomas Weise},
title = {Evolving Distributed Algorithms with Genetic Programming},
publisher = {University of Kassel, Fachbereich 16: Elektrotechnik/Informatik},
year = {2009},
month = may # {~4},
affiliation = {Distributed Systems Group, Fachbereich 16: Elektrotechnik/Informatik,
University of Kassel, Germany},
location = {Distributed Systems Group, Fachbereich 16: Elektrotechnik/Informatik,
University of Kassel, Germany},
address = {Wilhelmsh{\"{o}}her Allee 73, 34121 Kassel, Germany},
institution = {University of Kassel, Fachbereich 16: Elektrotechnik/Informatik},
school = {University of Kassel, Fachbereich 16: Elektrotechnik/Informatik,
Distributed Systems Group},
note = {Unique identifier: urn:nbn:de:hebis:34-2009051127217\\
The work is online available at
http://www.it-weise.de/documents/index.html\#W2009DISS.\\
The dissertation can be downloaded at
http://www.it-weise.de/documents/files/W2009DISS.pdf.\\
The slides can be downloaded at
http://www.it-weise.de/documents/files/W2009DISS\_slides.pdf.\\
The animations can be downloaded at
http://www.it-weise.de/documents/files/W2009DISS\_anim.jar.\\
The supplementary slides can be downloaded at
http://www.it-weise.de/documents/files/W2009DISS\_suppl.pdf.\\
Contact Thomas Weise at tweise@gmx.de or http://www.it-weise.de/.},
abstract = {Distributed systems are one of the most vital components of the economy.
The most prominent example is probably the internet, a constituent
element of our knowledge society. During the recent years, the number of
novel network types has steadily increased. Amongst others, sensor
networks, distributed systems composed of tiny computational devices
with scarce resources, have emerged. The further development and
heterogeneous connection of such systems imposes new requirements on the
software development process. Mobile and wireless networks, for
instance, have to organize themselves autonomously and must be able to
react to changes in the environment and to failing nodes alike.
Researching new approaches for the design of distributed algorithms may
lead to methods with which these requirements can be met efficiently. In
this thesis, one such method is developed, tested, and discussed in
respect of its practical utility.\\
Our new design approach for distributed algorithms is based on Genetic
Programming, a member of the family of evolutionary algorithms.
Evolutionary algorithms are metaheuristic optimization methods which
copy principles from natural evolution. They use a population of
solution candidates which they try to refine step by step in order to
attain optimal values for predefined objective functions.\\
The synthesis of an algorithm with our approach starts with an analysis
step in which the wanted global behavior of the distributed system is
specified. From this specification, objective functions are derived
which steer a Genetic Programming process where the solution candidates
are distributed programs. The objective functions rate how close these
programs approximate the goal behavior in multiple randomized network
simulations. The evolutionary process step by step selects the most
promising solution candidates and modifies and combines them with
mutation and crossover operators. This way, a description of the global
behavior of a distributed system is translated automatically to programs
which, if executed locally on the nodes of the system, exhibit this
behavior. \\
In our work, we test six different ways for representing distributed
programs, comprising adaptations and extensions of well-known Genetic
Programming methods (SGP, eSGP, and LGP), one bio-inspired approach
(Fraglets), and two new program representations called Rule-based
Genetic Programming (RBGP, eRBGP) designed by us. We breed programs in
these representations for three well-known example problems in
distributed systems: election algorithms, the distributed mutual
exclusion at a critical section, and the distributed computation of the
greatest common divisor of a set of numbers. \\
Synthesizing distributed programs the evolutionary way does not
necessarily lead to the envisaged results. In a detailed analysis, we
discuss the problematic features which make this form of Genetic
Programming particularly hard. The two Rule-based Genetic Programming
approaches have been developed especially in order to mitigate these
difficulties. In our experiments, at least one of them (eRBGP) turned
out to be a very efficient approach and in most cases, was superior to
the other representations.},
contents = {Title Page\\
Citation Suggestion\\
Abstract\\
Zusammenfassung\\
Acknowledgements\\
Erkl{\"{a}}rung\\
Contents\\
Part 1: Foundations\\
1.1. Introduction\\
1.1.1. Motivation\\
1.1.1.1. Challenges in Distributed Systems\\
1.1.1.2. Design from Nature\\
1.1.2. Solution Approach\\
1.1.3. Major Results\\
1.1.4. The Structure of this Thesis\\
1.2. Metaheuristic Optimization\\
1.2.1. The Structure of Optimization\\
1.2.2. Multi-Objective Optimization\\
1.2.2.1. Pareto Optimization\\
1.2.2.2. Pareto Ranking\\
1.2.3. Evolutionary Algorithms\\
1.2.4. Genetic Algorithms\\
1.2.4.1. Mutation: Unary Reproduction\\
1.2.4.2. Crossover: Binary Reproduction\\
1.2.5. Genetic Programming\\
1.2.5.1. Mutation: Unary Reproduction\\
1.2.5.2. Recombination: Binary Reproduction\\
1.2.5.3. Automatically Defined Functions\\
1.2.6. Classifier Systems\\
1.2.6.1. Introduction\\
1.2.6.2. A Small Example\\
1.2.6.3. Messages\\
1.2.6.4. Conditions and Actions\\
1.2.6.5. Classifiers\\
1.3. Related Work\\
1.3.1. Optimizing Routing\\
1.3.1.1. Evolving Fault-Tolerant Routing Rules\\
1.3.1.2. Genetic Programming of Broadcast Algorithms\\
1.3.1.3. Optimizing Collective Communication\\
1.3.2. Synthesizing Protocols\\
1.3.2.1. Transformation of Finite State Machine-based Specifications\\
1.3.2.2. Evolving Fraglet-based Protocols\\
1.3.2.3. Online Protocol Adaptation and Evolution with Fraglets\\
Part 2: Genetic Programming of Distributed Algorithms\\
2.1. Features which make Genetic Programming Hard\\
2.1.1. Premature Convergence\\
2.1.1.1. Introduction\\
2.1.1.2. The Problem\\
2.1.1.3. One Cause: Loss of Diversity\\
2.1.1.4. Contermeasures\\
2.1.2. Ruggedness and Weak Causality\\
2.1.2.1. The Problem\\
2.1.2.2. One Cause: Weak Causality\\
2.1.2.3. Countermeasures\\
2.1.3. Neutrality and Code Bloat\\
2.1.3.1. Introduction\\
2.1.3.2. The Problem: Code Bloat\\
2.1.3.3. Countermeasures\\
2.1.4. Needle-In-A-Haystack Problems\\
2.1.4.1. Introduction\\
2.1.4.2. The Problem: All-Or-Nothing?\\
2.1.4.3. Countermeasures\\
2.1.5. Epistasis\\
2.1.5.1. Introduction\\
2.1.5.2. The Problem\\
2.1.5.3. Countermeasures\\
2.1.6. Noise and Robustness\\
2.1.6.1. Introduction - Noise\\
2.1.6.2. The Problem: Need for Robustness\\
2.1.6.3. Countermeasures\\
2.1.7. Overfitting and Oversimplification\\
2.1.7.1. Overfitting\\
2.1.7.2. Oversimplification\\
2.1.8. Correctness\\
2.1.8.1. The Problem\\
2.1.8.2. Countermeasures\\
2.2. Approaches\\
2.2.1. Standard Genetic Programming [SGP]\\
2.2.1.1. Approach\\
2.2.1.2. Adaptation\\
2.2.2. Extended Standard Genetic Programming with Indexed Memory
[eSGP]\\
2.2.2.1. Approach\\
2.2.2.2. Adaptation\\
2.2.3. Linear Genetic Programming [LGP]\\
2.2.3.1. Approach\\
2.2.3.2. Adaptation\\
2.2.3.3. On-the-Fly Compiler in the DGPF\\
2.2.4. Fraglets\\
2.2.4.1. Approach\\
2.2.4.2. Adaptation\\
2.2.5. Rule-based Genetic Programming [RBGP]\\
2.2.5.1. Approach\\
2.2.5.2. Adaptation\\
2.2.6. Extended Rule-based Genetic Programming [eRBGP]\\
2.2.6.1. Approach\\
2.2.6.2. Adaptation\\
2.2.7. Summary\\
2.3. Distributed System Model and Simulations\\
2.3.1. Distributed Systems and Algorithms\\
2.3.2. The Distributed System Model\\
2.3.2.1. The Node Model\\
2.3.2.2. The Parallelism Model\\
2.3.2.3. The Topology and Messaging Model\\
2.3.2.4. The Message Latency Model\\
2.3.3. The Structure of the Simulations\\
2.3.4. The System Architecture\\
2.3.5. Summary\\
2.4. Evolving Election Algorithms\\
2.4.1. Specification of the Global Behavior\\
2.4.2. Objective Functions\\
2.4.2.1. Functional Objective Functions\\
2.4.2.2. Non-Functional Objective\\
2.4.3. Experimental Settings\\
2.4.4. Evolved Programs\\
2.4.4.1. For Problem Variant a)\\
2.4.4.2. For Problem Variant c)\\
2.4.4.3. For Problem Variant b) and d)\\
2.4.5. Results and Evaluation\\
2.4.5.1. Overview\\
2.4.5.2. Convergence\\
2.4.5.3. Comparison of the GP Approaches\\
2.4.5.4. Runtime\\
2.4.6. Summary\\
2.5. Evolving Mutual Exclusion for Critical Sections\\
2.5.1. Specification of the Global Behavior\\
2.5.2. Objective Functions\\
2.5.2.1. Protection of the Critical Section\\
2.5.2.2. Utilization of the Critical Section\\
2.5.3. Experimental Settings\\
2.5.4. Evolved Programs\\
2.5.5. Results and Evaluation\\
2.5.5.1. Overview\\
2.5.5.2. Pareto Frontiers\\
2.5.5.3. Convergence\\
2.5.5.4. Comparison of the GP Approaches\\
2.5.5.5. Comparison of the Scenarios\\
2.5.5.6. Runtime\\
2.5.6. Summary\\
2.6. Evolving Algorithms for the Distributed Greatest Common Divisor
Problem\\
2.6.1. Specification of the Global Behavior\\
2.6.2. Objective Functions\\
2.6.3. Experimental Settings\\
2.6.4. Evolved Programs\\
2.6.5. Results and Evaluation\\
2.6.5.1. Overview\\
2.6.5.2. Convergence\\
2.6.5.3. Comparison of the GP Approaches\\
2.6.5.4. Runtime\\
2.6.6. Summary\\
Part 3: Conclusions\\
3.1. Critical Discussion\\
3.1.1. Criticism of the Approach\\
3.1.1.1. Complexity of Realization\\
3.1.1.2. The Evolved Programs\\
3.1.2. Significance of our Experiments\\
3.1.2.1. Settings\\
3.1.2.2. Why Evolutionary Algorithms?\\
3.1.2.3. Genetic Programming Approaches\\
3.1.2.4. Adaptation of the Genetic Programming Approaches\\
3.1.2.5. Choice of Scenarios\\
3.1.3. Quality of the GP Approach Features\\
3.1.3.1. Is Rule-based Genetic Programming Good?\\
3.1.3.2. Is Indexed Memory Good?\\
3.2. Other Projects\\
3.2.1. The Evolution of Aggregation Protocols\\
3.2.2. Combining GP and MDD\\
3.2.3. Offline Emergence Engineering with GP\\
3.2.4. Tunable Model for Problematic Fitness Landscapes\\
3.2.5. GAs for Semantic Web Service Composition\\
3.2.6. The Electronic Book on Global Optimization\\
3.2.7. Sigoa and DGPF\\
3.3. Outlook and Summary\\
3.3.1. From Theory to Practice\\
3.3.2. Investigating the Optimization of Non-Functional Features\\
3.3.3. Combining Temporary Memory with other GP Approaches\\
3.4. Summary\\
Part 4: Appendices\\
4.1. The Election Experiments\\
4.2. The Critical Section Experiments\\
4.3. The Distributed Greatest Common Divisor Experiments\\
4.4. Diversity-Preserving Algorithms\\
4.5. Ausf{\"{u}}hrliche Deutsche Zusammenfassung\\
Bibliography\\
Index\\
List of Figures\\
List of Tables\\
List of Algorithms\\
List of Listings},
keywords = {Genetic Programming, Distributed Systems, Distributed Algorithm,
Evolutionary Algorithm, Rule-based Genetic Programming, RBGP, extended
Rule-based Genetic Programming, eRBGP, Standard Genetic Programming,
SGP, Extended Standard Genetic Programming, eSGP, Linear Genetic
Programming, LGP,\\
Fraglets, Election, Critical Section, Mutual Exclusion, Greatest Common
Divisor, Learning Classifier Systems, LCS, Metaheuristic Optimization,
Ruggedness, Epistasis, Neutrality, Redundancy, Robustness, Correctness,
Adequacy, Bloat, Overfitting, Positional Epistasis, Routing, Protocols,
Premature Convergence, Needle-In-A-Haystack, All-Or-Nothing, Indexed
Memory, Global Memory, Local Memory, Stack, Automatically Defined
Function, ADF, Message Handler, Ansynchrone},
language = {en},
url = {http://kobra.bibliothek.uni-kassel.de/handle/urn:nbn:de:hebis:34-2009051127217}
}