OOPSLA (5)
ECOOP (2)
ACM CSUR (1)
ESEC/FSE (1)
HPCC (1)
IEEE Soft (1)
IEEE TSE (1)
PLDI (1)
DLS (8)
Programming (5)
MPLR (4)
ManLang (4)
SCP (3)
ComLan (2)
Onward! (2)
ITiCSE (1)
JoT (1)
SAC (1)
TOOLS (1)
VEE (1)
AGERE! (7)
ICOOOLPS (6)
VMIL (4)
MoreVMs (3)
PLACES (2)
ATPS (1)
MISS (1)
PX (1)
WODA (1)
Meta (2)
AGERE (1)
GraalWorkshop (1)
@inproceedings{Burchell:2024:InstBased, abstract = {Profilers are crucial tools for identifying and improving application performance. However, for language implementations with just-in-time (JIT) compilation, e.g., for Java and JavaScript, instrumentation-based profilers can have significant overheads and report unrealistic results caused by the instrumentation. In this paper, we examine state-of-the-art instrumentation-based profilers for Java to determine the realism of their results. We assess their overhead, the effect on compilation time, and the generated bytecode. We found that the profiler with the lowest overhead increased run time by 82x. Additionally, we investigate the realism of results by testing a profiler’s ability to detect whether inlining is enabled, which is an important compiler optimization. Our results document that instrumentation can alter program behavior so that performance observations are unrealistic, i.e., they do not reflect the performance of the uninstrumented program. As a solution, we sketch late-compiler-phase-based instrumentation for just-in-time compilers, which gives us the precision of instrumentation-based profiling with an overhead that is multiple magnitudes lower than that of standard instrumentation-based profilers, with a median overhead of 23.3% (min. 1.4%, max. 464%). 
By inserting probes late in the compilation process, we avoid interfering with compiler optimizations, which yields more realistic results.}, author = {Burchell, Humphrey and Larose, Octave and Marr, Stefan}, blog = {https://stefan-marr.de/2024/09/instrumenation-based-profiling-on-jvms-is-broken/}, booktitle = {Proceedings of the 21st ACM SIGPLAN International Conference on Managed Programming Languages and Runtimes}, doi = {10.1145/3679007.3685058}, keywords = {Graal Instrumentation JVM Java MeMyPublication Optimization Profiler Profiling Sampling myown}, month = sep, pdf = {https://stefan-marr.de/downloads/mplr24-burchell-et-al-towards-realistic-results-for-instrumentation-based-profilers-for-jit-compiled-systems.pdf}, publisher = {ACM}, series = {MPLR'24}, title = {{Towards Realistic Results for Instrumentation-Based Profilers for JIT-Compiled Systems}}, year = {2024}, month_numeric = {9} }
@article{Lauwaerts:2024:Latch, abstract = {Testing is an essential part of the software development cycle. Unfortunately, testing on constrained devices is currently very challenging. First, the limited memory of constrained devices severely restricts the size of test suites. Second, the limited processing power causes test suites to execute slowly, preventing a fast feedback loop. Third, when the constrained device becomes unresponsive, it is impossible to distinguish between the test failing or taking very long, forcing the developer to work with timeouts. Unfortunately, timeouts can cause tests to be flaky, i.e., have unpredictable outcomes independent of code changes. Given these problems, most IoT developers rely on laborious manual testing. In this paper, we propose the novel testing framework Latch (Large-scale Automated Testing on Constrained Hardware) to overcome the three main challenges of running large test suites on constrained hardware, as well as automate manual testing scenarios through a novel testing methodology based on debugger-like operations—we call this new testing approach managed testing. The core idea of Latch is to enable testing on constrained devices without those devices maintaining the whole test suite in memory. Therefore, programmers script and run tests on a workstation which then step-wise instructs the constrained device to execute each test, thereby overcoming the memory constraints. Our testing framework further allows developers to mark tests as depending on other tests. This way, Latch can skip tests that depend on previously failing tests resulting in a faster feedback loop. Finally, Latch addresses the issue of timeouts and flaky tests by including an analysis mode that provides feedback on timeouts and the flakiness of tests. To illustrate the expressiveness of Latch, we present testing scenarios representing unit testing, integration testing, and end-to-end testing. 
We evaluate the performance of Latch by testing a virtual machine against the WebAssembly specification, with a large test suite consisting of 10,213 tests running on an ESP32 microcontroller. Our experience shows that the testing framework is expressive, reliable and reasonably fast, making it suitable to run large test suites on constrained devices. Furthermore, the debugger-like operations enable to closely mimic manual testing.}, author = {Lauwaerts, Tom and Marr, Stefan and Scholliers, Christophe}, doi = {10.1016/j.scico.2024.103157}, issn = {0167-6423}, journal = {Science of Computer Programming}, keywords = {AutomatedDevices Embedded Flaky MeMyPublication Testing Tests myown}, month = jun, pages = {1--30}, pdf = {https://stefan-marr.de/downloads/scp24-lauwaerts-et-al-latch-enabling-large-scale-automated-testing-on-constrained-systems.pdf}, series = {SCP}, title = {{Latch: Enabling Large-scale Automated Testing on Constrained Systems}}, year = {2024}, month_numeric = {6} }
@proceedings{Marr:2023:DLS, author = {Marr, Stefan}, day = {24}, editor = {Marr, Stefan}, isbn = {979-8-4007-0389-8}, keywords = {DLS23 DynamicLanguages MeMyPublication Proceedings myown}, location = {Cascais, Portugal}, month = oct, publisher = {ACM}, series = {DLS}, title = {DLS 2023: Proceedings of the 19th ACM SIGPLAN International Symposium on Dynamic Languages}, url = {https://dl.acm.org/doi/proceedings/10.1145/3622759}, year = {2023}, month_numeric = {10} }
@article{Pimas:2023:Bee, author = {Pimás, Javier E. and Marr, Stefan and Garbervetsky, Diego}, doi = {10.22152/programming-journal.org/2024/8/5}, journal = {The Art, Science, and Engineering of Programming}, keywords = {Application Architecture Bee GarbageCollection LiveProgramming MeMyPublication ObjectOriented Objects Smalltalk VirtualMachine myown}, month = oct, number = {2}, pages = {1--34}, pdf = {https://stefan-marr.de/downloads/prog23-pimas-et-al-live-objects-all-the-way-down.pdf}, publisher = {Aspect-Oriented Software Association ({AOSA})}, series = {Programming'24}, title = {{Live Objects All The Way Down: Removing the Barriers between Applications and Virtual Machines}}, volume = {8}, year = {2023}, month_numeric = {10} }
@inproceedings{Burchell:2023:Profilers, abstract = {To identify optimisation opportunities, Java developers often use sampling profilers that attribute a percentage of run time to the methods of a program. Even so these profilers use sampling, are probabilistic in nature, and may suffer for instance from safepoint bias, they are normally considered to be relatively reliable. However, unreliable or inaccurate profiles may misdirect developers in their quest to resolve performance issues by not correctly identifying the program parts that would benefit most from optimisations. With the wider adoption of profilers such as async-profiler and Honest Profiler, which are designed to avoid the safepoint bias, we wanted to investigate how precise and accurate Java sampling profilers are today. We investigate the precision, reliability, accuracy, and overhead of async-profiler, Honest Profiler, Java Flight Recorder, JProfiler, perf, and YourKit, which are all actively maintained. We assess them on the fully deterministic Are We Fast Yet benchmarks to have a stable foundation for the probabilistic profilers. We find that profilers are relatively reliable over 30 runs and normally report the same hottest method. Unfortunately, this is not true for all benchmarks, which suggests their reliability may be application-specific. Different profilers also report different methods as hottest and cannot reliably agree on the set of top 5 hottest methods. On the positive side, the average run time overhead is in the range of 1% to 5.4% for the different profilers. 
Future work should investigate how results can become more reliable, perhaps by reducing the observer effect of profilers by using optimisation decisions of unprofiled runs or by developing a principled approach of combining multiple profiles that explore different dynamic optimisations.}, acceptancerate = {0.54}, appendix = {https://github.com/HumphreyHCB/AWFY-Profilers}, author = {Burchell, Humphrey and Larose, Octave and Kaleba, Sophie and Marr, Stefan}, blog = {https://stefan-marr.de/2023/09/dont-blindly-trust-your-profiler/}, booktitle = {Proceedings of the 20th ACM SIGPLAN International Conference on Managed Programming Languages and Runtimes}, doi = {10.1145/3617651.3622985}, keywords = {CPUSampling Comparison MeMyPublication Precision Profiling myown}, month = oct, pages = {1--14}, pdf = {https://stefan-marr.de/downloads/mplr23-burchell-et-al-dont-trust-your-profiler.pdf}, publisher = {ACM}, series = {MPLR'23}, title = {{Don’t Trust Your Profiler: An Empirical Study on the Precision and Accuracy of Java Profilers}}, year = {2023}, month_numeric = {10} }
@article{Larose:2023:AstVsBc, abstract = {Thanks to partial evaluation and meta-tracing, it became practical to build language implementations that reach state-of-the-art peak performance by implementing only an interpreter. Systems such as RPython and GraalVM provide components such as a garbage collector and just-in-time compiler in a language-agnostic manner, greatly reducing implementation effort. However, meta-compilation-based language implementations still need to improve further to reach the low memory use and fast warmup behavior that custom-built systems provide. A key element in this endeavor is interpreter performance. Folklore tells us that bytecode interpreters are superior to abstract-syntax-tree (AST) interpreters both in terms of memory use and run-time performance. This work assesses the trade-offs between AST and bytecode interpreters to verify common assumptions and whether they hold in the context of meta-compilation systems. We implemented four interpreters, each an AST and a bytecode one using RPython and GraalVM. We keep the difference between the interpreters as small as feasible to be able to evaluate interpreter performance, peak performance, warmup, memory use, and the impact of individual optimizations. Our results show that both systems indeed reach performance close to Node.js/V8. Looking at interpreter-only performance, our AST interpreters are on par with, or even slightly faster than their bytecode counterparts. After just-in-time compilation, the results are roughly on par. This means bytecode interpreters do not have their widely assumed performance advantage. However, we can confirm that bytecodes are more compact in memory than ASTs, which becomes relevant for larger applications. However, for smaller applications, we noticed that bytecode interpreters allocate more memory because boxing avoidance is not as applicable, and because the bytecode interpreter structure requires memory, e.g., for a reified stack. 
Our results show AST interpreters to be competitive on top of meta-compilation systems. Together with possible engineering benefits, they should thus not be discounted so easily in favor of bytecode interpreters.}, appendix = {https://doi.org/10.5281/zenodo.8147414}, articleno = {233}, author = {Larose, Octave and Kaleba, Sophie and Burchell, Humphrey and Marr, Stefan}, blog = {https://stefan-marr.de/2023/10/ast-vs-bytecode-interpreters/}, doi = {10.1145/3622808}, html = {https://stefan-marr.de/papers/oopsla-larose-et-al-ast-vs-bytecode-interpreters-in-the-age-of-meta-compilation/}, issn = {2475-1421}, journal = {Proceedings of the ACM on Programming Languages}, keywords = {AST Bytecode CaseStudy Comparison Interpreter JITCompilation MeMyPublication MetaTracing PartialEvaluation myown}, month = oct, number = {OOPSLA2}, numpages = {29}, pages = {318--346}, pdf = {https://stefan-marr.de/downloads/oopsla23-larose-et-al-ast-vs-bytecode-interpreters-in-the-age-of-meta-compilation.pdf}, publisher = {{ACM}}, series = {OOPSLA'23}, title = {AST vs. Bytecode: Interpreters in the Age of Meta-Compilation}, volume = {7}, year = {2023}, month_numeric = {10} }
@inproceedings{Huang:2023:GA, abstract = {Interpreter performance remains important today. Interpreters are needed in resource constrained systems, and even in systems with just-in-time compilers, they are crucial during warm up. A common form of interpreters is a bytecode interpreter, where the interpreter executes bytecode instructions one by one. Each bytecode is executed by the corresponding bytecode handler. In this paper, we show that the order of the bytecode handlers in the interpreter source code affects the execution performance of programs on the interpreter. On the basis of this observation, we propose a genetic algorithm (GA) approach to find an approximately optimal order. In our GA approach, we find an order optimized for a specific benchmark program and a specific CPU. We evaluated the effectiveness of our approach on various models of CPUs including x86 processors and an ARM processor. The order found using GA improved the execution speed of the program for which the order was optimized between 0.8% and 23.0% with 7.7% on average. We also assess the cross-benchmark and cross-machine performance of the GA-found order. Some orders showed good generalizability across benchmarks, speeding up all benchmark programs. 
However, the solutions do not generalize across different machines, indicating that they are highly specific to a microarchitecture.}, author = {Huang, Wanhong and Marr, Stefan and Ugawa, Tomoharu}, blog = {https://stefan-marr.de/2023/06/squeezing-a-little-more-performance-out-of-bytecode-interpreters/}, booktitle = {The 38th ACM/SIGAPP Symposium on Applied Computing (SAC '23)}, doi = {10.1145/3555776.3577712}, isbn = {978-1-4503-9517-5/23/03}, keywords = {Bytecodes CodeLayout EmbeddedSystems GeneticAlgorithm Interpreter JavaScript MeMyPublication Optimization myown}, month = mar, pages = {10}, pdf = {https://stefan-marr.de/downloads/acmsac23-huang-et-al-optimizing-the-order-of-bytecode-handlers-in-interpreters-using-a-genetic-algorithm.pdf}, publisher = {ACM}, series = {SAC'23}, title = {{Optimizing the Order of Bytecode Handlers in Interpreters using a Genetic Algorithm}}, year = {2023}, month_numeric = {3} }
@inproceedings{Kaleba:2022:CallSites, abstract = {Applications written in dynamic languages are becoming larger and larger and companies increasingly use multi-million line codebases in production. At the same time, dynamic languages rely heavily on dynamic optimizations, particularly those that reduce the overhead of method calls. In this work, we study the call-site behavior of Ruby benchmarks that are being used to guide the development of upcoming Ruby implementations such as TruffleRuby and YJIT. We study the interaction of call-site lookup caches, method splitting, and elimination of duplicate call-targets. We find that these optimizations are indeed highly effective on both smaller and large benchmarks, methods and closures alike, and help to open up opportunities for further optimizations such as inlining. However, we show that TruffleRuby's splitting may be applied too aggressively on already-monomorphic call-sites, coming at a run-time cost. We also find three distinct patterns in the evolution of call-site behavior over time, which may help to guide novel optimizations. 
We believe that our results may support language implementers in optimizing runtime systems for large codebases built in dynamic languages.}, acceptancerate = {0.4}, author = {Kaleba, Sophie and Larose, Octave and Jones, Richard and Marr, Stefan}, blog = {https://stefan-marr.de/2022/11/how-effective-are-classic-lookup-optimizations-for-rails-apps/}, booktitle = {Proceedings of the 18th Symposium on Dynamic Languages}, day = {7}, doi = {10.1145/3563834.3567538}, keywords = {Analysis CallSite DynamicLanguages Inlining LookupCache MeMyPublication Splitting myown}, location = {Auckland, New Zealand}, month = dec, note = {(acceptance rate 40\%)}, pages = {14}, pdf = {https://stefan-marr.de/downloads/dls22-kaleba-et-al-analyzing-the-run-time-call-site-behavior-of-ruby-applications.pdf}, publisher = {ACM}, series = {DLS'22}, title = {Who You Gonna Call: Analyzing the Run-time Call-Site Behavior of Ruby Applications}, year = {2022}, month_numeric = {12} }
@inproceedings{Marr:2022:LSP, abstract = {With the wide adoption of the language server protocol, the desire to have IDE-style tooling even for niche and research languages has exploded. The Truffle language framework facilitates this desire by offering an almost zero-effort approach to language implementers to providing IDE features. However, this existing approach needs to execute the code being worked on to capture much of the information needed for an IDE, ideally with full unit-test coverage. To capture information more reliably and avoid the need to execute the code being worked on, we propose a new parse-based design for language servers. Our solution provides a language-agnostic interface for structural information, with which we can support most common IDE features for dynamic languages. Comparing the two approaches, we find that our new parse-based approach requires only a modest development effort for each language and has only minor tradeoffs for precision, for instance for code completion, compared to Truffle's execution-based approach. Further, we show that less than 1,000 lines of code capture enough details to provide much of the typical IDE functionality, with an order of magnitude less code than ad hoc language servers. We tested our approach for the custom parsers of Newspeak and SOM, as well as SimpleLanguage's ANTLR grammar without any changes to it. Combining both parse and execution-based approaches has the potential to provide good and precise IDE tooling for a wide range of languages with only small development effort. 
By itself, our approach would be a good addition to the many libraries implementing the language server protocol to enable low-effort implementations of IDE features.}, acceptancerate = {0.4}, appendix = {https://github.com/smarr/effortless-language-servers}, author = {Marr, Stefan and Burchell, Humphrey and Niephaus, Fabio}, blog = {https://stefan-marr.de/2022/10/effortless-language-servers/}, booktitle = {Proceedings of the 18th Symposium on Dynamic Languages}, day = {7}, doi = {10.1145/3563834.3567537}, html = {https://stefan-marr.de/papers/dls-marr-et-al-execution-vs-parse-based-language-servers/}, keywords = {Comparison ExecutionTime LanguageServerProtocol MeMyPublication ParseTime myown}, location = {Auckland, New Zealand}, month = dec, note = {(acceptance rate 40\%)}, pages = {14}, pdf = {https://stefan-marr.de/downloads/dls22-marr-et-al-execution-vs-parse-based-language-servers.pdf}, publisher = {ACM}, series = {DLS'22}, title = {Execution vs. Parse-Based Language Servers: Tradeoffs and Opportunities for Language-Agnostic Tooling for Dynamic Languages}, year = {2022}, month_numeric = {12} }
@inproceedings{Ugawa:2022:HCGOpt, abstract = {JavaScript is increasingly used for the Internet of Things (IoT) on embedded systems. However, JavaScript's memory footprint is a challenge, because normal JavaScript virtual machines (VMs) do not fit into the small memory of IoT devices. In part this is because a significant amount of memory is used by hidden classes, which are used to represent JavaScript's dynamic objects efficiently. In this research, we optimize the hidden class graph to minimize their memory use. Our solution collects the hidden class graph and related information for an application in a profiling run, and optimizes the graph offline. We reduce the number of hidden classes by avoiding introducing intermediate ones, for instance when properties are added one after another. Our optimizations allow the VM to assign the most likely final hidden class to an object at its creation. They also minimize re-allocation of storage for property values, and reduce the polymorphism of inline caches. We implemented these optimizations in a JavaScript VM, eJSVM, and found that offline optimization can eliminate 61.9% of the hidden classes on average. 
It also improves execution speed by minimizing the number of hidden class transitions for an object and reducing inline cache misses.}, author = {Ugawa, Tomoharu and Marr, Stefan and Jones, Richard}, booktitle = {Proceedings of the 14th ACM SIGPLAN International Workshop on Virtual Machines and Intermediate Languages}, day = {5}, doi = {10.1145/3563838.3567678}, keywords = {EmbeddedSystems HiddenClasses InlineCaching IoT JavaScript MeMyPublication OfflineOptimization VirtualMachine myown}, location = {Auckland, New Zealand}, month = dec, pages = {11}, pdf = {https://stefan-marr.de/downloads/vmil22-ugawa-et-al-profile-guided-offline-optimization-of-hidden-class-graphs.pdf}, publisher = {ACM}, series = {VMIL'22}, title = {Profile Guided Offline Optimization of Hidden Class Graphs for JavaScript VMs in Embedded Systems}, year = {2022}, month_numeric = {12} }
@presentation{Marr:2022:GraalWorkshop, abstract = {Language implementation frameworks such as Truffle+Graal and RPython make the promise of state-of-the-art performance by implementing “just” the interpreter, and leaving the rest to the frameworks, which add a just-in-time compiler, garbage collection, and various other bits “for free”. One important assumption for these frameworks is that real systems do not spend a lot of time interpreting user code, but reach highly-optimized compiled code quickly. Unfortunately, for large codebases with millions of lines of code, this assumption does not hold as well as for common benchmarks. A significant amount of time is spent interpreting code. This is only exacerbated by modern development approaches, which lead to, what one would assume to be long running server applications, being updated every 30 minutes. In practice, this means for large and actively developed codebases, interpreter performance is key. This brings us to the question of how Truffle-based interpreters such as Graal.js, TruffleRuby, GraalPython, and TruffleSOM compare to commonly used interpreter implementations for the same language. We will present our results comparing these interpreters with and without just-in-time compilation on the Are We Fast Yet benchmarks, which were designed for cross-language comparison. We will further analyze where these interpreters spend their time, and experiment with an approach to approximate “best case” performance assuming an interpreter could perform optimizations on the method level without requiring just-in-time compilation. Based on our observations, we will discuss a number of possible steps forward based on the idea of supernodes, i.e., node combination, object inlining, and generating interpreters using Graal’s partial evaluator. 
All these techniques attempt to mitigate the performance cost of the “everything is a node” implementation style of Truffle interpreters, which leads to costly run-time program representation and a high degree of redundancy in correctness check during interpretation.}, author = {Marr, Stefan and Larose, Octave and Kaleba, Sophie and Seaton, Chris}, booktitle = {The 2022 Graal Workshop: Science, Art, Magic: Using and Developing The Graal Compiler}, keywords = {GraalVM Interpreters MeMyPublication Optimization Presentation VMs myown}, month = apr, series = {GraalWorkshop}, title = {{Truffle Interpreter Performance without the Holy Graal}}, year = {2022}, month_numeric = {4} }
@presentation{Kaleba:2022:MoreVMs, abstract = {Web-applications are ubiquitous, from simple personal blogs to e-commerce platforms with millions of sales. Ruby-on-Rails is a popular framework implemented in Ruby that provides tools to build such web-applications. Performance is often critical in the context of large-scale web-applications; especially in dynamic languages such as Ruby that feature reflection and the use of many small methods. Such languages therefore benefit from run-time optimisations, notably through the combined use of lookup caches, splitting and inlining. To limit their overhead, such optimisations generally rely on assumptions that do not necessarily match with the actual run-time behaviour. With Phase-based splitting, we showed that splitting can benefit from using homogeneous patterns of behaviour, called “phases” to reach better performance. In an effort to identify such phases in real-world web-applications, we thoroughly analyse the run-time call-site behaviour of Ruby programs and Ruby-on-Rails applications, running on top of TruffleRuby. This talk describes our findings and aims at guiding future research on call-site optimisation.}, author = {Kaleba, Sophie and Larose, Octave and Marr, Stefan and Jones, Richard}, booktitle = {Workshop on Modern Language Runtimes, Ecosystems, and VMs}, keywords = {Behavior Benchmarking MeMyPublication Phases Ruby myown}, month = mar, series = {MoreVMs'22}, title = {Who You Gonna Call? A Case Study about the Call-Site Behaviour in Ruby-on-Rails Applications}, venue = {MoreVMs Workshop}, year = {2022}, month_numeric = {3} }
@presentation{Larose:2022:MoreVMs, abstract = {The Truffle framework allows language implementations to reach state-of-the-art run time performance while only providing an abstract syntax tree (AST) interpreter; the AST is compiled to machine code using GraalVM’s JIT compiler. However, it can take some time before this fully optimized code is generated and executed: startup performance is consequently tightly bound to interpreter performance, therefore reducing the execution time requires us to improve interpreter performance instead of solely focusing on the JIT compiler. Through the use of a novel technique called supernodes, we take steps towards improving the run-time performance of Truffle-based interpreters, aiming to reduce programs’ overall execution time by improving interpreter performance. We take inspiration from a well-known bytecode interpreter optimization technique: superinstructions, which reduce the instruction dispatch overhead by concatenating existing instructions. In the case of supernodes, AST nodes are merged, creating a single entity which has the same behavior as the original tree. The main performance gain of supernodes comes from removing redundant node safety guards: for instance, regular nodes have to check the type of their input data through a type guard, even though it may have already been determined by another node which had no way of sharing this information. Supernodes avoid this problem through unifying the node contexts. Moreover, similarly to superinstructions, performance is also gained through reducing the node dispatch overhead as a result of using fewer nodes. So far, we have been relying on the research language TruffleSOM, a minimal Smalltalk dialect implemented using Truffle, and we are focusing on the Are We Fast Yet benchmark suite, which contains both micro- and macro-benchmarks. Initial results are promising, showing up to 42% run time reduction with the addition of 20 supernodes. 
Moreover, there are currently no performance regressions for any of our benchmarks compared to a baseline with no supernode candidates available. However, node trees that yield valuable supernodes are currently detected manually, and the process of generating supernodes from them is currently not automated. In the future, we aim to develop heuristics to identify potential supernode candidates: both at parsing time through static analysis, as well as later during run time through detection of frequently called nodes that could yield performance gains should they be merged together. The supernodes would then be generated based on the code of the selected AST nodes. In addition, we aim to port our technique to more complex Truffle implementations used in the industry, such as GraalPython or TruffleRuby.}, author = {Larose, Octave and Kaleba, Sophie and Marr, Stefan}, booktitle = {Workshop on Modern Language Runtimes, Ecosystems, and VMs}, keywords = {Graal GraalVM Interpreters MeMyPublication Optimization myown}, month = mar, series = {MoreVMs'22}, title = {Less Is More: Merging AST Nodes To Optimize Interpreters}, year = {2022}, month_numeric = {3} }
@presentation{Marr:2021:Agere, abstract = {The actor model is a great tool for various use cases. Though, it’s not the only tool, and sometimes perhaps not even the best. Consequently, developers started mixing and matching high-level concurrency models based on the problem at hand, much like other programming abstractions. Though, this comes with various problems. For instance, we don’t usually have debugging tools that help us to make sense of the resulting system. If we even have a debugger, it may barely allow us to step through our programs instruction by instruction. Let’s imagine a better world! One where we can follow asynchronous messages, jump to the next transaction commit, or break on the next fork/join task created. Though, race conditions remain notoriously difficult to reproduce. One solution is to record our program’s execution, ideally capturing the bug. Then we can replay it as often as needed to identify the cause of our bug. The hard bit here is making record & replay practical. I will explain how our concurrency-model-agnostic approach allows us to record model interactions trivially for later replay, and how we minimized its run-time overhead. In the case of actor applications, we can even make the snapshotting fast to be able to limit trace sizes. Having better debugging capabilities is a real productivity boost. Though, some bugs will always slip through the cracks. So, what if we could prevent those bugs from causing issues? Other researchers have shown how to do it, and I’ll conclude this talk with some ideas on how we can utilize the knowledge we have in our language implementations to make such mitigation approaches fast. 
The talk is based on work done in collaboration with Dominik Aumayr, Carmen Torres Lopez, Elisa Gonzalez Boix, and Hanspeter Mössenböck.}, author = {Marr, Stefan}, blog = {https://stefan-marr.de/2021/10/actors-and-now/}, booktitle = {11th ACM SIGPLAN International Workshop on Programming Based on Actors, Agents, and Decentralized Control}, day = {17}, keywords = {Actors Agere Concurrency ConcurrencyModels Keynote MeMyPublication Tooling myown}, month = oct, series = {AGERE'21}, title = {Actors! And now? An Implementer's Perspective on High-level Concurrency Models, Debugging Tools, and the Future of Automatic Bug Mitigation}, url = {https://2021.splashcon.org/details/agere-2021-papers/8/Actors-And-now-An-Implementer-s-Perspective-on-High-level-Concurrency-Models-Debug}, venue = {11th ACM SIGPLAN International Workshop on Programming Based on Actors, Agents, and Decentralized Control}, year = {2021}, month_numeric = {10} }
@presentation{Kaleba:2021:MonoBottle, author = {Kaleba, Sophie and Marr, Stefan and Jones, Richard}, day = {13}, eventtitle = {16th ACM International Workshop on Implementation, Compilation, Optimization of OO Languages, Programs and Systems}, keywords = {JITCompilation Lookup MeMyPublication Optimizations Phases Splitting myown}, month = jul, pdf = {https://stefan-marr.de/downloads/icooolps21-kaleba-et-al-avoiding-monomorphisation-bottlenecks-with-phase-based-splitting.pdf}, series = {ICOOOLPS'21}, title = {{Avoiding Monomorphisation Bottlenecks with Phase-based Splitting}}, year = {2021}, month_numeric = {7} }
@inproceedings{GreenwoodThessman:2021:NavieCastInsert, author = {Greenwood-Thessman, Erin and Gariano, Isaac Oscar and Roberts, Richard and Marr, Stefan and Homer, Michael and Noble, James}, booktitle = {Proceedings of the 16th {ACM} International Workshop on Implementation, Compilation, Optimization of {OO} Languages, Programs and Systems}, doi = {10.1145/3464972.3472395}, keywords = {Evaluation Experiments Graal Grace GradualTyping MeMyPublication Optimization SOMns Truffle myown}, month = jul, pdf = {https://stefan-marr.de/downloads/icooolps21-greenwood-thessman-et-al-naive-transient-cast-insertion-isnt-that-bad.pdf}, publisher = {{ACM}}, series = {ICOOOLPS'21}, title = {{Naïve Transient Cast Insertion Isn’t (That) Bad}}, year = {2021}, month_numeric = {7} }
@article{Aumayr:2021:AgnosticRR, abstract = {With concurrency being integral to most software systems, developers combine high-level concurrency models in the same application to tackle each problem with appropriate abstractions. While languages and libraries offer a wide range of concurrency models, debugging support for applications that combine them has not yet gained much attention. Record & replay aids debugging by deterministically reproducing recorded bugs, but is typically designed for a single concurrency model only. This paper proposes a practical concurrency-model-agnostic record & replay approach for multi-paradigm concurrent programs, i.e. applications that combine concurrency models. Our approach traces high-level non-deterministic events by using a uniform model-agnostic trace format and infrastructure. This enables ordering-based record & replay support for a wide range of concurrency models, and thereby enables debugging of applications that combine them. In addition, it allows language implementors to add new concurrency models and reuse the model-agnostic record & replay support. We argue that a concurrency-model-agnostic record & replay is practical and enables advanced debugging support for a wide range of concurrency models. The evaluation shows that our approach is expressive and flexible enough to support record & replay of applications using threads & locks, communicating event loops, communicating sequential processes, software transactional memory and combinations of those concurrency models. For the actor model, we reach recording performance competitive with an optimized special-purpose record & replay solution. The average recording overhead on the Savina actor benchmark suite is 10% (min. 0%, max. 23%). The performance for other concurrency models and combinations thereof is at a similar level. We believe our concurrency-model-agnostic approach helps developers of applications that mix and match concurrency models. 
We hope that this substrate inspires new tools and languages making building and maintaining of multi-paradigm concurrent applications simpler and safer.}, author = {Aumayr, Dominik and Marr, Stefan and Kaleba, Sophie and Gonzalez Boix, Elisa and Mössenböck, Hanspeter}, day = {28}, doi = {10.22152/programming-journal.org/2021/5/14}, issn = {2473-7321}, journal = {The Art, Science, and Engineering of Programming}, keywords = {Actors CSP Concurrency MeMyPublication MultiParadigm MultiThreading Nondeterministic RecordReplay SOMns STM myown}, month = feb, number = {3}, pages = {39}, pdf = {https://stefan-marr.de/downloads/prog21-aumayr-et-al-capturing-high-level-nondeterminism-in-concurrent-programs-for-practical-concurrency-model-agnostic-record-replay.pdf}, publisher = {AOSA Inc.}, series = {Programming}, title = {{Capturing High-level Nondeterminism in Concurrent Programs for Practical Concurrency Model Agnostic Record \& Replay}}, volume = {5}, year = {2021}, month_numeric = {2} }
@proceedings{MPLR:2020, author = {Marr, Stefan}, doi = {10.1145/3426182}, editor = {Marr, Stefan}, isbn = {978-1-4503-8853-5}, keywords = {Conference LanguageImplementation MPLR ManagedLanguages MeMyPublication Proceeding myown}, month = nov, publisher = {ACM}, series = {MPLR'20}, title = {17th International Conference on Managed Programming Languages and Runtimes}, url = {https://mplr2020.cs.manchester.ac.uk/}, venue = {Manchester, UK}, year = {2020}, month_numeric = {11} }
@proceedings{Marr:2020:P20I3, author = {Marr, Stefan}, doi = {10.22152/programming-journal.org/2020/4/issue3}, editor = {Marr, Stefan}, issn = {2473-7321}, keywords = {MeMyPublication myown}, month = feb, number = {3}, publisher = {AOSA Inc.}, series = {Programming}, title = {The Art, Science, and Engineering of Programming}, url = {https://programming-journal.org/2020/4/issue3/}, volume = {4}, year = {2020}, month_numeric = {2} }
@inproceedings{Gariano:2019:MTT, abstract = {One form of type checking used in gradually typed language is transient type checking: whenever an object ‘flows’ through code with a type annotation, the object is dynamically checked to ensure it has the methods required by the annotation. Just-in-time compilation and optimisation in virtual machines can eliminate much of the overhead of run-time transient type checks. Unfortunately this optimisation is not uniform: some type checks will significantly decrease, or even increase, a program’s performance. In this paper, we refine the so called “Takikawa” protocol, and use it to identify which type annotations have the greatest effects on performance. In particular, we show how graphing the performance of such benchmarks when varying which type annotations are present in the source code can be used to discern potential patterns in performance. We demonstrate our approach by testing the Moth virtual machine: for many of the benchmarks where Moth’s transient type checking impacts performance, we have been able to identify one or two specific type annotations that are the likely cause. Without these type annotations, the performance impact of transient type checking becomes negligible. 
Using our technique programmers can optimise programs by removing expensive type checks, and VM engineers can identify new opportunities for compiler optimisation.}, acmid = {3361232}, author = {Gariano, Isaac Oscar and Roberts, Richard and Marr, Stefan and Homer, Michael and Noble, James}, booktitle = {Proceedings of the 11th ACM SIGPLAN International Workshop on Virtual Machines and Intermediate Languages}, day = {22}, doi = {10.1145/3358504.3361232}, isbn = {978-1-4503-6987-9}, keywords = {Grace GradualTyping MeMyPublication Moth Performance TransientTypes Visualization myown}, location = {Athens, Greece}, month = oct, numpages = {9}, pages = {58--66}, pdf = {https://stefan-marr.de/downloads/vmil19-gariano-et-al-which-of-my-transient-type-checks-are-not-almost-free.pdf}, publisher = {ACM}, series = {VMIL'19}, title = {{Which of My Transient Type Checks Are Not (Almost) Free?}}, year = {2019}, month_numeric = {10} }
@proceedings{Marr:2019:DLS, author = {Marr, Stefan and Fumero, Juan}, day = {20}, editor = {Marr, Stefan and Fumero, Juan}, isbn = {978-1-4503-6996-1}, keywords = {DLS19 DynamicLanguages MeMyPublication Proceedings myown}, location = {Athens, Greece}, month = oct, publisher = {ACM}, series = {DLS}, title = {DLS 2019: Proceedings of the 15th ACM SIGPLAN International Symposium on Dynamic Languages}, url = {https://dl.acm.org/citation.cfm?id=3359619}, year = {2019}, month_numeric = {10} }
@inproceedings{Aumayr:2019:ASnap, abstract = {The actor model is popular for many types of server applications. Efficient snapshotting of applications is crucial in the deployment of pre-initialized applications or moving running applications to different machines, e.g for debugging purposes. A key issue is that snapshotting blocks all other operations. In modern latency-sensitive applications, stopping the application to persist its state needs to be avoided, because users may not tolerate the increased request latency. In order to minimize the impact of snapshotting on request latency, our approach persists the application’s state asynchronously by capturing partial heaps, completing snapshots step by step. Additionally, our solution is transparent and supports arbitrary object graphs. We prototyped our snapshotting approach on top of the Truffle/Graal platform and evaluated it with the Savina benchmarks and the Acme Air microservice application. When performing a snapshot every thousand Acme Air requests, the number of slow requests ( 0.007% of all requests) with latency above 100ms increases by 5.43%. Our Savina microbenchmark results detail how different utilization patterns impact snapshotting cost. To the best of our knowledge, this is the first system that enables asynchronous snapshotting of actor applications, i.e. without stop-the-world synchronization, and thereby minimizes the impact on latency. 
We thus believe it enables new deployment and debugging options for actor systems.}, acceptancerate = {0.61}, author = {Aumayr, Dominik and Marr, Stefan and Gonzalez Boix, Elisa and Mössenböck, Hanspeter}, booktitle = {Proceedings of the 16th ACM SIGPLAN International Conference on Managed Programming Languages and Runtimes}, doi = {10.1145/3357390.3361019}, isbn = {978-1-4503-6977-0}, keywords = {AcmeAir Actors Benchmarking Concurrency Dominik Latency MPLR MeMyPublication Snapshots myown}, month = oct, pages = {157--171}, pdf = {https://stefan-marr.de/downloads/mplr19-aumayr-et-al-asynchronous-snapshots-of-actor-systems-for-latency-sensitive-applications.pdf}, publisher = {ACM}, series = {MPLR'19}, title = {{Asynchronous Snapshots of Actor Systems for Latency-Sensitive Applications}}, year = {2019}, month_numeric = {10} }
@proceedings{Marr:2019:P20I2, author = {Marr, Stefan}, doi = {10.22152/programming-journal.org/2020/4/issue2}, editor = {Marr, Stefan}, issn = {2473-7321}, keywords = {MeMyPublication myown}, month = oct, number = {2}, publisher = {AOSA Inc.}, series = {Programming}, title = {The Art, Science, and Engineering of Programming}, url = {https://programming-journal.org/2020/4/issue2/}, volume = {4}, year = {2019}, month_numeric = {10} }
@inproceedings{Rigger:2019:Builtins, abstract = {C programs can use compiler builtins to provide functionality that the C language lacks. On Linux, GCC provides several thousands of builtins that are also supported by other mature compilers, such as Clang and ICC. Maintainers of other tools lack guidance on whether and which builtins should be implemented to support popular projects. To assist tool developers who want to support GCC builtins, we analyzed builtin use in 4,913 C projects from GitHub. We found that 37% of these projects relied on at least one builtin. Supporting an increasing proportion of projects requires support of an exponentially increasing number of builtins; however, implementing only 10 builtins already covers over 30% of the projects. Since we found that many builtins in our corpus remained unused, the effort needed to support 90% of the projects is moderate, requiring about 110 builtins to be implemented. For each project, we analyzed the evolution of builtin use over time and found that the majority of projects mostly added builtins. This suggests that builtins are not a legacy feature and must be supported in future tools. 
Systematic testing of builtin support in existing tools revealed that many lacked support for builtins either partially or completely; we also discovered incorrect implementations in various tools, including the formally verified CompCert compiler.}, acceptancerate = {0.24}, author = {Rigger, Manuel and Marr, Stefan and Adams, Bram and Mössenböck, Hanspeter}, booktitle = {Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering}, doi = {10.1145/3338906.3338907}, isbn = {978-1-4503-5572-8/19/08}, keywords = {Builtins Compiler EmpiricalStudy GCC GitHub MeMyPublication Survey Use myown}, month = aug, pdf = {https://stefan-marr.de/downloads/fse19-rigger-et-al-understanding-gcc-builtins-to-develop-better-tools.pdf}, publisher = {ACM}, series = {ESEC/FSE'19}, title = {{Understanding GCC Builtins to Develop Better Tools}}, year = {2019}, month_numeric = {8} }
@inproceedings{Roberts:2019:TTAF, abstract = {Transient gradual typing imposes run-time type tests that typically cause a linear slowdown. This performance impact discourages the use of type annotations because adding types to a program makes the program slower. A virtual machine can employ standard just-in-time optimizations to reduce the overhead of transient checks to near zero. These optimizations can give gradually-typed languages performance comparable to state-of-the-art dynamic languages, so programmers can add types to their code without affecting their programs' performance.}, acceptancerate = {0.37}, appendix = {https://github.com/gracelang/moth-benchmarks/blob/papers/ecoop19/index.md}, author = {Roberts, Richard and Marr, Stefan and Homer, Michael and Noble, James}, booktitle = {33rd European Conference on Object-Oriented Programming}, day = {15}, doi = {10.4230/LIPIcs.ECOOP.2019.5}, isbn = {978-3-95977-111-5}, issn = {1868-8969}, keywords = {Benchmarking Evaluation Graal Grace GradualTyping MeMyPublication Moth SOMns TransientTypes Truffle myown}, month = jul, number = {5}, pages = {5:1--5:28}, pdf = {https://stefan-marr.de/downloads/ecoop19-roberts-et-al-transient-typechecks-are-almost-free.pdf}, publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik}, series = {ECOOP'19}, title = {Transient Typechecks are (Almost) Free}, volume = {134}, year = {2019}, month_numeric = {7} }
@inproceedings{TorresLopez:2019:MD, abstract = {Many of today's software systems are parallel or concurrent. With the rise of Node.js and more generally event-loop architectures, many systems need to handle concurrency. However, its non-deterministic behavior makes it hard to reproduce bugs. Today's interactive debuggers unfortunately do not support developers in debugging non-deterministic issues. They only allow us to explore a single execution path. Therefore, some bugs may never be reproduced in the debugging session, because the right conditions are not triggered. As a solution, we propose multiverse debugging, a new approach for debugging non-deterministic programs that allows developers to observe all possible execution paths of a parallel program and debug it interactively. We introduce the concepts of multiverse breakpoints and stepping, which can halt a program in different execution paths, i.e. universes. We apply multiverse debugging to AmbientTalk, an actor-based language, resulting in Voyager, a multiverse debugger implemented on top of the AmbientTalk operational semantics. We provide a proof of non-interference, i.e., we prove that observing the behavior of a program by the debugger does not affect the behavior of that program and vice versa. 
Multiverse debugging establishes the foundation for debugging non-deterministic programs interactively, which we believe can aid the development of parallel and concurrent systems.}, acceptancerate = {0.37}, appendix = {https://doi.org/10.4230/DARTS.5.2.4}, author = {Torres Lopez, Carmen and Gurdeep Singh, Robbert and Marr, Stefan and Gonzalez Boix, Elisa and Scholliers, Christophe}, blog = {https://stefan-marr.de/2019/07/what-if-we-could-see-all-concurrency-bugs-in-the-debugger/}, booktitle = {33rd European Conference on Object-Oriented Programming}, day = {15}, doi = {10.4230/LIPIcs.ECOOP.2019.27}, isbn = {978-3-95977-111-5}, issn = {1868-8969}, keywords = {Actors AmbientTalk Concurrency Debugging FormalSemantics Formalism MeMyPublication Multiverse NonDeterminism Redex myown}, month = jul, number = {27}, pages = {27:1--27:30}, pdf = {https://stefan-marr.de/downloads/ecoop19-torres-lopez-et-al-multiverse-debugging-non-deterministic-debugging-for-non-deterministic-programs.pdf}, publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik}, series = {ECOOP'19}, title = {{Multiverse Debugging: Non-deterministic Debugging for Non-deterministic Programs}}, volume = {134}, year = {2019}, month_numeric = {7} }
@proceedings{Marr:2019:P20I1, author = {Marr, Stefan}, doi = {10.22152/programming-journal.org/2020/4/issue1}, editor = {Marr, Stefan}, issn = {2473-7321}, keywords = {MeMyPublication myown}, month = jun, number = {1}, publisher = {AOSA Inc.}, series = {Programming}, title = {The Art, Science, and Engineering of Programming}, url = {https://programming-journal.org/2020/4/issue1/}, volume = {4}, year = {2019}, month_numeric = {6} }
@patent{Daloze:2019:TSO-P, author = {Daloze, Benoit and Marr, Stefan and Bonetta, Daniele}, day = {2}, howpublished = {US Patent}, keywords = {MeMyPublication Objects ThreadSafe Truffle myown patent}, month = apr, note = {Initial publication Sept. 28, 2017}, number = {US 10,248,349 B2}, title = {{Efficient and Thread-Safe Objects for Dynamically-Typed Languages}}, year = {2019}, month_numeric = {4} }
@proceedings{Marr:2019:Programming19, author = {Marr, Stefan and Cazzola, Walter}, editor = {Marr, Stefan and Cazzola, Walter}, isbn = {978-1-4503-6257-3}, keywords = {MeMyPublication Proceedings Programming Workshops myown}, location = {Genova, Italy}, month = apr, publisher = {ACM}, series = {ICPS}, title = {{<Programming>'19: Conference Companion of the 3rd International Conference on Art, Science, and Engineering of Programming}}, url = {https://dl.acm.org/citation.cfm?id=3328433}, year = {2019}, month_numeric = {4} }
@proceedings{Meta:2018, author = {Chari, Guido and Gonzalez Boix, Elisa and Marr, Stefan}, day = {5}, editor = {Chari, Guido and Gonzalez Boix, Elisa and Marr, Stefan}, isbn = {978-1-4503-6068-5}, keywords = {MeMyPublication Metaprogramming Proceedings Workshop myown}, location = {Boston, MA, USA}, month = nov, publisher = {ACM}, series = {SPLASH Workshop}, title = {Meta 2018: Proceedings of the 3rd ACM SIGPLAN International Workshop on Meta-Programming Techniques and Reflection}, url = {https://dl.acm.org/citation.cfm?id=3281074}, year = {2018}, month_numeric = {11} }
@proceedings{VMIL:2018, author = {Kell, Stephen and Marr, Stefan}, day = {4}, editor = {Kell, Stephen and Marr, Stefan}, isbn = {978-1-4503-6071-5}, keywords = {Compilation GarbageCollection MeMyPublication Proceedings VirtualMachines Workshop myown}, location = {Boston, MA, USA}, month = nov, publisher = {ACM}, series = {SPLASH Workshop}, title = {VMIL 2018: Proceedings of the 10th ACM SIGPLAN International Workshop on Virtual Machines and Intermediate Languages}, year = {2018}, month_numeric = {11} }
@presentation{Marr:2018:Grace, author = {Marr, Stefan and Roberts, Richard and Noble, James}, day = {4}, keywords = {Compilation Grace MeMyPublication Truffle myown}, month = nov, pages = {3}, pdf = {https://stefan-marr.de/downloads/grace18-marr-et-al-newspeak-and-truffle-a-platform-for-grace.pdf}, series = {Grace'18}, title = {{Newspeak and Truffle: A Platform for Grace?}}, type = {Presentation}, year = {2018}, month_numeric = {11} }
@article{Daloze:2018:TSC, abstract = {Dynamic programming languages such as Python and Ruby are widely used, and much effort is spent on making them efficient. One substantial research effort in this direction is the enabling of parallel code execution. While there has been significant progress, making dynamic collections efficient, scalable, and thread-safe is an open issue. Typical programs in dynamic languages use few but versatile collection types. Such collections are an important ingredient of dynamic environments, but are difficult to make safe, efficient, and scalable. In this paper, we propose an approach for efficient and concurrent collections by gradually increasing synchronization levels according to the dynamic needs of each collection instance. Collections reachable only by a single thread have no synchronization, arrays accessed in bounds have minimal synchronization, and for the general case, we adopt the Layout Lock paradigm and extend its design with a lightweight version that fits the setting of dynamic languages. We apply our approach to Ruby’s Array and Hash collections. 
Our experiments show that our approach has no overhead on single-threaded benchmarks, scales linearly for Array and Hash accesses, achieves the same scalability as Fortran and Java for classic parallel algorithms, and scales better than other Ruby implementations on Ruby workloads.}, author = {Daloze, Benoit and Tal, Arie and Marr, Stefan and Mössenböck, Hanspeter and Petrank, Erez}, doi = {10.1145/3276478}, journal = {Proceedings of the ACM on Programming Languages}, keywords = {Builtin Collections Graal GraalVM LocalShared MeMyPublication Ruby ThreadSafety Truffle TruffleRuby myown}, month = nov, number = {OOPSLA}, pages = {108:1--108:30}, pdf = {https://stefan-marr.de/downloads/oopsla18-daloze-et-al-parallelization-of-dynamic-languages-synchronizing-built-in-collections.pdf}, series = {OOPSLA'18}, title = {{Parallelization of Dynamic Languages: Synchronizing Built-in Collections}}, volume = {2}, year = {2018}, month_numeric = {11} }
@inproceedings{Aumayr:2018:RR, abstract = {With the ubiquity of parallel commodity hardware, developers turn to high-level concurrency models such as the actor model to lower the complexity of concurrent software. However, debugging concurrent software is hard, especially for concurrency models with a limited set of supporting tools. Such tools often deal only with the underlying threads and locks, which obscures the view on e.g. actors and messages and thereby introduces additional complexity. To improve on this situation, we present a low-overhead record & replay approach for actor languages. It allows one to debug concurrency issues deterministically based on a previously recorded trace. Our evaluation shows that the average run-time overhead for tracing on benchmarks from the Savina suite is 10% (min. 0%, max. 20%). For Acme-Air, a modern web application, we see a maximum increase of 1% in latency for HTTP requests and about 1.4 MB/s of trace data. These results are a first step towards deterministic replay debugging of actor systems in production.}, acceptancerate = {0.72}, author = {Aumayr, Dominik and Marr, Stefan and Béra, Clément and Gonzalez Boix, Elisa and Mössenböck, Hanspeter}, blog = {https://stefan-marr.de/2018/08/deterministic-replay-for-actors/}, booktitle = {Proceedings of the 15th International Conference on Managed Languages and Runtimes}, day = {12--13}, doi = {10.1145/3237009.3237015}, isbn = {978-1-4503-6424-9/18/09}, keywords = {Actors Concurrency Debugging Determinism MeMyPublication Replay SOMns Tracing Truffle}, month = sep, pdf = {https://stefan-marr.de/downloads/manlang18-aumayr-et-al-efficient-and-deterministic-record-and-replay-for-actor-languages.pdf}, publisher = {ACM}, series = {ManLang'18}, title = {{Efficient and Deterministic Record \& Replay for Actor Languages}}, year = {2018}, month_numeric = {9} }
@incollection{TorresLopez:2018:BugSurvey, abstract = {The actor model is an attractive foundation for developing concurrent applications because actors are isolated concurrent entities that communicate through asynchronous messages and do not share state. Thereby, they avoid concurrency bugs such as data races, but are not immune to concurrency bugs in general.}, author = {Torres Lopez, Carmen and Marr, Stefan and Mössenböck, Hanspeter and Gonzalez Boix, Elisa}, booktitle = {Programming with Actors: State-of-the-Art and Research Perspectives}, doi = {10.1007/978-3-030-00302-9_6}, editor = {Ricci, Alessandro and Haller, Philipp}, isbn = {978-3-030-00302-9}, keywords = {ActorLanguages Analysis Bug Concurrency Debugging EventLoopConcurrency MeMyPublication Testing Truffle myown}, month = sep, numpages = {32}, pages = {155--185}, pdf = {https://stefan-marr.de/downloads/lncs-torres-lopez-et-al-study-of-concurrency-bugs-and-advanced-development-support-for-actor-based-programs.pdf}, publisher = {Springer}, series = {AGERE!'16 (LNCS)}, title = {{A Study of Concurrency Bugs and Advanced Development Support for Actor-based Programs}}, volume = {10789}, year = {2018}, month_numeric = {9} }
@inproceedings{Marr:2018:COLL-PX, abstract = {While an integral part of all programming languages, the design of collection libraries is rarely studied. This work briefly reviews the collection libraries of 14 languages to identify possible design dimensions. Some languages have surprisingly few but versatile collections, while others have large libraries with many specialized collections. Based on the identified design dimensions, we argue that a small collection library with only a sequence, a map, and a set type are a suitable choice to facilitate exploratory programming. Such a design minimizes the number of decisions programmers have to make when dealing with collections, and it improves discoverability of collection operations. We further discuss techniques that make their implementation practical from a performance perspective. Based on these arguments, we conclude that languages which aim to support exploratory programming should strive for small and versatile collection libraries.}, acceptancerate = {1.00}, author = {Marr, Stefan and Daloze, Benoit}, blog = {https://stefan-marr.de/2018/03/how-to-design-collection-libraries/}, booktitle = {Proceedings of Programming Experience Workshop}, day = {10}, doi = {10.1145/3191697.3214334}, html = {https://stefan-marr.de/papers/px-marr-daloze-few-versatile-vs-many-specialized-collections/}, isbn = {978-1-4503-5513-1}, keywords = {Analysis Collections Design Framework Implementation Library MeMyPublication Opinion Survey myown}, month = apr, numpages = {9}, pages = {135--143}, pdf = {https://stefan-marr.de/downloads/px18-marr-daloze-few-versatile-vs-many-specialized-collections.pdf}, publisher = {ACM}, series = {PX/18}, title = {{Few Versatile vs. Many Specialized Collections: How to design a collection library for exploratory programming?}}, year = {2018}, month_numeric = {4} }
@proceedings{Marr:2018, author = {Marr, Stefan and Sartor, Jennifer B.}, editor = {Marr, Stefan and Sartor, Jennifer B.}, isbn = {978-1-4503-5513-1}, keywords = {MeMyPublication Proceedings Programming Workshops myown}, location = {Nice, France}, month = apr, publisher = {ACM}, series = {ICPS}, title = {<Programming>'18 Companion: Conference Companion of the 2nd International Conference on Art, Science, and Engineering of Programming}, url = {https://dl.acm.org/citation.cfm?id=3191697}, year = {2018}, month_numeric = {4} }
@article{Chari:2018:FREE, abstract = {VMs are complex pieces of software that implement programming language semantics in an efficient, portable, and secure way. Unfortunately, mainstream VMs provide applications with few mechanisms to alter execution semantics or memory management at run time. We argue that this limits the evolvability and maintainability of running systems for both, the application domain, e.g., to support unforeseen requirements, and the VM domain, e.g., to modify the organization of objects in memory. This work explores the idea of incorporating reflective capabilities into the VM domain and analyzes its impact in the context of software adaptation tasks. We characterize the notion of a fully reflective VM, a kind of VM that provides means for its own observability and modifiability at run time. This enables programming languages to adapt the underlying VM to changing requirements. We propose a reference architecture for such VMs and present TruffleMATE as a prototype for this architecture. We evaluate the mechanisms TruffleMATE provides to deal with unanticipated dynamic adaptation scenarios for security, optimization, and profiling aspects. 
In contrast to existing alternatives, we observe that TruffleMATE is able to handle all scenarios, using less than 50 lines of code for each, and without interfering with the application's logic.}, author = {Chari, Guido and Garbervetsky, Diego and Marr, Stefan and Ducasse, Stéphane}, doi = {10.1109/TSE.2018.2812715}, issn = {0098-5589}, journal = {IEEE Transactions on Software Engineering}, keywords = {Compilation Design Mate MeMyPublication MetaObjectProtocol MetaProgramming Reflection Truffle VM myown}, month = mar, number = {99}, pages = {1--20}, pdf = {https://stefan-marr.de/downloads/tse18-chari-et-al-fully-reflective-execution-environments.pdf}, series = {IEEE TSE}, title = {{Fully Reflective Execution Environments: Virtual Machines for More Flexible Software}}, volume = {PP}, year = {2018}, month_numeric = {3} }
@inproceedings{Rigger:2018:Inline, abstract = {C codebases frequently embed nonportable and unstandardized elements such as inline assembly code. Such elements are not well understood, which poses a problem to tool developers who aspire to support C code. This paper investigates the use of x86-64 inline assembly in 1264 C projects from GitHub and combines qualitative and quantitative analyses to answer questions that tool authors may have. We found that 28.1% of the most popular projects contain inline assembly code, although the majority contain only a few fragments with just one or two instructions. The most popular instructions constitute a small subset concerned largely with multicore semantics, performance optimization, and hardware control. Our findings are intended to help developers of C-focused tools, those testing compilers, and language designers seeking to reduce the reliance on inline assembly. They may also aid the design of tools focused on inline assembly itself.}, acceptancerate = {0.32}, author = {Rigger, Manuel and Marr, Stefan and Kell, Stephen and Leopoldseder, David and Mössenböck, Hanspeter}, booktitle = {VEE ’18: 14th ACM SIGPLAN/SIGOPS International Conference on Virtual Execution Environments}, doi = {10.1145/3186411.3186418}, isbn = {978-1-4503-5579-7/18/03}, keywords = {Analysis Assembly C Empirical GitHub MeMyPublication Study Survey myown}, month = mar, numpages = {16}, pages = {84--99}, pdf = {https://stefan-marr.de/downloads/vee18-rigger-et-al-an-analysis-of-x86-64-inline-assembly-in-c-programs.pdf}, series = {VEE'18}, title = {{An Analysis of x86-64 Inline Assembly in C Programs}}, year = {2018}, month_numeric = {3} }
@inproceedings{pimas2017garbage, abstract = {In dynamic object-oriented languages, low-level mechanisms such as just-in-time compilation, object allocation, garbage collection (GC) and method dispatch are often handled by virtual machines (VMs). VMs are typically implemented using static languages, allowing only few changes at run time. In such systems, the VM is not part of the language and interfaces to memory management or method dispatch are fixed, not allowing for arbitrary adaptation. Furthermore, the implementation can typically not be inspected or debugged with standard tools used to work on application code. This paper reports on our experience building Bee, a dynamic Smalltalk runtime, written in Smalltalk. Bee is a Dynamic Metacircular Runtime (DMR) and seamlessly integrates the VM into the application and thereby overcomes many restrictions of classic VMs, for instance by allowing arbitrary code modifications of the VM at run time. Furthermore, the approach enables developers to use their standard tools for application code also for the VM, allowing them to inspect, debug, understand, and modify a DMR seamlessly. We detail our experience of implementing GC, compilation, and optimizations in a DMR. We discuss examples where we found that DMRs can improve understanding of the system, provide tighter control of the software stack, and facilitate research. 
We also show that in high-level benchmarks the Bee DMR performance is close to that of a widely used Smalltalk VM.}, acceptancerate = {0.64}, author = {Pimás, Javier and Burroni, Javier and Arnaud, Jean Baptiste and Marr, Stefan}, booktitle = {Proceedings of the 13th ACM SIGPLAN International Symposium on Dynamic Languages}, day = {24}, doi = {10.1145/3133841.3133845}, isbn = {978-1-4503-5526-1/17/10}, keywords = {Bee DMR MeMyPublication MetaCircular MetaObjectProtocol Reflection Smalltalk myown}, location = {Vancouver, Canada}, month = oct, note = {(acceptance rate 64\%)}, numpages = {12}, pdf = {https://stefan-marr.de/downloads/dls17-pimas-et-al-garbage-collection-and-efficiency-in-dynamic-metacircular-runtimes.pdf}, publisher = {ACM}, series = {DLS'17}, title = {{Garbage Collection and Efficiency in Dynamic Metacircular Runtimes}}, year = {2017}, month_numeric = {10} }
@inproceedings{Marr:2017:CPCD,
  abstract = {Today's complex software systems combine high-level concurrency models. Each model is used to solve a specific set of problems. Unfortunately, debuggers support only the low-level notions of threads and shared memory, forcing developers to reason about these notions instead of the high-level concurrency models they chose. This paper proposes a concurrency-agnostic debugger protocol that decouples the debugger from the concurrency models employed by the target application. As a result, the underlying language runtime can define custom breakpoints, stepping operations, and execution events for each concurrency model it supports, and a debugger can expose them without having to be specifically adapted. We evaluated the generality of the protocol by applying it to SOMns, a Newspeak implementation, which supports a diversity of concurrency models including communicating sequential processes, communicating event loops, threads and locks, fork/join parallelism, and software transactional memory. We implemented 21 breakpoints and 20 stepping operations for these concurrency models. For none of these, the debugger needed to be changed. Furthermore, we visualize all concurrent interactions independently of a specific concurrency model. To show that tooling for a specific concurrency model is possible, we visualize actor turns and message sends separately.},
  acceptancerate = {0.64},
  author = {Marr, Stefan and Torres Lopez, Carmen and Aumayr, Dominik and Gonzalez Boix, Elisa and Mössenböck, Hanspeter},
  blog = {https://stefan-marr.de/2017/10/multi-paradigm-concurrent-debugging/},
  booktitle = {Proceedings of the 13th ACM SIGPLAN International Symposium on Dynamic Languages},
  day = {24},
  doi = {10.1145/3133841.3133842},
  html = {https://stefan-marr.de/papers/dls-marr-et-al-concurrency-agnostic-protocol-for-debugging/},
  isbn = {978-1-4503-5526-1/17/10},
  keywords = {Breakpoints Concurrency Debugging MeMyPublication Stepping Tooling Visualization myown},
  location = {Vancouver, Canada},
  month = oct,
  note = {(acceptance rate 64\%)},
  numpages = {12},
  pages = {3--14},
  pdf = {https://stefan-marr.de/downloads/dls17-marr-et-al-concurrency-agnostic-protocol-for-debugging.pdf},
  publisher = {ACM},
  series = {DLS'17},
  title = {{A Concurrency-Agnostic Protocol for Multi-Paradigm Concurrent Debugging Tools}},
  year = {2017},
  month_numeric = {10}
}
@inproceedings{TorresLopez:2017:PAT,
  abstract = {Since the multicore revolution, software systems are more and more inherently concurrent. Debugging such concurrent software systems is still hard, but in the recent years new tools and techniques are being proposed. For such novel debugging techniques, the main question is how to make sure that the proposed techniques are sufficiently expressive. In this paper, we explore a formal foundation that allows researchers to identify debugging techniques and assess how complete their features are in the context of message-passing concurrency. In particular, we describe a principled approach for defining the operational semantics of a debugger. Subsequently, we apply this technique to derive the operational semantics for a communicating event-loop debugger. We show that our technique scales for defining the semantics of a wide set of novel breakpoints recently proposed by systems such as REME-D and Kómpos. To the best of our knowledge, this is the first formal semantics for debugging asynchronous message passing-based concurrency models.},
  acceptancerate = {0.75},
  acmid = {3141839},
  author = {Torres Lopez, Carmen and Gonzalez Boix, Elisa and Scholliers, Christophe and Marr, Stefan and Mössenböck, Hanspeter},
  booktitle = {Proceedings of the 7th ACM SIGPLAN International Workshop on Programming Based on Actors, Agents, and Decentralized Control},
  day = {23},
  doi = {10.1145/3141834.3141839},
  isbn = {978-1-4503-5516-2},
  keywords = {ActorModel Actors Concurrency Debugging Formalization MeMyPublication Messages Stepping myown},
  location = {Vancouver, BC, Canada},
  month = oct,
  numpages = {9},
  pages = {41--49},
  pdf = {https://stefan-marr.de/downloads/agere17-torres-lopez-et-al-a-principled-approach-towards-debugging-communicating-event-loops.pdf},
  publisher = {ACM},
  series = {AGERE!'17},
  title = {A Principled Approach Towards Debugging Communicating Event-loops},
  year = {2017},
  month_numeric = {10}
}
@proceedings{Meta:2017,
  day = {22},
  editor = {Chiba, Shigeru and Gonzalez Boix, Elisa and Marr, Stefan},
  isbn = {978-1-4503-5523-0},
  keywords = {MeMyPublication Metaprogramming Proceedings Workshop myown},
  location = {Vancouver, BC, Canada},
  month = oct,
  publisher = {ACM},
  series = {SPLASH Workshop},
  title = {Meta 2017: Proceedings of the 2nd ACM SIGPLAN International Workshop on Meta-Programming Techniques and Reflection},
  url = {https://dl.acm.org/citation.cfm?id=3141517},
  year = {2017},
  month_numeric = {10}
}
@inproceedings{Eisl:2017:TRAP,
  abstract = {Register allocation is an integral part of compilation, regardless of whether a compiler aims for fast compilation or optimal code quality. State-of-the-art dynamic compilers often use global register allocation approaches such as linear scan. Recent results suggest that non-global trace-based register allocation approaches can compete with global approaches in terms of allocation quality. Instead of processing the whole compilation unit (i.e., method) at once, a trace-based register allocator divides the problem into linear code segments, called traces. In this work, we present a register allocation framework that can exploit the additional flexibility of traces to select different allocation strategies based on the characteristics of a trace. This provides us with fine-grained control over the trade-off between compile time and peak performance in a just-in-time compiler. Our framework features three allocation strategies: a linear-scan-based approach that achieves good code quality, a single-pass bottom-up strategy that aims for short allocation times, and an allocator for trivial traces. To demonstrate the flexibility of the framework, we select 8 allocation policies and show their impact on compile time and peak performance. This approach can reduce allocation time by 7\%–43\% at a peak performance penalty of about 1\%–11\% on average. For systems that do not focus on peak performance, our approach allows to adjust the time spent for register allocation, and therefore the overall compilation time, thus finding the optimal balance between compile time and peak performance according to an application's requirements.},
  acceptancerate = {0.45},
  author = {Eisl, Josef and Marr, Stefan and Würthinger, Thomas and Mössenböck, Hanspeter},
  booktitle = {Proceedings of the 14th International Conference on Managed Languages and Runtimes},
  doi = {10.1145/3132190.3132209},
  isbn = {978-1-4503-5340-3/17/09},
  keywords = {Compilation JIT MeMyPublication Optimization Policies RegisterAllocation Trace-based myown},
  location = {Prague, Czech Republic},
  month = sep,
  numpages = {13},
  pdf = {https://stefan-marr.de/downloads/manlang17-eisl-et-al-trace-register-allocation-policies.pdf},
  publisher = {ACM},
  series = {ManLang'17},
  title = {Trace Register Allocation Policies: Compile-time vs. Performance Trade-offs},
  year = {2017},
  month_numeric = {9}
}
@inproceedings{Grimmer:2017:TJ,
  abstract = {While Java is a statically-typed language, some of its features make it behave like a dynamically-typed language at run time. This includes Java’s boxing of primitive values as well as generics, which rely on type erasure. This paper investigates how runtime technology for dynamically-typed languages such as JavaScript and Python can be used for Java bytecode. Using optimistic optimizations, we specialize bytecode instructions that access references in such a way, that they can handle primitive data directly and also specialize data structures in order to avoid boxing for primitive types. Our evaluation shows that these optimizations can be successfully applied to a statically-typed language such as Java and can also improve performance significantly. With this approach, we get an efficient implementation of Java's generics, avoid changes to the Java language, and maintain backwards compatibility, allowing existing code to benefit from our optimization transparently.},
  acceptancerate = {0.45},
  author = {Grimmer, Matthias and Marr, Stefan and Kahlhofer, Mario and Wimmer, Christian and Würthinger, Thomas and Mössenböck, Hanspeter},
  booktitle = {Proceedings of the 14th International Conference on Managed Languages and Runtimes},
  doi = {10.1145/3132190.3132202},
  isbn = {978-1-4503-5340-3/17/09},
  keywords = {MeMyPublication myown},
  location = {Prague, Czech Republic},
  month = sep,
  numpages = {11},
  pdf = {https://stefan-marr.de/downloads/manlang17-grimmer-et-al-applying-optimizations-for-dynamically-typed-languages-to-java.pdf},
  publisher = {ACM},
  series = {ManLang'17},
  title = {{Applying Optimizations for Dynamically-typed Languages to Java}},
  year = {2017},
  month_numeric = {9}
}
@article{Vandercammen:2017:STRAF,
  abstract = {Just-in-time compilation has proven an effective, though effort-intensive, choice for realizing performant language runtimes. Recently introduced JIT compilation frameworks advocate applying meta-compilation techniques such as partial evaluation or meta-tracing on simple interpreters to reduce the implementation effort. However, such frameworks are few and far between. Designed and highly optimized for performance, they are difficult to experiment with. We therefore present STRAF, a minimalistic yet flexible Scala framework for studying trace-based JIT compilation. STRAF is sufficiently general to support a diverse set of language interpreters, but also sufficiently extensible to enable experiments with trace recording and optimization. We demonstrate the former by plugging two different interpreters into STRAF. We demonstrate the latter by extending STRAF with e.g., constant folding and type-specialization optimizations, which are commonly found in dedicated trace-based JIT compilers. The evaluation shows that STRAF is suitable for prototyping new techniques and formalisms in the domain of trace-based JIT compilation.},
  author = {Vandercammen, Maarten and Marr, Stefan and De Roover, Coen},
  doi = {10.1016/j.cl.2017.07.005},
  issn = {1477-8424},
  journal = {Computer Languages, Systems \& Structures},
  keywords = {Framework Lisp MeMyPublication MetaTracing Scala Scheme Trace-basedJIT Tracing myown optimization semantics},
  month = jul,
  pdf = {https://stefan-marr.de/downloads/comlan17-vandercammen-et-al-a-flexible-framework-for-studying-trace-based-just-in-time-compilation.pdf},
  series = {ComLan},
  title = {{A Flexible Framework for Studying Trace-Based Just-In-Time Compilation}},
  year = {2017},
  month_numeric = {7}
}
@inproceedings{Chari:2017:JITMOP,
  abstract = {Just-in-time compilers and their aggressive speculative optimizations reduced the performance gap between dynamic and static languages drastically. To successfully speculate, compilers rely on the program variability observed at run time to be low, and use heuristics to determine when optimization is beneficial. However, some variability patterns are hard to capture with heuristics. Specifically, ephemeral, warmup, rare, and highly indirect variability are challenges for today's compiler heuristics. As a consequence, they can lead to reduced application performance. However, these types of variability are identifiable at the application level and could be mitigated with information provided by developers. As a solution, we propose a metaobject protocol for dynamic compilation systems to enable application developers to provide such information at run time. As a proof of concept, we demonstrate performance improvements for a few scenarios in a dynamic language built on top of the Truffle and Graal system.},
  author = {Chari, Guido and Garbervetsky, Diego and Marr, Stefan},
  booktitle = {Proceedings of the 11th Workshop on Implementation, Compilation, Optimization of Object-Oriented Languages, Programs and Systems},
  day = {19},
  doi = {10.1145/3098572.3098577},
  isbn = {978-1-4503-5088-4/17/06},
  keywords = {Compiler JIT MOP MeMyPublication MetaProgramming Optimization myown},
  month = jun,
  pages = {5},
  pdf = {https://stefan-marr.de/downloads/icooolps17-chari-et-al-a-mop-for-optimizing-run-time-variability.pdf},
  series = {ICOOOLPS '17},
  title = {{A Metaobject Protocol for Optimizing Application-Specific Run-Time Variability}},
  type = {Position Paper},
  year = {2017},
  month_numeric = {6}
}
@inproceedings{Chari:2017:FVR,
  abstract = {It has become common for software systems to require or benefit from dynamic adaptation, i.e., to modify their behavior while they are running. Among the existing approaches to this problem, language-level solutions are appealing for scenarios in which fine-grained adaptation is needed, i.e., when the granularity of the modifications is that of individual objects, or for small applications where an architectural solution based on complex middleware is overkill. However, there is no consensus on which of the existing language-level approaches to adopt. A recent survey on self-adaptive systems asks [9]: Is it possible to adopt a single paradigm providing all required abstractions to implement adaptive systems?},
  acmid = {3098422},
  author = {Chari, Guido and Garbervetsky, Diego and Marr, Stefan},
  booktitle = {Proceedings of the 39th International Conference on Software Engineering Companion},
  doi = {10.1109/ICSE-C.2017.144},
  isbn = {978-1-5386-1589-8},
  keywords = {Mate MeMyPublication myown poster},
  location = {Buenos Aires, Argentina},
  month = may,
  numpages = {3},
  pages = {229--231},
  pdf = {https://stefan-marr.de/downloads/icse17-chari-et-al-fully-reflective-vms-for-software-adaptation.pdf},
  publisher = {IEEE},
  series = {ICSE-C '17},
  title = {Fully-reflective VMs for Ruling Software Adaptation},
  type = {Poster},
  year = {2017},
  month_numeric = {5}
}
@presentation{Roberts:2017:VMAdaption,
  abstract = {We adapt SOMns, a Truffle-based interpreter for Newspeak, to the Grace programming language. We highlight differences between the semantics of these languages and offer preliminary results showing that adaption is possible while retaining performance. The similarities between the languages promote the potential for adaption and code sharing between implementations. Through experimentation we intend to explore how the design of the tailored implementation; the flexibility of the underlying framework; and similarities between languages affect adaptability, and by doing so hope to realize a set of mechanisms that can be easily extended to create optimized VMs for object-orientated languages.},
  author = {Roberts, Richard and Marr, Stefan and Homer, Michael and Noble, James},
  booktitle = {Workshop on Modern Language Runtimes, Ecosystems, and VMs},
  day = {3},
  keywords = {Adaptation Grace MeMyPublication Truffle VM myown},
  month = apr,
  pdf = {https://stefan-marr.de/downloads/morevms17-roberts-et-al-toward-virtual-machine-adaption.pdf},
  series = {MoreVMs'17},
  title = {{Toward Virtual Machine Adaption Rather than Reimplementation: Adapting SOMns for Grace}},
  type = {Extended Abstract},
  year = {2017},
  month_numeric = {4}
}
@presentation{Marr:2017:KomposDemo,
  abstract = {With the omnipresence of multicore processors, developers combine various concurrency abstractions such as actors, communicating sequential processes, or fork/join programming to utilize the available parallelism. Unfortunately, when we debug such systems today, we need to reason about low-level memory accesses, locks, and thread pools, because debuggers rarely know about actors, messages, channels, or fork/join tasks. With Kómpos, we present a concurrent debugger that is designed to enable us to reason about our programs on the level of these abstractions. We will demonstrate some of the features of Kómpos and discuss the research challenges for building a concurrent debugger for a wide range of concurrency abstractions.},
  author = {Marr, Stefan and Torres Lopez, Carmen and Aumayr, Dominik and Gonzalez Boix, Elisa and Mössenböck, Hanspeter},
  booktitle = {Companion to the First International Conference on the Art, Science and Engineering of Programming},
  day = {2},
  doi = {10.1145/3079368.3079378},
  isbn = {978-1-4503-4836-2},
  keywords = {Actors CSP Concurrency Debugger Kompos MeMyPublication STM SharedMemory myown},
  month = apr,
  pages = {2:1--2:2},
  pdf = {https://stefan-marr.de/downloads/progdemo-marr-et-al-kompos-a-platform-for-debugging-complex-concurrent-applications.pdf},
  publisher = {ACM},
  series = {Programming Demo'17},
  title = {{Kómpos: A Platform for Debugging Complex Concurrent Applications}},
  type = {Demo},
  year = {2017},
  month_numeric = {4}
}
@inproceedings{Bonetta:2016:GEMs,
  abstract = {JavaScript is the most popular programming language for client-side Web applications, and Node.js has popularized the language for server-side computing, too. In this domain, the minimal support for parallel programming remains however a major limitation. In this paper we introduce a novel parallel programming abstraction called Generic Messages (GEMs). GEMs allow one to combine message passing and shared-memory parallelism, extending the classes of parallel applications that can be built with Node.js. GEMs have customizable semantics and enable several forms of thread safety, isolation, and concurrency control. GEMs are designed as convenient JavaScript abstractions that expose high-level and safe parallelism models to the developer. Experiments show that GEMs outperform equivalent Node.js applications thanks to their usage of shared memory.},
  acceptancerate = {0.25},
  author = {Bonetta, Daniele and Salucci, Luca and Marr, Stefan and Binder, Walter},
  booktitle = {Proceedings of the 2016 ACM International Conference on Object Oriented Programming Systems Languages \& Applications},
  day = {2},
  doi = {10.1145/2983990.2984039},
  isbn = {978-1-4503-4444-9},
  keywords = {Concurrency JavaScript MeMyPublication Messaging NodeJS Parallelism SharedMemory myown},
  location = {Amsterdam, Netherlands},
  month = nov,
  note = {(acceptance rate 25\%)},
  numpages = {18},
  pages = {531--547},
  pdf = {https://stefan-marr.de/downloads/oopsla16-bonetta-et-al-gems-shared-memory-parallel-programming-for-nodejs.pdf},
  publisher = {ACM},
  series = {OOPSLA '16},
  title = {{GEMs: Shared-memory Parallel Programming for Node.js}},
  year = {2016},
  month_numeric = {11}
}
@inproceedings{Daloze:2016:TSO,
  abstract = {We are in the multi-core era. Dynamically-typed languages are in widespread use, but their support for multithreading still lags behind. One of the reasons is that the sophisticated techniques they use to efficiently represent their dynamic object models are often unsafe in multithreaded environments. This paper defines safety requirements for dynamic object models in multithreaded environments. Based on these requirements, a language-agnostic and thread-safe object model is designed that maintains the efficiency of sequential approaches. This is achieved by ensuring that field reads do not require synchronization and field updates only need to synchronize on objects shared between threads. Basing our work on JRuby+Truffle, we show that our safe object model has zero overhead on peak performance for thread-local objects and only 3\% average overhead on parallel benchmarks where field updates require synchronization. Thus, it can be a foundation for safe and efficient multithreaded VMs for a wide range of dynamic languages.},
  acceptancerate = {0.25},
  author = {Daloze, Benoit and Marr, Stefan and Bonetta, Daniele and Mössenböck, Hanspeter},
  booktitle = {Proceedings of the 2016 ACM International Conference on Object Oriented Programming Systems Languages \& Applications},
  day = {2},
  doi = {10.1145/2983990.2984001},
  isbn = {978-1-4503-4444-9},
  keywords = {Bugs Concurrency HiddenClasses JRuby Map MeMyPublication ObjectModel ObjectStorage RaceConditions Ruby ThreadSafety myown},
  month = nov,
  note = {(acceptance rate 25\%)},
  numpages = {18},
  pages = {642--659},
  pdf = {https://stefan-marr.de/downloads/oopsla16-daloze-et-al-efficient-and-thread-safe-objects-for-dynamically-typed-languages.pdf},
  publisher = {ACM},
  series = {OOPSLA '16},
  title = {{Efficient and Thread-Safe Objects for Dynamically-Typed Languages}},
  year = {2016},
  month_numeric = {11}
}
@inproceedings{Chari:2016:Mate2,
  abstract = {Programming language virtual machines (VMs) realize language semantics, enforce security properties, and execute applications efficiently. Fully Reflective Execution Environments (EEs) are VMs that additionally expose their whole structure and behavior to applications. This enables developers to observe and adapt VMs at run time. However, there is a belief that reflective EEs are not viable for practical usages because such flexibility would incur a high performance overhead. To refute this belief, we built a reflective EE on top of a highly optimizing dynamic compiler. We introduced a new optimization model that, based on the conjecture that variability of low-level (EE-level) reflective behavior is low in many scenarios, mitigates the most significant sources of the performance overheads related to the reflective capabilities in the EE. Our experiments indicate that reflective EEs can reach peak performance in the order of standard VMs. Concretely, that a) if reflective mechanisms are not used the execution overhead is negligible compared to standard VMs, b) VM operations can be redefined at language-level without incurring in significant overheads, c) for several software adaptation tasks, applying the reflection at the VM level is not only lightweight in terms of engineering effort, but also competitive in terms of performance in comparison to other ad-hoc solutions.},
  acceptancerate = {0.55},
  author = {Chari, Guido and Garbervetsky, Diego and Marr, Stefan},
  booktitle = {Proceedings of the 12th Symposium on Dynamic Languages},
  day = {1},
  doi = {10.1145/2989225.2989234},
  isbn = {978-1-4503-4445-6},
  keywords = {MOP Mate MeMyPublication MetaObjectProtocol Optimization Performance Study Truffle myown},
  location = {Amsterdam, Netherlands},
  month = nov,
  note = {(acceptance rate 55\%)},
  numpages = {12},
  pages = {60--71},
  pdf = {https://stefan-marr.de/downloads/dls16-chari-et-al-building-efficient-and-highly-run-time-adaptable-virtual-machines.pdf},
  publisher = {ACM},
  series = {DLS'16},
  title = {{Building Efficient and Highly Run-time Adaptable Virtual Machines}},
  year = {2016},
  month_numeric = {11}
}
@inproceedings{Marr:2016:AWFY,
  abstract = {Comparing the performance of programming languages is difficult because they differ in many aspects including preferred programming abstractions, available frameworks, and their runtime systems. Nonetheless, the question about relative performance comes up repeatedly in the research community, industry, and wider audience of enthusiasts. This paper presents 14 benchmarks and a novel methodology to assess the compiler effectiveness across language implementations. Using a set of common language abstractions, the benchmarks are implemented in Java, JavaScript, Ruby, Crystal, Newspeak, and Smalltalk. We show that the benchmarks exhibit a wide range of characteristics using language-agnostic metrics. Using four different languages on top of the same compiler, we show that the benchmarks perform similarly and therefore allow for a comparison of compiler effectiveness across languages. Based on anecdotes, we argue that these benchmarks help language implementers to identify performance bugs and optimization potential by comparing to other language implementations.},
  acceptancerate = {0.55},
  appendix = {https://github.com/smarr/are-we-fast-yet#readme},
  author = {Marr, Stefan and Daloze, Benoit and Mössenböck, Hanspeter},
  blog = {https://stefan-marr.de/2016/10/cross-language-compiler-benchmarking-are-we-fast-yet/},
  booktitle = {Proceedings of the 12th Symposium on Dynamic Languages},
  day = {1},
  doi = {10.1145/2989225.2989232},
  html = {https://stefan-marr.de/papers/dls-marr-et-al-cross-language-compiler-benchmarking-are-we-fast-yet/},
  isbn = {978-1-4503-4445-6},
  keywords = {Benchmark Compiler Crystal Graal Java JavaScript MeMyPublication Metrics Newspeak NodeJS Performance Ruby Smalltalk Truffle myown},
  location = {Amsterdam, Netherlands},
  month = nov,
  note = {(acceptance rate 55\%)},
  numpages = {12},
  pages = {120--131},
  pdf = {https://stefan-marr.de/downloads/dls16-marr-et-al-cross-language-compiler-benchmarking-are-we-fast-yet.pdf},
  publisher = {ACM},
  series = {DLS'16},
  title = {{Cross-Language Compiler Benchmarking---Are We Fast Yet?}},
  year = {2016},
  month_numeric = {11}
}
@presentation{TorresLopez:2016:TAD,
  abstract = {With the ubiquity of multicore hardware, concurrent and parallel programming has become a fundamental part of software development. If writing concurrent programs is hard, debugging them is even harder. The actor model is attractive for developing concurrent applications because actors are isolated concurrent entities that communicates through asynchronous message sending and do not share state, thus they avoid common concurrency bugs such as race conditions. However, they are not immune to bugs. This paper presents initial work on a taxonomy of concurrent bugs for actor-based applications. Based on this study, we propose debugging tooling to assist the development process of actor-based applications.},
  author = {Torres Lopez, Carmen and Marr, Stefan and Mössenböck, Hanspeter and Gonzalez Boix, Elisa},
  booktitle = {Presentation at 6th International Workshop on Programming based on Actors, Agents, and Decentralized Control},
  day = {30},
  keywords = {Bugs EventLoops MeMyPublication Messages Survey Vats actors concurrency debugging myown},
  month = oct,
  numpages = {5},
  pdf = {https://stefan-marr.de/downloads/agere16-torres-lopez-et-al-towards-advanced-debugging-support-for-actor-languages.pdf},
  series = {AGERE! '16},
  title = {{Towards Advanced Debugging Support for Actor Languages: Studying Concurrency Bugs in Actor-based Programs}},
  year = {2016},
  month_numeric = {10}
}
@proceedings{Marr:2016:ICOOOLPS,
  editor = {Marr, Stefan and Jul, Eric},
  isbn = {978-1-4503-4837-9},
  keywords = {MeMyPublication myown},
  location = {Rome, Italy},
  month = jul,
  publisher = {ACM},
  series = {ICOOOLPS '16},
  title = {Proceedings of the 11th Workshop on Implementation, Compilation, Optimization of Object-Oriented Languages, Programs and Systems},
  url = {https://dl.acm.org/citation.cfm?id=3012408},
  year = {2016},
  month_numeric = {7}
}
@article{DeKoster2016,
  abstract = {The actor model is a message-passing concurrency model that avoids deadlocks and low-level data races by construction. This facilitates concurrent programming, especially in the context of complex interactive applications where modularity, security and fault-tolerance are required. The tradeoff is that the actor model sacrifices expressiveness and safety guarantees with respect to parallel access to shared state. In this paper we present domains as a set of novel language abstractions for safely encapsulating and sharing state within the actor model. We introduce four types of domains, namely immutable, isolated, observable and shared domains that each are tailored to a certain access pattern on that shared state. The domains are characterized with an operational semantics. For each we discuss how the actor model's safety guarantees are upheld even in the presence of conceptually shared state. Furthermore, the proposed language abstractions are evaluated with a case study in Scala comparing them to other synchonisation mechanisms to demonstrate their benefits in deadlock freedom, parallel reads, and enforced isolation.},
  author = {De Koster, Joeri and Marr, Stefan and Van Cutsem, Tom and D'Hondt, Theo},
  blog = {https://stefan-marr.de/2016/02/domains-sharing-state-in-the-communicating-event-loop-actor-model/},
  doi = {10.1016/j.cl.2016.01.003},
  issn = {1477-8424},
  journal = {Computer Languages, Systems \& Structures},
  keywords = {Actors Concurrency Domains EventLoops Interpreter MeMyPublication Parallelism Shacl myown},
  month = apr,
  numpages = {39},
  pages = {132--160},
  pdf = {https://stefan-marr.de/downloads/comlan-de-koster-et-al-domains-sharing-state-in-the-communicating-event-loop-actor-model.pdf},
  publisher = {Elsevier},
  series = {ComLan},
  title = {Domains: Sharing State in the Communicating Event-Loop Actor Model},
  volume = {45},
  year = {2016},
  month_numeric = {4}
}
@inproceedings{Salucci:2016:GMC,
  abstract = {Systems based on event-loops have been popularized by Node.JS, and are becoming a key technology in the domain of cloud computing. Despite their popularity, such systems support only share-nothing parallelism via message passing between parallel entities usually called workers. In this paper, we introduce a novel parallel programming abstraction called Generic Messages (GEMs), which enables shared-memory parallelism for share-nothing event-based systems. A key characteristic of GEMs is that they enable workers to share state by specifying how the state can be accessed once it is shared. We call this aspect of the GEMs model capability-based parallelism.},
  acmid = {2851184},
  articleno = {40},
  author = {Salucci, Luca and Bonetta, Daniele and Marr, Stefan and Binder, Walter},
  booktitle = {Proceedings of the 21st ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  doi = {10.1145/2851141.2851184},
  isbn = {978-1-4503-4092-2},
  keywords = {Concurrency ECMAScript EventLoops JS JavaScript MOP MeMyPublication Parallelism Proxy SharedMemory myown},
  location = {Barcelona, Spain},
  month = mar,
  numpages = {2},
  pages = {40:1--40:2},
  pdf = {https://stefan-marr.de/downloads/ppopp-salucci-et-al-generic-messages-capability-based-shared-memory-parallelism-for-event-loop-systems.pdf},
  publisher = {ACM},
  series = {PPoPP Poster'16},
  title = {{Generic Messages: Capability-based Shared Memory Parallelism for Event-loop Systems}},
  year = {2016},
  month_numeric = {3}
}
@inproceedings{Marr:2016:MetaConc, abstract = {With the widespread use of multicore processors, software becomes more and more diverse in its use of parallel computing resources. To address all application requirements, each with the appropriate abstraction, developers mix and match various concurrency abstractions made available to them via libraries and frameworks. Unfortunately, today's tools such as debuggers and profilers do not support the diversity of these abstractions. Instead of enabling developers to reason about the high-level programming concepts, they used to express their programs, the tools work only on the library's implementation level. While this is a common problem also for other libraries and frameworks, the complexity of concurrency exacerbates the issue further, and reasoning on the higher levels of the concurrency abstractions is essential to manage the associated complexity. In this position paper, we identify open research issues and propose to build tools based on a common meta-level interface to enable developers to reason about their programs based on the high-level concepts they used to implement them.}, author = {Marr, Stefan and Gonzalez Boix, Elisa and Mössenböck, Hanspeter}, blog = {https://stefan-marr.de/2016/01/towards-meta-level-engineering-and-tooling-for-complex-concurrent-systems/}, booktitle = {Proceedings of the 9th Arbeitstagung Programmiersprachen}, day = {25}, html = {https://stefan-marr.de/papers/atps-marr-et-al-towards-meta-level-engineering-and-tooling-for-complex-concurrent-systems/}, issn = {1613-0073}, keywords = {Abstraction Concepts Concurrency HighLevel MeMyPublication MetaEngineering MetaLevel MetaProgramming Parallelism Profiling Representation Tooling myown}, location = {Vienna, Austria}, month = feb, pages = {91--95}, pdf = {https://stefan-marr.de/downloads/atps-marr-et-al-towards-meta-level-engineering-and-tooling-for-complex-concurrent-systems.pdf}, publisher = {CEUR-WS}, series = {ATPS'16}, title = 
{Towards Meta-Level Engineering and Tooling for Complex Concurrent Systems}, type = {Position Paper}, url = {https://stefan-marr.de/papers/atps-marr-et-al-towards-meta-level-engineering-and-tooling-for-complex-concurrent-systems/}, volume = {1559}, year = {2016}, month_numeric = {2} }
@presentation{OptCELWithTruffle, abstract = {Communicating Event-Loop Languages similar to E and AmbientTalk are recently gaining more traction as a subset of actor languages. With the rise of JavaScript, E’s notion of vats and non-blocking communication based on promises entered the mainstream. For implementations, the combination of dynamic typing, asynchronous message sending, and promise resolution pose new optimization challenges. This paper discusses these challenges and presents initial experiments for a Newspeak implementation based on the Truffle framework. Our implementation is on average 1.65x slower than Java on a set of 14 benchmarks. Initial optimizations improve the performance of asynchronous messages and reduce the cost of encapsulation on microbenchmarks by about 2x. Parallel actor benchmarks further show that the system scales based on the workload characteristics. Thus, we conclude that Truffle is a promising platform also for communicating event-loop languages.}, author = {Marr, Stefan and Mössenböck, Hanspeter}, blog = {https://stefan-marr.de/2015/10/optimizing-communicating-event-loop-languages-with-truffle/}, booktitle = {Presentation at 5th International Workshop on Programming based on Actors, Agents, and Decentralized Control}, day = {26}, html = {https://stefan-marr.de/papers/agere-marr-moessenboeck-optimizing-communicating-event-loop-languages-with-truffle/}, keywords = {Actors Caching CommunicatingEventLoops Compiler Concurrency MeMyPublication Message Optimization PIC Sending Truffle myown}, location = {Pittsburgh, PA, USA}, month = oct, pdf = {https://stefan-marr.de/downloads/agere15-agere-marr-moessenboeck-optimizing-communicating-event-loop-languages-with-truffle.pdf}, series = {AGERE!'15}, title = {Optimizing Communicating Event-Loop Languages with Truffle}, type = {Work-in-Progress-Paper}, year = {2015}, month_numeric = {10} }
@inproceedings{Chari:15:Mate, abstract = {Modern development environments promote live programming (LP) mechanisms because it enhances the development experience by providing instantaneous feedback and interaction with live objects. LP is typically supported with advanced reflective techniques within dynamic languages. These languages run on top of Virtual Machines (VMs) that are built in a static manner so that most of their components are bound at compile time. As a consequence, VM developers are forced to work using the traditional edit-compile-run cycle, even when they are designing LP-supporting environments. In this paper we explore the idea of bringing LP techniques to the VM domain for improving their observability, evolution and adaptability at run-time. We define the notion of fully reflective execution environments (EEs), systems that provide reflection not only at the application level but also at the level of the VM. We characterize such systems, propose a design, and present Mate v1, a prototypical implementation. Based on our prototype, we analyze the feasibility and applicability of incorporating reflective capabilities into different parts of EEs. 
Furthermore, the evaluation demonstrates the opportunities such reflective capabilities provide for unanticipated dynamic adaptation scenarios, benefiting thus, a wider range of users.}, author = {Chari, Guido and Garbervetsky, Diego and Marr, Stefan and Ducasse, Stéphane}, blog = {https://stefan-marr.de/2015/10/jit-data-structures-fully-reflective-vms-and-meta-circular-meta-tracing/}, booktitle = {Proceedings of the 2015 ACM International Symposium on New Ideas, New Paradigms, and Reflections on Programming \& Software}, doi = {10.1145/2814228.2814241}, isbn = {978-1-4503-3688-8}, keywords = {DynamicAdaptation LiveProgramming MOP Mate MeMyPublication MetaObjectProtocols Reflection VirtualMachines myown}, location = {Pittsburgh, PA, USA}, month = oct, numpages = {14}, pages = {240--253}, pdf = {https://stefan-marr.de/downloads/onward15-chari-et-al-towards-fully-reflective-environments.pdf}, publisher = {ACM}, series = {Onward! '15}, title = {Towards Fully Reflective Environments}, year = {2015}, month_numeric = {10} }
@inproceedings{Vandercammen:15:WODA, abstract = {Trace-based JIT compilers identify frequently executed program paths at run-time and subsequently record, compile and optimize their execution. In order to improve the performance of the generated machine instructions, JIT compilers heavily rely on dynamic analysis of the code. Existing work treats the components of a JIT compiler as a monolithic whole, tied to particular execution semantics. We propose a formal framework that facilitates the design and implementation of a tracing JIT compiler and its accompanying dynamic analyses by decoupling the tracing, optimization, and interpretation processes. This results in a framework that is more configurable and extensible than existing formal tracing models. We formalize the tracer and interpreter as two abstract state machines that communicate through a minimal, well-defined interface. Developing a tracing JIT compiler becomes possible for arbitrary interpreters that implement this interface. The abstract machines also provide the necessary hooks to plug in custom analyses and optimizations.}, address = {Pittsburgh, PA, USA}, author = {Vandercammen, Maarten and Nicolay, Jens and Marr, Stefan and De Koster, Joeri and D'Hondt, Theo and De Roover, Coen}, blog = {https://stefan-marr.de/2015/10/jit-data-structures-fully-reflective-vms-and-meta-circular-meta-tracing/}, booktitle = {Proceedings of the 13th International Workshop on Dynamic Analysis}, doi = {10.1145/2823363.2823369}, keywords = {Compilation DynamicAnalysis JIT MeMyPublication OperationalSemantics Tracing myown}, month = oct, numpages = {6}, pages = {25--30}, pdf = {https://stefan-marr.de/downloads/woda15-vandercammen-et-al-a-formal-foundation-for-trace-based-jit-compilers.pdf}, publisher = {ACM}, series = {WODA '15}, title = {A Formal Foundation for Trace-Based JIT Compilers}, year = {2015}, month_numeric = {10} }
@inproceedings{Marr:2015:MTPE, abstract = {Tracing and partial evaluation have been proposed as meta-compilation techniques for interpreters to make just-in-time compilation language-independent. They promise that programs executing on simple interpreters can reach performance of the same order of magnitude as if they would be executed on state-of-the-art virtual machines with highly optimizing just-in-time compilers built for a specific language. Tracing and partial evaluation approach this meta-compilation from two ends of a spectrum, resulting in different sets of tradeoffs. This study investigates both approaches in the context of self-optimizing interpreters, a technique for building fast abstract-syntax-tree interpreters. Based on RPython for tracing and Truffle for partial evaluation, we assess the two approaches by comparing the impact of various optimizations on the performance of an interpreter for SOM, an object-oriented dynamically-typed language. The goal is to determine whether either approach yields clear performance or engineering benefits. We find that tracing and partial evaluation both reach roughly the same level of performance. SOM based on meta-tracing is on average 3x slower than Java, while SOM based on partial evaluation is on average 2.3x slower than Java. 
With respect to the engineering, tracing has however significant benefits, because it requires language implementers to apply fewer optimizations to reach the same level of performance.}, acceptancerate = {0.25}, acmid = {2660194}, appendix = {https://stefan-marr.de/papers/oopsla-marr-ducasse-meta-tracing-vs-partial-evaluation-artifacts/}, author = {Marr, Stefan and Ducasse, Stéphane}, blog = {https://stefan-marr.de/2015/10/tracing-vs-partial-evaluation-comparing-meta-compilation-approaches-for-self-optimizing-interpreters/}, booktitle = {Proceedings of the 2015 ACM International Conference on Object Oriented Programming Systems Languages \& Applications}, doi = {10.1145/2814270.2814275}, html = {https://stefan-marr.de/papers/oopsla-marr-ducasse-meta-tracing-vs-partial-evaluation/}, isbn = {978-1-4503-2585-1}, keywords = {Compiler Interpreter JITCompilation MeMyPublication MetaTracing Optimization PartialEvaluation RPython SelfOptimizing Tracing Truffle myown}, month = oct, note = {(acceptance rate 25\%)}, numpages = {19}, pages = {821--839}, pdf = {https://stefan-marr.de/downloads/oopsla15-marr-ducasse-meta-tracing-vs-partial-evaluation.pdf}, publisher = {ACM}, series = {OOPSLA '15}, title = {Tracing vs. Partial Evaluation: Comparing Meta-Compilation Approaches for Self-Optimizing Interpreters}, year = {2015}, month_numeric = {10} }
@inproceedings{DeWael:15:JitData, abstract = {Today, software engineering practices focus on finding the single right data representation (i.e., data structure) for a program. The right data representation, however, might not exist: relying on a single representation of the data for the lifetime of the program can be suboptimal in terms of performance. We explore the idea of developing data structures for which changing the data representation is an intrinsic property. To this end we introduce Just-in-Time Data Structures, which enable representation changes at runtime, based on declarative input from a performance expert programmer. Just-in-Time Data Structures are an attempt to shift the focus from finding the ``right'' data structure to finding the right sequence of data representations. We present JitDS-Java, an extension to the Java language, to develop Just-in-Time Data Structures. Further, we show two example programs that benefit from changing the representation at runtime.}, author = {De Wael, Mattias and Marr, Stefan and De Koster, Joeri and Sartor, Jennifer B. and De Meuter, Wolfgang}, blog = {https://stefan-marr.de/2015/10/jit-data-structures-fully-reflective-vms-and-meta-circular-meta-tracing/}, booktitle = {Proceedings of the 2015 ACM International Symposium on New Ideas, New Paradigms, and Reflections on Programming \& Software}, doi = {10.1145/2814228.2814231}, isbn = {978-1-4503-3688-8}, keywords = {Algorithms DataStructures DynamicReclassification MeMyPublication Optimizations Performance myown}, location = {Pittsburgh, PA, USA}, month = oct, numpages = {15}, pages = {61--75}, pdf = {https://stefan-marr.de/downloads/onward15-de-wael-et-al-just-in-time-data-structures.pdf}, publisher = {ACM}, series = {Onward! '15}, title = {Just-in-Time Data Structures}, url = {http://soft.vub.ac.be/~madewael/jitds/}, year = {2015}, month_numeric = {10} }
@article{PGASSurvey, abstract = {The Partitioned Global Address Space (PGAS) model is a parallel programming model that aims to improve programmer productivity while at the same time aiming for high performance. The main premise of PGAS is that a globally shared address space improves productivity, but that a distinction between local and remote data accesses is required to allow performance optimizations and to support scalability on large-scale parallel architectures. To this end, PGAS preserves the global address space while embracing awareness of non-uniform communication costs. Today, about a dozen languages exist that adhere to the PGAS model. This survey proposes a definition and a taxonomy along four axes: how parallelism is introduced, how the address space is partitioned, how data is distributed among the partitions and finally how data is accessed across partitions. Our taxonomy reveals that today's PGAS languages focus on distributing regular data and distinguish only between local and remote data access cost, whereas the distribution of irregular data and the adoption of richer data access cost models remain open challenges.}, articleno = {62}, author = {De Wael, Mattias and Marr, Stefan and De Fraine, Bruno and Van Cutsem, Tom and De Meuter, Wolfgang}, blog = {https://stefan-marr.de/2015/01/partitioned-global-address-space-languages/}, doi = {10.1145/2716320}, html = {https://stefan-marr.de/papers/acm-csur-de-wael-et-al-partitioned-global-address-space-languages/}, impactfactor2014 = {3.373}, impactfactor5y2014 = {5.949}, issn = {0360-0300}, journal = {ACM Computing Surveys}, keywords = {Concurrency GlobalAddressSpace HPC LanguageDesign MeMyPublication MessagePassing PGAS Parallelism Performance SharedMemory myown}, month = jun, number = {4}, numpages = {27}, pages = {62:1--62:27}, pdf = {https://stefan-marr.de/downloads/acm-csur-de-wael-et-al-partitioned-global-address-space-languages.pdf}, publisher = {ACM}, series = {ACM CSUR}, title = {Partitioned 
Global Address Space Languages}, volume = {47}, year = {2015}, month_numeric = {6} }
@inproceedings{ZeroOverhead, abstract = {Runtime metaprogramming enables many useful applications and is often a convenient solution to solve problems in a generic way, which makes it widely used in frameworks, middleware, and domain-specific languages. However, powerful metaobject protocols are rarely supported and even common concepts such as reflective method invocation or dynamic proxies are not optimized. Solutions proposed in literature either restrict the metaprogramming capabilities or require application or library developers to apply performance improving techniques. For overhead-free runtime metaprogramming, we demonstrate that dispatch chains, a generalized form of polymorphic inline caches common to self-optimizing interpreters, are a simple optimization at the language-implementation level. Our evaluation with self-optimizing interpreters shows that unrestricted metaobject protocols can be realized for the first time without runtime overhead, and that this optimization is applicable for just-in-time compilation of interpreters based on meta-tracing as well as partial evaluation. 
In this context, we also demonstrate that optimizing common reflective operations can lead to significant performance improvements for existing applications.}, appendix = {https://stefan-marr.de/papers/pldi-marr-et-al-zero-overhead-metaprogramming-artifacts/}, author = {Marr, Stefan and Seaton, Chris and Ducasse, Stéphane}, blog = {https://stefan-marr.de/2015/04/zero-overhead-metaprogramming/}, booktitle = {Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation}, doi = {10.1145/2737924.2737963}, html = {https://stefan-marr.de/papers/pldi-marr-et-al-zero-overhead-metaprogramming/}, isbn = {978-1-4503-3468-6}, keywords = {DispatchChains MeMyPublication MetaProgramming Overhead Performance RPython Truffle myown}, month = jun, note = {(acceptance rate 19\%)}, numpages = {10}, pages = {545--554}, pdf = {https://stefan-marr.de/downloads/pldi15-marr-et-al-zero-overhead-metaprogramming.pdf}, publisher = {ACM}, series = {PLDI '15}, title = {Zero-Overhead Metaprogramming: Reflection and Metaobject Protocols Fast and without Compromises}, year = {2015}, month_numeric = {6} }
@article{marr2014parallel, abstract = {Using imperative programming to process event streams, such as those generated by multi-touch devices and 3D cameras, has significant engineering drawbacks. Declarative approaches solve common problems but so far, they have not been able to scale on multicore systems while providing guaranteed response times. We propose PARTE, a parallel scalable complex event processing engine that allows for a declarative definition of event patterns and provides soft real-time guarantees for their recognition. The proposed approach extends the classical Rete algorithm and maps event matching onto a graph of actor nodes. Using a tiered event matching model, PARTE provides upper bounds on the detection latency by relying on a combination of non-blocking message passing between Rete nodes and safe memory management techniques. The performance evaluation shows the scalability of our approach on up to 64 cores. Moreover, it indicates that PARTE's design choices lead to more predictable performance compared to a PARTE variant without soft real-time guarantees. 
Finally, the evaluation indicates further that gesture recognition can benefit from the exposed parallelism with superlinear speedups.}, author = {Marr, Stefan and Renaux, Thierry and Hoste, Lode and De Meuter, Wolfgang}, blog = {https://stefan-marr.de/2014/02/parallel-gesture-recognition-with-soft-real-time-guarantees-2/}, day = {1}, doi = {10.1016/j.scico.2014.02.012}, impactfactor2014 = {0.715}, impactfactor5year2014 = {0.837}, issn = {0167-6423}, journal = {Science of Computer Programming}, keywords = {Clips MeMyPublication PARTE RealTime Rete RulesEngine VM myown}, month = feb, pages = {159--183}, pdf = {https://stefan-marr.de/downloads/scp14-marr-et-al-parallel-gesture-recognition-with-soft-real-time-guarantees.pdf}, series = {SCP}, title = {Parallel Gesture Recognition with Soft Real-Time Guarantees}, url = {https://stefan-marr.de/downloads/scp14-marr-et-al-parallel-gesture-recognition-with-soft-real-time-guarantees.pdf}, volume = {98, Part 2}, year = {2015}, month_numeric = {2} }
@article{DeKoster2014, abstract = {The actor model is a concurrency model that avoids issues such as deadlocks and data races by construction, and thus facilitates concurrent programming. While it has mainly been used for expressing distributed computations, it is equally useful for modeling concurrent computations in a single shared memory machine. In component based software, the actor model lends itself to divide the components naturally over different actors and use message-passing concurrency for the interaction between these components. The tradeoff is that the actor model sacrifices expressiveness and efficiency with respect to parallel access to shared state. This paper gives an overview of the disadvantages of the actor model when trying to express shared state and then formulates an extension of the actor model to solve these issues. Our solution proposes domains and synchronization views to solve the issues without compromising on the semantic properties of the actor model. Thus, the resulting concurrency model maintains deadlock-freedom and avoids low-level data races.}, author = {De Koster, Joeri and Marr, Stefan and D'Hondt, Theo and Van Cutsem, Tom}, blog = {https://stefan-marr.de/2014/03/domains-safe-sharing-among-actors/}, doi = {10.1016/j.scico.2014.02.008}, impactfactor2014 = {0.715}, impactfactor5year2014 = {0.837}, issn = {0167-6423}, journal = {Science of Computer Programming }, keywords = {Actors Domains MeMyPublication Synchronization Views myown}, month = feb, pages = {140--158}, pdf = {https://stefan-marr.de/downloads/scp14-de-koster-et-al-domains-safe-sharing-among-actors.pdf}, series = {SCP}, title = {Domains: safe sharing among actors }, volume = {98, Part 2}, year = {2015}, month_numeric = {2} }
@article{marr2014there, abstract = {Research on language implementation techniques has regained importance with the rise of domain-specific languages (DSLs). Although DSLs can help manage a domain’s complexity, building highly optimizing compilers or virtual machines is rarely affordable. So, performance remains an issue. Ideally, you would implement a simple interpreter and still be able to achieve acceptable performance. RPython and Truffle are implementation techniques based on simple interpreters; they promise to perform at the same order of magnitude as highly optimizing virtual machines. This case study compares the two techniques to identify their similarities, weaknesses, and areas for further research.}, appendix = {https://stefan-marr.de/papers/ieee-soft-marr-et-al-appendix-performance-evaluation/}, author = {Marr, Stefan and Pape, Tobias and De Meuter, Wolfgang}, blog = {https://stefan-marr.de/2014/09/are-we-there-yet/}, day = {15}, doi = {10.1109/MS.2014.98}, html = {https://stefan-marr.de/papers/ieee-soft-marr-et-al-are-we-there-yet/}, impactfactor2014 = {1.053}, impactfactor5y2014 = {1.397}, issn = {0740-7459}, journal = {IEEE Software}, keywords = {Compilers ComputerProgramming DSL Interpreter Interpreters LanguageImplementation MeMyPublication Performance ProgrammingLanguages RPython SOM SelfOptimizing Smalltalk SoftwareEngineering Truffle VirtualMachines myown}, month = sep, number = {5}, numpages = {8}, pages = {60--67}, pdf = {https://stefan-marr.de/downloads/ieee-soft-marr-et-al-are-we-there-yet.pdf}, series = {IEEE Soft}, title = {Are We There Yet? Simple Language Implementation Techniques for the 21st Century}, volume = {31}, year = {2014}, month_numeric = {9} }
@inproceedings{dewael2014forkjoin, abstract = {Now that multicore processors are commonplace, developing parallel software has escaped the confines of high-performance computing and enters the mainstream. The Fork/Join framework, for instance, is part of the standard Java platform since version 7. Fork/Join is a high-level parallel programming model advocated to make parallelizing recursive divide-and-conquer algorithms particularly easy. While, in theory, Fork/Join is a simple and effective technique to expose parallelism in applications, it has not been investigated before whether and how the technique is applied in practice. We therefore performed an empirical study on a corpus of 120 open source Java projects that use the framework for roughly 362 different tasks. On the one hand, we confirm the frequent use of four best-practice patterns (Sequential Cutoff, Linked Subtasks, Leaf Tasks, and avoiding unnecessary forking) in actual projects. On the other hand, we also discovered three recurring anti-patterns that potentially limit parallel performance: sub-optimal use of Java collections when splitting tasks into subtasks as well as when merging the results of subtasks, and finally the inappropriate sharing of resources between tasks. 
We document these anti-patterns and study their impact on performance.}, acceptancerate = {0.39}, author = {De Wael, Mattias and Marr, Stefan and Van Cutsem, Tom}, blog = {https://stefan-marr.de/2014/09/forkjoin-parallelism-in-the-wild/}, booktitle = {Proceedings of the 2014 International Conference on Principles and Practices of Programming on the Java Platform: Virtual Machines, Languages, and Tools}, doi = {10.1145/2647508.2647511}, isbn = {978-1-4503-2926-2}, keywords = {AntiPatterns EmpiricalStudy ForkJoin Java MeMyPublication OpenSource Patterns myown}, month = sep, note = {(acceptance rate 39\%)}, numpages = {12}, pages = {39--50}, pdf = {https://stefan-marr.de/downloads/pppj14-dewael-et-al-forkjoin-parallelism-in-the-wild.pdf}, series = {ManLang'14}, title = {Fork/Join Parallelism in the Wild: Documenting Patterns and Anti-Patterns in Java Programs using the Fork/Join Framework}, url = {https://stefan-marr.de/downloads/pppj14-dewael-et-al-forkjoin-parallelism-in-the-wild.pdf}, year = {2014}, month_numeric = {9} }
@inproceedings{dewael2014interface, abstract = {Finding the right algorithm–data structure combination is easy, but finding the right data structure for a set of algorithms is much less trivial. Moreover, using the same data representation throughout the whole program might be sub-optimal. Depending on several factors, often only known at runtime, some programs benefit from changing the data representation during execution. In this position paper we introduce the idea of Just-In-Time data structures, a combination of a data interface and a set of concrete data representations with different performance characteristics. These Just-In-Time data structures can dynamically swap their internal data representation when the cost of swapping is paid back many times in the remainder of the computation. To make Just-In-Time data structures work, research is needed at three fronts: 1. We need to better understand the synergy between different data representations and algorithms; 2. We need a structured approach to handle the transitions between data representations; 3. We need descriptive programming constructs to express which representation fits a program fragment best. 
Combined, this research will result in a structured programming approach where separating data interface from data representation, not only improves understandability and maintainability, but also improves performance through automated transitions of data representation.}, articleno = {2}, author = {De Wael, Mattias and Marr, Stefan and De Meuter, Wolfgang}, booktitle = {Proceedings of the 9th Workshop on Implementation, Compilation, Optimization of Object-Oriented Languages, Programs and Systems}, doi = {10.1145/2633301.2633303}, isbn = {978-1-4503-2914-9}, keywords = {DataStructures JIT MeMyPublication myown}, location = {Uppsala, Sweden}, month = jul, numpages = {4}, pages = {2:1--2:4}, series = {ICOOOLPS}, title = {Data Interface + Algorithms = Efficient Programs: Separating Logic from Representation to Improve Performance}, year = {2014}, month_numeric = {7} }
@inproceedings{swalens2014towards, abstract = {In the past decades, many different programming models for managing concurrency in applications have been proposed, such as the actor model, Communicating Sequential Processes, and Software Transactional Memory. The ubiquity of multi-core processors has made harnessing concurrency even more important. We observe that modern languages, such as Scala, Clojure, or F\#, provide not one, but \emph{multiple} concurrency models that help developers manage concurrency. Large end-user applications are rarely built using just a single concurrency model. Programmers need to manage a responsive UI, deal with file or network I/O, asynchronous workflows, and shared resources. Different concurrency models facilitate different requirements. This raises the issue of how these concurrency models interact, and whether they are \emph{composable}. After all, combining different concurrency models may lead to subtle bugs or inconsistencies. In this paper, we perform an in-depth study of the concurrency abstractions provided by the Clojure language. We study all pairwise combinations of the abstractions, noting which ones compose without issues, and which do not. 
We make an attempt to abstract from the specifics of Clojure, identifying the general properties of concurrency models that facilitate or hinder composition.}, author = {Swalens, Janwillem and Marr, Stefan and De Koster, Joeri and Van Cutsem, Tom}, blog = {https://stefan-marr.de/2014/02/towards-composable-concurrency-abstractions/}, booktitle = {Proceedings of the Workshop on Programming Language Approaches to Concurrency and communication-cEntric Software (PLACES)}, doi = {10.4204/EPTCS.155.8}, journal = {EPTCS}, keywords = {Actors Atoms CSP Clojure Concurrency MeMyPublication Programming STM Transactions VM myown}, month = apr, pages = {54--60}, pdf = {https://arxiv.org/pdf/1406.3485}, series = {PLACES '14}, title = {Towards Composable Concurrency Abstractions}, url = {https://arxiv.org/abs/1406.3485}, volume = {155}, year = {2014}, month_numeric = {4} }
@inproceedings{dekoster2013tanks, abstract = {In the past, the Actor Model has mainly been explored in a distributed context. However, more and more application developers are also starting to use it to program shared-memory multicore machines because of the safety guarantees it provides. It avoids issues such as deadlocks and race conditions by construction, and thus facilitates concurrent programming. The tradeoff is that the Actor Model sacrifices expressiveness with respect to accessing shared state because actors are fully isolated from each other (a.k.a. "shared-nothing parallelism"). There is a need for more high level synchronization mechanisms that integrate with the actor model without sacrificing the safety and liveness guarantees it provides. This paper introduces a variation on the communicating event-loops actor model called the Tank model. A tank is an actor that can expose part of its state as a shared read-only resource. The model ensures that any other actor will always observe a consistent version of that state, even in the face of concurrent updates of the actor that owns that state.}, author = {De Koster, Joeri and Marr, Stefan and D'Hondt, Theo and Van Cutsem, Tom}, blog = {https://stefan-marr.de/2013/10/tanks-multiple-reader-single-writer-actors/}, booktitle = {Proceedings of AGERE! 
2013, 3rd International Workshop on Programming based on Actors, Agents, and Decentralized Control}, day = {27}, isbn = {978-1-4503-2602-5}, keywords = {Actors Concurrency ConcurrentReads Consistency Encapsulation EventLoops Isolation MeMyPublication myown}, location = {Indianapolis, Indiana, USA}, month = oct, numpages = {8}, pages = {61--68}, pdf = {https://stefan-marr.de/downloads/agere13-de-koster-et-al-tanks-multiple-reader-single-writer-actors.pdf}, series = {AGERE!'13}, title = {Tanks: Multiple reader, single writer actors}, url = {https://soft.vub.ac.be/~smarr/downloads/agere13-de-koster-et-al-tanks-multiple-reader-single-writer-actors.pdf}, year = {2013}, month_numeric = {10} }
@inproceedings{Swalens2013cloud,
  abstract = {Traffic monitoring or crowd management systems produce large amounts of data in the form of events that need to be processed to detect relevant incidents. Rule-based pattern recognition is a promising approach for these applications, however, increasing amounts of data as well as large and complex rule sets demand for more and more processing power and memory. In order to scale such applications, a rule-based pattern detection system needs to be distributable over multiple machines. Today's approaches are however focused on static distribution of rules or do not support reasoning over the full set of events. We propose Cloud PARTE, a complex event detection system that implements the Rete algorithm on top of mobile actors. These actors can migrate between machines to respond to changes in the work load distribution. Cloud PARTE is an extension of PARTE and offers the first rule engine specifically tailored for continuous complex event detection that is able to benefit from elastic systems as provided by cloud computing platforms. It supports fully automatic load balancing and supports online rules with access to the entire event pool.},
  author = {Swalens, Janwillem and Renaux, Thierry and Hoste, Lode and Marr, Stefan and De Meuter, Wolfgang},
  blog = {https://stefan-marr.de/2013/10/cloud-parte-elastic-complex-event-processing-based-on-mobile-actors/},
  booktitle = {Proceedings of AGERE! 2013, 3rd International Workshop on Programming based on Actors, Agents, and Decentralized Control},
  keywords = {BigData Cloud Distribution Elasticity Inference LoadBalancing MeMyPublication Rete TrafficMonitoring myown},
  month = oct,
  pages = {3--12},
  pdf = {https://soft.vub.ac.be/~smarr/downloads/agere13-swalens-et-al-cloud-parte-elastic-complex-event-processing-based-on-mobile-actors.pdf},
  publisher = {ACM},
  series = {AGERE!'13},
  title = {Cloud PARTE: Elastic Complex Event Processing based on Mobile Actors},
  url = {https://soft.vub.ac.be/~smarr/downloads/agere13-swalens-et-al-cloud-parte-elastic-complex-event-processing-based-on-mobile-actors.pdf},
  year = {2013},
  month_numeric = {10}
}
@phdthesis{marr2013phd,
  abstract = {During the past decade, software developers widely adopted JVM and CLI as multi-language virtual machines (VMs). At the same time, the multicore revolution burdened developers with increasing complexity. Language implementers devised a wide range of concurrent and parallel programming concepts to address this complexity but struggle to build these concepts on top of common multi-language VMs. Missing support in these VMs leads to tradeoffs between implementation simplicity, correctly implemented language semantics, and performance guarantees. Departing from the traditional distinction between concurrency and parallelism, this dissertation finds that parallel programming concepts benefit from performance-related VM support, while concurrent programming concepts benefit from VM support that guarantees correct semantics in the presence of reflection, mutable state, and interaction with other languages and libraries. Focusing on these concurrent programming concepts, this dissertation finds that a VM needs to provide mechanisms for managed state, managed execution, ownership, and controlled enforcement. Based on these requirements, this dissertation proposes an ownership-based metaobject protocol (OMOP) to build novel multi-language VMs with proper concurrent programming support. This dissertation demonstrates the OMOP's benefits by building concurrent programming concepts such as agents, software transactional memory, actors, active objects, and communicating sequential processes on top of the OMOP. The performance evaluation shows that OMOP-based implementations of concurrent programming concepts can reach performance on par with that of their conventionally implemented counterparts if the OMOP is supported by the VM. To conclude, the OMOP proposed in this dissertation provides a unifying and minimal substrate to support concurrent programming on top of multi-language VMs. The OMOP enables language implementers to correctly implement language semantics, while simultaneously enabling VMs to provide efficient implementations.},
  address = {Pleinlaan 2, B-1050 Brussels, Belgium},
  author = {Marr, Stefan},
  blog = {http://stefan-marr.de/2013/01/supporting-concurrency-abstractions-in-high-level-language-virtual-machines/},
  isbn = {978-90-5718-256-3},
  keywords = {Concurrency Domains MOP MeMyPublication MetaObjectProtocol OMOP Ownership VMs VirtualMachine myown},
  month = jan,
  pdf = {http://www.stefan-marr.de/downloads/marr-phd-2013-supporting-concurrency-abstractions-in-high-level-language-virtual-machines.pdf},
  publisher = {VUBPress},
  school = {Software Languages Lab, Vrije Universiteit Brussel},
  series = {SOFT PhD},
  title = {Supporting Concurrency Abstractions in High-level Language Virtual Machines},
  url = {http://www.stefan-marr.de/2013/01/supporting-concurrency-abstractions-in-high-level-language-virtual-machines/},
  year = {2013},
  month_numeric = {1}
}
@inproceedings{renaux2012parte, abstract = {Applying imperative programming techniques to process event streams, like those generated by multi-touch devices and 3D cameras, has significant engineering drawbacks. Declarative approaches solve these problems but have not been able to scale on multicore systems while providing guaranteed response times.We propose PARTE, a parallel scalable complex event processing engine which allows a declarative definition of event patterns and provides soft real-time guarantees for their recognition. It extends the state-saving Rete algorithm and maps the event matching onto a graph of actor nodes. Using a tiered event matching model, PARTEprovides upper bounds on the detection latency. Based on the domain-specific constraints, PARTE's design relies on a combination of 1) lock-free data structures; 2) safe memory management techniques; and 3) message passing between Rete nodes. In our benchmarks, we measured scalability up to 8 cores, outperforming highly optimized sequential implementations.}, acmid = {2414646}, author = {Renaux, Thierry and Hoste, Lode and Marr, Stefan and De Meuter, Wolfgang}, blog = {http://stefan-marr.de/2013/01/parallel-gesture-recognition-with-soft-real-time-guarantees/}, booktitle = {Proceedings of the 2nd edition on Programming Systems, Languages and Applications based on Actors, Agents, and Decentralized Control Abstractions}, doi = {10.1145/2414639.2414646}, isbn = {978-1-4503-1630-9}, keywords = {MeMyPublication actors dataflow guarantees myown realtime rete}, location = {Tucson, Arizona, USA}, month = oct, numpages = {12}, pages = {35--46}, pdf = {http://www.stefan-marr.de/downloads/agere12-parallel-gesture-recognition-with-soft-real-time-guarantees.pdf}, series = {AGERE!'12}, title = {Parallel Gesture Recognition with Soft Real-Time Guarantees}, url = {http://www.stefan-marr.de/2013/01/parallel-gesture-recognition-with-soft-real-time-guarantees/}, year = {2012}, month_numeric = {10} }
@inproceedings{MOP_TOOLS,
  abstract = {Supporting all known abstractions for concurrent and parallel programming in a virtual machines (VM) is a futile undertaking, but it is required to give programmers appropriate tools and performance. Instead of supporting all abstractions directly, VMs need a unifying mechanism similar to \texttt{INVOKEDYNAMIC} for JVMs. Our survey of parallel and concurrent programming concepts identifies concurrency abstractions as the ones benefiting most from support in a VM. Currently, their semantics is often weakened, reducing their engineering benefits. They require a mechanism to define flexible language guarantees. Based on this survey, we define an ownership-based meta-object protocol as candidate for VM support. We demonstrate its expressiveness by implementing actor semantics, software transactional memory, agents, CSP, and active objects. While the performance of our prototype confirms the need for VM support, it also shows that the chosen mechanism is appropriate to express a wide range of concurrency abstractions in a unified way.},
  acceptancerate = {0.31},
  address = {Berlin / Heidelberg},
  author = {Marr, Stefan and D'Hondt, Theo},
  blog = {http://stefan-marr.de/2012/03/identifying-a-unifying-mechanism-for-the-implementation-of-concurrency-abstractions-on-multi-language-virtual-machines/},
  booktitle = {Objects, Models, Components, Patterns, 50th International Conference, TOOLS 2012},
  doi = {10.1007/978-3-642-30561-0_13},
  isbn = {978-3-642-30560-3},
  keywords = {Abstraction Concurrency LanguageSupport MeMyPublication Parallelism VirtualMachine myown},
  month = may,
  note = {(acceptance rate 31\%)},
  pages = {171--186},
  pdf = {http://www.stefan-marr.de/downloads/tools12-smarr-dhondt-identifying-a-unifying-mechanism-for-the-implementation-of-concurrency-abstractions-on-multi-language-virtual-machines.pdf},
  publisher = {Springer},
  series = {TOOLS'12 (LNCS)},
  title = {Identifying A Unifying Mechanism for the Implementation of Concurrency Abstractions on Multi-Language Virtual Machines},
  url = {http://www.stefan-marr.de/2012/03/identifying-a-unifying-mechanism-for-the-implementation-of-concurrency-abstractions-on-multi-language-virtual-machines/},
  volume = {7304},
  year = {2012},
  month_numeric = {5}
}
@inproceedings{MISS2012,
  abstract = {In this paper, we review what we have learned from implementing languages for parallel and concurrent programming, and investigate the role of modularity. To identify the approaches used to facilitate correctness and maintainability, we ask the following questions: What guides modularization? Are informal approaches used to facilitate correctness? Are concurrency concerns modularized? And, where is language support lacking most? Our subjects are AmbientTalk, SLIP, and the RoarVM. All three evolved over the years, enabling us to look back at specific experiments to understand the impact of concurrency on modularity. We conclude from our review that concurrency concerns are one of the strongest drivers for the definition of module boundaries. It helps when languages offer sophisticated modularization constructs. However, with respect to concurrency, other language features like single-assignment are of greater importance. Furthermore, tooling that enables remodularization taking concurrency invariants into account would be of great value.},
  acmid = {2162031},
  author = {Marr, Stefan and Nicolay, Jens and Van Cutsem, Tom and D'Hondt, Theo},
  blog = {http://stefan-marr.de/2012/01/modularity-and-conventions-for-maintainable-concurrent-language-implementations-a-review-of-our-experiences-and-practices/},
  booktitle = {Proceedings of the 2nd Workshop on Modularity In Systems Software (MISS'2012)},
  doi = {10.1145/2162024.2162031},
  isbn = {978-1-4503-1217-2},
  keywords = {CaseStudy Concurrency ExperienceReport Interpreters MeMyPublication Modularity Parallelism VMs myown},
  location = {Potsdam, Germany},
  month = mar,
  numpages = {6},
  pages = {21--26},
  pdf = {http://www.stefan-marr.de/downloads/miss12-smarr-et-al-modularity-and-conventions-for-maintainable-concurrent-language-implementations.pdf},
  publisher = {ACM},
  series = {MISS'12},
  title = {Modularity and Conventions for Maintainable Concurrent Language Implementations: A Review of Our Experiences and Practices},
  url = {http://www.stefan-marr.de/2012/01/modularity-and-conventions-for-maintainable-concurrent-language-implementations-a-review-of-our-experiences-and-practices/},
  year = {2012},
  month_numeric = {3}
}
@inproceedings{SyncViewsPoster,
  abstract = {The actor model has already proven itself as an interesting concurrency model that avoids issues such as deadlocks and race conditions by construction, and thus facilitates concurrent programming. The tradeoff is that it sacrifices expressiveness and efficiency especially with respect to data parallelism. However, many standard solutions to computationally expensive problems employ data parallel algorithms for better performance on parallel systems. We identified three problems that inhibit the use of data-parallel algorithms within the actor model. Firstly, one of the main properties of the actor model, the fact that no data is shared, is one of the most severe performance bottlenecks. Especially the fact that shared state can not be read truly in parallel. Secondly, the actor model on its own does not provide a mechanism to specify extra synchronization conditions on batches of messages which leads to event-level data-races. And lastly, programmers are forced to write code in a continuation-passing style (CPS) to handle typical request-response situations. However, CPS breaks the sequential flow of the code and is often hard to understand, which increases complexity and lowers maintainability. We propose \emph{synchronization views} to solve these three issues without compromising the semantic properties of the actor model. Thus, the resulting concurrency model maintains deadlock-freedom, avoids low-level race conditions, and keeps the semantics of macro-step execution.},
  author = {De Koster, Joeri and Marr, Stefan and D'Hondt, Theo},
  blog = {http://stefan-marr.de/2011/12/synchronization-views-for-event-loop-actors/},
  booktitle = {Proceedings of the 17th ACM SIGPLAN symposium on Principles and Practice of Parallel Programming},
  doi = {10.1145/2145816.2145873},
  keywords = {ActPigoo Actors Concurrency MeMyPublication Parallelism ReaderWriter Synchronization Views myown},
  month = feb,
  note = {(Poster)},
  pages = {317--318},
  pdf = {http://soft.vub.ac.be/~smarr/downloads/ppopp12-dekoster-synchronization-views-for-event-loop-actors.pdf},
  publisher = {ACM},
  series = {PPoPP Poster'12},
  title = {Synchronization Views for Event-loop Actors},
  url = {http://www.stefan-marr.de/2011/12/synchronization-views-for-event-loop-actors/},
  year = {2012},
  month_numeric = {2}
}
@article{CSOMPL_JOT, abstract = {CSOM/PL is a software product line (SPL) derived from applying multi-dimensional separation of concerns (MDSOC) techniques to the domain of high-level language virtual machine (VM) implementations. For CSOM/PL, we modularised CSOM, a Smalltalk VM implemented in C, using VMADL (virtual machine architecture description language). Several features of the original CSOM were encapsulated in VMADL modules and composed in various combinations. In an evaluation of our approach, we show that applying MDSOC and SPL principles to a domain as complex as that of VMs is not only feasible but beneficial, as it improves understandability, maintainability, and configurability of VM implementations without harming performance. }, author = {Haupt, Michael and Marr, Stefan and Hirschfeld, Robert}, blog = {http://stefan-marr.de/2011/12/csompl-a-virtual-machine-product-line/}, doi = {10.5381/jot.2011.10.1.a12}, issn = {1660-1769}, journal = {Journal of Object Technology}, keywords = {AOP CSOM FOP Interpreters MeMyPublication Modularization VirtualMachine myown}, month = dec, number = {12}, pages = {1-30}, pdf = {http://www.stefan-marr.de/downloads/jot11-mhaupt-csompl-a-virtual-machine-product-line.pdf}, series = {JoT}, title = {CSOM/PL: A Virtual Machine Product Line}, url = {http://www.stefan-marr.de/2011/12/csompl-a-virtual-machine-product-line/}, volume = {10}, year = {2011}, month_numeric = {12} }
@inproceedings{VMIL11,
  abstract = {While parallel programming for very regular problems has been used in the scientific community by non-computer-scientists successfully for a few decades now, concurrent programming and solving irregular problems remains hard. Furthermore, we shift from few expert system programmers mastering concurrency for a constrained set of problems to mainstream application developers being required to master concurrency for a wide variety of problems. Consequently, high-level language virtual machine (VM) research faces interesting questions. What are processor design changes that have an impact on the abstractions provided by VMs to provide platform independence? How can application programmers' diverse needs be facilitated to solve concurrent programming problems? We argue that VMs will need to be ready for a wide range of different concurrency models that allow solving concurrency problems with appropriate abstractions. Furthermore, they need to abstract from heterogeneous processor architectures, varying performance characteristics, need to account for memory access cost and inter-core communication mechanisms but should only expose the minimal useful set of notions like locality, explicit communication, and adaptable scheduling to maintain their abstracting nature. Eventually, language designers need to be enabled to guarantee properties like encapsulation, scheduling guarantees, and immutability also when an interaction between different problem-specific concurrency abstractions is required.},
  acmid = {2095104},
  author = {Marr, Stefan and De Wael, Mattias and Haupt, Michael and D'Hondt, Theo},
  blog = {http://stefan-marr.de/2011/09/which-problems-does-a-multi-language-virtual-machine-need-to-solve-in-the-multicoremanycore-era/},
  booktitle = {Proceedings of the 5th Workshop on Virtual Machines and Intermediate Languages},
  doi = {10.1145/2095050.2095104},
  isbn = {978-1-4503-1183-0},
  keywords = {Concurrency Encapsulation ManyCore MeMyPublication MultiCore PositionPaper Scheduling Survey VMIL VMs myown},
  location = {Portland, Oregon, USA},
  month = oct,
  numpages = {8},
  pages = {341--348},
  pdf = {http://www.stefan-marr.de/downloads/vmil11-smarr-et-al-which-problems-does-a-multi-language-virtual-machine-need-to-solve-in-the-multicore-manycore-era.pdf},
  publisher = {ACM},
  series = {VMIL'11},
  title = {Which Problems Does a Multi-Language Virtual Machine Need to Solve in the Multicore/Manycore Era?},
  url = {http://www.stefan-marr.de/2011/09/which-problems-does-a-multi-language-virtual-machine-need-to-solve-in-the-multicoremanycore-era/},
  year = {2011},
  month_numeric = {10}
}
@techreport{CSOMPL_TR, abstract = {CSOM/PL is a software product line (SPL) derived from applying multi-dimensional separation of concerns (MDSOC) techniques to the domain of high-level language virtual machine (VM) implementations. For CSOM/PL, we modularised CSOM, a Smalltalk VM implemented in C, using VMADL (virtual machine architecture description language). Several features of the original CSOM were encapsulated in VMADL modules and composed in various combinations. In an evaluation of our approach, we show that applying MDSOC and SPL principles to a domain as complex as that of VMs is not only feasible but beneficial, as it improves understandability, maintainability, and configurability of VM implementations without harming performance.}, address = {Am Neuen Palais 10, 14469 Potsdam}, author = {Haupt, Michael and Marr, Stefan and Hirschfeld, Robert}, day = {18}, institution = {Hasso Plattner Institute }, keywords = {AOP Architecture ArchitectureLanguage CSOM MasterThesis MeMyPublication Modularization Productline VirtualMachine aspect-oriented myown services}, month = apr, number = {48}, pages = {26}, publisher = {Universitätsverlag Potsdam}, series = {HPI TR}, title = {CSOM/PL: A Virtual Machine Product Line*}, url = {http://www.amazon.de/CSOM-PL-virtual-machine-product/dp/3869561343}, year = {2011}, month_numeric = {4} }
@inproceedings{SplashDocSymp2010, abstract = {We propose to search for common abstractions for different concurrency models to enable high-level language virtual machines to support a wide range of different concurrency models. This would enable domain-specific solutions for the concurrency problem. Furthermore, advanced knowledge about concurrency in the VM model will most likely lead to better implementation opportunities on top of the different upcoming many-core architectures. The idea is to investigate the concepts of encapsulation and locality to this end. Thus, we are going to experiment with different language abstractions for concurrency on top of a virtual machine, which supports encapsulation and locality, to see how language designers could benefit, and how virtual machines could optimize programs using these concepts.}, address = {New York, NY, USA}, author = {Marr, Stefan}, blog = {http://stefan-marr.de/2010/07/doctoral-symposium-at-splash-2010/}, booktitle = {SPLASH '10: Proceedings of the ACM International Conference Companion on Object Oriented Programming Systems Languages and Applications Companion}, day = {17-21}, doi = {10.1145/1869542.1869583}, isbn = {978-1-4503-0240-1}, keywords = {Abstraction Concurrency ManyCore MeMyPublication Proposal VM myown}, location = {Reno/Tahoe, Nevada, USA}, month = oct, pages = {221--222}, pdf = {http://www.stefan-marr.de/downloads/docsymp-smarr.pdf}, publisher = {ACM}, series = {SPLASH DocSymp}, title = {Encapsulation And Locality: A Foundation for Concurrency Support in Multi-Language Virtual Machines?}, type = {Doctoral Symposium}, url = {http://www.stefan-marr.de/2010/07/doctoral-symposium-at-splash-2010/}, year = {2010}, month_numeric = {10} }
@presentation{MulticoreProgramming, abstract = {This paper argues in favour of a language-oriented approach to teach the principles of concurrency to graduate students. Over the past years, the popularity of programming lan- guages that promote a functional programming style has steadily grown. We want to promote the use of such lan- guages as the appropriate basic tools to deal with the “mul- ticore revolution”. We describe some of these programming languages and highlight two of them: Erlang and Clojure. We use these languages in a new graduate-level course that we will teach starting next academic year. Our goal is not to convince the reader that Erlang and Clojure are the best possible choices among this pool of candidate languages. Rather, our goal is to promote a functional programming style to tackle concurrency issues, and to teach this style in a programming language that makes it easy, straightforward and convenient to use that style. We do not want to get bogged down in a discussion on the usefulness or importance of learning new programming languages. For a good summary of the diverse advantages of studying new programming languages, we refer to a recent white paper by the ACM SIGPLAN education board [6].}, author = {Van Cutsem, Tom and Marr, Stefan and De Meuter, Wolfgang}, booktitle = {Workshop on Curricula for Concurrency and Parallelism}, day = {17}, keywords = {MeMyPublication clojure concurrency course erlang immutability myown parallelism teaching}, month = oct, pages = {3}, pdf = {http://soft.vub.ac.be/Publications/2010/vub-tr-soft-10-12.pdf}, series = {CCP'10}, title = {A Language-oriented Approach to Teaching Concurrency}, url = {http://soft.vub.ac.be/Publications/2010/vub-tr-soft-10-12.pdf}, year = {2010}, month_numeric = {10} }
@inproceedings{SplashPoster2010, abstract = {We propose to search for common abstractions for concurrency models to enable multi-language virtual machines to support a wide range of them. This would enable domain-specific solutions for concurrency problems. Furthermore, such an abstraction could improve portability of virtual machines to the vastly different upcoming many-core architectures.}, author = {Marr, Stefan and D'Hondt, Theo}, booktitle = {SPLASH '10: Companion of the ACM International Conference on Object Oriented Programming Systems Languages and Applications}, doi = {10.1145/1869542.1869593}, isbn = {978-1-4503-0240-1}, keywords = {ManyCore MeMyPublication MultiCore Poster SPLASH machines myown virtual}, location = {Reno/Tahoe, Nevada, USA}, month = oct, note = {(Poster)}, pages = {239--240}, pdf = {http://www.stefan-marr.de/downloads/docsymp-smarr.pdf}, series = {SPLASH Poster}, title = {Many-Core Virtual Machines: Decoupling Abstract from Concrete Concurrency}, url = {http://www.stefan-marr.de/2010/08/poster-at-splash10/}, year = {2010}, month_numeric = {10} }
@inproceedings{InsertionTreePhasers, abstract = {This paper presents an algorithm and a data structure for scalable dynamic synchronization in fine-grained parallelism. The algorithm supports the full generality of phasers with dynamic, two-phase, and point-to-point synchronization. It retains the scalability of classical tree barriers, but provides unbounded dynamicity by employing a tailor-made insertion tree data structure. It is the first completely documented implementation strategy for a scalable phaser synchronization construct. Our evaluation shows that it can be used as a drop-in replacement for classic barriers without harming performance, despite its additional complexity and potential for performance optimizations. Furthermore, our approach overcomes performance and scalability limitations which have been present in other phaser proposals.}, author = {Marr, Stefan and Verhaegen, Stijn and De Fraine, Bruno and D'Hondt, Theo and De Meuter, Wolfgang}, blog = {http://stefan-marr.de/2010/07/insertion-tree-phasers-efficient-and-scalable-barrier-synchronization-for-fine-grained-parallelism/}, booktitle = {Proceedings of the 12th IEEE International Conference on High Performance Computing and Communications}, doi = {10.1109/HPCC.2010.30}, isbn = {978-0-7695-4214-0}, keywords = {Barrier MeMyPublication Phasers X10 algorithm benchmarks clocks evaluation habanero insertionTree myown tree}, month = sep, note = {Best Student Paper AwardAcceptance Rate: 19,1% (58/304)}, pages = {130-137}, pdf = {http://www.stefan-marr.de/downloads/hpcc2010-marr-etal-insertion-tree-phasers.pdf}, publisher = {IEEE Computer Society}, series = {HPCC'10}, title = {Insertion Tree Phasers: Efficient and Scalable Barrier Synchronization for Fine-grained Parallelism}, url = {http://www.stefan-marr.de/2010/07/insertion-tree-phasers-efficient-and-scalable-barrier-synchronization-for-fine-grained-parallelism/}, year = {2010}, month_numeric = {9} }
@inproceedings{SOMFamily, abstract = {This paper introduces the SOM (Simple Object Machine) family of virtual machine (VM) implementations. Starting from a Java-based implementation, several ports of the VM to different programming languages have been developed and put to successful use in teaching at both undergraduate and graduate levels since 2006. Moreover, the VMs have been used in various research projects. We document the rationale behind each of the SOM VMs and results that have been achieved in teaching and research.}, acceptancerate = {0.51}, author = {Haupt, Michael and Hirschfeld, Robert and Pape, Tobias and Gabrysiak, Gregor and Marr, Stefan and Bergmann, Arne and Heise, Arvid and Kleine, Matthias and Krahn, Robert}, booktitle = {Proceedings of the 15th Annual Conference on Innovation and Technology in Computer Science Education}, day = {26--30}, doi = {10.1145/1822090.1822098}, isbn = {978-1-60558-729-5}, keywords = {CSOM MeMyPublication SOM Teaching VM myown}, location = {Bilkent, Ankara, Turkey}, month = jun, note = {(acceptance rate 51%)}, pages = {18--22}, pdf = {http://www.hpi.uni-potsdam.de/hirschfeld/publications/media/HauptHirschfeldPapeGabrysiakMarrBergmannHeiseKleineKrahn_2010_TheSomFamily_AcmDL.pdf}, publisher = {ACM Press}, series = {ITiCSE'10}, title = {The SOM Family: Virtual Machines for Teaching and Research}, url = {http://www.hpi.uni-potsdam.de/hirschfeld/publications/media/HauptHirschfeldPapeGabrysiakMarrBergmannHeiseKleineKrahn_2010_TheSomFamily_AcmDL.pdf}, year = {2010}, month_numeric = {6} }
@inproceedings{places09, abstract = {The upcoming many-core architectures require software developers to exploit concurrency to utilize available computational power. Today's high-level language virtual machines (VMs), which are a cornerstone of software development, do not provide sufficient abstraction for concurrency concepts. We analyze concrete and abstract concurrency models and identify the challenges they impose for VMs. To provide sufficient concurrency support in VMs, we propose to integrate concurrency operations into VM instruction sets. Since there will always be VMs optimized for special purposes, our goal is to develop a methodology to design instruction sets with concurrency support. Therefore, we also propose a list of trade-offs that have to be investigated to advise the design of such instruction sets. As a first experiment, we implemented one instruction set extension for shared memory and one for non-shared memory concurrency. From our experimental results, we derived a list of requirements for a full-grown experimental environment for further research.}, author = {Marr, Stefan and Haupt, Michael and Timbermont, Stijn and Adams, Bram and D'Hondt, Theo and Costanza, Pascal and De Meuter, Wolfgang}, blog = {http://stefan-marr.de/2010/02/virtual-machine-support-for-many-core-architectures-decoupling-abstract-from-concrete-concurrency-models/}, booktitle = {Second International Workshop on Programming Languages Approaches to Concurrency and Communication-cEntric Software}, doi = {10.4204/EPTCS.17.6}, keywords = {MeMyPublication myown}, month = feb, pages = {63-77}, pdf = {http://arxiv.org/pdf/1002.0939v1}, series = {PLACES'09 (EPTCS)}, title = {Virtual Machine Support for Many-Core Architectures: Decoupling Abstract From Concrete Concurrency Models}, volume = {17}, year = {2010}, month_numeric = {2} }
@inproceedings{VMIL09,
  abstract = {Today's major high-level language virtual machines (VMs) are becoming successful in being multi-language execution platforms, hosting a wide range of languages. With the transition from few-core to many-core processors, we argue that VMs will also have to abstract from concrete concurrency models at the hardware level, to be able to support a wide range of abstract concurrency models on a language level. To overcome the lack of sufficient abstractions for concurrency concepts in VMs, we proposed earlier to extend VM intermediate languages by special concurrency constructs. As a first step towards this goal, we try to fill a gap in the current literature and survey the intermediate language design of VMs. Our goal is to identify currently used techniques and principles as well as to gain an overview over the available concurrency related features in intermediate languages. Another aspect of interest is the influence of the particular target language, for which the VM is originally intended, on the intermediate language.},
  address = {New York, NY, USA},
  author = {Marr, Stefan and Haupt, Michael and D'Hondt, Theo},
  blog = {http://www.stefan-marr.de/2010/02/intermediate-language-design-of-high-level-language-virtual-machines-towards-comprehensive-concurrency-support/},
  booktitle = {Proceedings of the 3rd Workshop on Virtual Machines and Intermediate Languages},
  doi = {10.1145/1711506.1711509},
  isbn = {978-1-60558-874-2},
  keywords = {Bytecode Concurrency Design InstructionSet IntermediateLanguage Language Machines MeMyPublication Survey Virtual myown},
  month = oct,
  note = {(extended abstract)},
  pages = {3:1--3:2},
  pdf = {http://www.stefan-marr.de/downloads/vmil09-smarr.pdf},
  publisher = {ACM},
  series = {VMIL'09},
  title = {Intermediate Language Design of High-level Language Virtual Machines: Towards Comprehensive Concurrency Support},
  url = {http://www.stefan-marr.de/2010/02/intermediate-language-design-of-high-level-language-virtual-machines-towards-comprehensive-concurrency-support/},
  year = {2009},
  month_numeric = {10}
}
@inproceedings{TowardsACMM, abstract = {In this position paper we propose to extend an existing delegation-based machine model with concurrency primitives. The original machine model which is built on the concepts of objects, messages, and delegation, provides support for languages enabling multi-dimensional separation of concerns (MDSOC). We propose to extend this model with an actor-based concurrency model, allowing for both true parallelism as well as lightweight concurrency primitives such as coroutines. In order to demonstrate its expressiveness, we informally describe how three high-level languages supporting different concurrency models can be mapped onto our extended machine model. We also provide an outlook on the extended model's potential to support concurrency-related MDSOC features.}, author = {Schippers, Hans and Van Cutsem, Tom and Marr, Stefan and Haupt, Michael and Hirschfeld, Robert}, blog = {http://stefan-marr.de/2010/02/towards-an-actor-based-concurrent-machine-model/}, booktitle = {Proceedings of the Fourth Workshop on the Implementation, Compilation, Optimization of Object-Oriented Languages, Programs and Systems (ICOOOLPS)}, day = {6}, doi = {10.1145/1565824.1565825}, isbn = {978-1-60558-541-3}, keywords = {Actors Concurrency Io Java MDSOC MachineModel MeMyPublication Salsa VM myown}, location = {Genova, Italy}, month = jul, pages = {4--9}, pdf = {http://www.stefan-marr.de/downloads/icooolps09-schippers.pdf}, publisher = {ACM}, series = {ICOOOLPS'09}, title = {Towards an Actor-based Concurrent Machine Model}, url = {http://www.stefan-marr.de/2010/02/towards-an-actor-based-concurrent-machine-model/}, year = {2009}, month_numeric = {7} }
@mastersthesis{MAThesis, address = {Potsdam, Germany}, author = {Marr, Stefan}, keywords = {Me:MastersThesis MeMyPublication myown}, month = sep, pdf = {http://stefan-marr.de/downloads/Masterarbeit-Modularisierung-virtueller-Maschinen.pdf}, school = {Hasso Plattner Institute}, series = {Master Thesis}, title = {Modularisierung Virtueller Maschinen}, url = {http://www.stefan-marr.de/2008/10/i-am-done-eventually/}, year = {2008}, month_numeric = {9} }