@proceedings {680651, title = {Assuage: Assembly Synthesis Using A Guided Exploration}, journal = {The 34th ACM Symposium on User Interface Software and Technology (UIST{\textquoteright}21)}, year = {2021}, pages = {134{\textendash}148}, address = {Virtual Event, USA}, abstract = {Assembly programming is challenging, even for experts. Program synthesis, as an alternative to manual implementation, has the potential to let both expert and non-expert users generate programs automatically. However, current tools and techniques cannot synthesize assembly programs longer than a few instructions. We present Assuage: ASsembly Synthesis Using A Guided Exploration, a parallel, interactive assembly synthesizer that engages the user as an active collaborator, enabling synthesis to scale beyond current limits. Using Assuage, users can provide two types of semantically meaningful hints that expedite synthesis and allow multiple possibilities to be explored simultaneously. Assuage exposes information about the underlying synthesis process through multiple representations to help users guide synthesis. We conducted a within-subjects study with twenty-one participants working on assembly programming tasks. With Assuage, participants across a wide range of expertise achieved significantly higher success rates, reported lower subjective workload, and preferred Assuage over a state-of-the-art synthesis tool in both usefulness and usability.}, url = {https://doi.org/10.1145/3472749.3474740}, author = {Jingmei Hu and Priyan Vaithilingam and Stephen Chong and Margo Seltzer and Elena L. Glassman}}
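As an illustration of the guided-exploration idea in the Assuage abstract above, the following sketch shows hint-pruned enumerative synthesis over a toy three-instruction assembly language. Everything here (the instruction set, the must_use hint, and the synthesize function) is a hypothetical simplification for exposition, not Assuage's actual interface or algorithm; the real system works at far larger scale and with richer hints.

    from itertools import product

    REGS = ["r0", "r1"]
    OPS = ["mov", "add", "xor"]  # toy instruction set, not a real ISA

    def run(prog, state):
        # Interpret a list of (op, dst, src) tuples over a register file.
        state = dict(state)
        for op, dst, src in prog:
            if op == "mov":
                state[dst] = state[src]
            elif op == "add":
                state[dst] = state[dst] + state[src]
            elif op == "xor":
                state[dst] = state[dst] ^ state[src]
        return state

    def synthesize(spec, max_len, must_use=frozenset()):
        # Enumerate candidate programs of increasing length, pruning any
        # candidate that violates the user's hint (opcodes it must contain).
        instrs = [(op, d, s) for op in OPS for d in REGS for s in REGS]
        for n in range(1, max_len + 1):
            for prog in product(instrs, repeat=n):
                if not set(must_use) <= {op for op, _, _ in prog}:
                    continue  # the hint prunes this candidate early
                if all(run(prog, pre) == post for pre, post in spec):
                    return prog
        return None

    # Specification by input/output examples: swap r0 and r1 in place.
    spec = [({"r0": 5, "r1": 9}, {"r0": 9, "r1": 5}),
            ({"r0": 1, "r1": 2}, {"r0": 2, "r1": 1})]
    # Finds a 3-instruction swap (e.g., the classic xor-swap sequence).
    print(synthesize(spec, max_len=3, must_use={"xor"}))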
@proceedings {670037, title = {Improving Data Scientist Efficiency with Provenance}, journal = {The ACM/IEEE 42nd International Conference on Software Engineering (ICSE{\textquoteright}20)}, year = {2020}, pages = {1086{\textendash}1097}, address = {Seoul, South Korea}, abstract = {Data scientists frequently analyze data by writing scripts. We conducted a contextual inquiry with interdisciplinary researchers, which revealed that parameter tuning is a highly iterative process and that debugging is time-consuming. As analysis scripts evolve and become more complex, analysts have difficulty conceptualizing their workflow. In particular, after editing a script, it becomes difficult to determine precisely which code blocks depend on the edit. Consequently, scientists frequently re-run entire scripts instead of re-running only the necessary parts. We present ProvBuild, a tool that leverages language-level provenance to streamline the debugging process by reducing programmer cognitive load and decreasing subsequent runtimes, leading to an overall reduction in elapsed debugging time. ProvBuild uses provenance to track dependencies in a script. When an analyst debugs a script, ProvBuild generates a simplified script that contains only the information necessary to debug a particular problem. We demonstrate that debugging the simplified script lowers a programmer{\textquoteright}s cognitive load and permits faster re-execution when testing changes. The combination of reduced cognitive load and shorter runtime reduces the time necessary to debug a script. We show quantitatively and qualitatively that even though ProvBuild introduces overhead during a script{\textquoteright}s first execution, it is a more efficient way for users to debug and tune complex workflows. ProvBuild demonstrates a novel use of language-level provenance: it proactively improves programmer productivity rather than merely providing a way to retroactively gain insight into a body of code.}, url = {https://doi.org/10.1145/3377811.3380366}, author = {Jingmei Hu and Jiwon Joung and Maia Jacobs and Krzysztof Z. Gajos and Margo I. Seltzer}}

@proceedings {645577, title = {ProvBuild: Improving Data Scientist Efficiency with Provenance (An Extended Abstract)}, journal = {The ACM/IEEE 42nd International Conference on Software Engineering (ICSE{\textquoteright}20): Companion Proceedings}, year = {2020}, pages = {266{\textendash}267}, address = {Seoul, South Korea}, abstract = {Data scientists frequently analyze data by writing scripts. We conducted a contextual inquiry with interdisciplinary researchers, which revealed that parameter tuning is a highly iterative process and that debugging is time-consuming. As analysis scripts evolve and become more complex, analysts have difficulty conceptualizing their workflow. In particular, after editing a script, it becomes difficult to determine precisely which code blocks depend on the edit. Consequently, scientists frequently re-run entire scripts instead of re-running only the necessary parts. We present ProvBuild, a data analysis environment that uses change impact analysis to improve the iterative debugging process in script-based workflow pipelines. ProvBuild leverages language-level provenance to streamline debugging by reducing programmer cognitive load and decreasing subsequent runtimes, leading to an overall reduction in elapsed debugging time. ProvBuild uses provenance to track dependencies in a script. When an analyst debugs a script, ProvBuild generates a simplified script that contains only the information necessary to debug a particular problem. We demonstrate that debugging the simplified script lowers a programmer{\textquoteright}s cognitive load and permits faster re-execution when testing changes. The combination of reduced cognitive load and shorter runtime reduces the time necessary to debug a script. We show quantitatively and qualitatively that even though ProvBuild introduces overhead during a script{\textquoteright}s first execution, it is a more efficient way for users to debug and tune complex workflows. ProvBuild demonstrates a novel use of language-level provenance: it proactively improves programmer productivity rather than merely providing a way to retroactively gain insight into a body of code. To the best of our knowledge, ProvBuild is a novel application of change impact analysis and the first debugging tool to leverage language-level provenance to reduce cognitive load and execution time.}, url = {https://doi.org/10.1145/3377812.3390912}, author = {Jingmei Hu and Jiwon Joung and Maia Jacobs and Krzysztof Z. Gajos and Margo I. Seltzer}}
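To make the dependency-tracking idea in the two ProvBuild abstracts concrete, here is a minimal sketch that approximates it with a static def-use analysis over a script's top-level statements: after an edit, only the statements transitively reachable from the edited definition are kept for re-execution. The example script, the variable names, and the impacted() helper are all hypothetical; ProvBuild itself uses language-level provenance collected at runtime, which is more precise than this static approximation.

    import ast

    script = """
    alpha = 0.1
    raw = load()
    clean = normalize(raw)
    model = fit(clean, alpha)
    plot(raw)
    report(model)
    """

    def names(stmt, ctx):
        # Collect variable names read (ast.Load) or written (ast.Store).
        return {n.id for n in ast.walk(stmt)
                if isinstance(n, ast.Name) and isinstance(n.ctx, ctx)}

    stmts = ast.parse(script).body
    defs = [names(s, ast.Store) for s in stmts]
    uses = [names(s, ast.Load) for s in stmts]

    def impacted(edited_var):
        # Walk the script in order, propagating "dirtiness" through defs:
        # a statement is kept if it reads or writes anything dirty.
        dirty, hit = {edited_var}, []
        for i, s in enumerate(stmts):
            if (uses[i] | defs[i]) & dirty:
                hit.append(s)
                dirty |= defs[i]
        return hit

    # Tuning alpha should re-run only fit() and report(), skipping
    # load(), normalize(), and plot().
    for s in impacted("alpha"):
        print(ast.unparse(s))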
@proceedings {643012, title = {Trials and Tribulations in Synthesizing Operating Systems}, journal = {The 10th Workshop on Programming Languages and Operating Systems (PLOS{\textquoteright}19)}, year = {2019}, pages = {67{\textendash}73}, address = {Huntsville, Ontario, Canada}, abstract = {Recent advances in program synthesis convinced us that the time was right to transform the process of porting an operating system into a program synthesis problem. We set out to synthesize the machine-dependent code needed by an existing operating system. This undertaking proved far more challenging than we anticipated. We summarize our experience and the lessons we learned, and propose next steps toward realizing such an undertaking.}, url = {https://dl.acm.org/citation.cfm?id=3365401}, author = {Jingmei Hu and Eric Lu and David A. Holland and Ming Kawaguchi and Stephen Chong and Margo I. Seltzer}}

@report {643008, title = {Aquarium: Cassiopea and Alewife Languages}, year = {2019}, abstract = {This technical report describes two of the domain-specific languages used in the Aquarium kernel code synthesis project, presenting the language cores in terms of their abstract syntax. Cassiopea is a machine description language for describing the semantics of processor instruction sets. Alewife is a specification language for writing machine-independent specifications of assembly-level instruction blocks. An Alewife specification can be used to verify and synthesize code for any machine described in Cassiopea, given a machine-specific translation for the abstractions used in the specification. This report does not include an introduction to either the Aquarium system or the use of the languages. This version is a draft, and the Aquarium project and the languages are works in progress; the report cannot currently be considered either final or complete.}, url = {https://arxiv.org/abs/1908.00093}, author = {David A. Holland and Jingmei Hu and Ming Kawaguchi and Eric Lu and Stephen Chong and Margo I. Seltzer}}

@article {625405, title = {ShakeIn: secure user authentication of smartphones with single-handed shakes}, journal = {IEEE Transactions on Mobile Computing}, volume = {16}, number = {10}, year = {2017}, pages = {2901{\textendash}2912}, abstract = {Smartphones are widely used and store a vast array of sensitive and private information. To secure such information from being leaked, user authentication schemes are necessary. Current password- and pattern-based user authentication schemes are vulnerable to shoulder-surfing attacks and smudge attacks, while stroke- and gait-based schemes are secure but inconvenient for users. In this paper, we propose ShakeIn, a handy user authentication scheme for securely unlocking a smartphone by simply shaking it. Using embedded motion sensors, ShakeIn effectively captures unique and reliable biometric features of how a user shakes, so that even an attacker who watches a user shake his or her phone can hardly reproduce the same behavior. Furthermore, by allowing users to customize the way they shake the phone, ShakeIn gives users maximum operational flexibility. We implement ShakeIn and conduct both intensive trace-driven simulations and real experiments with 20 volunteers, collecting 530,555 shaking samples over multiple months. The results show that ShakeIn achieves an average equal error rate of 1.2 percent with a small number of shakes, using only 35 training samples, even in the presence of shoulder-surfing attacks.}, url = {https://www.computer.org/csdl/journal/tm/2017/10/07814282/13rRUwwJWGl}, author = {Hongzi Zhu and Jingmei Hu and Shan Chang and Li Lu}}
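The 1.2 percent equal error rate (EER) reported in the ShakeIn abstract is the standard biometric operating point where the false accept rate and the false reject rate coincide. The sketch below shows how an EER is computed from classifier score distributions by sweeping a decision threshold; the score values are synthetic and purely illustrative, not ShakeIn's data or method.

    import numpy as np

    rng = np.random.default_rng(0)
    genuine = rng.normal(0.8, 0.1, 1000)   # synthetic scores: true user's shakes
    impostor = rng.normal(0.4, 0.1, 1000)  # synthetic scores: attacker's shakes

    thresholds = np.linspace(0.0, 1.0, 1001)
    far = np.array([(impostor >= t).mean() for t in thresholds])  # false accept rate
    frr = np.array([(genuine < t).mean() for t in thresholds])    # false reject rate

    # The EER is the error rate where the two curves cross.
    i = int(np.argmin(np.abs(far - frr)))
    print(f"EER ~ {(far[i] + frr[i]) / 2:.3%} at threshold {thresholds[i]:.2f}")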