% Workshop paper (USENIX TaPP '12) presenting the BioLite bioinformatics framework.
@inproceedings{howison_biolite_2012,
  author    = {Howison, Mark and {Sinnott-Armstrong}, Nicholas A. and Dunn, Casey W.},
  title     = {{{BioLite}, a lightweight bioinformatics framework with automated tracking of diagnostics and provenance}},
  booktitle = {{Proceedings of the 4th {USENIX} Workshop on the Theory and Practice of Provenance {(TaPP} '12)}},
  month     = jun,
  year      = {2012},
  url       = {https://www.usenix.org/conference/tapp12/biolite-lightweight-bioinformatics-framework-automated-tracking-diagnostics-and},
  abstract  = {We present a new {Python/C++} framework, {BioLite}, for implementing bioinformatics pipelines for {Next-Generation} Sequencing {(NGS)} data. {BioLite} tracks provenance of analyses, automates the collection and reporting of diagnostics (such as summary statistics and plots at intermediate stages), and profiles computational requirements. These diagnostics can be accessed across multiple stages of a pipeline, from other pipelines, and in {HTML} reports. Finally, we describe several use cases for diagnostics in our own analyses.},
  file      = {Howison et al. - 2012 - BioLite, a lightweight bioinformatics framework wi.pdf:/Users/mhowison/Documents/Zotero/storage/IBCEEVSC/Howison et al. - 2012 - BioLite, a lightweight bioinformatics framework wi.pdf:application/pdf}
}
% Journal article (IJHPCA, 2012) on tuning raycasting volume rendering for multi-/many-core.
% No volume or pages are recorded; the URL points at an early-online version.
@article{bethel_multi-core_2012,
  author   = {Bethel, E. Wes and Howison, Mark},
  title    = {{Multi-core and many-core shared-memory parallel raycasting volume rendering optimization and tuning}},
  journal  = {International Journal of High Performance Computing Applications},
  month    = apr,
  year     = {2012},
  doi      = {10.1177/1094342012440466},
  url      = {http://hpc.sagepub.com/content/early/2012/03/30/1094342012440466.abstract},
  abstract = {Given the computing industry trend of increasing processing capacity by adding more cores to a chip, the focus of this work is tuning the performance of a staple visualization algorithm, raycasting volume rendering, for shared-memory parallelism on multi-core {CPUs} and many-core {GPUs.} Our approach is to vary tunable algorithmic settings, along with known algorithmic optimizations and two different memory layouts, and measure performance in terms of absolute runtime and L2 memory cache misses. Our results indicate there is a wide variation in runtime performance on all platforms, as much as 254\% for the tunable parameters we test on multi-core {CPUs} and 265\% on many-core {GPUs}, and the optimal configurations vary across platforms, often in a non-obvious way. For example, our results indicate the optimal configurations on the {GPU} occur at a crossover point between those that maintain good cache utilization and those that saturate computational throughput. This result is likely to be extremely difficult to predict with an empirical performance model for this particular algorithm because it has an unstructured memory access pattern that varies locally for individual rays and globally for the selected viewpoint. Our results also show that optimal parameters on modern architectures are markedly different from those in previous studies run on older architectures. In addition, given the dramatic performance variation across platforms for both optimal algorithm settings and performance results, there is a clear benefit for production visualization and analysis codes to adopt a strategy for performance optimization through auto-tuning. These benefits will likely become more pronounced in the future as the number of cores per chip and the cost of moving data through the memory hierarchy both increase.},
  file     = {Bethel and Howison - 2012 - Multi-core and many-core shared-memory parallel ra.pdf:/Users/mhowison/Documents/Zotero/storage/VZWH248D/Bethel and Howison - 2012 - Multi-core and many-core shared-memory parallel ra.pdf:application/pdf}
}
% Journal article (IEEE TVCG, vol. 18, 2012) on hybrid parallelism for volume rendering.
@article{howison_hybrid_2012,
  author   = {Howison, Mark and Bethel, E. Wes and Childs, Hank},
  title    = {{Hybrid Parallelism for Volume Rendering on Large-, Multi-, and {Many-Core} Systems}},
  journal  = {{IEEE} Transactions on Visualization and Computer Graphics},
  volume   = {18},
  pages    = {17--29},
  year     = {2012},
  doi      = {10.1109/TVCG.2011.24},
  url      = {http://escholarship.org/uc/item/4n57d1mn},
  abstract = {With the computing industry trending towards multi- and many-core processors, we study how a standard visualization algorithm, ray-casting volume rendering, can benefit from a hybrid parallelism approach. Hybrid parallelism provides the best of both worlds: using distributed- memory parallelism across a large numbers of nodes increases available {FLOPs} and memory, while exploiting shared-memory parallelism among the cores within each node ensures that each node performs its portion of the larger calculation as efficiently as possible. We demonstrate results from weak and strong scaling studies, at levels of concurrency ranging up to 216,000, and with datasets as large as 12.2 trillion cells. The greatest benefit from hybrid parallelism lies in the communication portion of the algorithm, the dominant cost at higher levels of concurrency. We show that reducing the number of participants with a hybrid approach significantly improves performance.},
  file     = {Howison et al. - 2012 - Hybrid Parallelism for Volume Rendering on Large-,.pdf:/Users/mhowison/Documents/Zotero/storage/UX57HJH8/Howison et al. - 2012 - Hybrid Parallelism for Volume Rendering on Large-,.pdf:application/pdf}
}
% Conference paper (ACM SIGCHI CHI '11) on the Mathematical Imagery Trainer.
@inproceedings{howison_mathematical_2011,
  author    = {Howison, Mark and Trninic, Dragan and Reinholz, Daniel and Abrahamson, Dor},
  title     = {{The Mathematical Imagery Trainer: from embodied interaction to conceptual learning}},
  booktitle = {{Proceedings of the {ACM} {SIGCHI} Conference on Human Factors in Computing Systems {(CHI} '11)}},
  pages     = {1989--1998},
  year      = {2011},
  doi       = {10.1145/1978942.1979230},
  url       = {http://dl.acm.org/citation.cfm?id=1979230},
  abstract  = {We introduce an embodied-interaction instructional design, the Mathematical Imagery Trainer {(MIT)}, for helping young students develop grounded understanding of proportional equivalence (e.g., 2/3 = 4/6). Taking advantage of the low-cost availability of hand-motion tracking provided by the Nintendo Wii remote, the {MIT} applies cognitive-science findings that mathematical concepts are grounded in mental simulation of dynamic imagery, which is acquired through perceiving, planning, and performing actions with the body. We describe our rationale for and implementation of the {MIT} through a design-based research approach and report on clinical interviews with twenty-two 4th–6th grade students who engaged in problem-solving tasks with the {MIT.}},
  file      = {Howison et al. - 2011 - The Mathematical Imagery Trainer from embodied in.pdf:/Users/mhowison/Documents/Zotero/storage/CUNFN62M/Howison et al. - 2011 - The Mathematical Imagery Trainer from embodied in.pdf:application/pdf}
}
% Conference paper (SC '11, ACM) presenting the FastQuery parallel index/query framework.
% Pages use the article-number form "30:1--30:11" with the standard "--" range separator.
% Non-ASCII letters in author names are written as LaTeX escapes so that classic
% BibTeX sorts and labels them correctly.
@inproceedings{chou_parallel_2011,
  author    = {Chou, Jerry and Howison, Mark and Austin, Brian and Wu, Kesheng and Qiang, Ji and Bethel, E. Wes and Shoshani, Arie and R{\"u}bel, Oliver and {{Prabhat}} and Ryne, Rob D.},
  title     = {{Parallel index and query for large scale data analysis}},
  booktitle = {{Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis}},
  series    = {{SC} '11},
  pages     = {30:1--30:11},
  year      = {2011},
  publisher = {{ACM}},
  address   = {New York, {NY}, {USA}},
  isbn      = {978-1-4503-0771-0},
  doi       = {10.1145/2063384.2063424},
  url       = {http://doi.acm.org/10.1145/2063384.2063424},
  keywords  = {indexing systems, large scale data analysis},
  abstract  = {Modern scientific datasets present numerous data management and analysis challenges. State-of-the-art index and query technologies are critical for facilitating interactive exploration of large datasets, but numerous challenges remain in terms of designing a system for processing general scientific datasets. The system needs to be able to run on distributed multi-core platforms, efficiently utilize underlying {I/O} infrastructure, and scale to massive datasets.
We present {FastQuery}, a novel software framework that address these challenges. {FastQuery} utilizes a state-of-the-art index and query technology {(FastBit)} and is designed to process massive datasets on modern supercomputing platforms. We apply {FastQuery} to processing of a massive {50TB} dataset generated by a large scale accelerator modeling code. We demonstrate the scalability of the tool to 11,520 cores. Motivated by the scientific need to search for interesting particles in this dataset, we use our framework to reduce search time from hours to tens of seconds.},
  file      = {Chou et al. - 2011 - Parallel index and query for large scale data anal.pdf:/Users/mhowison/Documents/Zotero/storage/8AQEN7GB/Chou et al. - 2011 - Parallel index and query for large scale data anal.pdf:application/pdf}
}
% Journal article (IEEE Computer Graphics and Applications, vol. 30, 2010) on scaling
% production visualization software. The bare DOI is the path of the dx.doi.org URL;
% both are kept so DOI-aware styles and URL-only styles each have a link.
@article{childs_extreme_2010,
  author   = {Childs, Hank and Pugmire, David and Ahern, Sean and Whitlock, Brad and Howison, Mark and {{Prabhat}} and Weber, Gunther H. and Bethel, E. Wes},
  title    = {{Extreme Scaling of Production Visualization Software on Diverse Architectures}},
  journal  = {{IEEE} Computer Graphics and Applications},
  volume   = {30},
  pages    = {22--31},
  month    = may,
  year     = {2010},
  issn     = {0272-1716},
  doi      = {10.1109/MCG.2010.51},
  url      = {http://dx.doi.org/10.1109/MCG.2010.51},
  keywords = {computer graphics, Dawn, Denovo, graphics and multimedia, {I/O} performance, interprocess communication, many-core processing, petascale computing, pure parallelism, very large data sets, {VisIt}, visualization},
  abstract = {This article presents the results of experiments studying how the pure-parallelism paradigm scales to massive data sets, including 16,000 or more cores on trillion-cell meshes, the largest data sets published
to date in the visualization literature. The findings on scaling characteristics and bottlenecks contribute to understanding how pure parallelism will perform in the future.},
  file     = {Childs et al. - 2010 - Extreme scaling of production visualization software on diverse architectures.pdf:/Users/mhowison/Documents/Zotero/storage/EJ4SQADR/Childs et al. - 2010 - Extreme scaling of production visualization software on diverse architectures.pdf:application/pdf}
}
% Paper presented at the American Educational Research Association annual meeting (2010).
@inproceedings{abrahamson_kinemathics:_2010,
  author    = {Abrahamson, Dor and Howison, Mark},
  title     = {{Kinemathics: exploring kinesthetically induced mathematical learning}},
  booktitle = {{Paper presented at the annual meeting of the American Educational Research Association}},
  month     = may,
  year      = {2010},
  address   = {Denver, {CO}, {USA}},
  url       = {http://edrl.berkeley.edu/content/kinemathics-exploring-kinesthetically-induced-mathematical-learning},
  abstract  = {Building on growing evidence that human reasoning simulates multi-modal dynamical imagery drawn from lived experience, we conjectured that some mathematical concepts are challenging because their images are difficult to simulate mentally and that this difficulty, in turn, is directly implicative of the difficulty to enact these concepts physically. For example, two linked equal-rate linear growths are easier to process than two linked different-rate linear growths, because miming the latter is by far more difficult to coordinate ambidextrously. To examine our conjecture, we built a computer-based servo-mechanical device, the Mathematical Imagery Trainer, designed to induce in students the physically challenging concept of proportionality and measured for reflective-learning gains. The full paper reports on a study with thirty 4th-grade students.}
}
% Conference paper (IEEE IPDPS 2010) on a statistical approach to parallel I/O performance.
% Abbreviated given names are written as separate initials ("N. J.") so BibTeX parses
% each initial as its own name token.
@inproceedings{uselton_parallel_2010,
  author    = {Uselton, Andrew and Howison, Mark and Wright, N. J. and Skinner, David and Keen, Noel and Shalf, John and Karavanic, K. L. and Oliker, Leonid},
  title     = {{Parallel {I/O} performance: From events to ensembles}},
  booktitle = {{2010 {IEEE} International Symposium on Parallel \& Distributed Processing {(IPDPS)}}},
  pages     = {1--11},
  year      = {2010},
  publisher = {{IEEE}},
  address   = {Atlanta, {GA}},
  isbn      = {978-1-4244-6442-5},
  doi       = {10.1109/IPDPS.2010.5470424},
  url       = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=5470424},
  abstract  = {Parallel {I/O} is fast becoming a bottleneck to the research agendas of many users of extreme scale parallel computers. The principle cause of this is the concurrency explosion of high-end computation, coupled with the complexity of providing parallel file systems that perform reliably at such scales. More than just being a bottleneck, parallel {I/O} performance at scale is notoriously variable, being influenced by numerous factors inside and outside the application, thus making it extremely difficult to isolate cause and effect for performance events. In this paper, we propose a statistical approach to understanding {I/O} performance that moves from the analysis of performance events to the exploration of performance ensembles. Using this methodology, we examine two {I/O-intensive} scientific computations from cosmology and climate science, and demonstrate that our approach can identify application and middleware performance deficiencies - resulting in more than 4x run time improvement for both examined applications.},
  file      = {Uselton et al. - 2010 - Parallel IO performance From events to ensembles.pdf:/Users/mhowison/Documents/Zotero/storage/W7MGUCW6/Uselton et al. - 2010 - Parallel IO performance From events to ensembles.pdf:application/pdf}
}
% Workshop paper (IASDS10) on optimizing HDF5 and MPI-IO for the Lustre file system.
@inproceedings{howison_tuning_2010,
  author    = {Howison, Mark and Koziol, Quincey and Knaak, David and Mainzer, John and Shalf, John},
  title     = {{Tuning {HDF5} for Lustre File Systems}},
  booktitle = {{Workshop on Interfaces and Abstractions for Scientific Data Storage {(IASDS10)}}},
  year      = {2010},
  url       = {http://www.mcs.anl.gov/events/workshops/iasds10/howison_hdf5_lustre_iasds2010.pdf},
  abstract  = {{HDF5} is a cross-platform parallel {I/O} library that is used by a wide variety of {HPC} applications for the flexibility of its hierarchical object-database representation of scientific data. We describe our recent work to optimize the performance of the {HDF5} and {MPI-IO} libraries for the Lustre parallel file system. We selected three different {HPC} applications to represent the diverse range of {I/O} requirements, and measured their performance on three different systems to demonstrate the robustness of our optimizations across different file system configurations and to validate our optimization strategy. We demonstrate that the combined optimizations improve {HDF5} parallel {I/O} performance by up to 33 times in some cases - running close to the achievable peak performance of the underlying file system - and demonstrate scalable performance up to 40,960-way concurrency.},
  file      = {Howison et al. - 2010 - Tuning HDF5 for Lustre File Systems.pdf:/Users/mhowison/Documents/Zotero/storage/GM3W8A57/Howison et al. - 2010 - Tuning HDF5 for Lustre File Systems.pdf:application/pdf}
}
% Conference paper (Eurographics Symposium on Parallel Graphics and Visualization, 2010)
% on MPI-hybrid parallelism for raycasting volume rendering.
@inproceedings{howison_mpi-hybrid_2010,
  author    = {Howison, Mark and Bethel, E. Wes and Childs, Hank},
  title     = {{{MPI-hybrid} Parallelism for Volume Rendering on Large, Multi-core Systems}},
  booktitle = {{Proceedings of the Eurographics Symposium on Parallel Graphics and Visualization}},
  year      = {2010},
  url       = {http://escholarship.org/uc/item/4r0575d1.pdf},
  abstract  = {This work studies the performance and scalability characteristics of "hybrid" parallel programming and execution as applied to raycasting volume rendering - a staple visualization algorithm - on a large, multi-core platform. Historically, the Message Passing Interface {(MPI)} has become the de-facto standard for parallel programming and execution on modern parallel systems. As the computing industry trends towards multi-core processors, with four- and six-core chips common today and 128-core chips coming soon, we wish to better understand how algorithmic and parallel programming choices impact performance and scalability on large, distributed-memory multi-core systems. Our findings indicate that the hybrid-parallel implementation, at levels of concurrency ranging from 1,728 to 216,000, performs better, uses a smaller absolute memory footprint, and consumes less communication bandwidth than the traditional, {MPI-only} implementation.},
  file      = {Howison et al. - 2010 - MPI-hybrid Parallelism for Volume Rendering on Lar.pdf:/Users/mhowison/Documents/Zotero/storage/HKHC25CT/Howison et al. - 2010 - MPI-hybrid Parallelism for Volume Rendering on Lar.pdf:application/pdf}
}
% Workshop paper (IASDS10) presenting the H5hut I/O library for particle-based simulations.
@inproceedings{howison_h5hut:_2010,
  author    = {Howison, Mark and Adelmann, Andreas and Bethel, E. Wes and Gsell, Achim and Oswald, Benedikt and {{Prabhat}}},
  title     = {{H5hut: A {High-Performance} {I/O} Library for Particle-based Simulations}},
  booktitle = {{Workshop on Interfaces and Abstractions for Scientific Data Storage {(IASDS10)}}},
  year      = {2010},
  url       = {http://www.mcs.anl.gov/events/workshops/iasds10/howison_h5hut_iasds2010.pdf},
  abstract  = {Particle-based simulations running on large high-performance computing systems over many time steps can generate an enormous amount of particle- and field- based data for post-processing and analysis. Achieving high-performance {I/O} for this data, effectively managing it on disk, and interfacing it with analysis and visualization tools can be challenging, especially for domain scientists who do not have {I/O} and data management expertise. We present the H5hut library, an implementation of several data models for particle-based simulations that encapsulates the complexity of {HDF5} and is simple to use, yet does not compromise performance.},
  file      = {Howison et al. - 2010 - H5hut A High-Performance IO Library for Particle.pdf:/Users/mhowison/Documents/Zotero/storage/X24FCF38/Howison et al. - 2010 - H5hut A High-Performance IO Library for Particle.pdf:application/pdf}
}
% Technical report LBNL-3425E (Lawrence Berkeley National Laboratory, 2010) comparing
% CUDA implementations of bilateral and anisotropic diffusion filters.
@techreport{howison_comparing_2010,
  author      = {Howison, Mark},
  title       = {{Comparing {GPU} Implementations of Bilateral and Anisotropic Diffusion Filters for {3D} Biomedical Datasets}},
  institution = {Lawrence Berkeley National Laboratory},
  number      = {{LBNL-3425E}},
  address     = {Berkeley, {CA}, {USA}, 94720},
  year        = {2010},
  url         = {http://vis.lbl.gov/Publications/2010/LBNL-3425E.pdf},
  abstract    = {We compare the performance of hand-tuned {CUDA} implementations of bilateral and anisotropic diffusion filters for denoising {3D} {MRI} datasets. Our tests sweep comparable parameters for the two filters and measure total runtime, memory bandwidth, computational throughput, and mean squared errors relative to a noiseless reference dataset.},
  file        = {Howison - 2010 - Comparing GPU Implementations of Bilateral and Ani.pdf:/Users/mhowison/Documents/Zotero/storage/NTDH4BBA/Howison - 2010 - Comparing GPU Implementations of Bilateral and Ani.pdf:application/pdf}
}
% Paper from the Cray Users Group Meeting (CUG), 2009, on a stencil auto-tuning framework.
@inproceedings{kamil_generalized_2009,
  author    = {Kamil, Shoaib and Chan, Cy and Williams, Samuel and Oliker, Leonid and Shalf, John and Howison, Mark and Bethel, E. Wes and {{Prabhat}}},
  title     = {{A Generalized Framework for Auto-tuning Stencil Computations}},
  booktitle = {{Cray Users Group Meeting {(CUG)}}},
  year      = {2009},
  address   = {Atlanta, {GA}},
  url       = {http://escholarship.org/uc/item/23p6g5nj},
  keywords  = {auto-parallelization, auto-tuning, multicore, xt4},
  abstract  = {This work introduces a generalized framework for automatically tuning stencil computations to achieve superior performance on a broad range of multicore architectures. Stencil (nearest-neighbor) based kernels constitute the core of many important scientific applications involving block-structured grids. Auto-tuning systems search over optimization strategies to find the combination of tunable parameters that maximizes computational efficiency for a given algorithmic kernel. Although the auto-tuning strategy has been successfully applied to libraries, generalized stencil kernels are not amenable to packaging as libraries. Studied kernels in this work include both memory-bound kernels as well as a computation-bound bilateral filtering kernel. We introduce a generalized stencil auto-tuning framework that takes a straightforward Fortran expression of a stencil kernel and automatically generates tuned implementations of the kernel in C or Fortran to achieve performance portability across diverse computer architectures.},
  file      = {Kamil et al. - 2009 - A Generalized Framework for Auto-tuning Stencil Co.pdf:/Users/mhowison/Documents/Zotero/storage/2Q3CKXI6/Kamil et al. - 2009 - A Generalized Framework for Auto-tuning Stencil Co.pdf:application/pdf}
}
% Journal article (Computer-Aided Design and Applications, vol. 6, no. 6, 2009) on CAD
% tools for 3D Escher tiles. The accented surname is written as a LaTeX escape so
% classic BibTeX treats it as one letter; the file path keeps its on-disk spelling.
@article{howison_cad_2009,
  author   = {Howison, Mark and S{\'e}quin, Carlo H.},
  title    = {{{CAD} Tools for the Construction of {3D} Escher Tiles}},
  journal  = {{Computer-Aided} Design and Applications},
  volume   = {6},
  number   = {6},
  pages    = {737--748},
  year     = {2009},
  doi      = {10.3722/cadaps.2009.737-748},
  url      = {http://www.cadanda.com/CADandA_6_6_737-748.html},
  keywords = {3d tile generator, incremental interactive delaunay, isohedral tilings},
  abstract = {We discuss data structures, geometric algorithms, and user interface issues that arise in the construction of {CAD} tools for the design of irregular shapes that tile all of 3-space in a regular, isohedral manner.},
  file     = {Howison and Séquin - 2009 - CAD Tools for the Construction of 3D Escher Tiles.pdf:/Users/mhowison/Documents/Zotero/storage/U6A8DCUQ/Howison and Séquin - 2009 - CAD Tools for the Construction of 3D Escher Tiles.pdf:application/pdf}
}
% Master's thesis (University of California at Berkeley, 2009) on CAD tools for
% space-filling 3D Escher tiles. The entry type matches the degree; the explicit
% type field keeps the printed label as "Master's thesis".
@mastersthesis{howison_cad_2009-1,
  author   = {Howison, Mark},
  title    = {{{CAD} Tools for Creating Space-filling {3D} Escher Tiles}},
  school   = {University of California at Berkeley},
  type     = {Master's thesis},
  year     = {2009},
  url      = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2009/EECS-2009-56.pdf},
  abstract = {We discuss the design and implementation of {CAD} tools for creating decorative solids that tile 3-space in a regular, isohedral manner. Isohedral tilings of the plane, as popularized by M. C. Escher, can be constructed by hand or using existing tools on the web. Specialized {CAD} tools have also been developed for tiling other 2-manifolds. This work addresses the question: How can we generate interesting tilings of 3-space? To generate boundary representations of {3D} tiles, we have implemented an interactive constrained Delaunay triangulation algorithm. In addition, we have designed a specialized meshcutting algorithm used in layering extruded {2D} tiles to create intricate space-filling designs. We describe visual debugging methods used during the implementation of these two geometric algorithms, and also explain user-interface decisions we made in designing the {CAD} tools. Finally, we show examples of {3D} tilings that are derived from extruded {2D} shapes and from {3D} cubic lattices.},
  file     = {Howison - 2009 - CAD Tools for Creating Space-filling 3D Escher Til.pdf:/Users/mhowison/Documents/Zotero/storage/4DEANMHR/Howison - 2009 - CAD Tools for Creating Space-filling 3D Escher Til.pdf:application/pdf}
}
% Paper presented at the American Educational Research Association annual conference (2008).
@inproceedings{abrahamson_toward_2008,
  author    = {Abrahamson, Dor and Bryant, Michael J. and Howison, Mark and {Relaford-Doyle}, Josephine J.},
  title     = {{Toward a phenomenology of mathematical artifacts: A circumspective deconstruction of a design for the binomial}},
  booktitle = {{Paper presented at the annual conference of the American Educational Research Association}},
  month     = mar,
  year      = {2008},
  address   = {New York, {NY}, {USA}},
  url       = {http://edrl.berkeley.edu/content/toward-phenomenology-mathematical-artifacts-circumspective-deconstruction-design-binomial},
  abstract  = {We demonstrate the potential of an innovative design-based research methodology - manipulation of normative features of didactic routines - to illuminate and support the integration of constructivist and socio-cultural theoretical models pertaining to the roles of epistemic and material resources at play in students’ mediated development of mathematical concepts. We present three cases of individual middle-school students who participated in a study of probabilistic cognition, where the design was for students to coordinate their intuition for the outcome of an experiment with a stochastic device with its sample space that they build through combinatorial analysis. The activity sequencing was non-normative in that only having completed the combinatorial analysis did students recognize its pertinence to their grounding intuition, and that recognition is the focus of our analysis. By uncovering students’ heuristics as well as the pragmatics of the student–interviewer dyad as they communicate over properties of the learning materials and negotiate activities embedded in the design, we implicate a major challenge of struggling students to be their underdeveloped epistemic disposition - they do not expect to be able to “solve the world” and do not skillfully use mathematical representations let alone trust them to bear on their inquiry. To grow into {STEM} content, these students require early nurturing into problem-solving socio–cognitive norms.}
}
This file was generated by bibtex2html 1.97.