author = {Nielsen, Jonas B and Fritsche, Lars and Zhou, Wei and Teslovich, Tanya M and Holmen, Oddgeir L and Gustafsson, Stefan and Elvestad, Maiken B and Schmidt, Ellen M and Beaumont, Robin and Wolford, Brooke N and Lin, Maoxuan and Brummett, Chad M and Preuss, Michael H and Refsgaard, Lena and Bottinger, Erwin P and Graham, Sarah E and Surakka, Ida and Chu, Yunhan and Skogholt, Anne Heidi and Dalen, H{\aa}vard and Boyle, Alan P and Oral, Hakan and Herron, Todd J and Kitzman, Jacob and Jalife, Jos{\'e} and Svendsen, Jesper H and Olesen, Morten S and Nj{\o}lstad, Inger and L{\o}chen, Maja-Lisa and Baras, Aris and Gottesman, Omri and Marcketta, Anthony and {O'Dushlaine}, Colm and Ritchie, Marylyn D and Wilsgaard, Tom and Loos, Ruth J F and Frayling, Timothy M and Boehnke, Michael and Ingelsson, Erik and Carey, David J and Dewey, Frederick E and Kang, Hyun M and Abecasis, Gon{\c c}alo R and Hveem, Kristian and Willer, Cristen J},
  title = {{Genome-wide association analysis of atrial fibrillation identifies two new risk loci and highlights biological pathways and regulatory elements involved in cardiac development}},
  year = {2017},
  journal = {American Journal of Human Genetics},
  note = {Accepted.}
  author = {Diehl, Adam G and Boyle, Alan P},
  title = {Transcription factor co-binding patterns drive conserved regulatory outcomes},
  year = {2017},
  doi = {10.1101/189571},
  publisher = {Cold Spring Harbor Laboratory},
  abstract = {The mouse has been widely used as a model system in which to study human genetic mechanisms. However, part of the difficulty in translating findings from mouse is that, despite high levels of gene conservation, regulatory control networks between human and mouse have been extensively rewired. To understand common themes of regulatory control we look beyond physical sharing of regulatory sequence, where extensive turnover of individual transcription factor binding sites complicates cross-species prediction of specific functions, and instead look at conserved properties of the regulatory code itself. We define regulatory conservation in terms of a grammar with shared, species-specific, and tissue-specific segments, and show that this grammar is more predictive of shared chromatin states and gene expression profiles than shared occupancy alone. Furthermore, we demonstrate a marked enrichment of disease associated variation in conserved grammatical patterns. These findings offer new understanding of transcriptional regulatory mechanisms shared between human and mouse.},
  url = {},
  eprint = {},
  journal = {bioRxiv}
  author = {Spadafore, Maxwell and Najarian, Kayvan and Boyle, Alan P},
  title = {{A proximity-based graph clustering method for the identification and application of transcription factor clusters.}},
  journal = {BMC Bioinformatics},
  year = {2017},
  volume = {18},
  number = {1},
  pages = {530},
  month = nov,
  doi = {10.1186/s12859-017-1935-y},
  abstract = {BACKGROUND: Transcription factors (TFs) form a complex regulatory network within the cell that is crucial to cell functioning and human health. While methods to establish where a TF binds to DNA are well established, these methods provide no information describing how TFs interact with one another when they do bind. TFs tend to bind the genome in clusters, and current methods to identify these clusters are either limited in scope, unable to detect relationships beyond motif similarity, or not applied to TF-TF interactions. METHODS: Here, we present a proximity-based graph clustering approach to identify TF clusters using either ChIP-seq or motif search data. We use TF co-occurrence to construct a filtered, normalized adjacency matrix and use the Markov Clustering Algorithm to partition the graph while maintaining TF-cluster and cluster-cluster interactions. We then apply our graph structure beyond clustering, using it to increase the accuracy of motif-based TFBS searching for an example TF. RESULTS: We show that our method produces small, manageable clusters that encapsulate many known, experimentally validated transcription factor interactions and that our method is capable of capturing interactions that motif similarity methods might miss. Our graph structure is able to significantly increase the accuracy of motif TFBS searching, demonstrating that the TF-TF connections within the graph correlate with biological TF-TF interactions. CONCLUSION: The interactions identified by our method correspond to biological reality and allow for fast exploration of TF clustering and regulatory dynamics.},
  url = {},
  pdf = {},
  note = {{PMID:} 29187152}
  author = {*Yang, Bo and *Zhou, Wei and *Jiao, Jiao and Nielsen, Jonas B and Mathis, Michael R and Heydarpour, Mahyar and Lettre, Guillaume and Folkersen, Lasse and Prakash, Siddharth and Schurmann, Claudia and Fritsche, Lars and Farnum, Gregory A and Lin, Maoxuan and Othman, Mohammad and Hornsby, Whitney and Driscoll, Anisa and Levasseur, Alexandra and Thomas, Marc and Farhat, Linda and Dub{\'e}, Marie-Pierre and Isselbacher, Eric M and Franco-Cereceda, Anders and Guo, Dong-chuan and Bottinger, Erwin P and Deeb, G Michael and Booher, Anna and Kheterpal, Sachin and Chen, Y Eugene and Kang, Hyun Min and Kitzman, Jacob and Cordell, Heather J and Keavney, Bernard D and Goodship, Judith A and Ganesh, Santhi K and Abecasis, Gon{\c c}alo and Eagle, Kim A and Boyle, Alan P and Loos, Ruth J F and {\dag}Eriksson, Per and {\dag}Tardif, Jean-Claude and {\dag}Brummett, Chad M and {\dag}Milewicz, Dianna M and {\dag}Body, Simon C and {\dag}Willer, Cristen J},
  title = {{Protein-altering and regulatory genetic variants near GATA4 implicated in bicuspid aortic valve.}},
  journal = {Nature Communications},
  year = {2017},
  volume = {8},
  pages = {15481},
  month = may,
  doi = {10.1038/ncomms15481},
  abstract = {Bicuspid aortic valve (BAV) is a heritable congenital heart defect and an important risk factor for valvulopathy and aortopathy. Here we report a genome-wide association scan of 466 BAV cases and 4,660 age, sex and ethnicity-matched controls with replication in up to 1,326 cases and 8,103 controls. We identify association with a noncoding variant 151 kb from the gene encoding the cardiac-specific transcription factor, GATA4, and near-significance for p.Ser377Gly in GATA4. GATA4 was interrupted by CRISPR-Cas9 in induced pluripotent stem cells from healthy donors. The disruption of GATA4 significantly impaired the transition from endothelial cells into mesenchymal cells, a critical step in heart valve development.},
  url = {},
  pdf = {},
  note = {{PMID:} 28541271}
  author = {Nishizaki, Sierra S and Boyle, Alan P},
  title = {{Mining the Unknown: Assigning Function to Noncoding Single Nucleotide Polymorphisms.}},
  journal = {Trends in Genetics},
  year = {2017},
  volume = {33},
  number = {1},
  pages = {34--45},
  month = jan,
  doi = {10.1016/j.tig.2016.10.008},
  abstract = {One of the formative goals of genetics research is to understand how genetic variation leads to phenotypic differences and human disease. Genome-wide association studies (GWASs) bring us closer to this goal by linking variation with disease faster than ever before. Despite this, GWASs alone are unable to pinpoint disease-causing single nucleotide polymorphisms (SNPs). Noncoding SNPs, which represent the majority of GWAS SNPs, present a particular challenge. To address this challenge, an array of computational tools designed to prioritize and predict the function of noncoding GWAS SNPs have been developed. However, fewer than 40\% of GWAS publications from 2015 utilized these tools. We discuss several leading methods for annotating noncoding variants and how they can be integrated into research pipelines in hopes that they will be broadly applied in future GWAS analyses.},
  url = {},
  pdf = {},
  note = {{PMID:} 27939749}
  author = {Diehl, Adam G and Boyle, Alan P},
  title = {{Deciphering ENCODE.}},
  journal = {Trends in Genetics},
  year = {2016},
  volume = {32},
  number = {4},
  pages = {238--249},
  month = mar,
  doi = {10.1016/j.tig.2016.02.002},
  abstract = {The ENCODE project represents a major leap from merely describing and comparing genomic sequences to surveying them for direct indicators of function. The astounding quantity of data produced by the ENCODE consortium can serve as a map to locate specific landmarks, guide hypothesis generation, and lead us to principles and mechanisms underlying genome biology. Despite its broad appeal, the size and complexity of the repository can be intimidating to prospective users. We present here some background about the ENCODE data, survey the resources available for accessing them, and describe a few simple principles to help prospective users choose the data type(s) that best suit their needs, where to get them, and how to use them to their best advantage.},
  url = {},
  pdf = {},
  note = {{PMID:} 26962025}
  author = {Phanstiel, Douglas H and Boyle, Alan P and Heidari, Nastaran and Snyder, Michael P},
  title = {{Mango: A bias correcting ChIA-PET analysis pipeline}},
  year = {2015},
  doi = {10.1093/bioinformatics/btv336},
  abstract = {Motivation: Chromatin Interaction Analysis by Paired-End Tag sequencing (ChIA-PET) is an established method for detecting genome-wide looping interactions at high resolution. Current ChIA-PET analysis software packages either fail to correct for non-specific interactions due to genomic proximity or only address a fraction of the steps required for data processing. We present Mango, a complete ChIA-PET data analysis pipeline that provides statistical confidence estimates for interactions and corrects for major sources of bias including differential peak enrichment and genomic proximity. Results: Comparison to the existing software packages, ChIA-PET Tool and ChiaSig revealed that Mango interactions exhibit much better agreement with high-resolution Hi-C data. Importantly, Mango executes all steps required for processing ChIA-PET datasets, whereas ChiaSig only completes 20\% of the required steps. Application of Mango to multiple available ChIA-PET datasets permitted the independent rediscovery of known trends in chromatin loops including enrichment of CTCF, RAD21, SMC3 and ZNF143 at the anchor regions of interactions and strong bias for convergent CTCF motifs. Availability and implementation: Mango is open source and distributed through github. Contact: mpsnyder@stanford.edu. Supplementary information: Supplementary data are available at Bioinformatics online.},
  url = {},
  journal = {Bioinformatics},
  pdf = {},
  note = {{PMID:} 26034063}
  author = {*Cheng, Yong and *Ma, Zhihai and Kim, Bong-Hyun and Wu, Weisheng and Cayting, Philip and Boyle, Alan P and Sundaram, Vasavi and Xing, Xiaoyun and Dogan, Nergiz and Li, Jingjing and Euskirchen, Ghia and Lin, Shin and Lin, Yiing and Visel, Axel and Kawli, Trupti and Yang, Xinqiong and Patacsil, Dorrelyn and Keller, Cheryl A and Giardine, Belinda and {Mouse ENCODE Consortium} and Kundaje, Anshul and Wang, Ting and Pennacchio, Len A and Weng, Zhiping and {\dag}Hardison, Ross C and {\dag}Snyder, Michael P},
  title = {{Principles of regulatory information conservation between mouse and human.}},
  journal = {Nature},
  year = {2014},
  volume = {515},
  number = {7527},
  pages = {371--375},
  month = nov,
  doi = {10.1038/nature13985},
  abstract = {To broaden our understanding of the evolution of gene regulation mechanisms, we generated occupancy profiles for 34 orthologous transcription factors (TFs) in human-mouse erythroid progenitor, lymphoblast and embryonic stem-cell lines. By combining the genome-wide transcription factor occupancy repertoires, associated epigenetic signals, and co-association patterns, here we deduce several evolutionary principles of gene regulatory features operating since the mouse and human lineages diverged. The genomic distribution profiles, primary binding motifs, chromatin states, and DNA methylation preferences are well conserved for TF-occupied sequences. However, the extent to which orthologous DNA segments are bound by orthologous TFs varies both among TFs and with genomic location: binding at promoters is more highly conserved than binding at distal elements. Notably, occupancy-conserved TF-occupied sequences tend to be pleiotropic; they function in several tissues and also co-associate with many TFs. Single nucleotide variants at sites with potential regulatory functions are enriched in occupancy-conserved TF-occupied sequences.},
  url = {},
  pdf = {},
  note = {{PMID:} 25409826}
  author = {*Yue, Feng and *Cheng, Yong and *Breschi, Alessandra and *Vierstra, Jeff and *Wu, Weisheng and *Ryba, Tyrone and *Sandstrom, Richard and *Ma, Zhihai and *Davis, Carrie and *Pope, Benjamin D and *Shen, Yin and Pervouchine, Dmitri D and Djebali, Sarah and Thurman, Robert E and Kaul, Rajinder and Rynes, Eric and Kirilusha, Anthony and Marinov, Georgi K and Williams, Brian A and Trout, Diane and Amrhein, Henry and Fisher-Aylor, Katherine and Antoshechkin, Igor and DeSalvo, Gilberto and See, Lei-Hoon and Fastuca, Meagan and Drenkow, Jorg and Zaleski, Chris and Dobin, Alex and Prieto, Pablo and Lagarde, Julien and Bussotti, Giovanni and Tanzer, Andrea and Denas, Olgert and Li, Kanwei and Bender, M A and Zhang, Miaohua and Byron, Rachel and Groudine, Mark T and McCleary, David and Pham, Long and Ye, Zhen and Kuan, Samantha and Edsall, Lee and Wu, Yi-Chieh and Rasmussen, Matthew D and Bansal, Mukul S and Kellis, Manolis and Keller, Cheryl A and Morrissey, Christapher S and Mishra, Tejaswini and Jain, Deepti and Dogan, Nergiz and Harris, Robert S and Cayting, Philip and Kawli, Trupti and Boyle, Alan P and Euskirchen, Ghia and Kundaje, Anshul and Lin, Shin and Lin, Yiing and Jansen, Camden and Malladi, Venkat S and Cline, Melissa S and Erickson, Drew T and Kirkup, Vanessa M and Learned, Katrina and Sloan, Cricket A and Rosenbloom, Kate R and Lacerda de Sousa, Beatriz and Beal, Kathryn and Pignatelli, Miguel and Flicek, Paul and Lian, Jin and Kahveci, Tamer and Lee, Dongwon and Kent, W James and Ramalho Santos, Miguel and Herrero, Javier and Notredame, Cedric and Johnson, Audra and Vong, Shinny and Lee, Kristen and Bates, Daniel and Neri, Fidencio and Diegel, Morgan and Canfield, Theresa and Sabo, Peter J and Wilken, Matthew S and Reh, Thomas A and Giste, Erika and Shafer, Anthony and Kutyavin, Tanya and Haugen, Eric and Dunn, Douglas and Reynolds, Alex P and Neph, Shane and Humbert, Richard and Hansen, R Scott and De Bruijn, Marella and Selleri, Licia and 
Rudensky, Alexander and Josefowicz, Steven and Samstein, Robert and Eichler, Evan E and Orkin, Stuart H and Levasseur, Dana and Papayannopoulou, Thalia and Chang, Kai-Hsin and Skoultchi, Arthur and Gosh, Srikanta and Disteche, Christine and Treuting, Piper and Wang, Yanli and Weiss, Mitchell J and Blobel, Gerd A and Cao, Xiaoyi and Zhong, Sheng and Wang, Ting and Good, Peter J. and Lowdon, Rebecca F. and Adams, Leslie B and Zhou, Xiao-Qiao and Pazin, Michael J and Feingold, Elise A. and Wold, Barbara and Taylor, James and Mortazavi, Ali and Weissman, Sherman M and Stamatoyannopoulos, John A and Snyder, Michael P and Guigo, Roderic and Gingeras, Thomas R. and Gilbert, David M and Hardison, Ross C and Beer, Michael A and Ren, Bing and {Mouse ENCODE Consortium}},
  title = {{A comparative encyclopedia of DNA elements in the mouse genome.}},
  journal = {Nature},
  year = {2014},
  volume = {515},
  number = {7527},
  pages = {355--364},
  month = nov,
  doi = {10.1038/nature13992},
  abstract = {The laboratory mouse shares the majority of its protein-coding genes with humans, making it the premier model organism in biomedical research, yet the two mammals differ in significant ways. To gain greater insights into both shared and species-specific transcriptional and cellular regulatory programs in the mouse, the Mouse ENCODE Consortium has mapped transcription, DNase I hypersensitivity, transcription factor binding, chromatin modifications and replication domains throughout the mouse genome in diverse cell and tissue types. By comparing with the human genome, we not only confirm substantial conservation in the newly annotated potential functional sequences, but also find a large degree of divergence of sequences involved in transcriptional regulation, chromatin state and higher order chromatin organization. Our results illuminate the wide range of evolutionary forces acting on genes and their regulatory regions, and provide a general resource for research into mammalian biology and mechanisms of human diseases.},
  url = {},
  pdf = {},
  note = {{PMID:} 25409824}
  author = {*Boyle, Alan P and *Araya, Carlos L and Brdlik, Cathleen and Cayting, Philip and Cheng, Chao and Cheng, Yong and Gardner, Kathryn and Hillier, LaDeana W and Janette, Judith and Jiang, Lixia and Kasper, Dionna and Kawli, Trupti and Kheradpour, Pouya and Kundaje, Anshul and Li, Jingyi Jessica and Ma, Lijia and Niu, Wei and Rehm, E Jay and Rozowsky, Joel and Slattery, Matthew and Spokony, Rebecca and Terrell, Robert and Vafeados, Dionne and Wang, Daifeng and Weisdepp, Peter and Wu, Yi-Chieh and Xie, Dan and Yan, Koon-Kiu and Feingold, Elise A. and Good, Peter J. and Pazin, Michael J and Huang, Haiyan and Bickel, Peter J and Brenner, Steven E. and Reinke, Valerie and Waterston, Robert H and Gerstein, Mark and {\dag}White, Kevin P and {\dag}Kellis, Manolis and {\dag}Snyder, Michael},
  title = {{Comparative analysis of regulatory information and circuits across distant species.}},
  journal = {Nature},
  year = {2014},
  volume = {512},
  number = {7515},
  pages = {453--456},
  month = aug,
  doi = {10.1038/nature13668},
  abstract = {Despite the large evolutionary distances between metazoan species, they can show remarkable commonalities in their biology, and this has helped to establish fly and worm as model organisms for human biology. Although studies of individual elements and factors have explored similarities in gene regulation, a large-scale comparative analysis of basic principles of transcriptional regulatory features is lacking. Here we map the genome-wide binding locations of 165 human, 93 worm and 52 fly transcription regulatory factors, generating a total of 1,019 data sets from diverse cell types, developmental stages, or conditions in the three species, of which 498 (48.9\%) are presented here for the first time. We find that structural properties of regulatory networks are remarkably conserved and that orthologous regulatory factor families recognize similar binding motifs in vivo and show some similar co-associations. Our results suggest that gene-regulatory properties previously observed for individual factors are general principles of metazoan regulation that are remarkably well-preserved despite extensive functional divergence of individual network connections. The comparative maps of regulatory circuitry provided here will drive an improved understanding of the regulatory underpinnings of model organism biology and how these relate to human biology, development and disease.},
  url = {},
  pdf = {},
  note = {{PMID:} 25164757}
  author = {Araya, Carlos L and Kawli, Trupti and Kundaje, Anshul and Jiang, Lixia and Wu, Beijing and Vafeados, Dionne and Terrell, Robert and Weissdepp, Peter and Gevirtzman, Louis and Mace, Daniel and Niu, Wei and Boyle, Alan P and Xie, Dan and Ma, Lijia and Murray, John I. and Reinke, Valerie and Waterston, Robert H and Snyder, Michael},
  title = {{Regulatory analysis of the C. elegans genome with spatiotemporal resolution.}},
  journal = {Nature},
  year = {2014},
  volume = {512},
  number = {7515},
  pages = {400--405},
  month = aug,
  doi = {10.1038/nature13497},
  abstract = {Discovering the structure and dynamics of transcriptional regulatory events in the genome with cellular and temporal resolution is crucial to understanding the regulatory underpinnings of development and disease. We determined the genomic distribution of binding sites for 92 transcription factors and regulatory proteins across multiple stages of Caenorhabditis elegans development by performing 241 ChIP-seq (chromatin immunoprecipitation followed by sequencing) experiments. Integration of regulatory binding and cellular-resolution expression data produced a spatiotemporally resolved metazoan transcription factor binding map. Using this map, we explore developmental regulatory circuits that encode combinatorial logic at the levels of co-binding and co-expression of transcription factors, characterizing the genomic coverage and clustering of regulatory binding, the binding preferences of, and biological processes regulated by, transcription factors, the global transcription factor co-associations and genomic subdomains that suggest shared patterns of regulation, and identifying key transcription factors and transcription factor co-associations for fate specification of individual lineages and cell types.},
  url = {},
  pdf = {},
  note = {{PMID:} 25164749}
  author = {Phanstiel, Douglas H and Boyle, Alan P and Araya, Carlos L and Snyder, Michael P},
  title = {{Sushi.R: flexible, quantitative and integrative genomic visualizations for publication-quality multi-panel figures.}},
  journal = {Bioinformatics},
  year = {2014},
  month = jun,
  doi = {10.1093/bioinformatics/btu379},
  abstract = {Motivation: Interpretation and communication of genomic data require flexible and quantitative tools to analyze and visualize diverse data types, and yet, a comprehensive tool to display all common genomic data types in publication quality figures does not exist to date. To address this shortcoming, we present Sushi.R, an R/Bioconductor package that allows flexible integration of genomic visualizations into highly customizable, publication-ready, multi-panel figures from common genomic data formats including Browser Extensible Data (BED), bedGraph and Browser Extensible Data Paired-End (BEDPE). Sushi.R is open source and made publicly available through GitHub and Bioconductor.},
  url = {},
  pdf = {},
  note = {{PMID:} 24903420}
  title = {Dynamic trans-acting factor colocalization in human cells},
  author = {*Xie, Dan and *Boyle, Alan P and *Wu, Linfeng and Kawli, Trupti and Zhai, Jie and Snyder, Michael},
  journal = {Cell},
  pages = {713--724},
  volume = {155},
  number = {3},
  year = {2013},
  month = oct,
  url = {},
  pdf = {},
  doi = {10.1016/j.cell.2013.09.043},
  abstract = {Different trans-acting factors (TFs) collaborate and act in concert at distinct loci to perform accurate regulation of their target genes. To date, the cobinding of TF pairs has been investigated in a limited context both in terms of the number of factors within a cell type and across cell types and the extent of combinatorial colocalizations. Here, we use an approach to analyze TF colocalization within a cell type and across multiple cell lines at an unprecedented level. We extend this approach with large-scale mass spectrometry analysis of immunoprecipitations of 50 TFs. Our combined approach reveals large numbers of interesting TF-TF associations. We observe extensive change in TF colocalizations both within a cell type exposed to different conditions and across multiple cell types. We show distinct functional annotations and properties of different TF cobinding patterns and provide insights into the complex regulatory landscape of the cell.},
  note = {{PMID:} 24243024}
  title = {Extensive variation in chromatin states across humans},
  author = {*Kasowski, Maya and *Kyriazopoulou-Panagiotopoulou, Sofia and *Grubert, Fabian and *Zaugg, Judith B and *Kundaje, Anshul and Liu, Yuling and Boyle, Alan P and Zhang, Qiangfeng Cliff and Zakharia, Fouad and Spacek, Damek V and Li, Jingjing and Xie, Dan and Steinmetz, Lars M and Hogenesch, John B and Kellis, Manolis and Batzoglou, Serafim and Snyder, Michael},
  journal = {Science},
  year = {2013},
  month = nov,
  volume = {342},
  number = {6159},
  url = {},
  pdf = {},
  doi = {10.1126/science.1242510},
  pages = {750--752},
  note = {{PMID:} 24136358}
  title = {Annotation of functional variation in personal genomes using {RegulomeDB}},
  volume = {22},
  issn = {1549-5469},
  url = {},
  pdf = {},
  doi = {10.1101/gr.137323.112},
  abstract = {As the sequencing of healthy and disease genomes becomes more commonplace, detailed annotation provides interpretation for individual variation responsible for normal and disease phenotypes. Current approaches focus on direct changes in protein coding genes, particularly nonsynonymous mutations that directly affect the gene product. However, most individual variation occurs outside of genes and, indeed, most markers generated from genome-wide association studies {(GWAS)} identify variants outside of coding segments. Identification of potential regulatory changes that perturb these sites will lead to a better localization of truly functional variants and interpretation of their effects. We have developed a novel approach and database, {RegulomeDB}, which guides interpretation of regulatory variants in the human genome. {RegulomeDB} includes high-throughput, experimental data sets from {ENCODE} and other sources, as well as computational predictions and manual annotations to identify putative regulatory potential and identify functional variants. These data sources are combined into a powerful tool that scores variants to help separate functional variants from a large pool and provides a small set of putative sites with testable hypotheses as to their function. We demonstrate the applicability of this tool to the annotation of noncoding variants from 69 full sequenced genomes as well as that of a personal genome, where thousands of functionally associated variants were identified. Moreover, we demonstrate a {GWAS} where the database is able to quickly identify the known associated functional variant and provide a hypothesis as to its function. Overall, we expect this approach and resource to be valuable for the annotation of human genome sequences.},
  number = {9},
  journal = {Genome Research},
  author = {Boyle, Alan P and Hong, Eurie L and Hariharan, Manoj and Cheng, Yong and Schaub, Marc A and Kasowski, Maya and Karczewski, Konrad J and Park, Julie and Hitz, Benjamin C and Weng, Shuai and Cherry, J Michael and Snyder, Michael},
  month = sep,
  year = {2012},
  note = {{PMID:} 22955989},
  pages = {1790--1797}
  title = {Linking disease associations with regulatory information in the human genome},
  volume = {22},
  issn = {1549-5469},
  url = {},
  pdf = {},
  doi = {10.1101/gr.136127.111},
  abstract = {Genome-wide association studies have been successful in identifying single nucleotide polymorphisms {(SNPs)} associated with a large number of phenotypes. However, an associated {SNP} is likely part of a larger region of linkage disequilibrium. This makes it difficult to precisely identify the {SNPs} that have a biological link with the phenotype. We have systematically investigated the association of multiple types of {ENCODE} data with disease-associated {SNPs} and show that there is significant enrichment for functional {SNPs} among the currently identified associations. This enrichment is strongest when integrating multiple sources of functional information and when highest confidence disease-associated {SNPs} are used. We propose an approach that integrates multiple types of functional data generated by the {ENCODE} Consortium to help identify "functional {SNPs"} that may be associated with the disease phenotype. Our approach generates putative functional annotations for up to 80\% of all previously reported associations. We show that for most associations, the functional {SNP} most strongly supported by experimental evidence is a {SNP} in linkage disequilibrium with the reported association rather than the reported {SNP} itself. Our results show that the experimental data sets generated by the {ENCODE} Consortium can be successfully used to suggest functional hypotheses for variants associated with diseases and other phenotypes.},
  number = {9},
  journal = {Genome Research},
  author = {Schaub, Marc A and Boyle, Alan P and Kundaje, Anshul and {\dag}Batzoglou, Serafim and {\dag}Snyder, Michael},
  month = sep,
  year = {2012},
  note = {{PMID:} 22955986},
  pages = {1748--1759}
  title = {An integrated encyclopedia of {DNA} elements in the human genome},
  volume = {489},
  issn = {1476-4687},
  url = {},
  pdf = {},
  doi = {10.1038/nature11247},
  abstract = {The human genome encodes the blueprint of life, but the function of the vast majority of its nearly three billion bases is unknown. The Encyclopedia of {DNA} Elements {(ENCODE)} project has systematically mapped regions of transcription, transcription factor association, chromatin structure and histone modification. These data enabled us to assign biochemical functions for 80\% of the genome, in particular outside of the well-studied protein-coding regions. Many discovered candidate regulatory elements are physically associated with one another and with expressed genes, providing new insights into the mechanisms of gene regulation. The newly identified elements also show a statistical correspondence to sequence variants linked to human disease, and can thereby guide interpretation of this variation. Overall, the project provides new insights into the organization and regulation of our genes and genome, and is an expansive resource of functional annotations for biomedical research.},
  number = {7414},
  journal = {Nature},
  author = {{The {ENCODE} Project Consortium}},
  month = sep,
  year = {2012},
  note = {{PMID:} 22955616},
  pages = {57--74}
  title = {Architecture of the human regulatory network derived from {ENCODE} data},
  volume = {489},
  issn = {1476-4687},
  url = {},
  pdf = {},
  doi = {10.1038/nature11245},
  abstract = {Transcription factors bind in a combinatorial fashion to specify the on-and-off states of genes; the ensemble of these binding events forms a regulatory network, constituting the wiring diagram for a cell. To examine the principles of the human transcriptional regulatory network, we determined the genomic binding information of 119 transcription-related factors in over 450 distinct experiments. We found the combinatorial, co-association of transcription factors to be highly context specific: distinct combinations of factors bind at specific genomic locations. In particular, there are significant differences in the binding proximal and distal to genes. We organized all the transcription factor binding into a hierarchy and integrated it with other genomic information (for example, {microRNA} regulation), forming a dense meta-network. Factors at different levels have different properties; for instance, top-level transcription factors more strongly influence expression and middle-level ones co-regulate targets to mitigate information-flow bottlenecks. Moreover, these co-regulations give rise to many enriched network motifs (for example, noise-buffering feed-forward loops). Finally, more connected network components are under stronger selection and exhibit a greater degree of allele-specific activity (that is, differential binding to the two parental alleles). The regulatory information obtained in this study will be crucial for interpreting personal genome sequences and understanding basic principles of human biology and disease.},
  number = {7414},
  journal = {Nature},
  author = {*Gerstein, Mark B and *Kundaje, Anshul and *Hariharan, Manoj and *Landt, Stephen G and *Yan, Koon-Kiu and *Cheng, Chao and *Mu, Xinmeng Jasmine and *Khurana, Ekta and *Rozowsky, Joel and *Alexander, Roger and *Min, Renqiang and *Alves, Pedro and Abyzov, Alexej and Addleman, Nick and Bhardwaj, Nitin and Boyle, Alan P and Cayting, Philip and Charos, Alexandra and Chen, David Z and Cheng, Yong and Clarke, Declan and Eastman, Catharine and Euskirchen, Ghia and Frietze, Seth and Fu, Yao and Gertz, Jason and Grubert, Fabian and Harmanci, Arif and Jain, Preti and Kasowski, Maya and Lacroute, Phil and Leng, Jing and Lian, Jin and Monahan, Hannah and {O'Geen}, Henriette and Ouyang, Zhengqing and Partridge, E Christopher and Patacsil, Dorrelyn and Pauli, Florencia and Raha, Debasish and Ramirez, Lucia and Reddy, Timothy E and Reed, Brian and Shi, Minyi and Slifer, Teri and Wang, Jing and Wu, Linfeng and Yang, Xinqiong and Yip, Kevin Y and Zilberman-Schapira, Gili and Batzoglou, Serafim and Sidow, Arend and Farnham, Peggy J and Myers, Richard M and Weissman, Sherman M and Snyder, Michael},
  month = sep,
  year = {2012},
  note = {{PMID:} 22955619},
  pages = {91--100}
  title = {Personal omics profiling reveals dynamic molecular and medical phenotypes},
  volume = {148},
  issn = {1097-4172},
  url = {},
  pdf = {},
  doi = {10.1016/j.cell.2012.02.009},
  abstract = {Personalized medicine is expected to benefit from combining genomic information with regular monitoring of physiological states by multiple high-throughput methods. Here, we present an integrative personal omics profile {(iPOP)}, an analysis that combines genomic, transcriptomic, proteomic, metabolomic, and autoantibody profiles from a single individual over a 14 month period. Our {iPOP} analysis revealed various medical risks, including type 2 diabetes. It also uncovered extensive, dynamic changes in diverse molecular components and biological pathways across healthy and diseased conditions. Extremely high-coverage genomic and transcriptomic data, which provide the basis of our {iPOP}, revealed extensive heteroallelic changes during healthy and diseased states and an unexpected {RNA} editing mechanism. This study demonstrates that longitudinal {iPOP} can be used to interpret healthy and diseased states by connecting genomic information with additional dynamic omics activity.},
  number = {6},
  journal = {Cell},
  author = {*Chen, Rui and *Mias, George I and *{Li-Pook-Than}, Jennifer and *Jiang, Lihua and Lam, Hugo Y K and Chen, Rong and Miriami, Elana and Karczewski, Konrad J and Hariharan, Manoj and Dewey, Frederick E and Cheng, Yong and Clark, Michael J and Im, Hogune and Habegger, Lukas and Balasubramanian, Suganthi and {O'Huallachain}, Maeve and Dudley, Joel T and Hillenmeyer, Sara and Haraksingh, Rajini and Sharon, Donald and Euskirchen, Ghia and Lacroute, Phil and Bettinger, Keith and Boyle, Alan P and Kasowski, Maya and Grubert, Fabian and Seki, Scott and Garcia, Marco and {Whirl-Carrillo}, Michelle and Gallardo, Mercedes and Blasco, Maria A and Greenberg, Peter L and Snyder, Phyllis and Klein, Teri E and Altman, Russ B and Butte, Atul J and Ashley, Euan A and Gerstein, Mark and Nadeau, Kari C and Tang, Hua and Snyder, Michael},
  month = mar,
  year = {2012},
  note = {{PMID:} 22424236},
  pages = {1293--1307}
  title = {Open chromatin defined by {DNaseI} and {FAIRE} identifies regulatory elements that shape cell-type identity},
  issn = {1549-5469},
  volume = {21},
  url = {},
  pdf = {},
  doi = {10.1101/gr.121541.111},
  abstract = {The human body contains thousands of unique cell types, each with specialized functions. Cell identity is governed in large part by gene transcription programs, which are determined by regulatory elements encoded in {DNA.} To identify regulatory elements active in seven cell lines representative of diverse human cell types, we used {DNase-seq} and {FAIRE-seq} to map "open chromatin." Over 870,000 {DNaseI} or {FAIRE} sites, which correspond tightly to nucleosome depleted regions, were identified across the seven cell lines, covering nearly 9\% of the genome. The combination of {DNaseI} and {FAIRE} is more effective than either assay alone in identifying likely regulatory elements, as judged by coincidence with transcription factor binding locations determined in the same cells. Open chromatin common to all seven cell types tended to be at or near transcription start sites and to be coincident with {CTCF} binding sites, while open chromatin sites found in only one cell type were typically located away from transcription start sites, and contained {DNA} motifs recognized by regulators of cell-type identity. We show that open chromatin regions bound by {CTCF} are potent insulators. We identified clusters of open regulatory elements {(COREs)} that were physically near each other and whose appearance was coordinated among one or more cell types. Gene expression and {RNA} Pol {II} binding data support the hypothesis that {COREs} control gene activity required for the maintenance of cell-type identity. This publicly available atlas of regulatory elements may prove valuable in identifying non-coding {DNA} sequence variants that are causally linked to human disease.},
  journal = {Genome Research},
  author = {*Song, Lingyun and *Zhang, Zhancheng and *Grasfeder, Linda L and *Boyle, Alan P and *Giresi, Paul G and *Lee, {Bum-Kyu} and *Sheffield, Nathan C and Graff, Stefan and Huss, Mikael and Keefe, Damian and Liu, Zheng and London, Darin and {McDaniell}, Ryan M and Shibata, Yoichiro and Showers, Kimberly A and Simon, Jeremy M and Vales, Teresa and Wang, Tianyuan and Winter, Deborah and Zhang, Zhuzhu and Clarke, Neil D and {\dag}Birney, Ewan and {\dag}Iyer, Vishy R and {\dag}Crawford, Gregory E and {\dag}Lieb, Jason D and {\dag}Furey, Terrence S},
  month = jul,
  year = {2011},
  number = {10},
  note = {{PMID:} 21750106},
  pages = {1757--1767}
  title = {A User's Guide to the {Encyclopedia} of {DNA} {Elements} ({ENCODE})},
  volume = {9},
  url = {},
  pdf = {},
  doi = {10.1371/journal.pbio.1001046},
  abstract = {The Encyclopedia of {DNA} Elements {(ENCODE)} Project was created to enable the scientific and medical communities to interpret the human genome sequence and to use it to understand human biology and improve health. The {ENCODE} Consortium, a 
large group of scientists from around the world, uses a variety of experimental methods to identify and describe the regions of the 3 billion base-pair human genome that are important for function. Using experimental, computational, and statistical analyses, we 
aimed to discover and describe genes, transcripts, and transcriptional regulatory regions, as well as {DNA} binding proteins that interact with regulatory regions in the genome, including transcription factors, different versions of histones and other markers, 
and {DNA} methylation patterns that define states of the genome in various cell types. The {ENCODE} Project has developed standards for each experiment type to ensure high-quality, reproducible data and novel algorithms to facilitate analysis. All data and 
derived results are made available through a freely accessible database. This article provides an overview of the complete project and the resources it is generating, as well as examples to illustrate the application of {ENCODE} data as a user's guide to 
facilitate the interpretation of the human genome.},
  number = {4},
  journal = {{PLoS} Biology},
  author = {{The {ENCODE} Project Consortium}},
  month = apr,
  year = {2011},
  pages = {e1001046},
  note = {{PMID:} 21526222}
  title = {High-resolution genome-wide in vivo footprinting of diverse transcription factors in human cells},
  volume = {21},
  issn = {1549-5469},
  url = {},
  pdf = {},
  doi = {10.1101/gr.112656.110},
  abstract = {Regulation of gene transcription in diverse cell 
types is largely determined by varied sets of cis-elements where 
transcription factors bind. Here we demonstrate that data from a single 
high-throughput {DNaseI} hypersensitivity assay can delineate hundreds 
of thousands of base-pair resolution in vivo footprints in human cells 
that precisely mark individual transcription {factor-DNA} interactions. 
These annotations provide a unique resource for the investigation of 
cis-regulatory elements. We find that footprints for specific 
transcription factors correlate with {ChIP-seq} enrichment and can 
accurately identify functional vs. non-functional transcription factor 
motifs. We also find that footprints reveal a unique evolutionary 
conservation pattern that differentiates functional footprinted bases 
from surrounding {DNA.} Finally, detailed analysis of {CTCF} footprints 
suggests multiple modes of binding and a novel {DNA} binding motif 
upstream of the primary binding site.},
  journal = {Genome Research},
  author = {Alan P Boyle and Lingyun Song and {Bum-Kyu} Lee and 
Darin London and Damian Keefe and Ewan Birney and Vishwanath R Iyer and 
Gregory E {\dag}Crawford and Terrence S {\dag}Furey},
  month = mar,
  year = {2011},
  note = {{PMID:} 21106903},
  pages = {456--464}
  title = {Global epigenomic analysis of primary human pancreatic 
islets provides insights into type 2 diabetes susceptibility loci},
  volume = {12},
  issn = {1932-7420},
  url = {},
  pdf = {},
  doi = {10.1016/j.cmet.2010.09.012},
  abstract = {Identifying cis-regulatory elements is important to 
understanding how human pancreatic islets modulate gene expression in 
physiologic or pathophysiologic (e.g., diabetic) conditions. We 
conducted genome-wide analysis of {DNase} I hypersensitive sites, 
histone H3 lysine methylation modifications {(K4me1,} K4me3, K79me2), 
and {CCCTC} factor {(CTCF)} binding in human islets. This identified 
$\sim$18,000 putative promoters (several hundred unannotated and 
islet-active). Surprisingly, active promoter modifications were absent 
at genes encoding islet-specific hormones, suggesting a distinct 
regulatory mechanism. Of 34,039 distal (nonpromoter) regulatory 
elements, 47\% are islet unique and 22\% are {CTCF} bound. In the 18 
type 2 diabetes {(T2D)-associated} loci, we identified 118 putative 
regulatory elements and confirmed enhancer activity for 12 of 33 tested. 
Among six regulatory elements harboring {T2D-associated} variants, two 
exhibit significant allele-specific differences in activity. These 
findings present a global snapshot of the human islet epigenome and 
should provide functional context for noncoding variants emerging from 
genetic studies of {T2D} and other islet disorders.},
  number = {5},
  journal = {Cell Metabolism},
  author = {Michael L *Stitzel and Praveen *Sethupathy and Daniel S 
Pearson and Peter S Chines and Lingyun Song and Michael R Erdos and Ryan 
Welch and Stephen C J Parker and Alan P Boyle and Laura J Scott and 
Elliott H Margulies and Michael Boehnke and Terrence S Furey and Gregory 
E Crawford and Francis S Collins},
  month = nov,
  year = {2010},
  note = {{PMID:} 21035756},
  pages = {443--455}
  title = {Heritable individual-specific and allele-specific 
chromatin signatures in humans},
  volume = {328},
  issn = {1095-9203},
  url = {},
  pdf = {},
  documenturl = {},
  doi = {10.1126/science.1184655},
  abstract = {The extent to which variation in chromatin structure 
and transcription factor binding may influence gene expression, and thus 
underlie or contribute to variation in phenotype, is unknown. To address 
this question, we cataloged both individual-to-individual variation and 
differences between homologous chromosomes within the same individual 
(allele-specific variation) in chromatin structure and transcription 
factor binding in lymphoblastoid cells derived from individuals of 
geographically diverse ancestry. Ten percent of active chromatin sites 
were individual-specific; a similar proportion were allele-specific. 
Both individual-specific and allele-specific sites were commonly 
transmitted from parent to child, which suggests that they are heritable 
features of the human genome. Our study shows that heritable chromatin 
status and transcription factor binding differ as a result of genetic 
variation and may underlie phenotypic variation in humans.},
  number = {5975},
  journal = {Science},
  author = {Ryan {McDaniell} and {Bum-Kyu} Lee and Lingyun Song 
and Zheng Liu and Alan P Boyle and Michael R Erdos and Laura J Scott and 
Mario A Morken and Katerina S Kucera and Anna Battenhouse and Damian 
Keefe and Francis S Collins and Huntington F Willard and Jason D Lieb 
and Terrence S Furey and Gregory E {\dag}Crawford and Vishwanath R {\dag}Iyer and 
Ewan {\dag}Birney},
  month = apr,
  year = {2010},
  note = {{PMID:} 20299549},
  keywords = {African Continental Ancestry Group, Alleles, Binding 
Sites, Cell Line, Chromatin, Chromatin Immunoprecipitation, Chromosomes, 
Human, Chromosomes, Human, X, Deoxyribonuclease I, European Continental 
Ancestry Group, Female, Gene Expression Regulation, Genetic Variation, 
Humans, Male, Nuclear Family, Polymorphism, Single Nucleotide, Protein 
Binding, Regulatory Elements, Transcriptional, Repressor Proteins, 
Sequence Analysis, {DNA,} Transcription Factors, X Chromosome},
  pages = {235--239}
  title = {Evidence-ranked motif identification},
  volume = {11},
  issn = {1465-6914},
  url = {},
  pdf = {},
  doi = {10.1186/gb-2010-11-2-r19},
  abstract = {{cERMIT} is a computationally efficient 
motif discovery tool based on analyzing genome-wide quantitative 
regulatory evidence. Instead of pre-selecting promising candidate 
sequences, it utilizes information across all sequence regions to search 
for high-scoring motifs. We apply {cERMIT} on a range of direct binding 
and overexpression data sets; it substantially outperforms 
state-of-the-art approaches on curated {ChIP-chip} datasets, and easily 
scales to current mammalian {ChIP-seq} experiments with data on 
thousands of non-coding regions.},
  number = {2},
  journal = {Genome Biology},
  author = {Stoyan Georgiev and Alan P Boyle and Karthik Jayasurya 
and Sayan Mukherjee and Uwe Ohler},
  month = feb,
  year = {2010},
  note = {{PMID:} 20156354},
  pages = {R19}
  title = {Both noncoding and protein-coding {RNAs} contribute to 
gene expression evolution in the primate brain},
  volume = {2},
  issn = {1759-6653},
  url = {},
  pdf = {},
  doi = {10.1093/gbe/evq002},
  abstract = {Despite striking differences in cognition and 
behavior between humans and our closest primate relatives, several 
studies have found little evidence for adaptive change in protein-coding 
regions of genes expressed primarily in the brain. Instead, changes in 
gene expression may underlie many cognitive and behavioral differences. 
Here, we used digital gene expression: tag profiling (here called 
{Tag-Seq,} also called {DGE:tag} profiling) to assess changes in global 
transcript abundance in the frontal cortex of the brains of 3 humans, 3 
chimpanzees, and 3 rhesus macaques. A substantial fraction of 
transcripts we identified as differentially transcribed among species 
were not assayed in previous studies based on microarrays. 
Differentially expressed tags within coding regions are enriched for 
gene functions involved in synaptic transmission, transport, oxidative 
phosphorylation, and lipid metabolism. Importantly, because {Tag-Seq} 
technology provides strand-specific information about all polyadenylated 
transcripts, we were able to assay expression in noncoding intragenic 
regions, including both sense and antisense noncoding transcripts 
(relative to nearby genes). We find that many noncoding transcripts are 
conserved in both location and expression level between species, 
suggesting a possible functional role. Lastly, we examined the overlap 
between differential gene expression and signatures of positive 
selection within putative promoter regions, a sign that these 
differences represent adaptations during human evolution. Comparative 
approaches may provide important insights into genes responsible for 
differences in cognitive functions between humans and nonhuman primates, 
as well as highlighting new candidate genes for studies investigating 
neurological disorders.},
  journal = {Genome Biology and Evolution},
  author = {Courtney C Babbitt and Olivier Fedrigo and Adam D 
Pfefferle and Alan P Boyle and Julie E Horvath and Terrence S Furey and 
Gregory A Wray},
  year = {2010},
  note = {{PMID:} 20333225},
  pages = {67--79}
  title = {{DNaseI} hypersensitivity at gene-poor, {FSH} 
dystrophy-linked 4q35.2},
  volume = {37},
  issn = {1362-4962},
  url = {},
  pdf = {},
  doi = {10.1093/nar/gkp833},
  abstract = {A subtelomeric region, 4q35.2, is implicated in 
facioscapulohumeral muscular dystrophy {(FSHD),} a dominant disease 
thought to involve local pathogenic changes in chromatin. {FSHD} 
patients have too few copies of a tandem 3.3-kb repeat {(D4Z4)} at 
4q35.2. No phenotype is associated with having few copies of an almost 
identical repeat at 10q26.3. Standard expression analyses have not given 
definitive answers as to the genes involved. To investigate the 
pathogenic effects of short {D4Z4} arrays on gene expression in the very 
gene-poor 4q35.2 and to find chromatin landmarks there for transcription 
control, unannotated genes and chromatin structure, we mapped 
{DNaseI-hypersensitive} {(DH)} sites in {FSHD} and control myoblasts. 
Using custom tiling arrays {(DNase-chip),} we found unexpectedly many 
{DH} sites in the two large gene deserts in this {4-Mb} region. One site 
was seen preferentially in {FSHD} myoblasts. Several others were mapped 
{\textgreater}0.7 Mb from genes known to be active in the muscle lineage 
and were also observed in cultured fibroblasts, but not in lymphoid, 
myeloid or hepatic cells. Their selective occurrence in cells derived 
from mesoderm suggests functionality. Our findings indicate that the 
gene desert regions of 4q35.2 may have functional significance, possibly 
also to {FSHD,} despite their paucity of known genes.},
  number = {22},
  journal = {Nucleic Acids Research},
  author = {Xueqing Xu and Koji Tsumagari and Janet Sowden and 
Rabi Tawil and Alan P Boyle and Lingyun Song and Terrence S Furey and 
Gregory E Crawford and Melanie Ehrlich},
  month = dec,
  year = {2009},
  note = {{PMID:} 19820107},
  keywords = {Adolescent, Cells, Cultured, Chromatin, Chromosomes, 
Human, Pair 4, Deoxyribonuclease I, Female, Genes, Humans, Linkage 
{(Genetics),} Muscular Dystrophy, Facioscapulohumeral, Myoblasts, 
Oligonucleotide Array Sequence Analysis, Reverse Transcriptase 
Polymerase Chain Reaction},
  pages = {7381--7393}
  title = {High-resolution mapping studies of chromatin and gene 
regulatory elements},
  volume = {1},
  issn = {1750-1911},
  url = {},
  pdf = {},
  doi = {10.2217/epi.09.29},
  abstract = {Microarray and high-throughput sequencing 
technologies have enabled the development of comprehensive assays to 
identify locations of particular chromatin structures and regulatory 
elements. It is now possible to create genome-wide maps of {DNA} 
methylation, trans-factor binding sites, histone variants and histone 
tail modifications, nucleosome positions, regions of open chromatin, and 
chromosome locations and interactions. This review provides a summary of 
these new assays that are changing the way in which molecular biology 
research is being performed. While the generation of large amounts of 
data from these experiments is becoming increasingly easier, the 
development of corresponding analysis methods has progressed more 
slowly. It will likely be years before the full extent of the 
information contained in these data is fully appreciated.},
  number = {2},
  journal = {Epigenomics},
  author = {Alan P Boyle and Terrence S Furey},
  year = {2009},
  note = {{PMID:} 20514362},
  pages = {319--329}
  title = {{F-Seq}: a feature density estimator for 
high-throughput sequence tags},
  volume = {24},
  url = {},
  pdf = {},
  doi = {10.1093/bioinformatics/btn480},
  abstract = {Tag sequencing using high-throughput sequencing 
technologies are now regularly employed to identify specific sequence 
features, such as transcription factor binding sites {(ChIP-seq)} or 
regions of open chromatin {(DNase-seq).} To intuitively summarize and 
display individual sequence data as an accurate and interpretable 
signal, we developed {F-Seq,} a software package that generates a 
continuous tag sequence density estimation allowing identification of 
biologically meaningful sites whose output can be displayed directly in 
the {UCSC} Genome Browser. {AVAILABILITY:} The software is written in 
the Java language and is available on all major computing platforms for 
download.},
  number = {21},
  journal = {Bioinformatics},
  author = {Alan P Boyle and Justin Guinney and Gregory E Crawford 
and Terrence S Furey},
  month = nov,
  year = {2008},
  note = {{PMID:} 18784119},
  keywords = {Chromatin Immunoprecipitation; Deoxyribonuclease I; 
Genome; Sequence Analysis, {DNA;} Sequence Tagged Sites; Software},
  pages = {2537--2538}
  title = {High-resolution mapping and characterization of open 
chromatin across the genome},
  volume = {132},
  url = {},
  pdf = {},
  doi = {10.1016/j.cell.2007.12.014},
  abstract = {Mapping {DNase} I hypersensitive {(HS)} sites is an 
accurate method of identifying the location of genetic regulatory 
elements, including promoters, enhancers, silencers, insulators, and 
locus control regions. We employed high-throughput sequencing and 
whole-genome tiled array strategies to identify {DNase} I {HS} sites 
within human primary {CD4+} T cells. Combining these two technologies, 
we have created a comprehensive and accurate genome-wide open chromatin 
map. Surprisingly, only 16\%--21\% of the identified 94,925 {DNase} I 
{HS} sites are found in promoters or first exons of known genes, but 
nearly half of the most open sites are in these regions. In conjunction 
with expression, motif, and chromatin immunoprecipitation data, we find 
evidence of cell-type-specific characteristics, including the ability to 
identify transcription start sites and locations of different chromatin 
marks utilized in these cells. In addition, and unexpectedly, our 
analyses have uncovered detailed features of nucleosome structure.},
  number = {2},
  journal = {Cell},
  author = {Alan P Boyle and Sean Davis and Hennady P Shulha and 
Paul Meltzer and Elliott H Margulies and Zhiping Weng and Terrence S 
{\dag}Furey and Gregory E {\dag}Crawford},
  month = jan,
  year = {2008},
  note = {{PMID:} 18243105},
  keywords = {Algorithms; Area Under Curve; Binding Sites; 
{CD4-Positive} {T-Lymphocytes;} Cell Nucleus; Chromatin; Chromatin 
Immunoprecipitation; Chromosome Mapping; Chromosomes, Human; 
Deoxyribonuclease I; Genome, Human; Histones; Humans; Nucleosomes; 
Oligonucleotide Array Sequence Analysis; Promoter Regions {(Genetics);} 
{ROC} Curve; Sensitivity and Specificity; Sequence Analysis, {DNA;} 
Transcription Factors},
  pages = {311--322}
  title = {Global analysis of microbial translation initiation regions},
  url = {},
  pdf = {},
  abstract = {The availability of genomic sequences from multiple 
bacteria has allowed global comparisons of patterns. Here we present a 
graphical comparison of normalized base frequencies in the vicinity of 
translation starts for both eubacteria and archaea. The results show 
that most eubacterial Open Reading Frames (ORFs) are preceded by a 
distinctly recognizable Shine-Dalgarno (SD) sequence pattern. However, 
some eubacteria deviate from this arrangement and have diminished SD 
patterns or completely lack this sequence. On the other hand, some 
archaea seem to use both SD sequences and leaderless transcripts in their 
translation initiation process. This is dependent on the position of a 
gene within an operon. Most archaea seem to have other regular sequences 
located upstream from the typical SD location. Both eubacteria and 
archaea have a surprising repetitive pattern seen within the averaged 
ORFs. The eubacterial and archaeal averaged patterns are slightly 
different from each other, and individual organisms within each domain 
vary from the averages. Nevertheless, the existence of such a 
periodicity within ORFs may allow the development of new techniques to 
identify actual genes from ORFs.},
  volume = {48},
  number = {3},
  pages = {138--150},
  booktitle = {Journal of the Mississippi Academy of Sciences},
  author = {Alan P Boyle and John A Boyle},
  month = jul,
  year = {2003}
  title = {Visualization of aligned genomic open reading frame data},
  issn = {1539-3429},
  url = {},
  pdf = {},
  abstract = {Students can better appreciate the value of genomic 
data if they are asked to use the data themselves. However, in general 
the enormous volume of data involved makes detailed examination 
difficult. Here we present a web site that allows students to study one 
particular aspect of sequenced genomes. They are able to align the open 
reading frames (ORFs) of any available genome that is of reasonable 
size. The ORFs may be aligned using either the start codon or the stop 
codon as the starting points. Results will readily show the presence of 
common ribosome binding sites as well as reveal interesting order within 
the ORFs that is nonexistent outside of them. Students will be able to 
ask various questions involving comparisons of genomes and see the 
results presented in both a tabular and graphic format. An example 
problem is presented under "Results."},
  volume = {31},
  number = {1},
  pages = {64--68},
  journal = {Biochemistry and Molecular Biology Education},
  author = {Alan P Boyle and John A Boyle},
  month = jan,
  year = {2003}
  title = {Interactive clustering for exploration of genomic data},
  pdf = {},
  abstract = {The complete genomic sequences for many organisms, particularly 
primitive organisms with relatively small genomes (prokaryotes), are now 
available. We describe an approach that supports interactive exploration 
of patterns in genomic data by combining use of positional weight 
matrices, the k-means clustering algorithm, and a visualization tool. 
Users interact with the system by examining a visualization of the 
"average" pattern found in each cluster for the sequence under 
consideration and determine if further clustering or modified clustering 
is desired. The effectiveness of this approach is demonstrated by a 
study of promoter sequences in archaea.},
  volume = {12},
  pages = {753--758},
  booktitle = {Proceedings of the Artificial Neural Networks in Engineering Conference},
  author = {Xiufeng Wan and John A Boyle and Susan M Bridges and Alan P Boyle},
  address = {St. Louis, MO},
  month = nov,
  year = {2002}