<?xml version="1.0" encoding="UTF-8"?>

<rdf:RDF
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
   xmlns="http://purl.org/rss/1.0/"
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:prism="http://prismstandard.org/namespaces/1.2/basic/"
   xmlns:dcterms="http://purl.org/dc/terms/"

>
<channel rdf:about="http://www.citeulike.org/about">
    <!-- Feed-level metadata for the CiteULike listing of dpollard's articles tagged "method". -->

    <!--
      pubDate is retained for consumers that already read it, but its zone is now the
      numeric offset +0100: the previous value "BST" is not one of the zone tokens that
      RFC 822 permits. NOTE(review): BST is read here as British Summer Time (the feed
      language is en-gb); confirm with the feed producer.
      dc:date carries the same instant in W3CDTF form, which is how the RSS 1.0
      Dublin Core module expresses a channel date.
    -->
    <pubDate>Sat, 05 Jul 2008 12:28:17 +0100</pubDate>
    <dc:date>2008-07-05T12:28:17+01:00</dc:date>

    <title>CiteULike: dpollard's method</title>
    <description>CiteULike: dpollard's method</description>

    <link>http://www.citeulike.org/user/dpollard/tag/method</link>
    <dc:publisher>CiteULike.org</dc:publisher>
    <dc:language>en-gb</dc:language>
    <dc:rights>Copyright &#169; 2004-2008 citeulike.org</dc:rights>

    <!-- Table of contents: one rdf:li per item, each pointing at that item's rdf:about URI. -->
    <items>
        <rdf:Seq>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/2327491"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/2938163"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1202343"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/2615331"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1202344"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1712721"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/2879290"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/2340727"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/2318105"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/2318094"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/2064285"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/2060758"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1903551"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/785142"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1891904"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1021805"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1570977"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/489958"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1818073"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1746568"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1390802"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1390791"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1290009"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/826186"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1352994"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1295925"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1352944"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1320184"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1320157"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1320137"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1320133"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1218043"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1218035"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1182082"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1181240"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1181222"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1161181"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1167977"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/816984"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/456431"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1062019"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/1082428"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/950154"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/903929"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/878405"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/849684"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/758009"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/580530"/>
            <rdf:li rdf:resource="http://www.citeulike.org/user/dpollard/article/558928"/>
        </rdf:Seq>
    </items>
</channel>


<item rdf:about="http://www.citeulike.org/user/dpollard/article/2327491">
    <!--
      Bibliographic record for CiteULike article 2327491.
      The description element holds an HTML-escaped citation line followed by the abstract;
      the dc:* and prism:* children repeat the citation as separate fields.
      NOTE(review): dc:date (2008-02-03) is presumably when the entry was added to the
      library rather than a publication date; confirm with the feed producer.
    -->
    <title>A genome-wide approach to identifying novel-imprinted genes.</title>
    <link>http://www.citeulike.org/user/dpollard/article/2327491</link>
    <description>&lt;i&gt;Hum Genet, Vol. 122, No. 6. (January 2008), pp. 625-634.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Genomic imprinting is an epigenetic process in which the copy of a gene inherited from one parent (maternal or paternal) is consistently silenced or expressed at a significantly lower level than the copy from the other parent. In an effort to begin a systematic genome-wide screen for imprinted genes, we assayed differential allelic expression (DAE) at 3,877 bi-allelic protein-coding sites located in 2,625 human genes in 67 unrelated individuals using genotyping microarrays. We used the presence of both over- and under-expression of the reference allele compared to the alternate allele to identify candidate-imprinted genes. We found 61 genes with at least twofold DAE plus &#34;flipping&#34; of the more highly expressed allele between reference and alternate across heterozygous samples. Sixteen flipping genes were genotyped and assayed for DAE in an independent data set of lymphoblastoid cell lines from two CEPH pedigrees. We confirmed that PEG10 is paternally expressed, identified one gene (ZNF331) with multiple lines of data indicating it is imprinted, and predicted several additional imprinting candidate genes. Our findings suggest that there are at most several hundred genes in the human genome that are universally imprinted. With samples of mRNA from appropriate tissues and a collection of informative cSNPs, a genome-wide search using this methodology could expand the list of genes that undergo genomic imprinting in a tissue- or temporal-specific manner.</description>
    <dc:title>A genome-wide approach to identifying novel-imprinted genes.</dc:title>

    <!-- One dc:creator per author, in the order given by the feed. -->
    <dc:creator>KS Pollard</dc:creator>
    <dc:creator>D Serre</dc:creator>
    <dc:creator>X Wang</dc:creator>
    <dc:creator>H Tao</dc:creator>
    <dc:creator>E Grundberg</dc:creator>
    <dc:creator>TJ Hudson</dc:creator>
    <dc:creator>AG Clark</dc:creator>
    <dc:creator>K Frazer</dc:creator>
    <dc:identifier>doi:10.1007/s00439-007-0440-1</dc:identifier>
    <dc:source>Hum Genet, Vol. 122, No. 6. (January 2008), pp. 625-634.</dc:source>
    <dc:date>2008-02-03T22:30:25-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Hum Genet</prism:publicationName>
    <prism:issn>0340-6717</prism:issn>
    <prism:volume>122</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>625</prism:startingPage>
    <prism:endingPage>634</prism:endingPage>
    <!-- One prism:category per tag on this entry. -->
    <prism:category>allele-specific</prism:category>
    <prism:category>array</prism:category>
    <prism:category>expression</prism:category>
    <prism:category>method</prism:category>
    <prism:category>rockman_journal_club</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/2938163">
    <!--
      Bibliographic record for CiteULike article 2938163.
      title and dc:title carry the same text; description is the HTML-escaped citation
      line plus abstract, and the prism:* fields break the citation into volume, number
      and page range.
    -->
    <title>Genome-wide allele-specific expression analysis using Massively Parallel Signature Sequencing (MPSS) reveals cis- and trans-effects on gene expression in maize hybrid meristem tissue.</title>
    <link>http://www.citeulike.org/user/dpollard/article/2938163</link>
    <description>&lt;i&gt;Plant molecular biology, Vol. 66, No. 5. (March 2008), pp. 551-563.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Allelic differences in expression are important genetic factors contributing to quantitative trait variation in various organisms. However, the extent of genome-wide allele-specific expression by different modes of gene regulation has not been well characterized in plants. In this study we developed a new methodology for allele-specific expression analysis by applying Massively Parallel Signature Sequencing (MPSS), an open ended and sequencing based mRNA profiling technology. This methodology enabled a genome-wide evaluation of cis- and trans-effects on allelic expression in six meristem stages of the maize hybrid. Summarization of data from nearly 400 pairs of MPSS allelic signature tags showed that 60% of the genes in the hybrid meristems exhibited differential allelic expression. Because both alleles are subjected to the same trans-acting factors in the hybrid, the data suggest the abundance of cis-regulatory differences in the genome. Comparing the same allele expressed in the hybrid versus its inbred parents showed that 40% of the genes were differentially expressed, suggesting different trans-acting effects present in different genotypes. Such trans-acting effects may result in gene expression in the hybrid different from allelic additive expression. With this approach we quantified gene expression in the hybrid relative to its inbred parents at the allele-specific level. As compared to measuring total transcript levels, this study provides a new level of understanding of different modes of gene regulation in the hybrid and the molecular basis of heterosis.</description>
    <dc:title>Genome-wide allele-specific expression analysis using Massively Parallel Signature Sequencing (MPSS) reveals cis- and trans-effects on gene expression in maize hybrid meristem tissue.</dc:title>

    <!-- One dc:creator per author, in the order given by the feed. -->
    <dc:creator>M Guo</dc:creator>
    <dc:creator>S Yang</dc:creator>
    <dc:creator>M Rupe</dc:creator>
    <dc:creator>B Hu</dc:creator>
    <dc:creator>DR Bickel</dc:creator>
    <dc:creator>L Arthur</dc:creator>
    <dc:creator>O Smith</dc:creator>
    <dc:identifier>doi:10.1007/s11103-008-9290-z</dc:identifier>
    <dc:source>Plant molecular biology, Vol. 66, No. 5. (March 2008), pp. 551-563.</dc:source>
    <dc:date>2008-06-27T19:58:37-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Plant molecular biology</prism:publicationName>
    <prism:issn>0167-4412</prism:issn>
    <prism:volume>66</prism:volume>
    <prism:number>5</prism:number>
    <prism:startingPage>551</prism:startingPage>
    <prism:endingPage>563</prism:endingPage>
    <!-- One prism:category per tag on this entry. -->
    <prism:category>allele-specific</prism:category>
    <prism:category>expression</prism:category>
    <prism:category>massively_parallel_signature_sequencing</prism:category>
    <prism:category>method</prism:category>
    <prism:category>rockman_journal_club</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1202343">
    <!--
      Bibliographic record for CiteULike article 1202343.
      This record has no prism:issn element.
      NOTE(review): the description text ends with the bare string "10.1101/gr.4559106",
      which matches the DOI in dc:identifier; it looks like residue from the imported
      abstract rather than part of it. Confirm before relying on the abstract text.
      NOTE(review): dc:date (2007) differs from prism:publicationYear (2006), so dc:date
      is presumably the date the entry was added to the library; confirm.
    -->
    <title>Analysis of allelic differential expression in human white blood cells</title>
    <link>http://www.citeulike.org/user/dpollard/article/1202343</link>
    <description>&lt;i&gt;Genome Res., Vol. 16, No. 3. (1 March 2006), pp. 331-339.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Allelic variation of gene expression is common in humans, and is of interest because of its potential contribution to variation in heritable traits. To identify human genes with allelic expression differences, we genotype DNA and examine mRNA isolated from the white blood cells of 12 unrelated individuals using oligonucleotide arrays containing 8406 exonic SNPs. Of the exonic SNPs, 1983, located in 1389 genes, are both expressed in the white blood cells and heterozygous in at least one of the 12 individuals, and thus can be examined for differential allelic expression. Of the 1389 genes, 731 (53%) show allele expression differences in at least one individual. To gain insight into the regulatory mechanisms governing allelic expression differences, we analyze a set of 60 genes containing exonic SNPs that are heterozygous in three or more samples, and for which all heterozygotes display differential expression. We find three patterns of allelic expression, suggesting different underlying regulatory mechanisms. Exonic SNPs in three of the 60 genes are monoallelically expressed in the human white blood cells, and when examined in families show expression of only the maternal copy, consistent with regulation by imprinting. Approximately one-third of the genes have the same allele expressed more highly in all heterozygotes, suggesting that their regulation is predominantly influenced by cis-elements in strong linkage disequilibrium with the assayed exonic SNP. The remaining two-thirds of the genes have different alleles expressed more highly in different heterozygotes, suggesting that their expression differences are influenced by factors not in strong linkage disequilibrium with the assayed exonic SNP. 10.1101/gr.4559106</description>
    <dc:title>Analysis of allelic differential expression in human white blood cells</dc:title>

    <!-- One dc:creator per author, in the order given by the feed. -->
    <dc:creator>Krishna Pant</dc:creator>
    <dc:creator>Heng Tao</dc:creator>
    <dc:creator>Erica Beilharz</dc:creator>
    <dc:creator>Dennis Ballinger</dc:creator>
    <dc:creator>David Cox</dc:creator>
    <dc:creator>Kelly Frazer</dc:creator>
    <dc:identifier>doi:10.1101/gr.4559106</dc:identifier>
    <dc:source>Genome Res., Vol. 16, No. 3. (1 March 2006), pp. 331-339.</dc:source>
    <dc:date>2007-04-02T01:56:59-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Genome Res.</prism:publicationName>
    <prism:volume>16</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>331</prism:startingPage>
    <prism:endingPage>339</prism:endingPage>
    <!-- One prism:category per tag on this entry. -->
    <prism:category>allele-specific</prism:category>
    <prism:category>array</prism:category>
    <prism:category>expression</prism:category>
    <prism:category>method</prism:category>
    <prism:category>rockman_journal_club</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/2615331">
    <!--
      Bibliographic record for CiteULike article 2615331.
      The citation in dc:source gives only a journal name and date, and the record has
      no prism:volume, prism:number or page elements; consumers must treat those
      fields as optional.
    -->
    <title>SNP-specific array-based allele-specific expression analysis.</title>
    <link>http://www.citeulike.org/user/dpollard/article/2615331</link>
    <description>&lt;i&gt;Genome Res (27 March 2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We have developed an optimized array-based approach for customizable allele-specific gene expression (ASE) analysis. The central features of the approach are the ability to select SNPs at will for detection, and the absence of need to PCR amplify the target. A surprisingly long probe length (39-49 nt) was needed for allelic discrimination. Reconstitution experiments demonstrate linearity of ASE over a broad range. Using this approach, we have discovered at least two novel imprinted genes, NLRP2, which encodes a member of the inflammasome, and OSBPL1A, which encodes a presumed oxysterol-binding protein, were both preferentially expressed from the maternal allele. In contrast, ERAP2, which encodes an aminopeptidase, did not show preferential parent-of-origin expression, but rather, cis-acting nonimprinted differential allelic control. The approach is scalable to the whole genome and can be used for discovery of functional epigenetic modifications in patient samples.</description>
    <dc:title>SNP-specific array-based allele-specific expression analysis.</dc:title>

    <!-- One dc:creator per author, in the order given by the feed. -->
    <dc:creator>Hans T Bjornsson</dc:creator>
    <dc:creator>Thomas J Albert</dc:creator>
    <dc:creator>Christine M Ladd-Acosta</dc:creator>
    <dc:creator>Roland D Green</dc:creator>
    <dc:creator>Michael A Rongione</dc:creator>
    <dc:creator>Christina M Middle</dc:creator>
    <dc:creator>Rafael A Irizarry</dc:creator>
    <dc:creator>Karl W Broman</dc:creator>
    <dc:creator>Andrew P Feinberg</dc:creator>
    <dc:identifier>doi:10.1101/gr.073254.107</dc:identifier>
    <dc:source>Genome Res (27 March 2008)</dc:source>
    <dc:date>2008-03-31T08:09:11-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Genome Res</prism:publicationName>
    <prism:issn>1088-9051</prism:issn>
    <!-- One prism:category per tag on this entry. -->
    <prism:category>allele-specific</prism:category>
    <prism:category>array</prism:category>
    <prism:category>expression</prism:category>
    <prism:category>method</prism:category>
    <prism:category>rockman_journal_club</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1202344">
    <!--
      Bibliographic record for CiteULike article 1202344.
      This record has no prism:issn element.
      NOTE(review): the description text ends with the bare string "10.1101/gr.1006603",
      which matches the DOI in dc:identifier; it looks like residue from the imported
      abstract rather than part of it. Confirm before relying on the abstract text.
    -->
    <title>Allelic Variation in Gene Expression Is Common in the Human Genome</title>
    <link>http://www.citeulike.org/user/dpollard/article/1202344</link>
    <description>&lt;i&gt;Genome Res., Vol. 13, No. 8. (1 August 2003), pp. 1855-1862.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Variations in gene sequence and expression underlie much of human variability. Despite the known biological roles of differential allelic gene expression resulting from X-chromosome inactivation and genomic imprinting, a large-scale analysis of allelic gene expression in human is lacking. We examined allele-specific gene expression of 1063 transcribed single-nucleotide polymorphisms (SNPs) by using Affymetrix HuSNP oligo arrays. Among the 602 genes that were heterozygous and expressed in kidney or liver tissues from seven individuals, 326 (54%) showed preferential expression of one allele in at least one individual, and 170 of those showed greater than fourfold difference between the two alleles. The allelic variation has been confirmed by real-time quantitative PCR experiments. Some of these 170 genes are known to be imprinted, such as SNRPN, IPW, HTR2A, and PEG3. Most of the differentially expressed genes are not in known imprinting domains but instead are distributed throughout the genome. Our studies demonstrate that variation of gene expression between alleles is common, and this variation may contribute to human variability. 10.1101/gr.1006603</description>
    <dc:title>Allelic Variation in Gene Expression Is Common in the Human Genome</dc:title>

    <!-- One dc:creator per author, in the order given by the feed. -->
    <dc:creator>Shuen Lo</dc:creator>
    <dc:creator>Zhining Wang</dc:creator>
    <dc:creator>Ying Hu</dc:creator>
    <dc:creator>Howard Yang</dc:creator>
    <dc:creator>Sheryl Gere</dc:creator>
    <dc:creator>Kenneth Buetow</dc:creator>
    <dc:creator>Maxwell Lee</dc:creator>
    <dc:identifier>doi:10.1101/gr.1006603</dc:identifier>
    <dc:source>Genome Res., Vol. 13, No. 8. (1 August 2003), pp. 1855-1862.</dc:source>
    <dc:date>2007-04-02T01:59:59-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Genome Res.</prism:publicationName>
    <prism:volume>13</prism:volume>
    <prism:number>8</prism:number>
    <prism:startingPage>1855</prism:startingPage>
    <prism:endingPage>1862</prism:endingPage>
    <!-- One prism:category per tag on this entry. -->
    <prism:category>affy_array</prism:category>
    <prism:category>allele-specific</prism:category>
    <prism:category>expression</prism:category>
    <prism:category>method</prism:category>
    <prism:category>rockman_journal_club</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1712721">
    <!--
      Bibliographic record for CiteULike article 1712721.
      prism:startingPage holds the non-numeric value "e81" and there is no
      prism:endingPage, so page fields must not be parsed as integers or assumed
      to come in pairs. The record also has no prism:issn element.
    -->
    <title>Allele-Specific Chromatin Immunoprecipitation Studies Show Genetic Influence on Chromatin State in Human Genome</title>
    <link>http://www.citeulike.org/user/dpollard/article/1712721</link>
    <description>&lt;i&gt;PLoS Genetics, Vol. 3, No. 5. (1 May 2007), e81.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Several recent studies have shown a genetic influence on gene expression variation, including variation between the two chromosomes within an individual and variation between individuals at the population level. We hypothesized that genetic inheritance may also affect variation in chromatin states. To test this hypothesis, we analyzed chromatin states in 12 lymphoblastoid cells derived from two Centre d&#39;Etude du Polymorphisme Humain families using an allele-specific chromatin immunoprecipitation (ChIP-on-chip) assay with Affymetrix 10K SNP chip. We performed the allele-specific ChIP-on-chip assays for the 12 lymphoblastoid cells using antibodies targeting at RNA polymerase II and five post-translation modified forms of the histone H3 protein. The use of multiple cell lines from the Centre d&#39;Etude du Polymorphisme Humain families allowed us to evaluate variation of chromatin states across pedigrees. These studies demonstrated that chromatin state clustered by family. Our results support the idea that genetic inheritance can determine the epigenetic state of the chromatin as shown previously in model organisms. To our knowledge, this is the first demonstration in humans that genetics may be an important factor that influences global chromatin state mediated by histone modification, the hallmark of the epigenetic phenomena.</description>
    <dc:title>Allele-Specific Chromatin Immunoprecipitation Studies Show Genetic Influence on Chromatin State in Human Genome</dc:title>

    <!-- One dc:creator per author, in the order given by the feed. -->
    <dc:creator>Mitsutaka Kadota</dc:creator>
    <dc:creator>Howard Yang</dc:creator>
    <dc:creator>Nan Hu</dc:creator>
    <dc:creator>Chaoyu Wang</dc:creator>
    <dc:creator>Ying Hu</dc:creator>
    <dc:creator>Philip Taylor</dc:creator>
    <dc:creator>Kenneth Buetow</dc:creator>
    <dc:creator>Maxwell Lee</dc:creator>
    <dc:identifier>doi:10.1371/journal.pgen.0030081</dc:identifier>
    <dc:source>PLoS Genetics, Vol. 3, No. 5. (1 May 2007), e81.</dc:source>
    <dc:date>2007-09-30T23:49:47-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Genetics</prism:publicationName>
    <prism:volume>3</prism:volume>
    <prism:number>5</prism:number>
    <prism:startingPage>e81</prism:startingPage>
    <!-- One prism:category per tag on this entry. -->
    <prism:category>allele-specific</prism:category>
    <prism:category>chip_chip</prism:category>
    <prism:category>method</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/2879290">
    <!--
      Bibliographic record for CiteULike article 2879290.
      The description text contains a literal line break part-way through the abstract,
      so the element spans two physical lines; line-oriented tools must not assume one
      element per line here.
      prism:startingPage holds the non-numeric value "e1000006" with no prism:endingPage.
      This is the only record in this part of the feed that carries prism:publisher.
    -->
    <title>Differential Allelic Expression in the Human Genome: A Robust Approach To Identify Genetic and Epigenetic Cis-Acting Mechanisms Regulating Gene Expression</title>
    <link>http://www.citeulike.org/user/dpollard/article/2879290</link>
    <description>&lt;i&gt;PLoS Genet, Vol. 4, No. 2. (29 February 2008), e1000006.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The recent development of whole genome association studies has lead to the robust identification of several loci involved in different common human diseases. Interestingly, some of the strongest signals of association observed in these studies arise from non-coding regions located in very large introns or far away from any annotated genes, raising the possibility that these regions are involved in the etiology of the disease through some unidentified regulatory mechanisms. These findings highlight the importance of better understanding the mechanisms leading to inter-individual differences in gene expression in humans. Most of the existing approaches developed to identify common regulatory polymorphisms are based on linkage/association mapping of gene expression to genotypes. However, these methods have some limitations, notably their cost and the requirement of extensive genotyping information from all the individuals studied which limits their applications to a specific cohort or tissue. Here we describe a robust and high-throughput method to directly measure differences in allelic expression for a large number of genes using the Illumina Allele-Specific Expression BeadArray platform and quantitative sequencing of RT-PCR products. We show that this approach allows reliable identification of differences in the relative expression of the two alleles larger than 1.5-fold (i.e., deviations of the allelic ratio larger than 60∶40) and offers several advantages over the mapping of total gene expression, particularly for studying humans or outbred populations. 
Our analysis of more than 80 individuals for 2,968 SNPs located in 1,380 genes confirms that differential allelic expression is a widespread phenomenon affecting the expression of 20% of human genes and shows that our method successfully captures expression differences resulting from both genetic and epigenetic cis-acting mechanisms.</description>
    <dc:title>Differential Allelic Expression in the Human Genome: A Robust Approach To Identify Genetic and Epigenetic Cis-Acting Mechanisms Regulating Gene Expression</dc:title>

    <!-- One dc:creator per author, in the order given by the feed. -->
    <dc:creator>David Serre</dc:creator>
    <dc:creator>Scott Gurd</dc:creator>
    <dc:creator>Bing Ge</dc:creator>
    <dc:creator>Robert Sladek</dc:creator>
    <dc:creator>Donna Sinnett</dc:creator>
    <dc:creator>Eef Harmsen</dc:creator>
    <dc:creator>Marina Bibikova</dc:creator>
    <dc:creator>Eugene Chudin</dc:creator>
    <dc:creator>David Barker</dc:creator>
    <dc:creator>Todd Dickinson</dc:creator>
    <dc:creator>Jian-Bing Fan</dc:creator>
    <dc:creator>Thomas Hudson</dc:creator>
    <dc:identifier>doi:10.1371/journal.pgen.1000006</dc:identifier>
    <dc:source>PLoS Genet, Vol. 4, No. 2. (29 February 2008), e1000006.</dc:source>
    <dc:date>2008-06-10T11:37:44-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>PLoS Genet</prism:publicationName>
    <prism:volume>4</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>e1000006</prism:startingPage>
    <prism:publisher>Public Library of Science</prism:publisher>
    <!-- One prism:category per tag on this entry. -->
    <prism:category>allele-specific</prism:category>
    <prism:category>expression</prism:category>
    <prism:category>illumina_beads</prism:category>
    <prism:category>method</prism:category>
    <prism:category>rockman_journal_club</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/2340727">
    <!--
      Bibliographic record for CiteULike article 2340727.
      The citation in dc:source gives only a journal name and date, and the record has
      no prism:volume, prism:number or page elements; consumers must treat those
      fields as optional.
    -->
    <title>Gene Network Inference via Structural Equation Modeling in Genetical Genomics Experiments.</title>
    <link>http://www.citeulike.org/user/dpollard/article/2340727</link>
    <description>&lt;i&gt;Genetics (3 February 2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Our goal is gene network inference in Genetical Genomics or Systems Genetics experiments. For species where sequence information is available, we first perform expression QTL mapping by jointly utilizing cis, cistrans and trans regulation. After using local structural models to identify regulator-target pairs for each eQTL, we construct an encompassing directed network (EDN) by assembling all retained regulator-target relationships. The EDN has nodes corresponding to expressed genes and eQTLs, and directed edges from eQTLs to cis-regulated target genes, from cis-regulated genes to cistrans regulated target genes, from trans-regulator genes to target genes and from trans-eQTLs to target genes. For network inference within the strongly constrained search space defined by the EDN, we propose Structural Equation Modeling (SEM), because it can model cyclic networks and the EDN indeed contains feedback relationships. Based on a factorization of the likelihood and the constrained search space, our SEM algorithm infers networks involving several hundred genes and eQTL. Structure inference is based on a penalized likelihood ratio and an adaptation of Occam's Window model selection. The SEM algorithm was evaluated using data simulated with nonlinear ordinary differential equations and known cyclic network topologies and was applied to a real yeast data set.</description>
    <dc:title>Gene Network Inference via Structural Equation Modeling in Genetical Genomics Experiments.</dc:title>

    <!-- One dc:creator per author, in the order given by the feed. -->
    <dc:creator>Bing Liu</dc:creator>
    <dc:creator>Alberto de la Fuente</dc:creator>
    <dc:creator>Ina Hoeschele</dc:creator>
    <dc:identifier>doi:10.1534/genetics.107.080069</dc:identifier>
    <dc:source>Genetics (3 February 2008)</dc:source>
    <dc:date>2008-02-06T11:38:22-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Genetics</prism:publicationName>
    <prism:issn>0016-6731</prism:issn>
    <!-- One prism:category per tag on this entry. -->
    <prism:category>eqtl</prism:category>
    <prism:category>method</prism:category>
    <prism:category>network</prism:category>
    <prism:category>system</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/2318105">
    <!--
      Bibliographic record for CiteULike article 2318105.
      This record has no dc:identifier, prism:volume or prism:number element; only the
      page range is given. Consumers that key on a DOI need a fallback such as the
      rdf:about URI.
    -->
    <title>Using DNA duplex stability information for transcription factor binding site discovery.</title>
    <link>http://www.citeulike.org/user/dpollard/article/2318105</link>
    <description>&lt;i&gt;Pac Symp Biocomput (2008), pp. 453-464.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Transcription factor (TF) binding site discovery is an important step in understanding transcriptional regulation. Many computational tools have already been developed, but their success in detecting TF motifs is still limited. We believe one of the main reasons for the low accuracy of current methods is that they do not take into account the structural aspects of TF-DNA interaction. We have previously shown that knowledge about the structural class of the TF and information about nucleosome occupancy can be used to improve motif discovery. Here, we demonstrate the benefits of using information about the DNA double-helical stability for motif discovery. We notice that, in general, the energy needed to destabilize the DNA double helix is higher at TF binding sites than at random DNA sites. We use this information to derive informative positional priors that we incorporate into a motif finding algorithm. When applied to yeast ChIP-chip data, the new informative priors improve the performance of the motif finder significantly when compared to priors that do not use the energetic stability information.</description>
    <dc:title>Using DNA duplex stability information for transcription factor binding site discovery.</dc:title>

    <!-- One dc:creator per author; the first name contains a non-ASCII character stored as literal UTF-8. -->
    <dc:creator>R Gordân</dc:creator>
    <dc:creator>AJ Hartemink</dc:creator>
    <dc:source>Pac Symp Biocomput (2008), pp. 453-464.</dc:source>
    <dc:date>2008-02-01T06:20:50-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Pac Symp Biocomput</prism:publicationName>
    <prism:issn>1793-5091</prism:issn>
    <prism:startingPage>453</prism:startingPage>
    <prism:endingPage>464</prism:endingPage>
    <!-- One prism:category per tag on this entry. -->
    <prism:category>binding_site</prism:category>
    <prism:category>biophysical_model</prism:category>
    <prism:category>method</prism:category>
    <prism:category>motif_detection</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>transcription_factor</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/2318094">
    <!--
      Bibliographic record for CiteULike article 2318094.
      This record has no dc:identifier, prism:volume or prism:number element; only the
      page range is given. Quotation marks inside the description are written as the
      numeric character reference for the double quote, which a parser delivers as a
      plain quote character.
    -->
    <title>The effect of the guide tree on multiple sequence alignments and subsequent phylogenetic analyses.</title>
    <link>http://www.citeulike.org/user/dpollard/article/2318094</link>
    <description>&lt;i&gt;Pac Symp Biocomput (2008), pp. 25-36.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Many multiple sequence alignment methods (MSAs) use guide trees in conjunction with a progressive alignment technique to generate a multiple sequence alignment but use differing techniques to produce the guide tree and to perform the progressive alignment. In this paper we explore the consequences of changing the guide tree used for the alignment routine. We evaluate four leading MSA methods (ProbCons, MAFFT, Muscle, and ClustalW) as well as a new MSA method (FTA, for &#34;Fixed Tree Alignment&#34;) which we have developed, on a wide range of simulated datasets. Although improvements in alignment accuracy can be obtained by providing better guide trees, in general there is little effect on the &#34;accuracy&#34; (measured using the SP-score) of the alignment by improving the guide tree. However, RAxML-based phylogenetic analyses of alignments based upon better guide trees tend to be much more accurate. This impact is particularly significant for ProbCons, one of the best MSA methods currently available, and our method, FTA. Finally, for very good guide trees, phylogenies based upon FTA alignments are more accurate than phylogenies based upon ProbCons alignments, suggesting that further improvements in phylogenetic accuracy may be obtained through algorithms of this type.</description>
    <dc:title>The effect of the guide tree on multiple sequence alignments and subsequent phylogenetic analyses.</dc:title>

    <!-- One dc:creator per author, in the order given by the feed. -->
    <dc:creator>S Nelesen</dc:creator>
    <dc:creator>K Liu</dc:creator>
    <dc:creator>D Zhao</dc:creator>
    <dc:creator>CR Linder</dc:creator>
    <dc:creator>T Warnow</dc:creator>
    <dc:source>Pac Symp Biocomput (2008), pp. 25-36.</dc:source>
    <dc:date>2008-02-01T06:13:52-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Pac Symp Biocomput</prism:publicationName>
    <prism:issn>1793-5091</prism:issn>
    <prism:startingPage>25</prism:startingPage>
    <prism:endingPage>36</prism:endingPage>
    <!-- One prism:category per tag on this entry. -->
    <prism:category>accuracy</prism:category>
    <prism:category>alignment</prism:category>
    <prism:category>alignment_accuracy</prism:category>
    <prism:category>method</prism:category>
    <prism:category>multiple_alignment</prism:category>
    <prism:category>phylogeny</prism:category>
    <prism:category>reconstruction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/2064285">
    <title>CisModule: de novo discovery of cis-regulatory modules by hierarchical mixture modeling.</title>
    <link>http://www.citeulike.org/user/dpollard/article/2064285</link>
    <description>&lt;i&gt;Proc Natl Acad Sci U S A, Vol. 101, No. 33. (17 August 2004), pp. 12114-12119.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The regulatory information for a eukaryotic gene is encoded in cis-regulatory modules. The binding sites for a set of interacting transcription factors have the tendency to colocalize to the same modules. Current de novo motif discovery methods do not take advantage of this knowledge. We propose a hierarchical mixture approach to model the cis-regulatory module structure. Based on the model, a new de novo motif-module discovery algorithm, CisModule, is developed for the Bayesian inference of module locations and within-module motif sites. Dynamic programming-like recursions are developed to reduce the computational complexity from exponential to linear in sequence length. By using both simulated and real data sets, we demonstrate that CisModule is not only accurate in predicting modules but also more sensitive in detecting motif patterns and binding sites than standard motif discovery methods are.</description>
    <dc:title>CisModule: de novo discovery of cis-regulatory modules by hierarchical mixture modeling.</dc:title>

    <dc:creator>Q Zhou</dc:creator>
    <dc:creator>WH Wong</dc:creator>
    <dc:identifier>doi:10.1073/pnas.0402858101</dc:identifier>
    <dc:source>Proc Natl Acad Sci U S A, Vol. 101, No. 33. (17 August 2004), pp. 12114-12119.</dc:source>
    <dc:date>2007-12-05T22:45:51-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Proc Natl Acad Sci U S A</prism:publicationName>
    <prism:issn>0027-8424</prism:issn>
    <prism:volume>101</prism:volume>
    <prism:number>33</prism:number>
    <prism:startingPage>12114</prism:startingPage>
    <prism:endingPage>12119</prism:endingPage>
    <prism:category>ab_initio</prism:category>
    <prism:category>cis_regulatory_elements</prism:category>
    <prism:category>clustering</prism:category>
    <prism:category>method</prism:category>
    <prism:category>prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/2060758">
    <title>Genome-wide analysis of clustered Dorsal binding sites identifies putative target genes in the Drosophila embryo.</title>
    <link>http://www.citeulike.org/user/dpollard/article/2060758</link>
    <description>&lt;i&gt;Proc Natl Acad Sci U S A, Vol. 99, No. 2. (22 January 2002), pp. 763-768.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Metazoan genomes contain vast tracts of cis-regulatory DNA that have been identified typically through tedious functional assays. As a result, it has not been possible to uncover a cis-regulatory code that links primary DNA sequences to gene expression patterns. In an initial effort to determine whether coordinately regulated genes share a common &#34;grammar,&#34; we have examined the distribution of Dorsal recognition sequences in the Drosophila genome. Dorsal is one of the best-characterized sequence-specific transcription factors in Drosophila. The homeobox gene zerknullt (zen) is repressed directly by Dorsal, and this repression is mediated by a 600-bp silencer, the ventral repression element (VRE), which contains four optimal Dorsal binding sites. The arrangement and sequence of the Dorsal recognition sequences in the VRE were used to develop a computational algorithm to search the Drosophila genome for clusters of optimal Dorsal binding sites. There are 15 regions in the genome that contain three or more optimal sites within a span of 400 bp or less. Three of these regions are associated with known Dorsal target genes: sog, zen, and Brinker. The Dorsal binding cluster in sog is shown to mediate lateral stripes of gene expression in response to low levels of the Dorsal gradient. Two of the remaining 12 clusters are shown to be associated with genes that exhibit asymmetric patterns of expression across the dorsoventral axis. These results suggest that bioinformatics can be used to identify novel target genes and associated regulatory DNAs in a gene network.</description>
    <dc:title>Genome-wide analysis of clustered Dorsal binding sites identifies putative target genes in the Drosophila embryo.</dc:title>

    <dc:creator>M Markstein</dc:creator>
    <dc:creator>P Markstein</dc:creator>
    <dc:creator>V Markstein</dc:creator>
    <dc:creator>MS Levine</dc:creator>
    <dc:identifier>doi:10.1073/pnas.012591199</dc:identifier>
    <dc:source>Proc Natl Acad Sci U S A, Vol. 99, No. 2. (22 January 2002), pp. 763-768.</dc:source>
    <dc:date>2007-12-05T12:38:16-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Proc Natl Acad Sci U S A</prism:publicationName>
    <prism:issn>0027-8424</prism:issn>
    <prism:volume>99</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>763</prism:startingPage>
    <prism:endingPage>768</prism:endingPage>
    <prism:category>cis_regulatory_elements</prism:category>
    <prism:category>clustering</prism:category>
    <prism:category>drosophila</prism:category>
    <prism:category>embryonic_patterning</prism:category>
    <prism:category>method</prism:category>
    <prism:category>prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1903551">
    <title>Accurate gene-tree reconstruction by learning gene- and species-specific substitution rates across multiple complete genomes</title>
    <link>http://www.citeulike.org/user/dpollard/article/1903551</link>
    <description>&lt;i&gt;Genome Res. (7 November 2007), gr.7105007.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Comparative genomics provides a general methodology for discovering functional DNA elements and understanding their evolution. The availability of many related genomes enables more powerful analyses, but requires rigorous phylogenetic methods to resolve orthologous genes and regions. Here, we use 12 recently sequenced Drosophila genomes and nine fungal genomes to address the problem of accurate gene-tree reconstruction across many complete genomes. We show that existing phylogenetic methods that treat each gene tree in isolation show large-scale inaccuracies, largely due to insufficient phylogenetic information in individual genes. However, we find that gene trees exhibit common properties that can be exploited for evolutionary studies and accurate phylogenetic reconstruction. Evolutionary rates can be decoupled into gene-specific and species-specific components, which can be learned across complete genomes. We develop a phylogenetic reconstruction methodology that exploits these properties and achieves significantly higher accuracy, addressing the species-level heterotachy and enabling studies of gene evolution in the context of species evolution. 10.1101/gr.7105007</description>
    <dc:title>Accurate gene-tree reconstruction by learning gene- and species-specific substitution rates across multiple complete genomes</dc:title>

    <dc:creator>Matthew Rasmussen</dc:creator>
    <dc:creator>Manolis Kellis</dc:creator>
    <dc:identifier>doi:10.1101/gr.7105007</dc:identifier>
    <dc:source>Genome Res. (7 November 2007), gr.7105007.</dc:source>
    <dc:date>2007-11-12T17:38:22-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Genome Res.</prism:publicationName>
    <prism:startingPage>gr.7105007</prism:startingPage>
    <prism:category>drosophila</prism:category>
    <prism:category>method</prism:category>
    <prism:category>phylogeny</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>yeast</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/785142">
    <title>The cost of inbreeding in Arabidopsis.</title>
    <link>http://www.citeulike.org/user/dpollard/article/785142</link>
    <description>&lt;i&gt;Nature, Vol. 416, No. 6880. (4 April 2002), pp. 531-534.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Population geneticists have long sought to estimate the distribution of selection intensities among genes of diverse function across the genome. Only recently have DNA sequencing and analytical techniques converged to make this possible. Important advances have come from comparing genetic variation within species (polymorphism) with fixed differences between species (divergence). These approaches have been used to examine individual genes for evidence of selection. Here we use the fact that the time since species divergence allows combination of data across genes. In a comparison of amino-acid replacements among species of the mustard weed Arabidopsis with those among species of the fruitfly Drosophila, we find evidence for predominantly beneficial gene substitutions in Drosophila but predominantly detrimental substitutions in Arabidopsis. We attribute this difference to the Arabidopsis mating system of partial self-fertilization, which corroborates a prediction of population genetics theory that species with a high frequency of inbreeding are less efficient in eliminating deleterious mutations owing to their reduced effective population size.</description>
    <dc:title>The cost of inbreeding in Arabidopsis.</dc:title>

    <dc:creator>CD Bustamante</dc:creator>
    <dc:creator>R Nielsen</dc:creator>
    <dc:creator>SA Sawyer</dc:creator>
    <dc:creator>KM Olsen</dc:creator>
    <dc:creator>MD Purugganan</dc:creator>
    <dc:creator>DL Hartl</dc:creator>
    <dc:identifier>doi:10.1038/416531a</dc:identifier>
    <dc:source>Nature, Vol. 416, No. 6880. (4 April 2002), pp. 531-534.</dc:source>
    <dc:date>2006-08-03T22:26:32-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Nature</prism:publicationName>
    <prism:issn>0028-0836</prism:issn>
    <prism:volume>416</prism:volume>
    <prism:number>6880</prism:number>
    <prism:startingPage>531</prism:startingPage>
    <prism:endingPage>534</prism:endingPage>
    <prism:category>method</prism:category>
    <prism:category>population_genetics</prism:category>
    <prism:category>test_for_selection</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1891904">
    <title>MORPH: Probabilistic Alignment Combined with Hidden Markov Models of cis-Regulatory Modules</title>
    <link>http://www.citeulike.org/user/dpollard/article/1891904</link>
    <description>&lt;i&gt;PLoS Computational Biology, Vol. 3, No. 11. (1 November 2007), e216.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The discovery and analysis of cis-regulatory modules (CRMs) in metazoan genomes is crucial for understanding the transcriptional control of development and many other biological processes. Cross-species sequence comparison holds much promise for improving computational prediction of CRMs, for elucidating their binding site composition, and for understanding how they evolve. Current methods for analyzing orthologous CRMs from multiple species rely upon sequence alignments produced by off-the-shelf alignment algorithms, which do not exploit the presence of binding sites in the sequences. We present here a unified probabilistic framework, called MORPH, that integrates the alignment task with binding site predictions, allowing more robust CRM analysis in two species. The framework sums over all possible alignments of two sequences, thus accounting for alignment ambiguities in a natural way. We perform extensive tests on orthologous CRMs from two moderately diverged species Drosophila melanogaster and D. mojavensis, to demonstrate the advantages of the new approach. We show that it can overcome certain computational artifacts of traditional alignment tools and provide a different, likely more accurate, picture of cis-regulatory evolution than that obtained from existing methods. The burgeoning field of cis-regulatory evolution, which is amply supported by the availability of many related genomes, is currently thwarted by the lack of accurate alignments of regulatory regions. Our work will fill in this void and enable more reliable analysis of CRM evolution.</description>
    <dc:title>MORPH: Probabilistic Alignment Combined with Hidden Markov Models of cis-Regulatory Modules</dc:title>

    <dc:creator>Saurabh Sinha</dc:creator>
    <dc:creator>Xin He</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.0030216</dc:identifier>
    <dc:source>PLoS Computational Biology, Vol. 3, No. 11. (1 November 2007), e216.</dc:source>
    <dc:date>2007-11-10T02:00:31-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Computational Biology</prism:publicationName>
    <prism:volume>3</prism:volume>
    <prism:number>11</prism:number>
    <prism:startingPage>e216</prism:startingPage>
    <prism:category>alignment</prism:category>
    <prism:category>alignment_accuracy</prism:category>
    <prism:category>binding_site_alignment</prism:category>
    <prism:category>brant_presented</prism:category>
    <prism:category>cis_regulatory_elements</prism:category>
    <prism:category>eisen_journal_club</prism:category>
    <prism:category>method</prism:category>
    <prism:category>round_robin</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1021805">
    <title>Transcriptional Control in the Segmentation Gene Network of Drosophila</title>
    <link>http://www.citeulike.org/user/dpollard/article/1021805</link>
    <description>&lt;i&gt;PLoS Biology, Vol. 2, No. 9. (1 September 2004), e271.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The segmentation gene network of Drosophila consists of maternal and zygotic factors that generate, by transcriptional (cross-) regulation, expression patterns of increasing complexity along the anterior-posterior axis of the embryo. Using known binding site information for maternal and zygotic gap transcription factors, the computer algorithm Ahab recovers known segmentation control elements (modules) with excellent success and predicts many novel modules within the network and genome-wide. We show that novel module predictions are highly enriched in the network and typically clustered proximal to the promoter, not only upstream, but also in intronic space and downstream. When placed upstream of a reporter gene, they consistently drive patterned blastoderm expression, in most cases faithfully producing one or more pattern elements of the endogenous gene. Moreover, we demonstrate for the entire set of known and newly validated modules that Ahab&#39;s prediction of binding sites correlates well with the expression patterns produced by the modules, revealing basic rules governing their composition. Specifically, we show that maternal factors consistently act as activators and that gap factors act as repressors, except for the bimodal factor Hunchback. Our data suggest a simple context-dependent rule for its switch from repressive to activating function. Overall, the composition of modules appears well fitted to the spatiotemporal distribution of their positive and negative input factors. Finally, by comparing Ahab predictions with different categories of transcription factor input, we confirm the global regulatory structure of the segmentation gene network, but find odd skipped behaving like a primary pair-rule gene. The study expands our knowledge of the segmentation gene network by increasing the number of experimentally tested modules by 50&#37;. 
For the first time, the entire set of validated modules is analyzed for binding site composition under a uniform set of criteria, permitting the definition of basic composition rules. The study demonstrates that computational methods are a powerful complement to experimental approaches in the analysis of transcription networks.</description>
    <dc:title>Transcriptional Control in the Segmentation Gene Network of Drosophila</dc:title>

    <dc:creator>Mark Schroeder</dc:creator>
    <dc:creator>Michael Pearce</dc:creator>
    <dc:creator>John Fak</dc:creator>
    <dc:creator>Hongqing Fan</dc:creator>
    <dc:creator>Ulrich Unnerstall</dc:creator>
    <dc:creator>Eldon Emberly</dc:creator>
    <dc:creator>Nikolaus Rajewsky</dc:creator>
    <dc:creator>Eric Siggia</dc:creator>
    <dc:creator>Ulrike Gaul</dc:creator>
    <dc:identifier>doi:10.1371/journal.pbio.0020271</dc:identifier>
    <dc:source>PLoS Biology, Vol. 2, No. 9. (1 September 2004), e271.</dc:source>
    <dc:date>2007-01-01T19:51:52-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>PLoS Biology</prism:publicationName>
    <prism:volume>2</prism:volume>
    <prism:number>9</prism:number>
    <prism:startingPage>e271</prism:startingPage>
    <prism:category>cis_regulatory_elements</prism:category>
    <prism:category>embryonic_patterning</prism:category>
    <prism:category>expression_pattern</prism:category>
    <prism:category>method</prism:category>
    <prism:category>modeling</prism:category>
    <prism:category>prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1570977">
    <title>Bayesian estimation of concordance among gene trees.</title>
    <link>http://www.citeulike.org/user/dpollard/article/1570977</link>
    <description>&lt;i&gt;Mol Biol Evol, Vol. 24, No. 2. (February 2007), pp. 412-426.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Multigene sequence data have great potential for elucidating important and interesting evolutionary processes, but statistical methods for extracting information from such data remain limited. Although various biological processes may cause different genes to have different genealogical histories (and hence different tree topologies), we also may expect that the number of distinct topologies among a set of genes is relatively small compared with the number of possible topologies. Therefore evidence about the tree topology for one gene should influence our inferences of the tree topology on a different gene, but to what extent? In this paper, we present a new approach for modeling and estimating concordance among a set of gene trees given aligned molecular sequence data. Our approach introduces a one-parameter probability distribution to describe the prior distribution of concordance among gene trees. We describe a novel 2-stage Markov chain Monte Carlo (MCMC) method that first obtains independent Bayesian posterior probability distributions for individual genes using standard methods. These posterior distributions are then used as input for a second MCMC procedure that estimates a posterior distribution of gene-to-tree maps (GTMs). The posterior distribution of GTMs can then be summarized to provide revised posterior probability distributions for each gene (taking account of concordance) and to allow estimation of the proportion of the sampled genes for which any given clade is true (the sample-wide concordance factor). Further, under the assumption that the sampled genes are drawn randomly from a genome of known size, we show how one can obtain an estimate, with credibility intervals, on the proportion of the entire genome for which a clade is true (the genome-wide concordance factor). 
We demonstrate the method on a set of 106 genes from 8 yeast species.</description>
    <dc:title>Bayesian estimation of concordance among gene trees.</dc:title>

    <dc:creator>C Ané</dc:creator>
    <dc:creator>B Larget</dc:creator>
    <dc:creator>DA Baum</dc:creator>
    <dc:creator>SD Smith</dc:creator>
    <dc:creator>A Rokas</dc:creator>
    <dc:source>Mol Biol Evol, Vol. 24, No. 2. (February 2007), pp. 412-426.</dc:source>
    <dc:date>2007-08-17T00:06:25-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Mol Biol Evol</prism:publicationName>
    <prism:issn>0737-4038</prism:issn>
    <prism:volume>24</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>412</prism:startingPage>
    <prism:endingPage>426</prism:endingPage>
    <prism:category>concordance</prism:category>
    <prism:category>gene_tree</prism:category>
    <prism:category>gene_vs_species_tree</prism:category>
    <prism:category>method</prism:category>
    <prism:category>phylogeny</prism:category>
    <prism:category>speciation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/489958">
    <title>Discovery of Regulatory Elements by a Computational Method for Phylogenetic Footprinting</title>
    <link>http://www.citeulike.org/user/dpollard/article/489958</link>
    <description>&lt;i&gt;Genome Res., Vol. 12, No. 5. (1 May 2002), pp. 739-748.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Phylogenetic footprinting is a method for the discovery of regulatory elements in a set of orthologous regulatory regions from multiple species. It does so by identifying the best conserved motifs in those orthologous regions. We describe a computer algorithm designed specifically for this purpose, making use of the phylogenetic relationships among the sequences under study to make more accurate predictions. The program is guaranteed to report all sets of motifs with the lowest parsimony scores, calculated with respect to the phylogenetic tree relating the input species. We report the results of this algorithm on several data sets of interest. A large number of known functional binding sites are identified by our method, but we also find several highly conserved motifs for which no function is yet known.</description>
    <dc:title>Discovery of Regulatory Elements by a Computational Method for Phylogenetic Footprinting</dc:title>

    <dc:creator>Mathieu Blanchette</dc:creator>
    <dc:creator>Martin Tompa</dc:creator>
    <dc:identifier>doi:10.1101/gr.6902</dc:identifier>
    <dc:source>Genome Res., Vol. 12, No. 5. (1 May 2002), pp. 739-748.</dc:source>
    <dc:date>2006-02-02T20:22:09-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Genome Res.</prism:publicationName>
    <prism:volume>12</prism:volume>
    <prism:number>5</prism:number>
    <prism:startingPage>739</prism:startingPage>
    <prism:endingPage>748</prism:endingPage>
    <prism:category>ab_initio</prism:category>
    <prism:category>binding_site</prism:category>
    <prism:category>conservation</prism:category>
    <prism:category>method</prism:category>
    <prism:category>prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1818073">
    <title>Enhancing the quality of phylogenetic analysis using fuzzy hidden Markov model alignments.</title>
    <link>http://www.citeulike.org/user/dpollard/article/1818073</link>
    <description>&lt;i&gt;Medinfo, Vol. 12, No. Pt 2. (2007), pp. 1245-1249.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Any effective phylogeny inference based on molecular data begins by performing efficient multiple sequence alignments. So far, the Hidden Markov Model (HMM) method for multiple sequence alignment has been proved competitive to the classical deterministic algorithms with respect to phylogenetic analysis; nevertheless, its stochastic nature does not help it cope with the existing dependence among the sequence elements. This paper deals with phylogenetic analysis of protein and gene data using multiple sequence alignments produced by fuzzy profile Hidden Markov Models. Fuzzy profile HMMs are a novel type of profile HMMs based on fuzzy sets and fuzzy integrals, which generalize the classical stochastic HMM by relaxing its independence assumptions. In this paper, alignments produced by the fuzzy HMM model are used in phylogenetic analysis of protein data, enhancing the quality of phylogenetic trees. The new methodology is implemented in HPV virus phylogenetic inference. The results of the analysis are compared against those obtained by the classical profile HMM model and depict the superiority of the fuzzy profile HMM in this field.</description>
    <dc:title>Enhancing the quality of phylogenetic analysis using fuzzy hidden Markov model alignments.</dc:title>

    <dc:creator>C Collyda</dc:creator>
    <dc:creator>S Diplaris</dc:creator>
    <dc:creator>P Mitkas</dc:creator>
    <dc:creator>N Maglaveras</dc:creator>
    <dc:creator>C Pappas</dc:creator>
    <dc:source>Medinfo, Vol. 12, No. Pt 2. (2007), pp. 1245-1249.</dc:source>
    <dc:date>2007-10-25T01:22:43-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Medinfo</prism:publicationName>
    <prism:volume>12</prism:volume>
    <prism:number>Pt 2</prism:number>
    <prism:startingPage>1245</prism:startingPage>
    <prism:endingPage>1249</prism:endingPage>
    <prism:category>alignment</prism:category>
    <prism:category>alignment_accuracy</prism:category>
    <prism:category>method</prism:category>
    <prism:category>phylogeny</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1746568">
    <title>Incorporating evolution of transcription factor binding sites into annotated alignments.</title>
    <link>http://www.citeulike.org/user/dpollard/article/1746568</link>
    <description>&lt;i&gt;J Biosci, Vol. 32, No. 5. (August 2007), pp. 841-850.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Identifying transcription factor binding sites (TFBSs) is essential to elucidate putative regulatory mechanisms. A common strategy is to combine cross-species conservation with single sequence TFBS annotation to yield &#34;conserved TFBSs&#34;. Most current methods in this field adopt a multi-step approach that segregates the two aspects. Again, it is widely accepted that the evolutionary dynamics of binding sites differ from those of the surrounding sequence. Hence, it is desirable to have an approach that explicitly takes this factor into account. Although a plethora of approaches have been proposed for the prediction of conserved TFBSs, very few explicitly model TFBS evolutionary properties, while additionally being multi-step. Recently, we introduced a novel approach to simultaneously align and annotate conserved TFBSs in a pair of sequences. Building upon the standard Smith-Waterman algorithm for local alignments, SimAnn introduces additional states for profiles to output extended alignments or annotated alignments. That is, alignments with parts annotated as gaplessly aligned TFBSs (pair-profile hits) are generated. Moreover, the pair-profile related parameters are derived in a sound statistical framework. In this article, we extend this approach to explicitly incorporate evolution of binding sites in the SimAnn framework. We demonstrate the extension in the theoretical derivations through two position-specific evolutionary models, previously used for modelling TFBS evolution. In a simulated setting, we provide a proof of concept that the approach works given the underlying assumptions, as compared to the original work. Finally, using a real dataset of experimentally verified binding sites in human-mouse sequence pairs, we compare the new approach (eSimAnn) to an existing multi-step tool that also considers TFBS evolution. 
Although it is widely accepted that binding sites evolve differently from the surrounding sequences, most comparative TFBS identification methods do not explicitly consider this. Additionally, prediction of conserved binding sites is carried out in a multi-step approach that segregates alignment from TFBS annotation. In this paper, we demonstrate how the simultaneous alignment and annotation approach of SimAnn can be further extended to incorporate TFBS evolutionary relationships. We study how alignments and binding site predictions interplay at varying evolutionary distances and for various profile qualities.</description>
    <dc:title>Incorporating evolution of transcription factor binding sites into annotated alignments.</dc:title>

    <dc:creator>AS Bais</dc:creator>
    <dc:creator>S Grossmann</dc:creator>
    <dc:creator>M Vingron</dc:creator>
    <dc:source>J Biosci, Vol. 32, No. 5. (August 2007), pp. 841-850.</dc:source>
    <dc:date>2007-10-09T17:18:49-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>J Biosci</prism:publicationName>
    <prism:issn>0250-5991</prism:issn>
    <prism:volume>32</prism:volume>
    <prism:number>5</prism:number>
    <prism:startingPage>841</prism:startingPage>
    <prism:endingPage>850</prism:endingPage>
    <prism:category>alignment</prism:category>
    <prism:category>binding_site</prism:category>
    <prism:category>conserved</prism:category>
    <prism:category>method</prism:category>
    <prism:category>prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1390802">
    <title>Systematic detection of statistically overrepresented DNA motif association rules.</title>
    <link>http://www.citeulike.org/user/dpollard/article/1390802</link>
    <description>&lt;i&gt;Genome Inform, Vol. 17, No. 1. (2006), pp. 124-133.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;DNA motifs, or cis-elements, are short nucleotide sequence patterns recognized by various transcription factors (TFs). In promoters, these TFs bind in a complex combinatorial manner in order to regulate the expression of a downstream gene. The combinatorial space is frequently large and difficult to manage since vertebrates have thousands of transcription factors and more than 20,000 genes. We introduce a computer program called CAYCE (Combinatorial AnalYsis of Cis-Elements) that systematically detects statistically overrepresented DNA motif association rules independent of Microarray information. CAYCE is an adaptation of the apriori algorithm traditionally used for association rule mining, but offers three significant advancements. (1) It analyzes multiple occurrences of an item, corresponding to multiple TF binding sites, (2) It compares results with a biologically relevant background, and (3), it provides p-values for straightforward statistical interpretation. CAYCE can be easily applied to any item-set data where the investigator is also interested in multiple occurrences of a single item, and/or overrepresentation of association rules compared with a background. Applying CAYCE to human promoters in 1% of the human genome, we discover that motif clusters containing five repetitions of SP1 are the most statistically significant.</description>
    <dc:title>Systematic detection of statistically overrepresented DNA motif association rules.</dc:title>

    <dc:creator>JM Lin</dc:creator>
    <dc:creator>Z Weng</dc:creator>
    <dc:source>Genome Inform, Vol. 17, No. 1. (2006), pp. 124-133.</dc:source>
    <dc:date>2007-06-14T23:03:18-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Genome Inform</prism:publicationName>
    <prism:issn>0919-9454</prism:issn>
    <prism:volume>17</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>124</prism:startingPage>
    <prism:endingPage>133</prism:endingPage>
    <prism:category>ab_initio</prism:category>
    <prism:category>binding_site</prism:category>
    <prism:category>eisen_journal_club</prism:category>
    <prism:category>method</prism:category>
    <prism:category>motif_detection</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>round_robin</prism:category>
    <prism:category>rules</prism:category>
    <prism:category>venky_presented</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1390791">
    <title>Computational and experimental approaches for modeling gene regulatory networks.</title>
    <link>http://www.citeulike.org/user/dpollard/article/1390791</link>
    <description>&lt;i&gt;Curr Pharm Des, Vol. 13, No. 14. (2007), pp. 1415-1436.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;To understand most cellular processes, one must understand how genetic information is processed. A formidable challenge is the dissection of gene regulatory networks to delineate how eukaryotic cells coordinate and govern patterns of gene expression that ultimately lead to a phenotype. In this paper, we review several approaches for modeling eukaryotic gene regulatory networks and for reverse engineering such networks from experimental observations. Since we are interested in elucidating the transcriptional regulatory mechanisms of colon cancer progression, we use this important biological problem to illustrate various aspects of modeling gene regulation. We discuss four important models: gene networks, transcriptional regulatory systems, Boolean networks, and dynamical Bayesian networks. We review state-of-the-art functional genomics techniques, such as gene expression profiling, cis-regulatory element identification, TF target gene identification, and gene silencing by RNA interference, which can be used to extract information about gene regulation. We can employ this information, in conjunction with appropriately designed reverse engineering algorithms, to construct a computational model of gene regulation that sufficiently predicts experimental observations. In the last part of this review, we focus on the problem of reverse engineering transcriptional regulatory networks by gene perturbations. We mathematically formulate this problem and discuss the role of experimental resolution in our ability to reconstruct accurate models of gene regulation. We conclude, by discussing a promising approach for inferring a transcriptional regulatory system from microarray data obtained by gene perturbations.</description>
    <dc:title>Computational and experimental approaches for modeling gene regulatory networks.</dc:title>

    <dc:creator>J Goutsias</dc:creator>
    <dc:creator>NH Lee</dc:creator>
    <dc:source>Curr Pharm Des, Vol. 13, No. 14. (2007), pp. 1415-1436.</dc:source>
    <dc:date>2007-06-14T22:48:01-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Curr Pharm Des</prism:publicationName>
    <prism:issn>1873-4286</prism:issn>
    <prism:volume>13</prism:volume>
    <prism:number>14</prism:number>
    <prism:startingPage>1415</prism:startingPage>
    <prism:endingPage>1436</prism:endingPage>
    <prism:category>computational</prism:category>
    <prism:category>experimental</prism:category>
    <prism:category>method</prism:category>
    <prism:category>modeling</prism:category>
    <prism:category>regulatory_network</prism:category>
    <prism:category>review</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1290009">
    <title>Binding Site Graphs: A New Graph Theoretical Framework for Prediction of Transcription Factor Binding Sites</title>
    <link>http://www.citeulike.org/user/dpollard/article/1290009</link>
    <description>&lt;i&gt;PLoS Computational Biology, Vol. 3, No. 5. (1 May 2007), e90.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Computational prediction of nucleotide binding specificity for transcription factors remains a fundamental and largely unsolved problem. Determination of binding positions is a prerequisite for research in gene regulation, a major mechanism controlling phenotypic diversity. Furthermore, an accurate determination of binding specificities from high-throughput data sources is necessary to realize the full potential of systems biology. Unfortunately, recently performed independent evaluation showed that more than half the predictions from most widely used algorithms are false. We introduce a graph-theoretical framework to describe local sequence similarity as the pair-wise distances between nucleotides in promoter sequences, and hypothesize that densely connected subgraphs are indicative of transcription factor binding sites. Using a well-established sampling algorithm coupled with simple clustering and scoring schemes, we identify sets of closely related nucleotides and test those for known TF binding activity. Using an independent benchmark, we find our algorithm predicts yeast binding motifs considerably better than currently available techniques and without manual curation. Importantly, we reduce the number of false positive predictions in yeast to less than 30&#37;. We also develop a framework to evaluate the statistical significance of our motif predictions. We show that our approach is robust to the choice of input promoters, and thus can be used in the context of predicting binding positions from noisy experimental data. We apply our method to identify binding sites using data from genome scale ChIP&#8211;chip experiments. Results from these experiments are publicly available at http://cagt10.bu.edu/BSG. The graphical framework developed here may be useful when combining predictions from numerous computational and experimental measures. 
Finally, we discuss how our algorithm can be used to improve the sensitivity of computational predictions of transcription factor binding specificities.</description>
    <dc:title>Binding Site Graphs: A New Graph Theoretical Framework for Prediction of Transcription Factor Binding Sites</dc:title>

    <dc:creator>Timothy Reddy</dc:creator>
    <dc:creator>Charles Delisi</dc:creator>
    <dc:creator>Boris Shakhnovich</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.0030090</dc:identifier>
    <dc:source>PLoS Computational Biology, Vol. 3, No. 5. (1 May 2007), e90.</dc:source>
    <dc:date>2007-05-11T14:56:35-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Computational Biology</prism:publicationName>
    <prism:volume>3</prism:volume>
    <prism:number>5</prism:number>
    <prism:startingPage>e90</prism:startingPage>
    <prism:category>ab_initio</prism:category>
    <prism:category>binding_site</prism:category>
    <prism:category>eisen_journal_club</prism:category>
    <prism:category>method</prism:category>
    <prism:category>motif</prism:category>
    <prism:category>round_robin</prism:category>
    <prism:category>venky_presented</prism:category>
    <prism:category>yeast</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/826186">
    <title>Determination of local statistical significance of patterns in Markov sequences with application to promoter element identification.</title>
    <link>http://www.citeulike.org/user/dpollard/article/826186</link>
    <description>&lt;i&gt;J Comput Biol, Vol. 11, No. 1. (2004), pp. 1-14.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;High-level eukaryotic genomes present a particular challenge to the computational identification of transcription factor binding sites (TFBSs) because of their long noncoding regions and large numbers of repeat elements. This is evidenced by the noisy results generated by most current methods. In this paper, we present a p-value-based scoring scheme using probability generating functions to evaluate the statistical significance of potential TFBSs. Furthermore, we introduce the local genomic context into the model so that candidate sites are evaluated based both on their similarities to known binding sites and on their contrasts against their respective local genomic contexts. We demonstrate that our approach is advantageous in the prediction of myogenin and MEF2 binding sites in the human genome. We also apply LMM to large-scale human binding site sequences in situ and found that, compared to current popular methods, LMM analysis can reduce false positive errors by more than 50% without compromising sensitivity. This improvement will be of importance to any subsequent algorithm that aims to detect regulatory modules based on known PSSMs.</description>
    <dc:title>Determination of local statistical significance of patterns in Markov sequences with application to promoter element identification.</dc:title>

    <dc:creator>H Huang</dc:creator>
    <dc:creator>MC Kao</dc:creator>
    <dc:creator>X Zhou</dc:creator>
    <dc:creator>JS Liu</dc:creator>
    <dc:creator>WH Wong</dc:creator>
    <dc:identifier>doi:10.1089/106652704773416858</dc:identifier>
    <dc:source>J Comput Biol, Vol. 11, No. 1. (2004), pp. 1-14.</dc:source>
    <dc:date>2006-09-03T04:34:23-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>J Comput Biol</prism:publicationName>
    <prism:issn>1066-5277</prism:issn>
    <prism:volume>11</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>1</prism:startingPage>
    <prism:endingPage>14</prism:endingPage>
    <prism:category>ab_initio</prism:category>
    <prism:category>binding_site</prism:category>
    <prism:category>eisen_journal_club</prism:category>
    <prism:category>markov</prism:category>
    <prism:category>method</prism:category>
    <prism:category>motif</prism:category>
    <prism:category>round_robin</prism:category>
    <prism:category>stuart_presented</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1352994">
    <title>Localizing Recent Adaptive Evolution in the Human Genome</title>
    <link>http://www.citeulike.org/user/dpollard/article/1352994</link>
    <description>&lt;i&gt;PLoS Genetics, Vol. 3, No. 6. (1 June 2007), e90.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Identifying genomic locations that have experienced selective sweeps is an important first step toward understanding the molecular basis of adaptive evolution. Using statistical methods that account for the confounding effects of population demography, recombination rate variation, and single-nucleotide polymorphism ascertainment, while also providing fine-scale estimates of the position of the selected site, we analyzed a genomic dataset of 1.2 million human single-nucleotide polymorphisms genotyped in African-American, European-American, and Chinese samples. We identify 101 regions of the human genome with very strong evidence (p &#60; 10&#8722;5) of a recent selective sweep and where our estimate of the position of the selective sweep falls within 100 kb of a known gene. Within these regions, genes of biological interest include genes in pigmentation pathways, components of the dystrophin protein complex, clusters of olfactory receptors, genes involved in nervous system development and function, immune system genes, and heat shock genes. We also observe consistent evidence of selective sweeps in centromeric regions. In general, we find that recent adaptation is strikingly pervasive in the human genome, with as much as 10&#37; of the genome affected by linkage to a selective sweep.</description>
    <dc:title>Localizing Recent Adaptive Evolution in the Human Genome</dc:title>

    <dc:creator>Scott Williamson</dc:creator>
    <dc:creator>Melissa Hubisz</dc:creator>
    <dc:creator>Andrew Clark</dc:creator>
    <dc:creator>Bret Payseur</dc:creator>
    <dc:creator>Carlos Bustamante</dc:creator>
    <dc:creator>Rasmus Nielsen</dc:creator>
    <dc:identifier>doi:10.1371/journal.pgen.0030090</dc:identifier>
    <dc:source>PLoS Genetics, Vol. 3, No. 6. (1 June 2007), e90.</dc:source>
    <dc:date>2007-05-31T22:21:33-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Genetics</prism:publicationName>
    <prism:volume>3</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>e90</prism:startingPage>
    <prism:category>adaptive_evolution</prism:category>
    <prism:category>dan_presented</prism:category>
    <prism:category>eisen_journal_club</prism:category>
    <prism:category>human</prism:category>
    <prism:category>method</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>round_robin</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1295925">
    <title>Binding Site Graphs: A New Graph Theoretical Framework for Prediction of Transcription Factor Binding Sites</title>
    <link>http://www.citeulike.org/user/dpollard/article/1295925</link>
    <description>&lt;i&gt;PLoS Computational Biology, Vol. 3, No. 5. (1 May 2007), e90.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Computational prediction of nucleotide binding specificity for transcription factors remains a fundamental and largely unsolved problem. Determination of binding positions is a prerequisite for research in gene regulation, a major mechanism controlling phenotypic diversity. Furthermore, an accurate determination of binding specificities from high-throughput data sources is necessary to realize the full potential of systems biology. Unfortunately, recently performed independent evaluation showed that more than half the predictions from most widely used algorithms are false. We introduce a graph-theoretical framework to describe local sequence similarity as the pair-wise distances between nucleotides in promoter sequences, and hypothesize that densely connected subgraphs are indicative of transcription factor binding sites. Using a well-established sampling algorithm coupled with simple clustering and scoring schemes, we identify sets of closely related nucleotides and test those for known TF binding activity. Using an independent benchmark, we find our algorithm predicts yeast binding motifs considerably better than currently available techniques and without manual curation. Importantly, we reduce the number of false positive predictions in yeast to less than 30&#37;. We also develop a framework to evaluate the statistical significance of our motif predictions. We show that our approach is robust to the choice of input promoters, and thus can be used in the context of predicting binding positions from noisy experimental data. We apply our method to identify binding sites using data from genome scale ChIP&#8211;chip experiments. Results from these experiments are publicly available at http://cagt10.bu.edu/BSG. The graphical framework developed here may be useful when combining predictions from numerous computational and experimental measures. 
Finally, we discuss how our algorithm can be used to improve the sensitivity of computational predictions of transcription factor binding specificities.</description>
    <dc:title>Binding Site Graphs: A New Graph Theoretical Framework for Prediction of Transcription Factor Binding Sites</dc:title>

    <dc:creator>Timothy Reddy</dc:creator>
    <dc:creator>Charles Delisi</dc:creator>
    <dc:creator>Boris Shakhnovich</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.0030090</dc:identifier>
    <dc:source>PLoS Computational Biology, Vol. 3, No. 5. (1 May 2007), e90.</dc:source>
    <dc:date>2007-05-14T21:23:11-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Computational Biology</prism:publicationName>
    <prism:volume>3</prism:volume>
    <prism:number>5</prism:number>
    <prism:startingPage>e90</prism:startingPage>
    <prism:category>cis_regulatory_elements</prism:category>
    <prism:category>method</prism:category>
    <prism:category>prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1352944">
    <title>Imputation-based analysis of association studies: candidate regions and quantitative traits</title>
    <link>http://www.citeulike.org/user/dpollard/article/1352944</link>
    <description>&lt;i&gt;PLoS Genetics, Vol. preprint, No. 2007. (1 May 2007), e114.eor.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We introduce a new statistical approach to the analysis of association studies. The approach is particularly designed to exploit the availability of dense SNP data on a &#34;panel&#34; of unrelated individuals who are not part of the study group (eg HapMap data, or resequencing data in a candidate region of interest), to allow markers that are not typed in the study group to be tested for association with the phenotype. Specifically, we use patterns of Linkage Disequilibrium in the dense marker set in the panel, together with genotype data on a less dense marker set in the study group, to estimate the genotype data in the study group at the dense set of markers, and then assess association between the phenotype and these estimated genotypes, using Bayesian methods to allow for uncertainty in the estimated genotypes. Compared with standard single-SNP tests, the approach results in an increase in power to detect association, even in cases where the causal variant is typed, with the greatest gain being when multiple functional variants are present in the region of study. In addition to this increase in power to detect association, the approach also provides more interpretable explanations for observed associations, including assessing, for each SNP, the strength of the evidence that it (rather than another correlated SNP) has a functional effect on the phenotype. Although our paper focuses on the situation where a quantitative phenotype is assessed for association with SNPs in a relatively restricted region (eg a candidate gene), many of the methods we describe are applicable, and computationally practical, for whole genome association studies.</description>
    <dc:title>Imputation-based analysis of association studies: candidate regions and quantitative traits</dc:title>

    <dc:creator>Bertrand Servin</dc:creator>
    <dc:creator>Matthew Stephens</dc:creator>
    <dc:identifier>doi:10.1371/journal.pgen.0030114.eor</dc:identifier>
    <dc:source>PLoS Genetics, Vol. preprint, No. 2007. (1 May 2007), e114.eor.</dc:source>
    <dc:date>2007-05-31T21:44:42-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Genetics</prism:publicationName>
    <prism:volume>preprint</prism:volume>
    <prism:number>2007</prism:number>
    <prism:startingPage>e114.eor</prism:startingPage>
    <prism:category>association_mapping</prism:category>
    <prism:category>method</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1320184">
    <title>SPACER: identification of cis-regulatory elements with non-contiguous critical residues.</title>
    <link>http://www.citeulike.org/user/dpollard/article/1320184</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 23, No. 8. (15 April 2007), pp. 1029-1031.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;MOTIVATION: Many transcription factors bind to sites that are long and loosely related to each other. De novo identification of such motifs is computationally challenging. In this article, we propose a novel semi-greedy algorithm over the space of all IUPAC degenerate strings to identify the most over-represented highly degenerate motifs. RESULTS: We present an implementation of this algorithm, named SPACER (Separated Pattern-based Algorithm for cis-Element Recognition) and demonstrate its effectiveness in identifying 'gapped' and highly degenerate motifs. We compare SPACER's performance against ten motif finders on 42 experimentally defined regulons from Bacillus subtilis, Escherichia coli and Saccharomyces cerevisiae. These motif finders cover a wide range of both enumerative and statistical approaches, including programs specifically designed for prokaryotic and 'gapped' motifs. AVAILABILITY: A Java 1.4 implementation is freely available on the Web at http://genie.Dartmouth.edu/SPACER/</description>
    <dc:title>SPACER: identification of cis-regulatory elements with non-contiguous critical residues.</dc:title>

    <dc:creator>A Chakravarty</dc:creator>
    <dc:creator>JM Carlson</dc:creator>
    <dc:creator>RS Khetani</dc:creator>
    <dc:creator>CE DeZiel</dc:creator>
    <dc:creator>RH Gross</dc:creator>
    <dc:source>Bioinformatics, Vol. 23, No. 8. (15 April 2007), pp. 1029-1031.</dc:source>
    <dc:date>2007-05-23T00:09:39-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1460-2059</prism:issn>
    <prism:volume>23</prism:volume>
    <prism:number>8</prism:number>
    <prism:startingPage>1029</prism:startingPage>
    <prism:endingPage>1031</prism:endingPage>
    <prism:category>binding_site</prism:category>
    <prism:category>cis_regulatory_elements</prism:category>
    <prism:category>clustering</prism:category>
    <prism:category>method</prism:category>
    <prism:category>prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1320157">
    <title>STAMP: a web tool for exploring DNA-binding motif similarities.</title>
    <link>http://www.citeulike.org/user/dpollard/article/1320157</link>
    <description>&lt;i&gt;Nucleic Acids Res (3 May 2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;STAMP is a newly developed web server that is designed to support the study of DNA-binding motifs. STAMP may be used to query motifs against databases of known motifs; the software aligns input motifs against the chosen database (or alternatively against a user-provided dataset), and lists of the highest-scoring matches are returned. Such similarity-search functionality is expected to facilitate the identification of transcription factors that potentially interact with newly discovered motifs. STAMP also automatically builds multiple alignments, familial binding profiles and similarity trees when more than one motif is inputted. These functions are expected to enable evolutionary studies on sets of related motifs and fixed-order regulatory modules, as well as illustrating similarities and redundancies within the input motif collection. STAMP is a highly flexible alignment platform, allowing users to 'mix-and-match' between various implemented comparison metrics, alignment methods (local or global, gapped or ungapped), multiple alignment strategies and tree-building methods. Motifs may be inputted as frequency matrices (in many of the commonly used formats), consensus sequences, or alignments of known binding sites. STAMP also directly accepts the output files from 12 supported motif-finders, enabling quick interpretation of motif-discovery analyses. STAMP is available at http://www.benoslab.pitt.edu/stamp.</description>
    <dc:title>STAMP: a web tool for exploring DNA-binding motif similarities.</dc:title>

    <dc:creator>Shaun Mahony</dc:creator>
    <dc:creator>Panayiotis V Benos</dc:creator>
    <dc:source>Nucleic Acids Res (3 May 2007)</dc:source>
    <dc:date>2007-05-22T23:40:05-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Nucleic Acids Res</prism:publicationName>
    <prism:issn>1362-4962</prism:issn>
    <prism:category>alignment</prism:category>
    <prism:category>method</prism:category>
    <prism:category>motif</prism:category>
    <prism:category>pwm</prism:category>
    <prism:category>website</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1320137">
    <title>A phylogenetic Gibbs sampler that yields centroid solutions for cis regulatory site prediction.</title>
    <link>http://www.citeulike.org/user/dpollard/article/1320137</link>
    <description>&lt;i&gt;Bioinformatics (8 May 2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;MOTIVATION: Identification of functionally conserved regulatory elements in sequence data from closely related organisms is becoming feasible, due to the rapid growth of public sequence databases. Closely related organisms are most likely to have common regulatory motifs; however, the recent speciation of such organisms results in the high degree of correlation in their genome sequences, confounding the detection of functional elements. Additionally, alignment algorithms that use optimization techniques are limited to the detection of a single alignment that may not be representative. Comparative-genomics studies must be able to address the phylogenetic correlation in the data and efficiently explore the alignment space, in order to make specific and biologically relevant predictions. RESULTS: We describe here a Gibbs sampler that employs a full phylogenetic model and reports an ensemble centroid solution. We describe regulatory motif detection using both simulated and real data, and demonstrate that this approach achieves improved specificity, sensitivity, and positive predictive value over non-phylogenetic algorithms, and over phylogenetic algorithms that report a maximum likelihood solution. AVAILABILITY: The software is freely available at http://bayesweb.wadsworth.org/gibbs/gibbs.html.</description>
    <dc:title>A phylogenetic Gibbs sampler that yields centroid solutions for cis regulatory site prediction.</dc:title>

    <dc:creator>Lee A Newberg</dc:creator>
    <dc:creator>William A Thompson</dc:creator>
    <dc:creator>Sean Conlan</dc:creator>
    <dc:creator>Thomas M Smith</dc:creator>
    <dc:creator>Lee Ann McCue</dc:creator>
    <dc:creator>Charles E Lawrence</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/btm241</dc:identifier>
    <dc:source>Bioinformatics (8 May 2007)</dc:source>
    <dc:date>2007-05-22T23:27:43-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1460-2059</prism:issn>
    <prism:category>cis_regulatory_elements</prism:category>
    <prism:category>clustering</prism:category>
    <prism:category>discovery</prism:category>
    <prism:category>method</prism:category>
    <prism:category>motif</prism:category>
    <prism:category>prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1320133">
    <title>RE-MuSiC: a tool for multiple sequence alignment with regular expression constraints.</title>
    <link>http://www.citeulike.org/user/dpollard/article/1320133</link>
    <description>&lt;i&gt;Nucleic Acids Res (8 May 2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;RE-MuSiC is a web-based multiple sequence alignment tool that can incorporate biological knowledge about structure, function, or conserved patterns regarding the sequences of interest. It accepts amino acid or nucleic acid sequences and a set of constraints as inputs. The constraints are pattern descriptions, instead of exact positions of fragments to be aligned together. The output is an alignment where for each pattern (constraint), an occurrence on each sequence can be found aligned together with those on the other sequences, in a manner that the overall alignment is optimized. Its predecessor, MuSiC, has been found useful by researchers since its release in 2004. However, it is noticed in applications that the pattern formulation adopted in MuSiC, namely, plain strings allowing mismatches, is not expressive and flexible enough. The constraint formulation adopted in RE-MuSiC is therefore enhanced to be regular expressions, which is convenient in expressing many biologically significant patterns like those collected in the PROSITE database, or structural consensuses that often involve variable ranges between conserved parts. Experiments demonstrate that RE-MuSiC can be used to help predict important residues and locate phylogenetically conserved structural elements. RE-MuSiC is available on-line at http://140.113.239.131/RE-MUSIC.</description>
    <dc:title>RE-MuSiC: a tool for multiple sequence alignment with regular expression constraints.</dc:title>

    <dc:creator>Yun-Sheng Chung</dc:creator>
    <dc:creator>Wei-Hsun Lee</dc:creator>
    <dc:creator>Chuan Yi Tang</dc:creator>
    <dc:creator>Chin Lung Lu</dc:creator>
    <dc:source>Nucleic Acids Res (8 May 2007)</dc:source>
    <dc:date>2007-05-22T23:24:51-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Nucleic Acids Res</prism:publicationName>
    <prism:issn>1362-4962</prism:issn>
    <prism:category>method</prism:category>
    <prism:category>multiple_alignment</prism:category>
    <prism:category>regular_expressions</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1218043">
    <title>Discovering transcriptional regulatory regions in Drosophila by a nonalignment method for phylogenetic footprinting.</title>
    <link>http://www.citeulike.org/user/dpollard/article/1218043</link>
    <description>&lt;i&gt;Proc Natl Acad Sci U S A (29 March 2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The functional annotation of the nonprotein-coding DNA of eukaryotic genomes is a problem of central importance. Phylogenetic footprinting methods, which attempt to identify functional regulatory regions by comparing orthologous genomic sequences of evolutionarily related species, have shown promising results. The main advantage of this class of approaches is that they do not require any knowledge of the regulating transcription factors. Here we describe a method called Enhancer Detection using only Genomic Information (EDGI), which integrates a traditional motif-discovery algorithm with a local permutation-clustering algorithm. Together, they can identify large regulatory elements (e.g., enhancers) as evolutionarily conserved order-independent clusters of short conserved motifs. We show that EDGI can distinguish between established sets of known enhancers and nonenhancers with 88% accuracy, rivaling predictions by methods that rely on the knowledge of the regulating transcription factors and their DNA-binding specificities. We tested EDGI's performance on a set of Drosophila genomes. Our results demonstrate that comparative genomic analysis of multiple closely related species has substantial power to identify key functional elements without additional biological knowledge.</description>
    <dc:title>Discovering transcriptional regulatory regions in Drosophila by a nonalignment method for phylogenetic footprinting.</dc:title>

    <dc:creator>Alona Sosinsky</dc:creator>
    <dc:creator>Barry Honig</dc:creator>
    <dc:creator>Richard S Mann</dc:creator>
    <dc:creator>Andrea Califano</dc:creator>
    <dc:identifier>doi:10.1073/pnas.0701614104</dc:identifier>
    <dc:source>Proc Natl Acad Sci U S A (29 March 2007)</dc:source>
    <dc:date>2007-04-09T16:31:01-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Proc Natl Acad Sci U S A</prism:publicationName>
    <prism:issn>0027-8424</prism:issn>
    <prism:category>clustering</prism:category>
    <prism:category>comparative</prism:category>
    <prism:category>drosophila</prism:category>
    <prism:category>enhancer</prism:category>
    <prism:category>method</prism:category>
    <prism:category>prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1218035">
    <title>ChIP-on-chip protocol for genome-wide analysis of transcription factor binding in Drosophila melanogaster embryos.</title>
    <link>http://www.citeulike.org/user/dpollard/article/1218035</link>
    <description>&lt;i&gt;Nat Protoc, Vol. 1, No. 6. (2006), pp. 2839-2855.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This protocol describes a method to detect in vivo associations between proteins and DNA in developing Drosophila embryos. It combines formaldehyde crosslinking and immunoprecipitation of protein-bound sequences with genome-wide analysis using microarrays. After crosslinking, nuclei are enriched using differential centrifugation and the chromatin is sheared by sonication. Antibodies specifically recognizing wild-type protein or, alternatively, a genetically encoded epitope tag are used to enrich for specifically bound DNA sequences. After purification and polymerase chain reaction-based amplification, the samples are fluorescently labeled and hybridized to genomic tiling microarrays. This protocol has been successfully used to study different tissue-specific transcription factors, and is generally applicable to in vivo analysis of any DNA-binding proteins in Drosophila embryos. The full protocol, including the collection of embryos and the collection of raw microarray data, can be completed within 10 days.</description>
    <dc:title>ChIP-on-chip protocol for genome-wide analysis of transcription factor binding in Drosophila melanogaster embryos.</dc:title>

    <dc:creator>T Sandmann</dc:creator>
    <dc:creator>JS Jakobsen</dc:creator>
    <dc:creator>EE Furlong</dc:creator>
    <dc:identifier>doi:10.1038/nprot.2006.383</dc:identifier>
    <dc:source>Nat Protoc, Vol. 1, No. 6. (2006), pp. 2839-2855.</dc:source>
    <dc:date>2007-04-09T16:23:29-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Nat Protoc</prism:publicationName>
    <prism:issn>1750-2799</prism:issn>
    <prism:volume>1</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>2839</prism:startingPage>
    <prism:endingPage>2855</prism:endingPage>
    <prism:category>binding</prism:category>
    <prism:category>chip_chip</prism:category>
    <prism:category>drosophila</prism:category>
    <prism:category>method</prism:category>
    <prism:category>transcription_factor</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1182082">
    <title>In vitro analysis of DNA-protein interactions by proximity ligation.</title>
    <link>http://www.citeulike.org/user/dpollard/article/1182082</link>
    <description>&lt;i&gt;Proc Natl Acad Sci U S A, Vol. 104, No. 9. (27 February 2007), pp. 3067-3072.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Protein-binding DNA sequence elements encode a variety of regulated functions of genomes. Information about such elements is currently in a state of rapid growth, but improved methods are required to characterize the sequence specificity of DNA-binding proteins. We have established an in vitro method for specific and sensitive solution-phase analysis of interactions between proteins and nucleic acids in nuclear extracts, based on the proximity ligation assay. The reagent consumption is very low, and the excellent sensitivity of the assay enables analysis of as few as 1-10 cells. We show that our results are highly reproducible, quantitative, and in good agreement with both EMSA and predictions obtained by using a motif finding software. This assay can be a valuable tool to characterize in-depth the sequence specificity of DNA-binding proteins and to evaluate effects of polymorphisms in known transcription factor binding sites.</description>
    <dc:title>In vitro analysis of DNA-protein interactions by proximity ligation.</dc:title>

    <dc:creator>SM Gustafsdottir</dc:creator>
    <dc:creator>J Schlingemann</dc:creator>
    <dc:creator>A Rada-Iglesias</dc:creator>
    <dc:creator>E Schallmeiner</dc:creator>
    <dc:creator>M Kamali-Moghaddam</dc:creator>
    <dc:creator>C Wadelius</dc:creator>
    <dc:creator>U Landegren</dc:creator>
    <dc:identifier>doi:10.1073/pnas.0611229104</dc:identifier>
    <dc:source>Proc Natl Acad Sci U S A, Vol. 104, No. 9. (27 February 2007), pp. 3067-3072.</dc:source>
    <dc:date>2007-03-23T19:27:29-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Proc Natl Acad Sci U S A</prism:publicationName>
    <prism:issn>0027-8424</prism:issn>
    <prism:volume>104</prism:volume>
    <prism:number>9</prism:number>
    <prism:startingPage>3067</prism:startingPage>
    <prism:endingPage>3072</prism:endingPage>
    <prism:category>binding</prism:category>
    <prism:category>colin_presented</prism:category>
    <prism:category>eisen_journal_club</prism:category>
    <prism:category>method</prism:category>
    <prism:category>round_robin</prism:category>
    <prism:category>transcription_factor</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1181240">
    <title>Global Discriminative Learning for Higher-Accuracy Computational Gene Prediction</title>
    <link>http://www.citeulike.org/user/dpollard/article/1181240</link>
    <description>&lt;i&gt;PLoS Computational Biology, Vol. 3, No. 3. (1 March 2007), e54.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Most ab initio gene predictors use a probabilistic sequence model, typically a hidden Markov model, to combine separately trained models of genomic signals and content. By combining separate models of relevant genomic features, such gene predictors can exploit small training sets and incomplete annotations, and can be trained fairly efficiently. However, that type of piecewise training does not optimize prediction accuracy and has difficulty in accounting for statistical dependencies among different parts of the gene model. With genomic information being created at an ever-increasing rate, it is worth investigating alternative approaches in which many different types of genomic evidence, with complex statistical dependencies, can be integrated by discriminative learning to maximize annotation accuracy. Among discriminative learning methods, large-margin classifiers have become prominent because of the success of support vector machines (SVM) in many classification tasks. We describe CRAIG, a new program for ab initio gene prediction based on a conditional random field model with semi-Markov structure that is trained with an online large-margin algorithm related to multiclass SVMs. Our experiments on benchmark vertebrate datasets and on regions from the ENCODE project show significant improvements in prediction accuracy over published gene predictors that use intrinsic features only, particularly at the gene level and on genes with long introns.</description>
    <dc:title>Global Discriminative Learning for Higher-Accuracy Computational Gene Prediction</dc:title>

    <dc:creator>Axel Bernal</dc:creator>
    <dc:creator>Koby Crammer</dc:creator>
    <dc:creator>Artemis Hatzigeorgiou</dc:creator>
    <dc:creator>Fernando Pereira</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.0030054</dc:identifier>
    <dc:source>PLoS Computational Biology, Vol. 3, No. 3. (1 March 2007), e54.</dc:source>
    <dc:date>2007-03-23T00:32:35-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Computational Biology</prism:publicationName>
    <prism:volume>3</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>e54</prism:startingPage>
    <prism:category>ab_initio</prism:category>
    <prism:category>gene_prediction</prism:category>
    <prism:category>method</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1181222">
    <title>Multiple Hypothesis Testing to Detect Lineages Under Positive Selection that Affects Only a Few Sites.</title>
    <link>http://www.citeulike.org/user/dpollard/article/1181222</link>
    <description>&lt;i&gt;Mol Biol Evol (5 March 2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Detection of positive Darwinian selection has become ever more important with the rapid growth of genomic data sets. Recent branch-site models of codon substitution account for variation of selective pressure over branches on the tree and across sites in the sequence, and provide a means to detect short episodes of molecular adaptation affecting just a few sites. In likelihood ratio tests based on such models, the branches to be tested for positive selection have to be specified a priori. In the absence of a biological hypothesis to designate so-called foreground branches, one may test many branches, but a correction for multiple testing becomes necessary. In this paper, we employ computer simulation to evaluate the performance of six multiple-test correction procedures when the branch-site models are used to test every branch on the phylogeny for positive selection. Four of the methods control the family-wise error rates (FWER) while the other two control the false discovery rate (FDR). We found that all correction procedures achieved acceptable FWER except for extremely divergent sequences and serious model violations, when the test may become unreliable. The power of the test to detect positive selection is influenced by the strength of selection and the sequence divergence, with the highest power observed at intermediate divergences. The four correction procedures that control the FWER had similar power. We recommend Rom's procedure for its slightly higher power, but the simple Bonferroni correction is useable as well. The two correction procedures that control the FDR had slightly more power but also higher FWER. We demonstrate the multiple-test procedures by analyzing gene sequences from the extracellular domain of the cluster of differentiation 2 gene (CD2) from 10 mammalian species. 
Both our simulation and real data analysis suggest that the multiple-test procedures are useful when multiple branches have to be tested on the same data set.</description>
    <dc:title>Multiple Hypothesis Testing to Detect Lineages Under Positive Selection that Affects Only a Few Sites.</dc:title>

    <dc:creator>Maria Anisimova</dc:creator>
    <dc:creator>Ziheng Yang</dc:creator>
    <dc:identifier>doi:10.1093/molbev/msm042</dc:identifier>
    <dc:source>Mol Biol Evol (5 March 2007)</dc:source>
    <dc:date>2007-03-22T23:50:29-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Mol Biol Evol</prism:publicationName>
    <prism:issn>0737-4038</prism:issn>
    <prism:category>adaptive</prism:category>
    <prism:category>evolution</prism:category>
    <prism:category>method</prism:category>
    <prism:category>multiple_testing</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1161181">
    <title>Incorporating Indel Information into Phylogeny Estimation for Rapidly Emerging Pathogens</title>
    <link>http://www.citeulike.org/user/dpollard/article/1161181</link>
    <description>&lt;i&gt;BMC Evolutionary Biology, Vol. 7 (14 March 2007), 40.&lt;/i&gt;</description>
    <dc:title>Incorporating Indel Information into Phylogeny Estimation for Rapidly Emerging Pathogens</dc:title>

    <dc:creator>Benjamin Redelings</dc:creator>
    <dc:creator>Marc Suchard</dc:creator>
    <dc:identifier>doi:10.1186/1471-2148-7-40</dc:identifier>
    <dc:source>BMC Evolutionary Biology, Vol. 7 (14 March 2007), 40.</dc:source>
    <dc:date>2007-03-14T19:28:41-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>BMC Evolutionary Biology</prism:publicationName>
    <prism:issn>1471-2148</prism:issn>
    <prism:volume>7</prism:volume>
    <prism:startingPage>40</prism:startingPage>
    <prism:category>alignment</prism:category>
    <prism:category>gene</prism:category>
    <prism:category>indels</prism:category>
    <prism:category>method</prism:category>
    <prism:category>phylogeny</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1167977">
    <title>Global Discriminative Learning for Higher-Accuracy Computational Gene Prediction</title>
    <link>http://www.citeulike.org/user/dpollard/article/1167977</link>
    <description>&lt;i&gt;PLoS Computational Biology, Vol. 3, No. 3. (1 March 2007), e54.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Most ab initio gene predictors use a probabilistic sequence model, typically a hidden Markov model, to combine separately trained models of genomic signals and content. By combining separate models of relevant genomic features, such gene predictors can exploit small training sets and incomplete annotations, and can be trained fairly efficiently. However, that type of piecewise training does not optimize prediction accuracy and has difficulty in accounting for statistical dependencies among different parts of the gene model. With genomic information being created at an ever-increasing rate, it is worth investigating alternative approaches in which many different types of genomic evidence, with complex statistical dependencies, can be integrated by discriminative learning to maximize annotation accuracy. Among discriminative learning methods, large-margin classifiers have become prominent because of the success of support vector machines (SVM) in many classification tasks. We describe CRAIG, a new program for ab initio gene prediction based on a conditional random field model with semi-Markov structure that is trained with an online large-margin algorithm related to multiclass SVMs. Our experiments on benchmark vertebrate datasets and on regions from the ENCODE project show significant improvements in prediction accuracy over published gene predictors that use intrinsic features only, particularly at the gene level and on genes with long introns.</description>
    <dc:title>Global Discriminative Learning for Higher-Accuracy Computational Gene Prediction</dc:title>

    <dc:creator>Axel Bernal</dc:creator>
    <dc:creator>Koby Crammer</dc:creator>
    <dc:creator>Artemis Hatzigeorgiou</dc:creator>
    <dc:creator>Fernando Pereira</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.0030054</dc:identifier>
    <dc:source>PLoS Computational Biology, Vol. 3, No. 3. (1 March 2007), e54.</dc:source>
    <dc:date>2007-03-16T21:09:52-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Computational Biology</prism:publicationName>
    <prism:volume>3</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>e54</prism:startingPage>
    <prism:category>ab_initio</prism:category>
    <prism:category>gene_prediction</prism:category>
    <prism:category>method</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/816984">
    <title>Computational methods for transcriptional regulation.</title>
    <link>http://www.citeulike.org/user/dpollard/article/816984</link>
    <description>&lt;i&gt;Curr Opin Genet Dev, Vol. 15, No. 2. (April 2005), pp. 214-221.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;How is the information from a thousand gene-expression arrays, the location of more than two hundred regulatory factors, and nine sequenced genomes to be integrated into a global view of the regulatory network in budding yeast? Computational methods that fit incomplete noisy data provide the outlines of regulatory pathways, but the errors are not quantified. In the fly, embryonic patterning has proved amenable to computational prediction, but only when the DNA-binding preferences of the relevant factors are taken into account. In both these model organisms, simply restricting attention to regulatory sequences that align with related species (i.e. &#34;conserved&#34;) discards much information regarding what is functional.</description>
    <dc:title>Computational methods for transcriptional regulation.</dc:title>

    <dc:creator>ED Siggia</dc:creator>
    <dc:identifier>doi:10.1016/j.gde.2005.02.004</dc:identifier>
    <dc:source>Curr Opin Genet Dev, Vol. 15, No. 2. (April 2005), pp. 214-221.</dc:source>
    <dc:date>2006-08-25T21:25:29-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Curr Opin Genet Dev</prism:publicationName>
    <prism:issn>0959-437X</prism:issn>
    <prism:volume>15</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>214</prism:startingPage>
    <prism:endingPage>221</prism:endingPage>
    <prism:category>cis_regulatory_elements</prism:category>
    <prism:category>expression_pattern</prism:category>
    <prism:category>gene_expression</prism:category>
    <prism:category>method</prism:category>
    <prism:category>modeling</prism:category>
    <prism:category>prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/456431">
    <title>Prediction of cis-regulatory elements using binding site matrices--the successes, the failures and the reasons for both.</title>
    <link>http://www.citeulike.org/user/dpollard/article/456431</link>
    <description>&lt;i&gt;Curr Opin Genet Dev, Vol. 15, No. 4. (August 2005), pp. 395-402.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Protein-DNA interactions control many aspects of animal development and cellular responses to the environment. Although profiling of individual transcription factor binding sites is not a reliable guide for predicting the position of cis-regulatory elements in large genomes, modelling the evolution and the organization of regulatory elements has provided enough information to make some successful predictions. For vertebrate genomes, the field is limited by the lack of sufficient experimental data upon which to build reliable models. Nonetheless, a combination of experimental, computational and comparative data is likely to reveal aspects of complex regulatory networks in vertebrates, just as it has already done for simple eukaryotic genomes.</description>
    <dc:title>Prediction of cis-regulatory elements using binding site matrices--the successes, the failures and the reasons for both.</dc:title>

    <dc:creator>T Vavouri</dc:creator>
    <dc:creator>G Elgar</dc:creator>
    <dc:identifier>doi:10.1016/j.gde.2005.05.002</dc:identifier>
    <dc:source>Curr Opin Genet Dev, Vol. 15, No. 4. (August 2005), pp. 395-402.</dc:source>
    <dc:date>2006-01-05T21:04:57-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Curr Opin Genet Dev</prism:publicationName>
    <prism:issn>0959-437X</prism:issn>
    <prism:volume>15</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>395</prism:startingPage>
    <prism:endingPage>402</prism:endingPage>
    <prism:category>binding_site</prism:category>
    <prism:category>cis_regulatory_elements</prism:category>
    <prism:category>method</prism:category>
    <prism:category>motif</prism:category>
    <prism:category>pattern</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>pwm</prism:category>
    <prism:category>regulatory_sequence</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1062019">
    <title>Statistical significance of cis-regulatory modules</title>
    <link>http://www.citeulike.org/user/dpollard/article/1062019</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 8, No. 1. (2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND: It is becoming increasingly important for researchers to be able to scan through large genomic regions for transcription factor binding sites or clusters of binding sites forming cis-regulatory modules. Correspondingly, there has been a push to develop algorithms for the rapid detection and assessment of cis-regulatory modules. While various algorithms for this purpose have been introduced, most are not well suited for rapid, large scale scanning. RESULTS: We introduce methods designed for the detection and statistical evaluation of cis-regulatory modules, modeled as either clusters of individual binding sites or as combinations of sites with constrained organization. In order to determine the statistical significance of module sites, we first need a method to determine the statistical significance of single transcription factor binding site matches. We introduce a straightforward method of estimating the statistical significance of single site matches using a database of known promoters to produce data structures that can be used to estimate p-values for binding site matches. We next introduce a technique to calculate the statistical significance of the arrangement of binding sites within a module using a max-gap model. If the module scanned for has defined organizational parameters, the probability of the module is corrected to account for organizational constraints. The statistical significance of single site matches and the architecture of sites within the module can be combined to provide an overall estimation of statistical significance of cis-regulatory module sites. CONCLUSIONS: The methods introduced in this paper allow for the detection and statistical evaluation of single transcription factor binding sites and cis-regulatory modules. 
The features described are implemented in the Search Tool for Occurrences of Regulatory Motifs (STORM) and MODSTORM software.</description>
    <dc:title>Statistical significance of cis-regulatory modules</dc:title>

    <dc:creator>Dustin Schones</dc:creator>
    <dc:creator>Andrew Smith</dc:creator>
    <dc:creator>Michael Zhang</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-8-19</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 8, No. 1. (2007)</dc:source>
    <dc:date>2007-01-23T13:59:47-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:volume>8</prism:volume>
    <prism:number>1</prism:number>
    <prism:category>crm</prism:category>
    <prism:category>method</prism:category>
    <prism:category>prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/1082428">
    <title>Multiple alignment by sequence annealing.</title>
    <link>http://www.citeulike.org/user/dpollard/article/1082428</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 23, No. 2. (15 January 2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;MOTIVATION: We introduce a novel approach to multiple alignment that is based on an algorithm for rapidly checking whether single matches are consistent with a partial multiple alignment. This leads to a sequence annealing algorithm, which is an incremental method for building multiple sequence alignments one match at a time. Our approach improves significantly on the standard progressive alignment approach to multiple alignment. RESULTS: The sequence annealing algorithm performs well on benchmark test sets of protein sequences. It is not only sensitive, but also specific, drastically reducing the number of incorrectly aligned residues in comparison to other programs. The method allows for adjustment of the sensitivity/specificity tradeoff and can be used to reliably identify homologous regions among protein sequences. AVAILABILITY: An implementation of the sequence annealing algorithm is available at http://bio.math.berkeley.edu/amap/</description>
    <dc:title>Multiple alignment by sequence annealing.</dc:title>

    <dc:creator>AS Schwartz</dc:creator>
    <dc:creator>L Pachter</dc:creator>
    <dc:source>Bioinformatics, Vol. 23, No. 2. (15 January 2007)</dc:source>
    <dc:date>2007-02-01T19:46:44-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1460-2059</prism:issn>
    <prism:volume>23</prism:volume>
    <prism:number>2</prism:number>
    <prism:category>alignment</prism:category>
    <prism:category>method</prism:category>
    <prism:category>protein</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/950154">
    <title>Single-Molecule Detection of Transcription Factor Binding to DNA in Real Time: Specificity, Equilibrium, and Kinetic Parameters.</title>
    <link>http://www.citeulike.org/user/dpollard/article/950154</link>
    <description>&lt;i&gt;Biochemistry, Vol. 45, No. 46. (21 November 2006), pp. 13794-13806.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Specificity and temporal control of transcriptional machinery are encoded within sequence-specific transcription factors, of which there are thousands in mammalian genomes. Efforts to completely decipher this code will require an understanding of the DNA binding thermodynamic and kinetic properties displayed by each transcription factor, a daunting task given the current methodologies for measuring these interactions. Here, we present a novel methodology to quantify the binding of proteins to target DNA molecules based on single-molecule detection and real-time counting of individual free and bound fluorescently tagged molecules flowing past a detection device. Using this technology, we measured DNA binding by fluorescently tagged domains of four distinct transcription factors, namely, human early growth response protein Egr-1, vertebrate GATA-1, Drosophila GAGA factor, and &#955; bacteriophage Cro repressor. These proteins represent different structural classes (zinc-finger and helix-turn-helix), quaternary states (monomeric and dimeric), and relative affinities (high, intermediate, and low). Specific binding of each protein to its cognate DNA target was demonstrated at low picomolar concentrations. The equilibrium (Kd) and kinetic (kon and koff) constants governing DNA binding by one of these transcription factors, that of Egr-1, were measured using this approach. Kd values obtained from three different types of saturation titrations were reproducible and consistent, yielding values between 10 and 14 pM that, along with the kinetic constants, agree closely with literature values. 
Because this methodology offers several significant advantages over other existing approaches, namely, real-time determination, requirement for small amounts of reagents, high reproducibility, exquisite sensitivity, and amenability to high-throughput analysis, it is suitable for characterizing DNA-binding proteins as well as other interacting pairs of molecules that can be fluorescently tagged.</description>
    <dc:title>Single-Molecule Detection of Transcription Factor Binding to DNA in Real Time: Specificity, Equilibrium, and Kinetic Parameters.</dc:title>

    <dc:creator>Eric Nalefski</dc:creator>
    <dc:creator>Eugene Nebelitsky</dc:creator>
    <dc:creator>Janice Lloyd</dc:creator>
    <dc:creator>Steven Gullans</dc:creator>
    <dc:identifier>doi:10.1021/bi0602011</dc:identifier>
    <dc:source>Biochemistry, Vol. 45, No. 46. (21 November 2006), pp. 13794-13806.</dc:source>
    <dc:date>2006-11-17T12:58:47-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Biochemistry</prism:publicationName>
    <prism:issn>0006-2960</prism:issn>
    <prism:volume>45</prism:volume>
    <prism:number>46</prism:number>
    <prism:startingPage>13794</prism:startingPage>
    <prism:endingPage>13806</prism:endingPage>
    <prism:category>binding</prism:category>
    <prism:category>kenetics</prism:category>
    <prism:category>method</prism:category>
    <prism:category>motif</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/903929">
    <title>Identifying cis-regulatory modules by combining comparative and compositional analysis of DNA.</title>
    <link>http://www.citeulike.org/user/dpollard/article/903929</link>
    <description>&lt;i&gt;Bioinformatics (10 October 2006)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;MOTIVATION: Predicting cis-regulatory modules (CRMs) in higher eukaryotes is a challenging computational task. Commonly used methods to predict CRMs based on the signal of transcription factor binding sites (TFBS) are limited by prior information about transcription factor specificity. More general methods that bypass the reliance on TFBS models are needed for comprehensive CRM prediction. RESULTS: We have developed a method to predict CRMs called CisPlusFinder that identifies high density regions of perfect local ungapped sequences (PLUSs) based on multiple species conservation. By assuming that PLUSs contain core TFBS motifs that are locally overrepresented, the method attempts to capture the expected features of CRM structure and evolution. Applied to a benchmark dataset of CRMs involved in early Drosophila development, CisPlusFinder predicts more annotated CRMs than all other methods tested. Using the REDfly database, we find that some &#34;false positive&#34; predictions in the benchmark dataset correspond to recently annotated CRMs. Our work demonstrates that CRM prediction methods that combine comparative genomic data with statistical properties of DNA may achieve reasonable performance when applied genome-wide in the absence of an a priori set of known TFBS motifs. AVAILABILITY: The program CisPlusFinder can be downloaded at http://jakob.genetik.uni-koeln.de/bioinformatik/people/nora/nora.html. All software is licensed under the Lesser GNU Public License (LGPL).</description>
    <dc:title>Identifying cis-regulatory modules by combining comparative and compositional analysis of DNA.</dc:title>

    <dc:creator>Nora Pierstorff</dc:creator>
    <dc:creator>Casey M Bergman</dc:creator>
    <dc:creator>Thomas Wiehe</dc:creator>
    <dc:source>Bioinformatics (10 October 2006)</dc:source>
    <dc:date>2006-10-18T18:52:57-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1460-2059</prism:issn>
    <prism:category>annotation</prism:category>
    <prism:category>comparative</prism:category>
    <prism:category>composition</prism:category>
    <prism:category>discovery</prism:category>
    <prism:category>drosophila</prism:category>
    <prism:category>eisen_journal_club</prism:category>
    <prism:category>emily_presented</prism:category>
    <prism:category>method</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>round_robin</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/878405">
    <title>Exogean: a framework for annotating protein-coding genes in eukaryotic genomic DNA.</title>
    <link>http://www.citeulike.org/user/dpollard/article/878405</link>
    <description>&lt;i&gt;Genome Biol, Vol. 7 Suppl 1 (2006)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND: Accurate and automatic gene identification in eukaryotic genomic DNA is more than ever of crucial importance to efficiently exploit the large volume of assembled genome sequences available to the community. Automatic methods have always been considered less reliable than human expertise. This is illustrated in the EGASP project, where reference annotations against which all automatic methods are measured are generated by human annotators and experimentally verified. We hypothesized that replicating the accuracy of human annotators in an automatic method could be achieved by formalizing the rules and decisions that they use, in a mathematical formalism. RESULTS: We have developed Exogean, a flexible framework based on directed acyclic colored multigraphs (DACMs) that can represent biological objects (for example, mRNA, ESTs, protein alignments, exons) and relationships between them. Graphs are analyzed to process the information according to rules that replicate those used by human annotators. Simple individual starting objects given as input to Exogean are thus combined and synthesized into complex objects such as protein coding transcripts. CONCLUSION: We show here, in the context of the EGASP project, that Exogean is currently the method that best reproduces protein coding gene annotations from human experts, in terms of identifying at least one exact coding sequence per gene. We discuss current limitations of the method and several avenues for improvement.</description>
    <dc:title>Exogean: a framework for annotating protein-coding genes in eukaryotic genomic DNA.</dc:title>

    <dc:creator>S Djebali</dc:creator>
    <dc:creator>F Delaplace</dc:creator>
    <dc:creator>HR Crollius</dc:creator>
    <dc:identifier>doi:10.1186/gb-2006-7-s1-s7</dc:identifier>
    <dc:source>Genome Biol, Vol. 7 Suppl 1 (2006)</dc:source>
    <dc:date>2006-09-29T23:20:27-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Genome Biol</prism:publicationName>
    <prism:issn>1465-6914</prism:issn>
    <prism:volume>7 Suppl 1</prism:volume>
    <prism:category>annotation</prism:category>
    <prism:category>eukaryote</prism:category>
    <prism:category>gene</prism:category>
    <prism:category>method</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/dpollard/article/849684">
    <title>Quantification of the variation in percentage identity for protein sequence alignments</title>
    <link>http://www.citeulike.org/user/dpollard/article/849684</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 7 (19 September 2006), 415.&lt;/i&gt;</description>
    <dc:title>Quantification of the variation in percentage identity for protein sequence alignments</dc:title>

    <dc:creator>Ps Raghava</dc:creator>
    <dc:creator>Geoffrey Barton</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-7-415</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 7 (19 September 2006), 415.</dc:source>
    <dc:date>2006-09-19T12:09:59-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:issn>1471-2105</prism:issn>
    <prism:volume>7</prism:volume>
    <prism:startingPage>415</prism:startingPage>
    <prism:category>accuracy</prism:category>
    <prism:category>alignment</prism:cat