<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
   xmlns="http://purl.org/rss/1.0/"
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:prism="http://prismstandard.org/namespaces/1.2/basic/"
   xmlns:dcterms="http://purl.org/dc/terms/"
>
<channel rdf:about="http://www.citeulike.org/about">

	<title>CiteULike: indigoviolet's library [368 articles]</title>
	<description>CiteULike: indigoviolet's library [368 articles]</description>


	<link>http://www.citeulike.org/user/indigoviolet</link>
	<dc:publisher>CiteULike.org</dc:publisher>
	<dc:language>en-gb</dc:language>
	<dc:rights>Copyright &#169; 2004-2008 citeulike.org</dc:rights>
	<items>
    <rdf:Seq>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2122187"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/407124"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/407125"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1274217"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/558739"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1267554"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/209852"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/212874"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2098979"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2098944"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/876703"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/921611"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/701850"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2097765"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2097758"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2097756"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2097754"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2097508"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2097484"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2097483"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/827938"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1287340"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2097474"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2097471"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2064285"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/453150"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1084478"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2097314"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2096906"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/882895"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/909307"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1811386"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1646950"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2060806"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2060800"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2060758"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2060753"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/959130"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1507637"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2060703"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/816965"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/318263"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1453943"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2051664"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/466470"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2051640"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/408246"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/503161"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/622628"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1314398"/>

	</rdf:Seq>
	</items>
	</channel>


<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2122187">
    <title>The Making of a Fly: The Genetics of Animal Design</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2122187</link>
    <description>&lt;i&gt;(15 April 1992)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Understanding how a multicellular animal develops from a single cell (the fertilized egg) poses one of the greatest challenges in biology today. Development from egg to adult involves the sequential expression of virtually the whole of an organism's genetic instructions both in the mother as she lays down developmental cues in the egg, and in the embryo itself. Most of our present information on the role of genes in development comes from the invertebrate fruit fly, &#60;i&#62;Drosophila&#60;/i&#62;. The two authors of this text (amongst the foremost authorities in the world) follow the developmental process from fertilization through the primitive structural development of the body plan of the fly after cleavage into the differentiation of the variety of tissues, organs and body parts that together define the fly. The developmental processes are fully explained throughout the text in the modern language of molecular biology and genetics. This text represents the vital synthesis of the subject that many have been waiting for and it will enable many specific courses in developmental biology and molecular genetics to focus on it. It will appeal to 2nd and 3rd year students in these disciplines as well as in biochemistry, neurobiology and zoology. It will also have widespread appeal among researchers. &#60;br&#62;&#60;ul&#62;&#60;br&#62;&#60;li&#62;Authored by one of the foremost authorities in the world. &#60;br&#62;&#60;li&#62;A unique synthesis of the developmental cycle of &#60;i&#62;Drosophila&#60;/i&#62; - our major source of information on the role of genes in development. &#60;br&#62;&#60;li&#62;Designed to provide the basis of new courses in developmental biology and molecular genetics at senior undergraduate level. &#60;br&#62;&#60;li&#62;A lucid explanation in the modern language of the science.&#60;/li&#62;&#60;/ul&#62;</description>
    <dc:title>The Making of a Fly: The Genetics of Animal Design</dc:title>

    <dc:creator>Peter Lawrence</dc:creator>
    <dc:source>(15 April 1992)</dc:source>
    <dc:date>2007-12-15T03:00:46-00:00</dc:date>
    <prism:publisher>Wiley-Blackwell</prism:publisher>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/407124">
    <title>WebLogo: A Sequence Logo Generator</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/407124</link>
    <description>&lt;i&gt;Genome Res., Vol. 14, No. 6. (1 June 2004), pp. 1188-1190.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;WebLogo generates sequence logos, graphical representations of the patterns within a multiple sequence alignment. Sequence logos provide a richer and more precise description of sequence similarity than consensus sequences and can rapidly reveal significant features of the alignment otherwise difficult to perceive. Each logo consists of stacks of letters, one stack for each position in the sequence. The overall height of each stack indicates the sequence conservation at that position (measured in bits), whereas the height of symbols within the stack reflects the relative frequency of the corresponding amino or nucleic acid at that position. WebLogo has been enhanced recently with additional features and options, to provide a convenient and highly configurable sequence logo generator. A command line interface and the complete, open WebLogo source code are available for local installation and customization.</description>
    <dc:title>WebLogo: A Sequence Logo Generator</dc:title>

    <dc:creator>Gavin Crooks</dc:creator>
    <dc:creator>Gary Hon</dc:creator>
    <dc:creator>John-Marc Chandonia</dc:creator>
    <dc:creator>Steven Brenner</dc:creator>
    <dc:identifier>doi:10.1101/gr.849004</dc:identifier>
    <dc:source>Genome Res., Vol. 14, No. 6. (1 June 2004), pp. 1188-1190.</dc:source>
    <dc:date>2005-11-24T10:24:28-00:00</dc:date>
    <prism:publicationName>Genome Res.</prism:publicationName>
    <prism:volume>14</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>1188</prism:startingPage>
    <prism:endingPage>1190</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/407125">
    <title>Sequence logos: a new way to display consensus sequences.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/407125</link>
    <description>&lt;i&gt;Nucleic Acids Res, Vol. 18, No. 20. (25 October 1990), pp. 6097-6100.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A graphical method is presented for displaying the patterns in a set of aligned sequences. The characters representing the sequence are stacked on top of each other for each position in the aligned sequences. The height of each letter is made proportional to its frequency, and the letters are sorted so the most common one is on top. The height of the entire stack is then adjusted to signify the information content of the sequences at that position. From these 'sequence logos', one can determine not only the consensus sequence but also the relative frequency of bases and the information content (measured in bits) at every position in a site or sequence. The logo displays both significant residues and subtle sequence patterns.</description>
    <dc:title>Sequence logos: a new way to display consensus sequences.</dc:title>

    <dc:creator>TD Schneider</dc:creator>
    <dc:creator>RM Stephens</dc:creator>
    <dc:source>Nucleic Acids Res, Vol. 18, No. 20. (25 October 1990), pp. 6097-6100.</dc:source>
    <dc:date>2005-11-24T10:26:51-00:00</dc:date>
    <prism:publicationName>Nucleic Acids Res</prism:publicationName>
    <prism:issn>0305-1048</prism:issn>
    <prism:volume>18</prism:volume>
    <prism:number>20</prism:number>
    <prism:startingPage>6097</prism:startingPage>
    <prism:endingPage>6100</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1274217">
    <title>Selection of DNA binding sites by regulatory proteins. Statistical-mechanical theory and application to operators and promoters.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1274217</link>
    <description>&lt;i&gt;J Mol Biol, Vol. 193, No. 4. (20 February 1987), pp. 723-750.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We present a statistical-mechanical selection theory for the sequence analysis of a set of specific DNA regulatory sites that makes it possible to predict the relationship between individual base-pair choices in the site and specific activity (affinity). The theory is based on the assumption that specific DNA sequences have been selected to conform to some requirement for protein binding (or activity), and that all sequences that can fulfil this requirement are equally likely to occur. In most cases, the number of specific DNA sequences that are known for a certain DNA-binding protein is very small, and we discuss in detail the small-sample uncertainties that this leads to. When applied to the binding sites for cro repressor in phage lambda, the theory can predict, from the sequence statistics alone, their rank order binding affinities in reasonable agreement with measured values. However, the statistical uncertainty generated by such a small sample (only 6 sites known) limits the result to order-of-magnitude comparisons. When applied to the much larger sample of Escherichia coli promoter sequences, the theory predicts the correlation between in vitro activity (k2KB values) and homology score (closeness to the consensus sequence) observed by Mulligan et al. (1984). The analysis of base-pair frequencies in the promoter sample is consistent with the assumption that base-pairs at different positions in the sites contribute independently to the specific activity, except in a few marginal cases that are discussed. When the promoter sites are ordered according to predicted activities, they seem to conform to the Gaussian distribution that results from a requirement for maximal sequence variability within the constraint of providing a certain average activity. 
The theory allows us to compare the number of specific sites with a certain activity to the number that would be expected from random occurrence in the genome. While strong promoters are &#34;overspecified&#34;, in the sense that their probability of random occurrence is very low, random sequences with weak promoter-like properties are expected to occur in very large numbers. This leads to the conclusion that functional specificity is based on other properties in addition to primary sequence recognition; some possibilities are discussed. Finally, we show that the sequence information, as defined by Schneider et al. (1986), can be used directly (at least in the case of equilibrium binding sites) to estimate the number of protein molecules that are specifically bound at random &#34;pseudosites&#34; in the genome. (ABSTRACT TRUNCATED AT 400 WORDS)</description>
    <dc:title>Selection of DNA binding sites by regulatory proteins. Statistical-mechanical theory and application to operators and promoters.</dc:title>

    <dc:creator>OG Berg</dc:creator>
    <dc:creator>PH von Hippel</dc:creator>
    <dc:source>J Mol Biol, Vol. 193, No. 4. (20 February 1987), pp. 723-750.</dc:source>
    <dc:date>2007-05-03T16:39:34-00:00</dc:date>
    <prism:publicationName>J Mol Biol</prism:publicationName>
    <prism:issn>0022-2836</prism:issn>
    <prism:volume>193</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>723</prism:startingPage>
    <prism:endingPage>750</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/558739">
    <title>An overview of the structures of protein-DNA complexes.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/558739</link>
    <description>&lt;i&gt;Genome Biol, Vol. 1, No. 1. (2000)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;On the basis of a structural analysis of 240 protein-DNA complexes contained in the Protein Data Bank (PDB), we have classified the DNA-binding proteins involved into eight different structural/functional groups, which are further classified into 54 structural families. Here we present this classification and review the functions, structures and binding interactions of these protein-DNA complexes.</description>
    <dc:title>An overview of the structures of protein-DNA complexes.</dc:title>

    <dc:creator>NM Luscombe</dc:creator>
    <dc:creator>SE Austin</dc:creator>
    <dc:creator>HM Berman</dc:creator>
    <dc:creator>JM Thornton</dc:creator>
    <dc:source>Genome Biol, Vol. 1, No. 1. (2000)</dc:source>
    <dc:date>2006-03-21T16:48:16-00:00</dc:date>
    <prism:publicationName>Genome Biol</prism:publicationName>
    <prism:issn>1465-6914</prism:issn>
    <prism:volume>1</prism:volume>
    <prism:number>1</prism:number>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1267554">
    <title>Drosophila DNase I footprint database: a systematic genome annotation of transcription factor binding sites in the fruitfly, Drosophila melanogaster.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1267554</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 21, No. 8. (15 April 2005), pp. 1747-1749.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;SUMMARY: Despite increasing numbers of computational tools developed to predict cis-regulatory sequences, the availability of high-quality datasets of transcription factor binding sites limits advances in the bioinformatics of gene regulation. Here we present such a dataset based on a systematic literature curation and genome annotation of DNase I footprints for the fruitfly, Drosophila melanogaster. Using the experimental results of 201 primary references, we annotated 1367 binding sites from 87 transcription factors and 101 target genes in the D.melanogaster genome sequence. These data will provide a rich resource for future bioinformatics analyses of transcriptional regulation in Drosophila such as constructing motif models, training cis-regulatory module detectors, benchmarking alignment tools and continued text mining of the extensive literature on transcriptional regulation in this important model organism. AVAILABILITY: http://www.flyreg.org/ CONTACT: cbergman@gen.cam.ac.uk.</description>
    <dc:title>Drosophila DNase I footprint database: a systematic genome annotation of transcription factor binding sites in the fruitfly, Drosophila melanogaster.</dc:title>

    <dc:creator>CM Bergman</dc:creator>
    <dc:creator>JW Carlson</dc:creator>
    <dc:creator>SE Celniker</dc:creator>
    <dc:source>Bioinformatics, Vol. 21, No. 8. (15 April 2005), pp. 1747-1749.</dc:source>
    <dc:date>2007-04-30T09:18:37-00:00</dc:date>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1367-4803</prism:issn>
    <prism:volume>21</prism:volume>
    <prism:number>8</prism:number>
    <prism:startingPage>1747</prism:startingPage>
    <prism:endingPage>1749</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/209852">
    <title>GOstat: find statistically overrepresented Gene Ontologies within a group of genes.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/209852</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 20, No. 9. (12 June 2004), pp. 1464-1465.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;SUMMARY: Modern experimental techniques, as for example DNA microarrays, as a result usually produce a long list of genes, which are potentially interesting in the analyzed process. In order to gain biological understanding from this type of data, it is necessary to analyze the functional annotations of all genes in this list. The Gene-Ontology (GO) database provides a useful tool to annotate and analyze the functions of a large number of genes. Here, we introduce a tool that utilizes this information to obtain an understanding of which annotations are typical for the analyzed list of genes. This program automatically obtains the GO annotations from a database and generates statistics of which annotations are overrepresented in the analyzed list of genes. This results in a list of GO terms sorted by their specificity. AVAILABILITY: Our program GOstat is accessible via the Internet at http://gostat.wehi.edu.au</description>
    <dc:title>GOstat: find statistically overrepresented Gene Ontologies within a group of genes.</dc:title>

    <dc:creator>T Beissbarth</dc:creator>
    <dc:creator>TP Speed</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/bth088</dc:identifier>
    <dc:source>Bioinformatics, Vol. 20, No. 9. (12 June 2004), pp. 1464-1465.</dc:source>
    <dc:date>2005-05-25T04:19:07-00:00</dc:date>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1367-4803</prism:issn>
    <prism:volume>20</prism:volume>
    <prism:number>9</prism:number>
    <prism:startingPage>1464</prism:startingPage>
    <prism:endingPage>1465</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/212874">
    <title>Gene ontology: tool for the unification of biology. The Gene Ontology Consortium.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/212874</link>
    <description>&lt;i&gt;Nat Genet, Vol. 25, No. 1. (May 2000), pp. 25-29.&lt;/i&gt;</description>
    <dc:title>Gene ontology: tool for the unification of biology. The Gene Ontology Consortium.</dc:title>

    <dc:creator>M Ashburner</dc:creator>
    <dc:creator>CA Ball</dc:creator>
    <dc:creator>JA Blake</dc:creator>
    <dc:creator>D Botstein</dc:creator>
    <dc:creator>H Butler</dc:creator>
    <dc:creator>JM Cherry</dc:creator>
    <dc:creator>AP Davis</dc:creator>
    <dc:creator>K Dolinski</dc:creator>
    <dc:creator>SS Dwight</dc:creator>
    <dc:creator>JT Eppig</dc:creator>
    <dc:creator>MA Harris</dc:creator>
    <dc:creator>DP Hill</dc:creator>
    <dc:creator>L Issel-Tarver</dc:creator>
    <dc:creator>A Kasarskis</dc:creator>
    <dc:creator>S Lewis</dc:creator>
    <dc:creator>JC Matese</dc:creator>
    <dc:creator>JE Richardson</dc:creator>
    <dc:creator>M Ringwald</dc:creator>
    <dc:creator>GM Rubin</dc:creator>
    <dc:creator>G Sherlock</dc:creator>
    <dc:identifier>doi:10.1038/75556</dc:identifier>
    <dc:source>Nat Genet, Vol. 25, No. 1. (May 2000), pp. 25-29.</dc:source>
    <dc:date>2005-05-27T12:30:22-00:00</dc:date>
    <prism:publicationName>Nat Genet</prism:publicationName>
    <prism:issn>1061-4036</prism:issn>
    <prism:volume>25</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>25</prism:startingPage>
    <prism:endingPage>29</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2098979">
    <title>Temporal patterns of fruit fly (Drosophila) evolution revealed by mutation clocks.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2098979</link>
    <description>&lt;i&gt;Mol Biol Evol, Vol. 21, No. 1. (January 2004), pp. 36-44.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Drosophila melanogaster has been a canonical model organism to study genetics, development, behavior, physiology, evolution, and population genetics for nearly a century. Despite this emphasis and the completion of its nuclear genome sequence, the timing of major speciation events leading to the origin of this fruit fly remain elusive because of the paucity of extensive fossil records and biogeographic data. Use of molecular clocks as an alternative has been fraught with non-clock-like accumulation of nucleotide and amino-acid substitutions. Here we present a novel methodology in which genomic mutation distances are used to overcome these limitations and to make use of all available gene sequence data for constructing a fruit fly molecular time scale. Our analysis of 2977 pairwise sequence comparisons from 176 nuclear genes reveals a long-term fruit fly mutation clock ticking at a rate of 11.1 mutations per kilobase pair per Myr. Genomic mutation clock-based timings of the landmark speciation events leading to the evolution of D. melanogaster show that it shared most recent common ancestry 5.4 MYA with D. simulans, 12.6 MYA with D. erecta+D. orena, 12.8 MYA with D. yakuba+D. teisseri, 35.6 MYA with the takahashii subgroup, 41.3 MYA with the montium subgroup, 44.2 MYA with the ananassae subgroup, 54.9 MYA with the obscura group, 62.2 MYA with the willistoni group, and 62.9 MYA with the subgenus Drosophila. These and other estimates are compatible with those known from limited biogeographic and fossil records. The inferred temporal pattern of fruit fly evolution shows correspondence with the cooling patterns of paleoclimate changes and habitat fragmentation in the Cenozoic.</description>
    <dc:title>Temporal patterns of fruit fly (Drosophila) evolution revealed by mutation clocks.</dc:title>

    <dc:creator>K Tamura</dc:creator>
    <dc:creator>S Subramanian</dc:creator>
    <dc:creator>S Kumar</dc:creator>
    <dc:source>Mol Biol Evol, Vol. 21, No. 1. (January 2004), pp. 36-44.</dc:source>
    <dc:date>2007-12-12T14:32:26-00:00</dc:date>
    <prism:publicationName>Mol Biol Evol</prism:publicationName>
    <prism:issn>0737-4038</prism:issn>
    <prism:volume>21</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>36</prism:startingPage>
    <prism:endingPage>44</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2098944">
    <title>Gene Recognition Via Spliced Sequence Alignment</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2098944</link>
    <description>&lt;i&gt;Proceedings of the National Academy of Sciences of the United States of America, Vol. 93, No. 17. (1996), pp. 9061-9066.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Gene recognition is one of the most important problems in computational molecular biology. Previous attempts to solve this problem were based on statistics, and applications of combinatorial methods for gene recognition were almost unexplored. Recent advances in large-scale cDNA sequencing open a way toward a new approach to gene recognition that uses previously sequenced genes as a clue for recognition of newly sequenced genes. This paper describes a spliced alignment algorithm and software tool that explores all possible exon assemblies in polynomial time and finds the multiexon structure with the best fit to a related protein. Unlike other existing methods, the algorithm successfully recognizes genes even in the case of short exons or exons with unusual codon usage; we also report correct assemblies for genes with more than 10 exons. On a test sample of human genes with known mammalian relatives, the average correlation between the predicted and actual proteins was 99%. The algorithm correctly reconstructed 87% of genes and the rare discrepancies between the predicted and real exon-intron structures were caused either by short (less than 5 amino acids) initial/terminal exons or by alternative splicing. Moreover, the algorithm predicts human genes reasonably well when the homologous protein is nonvertebrate or even prokaryotic. The surprisingly good performance of the method was confirmed by extensive simulations: in particular, with target proteins at 160 accepted point mutations (PAM) (25% similarity), the correlation between the predicted and actual genes was still as high as 95%.</description>
    <dc:title>Gene Recognition Via Spliced Sequence Alignment</dc:title>

    <dc:creator>Mikhail Gelfand</dc:creator>
    <dc:creator>Andrey Mironov</dc:creator>
    <dc:creator>Pavel Pevzner</dc:creator>
    <dc:source>Proceedings of the National Academy of Sciences of the United States of America, Vol. 93, No. 17. (1996), pp. 9061-9066.</dc:source>
    <dc:date>2007-12-12T14:20:19-00:00</dc:date>
    <prism:publicationName>Proceedings of the National Academy of Sciences of the United States of America</prism:publicationName>
    <prism:volume>93</prism:volume>
    <prism:number>17</prism:number>
    <prism:startingPage>9061</prism:startingPage>
    <prism:endingPage>9066</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/876703">
    <title>TRANSFAC: a database on transcription factors and their DNA binding sites.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/876703</link>
    <description>&lt;i&gt;Nucleic Acids Res, Vol. 24, No. 1. (1 January 1996), pp. 238-241.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;TRANSFAC is a database about eukaryotic transcription regulating DNA sequence elements and the transcription factors binding to and acting through them. This report summarizes the present status of this database and accompanying retrieval tools.</description>
    <dc:title>TRANSFAC: a database on transcription factors and their DNA binding sites.</dc:title>

    <dc:creator>E Wingender</dc:creator>
    <dc:creator>P Dietze</dc:creator>
    <dc:creator>H Karas</dc:creator>
    <dc:creator>R Knüppel</dc:creator>
    <dc:source>Nucleic Acids Res, Vol. 24, No. 1. (1 January 1996), pp. 238-241.</dc:source>
    <dc:date>2006-09-28T21:13:40-00:00</dc:date>
    <prism:publicationName>Nucleic Acids Res</prism:publicationName>
    <prism:issn>0305-1048</prism:issn>
    <prism:volume>24</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>238</prism:startingPage>
    <prism:endingPage>241</prism:endingPage>
    <prism:category>database</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/921611">
    <title>Systematic evolution of ligands by exponential enrichment: RNA ligands to bacteriophage T4 DNA polymerase.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/921611</link>
    <description>&lt;i&gt;Science, Vol. 249, No. 4968. (3 August 1990), pp. 505-510.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;High-affinity nucleic acid ligands for a protein were isolated by a procedure that depends on alternate cycles of ligand selection from pools of variant sequences and amplification of the bound species. Multiple rounds exponentially enrich the population for the highest affinity species that can be clonally isolated and characterized. In particular one eight-base region of an RNA that interacts with the T4 DNA polymerase was chosen and randomized. Two different sequences were selected by this procedure from the calculated pool of 65,536 species. One is the wild-type sequence found in the bacteriophage mRNA; one is varied from wild type at four positions. The binding constants of these two RNA's to T4 DNA polymerase are equivalent. These protocols with minimal modification can yield high-affinity ligands for any protein that binds nucleic acids as part of its function; high-affinity ligands could conceivably be developed for any target molecule.</description>
    <dc:title>Systematic evolution of ligands by exponential enrichment: RNA ligands to bacteriophage T4 DNA polymerase.</dc:title>

    <dc:creator>C Tuerk</dc:creator>
    <dc:creator>L Gold</dc:creator>
    <dc:source>Science, Vol. 249, No. 4968. (3 August 1990), pp. 505-510.</dc:source>
    <dc:date>2006-11-01T16:58:40-00:00</dc:date>
    <prism:publicationName>Science</prism:publicationName>
    <prism:issn>0036-8075</prism:issn>
    <prism:volume>249</prism:volume>
    <prism:number>4968</prism:number>
    <prism:startingPage>505</prism:startingPage>
    <prism:endingPage>510</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/701850">
    <title>Identifying DNA and protein patterns with statistically significant alignments of multiple sequences</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/701850</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 15, No. 7. (1 July 1999), pp. 563-577.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;10.1093/bioinformatics/15.7.563</description>
    <dc:title>Identifying DNA and protein patterns with statistically significant alignments of multiple sequences</dc:title>

    <dc:creator>Gz Hertz</dc:creator>
    <dc:creator>Gd Stormo</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/15.7.563</dc:identifier>
    <dc:source>Bioinformatics, Vol. 15, No. 7. (1 July 1999), pp. 563-577.</dc:source>
    <dc:date>2006-06-20T10:24:34-00:00</dc:date>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:volume>15</prism:volume>
    <prism:number>7</prism:number>
    <prism:startingPage>563</prism:startingPage>
    <prism:endingPage>577</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>motif</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2097765">
    <title>A computational genomics approach to the identification of gene networks</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2097765</link>
    <description>&lt;i&gt;Nucl. Acids Res., Vol. 25, No. 18. (15 September 1997), pp. 3594-3604.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;10.1093/nar/25.18.3594</description>
    <dc:title>A computational genomics approach to the identification of gene networks</dc:title>

    <dc:creator>A Wagner</dc:creator>
    <dc:identifier>doi:10.1093/nar/25.18.3594</dc:identifier>
    <dc:source>Nucl. Acids Res., Vol. 25, No. 18. (15 September 1997), pp. 3594-3604.</dc:source>
    <dc:date>2007-12-12T11:04:50-00:00</dc:date>
    <prism:publicationName>Nucl. Acids Res.</prism:publicationName>
    <prism:volume>25</prism:volume>
    <prism:number>18</prism:number>
    <prism:startingPage>3594</prism:startingPage>
    <prism:endingPage>3604</prism:endingPage>
    <prism:category>motif</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2097758">
    <title>A graph-based motif detection algorithm models complex nucleotide dependencies in transcription factor binding sites.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2097758</link>
    <description>&lt;i&gt;Nucleic Acids Res, Vol. 34, No. 20. (2006), pp. 5730-5739.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Given a set of known binding sites for a specific transcription factor, it is possible to build a model of the transcription factor binding site, usually called a motif model, and use this model to search for other sites that bind the same transcription factor. Typically, this search is performed using a position-specific scoring matrix (PSSM), also known as a position weight matrix. In this paper we analyze a set of eukaryotic transcription factor binding sites and show that there is extensive clustering of similar k-mers in eukaryotic motifs, owing to both functional and evolutionary constraints. The apparent limitations of probabilistic models in representing complex nucleotide dependencies lead us to a graph-based representation of motifs. When deciding whether a candidate k-mer is part of a motif or not, we base our decision not on how well the k-mer conforms to a model of the motif as a whole, but how similar it is to specific, known k-mers in the motif. We elucidate the reasons why we expect graph-based methods to perform well on motif data. Our MotifScan algorithm shows greatly improved performance over the prevalent PSSM-based method for the detection of eukaryotic motifs.</description>
    <dc:title>A graph-based motif detection algorithm models complex nucleotide dependencies in transcription factor binding sites.</dc:title>

    <dc:creator>BT Naughton</dc:creator>
    <dc:creator>E Fratkin</dc:creator>
    <dc:creator>S Batzoglou</dc:creator>
    <dc:creator>DL Brutlag</dc:creator>
    <dc:source>Nucleic Acids Res, Vol. 34, No. 20. (2006), pp. 5730-5739.</dc:source>
    <dc:date>2007-12-12T10:59:52-00:00</dc:date>
    <prism:publicationName>Nucleic Acids Res</prism:publicationName>
    <prism:issn>1362-4962</prism:issn>
    <prism:volume>34</prism:volume>
    <prism:number>20</prism:number>
    <prism:startingPage>5730</prism:startingPage>
    <prism:endingPage>5739</prism:endingPage>
    <prism:category>motif</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2097756">
    <title>Combining evidence using p-values: application to sequence homology searches.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2097756</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 14, No. 1. (1998), pp. 48-54.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;MOTIVATION: To illustrate an intuitive and statistically valid method for combining independent sources of evidence that yields a p-value for the complete evidence, and to apply it to the problem of detecting simultaneous matches to multiple patterns in sequence homology searches. RESULTS: In sequence analysis, two or more (approximately) independent measures of the membership of a sequence (or sequence region) in some class are often available. We would like to estimate the likelihood of the sequence being a member of the class in view of all the available evidence. An example is estimating the significance of the observed match of a macromolecular sequence (DNA or protein) to a set of patterns (motifs) that characterize a biological sequence family. An intuitive way to do this is to express each piece of evidence as a p-value, and then use the product of these p-values as the measure of membership in the family. We derive a formula and algorithm (QFAST) for calculating the statistical distribution of the product of n independent p-values. We demonstrate that sorting sequences by this p-value effectively combines the information present in multiple motifs, leading to highly accurate and sensitive sequence homology searches.</description>
    <dc:title>Combining evidence using p-values: application to sequence homology searches.</dc:title>

    <dc:creator>TL Bailey</dc:creator>
    <dc:creator>M Gribskov</dc:creator>
    <dc:source>Bioinformatics, Vol. 14, No. 1. (1998), pp. 48-54.</dc:source>
    <dc:date>2007-12-12T10:57:21-00:00</dc:date>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1367-4803</prism:issn>
    <prism:volume>14</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>48</prism:startingPage>
    <prism:endingPage>54</prism:endingPage>
    <prism:category>algorithm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2097754">
    <title>Methods and statistics for combining motif match scores.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2097754</link>
    <description>&lt;i&gt;J Comput Biol, Vol. 5, No. 2. (1998), pp. 211-221.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Position-specific scoring matrices are useful for representing and searching for protein sequence motifs. A sequence family can often be described by a group of one or more motifs, and an effective search must combine the scores for matching a sequence to each of the motifs in the group. We describe three methods for combining match scores and estimating the statistical significance of the combined scores and evaluate the search quality (classification accuracy) and the accuracy of the estimate of statistical significance of each. The three methods are: 1) sum of scores, 2) sum of reduced variates, 3) product of score p-values. We show that method 3) is superior to the other two methods in both regards, and that combining motif scores indeed gives better search accuracy. The MAST sequence homology search algorithm utilizing the product of p-values scoring method is available for interactive use and downloading at URL http://www.sdsc.edu/MEME.</description>
    <dc:title>Methods and statistics for combining motif match scores.</dc:title>

    <dc:creator>TL Bailey</dc:creator>
    <dc:creator>M Gribskov</dc:creator>
    <dc:source>J Comput Biol, Vol. 5, No. 2. (1998), pp. 211-221.</dc:source>
    <dc:date>2007-12-12T10:56:43-00:00</dc:date>
    <prism:publicationName>J Comput Biol</prism:publicationName>
    <prism:issn>1066-5277</prism:issn>
    <prism:volume>5</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>211</prism:startingPage>
    <prism:endingPage>221</prism:endingPage>
    <prism:category>motif</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2097508">
    <title>Statistical extraction of Drosophila cis-regulatory modules using exhaustive assessment of local word frequency.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2097508</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 4 (22 December 2003)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND: Transcription regulatory regions in higher eukaryotes are often represented by cis-regulatory modules (CRM) and are responsible for the formation of specific spatial and temporal gene expression patterns. These extended, approximately 1 KB, regions are found far from coding sequences and cannot be extracted from genome on the basis of their relative position to the coding regions. RESULTS: To explore the feasibility of CRM extraction from a genome, we generated an original training set, containing annotated sequence data for most of the known developmental CRMs from Drosophila. Based on this set of experimental data, we developed a strategy for statistical extraction of cis-regulatory modules from the genome, using exhaustive analysis of local word frequency (LWF). To assess the performance of our analysis, we measured the correlation between predictions generated by the LWF algorithm and the distribution of conserved non-coding regions in a number of Drosophila developmental genes. CONCLUSIONS: In most of the cases tested, we observed high correlation (up to 0.6-0.8, measured on the entire gene locus) between the two independent techniques. We discuss computational strategies available for extraction of Drosophila CRMs and possible extensions of these methods.</description>
    <dc:title>Statistical extraction of Drosophila cis-regulatory modules using exhaustive assessment of local word frequency.</dc:title>

    <dc:creator>AG Nazina</dc:creator>
    <dc:creator>DA Papatsenko</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-4-65</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 4 (22 December 2003)</dc:source>
    <dc:date>2007-12-12T09:21:34-00:00</dc:date>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:issn>1471-2105</prism:issn>
    <prism:volume>4</prism:volume>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2097484">
    <title>Detection of cis -element clusters in higher eukaryotic DNA</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2097484</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 17, No. 10. (1 October 2001), pp. 878-889.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Motivation: Computational prediction and analysis of transcription regulatory regions in DNA sequences has the potential to accelerate greatly our understanding of how cellular processes are controlled. We present a hidden Markov model based method for detecting regulatory regions in DNA sequences, by searching for clusters of cis -elements. Results: When applied to regulatory targets of the transcription factor LSF, this method achieves a sensitivity of 67%, while making one prediction per 33 kb of non-repetitive human genomic sequence. When applied to muscle specific regulatory regions, we obtain a sensitivity and prediction rate that compare favorably with one of the best alternative approaches. Our method, which we call Cister, can be used to predict different varieties of regulatory region by searching for clusters of cis -elements of any type chosen by the user. Cister is simple to use and is available on the web. Availability: http://sullivan.bu.edu/~mfrith/cister.shtml Contact: mfrith@bu.edu; zhiping@bu.edu 10.1093/bioinformatics/17.10.878</description>
    <dc:title>Detection of cis -element clusters in higher eukaryotic DNA</dc:title>

    <dc:creator>Martin Frith</dc:creator>
    <dc:creator>Ulla Hansen</dc:creator>
    <dc:creator>Zhiping Weng</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/17.10.878</dc:identifier>
    <dc:source>Bioinformatics, Vol. 17, No. 10. (1 October 2001), pp. 878-889.</dc:source>
    <dc:date>2007-12-12T09:15:31-00:00</dc:date>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:volume>17</prism:volume>
    <prism:number>10</prism:number>
    <prism:startingPage>878</prism:startingPage>
    <prism:endingPage>889</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2097483">
    <title>Statistical significance of clusters of motifs represented by position specific scoring matrices in nucleotide sequences</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2097483</link>
    <description>&lt;i&gt;Nucl. Acids Res., Vol. 30, No. 14. (15 July 2002), pp. 3214-3224.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The human genome encodes the transcriptional control of its genes in clusters of cis-elements that constitute enhancers, silencers and promoter signals. The sequence motifs of individual cis-elements are usually too short and degenerate for confident detection. In most cases, the requirements for organization of cis-elements within these clusters are poorly understood. Therefore, we have developed a general method to detect local concentrations of cis-element motifs, using predetermined matrix representations of the cis-elements, and calculate the statistical significance of these motif clusters. The statistical significance calculation is highly accurate not only for idealized, pseudorandom DNA, but also for real human DNA. We use our method 'cluster of motifs E-value tool' (COMET) to make novel predictions concerning the regulation of genes by transcription factors associated with muscle. COMET performs comparably with two alternative state-of-the-art techniques, which are more complex and lack E-value calculations. Our statistical method enables us to clarify the major bottleneck in the hard problem of detecting cis-regulatory regions, which is that many known enhancers do not contain very significant clusters of the motif types that we search for. Thus, discovery of additional signals that belong to these regulatory regions will be the key to future progress. 10.1093/nar/gkf438</description>
    <dc:title>Statistical significance of clusters of motifs represented by position specific scoring matrices in nucleotide sequences</dc:title>

    <dc:creator>Martin Frith</dc:creator>
    <dc:creator>John Spouge</dc:creator>
    <dc:creator>Ulla Hansen</dc:creator>
    <dc:creator>Zhiping Weng</dc:creator>
    <dc:identifier>doi:10.1093/nar/gkf438</dc:identifier>
    <dc:source>Nucl. Acids Res., Vol. 30, No. 14. (15 July 2002), pp. 3214-3224.</dc:source>
    <dc:date>2007-12-12T09:15:26-00:00</dc:date>
    <prism:publicationName>Nucl. Acids Res.</prism:publicationName>
    <prism:volume>30</prism:volume>
    <prism:number>14</prism:number>
    <prism:startingPage>3214</prism:startingPage>
    <prism:endingPage>3224</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/827938">
    <title>JASPAR: an open-access database for eukaryotic transcription factor binding profiles.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/827938</link>
    <description>&lt;i&gt;Nucleic Acids Res, Vol. 32, No. Database issue. (1 January 2004)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The analysis of regulatory regions in genome sequences is strongly based on the detection of potential transcription factor binding sites. The preferred models for representation of transcription factor binding specificity have been termed position-specific scoring matrices. JASPAR is an open-access database of annotated, high-quality, matrix-based transcription factor binding site profiles for multicellular eukaryotes. The profiles were derived exclusively from sets of nucleotide sequences experimentally demonstrated to bind transcription factors. The database is complemented by a web interface for browsing, searching and subset selection, an online sequence analysis utility and a suite of programming tools for genome-wide and comparative genomic analysis of regulatory regions. JASPAR is available at http://jaspar.cgb.ki.se.</description>
    <dc:title>JASPAR: an open-access database for eukaryotic transcription factor binding profiles.</dc:title>

    <dc:creator>A Sandelin</dc:creator>
    <dc:creator>W Alkema</dc:creator>
    <dc:creator>P Engström</dc:creator>
    <dc:creator>WW Wasserman</dc:creator>
    <dc:creator>B Lenhard</dc:creator>
    <dc:source>Nucleic Acids Res, Vol. 32, No. Database issue. (1 January 2004)</dc:source>
    <dc:date>2006-09-04T16:47:07-00:00</dc:date>
    <prism:publicationName>Nucleic Acids Res</prism:publicationName>
    <prism:issn>1362-4962</prism:issn>
    <prism:volume>32</prism:volume>
    <prism:number>Database issue</prism:number>
    <prism:category>database</prism:category>
    <prism:category>motif</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1287340">
    <title>ClusterDraw web server: a tool to identify and visualize clusters of binding motifs for transcription factors</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1287340</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 23, No. 8. (15 April 2007), pp. 1032-1034.&lt;/i&gt;</description>
    <dc:title>ClusterDraw web server: a tool to identify and visualize clusters of binding motifs for transcription factors</dc:title>

    <dc:creator>Dmitri Papatsenko</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/btm047</dc:identifier>
    <dc:source>Bioinformatics, Vol. 23, No. 8. (15 April 2007), pp. 1032-1034.</dc:source>
    <dc:date>2007-05-10T04:35:50-00:00</dc:date>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1367-4803</prism:issn>
    <prism:volume>23</prism:volume>
    <prism:number>8</prism:number>
    <prism:startingPage>1032</prism:startingPage>
    <prism:endingPage>1034</prism:endingPage>
    <prism:publisher>Oxford University Press</prism:publisher>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2097474">
    <title>Cluster-Buster: finding dense clusters of motifs in DNA sequences</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2097474</link>
    <description>&lt;i&gt;Nucl. Acids Res., Vol. 31, No. 13. (1 July 2003), pp. 3666-3668.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The signals that determine activation and repression of specific genes in response to appropriate stimuli are one of the most important, but least understood, types of information encoded in genomic DNA. The nucleotide sequence patterns, or motifs, preferentially bound by various transcription factors have been collected in databases. However, these motifs appear to be individually too short and degenerate to enable detection of functional enhancer and silencer elements within a large genome. Several groups have proposed that dense clusters of motifs may diagnose regulatory regions more accurately. Cluster-Buster is the third incarnation of our software for finding clusters of pre-specified motifs in DNA sequences. We offer a Cluster-Buster web server at http://zlab.bu.edu/cluster-buster/. 10.1093/nar/gkg540</description>
    <dc:title>Cluster-Buster: finding dense clusters of motifs in DNA sequences</dc:title>

    <dc:creator>Martin Frith</dc:creator>
    <dc:creator>Michael Li</dc:creator>
    <dc:creator>Zhiping Weng</dc:creator>
    <dc:identifier>doi:10.1093/nar/gkg540</dc:identifier>
    <dc:source>Nucl. Acids Res., Vol. 31, No. 13. (1 July 2003), pp. 3666-3668.</dc:source>
    <dc:date>2007-12-12T09:10:05-00:00</dc:date>
    <prism:publicationName>Nucl. Acids Res.</prism:publicationName>
    <prism:volume>31</prism:volume>
    <prism:number>13</prism:number>
    <prism:startingPage>3666</prism:startingPage>
    <prism:endingPage>3668</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2097471">
    <title>Decoding human regulatory circuits.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2097471</link>
    <description>&lt;i&gt;Genome Res, Vol. 14, No. 10A. (October 2004), pp. 1967-1974.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Clusters of transcription factor binding sites (TFBSs) which direct gene expression constitute cis-regulatory modules (CRMs). We present a novel algorithm, based on Gibbs sampling, which locates, de novo, the cis features of these CRMs, their component TFBSs, and the properties of their spatial distribution. The algorithm finds 69% of experimentally reported TFBSs and 85% of the CRMs in a reference data set of regions upstream of genes differentially expressed in skeletal muscle cells. A discriminant procedure based on the output of the model specifically discriminated regulatory sequences in muscle-specific genes in an independent test set. Application of the method to the analysis of 2710 10-kb fragments upstream of annotated human genes identified 17 novel candidate modules with a false discovery rate &#8804;0.05, demonstrating the applicability of the method to genome-scale data.</description>
    <dc:title>Decoding human regulatory circuits.</dc:title>

    <dc:creator>W Thompson</dc:creator>
    <dc:creator>MJ Palumbo</dc:creator>
    <dc:creator>WW Wasserman</dc:creator>
    <dc:creator>JS Liu</dc:creator>
    <dc:creator>CE Lawrence</dc:creator>
    <dc:identifier>doi:10.1101/gr.2589004</dc:identifier>
    <dc:source>Genome Res, Vol. 14, No. 10A. (October 2004), pp. 1967-1974.</dc:source>
    <dc:date>2007-12-12T09:09:32-00:00</dc:date>
    <prism:publicationName>Genome Res</prism:publicationName>
    <prism:issn>1088-9051</prism:issn>
    <prism:volume>14</prism:volume>
    <prism:number>10A</prism:number>
    <prism:startingPage>1967</prism:startingPage>
    <prism:endingPage>1974</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2064285">
    <title>CisModule: de novo discovery of cis-regulatory modules by hierarchical mixture modeling.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2064285</link>
    <description>&lt;i&gt;Proc Natl Acad Sci U S A, Vol. 101, No. 33. (17 August 2004), pp. 12114-12119.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The regulatory information for a eukaryotic gene is encoded in cis-regulatory modules. The binding sites for a set of interacting transcription factors have the tendency to colocalize to the same modules. Current de novo motif discovery methods do not take advantage of this knowledge. We propose a hierarchical mixture approach to model the cis-regulatory module structure. Based on the model, a new de novo motif-module discovery algorithm, CisModule, is developed for the Bayesian inference of module locations and within-module motif sites. Dynamic programming-like recursions are developed to reduce the computational complexity from exponential to linear in sequence length. By using both simulated and real data sets, we demonstrate that CisModule is not only accurate in predicting modules but also more sensitive in detecting motif patterns and binding sites than standard motif discovery methods are.</description>
    <dc:title>CisModule: de novo discovery of cis-regulatory modules by hierarchical mixture modeling.</dc:title>

    <dc:creator>Q Zhou</dc:creator>
    <dc:creator>WH Wong</dc:creator>
    <dc:identifier>doi:10.1073/pnas.0402858101</dc:identifier>
    <dc:source>Proc Natl Acad Sci U S A, Vol. 101, No. 33. (17 August 2004), pp. 12114-12119.</dc:source>
    <dc:date>2007-12-05T22:45:51-00:00</dc:date>
    <prism:publicationName>Proc Natl Acad Sci U S A</prism:publicationName>
    <prism:issn>0027-8424</prism:issn>
    <prism:volume>101</prism:volume>
    <prism:number>33</prism:number>
    <prism:startingPage>12114</prism:startingPage>
    <prism:endingPage>12119</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/453150">
    <title>De novo cis-regulatory module elicitation for eukaryotic genomes</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/453150</link>
    <description>&lt;i&gt;PNAS, Vol. 102, No. 20. (17 May 2005), pp. 7079-7084.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Transcription regulation is controlled by coordinated binding of one or more transcription factors in the promoter regions of genes. In many species, especially higher eukaryotes, transcription factor binding sites tend to occur as homotypic or heterotypic clusters, also known as cis-regulatory modules. The number of sites and distances between the sites, however, vary greatly in a module. We propose a statistical model to describe the underlying cluster structure as well as individual motif conservation and develop a Monte Carlo motif screening strategy for predicting novel regulatory modules in upstream sequences of coregulated genes. We demonstrate the power of the method with examples ranging from bacterial to insect and human genomes.</description>
    <dc:title>De novo cis-regulatory module elicitation for eukaryotic genomes</dc:title>

    <dc:creator>Mayetri Gupta</dc:creator>
    <dc:creator>Jun Liu</dc:creator>
    <dc:identifier>doi:10.1073/pnas.0408743102</dc:identifier>
    <dc:source>PNAS, Vol. 102, No. 20. (17 May 2005), pp. 7079-7084.</dc:source>
    <dc:date>2005-12-30T06:20:05-00:00</dc:date>
    <prism:publicationName>PNAS</prism:publicationName>
    <prism:volume>102</prism:volume>
    <prism:number>20</prism:number>
    <prism:startingPage>7079</prism:startingPage>
    <prism:endingPage>7084</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1084478">
    <title>Modulefinder: a tool for computational discovery of cis regulatory modules.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1084478</link>
    <description>&lt;i&gt;Pac Symp Biocomput (2005), pp. 519-530.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Regulation of gene expression occurs largely through the binding of sequence-specific transcription factors (TFs) to genomic binding sites (BSs). We present a rigorous scoring scheme, implemented as a C program termed &#34;ModuleFinder&#34;, that evaluates the likelihood that a given genomic region is a cis regulatory module (CRM) for an input set of TFs according to its degree of: (1) homotypic site clustering; (2) heterotypic site clustering; and (3) evolutionary conservation across multiple genomes. Importantly, ModuleFinder obtains all parameters needed to appropriately weight the relative contributions of these sequence features directly from the input sequences and TFBS motifs, and does not need to first be trained. Using two previously described collections of experimentally verified CRMs in mammals and in fly as validation datasets, we show that ModuleFinder is able to identify CRMs with great sensitivity and specificity.</description>
    <dc:title>Modulefinder: a tool for computational discovery of cis regulatory modules.</dc:title>

    <dc:creator>AA Philippakis</dc:creator>
    <dc:creator>FS He</dc:creator>
    <dc:creator>ML Bulyk</dc:creator>
    <dc:source>Pac Symp Biocomput (2005), pp. 519-530.</dc:source>
    <dc:date>2007-02-02T15:42:56-00:00</dc:date>
    <prism:publicationName>Pac Symp Biocomput</prism:publicationName>
    <prism:startingPage>519</prism:startingPage>
    <prism:endingPage>530</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2097314">
    <title>Identification of functional clusters of transcription factor binding motifs in genome sequences: the MSCAN algorithm.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2097314</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 19 Suppl 1 (2003)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;MOTIVATION: The identification of regulatory control regions within genomes is a major challenge. Studies have demonstrated that regulating regions can be described as locally dense clusters or modules of cis-acting transcription factor binding sites (TFBS). For well-described biological contexts, it is possible to train predictive algorithms to discern novel modules in genome sequences. However, utility of module detection methods has been severely limited by insufficient training data. For only a few tissues can one obtain sufficient numbers of literature-derived regulatory modules. RESULTS: We present a novel method, MSCAN, that circumvents the training data problem by measuring the statistical significance of any non-overlapping combination of TFBS in a window. Given a set of transcription factor binding profiles, a significance threshold, and a genomic sequence, MSCAN returns putative regulatory regions. We assess performance on two curated collections of regulatory regions; one each for tissue-specific expression in liver and skeletal muscle cells. The efficiency of MSCAN allows for predictive screens of entire genomes.</description>
    <dc:title>Identification of functional clusters of transcription factor binding motifs in genome sequences: the MSCAN algorithm.</dc:title>

    <dc:creator>O Johansson</dc:creator>
    <dc:creator>W Alkema</dc:creator>
    <dc:creator>WW Wasserman</dc:creator>
    <dc:creator>J Lagergren</dc:creator>
    <dc:source>Bioinformatics, Vol. 19 Suppl 1 (2003)</dc:source>
    <dc:date>2007-12-12T07:47:19-00:00</dc:date>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1367-4803</prism:issn>
    <prism:volume>19 Suppl 1</prism:volume>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2096906">
    <title>MAPPER: a search engine for the computational identification of putative transcription factor binding sites in multiple genomes.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2096906</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 6 (2005)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND: Cis-regulatory modules are combinations of regulatory elements occurring in close proximity to each other that control the spatial and temporal expression of genes. The ability to identify them in a genome-wide manner depends on the availability of accurate models and of search methods able to detect putative regulatory elements with enhanced sensitivity and specificity. RESULTS: We describe the implementation of a search method for putative transcription factor binding sites (TFBSs) based on hidden Markov models built from alignments of known sites. We built 1,079 models of TFBSs using experimentally determined sequence alignments of sites provided by the TRANSFAC and JASPAR databases and used them to scan sequences of the human, mouse, fly, worm and yeast genomes. In several cases tested the method identified correctly experimentally characterized sites, with better specificity and sensitivity than other similar computational methods. Moreover, a large-scale comparison using synthetic data showed that in the majority of cases our method performed significantly better than a nucleotide weight matrix-based method. CONCLUSION: The search engine, available at http://mapper.chip.org, allows the identification, visualization and selection of putative TFBSs occurring in the promoter or other regions of a gene from the human, mouse, fly, worm and yeast genomes. In addition it allows the user to upload a sequence to query and to build a model by supplying a multiple sequence alignment of binding sites for a transcription factor of interest. Due to its extensive database of models, powerful search engine and flexible interface, MAPPER represents an effective resource for the large-scale computational analysis of transcriptional regulation.</description>
    <dc:title>MAPPER: a search engine for the computational identification of putative transcription factor binding sites in multiple genomes.</dc:title>

    <dc:creator>VD Marinescu</dc:creator>
    <dc:creator>IS Kohane</dc:creator>
    <dc:creator>A Riva</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-6-79</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 6 (2005)</dc:source>
    <dc:date>2007-12-12T05:43:52-00:00</dc:date>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:issn>1471-2105</prism:issn>
    <prism:volume>6</prism:volume>
    <prism:category>algorithm</prism:category>
    <prism:category>motif</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/882895">
    <title>CREME: a framework for identifying cis-regulatory modules in human-mouse conserved segments.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/882895</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 19 Suppl 1 (2003)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;MOTIVATION: The binding of transcription factors to specific regulatory sequence elements is a primary mechanism for controlling gene transcription. Recent findings suggest a modular organization of binding sites for transcription factors that cooperate in the regulation of genes. In this work we establish a framework for finding recurrent cis-regulatory modules in the promoters of a selected set of genes and scoring their statistical significance. RESULTS: Proceeding from a database of identified binding site motifs and their genomic locations we seek motifs whose frequency in the selected promoters is different than in a background promoter set. We present several statistical tests designed for this purpose. We provide a hashing algorithm for detecting combinations of these motifs that co-occur in clusters within the selected promoters. The significance of such co-occurrences is evaluated using novel statistical scores. Our methods are combined in CREME, a suite of software which includes a browser for viewing the pattern of occurrence of selected cis-regulatory modules. We applied our methodology to find modules within human-mouse conserved promoter segments, focusing on cell cycle regulated genes and stress response related genes. To validate the biological significance of the identified modules we tested whether the associated genes tended to be co-expressed or share similar function. In the cell cycle set five of the seven identified sets of genes were coherently expressed. On the stress response data four of the six detected sets fell predominantly into well-defined functional sub-categories.</description>
    <dc:title>CREME: a framework for identifying cis-regulatory modules in human-mouse conserved segments.</dc:title>

    <dc:creator>R Sharan</dc:creator>
    <dc:creator>I Ovcharenko</dc:creator>
    <dc:creator>A Ben-Hur</dc:creator>
    <dc:creator>RM Karp</dc:creator>
    <dc:source>Bioinformatics, Vol. 19 Suppl 1 (2003)</dc:source>
    <dc:date>2006-10-03T22:27:36-00:00</dc:date>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1367-4803</prism:issn>
    <prism:volume>19 Suppl 1</prism:volume>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/909307">
    <title>Identification of regulatory regions which confer muscle-specific gene expression</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/909307</link>
    <description>&lt;i&gt;Journal of Molecular Biology, Vol. 278, No. 1. (24 April 1998), pp. 167-181.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;For many newly sequenced genes, sequence analysis of the putative protein yields no clue on function. It would be beneficial to be able to identify in the genome the regulatory regions that confer temporal and spatial expression patterns for the uncharacterized genes. Additionally, it would be advantageous to identify regulatory regions within genes of known expression pattern without performing the costly and time consuming laboratory studies now required. To achieve these goals, the wealth of case studies performed over the past 15 years will have to be collected into predictive models of expression. Extensive studies of genes expressed in skeletal muscle have identified specific transcription factors which bind to regulatory elements to control gene expression. However, potential binding sites for these factors occur with sufficient frequency that it is rare for a gene to be found without one. Analysis of experimentally determined muscle regulatory sequences indicates that muscle expression requires multiple elements in close proximity. A model is generated with predictive capability for identifying these muscle-specific regulatory modules. Phylogenetic footprinting, the identification of sequences conserved between distantly related species, complements the statistical predictions. Through the use of logistic regression analysis, the model promises to be easily modified to take advantage of the elucidation of additional factors, cooperation rules, and spacing constraints.</description>
    <dc:title>Identification of regulatory regions which confer muscle-specific gene expression</dc:title>

    <dc:creator>Wyeth Wasserman</dc:creator>
    <dc:creator>James Fickett</dc:creator>
    <dc:identifier>doi:10.1006/jmbi.1998.1700</dc:identifier>
    <dc:source>Journal of Molecular Biology, Vol. 278, No. 1. (24 April 1998), pp. 167-181.</dc:source>
    <dc:date>2006-10-22T01:50:49-00:00</dc:date>
    <prism:publicationName>Journal of Molecular Biology</prism:publicationName>
    <prism:volume>278</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>167</prism:startingPage>
    <prism:endingPage>181</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1811386">
    <title>Prediction of similarly acting cis-regulatory modules by subsequence profiling and comparative genomics in Drosophila melanogaster and D.pseudoobscura.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1811386</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 20, No. 16. (1 November 2004), pp. 2738-2750.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;MOTIVATION: To date, computational searches for cis-regulatory modules (CRMs) have relied on two methods. The first, phylogenetic footprinting, has been used to find CRMs in non-coding sequence, but does not directly link DNA sequence with spatio-temporal patterns of expression. The second, based on searches for combinations of transcription factor (TF) binding motifs, has been employed in genome-wide discovery of similarly acting enhancers, but requires prior knowledge of the set of TFs acting at the CRM and the TFs' binding motifs. RESULTS: We propose a method for CRM discovery that combines aspects of both approaches in an effort to overcome their individual limitations. By treating phylogenetically footprinted non-coding regions (PFRs) as proxies for CRMs, we endeavor to find PFRs near co-regulated genes that are comprised of similar short, conserved sequences. Using Markov chains as a convenient formulation to assess similarity, we develop a sampling algorithm to search a large group of PFRs for the most similar subset. When starting with a set of genes involved in Drosophila early blastoderm development and using phylogenetic comparisons of Drosophila melanogaster and D.pseudoobscura genomes, we show here that our algorithm successfully detects known CRMs. Further, we use our similarity metric, based on Markov chain discrimination, in a genome-wide search, and uncover additional known and many candidate early blastoderm CRMs. AVAILABILITY: Software is available via http://arep.med.harvard.edu/enhancer</description>
    <dc:title>Prediction of similarly acting cis-regulatory modules by subsequence profiling and comparative genomics in Drosophila melanogaster and D.pseudoobscura.</dc:title>

    <dc:creator>YH Grad</dc:creator>
    <dc:creator>FP Roth</dc:creator>
    <dc:creator>MS Halfon</dc:creator>
    <dc:creator>GM Church</dc:creator>
    <dc:source>Bioinformatics, Vol. 20, No. 16. (1 November 2004), pp. 2738-2750.</dc:source>
    <dc:date>2007-10-23T16:39:02-00:00</dc:date>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1367-4803</prism:issn>
    <prism:volume>20</prism:volume>
    <prism:number>16</prism:number>
    <prism:startingPage>2738</prism:startingPage>
    <prism:endingPage>2750</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1646950">
    <title>Finding cis-regulatory modules in Drosophila using phylogenetic hidden Markov models</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1646950</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 23, No. 16. (15 August 2007), pp. 2031-2037.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Motivation: Finding the regulatory modules for transcription factors binding is an important step in elucidating the complex molecular mechanisms underlying regulation of gene expression. There are numerous methods available for solving this problem, however, very few of them take advantage of the increasing availability of comparative genomic data. Results: We develop a method for finding regulatory modules in Eukaryotic species using phylogenetic data. Using computer simulations and analysis of real data, we show that the use of phylogenetic hidden Markov model can lead to an increase in accuracy of prediction over methods that do not take advantage of the data from multiple species. Availability: The new method is made accessible under GPL in a new publicly available JAVA program: EvoPromoter. It can be downloaded at http://sourceforge.net/projects/evopromoter/ Contact: sww8@cornell.edu 10.1093/bioinformatics/btm299</description>
    <dc:title>Finding cis-regulatory modules in Drosophila using phylogenetic hidden Markov models</dc:title>

    <dc:creator>Wendy Wong</dc:creator>
    <dc:creator>Rasmus Nielsen</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/btm299</dc:identifier>
    <dc:source>Bioinformatics, Vol. 23, No. 16. (15 August 2007), pp. 2031-2037.</dc:source>
    <dc:date>2007-09-12T08:44:50-00:00</dc:date>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:volume>23</prism:volume>
    <prism:number>16</prism:number>
    <prism:startingPage>2031</prism:startingPage>
    <prism:endingPage>2037</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2060806">
    <title>Searching for statistically significant regulatory modules.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2060806</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 19 Suppl 2 (October 2003)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;MOTIVATION: The regulatory machinery controlling gene expression is complex, frequently requiring multiple, simultaneous DNA-protein interactions. The rate at which a gene is transcribed may depend upon the presence or absence of a collection of transcription factors bound to the DNA near the gene. Locating transcription factor binding sites in genomic DNA is difficult because the individual sites are small and tend to occur frequently by chance. True binding sites may be identified by their tendency to occur in clusters, sometimes known as regulatory modules. RESULTS: We describe an algorithm for detecting occurrences of regulatory modules in genomic DNA. The algorithm, called mcast, takes as input a DNA database and a collection of binding site motifs that are known to operate in concert. mcast uses a motif-based hidden Markov model with several novel features. The model incorporates motif-specific p-values, thereby allowing scores from motifs of different widths and specificities to be compared directly. The p-value scoring also allows mcast to only accept motif occurrences with significance below a user-specified threshold, while still assigning better scores to motif occurrences with lower p-values. mcast can search long DNA sequences, modeling length distributions between motifs within a regulatory module, but ignoring length distributions between modules. The algorithm produces a list of predicted regulatory modules, ranked by E-value. We validate the algorithm using simulated data as well as real data sets from fruitfly and human. AVAILABILITY: http://meme.sdsc.edu/MCAST/paper</description>
    <dc:title>Searching for statistically significant regulatory modules.</dc:title>

    <dc:creator>TL Bailey</dc:creator>
    <dc:creator>WS Noble</dc:creator>
    <dc:source>Bioinformatics, Vol. 19 Suppl 2 (October 2003)</dc:source>
    <dc:date>2007-12-05T13:00:32-00:00</dc:date>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1460-2059</prism:issn>
    <prism:volume>19 Suppl 2</prism:volume>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2060800">
    <title>Homotypic regulatory clusters in Drosophila.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2060800</link>
    <description>&lt;i&gt;Genome Res, Vol. 13, No. 4. (April 2003), pp. 579-588.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Cis-regulatory modules (CRMs) are transcription regulatory DNA segments (approximately 1 Kb range) that control the expression of developmental genes in higher eukaryotes. We analyzed clustering of known binding motifs for transcription factors (TFs) in over 60 known CRMs from 20 Drosophila developmental genes, and we present evidence that each type of recognition motif forms significant clusters within the regulatory regions regulated by the corresponding TF. We demonstrate how a search with a single binding motif can be applied to explore gene regulatory networks and to discover coregulated genes in the genome. We also discuss the potential of the clustering method in interpreting the differential response of genes to various levels of transcriptional regulators.</description>
    <dc:title>Homotypic regulatory clusters in Drosophila.</dc:title>

    <dc:creator>AP Lifanov</dc:creator>
    <dc:creator>VJ Makeev</dc:creator>
    <dc:creator>AG Nazina</dc:creator>
    <dc:creator>DA Papatsenko</dc:creator>
    <dc:identifier>doi:10.1101/gr.668403</dc:identifier>
    <dc:source>Genome Res, Vol. 13, No. 4. (April 2003), pp. 579-588.</dc:source>
    <dc:date>2007-12-05T12:56:40-00:00</dc:date>
    <prism:publicationName>Genome Res</prism:publicationName>
    <prism:issn>1088-9051</prism:issn>
    <prism:volume>13</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>579</prism:startingPage>
    <prism:endingPage>588</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2060758">
    <title>Genome-wide analysis of clustered Dorsal binding sites identifies putative target genes in the Drosophila embryo.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2060758</link>
    <description>&lt;i&gt;Proc Natl Acad Sci U S A, Vol. 99, No. 2. (22 January 2002), pp. 763-768.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Metazoan genomes contain vast tracts of cis-regulatory DNA that have been identified typically through tedious functional assays. As a result, it has not been possible to uncover a cis-regulatory code that links primary DNA sequences to gene expression patterns. In an initial effort to determine whether coordinately regulated genes share a common &#34;grammar,&#34; we have examined the distribution of Dorsal recognition sequences in the Drosophila genome. Dorsal is one of the best-characterized sequence-specific transcription factors in Drosophila. The homeobox gene zerknullt (zen) is repressed directly by Dorsal, and this repression is mediated by a 600-bp silencer, the ventral repression element (VRE), which contains four optimal Dorsal binding sites. The arrangement and sequence of the Dorsal recognition sequences in the VRE were used to develop a computational algorithm to search the Drosophila genome for clusters of optimal Dorsal binding sites. There are 15 regions in the genome that contain three or more optimal sites within a span of 400 bp or less. Three of these regions are associated with known Dorsal target genes: sog, zen, and Brinker. The Dorsal binding cluster in sog is shown to mediate lateral stripes of gene expression in response to low levels of the Dorsal gradient. Two of the remaining 12 clusters are shown to be associated with genes that exhibit asymmetric patterns of expression across the dorsoventral axis. These results suggest that bioinformatics can be used to identify novel target genes and associated regulatory DNAs in a gene network.</description>
    <dc:title>Genome-wide analysis of clustered Dorsal binding sites identifies putative target genes in the Drosophila embryo.</dc:title>

    <dc:creator>M Markstein</dc:creator>
    <dc:creator>P Markstein</dc:creator>
    <dc:creator>V Markstein</dc:creator>
    <dc:creator>MS Levine</dc:creator>
    <dc:identifier>doi:10.1073/pnas.012591199</dc:identifier>
    <dc:source>Proc Natl Acad Sci U S A, Vol. 99, No. 2. (22 January 2002), pp. 763-768.</dc:source>
    <dc:date>2007-12-05T12:38:16-00:00</dc:date>
    <prism:publicationName>Proc Natl Acad Sci U S A</prism:publicationName>
    <prism:issn>0027-8424</prism:issn>
    <prism:volume>99</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>763</prism:startingPage>
    <prism:endingPage>768</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2060753">
    <title>SCORE: A computational approach to the identification of cis-regulatory modules and target genes in whole-genome sequence data</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2060753</link>
    <description>&lt;i&gt;Proceedings of the National Academy of Sciences, Vol. 99, No. 15. (23 July 2002), pp. 9888-9893.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A large fraction of the information content of metazoan genomes resides in the transcriptional and posttranscriptional cis-regulatory elements that collectively provide the blueprint for using the protein-coding capacity of the DNA, thus guiding the development and physiology of the entire organism. As successive whole-genome sequencing projects&#8212;including those of mice and humans&#8212;are completed, we have full access to the regulatory genome of yet another species. But our ability to decipher the cis-regulatory code, and hence to link genes into regulatory networks on a global scale, is currently very limited. Here we describe SCORE (Site Clustering Over Random Expectation), a computational method for identifying transcriptional cis-regulatory modules based on the fact that they often contain, in statistically improbable concentrations, multiple binding sites for the same transcription factor. We have carried out a Drosophila genomewide inventory of predicted binding sites for the Notch-regulated transcription factor Suppressor of Hairless [Su(H)] and found that the fly genome contains highly nonrandom clusterings of Su(H) sites over a broad range of sequence intervals. We found that the most statistically significant clusters are very heavily enriched in both known and logical targets of Su(H) binding and regulation. The utility of the SCORE approach was validated by in vivo experiments showing that proper expression of the novel gene Him in adult muscle precursor cells depends both on Su(H) gene activity and sequences that include a previously unstudied cluster of four Su(H) sites, indicating that Him is a likely direct target of Su(H). 10.1073/pnas.152320899</description>
    <dc:title>SCORE: A computational approach to the identification of cis-regulatory modules and target genes in whole-genome sequence data</dc:title>

    <dc:creator>Mark Rebeiz</dc:creator>
    <dc:creator>Nick Reeves</dc:creator>
    <dc:creator>James Posakony</dc:creator>
    <dc:identifier>doi:10.1073/pnas.152320899</dc:identifier>
    <dc:source>Proceedings of the National Academy of Sciences, Vol. 99, No. 15. (23 July 2002), pp. 9888-9893.</dc:source>
    <dc:date>2007-12-05T12:37:41-00:00</dc:date>
    <prism:publicationName>Proceedings of the National Academy of Sciences</prism:publicationName>
    <prism:volume>99</prism:volume>
    <prism:number>15</prism:number>
    <prism:startingPage>9888</prism:startingPage>
    <prism:endingPage>9893</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/959130">
    <title>Identifying cis-regulatory modules by combining comparative and compositional analysis of DNA</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/959130</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 22, No. 23. (1 December 2006), pp. 2858-2864.&lt;/i&gt;</description>
    <dc:title>Identifying cis-regulatory modules by combining comparative and compositional analysis of DNA</dc:title>

    <dc:creator>Nora Pierstorff</dc:creator>
    <dc:creator>Casey M Bergman</dc:creator>
    <dc:creator>Thomas Wiehe</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/btl499</dc:identifier>
    <dc:source>Bioinformatics, Vol. 22, No. 23. (1 December 2006), pp. 2858-2864.</dc:source>
    <dc:date>2006-11-23T09:23:24-00:00</dc:date>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1367-4803</prism:issn>
    <prism:volume>22</prism:volume>
    <prism:number>23</prism:number>
    <prism:startingPage>2858</prism:startingPage>
    <prism:endingPage>2864</prism:endingPage>
    <prism:publisher>Oxford University Press</prism:publisher>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1507637">
    <title>Stubb: a program for discovery and analysis of cis-regulatory modules.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1507637</link>
    <description>&lt;i&gt;Nucleic Acids Res, Vol. 34, No. Web Server issue. (1 July 2006)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Given the DNA-binding specificities (motifs) of one or more transcription factors, an important bioinformatics problem is to discover significant clusters of binding sites for the transcription factors(s). Such clusters often correspond to cis-regulatory modules mediating regulation of an adjacent gene. In earlier work, we developed the Stubb program that uses a probabilistic model and a maximum likelihood approach to efficiently detect cis-regulatory modules over genomic scales. It may optionally exploit a second related genome to improve module prediction accuracy. We describe here the use of a web-based interface for the Stubb program. The interface is equipped with a special post-processing step for in-depth analysis of specific modules, in order to reveal individual binding sites predicted in the module. The web server may be accessed at the URL http://stubb.rockefeller.edu/.</description>
    <dc:title>Stubb: a program for discovery and analysis of cis-regulatory modules.</dc:title>

    <dc:creator>S Sinha</dc:creator>
    <dc:creator>Y Liang</dc:creator>
    <dc:creator>E Siggia</dc:creator>
    <dc:source>Nucleic Acids Res, Vol. 34, No. Web Server issue. (1 July 2006)</dc:source>
    <dc:date>2007-07-27T21:16:40-00:00</dc:date>
    <prism:publicationName>Nucleic Acids Res</prism:publicationName>
    <prism:issn>1362-4962</prism:issn>
    <prism:volume>34</prism:volume>
    <prism:number>Web Server issue</prism:number>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2060703">
    <title>Cross-species comparison significantly improves genome-wide prediction of cis-regulatory modules in Drosophila.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2060703</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 5 (9 September 2004)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND: The discovery of cis-regulatory modules in metazoan genomes is crucial for understanding the connection between genes and organism diversity. It is important to quantify how comparative genomics can improve computational detection of such modules. RESULTS: We run the Stubb software on the entire D. melanogaster genome, to obtain predictions of modules involved in segmentation of the embryo. Stubb uses a probabilistic model to score sequences for clustering of transcription factor binding sites, and can exploit multiple species data within the same probabilistic framework. The predictions are evaluated using publicly available gene expression data for thousands of genes, after careful manual annotation. We demonstrate that the use of a second genome (D. pseudoobscura) for cross-species comparison significantly improves the prediction accuracy of Stubb, and is a more sensitive approach than intersecting the results of separate runs over the two genomes. The entire list of predictions is made available online. CONCLUSION: Evolutionary conservation of modules serves as a filter to improve their detection in silico. The future availability of additional fruitfly genomes therefore carries the prospect of highly specific genome-wide predictions using Stubb.</description>
    <dc:title>Cross-species comparison significantly improves genome-wide prediction of cis-regulatory modules in Drosophila.</dc:title>

    <dc:creator>S Sinha</dc:creator>
    <dc:creator>MD Schroeder</dc:creator>
    <dc:creator>U Unnerstall</dc:creator>
    <dc:creator>U Gaul</dc:creator>
    <dc:creator>ED Siggia</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-5-129</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 5 (9 September 2004)</dc:source>
    <dc:date>2007-12-05T12:14:03-00:00</dc:date>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:issn>1471-2105</prism:issn>
    <prism:volume>5</prism:volume>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/816965">
    <title>Computational detection of genomic cis-regulatory modules applied to body patterning in the early Drosophila embryo.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/816965</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 3 (24 October 2002)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND: Regulation of gene transcription is crucial for the function and development of all organisms. While gene prediction programs that identify protein coding sequence are used with remarkable success in the annotation of genomes, the development of computational methods to analyze noncoding regions and to delineate transcriptional control elements is still in its infancy. RESULTS: Here we present novel algorithms to detect cis-regulatory modules through genome wide scans for clusters of transcription factor binding sites using three levels of prior information. When binding sites for the factors are known, our statistical segmentation algorithm, Ahab, yields about 150 putative gap gene regulated modules, with no adjustable parameters other than a window size. If one or more related modules are known, but no binding sites, repeated motifs can be found by a customized Gibbs sampler and input to Ahab, to predict genes with similar regulation. Finally using only the genome, we developed a third algorithm, Argos, that counts and scores clusters of overrepresented motifs in a window of sequence. Argos recovers many of the known modules, upstream of the segmentation genes, with no training data. CONCLUSIONS: We have demonstrated, in the case of body patterning in the Drosophila embryo, that our algorithms allow the genome-wide identification of regulatory modules. We believe that Ahab overcomes many problems of recent approaches and we estimated the false positive rate to be about 50%. Argos is the first successful attempt to predict regulatory modules using only the genome without training data. Complete results and module predictions across the Drosophila genome are available at http://uqbar.rockefeller.edu/~siggia/.</description>
    <dc:title>Computational detection of genomic cis-regulatory modules applied to body patterning in the early Drosophila embryo.</dc:title>

    <dc:creator>N Rajewsky</dc:creator>
    <dc:creator>M Vergassola</dc:creator>
    <dc:creator>U Gaul</dc:creator>
    <dc:creator>ED Siggia</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-3-30</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 3 (24 October 2002)</dc:source>
    <dc:date>2006-08-25T21:12:45-00:00</dc:date>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:issn>1471-2105</prism:issn>
    <prism:volume>3</prism:volume>
    <prism:category>algorithm</prism:category>
    <prism:category>enhancer-prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/318263">
    <title>BLAT--the BLAST-like alignment tool.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/318263</link>
    <description>&lt;i&gt;Genome Res, Vol. 12, No. 4. (April 2002), pp. 656-664.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Analyzing vertebrate genomes requires rapid mRNA/DNA and cross-species protein alignments. A new tool, BLAT, is more accurate and 500 times faster than popular existing tools for mRNA/DNA alignments and 50 times faster for protein alignments at sensitivity settings typically used when comparing vertebrate sequences. BLAT's speed stems from an index of all nonoverlapping K-mers in the genome. This index fits inside the RAM of inexpensive computers, and need only be computed once for each genome assembly. BLAT has several major stages. It uses the index to find regions in the genome likely to be homologous to the query sequence. It performs an alignment between homologous regions. It stitches together these aligned regions (often exons) into larger alignments (typically genes). Finally, BLAT revisits small internal exons possibly missed at the first stage and adjusts large gap boundaries that have canonical splice sites where feasible. This paper describes how BLAT was optimized. Effects on speed and sensitivity are explored for various K-mer sizes, mismatch schemes, and number of required index matches. BLAT is compared with other alignment programs on various test sets and then used in several genome-wide applications. http://genome.ucsc.edu hosts a web-based BLAT server for the human genome.</description>
    <dc:title>BLAT--the BLAST-like alignment tool.</dc:title>

    <dc:creator>WJ Kent</dc:creator>
    <dc:identifier>doi:10.1101/gr.229202</dc:identifier>
    <!-- Note carried in the source record's DOI field: "Article published online before March 2002" -->
    <dc:source>Genome Res, Vol. 12, No. 4. (April 2002), pp. 656-664.</dc:source>
    <dc:date>2005-09-13T16:14:30-00:00</dc:date>
    <prism:publicationName>Genome Res</prism:publicationName>
    <prism:issn>1088-9051</prism:issn>
    <prism:volume>12</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>656</prism:startingPage>
    <prism:endingPage>664</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>alignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1453943">
    <title>NCBI reference sequences (RefSeq): a curated non-redundant sequence database of genomes, transcripts and proteins.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1453943</link>
    <description>&lt;i&gt;Nucleic Acids Res, Vol. 35, No. Database issue. (January 2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;NCBI's reference sequence (RefSeq) database (http://www.ncbi.nlm.nih.gov/RefSeq/) is a curated non-redundant collection of sequences representing genomes, transcripts and proteins. The database includes 3774 organisms spanning prokaryotes, eukaryotes and viruses, and has records for 2,879,860 proteins (RefSeq release 19). RefSeq records integrate information from multiple sources, when additional data are available from those sources and therefore represent a current description of the sequence and its features. Annotations include coding regions, conserved domains, tRNAs, sequence tagged sites (STS), variation, references, gene and protein product names, and database cross-references. Sequence is reviewed and features are added using a combined approach of collaboration and other input from the scientific community, prediction, propagation from GenBank and curation by NCBI staff. The format of all RefSeq records is validated, and an increasing number of tests are being applied to evaluate the quality of sequence and annotation, especially in the context of complete genomic sequence.</description>
    <dc:title>NCBI reference sequences (RefSeq): a curated non-redundant sequence database of genomes, transcripts and proteins.</dc:title>

    <dc:creator>KD Pruitt</dc:creator>
    <dc:creator>T Tatusova</dc:creator>
    <dc:creator>DR Maglott</dc:creator>
    <dc:source>Nucleic Acids Res, Vol. 35, No. Database issue. (January 2007)</dc:source>
    <dc:date>2007-07-13T11:34:07-00:00</dc:date>
    <prism:publicationName>Nucleic Acids Res</prism:publicationName>
    <prism:issn>1362-4962</prism:issn>
    <prism:volume>35</prism:volume>
    <prism:number>Database issue</prism:number>
    <prism:category>database</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2051664">
    <title>DroSpeGe: rapid access database for new Drosophila species genomes.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2051664</link>
    <description>&lt;i&gt;Nucleic Acids Res, Vol. 35, No. Database issue. (January 2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The Drosophila species comparative genome database DroSpeGe (http://insects.eugenes.org/DroSpeGe/) provides genome researchers with rapid, usable access to 12 new and old Drosophila genomes, since its inception in 2004. Scientists can use, with minimal computing expertise, the wealth of new genome information for developing new insights into insect evolution. New genome assemblies provided by several sequencing centers have been annotated with known model organism gene homologies and gene predictions to provided basic comparative data. TeraGrid supplies the shared cyberinfrastructure for the primary computations. This genome database includes homologies to Drosophila melanogaster and eight other eukaryote model genomes, and gene predictions from several groups. BLAST searches of the newest assemblies are integrated with genome maps. GBrowse maps provide detailed views of cross-species aligned genomes. BioMart provides for data mining of annotations and sequences. Common chromosome maps identify major synteny among species. Potential gain and loss of genes is suggested by Gene Ontology groupings for genes of the new species. Summaries of essential genome statistics include sizes, genes found and predicted, homology among genomes, phylogenetic trees of species and comparisons of several gene predictions for sensitivity and specificity in finding new and known genes.</description>
    <dc:title>DroSpeGe: rapid access database for new Drosophila species genomes.</dc:title>

    <dc:creator>DG Gilbert</dc:creator>
    <dc:source>Nucleic Acids Res, Vol. 35, No. Database issue. (January 2007)</dc:source>
    <dc:date>2007-12-03T18:14:53-00:00</dc:date>
    <prism:publicationName>Nucleic Acids Res</prism:publicationName>
    <prism:issn>1362-4962</prism:issn>
    <prism:volume>35</prism:volume>
    <prism:number>Database issue</prism:number>
    <prism:category>database</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/466470">
    <title>Drosophila melanogaster: a case study of a model genomic sequence and its consequences.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/466470</link>
    <description>&lt;i&gt;Genome Res, Vol. 15, No. 12. (December 2005), pp. 1661-1667.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The sequencing and annotation of the Drosophila melanogaster genome, first published in 2000 through collaboration between Celera Genomics and the Drosophila Genome Projects, has provided a number of important contributions to genome research. By demonstrating the utility of methods such as whole-genome shotgun sequencing and genome annotation by a community &#34;jamboree,&#34; the Drosophila genome established the precedents for the current paradigm used by most genome projects. Subsequent releases of the initial genome sequence have been improved by the Berkeley Drosophila Genome Project and annotated by FlyBase, the Drosophila community database, providing one of the highest-quality genome sequences and annotations for any organism. We discuss the impact of the growing number of genome sequences now available in the genus on current Drosophila research, and some of the biological questions that these resources will enable to be solved in the future.</description>
    <dc:title>Drosophila melanogaster: a case study of a model genomic sequence and its consequences.</dc:title>

    <dc:creator>M Ashburner</dc:creator>
    <dc:creator>CM Bergman</dc:creator>
    <dc:identifier>doi:10.1101/gr.3726705</dc:identifier>
    <dc:source>Genome Res, Vol. 15, No. 12. (December 2005), pp. 1661-1667.</dc:source>
    <dc:date>2006-01-16T22:29:59-00:00</dc:date>
    <prism:publicationName>Genome Res</prism:publicationName>
    <prism:issn>1088-9051</prism:issn>
    <prism:volume>15</prism:volume>
    <prism:number>12</prism:number>
    <prism:startingPage>1661</prism:startingPage>
    <prism:endingPage>1667</prism:endingPage>
    <prism:category>annotation</prism:category>
    <prism:category>review</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2051640">
    <title>The FlyBase database of the Drosophila Genome Projects and community literature. The FlyBase Consortium.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2051640</link>
    <description>&lt;i&gt;Nucleic Acids Res, Vol. 27, No. 1. (1 January 1999), pp. 85-88.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The FlyBase Drosophila genetics database and the public interfaces of the Berkeley Drosophila Genome Project (BDGP) and European Drosophila Genome Project (EDGP) are in the process of integrating. At present, the data of these projects are available from independent, but hyperlinked, WWW sites (FlyBase URL, http://flybase.bio.indiana.edu/; BDGP URL, http://fruitfly.berkeley.edu/; EDGP URL, http://edgp.ebi.ac.uk/). Because of the considerable overlap of data classes between the contributions of the Drosophila genome projects and the Drosophila community, the new and enlarged FlyBase consortium views the implementation of a single integrated Drosophila genomics/genetics server as essential to the scientific community. This integration will occur in a stepwise fashion over the next 1-2 years. In this report, the salient features of the current databases and how to interrogate and navigate the extensive data sets are discussed.</description>
    <dc:title>The FlyBase database of the Drosophila Genome Projects and community literature. The FlyBase Consortium.</dc:title>

    <dc:creator>Flybase Consortium</dc:creator>
    <dc:source>Nucleic Acids Res, Vol. 27, No. 1. (1 January 1999), pp. 85-88.</dc:source>
    <dc:date>2007-12-03T18:09:29-00:00</dc:date>
    <prism:publicationName>Nucleic Acids Res</prism:publicationName>
    <prism:issn>0305-1048</prism:issn>
    <prism:volume>27</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>85</prism:startingPage>
    <prism:endingPage>88</prism:endingPage>
    <prism:category>database</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/408246">
    <title>PAML: a program package for phylogenetic analysis by maximum likelihood.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/408246</link>
    <description>&lt;i&gt;Comput Appl Biosci, Vol. 13, No. 5. (October 1997), pp. 555-556.&lt;/i&gt;</description>
    <dc:title>PAML: a program package for phylogenetic analysis by maximum likelihood.</dc:title>

    <dc:creator>Z Yang</dc:creator>
    <dc:source>Comput Appl Biosci, Vol. 13, No. 5. (October 1997), pp. 555-556.</dc:source>
    <dc:date>2005-11-25T12:56:07-00:00</dc:date>
    <prism:publicationName>Comput Appl Biosci</prism:publicationName>
    <prism:issn>0266-7061</prism:issn>
    <prism:volume>13</prism:volume>
    <prism:number>5</prism:number>
    <prism:startingPage>555</prism:startingPage>
    <prism:endingPage>556</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>molecular-evolution</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/503161">
    <title>T-Coffee: A novel method for fast and accurate multiple sequence alignment.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/503161</link>
    <description>&lt;i&gt;J Mol Biol, Vol. 302, No. 1. (8 September 2000), pp. 205-217.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We describe a new method (T-Coffee) for multiple sequence alignment that provides a dramatic improvement in accuracy with a modest sacrifice in speed as compared to the most commonly used alternatives. The method is broadly based on the popular progressive approach to multiple alignment but avoids the most serious pitfalls caused by the greedy nature of this algorithm. With T-Coffee we pre-process a data set of all pair-wise alignments between the sequences. This provides us with a library of alignment information that can be used to guide the progressive alignment. Intermediate alignments are then based not only on the sequences to be aligned next but also on how all of the sequences align with each other. This alignment information can be derived from heterogeneous sources such as a mixture of alignment programs and/or structure superposition. Here, we illustrate the power of the approach by using a combination of local and global pair-wise alignments to generate the library. The resulting alignments are significantly more reliable, as determined by comparison with a set of 141 test cases, than any of the popular alternatives that we tried. The improvement, especially clear with the more difficult test cases, is always visible, regardless of the phylogenetic spread of the sequences in the tests.</description>
    <dc:title>T-Coffee: A novel method for fast and accurate multiple sequence alignment.</dc:title>

    <dc:creator>C Notredame</dc:creator>
    <dc:creator>DG Higgins</dc:creator>
    <dc:creator>J Heringa</dc:creator>
    <dc:identifier>doi:10.1006/jmbi.2000.4042</dc:identifier>
    <dc:source>J Mol Biol, Vol. 302, No. 1. (8 September 2000), pp. 205-217.</dc:source>
    <dc:date>2006-02-12T22:59:31-00:00</dc:date>
    <prism:publicationName>J Mol Biol</prism:publicationName>
    <prism:issn>0022-2836</prism:issn>
    <prism:volume>302</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>205</prism:startingPage>
    <prism:endingPage>217</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>alignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/622628">
    <title>Whole-genome sequence assembly for mammalian genomes: Arachne 2.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/622628</link>
    <description>&lt;i&gt;Genome Res, Vol. 13, No. 1. (January 2003), pp. 91-96.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We previously described the whole-genome assembly program Arachne, presenting assemblies of simulated data for small to mid-sized genomes. Here we describe algorithmic adaptations to the program, allowing for assembly of mammalian-size genomes, and also improving the assembly of smaller genomes. Three principal changes were simultaneously made and applied to the assembly of the mouse genome, during a six-month period of development: (1) Supercontigs (scaffolds) were iteratively broken and rejoined using several criteria, yielding a 64-fold increase in length (N50), and apparent elimination of all global misjoins; (2) gaps between contigs in supercontigs were filled (partially or completely) by insertion of reads, as suggested by pairing within the supercontig, increasing the N50 contig length by 50%; (3) memory usage was reduced fourfold. The outcome of this mouse assembly and its analysis are described in (Mouse Genome Sequencing Consortium 2002).</description>
    <dc:title>Whole-genome sequence assembly for mammalian genomes: Arachne 2.</dc:title>

    <dc:creator>DB Jaffe</dc:creator>
    <dc:creator>J Butler</dc:creator>
    <dc:creator>S Gnerre</dc:creator>
    <dc:creator>E Mauceli</dc:creator>
    <dc:creator>K Lindblad-Toh</dc:creator>
    <dc:creator>JP Mesirov</dc:creator>
    <dc:creator>MC Zody</dc:creator>
    <dc:creator>ES Lander</dc:creator>
    <dc:identifier>doi:10.1101/gr.828403</dc:identifier>
    <dc:source>Genome Res, Vol. 13, No. 1. (January 2003), pp. 91-96.</dc:source>
    <dc:date>2006-05-11T05:29:25-00:00</dc:date>
    <prism:publicationName>Genome Res</prism:publicationName>
    <prism:issn>1088-9051</prism:issn>
    <prism:volume>13</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>91</prism:startingPage>
    <prism:endingPage>96</prism:endingPage>
    <prism:category>algorithm</prism:category>
    <prism:category>assembly</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1314398">
    <title>Base-calling of automated sequencer traces using phred. II. Error probabilities.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1314398</link>
    <description>&lt;i&gt;Genome Res, Vol. 8, No. 3. (March 1998), pp. 186-194.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Elimination of the data processing bottleneck in high-throughput sequencing will require both improved accuracy of data processing software and reliable measures of that accuracy. We have developed and implemented in our base-calling program phred the ability to estimate a probability of error for each base-call, as a function of certain parameters computed from the trace data. These error probabilities are shown here to be valid (correspond to actual error rates) and to have high power to discriminate correct base-calls from incorrect ones, for read data collected under several different chemistries and electrophoretic conditions. They play a critical role in our assembly program phrap and our finishing program consed.</description>
    <dc:title>Base-calling of automated sequencer traces using phred. II. Error probabilities.</dc:title>

    <dc:creator>B Ewing</dc:creator>
    <dc:creator>P Green</dc:creator>
    <dc:source>Genome Res, Vol. 8, No. 3. (March 1998), pp. 186-194.</dc:source>
    <dc:date>2007-05-21T05:32:16-00:00</dc:date>
    <prism:publicationName>Genome Res</prism:publicationName>
    <prism:issn>1088-9051</prism:issn>
    <prism:volume>8</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>186</prism:startingPage>
    <prism:endingPage>194</prism:endingPage>
    <prism:category>algorithm</prism:category>
</item>



</rdf:RDF>

