<?xml version="1.0" encoding="UTF-8"?>

<rdf:RDF
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
   xmlns="http://purl.org/rss/1.0/"
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:prism="http://prismstandard.org/namespaces/1.2/basic/"
   xmlns:dcterms="http://purl.org/dc/terms/"

>
<channel rdf:about="http://www.citeulike.org/about">
<pubDate>Thu, 21 Aug 2008 01:15:15 BST</pubDate>


	<title>CiteULike: indigoviolet's no-tag</title>
	<description>CiteULike: indigoviolet's no-tag</description>


	<link>http://www.citeulike.org/user/indigoviolet/tag/no-tag</link>
	<dc:publisher>CiteULike.org</dc:publisher>
	<dc:language>en-gb</dc:language>
	<dc:rights>Copyright &#169; 2004-2008 citeulike.org</dc:rights>
	<items>
    <rdf:Seq>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2122187"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/407124"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/407125"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1274217"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/558739"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1267554"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/209852"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/212874"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2098979"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/2098944"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/921611"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/100088"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1446221"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1445214"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1443754"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1443746"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1443743"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1390802"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1375603"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1203906"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/94994"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1375602"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/796333"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/756148"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/904110"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/94993"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/958433"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1085102"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/224563"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1006283"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1006282"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1224456"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1224455"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1224454"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1224453"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1190545"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1021805"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1097483"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1096346"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1085324"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1085326"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1064697"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/882014"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1085575"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1085574"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1085571"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/882012"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/342965"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1084785"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/indigoviolet/article/1084706"/>

	</rdf:Seq>
	</items>
	</channel>


<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2122187">
    <title>The Making of a Fly: The Genetics of Animal Design</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2122187</link>
    <description>&lt;i&gt;(15 April 1992)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Understanding how a multicellular animal develops from a single cell (the fertilized egg) poses one of the greatest challenges in biology today. Development from egg to adult involves the sequential expression of virtually the whole of an organism's genetic instructions both in the mother as she lays down developmental cues in the egg, and in the embryo itself. Most of our present information on the role of genes in development comes from the invertebrate fruit fly, &#60;i&#62;Drosophila&#60;/i&#62;. The two authors of this text (amongst the foremost authorities in the world) follow the developmental process from fertilization through the primitive structural development of the body plan of the fly after cleavage into the differentiation of the variety of tissues, organs and body parts that together define the fly. The developmental processes are fully explained throughout the text in the modern language of molecular biology and genetics. This text represents the vital synthesis of the subject that many have been waiting for and it will enable many specific courses in developmental biology and molecular genetics to focus on it. It will appeal to 2nd and 3rd year students in these disciplines as well as in biochemistry, neurobiology and zoology. It will also have widespread appeal among researchers. &#60;br&#62;&#60;ul&#62;&#60;br&#62;&#60;li&#62;Authored by one of the foremost authorities in the world. &#60;br&#62;&#60;li&#62;A unique synthesis of the developmental cycle of &#60;i&#62;Drosophila&#60;/i&#62; - our major source of information on the role of genes in development. &#60;br&#62;&#60;li&#62;Designed to provide the basis of new courses in developmental biology and molecular genetics at senior undergraduate level. &#60;br&#62;&#60;li&#62;A lucid explanation in the modern language of the science.&#60;/li&#62;&#60;/ul&#62;</description>
    <dc:title>The Making of a Fly: The Genetics of Animal Design</dc:title>

    <dc:creator>Peter Lawrence</dc:creator>
    <dc:source>(15 April 1992)</dc:source>
    <dc:date>2007-12-15T03:00:46-00:00</dc:date>
    <prism:publicationYear>1992</prism:publicationYear>
    <prism:publisher>Wiley-Blackwell</prism:publisher>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/407124">
    <title>WebLogo: A Sequence Logo Generator</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/407124</link>
    <description>&lt;i&gt;Genome Res., Vol. 14, No. 6. (1 June 2004), pp. 1188-1190.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;WebLogo generates sequence logos, graphical representations of the patterns within a multiple sequence alignment. Sequence logos provide a richer and more precise description of sequence similarity than consensus sequences and can rapidly reveal significant features of the alignment otherwise difficult to perceive. Each logo consists of stacks of letters, one stack for each position in the sequence. The overall height of each stack indicates the sequence conservation at that position (measured in bits), whereas the height of symbols within the stack reflects the relative frequency of the corresponding amino or nucleic acid at that position. WebLogo has been enhanced recently with additional features and options, to provide a convenient and highly configurable sequence logo generator. A command line interface and the complete, open WebLogo source code are available for local installation and customization.</description>
    <dc:title>WebLogo: A Sequence Logo Generator</dc:title>

    <dc:creator>Gavin Crooks</dc:creator>
    <dc:creator>Gary Hon</dc:creator>
    <dc:creator>John-Marc Chandonia</dc:creator>
    <dc:creator>Steven Brenner</dc:creator>
    <dc:identifier>doi:10.1101/gr.849004</dc:identifier>
    <dc:source>Genome Res., Vol. 14, No. 6. (1 June 2004), pp. 1188-1190.</dc:source>
    <dc:date>2005-11-24T10:24:28-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Genome Res.</prism:publicationName>
    <prism:volume>14</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>1188</prism:startingPage>
    <prism:endingPage>1190</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/407125">
    <title>Sequence logos: a new way to display consensus sequences.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/407125</link>
    <description>&lt;i&gt;Nucleic Acids Res, Vol. 18, No. 20. (25 October 1990), pp. 6097-6100.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A graphical method is presented for displaying the patterns in a set of aligned sequences. The characters representing the sequence are stacked on top of each other for each position in the aligned sequences. The height of each letter is made proportional to its frequency, and the letters are sorted so the most common one is on top. The height of the entire stack is then adjusted to signify the information content of the sequences at that position. From these 'sequence logos', one can determine not only the consensus sequence but also the relative frequency of bases and the information content (measured in bits) at every position in a site or sequence. The logo displays both significant residues and subtle sequence patterns.</description>
    <dc:title>Sequence logos: a new way to display consensus sequences.</dc:title>

    <dc:creator>TD Schneider</dc:creator>
    <dc:creator>RM Stephens</dc:creator>
    <dc:source>Nucleic Acids Res, Vol. 18, No. 20. (25 October 1990), pp. 6097-6100.</dc:source>
    <dc:date>2005-11-24T10:26:51-00:00</dc:date>
    <prism:publicationYear>1990</prism:publicationYear>
    <prism:publicationName>Nucleic Acids Res</prism:publicationName>
    <prism:issn>0305-1048</prism:issn>
    <prism:volume>18</prism:volume>
    <prism:number>20</prism:number>
    <prism:startingPage>6097</prism:startingPage>
    <prism:endingPage>6100</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1274217">
    <title>Selection of DNA binding sites by regulatory proteins. Statistical-mechanical theory and application to operators and promoters.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1274217</link>
    <description>&lt;i&gt;J Mol Biol, Vol. 193, No. 4. (20 February 1987), pp. 723-750.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We present a statistical-mechanical selection theory for the sequence analysis of a set of specific DNA regulatory sites that makes it possible to predict the relationship between individual base-pair choices in the site and specific activity (affinity). The theory is based on the assumption that specific DNA sequences have been selected to conform to some requirement for protein binding (or activity), and that all sequences that can fulfil this requirement are equally likely to occur. In most cases, the number of specific DNA sequences that are known for a certain DNA-binding protein is very small, and we discuss in detail the small-sample uncertainties that this leads to. When applied to the binding sites for cro repressor in phage lambda, the theory can predict, from the sequence statistics alone, their rank order binding affinities in reasonable agreement with measured values. However, the statistical uncertainty generated by such a small sample (only 6 sites known) limits the result to order-of-magnitude comparisons. When applied to the much larger sample of Escherichia coli promoter sequences, the theory predicts the correlation between in vitro activity (k2KB values) and homology score (closeness to the consensus sequence) observed by Mulligan et al. (1984). The analysis of base-pair frequencies in the promoter sample is consistent with the assumption that base-pairs at different positions in the sites contribute independently to the specific activity, except in a few marginal cases that are discussed. When the promoter sites are ordered according to predicted activities, they seem to conform to the Gaussian distribution that results from a requirement for maximal sequence variability within the constraint of providing a certain average activity. 
The theory allows us to compare the number of specific sites with a certain activity to the number that would be expected from random occurrence in the genome. While strong promoters are &#34;overspecified&#34;, in the sense that their probability of random occurrence is very low, random sequences with weak promoter-like properties are expected to occur in very large numbers. This leads to the conclusion that functional specificity is based on other properties in addition to primary sequence recognition; some possibilities are discussed. Finally, we show that the sequence information, as defined by Schneider et al. (1986), can be used directly (at least in the case of equilibrium binding sites) to estimate the number of protein molecules that are specifically bound at random &#34;pseudosites&#34; in the genome.(ABSTRACT TRUNCATED AT 400 WORDS)</description>
    <dc:title>Selection of DNA binding sites by regulatory proteins. Statistical-mechanical theory and application to operators and promoters.</dc:title>

    <dc:creator>OG Berg</dc:creator>
    <dc:creator>PH von Hippel</dc:creator>
    <dc:source>J Mol Biol, Vol. 193, No. 4. (20 February 1987), pp. 723-750.</dc:source>
    <dc:date>2007-05-03T16:39:34-00:00</dc:date>
    <prism:publicationYear>1987</prism:publicationYear>
    <prism:publicationName>J Mol Biol</prism:publicationName>
    <prism:issn>0022-2836</prism:issn>
    <prism:volume>193</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>723</prism:startingPage>
    <prism:endingPage>750</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/558739">
    <title>An overview of the structures of protein-DNA complexes.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/558739</link>
    <description>&lt;i&gt;Genome Biol, Vol. 1, No. 1. (2000)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;On the basis of a structural analysis of 240 protein-DNA complexes contained in the Protein Data Bank (PDB), we have classified the DNA-binding proteins involved into eight different structural/functional groups, which are further classified into 54 structural families. Here we present this classification and review the functions, structures and binding interactions of these protein-DNA complexes.</description>
    <dc:title>An overview of the structures of protein-DNA complexes.</dc:title>

    <dc:creator>NM Luscombe</dc:creator>
    <dc:creator>SE Austin</dc:creator>
    <dc:creator>HM Berman</dc:creator>
    <dc:creator>JM Thornton</dc:creator>
    <dc:source>Genome Biol, Vol. 1, No. 1. (2000)</dc:source>
    <dc:date>2006-03-21T16:48:16-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:publicationName>Genome Biol</prism:publicationName>
    <prism:issn>1465-6914</prism:issn>
    <prism:volume>1</prism:volume>
    <prism:number>1</prism:number>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1267554">
    <title>Drosophila DNase I footprint database: a systematic genome annotation of transcription factor binding sites in the fruitfly, Drosophila melanogaster.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1267554</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 21, No. 8. (15 April 2005), pp. 1747-1749.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;SUMMARY: Despite increasing numbers of computational tools developed to predict cis-regulatory sequences, the availability of high-quality datasets of transcription factor binding sites limits advances in the bioinformatics of gene regulation. Here we present such a dataset based on a systematic literature curation and genome annotation of DNase I footprints for the fruitfly, Drosophila melanogaster. Using the experimental results of 201 primary references, we annotated 1367 binding sites from 87 transcription factors and 101 target genes in the D.melanogaster genome sequence. These data will provide a rich resource for future bioinformatics analyses of transcriptional regulation in Drosophila such as constructing motif models, training cis-regulatory module detectors, benchmarking alignment tools and continued text mining of the extensive literature on transcriptional regulation in this important model organism. AVAILABILITY: http://www.flyreg.org/ CONTACT: cbergman@gen.cam.ac.uk.</description>
    <dc:title>Drosophila DNase I footprint database: a systematic genome annotation of transcription factor binding sites in the fruitfly, Drosophila melanogaster.</dc:title>

    <dc:creator>CM Bergman</dc:creator>
    <dc:creator>JW Carlson</dc:creator>
    <dc:creator>SE Celniker</dc:creator>
    <dc:source>Bioinformatics, Vol. 21, No. 8. (15 April 2005), pp. 1747-1749.</dc:source>
    <dc:date>2007-04-30T09:18:37-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1367-4803</prism:issn>
    <prism:volume>21</prism:volume>
    <prism:number>8</prism:number>
    <prism:startingPage>1747</prism:startingPage>
    <prism:endingPage>1749</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/209852">
    <title>GOstat: find statistically overrepresented Gene Ontologies within a group of genes.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/209852</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 20, No. 9. (12 June 2004), pp. 1464-1465.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;SUMMARY: Modern experimental techniques, as for example DNA microarrays, as a result usually produce a long list of genes, which are potentially interesting in the analyzed process. In order to gain biological understanding from this type of data, it is necessary to analyze the functional annotations of all genes in this list. The Gene-Ontology (GO) database provides a useful tool to annotate and analyze the functions of a large number of genes. Here, we introduce a tool that utilizes this information to obtain an understanding of which annotations are typical for the analyzed list of genes. This program automatically obtains the GO annotations from a database and generates statistics of which annotations are overrepresented in the analyzed list of genes. This results in a list of GO terms sorted by their specificity. AVAILABILITY: Our program GOstat is accessible via the Internet at http://gostat.wehi.edu.au</description>
    <dc:title>GOstat: find statistically overrepresented Gene Ontologies within a group of genes.</dc:title>

    <dc:creator>T Beissbarth</dc:creator>
    <dc:creator>TP Speed</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/bth088</dc:identifier>
    <dc:source>Bioinformatics, Vol. 20, No. 9. (12 June 2004), pp. 1464-1465.</dc:source>
    <dc:date>2005-05-25T04:19:07-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1367-4803</prism:issn>
    <prism:volume>20</prism:volume>
    <prism:number>9</prism:number>
    <prism:startingPage>1464</prism:startingPage>
    <prism:endingPage>1465</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/212874">
    <title>Gene ontology: tool for the unification of biology. The Gene Ontology Consortium.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/212874</link>
    <description>&lt;i&gt;Nat Genet, Vol. 25, No. 1. (May 2000), pp. 25-29.&lt;/i&gt;</description>
    <dc:title>Gene ontology: tool for the unification of biology. The Gene Ontology Consortium.</dc:title>

    <dc:creator>M Ashburner</dc:creator>
    <dc:creator>CA Ball</dc:creator>
    <dc:creator>JA Blake</dc:creator>
    <dc:creator>D Botstein</dc:creator>
    <dc:creator>H Butler</dc:creator>
    <dc:creator>JM Cherry</dc:creator>
    <dc:creator>AP Davis</dc:creator>
    <dc:creator>K Dolinski</dc:creator>
    <dc:creator>SS Dwight</dc:creator>
    <dc:creator>JT Eppig</dc:creator>
    <dc:creator>MA Harris</dc:creator>
    <dc:creator>DP Hill</dc:creator>
    <dc:creator>L Issel-Tarver</dc:creator>
    <dc:creator>A Kasarskis</dc:creator>
    <dc:creator>S Lewis</dc:creator>
    <dc:creator>JC Matese</dc:creator>
    <dc:creator>JE Richardson</dc:creator>
    <dc:creator>M Ringwald</dc:creator>
    <dc:creator>GM Rubin</dc:creator>
    <dc:creator>G Sherlock</dc:creator>
    <dc:identifier>doi:10.1038/75556</dc:identifier>
    <dc:source>Nat Genet, Vol. 25, No. 1. (May 2000), pp. 25-29.</dc:source>
    <dc:date>2005-05-27T12:30:22-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:publicationName>Nat Genet</prism:publicationName>
    <prism:issn>1061-4036</prism:issn>
    <prism:volume>25</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>25</prism:startingPage>
    <prism:endingPage>29</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2098979">
    <title>Temporal patterns of fruit fly (Drosophila) evolution revealed by mutation clocks.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2098979</link>
    <description>&lt;i&gt;Mol Biol Evol, Vol. 21, No. 1. (January 2004), pp. 36-44.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Drosophila melanogaster has been a canonical model organism to study genetics, development, behavior, physiology, evolution, and population genetics for nearly a century. Despite this emphasis and the completion of its nuclear genome sequence, the timing of major speciation events leading to the origin of this fruit fly remain elusive because of the paucity of extensive fossil records and biogeographic data. Use of molecular clocks as an alternative has been fraught with non-clock-like accumulation of nucleotide and amino-acid substitutions. Here we present a novel methodology in which genomic mutation distances are used to overcome these limitations and to make use of all available gene sequence data for constructing a fruit fly molecular time scale. Our analysis of 2977 pairwise sequence comparisons from 176 nuclear genes reveals a long-term fruit fly mutation clock ticking at a rate of 11.1 mutations per kilobase pair per Myr. Genomic mutation clock-based timings of the landmark speciation events leading to the evolution of D. melanogaster show that it shared most recent common ancestry 5.4 MYA with D. simulans, 12.6 MYA with D. erecta+D. orena, 12.8 MYA with D. yakuba+D. teisseri, 35.6 MYA with the takahashii subgroup, 41.3 MYA with the montium subgroup, 44.2 MYA with the ananassae subgroup, 54.9 MYA with the obscura group, 62.2 MYA with the willistoni group, and 62.9 MYA with the subgenus Drosophila. These and other estimates are compatible with those known from limited biogeographic and fossil records. The inferred temporal pattern of fruit fly evolution shows correspondence with the cooling patterns of paleoclimate changes and habitat fragmentation in the Cenozoic.</description>
    <dc:title>Temporal patterns of fruit fly (Drosophila) evolution revealed by mutation clocks.</dc:title>

    <dc:creator>K Tamura</dc:creator>
    <dc:creator>S Subramanian</dc:creator>
    <dc:creator>S Kumar</dc:creator>
    <dc:source>Mol Biol Evol, Vol. 21, No. 1. (January 2004), pp. 36-44.</dc:source>
    <dc:date>2007-12-12T14:32:26-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Mol Biol Evol</prism:publicationName>
    <prism:issn>0737-4038</prism:issn>
    <prism:volume>21</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>36</prism:startingPage>
    <prism:endingPage>44</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/2098944">
    <title>Gene Recognition Via Spliced Sequence Alignment</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/2098944</link>
    <description>&lt;i&gt;Proceedings of the National Academy of Sciences of the United States of America, Vol. 93, No. 17. (1996), pp. 9061-9066.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Gene recognition is one of the most important problems in computational molecular biology. Previous attempts to solve this problem were based on statistics, and applications of combinatorial methods for gene recognition were almost unexplored. Recent advances in large-scale cDNA sequencing open a way toward a new approach to gene recognition that uses previously sequenced genes as a clue for recognition of newly sequenced genes. This paper describes a spliced alignment algorithm and software tool that explores all possible exon assemblies in polynomial time and finds the multiexon structure with the best fit to a related protein. Unlike other existing methods, the algorithm successfully recognizes genes even in the case of short exons or exons with unusual codon usage; we also report correct assemblies for genes with more than 10 exons. On a test sample of human genes with known mammalian relatives, the average correlation between the predicted and actual proteins was 99%. The algorithm correctly reconstructed 87% of genes and the rare discrepancies between the predicted and real exon-intron structures were caused either by short (less than 5 amino acids) initial/terminal exons or by alternative splicing. Moreover, the algorithm predicts human genes reasonably well when the homologous protein is nonvertebrate or even prokaryotic. The surprisingly good performance of the method was confirmed by extensive simulations: in particular, with target proteins at 160 accepted point mutations (PAM) (25% similarity), the correlation between the predicted and actual genes was still as high as 95%.</description>
    <dc:title>Gene Recognition Via Spliced Sequence Alignment</dc:title>

    <dc:creator>Mikhail Gelfand</dc:creator>
    <dc:creator>Andrey Mironov</dc:creator>
    <dc:creator>Pavel Pevzner</dc:creator>
    <dc:source>Proceedings of the National Academy of Sciences of the United States of America, Vol. 93, No. 17. (1996), pp. 9061-9066.</dc:source>
    <dc:date>2007-12-12T14:20:19-00:00</dc:date>
    <prism:publicationYear>1996</prism:publicationYear>
    <prism:publicationName>Proceedings of the National Academy of Sciences of the United States of America</prism:publicationName>
    <prism:volume>93</prism:volume>
    <prism:number>17</prism:number>
    <prism:startingPage>9061</prism:startingPage>
    <prism:endingPage>9066</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/921611">
    <title>Systematic evolution of ligands by exponential enrichment: RNA ligands to bacteriophage T4 DNA polymerase.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/921611</link>
    <description>&lt;i&gt;Science, Vol. 249, No. 4968. (3 August 1990), pp. 505-510.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;High-affinity nucleic acid ligands for a protein were isolated by a procedure that depends on alternate cycles of ligand selection from pools of variant sequences and amplification of the bound species. Multiple rounds exponentially enrich the population for the highest affinity species that can be clonally isolated and characterized. In particular one eight-base region of an RNA that interacts with the T4 DNA polymerase was chosen and randomized. Two different sequences were selected by this procedure from the calculated pool of 65,536 species. One is the wild-type sequence found in the bacteriophage mRNA; one is varied from wild type at four positions. The binding constants of these two RNA's to T4 DNA polymerase are equivalent. These protocols with minimal modification can yield high-affinity ligands for any protein that binds nucleic acids as part of its function; high-affinity ligands could conceivably be developed for any target molecule.</description>
    <dc:title>Systematic evolution of ligands by exponential enrichment: RNA ligands to bacteriophage T4 DNA polymerase.</dc:title>

    <dc:creator>C Tuerk</dc:creator>
    <dc:creator>L Gold</dc:creator>
    <dc:identifier>doi:10.1126/science.2200121</dc:identifier>
    <dc:source>Science, Vol. 249, No. 4968. (3 August 1990), pp. 505-510.</dc:source>
    <dc:date>2006-11-01T16:58:40-00:00</dc:date>
    <prism:publicationYear>1990</prism:publicationYear>
    <prism:publicationName>Science</prism:publicationName>
    <prism:issn>0036-8075</prism:issn>
    <prism:volume>249</prism:volume>
    <prism:number>4968</prism:number>
    <prism:startingPage>505</prism:startingPage>
    <prism:endingPage>510</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/100088">
    <title>Basic local alignment search tool.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/100088</link>
    <description>&lt;i&gt;J Mol Biol, Vol. 215, No. 3. (5 October 1990), pp. 403-410.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A new approach to rapid sequence comparison, basic local alignment search tool (BLAST), directly approximates alignments that optimize a measure of local similarity, the maximal segment pair (MSP) score. Recent mathematical results on the stochastic properties of MSP scores allow an analysis of the performance of this method as well as the statistical significance of alignments it generates. The basic algorithm is simple and robust; it can be implemented in a number of ways and applied in a variety of contexts including straightforward DNA and protein sequence database searches, motif searches, gene identification searches, and in the analysis of multiple regions of similarity in long DNA sequences. In addition to its flexibility and tractability to mathematical analysis, BLAST is an order of magnitude faster than existing sequence comparison tools of comparable sensitivity.</description>
    <dc:title>Basic local alignment search tool.</dc:title>

    <dc:creator>SF Altschul</dc:creator>
    <dc:creator>W Gish</dc:creator>
    <dc:creator>W Miller</dc:creator>
    <dc:creator>EW Myers</dc:creator>
    <dc:creator>DJ Lipman</dc:creator>
    <dc:identifier>doi:10.1006/jmbi.1990.9999</dc:identifier>
    <dc:source>J Mol Biol, Vol. 215, No. 3. (5 October 1990), pp. 403-410.</dc:source>
    <dc:date>2005-02-21T16:47:03-00:00</dc:date>
    <prism:publicationYear>1990</prism:publicationYear>
    <prism:publicationName>J Mol Biol</prism:publicationName>
    <prism:issn>0022-2836</prism:issn>
    <prism:volume>215</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>403</prism:startingPage>
    <prism:endingPage>410</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1446221">
    <title>Searching for genetic determinants in the new millennium</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1446221</link>
    <description>&lt;i&gt;Nature, Vol. 405, No. 6788. (15 June 2000), pp. 847-856.&lt;/i&gt;</description>
    <dc:title>Searching for genetic determinants in the new millennium</dc:title>

    <dc:creator>Neil Risch</dc:creator>
    <dc:identifier>doi:10.1038/35015718</dc:identifier>
    <dc:source>Nature, Vol. 405, No. 6788. (15 June 2000), pp. 847-856.</dc:source>
    <dc:date>2007-07-10T10:18:02-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:publicationName>Nature</prism:publicationName>
    <prism:volume>405</prism:volume>
    <prism:number>6788</prism:number>
    <prism:startingPage>847</prism:startingPage>
    <prism:endingPage>856</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1445214">
    <title>Prospects for whole-genome linkage disequilibrium mapping of common disease genes.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1445214</link>
    <description>&lt;i&gt;Nat Genet, Vol. 22, No. 2. (June 1999), pp. 139-144.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Recently, attention has focused on the use of whole-genome linkage disequilibrium (LD) studies to map common disease genes. Such studies would employ a dense map of single nucleotide polymorphisms (SNPs) to detect association between a marker and disease. Construction of SNP maps is currently underway. An essential issue yet to be settled is the required marker density of such maps. Here, I use population simulations to estimate the extent of LD surrounding common gene variants in the general human population as well as in isolated populations. Two main conclusions emerge from these investigations. First, a useful level of LD is unlikely to extend beyond an average distance of roughly 3 kb in the general population, which implies that approximately 500,000 SNPs will be required for whole-genome studies. Second, the extent of LD is similar in isolated populations unless the founding bottleneck is very narrow or the frequency of the variant is low (&#60;5%).</description>
    <dc:title>Prospects for whole-genome linkage disequilibrium mapping of common disease genes.</dc:title>

    <dc:creator>L Kruglyak</dc:creator>
    <dc:identifier>doi:10.1038/9642</dc:identifier>
    <dc:source>Nat Genet, Vol. 22, No. 2. (June 1999), pp. 139-144.</dc:source>
    <dc:date>2007-07-10T02:34:44-00:00</dc:date>
    <prism:publicationYear>1999</prism:publicationYear>
    <prism:publicationName>Nat Genet</prism:publicationName>
    <prism:issn>1061-4036</prism:issn>
    <prism:volume>22</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>139</prism:startingPage>
    <prism:endingPage>144</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1443754">
    <title>Advances in Genetics http://www.sciencedirect.com/science/bookseries/00652660</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1443754</link>
    <description>&lt;i&gt;&lt;/i&gt;</description>
    <dc:title>Advances in Genetics http://www.sciencedirect.com/science/bookseries/00652660</dc:title>

    <dc:date>2007-07-09T09:11:56-00:00</dc:date>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1443746">
    <title>Linkage and association: basic concepts.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1443746</link>
    <description>&lt;i&gt;Adv Genet, Vol. 42 (2001), pp. 45-66.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Many investigators are turning their efforts to dissecting the etiology of complex traits. The primary tools for gene discovery, localization, and functional analysis are linkage and association studies. While the conceptual underpinnings of these approaches have long been known, advances in recent decades in molecular genetics, in the development of efficient computational algorithms, and in computing power have enabled the large-scale application of these methods. Here, we review the biological basis of linkage and association among loci and the common methods used to assess these relationships with respect to observed phenotypes. We further consider the two most common approaches--genome scans and candidate gene studies--especially their respective strengths, weaknesses, and resource requirements. Finally, we highlight some of the major challenges that arise from these investigative approaches and those that are inherent in the nature of complex traits. The chapters that follow elaborate on many of these topics.</description>
    <dc:title>Linkage and association: basic concepts.</dc:title>

    <dc:creator>IB Borecki</dc:creator>
    <dc:creator>BK Suarez</dc:creator>
    <dc:source>Adv Genet, Vol. 42 (2001), pp. 45-66.</dc:source>
    <dc:date>2007-07-09T09:05:55-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:publicationName>Adv Genet</prism:publicationName>
    <prism:issn>0065-2660</prism:issn>
    <prism:volume>42</prism:volume>
    <prism:startingPage>45</prism:startingPage>
    <prism:endingPage>66</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1443743">
    <title>Association study designs for complex diseases</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1443743</link>
    <description>&lt;i&gt;Nat Rev Genet, Vol. 2, No. 2. (2001), pp. 91-99.&lt;/i&gt;</description>
    <dc:title>Association study designs for complex diseases</dc:title>

    <dc:creator>LR Cardon</dc:creator>
    <dc:creator>JI Bell</dc:creator>
    <dc:source>Nat Rev Genet, Vol. 2, No. 2. (2001), pp. 91-99.</dc:source>
    <dc:date>2007-07-09T09:04:29-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:publicationName>Nat Rev Genet</prism:publicationName>
    <prism:volume>2</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>91</prism:startingPage>
    <prism:endingPage>99</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1390802">
    <title>Systematic detection of statistically overrepresented DNA motif association rules.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1390802</link>
    <description>&lt;i&gt;Genome Inform, Vol. 17, No. 1. (2006), pp. 124-133.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;DNA motifs, or cis-elements, are short nucleotide sequence patterns recognized by various transcription factors (TFs). In promoters, these TFs bind in a complex combinatorial manner in order to regulate the expression of a downstream gene. The combinatorial space is frequently large and difficult to manage since vertebrates have thousands of transcription factors and more than 20,000 genes. We introduce a computer program called CAYCE (Combinatorial AnalYsis of Cis-Elements) that systematically detects statistically overrepresented DNA motif association rules independent of Microarray information. CAYCE is an adaptation of the apriori algorithm traditionally used for association rule mining, but offers three significant advancements. (1) It analyzes multiple occurrences of an item, corresponding to multiple TF binding sites, (2) It compares results with a biologically relevant background, and (3), it provides p-values for straightforward statistical interpretation. CAYCE can be easily applied to any item-set data where the investigator is also interested in multiple occurrences of a single item, and/or overrepresentation of association rules compared with a background. Applying CAYCE to human promoters in 1% of the human genome, we discover that motif clusters containing five repetitions of SP1 are the most statistically significant.</description>
    <dc:title>Systematic detection of statistically overrepresented DNA motif association rules.</dc:title>

    <dc:creator>JM Lin</dc:creator>
    <dc:creator>Z Weng</dc:creator>
    <dc:source>Genome Inform, Vol. 17, No. 1. (2006), pp. 124-133.</dc:source>
    <dc:date>2007-06-14T23:03:18-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Genome Inform</prism:publicationName>
    <prism:issn>0919-9454</prism:issn>
    <prism:volume>17</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>124</prism:startingPage>
    <prism:endingPage>133</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1375603">
    <title>Functional single nucleotide polymorphism-based association studies.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1375603</link>
    <description>&lt;i&gt;Hum Genomics, Vol. 2, No. 6. (June 2006), pp. 391-402.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Association studies hold great promise for the elucidation of the genetic basis of diseases. Studies based on functional single nucleotide polymorphisms (SNPs) or on linkage disequilibrium (LD) represent two main types of designs. LD-based association studies can be comprehensive for common causative variants, but they perform poorly for rare alleles. Conversely, functional SNP-based studies are efficient because they focus on the SNPs with the highest a priori chance of being associated. Our poor ability to predict the functional effect of SNPs, however, hampers attempts to make these studies comprehensive. Recent progress in comparative genomics, and evidence that functional elements tend to lie in conserved regions, promises to change the landscape, permitting functional SNP association studies to be carried out that comprehensively assess common and rare alleles. SNP genotyping technologies are already sufficient for such studies, but studies will require continued genomic sequencing of multiple species, research on the functional role of conserved sequences and additional SNP discovery and validation efforts (including targeted SNP discovery to identify the rare alleles in functional regions). With these resources, we expect that comprehensive functional SNP association studies will soon be possible.</description>
    <dc:title>Functional single nucleotide polymorphism-based association studies.</dc:title>

    <dc:creator>VE Carlton</dc:creator>
    <dc:creator>JS Ireland</dc:creator>
    <dc:creator>F Useche</dc:creator>
    <dc:creator>M Faham</dc:creator>
    <dc:source>Hum Genomics, Vol. 2, No. 6. (June 2006), pp. 391-402.</dc:source>
    <dc:date>2007-06-10T09:33:35-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Hum Genomics</prism:publicationName>
    <prism:issn>1479-7364</prism:issn>
    <prism:volume>2</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>391</prism:startingPage>
    <prism:endingPage>402</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1203906">
    <title>Bioinformatics approaches and resources for single nucleotide polymorphism functional analysis.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1203906</link>
    <description>&lt;i&gt;Brief Bioinform, Vol. 6, No. 1. (March 2005), pp. 44-56.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Since the initial sequencing of the human genome, many projects are underway to understand the effects of genetic variation between individuals. Predicting and understanding the downstream effects of genetic variation using computational methods are becoming increasingly important for single nucleotide polymorphism (SNP) selection in genetics studies and understanding the molecular basis of disease. According to the NIH, there are now more than four million validated SNPs in the human genome. The volume of known genetic variations lends itself well to an informatics approach. Bioinformaticians have become very good at functional inference methods derived from functional and structural genomics. This review will present a broad overview of the tools and resources available to collect and understand functional variation from the perspective of structure, expression, evolution and phenotype. Additionally, public resources available for SNP identification and characterisation are summarised.</description>
    <dc:title>Bioinformatics approaches and resources for single nucleotide polymorphism functional analysis.</dc:title>

    <dc:creator>S Mooney</dc:creator>
    <dc:source>Brief Bioinform, Vol. 6, No. 1. (March 2005), pp. 44-56.</dc:source>
    <dc:date>2007-04-03T09:54:06-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Brief Bioinform</prism:publicationName>
    <prism:issn>1467-5463</prism:issn>
    <prism:volume>6</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>44</prism:startingPage>
    <prism:endingPage>56</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/94994">
    <title>Genome-wide association studies for common diseases and complex traits</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/94994</link>
    <description>&lt;i&gt;Nature Reviews Genetics, Vol. 6, No. 2. (01 February 2005), pp. 95-108.&lt;/i&gt;</description>
    <dc:title>Genome-wide association studies for common diseases and complex traits</dc:title>

    <dc:creator>Joel Hirschhorn</dc:creator>
    <dc:creator>Mark Daly</dc:creator>
    <dc:identifier>doi:10.1038/nrg1521</dc:identifier>
    <dc:source>Nature Reviews Genetics, Vol. 6, No. 2. (01 February 2005), pp. 95-108.</dc:source>
    <dc:date>2005-02-15T07:22:40-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Nature Reviews Genetics</prism:publicationName>
    <prism:issn>1471-0056</prism:issn>
    <prism:volume>6</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>95</prism:startingPage>
    <prism:endingPage>108</prism:endingPage>
    <prism:publisher>Nature Publishing Group</prism:publisher>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1375602">
    <title>Linkage disequilibrium maps and association mapping.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1375602</link>
    <description>&lt;i&gt;J Clin Invest, Vol. 115, No. 6. (June 2005), pp. 1425-1430.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The causal chain between a gene and its effect on disease susceptibility cannot be understood until the effect has been localized in the DNA sequence. Recently, polymorphisms incorporated in the HapMap Project have made linkage disequilibrium (LD) the most powerful tool for localization. The genetics of LD, the maps and databases that it provides, and their use for association mapping, as well as alternative methods for gene localization, are briefly described.</description>
    <dc:title>Linkage disequilibrium maps and association mapping.</dc:title>

    <dc:creator>NE Morton</dc:creator>
    <dc:identifier>doi:10.1172/JCI25032</dc:identifier>
    <dc:source>J Clin Invest, Vol. 115, No. 6. (June 2005), pp. 1425-1430.</dc:source>
    <dc:date>2007-06-10T09:32:41-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>J Clin Invest</prism:publicationName>
    <prism:issn>0021-9738</prism:issn>
    <prism:volume>115</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>1425</prism:startingPage>
    <prism:endingPage>1430</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/796333">
    <title>Strategies for the detection of copy number and other structural variants in the human genome.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/796333</link>
    <description>&lt;i&gt;Hum Genomics, Vol. 2, No. 6. (June 2006), pp. 403-414.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Advances in genome scanning technologies are revealing that copy number variants (CNVs) and polymorphisms, ranging from a few kilobases to several megabases in size, are present in genomes at frequencies much greater than previously known. Discoveries of additional forms of genomic variation, including inversions, insertions, deletions and complex rearrangements, are also occurring at an increased rate. Along with CNVs, these sequence alterations are collectively known as structural variants, and their discovery has had an immediate impact on the interpretation of basic research and clinical diagnostic data. This paper discusses different methods, experimental strategies and technologies that are currently available to study copy number variation and other structural variants in the human genome.</description>
    <dc:title>Strategies for the detection of copy number and other structural variants in the human genome.</dc:title>

    <dc:creator>AR Carson</dc:creator>
    <dc:creator>L Feuk</dc:creator>
    <dc:creator>M Mohammed</dc:creator>
    <dc:creator>SW Scherer</dc:creator>
    <dc:source>Hum Genomics, Vol. 2, No. 6. (June 2006), pp. 403-414.</dc:source>
    <dc:date>2006-08-10T17:12:32-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Hum Genomics</prism:publicationName>
    <prism:issn>1479-7364</prism:issn>
    <prism:volume>2</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>403</prism:startingPage>
    <prism:endingPage>414</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/756148">
    <title>Clinical applications of whole-genome association studies: future applications at the bedside</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/756148</link>
    <description>&lt;i&gt;Expert Review of Molecular Diagnostics, Vol. 6, No. 4. (July 2006), pp. 551-565.&lt;/i&gt;</description>
    <dc:title>Clinical applications of whole-genome association studies: future applications at the bedside</dc:title>

    <dc:creator>Alison A Motsinger</dc:creator>
    <dc:creator>Marylyn D Ritchie</dc:creator>
    <dc:creator>Seth E Dobrin</dc:creator>
    <dc:identifier>doi:10.1586/14737159.6.4.551</dc:identifier>
    <dc:source>Expert Review of Molecular Diagnostics, Vol. 6, No. 4. (July 2006), pp. 551-565.</dc:source>
    <dc:date>2006-07-13T04:30:22-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Expert Review of Molecular Diagnostics</prism:publicationName>
    <prism:issn>1473-7159</prism:issn>
    <prism:volume>6</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>551</prism:startingPage>
    <prism:endingPage>565</prism:endingPage>
    <prism:publisher>Future Drugs</prism:publisher>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/904110">
    <title>A gene-centric approach to genome-wide association studies</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/904110</link>
    <description>&lt;i&gt;Nature Reviews Genetics, Vol. 7, No. 11., pp. 885-891.&lt;/i&gt;</description>
    <dc:title>A gene-centric approach to genome-wide association studies</dc:title>

    <dc:creator>Eric Jorgenson</dc:creator>
    <dc:creator>John Witte</dc:creator>
    <dc:identifier>doi:10.1038/nrg1962</dc:identifier>
    <dc:source>Nature Reviews Genetics, Vol. 7, No. 11., pp. 885-891.</dc:source>
    <dc:date>2006-10-18T20:14:27-00:00</dc:date>
    <prism:publicationName>Nature Reviews Genetics</prism:publicationName>
    <prism:issn>1471-0056</prism:issn>
    <prism:volume>7</prism:volume>
    <prism:number>11</prism:number>
    <prism:startingPage>885</prism:startingPage>
    <prism:endingPage>891</prism:endingPage>
    <prism:publisher>Nature Publishing Group</prism:publisher>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/94993">
    <title>Genome-wide association studies: theoretical and practical concerns</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/94993</link>
    <description>&lt;i&gt;Nature Reviews Genetics, Vol. 6, No. 2. (01 February 2005), pp. 109-118.&lt;/i&gt;</description>
    <dc:title>Genome-wide association studies: theoretical and practical concerns</dc:title>

    <dc:creator>William Wang</dc:creator>
    <dc:creator>Bryan Barratt</dc:creator>
    <dc:creator>David Clayton</dc:creator>
    <dc:creator>John Todd</dc:creator>
    <dc:identifier>doi:10.1038/nrg1522</dc:identifier>
    <dc:source>Nature Reviews Genetics, Vol. 6, No. 2. (01 February 2005), pp. 109-118.</dc:source>
    <dc:date>2005-02-15T07:22:39-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Nature Reviews Genetics</prism:publicationName>
    <prism:issn>1471-0056</prism:issn>
    <prism:volume>6</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>109</prism:startingPage>
    <prism:endingPage>118</prism:endingPage>
    <prism:publisher>Nature Publishing Group</prism:publisher>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/958433">
    <title>Insights into social insects from the genome of the honeybee Apis mellifera</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/958433</link>
    <description>&lt;i&gt;Nature, Vol. 444, No. 7118., pp. 512-512.&lt;/i&gt;</description>
    <dc:title>Insights into social insects from the genome of the honeybee Apis mellifera</dc:title>

    <dc:creator>The Honeybee Genome Sequencing Consortium</dc:creator>
    <dc:identifier>doi:10.1038/nature05400</dc:identifier>
    <dc:source>Nature, Vol. 444, No. 7118., pp. 512-512.</dc:source>
    <dc:date>2006-11-23T05:58:05-00:00</dc:date>
    <prism:publicationName>Nature</prism:publicationName>
    <prism:issn>0028-0836</prism:issn>
    <prism:volume>444</prism:volume>
    <prism:number>7118</prism:number>
    <prism:startingPage>512</prism:startingPage>
    <prism:endingPage>512</prism:endingPage>
    <prism:publisher>Nature Publishing Group</prism:publisher>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1085102">
    <title>ARACHNE: A Whole-Genome Shotgun Assembler</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1085102</link>
    <description>&lt;i&gt;Genome Res., Vol. 12, No. 1. (1 January 2002), pp. 177-189.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We describe a new computer system, called ARACHNE, for assembling genome sequence using paired-end whole-genome shotgun reads. ARACHNE has several key features, including an efficient and sensitive procedure for finding read overlaps, a procedure for scoring overlaps that achieves high accuracy by correcting errors before assembly, read merger based on forward-reverse links, and detection of repeat contigs by forward-reverse link inconsistency. To test ARACHNE, we created simulated reads providing ~10-fold coverage of the genomes of H. influenzae, S. cerevisiae, and D. melanogaster, as well as human chromosomes 21 and 22. The assemblies of these simulated reads yielded nearly complete coverage of the respective genomes, with a small number of contigs joined into a smaller number of supercontigs (or scaffolds). For example, analysis of the D. melanogaster genome yielded ~98% coverage with an N50 contig length of 324 kb and an N50 supercontig length of 5143 kb. The assembly accuracy was high, although not perfect: small errors occurred at a frequency of roughly 1 per 1 Mb (typically, deletion of ~1 kb in size), with a very small number of other misassemblies. The assembly was rapid: the Drosophila assembly required only 21 hours on a single 667 MHz processor and used 8.4 Gb of memory. 10.1101/gr.208902</description>
    <dc:title>ARACHNE: A Whole-Genome Shotgun Assembler</dc:title>

    <dc:creator>Serafim Batzoglou</dc:creator>
    <dc:creator>David Jaffe</dc:creator>
    <dc:creator>Ken Stanley</dc:creator>
    <dc:creator>Jonathan Butler</dc:creator>
    <dc:creator>Sante Gnerre</dc:creator>
    <dc:creator>Evan Mauceli</dc:creator>
    <dc:creator>Bonnie Berger</dc:creator>
    <dc:creator>Jill Mesirov</dc:creator>
    <dc:creator>Eric Lander</dc:creator>
    <dc:identifier>doi:10.1101/gr.208902</dc:identifier>
    <dc:source>Genome Res., Vol. 12, No. 1. (1 January 2002), pp. 177-189.</dc:source>
    <dc:date>2007-02-02T21:36:29-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Genome Res.</prism:publicationName>
    <prism:volume>12</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>177</prism:startingPage>
    <prism:endingPage>189</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/224563">
    <title>Inparanoid: a comprehensive database of eukaryotic orthologs.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/224563</link>
    <description>&lt;i&gt;Nucleic Acids Res, Vol. 33, No. Database issue. (1 January 2005)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The Inparanoid eukaryotic ortholog database (http://inparanoid.cgb.ki.se/) is a collection of pairwise ortholog groups between 17 whole genomes; Anopheles gambiae, Caenorhabditis briggsae, Caenorhabditis elegans, Drosophila melanogaster, Danio rerio, Takifugu rubripes, Gallus gallus, Homo sapiens, Mus musculus, Pan troglodytes, Rattus norvegicus, Oryza sativa, Plasmodium falciparum, Arabidopsis thaliana, Escherichia coli, Saccharomyces cerevisiae and Schizosaccharomyces pombe. Complete proteomes for these genomes were derived from Ensembl and UniProt and compared pairwise using Blast, followed by a clustering step using the Inparanoid program. An Inparanoid cluster is seeded by a reciprocally best-matching ortholog pair, around which inparalogs (should they exist) are gathered independently, while outparalogs are excluded. The ortholog clusters can be searched on the website using Ensembl gene/protein or UniProt identifiers, annotation text or by Blast alignment against our protein datasets. The entire dataset can be downloaded, as can the Inparanoid program itself.</description>
    <dc:title>Inparanoid: a comprehensive database of eukaryotic orthologs.</dc:title>

    <dc:creator>KP O'Brien</dc:creator>
    <dc:creator>M Remm</dc:creator>
    <dc:creator>EL Sonnhammer</dc:creator>
    <dc:source>Nucleic Acids Res, Vol. 33, No. Database issue. (1 January 2005)</dc:source>
    <dc:date>2005-06-09T19:37:42-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Nucleic Acids Res</prism:publicationName>
    <prism:issn>1362-4962</prism:issn>
    <prism:volume>33</prism:volume>
    <prism:number>Database issue</prism:number>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1006283">
    <title>3D morphology and gene expression in the Drosophila blastoderm at cellular resolution II: dynamics</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1006283</link>
    <description>&lt;i&gt;Genome Biology, Vol. 7 (21 December 2006), R124.&lt;/i&gt;</description>
    <dc:title>3D morphology and gene expression in the Drosophila blastoderm at cellular resolution II: dynamics</dc:title>

    <dc:creator>Soile Keranen</dc:creator>
    <dc:creator>Charless Fowlkes</dc:creator>
    <dc:creator>Cris Luengo Hendriks</dc:creator>
    <dc:creator>Damir Sudar</dc:creator>
    <dc:creator>David Knowles</dc:creator>
    <dc:creator>Jitendra Malik</dc:creator>
    <dc:creator>Mark Biggin</dc:creator>
    <dc:identifier>doi:10.1186/gb-2006-7-12-r124</dc:identifier>
    <dc:source>Genome Biology, Vol. 7 (21 December 2006), R124.</dc:source>
    <dc:date>2006-12-21T23:16:14-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Genome Biology</prism:publicationName>
    <prism:issn>1465-6906</prism:issn>
    <prism:volume>7</prism:volume>
    <prism:startingPage>R124</prism:startingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1006282">
    <title>3D morphology and gene expression in the Drosophila blastoderm at cellular resolution I: data acquisition pipeline</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1006282</link>
    <description>&lt;i&gt;Genome Biology, Vol. 7 (21 December 2006), R123.&lt;/i&gt;</description>
    <dc:title>3D morphology and gene expression in the Drosophila blastoderm at cellular resolution I: data acquisition pipeline</dc:title>

    <dc:creator>Cris Luengo Hendriks</dc:creator>
    <dc:creator>Soile Keranen</dc:creator>
    <dc:creator>Charless Fowlkes</dc:creator>
    <dc:creator>Lisa Simirenko</dc:creator>
    <dc:creator>Gunther Weber</dc:creator>
    <dc:creator>Angela Depace</dc:creator>
    <dc:creator>Clara Henriquez</dc:creator>
    <dc:creator>David Kaszuba</dc:creator>
    <dc:creator>Bernd Hamann</dc:creator>
    <dc:creator>Michael Eisen</dc:creator>
    <dc:creator>Jitendra Malik</dc:creator>
    <dc:creator>Damir Sudar</dc:creator>
    <dc:creator>Mark Biggin</dc:creator>
    <dc:creator>David Knowles</dc:creator>
    <dc:identifier>doi:10.1186/gb-2006-7-12-r123</dc:identifier>
    <dc:source>Genome Biology, Vol. 7 (21 December 2006), R123.</dc:source>
    <dc:date>2006-12-21T23:16:13-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Genome Biology</prism:publicationName>
    <prism:issn>1465-6906</prism:issn>
    <prism:volume>7</prism:volume>
    <prism:startingPage>R123</prism:startingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1224456">
    <title>Inferring Noncoding RNA Families and Classes by Means of Genome-Scale Structure-Based Clustering</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1224456</link>
    <description>&lt;i&gt;PLoS Computational Biology, Vol. 3, No. 4. (1 April 2007), e65.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The RFAM database defines families of ncRNAs by means of sequence similarities that are sufficient to establish homology. In some cases, such as microRNAs and box H/ACA snoRNAs, functional commonalities define classes of RNAs that are characterized by structural similarities, and typically consist of multiple RNA families. Recent advances in high-throughput transcriptomics and comparative genomics have produced very large sets of putative noncoding RNAs and regulatory RNA signals. For many of them, evidence for stabilizing selection acting on their secondary structures has been derived, and at least approximate models of their structures have been computed. The overwhelming majority of these hypothetical RNAs cannot be assigned to established families or classes. We present here a structure-based clustering approach that is capable of extracting putative RNA classes from genome-wide surveys for structured RNAs. The LocARNA (local alignment of RNA) tool implements a novel variant of the Sankoff algorithm that is sufficiently fast to deal with several thousand candidate sequences. The method is also robust against false positive predictions, i.e., a contamination of the input data with unstructured or nonconserved sequences. We have successfully tested the LocARNA-based clustering approach on the sequences of the RFAM-seed alignments. Furthermore, we have applied it to a previously published set of 3,332 predicted structured elements in the Ciona intestinalis genome (Missal K, Rose D, Stadler PF (2005) Noncoding RNAs in Ciona intestinalis. Bioinformatics 21 (Supplement 2): i77&#8211;i78). 
In addition to recovering, e.g., tRNAs as a structure-based class, the method identifies several RNA families, including microRNA and snoRNA candidates, and suggests several novel classes of ncRNAs for which to date no representative has been experimentally characterized.</description>
    <dc:title>Inferring Noncoding RNA Families and Classes by Means of Genome-Scale Structure-Based Clustering</dc:title>

    <dc:creator>Sebastian Will</dc:creator>
    <dc:creator>Kristin Reiche</dc:creator>
    <dc:creator>Ivo Hofacker</dc:creator>
    <dc:creator>Peter Stadler</dc:creator>
    <dc:creator>Rolf Backofen</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.0030065</dc:identifier>
    <dc:source>PLoS Computational Biology, Vol. 3, No. 4. (1 April 2007), e65.</dc:source>
    <dc:date>2007-04-13T17:52:12-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Computational Biology</prism:publicationName>
    <prism:volume>3</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>e65</prism:startingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1224455">
    <title>Prediction of ligand binding site and functionally important residues based on fuzzy-oil-drop model</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1224455</link>
    <description>&lt;i&gt;PLoS Computational Biology, Vol. preprint, No. 2007. (1 April 2007), e94.eor.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The recognition of a ligand binding site in a protein molecule is important for identifying its biological activity. The model for in silico recognition of binding site is applied to 33 proteins of known biological activity and 33 proteins of unknown biological activity (related to genomic project). The hydrophobic core, which stabilizes the protein structure, is assumed to be represented by a three-dimensional Gaussian function, the value of which represents the hydrophobicity density distribution in a protein molecule. The experimentally observed distribution of hydrophobicity compared with the theoretically expected one reveals differences within the molecule. Fully automatic method for preliminary active site recognition in silico. The irregularities between idealized and observed hydrophobicity distribution were found to be localized exactly in the ligation site. Functionally important residues were recognized in the proteins with unknown function on the basis of this finding. The localization of the ligation site according to the presented model can be easily carried out for any protein structure with a free prediction server available from http://bioinformatics.cm-uj.krakow.pl/activesite.</description>
    <dc:title>Prediction of ligand binding site and functionally important residues based on fuzzy-oil-drop model</dc:title>

    <dc:creator>Michal Brylinski</dc:creator>
    <dc:creator>Katarzyna Prymula</dc:creator>
    <dc:creator>Wiktor Jurkowski</dc:creator>
    <dc:creator>Marek Kochanczyk</dc:creator>
    <dc:creator>Ewa Stawowczyk</dc:creator>
    <dc:creator>Leszek Konieczny</dc:creator>
    <dc:creator>Irena Roterman</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.0030094.eor</dc:identifier>
    <dc:source>PLoS Computational Biology, Vol. preprint, No. 2007. (1 April 2007), e94.eor.</dc:source>
    <dc:date>2007-04-13T17:52:09-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Computational Biology</prism:publicationName>
    <prism:volume>preprint</prism:volume>
    <prism:number>2007</prism:number>
    <prism:startingPage>e94.eor</prism:startingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1224454">
    <title>The Human Genomic Melting Map</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1224454</link>
    <description>&lt;i&gt;PLoS Computational Biology, Vol. preprint, No. 2007. (1 April 2007), e93.eor.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In a living cell, the antiparallel double-stranded helix of DNA is a dynamically changing structure. The structure relates to interactions between and within the DNA strands, and the array of other macromolecules that constitutes functional chromatin. It is only through its changing conformations that DNA can organize and structure a large number of cellular functions. In particular, DNA must locally uncoil, or melt, and become single-stranded for DNA replication, repair, recombination and transcription to occur. It has previously been shown that this melting occurs cooperatively, whereby several basepairs act in concert to generate melting bubbles, and in this way constitute a domain that behaves as a unit with respect to local DNA single-strandedness. Knowledge and computational power now permits the calculation of genomic melting maps, which provide information about the propensities of forming local bubbles determined from the whole sequence. We here present the complete human genomic melting map, and present a first report on its basic features, the extent of cooperativity and correlations to various physical and biological features of the human genome. Globally, the melting map co-varies very strongly with GC content. Most importantly, however, cooperativity of DNA denaturation causes this correlation to be weaker at resolutions less than 500 bps. This is also the resolution level at which most structural and biological processes occur, signifying the importance of the informational content inherent in the genomic melting map. The human DNA melting map may be further explored through http://meltmap.uio.no.</description>
    <dc:title>The Human Genomic Melting Map</dc:title>

    <dc:creator>Fang Liu</dc:creator>
    <dc:creator>Eivind T&#x00f8;stesen</dc:creator>
    <dc:creator>Jostein Sundet</dc:creator>
    <dc:creator>Tor-Kristian Jenssen</dc:creator>
    <dc:creator>Christoph Bock</dc:creator>
    <dc:creator>Geir Jerstad</dc:creator>
    <dc:creator>William Thilly</dc:creator>
    <dc:creator>Eivind Hovig</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.0030093.eor</dc:identifier>
    <dc:source>PLoS Computational Biology, Vol. preprint, No. 2007. (1 April 2007), e93.eor.</dc:source>
    <dc:date>2007-04-13T17:52:07-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Computational Biology</prism:publicationName>
    <prism:volume>preprint</prism:volume>
    <prism:number>2007</prism:number>
    <prism:startingPage>e93.eor</prism:startingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1224453">
    <title>Binding Site Graphs: A New Graph Theoretical Framework for Prediction of Transcription Factor Binding Sites</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1224453</link>
    <description>&lt;i&gt;PLoS Computational Biology, Vol. preprint, No. 2007. (1 April 2007), e90.eor.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Computational prediction of nucleotide binding specificity for transcription factors remains a fundamental and largely unsolved problem. Determination of binding positions is a prerequisite for research in gene regulation, a major mechanism controlling phenotypic diversity. Furthermore, an accurate determination of binding specificities from high-throughput data sources is necessary to realize the full potential of systems biology. Unfortunately, recently performed independent evaluation showed that more than half of the predictions from most widely used algorithms are false. We introduce a graph-theoretical framework to describe local sequence similarity as the pair-wise distances between nucleotides in promoter sequences, and hypothesize that densely connected subgraphs are indicative of transcription factor binding sites. Using a well-established sampling algorithm coupled with simple clustering and scoring schemes, we identify sets of closely related nucleotides and test those for known TF binding activity. Using an independent benchmark, we find our algorithm predicts yeast binding motifs considerably better than currently available techniques and without manual curation. Importantly, we reduce the number of false positive predictions in yeast to less than 30%. We also develop a framework to evaluate the statistical significance of our motif predictions. We show that our approach is robust to the choice of input promoters, and thus can be used in the context of predicting binding positions from noisy experimental data. We apply our method to identify binding sites using data from genome scale ChIP-chip experiments. Results from these experiments are publicly available at http://cagt10.bu.edu/BSG/. 
The graphical framework developed here may be useful when combining predictions from numerous computational and experimental measures. Finally, we discuss how our algorithm can be used to improve the sensitivity of computational predictions of transcription factor binding specificities.</description>
    <dc:title>Binding Site Graphs: A New Graph Theoretical Framework for Prediction of Transcription Factor Binding Sites</dc:title>

    <dc:creator>Timothy Reddy</dc:creator>
    <dc:creator>Charles Delisi</dc:creator>
    <dc:creator>Boris Shakhnovich</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.0030090.eor</dc:identifier>
    <dc:source>PLoS Computational Biology, Vol. preprint, No. 2007. (1 April 2007), e90.eor.</dc:source>
    <dc:date>2007-04-13T17:51:55-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Computational Biology</prism:publicationName>
    <prism:volume>preprint</prism:volume>
    <prism:number>2007</prism:number>
    <prism:startingPage>e90.eor</prism:startingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1190545">
    <title>Genomic regulatory blocks encompass multiple neighboring genes and maintain conserved synteny in vertebrates</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1190545</link>
    <description>&lt;i&gt;Genome Res. (2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We report evidence for a mechanism for the maintenance of long-range conserved synteny across vertebrate genomes. We found the largest mammal-teleost conserved chromosomal segments to be spanned by highly conserved noncoding elements (HCNEs), their developmental regulatory target genes, and phylogenetically and functionally unrelated &#34;bystander&#34; genes. Bystander genes are not specifically under the control of the regulatory elements that drive the target genes and are expressed in patterns that are different from those of the target genes. Reporter insertions distal to zebrafish developmental regulatory genes pax6.1/2, rx3, id1, and fgf8 and miRNA genes mir9-1 and mir9-5 recapitulate the expression patterns of these genes even if located inside or beyond bystander genes, suggesting that the regulatory domain of a developmental regulatory gene can extend into and beyond adjacent transcriptional units. We termed these chromosomal segments genomic regulatory blocks (GRBs). After whole genome duplication in teleosts, GRBs, including HCNEs and target genes, were often maintained in both copies, while bystander genes were typically lost from one GRB, strongly suggesting that evolutionary pressure acts to keep the single-copy GRBs of higher vertebrates intact. We show that loss of bystander genes and other mutational events suffered by duplicated GRBs in teleost genomes permits target gene identification and HCNE/target gene assignment. These findings explain the absence of evolutionary breakpoints from large vertebrate chromosomal segments and will aid in the recognition of position effect mutations within human GRBs.</description>
    <dc:title>Genomic regulatory blocks encompass multiple neighboring genes and maintain conserved synteny in vertebrates</dc:title>

    <dc:creator>Hiroshi Kikuta</dc:creator>
    <dc:creator>Mary Laplante</dc:creator>
    <dc:creator>Pavla Navratilova</dc:creator>
    <dc:creator>Anna Komisarczuk</dc:creator>
    <dc:creator>Par Engstrom</dc:creator>
    <dc:creator>David Fredman</dc:creator>
    <dc:creator>Altuna Akalin</dc:creator>
    <dc:creator>Mario Caccamo</dc:creator>
    <dc:creator>Ian Sealy</dc:creator>
    <dc:creator>Kerstin Howe</dc:creator>
    <dc:creator>Julien Ghislain</dc:creator>
    <dc:creator>Guillaume Pezeron</dc:creator>
    <dc:creator>Philippe Mourrain</dc:creator>
    <dc:creator>Staale Ellingsen</dc:creator>
    <dc:creator>Andrew Oates</dc:creator>
    <dc:creator>Christine Thisse</dc:creator>
    <dc:creator>Bernard Thisse</dc:creator>
    <dc:creator>Isabelle Foucher</dc:creator>
    <dc:creator>Birgit Adolf</dc:creator>
    <dc:creator>Andrea Geling</dc:creator>
    <dc:creator>Boris Lenhard</dc:creator>
    <dc:creator>Thomas Becker</dc:creator>
    <dc:source>Genome Res. (2007)</dc:source>
    <dc:date>2007-03-27T23:16:09-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Genome Res.</prism:publicationName>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1021805">
    <title>Transcriptional Control in the Segmentation Gene Network of Drosophila</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1021805</link>
    <description>&lt;i&gt;PLoS Biology, Vol. 2, No. 9. (1 September 2004), e271.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The segmentation gene network of Drosophila consists of maternal and zygotic factors that generate, by transcriptional (cross-) regulation, expression patterns of increasing complexity along the anterior-posterior axis of the embryo. Using known binding site information for maternal and zygotic gap transcription factors, the computer algorithm Ahab recovers known segmentation control elements (modules) with excellent success and predicts many novel modules within the network and genome-wide. We show that novel module predictions are highly enriched in the network and typically clustered proximal to the promoter, not only upstream, but also in intronic space and downstream. When placed upstream of a reporter gene, they consistently drive patterned blastoderm expression, in most cases faithfully producing one or more pattern elements of the endogenous gene. Moreover, we demonstrate for the entire set of known and newly validated modules that Ahab&#39;s prediction of binding sites correlates well with the expression patterns produced by the modules, revealing basic rules governing their composition. Specifically, we show that maternal factors consistently act as activators and that gap factors act as repressors, except for the bimodal factor Hunchback. Our data suggest a simple context-dependent rule for its switch from repressive to activating function. Overall, the composition of modules appears well fitted to the spatiotemporal distribution of their positive and negative input factors. Finally, by comparing Ahab predictions with different categories of transcription factor input, we confirm the global regulatory structure of the segmentation gene network, but find odd skipped behaving like a primary pair-rule gene. The study expands our knowledge of the segmentation gene network by increasing the number of experimentally tested modules by 50&#37;. 
For the first time, the entire set of validated modules is analyzed for binding site composition under a uniform set of criteria, permitting the definition of basic composition rules. The study demonstrates that computational methods are a powerful complement to experimental approaches in the analysis of transcription networks.</description>
    <dc:title>Transcriptional Control in the Segmentation Gene Network of Drosophila</dc:title>

    <dc:creator>Mark Schroeder</dc:creator>
    <dc:creator>Michael Pearce</dc:creator>
    <dc:creator>John Fak</dc:creator>
    <dc:creator>Hongqing Fan</dc:creator>
    <dc:creator>Ulrich Unnerstall</dc:creator>
    <dc:creator>Eldon Emberly</dc:creator>
    <dc:creator>Nikolaus Rajewsky</dc:creator>
    <dc:creator>Eric Siggia</dc:creator>
    <dc:creator>Ulrike Gaul</dc:creator>
    <dc:identifier>doi:10.1371/journal.pbio.0020271</dc:identifier>
    <dc:source>PLoS Biology, Vol. 2, No. 9. (1 September 2004), e271.</dc:source>
    <dc:date>2007-01-01T19:51:52-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>PLoS Biology</prism:publicationName>
    <prism:volume>2</prism:volume>
    <prism:number>9</prism:number>
    <prism:startingPage>e271</prism:startingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1097483">
    <title>The Regulatory Code for Transcriptional Response Diversity and Its Relation to Genome Structural Properties in A. thaliana</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1097483</link>
    <description>&lt;i&gt;PLoS Genetics, Vol. 3, No. 2. (1 February 2007), e11.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Regulation of gene expression via specific cis-regulatory promoter elements has evolved in cellular organisms as a major adaptive mechanism to respond to environmental change. Assuming a simple model of transcriptional regulation, genes that are differentially expressed in response to a large number of different external stimuli should harbor more distinct regulatory elements in their upstream regions than do genes that only respond to few environmental challenges. We tested this hypothesis in Arabidopsis thaliana using the compendium of gene expression profiling data available in AtGenExpress and known cis-element motifs mapped to upstream gene promoter regions and studied the relation of the observed breadth of differential gene expression response with several fundamental genome architectural properties. We observed highly significant positive correlations between the density of cis-elements in upstream regions and the number of conditions in which a gene was differentially regulated. The correlation was most pronounced in regions immediately upstream of the transcription start sites. Multistimuli response genes were observed to be associated with significantly longer upstream intergenic regions, retain more paralogs in the Arabidopsis genome, are shorter, have fewer introns, and are more likely to contain TATA-box motifs in their promoters. In abiotic stress time series data, multistimuli response genes were found to be overrepresented among early-responding genes. Genes involved in the regulation of transcription, stress response, and signaling processes were observed to possess the greatest regulatory capacity. 
Our results suggest that greater gene expression regulatory complexity appears to be encoded by an increased density of cis-regulatory elements and provide further evidence for an evolutionary adaptation of the regulatory code at the genomic layout level. Larger intergenic spaces preceding multistimuli response genes may have evolved to allow greater regulatory gene expression potential.</description>
    <dc:title>The Regulatory Code for Transcriptional Response Diversity and Its Relation to Genome Structural Properties in A. thaliana</dc:title>

    <dc:creator>Dirk Walther</dc:creator>
    <dc:creator>Roman Brunnemann</dc:creator>
    <dc:creator>Joachim Selbig</dc:creator>
    <dc:identifier>doi:10.1371/journal.pgen.0030011</dc:identifier>
    <dc:source>PLoS Genetics, Vol. 3, No. 2. (1 February 2007), e11.</dc:source>
    <dc:date>2007-02-09T23:45:45-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Genetics</prism:publicationName>
    <prism:volume>3</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>e11</prism:startingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1096346">
    <title>Indelign: a probabilistic framework for annotation of insertions and deletions in a multiple alignment</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1096346</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 23, No. 3. (1 February 2007), pp. 289-297.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Motivation: A quantitative study of molecular evolutionary events such as substitutions, insertions and deletions from closely related genomes requires (1) an accurate multiple sequence alignment program and (2) a method to annotate the insertions and deletions that explain the 'gaps' in the alignment. Although the former requirement has been extensively addressed, the latter problem has received little attention, especially in a comprehensive probabilistic framework. Results: Here, we present Indelign, a program that uses a probabilistic evolutionary model to compute the most likely scenario of insertions and deletions consistent with an input multiple alignment. It is also capable of modifying the given alignment so as to obtain a better agreement with the evolutionary model. We find close to optimal performance and substantial improvement over alternative methods, in tests of Indelign on synthetic data. We use Indelign to analyze regulatory sequences in Drosophila, and find an excess of insertions over deletions, which is different from what has been reported for neutral sequences. Availability: The Indelign program may be downloaded from the website http://veda.cs.uiuc.edu/indelign/ Supplementary information: Supplementary material is available at Bioinformatics online. Contact: sinhas@uiuc.edu 10.1093/bioinformatics/btl578</description>
    <dc:title>Indelign: a probabilistic framework for annotation of insertions and deletions in a multiple alignment</dc:title>

    <dc:creator>Jaebum Kim</dc:creator>
    <dc:creator>Saurabh Sinha</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/btl578</dc:identifier>
    <dc:source>Bioinformatics, Vol. 23, No. 3. (1 February 2007), pp. 289-297.</dc:source>
    <dc:date>2007-02-09T10:26:29-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:volume>23</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>289</prism:startingPage>
    <prism:endingPage>297</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1085324">
    <title>Parallel evolution of conserved noncoding elements that target a common set of developmental regulatory genes from worms to humans.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1085324</link>
    <description>&lt;i&gt;Genome Biology, Vol. 8 (02 February 2007), R15.&lt;/i&gt;</description>
    <dc:title>Parallel evolution of conserved noncoding elements that target a common set of developmental regulatory genes from worms to humans.</dc:title>

    <dc:creator>Tanya Vavouri</dc:creator>
    <dc:creator>Klaudia Walter</dc:creator>
    <dc:creator>Walter Gilks</dc:creator>
    <dc:creator>Ben Lehner</dc:creator>
    <dc:creator>Greg Elgar</dc:creator>
    <dc:identifier>doi:10.1186/gb-2007-8-2-r15</dc:identifier>
    <dc:source>Genome Biology, Vol. 8 (02 February 2007), R15.</dc:source>
    <dc:date>2007-02-02T23:08:25-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Genome Biology</prism:publicationName>
    <prism:issn>1465-6906</prism:issn>
    <prism:volume>8</prism:volume>
    <prism:startingPage>R15</prism:startingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1085326">
    <title>Boosting with stumps for predicting transcription start sites</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1085326</link>
    <description>&lt;i&gt;Genome Biology, Vol. 8 (02 February 2007), R17.&lt;/i&gt;</description>
    <dc:title>Boosting with stumps for predicting transcription start sites</dc:title>

    <dc:creator>Xiaoyue Zhao</dc:creator>
    <dc:creator>Zhenyu Xuan</dc:creator>
    <dc:creator>Michael Zhang</dc:creator>
    <dc:identifier>doi:10.1186/gb-2007-8-2-r17</dc:identifier>
    <dc:source>Genome Biology, Vol. 8 (02 February 2007), R17.</dc:source>
    <dc:date>2007-02-02T23:08:26-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Genome Biology</prism:publicationName>
    <prism:issn>1465-6906</prism:issn>
    <prism:volume>8</prism:volume>
    <prism:startingPage>R17</prism:startingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1064697">
    <title>Creating a honey bee consensus gene set</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1064697</link>
    <description>&lt;i&gt;Genome Biology, Vol. 8 (22 January 2007), R13.&lt;/i&gt;</description>
    <dc:title>Creating a honey bee consensus gene set</dc:title>

    <dc:creator>Christine Elsik</dc:creator>
    <dc:creator>Aaron Mackey</dc:creator>
    <dc:creator>Justin Reese</dc:creator>
    <dc:creator>Natalia Milshina</dc:creator>
    <dc:creator>David Roos</dc:creator>
    <dc:creator>George Weinstock</dc:creator>
    <dc:identifier>doi:10.1186/gb-2007-8-1-r13</dc:identifier>
    <dc:source>Genome Biology, Vol. 8 (22 January 2007), R13.</dc:source>
    <dc:date>2007-01-24T08:54:55-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Genome Biology</prism:publicationName>
    <prism:issn>1465-6906</prism:issn>
    <prism:volume>8</prism:volume>
    <prism:startingPage>R13</prism:startingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/882014">
    <title>Bounded search for de novo identification of degenerate cis-regulatory elements.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/882014</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 7 (2006)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND: The identification of statistically overrepresented sequences in the upstream regions of coregulated genes should theoretically permit the identification of potential cis-regulatory elements. However, in practice many cis-regulatory elements are highly degenerate, precluding the use of an exhaustive word-counting strategy for their identification. While numerous methods exist for inferring base distributions using a position weight matrix, recent studies suggest that the independence assumptions inherent in the model, as well as the inability to reach a global optimum, limit this approach. RESULTS: In this paper, we report PRISM, a degenerate motif finder that leverages the relationship between the statistical significance of a set of binding sites and that of the individual binding sites. PRISM first identifies overrepresented, non-degenerate consensus motifs, then iteratively relaxes each one into a high-scoring degenerate motif. This approach requires no tunable parameters, thereby lending itself to unbiased performance comparisons. We therefore compare PRISM's performance against nine popular motif finders on 28 well-characterized S. cerevisiae regulons. PRISM consistently outperforms all other programs. Finally, we use PRISM to predict the binding sites of uncharacterized regulons. Our results support a proposed mechanism of action for the yeast cell-cycle transcription factor Stb1, whose binding site has not been determined experimentally. CONCLUSION: The relationship between statistical measures of the binding sites and the set as a whole leads to a simple means of identifying the diverse range of cis-regulatory elements to which a protein binds. This approach leverages the advantages of word-counting, in that position dependencies are implicitly accounted for and local optima are more easily avoided. 
While we sacrifice guaranteed optimality to prevent the exponential blowup of exhaustive search, we prove that the error is bounded and experimentally show that the performance is superior to other methods. A Java implementation of this algorithm can be downloaded from our web server at http://genie.dartmouth.edu/prism.</description>
    <dc:title>Bounded search for de novo identification of degenerate cis-regulatory elements.</dc:title>

    <dc:creator>JM Carlson</dc:creator>
    <dc:creator>A Chakravarty</dc:creator>
    <dc:creator>RS Khetani</dc:creator>
    <dc:creator>RH Gross</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-7-254</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 7 (2006)</dc:source>
    <dc:date>2006-10-02T18:22:58-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:issn>1471-2105</prism:issn>
    <prism:volume>7</prism:volume>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1085575">
    <title>New Small Nuclear RNA Gene-Like Transcriptional Units as Sources of Regulatory Transcripts</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1085575</link>
    <description>&lt;i&gt;PLoS Genetics, Vol. 3, No. 2. (1 February 2007), e1.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;By means of a computer search for upstream promoter elements (distal sequence element and proximal sequence element) typical of small nuclear RNA genes, we have identified in the human genome a number of previously unrecognized, putative transcription units whose predicted products are novel noncoding RNAs with homology to protein-coding genes. By elucidating the function of one of them, we provide evidence for the existence of a sense/antisense-based gene-regulation network where part of the polymerase III transcriptome could control its polymerase II counterpart.</description>
    <dc:title>New Small Nuclear RNA Gene-Like Transcriptional Units as Sources of Regulatory Transcripts</dc:title>

    <dc:creator>Aldo Pagano</dc:creator>
    <dc:creator>Manuele Castelnuovo</dc:creator>
    <dc:creator>Federico Tortelli</dc:creator>
    <dc:creator>Roberto Ferrari</dc:creator>
    <dc:creator>Giorgio Dieci</dc:creator>
    <dc:creator>Ranieri Cancedda</dc:creator>
    <dc:identifier>doi:10.1371/journal.pgen.0030001</dc:identifier>
    <dc:source>PLoS Genetics, Vol. 3, No. 2. (1 February 2007), e1.</dc:source>
    <dc:date>2007-02-03T00:17:32-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Genetics</prism:publicationName>
    <prism:volume>3</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>e1</prism:startingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1085574">
    <title>Monotony of surprise and large-scale quest for unusual words.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1085574</link>
    <description>&lt;i&gt;J Comput Biol, Vol. 10, No. 3-4. (2003), pp. 283-311.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The problem of characterizing and detecting recurrent sequence patterns such as substrings or motifs and related associations or rules is variously pursued in order to compress data, unveil structure, infer succinct descriptions, extract and classify features, etc. In molecular biology, exceptionally frequent or rare words in bio-sequences have been implicated in various facets of biological function and structure. The discovery, particularly on a massive scale, of such patterns poses interesting methodological and algorithmic problems and often exposes scenarios in which tables and synopses grow faster and bigger than the raw sequences they are meant to encapsulate. In previous study, the ability to succinctly compute, store, and display unusual substrings has been linked to a subtle interplay between the combinatorics of the subword of a word and local monotonicities of some scores used to measure the departure from expectation. In this paper, we carry out an extensive analysis of such monotonicities for a broader variety of scores. This supports the construction of data structures and algorithms capable of performing global detection of unusual substrings in time and space linear in the subject sequences, under various probabilistic models.</description>
    <dc:title>Monotony of surprise and large-scale quest for unusual words.</dc:title>

    <dc:creator>A Apostolico</dc:creator>
    <dc:creator>ME Bock</dc:creator>
    <dc:creator>S Lonardi</dc:creator>
    <dc:identifier>doi:10.1089/10665270360688020</dc:identifier>
    <dc:source>J Comput Biol, Vol. 10, No. 3-4. (2003), pp. 283-311.</dc:source>
    <dc:date>2007-02-03T00:16:58-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>J Comput Biol</prism:publicationName>
    <prism:issn>1066-5277</prism:issn>
    <prism:volume>10</prism:volume>
    <prism:number>3-4</prism:number>
    <prism:startingPage>283</prism:startingPage>
    <prism:endingPage>311</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1085571">
    <title>Fast and systematic genome-wide discovery of conserved regulatory elements using a non-alignment based approach.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1085571</link>
    <description>&lt;i&gt;Genome Biol, Vol. 6, No. 2. (2005)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We describe a powerful new approach for discovering globally conserved regulatory elements between two genomes. The method is fast, simple and comprehensive, without requiring alignments. Its application to pairs of yeasts, worms, flies and mammals yields a large number of known and novel putative regulatory elements. Many of these are validated by independent biological observations, have spatial and/or orientation biases, are co-conserved with other elements and show surprising conservation across large phylogenetic distances.</description>
    <dc:title>Fast and systematic genome-wide discovery of conserved regulatory elements using a non-alignment based approach.</dc:title>

    <dc:creator>O Elemento</dc:creator>
    <dc:creator>S Tavazoie</dc:creator>
    <dc:identifier>doi:10.1186/gb-2005-6-2-r18</dc:identifier>
    <dc:source>Genome Biol, Vol. 6, No. 2. (2005)</dc:source>
    <dc:date>2007-02-03T00:14:18-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Genome Biol</prism:publicationName>
    <prism:issn>1465-6914</prism:issn>
    <prism:volume>6</prism:volume>
    <prism:number>2</prism:number>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/882012">
    <title>Phylogeny based discovery of regulatory elements.</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/882012</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 7 (2006)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND: Algorithms that locate evolutionarily conserved sequences have become powerful tools for finding functional DNA elements, including transcription factor binding sites; however, most methods do not take advantage of an explicit model for the constrained evolution of functional DNA sequences. RESULTS: We developed a probabilistic framework that combines an HKY85 model, which assigns probabilities to different base substitutions between species, and weight matrix models of transcription factor binding sites, which describe the probabilities of observing particular nucleotides at specific positions in the binding site. The method incorporates the phylogenies of the species under consideration and takes into account the position specific variation of transcription factor binding sites. Using our framework we assessed the suitability of alignments of genomic sequences from commonly used species as substrates for comparative genomic approaches to regulatory motif finding. We then applied this technique to Saccharomyces cerevisiae and related species by examining all possible six base pair DNA sequences (hexamers) and identifying sequences that are conserved in a significant number of promoters. By combining similar conserved hexamers we reconstructed known cis-regulatory motifs and made predictions of previously unidentified motifs. We tested one prediction experimentally, finding it to be a regulatory element involved in the transcriptional response to glucose. CONCLUSION: The experimental validation of a regulatory element prediction missed by other large-scale motif finding studies demonstrates that our approach is a useful addition to the current suite of tools for finding regulatory motifs.</description>
    <dc:title>Phylogeny based discovery of regulatory elements.</dc:title>

    <dc:creator>J Gertz</dc:creator>
    <dc:creator>JC Fay</dc:creator>
    <dc:creator>BA Cohen</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-7-266</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 7 (2006)</dc:source>
    <dc:date>2006-10-02T18:21:38-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:issn>1471-2105</prism:issn>
    <prism:volume>7</prism:volume>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/342965">
    <title>Discovery of regulatory elements in vertebrates through comparative genomics</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/342965</link>
    <description>&lt;i&gt;Nature Biotechnology, Vol. 23, No. 10. (06 October 2005), pp. 1249-1256.&lt;/i&gt;</description>
    <dc:title>Discovery of regulatory elements in vertebrates through comparative genomics</dc:title>

    <dc:creator>Amol Prakash</dc:creator>
    <dc:creator>Martin Tompa</dc:creator>
    <dc:identifier>doi:10.1038/nbt1140</dc:identifier>
    <dc:source>Nature Biotechnology, Vol. 23, No. 10. (06 October 2005), pp. 1249-1256.</dc:source>
    <dc:date>2005-10-06T22:55:30-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Nature Biotechnology</prism:publicationName>
    <prism:issn>1087-0156</prism:issn>
    <prism:volume>23</prism:volume>
    <prism:number>10</prism:number>
    <prism:startingPage>1249</prism:startingPage>
    <prism:endingPage>1256</prism:endingPage>
    <prism:publisher>Nature Publishing Group</prism:publisher>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1084785">
    <title>Robustness Can Evolve Gradually in Complex Regulatory Gene Networks with Varying Topology</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1084785</link>
    <description>&lt;i&gt;PLoS Computational Biology, Vol. 3, No. 2. (1 February 2007), e15.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The topology of cellular circuits (the who-interacts-with-whom) is key to understand their robustness to both mutations and noise. The reason is that many biochemical parameters driving circuit behavior vary extensively and are thus not fine-tuned. Existing work in this area asks to what extent the function of any one given circuit is robust. But is high robustness truly remarkable, or would it be expected for many circuits of similar topology? And how can high robustness come about through gradual Darwinian evolution that changes circuit topology gradually, one interaction at a time? We here ask these questions for a model of transcriptional regulation networks, in which we explore millions of different network topologies. Robustness to mutations and noise are correlated in these networks. They show a skewed distribution, with a very small number of networks being vastly more robust than the rest. All networks that attain a given gene expression state can be organized into a graph whose nodes are networks that differ in their topology. Remarkably, this graph is connected and can be easily traversed by gradual changes of network topologies. Thus, robustness is an evolvable property. This connectedness and evolvability of robust networks may be a general organizational principle of biological networks. In addition, it exists also for RNA and protein structures, and may thus be a general organizational principle of all biological systems.</description>
    <dc:title>Robustness Can Evolve Gradually in Complex Regulatory Gene Networks with Varying Topology</dc:title>

    <dc:creator>Stefano Ciliberti</dc:creator>
    <dc:creator>Olivier Martin</dc:creator>
    <dc:creator>Andreas Wagner</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.0030015</dc:identifier>
    <dc:source>PLoS Computational Biology, Vol. 3, No. 2. (1 February 2007), e15.</dc:source>
    <dc:date>2007-02-02T19:16:39-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>PLoS Computational Biology</prism:publicationName>
    <prism:volume>3</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>e15</prism:startingPage>
    <prism:category>no-tag</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/indigoviolet/article/1084706">
    <title>Serum response factor binding sites differ in three human cell types</title>
    <link>http://www.citeulike.org/user/indigoviolet/article/1084706</link>
    <description>&lt;i&gt;Genome Res., Vol. 17, No. 2. (1 February 2007), pp. 136-144.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The serum response factor (SRF) is essential for embryonic development and maintenance of muscle cells and neurons. The mechanism by which this factor controls these divergent pathways is unclear. Here we present a genome-wide view of occupancy of SRF at its binding sites with a focus on those that vary with cell type. We used chromatin immunoprecipitation (ChIP) in combination with human promoter microarrays to identify 216 putative SRF binding sites in the human genome. We performed independent quantitative PCR validation at over half of these sites that resulted in 146 sites we assert to be true binding sites at over 90% confidence. Nearly half of the sites are bound by SRF in only one of the three cell types we tested, providing strong evidence for the diverse roles for SRF in different cell types. We also explore possible mechanisms controlling differential binding of SRF in these cell types by assaying cofactor binding, DNA methylation, histone methylation, and histone acetylation at a subset of sites bound preferentially in smooth muscle cells. Although we did not see a strong correlation between SRF binding and epigenetics modifications, at these sites, we propose that SRF cofactors may play an important role in determining cell-dependent SRF binding sites. ELK4 (previously known as SAP-1 [SRF-associated protein-1]) is ubiquitously expressed. Therefore, we expected it to occupy sites where SRF binding is common in all cell types. Indeed, 90% of SRF sites also bound by ELK4 were common to all three cell types. Together, our data provide a more complete understanding of the regulatory network controlled by SRF. 10.1101/gr.5875007</description>
    <dc:title>Serum response factor binding sites differ in three human cell types</dc:title>

    <dc:creator>Sara Cooper</dc:creator>
    <dc:creator>Nathan Trinklein</dc:creator>
    <dc:creator>Loan Nguyen</dc:creator>
    <dc:creator>Richard Myers</dc:creator>
    <dc:identifier>doi:10.1101/gr.5875007</dc:identifier>
    <dc:source>Genome Res., Vol. 17, No. 2. (1 February 2007), pp. 136-144.</dc:source>
    <dc:date>2007-02-02T17:58:38-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Genome Res.</prism:publicationName>
    <prism:volume>17</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>136</prism:startingPage>
    <prism:endingPage>144</prism:endingPage>
    <prism:category>no-tag</prism:category>
</item>



</rdf:RDF>

