<?xml version="1.0" encoding="UTF-8"?>

<rdf:RDF
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
   xmlns="http://purl.org/rss/1.0/"
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:prism="http://prismstandard.org/namespaces/1.2/basic/"
   xmlns:dcterms="http://purl.org/dc/terms/"

>
<channel rdf:about="http://www.citeulike.org/about">
<pubDate>Sat, 26 Jul 2008 05:50:21 +0100</pubDate>


	<title>CiteULike: analogAI's library [72 articles]</title>
	<description>CiteULike: analogAI's library [72 articles]</description>


	<link>http://www.citeulike.org/user/analogAI</link>
	<dc:publisher>CiteULike.org</dc:publisher>
	<dc:language>en-gb</dc:language>
	<dc:rights>Copyright &#169; 2004-2008 citeulike.org</dc:rights>
	<items>
    <rdf:Seq>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/2933217"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/2924395"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/2874542"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/2874536"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/2759729"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/962574"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/928070"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/812979"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/812975"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/526137"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/785236"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/785235"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/471650"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/638322"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/682584"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/342964"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/615468"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/613992"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/613991"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/613990"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/613989"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/382660"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/3547"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/445819"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/216611"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/445816"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/332173"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/439298"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/439296"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/438271"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/420500"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/416744"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/101933"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/143674"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/410985"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/410984"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/211877"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/407127"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/407125"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/407124"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/406753"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/406744"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/165167"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/165166"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/405427"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/405424"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/403892"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/349120"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/400393"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/analogAI/article/142321"/>

	</rdf:Seq>
	</items>
	</channel>


<item rdf:about="http://www.citeulike.org/user/analogAI/article/2933217">
    <title>Cyanobacterial leader peptides for protein secretion</title>
    <link>http://www.citeulike.org/user/analogAI/article/2933217</link>
    <description>&lt;i&gt;FEMS Microbiology Letters, Vol. 218, No. 2. (2003), pp. 351-357.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Abstract The leader peptide of the major secreted protein PilA1 of the cyanobacterium Synechocystis sp. strain PCC 6803 and several artificial leader peptides have been used to study secretion of the reporter protein lichenase to the culture medium. The strains of Synechocystis carrying lichenase with the leader sequences of PilA and with the leader sequence of Slr2016 efficiently secreted the reporter protein. The artificial leader sequence that was characterized by the overall positive charge (as PilA1 and Slr2016 leaders) also allowed secretion. The artificial leader with negative charge, however, did not allow secretion of the reporter protein. Moreover, no secreted proteins have been isolated from this strain using conventional techniques for preparation of secreted proteins. These data suggest that the general secretion pathway in cyanobacteria, at least for pilins, recognizes the overall charge of the leader sequences, and operates in a sequence-non-specific manner.</description>
    <dc:title>Cyanobacterial leader peptides for protein secretion</dc:title>

    <dc:creator>Tatiana Sergeyenko</dc:creator>
    <dc:creator>Dmitry Los</dc:creator>
    <dc:identifier>doi:10.1016/S0378-1097(02)01197-7</dc:identifier>
    <dc:source>FEMS Microbiology Letters, Vol. 218, No. 2. (2003), pp. 351-357.</dc:source>
    <dc:date>2008-06-27T10:10:39-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>FEMS Microbiology Letters</prism:publicationName>
    <prism:volume>218</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>351</prism:startingPage>
    <prism:endingPage>357</prism:endingPage>
    <prism:category>glucanase</prism:category>
    <prism:category>lichenase</prism:category>
    <prism:category>pcc6803</prism:category>
    <prism:category>signalprotein</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/2924395">
    <title>Synechocystis PCC6803: a euryhaline cyanobacterium</title>
    <link>http://www.citeulike.org/user/analogAI/article/2924395</link>
    <description>&lt;i&gt;FEMS Microbiology Letters, Vol. 18, No. 1-2. (1983), pp. 99-102.&lt;/i&gt;</description>
    <dc:title>Synechocystis PCC6803: a euryhaline cyanobacterium</dc:title>

    <dc:creator>DL Richardson</dc:creator>
    <dc:creator>RH Reed</dc:creator>
    <dc:creator>WDP Stewart</dc:creator>
    <dc:identifier>doi:10.1111/j.1574-6968.1983.tb00457.x</dc:identifier>
    <dc:source>FEMS Microbiology Letters, Vol. 18, No. 1-2. (1983), pp. 99-102.</dc:source>
    <dc:date>2008-06-24T21:27:24-00:00</dc:date>
    <prism:publicationYear>1983</prism:publicationYear>
    <prism:publicationName>FEMS Microbiology Letters</prism:publicationName>
    <prism:volume>18</prism:volume>
    <prism:number>1-2</prism:number>
    <prism:startingPage>99</prism:startingPage>
    <prism:endingPage>102</prism:endingPage>
    <prism:category>growth</prism:category>
    <prism:category>halotolerance</prism:category>
    <prism:category>pcc6803</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/2874542">
    <title>Conjugative transfer and autonomous replication of a promiscuous IncQ plasmid in the cyanobacterium Synechocystis PCC 6803</title>
    <link>http://www.citeulike.org/user/analogAI/article/2874542</link>
    <description>&lt;i&gt;Molecular and General Genetics MGG, Vol. 221, No. 1. (1 March 1990), pp. 129-133.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The promiscuous IncQ plasmid pKT210 (Cmr, Smr) is efficiently transferred by transpecific conjugation from Escherichia coli to the facultatively heterotrophic cyanobacterium Synechocystis PCC6803 when mobilized by a helper plasmid coding for IncP transfer functions. The IncQ plasmid is stably maintained in the cyanobacterium as an autonomously replicating multicopy plasmid with no detectable structural alterations and can be recovered by transformation back to E. coli when using a mcrA mcrB host. Thus, the replicative host-range of IncQ plasmids extends beyond purple bacteria to the distinct procaryotic taxon of cyanobacteria, allowing the use of these small plasmids as convenient cloning vectors in Synechocystis PCC6803 and presumably also in cyanobacteria that are not amenable to genetic transformation. In contrast, an IncQ plasmid bearing the TRP1 gene of Saccharomyces cerevisiae failed to replicate when transferred to that yeast by transformation.</description>
    <dc:title>Conjugative transfer and autonomous replication of a promiscuous IncQ plasmid in the cyanobacterium Synechocystis PCC 6803</dc:title>

    <dc:creator>Sabine Kreps</dc:creator>
    <dc:creator>Fabrice Ferino</dc:creator>
    <dc:creator>Christine Mosrin</dc:creator>
    <dc:creator>Jozef Gerits</dc:creator>
    <dc:creator>Max Mergeay</dc:creator>
    <dc:creator>Pierre Thuriaux</dc:creator>
    <dc:identifier>doi:10.1007/BF00280378</dc:identifier>
    <dc:source>Molecular and General Genetics MGG, Vol. 221, No. 1. (1 March 1990), pp. 129-133.</dc:source>
    <dc:date>2008-06-09T06:14:07-00:00</dc:date>
    <prism:publicationYear>1990</prism:publicationYear>
    <prism:publicationName>Molecular and General Genetics MGG</prism:publicationName>
    <prism:volume>221</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>129</prism:startingPage>
    <prism:endingPage>133</prism:endingPage>
    <prism:category>pcc6803</prism:category>
    <prism:category>plasmid</prism:category>
    <prism:category>transformation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/2874536">
    <title>A host-vector system for gene cloning in the cyanobacterium Synechocystis PCC 6803</title>
    <link>http://www.citeulike.org/user/analogAI/article/2874536</link>
    <description>&lt;i&gt;Molecular and General Genetics MGG, Vol. 204, No. 1. (1 July 1986), pp. 185-191.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Synechocystis 6803 contains at least four cryptic plasmids of 2.27 kb (pUS1, pUS2 and pUS3) and 5.20 kb (pUS4). The 1.70 kb HpaI fragments of the related plasmids pUS2 and pUS3 were cloned into the Apr gene of the E. coli plasmid pACYC177, yielding the Kmr hybrid plasmids pUF12 and pUF3 respectively. pUF3 recombines in Synechocystis 6803 with a 2.27 kb plasmid giving the Kmr shuttle vector pUF311. The 1.35 kb HaeII fragment containing the Cm2 gene of the E. coli plasmid pACYC184 was cloned in pUF311 generating the Cmr Kmr shuttle vector pFCLV7. Wild-type cells of Synechocystis 6803 are transformed, albeit poorly, by the plasmids pUF3, pUF12 and pFCLV7. pFCLV7 very efficiently transforms the SUF311 strain of Synechocystis 6803 containing pUF311 as a resident plasmid. This is due to recombination between the homologous parts of pFCLV7 and pUF311. For the same reason the strain SUF311 is also efficiently transformable by E. coli plasmids, as shown for pLF8, provided that they have some homology with the E. coli part of pUF311.</description>
    <dc:title>A host-vector system for gene cloning in the cyanobacterium Synechocystis PCC 6803</dc:title>

    <dc:creator>F Chauvat</dc:creator>
    <dc:creator>L Vries</dc:creator>
    <dc:creator>A Ende</dc:creator>
    <dc:creator>GA Arkel</dc:creator>
    <dc:identifier>doi:10.1007/BF00330208</dc:identifier>
    <dc:source>Molecular and General Genetics MGG, Vol. 204, No. 1. (1 July 1986), pp. 185-191.</dc:source>
    <dc:date>2008-06-09T06:08:59-00:00</dc:date>
    <prism:publicationYear>1986</prism:publicationYear>
    <prism:publicationName>Molecular and General Genetics MGG</prism:publicationName>
    <prism:volume>204</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>185</prism:startingPage>
    <prism:endingPage>191</prism:endingPage>
    <prism:category>pcc6803</prism:category>
    <prism:category>transformation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/2759729">
    <title>Engineering BioBrick vectors from BioBrick parts</title>
    <link>http://www.citeulike.org/user/analogAI/article/2759729</link>
    <description>&lt;i&gt;Journal of Biological Engineering, Vol. 2, No. 5. (14 April 2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Background The underlying goal of synthetic biology is to make the process of engineering biological systems easier. Recent work has focused on defining and developing standard biological parts. The technical standard that has gained the most traction in the synthetic biology community is the BioBrick standard for physical composition of genetic parts. Parts that conform to the BioBrick assembly standard are BioBrick standard biological parts. To date, over 2,000 BioBrick parts have been contributed to, and are available from, the Registry of Standard Biological Parts. Results Here we extended the same advantages of BioBrick standard biological parts to the plasmid-based vectors that are used to provide and propagate BioBrick parts. We developed a process for engineering BioBrick vectors from BioBrick parts. We designed a new set of BioBrick parts that encode many useful vector functions. We combined the new parts to make a BioBrick base vector that facilitates BioBrick vector construction. We demonstrated the utility of the process by constructing seven new BioBrick vectors. We also successfully used the resulting vectors to assemble and propagate other BioBrick standard biological parts. Conclusions We extended the principles of part reuse and standardization to BioBrick vectors. As a result, myriad new BioBrick vectors can be readily produced from all existing and newly designed BioBrick parts. We invite the synthetic biology community to (1) use the process to make and share new BioBrick vectors; (2) expand the current collection of BioBrick vector parts; and (3) characterize and improve the available collection of BioBrick vector parts.</description>
    <dc:title>Engineering BioBrick vectors from BioBrick parts</dc:title>

    <dc:creator>Reshma Shetty</dc:creator>
    <dc:creator>Drew Endy</dc:creator>
    <dc:creator>Thomas Knight</dc:creator>
    <dc:source>Journal of Biological Engineering, Vol. 2, No. 5. (14 April 2008)</dc:source>
    <dc:date>2008-05-06T04:31:21-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Journal of Biological Engineering</prism:publicationName>
    <prism:volume>2</prism:volume>
    <prism:number>5</prism:number>
    <prism:category>biobrick</prism:category>
    <prism:category>transformation</prism:category>
    <prism:category>vector</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/962574">
    <title>On the Rate of Diatom Growth</title>
    <link>http://www.citeulike.org/user/analogAI/article/962574</link>
    <description>&lt;i&gt;Journal of the Marine Biological Association of the United Kingdom, Vol. 19, No. 1. (1933), pp. 253-276.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Sterilized soil extract (possibly containing some growth-promoting factor), added to sea water with ample phosphate and nitrate, increases diatom growth.</description>
    <dc:title>On the Rate of Diatom Growth</dc:title>

    <dc:creator>HW Harvey</dc:creator>
    <dc:source>Journal of the Marine Biological Association of the United Kingdom, Vol. 19, No. 1. (1933), pp. 253-276.</dc:source>
    <dc:date>2006-11-26T23:27:04-00:00</dc:date>
    <prism:publicationYear>1933</prism:publicationYear>
    <prism:publicationName>Journal of the Marine Biological Association of the United Kingdom</prism:publicationName>
    <prism:volume>19</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>253</prism:startingPage>
    <prism:endingPage>276</prism:endingPage>
    <prism:category>clostorium_nitzschia</prism:category>
    <prism:category>diatom</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/928070">
    <title>CO2 fixation and ethanol production with microalgal photosynthesis and intracellular anaerobic fermentation</title>
    <link>http://www.citeulike.org/user/analogAI/article/928070</link>
    <description>&lt;i&gt;pp. 137-142.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Microalgae were screened from seawater. More than 250 strains were isolated, and some of the isolated strains and two strains from culture collections were tested to examine ethanol productivity. Some strains had high growth rate of 20-30 g dry biomass/m2/day and high starch content of more than 20% (dry base). A strain Chlorella vulgaris (IAM C-534) had a high starch content of 37%. Starch was extracted from the cells of the Chlorella, saccharified and fermented with yeasts; 65% of the ethanol-conversion rate was obtained as compared to the theoretical rate from starch. The algal starch proved to be a good source for ethanol production using the conventional process. As an example of another type of ethanol production process, intracellular starch fermentation under dark and anaerobic conditions was examined. All of the tested strains showed intracellular starch degradation and ethanol production, but the levels of ethanol production were significantly different from each other. Higher ethanol productions were obtained with Chlamydomonas reinhardtii (UTEX2247) and Sak-1 isolated from seawater. These showed a maximum ethanol concentration of 1 (w/w)%. The characteristics of intracellular ethanol production were examined with the Chlamydomonas. These results indicate that intracellular ethanol production is simpler and less energy intensive than the conventional ethanol-fermentation process.</description>
    <dc:title>CO2 fixation and ethanol production with microalgal photosynthesis and intracellular anaerobic fermentation</dc:title>

    <dc:creator>A Hirano</dc:creator>
    <dc:source>pp. 137-142.</dc:source>
    <dc:date>2006-11-04T07:52:34-00:00</dc:date>
    <prism:startingPage>137</prism:startingPage>
    <prism:endingPage>142</prism:endingPage>
    <prism:category>chlamydomonas_reinhardtii</prism:category>
    <prism:category>chlorella_vulgaris</prism:category>
    <prism:category>ethanol</prism:category>
    <prism:category>green_algae</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/812979">
    <title>Specificity grafting of human antibody frameworks selected from a phage display library: generation of a highly stable humanized anti-CD22 single-chain Fv fragment</title>
    <link>http://www.citeulike.org/user/analogAI/article/812979</link>
    <description>&lt;i&gt;Protein Eng., Vol. 16, No. 10. (1 October 2003), pp. 753-759.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A prerequisite for the enrichment of antibodies screened from phage display libraries is their stable expression on a phage during multiple selection rounds. Thus, if stringent panning procedures are employed, selection is simultaneously driven by antigen affinity, stability and solubility. To take advantage of robust pre-selected scaffolds of such molecules, we grafted single-chain Fv (scFv) antibodies, previously isolated from a human phage display library after multiple rounds of in vitro panning on tumor cells, with the specificity of the clinically established murine monoclonal anti-CD22 antibody RFB4. We show that a panel of grafted scFvs retained the specificity of the murine monoclonal antibody, bound to the target antigen with high affinity (6.4-9.6 nM), and exhibited exceptional biophysical stability with retention of 89-93% of the initial binding activity after 6 days of incubation in human serum at 37&#176;C. Selection of stable human scaffolds with high sequence identity to both the human germline and the rodent frameworks required only a small number of murine residues to be retained within the human frameworks in order to maintain the structural integrity of the antigen binding site. We expect this approach may be applicable for the rapid generation of highly stable humanized antibodies with low immunogenic potential. 10.1093/protein/gzg096</description>
    <dc:title>Specificity grafting of human antibody frameworks selected from a phage display library: generation of a highly stable humanized anti-CD22 single-chain Fv fragment</dc:title>

    <dc:creator>Jurgen Krauss</dc:creator>
    <dc:creator>Michaela Arndt</dc:creator>
    <dc:creator>Andrew Martin</dc:creator>
    <dc:creator>Huaitian Liu</dc:creator>
    <dc:creator>Susanna Rybak</dc:creator>
    <dc:source>Protein Eng., Vol. 16, No. 10. (1 October 2003), pp. 753-759.</dc:source>
    <dc:date>2006-08-22T19:36:33-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Protein Eng.</prism:publicationName>
    <prism:volume>16</prism:volume>
    <prism:number>10</prism:number>
    <prism:startingPage>753</prism:startingPage>
    <prism:endingPage>759</prism:endingPage>
    <prism:category>protein-engineering</prism:category>
    <prism:category>protein-stability</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/812975">
    <title>Identification of differences in the specificity-determining residues of antibodies that recognize antigens of different size: implications for the rational design of antibody repertoires.</title>
    <link>http://www.citeulike.org/user/analogAI/article/812975</link>
    <description>&lt;i&gt;J Mol Recognit, Vol. 17, No. 2. (r 2004), pp. 132-143.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Studies of antibodies of known three-dimensional structure have revealed that insertion and deletion of amino acids at the hypervariable loops change the canonical structures, thus generating differences in the antigen-binding site topography. Such differences determine the size of the antigen with which the antibody interacts. Here, 59 unique antibodies determined at a resolution of 3.0 &#197; or below, including 19 in complex with proteins, 18 with peptides and 22 with haptens, were analyzed to identify and characterize differences in the residues that are directly involved in the interaction with antigen, so-called specificity-determining residues (SDRs). It was found that antibodies use a similar number of SDRs to recognize proteins and peptides but contact haptens with five SDRs less. By using a score of SDR usage, differences in the location of the SDRs, depending on the type of antigen recognized, were then identified with precision. An analysis of the surface generated by the SDRs usage indicates that the differences found correlate well with the size of the antigen. Anti-protein antibodies have the largest SDR surface, with SDRs of high usage located in the edge of the surface. The SDR surface of anti-hapten antibodies is the smallest, with hot spots of contacts in the interior of the binding surface and buried in the V(L):V(H) interface. The SDR surface of anti-peptide antibodies has a size in between anti-protein and anti-hapten antibodies, with the SDRs of high usage located in the interior of the antigen-binding site but not buried as in anti-hapten antibodies. These findings led to a fine-tuning of the model correlating differences in the antigen-binding site topography with its preference to recognize antigens of different size. 
Therefore, it is discussed how this knowledge should help to design antibody repertoires biased toward the recognition of antigens of predefined size.</description>
    <dc:title>Identification of differences in the specificity-determining residues of antibodies that recognize antigens of different size: implications for the rational design of antibody repertoires.</dc:title>

    <dc:creator>JC Almagro</dc:creator>
    <dc:identifier>doi:10.1002/jmr.659</dc:identifier>
    <dc:source>J Mol Recognit, Vol. 17, No. 2. (r 2004), pp. 132-143.</dc:source>
    <dc:date>2006-08-22T19:29:13-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>J Mol Recognit</prism:publicationName>
    <prism:issn>0952-3499</prism:issn>
    <prism:volume>17</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>132</prism:startingPage>
    <prism:endingPage>143</prism:endingPage>
    <prism:category>immunoglobulin</prism:category>
    <prism:category>proteinrecognition</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/526137">
    <title>Inaugural Article: Finding important sites in protein sequences</title>
    <link>http://www.citeulike.org/user/analogAI/article/526137</link>
    <description>&lt;i&gt;PNAS, Vol. 99, No. 23. (12 November 2002), pp. 14764-14771.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;By using sequence information from an aligned protein family, a procedure is exhibited for finding sites that may be functionally or structurally critical to the protein. Features based on sequence conservation within subfamilies in the alignment and associations between sites are used to select the sites. The sites are subject to statistical evaluation correcting for phylogenetic bias in the collection of sequences. This method is applied to two families: the phycobiliproteins, light-harvesting proteins in cyanobacteria, red algae, and cryptomonads, and the globins that function in oxygen storage and transport. The sites identified by the procedure are located in key structural positions and merit further experimental study.</description>
    <dc:title>Inaugural Article: Finding important sites in protein sequences</dc:title>

    <dc:creator>Peter Bickel</dc:creator>
    <dc:creator>Katherina Kechris</dc:creator>
    <dc:creator>Philip Spector</dc:creator>
    <dc:creator>Gary Wedemayer</dc:creator>
    <dc:creator>Alexander Glazer</dc:creator>
    <dc:identifier>doi:10.1073/pnas.222508899</dc:identifier>
    <dc:source>PNAS, Vol. 99, No. 23. (12 November 2002), pp. 14764-14771.</dc:source>
    <dc:date>2006-03-02T02:17:57-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>PNAS</prism:publicationName>
    <prism:volume>99</prism:volume>
    <prism:number>23</prism:number>
    <prism:startingPage>14764</prism:startingPage>
    <prism:endingPage>14771</prism:endingPage>
    <prism:category>informationtheory</prism:category>
    <prism:category>residue-covariation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/785236">
    <title>Covariation of mutations in the V3 loop of human immunodeficiency virus type 1 envelope protein: an information theoretic analysis.</title>
    <link>http://www.citeulike.org/user/analogAI/article/785236</link>
    <description>&lt;i&gt;Proc Natl Acad Sci U S A, Vol. 90, No. 15. (1 August 1993), pp. 7176-7180.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The V3 loop of the human immunodeficiency virus type 1 (HIV-1) envelope protein is a highly variable region that is both functionally and immunologically important. Using available amino acid sequences from the V3 region, we have used an information theoretic quantity called mutual information, a measure of covariation, to quantify dependence between mutations in the loop. Certain pairs of sites, including non-contiguous sites along the sequence, do not have independent mutations but display considerable, statistically significant, covarying mutations as measured by mutual information. For the pairs of sites with the highest mutual information, specific amino acids were identified that were highly predictive of amino acids in the linked site. The observed interdependence between variable sites may have implications for structural or functional relationships; separate experimental evidence indicates functional linkage between some of the pairs of sites with high mutual information. Further specific mutational studies of the V3 loop's role in determining viral phenotype are suggested by our analyses. Also, the implications of our results may be important to consider for V3 peptide vaccine design. The methods used here are generally applicable to the study of variable proteins.</description>
    <dc:title>Covariation of mutations in the V3 loop of human immunodeficiency virus type 1 envelope protein: an information theoretic analysis.</dc:title>

    <dc:creator>BT Korber</dc:creator>
    <dc:creator>RM Farber</dc:creator>
    <dc:creator>DH Wolpert</dc:creator>
    <dc:creator>AS Lapedes</dc:creator>
    <dc:source>Proc Natl Acad Sci U S A, Vol. 90, No. 15. (1 August 1993), pp. 7176-7180.</dc:source>
    <dc:date>2006-08-04T01:34:19-00:00</dc:date>
    <prism:publicationYear>1993</prism:publicationYear>
    <prism:publicationName>Proc Natl Acad Sci U S A</prism:publicationName>
    <prism:issn>0027-8424</prism:issn>
    <prism:volume>90</prism:volume>
    <prism:number>15</prism:number>
    <prism:startingPage>7176</prism:startingPage>
    <prism:endingPage>7180</prism:endingPage>
    <prism:category>informationtheory</prism:category>
    <prism:category>residue-covariation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/785235">
    <title>Covariability of V3 loop amino acids.</title>
    <link>http://www.citeulike.org/user/analogAI/article/785235</link>
    <description>&lt;i&gt;AIDS Res Hum Retroviruses, Vol. 12, No. 15. (10 October 1996), pp. 1401-1411.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We reanalyzed for covariability a set of 308 human immunodeficiency virus type 1 (HIV-1) V3 loop amino acid sequences from the B envelope sequence subtype previously analyzed by Korber et al.,1 as well as a new set of 440 sequences that also included substantial numbers of sequences from subtypes A, D, and E. We used the measure employed by Korber et al., essentially the likelihood ratio statistic for independence, plus two additional measures as well as clade information to examine the new set and both data sets simultaneously. We set forth the following conclusions and observations. The eight most highly connected sites identified through these statistical approaches included all of the six residues previously shown to have determining roles in structure, immunologic recognition, virus phenotype, and host range; each of the seven pairs of covariant sites found by Korber were signaled by our additional two measures in the set of 308 sequences, although 2 or 3 dropped out of the examination of the set of 440 when the requirement of stringent significance was applied for some or all of the three tests, respectively; using the same criteria, a total of 20 (including 5 Korber et al. pairs) or a total of 6 (including 4 Korber et al. pairs) were found when the set of 440 was added. Several limitations to statistical analysis of this type of HIV sequence data were also noted. For example, the data sets were, by historical necessity, collected haphazardly. For example, it was not possible to separate substantially sized groups out according to time of or since infection, disease status, antiviral treatment, geography, etc. There was also an enormous &#34;wealth of significance&#34; within the data. For example, for one measure the 440 data set showed 233 of the 465 pairs of sites with a likelihood ratio statistic of &#60; 0.001. 
Last, most sites had consensus amino acids in 80% or more of the sequences; hence, there was an absence of data on many combinations of amino acids. Given the observed linkage between sites shown to be covariable and those known to have critical biological function, the statistical approaches we and Korber et al. have outlined may find use in predicting critical structural features of HIV proteins as targets for therapeutic intervention.</description>
    <dc:title>Covariability of V3 loop amino acids.</dc:title>

    <dc:creator>PJ Bickel</dc:creator>
    <dc:creator>PC Cosman</dc:creator>
    <dc:creator>RA Olshen</dc:creator>
    <dc:creator>PC Spector</dc:creator>
    <dc:creator>AG Rodrigo</dc:creator>
    <dc:creator>JI Mullins</dc:creator>
    <dc:source>AIDS Res Hum Retroviruses, Vol. 12, No. 15. (10 October 1996), pp. 1401-1411.</dc:source>
    <dc:date>2006-08-04T01:32:59-00:00</dc:date>
    <prism:publicationYear>1996</prism:publicationYear>
    <prism:publicationName>AIDS Res Hum Retroviruses</prism:publicationName>
    <prism:issn>0889-2229</prism:issn>
    <prism:volume>12</prism:volume>
    <prism:number>15</prism:number>
    <prism:startingPage>1401</prism:startingPage>
    <prism:endingPage>1411</prism:endingPage>
    <prism:category>informationtheory</prism:category>
    <prism:category>residue-covariation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/471650">
    <title>Finding the fittest fold: using the evolutionary record to design new proteins.</title>
    <link>http://www.citeulike.org/user/analogAI/article/471650</link>
    <description>&lt;i&gt;Cell, Vol. 122, No. 6. (23 September 2005), pp. 832-834.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;For many years, the holy grail of protein engineering has been the design of artificial amino acid sequences that fold into stable proteins with desired functions. In the current issue of Nature, two papers from the Ranganathan group (Russ et al., 2005; Socolich et al., 2005) report remarkable success in the design of artificial WW domains. Their method, termed statistical coupling analysis (Lockless and Ranganathan, 1999), does not use structural or physicochemical information but instead extracts information about essential patterns of amino acids from the evolutionary record.</description>
    <dc:title>Finding the fittest fold: using the evolutionary record to design new proteins.</dc:title>

    <dc:creator>RG Smock</dc:creator>
    <dc:creator>LM Gierasch</dc:creator>
    <dc:identifier>doi:10.1016/j.cell.2005.09.005</dc:identifier>
    <dc:source>Cell, Vol. 122, No. 6. (23 September 2005), pp. 832-834.</dc:source>
    <dc:date>2006-01-19T19:48:53-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Cell</prism:publicationName>
    <prism:issn>0092-8674</prism:issn>
    <prism:volume>122</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>832</prism:startingPage>
    <prism:endingPage>834</prism:endingPage>
    <prism:category>caig</prism:category>
    <prism:category>proteinfolding</prism:category>
    <prism:category>protein-structure</prism:category>
    <prism:category>residue-covariation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/638322">
    <title>Gaps in structurally similar proteins: towards improvement of multiple sequence alignment.</title>
    <link>http://www.citeulike.org/user/analogAI/article/638322</link>
    <description>&lt;i&gt;Proteins, Vol. 54, No. 1. (1 January 2004), pp. 71-87.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;An algorithm was developed to locally optimize gaps from the FSSP database. Over 2 million gaps were identified from all versus all FSSP structure comparisons, and datasets of non-identical gaps and flanking regions comprising between 90,000 and 135,000 sequence fragments were extracted for statistical analysis. Relative to background frequencies, gaps were enriched in residue types with small side chains and high turn propensity (D, G, N, P, S), and were depleted in residue types with hydrophobic side chains (C, F, I, L, V, W, Y). In contrast, regions flanking a gap exhibited opposite trends in amino acid frequencies, i.e., enrichment in hydrophobic residues and a high degree of secondary structure. Log-odds scores of residue type as a function of position in or around a gap were derived from the statistics. Three simple experiments demonstrated that these scores contained significant predictive information. First, regions where gaps were observed in single sequences taken from HOMSTRAD structure-based multiple sequence alignments generally scored higher than regions where gaps were not observed. Second, given the correct pairwise-aligned cores, the actual positions of gaps could be reproduced from sequence more accurately using the structurally-derived statistics than by using random pairwise alignments. Finally, revision of the Clustal-W residue-specific gap opening parameters with this new information improved the agreement of Clustal-W alignments with the structure-based alignments. At least three applications for these results are envisioned: improvement of gap penalties in pairwise (or multiple) sequence alignment, prediction of regions of single sequences likely (or unlikely) to contain indels, and more accurate placement of gaps in automated pairwise structure alignment.</description>
    <dc:title>Gaps in structurally similar proteins: towards improvement of multiple sequence alignment.</dc:title>

    <dc:creator>JO Wrabl</dc:creator>
    <dc:creator>NV Grishin</dc:creator>
    <dc:identifier>doi:10.1002/prot.10508</dc:identifier>
    <dc:source>Proteins, Vol. 54, No. 1. (1 January 2004), pp. 71-87.</dc:source>
    <dc:date>2006-05-17T23:48:53-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Proteins</prism:publicationName>
    <prism:issn>1097-0134</prism:issn>
    <prism:volume>54</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>71</prism:startingPage>
    <prism:endingPage>87</prism:endingPage>
    <prism:category>protein-sequence</prism:category>
    <prism:category>sequencealignment</prism:category>
    <prism:category>structuralalignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/682584">
    <title>Stability improvement of antibodies for extracellular and intracellular applications: CDR grafting to stable frameworks and structure-based framework engineering.</title>
    <link>http://www.citeulike.org/user/analogAI/article/682584</link>
    <description>&lt;i&gt;Methods, Vol. 34, No. 2. (October 2004), pp. 184-199.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;By combining the knowledge gained from an analysis of the biophysical properties of natural antibody variable domains, the effects of mutations obtained in directed evolution experiments, and the detailed structural comparison of antibodies, it has now become possible to engineer antibodies for higher thermodynamic stability and more efficient folding. This is particularly important when antibodies are to be used under conditions where the disulfide bonds cannot form, i.e., in intracellular applications (as &#34;intrabodies&#34;). We describe in detail two methods for the knowledge-based improvement of antibody stability and folding efficiency. While CDR grafting from a non-human to the most closely related human antibody framework is an established technique to reduce the immunogenicity of a therapeutic antibody, CDR grafting for stabilization implies the use of a more distantly related acceptor framework with superior biophysical characteristics. The use of such dissimilar frameworks requires particular attention to antigen contact residues outside the classical CDR definition and to residues capable of indirectly affecting the conformation of the antigen binding site. As a second alternative, the stability of a suboptimal framework can be improved by the introduction of point mutations designed to optimize key residue interactions. We describe the analysis methods used to identify such point mutations, which can be introduced all at once, while maintaining the framework features necessary for antigen binding. These rational approaches render the continued &#34;rediscovery&#34; of certain mutations by directed evolution unnecessary, but they can also be used in conjunction with such methods to discover even better molecules.</description>
    <dc:title>Stability improvement of antibodies for extracellular and intracellular applications: CDR grafting to stable frameworks and structure-based framework engineering.</dc:title>

    <dc:creator>S Ewert</dc:creator>
    <dc:creator>A Honegger</dc:creator>
    <dc:creator>A Plückthun</dc:creator>
    <dc:identifier>doi:10.1016/j.ymeth.2004.04.007</dc:identifier>
    <dc:source>Methods, Vol. 34, No. 2. (October 2004), pp. 184-199.</dc:source>
    <dc:date>2006-06-03T06:02:47-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Methods</prism:publicationName>
    <prism:issn>1046-2023</prism:issn>
    <prism:volume>34</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>184</prism:startingPage>
    <prism:endingPage>199</prism:endingPage>
    <prism:category>cdr</prism:category>
    <prism:category>immunoglobulin</prism:category>
    <prism:category>protein-engineering</prism:category>
    <prism:category>protein-stability</prism:category>
    <prism:category>protein-structure</prism:category>
    <prism:category>residue-covariation</prism:category>
    <prism:category>review-article</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/342964">
    <title>Engineering novel binding proteins from nonimmunoglobulin domains</title>
    <link>http://www.citeulike.org/user/analogAI/article/342964</link>
    <description>&lt;i&gt;Nature Biotechnology, Vol. 23, No. 10. (06 October 2005), pp. 1257-1268.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Not all adaptive immune systems use the immunoglobulin fold as the basis for specific recognition molecules: sea lampreys, for example, have evolved an adaptive immune system that is based on leucine-rich repeat proteins. Additionally, many other proteins, not necessarily involved in adaptive immunity, mediate specific high-affinity interactions. Such alternatives to immunoglobulins represent attractive starting points for the design of novel binding molecules for research and clinical applications. Indeed, through progress and increased experience in library design and selection technologies, gained not least from working with synthetic antibody libraries, researchers have now exploited many of these novel scaffolds as tailor-made affinity reagents. Significant progress has been made not only in the basic science of generating specific binding molecules, but also in applications of the selected binders in laboratory procedures, proteomics, diagnostics and therapy. Challenges ahead include identifying applications where these novel proteins can not only be an alternative, but can enable approaches so far deemed technically impossible, and delineate those therapeutic applications commensurate with the molecular properties of the respective proteins.</description>
    <dc:title>Engineering novel binding proteins from nonimmunoglobulin domains</dc:title>

    <dc:creator>Kaspar Binz</dc:creator>
    <dc:creator>Patrick Amstutz</dc:creator>
    <dc:creator>Andreas Plückthun</dc:creator>
    <dc:identifier>doi:10.1038/nbt1127</dc:identifier>
    <dc:source>Nature Biotechnology, Vol. 23, No. 10. (06 October 2005), pp. 1257-1268.</dc:source>
    <dc:date>2005-10-06T22:55:30-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Nature Biotechnology</prism:publicationName>
    <prism:issn>1087-0156</prism:issn>
    <prism:volume>23</prism:volume>
    <prism:number>10</prism:number>
    <prism:startingPage>1257</prism:startingPage>
    <prism:endingPage>1268</prism:endingPage>
    <prism:publisher>Nature Publishing Group</prism:publisher>
    <prism:category>immunoglobulin</prism:category>
    <prism:category>protein-engineering</prism:category>
    <prism:category>proteinrecognition</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/615468">
    <title>Protein secondary structure: entropy, correlations and prediction.</title>
    <link>http://www.citeulike.org/user/analogAI/article/615468</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 20, No. 10. (10 July 2004), pp. 1603-1611.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;MOTIVATION: Is protein secondary structure primarily determined by local interactions between residues closely spaced along the amino acid backbone or by non-local tertiary interactions? To answer this question, we measure the entropy densities of primary and secondary structure sequences, and the local inter-sequence mutual information density. RESULTS: We find that the important inter-sequence interactions are short ranged, that correlations between neighboring amino acids are essentially uninformative and that only one-fourth of the total information needed to determine the secondary structure is available from local inter-sequence correlations. These observations support the view that the majority of most proteins fold via a cooperative process where secondary and tertiary structure form concurrently. Moreover, existing single-sequence secondary structure prediction algorithms are almost optimal, and we should not expect a dramatic improvement in prediction accuracy. AVAILABILITY: Both the data sets and analysis code are freely available from our Web site at http://compbio.berkeley.edu/</description>
    <dc:title>Protein secondary structure: entropy, correlations and prediction.</dc:title>

    <dc:creator>GE Crooks</dc:creator>
    <dc:creator>SE Brenner</dc:creator>
    <dc:source>Bioinformatics, Vol. 20, No. 10. (10 July 2004), pp. 1603-1611.</dc:source>
    <dc:date>2006-05-05T19:51:39-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1367-4803</prism:issn>
    <prism:volume>20</prism:volume>
    <prism:number>10</prism:number>
    <prism:startingPage>1603</prism:startingPage>
    <prism:endingPage>1611</prism:endingPage>
    <prism:category>informationtheory</prism:category>
    <prism:category>protein-structure</prism:category>
    <prism:category>residue-covariation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/613992">
    <title>Improving contact predictions by the combination of correlated mutations and other sources of sequence information.</title>
    <link>http://www.citeulike.org/user/analogAI/article/613992</link>
    <description>&lt;i&gt;Fold Des, Vol. 2, No. 3. (1997)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We have previously developed a method for predicting interresidue contacts using information about correlated mutations in multiple sequence alignments. The predictions generated with this method were clearly better than random but not enough for their use in de novo protein folding experiments. We assess the possibility of improving contact predictions combining information from the following variables: correlated mutations, sequence conservation, sequence separation along the chain, alignment stability, family size, residue-specific contact occupancy and formation of contact networks. The application of a protocol for combining these independent variables leads to contact predictions that are on average two times better than those obtained initially with correlated mutations. Correlated mutations can be effectively combined with other types of information derived from multiple sequence alignments. Among the different variables tried, sequence conservation and contact density are particularly relevant for the combination with correlated mutations.</description>
    <dc:title>Improving contact predictions by the combination of correlated mutations and other sources of sequence information.</dc:title>

    <dc:creator>O Olmea</dc:creator>
    <dc:creator>A Valencia</dc:creator>
    <dc:source>Fold Des, Vol. 2, No. 3. (1997)</dc:source>
    <dc:date>2006-05-05T04:26:26-00:00</dc:date>
    <prism:publicationYear>1997</prism:publicationYear>
    <prism:publicationName>Fold Des</prism:publicationName>
    <prism:issn>1359-0278</prism:issn>
    <prism:volume>2</prism:volume>
    <prism:number>3</prism:number>
    <prism:category>protein-structure</prism:category>
    <prism:category>residue-covariation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/613991">
    <title>A graphical interface for correlated mutations and other protein structure prediction methods.</title>
    <link>http://www.citeulike.org/user/analogAI/article/613991</link>
    <description>&lt;i&gt;Comput Appl Biosci, Vol. 13, No. 3. (June 1997), pp. 319-321.&lt;/i&gt;</description>
    <dc:title>A graphical interface for correlated mutations and other protein structure prediction methods.</dc:title>

    <dc:creator>F Pazos</dc:creator>
    <dc:creator>O Olmea</dc:creator>
    <dc:creator>A Valencia</dc:creator>
    <dc:source>Comput Appl Biosci, Vol. 13, No. 3. (June 1997), pp. 319-321.</dc:source>
    <dc:date>2006-05-05T04:15:32-00:00</dc:date>
    <prism:publicationYear>1997</prism:publicationYear>
    <prism:publicationName>Comput Appl Biosci</prism:publicationName>
    <prism:issn>0266-7061</prism:issn>
    <prism:volume>13</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>319</prism:startingPage>
    <prism:endingPage>321</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>residue-covariation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/613990">
    <title>Correlated mutations and residue contacts in proteins.</title>
    <link>http://www.citeulike.org/user/analogAI/article/613990</link>
    <description>&lt;i&gt;Proteins, Vol. 18, No. 4. (April 1994), pp. 309-317.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The maintenance of protein function and structure constrains the evolution of amino acid sequences. This fact can be exploited to interpret correlated mutations observed in a sequence family as an indication of probable physical contact in three dimensions. Here we present a simple and general method to analyze correlations in mutational behavior between different positions in a multiple sequence alignment. We then use these correlations to predict contact maps for each of 11 protein families and compare the result with the contacts determined by crystallography. For the most strongly correlated residue pairs predicted to be in contact, the prediction accuracy ranges from 37 to 68% and the improvement ratio relative to a random prediction from 1.4 to 5.1. Predicted contact maps can be used as input for the calculation of protein tertiary structure, either from sequence information alone or in combination with experimental information.</description>
    <dc:title>Correlated mutations and residue contacts in proteins.</dc:title>

    <dc:creator>U Göbel</dc:creator>
    <dc:creator>C Sander</dc:creator>
    <dc:creator>R Schneider</dc:creator>
    <dc:creator>A Valencia</dc:creator>
    <dc:identifier>doi:10.1002/prot.340180402</dc:identifier>
    <dc:source>Proteins, Vol. 18, No. 4. (April 1994), pp. 309-317.</dc:source>
    <dc:date>2006-05-05T03:58:37-00:00</dc:date>
    <prism:publicationYear>1994</prism:publicationYear>
    <prism:publicationName>Proteins</prism:publicationName>
    <prism:issn>0887-3585</prism:issn>
    <prism:volume>18</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>309</prism:startingPage>
    <prism:endingPage>317</prism:endingPage>
    <prism:category>protein-sequence</prism:category>
    <prism:category>residue-covariation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/613989">
    <title>Native protein sequences are close to optimal for their structures.</title>
    <link>http://www.citeulike.org/user/analogAI/article/613989</link>
    <description>&lt;i&gt;Proc Natl Acad Sci U S A, Vol. 97, No. 19. (12 September 2000), pp. 10383-10388.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;How large is the volume of sequence space that is compatible with a given protein structure? Starting from random sequences, low free energy sequences were generated for 108 protein backbone structures by using a Monte Carlo optimization procedure and a free energy function based primarily on Lennard-Jones packing interactions and the Lazaridis-Karplus implicit solvation model. Remarkably, in the designed sequences 51% of the core residues and 27% of all residues were identical to the amino acids in the corresponding positions in the native sequences. The lowest free energy sequences obtained for ensembles of native-like backbone structures were also similar to the native sequence. Furthermore, both the individual residue frequencies and the covariances between pairs of positions observed in the very large SH3 domain family were recapitulated in core sequences designed for SH3 domain structures. Taken together, these results suggest that the volume of sequence space optimal for a protein structure is surprisingly restricted to a region around the native sequence.</description>
    <dc:title>Native protein sequences are close to optimal for their structures.</dc:title>

    <dc:creator>B Kuhlman</dc:creator>
    <dc:creator>D Baker</dc:creator>
    <dc:identifier>doi:10.1073/pnas.97.19.10383</dc:identifier>
    <dc:source>Proc Natl Acad Sci U S A, Vol. 97, No. 19. (12 September 2000), pp. 10383-10388.</dc:source>
    <dc:date>2006-05-05T03:48:44-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:publicationName>Proc Natl Acad Sci U S A</prism:publicationName>
    <prism:issn>0027-8424</prism:issn>
    <prism:volume>97</prism:volume>
    <prism:number>19</prism:number>
    <prism:startingPage>10383</prism:startingPage>
    <prism:endingPage>10388</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>residue-covariation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/382660">
    <title>Stabilization centers in proteins: identification, characterization and predictions.</title>
    <link>http://www.citeulike.org/user/analogAI/article/382660</link>
    <description>&lt;i&gt;J Mol Biol, Vol. 272, No. 4. (3 October 1997), pp. 597-612.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Methods are presented to locate residues, stabilization center elements, which are expected to stabilize protein structures by preventing their decay with their cooperative long range interactions. Artificial neural network-based algorithms were developed to predict these residues from the primary structure of single proteins and from the amino acid sequences of homologous proteins. The prediction accuracy using only single sequence information is 65%, but the incorporation of evolutionary information in the form of multiple alignments and conservation scores raises the efficiency by 3%. The composition, relative accessibility, number and type of interactions, conservation and the X-ray thermal factor of the identified stabilization center residues are different, not only from the whole data set but from the rest of the long range interacting residues as well. The most frequent stabilization center residues are usually found at buried positions and have a hydrophobic or aromatic side-chain, but some polar or charged residues also play an important role in the stabilization. The stabilization centers show significant difference in the composition and in the type of linked secondary structural elements compared with the rest of the residues. The performed structural and sequential conservation analysis showed the higher conservation of stabilization centers over protein families. The relation of the proposed stabilization centers to folding nuclei is also discussed.</description>
    <dc:title>Stabilization centers in proteins: identification, characterization and predictions.</dc:title>

    <dc:creator>Z Dosztányi</dc:creator>
    <dc:creator>A Fiser</dc:creator>
    <dc:creator>I Simon</dc:creator>
    <dc:identifier>doi:10.1006/jmbi.1997.1242</dc:identifier>
    <dc:source>J Mol Biol, Vol. 272, No. 4. (3 October 1997), pp. 597-612.</dc:source>
    <dc:date>2005-11-07T09:48:17-00:00</dc:date>
    <prism:publicationYear>1997</prism:publicationYear>
    <prism:publicationName>J Mol Biol</prism:publicationName>
    <prism:issn>0022-2836</prism:issn>
    <prism:volume>272</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>597</prism:startingPage>
    <prism:endingPage>612</prism:endingPage>
    <prism:category>protein-stability</prism:category>
    <prism:category>protein-structure</prism:category>
    <prism:category>residue-covariation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/3547">
    <title>Simulating protein evolution in sequence and structure space.</title>
    <link>http://www.citeulike.org/user/analogAI/article/3547</link>
    <description>&lt;i&gt;Curr Opin Struct Biol, Vol. 14, No. 2. (April 2004), pp. 202-207.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Naturally occurring proteins comprise a special subset of all plausible sequences and structures selected through evolution. Simulating protein evolution with simplified and all-atom models has shed light on the evolutionary dynamics of protein populations, the nature of evolved sequences and structures, and the extent to which today's proteins are shaped by selection pressures on folding, structure and function. Extensive mapping of the native structure, stability and folding rate in sequence space using lattice proteins has revealed organizational principles of the sequence/structure map important for evolutionary dynamics. Evolutionary simulations with lattice proteins have highlighted the importance of fitness landscapes, evolutionary mechanisms, population dynamics and sequence space entropy in shaping the generic properties of proteins. Finally, evolutionary-like simulations with all-atom models, in particular computational protein design, have helped identify the dominant selection pressures on naturally occurring protein sequences and structures.</description>
    <dc:title>Simulating protein evolution in sequence and structure space.</dc:title>

    <dc:creator>Y Xia</dc:creator>
    <dc:creator>M Levitt</dc:creator>
    <dc:identifier>doi:10.1016/j.sbi.2004.03.001</dc:identifier>
    <dc:source>Curr Opin Struct Biol, Vol. 14, No. 2. (April 2004), pp. 202-207.</dc:source>
    <dc:date>2004-12-13T22:52:45-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Curr Opin Struct Biol</prism:publicationName>
    <prism:issn>0959-440X</prism:issn>
    <prism:volume>14</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>202</prism:startingPage>
    <prism:endingPage>207</prism:endingPage>
    <prism:category>computer-simulation</prism:category>
    <prism:category>proteinfolding</prism:category>
    <prism:category>protein-sequence</prism:category>
    <prism:category>protein-structure</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/445819">
    <title>The identification of conserved interactions within the SH3 domain by alignment of sequences and structures.</title>
    <link>http://www.citeulike.org/user/analogAI/article/445819</link>
    <description>&lt;i&gt;Protein Sci, Vol. 9, No. 11. (November 2000), pp. 2170-2180.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The SH3 domain, comprised of approximately 60 residues, is found within a wide variety of proteins, and is a mediator of protein-protein interactions. Due to the large number of SH3 domain sequences and structures in the databases, this domain provides one of the best available systems for the examination of sequence and structural conservation within a protein family. In this study, a large and diverse alignment of SH3 domain sequences was constructed, and the pattern of conservation within this alignment was compared to conserved structural features, as deduced from analysis of eighteen different SH3 domain structures. Seventeen SH3 domain structures solved in the presence of bound peptide were also examined to identify positions that are consistently most important in mediating the peptide-binding function of this domain. Although residues at the two most conserved positions in the alignment are directly involved in peptide binding, residues at most other conserved positions play structural roles, such as stabilizing turns or comprising the hydrophobic core. Surprisingly, several highly conserved side-chain to main-chain hydrogen bonds were observed in the functionally crucial RT-Src loop between residues with little direct involvement in peptide binding. These hydrogen bonds may be important for maintaining this region in the precise conformation necessary for specific peptide recognition. In addition, a previously unrecognized yet highly conserved beta-bulge was identified in the second beta-strand of the domain, which appears to provide a necessary kink in this strand, allowing it to hydrogen bond to both sheets comprising the fold.</description>
    <dc:title>The identification of conserved interactions within the SH3 domain by alignment of sequences and structures.</dc:title>

    <dc:creator>SM Larson</dc:creator>
    <dc:creator>AR Davidson</dc:creator>
    <dc:source>Protein Sci, Vol. 9, No. 11. (November 2000), pp. 2170-2180.</dc:source>
    <dc:date>2005-12-21T03:57:04-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:publicationName>Protein Sci</prism:publicationName>
    <prism:issn>0961-8368</prism:issn>
    <prism:volume>9</prism:volume>
    <prism:number>11</prism:number>
    <prism:startingPage>2170</prism:startingPage>
    <prism:endingPage>2180</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>residue-covariation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/216611">
    <title>Evolutionarily conserved networks of residues mediate allosteric communication in proteins.</title>
    <link>http://www.citeulike.org/user/analogAI/article/216611</link>
    <description>&lt;i&gt;Nat Struct Biol, Vol. 10, No. 1. (January 2003), pp. 59-69.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A fundamental goal in cellular signaling is to understand allosteric communication, the process by which signals originating at one site in a protein propagate reliably to affect distant functional sites. The general principles of protein structure that underlie this process remain unknown. Here, we describe a sequence-based statistical method for quantitatively mapping the global network of amino acid interactions in a protein. Application of this method for three structurally and functionally distinct protein families (G protein-coupled receptors, the chymotrypsin class of serine proteases and hemoglobins) reveals a surprisingly simple architecture for amino acid interactions in each protein family: a small subset of residues forms physically connected networks that link distant functional sites in the tertiary structure. Although small in number, residues comprising the network show excellent correlation with the large body of mechanistic data available for each family. The data suggest that evolutionarily conserved sparse networks of amino acid interactions represent structural motifs for allosteric communication in proteins.</description>
    <dc:title>Evolutionarily conserved networks of residues mediate allosteric communication in proteins.</dc:title>

    <dc:creator>GM Süel</dc:creator>
    <dc:creator>SW Lockless</dc:creator>
    <dc:creator>MA Wall</dc:creator>
    <dc:creator>R Ranganathan</dc:creator>
    <dc:identifier>doi:10.1038/nsb881</dc:identifier>
    <dc:source>Nat Struct Biol, Vol. 10, No. 1. (January 2003), pp. 59-69.</dc:source>
    <dc:date>2005-06-02T11:24:11-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Nat Struct Biol</prism:publicationName>
    <prism:issn>1072-8368</prism:issn>
    <prism:volume>10</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>59</prism:startingPage>
    <prism:endingPage>69</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>residue-covariation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/445816">
    <title>Analysis of covariation in an SH3 domain sequence alignment: applications in tertiary contact prediction and the design of compensating hydrophobic core substitutions.</title>
    <link>http://www.citeulike.org/user/analogAI/article/445816</link>
    <description>&lt;i&gt;J Mol Biol, Vol. 303, No. 3. (27 October 2000), pp. 433-446.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We have analyzed sequence covariation in an alignment of 266 non-redundant SH3 domain sequences using chi-squared statistical methods. Artifactual covariations arising from close evolutionary relationships among certain sequence subgroups were eliminated using empirically derived sequence diversity thresholds. This covariation detection method was able to predict residue-residue contacts (side-chain centres of mass within 8 A) in the structure of the SH3 domain with an accuracy of 85 %, which is greater than that achieved in many previous covariation studies. In examining the positions involved most frequently in covariations, we discovered a dramatic over-representation of a subset of five hydrophobic core positions. This covariation information was used to design second and third site substitutions that could compensate for highly destabilizing hydrophobic core substitutions in the Fyn SH3 domain, thus providing experimental data to validate the covariation analysis. The testing of our covariation detection method on 15 other alignments showed that the accuracy of contact prediction is highly variable depending on which sequence alignment is used, and useful levels of prediction accuracy were obtained with only approximately one-third of alignments. The results presented here provide insight into the difficulties inherent in covariation analysis, and suggest that it may have limited usefulness in tertiary structure prediction. On the other hand, our ability to use covariation analysis to design stabilizing combinations of hydrophobic core substitutions attests to its potential utility for gaining deeper insight into the stability determinants and functional mechanisms of proteins with known three-dimensional structures.</description>
    <dc:title>Analysis of covariation in an SH3 domain sequence alignment: applications in tertiary contact prediction and the design of compensating hydrophobic core substitutions.</dc:title>

    <dc:creator>SM Larson</dc:creator>
    <dc:creator>AA Di Nardo</dc:creator>
    <dc:creator>AR Davidson</dc:creator>
    <dc:identifier>doi:10.1006/jmbi.2000.4146</dc:identifier>
    <dc:source>J Mol Biol, Vol. 303, No. 3. (27 October 2000), pp. 433-446.</dc:source>
    <dc:date>2005-12-21T03:50:17-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:publicationName>J Mol Biol</prism:publicationName>
    <prism:issn>0022-2836</prism:issn>
    <prism:volume>303</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>433</prism:startingPage>
    <prism:endingPage>446</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>residue-covariation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/332173">
    <title>Evolutionarily Conserved Pathways of Energetic Connectivity in Protein Families</title>
    <link>http://www.citeulike.org/user/analogAI/article/332173</link>
    <description>&lt;i&gt;Science, Vol. 286, No. 5438. (08 October 1999), pp. 295-299.&lt;/i&gt;</description>
    <dc:title>Evolutionarily Conserved Pathways of Energetic Connectivity in Protein Families</dc:title>

    <dc:creator>Steve Lockless</dc:creator>
    <dc:creator>Rama Ranganathan</dc:creator>
    <dc:identifier>doi:10.1126/science.286.5438.295</dc:identifier>
    <dc:source>Science, Vol. 286, No. 5438. (08 October 1999), pp. 295-299.</dc:source>
    <dc:date>2005-09-25T12:48:05-00:00</dc:date>
    <prism:publicationYear>1999</prism:publicationYear>
    <prism:publicationName>Science</prism:publicationName>
    <prism:volume>286</prism:volume>
    <prism:number>5438</prism:number>
    <prism:startingPage>295</prism:startingPage>
    <prism:endingPage>299</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>residue-covariation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/439298">
    <title>Information content of individual genetic sequences.</title>
    <link>http://www.citeulike.org/user/analogAI/article/439298</link>
    <description>&lt;i&gt;J Theor Biol, Vol. 189, No. 4. (21 December 1997), pp. 427-441.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Related genetic sequences having a common function can be described by Shannon's information measure and depicted graphically by a sequence logo. Though useful for many purposes, sequence logos only show the average sequence conservation, and inferring the conservation for individual sequences is difficult. This limitation is overcome by the individual information ( R i) technique described here. The method begins by generating a weight matrix from the frequencies of each nucleotide or amino acid at each position of the aligned sequences. This matrix is then applied to the sequences themselves to determine the sequence conservation of each individual sequence. The matrix is unique because the average of these assignments is the total sequence conservation, and there is only one way to construct such a matrix. For binding sites on polynucleotides, the weight matrix has a natural cut off that distinguishes functional sequences from other sequences. R i values are on an absolute scale measured in bits of information so the conservation of different biological functions can be compared with one another. The matrix can be used to rank-order the sequences, to search for new sequences, to compare sequences to other quantitative data such as binding energy or distance between binding sites, to distinguish mutations from polymorphisms, to design sequences of a given strength, and to detect errors in databases. The R i method has been used to identify previously undescribed but experimentally verified DNA binding sites. The individual information distribution was determined for E. coli ribosome binding sites, bacterial Fis binding sites, and human donor and acceptor splice junctions, among others. 
The distributions demonstrate clearly that the consensus sequence is highly unusual, and hence is a poor method to describe naturally occurring binding sites.</description>
    <dc:title>Information content of individual genetic sequences.</dc:title>

    <dc:creator>TD Schneider</dc:creator>
    <dc:source>J Theor Biol, Vol. 189, No. 4. (21 December 1997), pp. 427-441.</dc:source>
    <dc:date>2005-12-16T03:41:40-00:00</dc:date>
    <prism:publicationYear>1997</prism:publicationYear>
    <prism:publicationName>J Theor Biol</prism:publicationName>
    <prism:issn>0022-5193</prism:issn>
    <prism:volume>189</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>427</prism:startingPage>
    <prism:endingPage>441</prism:endingPage>
    <prism:category>informationtheory</prism:category>
    <prism:category>sequencealignment</prism:category>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/439296">
    <title>Dirichlet mixtures: a method for improved detection of weak but significant protein sequence homology.</title>
    <link>http://www.citeulike.org/user/analogAI/article/439296</link>
    <description>&lt;i&gt;Comput Appl Biosci, Vol. 12, No. 4. (August 1996), pp. 327-345.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We present a method for condensing the information in multiple alignments of proteins into a mixture of Dirichlet densities over amino acid distributions. Dirichlet mixture densities are designed to be combined with observed amino acid frequencies to form estimates of expected amino acid probabilities at each position in a profile, hidden Markov model or other statistical model. These estimates give a statistical model greater generalization capacity, so that remotely related family members can be more reliably recognized by the model. This paper corrects the previously published formula for estimating these expected probabilities, and contains complete derivations of the Dirichlet mixture formulas, methods for optimizing the mixtures to match particular databases, and suggestions for efficient implementation.</description>
    <dc:title>Dirichlet mixtures: a method for improved detection of weak but significant protein sequence homology.</dc:title>

    <dc:creator>K Sjölander</dc:creator>
    <dc:creator>K Karplus</dc:creator>
    <dc:creator>M Brown</dc:creator>
    <dc:creator>R Hughey</dc:creator>
    <dc:creator>A Krogh</dc:creator>
    <dc:creator>IS Mian</dc:creator>
    <dc:creator>D Haussler</dc:creator>
    <dc:source>Comput Appl Biosci, Vol. 12, No. 4. (August 1996), pp. 327-345.</dc:source>
    <dc:date>2005-12-16T03:26:41-00:00</dc:date>
    <prism:publicationYear>1996</prism:publicationYear>
    <prism:publicationName>Comput Appl Biosci</prism:publicationName>
    <prism:issn>0266-7061</prism:issn>
    <prism:volume>12</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>327</prism:startingPage>
    <prism:endingPage>345</prism:endingPage>
    <prism:category>sequencealignment</prism:category>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/438271">
    <title>Position-based sequence weights.</title>
    <link>http://www.citeulike.org/user/analogAI/article/438271</link>
    <description>&lt;i&gt;J Mol Biol, Vol. 243, No. 4. (4 November 1994), pp. 574-578.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Sequence weighting methods have been used to reduce redundancy and emphasize diversity in multiple sequence alignment and searching applications. Each of these methods is based on a notion of distance between a sequence and an ancestral or generalized sequence. We describe a different approach, which bases weights on the diversity observed at each position in the alignment, rather than on a sequence distance measure. These position-based weights make minimal assumptions, are simple to compute, and perform well in comprehensive evaluations.</description>
    <dc:title>Position-based sequence weights.</dc:title>

    <dc:creator>S Henikoff</dc:creator>
    <dc:creator>JG Henikoff</dc:creator>
    <dc:source>J Mol Biol, Vol. 243, No. 4. (4 November 1994), pp. 574-578.</dc:source>
    <dc:date>2005-12-15T00:14:22-00:00</dc:date>
    <prism:publicationYear>1994</prism:publicationYear>
    <prism:publicationName>J Mol Biol</prism:publicationName>
    <prism:issn>0022-2836</prism:issn>
    <prism:volume>243</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>574</prism:startingPage>
    <prism:endingPage>578</prism:endingPage>
    <prism:category>sequencealignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/420500">
    <title>FUGUE: sequence-structure homology recognition using environment-specific substitution tables and structure-dependent gap penalties.</title>
    <link>http://www.citeulike.org/user/analogAI/article/420500</link>
    <description>&lt;i&gt;J Mol Biol, Vol. 310, No. 1. (29 June 2001), pp. 243-257.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;FUGUE, a program for recognizing distant homologues by sequence-structure comparison (http://www-cryst.bioc.cam.ac.uk/fugue/), has three key features. (1) Improved environment-specific substitution tables. Substitutions of an amino acid in a protein structure are constrained by its local structural environment, which can be defined in terms of secondary structure, solvent accessibility, and hydrogen bonding status. The environment-specific substitution tables have been derived from structural alignments in the HOMSTRAD database (http://www-cryst.bioc.cam.ac.uk/homstrad/). (2) Automatic selection of alignment algorithm with detailed structure-dependent gap penalties. FUGUE uses the global-local algorithm to align a sequence-structure pair when they greatly differ in length and uses the global algorithm in other cases. The gap penalty at each position of the structure is determined according to its solvent accessibility, its position relative to the secondary structure elements (SSEs) and the conservation of the SSEs. (3) Combined information from both multiple sequences and multiple structures. FUGUE is designed to align multiple sequences against multiple structures to enrich the conservation/variation information. We demonstrate that the combination of these three key features implemented in FUGUE improves both homology recognition performance and alignment accuracy.</description>
    <dc:title>FUGUE: sequence-structure homology recognition using environment-specific substitution tables and structure-dependent gap penalties.</dc:title>

    <dc:creator>J Shi</dc:creator>
    <dc:creator>TL Blundell</dc:creator>
    <dc:creator>K Mizuguchi</dc:creator>
    <dc:identifier>doi:10.1006/jmbi.2001.4762</dc:identifier>
    <dc:source>J Mol Biol, Vol. 310, No. 1. (29 June 2001), pp. 243-257.</dc:source>
    <dc:date>2005-12-03T00:20:20-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:publicationName>J Mol Biol</prism:publicationName>
    <prism:issn>0022-2836</prism:issn>
    <prism:volume>310</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>243</prism:startingPage>
    <prism:endingPage>257</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>sequencealignment</prism:category>
    <prism:category>structuralalignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/416744">
    <title>Multiple protein sequence alignment from tertiary structure comparison: assignment of global and residue confidence levels.</title>
    <link>http://www.citeulike.org/user/analogAI/article/416744</link>
    <description>&lt;i&gt;Proteins, Vol. 14, No. 2. (October 1992), pp. 309-323.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;An algorithm is presented for the accurate and rapid generation of multiple protein sequence alignments from tertiary structure comparisons. A preliminary multiple sequence alignment is performed using sequence information, which then determines an initial superposition of the structures. A structure comparison algorithm is applied to all pairs of proteins in the superimposed set and a similarity tree calculated. Multiple sequence alignments are then generated by following the tree from the branches to the root. At each branchpoint of the tree, a structure-based sequence alignment and coordinate transformations are output, with the multiple alignment of all structures output at the root. The algorithm encoded in STAMP (STructural Alignment of Multiple Proteins) is shown to give alignments in good agreement with published structural accounts within the dehydrogenase fold domains, globins, and serine proteinases. In order to reduce the need for visual verification, two similarity indices are introduced to determine the quality of each generated structural alignment. Sc quantifies the global structural similarity between pairs or groups of proteins, whereas Pij' provides a normalized measure of the confidence in the alignment of each residue. STAMP alignments have the quality of each alignment characterized by Sc and Pij' values and thus provide a reproducible resource for studies of residue conservation within structural motifs.</description>
    <dc:title>Multiple protein sequence alignment from tertiary structure comparison: assignment of global and residue confidence levels.</dc:title>

    <dc:creator>RB Russell</dc:creator>
    <dc:creator>GJ Barton</dc:creator>
    <dc:identifier>doi:10.1002/prot.340140216</dc:identifier>
    <dc:source>Proteins, Vol. 14, No. 2. (October 1992), pp. 309-323.</dc:source>
    <dc:date>2005-12-01T01:02:43-00:00</dc:date>
    <prism:publicationYear>1992</prism:publicationYear>
    <prism:publicationName>Proteins</prism:publicationName>
    <prism:issn>0887-3585</prism:issn>
    <prism:volume>14</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>309</prism:startingPage>
    <prism:endingPage>323</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>structuralalignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/101933">
    <title>Multiple structural alignment by secondary structures: algorithm and applications.</title>
    <link>http://www.citeulike.org/user/analogAI/article/101933</link>
    <description>&lt;i&gt;Protein Sci, Vol. 12, No. 11. (November 2003), pp. 2492-2507.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We present MASS (Multiple Alignment by Secondary Structures), a novel highly efficient method for structural alignment of multiple protein molecules and detection of common structural motifs. MASS is based on a two-level alignment, using both secondary structure and atomic representation. Utilizing secondary structure information aids in filtering out noisy solutions and achieves efficiency and robustness. Currently, only a few methods are available for addressing the multiple structural alignment task. In addition to using secondary structure information, the advantage of MASS as compared to these methods is that it is a combination of several important characteristics: (1) While most existing methods are based on series of pairwise comparisons, and thus might miss optimal global solutions, MASS is truly multiple, considering all the molecules simultaneously; (2) MASS is sequence order-independent and thus capable of detecting nontopological structural motifs; (3) MASS is able to detect not only structural motifs, shared by all input molecules, but also motifs shared only by subsets of the molecules. Here, we show the application of MASS to various protein ensembles. We demonstrate its ability to handle a large number (order of tens) of molecules, to detect nontopological motifs and to find biologically meaningful alignments within nonpredefined subsets of the input. In particular, we show how by using conserved structural motifs, one can guide protein-protein docking, which is a notoriously difficult problem. MASS is freely available at http://bioinfo3d.cs.tau.ac.il/MASS/.</description>
    <dc:title>Multiple structural alignment by secondary structures: algorithm and applications.</dc:title>

    <dc:creator>O Dror</dc:creator>
    <dc:creator>H Benyamini</dc:creator>
    <dc:creator>R Nussinov</dc:creator>
    <dc:creator>HJ Wolfson</dc:creator>
    <dc:source>Protein Sci, Vol. 12, No. 11. (November 2003), pp. 2492-2507.</dc:source>
    <dc:date>2005-02-23T16:17:59-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Protein Sci</prism:publicationName>
    <prism:issn>0961-8368</prism:issn>
    <prism:volume>12</prism:volume>
    <prism:number>11</prism:number>
    <prism:startingPage>2492</prism:startingPage>
    <prism:endingPage>2507</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>structuralalignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/143674">
    <title>The immunoglobulin fold family: sequence analysis and 3D structure comparisons.</title>
    <link>http://www.citeulike.org/user/analogAI/article/143674</link>
    <description>&lt;i&gt;Protein Eng, Vol. 12, No. 7. (July 1999), pp. 563-571.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Fifty-two 3D structures of Ig-like domains covering the immunoglobulin fold family (IgFF) were compared and classified according to the conservation of their secondary structures. Members of the IgFF are distantly related proteins or evolutionarily unrelated proteins with a similar fold, the Ig fold. In this paper, a multiple structural alignment of the conserved common core is described and the correlation between corresponding sequences is discussed. While the members of the IgFF exhibit wide heterogeneity in terms of tissue and species distribution or functional implications, the 3D structures of these domains are far more conserved than their sequences. We define topologically equivalent residues in the Ig-like domains, describe the hydrophobic common cores and discuss the presence of additional strands. The disulfide bridges, not necessary for the stability of the Ig fold, may have an effect on the compactness of the domains. Based upon sequence and structure analysis, we propose the introduction of two new subtypes (C3 and C4) to the previous classifications, in addition to a new global structural classification. The very low mean sequence identity between subgroups of the IgFF suggests the occurrence of both divergent and convergent evolutionary processes, explaining the wide diversity of the superfamily. Finally, this review suggests that hydrophobic residues constituting the common hydrophobic cores are important clues to explain how highly divergent sequences can adopt a similar fold.</description>
    <dc:title>The immunoglobulin fold family: sequence analysis and 3D structure comparisons.</dc:title>

    <dc:creator>DM Halaby</dc:creator>
    <dc:creator>A Poupon</dc:creator>
    <dc:creator>J Mornon</dc:creator>
    <dc:source>Protein Eng, Vol. 12, No. 7. (July 1999), pp. 563-571.</dc:source>
    <dc:date>2005-03-31T20:45:23-00:00</dc:date>
    <prism:publicationYear>1999</prism:publicationYear>
    <prism:publicationName>Protein Eng</prism:publicationName>
    <prism:issn>0269-2139</prism:issn>
    <prism:volume>12</prism:volume>
    <prism:number>7</prism:number>
    <prism:startingPage>563</prism:startingPage>
    <prism:endingPage>571</prism:endingPage>
    <prism:category>immunoglobulin</prism:category>
    <prism:category>proteinfolding</prism:category>
    <prism:category>proteinrecognition</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/410985">
    <title>Determining the roles of different chain fragments in recognition of immunoglobulin fold</title>
    <link>http://www.citeulike.org/user/analogAI/article/410985</link>
    <description>&lt;i&gt;Protein Eng., Vol. 15, No. 1. (1 January 2002), pp. 13-19.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We examine sequence-to-structure specificity of beta-structural fragments of immunoglobulin domains. The structure specificity of separate chain fragments is estimated by computing the Z-score values in recognition of the native structure in gapless threading tests. To improve the accuracy of our calculations we use energy averaging over diverse homologs of immunoglobulin domains. We show that the interactions between residues of beta-structure are more determinant in recognition of the native structure than the interactions within the whole chain molecule. This result distinguishes immunoglobulins from more typical proteins where the interactions between residues of the whole chain normally recognize the native fold more accurately than interactions between the residues of the secondary structure residues alone [Reva,B. and Topiol,S. (2000) Biocomputing: Proceedings of the Pacific Symposium. World Scientific Publishing Co., pp. 168-178]. We also find that the predominant contributions of the secondary structure are produced by the four central beta-strands that form the core of the molecule. The results of this study allow us through quantitative means to understand the architecture of immunoglobulin molecules. Comparing the fold recognition data for different chain fragments one can say that beta-strands form a rigid frame for immunoglobulin molecules, whereas loops, with no structural role, can develop a broad variety of binding specificities. It is well known that protein function is determined by specific portions of a protein chain. This study suggests that the whole protein structure can be predominantly determined by a few fragments of chain which form the structural framework of the molecule. 
This idea may help in better understanding the mechanisms of protein evolution: strengthening a protein structure in the key framework-forming regions allows mutations and flexibility in other chain regions.</description>
    <dc:title>Determining the roles of different chain fragments in recognition of immunoglobulin fold</dc:title>

    <dc:creator>B Reva</dc:creator>
    <dc:creator>A Kister</dc:creator>
    <dc:creator>S Topiol</dc:creator>
    <dc:creator>I Gelfand</dc:creator>
    <dc:source>Protein Eng., Vol. 15, No. 1. (1 January 2002), pp. 13-19.</dc:source>
    <dc:date>2005-11-29T05:47:03-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Protein Eng.</prism:publicationName>
    <prism:volume>15</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>13</prism:startingPage>
    <prism:endingPage>19</prism:endingPage>
    <prism:category>immunoglobulin</prism:category>
    <prism:category>proteinfolding</prism:category>
    <prism:category>protein-structure</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/410984">
    <title>The importance of loop length in the folding of an immunoglobulin domain</title>
    <link>http://www.citeulike.org/user/analogAI/article/410984</link>
    <description>&lt;i&gt;Protein Engineering, Design and Selection, Vol. 17, No. 5. (1 May 2004), pp. 443-453.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Immunoglobulin (Ig)-like proteins have been shown to fold following formation of a nucleus comprising interactions between residues that are distant in the primary sequence. What role do the loops connecting these nucleus residues play? Here, the importance of loops connecting beta-strands in different sheets of the Ig fold is investigated, by insertion of five glycine residues into the B-C loop of an Ig domain from human titin, TI I27. The folding pathway of this elongated 'pseudo wild-type' TI I27 is probed using protein engineering and Phi-value analysis. The Phi-values calculated for mutants within the pseudo wild-type protein indicate that the folding nucleus in wild-type TI I27 is conserved, supporting the hypothesis that the inter-sheet loop is not critical to the formation of a long-range folding nucleus.</description>
    <dc:title>The importance of loop length in the folding of an immunoglobulin domain</dc:title>

    <dc:creator>Caroline Wright</dc:creator>
    <dc:creator>John Christodoulou</dc:creator>
    <dc:creator>Christopher Dobson</dc:creator>
    <dc:creator>Jane Clarke</dc:creator>
    <dc:identifier>doi:10.1093/protein/gzh052</dc:identifier>
    <dc:source>Protein Engineering, Design and Selection, Vol. 17, No. 5. (1 May 2004), pp. 443-453.</dc:source>
    <dc:date>2005-11-29T05:44:53-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Protein Engineering, Design and Selection</prism:publicationName>
    <prism:volume>17</prism:volume>
    <prism:number>5</prism:number>
    <prism:startingPage>443</prism:startingPage>
    <prism:endingPage>453</prism:endingPage>
    <prism:category>immunoglobulin</prism:category>
    <prism:category>proteinfolding</prism:category>
    <prism:category>protein-structure</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/211877">
    <title>A method for simultaneous alignment of multiple protein structures.</title>
    <link>http://www.citeulike.org/user/analogAI/article/211877</link>
    <description>&lt;i&gt;Proteins, Vol. 56, No. 1. (1 July 2004), pp. 143-156.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Here, we present MultiProt, a fully automated highly efficient technique to detect multiple structural alignments of protein structures. MultiProt finds the common geometrical cores between input molecules. To date, most methods for multiple alignment start from the pairwise alignment solutions. This may lead to a small overall alignment. In contrast, our method derives multiple alignments from simultaneous superpositions of input molecules. Further, our method does not require that all input molecules participate in the alignment. Actually, it efficiently detects high scoring partial multiple alignments for all possible number of molecules in the input. To demonstrate the power of MultiProt, we provide a number of case studies. First, we demonstrate known multiple alignments of protein structures to illustrate the performance of MultiProt. Next, we present various biological applications. These include: (1) a partial alignment of hinge-bent domains; (2) identification of functional groups of G-proteins; (3) analysis of binding sites; and (4) protein-protein interface alignment. Some applications preserve the sequence order of the residues in the alignment, whereas others are order-independent. It is their residue sequence order-independence that allows application of MultiProt to derive multiple alignments of binding sites and of protein-protein interfaces, making MultiProt an extremely useful structural tool.</description>
    <dc:title>A method for simultaneous alignment of multiple protein structures.</dc:title>

    <dc:creator>M Shatsky</dc:creator>
    <dc:creator>R Nussinov</dc:creator>
    <dc:creator>HJ Wolfson</dc:creator>
    <dc:identifier>doi:10.1002/prot.10628</dc:identifier>
    <dc:source>Proteins, Vol. 56, No. 1. (1 July 2004), pp. 143-156.</dc:source>
    <dc:date>2005-05-26T09:39:27-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Proteins</prism:publicationName>
    <prism:issn>1097-0134</prism:issn>
    <prism:volume>56</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>143</prism:startingPage>
    <prism:endingPage>156</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>structuralalignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/407127">
    <title>HMM Logos for visualization of protein families.</title>
    <link>http://www.citeulike.org/user/analogAI/article/407127</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 5 (21 January 2004)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND: Profile Hidden Markov Models (pHMMs) are a widely used tool for protein family research. Up to now, however, there exists no method to visualize all of their central aspects graphically in an intuitively understandable way. RESULTS: We present a visualization method that incorporates both emission and transition probabilities of the pHMM, thus extending sequence logos introduced by Schneider and Stephens. For each emitting state of the pHMM, we display a stack of letters. The stack height is determined by the deviation of the position's letter emission frequencies from the background frequencies. The stack width visualizes both the probability of reaching the state (the hitting probability) and the expected number of letters the state emits during a pass through the model (the state's expected contribution). A web interface offering online creation of HMM Logos and the corresponding source code can be found at the Logos web server of the Max Planck Institute for Molecular Genetics http://logos.molgen.mpg.de. CONCLUSIONS: We demonstrate that HMM Logos can be a useful tool for the biologist: We use them to highlight differences between two homologous subfamilies of GTPases, Rab and Ras, and we show that they are able to indicate structural elements of Ras.</description>
    <dc:title>HMM Logos for visualization of protein families.</dc:title>

    <dc:creator>B Schuster-Böckler</dc:creator>
    <dc:creator>J Schultz</dc:creator>
    <dc:creator>S Rahmann</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-5-7</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 5 (21 January 2004)</dc:source>
    <dc:date>2005-11-24T10:28:39-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:issn>1471-2105</prism:issn>
    <prism:volume>5</prism:volume>
    <prism:category>sequencelogos</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/407125">
    <title>Sequence logos: a new way to display consensus sequences.</title>
    <link>http://www.citeulike.org/user/analogAI/article/407125</link>
    <description>&lt;i&gt;Nucleic Acids Res, Vol. 18, No. 20. (25 October 1990), pp. 6097-6100.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A graphical method is presented for displaying the patterns in a set of aligned sequences. The characters representing the sequence are stacked on top of each other for each position in the aligned sequences. The height of each letter is made proportional to its frequency, and the letters are sorted so the most common one is on top. The height of the entire stack is then adjusted to signify the information content of the sequences at that position. From these 'sequence logos', one can determine not only the consensus sequence but also the relative frequency of bases and the information content (measured in bits) at every position in a site or sequence. The logo displays both significant residues and subtle sequence patterns.</description>
    <dc:title>Sequence logos: a new way to display consensus sequences.</dc:title>

    <dc:creator>TD Schneider</dc:creator>
    <dc:creator>RM Stephens</dc:creator>
    <dc:source>Nucleic Acids Res, Vol. 18, No. 20. (25 October 1990), pp. 6097-6100.</dc:source>
    <dc:date>2005-11-24T10:26:51-00:00</dc:date>
    <prism:publicationYear>1990</prism:publicationYear>
    <prism:publicationName>Nucleic Acids Res</prism:publicationName>
    <prism:issn>0305-1048</prism:issn>
    <prism:volume>18</prism:volume>
    <prism:number>20</prism:number>
    <prism:startingPage>6097</prism:startingPage>
    <prism:endingPage>6100</prism:endingPage>
    <prism:category>sequencelogos</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/407124">
    <title>WebLogo: A Sequence Logo Generator</title>
    <link>http://www.citeulike.org/user/analogAI/article/407124</link>
    <description>&lt;i&gt;Genome Res., Vol. 14, No. 6. (1 June 2004), pp. 1188-1190.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;WebLogo generates sequence logos, graphical representations of the patterns within a multiple sequence alignment. Sequence logos provide a richer and more precise description of sequence similarity than consensus sequences and can rapidly reveal significant features of the alignment otherwise difficult to perceive. Each logo consists of stacks of letters, one stack for each position in the sequence. The overall height of each stack indicates the sequence conservation at that position (measured in bits), whereas the height of symbols within the stack reflects the relative frequency of the corresponding amino or nucleic acid at that position. WebLogo has been enhanced recently with additional features and options, to provide a convenient and highly configurable sequence logo generator. A command line interface and the complete, open WebLogo source code are available for local installation and customization.</description>
    <dc:title>WebLogo: A Sequence Logo Generator</dc:title>

    <dc:creator>Gavin Crooks</dc:creator>
    <dc:creator>Gary Hon</dc:creator>
    <dc:creator>John-Marc Chandonia</dc:creator>
    <dc:creator>Steven Brenner</dc:creator>
    <dc:identifier>doi:10.1101/gr.849004</dc:identifier>
    <dc:source>Genome Res., Vol. 14, No. 6. (1 June 2004), pp. 1188-1190.</dc:source>
    <dc:date>2005-11-24T10:24:28-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Genome Res.</prism:publicationName>
    <prism:volume>14</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>1188</prism:startingPage>
    <prism:endingPage>1190</prism:endingPage>
    <prism:category>sequencelogos</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/406753">
    <title>Definition of general topological equivalence in protein structures. A procedure involving comparison of properties and relationships through simulated annealing and dynamic programming.</title>
    <link>http://www.citeulike.org/user/analogAI/article/406753</link>
    <description>&lt;i&gt;J Mol Biol, Vol. 212, No. 2. (20 March 1990), pp. 403-428.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A protein is defined as an indexed string of elements at each level in the hierarchy of protein structure: sequence, secondary structure, super-secondary structure, etc. The elements, for example, residues or secondary structure segments such as helices or beta-strands, are associated with a series of properties and can be involved in a number of relationships with other elements. Element-by-element dissimilarity matrices are then computed and used in the alignment procedure based on the sequence alignment algorithm of Needleman &#38; Wunsch, expanded by the simulated annealing technique to take into account relationships as well as properties. The utility of this method for exploring the variability of various aspects of protein structure and for comparing distantly related proteins is demonstrated by multiple alignment of serine proteinases, aspartic proteinase lobes and globins.</description>
    <dc:title>Definition of general topological equivalence in protein structures. A procedure involving comparison of properties and relationships through simulated annealing and dynamic programming.</dc:title>

    <dc:creator>A Sali</dc:creator>
    <dc:creator>TL Blundell</dc:creator>
    <dc:source>J Mol Biol, Vol. 212, No. 2. (20 March 1990), pp. 403-428.</dc:source>
    <dc:date>2005-11-24T00:59:49-00:00</dc:date>
    <prism:publicationYear>1990</prism:publicationYear>
    <prism:publicationName>J Mol Biol</prism:publicationName>
    <prism:issn>0022-2836</prism:issn>
    <prism:volume>212</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>403</prism:startingPage>
    <prism:endingPage>428</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>sequencealignment</prism:category>
    <prism:category>structuralalignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/406744">
    <title>An integrated approach to the analysis and modeling of protein sequences and structures. III. A comparative study of sequence conservation in protein structural families using multiple structural alignments.</title>
    <link>http://www.citeulike.org/user/analogAI/article/406744</link>
    <description>&lt;i&gt;J Mol Biol, Vol. 301, No. 3. (18 August 2000), pp. 691-711.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The information required to generate a protein structure is contained in its amino acid sequence, but how three-dimensional information is mapped onto a linear sequence is still incompletely understood. Multiple structure alignments of similar protein structures have been used to investigate conserved sequence features but contradictory results have been obtained, due, in large part, to the absence of subjective criteria to be used in the construction of sequence profiles and in the quantitative comparison of alignment results. Here, we report a new procedure for multiple structure alignment and use it to construct structure-based sequence profiles for similar proteins. The definition of &#34;similar&#34; is based on the structural alignment procedure and on the protein structural distance (PSD) described in paper I of this series, which offers an objective measure for protein structure relationships. Our approach is tested in two well-studied groups of proteins; serine proteases and Ig-like proteins. It is demonstrated that the quality of a sequence profile generated by a multiple structure alignment is quite sensitive to the PSD used as a threshold for the inclusion of proteins in the alignment. Specifically, if the proteins included in the aligned set are too distant in structure from one another, there will be a dilution of information and patterns that are relevant to a subset of the proteins are likely to be lost. In order to understand better how the same three-dimensional information can be encoded in seemingly unrelated sequences, structure-based sequence profiles are constructed for subsets of proteins belonging to nine superfolds. We identify patterns of relatively conserved residues in each subset of proteins. 
It is demonstrated that the most conserved residues are generally located in the regions where tertiary interactions occur and that are relatively conserved in structure. Nevertheless, the conservation patterns are relatively weak in all cases studied, indicating that structure-determining factors that do not require a particular sequential arrangement of amino acids, such as secondary structure propensities and hydrophobic interactions, are important in encoding protein fold information. In general, we find that similar structures can fold without having a set of highly conserved residue clusters or a well-conserved sequence profile; indeed, in some cases there is no apparent conservation pattern common to structures with the same fold. Thus, when a group of proteins exhibits a common and well-defined sequence pattern, it is more likely that these sequences have a close evolutionary relationship rather than the similarities having arisen from the structural requirements of a given fold.</description>
    <dc:title>An integrated approach to the analysis and modeling of protein sequences and structures. III. A comparative study of sequence conservation in protein structural families using multiple structural alignments.</dc:title>

    <dc:creator>AS Yang</dc:creator>
    <dc:creator>B Honig</dc:creator>
    <dc:identifier>doi:10.1006/jmbi.2000.3975</dc:identifier>
    <dc:source>J Mol Biol, Vol. 301, No. 3. (18 August 2000), pp. 691-711.</dc:source>
    <dc:date>2005-11-23T23:46:48-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:publicationName>J Mol Biol</prism:publicationName>
    <prism:issn>0022-2836</prism:issn>
    <prism:volume>301</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>691</prism:startingPage>
    <prism:endingPage>711</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>sequencealignment</prism:category>
    <prism:category>structuralalignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/165167">
    <title>An integrated approach to the analysis and modeling of protein sequences and structures. II. On the relationship between sequence and structural similarity for proteins that are not obviously related in sequence.</title>
    <link>http://www.citeulike.org/user/analogAI/article/165167</link>
    <description>&lt;i&gt;J Mol Biol, Vol. 301, No. 3. (18 August 2000), pp. 679-689.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Here, we discuss the relationship between protein sequence and protein structural similarity. It is established that a protein structural distance (PSD) of 2.0 is a threshold above which two proteins are unlikely to have a detectable pairwise sequence relationship. A precise correlation is established between the level of sequence similarity, defined by a normalized Smith-Waterman score, and the probability that two proteins will have a similar structure (defined by pairwise PSD&#60;2). This correlation can be used in evaluating the likelihood for success in a comparative modeling procedure. We establish the existence of a correlation between sequence and structural similarity for pairs of proteins that are related in structure but whose sequence relationship is not detectable using standard pairwise sequence alignments. Although it is well known that there is a close relationship between sequence and structural similarity for pairwise sequence identities greater than about 30 %, there has been little discussion as to the possible existence of such a relationship for pairs of proteins in or below the twilight zone of sequence similarity (&#60;25 % pairwise sequence identity). Possible implications of our results for the evolution of protein structure are discussed.</description>
    <dc:title>An integrated approach to the analysis and modeling of protein sequences and structures. II. On the relationship between sequence and structural similarity for proteins that are not obviously related in sequence.</dc:title>

    <dc:creator>AS Yang</dc:creator>
    <dc:creator>B Honig</dc:creator>
    <dc:identifier>doi:10.1006/jmbi.2000.3974</dc:identifier>
    <dc:source>J Mol Biol, Vol. 301, No. 3. (18 August 2000), pp. 679-689.</dc:source>
    <dc:date>2005-04-20T00:29:35-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:publicationName>J Mol Biol</prism:publicationName>
    <prism:issn>0022-2836</prism:issn>
    <prism:volume>301</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>679</prism:startingPage>
    <prism:endingPage>689</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>structuralalignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/165166">
    <title>An integrated approach to the analysis and modeling of protein sequences and structures. I. Protein structural alignment and a quantitative measure for protein structural distance.</title>
    <link>http://www.citeulike.org/user/analogAI/article/165166</link>
    <description>&lt;i&gt;J Mol Biol, Vol. 301, No. 3. (18 August 2000), pp. 665-678.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We have devised and implemented in PrISM (protein informatics system for modeling) a new measure of protein structural relationships, the protein structural distance (PSD). The PSD is designed to describe relationships between protein structures in quantitative rather than descriptive terms and is applicable both when two structures are very similar, and when they are very different. It is calculated with a structural alignment procedure that uses double dynamic programming to align secondary structure elements and an iterative rigid body superposition that minimizes the root-mean-square deviation of C(alpha) atoms. The alignment algorithm, as implemented on a modest workstation, is computationally efficient, allowing for large-scale structural comparisons. PSD scores for more than one and a half million pairs of proteins were calculated and compared to the discrete classification of proteins in the SCOP database. The PSD scores, which were obtained automatically, are in large part consistent with the manually derived classifications in SCOP. Discrepancies do arise, however, due, in part, to the fact that SCOP uses criteria other than structural similarity to derive classifications while the PrISM procedure is exclusively structure based. Analysis of PSD scores suggests that there is a continuous aspect of protein conformation space, even though various classification schemes are extremely useful. The use of a continuous measure for structural distance between all pairs of proteins allows us, as described in the two accompanying papers to derive sequence/structure relationships in a more quantitative way than has previously been possible. 
An important strength of the approach implemented in PrISM is its ability to address many different kinds of queries interactively, making its structural comparison procedure a convenient computational tool that complements structural classification databases such as SCOP and CATH.</description>
    <dc:title>An integrated approach to the analysis and modeling of protein sequences and structures. I. Protein structural alignment and a quantitative measure for protein structural distance.</dc:title>

    <dc:creator>AS Yang</dc:creator>
    <dc:creator>B Honig</dc:creator>
    <dc:identifier>doi:10.1006/jmbi.2000.3973</dc:identifier>
    <dc:source>J Mol Biol, Vol. 301, No. 3. (18 August 2000), pp. 665-678.</dc:source>
    <dc:date>2005-04-20T00:28:40-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:publicationName>J Mol Biol</prism:publicationName>
    <prism:issn>0022-2836</prism:issn>
    <prism:volume>301</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>665</prism:startingPage>
    <prism:endingPage>678</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>structuralalignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/405427">
    <title>An attempt to define allergen-specific molecular surface features: a bioinformatic approach</title>
    <link>http://www.citeulike.org/user/analogAI/article/405427</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 21, No. 23. (1 December 2005), pp. 4201-4204.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Allergens are proteins that elicit T helper lymphocyte type 2 (Th2) responses culminating in IgE antibody production and allergic disease. However, we have no answer to the fundamental question of why certain proteins are allergens, while others are not. We hypothesized that analysis of the surface of diverse allergens may reveal common structural features which might enable them to be recognized as Th2-inducing antigens by cells of the innate immune system. We have therefore used the ConSurf server to search for allergen-specific motifs. This has enabled us to identify residue conservation patterns in the homologues of Ara t 8 (plant profilin), Act c 1 (actinidin), Bet v 1 (plant pathogenesis-related protein) and Ves v 5 (venom allergen). The results demonstrate the presence of allergen-specific patches consisting of an unusually high proportion of surface-exposed hydrophobic residues. The patches that have been identified may represent molecular patterns recognizable by cells of the innate immune system. Contact: farouk.shakib@nottingham.ac.uk Supplementary Information: http://www.nottingham.ac.uk/immunology/research/BI</description>
    <dc:title>An attempt to define allergen-specific molecular surface features: a bioinformatic approach</dc:title>

    <dc:creator>Ruta Furmonaviciene</dc:creator>
    <dc:creator>Brian Sutton</dc:creator>
    <dc:creator>Fabian Glaser</dc:creator>
    <dc:creator>Charlie Laughton</dc:creator>
    <dc:creator>Nick Jones</dc:creator>
    <dc:creator>Herb Sewell</dc:creator>
    <dc:creator>Farouk Shakib</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/bti700</dc:identifier>
    <dc:source>Bioinformatics, Vol. 21, No. 23. (1 December 2005), pp. 4201-4204.</dc:source>
    <dc:date>2005-11-23T08:24:46-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:volume>21</prism:volume>
    <prism:number>23</prism:number>
    <prism:startingPage>4201</prism:startingPage>
    <prism:endingPage>4204</prism:endingPage>
    <prism:category>allergy</prism:category>
    <prism:category>proteinrecognition</prism:category>
    <prism:category>protein-structure</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/405424">
    <title>The biology of IGE and the basis of allergic disease.</title>
    <link>http://www.citeulike.org/user/analogAI/article/405424</link>
    <description>&lt;i&gt;Annu Rev Immunol, Vol. 21 (2003), pp. 579-628.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Allergic individuals exposed to minute quantities of allergen experience an immediate response. Immediate hypersensitivity reflects the permanent sensitization of mucosal mast cells by allergen-specific IgE antibodies bound to their high-affinity receptors (FcepsilonRI). A combination of factors contributes to such long-lasting sensitization of the mast cells. They include the homing of mast cells to mucosal tissues, the local synthesis of IgE, the induction of FcepsilonRI expression on mast cells by IgE, the consequent downregulation of FcgammaR (through an insufficiency of the common gamma-chains), and the exceptionally slow dissociation of IgE from FcepsilonRI. To understand the mechanism of the immediate hypersensitivity phenomenon, we need explanations of why IgE antibodies are synthesized in preference to IgG in mucosal tissues and why the IgE is so tenaciously retained on mast cell-surface receptors. There is now compelling evidence that the microenvironment of mucosal tissues of allergic disease favors class switching to IgE; and the exceptionally high affinity of IgE for FcepsilonRI can now be interpreted in terms of the recently determined crystal structures of IgE-FcepsilonRI and IgG-FcgammaR complexes. The rate of local IgE synthesis can easily compensate for the rate of the antibody dissociation from its receptors on mucosal mast cells. Effective mechanisms ensure that allergic reactions are confined to mucosal tissues, thereby minimizing the risk of systemic anaphylaxis.</description>
    <dc:title>The biology of IGE and the basis of allergic disease.</dc:title>

    <dc:creator>HJ Gould</dc:creator>
    <dc:creator>BJ Sutton</dc:creator>
    <dc:creator>AJ Beavil</dc:creator>
    <dc:creator>RL Beavil</dc:creator>
    <dc:creator>N McCloskey</dc:creator>
    <dc:creator>HA Coker</dc:creator>
    <dc:creator>D Fear</dc:creator>
    <dc:creator>L Smurthwaite</dc:creator>
    <dc:identifier>doi:10.1146/annurev.immunol.21.120601.141103</dc:identifier>
    <dc:source>Annu Rev Immunol, Vol. 21 (2003), pp. 579-628.</dc:source>
    <dc:date>2005-11-23T07:57:09-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Annu Rev Immunol</prism:publicationName>
    <prism:issn>0732-0582</prism:issn>
    <prism:volume>21</prism:volume>
    <prism:startingPage>579</prism:startingPage>
    <prism:endingPage>628</prism:endingPage>
    <prism:category>allergy</prism:category>
    <prism:category>ige</prism:category>
    <prism:category>immunoglobulin</prism:category>
    <prism:category>review-article</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/403892">
    <title>Frequency of gaps observed in a structurally aligned protein pair database suggests a simple gap penalty function</title>
    <link>http://www.citeulike.org/user/analogAI/article/403892</link>
    <description>&lt;i&gt;Nucl. Acids Res., Vol. 32, No. 9. (20 May 2004), pp. 2838-2843.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Gap penalty is an important component of the scoring scheme that is needed when searching for homologous proteins and for accurate alignment of protein sequences. Most homology search and sequence alignment algorithms employ a heuristic 'affine gap penalty' scheme q + r x n, in which q is the penalty for opening a gap, r the penalty for extending it and n the gap length. In order to devise a more rational scoring scheme, we examined the pattern of gaps that occur in a database of structurally aligned protein domain pairs. We find that the logarithm of the frequency of gaps varies linearly with the length of the gap, but with a break at a gap of length 3, and is well approximated by two linear regression lines with R2 values of 1.0 and 0.99. The bilinear behavior is retained when gaps are categorized by secondary structures of the two residues flanking the gap. Similar results were obtained when another, totally independent, structurally aligned protein pair database was used. These results suggest a modification of the affine gap penalty function.</description>
    <dc:title>Frequency of gaps observed in a structurally aligned protein pair database suggests a simple gap penalty function</dc:title>

    <dc:creator>Nalin Goonesekere</dc:creator>
    <dc:creator>Byungkook Lee</dc:creator>
    <dc:source>Nucl. Acids Res., Vol. 32, No. 9. (20 May 2004), pp. 2838-2843.</dc:source>
    <dc:date>2005-11-22T02:42:33-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Nucl. Acids Res.</prism:publicationName>
    <prism:volume>32</prism:volume>
    <prism:number>9</prism:number>
    <prism:startingPage>2838</prism:startingPage>
    <prism:endingPage>2843</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>structuralalignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/349120">
    <title>Protein structure alignment by incremental combinatorial extension (CE) of the optimal path.</title>
    <link>http://www.citeulike.org/user/analogAI/article/349120</link>
    <description>&lt;i&gt;Protein Eng, Vol. 11, No. 9. (September 1998), pp. 739-747.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A new algorithm is reported which builds an alignment between two protein structures. The algorithm involves a combinatorial extension (CE) of an alignment path defined by aligned fragment pairs (AFPs) rather than the more conventional techniques using dynamic programming and Monte Carlo optimization. AFPs, as the name suggests, are pairs of fragments, one from each protein, which confer structure similarity. AFPs are based on local geometry, rather than global features such as orientation of secondary structures and overall topology. Combinations of AFPs that represent possible continuous alignment paths are selectively extended or discarded thereby leading to a single optimal alignment. The algorithm is fast and accurate in finding an optimal structure alignment and hence suitable for database scanning and detailed analysis of large protein families. The method has been tested and compared with results from Dali and VAST using a representative sample of similar structures. Several new structural similarities not detected by these other methods are reported. Specific one-on-one alignments and searches against all structures as found in the Protein Data Bank (PDB) can be performed via the Web at http://cl.sdsc.edu/ce.html.</description>
    <dc:title>Protein structure alignment by incremental combinatorial extension (CE) of the optimal path.</dc:title>

    <dc:creator>IN Shindyalov</dc:creator>
    <dc:creator>PE Bourne</dc:creator>
    <dc:identifier>doi:10.1093/protein/11.9.739</dc:identifier>
    <dc:source>Protein Eng, Vol. 11, No. 9. (September 1998), pp. 739-747.</dc:source>
    <dc:date>2005-10-12T15:16:16-00:00</dc:date>
    <prism:publicationYear>1998</prism:publicationYear>
    <prism:publicationName>Protein Eng</prism:publicationName>
    <prism:issn>0269-2139</prism:issn>
    <prism:volume>11</prism:volume>
    <prism:number>9</prism:number>
    <prism:startingPage>739</prism:startingPage>
    <prism:endingPage>747</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>structuralalignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/400393">
    <title>MAMMOTH (Matching molecular models obtained from theory): An automated method for model comparison</title>
    <link>http://www.citeulike.org/user/analogAI/article/400393</link>
    <description>&lt;i&gt;Protein Sci, Vol. 11, No. 11. (1 November 2002), pp. 2606-2621.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Advances in structural genomics and protein structure prediction require the design of automatic, fast, objective, and well benchmarked methods capable of comparing and assessing the similarity of low-resolution three-dimensional structures, via experimental or theoretical approaches. Here, a new method for sequence-independent structural alignment is presented that allows comparison of an experimental protein structure with an arbitrary low-resolution protein tertiary model. The heuristic algorithm is given and then used to show that it can describe random structural alignments of proteins with different folds with good accuracy by an extreme value distribution. From this observation, a structural similarity score between two proteins or two different conformations of the same protein is derived from the likelihood of obtaining a given structural alignment by chance. The performance of the derived score is then compared with well established, consensus manual-based scores and data sets. We found that the new approach correlates better than other tools with the gold standard provided by a human evaluator. Timings indicate that the algorithm is fast enough for routine use with large databases of protein models. Overall, our results indicate that the new program (MAMMOTH) will be a good tool for protein structure comparisons in structural genomics applications. MAMMOTH is available from our web site at http://physbio.mssm.edu/~ortizg/.</description>
    <dc:title>MAMMOTH (Matching molecular models obtained from theory): An automated method for model comparison</dc:title>

    <dc:creator>Angel Ortiz</dc:creator>
    <dc:creator>Charlie Strauss</dc:creator>
    <dc:creator>Osvaldo Olmea</dc:creator>
    <dc:identifier>doi:10.1110/ps.0215902</dc:identifier>
    <dc:source>Protein Sci, Vol. 11, No. 11. (1 November 2002), pp. 2606-2621.</dc:source>
    <dc:date>2005-11-19T02:08:58-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Protein Sci</prism:publicationName>
    <prism:volume>11</prism:volume>
    <prism:number>11</prism:number>
    <prism:startingPage>2606</prism:startingPage>
    <prism:endingPage>2621</prism:endingPage>
    <prism:category>protein-structure</prism:category>
    <prism:category>structuralalignment</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/analogAI/article/142321">
    <title>Non-sequential structure-based alignments reveal topology-independent core packing arrangements in proteins</title>
    <link>http://www.citeulike.org/user/analogAI/article/142321</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 21, No. 7. (01 April 2005), pp. 1010-1019.&lt;/i&gt;</description>
    <dc:title>Non-sequential structure-based alignments reveal topology-independent core packing arrangements in proteins</dc:title>

    <dc:creator>Xin Yuan</dc:creator>
    <dc:creator>Christopher Bystroff</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/bti128</dc:identifier>
    <dc:source>Bioinformatics, Vol. 21, No. 7. (01 April 2005), pp. 1010-1019.</dc:source>
    <dc:date>2005-03-28T20:02:49-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:issn>1367-4803</prism:issn>
    <prism:volume>21</prism:volume>
    <prism:number>7</prism:number>
    <prism:startingPage>1010</prism:startingPage>
    <prism:endingPage>1019</prism:endingPage>
    <prism:publisher>Oxford University Press</prism:publisher>
    <prism:category>protein-structure</prism:category>
    <prism:category>structuralalignment</prism:category>
</item>



</rdf:RDF>

