<?xml version="1.0" encoding="UTF-8"?>

<rdf:RDF
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
   xmlns="http://purl.org/rss/1.0/"
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:prism="http://prismstandard.org/namespaces/1.2/basic/"
   xmlns:dcterms="http://purl.org/dc/terms/"

>
<channel rdf:about="http://www.citeulike.org/about">
<pubDate>Sat, 26 Jul 2008 07:46:15 BST</pubDate>


	<title>CiteULike: neils's bioinformatics</title>
	<description>CiteULike: neils's bioinformatics</description>


	<link>http://www.citeulike.org/user/neils/tag/bioinformatics</link>
	<dc:publisher>CiteULike.org</dc:publisher>
	<dc:language>en-gb</dc:language>
	<dc:rights>Copyright &#169; 2004-2008 citeulike.org</dc:rights>
	<items>
    <rdf:Seq>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2986192"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2986189"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2856223"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2970074"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2961489"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2938498"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2938453"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2911654"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2906848"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2906756"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2905306"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2895943"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2863207"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2858042"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2853120"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2851672"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2843098"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2843037"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2843033"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2841303"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2838456"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2835173"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/767961"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2793797"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2796492"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2794838"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2793899"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2791320"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2783989"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/1090867"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2783961"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2783957"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2771903"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2774912"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2773795"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2767706"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2743647"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2727373"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/1562564"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2713333"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2694137"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2693993"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2693990"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2687673"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2679164"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2675935"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2671108"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2671052"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2670979"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/neils/article/2653050"/>

	</rdf:Seq>
	</items>
	</channel>


<item rdf:about="http://www.citeulike.org/user/neils/article/2986192">
    <title>Extracting sequence features to predict protein-DNA interactions: a comparative study</title>
    <link>http://www.citeulike.org/user/neils/article/2986192</link>
    <description>&lt;i&gt;Nucl. Acids Res., Vol. 36, No. 12. (1 July 2008), pp. 4137-4148.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Predicting how and where proteins, especially transcription factors (TFs), interact with DNA is an important problem in biology. We present here a systematic study of predictive modeling approaches to the TF-DNA binding problem, which have been frequently shown to be more efficient than those methods only based on position-specific weight matrices (PWMs). In these approaches, a statistical relationship between genomic sequences and gene expression or ChIP-binding intensities is inferred through a regression framework; and influential sequence features are identified by variable selection. We examine a few state-of-the-art learning methods including stepwise linear regression, multivariate adaptive regression splines, neural networks, support vector machines, boosting and Bayesian additive regression trees (BART). These methods are applied to both simulated datasets and two whole-genome ChIP-chip datasets on the TFs Oct4 and Sox2, respectively, in human embryonic stem cells. We find that, with proper learning methods, predictive modeling approaches can significantly improve the predictive power and identify more biologically interesting features, such as TF-TF interactions, than the PWM approach. In particular, BART and boosting show the best and the most robust overall performance among all the methods. 10.1093/nar/gkn361</description>
    <dc:title>Extracting sequence features to predict protein-DNA interactions: a comparative study</dc:title>

    <dc:creator>Qing Zhou</dc:creator>
    <dc:creator>Jun Liu</dc:creator>
    <dc:identifier>doi:10.1093/nar/gkn361</dc:identifier>
    <dc:source>Nucl. Acids Res., Vol. 36, No. 12. (1 July 2008), pp. 4137-4148.</dc:source>
    <dc:date>2008-07-10T23:30:44-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Nucl. Acids Res.</prism:publicationName>
    <prism:volume>36</prism:volume>
    <prism:number>12</prism:number>
    <prism:startingPage>4137</prism:startingPage>
    <prism:endingPage>4148</prism:endingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>interaction</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein-dna</prism:category>
    <prism:category>sequence</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2986189">
    <title>A Mitochondrial Protein Compendium Elucidates Complex I Disease Biology</title>
    <link>http://www.citeulike.org/user/neils/article/2986189</link>
    <description>&lt;i&gt;Cell, Vol. 134, No. 1. (11 July 2008), pp. 112-123.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Summary Mitochondria are complex organelles whose dysfunction underlies a broad spectrum of human diseases. Identifying all of the proteins resident in this organelle and understanding how they integrate into pathways represent major challenges in cell biology. Toward this goal, we performed mass spectrometry, GFP tagging, and machine learning to create a mitochondrial compendium of 1098 genes and their protein expression across 14 mouse tissues. We link poorly characterized proteins in this inventory to known mitochondrial pathways by virtue of shared evolutionary history. Using this approach, we predict 19 proteins to be important for the function of complex I (CI) of the electron transport chain. We validate a subset of these predictions using RNAi, including C8orf38, which we further show harbors an inherited mutation in a lethal, infantile CI deficiency. Our results have important implications for understanding CI function and pathogenesis and, more generally, illustrate how our compendium can serve as a foundation for systematic investigations of mitochondria.</description>
    <dc:title>A Mitochondrial Protein Compendium Elucidates Complex I Disease Biology</dc:title>

    <dc:creator>David Pagliarini</dc:creator>
    <dc:creator>Sarah Calvo</dc:creator>
    <dc:creator>Betty Chang</dc:creator>
    <dc:creator>Sunil Sheth</dc:creator>
    <dc:creator>Scott Vafai</dc:creator>
    <dc:creator>Shao-En Ong</dc:creator>
    <dc:creator>Geoffrey Walford</dc:creator>
    <dc:creator>Canny Sugiana</dc:creator>
    <dc:creator>Avihu Boneh</dc:creator>
    <dc:creator>William Chen</dc:creator>
    <dc:creator>David Hill</dc:creator>
    <dc:creator>Marc Vidal</dc:creator>
    <dc:creator>James Evans</dc:creator>
    <dc:creator>David Thorburn</dc:creator>
    <dc:creator>Steven Carr</dc:creator>
    <dc:creator>Vamsi Mootha</dc:creator>
    <dc:identifier>doi:10.1016/j.cell.2008.06.016</dc:identifier>
    <dc:source>Cell, Vol. 134, No. 1. (11 July 2008), pp. 112-123.</dc:source>
    <dc:date>2008-07-10T23:26:34-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Cell</prism:publicationName>
    <prism:volume>134</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>112</prism:startingPage>
    <prism:endingPage>123</prism:endingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>complex-i</prism:category>
    <prism:category>disease</prism:category>
    <prism:category>localisation</prism:category>
    <prism:category>mitochondria</prism:category>
    <prism:category>protein</prism:category>
    <prism:category>scl</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2856223">
    <title>InteroPorc: Automated Inference of Highly Conserved Protein Interaction Networks.</title>
    <link>http://www.citeulike.org/user/neils/article/2856223</link>
    <description>&lt;i&gt;Bioinformatics (Oxford, England) (28 May 2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;MOTIVATION: Protein-protein interaction networks provide insights into the relationships between the proteins of an organism thereby contributing to a better understanding of cellular processes. Nevertheless, large-scale interaction networks are available for only a few model organisms. Thus, interologs are useful for a systematic transfer of protein interaction networks between organisms. However, no standard tool is available so far for that purpose. RESULTS: In this study, we present an automated prediction tool developed for all sequenced genomes available in Integr8. We also have developed a second method to predict protein-protein interactions in the widely used cyanobacterium Synechocystis. Using these methods, we have constructed a new network of 8,783 inferred interactions for Synechocystis. AVAILABILITY: InteroPorc is open-source, downloadable and usable through a web interface at http://biodev.extra.cea.fr/interoporc/ CONTACT: michaut.bioinfo@gmail.com, jean-christophe.aude@cea.fr SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.</description>
    <dc:title>InteroPorc: Automated Inference of Highly Conserved Protein Interaction Networks.</dc:title>

    <dc:creator>Magali Michaut</dc:creator>
    <dc:creator>Samuel Kerrien</dc:creator>
    <dc:creator>Luisa Montecchi-Palazzi</dc:creator>
    <dc:creator>Franck Chauvat</dc:creator>
    <dc:creator>Corinne Cassier-Chauvat</dc:creator>
    <dc:creator>Jean-Christophe Aude</dc:creator>
    <dc:creator>Pierre Legrain</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/btn249</dc:identifier>
    <dc:source>Bioinformatics (Oxford, England) (28 May 2008)</dc:source>
    <dc:date>2008-06-02T09:13:15-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Bioinformatics (Oxford, England)</prism:publicationName>
    <prism:issn>1460-2059</prism:issn>
    <prism:category>bioinformatics</prism:category>
    <prism:category>interaction</prism:category>
    <prism:category>network</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein-protein</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2970074">
    <title>2DB: a Proteomics database for storage, analysis, presentation, and retrieval of information from mass spectrometric experiments</title>
    <link>http://www.citeulike.org/user/neils/article/2970074</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 9, No. 1. (2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND: The amount of information stemming from proteomics experiments involving (multi dimensional) separation techniques, mass spectrometric analysis, and computational analysis is ever-increasing. Data from such an experimental workflow needs to be captured, related and analyzed. Biological experiments within this scope produce heterogenic data ranging from pictures of one or two-dimensional protein maps and spectra recorded by tandem mass spectrometry to text-based identifications made by algorithms which analyze these spectra. Additionally, peptide and corresponding protein information needs to be displayed. RESULTS: In order to handle the large amount of data from computational processing of mass spectrometric experiments, automatic import scripts are available and the necessity for manual input to the database has been minimized. Information is in a generic format which abstracts from specific software tools typically used in such an experimental workflow. The software is therefore capable of storing and cross analysing results from many algorithms. A novel feature and a focus of this database is to facilitate protein identification by using peptides identified from mass spectrometry and link this information directly to respective protein maps. Additionally, our application employs spectral counting for quantitative presentation of the data. All information can be linked to hot spots on images to place the results into an experimental context. A summary of identified proteins, containing all relevant information per hot spot, is automatically generated, usually upon either a change in the underlying protein models or due to newly imported identifications.
The supporting information for this report can be accessed in multiple ways using the user interface provided by the application. CONCLUSIONS: We present a proteomics database which aims to greatly reduce evaluation time of results from mass spectrometric experiments and enhance result quality by allowing consistent data handling. Import functionality, automatic protein detection, and summary creation act together to facilitate data analysis. In addition, supporting information for these findings is readily accessible via the graphical user interface provided. The database schema and the implementation, which can easily be installed on virtually any server, can be downloaded in the form of a compressed file from our project webpage.</description>
    <dc:title>2DB: a Proteomics database for storage, analysis, presentation, and retrieval of information from mass spectrometric experiments</dc:title>

    <dc:creator>Jens Allmer</dc:creator>
    <dc:creator>Sebastian Kuhlgert</dc:creator>
    <dc:creator>Michael Hippler</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-9-302</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 9, No. 1. (2008)</dc:source>
    <dc:date>2008-07-07T13:10:06-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:volume>9</prism:volume>
    <prism:number>1</prism:number>
    <prism:category>bioinformatics</prism:category>
    <prism:category>database</prism:category>
    <prism:category>mass-spec</prism:category>
    <prism:category>proteomics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2961489">
    <title>Deducing topology of protein-protein interaction networks from experimentally measured sub-networks</title>
    <link>http://www.citeulike.org/user/neils/article/2961489</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 9, No. 1. (2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND: Protein-protein interaction networks are commonly sampled using yeast two hybrid approaches. However, whether topological information reaped from these experimentally-measured sub-networks can be extrapolated to complete protein-protein interaction networks is unclear. RESULTS: By analyzing various experimental protein-protein interaction datasets, we found that they are not random samples of the parent networks. Based on the experimental bait-prey behaviors, our computer simulations show that these non-random sampling features may affect the topological information. We tested the hypothesis that a core sub-network exists within the experimentally sampled network that better maintains the topological characteristics of the parent protein-protein interaction network. We developed a method to filter the experimentally sampled network to result in a core sub-network that more accurately reflects the topology of the parent network. These findings have fundamental implications for large-scale protein interaction studies and for our understanding of the behavior of cellular networks. CONCLUSIONS: The topological information from experimental measured networks network as is may not be the correct source for topological information about the parent protein-protein interaction network. We define a core sub-network that more accurately reflects the topology of the parent network.</description>
    <dc:title>Deducing topology of protein-protein interaction networks from experimentally measured sub-networks</dc:title>

    <dc:creator>Ling Yang</dc:creator>
    <dc:creator>Thomas Vondriska</dc:creator>
    <dc:creator>Zhangang Han</dc:creator>
    <dc:creator>Robb Maclellan</dc:creator>
    <dc:creator>James Weiss</dc:creator>
    <dc:creator>Zhilin Qu</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-9-301</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 9, No. 1. (2008)</dc:source>
    <dc:date>2008-07-03T23:29:09-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:volume>9</prism:volume>
    <prism:number>1</prism:number>
    <prism:category>bioinformatics</prism:category>
    <prism:category>interaction</prism:category>
    <prism:category>network</prism:category>
    <prism:category>protein-protein</prism:category>
    <prism:category>topology</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2938498">
    <title>Global investigation of protein-protein interactions in yeast Saccharomyces cerevisiae using re-occurring short polypeptide sequences</title>
    <link>http://www.citeulike.org/user/neils/article/2938498</link>
    <description>&lt;i&gt;Nucl. Acids Res. (27 June 2008), gkn390.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Protein-protein interaction (PPI) maps provide insight into cellular biology and have received considerable attention in the post-genomic era. While large-scale experimental approaches have generated large collections of experimentally determined PPIs, technical limitations preclude certain PPIs from detection. Recently, we demonstrated that yeast PPIs can be computationally predicted using re-occurring short polypeptide sequences between known interacting protein pairs. However, the computational requirements and low specificity made this method unsuitable for large-scale investigations. Here, we report an improved approach, which exhibits a specificity of ~99.95% and executes 16 000 times faster. Importantly, we report the first all-to-all sequence-based computational screen of PPIs in yeast, Saccharomyces cerevisiae in which we identify 29 589 high confidence interactions of ~2 x 10^7 possible pairs. Of these, 14 438 PPIs have not been previously reported and may represent novel interactions. In particular, these results reveal a richer set of membrane protein interactions, not readily amenable to experimental investigations. From the novel PPIs, a novel putative protein complex comprised largely of membrane proteins was revealed. In addition, two novel gene functions were predicted and experimentally confirmed to affect the efficiency of non-homologous end-joining, providing further support for the usefulness of the identified PPIs in biological investigations. 10.1093/nar/gkn390</description>
    <dc:title>Global investigation of protein-protein interactions in yeast Saccharomyces cerevisiae using re-occurring short polypeptide sequences</dc:title>

    <dc:creator>S Pitre</dc:creator>
    <dc:creator>C North</dc:creator>
    <dc:creator>M Alamgir</dc:creator>
    <dc:creator>M Jessulat</dc:creator>
    <dc:creator>A Chan</dc:creator>
    <dc:creator>X Luo</dc:creator>
    <dc:creator>JR Green</dc:creator>
    <dc:creator>M Dumontier</dc:creator>
    <dc:creator>F Dehne</dc:creator>
    <dc:creator>A Golshani</dc:creator>
    <dc:identifier>doi:10.1093/nar/gkn390</dc:identifier>
    <dc:source>Nucl. Acids Res. (27 June 2008), gkn390.</dc:source>
    <dc:date>2008-06-28T02:26:36-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Nucl. Acids Res.</prism:publicationName>
    <prism:startingPage>gkn390</prism:startingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>interaction</prism:category>
    <prism:category>peptide</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein-protein</prism:category>
    <prism:category>yeast</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2938453">
    <title>Protein complex identification by supervised graph local clustering</title>
    <link>http://www.citeulike.org/user/neils/article/2938453</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 24, No. 13. (1 July 2008), pp. i250-268.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Motivation: Protein complexes integrate multiple gene products to coordinate many biological functions. Given a graph representing pairwise protein interaction data one can search for subgraphs representing protein complexes. Previous methods for performing such search relied on the assumption that complexes form a clique in that graph. While this assumption is true for some complexes, it does not hold for many others. New algorithms are required in order to recover complexes with other types of topological structure. Results: We present an algorithm for inferring protein complexes from weighted interaction graphs. By using graph topological patterns and biological properties as features, we model each complex subgraph by a probabilistic Bayesian network (BN). We use a training set of known complexes to learn the parameters of this BN model. The log-likelihood ratio derived from the BN is then used to score subgraphs in the protein interaction graph and identify new complexes. We applied our method to protein interaction data in yeast. As we show our algorithm achieved a considerable improvement over clique based algorithms in terms of its ability to recover known complexes. We discuss some of the new complexes predicted by our algorithm and determine that they likely represent true complexes. Availability: Matlab implementation is available on the supporting website: www.cs.cmu.edu/~qyj/SuperComplex Contact: zivbj@cs.cmu.edu 10.1093/bioinformatics/btn164</description>
    <dc:title>Protein complex identification by supervised graph local clustering</dc:title>

    <dc:creator>Yanjun Qi</dc:creator>
    <dc:creator>Fernanda Balem</dc:creator>
    <dc:creator>Christos Faloutsos</dc:creator>
    <dc:creator>Judith Klein-Seetharaman</dc:creator>
    <dc:creator>Ziv Bar-Joseph</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/btn164</dc:identifier>
    <dc:source>Bioinformatics, Vol. 24, No. 13. (1 July 2008), pp. i250-268.</dc:source>
    <dc:date>2008-06-28T00:53:00-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:volume>24</prism:volume>
    <prism:number>13</prism:number>
    <prism:startingPage>i250</prism:startingPage>
    <prism:endingPage>268</prism:endingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>clustering</prism:category>
    <prism:category>complex</prism:category>
    <prism:category>graph</prism:category>
    <prism:category>interaction</prism:category>
    <prism:category>protein-protein</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2911654">
    <title>GMODWeb: a web framework for the generic model organism database</title>
    <link>http://www.citeulike.org/user/neils/article/2911654</link>
    <description>&lt;i&gt;Genome Biology, Vol. 9, No. 6. (2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The Generic Model Organism Database (GMOD) initiative provides species-agnostic data models and software tools for representing curated model organism data. Here we describe GMODWeb, a GMOD project designed to speed the development of Model Organism Database (MOD) websites. Sites created with GMODWeb provide integration with other GMOD tools and allow users to browse and search through a variety of data types. GMODWeb was built using the open source Turnkey web framework and is available from http://turnkey.sourceforge.net.</description>
    <dc:title>GMODWeb: a web framework for the generic model organism database</dc:title>

    <dc:creator>Brian O'Connor</dc:creator>
    <dc:creator>Allen Day</dc:creator>
    <dc:creator>Scott Cain</dc:creator>
    <dc:creator>Olivier Arnaiz</dc:creator>
    <dc:creator>Linda Sperling</dc:creator>
    <dc:creator>Lincoln Stein</dc:creator>
    <dc:identifier>doi:10.1186/gb-2008-9-6-r102</dc:identifier>
    <dc:source>Genome Biology, Vol. 9, No. 6. (2008)</dc:source>
    <dc:date>2008-06-21T00:24:58-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Genome Biology</prism:publicationName>
    <prism:volume>9</prism:volume>
    <prism:number>6</prism:number>
    <prism:category>bioinformatics</prism:category>
    <prism:category>framework</prism:category>
    <prism:category>genomics</prism:category>
    <prism:category>gmod</prism:category>
    <prism:category>perl</prism:category>
    <prism:category>software</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2906848">
    <title>Phosphorylation-Specific MS/MS Scoring for Rapid and Accurate Phosphoproteome Analysis</title>
    <link>http://www.citeulike.org/user/neils/article/2906848</link>
    <description>&lt;i&gt;J. Proteome Res. (19 June 2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Abstract: The promise of mass spectrometry as a tool for probing signal-transduction is predicated on reliable identification of post-translational modifications. Phosphorylations are key mediators of cellular signaling, yet are hard to detect, partly because of unusual fragmentation patterns of phosphopeptides. In addition to being accurate, MS/MS identification software must be robust and efficient to deal with increasingly large spectral data sets. Here, we present a new scoring function for the Inspect software for phosphorylated peptide tandem mass spectra for ion-trap instruments, without the need for manual validation. The scoring function was modeled by learning fragmentation patterns from 7677 validated phosphopeptide spectra. We compare our algorithm against SEQUEST and X!Tandem on testing and training data sets. At a 1% false positive rate, Inspect identified the greatest total number of phosphorylated spectra, 13% more than SEQUEST and 39% more than X!Tandem. Spectra identified by Inspect tended to score better in several spectral quality measures. Furthermore, Inspect runs much faster than either SEQUEST or X!Tandem, making desktop phosphoproteomics feasible. Finally, we used our new models to reanalyze a corpus of 423 000 LTQ spectra acquired for a phosphoproteome analysis of Saccharomyces cerevisiae DNA damage and repair pathways and discovered 43% more phosphopeptides than the previous study.</description>
    <dc:title>Phosphorylation-Specific MS/MS Scoring for Rapid and Accurate Phosphoproteome Analysis</dc:title>

    <dc:creator>Samuel Payne</dc:creator>
    <dc:creator>Margaret Yau</dc:creator>
    <dc:creator>Marcus Smolka</dc:creator>
    <dc:creator>Stephen Tanner</dc:creator>
    <dc:creator>Huilin Zhou</dc:creator>
    <dc:creator>Vineet Bafna</dc:creator>
    <dc:identifier>doi:10.1021/pr800129m</dc:identifier>
    <dc:source>J. Proteome Res. (19 June 2008)</dc:source>
    <dc:date>2008-06-19T08:13:54-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>J. Proteome Res.</prism:publicationName>
    <prism:category>bioinformatics</prism:category>
    <prism:category>ms-ms</prism:category>
    <prism:category>phosphoprotein</prism:category>
    <prism:category>phosphorylation</prism:category>
    <prism:category>proteome</prism:category>
    <prism:category>scoring</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2906756">
    <title>Colander: A Probability-Based Support Vector Machine Algorithm for Automatic Screening for CID Spectra of Phosphopeptides Prior to Database Search</title>
    <link>http://www.citeulike.org/user/neils/article/2906756</link>
    <description>&lt;i&gt;J. Proteome Res. (19 June 2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Abstract: We developed a probability-based machine-learning program, Colander, to identify tandem mass spectra that are highly likely to represent phosphopeptides prior to database search. We identified statistically significant diagnostic features of phosphopeptide tandem mass spectra based on ion trap CID MS/MS experiments. Statistics for the features are calculated from 376 validated phosphopeptide spectra and 376 nonphosphopeptide spectra. A probability-based support vector machine (SVM) program, Colander, was then trained on five selected features. Data sets were assembled both from LC/LC-MS/MS analyses of large-scale phosphopeptide enrichments from proteolyzed cells, tissues and synthetic phosphopeptides. These data sets were used to evaluate the capability of Colander to select pS/pT-containing phosphopeptide tandem mass spectra. When applied to unknown tandem mass spectra, Colander can routinely remove 80% of tandem mass spectra while retaining 95% of phosphopeptide tandem mass spectra. The program significantly reduced computational time spent on database search by 60-90%. Furthermore, prefiltering tandem mass spectra representing phosphopeptides can increase the number of phosphopeptide identifications under a predefined false positive rate.</description>
    <dc:title>Colander: A Probability-Based Support Vector Machine Algorithm for Automatic Screening for CID Spectra of Phosphopeptides Prior to Database Search</dc:title>

    <dc:creator>Bingwen Lu</dc:creator>
    <dc:creator>Cristian Ruse</dc:creator>
    <dc:creator>John Yates</dc:creator>
    <dc:identifier>doi:10.1021/pr8001194</dc:identifier>
    <dc:source>J. Proteome Res. (19 June 2008)</dc:source>
    <dc:date>2008-06-19T08:09:13-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>J. Proteome Res.</prism:publicationName>
    <prism:category>bioinformatics</prism:category>
    <prism:category>cid</prism:category>
    <prism:category>database</prism:category>
    <prism:category>machine-learning</prism:category>
    <prism:category>mass-spec</prism:category>
    <prism:category>phosphopeptides</prism:category>
    <prism:category>svm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2905306">
    <title>The Landscape of Human Proteins Interacting with Viruses and Other Pathogens</title>
    <link>http://www.citeulike.org/user/neils/article/2905306</link>
    <description>&lt;i&gt;PLoS Pathog, Vol. 4, No. 2. (Feb 2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Infectious diseases result in millions of deaths each year. Mechanisms of infection have been studied in detail for many pathogens. However, many questions are relatively unexplored. What are the properties of human proteins that interact with pathogens? Do pathogens interact with certain functional classes of human proteins? Which infection mechanisms and pathways are commonly triggered by multiple pathogens? In this paper, to our knowledge, we provide the first study of the landscape of human proteins interacting with pathogens. We integrate human–pathogen protein–protein interactions (PPIs) for 190 pathogen strains from seven public databases. Nearly all of the 10,477 human-pathogen PPIs are for viral systems (98.3%), with the majority belonging to the human–HIV system (77.9%). We find that both viral and bacterial pathogens tend to interact with hubs (proteins with many interacting partners) and bottlenecks (proteins that are central to many paths in the network) in the human PPI network. We construct separate sets of human proteins interacting with bacterial pathogens, viral pathogens, and those interacting with multiple bacteria and with multiple viruses. Gene Ontology functions enriched in these sets reveal a number of processes, such as cell cycle regulation, nuclear transport, and immune response that participate in interactions with different pathogens. Our results provide the first global view of strategies used by pathogens to subvert human cellular processes and infect human cells. Supplementary data accompanying this paper is available at http://staff.vbi.vt.edu/dyermd/publications/dyer2008a.html.</description>
    <dc:title>The Landscape of Human Proteins Interacting with Viruses and Other Pathogens</dc:title>

    <dc:creator>Matthew Dyer</dc:creator>
    <dc:creator>Murali</dc:creator>
    <dc:creator>Bruno Sobral</dc:creator>
    <dc:identifier>doi:10.1371/journal.ppat.0040032</dc:identifier>
    <dc:source>PLoS Pathog, Vol. 4, No. 2. (Feb 2008)</dc:source>
    <dc:date>2008-06-18T12:51:04-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>PLoS Pathog</prism:publicationName>
    <prism:volume>4</prism:volume>
    <prism:number>2</prism:number>
    <prism:publisher>Public Library of Science</prism:publisher>
    <prism:category>bioinformatics</prism:category>
    <prism:category>database</prism:category>
    <prism:category>human</prism:category>
    <prism:category>interaction</prism:category>
    <prism:category>pathogen</prism:category>
    <prism:category>protein-protein</prism:category>
    <prism:category>virus</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2895943">
    <title>PURE: a webserver for the prediction of domains in unassigned regions in proteins</title>
    <link>http://www.citeulike.org/user/neils/article/2895943</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 9 (14 June 2008), 281.&lt;/i&gt;</description>
    <dc:title>PURE: a webserver for the prediction of domains in unassigned regions in proteins</dc:title>

    <dc:creator>Chilamakuri Reddy</dc:creator>
    <dc:creator>Shameer Khader</dc:creator>
    <dc:creator>Bernard Offmann</dc:creator>
    <dc:creator>Ramanthan Sowdhamini</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-9-281</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 9 (14 June 2008), 281.</dc:source>
    <dc:date>2008-06-15T08:24:07-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:issn>1471-2105</prism:issn>
    <prism:volume>9</prism:volume>
    <prism:startingPage>281</prism:startingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>domain</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein</prism:category>
    <prism:category>region</prism:category>
    <prism:category>webserver</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2863207">
    <title>NeAT: a toolbox for the analysis of biological networks, clusters, classes and pathways</title>
    <link>http://www.citeulike.org/user/neils/article/2863207</link>
    <description>&lt;i&gt;Nucl. Acids Res. (4 June 2008), gkn336.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The network analysis tools (NeAT) (http://rsat.ulb.ac.be/neat/) provide a user-friendly web access to a collection of modular tools for the analysis of networks (graphs) and clusters (e.g. microarray clusters, functional classes, etc.). A first set of tools supports basic operations on graphs (comparison between two graphs, neighborhood of a set of input nodes, path finding and graph randomization). Another set of programs makes the connection between networks and clusters (graph-based clustering, cliques discovery and mapping of clusters onto a network). The toolbox also includes programs for detecting significant intersections between clusters/classes (e.g. clusters of co-expression versus functional classes of genes). NeAT are designed to cope with large datasets and provide a flexible toolbox for analyzing biological networks stored in various databases (protein interactions, regulation and metabolism) or obtained from high-throughput experiments (two-hybrid, mass-spectrometry and microarrays). The web interface interconnects the programs in predefined analysis flows, enabling to address a series of questions about networks of interest. Each tool can also be used separately by entering custom data for a specific analysis. NeAT can also be used as web services (SOAP/WSDL interface), in order to design programmatic workflows and integrate them with other available resources. 10.1093/nar/gkn336</description>
    <dc:title>NeAT: a toolbox for the analysis of biological networks, clusters, classes and pathways</dc:title>

    <dc:creator>Sylvain Brohee</dc:creator>
    <dc:creator>Karoline Faust</dc:creator>
    <dc:creator>Gipsi Lima-Mendez</dc:creator>
    <dc:creator>Olivier Sand</dc:creator>
    <dc:creator>Rekin's Janky</dc:creator>
    <dc:creator>Gilles Vanderstocken</dc:creator>
    <dc:creator>Yves Deville</dc:creator>
    <dc:creator>Jacques van Helden</dc:creator>
    <dc:identifier>doi:10.1093/nar/gkn336</dc:identifier>
    <dc:source>Nucl. Acids Res. (4 June 2008), gkn336.</dc:source>
    <dc:date>2008-06-05T01:56:38-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Nucl. Acids Res.</prism:publicationName>
    <prism:startingPage>gkn336</prism:startingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>cluster</prism:category>
    <prism:category>network</prism:category>
    <prism:category>pathways</prism:category>
    <prism:category>software</prism:category>
    <prism:category>webserver</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2858042">
    <title>Large-scale analysis of gene clustering in bacteria</title>
    <link>http://www.citeulike.org/user/neils/article/2858042</link>
    <description>&lt;i&gt;Genome Res., Vol. 18, No. 6. (1 June 2008), pp. 949-956.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;An important strategy to study operons and their evolution is to investigate clustering of related genes across multiple bacterial genomes. Although existing algorithms are available that can identify gene clusters across two or more genomes, very few algorithms are efficient enough to study gene clusters across hundreds of genomes. We observe that a querying strategy can be used to analyze gene clusters across a large number of genomes and develop an efficient algorithm to identify all related clusters on a genome from a given query cluster. We use this algorithm to study gene clustering in 400 bacterial genomes by starting from a well-characterized list of operons in Escherichia coli K12 and perform comparative analysis of operon occurrences, gene orientations, and rearrangements both within and across clusters. We show that important biological insights can be obtained by comparing results across these categories. A software program implementing the algorithm (GCQuery) and supplementary data containing detailed results are available at http://faculty.cs.tamu.edu/shsze/gcquery. 10.1101/gr.072322.107</description>
    <dc:title>Large-scale analysis of gene clustering in bacteria</dc:title>

    <dc:creator>Qingwu Yang</dc:creator>
    <dc:creator>Sing-Hoi Sze</dc:creator>
    <dc:identifier>doi:10.1101/gr.072322.107</dc:identifier>
    <dc:source>Genome Res., Vol. 18, No. 6. (1 June 2008), pp. 949-956.</dc:source>
    <dc:date>2008-06-03T01:28:27-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Genome Res.</prism:publicationName>
    <prism:volume>18</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>949</prism:startingPage>
    <prism:endingPage>956</prism:endingPage>
    <prism:category>bacteria</prism:category>
    <prism:category>bioinformatics</prism:category>
    <prism:category>clustering</prism:category>
    <prism:category>genetics</prism:category>
    <prism:category>operon</prism:category>
    <prism:category>prediction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2853120">
    <title>iTools: A Framework for Classification, Categorization and Integration of Computational Biology Resources</title>
    <link>http://www.citeulike.org/user/neils/article/2853120</link>
    <description>&lt;i&gt;PLoS ONE, Vol. 3, No. 5. (May 2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The advancement of the computational biology field hinges on progress in three fundamental directions – the development of new computational algorithms, the availability of informatics resource management infrastructures and the capability of tools to interoperate and synergize. There is an explosion in algorithms and tools for computational biology, which makes it difficult for biologists to find, compare and integrate such resources. We describe a new infrastructure, iTools, for managing the query, traversal and comparison of diverse computational biology resources. Specifically, iTools stores information about three types of resources–data, software tools and web-services. The iTools design, implementation and resource meta - data content reflect the broad research, computational, applied and scientific expertise available at the seven National Centers for Biomedical Computing. iTools provides a system for classification, categorization and integration of different computational biology resources across space-and-time scales, biomedical problems, computational infrastructures and mathematical foundations. A large number of resources are already iTools-accessible to the community and this infrastructure is rapidly growing. iTools includes human and machine interfaces to its resource meta-data repository. Investigators or computer programs may utilize these interfaces to search, compare, expand, revise and mine meta-data descriptions of existent computational biology resources. We propose two ways to browse and display the iTools dynamic collection of resources. The first one is based on an ontology of computational biology resources, and the second one is derived from hyperbolic projections of manifolds or complex structures onto planar discs. iTools is an open source project both in terms of the source code development as well as its meta-data content. 
iTools employs a decentralized, portable, scalable and lightweight framework for long-term resource management. We demonstrate several applications of iTools as a framework for integrated bioinformatics. iTools and the complete details about its specifications, usage and interfaces are available at the iTools web page http://iTools.ccb.ucla.edu.</description>
    <dc:title>iTools: A Framework for Classification, Categorization and Integration of Computational Biology Resources</dc:title>

    <dc:creator>Ivo Dinov</dc:creator>
    <dc:creator>Daniel Rubin</dc:creator>
    <dc:creator>William Lorensen</dc:creator>
    <dc:creator>Jonathan Dugan</dc:creator>
    <dc:creator>Jeff Ma</dc:creator>
    <dc:creator>Shawn Murphy</dc:creator>
    <dc:creator>Beth Kirschner</dc:creator>
    <dc:creator>William Bug</dc:creator>
    <dc:creator>Michael Sherman</dc:creator>
    <dc:creator>Aris Floratos</dc:creator>
    <dc:creator>David Kennedy</dc:creator>
    <dc:creator>HV Jagadish</dc:creator>
    <dc:creator>Jeanette Schmidt</dc:creator>
    <dc:creator>Brian Athey</dc:creator>
    <dc:creator>Andrea Califano</dc:creator>
    <dc:creator>Mark Musen</dc:creator>
    <dc:creator>Russ Altman</dc:creator>
    <dc:creator>Ron Kikinis</dc:creator>
    <dc:creator>Isaac Kohane</dc:creator>
    <dc:creator>Scott Delp</dc:creator>
    <dc:creator>Stott Parker</dc:creator>
    <dc:creator>Arthur Toga</dc:creator>
    <dc:identifier>doi:10.1371/journal.pone.0002265</dc:identifier>
    <dc:source>PLoS ONE, Vol. 3, No. 5. (May 2008)</dc:source>
    <dc:date>2008-05-31T12:42:35-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>PLoS ONE</prism:publicationName>
    <prism:volume>3</prism:volume>
    <prism:number>5</prism:number>
    <prism:publisher>Public Library of Science</prism:publisher>
    <prism:category>bioinformatics</prism:category>
    <prism:category>computational-biology</prism:category>
    <prism:category>framework</prism:category>
    <prism:category>software</prism:category>
    <prism:category>tools</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2851672">
    <title>poolHiTS: A Shifted Transversal Design based pooling strategy for high-throughput drug screening</title>
    <link>http://www.citeulike.org/user/neils/article/2851672</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 9, No. 1. (2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND:A key goal of drug discovery is to increase the throughput of small molecule screens without sacrificing screening accuracy. High-throughput screening (HTS) in drug discovery involves testing a large number of compounds in a biological assay to identify active compounds. Normally, molecules from a large compound library are tested individually to identify the activity of each molecule. Usually a small number of compounds are found to be active, however the presence of false positive and negative testing errors suggests that this one-drug one-assay screening strategy can be significantly improved. Pooling designs are testing schemes that test mixtures of compounds in each assay, thereby generating a screen of the whole compound library in fewer tests. By repeatedly testing compounds in different combinations, pooling designs also allow for error-correction. These pooled designs, for specific experiment parameters, can be simply and efficiently created using the Shifted Transversal Design (STD) pooling algorithm. However, drug screening contains a number of key constraints that require specific modifications if this pooling approach is to be useful for practical screen designs. RESULTS:In this paper, we introduce a pooling strategy called poolHiTS (Pooled High-Throughput Screening) which is based on the STD algorithm. In poolHiTS, we implement a limit on the number of compounds that can be mixed in a single assay. In addition, we show that the STD-based pooling strategy is limited in the error-correction that it can achieve. Due to the mixing constraint, we show that it is more efficient to split a large library into smaller blocks of compounds, which are then tested using an optimized strategy repeated for each block. We package the optimal block selection algorithm into poolHiTS. 
The MATLAB codes for the poolHiTS algorithm and the corresponding decoding strategy are also provided. CONCLUSIONS:We have produced a practical version of STD algorithm for pooled drug screens. This pooling strategy provides both assay compression and error-correction capabilities that can both accelerate and reduce the overall cost of HTS in drug discovery.</description>
    <dc:title>poolHiTS: A Shifted Transversal Design based pooling strategy for high-throughput drug screening</dc:title>

    <dc:creator>Raghunandan Kainkaryam</dc:creator>
    <dc:creator>Peter Woolf</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-9-256</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 9, No. 1. (2008)</dc:source>
    <dc:date>2008-05-31T02:52:59-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:volume>9</prism:volume>
    <prism:number>1</prism:number>
    <prism:category>bioinformatics</prism:category>
    <prism:category>drug</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>screen</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2843098">
    <title>PROMALS3D web server for accurate multiple protein sequence and structure alignments</title>
    <link>http://www.citeulike.org/user/neils/article/2843098</link>
    <description>&lt;i&gt;Nucl. Acids Res. (24 May 2008), gkn322.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Multiple sequence alignments are essential in computational sequence and structural analysis, with applications in homology detection, structure modeling, function prediction and phylogenetic analysis. We report PROMALS3D web server for constructing alignments for multiple protein sequences and/or structures using information from available 3D structures, database homologs and predicted secondary structures. PROMALS3D shows higher alignment accuracy than a number of other advanced methods. Input of PROMALS3D web server can be FASTA format protein sequences, PDB format protein structures and/or user-defined alignment constraints. The output page provides alignments with several formats, including a colored alignment augmented with useful information about sequence grouping, predicted secondary structures and consensus sequences. Intermediate results of sequence and structural database searches are also available. The PROMALS3D web server is available at: http://prodata.swmed.edu/promals3d/. 10.1093/nar/gkn322</description>
    <dc:title>PROMALS3D web server for accurate multiple protein sequence and structure alignments</dc:title>

    <dc:creator>Jimin Pei</dc:creator>
    <dc:creator>Ming Tang</dc:creator>
    <dc:creator>Nick Grishin</dc:creator>
    <dc:identifier>doi:10.1093/nar/gkn322</dc:identifier>
    <dc:source>Nucl. Acids Res. (24 May 2008), gkn322.</dc:source>
    <dc:date>2008-05-29T03:27:59-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Nucl. Acids Res.</prism:publicationName>
    <prism:startingPage>gkn322</prism:startingPage>
    <prism:category>alignment</prism:category>
    <prism:category>bioinformatics</prism:category>
    <prism:category>protein</prism:category>
    <prism:category>sequence</prism:category>
    <prism:category>structure</prism:category>
    <prism:category>webserver</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2843037">
    <title>PIE: an online prediction system for protein-protein interactions from text</title>
    <link>http://www.citeulike.org/user/neils/article/2843037</link>
    <description>&lt;i&gt;Nucl. Acids Res. (28 May 2008), gkn281.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Protein-protein interaction (PPI) extraction has been an important research topic in bio-text mining area, since the PPI information is critical for understanding biological processes. However, there are very few open systems available on the Web and most of the systems focus on keyword searching based on predefined PPIs. PIE (Protein Interaction information Extraction system) is a configurable Web service to extract PPIs from literature, including user-provided papers as well as PubMed articles. After providing abstracts or papers, the prediction results are displayed in an easily readable form with essential, yet compact features. The PIE interface supports more features such as PDF file extraction, PubMed search tool and network communication, which are useful for biologists and bio-system developers. The PIE system utilizes natural language processing techniques and machine learning methodologies to predict PPI sentences, which results in high precision performance for Web users. PIE is freely available at http://bi.snu.ac.kr/pie/. 10.1093/nar/gkn281</description>
    <dc:title>PIE: an online prediction system for protein-protein interactions from text</dc:title>

    <dc:creator>Sun Kim</dc:creator>
    <dc:creator>Soo-Yong Shin</dc:creator>
    <dc:creator>In-Hee Lee</dc:creator>
    <dc:creator>Soo-Jin Kim</dc:creator>
    <dc:creator>Ram Sriram</dc:creator>
    <dc:creator>Byoung-Tak Zhang</dc:creator>
    <dc:identifier>doi:10.1093/nar/gkn281</dc:identifier>
    <dc:source>Nucl. Acids Res. (28 May 2008), gkn281.</dc:source>
    <dc:date>2008-05-29T02:00:31-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Nucl. Acids Res.</prism:publicationName>
    <prism:startingPage>gkn281</prism:startingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>interaction</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein-protein</prism:category>
    <prism:category>text-mining</prism:category>
    <prism:category>webserver</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2843033">
    <title>ENDEAVOUR update: a web resource for gene prioritization in multiple species</title>
    <link>http://www.citeulike.org/user/neils/article/2843033</link>
    <description>&lt;i&gt;Nucl. Acids Res. (28 May 2008), gkn325.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;ENDEAVOUR (http://www.esat.kuleuven.be/endeavourweb; this web site is free and open to all users and there is no login requirement) is a web resource for the prioritization of candidate genes. Using a training set of genes known to be involved in a biological process of interest, our approach consists of (i) inferring several models (based on various genomic data sources), (ii) applying each model to the candidate genes to rank those candidates against the profile of the known genes and (iii) merging the several rankings into a global ranking of the candidate genes. In the present article, we describe the latest developments of ENDEAVOUR. First, we provide a web-based user interface, besides our Java client, to make ENDEAVOUR more universally accessible. Second, we support multiple species: in addition to Homo sapiens, we now provide gene prioritization for three major model organisms: Mus musculus, Rattus norvegicus and Caenorhabditis elegans. Third, ENDEAVOUR makes use of additional data sources and is now including numerous databases: ontologies and annotations, protein-protein interactions, cis-regulatory information, gene expression data sets, sequence information and text-mining data. We tested the novel version of ENDEAVOUR on 32 recent disease gene associations from the literature. Additionally, we describe a number of recent independent studies that made use of ENDEAVOUR to prioritize candidate genes for obesity and Type II diabetes, cleft lip and cleft palate, and pulmonary fibrosis. 10.1093/nar/gkn325</description>
    <dc:title>ENDEAVOUR update: a web resource for gene prioritization in multiple species</dc:title>

    <dc:creator>Leon-Charles Tranchevent</dc:creator>
    <dc:creator>Roland Barriot</dc:creator>
    <dc:creator>Shi Yu</dc:creator>
    <dc:creator>Steven Vooren</dc:creator>
    <dc:creator>Peter Loo</dc:creator>
    <dc:creator>Bert Coessens</dc:creator>
    <dc:creator>Bart Moor</dc:creator>
    <dc:creator>Stein Aerts</dc:creator>
    <dc:creator>Yves Moreau</dc:creator>
    <dc:identifier>doi:10.1093/nar/gkn325</dc:identifier>
    <dc:source>Nucl. Acids Res. (28 May 2008), gkn325.</dc:source>
    <dc:date>2008-05-29T01:57:07-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Nucl. Acids Res.</prism:publicationName>
    <prism:startingPage>gkn325</prism:startingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>candidate</prism:category>
    <prism:category>function</prism:category>
    <prism:category>gene</prism:category>
    <prism:category>selection</prism:category>
    <prism:category>webserver</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2841303">
    <title>Unravelling the genomic mosaic of a ubiquitous genus of marine cyanobacteria</title>
    <link>http://www.citeulike.org/user/neils/article/2841303</link>
    <description>&lt;i&gt;Genome Biology, Vol. 9, No. 5. (2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND:The picocyanobacterial genus Synechococcus occurs over wide oceanic expanses, having colonized most available niches in the photic zone. Large scale distribution patterns of the different Synechococcus clades (based on 16S rRNA gene markers) suggest the occurrence of two major lifestyles ('opportunists'/'specialists'), corresponding to two distinct broad habitats ('coastal'/'open ocean'). Yet, the genetic basis of niche partitioning is still poorly understood in this ecologically important group. RESULTS:Here, we compare the genomes of 11 marine Synechococcus isolates, representing 10 distinct lineages. Phylogenies inferred from the core genome allowed us to refine the taxonomic relationships between clades by revealing a clear dichotomy within the main subcluster, reminiscent of the two aforementioned lifestyles. Genome size is strongly correlated with the cumulative lengths of hypervariable regions (or 'islands'). One of these, encompassing most genes encoding the light-harvesting phycobilisome rod complexes, is involved in adaptation to changes in light quality and has clearly been transferred between members of different Synechococcus lineages. Furthermore, we observed that two strains (RS9917 and WH5701) which have similar pigmentation and physiology, have an unusually high number of genes in common, given their phylogenetic distance. CONCLUSIONS:We propose that while members of a given marine Synechococcus lineage may have the same broad geographical distribution, local niche occupancy is facilitated by lateral gene transfers, a process in which genomic islands play a key role as a repository for transferred genes. Our work also highlights the need for developing picocyanobacterial systematics based on genome-derived parameters combined with ecological and physiological data.</description>
    <dc:title>Unravelling the genomic mosaic of a ubiquitous genus of marine cyanobacteria</dc:title>

    <dc:creator>Alexis Dufresne</dc:creator>
    <dc:creator>Martin Ostrowski</dc:creator>
    <dc:creator>David Scanlan</dc:creator>
    <dc:creator>Laurence Garczarek</dc:creator>
    <dc:creator>Sophie Mazard</dc:creator>
    <dc:creator>Brian Palenik</dc:creator>
    <dc:creator>Ian Paulsen</dc:creator>
    <dc:creator>Nicole de Marsac</dc:creator>
    <dc:creator>Patrick Wincker</dc:creator>
    <dc:creator>Carole Dossat</dc:creator>
    <dc:creator>Steve Ferriera</dc:creator>
    <dc:creator>Justin Johnson</dc:creator>
    <dc:creator>Anton Post</dc:creator>
    <dc:creator>Wolfgang Hess</dc:creator>
    <dc:creator>Frederic Partensky</dc:creator>
    <dc:identifier>doi:10.1186/gb-2008-9-5-r90</dc:identifier>
    <dc:source>Genome Biology, Vol. 9, No. 5. (2008)</dc:source>
    <dc:date>2008-05-28T10:55:38-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Genome Biology</prism:publicationName>
    <prism:volume>9</prism:volume>
    <prism:number>5</prism:number>
    <prism:category>analysis</prism:category>
    <prism:category>bioinformatics</prism:category>
    <prism:category>comparative</prism:category>
    <prism:category>cyanobacteria</prism:category>
    <prism:category>genomics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2838456">
    <title>Prediction of enzyme function by combining sequence similarity and protein interactions</title>
    <link>http://www.citeulike.org/user/neils/article/2838456</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 9, No. 1. (2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND:A number of studies have used protein interaction data alone for protein function prediction. Here, we introduce a computational approach for annotation of enzymes, based on the observation that similar protein sequences are more likely to perform the same function if they share similar interacting partners. RESULTS:The method has been tested using interaction data about 3,890 protein sequences and averaging the results within protein families to account for over- and under-representation. For protein sequences that align with at least 40% sequence identity to a known enzyme, the specificity of our method in predicting the first three EC digits increased from 80% to 90% at 80% coverage when compared to PSI-BLAST. CONCLUSION:Our method can be applied not only to proteins for which we know interacting partners but also to their homologs. The method can be used for large-scale enzymatic functional annotation of protein sequences to refine predictions based on sequence matching alone, increasing the specificity of 10% of the predictions made by PSI-BLAST alone.</description>
    <dc:title>Prediction of enzyme function by combining sequence similarity and protein interactions</dc:title>

    <dc:creator>Jordi Espadaler</dc:creator>
    <dc:creator>Narayanan Eswar</dc:creator>
    <dc:creator>Enric Querol</dc:creator>
    <dc:creator>Francesc Aviles</dc:creator>
    <dc:creator>Andrej Sali</dc:creator>
    <dc:creator>Marc Renom</dc:creator>
    <dc:creator>Baldomero Oliva</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-9-249</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 9, No. 1. (2008)</dc:source>
    <dc:date>2008-05-28T00:04:03-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:volume>9</prism:volume>
    <prism:number>1</prism:number>
    <prism:category>bioinformatics</prism:category>
    <prism:category>enzyme</prism:category>
    <prism:category>function</prism:category>
    <prism:category>interaction</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein-protein</prism:category>
    <prism:category>sequence</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2835173">
    <title>Predikin and PredikinDB: a computational framework for the prediction of protein kinase peptide specificity and an associated database of phosphorylation sites</title>
    <link>http://www.citeulike.org/user/neils/article/2835173</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 9 (26 May 2008), 245.&lt;/i&gt;</description>
    <dc:title>Predikin and PredikinDB: a computational framework for the prediction of protein kinase peptide specificity and an associated database of phosphorylation sites</dc:title>

    <dc:creator>Neil Saunders</dc:creator>
    <dc:creator>Ross Brinkworth</dc:creator>
    <dc:creator>Thomas Huber</dc:creator>
    <dc:creator>Bruce Kemp</dc:creator>
    <dc:creator>Bostjan Kobe</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-9-245</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 9 (26 May 2008), 245.</dc:source>
    <dc:date>2008-05-26T16:41:40-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:issn>1471-2105</prism:issn>
    <prism:volume>9</prism:volume>
    <prism:startingPage>245</prism:startingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>kinase</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>predikin</prism:category>
    <prism:category>specificity</prism:category>
    <prism:category>substrate</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/767961">
    <title>The statistical significance of nucleotide position-weight matrix matches</title>
    <link>http://www.citeulike.org/user/neils/article/767961</link>
    <description>&lt;i&gt;Comput. Appl. Biosci., Vol. 12, No. 5. (1 October 1996), pp. 431-439.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;MOTIVATION: To improve the detection of nucleotide sequence signals (e.g. promoter elements) by position-weight matrices (PWM) using the concept of statistically significant matches. RESULTS: The Mksite program was originally developed for analyzing protein sequences. We report NMksite, a new version adapted to the processing of nucleotide sequences. NMksite creates PWM from nucleotide sequence block alignments or occurrence tables using three weight computation schemes. An original feature of NMksite is the numerical computation of the statistical significance of PWM matches. The utility of this concept is demonstrated in the context of the prediction of splice sites and promoter regions. AVAILABILITY: Mksite and other components of the MODEST (Motif DEsign and Search Tool) package (written in C/Unix) are available at http://igs-server.cnrs-mrs.fr CONTACT: E-mail: jmc@igs.cnrs-mrs.fr 10.1093/bioinformatics/12.5.431</description>
    <dc:title>The statistical significance of nucleotide position-weight matrix matches</dc:title>

    <dc:creator>Jean-Michel Claverie</dc:creator>
    <dc:creator>Stephane Audic</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/12.5.431</dc:identifier>
    <dc:source>Comput. Appl. Biosci., Vol. 12, No. 5. (1 October 1996), pp. 431-439.</dc:source>
    <dc:date>2006-07-21T07:19:55-00:00</dc:date>
    <prism:publicationYear>1996</prism:publicationYear>
    <prism:publicationName>Comput. Appl. Biosci.</prism:publicationName>
    <prism:volume>12</prism:volume>
    <prism:number>5</prism:number>
    <prism:startingPage>431</prism:startingPage>
    <prism:endingPage>439</prism:endingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>matrix</prism:category>
    <prism:category>nucleotide</prism:category>
    <prism:category>pwm</prism:category>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2793797">
    <title>Estimating the size of the human interactome</title>
    <link>http://www.citeulike.org/user/neils/article/2793797</link>
    <description>&lt;i&gt;Proceedings of the National Academy of Sciences (12 May 2008), 0708078105.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;After the completion of the human and other genome projects it emerged that the number of genes in organisms as diverse as fruit flies, nematodes, and humans does not reflect our perception of their relative complexity. Here, we provide reliable evidence that the size of protein interaction networks in different organisms appears to correlate much better with their apparent biological complexity. We develop a stable and powerful, yet simple, statistical procedure to estimate the size of the whole network from subnet data. This approach is then applied to a range of eukaryotic organisms for which extensive protein interaction data have been collected and we estimate the number of interactions in humans to be approximately 650,000. We find that the human interaction network is one order of magnitude bigger than the Drosophila melanogaster interactome and approximately 3 times bigger than in Caenorhabditis elegans. 10.1073/pnas.0708078105</description>
    <dc:title>Estimating the size of the human interactome</dc:title>

    <dc:creator>Michael Stumpf</dc:creator>
    <dc:creator>Thomas Thorne</dc:creator>
    <dc:creator>Eric de Silva</dc:creator>
    <dc:creator>Ronald Stewart</dc:creator>
    <dc:creator>Hyeong An</dc:creator>
    <dc:creator>Michael Lappe</dc:creator>
    <dc:creator>Carsten Wiuf</dc:creator>
    <dc:identifier>doi:10.1073/pnas.0708078105</dc:identifier>
    <dc:source>Proceedings of the National Academy of Sciences (12 May 2008), 0708078105.</dc:source>
    <dc:date>2008-05-13T07:34:25-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Proceedings of the National Academy of Sciences</prism:publicationName>
    <prism:startingPage>0708078105</prism:startingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>human</prism:category>
    <prism:category>interaction</prism:category>
    <prism:category>interactome</prism:category>
    <prism:category>network</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein-protein</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2796492">
    <title>The Predikin webserver: improved prediction of protein kinase peptide specificity using structural information</title>
    <link>http://www.citeulike.org/user/neils/article/2796492</link>
    <description>&lt;i&gt;Nucl. Acids Res. (13 May 2008), gkn279.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The Predikin webserver allows users to predict substrates of protein kinases. The Predikin system is built from three components: a database of protein kinase substrates that links phosphorylation sites with specific protein kinase sequences; a perl module to analyse query protein kinases and a web interface through which users can submit protein kinases for analysis. The Predikin perl module provides methods to (i) locate protein kinase catalytic domains in a sequence, (ii) classify them by type or family, (iii) identify substrate-determining residues, (iv) generate weighted scoring matrices using three different methods, (v) extract putative phosphorylation sites in query substrate sequences and (vi) score phosphorylation sites for a given kinase, using optional filters. The web interface provides user-friendly access to each of these functions and allows users to obtain rapidly a set of predictions that they can export for further analysis. The server is available at http://predikin.biosci.uq.edu.au. 10.1093/nar/gkn279</description>
    <dc:title>The Predikin webserver: improved prediction of protein kinase peptide specificity using structural information</dc:title>

    <dc:creator>Neil Saunders</dc:creator>
    <dc:creator>Bostjan Kobe</dc:creator>
    <dc:identifier>doi:10.1093/nar/gkn279</dc:identifier>
    <dc:source>Nucl. Acids Res. (13 May 2008), gkn279.</dc:source>
    <dc:date>2008-05-14T02:54:55-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Nucl. Acids Res.</prism:publicationName>
    <prism:startingPage>gkn279</prism:startingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>kinase</prism:category>
    <prism:category>peptide</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>predikin</prism:category>
    <prism:category>specificity</prism:category>
    <prism:category>webserver</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2794838">
    <title>Biocomputational prediction of small non-coding RNAs in Streptomyces</title>
    <link>http://www.citeulike.org/user/neils/article/2794838</link>
    <description>&lt;i&gt;BMC Genomics, Vol. 9, No. 1. (2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND:The first systematic study of small non-coding RNAs (sRNA, ncRNA) in Streptomyces is presented. Except for a few exceptions, the Streptomyces sRNAs, as well as the sRNAs in other genera of the Actinomyces group, have remained unstudied. This study was based on sequence conservation in intergenic regions of Streptomyces, localization of transcription termination factors, and genomic arrangement of genes flanking the predicted sRNAs. RESULTS:Thirty-two potential sRNAs in Streptomyces were predicted. Of these, expression of 20 was detected by microarrays and RT-PCR. The prediction was validated by a structure based computational approach. Two predicted sRNAs were found to be terminated by transcription termination factors different from the Rho-independent terminators. One predicted sRNA was identified computationally with high probability as a Streptomyces 6S RNA. Out of the 32 predicted sRNAs, 24 were found to be structurally dissimilar from known sRNAs. CONCLUSIONS:Streptomyces is the largest genus of Actinomyces, whose sRNAs have not been studied. The Actinomyces is a group of bacterial species with unique genomes and phenotypes. Therefore, in Actinomyces, new unique bacterial sRNAs may be identified. The sequence and structural dissimilarity of the predicted Streptomyces sRNAs demonstrated by this study serve as the first evidence of the uniqueness of Actinomyces sRNAs.</description>
    <dc:title>Biocomputational prediction of small non-coding RNAs in Streptomyces</dc:title>

    <dc:creator>Josef Panek</dc:creator>
    <dc:creator>Jan Bobek</dc:creator>
    <dc:creator>Karel Mikulik</dc:creator>
    <dc:creator>Marek Basler</dc:creator>
    <dc:creator>Jiri Vohradsky</dc:creator>
    <dc:identifier>doi:10.1186/1471-2164-9-217</dc:identifier>
    <dc:source>BMC Genomics, Vol. 9, No. 1. (2008)</dc:source>
    <dc:date>2008-05-13T12:06:38-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>BMC Genomics</prism:publicationName>
    <prism:volume>9</prism:volume>
    <prism:number>1</prism:number>
    <prism:category>bacteria</prism:category>
    <prism:category>bioinformatics</prism:category>
    <prism:category>ncrna</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>streptomyces</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2793899">
    <title>In pursuit of virtual lead optimization: The role of the receptor structure and ensembles in accurate docking</title>
    <link>http://www.citeulike.org/user/neils/article/2793899</link>
    <description>&lt;i&gt;Proteins: Structure, Function, and Bioinformatics, Vol. 9999, No. 9999. (2008), NA.&lt;/i&gt;</description>
    <dc:title>In pursuit of virtual lead optimization: The role of the receptor structure and ensembles in accurate docking</dc:title>

    <dc:creator>Bolstad</dc:creator>
    <dc:creator>Amy Anderson</dc:creator>
    <dc:identifier>doi:10.1002/prot.22081</dc:identifier>
    <dc:source>Proteins: Structure, Function, and Bioinformatics, Vol. 9999, No. 9999. (2008), NA.</dc:source>
    <dc:date>2008-05-13T08:16:16-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Proteins: Structure, Function, and Bioinformatics</prism:publicationName>
    <prism:volume>9999</prism:volume>
    <prism:number>9999</prism:number>
    <prism:startingPage>NA</prism:startingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>docking</prism:category>
    <prism:category>lead</prism:category>
    <prism:category>optimisation</prism:category>
    <prism:category>screen</prism:category>
    <prism:category>virtual</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2791320">
    <title>Comprehensive inventory of protein complexes in the Protein Data Bank from consistent classification of interfaces</title>
    <link>http://www.citeulike.org/user/neils/article/2791320</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 9, No. 1. (2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND:Protein-protein interactions are ubiquitous and essential for all cellular processes. High-resolution X-ray crystallographic structures of protein complexes can reveal the details of their function and provide a basis for many computational and experimental approaches. Differentiation between biological and non-biological contacts and reconstruction of the intact complex is a challenging computational problem. A successful solution can provide additional insights into the fundamental principles of biological recognition and reduce errors in many algorithms and databases utilizing interaction information extracted from the Protein Data Bank (PDB). RESULTS:We have developed a method for identifying protein complexes in the PDB X-ray structures by a four step procedure: (1) comprehensively collecting all protein-protein interfaces; (2) clustering similar protein-protein interfaces together; (3) estimating the probability that each cluster is relevant based on a diverse set of properties; and (4) combining these scores for each PDB entry in order to predict the complex structure. The resulting clusters of biologically relevant interfaces provide a reliable catalog of evolutionary conserved protein-protein interactions. These interfaces, as well as the predicted protein complexes, are available from the Protein Interface Server (PInS) website at http://pins.ornl.gov/. CONCLUSIONS:Our method demonstrates an almost two-fold reduction of the annotation error rate as evaluated on a large benchmark set of complexes validated from the literature. We also estimate relative contributions of each interface property to the accurate discrimination of biologically relevant interfaces and discuss possible directions for further improving the prediction method.</description>
    <dc:title>Comprehensive inventory of protein complexes in the Protein Data Bank from consistent classification of interfaces</dc:title>

    <dc:creator>Andrew Bordner</dc:creator>
    <dc:creator>Andrey Gorin</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-9-234</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 9, No. 1. (2008)</dc:source>
    <dc:date>2008-05-12T23:09:37-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:volume>9</prism:volume>
    <prism:number>1</prism:number>
    <prism:category>bioinformatics</prism:category>
    <prism:category>complex</prism:category>
    <prism:category>interaction</prism:category>
    <prism:category>interface</prism:category>
    <prism:category>pdb</prism:category>
    <prism:category>protein-protein</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2783989">
    <title>An online database for the detection of novel archaeal sequences in human ESTs.</title>
    <link>http://www.citeulike.org/user/neils/article/2783989</link>
    <description>&lt;i&gt;Bioinformatics (Oxford, England), Vol. 20, No. 15. (12 October 2004), pp. 2361-2362.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We have developed a rapid, automated screening system and online database to detect foreign sequences of archaeal origin in human expressed sequence tags. The aim of the screening is to detect transcripts that may be derived from novel, putative archaeal pathogens or symbionts. AVAILABILITY: http://psychro.bioinformatics.unsw.edu.au/pathogen/index.php.</description>
    <dc:title>An online database for the detection of novel archaeal sequences in human ESTs.</dc:title>

    <dc:creator>NF Saunders</dc:creator>
    <dc:creator>PM Curmi</dc:creator>
    <dc:creator>R Cavicchioli</dc:creator>
    <dc:source>Bioinformatics (Oxford, England), Vol. 20, No. 15. (12 October 2004), pp. 2361-2362.</dc:source>
    <dc:date>2008-05-11T09:25:42-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Bioinformatics (Oxford, England)</prism:publicationName>
    <prism:issn>1367-4803</prism:issn>
    <prism:volume>20</prism:volume>
    <prism:number>15</prism:number>
    <prism:startingPage>2361</prism:startingPage>
    <prism:endingPage>2362</prism:endingPage>
    <prism:category>archaea</prism:category>
    <prism:category>bioinformatics</prism:category>
    <prism:category>database</prism:category>
    <prism:category>est</prism:category>
    <prism:category>genomics</prism:category>
    <prism:category>pathogen</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/1090867">
    <title>Mechanisms of thermal adaptation revealed from the genomes of the Antarctic Archaea Methanogenium frigidum and Methanococcoides burtonii.</title>
    <link>http://www.citeulike.org/user/neils/article/1090867</link>
    <description>&lt;i&gt;Genome Res, Vol. 13, No. 7. (July 2003), pp. 1580-1588.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We generated draft genome sequences for two cold-adapted Archaea, Methanogenium frigidum and Methanococcoides burtonii, to identify genotypic characteristics that distinguish them from Archaea with a higher optimal growth temperature (OGT). Comparative genomics revealed trends in amino acid and tRNA composition, and structural features of proteins. Proteins from the cold-adapted Archaea are characterized by a higher content of noncharged polar amino acids, particularly Gln and Thr and a lower content of hydrophobic amino acids, particularly Leu. Sequence data from nine methanogen genomes (OGT 15 degrees -98 degrees C) were used to generate 1111 modeled protein structures. Analysis of the models from the cold-adapted Archaea showed a strong tendency in the solvent-accessible area for more Gln, Thr, and hydrophobic residues and fewer charged residues. A cold shock domain (CSD) protein (CspA homolog) was identified in M. frigidum, two hypothetical proteins with CSD-folds in M. burtonii, and a unique winged helix DNA-binding domain protein in M. burtonii. This suggests that these types of nucleic acid binding proteins have a critical role in cold-adapted Archaea. Structural analysis of tRNA sequences from the Archaea indicated that GC content is the major factor influencing tRNA stability in hyperthermophiles, but not in the psychrophiles, mesophiles or moderate thermophiles. Below an OGT of 60 degrees C, the GC content in tRNA was largely unchanged, indicating that any requirement for flexibility of tRNA in psychrophiles is mediated by other means. This is the first time that comparisons have been performed with genome data from Archaea spanning the growth temperature extremes from psychrophiles to hyperthermophiles.</description>
    <dc:title>Mechanisms of thermal adaptation revealed from the genomes of the Antarctic Archaea Methanogenium frigidum and Methanococcoides burtonii.</dc:title>

    <dc:creator>NF Saunders</dc:creator>
    <dc:creator>T Thomas</dc:creator>
    <dc:creator>PM Curmi</dc:creator>
    <dc:creator>JS Mattick</dc:creator>
    <dc:creator>E Kuczek</dc:creator>
    <dc:creator>R Slade</dc:creator>
    <dc:creator>J Davis</dc:creator>
    <dc:creator>PD Franzmann</dc:creator>
    <dc:creator>D Boone</dc:creator>
    <dc:creator>K Rusterholtz</dc:creator>
    <dc:creator>R Feldman</dc:creator>
    <dc:creator>C Gates</dc:creator>
    <dc:creator>S Bench</dc:creator>
    <dc:creator>K Sowers</dc:creator>
    <dc:creator>K Kadner</dc:creator>
    <dc:creator>A Aerts</dc:creator>
    <dc:creator>P Dehal</dc:creator>
    <dc:creator>C Detter</dc:creator>
    <dc:creator>T Glavina</dc:creator>
    <dc:creator>S Lucas</dc:creator>
    <dc:creator>P Richardson</dc:creator>
    <dc:creator>F Larimer</dc:creator>
    <dc:creator>L Hauser</dc:creator>
    <dc:creator>M Land</dc:creator>
    <dc:creator>R Cavicchioli</dc:creator>
    <dc:identifier>doi:10.1101/gr.1180903</dc:identifier>
    <dc:source>Genome Res, Vol. 13, No. 7. (July 2003), pp. 1580-1588.</dc:source>
    <dc:date>2007-02-06T16:51:44-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Genome Res</prism:publicationName>
    <prism:issn>1088-9051</prism:issn>
    <prism:volume>13</prism:volume>
    <prism:number>7</prism:number>
    <prism:startingPage>1580</prism:startingPage>
    <prism:endingPage>1588</prism:endingPage>
    <prism:category>antarctic</prism:category>
    <prism:category>bioinformatics</prism:category>
    <prism:category>genomics</prism:category>
    <prism:category>methanococcoides</prism:category>
    <prism:category>methanogenium</prism:category>
    <prism:category>psychrophily</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2783961">
    <title>Serpins in unicellular Eukarya, Archaea, and Bacteria: sequence analysis and evolution.</title>
    <link>http://www.citeulike.org/user/neils/article/2783961</link>
    <description>&lt;i&gt;Journal of molecular evolution, Vol. 59, No. 4. (October 2004), pp. 437-447.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Most serpins irreversibly inactivate specific serine proteinases of the chymotrypsin family. Inhibitory serpins are unusual proteins in that their native structure is metastable, and rapid conversion to a relaxed state is required to trap target enzymes in a covalent complex. The evolutionary origin of the serpin fold is unresolved, and while serpins in animals are known to be involved in the regulation of a remarkable diversity of metabolic processes, the physiological functions of homologues from other phyla are unknown. Addressing these questions, here we analyze serpin genes identified in unicellular eukaryotes: the green alga Chlamydomonas reinhardtii, the dinoflagellate Alexandrium tamarense, and the human pathogens Entamoeba spp., Eimeria tenella, Toxoplasma gondii, and Giardia lamblia. We compare these sequences to others, particularly those in the complete genome sequences of Archaea, where serpins were found in only 4 of 13 genera, and Bacteria, in only 9 of 56 genera. The serpins from unicellular organisms appear to be phylogenetically distinct from all of the clades of higher eukaryotic serpins. Most of the sequences from unicellular organisms have the characteristics of inhibitory serpins, and where multiple serpin genes are found in one genome, variability is displayed in the region of the reactive-center loop important for specificity. All the unicellular eukaryotic serpins have large hydrophobic or positively charged residues at the putative P1 position. In contrast, none of the prokaryotic serpins has a residue of these types at the predicted P1 position, but many have smaller, neutral residues. Serpin evolution is discussed.</description>
    <dc:title>Serpins in unicellular Eukarya, Archaea, and Bacteria: sequence analysis and evolution.</dc:title>

    <dc:creator>TH Roberts</dc:creator>
    <dc:creator>J Hejgaard</dc:creator>
    <dc:creator>NF Saunders</dc:creator>
    <dc:creator>R Cavicchioli</dc:creator>
    <dc:creator>PM Curmi</dc:creator>
    <dc:identifier>doi:10.1007/s00239-004-2635-6</dc:identifier>
    <dc:source>Journal of molecular evolution, Vol. 59, No. 4. (October 2004), pp. 437-447.</dc:source>
    <dc:date>2008-05-11T09:11:29-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Journal of molecular evolution</prism:publicationName>
    <prism:issn>0022-2844</prism:issn>
    <prism:volume>59</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>437</prism:startingPage>
    <prism:endingPage>447</prism:endingPage>
    <prism:category>analysis</prism:category>
    <prism:category>bioinformatics</prism:category>
    <prism:category>evolution</prism:category>
    <prism:category>sequence</prism:category>
    <prism:category>serpin</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2783957">
    <title>Predicted roles for hypothetical proteins in the low-temperature expressed proteome of the Antarctic archaeon Methanococcoides burtonii.</title>
    <link>http://www.citeulike.org/user/neils/article/2783957</link>
    <description>&lt;i&gt;Journal of proteome research, Vol. 4, No. 2. (2005), pp. 464-472.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Using liquid chromatography-mass spectrometry, 528 proteins were identified that are expressed during growth at 4 degrees C in the cold adapted archaeon, Methanococcoides burtonii. Of those, 135 were annotated previously as unique or conserved hypothetical proteins. We have performed a comprehensive, integrated analysis of the latter proteins using threading, InterProScan, predicted subcellular localization and visualization of conserved gene context across multiple prokaryotic genomes. Functional information was obtained for 55 proteins, providing new insight into the physiology of M. burtonii. Many of the proteins were predicted to be involved in DNA/RNA binding or modification and cell signaling, suggesting a complex, uncharacterized regulatory network controlling cellular processes during growth at low-temperature. Novel enzymatic functions were predicted for several proteins, including a putative candidate gene for the posttranslational modification of the key methanogenesis enzyme coenzyme M methyl reductase. A bacterial-like CRISPR locus was identified as a strong candidate for archaeal-bacterial lateral gene transfer. Gene context analysis proved a valuable augmentation to the other predictive methods in several cases, by revealing conserved gene associations and annotations in other microbial genomes. Our results underscore the importance of addressing the &#34;hypothetical protein problem&#34; for a complete understanding of cell physiology.</description>
    <dc:title>Predicted roles for hypothetical proteins in the low-temperature expressed proteome of the Antarctic archaeon Methanococcoides burtonii.</dc:title>

    <dc:creator>NF Saunders</dc:creator>
    <dc:creator>A Goodchild</dc:creator>
    <dc:creator>M Raftery</dc:creator>
    <dc:creator>M Guilhaus</dc:creator>
    <dc:creator>PM Curmi</dc:creator>
    <dc:creator>R Cavicchioli</dc:creator>
    <dc:identifier>doi:10.1021/pr049797+</dc:identifier>
    <dc:source>Journal of proteome research, Vol. 4, No. 2. (2005), pp. 464-472.</dc:source>
    <dc:date>2008-05-11T09:08:43-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Journal of proteome research</prism:publicationName>
    <prism:issn>1535-3893</prism:issn>
    <prism:volume>4</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>464</prism:startingPage>
    <prism:endingPage>472</prism:endingPage>
    <prism:category>antarctic</prism:category>
    <prism:category>archaea</prism:category>
    <prism:category>bioinformatics</prism:category>
    <prism:category>function</prism:category>
    <prism:category>methanococcoides</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein</prism:category>
    <prism:category>psychrophily</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2771903">
    <title>Discovering sequence motifs with arbitrary insertions and deletions.</title>
    <link>http://www.citeulike.org/user/neils/article/2771903</link>
    <description>&lt;i&gt;PLoS computational biology, Vol. 4, No. 4. (April 2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BIOLOGY IS ENCODED IN MOLECULAR SEQUENCES: deciphering this encoding remains a grand scientific challenge. Functional regions of DNA, RNA, and protein sequences often exhibit characteristic but subtle motifs; thus, computational discovery of motifs in sequences is a fundamental and much-studied problem. However, most current algorithms do not allow for insertions or deletions (indels) within motifs, and the few that do have other limitations. We present a method, GLAM2 (Gapped Local Alignment of Motifs), for discovering motifs allowing indels in a fully general manner, and a companion method GLAM2SCAN for searching sequence databases using such motifs. glam2 is a generalization of the gapless Gibbs sampling algorithm. It re-discovers variable-width protein motifs from the PROSITE database significantly more accurately than the alternative methods PRATT and SAM-T2K. Furthermore, it usefully refines protein motifs from the ELM database: in some cases, the refined motifs make orders of magnitude fewer overpredictions than the original ELM regular expressions. GLAM2 performs respectably on the BAliBASE multiple alignment benchmark, and may be superior to leading multiple alignment methods for &#34;motif-like&#34; alignments with N- and C-terminal extensions. Finally, we demonstrate the use of GLAM2 to discover protein kinase substrate motifs and a gapped DNA motif for the LIM-only transcriptional regulatory complex: using GLAM2SCAN, we identify promising targets for the latter. GLAM2 is especially promising for short protein motifs, and it should improve our ability to identify the protein cleavage sites, interaction sites, post-translational modification attachment sites, etc., that underlie much of biology. It may be equally useful for arbitrarily gapped motifs in DNA and RNA, although fewer examples of such motifs are known at present. 
GLAM2 is public domain software, available for download at http://bioinformatics.org.au/glam2.</description>
    <dc:title>Discovering sequence motifs with arbitrary insertions and deletions.</dc:title>

    <dc:creator>MC Frith</dc:creator>
    <dc:creator>NF Saunders</dc:creator>
    <dc:creator>B Kobe</dc:creator>
    <dc:creator>TL Bailey</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.1000071</dc:identifier>
    <dc:source>PLoS computational biology, Vol. 4, No. 4. (April 2008)</dc:source>
    <dc:date>2008-05-08T14:33:21-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>PLoS computational biology</prism:publicationName>
    <prism:issn>1553-7358</prism:issn>
    <prism:volume>4</prism:volume>
    <prism:number>4</prism:number>
    <prism:category>bioinformatics</prism:category>
    <prism:category>discovery</prism:category>
    <prism:category>glam2</prism:category>
    <prism:category>motifs</prism:category>
    <prism:category>sequence</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2774912">
    <title>Fast protein tertiary structure retrieval based on global surface shape similarity</title>
    <link>http://www.citeulike.org/user/neils/article/2774912</link>
    <description>&lt;i&gt;Proteins: Structure, Function, and Bioinformatics, Vol. 9999, No. 9999. (xx 2008)&lt;/i&gt;</description>
    <dc:title>Fast protein tertiary structure retrieval based on global surface shape similarity</dc:title>

    <dc:creator>Lee Sael</dc:creator>
    <dc:creator>Bin Li</dc:creator>
    <dc:creator>David La</dc:creator>
    <dc:creator>Yi Fang</dc:creator>
    <dc:creator>Karthik Ramani</dc:creator>
    <dc:creator>Raif Rustamov</dc:creator>
    <dc:creator>Daisuke Kihara</dc:creator>
    <dc:identifier>doi:10.1002/prot.22030</dc:identifier>
    <dc:source>Proteins: Structure, Function, and Bioinformatics, Vol. 9999, No. 9999. (xx 2008)</dc:source>
    <dc:date>2008-05-09T09:19:56-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Proteins: Structure, Function, and Bioinformatics</prism:publicationName>
    <prism:volume>9999</prism:volume>
    <prism:number>9999</prism:number>
    <prism:category>bioinformatics</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein</prism:category>
    <prism:category>retrieval</prism:category>
    <prism:category>structure</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2773795">
    <title>The minimum information about a genome sequence (MIGS) specification</title>
    <link>http://www.citeulike.org/user/neils/article/2773795</link>
    <description>&lt;i&gt;Nat Biotech, Vol. 26, No. 5. (May 2008), pp. 541-547.&lt;/i&gt;</description>
    <dc:title>The minimum information about a genome sequence (MIGS) specification</dc:title>

    <dc:creator>Dawn Field</dc:creator>
    <dc:creator>George Garrity</dc:creator>
    <dc:creator>Tanya Gray</dc:creator>
    <dc:creator>Norman Morrison</dc:creator>
    <dc:creator>Jeremy Selengut</dc:creator>
    <dc:creator>Peter Sterk</dc:creator>
    <dc:creator>Tatiana Tatusova</dc:creator>
    <dc:creator>Nicholas Thomson</dc:creator>
    <dc:creator>Michael Allen</dc:creator>
    <dc:creator>Samuel Angiuoli</dc:creator>
    <dc:creator>Michael Ashburner</dc:creator>
    <dc:creator>Nelson Axelrod</dc:creator>
    <dc:creator>Sandra Baldauf</dc:creator>
    <dc:creator>Stuart Ballard</dc:creator>
    <dc:creator>Jeffrey Boore</dc:creator>
    <dc:creator>Guy Cochrane</dc:creator>
    <dc:creator>James Cole</dc:creator>
    <dc:creator>Peter Dawyndt</dc:creator>
    <dc:creator>Paul De Vos</dc:creator>
    <dc:creator>Claude Depamphilis</dc:creator>
    <dc:creator>Robert Edwards</dc:creator>
    <dc:creator>Nadeem Faruque</dc:creator>
    <dc:creator>Robert Feldman</dc:creator>
    <dc:creator>Jack Gilbert</dc:creator>
    <dc:creator>Paul Gilna</dc:creator>
    <dc:creator>Frank Glockner</dc:creator>
    <dc:creator>Philip Goldstein</dc:creator>
    <dc:creator>Robert Guralnick</dc:creator>
    <dc:creator>Dan Haft</dc:creator>
    <dc:creator>David Hancock</dc:creator>
    <dc:creator>Henning Hermjakob</dc:creator>
    <dc:creator>Christiane Hertz-Fowler</dc:creator>
    <dc:creator>Phil Hugenholtz</dc:creator>
    <dc:creator>Ian Joint</dc:creator>
    <dc:creator>Leonid Kagan</dc:creator>
    <dc:creator>Matthew Kane</dc:creator>
    <dc:creator>Jessie Kennedy</dc:creator>
    <dc:creator>George Kowalchuk</dc:creator>
    <dc:creator>Renzo Kottmann</dc:creator>
    <dc:creator>Eugene Kolker</dc:creator>
    <dc:creator>Saul Kravitz</dc:creator>
    <dc:creator>Nikos Kyrpides</dc:creator>
    <dc:creator>Jim Leebens-Mack</dc:creator>
    <dc:creator>Suzanna Lewis</dc:creator>
    <dc:creator>Kelvin Li</dc:creator>
    <dc:creator>Allyson Lister</dc:creator>
    <dc:creator>Phillip Lord</dc:creator>
    <dc:creator>Natalia Maltsev</dc:creator>
    <dc:creator>Victor Markowitz</dc:creator>
    <dc:creator>Jennifer Martiny</dc:creator>
    <dc:creator>Barbara Methe</dc:creator>
    <dc:creator>Ilene Mizrachi</dc:creator>
    <dc:creator>Richard Moxon</dc:creator>
    <dc:creator>Karen Nelson</dc:creator>
    <dc:creator>Julian Parkhill</dc:creator>
    <dc:creator>Lita Proctor</dc:creator>
    <dc:creator>Owen White</dc:creator>
    <dc:creator>Susanna-Assunta Sansone</dc:creator>
    <dc:creator>Andrew Spiers</dc:creator>
    <dc:creator>Robert Stevens</dc:creator>
    <dc:creator>Paul Swift</dc:creator>
    <dc:creator>Chris Taylor</dc:creator>
    <dc:creator>Yoshio Tateno</dc:creator>
    <dc:creator>Adrian Tett</dc:creator>
    <dc:creator>Sarah Turner</dc:creator>
    <dc:creator>David Ussery</dc:creator>
    <dc:creator>Bob Vaughan</dc:creator>
    <dc:creator>Naomi Ward</dc:creator>
    <dc:creator>Trish Whetzel</dc:creator>
    <dc:creator>Ingio San Gil</dc:creator>
    <dc:creator>Gareth Wilson</dc:creator>
    <dc:creator>Anil Wipat</dc:creator>
    <dc:identifier>doi:10.1038/nbt1360</dc:identifier>
    <dc:source>Nat Biotech, Vol. 26, No. 5. (May 2008), pp. 541-547.</dc:source>
    <dc:date>2008-05-08T23:35:18-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Nat Biotech</prism:publicationName>
    <prism:volume>26</prism:volume>
    <prism:number>5</prism:number>
    <prism:startingPage>541</prism:startingPage>
    <prism:endingPage>547</prism:endingPage>
    <prism:publisher>Nature Publishing Group</prism:publisher>
    <prism:category>bioinformatics</prism:category>
    <prism:category>genomics</prism:category>
    <prism:category>information</prism:category>
    <prism:category>sequencing</prism:category>
    <prism:category>standards</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2767706">
    <title>Genome analysis of the platypus reveals unique signatures of evolution</title>
    <link>http://www.citeulike.org/user/neils/article/2767706</link>
    <description>&lt;i&gt;Nature, Vol. 453, No. 7192. (May 2008), pp. 175-183.&lt;/i&gt;</description>
    <dc:title>Genome analysis of the platypus reveals unique signatures of evolution</dc:title>

    <dc:identifier>doi:10.1038/nature06936</dc:identifier>
    <dc:source>Nature, Vol. 453, No. 7192. (May 2008), pp. 175-183.</dc:source>
    <dc:date>2008-05-07T23:43:22-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Nature</prism:publicationName>
    <prism:volume>453</prism:volume>
    <prism:number>7192</prism:number>
    <prism:startingPage>175</prism:startingPage>
    <prism:endingPage>183</prism:endingPage>
    <prism:publisher>Nature Publishing Group</prism:publisher>
    <prism:category>bioinformatics</prism:category>
    <prism:category>evolution</prism:category>
    <prism:category>genome</prism:category>
    <prism:category>platypus</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2743647">
    <title>SCPRED: Accurate prediction of protein structural class for sequences of twilight-zone similarity with predicting sequences</title>
    <link>http://www.citeulike.org/user/neils/article/2743647</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 9, No. 1. (2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND: Protein structure prediction methods provide accurate results when a homologous protein is predicted, while poorer predictions are obtained in the absence of homologous templates. However, some protein chains that share twilight-zone pairwise identity can form similar folds and thus determining structural similarity without the sequence similarity would be desirable for the structure prediction. The folding type of a protein or its domain is defined as the structural class. Current structural class prediction methods that predict the four structural classes defined in SCOP provide up to 63% accuracy for the datasets in which sequence identity of any pair of sequences belongs to the twilight-zone. We propose SCPRED method that improves prediction accuracy for sequences that share twilight-zone pairwise similarity with sequences used for the prediction. RESULTS: SCPRED uses a support vector machine classifier that takes several custom-designed features as its input to predict the structural classes. Based on extensive design that considers over 2300 index-, composition- and physicochemical properties-based features along with features based on the predicted secondary structure and content, the classifier's input includes 8 features based on information extracted from the secondary structure predicted with PSI-PRED and one feature computed from the sequence. Tests performed with datasets of 1673 protein chains, in which any pair of sequences shares twilight-zone similarity, show that SCPRED obtains 80.3% accuracy when predicting the four SCOP-defined structural classes, which is superior when compared with over a dozen recent competing methods that are based on support vector machine, logistic regression, and ensemble of classifiers predictors. 
CONCLUSIONS: The SCPRED can accurately find similar structures for sequences that share low identity with sequence used for the prediction. The high predictive accuracy achieved by SCPRED is attributed to the design of the features, which are capable of separating the structural classes in spite of their low dimensionality. We also demonstrate that the SCPRED's predictions can be successfully used as a post-processing filter to improve performance of modern fold classification methods.</description>
    <dc:title>SCPRED: Accurate prediction of protein structural class for sequences of twilight-zone similarity with predicting sequences</dc:title>

    <dc:creator>Lukasz Kurgan</dc:creator>
    <dc:creator>Krzysztof Cios</dc:creator>
    <dc:creator>Ke Chen</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-9-226</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 9, No. 1. (2008)</dc:source>
    <dc:date>2008-05-01T23:27:06-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:volume>9</prism:volume>
    <prism:number>1</prism:number>
    <prism:category>bioinformatics</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein</prism:category>
    <prism:category>sequence</prism:category>
    <prism:category>structure</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2727373">
    <title>A Quick Guide for Computer-Assisted Instruction in Computational Biology and Bioinformatics</title>
    <link>http://www.citeulike.org/user/neils/article/2727373</link>
    <description>&lt;i&gt;PLoS Comput Biol, Vol. 4, No. 4. (Apr 2008)&lt;/i&gt;</description>
    <dc:title>A Quick Guide for Computer-Assisted Instruction in Computational Biology and Bioinformatics</dc:title>

    <dc:creator>Manuel Costa</dc:creator>
    <dc:creator>Eduardo Galembeck</dc:creator>
    <dc:creator>Guilherme Marson</dc:creator>
    <dc:creator>Bayardo Torres</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.1000035</dc:identifier>
    <dc:source>PLoS Comput Biol, Vol. 4, No. 4. (Apr 2008)</dc:source>
    <dc:date>2008-04-28T05:38:52-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>PLoS Comput Biol</prism:publicationName>
    <prism:volume>4</prism:volume>
    <prism:number>4</prism:number>
    <prism:publisher>Public Library of Science</prism:publisher>
    <prism:category>bioinformatics</prism:category>
    <prism:category>computer-assisted</prism:category>
    <prism:category>teaching</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/1562564">
    <title>An evaluation of automated homology modelling methods at low target template sequence similarity</title>
    <link>http://www.citeulike.org/user/neils/article/1562564</link>
    <description>&lt;i&gt;Bioinformatics, Vol. 23, No. 15. (1 August 2007), pp. 1901-1908.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Motivation: There are two main areas of difficulty in homology modelling that are particularly important when sequence identity between target and template falls below 50%: sequence alignment and loop building. These problems become magnified with automatic modelling processes, as there is no human input to correct mistakes. As such we have benchmarked several stand-alone strategies that could be implemented in a workflow for automated high-throughput homology modelling. These include three new sequence-structure alignment programs: 3D-Coffee, Staccato and SAlign, plus five homology modelling programs and their respective loop building methods: Builder, Nest, Modeller, SegMod/ENCAD and Swiss-Model. The SABmark database provided 123 targets with at least five templates from the same SCOP family and sequence identities &#8804;50%. Results: When using Modeller as the common modelling program, 3D-Coffee outperforms Staccato and SAlign using both multiple templates and the best single template, and across the sequence identity range 20&#8211;50%. The mean model RMSD generated from 3D-Coffee using multiple templates is 15 and 28% (or using single templates, 3 and 13%) better than those generated by Staccato and Salign, respectively. 3D-Coffee gives equivalent modelling accuracy from multiple and single templates, but Staccato and SAlign are more successful with single templates, their quality deteriorating as additional lower sequence identity templates are added. Evaluating the different homology modelling programs, on average Modeller performs marginally better in overall modelling than the others tested. However, on average Nest produces the best loops with an 8% improvement by mean RMSD compared to the loops generated by Builder. Contact: r.m.jackson@leeds.ac.uk. Supplementary information: Supplementary data are available at Bioinformatics online. 
10.1093/bioinformatics/btm262</description>
    <dc:title>An evaluation of automated homology modelling methods at low target template sequence similarity</dc:title>

    <dc:creator>James Dalton</dc:creator>
    <dc:creator>Richard Jackson</dc:creator>
    <dc:identifier>doi:10.1093/bioinformatics/btm262</dc:identifier>
    <dc:source>Bioinformatics, Vol. 23, No. 15. (1 August 2007), pp. 1901-1908.</dc:source>
    <dc:date>2007-08-15T13:18:22-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Bioinformatics</prism:publicationName>
    <prism:volume>23</prism:volume>
    <prism:number>15</prism:number>
    <prism:startingPage>1901</prism:startingPage>
    <prism:endingPage>1908</prism:endingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>evaluation</prism:category>
    <prism:category>homology</prism:category>
    <prism:category>modelling</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein</prism:category>
    <prism:category>structure</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2713333">
    <title>NetworKIN: a resource for exploring cellular phosphorylation networks</title>
    <link>http://www.citeulike.org/user/neils/article/2713333</link>
    <description>&lt;i&gt;Nucl. Acids Res., Vol. 36 (2008), pp. D695-699.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Protein kinases control cellular responses by phosphorylating specific substrates. Recent proteome-wide mapping of protein phosphorylation sites by mass spectrometry has discovered thousands of in vivo sites. Systematically assigning all 518 human kinases to all these sites is a challenging problem. The NetworKIN database (http://networkin.info) integrates consensus substrate motifs with context modelling for improved prediction of cellular kinase-substrate relations. Based on the latest human phosphoproteome from the Phospho.ELM and PhosphoSite databases, the resource offers insight into phosphorylation-modulated interaction networks. Here, we describe how NetworKIN can be used for both global and targeted molecular studies. Via the web interface users can query the database of precomputed kinase-substrate relations or obtain predictions on novel phosphoproteins. The database currently contains a predicted phosphorylation network with 20 224 site-specific interactions involving 3978 phosphoproteins and 73 human kinases from 20 families.</description>
    <dc:title>NetworKIN: a resource for exploring cellular phosphorylation networks</dc:title>

    <dc:creator>Rune Linding</dc:creator>
    <dc:creator>Lars Jensen</dc:creator>
    <dc:creator>Adrian Pasculescu</dc:creator>
    <dc:creator>Marina Olhovsky</dc:creator>
    <dc:creator>Karen Colwill</dc:creator>
    <dc:creator>Peer Bork</dc:creator>
    <dc:creator>Michael Yaffe</dc:creator>
    <dc:creator>Tony Pawson</dc:creator>
    <dc:source>Nucl. Acids Res., Vol. 36 (2008), pp. D695-699.</dc:source>
    <dc:date>2008-04-24T15:22:01-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Nucl. Acids Res.</prism:publicationName>
    <prism:volume>36</prism:volume>
    <prism:startingPage>D695</prism:startingPage>
    <prism:endingPage>D699</prism:endingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>network</prism:category>
    <prism:category>phosphorylation</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>signal</prism:category>
    <prism:category>transduction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2694137">
    <title>Genome-wide subcellular localization of putative outer membrane and extracellular proteins in Leptospira interrogans serovar Lai genome using bioinformatics approaches</title>
    <link>http://www.citeulike.org/user/neils/article/2694137</link>
    <description>&lt;i&gt;BMC Genomics, Vol. 9, No. 1. (2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND: In bacterial pathogens, both cell surface-exposed outer membrane proteins and proteins secreted into the extracellular environment play crucial roles in host-pathogen interaction and pathogenesis. Considerable efforts have been made to identify outer membrane (OM) and extracellular (EX) proteins produced by Leptospira interrogans, which may be used as novel targets for the development of infection markers and leptospirosis vaccines. RESULTS: In this study we used a novel computational framework based on combined prediction methods with deduction concept to identify putative OM and EX proteins encoded by the Leptospira interrogans genome. The framework consists of the following steps: (1) identifying proteins homologous to known proteins in subcellular localization databases derived from the &#34;consensus vote&#34; of computational predictions, (2) incorporating homology based search and structural information to enhance gene annotation and functional identification to infer the specific structural characters and localizations, and (3) developing a specific classifier for cytoplasmic proteins (CP) and cytoplasmic membrane proteins (CM) using Linear discriminant analysis (LDA). We have identified 114 putative EX and 63 putative OM proteins, of which 41% are conserved or hypothetical proteins containing sequence and/or protein folding structures similar to those of known EX and OM proteins. CONCLUSION: Overall results derived from the combined computational analysis correlate with the available experimental evidence. This is the most extensive in silico protein subcellular localization identification to date for Leptospira interrogans serovar Lai genome that may be useful in protein annotation, discovery of novel genes and understanding the biology of Leptospira.</description>
    <dc:title>Genome-wide subcellular localization of putative outer membrane and extracellular proteins in Leptospira interrogans serovar Lai genome using bioinformatics approaches</dc:title>

    <dc:creator>Wasna Viratyosin</dc:creator>
    <dc:creator>Supawadee Ingsriswang</dc:creator>
    <dc:creator>Eakasit Pacharawongsakda</dc:creator>
    <dc:creator>Prasit Palittapongarnpim</dc:creator>
    <dc:identifier>doi:10.1186/1471-2164-9-181</dc:identifier>
    <dc:source>BMC Genomics, Vol. 9, No. 1. (2008)</dc:source>
    <dc:date>2008-04-21T04:04:52-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>BMC Genomics</prism:publicationName>
    <prism:volume>9</prism:volume>
    <prism:number>1</prism:number>
    <prism:category>bacteria</prism:category>
    <prism:category>bioinformatics</prism:category>
    <prism:category>genomics</prism:category>
    <prism:category>leptospira</prism:category>
    <prism:category>localisation</prism:category>
    <prism:category>membrane</prism:category>
    <prism:category>pathogen</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein</prism:category>
    <prism:category>scl</prism:category>
    <prism:category>secretion</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2693993">
    <title>Functional Maps of Protein Complexes from Quantitative Genetic Interaction Data</title>
    <link>http://www.citeulike.org/user/neils/article/2693993</link>
    <description>&lt;i&gt;PLoS Comput Biol, Vol. 4, No. 4. (Apr 2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Recently, a number of advanced screening technologies have allowed for the comprehensive quantification of aggravating and alleviating genetic interactions among gene pairs. In parallel, TAP-MS studies (tandem affinity purification followed by mass spectroscopy) have been successful at identifying physical protein interactions that can indicate proteins participating in the same molecular complex. Here, we propose a method for the joint learning of protein complexes and their functional relationships by integration of quantitative genetic interactions and TAP-MS data. Using 3 independent benchmark datasets, we demonstrate that this method is &#62;50% more accurate at identifying functionally related protein pairs than previous approaches. Application to genes involved in yeast chromosome organization identifies a functional map of 91 multimeric complexes, a number of which are novel or have been substantially expanded by addition of new subunits. Interestingly, we find that complexes that are enriched for aggravating genetic interactions (i.e., synthetic lethality) are more likely to contain essential genes, linking each of these interactions to an underlying mechanism. These results demonstrate the importance of both large-scale genetic and physical interaction data in mapping pathway architecture and function.</description>
    <dc:title>Functional Maps of Protein Complexes from Quantitative Genetic Interaction Data</dc:title>

    <dc:creator>Sourav Bandyopadhyay</dc:creator>
    <dc:creator>Ryan Kelley</dc:creator>
    <dc:creator>Nevan Krogan</dc:creator>
    <dc:creator>Trey Ideker</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.1000065</dc:identifier>
    <dc:source>PLoS Comput Biol, Vol. 4, No. 4. (Apr 2008)</dc:source>
    <dc:date>2008-04-21T02:13:52-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>PLoS Comput Biol</prism:publicationName>
    <prism:volume>4</prism:volume>
    <prism:number>4</prism:number>
    <prism:publisher>Public Library of Science</prism:publisher>
    <prism:category>bioinformatics</prism:category>
    <prism:category>complex</prism:category>
    <prism:category>genetics</prism:category>
    <prism:category>interaction</prism:category>
    <prism:category>network</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein-protein</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2693990">
    <title>Predicting Co-Complexed Protein Pairs from Heterogeneous Data</title>
    <link>http://www.citeulike.org/user/neils/article/2693990</link>
    <description>&lt;i&gt;PLoS Comput Biol, Vol. 4, No. 4. (Apr 2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Proteins do not carry out their functions alone. Instead, they often act by participating in macromolecular complexes and play different functional roles depending on the other members of the complex. It is therefore interesting to identify co-complex relationships. Although protein complexes can be identified in a high-throughput manner by experimental technologies such as affinity purification coupled with mass spectrometry (APMS), these large-scale datasets often suffer from high false positive and false negative rates. Here, we present a computational method that predicts co-complexed protein pair (CCPP) relationships using kernel methods from heterogeneous data sources. We show that a diffusion kernel based on random walks on the full network topology yields good performance in predicting CCPPs from protein interaction networks. In the setting of direct ranking, a diffusion kernel performs much better than the mutual clustering coefficient. In the setting of SVM classifiers, a diffusion kernel performs much better than a linear kernel. We also show that combination of complementary information improves the performance of our CCPP recognizer. A summation of three diffusion kernels based on two-hybrid, APMS, and genetic interaction networks and three sequence kernels achieves better performance than the sequence kernels or diffusion kernels alone. Inclusion of additional features achieves a still better ROC50 of 0.937. Assuming a negative-to-positive ratio of 600∶1, the final classifier achieves 89.3% coverage at an estimated false discovery rate of 10%. Finally, we applied our prediction method to two recently described APMS datasets. We find that our predicted positives are highly enriched with CCPPs that are identified by both datasets, suggesting that our method successfully identifies true CCPPs. 
An SVM classifier trained from heterogeneous data sources provides accurate predictions of CCPPs in yeast. This computational method thereby provides an inexpensive method for identifying protein complexes that extends and complements high-throughput experimental data.</description>
    <dc:title>Predicting Co-Complexed Protein Pairs from Heterogeneous Data</dc:title>

    <dc:creator>Jian Qiu</dc:creator>
    <dc:creator>William Noble</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.1000054</dc:identifier>
    <dc:source>PLoS Comput Biol, Vol. 4, No. 4. (Apr 2008)</dc:source>
    <dc:date>2008-04-21T02:12:15-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>PLoS Comput Biol</prism:publicationName>
    <prism:volume>4</prism:volume>
    <prism:number>4</prism:number>
    <prism:publisher>Public Library of Science</prism:publisher>
    <prism:category>bioinformatics</prism:category>
    <prism:category>complex</prism:category>
    <prism:category>interaction</prism:category>
    <prism:category>kernel</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein-protein</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2687673">
    <title>Indicators from archaeal secretomes.</title>
    <link>http://www.citeulike.org/user/neils/article/2687673</link>
    <description>&lt;i&gt;Microbiological research (11 April 2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Just as in the Eukarya and the Bacteria, members of the Archaea need to export proteins beyond the cell membrane. This would be required to fulfill a variety of essential functions such as nutrient acquisition and biotransformations, maintenance of extracellular structures and more. Apart from the Eukarya and the Bacteria however, members of the Archaea share a number of unique characteristics. Does this uniqueness extend to the protein secretion system? It was the objective of this study to answer this question. To overcome the limited experimental information on secreted proteins in Archaea, this study was carried out by subjecting the available archaeal genomes, which represent halophiles, thermophiles, and extreme thermophiles, to bioinformatics analysis. Specifically, to examine the properties of the secretomes of the Archaea using the ExProt program. A total of 24 genomes were analyzed. Secretomes were found to fall in the range of 6% of total ORFs (Methanopyrus kandleri) to 19% (Halobacterium sp. NRC-1). Methanosarcina acetivorans has the highest fraction of lipoproteins (at 89) and the lowest (at 1) were members of the Thermoplasma, Pyrobaculum aerophilum, and Nanoarchaeum equitans. Based on the Tat consensus sequence, contribution of these secreted proteins to the secretomes were negligible, making up 8 proteins out of a total of 7105 predicted exported proteins. Amino acid composition, an attribute of signal peptides not used as a selection criteria by ExProt, of predicted archaeal signal peptides show that in the haloarchaea secretomes, the frequency of the amino acid Lys is much lower than that seen in bacterial signal peptides, but is compensated for by a higher frequency of Arg. 
It also showed that higher frequencies for Thr, Val, and Gly contribute to the hydrophobic character in haloarchaeal signal peptides, unlike bacterial signal peptides in which the hydrophobic character is dominated by Leu and Ile.</description>
    <dc:title>Indicators from archaeal secretomes.</dc:title>

    <dc:creator>Mazen Saleh</dc:creator>
    <dc:creator>Catharine Song</dc:creator>
    <dc:creator>Sabah Nasserulla</dc:creator>
    <dc:creator>L G Leduc</dc:creator>
    <dc:identifier>doi:10.1016/j.micres.2008.03.002</dc:identifier>
    <dc:source>Microbiological research (11 April 2008)</dc:source>
    <dc:date>2008-04-18T11:15:28-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Microbiological research</prism:publicationName>
    <prism:issn>0944-5013</prism:issn>
    <prism:category>analysis</prism:category>
    <prism:category>archaea</prism:category>
    <prism:category>bioinformatics</prism:category>
    <prism:category>genomics</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>secretion</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2679164">
    <title>Identification of Proteins Secreted by Malaria Parasite into Erythrocyte using SVM and PSSM profiles</title>
    <link>http://www.citeulike.org/user/neils/article/2679164</link>
    <description>&lt;i&gt;BMC Bioinformatics, Vol. 9, No. 1. (2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;BACKGROUND: Malaria parasite secretes various proteins in infected RBC for its growth and survival. Thus identification of these secretory proteins is important for developing vaccine/drug against malaria. The existing motif-based methods have got limited success due to lack of universal motif in all secretory proteins of malaria parasite. RESULTS: In this study a systematic attempt has been made to develop a general method for predicting secretory proteins of malaria parasite. All models were trained and tested on a non-redundant dataset of 252 secretory and 252 non-secretory proteins. We developed SVM models and achieved maximum MCC 0.72 with 85.65% accuracy and MCC 0.74 with 86.45% accuracy using amino acid and dipeptide composition respectively. SVM models were developed using split-amino acid and split-dipeptide composition and achieved maximum MCC 0.74 with 86.40% accuracy and MCC 0.77 with accuracy 88.22% respectively. In this study, for the first time PSSM profiles obtained from PSI-BLAST, have been used for predicting secretory proteins. We achieved maximum MCC 0.86 with 92.66% accuracy using PSSM based SVM model. All models developed in this study were evaluated using 5-fold cross-validation technique. CONCLUSION: This study demonstrates that secretory proteins have different residue composition than non-secretory proteins. Thus, it is possible to predict secretory proteins from its residue composition-using machine learning technique. The multiple sequence alignment provides more information than sequence itself. Thus performance of method based on PSSM profile is more accurate than method based on sequence composition. A web server PSEApred has been developed for predicting secretory proteins of malaria parasites (http://www.imtech.res.in/raghava/pseapred/).</description>
    <dc:title>Identification of Proteins Secreted by Malaria Parasite into Erythrocyte using SVM and PSSM profiles</dc:title>

    <dc:creator>Ruchi Verma</dc:creator>
    <dc:creator>Ajit Tiwari</dc:creator>
    <dc:creator>Sukhwinder Kaur</dc:creator>
    <dc:creator>Grish Varshney</dc:creator>
    <dc:creator>Gajendra Raghava</dc:creator>
    <dc:identifier>doi:10.1186/1471-2105-9-201</dc:identifier>
    <dc:source>BMC Bioinformatics, Vol. 9, No. 1. (2008)</dc:source>
    <dc:date>2008-04-16T23:30:21-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>BMC Bioinformatics</prism:publicationName>
    <prism:volume>9</prism:volume>
    <prism:number>1</prism:number>
    <prism:category>bioinformatics</prism:category>
    <prism:category>erythrocyte</prism:category>
    <prism:category>machine-learning</prism:category>
    <prism:category>malaria</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>protein</prism:category>
    <prism:category>pssm</prism:category>
    <prism:category>secretion</prism:category>
    <prism:category>svm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/neils/article/2675935">
    <title>Identification of Phosphorylation Sites in Protein Kinase A Substrates Using Artificial Neural Networks and Mass Spectrometry</title>
    <link>http://www.citeulike.org/user/neils/article/2675935</link>
    <description>&lt;i&gt;J. Proteome Res., Vol. 3, No. 3. (14 June 2004), pp. 426-433.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Abstract: Protein phosphorylation plays a key role in cell regulation and identification of phosphorylation sites is important for understanding their functional significance. Here, we present an artificial neural network algorithm: NetPhosK (http://www.cbs.dtu.dk/services/NetPhosK/) that predicts protein kinase A (PKA) phosphorylation sites. The neural network was trained with a positive set of 258 experimentally verified PKA phosphorylation sites. The predictions by NetPhosK were validated using four novel PKA substrates: Necdin, RFX5, En-2, and Wee 1. The four proteins were phosphorylated by PKA in vitro and 13 PKA phosphorylation sites were identified by mass spectrometry. NetPhosK was 100% sensitive and 41% specific in predicting PKA sites in the four proteins. These results demonstrate the potential of using integrated computational and experimental methods for detailed investigations of the phosphoproteome. Keywords: protein kinase A phosphorylation site prediction neural network analysis mass spectrometry</description>
    <dc:title>Identification of Phosphorylation Sites in Protein Kinase A Substrates Using Artificial Neural Networks and Mass Spectrometry</dc:title>

    <dc:creator>M Hjerrild</dc:creator>
    <dc:creator>A Stensballe</dc:creator>
    <dc:creator>TE Rasm