<?xml version="1.0" encoding="UTF-8"?>

<rdf:RDF
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
   xmlns="http://purl.org/rss/1.0/"
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:prism="http://prismstandard.org/namespaces/1.2/basic/"
   xmlns:dcterms="http://purl.org/dc/terms/"

>
<channel rdf:about="http://www.citeulike.org/about">
<pubDate>Sun, 27 Jul 2008 08:12:58 BST</pubDate>


	<title>CiteULike: jsr's library [46 articles]</title>
	<description>CiteULike: jsr's library [46 articles]</description>


	<link>http://www.citeulike.org/user/jsr</link>
	<dc:publisher>CiteULike.org</dc:publisher>
	<dc:language>en-gb</dc:language>
	<dc:rights>Copyright &#169; 2004-2008 citeulike.org</dc:rights>
	<items>
    <rdf:Seq>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2651861"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2636569"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2634539"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/449"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2622604"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/524758"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2580409"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/420107"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2574403"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2569685"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2569593"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2538920"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/635732"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2536153"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2536142"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/1471101"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2530696"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/1839936"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2530417"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2520093"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2519967"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2519955"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2518684"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/1364758"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/93541"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2514754"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/1606545"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2470565"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2491530"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2491367"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2489449"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2489402"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2489295"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/1706426"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2488258"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2473582"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2477638"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2477625"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2472088"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2472067"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2472047"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2470860"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2470655"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/695242"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/1983303"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/jsr/article/2470596"/>

	</rdf:Seq>
	</items>
	</channel>


<item rdf:about="http://www.citeulike.org/user/jsr/article/2651861">
    <title>Bayesian Density Estimation and Inference Using Mixtures</title>
    <link>http://www.citeulike.org/user/jsr/article/2651861</link>
    <description>&lt;i&gt;Journal of the American Statistical Association, Vol. 90, No. 430. (June 1995), pp. 577-588.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We describe and illustrate Bayesian inference in models for density estimation using mixtures of Dirichlet processes. These models provide natural settings for density estimation, and are exemplified by special cases where data are modelled as a sample from mixtures of normal distributions. Efficient simulation methods are used to approximate various prior, posterior and predictive distributions. This allows for direct inference on a variety of practical issues, including problems of local...</description>
    <dc:title>Bayesian Density Estimation and Inference Using Mixtures</dc:title>

    <dc:creator>Michael Escobar</dc:creator>
    <dc:creator>Mike West</dc:creator>
    <dc:source>Journal of the American Statistical Association, Vol. 90, No. 430. (June 1995), pp. 577-588.</dc:source>
    <dc:date>2008-04-11T05:27:35-00:00</dc:date>
    <prism:publicationYear>1995</prism:publicationYear>
    <prism:publicationName>Journal of the American Statistical Association</prism:publicationName>
    <prism:volume>90</prism:volume>
    <prism:number>430</prism:number>
    <prism:startingPage>577</prism:startingPage>
    <prism:endingPage>588</prism:endingPage>
    <prism:category>bayesian</prism:category>
    <prism:category>density_estimation</prism:category>
    <prism:category>dirichlet_process</prism:category>
    <prism:category>mcmc</prism:category>
    <prism:category>mixture_model</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2636569">
    <title>Covariance kernels from Bayesian generative models</title>
    <link>http://www.citeulike.org/user/jsr/article/2636569</link>
    <description>&lt;i&gt;(2000)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We propose the framework of mutual information kernels for learning covariance kernels, as used in Support Vector machines and Gaussian process classifiers, from unlabeled task data using Bayesian techniques. We describe an implementation of this framework which uses variational Bayesian mixtures of factor analyzers in order to attack classification problems in high-dimensional spaces where labeled data is sparse, but unlabeled data is abundant.</description>
    <dc:title>Covariance kernels from Bayesian generative models</dc:title>

    <dc:creator>M Seeger</dc:creator>
    <dc:source>(2000)</dc:source>
    <dc:date>2008-04-07T05:10:48-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:category>bayesian</prism:category>
    <prism:category>gaussian_process</prism:category>
    <prism:category>generative_models</prism:category>
    <prism:category>semi_supervised</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2634539">
    <title>Beyond Gaussian Processes: On the Distributions of Infinite Networks</title>
    <link>http://www.citeulike.org/user/jsr/article/2634539</link>
    <description>&lt;i&gt;(2005)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A general analysis of the limiting distribution of neural network functions is performed, with emphasis on non-Gaussian limits. We show that with i.i.d. symmetric stable output weights, and more generally with weights distributed from the normal domain of attraction of a stable variable, that the neural functions converge in distribution to stable processes. Conditions are also investigated under which Gaussian limits do occur when the weights are independent but not identically distributed. Some particularly tractable classes of stable distributions are examined, and the possibility of learning with such processes.</description>
    <dc:title>Beyond Gaussian Processes: On the Distributions of Infinite Networks</dc:title>

    <dc:creator>Ricky Der</dc:creator>
    <dc:creator>Daniel Lee</dc:creator>
    <dc:source>(2005)</dc:source>
    <dc:date>2008-04-06T11:32:16-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:category>gaussian_process</prism:category>
    <prism:category>neural_networks</prism:category>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/449">
    <title>Exploiting generative models in discriminative classifiers</title>
    <link>http://www.citeulike.org/user/jsr/article/449</link>
    <description>&lt;i&gt;&lt;/i&gt;</description>
    <dc:title>Exploiting generative models in discriminative classifiers</dc:title>

    <dc:creator>T. Jaakkola</dc:creator>
    <dc:creator>D. Haussler</dc:creator>
    <dc:date>2004-11-22T00:17:30-00:00</dc:date>
    <prism:category>gaussian_process</prism:category>
    <prism:category>generative_models</prism:category>
    <prism:category>kernel_methods</prism:category>
    <prism:category>manifolds</prism:category>
    <prism:category>metric_learning</prism:category>
    <prism:category>svm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2622604">
    <title>Probabilistic Geometry</title>
    <link>http://www.citeulike.org/user/jsr/article/2622604</link>
    <description>&lt;i&gt;Proceedings of the National Academy of Sciences of the United States of America, Vol. 37, No. 4. (1951), pp. 226-229.&lt;/i&gt;</description>
    <dc:title>Probabilistic Geometry</dc:title>

    <dc:creator>Karl Menger</dc:creator>
    <dc:source>Proceedings of the National Academy of Sciences of the United States of America, Vol. 37, No. 4. (1951), pp. 226-229.</dc:source>
    <dc:date>2008-04-02T08:19:29-00:00</dc:date>
    <prism:publicationYear>1951</prism:publicationYear>
    <prism:publicationName>Proceedings of the National Academy of Sciences of the United States of America</prism:publicationName>
    <prism:volume>37</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>226</prism:startingPage>
    <prism:endingPage>229</prism:endingPage>
    <prism:category>metric_spaces</prism:category>
    <prism:category>probabilistic_geometry</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/524758">
    <title>Nonparametric empirical Bayes for the Dirichlet process mixture model</title>
    <link>http://www.citeulike.org/user/jsr/article/524758</link>
    <description>&lt;i&gt;Statistics and Computing, Vol. 16, No. 1. (March 2006), pp. 5-14.&lt;/i&gt;</description>
    <dc:title>Nonparametric empirical Bayes for the Dirichlet process mixture model</dc:title>

    <dc:creator>Jon Mcauliffe</dc:creator>
    <dc:creator>David Blei</dc:creator>
    <dc:creator>Michael Jordan</dc:creator>
    <dc:identifier>doi:10.1007/s11222-006-5196-2</dc:identifier>
    <dc:source>Statistics and Computing, Vol. 16, No. 1. (March 2006), pp. 5-14.</dc:source>
    <dc:date>2006-03-01T06:53:43-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Statistics and Computing</prism:publicationName>
    <prism:issn>0960-3174</prism:issn>
    <prism:volume>16</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>5</prism:startingPage>
    <prism:endingPage>14</prism:endingPage>
    <prism:publisher>Springer</prism:publisher>
    <prism:category>bayesian</prism:category>
    <prism:category>dirichlet_process</prism:category>
    <prism:category>empirical_bayes</prism:category>
    <prism:category>mixture_model</prism:category>
    <prism:category>nonparametric</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2580409">
    <title>Duality Between Learning Machines: A Bridge Between Supervised and Unsupervised Learning</title>
    <link>http://www.citeulike.org/user/jsr/article/2580409</link>
    <description>&lt;i&gt;Neural Computation, Vol. 6, No. 3. (1994), pp. 491-508.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We exhibit a duality between two perceptrons which allows us to compare the theoretical analysis of supervised and unsupervised learning tasks. The first perceptron has one output and is asked to learn a classification of p patterns. The second (dual) perceptron has p outputs and is asked to transmit as much information as possible on a distribution of inputs. We show in particular that the maximum information that can be stored in the couplings for the supervised learning task is equal to the...</description>
    <dc:title>Duality Between Learning Machines: A Bridge Between Supervised and Unsupervised Learning</dc:title>

    <dc:creator>Jean Nadal</dc:creator>
    <dc:creator>N Parga</dc:creator>
    <dc:source>Neural Computation, Vol. 6, No. 3. (1994), pp. 491-508.</dc:source>
    <dc:date>2008-03-24T12:55:10-00:00</dc:date>
    <prism:publicationYear>1994</prism:publicationYear>
    <prism:publicationName>Neural Computation</prism:publicationName>
    <prism:volume>6</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>491</prism:startingPage>
    <prism:endingPage>508</prism:endingPage>
    <prism:category>duality</prism:category>
    <prism:category>perceptrons</prism:category>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/420107">
    <title>An Introduction to Variational Methods for Graphical Models</title>
    <link>http://www.citeulike.org/user/jsr/article/420107</link>
    <description>&lt;i&gt;Machine Learning, Vol. 37, No. 2. (1999), pp. 183-233.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This paper presents a tutorial introduction to the use of variational methods for inference and learning in graphical models (Bayesian networks and Markov random fields). We present a number of examples of graphical models, including the QMR-DT database, the sigmoid belief network, the Boltzmann machine, and several variants of hidden Markov models, in which it is infeasible to run exact inference algorithms. We then introduce variational methods, which exploit laws of large numbers to...</description>
    <dc:title>An Introduction to Variational Methods for Graphical Models</dc:title>

    <dc:creator>Michael Jordan</dc:creator>
    <dc:creator>Zoubin Ghahramani</dc:creator>
    <dc:creator>Tommi Jaakkola</dc:creator>
    <dc:creator>Lawrence Saul</dc:creator>
    <dc:source>Machine Learning, Vol. 37, No. 2. (1999), pp. 183-233.</dc:source>
    <dc:date>2005-12-02T19:12:12-00:00</dc:date>
    <prism:publicationYear>1999</prism:publicationYear>
    <prism:publicationName>Machine Learning</prism:publicationName>
    <prism:volume>37</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>183</prism:startingPage>
    <prism:endingPage>233</prism:endingPage>
    <prism:category>graphical_models</prism:category>
    <prism:category>statistical_learning</prism:category>
    <prism:category>statistics</prism:category>
    <prism:category>variational_methods</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2574403">
    <title>The matrix stick-breaking process for flexible multi-task learning</title>
    <link>http://www.citeulike.org/user/jsr/article/2574403</link>
    <description>&lt;i&gt;(2007), pp. 1063-1070.&lt;/i&gt;</description>
    <dc:title>The matrix stick-breaking process for flexible multi-task learning</dc:title>

    <dc:creator>Ya Xue</dc:creator>
    <dc:creator>David Dunson</dc:creator>
    <dc:creator>Lawrence Carin</dc:creator>
    <dc:identifier>doi:10.1145/1273496.1273630</dc:identifier>
    <dc:source>(2007), pp. 1063-1070.</dc:source>
    <dc:date>2008-03-23T10:28:43-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:startingPage>1063</prism:startingPage>
    <prism:endingPage>1070</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>bayesian</prism:category>
    <prism:category>dirichlet_process</prism:category>
    <prism:category>nonparametric</prism:category>
    <prism:category>transfer_learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2569685">
    <title>Kernels for multi-task learning</title>
    <link>http://www.citeulike.org/user/jsr/article/2569685</link>
    <description>&lt;i&gt;(2004)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This paper provides a foundation for multi–task learning using reproducing kernel Hilbert spaces of vector–valued functions. In this setting, the kernel is a matrix–valued function. Some explicit examples will be described which go beyond our earlier results in [7]. In particular, we characterize classes of matrix–valued kernels which are linear and are of the dot product or the translation invariant type. We discuss how these kernels can be used to model relations between the tasks and present linear multi–task learning algorithms. Finally, we present a novel proof of the representer theorem for a minimizer of a regularization functional which is based on the notion of minimal norm interpolation.</description>
    <dc:title>Kernels for multi-task learning</dc:title>

    <dc:creator>Charles Micchelli</dc:creator>
    <dc:creator>Massimiliano Pontil</dc:creator>
    <dc:source>(2004)</dc:source>
    <dc:date>2008-03-21T12:51:43-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:category>kernel_methods</prism:category>
    <prism:category>learning</prism:category>
    <prism:category>vector_valued_kernels</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2569593">
    <title>Max-margin Classification of Data with Absent Features</title>
    <link>http://www.citeulike.org/user/jsr/article/2569593</link>
    <description>&lt;i&gt;Journal of Machine Learning Research, Vol. 9 (2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We consider the problem of learning classifiers in structured domains, where some objects have a subset of features that are inherently absent due to complex relationships between the features. Unlike the case where a feature exists but its value is not observed, here we focus on the case where a feature may not even exist (structurally absent) for some of the samples. The common approach for handling missing features in discriminative models is to first complete their unknown values, and then use a standard classification procedure over the completed data. This paper focuses on features that are known to be non-existing, rather than have an unknown value. We show how incomplete data can be classified directly without any completion of the missing features using a max-margin learning framework. We formulate an objective function, based on the geometric interpretation of the margin, that aims to maximize the margin of each sample in its own relevant subspace. In this formulation, the linearly separable case can be transformed into a binary search over a series of second order cone programs (SOCP), a convex problem that can be solved efficiently. We also describe two approaches for optimizing the general case: an approximation that can be solved as a standard quadratic program (QP) and an iterative approach for solving the exact problem. By avoiding the pre-processing phase in which the data is completed, both of these approaches could offer considerable computational savings. More importantly, we show that the elegant handling of missing values by our approach allows it to both outperform other methods when the missing values have non-trivial structure, and be competitive with other methods when the values are missing at random. We demonstrate our results on several standard benchmarks and two real-world problems: edge prediction in metabolic pathways, and automobile detection in natural images.</description>
    <dc:title>Max-margin Classification of Data with Absent Features</dc:title>

    <dc:creator>Gal Chechik</dc:creator>
    <dc:creator>Geremy Heitz</dc:creator>
    <dc:creator>Gal Elidan</dc:creator>
    <dc:creator>Pieter Abbeel</dc:creator>
    <dc:creator>Daphne Koller</dc:creator>
    <dc:source>Journal of Machine Learning Research, Vol. 9 (2008)</dc:source>
    <dc:date>2008-03-21T11:25:02-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Journal of Machine Learning Research</prism:publicationName>
    <prism:volume>9</prism:volume>
    <prism:category>learning</prism:category>
    <prism:category>missing_features</prism:category>
    <prism:category>svm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2538920">
    <title>Metric Learning for Text Documents</title>
    <link>http://www.citeulike.org/user/jsr/article/2538920</link>
    <description>&lt;i&gt;IEEE Trans. Pattern Anal. Mach. Intell., Vol. 28, No. 4. (April 2006)&lt;/i&gt;</description>
    <dc:title>Metric Learning for Text Documents</dc:title>

    <dc:creator>Guy Lebanon</dc:creator>
    <dc:identifier>doi:10.1109/TPAMI.2006.77</dc:identifier>
    <dc:source>IEEE Trans. Pattern Anal. Mach. Intell., Vol. 28, No. 4. (April 2006)</dc:source>
    <dc:date>2008-03-16T12:45:22-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>IEEE Trans. Pattern Anal. Mach. Intell.</prism:publicationName>
    <prism:issn>0162-8828</prism:issn>
    <prism:volume>28</prism:volume>
    <prism:number>4</prism:number>
    <prism:publisher>IEEE Computer Society</prism:publisher>
    <prism:category>metric_learning</prism:category>
    <prism:category>nlp</prism:category>
    <prism:category>unsupervised_learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/635732">
    <title>On Kernel-Target Alignment</title>
    <link>http://www.citeulike.org/user/jsr/article/635732</link>
    <description>&lt;i&gt;&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We introduce the notion of kernel-alignment, a measure of similarity between two kernel functions or between a kernel and a target function. This quantity captures the degree of agreement between a kernel and a given learning task, and has very natural interpretations in machine learning, leading also to simple algorithms for model selection and learning. We analyse its theoretical properties, proving that it is sharply concentrated around its expected value, and we discuss its relation with...</description>
    <dc:title>On Kernel-Target Alignment</dc:title>

    <dc:creator>Nello Cristianini</dc:creator>
    <dc:creator>John Taylor</dc:creator>
    <dc:creator>Andr&#233; Elisseeff</dc:creator>
    <dc:date>2006-05-15T15:03:06-00:00</dc:date>
    <prism:category>kernel_methods</prism:category>
    <prism:category>metric_learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2536153">
    <title>Self-Organizing Homotopy Network</title>
    <link>http://www.citeulike.org/user/jsr/article/2536153</link>
    <description>&lt;i&gt;(2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In this paper, we propose a conceptual learning algorithm called the ‘self-organizing homotopy (SOH)’ together with an implementation thereof. As in the case of the SOM, our SOH organizes a homotopy in a self-organizing manner by giving a set of data episodes. Thus it is an extension of the SOM, moving from a ‘map’ to a ‘homotopy’. From a geometrical viewpoint, the SOH represents a set of (i.e. multiple) data distributions by a fiber bundle, whereas the SOM represents a single data distribution by a manifold. Therefore, this paper also proposes the concept of ‘fiber bundle learning’ as an extension of manifold learning. One of the solutions to the SOH is SOM², in which every reference vector unit of the conventional SOM is itself replaced by an SOM. Consequently SOM² has the ability to represent a fiber bundle, i.e. a product manifold, by using a product space of SOM × SOM. It is also possible to design SOMⁿ to represent higher order fiber bundles. It is expected that SOHs will play important roles in the fields of pattern recognition, adaptive functions, context understanding, and others, in which nonlinear manifolds and the homotopy play crucial roles.</description>
    <dc:title>Self-Organizing Homotopy Network</dc:title>

    <dc:creator>Tetsuo Furukawa</dc:creator>
    <dc:source>(2007)</dc:source>
    <dc:date>2008-03-15T11:27:28-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:category>fiber_bundle_learning</prism:category>
    <prism:category>homotopy_learning</prism:category>
    <prism:category>manifolds</prism:category>
    <prism:category>unsupervised_learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2536142">
    <title>Metric Learning by Collapsing Classes</title>
    <link>http://www.citeulike.org/user/jsr/article/2536142</link>
    <description>&lt;i&gt;(2005)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We present an algorithm for learning a quadratic Gaussian metric (Mahalanobis distance) for use in classification tasks. Our method relies on the simple geometric intuition that a good metric is one under which points in the same class are simultaneously near each other and far from points in the other classes. We construct a convex optimization problem whose solution generates such a metric by trying to collapse all examples in the same class to a single point and push examples in other classes infinitely far away. We show that when the metric we learn is used in simple classifiers, it yields substantial improvements over standard alternatives on a variety of problems. We also discuss how the learned metric may be used to obtain a compact low dimensional feature representation of the original input space, allowing more efficient classification with very little reduction in performance.</description>
    <dc:title>Metric Learning by Collapsing Classes</dc:title>

    <dc:creator>Amir Globerson</dc:creator>
    <dc:creator>Sam Roweis</dc:creator>
    <dc:source>(2005)</dc:source>
    <dc:date>2008-03-15T11:11:30-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:category>clustering</prism:category>
    <prism:category>metric_learning</prism:category>
    <prism:category>unsupervised_learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/1471101">
    <title>Information-theoretic metric learning</title>
    <link>http://www.citeulike.org/user/jsr/article/1471101</link>
    <description>&lt;i&gt;(2007), pp. 209-216.&lt;/i&gt;</description>
    <dc:title>Information-theoretic metric learning</dc:title>

    <dc:creator>Jason Davis</dc:creator>
    <dc:creator>Brian Kulis</dc:creator>
    <dc:creator>Prateek Jain</dc:creator>
    <dc:creator>Suvrit Sra</dc:creator>
    <dc:creator>Inderjit Dhillon</dc:creator>
    <dc:identifier>doi:10.1145/1273496.1273523</dc:identifier>
    <dc:source>(2007), pp. 209-216.</dc:source>
    <dc:date>2007-07-21T13:31:32-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:startingPage>209</prism:startingPage>
    <prism:endingPage>216</prism:endingPage>
    <prism:publisher>ACM Press</prism:publisher>
    <prism:category>manifolds</prism:category>
    <prism:category>metric_learning</prism:category>
    <prism:category>unsupervised_learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2530696">
    <title>Discovering Shared Structure in Manifold Learning</title>
    <link>http://www.citeulike.org/user/jsr/article/2530696</link>
    <description>&lt;i&gt;(2004)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We claim and present arguments to the effect that a large class of manifold learning algorithms that are essentially local will suffer from at least four generic problems associated with (1) noise in the data, (2) curvature of the manifold, (3) dimensionality of the manifold, and (4) the presence of many manifolds with little data per manifold. This analysis suggests non-local manifold learning algorithms which attempt to discover shared structure in the tangent planes at different positions. A criterion for such an algorithm is proposed and experiments estimating a tangent plane prediction function are presented. The function has parameters that are shared across space rather than estimated based on the local neighborhood, as in current non-parametric manifold learning algorithms. The results show clearly the advantages of this approach with respect to local manifold learning algorithms.</description>
    <dc:title>Discovering Shared Structure in Manifold Learning</dc:title>

    <dc:creator>Yoshua Bengio</dc:creator>
    <dc:creator>Martin Monperrus</dc:creator>
    <dc:source>(2004)</dc:source>
    <dc:date>2008-03-14T05:13:17-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:category>learning</prism:category>
    <prism:category>manifolds</prism:category>
    <prism:category>semi_supervised</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/1839936">
    <title>Charting a manifold</title>
    <link>http://www.citeulike.org/user/jsr/article/1839936</link>
    <description>&lt;i&gt;Neural Information Processing Systems (NIPS), No. 15. (2003)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We construct a nonlinear mapping from a high-dimensional sample space to a low-dimensional vector space, effectively recovering a Cartesian coordinate system for the manifold from which the data is sampled. The mapping preserves local geometric relations in the manifold and is pseudo-invertible. We show how to estimate the intrinsic dimensionality of the manifold from samples, decompose the sample data into locally linear low-dimensional patches, merge these patches into a single low-dimensional coordinate system, and compute forward and reverse mappings between the sample and coordinate spaces. The objective functions are convex and their solutions are given in closed form.</description>
    <dc:title>Charting a manifold</dc:title>

    <dc:creator>Matthew Brand</dc:creator>
    <dc:source>Neural Information Processing Systems (NIPS), No. 15. (2003)</dc:source>
    <dc:date>2007-10-30T10:57:30-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Neural Information Processing Systems (NIPS)</prism:publicationName>
    <prism:number>15</prism:number>
    <prism:category>learning</prism:category>
    <prism:category>manifolds</prism:category>
    <prism:category>semi_supervised</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2530417">
    <title>A Hilbert Space Embedding for Distributions</title>
    <link>http://www.citeulike.org/user/jsr/article/2530417</link>
    <description>&lt;i&gt;Discovery Science (2007), pp. 40-41.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;While kernel methods are the basis of many popular techniques in supervised learning, they are less commonly used in testing, estimation, and analysis of probability distributions, where information theoretic approaches rule the roost. However it becomes difficult to estimate mutual information or entropy if the data are high dimensional.</description>
    <dc:title>A Hilbert Space Embedding for Distributions</dc:title>

    <dc:creator>Alex Smola</dc:creator>
    <dc:creator>Arthur Gretton</dc:creator>
    <dc:creator>Le Song</dc:creator>
    <dc:creator>Bernhard Schölkopf</dc:creator>
    <dc:identifier>doi:10.1007/978-3-540-75488-6_5</dc:identifier>
    <dc:source>Discovery Science (2007), pp. 40-41.</dc:source>
    <dc:date>2008-03-14T02:17:42-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Discovery Science</prism:publicationName>
    <prism:startingPage>40</prism:startingPage>
    <prism:endingPage>41</prism:endingPage>
    <prism:category>hilbert_spaces</prism:category>
    <prism:category>kernel_methods</prism:category>
    <prism:category>statistical_learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2520093">
    <title>Integration of Stochastic Models by Minimizing &#945;-Divergence</title>
    <link>http://www.citeulike.org/user/jsr/article/2520093</link>
    <description>&lt;i&gt;Neural Comput., Vol. 19, No. 10. (October 2007), pp. 2780-2796.&lt;/i&gt;</description>
    <dc:title>Integration of Stochastic Models by Minimizing &#945;-Divergence</dc:title>

    <dc:creator>Shun-Ichi Amari</dc:creator>
    <dc:identifier>doi:10.1162/neco.2007.19.10.2780</dc:identifier>
    <dc:source>Neural Comput., Vol. 19, No. 10. (October 2007), pp. 2780-2796.</dc:source>
    <dc:date>2008-03-12T12:34:08-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Neural Comput.</prism:publicationName>
    <prism:issn>0899-7667</prism:issn>
    <prism:volume>19</prism:volume>
    <prism:number>10</prism:number>
    <prism:startingPage>2780</prism:startingPage>
    <prism:endingPage>2796</prism:endingPage>
    <prism:publisher>MIT Press</prism:publisher>
    <prism:category>information_geometry</prism:category>
    <prism:category>manifolds</prism:category>
    <prism:category>stochastic_models</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2519967">
    <title>Model Selection and the Principle of Minimum Description Length</title>
    <link>http://www.citeulike.org/user/jsr/article/2519967</link>
    <description>&lt;i&gt;Journal of the American Statistical Association, Vol. 96, No. 454. (2001), pp. 746-774.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This article reviews the principle of minimum description length (MDL) for problems of model selection. By viewing statistical modeling as a means of generating descriptions of observed data, the MDL framework discriminates between competing models based on the complexity of each description. This approach began with Kolmogorov's theory of algorithmic complexity, matured in the literature on information theory, and has recently received renewed attention within the statistics community. Here we review both the practical and the theoretical aspects of MDL as a tool for model selection, emphasizing the rich connections between information theory and statistics. At the boundary between these two disciplines we find many interesting interpretations of popular frequentist and Bayesian procedures. As we show, MDL provides an objective umbrella under which rather disparate approaches to statistical modeling can coexist and be compared. We illustrate the MDL principle by considering problems in regression, nonparametric curve estimation, cluster analysis, and time series analysis. Because model selection in linear regression is an extremely common problem that arises in many applications, we present detailed derivations of several MDL criteria in this context and discuss their properties through a number of examples. Our emphasis is on the practical application of MDL, and hence we make extensive use of real datasets. In writing this review, we tried to make the descriptive philosophy of MDL natural to a statistics audience by examining classical problems in model selection. In the engineering literature, however, MDL is being applied to ever more exotic modeling situations. 
As a principle for statistical modeling in general, one strength of MDL is that it can be intuitively extended to provide useful tools for new problems.</description>
    <dc:title>Model Selection and the Principle of Minimum Description Length</dc:title>

    <dc:creator>Mark Hansen</dc:creator>
    <dc:creator>Bin Yu</dc:creator>
    <dc:source>Journal of the American Statistical Association, Vol. 96, No. 454. (2001), pp. 746-774.</dc:source>
    <dc:date>2008-03-12T11:46:48-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:publicationName>Journal of the American Statistical Association</prism:publicationName>
    <prism:volume>96</prism:volume>
    <prism:number>454</prism:number>
    <prism:startingPage>746</prism:startingPage>
    <prism:endingPage>774</prism:endingPage>
    <prism:category>information_theory</prism:category>
    <prism:category>mdl</prism:category>
    <prism:category>model_selection</prism:category>
    <prism:category>stochastic_compexity</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2519955">
    <title>An Introduction to the MDL Principle</title>
    <link>http://www.citeulike.org/user/jsr/article/2519955</link>
    <description>&lt;i&gt;&lt;/i&gt;</description>
    <dc:title>An Introduction to the MDL Principle</dc:title>

    <dc:creator>Jorma Rissanen</dc:creator>
    <dc:date>2008-03-12T11:41:26-00:00</dc:date>
    <prism:category>information_theory</prism:category>
    <prism:category>learning</prism:category>
    <prism:category>mdl</prism:category>
    <prism:category>statistics</prism:category>
    <prism:category>stochastic_compexity</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2518684">
    <title>Warped Gaussian Processes</title>
    <link>http://www.citeulike.org/user/jsr/article/2518684</link>
    <description>&lt;i&gt;(2004)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We generalise the Gaussian process (GP) framework for regression by learning a nonlinear transformation of the GP outputs. This allows for non-Gaussian processes and non-Gaussian noise. The learning algorithm chooses a nonlinear transformation such that transformed data is well-modelled by a GP. This can be seen as including a preprocessing transformation as an integral part of the probabilistic modelling problem, rather than as an ad-hoc step. We demonstrate on several real regression...</description>
    <dc:title>Warped Gaussian Processes</dc:title>

    <dc:creator>E Snelson</dc:creator>
    <dc:creator>C Rasmussen</dc:creator>
    <dc:creator>Z Ghahramani</dc:creator>
    <dc:source>(2004)</dc:source>
    <dc:date>2008-03-12T08:41:05-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:category>bayesian</prism:category>
    <prism:category>gaussian_process</prism:category>
    <prism:category>learning</prism:category>
    <prism:category>nonlinear</prism:category>
    <prism:category>nonparametric</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/1364758">
    <title>Self-Organizing Homotopy Network</title>
    <link>http://www.citeulike.org/user/jsr/article/1364758</link>
    <description>&lt;i&gt;(2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In this paper, we propose a conceptual learning algorithm called the ‘self-organizing homotopy (SOH)’ together with an implementation thereof. As in the case of the SOM, our SOH organizes a homotopy in a self-organizing manner by giving a set of data episodes. Thus it is an extension of the SOM, moving from a ‘map’ to a ‘homotopy’. From a geometrical viewpoint, the SOH represents a set of (i.e. multiple) data distributions by a fiber bundle, whereas the SOM represents a single data distribution by a manifold. Therefore, this paper also proposes the concept of ‘fiber bundle learning’ as an extension of manifold learning. One of the solutions to the SOH is SOM², in which every reference vector unit of the conventional SOM is itself replaced by an SOM. Consequently SOM² has the ability to represent a fiber bundle, i.e. a product manifold, by using a product space of SOM × SOM. It is also possible to design SOMⁿ to represent higher order fiber bundles. It is expected that SOHs will play important roles in the fields of pattern recognition, adaptive functions, context understanding, and others, in which nonlinear manifolds and the homotopy play crucial roles.</description>
    <dc:title>Self-Organizing Homotopy Network</dc:title>

    <dc:creator>Tetsuo Furukawa</dc:creator>
    <dc:source>(2007)</dc:source>
    <dc:date>2007-06-04T21:53:35-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:category>bayesian</prism:category>
    <prism:category>gaussian_process</prism:category>
    <prism:category>learning</prism:category>
    <prism:category>nonparametric</prism:category>
    <prism:category>time_series</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/93541">
    <title>Counting probability distributions: differential geometry and model selection.</title>
    <link>http://www.citeulike.org/user/jsr/article/93541</link>
    <description>&lt;i&gt;Proc Natl Acad Sci U S A, Vol. 97, No. 21. (10 October 2000), pp. 11170-11175.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A central problem in science is deciding among competing explanations of data containing random errors. We argue that assessing the &#34;complexity&#34; of explanations is essential to a theoretically well-founded model selection procedure. We formulate model complexity in terms of the geometry of the space of probability distributions. Geometric complexity provides a clear intuitive understanding of several extant notions of model complexity. This approach allows us to reconceptualize the model selection problem as one of counting explanations that lie close to the &#34;truth.&#34; We demonstrate the usefulness of the approach by applying it to the recovery of models in psychophysics.</description>
    <dc:title>Counting probability distributions: differential geometry and model selection.</dc:title>

    <dc:creator>IJ Myung</dc:creator>
    <dc:creator>V Balasubramanian</dc:creator>
    <dc:creator>MA Pitt</dc:creator>
    <dc:identifier>doi:10.1073/pnas.170283897</dc:identifier>
    <dc:source>Proc Natl Acad Sci U S A, Vol. 97, No. 21. (10 October 2000), pp. 11170-11175.</dc:source>
    <dc:date>2005-02-12T02:27:06-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:publicationName>Proc Natl Acad Sci U S A</prism:publicationName>
    <prism:issn>0027-8424</prism:issn>
    <prism:volume>97</prism:volume>
    <prism:number>21</prism:number>
    <prism:startingPage>11170</prism:startingPage>
    <prism:endingPage>11175</prism:endingPage>
    <prism:category>information_geometry</prism:category>
    <prism:category>manifolds</prism:category>
    <prism:category>mdl</prism:category>
    <prism:category>model_selection</prism:category>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2514754">
    <title>Spatial Nonparametric Bayesian Models</title>
    <link>http://www.citeulike.org/user/jsr/article/2514754</link>
    <description>&lt;i&gt;(2001)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Introduction and Motivation The prior distribution is an essential ingredient of any Bayesian analysis, and it plays a major role in determining the final results. As such, Bayesians attempt to use prior distributions that have certain properties. Perhaps the main property is a desire to accurately reflect prior information, i.e., information external to the experiment at hand. We would supplement this vague property with a second equally vague property. The posterior distribution should...</description>
    <dc:title>Spatial Nonparametric Bayesian Models</dc:title>

    <dc:creator>S Maceachern</dc:creator>
    <dc:creator>A Kottas</dc:creator>
    <dc:creator>A Gelfand</dc:creator>
    <dc:source>(2001)</dc:source>
    <dc:date>2008-03-11T13:05:24-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:category>bayesian</prism:category>
    <prism:category>dirichlet_process</prism:category>
    <prism:category>gaussian_process</prism:category>
    <prism:category>nonparametric</prism:category>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/1606545">
    <title>A choice model with infinitely many latent features</title>
    <link>http://www.citeulike.org/user/jsr/article/1606545</link>
    <description>&lt;i&gt;(2006), pp. 361-368.&lt;/i&gt;</description>
    <dc:title>A choice model with infinitely many latent features</dc:title>

    <dc:creator>Dilan G&#246;r&#252;r</dc:creator>
    <dc:creator>Frank J&#228;kel</dc:creator>
    <dc:creator>Carl Rasmussen</dc:creator>
    <dc:identifier>doi:10.1145/1143844.1143890</dc:identifier>
    <dc:source>(2006), pp. 361-368.</dc:source>
    <dc:date>2007-08-30T02:43:23-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:startingPage>361</prism:startingPage>
    <prism:endingPage>368</prism:endingPage>
    <prism:publisher>ACM Press</prism:publisher>
    <prism:category>bayesian</prism:category>
    <prism:category>choice_model</prism:category>
    <prism:category>indian_buffet_process</prism:category>
    <prism:category>nonparametric</prism:category>
    <prism:category>statistical_learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2470565">
    <title>Combinatorial Stochastic Processes</title>
    <link>http://www.citeulike.org/user/jsr/article/2470565</link>
    <description>&lt;i&gt;(2002)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;CONTENTS Contents 0 Preliminaries 3 0.1 Preface . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3 0.2 Introduction . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4 0.3 Notation . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6 0.4 Brownian motion and related processes . . . . . . . . . . . . . . . 7 0.5 Subordinators . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 1 Bell polynomials and composite structures 15 1.1 Partitions and...</description>
    <dc:title>Combinatorial Stochastic Processes</dc:title>

    <dc:creator>J Pitman</dc:creator>
    <dc:source>(2002)</dc:source>
    <dc:date>2008-03-05T08:37:55-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:category>statistics</prism:category>
    <prism:category>stochastic_processes</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2491530">
    <title>Hierarchical Beta Processes and the Indian Buffet Process</title>
    <link>http://www.citeulike.org/user/jsr/article/2491530</link>
    <description>&lt;i&gt;(2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We show that the beta process is the de Finetti mixing distribution underlying the Indian buffet process of [2]. This result shows that the beta process plays the role for the Indian buffet process that the Dirichlet process plays for the Chinese restaurant process, a parallel that guides us in deriving analogs for the beta process of the many known extensions of the Dirichlet process. In particular we define Bayesian hierarchies of beta processes and use the connection to the beta process to develop posterior inference algorithms for the Indian buffet process. We also present an application to document classification, exploring a relationship between the hierarchical beta process and smoothed naive Bayes models.</description>
    <dc:title>Hierarchical Beta Processes and the Indian Buffet Process</dc:title>

    <dc:creator>Romain Thibaux</dc:creator>
    <dc:creator>Michael Jordan</dc:creator>
    <dc:source>(2007)</dc:source>
    <dc:date>2008-03-09T00:33:24-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:category>bayesian</prism:category>
    <prism:category>beta_process</prism:category>
    <prism:category>indian_buffet_process</prism:category>
    <prism:category>nonparametric</prism:category>
    <prism:category>unsupervised_learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2491367">
    <title>Stick-breaking Construction for the Indian Buffet Process</title>
    <link>http://www.citeulike.org/user/jsr/article/2491367</link>
    <description>&lt;i&gt;(2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The Indian buffet process (IBP) is a Bayesian nonparametric distribution whereby objects are modelled using an unbounded number of latent features. In this paper we derive a stick-breaking representation for the IBP. Based on this new representation, we develop slice samplers for the IBP that are efficient, easy to implement and are more generally applicable than the currently available Gibbs sampler. This representation, along with the work of Thibaux and Jordan [17], also illuminates interesting theoretical connections between the IBP, Chinese restaurant processes, Beta processes and Dirichlet processes.</description>
    <dc:title>Stick-breaking Construction for the Indian Buffet Process</dc:title>

    <dc:creator>YW Teh</dc:creator>
    <dc:creator>D Gorur</dc:creator>
    <dc:creator>Z Ghahramani</dc:creator>
    <dc:source>(2007)</dc:source>
    <dc:date>2008-03-08T23:12:59-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:category>bayesian</prism:category>
    <prism:category>indian_buffet_process</prism:category>
    <prism:category>nonparametric</prism:category>
    <prism:category>unsupervised_learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2489449">
    <title>Prior Distributions for Partitions in Bayesian Nonparametrics</title>
    <link>http://www.citeulike.org/user/jsr/article/2489449</link>
    <description>&lt;i&gt;(3 Jan 2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Prior distributions for unknown data distributions play an important role in nonparametric Bayesian statistics. A commonly-used prior distribution for an unknown data distribution is the Dirichlet process, which induces a random partition on the observations from the unknown data distribution. We investigate the prediction rule that underlies the Dirichlet process prior and the implicit &#34;rich-get-richer&#34; characteristics of random partitions generated by this process. To provide more flexibility for the modeling of random partitions, we present two alternative prior distributions for random partitions: the Pitman-Yor process and a uniform process. We present several asymptotic results for partitions under each process as well as a simulation-based evaluation of partition properties in small samples. We also discuss the exchangeability of partitions under each prediction rule. We give special focus to the uniform process which does not share the same &#34;rich-get-richer&#34; property as the Dirichlet process, which would be advantageous in applications where that implicit property is not reasonable.</description>
    <dc:title>Prior Distributions for Partitions in Bayesian Nonparametrics</dc:title>

    <dc:creator>Lee Dicker</dc:creator>
    <dc:creator>Shane Jensen</dc:creator>
    <dc:source>(3 Jan 2008)</dc:source>
    <dc:date>2008-03-08T12:44:34-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:category>bayesian</prism:category>
    <prism:category>dirichlet_process</prism:category>
    <prism:category>nonparametric</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2489402">
    <title>Hierarchical Dirichlet Processes</title>
    <link>http://www.citeulike.org/user/jsr/article/2489402</link>
    <description>&lt;i&gt;Journal of the American Statistical Association, Vol. 101 (December 2006)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We consider problems involving groups of data, where each observation within a group is a draw from a mixture model, and where it is desirable to share mixture components between groups. We assume that the number of mixture components is unknown a priori and is to be inferred from the data. In this setting it is natural to consider sets of Dirichlet processes, one for each group, where the well-known clustering property of the Dirichlet process provides a nonparametric prior for the number of mixture components within each group. Given our desire to tie the mixture models in the various groups, we consider a hierarchical model, specifically one in which the base measure for the child Dirichlet processes is itself distributed according to a Dirichlet process. Such a base measure being discrete, the child Dirichlet processes necessarily share atoms. Thus, as desired, the mixture models in the different groups necessarily share mixture components. We discuss representations of hierarchical Dirichlet processes in terms of a stick-breaking process, and a generalization of the Chinese restaurant process that we refer to as the “Chinese restaurant franchise.” We present Markov chain Monte Carlo algorithms for posterior inference in hierarchical Dirichlet process mixtures, and describe applications to problems in information retrieval and text modelling.</description>
    <dc:title>Hierarchical Dirichlet Processes</dc:title>

    <dc:creator>Yee Teh</dc:creator>
    <dc:creator>Michael Jordan</dc:creator>
    <dc:creator>Matthew Beal</dc:creator>
    <dc:creator>David Blei</dc:creator>
    <dc:source>Journal of the American Statistical Association, Vol. 101 (December 2006)</dc:source>
    <dc:date>2008-03-08T12:29:44-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Journal of the American Statistical Association</prism:publicationName>
    <prism:volume>101</prism:volume>
    <prism:category>bayesian</prism:category>
    <prism:category>clustering</prism:category>
    <prism:category>hierarchical_dirichlet_process</prism:category>
    <prism:category>latent_feature_models</prism:category>
    <prism:category>learning</prism:category>
    <prism:category>nonparametric</prism:category>
    <prism:category>statistical_learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2489295">
    <title>Pure Exploration for Multi-Armed Bandit Problems</title>
    <link>http://www.citeulike.org/user/jsr/article/2489295</link>
    <description>&lt;i&gt;(19 Feb 2008)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We consider the framework of stochastic multi-armed bandit problems and study the possibilities and limitations of strategies that explore sequentially the arms. The strategies are assessed not in terms of their cumulative regrets, as is usually the case, but through quantities referred to as simple regrets. The latter are related to the (expected) gains of the decisions that the strategies would recommend for a new one-shot instance of the same multi-armed bandit problem. Here, exploration is only constrained by the number of available rounds (not necessarily known in advance), in contrast to the case when cumulative regrets are considered and when exploitation needs to be performed at the same time. We start by indicating the links between simple and cumulative regrets. A small cumulative regret entails a small simple regret but too small a cumulative regret prevents the simple regret from decreasing exponentially towards zero, its optimal distribution-dependent rate. We therefore introduce specific strategies, for which we prove both distribution-dependent and distribution-free bounds. A concluding experimental study puts these theoretical bounds in perspective and shows the interest of non-uniform exploration of the arms.</description>
    <dc:title>Pure Exploration for Multi-Armed Bandit Problems</dc:title>

    <dc:creator>Sebastien Bubeck</dc:creator>
    <dc:creator>Remi Munos</dc:creator>
    <dc:creator>Gilles Stoltz</dc:creator>
    <dc:source>(19 Feb 2008)</dc:source>
    <dc:date>2008-03-08T11:55:42-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:category>bandits</prism:category>
    <prism:category>learning_theory</prism:category>
    <prism:category>online</prism:category>
    <prism:category>reinforcement_learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/1706426">
    <title>Metric entropy in competitive on-line prediction</title>
    <link>http://www.citeulike.org/user/jsr/article/1706426</link>
    <description>&lt;i&gt;(9 Sep 2006)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Competitive on-line prediction (also known as universal prediction of individual sequences) is a strand of learning theory avoiding making any stochastic assumptions about the way the observations are generated. The predictor's goal is to compete with a benchmark class of prediction rules, which is often a proper Banach function space. Metric entropy provides a unifying framework for competitive on-line prediction: the numerous known upper bounds on the metric entropy of various compact sets in function spaces readily imply bounds on the performance of on-line prediction strategies. This paper discusses strengths and limitations of the direct approach to competitive on-line prediction via metric entropy, including comparisons to other approaches.</description>
    <dc:title>Metric entropy in competitive on-line prediction</dc:title>

    <dc:creator>Vladimir Vovk</dc:creator>
    <dc:source>(9 Sep 2006)</dc:source>
    <dc:date>2007-09-28T21:09:32-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:category>learning</prism:category>
    <prism:category>learning_theory</prism:category>
    <prism:category>metric_entropy</prism:category>
    <prism:category>statistical_learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2488258">
    <title>Position Auctions</title>
    <link>http://www.citeulike.org/user/jsr/article/2488258</link>
    <description>&lt;i&gt;International Journal of Industrial Organization (October 2006)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A theoretical and empirical analysis of the ad auction used by Google and Yahoo.</description>
    <dc:title>Position Auctions</dc:title>

    <dc:creator>Hal Varian</dc:creator>
    <dc:source>International Journal of Industrial Organization (October 2006)</dc:source>
    <dc:date>2008-03-08T05:21:51-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>International Journal of Industrial Organization</prism:publicationName>
    <prism:category>auction_theory</prism:category>
    <prism:category>computational_economics</prism:category>
    <prism:category>game_theory</prism:category>
    <prism:category>mechanism_design</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2473582">
    <title>On the Mathematical Foundations of Learning</title>
    <link>http://www.citeulike.org/user/jsr/article/2473582</link>
    <description>&lt;i&gt;Bulletin of the American Mathematical Society, Vol. 39, No. 1. (2002), pp. 1-49.&lt;/i&gt;</description>
    <dc:title>On the Mathematical Foundations of Learning</dc:title>

    <dc:creator>Felipe Cucker</dc:creator>
    <dc:creator>Steve Smale</dc:creator>
    <dc:source>Bulletin of the American Mathematical Society, Vol. 39, No. 1. (2002), pp. 1-49.</dc:source>
    <dc:date>2008-03-05T16:15:29-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Bulletin of the American Mathematical Society</prism:publicationName>
    <prism:volume>39</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>1</prism:startingPage>
    <prism:endingPage>49</prism:endingPage>
    <prism:category>learning</prism:category>
    <prism:category>learning_theory</prism:category>
    <prism:category>statistical_learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2477638">
    <title>Bayesian nonparametric latent feature models</title>
    <link>http://www.citeulike.org/user/jsr/article/2477638</link>
    <description>&lt;i&gt;(2007), pp. 201-225.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We describe a flexible nonparametric approach to latent variable modelling in which the number of latent variables is unbounded. This approach is based on a probability distribution over equivalence classes of binary matrices with a finite number of rows, corresponding to the data points, and an unbounded number of columns, corresponding to the latent variables. Each data point can be associated with a subset of the possible latent variables, which we refer to as the latent features of that data point. The binary variables in the matrix indicate which latent feature is possessed by which data point, and there is a potentially infinite array of features. We derive the distribution over unbounded binary matrices by taking the limit of a distribution over N x K binary matrices as K -&#62; infinity, a strategy inspired by the derivation of the Chinese restaurant process (Aldous, 1985; Pitman, 2002) which preserves exchangeability of the rows. We define a simple generative processes for this distribution which we call the Indian buffet process (IBP; Griffiths and Ghahramani, 2005, 2006). The IBP contains a single hyperparameter which controls the expected number of latent features possessed by each data point. We describe a two-parameter generalization of the IBP which has additional flexibility, independently controlling the expected number of features and their variance across data points. The use of this distribution as a prior in an infinite latent feature model is illustrated, and Markov chain Monte Carlo algorithms for inference are described.</description>
    <dc:title>Bayesian nonparametric latent feature models</dc:title>

    <dc:creator>Zoubin Ghahramani</dc:creator>
    <dc:creator>TL Griffiths</dc:creator>
    <dc:creator>Peter Sollich</dc:creator>
    <dc:source>(2007), pp. 201-225.</dc:source>
    <dc:date>2008-03-06T07:59:43-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:startingPage>201</prism:startingPage>
    <prism:endingPage>225</prism:endingPage>
    <prism:publisher>Oxford University Press</prism:publisher>
    <prism:category>bayesian</prism:category>
    <prism:category>indian_buffet_process</prism:category>
    <prism:category>latent_feature_models</prism:category>
    <prism:category>learning</prism:category>
    <prism:category>nonparametric</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2477625">
    <title>The Use of Unlabeled Data in Predictive Modeling</title>
    <link>http://www.citeulike.org/user/jsr/article/2477625</link>
    <description>&lt;i&gt;ArXiv e-prints, Vol. 710 (October 2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The incorporation of unlabeled data in regression and classification analysis is an increasing focus of the applied statistics and machine learning literatures, with a number of recent examples demonstrating the potential for unlabeled data to contribute to improved predictive accuracy. The statistical basis for this semisupervised analysis does not appear to have been well delineated; as a result, the underlying theory and rationale may be underappreciated, especially by nonstatisticians. There is also room for statisticians to become more fully engaged in the vigorous research in this important area of intersection of the statistical and computer sciences. Much of the theoretical work in the literature has focused, for example, on geometric and structural properties of the unlabeled data in the context of particular algorithms, rather than probabilistic and statistical questions. This paper overviews the fundamental statistical foundations for predictive modeling and the general questions associated with unlabeled data, highlighting the relevance of venerable concepts of sampling design and prior specification. This theory, illustrated with a series of central illustrative examples and two substantial real data analyses, shows precisely when, why and how unlabeled data matter.</description>
    <dc:title>The Use of Unlabeled Data in Predictive Modeling</dc:title>

    <dc:creator>F Liang</dc:creator>
    <dc:creator>S Mukherjee</dc:creator>
    <dc:creator>M West</dc:creator>
    <dc:source>ArXiv e-prints, Vol. 710 (October 2007)</dc:source>
    <dc:date>2008-03-06T07:54:57-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>ArXiv e-prints</prism:publicationName>
    <prism:volume>710</prism:volume>
    <prism:category>bayesian</prism:category>
    <prism:category>learning</prism:category>
    <prism:category>manifolds</prism:category>
    <prism:category>semi_supervised</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2472088">
    <title>Infinite latent feature models and the Indian buffet process</title>
    <link>http://www.citeulike.org/user/jsr/article/2472088</link>
    <description>&lt;i&gt;(2005)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We define a probability distribution over equivalence classes of binary matrices with a finite number of rows and an unbounded number of columns.</description>
    <dc:title>Infinite latent feature models and the Indian buffet process</dc:title>

    <dc:creator>T Griffiths</dc:creator>
    <dc:creator>Z Ghahramani</dc:creator>
    <dc:source>(2005)</dc:source>
    <dc:date>2008-03-05T11:10:46-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:category>bayesian</prism:category>
    <prism:category>indian_buffet_process</prism:category>
    <prism:category>learning</prism:category>
    <prism:category>nonparametric</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2472067">
    <title>On the Gittins Index for Multiarmed Bandits</title>
    <link>http://www.citeulike.org/user/jsr/article/2472067</link>
    <description>&lt;i&gt;The Annals of Applied Probability, Vol. 2, No. 4. (1992), pp. 1024-1033.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This paper considers the multiarmed bandit problem and presents a new proof of the optimality of the Gittins index policy. The proof is intuitive and does not require an interchange argument. The insight it affords is used to give a streamlined summary of previous research and to prove a new result: The optimal value function is a submodular set function of the available projects.</description>
    <dc:title>On the Gittins Index for Multiarmed Bandits</dc:title>

    <dc:creator>Richard Weber</dc:creator>
    <dc:source>The Annals of Applied Probability, Vol. 2, No. 4. (1992), pp. 1024-1033.</dc:source>
    <dc:date>2008-03-05T11:04:00-00:00</dc:date>
    <prism:publicationYear>1992</prism:publicationYear>
    <prism:publicationName>The Annals of Applied Probability</prism:publicationName>
    <prism:volume>2</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>1024</prism:startingPage>
    <prism:endingPage>1033</prism:endingPage>
    <prism:category>bandits</prism:category>
    <prism:category>decision_theory</prism:category>
    <prism:category>rl</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2472047">
    <title>Evolutionarily stable strategies of random games, and the vertices of random polygons</title>
    <link>http://www.citeulike.org/user/jsr/article/2472047</link>
    <description>&lt;i&gt;Annals of Applied Probability, Vol. 18, No. 1. (2008), pp. 259-287.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;An evolutionarily stable strategy (ESS) is an equilibrium strategy that is immune to invasions by rare alternative (“mutant”) strategies. Unlike Nash equilibria, ESS do not always exist in finite games. In this paper we address the question of what happens when the size of the game increases: does an ESS exist for “almost every large” game? Letting the entries in the n×n game matrix be independently randomly chosen according to a distribution F, we study the number of ESS with support of size 2. In particular, we show that, as n→∞, the probability of having such an ESS: (i) converges to 1 for distributions F with “exponential and faster decreasing tails” (e.g., uniform, normal, exponential); and (ii) converges to 1 − 1/√e for distributions F with “slower than exponential decreasing tails” (e.g., lognormal, Pareto, Cauchy). Our results also imply that the expected number of vertices of the convex hull of n random points in the plane converges to infinity for the distributions in (i), and to 4 for the distributions in (ii).</description>
    <dc:title>Evolutionarily stable strategies of random games, and the vertices of random polygons</dc:title>

    <dc:creator>Sergiu Hart</dc:creator>
    <dc:creator>Yosef Rinott</dc:creator>
    <dc:creator>Benjamin Weiss</dc:creator>
    <dc:source>Annals of Applied Probability, Vol. 18, No. 1. (2008), pp. 259-287.</dc:source>
    <dc:date>2008-03-05T10:56:23-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Annals of Applied Probability</prism:publicationName>
    <prism:volume>18</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>259</prism:startingPage>
    <prism:endingPage>287</prism:endingPage>
    <prism:category>evolutionary_game_theory</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2470860">
    <title>Semi-Supervised Learning on Riemannian Manifolds</title>
    <link>http://www.citeulike.org/user/jsr/article/2470860</link>
    <description>&lt;i&gt;Mach. Learn., Vol. 56, No. 1-3. (2004), pp. 209-239.&lt;/i&gt;</description>
    <dc:title>Semi-Supervised Learning on Riemannian Manifolds</dc:title>

    <dc:creator>Mikhail Belkin</dc:creator>
    <dc:creator>Partha Niyogi</dc:creator>
    <dc:identifier>doi:10.1023/B:MACH.0000033120.25363.1e</dc:identifier>
    <dc:source>Mach. Learn., Vol. 56, No. 1-3. (2004), pp. 209-239.</dc:source>
    <dc:date>2008-03-05T09:38:36-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Mach. Learn.</prism:publicationName>
    <prism:issn>0885-6125</prism:issn>
    <prism:volume>56</prism:volume>
    <prism:number>1-3</prism:number>
    <prism:startingPage>209</prism:startingPage>
    <prism:endingPage>239</prism:endingPage>
    <prism:publisher>Kluwer Academic Publishers</prism:publisher>
    <prism:category>geometry</prism:category>
    <prism:category>learning</prism:category>
    <prism:category>manifolds</prism:category>
    <prism:category>semi_supervised</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2470655">
    <title>Asymptotic Bayes Criteria for Nonparametric Response Surface Design</title>
    <link>http://www.citeulike.org/user/jsr/article/2470655</link>
    <description>&lt;i&gt;Annals of Statistics, Vol. 22, No. 2. (1994), pp. 634-651.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This paper deals with Bayesian design for response surface prediction when the prior may be finite or infinite dimensional, the design space arbitrary. In order that the resulting problems be manageable, we resort to asymptotic versions of D-, G- and A-optimality. Here the asymptotics stem from allowing the error variance to be large. The problems thus elicited have strong game-like characteristics. Examples of theoretical solutions are brought forward, especially when the priors are stationary processes on an interval, and we give numerical evidence that the asymptotics work well in the finite domain.</description>
    <dc:title>Asymptotic Bayes Criteria for Nonparametric Response Surface Design</dc:title>

    <dc:creator>Toby Mitchell</dc:creator>
    <dc:creator>Jerome Sacks</dc:creator>
    <dc:creator>Donald Ylvisaker</dc:creator>
    <dc:source>Annals of Statistics, Vol. 22, No. 2. (1994), pp. 634-651.</dc:source>
    <dc:date>2008-03-05T09:00:54-00:00</dc:date>
    <prism:publicationYear>1994</prism:publicationYear>
    <prism:publicationName>Annals of Statistics</prism:publicationName>
    <prism:volume>22</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>634</prism:startingPage>
    <prism:endingPage>651</prism:endingPage>
    <prism:category>bayesian</prism:category>
    <prism:category>global_optimization</prism:category>
    <prism:category>nonparametric</prism:category>
    <prism:category>response_surface</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/695242">
    <title>Bayesian Hierarchical Clustering</title>
    <link>http://www.citeulike.org/user/jsr/article/695242</link>
    <description>&lt;i&gt;&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We present a novel algorithm for agglomerative hierarchical clustering based on evaluating marginal likelihoods of a probabilistic model. This algorithm has several advantages over traditional distance-based agglomerative clustering algorithms. (1) It defines a probabilistic model of the data which can be used to compute the predictive distribution of a test point and the probability of it belonging to any of the existing clusters in the tree. (2) It uses a model-based criterion to...</description>
    <dc:title>Bayesian Hierarchical Clustering</dc:title>

    <dc:creator>Katherine Heller</dc:creator>
    <dc:date>2006-06-13T20:57:23-00:00</dc:date>
    <prism:category>bayesian</prism:category>
    <prism:category>clustering</prism:category>
    <prism:category>__google__</prism:category>
    <prism:category>hierarchical_clustering</prism:category>
    <prism:category>nonparametric</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/1983303">
    <title>A Nonparametric Bayesian Approach to Modeling Overlapping Clusters</title>
    <link>http://www.citeulike.org/user/jsr/article/1983303</link>
    <description>&lt;i&gt;(2007)&lt;/i&gt;</description>
    <dc:title>A Nonparametric Bayesian Approach to Modeling Overlapping Clusters</dc:title>

    <dc:creator>KA Heller</dc:creator>
    <dc:creator>Z Ghahramani</dc:creator>
    <dc:source>(2007)</dc:source>
    <dc:date>2007-11-26T03:17:47-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:category>bayesian</prism:category>
    <prism:category>clustering</prism:category>
    <prism:category>__google__</prism:category>
    <prism:category>hierarchical_clustering</prism:category>
    <prism:category>indian_buffet_process</prism:category>
    <prism:category>nonparametric</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/jsr/article/2470596">
    <title>Online Mechanisms</title>
    <link>http://www.citeulike.org/user/jsr/article/2470596</link>
    <description>&lt;i&gt;&lt;/i&gt;</description>
    <dc:title>Online Mechanisms</dc:title>

    <dc:creator>David Parkes</dc:creator>
    <dc:date>2008-03-05T08:44:18-00:00</dc:date>
    <prism:category>auction_theory</prism:category>
    <prism:category>computational_economics</prism:category>
    <prism:category>game_theory</prism:category>
    <prism:category>learning</prism:category>
    <prism:category>mechanism_design</prism:category>
    <prism:category>online</prism:category>
</item>



</rdf:RDF>

