<?xml version="1.0" encoding="UTF-8"?>

<rdf:RDF
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
   xmlns="http://purl.org/rss/1.0/"
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:prism="http://prismstandard.org/namespaces/1.2/basic/"
   xmlns:dcterms="http://purl.org/dc/terms/"

>
<channel rdf:about="http://www.citeulike.org/about">
<pubDate>Thu, 24 Jul 2008 23:45:10 +0100</pubDate>


	<title>CiteULike: bpacker's library [134 articles]</title>
	<description>CiteULike: bpacker's library [134 articles]</description>


	<link>http://www.citeulike.org/user/bpacker</link>
	<dc:publisher>CiteULike.org</dc:publisher>
	<dc:language>en-gb</dc:language>
	<dc:rights>Copyright &#169; 2004-2008 citeulike.org</dc:rights>
	<items>
    <rdf:Seq>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2638542"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2363626"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2363617"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2355132"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2355122"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2324817"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2324813"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2324810"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2306828"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2300741"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2299577"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/142759"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2278466"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2232917"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2211878"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1866475"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2086477"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/2086469"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1808736"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1771591"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1766905"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1766894"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1766875"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/140030"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1766865"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1766855"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1617758"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/989397"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1577170"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1559169"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1558368"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/909787"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1558325"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1545446"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1538694"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/686689"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/201537"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1423550"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/899368"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1229748"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1402724"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1364780"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1300309"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1361536"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/513211"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1353717"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/453442"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/899534"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1067266"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/bpacker/article/1031416"/>

	</rdf:Seq>
	</items>
	</channel>


<item rdf:about="http://www.citeulike.org/user/bpacker/article/2638542">
    <title>Text Classification for DAG-Structured Categories</title>
    <link>http://www.citeulike.org/user/bpacker/article/2638542</link>
    <description>&lt;i&gt;Advances in Knowledge Discovery and Data Mining (2005), pp. 290-300.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Hierarchical text classification concerning the relationship among categories has become an interesting problem recently. Most research has focused on tree-structured categories, but in reality directed acyclic graph (DAG) - structured categories, where a child category may have more than one parent category, appear more often. In this paper, we introduce three approaches, namely, flat, tree-based, and DAG-based, for solving the multi-label text classification problem in which categories are organized as a DAG, and documents are classified into both leaf and internal categories. We also present experimental results of the methods using SVMs as classifiers on the Reuters-21578 collection and our data set of research papers in Artificial Intelligence. Keywords: text classification, hierarchies, multi-labels, SVM.</description>
    <dc:title>Text Classification for DAG-Structured Categories</dc:title>

    <dc:creator>Cao Nguyen</dc:creator>
    <dc:creator>Tran Dung</dc:creator>
    <dc:creator>Tru Cao</dc:creator>
    <dc:identifier>doi:10.1007/11430919_36</dc:identifier>
    <dc:source>Advances in Knowledge Discovery and Data Mining (2005), pp. 290-300.</dc:source>
    <dc:date>2008-04-07T19:09:52-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Advances in Knowledge Discovery and Data Mining</prism:publicationName>
    <prism:startingPage>290</prism:startingPage>
    <prism:endingPage>300</prism:endingPage>
    <prism:category>classification</prism:category>
    <prism:category>hierarchy</prism:category>
    <prism:category>nlp</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2363626">
    <title>Learning from Multiple sources</title>
    <link>http://www.citeulike.org/user/bpacker/article/2363626</link>
    <description>&lt;i&gt;(2006)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We consider the problem of learning accurate models from multiple sources of “nearby” data. Given distinct samples from multiple data sources and estimates of the dissimilarities between these sources, we provide a general theory of which samples should be used to learn models for each source. This theory is applicable in a broad decision-theoretic learning framework, and yields results for classification and regression generally, and for density estimation within the exponential family. A key component of our approach is the development of approximate triangle inequalities for expected loss, which may be of independent interest.</description>
    <dc:title>Learning from Multiple sources</dc:title>

    <dc:creator>Koby Crammer</dc:creator>
    <dc:creator>Michael Kearns</dc:creator>
    <dc:creator>Jennifer Wortman</dc:creator>
    <dc:source>(2006)</dc:source>
    <dc:date>2008-02-11T19:32:59-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:category>cotraining</prism:category>
    <prism:category>hierarchy</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2363617">
    <title>Probabilistic abstraction hierarchies</title>
    <link>http://www.citeulike.org/user/bpacker/article/2363617</link>
    <description>&lt;i&gt;(2001)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Many domains are naturally organized in an abstraction hierarchy or taxonomy, where the instances in &#34;nearby&#34; classes in the taxonomy are similar. In this paper, we provide a general probabilistic framework for clustering data into a set of classes organized as a taxonomy, where each class is associated with a probabilistic model from which the data was generated. The clustering algorithm simultaneously optimizes three things: the assignment of data instances to clusters, the models...</description>
    <dc:title>Probabilistic abstraction hierarchies</dc:title>

    <dc:creator>E Segal</dc:creator>
    <dc:creator>D Koller</dc:creator>
    <dc:creator>D Ormoneit</dc:creator>
    <dc:source>(2001)</dc:source>
    <dc:date>2008-02-11T19:24:32-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:category>hierarchy</prism:category>
    <prism:category>probabilistic-models</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2355132">
    <title>General Bounds on the Mutual Information Between a Parameter and n Conditionally Independent Observations</title>
    <link>http://www.citeulike.org/user/bpacker/article/2355132</link>
    <description>&lt;i&gt;(1995), pp. 402-411.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Each parameter &#952; in an abstract parameter space &#920; is associated with a different probability distribution on a set Y. A parameter &#952; is chosen at random from &#920; according to some a priori distribution on &#920;, and n conditionally independent random variables Y^n = Y_1, ..., Y_n are observed with common distribution determined by &#952;. We obtain bounds on the mutual information between the random variable &#920;, giving the choice of parameter, and the random variable Y^n, giving ...</description>
    <dc:title>General Bounds on the Mutual Information Between a Parameter and n Conditionally Independent Observations</dc:title>

    <dc:creator>David Haussler</dc:creator>
    <dc:creator>Manfred Opper</dc:creator>
    <dc:source>(1995), pp. 402-411.</dc:source>
    <dc:date>2008-02-08T23:37:25-00:00</dc:date>
    <prism:publicationYear>1995</prism:publicationYear>
    <prism:startingPage>402</prism:startingPage>
    <prism:endingPage>411</prism:endingPage>
    <prism:category>probabilistic-models</prism:category>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2355122">
    <title>A Bayesian/Information Theoretic Model of Learning to Learn via Multiple Task Sampling</title>
    <link>http://www.citeulike.org/user/bpacker/article/2355122</link>
    <description>&lt;i&gt;Machine Learning, Vol. 28, No. 1. (1 July 1997), pp. 7-39.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A Bayesian model of learning to learn by sampling from multiple tasks is presented. The multiple tasks are themselves generated by sampling from a distribution over an environment of related tasks. Such an environment is shown to be naturally modelled within a Bayesian context by the concept of an objective prior distribution. It is argued that for many common machine learning problems, although in general we do not know the true (objective) prior for the problem, we do have some idea of a set of possible priors to which the true prior belongs. It is shown that under these circumstances a learner can use Bayesian inference to learn the true prior by learning sufficiently many tasks from the environment. In addition, bounds are given on the amount of information required to learn a task when it is simultaneously learnt with several other tasks. The bounds show that if the learner has little knowledge of the true prior, but the dimensionality of the true prior is small, then sampling multiple tasks is highly advantageous. The theory is applied to the problem of learning a common feature set or equivalently a low-dimensional-representation (LDR) for an environment of related tasks.</description>
    <dc:title>A Bayesian/Information Theoretic Model of Learning to Learn via Multiple Task Sampling</dc:title>

    <dc:creator>Jonathan Baxter</dc:creator>
    <dc:identifier>doi:10.1023/A:1007327622663</dc:identifier>
    <dc:source>Machine Learning, Vol. 28, No. 1. (1 July 1997), pp. 7-39.</dc:source>
    <dc:date>2008-02-08T23:32:43-00:00</dc:date>
    <prism:publicationYear>1997</prism:publicationYear>
    <prism:publicationName>Machine Learning</prism:publicationName>
    <prism:volume>28</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>7</prism:startingPage>
    <prism:endingPage>39</prism:endingPage>
    <prism:category>hierarchy</prism:category>
    <prism:category>learning-theory</prism:category>
    <prism:category>probabilistic-models</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2324817">
    <title>A new composition theorem for learning algorithms</title>
    <link>http://www.citeulike.org/user/bpacker/article/2324817</link>
    <description>&lt;i&gt;(1998), pp. 583-589.&lt;/i&gt;</description>
    <dc:title>A new composition theorem for learning algorithms</dc:title>

    <dc:creator>Nader Bshouty</dc:creator>
    <dc:identifier>doi:10.1145/276698.276873</dc:identifier>
    <dc:source>(1998), pp. 583-589.</dc:source>
    <dc:date>2008-02-03T04:38:43-00:00</dc:date>
    <prism:publicationYear>1998</prism:publicationYear>
    <prism:startingPage>583</prism:startingPage>
    <prism:endingPage>589</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>learning-theory</prism:category>
    <prism:category>phase-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2324813">
    <title>Three learning phases for radial-basis-function networks</title>
    <link>http://www.citeulike.org/user/bpacker/article/2324813</link>
    <description>&lt;i&gt;Neural Networks, Vol. 14, No. 4-5. (May 2001), pp. 439-458.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In this paper, learning algorithms for radial basis function (RBF) networks are discussed. Whereas multilayer perceptrons (MLP) are typically trained with backpropagation algorithms, starting the training procedure with a random initialization of the MLP's parameters, an RBF network may be trained in many different ways. We categorize these RBF training methods into one-, two-, and three-phase learning schemes. Two-phase RBF learning is a very common learning scheme. The two layers of an RBF network are learnt separately; first the RBF layer is trained, including the adaptation of centers and scaling parameters, and then the weights of the output layer are adapted. RBF centers may be trained by clustering, vector quantization and classification tree algorithms, and the output layer by supervised learning (through gradient descent or pseudo inverse solution). Results from numerical experiments of RBF classifiers trained by two-phase learning are presented in three completely different pattern recognition applications: (a) the classification of 3D visual objects; (b) the recognition hand-written digits (2D objects); and (c) the categorization of high-resolution electrocardiograms given as a time series (1D objects) and as a set of features extracted from these time series. In these applications, it can be observed that the performance of RBF classifiers trained with two-phase learning can be improved through a third backpropagation-like training phase of the RBF network, adapting the whole set of parameters (RBF centers, scaling parameters, and output layer weights) simultaneously. This, we call three-phase learning in RBF networks. A practical advantage of two- and three-phase learning in RBF networks is the possibility to use unlabeled training data for the first training phase. 
Support vector (SV) learning in RBF networks is a different learning approach. SV learning can be considered, in this context of learning, as a special type of one-phase learning, where only the output layer weights of the RBF network are calculated, and the RBF centers are restricted to be a subset of the training data. Numerical experiments with several classifier schemes including k-nearest-neighbor, learning vector quantization and RBF classifiers trained through two-phase, three-phase and support vector learning are given. The performance of the RBF classifiers trained through SV learning and three-phase learning are superior to the results of two-phase learning, but SV learning often leads to complex network structures, since the number of support vectors is not a small fraction of the total number of data points.</description>
    <dc:title>Three learning phases for radial-basis-function networks</dc:title>

    <dc:creator>Friedhelm Schwenker</dc:creator>
    <dc:creator>Hans Kestler</dc:creator>
    <dc:creator>Gunther Palm</dc:creator>
    <dc:identifier>doi:10.1016/S0893-6080(01)00027-2</dc:identifier>
    <dc:source>Neural Networks, Vol. 14, No. 4-5. (May 2001), pp. 439-458.</dc:source>
    <dc:date>2008-02-03T04:36:06-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:publicationName>Neural Networks</prism:publicationName>
    <prism:volume>14</prism:volume>
    <prism:number>4-5</prism:number>
    <prism:startingPage>439</prism:startingPage>
    <prism:endingPage>458</prism:endingPage>
    <prism:category>phase-learning</prism:category>
    <prism:category>sparse-coding</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2324810">
    <title>Exact Learning Composed Classes with a Small Number of Mistakes</title>
    <link>http://www.citeulike.org/user/bpacker/article/2324810</link>
    <description>&lt;i&gt;Learning Theory (2006), pp. 199-213.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The Composition Lemma is one of the strongest tools for learning complex classes. It shows that if a class is learnable then composing the class with a class of polynomial number of concepts gives a learnable class. In this paper we extend the Composition Lemma as follows: we show that composing an attribute efficient learnable class with a learnable class with polynomial shatter coefficient gives a learnable class. This result extends many results in the literature and gives polynomial learning algorithms for new classes.</description>
    <dc:title>Exact Learning Composed Classes with a Small Number of Mistakes</dc:title>

    <dc:creator>Nader Bshouty</dc:creator>
    <dc:creator>Hanna Mazzawi</dc:creator>
    <dc:identifier>doi:10.1007/11776420_17</dc:identifier>
    <dc:source>Learning Theory (2006), pp. 199-213.</dc:source>
    <dc:date>2008-02-03T04:33:34-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Learning Theory</prism:publicationName>
    <prism:startingPage>199</prism:startingPage>
    <prism:endingPage>213</prism:endingPage>
    <prism:category>learning-theory</prism:category>
    <prism:category>phase-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2306828">
    <title>On Efficient Agnostic Learning of Linear Combinations of Basis Functions</title>
    <link>http://www.citeulike.org/user/bpacker/article/2306828</link>
    <description>&lt;i&gt;(1995), pp. 369-376.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We consider efficient agnostic learning of linear combinations of basis functions when the sum of absolute values of the weights of the linear combinations is bounded. With the quadratic loss function, we show that the class of linear combinations of a set of basis functions is efficiently agnostically learnable if and only if the class of basis functions is efficiently agnostically learnable. We also show that the sample complexity for learning the linear combinations grows polynomially if and ...</description>
    <dc:title>On Efficient Agnostic Learning of Linear Combinations of Basis Functions</dc:title>

    <dc:creator>Wee Lee</dc:creator>
    <dc:creator>Peter Bartlett</dc:creator>
    <dc:creator>Robert Williamson</dc:creator>
    <dc:source>(1995), pp. 369-376.</dc:source>
    <dc:date>2008-01-30T10:15:04-00:00</dc:date>
    <prism:publicationYear>1995</prism:publicationYear>
    <prism:startingPage>369</prism:startingPage>
    <prism:endingPage>376</prism:endingPage>
    <prism:category>sparse-coding</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2300741">
    <title>Are Emily and Greg More Employable than Lakisha and Jamal? A Field Experiment on Labor Market Discrimination</title>
    <link>http://www.citeulike.org/user/bpacker/article/2300741</link>
    <description>&lt;i&gt;National Bureau of Economic Research Working Paper Series (July 2003), 9873.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Author contact info: Marianne Bertrand Graduate School of Business University of Chicago 5807 South Woodlawn Avenue Chicago, IL 60637 Tel: 773/834-5943 Fax: 773/702-0458 E-Mail: marianne.bertrand@gsb.uchicago.edu Sendhil Mullainathan Department of Economics Littauer 208 Harvard University Cambridge, MA 02138 Tel: 617/496-2720 Fax: 617/495-7730 E-Mail: mullain@fas.harvard.edu We perform a field experiment to measure racial discrimination in the labor market. We respond with fictitious resumes to help-wanted ads in Boston and Chicago newspapers. To manipulate perception of race, each resume is assigned either a very African American sounding name or a very White sounding name. The results show significant discrimination against African-American names: White names receive 50 percent more callbacks for interviews. We also find that race affects the benefits of a better resume. For White names, a higher quality resume elicits 30 percent more callbacks whereas for African Americans, it elicits a far smaller increase. Applicants living in better neighborhoods receive more callbacks but, interestingly, this effect does not differ by race. The amount of discrimination is uniform across occupations and industries. Federal contractors and employers who list 'Equal Opportunity Employer' in their ad discriminate as much as other employers. We find little evidence that our results are driven by employers inferring something other than race, such as social class, from the names. These results suggest that racial discrimination is still a prominent feature of the labor market.</description>
    <dc:title>Are Emily and Greg More Employable than Lakisha and Jamal? A Field Experiment on Labor Market Discrimination</dc:title>

    <dc:creator>Marianne Bertrand</dc:creator>
    <dc:creator>Sendhil Mullainathan</dc:creator>
    <dc:source>National Bureau of Economic Research Working Paper Series (July 2003), 9873.</dc:source>
    <dc:date>2008-01-29T04:35:09-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>National Bureau of Economic Research Working Paper Series</prism:publicationName>
    <prism:startingPage>9873</prism:startingPage>
    <prism:category>racism</prism:category>
    <prism:category>social-science</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2299577">
    <title>Elements of Information Theory</title>
    <link>http://www.citeulike.org/user/bpacker/article/2299577</link>
    <description>&lt;i&gt;(5 October 2001)&lt;/i&gt;</description>
    <dc:title>Elements of Information Theory</dc:title>

    <dc:creator>Thomas Cover</dc:creator>
    <dc:creator>Joy Thomas</dc:creator>
    <dc:source>(5 October 2001)</dc:source>
    <dc:date>2008-01-28T21:56:55-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/142759">
    <title>Analyzing the Effectiveness and Applicability of Co-training</title>
    <link>http://www.citeulike.org/user/bpacker/article/142759</link>
    <description>&lt;i&gt;(2000), pp. 86-93.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Recently there has been significant interest in supervised learning algorithms that combine labeled and unlabeled data for text learning tasks. The co-training setting [1] applies to datasets that have a natural separation of their features into two disjoint sets. We demonstrate that when learning from labeled and unlabeled data, algorithms explicitly leveraging a natural independent split of the features outperform algorithms that do not. When a natural split does not exist, co-training...</description>
    <dc:title>Analyzing the Effectiveness and Applicability of Co-training</dc:title>

    <dc:creator>Kamal Nigam</dc:creator>
    <dc:creator>Rayid Ghani</dc:creator>
    <dc:source>(2000), pp. 86-93.</dc:source>
    <dc:date>2005-03-29T20:46:17-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:startingPage>86</prism:startingPage>
    <prism:endingPage>93</prism:endingPage>
    <prism:category>cotraining</prism:category>
    <prism:category>probabilistic-models</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2278466">
    <title>Co-training from an Incremental EM Perspective</title>
    <link>http://www.citeulike.org/user/bpacker/article/2278466</link>
    <description>&lt;i&gt;Intelligent Data Engineering and Automated Learning – IDEAL 2004 (2004), pp. 765-773.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We study classification when the majority of data is unlabeled, and only a small fraction is labeled: the so-called semi-supervised learning situation. Blum and Mitchell’s co-training is a popular semi-supervised algorithm [1] to use when we have multiple independent views of the entities to classify. An example of a multi-view situation is classifying web pages: one view may describe the pages by the words that occur on them, another view describes the pages by the words in the hyperlinks that point to them. In co-training two learners each form a model from the labeled data and then incrementally label small subsets of the unlabeled data for each other. The learners then re-estimate their model from the labeled data and the pseudo-labels provided by the learners. Though some analysis of the algorithm’s performance exists [1] the computation performed is still not well understood. We propose that each view in co-training is effectively performing incremental EM as postulated by Neal and Hinton [3], combined with a Bayesian classifier. This analysis suggests improvements over the core co-training algorithm. We introduce variations, which result in faster convergence to the maximum possible accuracy of classification than the core co-training algorithm, and therefore increase the learning efficiency. We empirically verify our claim for a number of data sets in the context of belief network learning.</description>
    <dc:title>Co-training from an Incremental EM Perspective</dc:title>

    <dc:creator>Minoo Aminian</dc:creator>
    <dc:source>Intelligent Data Engineering and Automated Learning – IDEAL 2004 (2004), pp. 765-773.</dc:source>
    <dc:date>2008-01-23T01:32:34-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Intelligent Data Engineering and Automated Learning – IDEAL 2004</prism:publicationName>
    <prism:startingPage>765</prism:startingPage>
    <prism:endingPage>773</prism:endingPage>
    <prism:category>cotraining</prism:category>
    <prism:category>probabilistic-models</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2232917">
    <title>Learning Mixtures of Gaussians</title>
    <link>http://www.citeulike.org/user/bpacker/article/2232917</link>
    <description>&lt;i&gt;(1999)&lt;/i&gt;</description>
    <dc:title>Learning Mixtures of Gaussians</dc:title>

    <dc:creator>Sanjoy Dasgupta</dc:creator>
    <dc:source>(1999)</dc:source>
    <dc:date>2008-01-15T02:34:44-00:00</dc:date>
    <prism:publicationYear>1999</prism:publicationYear>
    <prism:publisher>IEEE Computer Society</prism:publisher>
    <prism:category>probabilistic-models</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2211878">
    <title>Continuation methods for mixing heterogeneous sources</title>
    <link>http://www.citeulike.org/user/bpacker/article/2211878</link>
    <description>&lt;i&gt;(2002)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A number of modern learning tasks involve estimation from heterogeneous information sources.</description>
    <dc:title>Continuation methods for mixing heterogeneous sources</dc:title>

    <dc:creator>A Corduneanu</dc:creator>
    <dc:creator>T Jaakkola</dc:creator>
    <dc:source>(2002)</dc:source>
    <dc:date>2008-01-09T19:12:50-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:category>continuation-methods</prism:category>
    <prism:category>semisupervised-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1866475">
    <title>The Information Bottleneck EM algorithm</title>
    <link>http://www.citeulike.org/user/bpacker/article/1866475</link>
    <description>&lt;i&gt;(2003)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Learning with hidden variables is a central challenge in probabilistic graphical models that has important implications for many real-life problems. The classical approach is using the Expectation Maximization (EM) algorithm. This algorithm, however, can get trapped in local maxima. In this paper we explore a new approach that is based on the Information Bottleneck principle.</description>
    <dc:title>The Information Bottleneck EM algorithm</dc:title>

    <dc:creator>G Elidan</dc:creator>
    <dc:creator>N Friedman</dc:creator>
    <dc:source>(2003)</dc:source>
    <dc:date>2007-11-05T04:43:43-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:category>continuation-methods</prism:category>
    <prism:category>probabilistic-models</prism:category>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2086477">
    <title>Some theory for Fisher's linear discriminant function, `naive Bayes', and some alternatives when there are many more variables than observations</title>
    <link>http://www.citeulike.org/user/bpacker/article/2086477</link>
    <description>&lt;i&gt;Bernoulli, Vol. 10, No. 6. (2004), pp. 989-1010.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We show that the `naive Bayes' classifier which assumes independent covariates greatly outperforms the Fisher linear discriminant rule under broad conditions when the number of variables grows faster than the number of observations, in the classical problem of discriminating between two normal populations. We also introduce a class of rules spanning the range between independence and arbitrary dependence. These rules are shown to achieve Bayes consistency for the Gaussian `coloured noise' model and to adapt to a spectrum of convergence rates, which we conjecture to be minimax.</description>
    <dc:title>Some theory for Fisher's linear discriminant function, `naive Bayes', and some alternatives when there are many more variables than observations</dc:title>

    <dc:creator>Peter Bickel</dc:creator>
    <dc:creator>Elizaveta Levina</dc:creator>
    <dc:source>Bernoulli, Vol. 10, No. 6. (2004), pp. 989-1010.</dc:source>
    <dc:date>2007-12-10T18:47:42-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Bernoulli</prism:publicationName>
    <prism:volume>10</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>989</prism:startingPage>
    <prism:endingPage>1010</prism:endingPage>
    <prism:category>regularization</prism:category>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/2086469">
    <title>Regularization in statistics</title>
    <link>http://www.citeulike.org/user/bpacker/article/2086469</link>
    <description>&lt;i&gt;TEST, Vol. 15, No. 2. (2006), pp. 271-344.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Abstract&#160;&#160;This paper is a selective review of the regularization methods scattered in statistics literature. We introduce a general conceptual approach to regularization and fit most existing methods into it. We have tried to focus on the importance of regularization when dealing with today's high-dimensional objects: data and models. A wide range of examples are discussed, including nonparametric regression, boosting, covariance matrix estimation, principal component estimation, subsampling.</description>
    <dc:title>Regularization in statistics</dc:title>

    <dc:creator>Peter Bickel</dc:creator>
    <dc:creator>Bo Li</dc:creator>
    <dc:creator>Alexandre Tsybakov</dc:creator>
    <dc:creator>Sara van de Geer</dc:creator>
    <dc:creator>Bin Yu</dc:creator>
    <dc:creator>Teófilo Valdés</dc:creator>
    <dc:creator>Carlos Rivero</dc:creator>
    <dc:creator>Jianqing Fan</dc:creator>
    <dc:creator>Aad van der Vaart</dc:creator>
    <dc:identifier>doi:10.1007/BF02607055</dc:identifier>
    <dc:source>TEST, Vol. 15, No. 2. (2006), pp. 271-344.</dc:source>
    <dc:date>2007-12-10T18:43:06-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>TEST</prism:publicationName>
    <prism:volume>15</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>271</prism:startingPage>
    <prism:endingPage>344</prism:endingPage>
    <prism:category>boosting</prism:category>
    <prism:category>regularization</prism:category>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1808736">
    <title>On the bias of information estimates</title>
    <link>http://www.citeulike.org/user/bpacker/article/1808736</link>
    <description>&lt;i&gt;Psychological Bulletin, Vol. 71, No. 2. (February 1969), pp. 108-109.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Considers the problem of the calculation of the bias of the maximum likelihood information estimate H, based on independent choices among k events. The expectation EH is calculated exactly as a function of the probabilities p1, p2, . . . , pk. The bias H - EH is approximated by using a convergent expansion for a logarithm and using the 1st 2 terms of a finite expansion for the jth moment of a random variable. The resulting approximation is more generally valid, although less concise and simple, than the classical Miller-Madow approximation.</description>
    <dc:title>On the bias of information estimates</dc:title>

    <dc:creator>AG Carlton</dc:creator>
    <dc:source>Psychological Bulletin, Vol. 71, No. 2. (February 1969), pp. 108-109.</dc:source>
    <dc:date>2007-10-23T03:12:06-00:00</dc:date>
    <prism:publicationYear>1969</prism:publicationYear>
    <prism:publicationName>Psychological Bulletin</prism:publicationName>
    <prism:volume>71</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>108</prism:startingPage>
    <prism:endingPage>109</prism:endingPage>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1771591">
    <title>Analytical estimates of limited sampling biases in different information measures</title>
    <link>http://www.citeulike.org/user/bpacker/article/1771591</link>
    <description>&lt;i&gt;Network: Computation in Neural Systems, Vol. 7, No. 1. (1996), pp. 87-107.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Measuring the information carried by neuronal activity is made difficult, particularly when recording from mammalian cells, by the limited amount of data usually available, which results in a systematic error. While empirical ad hoc procedures have been used to correct for such error, we have recently proposed a direct procedure consisting of the analytical calculation of the average error, its estimation (up to subleading terms) from the data, and its subtraction from raw information measures to yield unbiased measures. We calculate here the leading correction terms for both the average transmitted information and the conditional information and, since usually one must first regularize the data, we specify the expressions appropriate to different regularizations. Computer simulations indicate a broad range of validity of the analytical results, suggest the effectiveness of regularizing by simple binning and illustrate the advantage of this over the previously used `bootstrap' procedure.</description>
    <dc:title>Analytical estimates of limited sampling biases in different information measures</dc:title>

    <dc:creator>Stefano Panzeri</dc:creator>
    <dc:creator>Alessandro Treves</dc:creator>
    <dc:identifier>doi:10.1088/0954-898X/7/1/006</dc:identifier>
    <dc:source>Network: Computation in Neural Systems, Vol. 7, No. 1. (1996), pp. 87-107.</dc:source>
    <dc:date>2007-10-15T23:11:18-00:00</dc:date>
    <prism:publicationYear>1996</prism:publicationYear>
    <prism:publicationName>Network: Computation in Neural Systems</prism:publicationName>
    <prism:volume>7</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>87</prism:startingPage>
    <prism:endingPage>107</prism:endingPage>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1766905">
    <title>Semi-Supervised Self-Training of Object Detection Models</title>
    <link>http://www.citeulike.org/user/bpacker/article/1766905</link>
    <description>&lt;i&gt;&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The construction of appearance-based object detection systems is time-consuming and difficult because a large number of training examples must be collected and manually labeled in order to capture variations in object appearance. Semi-supervised training is a means for reducing the effort needed to prepare the training set by training the model with a small number of fully labeled examples and an additional set of unlabeled or weakly labeled examples. In this work we present a semi-supervised...</description>
    <dc:title>Semi-Supervised Self-Training of Object Detection Models</dc:title>

    <dc:creator>Chuck Rosenberg</dc:creator>
    <dc:creator>Martial Hebert</dc:creator>
    <dc:creator>Henry Schneiderman</dc:creator>
    <dc:date>2007-10-14T14:54:24-00:00</dc:date>
    <prism:category>object-detection</prism:category>
    <prism:category>semisupervised-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1766894">
    <title>Analyzing the effectiveness and applicability of co-training</title>
    <link>http://www.citeulike.org/user/bpacker/article/1766894</link>
    <description>&lt;i&gt;(2000), pp. 86-93.&lt;/i&gt;</description>
    <dc:title>Analyzing the effectiveness and applicability of co-training</dc:title>

    <dc:creator>Kamal Nigam</dc:creator>
    <dc:creator>Rayid Ghani</dc:creator>
    <dc:identifier>doi:10.1145/354756.354805</dc:identifier>
    <dc:source>(2000), pp. 86-93.</dc:source>
    <dc:date>2007-10-14T14:50:46-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:startingPage>86</prism:startingPage>
    <prism:endingPage>93</prism:endingPage>
    <prism:publisher>ACM Press</prism:publisher>
    <prism:category>cotraining</prism:category>
    <prism:category>semisupervised-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1766875">
    <title>Co-training and expansion: Towards bridging theory and practice</title>
    <link>http://www.citeulike.org/user/bpacker/article/1766875</link>
    <description>&lt;i&gt;(2004)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Co-training is a method for combining labeled and unlabeled data when examples can be thought of as containing two distinct sets of features. It has had a number of practical successes, yet previous theoretical analyses have needed very strong assumptions on the data that are unlikely to be satisfied in practice.</description>
    <dc:title>Co-training and expansion: Towards bridging theory and practice</dc:title>

    <dc:creator>N Balcan</dc:creator>
    <dc:creator>A Blum</dc:creator>
    <dc:creator>K Yang</dc:creator>
    <dc:source>(2004)</dc:source>
    <dc:date>2007-10-14T14:45:26-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:category>cotraining</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/140030">
    <title>Combining Labeled and Unlabeled Data with Co-training</title>
    <link>http://www.citeulike.org/user/bpacker/article/140030</link>
    <description>&lt;i&gt;(1998)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We consider the problem of using a large unlabeled sample to boost performance of a learning algorithm when only a small set of labeled examples is available. In particular, we consider a problem setting motivated by the task of learning to classify web pages, in which the description of each example can be partitioned into two distinct views. For example, the description of a web page can be partitioned into the words occurring on that page, and the words occurring in hyperlinks that point to...</description>
    <dc:title>Combining Labeled and Unlabeled Data with Co-training</dc:title>

    <dc:creator>Avrim Blum</dc:creator>
    <dc:creator>Tom Mitchell</dc:creator>
    <dc:source>(1998)</dc:source>
    <dc:date>2005-03-25T18:29:37-00:00</dc:date>
    <prism:publicationYear>1998</prism:publicationYear>
    <prism:category>cotraining</prism:category>
    <prism:category>semisupervised-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1766865">
    <title>An augmented PAC model for semi-supervised learning</title>
    <link>http://www.citeulike.org/user/bpacker/article/1766865</link>
    <description>&lt;i&gt;(2005)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;that these numbers depend on. We provide examples of sample-complexity bounds both for uniform convergence and ε-cover based algorithms, as well as several algorithmic results. 21.1 Introduction As we have already seen in the previous chapters, there has been growing interest in using unlabeled data together with labeled data in machine learning, and a number of different approaches have been developed. However, the assumptions these methods are based on are often quite distinct and not...</description>
    <dc:title>An augmented PAC model for semi-supervised learning</dc:title>

    <dc:creator>M Balcan</dc:creator>
    <dc:creator>A Blum</dc:creator>
    <dc:source>(2005)</dc:source>
    <dc:date>2007-10-14T14:41:46-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:category>cotraining</prism:category>
    <prism:category>semisupervised-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1766855">
    <title>Unsupervised improvement of visual detectors using cotraining</title>
    <link>http://www.citeulike.org/user/bpacker/article/1766855</link>
    <description>&lt;i&gt;(2003)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;One significant challenge in the construction of visual detection systems is the acquisition of sufficient labeled data. This paper describes a new technique for training visual detectors which requires only a small quantity of labeled data, and then uses unlabeled data to improve performance over time. Unsupervised improvement is based on the cotraining framework of Blum and Mitchell, in which two disparate classifiers are trained simultaneously. Unlabeled examples which are confidently...</description>
    <dc:title>Unsupervised improvement of visual detectors using cotraining</dc:title>

    <dc:creator>A Levin</dc:creator>
    <dc:creator>P Viola</dc:creator>
    <dc:creator>Y Freund</dc:creator>
    <dc:source>(2003)</dc:source>
    <dc:date>2007-10-14T14:38:57-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:category>cotraining</prism:category>
    <prism:category>object-detection</prism:category>
    <prism:category>semisupervised-learning</prism:category>
    <prism:category>vision</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1617758">
    <title>Exponentiated Gradient Versus Gradient Descent for Linear Predictors</title>
    <link>http://www.citeulike.org/user/bpacker/article/1617758</link>
    <description>&lt;i&gt;No. UCSC-CRL-94-16. (1994)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We consider two algorithms for on-line prediction based on a linear model. The algorithms are the well-known gradient descent (GD) algorithm and a new algorithm, which we call EG±. They both maintain a weight vector using simple updates. For the GD algorithm, the update is based on subtracting the gradient of the squared error made on a prediction. The EG± algorithm uses the components of the gradient in the exponents of factors that are used in updating the weight vector...</description>
    <dc:title>Exponentiated Gradient Versus Gradient Descent for Linear Predictors</dc:title>

    <dc:creator>Jyrki Kivinen</dc:creator>
    <dc:creator>Manfred Warmuth</dc:creator>
    <dc:source>No. UCSC-CRL-94-16. (1994)</dc:source>
    <dc:date>2007-09-04T08:15:47-00:00</dc:date>
    <prism:publicationYear>1994</prism:publicationYear>
    <prism:number>UCSC-CRL-94-16</prism:number>
    <prism:category>algorithms</prism:category>
    <prism:category>optimization</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/989397">
    <title>Putting Objects in Perspective</title>
    <link>http://www.citeulike.org/user/bpacker/article/989397</link>
    <description>&lt;i&gt;(2006)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Image understanding requires not only individually estimating elements of the visual world but also capturing the interplay among them. In this paper, we provide a framework for placing local object detection in the context of the overall 3D scene by modeling the interdependence of objects, surface orientations, and camera viewpoint.</description>
    <dc:title>Putting Objects in Perspective</dc:title>

    <dc:creator>Derek Hoiem</dc:creator>
    <dc:creator>Alexei Efros</dc:creator>
    <dc:creator>Martial Hebert</dc:creator>
    <dc:source>(2006)</dc:source>
    <dc:date>2006-12-12T08:11:42-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:category>recognition</prism:category>
    <prism:category>vision</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1577170">
    <title>Uncovering shared structures in multiclass classification</title>
    <link>http://www.citeulike.org/user/bpacker/article/1577170</link>
    <description>&lt;i&gt;(2007), pp. 17-24.&lt;/i&gt;</description>
    <dc:title>Uncovering shared structures in multiclass classification</dc:title>

    <dc:creator>Yonatan Amit</dc:creator>
    <dc:creator>Michael Fink</dc:creator>
    <dc:creator>Nathan Srebro</dc:creator>
    <dc:creator>Shimon Ullman</dc:creator>
    <dc:identifier>doi:10.1145/1273496.1273499</dc:identifier>
    <dc:source>(2007), pp. 17-24.</dc:source>
    <dc:date>2007-08-20T17:35:22-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:startingPage>17</prism:startingPage>
    <prism:endingPage>24</prism:endingPage>
    <prism:publisher>ACM Press</prism:publisher>
    <prism:category>classification</prism:category>
    <prism:category>hierarchy</prism:category>
    <prism:category>product-of-experts</prism:category>
    <prism:category>vision</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1559169">
    <title>A dedicated generalized Procrustes algorithm for consensus molecular alignment</title>
    <link>http://www.citeulike.org/user/bpacker/article/1559169</link>
    <description>&lt;i&gt;Journal of Chemometrics, Vol. 18, No. 1. (2004), pp. 37-42.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Recently the idea of using generalized Procrustes analysis for aligning sets of molecules was introduced using standard algorithms. In this paper it is shown that, by tailoring the algorithm to this specific problem, a great gain in computational speed and memory efficiency can be obtained, but even more importantly, by using rotations without reflection, changes in chirality of molecules can be prevented, which was not previously possible. Copyright © 2004 John Wiley &#38; Sons, Ltd.</description>
    <dc:title>A dedicated generalized Procrustes algorithm for consensus molecular alignment</dc:title>

    <dc:creator>Jacques Commandeur</dc:creator>
    <dc:creator>Pieter Kroonenberg</dc:creator>
    <dc:creator>William Dunn</dc:creator>
    <dc:identifier>doi:10.1002/cem.842</dc:identifier>
    <dc:source>Journal of Chemometrics, Vol. 18, No. 1. (2004), pp. 37-42.</dc:source>
    <dc:date>2007-08-14T00:09:55-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Journal of Chemometrics</prism:publicationName>
    <prism:volume>18</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>37</prism:startingPage>
    <prism:endingPage>42</prism:endingPage>
    <prism:category>correspondence</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1558368">
    <title>Nonlinear dimensionality reduction by semidefinite programming and kernel matrix factorization</title>
    <link>http://www.citeulike.org/user/bpacker/article/1558368</link>
    <description>&lt;i&gt;(2005)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We describe an algorithm for nonlinear dimensionality reduction based on semidefinite programming and kernel matrix factorization.</description>
    <dc:title>Nonlinear dimensionality reduction by semidefinite programming and kernel matrix factorization</dc:title>

    <dc:creator>W Weinberger</dc:creator>
    <dc:creator>B Packer</dc:creator>
    <dc:creator>L Saul</dc:creator>
    <dc:source>(2005)</dc:source>
    <dc:date>2007-08-13T17:29:02-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:category>dimensionality-reduction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/909787">
    <title>Learning a kernel matrix for nonlinear dimensionality reduction</title>
    <link>http://www.citeulike.org/user/bpacker/article/909787</link>
    <description>&lt;i&gt;(2004)&lt;/i&gt;</description>
    <dc:title>Learning a kernel matrix for nonlinear dimensionality reduction</dc:title>

    <dc:creator>Kilian Weinberger</dc:creator>
    <dc:creator>Fei Sha</dc:creator>
    <dc:creator>Lawrence Saul</dc:creator>
    <dc:identifier>doi:10.1145/1015330.1015345</dc:identifier>
    <dc:source>(2004)</dc:source>
    <dc:date>2006-10-22T16:10:42-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publisher>ACM Press</prism:publisher>
    <prism:category>dimensionality-reduction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1558325">
    <title>Unsupervised learning of image manifolds by semidefinite programming</title>
    <link>http://www.citeulike.org/user/bpacker/article/1558325</link>
    <description>&lt;i&gt;Computer Vision and Pattern Recognition, 2004. CVPR 2004. Proceedings of the 2004 IEEE Computer Society Conference on, Vol. 2 (2004), pp. II-988-II-995 Vol.2.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Can we detect low dimensional structure in high dimensional data sets of images and video? The problem of dimensionality reduction arises often in computer vision and pattern recognition. In this paper, we propose a new solution to this problem based on semidefinite programming. Our algorithm can be used to analyze high dimensional data that lies on or near a low dimensional manifold. It overcomes certain limitations of previous work in manifold learning, such as Isomap and locally linear embedding. We illustrate the algorithm on easily visualized examples of curves and surfaces, as well as on actual images of faces, handwritten digits, and solid objects.</description>
    <dc:title>Unsupervised learning of image manifolds by semidefinite programming</dc:title>

    <dc:creator>KQ Weinberger</dc:creator>
    <dc:creator>LK Saul</dc:creator>
    <dc:source>Computer Vision and Pattern Recognition, 2004. CVPR 2004. Proceedings of the 2004 IEEE Computer Society Conference on, Vol. 2 (2004), pp. II-988-II-995 Vol.2.</dc:source>
    <dc:date>2007-08-13T17:14:31-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Computer Vision and Pattern Recognition, 2004. CVPR 2004. Proceedings of the 2004 IEEE Computer Society Conference on</prism:publicationName>
    <prism:volume>2</prism:volume>
    <prism:startingPage>II-988</prism:startingPage>
    <prism:endingPage>II-995</prism:endingPage>
    <prism:category>dimensionality-reduction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1545446">
    <title>Spectral Methods for Dimensionality Reduction</title>
    <link>http://www.citeulike.org/user/bpacker/article/1545446</link>
    <description>&lt;i&gt;(2005)&lt;/i&gt;</description>
    <dc:title>Spectral Methods for Dimensionality Reduction</dc:title>

    <dc:creator>Lawrence Saul</dc:creator>
    <dc:source>(2005)</dc:source>
    <dc:date>2007-08-09T08:19:11-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:category>dimensionality-reduction</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1538694">
    <title>Online multiclass learning by interclass hypothesis sharing</title>
    <link>http://www.citeulike.org/user/bpacker/article/1538694</link>
    <description>&lt;i&gt;(2006), pp. 313-320.&lt;/i&gt;</description>
    <dc:title>Online multiclass learning by interclass hypothesis sharing</dc:title>

    <dc:creator>Michael Fink</dc:creator>
    <dc:creator>Shai Shalev-Shwartz</dc:creator>
    <dc:creator>Yoram Singer</dc:creator>
    <dc:creator>Shimon Ullman</dc:creator>
    <dc:identifier>doi:10.1145/1143844.1143884</dc:identifier>
    <dc:source>(2006), pp. 313-320.</dc:source>
    <dc:date>2007-08-06T20:35:05-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:startingPage>313</prism:startingPage>
    <prism:endingPage>320</prism:endingPage>
    <prism:publisher>ACM Press</prism:publisher>
    <prism:category>boosting</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/686689">
    <title>A View of the EM Algorithm that Justifies Incremental, Sparse, and other Variants</title>
    <link>http://www.citeulike.org/user/bpacker/article/686689</link>
    <description>&lt;i&gt;(1998)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The EM algorithm performs maximum likelihood estimation for data in which some variables are unobserved. We present a function that resembles negative free energy and show that the M step maximizes this function with respect to the model parameters and the E step maximizes it with respect to the distribution over the unobserved variables. From this perspective, it is easy to justify an incremental variant of the EM algorithm in which the distribution for only one of the unobserved variables...</description>
    <dc:title>A View of the EM Algorithm that Justifies Incremental, Sparse, and other Variants</dc:title>

    <dc:creator>R Neal</dc:creator>
    <dc:creator>G Hinton</dc:creator>
    <dc:source>(1998)</dc:source>
    <dc:date>2006-06-06T14:32:04-00:00</dc:date>
    <prism:publicationYear>1998</prism:publicationYear>
    <prism:publisher>Kluwer</prism:publisher>
    <prism:category>cotraining</prism:category>
    <prism:category>probabilistic-models</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/201537">
    <title>Rapid object detection using a boosted cascade of simple features</title>
    <link>http://www.citeulike.org/user/bpacker/article/201537</link>
    <description>&lt;i&gt;(2001)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This paper describes a machine learning approach for visual object detection which is capable of processing images extremely rapidly and achieving high detection rates. This work is distinguished by three key contributions. The first is the introduction of a new image representation called the &#34;Integral Image&#34; which allows the features used by our detector to be computed very quickly. The second is a learning algorithm, based on AdaBoost, which selects a small number of critical visual features ...</description>
    <dc:title>Rapid object detection using a boosted cascade of simple features</dc:title>

    <dc:creator>P Viola</dc:creator>
    <dc:creator>M Jones</dc:creator>
    <dc:source>(2001)</dc:source>
    <dc:date>2005-05-16T15:51:34-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:category>boosting</prism:category>
    <prism:category>vision</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1423550">
    <title>Learning to detect objects in images via a sparse, part-based representation</title>
    <link>http://www.citeulike.org/user/bpacker/article/1423550</link>
    <description>&lt;i&gt;Pattern Analysis and Machine Intelligence, IEEE Transactions on, Vol. 26, No. 11. (2004), pp. 1475-1490.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We study the problem of detecting objects in still, gray-scale images. Our primary focus is the development of a learning-based approach to the problem that makes use of a sparse, part-based representation. A vocabulary of distinctive object parts is automatically constructed from a set of sample images of the object class of interest; images are then represented using parts from this vocabulary, together with spatial relations observed among the parts. Based on this representation, a learning algorithm is used to automatically learn to detect instances of the object class in new images. The approach can be applied to any object with distinguishable parts in a relatively fixed spatial configuration; it is evaluated here on difficult sets of real-world images containing side views of cars, and is seen to successfully detect objects in varying conditions amidst background clutter and mild occlusion. In evaluating object detection approaches, several important methodological issues arise that have not been satisfactorily addressed in the previous work. A secondary focus of this paper is to highlight these issues, and to develop rigorous evaluation standards for the object detection problem. A critical evaluation of our approach under the proposed standards is presented.</description>
    <dc:title>Learning to detect objects in images via a sparse, part-based representation</dc:title>

    <dc:creator>S Agarwal</dc:creator>
    <dc:creator>A Awan</dc:creator>
    <dc:creator>D Roth</dc:creator>
    <dc:source>Pattern Analysis and Machine Intelligence, IEEE Transactions on, Vol. 26, No. 11. (2004), pp. 1475-1490.</dc:source>
    <dc:date>2007-06-29T18:34:16-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Pattern Analysis and Machine Intelligence, IEEE Transactions on</prism:publicationName>
    <prism:volume>26</prism:volume>
    <prism:number>11</prism:number>
    <prism:startingPage>1475</prism:startingPage>
    <prism:endingPage>1490</prism:endingPage>
    <prism:category>recognition</prism:category>
    <prism:category>vision</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/899368">
    <title>Old and new matrix algebra useful for statistics</title>
    <link>http://www.citeulike.org/user/bpacker/article/899368</link>
    <description>&lt;i&gt;(1997)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This paper contains a large number of matrix identities which cannot be absorbed by mere reading. The reader is encouraged to take time and check each equation by hand and work out the examples. This is advanced material; see Searle (1982) for basic results. 1 Derivatives</description>
    <dc:title>Old and new matrix algebra useful for statistics</dc:title>

    <dc:creator>T Minka</dc:creator>
    <dc:source>(1997)</dc:source>
    <dc:date>2006-10-16T14:25:09-00:00</dc:date>
    <prism:publicationYear>1997</prism:publicationYear>
    <prism:category>math</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1229748">
    <title>A Hierarchical Community of Experts</title>
    <link>http://www.citeulike.org/user/bpacker/article/1229748</link>
    <description>&lt;i&gt;(1997)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We describe a directed acyclic graphical model that contains a hierarchy of linear units and a mechanism for dynamically selecting an appropriate subset of these units to model each observation. The non-linear selection mechanism is a hierarchy of binary units each of which gates the output of one of the linear units. There are no connections from linear units to binary units, so the generative model can be viewed as a logistic belief net (Neal 1992) which selects a skeleton linear model from...</description>
    <dc:title>A Hierarchical Community of Experts</dc:title>

    <dc:creator>Geoffrey Hinton</dc:creator>
    <dc:creator>Brian Sallans</dc:creator>
    <dc:creator>Zoubin Ghahramani</dc:creator>
    <dc:source>(1997)</dc:source>
    <dc:date>2007-04-16T12:21:21-00:00</dc:date>
    <prism:publicationYear>1997</prism:publicationYear>
    <prism:publisher>Kluwer Academic</prism:publisher>
    <prism:category>hierarchy</prism:category>
    <prism:category>probabilistic-models</prism:category>
    <prism:category>product-of-experts</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1402724">
    <title>Training Products of Experts by Maximizing Contrastive Likelihood</title>
    <link>http://www.citeulike.org/user/bpacker/article/1402724</link>
    <description>&lt;i&gt;(1999)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;It is possible to combine multiple probabilistic models of the same data by multiplying the probabilities together and then renormalizing. This is a very efficient way to model high-dimensional data which simultaneously satisfies many different low-dimensional constraints because each individual expert model can focus on giving high probability to data vectors that satisfy just one of the constraints. Data vectors that satisfy this one constraint but violate other constraints will be ruled out...</description>
    <dc:title>Training Products of Experts by Maximizing Contrastive Likelihood</dc:title>

    <dc:creator>G Hinton</dc:creator>
    <dc:source>(1999)</dc:source>
    <dc:date>2007-06-21T16:13:55-00:00</dc:date>
    <prism:publicationYear>1999</prism:publicationYear>
    <prism:category>probabilistic-models</prism:category>
    <prism:category>product-of-experts</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1364780">
    <title>Logistic Regression, AdaBoost and Bregman Distances</title>
    <link>http://www.citeulike.org/user/bpacker/article/1364780</link>
    <description>&lt;i&gt;(2000), pp. 158-169.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We give a unified account of boosting and logistic regression in which each learning problem is cast in terms of optimization of Bregman distances. The striking similarity of the two problems in this framework allows us to design and analyze algorithms for both simultaneously, and to easily adapt algorithms designed for one problem to the other. For both problems, we give new algorithms and explain their potential advantages over existing methods. These algorithms can be divided into two...</description>
    <dc:title>Logistic Regression, AdaBoost and Bregman Distances</dc:title>

    <dc:creator>Michael Collins</dc:creator>
    <dc:creator>Robert Schapire</dc:creator>
    <dc:creator>Yoram Singer</dc:creator>
    <dc:source>(2000), pp. 158-169.</dc:source>
    <dc:date>2007-06-04T22:12:37-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:startingPage>158</prism:startingPage>
    <prism:endingPage>169</prism:endingPage>
    <prism:category>boosting</prism:category>
    <prism:category>probabilistic-models</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1300309">
    <title>Products of experts</title>
    <link>http://www.citeulike.org/user/bpacker/article/1300309</link>
    <description>&lt;i&gt;(1999)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;It is possible to combine multiple probabilistic models of the same data by multiplying the probabilities together and then renormalizing. This is a very efficient way to model high-dimensional data which simultaneously satisfies many different low-dimensional constraints. Each individual expert model can focus on giving high probability to data vectors that satisfy just one of the constraints. Data vectors that satisfy this one constraint but violate other constraints will be ruled out by their low...</description>
    <dc:title>Products of experts</dc:title>

    <dc:creator>G Hinton</dc:creator>
    <dc:source>(1999)</dc:source>
    <dc:date>2007-05-16T15:18:17-00:00</dc:date>
    <prism:publicationYear>1999</prism:publicationYear>
    <prism:category>probabilistic-models</prism:category>
    <prism:category>product-of-experts</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1361536">
    <title>Incorporating prior knowledge into boosting</title>
    <link>http://www.citeulike.org/user/bpacker/article/1361536</link>
    <description>&lt;i&gt;(2002)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We describe a modification to the AdaBoost algorithm that permits the incorporation of prior human knowledge as a means of compensating for a shortage of training data. We give a convergence result for the algorithm.</description>
    <dc:title>Incorporating prior knowledge into boosting</dc:title>

    <dc:creator>R Schapire</dc:creator>
    <dc:creator>M Rochery</dc:creator>
    <dc:creator>M Rahim</dc:creator>
    <dc:creator>N Gupta</dc:creator>
    <dc:source>(2002)</dc:source>
    <dc:date>2007-06-04T05:35:26-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:category>boosting</prism:category>
    <prism:category>priors</prism:category>
    <prism:category>probabilistic-models</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/513211">
    <title>Probabilistic principal component analysis</title>
    <link>http://www.citeulike.org/user/bpacker/article/513211</link>
    <description>&lt;i&gt;(1997)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Principal component analysis (PCA) is a ubiquitous technique for data analysis and processing, but one which is not based upon a probability model. In this paper we demonstrate how the principal axes of a set of observed data vectors may be determined through maximum-likelihood estimation of parameters in a latent variable model closely related to factor analysis. We consider the properties of the associated likelihood function, giving an EM algorithm for estimating the principal subspace...</description>
    <dc:title>Probabilistic principal component analysis</dc:title>

    <dc:creator>M Tipping</dc:creator>
    <dc:creator>C Bishop</dc:creator>
    <dc:source>(1997)</dc:source>
    <dc:date>2006-02-20T12:43:23-00:00</dc:date>
    <prism:publicationYear>1997</prism:publicationYear>
    <prism:category>probabilistic-models</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1353717">
    <title>Training Products of Experts by Minimizing Contrastive Divergence</title>
    <link>http://www.citeulike.org/user/bpacker/article/1353717</link>
    <description>&lt;i&gt;Neural Comp., Vol. 14, No. 8. (1 August 2002), pp. 1771-1800.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;It is possible to combine multiple latent-variable models of the same data by multiplying their probability distributions together and then renormalizing. This way of combining individual &#34;expert&#34; models makes it hard to generate samples from the combined model but easy to infer the values of the latent variables of each expert, because the combination rule ensures that the latent variables of different experts are conditionally independent when given the data. A product of experts (PoE) is therefore an interesting candidate for a perceptual system in which rapid inference is vital and generation is unnecessary. Training a PoE by maximizing the likelihood of the data is difficult because it is hard even to approximate the derivatives of the renormalization term in the combination rule. Fortunately, a PoE can be trained using a different objective function called &#34;contrastive divergence&#34; whose derivatives with regard to the parameters can be approximated accurately and efficiently. Examples are presented of contrastive divergence learning using several types of expert on several types of data.</description>
    <dc:title>Training Products of Experts by Minimizing Contrastive Divergence</dc:title>

    <dc:creator>Geoffrey Hinton</dc:creator>
    <dc:source>Neural Comp., Vol. 14, No. 8. (1 August 2002), pp. 1771-1800.</dc:source>
    <dc:date>2007-06-01T01:43:58-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Neural Comp.</prism:publicationName>
    <prism:volume>14</prism:volume>
    <prism:number>8</prism:number>
    <prism:startingPage>1771</prism:startingPage>
    <prism:endingPage>1800</prism:endingPage>
    <prism:category>probabilistic-models</prism:category>
    <prism:category>product-of-experts</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/453442">
    <title>Learning Probabilistic Relational Models</title>
    <link>http://www.citeulike.org/user/bpacker/article/453442</link>
    <description>&lt;i&gt;Lecture Notes in Computer Science, Vol. 1864 (2000), pp. 322-??.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Most real-world data is stored in relational form. In contrast, most statistical learning methods, e.g., Bayesian network learning, work only with &#34;flat&#34; data representations, forcing us to convert our data into a form that loses much of the relational structure. The recently introduced framework of probabilistic relational models (PRMs) allow us to represent much richer dependency structures, involving multiple entities and the relations between them; they allow the properties of an...</description>
    <dc:title>Learning Probabilistic Relational Models</dc:title>

    <dc:creator>Lise Getoor</dc:creator>
    <dc:source>Lecture Notes in Computer Science, Vol. 1864 (2000), pp. 322-??.</dc:source>
    <dc:date>2005-12-30T17:22:38-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:publicationName>Lecture Notes in Computer Science</prism:publicationName>
    <prism:volume>1864</prism:volume>
    <prism:startingPage>322</prism:startingPage>
    <prism:endingPage>??</prism:endingPage>
    <prism:category>probabilistic-models</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/899534">
    <title>Improving text classification by shrinkage in a hierarchy of classes</title>
    <link>http://www.citeulike.org/user/bpacker/article/899534</link>
    <description>&lt;i&gt;(1998)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;When documents are organized in a large number of topic categories, the categories are often arranged in a hierarchy. The U.S. patent database and Yahoo are two examples.</description>
    <dc:title>Improving text classification by shrinkage in a hierarchy of classes</dc:title>

    <dc:creator>A McCallum</dc:creator>
    <dc:creator>R Rosenfeld</dc:creator>
    <dc:creator>T Mitchell</dc:creator>
    <dc:creator>A Ng</dc:creator>
    <dc:source>(1998)</dc:source>
    <dc:date>2006-10-16T16:19:28-00:00</dc:date>
    <prism:publicationYear>1998</prism:publicationYear>
    <prism:category>classification</prism:category>
    <prism:category>hierarchy</prism:category>
    <prism:category>nlp</prism:category>
    <prism:category>statistics</prism:category>
    <prism:category>topic</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1067266">
    <title>Expected sample moments of concomitants of selected order statistics</title>
    <link>http://www.citeulike.org/user/bpacker/article/1067266</link>
    <description>&lt;i&gt;(2002)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In this paper, the task of determining expected values of sample moments, where the sample members have been selected based on noisy information, is considered. Exact expressions for expected values of sums of products of concomitants of selected order statistics are derived. Then, using Edgeworth and Cornish-Fisher approximations, explicit results that depend on coefficients that can be determined numerically are obtained. While the results are exact only for normal populations, it is...</description>
    <dc:title>Expected sample moments of concomitants of selected order statistics</dc:title>

    <dc:creator>D Arnold</dc:creator>
    <dc:creator>H Beyer</dc:creator>
    <dc:source>(2002)</dc:source>
    <dc:date>2007-01-25T17:23:41-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:category>statistics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/bpacker/article/1031416">
    <title>Errors in the estimation of the variance: implications for multiple-probability fluctuation analysis.</title>
    <link>http://www.citeulike.org/user/bpacker/article/1031416</link>
    <description>&lt;i&gt;J Neurosci Methods, Vol. 153, No. 2. (15 June 2006), pp. 250-260.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Synapses play a crucial role in information processing in the brain. Amplitude fluctuations of synaptic responses can be used to extract information about the mechanisms underlying synaptic transmission and its modulation. In particular, multiple-probability fluctuation analysis can be used to estimate the number of functional release sites, the mean probability of release and the amplitude of the mean quantal response from fits of the relationship between the variance and mean amplitude of postsynaptic responses, recorded at different probabilities. To determine these quantal parameters, calculate their uncertainties and the goodness-of-fit of the model, it is important to weight the contribution of each data point in the fitting procedure. We therefore investigated the errors associated with measuring the variance by determining the best estimators of the variance of the variance and have used simulations of synaptic transmission to test their accuracy and reliability under different experimental conditions. For central synapses, which generally have a low number of release sites, the amplitude distribution of synaptic responses is not normal, thus the use of a theoretical variance of the variance based on the normal assumption is not a good approximation. However, appropriate estimators can be derived for the population and for limited sample sizes using a more general expression that involves higher moments and introducing unbiased estimators based on the h-statistics. Our results are likely to be relevant for various applications of fluctuation analysis when few channels or release sites are present.</description>
    <dc:title>Errors in the estimation of the variance: implications for multiple-probability fluctuation analysis.</dc:title>

    <dc:creator>C Saviane</dc:creator>
    <dc:creator>RA Silver</dc:creator>
    <dc:identifier>doi:10.1016/j.jneumeth.2005.11.003</dc:identifier>
    <dc:source>J Neurosci Methods, Vol. 153, No. 2. (15 June 2006), pp. 250-260.</dc:source>
    <dc:date>2007-01-09T11:51:38-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>J Neurosci Methods</prism:publicationName>
    <prism:issn>0165-0270</prism:issn>
    <prism:volume>153</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>250</prism:startingPage>
    <prism:endingPage>260</prism:endingPage>
    <prism:category>statistics</prism:category>
</item>



</rdf:RDF>

