<?xml version="1.0" encoding="UTF-8"?>

<rdf:RDF
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
   xmlns="http://purl.org/rss/1.0/"
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:prism="http://prismstandard.org/namespaces/1.2/basic/"
   xmlns:dcterms="http://purl.org/dc/terms/"

>
<channel rdf:about="http://www.citeulike.org/about">
  <!-- Feed-level metadata for this library export. -->
  <!-- The zone is written as a numeric offset: the original "BST" is not one
       of the zone names RFC 822 permits and is ambiguous. This assumes
       BST = British Summer Time (UTC+1), in line with dc:language en-gb;
       confirm against the feed generator. -->
  <pubDate>Sun, 06 Jul 2008 02:45:24 +0100</pubDate>
  <!-- Same instant in W3CDTF for RSS 1.0 consumers, which read the date from
       the Dublin Core module rather than from pubDate. -->
  <dc:date>2008-07-06T02:45:24+01:00</dc:date>
  <title>CiteULike: sdvillal's library [848 articles]</title>
  <description>CiteULike: sdvillal's library [848 articles]</description>
  <link>http://www.citeulike.org/user/sdvillal</link>
  <dc:publisher>CiteULike.org</dc:publisher>
  <dc:language>en-gb</dc:language>
  <dc:rights>Copyright © 2004-2008 citeulike.org</dc:rights>
  <!-- Ordered table of contents (50 entries). Each rdf:resource should match
       the rdf:about of an <item> element later in this document. -->
  <items>
    <rdf:Seq>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/461215"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2882884"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2882880"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2875042"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2526066"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2858306"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2587571"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2856249"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2820442"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2819451"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2818738"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/1915886"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2798738"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2793881"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2786799"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2764550"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2737704"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2737626"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2712765"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2706830"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2693500"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2693473"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2693466"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/140030"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/1939239"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2607812"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2677698"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2677204"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2677105"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2672793"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/252315"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2386941"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2634464"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2620013"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2617935"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/510440"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2205725"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2616147"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2616138"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/525518"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/494124"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2607805"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2605457"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/1121661"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2584713"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2584709"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2584700"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2584684"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2580889"/>
      <rdf:li rdf:resource="http://www.citeulike.org/user/sdvillal/article/2570838"/>
    </rdf:Seq>
  </items>
</channel>


<item rdf:about="http://www.citeulike.org/user/sdvillal/article/461215">
  <!-- Citation with publication name, volume, number and page range; the description holds the citation line only. -->
  <title>An information-theoretic perspective of tf-idf measures</title>
  <link>http://www.citeulike.org/user/sdvillal/article/461215</link>
  <description>&lt;i&gt;Information Processing &amp; Management, Vol. 39, No. 1. (January 2003), pp. 45-65.&lt;/i&gt;</description>
  <dc:title>An information-theoretic perspective of tf-idf measures</dc:title>
  <dc:creator>Akiko Aizawa</dc:creator>
  <dc:identifier>doi:10.1016/S0306-4573(02)00021-3</dc:identifier>
  <dc:source>Information Processing &amp; Management, Vol. 39, No. 1. (January 2003), pp. 45-65.</dc:source>
  <dc:date>2006-01-10T19:08:43-00:00</dc:date>
  <prism:publicationYear>2003</prism:publicationYear>
  <prism:publicationName>Information Processing &amp; Management</prism:publicationName>
  <prism:volume>39</prism:volume>
  <prism:number>1</prism:number>
  <prism:startingPage>45</prism:startingPage>
  <prism:endingPage>65</prism:endingPage>
  <prism:category>multimedia-ir</prism:category>
  <prism:category>text-classification</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2882884">
  <!-- Three creators, a DOI and a page range; a publisher is given but no publication name. -->
  <title>Modeling word burstiness using the Dirichlet distribution</title>
  <link>http://www.citeulike.org/user/sdvillal/article/2882884</link>
  <description>&lt;i&gt;(2005), pp. 545-552.&lt;/i&gt;</description>
  <dc:title>Modeling word burstiness using the Dirichlet distribution</dc:title>
  <dc:creator>Rasmus Madsen</dc:creator>
  <dc:creator>David Kauchak</dc:creator>
  <dc:creator>Charles Elkan</dc:creator>
  <dc:identifier>doi:10.1145/1102351.1102420</dc:identifier>
  <dc:source>(2005), pp. 545-552.</dc:source>
  <dc:date>2008-06-11T15:02:47-00:00</dc:date>
  <prism:publicationYear>2005</prism:publicationYear>
  <prism:startingPage>545</prism:startingPage>
  <prism:endingPage>552</prism:endingPage>
  <prism:publisher>ACM</prism:publisher>
  <prism:category>dirichlet</prism:category>
  <prism:category>text-classification</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2882880">
  <!-- Single creator, a DOI and a page range; a publisher is given but no publication name. -->
  <title>Clustering documents with an exponential-family approximation of the Dirichlet compound multinomial distribution</title>
  <link>http://www.citeulike.org/user/sdvillal/article/2882880</link>
  <description>&lt;i&gt;(2006), pp. 289-296.&lt;/i&gt;</description>
  <dc:title>Clustering documents with an exponential-family approximation of the Dirichlet compound multinomial distribution</dc:title>
  <dc:creator>Charles Elkan</dc:creator>
  <dc:identifier>doi:10.1145/1143844.1143881</dc:identifier>
  <dc:source>(2006), pp. 289-296.</dc:source>
  <dc:date>2008-06-11T15:01:53-00:00</dc:date>
  <prism:publicationYear>2006</prism:publicationYear>
  <prism:startingPage>289</prism:startingPage>
  <prism:endingPage>296</prism:endingPage>
  <prism:publisher>ACM</prism:publisher>
  <prism:category>dirichlet</prism:category>
  <prism:category>text-classification</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2875042">
  <!-- Five creators; the source is the year alone, with no DOI or page range. -->
  <title>Building Text Classifiers Using Positive and Unlabeled Examples</title>
  <link>http://www.citeulike.org/user/sdvillal/article/2875042</link>
  <description>&lt;i&gt;(2003)&lt;/i&gt;</description>
  <dc:title>Building Text Classifiers Using Positive and Unlabeled Examples</dc:title>
  <dc:creator>Bing Liu</dc:creator>
  <dc:creator>Yang Dai</dc:creator>
  <dc:creator>Xiaoli Li</dc:creator>
  <dc:creator>Wee Lee</dc:creator>
  <dc:creator>Philip Yu</dc:creator>
  <dc:source>(2003)</dc:source>
  <dc:date>2008-06-09T09:17:14-00:00</dc:date>
  <prism:publicationYear>2003</prism:publicationYear>
  <prism:publisher>IEEE Computer Society</prism:publisher>
  <prism:category>occ-applications</prism:category>
  <prism:category>occ-others</prism:category>
  <prism:category>semi-supervised</prism:category>
  <prism:category>text-classification</prism:category>
  <prism:category>unsupervised-dr-for-classification</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2526066">
  <!-- The description holds the citation line plus an abstract ending in "..." (apparently truncated upstream). -->
  <title>Addressing the curse of imbalanced training sets: one-sided selection</title>
  <link>http://www.citeulike.org/user/sdvillal/article/2526066</link>
  <description>&lt;i&gt;(1997), pp. 179-186.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Adding examples of the majority class to the training set can have a detrimental effect on the learner's behavior: noisy or otherwise unreliable examples from the majority class can overwhelm the minority class. The paper discusses criteria to evaluate the utility of classifiers induced from such imbalanced training sets, gives explanation of the poor behavior of some learners under these circumstances, and suggests as a solution a simple technique called one-sided selection of...</description>
  <dc:title>Addressing the curse of imbalanced training sets: one-sided selection</dc:title>
  <dc:creator>Miroslav Kubat</dc:creator>
  <dc:creator>Stan Matwin</dc:creator>
  <dc:source>(1997), pp. 179-186.</dc:source>
  <dc:date>2008-03-13T12:23:40-00:00</dc:date>
  <prism:publicationYear>1997</prism:publicationYear>
  <prism:startingPage>179</prism:startingPage>
  <prism:endingPage>186</prism:endingPage>
  <prism:publisher>Morgan Kaufmann</prism:publisher>
  <prism:category>imbalanced</prism:category>
  <prism:category>negative-example-selection</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2858306">
  <!-- Two creators, a DOI, publication name and page range; the description holds the citation line plus abstract. -->
  <title>A practical method for the software fault-prediction</title>
  <link>http://www.citeulike.org/user/sdvillal/article/2858306</link>
  <description>&lt;i&gt;Information Reuse and Integration, 2007. IRI 2007. IEEE International Conference on (2007), pp. 659-666.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In the paper, a novel machine learning method, SimBoost, is proposed to handle the software fault-prediction problem when highly skewed datasets are used. Although the method, proved by empirical results, can make the datasets much more balanced, the accuracy of the prediction is still not satisfactory. Therefore, a fuzzy-based representation of the software module fault state has been presented instead of the original faulty/non-faulty one. Several experiments were conducted using datasets from NASA Metrics Data Program. The discussion of the results of experiments is provided.</description>
  <dc:title>A practical method for the software fault-prediction</dc:title>
  <dc:creator>Zhan Li</dc:creator>
  <dc:creator>M Reformat</dc:creator>
  <dc:identifier>doi:10.1109/IRI.2007.4296695</dc:identifier>
  <dc:source>Information Reuse and Integration, 2007. IRI 2007. IEEE International Conference on (2007), pp. 659-666.</dc:source>
  <dc:date>2008-06-03T05:41:52-00:00</dc:date>
  <prism:publicationYear>2007</prism:publicationYear>
  <prism:publicationName>Information Reuse and Integration, 2007. IRI 2007. IEEE International Conference on</prism:publicationName>
  <prism:startingPage>659</prism:startingPage>
  <prism:endingPage>666</prism:endingPage>
  <prism:category>boosting</prism:category>
  <prism:category>imbalanced</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2587571">
  <!-- Two creators, publication name, volume and page range; no DOI is present. -->
  <title>Generalization from Observed to Unobserved Features by Clustering</title>
  <link>http://www.citeulike.org/user/sdvillal/article/2587571</link>
  <description>&lt;i&gt;Journal of Machine Learning Research, Vol. 9 (March 2008), pp. 339-370.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We argue that when objects are characterized by many attributes, clustering them on the basis of a random subset of these attributes can capture information on the unobserved attributes as well. Moreover, we show that under mild technical conditions, clustering the objects on the basis of such a random subset performs almost as well as clustering with the full attribute set. We prove finite sample generalization theorems for this novel learning scheme that extends analogous results from the supervised learning setting. We use our framework to analyze generalization to unobserved features of two well-known clustering algorithms: k-means and the maximum likelihood multinomial mixture model. The scheme is demonstrated for collaborative filtering of users with movie ratings as attributes and document clustering with words as attributes.</description>
  <dc:title>Generalization from Observed to Unobserved Features by Clustering</dc:title>
  <dc:creator>Eyal Krupka</dc:creator>
  <dc:creator>Naftali Tishby</dc:creator>
  <dc:source>Journal of Machine Learning Research, Vol. 9 (March 2008), pp. 339-370.</dc:source>
  <dc:date>2008-03-25T20:06:19-00:00</dc:date>
  <prism:publicationYear>2008</prism:publicationYear>
  <prism:publicationName>Journal of Machine Learning Research</prism:publicationName>
  <prism:volume>9</prism:volume>
  <prism:startingPage>339</prism:startingPage>
  <prism:endingPage>370</prism:endingPage>
  <prism:category>clustering</prism:category>
  <prism:category>negative-features</prism:category>
  <prism:category>unobserved-features</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2856249">
  <!-- The description text contains a literal line break before its last sentence; it is part of the element's character data and is kept as is. -->
  <title>Max-margin Classification of Data with Absent Features</title>
  <link>http://www.citeulike.org/user/sdvillal/article/2856249</link>
  <description>&lt;i&gt;Journal of Machine Learning Research, Vol. 9 (January 2008), pp. 1-21.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We consider the problem of learning classifiers in structured domains, where some objects have a subset of features that are inherently absent due to complex relationships between the features. Unlike the case where a feature exists but its value is not observed, here we focus on the case where a feature may not even exist (structurally absent) for some of the samples. The common approach for handling missing features in discriminative models is to first complete their unknown values, and then use a standard classification procedure over the completed data. This paper focuses on features that are known to be non-existing, rather than have an unknown value. We show how incomplete data can be classified directly without any completion of the missing features using a max-margin learning framework. We formulate an objective function, based on the geometric interpretation of the margin, that aims to maximize the margin of each sample in its own relevant subspace. In this formulation, the linearly separable case can be transformed into a binary search over a series of second order cone programs (SOCP), a convex problem that can be solved efficiently. We also describe two approaches for optimizing the general case: an approximation that can be solved as a standard quadratic program (QP) and an iterative approach for solving the exact problem. By avoiding the pre-processing phase in which the data is completed, both of these approaches could offer considerable computational savings. More importantly, we show that the elegant handling of missing values by our approach allows it to both outperform other methods when the missing values have non-trivial structure, and be competitive with other methods when the values are missing at random. 
We demonstrate our results on several standard benchmarks and two real-world problems: edge prediction in metabolic pathways, and automobile detection in natural images.</description>
  <dc:title>Max-margin Classification of Data with Absent Features</dc:title>
  <dc:creator>Gal Chechik</dc:creator>
  <dc:creator>Geremy Heitz</dc:creator>
  <dc:creator>Gal Elidan</dc:creator>
  <dc:creator>Pieter Abbeel</dc:creator>
  <dc:creator>Daphne Koller</dc:creator>
  <dc:source>Journal of Machine Learning Research, Vol. 9 (January 2008), pp. 1-21.</dc:source>
  <dc:date>2008-06-02T09:30:24-00:00</dc:date>
  <prism:publicationYear>2008</prism:publicationYear>
  <prism:publicationName>Journal of Machine Learning Research</prism:publicationName>
  <prism:volume>9</prism:volume>
  <prism:startingPage>1</prism:startingPage>
  <prism:endingPage>21</prism:endingPage>
  <prism:category>structured-domains</prism:category>
  <prism:category>unobserved-features</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2820442">
  <!-- This entry has no dc:creator, DOI or page range; only a publication date and a publisher are given. -->
  <title>Computational Methods of Feature Selection (Chapman &amp; Hall/Crc Data Mining and Knowledge Discovery Series)</title>
  <link>http://www.citeulike.org/user/sdvillal/article/2820442</link>
  <description>&lt;i&gt;(29 October 2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Due to increasing demands for dimensionality reduction, research on feature selection has deeply and widely expanded into many fields, including computational statistics, pattern recognition, machine learning, data mining, and knowledge discovery. Highlighting current research issues, **Computational Methods of Feature Selection** introduces the basic concepts and principles, state-of-the-art algorithms, and novel applications of this tool. The book begins by exploring unsupervised, randomized, and causal feature selection. It then reports on some recent results of empowering feature selection, including active feature selection, decision-border estimate, the use of ensembles with independent probes, and incremental feature selection. This is followed by discussions of weighting and local methods, such as the ReliefF family, _k_-means clustering, local feature relevance, and a new interpretation of Relief. The book subsequently covers text classification, a new feature selection score, and both constraint-guided and aggressive feature selection. The final section examines applications of feature selection in bioinformatics, including feature construction as well as redundancy-, ensemble-, and penalty-based feature selection. Through a clear, concise, and coherent presentation of topics, this volume systematically covers the key concepts, underlying principles, and inventive applications of feature selection, illustrating how this powerful tool can efficiently harness massive, high-dimensional data and turn it into valuable, reliable information.</description>
  <dc:title>Computational Methods of Feature Selection (Chapman &amp; Hall/Crc Data Mining and Knowledge Discovery Series)</dc:title>
  <dc:source>(29 October 2007)</dc:source>
  <dc:date>2008-05-21T16:00:30-00:00</dc:date>
  <prism:publicationYear>2007</prism:publicationYear>
  <prism:publisher>Chapman &amp; Hall/CRC</prism:publisher>
  <prism:category>dimension-reduction</prism:category>
  <prism:category>feature-selection</prism:category>
  <prism:category>text-classification</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2819451">
  <!-- Two creators, publication name, volume and page range; no DOI is present. -->
  <title>The Pyramid Match Kernel: Efficient Learning with Sets of Features</title>
  <link>http://www.citeulike.org/user/sdvillal/article/2819451</link>
  <description>&lt;i&gt;Journal of Machine Learning Research, Vol. 8 (April 2007), pp. 725-760.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In numerous domains it is useful to represent a single example by the set of the local features or parts that comprise it. However, this representation poses a challenge to many conventional machine learning techniques, since sets may vary in cardinality and elements lack a meaningful ordering. Kernel methods can learn complex functions, but a kernel over unordered set inputs must somehow solve for correspondences---generally a computationally expensive task that becomes impractical for large set sizes. We present a new fast kernel function called the pyramid match that measures partial match similarity in time linear in the number of features. The pyramid match maps unordered feature sets to multi-resolution histograms and computes a weighted histogram intersection in order to find implicit correspondences based on the finest resolution histogram cell where a matched pair first appears. We show the pyramid match yields a Mercer kernel, and we prove bounds on its error relative to the optimal partial matching cost. We demonstrate our algorithm on both classification and regression tasks, including object recognition, 3-D human pose inference, and time of publication estimation for documents, and we show that the proposed method is accurate and significantly more efficient than current approaches.</description>
  <dc:title>The Pyramid Match Kernel: Efficient Learning with Sets of Features</dc:title>
  <dc:creator>Kristen Grauman</dc:creator>
  <dc:creator>Trevor Darrell</dc:creator>
  <dc:source>Journal of Machine Learning Research, Vol. 8 (April 2007), pp. 725-760.</dc:source>
  <dc:date>2008-05-21T11:49:49-00:00</dc:date>
  <prism:publicationYear>2007</prism:publicationYear>
  <prism:publicationName>Journal of Machine Learning Research</prism:publicationName>
  <prism:volume>8</prism:volume>
  <prism:startingPage>725</prism:startingPage>
  <prism:endingPage>760</prism:endingPage>
  <prism:category>image-processing</prism:category>
  <prism:category>kernel-machines</prism:category>
  <prism:category>scalability</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2818738">
  <!-- Carries a DOI, an ISSN, volume, number, page range and publisher; the description holds the citation line only. -->
  <title>Feature selection for text categorization on imbalanced data</title>
  <link>http://www.citeulike.org/user/sdvillal/article/2818738</link>
  <description>&lt;i&gt;SIGKDD Explor. Newsl., Vol. 6, No. 1. (June 2004), pp. 80-89.&lt;/i&gt;</description>
  <dc:title>Feature selection for text categorization on imbalanced data</dc:title>
  <dc:creator>Zhaohui Zheng</dc:creator>
  <dc:creator>Xiaoyun Wu</dc:creator>
  <dc:creator>Rohini Srihari</dc:creator>
  <dc:identifier>doi:10.1145/1007730.1007741</dc:identifier>
  <dc:source>SIGKDD Explor. Newsl., Vol. 6, No. 1. (June 2004), pp. 80-89.</dc:source>
  <dc:date>2008-05-21T09:15:20-00:00</dc:date>
  <prism:publicationYear>2004</prism:publicationYear>
  <prism:publicationName>SIGKDD Explor. Newsl.</prism:publicationName>
  <prism:issn>1931-0145</prism:issn>
  <prism:volume>6</prism:volume>
  <prism:number>1</prism:number>
  <prism:startingPage>80</prism:startingPage>
  <prism:endingPage>89</prism:endingPage>
  <prism:publisher>ACM</prism:publisher>
  <prism:category>dimension-reduction</prism:category>
  <prism:category>feature-selection</prism:category>
  <prism:category>imbalanced</prism:category>
  <prism:category>text-classification</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/1915886">
  <!-- Single creator, publication name, volume and page range; no DOI is present. -->
  <title>An Extensive Empirical Study of Feature Selection Metrics for Text Classification</title>
  <link>http://www.citeulike.org/user/sdvillal/article/1915886</link>
  <description>&lt;i&gt;Journal of Machine Learning Research, Vol. 3 (March 2003), pp. 1289-1305.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Machine learning for text classification is the cornerstone of document categorization, news filtering, document routing, and personalization. In text domains, effective feature selection is essential to make the learning task efficient and more accurate. This paper presents an empirical comparison of twelve feature selection methods (e.g. Information Gain) evaluated on a benchmark of 229 text classification problem instances that were gathered from Reuters, TREC, OHSUMED, etc. The results are analyzed from multiple goal perspectives-accuracy, F-measure, precision, and recall-since each is appropriate in different situations. The results reveal that a new feature selection metric we call 'Bi-Normal Separation' (BNS), outperformed the others by a substantial margin in most situations. This margin widened in tasks with high class skew, which is rampant in text classification problems and is particularly challenging for induction algorithms. A new evaluation methodology is offered that focuses on the needs of the data mining practitioner faced with a single dataset who seeks to choose one (or a pair of) metrics that are most likely to yield the best performance. From this perspective, BNS was the top single choice for all goals except precision, for which Information Gain yielded the best result most often. This analysis also revealed, for example, that Information Gain and Chi-Squared have correlated failures, and so they work poorly together. When choosing optimal pairs of metrics for each of the four performance goals, BNS is consistently a member of the pair---e.g., for greatest recall, the pair BNS + F1-measure yielded the best performance on the greatest number of tasks by a considerable margin.</description>
  <dc:title>An Extensive Empirical Study of Feature Selection Metrics for Text Classification</dc:title>
  <dc:creator>George Forman</dc:creator>
  <dc:source>Journal of Machine Learning Research, Vol. 3 (March 2003), pp. 1289-1305.</dc:source>
  <dc:date>2007-11-14T20:42:55-00:00</dc:date>
  <prism:publicationYear>2003</prism:publicationYear>
  <prism:publicationName>Journal of Machine Learning Research</prism:publicationName>
  <prism:volume>3</prism:volume>
  <prism:startingPage>1289</prism:startingPage>
  <prism:endingPage>1305</prism:endingPage>
  <prism:category>dimension-reduction</prism:category>
  <prism:category>feature-selection</prism:category>
  <prism:category>imbalanced</prism:category>
  <prism:category>text-classification</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2798738">
  <!-- Three creators and a DOI; prism:number holds the range "1-2" rather than a single issue number. -->
  <title>Phase transitions and the search problem</title>
  <link>http://www.citeulike.org/user/sdvillal/article/2798738</link>
  <description>&lt;i&gt;Artificial Intelligence, Vol. 81, No. 1-2. (March 1996), pp. 1-15.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We describe how techniques that were originally developed in statistical mechanics can be applied to search problems that arise commonly in artificial intelligence. This approach is useful for understanding the typical behavior of classes of problems. In particular, these techniques predict that abrupt changes in computational cost, analogous to physical phase transitions, should occur universally, as heuristic effectiveness or search space topology is varied. We also present a number of open questions raised by these studies.</description>
  <dc:title>Phase transitions and the search problem</dc:title>
  <dc:creator>Tad Hogg</dc:creator>
  <dc:creator>Bernardo Huberman</dc:creator>
  <dc:creator>Colin Williams</dc:creator>
  <dc:identifier>doi:10.1016/0004-3702(95)00044-5</dc:identifier>
  <dc:source>Artificial Intelligence, Vol. 81, No. 1-2. (March 1996), pp. 1-15.</dc:source>
  <dc:date>2008-05-14T13:39:17-00:00</dc:date>
  <prism:publicationYear>1996</prism:publicationYear>
  <prism:publicationName>Artificial Intelligence</prism:publicationName>
  <prism:volume>81</prism:volume>
  <prism:number>1-2</prism:number>
  <prism:startingPage>1</prism:startingPage>
  <prism:endingPage>15</prism:endingPage>
  <prism:category>phase-transition</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2793881">
  <!-- Text fields here contain literal non-ASCII punctuation (en dash, curly quotes), which relies on the declared UTF-8 encoding. -->
  <title>The Design and Analysis of an Algorithm Portfolio for SAT</title>
  <link>http://www.citeulike.org/user/sdvillal/article/2793881</link>
  <description>&lt;i&gt;Principles and Practice of Constraint Programming – CP 2007 (2007), pp. 712-727.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;It has been widely observed that there is no “dominant” SAT solver; instead, different solvers perform best on different instances. Rather than following the traditional approach of choosing the best solver for a given class of instances, we advocate making this decision online on a per-instance basis. Building on previous work, we describe a per-instance solver portfolio for SAT, SATzilla-07, which uses so-called empirical hardness models to choose among its constituent solvers. We leverage new model-building techniques such as censored sampling and hierarchical hardness models, and demonstrate the effectiveness of our techniques by building a portfolio of state-of-the-art SAT solvers and evaluating it on several widely-studied SAT data sets. Overall, we show that our portfolio significantly outperforms its constituent algorithms on every data set. Our approach has also proven itself to be effective in practice: in the 2007 SAT competition, SATzilla-07 won three gold medals, one silver, and one bronze; it is available online at http://www.cs.ubc.ca/labs/beta/Projects/SATzilla .</description>
  <dc:title>The Design and Analysis of an Algorithm Portfolio for SAT</dc:title>
  <dc:creator>Lin Xu</dc:creator>
  <dc:creator>Frank Hutter</dc:creator>
  <dc:creator>Holger Hoos</dc:creator>
  <dc:creator>Kevin Leyton-Brown</dc:creator>
  <dc:identifier>doi:10.1007/978-3-540-74970-7_50</dc:identifier>
  <dc:source>Principles and Practice of Constraint Programming – CP 2007 (2007), pp. 712-727.</dc:source>
  <dc:date>2008-05-13T08:11:57-00:00</dc:date>
  <prism:publicationYear>2007</prism:publicationYear>
  <prism:publicationName>Principles and Practice of Constraint Programming – CP 2007</prism:publicationName>
  <prism:startingPage>712</prism:startingPage>
  <prism:endingPage>727</prism:endingPage>
  <prism:category>algorithm-portfolio</prism:category>
  <prism:category>data-mining-general</prism:category>
  <prism:category>meta-learning</prism:category>
  <prism:category>sat</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2786799">
  <!-- Five creators, a DOI, publication name, volume, number and page range; the description holds the citation line plus abstract. -->
  <title>Off-the-peg and bespoke classifiers for fraud detection</title>
  <link>http://www.citeulike.org/user/sdvillal/article/2786799</link>
  <description>&lt;i&gt;Computational Statistics &amp; Data Analysis, Vol. 52, No. 9. (15 May 2008), pp. 4521-4532.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Detecting fraudulent plastic card transactions is an important and challenging problem. The challenges arise from a number of factors including the sheer volume of transactions financial institutions have to process, the asynchronous and heterogeneous nature of transactions, and the adaptive behaviour of fraudsters. In this fraud detection problem the performance of a supervised two-class classification approach is compared with performance of an unsupervised one-class classification approach. Attention is focussed primarily on one-class classification approaches. Useful representations of transaction records, and ways of combining different one-class classifiers are described. Assessment of performance for such problems is complicated by the need for timely decision making. Performance assessment measures are discussed, and the performance of a number of one- and two-class classification methods is assessed using two large, real world personal banking data sets.</description>
  <dc:title>Off-the-peg and bespoke classifiers for fraud detection</dc:title>
  <dc:creator>Piotr Juszczak</dc:creator>
  <dc:creator>Niall Adams</dc:creator>
  <dc:creator>David Hand</dc:creator>
  <dc:creator>Christopher Whitrow</dc:creator>
  <dc:creator>David Weston</dc:creator>
  <dc:identifier>doi:10.1016/j.csda.2008.03.014</dc:identifier>
  <dc:source>Computational Statistics &amp; Data Analysis, Vol. 52, No. 9. (15 May 2008), pp. 4521-4532.</dc:source>
  <dc:date>2008-05-12T07:36:53-00:00</dc:date>
  <prism:publicationYear>2008</prism:publicationYear>
  <prism:publicationName>Computational Statistics &amp; Data Analysis</prism:publicationName>
  <prism:volume>52</prism:volume>
  <prism:number>9</prism:number>
  <prism:startingPage>4521</prism:startingPage>
  <prism:endingPage>4532</prism:endingPage>
  <prism:category>occ-applications</prism:category>
  <prism:category>occ-comparison</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2764550">
    <title>Variational Extensions to EM and Multinomial PCA</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2764550</link>
    <description>&lt;i&gt;(2002), pp. 23-34.&lt;/i&gt;</description>
    <dc:title>Variational Extensions to EM and Multinomial PCA</dc:title>

    <dc:creator>Wray Buntine</dc:creator>
    <dc:source>(2002), pp. 23-34.</dc:source>
    <dc:date>2008-05-07T10:01:17-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:startingPage>23</prism:startingPage>
    <prism:endingPage>34</prism:endingPage>
    <prism:publisher>Springer-Verlag</prism:publisher>
    <prism:category>dimension-reduction</prism:category>
    <prism:category>dirichlet</prism:category>
    <prism:category>pca</prism:category>
    <prism:category>sets-of-features</prism:category>
    <prism:category>text-classification</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2737704">
    <title>Scale-sensitive dimensions, uniform convergence, and learnability</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2737704</link>
    <description>&lt;i&gt;J. ACM, Vol. 44, No. 4. (July 1997), pp. 615-631.&lt;/i&gt;</description>
    <dc:title>Scale-sensitive dimensions, uniform convergence, and learnability</dc:title>

    <dc:creator>Noga Alon</dc:creator>
    <dc:creator>Shai Ben-David</dc:creator>
    <dc:creator>Nicol&#242; Cesa-Bianchi</dc:creator>
    <dc:creator>David Haussler</dc:creator>
    <dc:identifier>doi:10.1145/263867.263927</dc:identifier>
    <dc:source>J. ACM, Vol. 44, No. 4. (July 1997), pp. 615-631.</dc:source>
    <dc:date>2008-04-30T12:29:38-00:00</dc:date>
    <prism:publicationYear>1997</prism:publicationYear>
    <prism:publicationName>J. ACM</prism:publicationName>
    <prism:issn>0004-5411</prism:issn>
    <prism:volume>44</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>615</prism:startingPage>
    <prism:endingPage>631</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>generalization</prism:category>
    <prism:category>learnability</prism:category>
    <prism:category>ml-foundations</prism:category>
    <prism:category>scale-sensitive</prism:category>
    <prism:category>uniform-convergence</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2737626">
    <title>Reliable Reasoning: Induction and Statistical Learning Theory (Jean Nicod Lectures)</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2737626</link>
    <description>&lt;i&gt;(01 May 2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In &#60;i&#62;Reliable Reasoning,&#60;/i&#62; Gilbert Harman and Sanjeev Kulkarni--a philosopher and an engineer--argue that philosophy and cognitive science can benefit from statistical learning theory (SLT), the theory that lies behind recent advances in machine learning. The philosophical problem of induction, for example, is in part about the reliability of inductive reasoning, where the reliability of a method is measured by its statistically expected percentage of errors--a central topic in SLT.&#60;br /&#62; &#60;br /&#62; After discussing philosophical attempts to evade the problem of induction, Harman and Kulkarni provide an admirably clear account of the basic framework of SLT and its implications for inductive reasoning. They explain the Vapnik-Chervonenkis (VC) dimension of a set of hypotheses and distinguish two kinds of inductive reasoning, describing fundamental results about the power and limits of those methods in terms of the VC-dimension of the hypotheses being considered. The VC-dimension is found to be superior to a related measure proposed by Karl Popper, and shown not to correspond exactly to ordinary notions of simplicity. The authors discuss various topics in machine learning, including nearest-neighbor methods, neural networks, and support vector machines. Finally, they describe transductive reasoning and suggest possible new models of human reasoning suggested by developments in SLT.</description>
    <dc:title>Reliable Reasoning: Induction and Statistical Learning Theory (Jean Nicod Lectures)</dc:title>

    <dc:creator>Gilbert Harman</dc:creator>
    <dc:creator>Sanjeev Kulkarni</dc:creator>
    <dc:source>(01 May 2007)</dc:source>
    <dc:date>2008-04-30T11:55:19-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publisher>The MIT Press</prism:publisher>
    <prism:category>ml-foundations</prism:category>
    <prism:category>ml-philosophy</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2712765">
    <title>Tutorial on Practical Prediction Theory for Classification</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2712765</link>
    <description>&lt;i&gt;Journal of Machine Learning Research, Vol. 6 (March 2005), pp. 273-306.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We discuss basic prediction theory and its impact on classification success evaluation, implications for learning algorithm design, and uses in learning algorithm execution. This tutorial is meant to be a comprehensive compilation of results which are both theoretically rigorous and quantitatively useful.</description>
    <dc:title>Tutorial on Practical Prediction Theory for Classification</dc:title>

    <dc:creator>John Langford</dc:creator>
    <dc:source>Journal of Machine Learning Research, Vol. 6 (March 2005), pp. 273-306.</dc:source>
    <dc:date>2008-04-24T11:35:10-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Journal of Machine Learning Research</prism:publicationName>
    <prism:volume>6</prism:volume>
    <prism:startingPage>273</prism:startingPage>
    <prism:endingPage>306</prism:endingPage>
    <prism:category>error-estimation</prism:category>
    <prism:category>learning-bounds</prism:category>
    <prism:category>ml-foundations</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2706830">
    <title>Learning over Sets using Kernel Principal Angles</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2706830</link>
    <description>&lt;i&gt;Journal of Machine Learning Research, Vol. 4 (October 2003), pp. 913-931.&lt;/i&gt;</description>
    <dc:title>Learning over Sets using Kernel Principal Angles</dc:title>

    <dc:creator>Lior Wolf</dc:creator>
    <dc:creator>Amnon Shashua</dc:creator>
    <dc:source>Journal of Machine Learning Research, Vol. 4 (October 2003), pp. 913-931.</dc:source>
    <dc:date>2008-04-23T10:12:18-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Journal of Machine Learning Research</prism:publicationName>
    <prism:volume>4</prism:volume>
    <prism:startingPage>913</prism:startingPage>
    <prism:endingPage>931</prism:endingPage>
    <prism:category>cbir</prism:category>
    <prism:category>dimension-reduction</prism:category>
    <prism:category>image-processing</prism:category>
    <prism:category>kernel-machines</prism:category>
    <prism:category>multi-instance</prism:category>
    <prism:category>multimedia-ir</prism:category>
    <prism:category>sets-of-features</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2693500">
    <title>Abduction and Induction: Essays on their Relation and Integration (Applied Logic Series)</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2693500</link>
    <description>&lt;i&gt;(30 April 2000)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;From the very beginning of their investigation of human reasoning, philosophers have identified two other forms of reasoning, besides deduction, which we now call abduction and induction. Deduction is now fairly well understood, but abduction and induction have eluded a similar level of understanding. The papers collected here address the relationship between abduction and induction and their possible integration. The approach is sometimes philosophical, sometimes that of pure logic, and some papers adopt the more task-oriented approach of AI. &#60;br/&#62; The book will command the attention of philosophers, logicians, AI researchers and computer scientists in general.</description>
    <dc:title>Abduction and Induction: Essays on their Relation and Integration (Applied Logic Series)</dc:title>

    <dc:source>(30 April 2000)</dc:source>
    <dc:date>2008-04-20T19:18:41-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:publisher>Springer</prism:publisher>
    <prism:category>abduction</prism:category>
    <prism:category>ml-foundations</prism:category>
    <prism:category>ml-philosophy</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2693473">
    <title>Smart Inductive Generalizations are Abductions</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2693473</link>
    <description>&lt;i&gt;&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This paper describes abduction as `inference to the best explanation' and argues that &#34;smart&#34; inductive generalizations are a special case of abductions. Along the way it argues that some good explanations are not proofs and some proofs are not explanations, concluding that explanations are not deductive proofs in any particularly interesting sense. An attractive alternative is that explanations are assignments of causal responsibility. Smart inductive generalizations can then be seen to be...</description>
    <dc:title>Smart Inductive Generalizations are Abductions</dc:title>

    <dc:creator>J Josephson</dc:creator>
    <dc:date>2008-04-20T19:05:03-00:00</dc:date>
    <prism:category>abduction</prism:category>
    <prism:category>ml-foundations</prism:category>
    <prism:category>ml-philosophy</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2693466">
    <title>Integrating abduction and induction in machine learning</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2693466</link>
    <description>&lt;i&gt;(1997)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This paper discusses the integration of traditional abductive and inductive reasoning methods in the development of machine learning systems. In particular, the paper discusses our recent work in two areas: 1) The use of traditional abductive methods to propose revisions during theory refinement, where an existing knowledge base is modified to make it consistent with a set of empirical data; and 2) The use of inductive learning methods to automatically acquire from examples a diagnostic...</description>
    <dc:title>Integrating abduction and induction in machine learning</dc:title>

    <dc:creator>R Mooney</dc:creator>
    <dc:source>(1997)</dc:source>
    <dc:date>2008-04-20T19:02:30-00:00</dc:date>
    <prism:publicationYear>1997</prism:publicationYear>
    <prism:category>abduction</prism:category>
    <prism:category>ml-foundations</prism:category>
    <prism:category>ml-philosophy</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/140030">
    <title>Combining Labeled and Unlabeled Data with Co-training</title>
    <link>http://www.citeulike.org/user/sdvillal/article/140030</link>
    <description>&lt;i&gt;(1998)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We consider the problem of using a large unlabeled sample to boost performance of a learning algorithm when only a small set of labeled examples is available. In particular, we consider a problem setting motivated by the task of learning to classify web pages, in which the description of each example can be partitioned into two distinct views. For example, the description of a web page can be partitioned into the words occurring on that page, and the words occurring in hyperlinks that point to...</description>
    <dc:title>Combining Labeled and Unlabeled Data with Co-training</dc:title>

    <dc:creator>Avrim Blum</dc:creator>
    <dc:creator>Tom Mitchell</dc:creator>
    <dc:source>(1998)</dc:source>
    <dc:date>2005-03-25T18:29:37-00:00</dc:date>
    <prism:publicationYear>1998</prism:publicationYear>
    <prism:category>co-training</prism:category>
    <prism:category>semi-supervised</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/1939239">
    <title>Latent Dirichlet Allocation</title>
    <link>http://www.citeulike.org/user/sdvillal/article/1939239</link>
    <description>&lt;i&gt;Journal of Machine Learning Research, Vol. 3 (January 2003), pp. 993-1022.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We describe latent Dirichlet allocation (LDA), a generative probabilistic model for collections of discrete data such as text corpora. LDA is a three-level hierarchical Bayesian model, in which each item of a collection is modeled as a finite mixture over an underlying set of topics. Each topic is, in turn, modeled as an infinite mixture over an underlying set of topic probabilities. In the context of text modeling, the topic probabilities provide an explicit representation of a document. We present efficient approximate inference techniques based on variational methods and an EM algorithm for empirical Bayes parameter estimation. We report results in document modeling, text classification, and collaborative filtering, comparing to a mixture of unigrams model and the probabilistic LSI model.</description>
    <dc:title>Latent Dirichlet Allocation</dc:title>

    <dc:creator>David Blei</dc:creator>
    <dc:creator>Andrew Ng</dc:creator>
    <dc:creator>Michael Jordan</dc:creator>
    <dc:source>Journal of Machine Learning Research, Vol. 3 (January 2003), pp. 993-1022.</dc:source>
    <dc:date>2007-11-19T20:26:58-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Journal of Machine Learning Research</prism:publicationName>
    <prism:volume>3</prism:volume>
    <prism:startingPage>993</prism:startingPage>
    <prism:endingPage>1022</prism:endingPage>
    <prism:category>dirichlet</prism:category>
    <prism:category>proportional-data</prism:category>
    <prism:category>text-classification</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2607812">
    <title>A selective sampling approach to active feature selection</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2607812</link>
    <description>&lt;i&gt;Artif. Intell., Vol. 159, No. 1-2. (2004), pp. 49-74.&lt;/i&gt;</description>
    <dc:title>A selective sampling approach to active feature selection</dc:title>

    <dc:creator>Huan Liu</dc:creator>
    <dc:creator>Hiroshi Motoda</dc:creator>
    <dc:creator>Lei Yu</dc:creator>
    <dc:identifier>doi:10.1016/j.artint.2004.05.009</dc:identifier>
    <dc:source>Artif. Intell., Vol. 159, No. 1-2. (2004), pp. 49-74.</dc:source>
    <dc:date>2008-03-28T15:57:09-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Artif. Intell.</prism:publicationName>
    <prism:issn>0004-3702</prism:issn>
    <prism:volume>159</prism:volume>
    <prism:number>1-2</prism:number>
    <prism:startingPage>49</prism:startingPage>
    <prism:endingPage>74</prism:endingPage>
    <prism:publisher>Elsevier Science Publishers Ltd.</prism:publisher>
    <prism:category>active-learning</prism:category>
    <prism:category>dimension-reduction</prism:category>
    <prism:category>feature-selection</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2677698">
    <title>Ensembles of nested dichotomies for multi-class problems</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2677698</link>
    <description>&lt;i&gt;(2004)&lt;/i&gt;</description>
    <dc:title>Ensembles of nested dichotomies for multi-class problems</dc:title>

    <dc:creator>Eibe Frank</dc:creator>
    <dc:creator>Stefan Kramer</dc:creator>
    <dc:identifier>doi:10.1145/1015330.1015363</dc:identifier>
    <dc:source>(2004)</dc:source>
    <dc:date>2008-04-16T11:45:32-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>ensembles</prism:category>
    <prism:category>multiclass</prism:category>
    <prism:category>occ-for-multiclass</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2677204">
    <title>Semisupervised learning using feature selection based on maximum density subgraphs</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2677204</link>
    <description>&lt;i&gt;Systems and Computers in Japan, Vol. 38, No. 9. (2007), pp. 32-43.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In machine learning tasks on large-scale datasets, the labeled data essential to the classification are not always sufficient, which degrades the learning accuracy. Meanwhile, unlabeled data are always abundant. Hence, semisupervised learning which uses both unlabeled and labeled data to improve the learning accuracy is currently of great interest. In this paper, we use a graph to represent the underlying distribution of both labeled and unlabeled data and split it by using multiway cut to classify unlabeled data. Additionally, we propose a graph-based feature selection algorithm to improve the learning accuracy of our graph-based semisupervised learning algorithm. In our algorithm, we first propose an evaluation criterion for the attribute relevance using the graph density. Then, we extract the relevant attribute subset by finding the clique on the graph where each vertex stands for the attribute and each edge stands for the relevance of a feature pair. © 2007 Wiley Periodicals, Inc. Syst Comp Jpn, 38(9): 32-43, 2007; Published online in Wiley InterScience (). DOI 10.1002/scj.20757</description>
    <dc:title>Semisupervised learning using feature selection based on maximum density subgraphs</dc:title>

    <dc:creator>Yoshiyuki Nakatani</dc:creator>
    <dc:creator>Kuangyi Zhu</dc:creator>
    <dc:creator>Kuniaki Uehara</dc:creator>
    <dc:identifier>doi:10.1002/scj.20757</dc:identifier>
    <dc:source>Systems and Computers in Japan, Vol. 38, No. 9. (2007), pp. 32-43.</dc:source>
    <dc:date>2008-04-16T09:01:11-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Systems and Computers in Japan</prism:publicationName>
    <prism:volume>38</prism:volume>
    <prism:number>9</prism:number>
    <prism:startingPage>32</prism:startingPage>
    <prism:endingPage>43</prism:endingPage>
    <prism:category>dimension-reduction</prism:category>
    <prism:category>feature-selection</prism:category>
    <prism:category>learning-on-graphs</prism:category>
    <prism:category>locality</prism:category>
    <prism:category>semi-supervised</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2677105">
    <title>Less is More: Compact Matrix Decomposition for Large Sparse Graphs</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2677105</link>
    <description>&lt;i&gt;(2007)&lt;/i&gt;</description>
    <dc:title>Less is More: Compact Matrix Decomposition for Large Sparse Graphs</dc:title>

    <dc:creator>Jimeng Sun</dc:creator>
    <dc:creator>Yinglian Xie</dc:creator>
    <dc:creator>Hui Zhang</dc:creator>
    <dc:creator>Christos Faloutsos</dc:creator>
    <dc:source>(2007)</dc:source>
    <dc:date>2008-04-16T08:22:34-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publisher>SIAM</prism:publisher>
    <prism:category>dimension-reduction</prism:category>
    <prism:category>graphs</prism:category>
    <prism:category>learning-on-graphs</prism:category>
    <prism:category>occ-applications</prism:category>
    <prism:category>occ-high-dimensional</prism:category>
    <prism:category>occ-others</prism:category>
    <prism:category>scalability</prism:category>
    <prism:category>semi-supervised</prism:category>
    <prism:category>spectral</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2672793">
    <title>Masquerader Detection Using OCLEP: One-Class Classification Using Length Statistics of Emerging Patterns</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2672793</link>
    <description>&lt;i&gt;Web-Age Information Management Workshops, 2006. WAIM '06. Seventh International Conference on (2006), pp. 5-5.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We introduce a new method for masquerader detection that only uses a user's own data for training, called One-class Classification using Length statistics of Emerging Patterns (OCLEP). Emerging patterns (EPs) are patterns whose support increases from one dataset/class to another with a big ratio, and have been very useful in earlier studies. OCLEP classifies a case T as self or masquerader by using the average length of EPs obtained by contrasting T against sets of samples of a user's normal data. It is based on the observation that one needs long EPs to differentiate instances from a common class, but needs short EPs to differentiate instances from different classes. OCLEP has two novel features: for training it uses EPs mined from just the self class; for classification it uses the length statistics instead of the EPs themselves. Experiments show that OCLEP can achieve very good accuracy while keeping the false positive rate low, it achieves slightly better area-under-ROC-curve than SVM, and it can achieve good results when other approaches can not. OCLEP requires little effort in choosing parameters; the SVM requires significant tuning and it is hard to reach the theoretical optimal result. These features imply that OCLEP is a good complementary component for a robust masquerader detection system, even though its average performance in false positive rate is not as good as SVM's.</description>
    <dc:title>Masquerader Detection Using OCLEP: One-Class Classification Using Length Statistics of Emerging Patterns</dc:title>

    <dc:creator>Lijun Chen</dc:creator>
    <dc:creator>Guozhu Dong</dc:creator>
    <dc:identifier>doi:10.1109/WAIMW.2006.19</dc:identifier>
    <dc:source>Web-Age Information Management Workshops, 2006. WAIM '06. Seventh International Conference on (2006), pp. 5-5.</dc:source>
    <dc:date>2008-04-15T10:24:41-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Web-Age Information Management Workshops, 2006. WAIM '06. Seventh International Conference on</prism:publicationName>
    <prism:startingPage>5</prism:startingPage>
    <prism:endingPage>5</prism:endingPage>
    <prism:category>occ-applications</prism:category>
    <prism:category>occ-others</prism:category>
    <prism:category>occ-support-vector</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/252315">
    <title>Matrix Computations (Johns Hopkins Studies in Mathematical Sciences)</title>
    <link>http://www.citeulike.org/user/sdvillal/article/252315</link>
    <description>&lt;i&gt;(15 October 1996)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;&#60;P&#62;Revised and updated, the third edition of Golub and Van Loan's classic text in computer science provides essential information about the mathematical background and algorithmic skills required for the production of numerical software. This new edition includes thoroughly revised chapters on matrix multiplication problems and parallel matrix computations, expanded treatment of CS decomposition, an updated overview of floating point arithmetic, a more accurate rendition of the modified Gram-Schmidt process, and new material devoted to GMRES, QMR, and other methods designed to handle the sparse unsymmetric linear system problem.&#60;/P&#62;</description>
    <dc:title>Matrix Computations (Johns Hopkins Studies in Mathematical Sciences)</dc:title>

    <dc:creator>Gene Golub</dc:creator>
    <dc:creator>Charles Van Loan</dc:creator>
    <dc:source>(15 October 1996)</dc:source>
    <dc:date>2005-07-12T18:58:24-00:00</dc:date>
    <prism:publicationYear>1996</prism:publicationYear>
    <prism:publisher>The Johns Hopkins University Press</prism:publisher>
    <prism:category>algebra</prism:category>
    <prism:category>maths</prism:category>
    <prism:category>numerical-computing</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2386941">
    <title>Numerical Recipes with Source Code CD-ROM 3rd Edition: The Art of Scientific Computing</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2386941</link>
    <description>&lt;i&gt;(01 September 2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This book/CD bundle of the greatly expanded third edition of Numerical Recipes now has wider coverage than ever before, many new, expanded and updated sections, and two completely new chapters. Co-authored by four leading scientists from academia and industry, Numerical Recipes starts with basic mathematics and computer science and proceeds to complete, working routines. The informal, easy-to-read style that made earlier editions so popular is kept throughout. Highlights of the new material include: a new chapter on classification and inference, Gaussian mixture models, HMMs, hierarchical clustering, and SVMs; a new chapter on computational geometry, covering KD trees, quad- and octrees, Delaunay triangulation, and algorithms for lines, polygons, triangles, and spheres; interior point methods for linear programming; MCMC; an expanded treatment of ODEs with completely new routines; and many new statistical distributions.</description>
    <dc:title>Numerical Recipes with Source Code CD-ROM 3rd Edition: The Art of Scientific Computing</dc:title>

    <dc:creator>William Press</dc:creator>
    <dc:creator>Saul Teukolsky</dc:creator>
    <dc:creator>William Vetterling</dc:creator>
    <dc:creator>Brian Flannery</dc:creator>
    <dc:source>(01 September 2007)</dc:source>
    <dc:date>2008-02-15T17:43:05-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publisher>Cambridge University Press</prism:publisher>
    <prism:category>numerical-computing</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2634464">
    <title>Learning a Maximum Margin Subspace for Image Retrieval</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2634464</link>
    <description>&lt;i&gt;IEEE Trans. on Knowl. and Data Eng., Vol. 20, No. 2. (February 2008), pp. 189-201.&lt;/i&gt;</description>
    <dc:title>Learning a Maximum Margin Subspace for Image Retrieval</dc:title>

    <dc:creator>Xiaofei He</dc:creator>
    <dc:creator>Deng Cai</dc:creator>
    <dc:creator>Jiawei Han</dc:creator>
    <dc:identifier>doi:10.1109/TKDE.2007.190692</dc:identifier>
    <dc:source>IEEE Trans. on Knowl. and Data Eng., Vol. 20, No. 2. (February 2008), pp. 189-201.</dc:source>
    <dc:date>2008-04-06T10:16:43-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>IEEE Trans. on Knowl. and Data Eng.</prism:publicationName>
    <prism:issn>1041-4347</prism:issn>
    <prism:volume>20</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>189</prism:startingPage>
    <prism:endingPage>201</prism:endingPage>
    <prism:publisher>IEEE Educational Activities Department</prism:publisher>
    <prism:category>active-learning</prism:category>
    <prism:category>dimension-reduction</prism:category>
    <prism:category>image-processing</prism:category>
    <prism:category>multimedia-ir</prism:category>
    <prism:category>spectral</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2620013">
    <title>Geodesic entropic graphs for dimension and entropy estimation in manifold learning</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2620013</link>
    <description>&lt;i&gt;Signal Processing, IEEE Transactions on [see also Acoustics, Speech, and Signal Processing, IEEE Transactions on], Vol. 52, No. 8. (2004), pp. 2210-2221.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In the manifold learning problem, one seeks to discover a smooth low dimensional surface, i.e., a manifold embedded in a higher dimensional linear vector space, based on a set of measured sample points on the surface. In this paper, we consider the closely related problem of estimating the manifold's intrinsic dimension and the intrinsic entropy of the sample points. Specifically, we view the sample points as realizations of an unknown multivariate density supported on an unknown smooth manifold. We introduce a novel geometric approach based on entropic graph methods. Although the theory presented applies to this general class of graphs, we focus on the geodesic-minimal-spanning-tree (GMST) to obtaining asymptotically consistent estimates of the manifold dimension and the Rényi α-entropy of the sample density on the manifold. The GMST approach is striking in its simplicity and does not require reconstruction of the manifold or estimation of the multivariate density of the samples. The GMST method simply constructs a minimal spanning tree (MST) sequence using a geodesic edge matrix and uses the overall lengths of the MSTs to simultaneously estimate manifold dimension and entropy. We illustrate the GMST approach on standard synthetic manifolds as well as on real data sets consisting of images of faces.</description>
    <dc:title>Geodesic entropic graphs for dimension and entropy estimation in manifold learning</dc:title>

    <dc:creator>JA Costa</dc:creator>
    <dc:creator>AO Hero</dc:creator>
    <dc:identifier>doi:10.1109/TSP.2004.831130</dc:identifier>
    <dc:source>Signal Processing, IEEE Transactions on [see also Acoustics, Speech, and Signal Processing, IEEE Transactions on], Vol. 52, No. 8. (2004), pp. 2210-2221.</dc:source>
    <dc:date>2008-04-01T14:04:25-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Signal Processing, IEEE Transactions on [see also Acoustics, Speech, and Signal Processing, IEEE Transactions on]</prism:publicationName>
    <prism:volume>52</prism:volume>
    <prism:number>8</prism:number>
    <prism:startingPage>2210</prism:startingPage>
    <prism:endingPage>2221</prism:endingPage>
    <prism:category>intrinsic-dimension-estimation</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2617935">
    <title>Feature Extraction: Foundations and Applications (Studies in Fuzziness and Soft Computing)</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2617935</link>
    <description>&lt;i&gt;(29 August 2006)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;&#60;P&#62;This book is both a reference for engineers and scientists and a teaching resource, featuring tutorial chapters and research papers on feature extraction. Its CD-ROM includes the data of the NIPS 2003 Feature Selection Challenge and sample Matlab® code.&#60;/P&#62; &#60;P&#62;&#34;This book compiles some very promising techniques, coming from an extremely smart collection of researchers, delivering their best ideas in a competitive environment.&#34; Trevor Hastie, Stanford University&#60;/P&#62; &#60;P&#62;&#34;Feature selection is a key technology for making sense of the high dimensional data. Isabelle Guyon et al. have done a splendid job in designing a challenging competition, and collecting the lessons learned.&#34; Bernhard Schoelkopf, Max Planck Institute&#60;/P&#62; &#60;P&#62;&#34;There has been until now insufficient consideration of feature selection algorithms, no unified presentation of leading methods, and no systematic comparisons. This volume is noteworthy for the breadth of methods covered, the clarity of presentations, the unity in notation and the helpful statistical appendices.&#34; David G. Stork, Ricoh Innovations&#60;/P&#62; &#60;P&#62;&#34;Feature extraction finds application in biotechnology, industrial inspection, the Internet, radar, sonar, and speech recognition. This book will make a difference to the literature on machine learning.&#34; Simon Haykin, Mc Master University &#60;/P&#62; &#60;P&#62;&#34;This book sets a high standard as the public record of an interesting and effective competition.&#34; Peter Norvig, Google Inc.&#60;/P&#62;</description>
    <dc:title>Feature Extraction: Foundations and Applications (Studies in Fuzziness and Soft Computing)</dc:title>

    <dc:source>(29 August 2006)</dc:source>
    <dc:date>2008-03-31T23:22:48-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publisher>Springer</prism:publisher>
    <prism:category>dimension-reduction</prism:category>
    <prism:category>feature-extraction</prism:category>
    <prism:category>feature-selection</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/510440">
    <title>Convexity, Classification, and Risk Bounds</title>
    <link>http://www.citeulike.org/user/sdvillal/article/510440</link>
    <description>&lt;i&gt;Journal of the American Statistical Association, Vol. 101, No. 473. (March 2006), pp. 138-156.&lt;/i&gt;</description>
    <dc:title>Convexity, Classification, and Risk Bounds</dc:title>

    <dc:creator>Peter Bartlett</dc:creator>
    <dc:creator>Michael Jordan</dc:creator>
    <dc:creator>Jon Mcauliffe</dc:creator>
    <dc:identifier>doi:10.1198/016214505000000907</dc:identifier>
    <dc:source>Journal of the American Statistical Association, Vol. 101, No. 473. (March 2006), pp. 138-156.</dc:source>
    <dc:date>2006-02-18T14:36:37-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Journal of the American Statistical Association</prism:publicationName>
    <prism:issn>0162-1459</prism:issn>
    <prism:volume>101</prism:volume>
    <prism:number>473</prism:number>
    <prism:startingPage>138</prism:startingPage>
    <prism:endingPage>156</prism:endingPage>
    <prism:publisher>American Statistical Association</prism:publisher>
    <prism:category>error-estimation</prism:category>
    <prism:category>loss-functions</prism:category>
    <prism:category>ml-foundations</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2205725">
    <title>On divergences, surrogate loss functions, and decentralized detection</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2205725</link>
    <description>&lt;i&gt;(25 Oct 2005)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We develop a general correspondence between a family of loss functions that act as surrogates to 0-1 loss, and the class of Ali-Silvey or $f$-divergence functionals. This correspondence provides the basis for choosing and evaluating various surrogate losses frequently used in statistical learning (e.g., hinge loss, exponential loss, logistic loss); conversely, it provides a decision-theoretic framework for the choice of divergences in signal processing and quantization theory. We exploit this correspondence to characterize the statistical behavior of a nonparametric decentralized hypothesis testing algorithms that operate by minimizing convex surrogate loss functions. In particular, we specify the family of loss functions that are equivalent to 0-1 loss in the sense of producing the same quantization rules and discriminant functions.</description>
    <dc:title>On divergences, surrogate loss functions, and decentralized detection</dc:title>

    <dc:creator>Xuanlong Nguyen</dc:creator>
    <dc:creator>Martin Wainwright</dc:creator>
    <dc:creator>Michael Jordan</dc:creator>
    <dc:source>(25 Oct 2005)</dc:source>
    <dc:date>2008-01-08T00:17:15-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:category>error-estimation</prism:category>
    <prism:category>loss-functions</prism:category>
    <prism:category>ml-foundations</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2616147">
    <title>One-class classifiers : a review and analysis of suitability in the context of mobile-masquerader detection</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2616147</link>
    <description>&lt;i&gt;South African Computer Journal, Vol. 36 (2006), pp. 29-48.&lt;/i&gt;</description>
    <dc:title>One-class classifiers : a review and analysis of suitability in the context of mobile-masquerader detection</dc:title>

    <dc:creator>Oleksiy Mazhelis</dc:creator>
    <dc:source>South African Computer Journal, Vol. 36 (2006), pp. 29-48.</dc:source>
    <dc:date>2008-03-31T13:10:59-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>South African Computer Journal</prism:publicationName>
    <prism:volume>36</prism:volume>
    <prism:startingPage>29</prism:startingPage>
    <prism:endingPage>48</prism:endingPage>
    <prism:category>occ-applications</prism:category>
    <prism:category>occ-survey</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2616138">
    <title>Application of LVQ to novelty detection using outlier training data</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2616138</link>
    <description>&lt;i&gt;Pattern Recognition Letters, Vol. 27, No. 13. (1 October 2006), pp. 1572-1579.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We propose to use learning vector quantization (LVQ) in novelty detection where a few outliers exist in training data. The codebook update of original LVQ is modified and the scheme to determine a threshold for each codebook is proposed. Experimental results on artificial and real-world problems are quite promising.</description>
    <dc:title>Application of LVQ to novelty detection using outlier training data</dc:title>

    <dc:creator>Hyoung-Joo Lee</dc:creator>
    <dc:creator>Sungzoon Cho</dc:creator>
    <dc:identifier>doi:10.1016/j.patrec.2006.02.019</dc:identifier>
    <dc:source>Pattern Recognition Letters, Vol. 27, No. 13. (1 October 2006), pp. 1572-1579.</dc:source>
    <dc:date>2008-03-31T13:07:04-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Pattern Recognition Letters</prism:publicationName>
    <prism:volume>27</prism:volume>
    <prism:number>13</prism:number>
    <prism:startingPage>1572</prism:startingPage>
    <prism:endingPage>1579</prism:endingPage>
    <prism:category>occ-neural-networks</prism:category>
    <prism:category>occ-with-outliers</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/525518">
    <title>The many faces of sequence alignment.</title>
    <link>http://www.citeulike.org/user/sdvillal/article/525518</link>
    <description>&lt;i&gt;Brief Bioinform, Vol. 6, No. 1. (March 2005), pp. 6-22.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Starting with the sequencing of the mouse genome in 2002, we have entered a period where the main focus of genomics will be to compare multiple genomes in order to learn about human biology and evolution at the DNA level. Alignment methods are the main computational component of this endeavour. This short review aims to summarise the current status of research in alignments, emphasising large-scale genomic comparisons and suggesting possible directions that will be explored in the near future.</description>
    <dc:title>The many faces of sequence alignment.</dc:title>

    <dc:creator>S Batzoglou</dc:creator>
    <dc:identifier>doi:10.1093/bib/6.1.6</dc:identifier>
    <dc:source>Brief Bioinform, Vol. 6, No. 1. (March 2005), pp. 6-22.</dc:source>
    <dc:date>2006-03-01T16:58:55-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Brief Bioinform</prism:publicationName>
    <prism:issn>1467-5463</prism:issn>
    <prism:volume>6</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>6</prism:startingPage>
    <prism:endingPage>22</prism:endingPage>
    <prism:category>time-series</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/494124">
    <title>The complexity of theorem-proving procedures</title>
    <link>http://www.citeulike.org/user/sdvillal/article/494124</link>
    <description>&lt;i&gt;(1971), pp. 151-158.&lt;/i&gt;</description>
    <dc:title>The complexity of theorem-proving procedures</dc:title>

    <dc:creator>Stephen Cook</dc:creator>
    <dc:identifier>doi:10.1145/800157.805047</dc:identifier>
    <dc:source>(1971), pp. 151-158.</dc:source>
    <dc:date>2006-02-06T08:39:41-00:00</dc:date>
    <prism:publicationYear>1971</prism:publicationYear>
    <prism:startingPage>151</prism:startingPage>
    <prism:endingPage>158</prism:endingPage>
    <prism:publisher>ACM Press</prism:publisher>
    <prism:category>algorithmics</prism:category>
    <prism:category>classic-refs</prism:category>
    <prism:category>seminal</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2607805">
    <title>Active Learning with Feedback on Features and Instances</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2607805</link>
    <description>&lt;i&gt;Journal of Machine Learning Research, Vol. 7 (August 2006), pp. 1655-1686.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We extend the traditional active learning framework to include feedback on features in addition to labeling instances, and we execute a careful study of the effects of feature selection and human feedback on features in the setting of text categorization. Our experiments on a variety of categorization tasks indicate that there is significant potential in improving classifier performance by feature re-weighting, beyond that achieved via membership queries alone (traditional active learning) if we have access to an oracle that can point to the important (most predictive) features. Our experiments on human subjects indicate that human feedback on feature relevance can identify a sufficient proportion of the most relevant features (over 50% in our experiments). We find that on average, labeling a feature takes much less time than labeling a document. We devise an algorithm that interleaves labeling features and documents which significantly accelerates standard active learning in our simulation experiments. Feature feedback can complement traditional active learning in applications such as news filtering, e-mail classification, and personalization, where the human teacher can have significant knowledge on the relevance of features.</description>
    <dc:title>Active Learning with Feedback on Features and Instances</dc:title>

    <dc:creator>Hema Raghavan</dc:creator>
    <dc:creator>Omid Madani</dc:creator>
    <dc:creator>Rosie Jones</dc:creator>
    <dc:source>Journal of Machine Learning Research, Vol. 7 (August 2006), pp. 1655-1686.</dc:source>
    <dc:date>2008-03-28T15:53:39-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Journal of Machine Learning Research</prism:publicationName>
    <prism:volume>7</prism:volume>
    <prism:startingPage>1655</prism:startingPage>
    <prism:endingPage>1686</prism:endingPage>
    <prism:category>active-learning</prism:category>
    <prism:category>dimension-reduction</prism:category>
    <prism:category>feature-selection</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2605457">
    <title>Alternative Measures of Computational Complexity with Applications to Agnostic Learning</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2605457</link>
    <description>&lt;i&gt;Theory and Applications of Models of Computation (2006), pp. 231-235.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We address a fundamental problem of complexity theory - the inadequacy of worst-case complexity for the task of evaluating the computational resources required for real life problems. While being the best known measure and enjoying the support of a rich and elegant theory, worst-case complexity seems gives rise to over-pessimistic complexity values. Many standard task, that are being carried out routinely in machine learning applications, are NP-hard, that is, infeasible from the worst-case-complexity perspective. In this work we offer an alternative measure of complexity for approximations-optimization tasks. Our approach is to define a hierarchy on the set of inputs to a learning task, so that natural (’real data’) inputs occupy only bounded levels of this hierarchy and that there are algorithms that handle in polynomial time each such bounded level.</description>
    <dc:title>Alternative Measures of Computational Complexity with Applications to Agnostic Learning</dc:title>

    <dc:creator>Shai Ben-David</dc:creator>
    <dc:identifier>doi:10.1007/11750321_22</dc:identifier>
    <dc:source>Theory and Applications of Models of Computation (2006), pp. 231-235.</dc:source>
    <dc:date>2008-03-28T10:27:57-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Theory and Applications of Models of Computation</prism:publicationName>
    <prism:startingPage>231</prism:startingPage>
    <prism:endingPage>235</prism:endingPage>
    <prism:category>agnostic-learning</prism:category>
    <prism:category>algorithmics</prism:category>
    <prism:category>ml-philosophy</prism:category>
    <prism:category>natural-optimization</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/1121661">
    <title>Random Forests</title>
    <link>http://www.citeulike.org/user/sdvillal/article/1121661</link>
    <description>&lt;i&gt;Machine Learning, Vol. V45, No. 1. (1 October 2001), pp. 5-32.&lt;/i&gt;</description>
    <dc:title>Random Forests</dc:title>

    <dc:creator>Leo Breiman</dc:creator>
    <dc:identifier>doi:10.1023/A:1010933404324</dc:identifier>
    <dc:source>Machine Learning, Vol. V45, No. 1. (1 October 2001), pp. 5-32.</dc:source>
    <dc:date>2007-02-25T22:54:41-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:publicationName>Machine Learning</prism:publicationName>
    <prism:volume>V45</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>5</prism:startingPage>
    <prism:endingPage>32</prism:endingPage>
    <prism:category>algorithmic-distance-metrics</prism:category>
    <prism:category>boosting</prism:category>
    <prism:category>classic-refs</prism:category>
    <prism:category>dimension-reduction</prism:category>
    <prism:category>ensemble-diversity</prism:category>
    <prism:category>ensembles</prism:category>
    <prism:category>feature-selection</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2584713">
    <title>Novelty detection with constructive probabilistic neural networks</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2584713</link>
    <description>&lt;i&gt;Neurocomputing, Vol. 71, No. 4-6. (January 2008), pp. 1046-1053.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This paper investigates the use of probabilistic neural networks trained with the dynamic decay adjustment algorithm (PNN-DDA) for novelty detection tasks. PNN-DDA is a fast, constructive neural model originally developed and investigated for standard classification tasks. The training algorithm is controlled by two parameters, [theta]+ and [theta]-. Simulations employing four data sets from the UCI machine learning repository are reported. The results show that parameter [theta]- considerably influences the performance of PNN-DDA for novelty detection, and furthermore, that PNN-DDA achieves performance comparable to NNDD with the advantage of producing much smaller classifiers.</description>
    <dc:title>Novelty detection with constructive probabilistic neural networks</dc:title>

    <dc:creator>Adriano Oliveira</dc:creator>
    <dc:creator>Flavio Costa</dc:creator>
    <dc:creator>Clovis Filho</dc:creator>
    <dc:identifier>doi:10.1016/j.neucom.2007.11.003</dc:identifier>
    <dc:source>Neurocomputing, Vol. 71, No. 4-6. (January 2008), pp. 1046-1053.</dc:source>
    <dc:date>2008-03-25T12:05:39-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Neurocomputing</prism:publicationName>
    <prism:volume>71</prism:volume>
    <prism:number>4-6</prism:number>
    <prism:startingPage>1046</prism:startingPage>
    <prism:endingPage>1053</prism:endingPage>
    <prism:category>occ-applications</prism:category>
    <prism:category>occ-nearest-neighbor</prism:category>
    <prism:category>occ-neural-networks</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2584709">
    <title>Weighted support vector machine for data classification</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2584709</link>
    <description>&lt;i&gt;Neural Networks, 2005. IJCNN '05. Proceedings. 2005 IEEE International Joint Conference on, Vol. 2 (2005), pp. 859-864 vol. 2.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This paper presents a weighted support vector machine (WSVM) to improve the outlier sensitivity problem of standard support vector machine (SVM) for two-class data classification. The basic idea is to assign different weights to different data points such that the WSVM training algorithm learns the decision surface according to the relative importance of data points in the training data set. The weights used in WSVM are generated by kernel-based possibilistic c-means (KPCM) algorithm, whose partition generates relative high values for important data points but low values for outliers. Experimental results indicate that the proposed method reduces the affect of outliers and yields higher classification rate than standard SVM does when outliers exist in the training data set.</description>
    <dc:title>Weighted support vector machine for data classification</dc:title>

    <dc:creator>Xulei Yang</dc:creator>
    <dc:creator>Qing Song</dc:creator>
    <dc:creator>A Cao</dc:creator>
    <dc:source>Neural Networks, 2005. IJCNN '05. Proceedings. 2005 IEEE International Joint Conference on, Vol. 2 (2005), pp. 859-864 vol. 2.</dc:source>
    <dc:date>2008-03-25T12:04:19-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Neural Networks, 2005. IJCNN '05. Proceedings. 2005 IEEE International Joint Conference on</prism:publicationName>
    <prism:volume>2</prism:volume>
    <prism:startingPage>859</prism:startingPage>
    <prism:endingPage>864 vol. 2</prism:endingPage>
    <prism:category>imbalanced</prism:category>
    <prism:category>kernel-machines</prism:category>
    <prism:category>occ-support-vector</prism:category>
    <prism:category>occ-with-outliers</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2584700">
    <title>A Unified Subspace Outlier Ensemble Framework for Outlier Detection in High Dimensional Spaces</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2584700</link>
    <description>&lt;i&gt;(24 May 2005)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The task of outlier detection is to find small groups of data objects that are exceptional when compared with rest large amount of data. Detection of such outliers is important for many applications such as fraud detection and customer migration. Most such applications are high dimensional domains in which the data may contain hundreds of dimensions. However, the outlier detection problem itself is not well defined and none of the existing definitions are widely accepted, especially in high dimensional space. In this paper, our first contribution is to propose a unified framework for outlier detection in high dimensional spaces from an ensemble-learning viewpoint. In our new framework, the outlying-ness of each data object is measured by fusing outlier factors in different subspaces using a combination function. Accordingly, we show that all existing researches on outlier detection can be regarded as special cases in the unified framework with respect to the set of subspaces considered and the type of combination function used. In addition, to demonstrate the usefulness of the ensemble-learning based outlier detection framework, we developed a very simple and fast algorithm, namely SOE1 (Subspace Outlier Ensemble using 1-dimensional Subspaces) in which only subspaces with one dimension is used for mining outliers from large categorical datasets. The SOE1 algorithm needs only two scans over the dataset and hence is very appealing in real data mining applications. Experimental results on real datasets and large synthetic datasets show that: (1) SOE1 has comparable performance with respect to those state-of-art outlier detection algorithms on identifying true outliers and (2) SOE1 can be an order of magnitude faster than one of the fastest outlier detection algorithms known so far.</description>
    <dc:title>A Unified Subspace Outlier Ensemble Framework for Outlier Detection in High Dimensional Spaces</dc:title>

    <dc:creator>Zengyou He</dc:creator>
    <dc:creator>Xiaofei Xu</dc:creator>
    <dc:creator>Shengchun Deng</dc:creator>
    <dc:source>(24 May 2005)</dc:source>
    <dc:date>2008-03-25T12:00:22-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:category>ensembles</prism:category>
    <prism:category>occ-applications</prism:category>
    <prism:category>occ-dimension-reduction</prism:category>
    <prism:category>occ-high-dimensional</prism:category>
    <prism:category>occ-others</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2584684">
    <title>Modified support vector novelty detector using training data with outliers</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2584684</link>
    <description>&lt;i&gt;Pattern Recogn. Lett., Vol. 24, No. 14. (October 2003), pp. 2479-2487.&lt;/i&gt;</description>
    <dc:title>Modified support vector novelty detector using training data with outliers</dc:title>

    <dc:creator>Li Cao</dc:creator>
    <dc:creator>Heow Lee</dc:creator>
    <dc:creator>Wai Chong</dc:creator>
    <dc:identifier>doi:10.1016/S0167-8655(03)00093-X</dc:identifier>
    <dc:source>Pattern Recogn. Lett., Vol. 24, No. 14. (October 2003), pp. 2479-2487.</dc:source>
    <dc:date>2008-03-25T11:53:18-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Pattern Recogn. Lett.</prism:publicationName>
    <prism:issn>0167-8655</prism:issn>
    <prism:volume>24</prism:volume>
    <prism:number>14</prism:number>
    <prism:startingPage>2479</prism:startingPage>
    <prism:endingPage>2487</prism:endingPage>
    <prism:publisher>Elsevier Science Inc.</prism:publisher>
    <prism:category>occ-support-vector</prism:category>
    <prism:category>occ-with-outliers</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2580889">
    <title>Performance Measures for Neyman-Pearson Classification</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2580889</link>
    <description>&lt;i&gt;IEEE Transactions on Information Theory, Vol. 53, No. 8. (2007), pp. 2852-2863.&lt;/i&gt;</description>
    <dc:title>Performance Measures for Neyman-Pearson Classification</dc:title>

    <dc:creator>Clayton Scott</dc:creator>
    <dc:identifier>doi:10.1109/TIT.2007.901152</dc:identifier>
    <dc:source>IEEE Transactions on Information Theory, Vol. 53, No. 8. (2007), pp. 2852-2863.</dc:source>
    <dc:date>2008-03-24T14:45:45-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>IEEE Transactions on Information Theory</prism:publicationName>
    <prism:volume>53</prism:volume>
    <prism:number>8</prism:number>
    <prism:startingPage>2852</prism:startingPage>
    <prism:endingPage>2863</prism:endingPage>
    <prism:category>error-estimation</prism:category>
    <prism:category>occ-others</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/sdvillal/article/2570838">
    <title>Local learning projections</title>
    <link>http://www.citeulike.org/user/sdvillal/article/2570838</link>
    <description>&lt;i&gt;(2007), pp. 1039-1046.&lt;/i&gt;</description>
    <dc:title>Local learning projections</dc:title>

    <dc:creator>Mingrui Wu</dc:creator>
    <dc:creator>Kai Yu</dc:creator>
    <dc:creator>Shipeng Yu</dc:creator>
    <dc:creator>Bernhard Sch&#246;lkopf</dc:creator>
    <dc:identifier>doi:10.1145/1273496.1273627</dc:identifier>
    <dc:source>(2007), pp. 1039-1046.</dc:source>
    <dc:date>2008-03-21T23:48:47-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:startingPage>1039</prism:startingPage>
    <prism:endingPage>1046</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>dimension-reduction</prism:category>
    <prism:category>locality</prism:category>
    <prism:category>lpp</prism:category>
    <prism:category>pca</prism:category>
</item>



</rdf:RDF>

