<?xml version="1.0" encoding="UTF-8"?>

<rdf:RDF
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
   xmlns="http://purl.org/rss/1.0/"
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:prism="http://prismstandard.org/namespaces/1.2/basic/"
   xmlns:dcterms="http://purl.org/dc/terms/"

>
<channel rdf:about="http://www.citeulike.org/about">
<pubDate>Sat, 26 Jul 2008 05:59:41 BST</pubDate>


	<title>CiteULike: awooga's reinforcement-learning</title>
	<description>CiteULike: awooga's reinforcement-learning</description>


	<link>http://www.citeulike.org/user/awooga/tag/reinforcement-learning</link>
	<dc:publisher>CiteULike.org</dc:publisher>
	<dc:language>en-gb</dc:language>
	<dc:rights>Copyright &#169; 2004-2008 citeulike.org</dc:rights>
	<items>
    <rdf:Seq>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/2615899"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1421135"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1421137"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1237233"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1926528"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/2444018"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/2336558"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/781429"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/513870"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1115463"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/112017"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/884653"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1597243"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/156154"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1532668"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1464933"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1444549"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/556226"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/167800"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1398480"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1396873"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1300442"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/482141"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/590544"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1047089"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1211921"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/963346"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1209970"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1206075"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1205798"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/467922"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1106726"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1084672"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1074689"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/546145"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/670257"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1044619"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/121957"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/121955"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/121953"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/1014412"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/973018"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/355573"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/963570"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/478993"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/959456"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/685452"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/949094"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/156151"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/awooga/article/938047"/>

	</rdf:Seq>
	</items>
	</channel>


<item rdf:about="http://www.citeulike.org/user/awooga/article/2615899">
    <title>Food Reward in the Absence of Taste Receptor Signaling</title>
    <link>http://www.citeulike.org/user/awooga/article/2615899</link>
    <description>&lt;i&gt;Neuron, Vol. 57, No. 6. (27 March 2008), pp. 930-941.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Summary Food palatability and hedonic value play central roles in nutrient intake. However, postingestive effects can influence food preferences independently of palatability, although the neurobiological bases of such mechanisms remain poorly understood. Of central interest is whether the same brain reward circuitry that is responsive to palatable rewards also encodes metabolic value independently of taste signaling. Here we show that trpm5-/- mice, which lack the cellular machinery required for sweet taste transduction, can develop a robust preference for sucrose solutions based solely on caloric content. Sucrose intake induced dopamine release in the ventral striatum of these sweet-blind mice, a pattern usually associated with receipt of palatable rewards. Furthermore, single neurons in this same ventral striatal region showed increased sensitivity to caloric intake even in the absence of gustatory inputs. Our findings suggest that calorie-rich nutrients can directly influence brain reward circuits that control food intake independently of palatability or functional taste transduction.</description>
    <dc:title>Food Reward in the Absence of Taste Receptor Signaling</dc:title>

    <dc:creator>Ivan de Araujo</dc:creator>
    <dc:creator>Albino Oliveira-Maia</dc:creator>
    <dc:creator>Tatyana Sotnikova</dc:creator>
    <dc:creator>Raul Gainetdinov</dc:creator>
    <dc:creator>Marc Caron</dc:creator>
    <dc:creator>Miguel Nicolelis</dc:creator>
    <dc:creator>Sidney Simon</dc:creator>
    <dc:identifier>doi:10.1016/j.neuron.2008.01.032</dc:identifier>
    <dc:source>Neuron, Vol. 57, No. 6. (27 March 2008), pp. 930-941.</dc:source>
    <dc:date>2008-03-31T11:10:48-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Neuron</prism:publicationName>
    <prism:volume>57</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>930</prism:startingPage>
    <prism:endingPage>941</prism:endingPage>
    <prism:category>dopamine</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>reward</prism:category>
    <prism:category>taste</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1421135">
    <title>Reward Prediction Error Computation in the Pedunculopontine Tegmental Nucleus Neurons</title>
    <link>http://www.citeulike.org/user/awooga/article/1421135</link>
    <description>&lt;i&gt;Annals of the New York Academy of Sciences, Vol. 1104, No. 1. (May 2007), pp. 310-323.&lt;/i&gt;</description>
    <dc:title>Reward Prediction Error Computation in the Pedunculopontine Tegmental Nucleus Neurons</dc:title>

    <dc:creator>Yasushi Kobayashi</dc:creator>
    <dc:creator>Ken-Ichi Okada</dc:creator>
    <dc:identifier>doi:10.1196/annals.1390.003</dc:identifier>
    <dc:source>Annals of the New York Academy of Sciences, Vol. 1104, No. 1. (May 2007), pp. 310-323.</dc:source>
    <dc:date>2007-06-29T02:35:43-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Annals of the New York Academy of Sciences</prism:publicationName>
    <prism:issn>0077-8923</prism:issn>
    <prism:volume>1104</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>310</prism:startingPage>
    <prism:endingPage>323</prism:endingPage>
    <prism:publisher>Blackwell Publishing</prism:publisher>
    <prism:category>acetylcholine</prism:category>
    <prism:category>dopamine</prism:category>
    <prism:category>pptg</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1421137">
    <title>Serotonin and the Evaluation of Future Rewards: Theory, Experiments, and Possible Neural Mechanisms</title>
    <link>http://www.citeulike.org/user/awooga/article/1421137</link>
    <description>&lt;i&gt;Annals of the New York Academy of Sciences, Vol. 1104, No. 1. (May 2007), pp. 289-300.&lt;/i&gt;</description>
    <dc:title>Serotonin and the Evaluation of Future Rewards: Theory, Experiments, and Possible Neural Mechanisms</dc:title>

    <dc:creator>Nicolas Schweighofer</dc:creator>
    <dc:creator>Saori Tanaka</dc:creator>
    <dc:creator>Kenji Doya</dc:creator>
    <dc:identifier>doi:10.1196/annals.1390.011</dc:identifier>
    <dc:source>Annals of the New York Academy of Sciences, Vol. 1104, No. 1. (May 2007), pp. 289-300.</dc:source>
    <dc:date>2007-06-29T02:35:43-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Annals of the New York Academy of Sciences</prism:publicationName>
    <prism:issn>0077-8923</prism:issn>
    <prism:volume>1104</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>289</prism:startingPage>
    <prism:endingPage>300</prism:endingPage>
    <prism:publisher>Blackwell Publishing</prism:publisher>
    <prism:category>model</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>serotonin</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1237233">
    <title>The computational neurobiology of learning and reward</title>
    <link>http://www.citeulike.org/user/awooga/article/1237233</link>
    <description>&lt;i&gt;Current Opinion in Neurobiology, Vol. 16, No. 2. (April 2006), pp. 199-204.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Following the suggestion that midbrain dopaminergic neurons encode a signal, known as a `reward prediction error', used by artificial intelligence algorithms for learning to choose advantageous actions, the study of the neural substrates for reward-based learning has been strongly influenced by computational theories. In recent work, such theories have been increasingly integrated into experimental design and analysis. Such hybrid approaches have offered detailed new insights into the function of a number of brain areas, especially the cortex and basal ganglia. In part this is because these approaches enable the study of neural correlates of subjective factors (such as a participant's beliefs about the reward to be received for performing some action) that the computational theories purport to quantify.</description>
    <dc:title>The computational neurobiology of learning and reward</dc:title>

    <dc:creator>Nathaniel Daw</dc:creator>
    <dc:creator>Kenji Doya</dc:creator>
    <dc:identifier>doi:10.1016/j.conb.2006.03.006</dc:identifier>
    <dc:source>Current Opinion in Neurobiology, Vol. 16, No. 2. (April 2006), pp. 199-204.</dc:source>
    <dc:date>2007-04-19T16:11:56-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Current Opinion in Neurobiology</prism:publicationName>
    <prism:volume>16</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>199</prism:startingPage>
    <prism:endingPage>204</prism:endingPage>
    <prism:category>dopamine</prism:category>
    <prism:category>model</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>serotonin</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1926528">
    <title>The misbehavior of value and the discipline of the will.</title>
    <link>http://www.citeulike.org/user/awooga/article/1926528</link>
    <description>&lt;i&gt;Neural Netw, Vol. 19, No. 8. (October 2006), pp. 1153-1160.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Most reinforcement learning models of animal conditioning operate under the convenient, though fictive, assumption that Pavlovian conditioning concerns prediction learning whereas instrumental conditioning concerns action learning. However, it is only through Pavlovian responses that Pavlovian prediction learning is evident, and these responses can act against the instrumental interests of the subjects. This can be seen in both experimental and natural circumstances. In this paper we study the consequences of importing this competition into a reinforcement learning context, and demonstrate the resulting effects in an omission schedule and a maze navigation task. The misbehavior created by Pavlovian values can be quite debilitating; we discuss how it may be disciplined.</description>
    <dc:title>The misbehavior of value and the discipline of the will.</dc:title>

    <dc:creator>P Dayan</dc:creator>
    <dc:creator>Y Niv</dc:creator>
    <dc:creator>B Seymour</dc:creator>
    <dc:creator>ND Daw</dc:creator>
    <dc:identifier>doi:10.1016/j.neunet.2006.03.002</dc:identifier>
    <dc:source>Neural Netw, Vol. 19, No. 8. (October 2006), pp. 1153-1160.</dc:source>
    <dc:date>2007-11-16T14:12:57-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Neural Netw</prism:publicationName>
    <prism:issn>0893-6080</prism:issn>
    <prism:volume>19</prism:volume>
    <prism:number>8</prism:number>
    <prism:startingPage>1153</prism:startingPage>
    <prism:endingPage>1160</prism:endingPage>
    <prism:category>instinct</prism:category>
    <prism:category>misbehaviour</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/2444018">
    <title>Serotonin, Inhibition, and Negative Mood</title>
    <link>http://www.citeulike.org/user/awooga/article/2444018</link>
    <description>&lt;i&gt;PLoS Computational Biology, Vol. 4, No. 2. (1 February 2008), e4.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Pavlovian predictions of future aversive outcomes lead to behavioral inhibition, suppression, and withdrawal. There is considerable evidence for the involvement of serotonin in both the learning of these predictions and the inhibitory consequences that ensue, although less for a causal relationship between the two. In the context of a highly simplified model of chains of affectively charged thoughts, we interpret the combined effects of serotonin in terms of pruning a tree of possible decisions, (i.e., eliminating those choices that have low or negative expected outcomes). We show how a drop in behavioral inhibition, putatively resulting from an experimentally or psychiatrically influenced drop in serotonin, could result in unexpectedly large negative prediction errors and a significant aversive shift in reinforcement statistics. We suggest an interpretation of this finding that helps dissolve the apparent contradiction between the fact that inhibition of serotonin reuptake is the first-line treatment of depression, although serotonin itself is most strongly linked with aversive rather than appetitive outcomes and predictions.</description>
    <dc:title>Serotonin, Inhibition, and Negative Mood</dc:title>

    <dc:creator>Peter Dayan</dc:creator>
    <dc:creator>Quentin Huys</dc:creator>
    <dc:identifier>doi:10.1371/journal.pcbi.0040004</dc:identifier>
    <dc:source>PLoS Computational Biology, Vol. 4, No. 2. (1 February 2008), e4.</dc:source>
    <dc:date>2008-02-28T15:48:33-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>PLoS Computational Biology</prism:publicationName>
    <prism:volume>4</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>e4</prism:startingPage>
    <prism:category>depression</prism:category>
    <prism:category>model</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>serotonin</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/2336558">
    <title>Conditioned immunomodulation: Research needs and directions</title>
    <link>http://www.citeulike.org/user/awooga/article/2336558</link>
    <description>&lt;i&gt;Brain, Behavior, and Immunity, Vol. 17, No. 1, Supplement 1. (15 February 2003), pp. 51-57.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Considering the brief time that psychoneuroimmunology has existed as a bona fide field of research, a great deal of data has been collected in support of the proposition that homeostatic mechanisms are the product of an integrated system of defenses of which the immune system is a critical component. It is now clear that immune function is influenced by autonomic nervous systems activity and by the release of neuroendocrine substances from the pituitary. Conversely, cytokines and hormones released by an activated immune system influence neural and endocrine processes. Regulatory peptides and receptors, once confined to the brain, are expressed by both the nervous and immune systems enabling each system to monitor and modulate the activities of the other. It is hardly surprising, then, that immunologic reactivity can be influenced by stressful life experiences or by Pavlovian conditioning.</description>
    <dc:title>Conditioned immunomodulation: Research needs and directions</dc:title>

    <dc:creator>Robert Ader</dc:creator>
    <dc:identifier>doi:10.1016/S0889-1591(02)00067-3</dc:identifier>
    <dc:source>Brain, Behavior, and Immunity, Vol. 17, No. 1, Supplement 1. (15 February 2003), pp. 51-57.</dc:source>
    <dc:date>2008-02-05T18:14:36-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Brain, Behavior, and Immunity</prism:publicationName>
    <prism:volume>17</prism:volume>
    <prism:number>1, Supplement 1</prism:number>
    <prism:startingPage>51</prism:startingPage>
    <prism:endingPage>57</prism:endingPage>
    <prism:category>conditioning</prism:category>
    <prism:category>endocrinology</prism:category>
    <prism:category>psychoneuroimmunology</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/781429">
    <title>Extending the effects of spike-timing-dependent plasticity to behavioral timescales.</title>
    <link>http://www.citeulike.org/user/awooga/article/781429</link>
    <description>&lt;i&gt;Proc Natl Acad Sci U S A, Vol. 103, No. 23. (6 June 2006), pp. 8876-8881.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Activity-dependent modification of synaptic strengths due to spike-timing-dependent plasticity (STDP) is sensitive to correlations between pre- and postsynaptic firing over timescales of tens of milliseconds. Temporal associations typically encountered in behavioral tasks involve times on the order of seconds. To relate the learning of such temporal associations to STDP, we must account for this large discrepancy in timescales. We show that the gap between synaptic and behavioral timescales can be bridged if the stimuli being associated generate sustained responses that vary appropriately in time. Synapses between neurons that fire this way can be modified by STDP in a manner that depends on the temporal ordering of events separated by several seconds even though the underlying plasticity has a much smaller temporal window.</description>
    <dc:title>Extending the effects of spike-timing-dependent plasticity to behavioral timescales.</dc:title>

    <dc:creator>PJ Drew</dc:creator>
    <dc:creator>LF Abbott</dc:creator>
    <dc:identifier>doi:10.1073/pnas.0600676103</dc:identifier>
    <dc:source>Proc Natl Acad Sci U S A, Vol. 103, No. 23. (6 June 2006), pp. 8876-8881.</dc:source>
    <dc:date>2006-08-01T06:40:05-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Proc Natl Acad Sci U S A</prism:publicationName>
    <prism:issn>0027-8424</prism:issn>
    <prism:volume>103</prism:volume>
    <prism:number>23</prism:number>
    <prism:startingPage>8876</prism:startingPage>
    <prism:endingPage>8881</prism:endingPage>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>stdp</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/513870">
    <title>The neural basis of human error processing: reinforcement learning, dopamine, and the error-related negativity.</title>
    <link>http://www.citeulike.org/user/awooga/article/513870</link>
    <description>&lt;i&gt;Psychol Rev, Vol. 109, No. 4. (October 2002), pp. 679-709.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The authors present a unified account of 2 neural systems concerned with the development and expression of adaptive behaviors: a mesencephalic dopamine system for reinforcement learning and a &#34;generic&#34; error-processing system associated with the anterior cingulate cortex. The existence of the error-processing system has been inferred from the error-related negativity (ERN), a component of the event-related brain potential elicited when human participants commit errors in reaction-time tasks. The authors propose that the ERN is generated when a negative reinforcement learning signal is conveyed to the anterior cingulate cortex via the mesencephalic dopamine system and that this signal is used by the anterior cingulate cortex to modify performance on the task at hand. They provide support for this proposal using both computational modeling and psychophysiological experimentation.</description>
    <dc:title>The neural basis of human error processing: reinforcement learning, dopamine, and the error-related negativity.</dc:title>

    <dc:creator>CB Holroyd</dc:creator>
    <dc:creator>MG Coles</dc:creator>
    <dc:source>Psychol Rev, Vol. 109, No. 4. (October 2002), pp. 679-709.</dc:source>
    <dc:date>2006-02-20T20:56:12-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Psychol Rev</prism:publicationName>
    <prism:issn>0033-295X</prism:issn>
    <prism:volume>109</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>679</prism:startingPage>
    <prism:endingPage>709</prism:endingPage>
    <prism:category>anterior-cingulate</prism:category>
    <prism:category>dopamine</prism:category>
    <prism:category>eeg</prism:category>
    <prism:category>ern</prism:category>
    <prism:category>erp</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1115463">
    <title>Reinforcement-related brain potentials from medial frontal cortex: origins and functional significance.</title>
    <link>http://www.citeulike.org/user/awooga/article/1115463</link>
    <description>&lt;i&gt;Neurosci Biobehav Rev, Vol. 28, No. 4. (July 2004), pp. 441-448.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The development of the field of cognitive neuroscience has inspired a revival of interest in the brain mechanisms involved in the processing of rewards, punishments, and abstract performance feedback. One fruitful line of research in this area was initiated by the report of an electrophysiological brain potential in humans that was differentially sensitive to negative and positive performance feedback [J. Cogn. Neurosci. 9 (1997) 788]. Here we review current knowledge regarding the neural basis and functional significance of this feedback-evoked 'error-related negativity' (ERN). Our review is organized around a set of predictions derived from a recent theory, which holds that the ERN is associated with the arrival of a negative reward prediction error signal in anterior cingulate cortex.</description>
    <dc:title>Reinforcement-related brain potentials from medial frontal cortex: origins and functional significance.</dc:title>

    <dc:creator>S Nieuwenhuis</dc:creator>
    <dc:creator>CB Holroyd</dc:creator>
    <dc:creator>N Mol</dc:creator>
    <dc:creator>MG Coles</dc:creator>
    <dc:identifier>doi:10.1016/j.neubiorev.2004.05.003</dc:identifier>
    <dc:source>Neurosci Biobehav Rev, Vol. 28, No. 4. (July 2004), pp. 441-448.</dc:source>
    <dc:date>2007-02-21T01:25:44-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Neurosci Biobehav Rev</prism:publicationName>
    <prism:issn>0149-7634</prism:issn>
    <prism:volume>28</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>441</prism:startingPage>
    <prism:endingPage>448</prism:endingPage>
    <prism:category>anterior-cingulate</prism:category>
    <prism:category>dopamine</prism:category>
    <prism:category>eeg</prism:category>
    <prism:category>event-related-negativity</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/112017">
    <title>Reinforcement Learning: An Introduction (Adaptive Computation and Machine Learning)</title>
    <link>http://www.citeulike.org/user/awooga/article/112017</link>
    <description>&lt;i&gt;(01 March 1998)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Reinforcement learning, one of the most active research areas in artificial intelligence, is a computational approach to learning whereby an agent tries to maximize the total amount of reward it receives when interacting with a complex, uncertain environment. In &#60;i&#62;Reinforcement Learning&#60;/i&#62;, Richard Sutton and Andrew Barto provide a clear and simple account of the key ideas and algorithms of reinforcement learning. Their discussion ranges from the history of the field's intellectual foundations to the most recent developments and applications. The only necessary mathematical background is familiarity with elementary concepts of probability.&#60;br /&#62; &#60;br /&#62; The book is divided into three parts. Part I defines the reinforcement learning problem in terms of Markov decision processes. Part II provides basic solution methods: dynamic programming, Monte Carlo methods, and temporal-difference learning. Part III presents a unified view of the solution methods and incorporates artificial neural networks, eligibility traces, and planning; the two final chapters present case studies and consider the future of reinforcement learning.</description>
    <dc:title>Reinforcement Learning: An Introduction (Adaptive Computation and Machine Learning)</dc:title>

    <dc:creator>Richard Sutton</dc:creator>
    <dc:creator>Andrew Barto</dc:creator>
    <dc:source>(01 March 1998)</dc:source>
    <dc:date>2005-03-02T20:12:18-00:00</dc:date>
    <prism:publicationYear>1998</prism:publicationYear>
    <prism:publisher>The MIT Press</prism:publisher>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>review</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/884653">
    <title>Learning in Spiking Neural Networks by Reinforcement of Stochastic Synaptic Transmission</title>
    <link>http://www.citeulike.org/user/awooga/article/884653</link>
    <description>&lt;i&gt;Neuron, Vol. 40, No. 6. (18 December 2003), pp. 1063-1073.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;It is well-known that chemical synaptic transmission is an unreliable process, but the function of such unreliability remains unclear. Here I consider the hypothesis that the randomness of synaptic transmission is harnessed by the brain for learning, in analogy to the way that genetic mutation is utilized by Darwinian evolution. This is possible if synapses are &#34;hedonistic,&#34; responding to a global reward signal by increasing their probabilities of vesicle release or failure, depending on which action immediately preceded reward. Hedonistic synapses learn by computing a stochastic approximation to the gradient of the average reward. They are compatible with synaptic dynamics such as short-term facilitation and depression and with the intricacies of dendritic integration and action potential generation. A network of hedonistic synapses can be trained to perform a desired computation by administering reward appropriately, as illustrated here through numerical simulations of integrate-and-fire model neurons.</description>
    <dc:title>Learning in Spiking Neural Networks by Reinforcement of Stochastic Synaptic Transmission</dc:title>

    <dc:creator>Sebastian Seung</dc:creator>
    <dc:identifier>doi:10.1016/S0896-6273(03)00761-X</dc:identifier>
    <dc:source>Neuron, Vol. 40, No. 6. (18 December 2003), pp. 1063-1073.</dc:source>
    <dc:date>2006-10-05T12:48:49-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Neuron</prism:publicationName>
    <prism:volume>40</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>1063</prism:startingPage>
    <prism:endingPage>1073</prism:endingPage>
    <prism:category>dopamine</prism:category>
    <prism:category>feedback</prism:category>
    <prism:category>hedonistic-neuron</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1597243">
    <title>Retrospective and prospective coding for predicted reward in the sensory thalamus</title>
    <link>http://www.citeulike.org/user/awooga/article/1597243</link>
    <description>&lt;i&gt;Nature, Vol. 412, No. 6846. (2001), pp. 546-549.&lt;/i&gt;</description>
    <dc:title>Retrospective and prospective coding for predicted reward in the sensory thalamus</dc:title>

    <dc:creator>Yutaka Komura</dc:creator>
    <dc:creator>Ryoi Tamura</dc:creator>
    <dc:creator>Teruko Uwano</dc:creator>
    <dc:creator>Hisao Nishijo</dc:creator>
    <dc:creator>Kimitaka Kaga</dc:creator>
    <dc:creator>Taketoshi Ono</dc:creator>
    <dc:identifier>doi:10.1038/35087595</dc:identifier>
    <dc:source>Nature, Vol. 412, No. 6846. (2001), pp. 546-549.</dc:source>
    <dc:date>2007-08-27T19:35:20-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:publicationName>Nature</prism:publicationName>
    <prism:volume>412</prism:volume>
    <prism:number>6846</prism:number>
    <prism:startingPage>546</prism:startingPage>
    <prism:endingPage>549</prism:endingPage>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>thalamus</prism:category>
    <prism:category>time</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/156154">
    <title>Multiple reward signals in the brain.</title>
    <link>http://www.citeulike.org/user/awooga/article/156154</link>
    <description>&lt;i&gt;Nat Rev Neurosci, Vol. 1, No. 3. (December 2000), pp. 199-207.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The fundamental biological importance of rewards has created an increasing interest in the neuronal processing of reward information. The suggestion that the mechanisms underlying drug addiction might involve natural reward systems has also stimulated interest. This article focuses on recent neurophysiological studies in primates that have revealed that neurons in a limited number of brain structures carry specific signals about past and future rewards. This research provides the first step towards an understanding of how rewards influence behaviour before they are received and how the brain might use reward information to control learning and goal-directed behaviour.</description>
    <dc:title>Multiple reward signals in the brain.</dc:title>

    <dc:creator>W Schultz</dc:creator>
    <dc:source>Nat Rev Neurosci, Vol. 1, No. 3. (December 2000), pp. 199-207.</dc:source>
    <dc:date>2005-04-08T21:46:07-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:publicationName>Nat Rev Neurosci</prism:publicationName>
    <prism:issn>1471-003X</prism:issn>
    <prism:volume>1</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>199</prism:startingPage>
    <prism:endingPage>207</prism:endingPage>
    <prism:category>dopamine</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1532668">
    <title>The Role of the Dorsal Striatum in Reward and Decision-Making</title>
    <link>http://www.citeulike.org/user/awooga/article/1532668</link>
    <description>&lt;i&gt;J. Neurosci., Vol. 27, No. 31. (1 August 2007), pp. 8161-8165.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Although the involvement in the striatum in the refinement and control of motor movement has long been recognized, recent description of discrete frontal corticobasal ganglia networks in a range of species has focused attention on the role particularly of the dorsal striatum in executive functions. Current evidence suggests that the dorsal striatum contributes directly to decision-making, especially to action selection and initiation, through the integration of sensorimotor, cognitive, and motivational/emotional information within specific corticostriatal circuits involving discrete regions of striatum. We review key evidence from recent studies in rodent, nonhuman primate, and human subjects. 10.1523/JNEUROSCI.1554-07.2007</description>
    <dc:title>The Role of the Dorsal Striatum in Reward and Decision-Making</dc:title>

    <dc:creator>Bernard Balleine</dc:creator>
    <dc:creator>Mauricio Delgado</dc:creator>
    <dc:creator>Okihide Hikosaka</dc:creator>
    <dc:identifier>doi:10.1523/JNEUROSCI.1554-07.2007</dc:identifier>
    <dc:source>J. Neurosci., Vol. 27, No. 31. (1 August 2007), pp. 8161-8165.</dc:source>
    <dc:date>2007-08-03T09:08:38-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>J. Neurosci.</prism:publicationName>
    <prism:volume>27</prism:volume>
    <prism:number>31</prism:number>
    <prism:startingPage>8161</prism:startingPage>
    <prism:endingPage>8165</prism:endingPage>
    <prism:category>basal-ganglia</prism:category>
    <prism:category>dopamine</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>review</prism:category>
    <prism:category>striatum</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1464933">
    <title>Behavior, Purpose and Teleology</title>
    <link>http://www.citeulike.org/user/awooga/article/1464933</link>
    <description>&lt;i&gt;Philosophy of Science, Vol. 10, No. 1. (1943), pp. 18-24.&lt;/i&gt;</description>
    <dc:title>Behavior, Purpose and Teleology</dc:title>

    <dc:creator>Arturo Rosenblueth</dc:creator>
    <dc:creator>Norbert Wiener</dc:creator>
    <dc:creator>Julian Bigelow</dc:creator>
    <dc:source>Philosophy of Science, Vol. 10, No. 1. (1943), pp. 18-24.</dc:source>
    <dc:date>2007-07-18T13:11:21-00:00</dc:date>
    <prism:publicationYear>1943</prism:publicationYear>
    <prism:publicationName>Philosophy of Science</prism:publicationName>
    <prism:volume>10</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>18</prism:startingPage>
    <prism:endingPage>24</prism:endingPage>
    <prism:category>cybernetics</prism:category>
    <prism:category>feedback</prism:category>
    <prism:category>philosophy-of-science</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1444549">
    <title>Metalearning and neuromodulation.</title>
    <link>http://www.citeulike.org/user/awooga/article/1444549</link>
    <description>&lt;i&gt;Neural Netw, Vol. 15, No. 4-6. (2002), pp. 495-506.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This paper presents a computational theory on the roles of the ascending neuromodulatory systems from the viewpoint that they mediate the global signals that regulate the distributed learning mechanisms in the brain. Based on the review of experimental data and theoretical models, it is proposed that dopamine signals the error in reward prediction, serotonin controls the time scale of reward prediction, noradrenaline controls the randomness in action selection, and acetylcholine controls the speed of memory update. The possible interactions between those neuromodulators and the environment are predicted on the basis of computational theory of metalearning.</description>
    <dc:title>Metalearning and neuromodulation.</dc:title>

    <dc:creator>K Doya</dc:creator>
    <dc:source>Neural Netw, Vol. 15, No. 4-6. (2002), pp. 495-506.</dc:source>
    <dc:date>2007-07-09T15:45:31-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Neural Netw</prism:publicationName>
    <prism:issn>0893-6080</prism:issn>
    <prism:volume>15</prism:volume>
    <prism:number>4-6</prism:number>
    <prism:startingPage>495</prism:startingPage>
    <prism:endingPage>506</prism:endingPage>
    <prism:category>acetylcholine</prism:category>
    <prism:category>dopamine</prism:category>
    <prism:category>neuromodulation</prism:category>
    <prism:category>noradrenaline</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>serotonin</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/556226">
    <title>Reward Timing in the Primary Visual Cortex</title>
    <link>http://www.citeulike.org/user/awooga/article/556226</link>
    <description>&lt;i&gt;Science, Vol. 311, No. 5767. (17 March 2006), pp. 1606-1609.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We discovered that when adult rats experience an association between visual stimuli and subsequent rewards, the responses of a substantial fraction of neurons in the primary visual cortex evolve from those that relate solely to the physical attributes of the stimuli to those that accurately predict the timing of reward. In addition to revealing a remarkable type of response plasticity in adult V1, these data demonstrate that reward-timing activity--a &#34;higher&#34; brain function--can occur very early in sensory-processing paths. These findings challenge the traditional interpretation of activity in the primary visual cortex.</description>
    <dc:title>Reward Timing in the Primary Visual Cortex</dc:title>

    <dc:creator>Marshall Shuler</dc:creator>
    <dc:creator>Mark Bear</dc:creator>
    <dc:identifier>doi:10.1126/science.1123513</dc:identifier>
    <dc:source>Science, Vol. 311, No. 5767. (17 March 2006), pp. 1606-1609.</dc:source>
    <dc:date>2006-03-17T19:30:52-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Science</prism:publicationName>
    <prism:volume>311</prism:volume>
    <prism:number>5767</prism:number>
    <prism:startingPage>1606</prism:startingPage>
    <prism:endingPage>1609</prism:endingPage>
    <prism:category>dopamine</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>visual-cortex</prism:category>
    <prism:category>working-memory</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/167800">
    <title>Selective D2 Receptor Actions on the Functional Circuitry of Working Memory</title>
    <link>http://www.citeulike.org/user/awooga/article/167800</link>
    <description>&lt;i&gt;Science, Vol. 303, No. 5659. (06 February 2004), pp. 853-856.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Prefrontal neurons engaged by working memory tasks express a sequence of phasic and tonic activations linked to a train of sensory, mnemonic, and response-related events. Here, we report that the dopamine D2 receptor selectively modulates the neural activities associated with memory-guided saccades in oculomotor delayed-response tasks yet has little or no effect on the persistent mnemonic-related activity, which is instead modulated by D1 receptors. This associates the D2 receptor with a specific component of working memory circuitry and fractionates the modulatory effects of D1 and D2 receptors on the neural machinery of a cognitive process.</description>
    <dc:title>Selective D2 Receptor Actions on the Functional Circuitry of Working Memory</dc:title>

    <dc:creator>Min Wang</dc:creator>
    <dc:creator>Susheel Vijayraghavan</dc:creator>
    <dc:creator>Patricia Goldman-Rakic</dc:creator>
    <dc:identifier>doi:10.1126/science.1091162</dc:identifier>
    <dc:source>Science, Vol. 303, No. 5659. (06 February 2004), pp. 853-856.</dc:source>
    <dc:date>2005-04-22T23:03:13-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Science</prism:publicationName>
    <prism:volume>303</prism:volume>
    <prism:number>5659</prism:number>
    <prism:startingPage>853</prism:startingPage>
    <prism:endingPage>856</prism:endingPage>
    <prism:category>d2-receptor</prism:category>
    <prism:category>dopamine</prism:category>
    <prism:category>electrophysiology</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>task-switching</prism:category>
    <prism:category>working-memory</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1398480">
    <title>Multiple Dopamine Functions at Different Time Courses</title>
    <link>http://www.citeulike.org/user/awooga/article/1398480</link>
    <description>&lt;i&gt;Annual Review of Neuroscience, Vol. 30, No. 1. (2007)&lt;/i&gt;</description>
    <dc:title>Multiple Dopamine Functions at Different Time Courses</dc:title>

    <dc:creator>W Schultz</dc:creator>
    <dc:source>Annual Review of Neuroscience, Vol. 30, No. 1. (2007)</dc:source>
    <dc:date>2007-06-19T12:19:10-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Annual Review of Neuroscience</prism:publicationName>
    <prism:volume>30</prism:volume>
    <prism:number>1</prism:number>
    <prism:category>dopamine</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>review</prism:category>
    <prism:category>substantia-nigra</prism:category>
    <prism:category>temporal-modes</prism:category>
    <prism:category>vta</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1396873">
    <title>Human Behaviour and the Principle of Least Effort</title>
    <link>http://www.citeulike.org/user/awooga/article/1396873</link>
    <description>&lt;i&gt;&lt;/i&gt;</description>
    <dc:title>Human Behaviour and the Principle of Least Effort</dc:title>

    <dc:creator>George Zipf</dc:creator>
    <dc:date>2007-06-18T12:13:37-00:00</dc:date>
    <prism:publisher>Hafner</prism:publisher>
    <prism:category>behaviour</prism:category>
    <prism:category>power-law</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1300442">
    <title>Functional organization of the medial frontal cortex.</title>
    <link>http://www.citeulike.org/user/awooga/article/1300442</link>
    <description>&lt;i&gt;Curr Opin Neurobiol, Vol. 17, No. 2. (April 2007), pp. 220-227.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The anterior cingulate cortex (ACC) and adjacent areas of the medial frontal cortex (MFC) have been implicated in monitoring behaviour and in detecting errors. Recent evidence, however, suggests that the ACC not only registers the occurrence of errors but also represents other aspects of the reinforcement history that are crucial for guiding behaviour. Other studies raise the possibility that dorsal MFC areas not only monitor behaviour but also actually control response selection, particularly when the task in hand is changing. Many decisions are made in social contexts and their chances of success depend on what other individuals are doing. Evaluation of other individuals is therefore crucial for effective action selection, and some ACC regions are implicated in this process.</description>
    <dc:title>Functional organization of the medial frontal cortex.</dc:title>

    <dc:creator>MF Rushworth</dc:creator>
    <dc:creator>MJ Buckley</dc:creator>
    <dc:creator>TE Behrens</dc:creator>
    <dc:creator>ME Walton</dc:creator>
    <dc:creator>DM Bannerman</dc:creator>
    <dc:identifier>doi:10.1016/j.conb.2007.03.001</dc:identifier>
    <dc:source>Curr Opin Neurobiol, Vol. 17, No. 2. (April 2007), pp. 220-227.</dc:source>
    <dc:date>2007-05-16T16:00:03-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Curr Opin Neurobiol</prism:publicationName>
    <prism:issn>0959-4388</prism:issn>
    <prism:volume>17</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>220</prism:startingPage>
    <prism:endingPage>227</prism:endingPage>
    <prism:category>anterior-cingulate</prism:category>
    <prism:category>fmri</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>review</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/482141">
    <title>Activity in prefrontal cortex during dynamic selection of action sequences</title>
    <link>http://www.citeulike.org/user/awooga/article/482141</link>
    <description>&lt;i&gt;Nature Neuroscience, Vol. 9, No. 2. (22 January 2006), pp. 276-282.&lt;/i&gt;</description>
    <dc:title>Activity in prefrontal cortex during dynamic selection of action sequences</dc:title>

    <dc:creator>Bruno Averbeck</dc:creator>
    <dc:creator>Jeong-Woo Sohn</dc:creator>
    <dc:creator>Daeyeol Lee</dc:creator>
    <dc:identifier>doi:10.1038/nn1634</dc:identifier>
    <dc:source>Nature Neuroscience, Vol. 9, No. 2. (22 January 2006), pp. 276-282.</dc:source>
    <dc:date>2006-01-27T00:56:57-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Nature Neuroscience</prism:publicationName>
    <prism:issn>1097-6256</prism:issn>
    <prism:volume>9</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>276</prism:startingPage>
    <prism:endingPage>282</prism:endingPage>
    <prism:publisher>Nature Publishing Group</prism:publisher>
    <prism:category>electrophysiology</prism:category>
    <prism:category>in-vivo</prism:category>
    <prism:category>plasticity</prism:category>
    <prism:category>prefrontal-cortex</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/590544">
    <title>A role for inhibition in shaping the temporal flow of information in prefrontal cortex</title>
    <link>http://www.citeulike.org/user/awooga/article/590544</link>
    <description>&lt;i&gt;Nat Neurosci, Vol. 5, No. 2. (February 2002), pp. 175-180.&lt;/i&gt;</description>
    <dc:title>A role for inhibition in shaping the temporal flow of information in prefrontal cortex</dc:title>

    <dc:creator>Christos Constantinidis</dc:creator>
    <dc:creator>Graham Williams</dc:creator>
    <dc:creator>Patricia Goldman-Rakic</dc:creator>
    <dc:identifier>doi:10.1038/nn799</dc:identifier>
    <dc:source>Nat Neurosci, Vol. 5, No. 2. (February 2002), pp. 175-180.</dc:source>
    <dc:date>2006-04-18T14:36:22-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Nat Neurosci</prism:publicationName>
    <prism:volume>5</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>175</prism:startingPage>
    <prism:endingPage>180</prism:endingPage>
    <prism:category>electrophysiology</prism:category>
    <prism:category>gabaergic-inhibition</prism:category>
    <prism:category>interneurons</prism:category>
    <prism:category>line-attractors</prism:category>
    <prism:category>prefrontal-cortex</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1047089">
    <title>Solving the Distal Reward Problem through Linkage of STDP and Dopamine Signaling.</title>
    <link>http://www.citeulike.org/user/awooga/article/1047089</link>
    <description>&lt;i&gt;Cereb Cortex (13 January 2007)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In Pavlovian and instrumental conditioning, reward typically comes seconds after reward-triggering actions, creating an explanatory conundrum known as &#34;distal reward problem&#34;: How does the brain know what firing patterns of what neurons are responsible for the reward if 1) the patterns are no longer there when the reward arrives and 2) all neurons and synapses are active during the waiting period to the reward? Here, we show how the conundrum is resolved by a model network of cortical spiking neurons with spike-timing-dependent plasticity (STDP) modulated by dopamine (DA). Although STDP is triggered by nearly coincident firing patterns on a millisecond timescale, slow kinetics of subsequent synaptic plasticity is sensitive to changes in the extracellular DA concentration during the critical period of a few seconds. Random firings during the waiting period to the reward do not affect STDP and hence make the network insensitive to the ongoing activity--the key feature that distinguishes our approach from previous theoretical studies, which implicitly assume that the network be quiet during the waiting period or that the patterns be preserved until the reward arrives. This study emphasizes the importance of precise firing patterns in brain dynamics and suggests how a global diffusive reinforcement signal in the form of extracellular DA can selectively influence the right synapses at the right time.</description>
    <dc:title>Solving the Distal Reward Problem through Linkage of STDP and Dopamine Signaling.</dc:title>

    <dc:creator>Eugene M Izhikevich</dc:creator>
    <dc:identifier>doi:10.1093/cercor/bhl152</dc:identifier>
    <dc:source>Cereb Cortex (13 January 2007)</dc:source>
    <dc:date>2007-01-17T21:02:55-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Cereb Cortex</prism:publicationName>
    <prism:issn>1047-3211</prism:issn>
    <prism:category>abstract-model</prism:category>
    <prism:category>dopamine</prism:category>
    <prism:category>prefrontal-cortex</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>stdp</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1211921">
    <title>Synchronous, Focally Modulated beta-Band Oscillations Characterize Local Field Potential Activity in the Striatum of Awake Behaving Monkeys</title>
    <link>http://www.citeulike.org/user/awooga/article/1211921</link>
    <description>&lt;i&gt;J. Neurosci., Vol. 23, No. 37. (17 December 2003), pp. 11741-11752.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Synchronous oscillatory activity has been observed in a range of neural networks from invertebrate nervous systems to the human frontal cortex. In humans and other primates, sensorimotor regions of the neocortex exhibit synchronous oscillations in the beta-frequency band ([~]15-30 Hz), and these are also prominent in the cerebellum, a brainstem sensorimotor region. However, recordings in the basal ganglia have suggested that such beta-band oscillations are not normally a primary feature of these structures. Instead, they become a dominant feature of neural activity in the basal ganglia in Parkinson's disease and in parkinsonian states induced by dopamine depletion in experimental animals. Here we demonstrate that when multiple electrodes are used to record local field potentials, 10-25 Hz oscillations can be readily detected in the striatum of normal macaque monkeys. These normally occurring oscillations are highly synchronous across large regions of the striatum. Furthermore, they are subject to dynamic modulation when monkeys perform a simple motor task to earn rewards. In the striatal region representing oculomotor activity, we found that small focal zones could pop in and out of synchrony as the monkeys made saccadic eye movements, suggesting that the broadly synchronous oscillatory activity interfaces with modular spatiotemporal patterns of task-related activity. We suggest that the background beta-band oscillations in the striatum could help to focus action-selection network functions of cortico-basal ganglia circuits.</description>
    <dc:title>Synchronous, Focally Modulated beta-Band Oscillations Characterize Local Field Potential Activity in the Striatum of Awake Behaving Monkeys</dc:title>

    <dc:creator>Richard Courtemanche</dc:creator>
    <dc:creator>Naotaka Fujii</dc:creator>
    <dc:creator>Ann Graybiel</dc:creator>
    <dc:source>J. Neurosci., Vol. 23, No. 37. (17 December 2003), pp. 11741-11752.</dc:source>
    <dc:date>2007-04-06T13:30:55-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>J. Neurosci.</prism:publicationName>
    <prism:volume>23</prism:volume>
    <prism:number>37</prism:number>
    <prism:startingPage>11741</prism:startingPage>
    <prism:endingPage>11752</prism:endingPage>
    <prism:category>local-field-potential</prism:category>
    <prism:category>oscillations</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>striatum</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/963346">
    <title>Propagating waves mediate information transfer in the motor cortex</title>
    <link>http://www.citeulike.org/user/awooga/article/963346</link>
    <description>&lt;i&gt;Nature Neuroscience, Vol. 9, No. 12. (19 November 2006), pp. 1549-1557.&lt;/i&gt;</description>
    <dc:title>Propagating waves mediate information transfer in the motor cortex</dc:title>

    <dc:creator>Doug Rubino</dc:creator>
    <dc:creator>Kay Robbins</dc:creator>
    <dc:creator>Nicholas Hatsopoulos</dc:creator>
    <dc:identifier>doi:10.1038/nn1802</dc:identifier>
    <dc:source>Nature Neuroscience, Vol. 9, No. 12. (19 November 2006), pp. 1549-1557.</dc:source>
    <dc:date>2006-11-27T14:33:18-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Nature Neuroscience</prism:publicationName>
    <prism:issn>1097-6256</prism:issn>
    <prism:volume>9</prism:volume>
    <prism:number>12</prism:number>
    <prism:startingPage>1549</prism:startingPage>
    <prism:endingPage>1557</prism:endingPage>
    <prism:publisher>Nature Publishing Group</prism:publisher>
    <prism:category>cortex</prism:category>
    <prism:category>local-field-potential</prism:category>
    <prism:category>oscillations</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1209970">
    <title>Timecourse of object-related neural activity in the primate prefrontal cortex during a short-term memory task.</title>
    <link>http://www.citeulike.org/user/awooga/article/1209970</link>
    <description>&lt;i&gt;Eur J Neurosci, Vol. 15, No. 7. (April 2002), pp. 1244-1254.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We studied the timecourse of neural activity in the primate (Macaca mulatta) prefrontal (PF) cortex during an object delayed-matching-to-sample (DMS) task. To assess the effects of experience on this timecourse, we conducted the task using both novel and highly familiar objects. In addition, noise patterns containing no task-relevant information were used as samples on some trials. Comparison of average PF ensemble activity relative to baseline activity generated by objects and noise patterns revealed three distinct activity periods. (i) Sample onset elicited a transient sensory visual response. In this sensory period, novel objects elicited stronger average ensemble activity than both familiar objects and noise patterns. (ii) An intermediate period of elevated activity followed, which began before sample offset, and continued well into the delay period. In the intermediate period, activity was elevated for noise patterns and novel objects, but near baseline for familiar objects. (iii) Finally, after average ensemble activity reached baseline activity at the end of the intermediate period, a reactivation period occurred late in the delay. Experience had little effect during reactivation, where activity was elevated for both novel and familiar objects compared to noise patterns. We show that the ensemble average resembles the activity timecourse of many single prefrontal neurons. These results suggest that PF delay activity does not merely maintain recent sensory input, but is subject to more complex experience-dependent dynamics. This has implications for how delay activity is generated and maintained.</description>
    <dc:title>Timecourse of object-related neural activity in the primate prefrontal cortex during a short-term memory task.</dc:title>

    <dc:creator>G Rainer</dc:creator>
    <dc:creator>EK Miller</dc:creator>
    <dc:source>Eur J Neurosci, Vol. 15, No. 7. (April 2002), pp. 1244-1254.</dc:source>
    <dc:date>2007-04-05T11:16:32-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Eur J Neurosci</prism:publicationName>
    <prism:issn>0953-816X</prism:issn>
    <prism:volume>15</prism:volume>
    <prism:number>7</prism:number>
    <prism:startingPage>1244</prism:startingPage>
    <prism:endingPage>1254</prism:endingPage>
    <prism:category>behavioural-test</prism:category>
    <prism:category>prefrontal-cortex</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>working-memory</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1206075">
    <title>Behavioral dopamine signals</title>
    <link>http://www.citeulike.org/user/awooga/article/1206075</link>
    <description>&lt;i&gt;Trends in Neurosciences, Vol. In Press, Corrected Proof&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Lesioning and psychopharmacological studies suggest a wide range of behavioral functions for ascending midbrain dopaminergic systems. However, electrophysiological and neurochemical studies during specific behavioral tasks demonstrate a more restricted spectrum of dopamine-mediated changes. Substantial increases in dopamine-mediated activity, as measured by electrophysiology or voltammetry, are related to rewards and reward-predicting stimuli. A somewhat slower, distinct electrophysiological response encodes the uncertainty associated with rewards. Aversive events produce different, mostly slower, electrophysiological dopamine responses that consist predominantly of depressions. Additionally, more modest dopamine concentration fluctuations, related to punishment and movement, are seen at 200-18 000 times longer time courses using voltammetry and microdialysis in vivo. Using these responses, dopamine neurotransmission provides differential and heterogeneous information to subcortical and cortical brain structures about essential outcome components for approach behavior, learning and economic decision-making.</description>
    <dc:title>Behavioral dopamine signals</dc:title>

    <dc:creator>Wolfram Schultz</dc:creator>
    <dc:identifier>doi:10.1016/j.tins.2007.03.007</dc:identifier>
    <dc:source>Trends in Neurosciences, Vol. In Press, Corrected Proof</dc:source>
    <dc:date>2007-04-04T14:07:26-00:00</dc:date>
    <prism:publicationName>Trends in Neurosciences</prism:publicationName>
    <prism:volume>In Press, Corrected Proof</prism:volume>
    <prism:category>dopamine</prism:category>
    <prism:category>microdialysis</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>voltammetry</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1205798">
    <title>Adaptation of Prefrontal Cortical Firing Patterns and Their Fidelity to Changes in Action-Reward Contingencies</title>
    <link>http://www.citeulike.org/user/awooga/article/1205798</link>
    <description>&lt;i&gt;J. Neurosci., Vol. 27, No. 13. (28 March 2007), pp. 3548-3559.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Animals adapt action-selection policies when the relationship between possible actions and associated outcomes changes. Prefrontal cortical neurons vary their discharge patterns depending on action choice and rewards received and undoubtedly play a pivotal role in maintaining and adapting action policies. Here, we recorded neurons from the medial precentral subregion of mouse prefrontal cortex to examine neural substrates of goal-directed behavior. Discharge patterns were recorded after animals developed stable action-selection policies, wherein four possible action sequences were invariably related to different reward magnitudes and during adaptation to changes in the action-reward contingencies. During the adaptation period, when the same action sequence resulted in different reward magnitudes, many neurons (38%) exhibited significantly different discharge patterns for identical action sequences, well before reaching the reward site. In addition, trial-to-trial reliability of ensemble pattern production leading up to reward was found to vary both positively and negatively with increases and decreases in reward magnitude, respectively. Pairwise analyses of simultaneously recorded neurons revealed that decreased reliability in part reflected fluctuations between different ensemble activity patterns as opposed to within-pattern variability. Increases in reliability were related to an increased probability of both selecting highly rewarding actions and completing such actions without pause or reversal, whereas decreases in reliability were associated with the opposite pattern. Thus, we suggest that both the spatiotemporal pattern and fidelity of prefrontal cortical discharge are impacted by action-outcome relationships and that each of these features serve to adapt action choices and maintain behaviors leading to reward. 
10.1523/JNEUROSCI.3604-06.2007</description>
    <dc:title>Adaptation of Prefrontal Cortical Firing Patterns and Their Fidelity to Changes in Action-Reward Contingencies</dc:title>

    <dc:creator>William Kargo</dc:creator>
    <dc:creator>Botond Szatmary</dc:creator>
    <dc:creator>Douglas Nitz</dc:creator>
    <dc:identifier>doi:10.1523/JNEUROSCI.3604-06.2007</dc:identifier>
    <dc:source>J. Neurosci., Vol. 27, No. 13. (28 March 2007), pp. 3548-3559.</dc:source>
    <dc:date>2007-04-04T11:35:28-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>J. Neurosci.</prism:publicationName>
    <prism:volume>27</prism:volume>
    <prism:number>13</prism:number>
    <prism:startingPage>3548</prism:startingPage>
    <prism:endingPage>3559</prism:endingPage>
    <prism:category>dopamine</prism:category>
    <prism:category>electrophysiology</prism:category>
    <prism:category>prefrontal-cortex</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/467922">
    <title>Predictive neural coding of reward preference involves dissociable responses in human ventral midbrain and ventral striatum.</title>
    <link>http://www.citeulike.org/user/awooga/article/467922</link>
    <description>&lt;i&gt;Neuron, Vol. 49, No. 1. (5 January 2006), pp. 157-166.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Food preferences are acquired through experience and can exert strong influence on choice behavior. In order to choose which food to consume, it is necessary to maintain a predictive representation of the subjective value of the associated food stimulus. Here, we explore the neural mechanisms by which such predictive representations are learned through classical conditioning. Human subjects were scanned using fMRI while learning associations between arbitrary visual stimuli and subsequent delivery of one of five different food flavors. Using a temporal difference algorithm to model learning, we found predictive responses in the ventral midbrain and a part of ventral striatum (ventral putamen) that were related directly to subjects' actual behavioral preferences. These brain structures demonstrated divergent response profiles, with the ventral midbrain showing a linear response profile with preference, and the ventral striatum a bivalent response. These results provide insight into the neural mechanisms underlying human preference behavior.</description>
    <dc:title>Predictive neural coding of reward preference involves dissociable responses in human ventral midbrain and ventral striatum.</dc:title>

    <dc:creator>JP O'Doherty</dc:creator>
    <dc:creator>TW Buchanan</dc:creator>
    <dc:creator>B Seymour</dc:creator>
    <dc:creator>RJ Dolan</dc:creator>
    <dc:identifier>doi:10.1016/j.neuron.2005.11.014</dc:identifier>
    <dc:source>Neuron, Vol. 49, No. 1. (5 January 2006), pp. 157-166.</dc:source>
    <dc:date>2006-01-18T01:28:49-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Neuron</prism:publicationName>
    <prism:issn>0896-6273</prism:issn>
    <prism:volume>49</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>157</prism:startingPage>
    <prism:endingPage>166</prism:endingPage>
    <prism:category>dopamine</prism:category>
    <prism:category>fmri</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>striatum</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1106726">
    <title>Modeling functions of striatal dopamine modulation in learning and planning.</title>
    <link>http://www.citeulike.org/user/awooga/article/1106726</link>
    <description>&lt;i&gt;Neuroscience, Vol. 103, No. 1. (2001), pp. 65-85.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The activity of midbrain dopamine neurons is strikingly similar to the reward prediction error of temporal difference reinforcement learning models. Experimental evidence and simulation studies suggest that dopamine neuron activity serves as an effective reinforcement signal for learning of sensorimotor associations in striatal matrisomes. In the current study, we simulate dopamine neuron activity with the extended temporal difference model of Pavlovian learning and examine the influences of this signal on medium spiny neurons in striatal matrisomes. The modeled influences include transient membrane effects of dopamine D(1) receptor activation, dopamine-dependent long-term adaptations of corticostriatal transmission, and effects of dopamine on rhythmic fluctuations of the membrane potential between an elevated &#34;up-state&#34; and a hyperpolarized &#34;down-state&#34;. The most dominant activity in the striatal matrisomes is assumed to elicit behaviors via projections from the basal ganglia to the thalamus and the cortex. This &#34;standard model&#34; performs successfully when tested for sensorimotor learning and goal-directed behavior (planning). To investigate the contributions of our model assumptions to learning and planning, we test the performance of several model variants that lack one of these mechanisms. These simulations show that the adaptation of the dopamine-like signal is necessary for sensorimotor learning and planning. Sensorimotor learning requires dopamine-dependent long-term adaptation of corticostriatal transmission. Lack of dopamine-like novelty responses decreases the number of exploratory acts, which impairs planning capabilities. 
The model loses its planning capabilities if the dopamine-like signal is simulated with the original temporal difference model, because the original temporal difference model does not form novel associative chains. Transient membrane effects of the dopamine-like signal on striatal firing substantially shorten the reaction time in the planning task. The capability for planning is improved by influences of dopamine on the durations of membrane potential fluctuations and by manipulations that prolong the reaction time of the model. These results suggest that responses of dopamine neurons to conditioned stimuli contribute to sensorimotor reward learning, novelty responses of dopamine neurons stimulate exploration, and transient dopamine membrane effects are important for planning.</description>
    <dc:title>Modeling functions of striatal dopamine modulation in learning and planning.</dc:title>

    <dc:creator>RE Suri</dc:creator>
    <dc:creator>J Bargas</dc:creator>
    <dc:creator>MA Arbib</dc:creator>
    <dc:source>Neuroscience, Vol. 103, No. 1. (2001), pp. 65-85.</dc:source>
    <dc:date>2007-02-14T11:47:59-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:publicationName>Neuroscience</prism:publicationName>
    <prism:issn>0306-4522</prism:issn>
    <prism:volume>103</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>65</prism:startingPage>
    <prism:endingPage>85</prism:endingPage>
    <prism:category>dopamine</prism:category>
    <prism:category>model</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>striatum</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1084672">
    <title>Dopamine, uncertainty and TD learning.</title>
    <link>http://www.citeulike.org/user/awooga/article/1084672</link>
    <description>&lt;i&gt;Behav Brain Funct, Vol. 1 (4 May 2005)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Substantial evidence suggests that the phasic activities of dopaminergic neurons in the primate midbrain represent a temporal difference (TD) error in predictions of future reward, with increases above and decreases below baseline consequent on positive and negative prediction errors, respectively. However, dopamine cells have very low baseline activity, which implies that the representation of these two sorts of error is asymmetric. We explore the implications of this seemingly innocuous asymmetry for the interpretation of dopaminergic firing patterns in experiments with probabilistic rewards which bring about persistent prediction errors. In particular, we show that when averaging the non-stationary prediction errors across trials, a ramping in the activity of the dopamine neurons should be apparent, whose magnitude is dependent on the learning rate. This exact phenomenon was observed in a recent experiment, though being interpreted there in antipodal terms as a within-trial encoding of uncertainty.</description>
    <dc:title>Dopamine, uncertainty and TD learning.</dc:title>

    <dc:creator>Y Niv</dc:creator>
    <dc:creator>MO Duff</dc:creator>
    <dc:creator>P Dayan</dc:creator>
    <dc:identifier>doi:10.1186/1744-9081-1-6</dc:identifier>
    <dc:source>Behav Brain Funct, Vol. 1 (4 May 2005)</dc:source>
    <dc:date>2007-02-02T17:22:31-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Behav Brain Funct</prism:publicationName>
    <prism:issn>1744-9081</prism:issn>
    <prism:volume>1</prism:volume>
    <prism:category>dopamine</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>uncertainty</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1074689">
    <title>Temporal Difference Model Reproduces Anticipatory Neural Activity</title>
    <link>http://www.citeulike.org/user/awooga/article/1074689</link>
    <description>&lt;i&gt;Neural Comp., Vol. 13, No. 4. (1 April 2001), pp. 841-862.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Anticipatory neural activity preceding behaviorally important events has been reported in cortex, striatum, and midbrain dopamine neurons. Whereas dopamine neurons are phasically activated by reward-predictive stimuli, anticipatory activity of cortical and striatal neurons is increased during delay periods before important events. Characteristics of dopamine neuron activity resemble those of the prediction error signal of the temporal difference (TD) model of Pavlovian learning (Sutton &#38; Barto, 1990). This study demonstrates that the prediction signal of the TD model reproduces characteristics of cortical and striatal anticipatory neural activity. This finding suggests that tonic anticipatory activities may reflect prediction signals that are involved in the processing of dopamine neuron activity.</description>
    <dc:title>Temporal Difference Model Reproduces Anticipatory Neural Activity</dc:title>

    <dc:creator>Roland Suri</dc:creator>
    <dc:creator>Wolfram Schultz</dc:creator>
    <dc:source>Neural Comp., Vol. 13, No. 4. (1 April 2001), pp. 841-862.</dc:source>
    <dc:date>2007-01-29T17:30:49-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:publicationName>Neural Comp.</prism:publicationName>
    <prism:volume>13</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>841</prism:startingPage>
    <prism:endingPage>862</prism:endingPage>
    <prism:category>dopamine</prism:category>
    <prism:category>prediction</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/546145">
    <title>Reward, motivation, and reinforcement learning.</title>
    <link>http://www.citeulike.org/user/awooga/article/546145</link>
    <description>&lt;i&gt;Neuron, Vol. 36, No. 2. (10 October 2002), pp. 285-298.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;There is substantial evidence that dopamine is involved in reward learning and appetitive conditioning. However, the major reinforcement learning-based theoretical models of classical conditioning (crudely, prediction learning) are actually based on rules designed to explain instrumental conditioning (action learning). Extensive anatomical, pharmacological, and psychological data, particularly concerning the impact of motivational manipulations, show that these models are unreasonable. We review the data and consider the involvement of a rich collection of different neural systems in various aspects of these forms of conditioning. Dopamine plays a pivotal, but complicated, role.</description>
    <dc:title>Reward, motivation, and reinforcement learning.</dc:title>

    <dc:creator>P Dayan</dc:creator>
    <dc:creator>BW Balleine</dc:creator>
    <dc:source>Neuron, Vol. 36, No. 2. (10 October 2002), pp. 285-298.</dc:source>
    <dc:date>2006-03-10T14:10:15-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Neuron</prism:publicationName>
    <prism:issn>0896-6273</prism:issn>
    <prism:volume>36</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>285</prism:startingPage>
    <prism:endingPage>298</prism:endingPage>
    <prism:category>conditioning</prism:category>
    <prism:category>dopamine</prism:category>
    <prism:category>model</prism:category>
    <prism:category>motivation</prism:category>
    <prism:category>nucleus-accumbens</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/670257">
    <title>Representation and Timing in Theories of the Dopamine System</title>
    <link>http://www.citeulike.org/user/awooga/article/670257</link>
    <description>&lt;i&gt;Neural Comp., Vol. 18, No. 7. (1 July 2006), pp. 1637-1677.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Although the responses of dopamine neurons in the primate midbrain are well characterized as carrying a temporal difference (TD) error signal for reward prediction, existing theories do not offer a credible account of how the brain keeps track of past sensory events that may be relevant to predicting future reward. Empirically, these shortcomings of previous theories are particularly evident in their account of experiments in which animals were exposed to variation in the timing of events. The original theories mispredicted the results of such experiments due to their use of a representational device called a tapped delay line. Here we propose that a richer understanding of history representation and a better account of these experiments can be given by considering TD algorithms for a formal setting that incorporates two features not originally considered in theories of the dopaminergic response: partial observability (a distinction between the animal's sensory experience and the true underlying state of the world) and semi-Markov dynamics (an explicit account of variation in the intervals between events). The new theory situates the dopaminergic system in a richer functional and anatomical context, since it assumes (in accord with recent computational theories of cortex) that problems of partial observability and stimulus history are solved in sensory cortex using statistical modeling and inference and that the TD system predicts reward using the results of this inference rather than raw sensory data. It also accounts for a range of experimental data, including the experiments involving programmed temporal variability and other previously unmodeled dopaminergic response phenomena, which we suggest are related to subjective noise in animals' interval timing. 
Finally, it offers new experimental predictions and a rich theoretical framework for designing future experiments.</description>
    <dc:title>Representation and Timing in Theories of the Dopamine System</dc:title>

    <dc:creator>Nathaniel Daw</dc:creator>
    <dc:creator>Aaron Courville</dc:creator>
    <dc:creator>David Touretzky</dc:creator>
    <dc:source>Neural Comp., Vol. 18, No. 7. (1 July 2006), pp. 1637-1677.</dc:source>
    <dc:date>2006-05-25T16:20:43-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Neural Comp.</prism:publicationName>
    <prism:volume>18</prism:volume>
    <prism:number>7</prism:number>
    <prism:startingPage>1637</prism:startingPage>
    <prism:endingPage>1677</prism:endingPage>
    <prism:category>dopamine</prism:category>
    <prism:category>model</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>uncertainty</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1044619">
    <title>Evidence that the delay-period activity of dopamine neurons corresponds to reward uncertainty rather than backpropagating TD errors.</title>
    <link>http://www.citeulike.org/user/awooga/article/1044619</link>
    <description>&lt;i&gt;Behav Brain Funct, Vol. 1, No. 1. (15 June 2005)&lt;/i&gt;</description>
    <dc:title>Evidence that the delay-period activity of dopamine neurons corresponds to reward uncertainty rather than backpropagating TD errors.</dc:title>

    <dc:creator>CD Fiorillo</dc:creator>
    <dc:creator>PN Tobler</dc:creator>
    <dc:creator>W Schultz</dc:creator>
    <dc:identifier>doi:10.1186/1744-9081-1-7</dc:identifier>
    <dc:source>Behav Brain Funct, Vol. 1, No. 1. (15 June 2005)</dc:source>
    <dc:date>2007-01-16T15:53:09-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Behav Brain Funct</prism:publicationName>
    <prism:issn>1744-9081</prism:issn>
    <prism:volume>1</prism:volume>
    <prism:number>1</prism:number>
    <prism:category>dopamine</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>reward-uncertainty</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/121957">
    <title>Discrete coding of reward probability and uncertainty by dopamine neurons.</title>
    <link>http://www.citeulike.org/user/awooga/article/121957</link>
    <description>&lt;i&gt;Science, Vol. 299, No. 5614. (21 March 2003), pp. 1898-1902.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Uncertainty is critical in the measure of information and in assessing the accuracy of predictions. It is determined by probability P, being maximal at P = 0.5 and decreasing at higher and lower probabilities. Using distinct stimuli to indicate the probability of reward, we found that the phasic activation of dopamine neurons varied monotonically across the full range of probabilities, supporting past claims that this response codes the discrepancy between predicted and actual reward. In contrast, a previously unobserved response covaried with uncertainty and consisted of a gradual increase in activity until the potential time of reward. The coding of uncertainty suggests a possible role for dopamine signals in attention-based learning and risk-taking behavior.</description>
    <dc:title>Discrete coding of reward probability and uncertainty by dopamine neurons.</dc:title>

    <dc:creator>CD Fiorillo</dc:creator>
    <dc:creator>PN Tobler</dc:creator>
    <dc:creator>W Schultz</dc:creator>
    <dc:identifier>doi:10.1126/science.1077349</dc:identifier>
    <dc:source>Science, Vol. 299, No. 5614. (21 March 2003), pp. 1898-1902.</dc:source>
    <dc:date>2005-03-11T16:19:28-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Science</prism:publicationName>
    <prism:issn>1095-9203</prism:issn>
    <prism:volume>299</prism:volume>
    <prism:number>5614</prism:number>
    <prism:startingPage>1898</prism:startingPage>
    <prism:endingPage>1902</prism:endingPage>
    <prism:category>dopamine</prism:category>
    <prism:category>electrophysiology</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>reward-uncertainty</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/121955">
    <title>A neural substrate of prediction and reward.</title>
    <link>http://www.citeulike.org/user/awooga/article/121955</link>
    <description>&lt;i&gt;Science, Vol. 275, No. 5306. (14 March 1997), pp. 1593-1599.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The capacity to predict future events permits a creature to detect, model, and manipulate the causal structure of its interactions with its environment. Behavioral experiments suggest that learning is driven by changes in the expectations about future salient events such as rewards and punishments. Physiological work has recently complemented these studies by identifying dopaminergic neurons in the primate whose fluctuating output apparently signals changes or errors in the predictions of future salient and rewarding events. Taken together, these findings can be understood through quantitative theories of adaptive optimizing control.</description>
    <dc:title>A neural substrate of prediction and reward.</dc:title>

    <dc:creator>W Schultz</dc:creator>
    <dc:creator>P Dayan</dc:creator>
    <dc:creator>PR Montague</dc:creator>
    <dc:source>Science, Vol. 275, No. 5306. (14 March 1997), pp. 1593-1599.</dc:source>
    <dc:date>2005-03-11T16:16:49-00:00</dc:date>
    <prism:publicationYear>1997</prism:publicationYear>
    <prism:publicationName>Science</prism:publicationName>
    <prism:issn>0036-8075</prism:issn>
    <prism:volume>275</prism:volume>
    <prism:number>5306</prism:number>
    <prism:startingPage>1593</prism:startingPage>
    <prism:endingPage>1599</prism:endingPage>
    <prism:category>dopamine</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>review</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/121953">
    <title>Predictive reward signal of dopamine neurons.</title>
    <link>http://www.citeulike.org/user/awooga/article/121953</link>
    <description>&lt;i&gt;J Neurophysiol, Vol. 80, No. 1. (July 1998), pp. 1-27.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The effects of lesions, receptor blocking, electrical self-stimulation, and drugs of abuse suggest that midbrain dopamine systems are involved in processing reward information and learning approach behavior. Most dopamine neurons show phasic activations after primary liquid and food rewards and conditioned, reward-predicting visual and auditory stimuli. They show biphasic, activation-depression responses after stimuli that resemble reward-predicting stimuli or are novel or particularly salient. However, only few phasic activations follow aversive stimuli. Thus dopamine neurons label environmental stimuli with appetitive value, predict and detect rewards and signal alerting and motivating events. By failing to discriminate between different rewards, dopamine neurons appear to emit an alerting message about the surprising presence or absence of rewards. All responses to rewards and reward-predicting stimuli depend on event predictability. Dopamine neurons are activated by rewarding events that are better than predicted, remain uninfluenced by events that are as good as predicted, and are depressed by events that are worse than predicted. By signaling rewards according to a prediction error, dopamine responses have the formal characteristics of a teaching signal postulated by reinforcement learning theories. Dopamine responses transfer during learning from primary rewards to reward-predicting stimuli. This may contribute to neuronal mechanisms underlying the retrograde action of rewards, one of the main puzzles in reinforcement learning. The impulse response releases a short pulse of dopamine onto many dendrites, thus broadcasting a rather global reinforcement signal to postsynaptic neurons. 
This signal may improve approach behavior by providing advance reward information before the behavior occurs, and may contribute to learning by modifying synaptic transmission. The dopamine reward signal is supplemented by activity in neurons in striatum, frontal cortex, and amygdala, which process specific reward information but do not emit a global reward prediction error signal. A cooperation between the different reward signals may assure the use of specific rewards for selectively reinforcing behaviors. Among the other projection systems, noradrenaline neurons predominantly serve attentional mechanisms and nucleus basalis neurons code rewards heterogeneously. Cerebellar climbing fibers signal errors in motor performance or errors in the prediction of aversive events to cerebellar Purkinje cells. Most deficits following dopamine-depleting lesions are not easily explained by a defective reward signal but may reflect the absence of a general enabling function of tonic levels of extracellular dopamine. Thus dopamine systems may have two functions, the phasic transmission of reward information and the tonic enabling of postsynaptic neurons.</description>
    <dc:title>Predictive reward signal of dopamine neurons.</dc:title>

    <dc:creator>W Schultz</dc:creator>
    <dc:source>J Neurophysiol, Vol. 80, No. 1. (July 1998), pp. 1-27.</dc:source>
    <dc:date>2005-03-11T16:15:59-00:00</dc:date>
    <prism:publicationYear>1998</prism:publicationYear>
    <prism:publicationName>J Neurophysiol</prism:publicationName>
    <prism:issn>0022-3077</prism:issn>
    <prism:volume>80</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>1</prism:startingPage>
    <prism:endingPage>27</prism:endingPage>
    <prism:category>dopamine</prism:category>
    <prism:category>noradrenaline</prism:category>
    <prism:category>novelty</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>review</prism:category>
    <prism:category>reward</prism:category>
    <prism:category>saliency</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/1014412">
    <title>Separate neural substrates for skill learning and performance in the ventral and dorsal striatum</title>
    <link>http://www.citeulike.org/user/awooga/article/1014412</link>
    <description>&lt;i&gt;Nature Neuroscience, Vol. 10, No. 1. (24 December 2006), pp. 126-131.&lt;/i&gt;</description>
    <dc:title>Separate neural substrates for skill learning and performance in the ventral and dorsal striatum</dc:title>

    <dc:creator>Hisham Atallah</dc:creator>
    <dc:creator>Dan Lopez-Paniagua</dc:creator>
    <dc:creator>Jerry Rudy</dc:creator>
    <dc:creator>Randall O'Reilly</dc:creator>
    <dc:identifier>doi:10.1038/nn1817</dc:identifier>
    <dc:source>Nature Neuroscience, Vol. 10, No. 1. (24 December 2006), pp. 126-131.</dc:source>
    <dc:date>2006-12-26T08:29:05-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Nature Neuroscience</prism:publicationName>
    <prism:issn>1097-6256</prism:issn>
    <prism:volume>10</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>126</prism:startingPage>
    <prism:endingPage>131</prism:endingPage>
    <prism:publisher>Nature Publishing Group</prism:publisher>
    <prism:category>basal-ganglia</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>striatum</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/973018">
    <title>Mesocortical dopamine modulation of executive functions: beyond working memory.</title>
    <link>http://www.citeulike.org/user/awooga/article/973018</link>
    <description>&lt;i&gt;Psychopharmacology (Berl), Vol. 188, No. 4. (November 2006), pp. 567-585.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;RATIONALE: Dopamine (DA) neurotransmission in the prefrontal cortex (PFC) is known to play an essential role in mediating executive functions such as the working memory. DA exerts these effects by acting on D(1) receptors because blockade or stimulation of these receptors in the PFC can impair performance on delayed response tasks. However, comparatively less is known about dopaminergic mechanisms that mediate other executive functions regulated by the PFC. Furthermore, the functional importance of other DA receptor subtypes that reside on PFC neurons (D(2) and D(4)) is unclear. OBJECTIVES: This review will summarize previous findings and previously unpublished data addressing the contribution of PFC DA to higher-order cognition. We will compare the DA receptor mechanisms, which regulate executive functions such as working memory, behavioral flexibility, and decision-making. RESULTS AND CONCLUSIONS: Whereas PFC D(1) receptor activity is of primary importance in working memory, D(1) and D(2) receptors act in a cooperative manner to facilitate behavioral flexibility. We note that the principle of the &#34;inverted U-shaped&#34; function of D(1) receptor activity mediating working memory does not necessarily apply to other PFC functions. DA in different subregions of the PFC also mediates decision-making assessed with delay discounting or effort-based procedures, and we report that D(1), D(2), and D(4) receptors in the medial PFC contribute to decision-making when animals must bias the direction of behavior to avoid aversive stimuli, assessed with a conditioned punishment procedure. Thus, mesocortical DA modulation of distinct executive functions is subserved by dissociable profiles of DA receptor activity in the PFC.</description>
    <dc:title>Mesocortical dopamine modulation of executive functions: beyond working memory.</dc:title>

    <dc:creator>SB Floresco</dc:creator>
    <dc:creator>O Magyar</dc:creator>
    <dc:identifier>doi:10.1007/s00213-006-0404-5</dc:identifier>
    <dc:source>Psychopharmacology (Berl), Vol. 188, No. 4. (November 2006), pp. 567-585.</dc:source>
    <dc:date>2006-12-04T12:36:22-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Psychopharmacology (Berl)</prism:publicationName>
    <prism:issn>0033-3158</prism:issn>
    <prism:volume>188</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>567</prism:startingPage>
    <prism:endingPage>585</prism:endingPage>
    <prism:category>basal-ganglia</prism:category>
    <prism:category>delay-discounting</prism:category>
    <prism:category>dopamine</prism:category>
    <prism:category>prefrontal-cortex</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>review</prism:category>
    <prism:category>working-memory</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/355573">
    <title>Activity of striatal neurons reflects dynamic encoding and recoding of procedural memories</title>
    <link>http://www.citeulike.org/user/awooga/article/355573</link>
    <description>&lt;i&gt;Nature, Vol. 437, No. 7062., pp. 1158-1161.&lt;/i&gt;</description>
    <dc:title>Activity of striatal neurons reflects dynamic encoding and recoding of procedural memories</dc:title>

    <dc:creator>Terra Barnes</dc:creator>
    <dc:creator>Yasuo Kubota</dc:creator>
    <dc:creator>Dan Hu</dc:creator>
    <dc:creator>Dezhe Jin</dc:creator>
    <dc:creator>Ann Graybiel</dc:creator>
    <dc:identifier>doi:10.1038/nature04053</dc:identifier>
    <dc:source>Nature, Vol. 437, No. 7062., pp. 1158-1161.</dc:source>
    <dc:date>2005-10-20T02:51:21-00:00</dc:date>
    <prism:publicationName>Nature</prism:publicationName>
    <prism:issn>0028-0836</prism:issn>
    <prism:volume>437</prism:volume>
    <prism:number>7062</prism:number>
    <prism:startingPage>1158</prism:startingPage>
    <prism:endingPage>1161</prism:endingPage>
    <prism:publisher>Nature Publishing Group</prism:publisher>
    <prism:category>basal-ganglia</prism:category>
    <prism:category>prefrontal-cortex</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>striatum</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/963570">
    <title>A neural network model with dopamine-like reinforcement signal that learns a spatial delayed response task.</title>
    <link>http://www.citeulike.org/user/awooga/article/963570</link>
    <description>&lt;i&gt;Neuroscience, Vol. 91, No. 3. (1999), pp. 871-890.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This study investigated how the simulated response of dopamine neurons to reward-related stimuli could be used as reinforcement signal for learning a spatial delayed response task. Spatial delayed response tasks assess the functions of frontal cortex and basal ganglia in short-term memory, movement preparation and expectation of environmental events. In these tasks, a stimulus appears for a short period at a particular location, and after a delay the subject moves to the location indicated. Dopamine neurons are activated by unpredicted rewards and reward-predicting stimuli, are not influenced by fully predicted rewards, and are depressed by omitted rewards. Thus, they appear to report an error in the prediction of reward, which is the crucial reinforcement term in formal learning theories. Theoretical studies on reinforcement learning have shown that signals similar to dopamine responses can be used as effective teaching signals for learning. A neural network model implementing the temporal difference algorithm was trained to perform a simulated spatial delayed response task. The reinforcement signal was modeled according to the basic characteristics of dopamine responses to novel stimuli, primary rewards and reward-predicting stimuli. A Critic component analogous to dopamine neurons computed a temporal error in the prediction of reinforcement and emitted this signal to an Actor component which mediated the behavioral output. The spatial delayed response task was learned via two subtasks introducing spatial choices and temporal delays, in the same manner as monkeys in the laboratory. In all three tasks, the reinforcement signal of the Critic developed in a similar manner to the responses of natural dopamine neurons in comparable learning situations, and the learning curves of the Actor replicated the progress of learning observed in the animals. 
Several manipulations demonstrated further the efficacy of the particular characteristics of the dopamine-like reinforcement signal. Omission of reward induced a phasic reduction of the reinforcement signal at the time of the reward and led to extinction of learned actions. A reinforcement signal without prediction error resulted in impaired learning because of perseverative errors. Loss of learned behavior was seen with sustained reductions of the reinforcement signal, a situation in general comparable to the loss of dopamine innervation in Parkinsonian patients and experimentally lesioned animals. The striking similarities in teaching signals and learning behavior between the computational and biological results suggest that dopamine-like reward responses may serve as effective teaching signals for learning behavioral tasks that are typical for primate cognitive behavior, such as spatial delayed responding.</description>
    <dc:title>A neural network model with dopamine-like reinforcement signal that learns a spatial delayed response task.</dc:title>

    <dc:creator>RE Suri</dc:creator>
    <dc:creator>W Schultz</dc:creator>
    <dc:source>Neuroscience, Vol. 91, No. 3. (1999), pp. 871-890.</dc:source>
    <dc:date>2006-11-27T16:32:28-00:00</dc:date>
    <prism:publicationYear>1999</prism:publicationYear>
    <prism:publicationName>Neuroscience</prism:publicationName>
    <prism:issn>0306-4522</prism:issn>
    <prism:volume>91</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>871</prism:startingPage>
    <prism:endingPage>890</prism:endingPage>
    <prism:category>abstract</prism:category>
    <prism:category>dopamine</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/478993">
    <title>Learning of sequential movements by neural network model with dopamine-like reinforcement signal.</title>
    <link>http://www.citeulike.org/user/awooga/article/478993</link>
    <description>&lt;i&gt;Exp Brain Res, Vol. 121, No. 3. (August 1998), pp. 350-354.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Dopamine neurons appear to code an error in the prediction of reward. They are activated by unpredicted rewards, are not influenced by predicted rewards, and are depressed when a predicted reward is omitted. After conditioning, they respond to reward-predicting stimuli in a similar manner. With these characteristics, the dopamine response strongly resembles the predictive reinforcement teaching signal of neural network models implementing the temporal difference learning algorithm. This study explored a neural network model that used a reward-prediction error signal strongly resembling dopamine responses for learning movement sequences. A different stimulus was presented in each step of the sequence and required a different movement reaction, and reward occurred at the end of the correctly performed sequence. The dopamine-like predictive reinforcement signal efficiently allowed the model to learn long sequences. By contrast, learning with an unconditional reinforcement signal required synaptic eligibility traces of longer and biologically less-plausible durations for obtaining satisfactory performance. Thus, dopamine-like neuronal signals constitute excellent teaching signals for learning sequential behavior.</description>
    <dc:title>Learning of sequential movements by neural network model with dopamine-like reinforcement signal.</dc:title>

    <dc:creator>RE Suri</dc:creator>
    <dc:creator>W Schultz</dc:creator>
    <dc:source>Exp Brain Res, Vol. 121, No. 3. (August 1998), pp. 350-354.</dc:source>
    <dc:date>2006-01-24T19:14:29-00:00</dc:date>
    <prism:publicationYear>1998</prism:publicationYear>
    <prism:publicationName>Exp Brain Res</prism:publicationName>
    <prism:issn>0014-4819</prism:issn>
    <prism:volume>121</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>350</prism:startingPage>
    <prism:endingPage>354</prism:endingPage>
    <prism:category>abstract</prism:category>
    <prism:category>dopamine</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/959456">
    <title>Modulation of striatal single units by expected reward: a spiny neuron model displaying dopamine-induced bistability.</title>
    <link>http://www.citeulike.org/user/awooga/article/959456</link>
    <description>&lt;i&gt;J Neurophysiol, Vol. 90, No. 2. (August 2003), pp. 1095-1114.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Single-unit activity in the neostriatum of awake monkeys shows a marked dependence on expected reward. Responses to visual cues differ when animals expect primary reinforcements, such as juice rewards, in comparison to secondary reinforcements, such as tones. The mechanism of this reward-dependent modulation has not been established experimentally. To assess the hypothesis that direct neuromodulatory effects of dopamine on spiny neurons can account for this modulation, we develop a computational model based on simplified representations of key ionic currents and their modulation by D1 dopamine receptor activation. This minimal model can be analyzed in detail. We find that D1-mediated increases of inward rectifying potassium and L-type calcium currents cause a bifurcation: the native up/down state behavior of the spiny neuron model becomes truly bistable, which modulates the peak firing rate and the duration of the up state and introduces a dependence of the response on the past state history. These generic consequences of dopamine neuromodulation through bistability can account for both reward-dependent enhancement and suppression of spiny neuron single-unit responses to visual cues. We validate the model by simulating responses to visual targets in a memory-guided saccade task; our results are in close agreement with the main features of the experimental data. Our model provides a conceptual framework for understanding the functional significance of the short-term neuromodulatory actions of dopamine on signal processing in the striatum.</description>
    <dc:title>Modulation of striatal single units by expected reward: a spiny neuron model displaying dopamine-induced bistability.</dc:title>

    <dc:creator>AJ Gruber</dc:creator>
    <dc:creator>SA Solla</dc:creator>
    <dc:creator>DJ Surmeier</dc:creator>
    <dc:creator>JC Houk</dc:creator>
    <dc:identifier>doi:10.1152/jn.00618.2002</dc:identifier>
    <dc:source>J Neurophysiol, Vol. 90, No. 2. (August 2003), pp. 1095-1114.</dc:source>
    <dc:date>2006-11-23T12:37:41-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>J Neurophysiol</prism:publicationName>
    <prism:issn>0022-3077</prism:issn>
    <prism:volume>90</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>1095</prism:startingPage>
    <prism:endingPage>1114</prism:endingPage>
    <prism:category>basal-ganglia</prism:category>
    <prism:category>dopamine</prism:category>
    <prism:category>neuromodulation</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>spiny-neurons</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/685452">
    <title>Model of Cortical-Basal Ganglionic Processing: Encoding the Serial Order of Sensory Events</title>
    <link>http://www.citeulike.org/user/awooga/article/685452</link>
    <description>&lt;i&gt;J Neurophysiol, Vol. 79, No. 6. (1 June 1998), pp. 3168-3188.&lt;/i&gt;</description>
    <dc:title>Model of Cortical-Basal Ganglionic Processing: Encoding the Serial Order of Sensory Events</dc:title>

    <dc:creator>David Beiser</dc:creator>
    <dc:creator>James Houk</dc:creator>
    <dc:source>J Neurophysiol, Vol. 79, No. 6. (1 June 1998), pp. 3168-3188.</dc:source>
    <dc:date>2006-06-06T00:26:56-00:00</dc:date>
    <prism:publicationYear>1998</prism:publicationYear>
    <prism:publicationName>J Neurophysiol</prism:publicationName>
    <prism:volume>79</prism:volume>
    <prism:number>6</prism:number>
    <prism:startingPage>3168</prism:startingPage>
    <prism:endingPage>3188</prism:endingPage>
    <prism:category>abstract</prism:category>
    <prism:category>basal-ganglia</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/949094">
    <title>The ability of the mesocortical dopamine system to operate in distinct temporal modes</title>
    <link>http://www.citeulike.org/user/awooga/article/949094</link>
    <description>&lt;i&gt;Psychopharmacology&lt;/i&gt;</description>
    <dc:title>The ability of the mesocortical dopamine system to operate in distinct temporal modes</dc:title>

    <dc:creator>Christopher Lapish</dc:creator>
    <dc:creator>Sven Kroener</dc:creator>
    <dc:creator>Daniel Durstewitz</dc:creator>
    <dc:creator>Antonieta Lavin</dc:creator>
    <dc:creator>Jeremy Seamans</dc:creator>
    <dc:identifier>doi:10.1007/s00213-006-0527-8</dc:identifier>
    <dc:source>Psychopharmacology</dc:source>
    <dc:date>2006-11-16T15:35:40-00:00</dc:date>
    <prism:publicationName>Psychopharmacology</prism:publicationName>
    <prism:category>dopamine</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>temporal-modes</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/156151">
    <title>Getting formal with dopamine and reward.</title>
    <link>http://www.citeulike.org/user/awooga/article/156151</link>
    <description>&lt;i&gt;Neuron, Vol. 36, No. 2. (10 October 2002), pp. 241-263.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Recent neurophysiological studies reveal that neurons in certain brain structures carry specific signals about past and future rewards. Dopamine neurons display a short-latency, phasic reward signal indicating the difference between actual and predicted rewards. The signal is useful for enhancing neuronal processing and learning behavioral reactions. It is distinctly different from dopamine's tonic enabling of numerous behavioral processes. Neurons in the striatum, frontal cortex, and amygdala also process reward information but provide more differentiated information for identifying and anticipating rewards and organizing goal-directed behavior. The different reward signals have complementary functions, and the optimal use of rewards in voluntary behavior would benefit from interactions between the signals. Addictive psychostimulant drugs may exert their action by amplifying the dopamine reward signal.</description>
    <dc:title>Getting formal with dopamine and reward.</dc:title>

    <dc:creator>W Schultz</dc:creator>
    <dc:source>Neuron, Vol. 36, No. 2. (10 October 2002), pp. 241-263.</dc:source>
    <dc:date>2005-04-08T21:43:40-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Neuron</prism:publicationName>
    <prism:issn>0896-6273</prism:issn>
    <prism:volume>36</prism:volume>
    <prism:number>2</prism:number>
    <prism:startingPage>241</prism:startingPage>
    <prism:endingPage>263</prism:endingPage>
    <prism:category>dopamine</prism:category>
    <prism:category>reinforcement-learning</prism:category>
    <prism:category>review</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/awooga/article/938047">
    <title>Dopamine: generalization and bonuses</title>
    <link>http://www.citeulike.org/user/awooga/article/938047</link>
    <description>&lt;i&gt;Neural Networks, Vol. 15, No. 4-6. ( 2002), pp. 549-559.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In the temporal difference model of primate dopamine neurons, their phasic activity reports a prediction error for future reward. This model is supported by a wealth of experimental data. However, in certain circumstances, the activity of the dopamine cells seems anomalous under the model, as they respond in particular ways to stimuli that are not obviously related to predictions of reward. In this paper, we address two important sets of anomalies, those having to do with generalization and novelty. Generalization responses are treated as the natural consequence of partial information; novelty responses are treated by the suggestion that dopamine cells multiplex information about reward bonuses, including exploration bonuses and shaping bonuses. We interpret this additional role for dopamine in terms of the mechanistic attentional and psychomotor effects of dopamine, having the computational role of guiding exploration.</description>
    <dc:title>Dopamine: generalization and bonuses</dc:title>

    <dc:creator>Sham Kakade</dc:creator>
    <dc:creator>Peter Dayan</dc:creator>
    <dc:identifier>doi:10.1016/S0893-6080(02)00048-5</dc:identifier>
    <dc:source>Neural Networks, Vol. 15, No. 4-6. ( 2002), pp. 549-559.</dc:source>
    <dc:date>2006-11-09T17:17:43-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publicationName>Neural Networks</prism:publicationName>
    <prism:volume>15</prism:volume>
    <prism:number>4-6</prism:number>
    <prism:startingPage>549</prism:startingPage>
    <prism:endingPage>559</prism:endingPage>
    <prism:category>abstract-model</prism:category>
    <prism:category>dopamine</prism:category>
    <prism:category>generalisation</prism:category>
    <prism:category>novelty</prism:category>
    <prism:category>reinforcement-learning</prism:category>
</item>



</rdf:RDF>

