<?xml version="1.0" encoding="UTF-8"?>

<rdf:RDF
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
   xmlns="http://purl.org/rss/1.0/"
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:prism="http://prismstandard.org/namespaces/1.2/basic/"
   xmlns:dcterms="http://purl.org/dc/terms/"

>
<channel rdf:about="http://www.citeulike.org/about">
<pubDate>Sat, 05 Jul 2008 23:00:25 +0100</pubDate>


	<title>CiteULike: mmuecke's library [373 articles]</title>
	<description>CiteULike: mmuecke's library [373 articles]</description>


	<link>http://www.citeulike.org/user/mmuecke</link>
	<dc:publisher>CiteULike.org</dc:publisher>
	<dc:language>en-gb</dc:language>
	<dc:rights>Copyright &#169; 2004-2008 citeulike.org</dc:rights>
	<items>
    <rdf:Seq>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2963806"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2963801"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2952250"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2952170"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2923926"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2947829"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2947326"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2947308"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/1851392"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2598926"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2926148"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2926145"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2925857"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2925854"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2925845"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2925844"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2906757"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2906740"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2898775"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/1475012"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2889540"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2887024"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2886899"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2882882"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2878971"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2878968"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2878960"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2878802"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/740823"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/333539"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2869457"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2869447"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2869308"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2869304"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2399069"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2864354"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2861341"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2860622"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2514901"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2858543"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2857390"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2837205"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/1179116"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/1179110"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/1405131"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2833953"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/1179114"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/595170"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2818440"/>
        <rdf:li rdf:resource="http://www.citeulike.org/user/mmuecke/article/2774707"/>

	</rdf:Seq>
	</items>
	</channel>


<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2963806">
    <title>Toward efficient static analysis of finite-precision effects in DSP applications via affine arithmetic modeling</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2963806</link>
    <description>&lt;i&gt;Design Automation Conference, 2003. Proceedings (2003), pp. 496-501.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We introduce a static error analysis technique, based on smart interval methods from affine arithmetic, to help designers translate DSP codes from full-precision floating-point to smaller finite-precision formats. The technique gives results for numerical error estimation comparable to detailed simulation, but achieves speedups of three orders of magnitude by avoiding actual bit-level simulation. We show results for experiments mapping common DSP transform algorithms to implementations using small custom floating point formats.</description>
    <dc:title>Toward efficient static analysis of finite-precision effects in DSP applications via affine arithmetic modeling</dc:title>

    <dc:creator>CF Fang</dc:creator>
    <dc:creator>RA Rutenbar</dc:creator>
    <dc:creator>M Puschel</dc:creator>
    <dc:creator>Tsuhan Chen</dc:creator>
    <dc:source>Design Automation Conference, 2003. Proceedings (2003), pp. 496-501.</dc:source>
    <dc:date>2008-07-04T12:35:19-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Design Automation Conference, 2003. Proceedings</prism:publicationName>
    <prism:startingPage>496</prism:startingPage>
    <prism:endingPage>501</prism:endingPage>
    <prism:category>error_analysis</prism:category>
    <prism:category>floating_point</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2963801">
    <title>Fast, accurate static analysis for fixed-point finite-precision effects in DSP designs</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2963801</link>
    <description>&lt;i&gt;Computer Aided Design, 2003. ICCAD-2003. International Conference on (2003), pp. 275-282.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Translating digital signal processing (DSP) software into its finite-precision hardware implementation is often a time-consuming task. We describe a new static analysis technique that can accurately analyze finite-precision effects arising from fixed-point implementations of DSP algorithms. The technique is based on recent interval representation methods from affine arithmetic, and the use of new probabilistic bounds. The resulting numerical error estimates are comparable to detailed statistical simulation, but achieve speedups of four to five orders of magnitude by avoiding actual bit-true simulation. We show error analysis results on both feed forward and feedback DSP kernels.</description>
    <dc:title>Fast, accurate static analysis for fixed-point finite-precision effects in DSP designs</dc:title>

    <dc:creator>CF Fang</dc:creator>
    <dc:creator>RA Rutenbar</dc:creator>
    <dc:creator>Tsuhan Chen</dc:creator>
    <dc:identifier>doi:10.1109/ICCAD.2003.1257675</dc:identifier>
    <dc:source>Computer Aided Design, 2003. ICCAD-2003. International Conference on (2003), pp. 275-282.</dc:source>
    <dc:date>2008-07-04T12:33:21-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Computer Aided Design, 2003. ICCAD-2003. International Conference on</prism:publicationName>
    <prism:startingPage>275</prism:startingPage>
    <prism:endingPage>282</prism:endingPage>
    <prism:category>error_analysis</prism:category>
    <prism:category>fixedpoint</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2952250">
    <title>Anatomy of high-performance matrix multiplication</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2952250</link>
    <description>&lt;i&gt;ACM Trans. Math. Softw., Vol. 34, No. 3. (May 2008), pp. 1-25.&lt;/i&gt;</description>
    <dc:title>Anatomy of high-performance matrix multiplication</dc:title>

    <dc:creator>Kazushige Goto</dc:creator>
    <dc:creator>Robert van de Geijn</dc:creator>
    <dc:identifier>doi:10.1145/1356052.1356053</dc:identifier>
    <dc:source>ACM Trans. Math. Softw., Vol. 34, No. 3. (May 2008), pp. 1-25.</dc:source>
    <dc:date>2008-07-02T13:49:21-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>ACM Trans. Math. Softw.</prism:publicationName>
    <prism:issn>0098-3500</prism:issn>
    <prism:volume>34</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>1</prism:startingPage>
    <prism:endingPage>25</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>high-performance_computing</prism:category>
    <prism:category>sparse_matrix</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2952170">
    <title>Automatic Generation of Matlab Functions Using Mathematica and Thermath</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2952170</link>
    <description>&lt;i&gt;Computing in Science &#38; Engineering, Vol. 10, No. 4. (2008), pp. 41-49.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The program tool Thermath now lets researchers automatically generate complete, ready-to-use Matlab functions. The examples here deal with critical-point calculations and the development of equipment models for separations in the chemical industry, but the ability to automatically generate Matlab functions is useful in many areas of science and engineering.</description>
    <dc:title>Automatic Generation of Matlab Functions Using Mathematica and Thermath</dc:title>

    <dc:creator>Raquel Macedo</dc:creator>
    <dc:creator>Marcelo Alfradique</dc:creator>
    <dc:creator>Marcelo Castier</dc:creator>
    <dc:identifier>doi:10.1109/MCSE.2008.90</dc:identifier>
    <dc:source>Computing in Science &#38; Engineering, Vol. 10, No. 4. (2008), pp. 41-49.</dc:source>
    <dc:date>2008-07-02T13:15:19-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Computing in Science &#38; Engineering</prism:publicationName>
    <prism:volume>10</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>41</prism:startingPage>
    <prism:endingPage>49</prism:endingPage>
    <prism:category>generative_programming</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2923926">
    <title>Interview: The 'art' of being Donald Knuth</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2923926</link>
    <description>&lt;i&gt;Commun. ACM, Vol. 51, No. 7. (July 2008), pp. 35-39.&lt;/i&gt;</description>
    <dc:title>Interview: The 'art' of being Donald Knuth</dc:title>

    <dc:identifier>doi:10.1145/1364782.1364794</dc:identifier>
    <dc:source>Commun. ACM, Vol. 51, No. 7. (July 2008), pp. 35-39.</dc:source>
    <dc:date>2008-06-24T15:49:45-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Commun. ACM</prism:publicationName>
    <prism:issn>0001-0782</prism:issn>
    <prism:volume>51</prism:volume>
    <prism:number>7</prism:number>
    <prism:startingPage>35</prism:startingPage>
    <prism:endingPage>39</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>people</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2947829">
    <title>Anton, a special-purpose machine for molecular dynamics simulation</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2947829</link>
    <description>&lt;i&gt;Commun. ACM, Vol. 51, No. 7. (July 2008), pp. 91-97.&lt;/i&gt;</description>
    <dc:title>Anton, a special-purpose machine for molecular dynamics simulation</dc:title>

    <dc:creator>David Shaw</dc:creator>
    <dc:creator>Martin Deneroff</dc:creator>
    <dc:creator>Ron Dror</dc:creator>
    <dc:creator>Jeffrey Kuskin</dc:creator>
    <dc:creator>Richard Larson</dc:creator>
    <dc:creator>John Salmon</dc:creator>
    <dc:creator>Cliff Young</dc:creator>
    <dc:creator>Brannon Batson</dc:creator>
    <dc:creator>Kevin Bowers</dc:creator>
    <dc:creator>Jack Chao</dc:creator>
    <dc:creator>Michael Eastwood</dc:creator>
    <dc:creator>Joseph Gagliardo</dc:creator>
    <dc:creator>JP Grossman</dc:creator>
    <dc:creator>Richard Ho</dc:creator>
    <dc:creator>Douglas Lerardi</dc:creator>
    <dc:creator>Istv&#225;n Kolossv&#225;ry</dc:creator>
    <dc:creator>John Klepeis</dc:creator>
    <dc:creator>Timothy Layman</dc:creator>
    <dc:creator>Christine Mcleavey</dc:creator>
    <dc:creator>Mark Moraes</dc:creator>
    <dc:creator>Rolf Mueller</dc:creator>
    <dc:creator>Edward Priest</dc:creator>
    <dc:creator>Yibing Shan</dc:creator>
    <dc:creator>Jochen Spengler</dc:creator>
    <dc:creator>Michael Theobald</dc:creator>
    <dc:creator>Brian Towles</dc:creator>
    <dc:creator>Stanley Wang</dc:creator>
    <dc:identifier>doi:10.1145/1364782.1364802</dc:identifier>
    <dc:source>Commun. ACM, Vol. 51, No. 7. (July 2008), pp. 91-97.</dc:source>
    <dc:date>2008-07-01T12:30:10-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Commun. ACM</prism:publicationName>
    <prism:issn>0001-0782</prism:issn>
    <prism:volume>51</prism:volume>
    <prism:number>7</prism:number>
    <prism:startingPage>91</prism:startingPage>
    <prism:endingPage>97</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>molecular_dynamics</prism:category>
    <prism:category>reconfigurable_platforms</prism:category>
    <prism:category>vlsi_design</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2947326">
    <title>A New Architecture For Multiple-Precision Floating-Point Multiply-Add Fused Unit Design</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2947326</link>
    <description>&lt;i&gt;(2007), pp. 69-76.&lt;/i&gt;</description>
    <dc:title>A New Architecture For Multiple-Precision Floating-Point Multiply-Add Fused Unit Design</dc:title>

    <dc:creator>Libo Huang</dc:creator>
    <dc:creator>Li Shen</dc:creator>
    <dc:creator>Kui Dai</dc:creator>
    <dc:creator>Zhiying Wang</dc:creator>
    <dc:identifier>doi:10.1109/ARITH.2007.5</dc:identifier>
    <dc:source>(2007), pp. 69-76.</dc:source>
    <dc:date>2008-07-01T09:57:22-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:startingPage>69</prism:startingPage>
    <prism:endingPage>76</prism:endingPage>
    <prism:publisher>IEEE Computer Society</prism:publisher>
    <prism:category>floating_point</prism:category>
    <prism:category>vlsi_design</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2947308">
    <title>Multiprecision floating point addition</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2947308</link>
    <description>&lt;i&gt;(2000), pp. 71-77.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;An efficient algorithm is presented that returns the exactly rounded sum of two multiprecision floating point numbers. Depending on the input signs and exponents the algorithm distinguishes five cases. In each case, the method minimizes the number of computer words that are subject to de-normalization, addition or subtraction, and normalization. The method achieves further efficiency by trying to combine these three steps into one single pass over the mantissas. To do this, the method guesses the shift amount of the normalizing shift before the sum is known.</description>
    <dc:title>Multiprecision floating point addition</dc:title>

    <dc:creator>George Collins</dc:creator>
    <dc:creator>Werner Krandick</dc:creator>
    <dc:identifier>doi:10.1145/345542.345585</dc:identifier>
    <dc:source>(2000), pp. 71-77.</dc:source>
    <dc:date>2008-07-01T09:52:44-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:startingPage>71</prism:startingPage>
    <prism:endingPage>77</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>floating_point</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/1851392">
    <title>A Survey of FPGAs for Acceleration of High Performance Computing and their Application to Computational Molecular Biology</title>
    <link>http://www.citeulike.org/user/mmuecke/article/1851392</link>
    <description>&lt;i&gt;TENCON 2005 2005 IEEE Region 10 (2005), pp. 1-6.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Molecular biocomputation workflows traditionally involve days of compute time to align DNA/ protein sequences. Custom computing machines (CCMs) provide a means to dramatically reduce alignment time, and FPGAs provide a practical means to implement such CCMs. Software implementation of some sequence alignment algorithms suffer quadratic time performance, however CCM implementations may be highly parallelized and consequently provide linear time performance. Similarly, CCMs may be used to accelerate workflows or operations in a wide range of domains, often dramatically outperforming large scale clusters. Programming and integration problems limit CCM usage, though progress has been made to overcome these problems. With continued development of tools, devices, and integration solutions, CCMs on FPGAs coupled to conventional systems present an effective architecture for high performance computing.</description>
    <dc:title>A Survey of FPGAs for Acceleration of High Performance Computing and their Application to Computational Molecular Biology</dc:title>

    <dc:creator>T Ramdas</dc:creator>
    <dc:creator>G Egan</dc:creator>
    <dc:source>TENCON 2005 2005 IEEE Region 10 (2005), pp. 1-6.</dc:source>
    <dc:date>2007-11-01T14:47:12-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>TENCON 2005 2005 IEEE Region 10</prism:publicationName>
    <prism:startingPage>1</prism:startingPage>
    <prism:endingPage>6</prism:endingPage>
    <prism:category>bioinformatics</prism:category>
    <prism:category>fpgasupercomputing</prism:category>
    <prism:category>genomesequencing</prism:category>
    <prism:category>molecular_docking</prism:category>
    <prism:category>molecular_dynamics</prism:category>
    <prism:category>reconfigurable_computing</prism:category>
    <prism:category>survey</prism:category>
    <prism:category>trends</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2598926">
    <title>On-board analysis of uncalibrated data for a spacecraft at mars</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2598926</link>
    <description>&lt;i&gt;(2007), pp. 922-930.&lt;/i&gt;</description>
    <dc:title>On-board analysis of uncalibrated data for a spacecraft at mars</dc:title>

    <dc:creator>Rebecca Castano</dc:creator>
    <dc:creator>Kiri Wagstaff</dc:creator>
    <dc:creator>Steve Chien</dc:creator>
    <dc:creator>Timothy Stough</dc:creator>
    <dc:creator>Benyang Tang</dc:creator>
    <dc:identifier>doi:10.1145/1281192.1281291</dc:identifier>
    <dc:source>(2007), pp. 922-930.</dc:source>
    <dc:date>2008-03-26T15:18:27-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:startingPage>922</prism:startingPage>
    <prism:endingPage>930</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>svm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2926148">
    <title>A Hardware-friendly Support Vector Machine for Embedded Automotive Applications</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2926148</link>
    <description>&lt;i&gt;Neural Networks, 2007. IJCNN 2007. International Joint Conference on (2007), pp. 1360-1364.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We present here a hardware-friendly version of the support vector machine (SVM), which is useful to implement its feed-forward phase on limited-resources devices such as field programmable gate arrays (FPGAs) or microcontrollers, where a floating-point unit is seldom available. Our proposal is tested on a machine-vision benchmark dataset for automotive applications.</description>
    <dc:title>A Hardware-friendly Support Vector Machine for Embedded Automotive Applications</dc:title>

    <dc:creator>D Anguita</dc:creator>
    <dc:creator>A Ghio</dc:creator>
    <dc:creator>S Pischiutta</dc:creator>
    <dc:creator>S Ridella</dc:creator>
    <dc:identifier>doi:10.1109/IJCNN.2007.4371156</dc:identifier>
    <dc:source>Neural Networks, 2007. IJCNN 2007. International Joint Conference on (2007), pp. 1360-1364.</dc:source>
    <dc:date>2008-06-25T15:28:21-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Neural Networks, 2007. IJCNN 2007. International Joint Conference on</prism:publicationName>
    <prism:startingPage>1360</prism:startingPage>
    <prism:endingPage>1364</prism:endingPage>
    <prism:category>fixedpoint</prism:category>
    <prism:category>svm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2926145">
    <title>The effect of quantization on support vector machines with Gaussian kernel</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2926145</link>
    <description>&lt;i&gt;Neural Networks, 2005. IJCNN '05. Proceedings. 2005 IEEE International Joint Conference on, Vol. 2 (2005), pp. 681-684 vol. 2.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;We apply here a probabilistic method to predict the effect of quantizing the parameters of a support vector machine with Gaussian kernel. Thanks to the particular structure of the SVM, the dependency of the output from the quantization noise can be predicted with good accuracy, and a simple closed-form formula can be derived, without imposing any hard-to-verify assumption.</description>
    <dc:title>The effect of quantization on support vector machines with Gaussian kernel</dc:title>

    <dc:creator>D Anguita</dc:creator>
    <dc:creator>G Bozza</dc:creator>
    <dc:source>Neural Networks, 2005. IJCNN '05. Proceedings. 2005 IEEE International Joint Conference on, Vol. 2 (2005), pp. 681-684 vol. 2.</dc:source>
    <dc:date>2008-06-25T15:27:04-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Neural Networks, 2005. IJCNN '05. Proceedings. 2005 IEEE International Joint Conference on</prism:publicationName>
    <prism:volume>2</prism:volume>
    <prism:startingPage>681</prism:startingPage>
    <prism:endingPage>684</prism:endingPage>
    <prism:category>error_analysis</prism:category>
    <prism:category>svm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2925857">
    <title>An FPGA Implementation of Linear Kernel Support Vector Machines</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2925857</link>
    <description>&lt;i&gt;Reconfigurable Computing and FPGA's, 2006. ReConFig 2006. IEEE International Conference on (2006), pp. 1-6.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This paper describes preliminary performance results of a reconfigurable hardware implementation of a support vector machine classifier, aimed at brain-computer interface applications, which require real-time decision making in a portable device. The main constraint of the design was that it could perform a classification decision within the time span of an evoked potential recording epoch of 300 ms, which was readily achieved for moderate-sized support vector sets. Regardless of its fixed-point implementation, the FPGA-based model achieves equivalent classification accuracies to those of its software-based, floating-point counterparts</description>
    <dc:title>An FPGA Implementation of Linear Kernel Support Vector Machines</dc:title>

    <dc:creator>Omar Pina-Ramirez</dc:creator>
    <dc:creator>Raquel Valdes-Cristerna</dc:creator>
    <dc:creator>Oscar Yanez-Suarez</dc:creator>
    <dc:identifier>doi:10.1109/RECONF.2006.307784</dc:identifier>
    <dc:source>Reconfigurable Computing and FPGA's, 2006. ReConFig 2006. IEEE International Conference on (2006), pp. 1-6.</dc:source>
    <dc:date>2008-06-25T12:45:01-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Reconfigurable Computing and FPGA's, 2006. ReConFig 2006. IEEE International Conference on</prism:publicationName>
    <prism:startingPage>1</prism:startingPage>
    <prism:endingPage>6</prism:endingPage>
    <prism:category>fixedpoint</prism:category>
    <prism:category>svm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2925854">
    <title>Low-Power and Low-Cost Implementation of SVMs for Smart Sensors</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2925854</link>
    <description>&lt;i&gt;Instrumentation and Measurement, IEEE Transactions on, Vol. 56, No. 1. (2007), pp. 39-44.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In this paper, we propose an efficient implementation of support vector machines (SVMs) on a low-power and low-cost 8-bit microcontroller. The proposed solution can be advantageously used to implement smart sensors and sensor networks for intelligent data analysis and pervasive computing. A new model selection algorithm that allows fitting the resource constraints imposed by the hardware architecture is proposed. Moreover, the performance of an optimized implementation which exploits the CORDIC algorithm is detailed and discussed</description>
    <dc:title>Low-Power and Low-Cost Implementation of SVMs for Smart Sensors</dc:title>

    <dc:creator>A Boni</dc:creator>
    <dc:creator>F Pianegiani</dc:creator>
    <dc:creator>D Petri</dc:creator>
    <dc:identifier>doi:10.1109/TIM.2006.887319</dc:identifier>
    <dc:source>Instrumentation and Measurement, IEEE Transactions on, Vol. 56, No. 1. (2007), pp. 39-44.</dc:source>
    <dc:date>2008-06-25T12:43:33-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Instrumentation and Measurement, IEEE Transactions on</prism:publicationName>
    <prism:volume>56</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>39</prism:startingPage>
    <prism:endingPage>44</prism:endingPage>
    <prism:category>svm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2925845">
    <title>Finite precision analysis of support vector machine classification in logarithmic number systems</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2925845</link>
    <description>&lt;i&gt;Digital System Design, 2004. DSD 2004. Euromicro Symposium on (2004), pp. 254-261.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In this paper we present an analysis of the minimal hardware precision required to implement support vector machine (SVM) classification within a logarithmic number system architecture. Support vector machines are fast emerging as a powerful machine-learning tool for pattern recognition, decision-making and classification. Logarithmic number systems (LNS) utilize the property of logarithmic compression for numerical operations. Within the logarithmic domain, multiplication and division can be treated simply as addition or subtraction. Hardware computation of these operations is significantly faster with reduced complexity. Leveraging the inherent properties of LNS, we are able to achieve significant savings over double-precision floating point in an implementation of a SVM classification algorithm.</description>
    <dc:title>Finite precision analysis of support vector machine classification in logarithmic number systems</dc:title>

    <dc:creator>FM Khan</dc:creator>
    <dc:creator>MG Arnold</dc:creator>
    <dc:creator>WM Pottenger</dc:creator>
    <dc:identifier>doi:10.1109/DSD.2004.1333285</dc:identifier>
    <dc:source>Digital System Design, 2004. DSD 2004. Euromicro Symposium on (2004), pp. 254-261.</dc:source>
    <dc:date>2008-06-25T12:37:38-00:00</dc:date>
    <prism:publicationYear>2004</prism:publicationYear>
    <prism:publicationName>Digital System Design, 2004. DSD 2004. Euromicro Symposium on</prism:publicationName>
    <prism:startingPage>254</prism:startingPage>
    <prism:endingPage>261</prism:endingPage>
    <prism:category>error_analysis</prism:category>
    <prism:category>mixed_precision</prism:category>
    <prism:category>svm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2925844">
    <title>A reconfigurable parallel architecture for SVM classification</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2925844</link>
    <description>&lt;i&gt;Neural Networks, 2005. IJCNN '05. Proceedings. 2005 IEEE International Joint Conference on, Vol. 5 (2005), pp. 2867-2872 vol. 5.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The availability of powerful field programmable gate arrays (FPGA) has been exploited for their ability to provide hardware solutions for many application areas, resulting in high-performance systems that can operate in real time by operating in parallel. The support vector machine computational paradigm can be cast as a collection of multiple streams operating in parallel on one such FPGA. This paper presents a parallel architecture that implements an SVM on a Xilinx FPGA. The results obtained by using this architecture for a complex pattern classification from high-energy physics involving thousands of patterns are reported and discussed, comparing the performance obtained by this architectural solution to that of a simpler sequential architecture.</description>
    <dc:title>A reconfigurable parallel architecture for SVM classification</dc:title>

    <dc:creator>I Biasi</dc:creator>
    <dc:creator>A Boni</dc:creator>
    <dc:creator>A Zorat</dc:creator>
    <dc:identifier>doi:10.1109/IJCNN.2005.1556380</dc:identifier>
    <dc:source>Neural Networks, 2005. IJCNN '05. Proceedings. 2005 IEEE International Joint Conference on, Vol. 5 (2005), pp. 2867-2872 vol. 5.</dc:source>
    <dc:date>2008-06-25T12:36:53-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Neural Networks, 2005. IJCNN '05. Proceedings. 2005 IEEE International Joint Conference on</prism:publicationName>
    <prism:volume>5</prism:volume>
    <prism:startingPage>2867</prism:startingPage>
    <prism:endingPage>2872</prism:endingPage>
    <prism:category>svm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2906757">
    <title>Preparation for research: instruction in interpreting and evaluating research</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2906757</link>
    <description>&lt;i&gt;(1996), pp. 93-97.&lt;/i&gt;</description>
    <dc:title>Preparation for research: instruction in interpreting and evaluating research</dc:title>

    <dc:creator>Alan Fekete</dc:creator>
    <dc:identifier>doi:10.1145/236452.236516</dc:identifier>
    <dc:source>(1996), pp. 93-97.</dc:source>
    <dc:date>2008-06-19T08:09:20-00:00</dc:date>
    <prism:publicationYear>1996</prism:publicationYear>
    <prism:startingPage>93</prism:startingPage>
    <prism:endingPage>97</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>scientific_publishing</prism:category>
    <prism:category>teaching</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2906740">
    <title>The Task of the Referee</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2906740</link>
    <description>&lt;i&gt;Computer, Vol. 23, No. 4. (April 1990), pp. 65-71.&lt;/i&gt;</description>
    <dc:title>The Task of the Referee</dc:title>

    <dc:creator>Alan Smith</dc:creator>
    <dc:source>Computer, Vol. 23, No. 4. (April 1990), pp. 65-71.</dc:source>
    <dc:date>2008-06-19T07:51:00-00:00</dc:date>
    <prism:publicationYear>1990</prism:publicationYear>
    <prism:publicationName>Computer</prism:publicationName>
    <prism:issn>0018-9162</prism:issn>
    <prism:volume>23</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>65</prism:startingPage>
    <prism:endingPage>71</prism:endingPage>
    <prism:publisher>IEEE Computer Society Press</prism:publisher>
    <prism:category>scientific_publishing</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2898775">
    <title>A domain specific interconnect for reconfigurable computing</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2898775</link>
    <description>&lt;i&gt;(2008), pp. 79-88.&lt;/i&gt;</description>
    <dc:title>A domain specific interconnect for reconfigurable computing</dc:title>

    <dc:creator>Sanjay Rajopadhye</dc:creator>
    <dc:creator>Gautam Gupta</dc:creator>
    <dc:creator>Lakshminarayanan Renganarayana</dc:creator>
    <dc:identifier>doi:10.1145/1375657.1375669</dc:identifier>
    <dc:source>(2008), pp. 79-88.</dc:source>
    <dc:date>2008-06-16T14:23:25-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:startingPage>79</prism:startingPage>
    <prism:endingPage>88</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>reconfigurable_computing</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/1475012">
    <title>Algorithmic Differentiation: Application to Variational Problems in Computer Vision</title>
    <link>http://www.citeulike.org/user/mmuecke/article/1475012</link>
    <description>&lt;i&gt;Pattern Analysis and Machine Intelligence, IEEE Transactions on, Vol. 29, No. 7. (2007), pp. 1180-1193.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Many vision problems can be formulated as minimization of appropriate energy functionals. These energy functionals are usually minimized, based on the calculus of variations (Euler-Lagrange equation). Once the Euler-Lagrange equation has been determined, it needs to be discretized in order to implement it on a digital computer. This is not a trivial task and, is moreover, error-prone. In this paper, we propose a flexible alternative. We discretize the energy functional and, subsequently, apply the mathematical concept of algorithmic differentiation to directly derive algorithms that implement the energy functional's derivatives. This approach has several advantages: First, the computed derivatives are exact with respect to the implementation of the energy functional. Second, it is basically straightforward to compute second-order derivatives and, thus, the Hessian matrix of the energy functional. Third, algorithmic differentiation is a process which can be automated. We demonstrate this novel approach on three representative vision problems (namely, denoising, segmentation, and stereo) and show that state-of-the-art results are obtained with little effort.</description>
    <dc:title>Algorithmic Differentiation: Application to Variational Problems in Computer Vision</dc:title>

    <dc:creator>Thomas Pock</dc:creator>
    <dc:creator>Michael Pock</dc:creator>
    <dc:creator>Horst Bischof</dc:creator>
    <dc:identifier>doi:10.1109/TPAMI.2007.1044</dc:identifier>
    <dc:source>Pattern Analysis and Machine Intelligence, IEEE Transactions on, Vol. 29, No. 7. (2007), pp. 1180-1193.</dc:source>
    <dc:date>2007-07-23T12:16:48-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Pattern Analysis and Machine Intelligence, IEEE Transactions on</prism:publicationName>
    <prism:volume>29</prism:volume>
    <prism:number>7</prism:number>
    <prism:startingPage>1180</prism:startingPage>
    <prism:endingPage>1193</prism:endingPage>
    <prism:category>gpu</prism:category>
    <prism:category>image_processing</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2889540">
    <title>Combining local consistency, symbolic rewriting and interval methods</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2889540</link>
    <description>&lt;i&gt;Artificial Intelligence and Symbolic Mathematical Computation (1996), pp. 144-159.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This paper is an attempt to address the processing of non-linear numerical constraints over the Reals by combining three different methods: local consistency techniques, symbolic rewriting and interval methods. To formalize this combination, we define a generic two-step constraint processing technique based on an extension of the Constraint Satisfaction Problem, called Extended Constraint Satisfaction Problem (ECSP). The first step is a rewriting step, in which the initial ECSP is symbolically transformed. The second step, called approximation step, is based on a local consistency notion, called weak arc-consistency, defined over ECSPs in terms of fixed point of contractant monotone operators. This notion is shown to generalize previous local consistency concepts defined over finite domains (arc-consistency) or infinite subsets of the Reals (arc B-consistency and interval, hull and box-consistency). A filtering algorithm, derived from AC-3, is given and is shown to be correct, confluent and to terminate. This framework is illustrated by the combination of Gröbner Bases computations and Interval Newton methods. The computation of Gröbner Bases for subsets of the initial set of constraints is used as a rewriting step and operators based on Interval Newton methods are used together with enumeration techniques to achieve weak arc-consistency on the modified ECSP. Experimental results from a prototype are presented, as well as comparisons with other systems.</description>
    <dc:title>Combining local consistency, symbolic rewriting and interval methods</dc:title>

    <dc:creator>Frédéric Benhamou</dc:creator>
    <dc:creator>Laurent Granvilliers</dc:creator>
    <dc:identifier>doi:10.1007/3-540-61732-9_55</dc:identifier>
    <dc:source>Artificial Intelligence and Symbolic Mathematical Computation (1996), pp. 144-159.</dc:source>
    <dc:date>2008-06-12T15:14:02-00:00</dc:date>
    <prism:publicationYear>1996</prism:publicationYear>
    <prism:publicationName>Artificial Intelligence and Symbolic Mathematical Computation</prism:publicationName>
    <prism:startingPage>144</prism:startingPage>
    <prism:endingPage>159</prism:endingPage>
    <prism:category>interval_arithmetic</prism:category>
    <prism:category>term_rewriting</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2887024">
    <title>Exploiting the performance of 32 bit floating point arithmetic in obtaining 64 bit accuracy (revisiting iterative refinement for linear systems)</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2887024</link>
    <description>&lt;i&gt;(2006)&lt;/i&gt;</description>
    <dc:title>Exploiting the performance of 32 bit floating point arithmetic in obtaining 64 bit accuracy (revisiting iterative refinement for linear systems)</dc:title>

    <dc:creator>Julie Langou</dc:creator>
    <dc:creator>Julien Langou</dc:creator>
    <dc:creator>Piotr Luszczek</dc:creator>
    <dc:creator>Jakub Kurzak</dc:creator>
    <dc:creator>Alfredo Buttari</dc:creator>
    <dc:creator>Jack Dongarra</dc:creator>
    <dc:identifier>doi:10.1145/1188455.1188573</dc:identifier>
    <dc:source>(2006)</dc:source>
    <dc:date>2008-06-12T11:19:53-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>floating_point</prism:category>
    <prism:category>mixed_precision</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2886899">
    <title>Performance and accuracy of hardware-oriented native-, emulated- and mixed-precision solvers in FEM simulations</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2886899</link>
    <description>&lt;i&gt;International Journal of Parallel, Emergent and Distributed Systems, Vol. 22, No. 4. (2007), pp. 221-256.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In this survey paper, we compare native double precision solvers with emulated- and mixed-precision solvers of linear systems of equations as they typically arise in finite element discretisations. The emulation utilises two single float numbers to achieve higher precision, while the mixed precision iterative refinement computes residuals and updates the solution vector in double precision but solves the residual systems in single precision. Both techniques have been known since the 1960s, but little attention has been devoted to their performance aspects. Motivated by changing paradigms in processor technology and the emergence of highly-parallel devices with outstanding single float performance, we adapt the emulation and mixed precision techniques to coupled hardware configurations, where the parallel devices serve as scientific co-processors. The performance advantages are examined with respect to speedups over a native double precision implementation (time aspect) and reduced area requirements for a chip (space aspect). The paper begins with an overview of the theoretical background, algorithmic approaches and suitable hardware architectures. We then employ several conjugate gradient (CG) and multigrid solvers and study their behaviour for different parameter settings of the iterative refinement technique. Concrete speedup factors are evaluated on the coupled hardware configuration of a general-purpose CPU and a graphics processor. The dual performance aspect of potential area savings is assessed on a field programmable gate array (FPGA). In the last part, we test the applicability of the proposed mixed precision schemes with ill-conditioned matrices. 
We conclude that the mixed precision approach works very well with the parallel co-processors gaining speedup factors of four to five, and area savings of three to four, while maintaining the same accuracy as a reference solver executing everything in double precision.</description>
    <dc:title>Performance and accuracy of hardware-oriented native-, emulated- and mixed-precision solvers in FEM simulations</dc:title>

    <dc:creator>Dominik Göddeke</dc:creator>
    <dc:creator>Robert Strzodka</dc:creator>
    <dc:creator>Stefan Turek</dc:creator>
    <dc:identifier>doi:10.1080/17445760601122076</dc:identifier>
    <dc:source>International Journal of Parallel, Emergent and Distributed Systems, Vol. 22, No. 4. (2007), pp. 221-256.</dc:source>
    <dc:date>2008-06-12T10:03:59-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>International Journal of Parallel, Emergent and Distributed Systems</prism:publicationName>
    <prism:volume>22</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>221</prism:startingPage>
    <prism:endingPage>256</prism:endingPage>
    <prism:publisher>Taylor &#38; Francis</prism:publisher>
    <prism:category>floating_point</prism:category>
    <prism:category>mixed_precision</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2882882">
    <title>Hardware-based support vector machine classification in logarithmic number systems</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2882882</link>
    <description>&lt;i&gt;Circuits and Systems, 2005. ISCAS 2005. IEEE International Symposium on (2005), pp. 5154-5157 Vol. 5.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Support vector machines are emerging as a powerful machine-learning tool. Logarithmic number systems (LNS) utilize the property of logarithmic compression for numerical operations. We present an implementation of a digital support vector machine (SVM) classifier using LNS in which, when compared with other implementations, considerable hardware savings are achieved with no significant loss in classification accuracy.</description>
    <dc:title>Hardware-based support vector machine classification in logarithmic number systems</dc:title>

    <dc:creator>FM Khan</dc:creator>
    <dc:creator>MG Arnold</dc:creator>
    <dc:creator>WM Pottenger</dc:creator>
    <dc:identifier>doi:10.1109/ISCAS.2005.1465795</dc:identifier>
    <dc:source>Circuits and Systems, 2005. ISCAS 2005. IEEE International Symposium on (2005), pp. 5154-5157 Vol. 5.</dc:source>
    <dc:date>2008-06-11T15:02:11-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:publicationName>Circuits and Systems, 2005. ISCAS 2005. IEEE International Symposium on</prism:publicationName>
    <prism:volume>5</prism:volume>
    <prism:startingPage>5154</prism:startingPage>
    <prism:endingPage>5157</prism:endingPage>
    <prism:category>svm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2878971">
    <title>The Cray BlackWidow: a highly scalable vector multiprocessor</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2878971</link>
    <description>&lt;i&gt;(2007), pp. 1-12.&lt;/i&gt;</description>
    <dc:title>The Cray BlackWidow: a highly scalable vector multiprocessor</dc:title>

    <dc:creator>Dennis Abts</dc:creator>
    <dc:creator>Abdulla Bataineh</dc:creator>
    <dc:creator>Steve Scott</dc:creator>
    <dc:creator>Greg Faanes</dc:creator>
    <dc:creator>Jim Schwarzmeier</dc:creator>
    <dc:creator>Eric Lundberg</dc:creator>
    <dc:creator>Tim Johnson</dc:creator>
    <dc:creator>Mike Bye</dc:creator>
    <dc:creator>Gerald Schwoerer</dc:creator>
    <dc:identifier>doi:10.1145/1362622.1362646</dc:identifier>
    <dc:source>(2007), pp. 1-12.</dc:source>
    <dc:date>2008-06-10T09:35:28-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:startingPage>1</prism:startingPage>
    <prism:endingPage>12</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>computer_architecture</prism:category>
    <prism:category>high-performance_computing</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2878968">
    <title>GRAPE-DR: 2-Pflops massively-parallel computer with 512-core, 512-Gflops processor chips for scientific computing</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2878968</link>
    <description>&lt;i&gt;(2007), pp. 1-11.&lt;/i&gt;</description>
    <dc:title>GRAPE-DR: 2-Pflops massively-parallel computer with 512-core, 512-Gflops processor chips for scientific computing</dc:title>

    <dc:creator>Junichiro Makino</dc:creator>
    <dc:creator>Kei Hiraki</dc:creator>
    <dc:creator>Mary Inaba</dc:creator>
    <dc:identifier>doi:10.1145/1362622.1362647</dc:identifier>
    <dc:source>(2007), pp. 1-11.</dc:source>
    <dc:date>2008-06-10T09:31:53-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:startingPage>1</prism:startingPage>
    <prism:endingPage>11</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>computer_architecture</prism:category>
    <prism:category>high-performance_computing</prism:category>
    <prism:category>molecular_dynamics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2878960">
    <title>A case for low-complexity MP architectures</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2878960</link>
    <description>&lt;i&gt;(2007), pp. 1-12.&lt;/i&gt;</description>
    <dc:title>A case for low-complexity MP architectures</dc:title>

    <dc:creator>Håkan Zeffer</dc:creator>
    <dc:creator>Erik Hagersten</dc:creator>
    <dc:identifier>doi:10.1145/1362622.1362648</dc:identifier>
    <dc:source>(2007), pp. 1-12.</dc:source>
    <dc:date>2008-06-10T09:29:20-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:startingPage>1</prism:startingPage>
    <prism:endingPage>12</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>architecture_comparison</prism:category>
    <prism:category>computer_architecture</prism:category>
    <prism:category>trends</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2878802">
    <title>Multi-Sensor Multi-Level Information Fusion on Embedded Systems</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2878802</link>
    <description>&lt;i&gt;(2008)&lt;/i&gt;</description>
    <dc:title>Multi-Sensor Multi-Level Information Fusion on Embedded Systems</dc:title>

    <dc:creator>Andreas Klausner</dc:creator>
    <dc:source>(2008)</dc:source>
    <dc:date>2008-06-10T08:06:19-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:category>svm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/740823">
    <title>Formal methods: state of the art and future directions</title>
    <link>http://www.citeulike.org/user/mmuecke/article/740823</link>
    <description>&lt;i&gt;ACM Comput. Surv., Vol. 28, No. 4. (December 1996), pp. 626-643.&lt;/i&gt;</description>
    <dc:title>Formal methods: state of the art and future directions</dc:title>

    <dc:creator>Edmund Clarke</dc:creator>
    <dc:creator>Jeannette Wing</dc:creator>
    <dc:identifier>doi:10.1145/242223.242257</dc:identifier>
    <dc:source>ACM Comput. Surv., Vol. 28, No. 4. (December 1996), pp. 626-643.</dc:source>
    <dc:date>2006-07-05T20:35:45-00:00</dc:date>
    <prism:publicationYear>1996</prism:publicationYear>
    <prism:publicationName>ACM Comput. Surv.</prism:publicationName>
    <prism:issn>0360-0300</prism:issn>
    <prism:volume>28</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>626</prism:startingPage>
    <prism:endingPage>643</prism:endingPage>
    <prism:publisher>ACM Press</prism:publisher>
    <prism:category>survey</prism:category>
    <prism:category>term_rewriting</prism:category>
    <prism:category>verification</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/333539">
    <title>High-level views on low-level representations</title>
    <link>http://www.citeulike.org/user/mmuecke/article/333539</link>
    <description>&lt;i&gt;(2005), pp. 168-179.&lt;/i&gt;</description>
    <dc:title>High-level views on low-level representations</dc:title>

    <dc:creator>Iavor Diatchki</dc:creator>
    <dc:creator>Mark Jones</dc:creator>
    <dc:creator>Rebekah Leslie</dc:creator>
    <dc:identifier>doi:10.1145/1086365.1086387</dc:identifier>
    <dc:source>(2005), pp. 168-179.</dc:source>
    <dc:date>2005-09-28T08:30:09-00:00</dc:date>
    <prism:publicationYear>2005</prism:publicationYear>
    <prism:startingPage>168</prism:startingPage>
    <prism:endingPage>179</prism:endingPage>
    <prism:publisher>ACM Press</prism:publisher>
    <prism:category>functional</prism:category>
    <prism:category>language_design</prism:category>
    <prism:category>vlsi_design</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2869457">
    <title>MPFR: A multiple-precision binary floating-point library with correct rounding</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2869457</link>
    <description>&lt;i&gt;ACM Trans. Math. Softw., Vol. 33, No. 2. (June 2007)&lt;/i&gt;</description>
    <dc:title>MPFR: A multiple-precision binary floating-point library with correct rounding</dc:title>

    <dc:creator>Laurent Fousse</dc:creator>
    <dc:creator>Guillaume Hanrot</dc:creator>
    <dc:creator>Vincent Lef&#232;vre</dc:creator>
    <dc:creator>Patrick P&#233;lissier</dc:creator>
    <dc:creator>Paul Zimmermann</dc:creator>
    <dc:identifier>doi:10.1145/1236463.1236468</dc:identifier>
    <dc:source>ACM Trans. Math. Softw., Vol. 33, No. 2. (June 2007)</dc:source>
    <dc:date>2008-06-06T13:52:58-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>ACM Trans. Math. Softw.</prism:publicationName>
    <prism:issn>0098-3500</prism:issn>
    <prism:volume>33</prism:volume>
    <prism:number>2</prism:number>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>floating_point</prism:category>
    <prism:category>mixed_precision</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2869447">
    <title>Using symbolic algebra in algorithmic level DSP synthesis</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2869447</link>
    <description>&lt;i&gt;(2001), pp. 277-282.&lt;/i&gt;</description>
    <dc:title>Using symbolic algebra in algorithmic level DSP synthesis</dc:title>

    <dc:creator>Armita Peymandoust</dc:creator>
    <dc:creator>Giovanni De Micheli</dc:creator>
    <dc:identifier>doi:10.1145/378239.378485</dc:identifier>
    <dc:source>(2001), pp. 277-282.</dc:source>
    <dc:date>2008-06-06T13:45:38-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:startingPage>277</prism:startingPage>
    <prism:endingPage>282</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>high_level_synthesis</prism:category>
    <prism:category>term_rewriting</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2869308">
    <title>Hybrid system for multi-language and multi-environment generation of numerical codes</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2869308</link>
    <description>&lt;i&gt;(2001), pp. 209-216.&lt;/i&gt;</description>
    <dc:title>Hybrid system for multi-language and multi-environment generation of numerical codes</dc:title>

    <dc:creator>Joze Korelc</dc:creator>
    <dc:identifier>doi:10.1145/384101.384130</dc:identifier>
    <dc:source>(2001), pp. 209-216.</dc:source>
    <dc:date>2008-06-06T12:20:33-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:startingPage>209</prism:startingPage>
    <prism:endingPage>216</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>compiler_design</prism:category>
    <prism:category>language_design</prism:category>
    <prism:category>term_rewriting</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2869304">
    <title>Maple's evaluation process as constraint contextual rewriting</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2869304</link>
    <description>&lt;i&gt;(2001), pp. 32-37.&lt;/i&gt;</description>
    <dc:title>Maple's evaluation process as constraint contextual rewriting</dc:title>

    <dc:creator>Alessandro Armando</dc:creator>
    <dc:creator>Clemens Ballarin</dc:creator>
    <dc:identifier>doi:10.1145/384101.384107</dc:identifier>
    <dc:source>(2001), pp. 32-37.</dc:source>
    <dc:date>2008-06-06T12:17:55-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:startingPage>32</prism:startingPage>
    <prism:endingPage>37</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>term_rewriting</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2399069">
    <title>Efficient design methods for embedded communication systems</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2399069</link>
    <description>&lt;i&gt;EURASIP J. Embedded Syst., Vol. 2006, No. 1. (January 2006), pp. 21-21.&lt;/i&gt;</description>
    <dc:title>Efficient design methods for embedded communication systems</dc:title>

    <dc:creator>M Holzer</dc:creator>
    <dc:creator>B Knerr</dc:creator>
    <dc:creator>P Belanovic</dc:creator>
    <dc:creator>M Rupp</dc:creator>
    <dc:identifier>doi:10.1155/ES/2006/64913</dc:identifier>
    <dc:source>EURASIP J. Embedded Syst., Vol. 2006, No. 1. (January 2006), pp. 21-21.</dc:source>
    <dc:date>2008-02-19T14:54:25-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>EURASIP J. Embedded Syst.</prism:publicationName>
    <prism:issn>1687-3955</prism:issn>
    <prism:volume>2006</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>21</prism:startingPage>
    <prism:endingPage>21</prism:endingPage>
    <prism:publisher>Hindawi Publishing Corp.</prism:publisher>
    <prism:category>electronics_design_automation</prism:category>
    <prism:category>fixedpoint</prism:category>
    <prism:category>high_level_synthesis</prism:category>
    <prism:category>survey</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2864354">
    <title>Monte Carlo arithmetic: how to gamble with floating point and win</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2864354</link>
    <description>&lt;i&gt;Computing in Science &#38; Engineering, Vol. 2, No. 4. (2000), pp. 58-68.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;How sensitive to rounding errors are the results generated from a particular code running on a particular machine applied to a particular input? Monte Carlo arithmetic illustrates the potential for tools to support new kinds of a posteriori round-off error analysis</description>
    <dc:title>Monte Carlo arithmetic: how to gamble with floating point and win</dc:title>

    <dc:creator>DS Parker</dc:creator>
    <dc:creator>B Pierce</dc:creator>
    <dc:creator>PR Eggert</dc:creator>
    <dc:identifier>doi:10.1109/5992.852391</dc:identifier>
    <dc:source>Computing in Science &#38; Engineering, Vol. 2, No. 4. (2000), pp. 58-68.</dc:source>
    <dc:date>2008-06-05T10:47:44-00:00</dc:date>
    <prism:publicationYear>2000</prism:publicationYear>
    <prism:publicationName>Computing in Science &#38; Engineering</prism:publicationName>
    <prism:volume>2</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>58</prism:startingPage>
    <prism:endingPage>68</prism:endingPage>
    <prism:category>error_analysis</prism:category>
    <prism:category>floating_point</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2861341">
    <title>Floating-point error analysis based on affine arithmetic</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2861341</link>
    <description>&lt;i&gt;Acoustics, Speech, and Signal Processing, 2003. Proceedings. (ICASSP '03). 2003 IEEE International Conference on, Vol. 2 (2003), pp. II-561-4 vol.2.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;During the development of floating-point signal processing systems, an efficient error analysis method is needed to guarantee the output quality. We present a novel approach to floating-point error bound analysis based on affine arithmetic. The proposed method not only provides a tighter bound than the conventional approach, but also is applicable to any arithmetic operation. The error estimation accuracy is evaluated across several different applications which cover linear operations, nonlinear operations, and feedback systems. The accuracy decreases with the depth of computation path and also is affected by the linearity of the floating-point operations.</description>
    <dc:title>Floating-point error analysis based on affine arithmetic</dc:title>

    <dc:creator>CF Fang</dc:creator>
    <dc:creator>Tsuhan Chen</dc:creator>
    <dc:creator>RA Rutenbar</dc:creator>
    <dc:identifier>doi:10.1109/ICASSP.2003.1202428</dc:identifier>
    <dc:source>Acoustics, Speech, and Signal Processing, 2003. Proceedings. (ICASSP '03). 2003 IEEE International Conference on, Vol. 2 (2003), pp. II-561-4 vol.2.</dc:source>
    <dc:date>2008-06-04T14:52:29-00:00</dc:date>
    <prism:publicationYear>2003</prism:publicationYear>
    <prism:publicationName>Acoustics, Speech, and Signal Processing, 2003. Proceedings. (ICASSP '03). 2003 IEEE International Conference on</prism:publicationName>
    <prism:volume>2</prism:volume>
    <prism:startingPage>II-561</prism:startingPage>
    <prism:endingPage>II-564</prism:endingPage>
    <prism:category>error_analysis</prism:category>
    <prism:category>floating_point</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2860622">
    <title>A precision- and range-independent tool for testing floating-point arithmetic I: basic operations, square root, and remainder</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2860622</link>
    <description>&lt;i&gt;ACM Trans. Math. Softw., Vol. 27, No. 1. (March 2001), pp. 92-118.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;This paper introduces a precision- and range-independent tool for testing the compliance of hardware or software implementations of (multiprecision) floating-point arithmetic with the principles of the IEEE standards 754 and 854. The tool consists of a driver program, offering many options to test only specific aspects of the IEEE standards, and a large set of test vectors, encoded in a precision-independent syntax to allow the testing of basic and extended hardware formats as well as multiprecision floating-point implementations. The suite of test vectors stems on one hand from the integration and fully precision- and range-independent generalization of existing hardware test sets, and on the other hand from the systematic testing of exact rounding for all combinations of round and sticky bits that can occur. The former constitutes only 50% of the resulting test set. In the latter we especially focus on hard-to-round cases. In addition, the test suite implicitly tests properties of floating-point operations, following the idea of Paranoia, and it reports which of the three IEEE-compliant underflow mechanisms is used by the floating-point implementation under consideration. We also check whether that underflow mechanism is used consistently. The tool is backward compatible with the UCBTEST package and with Coonen's test syntax.</description>
    <dc:title>A precision- and range-independent tool for testing floating-point arithmetic I: basic operations, square root, and remainder</dc:title>

    <dc:creator>Brigitte Verdonk</dc:creator>
    <dc:creator>Annie Cuyt</dc:creator>
    <dc:creator>Dennis Verschaeren</dc:creator>
    <dc:identifier>doi:10.1145/382043.382404</dc:identifier>
    <dc:source>ACM Trans. Math. Softw., Vol. 27, No. 1. (March 2001), pp. 92-118.</dc:source>
    <dc:date>2008-06-04T09:41:52-00:00</dc:date>
    <prism:publicationYear>2001</prism:publicationYear>
    <prism:publicationName>ACM Trans. Math. Softw.</prism:publicationName>
    <prism:issn>0098-3500</prism:issn>
    <prism:volume>27</prism:volume>
    <prism:number>1</prism:number>
    <prism:startingPage>92</prism:startingPage>
    <prism:endingPage>118</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>floating_point</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2514901">
    <title>Mixed Precision Iterative Refinement Techniques for the Solution of Dense Linear Systems</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2514901</link>
    <description>&lt;i&gt;Int. J. High Perform. Comput. Appl., Vol. 21, No. 4. (November 2007), pp. 457-466.&lt;/i&gt;</description>
    <dc:title>Mixed Precision Iterative Refinement Techniques for the Solution of Dense Linear Systems</dc:title>

    <dc:creator>Alfredo Buttari</dc:creator>
    <dc:creator>Jack Dongarra</dc:creator>
    <dc:creator>Julie Langou</dc:creator>
    <dc:creator>Julien Langou</dc:creator>
    <dc:creator>Piotr Luszczek</dc:creator>
    <dc:creator>Jakub Kurzak</dc:creator>
    <dc:identifier>doi:10.1177/1094342007084026</dc:identifier>
    <dc:source>Int. J. High Perform. Comput. Appl., Vol. 21, No. 4. (November 2007), pp. 457-466.</dc:source>
    <dc:date>2008-03-11T13:25:50-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Int. J. High Perform. Comput. Appl.</prism:publicationName>
    <prism:issn>1094-3420</prism:issn>
    <prism:volume>21</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>457</prism:startingPage>
    <prism:endingPage>466</prism:endingPage>
    <prism:publisher>Sage Publications, Inc.</prism:publisher>
    <prism:category>cell_cpu</prism:category>
    <prism:category>floating_point</prism:category>
    <prism:category>mixed_precision</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2858543">
    <title>Developing FPGA Coprocessors for Performance-Accelerated Spacecraft Image Processing</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2858543</link>
    <description>&lt;i&gt;XCell (2008)&lt;/i&gt;</description>
    <dc:title>Developing FPGA Coprocessors for Performance-Accelerated Spacecraft Image Processing</dc:title>

    <dc:creator>Paula Pingree</dc:creator>
    <dc:creator>Lucas Scharenbroich</dc:creator>
    <dc:creator>Thomas Werne</dc:creator>
    <dc:creator>David Pellerin</dc:creator>
    <dc:source>XCell (2008)</dc:source>
    <dc:date>2008-06-03T08:43:31-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>XCell</prism:publicationName>
    <prism:category>electronics_design_automation</prism:category>
    <prism:category>fpga</prism:category>
    <prism:category>reconfigurable_computing</prism:category>
    <prism:category>svm</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2857390">
    <title>MPI Collectives on Modern Multicore Clusters: Performance Optimizations and Communication Characteristics</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2857390</link>
    <description>&lt;i&gt;Cluster Computing and the Grid, 2008. CCGRID '08. 8th IEEE International Symposium on (2008), pp. 130-137.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The advances in multicore technology and modern interconnects is rapidly accelerating the number of cores deployed in today's commodity clusters. A majority of parallel applications written in MPI employ collective operations in their communication kernels. Optimization of these operations on the multicore platforms is the key to obtaining good performance speed-ups. However, designing these operations on the modern multicores is a non-trivial task. Modern multicores such as Intel's Clovertown and AMD's Opteron feature various architectural attributes resulting in interesting ramifications. For example, Clovertown deploys shared L2 caches for a pair of cores whereas in Opteron, L2 caches are exclusive to a core. Understanding the impact of these architectures on communication performance is crucial to designing efficient collective algorithms. In this paper, we systematically evaluate these architectures and use these insights to develop efficient collective operations such as MPI_Bcast, MPI_Allgather, MPI_Allreduce and MPI_Alltoall. Further, we characterize the behavior of these collective algorithms on multicores especially when concurrent network and intra-node communications occur. We also evaluate the benefits of the proposed intra-node MPI_Allreduce over Opteron multicores and compare it with Intel Clovertown systems. The optimizations proposed in this paper reduce the latency of MPI_Bcast and MPI_Allgather by 1.9 and 4.0 times, respectively on 512 cores. For MPI_Allreduce, our optimizations improve the performance by as much as 33% on the multicores. Further, we observe up to three times improvement in performance for matrix multiplication benchmark on 512 cores.</description>
    <dc:title>MPI Collectives on Modern Multicore Clusters: Performance Optimizations and Communication Characteristics</dc:title>

    <dc:creator>Amith Mamidala</dc:creator>
    <dc:creator>Rahul Kumar</dc:creator>
    <dc:creator>Debraj De</dc:creator>
    <dc:creator>DK Panda</dc:creator>
    <dc:identifier>doi:10.1109/CCGRID.2008.87</dc:identifier>
    <dc:source>Cluster Computing and the Grid, 2008. CCGRID '08. 8th IEEE International Symposium on (2008), pp. 130-137.</dc:source>
    <dc:date>2008-06-02T15:12:54-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>Cluster Computing and the Grid, 2008. CCGRID '08. 8th IEEE International Symposium on</prism:publicationName>
    <prism:startingPage>130</prism:startingPage>
    <prism:endingPage>137</prism:endingPage>
    <prism:category>communication_algorithms</prism:category>
    <prism:category>infiniband</prism:category>
    <prism:category>mpi</prism:category>
    <prism:category>performance_analysis</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2837205">
    <title>NASA Guide to Grammar, Punctuation, and Capitalization: A Handbook for Technical Writers and Editors</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2837205</link>
    <description>&lt;i&gt;(1990)&lt;/i&gt;</description>
    <dc:title>NASA Guide to Grammar, Punctuation, and Capitalization: A Handbook for Technical Writers and Editors</dc:title>

    <dc:creator>Mary McCaskill</dc:creator>
    <dc:source>(1990)</dc:source>
    <dc:date>2008-05-27T10:49:43-00:00</dc:date>
    <prism:publicationYear>1990</prism:publicationYear>
    <prism:category>scientific_publishing</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/1179116">
    <title>Using FPGA Devices to Accelerate Biomolecular Simulations</title>
    <link>http://www.citeulike.org/user/mmuecke/article/1179116</link>
    <description>&lt;i&gt;Computer, Vol. 40, No. 3. (2007), pp. 66-73.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;A field-programmable gate array implementation of a molecular dynamics simulation method reduces the microprocessor time-to-solution by a factor of three while using only high-level languages. The application speedup on FPGA devices increases with the problem size. The authors use a performance model to analyze the potential of simulating large-scale biological systems faster than many cluster-based supercomputing platforms.</description>
    <dc:title>Using FPGA Devices to Accelerate Biomolecular Simulations</dc:title>

    <dc:creator>Sadaf Alam</dc:creator>
    <dc:creator>Pratul Agarwal</dc:creator>
    <dc:creator>Melissa Smith</dc:creator>
    <dc:creator>Jeffrey Vetter</dc:creator>
    <dc:creator>David Caliga</dc:creator>
    <dc:source>Computer, Vol. 40, No. 3. (2007), pp. 66-73.</dc:source>
    <dc:date>2007-03-21T14:21:59-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Computer</prism:publicationName>
    <prism:volume>40</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>66</prism:startingPage>
    <prism:endingPage>73</prism:endingPage>
    <prism:category>fpgasupercomputing</prism:category>
    <prism:category>high-performance_computing</prism:category>
    <prism:category>molecular_dynamics</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/1179110">
    <title>Trident: From High-Level Language to Hardware Circuitry</title>
    <link>http://www.citeulike.org/user/mmuecke/article/1179110</link>
    <description>&lt;i&gt;Computer, Vol. 40, No. 3. (2007), pp. 28-37.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Unlocking the potential of field-programmable gate arrays requires compilers that translate algorithmic high-level language code into hardware circuits. The Trident open source compiler translates C code to a hardware circuit description, providing designers with extreme flexibility in prototyping reconfigurable supercomputers.</description>
    <dc:title>Trident: From High-Level Language to Hardware Circuitry</dc:title>

    <dc:creator>Justin Tripp</dc:creator>
    <dc:creator>Maya Gokhale</dc:creator>
    <dc:creator>Kristopher Peterson</dc:creator>
    <dc:source>Computer, Vol. 40, No. 3. (2007), pp. 28-37.</dc:source>
    <dc:date>2007-03-21T14:17:39-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Computer</prism:publicationName>
    <prism:volume>40</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>28</prism:startingPage>
    <prism:endingPage>37</prism:endingPage>
    <prism:category>electronics_design_automation</prism:category>
    <prism:category>fpga</prism:category>
    <prism:category>high_level_synthesis</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/1405131">
    <title>Scientific Computing Kernels on the Cell Processor</title>
    <link>http://www.citeulike.org/user/mmuecke/article/1405131</link>
    <description>&lt;i&gt;International Journal of Parallel Programming, Vol. 35, No. 3. (June 2007), pp. 263-298.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;In this work, we examine the potential of using the recently-released STI Cell processor as a building block for future high-end scientific computing systems. Our work contains several novel contributions. First, we introduce a performance model for Cell and apply it to several key numerical kernels: dense matrix multiply, sparse matrix vector multiply, stencil computations, and 1D/2D FFTs. Next, we validate our model by comparing results against published hardware data, as well as our own Cell blade implementations. Additionally, we compare Cell performance to benchmarks run on leading superscalar (AMD Opteron), VLIW (Intel Itanium2), and vector (Cray X1E) architectures. Our work also explores several different kernel implementations and demonstrates a simple and effective programming model for Cell’s unique architecture. Finally, we propose modest microarchitectural modifications that could significantly increase the efficiency of double-precision calculations. Overall results demonstrate the tremendous potential of the Cell architecture for scientific computations in terms of both raw performance and power efficiency.</description>
    <dc:title>Scientific Computing Kernels on the Cell Processor</dc:title>

    <dc:creator>Samuel Williams</dc:creator>
    <dc:creator>John Shalf</dc:creator>
    <dc:creator>Leonid Oliker</dc:creator>
    <dc:creator>Shoaib Kamil</dc:creator>
    <dc:creator>Parry Husbands</dc:creator>
    <dc:creator>Katherine Yelick</dc:creator>
    <dc:identifier>doi:10.1007/s10766-007-0034-5</dc:identifier>
    <dc:source>International Journal of Parallel Programming, Vol. 35, No. 3. (June 2007), pp. 263-298.</dc:source>
    <dc:date>2007-06-22T19:15:35-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>International Journal of Parallel Programming</prism:publicationName>
    <prism:volume>35</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>263</prism:startingPage>
    <prism:endingPage>298</prism:endingPage>
    <prism:category>cell_cpu</prism:category>
    <prism:category>floating_point</prism:category>
    <prism:category>high-performance_computing</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2833953">
    <title>Accelerating Scientific Applications with Reconfigurable Computing: Getting Started</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2833953</link>
    <description>&lt;i&gt;Computing in Science &#38; Engineering, Vol. 9, No. 5. (2007), pp. 70-77.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;High-performance reconfigurable computing combines the advantages of the coarse-grain parallel processing provided in conventional multiprocessor systems with the fine-grain parallel processing available in field-programmable gate arrays.</description>
    <dc:title>Accelerating Scientific Applications with Reconfigurable Computing: Getting Started</dc:title>

    <dc:creator>VV Kindratenko</dc:creator>
    <dc:creator>CP Steffen</dc:creator>
    <dc:creator>RJ Brunner</dc:creator>
    <dc:identifier>doi:10.1109/MCSE.2007.91</dc:identifier>
    <dc:source>Computing in Science &#38; Engineering, Vol. 9, No. 5. (2007), pp. 70-77.</dc:source>
    <dc:date>2008-05-26T12:14:42-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Computing in Science &#38; Engineering</prism:publicationName>
    <prism:volume>9</prism:volume>
    <prism:number>5</prism:number>
    <prism:startingPage>70</prism:startingPage>
    <prism:endingPage>77</prism:endingPage>
    <prism:category>high-performance_computing</prism:category>
    <prism:category>reconfigurable_computing</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/1179114">
    <title>Sparse Matrix Computations on Reconfigurable Hardware</title>
    <link>http://www.citeulike.org/user/mmuecke/article/1179114</link>
    <description>&lt;i&gt;Computer, Vol. 40, No. 3. (2007), pp. 58-64.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;Using a high-level-language to hardware-description-language compiler and some novel architectures and algorithms to map two well-known double-precision floating-point sparse matrix iterative-linear-equation solvers--the Jacobi and conjugate gradient methods--onto a reconfigurable computer achieves more than a twofold speedup over software.</description>
    <dc:title>Sparse Matrix Computations on Reconfigurable Hardware</dc:title>

    <dc:creator>Viktor Prasanna</dc:creator>
    <dc:creator>Gerald Morris</dc:creator>
    <dc:identifier>doi:10.1109/MC.2007.103</dc:identifier>
    <dc:source>Computer, Vol. 40, No. 3. (2007), pp. 58-64.</dc:source>
    <dc:date>2007-03-21T14:21:11-00:00</dc:date>
    <prism:publicationYear>2007</prism:publicationYear>
    <prism:publicationName>Computer</prism:publicationName>
    <prism:volume>40</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>58</prism:startingPage>
    <prism:endingPage>64</prism:endingPage>
    <prism:category>floating_point</prism:category>
    <prism:category>high_level_synthesis</prism:category>
    <prism:category>high-performance_computing</prism:category>
    <prism:category>reconfigurable_computing</prism:category>
    <prism:category>sparse_matrix</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/595170">
    <title>Hardware Support for Interval Arithmetic</title>
    <link>http://www.citeulike.org/user/mmuecke/article/595170</link>
    <description>&lt;i&gt;Reliable Computing, Vol. 12, No. 3. (June 2006), pp. 225-237.&lt;/i&gt;</description>
    <dc:title>Hardware Support for Interval Arithmetic</dc:title>

    <dc:creator>Reinhard Kirchner</dc:creator>
    <dc:creator>Ulrich Kulisch</dc:creator>
    <dc:identifier>doi:10.1007/s11155-006-7220-9</dc:identifier>
    <dc:source>Reliable Computing, Vol. 12, No. 3. (June 2006), pp. 225-237.</dc:source>
    <dc:date>2006-04-22T16:04:53-00:00</dc:date>
    <prism:publicationYear>2006</prism:publicationYear>
    <prism:publicationName>Reliable Computing</prism:publicationName>
    <prism:issn>1385-3139</prism:issn>
    <prism:volume>12</prism:volume>
    <prism:number>3</prism:number>
    <prism:startingPage>225</prism:startingPage>
    <prism:endingPage>237</prism:endingPage>
    <prism:publisher>Springer</prism:publisher>
    <prism:category>floating_point</prism:category>
    <prism:category>interval_arithmetic</prism:category>
    <prism:category>vlsi_design</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2818440">
    <title>Advanced Arithmetic for the Digital Computer</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2818440</link>
    <description>&lt;i&gt;(20 November 2002)&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;The book deals with computer arithmetic in a more general sense than usual. Advanced computer arithmetic requires that all computer approximations of arithmetic operations – in particular those in the usual vector and matrix spaces – differ from the correct result by at most one rounding. The implementation of advanced computer arithmetic by fast hardware is examined in the book. The new expanded computational capability is gained at modest cost. It increases both the speed of a computation and the accuracy of the computed result. With it fast multiple precision arithmetic can be easily provided. All this strongly supports the case for implementing advanced computer arithmetic on every CPU. The book also shows that on superscalar processors interval operations can be made as fast as simple floating-point operations with only very modest additional hardware costs.</description>
    <dc:title>Advanced Arithmetic for the Digital Computer</dc:title>

    <dc:creator>Ulrich Kulisch</dc:creator>
    <dc:source>(20 November 2002)</dc:source>
    <dc:date>2008-05-21T07:29:03-00:00</dc:date>
    <prism:publicationYear>2002</prism:publicationYear>
    <prism:publisher>Springer</prism:publisher>
    <prism:category>book</prism:category>
    <prism:category>computer_architecture</prism:category>
    <prism:category>floating_point</prism:category>
    <prism:category>interval_arithmetic</prism:category>
</item>



<item rdf:about="http://www.citeulike.org/user/mmuecke/article/2774707">
    <title>Evaluating the performance of single and multiple core processors with PCMARK&#174;05 and benchmark analysis</title>
    <link>http://www.citeulike.org/user/mmuecke/article/2774707</link>
    <description>&lt;i&gt;SIGMETRICS Perform. Eval. Rev., Vol. 35, No. 4. (March 2008), pp. 62-71.&lt;/i&gt;</description>
    <dc:title>Evaluating the performance of single and multiple core processors with PCMARK&#174;05 and benchmark analysis</dc:title>

    <dc:creator>Fadi Sibai</dc:creator>
    <dc:identifier>doi:10.1145/1364644.1364647</dc:identifier>
    <dc:source>SIGMETRICS Perform. Eval. Rev., Vol. 35, No. 4. (March 2008), pp. 62-71.</dc:source>
    <dc:date>2008-05-09T08:11:22-00:00</dc:date>
    <prism:publicationYear>2008</prism:publicationYear>
    <prism:publicationName>SIGMETRICS Perform. Eval. Rev.</prism:publicationName>
    <prism:issn>0163-5999</prism:issn>
    <prism:volume>35</prism:volume>
    <prism:number>4</prism:number>
    <prism:startingPage>62</prism:startingPage>
    <prism:endingPage>71</prism:endingPage>
    <prism:publisher>ACM</prism:publisher>
    <prism:category>computer_architecture</prism:category>
    <prism:category>performance_analysis</prism:category>
</item>



</rdf:RDF>

