% Conference paper describing the Rectangular Full Packed (RFP) storage format
% for triangular/symmetric matrices; record taken from the publication
% database referenced in `url`.
% NOTE(review): `pages` carries the value 10 from the source record; it looks
% like a page count rather than a page range -- confirm against the published
% proceedings and replace with a `first--last` range.
% NOTE(review): the source record marked the proceedings as forthcoming
% ("is coming"); once the volume is confirmed, add `volume`/`editor` and drop
% the `note`.
@inproceedings{IMM2006-05085,
  author    = {Gustavson, F. G. and Wasniewski, J.},
  title     = {Rectangular Full Packed Format for {LAPACK} Algorithms Timings on Several Computers},
  booktitle = {Conference Proceedings of {PARA'06}, Ume{\aa}, Sweden},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  year      = {2006},
  month     = jun,
  pages     = {10},
  note      = {To appear},
  keywords  = {Symmetric matrices, Factorization, Cholesky Algorithm, Inversion, Solution},
  url       = {http://www2.compute.dtu.dk/pubdb/pubs/5085-full.html},
  abstract  = {We describe a new data format for storing triangular and symmetric matrices called {RFP} (Rectangular Full Packed). The standard two dimensional arrays of Fortran and C (also known as full format) that are used to store triangular and symmetric matrices waste nearly half the storage space but provide high performance via the use of level~3 {BLAS}. Standard packed format arrays fully utilize storage (array space) but provide low performance as there are no level~3 packed {BLAS}. We combine the good features of packed and full storage using {RFP} format to obtain high performance using L3 (level~3) {BLAS} as {RFP} is full format. Also, {RFP} format requires exactly the same minimal storage as packed format. Each full and/or packed symmetric/triangular routine becomes a single new {RFP} routine. We present {LAPACK} routines for Cholesky factorization, inverse and solution computation in {RFP} format to illustrate this new work and to describe its performance on the {IBM}, Itanium, {NEC}, and {SUN} platforms. Performance of {RFP} versus {LAPACK} full routines for both serial and {SMP} parallel processing is about the same while using half the storage. Performance is roughly one to a factor of 33 for serial and one to a factor of 100 for {SMP} parallel times faster than {LAPACK} packed routines. Existing {LAPACK} routines and vendor {LAPACK} routines were used in the serial and the {SMP} parallel study respectively. In both studies vendor L3 {BLAS} were used.},
}