% Conference-paper record. The citation key IMM2005-04191 is kept unchanged so
% existing citations continue to resolve. Given names are stored as initials
% only, as in the original record -- TODO: expand to full names if known.
% Empty placeholder fields (volume, series, editor, organization, address)
% were dropped; add them back only with real values.
@inproceedings{IMM2005-04191,
  author    = {Antelo, E. and Lang, T. and Montuschi, P. and Nannarelli, A.},
  title     = {Low Latency Digit-Recurrence Reciprocal and Square-Root Reciprocal Algorithm and Architecture},
  booktitle = {Proc. of 17th Symposium on Computer Arithmetic},
  pages     = {147--152},
  publisher = {{IEEE}},
  year      = {2005},
  month     = jun,
  keywords  = {reciprocal, square-root reciprocal, arithmetic},
  url       = {http://www2.compute.dtu.dk/pubdb/pubs/4191-full.html},
  abstract  = {The reciprocal and square-root reciprocal operations are important in several applications. For these operations, we present algorithms that combine a digit-by-digit module and one iteration of a quadratic-convergence approximation. The latter is implemented by a digit-recurrence, which uses the digits produced by the digit-by-digit part. In this way, both parts execute in an overlapped manner, so that the total number of cycles is about half of the number that would be required by the digit-by-digit part alone. Because of the approximation, correct rounding of the result cannot be obtained directly in all cases; we propose a variable-time implementation that produces the correctly rounded result with a small average overhead. Radix-4 implementations are described and have been synthesized. They achieve the same cycle time as the standard digit-by-digit implementation, resulting in a speed-up of about 2 and, because of the approximation part, the area factor is also about 2. We also show a combined implementation for both operations that has essentially the same complexity as that for square-root reciprocal alone.},
}