diff options
author | Juergen Beisert <j.beisert@pengutronix.de> | 2009-04-09 14:03:17 +0000 |
---|---|---|
committer | Juergen Beisert <j.beisert@pengutronix.de> | 2009-04-09 14:03:17 +0000 |
commit | 433266df9bd33ab25444d5dad15613625232263a (patch) | |
tree | 39ec3dd59f64d83bd7a0b8056fbda8717d61c745 | |
download | floatings-433266df9bd33ab25444d5dad15613625232263a.tar.gz floatings-433266df9bd33ab25444d5dad15613625232263a.tar.xz |
add simple floating point benchmarks
-rw-r--r-- | GNUmakefile.am | 16 | ||||
-rwxr-xr-x | autogen.sh | 22 | ||||
-rw-r--r-- | configure.ac | 62 | ||||
-rw-r--r-- | src/GNUmakefile.am | 17 | ||||
-rw-r--r-- | src/ff_bench.c | 408 | ||||
-rw-r--r-- | src/float_bench.c | 843 |
6 files changed, 1368 insertions, 0 deletions
diff --git a/GNUmakefile.am b/GNUmakefile.am new file mode 100644 index 0000000..beeb261 --- /dev/null +++ b/GNUmakefile.am @@ -0,0 +1,16 @@ +SUBDIRS = \ + src + +EXTRA_DIST = \ + autogen.sh + +MAINTAINERCLEANFILES = \ + configure \ + config.h.in \ + GNUmakefile.in \ + aclocal.m4 \ + $(DIST_ARCHIVES) + +maintainer-clean-local: + -chmod -R a+rw $(distdir) + -rm -fr $(distdir) diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..29db9c9 --- /dev/null +++ b/autogen.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# +# usage: +# +# banner <target name> +# +banner() { + echo + TG=`echo $1 | sed -e "s,/.*/,,g"` + LINE=`echo $TG |sed -e "s/./-/g"` + echo $LINE + echo $TG + echo $LINE + echo +} + +banner "autoreconf" + +autoreconf --force --install --symlink -Wall || exit $? + +banner "Finished" diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..ace6279 --- /dev/null +++ b/configure.ac @@ -0,0 +1,62 @@ +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. +AC_PREREQ(2.59) + +AC_INIT([floatings], [1.0.0], [bugs@pengutronix.de]) +AC_CONFIG_HEADERS([config.h]) +AC_CONFIG_SRCDIR([src/float_bench.c]) +AC_CANONICAL_BUILD +AC_CANONICAL_HOST + +AM_MAINTAINER_MODE + +CFLAGS="${CFLAGS} -Wall" + +# +# Checks for programs. +# +AC_PROG_CC +AC_PROG_INSTALL + +AM_INIT_AUTOMAKE([foreign no-exeext dist-bzip2]) + +# +# Checks for header files. +# +AC_CHECK_HEADERS([ \ + stdlib.h \ + string.h \ + ]) + +# +# Checks for library functions. +# +AC_FUNC_MALLOC +AC_CHECK_FUNCS([memset sqrt]) + + +# +# Debugging +# +AC_MSG_CHECKING([whether to enable debugging]) +AC_ARG_ENABLE(debug, + AS_HELP_STRING([--enable-debug], [enable debugging @<:@default=no@:>@]), + [case "$enableval" in + y | yes) CONFIG_DEBUG=yes ;; + *) CONFIG_DEBUG=no ;; + esac], + [CONFIG_DEBUG=no]) +AC_MSG_RESULT([${CONFIG_DEBUG}]) +if test "${CONFIG_DEBUG}" = "yes"; then + CFLAGS="${CFLAGS} -Werror -Wsign-compare -Wfloat-equal -Wformat-security -g -O1" + AC_DEFINE(DEBUG, 1, [debugging]) +else + CFLAGS="${CFLAGS} -O3" +fi + +AC_CONFIG_FILES([ + GNUmakefile + src/GNUmakefile + ]) +AC_OUTPUT + diff --git a/src/GNUmakefile.am b/src/GNUmakefile.am new file mode 100644 index 0000000..83a1a7d --- /dev/null +++ b/src/GNUmakefile.am @@ -0,0 +1,17 @@ +bin_PROGRAMS = \ + float_bench \ + ff_bench + +AM_CPPFLAGS = \ + -I$(top_builddir) + +float_bench_SOURCES = \ + float_bench.c +float_bench_LDADD = -lm + +ff_bench_SOURCES = \ + ff_bench.c +ff_bench_LDADD = -lm + +MAINTAINERCLEANFILES = \ + GNUmakefile.in diff --git a/src/ff_bench.c b/src/ff_bench.c new file mode 100644 index 0000000..e2b60d5 --- /dev/null +++ b/src/ff_bench.c @@ -0,0 +1,408 @@ +/* + + Two-dimensional FFT benchmark + + Designed and implemented by John Walker in April of 1989. + + This benchmark executes a specified number of passes (default + 20) through a loop in which each iteration performs a fast + Fourier transform of a square matrix (default size 256x256) of + complex numbers (default precision double), followed by the + inverse transform. After all loop iterations are performed + the results are checked against known correct values. + + This benchmark is intended for use on C implementations which + define "int" as 32 bits or longer and permit allocation and + direct addressing of arrays larger than one megabyte. + + If CAPOUT is defined, the result after all iterations is + written as a CA Lab pattern file. This is intended for + debugging in case horribly wrong results are obtained on a + given machine. + + Archival timings are run with the definitions below set as + follows: Float = double, Asize = 256, Passes = 20, CAPOUT not + defined. + + Time (seconds) System + + 2393.93 Sun 3/260, SunOS 3.4, C, "-f68881 -O". + (John Walker). + + 1928 Macintosh IIx, MPW C 3.0, "-mc68020 + -mc68881 -elems881 -m". (Hugh Hoover). + + 1636.1 Sun 4/110, "cc -O3 -lm". (Michael McClary). + The suspicion is that this is software + floating point. + + 1556.7 Macintosh II, A/UX, "cc -O -lm" + (Michael McClary). + + 1388.8 Sun 386i/250, SunOS 4.0.1 C + "-O /usr/lib/trig.il". (James Carrington). + + 1331.93 Sun 3/60, SunOS 4.0.1, C, + "-O4 -f68881 /usr/lib/libm.il" + (Bob Elman). + + 1204.0 Apollo Domain DN4000, C, "-cpu 3000 -opt 4". + (Sam Crupi). + + 1174.66 Compaq 386/25, SCO Xenix 386 C. + (Peter Shieh). + + 1068 Compaq 386/25, SCO Xenix 386, + Metaware High C. (Robert Wenig). + + 1064.0 Sun 3/80, SunOS 4.0.3 Beta C + "-O3 -f68881 /usr/lib/libm.il". (James Carrington). + + 1061.4 Compaq 386/25, SCO Xenix, High C 1.4. + (James Carrington). + + 1059.79 Compaq 386/25, 387/25, High C 1.4, + DOS|Extender 2.2, 387 inline code + generation. (Nathan Bender). + + 777.14 Compaq 386/25, IIT 3C87-25 (387 Compatible), + High C 1.5, DOS|Extender 2.2, 387 inline + code generation. (Nathan Bender). + + 751 Compaq DeskPro 386/33, High C 1.5 + DOS|Extender, + 387 code generation. (James Carrington). + + 431.44 Compaq 386/25, Weitek 3167-25, DOS 3.31, + High C 1.4, DOS|Extender, Weitek code generation. + (Nathan Bender). + + 344.9 Compaq 486/25, Metaware High C 1.6, Phar Lap + DOS|Extender, in-line floating point. (Nathan + Bender). + + 324.2 Data General Motorola 88000, 16 Mhz, Gnu C. + + 323.1 Sun 4/280, C, "-O4". (Eric Hill). + + 254 Compaq SystemPro 486/33, High C 1.5 + DOS|Extender, + 387 code generation. (James Carrington). + + 242.8 Silicon Graphics Personal IRIS, MIPS R2000A, + 12.5 Mhz, "-O3" (highest level optimisation). + (Mike Zentner). + + 233.0 Sun SPARCStation 1, C, "-O4", SunOS 4.0.3. + (Nathan Bender). + + 187.30 DEC PMAX 3100, MIPS 2000 chip. + (Robert Wenig). + + 120.46 Sun SparcStation 2, C, "-O4", SunOS 4.1.1. + (John Walker). + + 120.21 DEC 3MAX, MIPS 3000, "-O4". + + 98.0 Intel i860 experimental environment, + OS/2, data caching disabled. (Kern + Sibbald). + + 34.9 Silicon Graphics Indigo², MIPS R4400, + 175 Mhz, IRIX 5.2, "-O". + + 32.4 Pentium 133, Windows NT, Microsoft Visual + C++ 4.0. + + 17.25 Silicon Graphics Indigo², MIPS R4400, + 175 Mhz, IRIX 6.5, "-O3". + + 14.10 Dell Dimension XPS R100, Pentium II 400 MHz, + Windows 98, Microsoft Visual C 5.0. + + 10.7 Hewlett-Packard Kayak XU 450Mhz Pentium II, + Microsoft Visual C++ 6.0, Windows NT 4.0sp3. (Nathan Bender). + + 5.09 Sun Ultra 2, UltraSPARC V9, 300 MHz, gcc -O3. + + 0.846 Dell Inspiron 9100, Pentium 4, 3.4 GHz, gcc -O3. + +*/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/time.h> +#include <math.h> + +/* The program may be run with Float defined as either float or + double. With IEEE arithmetic, the same answers are generated for + either floating point mode. */ + +#define Float double /* Floating point type used in FFT */ + +#define Asize 256 /* Array edge size */ +#define Passes 20 /* Number of FFT/Inverse passes */ + +#define max(a,b) ((a)>(b)?(a):(b)) +#define min(a,b) ((a)<=(b)?(a):(b)) + +#define FWMODE "w" + +/* + + Multi-dimensional fast Fourier transform + + Adapted from Press et al., "Numerical Recipes in C". + +*/ + +#define SWAP(a,b) tempr=(a); (a)=(b); (b)=tempr + +static void fourn(Float data[], int nn[], int ndim, int isign) +{ + int i1, i2, i3; + int i2rev, i3rev, ip1, ip2, ip3, ifp1, ifp2; + int ibit, idim, k1, k2, n, nprev, nrem, ntot; + Float tempi, tempr; + double theta, wi, wpi, wpr, wr, wtemp; + + ntot = 1; + for (idim = 1; idim <= ndim; idim++) + ntot *= nn[idim]; + nprev = 1; + for (idim = ndim; idim >= 1; idim--) { + n = nn[idim]; + nrem = ntot / (n * nprev); + ip1 = nprev << 1; + ip2 = ip1 * n; + ip3 = ip2 * nrem; + i2rev = 1; + for (i2 = 1; i2 <= ip2; i2 += ip1) { + if (i2 < i2rev) { + for (i1 = i2; i1 <= i2 + ip1 - 2; i1 += 2) { + for (i3 = i1; i3 <= ip3; i3 += ip2) { + i3rev = i2rev + i3 - i2; + SWAP(data[i3], data[i3rev]); + SWAP(data[i3 + 1], data[i3rev + 1]); + } + } + } + ibit = ip2 >> 1; + while (ibit >= ip1 && i2rev > ibit) { + i2rev -= ibit; + ibit >>= 1; + } + i2rev += ibit; + } + ifp1 = ip1; + while (ifp1 < ip2) { + ifp2 = ifp1 << 1; + theta = isign * 6.28318530717959 / (ifp2 / ip1); + wtemp = sin(0.5 * theta); + wpr = -2.0 * wtemp * wtemp; + wpi = sin(theta); + wr = 1.0; + wi = 0.0; + for (i3 = 1; i3 <= ifp1; i3 += ip1) { + for (i1 = i3; i1 <= i3 + ip1 - 2; i1 += 2) { + for (i2 = i1; i2 <= ip3; i2 += ifp2) { + k1 = i2; + k2 = k1 + ifp1; + tempr = wr * data[k2] - wi * data[k2 + 1]; + tempi = wr * data[k2 + 1] + wi * data[k2]; + data[k2] = data[k1] - tempr; + data[k2 + 1] = data[k1 + 1] - tempi; + data[k1] += tempr; + data[k1 + 1] += tempi; + } + } + wr = (wtemp = wr) * wpr - wi * wpi + wr; + wi = wi * wpr + wtemp * wpi + wi; + } + ifp1 = ifp2; + } + nprev *= n; + } +} +#undef SWAP + +static void usage(const char *prg, FILE *out) +{ + fprintf(out, "usage: %s [options]\n", prg); + fprintf(out, " options are:\n"); + fprintf(out, " '-d value' calculate a dimension of (value * value). Default is (256 * 256)\n"); + fprintf(out, " '-p value' passes. Default is 20\n"); + fprintf(out, " '-v' print program version and exit\n"); + fprintf(out, " '-h' print this help and exit\n"); +} + +int main(int argc, char *argv[]) +{ + int i, j, k, l, m, npasses = Passes, faedge = Asize, c, ret; + Float *fdata; + static int nsize[] = {0, 0, 0}; + long fanum, fasize; + double mapbase, mapscale, rmin, rmax, imin, imax; + struct timeval begin_tv, end_tv, diff_tv; + + /* handle command line options first */ + while (1) { + c = getopt(argc, argv, "d:p:hv"); + if (c == -1) + break; + + switch (c) { + case 'd': + faedge = atoi(optarg); /* FFT array edge size */ + break; + case 'p': + npasses = atoi(optarg); + break; + case 'h': + usage(argv[0], stdout); + exit(0); + case 'v': + printf("%s %s\n", PACKAGE_NAME, PACKAGE_VERSION); + exit(0); + } + } + + /* now we start. Get the current time */ + gettimeofday(&begin_tv, NULL); + + fanum = faedge * faedge; /* Elements in FFT array */ + fasize = ((fanum + 1) * 2 * sizeof(Float)); /* FFT array size */ + nsize[1] = nsize[2] = faedge; + + fdata = (Float *) malloc(fasize); + if (fdata == NULL) { + fprintf(stderr, "Can't allocate data array.\n"); + exit(1); + } + + /* Generate data array to process. */ + +#define Re(x,y) fdata[1 + (faedge * (x) + (y)) * 2] +#define Im(x,y) fdata[2 + (faedge * (x) + (y)) * 2] + + memset(fdata, 0, fasize); + for (i = 0; i < faedge; i++) { + for (j = 0; j < faedge; j++) { + if (((i & 15) == 8) || ((j & 15) == 8)) + Re(i, j) = 128.0; + } + } + + for (i = 0; i < npasses; i++) { + /* Transform image to frequency domain. */ + fourn(fdata, nsize, 2, 1); + + /* Back-transform to image. */ + fourn(fdata, nsize, 2, -1); + } + + { + double r, ij, ar, ai; + rmin = 1e10; rmax = -1e10; + imin = 1e10; imax = -1e10; + ar = 0; + ai = 0; + + for (i = 1; i <= fanum; i += 2) { + r = fdata[i]; + ij = fdata[i + 1]; + ar += r; + ai += ij; + rmin = min(r, rmin); + rmax = max(r, rmax); + imin = min(ij, imin); + imax = max(ij, imax); + } +#ifdef DEBUG + printf("Real min %.4g, max %.4g. Imaginary min %.4g, max %.4g.\n", + rmin, rmax, imin, imax); + printf("Average real %.4g, imaginary %.4g.\n", + ar / fanum, ai / fanum); +#endif + mapbase = rmin; + mapscale = 255 / (rmax - rmin); + } + + /* See if we got the right answers. */ + + m = 0; + for (i = 0; i < faedge; i++) { + for (j = 0; j < faedge; j++) { + k = (Re(i, j) - mapbase) * mapscale; + l = (((i & 15) == 8) || ((j & 15) == 8)) ? 255 : 0; + if (k != l) { + m++; + fprintf(stderr, + "Wrong answer at (%d,%d)! Expected %d, got %d.\n", + i, j, l, k); + } + } + } + if (m == 0) { + fprintf(stderr, "%d passes. No errors in results.\n", npasses); + ret = 0; + } else { + fprintf(stderr, "%d passes. %d errors in results.\n", + npasses, m); + ret = 1; + } + +#ifdef CAPOUT + + /* Output the result of the transform as a CA Lab pattern + file for debugging. */ + + { +#define SCRX 322 +#define SCRY 200 +#define SCRN (SCRX * SCRY) + unsigned char patarr[SCRY][SCRX]; + FILE *fp; + +/* Map user external state numbers to internal state index */ + +#define UtoI(x) (((((x) >> 1) & 0x7F) | ((x) << 7)) & 0xFF) + + /* Copy data from FFT buffer to map. */ + + memset(patarr, 0, sizeof patarr); + l = (SCRX - faedge) / 2; + m = (faedge > SCRY) ? 0 : ((SCRY - faedge) / 2); + for (i = 1; i < faedge; i++) { + for (j = 0; j < min(SCRY, faedge); j++) { + k = (Re(i, j) - mapbase) * mapscale; + patarr[j + m][i + l] = UtoI(k); + } + } + + /* Dump pattern map to file. */ + + fp = fopen("fft.cap", "w"); + if (fp == NULL) { + fprintf(stderr, "Cannot open output file.\n"); + exit(0); + } + putc(':', fp); + putc(1, fp); + fwrite(patarr, SCRN, 1, fp); + putc(6, fp); + fclose(fp); + } +#endif + + gettimeofday(&end_tv, NULL); + timersub(&end_tv, &begin_tv, &diff_tv); + + printf("Time(s): %u.%u\n", diff_tv.tv_sec, diff_tv.tv_usec); + + return ret; +} diff --git a/src/float_bench.c b/src/float_bench.c new file mode 100644 index 0000000..b8c4f71 --- /dev/null +++ b/src/float_bench.c @@ -0,0 +1,843 @@ +/* + + John Walker's Floating Point Benchmark, derived from... + + Marinchip Interactive Lens Design System + + John Walker December 1980 + + By John Walker + http://www.fourmilab.ch/ + + This program may be used, distributed, and modified freely as + long as the origin information is preserved. + + This is a complete optical design raytracing algorithm, + stripped of its user interface and recast into portable C. It + not only determines execution speed on an extremely floating + point (including trig function) intensive real-world + application, it checks accuracy on an algorithm that is + exquisitely sensitive to errors. The performance of this + program is typically far more sensitive to changes in the + efficiency of the trigonometric library routines than the + average floating point program. + + The benchmark may be compiled in two modes. If the symbol + INTRIG is defined, built-in trigonometric and square root + routines will be used for all calculations. Timings made with + INTRIG defined reflect the machine's basic floating point + performance for the arithmetic operators. If INTRIG is not + defined, the system library <math.h> functions are used. + Results with INTRIG not defined reflect the system's library + performance and/or floating point hardware support for trig + functions and square root. Results with INTRIG defined are a + good guide to general floating point performance, while + results with INTRIG undefined indicate the performance of an + application which is math function intensive. + + Special note regarding errors in accuracy: this program has + generated numbers identical to the last digit it formats and + checks on the following machines, floating point + architectures, and languages: + + Marinchip 9900 QBASIC IBM 370 double-precision (REAL * 8) format + + IBM PC / XT / AT Lattice C IEEE 64 bit, 80 bit temporaries + High C same, in line 80x87 code + BASICA "Double precision" + Quick BASIC IEEE double precision, software routines + + Sun 3 C IEEE 64 bit, 80 bit temporaries, + in-line 68881 code, in-line FPA code. + + MicroVAX II C Vax "G" format floating point + + Macintosh Plus MPW C SANE floating point, IEEE 64 bit format + implemented in ROM. + + Inaccuracies reported by this program should be taken VERY + SERIOUSLY INDEED, as the program has been demonstrated to be + invariant under changes in floating point format, as long as + the format is a recognised double precision format. If you + encounter errors, please remember that they are just as likely + to be in the floating point editing library or the + trigonometric libraries as in the low level operator code. + + The benchmark assumes that results are basically reliable, and + only tests the last result computed against the reference. If + you're running on a suspect system you can compile this + program with ACCURACY defined. This will generate a version + which executes as an infinite loop, performing the ray trace + and checking the results on every pass. All incorrect results + will be reported. + + Representative timings are given below. All have been + normalised as if run for 1000 iterations. + + Time in seconds Computer, Compiler, and notes + Normal INTRIG + + 3466.00 4031.00 Commodore 128, 2 Mhz 8510 with software floating + point. Abacus Software/Data-Becker Super-C 128, + version 3.00, run in fast (2 Mhz) mode. Note: + the results generated by this system differed + from the reference results in the 8th to 10th + decimal place. + + 3290.00 IBM PC/AT 6 Mhz, Microsoft/IBM BASICA version A3.00. + Run with the "/d" switch, software floating point. + + 2131.50 IBM PC/AT 6 Mhz, Lattice C version 2.14, small model. + This version of Lattice compiles subroutine + calls which either do software floating point + or use the 80x87. The machine on which I ran + this had an 80287, but the results were so bad + I wonder if it was being used. + + 1598.00 Macintosh Plus, MPW C, SANE Software floating point. + + 1582.13 Marinchip 9900 2 Mhz, QBASIC compiler with software + floating point. This was a QBASIC version of the + program which contained the identical algorithm. + + 404.00 IBM PC/AT 6 Mhz, Microsoft QuickBASIC version 2.0. + Software floating point. + + 165.15 IBM PC/AT 6 Mhz, Metaware High C version 1.3, small + model. This was compiled to call subroutines for + floating point, and the machine contained an 80287 + which was used by the subroutines. + + 143.20 Macintosh II, MPW C, SANE calls. I was unable to + determine whether SANE was using the 68881 chip or + not. + + 121.80 Sun 3/160 16 Mhz, Sun C. Compiled with -fsoft switch + which executes floating point in software. + + 78.78 110.11 IBM RT PC (Model 6150). IBM AIX 1.0 C compiler + with -O switch. + + 75.2 254.0 Microsoft Quick C 1.0, in-line 8087 instructions, + compiled with 80286 optimisation on. (Switches + were -Ol -FPi87-G2 -AS). Small memory model. + + 69.50 IBM PC/AT 6Mhz, Borland Turbo BASIC 1.0. Compiled + in "8087 required" mode to generate in-line + code for the math coprocessor. + + 66.96 IBM PC/AT 6Mhz, Microsoft QuickBASIC 4.0. This + release of QuickBASIC compiles code for the + 80287 math coprocessor. + + 66.36 206.35 IBM PC/AT 6Mhz, Metaware High C version 1.3, small + model. This was compiled with in-line code for the + 80287 math coprocessor. Trig functions still call + library routines. + + 63.07 220.43 IBM PC/AT, 6Mhz, Borland Turbo C, in-line 8087 code, + small model, word alignment, no stack checking, + 8086 code mode. + + 17.18 Apollo DN-3000, 12 Mhz 68020 with 68881, compiled + with in-line code for the 68881 coprocessor. + According to Apollo, the library routines are chosen + at runtime based on coprocessor presence. Since the + coprocessor was present, the library is supposed to + use in-line floating point code. + + 15.55 27.56 VAXstation II GPX. Compiled and executed under + VAX/VMS C. + + 15.14 37.93 Macintosh II, Unix system V. Green Hills 68020 + Unix compiler with in-line code for the 68881 + coprocessor (-O -ZI switches). + + 12.69 Sun 3/160 16 Mhz, Sun C. Compiled with -fswitch, + which calls a subroutine to select the fastest + floating point processor. This was using the 68881. + + 11.74 26.73 Compaq Deskpro 386, 16 Mhz 80386 with 16 Mhz 80387. + Metaware High C version 1.3, compiled with in-line + for the math coprocessor (but not optimised for the + 80386/80387). Trig functions still call library + routines. + + 8.43 30.49 Sun 3/160 16 Mhz, Sun C. Compiled with -f68881, + generating in-line MC68881 instructions. Trig + functions still call library routines. + + 6.29 25.17 Sun 3/260 25 Mhz, Sun C. Compiled with -f68881, + generating in-line MC68881 instructions. Trig + functions still call library routines. + + 4.57 Sun 3/260 25 Mhz, Sun FORTRAN 77. Compiled with + -O -f68881, generating in-line MC68881 instructions. + Trig functions are compiled in-line. This used + the FORTRAN 77 version of the program, FBFORT77.F. + + 4.00 14.20 Sun386i/25 Mhz model 250, Sun C compiler. + + 4.00 14.00 Sun386i/25 Mhz model 250, Metaware C. + + 3.10 12.00 Compaq 386/387 25 Mhz running SCO Xenix 2. + Compiled with Metaware HighC 386, optimized + for 386. + + 3.00 12.00 Compaq 386/387 25MHZ optimized for 386/387. + + 2.96 5.17 Sun 4/260, Sparc RISC processor. Sun C, + compiled with the -O2 switch for global + optimisation. + + 2.47 COMPAQ 486/25, secondary cache disabled, High C, + 486/387, inline f.p., small memory model. + + 2.20 3.40 Data General Motorola 88000, 16 Mhz, Gnu C. + + 1.56 COMPAQ 486/25, 128K secondary cache, High C, 486/387, + inline f.p., small memory model. + + 0.66 1.50 DEC Pmax, Mips processor. + + 0.63 0.91 Sun SparcStation 2, Sun C (SunOS 4.1.1) with + -O4 optimisation and "/usr/lib/libm.il" inline + floating point. + + 0.60 1.07 Intel 860 RISC processor, 33 Mhz, Greenhills + C compiler. + + 0.40 0.90 Dec 3MAX, MIPS 3000 processor, -O4. + + 0.31 0.90 IBM RS/6000, -O. + + 0.1129 0.2119 Dell Dimension XPS P133c, Pentium 133 MHz, + Windows 95, Microsoft Visual C 5.0. + + 0.0883 0.2166 Silicon Graphics Indigo², MIPS R4400, + 175 Mhz, "-O3". + + 0.0351 0.0561 Dell Dimension XPS R100, Pentium II 400 MHz, + Windows 98, Microsoft Visual C 5.0. + + 0.0312 0.0542 Sun Ultra 2, UltraSPARC V9, 300 MHz, Solaris + 2.5.1. + + 0.00862 0.01074 Dell Inspiron 9100, Pentium 4, 3.4 GHz, gcc -O3. + +*/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#ifndef INTRIG +#include <math.h> +#endif + +#define cot(x) (1.0 / tan(x)) + +#define TRUE 1 +#define FALSE 0 + +#define max_surfaces 10 + +/* Local variables */ + +/* static char tbfr[132]; */ + +static short current_surfaces; +static short paraxial; + +static double clear_aperture; + +static double aberr_lspher; +static double aberr_osc; +static double aberr_lchrom; + +static double max_lspher; +static double max_osc; +static double max_lchrom; + +static double radius_of_curvature; +static double object_distance; +static double ray_height; +static double axis_slope_angle; +static double from_index; +static double to_index; + +static double spectral_line[9]; +static double s[max_surfaces][5]; +static double od_sa[2][2]; + +static char outarr[8][80]; /* Computed output of program goes here */ + +int itercount; /* The iteration counter for the main loop + in the program is made global so that + the compiler should not be allowed to + optimise out the loop over the ray + tracing code. */ + +#ifndef ITERATIONS +#define ITERATIONS 1000 +#endif +int niter = ITERATIONS; /* Iteration counter */ + +static char *refarr[] = { /* Reference results. These happen to + be derived from a run on Microsoft + Quick BASIC on the IBM PC/AT. */ + + " Marginal ray 47.09479120920 0.04178472683", + " Paraxial ray 47.08372160249 0.04177864821", + "Longitudinal spherical aberration: -0.01106960671", + " (Maximum permissible): 0.05306749907", + "Offense against sine condition (coma): 0.00008954761", + " (Maximum permissible): 0.00250000000", + "Axial chromatic aberration: 0.00448229032", + " (Maximum permissible): 0.05306749907" +}; + +/* The test case used in this program is the design for a 4 inch + achromatic telescope objective used as the example in Wyld's + classic work on ray tracing by hand, given in Amateur Telescope + Making, Volume 3. */ + +static double testcase[4][4] = { + {27.05, 1.5137, 63.6, 0.52}, + {-16.68, 1, 0, 0.138}, + {-16.68, 1.6164, 36.7, 0.38}, + {-78.1, 1, 0, 0} +}; + +/* Internal trig functions (used only if INTRIG is defined). These + standard functions may be enabled to obtain timings that reflect + the machine's floating point performance rather than the speed of + its trig function evaluation. */ + +#ifdef INTRIG + +/* The following definitions should keep you from getting intro trouble + with compilers which don't let you redefine intrinsic functions. */ + +#define sin I_sin +#define cos I_cos +#define tan I_tan +#define sqrt I_sqrt +#define atan I_atan +#define atan2 I_atan2 +#define asin I_asin + +#define fabs(x) ((x < 0.0) ? -x : x) + +#define pic 3.1415926535897932 + +/* Commonly used constants */ + +static double pi = pic, + twopi =pic * 2.0, + piover4 = pic / 4.0, + fouroverpi = 4.0 / pic, + piover2 = pic / 2.0; + +/* Coefficients for ATAN evaluation */ + +static double atanc[] = { + 0.0, + 0.4636476090008061165, + 0.7853981633974483094, + 0.98279372324732906714, + 1.1071487177940905022, + 1.1902899496825317322, + 1.2490457723982544262, + 1.2924966677897852673, + 1.3258176636680324644 +}; + +/* aint(x) Return integer part of number. Truncates towards 0 */ + +static double aint(double x) +{ + long l; + + /* Note that this routine cannot handle the full floating point + number range. This function should be in the machine-dependent + floating point library! */ + + l = x; + if ((int)(-0.5) != 0 && l < 0 ) + l++; + x = l; + return x; +} + +/* sin(x) Return sine, x in radians */ + +static double sin(double x) +{ + int sign; + double y, r, z; + + x = (((sign= (x < 0.0)) != 0) ? -x: x); + + if (x > twopi) + x -= (aint(x / twopi) * twopi); + + if (x > pi) { + x -= pi; + sign = !sign; + } + + if (x > piover2) + x = pi - x; + + if (x < piover4) { + y = x * fouroverpi; + z = y * y; + r = y * (((((((-0.202253129293E-13 * z + 0.69481520350522E-11) * z - + 0.17572474176170806E-8) * z + 0.313361688917325348E-6) * z - + 0.365762041821464001E-4) * z + 0.249039457019271628E-2) * z - + 0.0807455121882807815) * z + 0.785398163397448310); + } else { + y = (piover2 - x) * fouroverpi; + z = y * y; + r = ((((((-0.38577620372E-12 * z + 0.11500497024263E-9) * z - + 0.2461136382637005E-7) * z + 0.359086044588581953E-5) * z - + 0.325991886926687550E-3) * z + 0.0158543442438154109) * z - + 0.308425137534042452) * z + 1.0; + } + return sign ? -r : r; +} + +/* cos(x) Return cosine, x in radians, by identity */ + +static double cos(double x) +{ + x = (x < 0.0) ? -x : x; + if (x > twopi) /* Do range reduction here to limit */ + x = x - (aint(x / twopi) * twopi); /* roundoff on add of PI/2 */ + return sin(x + piover2); +} + +/* tan(x) Return tangent, x in radians, by identity */ + +static double tan(double x) +{ + return sin(x) / cos(x); +} + +/* sqrt(x) Return square root. Initial guess, then Newton- + Raphson refinement */ + +double sqrt(double x) +{ + double c, cl, y; + int n; + + if (x == 0.0) + return 0.0; + + if (x < 0.0) { + fprintf(stderr, + "\nGood work! You tried to take the square root of %g", + x); + fprintf(stderr, + "\nunfortunately, that is too complex for me to handle.\n"); + exit(1); + } + + y = (0.154116 + 1.893872 * x) / (1.0 + 1.047988 * x); + + c = (y - x / y) / 2.0; + cl = 0.0; + for (n = 50; c != cl && n--;) { + y = y - c; + cl = c; + c = (y - x / y) / 2.0; + } + return y; +} + +/* atan(x) Return arctangent in radians, + range -pi/2 to pi/2 */ + +static double atan(double x) +{ + int sign, l, y; + double a, b, z; + + x = (((sign = (x < 0.0)) != 0) ? -x : x); + l = 0; + + if (x >= 4.0) { + l = -1; + x = 1.0 / x; + y = 0; + goto atl; + } else { + if (x < 0.25) { + y = 0; + goto atl; + } + } + + y = aint(x / 0.5); + z = y * 0.5; + x = (x - z) / (x * z + 1); + +atl: + z = x * x; + b = ((((893025.0 * z + 49116375.0) * z + 425675250.0) * z + + 1277025750.0) * z + 1550674125.0) * z + 654729075.0; + a = (((13852575.0 * z + 216602100.0) * z + 891080190.0) * z + + 1332431100.0) * z + 654729075.0; + a = (a / b) * x + atanc[y]; + if (l) + a=piover2 - a; + return sign ? -a : a; +} + +/* atan2(y,x) Return arctangent in radians of y/x, + range -pi to pi */ + +static double atan2(double y, double x) +{ + double temp; + + if (x == 0.0) { + if (y == 0.0) /* Special case: atan2(0,0) = 0 */ + return 0.0; + else if (y > 0) + return piover2; + else + return -piover2; + } + temp = atan(y / x); + if (x < 0.0) { + if (y >= 0.0) + temp += pic; + else + temp -= pic; + } + return temp; +} + +/* asin(x) Return arcsine in radians of x */ + +static double asin(double x) +{ + if (fabs(x)>1.0) { + fprintf(stderr, + "\nInverse trig functions lose much of their gloss when"); + fprintf(stderr, + "\ntheir arguments are greater than 1, such as the"); + fprintf(stderr, + "\nvalue %g you passed.\n", x); + exit(1); + } + return atan2(x, sqrt(1 - x * x)); +} +#endif + +/* Calculate passage through surface + + If the variable PARAXIAL is true, the trace through the + surface will be done using the paraxial approximations. + Otherwise, the normal trigonometric trace will be done. + + This routine takes the following inputs: + + RADIUS_OF_CURVATURE Radius of curvature of surface + being crossed. If 0, surface is + plane. + + OBJECT_DISTANCE Distance of object focus from + lens vertex. If 0, incoming + rays are parallel and + the following must be specified: + + RAY_HEIGHT Height of ray from axis. Only + relevant if OBJECT.DISTANCE == 0 + + AXIS_SLOPE_ANGLE Angle incoming ray makes with axis + at intercept + + FROM_INDEX Refractive index of medium being left + + TO_INDEX Refractive index of medium being + entered. + + The outputs are the following variables: + + OBJECT_DISTANCE Distance from vertex to object focus + after refraction. + + AXIS_SLOPE_ANGLE Angle incoming ray makes with axis + at intercept after refraction. + +*/ + +static void transit_surface(void) { + double iang, /* Incidence angle */ + rang, /* Refraction angle */ + iang_sin, /* Incidence angle sin */ + rang_sin, /* Refraction angle sin */ + old_axis_slope_angle, sagitta; + + if (paraxial) { + if (radius_of_curvature != 0.0) { + if (object_distance == 0.0) { + axis_slope_angle = 0.0; + iang_sin = ray_height / radius_of_curvature; + } else + iang_sin = ((object_distance - + radius_of_curvature) / radius_of_curvature) * + axis_slope_angle; + + rang_sin = (from_index / to_index) * + iang_sin; + old_axis_slope_angle = axis_slope_angle; + axis_slope_angle = axis_slope_angle + + iang_sin - rang_sin; + if (object_distance != 0.0) + ray_height = object_distance * old_axis_slope_angle; + object_distance = ray_height / axis_slope_angle; + return; + } + object_distance = object_distance * (to_index / from_index); + axis_slope_angle = axis_slope_angle * (from_index / to_index); + return; + } + + if (radius_of_curvature != 0.0) { + if (object_distance == 0.0) { + axis_slope_angle = 0.0; + iang_sin = ray_height / radius_of_curvature; + } else { + iang_sin = ((object_distance - + radius_of_curvature) / radius_of_curvature) * + sin(axis_slope_angle); + } + iang = asin(iang_sin); + rang_sin = (from_index / to_index) * + iang_sin; + old_axis_slope_angle = axis_slope_angle; + axis_slope_angle = axis_slope_angle + + iang - asin(rang_sin); + sagitta = sin((old_axis_slope_angle + iang) / 2.0); + sagitta = 2.0 * radius_of_curvature*sagitta*sagitta; + object_distance = ((radius_of_curvature * sin( + old_axis_slope_angle + iang)) * + cot(axis_slope_angle)) + sagitta; + return; + } + + rang = -asin((from_index / to_index) * + sin(axis_slope_angle)); + object_distance = object_distance * ((to_index * + cos(-rang)) / (from_index * + cos(axis_slope_angle))); + axis_slope_angle = -rang; +} + +/* Perform ray trace in specific spectral line */ + +static void trace_line(int line, double ray_h) +{ + int i; + + object_distance = 0.0; + ray_height = ray_h; + from_index = 1.0; + + for (i = 1; i <= current_surfaces; i++) { + radius_of_curvature = s[i][1]; + to_index = s[i][2]; + if (to_index > 1.0) + to_index = to_index + ((spectral_line[4] - + spectral_line[line]) / + (spectral_line[3] - spectral_line[6])) * ((s[i][2] - 1.0) / + s[i][3]); + transit_surface(); + from_index = to_index; + if (i < current_surfaces) + object_distance = object_distance - s[i][4]; + } +} + +/* Initialise when called the first time */ + +int main(int argc, char *argv[]) +{ + int i, j, k, errors; + double od_fline, od_cline; +#ifdef ACCURACY + long passes; +#endif + struct timeval begin_tv, end_tv, diff_tv; + + spectral_line[1] = 7621.0; /* A */ + spectral_line[2] = 6869.955; /* B */ + spectral_line[3] = 6562.816; /* C */ + spectral_line[4] = 5895.944; /* D */ + spectral_line[5] = 5269.557; /* E */ + spectral_line[6] = 4861.344; /* F */ + spectral_line[7] = 4340.477; /* G'*/ + spectral_line[8] = 3968.494; /* H */ + + /* Process the number of iterations argument, if one is supplied. */ + + if (argc > 1) { + niter = atoi(argv[1]); + if (*argv[1] == '-' || niter < 1) { + printf("This is John Walker's floating point accuracy and\n"); + printf("performance benchmark program. You call it with\n"); + printf("\n%s <itercount>\n\n", argv[0]); + printf("where <itercount> is the number of iterations\n"); + printf("to be executed. Archival timings should be made\n"); + printf("with the iteration count set so that roughly five\n"); + printf("minutes of execution is timed.\n"); + exit(0); + } + } + + /* Load test case into working array */ + + clear_aperture = 4.0; + current_surfaces = 4; + for (i = 0; i < current_surfaces; i++) + for (j = 0; j < 4; j++) + s[i + 1][j + 1] = testcase[i][j]; + +#ifdef ACCURACY + printf("Beginning execution of floating point accuracy test...\n"); + passes = 0; +#else +/* + printf("Ready to begin John Walker's floating point accuracy\n"); + printf("and performance benchmark. %d iterations will be made.\n\n", niter); + + printf("\nMeasured run time in seconds should be divided by %.f\n", niter / 1000.0); + printf("to normalise for reporting results. For archival results,\n"); + printf("adjust iteration count so the benchmark runs about five minutes.\n\n"); +*/ + /* now we start. Get the current time */ + gettimeofday(&begin_tv, NULL); +#endif + + /* Perform ray trace the specified number of times. */ + +#ifdef ACCURACY + while (TRUE) { + passes++; + if ((passes % 100L) == 0) { + printf("Pass %ld.\n", passes); + } +#else + for (itercount = 0; itercount < niter; itercount++) { +#endif + + for (paraxial = 0; paraxial <= 1; paraxial++) { + + /* Do main trace in D light */ + + trace_line(4, clear_aperture / 2.0); + od_sa[paraxial][0] = object_distance; + od_sa[paraxial][1] = axis_slope_angle; + } + paraxial = FALSE; + + /* Trace marginal ray in C */ + + trace_line(3, clear_aperture / 2.0); + od_cline = object_distance; + + /* Trace marginal ray in F */ + + trace_line(6, clear_aperture / 2.0); + od_fline = object_distance; + + aberr_lspher = od_sa[1][0] - od_sa[0][0]; + aberr_osc = 1.0 - (od_sa[1][0] * od_sa[1][1]) / + (sin(od_sa[0][1]) * od_sa[0][0]); + aberr_lchrom = od_fline - od_cline; + max_lspher = sin(od_sa[0][1]); + + /* D light */ + + max_lspher = 0.0000926 / (max_lspher * max_lspher); + max_osc = 0.0025; + max_lchrom = max_lspher; +#ifndef ACCURACY + } + + gettimeofday(&end_tv, NULL); +#endif + + /* Now evaluate the accuracy of the results from the last ray trace */ + + sprintf(outarr[0], "%15s %21.11f %14.11f", + "Marginal ray", od_sa[0][0], od_sa[0][1]); + sprintf(outarr[1], "%15s %21.11f %14.11f", + "Paraxial ray", od_sa[1][0], od_sa[1][1]); + sprintf(outarr[2], + "Longitudinal spherical aberration: %16.11f", + aberr_lspher); + sprintf(outarr[3], + " (Maximum permissible): %16.11f", + max_lspher); + sprintf(outarr[4], + "Offense against sine condition (coma): %16.11f", + aberr_osc); + sprintf(outarr[5], + " (Maximum permissible): %16.11f", + max_osc); + sprintf(outarr[6], + "Axial chromatic aberration: %16.11f", + aberr_lchrom); + sprintf(outarr[7], + " (Maximum permissible): %16.11f", + max_lchrom); + + /* Now compare the edited results with the master values from + reference executions of this program. */ + + errors = 0; + for (i = 0; i < 8; i++) { + if (strcmp(outarr[i], refarr[i]) != 0) { +#ifdef ACCURACY + printf("\nError in pass %ld for results on line %d...\n", + passes, i + 1); +#else + printf("\nError in results on line %d...\n", i + 1); +#endif + printf("Expected: \"%s\"\n", refarr[i]); + printf("Received: \"%s\"\n", outarr[i]); + printf("(Errors) "); + k = strlen(refarr[i]); + for (j = 0; j < k; j++) { + printf("%c", refarr[i][j] == outarr[i][j] ? ' ' : '^'); + if (refarr[i][j] != outarr[i][j]) + errors++; + } + printf("\n"); + } + } +#ifdef ACCURACY + } +#else + if (errors > 0) { + fprintf(stderr, "\n%d error%s in results. This is VERY SERIOUS.\n", + errors, errors > 1 ? "s" : ""); + return 1; + } +#endif + timersub(&end_tv, &begin_tv, &diff_tv); + + printf("Time(s): %u.%u\n", diff_tv.tv_sec, diff_tv.tv_usec); + + return 0; +} |