summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJuergen Beisert <j.beisert@pengutronix.de>2009-04-09 14:03:17 +0000
committerJuergen Beisert <j.beisert@pengutronix.de>2009-04-09 14:03:17 +0000
commit433266df9bd33ab25444d5dad15613625232263a (patch)
tree39ec3dd59f64d83bd7a0b8056fbda8717d61c745
downloadfloatings-433266df9bd33ab25444d5dad15613625232263a.tar.gz
floatings-433266df9bd33ab25444d5dad15613625232263a.tar.xz
add simple floating point benchmarks
-rw-r--r--GNUmakefile.am16
-rwxr-xr-xautogen.sh22
-rw-r--r--configure.ac62
-rw-r--r--src/GNUmakefile.am17
-rw-r--r--src/ff_bench.c408
-rw-r--r--src/float_bench.c843
6 files changed, 1368 insertions, 0 deletions
diff --git a/GNUmakefile.am b/GNUmakefile.am
new file mode 100644
index 0000000..beeb261
--- /dev/null
+++ b/GNUmakefile.am
@@ -0,0 +1,16 @@
+SUBDIRS = \
+ src
+
+EXTRA_DIST = \
+ autogen.sh
+
+MAINTAINERCLEANFILES = \
+ configure \
+ config.h.in \
+ GNUmakefile.in \
+ aclocal.m4 \
+ $(DIST_ARCHIVES)
+
+maintainer-clean-local:
+ -chmod -R a+rw $(distdir)
+ -rm -fr $(distdir)
diff --git a/autogen.sh b/autogen.sh
new file mode 100755
index 0000000..29db9c9
--- /dev/null
+++ b/autogen.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+#
+# usage:
+#
+# banner <target name>
+#
+banner() {
+ echo
+ TG=`echo $1 | sed -e "s,/.*/,,g"`
+ LINE=`echo $TG |sed -e "s/./-/g"`
+ echo $LINE
+ echo $TG
+ echo $LINE
+ echo
+}
+
+banner "autoreconf"
+
+autoreconf --force --install --symlink -Wall || exit $?
+
+banner "Finished"
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..ace6279
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,62 @@
+# -*- Autoconf -*-
+# Process this file with autoconf to produce a configure script.
+AC_PREREQ(2.59)
+
+AC_INIT([floatings], [1.0.0], [bugs@pengutronix.de])
+AC_CONFIG_HEADERS([config.h])
+AC_CONFIG_SRCDIR([src/float_bench.c])
+AC_CANONICAL_BUILD
+AC_CANONICAL_HOST
+
+AM_MAINTAINER_MODE
+
+CFLAGS="${CFLAGS} -Wall"
+
+#
+# Checks for programs.
+#
+AC_PROG_CC
+AC_PROG_INSTALL
+
+AM_INIT_AUTOMAKE([foreign no-exeext dist-bzip2])
+
+#
+# Checks for header files.
+#
+AC_CHECK_HEADERS([ \
+ stdlib.h \
+ string.h \
+ ])
+
+#
+# Checks for library functions.
+#
+AC_FUNC_MALLOC
+AC_CHECK_FUNCS([memset sqrt])
+
+
+#
+# Debugging
+#
+AC_MSG_CHECKING([whether to enable debugging])
+AC_ARG_ENABLE(debug,
+ AS_HELP_STRING([--enable-debug], [enable debugging @<:@default=no@:>@]),
+ [case "$enableval" in
+ y | yes) CONFIG_DEBUG=yes ;;
+ *) CONFIG_DEBUG=no ;;
+ esac],
+ [CONFIG_DEBUG=no])
+AC_MSG_RESULT([${CONFIG_DEBUG}])
+if test "${CONFIG_DEBUG}" = "yes"; then
+ CFLAGS="${CFLAGS} -Werror -Wsign-compare -Wfloat-equal -Wformat-security -g -O1"
+ AC_DEFINE(DEBUG, 1, [debugging])
+else
+ CFLAGS="${CFLAGS} -O3"
+fi
+
+AC_CONFIG_FILES([
+ GNUmakefile
+ src/GNUmakefile
+ ])
+AC_OUTPUT
+
diff --git a/src/GNUmakefile.am b/src/GNUmakefile.am
new file mode 100644
index 0000000..83a1a7d
--- /dev/null
+++ b/src/GNUmakefile.am
@@ -0,0 +1,17 @@
+bin_PROGRAMS = \
+ float_bench \
+ ff_bench
+
+AM_CPPFLAGS = \
+ -I$(top_builddir)
+
+float_bench_SOURCES = \
+ float_bench.c
+float_bench_LDADD = -lm
+
+ff_bench_SOURCES = \
+ ff_bench.c
+ff_bench_LDADD = -lm
+
+MAINTAINERCLEANFILES = \
+ GNUmakefile.in
diff --git a/src/ff_bench.c b/src/ff_bench.c
new file mode 100644
index 0000000..e2b60d5
--- /dev/null
+++ b/src/ff_bench.c
@@ -0,0 +1,408 @@
+/*
+
+ Two-dimensional FFT benchmark
+
+ Designed and implemented by John Walker in April of 1989.
+
+ This benchmark executes a specified number of passes (default
+ 20) through a loop in which each iteration performs a fast
+ Fourier transform of a square matrix (default size 256x256) of
+ complex numbers (default precision double), followed by the
+ inverse transform. After all loop iterations are performed
+ the results are checked against known correct values.
+
+ This benchmark is intended for use on C implementations which
+ define "int" as 32 bits or longer and permit allocation and
+ direct addressing of arrays larger than one megabyte.
+
+ If CAPOUT is defined, the result after all iterations is
+ written as a CA Lab pattern file. This is intended for
+ debugging in case horribly wrong results are obtained on a
+ given machine.
+
+ Archival timings are run with the definitions below set as
+ follows: Float = double, Asize = 256, Passes = 20, CAPOUT not
+ defined.
+
+ Time (seconds) System
+
+ 2393.93 Sun 3/260, SunOS 3.4, C, "-f68881 -O".
+ (John Walker).
+
+ 1928 Macintosh IIx, MPW C 3.0, "-mc68020
+ -mc68881 -elems881 -m". (Hugh Hoover).
+
+ 1636.1 Sun 4/110, "cc -O3 -lm". (Michael McClary).
+ The suspicion is that this is software
+ floating point.
+
+ 1556.7 Macintosh II, A/UX, "cc -O -lm"
+ (Michael McClary).
+
+ 1388.8 Sun 386i/250, SunOS 4.0.1 C
+ "-O /usr/lib/trig.il". (James Carrington).
+
+ 1331.93 Sun 3/60, SunOS 4.0.1, C,
+ "-O4 -f68881 /usr/lib/libm.il"
+ (Bob Elman).
+
+ 1204.0 Apollo Domain DN4000, C, "-cpu 3000 -opt 4".
+ (Sam Crupi).
+
+ 1174.66 Compaq 386/25, SCO Xenix 386 C.
+ (Peter Shieh).
+
+ 1068 Compaq 386/25, SCO Xenix 386,
+ Metaware High C. (Robert Wenig).
+
+ 1064.0 Sun 3/80, SunOS 4.0.3 Beta C
+ "-O3 -f68881 /usr/lib/libm.il". (James Carrington).
+
+ 1061.4 Compaq 386/25, SCO Xenix, High C 1.4.
+ (James Carrington).
+
+ 1059.79 Compaq 386/25, 387/25, High C 1.4,
+ DOS|Extender 2.2, 387 inline code
+ generation. (Nathan Bender).
+
+ 777.14 Compaq 386/25, IIT 3C87-25 (387 Compatible),
+ High C 1.5, DOS|Extender 2.2, 387 inline
+ code generation. (Nathan Bender).
+
+ 751 Compaq DeskPro 386/33, High C 1.5 + DOS|Extender,
+ 387 code generation. (James Carrington).
+
+ 431.44 Compaq 386/25, Weitek 3167-25, DOS 3.31,
+ High C 1.4, DOS|Extender, Weitek code generation.
+ (Nathan Bender).
+
+ 344.9 Compaq 486/25, Metaware High C 1.6, Phar Lap
+ DOS|Extender, in-line floating point. (Nathan
+ Bender).
+
+ 324.2 Data General Motorola 88000, 16 Mhz, Gnu C.
+
+ 323.1 Sun 4/280, C, "-O4". (Eric Hill).
+
+ 254 Compaq SystemPro 486/33, High C 1.5 + DOS|Extender,
+ 387 code generation. (James Carrington).
+
+ 242.8 Silicon Graphics Personal IRIS, MIPS R2000A,
+ 12.5 Mhz, "-O3" (highest level optimisation).
+ (Mike Zentner).
+
+ 233.0 Sun SPARCStation 1, C, "-O4", SunOS 4.0.3.
+ (Nathan Bender).
+
+ 187.30 DEC PMAX 3100, MIPS 2000 chip.
+ (Robert Wenig).
+
+ 120.46 Sun SparcStation 2, C, "-O4", SunOS 4.1.1.
+ (John Walker).
+
+ 120.21 DEC 3MAX, MIPS 3000, "-O4".
+
+ 98.0 Intel i860 experimental environment,
+ OS/2, data caching disabled. (Kern
+ Sibbald).
+
+ 34.9 Silicon Graphics Indigo², MIPS R4400,
+ 175 Mhz, IRIX 5.2, "-O".
+
+ 32.4 Pentium 133, Windows NT, Microsoft Visual
+ C++ 4.0.
+
+ 17.25 Silicon Graphics Indigo², MIPS R4400,
+ 175 Mhz, IRIX 6.5, "-O3".
+
+ 14.10 Dell Dimension XPS R100, Pentium II 400 MHz,
+ Windows 98, Microsoft Visual C 5.0.
+
+ 10.7 Hewlett-Packard Kayak XU 450Mhz Pentium II,
+ Microsoft Visual C++ 6.0, Windows NT 4.0sp3. (Nathan Bender).
+
+ 5.09 Sun Ultra 2, UltraSPARC V9, 300 MHz, gcc -O3.
+
+ 0.846 Dell Inspiron 9100, Pentium 4, 3.4 GHz, gcc -O3.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <math.h>
+
+/* The program may be run with Float defined as either float or
+ double. With IEEE arithmetic, the same answers are generated for
+ either floating point mode. */
+
+#define Float double /* Floating point type used in FFT */
+
+#define Asize 256 /* Array edge size */
+#define Passes 20 /* Number of FFT/Inverse passes */
+
+#define max(a,b) ((a)>(b)?(a):(b))
+#define min(a,b) ((a)<=(b)?(a):(b))
+
+#define FWMODE "w"
+
+/*
+
+ Multi-dimensional fast Fourier transform
+
+ Adapted from Press et al., "Numerical Recipes in C".
+
+*/
+
+#define SWAP(a,b) tempr=(a); (a)=(b); (b)=tempr
+
+static void fourn(Float data[], int nn[], int ndim, int isign)
+{
+ int i1, i2, i3;
+ int i2rev, i3rev, ip1, ip2, ip3, ifp1, ifp2;
+ int ibit, idim, k1, k2, n, nprev, nrem, ntot;
+ Float tempi, tempr;
+ double theta, wi, wpi, wpr, wr, wtemp;
+
+ ntot = 1;
+ for (idim = 1; idim <= ndim; idim++)
+ ntot *= nn[idim];
+ nprev = 1;
+ for (idim = ndim; idim >= 1; idim--) {
+ n = nn[idim];
+ nrem = ntot / (n * nprev);
+ ip1 = nprev << 1;
+ ip2 = ip1 * n;
+ ip3 = ip2 * nrem;
+ i2rev = 1;
+ for (i2 = 1; i2 <= ip2; i2 += ip1) {
+ if (i2 < i2rev) {
+ for (i1 = i2; i1 <= i2 + ip1 - 2; i1 += 2) {
+ for (i3 = i1; i3 <= ip3; i3 += ip2) {
+ i3rev = i2rev + i3 - i2;
+ SWAP(data[i3], data[i3rev]);
+ SWAP(data[i3 + 1], data[i3rev + 1]);
+ }
+ }
+ }
+ ibit = ip2 >> 1;
+ while (ibit >= ip1 && i2rev > ibit) {
+ i2rev -= ibit;
+ ibit >>= 1;
+ }
+ i2rev += ibit;
+ }
+ ifp1 = ip1;
+ while (ifp1 < ip2) {
+ ifp2 = ifp1 << 1;
+ theta = isign * 6.28318530717959 / (ifp2 / ip1);
+ wtemp = sin(0.5 * theta);
+ wpr = -2.0 * wtemp * wtemp;
+ wpi = sin(theta);
+ wr = 1.0;
+ wi = 0.0;
+ for (i3 = 1; i3 <= ifp1; i3 += ip1) {
+ for (i1 = i3; i1 <= i3 + ip1 - 2; i1 += 2) {
+ for (i2 = i1; i2 <= ip3; i2 += ifp2) {
+ k1 = i2;
+ k2 = k1 + ifp1;
+ tempr = wr * data[k2] - wi * data[k2 + 1];
+ tempi = wr * data[k2 + 1] + wi * data[k2];
+ data[k2] = data[k1] - tempr;
+ data[k2 + 1] = data[k1 + 1] - tempi;
+ data[k1] += tempr;
+ data[k1 + 1] += tempi;
+ }
+ }
+ wr = (wtemp = wr) * wpr - wi * wpi + wr;
+ wi = wi * wpr + wtemp * wpi + wi;
+ }
+ ifp1 = ifp2;
+ }
+ nprev *= n;
+ }
+}
+#undef SWAP
+
+static void usage(const char *prg, FILE *out)
+{
+ fprintf(out, "usage: %s [options]\n", prg);
+ fprintf(out, " options are:\n");
+ fprintf(out, " '-d value' calculate a dimension of (value * value). Default is (256 * 256)\n");
+ fprintf(out, " '-p value' passes. Default is 20\n");
+ fprintf(out, " '-v' print program version and exit\n");
+ fprintf(out, " '-h' print this help and exit\n");
+}
+
+int main(int argc, char *argv[])
+{
+ int i, j, k, l, m, npasses = Passes, faedge = Asize, c, ret;
+ Float *fdata;
+ static int nsize[] = {0, 0, 0};
+ long fanum, fasize;
+ double mapbase, mapscale, rmin, rmax, imin, imax;
+ struct timeval begin_tv, end_tv, diff_tv;
+
+ /* handle command line options first */
+ while (1) {
+ c = getopt(argc, argv, "d:p:hv");
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'd':
+ faedge = atoi(optarg); /* FFT array edge size */
+ break;
+ case 'p':
+ npasses = atoi(optarg);
+ break;
+ case 'h':
+ usage(argv[0], stdout);
+ exit(0);
+ case 'v':
+ printf("%s %s\n", PACKAGE_NAME, PACKAGE_VERSION);
+ exit(0);
+ }
+ }
+
+ /* now we start. Get the current time */
+ gettimeofday(&begin_tv, NULL);
+
+ fanum = faedge * faedge; /* Elements in FFT array */
+ fasize = ((fanum + 1) * 2 * sizeof(Float)); /* FFT array size */
+ nsize[1] = nsize[2] = faedge;
+
+ fdata = (Float *) malloc(fasize);
+ if (fdata == NULL) {
+ fprintf(stderr, "Can't allocate data array.\n");
+ exit(1);
+ }
+
+ /* Generate data array to process. */
+
+#define Re(x,y) fdata[1 + (faedge * (x) + (y)) * 2]
+#define Im(x,y) fdata[2 + (faedge * (x) + (y)) * 2]
+
+ memset(fdata, 0, fasize);
+ for (i = 0; i < faedge; i++) {
+ for (j = 0; j < faedge; j++) {
+ if (((i & 15) == 8) || ((j & 15) == 8))
+ Re(i, j) = 128.0;
+ }
+ }
+
+ for (i = 0; i < npasses; i++) {
+ /* Transform image to frequency domain. */
+ fourn(fdata, nsize, 2, 1);
+
+ /* Back-transform to image. */
+ fourn(fdata, nsize, 2, -1);
+ }
+
+ {
+ double r, ij, ar, ai;
+ rmin = 1e10; rmax = -1e10;
+ imin = 1e10; imax = -1e10;
+ ar = 0;
+ ai = 0;
+
+ for (i = 1; i <= fanum; i += 2) {
+ r = fdata[i];
+ ij = fdata[i + 1];
+ ar += r;
+ ai += ij;
+ rmin = min(r, rmin);
+ rmax = max(r, rmax);
+ imin = min(ij, imin);
+ imax = max(ij, imax);
+ }
+#ifdef DEBUG
+ printf("Real min %.4g, max %.4g. Imaginary min %.4g, max %.4g.\n",
+ rmin, rmax, imin, imax);
+ printf("Average real %.4g, imaginary %.4g.\n",
+ ar / fanum, ai / fanum);
+#endif
+ mapbase = rmin;
+ mapscale = 255 / (rmax - rmin);
+ }
+
+ /* See if we got the right answers. */
+
+ m = 0;
+ for (i = 0; i < faedge; i++) {
+ for (j = 0; j < faedge; j++) {
+ k = (Re(i, j) - mapbase) * mapscale;
+ l = (((i & 15) == 8) || ((j & 15) == 8)) ? 255 : 0;
+ if (k != l) {
+ m++;
+ fprintf(stderr,
+ "Wrong answer at (%d,%d)! Expected %d, got %d.\n",
+ i, j, l, k);
+ }
+ }
+ }
+ if (m == 0) {
+ fprintf(stderr, "%d passes. No errors in results.\n", npasses);
+ ret = 0;
+ } else {
+ fprintf(stderr, "%d passes. %d errors in results.\n",
+ npasses, m);
+ ret = 1;
+ }
+
+#ifdef CAPOUT
+
+ /* Output the result of the transform as a CA Lab pattern
+ file for debugging. */
+
+ {
+#define SCRX 322
+#define SCRY 200
+#define SCRN (SCRX * SCRY)
+ unsigned char patarr[SCRY][SCRX];
+ FILE *fp;
+
+/* Map user external state numbers to internal state index */
+
+#define UtoI(x) (((((x) >> 1) & 0x7F) | ((x) << 7)) & 0xFF)
+
+ /* Copy data from FFT buffer to map. */
+
+ memset(patarr, 0, sizeof patarr);
+ l = (SCRX - faedge) / 2;
+ m = (faedge > SCRY) ? 0 : ((SCRY - faedge) / 2);
+ for (i = 1; i < faedge; i++) {
+ for (j = 0; j < min(SCRY, faedge); j++) {
+ k = (Re(i, j) - mapbase) * mapscale;
+ patarr[j + m][i + l] = UtoI(k);
+ }
+ }
+
+ /* Dump pattern map to file. */
+
+ fp = fopen("fft.cap", "w");
+ if (fp == NULL) {
+ fprintf(stderr, "Cannot open output file.\n");
+ exit(0);
+ }
+ putc(':', fp);
+ putc(1, fp);
+ fwrite(patarr, SCRN, 1, fp);
+ putc(6, fp);
+ fclose(fp);
+ }
+#endif
+
+ gettimeofday(&end_tv, NULL);
+ timersub(&end_tv, &begin_tv, &diff_tv);
+
+ printf("Time(s): %u.%u\n", diff_tv.tv_sec, diff_tv.tv_usec);
+
+ return ret;
+}
diff --git a/src/float_bench.c b/src/float_bench.c
new file mode 100644
index 0000000..b8c4f71
--- /dev/null
+++ b/src/float_bench.c
@@ -0,0 +1,843 @@
+/*
+
+ John Walker's Floating Point Benchmark, derived from...
+
+ Marinchip Interactive Lens Design System
+
+ John Walker December 1980
+
+ By John Walker
+ http://www.fourmilab.ch/
+
+ This program may be used, distributed, and modified freely as
+ long as the origin information is preserved.
+
+ This is a complete optical design raytracing algorithm,
+ stripped of its user interface and recast into portable C. It
+ not only determines execution speed on an extremely floating
+ point (including trig function) intensive real-world
+ application, it checks accuracy on an algorithm that is
+ exquisitely sensitive to errors. The performance of this
+ program is typically far more sensitive to changes in the
+ efficiency of the trigonometric library routines than the
+ average floating point program.
+
+ The benchmark may be compiled in two modes. If the symbol
+ INTRIG is defined, built-in trigonometric and square root
+ routines will be used for all calculations. Timings made with
+ INTRIG defined reflect the machine's basic floating point
+ performance for the arithmetic operators. If INTRIG is not
+ defined, the system library <math.h> functions are used.
+ Results with INTRIG not defined reflect the system's library
+ performance and/or floating point hardware support for trig
+ functions and square root. Results with INTRIG defined are a
+ good guide to general floating point performance, while
+ results with INTRIG undefined indicate the performance of an
+ application which is math function intensive.
+
+ Special note regarding errors in accuracy: this program has
+ generated numbers identical to the last digit it formats and
+ checks on the following machines, floating point
+ architectures, and languages:
+
+ Marinchip 9900 QBASIC IBM 370 double-precision (REAL * 8) format
+
+ IBM PC / XT / AT Lattice C IEEE 64 bit, 80 bit temporaries
+ High C same, in line 80x87 code
+ BASICA "Double precision"
+ Quick BASIC IEEE double precision, software routines
+
+ Sun 3 C IEEE 64 bit, 80 bit temporaries,
+ in-line 68881 code, in-line FPA code.
+
+ MicroVAX II C Vax "G" format floating point
+
+ Macintosh Plus MPW C SANE floating point, IEEE 64 bit format
+ implemented in ROM.
+
+ Inaccuracies reported by this program should be taken VERY
+ SERIOUSLY INDEED, as the program has been demonstrated to be
+ invariant under changes in floating point format, as long as
+ the format is a recognised double precision format. If you
+ encounter errors, please remember that they are just as likely
+ to be in the floating point editing library or the
+ trigonometric libraries as in the low level operator code.
+
+ The benchmark assumes that results are basically reliable, and
+ only tests the last result computed against the reference. If
+ you're running on a suspect system you can compile this
+ program with ACCURACY defined. This will generate a version
+ which executes as an infinite loop, performing the ray trace
+ and checking the results on every pass. All incorrect results
+ will be reported.
+
+ Representative timings are given below. All have been
+ normalised as if run for 1000 iterations.
+
+ Time in seconds Computer, Compiler, and notes
+ Normal INTRIG
+
+ 3466.00 4031.00 Commodore 128, 2 Mhz 8510 with software floating
+ point. Abacus Software/Data-Becker Super-C 128,
+ version 3.00, run in fast (2 Mhz) mode. Note:
+ the results generated by this system differed
+ from the reference results in the 8th to 10th
+ decimal place.
+
+ 3290.00 IBM PC/AT 6 Mhz, Microsoft/IBM BASICA version A3.00.
+ Run with the "/d" switch, software floating point.
+
+ 2131.50 IBM PC/AT 6 Mhz, Lattice C version 2.14, small model.
+ This version of Lattice compiles subroutine
+ calls which either do software floating point
+ or use the 80x87. The machine on which I ran
+ this had an 80287, but the results were so bad
+ I wonder if it was being used.
+
+ 1598.00 Macintosh Plus, MPW C, SANE Software floating point.
+
+ 1582.13 Marinchip 9900 2 Mhz, QBASIC compiler with software
+ floating point. This was a QBASIC version of the
+ program which contained the identical algorithm.
+
+ 404.00 IBM PC/AT 6 Mhz, Microsoft QuickBASIC version 2.0.
+ Software floating point.
+
+ 165.15 IBM PC/AT 6 Mhz, Metaware High C version 1.3, small
+ model. This was compiled to call subroutines for
+ floating point, and the machine contained an 80287
+ which was used by the subroutines.
+
+ 143.20 Macintosh II, MPW C, SANE calls. I was unable to
+ determine whether SANE was using the 68881 chip or
+ not.
+
+ 121.80 Sun 3/160 16 Mhz, Sun C. Compiled with -fsoft switch
+ which executes floating point in software.
+
+ 78.78 110.11 IBM RT PC (Model 6150). IBM AIX 1.0 C compiler
+ with -O switch.
+
+ 75.2 254.0 Microsoft Quick C 1.0, in-line 8087 instructions,
+ compiled with 80286 optimisation on. (Switches
+ were -Ol -FPi87-G2 -AS). Small memory model.
+
+ 69.50 IBM PC/AT 6Mhz, Borland Turbo BASIC 1.0. Compiled
+ in "8087 required" mode to generate in-line
+ code for the math coprocessor.
+
+ 66.96 IBM PC/AT 6Mhz, Microsoft QuickBASIC 4.0. This
+ release of QuickBASIC compiles code for the
+ 80287 math coprocessor.
+
+ 66.36 206.35 IBM PC/AT 6Mhz, Metaware High C version 1.3, small
+ model. This was compiled with in-line code for the
+ 80287 math coprocessor. Trig functions still call
+ library routines.
+
+ 63.07 220.43 IBM PC/AT, 6Mhz, Borland Turbo C, in-line 8087 code,
+ small model, word alignment, no stack checking,
+ 8086 code mode.
+
+ 17.18 Apollo DN-3000, 12 Mhz 68020 with 68881, compiled
+ with in-line code for the 68881 coprocessor.
+ According to Apollo, the library routines are chosen
+ at runtime based on coprocessor presence. Since the
+ coprocessor was present, the library is supposed to
+ use in-line floating point code.
+
+ 15.55 27.56 VAXstation II GPX. Compiled and executed under
+ VAX/VMS C.
+
+ 15.14 37.93 Macintosh II, Unix system V. Green Hills 68020
+ Unix compiler with in-line code for the 68881
+ coprocessor (-O -ZI switches).
+
+ 12.69 Sun 3/160 16 Mhz, Sun C. Compiled with -fswitch,
+ which calls a subroutine to select the fastest
+ floating point processor. This was using the 68881.
+
+ 11.74 26.73 Compaq Deskpro 386, 16 Mhz 80386 with 16 Mhz 80387.
+ Metaware High C version 1.3, compiled with in-line
+ for the math coprocessor (but not optimised for the
+ 80386/80387). Trig functions still call library
+ routines.
+
+ 8.43 30.49 Sun 3/160 16 Mhz, Sun C. Compiled with -f68881,
+ generating in-line MC68881 instructions. Trig
+ functions still call library routines.
+
+ 6.29 25.17 Sun 3/260 25 Mhz, Sun C. Compiled with -f68881,
+ generating in-line MC68881 instructions. Trig
+ functions still call library routines.
+
+ 4.57 Sun 3/260 25 Mhz, Sun FORTRAN 77. Compiled with
+ -O -f68881, generating in-line MC68881 instructions.
+ Trig functions are compiled in-line. This used
+ the FORTRAN 77 version of the program, FBFORT77.F.
+
+ 4.00 14.20 Sun386i/25 Mhz model 250, Sun C compiler.
+
+ 4.00 14.00 Sun386i/25 Mhz model 250, Metaware C.
+
+ 3.10 12.00 Compaq 386/387 25 Mhz running SCO Xenix 2.
+ Compiled with Metaware HighC 386, optimized
+ for 386.
+
+ 3.00 12.00 Compaq 386/387 25MHZ optimized for 386/387.
+
+ 2.96 5.17 Sun 4/260, Sparc RISC processor. Sun C,
+ compiled with the -O2 switch for global
+ optimisation.
+
+ 2.47 COMPAQ 486/25, secondary cache disabled, High C,
+ 486/387, inline f.p., small memory model.
+
+ 2.20 3.40 Data General Motorola 88000, 16 Mhz, Gnu C.
+
+ 1.56 COMPAQ 486/25, 128K secondary cache, High C, 486/387,
+ inline f.p., small memory model.
+
+ 0.66 1.50 DEC Pmax, Mips processor.
+
+ 0.63 0.91 Sun SparcStation 2, Sun C (SunOS 4.1.1) with
+ -O4 optimisation and "/usr/lib/libm.il" inline
+ floating point.
+
+ 0.60 1.07 Intel 860 RISC processor, 33 Mhz, Greenhills
+ C compiler.
+
+ 0.40 0.90 Dec 3MAX, MIPS 3000 processor, -O4.
+
+ 0.31 0.90 IBM RS/6000, -O.
+
+ 0.1129 0.2119 Dell Dimension XPS P133c, Pentium 133 MHz,
+ Windows 95, Microsoft Visual C 5.0.
+
+ 0.0883 0.2166 Silicon Graphics Indigo², MIPS R4400,
+ 175 Mhz, "-O3".
+
+ 0.0351 0.0561 Dell Dimension XPS R100, Pentium II 400 MHz,
+ Windows 98, Microsoft Visual C 5.0.
+
+ 0.0312 0.0542 Sun Ultra 2, UltraSPARC V9, 300 MHz, Solaris
+ 2.5.1.
+
+ 0.00862 0.01074 Dell Inspiron 9100, Pentium 4, 3.4 GHz, gcc -O3.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#ifndef INTRIG
+#include <math.h>
+#endif
+
+#define cot(x) (1.0 / tan(x))
+
+#define TRUE 1
+#define FALSE 0
+
+#define max_surfaces 10
+
+/* Local variables */
+
+/* static char tbfr[132]; */
+
+static short current_surfaces;
+static short paraxial;
+
+static double clear_aperture;
+
+static double aberr_lspher;
+static double aberr_osc;
+static double aberr_lchrom;
+
+static double max_lspher;
+static double max_osc;
+static double max_lchrom;
+
+static double radius_of_curvature;
+static double object_distance;
+static double ray_height;
+static double axis_slope_angle;
+static double from_index;
+static double to_index;
+
+static double spectral_line[9];
+static double s[max_surfaces][5];
+static double od_sa[2][2];
+
+static char outarr[8][80]; /* Computed output of program goes here */
+
+int itercount; /* The iteration counter for the main loop
+ in the program is made global so that
+ the compiler should not be allowed to
+ optimise out the loop over the ray
+ tracing code. */
+
+#ifndef ITERATIONS
+#define ITERATIONS 1000
+#endif
+int niter = ITERATIONS; /* Iteration counter */
+
+static char *refarr[] = { /* Reference results. These happen to
+ be derived from a run on Microsoft
+ Quick BASIC on the IBM PC/AT. */
+
+ " Marginal ray 47.09479120920 0.04178472683",
+ " Paraxial ray 47.08372160249 0.04177864821",
+ "Longitudinal spherical aberration: -0.01106960671",
+ " (Maximum permissible): 0.05306749907",
+ "Offense against sine condition (coma): 0.00008954761",
+ " (Maximum permissible): 0.00250000000",
+ "Axial chromatic aberration: 0.00448229032",
+ " (Maximum permissible): 0.05306749907"
+};
+
+/* The test case used in this program is the design for a 4 inch
+ achromatic telescope objective used as the example in Wyld's
+ classic work on ray tracing by hand, given in Amateur Telescope
+ Making, Volume 3. */
+
+static double testcase[4][4] = {
+ {27.05, 1.5137, 63.6, 0.52},
+ {-16.68, 1, 0, 0.138},
+ {-16.68, 1.6164, 36.7, 0.38},
+ {-78.1, 1, 0, 0}
+};
+
+/* Internal trig functions (used only if INTRIG is defined). These
+ standard functions may be enabled to obtain timings that reflect
+ the machine's floating point performance rather than the speed of
+ its trig function evaluation. */
+
+#ifdef INTRIG
+
+/* The following definitions should keep you from getting intro trouble
+ with compilers which don't let you redefine intrinsic functions. */
+
+#define sin I_sin
+#define cos I_cos
+#define tan I_tan
+#define sqrt I_sqrt
+#define atan I_atan
+#define atan2 I_atan2
+#define asin I_asin
+
+#define fabs(x) ((x < 0.0) ? -x : x)
+
+#define pic 3.1415926535897932
+
+/* Commonly used constants */
+
+static double pi = pic,
+ twopi =pic * 2.0,
+ piover4 = pic / 4.0,
+ fouroverpi = 4.0 / pic,
+ piover2 = pic / 2.0;
+
+/* Coefficients for ATAN evaluation */
+
+static double atanc[] = {
+ 0.0,
+ 0.4636476090008061165,
+ 0.7853981633974483094,
+ 0.98279372324732906714,
+ 1.1071487177940905022,
+ 1.1902899496825317322,
+ 1.2490457723982544262,
+ 1.2924966677897852673,
+ 1.3258176636680324644
+};
+
+/* aint(x) Return integer part of number. Truncates towards 0 */
+
+static double aint(double x)
+{
+ long l;
+
+ /* Note that this routine cannot handle the full floating point
+ number range. This function should be in the machine-dependent
+ floating point library! */
+
+ l = x;
+ if ((int)(-0.5) != 0 && l < 0 )
+ l++;
+ x = l;
+ return x;
+}
+
+/* sin(x) Return sine, x in radians */
+
+static double sin(double x)
+{
+ int sign;
+ double y, r, z;
+
+ x = (((sign= (x < 0.0)) != 0) ? -x: x);
+
+ if (x > twopi)
+ x -= (aint(x / twopi) * twopi);
+
+ if (x > pi) {
+ x -= pi;
+ sign = !sign;
+ }
+
+ if (x > piover2)
+ x = pi - x;
+
+ if (x < piover4) {
+ y = x * fouroverpi;
+ z = y * y;
+ r = y * (((((((-0.202253129293E-13 * z + 0.69481520350522E-11) * z -
+ 0.17572474176170806E-8) * z + 0.313361688917325348E-6) * z -
+ 0.365762041821464001E-4) * z + 0.249039457019271628E-2) * z -
+ 0.0807455121882807815) * z + 0.785398163397448310);
+ } else {
+ y = (piover2 - x) * fouroverpi;
+ z = y * y;
+ r = ((((((-0.38577620372E-12 * z + 0.11500497024263E-9) * z -
+ 0.2461136382637005E-7) * z + 0.359086044588581953E-5) * z -
+ 0.325991886926687550E-3) * z + 0.0158543442438154109) * z -
+ 0.308425137534042452) * z + 1.0;
+ }
+ return sign ? -r : r;
+}
+
+/* cos(x) Return cosine, x in radians, by identity */
+
+static double cos(double x)
+{
+ x = (x < 0.0) ? -x : x;
+ if (x > twopi) /* Do range reduction here to limit */
+ x = x - (aint(x / twopi) * twopi); /* roundoff on add of PI/2 */
+ return sin(x + piover2);
+}
+
+/* tan(x) Return tangent, x in radians, by identity */
+
+static double tan(double x)
+{
+ return sin(x) / cos(x);
+}
+
+/* sqrt(x) Return square root. Initial guess, then Newton-
+ Raphson refinement */
+
+double sqrt(double x)
+{
+ double c, cl, y;
+ int n;
+
+ if (x == 0.0)
+ return 0.0;
+
+ if (x < 0.0) {
+ fprintf(stderr,
+ "\nGood work! You tried to take the square root of %g",
+ x);
+ fprintf(stderr,
+ "\nunfortunately, that is too complex for me to handle.\n");
+ exit(1);
+ }
+
+ y = (0.154116 + 1.893872 * x) / (1.0 + 1.047988 * x);
+
+ c = (y - x / y) / 2.0;
+ cl = 0.0;
+ for (n = 50; c != cl && n--;) {
+ y = y - c;
+ cl = c;
+ c = (y - x / y) / 2.0;
+ }
+ return y;
+}
+
+/* atan(x) Return arctangent in radians,
+ range -pi/2 to pi/2 */
+
+static double atan(double x)
+{
+ int sign, l, y;
+ double a, b, z;
+
+ x = (((sign = (x < 0.0)) != 0) ? -x : x);
+ l = 0;
+
+ if (x >= 4.0) {
+ l = -1;
+ x = 1.0 / x;
+ y = 0;
+ goto atl;
+ } else {
+ if (x < 0.25) {
+ y = 0;
+ goto atl;
+ }
+ }
+
+ y = aint(x / 0.5);
+ z = y * 0.5;
+ x = (x - z) / (x * z + 1);
+
+atl:
+ z = x * x;
+ b = ((((893025.0 * z + 49116375.0) * z + 425675250.0) * z +
+ 1277025750.0) * z + 1550674125.0) * z + 654729075.0;
+ a = (((13852575.0 * z + 216602100.0) * z + 891080190.0) * z +
+ 1332431100.0) * z + 654729075.0;
+ a = (a / b) * x + atanc[y];
+ if (l)
+ a=piover2 - a;
+ return sign ? -a : a;
+}
+
+/* atan2(y,x) Return arctangent in radians of y/x,
+ range -pi to pi */
+
+static double atan2(double y, double x)
+{
+ double temp;
+
+ if (x == 0.0) {
+ if (y == 0.0) /* Special case: atan2(0,0) = 0 */
+ return 0.0;
+ else if (y > 0)
+ return piover2;
+ else
+ return -piover2;
+ }
+ temp = atan(y / x);
+ if (x < 0.0) {
+ if (y >= 0.0)
+ temp += pic;
+ else
+ temp -= pic;
+ }
+ return temp;
+}
+
+/* asin(x) Return arcsine in radians of x */
+
+static double asin(double x)
+{
+ if (fabs(x)>1.0) {
+ fprintf(stderr,
+ "\nInverse trig functions lose much of their gloss when");
+ fprintf(stderr,
+ "\ntheir arguments are greater than 1, such as the");
+ fprintf(stderr,
+ "\nvalue %g you passed.\n", x);
+ exit(1);
+ }
+ return atan2(x, sqrt(1 - x * x));
+}
+#endif
+
+/* Calculate passage through surface
+
+ If the variable PARAXIAL is true, the trace through the
+ surface will be done using the paraxial approximations.
+ Otherwise, the normal trigonometric trace will be done.
+
+ This routine takes the following inputs:
+
+ RADIUS_OF_CURVATURE Radius of curvature of surface
+ being crossed. If 0, surface is
+ plane.
+
+ OBJECT_DISTANCE Distance of object focus from
+ lens vertex. If 0, incoming
+ rays are parallel and
+ the following must be specified:
+
+ RAY_HEIGHT Height of ray from axis. Only
+ relevant if OBJECT.DISTANCE == 0
+
+ AXIS_SLOPE_ANGLE Angle incoming ray makes with axis
+ at intercept
+
+ FROM_INDEX Refractive index of medium being left
+
+ TO_INDEX Refractive index of medium being
+ entered.
+
+ The outputs are the following variables:
+
+ OBJECT_DISTANCE Distance from vertex to object focus
+ after refraction.
+
+ AXIS_SLOPE_ANGLE Angle incoming ray makes with axis
+ at intercept after refraction.
+
+*/
+
+static void transit_surface(void) {
+ double iang, /* Incidence angle */
+ rang, /* Refraction angle */
+ iang_sin, /* Incidence angle sin */
+ rang_sin, /* Refraction angle sin */
+ old_axis_slope_angle, sagitta;
+
+ if (paraxial) {
+ if (radius_of_curvature != 0.0) {
+ if (object_distance == 0.0) {
+ axis_slope_angle = 0.0;
+ iang_sin = ray_height / radius_of_curvature;
+ } else
+ iang_sin = ((object_distance -
+ radius_of_curvature) / radius_of_curvature) *
+ axis_slope_angle;
+
+ rang_sin = (from_index / to_index) *
+ iang_sin;
+ old_axis_slope_angle = axis_slope_angle;
+ axis_slope_angle = axis_slope_angle +
+ iang_sin - rang_sin;
+ if (object_distance != 0.0)
+ ray_height = object_distance * old_axis_slope_angle;
+ object_distance = ray_height / axis_slope_angle;
+ return;
+ }
+ object_distance = object_distance * (to_index / from_index);
+ axis_slope_angle = axis_slope_angle * (from_index / to_index);
+ return;
+ }
+
+ if (radius_of_curvature != 0.0) {
+ if (object_distance == 0.0) {
+ axis_slope_angle = 0.0;
+ iang_sin = ray_height / radius_of_curvature;
+ } else {
+ iang_sin = ((object_distance -
+ radius_of_curvature) / radius_of_curvature) *
+ sin(axis_slope_angle);
+ }
+ iang = asin(iang_sin);
+ rang_sin = (from_index / to_index) *
+ iang_sin;
+ old_axis_slope_angle = axis_slope_angle;
+ axis_slope_angle = axis_slope_angle +
+ iang - asin(rang_sin);
+ sagitta = sin((old_axis_slope_angle + iang) / 2.0);
+ sagitta = 2.0 * radius_of_curvature*sagitta*sagitta;
+ object_distance = ((radius_of_curvature * sin(
+ old_axis_slope_angle + iang)) *
+ cot(axis_slope_angle)) + sagitta;
+ return;
+ }
+
+ rang = -asin((from_index / to_index) *
+ sin(axis_slope_angle));
+ object_distance = object_distance * ((to_index *
+ cos(-rang)) / (from_index *
+ cos(axis_slope_angle)));
+ axis_slope_angle = -rang;
+}
+
+/* Perform ray trace in specific spectral line */
+
+static void trace_line(int line, double ray_h)
+{
+ int i;
+
+ object_distance = 0.0;
+ ray_height = ray_h;
+ from_index = 1.0;
+
+ for (i = 1; i <= current_surfaces; i++) {
+ radius_of_curvature = s[i][1];
+ to_index = s[i][2];
+ if (to_index > 1.0)
+ to_index = to_index + ((spectral_line[4] -
+ spectral_line[line]) /
+ (spectral_line[3] - spectral_line[6])) * ((s[i][2] - 1.0) /
+ s[i][3]);
+ transit_surface();
+ from_index = to_index;
+ if (i < current_surfaces)
+ object_distance = object_distance - s[i][4];
+ }
+}
+
+/* Initialise when called the first time */
+
+int main(int argc, char *argv[])
+{
+ int i, j, k, errors;
+ double od_fline, od_cline;
+#ifdef ACCURACY
+ long passes;
+#endif
+ struct timeval begin_tv, end_tv, diff_tv;
+
+ spectral_line[1] = 7621.0; /* A */
+ spectral_line[2] = 6869.955; /* B */
+ spectral_line[3] = 6562.816; /* C */
+ spectral_line[4] = 5895.944; /* D */
+ spectral_line[5] = 5269.557; /* E */
+ spectral_line[6] = 4861.344; /* F */
+ spectral_line[7] = 4340.477; /* G'*/
+ spectral_line[8] = 3968.494; /* H */
+
+ /* Process the number of iterations argument, if one is supplied. */
+
+ if (argc > 1) {
+ niter = atoi(argv[1]);
+ if (*argv[1] == '-' || niter < 1) {
+ printf("This is John Walker's floating point accuracy and\n");
+ printf("performance benchmark program. You call it with\n");
+ printf("\n%s <itercount>\n\n", argv[0]);
+ printf("where <itercount> is the number of iterations\n");
+ printf("to be executed. Archival timings should be made\n");
+ printf("with the iteration count set so that roughly five\n");
+ printf("minutes of execution is timed.\n");
+ exit(0);
+ }
+ }
+
+ /* Load test case into working array */
+
+ clear_aperture = 4.0;
+ current_surfaces = 4;
+ for (i = 0; i < current_surfaces; i++)
+ for (j = 0; j < 4; j++)
+ s[i + 1][j + 1] = testcase[i][j];
+
+#ifdef ACCURACY
+ printf("Beginning execution of floating point accuracy test...\n");
+ passes = 0;
+#else
+/*
+ printf("Ready to begin John Walker's floating point accuracy\n");
+ printf("and performance benchmark. %d iterations will be made.\n\n", niter);
+
+ printf("\nMeasured run time in seconds should be divided by %.f\n", niter / 1000.0);
+ printf("to normalise for reporting results. For archival results,\n");
+ printf("adjust iteration count so the benchmark runs about five minutes.\n\n");
+*/
+ /* now we start. Get the current time */
+ gettimeofday(&begin_tv, NULL);
+#endif
+
+ /* Perform ray trace the specified number of times. */
+
+#ifdef ACCURACY
+ while (TRUE) {
+ passes++;
+ if ((passes % 100L) == 0) {
+ printf("Pass %ld.\n", passes);
+ }
+#else
+ for (itercount = 0; itercount < niter; itercount++) {
+#endif
+
+ for (paraxial = 0; paraxial <= 1; paraxial++) {
+
+ /* Do main trace in D light */
+
+ trace_line(4, clear_aperture / 2.0);
+ od_sa[paraxial][0] = object_distance;
+ od_sa[paraxial][1] = axis_slope_angle;
+ }
+ paraxial = FALSE;
+
+ /* Trace marginal ray in C */
+
+ trace_line(3, clear_aperture / 2.0);
+ od_cline = object_distance;
+
+ /* Trace marginal ray in F */
+
+ trace_line(6, clear_aperture / 2.0);
+ od_fline = object_distance;
+
+ aberr_lspher = od_sa[1][0] - od_sa[0][0];
+ aberr_osc = 1.0 - (od_sa[1][0] * od_sa[1][1]) /
+ (sin(od_sa[0][1]) * od_sa[0][0]);
+ aberr_lchrom = od_fline - od_cline;
+ max_lspher = sin(od_sa[0][1]);
+
+ /* D light */
+
+ max_lspher = 0.0000926 / (max_lspher * max_lspher);
+ max_osc = 0.0025;
+ max_lchrom = max_lspher;
+#ifndef ACCURACY
+ }
+
+ gettimeofday(&end_tv, NULL);
+#endif
+
+ /* Now evaluate the accuracy of the results from the last ray trace */
+
+ sprintf(outarr[0], "%15s %21.11f %14.11f",
+ "Marginal ray", od_sa[0][0], od_sa[0][1]);
+ sprintf(outarr[1], "%15s %21.11f %14.11f",
+ "Paraxial ray", od_sa[1][0], od_sa[1][1]);
+ sprintf(outarr[2],
+ "Longitudinal spherical aberration: %16.11f",
+ aberr_lspher);
+ sprintf(outarr[3],
+ " (Maximum permissible): %16.11f",
+ max_lspher);
+ sprintf(outarr[4],
+ "Offense against sine condition (coma): %16.11f",
+ aberr_osc);
+ sprintf(outarr[5],
+ " (Maximum permissible): %16.11f",
+ max_osc);
+ sprintf(outarr[6],
+ "Axial chromatic aberration: %16.11f",
+ aberr_lchrom);
+ sprintf(outarr[7],
+ " (Maximum permissible): %16.11f",
+ max_lchrom);
+
+ /* Now compare the edited results with the master values from
+ reference executions of this program. */
+
+ errors = 0;
+ for (i = 0; i < 8; i++) {
+ if (strcmp(outarr[i], refarr[i]) != 0) {
+#ifdef ACCURACY
+ printf("\nError in pass %ld for results on line %d...\n",
+ passes, i + 1);
+#else
+ printf("\nError in results on line %d...\n", i + 1);
+#endif
+ printf("Expected: \"%s\"\n", refarr[i]);
+ printf("Received: \"%s\"\n", outarr[i]);
+ printf("(Errors) ");
+ k = strlen(refarr[i]);
+ for (j = 0; j < k; j++) {
+ printf("%c", refarr[i][j] == outarr[i][j] ? ' ' : '^');
+ if (refarr[i][j] != outarr[i][j])
+ errors++;
+ }
+ printf("\n");
+ }
+ }
+#ifdef ACCURACY
+ }
+#else
+ if (errors > 0) {
+ fprintf(stderr, "\n%d error%s in results. This is VERY SERIOUS.\n",
+ errors, errors > 1 ? "s" : "");
+ return 1;
+ }
+#endif
+ timersub(&end_tv, &begin_tv, &diff_tv);
+
+ printf("Time(s): %u.%u\n", diff_tv.tv_sec, diff_tv.tv_usec);
+
+ return 0;
+}