We obtained a surprising performance result when comparing optimized parallel ray tracers written in F# and C++ recently. The following two programs render the same highly complex scenes containing over a million objects. Surprisingly, the 136-line managed F# program runs slightly faster at 17s than the 168-line unmanaged C++ which takes 18s.

Here is the F# program:

let t = System.Diagnostics.Stopwatch.StartNew()
let delta = sqrt epsilon_float
[<Struct>]
type vec =
val x : float
val y : float
val z : float
new(x, y, z) = {x=x; y=y; z=z}
static member ( + ) (a : vec, b : vec) = new vec(a.x+b.x, a.y+b.y, a.z+b.z)
static member ( - ) (a : vec, b : vec) = new vec(a.x-b.x, a.y-b.y, a.z-b.z)
static member ( * ) (a, b : vec) = new vec(a*b.x, a*b.y, a*b.z)
let vec x y z = new vec(x, y, z)
let dot (a : vec) (b : vec) = a.x * b.x + a.y * b.y + a.z * b.z
let length r = sqrt(dot r r)
let unitise r = 1. / length r * r
type scene = Scene of vec * float * scene_type
and scene_type =
| Sphere
| Group of scene * scene * scene * scene * scene
let infinity = System.Double.PositiveInfinity
let inline ray_sphere (d : vec) (v : vec) r =
let vx = v.x
let vy = v.y
let vz = v.z
let disc = vx * vx + vy * vy + vz * vz - r * r
if disc < 0. then infinity else
let b = vx * d.x + vy * d.y + vz * d.z
let b2 = b * b
if b2 < disc then infinity else
let disc = sqrt(b2 - disc)
let t1 = b - disc in
if t1 > 0. then t1 else b + disc
let ray_sphere' (o : vec) (d : vec) (c : vec) r =
let vx = c.x - o.x
let vy = c.y - o.y
let vz = c.z - o.z
let vv = vx * vx + vy * vy + vz * vz
let b = vx * d.x + vy * d.y + vz * d.z
let disc = b * b - vv + r * r
disc >= 0. && b + sqrt disc >= 0.
type hit = {mutable l: float; mutable nx: float; mutable ny: float; mutable nz: float}
let intersect_sphere (dir: vec) hit l' (center: vec) =
let x = l' * dir.x - center.x
let y = l' * dir.y - center.y
let z = l' * dir.z - center.z
let il = 1. / sqrt(x * x + y * y + z * z)
hit.l <- l'
hit.nx <- il * x
hit.ny <- il * y
hit.nz <- il * z
let rec intersect dir hit = function
| Scene(center, radius,typ) ->
let l' = ray_sphere dir center radius
if l' < hit.l then
match typ with
| Sphere ->
intersect_sphere dir hit l' center
| Group(a, b, c, d, e) ->
intersect dir hit a
intersect dir hit b
intersect dir hit c
intersect dir hit d
intersect dir hit e
let rec intersect' orig dir = function
| Scene(center, radius, typ) ->
ray_sphere' orig dir center radius &&
match typ with
| Sphere -> true
| Group (a, b, c, d, e) ->
intersect' orig dir a ||
intersect' orig dir b ||
intersect' orig dir c ||
intersect' orig dir d ||
intersect' orig dir e
let neg_light = unitise(vec 1. 3. -2.)
let rec ray_trace dir scene =
let hit = {l=infinity; nx=0.; ny=0.; nz=0.}
intersect dir hit scene;
if hit.l = infinity then 0. else
let n = vec hit.nx hit.ny hit.nz in
let g = dot n neg_light in
if g < 0. then 0. else
if intersect' (hit.l * dir + delta * n) neg_light scene then 0. else g
let fold5 f x a b c d e = f (f (f (f (f x a) b) c) d) e
let rec create level c r =
let obj = Scene(c, r,Sphere)
if level = 1 then obj else
let a = 3. * r / sqrt 12.
let rec bound (c, r) = function
| Scene(c', r',Sphere) -> c, max r (length (c - c') + r')
| Scene(_, _, Group(v, w, x, y, z)) -> fold5 bound (c, r) v w x y z
let aux x' z' = create (level - 1) (c + vec x' a z') (0.5 * r)
let w, x, y, z = aux a (-a), aux (-a) (-a), aux a a, aux (-a) a
let c, r = fold5 bound (c + vec 0. r 0., 0.) obj w x y z in
Scene(c, r, Group(obj, w, x, y, z))
let level, n = 11, 2048
let scene = create level (vec 0. -1. 4.) 1.
let ss = 4
do
use ch = System.IO.File.Create("C:\Users\Jon\Documents\Visual Studio 10\Projects\F#\RayTracer\image.pgm", n * n + 32)
sprintf "P5\n%d %d\n255\n" n n
|> String.iter (ch.WriteByte << byte)
let image = Array.create (n*n) 0uy
System.Threading.Tasks.Parallel.For(0, n, fun y ->
for x = 0 to n - 1 do
let mutable g = 0.
for dx = 0 to ss - 1 do
for dy = 0 to ss - 1 do
let aux x d = float x - 0.5 * float n + float d / float ss
let dir = unitise(vec (aux x dx) (aux y dy) (float n))
g <- g + ray_trace dir scene
image.[x+y*n] <- 0.5 + 255. * g / float (ss*ss) |> byte)
|> ignore
ch.Write(image, 0, n*n)
printf "Took %gs" t.Elapsed.TotalSeconds

And here is the C++ program:

#include "stdafx.h"
#include <vector>
#include <iostream>
#include <fstream>
#include <limits>
#include <cmath>
using namespace std;
numeric_limits<double> real;
double delta = sqrt(real.epsilon()), infinity = real.infinity();
struct Vec {
double x, y, z;
Vec(double x2, double y2, double z2) : x(x2), y(y2), z(z2) {}
};
Vec operator+(const Vec &a, const Vec &b)
{ return Vec(a.x+b.x, a.y+b.y, a.z+b.z); }
Vec operator-(const Vec &a, const Vec &b)
{ return Vec(a.x-b.x, a.y-b.y, a.z-b.z); }
Vec operator*(double a, const Vec &b) { return Vec(a*b.x, a*b.y, a*b.z); }
double dot(const Vec &a, const Vec &b) { return a.x*b.x + a.y*b.y + a.z*b.z; }
double length(const Vec &a) { return sqrt(dot(a, a)); }
Vec unitise(const Vec &a) { return (1 / sqrt(dot(a, a))) * a; }
struct Hit {
double lambda;
Vec normal;
Hit() : lambda(infinity), normal(Vec(0, 0, 0)) {}
};
struct Sphere;
struct Scene {
virtual ~Scene() {};
virtual void intersect(Hit &, const Vec &) const = 0;
virtual bool intersect(const Vec &, const Vec &) const = 0;
virtual Sphere bound(Sphere b) const = 0;
};
struct Sphere : public Scene {
Vec center;
double radius;
Sphere(Vec c, double r) : center(c), radius(r) {}
~Sphere() {}
double ray_sphere(const Vec &dir) const {
double b = center.x*dir.x + center.y*dir.y + center.z*dir.z;
double disc = b*b - dot(center, center) + radius * radius;
if (disc > 0) {
double d = sqrt(disc), t2 = b + d;
if (t2 > 0) {
double t1 = b - d;
return (t1 > 0 ? t1 : t2);
}
}
return infinity;
}
bool sray_sphere(const Vec &orig, const Vec &dir) const {
Vec v = center - orig;
double b = dot(v, dir), disc = b*b - dot(v, v) + radius * radius;
return (disc < 0 ? false : b + sqrt(disc) >= 0);
}
void intersect(Hit &hit, const Vec &dir) const {
double lambda = ray_sphere(dir);
if (lambda < hit.lambda) {
hit.lambda = lambda;
double
nx = lambda*dir.x - center.x,
ny = lambda*dir.y - center.y,
nz = lambda*dir.z - center.z;
double il = 1/sqrt(nx*nx + ny*ny + nz*nz);
hit.normal.x = il*nx;
hit.normal.y = il*ny;
hit.normal.z = il*nz;
}
}
bool intersect(const Vec &orig, const Vec &dir) const
{ return sray_sphere(orig, dir); }
Sphere bound(Sphere b) const {
return Sphere(b.center, max(b.radius, length(center - b.center) + radius));
}
};
typedef vector<Scene *> Scenes;
struct Group : public Scene {
Sphere b;
Scenes child;
Group(Sphere b, Scenes c) : b(b), child(c) {}
~Group() {
for (Scenes::const_iterator it=child.begin(); it!=child.end(); ++it)
delete *it;
}
void intersect(Hit &hit, const Vec &dir) const {
if (b.ray_sphere(dir) < hit.lambda)
for (Scenes::const_iterator it=child.begin(); it!=child.end(); ++it)
(*it)->intersect(hit, dir);
}
bool intersect(const Vec &orig, const Vec &dir) const {
if (!b.sray_sphere(orig, dir)) return false;
for (Scenes::const_iterator it=child.begin(); it!=child.end(); ++it)
if ((*it)->intersect(orig, dir)) return true;
return false;
}
Sphere bound(Sphere b) const {
Sphere b2 = b;
for (Scenes::const_iterator it=child.begin(); it!=child.end(); ++it)
b2 = (*it)->bound(b2);
return b2;
}
};
double ray_trace(const Vec &neg_light, const Vec &dir, const Scene &s) {
Hit hit;
s.intersect(hit, dir);
if (hit.lambda == infinity) return 0;
double g = dot(hit.normal, neg_light);
if (g < 0) return 0.;
Vec p = hit.lambda*dir + delta*hit.normal;
return (s.intersect(p, neg_light) ? 0 : g);
}
Scene *create(int level, Vec c, double r) {
Scene *s = new Sphere(c, r);
if (level == 1) return s;
Scenes child;
child.reserve(5);
child.push_back(s);
double rn = 3*r/sqrt(12.);
for (int dz=-1; dz<=1; dz+=2)
for (int dx=-1; dx<=1; dx+=2)
child.push_back(create(level-1, c + rn*Vec(dx, 1, dz), r/2));
Sphere b2(c + Vec(0, r, 0), r);
for (Scenes::const_iterator it=child.begin(); it!=child.end(); ++it)
b2 = (*it)->bound(b2);
return new Group(b2, child);
}
int _tmain(int argc, char* argv[]) {
int level = (argc==3 ? atoi(argv[1]) : 11),
n = (argc==3 ? atoi(argv[2]) : 2048), ss = 4;
Vec neg_light = unitise(Vec(1, 3, -2));
Scene *s(create(level, Vec(0, -1, 4), 1));
ofstream out;
std::vector<char> image(n*n);
#pragma omp parallel for
for (int y=0; y<n; ++y)
for (int x=0; x<n; ++x) {
double g=0;
for (int dx=0; dx<ss; ++dx)
for (int dy=0; dy<ss; ++dy) {
Vec dir(unitise(Vec(x+dx*1./ss-n/2., y+dy*1./ss-n/2., n)));
g += ray_trace(neg_light, dir, *s);
}
image[x+(n-1-y)*n] = .5 + 255. * g / (ss*ss);
}
out.open("C:\\Users\\Jon\\Documents\\image.pgm", std::ios_base::binary);
out << "P5\n" << n << " " << n << "\n255\n";
out.write(&image[0], n*n);
return 0;
}