Hao Chen and Xinguo Liu did a presentation at SIGGRAPH '08 and the slides from it contain a good deal of useful stuff, nevermind one of the ONLY easy-to-find rotate-by-matrix functions. It also treats the Z axis a bit awkwardly, so I patched the rotation code up a bit, and a pre-integrated cosine convolution filter so you can easily get SH coefs for directional light.
There was also gratuitous use of sqrt(3) multipliers, which can be completely eliminated by simply premultiplying or predividing coef #6 by it, which incidentally causes all of the constants and multipliers to resolve to rational numbers.
As always, you can include multiple lights by simply adding the SH coefs for them together. If you want specular, you can approximate a directional light by using the linear component to determine the direction, and constant component to determine the color. You can do this per-channel, or use the average values to determine the direction and do it once.
Here are the spoilers:
#define SH_AMBIENT_FACTOR (0.25f)
#define SH_LINEAR_FACTOR (0.5f)
#define SH_QUADRATIC_FACTOR (0.3125f)
void LambertDiffuseToSHCoefs(const terVec3 &dir, float out[9])
{
// Constant
out[0] = 1.0f * SH_AMBIENT_FACTOR;
// Linear
out[1] = dir[1] * SH_LINEAR_FACTOR;
out[2] = dir[2] * SH_LINEAR_FACTOR;
out[3] = dir[0] * SH_LINEAR_FACTOR;
// Quadratics
out[4] = ( dir[0]*dir[1] ) * 3.0f*SH_QUADRATIC_FACTOR;
out[5] = ( dir[1]*dir[2] ) * 3.0f*SH_QUADRATIC_FACTOR;
out[6] = ( 1.5f*( dir[2]*dir[2] ) - 0.5f ) * SH_QUADRATIC_FACTOR;
out[7] = ( dir[0]*dir[2] ) * 3.0f*SH_QUADRATIC_FACTOR;
out[8] = 0.5f*( dir[0]*dir[0] - dir[1]*dir[1] ) * 3.0f*SH_QUADRATIC_FACTOR;
}
void RotateCoefsByMatrix(float outCoefs[9], const float pIn[9], const terMat3x3 &rMat)
{
// DC
outCoefs[0] = pIn[0];
// Linear
outCoefs[1] = rMat[1][0]*pIn[3] + rMat[1][1]*pIn[1] + rMat[1][2]*pIn[2];
outCoefs[2] = rMat[2][0]*pIn[3] + rMat[2][1]*pIn[1] + rMat[2][2]*pIn[2];
outCoefs[3] = rMat[0][0]*pIn[3] + rMat[0][1]*pIn[1] + rMat[0][2]*pIn[2];
// Quadratics
outCoefs[4] = (
( rMat[0][0]*rMat[1][1] + rMat[0][1]*rMat[1][0] ) * ( pIn[4] )
+ ( rMat[0][1]*rMat[1][2] + rMat[0][2]*rMat[1][1] ) * ( pIn[5] )
+ ( rMat[0][2]*rMat[1][0] + rMat[0][0]*rMat[1][2] ) * ( pIn[7] )
+ ( rMat[0][0]*rMat[1][0] ) * ( pIn[8] )
+ ( rMat[0][1]*rMat[1][1] ) * ( -pIn[8] )
+ ( rMat[0][2]*rMat[1][2] ) * ( 3.0f*pIn[6] )
);
outCoefs[5] = (
( rMat[1][0]*rMat[2][1] + rMat[1][1]*rMat[2][0] ) * ( pIn[4] )
+ ( rMat[1][1]*rMat[2][2] + rMat[1][2]*rMat[2][1] ) * ( pIn[5] )
+ ( rMat[1][2]*rMat[2][0] + rMat[1][0]*rMat[2][2] ) * ( pIn[7] )
+ ( rMat[1][0]*rMat[2][0] ) * ( pIn[8] )
+ ( rMat[1][1]*rMat[2][1] ) * ( -pIn[8] )
+ ( rMat[1][2]*rMat[2][2] ) * ( 3.0f*pIn[6] )
);
outCoefs[6] = (
( rMat[2][1]*rMat[2][0] ) * ( pIn[4] )
+ ( rMat[2][2]*rMat[2][1] ) * ( pIn[5] )
+ ( rMat[2][0]*rMat[2][2] ) * ( pIn[7] )
+ 0.5f*( rMat[2][0]*rMat[2][0] ) * ( pIn[8])
+ 0.5f*( rMat[2][1]*rMat[2][1] ) * ( -pIn[8])
+ 1.5f*( rMat[2][2]*rMat[2][2] ) * ( pIn[6] )
- 0.5f * ( pIn[6] )
);
outCoefs[7] = (
( rMat[0][0]*rMat[2][1] + rMat[0][1]*rMat[2][0] ) * ( pIn[4] )
+ ( rMat[0][1]*rMat[2][2] + rMat[0][2]*rMat[2][1] ) * ( pIn[5] )
+ ( rMat[0][2]*rMat[2][0] + rMat[0][0]*rMat[2][2] ) * ( pIn[7] )
+ ( rMat[0][0]*rMat[2][0] ) * ( pIn[8] )
+ ( rMat[0][1]*rMat[2][1] ) * ( -pIn[8] )
+ ( rMat[0][2]*rMat[2][2] ) * ( 3.0f*pIn[6] )
);
outCoefs[8] = (
( rMat[0][1]*rMat[0][0] - rMat[1][1]*rMat[1][0] ) * ( pIn[4] )
+ ( rMat[0][2]*rMat[0][1] - rMat[1][2]*rMat[1][1] ) * ( pIn[5] )
+ ( rMat[0][0]*rMat[0][2] - rMat[1][0]*rMat[1][2] ) * ( pIn[7] )
+0.5f*( rMat[0][0]*rMat[0][0] - rMat[1][0]*rMat[1][0] ) * ( pIn[8] )
+0.5f*( rMat[0][1]*rMat[0][1] - rMat[1][1]*rMat[1][1] ) * ( -pIn[8] )
+0.5f*( rMat[0][2]*rMat[0][2] - rMat[1][2]*rMat[1][2] ) * ( 3.0f*pIn[6] )
);
}
... and to sample it in the shader ...
float3 SampleSHQuadratic(float3 dir, float3 shVector[9])
{
float3 ds1 = dir.xyz*dir.xyz;
float3 ds2 = dir*dir.yzx; // xy, zy, xz
float3 v = shVector[0];
v += dir.y * shVector[1];
v += dir.z * shVector[2];
v += dir.x * shVector[3];
v += ds2.x * shVector[4];
v += ds2.y * shVector[5];
v += (ds1.z * 1.5 - 0.5) * shVector[6];
v += ds2.z * shVector[7];
v += (ds1.x - ds1.y) * 0.5 * shVector[8];
return v;
}
For Monte Carlo integration, take sampling points, feed direction "dir" to the following function to get multipliers for each coefficient, then multiply by the intensity in that direction. Divide the total by the number of sampling points:
void SHForDirection(const terVec3 &dir, float out[9])
{
// Constant
out[0] = 1.0f;
// Linear
out[1] = dir[1] * 3.0f;
out[2] = dir[2] * 3.0f;
out[3] = dir[0] * 3.0f;
// Quadratics
out[4] = ( dir[0]*dir[1] ) * 15.0f;
out[5] = ( dir[1]*dir[2] ) * 15.0f;
out[6] = ( 1.5f*( dir[2]*dir[2] ) - 0.5f ) * 5.0f;
out[7] = ( dir[0]*dir[2] ) * 15.0f;
out[8] = 0.5f*( dir[0]*dir[0] - dir[1]*dir[1] ) * 15.0f;
}
... and finally, for a uniformly-distributed random point on a sphere ...
terVec3 RandomDirection(int (*randomFunc)(), int randMax)
{
float u = (((float)randomFunc()) / (float)(randMax - 1))*2.0f - 1.0f;
float n = sqrtf(1.0f - u*u);
float theta = 2.0f * M_PI * (((float)randomFunc()) / (float)(randMax));
return terVec3(n * cos(theta), n * sin(theta), u);
}