Find Tiles on a Line with 256ths Subpixel Precision

I decided to post this monster algorithm for anyone's future reference because it was an absolute pain in the ***. This is something anyone making a 2D, tile-based platforming engine will need, especially if you're using fixed-point math. Furthermore, I believe it belongs here in this forum.
On the whole this took an entire week of frustration to fully get working.
All values are Fix's, which are basically just signed integers. Here the input coordinates are scaled up for use in a fixed-point engine. So be sure to remember that 256 is equivalent to 1, 128 is equivalent to 0.5, 64 == 0.25, and so on.
Yes, this is code from DELTA.
On the whole this took an entire week of frustration to fully get working.

All values are Fix's, which are basically just signed integers. Here the input coordinates are scaled up for use in a fixed-point engine. So be sure to remember that 256 is equivalent to 1, 128 is equivalent to 0.5, 64 == 0.25, and so on.
// Coded by Simion32: Tile Finder v1.2
// Free to use, but please give credit.
typedef int64_t Fix64;
typedef int32_t Fix32;
// Pass in the start and end coordinates of the line, and the tile width and height (bit count).
void VisitTiles(Fix32 x1, Fix32 y1, Fix32 x2, Fix32 y2, int tw_bits, int th_bits)
{
Fix32 tw = (1 << tw_bits);
Fix32 th = (1 << th_bits);
Fix32 tx_inc = (((x2 >= x1)<<1)-1);
Fix32 ty_inc = (((y2 >= y1)<<1)-1);
if(tx_inc < 0)
{
Fix32 width = (x1 - x2);
if(x1 & (tw - 1))
x1 = ((x1 ^ (tw - 1)) + 1);
else
x1 -= tw;
x2 = (x1 + width);
}
if(ty_inc < 0)
{
Fix32 height = (y1 - y2);
if(y1 & (th - 1))
y1 = ((y1 ^ (th - 1)) + 1);
else
y1 -= th;
y2 = (y1 + height);
}
Fix32 tx1 = (x1 >> tw_bits);
Fix32 ty1 = (y1 >> th_bits);
Fix32 tx2 = (x2 >> tw_bits);
Fix32 ty2 = (y2 >> th_bits);
Fix32 dtx = (tx2 - tx1);
Fix32 dty = (ty2 - ty1);
{
Fix32 width = (x2 - x1);
x1 &= (tw - 1);
x2 = (x1 + width);
}
{
Fix32 height = (y2 - y1);
y1 &= (th - 1);
y2 = (y1 + height);
}
Fix32 n_tiles = (dtx + dty + 1);
Fix64 dx = (x2 - x1);
Fix64 dy = (y2 - y1);
Fix64 sdx = (dx * th);
Fix64 sdy = (dy * tw);
Fix64 error = +sdx -sdy -(dx * (y1 & (th - 1))) +(dy * (x1 & (tw - 1)));
for(; n_tiles > 0; --n_tiles)
{
visit_tile(tx1, ty1);
if(error == 0)
{
visit_tile(tx1+tx_inc, ty1);
}
if(error > 0)
{
tx1 += tx_inc;
error -= sdy;
}
else
{
ty1 += ty_inc;
error += sdx;
}
}
}
// Free to use, but please give credit.
typedef int64_t Fix64;
typedef int32_t Fix32;
// Pass in the start and end coordinates of the line, and the tile width and height (bit count).
void VisitTiles(Fix32 x1, Fix32 y1, Fix32 x2, Fix32 y2, int tw_bits, int th_bits)
{
Fix32 tw = (1 << tw_bits);
Fix32 th = (1 << th_bits);
Fix32 tx_inc = (((x2 >= x1)<<1)-1);
Fix32 ty_inc = (((y2 >= y1)<<1)-1);
if(tx_inc < 0)
{
Fix32 width = (x1 - x2);
if(x1 & (tw - 1))
x1 = ((x1 ^ (tw - 1)) + 1);
else
x1 -= tw;
x2 = (x1 + width);
}
if(ty_inc < 0)
{
Fix32 height = (y1 - y2);
if(y1 & (th - 1))
y1 = ((y1 ^ (th - 1)) + 1);
else
y1 -= th;
y2 = (y1 + height);
}
Fix32 tx1 = (x1 >> tw_bits);
Fix32 ty1 = (y1 >> th_bits);
Fix32 tx2 = (x2 >> tw_bits);
Fix32 ty2 = (y2 >> th_bits);
Fix32 dtx = (tx2 - tx1);
Fix32 dty = (ty2 - ty1);
{
Fix32 width = (x2 - x1);
x1 &= (tw - 1);
x2 = (x1 + width);
}
{
Fix32 height = (y2 - y1);
y1 &= (th - 1);
y2 = (y1 + height);
}
Fix32 n_tiles = (dtx + dty + 1);
Fix64 dx = (x2 - x1);
Fix64 dy = (y2 - y1);
Fix64 sdx = (dx * th);
Fix64 sdy = (dy * tw);
Fix64 error = +sdx -sdy -(dx * (y1 & (th - 1))) +(dy * (x1 & (tw - 1)));
for(; n_tiles > 0; --n_tiles)
{
visit_tile(tx1, ty1);
if(error == 0)
{
visit_tile(tx1+tx_inc, ty1);
}
if(error > 0)
{
tx1 += tx_inc;
error -= sdy;
}
else
{
ty1 += ty_inc;
error += sdx;
}
}
}
Yes, this is code from DELTA.
