Binary Tree Traversal

Traversing a binary tree is a pretty trivial process. You can literally do it in three lines of code:
void visit(node_t *n)
{
if (n->left) visit(n->left);
output(n);
if (n->right) visit(n->right);
}
What I need is an incremental process, like an iterator in C++ structures. I'll develop that using the above function. Let's convert it to an iterative function first:
void visit(node_t *n)
{
node_t * Sn[ MAXDEPTH ];
int      ra[ MAXDEPTH ];
int      depth;

depth= 0;
again:
if (n->left)
{
Sn[depth]= n;
ra[depth]= 1;
depth++;
n= n->left;
goto again;
ret1:
do {} while(0);
}
output(n);
if (n->right)
{
Sn[depth]= n;
ra[depth]= 2;
depth++;
n= n->right;
goto again;
ret2:
//nop
do {} while(0);
}
if (depth==0) return ;
depth--;
n= Sn[depth];
if (ra[depth]==1) goto ret1;
else  goto ret2;
}
Let's talk about what happened here. I replaced all recursive calls with jumps to the start of the function. When I do that, I save the only local variable, n, in the stack Sn and also record the "return address". The latter is necessary because when the function ends, it can return to two different places since there are two recursive calls. depth is used as a stack pointer.

Let's remove some of the silly NOPs from the above function and move the labels where they need to be:

void visit(node_t *n)
{
node_t * Sn[ MAXDEPTH ];
int      ra[ MAXDEPTH ];
int      depth;

depth= 0;
again:
if (n->left)
{
Sn[depth]= n;
ra[depth]= 1;
depth++;
n= n->left;
goto again;
}
ret1:
output(n);
if (n->right)
{
Sn[depth]= n;
ra[depth]= 2;
depth++;
n= n->right;
goto again;
}
ret2:
if (depth==0) return ;
depth--;
n= Sn[depth];
if (ra[depth]==1) goto ret1;
else  goto ret2;
}
Right. Now it's time for an elimination. Every time we store n in the stack, n gets a new value right after that. This new value is always a child of the pushed value, it's either the left child or the right child. So, if we have parent links (which I do), the whole Sn stack is unnecessary. We can replace the assignment to n with n= n->parent.
void visit(node_t *n)
{
int      ra[ MAXDEPTH ];
int      depth;

depth= 0;
again:
if (n->left)
{
ra[depth]= 1;
depth++;
n= n->left;
goto again;
}
ret1:
output(n);
if (n->right)
{
ra[depth]= 2;
depth++;
n= n->right;
goto again;
}
ret2:
if (depth==0) return ;
depth--;
n= n->parent;
if (ra[depth]==1) goto ret1;
else  goto ret2;
}
Now, when n is the root (with NULL parent), depth will be zero. At all other times, depth will be greater than zero. Initially n is the root and the depth is zero. When n gets assigned a new value thru
n= n->right;
or
n= n->left;
depth gets incremented. Similarly, whenever n moves to its parent, depth gets decremented. This means that the test depth==0 is equivalent to n->parent==NULL.

If we look carefully at the code, we will see that ra[depth] is 1 if n is the left child of its parent and that ra[depth] is two if n is the right child of its parent. When we combine all these facts, we can eliminate both ra and depth.

void visit(node_t *n)
{
again:
if (n->left)
{
n= n->left;
goto again;
}
ret1:
output(n);
if (n->right)
{
n= n->right;
goto again;
}
ret2:
if (n->parent==NULL) return ;
if (n==n->parent->left) { n= n->parent; goto ret1; }
else  { n= n->parent; goto ret2; }
}
Now we can rewrite the first and last jumps as loops:
void visit(node_t *n)
{
again:
while(n->left) n= n->left;
ret1:
output(n);
if (n->right)
{
n= n->right;
goto again;
}
while(n->parent && n==n->parent->right) n= n->parent;
if (n->parent==NULL) return ;
if (n==n->parent->left) { n= n->parent; goto ret1; }
}
The last "if" is really unnecessary because the condition for breaking the loop has become true at this point. The first part of the condition is handled by the previous statement. The second part (n is left child) is necessarily true at this point. Summing up, the function becomes:
void visit(node_t *n)
{
again:
while(n->left) n= n->left;
ret1:
output(n);
if (n->right)
{
n= n->right;
goto again;
}
while(n->parent && n==n->parent->right) n= n->parent;
if (n->parent==NULL) return ;
n= n->parent;
goto ret1;
}
I should do one last cleanup to remove the first label. This is necessary for converting the function to an incremental one.
void visit(node_t *n)
{
while(n->left) n= n->left;
ret1:
output(n);
if (n->right)
{
n= n->right;
while(n->left) n= n->left;
goto ret1;
}
while(n->parent && n==n->parent->right) n= n->parent;
if (n->parent==NULL) return ;
n= n->parent;
goto ret1;
}
We can combine the jumps and put things in an if-else construct:
void visit(node_t *n)
{
while(n->left) n= n->left;
ret1:
output(n);
if (n->right)
{
n= n->right;
while(n->left) n= n->left;
}
else
{
while(n->parent && n==n->parent->right) n= n->parent;
if (n->parent==NULL) return ;
n= n->parent;
}
goto ret1;
}
Now, if I move the output statement at the end of the loop, the function will look more like an incremental method:
void visit(node_t *n)
{
while(n->left) n= n->left;
output(n);
ret1:
if (n->right)
{
n= n->right;
while(n->left) n= n->left;
}
else
{
while(n->parent && n==n->parent->right) n= n->parent;
if (n->parent==NULL) return ;
n= n->parent;
}
output(n);
goto ret1;
}
We're ready to make the switch. The first two statements will be our begin() function. The rest will be the next() function. I will return the node after each output statement and then call the next() function repeatedly to iterate the loop.
node_t *visit_first(node_t *n)
{
while(n->left) n= n->left;
output(n);
return n;
}

node_t *visit_next(node_t *n)
{
if (n->right)
{
n= n->right;
while(n->left) n= n->left;
}
else
{
while(n->parent && n==n->parent->right) n= n->parent;
if (n->parent==NULL) return NULL;
n= n->parent;
}
output(n);
return n;
}
The previous loop-exit statement return ; has become return NULL; to indicate the end of the loop to the external controller. Of course, the output statements are now unnecessary because the nodes will be processed outside the visit function. I can add an empty-tree check while I'm at it:
node_t *visit_first(node_t *n)
{
if (n)
while(n->left) n= n->left;
return n;
}

node_t *visit_next(node_t *n)
{
if (n->right)
{
n= n->right;
while(n->left) n= n->left;
}
else
{
while(n->parent && n==n->parent->right) n= n->parent;
if (n->parent==NULL) return NULL;
n= n->parent;
}
return n;
}
This will be used as follows:
for(n=visit_first(root);n;n=visit_next(n))
output(n);
Not too shabby. I was a little sick and didn't want to drunk code this. It turned out pretty well.

Without The Parent Links

I had to do this for my critbit trees as well. The difference is that, now we don't have the parent links and only leaf nodes are output. Here is the recursive version:
void visit(node_t *n)
{
if (!n->internal) { output(n); return ; }
visit(n->left);
visit(n->right);
}
Since I don't have the parent links, I won't be able to get rid of the stack completely. Just to make things simpler, I'll use Sn[depth] instead of n. Note that depth is the index of the last valid entry. Here is the initial expansion to iterative:
void visit(node_t *n)
{
int     depth;
node_t *Sn[MAXDEPTH];
int     ra[MAXDEPTH];
depth= 0;
Sn= n;

again:
if (!Sn[depth]->internal)
{
output(Sn[depth]);
goto retf;
}

depth++; Sn[depth]= Sn[depth-1]->left; ra[depth]= 1;
goto again;
cal1:
depth++; Sn[depth]= Sn[depth-1]->right; ra[depth]= 2;
goto again;
cal2:
goto retf;

retf:
if (depth==0) return ;
depth--;
if (ra[depth+1]==1) goto cal1;
else goto cal2;
}
Just like in the previous section we can remove the array ra by modifying the decision to be based on which child the current node is. I'll get rid of the useless goto retf; while I'm at it.
void visit(node_t *n)
{
int     depth;
node_t *Sn[MAXDEPTH];
depth= 0;
Sn= n;

again:
if (!Sn[depth]->internal)
{
output(Sn[depth]);
goto retf;
}

depth++; Sn[depth]= Sn[depth-1]->left;
goto again;
cal1:
depth++; Sn[depth]= Sn[depth-1]->right;
goto again;

retf:
if (depth==0) return ;
depth--;
if (Sn[depth]->left==Sn[depth+1]) goto cal1;
else goto retf;
}
Now I can rewrite the first jumps to again as a loop:
void visit(node_t *n)
{
int     depth;
node_t *Sn[MAXDEPTH];
depth= 0;
Sn= n;

again:
while(Sn[depth]->internal)
{
depth++; Sn[depth]= Sn[depth-1]->left;
}
output(Sn[depth]);
goto retf;
cal1:
depth++; Sn[depth]= Sn[depth-1]->right;
goto again;

retf:
if (depth==0) return ;
depth--;
if (Sn[depth]->left==Sn[depth+1]) goto cal1;
else goto retf;
}
Now we observe that the block starting with the label again doesn't fall thru to the next statement. So, I could put it anywhere in the function. The only restriction is that, this block needs to be run once when the function is entered initially. I will simply make a copy of it at the end of the block cal1 and remove the jump.
void visit(node_t *n)
{
int     depth;
node_t *Sn[MAXDEPTH];
depth= 0;
Sn= n;

while(Sn[depth]->internal)
{
depth++; Sn[depth]= Sn[depth-1]->left;
}
output(Sn[depth]);
goto retf;

cal1:
depth++; Sn[depth]= Sn[depth-1]->right;
while(Sn[depth]->internal)
{
depth++; Sn[depth]= Sn[depth-1]->left;
}
output(Sn[depth]);
goto retf;

retf:
if (depth==0) return ;
depth--;
if (Sn[depth]->left==Sn[depth+1]) goto cal1;
else goto retf;
}
If I switch the blocks cal1 and retf, I will end up with a nicer flow control.
void visit(node_t *n)
{
int     depth;
node_t *Sn[MAXDEPTH];
depth= 0;
Sn= n;

while(Sn[depth]->internal)
{
depth++; Sn[depth]= Sn[depth-1]->left;
}
output(Sn[depth]);
goto retf;

retf:
if (depth==0) return ;
depth--;
if (Sn[depth]->left==Sn[depth+1]) goto cal1;
else goto retf;

cal1:
depth++; Sn[depth]= Sn[depth-1]->right;
while(Sn[depth]->internal)
{
depth++; Sn[depth]= Sn[depth-1]->left;
}
output(Sn[depth]);
goto retf;
}
Now I can simplify this even further by negating the condition and removing the useless jumps.
void visit(node_t *n)
{
int     depth;
node_t *Sn[MAXDEPTH];
depth= 0;
Sn= n;

while(Sn[depth]->internal)
{
depth++; Sn[depth]= Sn[depth-1]->left;
}
output(Sn[depth]);

retf:
if (depth==0) return ;
depth--;
if (Sn[depth]->left!=Sn[depth+1]) goto retf;

depth++; Sn[depth]= Sn[depth-1]->right;
while(Sn[depth]->internal)
{
depth++; Sn[depth]= Sn[depth-1]->left;
}
output(Sn[depth]);
goto retf;
}
The first goto can also be converted into a loop now. Let's assume that depth is K when we initially arrive at retf. When we exit the loop, the following will hold:
depth == K-1
Sn[depth]->left == Sn[depth+1]
Sn[K-1]->left= Sn[K]
If I modify the pre-decrement to become a post-decrement, the following will hold:
depth == K
Sn[depth-1]->left == Sn[depth]
Sn[K-1]->left= Sn[K]
Now I can remove the increment which immediately follows the loop.
void visit(node_t *n)
{
int     depth;
node_t *Sn[MAXDEPTH];
depth= 0;
Sn= n;

while(Sn[depth]->internal)
{
depth++; Sn[depth]= Sn[depth-1]->left;
}
output(Sn[depth]);

retf:
while(depth && Sn[depth-1]->left!=Sn[depth]) depth--;
if (depth==0) return ;

Sn[depth]= Sn[depth-1]->right;
while(Sn[depth]->internal)
{
depth++; Sn[depth]= Sn[depth-1]->left;
}
output(Sn[depth]);
goto retf;
}
Finally, the function is ready to be split up into the _first() and _next() functions. The first part of the function until the retf label will be the _first() function. Just after the output call, I will return from that function. The rest of visit() will be the next() function and I will call that repeatedly to iterate the loop:
typedef struct { node_t *Sn[maxdepth]; int depth; } pos_t;

pos_t* visit_first(node_t *root)
{
pos_t *P;
P= malloc(sizeof(*P));
P->depth= 0;
P->Sn[P->depth]= root;

while(P->Sn[P->depth]->internal)
{
P->depth++; P->Sn[P->depth]= P->Sn[P->depth-1]->left;
}
output(P->Sn[P->depth]);
return P;
}

pos_t *visit_next(pos_t *P)
{
while(P->depth && P->Sn[P->depth-1]->left!=P->Sn[P->depth])
P->depth--;
if (P->depth==0) { free(P); return NULL; }

P->Sn[P->depth]= P->Sn[P->depth-1]->right;
while(P->Sn[P->depth]->internal)
{
P->depth++; P->Sn[P->depth]= P->Sn[P->depth-1]->left;
}
output(P->Sn[P->depth]);
return P;
}