Go to the previous, next section.
In this example program the outermost for loop of each loop nest will be tiled.
#include <stdlib.h>
#include <suif.h>
#include <builder.h>
#include <dependence.h>
int tile_size;
/*************************************************************************
* Tile the given for loop *
*************************************************************************/
void tile_for_loop(tree_for * tf)
{
// create the builder structure for the loop index variable
block ind_var(tf->index());
// create a new variable for the tile driver using builder
char name[64];
sprintf(name, "%s_tile", tf->index()->name());
block tile_var(block::new_sym(tf->index()->type(), name));
var_sym * tv = (var_sym *)tile_var.get_sym();
// get the bounds of the current loop
named_lin_ineq * nli = include_for(tf);
// if bounds cannot be found, got out
if(nli == NULL) return;
// Create a named system of inequalities to represent the tiling inequalities
// tile_sz * tile_var <= index_var <= tile_sz + tile_var + tile_sz - 1
name_table nt(new immed(tv), new immed(tf->index()));
lin_ineq li(Compose(2, 3,
0, -tile_size, 1,
tile_size - 1, tile_size, -1));
// create the named linear inequality combining the names and inequalities
named_lin_ineq blk(nt, li);
// Add the tiling inequalities to the original set
*nli &= blk;
// project the original set to tighten bounds
nli->project();
// find the lower and upper bound expresstion for the tile
block tile_block_lb(nli->create_expression(tf->index(), FALSE));
block tile_block_ub(nli->create_expression(tf->index(), TRUE));
// project away the index variable of the tile
nli->project_away(immed(tf->index()));
// find the lower and upper bound expresstion for the tile driver
block tile_driver_lb(nli->create_expression(tv, FALSE));
block tile_driver_ub(nli->create_expression(tv, TRUE));
// create a builder structure to represent both loop nests
block code(block::FOR(tile_var,
tile_driver_lb,
tile_driver_ub,
block::FOR(ind_var,
tile_block_lb,
tile_block_ub,
block(tf->body()))));
// create a tree_node (a tree_for in this case) out of the builder representation
tree_node * new_tn = code.make_tree_node();
// insert the new tiled loops and remove the input for loop
tf->parent()->insert_before(new_tn, tf->list_e());
tf->parent()->remove(tf->list_e());
}
/*************************************************************************
* Find the outermost for loop and call tile_for_loop *
*************************************************************************/
void find_first_for(tree_node_list * tnl)
{
tree_node_list_iter iter(tnl);
while(!iter.is_empty()) {
tree_node * tn = iter.step();
switch(tn->kind()) {
case TREE_FOR:
tree_for * tnf = (tree_for *)tn;
tile_for_loop(tnf);
break;
case TREE_IF:
tree_if * tni = (tree_if *)tn;
find_first_for(tni->then_part());
find_first_for(tni->else_part());
break;
case TREE_LOOP:
tree_loop * tnl = (tree_loop *)tn;
find_first_for(tnl->body());
break;
case TREE_BLOCK:
tree_block * tnb = (tree_block *)tn;
find_first_for(tnb->body());
break;
case TREE_INSTR:
break;
default:
assert(0);
break;
}
}
}
/*************************************************************************
* For each procedure, set the proc. for builder and call find_first_for *
* Need to do fill_in_access because of include_for *
*************************************************************************/
void do_tile(tree_proc * tp)
{
block::set_proc(tp);
fill_in_access(tp);
find_first_for(tp->body());
}
/*************************************************************************
* Main: set the tile size if supplied, otherwise it is 64. *
*************************************************************************/
main(int argc, char * argv[])
{
extern int optind;
start_suif(argc, argv);
tile_size = 64;
if(strcmp(argv[optind], "-size") == 0) {
tile_size = atoi(argv[++optind]);
optind++;
}
suif_proc_iter(argc, argv, do_tile, TRUE);
}
double A[2000];
test(int N, int M)
{
int i, j, k;
for(i = 0; i<= 1000; i++)
for(j = 1; j<= 1000; j++)
A[i] = A[i-1];
for(i = 0; i<=M; i++)
for(j = 1; j<=1000; j++)
A[j] = A[j-1];
}
the C-form of the output created is
extern int test(int N, int M)
{
int i;
int j;
int k;
int i_tile6;
int i_tile7;
for(i_tile6 = 0; i_tile6 <= 15; i_tile6++)
for(i = max(0,0+64*i_tile6); i <= min(1000,63+64*i_tile6); i++)
for(j = 1; j <= 1000; j++)
A[i] = A[i - 1];
for(i_tile7 = 0; i_tile7 <= divfloor(0+M,64); i_tile7++)
for(i = max(0,0+64*i_tile7); i <= min(0+M,63+64*i_tile7); i++)
for (j = 1; j <= 1000; j++)
A[j] = A[j - 1];
return 0;
}
Go to the previous, next section.