Go to the previous, next section.

Example: Loop Tiling

In this example program the outermost for loop of each loop nest will be tiled.

tile.cc

#include <stdlib.h>
#include <suif.h>
#include <builder.h>
#include <dependence.h>

int tile_size;

/*************************************************************************
 * Tile the given for loop                                               *
 *************************************************************************/
void tile_for_loop(tree_for * tf)
{
    // create the builder structure for the loop index variable
    block ind_var(tf->index());

    // create a new variable for the tile driver using builder
    char name[64];
    sprintf(name, "%s_tile", tf->index()->name());
    block tile_var(block::new_sym(tf->index()->type(), name));
    var_sym * tv = (var_sym *)tile_var.get_sym();

    
    // get the bounds of the current loop
    named_lin_ineq * nli = include_for(tf);
    // if bounds cannot be found, got out
    if(nli == NULL) return;


    // Create a named system of inequalities to represent the tiling inequalities
    // tile_sz * tile_var <= index_var <= tile_sz + tile_var + tile_sz - 1
    name_table nt(new immed(tv), new immed(tf->index()));
    lin_ineq li(Compose(2, 3,
                                    0,  -tile_size,  1,
                        tile_size - 1,   tile_size, -1));
    
    // create the named linear inequality combining the names and inequalities
    named_lin_ineq blk(nt, li);

    // Add the tiling inequalities to the original set
    *nli &= blk;

    // project the original set to tighten bounds
    nli->project();
    

    // find the lower and upper bound expresstion for the tile
    block tile_block_lb(nli->create_expression(tf->index(), FALSE));
    block tile_block_ub(nli->create_expression(tf->index(), TRUE));

    // project away the index variable of the tile
    nli->project_away(immed(tf->index()));


    // find the lower and upper bound expresstion for the tile driver
    block tile_driver_lb(nli->create_expression(tv, FALSE));
    block tile_driver_ub(nli->create_expression(tv, TRUE));

    // create a builder structure to represent both loop nests
    block code(block::FOR(tile_var,
                          tile_driver_lb,
                          tile_driver_ub,
                          block::FOR(ind_var,
                                     tile_block_lb,
                                     tile_block_ub,
                                     block(tf->body()))));
    
    // create a tree_node (a tree_for in this case) out of the builder representation
    tree_node * new_tn = code.make_tree_node();
    
    // insert the new tiled loops and remove the input for loop
    tf->parent()->insert_before(new_tn, tf->list_e());
    tf->parent()->remove(tf->list_e());
} 


/*************************************************************************
 * Find the outermost for loop and call tile_for_loop                    *
 *************************************************************************/
void find_first_for(tree_node_list * tnl)
{
    tree_node_list_iter iter(tnl);
    while(!iter.is_empty()) {
        tree_node * tn = iter.step();
        switch(tn->kind()) {
        case TREE_FOR:
            tree_for * tnf = (tree_for *)tn;
            tile_for_loop(tnf);
            break;
            
        case TREE_IF:
            tree_if * tni = (tree_if *)tn;
            find_first_for(tni->then_part());
            find_first_for(tni->else_part());
            break;
            
        case TREE_LOOP:
            tree_loop * tnl = (tree_loop *)tn;
            find_first_for(tnl->body());
            break;
            
        case TREE_BLOCK:
            tree_block * tnb = (tree_block *)tn;
            find_first_for(tnb->body());
            break;
            
        case TREE_INSTR:
            break;
            
        default:
            assert(0);
            break;
        }
    }
}


/*************************************************************************
 * For each procedure, set the proc. for builder and call find_first_for *
 * Need to do fill_in_access because of include_for                      *
 *************************************************************************/
void do_tile(tree_proc * tp)
{
    block::set_proc(tp);
    fill_in_access(tp);
    find_first_for(tp->body());
}


/*************************************************************************
 *  Main: set the tile size if supplied, otherwise it is 64.             *
 *************************************************************************/
main(int argc, char * argv[])
{
    extern int optind;
    start_suif(argc, argv);

    tile_size = 64;

    if(strcmp(argv[optind], "-size") == 0) {
        tile_size = atoi(argv[++optind]);
        optind++;
    }

    suif_proc_iter(argc, argv, do_tile, TRUE);
}

Testing

After compiling this program and running over the following input program:

test.c

double A[2000];
test(int N, int M)
{
    int i, j, k;
    for(i = 0; i<= 1000; i++)
        for(j = 1; j<= 1000; j++)
            A[i] = A[i-1];

    for(i = 0; i<=M; i++)
        for(j = 1; j<=1000; j++)
            A[j] = A[j-1];
}

the C-form of the output created is

test.sfo.c

extern int test(int N, int M)
{
    int i;
    int j;
    int k;
    int i_tile6;
    int i_tile7;

    for(i_tile6 = 0; i_tile6 <= 15; i_tile6++)
        for(i = max(0,0+64*i_tile6); i <= min(1000,63+64*i_tile6); i++)
            for(j = 1; j <= 1000; j++)
                A[i] = A[i - 1];

    for(i_tile7 = 0; i_tile7 <= divfloor(0+M,64); i_tile7++)
        for(i = max(0,0+64*i_tile7); i <= min(0+M,63+64*i_tile7); i++)
            for (j = 1; j <= 1000; j++)
                A[j] = A[j - 1];
    return 0;
}

Go to the previous, next section.