#!/bin/sh
#
# This script, when run on a relatively modern version of Linux,
# should compile and run a micro-benchmark which tests the speed
# of inter-core data transfer.  This can help determine which cores
# share an on-chip cache and which do not.
#
# This code has been tested on Fedora Core 6, RHEL 5, RHEL 3.  Hopefully
# it will work on others.
#
# Corey Satten, corey @ cac.washington.edu, May 2007
#
# Work inside a private directory created by mktemp rather than a predictable
# /tmp/bench2<pid> name, so nobody else can pre-create or symlink the files
# this script writes and then executes.  $TMP keeps its original role: the
# binary is $TMP and its source is $TMP.c.
BENCH_DIR=$(mktemp -d /tmp/bench2.XXXXXX) || {
    echo "couldn't" create a temporary directory, sorry >&2
    exit 1
}
TMP=$BENCH_DIR/bench2
# Clean up on every exit path.  The handler deliberately does not call exit,
# so the script's real exit status (e.g. 1 on compile failure) is preserved.
trap 'rm -rf "$BENCH_DIR"' 0
# HUP, INT, PIPE, TERM: exit non-zero; the exit trap above does the cleanup.
trap 'exit 1' 1 2 13 15
# Write the benchmark's C source to $TMP.c.  The heredoc delimiter is quoted,
# so the shell copies the text verbatim with no expansion.
cat >"$TMP.c" <<'EOF'
/*
 * IPC and CPU affinity microbenchmark. Corey Satten 5/17/07
 * Usage: time prog cpu# cpu#
 *
 * The program forks.  The child pins itself to the first cpu# (default 0)
 * and the parent to the second cpu# (default 1).  The two then bounce a
 * single byte back and forth over a pair of pipes: 1,000,000 round trips,
 * i.e. 2,000,000 one-byte transfers.  Exit status is 0 on success and
 * non-zero if setup, a transfer, or the child fails.
 *
 * Compile with -DOLD_WAY for C libraries whose sched_setaffinity() takes
 * only a pid and a pointer to an unsigned long mask.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

/* Total number of one-byte transfers; each loop pass performs two. */
#define TRANSFERS 2000000L

/* Move one byte over fd: write it when do_write is non-zero, else read it. */
static void xfer(int do_write, int fd, char *buf)
{
    ssize_t n = do_write ? write(fd, buf, 1) : read(fd, buf, 1);

    if (n != 1) {
        /* n == 0 means the peer went away; it has already reported why. */
        if (n < 0)
            perror(do_write ? "write" : "read");
        exit(1);
    }
}

int main(int argc, char *argv[])
{
    char buf[2] = { 0, 0 };
    int p1[2], p2[2], cpu[2];
    int other;          /* 1 in the parent, 0 in the child */
    long count;
    pid_t pid;

    if (pipe(p1) != 0 || pipe(p2) != 0) {
        perror("could not create pipes");
        exit(1);
    }
    pid = fork();
    if (pid < 0) {
        perror("could not fork");
        exit(1);
    }
    other = (pid != 0);

    /* Each side keeps one end of each pipe: parent writes p1 and reads p2,
     * child reads p1 and writes p2. */
    close(p1[!other]);
    close(p2[other]);

    cpu[0] = atoi(argc > 1 ? argv[1] : "0");
    cpu[1] = atoi(argc > 2 ? argv[2] : "1");
    {
#ifndef OLD_WAY
        cpu_set_t mask;

        CPU_ZERO(&mask);
        CPU_SET(cpu[other], &mask);
        if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) != 0) {
            perror("could not set affinity");
            exit(1);
        }
#else
        unsigned long mask;

        /* Keep the shift below within the width of the mask. */
        if (cpu[other] < 0 || cpu[other] >= (int)(8 * sizeof(mask))) {
            fprintf(stderr, "could not set affinity: cpu out of range\n");
            exit(1);
        }
        mask = 1UL << cpu[other];
        if (sched_setaffinity(0, &mask) != 0) {
            perror("could not set affinity");
            exit(1);
        }
#endif
    }

    /* Parent: write then read.  Child: read then write. */
    for (count = 0; count < TRANSFERS; count += 2) {
        xfer(other, p1[other], buf);
        xfer(!other, p2[!other], buf);
    }

    if (other) {
        int status;

        /* Reap the child and report its failure as our own. */
        if (wait(&status) < 0 || !WIFEXITED(status) || WEXITSTATUS(status) != 0)
            return 1;
    }
    return 0;
}
EOF
# Build the benchmark.  Diagnostics from the first attempt are discarded
# because it is allowed to fail; in that case the source is rebuilt with
# -DOLD_WAY, which selects the two-argument sched_setaffinity() call.  Only
# the second attempt's errors are shown, and the failure notice goes to stderr.
cc -o "$TMP" -O "$TMP.c" 2>/dev/null ||
cc -o "$TMP" -O "$TMP.c" -DOLD_WAY || {
    echo "couldn't" compile the C code, sorry >&2
    exit 1
}

# Collect the processor numbers listed in /proc/cpuinfo (last field of each
# "processor : N" line).
CORES=$(awk '/^processor[ \t]+:/ {print $NF}' /proc/cpuinfo)

# Load every core except the first into the positional parameters; the first
# one listed is benchmarked separately.  The leading "x" keeps `set` from
# treating an empty list as "print all variables".  The second shift is
# guarded because shifting with nothing left makes some shells (e.g. dash)
# abort the whole script when no processors were detected.
set x $CORES
shift
if [ $# -gt 0 ]; then
    shift
fi

# count_args WORD...: print the number of arguments given.
count_args() { echo $#; }

# label_cores CORE...: print the cores as "#0, #1, #10".  Each whole token is
# prefixed once, so multi-digit core numbers stay intact.  Runs in a subshell
# so its scratch variables do not leak into the script.
label_cores() (
    labelled=
    for core in "$@"; do
        labelled="${labelled:+$labelled, }#$core"
    done
    echo "$labelled"
)

echo
N=$(count_args $CORES)
echo $N CPU cores were found: $(label_cores $CORES)
# With exactly one core there is nothing to compare, so skip the hint.
case $N in
1)  ;;
*)
    echo
    echo If you see a significant difference in performance between
    echo test cases below, you may find the manpage for the '"taskset"'
    echo command of interest.
    ;;
esac

# run_bench PEER: announce and time one benchmark run.  The compiled program
# ($TMP) is given core 0 for one process and core PEER for the other; PEER 0
# is the same-core baseline.
run_bench() {
    echo
    if [ "$1" = 0 ]; then
        echo reading/writing 2,000,000 times between two processes on core 0
    else
        echo reading/writing 2,000,000 times between processes on core 0 and core "$1"
    fi
    time "$TMP" 0 "$1"
}

# Baseline first, then core 0 paired with each core held in the positional
# parameters.
run_bench 0
for C in "$@"; do
    run_bench "$C"
done
