/* Compile with: gcc -D_REENTRANT <source>.c -lpthread */

#include<errno.h>
#include<limits.h>
#include<pthread.h>
#include<stdio.h>
#include<stdlib.h>

/* Per-thread work descriptor handed to SMP_scalprod(). */
typedef struct
{
  double volatile *p_s;       /* shared accumulator for the scalar product */
  pthread_mutex_t *p_s_lock;  /* mutex protecting updates of *p_s */
  int n;                      /* index of this thread; also its first vector index */
  int nproc;                  /* number of worker threads; used as the index stride */
  double *x;                  /* data for first vector */
  double *y;                  /* data for second vector */
  int l;                      /* length of vectors */
} DATA;

/*
 * Thread entry point: computes one thread's share of the scalar product
 * of x and y and adds it to the shared result.
 *
 * arg must point to a DATA descriptor.  The thread with index n handles
 * the vector elements n, n + nproc, n + 2*nproc, ... below l, so threads
 * with indices 0 .. nproc-1 together cover every element exactly once.
 * Progress messages are written to stdout.
 *
 * The partial sum is accumulated locally and added to *p_s only once,
 * while holding *p_s_lock.
 *
 * Always returns NULL.  A descriptor with n < 0 or nproc < 1 contributes
 * nothing (it would otherwise index out of bounds or never terminate).
 */
void *SMP_scalprod(void *arg)
{
  const DATA D = *(const DATA *)arg;  /* private snapshot of the descriptor */
  double localsum = 0.0;
  long i;

  printf("thread no. %d!\n", D.n);

  /* Reject descriptors that cannot describe a valid, terminating stride. */
  if (D.n < 0 || D.nproc < 1)
    return NULL;

  for (i = D.n; i < D.l; i += D.nproc)
    {
      localsum += D.x[i] * D.y[i];
      /* i is a long, so it needs %ld rather than %d. */
      printf("thread no. %d calculating row %ld\n", D.n, i);
    }

  /* Take the lock, fold the partial sum into the shared total, release. */
  pthread_mutex_lock(D.p_s_lock);
  *(D.p_s) += localsum;
  pthread_mutex_unlock(D.p_s_lock);

  return NULL;
}

#define L 8    /* length of the vectors x and y */

/*
 * Computes the scalar product of two length-L vectors (x[i] = y[i] = i)
 * using the number of worker threads given as the single command-line
 * argument, then prints the result as "s = <value>".
 *
 * Each worker runs SMP_scalprod() on its own DATA descriptor; all
 * descriptors share the accumulator s and the mutex s_lock.
 *
 * Exits with status 1 on a usage error, an invalid thread count, an
 * allocation failure, or a failure to initialise the mutex or to
 * create/join a thread; returns 0 on success.
 */
int main(int argc, char **argv)
{
  pthread_t *thread;
  DATA *A;
  volatile double s = 0;     /* the shared variable */
  pthread_mutex_t s_lock;
  double x[L], y[L];
  char *end;
  long requested;
  int cpu, i;

  if (argc != 2)
    {
      fprintf(stderr, "usage: %s  <number of CPU>\n", argv[0]);
      exit(1);
    }

  /* strtol (unlike atoi) lets us reject garbage, zero and negatives. */
  errno = 0;
  requested = strtol(argv[1], &end, 10);
  if (errno != 0 || end == argv[1] || *end != '\0'
      || requested < 1 || requested > INT_MAX)
    {
      fprintf(stderr, "%s: number of CPU must be a positive integer\n",
              argv[0]);
      exit(1);
    }
  cpu = (int)requested;

  thread = calloc((size_t)cpu, sizeof *thread);
  A = calloc((size_t)cpu, sizeof *A);
  if (thread == NULL || A == NULL)
    {
      fprintf(stderr, "%s: out of memory\n", argv[0]);
      free(A);
      free(thread);
      exit(1);
    }

  for (i = 0; i < L; i++)
    x[i] = y[i] = i;

  /* initialize the lock variable */
  if (pthread_mutex_init(&s_lock, NULL))
    {
      fprintf(stderr, "%s: cannot initialize mutex\n", argv[0]);
      exit(1);
    }

  for (i = 0; i < cpu; i++)
    {
      /* initialize the structure */
      A[i].n = i;            /* the index of the thread */
      A[i].x = x;
      A[i].y = y;
      A[i].l = L;
      A[i].nproc = cpu;      /* the number of threads */
      A[i].p_s = &s;
      A[i].p_s_lock = &s_lock;

      if (pthread_create(&thread[i], NULL, SMP_scalprod, &A[i]))
        {
          fprintf(stderr, "%s: cannot make thread\n", argv[0]);
          exit(1);
        }
    }

  /* Wait for every worker; their return value is not used. */
  for (i = 0; i < cpu; i++)
    {
      if (pthread_join(thread[i], NULL))
        {
          fprintf(stderr, "%s: cannot join thread\n", argv[0]);
          exit(1);
        }
    }

  printf("s = %f\n", s);

  /* All workers have been joined, so nothing uses these any more. */
  pthread_mutex_destroy(&s_lock);
  free(A);
  free(thread);
  return 0;
}
