Skip to content
Snippets Groups Projects
Commit 357ad0f4 authored by Jason R Wilson
Browse files

files for PSA02

parent eda828c2
Branches
No related merge requests found
......@@ -5,13 +5,13 @@
#include "vec.h"
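/* calculate the arg max: the index of the data point whose squared distance to its nearest center (among the first m centers) is largest */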
int calc_arg_max (double* data, int rows, int cols, int* centers, int m) {
int calc_arg_max (double data[], int num_points, int dim, int centers[], int m) {
int arg_max;
double cost_sq = 0;
for (int i=0;i<rows;i++) {
for (int i=0;i<num_points;i++) {
double min_dist_sq = DBL_MAX;
for (int j=0;j<m;j++) {
double dist_sq = vec_dist_sq(data+i*cols,data+centers[j]*cols,cols);
double dist_sq = vec_dist_sq(data+i*dim,data+centers[j]*dim,dim);
if (dist_sq < min_dist_sq) {
min_dist_sq = dist_sq;
}
......@@ -25,11 +25,11 @@ int calc_arg_max (double* data, int rows, int cols, int* centers, int m) {
}
/* find the index of the cluster for the given point */
int find_cluster (double* kmeans, double* point, int k, int cols) {
int find_cluster (double kmeans[], double point[], int k, int dim) {
int cluster;
double min_dist_sq = DBL_MAX;
for (int i=0;i<k;i++) {
double dist_sq = vec_dist_sq(kmeans+i*cols,point,cols);
double dist_sq = vec_dist_sq(kmeans+i*dim,point,dim);
if (dist_sq < min_dist_sq) {
min_dist_sq = dist_sq;
cluster = i;
......@@ -39,22 +39,22 @@ int find_cluster (double* kmeans, double* point, int k, int cols) {
}
/* calculate the next kmeans */
void calc_kmeans_next (double *data, int rows, int cols, double* kmeans, double* kmeans_next, int k) {
void calc_kmeans_next (double data[], int num_points, int dim, double kmeans[], double kmeans_next[], int k) {
int num_points[k];
for (int i=0;i<k;i++) {
num_points[i] = 0;
}
vec_zero(kmeans_next,k*cols);
for (int i=0;i<rows;i++) {
int cluster = find_cluster(kmeans,data+i*cols,k,cols);
double* kmean = kmeans_next+cluster*cols;
vec_add(kmean,data+i*cols,kmean,cols);
vec_zero(kmeans_next,k*dim);
for (int i=0;i<num_points;i++) {
int cluster = find_cluster(kmeans,data+i*dim,k,dim);
double* kmean = kmeans_next+cluster*dim;
vec_add(kmean,data+i*dim,kmean,dim);
num_points[cluster] += 1;
}
for (int i=0;i<k;i++) {
double* kmean = kmeans_next+i*cols;
double* kmean = kmeans_next+i*dim;
if (num_points[i] > 0) {
vec_scalar_mult(kmean,1.0/num_points[i],kmean,cols);
vec_scalar_mult(kmean,1.0/num_points[i],kmean,dim);
} else {
printf ("error : cluster has no points!\n");
exit(1);
......@@ -63,60 +63,55 @@ void calc_kmeans_next (double *data, int rows, int cols, double* kmeans, double*
}
/* calculate kmeans using num_iter steps of Lloyd's algorithm */
void calc_kmeans (double *data, int rows, int cols, double* kmeans, int k, int num_iter) {
void calc_kmeans (double data[], int num_points, int dim, double kmeans[], int k, int num_iter) {
/* find k centers using the farthest first algorithm */
int centers[k];
centers[0] = 0;
for (int m=1;m<k;m++) {
centers[m] = calc_arg_max(data,rows,cols,centers,m);
centers[m] = calc_arg_max(data,num_points,dim,centers,m);
}
/* initialize kmeans using the k centers */
for (int i=0;i<k;i++) {
vec_copy(kmeans+i*cols,data+centers[i]*cols,cols);
vec_copy(kmeans+i*dim,data+centers[i]*dim,dim);
}
/* update kmeans num_iter times */
double kmeans_next[k*cols];
double kmeans_next[k*dim];
for (int i=0;i<num_iter;i++) {
calc_kmeans_next(data,rows,cols,kmeans,kmeans_next,k);
vec_copy(kmeans,kmeans_next,k*cols);
calc_kmeans_next(data,num_points,dim,kmeans,kmeans_next,k);
vec_copy(kmeans,kmeans_next,k*dim);
}
}
int main (int argc, char** argv) {
int main (int argc, char* argv[]) {
/* get k, the number of iterations, and thread_count from command line */
if (argc < 4) {
printf ("Command usage : %s %s %s %s\n",argv[0],"k","m","thread_count");
printf ("Command usage : %s %s %s %s\n",argv[0],"k","num_iter","thread_count");
return 1;
}
int k = atoi(argv[1]);
int m = atoi(argv[2]);
int num_iter = atoi(argv[2]);
int thread_count = atoi(argv[3]);
omp_set_num_threads(thread_count);
/* read the shape of the data matrix */
int rows, cols;
if (scanf("%*c %d %d",&rows, &cols) != 2) {
int num_points, dim;
if (scanf("%*c %d %d",&num_points, &dim) != 2) {
printf ("error reading the shape of the data matrix\n");
return 1;
}
/* dynamically allocate memory for the data matrix */
/* note: this line is roughly equivalent to */
/* double data[rows*cols] */
/* but the data memory is located on the heap rather than the stack */
double* data = (double*)malloc(rows*cols*sizeof(double));
/* read the data matrix */
for (int i=0;i<rows;i++) {
for (int j=0;j<cols;j++) {
if (scanf("%lf",data+i*cols+j) != 1) {
printf ("error reading data matrix\n");
return 1;
}
/* dynamically allocate memory for the data array */
double* data = (double*)malloc(num_points*dim*sizeof(double));
/* Read vectors from stdin and store them in the data array */
for (int i=0;i<num_points;i++) {
if (vec_read_stdin(data+i*dim,dim) != dim) {
printf ("error reading the next point from stdin\n");
return 1;
}
}
......@@ -125,13 +120,13 @@ int main (int argc, char** argv) {
start_time = omp_get_wtime();
/* calculate kmeans using the requested number of steps of Lloyd's algorithm */
double kmeans[k*cols];
calc_kmeans(data,rows,cols,kmeans,k,m);
double kmeans[k*dim];
calc_kmeans(data,num_points,dim,kmeans,k,num_iter);
/* stop the timer */
end_time = omp_get_wtime();
#ifdef STUDY
#ifdef TIMING
printf ("(%d,%.4f),",thread_count,(end_time-start_time));
#else
/* print out the thread count */
......@@ -142,8 +137,8 @@ int main (int argc, char** argv) {
/* print the results */
for (int i=0;i<k;i++) {
for (int j=0;j<cols;j++) {
printf ("%.5lf ",kmeans[i*cols+j]);
for (int j=0;j<dim;j++) {
printf ("%.5lf ",kmeans[i*dim+j]);
}
printf ("\n");
}
......
#!/bin/bash
#SBATCH -A cmda3634_rjh
#SBATCH -p normal_q
#SBATCH -t 00:05:00
#SBATCH -t 5
#SBATCH --cpus-per-task=4
#SBATCH -o omp_kmeans.out
......@@ -16,7 +16,7 @@ gcc -o omp_kmeans omp_kmeans.c vec.c -fopenmp
# OpenMP settings
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
export OMP_PROC_BIND=TRUE
export OMP_PROC_BIND=true
# run omp_kmeans
cat $1 | ./omp_kmeans $2 $3 4
#!/bin/bash
#SBATCH -A cmda3634_rjh
#SBATCH -p normal_q
#SBATCH -t 00:10:00
#SBATCH -t 10
#SBATCH --cpus-per-task=32
#SBATCH -o omp_kmeans_timing.out
......@@ -12,11 +12,11 @@ cd $SLURM_SUBMIT_DIR
module load matplotlib
# Build the executable
gcc -D STUDY -o omp_kmeans omp_kmeans.c vec.c -fopenmp
gcc -DTIMING -o omp_kmeans omp_kmeans.c vec.c -fopenmp
# OpenMP settings
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
export OMP_PROC_BIND=TRUE
export OMP_PROC_BIND=true
# run omp_kmeans
cat $1 | ./omp_kmeans $2 $3 1
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment