Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
cmda3634_materials
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Package Registry
Operate
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
srigavili
cmda3634_materials
Commits
357ad0f4
Commit
357ad0f4
authored
11 months ago
by
Jason R Wilson
Browse files
Options
Downloads
Patches
Plain Diff
files for PSA02
parent
eda828c2
Branches
Branches containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
PSA02/omp_kmeans.c
+37
-42
37 additions, 42 deletions
PSA02/omp_kmeans.c
PSA02/omp_kmeans.sh
+2
-2
2 additions, 2 deletions
PSA02/omp_kmeans.sh
PSA02/omp_kmeans_timing.sh
+3
-3
3 additions, 3 deletions
PSA02/omp_kmeans_timing.sh
with
42 additions
and
47 deletions
PSA02/omp_kmeans.c
+
37
−
42
View file @
357ad0f4
...
...
@@ -5,13 +5,13 @@
#include
"vec.h"
/* calculate the arg max */
int
calc_arg_max
(
double
*
data
,
int
row
s
,
int
cols
,
int
*
centers
,
int
m
)
{
int
calc_arg_max
(
double
data
[]
,
int
num_point
s
,
int
dim
,
int
centers
[]
,
int
m
)
{
int
arg_max
;
double
cost_sq
=
0
;
for
(
int
i
=
0
;
i
<
row
s
;
i
++
)
{
for
(
int
i
=
0
;
i
<
num_point
s
;
i
++
)
{
double
min_dist_sq
=
DBL_MAX
;
for
(
int
j
=
0
;
j
<
m
;
j
++
)
{
double
dist_sq
=
vec_dist_sq
(
data
+
i
*
cols
,
data
+
centers
[
j
]
*
cols
,
cols
);
double
dist_sq
=
vec_dist_sq
(
data
+
i
*
dim
,
data
+
centers
[
j
]
*
dim
,
dim
);
if
(
dist_sq
<
min_dist_sq
)
{
min_dist_sq
=
dist_sq
;
}
...
...
@@ -25,11 +25,11 @@ int calc_arg_max (double* data, int rows, int cols, int* centers, int m) {
}
/* find the index of the cluster for the given point */
int
find_cluster
(
double
*
kmeans
,
double
*
point
,
int
k
,
int
cols
)
{
int
find_cluster
(
double
kmeans
[]
,
double
point
[]
,
int
k
,
int
dim
)
{
int
cluster
;
double
min_dist_sq
=
DBL_MAX
;
for
(
int
i
=
0
;
i
<
k
;
i
++
)
{
double
dist_sq
=
vec_dist_sq
(
kmeans
+
i
*
cols
,
point
,
cols
);
double
dist_sq
=
vec_dist_sq
(
kmeans
+
i
*
dim
,
point
,
dim
);
if
(
dist_sq
<
min_dist_sq
)
{
min_dist_sq
=
dist_sq
;
cluster
=
i
;
...
...
@@ -39,22 +39,22 @@ int find_cluster (double* kmeans, double* point, int k, int cols) {
}
/* calculate the next kmeans */
void
calc_kmeans_next
(
double
*
data
,
int
row
s
,
int
cols
,
double
*
kmeans
,
double
*
kmeans_next
,
int
k
)
{
void
calc_kmeans_next
(
double
data
[]
,
int
num_point
s
,
int
dim
,
double
kmeans
[]
,
double
kmeans_next
[]
,
int
k
)
{
int
num_points
[
k
];
for
(
int
i
=
0
;
i
<
k
;
i
++
)
{
num_points
[
i
]
=
0
;
}
vec_zero
(
kmeans_next
,
k
*
cols
);
for
(
int
i
=
0
;
i
<
row
s
;
i
++
)
{
int
cluster
=
find_cluster
(
kmeans
,
data
+
i
*
cols
,
k
,
cols
);
double
*
kmean
=
kmeans_next
+
cluster
*
cols
;
vec_add
(
kmean
,
data
+
i
*
cols
,
kmean
,
cols
);
vec_zero
(
kmeans_next
,
k
*
dim
);
for
(
int
i
=
0
;
i
<
num_point
s
;
i
++
)
{
int
cluster
=
find_cluster
(
kmeans
,
data
+
i
*
dim
,
k
,
dim
);
double
*
kmean
=
kmeans_next
+
cluster
*
dim
;
vec_add
(
kmean
,
data
+
i
*
dim
,
kmean
,
dim
);
num_points
[
cluster
]
+=
1
;
}
for
(
int
i
=
0
;
i
<
k
;
i
++
)
{
double
*
kmean
=
kmeans_next
+
i
*
cols
;
double
*
kmean
=
kmeans_next
+
i
*
dim
;
if
(
num_points
[
i
]
>
0
)
{
vec_scalar_mult
(
kmean
,
1
.
0
/
num_points
[
i
],
kmean
,
cols
);
vec_scalar_mult
(
kmean
,
1
.
0
/
num_points
[
i
],
kmean
,
dim
);
}
else
{
printf
(
"error : cluster has no points!
\n
"
);
exit
(
1
);
...
...
@@ -63,60 +63,55 @@ void calc_kmeans_next (double *data, int rows, int cols, double* kmeans, double*
}
/* calculate kmeans using m steps of Lloyd's algorithm */
void
calc_kmeans
(
double
*
data
,
int
row
s
,
int
cols
,
double
*
kmeans
,
int
k
,
int
num_iter
)
{
void
calc_kmeans
(
double
data
[]
,
int
num_point
s
,
int
dim
,
double
kmeans
[]
,
int
k
,
int
num_iter
)
{
/* find k centers using the farthest first algorithm */
int
centers
[
k
];
centers
[
0
]
=
0
;
for
(
int
m
=
1
;
m
<
k
;
m
++
)
{
centers
[
m
]
=
calc_arg_max
(
data
,
rows
,
cols
,
centers
,
m
);
centers
[
m
]
=
calc_arg_max
(
data
,
num_points
,
dim
,
centers
,
m
);
}
/* initialize kmeans using the k centers */
for
(
int
i
=
0
;
i
<
k
;
i
++
)
{
vec_copy
(
kmeans
+
i
*
cols
,
data
+
centers
[
i
]
*
cols
,
cols
);
vec_copy
(
kmeans
+
i
*
dim
,
data
+
centers
[
i
]
*
dim
,
dim
);
}
/* update kmeans num_iter times */
double
kmeans_next
[
k
*
cols
];
double
kmeans_next
[
k
*
dim
];
for
(
int
i
=
0
;
i
<
num_iter
;
i
++
)
{
calc_kmeans_next
(
data
,
rows
,
cols
,
kmeans
,
kmeans_next
,
k
);
vec_copy
(
kmeans
,
kmeans_next
,
k
*
cols
);
calc_kmeans_next
(
data
,
num_points
,
dim
,
kmeans
,
kmeans_next
,
k
);
vec_copy
(
kmeans
,
kmeans_next
,
k
*
dim
);
}
}
int
main
(
int
argc
,
char
*
*
argv
)
{
int
main
(
int
argc
,
char
*
argv
[]
)
{
/* get k, m, and thread_count from command line */
if
(
argc
<
4
)
{
printf
(
"Command usage : %s %s %s %s
\n
"
,
argv
[
0
],
"k"
,
"
m
"
,
"thread_count"
);
printf
(
"Command usage : %s %s %s %s
\n
"
,
argv
[
0
],
"k"
,
"
num_iter
"
,
"thread_count"
);
return
1
;
}
int
k
=
atoi
(
argv
[
1
]);
int
m
=
atoi
(
argv
[
2
]);
int
num_iter
=
atoi
(
argv
[
2
]);
int
thread_count
=
atoi
(
argv
[
3
]);
omp_set_num_threads
(
thread_count
);
/* read the shape of the data matrix */
int
rows
,
cols
;
if
(
scanf
(
"%*c %d %d"
,
&
rows
,
&
cols
)
!=
2
)
{
int
num_points
,
dim
;
if
(
scanf
(
"%*c %d %d"
,
&
num_points
,
&
dim
)
!=
2
)
{
printf
(
"error reading the shape of the data matrix
\n
"
);
return
1
;
}
/* dynamically allocate memory for the data matrix */
/* note: this line is roughly equivalent to */
/* double data[rows*cols] */
/* but the data memory is located on the heap rather than the stack */
double
*
data
=
(
double
*
)
malloc
(
rows
*
cols
*
sizeof
(
double
));
/* read the data matrix */
for
(
int
i
=
0
;
i
<
rows
;
i
++
)
{
for
(
int
j
=
0
;
j
<
cols
;
j
++
)
{
if
(
scanf
(
"%lf"
,
data
+
i
*
cols
+
j
)
!=
1
)
{
printf
(
"error reading data matrix
\n
"
);
return
1
;
}
/* dynamically allocate memory for the data array */
double
*
data
=
(
double
*
)
malloc
(
num_points
*
dim
*
sizeof
(
double
));
/* Read vectors from stdin and store them in the data array */
for
(
int
i
=
0
;
i
<
num_points
;
i
++
)
{
if
(
vec_read_stdin
(
data
+
i
*
dim
,
dim
)
!=
dim
)
{
printf
(
"error reading the next point from stdin
\n
"
);
return
1
;
}
}
...
...
@@ -125,13 +120,13 @@ int main (int argc, char** argv) {
start_time
=
omp_get_wtime
();
/* calculate kmeans using m steps of Lloyd's algorithm */
double
kmeans
[
k
*
cols
];
calc_kmeans
(
data
,
rows
,
cols
,
kmeans
,
k
,
m
);
double
kmeans
[
k
*
dim
];
calc_kmeans
(
data
,
num_points
,
dim
,
kmeans
,
k
,
num_iter
);
/* stop the timer */
end_time
=
omp_get_wtime
();
#ifdef
STUDY
#ifdef
TIMING
printf
(
"(%d,%.4f),"
,
thread_count
,(
end_time
-
start_time
));
#else
/* print out the thread count */
...
...
@@ -142,8 +137,8 @@ int main (int argc, char** argv) {
/* print the results */
for
(
int
i
=
0
;
i
<
k
;
i
++
)
{
for
(
int
j
=
0
;
j
<
cols
;
j
++
)
{
printf
(
"%.5lf "
,
kmeans
[
i
*
cols
+
j
]);
for
(
int
j
=
0
;
j
<
dim
;
j
++
)
{
printf
(
"%.5lf "
,
kmeans
[
i
*
dim
+
j
]);
}
printf
(
"
\n
"
);
}
...
...
This diff is collapsed.
Click to expand it.
PSA02/omp_kmeans.sh
+
2
−
2
View file @
357ad0f4
#!/bin/bash
#SBATCH -A cmda3634_rjh
#SBATCH -p normal_q
#SBATCH -t
00:05:00
#SBATCH -t
5
#SBATCH --cpus-per-task=4
#SBATCH -o omp_kmeans.out
...
...
@@ -16,7 +16,7 @@ gcc -o omp_kmeans omp_kmeans.c vec.c -fopenmp
# OpenMP settings
export
OMP_NUM_THREADS
=
$SLURM_CPUS_PER_TASK
export
OMP_PROC_BIND
=
TRUE
export
OMP_PROC_BIND
=
true
# run omp_kmeans
cat
$1
| ./omp_kmeans
$2
$3
4
This diff is collapsed.
Click to expand it.
PSA02/omp_kmeans_timing.sh
+
3
−
3
View file @
357ad0f4
#!/bin/bash
#SBATCH -A cmda3634_rjh
#SBATCH -p normal_q
#SBATCH -t
00:10:0
0
#SBATCH -t
1
0
#SBATCH --cpus-per-task=32
#SBATCH -o omp_kmeans_timing.out
...
...
@@ -12,11 +12,11 @@ cd $SLURM_SUBMIT_DIR
module load matplotlib
# Build the executable
gcc
-D
STUDY
-o
omp_kmeans omp_kmeans.c vec.c
-fopenmp
gcc
-D
TIMING
-o
omp_kmeans omp_kmeans.c vec.c
-fopenmp
# OpenMP settings
export
OMP_NUM_THREADS
=
$SLURM_CPUS_PER_TASK
export
OMP_PROC_BIND
=
TRUE
export
OMP_PROC_BIND
=
true
# run omp_kmeans
cat
$1
| ./omp_kmeans
$2
$3
1
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment