I need to calculate and compare execution time of multiplication of 2 matrices in 3 different sizes (100 * 100 , 1000 * 1000 and 10000 * 10000) in C programming language. I wrote the following simple code to do that for 1000 * 1000 and I got the execution time
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int main()
{
int r1 = 1000, c1 = 1000, r2 = 1000, c2 = 1000, i, j, k;
// Dynamic allocation.
double(*a)[r1][c1] = malloc(sizeof *a);
double(*b)[r2][c2] = malloc(sizeof *b);
double(*result)[r1][c2] = malloc(sizeof *result);
// Storing elements of first matrix.
for (i = 0; i < r1; ++i)
{
for (j = 0; j < c1; ++j)
{
(*a)[i][j] = rand() / RAND_MAX;
}
}
// Storing elements of second matrix.
for (i = 0; i < r2; ++i)
{
for (j = 0; j < c2; ++j)
{
(*b)[i][j] = rand() / RAND_MAX;
}
}
// Initializing all elements of result matrix to 0
for (i = 0; i < r1; ++i)
{
for (j = 0; j < c2; ++j)
{
(*result)[i][j] = 0;
}
}
clock_t begin1 = clock();
// Multiplying matrices a and b and
// storing result in result matrix
for (i = 0; i < r1; ++i)
for (j = 0; j < c2; ++j)
for (k = 0; k < c1; ++k)
{
(*result)[i][j] += (*a)[i][k] * (*b)[k][j];
}
clock_t end1 = clock();
double time_taken = (double)(end1 - begin1) / CLOCKS_PER_SEC;
printf("\n function took %f seconds to execute \n", time_taken);
return 0;
}
And now I want to repeat this part for two other sizes and get the result like this at the end of my program with one run:
the execution time for 100 * 100 is 1 second
the execution time for 1000 * 1000 is 2 seconds
the execution time for 10000 * 10000 is 3 seconds
What is the best solution for that? When I repeat this part for 10000 * 10000 after 1000 * 1000 I got the segmentation fault error.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int main()
{
int r1 = 1000, c1 = 1000, r2 = 1000, c2 = 1000, i, j, k;
// Dynamic allocation.
double(*a)[r1][c1] = malloc(sizeof *a);
double(*b)[r2][c2] = malloc(sizeof *b);
double(*result)[r1][c2] = malloc(sizeof *result);
// Storing elements of first matrix.
for (i = 0; i < r1; ++i)
{
for (j = 0; j < c1; ++j)
{
(*a)[i][j] = rand() / RAND_MAX;
}
}
// Storing elements of second matrix.
for (i = 0; i < r2; ++i)
{
for (j = 0; j < c2; ++j)
{
(*b)[i][j] = rand() / RAND_MAX;
}
}
// Initializing all elements of result matrix to 0
for (i = 0; i < r1; ++i)
{
for (j = 0; j < c2; ++j)
{
(*result)[i][j] = 0;
}
}
clock_t begin1 = clock();
// Multiplying matrices a and b and
// storing result in result matrix
for (i = 0; i < r1; ++i)
for (j = 0; j < c2; ++j)
for (k = 0; k < c1; ++k)
{
(*result)[i][j] += (*a)[i][k] * (*b)[k][j];
}
clock_t end1 = clock();
double time_taken = (double)(end1 - begin1) / CLOCKS_PER_SEC;
printf("\n \nfunction took %f seconds to execute \n",
time_taken);
free(a);
free(b);
free(result);
r1 = 10000, c1 = 10000, r2 = 10000, c2 = 10000;
printf("\n run second one for %d \n",r1);
// Storing elements of first matrix.
for (i = 0; i < r1; ++i)
{
for (j = 0; j < c1; ++j)
{
(*a)[i][j] = rand() / RAND_MAX;
}
}
// Storing elements of second matrix.
for (i = 0; i < r2; ++i)
{
for (j = 0; j < c2; ++j)
{
(*b)[i][j] = rand() / RAND_MAX;
}
}
// Initializing all elements of result matrix to 0
for (i = 0; i < r1; ++i)
{
for (j = 0; j < c2; ++j)
{
(*result)[i][j] = 0;
}
}
begin1 = clock();
// Multiplying matrices a and b and
// storing result in result matrix
for (i = 0; i < r1; ++i)
for (j = 0; j < c2; ++j)
for (k = 0; k < c1; ++k)
{
(*result)[i][j] += (*a)[i][k] * (*b)[k][j];
}
end1 = clock();
time_taken = (double)(end1 - begin1) / CLOCKS_PER_SEC;
printf("\n second function took %f seconds to execute \n",
time_taken);
free(a);
free(b);
free(result);
return 0;
}
A simplified version of your program:
...
int main()
{
int r1 = 1000, c1 = 1000, r2 = 1000, c2 = 1000, i, j, k;
// Dynamic allocation.
double(*a)[r1][c1] = malloc(sizeof *a);
double(*b)[r2][c2] = malloc(sizeof *b);
double(*result)[r1][c2] = malloc(sizeof *result);
...
free(a);
free(b);
free(result);
r1 = 10000, c1 = 10000, r2 = 10000, c2 = 10000;
for (i = 0; i < r1; ++i)
for (j = 0; j < c1; ++j)
(*a)[i][j] = rand() /RAND_MAX; // KABOOM !
...
}
A quick but crucial information about about VLA arrays. Name "variable" in "variable-length-array" means that the size is stored in a variable, not that the size is variable. This variable is hidden and can be only read via sizeof operator.
The size of array is bound to it's type, not to its value. Therefore the dimensions of VLA type (and object) cannot change, no matter if the object is dynamic or automatic.
The line:
double(*a)[r1][c1] = malloc(sizeof *a);
it interpreted as:
typedef double __hidden_type[r1][c1];
__hidden_type* a = malloc(sizeof *a);
... changes of r1 or c1 do not affect sizeof(__hidden_type)
The sizes are bound to the types when the types are defined. After that the types are immutable.
Therefore changing the r1 does not change the size of *a. You need to create a new a (or rather its type) and allocate memory for this new *a.
I suggest moving the whole test to a function that takes r1, r2, c1 and c2 as parameters. The arrays would be local to the function.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
void bench(int r1, int c1, int r2, int c2) {
int i, j, k;
// Dynamic allocation.
double(*a)[r1][c1] = malloc(sizeof *a);
double(*b)[r2][c2] = malloc(sizeof *b);
double(*result)[r1][c2] = malloc(sizeof *result);
// Storing elements of first matrix.
for (i = 0; i < r1; ++i)
{
for (j = 0; j < c1; ++j)
{
(*a)[i][j] = rand() /RAND_MAX;
}
}
// Storing elements of second matrix.
for (i = 0; i < r2; ++i)
{
for (j = 0; j < c2; ++j)
{
(*b)[i][j] = rand()/ RAND_MAX;
}
}
// Initializing all elements of result matrix to 0
for (i = 0; i < r1; ++i)
{
for (j = 0; j < c2; ++j)
{
(*result)[i][j] = 0;
}
}
clock_t begin1 = clock();
// Multiplying matrices a and b and
// storing result in result matrix
for (i = 0; i < r1; ++i)
for (j = 0; j < c2; ++j)
for (k = 0; k < c1; ++k)
{
(*result)[i][j] += (*a)[i][k] * (*b)[k][j];
}
clock_t end1 = clock();
double time_taken = (double)(end1 - begin1) / CLOCKS_PER_SEC;
printf("\n \nfunction took %f seconds to execute \n", time_taken);
free(a);
free(b);
free(result);
}
int main()
{
bench(1000, 1000, 1000, 1000);
bench(2000, 2000, 2000, 2000);
}
I've reduced the size from 10000 to 2000 to get results in reasonable time. On my machine I got:
function took 1.966788 seconds to execute
function took 37.370633 seconds to execute
Note that the function is very cache unfriendly.
for (i = 0; i < r1; ++i)
for (j = 0; j < c2; ++j)
for (k = 0; k < c1; ++k)
(*result)[i][j] += (*a)[i][k] * (*b)[k][j];
On every iteration of k you get a cache miss when accessing (*b)[k][j]. Try swapping the j and k loops:
for (i = 0; i < r1; ++i)
for (k = 0; k < c1; ++k)
for (j = 0; j < c2; ++j)
(*result)[i][j] += (*a)[i][k] * (*b)[k][j];
Now when increasing j then (*result)[i][j] and (*b)[k][j] are likely in cache.
On my machine this trivial change gave 10x speedup:
function took 0.319594 seconds to execute
function took 3.829459 seconds to execute
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With