0% found this document useful (0 votes)
8 views24 pages

Matrix Multiplication in C and Assembly

Uploaded by

tdrobey
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
8 views24 pages

Matrix Multiplication in C and Assembly

Uploaded by

tdrobey
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd

In this part, you are expected to implement multiplication of two 2-dimensional matrices in both C (or

Python) and an assembly language of your choice to compare their time and space performances (you
can use command time -v <executable>, and du -sh --apparent-size <file>.) Just another hint:see this link
and make a note that gcc -S filename.c or gcc -masm=intel -S filename.c would generate filename.s with
the compiled assembly code.

You can generate random numbers to populate large arrays of data to test your programs. Remember
to get the time before and after executing your programs to measure their execution times. Please
submit your written report to include

For this assignment, I used visual studio code as my IDE and using a bash script in order to
automate the comparison between them. For the sake of simplicity, I used only square matrices
containing one digit, non-zero numbers as input. I simply used nested for loops to iterate through each
input matrix and create dot product results which are stored in a result matrix. Unfortunately, I am not
familiar with this syntax for assembly and therefore do not think I can produce comments for this code
within the time I have available.

C code:

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

//implement multiplication of two 2-dimensional matrices

//assuming all matrices are prerfect squares containing only non-zero digits for simplicity

//function stringtoArray converts the string argument to a multiudimensional array (in the format of a
matrix)

void stringtoArray(int size, char mat[], int matrix[size][size]){

//position tracker for input string

int pos = 0;

//nested for loop to populate matrix from mat

for (int i =0; i < size; i++){

for (int j = 0; j < size; j++){


//check string elements, finding int to add to matrix. for this exercise, all ints are assumed to be
one non-zero digit

int nextInt = 0;

while (nextInt == 0){

if (mat[pos] != '[' && mat[pos] != ']' && mat[pos] != ','){

nextInt = mat[pos] - '0';

pos++;

matrix[i][j] = nextInt;

int main(int argc, char *argv[]) {

//length or width of a square array is entered as the firsst argument

int matSize = atoi(argv[1]);

//matrices are entered as second and third arguments, starting with "[", ending with "]", using no
spaces and commas as delimiters

int mat1[matSize][matSize];

stringtoArray(matSize, argv[2], mat1);

int mat2[matSize][matSize];

stringtoArray(matSize, argv[3], mat2);

//matrix to store results

int result[matSize][matSize];

//multiply the arrays

//for each row in mat1,

for (int i = 0; i < matSize; i++){


//for each column in mat2

for (int j = 0; j < matSize; j++){

//tracker for dot product sum

int dotSum = 0;

//find dot product

for (int k = 0; k < matSize; k++){

dotSum += (mat1[i][k] * mat2[k][j]);

//assign dot product to new matrix

result[i][j] = dotSum;

ASM Code:

.file "matrix.c"

.text

.globl stringtoArray

.type stringtoArray, @function

stringtoArray:

.LFB6:

.cfi_startproc

endbr64

pushq %rbp

.cfi_def_cfa_offset 16

.cfi_offset 6, -16

movq %rsp, %rbp

.cfi_def_cfa_register 6

pushq %rbx
.cfi_offset 3, -24

movl %edi, -52(%rbp)

movq %rsi, -64(%rbp)

movq %rdx, -72(%rbp)

movl -52(%rbp), %eax

movslq %eax, %rdx

subq $1, %rdx

movq %rdx, -24(%rbp)

movslq %eax, %rdx

movq %rdx, %rcx

movl $0, %ebx

movl $0, -40(%rbp)

movl $0, -36(%rbp)

jmp .L2

.L8:

movl $0, -32(%rbp)

jmp .L3

.L7:

movl $0, -28(%rbp)

jmp .L4

.L6:

movl -40(%rbp), %edx

movslq %edx, %rcx

movq -64(%rbp), %rdx

addq %rcx, %rdx

movzbl (%rdx), %edx

cmpb $91, %dl

je .L5

movl -40(%rbp), %edx


movslq %edx, %rcx

movq -64(%rbp), %rdx

addq %rcx, %rdx

movzbl (%rdx), %edx

cmpb $93, %dl

je .L5

movl -40(%rbp), %edx

movslq %edx, %rcx

movq -64(%rbp), %rdx

addq %rcx, %rdx

movzbl (%rdx), %edx

cmpb $44, %dl

je .L5

movl -40(%rbp), %edx

movslq %edx, %rcx

movq -64(%rbp), %rdx

addq %rcx, %rdx

movzbl (%rdx), %edx

movsbl %dl, %edx

subl $48, %edx

movl %edx, -28(%rbp)

.L5:

addl $1, -40(%rbp)

.L4:

cmpl $0, -28(%rbp)

je .L6

movl -36(%rbp), %edx

movslq %edx, %rcx

movslq %eax, %rdx


imulq %rcx, %rdx

leaq 0(,%rdx,4), %rcx

movq -72(%rbp), %rdx

leaq (%rcx,%rdx), %rsi

movl -32(%rbp), %edx

movslq %edx, %rdx

movl -28(%rbp), %ecx

movl %ecx, (%rsi,%rdx,4)

addl $1, -32(%rbp)

.L3:

movl -32(%rbp), %edx

cmpl -52(%rbp), %edx

jl .L7

addl $1, -36(%rbp)

.L2:

movl -36(%rbp), %edx

cmpl -52(%rbp), %edx

jl .L8

nop

nop

popq %rbx

popq %rbp

.cfi_def_cfa 7, 8

ret

.cfi_endproc

.LFE6:

.size stringtoArray, .-stringtoArray

.globl main

.type main, @function


main:

.LFB7:

.cfi_startproc

endbr64

pushq %rbp

.cfi_def_cfa_offset 16

.cfi_offset 6, -16

movq %rsp, %rbp

.cfi_def_cfa_register 6

pushq %r15

pushq %r14

pushq %r13

pushq %r12

pushq %rbx

subq $360, %rsp

.cfi_offset 15, -24

.cfi_offset 14, -32

.cfi_offset 13, -40

.cfi_offset 12, -48

.cfi_offset 3, -56

movl %edi, -164(%rbp)

movq %rsi, -176(%rbp)

movq %fs:40, %rax

movq %rax, -56(%rbp)

xorl %eax, %eax

movq %rsp, %rax

movq %rax, -376(%rbp)

movq -176(%rbp), %rax

addq $8, %rax


movq (%rax), %rax

movq %rax, %rdi

call atoi@PLT

movl %eax, -148(%rbp)

movl -148(%rbp), %esi

movl -148(%rbp), %edi

movslq %esi, %rax

subq $1, %rax

movq %rax, -112(%rbp)

movslq %esi, %rax

movq %rax, -352(%rbp)

movq $0, -344(%rbp)

movslq %esi, %rax

salq $2, %rax

movq %rax, -352(%rbp)

movslq %edi, %rax

subq $1, %rax

movq %rax, -120(%rbp)

movslq %esi, %rax

movq %rax, %r14

movl $0, %r15d

movslq %edi, %rax

movq %rax, %r12

movl $0, %r13d

movq %r15, %rdx

imulq %r12, %rdx

movq %r13, %rax

imulq %r14, %rax

leaq (%rdx,%rax), %rcx


movq %r14, %rax

mulq %r12

addq %rdx, %rcx

movq %rcx, %rdx

movslq %esi, %rax

movq %rax, -192(%rbp)

movq $0, -184(%rbp)

movslq %edi, %rax

movq %rax, -208(%rbp)

movq $0, -200(%rbp)

movq -192(%rbp), %r9

movq -184(%rbp), %r10

movq %r10, %rdx

movq -208(%rbp), %r11

movq -200(%rbp), %r12

imulq %r11, %rdx

movq %r12, %rax

imulq %r9, %rax

leaq (%rdx,%rax), %rcx

movq %r9, %rax

mulq %r11

addq %rdx, %rcx

movq %rcx, %rdx

movslq %esi, %rdx

movslq %edi, %rax

imulq %rdx, %rax

leaq 0(,%rax,4), %rdx

movl $16, %eax

subq $1, %rax


addq %rdx, %rax

movl $16, %ebx

movl $0, %edx

divq %rbx

imulq $16, %rax, %rdx

movq %rdx, %rax

andq $-4096, %rax

movq %rsp, %rbx

subq %rax, %rbx

movq %rbx, %rax

.L10:

cmpq %rax, %rsp

je .L11

subq $4096, %rsp

orq $0, 4088(%rsp)

jmp .L10

.L11:

movq %rdx, %rax

andl $4095, %eax

subq %rax, %rsp

movq %rdx, %rax

andl $4095, %eax

testq %rax, %rax

je .L12

movq %rdx, %rax

andl $4095, %eax

subq $8, %rax

addq %rsp, %rax

orq $0, (%rax)


.L12:

movq %rsp, %rax

addq $3, %rax

shrq $2, %rax

salq $2, %rax

movq %rax, -128(%rbp)

movq -176(%rbp), %rax

addq $16, %rax

movq (%rax), %rsi

movq -128(%rbp), %rax

movl -148(%rbp), %ecx

movq %rax, %rdx

movl %ecx, %edi

call stringtoArray

movl -148(%rbp), %esi

movl -148(%rbp), %edi

movslq %esi, %rax

subq $1, %rax

movq %rax, -104(%rbp)

movslq %esi, %rax

movq %rax, -368(%rbp)

movq $0, -360(%rbp)

movslq %esi, %rax

leaq 0(,%rax,4), %rbx

movslq %edi, %rax

subq $1, %rax

movq %rax, -96(%rbp)

movslq %esi, %rax

movq %rax, -224(%rbp)


movq $0, -216(%rbp)

movslq %edi, %rax

movq %rax, -240(%rbp)

movq $0, -232(%rbp)

movq -224(%rbp), %r14

movq -216(%rbp), %r15

movq %r15, %rdx

movq -240(%rbp), %r8

movq -232(%rbp), %r9

imulq %r8, %rdx

movq %r9, %rax

imulq %r14, %rax

leaq (%rdx,%rax), %rcx

movq %r14, %rax

mulq %r8

addq %rdx, %rcx

movq %rcx, %rdx

movslq %esi, %rax

movq %rax, -256(%rbp)

movq $0, -248(%rbp)

movslq %edi, %rax

movq %rax, -272(%rbp)

movq $0, -264(%rbp)

movq -256(%rbp), %r8

movq -248(%rbp), %r9

movq %r9, %rdx

movq -272(%rbp), %r10

movq -264(%rbp), %r11

imulq %r10, %rdx


movq %r11, %rax

imulq %r8, %rax

leaq (%rdx,%rax), %rcx

movq %r8, %rax

mulq %r10

addq %rdx, %rcx

movq %rcx, %rdx

movslq %esi, %rdx

movslq %edi, %rax

imulq %rdx, %rax

leaq 0(,%rax,4), %rdx

movl $16, %eax

subq $1, %rax

addq %rdx, %rax

movl $16, %edi

movl $0, %edx

divq %rdi

imulq $16, %rax, %rdx

movq %rdx, %rax

andq $-4096, %rax

movq %rsp, %rdi

subq %rax, %rdi

movq %rdi, %rax

.L13:

cmpq %rax, %rsp

je .L14

subq $4096, %rsp

orq $0, 4088(%rsp)

jmp .L13
.L14:

movq %rdx, %rax

andl $4095, %eax

subq %rax, %rsp

movq %rdx, %rax

andl $4095, %eax

testq %rax, %rax

je .L15

movq %rdx, %rax

andl $4095, %eax

subq $8, %rax

addq %rsp, %rax

orq $0, (%rax)

.L15:

movq %rsp, %rax

addq $3, %rax

shrq $2, %rax

salq $2, %rax

movq %rax, -88(%rbp)

movq -176(%rbp), %rax

addq $24, %rax

movq (%rax), %rsi

movq -88(%rbp), %rax

movl -148(%rbp), %ecx

movq %rax, %rdx

movl %ecx, %edi

call stringtoArray

movl -148(%rbp), %esi

movl -148(%rbp), %edi


movslq %esi, %rax

subq $1, %rax

movq %rax, -80(%rbp)

movslq %esi, %rax

movq %rax, -400(%rbp)

movq $0, -392(%rbp)

movslq %esi, %rax

leaq 0(,%rax,4), %r8

movslq %edi, %rax

subq $1, %rax

movq %rax, -72(%rbp)

movslq %esi, %rax

movq %rax, -288(%rbp)

movq $0, -280(%rbp)

movslq %edi, %rax

movq %rax, -304(%rbp)

movq $0, -296(%rbp)

movq -288(%rbp), %r9

movq -280(%rbp), %r10

movq %r10, %rdx

movq -304(%rbp), %r14

movq -296(%rbp), %r15

imulq %r14, %rdx

movq %r15, %rax

imulq %r9, %rax

leaq (%rdx,%rax), %rcx

movq %r9, %rax

mulq %r14

addq %rdx, %rcx


movq %rcx, %rdx

movslq %esi, %rax

movq %rax, -320(%rbp)

movq $0, -312(%rbp)

movslq %edi, %rax

movq %rax, -336(%rbp)

movq $0, -328(%rbp)

movq -320(%rbp), %r10

movq -312(%rbp), %r11

movq %r11, %rdx

movq -336(%rbp), %r14

movq -328(%rbp), %r15

imulq %r14, %rdx

movq %r15, %rax

imulq %r10, %rax

leaq (%rdx,%rax), %rcx

movq %r10, %rax

mulq %r14

addq %rdx, %rcx

movq %rcx, %rdx

movslq %esi, %rdx

movslq %edi, %rax

imulq %rdx, %rax

leaq 0(,%rax,4), %rdx

movl $16, %eax

subq $1, %rax

addq %rdx, %rax

movl $16, %edi

movl $0, %edx


divq %rdi

imulq $16, %rax, %rax

movq %rax, %rdx

andq $-4096, %rdx

movq %rsp, %rdi

subq %rdx, %rdi

movq %rdi, %rdx

.L16:

cmpq %rdx, %rsp

je .L17

subq $4096, %rsp

orq $0, 4088(%rsp)

jmp .L16

.L17:

movq %rax, %rdx

andl $4095, %edx

subq %rdx, %rsp

movq %rax, %rdx

andl $4095, %edx

testq %rdx, %rdx

je .L18

andl $4095, %eax

subq $8, %rax

addq %rsp, %rax

orq $0, (%rax)

.L18:

movq %rsp, %rax

addq $3, %rax

shrq $2, %rax


salq $2, %rax

movq %rax, -64(%rbp)

movl $0, -132(%rbp)

jmp .L19

.L24:

movl $0, -136(%rbp)

jmp .L20

.L23:

movl $0, -140(%rbp)

movl $0, -144(%rbp)

jmp .L21

.L22:

movq -352(%rbp), %rsi

shrq $2, %rsi

movq -128(%rbp), %rax

movl -144(%rbp), %edx

movslq %edx, %rcx

movl -132(%rbp), %edx

movslq %edx, %rdx

imulq %rsi, %rdx

addq %rcx, %rdx

movl (%rax,%rdx,4), %edx

movq %rbx, %rdi

shrq $2, %rdi

movq -88(%rbp), %rax

movl -136(%rbp), %ecx

movslq %ecx, %rsi

movl -144(%rbp), %ecx

movslq %ecx, %rcx


imulq %rdi, %rcx

addq %rsi, %rcx

movl (%rax,%rcx,4), %eax

imull %edx, %eax

addl %eax, -140(%rbp)

addl $1, -144(%rbp)

.L21:

movl -144(%rbp), %eax

cmpl -148(%rbp), %eax

jl .L22

movq %r8, %rsi

shrq $2, %rsi

movq -64(%rbp), %rax

movl -136(%rbp), %edx

movslq %edx, %rcx

movl -132(%rbp), %edx

movslq %edx, %rdx

imulq %rsi, %rdx

addq %rdx, %rcx

movl -140(%rbp), %edx

movl %edx, (%rax,%rcx,4)

addl $1, -136(%rbp)

.L20:

movl -136(%rbp), %eax

cmpl -148(%rbp), %eax

jl .L23

addl $1, -132(%rbp)

.L19:

movl -132(%rbp), %eax


cmpl -148(%rbp), %eax

jl .L24

movq -376(%rbp), %rsp

movl $0, %eax

movq -56(%rbp), %rbx

xorq %fs:40, %rbx

je .L26

call __stack_chk_fail@PLT

.L26:

leaq -40(%rbp), %rsp

popq %rbx

popq %r12

popq %r13

popq %r14

popq %r15

popq %rbp

.cfi_def_cfa 7, 8

ret

.cfi_endproc

.LFE7:

.size main, .-main

.ident "GCC: (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0"

.section .[Link]-stack,"",@progbits

.section .[Link],"a"

.align 8

.long 1f - 0f

.long 4f - 1f

.long 5

0:
.string "GNU"

1:

.align 8

.long 0xc0000002

.long 3f - 2f

2:

.long 0x3

3:

.align 8

4:

I am using a bash script to generate a file that performs the compilation and measurement of
each program. For the sake of brevity, I am simply using [mat1] and [mat2] to indicate the first and
second matrices used as command line arguments. Matrices used as input were randomly generated,
with the same matrices used for both C and Assembly code of the same size for testing.

Bash Script:
#!/bin/bash

#bash script to compile and compare matrix functions

#compile c code

gcc -o matrix matrix.c

#compile assembly code

gcc -c matrix.s -o matrix.o

gcc matrix.o -o matrixASM

#run programs to test times

echo "C code size:"

du -sh --apparent-size matrix

echo "C code execution time, 10x10 matrices"

time ./matrix 10 [mat1] [mat2]


echo "C code execution time, 100x100 matrices"

time ./matrix 100 [mat1] [mat2]

echo "C code execution time, 200x200 matrices"

time ./matrix 1000 [mat1] [mat2]

echo ""

echo "Assembly code size:"

du -sh --apparent-size matrixASM

echo "Assembly code execution time, 10x10 matrices"

time ./matrixASM 10 [mat1] [mat2]

echo "Assembly code execution time, 100x100 matrices"

time ./matrixASM 100 [mat1] [mat2]

echo "Assembly code execution time, 200x200 matrices"

time ./matrixASM 1000 [mat1] [mat2]


Output From running [Link]:

For the sake of brevity, I will list the relevant metrics here rather than including the screenshots:

C Code Assembly Code


File size 17K 17K
10x10 Execution Time 0S 0S
100x100 Execution Time 0S 0S
200x200 Execution Time 0.06 S 0.07 S
10x10 Memory Requirements 1076 Kb 1092 Kb
100x100 Memory Requirements 1244 Kb 1312 Kb
200x200 Memory Requirements 1776 Kb 1688
10x10 Page Faults 54 54
100x100 Page Faults 92 93
200x200 Page Faults 210 208

All other metrics were the same across all tests.

I was expecting a larger difference in the two executables, though it may be that the assembly
code being auto-generated causes it to be similar in more respects to the original C file than an assembly
program made from scratch might have. Due to the way I implemented the [Link] file, I am unable to
test larger matrices without causing an error due to the maximum command line size. The only
noticeable time difference is that the Assembly code takes 0.01 second longer to execute on a set of
200x200 matrices, though I suspect the smaller matrix multiplications are not, in actuality, 0 seconds but
smaller than 0.01 seconds. Interestingly, the memory requirements and page faults (which I included as
they were the only other metric that varied between tests) are higher for the Assembly code in the
10x10 and 100x100 tests. However, they are both lower for the 200x200 test, which may mean that past
that size of matrix, the Assembly code will start to increase in efficiency compared to the C code.

You might also like