In this part, you are expected to implement multiplication of two 2-dimensional matrices in both C (or
Python) and an assembly language of your choice to compare their time and space performances (you
can use command time -v <executable>, and du -sh --apparent-size <file>.) Just another hint:see this link
and make a note that gcc -S filename.c or gcc -masm=intel -S filename.c would generate filename.s with
the compiled assembly code.
You can generate random numbers to populate large arrays of data to test your programs. Remember
to get the time before and after executing your programs to measure their execution times. Please
submit your written report to include
For this assignment, I used visual studio code as my IDE and using a bash script in order to
automate the comparison between them. For the sake of simplicity, I used only square matrices
containing one digit, non-zero numbers as input. I simply used nested for loops to iterate through each
input matrix and create dot product results which are stored in a result matrix. Unfortunately, I am not
familiar with this syntax for assembly and therefore do not think I can produce comments for this code
within the time I have available.
C code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//implement multiplication of two 2-dimensional matrices
//assuming all matrices are prerfect squares containing only non-zero digits for simplicity
//function stringtoArray converts the string argument to a multiudimensional array (in the format of a
matrix)
void stringtoArray(int size, char mat[], int matrix[size][size]){
//position tracker for input string
int pos = 0;
//nested for loop to populate matrix from mat
for (int i =0; i < size; i++){
for (int j = 0; j < size; j++){
//check string elements, finding int to add to matrix. for this exercise, all ints are assumed to be
one non-zero digit
int nextInt = 0;
while (nextInt == 0){
if (mat[pos] != '[' && mat[pos] != ']' && mat[pos] != ','){
nextInt = mat[pos] - '0';
pos++;
matrix[i][j] = nextInt;
int main(int argc, char *argv[]) {
//length or width of a square array is entered as the firsst argument
int matSize = atoi(argv[1]);
//matrices are entered as second and third arguments, starting with "[", ending with "]", using no
spaces and commas as delimiters
int mat1[matSize][matSize];
stringtoArray(matSize, argv[2], mat1);
int mat2[matSize][matSize];
stringtoArray(matSize, argv[3], mat2);
//matrix to store results
int result[matSize][matSize];
//multiply the arrays
//for each row in mat1,
for (int i = 0; i < matSize; i++){
//for each column in mat2
for (int j = 0; j < matSize; j++){
//tracker for dot product sum
int dotSum = 0;
//find dot product
for (int k = 0; k < matSize; k++){
dotSum += (mat1[i][k] * mat2[k][j]);
//assign dot product to new matrix
result[i][j] = dotSum;
ASM Code:
.file "matrix.c"
.text
.globl stringtoArray
.type stringtoArray, @function
stringtoArray:
.LFB6:
.cfi_startproc
endbr64
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
pushq %rbx
.cfi_offset 3, -24
movl %edi, -52(%rbp)
movq %rsi, -64(%rbp)
movq %rdx, -72(%rbp)
movl -52(%rbp), %eax
movslq %eax, %rdx
subq $1, %rdx
movq %rdx, -24(%rbp)
movslq %eax, %rdx
movq %rdx, %rcx
movl $0, %ebx
movl $0, -40(%rbp)
movl $0, -36(%rbp)
jmp .L2
.L8:
movl $0, -32(%rbp)
jmp .L3
.L7:
movl $0, -28(%rbp)
jmp .L4
.L6:
movl -40(%rbp), %edx
movslq %edx, %rcx
movq -64(%rbp), %rdx
addq %rcx, %rdx
movzbl (%rdx), %edx
cmpb $91, %dl
je .L5
movl -40(%rbp), %edx
movslq %edx, %rcx
movq -64(%rbp), %rdx
addq %rcx, %rdx
movzbl (%rdx), %edx
cmpb $93, %dl
je .L5
movl -40(%rbp), %edx
movslq %edx, %rcx
movq -64(%rbp), %rdx
addq %rcx, %rdx
movzbl (%rdx), %edx
cmpb $44, %dl
je .L5
movl -40(%rbp), %edx
movslq %edx, %rcx
movq -64(%rbp), %rdx
addq %rcx, %rdx
movzbl (%rdx), %edx
movsbl %dl, %edx
subl $48, %edx
movl %edx, -28(%rbp)
.L5:
addl $1, -40(%rbp)
.L4:
cmpl $0, -28(%rbp)
je .L6
movl -36(%rbp), %edx
movslq %edx, %rcx
movslq %eax, %rdx
imulq %rcx, %rdx
leaq 0(,%rdx,4), %rcx
movq -72(%rbp), %rdx
leaq (%rcx,%rdx), %rsi
movl -32(%rbp), %edx
movslq %edx, %rdx
movl -28(%rbp), %ecx
movl %ecx, (%rsi,%rdx,4)
addl $1, -32(%rbp)
.L3:
movl -32(%rbp), %edx
cmpl -52(%rbp), %edx
jl .L7
addl $1, -36(%rbp)
.L2:
movl -36(%rbp), %edx
cmpl -52(%rbp), %edx
jl .L8
nop
nop
popq %rbx
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE6:
.size stringtoArray, .-stringtoArray
.globl main
.type main, @function
main:
.LFB7:
.cfi_startproc
endbr64
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $360, %rsp
.cfi_offset 15, -24
.cfi_offset 14, -32
.cfi_offset 13, -40
.cfi_offset 12, -48
.cfi_offset 3, -56
movl %edi, -164(%rbp)
movq %rsi, -176(%rbp)
movq %fs:40, %rax
movq %rax, -56(%rbp)
xorl %eax, %eax
movq %rsp, %rax
movq %rax, -376(%rbp)
movq -176(%rbp), %rax
addq $8, %rax
movq (%rax), %rax
movq %rax, %rdi
call atoi@PLT
movl %eax, -148(%rbp)
movl -148(%rbp), %esi
movl -148(%rbp), %edi
movslq %esi, %rax
subq $1, %rax
movq %rax, -112(%rbp)
movslq %esi, %rax
movq %rax, -352(%rbp)
movq $0, -344(%rbp)
movslq %esi, %rax
salq $2, %rax
movq %rax, -352(%rbp)
movslq %edi, %rax
subq $1, %rax
movq %rax, -120(%rbp)
movslq %esi, %rax
movq %rax, %r14
movl $0, %r15d
movslq %edi, %rax
movq %rax, %r12
movl $0, %r13d
movq %r15, %rdx
imulq %r12, %rdx
movq %r13, %rax
imulq %r14, %rax
leaq (%rdx,%rax), %rcx
movq %r14, %rax
mulq %r12
addq %rdx, %rcx
movq %rcx, %rdx
movslq %esi, %rax
movq %rax, -192(%rbp)
movq $0, -184(%rbp)
movslq %edi, %rax
movq %rax, -208(%rbp)
movq $0, -200(%rbp)
movq -192(%rbp), %r9
movq -184(%rbp), %r10
movq %r10, %rdx
movq -208(%rbp), %r11
movq -200(%rbp), %r12
imulq %r11, %rdx
movq %r12, %rax
imulq %r9, %rax
leaq (%rdx,%rax), %rcx
movq %r9, %rax
mulq %r11
addq %rdx, %rcx
movq %rcx, %rdx
movslq %esi, %rdx
movslq %edi, %rax
imulq %rdx, %rax
leaq 0(,%rax,4), %rdx
movl $16, %eax
subq $1, %rax
addq %rdx, %rax
movl $16, %ebx
movl $0, %edx
divq %rbx
imulq $16, %rax, %rdx
movq %rdx, %rax
andq $-4096, %rax
movq %rsp, %rbx
subq %rax, %rbx
movq %rbx, %rax
.L10:
cmpq %rax, %rsp
je .L11
subq $4096, %rsp
orq $0, 4088(%rsp)
jmp .L10
.L11:
movq %rdx, %rax
andl $4095, %eax
subq %rax, %rsp
movq %rdx, %rax
andl $4095, %eax
testq %rax, %rax
je .L12
movq %rdx, %rax
andl $4095, %eax
subq $8, %rax
addq %rsp, %rax
orq $0, (%rax)
.L12:
movq %rsp, %rax
addq $3, %rax
shrq $2, %rax
salq $2, %rax
movq %rax, -128(%rbp)
movq -176(%rbp), %rax
addq $16, %rax
movq (%rax), %rsi
movq -128(%rbp), %rax
movl -148(%rbp), %ecx
movq %rax, %rdx
movl %ecx, %edi
call stringtoArray
movl -148(%rbp), %esi
movl -148(%rbp), %edi
movslq %esi, %rax
subq $1, %rax
movq %rax, -104(%rbp)
movslq %esi, %rax
movq %rax, -368(%rbp)
movq $0, -360(%rbp)
movslq %esi, %rax
leaq 0(,%rax,4), %rbx
movslq %edi, %rax
subq $1, %rax
movq %rax, -96(%rbp)
movslq %esi, %rax
movq %rax, -224(%rbp)
movq $0, -216(%rbp)
movslq %edi, %rax
movq %rax, -240(%rbp)
movq $0, -232(%rbp)
movq -224(%rbp), %r14
movq -216(%rbp), %r15
movq %r15, %rdx
movq -240(%rbp), %r8
movq -232(%rbp), %r9
imulq %r8, %rdx
movq %r9, %rax
imulq %r14, %rax
leaq (%rdx,%rax), %rcx
movq %r14, %rax
mulq %r8
addq %rdx, %rcx
movq %rcx, %rdx
movslq %esi, %rax
movq %rax, -256(%rbp)
movq $0, -248(%rbp)
movslq %edi, %rax
movq %rax, -272(%rbp)
movq $0, -264(%rbp)
movq -256(%rbp), %r8
movq -248(%rbp), %r9
movq %r9, %rdx
movq -272(%rbp), %r10
movq -264(%rbp), %r11
imulq %r10, %rdx
movq %r11, %rax
imulq %r8, %rax
leaq (%rdx,%rax), %rcx
movq %r8, %rax
mulq %r10
addq %rdx, %rcx
movq %rcx, %rdx
movslq %esi, %rdx
movslq %edi, %rax
imulq %rdx, %rax
leaq 0(,%rax,4), %rdx
movl $16, %eax
subq $1, %rax
addq %rdx, %rax
movl $16, %edi
movl $0, %edx
divq %rdi
imulq $16, %rax, %rdx
movq %rdx, %rax
andq $-4096, %rax
movq %rsp, %rdi
subq %rax, %rdi
movq %rdi, %rax
.L13:
cmpq %rax, %rsp
je .L14
subq $4096, %rsp
orq $0, 4088(%rsp)
jmp .L13
.L14:
movq %rdx, %rax
andl $4095, %eax
subq %rax, %rsp
movq %rdx, %rax
andl $4095, %eax
testq %rax, %rax
je .L15
movq %rdx, %rax
andl $4095, %eax
subq $8, %rax
addq %rsp, %rax
orq $0, (%rax)
.L15:
movq %rsp, %rax
addq $3, %rax
shrq $2, %rax
salq $2, %rax
movq %rax, -88(%rbp)
movq -176(%rbp), %rax
addq $24, %rax
movq (%rax), %rsi
movq -88(%rbp), %rax
movl -148(%rbp), %ecx
movq %rax, %rdx
movl %ecx, %edi
call stringtoArray
movl -148(%rbp), %esi
movl -148(%rbp), %edi
movslq %esi, %rax
subq $1, %rax
movq %rax, -80(%rbp)
movslq %esi, %rax
movq %rax, -400(%rbp)
movq $0, -392(%rbp)
movslq %esi, %rax
leaq 0(,%rax,4), %r8
movslq %edi, %rax
subq $1, %rax
movq %rax, -72(%rbp)
movslq %esi, %rax
movq %rax, -288(%rbp)
movq $0, -280(%rbp)
movslq %edi, %rax
movq %rax, -304(%rbp)
movq $0, -296(%rbp)
movq -288(%rbp), %r9
movq -280(%rbp), %r10
movq %r10, %rdx
movq -304(%rbp), %r14
movq -296(%rbp), %r15
imulq %r14, %rdx
movq %r15, %rax
imulq %r9, %rax
leaq (%rdx,%rax), %rcx
movq %r9, %rax
mulq %r14
addq %rdx, %rcx
movq %rcx, %rdx
movslq %esi, %rax
movq %rax, -320(%rbp)
movq $0, -312(%rbp)
movslq %edi, %rax
movq %rax, -336(%rbp)
movq $0, -328(%rbp)
movq -320(%rbp), %r10
movq -312(%rbp), %r11
movq %r11, %rdx
movq -336(%rbp), %r14
movq -328(%rbp), %r15
imulq %r14, %rdx
movq %r15, %rax
imulq %r10, %rax
leaq (%rdx,%rax), %rcx
movq %r10, %rax
mulq %r14
addq %rdx, %rcx
movq %rcx, %rdx
movslq %esi, %rdx
movslq %edi, %rax
imulq %rdx, %rax
leaq 0(,%rax,4), %rdx
movl $16, %eax
subq $1, %rax
addq %rdx, %rax
movl $16, %edi
movl $0, %edx
divq %rdi
imulq $16, %rax, %rax
movq %rax, %rdx
andq $-4096, %rdx
movq %rsp, %rdi
subq %rdx, %rdi
movq %rdi, %rdx
.L16:
cmpq %rdx, %rsp
je .L17
subq $4096, %rsp
orq $0, 4088(%rsp)
jmp .L16
.L17:
movq %rax, %rdx
andl $4095, %edx
subq %rdx, %rsp
movq %rax, %rdx
andl $4095, %edx
testq %rdx, %rdx
je .L18
andl $4095, %eax
subq $8, %rax
addq %rsp, %rax
orq $0, (%rax)
.L18:
movq %rsp, %rax
addq $3, %rax
shrq $2, %rax
salq $2, %rax
movq %rax, -64(%rbp)
movl $0, -132(%rbp)
jmp .L19
.L24:
movl $0, -136(%rbp)
jmp .L20
.L23:
movl $0, -140(%rbp)
movl $0, -144(%rbp)
jmp .L21
.L22:
movq -352(%rbp), %rsi
shrq $2, %rsi
movq -128(%rbp), %rax
movl -144(%rbp), %edx
movslq %edx, %rcx
movl -132(%rbp), %edx
movslq %edx, %rdx
imulq %rsi, %rdx
addq %rcx, %rdx
movl (%rax,%rdx,4), %edx
movq %rbx, %rdi
shrq $2, %rdi
movq -88(%rbp), %rax
movl -136(%rbp), %ecx
movslq %ecx, %rsi
movl -144(%rbp), %ecx
movslq %ecx, %rcx
imulq %rdi, %rcx
addq %rsi, %rcx
movl (%rax,%rcx,4), %eax
imull %edx, %eax
addl %eax, -140(%rbp)
addl $1, -144(%rbp)
.L21:
movl -144(%rbp), %eax
cmpl -148(%rbp), %eax
jl .L22
movq %r8, %rsi
shrq $2, %rsi
movq -64(%rbp), %rax
movl -136(%rbp), %edx
movslq %edx, %rcx
movl -132(%rbp), %edx
movslq %edx, %rdx
imulq %rsi, %rdx
addq %rdx, %rcx
movl -140(%rbp), %edx
movl %edx, (%rax,%rcx,4)
addl $1, -136(%rbp)
.L20:
movl -136(%rbp), %eax
cmpl -148(%rbp), %eax
jl .L23
addl $1, -132(%rbp)
.L19:
movl -132(%rbp), %eax
cmpl -148(%rbp), %eax
jl .L24
movq -376(%rbp), %rsp
movl $0, %eax
movq -56(%rbp), %rbx
xorq %fs:40, %rbx
je .L26
call __stack_chk_fail@PLT
.L26:
leaq -40(%rbp), %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE7:
.size main, .-main
.ident "GCC: (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0"
.section .[Link]-stack,"",@progbits
.section .[Link],"a"
.align 8
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.string "GNU"
1:
.align 8
.long 0xc0000002
.long 3f - 2f
2:
.long 0x3
3:
.align 8
4:
I am using a bash script to generate a file that performs the compilation and measurement of
each program. For the sake of brevity, I am simply using [mat1] and [mat2] to indicate the first and
second matrices used as command line arguments. Matrices used as input were randomly generated,
with the same matrices used for both C and Assembly code of the same size for testing.
Bash Script:
#!/bin/bash
#bash script to compile and compare matrix functions
#compile c code
gcc -o matrix matrix.c
#compile assembly code
gcc -c matrix.s -o matrix.o
gcc matrix.o -o matrixASM
#run programs to test times
echo "C code size:"
du -sh --apparent-size matrix
echo "C code execution time, 10x10 matrices"
time ./matrix 10 [mat1] [mat2]
echo "C code execution time, 100x100 matrices"
time ./matrix 100 [mat1] [mat2]
echo "C code execution time, 200x200 matrices"
time ./matrix 1000 [mat1] [mat2]
echo ""
echo "Assembly code size:"
du -sh --apparent-size matrixASM
echo "Assembly code execution time, 10x10 matrices"
time ./matrixASM 10 [mat1] [mat2]
echo "Assembly code execution time, 100x100 matrices"
time ./matrixASM 100 [mat1] [mat2]
echo "Assembly code execution time, 200x200 matrices"
time ./matrixASM 1000 [mat1] [mat2]
Output From running [Link]:
For the sake of brevity, I will list the relevant metrics here rather than including the screenshots:
C Code Assembly Code
File size 17K 17K
10x10 Execution Time 0S 0S
100x100 Execution Time 0S 0S
200x200 Execution Time 0.06 S 0.07 S
10x10 Memory Requirements 1076 Kb 1092 Kb
100x100 Memory Requirements 1244 Kb 1312 Kb
200x200 Memory Requirements 1776 Kb 1688
10x10 Page Faults 54 54
100x100 Page Faults 92 93
200x200 Page Faults 210 208
All other metrics were the same across all tests.
I was expecting a larger difference in the two executables, though it may be that the assembly
code being auto-generated causes it to be similar in more respects to the original C file than an assembly
program made from scratch might have. Due to the way I implemented the [Link] file, I am unable to
test larger matrices without causing an error due to the maximum command line size. The only
noticeable time difference is that the Assembly code takes 0.01 second longer to execute on a set of
200x200 matrices, though I suspect the smaller matrix multiplications are not, in actuality, 0 seconds but
smaller than 0.01 seconds. Interestingly, the memory requirements and page faults (which I included as
they were the only other metric that varied between tests) are higher for the Assembly code in the
10x10 and 100x100 tests. However, they are both lower for the 200x200 test, which may mean that past
that size of matrix, the Assembly code will start to increase in efficiency compared to the C code.