; large.asm
;
; URL: https://mirkwood.cs.edinboro.edu/~bennett/class/cmsc3100/spring2026/notes/float/code/large.asm
;
; The base of this code was generated by Gemini.
; Syntax: NASM (Intel operand order). Target: x86-64 Linux; requires AVX.
extern printf

section .rodata
    ; Read-only inputs: two vectors of 8 single-precision floats (256 bits each).
    ; vector1 is loaded with vmovaps, which faults unless the address is
    ; 32-byte aligned. The padding byte is given explicitly so that no NOP
    ; opcodes are ever emitted into a data section.
    align 32, db 0
    vector1:  dd 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0
    vector2:  dd 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8   ; starts 32 bytes after vector1

    ; printf format for one "a + b = sum" line (NUL-terminated; the backquoted
    ; string lets NASM expand the \n escape).
    floatFmt: db `%f + %f = %f\n`, 0

section .bss
    ; alignb pads with reserved bytes (resb) instead of NOPs, which is the
    ; correct form for an uninitialised section. result is stored with
    ; vmovaps, so it also needs 32-byte alignment.
    alignb 32
    result:   resd 8    ; 8 single-precision sums, written by main

section .text
    global main

VEC_LEN     equ 8                   ; floats per 256-bit vector (8 x 32-bit)

;-----------------------------------------------------------------------
; int main(void)
; ABI:   System V AMD64; expects to be called by the C runtime, so that
;        rsp % 16 == 8 on entry and returning hands control back to libc.
; Does:  result[i] = vector1[i] + vector2[i] for all 8 lanes with a single
;        AVX add, then prints one "a + b = sum" line per lane via printf.
; Out:   eax = 0 (process exit status)
; Regs:  r12 = lane index, callee-saved so it survives each printf call.
; Clobb: rax, rdi, xmm0-xmm2, ymm0-ymm1, flags, plus whatever printf clobbers.
;-----------------------------------------------------------------------
main:
    push    r12                         ; preserve caller's r12; the single push
                                        ; also brings rsp to 16-byte alignment
                                        ; for the calls below

    ; Packed add of all 8 lanes. vmovaps requires vector1 and result to be
    ; 32-byte aligned; the vaddps memory operand has no such requirement.
    vmovaps ymm0, [rel vector1]
    vaddps  ymm1, ymm0, [rel vector2]
    vmovaps [rel result], ymm1

    ; Clear the upper ymm halves before running legacy-SSE code (the scalar
    ; conversions below and libc), avoiding AVX<->SSE transition stalls.
    vzeroupper

    xor     r12d, r12d                  ; i = 0
.top:
    cmp     r12, VEC_LEN
    jae     .done                       ; stop once i == VEC_LEN

    ; RIP-relative addressing cannot take an index register, so each base
    ; address is materialised with lea before indexing by lane.
    ; printf's %f consumes a double, hence the float -> double conversions.
    lea     rax, [rel vector1]
    movss   xmm0, dword [rax + r12 * 4]
    cvtss2sd xmm0, xmm0                 ; arg 1: vector1[i]

    lea     rax, [rel vector2]
    movss   xmm1, dword [rax + r12 * 4]
    cvtss2sd xmm1, xmm1                 ; arg 2: vector2[i]

    lea     rax, [rel result]
    movss   xmm2, dword [rax + r12 * 4]
    cvtss2sd xmm2, xmm2                 ; arg 3: result[i]

    lea     rdi, [rel floatFmt]
    mov     eax, 3                      ; variadic call: al = number of vector
                                        ; registers carrying arguments
    call    printf wrt ..plt

    inc     r12
    jmp     .top

.done:
    ; Return to libc rather than issuing a raw exit syscall: libc's exit path
    ; flushes stdio, so the printed lines are not lost when stdout is a pipe
    ; or a file (fully buffered).
    xor     eax, eax                    ; return 0
    pop     r12
    ret