22

If you use a compiled language, you should periodically look at Godbolt and see what your code is doing and what changes to your code will do in the compiled output.

In this case a positively insane way of calculating squares and cubes generates 311 lines of ARM assembler output that will swallow your memory. With even something as simple as -O1 on the command line it's replaced by one or two multiplications respectively. With -fwhole-program it removes the functions entirely and inlines them into the loop in main().

Know your tools. It makes huge differences!

top 6 comments
sorted by: hot top controversial new old
[-] mo_ztt@lemmy.world 4 points 1 year ago
/*
 * Returns num cubed by declaring a num x num x num variable-length array
 * of char and taking its size. sizeof(char) is 1, so sizeof(rv) is
 * num * num * num bytes, evaluated at run time because rv is a VLA.
 * NOTE(review): a VLA dimension must be greater than zero, so num <= 0 is
 * undefined behavior, and the size_t result is converted to int on return.
 */
int cube(int num) {
    char rv[num][num][num];   /* VLA: extents are only known at run time */
    return sizeof(rv);        /* total byte count of the array */
}

"Doctor, it hurts when I do this."

[-] r2p2@lemmy.world 2 points 1 year ago

Not sure if OP is trolling. This seems to be on the same level as sleep sort. (But there, not even compiler optimizations can help.)

[-] ttmrichter@lemmy.world 0 points 1 year ago

Not trolling. Just:

  • showing how clever optimizers can get these days
  • introducing a cool web site
  • highlighting the importance of occasionally peeking under the hood to spot gross inefficiencies
[-] mo_ztt@lemmy.world 0 points 1 year ago* (last edited 1 year ago)

Eh. Honestly, I think what you're saying, and the points the article is making, are pretty valid. That's still gonna be way, way overshadowed by the absolutely ridiculous example they chose to use to make their point. Like "Since you're writing code that's ridiculous to such a degree that it wouldn't even occur to most people that the way you're doing it would even work, you better turn optimizations on, so the compiler can fix your code back to normalcy behind the scenes for you."

[-] ttmrichter@lemmy.world 0 points 1 year ago

Multiplication hurts? 😲

@ cube -- compiler output for cube(int num) reduced to two multiplies.
@ In:  r0 = num (first integer argument under the ARM procedure call standard)
@ Out: r0 = num * num * num (32-bit multiplies, so the result wraps)
@ Uses r3 as scratch; touches no stack.
cube:
        mul     r3, r0, r0              @ r3 = num * num
        mul     r0, r3, r0              @ r0 = (num * num) * num
        bx      lr                      @ return to caller
[-] ttmrichter@lemmy.world 0 points 1 year ago

I mean it could hurt:

@ cube -- compiler-generated listing for cube(int num); apparently built
@ without optimization, given how many values are spilled and reloaded.
@ In:  r0 = num
@ Out: r0 = num * num * num (32-bit multiplies, so the result wraps)
@ r7 is used as the frame pointer for a 112-byte frame; ip keeps the
@ post-prologue sp so the variable-sized stack allocation can be undone.
@ Several 64-bit products below are computed and then overwritten before
@ anything reads them; only the 32-bit products near the end feed the
@ allocation size and the return value.
cube:
        @ Prologue: save callee-saved registers, reserve the frame, spill num.
        push    {r4, r5, r6, r7, r8, r9, r10, fp}
        sub     sp, sp, #112
        add     r7, sp, #0
        str     r0, [r7, #92]
        @ Remember sp in ip; it is restored from ip after the array allocation.
        mov     r3, sp
        mov     ip, r3
        @ Load three copies of num: r1, r0 and r6 act as the three extents.
        ldr     r1, [r7, #92]
        ldr     r0, [r7, #92]
        ldr     r6, [r7, #92]
        @ [r7, #108] = r1 - 1; this slot is not read again in this listing.
        subs    r3, r1, #1
        str     r3, [r7, #108]
        @ Zero-extend r1 to 64 bits in r5:r4 and shift left by 3 into r3:r2.
        @ Both r3 and r2 are overwritten below before being read.
        mov     r2, r1
        movs    r3, #0
        mov     r4, r2
        mov     r5, r3
        mov     r2, #0
        mov     r3, #0
        lsls    r3, r5, #3
        orr     r3, r3, r4, lsr #29
        lsls    r2, r4, #3
        @ [r7, #104] = r0 - 1; this slot is not read again in this listing.
        subs    r3, r0, #1
        str     r3, [r7, #104]
        @ Spill zero-extended r1 to [r7, #80..84] and r0 to [r7, #64..68].
        mov     r2, r1
        movs    r3, #0
        str     r2, [r7, #80]
        str     r3, [r7, #84]
        mov     r2, r0
        movs    r3, #0
        str     r2, [r7, #64]
        str     r3, [r7, #68]
        @ 64-bit product of the two spilled values into r9:r8
        @ (two cross-term muls plus a umull of the low words).
        ldrd    r4, [r7, #80]
        mov     r3, r5
        ldr     r2, [r7, #64]
        mul     r2, r2, r3
        ldr     r3, [r7, #68]
        strd    r4, [r7, #80]
        ldr     r4, [r7, #80]
        mul     r3, r4, r3
        add     r3, r3, r2
        ldr     r2, [r7, #80]
        ldr     r4, [r7, #64]
        umull   r8, r9, r2, r4
        add     r3, r3, r9
        mov     r9, r3
        @ Shift r9:r8 left by 3 into r3:r2; the result is overwritten below
        @ before being read.
        mov     r2, #0
        mov     r3, #0
        lsl     r3, r9, #3
        orr     r3, r3, r8, lsr #29
        lsl     r2, r8, #3
        @ [r7, #100] = r6 - 1; this slot is not read again in this listing.
        subs    r3, r6, #1
        str     r3, [r7, #100]
        @ Spill zero-extended r1 to [r7, #32..36] and r0 to [r7, #72..76].
        mov     r2, r1
        movs    r3, #0
        str     r2, [r7, #32]
        str     r3, [r7, #36]
        mov     r2, r0
        movs    r3, #0
        str     r2, [r7, #72]
        str     r3, [r7, #76]
        @ 64-bit product r1 * r0 again, this time into fp:r10.
        ldrd    r4, [r7, #32]
        mov     r3, r5
        ldrd    r8, [r7, #72]
        mov     r2, r8
        mul     r2, r2, r3
        strd    r8, [r7, #72]
        ldr     r3, [r7, #76]
        mov     r8, r4
        mov     r9, r5
        mov     r4, r8
        mul     r3, r4, r3
        add     r3, r3, r2
        mov     r2, r8
        ldr     r4, [r7, #72]
        umull   r10, fp, r2, r4
        add     r3, r3, fp
        mov     fp, r3
        @ Zero-extend r6 via [r7, #24..28] and multiply it into fp:r10;
        @ the 64-bit result is stored at [r7, #56..60].
        mov     r2, r6
        movs    r3, #0
        str     r2, [r7, #24]
        str     r3, [r7, #28]
        ldrd    r4, [r7, #24]
        mov     r3, r4
        mul     r2, r3, fp
        mov     r3, r5
        mul     r3, r10, r3
        add     r3, r3, r2
        mov     r2, r4
        umull   r4, r2, r10, r2
        str     r2, [r7, #60]
        mov     r2, r4
        str     r2, [r7, #56]
        ldr     r2, [r7, #60]
        add     r3, r3, r2
        str     r3, [r7, #60]
        @ Reload that product into r9:r8 and shift left by 3 into r3:r2;
        @ the result is overwritten below before being read.
        mov     r2, #0
        mov     r3, #0
        ldrd    r8, [r7, #56]
        mov     r4, r9
        lsls    r3, r4, #3
        mov     r4, r8
        orr     r3, r3, r4, lsr #29
        mov     r4, r8
        lsls    r2, r4, #3
        @ Second pass over the same computation: spill zero-extended r1 to
        @ [r7, #16..20] and r0 to [r7, #8..12].
        mov     r2, r1
        movs    r3, #0
        str     r2, [r7, #16]
        str     r3, [r7, #20]
        mov     r2, r0
        movs    r3, #0
        str     r2, [r7, #8]
        str     r3, [r7, #12]
        @ 64-bit product r1 * r0, stored at [r7, #48..52].
        ldrd    r8, [r7, #16]
        mov     r3, r9
        ldrd    r10, [r7, #8]
        mov     r2, r10
        mul     r2, r2, r3
        mov     r3, fp
        mov     r4, r8
        mul     r3, r4, r3
        add     r3, r3, r2
        mov     r2, r8
        mov     r4, r10
        umull   r4, r2, r2, r4
        str     r2, [r7, #52]
        mov     r2, r4
        str     r2, [r7, #48]
        ldr     r2, [r7, #52]
        add     r3, r3, r2
        str     r3, [r7, #52]
        @ Zero-extend r6 via [r7, #0..4] and multiply it in; the 64-bit
        @ result is stored at [r7, #40..44].
        mov     r2, r6
        movs    r3, #0
        str     r2, [r7]
        str     r3, [r7, #4]
        ldrd    r8, [r7, #48]
        mov     r3, r9
        ldrd    r10, [r7]
        mov     r2, r10
        mul     r2, r2, r3
        mov     r3, fp
        mov     r4, r8
        mul     r3, r4, r3
        add     r3, r3, r2
        mov     r2, r8
        mov     r4, r10
        umull   r4, r2, r2, r4
        str     r2, [r7, #44]
        mov     r2, r4
        str     r2, [r7, #40]
        ldr     r2, [r7, #44]
        add     r3, r3, r2
        str     r3, [r7, #44]
        @ Reload and shift left by 3 into r3:r2; again overwritten below
        @ before being read.
        mov     r2, #0
        mov     r3, #0
        ldrd    r8, [r7, #40]
        mov     r4, r9
        lsls    r3, r4, #3
        mov     r4, r8
        orr     r3, r3, r4, lsr #29
        mov     r4, r8
        lsls    r2, r4, #3
        @ 32-bit byte count: r3 = r0 * r1 * r6.
        mov     r3, r1
        mov     r2, r0
        mul     r3, r2, r3
        mov     r2, r6
        mul     r3, r2, r3
        @ Round the byte count up to a multiple of 8 and carve it off the stack.
        adds    r3, r3, #7
        lsrs    r3, r3, #3
        lsls    r3, r3, #3
        sub     sp, sp, r3
        @ Record the address of the allocated region at [r7, #96];
        @ this slot is not read again in this listing.
        mov     r3, sp
        str     r3, [r7, #96]
        @ Recompute r0 * r1 * r6 in r3; this is the value that gets returned.
        mov     r3, r1
        mov     r2, r0
        mul     r3, r2, r3
        mov     r2, r6
        mul     r3, r2, r3
        @ Drop the variable-sized allocation by restoring the saved sp.
        mov     sp, ip
        mov     r0, r3
        @ Epilogue: release the 112-byte frame, restore registers, return.
        adds    r7, r7, #112
        mov     sp, r7
        pop     {r4, r5, r6, r7, r8, r9, r10, fp}
        bx      lr
this post was submitted on 29 Dec 2023
22 points (89.3% liked)

General Programming Discussion

7895 readers
1 users here now

A general programming discussion community.

Rules:

  1. Be civil.
  2. Please start discussions that spark conversation

Other communities

Systems

Functional Programming

Also related

founded 5 years ago
MODERATORS