| 网站首页 | 业界新闻 | 小组 | 威客 | 人才 | 下载频道 | 博客 | 代码贴 | 在线编程 | 编程论坛
欢迎加入我们,一同切磋技术
用户名:   
 
密 码:  
共有 4987 人关注过本帖
标题:改天研究一下 貌似GCC生成的汇编效率很渣
取消只看楼主 加入收藏
zklhp
Rank: 20
来 自:china
等 级:贵宾
威 望:254
帖 子:11485
专家分:33241
注 册:2007-7-10
结帖率:100%
收藏
 问题点数:0 回复次数:0 
改天研究一下 貌似GCC生成的汇编效率很渣
程序代码:
// gcc -Wall -O3 -ftracer -fivopts -ftree-loop-linear -ftree-vectorize -fforce-addr -fomit-frame-pointer -fno-bounds-check -funroll-loops -ffast-math -march=native -mfpmath=sse -mmmx -msse -msse2 -msse3 a.c -o a
#include <stdio.h>
#include <time.h>

/*
 * Micro-benchmark: repeats a short integer + floating-point recurrence
 * 1000 * 1000000 times, then prints the final values of e and f followed by
 * the elapsed time.
 *
 * The elapsed time is printed as the raw difference of two clock() readings,
 * i.e. in clock ticks, not seconds (divide by CLOCKS_PER_SEC for seconds).
 *
 * Returns 0.
 */
int main(void)
{
    int i, j;
    /*
     * The integer recurrence is a = (a + 50 - 100) * 20, so |a| grows about
     * 20x per iteration and leaves the range of a 32-bit int on the sixth
     * inner iteration.  Signed overflow is undefined behaviour in C, so the
     * recurrence is carried out in unsigned arithmetic, which is defined to
     * wrap modulo 2^N.
     */
    unsigned int a = 1u, b = 1u;
    float c = 1.0, d = 1.0;
    double e = 1.0, f = 1.0;
    double start, finish, duration;
    start = clock();

    for (i = 0; i < 1000; i++)
    {
        for (j = 0; j < 1000000; j++)
        {
            a = a + 50u;
            b = a - 100u;
            a = b * 20u;
            /*
             * Reinterpret the wrapped value as a signed int before it enters
             * the floating-point chain.  The unsigned -> int conversion of an
             * out-of-range value is implementation-defined (GCC reduces it
             * modulo 2^N), which is the two's-complement result the signed
             * version produced in practice.
             */
            c = (int)a + 300.89;
            /* Each assignment to the float variables c and d rounds the
               double-precision result back to single precision. */
            d = c - 600.89;
            c = d * 90.89;
            d = c / 55.89;
            e = c * 90.89;
            f = e / 55.89;
        }
    }

    finish = clock();
    duration = finish - start;      /* elapsed clock ticks */
    printf("%f, %f\n", e, f);
    printf("%10e\n", duration);
    return 0;
}


程序代码:
   # Disassembly of main (gdb listing, Intel syntax, AVX/VEX encodings).
   # Register usage is consistent with the Microsoft x64 convention: xmm6/xmm7
   # and rbx are saved, and printf arguments travel in rcx, rdx/xmm1, r8/xmm2.
   #
   # Prologue: save rbx, reserve 0x40 bytes of stack, spill xmm6/xmm7 at
   # rsp+0x20 and rsp+0x30 (the low 0x20 bytes stay free for callees).
   0x0000000000402c50 <+0>:    push   rbx
   0x0000000000402c51 <+1>:    sub    rsp,0x40
   0x0000000000402c55 <+5>:    vmovaps XMMWORD PTR [rsp+0x20],xmm6
   0x0000000000402c5b <+11>:    vmovaps XMMWORD PTR [rsp+0x30],xmm7
   # __main is presumably the toolchain's runtime start-up hook (not shown here).
   0x0000000000402c61 <+17>:    call   0x4016a0 <__main>
   # start = clock(); the result in eax is converted to double and kept in xmm7.
   # The vxorpd zeroes xmm7 first so vcvtsi2sd has no stale input dependency.
   0x0000000000402c66 <+22>:    call   0x402ae8 <clock>
   0x0000000000402c6b <+27>:    vxorpd xmm7,xmm7,xmm7
   # ecx = 1000 (outer loop counter), r8d = 1 (initial value of a).
   0x0000000000402c6f <+31>:    mov    ecx,0x3e8
   0x0000000000402c74 <+36>:    mov    r8d,0x1
   0x0000000000402c7a <+42>:    vcvtsi2sd xmm7,xmm7,eax
   # Two-byte no-op; the next instruction (outer loop head) lands on 0x...80.
   0x0000000000402c7e <+46>:    xchg   ax,ax
   # Outer loop head: edx = 1000000 (inner loop counter).
   0x0000000000402c80 <+48>:    mov    edx,0xf4240
   # Inner loop body: only the integer recurrence a = (a - 50) * 20 remains,
   # unrolled 8 times.  Each pair of lea computes x*5 and then x*4 - 50, i.e.
   # (previous a) - 50 for the next step; no floating-point work is done here.
   0x0000000000402c85 <+53>:    lea    eax,[r8+r8*4-0xfa]
   0x0000000000402c8d <+61>:    lea    ebx,[rax*4-0x32]
   0x0000000000402c94 <+68>:    lea    r8d,[rbx+rbx*4]
   0x0000000000402c98 <+72>:    lea    r9d,[r8*4-0x32]
   0x0000000000402ca0 <+80>:    lea    r10d,[r9+r9*4]
   0x0000000000402ca4 <+84>:    lea    r11d,[r10*4-0x32]
   0x0000000000402cac <+92>:    lea    eax,[r11+r11*4]
   0x0000000000402cb0 <+96>:    lea    ebx,[rax*4-0x32]
   0x0000000000402cb7 <+103>:    lea    r8d,[rbx+rbx*4]
   0x0000000000402cbb <+107>:    lea    r9d,[r8*4-0x32]
   0x0000000000402cc3 <+115>:    lea    r10d,[r9+r9*4]
   0x0000000000402cc7 <+119>:    lea    r11d,[r10*4-0x32]
   0x0000000000402ccf <+127>:    lea    eax,[r11+r11*4]
   0x0000000000402cd3 <+131>:    lea    ebx,[rax*4-0x32]
   # Last of the 8 steps: r8d = ebx*5, then *4, giving the new a in r8d.
   0x0000000000402cda <+138>:    lea    r8d,[rbx+rbx*4]
   0x0000000000402cde <+142>:    shl    r8d,0x2
   # Inner counter drops by 8 per pass (matches the 8x unroll); loop until zero.
   0x0000000000402ce2 <+146>:    sub    edx,0x8
   0x0000000000402ce5 <+149>:    jne    0x402c85 <main+53>
   # Outer counter: 1000 passes.
   0x0000000000402ce7 <+151>:    sub    ecx,0x1
   0x0000000000402cea <+154>:    jne    0x402c80 <main+48>
   # After both loops: the floating-point chain runs exactly once, on the final
   # a in r8d.  The constants at 0x404010/0x404018/0x404020 are not shown in
   # this listing; presumably 300.89, 600.89 and 90.89 in that order.
   0x0000000000402cec <+156>:    vxorpd xmm0,xmm0,xmm0
   0x0000000000402cf0 <+160>:    vcvtsi2sd xmm1,xmm0,r8d
   0x0000000000402cf5 <+165>:    vaddsd xmm2,xmm1,QWORD PTR [rip+0x1313]        # 0x404010
   0x0000000000402cfd <+173>:    vmovsd xmm1,QWORD PTR [rip+0x131b]        # 0x404020
   # Each vcvtsd2ss / vcvtss2sd pair rounds a double to float and widens it
   # again, matching an assignment to one of the float variables c or d.
   0x0000000000402d05 <+181>:    vcvtsd2ss xmm3,xmm3,xmm2
   0x0000000000402d09 <+185>:    vcvtss2sd xmm4,xmm4,xmm3
   0x0000000000402d0d <+189>:    vsubsd xmm5,xmm4,QWORD PTR [rip+0x1303]        # 0x404018
   0x0000000000402d15 <+197>:    vcvtsd2ss xmm6,xmm6,xmm5
   0x0000000000402d19 <+201>:    vcvtss2sd xmm0,xmm0,xmm6
   0x0000000000402d1d <+205>:    vmulsd xmm2,xmm0,xmm1
   0x0000000000402d21 <+209>:    vcvtsd2ss xmm3,xmm3,xmm2
   0x0000000000402d25 <+213>:    vcvtss2sd xmm4,xmm4,xmm3
   # xmm6 = e.  It is kept in xmm6, which this function saved in its prologue,
   # across the clock() call below.
   0x0000000000402d29 <+217>:    vmulsd xmm6,xmm4,xmm1
   # finish = clock(); the raw result is parked in ebx.
   0x0000000000402d2d <+221>:    call   0x402ae8 <clock>
   0x0000000000402d32 <+226>:    lea    rcx,[rip+0x12c7]        # 0x404000
   # f is produced by a multiply, not a divide: the operand at 0x404028 is
   # presumably the precomputed reciprocal of 55.89 (-ffast-math allows this).
   0x0000000000402d39 <+233>:    vmulsd xmm5,xmm6,QWORD PTR [rip+0x12e7]        # 0x404028
   0x0000000000402d41 <+241>:    mov    ebx,eax
   # First printf: rcx = format string, e in xmm1 and rdx, f in xmm2 and r8
   # (each double is passed in both the vector and the integer register).
   0x0000000000402d43 <+243>:    vmovapd xmm1,xmm6
   0x0000000000402d47 <+247>:    vmovq  rdx,xmm6
   0x0000000000402d4c <+252>:    vmovapd xmm2,xmm5
   0x0000000000402d50 <+256>:    vmovq  r8,xmm5
   0x0000000000402d55 <+261>:    call   0x402ab0 <printf>
   # duration = (double)finish - start, then the second printf with the format
   # string at 0x404008.
   0x0000000000402d5a <+266>:    vxorpd xmm0,xmm0,xmm0
   0x0000000000402d5e <+270>:    lea    rcx,[rip+0x12a3]        # 0x404008
   0x0000000000402d65 <+277>:    vcvtsi2sd xmm1,xmm0,ebx
   0x0000000000402d69 <+281>:    vsubsd xmm7,xmm1,xmm7
   0x0000000000402d6d <+285>:    vmovapd xmm1,xmm7
   0x0000000000402d71 <+289>:    vmovq  rdx,xmm7
   0x0000000000402d76 <+294>:    call   0x402ab0 <printf>
   0x0000000000402d7b <+299>:    nop
   # Epilogue: restore xmm6/xmm7, return value 0 in eax, release the frame.
   0x0000000000402d7c <+300>:    vmovaps xmm6,XMMWORD PTR [rsp+0x20]
   0x0000000000402d82 <+306>:    xor    eax,eax
   0x0000000000402d84 <+308>:    vmovaps xmm7,XMMWORD PTR [rsp+0x30]
   0x0000000000402d8a <+314>:    add    rsp,0x40
   0x0000000000402d8e <+318>:    pop    rbx
   0x0000000000402d8f <+319>:    ret

搜索更多相关主题的帖子: include color 
2015-06-23 14:14
快速回复:改天研究一下 貌似GCC生成的汇编效率很渣
数据加载中...
 
   



关于我们 | 广告合作 | 编程中国 | 清除Cookies | TOP | 手机版

编程中国 版权所有,并保留所有权利。
Powered by Discuz, Processed in 0.017209 second(s), 8 queries.
Copyright©2004-2024, BCCN.NET, All Rights Reserved