asm {
// x87 math helpers.
//
// Conventions used by every routine in this block, as visible in the code:
//  * Arguments are read from the stack frame at SF_ARG1[RBP], SF_ARG2[RBP],...
//    after the standard PUSH RBP / MOV RBP,RSP prologue.
//  * Routines end in RET1 n, where n is 8 bytes per stack argument.
//  * Floating-point results are returned as the raw 64-bit pattern in RAX.
//  * A 3-vector is three consecutive 64-bit floats at offsets 0, 8 and 16.
//  * RAX, RBX, RCX and RDX are used as scratch and are not saved.
//  * Every routine leaves the x87 register stack at the depth it found it.

//************************************
// arg1=ptr to vector.  RAX=sqrt(x*x+y*y+z*z)
_D3_NORM::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RDX,U64 SF_ARG1[RBP]
	FLD	U64 [RDX]
	FMUL	ST0,ST0
	FLD	U64 8[RDX]
	FMUL	ST0,ST0
	FADDP	ST1,ST0
	FLD	U64 16[RDX]
	FMUL	ST0,ST0
	FADDP	ST1,ST0
	FSQRT
	FSTP	U64 SF_ARG1[RBP]	//arg slot reused as scratch
	MOV	RAX,U64 SF_ARG1[RBP]
	POP	RBP
	RET1	8
//************************************
// arg1=ptr to vector.  RAX=x*x+y*y+z*z
_D3_NORM_SQR::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RDX,U64 SF_ARG1[RBP]
	FLD	U64 [RDX]
	FMUL	ST0,ST0
	FLD	U64 8[RDX]
	FMUL	ST0,ST0
	FADDP	ST1,ST0
	FLD	U64 16[RDX]
	FMUL	ST0,ST0
	FADDP	ST1,ST0
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX,U64 SF_ARG1[RBP]
	POP	RBP
	RET1	8
//************************************
// arg1,arg2=ptrs to vectors.  RAX=sqrt of sum of squared component diffs
_D3_DIST::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RDX,U64 SF_ARG1[RBP]
	MOV	RBX,U64 SF_ARG2[RBP]
	FLD	U64 [RDX]
	FSUB	ST0,U64 [RBX]
	FMUL	ST0,ST0
	FLD	U64 8[RDX]
	FSUB	ST0,U64 8[RBX]
	FMUL	ST0,ST0
	FADDP	ST1,ST0
	FLD	U64 16[RDX]
	FSUB	ST0,U64 16[RBX]
	FMUL	ST0,ST0
	FADDP	ST1,ST0
	FSQRT
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX,U64 SF_ARG1[RBP]
	POP	RBP
	RET1	16
//************************************
// arg1,arg2=ptrs to vectors.  RAX=sum of squared component diffs
_D3_DIST_SQR::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RDX,U64 SF_ARG1[RBP]
	MOV	RBX,U64 SF_ARG2[RBP]
	FLD	U64 [RDX]
	FSUB	ST0,U64 [RBX]
	FMUL	ST0,ST0
	FLD	U64 8[RDX]
	FSUB	ST0,U64 8[RBX]
	FMUL	ST0,ST0
	FADDP	ST1,ST0
	FLD	U64 16[RDX]
	FSUB	ST0,U64 16[RBX]
	FMUL	ST0,ST0
	FADDP	ST1,ST0
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX,U64 SF_ARG1[RBP]
	POP	RBP
	RET1	16
//************************************
// arg1=dest ptr, arg2=ptr to a, arg3=ptr to b.  *dest=a cross b.  RAX=dest
// All three components are held on the x87 stack until both sources have
// been fully read, so dest may be the same vector as a or b.
_D3_CROSS::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RAX,U64 SF_ARG1[RBP]
	MOV	RDX,U64 SF_ARG2[RBP]
	MOV	RBX,U64 SF_ARG3[RBP]
	FLD	U64 8[RDX]
	FMUL	ST0,U64 16[RBX]
	FLD	U64 16[RDX]
	FMUL	ST0,U64 8[RBX]
	FSUBP	ST1,ST0		//a.y*b.z-a.z*b.y
	FLD	U64 [RDX]
	FMUL	ST0,U64 16[RBX]
	FLD	U64 16[RDX]
	FMUL	ST0,U64 [RBX]
	FSUBRP	ST1,ST0		//a.z*b.x-a.x*b.z
	FLD	U64 [RDX]
	FMUL	ST0,U64 8[RBX]
	FLD	U64 8[RDX]
	FMUL	ST0,U64 [RBX]
	FSUBP	ST1,ST0		//a.x*b.y-a.y*b.x
	FSTP	U64 16[RAX]	//stack top to bottom is z,y,x
	FSTP	U64 8[RAX]
	FSTP	U64 [RAX]
	POP	RBP
	RET1	24
//************************************
// arg1,arg2=ptrs to vectors.  RAX=sum of component products
_D3_DOT::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RDX,U64 SF_ARG1[RBP]
	MOV	RBX,U64 SF_ARG2[RBP]
	FLD	U64 [RDX]
	FMUL	ST0,U64 [RBX]
	FLD	U64 8[RDX]
	FMUL	ST0,U64 8[RBX]
	FADDP	ST1,ST0
	FLD	U64 16[RDX]
	FMUL	ST0,U64 16[RBX]
	FADDP	ST1,ST0
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX,U64 SF_ARG1[RBP]
	POP	RBP
	RET1	16
//************************************
// arg1=dest ptr, arg2=ptr to a, arg3=ptr to b.  *dest=a+b.  RAX=dest
_D3_ADD::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RAX,U64 SF_ARG1[RBP]
	MOV	RDX,U64 SF_ARG2[RBP]
	MOV	RBX,U64 SF_ARG3[RBP]
	FLD	U64 [RDX]
	FADD	ST0,U64 [RBX]
	FSTP	U64 [RAX]
	FLD	U64 8[RDX]
	FADD	ST0,U64 8[RBX]
	FSTP	U64 8[RAX]
	FLD	U64 16[RDX]
	FADD	ST0,U64 16[RBX]
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	24
//************************************
// arg1=dest ptr, arg2=ptr to a, arg3=ptr to b.  *dest=a-b.  RAX=dest
_D3_SUB::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RAX,U64 SF_ARG1[RBP]
	MOV	RDX,U64 SF_ARG2[RBP]
	MOV	RBX,U64 SF_ARG3[RBP]
	FLD	U64 [RDX]
	FSUB	ST0,U64 [RBX]
	FSTP	U64 [RAX]
	FLD	U64 8[RDX]
	FSUB	ST0,U64 8[RBX]
	FSTP	U64 8[RAX]
	FLD	U64 16[RDX]
	FSUB	ST0,U64 16[RBX]
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	24
//************************************
// arg1=dest ptr, arg2=scalar, arg3=ptr to vector.  *dest=scalar*vector.
// RAX=dest
_D3_MUL::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RAX,U64 SF_ARG1[RBP]
	MOV	RBX,U64 SF_ARG3[RBP]
	FLD	U64 SF_ARG2[RBP]	//scalar stays on the stack
	FLD	ST0
	FMUL	ST0,U64 [RBX]
	FSTP	U64 [RAX]
	FLD	ST0
	FMUL	ST0,U64 8[RBX]
	FSTP	U64 8[RAX]
	FMUL	ST0,U64 16[RBX]	//last use consumes the scalar
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	24
//************************************
// arg1=dest ptr, arg2=ptr to vector, arg3=scalar.  *dest=vector/scalar.
// RAX=dest
_D3_DIV::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RAX,U64 SF_ARG1[RBP]
	MOV	RBX,U64 SF_ARG2[RBP]
	FLD	U64 SF_ARG3[RBP]	//scalar stays on the stack
	FLD	ST0
	FDIVR	ST0,U64 [RBX]	//ST0=mem/ST0
	FSTP	U64 [RAX]
	FLD	ST0
	FDIVR	ST0,U64 8[RBX]
	FSTP	U64 8[RAX]
	FDIVR	ST0,U64 16[RBX]	//last use consumes the scalar
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	24
//************************************
// arg1=dest ptr.  Writes three zero qwords.  RAX=dest
_D3_ZERO::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RAX,U64 SF_ARG1[RBP]
	XOR	RDX,RDX
	MOV	U64 [RAX],RDX
	MOV	U64 8[RAX],RDX
	MOV	U64 16[RAX],RDX
	POP	RBP
	RET1	8
//************************************
// arg1=dest ptr, arg2=src ptr.  Copies three qwords.  RAX=dest
_D3_COPY::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RAX,U64 SF_ARG1[RBP]
	MOV	RBX,U64 SF_ARG2[RBP]
	MOV	RDX,U64 [RBX]
	MOV	U64 [RAX],RDX
	MOV	RDX,U64 8[RBX]
	MOV	U64 8[RAX],RDX
	MOV	RDX,U64 16[RBX]
	MOV	U64 16[RAX],RDX
	POP	RBP
	RET1	16
//************************************
// arg1=dest ptr, arg2..arg4=component values stored at 0,8,16.  RAX=dest
_D3_EQU::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RAX,U64 SF_ARG1[RBP]
	MOV	RDX,U64 SF_ARG2[RBP]
	MOV	U64 [RAX],RDX
	MOV	RDX,U64 SF_ARG3[RBP]
	MOV	U64 8[RAX],RDX
	MOV	RDX,U64 SF_ARG4[RBP]
	MOV	U64 16[RAX],RDX
	POP	RBP
	RET1	32
//************************************
// arg1=dest ptr, arg2=ptr to vector.  *dest+=vector.  RAX=dest
_D3_ADD_EQU::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RAX,U64 SF_ARG1[RBP]
	MOV	RBX,U64 SF_ARG2[RBP]
	FLD	U64 [RAX]
	FADD	ST0,U64 [RBX]
	FSTP	U64 [RAX]
	FLD	U64 8[RAX]
	FADD	ST0,U64 8[RBX]
	FSTP	U64 8[RAX]
	FLD	U64 16[RAX]
	FADD	ST0,U64 16[RBX]
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	16
//************************************
// arg1=dest ptr, arg2=ptr to vector.  *dest-=vector.  RAX=dest
_D3_SUB_EQU::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RAX,U64 SF_ARG1[RBP]
	MOV	RBX,U64 SF_ARG2[RBP]
	FLD	U64 [RAX]
	FSUB	ST0,U64 [RBX]
	FSTP	U64 [RAX]
	FLD	U64 8[RAX]
	FSUB	ST0,U64 8[RBX]
	FSTP	U64 8[RAX]
	FLD	U64 16[RAX]
	FSUB	ST0,U64 16[RBX]
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	16
//************************************
// arg1=dest ptr, arg2=scalar.  *dest*=scalar.  RAX=dest
_D3_MUL_EQU::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RAX,U64 SF_ARG1[RBP]
	FLD	U64 SF_ARG2[RBP]	//scalar stays on the stack
	FLD	ST0
	FMUL	ST0,U64 [RAX]
	FSTP	U64 [RAX]
	FLD	ST0
	FMUL	ST0,U64 8[RAX]
	FSTP	U64 8[RAX]
	FMUL	ST0,U64 16[RAX]	//last use consumes the scalar
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	16
//************************************
// arg1=dest ptr, arg2=scalar.  *dest/=scalar.  RAX=dest
_D3_DIV_EQU::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RAX,U64 SF_ARG1[RBP]
	FLD	U64 SF_ARG2[RBP]	//scalar stays on the stack
	FLD	ST0
	FDIVR	ST0,U64 [RAX]	//ST0=mem/ST0
	FSTP	U64 [RAX]
	FLD	ST0
	FDIVR	ST0,U64 8[RAX]
	FSTP	U64 8[RAX]
	FDIVR	ST0,U64 16[RAX]	//last use consumes the scalar
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	16
//************************************
// arg1=ptr to vector.  Divides the vector by its own length in place.
// When the stored length is exactly +0.0 the vector is left untouched.
// RAX=arg1
_D3_UNIT::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RAX,U64 SF_ARG1[RBP]
	FLD	U64 [RAX]
	FMUL	ST0,ST0
	FLD	U64 8[RAX]
	FMUL	ST0,ST0
	FADDP	ST1,ST0
	FLD	U64 16[RAX]
	FMUL	ST0,ST0
	FADDP	ST1,ST0
	FSQRT
	FST	U64 SF_ARG1[RBP]	//keep length in ST0, copy for int test
	MOV	RBX,U64 SF_ARG1[RBP]
	TEST	RBX,RBX		//all bits clear means +0.0
	JZ	@@05
	FLD	ST0
	FDIVR	ST0,U64 [RAX]
	FSTP	U64 [RAX]
	FLD	ST0
	FDIVR	ST0,U64 8[RAX]
	FSTP	U64 8[RAX]
	FDIVR	ST0,U64 16[RAX]	//last use consumes the length
	FSTP	U64 16[RAX]
	POP	RBP
	RET1	8
@@05:	FFREE	ST0		//discard the length
	FINCSTP
	POP	RBP
	RET1	8
//************************************
// arg1=x.  RAX=x*x
_SQR::
	PUSH	RBP
	MOV	RBP,RSP
	FLD	U64 SF_ARG1[RBP]
	FMUL	ST0,ST0
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX,U64 SF_ARG1[RBP]
	POP	RBP
	RET1	8
//************************************
// arg1=x.  RAX=0 when FTST reports C3 (zero or unordered), -1.0 when it
// reports C0 (negative), else 1.0
_SIGN::
	PUSH	RBP
	MOV	RBP,RSP
	FLD	U64 SF_ARG1[RBP]
	FTST
	FSTSW
	FFREE	ST0
	FINCSTP
	MOV	RDX,RAX		//status word is in the low 16 bits
	XOR	RAX,RAX
	BT	RDX,14		//C3
	JC	@@05
	MOV	RAX,1.0
	BT	RDX,8		//C0
	JNC	@@05
	MOV	RAX,-1.0
@@05:	POP	RBP
	RET1	8
//************************************
// arg1,arg2=coordinates.  RAX=FPATAN result with ST1=arg2, ST0=arg1, i.e.
// the angle whose tangent is arg2/arg1, quadrant chosen from both signs.
// FPATAN completes in a single step and pops the stack, and it leaves C2
// undefined, so the result is stored directly with no status polling.
_ARG::
	PUSH	RBP
	MOV	RBP,RSP
	FLD	U64 SF_ARG2[RBP]
	FLD	U64 SF_ARG1[RBP]
	FPATAN
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX,U64 SF_ARG1[RBP]
	POP	RBP
	RET1	16
//************************************
// The next four routines run FRNDINT with the rounding-control field of
// the x87 control word (mask 0x0C00) set as needed, then restore the
// caller's control word.  Local -8[RBP]=modified CW, -6[RBP]=saved CW.
//
// arg1=x.  RAX=x rounded to nearest (RC=00)
_ROUND::
	PUSH	RBP
	MOV	RBP,RSP
	SUB	RSP,8
	FSTCW	U16 -8[RBP]
	MOV	AX,U16 -8[RBP]
	MOV	U16 -6[RBP],AX
	AND	AX,~0x0C00
	MOV	U16 -8[RBP],AX
	FLDCW	U16 -8[RBP]
	FLD	U64 SF_ARG1[RBP]
	FRNDINT
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX,U64 SF_ARG1[RBP]
	FLDCW	U16 -6[RBP]
	LEAVE
	RET1	8
//************************************
// arg1=x.  RAX=x rounded toward zero (RC=11)
_TRUNC::
	PUSH	RBP
	MOV	RBP,RSP
	SUB	RSP,8
	FSTCW	U16 -8[RBP]
	MOV	AX,U16 -8[RBP]
	MOV	U16 -6[RBP],AX
	OR	AX,0x0C00
	MOV	U16 -8[RBP],AX
	FLDCW	U16 -8[RBP]
	FLD	U64 SF_ARG1[RBP]
	FRNDINT
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX,U64 SF_ARG1[RBP]
	FLDCW	U16 -6[RBP]
	LEAVE
	RET1	8
//************************************
// arg1=x.  RAX=x rounded toward -infinity (RC=01)
_FLOOR::
	PUSH	RBP
	MOV	RBP,RSP
	SUB	RSP,8
	FSTCW	U16 -8[RBP]
	MOV	AX,U16 -8[RBP]
	MOV	U16 -6[RBP],AX
	AND	AX,~0x0C00
	OR	AX,0x0400
	MOV	U16 -8[RBP],AX
	FLDCW	U16 -8[RBP]
	FLD	U64 SF_ARG1[RBP]
	FRNDINT
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX,U64 SF_ARG1[RBP]
	FLDCW	U16 -6[RBP]
	LEAVE
	RET1	8
//************************************
// arg1=x.  RAX=x rounded toward +infinity (RC=10)
_CEIL::
	PUSH	RBP
	MOV	RBP,RSP
	SUB	RSP,8
	FSTCW	U16 -8[RBP]
	MOV	AX,U16 -8[RBP]
	MOV	U16 -6[RBP],AX
	AND	AX,~0x0C00
	OR	AX,0x0800
	MOV	U16 -8[RBP],AX
	FLDCW	U16 -8[RBP]
	FLD	U64 SF_ARG1[RBP]
	FRNDINT
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX,U64 SF_ARG1[RBP]
	FLDCW	U16 -6[RBP]
	LEAVE
	RET1	8
//************************************
// Register-argument power routine: base in RDX, exponent in RAX, both as
// raw 64-bit float patterns.  Plain RET; no stack arguments.
//  * exponent bits all zero -> 1.0
//  * base bits all zero     -> 0
//  * otherwise 2^(pow*log2(|base|)), negated when the base is negative
//    and the exponent truncated to an integer is odd.
// Locals: -8[RBP]=base, then y=pow*log2(|base|), then the result
//         -16[RBP]=pow, then 2^frac(y), then int(pow)
//         -24[RBP]=modified CW, -22[RBP]=saved CW
// Clobbers RCX.  RDX is left unchanged.
SYS_POW::
// RAX=RDX`RAX
	PUSH	RBP
	MOV	RBP,RSP
	SUB	RSP,24
	TEST	RAX,RAX		//To power of zero?
	JNZ	@@05
	MOV	RAX,1.0
	JMP	@@20
@@05:	TEST	RDX,RDX		//zero to a power?
	JNZ	@@10
	XOR	RAX,RAX
	JMP	@@20
@@10:	MOV	RCX,RAX		//RCX=pow
	MOV	U64 -8[RBP],RDX
	MOV	U64 -16[RBP],RAX
	FSTCW	U16 -24[RBP]
	MOV	AX,U16 -24[RBP]
	MOV	U16 -22[RBP],AX
	OR	AX,0x0C00	//round toward zero for FRNDINT
	MOV	U16 -24[RBP],AX
	FLDCW	U16 -24[RBP]
	FLD	U64 -16[RBP]
	FLD	U64 -8[RBP]
	FABS			//base should be pos
	FYL2X			// y*log2(x)
	FST	U64 -8[RBP]
	FRNDINT
	FLD	U64 -8[RBP]
	FSUBRP	ST1,ST0		//fractional part, within F2XM1's range
	F2XM1
	FLD1
	FADDP	ST1,ST0
	FSTP	U64 -16[RBP]
	FLD	U64 -8[RBP]
	FLD	U64 -16[RBP]
	FSCALE			// ST0<<ST1
	TEST	RDX,RDX		//was base neg?
	JGE	@@15
	MOV	U64 -16[RBP],RCX
	FLD	U64 -16[RBP]
	FISTTP	U64 -16[RBP]
	TEST	U64 -16[RBP],1
	JZ	@@15		//was pow odd?
	FCHS
@@15:	FSTP	U64 -8[RBP]
	FFREE	ST0		//discard the scale operand
	FINCSTP
	FLDCW	U16 -22[RBP]
	MOV	RAX,U64 -8[RBP]
@@20:	LEAVE
	RET
//************************************
// arg1=base, arg2=exponent.  Stack-argument wrapper around SYS_POW.
_POW::
	PUSH	RBP
	MOV	RBP,RSP
	MOV	RAX,U64 SF_ARG2[RBP]
	MOV	RDX,U64 SF_ARG1[RBP]
	CALL	SYS_POW
	POP	RBP
	RET1	16
//************************************
// arg1=x.  RAX=10^x, computed as 2^(x*log2(10)), split into integer and
// fractional parts.
// Locals: -8[RBP]=modified CW, -6[RBP]=saved CW, -16[RBP]=scratch.
// The arg slot holds y=x*log2(10).
_POW10::
	PUSH	RBP
	MOV	RBP,RSP
	SUB	RSP,16
	FSTCW	U16 -8[RBP]
	MOV	AX,U16 -8[RBP]
	MOV	U16 -6[RBP],AX
	OR	AX,0x0C00	//round toward zero for FRNDINT
	MOV	U16 -8[RBP],AX
	FLDCW	U16 -8[RBP]
	FLDL2T
	FLD	U64 SF_ARG1[RBP]
	FMULP	ST1,ST0
	FST	U64 SF_ARG1[RBP]
	FRNDINT
	FLD	U64 SF_ARG1[RBP]
	FSUBRP	ST1,ST0		//fractional part
	F2XM1
	FLD1
	FADDP	ST1,ST0
	FSTP	U64 -16[RBP]
	FLD	U64 SF_ARG1[RBP]
	FLD	U64 -16[RBP]
	FSCALE			// ST0<<ST1
	FSTP	U64 -16[RBP]
	FFREE	ST0		//discard the scale operand
	FINCSTP
	FLDCW	U16 -6[RBP]
	MOV	RAX,U64 -16[RBP]
	LEAVE
	RET1	8
//************************************
// arg1=x.  RAX=e^x, computed as 2^(x*log2(e)).  Same scheme and locals
// as _POW10.
_EXP::
	PUSH	RBP
	MOV	RBP,RSP
	SUB	RSP,16
	FSTCW	U16 -8[RBP]
	MOV	AX,U16 -8[RBP]
	MOV	U16 -6[RBP],AX
	OR	AX,0x0C00	//round toward zero for FRNDINT
	MOV	U16 -8[RBP],AX
	FLDCW	U16 -8[RBP]
	FLDL2E
	FLD	U64 SF_ARG1[RBP]
	FMULP	ST1,ST0
	FST	U64 SF_ARG1[RBP]
	FRNDINT
	FLD	U64 SF_ARG1[RBP]
	FSUBRP	ST1,ST0		//fractional part
	F2XM1
	FLD1
	FADDP	ST1,ST0
	FSTP	U64 -16[RBP]
	FLD	U64 SF_ARG1[RBP]
	FLD	U64 -16[RBP]
	FSCALE			// ST0<<ST1
	FSTP	U64 -16[RBP]
	FFREE	ST0		//discard the scale operand
	FINCSTP
	FLDCW	U16 -6[RBP]
	MOV	RAX,U64 -16[RBP]
	LEAVE
	RET1	8
//************************************
// arg1=x.  RAX=log10(2)*log2(x)
_LOG10::
	PUSH	RBP
	MOV	RBP,RSP
	FLDLG2
	FLD	U64 SF_ARG1[RBP]
	FYL2X			// y*log2(x)
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX,U64 SF_ARG1[RBP]
	POP	RBP
	RET1	8
//************************************
// arg1=x.  RAX=1.0*log2(x)
_LOG2::
	PUSH	RBP
	MOV	RBP,RSP
	FLD1
	FLD	U64 SF_ARG1[RBP]
	FYL2X			// y*log2(x)
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX,U64 SF_ARG1[RBP]
	POP	RBP
	RET1	8
//************************************
// arg1=x.  RAX=ln(2)*log2(x)
_LN::
	PUSH	RBP
	MOV	RBP,RSP
	FLDLN2
	FLD	U64 SF_ARG1[RBP]
	FYL2X			// y*log2(x)
	FSTP	U64 SF_ARG1[RBP]
	MOV	RAX,U64 SF_ARG1[RBP]
	POP	RBP
	RET1	8
//************************************
// arg1=value whose low 16 bits are loaded into the x87 control word
_FLDCW::
	PUSH	RBP
	MOV	RBP,RSP
	FLDCW	U16 SF_ARG1[RBP]
	POP	RBP
	RET1	8
//************************************
// No args.  RAX=x87 control word, zero-extended
_FSTCW::
	PUSH	RBP
	MOV	RBP,RSP
	SUB	RSP,8
	FSTCW	U16 -8[RBP]
	MOVZX	RAX,U16 -8[RBP]
	LEAVE
	RET
//************************************
// No args.  RAX=x87 status word, upper bits cleared beforehand
_FSTSW::
	XOR	RAX,RAX
	FSTSW
	RET
//************************************
// No args.  Clears the pending x87 exception flags
_FCLEX::
	FCLEX
	RET
}