Ada计算机图形DirectX之XDSP

----------------------------------------
-- Microsoft DirectX Ada binding lib  --
-- File : XDSP.ads                    --
-- Translator:Dongfeng.Gu,2018/11/19  --
-- Mail: [email protected]                --
-- Progress:100%                      --
----------------------------------------

with win32;                   use win32;
with Interfaces.C;

package XDSP is

   function Shift_Left(A:dword;B:Natural) return dword with import,convention=>Intrinsic;
   function Lsh(A:dword;B:Natural) return dword is (Shift_Left(A,B));

   function Shift_Left(A:Interfaces.C.unsigned;B:Natural) return Interfaces.C.unsigned with import,convention=>Intrinsic;
   function Lsh(A:Interfaces.C.unsigned;B:Natural) return Interfaces.C.unsigned is (Shift_Left(A,B));

   function Shift_Right(A:dword;B:Natural) return dword with import,convention=>Intrinsic;
   function Rsh(A:dword;B:Natural) return dword is (Shift_Right(A,B));

   function Shift_Right(A:Interfaces.C.unsigned;B:Natural) return Interfaces.C.unsigned with import,convention=>Intrinsic;
   function Rsh(A:Interfaces.C.unsigned;B:Natural) return Interfaces.C.unsigned is (Shift_Right(A,B));


   type FLOAT_ARRAY  is array (Natural range<>) of FLOAT;
   type UINT8_ARRAY  is array (Natural range<>) of UINT8;
   type UINT16_ARRAY is array (Natural range<>) of UINT16;
   type UINT32_ARRAY is array (Natural range<>) of UINT32;
   type UINT64_ARRAY is array (Natural range<>) of UINT64;
   type INT8_ARRAY   is array (Natural range<>) of INT8;
   type INT16_ARRAY  is array (Natural range<>) of INT16;
   type INT32_ARRAY  is array (Natural range<>) of INT32;
   type INT64_ARRAY  is array (Natural range<>) of INT64;

   -- From xmmintrin.h(Microsoft)
   type tagM128(i:Integer:=0) is record
      case i is
         when 0=>
            m128_f32:FLOAT_ARRAY(0..3);
         when 1=>
            m128_i8:INT8_ARRAY(0..15);
         when 2=>
            m128_i16:INT16_ARRAY(0..7);
         when 3=>
            m128_i32:INT32_ARRAY(0..3);
         when 4=>
            m128_i64:INT64_ARRAY(0..1);
         when 5=>
            m128_u8:UINT8_ARRAY(0..15);
         when 6=>
            m128_u16:UINT16_ARRAY(0..7);
         when 7=>
            m128_u32:UINT32_ARRAY(0..3);
         when 8=>
            m128_u64:UINT64_ARRAY(0..1);
         when others=>
            NULL;
      end case;
   end record with Unchecked_Union,Alignment=>16,size=>128;
   subtype M128 is tagM128;

   subtype XVECTOR is tagM128;
   type LPXVECTOR is access all XVECTOR;
   type LPCXVECTOR is access constant XVECTOR;
   type XVECTOR_ARRAY is array (UINT range<>) of XVECTOR;

   use type Interfaces.C.unsigned_long;
   use type interfaces.C.unsigned;

   function MM_SHUFFLE(fp3,fp2,fp1,fp0:DWORD) return DWORD is (Lsh(fp3,6) or Lsh(fp2,4) or Lsh(fp1,2) or fp0);
   function MM_SHUFFLE(fp3,fp2,fp1,fp0:interfaces.C.unsigned) return interfaces.C.unsigned is (Lsh(fp3,6) or Lsh(fp2,4) or Lsh(fp1,2) or fp0);

   function mm_add_ps(v1,v2:XVECTOR) return XVECTOR with Inline_Always;
   function mm_sub_ps(v1,v2:XVECTOR) return XVECTOR with Inline_Always;
   function mm_mul_ps(v1,v2:XVECTOR) return XVECTOR with Inline_Always;
   function mm_sqrt_ps(v:XVECTOR) return XVECTOR;

   -- *由于SHUFPS指令的第1个参数(正序,GNU汇编是正序的)必须是立即数,
   -- *不得已采用创建导入文件的方式来实现
   -- *!不得删除文件XDSP_SHUFFLES.s文件,否则不能通过编译,
   -- *生成的程序则不依赖于该导入文件.
   function mm_shuffle_ps(v1,v2:XVECTOR;ui:uint) return XVECTOR with Inline_Always;
   function mm_xor_ps(v1,v2:XVECTOR) return XVECTOR;



   procedure vmulComplex(rResult:out XVECTOR;iResult:out XVECTOR;r1,i1,r2,i2:XVECTOR) with Inline_Always;
   procedure vmulComplex(r1,i1:in out XVECTOR;r2,i2:XVECTOR) with Inline_Always;
   procedure ButterflyDIT4_1(r1,i1:in out XVECTOR) with Inline_Always;
   procedure ButterflyDIT4_4(r0,r1,r2,r3,i0,i1,i2,i3:in out XVECTOR;pUnityTableReal,pUnityTableImaginary:access constant XVECTOR_ARRAY;uStride:uint32;fLast:BOOL) with Inline_Always;
   procedure FFT4(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;uCount:uint32:=1) with Inline_Always;
   procedure FFT8(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;uCount:UINT32:=1) with Inline_Always;
   procedure FFT16(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;uCount:UINT32:=1) with Inline_Always;
   procedure FFT(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;pUnityTable:access constant XVECTOR_ARRAY;uLength:uint32;uCount:UINT32:=1) with Inline;
   procedure FFTInitializeUnityTable(pUnityTable:in out XVECTOR_ARRAY;uLen:uint32) with Inline;
   procedure FFTUnswizzle(pOutput:out FLOAT_ARRAY;pInput: FLOAT_ARRAY;uLog2Length:UINT32) with Inline;
   procedure FFTPolar(pOutput:out XVECTOR_ARRAY;pInputReal: XVECTOR_ARRAY;pInputImaginary: XVECTOR_ARRAY;uLength:UINT32) with Inline;

end XDSP;
----------------------------------------
-- Microsoft DirectX Ada binding lib  --
-- File : XDSP.adb                    --
-- Translator:Dongfeng.Gu,2018/11/19  --
-- Mail: [email protected]                --
-- Progress:100%                      --
----------------------------------------

with Machine_Code;           use Machine_Code;

with Ada.Streams.Stream_IO;  use Ada.Streams.Stream_IO;

package body XDSP is

   use type Interfaces.C.unsigned;

   NL:constant String:=ASCII.CR&ASCII.LF;

   function mm_sqrt_ps(v:XVECTOR) return XVECTOR is
      v1:XVECTOR with Alignment=>16,Warnings=>OFF;
   begin
      asm(

          template=>
            ("movl 12(%%ebp),%%edx;"&NL&
               "movaps (%%edx),%%xmm0;"&NL&
               "sqrtps %%xmm0,%%xmm1;"&NL&
               "movl 8(%%ebp),%%eax;"&NL&
               "movaps %%xmm1,(%%eax);"&NL

            )
         );
      return v1;
   end;

   function mm_set_ps1(f:FLOAT) return XVECTOR is
      v:XVECTOR:=(0,(others=>f));
   begin
      return v;
   end;

   function mm_add_ps(v1,v2:XVECTOR) return XVECTOR is
      v3:XVECTOR;pragma Warnings(off,v3);
   begin
      asm(template=>(
                     "movl 12(%%esp),%%edx;"&
                       "movaps (%%edx),%%xmm0;"&
                       "movl 8(%%esp),%%eax;"&
                       "movl 16(%%esp),%%edx;"&
                       "addps (%%edx),%%xmm0;"&
                       "movaps %%xmm0,(%%eax);"
                    ),

          inputs=>No_Input_Operands,
          Outputs => No_Output_Operands,
          Volatile=>TRUE
         );
      return v3;
   end;

   function mm_sub_ps(v1,v2:XVECTOR) return XVECTOR is
      v3:XVECTOR;pragma Warnings(off,v3);
   begin
      asm(    "movl 12(%%esp),%%edx;"&
                "movaps (%%edx),%%xmm0;"&
                "movl 8(%%esp),%%eax;"&
                "movl 16(%%esp),%%edx;"&
                "subps (%%edx),%%xmm0;"&
                "movaps %%xmm0,(%%eax);",
              inputs=>No_Input_Operands,
              Outputs => No_Output_Operands,
              Volatile=>TRUE
             );
      return v3;
   end;

   function mm_mul_ps(v1,v2:XVECTOR) return XVECTOR is
      v3:XVECTOR;pragma Warnings(off,v3);
   begin
      asm(    "movl 12(%%esp),%%edx;"&
                "movaps (%%edx),%%xmm0;"&
                "movl 8(%%esp),%%eax;"&
                "movl 16(%%esp),%%edx;"&
                "mulps (%%edx),%%xmm0;"&
                "movaps %%xmm0,(%%eax);",
              inputs=>No_Input_Operands,
              Outputs => No_Output_Operands,
              Volatile=>TRUE
             );
      return v3;
   end;

   function mm_xor_ps(v1,v2:XVECTOR) return XVECTOR is
      v3:XVECTOR;pragma Warnings(off,v3);
   begin
      asm(    "movl 12(%%esp),%%edx;"&
                "movaps (%%edx),%%xmm0;"&
                "movl 8(%%esp),%%eax;"&
                "movl 16(%%esp),%%edx;"&
                "xorps (%%edx),%%xmm0;"&
                "movaps %%xmm0,(%%eax);",
              inputs=>No_Input_Operands,
              Outputs => No_Output_Operands,
              Volatile=>TRUE
             );
      return v3;
   end;



   function  IsFileExist(fn:String) return Boolean is
      file:File_Type;
   begin
      Open(File,in_file,fn);
      Close(File);
      return True;
   exception when others=>
         return False;
   end;


   function CreateShuffleAsm(ui:uint) return Boolean is
      File:File_Type;
      S:Stream_Access;
      UII:String:=UI'IMG;
   begin
      Create(File,Out_File,"XDSP_SHUFFLES.s");
      S:=Stream(File);
      String'Write(S,"# function __mm_shuffle_ps(v1,v2:__m128;i:uint) return __m128;"&NL);
      String'Write(S,".file  "&'"'&"XDSP_SHUFFLES.s"&'"'&NL);
      String'Write(S,".text"&NL);
      String'Write(S,".globl _mm_shuffle_ps;"&NL);
      String'Write(S,".globl __mm_shuffle_ps;"&NL);
      String'Write(S,".def _mm_shuffle_ps"&"; .Scl 2; .type 32; .endef;"&NL);
      String'Write(S,".def __mm_shuffle_ps"&"; .Scl 2; .type 32; .endef;"&NL);
      String'Write(S,"_mm_shuffle_ps:"&NL);
      String'Write(S,"__mm_shuffle_ps:"&NL);
      String'Write(S,"pushl %ebp;"&NL&"movl %esp,%ebp;"&NL);
      String'Write(S,"movl 12(%esp),%edx;"&NL&"movaps (%edx),%xmm0;"&NL&"movl 16(%esp),%edx;"&NL&"movaps (%edx),%xmm1;"&NL&"shufps $"&UII(2..UII'Last)&", %xmm1, %xmm0;"&NL);
      String'Write(S,"movl 8(%esp),%eax;"&NL&"movaps %xmm0,(%eax);"&NL&"leave;"&NL&"ret;"&NL&NL);
      return True;
   exception when others=>
         return false;
   end;



   -- *由于SHUFPS指令的第1个参数(正序)必须是立即数,
   -- *不得已采用创建导入文件的方式来实现
   -- *!不得删除文件XDSP_SHUFFLES.s文件,否则不能通过编译,
   -- *生成的程序则不依赖于该导入文件.
   function mm_shuffle_ps(v1,v2:XVECTOR;ui:uint) return XVECTOR is
      v3:XVECTOR;pragma Warnings(off,v3);
   begin
      if CreateShuffleAsm(ui) then
         declare
            pragma Linker_Options("XDSP_SHUFFLES.s");
            function shuffle_ps(v1,v2:XVECTOR;ui:uint) return XVECTOR with Import,external_name=>"_mm_shuffle_ps";
         begin
            return shuffle_ps(v1,v2,ui);
         end;
      end if;
      return (0,(0.0,0.0,0.0,0.0));
   end;




   procedure vmulComplex(rResult:out XVECTOR;iResult:out XVECTOR;r1,i1,r2,i2:XVECTOR) is
   begin
      declare
         vi1i2:XVECTOR := mm_mul_ps(i1, i2);
         vr1r2:XVECTOR := mm_mul_ps(r1, r2);
         vr1i2:XVECTOR := mm_mul_ps(r1, i2);
         vr2i1:XVECTOR := mm_mul_ps(r2, i1);
      begin
         rResult := mm_sub_ps(vr1r2, vi1i2); -- real:      (r1*r2 - i1*i2)
         iResult := mm_add_ps(vr1i2, vr2i1); -- imaginary: (r1*i2 + r2*i1)
      end;
   end;

   procedure vmulComplex(r1,i1:in out XVECTOR;r2,i2:XVECTOR) is
   begin
      -- (r1, i1) * (r2, i2) := (r1r2 - i1i2, r1i2 + r2i1)
      declare
         vi1i2:XVECTOR := mm_mul_ps(i1, i2);
         vr1r2:XVECTOR := mm_mul_ps(r1, r2);
         vr1i2:XVECTOR := mm_mul_ps(r1, i2);
         vr2i1:XVECTOR := mm_mul_ps(r2, i1);
      begin
         r1 := mm_sub_ps(vr1r2, vi1i2); -- real:      (r1*r2 - i1*i2)
         i1 := mm_add_ps(vr1i2, vr2i1); -- imaginary: (r1*i2 + r2*i1)
      end;
   end;

   procedure ButterflyDIT4_1(r1,i1:in out XVECTOR) is
      vDFT4SignBits1 :constant XVECTOR:= (0,(0.0, -0.0,  0.0, -0.0));
      vDFT4SignBits2 :constant XVECTOR:= (0,(0.0,  0.0, -0.0, -0.0));
      vDFT4SignBits3 :constant XVECTOR:= (0,(0.0, -0.0, -0.0,  0.0));
      rTemp:XVECTOR := mm_add_ps( mm_shuffle_ps(r1, r1, MM_SHUFFLE(1, 1, 0, 0)),                               -- (r1X| r1X|r1Y| r1Y) +
                                  mm_xor_ps(mm_shuffle_ps(r1, r1, MM_SHUFFLE(3, 3, 2, 2)), vDFT4SignBits1) ); -- (r1Z|-r1Z|r1W|-r1W)
      iTemp:XVECTOR := mm_add_ps( mm_shuffle_ps(i1, i1, MM_SHUFFLE(1, 1, 0, 0)),                               -- (i1X| i1X|i1Y| i1Y) +
                                  mm_xor_ps(mm_shuffle_ps(i1, i1, MM_SHUFFLE(3, 3, 2, 2)), vDFT4SignBits1) ); -- (i1Z|-i1Z|i1W|-i1W)
      rZrWiZiW:XVECTOR := mm_shuffle_ps(rTemp, iTemp, mm_SHUFFLE(3, 2, 3, 2));       -- (rTempZ|rTempW|iTempZ|iTempW)
      rZiWrZiW:XVECTOR := mm_shuffle_ps(rZrWiZiW, rZrWiZiW, mm_SHUFFLE(3, 0, 3, 0)); -- (rTempZ|iTempW|rTempZ|iTempW)
      iZrWiZrW:XVECTOR := mm_shuffle_ps(rZrWiZiW, rZrWiZiW, mm_SHUFFLE(1, 2, 1, 2)); -- (rTempZ|iTempW|rTempZ|iTempW)
   begin
      r1 := mm_add_ps( mm_shuffle_ps(rTemp, rTemp, mm_SHUFFLE(1, 0, 1, 0)), -- (rTempX| rTempY| rTempX| rTempY) +
                       mm_xor_ps(rZiWrZiW, vDFT4SignBits2) );                -- (rTempZ| iTempW|-rTempZ|-iTempW)
      i1 := mm_add_ps( mm_shuffle_ps(iTemp, iTemp, mm_SHUFFLE(1, 0, 1, 0)), -- (iTempX| iTempY| iTempX| iTempY) +
                       mm_xor_ps(iZrWiZrW, vDFT4SignBits3) );                -- (iTempZ|-rTempW|-iTempZ| rTempW)
   end;

   procedure ButterflyDIT4_4(r0,r1,r2,r3,i0,i1,i2,i3:in out XVECTOR;pUnityTableReal,pUnityTableImaginary:access constant XVECTOR_ARRAY;uStride:uint32;fLast:BOOL) is
      rTemp0, rTemp1, rTemp2, rTemp3, rTemp4, rTemp5, rTemp6, rTemp7:XVECTOR;
      iTemp0, iTemp1, iTemp2, iTemp3, iTemp4, iTemp5, iTemp6, iTemp7:XVECTOR;
   begin
      rTemp0 := mm_add_ps(r0, r2);          iTemp0 := mm_add_ps(i0, i2);
      rTemp2 := mm_add_ps(r1, r3);          iTemp2 := mm_add_ps(i1, i3);
      rTemp1 := mm_sub_ps(r0, r2);          iTemp1 := mm_sub_ps(i0, i2);
      rTemp3 := mm_sub_ps(r1, r3);          iTemp3 := mm_sub_ps(i1, i3);
      rTemp4 := mm_add_ps(rTemp0, rTemp2);  iTemp4 := mm_add_ps(iTemp0, iTemp2);
      rTemp5 := mm_add_ps(rTemp1, iTemp3);  iTemp5 := mm_sub_ps(iTemp1, rTemp3);
      rTemp6 := mm_sub_ps(rTemp0, rTemp2);  iTemp6 := mm_sub_ps(iTemp0, iTemp2);
      rTemp7 := mm_sub_ps(rTemp1, iTemp3);  iTemp7 := mm_add_ps(iTemp1, rTemp3);

      -- calculating Result
      -- vmulComplex(rTemp0, iTemp0, rTemp0, iTemp0, pUnityTableReal(0), pUnityTableImaginary(0)); -- first one is always trivial
      vmulComplex(rTemp5, iTemp5, pUnityTableReal(uStride), pUnityTableImaginary(uStride));
      vmulComplex(rTemp6, iTemp6, pUnityTableReal(uStride*2), pUnityTableImaginary(uStride*2));
      vmulComplex(rTemp7, iTemp7, pUnityTableReal(uStride*3), pUnityTableImaginary(uStride*3));
      if (fLast/=win32.FALSE) then null;
         ButterflyDIT4_1(rTemp4, iTemp4);
         ButterflyDIT4_1(rTemp5, iTemp5);
         ButterflyDIT4_1(rTemp6, iTemp6);
         ButterflyDIT4_1(rTemp7, iTemp7);
      end if;


      r0 := rTemp4;    i0 := iTemp4;
      r1 := rTemp5;    i1 := iTemp5;
      r2 := rTemp6;    i2 := iTemp6;
      r3 := rTemp7;    i3 := iTemp7;

   end;

   procedure FFT4(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;uCount:uint32:=1) is
   begin
      for uIndex in 0..uCount-1 loop
         ButterflyDIT4_1(pReal(uIndex), pImaginary(uIndex));
      end loop;
   end FFT4;

   procedure FFT8(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;uCount:UINT32:=1) is
      wr1:constant XVECTOR := (0,(  1.0,  0.70710677,  0.0, -0.70710677));
      wi1:constant XVECTOR := (0,(  0.0, -0.70710677, -1.0, -0.70710677));
      wr2:constant XVECTOR := (0,( -1.0, -0.70710677,  0.0,  0.70710677));
      wi2:constant XVECTOR := (0,(  0.0,  0.70710677,  1.0,  0.70710677));

      pR:access XVECTOR_ARRAY:=pReal'Unrestricted_Access;
      pI:access XVECTOR_ARRAY:=pImaginary'Unrestricted_Access;
   begin

      for uIndex in 0..uCount-1 loop
         declare

            oddsR  :XVECTOR := mm_shuffle_ps(pR(0), pR(1), MM_SHUFFLE(3, 1, 3, 1));
            evensR :XVECTOR := mm_shuffle_ps(pR(0), pR(1), MM_SHUFFLE(2, 0, 2, 0));
            oddsI  :XVECTOR := mm_shuffle_ps(pI(0), pI(1), MM_SHUFFLE(3, 1, 3, 1));
            evensI :XVECTOR := mm_shuffle_ps(pI(0), pI(1), MM_SHUFFLE(2, 0, 2, 0));
            r,i:XVECTOR;

         begin
            ButterflyDIT4_1(oddsR, oddsI);
            ButterflyDIT4_1(evensR, evensI);
            vmulComplex(r, i, oddsR, oddsI, wr1, wi1);
            pR(0) := mm_add_ps(evensR, r);
            pI(0) := mm_add_ps(evensI, i);

            vmulComplex(r, i, oddsR, oddsI, wr2, wi2);
            pR(1) := mm_add_ps(evensR, r);
            pI(1) := mm_add_ps(evensI, i);

         end;
      end loop;
   end;

   procedure FFT16(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;uCount:UINT32:=1) is
      aUnityTableReal:XVECTOR_ARRAY(0..3):=((0,(1.0, 1.0, 1.0, 1.0)), (0,(1.0, 0.92387950, 0.70710677, 0.38268343)), (0,(1.0, 0.70710677, -4.3711388e-008, -0.70710677)), (0,(1.0, 0.38268343, -0.70710677, -0.92387950)));
      aUnityTableImaginary:XVECTOR_ARRAY(0..3):=((0,(-0.0, -0.0, -0.0, -0.0)), (0,(-0.0, -0.38268343, -0.70710677, -0.92387950)), (0,(-0.0, -0.70710677, -1.0, -0.70710677)), (0,(-0.0, -0.92387950, -0.70710677, 0.38268343)));
   begin
      for uIndex in 0..uCount-1 loop
         ButterflyDIT4_4(pReal(uIndex*4),
                         pReal(uIndex*4 + 1),
                         pReal(uIndex*4 + 2),
                         pReal(uIndex*4 + 3),
                         pImaginary(uIndex*4),
                         pImaginary(uIndex*4 + 1),
                         pImaginary(uIndex*4 + 2),
                         pImaginary(uIndex*4 + 3),
                         aUnityTableReal'unrestricted_access,
                         aUnityTableImaginary'unrestricted_access,
                         1,
                         win32.TRUE
                        );
      end loop;
   end FFT16;

   procedure FFT(pReal:in out XVECTOR_ARRAY;pImaginary:in out XVECTOR_ARRAY;pUnityTable:access constant XVECTOR_ARRAY;uLength:uint32;uCount:UINT32:=1) is
      pUnityTableReal:access constant XVECTOR_ARRAY:=pUnityTable;
      pUnityTableImaginary:access XVECTOR_ARRAY:=pUnityTable(Rsh(uLength,2)..pUnityTable'Last)'unrestricted_access;
      uTotal:constant UINT32:=uCount * uLength;
      uTotal_vectors:constant UINT32:=Rsh(uTotal,2);
      uStage_vectors:constant UINT32:=Rsh(uLength,2);
      uStride:constant UINT32:=Rsh(uStage_vectors,2);
      uSkip:constant UINT32:=uStage_vectors-uStride;
   begin
      for uIndex in 0..uTotal_vectors-1 loop
         declare
            n:UINT32:=(uIndex/uStride) * (uStride + uSkip) + (uIndex mod uStride);
         begin
            ButterflyDIT4_4(pReal(n),
                            pReal(n + uStride),
                            pReal(n + uStride*2),
                            pReal(n + uStride*3),
                            pImaginary(n ),
                            pImaginary(n + uStride),
                            pImaginary(n + uStride*2),
                            pImaginary(n + uStride*3),
                            pUnityTableReal(n mod uStage_vectors..pUnityTableReal'Last)'unrestricted_access,
                            pUnityTableImaginary( n mod uStage_vectors..pUnityTableImaginary'Last)'unrestricted_access,
                            uStride, win32.FALSE);
         end;

         if uLength>64 then
            FFT(pReal, pImaginary, pUnityTable(Rsh(uLength,1)..pUnityTable'Last)'unrestricted_access, Rsh(uLength,2), uCount*4);
         elsif uLength=64 then
            FFT16(pReal, pImaginary, uCount*4);
         elsif uLength=32 then
            FFT8(pReal, pImaginary, uCount*4);
         elsif uLength=16 then
            FFT4(pReal, pImaginary, uCount*4);
         end if;

      end loop;

   end FFT;

   function sinf(f:FLOAT) return float with Import,external_name=>"sinf";
   function cosf(f:FLOAT) return float with Import,external_name=>"cosf";

   procedure FFTInitializeUnityTable(pUnityTable:in out XVECTOR_ARRAY;uLen:uint32) is
      pfUnityTable: FLOAT_ARRAY(0..4*integer(uLen)-1) with Import,Address=>pUnityTable'Address;
   begin
      if (uLen>16) then
         declare
            flStep:FLOAT:=6.283185307 / FLOAT(uLen);
            uLength:uint32:=uLen;
         begin
            uLength:=Rsh(uLength,2);
            for i in 0..3 loop
               for j in 0..uLength-1 loop
                  declare
                     uIndex:UINT32:=(uint32(i)*uLength) + j;
                  begin
                     pfUnityTable(Integer(uIndex))             := cosf(FLOAT(i)*FLOAT(j)*flStep);  -- real component
                     pfUnityTable(Integer(uIndex) + Integer(uLength)*4) := -sinf(FLOAT(i)*FLOAT(j)*flStep); -- imaginary component
                  end;
               end loop;
            end loop;
            pfUnityTable:= pfUnityTable(Integer(uLength)*8..pfUnityTable'Last);
         end;
      end if;
   end FFTInitializeUnityTable;

   procedure FFTUnswizzle(pOutput:out FLOAT_ARRAY;pInput: FLOAT_ARRAY;uLog2Length:UINT32) is
      uLength : constant Uint32 := Lsh(1,integer(uLog2Length));
   begin
      if ((uLog2Length and 1) = 0) then
         for uIndex in 0..uLength-1 loop
            declare
               n:uint32:=uIndex;
            begin
               n := ( Rsh(n and 16#cccccccc#, 2 )  or  Lsh(n and 16#33333333#, 2 ));
               n := ( Rsh(n and 16#f0f0f0f0#, 4 )  or  Lsh(n and 16#0f0f0f0f#, 4 ));
               n := ( Rsh(n and 16#ff00ff00#, 8 )  or  Lsh(n and 16#00ff00ff#, 8 ));
               n := ( Rsh(n and 16#ffff0000#, 16 ) or  Lsh(n and 16#0000ffff#, 16 ));
               n:=Rsh(n , (32 - Integer(uLog2Length)));
               pOutput(Integer(n)) := pInput(integer(uIndex));
            end;
         end loop;
      else
         for uIndex in 0..uLength-1 loop
            declare
               n:uint32:=Rsh(uIndex,3);
            begin
               n := ( Rsh(n and 16#cccccccc#, 2 )  or  Lsh(n and 16#33333333#, 2 ));
               n := ( Rsh(n and 16#f0f0f0f0#, 4 )  or  Lsh(n and 16#0f0f0f0f#, 4 ));
               n := ( Rsh(n and 16#ff00ff00#, 8 )  or  Lsh(n and 16#00ff00ff#, 8 ));
               n := ( Rsh(n and 16#ffff0000#, 16 ) or  Lsh(n and 16#0000ffff#, 16 ));
               n:=Rsh(n , Integer(32 - (uLog2Length-3)));

               n:=n or Lsh((uIndex and 7) , integer((uLog2Length - 3)));
               pOutput(Integer(n)) := pInput(Integer(uIndex));

            end;
         end loop;
      end if;
   end FFTUnswizzle;


   procedure FFTPolar(pOutput:out XVECTOR_ARRAY;pInputReal: XVECTOR_ARRAY;pInputImaginary: XVECTOR_ARRAY;uLength:UINT32) is
      flOneOverLength:FLOAT:=1.0/FLOAT(ulength);
      vOneOverLength :XVECTOR:= mm_set_ps1(flOneOverLength);
   begin
      for uIndex in 0..Rsh(uLength,2)-1 loop
         declare
            vReal     :XVECTOR := mm_mul_ps(pInputReal(uIndex), vOneOverLength);
            vImaginary:XVECTOR := mm_mul_ps(pInputImaginary(uIndex), vOneOverLength);
            vRR       :XVECTOR := mm_mul_ps(vReal, vReal);
            vII       :XVECTOR := mm_mul_ps(vImaginary, vImaginary);
            vRRplusII :XVECTOR := mm_add_ps(vRR, vII);
            vTotal    :XVECTOR := mm_sqrt_ps(vRRplusII);
         begin
            pOutput(uIndex) := mm_add_ps(vTotal, vTotal);
         end;
      end loop;

   end FFTPolar;




   --
   -- THE END.
   --

end XDSP;














































下方的附件必须保存为XDSP_SHUFFLES.s

# function __mm_shuffle_ps(v1,v2:__m128;i:uint) return __m128;
.file  "XDSP_SHUFFLES.s"
.text
.globl _mm_shuffle_ps;
.globl __mm_shuffle_ps;
.def _mm_shuffle_ps; .Scl 2; .type 32; .endef;
.def __mm_shuffle_ps; .Scl 2; .type 32; .endef;
_mm_shuffle_ps:
__mm_shuffle_ps:
pushl %ebp;
movl %esp,%ebp;
movl 12(%esp),%edx;
movaps (%edx),%xmm0;
movl 16(%esp),%edx;
movaps (%edx),%xmm1;
shufps $221, %xmm1, %xmm0;
movl 8(%esp),%eax;
movaps %xmm0,(%eax);
leave;
ret;

猜你喜欢

转载自blog.csdn.net/adacore/article/details/84245966