From 4cc2d40642d0d90eb62a544e9347629df681ec74 Mon Sep 17 00:00:00 2001 From: Doina Chiroiu Date: Thu, 13 Aug 2020 10:22:33 +0000 Subject: [PATCH 01/42] Simple version of sandbox From 80dd8733755bdb2bde2452e929868c69407a0350 Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Mon, 17 Aug 2020 11:20:56 +0000 Subject: [PATCH 02/42] Initial version sandbox --- oss-internship-2020/pffft/CMakeLists.txt | 91 + oss-internship-2020/pffft/Makefile | 15 + oss-internship-2020/pffft/README.txt | 416 +++ oss-internship-2020/pffft/fftpack.c | 3112 +++++++++++++++++ oss-internship-2020/pffft/fftpack.h | 799 +++++ oss-internship-2020/pffft/myNotes.txt | 101 + oss-internship-2020/pffft/pffft.c | 1881 ++++++++++ oss-internship-2020/pffft/pffft.h | 177 + oss-internship-2020/pffft/test_pffft.c | 419 +++ .../pffft/test_pffft_sandboxed.cc | 150 + 10 files changed, 7161 insertions(+) create mode 100644 oss-internship-2020/pffft/CMakeLists.txt create mode 100644 oss-internship-2020/pffft/Makefile create mode 100644 oss-internship-2020/pffft/README.txt create mode 100644 oss-internship-2020/pffft/fftpack.c create mode 100644 oss-internship-2020/pffft/fftpack.h create mode 100644 oss-internship-2020/pffft/myNotes.txt create mode 100644 oss-internship-2020/pffft/pffft.c create mode 100644 oss-internship-2020/pffft/pffft.h create mode 100644 oss-internship-2020/pffft/test_pffft.c create mode 100644 oss-internship-2020/pffft/test_pffft_sandboxed.cc diff --git a/oss-internship-2020/pffft/CMakeLists.txt b/oss-internship-2020/pffft/CMakeLists.txt new file mode 100644 index 0000000..c0d62cb --- /dev/null +++ b/oss-internship-2020/pffft/CMakeLists.txt @@ -0,0 +1,91 @@ +cmake_minimum_required(VERSION 3.10) + +project(pffft CXX C) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED True) + +add_library(pffft STATIC + pffft.c + pffft.h + fftpack.c + fftpack.h +) + +add_executable(pffft_main + test_pffft.c +) + +target_link_libraries(pffft_main PRIVATE + pffft +) + +set(MATH_LIBS "") 
+include(CheckLibraryExists) +check_library_exists(m sin "" LIBM) +if(LIBM) + list(APPEND MATH_LIBS "m") +endif() + +target_link_libraries(pffft PUBLIC ${MATH_LIBS}) + + +# Adding dependencies +set(SAPI_ROOT "/usr/local/google/home/inach/sandboxed-api" CACHE PATH "Path to the Sandboxed API source tree") +# Then configure: +# mkdir -p build && cd build +# cmake .. -G Ninja -DSAPI_ROOT=$HOME/sapi_root + +set(SAPI_ENABLE_EXAMPLES OFF CACHE BOOL "") +set(SAPI_ENABLE_TESTS OFF CACHE BOOL "") +add_subdirectory("${SAPI_ROOT}" + "${CMAKE_BINARY_DIR}/sandboxed-api-build" + # Omit this to have the full Sandboxed API in IDE + EXCLUDE_FROM_ALL) + +add_sapi_library(pffft_sapi + FUNCTIONS pffft_new_setup + pffft_destroy_setup + pffft_transform + pffft_transform_ordered + pffft_zreorder + pffft_zconvolve_accumulate + pffft_aligned_malloc + pffft_aligned_free + pffft_simd_size + cffti + cfftf + cfftb + rffti + rfftf + rfftb + cosqi + cosqf + cosqb + costi + cost + sinqi + sinqb + sinqf + sinti + sint + + INPUTS pffft.h fftpack.h + LIBRARY pffft + LIBRARY_NAME pffft + + NAMESPACE "" +) + +target_include_directories(pffft_sapi INTERFACE + "${PROJECT_BINARY_DIR}" +) + +add_executable(pffft_sandboxed + test_pffft_sandboxed.cc +) + +target_link_libraries(pffft_sandboxed PRIVATE + pffft_sapi + sapi::sapi +) \ No newline at end of file diff --git a/oss-internship-2020/pffft/Makefile b/oss-internship-2020/pffft/Makefile new file mode 100644 index 0000000..326fd90 --- /dev/null +++ b/oss-internship-2020/pffft/Makefile @@ -0,0 +1,15 @@ +CXXFLAGS ?= -std=c++17 + +pffft_main: test_pffft.o libpffft.a + $(CXX) -o $@ $(CXXFLAGS) $(LDFLAGS) $^ + +libpffft.a: pffft.o fftpack.o + ar rcs $@ $^ + +pffft.c: pffft.h + +fftpack.c: fftpack.h + +.PHONY: clean +clean: + rm -f *.o *.a pffft_main \ No newline at end of file diff --git a/oss-internship-2020/pffft/README.txt b/oss-internship-2020/pffft/README.txt new file mode 100644 index 0000000..ee20b42 --- /dev/null +++ 
b/oss-internship-2020/pffft/README.txt @@ -0,0 +1,416 @@ +PFFFT: a pretty fast FFT. + +TL;DR +-- + +PFFFT does 1D Fast Fourier Transforms, of single precision real and +complex vectors. It tries to do it fast, it tries to be correct, and it +tries to be small. Computations do take advantage of SSE1 instructions +on x86 cpus, Altivec on powerpc cpus, and NEON on ARM cpus. The +license is BSD-like. + + +Why does it exist: +-- + +I was in search of a good performing FFT library , preferably very +small and with a very liberal license. + +When one says "fft library", FFTW ("Fastest Fourier Transform in the +West") is probably the first name that comes to mind -- I guess that +99% of open-source projects that need a FFT do use FFTW, and are happy +with it. However, it is quite a large library , which does everything +fft related (2d transforms, 3d transforms, other transformations such +as discrete cosine , or fast hartley). And it is licensed under the +GNU GPL , which means that it cannot be used in non open-source +products. + +An alternative to FFTW that is really small, is the venerable FFTPACK +v4, which is available on NETLIB. A more recent version (v5) exists, +but it is larger as it deals with multi-dimensional transforms. This +is a library that is written in FORTRAN 77, a language that is now +considered as a bit antiquated by many. FFTPACKv4 was written in 1985, +by Dr Paul Swarztrauber of NCAR, more than 25 years ago ! And despite +its age, benchmarks show that it is still a very good performing FFT +library, see for example the 1d single precision benchmarks here: +http://www.fftw.org/speed/opteron-2.2GHz-32bit/ . It is however not +competitive with the fastest ones, such as FFTW, Intel MKL, AMD ACML, +Apple vDSP. The reason for that is that those libraries do take +advantage of the SSE SIMD instructions available on Intel CPUs, +available since the days of the Pentium III.
These instructions deal +with small vectors of 4 floats at a time, instead of a single float +for a traditional FPU, so when using these instructions one may expect +a 4-fold performance improvement. + +The idea was to take this fortran fftpack v4 code, translate to C, +modify it to deal with those SSE instructions, and check that the +final performance is not completely ridiculous when compared to other +SIMD FFT libraries. Translation to C was performed with f2c ( +http://www.netlib.org/f2c/ ). The resulting file was a bit edited in +order to remove the thousands of gotos that were introduced by +f2c. You will find the fftpack.h and fftpack.c sources in the +repository, this is a complete translation of +http://www.netlib.org/fftpack/ , with the discrete cosine transform +and the test program. There is no license information in the netlib +repository, but it was confirmed to me by the fftpack v5 curators that +the same terms do apply to fftpack v4: +http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html . This is a +"BSD-like" license, it is compatible with proprietary projects. + +Adapting fftpack to deal with the SIMD 4-element vectors instead of +scalar single precision numbers was more complex than I originally +thought, especially with the real transforms, and I ended up writing +more code than I planned. + + +The code: +-- + +Only two files, in good old C, pffft.c and pffft.h . The API is very +very simple, just make sure that you read the comments in pffft.h. + + +Comparison with other FFTs: +-- + +The idea was not to break speed records, but to get a decently fast +fft that is at least 50% as fast as the fastest FFT -- especially on +the slowest computers. I'm more focused on getting the best performance +on slow cpus (Atom, Intel Core 1, old Athlons, ARM Cortex-A9...), than +on getting top performance on today's fastest cpus.
+ +It can be used in a real-time context as the fft functions do not +perform any memory allocation -- that is why they accept a 'work' +array in their arguments. + +It is also a bit focused on performing 1D convolutions, that is why it +provides "unordered" FFTs , and a fourier domain convolution +operation. + + +Benchmark results (cpu tested: core i7 2600, core 2 quad, core 1 duo, atom N270, cortex-A9, cortex-A15, A8X) +-- + +The benchmark shows the performance of various fft implementations measured in +MFlops, with the number of floating point operations being defined as 5Nlog2(N) +for a length N complex fft, and 2.5*Nlog2(N) for a real fft. +See http://www.fftw.org/speed/method.html for an explanation of these formulas. + +MacOS Lion, gcc 4.2, 64-bit, fftw 3.3 on a 3.4 GHz core i7 2600 + +Built with: + + gcc-4.2 -o test_pffft -arch x86_64 -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -DHAVE_VECLIB -framework veclib -DHAVE_FFTW -lfftw3f + +| input len |real FFTPack| real vDSP | real FFTW | real PFFFT | |cplx FFTPack| cplx vDSP | cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| +| 64 | 2816 | 8596 | 7329 | 8187 | | 2887 | 14898 | 14668 | 11108 | +| 96 | 3298 | n/a | 8378 | 7727 | | 3953 | n/a | 15680 | 10878 | +| 128 | 3507 | 11575 | 9266 | 10108 | | 4233 | 17598 | 16427 | 12000 | +| 160 | 3391 | n/a | 9838 | 10711 | | 4220 | n/a | 16653 | 11187 | +| 192 | 3919 | n/a | 9868 | 10956 | | 4297 | n/a | 15770 | 12540 | +| 256 | 4283 | 13179 | 10694 | 13128 | | 4545 | 19550 | 16350 | 13822 | +| 384 | 3136 | n/a | 10810 | 12061 | | 3600 | n/a | 16103 | 13240 | +| 480 | 3477 | n/a | 10632 | 12074 | | 3536 | n/a | 11630 | 12522 | +| 512 | 3783 | 15141 | 11267 | 13838 | | 3649 | 20002 | 16560 | 13580 | +| 640 | 3639 | n/a | 11164 | 13946 | | 3695 | n/a | 15416 | 13890 | +| 768 | 3800 | n/a | 11245 | 13495 | | 3590 | n/a | 15802 | 14552 | +| 
800 | 3440 | n/a | 10499 | 13301 | | 3659 | n/a | 12056 | 13268 | +| 1024 | 3924 | 15605 | 11450 | 15339 | | 3769 | 20963 | 13941 | 15467 | +| 2048 | 4518 | 16195 | 11551 | 15532 | | 4258 | 20413 | 13723 | 15042 | +| 2400 | 4294 | n/a | 10685 | 13078 | | 4093 | n/a | 12777 | 13119 | +| 4096 | 4750 | 16596 | 11672 | 15817 | | 4157 | 19662 | 14316 | 14336 | +| 8192 | 3820 | 16227 | 11084 | 12555 | | 3691 | 18132 | 12102 | 13813 | +| 9216 | 3864 | n/a | 10254 | 12870 | | 3586 | n/a | 12119 | 13994 | +| 16384 | 3822 | 15123 | 10454 | 12822 | | 3613 | 16874 | 12370 | 13881 | +| 32768 | 4175 | 14512 | 10662 | 11095 | | 3881 | 14702 | 11619 | 11524 | +| 262144 | 3317 | 11429 | 6269 | 9517 | | 2810 | 11729 | 7757 | 10179 | +| 1048576 | 2913 | 10551 | 4730 | 5867 | | 2661 | 7881 | 3520 | 5350 | +|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| + + +Debian 6, gcc 4.4.5, 64-bit, fftw 3.3.1 on a 3.4 GHz core i7 2600 + +Built with: +gcc -o test_pffft -DHAVE_FFTW -msse2 -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L$HOME/local/lib -I$HOME/local/include/ -lfftw3f -lm + +| N (input length) | real FFTPack | real FFTW | real PFFFT | | cplx FFTPack | cplx FFTW | cplx PFFFT | +|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| +| 64 | 3840 | 7680 | 8777 | | 4389 | 20480 | 11171 | +| 96 | 4214 | 9633 | 8429 | | 4816 | 22477 | 11238 | +| 128 | 3584 | 10240 | 10240 | | 5120 | 23893 | 11947 | +| 192 | 4854 | 11095 | 12945 | | 4854 | 22191 | 14121 | +| 256 | 4096 | 11703 | 16384 | | 5120 | 23406 | 13653 | +| 384 | 4395 | 14651 | 12558 | | 4884 | 19535 | 14651 | +| 512 | 5760 | 13166 | 15360 | | 4608 | 23040 | 15360 | +| 768 | 4907 | 14020 | 16357 | | 4461 | 19628 | 14020 | +| 1024 | 5120 | 14629 | 14629 | | 5120 | 20480 | 15754 | +| 2048 | 5632 | 14080 | 18773 | | 4693 | 12516 | 16091 | +| 4096 | 5120 | 13653 | 17554 | | 4726 | 7680 | 14456 | +| 8192 | 4160 | 
7396 | 13312 | | 4437 | 14791 | 13312 | +| 9216 | 4210 | 6124 | 13473 | | 4491 | 7282 | 14970 | +| 16384 | 3976 | 11010 | 14313 | | 4210 | 11450 | 13631 | +| 32768 | 4260 | 10224 | 10954 | | 4260 | 6816 | 11797 | +| 262144 | 3736 | 6896 | 9961 | | 2359 | 8965 | 9437 | +| 1048576 | 2796 | 4534 | 6453 | | 1864 | 3078 | 5592 | +|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| + + + +MacOS Snow Leopard, gcc 4.0, 32-bit, fftw 3.3 on a 1.83 GHz core 1 duo + +Built with: + + gcc -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework veclib + +| input len |real FFTPack| real vDSP | real FFTW | real PFFFT | |cplx FFTPack| cplx vDSP | cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| +| 64 | 745 | 2145 | 1706 | 2028 | | 961 | 3356 | 3313 | 2300 | +| 96 | 877 | n/a | 1976 | 1978 | | 1059 | n/a | 3333 | 2233 | +| 128 | 951 | 2808 | 2213 | 2279 | | 1202 | 3803 | 3739 | 2494 | +| 192 | 1002 | n/a | 2456 | 2429 | | 1186 | n/a | 3701 | 2508 | +| 256 | 1065 | 3205 | 2641 | 2793 | | 1302 | 4013 | 3912 | 2663 | +| 384 | 845 | n/a | 2759 | 2499 | | 948 | n/a | 3729 | 2504 | +| 512 | 900 | 3476 | 2956 | 2759 | | 974 | 4057 | 3954 | 2645 | +| 768 | 910 | n/a | 2912 | 2737 | | 975 | n/a | 3837 | 2614 | +| 1024 | 936 | 3583 | 3107 | 3009 | | 1006 | 4124 | 3821 | 2697 | +| 2048 | 1057 | 3585 | 3091 | 2837 | | 1089 | 3889 | 3701 | 2513 | +| 4096 | 1083 | 3524 | 3092 | 2733 | | 1039 | 3617 | 3462 | 2364 | +| 8192 | 874 | 3252 | 2967 | 2363 | | 911 | 3106 | 2789 | 2302 | +| 9216 | 898 | n/a | 2420 | 2290 | | 865 | n/a | 2676 | 2204 | +| 16384 | 903 | 2892 | 2506 | 2421 | | 899 | 3026 | 2797 | 2289 | +| 32768 | 965 | 2837 | 2550 | 2358 | | 920 | 2922 | 2763 | 2240 | +| 262144 | 738 | 2422 | 1589 | 1708 | | 610 | 2038 | 1436 | 1091 | +| 1048576 | 528 
| 1207 | 845 | 880 | | 606 | 1020 | 669 | 1036 | +|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| + + + +Ubuntu 11.04, gcc 4.5, 32-bit, fftw 3.2 on a 2.66 core 2 quad + +Built with: +gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm + +| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------| |------------+------------+------------| +| 64 | 1920 | 3614 | 5120 | | 2194 | 7680 | 6467 | +| 96 | 1873 | 3549 | 5187 | | 2107 | 8429 | 5863 | +| 128 | 2240 | 3773 | 5514 | | 2560 | 7964 | 6827 | +| 192 | 1765 | 4569 | 7767 | | 2284 | 9137 | 7061 | +| 256 | 2048 | 5461 | 7447 | | 2731 | 9638 | 7802 | +| 384 | 1998 | 5861 | 6762 | | 2313 | 9253 | 7644 | +| 512 | 2095 | 6144 | 7680 | | 2194 | 10240 | 7089 | +| 768 | 2230 | 5773 | 7549 | | 2045 | 10331 | 7010 | +| 1024 | 2133 | 6400 | 8533 | | 2133 | 10779 | 7877 | +| 2048 | 2011 | 7040 | 8665 | | 1942 | 10240 | 7768 | +| 4096 | 2194 | 6827 | 8777 | | 1755 | 9452 | 6827 | +| 8192 | 1849 | 6656 | 6656 | | 1752 | 7831 | 6827 | +| 9216 | 1871 | 5858 | 6416 | | 1643 | 6909 | 6266 | +| 16384 | 1883 | 6223 | 6506 | | 1664 | 7340 | 6982 | +| 32768 | 1826 | 6390 | 6667 | | 1631 | 7481 | 6971 | +| 262144 | 1546 | 4075 | 5977 | | 1299 | 3415 | 3551 | +| 1048576 | 1104 | 2071 | 1730 | | 1104 | 1149 | 1834 | +|-----------+------------+------------+------------| |------------+------------+------------| + + + +Ubuntu 11.04, gcc 4.5, 32-bit, fftw 3.3 on a 1.6 GHz Atom N270 + +Built with: +gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm + +| N (input length) | real FFTPack | real FFTW | real PFFFT | | cplx FFTPack | cplx FFTW | cplx PFFFT | 
+|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| +| 64 | 452 | 1041 | 1336 | | 549 | 2318 | 1781 | +| 96 | 444 | 1297 | 1297 | | 503 | 2408 | 1686 | +| 128 | 527 | 1525 | 1707 | | 543 | 2655 | 1886 | +| 192 | 498 | 1653 | 1849 | | 539 | 2678 | 1942 | +| 256 | 585 | 1862 | 2156 | | 594 | 2777 | 2244 | +| 384 | 499 | 1870 | 1998 | | 511 | 2586 | 1890 | +| 512 | 562 | 2095 | 2194 | | 542 | 2973 | 2194 | +| 768 | 545 | 2045 | 2133 | | 545 | 2365 | 2133 | +| 1024 | 595 | 2133 | 2438 | | 569 | 2695 | 2179 | +| 2048 | 587 | 2125 | 2347 | | 521 | 2230 | 1707 | +| 4096 | 495 | 1890 | 1834 | | 492 | 1876 | 1672 | +| 8192 | 469 | 1548 | 1729 | | 438 | 1740 | 1664 | +| 9216 | 468 | 1663 | 1663 | | 446 | 1585 | 1531 | +| 16384 | 453 | 1608 | 1767 | | 398 | 1476 | 1664 | +| 32768 | 456 | 1420 | 1503 | | 387 | 1388 | 1345 | +| 262144 | 309 | 385 | 726 | | 262 | 415 | 840 | +| 1048576 | 280 | 351 | 739 | | 261 | 313 | 797 | +|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| + + + +Windows 7, visual c++ 2010 on a 1.6 GHz Atom N270 + +Built with: +cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c + +(visual c++ is definitively not very good with SSE intrinsics...) 
+ +| N (input length) | real FFTPack | real PFFFT | | cplx FFTPack | cplx PFFFT | +|------------------+--------------+--------------| |--------------+--------------| +| 64 | 173 | 1009 | | 174 | 1159 | +| 96 | 169 | 1029 | | 188 | 1201 | +| 128 | 195 | 1242 | | 191 | 1275 | +| 192 | 178 | 1312 | | 184 | 1276 | +| 256 | 196 | 1591 | | 186 | 1281 | +| 384 | 172 | 1409 | | 181 | 1281 | +| 512 | 187 | 1640 | | 181 | 1313 | +| 768 | 171 | 1614 | | 176 | 1258 | +| 1024 | 186 | 1812 | | 178 | 1223 | +| 2048 | 190 | 1707 | | 186 | 1099 | +| 4096 | 182 | 1446 | | 177 | 975 | +| 8192 | 175 | 1345 | | 169 | 1034 | +| 9216 | 165 | 1271 | | 168 | 1023 | +| 16384 | 166 | 1396 | | 165 | 949 | +| 32768 | 172 | 1311 | | 161 | 881 | +| 262144 | 136 | 632 | | 134 | 629 | +| 1048576 | 134 | 698 | | 127 | 623 | +|------------------+--------------+--------------| |--------------+--------------| + + + +Ubuntu 12.04, gcc-4.7.3, 32-bit, with fftw 3.3.3 (built with --enable-neon), on a 1.2GHz ARM Cortex A9 (Tegra 3) + +Built with: +gcc-4.7 -O3 -DHAVE_FFTW -march=armv7-a -mtune=cortex-a9 -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm -I/usr/local/include/ -L/usr/local/lib/ -lfftw3f + +| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------| |------------+------------+------------| +| 64 | 549 | 452 | 731 | | 512 | 602 | 640 | +| 96 | 421 | 272 | 702 | | 496 | 571 | 602 | +| 128 | 498 | 512 | 815 | | 597 | 618 | 652 | +| 160 | 521 | 536 | 815 | | 586 | 669 | 625 | +| 192 | 539 | 571 | 883 | | 485 | 597 | 626 | +| 256 | 640 | 539 | 975 | | 569 | 611 | 671 | +| 384 | 499 | 610 | 879 | | 499 | 602 | 637 | +| 480 | 518 | 507 | 877 | | 496 | 661 | 616 | +| 512 | 524 | 591 | 1002 | | 549 | 678 | 668 | +| 640 | 542 | 612 | 955 | | 568 | 663 | 645 | +| 768 | 557 | 613 | 981 | | 491 | 663 | 598 | +| 800 | 514 | 353 | 882 | | 514 | 360 | 574 | +| 1024 | 640 | 640 | 1067 
| | 492 | 683 | 602 | +| 2048 | 587 | 640 | 908 | | 486 | 640 | 552 | +| 2400 | 479 | 368 | 777 | | 422 | 376 | 518 | +| 4096 | 511 | 614 | 853 | | 426 | 640 | 534 | +| 8192 | 415 | 584 | 708 | | 386 | 622 | 516 | +| 9216 | 419 | 571 | 687 | | 364 | 586 | 506 | +| 16384 | 426 | 577 | 716 | | 398 | 606 | 530 | +| 32768 | 417 | 572 | 673 | | 399 | 572 | 468 | +| 262144 | 219 | 380 | 293 | | 255 | 431 | 343 | +| 1048576 | 202 | 274 | 237 | | 265 | 282 | 355 | +|-----------+------------+------------+------------| |------------+------------+------------| + +Same platform as above, but this time pffft and fftpack are built with clang 3.2: + +clang -O3 -DHAVE_FFTW -march=armv7-a -mtune=cortex-a9 -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm -I/usr/local/include/ -L/usr/local/lib/ -lfftw3f + +| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------| |------------+------------+------------| +| 64 | 427 | 452 | 853 | | 427 | 602 | 1024 | +| 96 | 351 | 276 | 843 | | 337 | 571 | 963 | +| 128 | 373 | 512 | 996 | | 390 | 618 | 1054 | +| 160 | 426 | 536 | 987 | | 375 | 669 | 914 | +| 192 | 404 | 571 | 1079 | | 388 | 588 | 1079 | +| 256 | 465 | 539 | 1205 | | 445 | 602 | 1170 | +| 384 | 366 | 610 | 1099 | | 343 | 594 | 1099 | +| 480 | 356 | 507 | 1140 | | 335 | 651 | 931 | +| 512 | 411 | 591 | 1213 | | 384 | 649 | 1124 | +| 640 | 398 | 612 | 1193 | | 373 | 654 | 901 | +| 768 | 409 | 613 | 1227 | | 383 | 663 | 1044 | +| 800 | 411 | 348 | 1073 | | 353 | 358 | 809 | +| 1024 | 427 | 640 | 1280 | | 413 | 692 | 1004 | +| 2048 | 414 | 626 | 1126 | | 371 | 640 | 853 | +| 2400 | 399 | 373 | 898 | | 319 | 368 | 653 | +| 4096 | 404 | 602 | 1059 | | 357 | 633 | 778 | +| 8192 | 332 | 584 | 792 | | 308 | 616 | 716 | +| 9216 | 322 | 561 | 783 | | 299 | 586 | 687 | +| 16384 | 344 | 568 | 778 | | 314 | 617 | 745 | +| 32768 | 342 | 564 | 737 | | 314 | 552 | 629 | +| 
262144 | 201 | 383 | 313 | | 227 | 435 | 413 | +| 1048576 | 187 | 262 | 251 | | 228 | 281 | 409 | +|-----------+------------+------------+------------| |------------+------------+------------| + +So it looks like, on ARM, gcc 4.7 is the best at scalar floating point +(the fftpack performance numbers are better with gcc), while clang is +the best with neon intrinsics (see how pffft perf has improved with +clang 3.2). + + +NVIDIA Jetson TK1 board, gcc-4.8.2. The cpu is a 2.3GHz cortex A15 (Tegra K1). + +Built with: +gcc -O3 -march=armv7-a -mtune=native -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm + +| input len |real FFTPack| real PFFFT | |cplx FFTPack| cplx PFFFT | +|-----------+------------+------------| |------------+------------| +| 64 | 1735 | 3308 | | 1994 | 3744 | +| 96 | 1596 | 3448 | | 1987 | 3572 | +| 128 | 1807 | 4076 | | 2255 | 3960 | +| 160 | 1769 | 4083 | | 2071 | 3845 | +| 192 | 1990 | 4233 | | 2017 | 3939 | +| 256 | 2191 | 4882 | | 2254 | 4346 | +| 384 | 1878 | 4492 | | 2073 | 4012 | +| 480 | 1748 | 4398 | | 1923 | 3951 | +| 512 | 2030 | 5064 | | 2267 | 4195 | +| 640 | 1918 | 4756 | | 2094 | 4184 | +| 768 | 2099 | 4907 | | 2048 | 4297 | +| 800 | 1822 | 4555 | | 1880 | 4063 | +| 1024 | 2232 | 5355 | | 2187 | 4420 | +| 2048 | 2176 | 4983 | | 2027 | 3602 | +| 2400 | 1741 | 4256 | | 1710 | 3344 | +| 4096 | 1816 | 3914 | | 1851 | 3349 | +| 8192 | 1716 | 3481 | | 1700 | 3255 | +| 9216 | 1735 | 3589 | | 1653 | 3094 | +| 16384 | 1567 | 3483 | | 1637 | 3244 | +| 32768 | 1624 | 3240 | | 1655 | 3156 | +| 262144 | 1012 | 1898 | | 983 | 1503 | +| 1048576 | 876 | 1154 | | 868 | 1341 | +|-----------+------------+------------| |------------+------------| + +The performance on the tegra K1 is pretty impressive. I'm not +including the FFTW numbers as they as slightly below the scalar +fftpack numbers, so something must be wrong (however it seems to be +correctly configured and is using neon simd instructions). 
+ +When using clang 3.4 the pffft version is even a bit faster, reaching +5.7 GFlops for real ffts of size 1024. + + +iPad Air 2 with iOS9, xcode 8.0, arm64. The cpu is an Apple A8X, supposedly running at 1.5GHz. + +| input len |real FFTPack| real vDSP | real PFFFT | |cplx FFTPack| cplx vDSP | cplx PFFFT | +|-----------+------------+------------+------------| |------------+------------+------------| +| 64 | 2517 | 7995 | 6086 | | 2725 | 13006 | 8495 | +| 96 | 2442 | n/a | 6691 | | 2256 | n/a | 7991 | +| 128 | 2664 | 10186 | 7877 | | 2575 | 15115 | 9115 | +| 160 | 2638 | n/a | 8283 | | 2682 | n/a | 8806 | +| 192 | 2903 | n/a | 9083 | | 2634 | n/a | 8980 | +| 256 | 3184 | 11452 | 10039 | | 3026 | 15410 | 10199 | +| 384 | 2665 | n/a | 10100 | | 2275 | n/a | 9247 | +| 480 | 2546 | n/a | 9863 | | 2341 | n/a | 8892 | +| 512 | 2832 | 12197 | 10989 | | 2547 | 16768 | 10154 | +| 640 | 2755 | n/a | 10461 | | 2569 | n/a | 9666 | +| 768 | 2998 | n/a | 11355 | | 2585 | n/a | 9813 | +| 800 | 2516 | n/a | 10332 | | 2433 | n/a | 9164 | +| 1024 | 3109 | 12965 | 12114 | | 2869 | 16448 | 10519 | +| 2048 | 3027 | 12996 | 12023 | | 2648 | 17304 | 10307 | +| 2400 | 2515 | n/a | 10372 | | 2355 | n/a | 8443 | +| 4096 | 3204 | 13603 | 12359 | | 2814 | 16570 | 9780 | +| 8192 | 2759 | 13422 | 10824 | | 2153 | 15652 | 7884 | +| 9216 | 2700 | n/a | 9938 | | 2241 | n/a | 7900 | +| 16384 | 2280 | 13057 | 7976 | | 593 | 4272 | 2534 | +| 32768 | 768 | 4269 | 2882 | | 606 | 4405 | 2604 | +| 262144 | 724 | 3527 | 2630 | | 534 | 2418 | 2157 | +| 1048576 | 674 | 1467 | 2135 | | 530 | 1621 | 2055 | +|-----------+------------+------------+------------| |------------+------------+------------| + +I double-checked to make sure I did not make a mistake in the time +measurements, as the numbers are much higher than what I initially +expected. They are in fact higher than the number I get on the 2.8GHz +Xeon of my 2008 mac pro.. (except for FFT lengths >= 32768 where +having a big cache is useful). 
A good surprise is also that the perf +is not too far from apple's vDSP (at least for the real FFT). + diff --git a/oss-internship-2020/pffft/fftpack.c b/oss-internship-2020/pffft/fftpack.c new file mode 100644 index 0000000..b6375a8 --- /dev/null +++ b/oss-internship-2020/pffft/fftpack.c @@ -0,0 +1,3112 @@ +/* + compile with cc -DTESTING_FFTPACK fftpack.c in order to build the + test application. + + This is an f2c translation of the full fftpack sources as found on + http://www.netlib.org/fftpack/ The translated code has been + slightly edited to remove the ugliest artefacts of the translation + (a hundred of wild GOTOs were wiped during that operation). + + The original fftpack file was written by Paul N. Swarztrauber + (Version 4, 1985), in fortran 77. + + FFTPACK license: + + http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html + + Copyright (c) 2004 the University Corporation for Atmospheric + Research ("UCAR"). All rights reserved. Developed by NCAR's + Computational and Information Systems Laboratory, UCAR, + www.cisl.ucar.edu. + + Redistribution and use of the Software in source and binary forms, + with or without modification, is permitted provided that the + following conditions are met: + + - Neither the names of NCAR's Computational and Information Systems + Laboratory, the University Corporation for Atmospheric Research, + nor the names of its sponsors or contributors may be used to + endorse or promote products derived from this Software without + specific prior written permission. + + - Redistributions of source code must retain the above copyright + notices, this list of conditions, and the disclaimer below. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the disclaimer below in the + documentation and/or other materials provided with the + distribution.
+ + THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE + SOFTWARE. + + ChangeLog: + 2011/10/02: this is my first release of this file. +*/ + +#include "fftpack.h" +#include <math.h> + +typedef fftpack_real real; +typedef fftpack_int integer; + +typedef struct f77complex { + real r, i; +} f77complex; + +#ifdef TESTING_FFTPACK +static real c_abs(f77complex *c) { return sqrt(c->r*c->r + c->i*c->i); } +static double dmax(double a, double b) { return a < b ? b : a; } +#endif + +/* translated by f2c (version 20061008), and slightly edited */ + +static void passfb(integer *nac, integer ido, integer ip, integer l1, integer idl1, + real *cc, real *c1, real *c2, real *ch, real *ch2, const real *wa, real fsign) +{ + /* System generated locals */ + integer ch_offset, cc_offset, + c1_offset, c2_offset, ch2_offset; + + /* Local variables */ + integer i, j, k, l, jc, lc, ik, idj, idl, inc, idp; + real wai, war; + integer ipp2, idij, idlj, idot, ipph; + + +#define c1_ref(a_1,a_2,a_3) c1[((a_3)*l1 + (a_2))*ido + a_1] +#define c2_ref(a_1,a_2) c2[(a_2)*idl1 + a_1] +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*ip + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] +#define ch2_ref(a_1,a_2) ch2[(a_2)*idl1 + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + ido * (1 + l1); + ch -= ch_offset; + c1_offset = 1 + ido * (1 + l1); + c1 -= c1_offset; + cc_offset = 1 + ido * (1 + ip); + cc -= cc_offset; + ch2_offset = 1 + idl1; + ch2 -= ch2_offset; + c2_offset = 1 + idl1; + c2 -= c2_offset; + --wa; + +
/* Function Body */ + idot = ido / 2; + ipp2 = ip + 2; + ipph = (ip + 1) / 2; + idp = ip * ido; + + if (ido >= l1) { + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + for (k = 1; k <= l1; ++k) { + for (i = 1; i <= ido; ++i) { + ch_ref(i, k, j) = cc_ref(i, j, k) + cc_ref(i, jc, k); + ch_ref(i, k, jc) = cc_ref(i, j, k) - cc_ref(i, jc, k); + } + } + } + for (k = 1; k <= l1; ++k) { + for (i = 1; i <= ido; ++i) { + ch_ref(i, k, 1) = cc_ref(i, 1, k); + } + } + } else { + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + for (i = 1; i <= ido; ++i) { + for (k = 1; k <= l1; ++k) { + ch_ref(i, k, j) = cc_ref(i, j, k) + cc_ref(i, jc, k); + ch_ref(i, k, jc) = cc_ref(i, j, k) - cc_ref(i, jc, k); + } + } + } + for (i = 1; i <= ido; ++i) { + for (k = 1; k <= l1; ++k) { + ch_ref(i, k, 1) = cc_ref(i, 1, k); + } + } + } + idl = 2 - ido; + inc = 0; + for (l = 2; l <= ipph; ++l) { + lc = ipp2 - l; + idl += ido; + for (ik = 1; ik <= idl1; ++ik) { + c2_ref(ik, l) = ch2_ref(ik, 1) + wa[idl - 1] * ch2_ref(ik, 2); + c2_ref(ik, lc) = fsign*wa[idl] * ch2_ref(ik, ip); + } + idlj = idl; + inc += ido; + for (j = 3; j <= ipph; ++j) { + jc = ipp2 - j; + idlj += inc; + if (idlj > idp) { + idlj -= idp; + } + war = wa[idlj - 1]; + wai = wa[idlj]; + for (ik = 1; ik <= idl1; ++ik) { + c2_ref(ik, l) = c2_ref(ik, l) + war * ch2_ref(ik, j); + c2_ref(ik, lc) = c2_ref(ik, lc) + fsign*wai * ch2_ref(ik, jc); + } + } + } + for (j = 2; j <= ipph; ++j) { + for (ik = 1; ik <= idl1; ++ik) { + ch2_ref(ik, 1) = ch2_ref(ik, 1) + ch2_ref(ik, j); + } + } + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + for (ik = 2; ik <= idl1; ik += 2) { + ch2_ref(ik - 1, j) = c2_ref(ik - 1, j) - c2_ref(ik, jc); + ch2_ref(ik - 1, jc) = c2_ref(ik - 1, j) + c2_ref(ik, jc); + ch2_ref(ik, j) = c2_ref(ik, j) + c2_ref(ik - 1, jc); + ch2_ref(ik, jc) = c2_ref(ik, j) - c2_ref(ik - 1, jc); + } + } + *nac = 1; + if (ido == 2) { + return; + } + *nac = 0; + for (ik = 1; ik <= idl1; ++ik) { + c2_ref(ik, 1) = ch2_ref(ik, 1); + } + for (j = 2; j <= 
ip; ++j) { + for (k = 1; k <= l1; ++k) { + c1_ref(1, k, j) = ch_ref(1, k, j); + c1_ref(2, k, j) = ch_ref(2, k, j); + } + } + if (idot <= l1) { + idij = 0; + for (j = 2; j <= ip; ++j) { + idij += 2; + for (i = 4; i <= ido; i += 2) { + idij += 2; + for (k = 1; k <= l1; ++k) { + c1_ref(i - 1, k, j) = wa[idij - 1] * ch_ref(i - 1, k, j) - fsign*wa[idij] * ch_ref(i, k, j); + c1_ref(i, k, j) = wa[idij - 1] * ch_ref(i, k, j) + fsign*wa[idij] * ch_ref(i - 1, k, j); + } + } + } + return; + } + idj = 2 - ido; + for (j = 2; j <= ip; ++j) { + idj += ido; + for (k = 1; k <= l1; ++k) { + idij = idj; + for (i = 4; i <= ido; i += 2) { + idij += 2; + c1_ref(i - 1, k, j) = wa[idij - 1] * ch_ref(i - 1, k, j) - fsign*wa[idij] * ch_ref(i, k, j); + c1_ref(i, k, j) = wa[idij - 1] * ch_ref(i, k, j) + fsign*wa[idij] * ch_ref(i - 1, k, j); + } + } + } +} /* passb */ + +#undef ch2_ref +#undef ch_ref +#undef cc_ref +#undef c2_ref +#undef c1_ref + + +static void passb2(integer ido, integer l1, const real *cc, real *ch, const real *wa1) +{ + /* System generated locals */ + integer cc_offset, ch_offset; + + /* Local variables */ + integer i, k; + real ti2, tr2; + + +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*2 + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + ido * (1 + l1); + ch -= ch_offset; + cc_offset = 1 + ido * 3; + cc -= cc_offset; + --wa1; + + /* Function Body */ + if (ido <= 2) { + for (k = 1; k <= l1; ++k) { + ch_ref(1, k, 1) = cc_ref(1, 1, k) + cc_ref(1, 2, k); + ch_ref(1, k, 2) = cc_ref(1, 1, k) - cc_ref(1, 2, k); + ch_ref(2, k, 1) = cc_ref(2, 1, k) + cc_ref(2, 2, k); + ch_ref(2, k, 2) = cc_ref(2, 1, k) - cc_ref(2, 2, k); + } + return; + } + for (k = 1; k <= l1; ++k) { + for (i = 2; i <= ido; i += 2) { + ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + cc_ref(i - 1, 2, k); + tr2 = cc_ref(i - 1, 1, k) - cc_ref(i - 1, 2, k); + ch_ref(i, k, 1) = cc_ref(i, 1, k) + cc_ref(i, 2, k); + ti2 = cc_ref(i, 1, k) - cc_ref(i, 
2, k); + ch_ref(i, k, 2) = wa1[i - 1] * ti2 + wa1[i] * tr2; + ch_ref(i - 1, k, 2) = wa1[i - 1] * tr2 - wa1[i] * ti2; + } + } +} /* passb2 */ + +#undef ch_ref +#undef cc_ref + + +static void passb3(integer ido, integer l1, const real *cc, real *ch, const real *wa1, const real *wa2) +{ + static const real taur = -.5f; + static const real taui = .866025403784439f; + + /* System generated locals */ + integer cc_offset, ch_offset; + + /* Local variables */ + integer i, k; + real ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2; + + +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*3 + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + ido * (1 + l1); + ch -= ch_offset; + cc_offset = 1 + (ido << 2); + cc -= cc_offset; + --wa1; + --wa2; + + /* Function Body */ + if (ido == 2) { + for (k = 1; k <= l1; ++k) { + tr2 = cc_ref(1, 2, k) + cc_ref(1, 3, k); + cr2 = cc_ref(1, 1, k) + taur * tr2; + ch_ref(1, k, 1) = cc_ref(1, 1, k) + tr2; + ti2 = cc_ref(2, 2, k) + cc_ref(2, 3, k); + ci2 = cc_ref(2, 1, k) + taur * ti2; + ch_ref(2, k, 1) = cc_ref(2, 1, k) + ti2; + cr3 = taui * (cc_ref(1, 2, k) - cc_ref(1, 3, k)); + ci3 = taui * (cc_ref(2, 2, k) - cc_ref(2, 3, k)); + ch_ref(1, k, 2) = cr2 - ci3; + ch_ref(1, k, 3) = cr2 + ci3; + ch_ref(2, k, 2) = ci2 + cr3; + ch_ref(2, k, 3) = ci2 - cr3; + } + } else { + for (k = 1; k <= l1; ++k) { + for (i = 2; i <= ido; i += 2) { + tr2 = cc_ref(i - 1, 2, k) + cc_ref(i - 1, 3, k); + cr2 = cc_ref(i - 1, 1, k) + taur * tr2; + ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + tr2; + ti2 = cc_ref(i, 2, k) + cc_ref(i, 3, k); + ci2 = cc_ref(i, 1, k) + taur * ti2; + ch_ref(i, k, 1) = cc_ref(i, 1, k) + ti2; + cr3 = taui * (cc_ref(i - 1, 2, k) - cc_ref(i - 1, 3, k)); + ci3 = taui * (cc_ref(i, 2, k) - cc_ref(i, 3, k)); + dr2 = cr2 - ci3; + dr3 = cr2 + ci3; + di2 = ci2 + cr3; + di3 = ci2 - cr3; + ch_ref(i, k, 2) = wa1[i - 1] * di2 + wa1[i] * dr2; + ch_ref(i - 1, k, 2) = wa1[i - 1] * dr2 - wa1[i] * 
di2; + ch_ref(i, k, 3) = wa2[i - 1] * di3 + wa2[i] * dr3; + ch_ref(i - 1, k, 3) = wa2[i - 1] * dr3 - wa2[i] * di3; + } + } + } +} /* passb3 */ + +#undef ch_ref +#undef cc_ref + + +static void passb4(integer ido, integer l1, const real *cc, real *ch, + const real *wa1, const real *wa2, const real *wa3) +{ + /* System generated locals */ + integer cc_offset, ch_offset; + + /* Local variables */ + integer i, k; + real ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4; + + +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*4 + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + ido * (1 + l1); + ch -= ch_offset; + cc_offset = 1 + ido * 5; + cc -= cc_offset; + --wa1; + --wa2; + --wa3; + + /* Function Body */ + if (ido == 2) { + for (k = 1; k <= l1; ++k) { + ti1 = cc_ref(2, 1, k) - cc_ref(2, 3, k); + ti2 = cc_ref(2, 1, k) + cc_ref(2, 3, k); + tr4 = cc_ref(2, 4, k) - cc_ref(2, 2, k); + ti3 = cc_ref(2, 2, k) + cc_ref(2, 4, k); + tr1 = cc_ref(1, 1, k) - cc_ref(1, 3, k); + tr2 = cc_ref(1, 1, k) + cc_ref(1, 3, k); + ti4 = cc_ref(1, 2, k) - cc_ref(1, 4, k); + tr3 = cc_ref(1, 2, k) + cc_ref(1, 4, k); + ch_ref(1, k, 1) = tr2 + tr3; + ch_ref(1, k, 3) = tr2 - tr3; + ch_ref(2, k, 1) = ti2 + ti3; + ch_ref(2, k, 3) = ti2 - ti3; + ch_ref(1, k, 2) = tr1 + tr4; + ch_ref(1, k, 4) = tr1 - tr4; + ch_ref(2, k, 2) = ti1 + ti4; + ch_ref(2, k, 4) = ti1 - ti4; + } + } else { + for (k = 1; k <= l1; ++k) { + for (i = 2; i <= ido; i += 2) { + ti1 = cc_ref(i, 1, k) - cc_ref(i, 3, k); + ti2 = cc_ref(i, 1, k) + cc_ref(i, 3, k); + ti3 = cc_ref(i, 2, k) + cc_ref(i, 4, k); + tr4 = cc_ref(i, 4, k) - cc_ref(i, 2, k); + tr1 = cc_ref(i - 1, 1, k) - cc_ref(i - 1, 3, k); + tr2 = cc_ref(i - 1, 1, k) + cc_ref(i - 1, 3, k); + ti4 = cc_ref(i - 1, 2, k) - cc_ref(i - 1, 4, k); + tr3 = cc_ref(i - 1, 2, k) + cc_ref(i - 1, 4, k); + ch_ref(i - 1, k, 1) = tr2 + tr3; + cr3 = tr2 - tr3; + ch_ref(i, k, 1) = ti2 + ti3; + ci3 = ti2 - ti3; + 
cr2 = tr1 + tr4; + cr4 = tr1 - tr4; + ci2 = ti1 + ti4; + ci4 = ti1 - ti4; + ch_ref(i - 1, k, 2) = wa1[i - 1] * cr2 - wa1[i] * ci2; + ch_ref(i, k, 2) = wa1[i - 1] * ci2 + wa1[i] * cr2; + ch_ref(i - 1, k, 3) = wa2[i - 1] * cr3 - wa2[i] * ci3; + ch_ref(i, k, 3) = wa2[i - 1] * ci3 + wa2[i] * cr3; + ch_ref(i - 1, k, 4) = wa3[i - 1] * cr4 - wa3[i] * ci4; + ch_ref(i, k, 4) = wa3[i - 1] * ci4 + wa3[i] * cr4; + } + } + } +} /* passb4 */ + +#undef ch_ref +#undef cc_ref + +/* passf5 and passb5 merged */ +static void passfb5(integer ido, integer l1, const real *cc, real *ch, + const real *wa1, const real *wa2, const real *wa3, const real *wa4, real fsign) +{ + const real tr11 = .309016994374947f; + const real ti11 = .951056516295154f*fsign; + const real tr12 = -.809016994374947f; + const real ti12 = .587785252292473f*fsign; + + /* System generated locals */ + integer cc_offset, ch_offset; + + /* Local variables */ + integer i, k; + real ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3, + ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5; + + +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*5 + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + ido * (1 + l1); + ch -= ch_offset; + cc_offset = 1 + ido * 6; + cc -= cc_offset; + --wa1; + --wa2; + --wa3; + --wa4; + + /* Function Body */ + if (ido == 2) { + for (k = 1; k <= l1; ++k) { + ti5 = cc_ref(2, 2, k) - cc_ref(2, 5, k); + ti2 = cc_ref(2, 2, k) + cc_ref(2, 5, k); + ti4 = cc_ref(2, 3, k) - cc_ref(2, 4, k); + ti3 = cc_ref(2, 3, k) + cc_ref(2, 4, k); + tr5 = cc_ref(1, 2, k) - cc_ref(1, 5, k); + tr2 = cc_ref(1, 2, k) + cc_ref(1, 5, k); + tr4 = cc_ref(1, 3, k) - cc_ref(1, 4, k); + tr3 = cc_ref(1, 3, k) + cc_ref(1, 4, k); + ch_ref(1, k, 1) = cc_ref(1, 1, k) + tr2 + tr3; + ch_ref(2, k, 1) = cc_ref(2, 1, k) + ti2 + ti3; + cr2 = cc_ref(1, 1, k) + tr11 * tr2 + tr12 * tr3; + ci2 = cc_ref(2, 1, k) + tr11 * ti2 + tr12 * ti3; + cr3 = cc_ref(1, 1, k) + 
tr12 * tr2 + tr11 * tr3; + ci3 = cc_ref(2, 1, k) + tr12 * ti2 + tr11 * ti3; + cr5 = ti11 * tr5 + ti12 * tr4; + ci5 = ti11 * ti5 + ti12 * ti4; + cr4 = ti12 * tr5 - ti11 * tr4; + ci4 = ti12 * ti5 - ti11 * ti4; + ch_ref(1, k, 2) = cr2 - ci5; + ch_ref(1, k, 5) = cr2 + ci5; + ch_ref(2, k, 2) = ci2 + cr5; + ch_ref(2, k, 3) = ci3 + cr4; + ch_ref(1, k, 3) = cr3 - ci4; + ch_ref(1, k, 4) = cr3 + ci4; + ch_ref(2, k, 4) = ci3 - cr4; + ch_ref(2, k, 5) = ci2 - cr5; + } + } else { + for (k = 1; k <= l1; ++k) { + for (i = 2; i <= ido; i += 2) { + ti5 = cc_ref(i, 2, k) - cc_ref(i, 5, k); + ti2 = cc_ref(i, 2, k) + cc_ref(i, 5, k); + ti4 = cc_ref(i, 3, k) - cc_ref(i, 4, k); + ti3 = cc_ref(i, 3, k) + cc_ref(i, 4, k); + tr5 = cc_ref(i - 1, 2, k) - cc_ref(i - 1, 5, k); + tr2 = cc_ref(i - 1, 2, k) + cc_ref(i - 1, 5, k); + tr4 = cc_ref(i - 1, 3, k) - cc_ref(i - 1, 4, k); + tr3 = cc_ref(i - 1, 3, k) + cc_ref(i - 1, 4, k); + ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + tr2 + tr3; + ch_ref(i, k, 1) = cc_ref(i, 1, k) + ti2 + ti3; + cr2 = cc_ref(i - 1, 1, k) + tr11 * tr2 + tr12 * tr3; + ci2 = cc_ref(i, 1, k) + tr11 * ti2 + tr12 * ti3; + cr3 = cc_ref(i - 1, 1, k) + tr12 * tr2 + tr11 * tr3; + ci3 = cc_ref(i, 1, k) + tr12 * ti2 + tr11 * ti3; + cr5 = ti11 * tr5 + ti12 * tr4; + ci5 = ti11 * ti5 + ti12 * ti4; + cr4 = ti12 * tr5 - ti11 * tr4; + ci4 = ti12 * ti5 - ti11 * ti4; + dr3 = cr3 - ci4; + dr4 = cr3 + ci4; + di3 = ci3 + cr4; + di4 = ci3 - cr4; + dr5 = cr2 + ci5; + dr2 = cr2 - ci5; + di5 = ci2 - cr5; + di2 = ci2 + cr5; + ch_ref(i - 1, k, 2) = wa1[i - 1] * dr2 - fsign*wa1[i] * di2; + ch_ref(i, k, 2) = wa1[i - 1] * di2 + fsign*wa1[i] * dr2; + ch_ref(i - 1, k, 3) = wa2[i - 1] * dr3 - fsign*wa2[i] * di3; + ch_ref(i, k, 3) = wa2[i - 1] * di3 + fsign*wa2[i] * dr3; + ch_ref(i - 1, k, 4) = wa3[i - 1] * dr4 - fsign*wa3[i] * di4; + ch_ref(i, k, 4) = wa3[i - 1] * di4 + fsign*wa3[i] * dr4; + ch_ref(i - 1, k, 5) = wa4[i - 1] * dr5 - fsign*wa4[i] * di5; + ch_ref(i, k, 5) = wa4[i - 1] * di5 + fsign*wa4[i] * 
dr5; + } + } + } +} /* passb5 */ + +#undef ch_ref +#undef cc_ref + +static void passf2(integer ido, integer l1, const real *cc, real *ch, const real *wa1) +{ + /* System generated locals */ + integer cc_offset, ch_offset; + + /* Local variables */ + integer i, k; + real ti2, tr2; + + +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*2 + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + ido * (1 + l1); + ch -= ch_offset; + cc_offset = 1 + ido * 3; + cc -= cc_offset; + --wa1; + + /* Function Body */ + if (ido == 2) { + for (k = 1; k <= l1; ++k) { + ch_ref(1, k, 1) = cc_ref(1, 1, k) + cc_ref(1, 2, k); + ch_ref(1, k, 2) = cc_ref(1, 1, k) - cc_ref(1, 2, k); + ch_ref(2, k, 1) = cc_ref(2, 1, k) + cc_ref(2, 2, k); + ch_ref(2, k, 2) = cc_ref(2, 1, k) - cc_ref(2, 2, k); + } + } else { + for (k = 1; k <= l1; ++k) { + for (i = 2; i <= ido; i += 2) { + ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + cc_ref(i - 1, 2, + k); + tr2 = cc_ref(i - 1, 1, k) - cc_ref(i - 1, 2, k); + ch_ref(i, k, 1) = cc_ref(i, 1, k) + cc_ref(i, 2, k); + ti2 = cc_ref(i, 1, k) - cc_ref(i, 2, k); + ch_ref(i, k, 2) = wa1[i - 1] * ti2 - wa1[i] * tr2; + ch_ref(i - 1, k, 2) = wa1[i - 1] * tr2 + wa1[i] * ti2; + } + } + } +} /* passf2 */ + +#undef ch_ref +#undef cc_ref + + +static void passf3(integer ido, integer l1, const real *cc, real *ch, + const real *wa1, const real *wa2) +{ + static const real taur = -.5f; + static const real taui = -.866025403784439f; + + /* System generated locals */ + integer cc_offset, ch_offset; + + /* Local variables */ + integer i, k; + real ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2; + + +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*3 + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + ido * (1 + l1); + ch -= ch_offset; + cc_offset = 1 + (ido << 2); + cc -= cc_offset; + --wa1; + --wa2; + + /* Function Body */ + if (ido == 2) { + for (k = 
1; k <= l1; ++k) { + tr2 = cc_ref(1, 2, k) + cc_ref(1, 3, k); + cr2 = cc_ref(1, 1, k) + taur * tr2; + ch_ref(1, k, 1) = cc_ref(1, 1, k) + tr2; + ti2 = cc_ref(2, 2, k) + cc_ref(2, 3, k); + ci2 = cc_ref(2, 1, k) + taur * ti2; + ch_ref(2, k, 1) = cc_ref(2, 1, k) + ti2; + cr3 = taui * (cc_ref(1, 2, k) - cc_ref(1, 3, k)); + ci3 = taui * (cc_ref(2, 2, k) - cc_ref(2, 3, k)); + ch_ref(1, k, 2) = cr2 - ci3; + ch_ref(1, k, 3) = cr2 + ci3; + ch_ref(2, k, 2) = ci2 + cr3; + ch_ref(2, k, 3) = ci2 - cr3; + } + } else { + for (k = 1; k <= l1; ++k) { + for (i = 2; i <= ido; i += 2) { + tr2 = cc_ref(i - 1, 2, k) + cc_ref(i - 1, 3, k); + cr2 = cc_ref(i - 1, 1, k) + taur * tr2; + ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + tr2; + ti2 = cc_ref(i, 2, k) + cc_ref(i, 3, k); + ci2 = cc_ref(i, 1, k) + taur * ti2; + ch_ref(i, k, 1) = cc_ref(i, 1, k) + ti2; + cr3 = taui * (cc_ref(i - 1, 2, k) - cc_ref(i - 1, 3, k)); + ci3 = taui * (cc_ref(i, 2, k) - cc_ref(i, 3, k)); + dr2 = cr2 - ci3; + dr3 = cr2 + ci3; + di2 = ci2 + cr3; + di3 = ci2 - cr3; + ch_ref(i, k, 2) = wa1[i - 1] * di2 - wa1[i] * dr2; + ch_ref(i - 1, k, 2) = wa1[i - 1] * dr2 + wa1[i] * di2; + ch_ref(i, k, 3) = wa2[i - 1] * di3 - wa2[i] * dr3; + ch_ref(i - 1, k, 3) = wa2[i - 1] * dr3 + wa2[i] * di3; + } + } + } +} /* passf3 */ + +#undef ch_ref +#undef cc_ref + + +static void passf4(integer ido, integer l1, const real *cc, real *ch, + const real *wa1, const real *wa2, const real *wa3) +{ + /* System generated locals */ + integer cc_offset, ch_offset; + + /* Local variables */ + integer i, k; + real ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4; + + +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*4 + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + ido * (1 + l1); + ch -= ch_offset; + cc_offset = 1 + ido * 5; + cc -= cc_offset; + --wa1; + --wa2; + --wa3; + + /* Function Body */ + if (ido == 2) { + for (k = 1; k <= l1; ++k) { + ti1 = cc_ref(2, 
1, k) - cc_ref(2, 3, k); + ti2 = cc_ref(2, 1, k) + cc_ref(2, 3, k); + tr4 = cc_ref(2, 2, k) - cc_ref(2, 4, k); + ti3 = cc_ref(2, 2, k) + cc_ref(2, 4, k); + tr1 = cc_ref(1, 1, k) - cc_ref(1, 3, k); + tr2 = cc_ref(1, 1, k) + cc_ref(1, 3, k); + ti4 = cc_ref(1, 4, k) - cc_ref(1, 2, k); + tr3 = cc_ref(1, 2, k) + cc_ref(1, 4, k); + ch_ref(1, k, 1) = tr2 + tr3; + ch_ref(1, k, 3) = tr2 - tr3; + ch_ref(2, k, 1) = ti2 + ti3; + ch_ref(2, k, 3) = ti2 - ti3; + ch_ref(1, k, 2) = tr1 + tr4; + ch_ref(1, k, 4) = tr1 - tr4; + ch_ref(2, k, 2) = ti1 + ti4; + ch_ref(2, k, 4) = ti1 - ti4; + } + } else { + for (k = 1; k <= l1; ++k) { + for (i = 2; i <= ido; i += 2) { + ti1 = cc_ref(i, 1, k) - cc_ref(i, 3, k); + ti2 = cc_ref(i, 1, k) + cc_ref(i, 3, k); + ti3 = cc_ref(i, 2, k) + cc_ref(i, 4, k); + tr4 = cc_ref(i, 2, k) - cc_ref(i, 4, k); + tr1 = cc_ref(i - 1, 1, k) - cc_ref(i - 1, 3, k); + tr2 = cc_ref(i - 1, 1, k) + cc_ref(i - 1, 3, k); + ti4 = cc_ref(i - 1, 4, k) - cc_ref(i - 1, 2, k); + tr3 = cc_ref(i - 1, 2, k) + cc_ref(i - 1, 4, k); + ch_ref(i - 1, k, 1) = tr2 + tr3; + cr3 = tr2 - tr3; + ch_ref(i, k, 1) = ti2 + ti3; + ci3 = ti2 - ti3; + cr2 = tr1 + tr4; + cr4 = tr1 - tr4; + ci2 = ti1 + ti4; + ci4 = ti1 - ti4; + ch_ref(i - 1, k, 2) = wa1[i - 1] * cr2 + wa1[i] * ci2; + ch_ref(i, k, 2) = wa1[i - 1] * ci2 - wa1[i] * cr2; + ch_ref(i - 1, k, 3) = wa2[i - 1] * cr3 + wa2[i] * ci3; + ch_ref(i, k, 3) = wa2[i - 1] * ci3 - wa2[i] * cr3; + ch_ref(i - 1, k, 4) = wa3[i - 1] * cr4 + wa3[i] * ci4; + ch_ref(i, k, 4) = wa3[i - 1] * ci4 - wa3[i] * cr4; + } + } + } +} /* passf4 */ + +#undef ch_ref +#undef cc_ref + +static void radb2(integer ido, integer l1, const real *cc, real *ch, const real *wa1) +{ + /* System generated locals */ + integer cc_offset, ch_offset; + + /* Local variables */ + integer i, k, ic; + real ti2, tr2; + integer idp2; + + +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*2 + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] + + /* Parameter adjustments */ + 
ch_offset = 1 + ido * (1 + l1); + ch -= ch_offset; + cc_offset = 1 + ido * 3; + cc -= cc_offset; + --wa1; + + /* Function Body */ + for (k = 1; k <= l1; ++k) { + ch_ref(1, k, 1) = cc_ref(1, 1, k) + cc_ref(ido, 2, k); + ch_ref(1, k, 2) = cc_ref(1, 1, k) - cc_ref(ido, 2, k); + } + if (ido < 2) return; + else if (ido != 2) { + idp2 = ido + 2; + for (k = 1; k <= l1; ++k) { + for (i = 3; i <= ido; i += 2) { + ic = idp2 - i; + ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + cc_ref(ic - 1, 2, + k); + tr2 = cc_ref(i - 1, 1, k) - cc_ref(ic - 1, 2, k); + ch_ref(i, k, 1) = cc_ref(i, 1, k) - cc_ref(ic, 2, k); + ti2 = cc_ref(i, 1, k) + cc_ref(ic, 2, k); + ch_ref(i - 1, k, 2) = wa1[i - 2] * tr2 - wa1[i - 1] * ti2; + ch_ref(i, k, 2) = wa1[i - 2] * ti2 + wa1[i - 1] * tr2; + } + } + if (ido % 2 == 1) return; + } + for (k = 1; k <= l1; ++k) { + ch_ref(ido, k, 1) = cc_ref(ido, 1, k) + cc_ref(ido, 1, k); + ch_ref(ido, k, 2) = -(cc_ref(1, 2, k) + cc_ref(1, 2, k)); + } +} /* radb2 */ + +#undef ch_ref +#undef cc_ref + + +static void radb3(integer ido, integer l1, const real *cc, real *ch, + const real *wa1, const real *wa2) +{ + /* Initialized data */ + + static const real taur = -.5f; + static const real taui = .866025403784439f; + + /* System generated locals */ + integer cc_offset, ch_offset; + + /* Local variables */ + integer i, k, ic; + real ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2; + integer idp2; + + +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*3 + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + ido * (1 + l1); + ch -= ch_offset; + cc_offset = 1 + (ido << 2); + cc -= cc_offset; + --wa1; + --wa2; + + /* Function Body */ + for (k = 1; k <= l1; ++k) { + tr2 = cc_ref(ido, 2, k) + cc_ref(ido, 2, k); + cr2 = cc_ref(1, 1, k) + taur * tr2; + ch_ref(1, k, 1) = cc_ref(1, 1, k) + tr2; + ci3 = taui * (cc_ref(1, 3, k) + cc_ref(1, 3, k)); + ch_ref(1, k, 2) = cr2 - ci3; + ch_ref(1, k, 3) = cr2 + ci3; + } + if (ido 
== 1) { + return; + } + idp2 = ido + 2; + for (k = 1; k <= l1; ++k) { + for (i = 3; i <= ido; i += 2) { + ic = idp2 - i; + tr2 = cc_ref(i - 1, 3, k) + cc_ref(ic - 1, 2, k); + cr2 = cc_ref(i - 1, 1, k) + taur * tr2; + ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + tr2; + ti2 = cc_ref(i, 3, k) - cc_ref(ic, 2, k); + ci2 = cc_ref(i, 1, k) + taur * ti2; + ch_ref(i, k, 1) = cc_ref(i, 1, k) + ti2; + cr3 = taui * (cc_ref(i - 1, 3, k) - cc_ref(ic - 1, 2, k)); + ci3 = taui * (cc_ref(i, 3, k) + cc_ref(ic, 2, k)); + dr2 = cr2 - ci3; + dr3 = cr2 + ci3; + di2 = ci2 + cr3; + di3 = ci2 - cr3; + ch_ref(i - 1, k, 2) = wa1[i - 2] * dr2 - wa1[i - 1] * di2; + ch_ref(i, k, 2) = wa1[i - 2] * di2 + wa1[i - 1] * dr2; + ch_ref(i - 1, k, 3) = wa2[i - 2] * dr3 - wa2[i - 1] * di3; + ch_ref(i, k, 3) = wa2[i - 2] * di3 + wa2[i - 1] * dr3; + } + } +} /* radb3 */ + +#undef ch_ref +#undef cc_ref + + +static void radb4(integer ido, integer l1, const real *cc, real *ch, + const real *wa1, const real *wa2, const real *wa3) +{ + /* Initialized data */ + + static const real sqrt2 = 1.414213562373095f; + + /* System generated locals */ + integer cc_offset, ch_offset; + + /* Local variables */ + integer i, k, ic; + real ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4; + integer idp2; + + +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*4 + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + ido * (1 + l1); + ch -= ch_offset; + cc_offset = 1 + ido * 5; + cc -= cc_offset; + --wa1; + --wa2; + --wa3; + + /* Function Body */ + for (k = 1; k <= l1; ++k) { + tr1 = cc_ref(1, 1, k) - cc_ref(ido, 4, k); + tr2 = cc_ref(1, 1, k) + cc_ref(ido, 4, k); + tr3 = cc_ref(ido, 2, k) + cc_ref(ido, 2, k); + tr4 = cc_ref(1, 3, k) + cc_ref(1, 3, k); + ch_ref(1, k, 1) = tr2 + tr3; + ch_ref(1, k, 2) = tr1 - tr4; + ch_ref(1, k, 3) = tr2 - tr3; + ch_ref(1, k, 4) = tr1 + tr4; + } + if (ido < 2) return; + if (ido != 2) { + idp2 = ido + 2; + for (k = 
1; k <= l1; ++k) { + for (i = 3; i <= ido; i += 2) { + ic = idp2 - i; + ti1 = cc_ref(i, 1, k) + cc_ref(ic, 4, k); + ti2 = cc_ref(i, 1, k) - cc_ref(ic, 4, k); + ti3 = cc_ref(i, 3, k) - cc_ref(ic, 2, k); + tr4 = cc_ref(i, 3, k) + cc_ref(ic, 2, k); + tr1 = cc_ref(i - 1, 1, k) - cc_ref(ic - 1, 4, k); + tr2 = cc_ref(i - 1, 1, k) + cc_ref(ic - 1, 4, k); + ti4 = cc_ref(i - 1, 3, k) - cc_ref(ic - 1, 2, k); + tr3 = cc_ref(i - 1, 3, k) + cc_ref(ic - 1, 2, k); + ch_ref(i - 1, k, 1) = tr2 + tr3; + cr3 = tr2 - tr3; + ch_ref(i, k, 1) = ti2 + ti3; + ci3 = ti2 - ti3; + cr2 = tr1 - tr4; + cr4 = tr1 + tr4; + ci2 = ti1 + ti4; + ci4 = ti1 - ti4; + ch_ref(i - 1, k, 2) = wa1[i - 2] * cr2 - wa1[i - 1] * ci2; + ch_ref(i, k, 2) = wa1[i - 2] * ci2 + wa1[i - 1] * cr2; + ch_ref(i - 1, k, 3) = wa2[i - 2] * cr3 - wa2[i - 1] * ci3; + ch_ref(i, k, 3) = wa2[i - 2] * ci3 + wa2[i - 1] * cr3; + ch_ref(i - 1, k, 4) = wa3[i - 2] * cr4 - wa3[i - 1] * ci4; + ch_ref(i, k, 4) = wa3[i - 2] * ci4 + wa3[i - 1] * cr4; + } + } + if (ido % 2 == 1) return; + } + for (k = 1; k <= l1; ++k) { + ti1 = cc_ref(1, 2, k) + cc_ref(1, 4, k); + ti2 = cc_ref(1, 4, k) - cc_ref(1, 2, k); + tr1 = cc_ref(ido, 1, k) - cc_ref(ido, 3, k); + tr2 = cc_ref(ido, 1, k) + cc_ref(ido, 3, k); + ch_ref(ido, k, 1) = tr2 + tr2; + ch_ref(ido, k, 2) = sqrt2 * (tr1 - ti1); + ch_ref(ido, k, 3) = ti2 + ti2; + ch_ref(ido, k, 4) = -sqrt2 * (tr1 + ti1); + } +} /* radb4 */ + +#undef ch_ref +#undef cc_ref + + +static void radb5(integer ido, integer l1, const real *cc, real *ch, + const real *wa1, const real *wa2, const real *wa3, const real *wa4) +{ + /* Initialized data */ + + static const real tr11 = .309016994374947f; + static const real ti11 = .951056516295154f; + static const real tr12 = -.809016994374947f; + static const real ti12 = .587785252292473f; + + /* System generated locals */ + integer cc_offset, ch_offset; + + /* Local variables */ + integer i, k, ic; + real ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3, + ti4, 
ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5; + integer idp2; + + +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*5 + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + ido * (1 + l1); + ch -= ch_offset; + cc_offset = 1 + ido * 6; + cc -= cc_offset; + --wa1; + --wa2; + --wa3; + --wa4; + + /* Function Body */ + for (k = 1; k <= l1; ++k) { + ti5 = cc_ref(1, 3, k) + cc_ref(1, 3, k); + ti4 = cc_ref(1, 5, k) + cc_ref(1, 5, k); + tr2 = cc_ref(ido, 2, k) + cc_ref(ido, 2, k); + tr3 = cc_ref(ido, 4, k) + cc_ref(ido, 4, k); + ch_ref(1, k, 1) = cc_ref(1, 1, k) + tr2 + tr3; + cr2 = cc_ref(1, 1, k) + tr11 * tr2 + tr12 * tr3; + cr3 = cc_ref(1, 1, k) + tr12 * tr2 + tr11 * tr3; + ci5 = ti11 * ti5 + ti12 * ti4; + ci4 = ti12 * ti5 - ti11 * ti4; + ch_ref(1, k, 2) = cr2 - ci5; + ch_ref(1, k, 3) = cr3 - ci4; + ch_ref(1, k, 4) = cr3 + ci4; + ch_ref(1, k, 5) = cr2 + ci5; + } + if (ido == 1) { + return; + } + idp2 = ido + 2; + for (k = 1; k <= l1; ++k) { + for (i = 3; i <= ido; i += 2) { + ic = idp2 - i; + ti5 = cc_ref(i, 3, k) + cc_ref(ic, 2, k); + ti2 = cc_ref(i, 3, k) - cc_ref(ic, 2, k); + ti4 = cc_ref(i, 5, k) + cc_ref(ic, 4, k); + ti3 = cc_ref(i, 5, k) - cc_ref(ic, 4, k); + tr5 = cc_ref(i - 1, 3, k) - cc_ref(ic - 1, 2, k); + tr2 = cc_ref(i - 1, 3, k) + cc_ref(ic - 1, 2, k); + tr4 = cc_ref(i - 1, 5, k) - cc_ref(ic - 1, 4, k); + tr3 = cc_ref(i - 1, 5, k) + cc_ref(ic - 1, 4, k); + ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + tr2 + tr3; + ch_ref(i, k, 1) = cc_ref(i, 1, k) + ti2 + ti3; + cr2 = cc_ref(i - 1, 1, k) + tr11 * tr2 + tr12 * tr3; + ci2 = cc_ref(i, 1, k) + tr11 * ti2 + tr12 * ti3; + cr3 = cc_ref(i - 1, 1, k) + tr12 * tr2 + tr11 * tr3; + ci3 = cc_ref(i, 1, k) + tr12 * ti2 + tr11 * ti3; + cr5 = ti11 * tr5 + ti12 * tr4; + ci5 = ti11 * ti5 + ti12 * ti4; + cr4 = ti12 * tr5 - ti11 * tr4; + ci4 = ti12 * ti5 - ti11 * ti4; + dr3 = cr3 - ci4; + dr4 = cr3 + ci4; + di3 = ci3 + cr4; + di4 = ci3 - cr4; + dr5 = cr2 + ci5; + dr2 
= cr2 - ci5; + di5 = ci2 - cr5; + di2 = ci2 + cr5; + ch_ref(i - 1, k, 2) = wa1[i - 2] * dr2 - wa1[i - 1] * di2; + ch_ref(i, k, 2) = wa1[i - 2] * di2 + wa1[i - 1] * dr2; + ch_ref(i - 1, k, 3) = wa2[i - 2] * dr3 - wa2[i - 1] * di3; + ch_ref(i, k, 3) = wa2[i - 2] * di3 + wa2[i - 1] * dr3; + ch_ref(i - 1, k, 4) = wa3[i - 2] * dr4 - wa3[i - 1] * di4; + ch_ref(i, k, 4) = wa3[i - 2] * di4 + wa3[i - 1] * dr4; + ch_ref(i - 1, k, 5) = wa4[i - 2] * dr5 - wa4[i - 1] * di5; + ch_ref(i, k, 5) = wa4[i - 2] * di5 + wa4[i - 1] * dr5; + } + } +} /* radb5 */ + +#undef ch_ref +#undef cc_ref + + +static void radbg(integer ido, integer ip, integer l1, integer idl1, + const real *cc, real *c1, real *c2, real *ch, real *ch2, const real *wa) +{ + /* System generated locals */ + integer ch_offset, cc_offset, + c1_offset, c2_offset, ch2_offset; + + /* Local variables */ + integer i, j, k, l, j2, ic, jc, lc, ik, is; + real dc2, ai1, ai2, ar1, ar2, ds2; + integer nbd; + real dcp, arg, dsp, ar1h, ar2h; + integer idp2, ipp2, idij, ipph; + + +#define c1_ref(a_1,a_2,a_3) c1[((a_3)*l1 + (a_2))*ido + a_1] +#define c2_ref(a_1,a_2) c2[(a_2)*idl1 + a_1] +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*ip + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] +#define ch2_ref(a_1,a_2) ch2[(a_2)*idl1 + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + ido * (1 + l1); + ch -= ch_offset; + c1_offset = 1 + ido * (1 + l1); + c1 -= c1_offset; + cc_offset = 1 + ido * (1 + ip); + cc -= cc_offset; + ch2_offset = 1 + idl1; + ch2 -= ch2_offset; + c2_offset = 1 + idl1; + c2 -= c2_offset; + --wa; + + /* Function Body */ + arg = (2*M_PI) / (real) (ip); + dcp = cos(arg); + dsp = sin(arg); + idp2 = ido + 2; + nbd = (ido - 1) / 2; + ipp2 = ip + 2; + ipph = (ip + 1) / 2; + if (ido >= l1) { + for (k = 1; k <= l1; ++k) { + for (i = 1; i <= ido; ++i) { + ch_ref(i, k, 1) = cc_ref(i, 1, k); + } + } + } else { + for (i = 1; i <= ido; ++i) { + for (k = 1; k <= l1; ++k) { + ch_ref(i, k, 1) = cc_ref(i, 1, 
k); + } + } + } + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + j2 = j + j; + for (k = 1; k <= l1; ++k) { + ch_ref(1, k, j) = cc_ref(ido, j2 - 2, k) + cc_ref(ido, j2 - 2, k); + ch_ref(1, k, jc) = cc_ref(1, j2 - 1, k) + cc_ref(1, j2 - 1, k); + } + } + if (ido != 1) { + if (nbd >= l1) { + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + for (k = 1; k <= l1; ++k) { + for (i = 3; i <= ido; i += 2) { + ic = idp2 - i; + ch_ref(i - 1, k, j) = cc_ref(i - 1, (j << 1) - 1, k) + cc_ref(ic - 1, (j << 1) - 2, k); + ch_ref(i - 1, k, jc) = cc_ref(i - 1, (j << 1) - 1, k) - cc_ref(ic - 1, (j << 1) - 2, k); + ch_ref(i, k, j) = cc_ref(i, (j << 1) - 1, k) - cc_ref(ic, (j << 1) - 2, k); + ch_ref(i, k, jc) = cc_ref(i, (j << 1) - 1, k) + cc_ref(ic, (j << 1) - 2, k); + } + } + } + } else { + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + for (i = 3; i <= ido; i += 2) { + ic = idp2 - i; + for (k = 1; k <= l1; ++k) { + ch_ref(i - 1, k, j) = cc_ref(i - 1, (j << 1) - 1, k) + cc_ref(ic - 1, (j << 1) - 2, k); + ch_ref(i - 1, k, jc) = cc_ref(i - 1, (j << 1) - 1, k) - cc_ref(ic - 1, (j << 1) - 2, k); + ch_ref(i, k, j) = cc_ref(i, (j << 1) - 1, k) - cc_ref(ic, (j << 1) - 2, k); + ch_ref(i, k, jc) = cc_ref(i, (j << 1) - 1, k) + cc_ref(ic, (j << 1) - 2, k); + } + } + } + } + } + ar1 = 1.f; + ai1 = 0.f; + for (l = 2; l <= ipph; ++l) { + lc = ipp2 - l; + ar1h = dcp * ar1 - dsp * ai1; + ai1 = dcp * ai1 + dsp * ar1; + ar1 = ar1h; + for (ik = 1; ik <= idl1; ++ik) { + c2_ref(ik, l) = ch2_ref(ik, 1) + ar1 * ch2_ref(ik, 2); + c2_ref(ik, lc) = ai1 * ch2_ref(ik, ip); + } + dc2 = ar1; + ds2 = ai1; + ar2 = ar1; + ai2 = ai1; + for (j = 3; j <= ipph; ++j) { + jc = ipp2 - j; + ar2h = dc2 * ar2 - ds2 * ai2; + ai2 = dc2 * ai2 + ds2 * ar2; + ar2 = ar2h; + for (ik = 1; ik <= idl1; ++ik) { + c2_ref(ik, l) = c2_ref(ik, l) + ar2 * ch2_ref(ik, j); + c2_ref(ik, lc) = c2_ref(ik, lc) + ai2 * ch2_ref(ik, jc); + } + } + } + for (j = 2; j <= ipph; ++j) { + for (ik = 1; ik <= idl1; ++ik) { + ch2_ref(ik, 1) = ch2_ref(ik, 1) 
+ ch2_ref(ik, j); + } + } + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + for (k = 1; k <= l1; ++k) { + ch_ref(1, k, j) = c1_ref(1, k, j) - c1_ref(1, k, jc); + ch_ref(1, k, jc) = c1_ref(1, k, j) + c1_ref(1, k, jc); + } + } + if (ido != 1) { + if (nbd >= l1) { + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + for (k = 1; k <= l1; ++k) { + for (i = 3; i <= ido; i += 2) { + ch_ref(i - 1, k, j) = c1_ref(i - 1, k, j) - c1_ref(i, k, jc); + ch_ref(i - 1, k, jc) = c1_ref(i - 1, k, j) + c1_ref(i, k, jc); + ch_ref(i, k, j) = c1_ref(i, k, j) + c1_ref(i - 1, k, jc); + ch_ref(i, k, jc) = c1_ref(i, k, j) - c1_ref(i - 1, k, jc); + } + } + } + } else { + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + for (i = 3; i <= ido; i += 2) { + for (k = 1; k <= l1; ++k) { + ch_ref(i - 1, k, j) = c1_ref(i - 1, k, j) - c1_ref(i, k, jc); + ch_ref(i - 1, k, jc) = c1_ref(i - 1, k, j) + c1_ref(i, k, jc); + ch_ref(i, k, j) = c1_ref(i, k, j) + c1_ref(i - 1, k, jc); + ch_ref(i, k, jc) = c1_ref(i, k, j) - c1_ref(i - 1, k, jc); + } + } + } + } + } + if (ido == 1) { + return; + } + for (ik = 1; ik <= idl1; ++ik) { + c2_ref(ik, 1) = ch2_ref(ik, 1); + } + for (j = 2; j <= ip; ++j) { + for (k = 1; k <= l1; ++k) { + c1_ref(1, k, j) = ch_ref(1, k, j); + } + } + if (nbd <= l1) { + is = -(ido); + for (j = 2; j <= ip; ++j) { + is += ido; + idij = is; + for (i = 3; i <= ido; i += 2) { + idij += 2; + for (k = 1; k <= l1; ++k) { + c1_ref(i - 1, k, j) = wa[idij - 1] * ch_ref(i - 1, k, j) + - wa[idij] * ch_ref(i, k, j); + c1_ref(i, k, j) = wa[idij - 1] * ch_ref(i, k, j) + wa[idij] * ch_ref(i - 1, k, j); + } + } + } + } else { + is = -(ido); + for (j = 2; j <= ip; ++j) { + is += ido; + for (k = 1; k <= l1; ++k) { + idij = is; + for (i = 3; i <= ido; i += 2) { + idij += 2; + c1_ref(i - 1, k, j) = wa[idij - 1] * ch_ref(i - 1, k, j) + - wa[idij] * ch_ref(i, k, j); + c1_ref(i, k, j) = wa[idij - 1] * ch_ref(i, k, j) + wa[idij] * ch_ref(i - 1, k, j); + } + } + } + } +} /* radbg */ + +#undef ch2_ref +#undef ch_ref 
+#undef cc_ref +#undef c2_ref +#undef c1_ref + + +static void radf2(integer ido, integer l1, const real *cc, real *ch, + const real *wa1) +{ + /* System generated locals */ + integer ch_offset, cc_offset; + + /* Local variables */ + integer i, k, ic; + real ti2, tr2; + integer idp2; + + +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*2 + (a_2))*ido + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + ido * 3; + ch -= ch_offset; + cc_offset = 1 + ido * (1 + l1); + cc -= cc_offset; + --wa1; + + /* Function Body */ + for (k = 1; k <= l1; ++k) { + ch_ref(1, 1, k) = cc_ref(1, k, 1) + cc_ref(1, k, 2); + ch_ref(ido, 2, k) = cc_ref(1, k, 1) - cc_ref(1, k, 2); + } + if (ido < 2) return; + if (ido != 2) { + idp2 = ido + 2; + for (k = 1; k <= l1; ++k) { + for (i = 3; i <= ido; i += 2) { + ic = idp2 - i; + tr2 = wa1[i - 2] * cc_ref(i - 1, k, 2) + wa1[i - 1] * + cc_ref(i, k, 2); + ti2 = wa1[i - 2] * cc_ref(i, k, 2) - wa1[i - 1] * cc_ref( + i - 1, k, 2); + ch_ref(i, 1, k) = cc_ref(i, k, 1) + ti2; + ch_ref(ic, 2, k) = ti2 - cc_ref(i, k, 1); + ch_ref(i - 1, 1, k) = cc_ref(i - 1, k, 1) + tr2; + ch_ref(ic - 1, 2, k) = cc_ref(i - 1, k, 1) - tr2; + } + } + if (ido % 2 == 1) { + return; + } + } + for (k = 1; k <= l1; ++k) { + ch_ref(1, 2, k) = -cc_ref(ido, k, 2); + ch_ref(ido, 1, k) = cc_ref(ido, k, 1); + } +} /* radf2 */ + +#undef ch_ref +#undef cc_ref + + +static void radf3(integer ido, integer l1, const real *cc, real *ch, + const real *wa1, const real *wa2) +{ + static const real taur = -.5f; + static const real taui = .866025403784439f; + + /* System generated locals */ + integer ch_offset, cc_offset; + + /* Local variables */ + integer i, k, ic; + real ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3; + integer idp2; + + +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*3 + (a_2))*ido + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + (ido << 2); + ch -= ch_offset; + 
cc_offset = 1 + ido * (1 + l1); + cc -= cc_offset; + --wa1; + --wa2; + + /* Function Body */ + for (k = 1; k <= l1; ++k) { + cr2 = cc_ref(1, k, 2) + cc_ref(1, k, 3); + ch_ref(1, 1, k) = cc_ref(1, k, 1) + cr2; + ch_ref(1, 3, k) = taui * (cc_ref(1, k, 3) - cc_ref(1, k, 2)); + ch_ref(ido, 2, k) = cc_ref(1, k, 1) + taur * cr2; + } + if (ido == 1) { + return; + } + idp2 = ido + 2; + for (k = 1; k <= l1; ++k) { + for (i = 3; i <= ido; i += 2) { + ic = idp2 - i; + dr2 = wa1[i - 2] * cc_ref(i - 1, k, 2) + wa1[i - 1] * + cc_ref(i, k, 2); + di2 = wa1[i - 2] * cc_ref(i, k, 2) - wa1[i - 1] * cc_ref( + i - 1, k, 2); + dr3 = wa2[i - 2] * cc_ref(i - 1, k, 3) + wa2[i - 1] * + cc_ref(i, k, 3); + di3 = wa2[i - 2] * cc_ref(i, k, 3) - wa2[i - 1] * cc_ref( + i - 1, k, 3); + cr2 = dr2 + dr3; + ci2 = di2 + di3; + ch_ref(i - 1, 1, k) = cc_ref(i - 1, k, 1) + cr2; + ch_ref(i, 1, k) = cc_ref(i, k, 1) + ci2; + tr2 = cc_ref(i - 1, k, 1) + taur * cr2; + ti2 = cc_ref(i, k, 1) + taur * ci2; + tr3 = taui * (di2 - di3); + ti3 = taui * (dr3 - dr2); + ch_ref(i - 1, 3, k) = tr2 + tr3; + ch_ref(ic - 1, 2, k) = tr2 - tr3; + ch_ref(i, 3, k) = ti2 + ti3; + ch_ref(ic, 2, k) = ti3 - ti2; + } + } +} /* radf3 */ + +#undef ch_ref +#undef cc_ref + + +static void radf4(integer ido, integer l1, const real *cc, real *ch, + const real *wa1, const real *wa2, const real *wa3) +{ + /* Initialized data */ + + static const real hsqt2 = .7071067811865475f; + + /* System generated locals */ + integer cc_offset, ch_offset; + + /* Local variables */ + integer i, k, ic; + real ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4; + integer idp2; + + +#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + a_1] +#define ch_ref(a_1,a_2,a_3) ch[((a_3)*4 + (a_2))*ido + a_1] + + /* Parameter adjustments */ + ch_offset = 1 + ido * 5; + ch -= ch_offset; + cc_offset = 1 + ido * (1 + l1); + cc -= cc_offset; + --wa1; + --wa2; + --wa3; + + /* Function Body */ + for (k = 1; k <= l1; ++k) { + tr1 = cc_ref(1, k, 2) + 
/*
 * radf5 -- butterfly for a factor of 5 in the real forward transform.
 *
 * rfftf1 calls this routine when the factor it is processing (ip) equals 5.
 *
 *   ido, l1   extents used to address the two data arrays: cc is indexed as
 *             cc(ido, l1, 5) and ch as ch(ido, 5, l1), first index varying
 *             fastest (see the cc_ref / ch_ref macros below).
 *   cc        input data, only read.
 *   ch        output data.
 *   wa1..wa4  coefficient tables, one per input column 2..5; read at the
 *             1-based positions [i-2] and [i-1] for odd i in 3..ido.
 *
 * NOTE(review): the "parameter adjustments" below move cc, ch and wa1..wa4 to
 * addresses before the start of the caller's buffers so that the body can use
 * 1-based indices.  Every access made through the macros lands back inside
 * the buffers, but forming such pointers is formally undefined in C -- confirm
 * this is acceptable for the supported compilers.
 */
static void radf5(integer ido, integer l1, const real *cc, real *ch,
                  const real *wa1, const real *wa2, const real *wa3, const real *wa4)
{
  /* Initialized data */

  static const real tr11 = .309016994374947f;   /* cos(2*pi/5) */
  static const real ti11 = .951056516295154f;   /* sin(2*pi/5) */
  static const real tr12 = -.809016994374947f;  /* cos(4*pi/5) */
  static const real ti12 = .587785252292473f;   /* sin(4*pi/5) */

  /* System generated locals */
  integer cc_offset, ch_offset;

  /* Local variables */
  integer i, k, ic;
  real ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3, dr4, dr5,
    cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5;
  integer idp2;


  /* 1-based, column-major style accessors (valid after the adjustments below). */
#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + a_1]
#define ch_ref(a_1,a_2,a_3) ch[((a_3)*5 + (a_2))*ido + a_1]

  /* Parameter adjustments */
  /* ch_offset / cc_offset are exactly the macro index of element (1,1,1), so
     ch_ref(1,1,1) and cc_ref(1,1,1) address the first element of the
     caller's arrays. */
  ch_offset = 1 + ido * 6;
  ch -= ch_offset;
  cc_offset = 1 + ido * (1 + l1);
  cc -= cc_offset;
  --wa1;
  --wa2;
  --wa3;
  --wa4;

  /* Function Body */
  /* First pass: the i = 1 sample of every k.  No coefficient table is
     needed here; only the fixed tr / ti constants are used. */
  for (k = 1; k <= l1; ++k) {
    cr2 = cc_ref(1, k, 5) + cc_ref(1, k, 2);
    ci5 = cc_ref(1, k, 5) - cc_ref(1, k, 2);
    cr3 = cc_ref(1, k, 4) + cc_ref(1, k, 3);
    ci4 = cc_ref(1, k, 4) - cc_ref(1, k, 3);
    ch_ref(1, 1, k) = cc_ref(1, k, 1) + cr2 + cr3;
    ch_ref(ido, 2, k) = cc_ref(1, k, 1) + tr11 * cr2 + tr12 * cr3;
    ch_ref(1, 3, k) = ti11 * ci5 + ti12 * ci4;
    ch_ref(ido, 4, k) = cc_ref(1, k, 1) + tr12 * cr2 + tr11 * cr3;
    ch_ref(1, 5, k) = ti12 * ci5 - ti11 * ci4;
  }
  if (ido == 1) {
    /* Nothing but the i = 1 samples exists. */
    return;
  }
  idp2 = ido + 2;
  /* Second pass: the remaining samples, handled as (i-1, i) pairs. */
  for (k = 1; k <= l1; ++k) {
    for (i = 3; i <= ido; i += 2) {
      ic = idp2 - i;  /* mirrored index: ic runs downwards while i runs upwards */
      /* (drN, diN): the pair (cc(i-1,k,N), cc(i,k,N)) multiplied, as a complex
         number, by the conjugate of (wa[i-2], wa[i-1]). */
      dr2 = wa1[i - 2] * cc_ref(i - 1, k, 2) + wa1[i - 1] * cc_ref(i, k, 2);
      di2 = wa1[i - 2] * cc_ref(i, k, 2) - wa1[i - 1] * cc_ref(i - 1, k, 2);
      dr3 = wa2[i - 2] * cc_ref(i - 1, k, 3) + wa2[i - 1] * cc_ref(i, k, 3);
      di3 = wa2[i - 2] * cc_ref(i, k, 3) - wa2[i - 1] * cc_ref(i - 1, k, 3);
      dr4 = wa3[i - 2] * cc_ref(i - 1, k, 4) + wa3[i - 1] * cc_ref(i, k, 4);
      di4 = wa3[i - 2] * cc_ref(i, k, 4) - wa3[i - 1] * cc_ref(i - 1, k, 4);
      dr5 = wa4[i - 2] * cc_ref(i - 1, k, 5) + wa4[i - 1] * cc_ref(i, k, 5);
      di5 = wa4[i - 2] * cc_ref(i, k, 5) - wa4[i - 1] * cc_ref(i - 1, k, 5);
      /* Sums and differences of the column pairs (2,5) and (3,4). */
      cr2 = dr2 + dr5;
      ci5 = dr5 - dr2;
      cr5 = di2 - di5;
      ci2 = di2 + di5;
      cr3 = dr3 + dr4;
      ci4 = dr4 - dr3;
      cr4 = di3 - di4;
      ci3 = di3 + di4;
      ch_ref(i - 1, 1, k) = cc_ref(i - 1, k, 1) + cr2 + cr3;
      ch_ref(i, 1, k) = cc_ref(i, k, 1) + ci2 + ci3;
      tr2 = cc_ref(i - 1, k, 1) + tr11 * cr2 + tr12 * cr3;
      ti2 = cc_ref(i, k, 1) + tr11 * ci2 + tr12 * ci3;
      tr3 = cc_ref(i - 1, k, 1) + tr12 * cr2 + tr11 * cr3;
      ti3 = cc_ref(i, k, 1) + tr12 * ci2 + tr11 * ci3;
      tr5 = ti11 * cr5 + ti12 * cr4;
      ti5 = ti11 * ci5 + ti12 * ci4;
      tr4 = ti12 * cr5 - ti11 * cr4;
      ti4 = ti12 * ci5 - ti11 * ci4;
      /* Odd output columns are stored at i, even ones at the mirrored ic. */
      ch_ref(i - 1, 3, k) = tr2 + tr5;
      ch_ref(ic - 1, 2, k) = tr2 - tr5;
      ch_ref(i, 3, k) = ti2 + ti5;
      ch_ref(ic, 2, k) = ti5 - ti2;
      ch_ref(i - 1, 5, k) = tr3 + tr4;
      ch_ref(ic - 1, 4, k) = tr3 - tr4;
      ch_ref(i, 5, k) = ti3 + ti4;
      ch_ref(ic, 4, k) = ti4 - ti3;
    }
  }
} /* radf5 */

#undef ch_ref
#undef cc_ref
1) { + for (ik = 1; ik <= idl1; ++ik) { + c2_ref(ik, 1) = ch2_ref(ik, 1); + } + } else { + for (ik = 1; ik <= idl1; ++ik) { + ch2_ref(ik, 1) = c2_ref(ik, 1); + } + for (j = 2; j <= ip; ++j) { + for (k = 1; k <= l1; ++k) { + ch_ref(1, k, j) = c1_ref(1, k, j); + } + } + if (nbd <= l1) { + is = -(ido); + for (j = 2; j <= ip; ++j) { + is += ido; + idij = is; + for (i = 3; i <= ido; i += 2) { + idij += 2; + for (k = 1; k <= l1; ++k) { + ch_ref(i - 1, k, j) = wa[idij - 1] * c1_ref(i - 1, k, j) + + wa[idij] * c1_ref(i, k, j); + ch_ref(i, k, j) = wa[idij - 1] * c1_ref(i, k, j) - wa[ + idij] * c1_ref(i - 1, k, j); + } + } + } + } else { + is = -(ido); + for (j = 2; j <= ip; ++j) { + is += ido; + for (k = 1; k <= l1; ++k) { + idij = is; + for (i = 3; i <= ido; i += 2) { + idij += 2; + ch_ref(i - 1, k, j) = wa[idij - 1] * c1_ref(i - 1, k, j) + + wa[idij] * c1_ref(i, k, j); + ch_ref(i, k, j) = wa[idij - 1] * c1_ref(i, k, j) - wa[ + idij] * c1_ref(i - 1, k, j); + } + } + } + } + if (nbd >= l1) { + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + for (k = 1; k <= l1; ++k) { + for (i = 3; i <= ido; i += 2) { + c1_ref(i - 1, k, j) = ch_ref(i - 1, k, j) + ch_ref(i - + 1, k, jc); + c1_ref(i - 1, k, jc) = ch_ref(i, k, j) - ch_ref(i, k, + jc); + c1_ref(i, k, j) = ch_ref(i, k, j) + ch_ref(i, k, jc); + c1_ref(i, k, jc) = ch_ref(i - 1, k, jc) - ch_ref(i - 1, + k, j); + } + } + } + } else { + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + for (i = 3; i <= ido; i += 2) { + for (k = 1; k <= l1; ++k) { + c1_ref(i - 1, k, j) = ch_ref(i - 1, k, j) + ch_ref(i - + 1, k, jc); + c1_ref(i - 1, k, jc) = ch_ref(i, k, j) - ch_ref(i, k, + jc); + c1_ref(i, k, j) = ch_ref(i, k, j) + ch_ref(i, k, jc); + c1_ref(i, k, jc) = ch_ref(i - 1, k, jc) - ch_ref(i - 1, + k, j); + } + } + } + } + } + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + for (k = 1; k <= l1; ++k) { + c1_ref(1, k, j) = ch_ref(1, k, j) + ch_ref(1, k, jc); + c1_ref(1, k, jc) = ch_ref(1, k, jc) - ch_ref(1, k, j); + } + } + + ar1 = 1.f; + ai1 
= 0.f; + for (l = 2; l <= ipph; ++l) { + lc = ipp2 - l; + ar1h = dcp * ar1 - dsp * ai1; + ai1 = dcp * ai1 + dsp * ar1; + ar1 = ar1h; + for (ik = 1; ik <= idl1; ++ik) { + ch2_ref(ik, l) = c2_ref(ik, 1) + ar1 * c2_ref(ik, 2); + ch2_ref(ik, lc) = ai1 * c2_ref(ik, ip); + } + dc2 = ar1; + ds2 = ai1; + ar2 = ar1; + ai2 = ai1; + for (j = 3; j <= ipph; ++j) { + jc = ipp2 - j; + ar2h = dc2 * ar2 - ds2 * ai2; + ai2 = dc2 * ai2 + ds2 * ar2; + ar2 = ar2h; + for (ik = 1; ik <= idl1; ++ik) { + ch2_ref(ik, l) = ch2_ref(ik, l) + ar2 * c2_ref(ik, j); + ch2_ref(ik, lc) = ch2_ref(ik, lc) + ai2 * c2_ref(ik, jc); + } + } + } + for (j = 2; j <= ipph; ++j) { + for (ik = 1; ik <= idl1; ++ik) { + ch2_ref(ik, 1) = ch2_ref(ik, 1) + c2_ref(ik, j); + } + } + + if (ido >= l1) { + for (k = 1; k <= l1; ++k) { + for (i = 1; i <= ido; ++i) { + cc_ref(i, 1, k) = ch_ref(i, k, 1); + } + } + } else { + for (i = 1; i <= ido; ++i) { + for (k = 1; k <= l1; ++k) { + cc_ref(i, 1, k) = ch_ref(i, k, 1); + } + } + } + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + j2 = j + j; + for (k = 1; k <= l1; ++k) { + cc_ref(ido, j2 - 2, k) = ch_ref(1, k, j); + cc_ref(1, j2 - 1, k) = ch_ref(1, k, jc); + } + } + if (ido == 1) { + return; + } + if (nbd >= l1) { + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + j2 = j + j; + for (k = 1; k <= l1; ++k) { + for (i = 3; i <= ido; i += 2) { + ic = idp2 - i; + cc_ref(i - 1, j2 - 1, k) = ch_ref(i - 1, k, j) + ch_ref( + i - 1, k, jc); + cc_ref(ic - 1, j2 - 2, k) = ch_ref(i - 1, k, j) - ch_ref( + i - 1, k, jc); + cc_ref(i, j2 - 1, k) = ch_ref(i, k, j) + ch_ref(i, k, + jc); + cc_ref(ic, j2 - 2, k) = ch_ref(i, k, jc) - ch_ref(i, k, j) + ; + } + } + } + } else { + for (j = 2; j <= ipph; ++j) { + jc = ipp2 - j; + j2 = j + j; + for (i = 3; i <= ido; i += 2) { + ic = idp2 - i; + for (k = 1; k <= l1; ++k) { + cc_ref(i - 1, j2 - 1, k) = ch_ref(i - 1, k, j) + ch_ref( + i - 1, k, jc); + cc_ref(ic - 1, j2 - 2, k) = ch_ref(i - 1, k, j) - ch_ref( + i - 1, k, jc); + cc_ref(i, j2 - 1, k) = 
ch_ref(i, k, j) + ch_ref(i, k, + jc); + cc_ref(ic, j2 - 2, k) = ch_ref(i, k, jc) - ch_ref(i, k, j) + ; + } + } + } + } +} /* radfg */ + +#undef ch2_ref +#undef ch_ref +#undef cc_ref +#undef c2_ref +#undef c1_ref + + +static void cfftb1(integer n, real *c, real *ch, const real *wa, integer *ifac) +{ + integer i, k1, l1, l2, na, nf, ip, iw, ix2, ix3, ix4, nac, ido, + idl1, idot; + + /* Function Body */ + nf = ifac[1]; + na = 0; + l1 = 1; + iw = 0; + for (k1 = 1; k1 <= nf; ++k1) { + ip = ifac[k1 + 1]; + l2 = ip * l1; + ido = n / l2; + idot = ido + ido; + idl1 = idot * l1; + switch (ip) { + case 4: + ix2 = iw + idot; + ix3 = ix2 + idot; + passb4(idot, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2], &wa[ix3]); + na = 1 - na; + break; + case 2: + passb2(idot, l1, na?ch:c, na?c:ch, &wa[iw]); + na = 1 - na; + break; + case 3: + ix2 = iw + idot; + passb3(idot, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2]); + na = 1 - na; + break; + case 5: + ix2 = iw + idot; + ix3 = ix2 + idot; + ix4 = ix3 + idot; + passfb5(idot, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], +1); + na = 1 - na; + break; + default: + if (na == 0) { + passfb(&nac, idot, ip, l1, idl1, c, c, c, ch, ch, &wa[iw], +1); + } else { + passfb(&nac, idot, ip, l1, idl1, ch, ch, ch, c, c, &wa[iw], +1); + } + if (nac != 0) { + na = 1 - na; + } + break; + } + l1 = l2; + iw += (ip - 1) * idot; + } + if (na == 0) { + return; + } + for (i = 0; i < 2*n; ++i) { + c[i] = ch[i]; + } +} /* cfftb1 */ + +void cfftb(integer n, real *c, real *wsave) +{ + integer iw1, iw2; + + /* Parameter adjustments */ + --wsave; + --c; + + /* Function Body */ + if (n == 1) { + return; + } + iw1 = 2*n + 1; + iw2 = iw1 + 2*n; + cfftb1(n, &c[1], &wsave[1], &wsave[iw1], (int*)&wsave[iw2]); +} /* cfftb */ + +static void cfftf1(integer n, real *c, real *ch, const real *wa, integer *ifac) +{ + /* Local variables */ + integer i, k1, l1, l2, na, nf, ip, iw, ix2, ix3, ix4, nac, ido, + idl1, idot; + + /* Function Body */ + nf = ifac[1]; + na = 0; + l1 = 1; 
/*
 * decompose -- split n into the factors used by the transform passes.
 *
 *   n      number to factor.
 *   ifac   1-based output table (callers pass an already decremented
 *          pointer): ifac[1] = n, ifac[2] = number of factors,
 *          ifac[3], ifac[4], ... = the factors.
 *   ntryh  the first four trial divisors, tried in the given order; after
 *          them the trial divisor keeps growing by 2.
 *
 * Every trial divisor is divided out as often as possible.  A factor 2 found
 * after other factors is moved to the front of the list (ifac[3]).
 *
 * Returns the number of factors.  For n < 1 there is nothing to factor: the
 * table is set to "n, 0 factors" and 0 is returned.  (Without this guard the
 * search never terminates for such n, and for n == 0 it also keeps writing
 * past the end of ifac.)
 */
static int decompose(integer n, integer *ifac, integer ntryh[4]) {
  integer ntry = 0, nl = n, nf = 0, ib, j = 0;

  if (n < 1) {
    ifac[1] = n;
    ifac[2] = 0;
    return 0;
  }

  while (nl != 1) {
    /* Pick the next trial divisor. */
    if (j < 4) {
      ntry = ntryh[j];
    } else {
      ntry += 2;
    }
    ++j;

    /* Divide it out as long as it goes evenly. */
    while (nl % ntry == 0) {
      ++nf;
      ifac[nf + 2] = ntry;
      nl /= ntry;
      if (ntry == 2 && nf != 1) {
        /* Shift the earlier factors up by one slot and put the 2 first. */
        for (ib = nf; ib >= 2; --ib) {
          ifac[ib + 2] = ifac[ib + 1];
        }
        ifac[3] = 2;
      }
    }
  }

  ifac[1] = n;
  ifac[2] = nf;
  return nf;
}
integer i, j, i1, k1, l1, l2; + real fi; + integer ld, ii, nf, ip; + real arg; + integer ido, ipm; + real argh; + integer idot; + real argld; + + /* Parameter adjustments */ + --ifac; + --wa; + + nf = decompose(n, ifac, ntryh); + + argh = (2*M_PI) / (real) (n); + i = 2; + l1 = 1; + for (k1 = 1; k1 <= nf; ++k1) { + ip = ifac[k1 + 2]; + ld = 0; + l2 = l1 * ip; + ido = n / l2; + idot = ido + ido + 2; + ipm = ip - 1; + for (j = 1; j <= ipm; ++j) { + i1 = i; + wa[i - 1] = 1.f; + wa[i] = 0.f; + ld += l1; + fi = 0.f; + argld = (real) ld * argh; + for (ii = 4; ii <= idot; ii += 2) { + i += 2; + fi += 1.f; + arg = fi * argld; + wa[i - 1] = cos(arg); + wa[i] = sin(arg); + } + if (ip > 5) { + wa[i1 - 1] = wa[i - 1]; + wa[i1] = wa[i]; + }; + } + l1 = l2; + } +} /* cffti1 */ + +void cffti(integer n, real *wsave) +{ + integer iw1, iw2; + /* Parameter adjustments */ + --wsave; + + /* Function Body */ + if (n == 1) { + return; + } + iw1 = 2*n + 1; + iw2 = iw1 + 2*n; + cffti1(n, &wsave[iw1], (int*)&wsave[iw2]); + return; +} /* cffti */ + +static void rfftb1(integer n, real *c, real *ch, const real *wa, integer *ifac) +{ + /* Local variables */ + integer i, k1, l1, l2, na, nf, ip, iw, ix2, ix3, ix4, ido, idl1; + + /* Function Body */ + nf = ifac[1]; + na = 0; + l1 = 1; + iw = 0; + for (k1 = 1; k1 <= nf; ++k1) { + ip = ifac[k1 + 1]; + l2 = ip * l1; + ido = n / l2; + idl1 = ido * l1; + switch (ip) { + case 4: + ix2 = iw + ido; + ix3 = ix2 + ido; + radb4(ido, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2], &wa[ix3]); + na = 1 - na; + break; + case 2: + radb2(ido, l1, na?ch:c, na?c:ch, &wa[iw]); + na = 1 - na; + break; + case 3: + ix2 = iw + ido; + radb3(ido, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2]); + na = 1 - na; + break; + case 5: + ix2 = iw + ido; + ix3 = ix2 + ido; + ix4 = ix3 + ido; + radb5(ido, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4]); + na = 1 - na; + break; + default: + if (na == 0) { + radbg(ido, ip, l1, idl1, c, c, c, ch, ch, &wa[iw]); + } else { + radbg(ido, ip, 
/*
 * rfftf1 -- apply one pass per stored factor for the real forward transform.
 *
 *   n     transform length; c and ch are each accessed as n reals.
 *   c     data buffer; holds the result on return.
 *   ch    second buffer of the same size, used alternately with c.
 *   wa    coefficient table handed to the pass routines.
 *   ifac  factor table: ifac[1] is the number of factors and ifac[2..] are
 *         the factors themselves.
 *
 * The factors are processed from the last stored one to the first.
 */
static void rfftf1(integer n, real *c, real *ch, const real *wa, integer *ifac)
{
  /* Local variables */
  integer i, k1, l1, l2, na, kh, nf, ip, iw, ix2, ix3, ix4, ido, idl1;

  /* Function Body */
  nf = ifac[1];
  na = 1;       /* after each pass: 1 = newest data is in c, 0 = it is in ch */
  l2 = n;
  iw = n-1;     /* the coefficient table is consumed from its end */
  for (k1 = 1; k1 <= nf; ++k1) {
    kh = nf - k1;
    ip = ifac[kh + 2];          /* current factor, walking the list backwards */
    l1 = l2 / ip;
    ido = n / l2;
    idl1 = ido * l1;
    iw -= (ip - 1) * ido;       /* start of this pass's (ip - 1) coefficient rows */
    na = 1 - na;
    /* Fixed-radix passes: read from one buffer, write to the other
       (na == 0: c -> ch, na == 1: ch -> c). */
    switch (ip) {
      case 4:
        ix2 = iw + ido;
        ix3 = ix2 + ido;
        radf4(ido, l1, na ? ch : c, na ? c : ch, &wa[iw], &wa[ix2], &wa[ix3]);
        break;
      case 2:
        radf2(ido, l1, na ? ch : c, na ? c : ch, &wa[iw]);
        break;
      case 3:
        ix2 = iw + ido;
        radf3(ido, l1, na ? ch : c, na ? c : ch, &wa[iw], &wa[ix2]);
        break;
      case 5:
        ix2 = iw + ido;
        ix3 = ix2 + ido;
        ix4 = ix3 + ido;
        radf5(ido, l1, na ? ch : c, na ? c : ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4]);
        break;
      default:
        /* Generic factor: radfg is given both buffers and na is set
           explicitly afterwards instead of relying on the toggle above. */
        if (ido == 1) {
          na = 1 - na;
        }
        if (na == 0) {
          radfg(ido, ip, l1, idl1, c, c, c, ch, ch, &wa[iw]);
          na = 1;
        } else {
          radfg(ido, ip, l1, idl1, ch, ch, ch, c, c, &wa[iw]);
          na = 0;
        }
        break;
    }
    l2 = l1;
  }
  if (na == 1) {
    /* Result already sits in c. */
    return;
  }
  /* Result ended up in ch: copy it back to the caller's buffer. */
  for (i = 0; i < n; ++i) {
    c[i] = ch[i];
  }
} /* rfftf1 */
variables */ + integer i, k, kc, np2, ns2; + real xim1; + integer modn; + + /* Parameter adjustments */ + --xh; + --w; + --x; + + /* Function Body */ + ns2 = (n + 1) / 2; + np2 = n + 2; + for (i = 3; i <= n; i += 2) { + xim1 = x[i - 1] + x[i]; + x[i] -= x[i - 1]; + x[i - 1] = xim1; + } + x[1] += x[1]; + modn = n % 2; + if (modn == 0) { + x[n] += x[n]; + } + rfftb(n, &x[1], &xh[1]); + for (k = 2; k <= ns2; ++k) { + kc = np2 - k; + xh[k] = w[k - 1] * x[kc] + w[kc - 1] * x[k]; + xh[kc] = w[k - 1] * x[k] - w[kc - 1] * x[kc]; + } + if (modn == 0) { + x[ns2 + 1] = w[ns2] * (x[ns2 + 1] + x[ns2 + 1]); + } + for (k = 2; k <= ns2; ++k) { + kc = np2 - k; + x[k] = xh[k] + xh[kc]; + x[kc] = xh[k] - xh[kc]; + } + x[1] += x[1]; +} /* cosqb1 */ + +void cosqb(integer n, real *x, real *wsave) +{ + static const real tsqrt2 = 2.82842712474619f; + + /* Local variables */ + real x1; + + /* Parameter adjustments */ + --wsave; + --x; + + if (n < 2) { + x[1] *= 4.f; + } else if (n == 2) { + x1 = (x[1] + x[2]) * 4.f; + x[2] = tsqrt2 * (x[1] - x[2]); + x[1] = x1; + } else { + cosqb1(n, &x[1], &wsave[1], &wsave[n + 1]); + } +} /* cosqb */ + +static void cosqf1(integer n, real *x, real *w, real *xh) +{ + /* Local variables */ + integer i, k, kc, np2, ns2; + real xim1; + integer modn; + + /* Parameter adjustments */ + --xh; + --w; + --x; + + /* Function Body */ + ns2 = (n + 1) / 2; + np2 = n + 2; + for (k = 2; k <= ns2; ++k) { + kc = np2 - k; + xh[k] = x[k] + x[kc]; + xh[kc] = x[k] - x[kc]; + } + modn = n % 2; + if (modn == 0) { + xh[ns2 + 1] = x[ns2 + 1] + x[ns2 + 1]; + } + for (k = 2; k <= ns2; ++k) { + kc = np2 - k; + x[k] = w[k - 1] * xh[kc] + w[kc - 1] * xh[k]; + x[kc] = w[k - 1] * xh[k] - w[kc - 1] * xh[kc]; + } + if (modn == 0) { + x[ns2 + 1] = w[ns2] * xh[ns2 + 1]; + } + rfftf(n, &x[1], &xh[1]); + for (i = 3; i <= n; i += 2) { + xim1 = x[i - 1] - x[i]; + x[i] = x[i - 1] + x[i]; + x[i - 1] = xim1; + } +} /* cosqf1 */ + +void cosqf(integer n, real *x, real *wsave) +{ + static const real 
sqrt2 = 1.4142135623731f; + + /* Local variables */ + real tsqx; + + /* Parameter adjustments */ + --wsave; + --x; + + if (n == 2) { + tsqx = sqrt2 * x[2]; + x[2] = x[1] - tsqx; + x[1] += tsqx; + } else if (n > 2) { + cosqf1(n, &x[1], &wsave[1], &wsave[n + 1]); + } +} /* cosqf */ + +void cosqi(integer n, real *wsave) +{ + /* Local variables */ + integer k; + real fk, dt; + + /* Parameter adjustments */ + --wsave; + + dt = M_PI/2 / (real) (n); + fk = 0.f; + for (k = 1; k <= n; ++k) { + fk += 1.f; + wsave[k] = cos(fk * dt); + } + rffti(n, &wsave[n + 1]); +} /* cosqi */ + +void cost(integer n, real *x, real *wsave) +{ + /* Local variables */ + integer i, k; + real c1, t1, t2; + integer kc; + real xi; + integer nm1, np1; + real x1h; + integer ns2; + real tx2, x1p3, xim2; + integer modn; + + /* Parameter adjustments */ + --wsave; + --x; + + /* Function Body */ + nm1 = n - 1; + np1 = n + 1; + ns2 = n / 2; + if (n < 2) { + } else if (n == 2) { + x1h = x[1] + x[2]; + x[2] = x[1] - x[2]; + x[1] = x1h; + } else if (n == 3) { + x1p3 = x[1] + x[3]; + tx2 = x[2] + x[2]; + x[2] = x[1] - x[3]; + x[1] = x1p3 + tx2; + x[3] = x1p3 - tx2; + } else { + c1 = x[1] - x[n]; + x[1] += x[n]; + for (k = 2; k <= ns2; ++k) { + kc = np1 - k; + t1 = x[k] + x[kc]; + t2 = x[k] - x[kc]; + c1 += wsave[kc] * t2; + t2 = wsave[k] * t2; + x[k] = t1 - t2; + x[kc] = t1 + t2; + } + modn = n % 2; + if (modn != 0) { + x[ns2 + 1] += x[ns2 + 1]; + } + rfftf(nm1, &x[1], &wsave[n + 1]); + xim2 = x[2]; + x[2] = c1; + for (i = 4; i <= n; i += 2) { + xi = x[i]; + x[i] = x[i - 2] - x[i - 1]; + x[i - 1] = xim2; + xim2 = xi; + } + if (modn != 0) { + x[n] = xim2; + } + } +} /* cost */ + +void costi(integer n, real *wsave) +{ + /* Initialized data */ + + /* Local variables */ + integer k, kc; + real fk, dt; + integer nm1, np1, ns2; + + /* Parameter adjustments */ + --wsave; + + /* Function Body */ + if (n <= 3) { + return; + } + nm1 = n - 1; + np1 = n + 1; + ns2 = n / 2; + dt = M_PI / (real) nm1; + fk = 0.f; + for (k = 
2; k <= ns2; ++k) { + kc = np1 - k; + fk += 1.f; + wsave[k] = sin(fk * dt) * 2.f; + wsave[kc] = cos(fk * dt) * 2.f; + } + rffti(nm1, &wsave[n + 1]); +} /* costi */ + +void sinqb(integer n, real *x, real *wsave) +{ + /* Local variables */ + integer k, kc, ns2; + real xhold; + + /* Parameter adjustments */ + --wsave; + --x; + + /* Function Body */ + if (n <= 1) { + x[1] *= 4.f; + return; + } + ns2 = n / 2; + for (k = 2; k <= n; k += 2) { + x[k] = -x[k]; + } + cosqb(n, &x[1], &wsave[1]); + for (k = 1; k <= ns2; ++k) { + kc = n - k; + xhold = x[k]; + x[k] = x[kc + 1]; + x[kc + 1] = xhold; + } +} /* sinqb */ + +void sinqf(integer n, real *x, real *wsave) +{ + /* Local variables */ + integer k, kc, ns2; + real xhold; + + /* Parameter adjustments */ + --wsave; + --x; + + /* Function Body */ + if (n == 1) { + return; + } + ns2 = n / 2; + for (k = 1; k <= ns2; ++k) { + kc = n - k; + xhold = x[k]; + x[k] = x[kc + 1]; + x[kc + 1] = xhold; + } + cosqf(n, &x[1], &wsave[1]); + for (k = 2; k <= n; k += 2) { + x[k] = -x[k]; + } +} /* sinqf */ + +void sinqi(integer n, real *wsave) +{ + + /* Parameter adjustments */ + --wsave; + + /* Function Body */ + cosqi(n, &wsave[1]); +} /* sinqi */ + +static void sint1(integer n, real *war, real *was, real *xh, real * + x, integer *ifac) +{ + /* Initialized data */ + + static const real sqrt3 = 1.73205080756888f; + + /* Local variables */ + integer i, k; + real t1, t2; + integer kc, np1, ns2, modn; + real xhold; + + /* Parameter adjustments */ + --ifac; + --x; + --xh; + --was; + --war; + + /* Function Body */ + for (i = 1; i <= n; ++i) { + xh[i] = war[i]; + war[i] = x[i]; + } + + if (n < 2) { + xh[1] += xh[1]; + } else if (n == 2) { + xhold = sqrt3 * (xh[1] + xh[2]); + xh[2] = sqrt3 * (xh[1] - xh[2]); + xh[1] = xhold; + } else { + np1 = n + 1; + ns2 = n / 2; + x[1] = 0.f; + for (k = 1; k <= ns2; ++k) { + kc = np1 - k; + t1 = xh[k] - xh[kc]; + t2 = was[k] * (xh[k] + xh[kc]); + x[k + 1] = t1 + t2; + x[kc + 1] = t2 - t1; + } + modn = n % 2; + if 
(modn != 0) { + x[ns2 + 2] = xh[ns2 + 1] * 4.f; + } + rfftf1(np1, &x[1], &xh[1], &war[1], &ifac[1]); + xh[1] = x[1] * .5f; + for (i = 3; i <= n; i += 2) { + xh[i - 1] = -x[i]; + xh[i] = xh[i - 2] + x[i - 1]; + } + if (modn == 0) { + xh[n] = -x[n + 1]; + } + } + for (i = 1; i <= n; ++i) { + x[i] = war[i]; + war[i] = xh[i]; + } +} /* sint1 */ + +void sinti(integer n, real *wsave) +{ + /* Local variables */ + integer k; + real dt; + integer np1, ns2; + + /* Parameter adjustments */ + --wsave; + + /* Function Body */ + if (n <= 1) { + return; + } + ns2 = n / 2; + np1 = n + 1; + dt = M_PI / (real) np1; + for (k = 1; k <= ns2; ++k) { + wsave[k] = sin(k * dt) * 2.f; + } + rffti(np1, &wsave[ns2 + 1]); +} /* sinti */ + +void sint(integer n, real *x, real *wsave) +{ + integer np1, iw1, iw2, iw3; + + /* Parameter adjustments */ + --wsave; + --x; + + /* Function Body */ + np1 = n + 1; + iw1 = n / 2 + 1; + iw2 = iw1 + np1; + iw3 = iw2 + np1; + sint1(n, &x[1], &wsave[1], &wsave[iw1], &wsave[iw2], (int*)&wsave[iw3]); +} /* sint */ + +#ifdef TESTING_FFTPACK +#include + +int main(void) +{ + static integer nd[] = { 120,91,54,49,32,28,24,8,4,3,2 }; + + /* System generated locals */ + real r1, r2, r3; + f77complex q1, q2, q3; + + /* Local variables */ + integer i, j, k, n; + real w[2000], x[200], y[200], cf, fn, dt; + f77complex cx[200], cy[200]; + real xh[200]; + integer nz, nm1, np1, ns2; + real arg, tfn; + real sum, arg1, arg2; + real sum1, sum2, dcfb; + integer modn; + real rftb, rftf; + real sqrt2; + real rftfb; + real costt, sintt, dcfftb, dcfftf, cosqfb, costfb; + real sinqfb; + real sintfb; + real cosqbt, cosqft, sinqbt, sinqft; + + + + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + + /* VERSION 4 APRIL 1985 */ + + /* A TEST DRIVER FOR */ + /* A PACKAGE OF FORTRAN SUBPROGRAMS FOR THE FAST FOURIER */ + /* TRANSFORM OF PERIODIC AND OTHER SYMMETRIC SEQUENCES */ + + /* BY */ + + /* PAUL N SWARZTRAUBER */ + + /* NATIONAL CENTER FOR ATMOSPHERIC RESEARCH 
BOULDER,COLORADO 80307 */ + + /* WHICH IS SPONSORED BY THE NATIONAL SCIENCE FOUNDATION */ + + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + + + /* THIS PROGRAM TESTS THE PACKAGE OF FAST FOURIER */ + /* TRANSFORMS FOR BOTH COMPLEX AND REAL PERIODIC SEQUENCES AND */ + /* CERTAIN OTHER SYMMETRIC SEQUENCES THAT ARE LISTED BELOW. */ + + /* 1. RFFTI INITIALIZE RFFTF AND RFFTB */ + /* 2. RFFTF FORWARD TRANSFORM OF A REAL PERIODIC SEQUENCE */ + /* 3. RFFTB BACKWARD TRANSFORM OF A REAL COEFFICIENT ARRAY */ + + /* 4. EZFFTI INITIALIZE EZFFTF AND EZFFTB */ + /* 5. EZFFTF A SIMPLIFIED REAL PERIODIC FORWARD TRANSFORM */ + /* 6. EZFFTB A SIMPLIFIED REAL PERIODIC BACKWARD TRANSFORM */ + + /* 7. SINTI INITIALIZE SINT */ + /* 8. SINT SINE TRANSFORM OF A REAL ODD SEQUENCE */ + + /* 9. COSTI INITIALIZE COST */ + /* 10. COST COSINE TRANSFORM OF A REAL EVEN SEQUENCE */ + + /* 11. SINQI INITIALIZE SINQF AND SINQB */ + /* 12. SINQF FORWARD SINE TRANSFORM WITH ODD WAVE NUMBERS */ + /* 13. SINQB UNNORMALIZED INVERSE OF SINQF */ + + /* 14. COSQI INITIALIZE COSQF AND COSQB */ + /* 15. COSQF FORWARD COSINE TRANSFORM WITH ODD WAVE NUMBERS */ + /* 16. COSQB UNNORMALIZED INVERSE OF COSQF */ + + /* 17. CFFTI INITIALIZE CFFTF AND CFFTB */ + /* 18. CFFTF FORWARD TRANSFORM OF A COMPLEX PERIODIC SEQUENCE */ + /* 19. 
CFFTB UNNORMALIZED INVERSE OF CFFTF */ + + + sqrt2 = sqrt(2.f); + int all_ok = 1; + for (nz = 1; nz <= (int)(sizeof nd/sizeof nd[0]); ++nz) { + n = nd[nz - 1]; + modn = n % 2; + fn = (real) n; + tfn = fn + fn; + np1 = n + 1; + nm1 = n - 1; + for (j = 1; j <= np1; ++j) { + x[j - 1] = sin((real) j * sqrt2); + y[j - 1] = x[j - 1]; + xh[j - 1] = x[j - 1]; + } + + /* TEST SUBROUTINES RFFTI,RFFTF AND RFFTB */ + + rffti(n, w); + dt = (2*M_PI) / fn; + ns2 = (n + 1) / 2; + if (ns2 < 2) { + goto L104; + } + for (k = 2; k <= ns2; ++k) { + sum1 = 0.f; + sum2 = 0.f; + arg = (real) (k - 1) * dt; + for (i = 1; i <= n; ++i) { + arg1 = (real) (i - 1) * arg; + sum1 += x[i - 1] * cos(arg1); + sum2 += x[i - 1] * sin(arg1); + } + y[(k << 1) - 3] = sum1; + y[(k << 1) - 2] = -sum2; + } + L104: + sum1 = 0.f; + sum2 = 0.f; + for (i = 1; i <= nm1; i += 2) { + sum1 += x[i - 1]; + sum2 += x[i]; + } + if (modn == 1) { + sum1 += x[n - 1]; + } + y[0] = sum1 + sum2; + if (modn == 0) { + y[n - 1] = sum1 - sum2; + } + rfftf(n, x, w); + rftf = 0.f; + for (i = 1; i <= n; ++i) { + /* Computing MAX */ + r2 = rftf, r3 = (r1 = x[i - 1] - y[i - 1], fabs(r1)); + rftf = dmax(r2,r3); + x[i - 1] = xh[i - 1]; + } + rftf /= fn; + for (i = 1; i <= n; ++i) { + sum = x[0] * .5f; + arg = (real) (i - 1) * dt; + if (ns2 < 2) { + goto L108; + } + for (k = 2; k <= ns2; ++k) { + arg1 = (real) (k - 1) * arg; + sum = sum + x[(k << 1) - 3] * cos(arg1) - x[(k << 1) - 2] * + sin(arg1); + } + L108: + if (modn == 0) { + sum += (real)pow(-1, i-1) * .5f * x[n - 1]; + } + y[i - 1] = sum + sum; + } + rfftb(n, x, w); + rftb = 0.f; + for (i = 1; i <= n; ++i) { + /* Computing MAX */ + r2 = rftb, r3 = (r1 = x[i - 1] - y[i - 1], fabs(r1)); + rftb = dmax(r2,r3); + x[i - 1] = xh[i - 1]; + y[i - 1] = xh[i - 1]; + } + rfftb(n, y, w); + rfftf(n, y, w); + cf = 1.f / fn; + rftfb = 0.f; + for (i = 1; i <= n; ++i) { + /* Computing MAX */ + r2 = rftfb, r3 = (r1 = cf * y[i - 1] - x[i - 1], fabs( + r1)); + rftfb = dmax(r2,r3); + } + + /* TEST 
SUBROUTINES SINTI AND SINT */ + + dt = M_PI / fn; + for (i = 1; i <= nm1; ++i) { + x[i - 1] = xh[i - 1]; + } + for (i = 1; i <= nm1; ++i) { + y[i - 1] = 0.f; + arg1 = (real) i * dt; + for (k = 1; k <= nm1; ++k) { + y[i - 1] += x[k - 1] * sin((real) k * arg1); + } + y[i - 1] += y[i - 1]; + } + sinti(nm1, w); + sint(nm1, x, w); + cf = .5f / fn; + sintt = 0.f; + for (i = 1; i <= nm1; ++i) { + /* Computing MAX */ + r2 = sintt, r3 = (r1 = x[i - 1] - y[i - 1], fabs(r1)); + sintt = dmax(r2,r3); + x[i - 1] = xh[i - 1]; + y[i - 1] = x[i - 1]; + } + sintt = cf * sintt; + sint(nm1, x, w); + sint(nm1, x, w); + sintfb = 0.f; + for (i = 1; i <= nm1; ++i) { + /* Computing MAX */ + r2 = sintfb, r3 = (r1 = cf * x[i - 1] - y[i - 1], fabs( + r1)); + sintfb = dmax(r2,r3); + } + + /* TEST SUBROUTINES COSTI AND COST */ + + for (i = 1; i <= np1; ++i) { + x[i - 1] = xh[i - 1]; + } + for (i = 1; i <= np1; ++i) { + y[i - 1] = (x[0] + (real) pow(-1, i+1) * x[n]) * .5f; + arg = (real) (i - 1) * dt; + for (k = 2; k <= n; ++k) { + y[i - 1] += x[k - 1] * cos((real) (k - 1) * arg); + } + y[i - 1] += y[i - 1]; + } + costi(np1, w); + cost(np1, x, w); + costt = 0.f; + for (i = 1; i <= np1; ++i) { + /* Computing MAX */ + r2 = costt, r3 = (r1 = x[i - 1] - y[i - 1], fabs(r1)); + costt = dmax(r2,r3); + x[i - 1] = xh[i - 1]; + y[i - 1] = xh[i - 1]; + } + costt = cf * costt; + cost(np1, x, w); + cost(np1, x, w); + costfb = 0.f; + for (i = 1; i <= np1; ++i) { + /* Computing MAX */ + r2 = costfb, r3 = (r1 = cf * x[i - 1] - y[i - 1], fabs( + r1)); + costfb = dmax(r2,r3); + } + + /* TEST SUBROUTINES SINQI,SINQF AND SINQB */ + + cf = .25f / fn; + for (i = 1; i <= n; ++i) { + y[i - 1] = xh[i - 1]; + } + dt = M_PI / (fn + fn); + for (i = 1; i <= n; ++i) { + x[i - 1] = 0.f; + arg = dt * (real) i; + for (k = 1; k <= n; ++k) { + x[i - 1] += y[k - 1] * sin((real) (k + k - 1) * arg); + } + x[i - 1] *= 4.f; + } + sinqi(n, w); + sinqb(n, y, w); + sinqbt = 0.f; + for (i = 1; i <= n; ++i) { + /* Computing MAX */ + r2 = 
sinqbt, r3 = (r1 = y[i - 1] - x[i - 1], fabs(r1)) + ; + sinqbt = dmax(r2,r3); + x[i - 1] = xh[i - 1]; + } + sinqbt = cf * sinqbt; + for (i = 1; i <= n; ++i) { + arg = (real) (i + i - 1) * dt; + y[i - 1] = (real) pow(-1, i+1) * .5f * x[n - 1]; + for (k = 1; k <= nm1; ++k) { + y[i - 1] += x[k - 1] * sin((real) k * arg); + } + y[i - 1] += y[i - 1]; + } + sinqf(n, x, w); + sinqft = 0.f; + for (i = 1; i <= n; ++i) { + /* Computing MAX */ + r2 = sinqft, r3 = (r1 = x[i - 1] - y[i - 1], fabs(r1)) + ; + sinqft = dmax(r2,r3); + y[i - 1] = xh[i - 1]; + x[i - 1] = xh[i - 1]; + } + sinqf(n, y, w); + sinqb(n, y, w); + sinqfb = 0.f; + for (i = 1; i <= n; ++i) { + /* Computing MAX */ + r2 = sinqfb, r3 = (r1 = cf * y[i - 1] - x[i - 1], fabs( + r1)); + sinqfb = dmax(r2,r3); + } + + /* TEST SUBROUTINES COSQI,COSQF AND COSQB */ + + for (i = 1; i <= n; ++i) { + y[i - 1] = xh[i - 1]; + } + for (i = 1; i <= n; ++i) { + x[i - 1] = 0.f; + arg = (real) (i - 1) * dt; + for (k = 1; k <= n; ++k) { + x[i - 1] += y[k - 1] * cos((real) (k + k - 1) * arg); + } + x[i - 1] *= 4.f; + } + cosqi(n, w); + cosqb(n, y, w); + cosqbt = 0.f; + for (i = 1; i <= n; ++i) { + /* Computing MAX */ + r2 = cosqbt, r3 = (r1 = x[i - 1] - y[i - 1], fabs(r1)) + ; + cosqbt = dmax(r2,r3); + x[i - 1] = xh[i - 1]; + } + cosqbt = cf * cosqbt; + for (i = 1; i <= n; ++i) { + y[i - 1] = x[0] * .5f; + arg = (real) (i + i - 1) * dt; + for (k = 2; k <= n; ++k) { + y[i - 1] += x[k - 1] * cos((real) (k - 1) * arg); + } + y[i - 1] += y[i - 1]; + } + cosqf(n, x, w); + cosqft = 0.f; + for (i = 1; i <= n; ++i) { + /* Computing MAX */ + r2 = cosqft, r3 = (r1 = y[i - 1] - x[i - 1], fabs(r1)) + ; + cosqft = dmax(r2,r3); + x[i - 1] = xh[i - 1]; + y[i - 1] = xh[i - 1]; + } + cosqft = cf * cosqft; + cosqb(n, x, w); + cosqf(n, x, w); + cosqfb = 0.f; + for (i = 1; i <= n; ++i) { + /* Computing MAX */ + r2 = cosqfb, r3 = (r1 = cf * x[i - 1] - y[i - 1], fabs(r1)); + cosqfb = dmax(r2,r3); + } + + /* TEST CFFTI,CFFTF,CFFTB */ + + for (i = 1; i <= 
n; ++i) { + r1 = cos(sqrt2 * (real) i); + r2 = sin(sqrt2 * (real) (i * i)); + q1.r = r1, q1.i = r2; + cx[i-1].r = q1.r, cx[i-1].i = q1.i; + } + dt = (2*M_PI) / fn; + for (i = 1; i <= n; ++i) { + arg1 = -((real) (i - 1)) * dt; + cy[i-1].r = 0.f, cy[i-1].i = 0.f; + for (k = 1; k <= n; ++k) { + arg2 = (real) (k - 1) * arg1; + r1 = cos(arg2); + r2 = sin(arg2); + q3.r = r1, q3.i = r2; + q2.r = q3.r * cx[k-1].r - q3.i * cx[k-1].i, q2.i = + q3.r * cx[k-1].i + q3.i * cx[k-1].r; + q1.r = cy[i-1].r + q2.r, q1.i = cy[i-1].i + q2.i; + cy[i-1].r = q1.r, cy[i-1].i = q1.i; + } + } + cffti(n, w); + cfftf(n, (real*)cx, w); + dcfftf = 0.f; + for (i = 1; i <= n; ++i) { + /* Computing MAX */ + q1.r = cx[i-1].r - cy[i-1].r, q1.i = cx[i-1].i - cy[i-1] + .i; + r1 = dcfftf, r2 = c_abs(&q1); + dcfftf = dmax(r1,r2); + q1.r = cx[i-1].r / fn, q1.i = cx[i-1].i / fn; + cx[i-1].r = q1.r, cx[i-1].i = q1.i; + } + dcfftf /= fn; + for (i = 1; i <= n; ++i) { + arg1 = (real) (i - 1) * dt; + cy[i-1].r = 0.f, cy[i-1].i = 0.f; + for (k = 1; k <= n; ++k) { + arg2 = (real) (k - 1) * arg1; + r1 = cos(arg2); + r2 = sin(arg2); + q3.r = r1, q3.i = r2; + q2.r = q3.r * cx[k-1].r - q3.i * cx[k-1].i, q2.i = + q3.r * cx[k-1].i + q3.i * cx[k-1].r; + q1.r = cy[i-1].r + q2.r, q1.i = cy[i-1].i + q2.i; + cy[i-1].r = q1.r, cy[i-1].i = q1.i; + } + } + cfftb(n, (real*)cx, w); + dcfftb = 0.f; + for (i = 1; i <= n; ++i) { + /* Computing MAX */ + q1.r = cx[i-1].r - cy[i-1].r, q1.i = cx[i-1].i - cy[i-1].i; + r1 = dcfftb, r2 = c_abs(&q1); + dcfftb = dmax(r1,r2); + cx[i-1].r = cy[i-1].r, cx[i-1].i = cy[i-1].i; + } + cf = 1.f / fn; + cfftf(n, (real*)cx, w); + cfftb(n, (real*)cx, w); + dcfb = 0.f; + for (i = 1; i <= n; ++i) { + /* Computing MAX */ + q2.r = cf * cx[i-1].r, q2.i = cf * cx[i-1].i; + q1.r = q2.r - cy[i-1].r, q1.i = q2.i - cy[i-1].i; + r1 = dcfb, r2 = c_abs(&q1); + dcfb = dmax(r1,r2); + } + printf("%d\tRFFTF %10.3g\tRFFTB %10.ge\tRFFTFB %10.3g", n, rftf, rftb, rftfb); + printf( "\tSINT %10.3g\tSINTFB %10.ge\tCOST 
%10.3g\n", sintt, sintfb, costt); + printf( "\tCOSTFB %10.3g\tSINQF %10.ge\tSINQB %10.3g", costfb, sinqft, sinqbt); + printf( "\tSINQFB %10.3g\tCOSQF %10.ge\tCOSQB %10.3g\n", sinqfb, cosqft, cosqbt); + printf( "\tCOSQFB %10.3g\t", cosqfb); + printf( "\tCFFTF %10.ge\tCFFTB %10.3g\n", dcfftf, dcfftb); + printf( "\tCFFTFB %10.3g\n", dcfb); + +#define CHECK(x) if (x > 1e-3) { printf(#x " failed: %g\n", x); all_ok = 0; } + CHECK(rftf); CHECK(rftb); CHECK(rftfb); CHECK(sintt); CHECK(sintfb); CHECK(costt); + CHECK(costfb); CHECK(sinqft); CHECK(sinqbt); CHECK(sinqfb); CHECK(cosqft); CHECK(cosqbt); + CHECK(cosqfb); CHECK(dcfftf); CHECK(dcfftb); + } + + if (all_ok) printf("Everything looks fine.\n"); + else printf("ERRORS WERE DETECTED.\n"); + /* + expected: + 120 RFFTF 2.786e-06 RFFTB 6.847e-04 RFFTFB 2.795e-07 SINT 1.312e-06 SINTFB 1.237e-06 COST 1.319e-06 + COSTFB 4.355e-06 SINQF 3.281e-04 SINQB 1.876e-06 SINQFB 2.198e-07 COSQF 6.199e-07 COSQB 2.193e-06 + COSQFB 2.300e-07 DEZF 5.573e-06 DEZB 1.363e-05 DEZFB 1.371e-06 CFFTF 5.590e-06 CFFTB 4.751e-05 + CFFTFB 4.215e-07 + 54 RFFTF 4.708e-07 RFFTB 3.052e-05 RFFTFB 3.439e-07 SINT 3.532e-07 SINTFB 4.145e-07 COST 3.002e-07 + COSTFB 6.343e-07 SINQF 4.959e-05 SINQB 4.415e-07 SINQFB 2.882e-07 COSQF 2.826e-07 COSQB 2.472e-07 + COSQFB 3.439e-07 DEZF 9.388e-07 DEZB 5.066e-06 DEZFB 5.960e-07 CFFTF 1.426e-06 CFFTB 9.482e-06 + CFFTFB 2.980e-07 + 49 RFFTF 4.476e-07 RFFTB 5.341e-05 RFFTFB 2.574e-07 SINT 9.196e-07 SINTFB 9.401e-07 COST 8.174e-07 + COSTFB 1.331e-06 SINQF 4.005e-05 SINQB 9.342e-07 SINQFB 3.057e-07 COSQF 2.530e-07 COSQB 6.228e-07 + COSQFB 4.826e-07 DEZF 9.071e-07 DEZB 4.590e-06 DEZFB 5.960e-07 CFFTF 2.095e-06 CFFTB 1.414e-05 + CFFTFB 7.398e-07 + 32 RFFTF 4.619e-07 RFFTB 2.861e-05 RFFTFB 1.192e-07 SINT 3.874e-07 SINTFB 4.172e-07 COST 4.172e-07 + COSTFB 1.699e-06 SINQF 2.551e-05 SINQB 6.407e-07 SINQFB 2.980e-07 COSQF 1.639e-07 COSQB 1.714e-07 + COSQFB 2.384e-07 DEZF 1.013e-06 DEZB 2.339e-06 DEZFB 7.749e-07 CFFTF 1.127e-06 CFFTB 
6.744e-06 + CFFTFB 2.666e-07 + 4 RFFTF 1.490e-08 RFFTB 1.490e-07 RFFTFB 5.960e-08 SINT 7.451e-09 SINTFB 0.000e+00 COST 2.980e-08 + COSTFB 1.192e-07 SINQF 4.768e-07 SINQB 2.980e-08 SINQFB 5.960e-08 COSQF 2.608e-08 COSQB 5.960e-08 + COSQFB 1.192e-07 DEZF 2.980e-08 DEZB 5.960e-08 DEZFB 0.000e+00 CFFTF 6.664e-08 CFFTB 5.960e-08 + CFFTFB 6.144e-08 + 3 RFFTF 3.974e-08 RFFTB 1.192e-07 RFFTFB 3.303e-08 SINT 1.987e-08 SINTFB 1.069e-08 COST 4.967e-08 + COSTFB 5.721e-08 SINQF 8.941e-08 SINQB 2.980e-08 SINQFB 1.259e-07 COSQF 7.451e-09 COSQB 4.967e-08 + COSQFB 7.029e-08 DEZF 1.192e-07 DEZB 5.960e-08 DEZFB 5.960e-08 CFFTF 7.947e-08 CFFTB 8.429e-08 + CFFTFB 9.064e-08 + 2 RFFTF 0.000e+00 RFFTB 0.000e+00 RFFTFB 0.000e+00 SINT 0.000e+00 SINTFB 0.000e+00 COST 0.000e+00 + COSTFB 0.000e+00 SINQF 1.192e-07 SINQB 2.980e-08 SINQFB 5.960e-08 COSQF 7.451e-09 COSQB 1.490e-08 + COSQFB 0.000e+00 DEZF 0.000e+00 DEZB 0.000e+00 DEZFB 0.000e+00 CFFTF 0.000e+00 CFFTB 5.960e-08 + CFFTFB 5.960e-08 + Everything looks fine. + + */ + + return all_ok ? 0 : 1; +} +#endif //TESTING_FFTPACK diff --git a/oss-internship-2020/pffft/fftpack.h b/oss-internship-2020/pffft/fftpack.h new file mode 100644 index 0000000..5971b9f --- /dev/null +++ b/oss-internship-2020/pffft/fftpack.h @@ -0,0 +1,799 @@ +/* + Interface for the f2c translation of fftpack as found on http://www.netlib.org/fftpack/ + + FFTPACK license: + + http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html + + Copyright (c) 2004 the University Corporation for Atmospheric + Research ("UCAR"). All rights reserved. Developed by NCAR's + Computational and Information Systems Laboratory, UCAR, + www.cisl.ucar.edu. 
+ + Redistribution and use of the Software in source and binary forms, + with or without modification, is permitted provided that the + following conditions are met: + + - Neither the names of NCAR's Computational and Information Systems + Laboratory, the University Corporation for Atmospheric Research, + nor the names of its sponsors or contributors may be used to + endorse or promote products derived from this Software without + specific prior written permission. + + - Redistributions of source code must retain the above copyright + notices, this list of conditions, and the disclaimer below. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the disclaimer below in the + documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE + SOFTWARE. + + ChangeLog: + 2011/10/02: this is my first release of this file. 
+*/ + +#ifndef FFTPACK_H +#define FFTPACK_H + +#ifdef __cplusplus +extern "C" { +#endif + +// just define FFTPACK_DOUBLE_PRECISION if you want to build it as a double precision fft + +#ifndef FFTPACK_DOUBLE_PRECISION + typedef float fftpack_real; + typedef int fftpack_int; +#else + typedef double fftpack_real; + typedef int fftpack_int; +#endif + + void cffti(fftpack_int n, fftpack_real *wsave); /* initialize wsave (at least 4*n+15 elements) for cfftf and cfftb */ + + void cfftf(fftpack_int n, fftpack_real *c, fftpack_real *wsave); /* forward transform of the complex sequence c (length n); unnormalized, result overwrites c */ + + void cfftb(fftpack_int n, fftpack_real *c, fftpack_real *wsave); /* backward transform; unnormalized inverse of cfftf (cfftf then cfftb scales by n) */ + + void rffti(fftpack_int n, fftpack_real *wsave); /* initialize wsave (at least 2*n+15 elements) for rfftf and rfftb */ + void rfftf(fftpack_int n, fftpack_real *r, fftpack_real *wsave); /* forward transform of the real periodic sequence r (length n); unnormalized */ + void rfftb(fftpack_int n, fftpack_real *r, fftpack_real *wsave); /* backward transform of a real coefficient array (rfftf then rfftb scales by n) */ + + void cosqi(fftpack_int n, fftpack_real *wsave); /* initialize wsave (at least 3*n+15 elements) for cosqf and cosqb */ + void cosqf(fftpack_int n, fftpack_real *x, fftpack_real *wsave); /* forward cosine transform with odd wave numbers */ + void cosqb(fftpack_int n, fftpack_real *x, fftpack_real *wsave); /* unnormalized inverse of cosqf (cosqf then cosqb scales by 4*n) */ + + void costi(fftpack_int n, fftpack_real *wsave); /* initialize wsave (at least 3*n+15 elements) for cost */ + void cost(fftpack_int n, fftpack_real *x, fftpack_real *wsave); /* cosine transform of a real even sequence; n must be > 1; applying it twice scales by 2*(n-1) */ + + void sinqi(fftpack_int n, fftpack_real *wsave); /* initialize wsave (at least 3*n+15 elements) for sinqf and sinqb */ + void sinqb(fftpack_int n, fftpack_real *x, fftpack_real *wsave); /* unnormalized inverse of sinqf */ + void sinqf(fftpack_int n, fftpack_real *x, fftpack_real *wsave); /* forward sine transform with odd wave numbers (sinqf then sinqb scales by 4*n) */ + + void sinti(fftpack_int n, fftpack_real *wsave); /* initialize wsave (at least int(2.5*n+15) elements) for sint */ + void sint(fftpack_int n, fftpack_real *x, fftpack_real *wsave); /* sine transform of a real odd sequence; applying it twice scales by 2*(n+1) */ + +#ifdef __cplusplus +} +#endif + +#endif /* FFTPACK_H */ + +/* + + FFTPACK + +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + + version 4 april 1985 + + a package of fortran subprograms for the fast fourier + transform of periodic and other symmetric sequences + + by + + paul n swarztrauber + + national center for atmospheric research boulder,colorado 80307 + + which is sponsored by the national science foundation + +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + + +this package consists of programs which perform fast fourier +transforms for both complex and real periodic sequences and +certain other symmetric
sequences that are listed below. + +1. rffti initialize rfftf and rfftb +2. rfftf forward transform of a real periodic sequence +3. rfftb backward transform of a real coefficient array + +4. ezffti initialize ezfftf and ezfftb +5. ezfftf a simplified real periodic forward transform +6. ezfftb a simplified real periodic backward transform + +7. sinti initialize sint +8. sint sine transform of a real odd sequence + +9. costi initialize cost +10. cost cosine transform of a real even sequence + +11. sinqi initialize sinqf and sinqb +12. sinqf forward sine transform with odd wave numbers +13. sinqb unnormalized inverse of sinqf + +14. cosqi initialize cosqf and cosqb +15. cosqf forward cosine transform with odd wave numbers +16. cosqb unnormalized inverse of cosqf + +17. cffti initialize cfftf and cfftb +18. cfftf forward transform of a complex periodic sequence +19. cfftb unnormalized inverse of cfftf + + +****************************************************************** + +subroutine rffti(n,wsave) + + **************************************************************** + +subroutine rffti initializes the array wsave which is used in +both rfftf and rfftb. the prime factorization of n together with +a tabulation of the trigonometric functions are computed and +stored in wsave. + +input parameter + +n the length of the sequence to be transformed. + +output parameter + +wsave a work array which must be dimensioned at least 2*n+15. + the same work array can be used for both rfftf and rfftb + as long as n remains unchanged. different wsave arrays + are required for different values of n. the contents of + wsave must not be changed between calls of rfftf or rfftb. + +****************************************************************** + +subroutine rfftf(n,r,wsave) + +****************************************************************** + +subroutine rfftf computes the fourier coefficients of a real +periodic sequence (fourier analysis).
the transform is defined +below at output parameter r. + +input parameters + +n the length of the array r to be transformed. the method + is most efficient when n is a product of small primes. + n may change so long as different work arrays are provided + +r a real array of length n which contains the sequence + to be transformed + +wsave a work array which must be dimensioned at least 2*n+15. + in the program that calls rfftf. the wsave array must be + initialized by calling subroutine rffti(n,wsave) and a + different wsave array must be used for each different + value of n. this initialization does not have to be + repeated so long as n remains unchanged thus subsequent + transforms can be obtained faster than the first. + the same wsave array can be used by rfftf and rfftb. + + +output parameters + +r r(1) = the sum from i=1 to i=n of r(i) + + if n is even set l =n/2 , if n is odd set l = (n+1)/2 + + then for k = 2,...,l + + r(2*k-2) = the sum from i = 1 to i = n of + + r(i)*cos((k-1)*(i-1)*2*pi/n) + + r(2*k-1) = the sum from i = 1 to i = n of + + -r(i)*sin((k-1)*(i-1)*2*pi/n) + + if n is even + + r(n) = the sum from i = 1 to i = n of + + (-1)**(i-1)*r(i) + + ***** note + this transform is unnormalized since a call of rfftf + followed by a call of rfftb will multiply the input + sequence by n. + +wsave contains results which must not be destroyed between + calls of rfftf or rfftb. + + +****************************************************************** + +subroutine rfftb(n,r,wsave) + +****************************************************************** + +subroutine rfftb computes the real periodic sequence from its +fourier coefficients (fourier synthesis). the transform is defined +below at output parameter r. + +input parameters + +n the length of the array r to be transformed. the method + is most efficient when n is a product of small primes.
+ n may change so long as different work arrays are provided + +r a real array of length n which contains the sequence + to be transformed + +wsave a work array which must be dimensioned at least 2*n+15. + in the program that calls rfftb. the wsave array must be + initialized by calling subroutine rffti(n,wsave) and a + different wsave array must be used for each different + value of n. this initialization does not have to be + repeated so long as n remains unchanged thus subsequent + transforms can be obtained faster than the first. + the same wsave array can be used by rfftf and rfftb. + + +output parameters + +r for n even and for i = 1,...,n + + r(i) = r(1)+(-1)**(i-1)*r(n) + + plus the sum from k=2 to k=n/2 of + + 2.*r(2*k-2)*cos((k-1)*(i-1)*2*pi/n) + + -2.*r(2*k-1)*sin((k-1)*(i-1)*2*pi/n) + + for n odd and for i = 1,...,n + + r(i) = r(1) plus the sum from k=2 to k=(n+1)/2 of + + 2.*r(2*k-2)*cos((k-1)*(i-1)*2*pi/n) + + -2.*r(2*k-1)*sin((k-1)*(i-1)*2*pi/n) + + ***** note + this transform is unnormalized since a call of rfftf + followed by a call of rfftb will multiply the input + sequence by n. + +wsave contains results which must not be destroyed between + calls of rfftb or rfftf. + +****************************************************************** + +subroutine sinti(n,wsave) + +****************************************************************** + +subroutine sinti initializes the array wsave which is used in +subroutine sint. the prime factorization of n together with +a tabulation of the trigonometric functions are computed and +stored in wsave. + +input parameter + +n the length of the sequence to be transformed. the method + is most efficient when n+1 is a product of small primes. + +output parameter + +wsave a work array with at least int(2.5*n+15) locations. + different wsave arrays are required for different values + of n. the contents of wsave must not be changed between + calls of sint. 
+ +****************************************************************** + +subroutine sint(n,x,wsave) + +****************************************************************** + +subroutine sint computes the discrete fourier sine transform +of an odd sequence x(i). the transform is defined below at +output parameter x. + +sint is the unnormalized inverse of itself since a call of sint +followed by another call of sint will multiply the input sequence +x by 2*(n+1). + +the array wsave which is used by subroutine sint must be +initialized by calling subroutine sinti(n,wsave). + +input parameters + +n the length of the sequence to be transformed. the method + is most efficient when n+1 is the product of small primes. + +x an array which contains the sequence to be transformed + + +wsave a work array with dimension at least int(2.5*n+15) + in the program that calls sint. the wsave array must be + initialized by calling subroutine sinti(n,wsave) and a + different wsave array must be used for each different + value of n. this initialization does not have to be + repeated so long as n remains unchanged thus subsequent + transforms can be obtained faster than the first. + +output parameters + +x for i=1,...,n + + x(i)= the sum from k=1 to k=n + + 2*x(k)*sin(k*i*pi/(n+1)) + + a call of sint followed by another call of + sint will multiply the sequence x by 2*(n+1). + hence sint is the unnormalized inverse + of itself. + +wsave contains initialization calculations which must not be + destroyed between calls of sint. + +****************************************************************** + +subroutine costi(n,wsave) + +****************************************************************** + +subroutine costi initializes the array wsave which is used in +subroutine cost. the prime factorization of n together with +a tabulation of the trigonometric functions are computed and +stored in wsave. + +input parameter + +n the length of the sequence to be transformed. 
the method + is most efficient when n-1 is a product of small primes. + +output parameter + +wsave a work array which must be dimensioned at least 3*n+15. + different wsave arrays are required for different values + of n. the contents of wsave must not be changed between + calls of cost. + +****************************************************************** + +subroutine cost(n,x,wsave) + +****************************************************************** + +subroutine cost computes the discrete fourier cosine transform +of an even sequence x(i). the transform is defined below at output +parameter x. + +cost is the unnormalized inverse of itself since a call of cost +followed by another call of cost will multiply the input sequence +x by 2*(n-1). the transform is defined below at output parameter x + +the array wsave which is used by subroutine cost must be +initialized by calling subroutine costi(n,wsave). + +input parameters + +n the length of the sequence x. n must be greater than 1. + the method is most efficient when n-1 is a product of + small primes. + +x an array which contains the sequence to be transformed + +wsave a work array which must be dimensioned at least 3*n+15 + in the program that calls cost. the wsave array must be + initialized by calling subroutine costi(n,wsave) and a + different wsave array must be used for each different + value of n. this initialization does not have to be + repeated so long as n remains unchanged thus subsequent + transforms can be obtained faster than the first. + +output parameters + +x for i=1,...,n + + x(i) = x(1)+(-1)**(i-1)*x(n) + + + the sum from k=2 to k=n-1 + + 2*x(k)*cos((k-1)*(i-1)*pi/(n-1)) + + a call of cost followed by another call of + cost will multiply the sequence x by 2*(n-1) + hence cost is the unnormalized inverse + of itself. + +wsave contains initialization calculations which must not be + destroyed between calls of cost. 
+ +****************************************************************** + +subroutine sinqi(n,wsave) + +****************************************************************** + +subroutine sinqi initializes the array wsave which is used in +both sinqf and sinqb. the prime factorization of n together with +a tabulation of the trigonometric functions are computed and +stored in wsave. + +input parameter + +n the length of the sequence to be transformed. the method + is most efficient when n is a product of small primes. + +output parameter + +wsave a work array which must be dimensioned at least 3*n+15. + the same work array can be used for both sinqf and sinqb + as long as n remains unchanged. different wsave arrays + are required for different values of n. the contents of + wsave must not be changed between calls of sinqf or sinqb. + +****************************************************************** + +subroutine sinqf(n,x,wsave) + +****************************************************************** + +subroutine sinqf computes the fast fourier transform of quarter +wave data. that is , sinqf computes the coefficients in a sine +series representation with only odd wave numbers. the transform +is defined below at output parameter x. + +sinqb is the unnormalized inverse of sinqf since a call of sinqf +followed by a call of sinqb will multiply the input sequence x +by 4*n. + +the array wsave which is used by subroutine sinqf must be +initialized by calling subroutine sinqi(n,wsave). + + +input parameters + +n the length of the array x to be transformed. the method + is most efficient when n is a product of small primes. + +x an array which contains the sequence to be transformed + +wsave a work array which must be dimensioned at least 3*n+15. + in the program that calls sinqf. the wsave array must be + initialized by calling subroutine sinqi(n,wsave) and a + different wsave array must be used for each different + value of n. 
this initialization does not have to be + repeated so long as n remains unchanged thus subsequent + transforms can be obtained faster than the first. + +output parameters + +x for i=1,...,n + + x(i) = (-1)**(i-1)*x(n) + + + the sum from k=1 to k=n-1 of + + 2*x(k)*sin((2*i-1)*k*pi/(2*n)) + + a call of sinqf followed by a call of + sinqb will multiply the sequence x by 4*n. + therefore sinqb is the unnormalized inverse + of sinqf. + +wsave contains initialization calculations which must not + be destroyed between calls of sinqf or sinqb. + +****************************************************************** + +subroutine sinqb(n,x,wsave) + +****************************************************************** + +subroutine sinqb computes the fast fourier transform of quarter +wave data. that is , sinqb computes a sequence from its +representation in terms of a sine series with odd wave numbers. +the transform is defined below at output parameter x. + +sinqf is the unnormalized inverse of sinqb since a call of sinqb +followed by a call of sinqf will multiply the input sequence x +by 4*n. + +the array wsave which is used by subroutine sinqb must be +initialized by calling subroutine sinqi(n,wsave). + + +input parameters + +n the length of the array x to be transformed. the method + is most efficient when n is a product of small primes. + +x an array which contains the sequence to be transformed + +wsave a work array which must be dimensioned at least 3*n+15. + in the program that calls sinqb. the wsave array must be + initialized by calling subroutine sinqi(n,wsave) and a + different wsave array must be used for each different + value of n. this initialization does not have to be + repeated so long as n remains unchanged thus subsequent + transforms can be obtained faster than the first. 
+ +output parameters + +x for i=1,...,n + + x(i)= the sum from k=1 to k=n of + + 4*x(k)*sin((2k-1)*i*pi/(2*n)) + + a call of sinqb followed by a call of + sinqf will multiply the sequence x by 4*n. + therefore sinqf is the unnormalized inverse + of sinqb. + +wsave contains initialization calculations which must not + be destroyed between calls of sinqb or sinqf. + +****************************************************************** + +subroutine cosqi(n,wsave) + +****************************************************************** + +subroutine cosqi initializes the array wsave which is used in +both cosqf and cosqb. the prime factorization of n together with +a tabulation of the trigonometric functions are computed and +stored in wsave. + +input parameter + +n the length of the array to be transformed. the method + is most efficient when n is a product of small primes. + +output parameter + +wsave a work array which must be dimensioned at least 3*n+15. + the same work array can be used for both cosqf and cosqb + as long as n remains unchanged. different wsave arrays + are required for different values of n. the contents of + wsave must not be changed between calls of cosqf or cosqb. + +****************************************************************** + +subroutine cosqf(n,x,wsave) + +****************************************************************** + +subroutine cosqf computes the fast fourier transform of quarter +wave data. that is , cosqf computes the coefficients in a cosine +series representation with only odd wave numbers. the transform +is defined below at output parameter x + +cosqf is the unnormalized inverse of cosqb since a call of cosqf +followed by a call of cosqb will multiply the input sequence x +by 4*n. + +the array wsave which is used by subroutine cosqf must be +initialized by calling subroutine cosqi(n,wsave). + + +input parameters + +n the length of the array x to be transformed. 
the method + is most efficient when n is a product of small primes. + +x an array which contains the sequence to be transformed + +wsave a work array which must be dimensioned at least 3*n+15 + in the program that calls cosqf. the wsave array must be + initialized by calling subroutine cosqi(n,wsave) and a + different wsave array must be used for each different + value of n. this initialization does not have to be + repeated so long as n remains unchanged thus subsequent + transforms can be obtained faster than the first. + +output parameters + +x for i=1,...,n + + x(i) = x(1) plus the sum from k=2 to k=n of + + 2*x(k)*cos((2*i-1)*(k-1)*pi/(2*n)) + + a call of cosqf followed by a call of + cosqb will multiply the sequence x by 4*n. + therefore cosqb is the unnormalized inverse + of cosqf. + +wsave contains initialization calculations which must not + be destroyed between calls of cosqf or cosqb. + +****************************************************************** + +subroutine cosqb(n,x,wsave) + +****************************************************************** + +subroutine cosqb computes the fast fourier transform of quarter +wave data. that is , cosqb computes a sequence from its +representation in terms of a cosine series with odd wave numbers. +the transform is defined below at output parameter x. + +cosqb is the unnormalized inverse of cosqf since a call of cosqb +followed by a call of cosqf will multiply the input sequence x +by 4*n. + +the array wsave which is used by subroutine cosqb must be +initialized by calling subroutine cosqi(n,wsave). + + +input parameters + +n the length of the array x to be transformed. the method + is most efficient when n is a product of small primes. + +x an array which contains the sequence to be transformed + +wsave a work array that must be dimensioned at least 3*n+15 + in the program that calls cosqb. 
the wsave array must be + initialized by calling subroutine cosqi(n,wsave) and a + different wsave array must be used for each different + value of n. this initialization does not have to be + repeated so long as n remains unchanged thus subsequent + transforms can be obtained faster than the first. + +output parameters + +x for i=1,...,n + + x(i)= the sum from k=1 to k=n of + + 4*x(k)*cos((2*k-1)*(i-1)*pi/(2*n)) + + a call of cosqb followed by a call of + cosqf will multiply the sequence x by 4*n. + therefore cosqf is the unnormalized inverse + of cosqb. + +wsave contains initialization calculations which must not + be destroyed between calls of cosqb or cosqf. + +****************************************************************** + +subroutine cffti(n,wsave) + +****************************************************************** + +subroutine cffti initializes the array wsave which is used in +both cfftf and cfftb. the prime factorization of n together with +a tabulation of the trigonometric functions are computed and +stored in wsave. + +input parameter + +n the length of the sequence to be transformed + +output parameter + +wsave a work array which must be dimensioned at least 4*n+15 + the same work array can be used for both cfftf and cfftb + as long as n remains unchanged. different wsave arrays + are required for different values of n. the contents of + wsave must not be changed between calls of cfftf or cfftb. + +****************************************************************** + +subroutine cfftf(n,c,wsave) + +****************************************************************** + +subroutine cfftf computes the forward complex discrete fourier +transform (the fourier analysis). equivalently , cfftf computes +the fourier coefficients of a complex periodic sequence. +the transform is defined below at output parameter c. + +the transform is not normalized. to obtain a normalized transform +the output must be divided by n. 
otherwise a call of cfftf +followed by a call of cfftb will multiply the sequence by n. + +the array wsave which is used by subroutine cfftf must be +initialized by calling subroutine cffti(n,wsave). + +input parameters + + +n the length of the complex sequence c. the method is + more efficient when n is the product of small primes. + +c a complex array of length n which contains the sequence + +wsave a real work array which must be dimensioned at least 4n+15 + in the program that calls cfftf. the wsave array must be + initialized by calling subroutine cffti(n,wsave) and a + different wsave array must be used for each different + value of n. this initialization does not have to be + repeated so long as n remains unchanged thus subsequent + transforms can be obtained faster than the first. + the same wsave array can be used by cfftf and cfftb. + +output parameters + +c for j=1,...,n + + c(j)=the sum from k=1,...,n of + + c(k)*exp(-i*(j-1)*(k-1)*2*pi/n) + + where i=sqrt(-1) + +wsave contains initialization calculations which must not be + destroyed between calls of subroutine cfftf or cfftb + +****************************************************************** + +subroutine cfftb(n,c,wsave) + +****************************************************************** + +subroutine cfftb computes the backward complex discrete fourier +transform (the fourier synthesis). equivalently , cfftb computes +a complex periodic sequence from its fourier coefficients. +the transform is defined below at output parameter c. + +a call of cfftf followed by a call of cfftb will multiply the +sequence by n. + +the array wsave which is used by subroutine cfftb must be +initialized by calling subroutine cffti(n,wsave). + +input parameters + + +n the length of the complex sequence c. the method is + more efficient when n is the product of small primes.
+ +c a complex array of length n which contains the sequence + +wsave a real work array which must be dimensioned at least 4n+15 + in the program that calls cfftb. the wsave array must be + initialized by calling subroutine cffti(n,wsave) and a + different wsave array must be used for each different + value of n. this initialization does not have to be + repeated so long as n remains unchanged thus subsequent + transforms can be obtained faster than the first. + the same wsave array can be used by cfftf and cfftb. + +output parameters + +c for j=1,...,n + + c(j)=the sum from k=1,...,n of + + c(k)*exp(i*(j-1)*(k-1)*2*pi/n) + + where i=sqrt(-1) + +wsave contains initialization calculations which must not be + destroyed between calls of subroutine cfftf or cfftb + +*/ diff --git a/oss-internship-2020/pffft/myNotes.txt b/oss-internship-2020/pffft/myNotes.txt new file mode 100644 index 0000000..0dd3bcb --- /dev/null +++ b/oss-internship-2020/pffft/myNotes.txt @@ -0,0 +1,101 @@ +About library's functions: + * pffft_aligned_malloc(size_t) + returns an allocated array considering the alignment offset + * pffft_aligned_free(void *) + frees the memory + * pffft_simd_size() + returns the SIMD_SZ = 4 (regarding simd vector) + * pffft_new_setup(int, ...) + with a fft size (first argument) being a multiple of 16, 32. 
+ +Deleted part (validate function) + /*for (pass = 0; pass < 2; pass++) { + if (pass == 0) { + for (k = 0; k < Nfloat; k++) { + ref_[k] = in_[k] = frand() * 2 - 1; + out_[k] = 1e30; + } + + if (!cplx) { + api.rffti(N, wrk_.PtrBoth()).IgnoreError(); + api.rfftf(N, ref_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + + { + float refN = ref_[N - 1]; + for (k = N - 2; k >= 1; --k) { + ref_[k + 1] = ref_[k]; + } + ref_[1] = refN; + } + } else { + api.cffti(N, wrk_.PtrBoth()).IgnoreError(); + api.cfftf(N, ref_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + } + } + + for (k = 0; k < Nfloat; ++k) { + ref_max = MAX(ref_max, fabs(ref_[k])); + } + + if (pass == 0) { + api.pffft_transform(s_reg.PtrBefore(), in_.PtrBoth(), tmp_.PtrBoth(), wrk_.PtrBoth(), PFFFT_FORWARD).IgnoreError(); + + memcpy(tmp2, tmp, Nbytes); + memcpy(tmp, in, Nbytes); + + api.pffft_transform(s_reg.PtrBefore(), tmp_.PtrBoth(), tmp_.PtrBoth(), wrk_.PtrBoth(), PFFFT_FORWARD).IgnoreError(); + + printf("Forward transformation test passed.\n"); + + api.pffft_zreorder(s_reg.PtrBefore(), tmp_.PtrBoth(), out_.PtrBoth(), PFFFT_FORWARD).IgnoreError(); + api.pffft_zreorder(s_reg.PtrBefore(), out_.PtrBoth(), tmp_.PtrBoth(), PFFFT_BACKWARD).IgnoreError(); + + printf("Reordering test passed.\n"); + } else { + api.pffft_transform_ordered(s_reg.PtrBefore(), in_.PtrBoth(), tmp_.PtrBoth(), wrk_.PtrBoth(), PFFFT_FORWARD).IgnoreError(); + + } + } */ + + +MACRO for testing +TEST(AssignOrReturn, AssignsMultipleVariablesInSequence) { + auto func = []() -> absl::Status { + int value1; + SAPI_ASSIGN_OR_RETURN(value1, StatusOr(1)); + EXPECT_EQ(1, value1); + int value2; + SAPI_ASSIGN_OR_RETURN(value2, StatusOr(2)); + EXPECT_EQ(2, value2); + int value3; + SAPI_ASSIGN_OR_RETURN(value3, StatusOr(3)); + EXPECT_EQ(3, value3); + int value4; + SAPI_ASSIGN_OR_RETURN(value4, + StatusOr(absl::UnknownError("EXPECTED" + int value1; + SAPI_ASSIGN_OR_RETURN(value1, StatusOr(1)); + + + // PFFFT benchmark + /*{ + sapi::StatusOr s = api.pffft_new_setup(N, 
cplx ? PFFFT_COMPLEX : PFFFT_REAL); + if (s.ok()) { + sapi::v::GenericPtr s_reg(s.value()); + + t0 = uclock_sec(); + for (iter = 0; iter < max_iter; ++iter) { + printf("%s 1\n", api.pffft_transform(s_reg.PtrBoth(), X_.PtrBoth(), Z_.PtrBoth(), Y_.PtrBoth(), PFFFT_FORWARD).ToString().c_str()); + printf("%s 2\n", api.pffft_transform(s_reg.PtrBoth(), X_.PtrBoth(), Z_.PtrBoth(), Y_.PtrBoth(), PFFFT_FORWARD).ToString().c_str()); + } + t1 = uclock_sec(); + printf("%s 3 \n", api.pffft_destroy_setup(s_reg.PtrBoth()).ToString().c_str()); + + + flops = (max_iter*2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); + show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); + } else { + fprintf(stderr, "s NULL :(\n\n"); + } + }*/ + \ No newline at end of file diff --git a/oss-internship-2020/pffft/pffft.c b/oss-internship-2020/pffft/pffft.c new file mode 100644 index 0000000..1686e15 --- /dev/null +++ b/oss-internship-2020/pffft/pffft.c @@ -0,0 +1,1881 @@ +/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com ) + + Based on original fortran 77 code from FFTPACKv4 from NETLIB + (http://www.netlib.org/fftpack), authored by Dr Paul Swarztrauber + of NCAR, in 1985. + + As confirmed by the NCAR fftpack software curators, the following + FFTPACKv5 license applies to FFTPACKv4 sources. My changes are + released under the same terms. + + FFTPACK license: + + http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html + + Copyright (c) 2004 the University Corporation for Atmospheric + Research ("UCAR"). All rights reserved. Developed by NCAR's + Computational and Information Systems Laboratory, UCAR, + www.cisl.ucar.edu. 
+ + Redistribution and use of the Software in source and binary forms, + with or without modification, is permitted provided that the + following conditions are met: + + - Neither the names of NCAR's Computational and Information Systems + Laboratory, the University Corporation for Atmospheric Research, + nor the names of its sponsors or contributors may be used to + endorse or promote products derived from this Software without + specific prior written permission. + + - Redistributions of source code must retain the above copyright + notices, this list of conditions, and the disclaimer below. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the disclaimer below in the + documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE + SOFTWARE. + + + PFFFT : a Pretty Fast FFT. + + This file is largely based on the original FFTPACK implementation, modified in + order to take advantage of SIMD instructions of modern CPUs. +*/ + +/* + ChangeLog: + - 2011/10/02, version 1: This is the very first release of this file. 
+*/ + +#include "pffft.h" +#include +#include +#include +#include + +/* detect compiler flavour */ +#if defined(_MSC_VER) +# define COMPILER_MSVC +#elif defined(__GNUC__) +# define COMPILER_GCC +#endif + +#if defined(COMPILER_GCC) +# define ALWAYS_INLINE(return_type) inline return_type __attribute__ ((always_inline)) +# define NEVER_INLINE(return_type) return_type __attribute__ ((noinline)) +# define RESTRICT __restrict +# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__]; +#elif defined(COMPILER_MSVC) +# define ALWAYS_INLINE(return_type) __forceinline return_type +# define NEVER_INLINE(return_type) __declspec(noinline) return_type +# define RESTRICT __restrict +# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__)) +#endif + + +/* + vector support macros: the rest of the code is independant of + SSE/Altivec/NEON -- adding support for other platforms with 4-element + vectors should be limited to these macros +*/ + + +// define PFFFT_SIMD_DISABLE if you want to use scalar code instead of simd code +//#define PFFFT_SIMD_DISABLE + +/* + Altivec support macros +*/ +#if !defined(PFFFT_SIMD_DISABLE) && (defined(__ppc__) || defined(__ppc64__)) +typedef vector float v4sf; +# define SIMD_SZ 4 +# define VZERO() ((vector float) vec_splat_u8(0)) +# define VMUL(a,b) vec_madd(a,b, VZERO()) +# define VADD(a,b) vec_add(a,b) +# define VMADD(a,b,c) vec_madd(a,b,c) +# define VSUB(a,b) vec_sub(a,b) +inline v4sf ld_ps1(const float *p) { v4sf v=vec_lde(0,p); return vec_splat(vec_perm(v, v, vec_lvsl(0, p)), 0); } +# define LD_PS1(p) ld_ps1(&p) +# define INTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = vec_mergeh(in1, in2); out2 = vec_mergel(in1, in2); out1 = tmp__; } +# define UNINTERLEAVE2(in1, in2, out1, out2) { \ + vector unsigned char vperm1 = (vector unsigned char)(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); \ + vector unsigned char vperm2 = (vector unsigned 
char)(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); \ + v4sf tmp__ = vec_perm(in1, in2, vperm1); out2 = vec_perm(in1, in2, vperm2); out1 = tmp__; \ + } +# define VTRANSPOSE4(x0,x1,x2,x3) { \ + v4sf y0 = vec_mergeh(x0, x2); \ + v4sf y1 = vec_mergel(x0, x2); \ + v4sf y2 = vec_mergeh(x1, x3); \ + v4sf y3 = vec_mergel(x1, x3); \ + x0 = vec_mergeh(y0, y2); \ + x1 = vec_mergel(y0, y2); \ + x2 = vec_mergeh(y1, y3); \ + x3 = vec_mergel(y1, y3); \ + } +# define VSWAPHL(a,b) vec_perm(a,b, (vector unsigned char)(16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15)) +# define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0) + +/* + SSE1 support macros +*/ +#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86)) + +#include +typedef __m128 v4sf; +# define SIMD_SZ 4 // 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions anyway so you will have to work if you want to enable AVX with its 256-bit vectors. +# define VZERO() _mm_setzero_ps() +# define VMUL(a,b) _mm_mul_ps(a,b) +# define VADD(a,b) _mm_add_ps(a,b) +# define VMADD(a,b,c) _mm_add_ps(_mm_mul_ps(a,b), c) +# define VSUB(a,b) _mm_sub_ps(a,b) +# define LD_PS1(p) _mm_set1_ps(p) +# define INTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_unpacklo_ps(in1, in2); out2 = _mm_unpackhi_ps(in1, in2); out1 = tmp__; } +# define UNINTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(2,0,2,0)); out2 = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(3,1,3,1)); out1 = tmp__; } +# define VTRANSPOSE4(x0,x1,x2,x3) _MM_TRANSPOSE4_PS(x0,x1,x2,x3) +# define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0)) +# define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0) + +/* + ARM NEON support macros +*/ +#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__arm__) || defined(__aarch64__) || defined(__arm64__)) +# include +typedef float32x4_t v4sf; +# define SIMD_SZ 4 +# define VZERO() vdupq_n_f32(0) +# define VMUL(a,b) vmulq_f32(a,b) +# 
define VADD(a,b) vaddq_f32(a,b) +# define VMADD(a,b,c) vmlaq_f32(c,a,b) +# define VSUB(a,b) vsubq_f32(a,b) +# define LD_PS1(p) vld1q_dup_f32(&(p)) +# define INTERLEAVE2(in1, in2, out1, out2) { float32x4x2_t tmp__ = vzipq_f32(in1,in2); out1=tmp__.val[0]; out2=tmp__.val[1]; } +# define UNINTERLEAVE2(in1, in2, out1, out2) { float32x4x2_t tmp__ = vuzpq_f32(in1,in2); out1=tmp__.val[0]; out2=tmp__.val[1]; } +# define VTRANSPOSE4(x0,x1,x2,x3) { \ + float32x4x2_t t0_ = vzipq_f32(x0, x2); \ + float32x4x2_t t1_ = vzipq_f32(x1, x3); \ + float32x4x2_t u0_ = vzipq_f32(t0_.val[0], t1_.val[0]); \ + float32x4x2_t u1_ = vzipq_f32(t0_.val[1], t1_.val[1]); \ + x0 = u0_.val[0]; x1 = u0_.val[1]; x2 = u1_.val[0]; x3 = u1_.val[1]; \ + } +// marginally faster version +//# define VTRANSPOSE4(x0,x1,x2,x3) { asm("vtrn.32 %q0, %q1;\n vtrn.32 %q2,%q3\n vswp %f0,%e2\n vswp %f1,%e3" : "+w"(x0), "+w"(x1), "+w"(x2), "+w"(x3)::); } +# define VSWAPHL(a,b) vcombine_f32(vget_low_f32(b), vget_high_f32(a)) +# define VALIGNED(ptr) ((((long)(ptr)) & 0x3) == 0) +#else +# if !defined(PFFFT_SIMD_DISABLE) +# warning "building with simd disabled !\n"; +# define PFFFT_SIMD_DISABLE // fallback to scalar code +# endif +#endif + +// fallback mode for situations where SSE/Altivec are not available, use scalar mode instead +#ifdef PFFFT_SIMD_DISABLE +typedef float v4sf; +# define SIMD_SZ 1 +# define VZERO() 0.f +# define VMUL(a,b) ((a)*(b)) +# define VADD(a,b) ((a)+(b)) +# define VMADD(a,b,c) ((a)*(b)+(c)) +# define VSUB(a,b) ((a)-(b)) +# define LD_PS1(p) (p) +# define VALIGNED(ptr) ((((long)(ptr)) & 0x3) == 0) +#endif + +// shortcuts for complex multiplcations +#define VCPLXMUL(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VSUB(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VADD(ai,tmp); } +#define VCPLXMULCONJ(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VADD(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VSUB(ai,tmp); } +#ifndef SVMUL +// multiply a scalar with a vector +#define SVMUL(f,v) 
VMUL(LD_PS1(f),v) +#endif + +#if !defined(PFFFT_SIMD_DISABLE) +typedef union v4sf_union { + v4sf v; + float f[4]; +} v4sf_union; + +#include + +#define assertv4(v,f0,f1,f2,f3) assert(v.f[0] == (f0) && v.f[1] == (f1) && v.f[2] == (f2) && v.f[3] == (f3)) + +/* detect bugs with the vector support macros */ +void validate_pffft_simd() { + float f[16] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 }; + v4sf_union a0, a1, a2, a3, t, u; + memcpy(a0.f, f, 4*sizeof(float)); + memcpy(a1.f, f+4, 4*sizeof(float)); + memcpy(a2.f, f+8, 4*sizeof(float)); + memcpy(a3.f, f+12, 4*sizeof(float)); + + t = a0; u = a1; t.v = VZERO(); + printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 0, 0, 0, 0); + t.v = VADD(a1.v, a2.v); + printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 12, 14, 16, 18); + t.v = VMUL(a1.v, a2.v); + printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77); + t.v = VMADD(a1.v, a2.v,a0.v); + printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80); + + INTERLEAVE2(a1.v,a2.v,t.v,u.v); + printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); + assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11); + UNINTERLEAVE2(a1.v,a2.v,t.v,u.v); + printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); + assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11); + + t.v=LD_PS1(f[15]); + printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); + assertv4(t, 15, 15, 15, 15); + t.v = VSWAPHL(a1.v, a2.v); + printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); + assertv4(t, 8, 9, 6, 7); + VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v); + printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g 
%2g]\n", + a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3], + a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]); + assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15); +} +#endif //!PFFFT_SIMD_DISABLE + +/* SSE and co like 16-bytes aligned pointers */ +#define MALLOC_V4SF_ALIGNMENT 64 // with a 64-byte alignment, we are even aligned on L2 cache lines... +void *pffft_aligned_malloc(size_t nb_bytes) { + void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT); + if (!p0) return (void *) 0; + p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1)))); + *((void **) p - 1) = p0; + return p; +} + +void pffft_aligned_free(void *p) { + if (p) free(*((void **) p - 1)); +} + +int pffft_simd_size() { return SIMD_SZ; } + +/* + passf2 and passb2 has been merged here, fsign = -1 for passf2, +1 for passb2 +*/ +static NEVER_INLINE(void) passf2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1, float fsign) { + int k, i; + int l1ido = l1*ido; + if (ido <= 2) { + for (k=0; k < l1ido; k += ido, ch += ido, cc+= 2*ido) { + ch[0] = VADD(cc[0], cc[ido+0]); + ch[l1ido] = VSUB(cc[0], cc[ido+0]); + ch[1] = VADD(cc[1], cc[ido+1]); + ch[l1ido + 1] = VSUB(cc[1], cc[ido+1]); + } + } else { + for (k=0; k < l1ido; k += ido, ch += ido, cc += 2*ido) { + for (i=0; i 2); + for (k=0; k< l1ido; k += ido, cc+= 3*ido, ch +=ido) { + for (i=0; i 2); + for (k = 0; k < l1; ++k, cc += 5*ido, ch += ido) { + for (i = 0; i < ido-1; i += 2) { + ti5 = VSUB(cc_ref(i , 2), cc_ref(i , 5)); + ti2 = VADD(cc_ref(i , 2), cc_ref(i , 5)); + ti4 = VSUB(cc_ref(i , 3), cc_ref(i , 4)); + ti3 = VADD(cc_ref(i , 3), cc_ref(i , 4)); + tr5 = VSUB(cc_ref(i-1, 2), cc_ref(i-1, 5)); + tr2 = VADD(cc_ref(i-1, 2), cc_ref(i-1, 5)); + tr4 = VSUB(cc_ref(i-1, 3), cc_ref(i-1, 4)); + tr3 = VADD(cc_ref(i-1, 3), cc_ref(i-1, 4)); + ch_ref(i-1, 1) = VADD(cc_ref(i-1, 1), VADD(tr2, tr3)); + ch_ref(i , 
1) = VADD(cc_ref(i , 1), VADD(ti2, ti3)); + cr2 = VADD(cc_ref(i-1, 1), VADD(SVMUL(tr11, tr2),SVMUL(tr12, tr3))); + ci2 = VADD(cc_ref(i , 1), VADD(SVMUL(tr11, ti2),SVMUL(tr12, ti3))); + cr3 = VADD(cc_ref(i-1, 1), VADD(SVMUL(tr12, tr2),SVMUL(tr11, tr3))); + ci3 = VADD(cc_ref(i , 1), VADD(SVMUL(tr12, ti2),SVMUL(tr11, ti3))); + cr5 = VADD(SVMUL(ti11, tr5), SVMUL(ti12, tr4)); + ci5 = VADD(SVMUL(ti11, ti5), SVMUL(ti12, ti4)); + cr4 = VSUB(SVMUL(ti12, tr5), SVMUL(ti11, tr4)); + ci4 = VSUB(SVMUL(ti12, ti5), SVMUL(ti11, ti4)); + dr3 = VSUB(cr3, ci4); + dr4 = VADD(cr3, ci4); + di3 = VADD(ci3, cr4); + di4 = VSUB(ci3, cr4); + dr5 = VADD(cr2, ci5); + dr2 = VSUB(cr2, ci5); + di5 = VSUB(ci2, cr5); + di2 = VADD(ci2, cr5); + wr1=wa1[i], wi1=fsign*wa1[i+1], wr2=wa2[i], wi2=fsign*wa2[i+1]; + wr3=wa3[i], wi3=fsign*wa3[i+1], wr4=wa4[i], wi4=fsign*wa4[i+1]; + VCPLXMUL(dr2, di2, LD_PS1(wr1), LD_PS1(wi1)); + ch_ref(i - 1, 2) = dr2; + ch_ref(i, 2) = di2; + VCPLXMUL(dr3, di3, LD_PS1(wr2), LD_PS1(wi2)); + ch_ref(i - 1, 3) = dr3; + ch_ref(i, 3) = di3; + VCPLXMUL(dr4, di4, LD_PS1(wr3), LD_PS1(wi3)); + ch_ref(i - 1, 4) = dr4; + ch_ref(i, 4) = di4; + VCPLXMUL(dr5, di5, LD_PS1(wr4), LD_PS1(wi4)); + ch_ref(i - 1, 5) = dr5; + ch_ref(i, 5) = di5; + } + } +#undef ch_ref +#undef cc_ref +} + +static NEVER_INLINE(void) radf2_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch, const float *wa1) { + static const float minus_one = -1.f; + int i, k, l1ido = l1*ido; + for (k=0; k < l1ido; k += ido) { + v4sf a = cc[k], b = cc[k + l1ido]; + ch[2*k] = VADD(a, b); + ch[2*(k+ido)-1] = VSUB(a, b); + } + if (ido < 2) return; + if (ido != 2) { + for (k=0; k < l1ido; k += ido) { + for (i=2; i 5) { + wa[i1-1] = wa[i-1]; + wa[i1] = wa[i]; + } + } + l1 = l2; + } +} /* cffti1 */ + + +v4sf *cfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2, const float *wa, const int *ifac, int isign) { + v4sf *in = (v4sf*)input_readonly; + v4sf *out = (in == work2 ? 
work1 : work2); + int nf = ifac[1], k1; + int l1 = 1; + int iw = 0; + assert(in != out && work1 != work2); + for (k1=2; k1<=nf+1; k1++) { + int ip = ifac[k1]; + int l2 = ip*l1; + int ido = n / l2; + int idot = ido + ido; + switch (ip) { + case 5: { + int ix2 = iw + idot; + int ix3 = ix2 + idot; + int ix4 = ix3 + idot; + passf5_ps(idot, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); + } break; + case 4: { + int ix2 = iw + idot; + int ix3 = ix2 + idot; + passf4_ps(idot, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], isign); + } break; + case 2: { + passf2_ps(idot, l1, in, out, &wa[iw], isign); + } break; + case 3: { + int ix2 = iw + idot; + passf3_ps(idot, l1, in, out, &wa[iw], &wa[ix2], isign); + } break; + default: + assert(0); + } + l1 = l2; + iw += (ip - 1)*idot; + if (out == work2) { + out = work1; in = work2; + } else { + out = work2; in = work1; + } + } + + return in; /* this is in fact the output .. */ +} + + +struct PFFFT_Setup { + int N; + int Ncvec; // nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) + int ifac[15]; + pffft_transform_t transform; + v4sf *data; // allocated room for twiddle coefs + float *e; // points into 'data' , N/4*3 elements + float *twiddle; // points into 'data', N/4 elements +}; + +PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform) { + PFFFT_Setup *s = (PFFFT_Setup*)malloc(sizeof(PFFFT_Setup)); + int k, m; + /* unfortunately, the fft size must be a multiple of 16 for complex FFTs + and 32 for real FFTs -- a lot of stuff would need to be rewritten to + handle other cases (or maybe just switch to a scalar fft, I don't know..) */ + if (transform == PFFFT_REAL) { assert((N%(2*SIMD_SZ*SIMD_SZ))==0 && N>0); } + if (transform == PFFFT_COMPLEX) { assert((N%(SIMD_SZ*SIMD_SZ))==0 && N>0); } + //assert((N % 32) == 0); + s->N = N; + s->transform = transform; + /* nb of complex simd vectors */ + s->Ncvec = (transform == PFFFT_REAL ? 
N/2 : N)/SIMD_SZ; + s->data = (v4sf*)pffft_aligned_malloc(2*s->Ncvec * sizeof(v4sf)); + s->e = (float*)s->data; + s->twiddle = (float*)(s->data + (2*s->Ncvec*(SIMD_SZ-1))/SIMD_SZ); + + if (transform == PFFFT_REAL) { + for (k=0; k < s->Ncvec; ++k) { + int i = k/SIMD_SZ; + int j = k%SIMD_SZ; + for (m=0; m < SIMD_SZ-1; ++m) { + float A = -2*M_PI*(m+1)*k / N; + s->e[(2*(i*3 + m) + 0) * SIMD_SZ + j] = cos(A); + s->e[(2*(i*3 + m) + 1) * SIMD_SZ + j] = sin(A); + } + } + rffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac); + } else { + for (k=0; k < s->Ncvec; ++k) { + int i = k/SIMD_SZ; + int j = k%SIMD_SZ; + for (m=0; m < SIMD_SZ-1; ++m) { + float A = -2*M_PI*(m+1)*k / N; + s->e[(2*(i*3 + m) + 0)*SIMD_SZ + j] = cos(A); + s->e[(2*(i*3 + m) + 1)*SIMD_SZ + j] = sin(A); + } + } + cffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac); + } + + /* check that N is decomposable with allowed prime factors */ + for (k=0, m=1; k < s->ifac[1]; ++k) { m *= s->ifac[2+k]; } + if (m != N/SIMD_SZ) { + pffft_destroy_setup(s); s = 0; + } + + return s; +} + + +void pffft_destroy_setup(PFFFT_Setup *s) { + pffft_aligned_free(s->data); + free(s); +} + +#if !defined(PFFFT_SIMD_DISABLE) + +/* [0 0 1 2 3 4 5 6 7 8] -> [0 8 7 6 5 4 3 2 1] */ +static void reversed_copy(int N, const v4sf *in, int in_stride, v4sf *out) { + v4sf g0, g1; + int k; + INTERLEAVE2(in[0], in[1], g0, g1); in += in_stride; + + *--out = VSWAPHL(g0, g1); // [g0l, g0h], [g1l g1h] -> [g1l, g0h] + for (k=1; k < N; ++k) { + v4sf h0, h1; + INTERLEAVE2(in[0], in[1], h0, h1); in += in_stride; + *--out = VSWAPHL(g1, h0); + *--out = VSWAPHL(h0, h1); + g1 = h1; + } + *--out = VSWAPHL(g1, g0); +} + +static void unreversed_copy(int N, const v4sf *in, v4sf *out, int out_stride) { + v4sf g0, g1, h0, h1; + int k; + g0 = g1 = in[0]; ++in; + for (k=1; k < N; ++k) { + h0 = *in++; h1 = *in++; + g1 = VSWAPHL(g1, h0); + h0 = VSWAPHL(h0, h1); + UNINTERLEAVE2(h0, g1, out[0], out[1]); out += out_stride; + g1 = h1; + } + h0 = *in++; h1 = g0; + g1 = VSWAPHL(g1, h0); + h0 = 
VSWAPHL(h0, h1); + UNINTERLEAVE2(h0, g1, out[0], out[1]); +} + +void pffft_zreorder(PFFFT_Setup *setup, const float *in, float *out, pffft_direction_t direction) { + int k, N = setup->N, Ncvec = setup->Ncvec; + const v4sf *vin = (const v4sf*)in; + v4sf *vout = (v4sf*)out; + assert(in != out); + if (setup->transform == PFFFT_REAL) { + int k, dk = N/32; + if (direction == PFFFT_FORWARD) { + for (k=0; k < dk; ++k) { + INTERLEAVE2(vin[k*8 + 0], vin[k*8 + 1], vout[2*(0*dk + k) + 0], vout[2*(0*dk + k) + 1]); + INTERLEAVE2(vin[k*8 + 4], vin[k*8 + 5], vout[2*(2*dk + k) + 0], vout[2*(2*dk + k) + 1]); + } + reversed_copy(dk, vin+2, 8, (v4sf*)(out + N/2)); + reversed_copy(dk, vin+6, 8, (v4sf*)(out + N)); + } else { + for (k=0; k < dk; ++k) { + UNINTERLEAVE2(vin[2*(0*dk + k) + 0], vin[2*(0*dk + k) + 1], vout[k*8 + 0], vout[k*8 + 1]); + UNINTERLEAVE2(vin[2*(2*dk + k) + 0], vin[2*(2*dk + k) + 1], vout[k*8 + 4], vout[k*8 + 5]); + } + unreversed_copy(dk, (v4sf*)(in + N/4), (v4sf*)(out + N - 6*SIMD_SZ), -8); + unreversed_copy(dk, (v4sf*)(in + 3*N/4), (v4sf*)(out + N - 2*SIMD_SZ), -8); + } + } else { + if (direction == PFFFT_FORWARD) { + for (k=0; k < Ncvec; ++k) { + int kk = (k/4) + (k%4)*(Ncvec/4); + INTERLEAVE2(vin[k*2], vin[k*2+1], vout[kk*2], vout[kk*2+1]); + } + } else { + for (k=0; k < Ncvec; ++k) { + int kk = (k/4) + (k%4)*(Ncvec/4); + UNINTERLEAVE2(vin[kk*2], vin[kk*2+1], vout[k*2], vout[k*2+1]); + } + } + } +} + +void pffft_cplx_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { + int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks + v4sf r0, i0, r1, i1, r2, i2, r3, i3; + v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1; + assert(in != out); + for (k=0; k < dk; ++k) { + r0 = in[8*k+0]; i0 = in[8*k+1]; + r1 = in[8*k+2]; i1 = in[8*k+3]; + r2 = in[8*k+4]; i2 = in[8*k+5]; + r3 = in[8*k+6]; i3 = in[8*k+7]; + VTRANSPOSE4(r0,r1,r2,r3); + VTRANSPOSE4(i0,i1,i2,i3); + VCPLXMUL(r1,i1,e[k*6+0],e[k*6+1]); + VCPLXMUL(r2,i2,e[k*6+2],e[k*6+3]); + 
VCPLXMUL(r3,i3,e[k*6+4],e[k*6+5]); + + sr0 = VADD(r0,r2); dr0 = VSUB(r0, r2); + sr1 = VADD(r1,r3); dr1 = VSUB(r1, r3); + si0 = VADD(i0,i2); di0 = VSUB(i0, i2); + si1 = VADD(i1,i3); di1 = VSUB(i1, i3); + + /* + transformation for each column is: + + [1 1 1 1 0 0 0 0] [r0] + [1 0 -1 0 0 -1 0 1] [r1] + [1 -1 1 -1 0 0 0 0] [r2] + [1 0 -1 0 0 1 0 -1] [r3] + [0 0 0 0 1 1 1 1] * [i0] + [0 1 0 -1 1 0 -1 0] [i1] + [0 0 0 0 1 -1 1 -1] [i2] + [0 -1 0 1 1 0 -1 0] [i3] + */ + + r0 = VADD(sr0, sr1); i0 = VADD(si0, si1); + r1 = VADD(dr0, di1); i1 = VSUB(di0, dr1); + r2 = VSUB(sr0, sr1); i2 = VSUB(si0, si1); + r3 = VSUB(dr0, di1); i3 = VADD(di0, dr1); + + *out++ = r0; *out++ = i0; *out++ = r1; *out++ = i1; + *out++ = r2; *out++ = i2; *out++ = r3; *out++ = i3; + } +} + +void pffft_cplx_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { + int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks + v4sf r0, i0, r1, i1, r2, i2, r3, i3; + v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1; + assert(in != out); + for (k=0; k < dk; ++k) { + r0 = in[8*k+0]; i0 = in[8*k+1]; + r1 = in[8*k+2]; i1 = in[8*k+3]; + r2 = in[8*k+4]; i2 = in[8*k+5]; + r3 = in[8*k+6]; i3 = in[8*k+7]; + + sr0 = VADD(r0,r2); dr0 = VSUB(r0, r2); + sr1 = VADD(r1,r3); dr1 = VSUB(r1, r3); + si0 = VADD(i0,i2); di0 = VSUB(i0, i2); + si1 = VADD(i1,i3); di1 = VSUB(i1, i3); + + r0 = VADD(sr0, sr1); i0 = VADD(si0, si1); + r1 = VSUB(dr0, di1); i1 = VADD(di0, dr1); + r2 = VSUB(sr0, sr1); i2 = VSUB(si0, si1); + r3 = VADD(dr0, di1); i3 = VSUB(di0, dr1); + + VCPLXMULCONJ(r1,i1,e[k*6+0],e[k*6+1]); + VCPLXMULCONJ(r2,i2,e[k*6+2],e[k*6+3]); + VCPLXMULCONJ(r3,i3,e[k*6+4],e[k*6+5]); + + VTRANSPOSE4(r0,r1,r2,r3); + VTRANSPOSE4(i0,i1,i2,i3); + + *out++ = r0; *out++ = i0; *out++ = r1; *out++ = i1; + *out++ = r2; *out++ = i2; *out++ = r3; *out++ = i3; + } +} + + +static ALWAYS_INLINE(void) pffft_real_finalize_4x4(const v4sf *in0, const v4sf *in1, const v4sf *in, + const v4sf *e, v4sf *out) { + v4sf r0, i0, r1, i1, r2, i2, r3, i3; + 
v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1; + r0 = *in0; i0 = *in1; + r1 = *in++; i1 = *in++; r2 = *in++; i2 = *in++; r3 = *in++; i3 = *in++; + VTRANSPOSE4(r0,r1,r2,r3); + VTRANSPOSE4(i0,i1,i2,i3); + + /* + transformation for each column is: + + [1 1 1 1 0 0 0 0] [r0] + [1 0 -1 0 0 -1 0 1] [r1] + [1 0 -1 0 0 1 0 -1] [r2] + [1 -1 1 -1 0 0 0 0] [r3] + [0 0 0 0 1 1 1 1] * [i0] + [0 -1 0 1 -1 0 1 0] [i1] + [0 -1 0 1 1 0 -1 0] [i2] + [0 0 0 0 -1 1 -1 1] [i3] + */ + + //cerr << "matrix initial, before e , REAL:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; + //cerr << "matrix initial, before e, IMAG :\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; + + VCPLXMUL(r1,i1,e[0],e[1]); + VCPLXMUL(r2,i2,e[2],e[3]); + VCPLXMUL(r3,i3,e[4],e[5]); + + //cerr << "matrix initial, real part:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; + //cerr << "matrix initial, imag part:\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; + + sr0 = VADD(r0,r2); dr0 = VSUB(r0,r2); + sr1 = VADD(r1,r3); dr1 = VSUB(r3,r1); + si0 = VADD(i0,i2); di0 = VSUB(i0,i2); + si1 = VADD(i1,i3); di1 = VSUB(i3,i1); + + r0 = VADD(sr0, sr1); + r3 = VSUB(sr0, sr1); + i0 = VADD(si0, si1); + i3 = VSUB(si1, si0); + r1 = VADD(dr0, di1); + r2 = VSUB(dr0, di1); + i1 = VSUB(dr1, di0); + i2 = VADD(dr1, di0); + + *out++ = r0; + *out++ = i0; + *out++ = r1; + *out++ = i1; + *out++ = r2; + *out++ = i2; + *out++ = r3; + *out++ = i3; + +} + +static NEVER_INLINE(void) pffft_real_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { + int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks + /* fftpack order is f0r f1r f1i f2r f2i ... 
f(n-1)r f(n-1)i f(n)r */ + + v4sf_union cr, ci, *uout = (v4sf_union*)out; + v4sf save = in[7], zero=VZERO(); + float xr0, xi0, xr1, xi1, xr2, xi2, xr3, xi3; + static const float s = M_SQRT2/2; + + cr.v = in[0]; ci.v = in[Ncvec*2-1]; + assert(in != out); + pffft_real_finalize_4x4(&zero, &zero, in+1, e, out); + + /* + [cr0 cr1 cr2 cr3 ci0 ci1 ci2 ci3] + + [Xr(1)] ] [1 1 1 1 0 0 0 0] + [Xr(N/4) ] [0 0 0 0 1 s 0 -s] + [Xr(N/2) ] [1 0 -1 0 0 0 0 0] + [Xr(3N/4)] [0 0 0 0 1 -s 0 s] + [Xi(1) ] [1 -1 1 -1 0 0 0 0] + [Xi(N/4) ] [0 0 0 0 0 -s -1 -s] + [Xi(N/2) ] [0 -1 0 1 0 0 0 0] + [Xi(3N/4)] [0 0 0 0 0 -s 1 -s] + */ + + xr0=(cr.f[0]+cr.f[2]) + (cr.f[1]+cr.f[3]); uout[0].f[0] = xr0; + xi0=(cr.f[0]+cr.f[2]) - (cr.f[1]+cr.f[3]); uout[1].f[0] = xi0; + xr2=(cr.f[0]-cr.f[2]); uout[4].f[0] = xr2; + xi2=(cr.f[3]-cr.f[1]); uout[5].f[0] = xi2; + xr1= ci.f[0] + s*(ci.f[1]-ci.f[3]); uout[2].f[0] = xr1; + xi1=-ci.f[2] - s*(ci.f[1]+ci.f[3]); uout[3].f[0] = xi1; + xr3= ci.f[0] - s*(ci.f[1]-ci.f[3]); uout[6].f[0] = xr3; + xi3= ci.f[2] - s*(ci.f[1]+ci.f[3]); uout[7].f[0] = xi3; + + for (k=1; k < dk; ++k) { + v4sf save_next = in[8*k+7]; + pffft_real_finalize_4x4(&save, &in[8*k+0], in + 8*k+1, + e + k*6, out + k*8); + save = save_next; + } + +} + +static ALWAYS_INLINE(void) pffft_real_preprocess_4x4(const v4sf *in, + const v4sf *e, v4sf *out, int first) { + v4sf r0=in[0], i0=in[1], r1=in[2], i1=in[3], r2=in[4], i2=in[5], r3=in[6], i3=in[7]; + /* + transformation for each column is: + + [1 1 1 1 0 0 0 0] [r0] + [1 0 0 -1 0 -1 -1 0] [r1] + [1 -1 -1 1 0 0 0 0] [r2] + [1 0 0 -1 0 1 1 0] [r3] + [0 0 0 0 1 -1 1 -1] * [i0] + [0 -1 1 0 1 0 0 1] [i1] + [0 0 0 0 1 1 -1 -1] [i2] + [0 1 -1 0 1 0 0 1] [i3] + */ + + v4sf sr0 = VADD(r0,r3), dr0 = VSUB(r0,r3); + v4sf sr1 = VADD(r1,r2), dr1 = VSUB(r1,r2); + v4sf si0 = VADD(i0,i3), di0 = VSUB(i0,i3); + v4sf si1 = VADD(i1,i2), di1 = VSUB(i1,i2); + + r0 = VADD(sr0, sr1); + r2 = VSUB(sr0, sr1); + r1 = VSUB(dr0, si1); + r3 = VADD(dr0, si1); + i0 = VSUB(di0, di1); 
+ i2 = VADD(di0, di1); + i1 = VSUB(si0, dr1); + i3 = VADD(si0, dr1); + + VCPLXMULCONJ(r1,i1,e[0],e[1]); + VCPLXMULCONJ(r2,i2,e[2],e[3]); + VCPLXMULCONJ(r3,i3,e[4],e[5]); + + VTRANSPOSE4(r0,r1,r2,r3); + VTRANSPOSE4(i0,i1,i2,i3); + + if (!first) { + *out++ = r0; + *out++ = i0; + } + *out++ = r1; + *out++ = i1; + *out++ = r2; + *out++ = i2; + *out++ = r3; + *out++ = i3; +} + +static NEVER_INLINE(void) pffft_real_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { + int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks + /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */ + + v4sf_union Xr, Xi, *uout = (v4sf_union*)out; + float cr0, ci0, cr1, ci1, cr2, ci2, cr3, ci3; + static const float s = M_SQRT2; + assert(in != out); + for (k=0; k < 4; ++k) { + Xr.f[k] = ((float*)in)[8*k]; + Xi.f[k] = ((float*)in)[8*k+4]; + } + + pffft_real_preprocess_4x4(in, e, out+1, 1); // will write only 6 values + + /* + [Xr0 Xr1 Xr2 Xr3 Xi0 Xi1 Xi2 Xi3] + + [cr0] [1 0 2 0 1 0 0 0] + [cr1] [1 0 0 0 -1 0 -2 0] + [cr2] [1 0 -2 0 1 0 0 0] + [cr3] [1 0 0 0 -1 0 2 0] + [ci0] [0 2 0 2 0 0 0 0] + [ci1] [0 s 0 -s 0 -s 0 -s] + [ci2] [0 0 0 0 0 -2 0 2] + [ci3] [0 -s 0 s 0 -s 0 -s] + */ + for (k=1; k < dk; ++k) { + pffft_real_preprocess_4x4(in+8*k, e + k*6, out-1+k*8, 0); + } + + cr0=(Xr.f[0]+Xi.f[0]) + 2*Xr.f[2]; uout[0].f[0] = cr0; + cr1=(Xr.f[0]-Xi.f[0]) - 2*Xi.f[2]; uout[0].f[1] = cr1; + cr2=(Xr.f[0]+Xi.f[0]) - 2*Xr.f[2]; uout[0].f[2] = cr2; + cr3=(Xr.f[0]-Xi.f[0]) + 2*Xi.f[2]; uout[0].f[3] = cr3; + ci0= 2*(Xr.f[1]+Xr.f[3]); uout[2*Ncvec-1].f[0] = ci0; + ci1= s*(Xr.f[1]-Xr.f[3]) - s*(Xi.f[1]+Xi.f[3]); uout[2*Ncvec-1].f[1] = ci1; + ci2= 2*(Xi.f[3]-Xi.f[1]); uout[2*Ncvec-1].f[2] = ci2; + ci3=-s*(Xr.f[1]-Xr.f[3]) - s*(Xi.f[1]+Xi.f[3]); uout[2*Ncvec-1].f[3] = ci3; +} + + +void pffft_transform_internal(PFFFT_Setup *setup, const float *finput, float *foutput, v4sf *scratch, + pffft_direction_t direction, int ordered) { + int k, Ncvec = setup->Ncvec; + int nf_odd = 
(setup->ifac[1] & 1); + + // temporary buffer is allocated on the stack if the scratch pointer is NULL + int stack_allocate = (scratch == 0 ? Ncvec*2 : 1); + VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate); + + const v4sf *vinput = (const v4sf*)finput; + v4sf *voutput = (v4sf*)foutput; + v4sf *buff[2] = { voutput, scratch ? scratch : scratch_on_stack }; + int ib = (nf_odd ^ ordered ? 1 : 0); + + assert(VALIGNED(finput) && VALIGNED(foutput)); + + //assert(finput != foutput); + if (direction == PFFFT_FORWARD) { + ib = !ib; + if (setup->transform == PFFFT_REAL) { + ib = (rfftf1_ps(Ncvec*2, vinput, buff[ib], buff[!ib], + setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1); + pffft_real_finalize(Ncvec, buff[ib], buff[!ib], (v4sf*)setup->e); + } else { + v4sf *tmp = buff[ib]; + for (k=0; k < Ncvec; ++k) { + UNINTERLEAVE2(vinput[k*2], vinput[k*2+1], tmp[k*2], tmp[k*2+1]); + } + ib = (cfftf1_ps(Ncvec, buff[ib], buff[!ib], buff[ib], + setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1); + pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], (v4sf*)setup->e); + } + if (ordered) { + pffft_zreorder(setup, (float*)buff[!ib], (float*)buff[ib], PFFFT_FORWARD); + } else ib = !ib; + } else { + if (vinput == buff[ib]) { + ib = !ib; // may happen when finput == foutput + } + if (ordered) { + pffft_zreorder(setup, (float*)vinput, (float*)buff[ib], PFFFT_BACKWARD); + vinput = buff[ib]; ib = !ib; + } + if (setup->transform == PFFFT_REAL) { + pffft_real_preprocess(Ncvec, vinput, buff[ib], (v4sf*)setup->e); + ib = (rfftb1_ps(Ncvec*2, buff[ib], buff[0], buff[1], + setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1); + } else { + pffft_cplx_preprocess(Ncvec, vinput, buff[ib], (v4sf*)setup->e); + ib = (cfftf1_ps(Ncvec, buff[ib], buff[0], buff[1], + setup->twiddle, &setup->ifac[0], +1) == buff[0] ? 
0 : 1); + for (k=0; k < Ncvec; ++k) { + INTERLEAVE2(buff[ib][k*2], buff[ib][k*2+1], buff[ib][k*2], buff[ib][k*2+1]); + } + } + } + + if (buff[ib] != voutput) { + /* extra copy required -- this situation should only happen when finput == foutput */ + assert(finput==foutput); + for (k=0; k < Ncvec; ++k) { + v4sf a = buff[ib][2*k], b = buff[ib][2*k+1]; + voutput[2*k] = a; voutput[2*k+1] = b; + } + ib = !ib; + } + assert(buff[ib] == voutput); +} + +void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b, float *ab, float scaling) { + int Ncvec = s->Ncvec; + const v4sf * RESTRICT va = (const v4sf*)a; + const v4sf * RESTRICT vb = (const v4sf*)b; + v4sf * RESTRICT vab = (v4sf*)ab; + +#ifdef __arm__ + __builtin_prefetch(va); + __builtin_prefetch(vb); + __builtin_prefetch(vab); + __builtin_prefetch(va+2); + __builtin_prefetch(vb+2); + __builtin_prefetch(vab+2); + __builtin_prefetch(va+4); + __builtin_prefetch(vb+4); + __builtin_prefetch(vab+4); + __builtin_prefetch(va+6); + __builtin_prefetch(vb+6); + __builtin_prefetch(vab+6); +# ifndef __clang__ +# define ZCONVOLVE_USING_INLINE_NEON_ASM +# endif +#endif + + float ar, ai, br, bi, abr, abi; +#ifndef ZCONVOLVE_USING_INLINE_ASM + v4sf vscal = LD_PS1(scaling); + int i; +#endif + + assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab)); + ar = ((v4sf_union*)va)[0].f[0]; + ai = ((v4sf_union*)va)[1].f[0]; + br = ((v4sf_union*)vb)[0].f[0]; + bi = ((v4sf_union*)vb)[1].f[0]; + abr = ((v4sf_union*)vab)[0].f[0]; + abi = ((v4sf_union*)vab)[1].f[0]; + +#ifdef ZCONVOLVE_USING_INLINE_ASM // inline asm version, unfortunately miscompiled by clang 3.2, at least on ubuntu.. so this will be restricted to gcc + const float *a_ = a, *b_ = b; float *ab_ = ab; + int N = Ncvec; + asm volatile("mov r8, %2 \n" + "vdup.f32 q15, %4 \n" + "1: \n" + "pld [%0,#64] \n" + "pld [%1,#64] \n" + "pld [%2,#64] \n" + "pld [%0,#96] \n" + "pld [%1,#96] \n" + "pld [%2,#96] \n" + "vld1.f32 {q0,q1}, [%0,:128]! \n" + "vld1.f32 {q4,q5}, [%1,:128]! 
\n" + "vld1.f32 {q2,q3}, [%0,:128]! \n" + "vld1.f32 {q6,q7}, [%1,:128]! \n" + "vld1.f32 {q8,q9}, [r8,:128]! \n" + + "vmul.f32 q10, q0, q4 \n" + "vmul.f32 q11, q0, q5 \n" + "vmul.f32 q12, q2, q6 \n" + "vmul.f32 q13, q2, q7 \n" + "vmls.f32 q10, q1, q5 \n" + "vmla.f32 q11, q1, q4 \n" + "vld1.f32 {q0,q1}, [r8,:128]! \n" + "vmls.f32 q12, q3, q7 \n" + "vmla.f32 q13, q3, q6 \n" + "vmla.f32 q8, q10, q15 \n" + "vmla.f32 q9, q11, q15 \n" + "vmla.f32 q0, q12, q15 \n" + "vmla.f32 q1, q13, q15 \n" + "vst1.f32 {q8,q9},[%2,:128]! \n" + "vst1.f32 {q0,q1},[%2,:128]! \n" + "subs %3, #2 \n" + "bne 1b \n" + : "+r"(a_), "+r"(b_), "+r"(ab_), "+r"(N) : "r"(scaling) : "r8", "q0","q1","q2","q3","q4","q5","q6","q7","q8","q9", "q10","q11","q12","q13","q15","memory"); +#else // default routine, works fine for non-arm cpus with current compilers + for (i=0; i < Ncvec; i += 2) { + v4sf ar, ai, br, bi; + ar = va[2*i+0]; ai = va[2*i+1]; + br = vb[2*i+0]; bi = vb[2*i+1]; + VCPLXMUL(ar, ai, br, bi); + vab[2*i+0] = VMADD(ar, vscal, vab[2*i+0]); + vab[2*i+1] = VMADD(ai, vscal, vab[2*i+1]); + ar = va[2*i+2]; ai = va[2*i+3]; + br = vb[2*i+2]; bi = vb[2*i+3]; + VCPLXMUL(ar, ai, br, bi); + vab[2*i+2] = VMADD(ar, vscal, vab[2*i+2]); + vab[2*i+3] = VMADD(ai, vscal, vab[2*i+3]); + } +#endif + if (s->transform == PFFFT_REAL) { + ((v4sf_union*)vab)[0].f[0] = abr + ar*br*scaling; + ((v4sf_union*)vab)[1].f[0] = abi + ai*bi*scaling; + } +} + + +#else // defined(PFFFT_SIMD_DISABLE) + +// standard routine using scalar floats, without SIMD stuff. 
+ +#define pffft_zreorder_nosimd pffft_zreorder +void pffft_zreorder_nosimd(PFFFT_Setup *setup, const float *in, float *out, pffft_direction_t direction) { + int k, N = setup->N; + if (setup->transform == PFFFT_COMPLEX) { + for (k=0; k < 2*N; ++k) out[k] = in[k]; + return; + } + else if (direction == PFFFT_FORWARD) { + float x_N = in[N-1]; + for (k=N-1; k > 1; --k) out[k] = in[k-1]; + out[0] = in[0]; + out[1] = x_N; + } else { + float x_N = in[1]; + for (k=1; k < N-1; ++k) out[k] = in[k+1]; + out[0] = in[0]; + out[N-1] = x_N; + } +} + +#define pffft_transform_internal_nosimd pffft_transform_internal +void pffft_transform_internal_nosimd(PFFFT_Setup *setup, const float *input, float *output, float *scratch, + pffft_direction_t direction, int ordered) { + int Ncvec = setup->Ncvec; + int nf_odd = (setup->ifac[1] & 1); + + // temporary buffer is allocated on the stack if the scratch pointer is NULL + int stack_allocate = (scratch == 0 ? Ncvec*2 : 1); + VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate); + float *buff[2]; + int ib; + if (scratch == 0) scratch = scratch_on_stack; + buff[0] = output; buff[1] = scratch; + + if (setup->transform == PFFFT_COMPLEX) ordered = 0; // it is always ordered. + ib = (nf_odd ^ ordered ? 1 : 0); + + if (direction == PFFFT_FORWARD) { + if (setup->transform == PFFFT_REAL) { + ib = (rfftf1_ps(Ncvec*2, input, buff[ib], buff[!ib], + setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1); + } else { + ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib], + setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1); + } + if (ordered) { + pffft_zreorder(setup, buff[ib], buff[!ib], PFFFT_FORWARD); ib = !ib; + } + } else { + if (input == buff[ib]) { + ib = !ib; // may happen when finput == foutput + } + if (ordered) { + pffft_zreorder(setup, input, buff[!ib], PFFFT_BACKWARD); + input = buff[!ib]; + } + if (setup->transform == PFFFT_REAL) { + ib = (rfftb1_ps(Ncvec*2, input, buff[ib], buff[!ib], + setup->twiddle, &setup->ifac[0]) == buff[0] ? 
0 : 1); + } else { + ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib], + setup->twiddle, &setup->ifac[0], +1) == buff[0] ? 0 : 1); + } + } + if (buff[ib] != output) { + int k; + // extra copy required -- this situation should happens only when finput == foutput + assert(input==output); + for (k=0; k < Ncvec; ++k) { + float a = buff[ib][2*k], b = buff[ib][2*k+1]; + output[2*k] = a; output[2*k+1] = b; + } + ib = !ib; + } + assert(buff[ib] == output); +} + +#define pffft_zconvolve_accumulate_nosimd pffft_zconvolve_accumulate +void pffft_zconvolve_accumulate_nosimd(PFFFT_Setup *s, const float *a, const float *b, + float *ab, float scaling) { + int i, Ncvec = s->Ncvec; + + if (s->transform == PFFFT_REAL) { + // take care of the fftpack ordering + ab[0] += a[0]*b[0]*scaling; + ab[2*Ncvec-1] += a[2*Ncvec-1]*b[2*Ncvec-1]*scaling; + ++ab; ++a; ++b; --Ncvec; + } + for (i=0; i < Ncvec; ++i) { + float ar, ai, br, bi; + ar = a[2*i+0]; ai = a[2*i+1]; + br = b[2*i+0]; bi = b[2*i+1]; + VCPLXMUL(ar, ai, br, bi); + ab[2*i+0] += ar*scaling; + ab[2*i+1] += ai*scaling; + } +} + +#endif // defined(PFFFT_SIMD_DISABLE) + +void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) { + pffft_transform_internal(setup, input, output, (v4sf*)work, direction, 0); +} + +void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) { + pffft_transform_internal(setup, input, output, (v4sf*)work, direction, 1); +} diff --git a/oss-internship-2020/pffft/pffft.h b/oss-internship-2020/pffft/pffft.h new file mode 100644 index 0000000..2bfa7b3 --- /dev/null +++ b/oss-internship-2020/pffft/pffft.h @@ -0,0 +1,177 @@ +/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com ) + + Based on original fortran 77 code from FFTPACKv4 from NETLIB, + authored by Dr Paul Swarztrauber of NCAR, in 1985. 
+ + As confirmed by the NCAR fftpack software curators, the following + FFTPACKv5 license applies to FFTPACKv4 sources. My changes are + released under the same terms. + + FFTPACK license: + + http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html + + Copyright (c) 2004 the University Corporation for Atmospheric + Research ("UCAR"). All rights reserved. Developed by NCAR's + Computational and Information Systems Laboratory, UCAR, + www.cisl.ucar.edu. + + Redistribution and use of the Software in source and binary forms, + with or without modification, is permitted provided that the + following conditions are met: + + - Neither the names of NCAR's Computational and Information Systems + Laboratory, the University Corporation for Atmospheric Research, + nor the names of its sponsors or contributors may be used to + endorse or promote products derived from this Software without + specific prior written permission. + + - Redistributions of source code must retain the above copyright + notices, this list of conditions, and the disclaimer below. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the disclaimer below in the + documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE + SOFTWARE. +*/ + +/* + PFFFT : a Pretty Fast FFT. 
+ + This is basically an adaptation of the single precision fftpack + (v4) as found on netlib taking advantage of SIMD instruction found + on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON). + + For architectures where no SIMD instruction is available, the code + falls back to a scalar version. + + Restrictions: + + - 1D transforms only, with 32-bit single precision. + + - supports only transforms for inputs of length N of the form + N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128, + 144, 160, etc are all acceptable lengths). Performance is best for + 128<=N<=8192. + + - all (float*) pointers in the functions below are expected to + have an "simd-compatible" alignment, that is 16 bytes on x86 and + powerpc CPUs. + + You can allocate such buffers with the functions + pffft_aligned_malloc / pffft_aligned_free (or with stuff like + posix_memalign..) + +*/ + +#ifndef PFFFT_H +#define PFFFT_H + +#include // for size_t + +#ifdef __cplusplus +extern "C" { +#endif + + /* opaque struct holding internal stuff (precomputed twiddle factors) + this struct can be shared by many threads as it contains only + read-only data. + */ + typedef struct PFFFT_Setup PFFFT_Setup; + + /* direction of the transform */ + typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t; + + /* type of transform */ + typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t; + + /* + prepare for performing transforms of size N -- the returned + PFFFT_Setup structure is read-only so it can safely be shared by + multiple concurrent threads. + */ + PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform); + void pffft_destroy_setup(PFFFT_Setup *); + /* + Perform a Fourier transform , The z-domain data is stored in the + most efficient order for transforming it back, or using it for + convolution. 
If you need to have its content sorted in the + "usual" way, that is as an array of interleaved complex numbers, + either use pffft_transform_ordered , or call pffft_zreorder after + the forward fft, and before the backward fft. + + Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x. + Typically you will want to scale the backward transform by 1/N. + + The 'work' pointer should point to an area of N (2*N for complex + fft) floats, properly aligned. If 'work' is NULL, then stack will + be used instead (this is probably the best strategy for small + FFTs, say for N < 16384). + + input and output may alias. + */ + void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction); + + /* + Similar to pffft_transform, but makes sure that the output is + ordered as expected (interleaved complex numbers). This is + similar to calling pffft_transform and then pffft_zreorder. + + input and output may alias. + */ + void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction); + + /* + call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(..., + PFFFT_FORWARD) if you want to have the frequency components in + the correct "canonical" order, as interleaved complex numbers. + + (for real transforms, both 0-frequency and half frequency + components, which are real, are assembled in the first entry as + F(0)+i*F(n/2+1). Note that the original fftpack did place + F(n/2+1) at the end of the arrays). + + input and output should not alias. + */ + void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction); + + /* + Perform a multiplication of the frequency components of dft_a and + dft_b and accumulate them into dft_ab. 
The arrays should have + been obtained with pffft_transform(.., PFFFT_FORWARD) and should + *not* have been reordered with pffft_zreorder (otherwise just + perform the operation yourself as the dft coefs are stored as + interleaved complex numbers). + + the operation performed is: dft_ab += (dft_a * fdt_b)*scaling + + The dft_a, dft_b and dft_ab pointers may alias. + */ + void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); + + /* + the float buffers must have the correct alignment (16-byte boundary + on intel and powerpc). This function may be used to obtain such + correctly aligned buffers. + */ + void *pffft_aligned_malloc(size_t nb_bytes); + void pffft_aligned_free(void *); + + /* return 4 or 1 wether support SSE/Altivec instructions was enable when building pffft.c */ + int pffft_simd_size(); + +#ifdef __cplusplus +} +#endif + +#endif // PFFFT_H diff --git a/oss-internship-2020/pffft/test_pffft.c b/oss-internship-2020/pffft/test_pffft.c new file mode 100644 index 0000000..a5d20c2 --- /dev/null +++ b/oss-internship-2020/pffft/test_pffft.c @@ -0,0 +1,419 @@ +/* + Copyright (c) 2013 Julien Pommier. 
+ + Small test & bench for PFFFT, comparing its performance with the scalar FFTPACK, FFTW, and Apple vDSP + + How to build: + + on linux, with fftw3: + gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm + + on macos, without fftw3: + clang -o test_pffft -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -framework Accelerate + + on macos, with fftw3: + clang -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework Accelerate + + on windows, with visual c++: + cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c + + build without SIMD instructions: + gcc -o test_pffft -DPFFFT_SIMD_DISABLE -O3 -Wall -W pffft.c test_pffft.c fftpack.c -lm + + */ + +#include "pffft.h" +#include "fftpack.h" + +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_SYS_TIMES +# include +# include +#endif + +#ifdef HAVE_VECLIB +# include +#endif + +#ifdef HAVE_FFTW +# include +#endif + +#define MAX(x,y) ((x)>(y)?(x):(y)) + +double frand() { + return rand()/(double)RAND_MAX; +} + +#if defined(HAVE_SYS_TIMES) + inline double uclock_sec(void) { + static double ttclk = 0.; + if (ttclk == 0.) ttclk = sysconf(_SC_CLK_TCK); + struct tms t; return ((double)times(&t)) / ttclk; + } +# else + double uclock_sec(void) +{ return (double)clock()/(double)CLOCKS_PER_SEC; } +#endif + + +/* compare results with the regular fftpack */ +void pffft_validate_N(int N, int cplx) { + int Nfloat = N*(cplx?2:1); + int Nbytes = Nfloat * sizeof(float); + float *ref, *in, *out, *tmp, *tmp2; + PFFFT_Setup *s = pffft_new_setup(N, cplx ? 
PFFFT_COMPLEX : PFFFT_REAL); + int pass; + + if (!s) { printf("Skipping N=%d, not supported\n", N); return; } + ref = pffft_aligned_malloc(Nbytes); + in = pffft_aligned_malloc(Nbytes); + out = pffft_aligned_malloc(Nbytes); + tmp = pffft_aligned_malloc(Nbytes); + tmp2 = pffft_aligned_malloc(Nbytes); + + for (pass=0; pass < 2; ++pass) { + float ref_max = 0; + int k; + //printf("N=%d pass=%d cplx=%d\n", N, pass, cplx); + // compute reference solution with FFTPACK + if (pass == 0) { + float *wrk = malloc(2*Nbytes+15*sizeof(float)); + for (k=0; k < Nfloat; ++k) { + ref[k] = in[k] = frand()*2-1; + out[k] = 1e30; + } + if (!cplx) { + rffti(N, wrk); + rfftf(N, ref, wrk); + // use our ordering for real ffts instead of the one of fftpack + { + float refN=ref[N-1]; + for (k=N-2; k >= 1; --k) ref[k+1] = ref[k]; + ref[1] = refN; + } + } else { + cffti(N, wrk); + cfftf(N, ref, wrk); + } + free(wrk); + } + + for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, fabs(ref[k])); + + + // pass 0 : non canonical ordering of transform coefficients + if (pass == 0) { + // test forward transform, with different input / output + pffft_transform(s, in, tmp, 0, PFFFT_FORWARD); + memcpy(tmp2, tmp, Nbytes); + memcpy(tmp, in, Nbytes); + pffft_transform(s, tmp, tmp, 0, PFFFT_FORWARD); + for (k = 0; k < Nfloat; ++k) { + assert(tmp2[k] == tmp[k]); + } + + // test reordering + pffft_zreorder(s, tmp, out, PFFFT_FORWARD); + pffft_zreorder(s, out, tmp, PFFFT_BACKWARD); + for (k = 0; k < Nfloat; ++k) { + assert(tmp2[k] == tmp[k]); + } + pffft_zreorder(s, tmp, out, PFFFT_FORWARD); + } else { + // pass 1 : canonical ordering of transform coeffs. 
+ pffft_transform_ordered(s, in, tmp, 0, PFFFT_FORWARD); + memcpy(tmp2, tmp, Nbytes); + memcpy(tmp, in, Nbytes); + pffft_transform_ordered(s, tmp, tmp, 0, PFFFT_FORWARD); + for (k = 0; k < Nfloat; ++k) { + assert(tmp2[k] == tmp[k]); + } + memcpy(out, tmp, Nbytes); + } + + { + for (k=0; k < Nfloat; ++k) { + if (!(fabs(ref[k] - out[k]) < 1e-3*ref_max)) { + printf("%s forward PFFFT mismatch found for N=%d\n", (cplx?"CPLX":"REAL"), N); + exit(1); + } + } + + if (pass == 0) pffft_transform(s, tmp, out, 0, PFFFT_BACKWARD); + else pffft_transform_ordered(s, tmp, out, 0, PFFFT_BACKWARD); + memcpy(tmp2, out, Nbytes); + memcpy(out, tmp, Nbytes); + if (pass == 0) pffft_transform(s, out, out, 0, PFFFT_BACKWARD); + else pffft_transform_ordered(s, out, out, 0, PFFFT_BACKWARD); + for (k = 0; k < Nfloat; ++k) { + assert(tmp2[k] == out[k]); + out[k] *= 1.f/N; + } + for (k = 0; k < Nfloat; ++k) { + if (fabs(in[k] - out[k]) > 1e-3 * ref_max) { + printf("pass=%d, %s IFFFT does not match for N=%d\n", pass, (cplx?"CPLX":"REAL"), N); break; + exit(1); + } + } + } + + // quick test of the circular convolution in fft domain + { + float conv_err = 0, conv_max = 0; + + pffft_zreorder(s, ref, tmp, PFFFT_FORWARD); + memset(out, 0, Nbytes); + pffft_zconvolve_accumulate(s, ref, ref, out, 1.0); + pffft_zreorder(s, out, tmp2, PFFFT_FORWARD); + + for (k=0; k < Nfloat; k += 2) { + float ar = tmp[k], ai=tmp[k+1]; + if (cplx || k > 0) { + tmp[k] = ar*ar - ai*ai; + tmp[k+1] = 2*ar*ai; + } else { + tmp[0] = ar*ar; + tmp[1] = ai*ai; + } + } + + for (k=0; k < Nfloat; ++k) { + float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]); + if (d > conv_err) conv_err = d; + if (e > conv_max) conv_max = e; + } + if (conv_err > 1e-5*conv_max) { + printf("zconvolve error ? 
%g %g\n", conv_err, conv_max); exit(1); + } + } + + } + + printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); fflush(stdout); + + pffft_destroy_setup(s); + pffft_aligned_free(ref); + pffft_aligned_free(in); + pffft_aligned_free(out); + pffft_aligned_free(tmp); + pffft_aligned_free(tmp2); +} + +void pffft_validate(int cplx) { + static int Ntest[] = { 16, 32, 64, 96, 128, 160, 192, 256, 288, 384, 5*96, 512, 576, 5*128, 800, 864, 1024, 2048, 2592, 4000, 4096, 12000, 36864, 0}; + int k; + for (k = 0; Ntest[k]; ++k) { + int N = Ntest[k]; + if (N == 16 && !cplx) continue; + pffft_validate_N(N, cplx); + } +} + +int array_output_format = 0; + +void show_output(const char *name, int N, int cplx, float flops, float t0, float t1, int max_iter) { + float mflops = flops/1e6/(t1 - t0 + 1e-16); + if (array_output_format) { + if (flops != -1) { + printf("|%9.0f ", mflops); + } else printf("| n/a "); + } else { + if (flops != -1) { + printf("N=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), name, mflops, (t1-t0)/2/max_iter * 1e9, max_iter); + } + } + fflush(stdout); +} + +void benchmark_ffts(int N, int cplx) { + int Nfloat = (cplx ? N*2 : N); + int Nbytes = Nfloat * sizeof(float); + float *X = pffft_aligned_malloc(Nbytes), *Y = pffft_aligned_malloc(Nbytes), *Z = pffft_aligned_malloc(Nbytes); + + double t0, t1, flops; + + int k; + int max_iter = 5120000/N*4; +#ifdef __arm__ + max_iter /= 4; +#endif + int iter; + + for (k = 0; k < Nfloat; ++k) { + X[k] = 0; //sqrtf(k+1); + } + + // FFTPack benchmark + { + float *wrk = malloc(2*Nbytes + 15*sizeof(float)); + int max_iter_ = max_iter/pffft_simd_size(); if (max_iter_ == 0) max_iter_ = 1; + if (cplx) cffti(N, wrk); + else rffti(N, wrk); + t0 = uclock_sec(); + + for (iter = 0; iter < max_iter_; ++iter) { + if (cplx) { + cfftf(N, X, wrk); + cfftb(N, X, wrk); + } else { + rfftf(N, X, wrk); + rfftb(N, X, wrk); + } + } + t1 = uclock_sec(); + free(wrk); + + flops = (max_iter_*2) * ((cplx ? 
5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html + show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); + } + +#ifdef HAVE_VECLIB + int log2N = (int)(log(N)/log(2) + 0.5f); + if (N == (1< 1 && strcmp(argv[1], "--array-format") == 0) { + array_output_format = 1; + } + +#ifndef PFFFT_SIMD_DISABLE + validate_pffft_simd(); +#endif + pffft_validate(1); + pffft_validate(0); + if (!array_output_format) { + for (i=0; Nvalues[i] > 0; ++i) { + benchmark_ffts(Nvalues[i], 0 /* real fft */); + } + for (i=0; Nvalues[i] > 0; ++i) { + benchmark_ffts(Nvalues[i], 1 /* cplx fft */); + } + } else { + printf("| input len "); + printf("|real FFTPack"); +#ifdef HAVE_VECLIB + printf("| real vDSP "); +#endif +#ifdef HAVE_FFTW + printf("| real FFTW "); +#endif + printf("| real PFFFT | "); + + printf("|cplx FFTPack"); +#ifdef HAVE_VECLIB + printf("| cplx vDSP "); +#endif +#ifdef HAVE_FFTW + printf("| cplx FFTW "); +#endif + printf("| cplx PFFFT |\n"); + for (i=0; Nvalues[i] > 0; ++i) { + printf("|%9d ", Nvalues[i]); + benchmark_ffts(Nvalues[i], 0); + printf("| "); + benchmark_ffts(Nvalues[i], 1); + printf("|\n"); + } + printf(" (numbers are given in MFlops)\n"); + } + + + return 0; +} diff --git a/oss-internship-2020/pffft/test_pffft_sandboxed.cc b/oss-internship-2020/pffft/test_pffft_sandboxed.cc new file mode 100644 index 0000000..dd6dc0b --- /dev/null +++ b/oss-internship-2020/pffft/test_pffft_sandboxed.cc @@ -0,0 +1,150 @@ +#include "fftpack.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pffft_sapi.sapi.h" +#include "sandboxed_api/util/flag.h" +#include "sandboxed_api/vars.h" + +ABSL_DECLARE_FLAG(string, sandbox2_danger_danger_permit_all); +ABSL_DECLARE_FLAG(string, sandbox2_danger_danger_permit_all_and_log); + +class pffftSapiSandbox : public pffftSandbox { + public: + std::unique_ptr ModifyPolicy(sandbox2::PolicyBuilder*) override { + return sandbox2::PolicyBuilder() + .AllowStaticStartup() + 
.AllowOpen() + .AllowRead() + .AllowWrite() + .AllowSystemMalloc() + .AllowExit() + .AllowSyscalls({ + __NR_futex, + __NR_close, + __NR_getrusage, + }) + .DisableNamespaces() + .BuildOrDie(); + } +}; + +double frand() { + return rand()/(double)RAND_MAX; +} + +double uclock_sec(void) { + return (double)clock()/(double)CLOCKS_PER_SEC; +} + +int array_output_format = 0; + +void show_output(const char *name, int N, int cplx, float flops, float t0, float t1, int max_iter) { + float mflops = flops/1e6/(t1 - t0 + 1e-16); + if (array_output_format) { + if (flops != -1) { + printf("|%9.0f ", mflops); + } else printf("| n/a "); + } else { + if (flops != -1) { + printf("N=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), name, mflops, (t1-t0)/2/max_iter * 1e9, max_iter); + } + } + fflush(stdout); +} + +/* + For debug: + SAPI_VLOG_LEVEL=1 ./pffft_sandboxed --v=100 --sandbox2_danger_danger_permit_all_and_log my_aux_file +*/ + +int main(int argc, char* argv[]) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + int Nvalues[] = { 64, 96, 128, 160, 192, 256, 384, 5*96, 512, 5*128, 3*256, 800, 1024, 2048, 2400, 4096, 8192, 9*1024, 16384, 32768, 256*1024, 1024*1024, -1 }; + int i; + + printf("initializing sandbox...\n"); + + pffftSapiSandbox sandbox; + sandbox.Init().IgnoreError(); + + pffftApi api(&sandbox); + + int N, cplx; + + cplx = 0; + + for (i = 0; i < 5; i++) { + N = Nvalues[i]; + + int Nfloat = N * (cplx ? 2 : 1); + int Nbytes = Nfloat * sizeof(float); + int pass; + + float ref[Nbytes], in[Nbytes], out[Nbytes], tmp[Nbytes], tmp2[Nbytes]; + + sapi::v::Array ref_(ref, Nbytes); + sapi::v::Array in_(in, Nbytes); + sapi::v::Array out_(out, Nbytes); + sapi::v::Array tmp_(tmp, Nbytes); + sapi::v::Array tmp2_(tmp2, Nbytes); + + float wrk[2 * Nbytes + 15 * sizeof(float)]; + sapi::v::Array wrk_(wrk, 2 * Nbytes + 15 * sizeof(float)); + + float ref_max = 0; + int k; + + Nfloat = (cplx ? 
N * 2 : N); + float X[Nbytes], Y[Nbytes], Z[Nbytes]; + sapi::v::Array X_(X, Nbytes), Y_(Y, Nbytes), Z_(Z, Nbytes); + + double t0, t1, flops; + + int max_iter = 5120000/N*4; + #ifdef __arm__ + max_iter /= 4; + #endif + int iter; + + for (k = 0; k < Nfloat; ++k) { + X[k] = 0; + } + + // FFTPack benchmark + { + int max_iter_ = max_iter/4; // SIMD_SZ == 4 (returning value of pffft_simd_size()) + if (max_iter_ == 0) max_iter_ = 1; + if (cplx) { + api.cffti(N, wrk_.PtrBoth()).IgnoreError(); + } else { + api.rffti(N, wrk_.PtrBoth()).IgnoreError(); + } + t0 = uclock_sec(); + + for (iter = 0; iter < max_iter_; ++iter) { + if (cplx) { + api.cfftf(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + api.cfftb(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + } else { + api.rfftf(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + api.rfftb(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + } + } + t1 = uclock_sec(); + + flops = (max_iter_ * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); + show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); + + } + } + + return 0; +} \ No newline at end of file From deb8869bb660a6f2cae4f44dd14b25a9c746debe Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Mon, 17 Aug 2020 11:21:11 +0000 Subject: [PATCH 03/42] Added .gitignore --- oss-internship-2020/pffft/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 oss-internship-2020/pffft/.gitignore diff --git a/oss-internship-2020/pffft/.gitignore b/oss-internship-2020/pffft/.gitignore new file mode 100644 index 0000000..f9ed608 --- /dev/null +++ b/oss-internship-2020/pffft/.gitignore @@ -0,0 +1 @@ +.hg/ From 50921e222d27198717c3ae174fe93299306ae01b Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Mon, 17 Aug 2020 11:21:19 +0000 Subject: [PATCH 04/42] Coding style adaptation --- .../pffft/test_pffft_sandboxed.cc | 82 ++++++++++--------- 1 file changed, 43 insertions(+), 39 deletions(-) diff --git a/oss-internship-2020/pffft/test_pffft_sandboxed.cc 
b/oss-internship-2020/pffft/test_pffft_sandboxed.cc index dd6dc0b..d50dff3 100644 --- a/oss-internship-2020/pffft/test_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/test_pffft_sandboxed.cc @@ -1,14 +1,13 @@ -#include "fftpack.h" - +#include #include #include #include -#include -#include #include -#include #include +#include +#include +#include "fftpack.h" #include "pffft_sapi.sapi.h" #include "sandboxed_api/util/flag.h" #include "sandboxed_api/vars.h" @@ -17,9 +16,10 @@ ABSL_DECLARE_FLAG(string, sandbox2_danger_danger_permit_all); ABSL_DECLARE_FLAG(string, sandbox2_danger_danger_permit_all_and_log); class pffftSapiSandbox : public pffftSandbox { - public: - std::unique_ptr ModifyPolicy(sandbox2::PolicyBuilder*) override { - return sandbox2::PolicyBuilder() + public: + std::unique_ptr ModifyPolicy( + sandbox2::PolicyBuilder*) override { + return sandbox2::PolicyBuilder() .AllowStaticStartup() .AllowOpen() .AllowRead() @@ -27,47 +27,51 @@ class pffftSapiSandbox : public pffftSandbox { .AllowSystemMalloc() .AllowExit() .AllowSyscalls({ - __NR_futex, - __NR_close, - __NR_getrusage, + __NR_futex, + __NR_close, + __NR_getrusage, }) .DisableNamespaces() .BuildOrDie(); - } + } }; -double frand() { - return rand()/(double)RAND_MAX; -} +double frand() { return rand() / (double)RAND_MAX; } -double uclock_sec(void) { - return (double)clock()/(double)CLOCKS_PER_SEC; -} +double uclock_sec(void) { return (double)clock() / (double)CLOCKS_PER_SEC; } int array_output_format = 0; -void show_output(const char *name, int N, int cplx, float flops, float t0, float t1, int max_iter) { - float mflops = flops/1e6/(t1 - t0 + 1e-16); +void show_output(const char* name, int N, int cplx, float flops, float t0, + float t1, int max_iter) { + float mflops = flops / 1e6 / (t1 - t0 + 1e-16); if (array_output_format) { if (flops != -1) { printf("|%9.0f ", mflops); - } else printf("| n/a "); + } else + printf("| n/a "); } else { if (flops != -1) { - printf("N=%5d, %s %16s : %6.0f MFlops [t=%6.0f 
ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), name, mflops, (t1-t0)/2/max_iter * 1e9, max_iter); + printf("N=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, + (cplx ? "CPLX" : "REAL"), name, mflops, + (t1 - t0) / 2 / max_iter * 1e9, max_iter); } } fflush(stdout); } -/* +/* For debug: - SAPI_VLOG_LEVEL=1 ./pffft_sandboxed --v=100 --sandbox2_danger_danger_permit_all_and_log my_aux_file + SAPI_VLOG_LEVEL=1 ./pffft_sandboxed --v=100 + --sandbox2_danger_danger_permit_all_and_log my_aux_file */ int main(int argc, char* argv[]) { gflags::ParseCommandLineFlags(&argc, &argv, true); - int Nvalues[] = { 64, 96, 128, 160, 192, 256, 384, 5*96, 512, 5*128, 3*256, 800, 1024, 2048, 2400, 4096, 8192, 9*1024, 16384, 32768, 256*1024, 1024*1024, -1 }; + int Nvalues[] = {64, 96, 128, 160, 192, 256, + 384, 5 * 96, 512, 5 * 128, 3 * 256, 800, + 1024, 2048, 2400, 4096, 8192, 9 * 1024, + 16384, 32768, 256 * 1024, 1024 * 1024, -1}; int i; printf("initializing sandbox...\n"); @@ -76,14 +80,14 @@ int main(int argc, char* argv[]) { sandbox.Init().IgnoreError(); pffftApi api(&sandbox); - + int N, cplx; cplx = 0; for (i = 0; i < 5; i++) { N = Nvalues[i]; - + int Nfloat = N * (cplx ? 
2 : 1); int Nbytes = Nfloat * sizeof(float); int pass; @@ -108,27 +112,28 @@ int main(int argc, char* argv[]) { double t0, t1, flops; - int max_iter = 5120000/N*4; - #ifdef __arm__ - max_iter /= 4; - #endif + int max_iter = 5120000 / N * 4; +#ifdef __arm__ + max_iter /= 4; +#endif int iter; for (k = 0; k < Nfloat; ++k) { - X[k] = 0; + X[k] = 0; } // FFTPack benchmark { - int max_iter_ = max_iter/4; // SIMD_SZ == 4 (returning value of pffft_simd_size()) + int max_iter_ = + max_iter / 4; // SIMD_SZ == 4 (returning value of pffft_simd_size()) if (max_iter_ == 0) max_iter_ = 1; if (cplx) { api.cffti(N, wrk_.PtrBoth()).IgnoreError(); } else { api.rffti(N, wrk_.PtrBoth()).IgnoreError(); } - t0 = uclock_sec(); - + t0 = uclock_sec(); + for (iter = 0; iter < max_iter_; ++iter) { if (cplx) { api.cfftf(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); @@ -139,10 +144,9 @@ int main(int argc, char* argv[]) { } } t1 = uclock_sec(); - - flops = (max_iter_ * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); - show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); - + + flops = (max_iter_ * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); + show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); } } From 2ab097f82fe012956e0e5299bd76a5db19530788 Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Mon, 17 Aug 2020 11:21:27 +0000 Subject: [PATCH 05/42] Added README --- oss-internship-2020/pffft/README.txt | 425 +-------------------- oss-internship-2020/pffft/README_pffft.txt | 416 ++++++++++++++++++++ 2 files changed, 430 insertions(+), 411 deletions(-) create mode 100644 oss-internship-2020/pffft/README_pffft.txt diff --git a/oss-internship-2020/pffft/README.txt b/oss-internship-2020/pffft/README.txt index ee20b42..ef51f1a 100644 --- a/oss-internship-2020/pffft/README.txt +++ b/oss-internship-2020/pffft/README.txt @@ -1,416 +1,19 @@ -PFFFT: a pretty fast FFT. 
+Sandboxing PFFFT library -TL;DR --- +Builder: CMake -PFFFT does 1D Fast Fourier Transforms, of single precision real and -complex vectors. It tries do it fast, it tries to be correct, and it -tries to be small. Computations do take advantage of SSE1 instructions -on x86 cpus, Altivec on powerpc cpus, and NEON on ARM cpus. The -license is BSD-like. +For testing: +`cd build`, then `./pffft_sandboxed` +For debug: +`SAPI_VLOG_LEVEL=1 ./pffft_sandboxed --v=100 +--sandbox2_danger_danger_permit_all_and_log ` -Why does it exist: --- - -I was in search of a good performing FFT library , preferably very -small and with a very liberal license. - -When one says "fft library", FFTW ("Fastest Fourier Transform in the -West") is probably the first name that comes to mind -- I guess that -99% of open-source projects that need a FFT do use FFTW, and are happy -with it. However, it is quite a large library , which does everything -fft related (2d transforms, 3d transforms, other transformations such -as discrete cosine , or fast hartley). And it is licensed under the -GNU GPL , which means that it cannot be used in non open-source -products. - -An alternative to FFTW that is really small, is the venerable FFTPACK -v4, which is available on NETLIB. A more recent version (v5) exists, -but it is larger as it deals with multi-dimensional transforms. This -is a library that is written in FORTRAN 77, a language that is now -considered as a bit antiquated by many. FFTPACKv4 was written in 1985, -by Dr Paul Swarztrauber of NCAR, more than 25 years ago ! And despite -its age, benchmarks show it that it still a very good performing FFT -library, see for example the 1d single precision benchmarks here: -http://www.fftw.org/speed/opteron-2.2GHz-32bit/ . It is however not -competitive with the fastest ones, such as FFTW, Intel MKL, AMD ACML, -Apple vDSP. 
The reason for that is that those libraries do take -advantage of the SSE SIMD instructions available on Intel CPUs, -available since the days of the Pentium III. These instructions deal -with small vectors of 4 floats at a time, instead of a single float -for a traditionnal FPU, so when using these instructions one may expect -a 4-fold performance improvement. - -The idea was to take this fortran fftpack v4 code, translate to C, -modify it to deal with those SSE instructions, and check that the -final performance is not completely ridiculous when compared to other -SIMD FFT libraries. Translation to C was performed with f2c ( -http://www.netlib.org/f2c/ ). The resulting file was a bit edited in -order to remove the thousands of gotos that were introduced by -f2c. You will find the fftpack.h and fftpack.c sources in the -repository, this a complete translation of -http://www.netlib.org/fftpack/ , with the discrete cosine transform -and the test program. There is no license information in the netlib -repository, but it was confirmed to me by the fftpack v5 curators that -the same terms do apply to fftpack v4: -http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html . This is a -"BSD-like" license, it is compatible with proprietary projects. - -Adapting fftpack to deal with the SIMD 4-element vectors instead of -scalar single precision numbers was more complex than I originally -thought, especially with the real transforms, and I ended up writing -more code than I planned.. - - -The code: --- - -Only two files, in good old C, pffft.c and pffft.h . The API is very -very simple, just make sure that you read the comments in pffft.h. - - -Comparison with other FFTs: --- - -The idea was not to break speed records, but to get a decently fast -fft that is at least 50% as fast as the fastest FFT -- especially on -slowest computers . 
I'm more focused on getting the best performance -on slow cpus (Atom, Intel Core 1, old Athlons, ARM Cortex-A9...), than -on getting top performance on today fastest cpus. - -It can be used in a real-time context as the fft functions do not -perform any memory allocation -- that is why they accept a 'work' -array in their arguments. - -It is also a bit focused on performing 1D convolutions, that is why it -provides "unordered" FFTs , and a fourier domain convolution -operation. - - -Benchmark results (cpu tested: core i7 2600, core 2 quad, core 1 duo, atom N270, cortex-A9, cortex-A15, A8X) --- - -The benchmark shows the performance of various fft implementations measured in -MFlops, with the number of floating point operations being defined as 5Nlog2(N) -for a length N complex fft, and 2.5*Nlog2(N) for a real fft. -See http://www.fftw.org/speed/method.html for an explanation of these formulas. - -MacOS Lion, gcc 4.2, 64-bit, fftw 3.3 on a 3.4 GHz core i7 2600 - -Built with: - - gcc-4.2 -o test_pffft -arch x86_64 -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -DHAVE_VECLIB -framework veclib -DHAVE_FFTW -lfftw3f - -| input len |real FFTPack| real vDSP | real FFTW | real PFFFT | |cplx FFTPack| cplx vDSP | cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| -| 64 | 2816 | 8596 | 7329 | 8187 | | 2887 | 14898 | 14668 | 11108 | -| 96 | 3298 | n/a | 8378 | 7727 | | 3953 | n/a | 15680 | 10878 | -| 128 | 3507 | 11575 | 9266 | 10108 | | 4233 | 17598 | 16427 | 12000 | -| 160 | 3391 | n/a | 9838 | 10711 | | 4220 | n/a | 16653 | 11187 | -| 192 | 3919 | n/a | 9868 | 10956 | | 4297 | n/a | 15770 | 12540 | -| 256 | 4283 | 13179 | 10694 | 13128 | | 4545 | 19550 | 16350 | 13822 | -| 384 | 3136 | n/a | 10810 | 12061 | | 3600 | n/a | 16103 | 13240 | -| 480 | 3477 | n/a | 10632 | 12074 | | 3536 | n/a | 11630 | 12522 | -| 512 | 3783 | 15141 | 11267 | 13838 | | 
3649 | 20002 | 16560 | 13580 | -| 640 | 3639 | n/a | 11164 | 13946 | | 3695 | n/a | 15416 | 13890 | -| 768 | 3800 | n/a | 11245 | 13495 | | 3590 | n/a | 15802 | 14552 | -| 800 | 3440 | n/a | 10499 | 13301 | | 3659 | n/a | 12056 | 13268 | -| 1024 | 3924 | 15605 | 11450 | 15339 | | 3769 | 20963 | 13941 | 15467 | -| 2048 | 4518 | 16195 | 11551 | 15532 | | 4258 | 20413 | 13723 | 15042 | -| 2400 | 4294 | n/a | 10685 | 13078 | | 4093 | n/a | 12777 | 13119 | -| 4096 | 4750 | 16596 | 11672 | 15817 | | 4157 | 19662 | 14316 | 14336 | -| 8192 | 3820 | 16227 | 11084 | 12555 | | 3691 | 18132 | 12102 | 13813 | -| 9216 | 3864 | n/a | 10254 | 12870 | | 3586 | n/a | 12119 | 13994 | -| 16384 | 3822 | 15123 | 10454 | 12822 | | 3613 | 16874 | 12370 | 13881 | -| 32768 | 4175 | 14512 | 10662 | 11095 | | 3881 | 14702 | 11619 | 11524 | -| 262144 | 3317 | 11429 | 6269 | 9517 | | 2810 | 11729 | 7757 | 10179 | -| 1048576 | 2913 | 10551 | 4730 | 5867 | | 2661 | 7881 | 3520 | 5350 | -|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| - - -Debian 6, gcc 4.4.5, 64-bit, fftw 3.3.1 on a 3.4 GHz core i7 2600 - -Built with: -gcc -o test_pffft -DHAVE_FFTW -msse2 -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L$HOME/local/lib -I$HOME/local/include/ -lfftw3f -lm - -| N (input length) | real FFTPack | real FFTW | real PFFFT | | cplx FFTPack | cplx FFTW | cplx PFFFT | -|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| -| 64 | 3840 | 7680 | 8777 | | 4389 | 20480 | 11171 | -| 96 | 4214 | 9633 | 8429 | | 4816 | 22477 | 11238 | -| 128 | 3584 | 10240 | 10240 | | 5120 | 23893 | 11947 | -| 192 | 4854 | 11095 | 12945 | | 4854 | 22191 | 14121 | -| 256 | 4096 | 11703 | 16384 | | 5120 | 23406 | 13653 | -| 384 | 4395 | 14651 | 12558 | | 4884 | 19535 | 14651 | -| 512 | 5760 | 13166 | 15360 | | 4608 | 23040 | 15360 | -| 768 | 4907 | 14020 | 16357 | | 4461 | 19628 | 14020 | -| 1024 | 5120 | 
14629 | 14629 | | 5120 | 20480 | 15754 | -| 2048 | 5632 | 14080 | 18773 | | 4693 | 12516 | 16091 | -| 4096 | 5120 | 13653 | 17554 | | 4726 | 7680 | 14456 | -| 8192 | 4160 | 7396 | 13312 | | 4437 | 14791 | 13312 | -| 9216 | 4210 | 6124 | 13473 | | 4491 | 7282 | 14970 | -| 16384 | 3976 | 11010 | 14313 | | 4210 | 11450 | 13631 | -| 32768 | 4260 | 10224 | 10954 | | 4260 | 6816 | 11797 | -| 262144 | 3736 | 6896 | 9961 | | 2359 | 8965 | 9437 | -| 1048576 | 2796 | 4534 | 6453 | | 1864 | 3078 | 5592 | -|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| - - - -MacOS Snow Leopard, gcc 4.0, 32-bit, fftw 3.3 on a 1.83 GHz core 1 duo - -Built with: - - gcc -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework veclib - -| input len |real FFTPack| real vDSP | real FFTW | real PFFFT | |cplx FFTPack| cplx vDSP | cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| -| 64 | 745 | 2145 | 1706 | 2028 | | 961 | 3356 | 3313 | 2300 | -| 96 | 877 | n/a | 1976 | 1978 | | 1059 | n/a | 3333 | 2233 | -| 128 | 951 | 2808 | 2213 | 2279 | | 1202 | 3803 | 3739 | 2494 | -| 192 | 1002 | n/a | 2456 | 2429 | | 1186 | n/a | 3701 | 2508 | -| 256 | 1065 | 3205 | 2641 | 2793 | | 1302 | 4013 | 3912 | 2663 | -| 384 | 845 | n/a | 2759 | 2499 | | 948 | n/a | 3729 | 2504 | -| 512 | 900 | 3476 | 2956 | 2759 | | 974 | 4057 | 3954 | 2645 | -| 768 | 910 | n/a | 2912 | 2737 | | 975 | n/a | 3837 | 2614 | -| 1024 | 936 | 3583 | 3107 | 3009 | | 1006 | 4124 | 3821 | 2697 | -| 2048 | 1057 | 3585 | 3091 | 2837 | | 1089 | 3889 | 3701 | 2513 | -| 4096 | 1083 | 3524 | 3092 | 2733 | | 1039 | 3617 | 3462 | 2364 | -| 8192 | 874 | 3252 | 2967 | 2363 | | 911 | 3106 | 2789 | 2302 | -| 9216 | 898 | n/a | 2420 | 2290 | | 865 | n/a | 2676 | 2204 | -| 16384 | 903 | 2892 | 2506 | 2421 | | 899 | 
3026 | 2797 | 2289 | -| 32768 | 965 | 2837 | 2550 | 2358 | | 920 | 2922 | 2763 | 2240 | -| 262144 | 738 | 2422 | 1589 | 1708 | | 610 | 2038 | 1436 | 1091 | -| 1048576 | 528 | 1207 | 845 | 880 | | 606 | 1020 | 669 | 1036 | -|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| - - - -Ubuntu 11.04, gcc 4.5, 32-bit, fftw 3.2 on a 2.66 core 2 quad - -Built with: -gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm - -| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------| |------------+------------+------------| -| 64 | 1920 | 3614 | 5120 | | 2194 | 7680 | 6467 | -| 96 | 1873 | 3549 | 5187 | | 2107 | 8429 | 5863 | -| 128 | 2240 | 3773 | 5514 | | 2560 | 7964 | 6827 | -| 192 | 1765 | 4569 | 7767 | | 2284 | 9137 | 7061 | -| 256 | 2048 | 5461 | 7447 | | 2731 | 9638 | 7802 | -| 384 | 1998 | 5861 | 6762 | | 2313 | 9253 | 7644 | -| 512 | 2095 | 6144 | 7680 | | 2194 | 10240 | 7089 | -| 768 | 2230 | 5773 | 7549 | | 2045 | 10331 | 7010 | -| 1024 | 2133 | 6400 | 8533 | | 2133 | 10779 | 7877 | -| 2048 | 2011 | 7040 | 8665 | | 1942 | 10240 | 7768 | -| 4096 | 2194 | 6827 | 8777 | | 1755 | 9452 | 6827 | -| 8192 | 1849 | 6656 | 6656 | | 1752 | 7831 | 6827 | -| 9216 | 1871 | 5858 | 6416 | | 1643 | 6909 | 6266 | -| 16384 | 1883 | 6223 | 6506 | | 1664 | 7340 | 6982 | -| 32768 | 1826 | 6390 | 6667 | | 1631 | 7481 | 6971 | -| 262144 | 1546 | 4075 | 5977 | | 1299 | 3415 | 3551 | -| 1048576 | 1104 | 2071 | 1730 | | 1104 | 1149 | 1834 | -|-----------+------------+------------+------------| |------------+------------+------------| - - - -Ubuntu 11.04, gcc 4.5, 32-bit, fftw 3.3 on a 1.6 GHz Atom N270 - -Built with: -gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ 
-lfftw3f -lm - -| N (input length) | real FFTPack | real FFTW | real PFFFT | | cplx FFTPack | cplx FFTW | cplx PFFFT | -|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| -| 64 | 452 | 1041 | 1336 | | 549 | 2318 | 1781 | -| 96 | 444 | 1297 | 1297 | | 503 | 2408 | 1686 | -| 128 | 527 | 1525 | 1707 | | 543 | 2655 | 1886 | -| 192 | 498 | 1653 | 1849 | | 539 | 2678 | 1942 | -| 256 | 585 | 1862 | 2156 | | 594 | 2777 | 2244 | -| 384 | 499 | 1870 | 1998 | | 511 | 2586 | 1890 | -| 512 | 562 | 2095 | 2194 | | 542 | 2973 | 2194 | -| 768 | 545 | 2045 | 2133 | | 545 | 2365 | 2133 | -| 1024 | 595 | 2133 | 2438 | | 569 | 2695 | 2179 | -| 2048 | 587 | 2125 | 2347 | | 521 | 2230 | 1707 | -| 4096 | 495 | 1890 | 1834 | | 492 | 1876 | 1672 | -| 8192 | 469 | 1548 | 1729 | | 438 | 1740 | 1664 | -| 9216 | 468 | 1663 | 1663 | | 446 | 1585 | 1531 | -| 16384 | 453 | 1608 | 1767 | | 398 | 1476 | 1664 | -| 32768 | 456 | 1420 | 1503 | | 387 | 1388 | 1345 | -| 262144 | 309 | 385 | 726 | | 262 | 415 | 840 | -| 1048576 | 280 | 351 | 739 | | 261 | 313 | 797 | -|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| - - - -Windows 7, visual c++ 2010 on a 1.6 GHz Atom N270 - -Built with: -cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c - -(visual c++ is definitively not very good with SSE intrinsics...) 
- -| N (input length) | real FFTPack | real PFFFT | | cplx FFTPack | cplx PFFFT | -|------------------+--------------+--------------| |--------------+--------------| -| 64 | 173 | 1009 | | 174 | 1159 | -| 96 | 169 | 1029 | | 188 | 1201 | -| 128 | 195 | 1242 | | 191 | 1275 | -| 192 | 178 | 1312 | | 184 | 1276 | -| 256 | 196 | 1591 | | 186 | 1281 | -| 384 | 172 | 1409 | | 181 | 1281 | -| 512 | 187 | 1640 | | 181 | 1313 | -| 768 | 171 | 1614 | | 176 | 1258 | -| 1024 | 186 | 1812 | | 178 | 1223 | -| 2048 | 190 | 1707 | | 186 | 1099 | -| 4096 | 182 | 1446 | | 177 | 975 | -| 8192 | 175 | 1345 | | 169 | 1034 | -| 9216 | 165 | 1271 | | 168 | 1023 | -| 16384 | 166 | 1396 | | 165 | 949 | -| 32768 | 172 | 1311 | | 161 | 881 | -| 262144 | 136 | 632 | | 134 | 629 | -| 1048576 | 134 | 698 | | 127 | 623 | -|------------------+--------------+--------------| |--------------+--------------| - - - -Ubuntu 12.04, gcc-4.7.3, 32-bit, with fftw 3.3.3 (built with --enable-neon), on a 1.2GHz ARM Cortex A9 (Tegra 3) - -Built with: -gcc-4.7 -O3 -DHAVE_FFTW -march=armv7-a -mtune=cortex-a9 -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm -I/usr/local/include/ -L/usr/local/lib/ -lfftw3f - -| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------| |------------+------------+------------| -| 64 | 549 | 452 | 731 | | 512 | 602 | 640 | -| 96 | 421 | 272 | 702 | | 496 | 571 | 602 | -| 128 | 498 | 512 | 815 | | 597 | 618 | 652 | -| 160 | 521 | 536 | 815 | | 586 | 669 | 625 | -| 192 | 539 | 571 | 883 | | 485 | 597 | 626 | -| 256 | 640 | 539 | 975 | | 569 | 611 | 671 | -| 384 | 499 | 610 | 879 | | 499 | 602 | 637 | -| 480 | 518 | 507 | 877 | | 496 | 661 | 616 | -| 512 | 524 | 591 | 1002 | | 549 | 678 | 668 | -| 640 | 542 | 612 | 955 | | 568 | 663 | 645 | -| 768 | 557 | 613 | 981 | | 491 | 663 | 598 | -| 800 | 514 | 353 | 882 | | 514 | 360 | 574 | -| 1024 | 640 | 640 | 1067 
| | 492 | 683 | 602 | -| 2048 | 587 | 640 | 908 | | 486 | 640 | 552 | -| 2400 | 479 | 368 | 777 | | 422 | 376 | 518 | -| 4096 | 511 | 614 | 853 | | 426 | 640 | 534 | -| 8192 | 415 | 584 | 708 | | 386 | 622 | 516 | -| 9216 | 419 | 571 | 687 | | 364 | 586 | 506 | -| 16384 | 426 | 577 | 716 | | 398 | 606 | 530 | -| 32768 | 417 | 572 | 673 | | 399 | 572 | 468 | -| 262144 | 219 | 380 | 293 | | 255 | 431 | 343 | -| 1048576 | 202 | 274 | 237 | | 265 | 282 | 355 | -|-----------+------------+------------+------------| |------------+------------+------------| - -Same platform as above, but this time pffft and fftpack are built with clang 3.2: - -clang -O3 -DHAVE_FFTW -march=armv7-a -mtune=cortex-a9 -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm -I/usr/local/include/ -L/usr/local/lib/ -lfftw3f - -| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------| |------------+------------+------------| -| 64 | 427 | 452 | 853 | | 427 | 602 | 1024 | -| 96 | 351 | 276 | 843 | | 337 | 571 | 963 | -| 128 | 373 | 512 | 996 | | 390 | 618 | 1054 | -| 160 | 426 | 536 | 987 | | 375 | 669 | 914 | -| 192 | 404 | 571 | 1079 | | 388 | 588 | 1079 | -| 256 | 465 | 539 | 1205 | | 445 | 602 | 1170 | -| 384 | 366 | 610 | 1099 | | 343 | 594 | 1099 | -| 480 | 356 | 507 | 1140 | | 335 | 651 | 931 | -| 512 | 411 | 591 | 1213 | | 384 | 649 | 1124 | -| 640 | 398 | 612 | 1193 | | 373 | 654 | 901 | -| 768 | 409 | 613 | 1227 | | 383 | 663 | 1044 | -| 800 | 411 | 348 | 1073 | | 353 | 358 | 809 | -| 1024 | 427 | 640 | 1280 | | 413 | 692 | 1004 | -| 2048 | 414 | 626 | 1126 | | 371 | 640 | 853 | -| 2400 | 399 | 373 | 898 | | 319 | 368 | 653 | -| 4096 | 404 | 602 | 1059 | | 357 | 633 | 778 | -| 8192 | 332 | 584 | 792 | | 308 | 616 | 716 | -| 9216 | 322 | 561 | 783 | | 299 | 586 | 687 | -| 16384 | 344 | 568 | 778 | | 314 | 617 | 745 | -| 32768 | 342 | 564 | 737 | | 314 | 552 | 629 | -| 
262144 | 201 | 383 | 313 | | 227 | 435 | 413 | -| 1048576 | 187 | 262 | 251 | | 228 | 281 | 409 | -|-----------+------------+------------+------------| |------------+------------+------------| - -So it looks like, on ARM, gcc 4.7 is the best at scalar floating point -(the fftpack performance numbers are better with gcc), while clang is -the best with neon intrinsics (see how pffft perf has improved with -clang 3.2). - - -NVIDIA Jetson TK1 board, gcc-4.8.2. The cpu is a 2.3GHz cortex A15 (Tegra K1). - -Built with: -gcc -O3 -march=armv7-a -mtune=native -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm - -| input len |real FFTPack| real PFFFT | |cplx FFTPack| cplx PFFFT | -|-----------+------------+------------| |------------+------------| -| 64 | 1735 | 3308 | | 1994 | 3744 | -| 96 | 1596 | 3448 | | 1987 | 3572 | -| 128 | 1807 | 4076 | | 2255 | 3960 | -| 160 | 1769 | 4083 | | 2071 | 3845 | -| 192 | 1990 | 4233 | | 2017 | 3939 | -| 256 | 2191 | 4882 | | 2254 | 4346 | -| 384 | 1878 | 4492 | | 2073 | 4012 | -| 480 | 1748 | 4398 | | 1923 | 3951 | -| 512 | 2030 | 5064 | | 2267 | 4195 | -| 640 | 1918 | 4756 | | 2094 | 4184 | -| 768 | 2099 | 4907 | | 2048 | 4297 | -| 800 | 1822 | 4555 | | 1880 | 4063 | -| 1024 | 2232 | 5355 | | 2187 | 4420 | -| 2048 | 2176 | 4983 | | 2027 | 3602 | -| 2400 | 1741 | 4256 | | 1710 | 3344 | -| 4096 | 1816 | 3914 | | 1851 | 3349 | -| 8192 | 1716 | 3481 | | 1700 | 3255 | -| 9216 | 1735 | 3589 | | 1653 | 3094 | -| 16384 | 1567 | 3483 | | 1637 | 3244 | -| 32768 | 1624 | 3240 | | 1655 | 3156 | -| 262144 | 1012 | 1898 | | 983 | 1503 | -| 1048576 | 876 | 1154 | | 868 | 1341 | -|-----------+------------+------------| |------------+------------| - -The performance on the tegra K1 is pretty impressive. I'm not -including the FFTW numbers as they as slightly below the scalar -fftpack numbers, so something must be wrong (however it seems to be -correctly configured and is using neon simd instructions). 
- -When using clang 3.4 the pffft version is even a bit faster, reaching -5.7 GFlops for real ffts of size 1024. - - -iPad Air 2 with iOS9, xcode 8.0, arm64. The cpu is an Apple A8X, supposedly running at 1.5GHz. - -| input len |real FFTPack| real vDSP | real PFFFT | |cplx FFTPack| cplx vDSP | cplx PFFFT | -|-----------+------------+------------+------------| |------------+------------+------------| -| 64 | 2517 | 7995 | 6086 | | 2725 | 13006 | 8495 | -| 96 | 2442 | n/a | 6691 | | 2256 | n/a | 7991 | -| 128 | 2664 | 10186 | 7877 | | 2575 | 15115 | 9115 | -| 160 | 2638 | n/a | 8283 | | 2682 | n/a | 8806 | -| 192 | 2903 | n/a | 9083 | | 2634 | n/a | 8980 | -| 256 | 3184 | 11452 | 10039 | | 3026 | 15410 | 10199 | -| 384 | 2665 | n/a | 10100 | | 2275 | n/a | 9247 | -| 480 | 2546 | n/a | 9863 | | 2341 | n/a | 8892 | -| 512 | 2832 | 12197 | 10989 | | 2547 | 16768 | 10154 | -| 640 | 2755 | n/a | 10461 | | 2569 | n/a | 9666 | -| 768 | 2998 | n/a | 11355 | | 2585 | n/a | 9813 | -| 800 | 2516 | n/a | 10332 | | 2433 | n/a | 9164 | -| 1024 | 3109 | 12965 | 12114 | | 2869 | 16448 | 10519 | -| 2048 | 3027 | 12996 | 12023 | | 2648 | 17304 | 10307 | -| 2400 | 2515 | n/a | 10372 | | 2355 | n/a | 8443 | -| 4096 | 3204 | 13603 | 12359 | | 2814 | 16570 | 9780 | -| 8192 | 2759 | 13422 | 10824 | | 2153 | 15652 | 7884 | -| 9216 | 2700 | n/a | 9938 | | 2241 | n/a | 7900 | -| 16384 | 2280 | 13057 | 7976 | | 593 | 4272 | 2534 | -| 32768 | 768 | 4269 | 2882 | | 606 | 4405 | 2604 | -| 262144 | 724 | 3527 | 2630 | | 534 | 2418 | 2157 | -| 1048576 | 674 | 1467 | 2135 | | 530 | 1621 | 2055 | -|-----------+------------+------------+------------| |------------+------------+------------| - -I double-checked to make sure I did not make a mistake in the time -measurements, as the numbers are much higher than what I initially -expected. They are in fact higher than the number I get on the 2.8GHz -Xeon of my 2008 mac pro.. (except for FFT lengths >= 32768 where -having a big cache is useful). 
A good surprise is also that the perf -is not too far from apple's vDSP (at least for the real FFT). +CMake observations: + * links pffft and fftpack (which contains necessary functions for pffft) + * links the math library when it is available +Sandboxed main observations: + * contains two testing parts (fft / pffft benchmarks) + ! current stage: fft - works :) + pffft - not implemented diff --git a/oss-internship-2020/pffft/README_pffft.txt b/oss-internship-2020/pffft/README_pffft.txt new file mode 100644 index 0000000..ee20b42 --- /dev/null +++ b/oss-internship-2020/pffft/README_pffft.txt @@ -0,0 +1,416 @@ +PFFFT: a pretty fast FFT. + +TL;DR +-- + +PFFFT does 1D Fast Fourier Transforms, of single precision real and +complex vectors. It tries to do it fast, it tries to be correct, and it +tries to be small. Computations do take advantage of SSE1 instructions +on x86 cpus, Altivec on powerpc cpus, and NEON on ARM cpus. The +license is BSD-like. + + +Why does it exist: +-- + +I was in search of a good performing FFT library , preferably very +small and with a very liberal license. + +When one says "fft library", FFTW ("Fastest Fourier Transform in the +West") is probably the first name that comes to mind -- I guess that +99% of open-source projects that need a FFT do use FFTW, and are happy +with it. However, it is quite a large library , which does everything +fft related (2d transforms, 3d transforms, other transformations such +as discrete cosine , or fast hartley). And it is licensed under the +GNU GPL , which means that it cannot be used in non open-source +products. + +An alternative to FFTW that is really small, is the venerable FFTPACK +v4, which is available on NETLIB. A more recent version (v5) exists, +but it is larger as it deals with multi-dimensional transforms. This +is a library that is written in FORTRAN 77, a language that is now +considered as a bit antiquated by many. FFTPACKv4 was written in 1985, +by Dr Paul Swarztrauber of NCAR, more than 25 years ago !
And despite +its age, benchmarks show that it is still a very good performing FFT +library, see for example the 1d single precision benchmarks here: +http://www.fftw.org/speed/opteron-2.2GHz-32bit/ . It is however not +competitive with the fastest ones, such as FFTW, Intel MKL, AMD ACML, +Apple vDSP. The reason for that is that those libraries do take +advantage of the SSE SIMD instructions available on Intel CPUs, +available since the days of the Pentium III. These instructions deal +with small vectors of 4 floats at a time, instead of a single float +for a traditional FPU, so when using these instructions one may expect +a 4-fold performance improvement. + +The idea was to take this fortran fftpack v4 code, translate to C, +modify it to deal with those SSE instructions, and check that the +final performance is not completely ridiculous when compared to other +SIMD FFT libraries. Translation to C was performed with f2c ( +http://www.netlib.org/f2c/ ). The resulting file was a bit edited in +order to remove the thousands of gotos that were introduced by +f2c. You will find the fftpack.h and fftpack.c sources in the +repository, this is a complete translation of +http://www.netlib.org/fftpack/ , with the discrete cosine transform +and the test program. There is no license information in the netlib +repository, but it was confirmed to me by the fftpack v5 curators that +the same terms do apply to fftpack v4: +http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html . This is a +"BSD-like" license, it is compatible with proprietary projects. + +Adapting fftpack to deal with the SIMD 4-element vectors instead of +scalar single precision numbers was more complex than I originally +thought, especially with the real transforms, and I ended up writing +more code than I planned.. + + +The code: +-- + +Only two files, in good old C, pffft.c and pffft.h . The API is very +very simple, just make sure that you read the comments in pffft.h.
+ + +Comparison with other FFTs: +-- + +The idea was not to break speed records, but to get a decently fast +fft that is at least 50% as fast as the fastest FFT -- especially on +slowest computers . I'm more focused on getting the best performance +on slow cpus (Atom, Intel Core 1, old Athlons, ARM Cortex-A9...), than +on getting top performance on today fastest cpus. + +It can be used in a real-time context as the fft functions do not +perform any memory allocation -- that is why they accept a 'work' +array in their arguments. + +It is also a bit focused on performing 1D convolutions, that is why it +provides "unordered" FFTs , and a fourier domain convolution +operation. + + +Benchmark results (cpu tested: core i7 2600, core 2 quad, core 1 duo, atom N270, cortex-A9, cortex-A15, A8X) +-- + +The benchmark shows the performance of various fft implementations measured in +MFlops, with the number of floating point operations being defined as 5Nlog2(N) +for a length N complex fft, and 2.5*Nlog2(N) for a real fft. +See http://www.fftw.org/speed/method.html for an explanation of these formulas. 
+ +MacOS Lion, gcc 4.2, 64-bit, fftw 3.3 on a 3.4 GHz core i7 2600 + +Built with: + + gcc-4.2 -o test_pffft -arch x86_64 -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -DHAVE_VECLIB -framework veclib -DHAVE_FFTW -lfftw3f + +| input len |real FFTPack| real vDSP | real FFTW | real PFFFT | |cplx FFTPack| cplx vDSP | cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| +| 64 | 2816 | 8596 | 7329 | 8187 | | 2887 | 14898 | 14668 | 11108 | +| 96 | 3298 | n/a | 8378 | 7727 | | 3953 | n/a | 15680 | 10878 | +| 128 | 3507 | 11575 | 9266 | 10108 | | 4233 | 17598 | 16427 | 12000 | +| 160 | 3391 | n/a | 9838 | 10711 | | 4220 | n/a | 16653 | 11187 | +| 192 | 3919 | n/a | 9868 | 10956 | | 4297 | n/a | 15770 | 12540 | +| 256 | 4283 | 13179 | 10694 | 13128 | | 4545 | 19550 | 16350 | 13822 | +| 384 | 3136 | n/a | 10810 | 12061 | | 3600 | n/a | 16103 | 13240 | +| 480 | 3477 | n/a | 10632 | 12074 | | 3536 | n/a | 11630 | 12522 | +| 512 | 3783 | 15141 | 11267 | 13838 | | 3649 | 20002 | 16560 | 13580 | +| 640 | 3639 | n/a | 11164 | 13946 | | 3695 | n/a | 15416 | 13890 | +| 768 | 3800 | n/a | 11245 | 13495 | | 3590 | n/a | 15802 | 14552 | +| 800 | 3440 | n/a | 10499 | 13301 | | 3659 | n/a | 12056 | 13268 | +| 1024 | 3924 | 15605 | 11450 | 15339 | | 3769 | 20963 | 13941 | 15467 | +| 2048 | 4518 | 16195 | 11551 | 15532 | | 4258 | 20413 | 13723 | 15042 | +| 2400 | 4294 | n/a | 10685 | 13078 | | 4093 | n/a | 12777 | 13119 | +| 4096 | 4750 | 16596 | 11672 | 15817 | | 4157 | 19662 | 14316 | 14336 | +| 8192 | 3820 | 16227 | 11084 | 12555 | | 3691 | 18132 | 12102 | 13813 | +| 9216 | 3864 | n/a | 10254 | 12870 | | 3586 | n/a | 12119 | 13994 | +| 16384 | 3822 | 15123 | 10454 | 12822 | | 3613 | 16874 | 12370 | 13881 | +| 32768 | 4175 | 14512 | 10662 | 11095 | | 3881 | 14702 | 11619 | 11524 | +| 262144 | 3317 | 11429 | 6269 | 9517 | | 2810 | 11729 | 7757 | 10179 | +| 
1048576 | 2913 | 10551 | 4730 | 5867 | | 2661 | 7881 | 3520 | 5350 | +|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| + + +Debian 6, gcc 4.4.5, 64-bit, fftw 3.3.1 on a 3.4 GHz core i7 2600 + +Built with: +gcc -o test_pffft -DHAVE_FFTW -msse2 -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L$HOME/local/lib -I$HOME/local/include/ -lfftw3f -lm + +| N (input length) | real FFTPack | real FFTW | real PFFFT | | cplx FFTPack | cplx FFTW | cplx PFFFT | +|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| +| 64 | 3840 | 7680 | 8777 | | 4389 | 20480 | 11171 | +| 96 | 4214 | 9633 | 8429 | | 4816 | 22477 | 11238 | +| 128 | 3584 | 10240 | 10240 | | 5120 | 23893 | 11947 | +| 192 | 4854 | 11095 | 12945 | | 4854 | 22191 | 14121 | +| 256 | 4096 | 11703 | 16384 | | 5120 | 23406 | 13653 | +| 384 | 4395 | 14651 | 12558 | | 4884 | 19535 | 14651 | +| 512 | 5760 | 13166 | 15360 | | 4608 | 23040 | 15360 | +| 768 | 4907 | 14020 | 16357 | | 4461 | 19628 | 14020 | +| 1024 | 5120 | 14629 | 14629 | | 5120 | 20480 | 15754 | +| 2048 | 5632 | 14080 | 18773 | | 4693 | 12516 | 16091 | +| 4096 | 5120 | 13653 | 17554 | | 4726 | 7680 | 14456 | +| 8192 | 4160 | 7396 | 13312 | | 4437 | 14791 | 13312 | +| 9216 | 4210 | 6124 | 13473 | | 4491 | 7282 | 14970 | +| 16384 | 3976 | 11010 | 14313 | | 4210 | 11450 | 13631 | +| 32768 | 4260 | 10224 | 10954 | | 4260 | 6816 | 11797 | +| 262144 | 3736 | 6896 | 9961 | | 2359 | 8965 | 9437 | +| 1048576 | 2796 | 4534 | 6453 | | 1864 | 3078 | 5592 | +|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| + + + +MacOS Snow Leopard, gcc 4.0, 32-bit, fftw 3.3 on a 1.83 GHz core 1 duo + +Built with: + + gcc -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework veclib + +| input len |real FFTPack| real 
vDSP | real FFTW | real PFFFT | |cplx FFTPack| cplx vDSP | cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| +| 64 | 745 | 2145 | 1706 | 2028 | | 961 | 3356 | 3313 | 2300 | +| 96 | 877 | n/a | 1976 | 1978 | | 1059 | n/a | 3333 | 2233 | +| 128 | 951 | 2808 | 2213 | 2279 | | 1202 | 3803 | 3739 | 2494 | +| 192 | 1002 | n/a | 2456 | 2429 | | 1186 | n/a | 3701 | 2508 | +| 256 | 1065 | 3205 | 2641 | 2793 | | 1302 | 4013 | 3912 | 2663 | +| 384 | 845 | n/a | 2759 | 2499 | | 948 | n/a | 3729 | 2504 | +| 512 | 900 | 3476 | 2956 | 2759 | | 974 | 4057 | 3954 | 2645 | +| 768 | 910 | n/a | 2912 | 2737 | | 975 | n/a | 3837 | 2614 | +| 1024 | 936 | 3583 | 3107 | 3009 | | 1006 | 4124 | 3821 | 2697 | +| 2048 | 1057 | 3585 | 3091 | 2837 | | 1089 | 3889 | 3701 | 2513 | +| 4096 | 1083 | 3524 | 3092 | 2733 | | 1039 | 3617 | 3462 | 2364 | +| 8192 | 874 | 3252 | 2967 | 2363 | | 911 | 3106 | 2789 | 2302 | +| 9216 | 898 | n/a | 2420 | 2290 | | 865 | n/a | 2676 | 2204 | +| 16384 | 903 | 2892 | 2506 | 2421 | | 899 | 3026 | 2797 | 2289 | +| 32768 | 965 | 2837 | 2550 | 2358 | | 920 | 2922 | 2763 | 2240 | +| 262144 | 738 | 2422 | 1589 | 1708 | | 610 | 2038 | 1436 | 1091 | +| 1048576 | 528 | 1207 | 845 | 880 | | 606 | 1020 | 669 | 1036 | +|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| + + + +Ubuntu 11.04, gcc 4.5, 32-bit, fftw 3.2 on a 2.66 core 2 quad + +Built with: +gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm + +| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------| |------------+------------+------------| +| 64 | 1920 | 3614 | 5120 | | 2194 | 7680 | 6467 | +| 96 | 1873 | 3549 | 5187 | | 2107 | 8429 | 5863 | +| 128 | 2240 | 3773 | 5514 | | 
2560 | 7964 | 6827 | +| 192 | 1765 | 4569 | 7767 | | 2284 | 9137 | 7061 | +| 256 | 2048 | 5461 | 7447 | | 2731 | 9638 | 7802 | +| 384 | 1998 | 5861 | 6762 | | 2313 | 9253 | 7644 | +| 512 | 2095 | 6144 | 7680 | | 2194 | 10240 | 7089 | +| 768 | 2230 | 5773 | 7549 | | 2045 | 10331 | 7010 | +| 1024 | 2133 | 6400 | 8533 | | 2133 | 10779 | 7877 | +| 2048 | 2011 | 7040 | 8665 | | 1942 | 10240 | 7768 | +| 4096 | 2194 | 6827 | 8777 | | 1755 | 9452 | 6827 | +| 8192 | 1849 | 6656 | 6656 | | 1752 | 7831 | 6827 | +| 9216 | 1871 | 5858 | 6416 | | 1643 | 6909 | 6266 | +| 16384 | 1883 | 6223 | 6506 | | 1664 | 7340 | 6982 | +| 32768 | 1826 | 6390 | 6667 | | 1631 | 7481 | 6971 | +| 262144 | 1546 | 4075 | 5977 | | 1299 | 3415 | 3551 | +| 1048576 | 1104 | 2071 | 1730 | | 1104 | 1149 | 1834 | +|-----------+------------+------------+------------| |------------+------------+------------| + + + +Ubuntu 11.04, gcc 4.5, 32-bit, fftw 3.3 on a 1.6 GHz Atom N270 + +Built with: +gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm + +| N (input length) | real FFTPack | real FFTW | real PFFFT | | cplx FFTPack | cplx FFTW | cplx PFFFT | +|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| +| 64 | 452 | 1041 | 1336 | | 549 | 2318 | 1781 | +| 96 | 444 | 1297 | 1297 | | 503 | 2408 | 1686 | +| 128 | 527 | 1525 | 1707 | | 543 | 2655 | 1886 | +| 192 | 498 | 1653 | 1849 | | 539 | 2678 | 1942 | +| 256 | 585 | 1862 | 2156 | | 594 | 2777 | 2244 | +| 384 | 499 | 1870 | 1998 | | 511 | 2586 | 1890 | +| 512 | 562 | 2095 | 2194 | | 542 | 2973 | 2194 | +| 768 | 545 | 2045 | 2133 | | 545 | 2365 | 2133 | +| 1024 | 595 | 2133 | 2438 | | 569 | 2695 | 2179 | +| 2048 | 587 | 2125 | 2347 | | 521 | 2230 | 1707 | +| 4096 | 495 | 1890 | 1834 | | 492 | 1876 | 1672 | +| 8192 | 469 | 1548 | 1729 | | 438 | 1740 | 1664 | +| 9216 | 468 | 1663 | 1663 | | 446 | 1585 | 1531 | +| 
16384 | 453 | 1608 | 1767 | | 398 | 1476 | 1664 | +| 32768 | 456 | 1420 | 1503 | | 387 | 1388 | 1345 | +| 262144 | 309 | 385 | 726 | | 262 | 415 | 840 | +| 1048576 | 280 | 351 | 739 | | 261 | 313 | 797 | +|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| + + + +Windows 7, visual c++ 2010 on a 1.6 GHz Atom N270 + +Built with: +cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c + +(visual c++ is definitively not very good with SSE intrinsics...) + +| N (input length) | real FFTPack | real PFFFT | | cplx FFTPack | cplx PFFFT | +|------------------+--------------+--------------| |--------------+--------------| +| 64 | 173 | 1009 | | 174 | 1159 | +| 96 | 169 | 1029 | | 188 | 1201 | +| 128 | 195 | 1242 | | 191 | 1275 | +| 192 | 178 | 1312 | | 184 | 1276 | +| 256 | 196 | 1591 | | 186 | 1281 | +| 384 | 172 | 1409 | | 181 | 1281 | +| 512 | 187 | 1640 | | 181 | 1313 | +| 768 | 171 | 1614 | | 176 | 1258 | +| 1024 | 186 | 1812 | | 178 | 1223 | +| 2048 | 190 | 1707 | | 186 | 1099 | +| 4096 | 182 | 1446 | | 177 | 975 | +| 8192 | 175 | 1345 | | 169 | 1034 | +| 9216 | 165 | 1271 | | 168 | 1023 | +| 16384 | 166 | 1396 | | 165 | 949 | +| 32768 | 172 | 1311 | | 161 | 881 | +| 262144 | 136 | 632 | | 134 | 629 | +| 1048576 | 134 | 698 | | 127 | 623 | +|------------------+--------------+--------------| |--------------+--------------| + + + +Ubuntu 12.04, gcc-4.7.3, 32-bit, with fftw 3.3.3 (built with --enable-neon), on a 1.2GHz ARM Cortex A9 (Tegra 3) + +Built with: +gcc-4.7 -O3 -DHAVE_FFTW -march=armv7-a -mtune=cortex-a9 -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm -I/usr/local/include/ -L/usr/local/lib/ -lfftw3f + +| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------| |------------+------------+------------| +| 64 | 549 | 452 | 731 | | 512 | 602 | 640 | +| 96 | 421 | 
272 | 702 | | 496 | 571 | 602 | +| 128 | 498 | 512 | 815 | | 597 | 618 | 652 | +| 160 | 521 | 536 | 815 | | 586 | 669 | 625 | +| 192 | 539 | 571 | 883 | | 485 | 597 | 626 | +| 256 | 640 | 539 | 975 | | 569 | 611 | 671 | +| 384 | 499 | 610 | 879 | | 499 | 602 | 637 | +| 480 | 518 | 507 | 877 | | 496 | 661 | 616 | +| 512 | 524 | 591 | 1002 | | 549 | 678 | 668 | +| 640 | 542 | 612 | 955 | | 568 | 663 | 645 | +| 768 | 557 | 613 | 981 | | 491 | 663 | 598 | +| 800 | 514 | 353 | 882 | | 514 | 360 | 574 | +| 1024 | 640 | 640 | 1067 | | 492 | 683 | 602 | +| 2048 | 587 | 640 | 908 | | 486 | 640 | 552 | +| 2400 | 479 | 368 | 777 | | 422 | 376 | 518 | +| 4096 | 511 | 614 | 853 | | 426 | 640 | 534 | +| 8192 | 415 | 584 | 708 | | 386 | 622 | 516 | +| 9216 | 419 | 571 | 687 | | 364 | 586 | 506 | +| 16384 | 426 | 577 | 716 | | 398 | 606 | 530 | +| 32768 | 417 | 572 | 673 | | 399 | 572 | 468 | +| 262144 | 219 | 380 | 293 | | 255 | 431 | 343 | +| 1048576 | 202 | 274 | 237 | | 265 | 282 | 355 | +|-----------+------------+------------+------------| |------------+------------+------------| + +Same platform as above, but this time pffft and fftpack are built with clang 3.2: + +clang -O3 -DHAVE_FFTW -march=armv7-a -mtune=cortex-a9 -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm -I/usr/local/include/ -L/usr/local/lib/ -lfftw3f + +| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------| |------------+------------+------------| +| 64 | 427 | 452 | 853 | | 427 | 602 | 1024 | +| 96 | 351 | 276 | 843 | | 337 | 571 | 963 | +| 128 | 373 | 512 | 996 | | 390 | 618 | 1054 | +| 160 | 426 | 536 | 987 | | 375 | 669 | 914 | +| 192 | 404 | 571 | 1079 | | 388 | 588 | 1079 | +| 256 | 465 | 539 | 1205 | | 445 | 602 | 1170 | +| 384 | 366 | 610 | 1099 | | 343 | 594 | 1099 | +| 480 | 356 | 507 | 1140 | | 335 | 651 | 931 | +| 512 | 411 | 591 | 1213 | | 384 | 649 | 1124 | +| 640 
| 398 | 612 | 1193 | | 373 | 654 | 901 | +| 768 | 409 | 613 | 1227 | | 383 | 663 | 1044 | +| 800 | 411 | 348 | 1073 | | 353 | 358 | 809 | +| 1024 | 427 | 640 | 1280 | | 413 | 692 | 1004 | +| 2048 | 414 | 626 | 1126 | | 371 | 640 | 853 | +| 2400 | 399 | 373 | 898 | | 319 | 368 | 653 | +| 4096 | 404 | 602 | 1059 | | 357 | 633 | 778 | +| 8192 | 332 | 584 | 792 | | 308 | 616 | 716 | +| 9216 | 322 | 561 | 783 | | 299 | 586 | 687 | +| 16384 | 344 | 568 | 778 | | 314 | 617 | 745 | +| 32768 | 342 | 564 | 737 | | 314 | 552 | 629 | +| 262144 | 201 | 383 | 313 | | 227 | 435 | 413 | +| 1048576 | 187 | 262 | 251 | | 228 | 281 | 409 | +|-----------+------------+------------+------------| |------------+------------+------------| + +So it looks like, on ARM, gcc 4.7 is the best at scalar floating point +(the fftpack performance numbers are better with gcc), while clang is +the best with neon intrinsics (see how pffft perf has improved with +clang 3.2). + + +NVIDIA Jetson TK1 board, gcc-4.8.2. The cpu is a 2.3GHz cortex A15 (Tegra K1). 
+ +Built with: +gcc -O3 -march=armv7-a -mtune=native -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm + +| input len |real FFTPack| real PFFFT | |cplx FFTPack| cplx PFFFT | +|-----------+------------+------------| |------------+------------| +| 64 | 1735 | 3308 | | 1994 | 3744 | +| 96 | 1596 | 3448 | | 1987 | 3572 | +| 128 | 1807 | 4076 | | 2255 | 3960 | +| 160 | 1769 | 4083 | | 2071 | 3845 | +| 192 | 1990 | 4233 | | 2017 | 3939 | +| 256 | 2191 | 4882 | | 2254 | 4346 | +| 384 | 1878 | 4492 | | 2073 | 4012 | +| 480 | 1748 | 4398 | | 1923 | 3951 | +| 512 | 2030 | 5064 | | 2267 | 4195 | +| 640 | 1918 | 4756 | | 2094 | 4184 | +| 768 | 2099 | 4907 | | 2048 | 4297 | +| 800 | 1822 | 4555 | | 1880 | 4063 | +| 1024 | 2232 | 5355 | | 2187 | 4420 | +| 2048 | 2176 | 4983 | | 2027 | 3602 | +| 2400 | 1741 | 4256 | | 1710 | 3344 | +| 4096 | 1816 | 3914 | | 1851 | 3349 | +| 8192 | 1716 | 3481 | | 1700 | 3255 | +| 9216 | 1735 | 3589 | | 1653 | 3094 | +| 16384 | 1567 | 3483 | | 1637 | 3244 | +| 32768 | 1624 | 3240 | | 1655 | 3156 | +| 262144 | 1012 | 1898 | | 983 | 1503 | +| 1048576 | 876 | 1154 | | 868 | 1341 | +|-----------+------------+------------| |------------+------------| + +The performance on the tegra K1 is pretty impressive. I'm not +including the FFTW numbers as they as slightly below the scalar +fftpack numbers, so something must be wrong (however it seems to be +correctly configured and is using neon simd instructions). + +When using clang 3.4 the pffft version is even a bit faster, reaching +5.7 GFlops for real ffts of size 1024. + + +iPad Air 2 with iOS9, xcode 8.0, arm64. The cpu is an Apple A8X, supposedly running at 1.5GHz. 
+ +| input len |real FFTPack| real vDSP | real PFFFT | |cplx FFTPack| cplx vDSP | cplx PFFFT | +|-----------+------------+------------+------------| |------------+------------+------------| +| 64 | 2517 | 7995 | 6086 | | 2725 | 13006 | 8495 | +| 96 | 2442 | n/a | 6691 | | 2256 | n/a | 7991 | +| 128 | 2664 | 10186 | 7877 | | 2575 | 15115 | 9115 | +| 160 | 2638 | n/a | 8283 | | 2682 | n/a | 8806 | +| 192 | 2903 | n/a | 9083 | | 2634 | n/a | 8980 | +| 256 | 3184 | 11452 | 10039 | | 3026 | 15410 | 10199 | +| 384 | 2665 | n/a | 10100 | | 2275 | n/a | 9247 | +| 480 | 2546 | n/a | 9863 | | 2341 | n/a | 8892 | +| 512 | 2832 | 12197 | 10989 | | 2547 | 16768 | 10154 | +| 640 | 2755 | n/a | 10461 | | 2569 | n/a | 9666 | +| 768 | 2998 | n/a | 11355 | | 2585 | n/a | 9813 | +| 800 | 2516 | n/a | 10332 | | 2433 | n/a | 9164 | +| 1024 | 3109 | 12965 | 12114 | | 2869 | 16448 | 10519 | +| 2048 | 3027 | 12996 | 12023 | | 2648 | 17304 | 10307 | +| 2400 | 2515 | n/a | 10372 | | 2355 | n/a | 8443 | +| 4096 | 3204 | 13603 | 12359 | | 2814 | 16570 | 9780 | +| 8192 | 2759 | 13422 | 10824 | | 2153 | 15652 | 7884 | +| 9216 | 2700 | n/a | 9938 | | 2241 | n/a | 7900 | +| 16384 | 2280 | 13057 | 7976 | | 593 | 4272 | 2534 | +| 32768 | 768 | 4269 | 2882 | | 606 | 4405 | 2604 | +| 262144 | 724 | 3527 | 2630 | | 534 | 2418 | 2157 | +| 1048576 | 674 | 1467 | 2135 | | 530 | 1621 | 2055 | +|-----------+------------+------------+------------| |------------+------------+------------| + +I double-checked to make sure I did not make a mistake in the time +measurements, as the numbers are much higher than what I initially +expected. They are in fact higher than the number I get on the 2.8GHz +Xeon of my 2008 mac pro.. (except for FFT lengths >= 32768 where +having a big cache is useful). A good surprise is also that the perf +is not too far from apple's vDSP (at least for the real FFT). 
+ From c39787ddc7b874553ecd60c641ce9dd0dceb51e2 Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Mon, 17 Aug 2020 11:21:33 +0000 Subject: [PATCH 06/42] Testing pffft - elaborate output --- oss-internship-2020/pffft/README.txt | 6 ++- .../pffft/test_pffft_sandboxed.cc | 41 ++++++++++++++++--- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/oss-internship-2020/pffft/README.txt b/oss-internship-2020/pffft/README.txt index ef51f1a..5688ac9 100644 --- a/oss-internship-2020/pffft/README.txt +++ b/oss-internship-2020/pffft/README.txt @@ -16,4 +16,8 @@ CMake observations: Sandboxed main observations: * containing two testing parts (fft / pffft benchmarks) ! current stage: fft - works :) - pffft - not implemented + pffft - implemented + * pffft benchmark bug: "Sandbox not active" + => loop in pffft_transform for N = 64 (why?); + N = 64, status OK, pffft_transform generates error + N > 64, status not OK diff --git a/oss-internship-2020/pffft/test_pffft_sandboxed.cc b/oss-internship-2020/pffft/test_pffft_sandboxed.cc index d50dff3..5a56a6f 100644 --- a/oss-internship-2020/pffft/test_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/test_pffft_sandboxed.cc @@ -60,12 +60,6 @@ void show_output(const char* name, int N, int cplx, float flops, float t0, fflush(stdout); } -/* - For debug: - SAPI_VLOG_LEVEL=1 ./pffft_sandboxed --v=100 - --sandbox2_danger_danger_permit_all_and_log my_aux_file -*/ - int main(int argc, char* argv[]) { gflags::ParseCommandLineFlags(&argc, &argv, true); int Nvalues[] = {64, 96, 128, 160, 192, 256, @@ -148,6 +142,41 @@ int main(int argc, char* argv[]) { flops = (max_iter_ * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); } + + // PFFFT benchmark + { + sapi::StatusOr s = + api.pffft_new_setup(N, cplx ? 
PFFFT_COMPLEX : PFFFT_REAL); + + printf("%s\n", s.status().ToString().c_str()); + + if (s.ok()) { + sapi::v::GenericPtr s_reg(s.value()); + + t0 = uclock_sec(); + for (iter = 0; iter < max_iter; ++iter) { + printf("%s\n", + api.pffft_transform(s_reg.PtrBoth(), X_.PtrBoth(), + Z_.PtrBoth(), Y_.PtrBoth(), PFFFT_FORWARD) + .ToString() + .c_str()); + printf("%s\n", + api.pffft_transform(s_reg.PtrBoth(), X_.PtrBoth(), + Z_.PtrBoth(), Y_.PtrBoth(), PFFFT_FORWARD) + .ToString() + .c_str()); + } + + t1 = uclock_sec(); + printf("%s\n", + api.pffft_destroy_setup(s_reg.PtrBoth()).ToString().c_str()); + + flops = + (max_iter * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); + show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); + } + printf("\n\n"); + } } return 0; From d51d558083b57481971b46998f20928b57e9b59e Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Mon, 17 Aug 2020 11:21:42 +0000 Subject: [PATCH 07/42] Update .gitignore --- oss-internship-2020/pffft/.gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/oss-internship-2020/pffft/.gitignore b/oss-internship-2020/pffft/.gitignore index f9ed608..0469302 100644 --- a/oss-internship-2020/pffft/.gitignore +++ b/oss-internship-2020/pffft/.gitignore @@ -1 +1,6 @@ .hg/ +pffft.o +test_pffft.o +fftpack.o +libpffft.a +pffft_main \ No newline at end of file From 3fb4d5954513dbdc43272a7220ebdb3aca9c68ff Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Mon, 17 Aug 2020 11:21:49 +0000 Subject: [PATCH 08/42] Sandbox not active error tracking observations --- oss-internship-2020/pffft/README.txt | 7 +++++++ oss-internship-2020/pffft/test_pffft_sandboxed.cc | 6 ++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/oss-internship-2020/pffft/README.txt b/oss-internship-2020/pffft/README.txt index 5688ac9..9c2556a 100644 --- a/oss-internship-2020/pffft/README.txt +++ b/oss-internship-2020/pffft/README.txt @@ -21,3 +21,10 @@ Sandboxed main observations: => loop in pffft_transform for N = 64 (why?); N = 
64, status OK, pffft_transform generates error N > 64, status not OK + Problem on initialising sapi::StatusOr s; + the memory that stays for s is not the same with the address passed + in pffft_transform function. + (sapi::v::GenericPtr to be changed?) + + Temporary solution (not done): change the generated files to accept + uintptr_t instead of PFFFT_Setup diff --git a/oss-internship-2020/pffft/test_pffft_sandboxed.cc b/oss-internship-2020/pffft/test_pffft_sandboxed.cc index 5a56a6f..8c91a77 100644 --- a/oss-internship-2020/pffft/test_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/test_pffft_sandboxed.cc @@ -73,6 +73,8 @@ int main(int argc, char* argv[]) { pffftSapiSandbox sandbox; sandbox.Init().IgnoreError(); + printf("%s\n", sandbox.Init().ToString().c_str()); + pffftApi api(&sandbox); int N, cplx; @@ -164,7 +166,7 @@ int main(int argc, char* argv[]) { api.pffft_transform(s_reg.PtrBoth(), X_.PtrBoth(), Z_.PtrBoth(), Y_.PtrBoth(), PFFFT_FORWARD) .ToString() - .c_str()); + .c_str()); } t1 = uclock_sec(); @@ -175,7 +177,7 @@ int main(int argc, char* argv[]) { (max_iter * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); } - printf("\n\n"); + printf("\n\n"); } } From 06bf6cdd34a172bdd0048720d62bbda856d5593f Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Mon, 17 Aug 2020 11:21:56 +0000 Subject: [PATCH 09/42] Sandbox not active error - resolved --- oss-internship-2020/pffft/README.txt | 26 +++++++++-------- .../pffft/test_pffft_sandboxed.cc | 29 ++++++++----------- 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/oss-internship-2020/pffft/README.txt b/oss-internship-2020/pffft/README.txt index 9c2556a..c7ac70b 100644 --- a/oss-internship-2020/pffft/README.txt +++ b/oss-internship-2020/pffft/README.txt @@ -16,15 +16,17 @@ CMake observations: Sandboxed main observations: * containing two testing parts (fft / pffft benchmarks) ! 
current stage: fft - works :) - pffft - implemented - * pffft benchmark bug: "Sandbox not active" - => loop in pffft_transform for N = 64 (why?); - N = 64, status OK, pffft_transform generates error - N > 64, status not OK - Problem on initialising sapi::StatusOr s; - the memory that stays for s is not the same with the address passed - in pffft_transform function. - (sapi::v::GenericPtr to be changed?) - - Temporary solution (not done): change the generated files to accept - uintptr_t instead of PFFFT_Setup + pffft - implemented + * (Solved) pffft benchmark bug: "Sandbox not active" + N = 64, status OK, pffft_transform generates error + N > 64, status not OK + Problem on initialising sapi::StatusOr s; + the memory that stays for s is not the same with the address passed + in pffft_transform function. + (sapi :: v :: GenericPtr - to be changed) + + Temporary solution: change the generated files to accept + uintptr_t instead of PFFFT_Setup + + Solution: using "sapi :: v :: RemotePtr" instead of "sapi :: v :: GenericPtr" + to access the memory of object s diff --git a/oss-internship-2020/pffft/test_pffft_sandboxed.cc b/oss-internship-2020/pffft/test_pffft_sandboxed.cc index 8c91a77..e4b4602 100644 --- a/oss-internship-2020/pffft/test_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/test_pffft_sandboxed.cc @@ -73,7 +73,7 @@ int main(int argc, char* argv[]) { pffftSapiSandbox sandbox; sandbox.Init().IgnoreError(); - printf("%s\n", sandbox.Init().ToString().c_str()); + printf("Initialization: %s\n", sandbox.Init().ToString().c_str()); pffftApi api(&sandbox); @@ -81,7 +81,7 @@ int main(int argc, char* argv[]) { cplx = 0; - for (i = 0; i < 5; i++) { + for (i = 0; i < 23; i++) { N = Nvalues[i]; int Nfloat = N * (cplx ? 2 : 1); @@ -150,34 +150,29 @@ int main(int argc, char* argv[]) { sapi::StatusOr s = api.pffft_new_setup(N, cplx ? 
PFFFT_COMPLEX : PFFFT_REAL); - printf("%s\n", s.status().ToString().c_str()); + printf("Setup status is: %s\n", s.status().ToString().c_str()); if (s.ok()) { - sapi::v::GenericPtr s_reg(s.value()); + sapi::v::RemotePtr s_reg(s.value()); t0 = uclock_sec(); for (iter = 0; iter < max_iter; ++iter) { - printf("%s\n", - api.pffft_transform(s_reg.PtrBoth(), X_.PtrBoth(), - Z_.PtrBoth(), Y_.PtrBoth(), PFFFT_FORWARD) - .ToString() - .c_str()); - printf("%s\n", - api.pffft_transform(s_reg.PtrBoth(), X_.PtrBoth(), - Z_.PtrBoth(), Y_.PtrBoth(), PFFFT_FORWARD) - .ToString() - .c_str()); + api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), Y_.PtrBoth(), + PFFFT_FORWARD) + .IgnoreError(); + api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), Y_.PtrBoth(), + PFFFT_FORWARD) + .IgnoreError(); } t1 = uclock_sec(); - printf("%s\n", - api.pffft_destroy_setup(s_reg.PtrBoth()).ToString().c_str()); + api.pffft_destroy_setup(&s_reg).IgnoreError(); flops = (max_iter * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); } - printf("\n\n"); + printf("\n\n"); } } From 096d02625dd616fadb536cd29ed204aa8e2452ea Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Thu, 20 Aug 2020 07:44:27 +0000 Subject: [PATCH 10/42] Modified sapi root to a general path --- oss-internship-2020/pffft/.gitignore | 7 ++----- oss-internship-2020/pffft/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/oss-internship-2020/pffft/.gitignore b/oss-internship-2020/pffft/.gitignore index 0469302..ad64a9d 100644 --- a/oss-internship-2020/pffft/.gitignore +++ b/oss-internship-2020/pffft/.gitignore @@ -1,6 +1,3 @@ -.hg/ -pffft.o -test_pffft.o -fftpack.o -libpffft.a +*.o +*.a pffft_main \ No newline at end of file diff --git a/oss-internship-2020/pffft/CMakeLists.txt b/oss-internship-2020/pffft/CMakeLists.txt index c0d62cb..a9ba38a 100644 --- a/oss-internship-2020/pffft/CMakeLists.txt +++ b/oss-internship-2020/pffft/CMakeLists.txt 
@@ -31,7 +31,7 @@ target_link_libraries(pffft PUBLIC ${MATH_LIBS}) # Adding dependencies -set(SAPI_ROOT "/usr/local/google/home/inach/sandboxed-api" CACHE PATH "Path to the Sandboxed API source tree") +set(SAPI_ROOT "../.." CACHE PATH "Path to the Sandboxed API source tree") # Then configure: # mkdir -p build && cd build # cmake .. -G Ninja -DSAPI_ROOT=$HOME/sapi_root From 94fcf82dd3aaf68481f0145338ec2f4b3d9941e6 Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Thu, 20 Aug 2020 08:21:32 +0000 Subject: [PATCH 11/42] Added comment on Nvalues[] and N purpose --- .../pffft/test_pffft_sandboxed.cc | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/oss-internship-2020/pffft/test_pffft_sandboxed.cc b/oss-internship-2020/pffft/test_pffft_sandboxed.cc index e4b4602..e1b897e 100644 --- a/oss-internship-2020/pffft/test_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/test_pffft_sandboxed.cc @@ -62,6 +62,13 @@ void show_output(const char* name, int N, int cplx, float flops, float t0, int main(int argc, char* argv[]) { gflags::ParseCommandLineFlags(&argc, &argv, true); + /* + * Nvalues is a vector keeping the values by which iterates N, its value + * representing the input length. More concrete, N is the number of + * data points the caclulus is up to (determinating its accuracy). + * To show the performance of Fast-Fourier Transformations the program is + * testing for various values of N. 
+ */ int Nvalues[] = {64, 96, 128, 160, 192, 256, 384, 5 * 96, 512, 5 * 128, 3 * 256, 800, 1024, 2048, 2400, 4096, 8192, 9 * 1024, @@ -118,10 +125,16 @@ int main(int argc, char* argv[]) { X[k] = 0; } - // FFTPack benchmark + /* + * FFTPack benchmark + */ { + /* + * SIMD_SZ == 4 (returning value of pffft_simd_size()) + */ int max_iter_ = - max_iter / 4; // SIMD_SZ == 4 (returning value of pffft_simd_size()) + max_iter / 4; + if (max_iter_ == 0) max_iter_ = 1; if (cplx) { api.cffti(N, wrk_.PtrBoth()).IgnoreError(); @@ -145,7 +158,9 @@ int main(int argc, char* argv[]) { show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); } - // PFFFT benchmark + /* + * PFFFT benchmark + */ { sapi::StatusOr s = api.pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); From 257e87e07669c29bc8fb3b32bddd66b6a43d44d7 Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Thu, 20 Aug 2020 08:58:30 +0000 Subject: [PATCH 12/42] Added log printing for debug --- .../pffft/test_pffft_sandboxed.cc | 179 ++++++++++-------- 1 file changed, 95 insertions(+), 84 deletions(-) diff --git a/oss-internship-2020/pffft/test_pffft_sandboxed.cc b/oss-internship-2020/pffft/test_pffft_sandboxed.cc index e1b897e..523dce0 100644 --- a/oss-internship-2020/pffft/test_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/test_pffft_sandboxed.cc @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -61,6 +62,11 @@ void show_output(const char* name, int N, int cplx, float flops, float t0, } int main(int argc, char* argv[]) { + /* + * Initialize Google's logging library. 
+ */ + google::InitGoogleLogging(argv[0]); + gflags::ParseCommandLineFlags(&argc, &argv, true); /* * Nvalues is a vector keeping the values by which iterates N, its value @@ -75,12 +81,12 @@ int main(int argc, char* argv[]) { 16384, 32768, 256 * 1024, 1024 * 1024, -1}; int i; - printf("initializing sandbox...\n"); + VLOG(1) << "Initializing sandbox...\n"; pffftSapiSandbox sandbox; sandbox.Init().IgnoreError(); - printf("Initialization: %s\n", sandbox.Init().ToString().c_str()); + VLOG(1) << "Initialization: " << sandbox.Init().ToString().c_str() << "\n"; pffftApi api(&sandbox); @@ -88,108 +94,113 @@ int main(int argc, char* argv[]) { cplx = 0; - for (i = 0; i < 23; i++) { - N = Nvalues[i]; + do { + for (i = 0; i < 23; i++) { + N = Nvalues[i]; - int Nfloat = N * (cplx ? 2 : 1); - int Nbytes = Nfloat * sizeof(float); - int pass; + int Nfloat = N * (cplx ? 2 : 1); + int Nbytes = Nfloat * sizeof(float); + int pass; - float ref[Nbytes], in[Nbytes], out[Nbytes], tmp[Nbytes], tmp2[Nbytes]; + float ref[Nbytes], in[Nbytes], out[Nbytes], tmp[Nbytes], tmp2[Nbytes]; - sapi::v::Array ref_(ref, Nbytes); - sapi::v::Array in_(in, Nbytes); - sapi::v::Array out_(out, Nbytes); - sapi::v::Array tmp_(tmp, Nbytes); - sapi::v::Array tmp2_(tmp2, Nbytes); + sapi::v::Array ref_(ref, Nbytes); + sapi::v::Array in_(in, Nbytes); + sapi::v::Array out_(out, Nbytes); + sapi::v::Array tmp_(tmp, Nbytes); + sapi::v::Array tmp2_(tmp2, Nbytes); - float wrk[2 * Nbytes + 15 * sizeof(float)]; - sapi::v::Array wrk_(wrk, 2 * Nbytes + 15 * sizeof(float)); + float wrk[2 * Nbytes + 15 * sizeof(float)]; + sapi::v::Array wrk_(wrk, 2 * Nbytes + 15 * sizeof(float)); - float ref_max = 0; - int k; + float ref_max = 0; + int k; - Nfloat = (cplx ? N * 2 : N); - float X[Nbytes], Y[Nbytes], Z[Nbytes]; - sapi::v::Array X_(X, Nbytes), Y_(Y, Nbytes), Z_(Z, Nbytes); + Nfloat = (cplx ? 
N * 2 : N); + float X[Nbytes], Y[Nbytes], Z[Nbytes]; + sapi::v::Array X_(X, Nbytes), Y_(Y, Nbytes), Z_(Z, Nbytes); - double t0, t1, flops; + double t0, t1, flops; - int max_iter = 5120000 / N * 4; -#ifdef __arm__ - max_iter /= 4; -#endif - int iter; + int max_iter = 5120000 / N * 4; + #ifdef __arm__ + max_iter /= 4; + #endif + int iter; - for (k = 0; k < Nfloat; ++k) { - X[k] = 0; - } + for (k = 0; k < Nfloat; ++k) { + X[k] = 0; + } - /* - * FFTPack benchmark - */ - { /* - * SIMD_SZ == 4 (returning value of pffft_simd_size()) + * FFTPack benchmark */ - int max_iter_ = - max_iter / 4; + { + /* + * SIMD_SZ == 4 (returning value of pffft_simd_size()) + */ + int max_iter_ = + max_iter / 4; - if (max_iter_ == 0) max_iter_ = 1; - if (cplx) { - api.cffti(N, wrk_.PtrBoth()).IgnoreError(); - } else { - api.rffti(N, wrk_.PtrBoth()).IgnoreError(); - } - t0 = uclock_sec(); - - for (iter = 0; iter < max_iter_; ++iter) { + if (max_iter_ == 0) max_iter_ = 1; if (cplx) { - api.cfftf(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); - api.cfftb(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + api.cffti(N, wrk_.PtrBoth()).IgnoreError(); } else { - api.rfftf(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); - api.rfftb(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + api.rffti(N, wrk_.PtrBoth()).IgnoreError(); } - } - t1 = uclock_sec(); - - flops = (max_iter_ * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); - show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); - } - - /* - * PFFFT benchmark - */ - { - sapi::StatusOr s = - api.pffft_new_setup(N, cplx ? 
PFFFT_COMPLEX : PFFFT_REAL); - - printf("Setup status is: %s\n", s.status().ToString().c_str()); - - if (s.ok()) { - sapi::v::RemotePtr s_reg(s.value()); - t0 = uclock_sec(); - for (iter = 0; iter < max_iter; ++iter) { - api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), Y_.PtrBoth(), - PFFFT_FORWARD) - .IgnoreError(); - api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), Y_.PtrBoth(), - PFFFT_FORWARD) - .IgnoreError(); + + for (iter = 0; iter < max_iter_; ++iter) { + if (cplx) { + api.cfftf(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + api.cfftb(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + } else { + api.rfftf(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + api.rfftb(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + } + } + t1 = uclock_sec(); + + flops = (max_iter_ * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); + show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); + } + + /* + * PFFFT benchmark + */ + { + sapi::StatusOr s = + api.pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); + + VLOG(1) << "Setup status is: " << s.status().ToString().c_str() << "\n"; + + if (s.ok()) { + sapi::v::RemotePtr s_reg(s.value()); + + t0 = uclock_sec(); + for (iter = 0; iter < max_iter; ++iter) { + api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), Y_.PtrBoth(), + PFFFT_FORWARD) + .IgnoreError(); + api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), Y_.PtrBoth(), + PFFFT_FORWARD) + .IgnoreError(); + } + + t1 = uclock_sec(); + api.pffft_destroy_setup(&s_reg).IgnoreError(); + + flops = + (max_iter * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); + show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); } - t1 = uclock_sec(); - api.pffft_destroy_setup(&s_reg).IgnoreError(); - - flops = - (max_iter * 2) * ((cplx ? 
5 : 2.5) * N * log((double)N) / M_LN2); - show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); + VLOG(1) << "N = " << N << " SUCCESSFULLY\n\n"; } - printf("\n\n"); - } - } + } + + cplx = !cplx; + } while (cplx); return 0; } \ No newline at end of file From 25d18f985d258a34bc9b89fb41151174ab8f729b Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Thu, 20 Aug 2020 11:19:49 +0000 Subject: [PATCH 13/42] Updated README and change names of main files --- oss-internship-2020/pffft/README.md | 67 +++ oss-internship-2020/pffft/main_pffft.c | 419 ++++++++++++++++++ .../pffft/main_pffft_sandboxed.cc | 208 +++++++++ .../pffft/pffft_library_notes.txt | 416 +++++++++++++++++ 4 files changed, 1110 insertions(+) create mode 100644 oss-internship-2020/pffft/README.md create mode 100644 oss-internship-2020/pffft/main_pffft.c create mode 100644 oss-internship-2020/pffft/main_pffft_sandboxed.cc create mode 100644 oss-internship-2020/pffft/pffft_library_notes.txt diff --git a/oss-internship-2020/pffft/README.md b/oss-internship-2020/pffft/README.md new file mode 100644 index 0000000..52facf3 --- /dev/null +++ b/oss-internship-2020/pffft/README.md @@ -0,0 +1,67 @@ +# Sandboxing PFFFT library + +Builder: CMake +OS: Linux + +### For testing: +`cd build`, then `./pffft_sandboxed` + +### For debug: +`SAPI_VLOG_LEVEL=1 ./pffft_sandboxed --v=100 +--sandbox2_danger_danger_permit_all_and_log ` + +## ***About the project*** +*PFFFT library is concerned with 1D Fast-Fourier Transformations finding a +compromise between accuracy and speed. It deals with real and complex +vectors, both cases being illustrated in the testing part (`main_pffft.c` +for initially and original version, `main_pffft_sandboxed.cc` for our +currently implemented sandboxed version). +The original files can be found at: https://bitbucket.org/jpommier/pffft/src.* + +*The purpose of sandboxing is to limit the permissions and capabilities of +library’s methods, in order to secure the usage of them. 
+After obtaining the sandbox, the functions will be called through a +Sandbox API (being called `api` in the current test) and so, the +operations, system calls or namespace access may be controlled. +From both `pffft.h` and `fftpack.h` headers, useful methods are added to +sapi library built with CMake. There is also a need to link math library +as the transformations made require mathematical operators. +Regarding the testing of the methods, one main is doing this job by +iterating through a set of values, that represents the accuracy of +transformations and print the speed for each value and type of +transformation. More specifically, the input length is the target for +accuracy (named as `N`) and it stands for the number of data points from +the series that calculate the result of transformation. It is also +important to mention that the `cplx` variable stands for a boolean value +that tells the type of transformation (0 for REAL and 1 for COMPLEX) and +it is taken into account while testing. +In the end, the performance of PFFFT library is outlined by the output.* + +#### CMake observations resume: + * linking pffft and fftpack (which contains necessary functions for pffft) + * set math library + +#### Sandboxed main observations resume: + * containing two testing parts (fft / pffft benchmarks) + * showing the performance of the transformations implies + testing them through various FFT dimensions. + Variable N, the input length, will take specific values + meaning the number of points to which it is set the calculus + (more details of mathematical purpose of N - https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm). 
+ * output shows speed depending on the input length + + + +### Bugs history + - [Solved] pffft benchmark bug: "Sandbox not active" + N = 64, status OK, pffft_transform generates error + N > 64, status not OK + Problem on initialising sapi::StatusOr s; the memory that stays + for s is not the same with the address passed in pffft_transform function. + (sapi :: v :: GenericPtr - to be changed) + + Temporary solution: change the generated files to accept + uintptr_t instead of PFFFT_Setup + + Solution: using "sapi::v::RemotePtr" instead of "sapi::v::GenericPtr" + to access the memory of object s \ No newline at end of file diff --git a/oss-internship-2020/pffft/main_pffft.c b/oss-internship-2020/pffft/main_pffft.c new file mode 100644 index 0000000..a5d20c2 --- /dev/null +++ b/oss-internship-2020/pffft/main_pffft.c @@ -0,0 +1,419 @@ +/* + Copyright (c) 2013 Julien Pommier. + + Small test & bench for PFFFT, comparing its performance with the scalar FFTPACK, FFTW, and Apple vDSP + + How to build: + + on linux, with fftw3: + gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm + + on macos, without fftw3: + clang -o test_pffft -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -framework Accelerate + + on macos, with fftw3: + clang -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework Accelerate + + on windows, with visual c++: + cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c + + build without SIMD instructions: + gcc -o test_pffft -DPFFFT_SIMD_DISABLE -O3 -Wall -W pffft.c test_pffft.c fftpack.c -lm + + */ + +#include "pffft.h" +#include "fftpack.h" + +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_SYS_TIMES +# include +# include +#endif + +#ifdef HAVE_VECLIB +# include +#endif + +#ifdef HAVE_FFTW +# 
include +#endif + +#define MAX(x,y) ((x)>(y)?(x):(y)) + +double frand() { + return rand()/(double)RAND_MAX; +} + +#if defined(HAVE_SYS_TIMES) + inline double uclock_sec(void) { + static double ttclk = 0.; + if (ttclk == 0.) ttclk = sysconf(_SC_CLK_TCK); + struct tms t; return ((double)times(&t)) / ttclk; + } +# else + double uclock_sec(void) +{ return (double)clock()/(double)CLOCKS_PER_SEC; } +#endif + + +/* compare results with the regular fftpack */ +void pffft_validate_N(int N, int cplx) { + int Nfloat = N*(cplx?2:1); + int Nbytes = Nfloat * sizeof(float); + float *ref, *in, *out, *tmp, *tmp2; + PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); + int pass; + + if (!s) { printf("Skipping N=%d, not supported\n", N); return; } + ref = pffft_aligned_malloc(Nbytes); + in = pffft_aligned_malloc(Nbytes); + out = pffft_aligned_malloc(Nbytes); + tmp = pffft_aligned_malloc(Nbytes); + tmp2 = pffft_aligned_malloc(Nbytes); + + for (pass=0; pass < 2; ++pass) { + float ref_max = 0; + int k; + //printf("N=%d pass=%d cplx=%d\n", N, pass, cplx); + // compute reference solution with FFTPACK + if (pass == 0) { + float *wrk = malloc(2*Nbytes+15*sizeof(float)); + for (k=0; k < Nfloat; ++k) { + ref[k] = in[k] = frand()*2-1; + out[k] = 1e30; + } + if (!cplx) { + rffti(N, wrk); + rfftf(N, ref, wrk); + // use our ordering for real ffts instead of the one of fftpack + { + float refN=ref[N-1]; + for (k=N-2; k >= 1; --k) ref[k+1] = ref[k]; + ref[1] = refN; + } + } else { + cffti(N, wrk); + cfftf(N, ref, wrk); + } + free(wrk); + } + + for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, fabs(ref[k])); + + + // pass 0 : non canonical ordering of transform coefficients + if (pass == 0) { + // test forward transform, with different input / output + pffft_transform(s, in, tmp, 0, PFFFT_FORWARD); + memcpy(tmp2, tmp, Nbytes); + memcpy(tmp, in, Nbytes); + pffft_transform(s, tmp, tmp, 0, PFFFT_FORWARD); + for (k = 0; k < Nfloat; ++k) { + assert(tmp2[k] == tmp[k]); + } + + // 
test reordering + pffft_zreorder(s, tmp, out, PFFFT_FORWARD); + pffft_zreorder(s, out, tmp, PFFFT_BACKWARD); + for (k = 0; k < Nfloat; ++k) { + assert(tmp2[k] == tmp[k]); + } + pffft_zreorder(s, tmp, out, PFFFT_FORWARD); + } else { + // pass 1 : canonical ordering of transform coeffs. + pffft_transform_ordered(s, in, tmp, 0, PFFFT_FORWARD); + memcpy(tmp2, tmp, Nbytes); + memcpy(tmp, in, Nbytes); + pffft_transform_ordered(s, tmp, tmp, 0, PFFFT_FORWARD); + for (k = 0; k < Nfloat; ++k) { + assert(tmp2[k] == tmp[k]); + } + memcpy(out, tmp, Nbytes); + } + + { + for (k=0; k < Nfloat; ++k) { + if (!(fabs(ref[k] - out[k]) < 1e-3*ref_max)) { + printf("%s forward PFFFT mismatch found for N=%d\n", (cplx?"CPLX":"REAL"), N); + exit(1); + } + } + + if (pass == 0) pffft_transform(s, tmp, out, 0, PFFFT_BACKWARD); + else pffft_transform_ordered(s, tmp, out, 0, PFFFT_BACKWARD); + memcpy(tmp2, out, Nbytes); + memcpy(out, tmp, Nbytes); + if (pass == 0) pffft_transform(s, out, out, 0, PFFFT_BACKWARD); + else pffft_transform_ordered(s, out, out, 0, PFFFT_BACKWARD); + for (k = 0; k < Nfloat; ++k) { + assert(tmp2[k] == out[k]); + out[k] *= 1.f/N; + } + for (k = 0; k < Nfloat; ++k) { + if (fabs(in[k] - out[k]) > 1e-3 * ref_max) { + printf("pass=%d, %s IFFFT does not match for N=%d\n", pass, (cplx?"CPLX":"REAL"), N); break; + exit(1); + } + } + } + + // quick test of the circular convolution in fft domain + { + float conv_err = 0, conv_max = 0; + + pffft_zreorder(s, ref, tmp, PFFFT_FORWARD); + memset(out, 0, Nbytes); + pffft_zconvolve_accumulate(s, ref, ref, out, 1.0); + pffft_zreorder(s, out, tmp2, PFFFT_FORWARD); + + for (k=0; k < Nfloat; k += 2) { + float ar = tmp[k], ai=tmp[k+1]; + if (cplx || k > 0) { + tmp[k] = ar*ar - ai*ai; + tmp[k+1] = 2*ar*ai; + } else { + tmp[0] = ar*ar; + tmp[1] = ai*ai; + } + } + + for (k=0; k < Nfloat; ++k) { + float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]); + if (d > conv_err) conv_err = d; + if (e > conv_max) conv_max = e; + } + if (conv_err > 
1e-5*conv_max) { + printf("zconvolve error ? %g %g\n", conv_err, conv_max); exit(1); + } + } + + } + + printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); fflush(stdout); + + pffft_destroy_setup(s); + pffft_aligned_free(ref); + pffft_aligned_free(in); + pffft_aligned_free(out); + pffft_aligned_free(tmp); + pffft_aligned_free(tmp2); +} + +void pffft_validate(int cplx) { + static int Ntest[] = { 16, 32, 64, 96, 128, 160, 192, 256, 288, 384, 5*96, 512, 576, 5*128, 800, 864, 1024, 2048, 2592, 4000, 4096, 12000, 36864, 0}; + int k; + for (k = 0; Ntest[k]; ++k) { + int N = Ntest[k]; + if (N == 16 && !cplx) continue; + pffft_validate_N(N, cplx); + } +} + +int array_output_format = 0; + +void show_output(const char *name, int N, int cplx, float flops, float t0, float t1, int max_iter) { + float mflops = flops/1e6/(t1 - t0 + 1e-16); + if (array_output_format) { + if (flops != -1) { + printf("|%9.0f ", mflops); + } else printf("| n/a "); + } else { + if (flops != -1) { + printf("N=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), name, mflops, (t1-t0)/2/max_iter * 1e9, max_iter); + } + } + fflush(stdout); +} + +void benchmark_ffts(int N, int cplx) { + int Nfloat = (cplx ? 
N*2 : N); + int Nbytes = Nfloat * sizeof(float); + float *X = pffft_aligned_malloc(Nbytes), *Y = pffft_aligned_malloc(Nbytes), *Z = pffft_aligned_malloc(Nbytes); + + double t0, t1, flops; + + int k; + int max_iter = 5120000/N*4; +#ifdef __arm__ + max_iter /= 4; +#endif + int iter; + + for (k = 0; k < Nfloat; ++k) { + X[k] = 0; //sqrtf(k+1); + } + + // FFTPack benchmark + { + float *wrk = malloc(2*Nbytes + 15*sizeof(float)); + int max_iter_ = max_iter/pffft_simd_size(); if (max_iter_ == 0) max_iter_ = 1; + if (cplx) cffti(N, wrk); + else rffti(N, wrk); + t0 = uclock_sec(); + + for (iter = 0; iter < max_iter_; ++iter) { + if (cplx) { + cfftf(N, X, wrk); + cfftb(N, X, wrk); + } else { + rfftf(N, X, wrk); + rfftb(N, X, wrk); + } + } + t1 = uclock_sec(); + free(wrk); + + flops = (max_iter_*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html + show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); + } + +#ifdef HAVE_VECLIB + int log2N = (int)(log(N)/log(2) + 0.5f); + if (N == (1< 1 && strcmp(argv[1], "--array-format") == 0) { + array_output_format = 1; + } + +#ifndef PFFFT_SIMD_DISABLE + validate_pffft_simd(); +#endif + pffft_validate(1); + pffft_validate(0); + if (!array_output_format) { + for (i=0; Nvalues[i] > 0; ++i) { + benchmark_ffts(Nvalues[i], 0 /* real fft */); + } + for (i=0; Nvalues[i] > 0; ++i) { + benchmark_ffts(Nvalues[i], 1 /* cplx fft */); + } + } else { + printf("| input len "); + printf("|real FFTPack"); +#ifdef HAVE_VECLIB + printf("| real vDSP "); +#endif +#ifdef HAVE_FFTW + printf("| real FFTW "); +#endif + printf("| real PFFFT | "); + + printf("|cplx FFTPack"); +#ifdef HAVE_VECLIB + printf("| cplx vDSP "); +#endif +#ifdef HAVE_FFTW + printf("| cplx FFTW "); +#endif + printf("| cplx PFFFT |\n"); + for (i=0; Nvalues[i] > 0; ++i) { + printf("|%9d ", Nvalues[i]); + benchmark_ffts(Nvalues[i], 0); + printf("| "); + benchmark_ffts(Nvalues[i], 1); + printf("|\n"); + } + printf(" (numbers are given in MFlops)\n"); 
+ } + + + return 0; +} diff --git a/oss-internship-2020/pffft/main_pffft_sandboxed.cc b/oss-internship-2020/pffft/main_pffft_sandboxed.cc new file mode 100644 index 0000000..662b949 --- /dev/null +++ b/oss-internship-2020/pffft/main_pffft_sandboxed.cc @@ -0,0 +1,208 @@ +#define GOOGLE_STRIP_LOG 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fftpack.h" +#include "pffft_sapi.sapi.h" +#include "sandboxed_api/util/flag.h" +#include "sandboxed_api/vars.h" + +ABSL_DECLARE_FLAG(string, sandbox2_danger_danger_permit_all); +ABSL_DECLARE_FLAG(string, sandbox2_danger_danger_permit_all_and_log); + +class pffftSapiSandbox : public pffftSandbox { + public: + std::unique_ptr ModifyPolicy( + sandbox2::PolicyBuilder*) override { + return sandbox2::PolicyBuilder() + .AllowStaticStartup() + .AllowOpen() + .AllowRead() + .AllowWrite() + .AllowSystemMalloc() + .AllowExit() + .AllowSyscalls({ + __NR_futex, + __NR_close, + __NR_getrusage, + }) + .DisableNamespaces() + .BuildOrDie(); + } +}; + +double frand() { return rand() / (double)RAND_MAX; } + +double uclock_sec(void) { return (double)clock() / (double)CLOCKS_PER_SEC; } + +int array_output_format = 0; + +void show_output(const char* name, int N, int cplx, float flops, float t0, + float t1, int max_iter) { + float mflops = flops / 1e6 / (t1 - t0 + 1e-16); + if (array_output_format) { + if (flops != -1) { + printf("|%9.0f ", mflops); + } else + printf("| n/a "); + } else { + if (flops != -1) { + printf("N=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, + (cplx ? "CPLX" : "REAL"), name, mflops, + (t1 - t0) / 2 / max_iter * 1e9, max_iter); + } + } + fflush(stdout); +} + +int main(int argc, char* argv[]) { + /* + * Initialize Google's logging library. + */ + google::InitGoogleLogging(argv[0]); + + gflags::ParseCommandLineFlags(&argc, &argv, true); + /* + * Nvalues is a vector keeping the values by which iterates N, its value + * representing the input length. 
More concretely, N is the number of + * data points the calculation runs over (which determines its accuracy). + * To show the performance of the Fast Fourier Transform, the program + * tests various values of N. + */ + int Nvalues[] = {64, 96, 128, 160, 192, 256, + 384, 5 * 96, 512, 5 * 128, 3 * 256, 800, + 1024, 2048, 2400, 4096, 8192, 9 * 1024, + 16384, 32768, 256 * 1024, 1024 * 1024, -1}; + int i; + + VLOG(1) << "Initializing sandbox...\n"; + + pffftSapiSandbox sandbox; + sandbox.Init().IgnoreError(); + + VLOG(1) << "Initialization: " << sandbox.Init().ToString().c_str() << "\n"; + + pffftApi api(&sandbox); + + int N, cplx; + + cplx = 0; + + do { + for (i = 0; i < 23; i++) { + N = Nvalues[i]; + + int Nfloat = N * (cplx ? 2 : 1); + int Nbytes = Nfloat * sizeof(float); + int pass; + + float ref[Nbytes], in[Nbytes], out[Nbytes], tmp[Nbytes], tmp2[Nbytes]; + + sapi::v::Array ref_(ref, Nbytes); + sapi::v::Array in_(in, Nbytes); + sapi::v::Array out_(out, Nbytes); + sapi::v::Array tmp_(tmp, Nbytes); + sapi::v::Array tmp2_(tmp2, Nbytes); + + float wrk[2 * Nbytes + 15 * sizeof(float)]; + sapi::v::Array wrk_(wrk, 2 * Nbytes + 15 * sizeof(float)); + + float ref_max = 0; + int k; + + Nfloat = (cplx ? 
N * 2 : N); + float X[Nbytes], Y[Nbytes], Z[Nbytes]; + sapi::v::Array X_(X, Nbytes), Y_(Y, Nbytes), Z_(Z, Nbytes); + + double t0, t1, flops; + + int max_iter = 5120000 / N * 4; +#ifdef __arm__ + max_iter /= 4; +#endif + int iter; + + for (k = 0; k < Nfloat; ++k) { + X[k] = 0; + } + + /* + * FFTPack benchmark + */ + { + /* + * SIMD_SZ == 4 (returning value of pffft_simd_size()) + */ + int max_iter_ = max_iter / 4; + + if (max_iter_ == 0) max_iter_ = 1; + if (cplx) { + api.cffti(N, wrk_.PtrBoth()).IgnoreError(); + } else { + api.rffti(N, wrk_.PtrBoth()).IgnoreError(); + } + t0 = uclock_sec(); + + for (iter = 0; iter < max_iter_; ++iter) { + if (cplx) { + api.cfftf(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + api.cfftb(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + } else { + api.rfftf(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + api.rfftb(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + } + } + t1 = uclock_sec(); + + flops = + (max_iter_ * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); + show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); + } + + /* + * PFFFT benchmark + */ + { + sapi::StatusOr s = + api.pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); + + VLOG(1) << "Setup status is: " << s.status().ToString().c_str() << "\n"; + + if (s.ok()) { + sapi::v::RemotePtr s_reg(s.value()); + + t0 = uclock_sec(); + for (iter = 0; iter < max_iter; ++iter) { + api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), + Y_.PtrBoth(), PFFFT_FORWARD) + .IgnoreError(); + api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), + Y_.PtrBoth(), PFFFT_FORWARD) + .IgnoreError(); + } + + t1 = uclock_sec(); + api.pffft_destroy_setup(&s_reg).IgnoreError(); + + flops = + (max_iter * 2) * ((cplx ? 
5 : 2.5) * N * log((double)N) / M_LN2); + show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); + } + + VLOG(1) << "N = " << N << " SUCCESSFULLY\n\n"; + } + } + + cplx = !cplx; + } while (cplx); + + return 0; +} \ No newline at end of file diff --git a/oss-internship-2020/pffft/pffft_library_notes.txt b/oss-internship-2020/pffft/pffft_library_notes.txt new file mode 100644 index 0000000..ee20b42 --- /dev/null +++ b/oss-internship-2020/pffft/pffft_library_notes.txt @@ -0,0 +1,416 @@ +PFFFT: a pretty fast FFT. + +TL;DR +-- + +PFFFT does 1D Fast Fourier Transforms, of single precision real and +complex vectors. It tries do it fast, it tries to be correct, and it +tries to be small. Computations do take advantage of SSE1 instructions +on x86 cpus, Altivec on powerpc cpus, and NEON on ARM cpus. The +license is BSD-like. + + +Why does it exist: +-- + +I was in search of a good performing FFT library , preferably very +small and with a very liberal license. + +When one says "fft library", FFTW ("Fastest Fourier Transform in the +West") is probably the first name that comes to mind -- I guess that +99% of open-source projects that need a FFT do use FFTW, and are happy +with it. However, it is quite a large library , which does everything +fft related (2d transforms, 3d transforms, other transformations such +as discrete cosine , or fast hartley). And it is licensed under the +GNU GPL , which means that it cannot be used in non open-source +products. + +An alternative to FFTW that is really small, is the venerable FFTPACK +v4, which is available on NETLIB. A more recent version (v5) exists, +but it is larger as it deals with multi-dimensional transforms. This +is a library that is written in FORTRAN 77, a language that is now +considered as a bit antiquated by many. FFTPACKv4 was written in 1985, +by Dr Paul Swarztrauber of NCAR, more than 25 years ago ! 
And despite +its age, benchmarks show that it is still a very good performing FFT +library, see for example the 1d single precision benchmarks here: +http://www.fftw.org/speed/opteron-2.2GHz-32bit/ . It is however not +competitive with the fastest ones, such as FFTW, Intel MKL, AMD ACML, +Apple vDSP. The reason for that is that those libraries do take +advantage of the SSE SIMD instructions available on Intel CPUs, +available since the days of the Pentium III. These instructions deal +with small vectors of 4 floats at a time, instead of a single float +for a traditional FPU, so when using these instructions one may expect +a 4-fold performance improvement. + +The idea was to take this fortran fftpack v4 code, translate to C, +modify it to deal with those SSE instructions, and check that the +final performance is not completely ridiculous when compared to other +SIMD FFT libraries. Translation to C was performed with f2c ( +http://www.netlib.org/f2c/ ). The resulting file was a bit edited in +order to remove the thousands of gotos that were introduced by +f2c. You will find the fftpack.h and fftpack.c sources in the +repository, this is a complete translation of +http://www.netlib.org/fftpack/ , with the discrete cosine transform +and the test program. There is no license information in the netlib +repository, but it was confirmed to me by the fftpack v5 curators that +the same terms do apply to fftpack v4: +http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html . This is a +"BSD-like" license, it is compatible with proprietary projects. + +Adapting fftpack to deal with the SIMD 4-element vectors instead of +scalar single precision numbers was more complex than I originally +thought, especially with the real transforms, and I ended up writing +more code than I planned.. + + +The code: +-- + +Only two files, in good old C, pffft.c and pffft.h . The API is very +very simple, just make sure that you read the comments in pffft.h. 
+ + +Comparison with other FFTs: +-- + +The idea was not to break speed records, but to get a decently fast +fft that is at least 50% as fast as the fastest FFT -- especially on +the slowest computers. I'm more focused on getting the best performance +on slow cpus (Atom, Intel Core 1, old Athlons, ARM Cortex-A9...), than +on getting top performance on today's fastest cpus. + +It can be used in a real-time context as the fft functions do not +perform any memory allocation -- that is why they accept a 'work' +array in their arguments. + +It is also a bit focused on performing 1D convolutions, that is why it +provides "unordered" FFTs , and a fourier domain convolution +operation. + + +Benchmark results (cpu tested: core i7 2600, core 2 quad, core 1 duo, atom N270, cortex-A9, cortex-A15, A8X) +-- + +The benchmark shows the performance of various fft implementations measured in +MFlops, with the number of floating point operations being defined as 5Nlog2(N) +for a length N complex fft, and 2.5*Nlog2(N) for a real fft. +See http://www.fftw.org/speed/method.html for an explanation of these formulas. 
+ +MacOS Lion, gcc 4.2, 64-bit, fftw 3.3 on a 3.4 GHz core i7 2600 + +Built with: + + gcc-4.2 -o test_pffft -arch x86_64 -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -DHAVE_VECLIB -framework veclib -DHAVE_FFTW -lfftw3f + +| input len |real FFTPack| real vDSP | real FFTW | real PFFFT | |cplx FFTPack| cplx vDSP | cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| +| 64 | 2816 | 8596 | 7329 | 8187 | | 2887 | 14898 | 14668 | 11108 | +| 96 | 3298 | n/a | 8378 | 7727 | | 3953 | n/a | 15680 | 10878 | +| 128 | 3507 | 11575 | 9266 | 10108 | | 4233 | 17598 | 16427 | 12000 | +| 160 | 3391 | n/a | 9838 | 10711 | | 4220 | n/a | 16653 | 11187 | +| 192 | 3919 | n/a | 9868 | 10956 | | 4297 | n/a | 15770 | 12540 | +| 256 | 4283 | 13179 | 10694 | 13128 | | 4545 | 19550 | 16350 | 13822 | +| 384 | 3136 | n/a | 10810 | 12061 | | 3600 | n/a | 16103 | 13240 | +| 480 | 3477 | n/a | 10632 | 12074 | | 3536 | n/a | 11630 | 12522 | +| 512 | 3783 | 15141 | 11267 | 13838 | | 3649 | 20002 | 16560 | 13580 | +| 640 | 3639 | n/a | 11164 | 13946 | | 3695 | n/a | 15416 | 13890 | +| 768 | 3800 | n/a | 11245 | 13495 | | 3590 | n/a | 15802 | 14552 | +| 800 | 3440 | n/a | 10499 | 13301 | | 3659 | n/a | 12056 | 13268 | +| 1024 | 3924 | 15605 | 11450 | 15339 | | 3769 | 20963 | 13941 | 15467 | +| 2048 | 4518 | 16195 | 11551 | 15532 | | 4258 | 20413 | 13723 | 15042 | +| 2400 | 4294 | n/a | 10685 | 13078 | | 4093 | n/a | 12777 | 13119 | +| 4096 | 4750 | 16596 | 11672 | 15817 | | 4157 | 19662 | 14316 | 14336 | +| 8192 | 3820 | 16227 | 11084 | 12555 | | 3691 | 18132 | 12102 | 13813 | +| 9216 | 3864 | n/a | 10254 | 12870 | | 3586 | n/a | 12119 | 13994 | +| 16384 | 3822 | 15123 | 10454 | 12822 | | 3613 | 16874 | 12370 | 13881 | +| 32768 | 4175 | 14512 | 10662 | 11095 | | 3881 | 14702 | 11619 | 11524 | +| 262144 | 3317 | 11429 | 6269 | 9517 | | 2810 | 11729 | 7757 | 10179 | +| 
1048576 | 2913 | 10551 | 4730 | 5867 | | 2661 | 7881 | 3520 | 5350 | +|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| + + +Debian 6, gcc 4.4.5, 64-bit, fftw 3.3.1 on a 3.4 GHz core i7 2600 + +Built with: +gcc -o test_pffft -DHAVE_FFTW -msse2 -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L$HOME/local/lib -I$HOME/local/include/ -lfftw3f -lm + +| N (input length) | real FFTPack | real FFTW | real PFFFT | | cplx FFTPack | cplx FFTW | cplx PFFFT | +|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| +| 64 | 3840 | 7680 | 8777 | | 4389 | 20480 | 11171 | +| 96 | 4214 | 9633 | 8429 | | 4816 | 22477 | 11238 | +| 128 | 3584 | 10240 | 10240 | | 5120 | 23893 | 11947 | +| 192 | 4854 | 11095 | 12945 | | 4854 | 22191 | 14121 | +| 256 | 4096 | 11703 | 16384 | | 5120 | 23406 | 13653 | +| 384 | 4395 | 14651 | 12558 | | 4884 | 19535 | 14651 | +| 512 | 5760 | 13166 | 15360 | | 4608 | 23040 | 15360 | +| 768 | 4907 | 14020 | 16357 | | 4461 | 19628 | 14020 | +| 1024 | 5120 | 14629 | 14629 | | 5120 | 20480 | 15754 | +| 2048 | 5632 | 14080 | 18773 | | 4693 | 12516 | 16091 | +| 4096 | 5120 | 13653 | 17554 | | 4726 | 7680 | 14456 | +| 8192 | 4160 | 7396 | 13312 | | 4437 | 14791 | 13312 | +| 9216 | 4210 | 6124 | 13473 | | 4491 | 7282 | 14970 | +| 16384 | 3976 | 11010 | 14313 | | 4210 | 11450 | 13631 | +| 32768 | 4260 | 10224 | 10954 | | 4260 | 6816 | 11797 | +| 262144 | 3736 | 6896 | 9961 | | 2359 | 8965 | 9437 | +| 1048576 | 2796 | 4534 | 6453 | | 1864 | 3078 | 5592 | +|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| + + + +MacOS Snow Leopard, gcc 4.0, 32-bit, fftw 3.3 on a 1.83 GHz core 1 duo + +Built with: + + gcc -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework veclib + +| input len |real FFTPack| real 
vDSP | real FFTW | real PFFFT | |cplx FFTPack| cplx vDSP | cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| +| 64 | 745 | 2145 | 1706 | 2028 | | 961 | 3356 | 3313 | 2300 | +| 96 | 877 | n/a | 1976 | 1978 | | 1059 | n/a | 3333 | 2233 | +| 128 | 951 | 2808 | 2213 | 2279 | | 1202 | 3803 | 3739 | 2494 | +| 192 | 1002 | n/a | 2456 | 2429 | | 1186 | n/a | 3701 | 2508 | +| 256 | 1065 | 3205 | 2641 | 2793 | | 1302 | 4013 | 3912 | 2663 | +| 384 | 845 | n/a | 2759 | 2499 | | 948 | n/a | 3729 | 2504 | +| 512 | 900 | 3476 | 2956 | 2759 | | 974 | 4057 | 3954 | 2645 | +| 768 | 910 | n/a | 2912 | 2737 | | 975 | n/a | 3837 | 2614 | +| 1024 | 936 | 3583 | 3107 | 3009 | | 1006 | 4124 | 3821 | 2697 | +| 2048 | 1057 | 3585 | 3091 | 2837 | | 1089 | 3889 | 3701 | 2513 | +| 4096 | 1083 | 3524 | 3092 | 2733 | | 1039 | 3617 | 3462 | 2364 | +| 8192 | 874 | 3252 | 2967 | 2363 | | 911 | 3106 | 2789 | 2302 | +| 9216 | 898 | n/a | 2420 | 2290 | | 865 | n/a | 2676 | 2204 | +| 16384 | 903 | 2892 | 2506 | 2421 | | 899 | 3026 | 2797 | 2289 | +| 32768 | 965 | 2837 | 2550 | 2358 | | 920 | 2922 | 2763 | 2240 | +| 262144 | 738 | 2422 | 1589 | 1708 | | 610 | 2038 | 1436 | 1091 | +| 1048576 | 528 | 1207 | 845 | 880 | | 606 | 1020 | 669 | 1036 | +|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| + + + +Ubuntu 11.04, gcc 4.5, 32-bit, fftw 3.2 on a 2.66 core 2 quad + +Built with: +gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm + +| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------| |------------+------------+------------| +| 64 | 1920 | 3614 | 5120 | | 2194 | 7680 | 6467 | +| 96 | 1873 | 3549 | 5187 | | 2107 | 8429 | 5863 | +| 128 | 2240 | 3773 | 5514 | | 
2560 | 7964 | 6827 | +| 192 | 1765 | 4569 | 7767 | | 2284 | 9137 | 7061 | +| 256 | 2048 | 5461 | 7447 | | 2731 | 9638 | 7802 | +| 384 | 1998 | 5861 | 6762 | | 2313 | 9253 | 7644 | +| 512 | 2095 | 6144 | 7680 | | 2194 | 10240 | 7089 | +| 768 | 2230 | 5773 | 7549 | | 2045 | 10331 | 7010 | +| 1024 | 2133 | 6400 | 8533 | | 2133 | 10779 | 7877 | +| 2048 | 2011 | 7040 | 8665 | | 1942 | 10240 | 7768 | +| 4096 | 2194 | 6827 | 8777 | | 1755 | 9452 | 6827 | +| 8192 | 1849 | 6656 | 6656 | | 1752 | 7831 | 6827 | +| 9216 | 1871 | 5858 | 6416 | | 1643 | 6909 | 6266 | +| 16384 | 1883 | 6223 | 6506 | | 1664 | 7340 | 6982 | +| 32768 | 1826 | 6390 | 6667 | | 1631 | 7481 | 6971 | +| 262144 | 1546 | 4075 | 5977 | | 1299 | 3415 | 3551 | +| 1048576 | 1104 | 2071 | 1730 | | 1104 | 1149 | 1834 | +|-----------+------------+------------+------------| |------------+------------+------------| + + + +Ubuntu 11.04, gcc 4.5, 32-bit, fftw 3.3 on a 1.6 GHz Atom N270 + +Built with: +gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm + +| N (input length) | real FFTPack | real FFTW | real PFFFT | | cplx FFTPack | cplx FFTW | cplx PFFFT | +|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| +| 64 | 452 | 1041 | 1336 | | 549 | 2318 | 1781 | +| 96 | 444 | 1297 | 1297 | | 503 | 2408 | 1686 | +| 128 | 527 | 1525 | 1707 | | 543 | 2655 | 1886 | +| 192 | 498 | 1653 | 1849 | | 539 | 2678 | 1942 | +| 256 | 585 | 1862 | 2156 | | 594 | 2777 | 2244 | +| 384 | 499 | 1870 | 1998 | | 511 | 2586 | 1890 | +| 512 | 562 | 2095 | 2194 | | 542 | 2973 | 2194 | +| 768 | 545 | 2045 | 2133 | | 545 | 2365 | 2133 | +| 1024 | 595 | 2133 | 2438 | | 569 | 2695 | 2179 | +| 2048 | 587 | 2125 | 2347 | | 521 | 2230 | 1707 | +| 4096 | 495 | 1890 | 1834 | | 492 | 1876 | 1672 | +| 8192 | 469 | 1548 | 1729 | | 438 | 1740 | 1664 | +| 9216 | 468 | 1663 | 1663 | | 446 | 1585 | 1531 | +| 
16384 | 453 | 1608 | 1767 | | 398 | 1476 | 1664 | +| 32768 | 456 | 1420 | 1503 | | 387 | 1388 | 1345 | +| 262144 | 309 | 385 | 726 | | 262 | 415 | 840 | +| 1048576 | 280 | 351 | 739 | | 261 | 313 | 797 | +|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| + + + +Windows 7, visual c++ 2010 on a 1.6 GHz Atom N270 + +Built with: +cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c + +(visual c++ is definitively not very good with SSE intrinsics...) + +| N (input length) | real FFTPack | real PFFFT | | cplx FFTPack | cplx PFFFT | +|------------------+--------------+--------------| |--------------+--------------| +| 64 | 173 | 1009 | | 174 | 1159 | +| 96 | 169 | 1029 | | 188 | 1201 | +| 128 | 195 | 1242 | | 191 | 1275 | +| 192 | 178 | 1312 | | 184 | 1276 | +| 256 | 196 | 1591 | | 186 | 1281 | +| 384 | 172 | 1409 | | 181 | 1281 | +| 512 | 187 | 1640 | | 181 | 1313 | +| 768 | 171 | 1614 | | 176 | 1258 | +| 1024 | 186 | 1812 | | 178 | 1223 | +| 2048 | 190 | 1707 | | 186 | 1099 | +| 4096 | 182 | 1446 | | 177 | 975 | +| 8192 | 175 | 1345 | | 169 | 1034 | +| 9216 | 165 | 1271 | | 168 | 1023 | +| 16384 | 166 | 1396 | | 165 | 949 | +| 32768 | 172 | 1311 | | 161 | 881 | +| 262144 | 136 | 632 | | 134 | 629 | +| 1048576 | 134 | 698 | | 127 | 623 | +|------------------+--------------+--------------| |--------------+--------------| + + + +Ubuntu 12.04, gcc-4.7.3, 32-bit, with fftw 3.3.3 (built with --enable-neon), on a 1.2GHz ARM Cortex A9 (Tegra 3) + +Built with: +gcc-4.7 -O3 -DHAVE_FFTW -march=armv7-a -mtune=cortex-a9 -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm -I/usr/local/include/ -L/usr/local/lib/ -lfftw3f + +| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------| |------------+------------+------------| +| 64 | 549 | 452 | 731 | | 512 | 602 | 640 | +| 96 | 421 | 
272 | 702 | | 496 | 571 | 602 | +| 128 | 498 | 512 | 815 | | 597 | 618 | 652 | +| 160 | 521 | 536 | 815 | | 586 | 669 | 625 | +| 192 | 539 | 571 | 883 | | 485 | 597 | 626 | +| 256 | 640 | 539 | 975 | | 569 | 611 | 671 | +| 384 | 499 | 610 | 879 | | 499 | 602 | 637 | +| 480 | 518 | 507 | 877 | | 496 | 661 | 616 | +| 512 | 524 | 591 | 1002 | | 549 | 678 | 668 | +| 640 | 542 | 612 | 955 | | 568 | 663 | 645 | +| 768 | 557 | 613 | 981 | | 491 | 663 | 598 | +| 800 | 514 | 353 | 882 | | 514 | 360 | 574 | +| 1024 | 640 | 640 | 1067 | | 492 | 683 | 602 | +| 2048 | 587 | 640 | 908 | | 486 | 640 | 552 | +| 2400 | 479 | 368 | 777 | | 422 | 376 | 518 | +| 4096 | 511 | 614 | 853 | | 426 | 640 | 534 | +| 8192 | 415 | 584 | 708 | | 386 | 622 | 516 | +| 9216 | 419 | 571 | 687 | | 364 | 586 | 506 | +| 16384 | 426 | 577 | 716 | | 398 | 606 | 530 | +| 32768 | 417 | 572 | 673 | | 399 | 572 | 468 | +| 262144 | 219 | 380 | 293 | | 255 | 431 | 343 | +| 1048576 | 202 | 274 | 237 | | 265 | 282 | 355 | +|-----------+------------+------------+------------| |------------+------------+------------| + +Same platform as above, but this time pffft and fftpack are built with clang 3.2: + +clang -O3 -DHAVE_FFTW -march=armv7-a -mtune=cortex-a9 -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm -I/usr/local/include/ -L/usr/local/lib/ -lfftw3f + +| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | +|-----------+------------+------------+------------| |------------+------------+------------| +| 64 | 427 | 452 | 853 | | 427 | 602 | 1024 | +| 96 | 351 | 276 | 843 | | 337 | 571 | 963 | +| 128 | 373 | 512 | 996 | | 390 | 618 | 1054 | +| 160 | 426 | 536 | 987 | | 375 | 669 | 914 | +| 192 | 404 | 571 | 1079 | | 388 | 588 | 1079 | +| 256 | 465 | 539 | 1205 | | 445 | 602 | 1170 | +| 384 | 366 | 610 | 1099 | | 343 | 594 | 1099 | +| 480 | 356 | 507 | 1140 | | 335 | 651 | 931 | +| 512 | 411 | 591 | 1213 | | 384 | 649 | 1124 | +| 640 
| 398 | 612 | 1193 | | 373 | 654 | 901 | +| 768 | 409 | 613 | 1227 | | 383 | 663 | 1044 | +| 800 | 411 | 348 | 1073 | | 353 | 358 | 809 | +| 1024 | 427 | 640 | 1280 | | 413 | 692 | 1004 | +| 2048 | 414 | 626 | 1126 | | 371 | 640 | 853 | +| 2400 | 399 | 373 | 898 | | 319 | 368 | 653 | +| 4096 | 404 | 602 | 1059 | | 357 | 633 | 778 | +| 8192 | 332 | 584 | 792 | | 308 | 616 | 716 | +| 9216 | 322 | 561 | 783 | | 299 | 586 | 687 | +| 16384 | 344 | 568 | 778 | | 314 | 617 | 745 | +| 32768 | 342 | 564 | 737 | | 314 | 552 | 629 | +| 262144 | 201 | 383 | 313 | | 227 | 435 | 413 | +| 1048576 | 187 | 262 | 251 | | 228 | 281 | 409 | +|-----------+------------+------------+------------| |------------+------------+------------| + +So it looks like, on ARM, gcc 4.7 is the best at scalar floating point +(the fftpack performance numbers are better with gcc), while clang is +the best with neon intrinsics (see how pffft perf has improved with +clang 3.2). + + +NVIDIA Jetson TK1 board, gcc-4.8.2. The cpu is a 2.3GHz cortex A15 (Tegra K1). 
+ +Built with: +gcc -O3 -march=armv7-a -mtune=native -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm + +| input len |real FFTPack| real PFFFT | |cplx FFTPack| cplx PFFFT | +|-----------+------------+------------| |------------+------------| +| 64 | 1735 | 3308 | | 1994 | 3744 | +| 96 | 1596 | 3448 | | 1987 | 3572 | +| 128 | 1807 | 4076 | | 2255 | 3960 | +| 160 | 1769 | 4083 | | 2071 | 3845 | +| 192 | 1990 | 4233 | | 2017 | 3939 | +| 256 | 2191 | 4882 | | 2254 | 4346 | +| 384 | 1878 | 4492 | | 2073 | 4012 | +| 480 | 1748 | 4398 | | 1923 | 3951 | +| 512 | 2030 | 5064 | | 2267 | 4195 | +| 640 | 1918 | 4756 | | 2094 | 4184 | +| 768 | 2099 | 4907 | | 2048 | 4297 | +| 800 | 1822 | 4555 | | 1880 | 4063 | +| 1024 | 2232 | 5355 | | 2187 | 4420 | +| 2048 | 2176 | 4983 | | 2027 | 3602 | +| 2400 | 1741 | 4256 | | 1710 | 3344 | +| 4096 | 1816 | 3914 | | 1851 | 3349 | +| 8192 | 1716 | 3481 | | 1700 | 3255 | +| 9216 | 1735 | 3589 | | 1653 | 3094 | +| 16384 | 1567 | 3483 | | 1637 | 3244 | +| 32768 | 1624 | 3240 | | 1655 | 3156 | +| 262144 | 1012 | 1898 | | 983 | 1503 | +| 1048576 | 876 | 1154 | | 868 | 1341 | +|-----------+------------+------------| |------------+------------| + +The performance on the tegra K1 is pretty impressive. I'm not +including the FFTW numbers as they are slightly below the scalar +fftpack numbers, so something must be wrong (however it seems to be +correctly configured and is using neon simd instructions). + +When using clang 3.4 the pffft version is even a bit faster, reaching +5.7 GFlops for real ffts of size 1024. + + +iPad Air 2 with iOS9, xcode 8.0, arm64. The cpu is an Apple A8X, supposedly running at 1.5GHz. 
+ +| input len |real FFTPack| real vDSP | real PFFFT | |cplx FFTPack| cplx vDSP | cplx PFFFT | +|-----------+------------+------------+------------| |------------+------------+------------| +| 64 | 2517 | 7995 | 6086 | | 2725 | 13006 | 8495 | +| 96 | 2442 | n/a | 6691 | | 2256 | n/a | 7991 | +| 128 | 2664 | 10186 | 7877 | | 2575 | 15115 | 9115 | +| 160 | 2638 | n/a | 8283 | | 2682 | n/a | 8806 | +| 192 | 2903 | n/a | 9083 | | 2634 | n/a | 8980 | +| 256 | 3184 | 11452 | 10039 | | 3026 | 15410 | 10199 | +| 384 | 2665 | n/a | 10100 | | 2275 | n/a | 9247 | +| 480 | 2546 | n/a | 9863 | | 2341 | n/a | 8892 | +| 512 | 2832 | 12197 | 10989 | | 2547 | 16768 | 10154 | +| 640 | 2755 | n/a | 10461 | | 2569 | n/a | 9666 | +| 768 | 2998 | n/a | 11355 | | 2585 | n/a | 9813 | +| 800 | 2516 | n/a | 10332 | | 2433 | n/a | 9164 | +| 1024 | 3109 | 12965 | 12114 | | 2869 | 16448 | 10519 | +| 2048 | 3027 | 12996 | 12023 | | 2648 | 17304 | 10307 | +| 2400 | 2515 | n/a | 10372 | | 2355 | n/a | 8443 | +| 4096 | 3204 | 13603 | 12359 | | 2814 | 16570 | 9780 | +| 8192 | 2759 | 13422 | 10824 | | 2153 | 15652 | 7884 | +| 9216 | 2700 | n/a | 9938 | | 2241 | n/a | 7900 | +| 16384 | 2280 | 13057 | 7976 | | 593 | 4272 | 2534 | +| 32768 | 768 | 4269 | 2882 | | 606 | 4405 | 2604 | +| 262144 | 724 | 3527 | 2630 | | 534 | 2418 | 2157 | +| 1048576 | 674 | 1467 | 2135 | | 530 | 1621 | 2055 | +|-----------+------------+------------+------------| |------------+------------+------------| + +I double-checked to make sure I did not make a mistake in the time +measurements, as the numbers are much higher than what I initially +expected. They are in fact higher than the number I get on the 2.8GHz +Xeon of my 2008 mac pro.. (except for FFT lengths >= 32768 where +having a big cache is useful). A good surprise is also that the perf +is not too far from apple's vDSP (at least for the real FFT). 
+ From fc6e9e82c60987271cbb286327ea6dcf2ca4656d Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Thu, 20 Aug 2020 11:24:19 +0000 Subject: [PATCH 14/42] Unnecessary files removed --- oss-internship-2020/pffft/CMakeLists.txt | 4 +- oss-internship-2020/pffft/README.txt | 32 -- oss-internship-2020/pffft/README_pffft.txt | 416 ----------------- oss-internship-2020/pffft/myNotes.txt | 101 ----- oss-internship-2020/pffft/test_pffft.c | 419 ------------------ .../pffft/test_pffft_sandboxed.cc | 206 --------- 6 files changed, 2 insertions(+), 1176 deletions(-) delete mode 100644 oss-internship-2020/pffft/README.txt delete mode 100644 oss-internship-2020/pffft/README_pffft.txt delete mode 100644 oss-internship-2020/pffft/myNotes.txt delete mode 100644 oss-internship-2020/pffft/test_pffft.c delete mode 100644 oss-internship-2020/pffft/test_pffft_sandboxed.cc diff --git a/oss-internship-2020/pffft/CMakeLists.txt b/oss-internship-2020/pffft/CMakeLists.txt index a9ba38a..6c7e776 100644 --- a/oss-internship-2020/pffft/CMakeLists.txt +++ b/oss-internship-2020/pffft/CMakeLists.txt @@ -13,7 +13,7 @@ add_library(pffft STATIC ) add_executable(pffft_main - test_pffft.c + main_pffft.c ) target_link_libraries(pffft_main PRIVATE @@ -82,7 +82,7 @@ target_include_directories(pffft_sapi INTERFACE ) add_executable(pffft_sandboxed - test_pffft_sandboxed.cc + main_pffft_sandboxed.cc ) target_link_libraries(pffft_sandboxed PRIVATE diff --git a/oss-internship-2020/pffft/README.txt b/oss-internship-2020/pffft/README.txt deleted file mode 100644 index c7ac70b..0000000 --- a/oss-internship-2020/pffft/README.txt +++ /dev/null @@ -1,32 +0,0 @@ -Sandboxing PFFFT library - -Builder: CMake - -For testing: -`cd build`, then `./pffft_sandboxed` - -For debug: -`SAPI_VLOG_LEVEL=1 ./pffft_sandboxed --v=100 ---sandbox2_danger_danger_permit_all_and_log ` - -CMake observations: - * linking pffft and fftpack (which contains necessary functions for pffft) - * set math library - -Sandboxed main observations: - * 
containing two testing parts (fft / pffft benchmarks) - ! current stage: fft - works :) - pffft - implemented - * (Solved) pffft benchmark bug: "Sandbox not active" - N = 64, status OK, pffft_transform generates error - N > 64, status not OK - Problem on initialising sapi::StatusOr s; - the memory that stays for s is not the same with the address passed - in pffft_transform function. - (sapi :: v :: GenericPtr - to be changed) - - Temporary solution: change the generated files to accept - uintptr_t instead of PFFFT_Setup - - Solution: using "sapi :: v :: RemotePtr" instead of "sapi :: v :: GenericPtr" - to access the memory of object s diff --git a/oss-internship-2020/pffft/README_pffft.txt b/oss-internship-2020/pffft/README_pffft.txt deleted file mode 100644 index ee20b42..0000000 --- a/oss-internship-2020/pffft/README_pffft.txt +++ /dev/null @@ -1,416 +0,0 @@ -PFFFT: a pretty fast FFT. - -TL;DR --- - -PFFFT does 1D Fast Fourier Transforms, of single precision real and -complex vectors. It tries do it fast, it tries to be correct, and it -tries to be small. Computations do take advantage of SSE1 instructions -on x86 cpus, Altivec on powerpc cpus, and NEON on ARM cpus. The -license is BSD-like. - - -Why does it exist: --- - -I was in search of a good performing FFT library , preferably very -small and with a very liberal license. - -When one says "fft library", FFTW ("Fastest Fourier Transform in the -West") is probably the first name that comes to mind -- I guess that -99% of open-source projects that need a FFT do use FFTW, and are happy -with it. However, it is quite a large library , which does everything -fft related (2d transforms, 3d transforms, other transformations such -as discrete cosine , or fast hartley). And it is licensed under the -GNU GPL , which means that it cannot be used in non open-source -products. - -An alternative to FFTW that is really small, is the venerable FFTPACK -v4, which is available on NETLIB. 
A more recent version (v5) exists, -but it is larger as it deals with multi-dimensional transforms. This -is a library that is written in FORTRAN 77, a language that is now -considered as a bit antiquated by many. FFTPACKv4 was written in 1985, -by Dr Paul Swarztrauber of NCAR, more than 25 years ago ! And despite -its age, benchmarks show it that it still a very good performing FFT -library, see for example the 1d single precision benchmarks here: -http://www.fftw.org/speed/opteron-2.2GHz-32bit/ . It is however not -competitive with the fastest ones, such as FFTW, Intel MKL, AMD ACML, -Apple vDSP. The reason for that is that those libraries do take -advantage of the SSE SIMD instructions available on Intel CPUs, -available since the days of the Pentium III. These instructions deal -with small vectors of 4 floats at a time, instead of a single float -for a traditionnal FPU, so when using these instructions one may expect -a 4-fold performance improvement. - -The idea was to take this fortran fftpack v4 code, translate to C, -modify it to deal with those SSE instructions, and check that the -final performance is not completely ridiculous when compared to other -SIMD FFT libraries. Translation to C was performed with f2c ( -http://www.netlib.org/f2c/ ). The resulting file was a bit edited in -order to remove the thousands of gotos that were introduced by -f2c. You will find the fftpack.h and fftpack.c sources in the -repository, this a complete translation of -http://www.netlib.org/fftpack/ , with the discrete cosine transform -and the test program. There is no license information in the netlib -repository, but it was confirmed to me by the fftpack v5 curators that -the same terms do apply to fftpack v4: -http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html . This is a -"BSD-like" license, it is compatible with proprietary projects. 
- -Adapting fftpack to deal with the SIMD 4-element vectors instead of -scalar single precision numbers was more complex than I originally -thought, especially with the real transforms, and I ended up writing -more code than I planned.. - - -The code: --- - -Only two files, in good old C, pffft.c and pffft.h . The API is very -very simple, just make sure that you read the comments in pffft.h. - - -Comparison with other FFTs: --- - -The idea was not to break speed records, but to get a decently fast -fft that is at least 50% as fast as the fastest FFT -- especially on -slowest computers . I'm more focused on getting the best performance -on slow cpus (Atom, Intel Core 1, old Athlons, ARM Cortex-A9...), than -on getting top performance on today fastest cpus. - -It can be used in a real-time context as the fft functions do not -perform any memory allocation -- that is why they accept a 'work' -array in their arguments. - -It is also a bit focused on performing 1D convolutions, that is why it -provides "unordered" FFTs , and a fourier domain convolution -operation. - - -Benchmark results (cpu tested: core i7 2600, core 2 quad, core 1 duo, atom N270, cortex-A9, cortex-A15, A8X) --- - -The benchmark shows the performance of various fft implementations measured in -MFlops, with the number of floating point operations being defined as 5Nlog2(N) -for a length N complex fft, and 2.5*Nlog2(N) for a real fft. -See http://www.fftw.org/speed/method.html for an explanation of these formulas. 
- -MacOS Lion, gcc 4.2, 64-bit, fftw 3.3 on a 3.4 GHz core i7 2600 - -Built with: - - gcc-4.2 -o test_pffft -arch x86_64 -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -DHAVE_VECLIB -framework veclib -DHAVE_FFTW -lfftw3f - -| input len |real FFTPack| real vDSP | real FFTW | real PFFFT | |cplx FFTPack| cplx vDSP | cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| -| 64 | 2816 | 8596 | 7329 | 8187 | | 2887 | 14898 | 14668 | 11108 | -| 96 | 3298 | n/a | 8378 | 7727 | | 3953 | n/a | 15680 | 10878 | -| 128 | 3507 | 11575 | 9266 | 10108 | | 4233 | 17598 | 16427 | 12000 | -| 160 | 3391 | n/a | 9838 | 10711 | | 4220 | n/a | 16653 | 11187 | -| 192 | 3919 | n/a | 9868 | 10956 | | 4297 | n/a | 15770 | 12540 | -| 256 | 4283 | 13179 | 10694 | 13128 | | 4545 | 19550 | 16350 | 13822 | -| 384 | 3136 | n/a | 10810 | 12061 | | 3600 | n/a | 16103 | 13240 | -| 480 | 3477 | n/a | 10632 | 12074 | | 3536 | n/a | 11630 | 12522 | -| 512 | 3783 | 15141 | 11267 | 13838 | | 3649 | 20002 | 16560 | 13580 | -| 640 | 3639 | n/a | 11164 | 13946 | | 3695 | n/a | 15416 | 13890 | -| 768 | 3800 | n/a | 11245 | 13495 | | 3590 | n/a | 15802 | 14552 | -| 800 | 3440 | n/a | 10499 | 13301 | | 3659 | n/a | 12056 | 13268 | -| 1024 | 3924 | 15605 | 11450 | 15339 | | 3769 | 20963 | 13941 | 15467 | -| 2048 | 4518 | 16195 | 11551 | 15532 | | 4258 | 20413 | 13723 | 15042 | -| 2400 | 4294 | n/a | 10685 | 13078 | | 4093 | n/a | 12777 | 13119 | -| 4096 | 4750 | 16596 | 11672 | 15817 | | 4157 | 19662 | 14316 | 14336 | -| 8192 | 3820 | 16227 | 11084 | 12555 | | 3691 | 18132 | 12102 | 13813 | -| 9216 | 3864 | n/a | 10254 | 12870 | | 3586 | n/a | 12119 | 13994 | -| 16384 | 3822 | 15123 | 10454 | 12822 | | 3613 | 16874 | 12370 | 13881 | -| 32768 | 4175 | 14512 | 10662 | 11095 | | 3881 | 14702 | 11619 | 11524 | -| 262144 | 3317 | 11429 | 6269 | 9517 | | 2810 | 11729 | 7757 | 10179 | -| 
1048576 | 2913 | 10551 | 4730 | 5867 | | 2661 | 7881 | 3520 | 5350 | -|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| - - -Debian 6, gcc 4.4.5, 64-bit, fftw 3.3.1 on a 3.4 GHz core i7 2600 - -Built with: -gcc -o test_pffft -DHAVE_FFTW -msse2 -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L$HOME/local/lib -I$HOME/local/include/ -lfftw3f -lm - -| N (input length) | real FFTPack | real FFTW | real PFFFT | | cplx FFTPack | cplx FFTW | cplx PFFFT | -|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| -| 64 | 3840 | 7680 | 8777 | | 4389 | 20480 | 11171 | -| 96 | 4214 | 9633 | 8429 | | 4816 | 22477 | 11238 | -| 128 | 3584 | 10240 | 10240 | | 5120 | 23893 | 11947 | -| 192 | 4854 | 11095 | 12945 | | 4854 | 22191 | 14121 | -| 256 | 4096 | 11703 | 16384 | | 5120 | 23406 | 13653 | -| 384 | 4395 | 14651 | 12558 | | 4884 | 19535 | 14651 | -| 512 | 5760 | 13166 | 15360 | | 4608 | 23040 | 15360 | -| 768 | 4907 | 14020 | 16357 | | 4461 | 19628 | 14020 | -| 1024 | 5120 | 14629 | 14629 | | 5120 | 20480 | 15754 | -| 2048 | 5632 | 14080 | 18773 | | 4693 | 12516 | 16091 | -| 4096 | 5120 | 13653 | 17554 | | 4726 | 7680 | 14456 | -| 8192 | 4160 | 7396 | 13312 | | 4437 | 14791 | 13312 | -| 9216 | 4210 | 6124 | 13473 | | 4491 | 7282 | 14970 | -| 16384 | 3976 | 11010 | 14313 | | 4210 | 11450 | 13631 | -| 32768 | 4260 | 10224 | 10954 | | 4260 | 6816 | 11797 | -| 262144 | 3736 | 6896 | 9961 | | 2359 | 8965 | 9437 | -| 1048576 | 2796 | 4534 | 6453 | | 1864 | 3078 | 5592 | -|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| - - - -MacOS Snow Leopard, gcc 4.0, 32-bit, fftw 3.3 on a 1.83 GHz core 1 duo - -Built with: - - gcc -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework veclib - -| input len |real FFTPack| real 
vDSP | real FFTW | real PFFFT | |cplx FFTPack| cplx vDSP | cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| -| 64 | 745 | 2145 | 1706 | 2028 | | 961 | 3356 | 3313 | 2300 | -| 96 | 877 | n/a | 1976 | 1978 | | 1059 | n/a | 3333 | 2233 | -| 128 | 951 | 2808 | 2213 | 2279 | | 1202 | 3803 | 3739 | 2494 | -| 192 | 1002 | n/a | 2456 | 2429 | | 1186 | n/a | 3701 | 2508 | -| 256 | 1065 | 3205 | 2641 | 2793 | | 1302 | 4013 | 3912 | 2663 | -| 384 | 845 | n/a | 2759 | 2499 | | 948 | n/a | 3729 | 2504 | -| 512 | 900 | 3476 | 2956 | 2759 | | 974 | 4057 | 3954 | 2645 | -| 768 | 910 | n/a | 2912 | 2737 | | 975 | n/a | 3837 | 2614 | -| 1024 | 936 | 3583 | 3107 | 3009 | | 1006 | 4124 | 3821 | 2697 | -| 2048 | 1057 | 3585 | 3091 | 2837 | | 1089 | 3889 | 3701 | 2513 | -| 4096 | 1083 | 3524 | 3092 | 2733 | | 1039 | 3617 | 3462 | 2364 | -| 8192 | 874 | 3252 | 2967 | 2363 | | 911 | 3106 | 2789 | 2302 | -| 9216 | 898 | n/a | 2420 | 2290 | | 865 | n/a | 2676 | 2204 | -| 16384 | 903 | 2892 | 2506 | 2421 | | 899 | 3026 | 2797 | 2289 | -| 32768 | 965 | 2837 | 2550 | 2358 | | 920 | 2922 | 2763 | 2240 | -| 262144 | 738 | 2422 | 1589 | 1708 | | 610 | 2038 | 1436 | 1091 | -| 1048576 | 528 | 1207 | 845 | 880 | | 606 | 1020 | 669 | 1036 | -|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| - - - -Ubuntu 11.04, gcc 4.5, 32-bit, fftw 3.2 on a 2.66 core 2 quad - -Built with: -gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm - -| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------| |------------+------------+------------| -| 64 | 1920 | 3614 | 5120 | | 2194 | 7680 | 6467 | -| 96 | 1873 | 3549 | 5187 | | 2107 | 8429 | 5863 | -| 128 | 2240 | 3773 | 5514 | | 
2560 | 7964 | 6827 | -| 192 | 1765 | 4569 | 7767 | | 2284 | 9137 | 7061 | -| 256 | 2048 | 5461 | 7447 | | 2731 | 9638 | 7802 | -| 384 | 1998 | 5861 | 6762 | | 2313 | 9253 | 7644 | -| 512 | 2095 | 6144 | 7680 | | 2194 | 10240 | 7089 | -| 768 | 2230 | 5773 | 7549 | | 2045 | 10331 | 7010 | -| 1024 | 2133 | 6400 | 8533 | | 2133 | 10779 | 7877 | -| 2048 | 2011 | 7040 | 8665 | | 1942 | 10240 | 7768 | -| 4096 | 2194 | 6827 | 8777 | | 1755 | 9452 | 6827 | -| 8192 | 1849 | 6656 | 6656 | | 1752 | 7831 | 6827 | -| 9216 | 1871 | 5858 | 6416 | | 1643 | 6909 | 6266 | -| 16384 | 1883 | 6223 | 6506 | | 1664 | 7340 | 6982 | -| 32768 | 1826 | 6390 | 6667 | | 1631 | 7481 | 6971 | -| 262144 | 1546 | 4075 | 5977 | | 1299 | 3415 | 3551 | -| 1048576 | 1104 | 2071 | 1730 | | 1104 | 1149 | 1834 | -|-----------+------------+------------+------------| |------------+------------+------------| - - - -Ubuntu 11.04, gcc 4.5, 32-bit, fftw 3.3 on a 1.6 GHz Atom N270 - -Built with: -gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm - -| N (input length) | real FFTPack | real FFTW | real PFFFT | | cplx FFTPack | cplx FFTW | cplx PFFFT | -|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| -| 64 | 452 | 1041 | 1336 | | 549 | 2318 | 1781 | -| 96 | 444 | 1297 | 1297 | | 503 | 2408 | 1686 | -| 128 | 527 | 1525 | 1707 | | 543 | 2655 | 1886 | -| 192 | 498 | 1653 | 1849 | | 539 | 2678 | 1942 | -| 256 | 585 | 1862 | 2156 | | 594 | 2777 | 2244 | -| 384 | 499 | 1870 | 1998 | | 511 | 2586 | 1890 | -| 512 | 562 | 2095 | 2194 | | 542 | 2973 | 2194 | -| 768 | 545 | 2045 | 2133 | | 545 | 2365 | 2133 | -| 1024 | 595 | 2133 | 2438 | | 569 | 2695 | 2179 | -| 2048 | 587 | 2125 | 2347 | | 521 | 2230 | 1707 | -| 4096 | 495 | 1890 | 1834 | | 492 | 1876 | 1672 | -| 8192 | 469 | 1548 | 1729 | | 438 | 1740 | 1664 | -| 9216 | 468 | 1663 | 1663 | | 446 | 1585 | 1531 | -| 
16384 | 453 | 1608 | 1767 | | 398 | 1476 | 1664 | -| 32768 | 456 | 1420 | 1503 | | 387 | 1388 | 1345 | -| 262144 | 309 | 385 | 726 | | 262 | 415 | 840 | -| 1048576 | 280 | 351 | 739 | | 261 | 313 | 797 | -|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| - - - -Windows 7, visual c++ 2010 on a 1.6 GHz Atom N270 - -Built with: -cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c - -(visual c++ is definitively not very good with SSE intrinsics...) - -| N (input length) | real FFTPack | real PFFFT | | cplx FFTPack | cplx PFFFT | -|------------------+--------------+--------------| |--------------+--------------| -| 64 | 173 | 1009 | | 174 | 1159 | -| 96 | 169 | 1029 | | 188 | 1201 | -| 128 | 195 | 1242 | | 191 | 1275 | -| 192 | 178 | 1312 | | 184 | 1276 | -| 256 | 196 | 1591 | | 186 | 1281 | -| 384 | 172 | 1409 | | 181 | 1281 | -| 512 | 187 | 1640 | | 181 | 1313 | -| 768 | 171 | 1614 | | 176 | 1258 | -| 1024 | 186 | 1812 | | 178 | 1223 | -| 2048 | 190 | 1707 | | 186 | 1099 | -| 4096 | 182 | 1446 | | 177 | 975 | -| 8192 | 175 | 1345 | | 169 | 1034 | -| 9216 | 165 | 1271 | | 168 | 1023 | -| 16384 | 166 | 1396 | | 165 | 949 | -| 32768 | 172 | 1311 | | 161 | 881 | -| 262144 | 136 | 632 | | 134 | 629 | -| 1048576 | 134 | 698 | | 127 | 623 | -|------------------+--------------+--------------| |--------------+--------------| - - - -Ubuntu 12.04, gcc-4.7.3, 32-bit, with fftw 3.3.3 (built with --enable-neon), on a 1.2GHz ARM Cortex A9 (Tegra 3) - -Built with: -gcc-4.7 -O3 -DHAVE_FFTW -march=armv7-a -mtune=cortex-a9 -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm -I/usr/local/include/ -L/usr/local/lib/ -lfftw3f - -| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------| |------------+------------+------------| -| 64 | 549 | 452 | 731 | | 512 | 602 | 640 | -| 96 | 421 | 
272 | 702 | | 496 | 571 | 602 | -| 128 | 498 | 512 | 815 | | 597 | 618 | 652 | -| 160 | 521 | 536 | 815 | | 586 | 669 | 625 | -| 192 | 539 | 571 | 883 | | 485 | 597 | 626 | -| 256 | 640 | 539 | 975 | | 569 | 611 | 671 | -| 384 | 499 | 610 | 879 | | 499 | 602 | 637 | -| 480 | 518 | 507 | 877 | | 496 | 661 | 616 | -| 512 | 524 | 591 | 1002 | | 549 | 678 | 668 | -| 640 | 542 | 612 | 955 | | 568 | 663 | 645 | -| 768 | 557 | 613 | 981 | | 491 | 663 | 598 | -| 800 | 514 | 353 | 882 | | 514 | 360 | 574 | -| 1024 | 640 | 640 | 1067 | | 492 | 683 | 602 | -| 2048 | 587 | 640 | 908 | | 486 | 640 | 552 | -| 2400 | 479 | 368 | 777 | | 422 | 376 | 518 | -| 4096 | 511 | 614 | 853 | | 426 | 640 | 534 | -| 8192 | 415 | 584 | 708 | | 386 | 622 | 516 | -| 9216 | 419 | 571 | 687 | | 364 | 586 | 506 | -| 16384 | 426 | 577 | 716 | | 398 | 606 | 530 | -| 32768 | 417 | 572 | 673 | | 399 | 572 | 468 | -| 262144 | 219 | 380 | 293 | | 255 | 431 | 343 | -| 1048576 | 202 | 274 | 237 | | 265 | 282 | 355 | -|-----------+------------+------------+------------| |------------+------------+------------| - -Same platform as above, but this time pffft and fftpack are built with clang 3.2: - -clang -O3 -DHAVE_FFTW -march=armv7-a -mtune=cortex-a9 -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm -I/usr/local/include/ -L/usr/local/lib/ -lfftw3f - -| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------| |------------+------------+------------| -| 64 | 427 | 452 | 853 | | 427 | 602 | 1024 | -| 96 | 351 | 276 | 843 | | 337 | 571 | 963 | -| 128 | 373 | 512 | 996 | | 390 | 618 | 1054 | -| 160 | 426 | 536 | 987 | | 375 | 669 | 914 | -| 192 | 404 | 571 | 1079 | | 388 | 588 | 1079 | -| 256 | 465 | 539 | 1205 | | 445 | 602 | 1170 | -| 384 | 366 | 610 | 1099 | | 343 | 594 | 1099 | -| 480 | 356 | 507 | 1140 | | 335 | 651 | 931 | -| 512 | 411 | 591 | 1213 | | 384 | 649 | 1124 | -| 640 
| 398 | 612 | 1193 | | 373 | 654 | 901 | -| 768 | 409 | 613 | 1227 | | 383 | 663 | 1044 | -| 800 | 411 | 348 | 1073 | | 353 | 358 | 809 | -| 1024 | 427 | 640 | 1280 | | 413 | 692 | 1004 | -| 2048 | 414 | 626 | 1126 | | 371 | 640 | 853 | -| 2400 | 399 | 373 | 898 | | 319 | 368 | 653 | -| 4096 | 404 | 602 | 1059 | | 357 | 633 | 778 | -| 8192 | 332 | 584 | 792 | | 308 | 616 | 716 | -| 9216 | 322 | 561 | 783 | | 299 | 586 | 687 | -| 16384 | 344 | 568 | 778 | | 314 | 617 | 745 | -| 32768 | 342 | 564 | 737 | | 314 | 552 | 629 | -| 262144 | 201 | 383 | 313 | | 227 | 435 | 413 | -| 1048576 | 187 | 262 | 251 | | 228 | 281 | 409 | -|-----------+------------+------------+------------| |------------+------------+------------| - -So it looks like, on ARM, gcc 4.7 is the best at scalar floating point -(the fftpack performance numbers are better with gcc), while clang is -the best with neon intrinsics (see how pffft perf has improved with -clang 3.2). - - -NVIDIA Jetson TK1 board, gcc-4.8.2. The cpu is a 2.3GHz cortex A15 (Tegra K1). 
- -Built with: -gcc -O3 -march=armv7-a -mtune=native -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm - -| input len |real FFTPack| real PFFFT | |cplx FFTPack| cplx PFFFT | -|-----------+------------+------------| |------------+------------| -| 64 | 1735 | 3308 | | 1994 | 3744 | -| 96 | 1596 | 3448 | | 1987 | 3572 | -| 128 | 1807 | 4076 | | 2255 | 3960 | -| 160 | 1769 | 4083 | | 2071 | 3845 | -| 192 | 1990 | 4233 | | 2017 | 3939 | -| 256 | 2191 | 4882 | | 2254 | 4346 | -| 384 | 1878 | 4492 | | 2073 | 4012 | -| 480 | 1748 | 4398 | | 1923 | 3951 | -| 512 | 2030 | 5064 | | 2267 | 4195 | -| 640 | 1918 | 4756 | | 2094 | 4184 | -| 768 | 2099 | 4907 | | 2048 | 4297 | -| 800 | 1822 | 4555 | | 1880 | 4063 | -| 1024 | 2232 | 5355 | | 2187 | 4420 | -| 2048 | 2176 | 4983 | | 2027 | 3602 | -| 2400 | 1741 | 4256 | | 1710 | 3344 | -| 4096 | 1816 | 3914 | | 1851 | 3349 | -| 8192 | 1716 | 3481 | | 1700 | 3255 | -| 9216 | 1735 | 3589 | | 1653 | 3094 | -| 16384 | 1567 | 3483 | | 1637 | 3244 | -| 32768 | 1624 | 3240 | | 1655 | 3156 | -| 262144 | 1012 | 1898 | | 983 | 1503 | -| 1048576 | 876 | 1154 | | 868 | 1341 | -|-----------+------------+------------| |------------+------------| - -The performance on the tegra K1 is pretty impressive. I'm not -including the FFTW numbers as they as slightly below the scalar -fftpack numbers, so something must be wrong (however it seems to be -correctly configured and is using neon simd instructions). - -When using clang 3.4 the pffft version is even a bit faster, reaching -5.7 GFlops for real ffts of size 1024. - - -iPad Air 2 with iOS9, xcode 8.0, arm64. The cpu is an Apple A8X, supposedly running at 1.5GHz. 
- -| input len |real FFTPack| real vDSP | real PFFFT | |cplx FFTPack| cplx vDSP | cplx PFFFT | -|-----------+------------+------------+------------| |------------+------------+------------| -| 64 | 2517 | 7995 | 6086 | | 2725 | 13006 | 8495 | -| 96 | 2442 | n/a | 6691 | | 2256 | n/a | 7991 | -| 128 | 2664 | 10186 | 7877 | | 2575 | 15115 | 9115 | -| 160 | 2638 | n/a | 8283 | | 2682 | n/a | 8806 | -| 192 | 2903 | n/a | 9083 | | 2634 | n/a | 8980 | -| 256 | 3184 | 11452 | 10039 | | 3026 | 15410 | 10199 | -| 384 | 2665 | n/a | 10100 | | 2275 | n/a | 9247 | -| 480 | 2546 | n/a | 9863 | | 2341 | n/a | 8892 | -| 512 | 2832 | 12197 | 10989 | | 2547 | 16768 | 10154 | -| 640 | 2755 | n/a | 10461 | | 2569 | n/a | 9666 | -| 768 | 2998 | n/a | 11355 | | 2585 | n/a | 9813 | -| 800 | 2516 | n/a | 10332 | | 2433 | n/a | 9164 | -| 1024 | 3109 | 12965 | 12114 | | 2869 | 16448 | 10519 | -| 2048 | 3027 | 12996 | 12023 | | 2648 | 17304 | 10307 | -| 2400 | 2515 | n/a | 10372 | | 2355 | n/a | 8443 | -| 4096 | 3204 | 13603 | 12359 | | 2814 | 16570 | 9780 | -| 8192 | 2759 | 13422 | 10824 | | 2153 | 15652 | 7884 | -| 9216 | 2700 | n/a | 9938 | | 2241 | n/a | 7900 | -| 16384 | 2280 | 13057 | 7976 | | 593 | 4272 | 2534 | -| 32768 | 768 | 4269 | 2882 | | 606 | 4405 | 2604 | -| 262144 | 724 | 3527 | 2630 | | 534 | 2418 | 2157 | -| 1048576 | 674 | 1467 | 2135 | | 530 | 1621 | 2055 | -|-----------+------------+------------+------------| |------------+------------+------------| - -I double-checked to make sure I did not make a mistake in the time -measurements, as the numbers are much higher than what I initially -expected. They are in fact higher than the number I get on the 2.8GHz -Xeon of my 2008 mac pro.. (except for FFT lengths >= 32768 where -having a big cache is useful). A good surprise is also that the perf -is not too far from apple's vDSP (at least for the real FFT). 
- diff --git a/oss-internship-2020/pffft/myNotes.txt b/oss-internship-2020/pffft/myNotes.txt deleted file mode 100644 index 0dd3bcb..0000000 --- a/oss-internship-2020/pffft/myNotes.txt +++ /dev/null @@ -1,101 +0,0 @@ -About library's functions: - * pffft_aligned_malloc(size_t) - returns an allocated array considering the alignment offset - * pffft_aligned_free(void *) - frees the memory - * pffft_simd_size() - returns the SIMD_SZ = 4 (regarding simd vector) - * pffft_new_setup(int, ...) - with a fft size (first argument) being a multiple of 16, 32. - -Deleted part (validate function) - /*for (pass = 0; pass < 2; pass++) { - if (pass == 0) { - for (k = 0; k < Nfloat; k++) { - ref_[k] = in_[k] = frand() * 2 - 1; - out_[k] = 1e30; - } - - if (!cplx) { - api.rffti(N, wrk_.PtrBoth()).IgnoreError(); - api.rfftf(N, ref_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); - - { - float refN = ref_[N - 1]; - for (k = N - 2; k >= 1; --k) { - ref_[k + 1] = ref_[k]; - } - ref_[1] = refN; - } - } else { - api.cffti(N, wrk_.PtrBoth()).IgnoreError(); - api.cfftf(N, ref_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); - } - } - - for (k = 0; k < Nfloat; ++k) { - ref_max = MAX(ref_max, fabs(ref_[k])); - } - - if (pass == 0) { - api.pffft_transform(s_reg.PtrBefore(), in_.PtrBoth(), tmp_.PtrBoth(), wrk_.PtrBoth(), PFFFT_FORWARD).IgnoreError(); - - memcpy(tmp2, tmp, Nbytes); - memcpy(tmp, in, Nbytes); - - api.pffft_transform(s_reg.PtrBefore(), tmp_.PtrBoth(), tmp_.PtrBoth(), wrk_.PtrBoth(), PFFFT_FORWARD).IgnoreError(); - - printf("Forward transformation test passed.\n"); - - api.pffft_zreorder(s_reg.PtrBefore(), tmp_.PtrBoth(), out_.PtrBoth(), PFFFT_FORWARD).IgnoreError(); - api.pffft_zreorder(s_reg.PtrBefore(), out_.PtrBoth(), tmp_.PtrBoth(), PFFFT_BACKWARD).IgnoreError(); - - printf("Reordering test passed.\n"); - } else { - api.pffft_transform_ordered(s_reg.PtrBefore(), in_.PtrBoth(), tmp_.PtrBoth(), wrk_.PtrBoth(), PFFFT_FORWARD).IgnoreError(); - - } - } */ - - -MACRO for testing 
-TEST(AssignOrReturn, AssignsMultipleVariablesInSequence) { - auto func = []() -> absl::Status { - int value1; - SAPI_ASSIGN_OR_RETURN(value1, StatusOr(1)); - EXPECT_EQ(1, value1); - int value2; - SAPI_ASSIGN_OR_RETURN(value2, StatusOr(2)); - EXPECT_EQ(2, value2); - int value3; - SAPI_ASSIGN_OR_RETURN(value3, StatusOr(3)); - EXPECT_EQ(3, value3); - int value4; - SAPI_ASSIGN_OR_RETURN(value4, - StatusOr(absl::UnknownError("EXPECTED" - int value1; - SAPI_ASSIGN_OR_RETURN(value1, StatusOr(1)); - - - // PFFFT benchmark - /*{ - sapi::StatusOr s = api.pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); - if (s.ok()) { - sapi::v::GenericPtr s_reg(s.value()); - - t0 = uclock_sec(); - for (iter = 0; iter < max_iter; ++iter) { - printf("%s 1\n", api.pffft_transform(s_reg.PtrBoth(), X_.PtrBoth(), Z_.PtrBoth(), Y_.PtrBoth(), PFFFT_FORWARD).ToString().c_str()); - printf("%s 2\n", api.pffft_transform(s_reg.PtrBoth(), X_.PtrBoth(), Z_.PtrBoth(), Y_.PtrBoth(), PFFFT_FORWARD).ToString().c_str()); - } - t1 = uclock_sec(); - printf("%s 3 \n", api.pffft_destroy_setup(s_reg.PtrBoth()).ToString().c_str()); - - - flops = (max_iter*2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); - show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); - } else { - fprintf(stderr, "s NULL :(\n\n"); - } - }*/ - \ No newline at end of file diff --git a/oss-internship-2020/pffft/test_pffft.c b/oss-internship-2020/pffft/test_pffft.c deleted file mode 100644 index a5d20c2..0000000 --- a/oss-internship-2020/pffft/test_pffft.c +++ /dev/null @@ -1,419 +0,0 @@ -/* - Copyright (c) 2013 Julien Pommier. 
- - Small test & bench for PFFFT, comparing its performance with the scalar FFTPACK, FFTW, and Apple vDSP - - How to build: - - on linux, with fftw3: - gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm - - on macos, without fftw3: - clang -o test_pffft -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -framework Accelerate - - on macos, with fftw3: - clang -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework Accelerate - - on windows, with visual c++: - cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c - - build without SIMD instructions: - gcc -o test_pffft -DPFFFT_SIMD_DISABLE -O3 -Wall -W pffft.c test_pffft.c fftpack.c -lm - - */ - -#include "pffft.h" -#include "fftpack.h" - -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_SYS_TIMES -# include -# include -#endif - -#ifdef HAVE_VECLIB -# include -#endif - -#ifdef HAVE_FFTW -# include -#endif - -#define MAX(x,y) ((x)>(y)?(x):(y)) - -double frand() { - return rand()/(double)RAND_MAX; -} - -#if defined(HAVE_SYS_TIMES) - inline double uclock_sec(void) { - static double ttclk = 0.; - if (ttclk == 0.) ttclk = sysconf(_SC_CLK_TCK); - struct tms t; return ((double)times(&t)) / ttclk; - } -# else - double uclock_sec(void) -{ return (double)clock()/(double)CLOCKS_PER_SEC; } -#endif - - -/* compare results with the regular fftpack */ -void pffft_validate_N(int N, int cplx) { - int Nfloat = N*(cplx?2:1); - int Nbytes = Nfloat * sizeof(float); - float *ref, *in, *out, *tmp, *tmp2; - PFFFT_Setup *s = pffft_new_setup(N, cplx ? 
PFFFT_COMPLEX : PFFFT_REAL); - int pass; - - if (!s) { printf("Skipping N=%d, not supported\n", N); return; } - ref = pffft_aligned_malloc(Nbytes); - in = pffft_aligned_malloc(Nbytes); - out = pffft_aligned_malloc(Nbytes); - tmp = pffft_aligned_malloc(Nbytes); - tmp2 = pffft_aligned_malloc(Nbytes); - - for (pass=0; pass < 2; ++pass) { - float ref_max = 0; - int k; - //printf("N=%d pass=%d cplx=%d\n", N, pass, cplx); - // compute reference solution with FFTPACK - if (pass == 0) { - float *wrk = malloc(2*Nbytes+15*sizeof(float)); - for (k=0; k < Nfloat; ++k) { - ref[k] = in[k] = frand()*2-1; - out[k] = 1e30; - } - if (!cplx) { - rffti(N, wrk); - rfftf(N, ref, wrk); - // use our ordering for real ffts instead of the one of fftpack - { - float refN=ref[N-1]; - for (k=N-2; k >= 1; --k) ref[k+1] = ref[k]; - ref[1] = refN; - } - } else { - cffti(N, wrk); - cfftf(N, ref, wrk); - } - free(wrk); - } - - for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, fabs(ref[k])); - - - // pass 0 : non canonical ordering of transform coefficients - if (pass == 0) { - // test forward transform, with different input / output - pffft_transform(s, in, tmp, 0, PFFFT_FORWARD); - memcpy(tmp2, tmp, Nbytes); - memcpy(tmp, in, Nbytes); - pffft_transform(s, tmp, tmp, 0, PFFFT_FORWARD); - for (k = 0; k < Nfloat; ++k) { - assert(tmp2[k] == tmp[k]); - } - - // test reordering - pffft_zreorder(s, tmp, out, PFFFT_FORWARD); - pffft_zreorder(s, out, tmp, PFFFT_BACKWARD); - for (k = 0; k < Nfloat; ++k) { - assert(tmp2[k] == tmp[k]); - } - pffft_zreorder(s, tmp, out, PFFFT_FORWARD); - } else { - // pass 1 : canonical ordering of transform coeffs. 
- pffft_transform_ordered(s, in, tmp, 0, PFFFT_FORWARD); - memcpy(tmp2, tmp, Nbytes); - memcpy(tmp, in, Nbytes); - pffft_transform_ordered(s, tmp, tmp, 0, PFFFT_FORWARD); - for (k = 0; k < Nfloat; ++k) { - assert(tmp2[k] == tmp[k]); - } - memcpy(out, tmp, Nbytes); - } - - { - for (k=0; k < Nfloat; ++k) { - if (!(fabs(ref[k] - out[k]) < 1e-3*ref_max)) { - printf("%s forward PFFFT mismatch found for N=%d\n", (cplx?"CPLX":"REAL"), N); - exit(1); - } - } - - if (pass == 0) pffft_transform(s, tmp, out, 0, PFFFT_BACKWARD); - else pffft_transform_ordered(s, tmp, out, 0, PFFFT_BACKWARD); - memcpy(tmp2, out, Nbytes); - memcpy(out, tmp, Nbytes); - if (pass == 0) pffft_transform(s, out, out, 0, PFFFT_BACKWARD); - else pffft_transform_ordered(s, out, out, 0, PFFFT_BACKWARD); - for (k = 0; k < Nfloat; ++k) { - assert(tmp2[k] == out[k]); - out[k] *= 1.f/N; - } - for (k = 0; k < Nfloat; ++k) { - if (fabs(in[k] - out[k]) > 1e-3 * ref_max) { - printf("pass=%d, %s IFFFT does not match for N=%d\n", pass, (cplx?"CPLX":"REAL"), N); break; - exit(1); - } - } - } - - // quick test of the circular convolution in fft domain - { - float conv_err = 0, conv_max = 0; - - pffft_zreorder(s, ref, tmp, PFFFT_FORWARD); - memset(out, 0, Nbytes); - pffft_zconvolve_accumulate(s, ref, ref, out, 1.0); - pffft_zreorder(s, out, tmp2, PFFFT_FORWARD); - - for (k=0; k < Nfloat; k += 2) { - float ar = tmp[k], ai=tmp[k+1]; - if (cplx || k > 0) { - tmp[k] = ar*ar - ai*ai; - tmp[k+1] = 2*ar*ai; - } else { - tmp[0] = ar*ar; - tmp[1] = ai*ai; - } - } - - for (k=0; k < Nfloat; ++k) { - float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]); - if (d > conv_err) conv_err = d; - if (e > conv_max) conv_max = e; - } - if (conv_err > 1e-5*conv_max) { - printf("zconvolve error ? 
%g %g\n", conv_err, conv_max); exit(1); - } - } - - } - - printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); fflush(stdout); - - pffft_destroy_setup(s); - pffft_aligned_free(ref); - pffft_aligned_free(in); - pffft_aligned_free(out); - pffft_aligned_free(tmp); - pffft_aligned_free(tmp2); -} - -void pffft_validate(int cplx) { - static int Ntest[] = { 16, 32, 64, 96, 128, 160, 192, 256, 288, 384, 5*96, 512, 576, 5*128, 800, 864, 1024, 2048, 2592, 4000, 4096, 12000, 36864, 0}; - int k; - for (k = 0; Ntest[k]; ++k) { - int N = Ntest[k]; - if (N == 16 && !cplx) continue; - pffft_validate_N(N, cplx); - } -} - -int array_output_format = 0; - -void show_output(const char *name, int N, int cplx, float flops, float t0, float t1, int max_iter) { - float mflops = flops/1e6/(t1 - t0 + 1e-16); - if (array_output_format) { - if (flops != -1) { - printf("|%9.0f ", mflops); - } else printf("| n/a "); - } else { - if (flops != -1) { - printf("N=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), name, mflops, (t1-t0)/2/max_iter * 1e9, max_iter); - } - } - fflush(stdout); -} - -void benchmark_ffts(int N, int cplx) { - int Nfloat = (cplx ? N*2 : N); - int Nbytes = Nfloat * sizeof(float); - float *X = pffft_aligned_malloc(Nbytes), *Y = pffft_aligned_malloc(Nbytes), *Z = pffft_aligned_malloc(Nbytes); - - double t0, t1, flops; - - int k; - int max_iter = 5120000/N*4; -#ifdef __arm__ - max_iter /= 4; -#endif - int iter; - - for (k = 0; k < Nfloat; ++k) { - X[k] = 0; //sqrtf(k+1); - } - - // FFTPack benchmark - { - float *wrk = malloc(2*Nbytes + 15*sizeof(float)); - int max_iter_ = max_iter/pffft_simd_size(); if (max_iter_ == 0) max_iter_ = 1; - if (cplx) cffti(N, wrk); - else rffti(N, wrk); - t0 = uclock_sec(); - - for (iter = 0; iter < max_iter_; ++iter) { - if (cplx) { - cfftf(N, X, wrk); - cfftb(N, X, wrk); - } else { - rfftf(N, X, wrk); - rfftb(N, X, wrk); - } - } - t1 = uclock_sec(); - free(wrk); - - flops = (max_iter_*2) * ((cplx ? 
5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html - show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); - } - -#ifdef HAVE_VECLIB - int log2N = (int)(log(N)/log(2) + 0.5f); - if (N == (1< 1 && strcmp(argv[1], "--array-format") == 0) { - array_output_format = 1; - } - -#ifndef PFFFT_SIMD_DISABLE - validate_pffft_simd(); -#endif - pffft_validate(1); - pffft_validate(0); - if (!array_output_format) { - for (i=0; Nvalues[i] > 0; ++i) { - benchmark_ffts(Nvalues[i], 0 /* real fft */); - } - for (i=0; Nvalues[i] > 0; ++i) { - benchmark_ffts(Nvalues[i], 1 /* cplx fft */); - } - } else { - printf("| input len "); - printf("|real FFTPack"); -#ifdef HAVE_VECLIB - printf("| real vDSP "); -#endif -#ifdef HAVE_FFTW - printf("| real FFTW "); -#endif - printf("| real PFFFT | "); - - printf("|cplx FFTPack"); -#ifdef HAVE_VECLIB - printf("| cplx vDSP "); -#endif -#ifdef HAVE_FFTW - printf("| cplx FFTW "); -#endif - printf("| cplx PFFFT |\n"); - for (i=0; Nvalues[i] > 0; ++i) { - printf("|%9d ", Nvalues[i]); - benchmark_ffts(Nvalues[i], 0); - printf("| "); - benchmark_ffts(Nvalues[i], 1); - printf("|\n"); - } - printf(" (numbers are given in MFlops)\n"); - } - - - return 0; -} diff --git a/oss-internship-2020/pffft/test_pffft_sandboxed.cc b/oss-internship-2020/pffft/test_pffft_sandboxed.cc deleted file mode 100644 index 523dce0..0000000 --- a/oss-internship-2020/pffft/test_pffft_sandboxed.cc +++ /dev/null @@ -1,206 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "fftpack.h" -#include "pffft_sapi.sapi.h" -#include "sandboxed_api/util/flag.h" -#include "sandboxed_api/vars.h" - -ABSL_DECLARE_FLAG(string, sandbox2_danger_danger_permit_all); -ABSL_DECLARE_FLAG(string, sandbox2_danger_danger_permit_all_and_log); - -class pffftSapiSandbox : public pffftSandbox { - public: - std::unique_ptr ModifyPolicy( - sandbox2::PolicyBuilder*) override { - return sandbox2::PolicyBuilder() - 
.AllowStaticStartup() - .AllowOpen() - .AllowRead() - .AllowWrite() - .AllowSystemMalloc() - .AllowExit() - .AllowSyscalls({ - __NR_futex, - __NR_close, - __NR_getrusage, - }) - .DisableNamespaces() - .BuildOrDie(); - } -}; - -double frand() { return rand() / (double)RAND_MAX; } - -double uclock_sec(void) { return (double)clock() / (double)CLOCKS_PER_SEC; } - -int array_output_format = 0; - -void show_output(const char* name, int N, int cplx, float flops, float t0, - float t1, int max_iter) { - float mflops = flops / 1e6 / (t1 - t0 + 1e-16); - if (array_output_format) { - if (flops != -1) { - printf("|%9.0f ", mflops); - } else - printf("| n/a "); - } else { - if (flops != -1) { - printf("N=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, - (cplx ? "CPLX" : "REAL"), name, mflops, - (t1 - t0) / 2 / max_iter * 1e9, max_iter); - } - } - fflush(stdout); -} - -int main(int argc, char* argv[]) { - /* - * Initialize Google's logging library. - */ - google::InitGoogleLogging(argv[0]); - - gflags::ParseCommandLineFlags(&argc, &argv, true); - /* - * Nvalues is a vector keeping the values by which iterates N, its value - * representing the input length. More concrete, N is the number of - * data points the caclulus is up to (determinating its accuracy). - * To show the performance of Fast-Fourier Transformations the program is - * testing for various values of N. - */ - int Nvalues[] = {64, 96, 128, 160, 192, 256, - 384, 5 * 96, 512, 5 * 128, 3 * 256, 800, - 1024, 2048, 2400, 4096, 8192, 9 * 1024, - 16384, 32768, 256 * 1024, 1024 * 1024, -1}; - int i; - - VLOG(1) << "Initializing sandbox...\n"; - - pffftSapiSandbox sandbox; - sandbox.Init().IgnoreError(); - - VLOG(1) << "Initialization: " << sandbox.Init().ToString().c_str() << "\n"; - - pffftApi api(&sandbox); - - int N, cplx; - - cplx = 0; - - do { - for (i = 0; i < 23; i++) { - N = Nvalues[i]; - - int Nfloat = N * (cplx ? 
2 : 1); - int Nbytes = Nfloat * sizeof(float); - int pass; - - float ref[Nbytes], in[Nbytes], out[Nbytes], tmp[Nbytes], tmp2[Nbytes]; - - sapi::v::Array ref_(ref, Nbytes); - sapi::v::Array in_(in, Nbytes); - sapi::v::Array out_(out, Nbytes); - sapi::v::Array tmp_(tmp, Nbytes); - sapi::v::Array tmp2_(tmp2, Nbytes); - - float wrk[2 * Nbytes + 15 * sizeof(float)]; - sapi::v::Array wrk_(wrk, 2 * Nbytes + 15 * sizeof(float)); - - float ref_max = 0; - int k; - - Nfloat = (cplx ? N * 2 : N); - float X[Nbytes], Y[Nbytes], Z[Nbytes]; - sapi::v::Array X_(X, Nbytes), Y_(Y, Nbytes), Z_(Z, Nbytes); - - double t0, t1, flops; - - int max_iter = 5120000 / N * 4; - #ifdef __arm__ - max_iter /= 4; - #endif - int iter; - - for (k = 0; k < Nfloat; ++k) { - X[k] = 0; - } - - /* - * FFTPack benchmark - */ - { - /* - * SIMD_SZ == 4 (returning value of pffft_simd_size()) - */ - int max_iter_ = - max_iter / 4; - - if (max_iter_ == 0) max_iter_ = 1; - if (cplx) { - api.cffti(N, wrk_.PtrBoth()).IgnoreError(); - } else { - api.rffti(N, wrk_.PtrBoth()).IgnoreError(); - } - t0 = uclock_sec(); - - for (iter = 0; iter < max_iter_; ++iter) { - if (cplx) { - api.cfftf(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); - api.cfftb(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); - } else { - api.rfftf(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); - api.rfftb(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); - } - } - t1 = uclock_sec(); - - flops = (max_iter_ * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); - show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); - } - - /* - * PFFFT benchmark - */ - { - sapi::StatusOr s = - api.pffft_new_setup(N, cplx ? 
PFFFT_COMPLEX : PFFFT_REAL); - - VLOG(1) << "Setup status is: " << s.status().ToString().c_str() << "\n"; - - if (s.ok()) { - sapi::v::RemotePtr s_reg(s.value()); - - t0 = uclock_sec(); - for (iter = 0; iter < max_iter; ++iter) { - api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), Y_.PtrBoth(), - PFFFT_FORWARD) - .IgnoreError(); - api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), Y_.PtrBoth(), - PFFFT_FORWARD) - .IgnoreError(); - } - - t1 = uclock_sec(); - api.pffft_destroy_setup(&s_reg).IgnoreError(); - - flops = - (max_iter * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); - show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); - } - - VLOG(1) << "N = " << N << " SUCCESSFULLY\n\n"; - } - } - - cplx = !cplx; - } while (cplx); - - return 0; -} \ No newline at end of file From 726a9345fffac8c152cc9f36e1a3b0d5b437f3e7 Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Thu, 20 Aug 2020 12:37:34 +0000 Subject: [PATCH 15/42] Enable namespaces --- oss-internship-2020/pffft/README.md | 8 +++++++- oss-internship-2020/pffft/main_pffft_sandboxed.cc | 9 ++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/oss-internship-2020/pffft/README.md b/oss-internship-2020/pffft/README.md index 52facf3..b3ceef8 100644 --- a/oss-internship-2020/pffft/README.md +++ b/oss-internship-2020/pffft/README.md @@ -64,4 +64,10 @@ In the end, the performance of PFFFT library it is outlined by the output.* uintptr_t instead of PFFFT_Setup Solution: using "sapi::v::RemotePtr" instead of "sapi::v::GenericPtr" - to access the memory of object s \ No newline at end of file + to access the memory of object s + + - [Unresolved] compiling bug: "No spave left on device" + The building process creates some `embed` files that use lots of + memory, trying to write them on /tmp. + + Temporary solution: clean /tmp directory by `sudo rm -rf /tmp/*`. 
\ No newline at end of file diff --git a/oss-internship-2020/pffft/main_pffft_sandboxed.cc b/oss-internship-2020/pffft/main_pffft_sandboxed.cc index 662b949..b9a5a90 100644 --- a/oss-internship-2020/pffft/main_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/main_pffft_sandboxed.cc @@ -34,7 +34,6 @@ class pffftSapiSandbox : public pffftSandbox { __NR_close, __NR_getrusage, }) - .DisableNamespaces() .BuildOrDie(); } }; @@ -83,12 +82,12 @@ int main(int argc, char* argv[]) { 16384, 32768, 256 * 1024, 1024 * 1024, -1}; int i; - VLOG(1) << "Initializing sandbox...\n"; + LOG(INFO) << "Initializing sandbox...\n"; pffftSapiSandbox sandbox; sandbox.Init().IgnoreError(); - VLOG(1) << "Initialization: " << sandbox.Init().ToString().c_str() << "\n"; + LOG(INFO) << "Initialization: " << sandbox.Init().ToString().c_str() << "\n"; pffftApi api(&sandbox); @@ -174,7 +173,7 @@ int main(int argc, char* argv[]) { sapi::StatusOr s = api.pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); - VLOG(1) << "Setup status is: " << s.status().ToString().c_str() << "\n"; + LOG(INFO) << "Setup status is: " << s.status().ToString().c_str() << "\n"; if (s.ok()) { sapi::v::RemotePtr s_reg(s.value()); @@ -197,7 +196,7 @@ int main(int argc, char* argv[]) { show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); } - VLOG(1) << "N = " << N << " SUCCESSFULLY\n\n"; + LOG(INFO) << "N = " << N << " SUCCESSFULLY\n\n"; } } From a2873ac099bd08ddd08427ebeb74e6e6f334cf58 Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Thu, 20 Aug 2020 13:15:02 +0000 Subject: [PATCH 16/42] Update README.md --- oss-internship-2020/pffft/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/oss-internship-2020/pffft/README.md b/oss-internship-2020/pffft/README.md index b3ceef8..8b8b2bd 100644 --- a/oss-internship-2020/pffft/README.md +++ b/oss-internship-2020/pffft/README.md @@ -58,7 +58,7 @@ In the end, the performance of PFFFT library it is outlined by the output.* N > 64, status not OK Problem 
on initialising sapi::StatusOr s; the memory that stays for s is not the same with the address passed in pffft_transform function. - (sapi :: v :: GenericPtr - to be changed) + (sapi::v::GenericPtr - to be changed) Temporary solution: change the generated files to accept uintptr_t instead of PFFFT_Setup @@ -66,8 +66,8 @@ In the end, the performance of PFFFT library it is outlined by the output.* Solution: using "sapi::v::RemotePtr" instead of "sapi::v::GenericPtr" to access the memory of object s - - [Unresolved] compiling bug: "No spave left on device" + - [Unresolved] compiling bug: "No space left on device" The building process creates some `embed` files that use lots of memory, trying to write them on /tmp. - + Temporary solution: clean /tmp directory by `sudo rm -rf /tmp/*`. \ No newline at end of file From 82c56775ef4d08fe241af21b240cf3be9ac60c60 Mon Sep 17 00:00:00 2001 From: Wiktor Garbacz Date: Tue, 25 Aug 2020 06:21:43 -0700 Subject: [PATCH 17/42] `StatusOr` cleanups PiperOrigin-RevId: 328318284 Change-Id: I207570c0fee6797dbc8995d36ef2130b0bff28fa --- sandboxed_api/proto_helper.h | 2 +- sandboxed_api/rpcchannel.h | 2 +- sandboxed_api/sandbox.cc | 2 +- sandboxed_api/sandbox.h | 5 +++-- sandboxed_api/sandbox2/comms.cc | 2 +- sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel | 1 + .../sandbox2/examples/network_proxy/networkproxy_bin.cc | 4 ++-- sandboxed_api/sandbox2/forkserver.cc | 2 +- sandboxed_api/sandbox2/mounts.cc | 2 +- sandboxed_api/sandbox2/network_proxy/BUILD.bazel | 2 ++ sandboxed_api/sandbox2/network_proxy/client.cc | 2 +- sandboxed_api/sandbox2/network_proxy/client.h | 2 +- sandboxed_api/sandbox2/network_proxy/filtering.cc | 3 ++- sandboxed_api/sandbox2/network_proxy/filtering.h | 2 +- sandboxed_api/sandbox2/policybuilder.h | 2 +- sandboxed_api/sandbox2/sandbox2.h | 2 +- sandboxed_api/sandbox2/util.cc | 4 ++-- sandboxed_api/sandbox2/util.h | 4 ++-- sandboxed_api/sandbox2/util/minielf.cc | 8 +++----- sandboxed_api/sandbox2/util/minielf.h 
| 2 +- sandboxed_api/tools/clang_generator/emitter.cc | 2 +- sandboxed_api/tools/clang_generator/emitter.h | 2 +- sandboxed_api/tools/clang_generator/generator.cc | 4 ++-- sandboxed_api/tools/clang_generator/generator.h | 4 ++-- sandboxed_api/tools/clang_generator/types.cc | 2 +- sandboxed_api/util/status.h | 4 ---- 26 files changed, 36 insertions(+), 37 deletions(-) diff --git a/sandboxed_api/proto_helper.h b/sandboxed_api/proto_helper.h index 53ac26d..b00a73e 100644 --- a/sandboxed_api/proto_helper.h +++ b/sandboxed_api/proto_helper.h @@ -22,8 +22,8 @@ #include #include "absl/status/status.h" -#include "sandboxed_api/proto_arg.pb.h" #include "sandboxed_api/util/statusor.h" +#include "sandboxed_api/proto_arg.pb.h" namespace sapi { diff --git a/sandboxed_api/rpcchannel.h b/sandboxed_api/rpcchannel.h index ed50326..c54ef8e 100644 --- a/sandboxed_api/rpcchannel.h +++ b/sandboxed_api/rpcchannel.h @@ -18,11 +18,11 @@ #include #include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "absl/synchronization/mutex.h" #include "sandboxed_api/call.h" #include "sandboxed_api/sandbox2/comms.h" #include "sandboxed_api/var_type.h" -#include "sandboxed_api/util/statusor.h" namespace sapi { diff --git a/sandboxed_api/sandbox.cc b/sandboxed_api/sandbox.cc index b29646b..18239a4 100644 --- a/sandboxed_api/sandbox.cc +++ b/sandboxed_api/sandbox.cc @@ -392,7 +392,7 @@ absl::Status Sandbox::TransferFromSandboxee(v::Var* var) { } sapi::StatusOr Sandbox::GetCString(const v::RemotePtr& str, - uint64_t max_length) { + uint64_t max_length) { if (!is_active()) { return absl::UnavailableError("Sandbox not active"); } diff --git a/sandboxed_api/sandbox.h b/sandboxed_api/sandbox.h index d8f37ef..8b4ee6a 100644 --- a/sandboxed_api/sandbox.h +++ b/sandboxed_api/sandbox.h @@ -102,8 +102,9 @@ class Sandbox { absl::Status TransferToSandboxee(v::Var* var); absl::Status TransferFromSandboxee(v::Var* var); - sapi::StatusOr GetCString( - const v::RemotePtr& str, uint64_t 
max_length = 10ULL << 20 /* 10 MiB*/ + sapi::StatusOr GetCString(const v::RemotePtr& str, + uint64_t max_length = 10ULL + << 20 /* 10 MiB*/ ); // Waits until the sandbox terminated and returns the result. diff --git a/sandboxed_api/sandbox2/comms.cc b/sandboxed_api/sandbox2/comms.cc index 6bb891c..3e61013 100644 --- a/sandboxed_api/sandbox2/comms.cc +++ b/sandboxed_api/sandbox2/comms.cc @@ -36,6 +36,7 @@ #include "google/protobuf/message.h" #include "absl/memory/memory.h" #include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/synchronization/mutex.h" @@ -44,7 +45,6 @@ #include "sandboxed_api/util/raw_logging.h" #include "sandboxed_api/util/status.h" #include "sandboxed_api/util/status_macros.h" -#include "sandboxed_api/util/statusor.h" #ifdef MEMORY_SANITIZER #include "base/dynamic_annotations.h" diff --git a/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel b/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel index 133cb88..480c078 100644 --- a/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel +++ b/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel @@ -49,6 +49,7 @@ cc_binary( "//sandboxed_api/util:flags", "//sandboxed_api/util:status", "//sandboxed_api/util:statusor", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings:str_format", ], ) diff --git a/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc b/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc index c22947d..b355522 100644 --- a/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc +++ b/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc @@ -12,15 +12,15 @@ #include #include "sandboxed_api/util/flag.h" +#include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/str_format.h" #include "sandboxed_api/sandbox2/client.h" #include "sandboxed_api/sandbox2/comms.h" 
#include "sandboxed_api/sandbox2/network_proxy/client.h" #include "sandboxed_api/sandbox2/util/fileops.h" #include "sandboxed_api/sandbox2/util/strerror.h" -#include "sandboxed_api/util/status.h" #include "sandboxed_api/util/status_macros.h" -#include "sandboxed_api/util/statusor.h" ABSL_FLAG(bool, connect_with_handler, true, "Connect using automatic mode."); diff --git a/sandboxed_api/sandbox2/forkserver.cc b/sandboxed_api/sandbox2/forkserver.cc index 2125927..d0b637c 100644 --- a/sandboxed_api/sandbox2/forkserver.cc +++ b/sandboxed_api/sandbox2/forkserver.cc @@ -36,6 +36,7 @@ #include "absl/memory/memory.h" #include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" @@ -55,7 +56,6 @@ #include "sandboxed_api/sandbox2/util/fileops.h" #include "sandboxed_api/sandbox2/util/strerror.h" #include "sandboxed_api/util/raw_logging.h" -#include "sandboxed_api/util/statusor.h" namespace { // "Moves" the old FD to the new FD number. 
diff --git a/sandboxed_api/sandbox2/mounts.cc b/sandboxed_api/sandbox2/mounts.cc index 2f734e2..e1c0198 100644 --- a/sandboxed_api/sandbox2/mounts.cc +++ b/sandboxed_api/sandbox2/mounts.cc @@ -27,6 +27,7 @@ #include "google/protobuf/util/message_differencer.h" #include "absl/container/flat_hash_set.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/ascii.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" @@ -39,7 +40,6 @@ #include "sandboxed_api/sandbox2/util/strerror.h" #include "sandboxed_api/util/raw_logging.h" #include "sandboxed_api/util/status_macros.h" -#include "sandboxed_api/util/statusor.h" namespace sandbox2 { namespace { diff --git a/sandboxed_api/sandbox2/network_proxy/BUILD.bazel b/sandboxed_api/sandbox2/network_proxy/BUILD.bazel index 3a3a9d6..5935390 100644 --- a/sandboxed_api/sandbox2/network_proxy/BUILD.bazel +++ b/sandboxed_api/sandbox2/network_proxy/BUILD.bazel @@ -46,6 +46,7 @@ cc_library( "//sandboxed_api/sandbox2/util:strerror", "//sandboxed_api/util:status", "@com_google_absl//absl/memory", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", "@com_google_glog//:glog", @@ -62,6 +63,7 @@ cc_library( "//sandboxed_api/sandbox2/util:strerror", "//sandboxed_api/util:status", "//sandboxed_api/util:statusor", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_glog//:glog", ], diff --git a/sandboxed_api/sandbox2/network_proxy/client.cc b/sandboxed_api/sandbox2/network_proxy/client.cc index 586d201..e528041 100644 --- a/sandboxed_api/sandbox2/network_proxy/client.cc +++ b/sandboxed_api/sandbox2/network_proxy/client.cc @@ -25,9 +25,9 @@ #include #include "absl/memory/memory.h" +#include "absl/status/status.h" #include "absl/strings/str_cat.h" #include "sandboxed_api/sandbox2/util/strerror.h" -#include "sandboxed_api/util/status.h" #include "sandboxed_api/util/status_macros.h" namespace sandbox2 { diff --git 
a/sandboxed_api/sandbox2/network_proxy/client.h b/sandboxed_api/sandbox2/network_proxy/client.h index e7af4dc..7318993 100644 --- a/sandboxed_api/sandbox2/network_proxy/client.h +++ b/sandboxed_api/sandbox2/network_proxy/client.h @@ -17,9 +17,9 @@ #include +#include "absl/status/status.h" #include "absl/synchronization/mutex.h" #include "sandboxed_api/sandbox2/comms.h" -#include "sandboxed_api/util/status.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/network_proxy/filtering.cc b/sandboxed_api/sandbox2/network_proxy/filtering.cc index d6389d8..fc93e6c 100644 --- a/sandboxed_api/sandbox2/network_proxy/filtering.cc +++ b/sandboxed_api/sandbox2/network_proxy/filtering.cc @@ -17,11 +17,12 @@ #include #include +#include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "sandboxed_api/sandbox2/util/strerror.h" -#include "sandboxed_api/util/status.h" #include "sandboxed_api/util/status_macros.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/network_proxy/filtering.h b/sandboxed_api/sandbox2/network_proxy/filtering.h index fb6714a..c0a235b 100644 --- a/sandboxed_api/sandbox2/network_proxy/filtering.h +++ b/sandboxed_api/sandbox2/network_proxy/filtering.h @@ -19,8 +19,8 @@ #include -#include "sandboxed_api/sandbox2/comms.h" #include "sandboxed_api/util/statusor.h" +#include "sandboxed_api/sandbox2/comms.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/policybuilder.h b/sandboxed_api/sandbox2/policybuilder.h index f05023c..07095ba 100644 --- a/sandboxed_api/sandbox2/policybuilder.h +++ b/sandboxed_api/sandbox2/policybuilder.h @@ -29,11 +29,11 @@ #include #include "absl/base/macros.h" #include "absl/memory/memory.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/string_view.h" #include "sandboxed_api/sandbox2/mounts.h" #include "sandboxed_api/sandbox2/network_proxy/filtering.h" #include 
"sandboxed_api/sandbox2/policy.h" -#include "sandboxed_api/util/statusor.h" struct bpf_labels; diff --git a/sandboxed_api/sandbox2/sandbox2.h b/sandboxed_api/sandbox2/sandbox2.h index 9527ff2..633297f 100644 --- a/sandboxed_api/sandbox2/sandbox2.h +++ b/sandboxed_api/sandbox2/sandbox2.h @@ -26,6 +26,7 @@ #include #include "absl/base/macros.h" #include "absl/memory/memory.h" +#include "sandboxed_api/util/statusor.h" #include "sandboxed_api/sandbox2/comms.h" #include "sandboxed_api/sandbox2/executor.h" #include "sandboxed_api/sandbox2/ipc.h" @@ -33,7 +34,6 @@ #include "sandboxed_api/sandbox2/notify.h" #include "sandboxed_api/sandbox2/policy.h" #include "sandboxed_api/sandbox2/result.h" -#include "sandboxed_api/util/statusor.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/util.cc b/sandboxed_api/sandbox2/util.cc index 24d4a36..b760d26 100644 --- a/sandboxed_api/sandbox2/util.cc +++ b/sandboxed_api/sandbox2/util.cc @@ -183,8 +183,8 @@ bool CreateMemFd(int* fd, const char* name) { } sapi::StatusOr Communicate(const std::vector& argv, - const std::vector& envv, - std::string* output) { + const std::vector& envv, + std::string* output) { int cout_pipe[2]; posix_spawn_file_actions_t action; diff --git a/sandboxed_api/sandbox2/util.h b/sandboxed_api/sandbox2/util.h index 87fe31c..e42bf72 100644 --- a/sandboxed_api/sandbox2/util.h +++ b/sandboxed_api/sandbox2/util.h @@ -63,8 +63,8 @@ bool CreateMemFd(int* fd, const char* name = "buffer_file"); // Executes a the program given by argv and the specified environment and // captures any output to stdout/stderr. sapi::StatusOr Communicate(const std::vector& argv, - const std::vector& envv, - std::string* output); + const std::vector& envv, + std::string* output); // Returns signal description. 
std::string GetSignalName(int signo); diff --git a/sandboxed_api/sandbox2/util/minielf.cc b/sandboxed_api/sandbox2/util/minielf.cc index 3eac979..dce033b 100644 --- a/sandboxed_api/sandbox2/util/minielf.cc +++ b/sandboxed_api/sandbox2/util/minielf.cc @@ -219,8 +219,7 @@ absl::Status ElfParser::ReadFileHeader() { return absl::OkStatus(); } -sapi::StatusOr ElfParser::ReadSectionHeader( - absl::string_view src) { +sapi::StatusOr ElfParser::ReadSectionHeader(absl::string_view src) { if (src.size() < sizeof(Elf64_Shdr)) { return absl::FailedPreconditionError( absl::StrCat("invalid section header data: got ", src.size(), @@ -293,8 +292,7 @@ sapi::StatusOr ElfParser::ReadSectionContents( return rv; } -sapi::StatusOr ElfParser::ReadProgramHeader( - absl::string_view src) { +sapi::StatusOr ElfParser::ReadProgramHeader(absl::string_view src) { if (src.size() < sizeof(Elf64_Phdr)) { return absl::FailedPreconditionError( absl::StrCat("invalid program header data: got ", src.size(), @@ -514,7 +512,7 @@ sapi::StatusOr ElfParser::Parse(FILE* elf, uint32_t features) { } sapi::StatusOr ElfFile::ParseFromFile(const std::string& filename, - uint32_t features) { + uint32_t features) { std::unique_ptr elf{fopen(filename.c_str(), "r"), [](FILE* f) { fclose(f); }}; if (!elf) { diff --git a/sandboxed_api/sandbox2/util/minielf.h b/sandboxed_api/sandbox2/util/minielf.h index 247d271..c33a604 100644 --- a/sandboxed_api/sandbox2/util/minielf.h +++ b/sandboxed_api/sandbox2/util/minielf.h @@ -34,7 +34,7 @@ class ElfFile { }; static sapi::StatusOr ParseFromFile(const std::string& filename, - uint32_t features); + uint32_t features); int64_t file_size() const { return file_size_; } const std::string& interpreter() const { return interpreter_; } diff --git a/sandboxed_api/tools/clang_generator/emitter.cc b/sandboxed_api/tools/clang_generator/emitter.cc index 18fdcd9..78b9a9a 100644 --- a/sandboxed_api/tools/clang_generator/emitter.cc +++ b/sandboxed_api/tools/clang_generator/emitter.cc @@ -46,10 
+46,10 @@ constexpr absl::string_view kHeaderProlog = #include "absl/base/macros.h" #include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "sandboxed_api/sandbox.h" #include "sandboxed_api/vars.h" #include "sandboxed_api/util/status_macros.h" -#include "sandboxed_api/util/statusor.h" )"; constexpr absl::string_view kHeaderEpilog = diff --git a/sandboxed_api/tools/clang_generator/emitter.h b/sandboxed_api/tools/clang_generator/emitter.h index 42ba92d..ad9f5f8 100644 --- a/sandboxed_api/tools/clang_generator/emitter.h +++ b/sandboxed_api/tools/clang_generator/emitter.h @@ -18,12 +18,12 @@ #include #include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/string_view.h" #include "clang/AST/Decl.h" #include "clang/AST/Type.h" #include "sandboxed_api/tools/clang_generator/generator.h" #include "sandboxed_api/tools/clang_generator/types.h" -#include "sandboxed_api/util/statusor.h" namespace sapi { diff --git a/sandboxed_api/tools/clang_generator/generator.cc b/sandboxed_api/tools/clang_generator/generator.cc index 508877e..ffe2106 100644 --- a/sandboxed_api/tools/clang_generator/generator.cc +++ b/sandboxed_api/tools/clang_generator/generator.cc @@ -18,12 +18,12 @@ #include #include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "clang/Format/Format.h" #include "sandboxed_api/sandbox2/util/fileops.h" #include "sandboxed_api/tools/clang_generator/diagnostics.h" #include "sandboxed_api/tools/clang_generator/emitter.h" #include "sandboxed_api/util/status_macros.h" -#include "sandboxed_api/util/statusor.h" namespace sapi { namespace { @@ -68,7 +68,7 @@ bool GeneratorASTVisitor::VisitFunctionDecl(clang::FunctionDecl* decl) { namespace internal { sapi::StatusOr ReformatGoogleStyle(const std::string& filename, - const std::string& code) { + const std::string& code) { // Configure code style based on Google style, but enforce pointer alignment clang::format::FormatStyle style = 
clang::format::getGoogleStyle(clang::format::FormatStyle::LK_Cpp); diff --git a/sandboxed_api/tools/clang_generator/generator.h b/sandboxed_api/tools/clang_generator/generator.h index 24e2658..3b9680b 100644 --- a/sandboxed_api/tools/clang_generator/generator.h +++ b/sandboxed_api/tools/clang_generator/generator.h @@ -20,13 +20,13 @@ #include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendAction.h" #include "clang/Tooling/Tooling.h" #include "sandboxed_api/tools/clang_generator/types.h" -#include "sandboxed_api/util/statusor.h" namespace sapi { @@ -67,7 +67,7 @@ class GeneratorASTVisitor namespace internal { sapi::StatusOr ReformatGoogleStyle(const std::string& filename, - const std::string& code); + const std::string& code); } // namespace internal diff --git a/sandboxed_api/tools/clang_generator/types.cc b/sandboxed_api/tools/clang_generator/types.cc index 4d91499..321d1e2 100644 --- a/sandboxed_api/tools/clang_generator/types.cc +++ b/sandboxed_api/tools/clang_generator/types.cc @@ -208,7 +208,7 @@ std::string MapQualTypeReturn(const clang::ASTContext& context, return "absl::Status"; } // Remove const qualifier like in MapQualType(). - return absl::StrCat("::sapi::StatusOr<", + return absl::StrCat("sapi::StatusOr<", MaybeRemoveConst(context, qual).getAsString(), ">"); } diff --git a/sandboxed_api/util/status.h b/sandboxed_api/util/status.h index 6390bc0..d9962fd 100644 --- a/sandboxed_api/util/status.h +++ b/sandboxed_api/util/status.h @@ -12,10 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This file and it's implementation provide a custom fork of -// util/task/status.h. 
This will become obsolete and will be replaced once -// Abseil releases absl::Status. - #ifndef THIRD_PARTY_SAPI_UTIL_STATUS_H_ #define THIRD_PARTY_SAPI_UTIL_STATUS_H_ From e7a195ce42a1c3dbcdd1f78c954a4583b85fc789 Mon Sep 17 00:00:00 2001 From: Sandboxed API Team Date: Tue, 25 Aug 2020 09:00:55 -0700 Subject: [PATCH 18/42] Automated rollback of commit 82c56775ef4d08fe241af21b240cf3be9ac60c60. PiperOrigin-RevId: 328340042 Change-Id: Ib225f8012fb373c74e3f1b3e6201b2daca7da40b --- sandboxed_api/proto_helper.h | 2 +- sandboxed_api/rpcchannel.h | 2 +- sandboxed_api/sandbox.cc | 2 +- sandboxed_api/sandbox.h | 5 ++--- sandboxed_api/sandbox2/comms.cc | 2 +- sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel | 1 - .../sandbox2/examples/network_proxy/networkproxy_bin.cc | 4 ++-- sandboxed_api/sandbox2/forkserver.cc | 2 +- sandboxed_api/sandbox2/mounts.cc | 2 +- sandboxed_api/sandbox2/network_proxy/BUILD.bazel | 2 -- sandboxed_api/sandbox2/network_proxy/client.cc | 2 +- sandboxed_api/sandbox2/network_proxy/client.h | 2 +- sandboxed_api/sandbox2/network_proxy/filtering.cc | 3 +-- sandboxed_api/sandbox2/network_proxy/filtering.h | 2 +- sandboxed_api/sandbox2/policybuilder.h | 2 +- sandboxed_api/sandbox2/sandbox2.h | 2 +- sandboxed_api/sandbox2/util.cc | 4 ++-- sandboxed_api/sandbox2/util.h | 4 ++-- sandboxed_api/sandbox2/util/minielf.cc | 8 +++++--- sandboxed_api/sandbox2/util/minielf.h | 2 +- sandboxed_api/tools/clang_generator/emitter.cc | 2 +- sandboxed_api/tools/clang_generator/emitter.h | 2 +- sandboxed_api/tools/clang_generator/generator.cc | 4 ++-- sandboxed_api/tools/clang_generator/generator.h | 4 ++-- sandboxed_api/tools/clang_generator/types.cc | 2 +- sandboxed_api/util/status.h | 4 ++++ 26 files changed, 37 insertions(+), 36 deletions(-) diff --git a/sandboxed_api/proto_helper.h b/sandboxed_api/proto_helper.h index b00a73e..53ac26d 100644 --- a/sandboxed_api/proto_helper.h +++ b/sandboxed_api/proto_helper.h @@ -22,8 +22,8 @@ #include #include 
"absl/status/status.h" -#include "sandboxed_api/util/statusor.h" #include "sandboxed_api/proto_arg.pb.h" +#include "sandboxed_api/util/statusor.h" namespace sapi { diff --git a/sandboxed_api/rpcchannel.h b/sandboxed_api/rpcchannel.h index c54ef8e..ed50326 100644 --- a/sandboxed_api/rpcchannel.h +++ b/sandboxed_api/rpcchannel.h @@ -18,11 +18,11 @@ #include #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" #include "absl/synchronization/mutex.h" #include "sandboxed_api/call.h" #include "sandboxed_api/sandbox2/comms.h" #include "sandboxed_api/var_type.h" +#include "sandboxed_api/util/statusor.h" namespace sapi { diff --git a/sandboxed_api/sandbox.cc b/sandboxed_api/sandbox.cc index 18239a4..b29646b 100644 --- a/sandboxed_api/sandbox.cc +++ b/sandboxed_api/sandbox.cc @@ -392,7 +392,7 @@ absl::Status Sandbox::TransferFromSandboxee(v::Var* var) { } sapi::StatusOr Sandbox::GetCString(const v::RemotePtr& str, - uint64_t max_length) { + uint64_t max_length) { if (!is_active()) { return absl::UnavailableError("Sandbox not active"); } diff --git a/sandboxed_api/sandbox.h b/sandboxed_api/sandbox.h index 8b4ee6a..d8f37ef 100644 --- a/sandboxed_api/sandbox.h +++ b/sandboxed_api/sandbox.h @@ -102,9 +102,8 @@ class Sandbox { absl::Status TransferToSandboxee(v::Var* var); absl::Status TransferFromSandboxee(v::Var* var); - sapi::StatusOr GetCString(const v::RemotePtr& str, - uint64_t max_length = 10ULL - << 20 /* 10 MiB*/ + sapi::StatusOr GetCString( + const v::RemotePtr& str, uint64_t max_length = 10ULL << 20 /* 10 MiB*/ ); // Waits until the sandbox terminated and returns the result. 
diff --git a/sandboxed_api/sandbox2/comms.cc b/sandboxed_api/sandbox2/comms.cc index 3e61013..6bb891c 100644 --- a/sandboxed_api/sandbox2/comms.cc +++ b/sandboxed_api/sandbox2/comms.cc @@ -36,7 +36,6 @@ #include "google/protobuf/message.h" #include "absl/memory/memory.h" #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/synchronization/mutex.h" @@ -45,6 +44,7 @@ #include "sandboxed_api/util/raw_logging.h" #include "sandboxed_api/util/status.h" #include "sandboxed_api/util/status_macros.h" +#include "sandboxed_api/util/statusor.h" #ifdef MEMORY_SANITIZER #include "base/dynamic_annotations.h" diff --git a/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel b/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel index 480c078..133cb88 100644 --- a/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel +++ b/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel @@ -49,7 +49,6 @@ cc_binary( "//sandboxed_api/util:flags", "//sandboxed_api/util:status", "//sandboxed_api/util:statusor", - "@com_google_absl//absl/status", "@com_google_absl//absl/strings:str_format", ], ) diff --git a/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc b/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc index b355522..c22947d 100644 --- a/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc +++ b/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc @@ -12,15 +12,15 @@ #include #include "sandboxed_api/util/flag.h" -#include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" #include "absl/strings/str_format.h" #include "sandboxed_api/sandbox2/client.h" #include "sandboxed_api/sandbox2/comms.h" #include "sandboxed_api/sandbox2/network_proxy/client.h" #include "sandboxed_api/sandbox2/util/fileops.h" #include "sandboxed_api/sandbox2/util/strerror.h" +#include "sandboxed_api/util/status.h" #include 
"sandboxed_api/util/status_macros.h" +#include "sandboxed_api/util/statusor.h" ABSL_FLAG(bool, connect_with_handler, true, "Connect using automatic mode."); diff --git a/sandboxed_api/sandbox2/forkserver.cc b/sandboxed_api/sandbox2/forkserver.cc index d0b637c..2125927 100644 --- a/sandboxed_api/sandbox2/forkserver.cc +++ b/sandboxed_api/sandbox2/forkserver.cc @@ -36,7 +36,6 @@ #include "absl/memory/memory.h" #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" @@ -56,6 +55,7 @@ #include "sandboxed_api/sandbox2/util/fileops.h" #include "sandboxed_api/sandbox2/util/strerror.h" #include "sandboxed_api/util/raw_logging.h" +#include "sandboxed_api/util/statusor.h" namespace { // "Moves" the old FD to the new FD number. diff --git a/sandboxed_api/sandbox2/mounts.cc b/sandboxed_api/sandbox2/mounts.cc index e1c0198..2f734e2 100644 --- a/sandboxed_api/sandbox2/mounts.cc +++ b/sandboxed_api/sandbox2/mounts.cc @@ -27,7 +27,6 @@ #include "google/protobuf/util/message_differencer.h" #include "absl/container/flat_hash_set.h" -#include "sandboxed_api/util/statusor.h" #include "absl/strings/ascii.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" @@ -40,6 +39,7 @@ #include "sandboxed_api/sandbox2/util/strerror.h" #include "sandboxed_api/util/raw_logging.h" #include "sandboxed_api/util/status_macros.h" +#include "sandboxed_api/util/statusor.h" namespace sandbox2 { namespace { diff --git a/sandboxed_api/sandbox2/network_proxy/BUILD.bazel b/sandboxed_api/sandbox2/network_proxy/BUILD.bazel index 5935390..3a3a9d6 100644 --- a/sandboxed_api/sandbox2/network_proxy/BUILD.bazel +++ b/sandboxed_api/sandbox2/network_proxy/BUILD.bazel @@ -46,7 +46,6 @@ cc_library( "//sandboxed_api/sandbox2/util:strerror", "//sandboxed_api/util:status", "@com_google_absl//absl/memory", - "@com_google_absl//absl/status", "@com_google_absl//absl/strings", 
"@com_google_absl//absl/synchronization", "@com_google_glog//:glog", @@ -63,7 +62,6 @@ cc_library( "//sandboxed_api/sandbox2/util:strerror", "//sandboxed_api/util:status", "//sandboxed_api/util:statusor", - "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_glog//:glog", ], diff --git a/sandboxed_api/sandbox2/network_proxy/client.cc b/sandboxed_api/sandbox2/network_proxy/client.cc index e528041..586d201 100644 --- a/sandboxed_api/sandbox2/network_proxy/client.cc +++ b/sandboxed_api/sandbox2/network_proxy/client.cc @@ -25,9 +25,9 @@ #include #include "absl/memory/memory.h" -#include "absl/status/status.h" #include "absl/strings/str_cat.h" #include "sandboxed_api/sandbox2/util/strerror.h" +#include "sandboxed_api/util/status.h" #include "sandboxed_api/util/status_macros.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/network_proxy/client.h b/sandboxed_api/sandbox2/network_proxy/client.h index 7318993..e7af4dc 100644 --- a/sandboxed_api/sandbox2/network_proxy/client.h +++ b/sandboxed_api/sandbox2/network_proxy/client.h @@ -17,9 +17,9 @@ #include -#include "absl/status/status.h" #include "absl/synchronization/mutex.h" #include "sandboxed_api/sandbox2/comms.h" +#include "sandboxed_api/util/status.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/network_proxy/filtering.cc b/sandboxed_api/sandbox2/network_proxy/filtering.cc index fc93e6c..d6389d8 100644 --- a/sandboxed_api/sandbox2/network_proxy/filtering.cc +++ b/sandboxed_api/sandbox2/network_proxy/filtering.cc @@ -17,12 +17,11 @@ #include #include -#include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "sandboxed_api/sandbox2/util/strerror.h" +#include "sandboxed_api/util/status.h" #include "sandboxed_api/util/status_macros.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/network_proxy/filtering.h 
b/sandboxed_api/sandbox2/network_proxy/filtering.h index c0a235b..fb6714a 100644 --- a/sandboxed_api/sandbox2/network_proxy/filtering.h +++ b/sandboxed_api/sandbox2/network_proxy/filtering.h @@ -19,8 +19,8 @@ #include -#include "sandboxed_api/util/statusor.h" #include "sandboxed_api/sandbox2/comms.h" +#include "sandboxed_api/util/statusor.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/policybuilder.h b/sandboxed_api/sandbox2/policybuilder.h index 07095ba..f05023c 100644 --- a/sandboxed_api/sandbox2/policybuilder.h +++ b/sandboxed_api/sandbox2/policybuilder.h @@ -29,11 +29,11 @@ #include #include "absl/base/macros.h" #include "absl/memory/memory.h" -#include "sandboxed_api/util/statusor.h" #include "absl/strings/string_view.h" #include "sandboxed_api/sandbox2/mounts.h" #include "sandboxed_api/sandbox2/network_proxy/filtering.h" #include "sandboxed_api/sandbox2/policy.h" +#include "sandboxed_api/util/statusor.h" struct bpf_labels; diff --git a/sandboxed_api/sandbox2/sandbox2.h b/sandboxed_api/sandbox2/sandbox2.h index 633297f..9527ff2 100644 --- a/sandboxed_api/sandbox2/sandbox2.h +++ b/sandboxed_api/sandbox2/sandbox2.h @@ -26,7 +26,6 @@ #include #include "absl/base/macros.h" #include "absl/memory/memory.h" -#include "sandboxed_api/util/statusor.h" #include "sandboxed_api/sandbox2/comms.h" #include "sandboxed_api/sandbox2/executor.h" #include "sandboxed_api/sandbox2/ipc.h" @@ -34,6 +33,7 @@ #include "sandboxed_api/sandbox2/notify.h" #include "sandboxed_api/sandbox2/policy.h" #include "sandboxed_api/sandbox2/result.h" +#include "sandboxed_api/util/statusor.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/util.cc b/sandboxed_api/sandbox2/util.cc index b760d26..24d4a36 100644 --- a/sandboxed_api/sandbox2/util.cc +++ b/sandboxed_api/sandbox2/util.cc @@ -183,8 +183,8 @@ bool CreateMemFd(int* fd, const char* name) { } sapi::StatusOr Communicate(const std::vector& argv, - const std::vector& envv, - std::string* output) { + const std::vector& envv, + 
std::string* output) { int cout_pipe[2]; posix_spawn_file_actions_t action; diff --git a/sandboxed_api/sandbox2/util.h b/sandboxed_api/sandbox2/util.h index e42bf72..87fe31c 100644 --- a/sandboxed_api/sandbox2/util.h +++ b/sandboxed_api/sandbox2/util.h @@ -63,8 +63,8 @@ bool CreateMemFd(int* fd, const char* name = "buffer_file"); // Executes a the program given by argv and the specified environment and // captures any output to stdout/stderr. sapi::StatusOr Communicate(const std::vector& argv, - const std::vector& envv, - std::string* output); + const std::vector& envv, + std::string* output); // Returns signal description. std::string GetSignalName(int signo); diff --git a/sandboxed_api/sandbox2/util/minielf.cc b/sandboxed_api/sandbox2/util/minielf.cc index dce033b..3eac979 100644 --- a/sandboxed_api/sandbox2/util/minielf.cc +++ b/sandboxed_api/sandbox2/util/minielf.cc @@ -219,7 +219,8 @@ absl::Status ElfParser::ReadFileHeader() { return absl::OkStatus(); } -sapi::StatusOr ElfParser::ReadSectionHeader(absl::string_view src) { +sapi::StatusOr ElfParser::ReadSectionHeader( + absl::string_view src) { if (src.size() < sizeof(Elf64_Shdr)) { return absl::FailedPreconditionError( absl::StrCat("invalid section header data: got ", src.size(), @@ -292,7 +293,8 @@ sapi::StatusOr ElfParser::ReadSectionContents( return rv; } -sapi::StatusOr ElfParser::ReadProgramHeader(absl::string_view src) { +sapi::StatusOr ElfParser::ReadProgramHeader( + absl::string_view src) { if (src.size() < sizeof(Elf64_Phdr)) { return absl::FailedPreconditionError( absl::StrCat("invalid program header data: got ", src.size(), @@ -512,7 +514,7 @@ sapi::StatusOr ElfParser::Parse(FILE* elf, uint32_t features) { } sapi::StatusOr ElfFile::ParseFromFile(const std::string& filename, - uint32_t features) { + uint32_t features) { std::unique_ptr elf{fopen(filename.c_str(), "r"), [](FILE* f) { fclose(f); }}; if (!elf) { diff --git a/sandboxed_api/sandbox2/util/minielf.h b/sandboxed_api/sandbox2/util/minielf.h 
index c33a604..247d271 100644 --- a/sandboxed_api/sandbox2/util/minielf.h +++ b/sandboxed_api/sandbox2/util/minielf.h @@ -34,7 +34,7 @@ class ElfFile { }; static sapi::StatusOr ParseFromFile(const std::string& filename, - uint32_t features); + uint32_t features); int64_t file_size() const { return file_size_; } const std::string& interpreter() const { return interpreter_; } diff --git a/sandboxed_api/tools/clang_generator/emitter.cc b/sandboxed_api/tools/clang_generator/emitter.cc index 78b9a9a..18fdcd9 100644 --- a/sandboxed_api/tools/clang_generator/emitter.cc +++ b/sandboxed_api/tools/clang_generator/emitter.cc @@ -46,10 +46,10 @@ constexpr absl::string_view kHeaderProlog = #include "absl/base/macros.h" #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" #include "sandboxed_api/sandbox.h" #include "sandboxed_api/vars.h" #include "sandboxed_api/util/status_macros.h" +#include "sandboxed_api/util/statusor.h" )"; constexpr absl::string_view kHeaderEpilog = diff --git a/sandboxed_api/tools/clang_generator/emitter.h b/sandboxed_api/tools/clang_generator/emitter.h index ad9f5f8..42ba92d 100644 --- a/sandboxed_api/tools/clang_generator/emitter.h +++ b/sandboxed_api/tools/clang_generator/emitter.h @@ -18,12 +18,12 @@ #include #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" #include "absl/strings/string_view.h" #include "clang/AST/Decl.h" #include "clang/AST/Type.h" #include "sandboxed_api/tools/clang_generator/generator.h" #include "sandboxed_api/tools/clang_generator/types.h" +#include "sandboxed_api/util/statusor.h" namespace sapi { diff --git a/sandboxed_api/tools/clang_generator/generator.cc b/sandboxed_api/tools/clang_generator/generator.cc index ffe2106..508877e 100644 --- a/sandboxed_api/tools/clang_generator/generator.cc +++ b/sandboxed_api/tools/clang_generator/generator.cc @@ -18,12 +18,12 @@ #include #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" #include "clang/Format/Format.h" 
#include "sandboxed_api/sandbox2/util/fileops.h" #include "sandboxed_api/tools/clang_generator/diagnostics.h" #include "sandboxed_api/tools/clang_generator/emitter.h" #include "sandboxed_api/util/status_macros.h" +#include "sandboxed_api/util/statusor.h" namespace sapi { namespace { @@ -68,7 +68,7 @@ bool GeneratorASTVisitor::VisitFunctionDecl(clang::FunctionDecl* decl) { namespace internal { sapi::StatusOr ReformatGoogleStyle(const std::string& filename, - const std::string& code) { + const std::string& code) { // Configure code style based on Google style, but enforce pointer alignment clang::format::FormatStyle style = clang::format::getGoogleStyle(clang::format::FormatStyle::LK_Cpp); diff --git a/sandboxed_api/tools/clang_generator/generator.h b/sandboxed_api/tools/clang_generator/generator.h index 3b9680b..24e2658 100644 --- a/sandboxed_api/tools/clang_generator/generator.h +++ b/sandboxed_api/tools/clang_generator/generator.h @@ -20,13 +20,13 @@ #include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendAction.h" #include "clang/Tooling/Tooling.h" #include "sandboxed_api/tools/clang_generator/types.h" +#include "sandboxed_api/util/statusor.h" namespace sapi { @@ -67,7 +67,7 @@ class GeneratorASTVisitor namespace internal { sapi::StatusOr ReformatGoogleStyle(const std::string& filename, - const std::string& code); + const std::string& code); } // namespace internal diff --git a/sandboxed_api/tools/clang_generator/types.cc b/sandboxed_api/tools/clang_generator/types.cc index 321d1e2..4d91499 100644 --- a/sandboxed_api/tools/clang_generator/types.cc +++ b/sandboxed_api/tools/clang_generator/types.cc @@ -208,7 +208,7 @@ std::string MapQualTypeReturn(const clang::ASTContext& context, return "absl::Status"; } // Remove const 
qualifier like in MapQualType(). - return absl::StrCat("sapi::StatusOr<", + return absl::StrCat("::sapi::StatusOr<", MaybeRemoveConst(context, qual).getAsString(), ">"); } diff --git a/sandboxed_api/util/status.h b/sandboxed_api/util/status.h index d9962fd..6390bc0 100644 --- a/sandboxed_api/util/status.h +++ b/sandboxed_api/util/status.h @@ -12,6 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +// This file and it's implementation provide a custom fork of +// util/task/status.h. This will become obsolete and will be replaced once +// Abseil releases absl::Status. + #ifndef THIRD_PARTY_SAPI_UTIL_STATUS_H_ #define THIRD_PARTY_SAPI_UTIL_STATUS_H_ From 8f21b0e931ed702876a5421bf2bbf13374a58987 Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Wed, 26 Aug 2020 11:23:33 +0000 Subject: [PATCH 19/42] Coding style update --- .../pffft/main_pffft_sandboxed.cc | 66 +++++++------------ 1 file changed, 23 insertions(+), 43 deletions(-) diff --git a/oss-internship-2020/pffft/main_pffft_sandboxed.cc b/oss-internship-2020/pffft/main_pffft_sandboxed.cc index b9a5a90..4da5abd 100644 --- a/oss-internship-2020/pffft/main_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/main_pffft_sandboxed.cc @@ -1,15 +1,14 @@ -#define GOOGLE_STRIP_LOG 1 - -#include #include -#include -#include #include #include #include #include #include +#include +#include +#include + #include "fftpack.h" #include "pffft_sapi.sapi.h" #include "sandboxed_api/util/flag.h" @@ -38,14 +37,12 @@ class pffftSapiSandbox : public pffftSandbox { } }; -double frand() { return rand() / (double)RAND_MAX; } - -double uclock_sec(void) { return (double)clock() / (double)CLOCKS_PER_SEC; } +double UclockSec(void) { return (double)clock() / (double)CLOCKS_PER_SEC; } int array_output_format = 0; -void show_output(const char* name, int N, int cplx, float flops, float t0, - float t1, int max_iter) { +void ShowOutput(const char* name, int N, int cplx, float flops, float 
t0, + float t1, int max_iter) { float mflops = flops / 1e6 / (t1 - t0 + 1e-16); if (array_output_format) { if (flops != -1) { @@ -85,39 +82,21 @@ int main(int argc, char* argv[]) { LOG(INFO) << "Initializing sandbox...\n"; pffftSapiSandbox sandbox; - sandbox.Init().IgnoreError(); + absl::Status init_status = sandbox.Init(); - LOG(INFO) << "Initialization: " << sandbox.Init().ToString().c_str() << "\n"; + LOG(INFO) << "Initialization: " << init_status.ToString().c_str() << "\n"; pffftApi api(&sandbox); - - int N, cplx; - - cplx = 0; + int cplx = 0; do { - for (i = 0; i < 23; i++) { - N = Nvalues[i]; - - int Nfloat = N * (cplx ? 2 : 1); + for (int N : Nvalues) { + const int Nfloat = N * (cplx ? 2 : 1); int Nbytes = Nfloat * sizeof(float); - int pass; - float ref[Nbytes], in[Nbytes], out[Nbytes], tmp[Nbytes], tmp2[Nbytes]; + float wrk[2 * Nfloat + 15 * sizeof(float)]; + sapi::v::Array wrk_(wrk, 2 * Nfloat + 15 * sizeof(float)); - sapi::v::Array ref_(ref, Nbytes); - sapi::v::Array in_(in, Nbytes); - sapi::v::Array out_(out, Nbytes); - sapi::v::Array tmp_(tmp, Nbytes); - sapi::v::Array tmp2_(tmp2, Nbytes); - - float wrk[2 * Nbytes + 15 * sizeof(float)]; - sapi::v::Array wrk_(wrk, 2 * Nbytes + 15 * sizeof(float)); - - float ref_max = 0; - int k; - - Nfloat = (cplx ? N * 2 : N); float X[Nbytes], Y[Nbytes], Z[Nbytes]; sapi::v::Array X_(X, Nbytes), Y_(Y, Nbytes), Z_(Z, Nbytes); @@ -127,7 +106,7 @@ int main(int argc, char* argv[]) { #ifdef __arm__ max_iter /= 4; #endif - int iter; + int iter, k; for (k = 0; k < Nfloat; ++k) { X[k] = 0; @@ -148,7 +127,7 @@ int main(int argc, char* argv[]) { } else { api.rffti(N, wrk_.PtrBoth()).IgnoreError(); } - t0 = uclock_sec(); + t0 = UclockSec(); for (iter = 0; iter < max_iter_; ++iter) { if (cplx) { @@ -159,11 +138,11 @@ int main(int argc, char* argv[]) { api.rfftb(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); } } - t1 = uclock_sec(); + t1 = UclockSec(); flops = (max_iter_ * 2) * ((cplx ? 
5 : 2.5) * N * log((double)N) / M_LN2); - show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); + ShowOutput("FFTPack", N, cplx, flops, t0, t1, max_iter_); } /* @@ -173,12 +152,13 @@ int main(int argc, char* argv[]) { sapi::StatusOr s = api.pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); - LOG(INFO) << "Setup status is: " << s.status().ToString().c_str() << "\n"; + LOG(INFO) << "Setup status is: " << s.status().ToString().c_str() + << "\n"; if (s.ok()) { sapi::v::RemotePtr s_reg(s.value()); - t0 = uclock_sec(); + t0 = UclockSec(); for (iter = 0; iter < max_iter; ++iter) { api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), Y_.PtrBoth(), PFFFT_FORWARD) @@ -188,12 +168,12 @@ int main(int argc, char* argv[]) { .IgnoreError(); } - t1 = uclock_sec(); + t1 = UclockSec(); api.pffft_destroy_setup(&s_reg).IgnoreError(); flops = (max_iter * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); - show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); + ShowOutput("PFFFT", N, cplx, flops, t0, t1, max_iter); } LOG(INFO) << "N = " << N << " SUCCESSFULLY\n\n"; From 139723d3b8ea803417ed1c654302d26991bbdbd0 Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Wed, 26 Aug 2020 14:18:31 +0000 Subject: [PATCH 20/42] Added LICENSE & coding changes required --- oss-internship-2020/pffft/CMakeLists.txt | 16 +++- oss-internship-2020/pffft/Makefile | 15 --- oss-internship-2020/pffft/README.md | 29 ++++-- .../{pffft_library_notes.txt => README.txt} | 0 .../pffft/main_pffft_sandboxed.cc | 93 ++++++++++--------- .../pffft/{main_pffft.c => test_pffft.c} | 0 6 files changed, 85 insertions(+), 68 deletions(-) delete mode 100644 oss-internship-2020/pffft/Makefile rename oss-internship-2020/pffft/{pffft_library_notes.txt => README.txt} (100%) rename oss-internship-2020/pffft/{main_pffft.c => test_pffft.c} (100%) diff --git a/oss-internship-2020/pffft/CMakeLists.txt b/oss-internship-2020/pffft/CMakeLists.txt index 6c7e776..442bb74 100644 --- a/oss-internship-2020/pffft/CMakeLists.txt 
+++ b/oss-internship-2020/pffft/CMakeLists.txt @@ -1,3 +1,17 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + cmake_minimum_required(VERSION 3.10) project(pffft CXX C) @@ -13,7 +27,7 @@ add_library(pffft STATIC ) add_executable(pffft_main - main_pffft.c + test_pffft.c ) target_link_libraries(pffft_main PRIVATE diff --git a/oss-internship-2020/pffft/Makefile b/oss-internship-2020/pffft/Makefile deleted file mode 100644 index 326fd90..0000000 --- a/oss-internship-2020/pffft/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -CXXFLAGS ?= -std=c++17 - -pffft_main: test_pffft.o libpffft.a - $(CXX) -o $@ $(CXXFLAGS) $(LDFLAGS) $^ - -libpffft.a: pffft.o fftpack.o - ar rcs $@ $^ - -pffft.c: pffft.h - -fftpack.c: fftpack.h - -.PHONY: clean -clean: - rm -f *.o *.a pffft_main \ No newline at end of file diff --git a/oss-internship-2020/pffft/README.md b/oss-internship-2020/pffft/README.md index 8b8b2bd..6cef583 100644 --- a/oss-internship-2020/pffft/README.md +++ b/oss-internship-2020/pffft/README.md @@ -1,14 +1,23 @@ # Sandboxing PFFFT library -Builder: CMake +Build System: CMake OS: Linux +### Check out the PFFFT library & CMake set up +`mkdir -p build && cd build` + +`git clone https://bitbucket.org/jpommier/pffft.git` + +`cmake .. 
-G Ninja -DPFFFT_ROOT_DIR=$PWD` + +`ninja` + ### For testing: `cd build`, then `./pffft_sandboxed` ### For debug: -`SAPI_VLOG_LEVEL=1 ./pffft_sandboxed --v=100 ---sandbox2_danger_danger_permit_all_and_log ` +display custom info with +`./pffft_sandboxed --logtostderr` ## ***About the project*** *PFFFT library is concerned with 1D Fast-Fourier Transformations finding a @@ -38,22 +47,22 @@ it is taken into account while testing. In the end, the performance of PFFFT library it is outlined by the output.* #### CMake observations resume: - * linking pffft and fftpack (which contains necessary functions for pffft) - * set math library +* linking pffft and fftpack (which contains necessary functions for pffft) +* set math library #### Sandboxed main observations resume: - * containing two testing parts (fft / pffft benchmarks) - * showing the performance of the transformations implies +* containing two testing parts (fft / pffft benchmarks) +* showing the performance of the transformations implies testing them through various FFT dimenstions. Variable N, the input length, will take specific values meaning the number of points to which it is set the calculus (more details of mathematical purpose of N - https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm). - * output shows speed depending on the input length +* output shows speed depending on the input length ### Bugs history - - [Solved] pffft benchmark bug: "Sandbox not active" + 1. [Solved] pffft benchmark bug: "Sandbox not active" N = 64, status OK, pffft_transform generates error N > 64, status not OK Problem on initialising sapi::StatusOr s; the memory that stays @@ -66,7 +75,7 @@ In the end, the performance of PFFFT library it is outlined by the output.* Solution: using "sapi::v::RemotePtr" instead of "sapi::v::GenericPtr" to access the memory of object s - - [Unresolved] compiling bug: "No space left on device" + 2. 
[Unresolved] compiling bug: "No space left on device" The building process creates some `embed` files that use lots of memory, trying to write them on /tmp. diff --git a/oss-internship-2020/pffft/pffft_library_notes.txt b/oss-internship-2020/pffft/README.txt similarity index 100% rename from oss-internship-2020/pffft/pffft_library_notes.txt rename to oss-internship-2020/pffft/README.txt diff --git a/oss-internship-2020/pffft/main_pffft_sandboxed.cc b/oss-internship-2020/pffft/main_pffft_sandboxed.cc index 4da5abd..8b7e6d1 100644 --- a/oss-internship-2020/pffft/main_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/main_pffft_sandboxed.cc @@ -1,3 +1,17 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include #include #include @@ -17,7 +31,7 @@ ABSL_DECLARE_FLAG(string, sandbox2_danger_danger_permit_all); ABSL_DECLARE_FLAG(string, sandbox2_danger_danger_permit_all_and_log); -class pffftSapiSandbox : public pffftSandbox { +class PffftSapiSandbox : public pffftSandbox { public: std::unique_ptr ModifyPolicy( sandbox2::PolicyBuilder*) override { @@ -60,28 +74,26 @@ void ShowOutput(const char* name, int N, int cplx, float flops, float t0, } int main(int argc, char* argv[]) { - /* - * Initialize Google's logging library. - */ + + // Initialize Google's logging library. 
google::InitGoogleLogging(argv[0]); gflags::ParseCommandLineFlags(&argc, &argv, true); - /* - * Nvalues is a vector keeping the values by which iterates N, its value - * representing the input length. More concrete, N is the number of - * data points the caclulus is up to (determinating its accuracy). - * To show the performance of Fast-Fourier Transformations the program is - * testing for various values of N. - */ + + // Nvalues is a vector keeping the values by which iterates N, its value + // representing the input length. More concrete, N is the number of + // data points the caclulus is up to (determinating its accuracy). + // To show the performance of Fast-Fourier Transformations the program is + // testing for various values of N. int Nvalues[] = {64, 96, 128, 160, 192, 256, 384, 5 * 96, 512, 5 * 128, 3 * 256, 800, 1024, 2048, 2400, 4096, 8192, 9 * 1024, - 16384, 32768, 256 * 1024, 1024 * 1024, -1}; + 16384, 32768}; int i; LOG(INFO) << "Initializing sandbox...\n"; - pffftSapiSandbox sandbox; + PffftSapiSandbox sandbox; absl::Status init_status = sandbox.Init(); LOG(INFO) << "Initialization: " << init_status.ToString().c_str() << "\n"; @@ -112,13 +124,9 @@ int main(int argc, char* argv[]) { X[k] = 0; } - /* - * FFTPack benchmark - */ + // FFTPack benchmark { - /* - * SIMD_SZ == 4 (returning value of pffft_simd_size()) - */ + // SIMD_SZ == 4 (returning value of pffft_simd_size()) int max_iter_ = max_iter / 4; if (max_iter_ == 0) max_iter_ = 1; @@ -144,10 +152,8 @@ int main(int argc, char* argv[]) { (max_iter_ * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); ShowOutput("FFTPack", N, cplx, flops, t0, t1, max_iter_); } - - /* - * PFFFT benchmark - */ + + // PFFFT benchmark { sapi::StatusOr s = api.pffft_new_setup(N, cplx ? 
PFFFT_COMPLEX : PFFFT_REAL); @@ -155,27 +161,30 @@ int main(int argc, char* argv[]) { LOG(INFO) << "Setup status is: " << s.status().ToString().c_str() << "\n"; - if (s.ok()) { - sapi::v::RemotePtr s_reg(s.value()); - - t0 = UclockSec(); - for (iter = 0; iter < max_iter; ++iter) { - api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), - Y_.PtrBoth(), PFFFT_FORWARD) - .IgnoreError(); - api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), - Y_.PtrBoth(), PFFFT_FORWARD) - .IgnoreError(); - } - - t1 = UclockSec(); - api.pffft_destroy_setup(&s_reg).IgnoreError(); - - flops = - (max_iter * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); - ShowOutput("PFFFT", N, cplx, flops, t0, t1, max_iter); + if (!s.ok()) { + printf("Sandbox failed.\n"); + return 1; } + sapi::v::RemotePtr s_reg(s.value()); + + t0 = UclockSec(); + for (iter = 0; iter < max_iter; ++iter) { + api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), + Y_.PtrBoth(), PFFFT_FORWARD) + .IgnoreError(); + api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), + Y_.PtrBoth(), PFFFT_FORWARD) + .IgnoreError(); + } + + t1 = UclockSec(); + api.pffft_destroy_setup(&s_reg).IgnoreError(); + + flops = + (max_iter * 2) * ((cplx ? 
5 : 2.5) * N * log((double)N) / M_LN2); + ShowOutput("PFFFT", N, cplx, flops, t0, t1, max_iter); + LOG(INFO) << "N = " << N << " SUCCESSFULLY\n\n"; } } diff --git a/oss-internship-2020/pffft/main_pffft.c b/oss-internship-2020/pffft/test_pffft.c similarity index 100% rename from oss-internship-2020/pffft/main_pffft.c rename to oss-internship-2020/pffft/test_pffft.c From b08726540a76c290e8c339e0f2b6bee65ddaf889 Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Wed, 26 Aug 2020 14:25:11 +0000 Subject: [PATCH 21/42] Added PFFFT submodule - master directory --- .gitmodules | 3 +++ oss-internship-2020/pffft/master | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 oss-internship-2020/pffft/master diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..793361c --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "oss-internship-2020/pffft/master"] + path = oss-internship-2020/pffft/master + url = https://bitbucket.org/jpommier/pffft/src/master/ diff --git a/oss-internship-2020/pffft/master b/oss-internship-2020/pffft/master new file mode 160000 index 0000000..74d7261 --- /dev/null +++ b/oss-internship-2020/pffft/master @@ -0,0 +1 @@ +Subproject commit 74d7261be17cf659d5930d4830609406bd7553e3 From c8d07aeaa59e5850d212644357af31a54f4787fb Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Wed, 26 Aug 2020 14:56:19 +0000 Subject: [PATCH 22/42] Changing paths for propertly usage of the submodule --- oss-internship-2020/pffft/CMakeLists.txt | 12 +- oss-internship-2020/pffft/README.md | 4 +- oss-internship-2020/pffft/README.txt | 416 --- oss-internship-2020/pffft/fftpack.c | 3112 ----------------- oss-internship-2020/pffft/fftpack.h | 799 ----- .../pffft/main_pffft_sandboxed.cc | 1 - oss-internship-2020/pffft/pffft.c | 1881 ---------- oss-internship-2020/pffft/pffft.h | 177 - oss-internship-2020/pffft/test_pffft.c | 419 --- 9 files changed, 8 insertions(+), 6813 deletions(-) delete mode 100644 
oss-internship-2020/pffft/README.txt delete mode 100644 oss-internship-2020/pffft/fftpack.c delete mode 100644 oss-internship-2020/pffft/fftpack.h delete mode 100644 oss-internship-2020/pffft/pffft.c delete mode 100644 oss-internship-2020/pffft/pffft.h delete mode 100644 oss-internship-2020/pffft/test_pffft.c diff --git a/oss-internship-2020/pffft/CMakeLists.txt b/oss-internship-2020/pffft/CMakeLists.txt index 442bb74..c2a7bcd 100644 --- a/oss-internship-2020/pffft/CMakeLists.txt +++ b/oss-internship-2020/pffft/CMakeLists.txt @@ -20,14 +20,14 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED True) add_library(pffft STATIC - pffft.c - pffft.h - fftpack.c - fftpack.h + master/pffft.c + master/pffft.h + master/fftpack.c + master/fftpack.h ) add_executable(pffft_main - test_pffft.c + master/test_pffft.c ) target_link_libraries(pffft_main PRIVATE @@ -84,7 +84,7 @@ add_sapi_library(pffft_sapi sinti sint - INPUTS pffft.h fftpack.h + INPUTS master/pffft.h master/fftpack.h LIBRARY pffft LIBRARY_NAME pffft diff --git a/oss-internship-2020/pffft/README.md b/oss-internship-2020/pffft/README.md index 6cef583..2d2a9ca 100644 --- a/oss-internship-2020/pffft/README.md +++ b/oss-internship-2020/pffft/README.md @@ -4,9 +4,9 @@ Build System: CMake OS: Linux ### Check out the PFFFT library & CMake set up -`mkdir -p build && cd build` +`git submodule add https://bitbucket.org/jpommier/pffft.git` -`git clone https://bitbucket.org/jpommier/pffft.git` +`mkdir -p build && cd build` `cmake .. -G Ninja -DPFFFT_ROOT_DIR=$PWD` diff --git a/oss-internship-2020/pffft/README.txt b/oss-internship-2020/pffft/README.txt deleted file mode 100644 index ee20b42..0000000 --- a/oss-internship-2020/pffft/README.txt +++ /dev/null @@ -1,416 +0,0 @@ -PFFFT: a pretty fast FFT. - -TL;DR --- - -PFFFT does 1D Fast Fourier Transforms, of single precision real and -complex vectors. It tries do it fast, it tries to be correct, and it -tries to be small. 
Computations do take advantage of SSE1 instructions -on x86 cpus, Altivec on powerpc cpus, and NEON on ARM cpus. The -license is BSD-like. - - -Why does it exist: --- - -I was in search of a good performing FFT library , preferably very -small and with a very liberal license. - -When one says "fft library", FFTW ("Fastest Fourier Transform in the -West") is probably the first name that comes to mind -- I guess that -99% of open-source projects that need a FFT do use FFTW, and are happy -with it. However, it is quite a large library , which does everything -fft related (2d transforms, 3d transforms, other transformations such -as discrete cosine , or fast hartley). And it is licensed under the -GNU GPL , which means that it cannot be used in non open-source -products. - -An alternative to FFTW that is really small, is the venerable FFTPACK -v4, which is available on NETLIB. A more recent version (v5) exists, -but it is larger as it deals with multi-dimensional transforms. This -is a library that is written in FORTRAN 77, a language that is now -considered as a bit antiquated by many. FFTPACKv4 was written in 1985, -by Dr Paul Swarztrauber of NCAR, more than 25 years ago ! And despite -its age, benchmarks show it that it still a very good performing FFT -library, see for example the 1d single precision benchmarks here: -http://www.fftw.org/speed/opteron-2.2GHz-32bit/ . It is however not -competitive with the fastest ones, such as FFTW, Intel MKL, AMD ACML, -Apple vDSP. The reason for that is that those libraries do take -advantage of the SSE SIMD instructions available on Intel CPUs, -available since the days of the Pentium III. These instructions deal -with small vectors of 4 floats at a time, instead of a single float -for a traditionnal FPU, so when using these instructions one may expect -a 4-fold performance improvement. 
- -The idea was to take this fortran fftpack v4 code, translate to C, -modify it to deal with those SSE instructions, and check that the -final performance is not completely ridiculous when compared to other -SIMD FFT libraries. Translation to C was performed with f2c ( -http://www.netlib.org/f2c/ ). The resulting file was a bit edited in -order to remove the thousands of gotos that were introduced by -f2c. You will find the fftpack.h and fftpack.c sources in the -repository, this a complete translation of -http://www.netlib.org/fftpack/ , with the discrete cosine transform -and the test program. There is no license information in the netlib -repository, but it was confirmed to me by the fftpack v5 curators that -the same terms do apply to fftpack v4: -http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html . This is a -"BSD-like" license, it is compatible with proprietary projects. - -Adapting fftpack to deal with the SIMD 4-element vectors instead of -scalar single precision numbers was more complex than I originally -thought, especially with the real transforms, and I ended up writing -more code than I planned.. - - -The code: --- - -Only two files, in good old C, pffft.c and pffft.h . The API is very -very simple, just make sure that you read the comments in pffft.h. - - -Comparison with other FFTs: --- - -The idea was not to break speed records, but to get a decently fast -fft that is at least 50% as fast as the fastest FFT -- especially on -slowest computers . I'm more focused on getting the best performance -on slow cpus (Atom, Intel Core 1, old Athlons, ARM Cortex-A9...), than -on getting top performance on today fastest cpus. - -It can be used in a real-time context as the fft functions do not -perform any memory allocation -- that is why they accept a 'work' -array in their arguments. - -It is also a bit focused on performing 1D convolutions, that is why it -provides "unordered" FFTs , and a fourier domain convolution -operation. 
- - -Benchmark results (cpu tested: core i7 2600, core 2 quad, core 1 duo, atom N270, cortex-A9, cortex-A15, A8X) --- - -The benchmark shows the performance of various fft implementations measured in -MFlops, with the number of floating point operations being defined as 5Nlog2(N) -for a length N complex fft, and 2.5*Nlog2(N) for a real fft. -See http://www.fftw.org/speed/method.html for an explanation of these formulas. - -MacOS Lion, gcc 4.2, 64-bit, fftw 3.3 on a 3.4 GHz core i7 2600 - -Built with: - - gcc-4.2 -o test_pffft -arch x86_64 -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -DHAVE_VECLIB -framework veclib -DHAVE_FFTW -lfftw3f - -| input len |real FFTPack| real vDSP | real FFTW | real PFFFT | |cplx FFTPack| cplx vDSP | cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| -| 64 | 2816 | 8596 | 7329 | 8187 | | 2887 | 14898 | 14668 | 11108 | -| 96 | 3298 | n/a | 8378 | 7727 | | 3953 | n/a | 15680 | 10878 | -| 128 | 3507 | 11575 | 9266 | 10108 | | 4233 | 17598 | 16427 | 12000 | -| 160 | 3391 | n/a | 9838 | 10711 | | 4220 | n/a | 16653 | 11187 | -| 192 | 3919 | n/a | 9868 | 10956 | | 4297 | n/a | 15770 | 12540 | -| 256 | 4283 | 13179 | 10694 | 13128 | | 4545 | 19550 | 16350 | 13822 | -| 384 | 3136 | n/a | 10810 | 12061 | | 3600 | n/a | 16103 | 13240 | -| 480 | 3477 | n/a | 10632 | 12074 | | 3536 | n/a | 11630 | 12522 | -| 512 | 3783 | 15141 | 11267 | 13838 | | 3649 | 20002 | 16560 | 13580 | -| 640 | 3639 | n/a | 11164 | 13946 | | 3695 | n/a | 15416 | 13890 | -| 768 | 3800 | n/a | 11245 | 13495 | | 3590 | n/a | 15802 | 14552 | -| 800 | 3440 | n/a | 10499 | 13301 | | 3659 | n/a | 12056 | 13268 | -| 1024 | 3924 | 15605 | 11450 | 15339 | | 3769 | 20963 | 13941 | 15467 | -| 2048 | 4518 | 16195 | 11551 | 15532 | | 4258 | 20413 | 13723 | 15042 | -| 2400 | 4294 | n/a | 10685 | 13078 | | 4093 | n/a | 12777 | 13119 | -| 4096 | 4750 | 16596 | 
11672 | 15817 | | 4157 | 19662 | 14316 | 14336 | -| 8192 | 3820 | 16227 | 11084 | 12555 | | 3691 | 18132 | 12102 | 13813 | -| 9216 | 3864 | n/a | 10254 | 12870 | | 3586 | n/a | 12119 | 13994 | -| 16384 | 3822 | 15123 | 10454 | 12822 | | 3613 | 16874 | 12370 | 13881 | -| 32768 | 4175 | 14512 | 10662 | 11095 | | 3881 | 14702 | 11619 | 11524 | -| 262144 | 3317 | 11429 | 6269 | 9517 | | 2810 | 11729 | 7757 | 10179 | -| 1048576 | 2913 | 10551 | 4730 | 5867 | | 2661 | 7881 | 3520 | 5350 | -|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| - - -Debian 6, gcc 4.4.5, 64-bit, fftw 3.3.1 on a 3.4 GHz core i7 2600 - -Built with: -gcc -o test_pffft -DHAVE_FFTW -msse2 -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L$HOME/local/lib -I$HOME/local/include/ -lfftw3f -lm - -| N (input length) | real FFTPack | real FFTW | real PFFFT | | cplx FFTPack | cplx FFTW | cplx PFFFT | -|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| -| 64 | 3840 | 7680 | 8777 | | 4389 | 20480 | 11171 | -| 96 | 4214 | 9633 | 8429 | | 4816 | 22477 | 11238 | -| 128 | 3584 | 10240 | 10240 | | 5120 | 23893 | 11947 | -| 192 | 4854 | 11095 | 12945 | | 4854 | 22191 | 14121 | -| 256 | 4096 | 11703 | 16384 | | 5120 | 23406 | 13653 | -| 384 | 4395 | 14651 | 12558 | | 4884 | 19535 | 14651 | -| 512 | 5760 | 13166 | 15360 | | 4608 | 23040 | 15360 | -| 768 | 4907 | 14020 | 16357 | | 4461 | 19628 | 14020 | -| 1024 | 5120 | 14629 | 14629 | | 5120 | 20480 | 15754 | -| 2048 | 5632 | 14080 | 18773 | | 4693 | 12516 | 16091 | -| 4096 | 5120 | 13653 | 17554 | | 4726 | 7680 | 14456 | -| 8192 | 4160 | 7396 | 13312 | | 4437 | 14791 | 13312 | -| 9216 | 4210 | 6124 | 13473 | | 4491 | 7282 | 14970 | -| 16384 | 3976 | 11010 | 14313 | | 4210 | 11450 | 13631 | -| 32768 | 4260 | 10224 | 10954 | | 4260 | 6816 | 11797 | -| 262144 | 3736 | 6896 | 9961 | | 2359 | 8965 | 9437 | -| 1048576 | 2796 | 4534 | 6453 | | 
1864 | 3078 | 5592 | -|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| - - - -MacOS Snow Leopard, gcc 4.0, 32-bit, fftw 3.3 on a 1.83 GHz core 1 duo - -Built with: - - gcc -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework veclib - -| input len |real FFTPack| real vDSP | real FFTW | real PFFFT | |cplx FFTPack| cplx vDSP | cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| -| 64 | 745 | 2145 | 1706 | 2028 | | 961 | 3356 | 3313 | 2300 | -| 96 | 877 | n/a | 1976 | 1978 | | 1059 | n/a | 3333 | 2233 | -| 128 | 951 | 2808 | 2213 | 2279 | | 1202 | 3803 | 3739 | 2494 | -| 192 | 1002 | n/a | 2456 | 2429 | | 1186 | n/a | 3701 | 2508 | -| 256 | 1065 | 3205 | 2641 | 2793 | | 1302 | 4013 | 3912 | 2663 | -| 384 | 845 | n/a | 2759 | 2499 | | 948 | n/a | 3729 | 2504 | -| 512 | 900 | 3476 | 2956 | 2759 | | 974 | 4057 | 3954 | 2645 | -| 768 | 910 | n/a | 2912 | 2737 | | 975 | n/a | 3837 | 2614 | -| 1024 | 936 | 3583 | 3107 | 3009 | | 1006 | 4124 | 3821 | 2697 | -| 2048 | 1057 | 3585 | 3091 | 2837 | | 1089 | 3889 | 3701 | 2513 | -| 4096 | 1083 | 3524 | 3092 | 2733 | | 1039 | 3617 | 3462 | 2364 | -| 8192 | 874 | 3252 | 2967 | 2363 | | 911 | 3106 | 2789 | 2302 | -| 9216 | 898 | n/a | 2420 | 2290 | | 865 | n/a | 2676 | 2204 | -| 16384 | 903 | 2892 | 2506 | 2421 | | 899 | 3026 | 2797 | 2289 | -| 32768 | 965 | 2837 | 2550 | 2358 | | 920 | 2922 | 2763 | 2240 | -| 262144 | 738 | 2422 | 1589 | 1708 | | 610 | 2038 | 1436 | 1091 | -| 1048576 | 528 | 1207 | 845 | 880 | | 606 | 1020 | 669 | 1036 | -|-----------+------------+------------+------------+------------| |------------+------------+------------+------------| - - - -Ubuntu 11.04, gcc 4.5, 32-bit, fftw 3.2 on a 2.66 core 2 quad - -Built with: -gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse 
-O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm - -| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------| |------------+------------+------------| -| 64 | 1920 | 3614 | 5120 | | 2194 | 7680 | 6467 | -| 96 | 1873 | 3549 | 5187 | | 2107 | 8429 | 5863 | -| 128 | 2240 | 3773 | 5514 | | 2560 | 7964 | 6827 | -| 192 | 1765 | 4569 | 7767 | | 2284 | 9137 | 7061 | -| 256 | 2048 | 5461 | 7447 | | 2731 | 9638 | 7802 | -| 384 | 1998 | 5861 | 6762 | | 2313 | 9253 | 7644 | -| 512 | 2095 | 6144 | 7680 | | 2194 | 10240 | 7089 | -| 768 | 2230 | 5773 | 7549 | | 2045 | 10331 | 7010 | -| 1024 | 2133 | 6400 | 8533 | | 2133 | 10779 | 7877 | -| 2048 | 2011 | 7040 | 8665 | | 1942 | 10240 | 7768 | -| 4096 | 2194 | 6827 | 8777 | | 1755 | 9452 | 6827 | -| 8192 | 1849 | 6656 | 6656 | | 1752 | 7831 | 6827 | -| 9216 | 1871 | 5858 | 6416 | | 1643 | 6909 | 6266 | -| 16384 | 1883 | 6223 | 6506 | | 1664 | 7340 | 6982 | -| 32768 | 1826 | 6390 | 6667 | | 1631 | 7481 | 6971 | -| 262144 | 1546 | 4075 | 5977 | | 1299 | 3415 | 3551 | -| 1048576 | 1104 | 2071 | 1730 | | 1104 | 1149 | 1834 | -|-----------+------------+------------+------------| |------------+------------+------------| - - - -Ubuntu 11.04, gcc 4.5, 32-bit, fftw 3.3 on a 1.6 GHz Atom N270 - -Built with: -gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm - -| N (input length) | real FFTPack | real FFTW | real PFFFT | | cplx FFTPack | cplx FFTW | cplx PFFFT | -|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| -| 64 | 452 | 1041 | 1336 | | 549 | 2318 | 1781 | -| 96 | 444 | 1297 | 1297 | | 503 | 2408 | 1686 | -| 128 | 527 | 1525 | 1707 | | 543 | 2655 | 1886 | -| 192 | 498 | 1653 | 1849 | | 539 | 2678 | 1942 | -| 256 | 585 | 1862 | 2156 | | 594 | 2777 | 
2244 | -| 384 | 499 | 1870 | 1998 | | 511 | 2586 | 1890 | -| 512 | 562 | 2095 | 2194 | | 542 | 2973 | 2194 | -| 768 | 545 | 2045 | 2133 | | 545 | 2365 | 2133 | -| 1024 | 595 | 2133 | 2438 | | 569 | 2695 | 2179 | -| 2048 | 587 | 2125 | 2347 | | 521 | 2230 | 1707 | -| 4096 | 495 | 1890 | 1834 | | 492 | 1876 | 1672 | -| 8192 | 469 | 1548 | 1729 | | 438 | 1740 | 1664 | -| 9216 | 468 | 1663 | 1663 | | 446 | 1585 | 1531 | -| 16384 | 453 | 1608 | 1767 | | 398 | 1476 | 1664 | -| 32768 | 456 | 1420 | 1503 | | 387 | 1388 | 1345 | -| 262144 | 309 | 385 | 726 | | 262 | 415 | 840 | -| 1048576 | 280 | 351 | 739 | | 261 | 313 | 797 | -|------------------+--------------+--------------+--------------| |--------------+--------------+--------------| - - - -Windows 7, visual c++ 2010 on a 1.6 GHz Atom N270 - -Built with: -cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c - -(visual c++ is definitively not very good with SSE intrinsics...) - -| N (input length) | real FFTPack | real PFFFT | | cplx FFTPack | cplx PFFFT | -|------------------+--------------+--------------| |--------------+--------------| -| 64 | 173 | 1009 | | 174 | 1159 | -| 96 | 169 | 1029 | | 188 | 1201 | -| 128 | 195 | 1242 | | 191 | 1275 | -| 192 | 178 | 1312 | | 184 | 1276 | -| 256 | 196 | 1591 | | 186 | 1281 | -| 384 | 172 | 1409 | | 181 | 1281 | -| 512 | 187 | 1640 | | 181 | 1313 | -| 768 | 171 | 1614 | | 176 | 1258 | -| 1024 | 186 | 1812 | | 178 | 1223 | -| 2048 | 190 | 1707 | | 186 | 1099 | -| 4096 | 182 | 1446 | | 177 | 975 | -| 8192 | 175 | 1345 | | 169 | 1034 | -| 9216 | 165 | 1271 | | 168 | 1023 | -| 16384 | 166 | 1396 | | 165 | 949 | -| 32768 | 172 | 1311 | | 161 | 881 | -| 262144 | 136 | 632 | | 134 | 629 | -| 1048576 | 134 | 698 | | 127 | 623 | -|------------------+--------------+--------------| |--------------+--------------| - - - -Ubuntu 12.04, gcc-4.7.3, 32-bit, with fftw 3.3.3 (built with --enable-neon), on a 1.2GHz ARM Cortex A9 (Tegra 3) - -Built with: -gcc-4.7 -O3 -DHAVE_FFTW 
-march=armv7-a -mtune=cortex-a9 -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm -I/usr/local/include/ -L/usr/local/lib/ -lfftw3f - -| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------| |------------+------------+------------| -| 64 | 549 | 452 | 731 | | 512 | 602 | 640 | -| 96 | 421 | 272 | 702 | | 496 | 571 | 602 | -| 128 | 498 | 512 | 815 | | 597 | 618 | 652 | -| 160 | 521 | 536 | 815 | | 586 | 669 | 625 | -| 192 | 539 | 571 | 883 | | 485 | 597 | 626 | -| 256 | 640 | 539 | 975 | | 569 | 611 | 671 | -| 384 | 499 | 610 | 879 | | 499 | 602 | 637 | -| 480 | 518 | 507 | 877 | | 496 | 661 | 616 | -| 512 | 524 | 591 | 1002 | | 549 | 678 | 668 | -| 640 | 542 | 612 | 955 | | 568 | 663 | 645 | -| 768 | 557 | 613 | 981 | | 491 | 663 | 598 | -| 800 | 514 | 353 | 882 | | 514 | 360 | 574 | -| 1024 | 640 | 640 | 1067 | | 492 | 683 | 602 | -| 2048 | 587 | 640 | 908 | | 486 | 640 | 552 | -| 2400 | 479 | 368 | 777 | | 422 | 376 | 518 | -| 4096 | 511 | 614 | 853 | | 426 | 640 | 534 | -| 8192 | 415 | 584 | 708 | | 386 | 622 | 516 | -| 9216 | 419 | 571 | 687 | | 364 | 586 | 506 | -| 16384 | 426 | 577 | 716 | | 398 | 606 | 530 | -| 32768 | 417 | 572 | 673 | | 399 | 572 | 468 | -| 262144 | 219 | 380 | 293 | | 255 | 431 | 343 | -| 1048576 | 202 | 274 | 237 | | 265 | 282 | 355 | -|-----------+------------+------------+------------| |------------+------------+------------| - -Same platform as above, but this time pffft and fftpack are built with clang 3.2: - -clang -O3 -DHAVE_FFTW -march=armv7-a -mtune=cortex-a9 -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm -I/usr/local/include/ -L/usr/local/lib/ -lfftw3f - -| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT | -|-----------+------------+------------+------------| |------------+------------+------------| -| 64 | 427 | 
452 | 853 | | 427 | 602 | 1024 | -| 96 | 351 | 276 | 843 | | 337 | 571 | 963 | -| 128 | 373 | 512 | 996 | | 390 | 618 | 1054 | -| 160 | 426 | 536 | 987 | | 375 | 669 | 914 | -| 192 | 404 | 571 | 1079 | | 388 | 588 | 1079 | -| 256 | 465 | 539 | 1205 | | 445 | 602 | 1170 | -| 384 | 366 | 610 | 1099 | | 343 | 594 | 1099 | -| 480 | 356 | 507 | 1140 | | 335 | 651 | 931 | -| 512 | 411 | 591 | 1213 | | 384 | 649 | 1124 | -| 640 | 398 | 612 | 1193 | | 373 | 654 | 901 | -| 768 | 409 | 613 | 1227 | | 383 | 663 | 1044 | -| 800 | 411 | 348 | 1073 | | 353 | 358 | 809 | -| 1024 | 427 | 640 | 1280 | | 413 | 692 | 1004 | -| 2048 | 414 | 626 | 1126 | | 371 | 640 | 853 | -| 2400 | 399 | 373 | 898 | | 319 | 368 | 653 | -| 4096 | 404 | 602 | 1059 | | 357 | 633 | 778 | -| 8192 | 332 | 584 | 792 | | 308 | 616 | 716 | -| 9216 | 322 | 561 | 783 | | 299 | 586 | 687 | -| 16384 | 344 | 568 | 778 | | 314 | 617 | 745 | -| 32768 | 342 | 564 | 737 | | 314 | 552 | 629 | -| 262144 | 201 | 383 | 313 | | 227 | 435 | 413 | -| 1048576 | 187 | 262 | 251 | | 228 | 281 | 409 | -|-----------+------------+------------+------------| |------------+------------+------------| - -So it looks like, on ARM, gcc 4.7 is the best at scalar floating point -(the fftpack performance numbers are better with gcc), while clang is -the best with neon intrinsics (see how pffft perf has improved with -clang 3.2). - - -NVIDIA Jetson TK1 board, gcc-4.8.2. The cpu is a 2.3GHz cortex A15 (Tegra K1). 
- -Built with: -gcc -O3 -march=armv7-a -mtune=native -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm - -| input len |real FFTPack| real PFFFT | |cplx FFTPack| cplx PFFFT | -|-----------+------------+------------| |------------+------------| -| 64 | 1735 | 3308 | | 1994 | 3744 | -| 96 | 1596 | 3448 | | 1987 | 3572 | -| 128 | 1807 | 4076 | | 2255 | 3960 | -| 160 | 1769 | 4083 | | 2071 | 3845 | -| 192 | 1990 | 4233 | | 2017 | 3939 | -| 256 | 2191 | 4882 | | 2254 | 4346 | -| 384 | 1878 | 4492 | | 2073 | 4012 | -| 480 | 1748 | 4398 | | 1923 | 3951 | -| 512 | 2030 | 5064 | | 2267 | 4195 | -| 640 | 1918 | 4756 | | 2094 | 4184 | -| 768 | 2099 | 4907 | | 2048 | 4297 | -| 800 | 1822 | 4555 | | 1880 | 4063 | -| 1024 | 2232 | 5355 | | 2187 | 4420 | -| 2048 | 2176 | 4983 | | 2027 | 3602 | -| 2400 | 1741 | 4256 | | 1710 | 3344 | -| 4096 | 1816 | 3914 | | 1851 | 3349 | -| 8192 | 1716 | 3481 | | 1700 | 3255 | -| 9216 | 1735 | 3589 | | 1653 | 3094 | -| 16384 | 1567 | 3483 | | 1637 | 3244 | -| 32768 | 1624 | 3240 | | 1655 | 3156 | -| 262144 | 1012 | 1898 | | 983 | 1503 | -| 1048576 | 876 | 1154 | | 868 | 1341 | -|-----------+------------+------------| |------------+------------| - -The performance on the tegra K1 is pretty impressive. I'm not -including the FFTW numbers as they as slightly below the scalar -fftpack numbers, so something must be wrong (however it seems to be -correctly configured and is using neon simd instructions). - -When using clang 3.4 the pffft version is even a bit faster, reaching -5.7 GFlops for real ffts of size 1024. - - -iPad Air 2 with iOS9, xcode 8.0, arm64. The cpu is an Apple A8X, supposedly running at 1.5GHz. 
- -| input len |real FFTPack| real vDSP | real PFFFT | |cplx FFTPack| cplx vDSP | cplx PFFFT | -|-----------+------------+------------+------------| |------------+------------+------------| -| 64 | 2517 | 7995 | 6086 | | 2725 | 13006 | 8495 | -| 96 | 2442 | n/a | 6691 | | 2256 | n/a | 7991 | -| 128 | 2664 | 10186 | 7877 | | 2575 | 15115 | 9115 | -| 160 | 2638 | n/a | 8283 | | 2682 | n/a | 8806 | -| 192 | 2903 | n/a | 9083 | | 2634 | n/a | 8980 | -| 256 | 3184 | 11452 | 10039 | | 3026 | 15410 | 10199 | -| 384 | 2665 | n/a | 10100 | | 2275 | n/a | 9247 | -| 480 | 2546 | n/a | 9863 | | 2341 | n/a | 8892 | -| 512 | 2832 | 12197 | 10989 | | 2547 | 16768 | 10154 | -| 640 | 2755 | n/a | 10461 | | 2569 | n/a | 9666 | -| 768 | 2998 | n/a | 11355 | | 2585 | n/a | 9813 | -| 800 | 2516 | n/a | 10332 | | 2433 | n/a | 9164 | -| 1024 | 3109 | 12965 | 12114 | | 2869 | 16448 | 10519 | -| 2048 | 3027 | 12996 | 12023 | | 2648 | 17304 | 10307 | -| 2400 | 2515 | n/a | 10372 | | 2355 | n/a | 8443 | -| 4096 | 3204 | 13603 | 12359 | | 2814 | 16570 | 9780 | -| 8192 | 2759 | 13422 | 10824 | | 2153 | 15652 | 7884 | -| 9216 | 2700 | n/a | 9938 | | 2241 | n/a | 7900 | -| 16384 | 2280 | 13057 | 7976 | | 593 | 4272 | 2534 | -| 32768 | 768 | 4269 | 2882 | | 606 | 4405 | 2604 | -| 262144 | 724 | 3527 | 2630 | | 534 | 2418 | 2157 | -| 1048576 | 674 | 1467 | 2135 | | 530 | 1621 | 2055 | -|-----------+------------+------------+------------| |------------+------------+------------| - -I double-checked to make sure I did not make a mistake in the time -measurements, as the numbers are much higher than what I initially -expected. They are in fact higher than the number I get on the 2.8GHz -Xeon of my 2008 mac pro.. (except for FFT lengths >= 32768 where -having a big cache is useful). A good surprise is also that the perf -is not too far from apple's vDSP (at least for the real FFT). 
- diff --git a/oss-internship-2020/pffft/fftpack.c b/oss-internship-2020/pffft/fftpack.c deleted file mode 100644 index b6375a8..0000000 --- a/oss-internship-2020/pffft/fftpack.c +++ /dev/null @@ -1,3112 +0,0 @@ -/* - compile with cc -DTESTING_FFTPACK fftpack.c in order to build the - test application. - - This is an f2c translation of the full fftpack sources as found on - http://www.netlib.org/fftpack/ The translated code has been - slightlty edited to remove the ugliest artefacts of the translation - (a hundred of wild GOTOs were wiped during that operation). - - The original fftpack file was written by Paul N. Swarztrauber - (Version 4, 1985), in fortran 77. - - FFTPACK license: - - http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html - - Copyright (c) 2004 the University Corporation for Atmospheric - Research ("UCAR"). All rights reserved. Developed by NCAR's - Computational and Information Systems Laboratory, UCAR, - www.cisl.ucar.edu. - - Redistribution and use of the Software in source and binary forms, - with or without modification, is permitted provided that the - following conditions are met: - - - Neither the names of NCAR's Computational and Information Systems - Laboratory, the University Corporation for Atmospheric Research, - nor the names of its sponsors or contributors may be used to - endorse or promote products derived from this Software without - specific prior written permission. - - - Redistributions of source code must retain the above copyright - notices, this list of conditions, and the disclaimer below. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the disclaimer below in the - documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. 
IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT - HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE - SOFTWARE. - - ChangeLog: - 2011/10/02: this is my first release of this file. -*/ - -#include "fftpack.h" -#include - -typedef fftpack_real real; -typedef fftpack_int integer; - -typedef struct f77complex { - real r, i; -} f77complex; - -#ifdef TESTING_FFTPACK -static real c_abs(f77complex *c) { return sqrt(c->r*c->r + c->i*c->i); } -static double dmax(double a, double b) { return a < b ? b : a; } -#endif - -/* translated by f2c (version 20061008), and slightly edited */ - -static void passfb(integer *nac, integer ido, integer ip, integer l1, integer idl1, - real *cc, real *c1, real *c2, real *ch, real *ch2, const real *wa, real fsign) -{ - /* System generated locals */ - integer ch_offset, cc_offset, - c1_offset, c2_offset, ch2_offset; - - /* Local variables */ - integer i, j, k, l, jc, lc, ik, idj, idl, inc, idp; - real wai, war; - integer ipp2, idij, idlj, idot, ipph; - - -#define c1_ref(a_1,a_2,a_3) c1[((a_3)*l1 + (a_2))*ido + a_1] -#define c2_ref(a_1,a_2) c2[(a_2)*idl1 + a_1] -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*ip + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] -#define ch2_ref(a_1,a_2) ch2[(a_2)*idl1 + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * (1 + l1); - ch -= ch_offset; - c1_offset = 1 + ido * (1 + l1); - c1 -= c1_offset; - cc_offset = 1 + ido * (1 + ip); - cc -= cc_offset; - ch2_offset = 1 + idl1; - ch2 -= ch2_offset; - c2_offset = 1 + idl1; - c2 -= c2_offset; - --wa; - - /* Function Body */ - idot = ido / 2; - ipp2 = ip + 2; - ipph = (ip + 1) / 2; - idp = ip * ido; - - if (ido >= l1) { - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - for (k = 1; k <= l1; ++k) { - for (i = 1; i <= 
ido; ++i) { - ch_ref(i, k, j) = cc_ref(i, j, k) + cc_ref(i, jc, k); - ch_ref(i, k, jc) = cc_ref(i, j, k) - cc_ref(i, jc, k); - } - } - } - for (k = 1; k <= l1; ++k) { - for (i = 1; i <= ido; ++i) { - ch_ref(i, k, 1) = cc_ref(i, 1, k); - } - } - } else { - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - for (i = 1; i <= ido; ++i) { - for (k = 1; k <= l1; ++k) { - ch_ref(i, k, j) = cc_ref(i, j, k) + cc_ref(i, jc, k); - ch_ref(i, k, jc) = cc_ref(i, j, k) - cc_ref(i, jc, k); - } - } - } - for (i = 1; i <= ido; ++i) { - for (k = 1; k <= l1; ++k) { - ch_ref(i, k, 1) = cc_ref(i, 1, k); - } - } - } - idl = 2 - ido; - inc = 0; - for (l = 2; l <= ipph; ++l) { - lc = ipp2 - l; - idl += ido; - for (ik = 1; ik <= idl1; ++ik) { - c2_ref(ik, l) = ch2_ref(ik, 1) + wa[idl - 1] * ch2_ref(ik, 2); - c2_ref(ik, lc) = fsign*wa[idl] * ch2_ref(ik, ip); - } - idlj = idl; - inc += ido; - for (j = 3; j <= ipph; ++j) { - jc = ipp2 - j; - idlj += inc; - if (idlj > idp) { - idlj -= idp; - } - war = wa[idlj - 1]; - wai = wa[idlj]; - for (ik = 1; ik <= idl1; ++ik) { - c2_ref(ik, l) = c2_ref(ik, l) + war * ch2_ref(ik, j); - c2_ref(ik, lc) = c2_ref(ik, lc) + fsign*wai * ch2_ref(ik, jc); - } - } - } - for (j = 2; j <= ipph; ++j) { - for (ik = 1; ik <= idl1; ++ik) { - ch2_ref(ik, 1) = ch2_ref(ik, 1) + ch2_ref(ik, j); - } - } - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - for (ik = 2; ik <= idl1; ik += 2) { - ch2_ref(ik - 1, j) = c2_ref(ik - 1, j) - c2_ref(ik, jc); - ch2_ref(ik - 1, jc) = c2_ref(ik - 1, j) + c2_ref(ik, jc); - ch2_ref(ik, j) = c2_ref(ik, j) + c2_ref(ik - 1, jc); - ch2_ref(ik, jc) = c2_ref(ik, j) - c2_ref(ik - 1, jc); - } - } - *nac = 1; - if (ido == 2) { - return; - } - *nac = 0; - for (ik = 1; ik <= idl1; ++ik) { - c2_ref(ik, 1) = ch2_ref(ik, 1); - } - for (j = 2; j <= ip; ++j) { - for (k = 1; k <= l1; ++k) { - c1_ref(1, k, j) = ch_ref(1, k, j); - c1_ref(2, k, j) = ch_ref(2, k, j); - } - } - if (idot <= l1) { - idij = 0; - for (j = 2; j <= ip; ++j) { - idij += 2; - for (i = 4; i 
<= ido; i += 2) { - idij += 2; - for (k = 1; k <= l1; ++k) { - c1_ref(i - 1, k, j) = wa[idij - 1] * ch_ref(i - 1, k, j) - fsign*wa[idij] * ch_ref(i, k, j); - c1_ref(i, k, j) = wa[idij - 1] * ch_ref(i, k, j) + fsign*wa[idij] * ch_ref(i - 1, k, j); - } - } - } - return; - } - idj = 2 - ido; - for (j = 2; j <= ip; ++j) { - idj += ido; - for (k = 1; k <= l1; ++k) { - idij = idj; - for (i = 4; i <= ido; i += 2) { - idij += 2; - c1_ref(i - 1, k, j) = wa[idij - 1] * ch_ref(i - 1, k, j) - fsign*wa[idij] * ch_ref(i, k, j); - c1_ref(i, k, j) = wa[idij - 1] * ch_ref(i, k, j) + fsign*wa[idij] * ch_ref(i - 1, k, j); - } - } - } -} /* passb */ - -#undef ch2_ref -#undef ch_ref -#undef cc_ref -#undef c2_ref -#undef c1_ref - - -static void passb2(integer ido, integer l1, const real *cc, real *ch, const real *wa1) -{ - /* System generated locals */ - integer cc_offset, ch_offset; - - /* Local variables */ - integer i, k; - real ti2, tr2; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*2 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * (1 + l1); - ch -= ch_offset; - cc_offset = 1 + ido * 3; - cc -= cc_offset; - --wa1; - - /* Function Body */ - if (ido <= 2) { - for (k = 1; k <= l1; ++k) { - ch_ref(1, k, 1) = cc_ref(1, 1, k) + cc_ref(1, 2, k); - ch_ref(1, k, 2) = cc_ref(1, 1, k) - cc_ref(1, 2, k); - ch_ref(2, k, 1) = cc_ref(2, 1, k) + cc_ref(2, 2, k); - ch_ref(2, k, 2) = cc_ref(2, 1, k) - cc_ref(2, 2, k); - } - return; - } - for (k = 1; k <= l1; ++k) { - for (i = 2; i <= ido; i += 2) { - ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + cc_ref(i - 1, 2, k); - tr2 = cc_ref(i - 1, 1, k) - cc_ref(i - 1, 2, k); - ch_ref(i, k, 1) = cc_ref(i, 1, k) + cc_ref(i, 2, k); - ti2 = cc_ref(i, 1, k) - cc_ref(i, 2, k); - ch_ref(i, k, 2) = wa1[i - 1] * ti2 + wa1[i] * tr2; - ch_ref(i - 1, k, 2) = wa1[i - 1] * tr2 - wa1[i] * ti2; - } - } -} /* passb2 */ - -#undef ch_ref -#undef cc_ref - - -static void passb3(integer ido, 
integer l1, const real *cc, real *ch, const real *wa1, const real *wa2) -{ - static const real taur = -.5f; - static const real taui = .866025403784439f; - - /* System generated locals */ - integer cc_offset, ch_offset; - - /* Local variables */ - integer i, k; - real ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*3 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * (1 + l1); - ch -= ch_offset; - cc_offset = 1 + (ido << 2); - cc -= cc_offset; - --wa1; - --wa2; - - /* Function Body */ - if (ido == 2) { - for (k = 1; k <= l1; ++k) { - tr2 = cc_ref(1, 2, k) + cc_ref(1, 3, k); - cr2 = cc_ref(1, 1, k) + taur * tr2; - ch_ref(1, k, 1) = cc_ref(1, 1, k) + tr2; - ti2 = cc_ref(2, 2, k) + cc_ref(2, 3, k); - ci2 = cc_ref(2, 1, k) + taur * ti2; - ch_ref(2, k, 1) = cc_ref(2, 1, k) + ti2; - cr3 = taui * (cc_ref(1, 2, k) - cc_ref(1, 3, k)); - ci3 = taui * (cc_ref(2, 2, k) - cc_ref(2, 3, k)); - ch_ref(1, k, 2) = cr2 - ci3; - ch_ref(1, k, 3) = cr2 + ci3; - ch_ref(2, k, 2) = ci2 + cr3; - ch_ref(2, k, 3) = ci2 - cr3; - } - } else { - for (k = 1; k <= l1; ++k) { - for (i = 2; i <= ido; i += 2) { - tr2 = cc_ref(i - 1, 2, k) + cc_ref(i - 1, 3, k); - cr2 = cc_ref(i - 1, 1, k) + taur * tr2; - ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + tr2; - ti2 = cc_ref(i, 2, k) + cc_ref(i, 3, k); - ci2 = cc_ref(i, 1, k) + taur * ti2; - ch_ref(i, k, 1) = cc_ref(i, 1, k) + ti2; - cr3 = taui * (cc_ref(i - 1, 2, k) - cc_ref(i - 1, 3, k)); - ci3 = taui * (cc_ref(i, 2, k) - cc_ref(i, 3, k)); - dr2 = cr2 - ci3; - dr3 = cr2 + ci3; - di2 = ci2 + cr3; - di3 = ci2 - cr3; - ch_ref(i, k, 2) = wa1[i - 1] * di2 + wa1[i] * dr2; - ch_ref(i - 1, k, 2) = wa1[i - 1] * dr2 - wa1[i] * di2; - ch_ref(i, k, 3) = wa2[i - 1] * di3 + wa2[i] * dr3; - ch_ref(i - 1, k, 3) = wa2[i - 1] * dr3 - wa2[i] * di3; - } - } - } -} /* passb3 */ - -#undef ch_ref -#undef cc_ref - - -static void passb4(integer ido, 
integer l1, const real *cc, real *ch, - const real *wa1, const real *wa2, const real *wa3) -{ - /* System generated locals */ - integer cc_offset, ch_offset; - - /* Local variables */ - integer i, k; - real ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*4 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * (1 + l1); - ch -= ch_offset; - cc_offset = 1 + ido * 5; - cc -= cc_offset; - --wa1; - --wa2; - --wa3; - - /* Function Body */ - if (ido == 2) { - for (k = 1; k <= l1; ++k) { - ti1 = cc_ref(2, 1, k) - cc_ref(2, 3, k); - ti2 = cc_ref(2, 1, k) + cc_ref(2, 3, k); - tr4 = cc_ref(2, 4, k) - cc_ref(2, 2, k); - ti3 = cc_ref(2, 2, k) + cc_ref(2, 4, k); - tr1 = cc_ref(1, 1, k) - cc_ref(1, 3, k); - tr2 = cc_ref(1, 1, k) + cc_ref(1, 3, k); - ti4 = cc_ref(1, 2, k) - cc_ref(1, 4, k); - tr3 = cc_ref(1, 2, k) + cc_ref(1, 4, k); - ch_ref(1, k, 1) = tr2 + tr3; - ch_ref(1, k, 3) = tr2 - tr3; - ch_ref(2, k, 1) = ti2 + ti3; - ch_ref(2, k, 3) = ti2 - ti3; - ch_ref(1, k, 2) = tr1 + tr4; - ch_ref(1, k, 4) = tr1 - tr4; - ch_ref(2, k, 2) = ti1 + ti4; - ch_ref(2, k, 4) = ti1 - ti4; - } - } else { - for (k = 1; k <= l1; ++k) { - for (i = 2; i <= ido; i += 2) { - ti1 = cc_ref(i, 1, k) - cc_ref(i, 3, k); - ti2 = cc_ref(i, 1, k) + cc_ref(i, 3, k); - ti3 = cc_ref(i, 2, k) + cc_ref(i, 4, k); - tr4 = cc_ref(i, 4, k) - cc_ref(i, 2, k); - tr1 = cc_ref(i - 1, 1, k) - cc_ref(i - 1, 3, k); - tr2 = cc_ref(i - 1, 1, k) + cc_ref(i - 1, 3, k); - ti4 = cc_ref(i - 1, 2, k) - cc_ref(i - 1, 4, k); - tr3 = cc_ref(i - 1, 2, k) + cc_ref(i - 1, 4, k); - ch_ref(i - 1, k, 1) = tr2 + tr3; - cr3 = tr2 - tr3; - ch_ref(i, k, 1) = ti2 + ti3; - ci3 = ti2 - ti3; - cr2 = tr1 + tr4; - cr4 = tr1 - tr4; - ci2 = ti1 + ti4; - ci4 = ti1 - ti4; - ch_ref(i - 1, k, 2) = wa1[i - 1] * cr2 - wa1[i] * ci2; - ch_ref(i, k, 2) = wa1[i - 1] * ci2 + wa1[i] * cr2; - ch_ref(i - 1, k, 3) = 
wa2[i - 1] * cr3 - wa2[i] * ci3; - ch_ref(i, k, 3) = wa2[i - 1] * ci3 + wa2[i] * cr3; - ch_ref(i - 1, k, 4) = wa3[i - 1] * cr4 - wa3[i] * ci4; - ch_ref(i, k, 4) = wa3[i - 1] * ci4 + wa3[i] * cr4; - } - } - } -} /* passb4 */ - -#undef ch_ref -#undef cc_ref - -/* passf5 and passb5 merged */ -static void passfb5(integer ido, integer l1, const real *cc, real *ch, - const real *wa1, const real *wa2, const real *wa3, const real *wa4, real fsign) -{ - const real tr11 = .309016994374947f; - const real ti11 = .951056516295154f*fsign; - const real tr12 = -.809016994374947f; - const real ti12 = .587785252292473f*fsign; - - /* System generated locals */ - integer cc_offset, ch_offset; - - /* Local variables */ - integer i, k; - real ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3, - ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*5 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * (1 + l1); - ch -= ch_offset; - cc_offset = 1 + ido * 6; - cc -= cc_offset; - --wa1; - --wa2; - --wa3; - --wa4; - - /* Function Body */ - if (ido == 2) { - for (k = 1; k <= l1; ++k) { - ti5 = cc_ref(2, 2, k) - cc_ref(2, 5, k); - ti2 = cc_ref(2, 2, k) + cc_ref(2, 5, k); - ti4 = cc_ref(2, 3, k) - cc_ref(2, 4, k); - ti3 = cc_ref(2, 3, k) + cc_ref(2, 4, k); - tr5 = cc_ref(1, 2, k) - cc_ref(1, 5, k); - tr2 = cc_ref(1, 2, k) + cc_ref(1, 5, k); - tr4 = cc_ref(1, 3, k) - cc_ref(1, 4, k); - tr3 = cc_ref(1, 3, k) + cc_ref(1, 4, k); - ch_ref(1, k, 1) = cc_ref(1, 1, k) + tr2 + tr3; - ch_ref(2, k, 1) = cc_ref(2, 1, k) + ti2 + ti3; - cr2 = cc_ref(1, 1, k) + tr11 * tr2 + tr12 * tr3; - ci2 = cc_ref(2, 1, k) + tr11 * ti2 + tr12 * ti3; - cr3 = cc_ref(1, 1, k) + tr12 * tr2 + tr11 * tr3; - ci3 = cc_ref(2, 1, k) + tr12 * ti2 + tr11 * ti3; - cr5 = ti11 * tr5 + ti12 * tr4; - ci5 = ti11 * ti5 + ti12 * ti4; - cr4 = ti12 * tr5 - ti11 * tr4; - ci4 = ti12 * ti5 - ti11 * ti4; - 
ch_ref(1, k, 2) = cr2 - ci5; - ch_ref(1, k, 5) = cr2 + ci5; - ch_ref(2, k, 2) = ci2 + cr5; - ch_ref(2, k, 3) = ci3 + cr4; - ch_ref(1, k, 3) = cr3 - ci4; - ch_ref(1, k, 4) = cr3 + ci4; - ch_ref(2, k, 4) = ci3 - cr4; - ch_ref(2, k, 5) = ci2 - cr5; - } - } else { - for (k = 1; k <= l1; ++k) { - for (i = 2; i <= ido; i += 2) { - ti5 = cc_ref(i, 2, k) - cc_ref(i, 5, k); - ti2 = cc_ref(i, 2, k) + cc_ref(i, 5, k); - ti4 = cc_ref(i, 3, k) - cc_ref(i, 4, k); - ti3 = cc_ref(i, 3, k) + cc_ref(i, 4, k); - tr5 = cc_ref(i - 1, 2, k) - cc_ref(i - 1, 5, k); - tr2 = cc_ref(i - 1, 2, k) + cc_ref(i - 1, 5, k); - tr4 = cc_ref(i - 1, 3, k) - cc_ref(i - 1, 4, k); - tr3 = cc_ref(i - 1, 3, k) + cc_ref(i - 1, 4, k); - ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + tr2 + tr3; - ch_ref(i, k, 1) = cc_ref(i, 1, k) + ti2 + ti3; - cr2 = cc_ref(i - 1, 1, k) + tr11 * tr2 + tr12 * tr3; - ci2 = cc_ref(i, 1, k) + tr11 * ti2 + tr12 * ti3; - cr3 = cc_ref(i - 1, 1, k) + tr12 * tr2 + tr11 * tr3; - ci3 = cc_ref(i, 1, k) + tr12 * ti2 + tr11 * ti3; - cr5 = ti11 * tr5 + ti12 * tr4; - ci5 = ti11 * ti5 + ti12 * ti4; - cr4 = ti12 * tr5 - ti11 * tr4; - ci4 = ti12 * ti5 - ti11 * ti4; - dr3 = cr3 - ci4; - dr4 = cr3 + ci4; - di3 = ci3 + cr4; - di4 = ci3 - cr4; - dr5 = cr2 + ci5; - dr2 = cr2 - ci5; - di5 = ci2 - cr5; - di2 = ci2 + cr5; - ch_ref(i - 1, k, 2) = wa1[i - 1] * dr2 - fsign*wa1[i] * di2; - ch_ref(i, k, 2) = wa1[i - 1] * di2 + fsign*wa1[i] * dr2; - ch_ref(i - 1, k, 3) = wa2[i - 1] * dr3 - fsign*wa2[i] * di3; - ch_ref(i, k, 3) = wa2[i - 1] * di3 + fsign*wa2[i] * dr3; - ch_ref(i - 1, k, 4) = wa3[i - 1] * dr4 - fsign*wa3[i] * di4; - ch_ref(i, k, 4) = wa3[i - 1] * di4 + fsign*wa3[i] * dr4; - ch_ref(i - 1, k, 5) = wa4[i - 1] * dr5 - fsign*wa4[i] * di5; - ch_ref(i, k, 5) = wa4[i - 1] * di5 + fsign*wa4[i] * dr5; - } - } - } -} /* passb5 */ - -#undef ch_ref -#undef cc_ref - -static void passf2(integer ido, integer l1, const real *cc, real *ch, const real *wa1) -{ - /* System generated locals */ - integer cc_offset, 
ch_offset; - - /* Local variables */ - integer i, k; - real ti2, tr2; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*2 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * (1 + l1); - ch -= ch_offset; - cc_offset = 1 + ido * 3; - cc -= cc_offset; - --wa1; - - /* Function Body */ - if (ido == 2) { - for (k = 1; k <= l1; ++k) { - ch_ref(1, k, 1) = cc_ref(1, 1, k) + cc_ref(1, 2, k); - ch_ref(1, k, 2) = cc_ref(1, 1, k) - cc_ref(1, 2, k); - ch_ref(2, k, 1) = cc_ref(2, 1, k) + cc_ref(2, 2, k); - ch_ref(2, k, 2) = cc_ref(2, 1, k) - cc_ref(2, 2, k); - } - } else { - for (k = 1; k <= l1; ++k) { - for (i = 2; i <= ido; i += 2) { - ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + cc_ref(i - 1, 2, - k); - tr2 = cc_ref(i - 1, 1, k) - cc_ref(i - 1, 2, k); - ch_ref(i, k, 1) = cc_ref(i, 1, k) + cc_ref(i, 2, k); - ti2 = cc_ref(i, 1, k) - cc_ref(i, 2, k); - ch_ref(i, k, 2) = wa1[i - 1] * ti2 - wa1[i] * tr2; - ch_ref(i - 1, k, 2) = wa1[i - 1] * tr2 + wa1[i] * ti2; - } - } - } -} /* passf2 */ - -#undef ch_ref -#undef cc_ref - - -static void passf3(integer ido, integer l1, const real *cc, real *ch, - const real *wa1, const real *wa2) -{ - static const real taur = -.5f; - static const real taui = -.866025403784439f; - - /* System generated locals */ - integer cc_offset, ch_offset; - - /* Local variables */ - integer i, k; - real ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*3 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * (1 + l1); - ch -= ch_offset; - cc_offset = 1 + (ido << 2); - cc -= cc_offset; - --wa1; - --wa2; - - /* Function Body */ - if (ido == 2) { - for (k = 1; k <= l1; ++k) { - tr2 = cc_ref(1, 2, k) + cc_ref(1, 3, k); - cr2 = cc_ref(1, 1, k) + taur * tr2; - ch_ref(1, k, 1) = cc_ref(1, 1, k) + tr2; - ti2 = cc_ref(2, 2, k) + cc_ref(2, 3, k); - ci2 = cc_ref(2, 1, k) + 
taur * ti2; - ch_ref(2, k, 1) = cc_ref(2, 1, k) + ti2; - cr3 = taui * (cc_ref(1, 2, k) - cc_ref(1, 3, k)); - ci3 = taui * (cc_ref(2, 2, k) - cc_ref(2, 3, k)); - ch_ref(1, k, 2) = cr2 - ci3; - ch_ref(1, k, 3) = cr2 + ci3; - ch_ref(2, k, 2) = ci2 + cr3; - ch_ref(2, k, 3) = ci2 - cr3; - } - } else { - for (k = 1; k <= l1; ++k) { - for (i = 2; i <= ido; i += 2) { - tr2 = cc_ref(i - 1, 2, k) + cc_ref(i - 1, 3, k); - cr2 = cc_ref(i - 1, 1, k) + taur * tr2; - ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + tr2; - ti2 = cc_ref(i, 2, k) + cc_ref(i, 3, k); - ci2 = cc_ref(i, 1, k) + taur * ti2; - ch_ref(i, k, 1) = cc_ref(i, 1, k) + ti2; - cr3 = taui * (cc_ref(i - 1, 2, k) - cc_ref(i - 1, 3, k)); - ci3 = taui * (cc_ref(i, 2, k) - cc_ref(i, 3, k)); - dr2 = cr2 - ci3; - dr3 = cr2 + ci3; - di2 = ci2 + cr3; - di3 = ci2 - cr3; - ch_ref(i, k, 2) = wa1[i - 1] * di2 - wa1[i] * dr2; - ch_ref(i - 1, k, 2) = wa1[i - 1] * dr2 + wa1[i] * di2; - ch_ref(i, k, 3) = wa2[i - 1] * di3 - wa2[i] * dr3; - ch_ref(i - 1, k, 3) = wa2[i - 1] * dr3 + wa2[i] * di3; - } - } - } -} /* passf3 */ - -#undef ch_ref -#undef cc_ref - - -static void passf4(integer ido, integer l1, const real *cc, real *ch, - const real *wa1, const real *wa2, const real *wa3) -{ - /* System generated locals */ - integer cc_offset, ch_offset; - - /* Local variables */ - integer i, k; - real ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*4 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * (1 + l1); - ch -= ch_offset; - cc_offset = 1 + ido * 5; - cc -= cc_offset; - --wa1; - --wa2; - --wa3; - - /* Function Body */ - if (ido == 2) { - for (k = 1; k <= l1; ++k) { - ti1 = cc_ref(2, 1, k) - cc_ref(2, 3, k); - ti2 = cc_ref(2, 1, k) + cc_ref(2, 3, k); - tr4 = cc_ref(2, 2, k) - cc_ref(2, 4, k); - ti3 = cc_ref(2, 2, k) + cc_ref(2, 4, k); - tr1 = cc_ref(1, 1, k) - cc_ref(1, 3, k); - tr2 = 
cc_ref(1, 1, k) + cc_ref(1, 3, k); - ti4 = cc_ref(1, 4, k) - cc_ref(1, 2, k); - tr3 = cc_ref(1, 2, k) + cc_ref(1, 4, k); - ch_ref(1, k, 1) = tr2 + tr3; - ch_ref(1, k, 3) = tr2 - tr3; - ch_ref(2, k, 1) = ti2 + ti3; - ch_ref(2, k, 3) = ti2 - ti3; - ch_ref(1, k, 2) = tr1 + tr4; - ch_ref(1, k, 4) = tr1 - tr4; - ch_ref(2, k, 2) = ti1 + ti4; - ch_ref(2, k, 4) = ti1 - ti4; - } - } else { - for (k = 1; k <= l1; ++k) { - for (i = 2; i <= ido; i += 2) { - ti1 = cc_ref(i, 1, k) - cc_ref(i, 3, k); - ti2 = cc_ref(i, 1, k) + cc_ref(i, 3, k); - ti3 = cc_ref(i, 2, k) + cc_ref(i, 4, k); - tr4 = cc_ref(i, 2, k) - cc_ref(i, 4, k); - tr1 = cc_ref(i - 1, 1, k) - cc_ref(i - 1, 3, k); - tr2 = cc_ref(i - 1, 1, k) + cc_ref(i - 1, 3, k); - ti4 = cc_ref(i - 1, 4, k) - cc_ref(i - 1, 2, k); - tr3 = cc_ref(i - 1, 2, k) + cc_ref(i - 1, 4, k); - ch_ref(i - 1, k, 1) = tr2 + tr3; - cr3 = tr2 - tr3; - ch_ref(i, k, 1) = ti2 + ti3; - ci3 = ti2 - ti3; - cr2 = tr1 + tr4; - cr4 = tr1 - tr4; - ci2 = ti1 + ti4; - ci4 = ti1 - ti4; - ch_ref(i - 1, k, 2) = wa1[i - 1] * cr2 + wa1[i] * ci2; - ch_ref(i, k, 2) = wa1[i - 1] * ci2 - wa1[i] * cr2; - ch_ref(i - 1, k, 3) = wa2[i - 1] * cr3 + wa2[i] * ci3; - ch_ref(i, k, 3) = wa2[i - 1] * ci3 - wa2[i] * cr3; - ch_ref(i - 1, k, 4) = wa3[i - 1] * cr4 + wa3[i] * ci4; - ch_ref(i, k, 4) = wa3[i - 1] * ci4 - wa3[i] * cr4; - } - } - } -} /* passf4 */ - -#undef ch_ref -#undef cc_ref - -static void radb2(integer ido, integer l1, const real *cc, real *ch, const real *wa1) -{ - /* System generated locals */ - integer cc_offset, ch_offset; - - /* Local variables */ - integer i, k, ic; - real ti2, tr2; - integer idp2; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*2 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * (1 + l1); - ch -= ch_offset; - cc_offset = 1 + ido * 3; - cc -= cc_offset; - --wa1; - - /* Function Body */ - for (k = 1; k <= l1; ++k) { - ch_ref(1, k, 1) = cc_ref(1, 1, k) + cc_ref(ido, 
2, k); - ch_ref(1, k, 2) = cc_ref(1, 1, k) - cc_ref(ido, 2, k); - } - if (ido < 2) return; - else if (ido != 2) { - idp2 = ido + 2; - for (k = 1; k <= l1; ++k) { - for (i = 3; i <= ido; i += 2) { - ic = idp2 - i; - ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + cc_ref(ic - 1, 2, - k); - tr2 = cc_ref(i - 1, 1, k) - cc_ref(ic - 1, 2, k); - ch_ref(i, k, 1) = cc_ref(i, 1, k) - cc_ref(ic, 2, k); - ti2 = cc_ref(i, 1, k) + cc_ref(ic, 2, k); - ch_ref(i - 1, k, 2) = wa1[i - 2] * tr2 - wa1[i - 1] * ti2; - ch_ref(i, k, 2) = wa1[i - 2] * ti2 + wa1[i - 1] * tr2; - } - } - if (ido % 2 == 1) return; - } - for (k = 1; k <= l1; ++k) { - ch_ref(ido, k, 1) = cc_ref(ido, 1, k) + cc_ref(ido, 1, k); - ch_ref(ido, k, 2) = -(cc_ref(1, 2, k) + cc_ref(1, 2, k)); - } -} /* radb2 */ - -#undef ch_ref -#undef cc_ref - - -static void radb3(integer ido, integer l1, const real *cc, real *ch, - const real *wa1, const real *wa2) -{ - /* Initialized data */ - - static const real taur = -.5f; - static const real taui = .866025403784439f; - - /* System generated locals */ - integer cc_offset, ch_offset; - - /* Local variables */ - integer i, k, ic; - real ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2; - integer idp2; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*3 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * (1 + l1); - ch -= ch_offset; - cc_offset = 1 + (ido << 2); - cc -= cc_offset; - --wa1; - --wa2; - - /* Function Body */ - for (k = 1; k <= l1; ++k) { - tr2 = cc_ref(ido, 2, k) + cc_ref(ido, 2, k); - cr2 = cc_ref(1, 1, k) + taur * tr2; - ch_ref(1, k, 1) = cc_ref(1, 1, k) + tr2; - ci3 = taui * (cc_ref(1, 3, k) + cc_ref(1, 3, k)); - ch_ref(1, k, 2) = cr2 - ci3; - ch_ref(1, k, 3) = cr2 + ci3; - } - if (ido == 1) { - return; - } - idp2 = ido + 2; - for (k = 1; k <= l1; ++k) { - for (i = 3; i <= ido; i += 2) { - ic = idp2 - i; - tr2 = cc_ref(i - 1, 3, k) + cc_ref(ic - 1, 2, k); - cr2 = cc_ref(i - 1, 1, k) + taur * 
tr2; - ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + tr2; - ti2 = cc_ref(i, 3, k) - cc_ref(ic, 2, k); - ci2 = cc_ref(i, 1, k) + taur * ti2; - ch_ref(i, k, 1) = cc_ref(i, 1, k) + ti2; - cr3 = taui * (cc_ref(i - 1, 3, k) - cc_ref(ic - 1, 2, k)); - ci3 = taui * (cc_ref(i, 3, k) + cc_ref(ic, 2, k)); - dr2 = cr2 - ci3; - dr3 = cr2 + ci3; - di2 = ci2 + cr3; - di3 = ci2 - cr3; - ch_ref(i - 1, k, 2) = wa1[i - 2] * dr2 - wa1[i - 1] * di2; - ch_ref(i, k, 2) = wa1[i - 2] * di2 + wa1[i - 1] * dr2; - ch_ref(i - 1, k, 3) = wa2[i - 2] * dr3 - wa2[i - 1] * di3; - ch_ref(i, k, 3) = wa2[i - 2] * di3 + wa2[i - 1] * dr3; - } - } -} /* radb3 */ - -#undef ch_ref -#undef cc_ref - - -static void radb4(integer ido, integer l1, const real *cc, real *ch, - const real *wa1, const real *wa2, const real *wa3) -{ - /* Initialized data */ - - static const real sqrt2 = 1.414213562373095f; - - /* System generated locals */ - integer cc_offset, ch_offset; - - /* Local variables */ - integer i, k, ic; - real ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4; - integer idp2; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*4 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * (1 + l1); - ch -= ch_offset; - cc_offset = 1 + ido * 5; - cc -= cc_offset; - --wa1; - --wa2; - --wa3; - - /* Function Body */ - for (k = 1; k <= l1; ++k) { - tr1 = cc_ref(1, 1, k) - cc_ref(ido, 4, k); - tr2 = cc_ref(1, 1, k) + cc_ref(ido, 4, k); - tr3 = cc_ref(ido, 2, k) + cc_ref(ido, 2, k); - tr4 = cc_ref(1, 3, k) + cc_ref(1, 3, k); - ch_ref(1, k, 1) = tr2 + tr3; - ch_ref(1, k, 2) = tr1 - tr4; - ch_ref(1, k, 3) = tr2 - tr3; - ch_ref(1, k, 4) = tr1 + tr4; - } - if (ido < 2) return; - if (ido != 2) { - idp2 = ido + 2; - for (k = 1; k <= l1; ++k) { - for (i = 3; i <= ido; i += 2) { - ic = idp2 - i; - ti1 = cc_ref(i, 1, k) + cc_ref(ic, 4, k); - ti2 = cc_ref(i, 1, k) - cc_ref(ic, 4, k); - ti3 = cc_ref(i, 3, k) - cc_ref(ic, 2, k); - tr4 = 
cc_ref(i, 3, k) + cc_ref(ic, 2, k); - tr1 = cc_ref(i - 1, 1, k) - cc_ref(ic - 1, 4, k); - tr2 = cc_ref(i - 1, 1, k) + cc_ref(ic - 1, 4, k); - ti4 = cc_ref(i - 1, 3, k) - cc_ref(ic - 1, 2, k); - tr3 = cc_ref(i - 1, 3, k) + cc_ref(ic - 1, 2, k); - ch_ref(i - 1, k, 1) = tr2 + tr3; - cr3 = tr2 - tr3; - ch_ref(i, k, 1) = ti2 + ti3; - ci3 = ti2 - ti3; - cr2 = tr1 - tr4; - cr4 = tr1 + tr4; - ci2 = ti1 + ti4; - ci4 = ti1 - ti4; - ch_ref(i - 1, k, 2) = wa1[i - 2] * cr2 - wa1[i - 1] * ci2; - ch_ref(i, k, 2) = wa1[i - 2] * ci2 + wa1[i - 1] * cr2; - ch_ref(i - 1, k, 3) = wa2[i - 2] * cr3 - wa2[i - 1] * ci3; - ch_ref(i, k, 3) = wa2[i - 2] * ci3 + wa2[i - 1] * cr3; - ch_ref(i - 1, k, 4) = wa3[i - 2] * cr4 - wa3[i - 1] * ci4; - ch_ref(i, k, 4) = wa3[i - 2] * ci4 + wa3[i - 1] * cr4; - } - } - if (ido % 2 == 1) return; - } - for (k = 1; k <= l1; ++k) { - ti1 = cc_ref(1, 2, k) + cc_ref(1, 4, k); - ti2 = cc_ref(1, 4, k) - cc_ref(1, 2, k); - tr1 = cc_ref(ido, 1, k) - cc_ref(ido, 3, k); - tr2 = cc_ref(ido, 1, k) + cc_ref(ido, 3, k); - ch_ref(ido, k, 1) = tr2 + tr2; - ch_ref(ido, k, 2) = sqrt2 * (tr1 - ti1); - ch_ref(ido, k, 3) = ti2 + ti2; - ch_ref(ido, k, 4) = -sqrt2 * (tr1 + ti1); - } -} /* radb4 */ - -#undef ch_ref -#undef cc_ref - - -static void radb5(integer ido, integer l1, const real *cc, real *ch, - const real *wa1, const real *wa2, const real *wa3, const real *wa4) -{ - /* Initialized data */ - - static const real tr11 = .309016994374947f; - static const real ti11 = .951056516295154f; - static const real tr12 = -.809016994374947f; - static const real ti12 = .587785252292473f; - - /* System generated locals */ - integer cc_offset, ch_offset; - - /* Local variables */ - integer i, k, ic; - real ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3, - ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5; - integer idp2; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*5 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] - - /* Parameter 
adjustments */ - ch_offset = 1 + ido * (1 + l1); - ch -= ch_offset; - cc_offset = 1 + ido * 6; - cc -= cc_offset; - --wa1; - --wa2; - --wa3; - --wa4; - - /* Function Body */ - for (k = 1; k <= l1; ++k) { - ti5 = cc_ref(1, 3, k) + cc_ref(1, 3, k); - ti4 = cc_ref(1, 5, k) + cc_ref(1, 5, k); - tr2 = cc_ref(ido, 2, k) + cc_ref(ido, 2, k); - tr3 = cc_ref(ido, 4, k) + cc_ref(ido, 4, k); - ch_ref(1, k, 1) = cc_ref(1, 1, k) + tr2 + tr3; - cr2 = cc_ref(1, 1, k) + tr11 * tr2 + tr12 * tr3; - cr3 = cc_ref(1, 1, k) + tr12 * tr2 + tr11 * tr3; - ci5 = ti11 * ti5 + ti12 * ti4; - ci4 = ti12 * ti5 - ti11 * ti4; - ch_ref(1, k, 2) = cr2 - ci5; - ch_ref(1, k, 3) = cr3 - ci4; - ch_ref(1, k, 4) = cr3 + ci4; - ch_ref(1, k, 5) = cr2 + ci5; - } - if (ido == 1) { - return; - } - idp2 = ido + 2; - for (k = 1; k <= l1; ++k) { - for (i = 3; i <= ido; i += 2) { - ic = idp2 - i; - ti5 = cc_ref(i, 3, k) + cc_ref(ic, 2, k); - ti2 = cc_ref(i, 3, k) - cc_ref(ic, 2, k); - ti4 = cc_ref(i, 5, k) + cc_ref(ic, 4, k); - ti3 = cc_ref(i, 5, k) - cc_ref(ic, 4, k); - tr5 = cc_ref(i - 1, 3, k) - cc_ref(ic - 1, 2, k); - tr2 = cc_ref(i - 1, 3, k) + cc_ref(ic - 1, 2, k); - tr4 = cc_ref(i - 1, 5, k) - cc_ref(ic - 1, 4, k); - tr3 = cc_ref(i - 1, 5, k) + cc_ref(ic - 1, 4, k); - ch_ref(i - 1, k, 1) = cc_ref(i - 1, 1, k) + tr2 + tr3; - ch_ref(i, k, 1) = cc_ref(i, 1, k) + ti2 + ti3; - cr2 = cc_ref(i - 1, 1, k) + tr11 * tr2 + tr12 * tr3; - ci2 = cc_ref(i, 1, k) + tr11 * ti2 + tr12 * ti3; - cr3 = cc_ref(i - 1, 1, k) + tr12 * tr2 + tr11 * tr3; - ci3 = cc_ref(i, 1, k) + tr12 * ti2 + tr11 * ti3; - cr5 = ti11 * tr5 + ti12 * tr4; - ci5 = ti11 * ti5 + ti12 * ti4; - cr4 = ti12 * tr5 - ti11 * tr4; - ci4 = ti12 * ti5 - ti11 * ti4; - dr3 = cr3 - ci4; - dr4 = cr3 + ci4; - di3 = ci3 + cr4; - di4 = ci3 - cr4; - dr5 = cr2 + ci5; - dr2 = cr2 - ci5; - di5 = ci2 - cr5; - di2 = ci2 + cr5; - ch_ref(i - 1, k, 2) = wa1[i - 2] * dr2 - wa1[i - 1] * di2; - ch_ref(i, k, 2) = wa1[i - 2] * di2 + wa1[i - 1] * dr2; - ch_ref(i - 1, k, 3) = wa2[i - 2] 
* dr3 - wa2[i - 1] * di3; - ch_ref(i, k, 3) = wa2[i - 2] * di3 + wa2[i - 1] * dr3; - ch_ref(i - 1, k, 4) = wa3[i - 2] * dr4 - wa3[i - 1] * di4; - ch_ref(i, k, 4) = wa3[i - 2] * di4 + wa3[i - 1] * dr4; - ch_ref(i - 1, k, 5) = wa4[i - 2] * dr5 - wa4[i - 1] * di5; - ch_ref(i, k, 5) = wa4[i - 2] * di5 + wa4[i - 1] * dr5; - } - } -} /* radb5 */ - -#undef ch_ref -#undef cc_ref - - -static void radbg(integer ido, integer ip, integer l1, integer idl1, - const real *cc, real *c1, real *c2, real *ch, real *ch2, const real *wa) -{ - /* System generated locals */ - integer ch_offset, cc_offset, - c1_offset, c2_offset, ch2_offset; - - /* Local variables */ - integer i, j, k, l, j2, ic, jc, lc, ik, is; - real dc2, ai1, ai2, ar1, ar2, ds2; - integer nbd; - real dcp, arg, dsp, ar1h, ar2h; - integer idp2, ipp2, idij, ipph; - - -#define c1_ref(a_1,a_2,a_3) c1[((a_3)*l1 + (a_2))*ido + a_1] -#define c2_ref(a_1,a_2) c2[(a_2)*idl1 + a_1] -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*ip + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] -#define ch2_ref(a_1,a_2) ch2[(a_2)*idl1 + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * (1 + l1); - ch -= ch_offset; - c1_offset = 1 + ido * (1 + l1); - c1 -= c1_offset; - cc_offset = 1 + ido * (1 + ip); - cc -= cc_offset; - ch2_offset = 1 + idl1; - ch2 -= ch2_offset; - c2_offset = 1 + idl1; - c2 -= c2_offset; - --wa; - - /* Function Body */ - arg = (2*M_PI) / (real) (ip); - dcp = cos(arg); - dsp = sin(arg); - idp2 = ido + 2; - nbd = (ido - 1) / 2; - ipp2 = ip + 2; - ipph = (ip + 1) / 2; - if (ido >= l1) { - for (k = 1; k <= l1; ++k) { - for (i = 1; i <= ido; ++i) { - ch_ref(i, k, 1) = cc_ref(i, 1, k); - } - } - } else { - for (i = 1; i <= ido; ++i) { - for (k = 1; k <= l1; ++k) { - ch_ref(i, k, 1) = cc_ref(i, 1, k); - } - } - } - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - j2 = j + j; - for (k = 1; k <= l1; ++k) { - ch_ref(1, k, j) = cc_ref(ido, j2 - 2, k) + cc_ref(ido, j2 - 2, k); - ch_ref(1, k, jc) = 
cc_ref(1, j2 - 1, k) + cc_ref(1, j2 - 1, k); - } - } - if (ido != 1) { - if (nbd >= l1) { - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - for (k = 1; k <= l1; ++k) { - for (i = 3; i <= ido; i += 2) { - ic = idp2 - i; - ch_ref(i - 1, k, j) = cc_ref(i - 1, (j << 1) - 1, k) + cc_ref(ic - 1, (j << 1) - 2, k); - ch_ref(i - 1, k, jc) = cc_ref(i - 1, (j << 1) - 1, k) - cc_ref(ic - 1, (j << 1) - 2, k); - ch_ref(i, k, j) = cc_ref(i, (j << 1) - 1, k) - cc_ref(ic, (j << 1) - 2, k); - ch_ref(i, k, jc) = cc_ref(i, (j << 1) - 1, k) + cc_ref(ic, (j << 1) - 2, k); - } - } - } - } else { - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - for (i = 3; i <= ido; i += 2) { - ic = idp2 - i; - for (k = 1; k <= l1; ++k) { - ch_ref(i - 1, k, j) = cc_ref(i - 1, (j << 1) - 1, k) + cc_ref(ic - 1, (j << 1) - 2, k); - ch_ref(i - 1, k, jc) = cc_ref(i - 1, (j << 1) - 1, k) - cc_ref(ic - 1, (j << 1) - 2, k); - ch_ref(i, k, j) = cc_ref(i, (j << 1) - 1, k) - cc_ref(ic, (j << 1) - 2, k); - ch_ref(i, k, jc) = cc_ref(i, (j << 1) - 1, k) + cc_ref(ic, (j << 1) - 2, k); - } - } - } - } - } - ar1 = 1.f; - ai1 = 0.f; - for (l = 2; l <= ipph; ++l) { - lc = ipp2 - l; - ar1h = dcp * ar1 - dsp * ai1; - ai1 = dcp * ai1 + dsp * ar1; - ar1 = ar1h; - for (ik = 1; ik <= idl1; ++ik) { - c2_ref(ik, l) = ch2_ref(ik, 1) + ar1 * ch2_ref(ik, 2); - c2_ref(ik, lc) = ai1 * ch2_ref(ik, ip); - } - dc2 = ar1; - ds2 = ai1; - ar2 = ar1; - ai2 = ai1; - for (j = 3; j <= ipph; ++j) { - jc = ipp2 - j; - ar2h = dc2 * ar2 - ds2 * ai2; - ai2 = dc2 * ai2 + ds2 * ar2; - ar2 = ar2h; - for (ik = 1; ik <= idl1; ++ik) { - c2_ref(ik, l) = c2_ref(ik, l) + ar2 * ch2_ref(ik, j); - c2_ref(ik, lc) = c2_ref(ik, lc) + ai2 * ch2_ref(ik, jc); - } - } - } - for (j = 2; j <= ipph; ++j) { - for (ik = 1; ik <= idl1; ++ik) { - ch2_ref(ik, 1) = ch2_ref(ik, 1) + ch2_ref(ik, j); - } - } - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - for (k = 1; k <= l1; ++k) { - ch_ref(1, k, j) = c1_ref(1, k, j) - c1_ref(1, k, jc); - ch_ref(1, k, jc) = c1_ref(1, k, j) + 
c1_ref(1, k, jc); - } - } - if (ido != 1) { - if (nbd >= l1) { - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - for (k = 1; k <= l1; ++k) { - for (i = 3; i <= ido; i += 2) { - ch_ref(i - 1, k, j) = c1_ref(i - 1, k, j) - c1_ref(i, k, jc); - ch_ref(i - 1, k, jc) = c1_ref(i - 1, k, j) + c1_ref(i, k, jc); - ch_ref(i, k, j) = c1_ref(i, k, j) + c1_ref(i - 1, k, jc); - ch_ref(i, k, jc) = c1_ref(i, k, j) - c1_ref(i - 1, k, jc); - } - } - } - } else { - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - for (i = 3; i <= ido; i += 2) { - for (k = 1; k <= l1; ++k) { - ch_ref(i - 1, k, j) = c1_ref(i - 1, k, j) - c1_ref(i, k, jc); - ch_ref(i - 1, k, jc) = c1_ref(i - 1, k, j) + c1_ref(i, k, jc); - ch_ref(i, k, j) = c1_ref(i, k, j) + c1_ref(i - 1, k, jc); - ch_ref(i, k, jc) = c1_ref(i, k, j) - c1_ref(i - 1, k, jc); - } - } - } - } - } - if (ido == 1) { - return; - } - for (ik = 1; ik <= idl1; ++ik) { - c2_ref(ik, 1) = ch2_ref(ik, 1); - } - for (j = 2; j <= ip; ++j) { - for (k = 1; k <= l1; ++k) { - c1_ref(1, k, j) = ch_ref(1, k, j); - } - } - if (nbd <= l1) { - is = -(ido); - for (j = 2; j <= ip; ++j) { - is += ido; - idij = is; - for (i = 3; i <= ido; i += 2) { - idij += 2; - for (k = 1; k <= l1; ++k) { - c1_ref(i - 1, k, j) = wa[idij - 1] * ch_ref(i - 1, k, j) - - wa[idij] * ch_ref(i, k, j); - c1_ref(i, k, j) = wa[idij - 1] * ch_ref(i, k, j) + wa[idij] * ch_ref(i - 1, k, j); - } - } - } - } else { - is = -(ido); - for (j = 2; j <= ip; ++j) { - is += ido; - for (k = 1; k <= l1; ++k) { - idij = is; - for (i = 3; i <= ido; i += 2) { - idij += 2; - c1_ref(i - 1, k, j) = wa[idij - 1] * ch_ref(i - 1, k, j) - - wa[idij] * ch_ref(i, k, j); - c1_ref(i, k, j) = wa[idij - 1] * ch_ref(i, k, j) + wa[idij] * ch_ref(i - 1, k, j); - } - } - } - } -} /* radbg */ - -#undef ch2_ref -#undef ch_ref -#undef cc_ref -#undef c2_ref -#undef c1_ref - - -static void radf2(integer ido, integer l1, const real *cc, real *ch, - const real *wa1) -{ - /* System generated locals */ - integer ch_offset, 
cc_offset; - - /* Local variables */ - integer i, k, ic; - real ti2, tr2; - integer idp2; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*2 + (a_2))*ido + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * 3; - ch -= ch_offset; - cc_offset = 1 + ido * (1 + l1); - cc -= cc_offset; - --wa1; - - /* Function Body */ - for (k = 1; k <= l1; ++k) { - ch_ref(1, 1, k) = cc_ref(1, k, 1) + cc_ref(1, k, 2); - ch_ref(ido, 2, k) = cc_ref(1, k, 1) - cc_ref(1, k, 2); - } - if (ido < 2) return; - if (ido != 2) { - idp2 = ido + 2; - for (k = 1; k <= l1; ++k) { - for (i = 3; i <= ido; i += 2) { - ic = idp2 - i; - tr2 = wa1[i - 2] * cc_ref(i - 1, k, 2) + wa1[i - 1] * - cc_ref(i, k, 2); - ti2 = wa1[i - 2] * cc_ref(i, k, 2) - wa1[i - 1] * cc_ref( - i - 1, k, 2); - ch_ref(i, 1, k) = cc_ref(i, k, 1) + ti2; - ch_ref(ic, 2, k) = ti2 - cc_ref(i, k, 1); - ch_ref(i - 1, 1, k) = cc_ref(i - 1, k, 1) + tr2; - ch_ref(ic - 1, 2, k) = cc_ref(i - 1, k, 1) - tr2; - } - } - if (ido % 2 == 1) { - return; - } - } - for (k = 1; k <= l1; ++k) { - ch_ref(1, 2, k) = -cc_ref(ido, k, 2); - ch_ref(ido, 1, k) = cc_ref(ido, k, 1); - } -} /* radf2 */ - -#undef ch_ref -#undef cc_ref - - -static void radf3(integer ido, integer l1, const real *cc, real *ch, - const real *wa1, const real *wa2) -{ - static const real taur = -.5f; - static const real taui = .866025403784439f; - - /* System generated locals */ - integer ch_offset, cc_offset; - - /* Local variables */ - integer i, k, ic; - real ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3; - integer idp2; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*3 + (a_2))*ido + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + (ido << 2); - ch -= ch_offset; - cc_offset = 1 + ido * (1 + l1); - cc -= cc_offset; - --wa1; - --wa2; - - /* Function Body */ - for (k = 1; k <= l1; ++k) { - cr2 = cc_ref(1, k, 2) + cc_ref(1, k, 3); - ch_ref(1, 1, k) = 
cc_ref(1, k, 1) + cr2; - ch_ref(1, 3, k) = taui * (cc_ref(1, k, 3) - cc_ref(1, k, 2)); - ch_ref(ido, 2, k) = cc_ref(1, k, 1) + taur * cr2; - } - if (ido == 1) { - return; - } - idp2 = ido + 2; - for (k = 1; k <= l1; ++k) { - for (i = 3; i <= ido; i += 2) { - ic = idp2 - i; - dr2 = wa1[i - 2] * cc_ref(i - 1, k, 2) + wa1[i - 1] * - cc_ref(i, k, 2); - di2 = wa1[i - 2] * cc_ref(i, k, 2) - wa1[i - 1] * cc_ref( - i - 1, k, 2); - dr3 = wa2[i - 2] * cc_ref(i - 1, k, 3) + wa2[i - 1] * - cc_ref(i, k, 3); - di3 = wa2[i - 2] * cc_ref(i, k, 3) - wa2[i - 1] * cc_ref( - i - 1, k, 3); - cr2 = dr2 + dr3; - ci2 = di2 + di3; - ch_ref(i - 1, 1, k) = cc_ref(i - 1, k, 1) + cr2; - ch_ref(i, 1, k) = cc_ref(i, k, 1) + ci2; - tr2 = cc_ref(i - 1, k, 1) + taur * cr2; - ti2 = cc_ref(i, k, 1) + taur * ci2; - tr3 = taui * (di2 - di3); - ti3 = taui * (dr3 - dr2); - ch_ref(i - 1, 3, k) = tr2 + tr3; - ch_ref(ic - 1, 2, k) = tr2 - tr3; - ch_ref(i, 3, k) = ti2 + ti3; - ch_ref(ic, 2, k) = ti3 - ti2; - } - } -} /* radf3 */ - -#undef ch_ref -#undef cc_ref - - -static void radf4(integer ido, integer l1, const real *cc, real *ch, - const real *wa1, const real *wa2, const real *wa3) -{ - /* Initialized data */ - - static const real hsqt2 = .7071067811865475f; - - /* System generated locals */ - integer cc_offset, ch_offset; - - /* Local variables */ - integer i, k, ic; - real ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4; - integer idp2; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*4 + (a_2))*ido + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * 5; - ch -= ch_offset; - cc_offset = 1 + ido * (1 + l1); - cc -= cc_offset; - --wa1; - --wa2; - --wa3; - - /* Function Body */ - for (k = 1; k <= l1; ++k) { - tr1 = cc_ref(1, k, 2) + cc_ref(1, k, 4); - tr2 = cc_ref(1, k, 1) + cc_ref(1, k, 3); - ch_ref(1, 1, k) = tr1 + tr2; - ch_ref(ido, 4, k) = tr2 - tr1; - ch_ref(ido, 2, k) = cc_ref(1, k, 1) - cc_ref(1, k, 3); - 
ch_ref(1, 3, k) = cc_ref(1, k, 4) - cc_ref(1, k, 2); - } - if (ido < 2) return; - if (ido != 2) { - idp2 = ido + 2; - for (k = 1; k <= l1; ++k) { - for (i = 3; i <= ido; i += 2) { - ic = idp2 - i; - cr2 = wa1[i - 2] * cc_ref(i - 1, k, 2) + wa1[i - 1] * - cc_ref(i, k, 2); - ci2 = wa1[i - 2] * cc_ref(i, k, 2) - wa1[i - 1] * cc_ref( - i - 1, k, 2); - cr3 = wa2[i - 2] * cc_ref(i - 1, k, 3) + wa2[i - 1] * - cc_ref(i, k, 3); - ci3 = wa2[i - 2] * cc_ref(i, k, 3) - wa2[i - 1] * cc_ref( - i - 1, k, 3); - cr4 = wa3[i - 2] * cc_ref(i - 1, k, 4) + wa3[i - 1] * - cc_ref(i, k, 4); - ci4 = wa3[i - 2] * cc_ref(i, k, 4) - wa3[i - 1] * cc_ref( - i - 1, k, 4); - tr1 = cr2 + cr4; - tr4 = cr4 - cr2; - ti1 = ci2 + ci4; - ti4 = ci2 - ci4; - ti2 = cc_ref(i, k, 1) + ci3; - ti3 = cc_ref(i, k, 1) - ci3; - tr2 = cc_ref(i - 1, k, 1) + cr3; - tr3 = cc_ref(i - 1, k, 1) - cr3; - ch_ref(i - 1, 1, k) = tr1 + tr2; - ch_ref(ic - 1, 4, k) = tr2 - tr1; - ch_ref(i, 1, k) = ti1 + ti2; - ch_ref(ic, 4, k) = ti1 - ti2; - ch_ref(i - 1, 3, k) = ti4 + tr3; - ch_ref(ic - 1, 2, k) = tr3 - ti4; - ch_ref(i, 3, k) = tr4 + ti3; - ch_ref(ic, 2, k) = tr4 - ti3; - } - } - if (ido % 2 == 1) { - return; - } - } - for (k = 1; k <= l1; ++k) { - ti1 = -hsqt2 * (cc_ref(ido, k, 2) + cc_ref(ido, k, 4)); - tr1 = hsqt2 * (cc_ref(ido, k, 2) - cc_ref(ido, k, 4)); - ch_ref(ido, 1, k) = tr1 + cc_ref(ido, k, 1); - ch_ref(ido, 3, k) = cc_ref(ido, k, 1) - tr1; - ch_ref(1, 2, k) = ti1 - cc_ref(ido, k, 3); - ch_ref(1, 4, k) = ti1 + cc_ref(ido, k, 3); - } -} /* radf4 */ - -#undef ch_ref -#undef cc_ref - - -static void radf5(integer ido, integer l1, const real *cc, real *ch, - const real *wa1, const real *wa2, const real *wa3, const real *wa4) -{ - /* Initialized data */ - - static const real tr11 = .309016994374947f; - static const real ti11 = .951056516295154f; - static const real tr12 = -.809016994374947f; - static const real ti12 = .587785252292473f; - - /* System generated locals */ - integer cc_offset, ch_offset; - - /* Local 
variables */ - integer i, k, ic; - real ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3, dr4, dr5, - cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5; - integer idp2; - - -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*5 + (a_2))*ido + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * 6; - ch -= ch_offset; - cc_offset = 1 + ido * (1 + l1); - cc -= cc_offset; - --wa1; - --wa2; - --wa3; - --wa4; - - /* Function Body */ - for (k = 1; k <= l1; ++k) { - cr2 = cc_ref(1, k, 5) + cc_ref(1, k, 2); - ci5 = cc_ref(1, k, 5) - cc_ref(1, k, 2); - cr3 = cc_ref(1, k, 4) + cc_ref(1, k, 3); - ci4 = cc_ref(1, k, 4) - cc_ref(1, k, 3); - ch_ref(1, 1, k) = cc_ref(1, k, 1) + cr2 + cr3; - ch_ref(ido, 2, k) = cc_ref(1, k, 1) + tr11 * cr2 + tr12 * cr3; - ch_ref(1, 3, k) = ti11 * ci5 + ti12 * ci4; - ch_ref(ido, 4, k) = cc_ref(1, k, 1) + tr12 * cr2 + tr11 * cr3; - ch_ref(1, 5, k) = ti12 * ci5 - ti11 * ci4; - } - if (ido == 1) { - return; - } - idp2 = ido + 2; - for (k = 1; k <= l1; ++k) { - for (i = 3; i <= ido; i += 2) { - ic = idp2 - i; - dr2 = wa1[i - 2] * cc_ref(i - 1, k, 2) + wa1[i - 1] * cc_ref(i, k, 2); - di2 = wa1[i - 2] * cc_ref(i, k, 2) - wa1[i - 1] * cc_ref(i - 1, k, 2); - dr3 = wa2[i - 2] * cc_ref(i - 1, k, 3) + wa2[i - 1] * cc_ref(i, k, 3); - di3 = wa2[i - 2] * cc_ref(i, k, 3) - wa2[i - 1] * cc_ref(i - 1, k, 3); - dr4 = wa3[i - 2] * cc_ref(i - 1, k, 4) + wa3[i - 1] * cc_ref(i, k, 4); - di4 = wa3[i - 2] * cc_ref(i, k, 4) - wa3[i - 1] * cc_ref(i - 1, k, 4); - dr5 = wa4[i - 2] * cc_ref(i - 1, k, 5) + wa4[i - 1] * cc_ref(i, k, 5); - di5 = wa4[i - 2] * cc_ref(i, k, 5) - wa4[i - 1] * cc_ref(i - 1, k, 5); - cr2 = dr2 + dr5; - ci5 = dr5 - dr2; - cr5 = di2 - di5; - ci2 = di2 + di5; - cr3 = dr3 + dr4; - ci4 = dr4 - dr3; - cr4 = di3 - di4; - ci3 = di3 + di4; - ch_ref(i - 1, 1, k) = cc_ref(i - 1, k, 1) + cr2 + cr3; - ch_ref(i, 1, k) = cc_ref(i, k, 1) + ci2 + ci3; - tr2 = cc_ref(i - 1, k, 1) + tr11 * cr2 + tr12 * cr3; 
- ti2 = cc_ref(i, k, 1) + tr11 * ci2 + tr12 * ci3; - tr3 = cc_ref(i - 1, k, 1) + tr12 * cr2 + tr11 * cr3; - ti3 = cc_ref(i, k, 1) + tr12 * ci2 + tr11 * ci3; - tr5 = ti11 * cr5 + ti12 * cr4; - ti5 = ti11 * ci5 + ti12 * ci4; - tr4 = ti12 * cr5 - ti11 * cr4; - ti4 = ti12 * ci5 - ti11 * ci4; - ch_ref(i - 1, 3, k) = tr2 + tr5; - ch_ref(ic - 1, 2, k) = tr2 - tr5; - ch_ref(i, 3, k) = ti2 + ti5; - ch_ref(ic, 2, k) = ti5 - ti2; - ch_ref(i - 1, 5, k) = tr3 + tr4; - ch_ref(ic - 1, 4, k) = tr3 - tr4; - ch_ref(i, 5, k) = ti3 + ti4; - ch_ref(ic, 4, k) = ti4 - ti3; - } - } -} /* radf5 */ - -#undef ch_ref -#undef cc_ref - - -static void radfg(integer ido, integer ip, integer l1, integer idl1, - real *cc, real *c1, real *c2, real *ch, real *ch2, const real *wa) -{ - /* System generated locals */ - integer ch_offset, cc_offset, - c1_offset, c2_offset, ch2_offset; - - /* Local variables */ - integer i, j, k, l, j2, ic, jc, lc, ik, is; - real dc2, ai1, ai2, ar1, ar2, ds2; - integer nbd; - real dcp, arg, dsp, ar1h, ar2h; - integer idp2, ipp2, idij, ipph; - - -#define c1_ref(a_1,a_2,a_3) c1[((a_3)*l1 + (a_2))*ido + a_1] -#define c2_ref(a_1,a_2) c2[(a_2)*idl1 + a_1] -#define cc_ref(a_1,a_2,a_3) cc[((a_3)*ip + (a_2))*ido + a_1] -#define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] -#define ch2_ref(a_1,a_2) ch2[(a_2)*idl1 + a_1] - - /* Parameter adjustments */ - ch_offset = 1 + ido * (1 + l1); - ch -= ch_offset; - c1_offset = 1 + ido * (1 + l1); - c1 -= c1_offset; - cc_offset = 1 + ido * (1 + ip); - cc -= cc_offset; - ch2_offset = 1 + idl1; - ch2 -= ch2_offset; - c2_offset = 1 + idl1; - c2 -= c2_offset; - --wa; - - /* Function Body */ - arg = (2*M_PI) / (real) (ip); - dcp = cos(arg); - dsp = sin(arg); - ipph = (ip + 1) / 2; - ipp2 = ip + 2; - idp2 = ido + 2; - nbd = (ido - 1) / 2; - if (ido == 1) { - for (ik = 1; ik <= idl1; ++ik) { - c2_ref(ik, 1) = ch2_ref(ik, 1); - } - } else { - for (ik = 1; ik <= idl1; ++ik) { - ch2_ref(ik, 1) = c2_ref(ik, 1); - } - for (j = 2; j <= ip; ++j) { 
- for (k = 1; k <= l1; ++k) { - ch_ref(1, k, j) = c1_ref(1, k, j); - } - } - if (nbd <= l1) { - is = -(ido); - for (j = 2; j <= ip; ++j) { - is += ido; - idij = is; - for (i = 3; i <= ido; i += 2) { - idij += 2; - for (k = 1; k <= l1; ++k) { - ch_ref(i - 1, k, j) = wa[idij - 1] * c1_ref(i - 1, k, j) - + wa[idij] * c1_ref(i, k, j); - ch_ref(i, k, j) = wa[idij - 1] * c1_ref(i, k, j) - wa[ - idij] * c1_ref(i - 1, k, j); - } - } - } - } else { - is = -(ido); - for (j = 2; j <= ip; ++j) { - is += ido; - for (k = 1; k <= l1; ++k) { - idij = is; - for (i = 3; i <= ido; i += 2) { - idij += 2; - ch_ref(i - 1, k, j) = wa[idij - 1] * c1_ref(i - 1, k, j) - + wa[idij] * c1_ref(i, k, j); - ch_ref(i, k, j) = wa[idij - 1] * c1_ref(i, k, j) - wa[ - idij] * c1_ref(i - 1, k, j); - } - } - } - } - if (nbd >= l1) { - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - for (k = 1; k <= l1; ++k) { - for (i = 3; i <= ido; i += 2) { - c1_ref(i - 1, k, j) = ch_ref(i - 1, k, j) + ch_ref(i - - 1, k, jc); - c1_ref(i - 1, k, jc) = ch_ref(i, k, j) - ch_ref(i, k, - jc); - c1_ref(i, k, j) = ch_ref(i, k, j) + ch_ref(i, k, jc); - c1_ref(i, k, jc) = ch_ref(i - 1, k, jc) - ch_ref(i - 1, - k, j); - } - } - } - } else { - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - for (i = 3; i <= ido; i += 2) { - for (k = 1; k <= l1; ++k) { - c1_ref(i - 1, k, j) = ch_ref(i - 1, k, j) + ch_ref(i - - 1, k, jc); - c1_ref(i - 1, k, jc) = ch_ref(i, k, j) - ch_ref(i, k, - jc); - c1_ref(i, k, j) = ch_ref(i, k, j) + ch_ref(i, k, jc); - c1_ref(i, k, jc) = ch_ref(i - 1, k, jc) - ch_ref(i - 1, - k, j); - } - } - } - } - } - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - for (k = 1; k <= l1; ++k) { - c1_ref(1, k, j) = ch_ref(1, k, j) + ch_ref(1, k, jc); - c1_ref(1, k, jc) = ch_ref(1, k, jc) - ch_ref(1, k, j); - } - } - - ar1 = 1.f; - ai1 = 0.f; - for (l = 2; l <= ipph; ++l) { - lc = ipp2 - l; - ar1h = dcp * ar1 - dsp * ai1; - ai1 = dcp * ai1 + dsp * ar1; - ar1 = ar1h; - for (ik = 1; ik <= idl1; ++ik) { - ch2_ref(ik, l) = 
c2_ref(ik, 1) + ar1 * c2_ref(ik, 2); - ch2_ref(ik, lc) = ai1 * c2_ref(ik, ip); - } - dc2 = ar1; - ds2 = ai1; - ar2 = ar1; - ai2 = ai1; - for (j = 3; j <= ipph; ++j) { - jc = ipp2 - j; - ar2h = dc2 * ar2 - ds2 * ai2; - ai2 = dc2 * ai2 + ds2 * ar2; - ar2 = ar2h; - for (ik = 1; ik <= idl1; ++ik) { - ch2_ref(ik, l) = ch2_ref(ik, l) + ar2 * c2_ref(ik, j); - ch2_ref(ik, lc) = ch2_ref(ik, lc) + ai2 * c2_ref(ik, jc); - } - } - } - for (j = 2; j <= ipph; ++j) { - for (ik = 1; ik <= idl1; ++ik) { - ch2_ref(ik, 1) = ch2_ref(ik, 1) + c2_ref(ik, j); - } - } - - if (ido >= l1) { - for (k = 1; k <= l1; ++k) { - for (i = 1; i <= ido; ++i) { - cc_ref(i, 1, k) = ch_ref(i, k, 1); - } - } - } else { - for (i = 1; i <= ido; ++i) { - for (k = 1; k <= l1; ++k) { - cc_ref(i, 1, k) = ch_ref(i, k, 1); - } - } - } - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - j2 = j + j; - for (k = 1; k <= l1; ++k) { - cc_ref(ido, j2 - 2, k) = ch_ref(1, k, j); - cc_ref(1, j2 - 1, k) = ch_ref(1, k, jc); - } - } - if (ido == 1) { - return; - } - if (nbd >= l1) { - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - j2 = j + j; - for (k = 1; k <= l1; ++k) { - for (i = 3; i <= ido; i += 2) { - ic = idp2 - i; - cc_ref(i - 1, j2 - 1, k) = ch_ref(i - 1, k, j) + ch_ref( - i - 1, k, jc); - cc_ref(ic - 1, j2 - 2, k) = ch_ref(i - 1, k, j) - ch_ref( - i - 1, k, jc); - cc_ref(i, j2 - 1, k) = ch_ref(i, k, j) + ch_ref(i, k, - jc); - cc_ref(ic, j2 - 2, k) = ch_ref(i, k, jc) - ch_ref(i, k, j) - ; - } - } - } - } else { - for (j = 2; j <= ipph; ++j) { - jc = ipp2 - j; - j2 = j + j; - for (i = 3; i <= ido; i += 2) { - ic = idp2 - i; - for (k = 1; k <= l1; ++k) { - cc_ref(i - 1, j2 - 1, k) = ch_ref(i - 1, k, j) + ch_ref( - i - 1, k, jc); - cc_ref(ic - 1, j2 - 2, k) = ch_ref(i - 1, k, j) - ch_ref( - i - 1, k, jc); - cc_ref(i, j2 - 1, k) = ch_ref(i, k, j) + ch_ref(i, k, - jc); - cc_ref(ic, j2 - 2, k) = ch_ref(i, k, jc) - ch_ref(i, k, j) - ; - } - } - } - } -} /* radfg */ - -#undef ch2_ref -#undef ch_ref -#undef cc_ref -#undef 
c2_ref -#undef c1_ref - - -static void cfftb1(integer n, real *c, real *ch, const real *wa, integer *ifac) -{ - integer i, k1, l1, l2, na, nf, ip, iw, ix2, ix3, ix4, nac, ido, - idl1, idot; - - /* Function Body */ - nf = ifac[1]; - na = 0; - l1 = 1; - iw = 0; - for (k1 = 1; k1 <= nf; ++k1) { - ip = ifac[k1 + 1]; - l2 = ip * l1; - ido = n / l2; - idot = ido + ido; - idl1 = idot * l1; - switch (ip) { - case 4: - ix2 = iw + idot; - ix3 = ix2 + idot; - passb4(idot, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2], &wa[ix3]); - na = 1 - na; - break; - case 2: - passb2(idot, l1, na?ch:c, na?c:ch, &wa[iw]); - na = 1 - na; - break; - case 3: - ix2 = iw + idot; - passb3(idot, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2]); - na = 1 - na; - break; - case 5: - ix2 = iw + idot; - ix3 = ix2 + idot; - ix4 = ix3 + idot; - passfb5(idot, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], +1); - na = 1 - na; - break; - default: - if (na == 0) { - passfb(&nac, idot, ip, l1, idl1, c, c, c, ch, ch, &wa[iw], +1); - } else { - passfb(&nac, idot, ip, l1, idl1, ch, ch, ch, c, c, &wa[iw], +1); - } - if (nac != 0) { - na = 1 - na; - } - break; - } - l1 = l2; - iw += (ip - 1) * idot; - } - if (na == 0) { - return; - } - for (i = 0; i < 2*n; ++i) { - c[i] = ch[i]; - } -} /* cfftb1 */ - -void cfftb(integer n, real *c, real *wsave) -{ - integer iw1, iw2; - - /* Parameter adjustments */ - --wsave; - --c; - - /* Function Body */ - if (n == 1) { - return; - } - iw1 = 2*n + 1; - iw2 = iw1 + 2*n; - cfftb1(n, &c[1], &wsave[1], &wsave[iw1], (int*)&wsave[iw2]); -} /* cfftb */ - -static void cfftf1(integer n, real *c, real *ch, const real *wa, integer *ifac) -{ - /* Local variables */ - integer i, k1, l1, l2, na, nf, ip, iw, ix2, ix3, ix4, nac, ido, - idl1, idot; - - /* Function Body */ - nf = ifac[1]; - na = 0; - l1 = 1; - iw = 0; - for (k1 = 1; k1 <= nf; ++k1) { - ip = ifac[k1 + 1]; - l2 = ip * l1; - ido = n / l2; - idot = ido + ido; - idl1 = idot * l1; - switch (ip) { - case 4: - ix2 = iw + idot; - ix3 = 
ix2 + idot; - passf4(idot, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2], &wa[ix3]); - na = 1 - na; - break; - case 2: - passf2(idot, l1, na?ch:c, na?c:ch, &wa[iw]); - na = 1 - na; - break; - case 3: - ix2 = iw + idot; - passf3(idot, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2]); - na = 1 - na; - break; - case 5: - ix2 = iw + idot; - ix3 = ix2 + idot; - ix4 = ix3 + idot; - passfb5(idot, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], -1); - na = 1 - na; - break; - default: - if (na == 0) { - passfb(&nac, idot, ip, l1, idl1, c, c, c, ch, ch, &wa[iw], -1); - } else { - passfb(&nac, idot, ip, l1, idl1, ch, ch, ch, c, c, &wa[iw], -1); - } - if (nac != 0) { - na = 1 - na; - } - break; - } - l1 = l2; - iw += (ip - 1)*idot; - } - if (na == 0) { - return; - } - for (i = 0; i < 2*n; ++i) { - c[i] = ch[i]; - } -} /* cfftf1 */ - -void cfftf(integer n, real *c, real *wsave) -{ - integer iw1, iw2; - - /* Parameter adjustments */ - --wsave; - --c; - - /* Function Body */ - if (n == 1) { - return; - } - iw1 = 2*n + 1; - iw2 = iw1 + 2*n; - cfftf1(n, &c[1], &wsave[1], &wsave[iw1], (int*)&wsave[iw2]); -} /* cfftf */ - -static int decompose(integer n, integer *ifac, integer ntryh[4]) { - integer ntry=0, nl = n, nf = 0, nq, nr, i, j = 0; - do { - if (j < 4) { - ntry = ntryh[j]; - } else { - ntry += 2; - } - ++j; - L104: - nq = nl / ntry; - nr = nl - ntry * nq; - if (nr != 0) continue; - ++nf; - ifac[nf + 2] = ntry; - nl = nq; - if (ntry == 2 && nf != 1) { - for (i = 2; i <= nf; ++i) { - integer ib = nf - i + 2; - ifac[ib + 2] = ifac[ib + 1]; - } - ifac[3] = 2; - } - if (nl != 1) { - goto L104; - } - } while (nl != 1); - ifac[1] = n; - ifac[2] = nf; - return nf; -} - -static void cffti1(integer n, real *wa, integer *ifac) -{ - static integer ntryh[4] = { 3,4,2,5 }; - - /* Local variables */ - integer i, j, i1, k1, l1, l2; - real fi; - integer ld, ii, nf, ip; - real arg; - integer ido, ipm; - real argh; - integer idot; - real argld; - - /* Parameter adjustments */ - --ifac; - --wa; - 
- nf = decompose(n, ifac, ntryh); - - argh = (2*M_PI) / (real) (n); - i = 2; - l1 = 1; - for (k1 = 1; k1 <= nf; ++k1) { - ip = ifac[k1 + 2]; - ld = 0; - l2 = l1 * ip; - ido = n / l2; - idot = ido + ido + 2; - ipm = ip - 1; - for (j = 1; j <= ipm; ++j) { - i1 = i; - wa[i - 1] = 1.f; - wa[i] = 0.f; - ld += l1; - fi = 0.f; - argld = (real) ld * argh; - for (ii = 4; ii <= idot; ii += 2) { - i += 2; - fi += 1.f; - arg = fi * argld; - wa[i - 1] = cos(arg); - wa[i] = sin(arg); - } - if (ip > 5) { - wa[i1 - 1] = wa[i - 1]; - wa[i1] = wa[i]; - }; - } - l1 = l2; - } -} /* cffti1 */ - -void cffti(integer n, real *wsave) -{ - integer iw1, iw2; - /* Parameter adjustments */ - --wsave; - - /* Function Body */ - if (n == 1) { - return; - } - iw1 = 2*n + 1; - iw2 = iw1 + 2*n; - cffti1(n, &wsave[iw1], (int*)&wsave[iw2]); - return; -} /* cffti */ - -static void rfftb1(integer n, real *c, real *ch, const real *wa, integer *ifac) -{ - /* Local variables */ - integer i, k1, l1, l2, na, nf, ip, iw, ix2, ix3, ix4, ido, idl1; - - /* Function Body */ - nf = ifac[1]; - na = 0; - l1 = 1; - iw = 0; - for (k1 = 1; k1 <= nf; ++k1) { - ip = ifac[k1 + 1]; - l2 = ip * l1; - ido = n / l2; - idl1 = ido * l1; - switch (ip) { - case 4: - ix2 = iw + ido; - ix3 = ix2 + ido; - radb4(ido, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2], &wa[ix3]); - na = 1 - na; - break; - case 2: - radb2(ido, l1, na?ch:c, na?c:ch, &wa[iw]); - na = 1 - na; - break; - case 3: - ix2 = iw + ido; - radb3(ido, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2]); - na = 1 - na; - break; - case 5: - ix2 = iw + ido; - ix3 = ix2 + ido; - ix4 = ix3 + ido; - radb5(ido, l1, na?ch:c, na?c:ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4]); - na = 1 - na; - break; - default: - if (na == 0) { - radbg(ido, ip, l1, idl1, c, c, c, ch, ch, &wa[iw]); - } else { - radbg(ido, ip, l1, idl1, ch, ch, ch, c, c, &wa[iw]); - } - if (ido == 1) { - na = 1 - na; - } - break; - } - l1 = l2; - iw += (ip - 1) * ido; - } - if (na == 0) { - return; - } - for (i = 0; i < n; ++i) { - 
c[i] = ch[i]; - } -} /* rfftb1 */ - -static void rfftf1(integer n, real *c, real *ch, const real *wa, integer *ifac) -{ - /* Local variables */ - integer i, k1, l1, l2, na, kh, nf, ip, iw, ix2, ix3, ix4, ido, idl1; - - /* Function Body */ - nf = ifac[1]; - na = 1; - l2 = n; - iw = n-1; - for (k1 = 1; k1 <= nf; ++k1) { - kh = nf - k1; - ip = ifac[kh + 2]; - l1 = l2 / ip; - ido = n / l2; - idl1 = ido * l1; - iw -= (ip - 1) * ido; - na = 1 - na; - switch (ip) { - case 4: - ix2 = iw + ido; - ix3 = ix2 + ido; - radf4(ido, l1, na ? ch : c, na ? c : ch, &wa[iw], &wa[ix2], &wa[ix3]); - break; - case 2: - radf2(ido, l1, na ? ch : c, na ? c : ch, &wa[iw]); - break; - case 3: - ix2 = iw + ido; - radf3(ido, l1, na ? ch : c, na ? c : ch, &wa[iw], &wa[ix2]); - break; - case 5: - ix2 = iw + ido; - ix3 = ix2 + ido; - ix4 = ix3 + ido; - radf5(ido, l1, na ? ch : c, na ? c : ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4]); - break; - default: - if (ido == 1) { - na = 1 - na; - } - if (na == 0) { - radfg(ido, ip, l1, idl1, c, c, c, ch, ch, &wa[iw]); - na = 1; - } else { - radfg(ido, ip, l1, idl1, ch, ch, ch, c, c, &wa[iw]); - na = 0; - } - break; - } - l2 = l1; - } - if (na == 1) { - return; - } - for (i = 0; i < n; ++i) { - c[i] = ch[i]; - } -} - -void rfftb(integer n, real *r, real *wsave) -{ - - /* Parameter adjustments */ - --wsave; - --r; - - /* Function Body */ - if (n == 1) { - return; - } - rfftb1(n, &r[1], &wsave[1], &wsave[n + 1], (int*)&wsave[(n << 1) + 1]); -} /* rfftb */ - -static void rffti1(integer n, real *wa, integer *ifac) -{ - static integer ntryh[4] = { 4,2,3,5 }; - - /* Local variables */ - integer i, j, k1, l1, l2; - real fi; - integer ld, ii, nf, ip, is; - real arg; - integer ido, ipm; - integer nfm1; - real argh; - real argld; - - /* Parameter adjustments */ - --ifac; - --wa; - - nf = decompose(n, ifac, ntryh); - - argh = (2*M_PI) / (real) (n); - is = 0; - nfm1 = nf - 1; - l1 = 1; - if (nfm1 == 0) { - return; - } - for (k1 = 1; k1 <= nfm1; ++k1) { - ip = ifac[k1 + 
2]; - ld = 0; - l2 = l1 * ip; - ido = n / l2; - ipm = ip - 1; - for (j = 1; j <= ipm; ++j) { - ld += l1; - i = is; - argld = (real) ld * argh; - fi = 0.f; - for (ii = 3; ii <= ido; ii += 2) { - i += 2; - fi += 1.f; - arg = fi * argld; - wa[i - 1] = cos(arg); - wa[i] = sin(arg); - } - is += ido; - } - l1 = l2; - } -} /* rffti1 */ - -void rfftf(integer n, real *r, real *wsave) -{ - - /* Parameter adjustments */ - --wsave; - --r; - - /* Function Body */ - if (n == 1) { - return; - } - rfftf1(n, &r[1], &wsave[1], &wsave[n + 1], (int*)&wsave[(n << 1) + 1]); -} /* rfftf */ - -void rffti(integer n, real *wsave) -{ - /* Parameter adjustments */ - --wsave; - - /* Function Body */ - if (n == 1) { - return; - } - rffti1(n, &wsave[n + 1], (int*)&wsave[(n << 1) + 1]); - return; -} /* rffti */ - -static void cosqb1(integer n, real *x, real *w, real *xh) -{ - /* Local variables */ - integer i, k, kc, np2, ns2; - real xim1; - integer modn; - - /* Parameter adjustments */ - --xh; - --w; - --x; - - /* Function Body */ - ns2 = (n + 1) / 2; - np2 = n + 2; - for (i = 3; i <= n; i += 2) { - xim1 = x[i - 1] + x[i]; - x[i] -= x[i - 1]; - x[i - 1] = xim1; - } - x[1] += x[1]; - modn = n % 2; - if (modn == 0) { - x[n] += x[n]; - } - rfftb(n, &x[1], &xh[1]); - for (k = 2; k <= ns2; ++k) { - kc = np2 - k; - xh[k] = w[k - 1] * x[kc] + w[kc - 1] * x[k]; - xh[kc] = w[k - 1] * x[k] - w[kc - 1] * x[kc]; - } - if (modn == 0) { - x[ns2 + 1] = w[ns2] * (x[ns2 + 1] + x[ns2 + 1]); - } - for (k = 2; k <= ns2; ++k) { - kc = np2 - k; - x[k] = xh[k] + xh[kc]; - x[kc] = xh[k] - xh[kc]; - } - x[1] += x[1]; -} /* cosqb1 */ - -void cosqb(integer n, real *x, real *wsave) -{ - static const real tsqrt2 = 2.82842712474619f; - - /* Local variables */ - real x1; - - /* Parameter adjustments */ - --wsave; - --x; - - if (n < 2) { - x[1] *= 4.f; - } else if (n == 2) { - x1 = (x[1] + x[2]) * 4.f; - x[2] = tsqrt2 * (x[1] - x[2]); - x[1] = x1; - } else { - cosqb1(n, &x[1], &wsave[1], &wsave[n + 1]); - } -} /* cosqb */ - 
-static void cosqf1(integer n, real *x, real *w, real *xh) -{ - /* Local variables */ - integer i, k, kc, np2, ns2; - real xim1; - integer modn; - - /* Parameter adjustments */ - --xh; - --w; - --x; - - /* Function Body */ - ns2 = (n + 1) / 2; - np2 = n + 2; - for (k = 2; k <= ns2; ++k) { - kc = np2 - k; - xh[k] = x[k] + x[kc]; - xh[kc] = x[k] - x[kc]; - } - modn = n % 2; - if (modn == 0) { - xh[ns2 + 1] = x[ns2 + 1] + x[ns2 + 1]; - } - for (k = 2; k <= ns2; ++k) { - kc = np2 - k; - x[k] = w[k - 1] * xh[kc] + w[kc - 1] * xh[k]; - x[kc] = w[k - 1] * xh[k] - w[kc - 1] * xh[kc]; - } - if (modn == 0) { - x[ns2 + 1] = w[ns2] * xh[ns2 + 1]; - } - rfftf(n, &x[1], &xh[1]); - for (i = 3; i <= n; i += 2) { - xim1 = x[i - 1] - x[i]; - x[i] = x[i - 1] + x[i]; - x[i - 1] = xim1; - } -} /* cosqf1 */ - -void cosqf(integer n, real *x, real *wsave) -{ - static const real sqrt2 = 1.4142135623731f; - - /* Local variables */ - real tsqx; - - /* Parameter adjustments */ - --wsave; - --x; - - if (n == 2) { - tsqx = sqrt2 * x[2]; - x[2] = x[1] - tsqx; - x[1] += tsqx; - } else if (n > 2) { - cosqf1(n, &x[1], &wsave[1], &wsave[n + 1]); - } -} /* cosqf */ - -void cosqi(integer n, real *wsave) -{ - /* Local variables */ - integer k; - real fk, dt; - - /* Parameter adjustments */ - --wsave; - - dt = M_PI/2 / (real) (n); - fk = 0.f; - for (k = 1; k <= n; ++k) { - fk += 1.f; - wsave[k] = cos(fk * dt); - } - rffti(n, &wsave[n + 1]); -} /* cosqi */ - -void cost(integer n, real *x, real *wsave) -{ - /* Local variables */ - integer i, k; - real c1, t1, t2; - integer kc; - real xi; - integer nm1, np1; - real x1h; - integer ns2; - real tx2, x1p3, xim2; - integer modn; - - /* Parameter adjustments */ - --wsave; - --x; - - /* Function Body */ - nm1 = n - 1; - np1 = n + 1; - ns2 = n / 2; - if (n < 2) { - } else if (n == 2) { - x1h = x[1] + x[2]; - x[2] = x[1] - x[2]; - x[1] = x1h; - } else if (n == 3) { - x1p3 = x[1] + x[3]; - tx2 = x[2] + x[2]; - x[2] = x[1] - x[3]; - x[1] = x1p3 + tx2; - x[3] = x1p3 - 
tx2; - } else { - c1 = x[1] - x[n]; - x[1] += x[n]; - for (k = 2; k <= ns2; ++k) { - kc = np1 - k; - t1 = x[k] + x[kc]; - t2 = x[k] - x[kc]; - c1 += wsave[kc] * t2; - t2 = wsave[k] * t2; - x[k] = t1 - t2; - x[kc] = t1 + t2; - } - modn = n % 2; - if (modn != 0) { - x[ns2 + 1] += x[ns2 + 1]; - } - rfftf(nm1, &x[1], &wsave[n + 1]); - xim2 = x[2]; - x[2] = c1; - for (i = 4; i <= n; i += 2) { - xi = x[i]; - x[i] = x[i - 2] - x[i - 1]; - x[i - 1] = xim2; - xim2 = xi; - } - if (modn != 0) { - x[n] = xim2; - } - } -} /* cost */ - -void costi(integer n, real *wsave) -{ - /* Initialized data */ - - /* Local variables */ - integer k, kc; - real fk, dt; - integer nm1, np1, ns2; - - /* Parameter adjustments */ - --wsave; - - /* Function Body */ - if (n <= 3) { - return; - } - nm1 = n - 1; - np1 = n + 1; - ns2 = n / 2; - dt = M_PI / (real) nm1; - fk = 0.f; - for (k = 2; k <= ns2; ++k) { - kc = np1 - k; - fk += 1.f; - wsave[k] = sin(fk * dt) * 2.f; - wsave[kc] = cos(fk * dt) * 2.f; - } - rffti(nm1, &wsave[n + 1]); -} /* costi */ - -void sinqb(integer n, real *x, real *wsave) -{ - /* Local variables */ - integer k, kc, ns2; - real xhold; - - /* Parameter adjustments */ - --wsave; - --x; - - /* Function Body */ - if (n <= 1) { - x[1] *= 4.f; - return; - } - ns2 = n / 2; - for (k = 2; k <= n; k += 2) { - x[k] = -x[k]; - } - cosqb(n, &x[1], &wsave[1]); - for (k = 1; k <= ns2; ++k) { - kc = n - k; - xhold = x[k]; - x[k] = x[kc + 1]; - x[kc + 1] = xhold; - } -} /* sinqb */ - -void sinqf(integer n, real *x, real *wsave) -{ - /* Local variables */ - integer k, kc, ns2; - real xhold; - - /* Parameter adjustments */ - --wsave; - --x; - - /* Function Body */ - if (n == 1) { - return; - } - ns2 = n / 2; - for (k = 1; k <= ns2; ++k) { - kc = n - k; - xhold = x[k]; - x[k] = x[kc + 1]; - x[kc + 1] = xhold; - } - cosqf(n, &x[1], &wsave[1]); - for (k = 2; k <= n; k += 2) { - x[k] = -x[k]; - } -} /* sinqf */ - -void sinqi(integer n, real *wsave) -{ - - /* Parameter adjustments */ - --wsave; - - /* 
Function Body */ - cosqi(n, &wsave[1]); -} /* sinqi */ - -static void sint1(integer n, real *war, real *was, real *xh, real * - x, integer *ifac) -{ - /* Initialized data */ - - static const real sqrt3 = 1.73205080756888f; - - /* Local variables */ - integer i, k; - real t1, t2; - integer kc, np1, ns2, modn; - real xhold; - - /* Parameter adjustments */ - --ifac; - --x; - --xh; - --was; - --war; - - /* Function Body */ - for (i = 1; i <= n; ++i) { - xh[i] = war[i]; - war[i] = x[i]; - } - - if (n < 2) { - xh[1] += xh[1]; - } else if (n == 2) { - xhold = sqrt3 * (xh[1] + xh[2]); - xh[2] = sqrt3 * (xh[1] - xh[2]); - xh[1] = xhold; - } else { - np1 = n + 1; - ns2 = n / 2; - x[1] = 0.f; - for (k = 1; k <= ns2; ++k) { - kc = np1 - k; - t1 = xh[k] - xh[kc]; - t2 = was[k] * (xh[k] + xh[kc]); - x[k + 1] = t1 + t2; - x[kc + 1] = t2 - t1; - } - modn = n % 2; - if (modn != 0) { - x[ns2 + 2] = xh[ns2 + 1] * 4.f; - } - rfftf1(np1, &x[1], &xh[1], &war[1], &ifac[1]); - xh[1] = x[1] * .5f; - for (i = 3; i <= n; i += 2) { - xh[i - 1] = -x[i]; - xh[i] = xh[i - 2] + x[i - 1]; - } - if (modn == 0) { - xh[n] = -x[n + 1]; - } - } - for (i = 1; i <= n; ++i) { - x[i] = war[i]; - war[i] = xh[i]; - } -} /* sint1 */ - -void sinti(integer n, real *wsave) -{ - /* Local variables */ - integer k; - real dt; - integer np1, ns2; - - /* Parameter adjustments */ - --wsave; - - /* Function Body */ - if (n <= 1) { - return; - } - ns2 = n / 2; - np1 = n + 1; - dt = M_PI / (real) np1; - for (k = 1; k <= ns2; ++k) { - wsave[k] = sin(k * dt) * 2.f; - } - rffti(np1, &wsave[ns2 + 1]); -} /* sinti */ - -void sint(integer n, real *x, real *wsave) -{ - integer np1, iw1, iw2, iw3; - - /* Parameter adjustments */ - --wsave; - --x; - - /* Function Body */ - np1 = n + 1; - iw1 = n / 2 + 1; - iw2 = iw1 + np1; - iw3 = iw2 + np1; - sint1(n, &x[1], &wsave[1], &wsave[iw1], &wsave[iw2], (int*)&wsave[iw3]); -} /* sint */ - -#ifdef TESTING_FFTPACK -#include - -int main(void) -{ - static integer nd[] = { 
120,91,54,49,32,28,24,8,4,3,2 }; - - /* System generated locals */ - real r1, r2, r3; - f77complex q1, q2, q3; - - /* Local variables */ - integer i, j, k, n; - real w[2000], x[200], y[200], cf, fn, dt; - f77complex cx[200], cy[200]; - real xh[200]; - integer nz, nm1, np1, ns2; - real arg, tfn; - real sum, arg1, arg2; - real sum1, sum2, dcfb; - integer modn; - real rftb, rftf; - real sqrt2; - real rftfb; - real costt, sintt, dcfftb, dcfftf, cosqfb, costfb; - real sinqfb; - real sintfb; - real cosqbt, cosqft, sinqbt, sinqft; - - - - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - - /* VERSION 4 APRIL 1985 */ - - /* A TEST DRIVER FOR */ - /* A PACKAGE OF FORTRAN SUBPROGRAMS FOR THE FAST FOURIER */ - /* TRANSFORM OF PERIODIC AND OTHER SYMMETRIC SEQUENCES */ - - /* BY */ - - /* PAUL N SWARZTRAUBER */ - - /* NATIONAL CENTER FOR ATMOSPHERIC RESEARCH BOULDER,COLORADO 80307 */ - - /* WHICH IS SPONSORED BY THE NATIONAL SCIENCE FOUNDATION */ - - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - - - /* THIS PROGRAM TESTS THE PACKAGE OF FAST FOURIER */ - /* TRANSFORMS FOR BOTH COMPLEX AND REAL PERIODIC SEQUENCES AND */ - /* CERTIAN OTHER SYMMETRIC SEQUENCES THAT ARE LISTED BELOW. */ - - /* 1. RFFTI INITIALIZE RFFTF AND RFFTB */ - /* 2. RFFTF FORWARD TRANSFORM OF A REAL PERIODIC SEQUENCE */ - /* 3. RFFTB BACKWARD TRANSFORM OF A REAL COEFFICIENT ARRAY */ - - /* 4. EZFFTI INITIALIZE EZFFTF AND EZFFTB */ - /* 5. EZFFTF A SIMPLIFIED REAL PERIODIC FORWARD TRANSFORM */ - /* 6. EZFFTB A SIMPLIFIED REAL PERIODIC BACKWARD TRANSFORM */ - - /* 7. SINTI INITIALIZE SINT */ - /* 8. SINT SINE TRANSFORM OF A REAL ODD SEQUENCE */ - - /* 9. COSTI INITIALIZE COST */ - /* 10. COST COSINE TRANSFORM OF A REAL EVEN SEQUENCE */ - - /* 11. SINQI INITIALIZE SINQF AND SINQB */ - /* 12. SINQF FORWARD SINE TRANSFORM WITH ODD WAVE NUMBERS */ - /* 13. SINQB UNNORMALIZED INVERSE OF SINQF */ - - /* 14. COSQI INITIALIZE COSQF AND COSQB */ - /* 15. 
COSQF FORWARD COSINE TRANSFORM WITH ODD WAVE NUMBERS */ - /* 16. COSQB UNNORMALIZED INVERSE OF COSQF */ - - /* 17. CFFTI INITIALIZE CFFTF AND CFFTB */ - /* 18. CFFTF FORWARD TRANSFORM OF A COMPLEX PERIODIC SEQUENCE */ - /* 19. CFFTB UNNORMALIZED INVERSE OF CFFTF */ - - - sqrt2 = sqrt(2.f); - int all_ok = 1; - for (nz = 1; nz <= (int)(sizeof nd/sizeof nd[0]); ++nz) { - n = nd[nz - 1]; - modn = n % 2; - fn = (real) n; - tfn = fn + fn; - np1 = n + 1; - nm1 = n - 1; - for (j = 1; j <= np1; ++j) { - x[j - 1] = sin((real) j * sqrt2); - y[j - 1] = x[j - 1]; - xh[j - 1] = x[j - 1]; - } - - /* TEST SUBROUTINES RFFTI,RFFTF AND RFFTB */ - - rffti(n, w); - dt = (2*M_PI) / fn; - ns2 = (n + 1) / 2; - if (ns2 < 2) { - goto L104; - } - for (k = 2; k <= ns2; ++k) { - sum1 = 0.f; - sum2 = 0.f; - arg = (real) (k - 1) * dt; - for (i = 1; i <= n; ++i) { - arg1 = (real) (i - 1) * arg; - sum1 += x[i - 1] * cos(arg1); - sum2 += x[i - 1] * sin(arg1); - } - y[(k << 1) - 3] = sum1; - y[(k << 1) - 2] = -sum2; - } - L104: - sum1 = 0.f; - sum2 = 0.f; - for (i = 1; i <= nm1; i += 2) { - sum1 += x[i - 1]; - sum2 += x[i]; - } - if (modn == 1) { - sum1 += x[n - 1]; - } - y[0] = sum1 + sum2; - if (modn == 0) { - y[n - 1] = sum1 - sum2; - } - rfftf(n, x, w); - rftf = 0.f; - for (i = 1; i <= n; ++i) { - /* Computing MAX */ - r2 = rftf, r3 = (r1 = x[i - 1] - y[i - 1], fabs(r1)); - rftf = dmax(r2,r3); - x[i - 1] = xh[i - 1]; - } - rftf /= fn; - for (i = 1; i <= n; ++i) { - sum = x[0] * .5f; - arg = (real) (i - 1) * dt; - if (ns2 < 2) { - goto L108; - } - for (k = 2; k <= ns2; ++k) { - arg1 = (real) (k - 1) * arg; - sum = sum + x[(k << 1) - 3] * cos(arg1) - x[(k << 1) - 2] * - sin(arg1); - } - L108: - if (modn == 0) { - sum += (real)pow(-1, i-1) * .5f * x[n - 1]; - } - y[i - 1] = sum + sum; - } - rfftb(n, x, w); - rftb = 0.f; - for (i = 1; i <= n; ++i) { - /* Computing MAX */ - r2 = rftb, r3 = (r1 = x[i - 1] - y[i - 1], fabs(r1)); - rftb = dmax(r2,r3); - x[i - 1] = xh[i - 1]; - y[i - 1] = xh[i - 1]; - } 
- rfftb(n, y, w); - rfftf(n, y, w); - cf = 1.f / fn; - rftfb = 0.f; - for (i = 1; i <= n; ++i) { - /* Computing MAX */ - r2 = rftfb, r3 = (r1 = cf * y[i - 1] - x[i - 1], fabs( - r1)); - rftfb = dmax(r2,r3); - } - - /* TEST SUBROUTINES SINTI AND SINT */ - - dt = M_PI / fn; - for (i = 1; i <= nm1; ++i) { - x[i - 1] = xh[i - 1]; - } - for (i = 1; i <= nm1; ++i) { - y[i - 1] = 0.f; - arg1 = (real) i * dt; - for (k = 1; k <= nm1; ++k) { - y[i - 1] += x[k - 1] * sin((real) k * arg1); - } - y[i - 1] += y[i - 1]; - } - sinti(nm1, w); - sint(nm1, x, w); - cf = .5f / fn; - sintt = 0.f; - for (i = 1; i <= nm1; ++i) { - /* Computing MAX */ - r2 = sintt, r3 = (r1 = x[i - 1] - y[i - 1], fabs(r1)); - sintt = dmax(r2,r3); - x[i - 1] = xh[i - 1]; - y[i - 1] = x[i - 1]; - } - sintt = cf * sintt; - sint(nm1, x, w); - sint(nm1, x, w); - sintfb = 0.f; - for (i = 1; i <= nm1; ++i) { - /* Computing MAX */ - r2 = sintfb, r3 = (r1 = cf * x[i - 1] - y[i - 1], fabs( - r1)); - sintfb = dmax(r2,r3); - } - - /* TEST SUBROUTINES COSTI AND COST */ - - for (i = 1; i <= np1; ++i) { - x[i - 1] = xh[i - 1]; - } - for (i = 1; i <= np1; ++i) { - y[i - 1] = (x[0] + (real) pow(-1, i+1) * x[n]) * .5f; - arg = (real) (i - 1) * dt; - for (k = 2; k <= n; ++k) { - y[i - 1] += x[k - 1] * cos((real) (k - 1) * arg); - } - y[i - 1] += y[i - 1]; - } - costi(np1, w); - cost(np1, x, w); - costt = 0.f; - for (i = 1; i <= np1; ++i) { - /* Computing MAX */ - r2 = costt, r3 = (r1 = x[i - 1] - y[i - 1], fabs(r1)); - costt = dmax(r2,r3); - x[i - 1] = xh[i - 1]; - y[i - 1] = xh[i - 1]; - } - costt = cf * costt; - cost(np1, x, w); - cost(np1, x, w); - costfb = 0.f; - for (i = 1; i <= np1; ++i) { - /* Computing MAX */ - r2 = costfb, r3 = (r1 = cf * x[i - 1] - y[i - 1], fabs( - r1)); - costfb = dmax(r2,r3); - } - - /* TEST SUBROUTINES SINQI,SINQF AND SINQB */ - - cf = .25f / fn; - for (i = 1; i <= n; ++i) { - y[i - 1] = xh[i - 1]; - } - dt = M_PI / (fn + fn); - for (i = 1; i <= n; ++i) { - x[i - 1] = 0.f; - arg = dt * (real) 
i; - for (k = 1; k <= n; ++k) { - x[i - 1] += y[k - 1] * sin((real) (k + k - 1) * arg); - } - x[i - 1] *= 4.f; - } - sinqi(n, w); - sinqb(n, y, w); - sinqbt = 0.f; - for (i = 1; i <= n; ++i) { - /* Computing MAX */ - r2 = sinqbt, r3 = (r1 = y[i - 1] - x[i - 1], fabs(r1)) - ; - sinqbt = dmax(r2,r3); - x[i - 1] = xh[i - 1]; - } - sinqbt = cf * sinqbt; - for (i = 1; i <= n; ++i) { - arg = (real) (i + i - 1) * dt; - y[i - 1] = (real) pow(-1, i+1) * .5f * x[n - 1]; - for (k = 1; k <= nm1; ++k) { - y[i - 1] += x[k - 1] * sin((real) k * arg); - } - y[i - 1] += y[i - 1]; - } - sinqf(n, x, w); - sinqft = 0.f; - for (i = 1; i <= n; ++i) { - /* Computing MAX */ - r2 = sinqft, r3 = (r1 = x[i - 1] - y[i - 1], fabs(r1)) - ; - sinqft = dmax(r2,r3); - y[i - 1] = xh[i - 1]; - x[i - 1] = xh[i - 1]; - } - sinqf(n, y, w); - sinqb(n, y, w); - sinqfb = 0.f; - for (i = 1; i <= n; ++i) { - /* Computing MAX */ - r2 = sinqfb, r3 = (r1 = cf * y[i - 1] - x[i - 1], fabs( - r1)); - sinqfb = dmax(r2,r3); - } - - /* TEST SUBROUTINES COSQI,COSQF AND COSQB */ - - for (i = 1; i <= n; ++i) { - y[i - 1] = xh[i - 1]; - } - for (i = 1; i <= n; ++i) { - x[i - 1] = 0.f; - arg = (real) (i - 1) * dt; - for (k = 1; k <= n; ++k) { - x[i - 1] += y[k - 1] * cos((real) (k + k - 1) * arg); - } - x[i - 1] *= 4.f; - } - cosqi(n, w); - cosqb(n, y, w); - cosqbt = 0.f; - for (i = 1; i <= n; ++i) { - /* Computing MAX */ - r2 = cosqbt, r3 = (r1 = x[i - 1] - y[i - 1], fabs(r1)) - ; - cosqbt = dmax(r2,r3); - x[i - 1] = xh[i - 1]; - } - cosqbt = cf * cosqbt; - for (i = 1; i <= n; ++i) { - y[i - 1] = x[0] * .5f; - arg = (real) (i + i - 1) * dt; - for (k = 2; k <= n; ++k) { - y[i - 1] += x[k - 1] * cos((real) (k - 1) * arg); - } - y[i - 1] += y[i - 1]; - } - cosqf(n, x, w); - cosqft = 0.f; - for (i = 1; i <= n; ++i) { - /* Computing MAX */ - r2 = cosqft, r3 = (r1 = y[i - 1] - x[i - 1], fabs(r1)) - ; - cosqft = dmax(r2,r3); - x[i - 1] = xh[i - 1]; - y[i - 1] = xh[i - 1]; - } - cosqft = cf * cosqft; - cosqb(n, x, w); - 
cosqf(n, x, w); - cosqfb = 0.f; - for (i = 1; i <= n; ++i) { - /* Computing MAX */ - r2 = cosqfb, r3 = (r1 = cf * x[i - 1] - y[i - 1], fabs(r1)); - cosqfb = dmax(r2,r3); - } - - /* TEST CFFTI,CFFTF,CFFTB */ - - for (i = 1; i <= n; ++i) { - r1 = cos(sqrt2 * (real) i); - r2 = sin(sqrt2 * (real) (i * i)); - q1.r = r1, q1.i = r2; - cx[i-1].r = q1.r, cx[i-1].i = q1.i; - } - dt = (2*M_PI) / fn; - for (i = 1; i <= n; ++i) { - arg1 = -((real) (i - 1)) * dt; - cy[i-1].r = 0.f, cy[i-1].i = 0.f; - for (k = 1; k <= n; ++k) { - arg2 = (real) (k - 1) * arg1; - r1 = cos(arg2); - r2 = sin(arg2); - q3.r = r1, q3.i = r2; - q2.r = q3.r * cx[k-1].r - q3.i * cx[k-1].i, q2.i = - q3.r * cx[k-1].i + q3.i * cx[k-1].r; - q1.r = cy[i-1].r + q2.r, q1.i = cy[i-1].i + q2.i; - cy[i-1].r = q1.r, cy[i-1].i = q1.i; - } - } - cffti(n, w); - cfftf(n, (real*)cx, w); - dcfftf = 0.f; - for (i = 1; i <= n; ++i) { - /* Computing MAX */ - q1.r = cx[i-1].r - cy[i-1].r, q1.i = cx[i-1].i - cy[i-1] - .i; - r1 = dcfftf, r2 = c_abs(&q1); - dcfftf = dmax(r1,r2); - q1.r = cx[i-1].r / fn, q1.i = cx[i-1].i / fn; - cx[i-1].r = q1.r, cx[i-1].i = q1.i; - } - dcfftf /= fn; - for (i = 1; i <= n; ++i) { - arg1 = (real) (i - 1) * dt; - cy[i-1].r = 0.f, cy[i-1].i = 0.f; - for (k = 1; k <= n; ++k) { - arg2 = (real) (k - 1) * arg1; - r1 = cos(arg2); - r2 = sin(arg2); - q3.r = r1, q3.i = r2; - q2.r = q3.r * cx[k-1].r - q3.i * cx[k-1].i, q2.i = - q3.r * cx[k-1].i + q3.i * cx[k-1].r; - q1.r = cy[i-1].r + q2.r, q1.i = cy[i-1].i + q2.i; - cy[i-1].r = q1.r, cy[i-1].i = q1.i; - } - } - cfftb(n, (real*)cx, w); - dcfftb = 0.f; - for (i = 1; i <= n; ++i) { - /* Computing MAX */ - q1.r = cx[i-1].r - cy[i-1].r, q1.i = cx[i-1].i - cy[i-1].i; - r1 = dcfftb, r2 = c_abs(&q1); - dcfftb = dmax(r1,r2); - cx[i-1].r = cy[i-1].r, cx[i-1].i = cy[i-1].i; - } - cf = 1.f / fn; - cfftf(n, (real*)cx, w); - cfftb(n, (real*)cx, w); - dcfb = 0.f; - for (i = 1; i <= n; ++i) { - /* Computing MAX */ - q2.r = cf * cx[i-1].r, q2.i = cf * cx[i-1].i; - q1.r = 
q2.r - cy[i-1].r, q1.i = q2.i - cy[i-1].i; - r1 = dcfb, r2 = c_abs(&q1); - dcfb = dmax(r1,r2); - } - printf("%d\tRFFTF %10.3g\tRFFTB %10.ge\tRFFTFB %10.3g", n, rftf, rftb, rftfb); - printf( "\tSINT %10.3g\tSINTFB %10.ge\tCOST %10.3g\n", sintt, sintfb, costt); - printf( "\tCOSTFB %10.3g\tSINQF %10.ge\tSINQB %10.3g", costfb, sinqft, sinqbt); - printf( "\tSINQFB %10.3g\tCOSQF %10.ge\tCOSQB %10.3g\n", sinqfb, cosqft, cosqbt); - printf( "\tCOSQFB %10.3g\t", cosqfb); - printf( "\tCFFTF %10.ge\tCFFTB %10.3g\n", dcfftf, dcfftb); - printf( "\tCFFTFB %10.3g\n", dcfb); - -#define CHECK(x) if (x > 1e-3) { printf(#x " failed: %g\n", x); all_ok = 0; } - CHECK(rftf); CHECK(rftb); CHECK(rftfb); CHECK(sintt); CHECK(sintfb); CHECK(costt); - CHECK(costfb); CHECK(sinqft); CHECK(sinqbt); CHECK(sinqfb); CHECK(cosqft); CHECK(cosqbt); - CHECK(cosqfb); CHECK(dcfftf); CHECK(dcfftb); - } - - if (all_ok) printf("Everything looks fine.\n"); - else printf("ERRORS WERE DETECTED.\n"); - /* - expected: - 120 RFFTF 2.786e-06 RFFTB 6.847e-04 RFFTFB 2.795e-07 SINT 1.312e-06 SINTFB 1.237e-06 COST 1.319e-06 - COSTFB 4.355e-06 SINQF 3.281e-04 SINQB 1.876e-06 SINQFB 2.198e-07 COSQF 6.199e-07 COSQB 2.193e-06 - COSQFB 2.300e-07 DEZF 5.573e-06 DEZB 1.363e-05 DEZFB 1.371e-06 CFFTF 5.590e-06 CFFTB 4.751e-05 - CFFTFB 4.215e-07 - 54 RFFTF 4.708e-07 RFFTB 3.052e-05 RFFTFB 3.439e-07 SINT 3.532e-07 SINTFB 4.145e-07 COST 3.002e-07 - COSTFB 6.343e-07 SINQF 4.959e-05 SINQB 4.415e-07 SINQFB 2.882e-07 COSQF 2.826e-07 COSQB 2.472e-07 - COSQFB 3.439e-07 DEZF 9.388e-07 DEZB 5.066e-06 DEZFB 5.960e-07 CFFTF 1.426e-06 CFFTB 9.482e-06 - CFFTFB 2.980e-07 - 49 RFFTF 4.476e-07 RFFTB 5.341e-05 RFFTFB 2.574e-07 SINT 9.196e-07 SINTFB 9.401e-07 COST 8.174e-07 - COSTFB 1.331e-06 SINQF 4.005e-05 SINQB 9.342e-07 SINQFB 3.057e-07 COSQF 2.530e-07 COSQB 6.228e-07 - COSQFB 4.826e-07 DEZF 9.071e-07 DEZB 4.590e-06 DEZFB 5.960e-07 CFFTF 2.095e-06 CFFTB 1.414e-05 - CFFTFB 7.398e-07 - 32 RFFTF 4.619e-07 RFFTB 2.861e-05 RFFTFB 1.192e-07 SINT 
3.874e-07 SINTFB 4.172e-07 COST 4.172e-07 - COSTFB 1.699e-06 SINQF 2.551e-05 SINQB 6.407e-07 SINQFB 2.980e-07 COSQF 1.639e-07 COSQB 1.714e-07 - COSQFB 2.384e-07 DEZF 1.013e-06 DEZB 2.339e-06 DEZFB 7.749e-07 CFFTF 1.127e-06 CFFTB 6.744e-06 - CFFTFB 2.666e-07 - 4 RFFTF 1.490e-08 RFFTB 1.490e-07 RFFTFB 5.960e-08 SINT 7.451e-09 SINTFB 0.000e+00 COST 2.980e-08 - COSTFB 1.192e-07 SINQF 4.768e-07 SINQB 2.980e-08 SINQFB 5.960e-08 COSQF 2.608e-08 COSQB 5.960e-08 - COSQFB 1.192e-07 DEZF 2.980e-08 DEZB 5.960e-08 DEZFB 0.000e+00 CFFTF 6.664e-08 CFFTB 5.960e-08 - CFFTFB 6.144e-08 - 3 RFFTF 3.974e-08 RFFTB 1.192e-07 RFFTFB 3.303e-08 SINT 1.987e-08 SINTFB 1.069e-08 COST 4.967e-08 - COSTFB 5.721e-08 SINQF 8.941e-08 SINQB 2.980e-08 SINQFB 1.259e-07 COSQF 7.451e-09 COSQB 4.967e-08 - COSQFB 7.029e-08 DEZF 1.192e-07 DEZB 5.960e-08 DEZFB 5.960e-08 CFFTF 7.947e-08 CFFTB 8.429e-08 - CFFTFB 9.064e-08 - 2 RFFTF 0.000e+00 RFFTB 0.000e+00 RFFTFB 0.000e+00 SINT 0.000e+00 SINTFB 0.000e+00 COST 0.000e+00 - COSTFB 0.000e+00 SINQF 1.192e-07 SINQB 2.980e-08 SINQFB 5.960e-08 COSQF 7.451e-09 COSQB 1.490e-08 - COSQFB 0.000e+00 DEZF 0.000e+00 DEZB 0.000e+00 DEZFB 0.000e+00 CFFTF 0.000e+00 CFFTB 5.960e-08 - CFFTFB 5.960e-08 - Everything looks fine. - - */ - - return all_ok ? 0 : 1; -} -#endif //TESTING_FFTPACK diff --git a/oss-internship-2020/pffft/fftpack.h b/oss-internship-2020/pffft/fftpack.h deleted file mode 100644 index 5971b9f..0000000 --- a/oss-internship-2020/pffft/fftpack.h +++ /dev/null @@ -1,799 +0,0 @@ -/* - Interface for the f2c translation of fftpack as found on http://www.netlib.org/fftpack/ - - FFTPACK license: - - http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html - - Copyright (c) 2004 the University Corporation for Atmospheric - Research ("UCAR"). All rights reserved. Developed by NCAR's - Computational and Information Systems Laboratory, UCAR, - www.cisl.ucar.edu. 
- - Redistribution and use of the Software in source and binary forms, - with or without modification, is permitted provided that the - following conditions are met: - - - Neither the names of NCAR's Computational and Information Systems - Laboratory, the University Corporation for Atmospheric Research, - nor the names of its sponsors or contributors may be used to - endorse or promote products derived from this Software without - specific prior written permission. - - - Redistributions of source code must retain the above copyright - notices, this list of conditions, and the disclaimer below. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the disclaimer below in the - documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT - HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE - SOFTWARE. - - ChangeLog: - 2011/10/02: this is my first release of this file. 
-*/ - -#ifndef FFTPACK_H -#define FFTPACK_H - -#ifdef __cplusplus -extern "C" { -#endif - -// just define FFTPACK_DOUBLE_PRECISION if you want to build it as a double precision fft - -#ifndef FFTPACK_DOUBLE_PRECISION - typedef float fftpack_real; - typedef int fftpack_int; -#else - typedef double fftpack_real; - typedef int fftpack_int; -#endif - - void cffti(fftpack_int n, fftpack_real *wsave); - - void cfftf(fftpack_int n, fftpack_real *c, fftpack_real *wsave); - - void cfftb(fftpack_int n, fftpack_real *c, fftpack_real *wsave); - - void rffti(fftpack_int n, fftpack_real *wsave); - void rfftf(fftpack_int n, fftpack_real *r, fftpack_real *wsave); - void rfftb(fftpack_int n, fftpack_real *r, fftpack_real *wsave); - - void cosqi(fftpack_int n, fftpack_real *wsave); - void cosqf(fftpack_int n, fftpack_real *x, fftpack_real *wsave); - void cosqb(fftpack_int n, fftpack_real *x, fftpack_real *wsave); - - void costi(fftpack_int n, fftpack_real *wsave); - void cost(fftpack_int n, fftpack_real *x, fftpack_real *wsave); - - void sinqi(fftpack_int n, fftpack_real *wsave); - void sinqb(fftpack_int n, fftpack_real *x, fftpack_real *wsave); - void sinqf(fftpack_int n, fftpack_real *x, fftpack_real *wsave); - - void sinti(fftpack_int n, fftpack_real *wsave); - void sint(fftpack_int n, fftpack_real *x, fftpack_real *wsave); - -#ifdef __cplusplus -} -#endif - -#endif /* FFTPACK_H */ - -/* - - FFTPACK - -* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - - version 4 april 1985 - - a package of fortran subprograms for the fast fourier - transform of periodic and other symmetric sequences - - by - - paul n swarztrauber - - national center for atmospheric research boulder,colorado 80307 - - which is sponsored by the national science foundation - -* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - - -this package consists of programs which perform fast fourier -transforms for both complex and real periodic sequences and -certain other symmetric 
sequences that are listed below. - -1. rffti initialize rfftf and rfftb -2. rfftf forward transform of a real periodic sequence -3. rfftb backward transform of a real coefficient array - -4. ezffti initialize ezfftf and ezfftb -5. ezfftf a simplified real periodic forward transform -6. ezfftb a simplified real periodic backward transform - -7. sinti initialize sint -8. sint sine transform of a real odd sequence - -9. costi initialize cost -10. cost cosine transform of a real even sequence - -11. sinqi initialize sinqf and sinqb -12. sinqf forward sine transform with odd wave numbers -13. sinqb unnormalized inverse of sinqf - -14. cosqi initialize cosqf and cosqb -15. cosqf forward cosine transform with odd wave numbers -16. cosqb unnormalized inverse of cosqf - -17. cffti initialize cfftf and cfftb -18. cfftf forward transform of a complex periodic sequence -19. cfftb unnormalized inverse of cfftf - - -****************************************************************** - -subroutine rffti(n,wsave) - - **************************************************************** - -subroutine rffti initializes the array wsave which is used in -both rfftf and rfftb. the prime factorization of n together with -a tabulation of the trigonometric functions are computed and -stored in wsave. - -input parameter - -n the length of the sequence to be transformed. - -output parameter - -wsave a work array which must be dimensioned at least 2*n+15. - the same work array can be used for both rfftf and rfftb - as long as n remains unchanged. different wsave arrays - are required for different values of n. the contents of - wsave must not be changed between calls of rfftf or rfftb. - -****************************************************************** - -subroutine rfftf(n,r,wsave) - -****************************************************************** - -subroutine rfftf computes the fourier coefficients of a real -perodic sequence (fourier analysis). 
the transform is defined -below at output parameter r. - -input parameters - -n the length of the array r to be transformed. the method - is most efficient when n is a product of small primes. - n may change so long as different work arrays are provided - -r a real array of length n which contains the sequence - to be transformed - -wsave a work array which must be dimensioned at least 2*n+15. - in the program that calls rfftf. the wsave array must be - initialized by calling subroutine rffti(n,wsave) and a - different wsave array must be used for each different - value of n. this initialization does not have to be - repeated so long as n remains unchanged thus subsequent - transforms can be obtained faster than the first. - the same wsave array can be used by rfftf and rfftb. - - -output parameters - -r r(1) = the sum from i=1 to i=n of r(i) - - if n is even set l =n/2 , if n is odd set l = (n+1)/2 - - then for k = 2,...,l - - r(2*k-2) = the sum from i = 1 to i = n of - - r(i)*cos((k-1)*(i-1)*2*pi/n) - - r(2*k-1) = the sum from i = 1 to i = n of - - -r(i)*sin((k-1)*(i-1)*2*pi/n) - - if n is even - - r(n) = the sum from i = 1 to i = n of - - (-1)**(i-1)*r(i) - - ***** note - this transform is unnormalized since a call of rfftf - followed by a call of rfftb will multiply the input - sequence by n. - -wsave contains results which must not be destroyed between - calls of rfftf or rfftb. - - -****************************************************************** - -subroutine rfftb(n,r,wsave) - -****************************************************************** - -subroutine rfftb computes the real perodic sequence from its -fourier coefficients (fourier synthesis). the transform is defined -below at output parameter r. - -input parameters - -n the length of the array r to be transformed. the method - is most efficient when n is a product of small primes. 
- n may change so long as different work arrays are provided - -r a real array of length n which contains the sequence - to be transformed - -wsave a work array which must be dimensioned at least 2*n+15. - in the program that calls rfftb. the wsave array must be - initialized by calling subroutine rffti(n,wsave) and a - different wsave array must be used for each different - value of n. this initialization does not have to be - repeated so long as n remains unchanged thus subsequent - transforms can be obtained faster than the first. - the same wsave array can be used by rfftf and rfftb. - - -output parameters - -r for n even and for i = 1,...,n - - r(i) = r(1)+(-1)**(i-1)*r(n) - - plus the sum from k=2 to k=n/2 of - - 2.*r(2*k-2)*cos((k-1)*(i-1)*2*pi/n) - - -2.*r(2*k-1)*sin((k-1)*(i-1)*2*pi/n) - - for n odd and for i = 1,...,n - - r(i) = r(1) plus the sum from k=2 to k=(n+1)/2 of - - 2.*r(2*k-2)*cos((k-1)*(i-1)*2*pi/n) - - -2.*r(2*k-1)*sin((k-1)*(i-1)*2*pi/n) - - ***** note - this transform is unnormalized since a call of rfftf - followed by a call of rfftb will multiply the input - sequence by n. - -wsave contains results which must not be destroyed between - calls of rfftb or rfftf. - -****************************************************************** - -subroutine sinti(n,wsave) - -****************************************************************** - -subroutine sinti initializes the array wsave which is used in -subroutine sint. the prime factorization of n together with -a tabulation of the trigonometric functions are computed and -stored in wsave. - -input parameter - -n the length of the sequence to be transformed. the method - is most efficient when n+1 is a product of small primes. - -output parameter - -wsave a work array with at least int(2.5*n+15) locations. - different wsave arrays are required for different values - of n. the contents of wsave must not be changed between - calls of sint. 
- -****************************************************************** - -subroutine sint(n,x,wsave) - -****************************************************************** - -subroutine sint computes the discrete fourier sine transform -of an odd sequence x(i). the transform is defined below at -output parameter x. - -sint is the unnormalized inverse of itself since a call of sint -followed by another call of sint will multiply the input sequence -x by 2*(n+1). - -the array wsave which is used by subroutine sint must be -initialized by calling subroutine sinti(n,wsave). - -input parameters - -n the length of the sequence to be transformed. the method - is most efficient when n+1 is the product of small primes. - -x an array which contains the sequence to be transformed - - -wsave a work array with dimension at least int(2.5*n+15) - in the program that calls sint. the wsave array must be - initialized by calling subroutine sinti(n,wsave) and a - different wsave array must be used for each different - value of n. this initialization does not have to be - repeated so long as n remains unchanged thus subsequent - transforms can be obtained faster than the first. - -output parameters - -x for i=1,...,n - - x(i)= the sum from k=1 to k=n - - 2*x(k)*sin(k*i*pi/(n+1)) - - a call of sint followed by another call of - sint will multiply the sequence x by 2*(n+1). - hence sint is the unnormalized inverse - of itself. - -wsave contains initialization calculations which must not be - destroyed between calls of sint. - -****************************************************************** - -subroutine costi(n,wsave) - -****************************************************************** - -subroutine costi initializes the array wsave which is used in -subroutine cost. the prime factorization of n together with -a tabulation of the trigonometric functions are computed and -stored in wsave. - -input parameter - -n the length of the sequence to be transformed. 
the method - is most efficient when n-1 is a product of small primes. - -output parameter - -wsave a work array which must be dimensioned at least 3*n+15. - different wsave arrays are required for different values - of n. the contents of wsave must not be changed between - calls of cost. - -****************************************************************** - -subroutine cost(n,x,wsave) - -****************************************************************** - -subroutine cost computes the discrete fourier cosine transform -of an even sequence x(i). the transform is defined below at output -parameter x. - -cost is the unnormalized inverse of itself since a call of cost -followed by another call of cost will multiply the input sequence -x by 2*(n-1). the transform is defined below at output parameter x - -the array wsave which is used by subroutine cost must be -initialized by calling subroutine costi(n,wsave). - -input parameters - -n the length of the sequence x. n must be greater than 1. - the method is most efficient when n-1 is a product of - small primes. - -x an array which contains the sequence to be transformed - -wsave a work array which must be dimensioned at least 3*n+15 - in the program that calls cost. the wsave array must be - initialized by calling subroutine costi(n,wsave) and a - different wsave array must be used for each different - value of n. this initialization does not have to be - repeated so long as n remains unchanged thus subsequent - transforms can be obtained faster than the first. - -output parameters - -x for i=1,...,n - - x(i) = x(1)+(-1)**(i-1)*x(n) - - + the sum from k=2 to k=n-1 - - 2*x(k)*cos((k-1)*(i-1)*pi/(n-1)) - - a call of cost followed by another call of - cost will multiply the sequence x by 2*(n-1) - hence cost is the unnormalized inverse - of itself. - -wsave contains initialization calculations which must not be - destroyed between calls of cost. 
- -****************************************************************** - -subroutine sinqi(n,wsave) - -****************************************************************** - -subroutine sinqi initializes the array wsave which is used in -both sinqf and sinqb. the prime factorization of n together with -a tabulation of the trigonometric functions are computed and -stored in wsave. - -input parameter - -n the length of the sequence to be transformed. the method - is most efficient when n is a product of small primes. - -output parameter - -wsave a work array which must be dimensioned at least 3*n+15. - the same work array can be used for both sinqf and sinqb - as long as n remains unchanged. different wsave arrays - are required for different values of n. the contents of - wsave must not be changed between calls of sinqf or sinqb. - -****************************************************************** - -subroutine sinqf(n,x,wsave) - -****************************************************************** - -subroutine sinqf computes the fast fourier transform of quarter -wave data. that is , sinqf computes the coefficients in a sine -series representation with only odd wave numbers. the transform -is defined below at output parameter x. - -sinqb is the unnormalized inverse of sinqf since a call of sinqf -followed by a call of sinqb will multiply the input sequence x -by 4*n. - -the array wsave which is used by subroutine sinqf must be -initialized by calling subroutine sinqi(n,wsave). - - -input parameters - -n the length of the array x to be transformed. the method - is most efficient when n is a product of small primes. - -x an array which contains the sequence to be transformed - -wsave a work array which must be dimensioned at least 3*n+15. - in the program that calls sinqf. the wsave array must be - initialized by calling subroutine sinqi(n,wsave) and a - different wsave array must be used for each different - value of n. 
this initialization does not have to be - repeated so long as n remains unchanged thus subsequent - transforms can be obtained faster than the first. - -output parameters - -x for i=1,...,n - - x(i) = (-1)**(i-1)*x(n) - - + the sum from k=1 to k=n-1 of - - 2*x(k)*sin((2*i-1)*k*pi/(2*n)) - - a call of sinqf followed by a call of - sinqb will multiply the sequence x by 4*n. - therefore sinqb is the unnormalized inverse - of sinqf. - -wsave contains initialization calculations which must not - be destroyed between calls of sinqf or sinqb. - -****************************************************************** - -subroutine sinqb(n,x,wsave) - -****************************************************************** - -subroutine sinqb computes the fast fourier transform of quarter -wave data. that is , sinqb computes a sequence from its -representation in terms of a sine series with odd wave numbers. -the transform is defined below at output parameter x. - -sinqf is the unnormalized inverse of sinqb since a call of sinqb -followed by a call of sinqf will multiply the input sequence x -by 4*n. - -the array wsave which is used by subroutine sinqb must be -initialized by calling subroutine sinqi(n,wsave). - - -input parameters - -n the length of the array x to be transformed. the method - is most efficient when n is a product of small primes. - -x an array which contains the sequence to be transformed - -wsave a work array which must be dimensioned at least 3*n+15. - in the program that calls sinqb. the wsave array must be - initialized by calling subroutine sinqi(n,wsave) and a - different wsave array must be used for each different - value of n. this initialization does not have to be - repeated so long as n remains unchanged thus subsequent - transforms can be obtained faster than the first. 
- -output parameters - -x for i=1,...,n - - x(i)= the sum from k=1 to k=n of - - 4*x(k)*sin((2k-1)*i*pi/(2*n)) - - a call of sinqb followed by a call of - sinqf will multiply the sequence x by 4*n. - therefore sinqf is the unnormalized inverse - of sinqb. - -wsave contains initialization calculations which must not - be destroyed between calls of sinqb or sinqf. - -****************************************************************** - -subroutine cosqi(n,wsave) - -****************************************************************** - -subroutine cosqi initializes the array wsave which is used in -both cosqf and cosqb. the prime factorization of n together with -a tabulation of the trigonometric functions are computed and -stored in wsave. - -input parameter - -n the length of the array to be transformed. the method - is most efficient when n is a product of small primes. - -output parameter - -wsave a work array which must be dimensioned at least 3*n+15. - the same work array can be used for both cosqf and cosqb - as long as n remains unchanged. different wsave arrays - are required for different values of n. the contents of - wsave must not be changed between calls of cosqf or cosqb. - -****************************************************************** - -subroutine cosqf(n,x,wsave) - -****************************************************************** - -subroutine cosqf computes the fast fourier transform of quarter -wave data. that is , cosqf computes the coefficients in a cosine -series representation with only odd wave numbers. the transform -is defined below at output parameter x - -cosqf is the unnormalized inverse of cosqb since a call of cosqf -followed by a call of cosqb will multiply the input sequence x -by 4*n. - -the array wsave which is used by subroutine cosqf must be -initialized by calling subroutine cosqi(n,wsave). - - -input parameters - -n the length of the array x to be transformed. 
the method - is most efficient when n is a product of small primes. - -x an array which contains the sequence to be transformed - -wsave a work array which must be dimensioned at least 3*n+15 - in the program that calls cosqf. the wsave array must be - initialized by calling subroutine cosqi(n,wsave) and a - different wsave array must be used for each different - value of n. this initialization does not have to be - repeated so long as n remains unchanged thus subsequent - transforms can be obtained faster than the first. - -output parameters - -x for i=1,...,n - - x(i) = x(1) plus the sum from k=2 to k=n of - - 2*x(k)*cos((2*i-1)*(k-1)*pi/(2*n)) - - a call of cosqf followed by a call of - cosqb will multiply the sequence x by 4*n. - therefore cosqb is the unnormalized inverse - of cosqf. - -wsave contains initialization calculations which must not - be destroyed between calls of cosqf or cosqb. - -****************************************************************** - -subroutine cosqb(n,x,wsave) - -****************************************************************** - -subroutine cosqb computes the fast fourier transform of quarter -wave data. that is , cosqb computes a sequence from its -representation in terms of a cosine series with odd wave numbers. -the transform is defined below at output parameter x. - -cosqb is the unnormalized inverse of cosqf since a call of cosqb -followed by a call of cosqf will multiply the input sequence x -by 4*n. - -the array wsave which is used by subroutine cosqb must be -initialized by calling subroutine cosqi(n,wsave). - - -input parameters - -n the length of the array x to be transformed. the method - is most efficient when n is a product of small primes. - -x an array which contains the sequence to be transformed - -wsave a work array that must be dimensioned at least 3*n+15 - in the program that calls cosqb. 
the wsave array must be - initialized by calling subroutine cosqi(n,wsave) and a - different wsave array must be used for each different - value of n. this initialization does not have to be - repeated so long as n remains unchanged thus subsequent - transforms can be obtained faster than the first. - -output parameters - -x for i=1,...,n - - x(i)= the sum from k=1 to k=n of - - 4*x(k)*cos((2*k-1)*(i-1)*pi/(2*n)) - - a call of cosqb followed by a call of - cosqf will multiply the sequence x by 4*n. - therefore cosqf is the unnormalized inverse - of cosqb. - -wsave contains initialization calculations which must not - be destroyed between calls of cosqb or cosqf. - -****************************************************************** - -subroutine cffti(n,wsave) - -****************************************************************** - -subroutine cffti initializes the array wsave which is used in -both cfftf and cfftb. the prime factorization of n together with -a tabulation of the trigonometric functions are computed and -stored in wsave. - -input parameter - -n the length of the sequence to be transformed - -output parameter - -wsave a work array which must be dimensioned at least 4*n+15 - the same work array can be used for both cfftf and cfftb - as long as n remains unchanged. different wsave arrays - are required for different values of n. the contents of - wsave must not be changed between calls of cfftf or cfftb. - -****************************************************************** - -subroutine cfftf(n,c,wsave) - -****************************************************************** - -subroutine cfftf computes the forward complex discrete fourier -transform (the fourier analysis). equivalently , cfftf computes -the fourier coefficients of a complex periodic sequence. -the transform is defined below at output parameter c. - -the transform is not normalized. to obtain a normalized transform -the output must be divided by n. 
otherwise a call of cfftf -followed by a call of cfftb will multiply the sequence by n. - -the array wsave which is used by subroutine cfftf must be -initialized by calling subroutine cffti(n,wsave). - -input parameters - - -n the length of the complex sequence c. the method is - more efficient when n is the product of small primes. n - -c a complex array of length n which contains the sequence - -wsave a real work array which must be dimensioned at least 4n+15 - in the program that calls cfftf. the wsave array must be - initialized by calling subroutine cffti(n,wsave) and a - different wsave array must be used for each different - value of n. this initialization does not have to be - repeated so long as n remains unchanged thus subsequent - transforms can be obtained faster than the first. - the same wsave array can be used by cfftf and cfftb. - -output parameters - -c for j=1,...,n - - c(j)=the sum from k=1,...,n of - - c(k)*exp(-i*(j-1)*(k-1)*2*pi/n) - - where i=sqrt(-1) - -wsave contains initialization calculations which must not be - destroyed between calls of subroutine cfftf or cfftb - -****************************************************************** - -subroutine cfftb(n,c,wsave) - -****************************************************************** - -subroutine cfftb computes the backward complex discrete fourier -transform (the fourier synthesis). equivalently , cfftb computes -a complex periodic sequence from its fourier coefficients. -the transform is defined below at output parameter c. - -a call of cfftf followed by a call of cfftb will multiply the -sequence by n. - -the array wsave which is used by subroutine cfftb must be -initialized by calling subroutine cffti(n,wsave). - -input parameters - - -n the length of the complex sequence c. the method is - more efficient when n is the product of small primes. 
- -c a complex array of length n which contains the sequence - -wsave a real work array which must be dimensioned at least 4n+15 - in the program that calls cfftb. the wsave array must be - initialized by calling subroutine cffti(n,wsave) and a - different wsave array must be used for each different - value of n. this initialization does not have to be - repeated so long as n remains unchanged thus subsequent - transforms can be obtained faster than the first. - the same wsave array can be used by cfftf and cfftb. - -output parameters - -c for j=1,...,n - - c(j)=the sum from k=1,...,n of - - c(k)*exp(i*(j-1)*(k-1)*2*pi/n) - - where i=sqrt(-1) - -wsave contains initialization calculations which must not be - destroyed between calls of subroutine cfftf or cfftb - -*/ diff --git a/oss-internship-2020/pffft/main_pffft_sandboxed.cc b/oss-internship-2020/pffft/main_pffft_sandboxed.cc index 8b7e6d1..10ea802 100644 --- a/oss-internship-2020/pffft/main_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/main_pffft_sandboxed.cc @@ -23,7 +23,6 @@ #include #include -#include "fftpack.h" #include "pffft_sapi.sapi.h" #include "sandboxed_api/util/flag.h" #include "sandboxed_api/vars.h" diff --git a/oss-internship-2020/pffft/pffft.c b/oss-internship-2020/pffft/pffft.c deleted file mode 100644 index 1686e15..0000000 --- a/oss-internship-2020/pffft/pffft.c +++ /dev/null @@ -1,1881 +0,0 @@ -/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com ) - - Based on original fortran 77 code from FFTPACKv4 from NETLIB - (http://www.netlib.org/fftpack), authored by Dr Paul Swarztrauber - of NCAR, in 1985. - - As confirmed by the NCAR fftpack software curators, the following - FFTPACKv5 license applies to FFTPACKv4 sources. My changes are - released under the same terms. - - FFTPACK license: - - http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html - - Copyright (c) 2004 the University Corporation for Atmospheric - Research ("UCAR"). All rights reserved. 
Developed by NCAR's - Computational and Information Systems Laboratory, UCAR, - www.cisl.ucar.edu. - - Redistribution and use of the Software in source and binary forms, - with or without modification, is permitted provided that the - following conditions are met: - - - Neither the names of NCAR's Computational and Information Systems - Laboratory, the University Corporation for Atmospheric Research, - nor the names of its sponsors or contributors may be used to - endorse or promote products derived from this Software without - specific prior written permission. - - - Redistributions of source code must retain the above copyright - notices, this list of conditions, and the disclaimer below. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the disclaimer below in the - documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT - HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE - SOFTWARE. - - - PFFFT : a Pretty Fast FFT. - - This file is largerly based on the original FFTPACK implementation, modified in - order to take advantage of SIMD instructions of modern CPUs. -*/ - -/* - ChangeLog: - - 2011/10/02, version 1: This is the very first release of this file. 
-*/ - -#include "pffft.h" -#include -#include -#include -#include - -/* detect compiler flavour */ -#if defined(_MSC_VER) -# define COMPILER_MSVC -#elif defined(__GNUC__) -# define COMPILER_GCC -#endif - -#if defined(COMPILER_GCC) -# define ALWAYS_INLINE(return_type) inline return_type __attribute__ ((always_inline)) -# define NEVER_INLINE(return_type) return_type __attribute__ ((noinline)) -# define RESTRICT __restrict -# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__]; -#elif defined(COMPILER_MSVC) -# define ALWAYS_INLINE(return_type) __forceinline return_type -# define NEVER_INLINE(return_type) __declspec(noinline) return_type -# define RESTRICT __restrict -# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__)) -#endif - - -/* - vector support macros: the rest of the code is independant of - SSE/Altivec/NEON -- adding support for other platforms with 4-element - vectors should be limited to these macros -*/ - - -// define PFFFT_SIMD_DISABLE if you want to use scalar code instead of simd code -//#define PFFFT_SIMD_DISABLE - -/* - Altivec support macros -*/ -#if !defined(PFFFT_SIMD_DISABLE) && (defined(__ppc__) || defined(__ppc64__)) -typedef vector float v4sf; -# define SIMD_SZ 4 -# define VZERO() ((vector float) vec_splat_u8(0)) -# define VMUL(a,b) vec_madd(a,b, VZERO()) -# define VADD(a,b) vec_add(a,b) -# define VMADD(a,b,c) vec_madd(a,b,c) -# define VSUB(a,b) vec_sub(a,b) -inline v4sf ld_ps1(const float *p) { v4sf v=vec_lde(0,p); return vec_splat(vec_perm(v, v, vec_lvsl(0, p)), 0); } -# define LD_PS1(p) ld_ps1(&p) -# define INTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = vec_mergeh(in1, in2); out2 = vec_mergel(in1, in2); out1 = tmp__; } -# define UNINTERLEAVE2(in1, in2, out1, out2) { \ - vector unsigned char vperm1 = (vector unsigned char)(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); \ - vector unsigned char vperm2 = (vector unsigned 
char)(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); \ - v4sf tmp__ = vec_perm(in1, in2, vperm1); out2 = vec_perm(in1, in2, vperm2); out1 = tmp__; \ - } -# define VTRANSPOSE4(x0,x1,x2,x3) { \ - v4sf y0 = vec_mergeh(x0, x2); \ - v4sf y1 = vec_mergel(x0, x2); \ - v4sf y2 = vec_mergeh(x1, x3); \ - v4sf y3 = vec_mergel(x1, x3); \ - x0 = vec_mergeh(y0, y2); \ - x1 = vec_mergel(y0, y2); \ - x2 = vec_mergeh(y1, y3); \ - x3 = vec_mergel(y1, y3); \ - } -# define VSWAPHL(a,b) vec_perm(a,b, (vector unsigned char)(16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15)) -# define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0) - -/* - SSE1 support macros -*/ -#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86)) - -#include -typedef __m128 v4sf; -# define SIMD_SZ 4 // 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions anyway so you will have to work if you want to enable AVX with its 256-bit vectors. -# define VZERO() _mm_setzero_ps() -# define VMUL(a,b) _mm_mul_ps(a,b) -# define VADD(a,b) _mm_add_ps(a,b) -# define VMADD(a,b,c) _mm_add_ps(_mm_mul_ps(a,b), c) -# define VSUB(a,b) _mm_sub_ps(a,b) -# define LD_PS1(p) _mm_set1_ps(p) -# define INTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_unpacklo_ps(in1, in2); out2 = _mm_unpackhi_ps(in1, in2); out1 = tmp__; } -# define UNINTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(2,0,2,0)); out2 = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(3,1,3,1)); out1 = tmp__; } -# define VTRANSPOSE4(x0,x1,x2,x3) _MM_TRANSPOSE4_PS(x0,x1,x2,x3) -# define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0)) -# define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0) - -/* - ARM NEON support macros -*/ -#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__arm__) || defined(__aarch64__) || defined(__arm64__)) -# include -typedef float32x4_t v4sf; -# define SIMD_SZ 4 -# define VZERO() vdupq_n_f32(0) -# define VMUL(a,b) vmulq_f32(a,b) -# 
define VADD(a,b) vaddq_f32(a,b) -# define VMADD(a,b,c) vmlaq_f32(c,a,b) -# define VSUB(a,b) vsubq_f32(a,b) -# define LD_PS1(p) vld1q_dup_f32(&(p)) -# define INTERLEAVE2(in1, in2, out1, out2) { float32x4x2_t tmp__ = vzipq_f32(in1,in2); out1=tmp__.val[0]; out2=tmp__.val[1]; } -# define UNINTERLEAVE2(in1, in2, out1, out2) { float32x4x2_t tmp__ = vuzpq_f32(in1,in2); out1=tmp__.val[0]; out2=tmp__.val[1]; } -# define VTRANSPOSE4(x0,x1,x2,x3) { \ - float32x4x2_t t0_ = vzipq_f32(x0, x2); \ - float32x4x2_t t1_ = vzipq_f32(x1, x3); \ - float32x4x2_t u0_ = vzipq_f32(t0_.val[0], t1_.val[0]); \ - float32x4x2_t u1_ = vzipq_f32(t0_.val[1], t1_.val[1]); \ - x0 = u0_.val[0]; x1 = u0_.val[1]; x2 = u1_.val[0]; x3 = u1_.val[1]; \ - } -// marginally faster version -//# define VTRANSPOSE4(x0,x1,x2,x3) { asm("vtrn.32 %q0, %q1;\n vtrn.32 %q2,%q3\n vswp %f0,%e2\n vswp %f1,%e3" : "+w"(x0), "+w"(x1), "+w"(x2), "+w"(x3)::); } -# define VSWAPHL(a,b) vcombine_f32(vget_low_f32(b), vget_high_f32(a)) -# define VALIGNED(ptr) ((((long)(ptr)) & 0x3) == 0) -#else -# if !defined(PFFFT_SIMD_DISABLE) -# warning "building with simd disabled !\n"; -# define PFFFT_SIMD_DISABLE // fallback to scalar code -# endif -#endif - -// fallback mode for situations where SSE/Altivec are not available, use scalar mode instead -#ifdef PFFFT_SIMD_DISABLE -typedef float v4sf; -# define SIMD_SZ 1 -# define VZERO() 0.f -# define VMUL(a,b) ((a)*(b)) -# define VADD(a,b) ((a)+(b)) -# define VMADD(a,b,c) ((a)*(b)+(c)) -# define VSUB(a,b) ((a)-(b)) -# define LD_PS1(p) (p) -# define VALIGNED(ptr) ((((long)(ptr)) & 0x3) == 0) -#endif - -// shortcuts for complex multiplcations -#define VCPLXMUL(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VSUB(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VADD(ai,tmp); } -#define VCPLXMULCONJ(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VADD(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VSUB(ai,tmp); } -#ifndef SVMUL -// multiply a scalar with a vector -#define SVMUL(f,v) 
VMUL(LD_PS1(f),v) -#endif - -#if !defined(PFFFT_SIMD_DISABLE) -typedef union v4sf_union { - v4sf v; - float f[4]; -} v4sf_union; - -#include - -#define assertv4(v,f0,f1,f2,f3) assert(v.f[0] == (f0) && v.f[1] == (f1) && v.f[2] == (f2) && v.f[3] == (f3)) - -/* detect bugs with the vector support macros */ -void validate_pffft_simd() { - float f[16] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 }; - v4sf_union a0, a1, a2, a3, t, u; - memcpy(a0.f, f, 4*sizeof(float)); - memcpy(a1.f, f+4, 4*sizeof(float)); - memcpy(a2.f, f+8, 4*sizeof(float)); - memcpy(a3.f, f+12, 4*sizeof(float)); - - t = a0; u = a1; t.v = VZERO(); - printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 0, 0, 0, 0); - t.v = VADD(a1.v, a2.v); - printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 12, 14, 16, 18); - t.v = VMUL(a1.v, a2.v); - printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77); - t.v = VMADD(a1.v, a2.v,a0.v); - printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80); - - INTERLEAVE2(a1.v,a2.v,t.v,u.v); - printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); - assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11); - UNINTERLEAVE2(a1.v,a2.v,t.v,u.v); - printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); - assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11); - - t.v=LD_PS1(f[15]); - printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); - assertv4(t, 15, 15, 15, 15); - t.v = VSWAPHL(a1.v, a2.v); - printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); - assertv4(t, 8, 9, 6, 7); - VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v); - printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g 
%2g]\n", - a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3], - a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]); - assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15); -} -#endif //!PFFFT_SIMD_DISABLE - -/* SSE and co like 16-bytes aligned pointers */ -#define MALLOC_V4SF_ALIGNMENT 64 // with a 64-byte alignment, we are even aligned on L2 cache lines... -void *pffft_aligned_malloc(size_t nb_bytes) { - void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT); - if (!p0) return (void *) 0; - p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1)))); - *((void **) p - 1) = p0; - return p; -} - -void pffft_aligned_free(void *p) { - if (p) free(*((void **) p - 1)); -} - -int pffft_simd_size() { return SIMD_SZ; } - -/* - passf2 and passb2 has been merged here, fsign = -1 for passf2, +1 for passb2 -*/ -static NEVER_INLINE(void) passf2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1, float fsign) { - int k, i; - int l1ido = l1*ido; - if (ido <= 2) { - for (k=0; k < l1ido; k += ido, ch += ido, cc+= 2*ido) { - ch[0] = VADD(cc[0], cc[ido+0]); - ch[l1ido] = VSUB(cc[0], cc[ido+0]); - ch[1] = VADD(cc[1], cc[ido+1]); - ch[l1ido + 1] = VSUB(cc[1], cc[ido+1]); - } - } else { - for (k=0; k < l1ido; k += ido, ch += ido, cc += 2*ido) { - for (i=0; i 2); - for (k=0; k< l1ido; k += ido, cc+= 3*ido, ch +=ido) { - for (i=0; i 2); - for (k = 0; k < l1; ++k, cc += 5*ido, ch += ido) { - for (i = 0; i < ido-1; i += 2) { - ti5 = VSUB(cc_ref(i , 2), cc_ref(i , 5)); - ti2 = VADD(cc_ref(i , 2), cc_ref(i , 5)); - ti4 = VSUB(cc_ref(i , 3), cc_ref(i , 4)); - ti3 = VADD(cc_ref(i , 3), cc_ref(i , 4)); - tr5 = VSUB(cc_ref(i-1, 2), cc_ref(i-1, 5)); - tr2 = VADD(cc_ref(i-1, 2), cc_ref(i-1, 5)); - tr4 = VSUB(cc_ref(i-1, 3), cc_ref(i-1, 4)); - tr3 = VADD(cc_ref(i-1, 3), cc_ref(i-1, 4)); - ch_ref(i-1, 1) = VADD(cc_ref(i-1, 1), VADD(tr2, tr3)); - ch_ref(i , 
1) = VADD(cc_ref(i , 1), VADD(ti2, ti3)); - cr2 = VADD(cc_ref(i-1, 1), VADD(SVMUL(tr11, tr2),SVMUL(tr12, tr3))); - ci2 = VADD(cc_ref(i , 1), VADD(SVMUL(tr11, ti2),SVMUL(tr12, ti3))); - cr3 = VADD(cc_ref(i-1, 1), VADD(SVMUL(tr12, tr2),SVMUL(tr11, tr3))); - ci3 = VADD(cc_ref(i , 1), VADD(SVMUL(tr12, ti2),SVMUL(tr11, ti3))); - cr5 = VADD(SVMUL(ti11, tr5), SVMUL(ti12, tr4)); - ci5 = VADD(SVMUL(ti11, ti5), SVMUL(ti12, ti4)); - cr4 = VSUB(SVMUL(ti12, tr5), SVMUL(ti11, tr4)); - ci4 = VSUB(SVMUL(ti12, ti5), SVMUL(ti11, ti4)); - dr3 = VSUB(cr3, ci4); - dr4 = VADD(cr3, ci4); - di3 = VADD(ci3, cr4); - di4 = VSUB(ci3, cr4); - dr5 = VADD(cr2, ci5); - dr2 = VSUB(cr2, ci5); - di5 = VSUB(ci2, cr5); - di2 = VADD(ci2, cr5); - wr1=wa1[i], wi1=fsign*wa1[i+1], wr2=wa2[i], wi2=fsign*wa2[i+1]; - wr3=wa3[i], wi3=fsign*wa3[i+1], wr4=wa4[i], wi4=fsign*wa4[i+1]; - VCPLXMUL(dr2, di2, LD_PS1(wr1), LD_PS1(wi1)); - ch_ref(i - 1, 2) = dr2; - ch_ref(i, 2) = di2; - VCPLXMUL(dr3, di3, LD_PS1(wr2), LD_PS1(wi2)); - ch_ref(i - 1, 3) = dr3; - ch_ref(i, 3) = di3; - VCPLXMUL(dr4, di4, LD_PS1(wr3), LD_PS1(wi3)); - ch_ref(i - 1, 4) = dr4; - ch_ref(i, 4) = di4; - VCPLXMUL(dr5, di5, LD_PS1(wr4), LD_PS1(wi4)); - ch_ref(i - 1, 5) = dr5; - ch_ref(i, 5) = di5; - } - } -#undef ch_ref -#undef cc_ref -} - -static NEVER_INLINE(void) radf2_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch, const float *wa1) { - static const float minus_one = -1.f; - int i, k, l1ido = l1*ido; - for (k=0; k < l1ido; k += ido) { - v4sf a = cc[k], b = cc[k + l1ido]; - ch[2*k] = VADD(a, b); - ch[2*(k+ido)-1] = VSUB(a, b); - } - if (ido < 2) return; - if (ido != 2) { - for (k=0; k < l1ido; k += ido) { - for (i=2; i 5) { - wa[i1-1] = wa[i-1]; - wa[i1] = wa[i]; - } - } - l1 = l2; - } -} /* cffti1 */ - - -v4sf *cfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2, const float *wa, const int *ifac, int isign) { - v4sf *in = (v4sf*)input_readonly; - v4sf *out = (in == work2 ? 
work1 : work2); - int nf = ifac[1], k1; - int l1 = 1; - int iw = 0; - assert(in != out && work1 != work2); - for (k1=2; k1<=nf+1; k1++) { - int ip = ifac[k1]; - int l2 = ip*l1; - int ido = n / l2; - int idot = ido + ido; - switch (ip) { - case 5: { - int ix2 = iw + idot; - int ix3 = ix2 + idot; - int ix4 = ix3 + idot; - passf5_ps(idot, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); - } break; - case 4: { - int ix2 = iw + idot; - int ix3 = ix2 + idot; - passf4_ps(idot, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], isign); - } break; - case 2: { - passf2_ps(idot, l1, in, out, &wa[iw], isign); - } break; - case 3: { - int ix2 = iw + idot; - passf3_ps(idot, l1, in, out, &wa[iw], &wa[ix2], isign); - } break; - default: - assert(0); - } - l1 = l2; - iw += (ip - 1)*idot; - if (out == work2) { - out = work1; in = work2; - } else { - out = work2; in = work1; - } - } - - return in; /* this is in fact the output .. */ -} - - -struct PFFFT_Setup { - int N; - int Ncvec; // nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) - int ifac[15]; - pffft_transform_t transform; - v4sf *data; // allocated room for twiddle coefs - float *e; // points into 'data' , N/4*3 elements - float *twiddle; // points into 'data', N/4 elements -}; - -PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform) { - PFFFT_Setup *s = (PFFFT_Setup*)malloc(sizeof(PFFFT_Setup)); - int k, m; - /* unfortunately, the fft size must be a multiple of 16 for complex FFTs - and 32 for real FFTs -- a lot of stuff would need to be rewritten to - handle other cases (or maybe just switch to a scalar fft, I don't know..) */ - if (transform == PFFFT_REAL) { assert((N%(2*SIMD_SZ*SIMD_SZ))==0 && N>0); } - if (transform == PFFFT_COMPLEX) { assert((N%(SIMD_SZ*SIMD_SZ))==0 && N>0); } - //assert((N % 32) == 0); - s->N = N; - s->transform = transform; - /* nb of complex simd vectors */ - s->Ncvec = (transform == PFFFT_REAL ? 
N/2 : N)/SIMD_SZ; - s->data = (v4sf*)pffft_aligned_malloc(2*s->Ncvec * sizeof(v4sf)); - s->e = (float*)s->data; - s->twiddle = (float*)(s->data + (2*s->Ncvec*(SIMD_SZ-1))/SIMD_SZ); - - if (transform == PFFFT_REAL) { - for (k=0; k < s->Ncvec; ++k) { - int i = k/SIMD_SZ; - int j = k%SIMD_SZ; - for (m=0; m < SIMD_SZ-1; ++m) { - float A = -2*M_PI*(m+1)*k / N; - s->e[(2*(i*3 + m) + 0) * SIMD_SZ + j] = cos(A); - s->e[(2*(i*3 + m) + 1) * SIMD_SZ + j] = sin(A); - } - } - rffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac); - } else { - for (k=0; k < s->Ncvec; ++k) { - int i = k/SIMD_SZ; - int j = k%SIMD_SZ; - for (m=0; m < SIMD_SZ-1; ++m) { - float A = -2*M_PI*(m+1)*k / N; - s->e[(2*(i*3 + m) + 0)*SIMD_SZ + j] = cos(A); - s->e[(2*(i*3 + m) + 1)*SIMD_SZ + j] = sin(A); - } - } - cffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac); - } - - /* check that N is decomposable with allowed prime factors */ - for (k=0, m=1; k < s->ifac[1]; ++k) { m *= s->ifac[2+k]; } - if (m != N/SIMD_SZ) { - pffft_destroy_setup(s); s = 0; - } - - return s; -} - - -void pffft_destroy_setup(PFFFT_Setup *s) { - pffft_aligned_free(s->data); - free(s); -} - -#if !defined(PFFFT_SIMD_DISABLE) - -/* [0 0 1 2 3 4 5 6 7 8] -> [0 8 7 6 5 4 3 2 1] */ -static void reversed_copy(int N, const v4sf *in, int in_stride, v4sf *out) { - v4sf g0, g1; - int k; - INTERLEAVE2(in[0], in[1], g0, g1); in += in_stride; - - *--out = VSWAPHL(g0, g1); // [g0l, g0h], [g1l g1h] -> [g1l, g0h] - for (k=1; k < N; ++k) { - v4sf h0, h1; - INTERLEAVE2(in[0], in[1], h0, h1); in += in_stride; - *--out = VSWAPHL(g1, h0); - *--out = VSWAPHL(h0, h1); - g1 = h1; - } - *--out = VSWAPHL(g1, g0); -} - -static void unreversed_copy(int N, const v4sf *in, v4sf *out, int out_stride) { - v4sf g0, g1, h0, h1; - int k; - g0 = g1 = in[0]; ++in; - for (k=1; k < N; ++k) { - h0 = *in++; h1 = *in++; - g1 = VSWAPHL(g1, h0); - h0 = VSWAPHL(h0, h1); - UNINTERLEAVE2(h0, g1, out[0], out[1]); out += out_stride; - g1 = h1; - } - h0 = *in++; h1 = g0; - g1 = VSWAPHL(g1, h0); - h0 = 
VSWAPHL(h0, h1); - UNINTERLEAVE2(h0, g1, out[0], out[1]); -} - -void pffft_zreorder(PFFFT_Setup *setup, const float *in, float *out, pffft_direction_t direction) { - int k, N = setup->N, Ncvec = setup->Ncvec; - const v4sf *vin = (const v4sf*)in; - v4sf *vout = (v4sf*)out; - assert(in != out); - if (setup->transform == PFFFT_REAL) { - int k, dk = N/32; - if (direction == PFFFT_FORWARD) { - for (k=0; k < dk; ++k) { - INTERLEAVE2(vin[k*8 + 0], vin[k*8 + 1], vout[2*(0*dk + k) + 0], vout[2*(0*dk + k) + 1]); - INTERLEAVE2(vin[k*8 + 4], vin[k*8 + 5], vout[2*(2*dk + k) + 0], vout[2*(2*dk + k) + 1]); - } - reversed_copy(dk, vin+2, 8, (v4sf*)(out + N/2)); - reversed_copy(dk, vin+6, 8, (v4sf*)(out + N)); - } else { - for (k=0; k < dk; ++k) { - UNINTERLEAVE2(vin[2*(0*dk + k) + 0], vin[2*(0*dk + k) + 1], vout[k*8 + 0], vout[k*8 + 1]); - UNINTERLEAVE2(vin[2*(2*dk + k) + 0], vin[2*(2*dk + k) + 1], vout[k*8 + 4], vout[k*8 + 5]); - } - unreversed_copy(dk, (v4sf*)(in + N/4), (v4sf*)(out + N - 6*SIMD_SZ), -8); - unreversed_copy(dk, (v4sf*)(in + 3*N/4), (v4sf*)(out + N - 2*SIMD_SZ), -8); - } - } else { - if (direction == PFFFT_FORWARD) { - for (k=0; k < Ncvec; ++k) { - int kk = (k/4) + (k%4)*(Ncvec/4); - INTERLEAVE2(vin[k*2], vin[k*2+1], vout[kk*2], vout[kk*2+1]); - } - } else { - for (k=0; k < Ncvec; ++k) { - int kk = (k/4) + (k%4)*(Ncvec/4); - UNINTERLEAVE2(vin[kk*2], vin[kk*2+1], vout[k*2], vout[k*2+1]); - } - } - } -} - -void pffft_cplx_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { - int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks - v4sf r0, i0, r1, i1, r2, i2, r3, i3; - v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1; - assert(in != out); - for (k=0; k < dk; ++k) { - r0 = in[8*k+0]; i0 = in[8*k+1]; - r1 = in[8*k+2]; i1 = in[8*k+3]; - r2 = in[8*k+4]; i2 = in[8*k+5]; - r3 = in[8*k+6]; i3 = in[8*k+7]; - VTRANSPOSE4(r0,r1,r2,r3); - VTRANSPOSE4(i0,i1,i2,i3); - VCPLXMUL(r1,i1,e[k*6+0],e[k*6+1]); - VCPLXMUL(r2,i2,e[k*6+2],e[k*6+3]); - 
VCPLXMUL(r3,i3,e[k*6+4],e[k*6+5]); - - sr0 = VADD(r0,r2); dr0 = VSUB(r0, r2); - sr1 = VADD(r1,r3); dr1 = VSUB(r1, r3); - si0 = VADD(i0,i2); di0 = VSUB(i0, i2); - si1 = VADD(i1,i3); di1 = VSUB(i1, i3); - - /* - transformation for each column is: - - [1 1 1 1 0 0 0 0] [r0] - [1 0 -1 0 0 -1 0 1] [r1] - [1 -1 1 -1 0 0 0 0] [r2] - [1 0 -1 0 0 1 0 -1] [r3] - [0 0 0 0 1 1 1 1] * [i0] - [0 1 0 -1 1 0 -1 0] [i1] - [0 0 0 0 1 -1 1 -1] [i2] - [0 -1 0 1 1 0 -1 0] [i3] - */ - - r0 = VADD(sr0, sr1); i0 = VADD(si0, si1); - r1 = VADD(dr0, di1); i1 = VSUB(di0, dr1); - r2 = VSUB(sr0, sr1); i2 = VSUB(si0, si1); - r3 = VSUB(dr0, di1); i3 = VADD(di0, dr1); - - *out++ = r0; *out++ = i0; *out++ = r1; *out++ = i1; - *out++ = r2; *out++ = i2; *out++ = r3; *out++ = i3; - } -} - -void pffft_cplx_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { - int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks - v4sf r0, i0, r1, i1, r2, i2, r3, i3; - v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1; - assert(in != out); - for (k=0; k < dk; ++k) { - r0 = in[8*k+0]; i0 = in[8*k+1]; - r1 = in[8*k+2]; i1 = in[8*k+3]; - r2 = in[8*k+4]; i2 = in[8*k+5]; - r3 = in[8*k+6]; i3 = in[8*k+7]; - - sr0 = VADD(r0,r2); dr0 = VSUB(r0, r2); - sr1 = VADD(r1,r3); dr1 = VSUB(r1, r3); - si0 = VADD(i0,i2); di0 = VSUB(i0, i2); - si1 = VADD(i1,i3); di1 = VSUB(i1, i3); - - r0 = VADD(sr0, sr1); i0 = VADD(si0, si1); - r1 = VSUB(dr0, di1); i1 = VADD(di0, dr1); - r2 = VSUB(sr0, sr1); i2 = VSUB(si0, si1); - r3 = VADD(dr0, di1); i3 = VSUB(di0, dr1); - - VCPLXMULCONJ(r1,i1,e[k*6+0],e[k*6+1]); - VCPLXMULCONJ(r2,i2,e[k*6+2],e[k*6+3]); - VCPLXMULCONJ(r3,i3,e[k*6+4],e[k*6+5]); - - VTRANSPOSE4(r0,r1,r2,r3); - VTRANSPOSE4(i0,i1,i2,i3); - - *out++ = r0; *out++ = i0; *out++ = r1; *out++ = i1; - *out++ = r2; *out++ = i2; *out++ = r3; *out++ = i3; - } -} - - -static ALWAYS_INLINE(void) pffft_real_finalize_4x4(const v4sf *in0, const v4sf *in1, const v4sf *in, - const v4sf *e, v4sf *out) { - v4sf r0, i0, r1, i1, r2, i2, r3, i3; - 
v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1; - r0 = *in0; i0 = *in1; - r1 = *in++; i1 = *in++; r2 = *in++; i2 = *in++; r3 = *in++; i3 = *in++; - VTRANSPOSE4(r0,r1,r2,r3); - VTRANSPOSE4(i0,i1,i2,i3); - - /* - transformation for each column is: - - [1 1 1 1 0 0 0 0] [r0] - [1 0 -1 0 0 -1 0 1] [r1] - [1 0 -1 0 0 1 0 -1] [r2] - [1 -1 1 -1 0 0 0 0] [r3] - [0 0 0 0 1 1 1 1] * [i0] - [0 -1 0 1 -1 0 1 0] [i1] - [0 -1 0 1 1 0 -1 0] [i2] - [0 0 0 0 -1 1 -1 1] [i3] - */ - - //cerr << "matrix initial, before e , REAL:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; - //cerr << "matrix initial, before e, IMAG :\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; - - VCPLXMUL(r1,i1,e[0],e[1]); - VCPLXMUL(r2,i2,e[2],e[3]); - VCPLXMUL(r3,i3,e[4],e[5]); - - //cerr << "matrix initial, real part:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; - //cerr << "matrix initial, imag part:\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; - - sr0 = VADD(r0,r2); dr0 = VSUB(r0,r2); - sr1 = VADD(r1,r3); dr1 = VSUB(r3,r1); - si0 = VADD(i0,i2); di0 = VSUB(i0,i2); - si1 = VADD(i1,i3); di1 = VSUB(i3,i1); - - r0 = VADD(sr0, sr1); - r3 = VSUB(sr0, sr1); - i0 = VADD(si0, si1); - i3 = VSUB(si1, si0); - r1 = VADD(dr0, di1); - r2 = VSUB(dr0, di1); - i1 = VSUB(dr1, di0); - i2 = VADD(dr1, di0); - - *out++ = r0; - *out++ = i0; - *out++ = r1; - *out++ = i1; - *out++ = r2; - *out++ = i2; - *out++ = r3; - *out++ = i3; - -} - -static NEVER_INLINE(void) pffft_real_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { - int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks - /* fftpack order is f0r f1r f1i f2r f2i ... 
f(n-1)r f(n-1)i f(n)r */ - - v4sf_union cr, ci, *uout = (v4sf_union*)out; - v4sf save = in[7], zero=VZERO(); - float xr0, xi0, xr1, xi1, xr2, xi2, xr3, xi3; - static const float s = M_SQRT2/2; - - cr.v = in[0]; ci.v = in[Ncvec*2-1]; - assert(in != out); - pffft_real_finalize_4x4(&zero, &zero, in+1, e, out); - - /* - [cr0 cr1 cr2 cr3 ci0 ci1 ci2 ci3] - - [Xr(1)] ] [1 1 1 1 0 0 0 0] - [Xr(N/4) ] [0 0 0 0 1 s 0 -s] - [Xr(N/2) ] [1 0 -1 0 0 0 0 0] - [Xr(3N/4)] [0 0 0 0 1 -s 0 s] - [Xi(1) ] [1 -1 1 -1 0 0 0 0] - [Xi(N/4) ] [0 0 0 0 0 -s -1 -s] - [Xi(N/2) ] [0 -1 0 1 0 0 0 0] - [Xi(3N/4)] [0 0 0 0 0 -s 1 -s] - */ - - xr0=(cr.f[0]+cr.f[2]) + (cr.f[1]+cr.f[3]); uout[0].f[0] = xr0; - xi0=(cr.f[0]+cr.f[2]) - (cr.f[1]+cr.f[3]); uout[1].f[0] = xi0; - xr2=(cr.f[0]-cr.f[2]); uout[4].f[0] = xr2; - xi2=(cr.f[3]-cr.f[1]); uout[5].f[0] = xi2; - xr1= ci.f[0] + s*(ci.f[1]-ci.f[3]); uout[2].f[0] = xr1; - xi1=-ci.f[2] - s*(ci.f[1]+ci.f[3]); uout[3].f[0] = xi1; - xr3= ci.f[0] - s*(ci.f[1]-ci.f[3]); uout[6].f[0] = xr3; - xi3= ci.f[2] - s*(ci.f[1]+ci.f[3]); uout[7].f[0] = xi3; - - for (k=1; k < dk; ++k) { - v4sf save_next = in[8*k+7]; - pffft_real_finalize_4x4(&save, &in[8*k+0], in + 8*k+1, - e + k*6, out + k*8); - save = save_next; - } - -} - -static ALWAYS_INLINE(void) pffft_real_preprocess_4x4(const v4sf *in, - const v4sf *e, v4sf *out, int first) { - v4sf r0=in[0], i0=in[1], r1=in[2], i1=in[3], r2=in[4], i2=in[5], r3=in[6], i3=in[7]; - /* - transformation for each column is: - - [1 1 1 1 0 0 0 0] [r0] - [1 0 0 -1 0 -1 -1 0] [r1] - [1 -1 -1 1 0 0 0 0] [r2] - [1 0 0 -1 0 1 1 0] [r3] - [0 0 0 0 1 -1 1 -1] * [i0] - [0 -1 1 0 1 0 0 1] [i1] - [0 0 0 0 1 1 -1 -1] [i2] - [0 1 -1 0 1 0 0 1] [i3] - */ - - v4sf sr0 = VADD(r0,r3), dr0 = VSUB(r0,r3); - v4sf sr1 = VADD(r1,r2), dr1 = VSUB(r1,r2); - v4sf si0 = VADD(i0,i3), di0 = VSUB(i0,i3); - v4sf si1 = VADD(i1,i2), di1 = VSUB(i1,i2); - - r0 = VADD(sr0, sr1); - r2 = VSUB(sr0, sr1); - r1 = VSUB(dr0, si1); - r3 = VADD(dr0, si1); - i0 = VSUB(di0, di1); 
- i2 = VADD(di0, di1); - i1 = VSUB(si0, dr1); - i3 = VADD(si0, dr1); - - VCPLXMULCONJ(r1,i1,e[0],e[1]); - VCPLXMULCONJ(r2,i2,e[2],e[3]); - VCPLXMULCONJ(r3,i3,e[4],e[5]); - - VTRANSPOSE4(r0,r1,r2,r3); - VTRANSPOSE4(i0,i1,i2,i3); - - if (!first) { - *out++ = r0; - *out++ = i0; - } - *out++ = r1; - *out++ = i1; - *out++ = r2; - *out++ = i2; - *out++ = r3; - *out++ = i3; -} - -static NEVER_INLINE(void) pffft_real_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { - int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks - /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */ - - v4sf_union Xr, Xi, *uout = (v4sf_union*)out; - float cr0, ci0, cr1, ci1, cr2, ci2, cr3, ci3; - static const float s = M_SQRT2; - assert(in != out); - for (k=0; k < 4; ++k) { - Xr.f[k] = ((float*)in)[8*k]; - Xi.f[k] = ((float*)in)[8*k+4]; - } - - pffft_real_preprocess_4x4(in, e, out+1, 1); // will write only 6 values - - /* - [Xr0 Xr1 Xr2 Xr3 Xi0 Xi1 Xi2 Xi3] - - [cr0] [1 0 2 0 1 0 0 0] - [cr1] [1 0 0 0 -1 0 -2 0] - [cr2] [1 0 -2 0 1 0 0 0] - [cr3] [1 0 0 0 -1 0 2 0] - [ci0] [0 2 0 2 0 0 0 0] - [ci1] [0 s 0 -s 0 -s 0 -s] - [ci2] [0 0 0 0 0 -2 0 2] - [ci3] [0 -s 0 s 0 -s 0 -s] - */ - for (k=1; k < dk; ++k) { - pffft_real_preprocess_4x4(in+8*k, e + k*6, out-1+k*8, 0); - } - - cr0=(Xr.f[0]+Xi.f[0]) + 2*Xr.f[2]; uout[0].f[0] = cr0; - cr1=(Xr.f[0]-Xi.f[0]) - 2*Xi.f[2]; uout[0].f[1] = cr1; - cr2=(Xr.f[0]+Xi.f[0]) - 2*Xr.f[2]; uout[0].f[2] = cr2; - cr3=(Xr.f[0]-Xi.f[0]) + 2*Xi.f[2]; uout[0].f[3] = cr3; - ci0= 2*(Xr.f[1]+Xr.f[3]); uout[2*Ncvec-1].f[0] = ci0; - ci1= s*(Xr.f[1]-Xr.f[3]) - s*(Xi.f[1]+Xi.f[3]); uout[2*Ncvec-1].f[1] = ci1; - ci2= 2*(Xi.f[3]-Xi.f[1]); uout[2*Ncvec-1].f[2] = ci2; - ci3=-s*(Xr.f[1]-Xr.f[3]) - s*(Xi.f[1]+Xi.f[3]); uout[2*Ncvec-1].f[3] = ci3; -} - - -void pffft_transform_internal(PFFFT_Setup *setup, const float *finput, float *foutput, v4sf *scratch, - pffft_direction_t direction, int ordered) { - int k, Ncvec = setup->Ncvec; - int nf_odd = 
(setup->ifac[1] & 1); - - // temporary buffer is allocated on the stack if the scratch pointer is NULL - int stack_allocate = (scratch == 0 ? Ncvec*2 : 1); - VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate); - - const v4sf *vinput = (const v4sf*)finput; - v4sf *voutput = (v4sf*)foutput; - v4sf *buff[2] = { voutput, scratch ? scratch : scratch_on_stack }; - int ib = (nf_odd ^ ordered ? 1 : 0); - - assert(VALIGNED(finput) && VALIGNED(foutput)); - - //assert(finput != foutput); - if (direction == PFFFT_FORWARD) { - ib = !ib; - if (setup->transform == PFFFT_REAL) { - ib = (rfftf1_ps(Ncvec*2, vinput, buff[ib], buff[!ib], - setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1); - pffft_real_finalize(Ncvec, buff[ib], buff[!ib], (v4sf*)setup->e); - } else { - v4sf *tmp = buff[ib]; - for (k=0; k < Ncvec; ++k) { - UNINTERLEAVE2(vinput[k*2], vinput[k*2+1], tmp[k*2], tmp[k*2+1]); - } - ib = (cfftf1_ps(Ncvec, buff[ib], buff[!ib], buff[ib], - setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1); - pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], (v4sf*)setup->e); - } - if (ordered) { - pffft_zreorder(setup, (float*)buff[!ib], (float*)buff[ib], PFFFT_FORWARD); - } else ib = !ib; - } else { - if (vinput == buff[ib]) { - ib = !ib; // may happen when finput == foutput - } - if (ordered) { - pffft_zreorder(setup, (float*)vinput, (float*)buff[ib], PFFFT_BACKWARD); - vinput = buff[ib]; ib = !ib; - } - if (setup->transform == PFFFT_REAL) { - pffft_real_preprocess(Ncvec, vinput, buff[ib], (v4sf*)setup->e); - ib = (rfftb1_ps(Ncvec*2, buff[ib], buff[0], buff[1], - setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1); - } else { - pffft_cplx_preprocess(Ncvec, vinput, buff[ib], (v4sf*)setup->e); - ib = (cfftf1_ps(Ncvec, buff[ib], buff[0], buff[1], - setup->twiddle, &setup->ifac[0], +1) == buff[0] ? 
0 : 1); - for (k=0; k < Ncvec; ++k) { - INTERLEAVE2(buff[ib][k*2], buff[ib][k*2+1], buff[ib][k*2], buff[ib][k*2+1]); - } - } - } - - if (buff[ib] != voutput) { - /* extra copy required -- this situation should only happen when finput == foutput */ - assert(finput==foutput); - for (k=0; k < Ncvec; ++k) { - v4sf a = buff[ib][2*k], b = buff[ib][2*k+1]; - voutput[2*k] = a; voutput[2*k+1] = b; - } - ib = !ib; - } - assert(buff[ib] == voutput); -} - -void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b, float *ab, float scaling) { - int Ncvec = s->Ncvec; - const v4sf * RESTRICT va = (const v4sf*)a; - const v4sf * RESTRICT vb = (const v4sf*)b; - v4sf * RESTRICT vab = (v4sf*)ab; - -#ifdef __arm__ - __builtin_prefetch(va); - __builtin_prefetch(vb); - __builtin_prefetch(vab); - __builtin_prefetch(va+2); - __builtin_prefetch(vb+2); - __builtin_prefetch(vab+2); - __builtin_prefetch(va+4); - __builtin_prefetch(vb+4); - __builtin_prefetch(vab+4); - __builtin_prefetch(va+6); - __builtin_prefetch(vb+6); - __builtin_prefetch(vab+6); -# ifndef __clang__ -# define ZCONVOLVE_USING_INLINE_NEON_ASM -# endif -#endif - - float ar, ai, br, bi, abr, abi; -#ifndef ZCONVOLVE_USING_INLINE_ASM - v4sf vscal = LD_PS1(scaling); - int i; -#endif - - assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab)); - ar = ((v4sf_union*)va)[0].f[0]; - ai = ((v4sf_union*)va)[1].f[0]; - br = ((v4sf_union*)vb)[0].f[0]; - bi = ((v4sf_union*)vb)[1].f[0]; - abr = ((v4sf_union*)vab)[0].f[0]; - abi = ((v4sf_union*)vab)[1].f[0]; - -#ifdef ZCONVOLVE_USING_INLINE_ASM // inline asm version, unfortunately miscompiled by clang 3.2, at least on ubuntu.. so this will be restricted to gcc - const float *a_ = a, *b_ = b; float *ab_ = ab; - int N = Ncvec; - asm volatile("mov r8, %2 \n" - "vdup.f32 q15, %4 \n" - "1: \n" - "pld [%0,#64] \n" - "pld [%1,#64] \n" - "pld [%2,#64] \n" - "pld [%0,#96] \n" - "pld [%1,#96] \n" - "pld [%2,#96] \n" - "vld1.f32 {q0,q1}, [%0,:128]! \n" - "vld1.f32 {q4,q5}, [%1,:128]! 
\n" - "vld1.f32 {q2,q3}, [%0,:128]! \n" - "vld1.f32 {q6,q7}, [%1,:128]! \n" - "vld1.f32 {q8,q9}, [r8,:128]! \n" - - "vmul.f32 q10, q0, q4 \n" - "vmul.f32 q11, q0, q5 \n" - "vmul.f32 q12, q2, q6 \n" - "vmul.f32 q13, q2, q7 \n" - "vmls.f32 q10, q1, q5 \n" - "vmla.f32 q11, q1, q4 \n" - "vld1.f32 {q0,q1}, [r8,:128]! \n" - "vmls.f32 q12, q3, q7 \n" - "vmla.f32 q13, q3, q6 \n" - "vmla.f32 q8, q10, q15 \n" - "vmla.f32 q9, q11, q15 \n" - "vmla.f32 q0, q12, q15 \n" - "vmla.f32 q1, q13, q15 \n" - "vst1.f32 {q8,q9},[%2,:128]! \n" - "vst1.f32 {q0,q1},[%2,:128]! \n" - "subs %3, #2 \n" - "bne 1b \n" - : "+r"(a_), "+r"(b_), "+r"(ab_), "+r"(N) : "r"(scaling) : "r8", "q0","q1","q2","q3","q4","q5","q6","q7","q8","q9", "q10","q11","q12","q13","q15","memory"); -#else // default routine, works fine for non-arm cpus with current compilers - for (i=0; i < Ncvec; i += 2) { - v4sf ar, ai, br, bi; - ar = va[2*i+0]; ai = va[2*i+1]; - br = vb[2*i+0]; bi = vb[2*i+1]; - VCPLXMUL(ar, ai, br, bi); - vab[2*i+0] = VMADD(ar, vscal, vab[2*i+0]); - vab[2*i+1] = VMADD(ai, vscal, vab[2*i+1]); - ar = va[2*i+2]; ai = va[2*i+3]; - br = vb[2*i+2]; bi = vb[2*i+3]; - VCPLXMUL(ar, ai, br, bi); - vab[2*i+2] = VMADD(ar, vscal, vab[2*i+2]); - vab[2*i+3] = VMADD(ai, vscal, vab[2*i+3]); - } -#endif - if (s->transform == PFFFT_REAL) { - ((v4sf_union*)vab)[0].f[0] = abr + ar*br*scaling; - ((v4sf_union*)vab)[1].f[0] = abi + ai*bi*scaling; - } -} - - -#else // defined(PFFFT_SIMD_DISABLE) - -// standard routine using scalar floats, without SIMD stuff. 
- -#define pffft_zreorder_nosimd pffft_zreorder -void pffft_zreorder_nosimd(PFFFT_Setup *setup, const float *in, float *out, pffft_direction_t direction) { - int k, N = setup->N; - if (setup->transform == PFFFT_COMPLEX) { - for (k=0; k < 2*N; ++k) out[k] = in[k]; - return; - } - else if (direction == PFFFT_FORWARD) { - float x_N = in[N-1]; - for (k=N-1; k > 1; --k) out[k] = in[k-1]; - out[0] = in[0]; - out[1] = x_N; - } else { - float x_N = in[1]; - for (k=1; k < N-1; ++k) out[k] = in[k+1]; - out[0] = in[0]; - out[N-1] = x_N; - } -} - -#define pffft_transform_internal_nosimd pffft_transform_internal -void pffft_transform_internal_nosimd(PFFFT_Setup *setup, const float *input, float *output, float *scratch, - pffft_direction_t direction, int ordered) { - int Ncvec = setup->Ncvec; - int nf_odd = (setup->ifac[1] & 1); - - // temporary buffer is allocated on the stack if the scratch pointer is NULL - int stack_allocate = (scratch == 0 ? Ncvec*2 : 1); - VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate); - float *buff[2]; - int ib; - if (scratch == 0) scratch = scratch_on_stack; - buff[0] = output; buff[1] = scratch; - - if (setup->transform == PFFFT_COMPLEX) ordered = 0; // it is always ordered. - ib = (nf_odd ^ ordered ? 1 : 0); - - if (direction == PFFFT_FORWARD) { - if (setup->transform == PFFFT_REAL) { - ib = (rfftf1_ps(Ncvec*2, input, buff[ib], buff[!ib], - setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1); - } else { - ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib], - setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1); - } - if (ordered) { - pffft_zreorder(setup, buff[ib], buff[!ib], PFFFT_FORWARD); ib = !ib; - } - } else { - if (input == buff[ib]) { - ib = !ib; // may happen when finput == foutput - } - if (ordered) { - pffft_zreorder(setup, input, buff[!ib], PFFFT_BACKWARD); - input = buff[!ib]; - } - if (setup->transform == PFFFT_REAL) { - ib = (rfftb1_ps(Ncvec*2, input, buff[ib], buff[!ib], - setup->twiddle, &setup->ifac[0]) == buff[0] ? 
0 : 1); - } else { - ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib], - setup->twiddle, &setup->ifac[0], +1) == buff[0] ? 0 : 1); - } - } - if (buff[ib] != output) { - int k; - // extra copy required -- this situation should happens only when finput == foutput - assert(input==output); - for (k=0; k < Ncvec; ++k) { - float a = buff[ib][2*k], b = buff[ib][2*k+1]; - output[2*k] = a; output[2*k+1] = b; - } - ib = !ib; - } - assert(buff[ib] == output); -} - -#define pffft_zconvolve_accumulate_nosimd pffft_zconvolve_accumulate -void pffft_zconvolve_accumulate_nosimd(PFFFT_Setup *s, const float *a, const float *b, - float *ab, float scaling) { - int i, Ncvec = s->Ncvec; - - if (s->transform == PFFFT_REAL) { - // take care of the fftpack ordering - ab[0] += a[0]*b[0]*scaling; - ab[2*Ncvec-1] += a[2*Ncvec-1]*b[2*Ncvec-1]*scaling; - ++ab; ++a; ++b; --Ncvec; - } - for (i=0; i < Ncvec; ++i) { - float ar, ai, br, bi; - ar = a[2*i+0]; ai = a[2*i+1]; - br = b[2*i+0]; bi = b[2*i+1]; - VCPLXMUL(ar, ai, br, bi); - ab[2*i+0] += ar*scaling; - ab[2*i+1] += ai*scaling; - } -} - -#endif // defined(PFFFT_SIMD_DISABLE) - -void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) { - pffft_transform_internal(setup, input, output, (v4sf*)work, direction, 0); -} - -void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) { - pffft_transform_internal(setup, input, output, (v4sf*)work, direction, 1); -} diff --git a/oss-internship-2020/pffft/pffft.h b/oss-internship-2020/pffft/pffft.h deleted file mode 100644 index 2bfa7b3..0000000 --- a/oss-internship-2020/pffft/pffft.h +++ /dev/null @@ -1,177 +0,0 @@ -/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com ) - - Based on original fortran 77 code from FFTPACKv4 from NETLIB, - authored by Dr Paul Swarztrauber of NCAR, in 1985. 
- - As confirmed by the NCAR fftpack software curators, the following - FFTPACKv5 license applies to FFTPACKv4 sources. My changes are - released under the same terms. - - FFTPACK license: - - http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html - - Copyright (c) 2004 the University Corporation for Atmospheric - Research ("UCAR"). All rights reserved. Developed by NCAR's - Computational and Information Systems Laboratory, UCAR, - www.cisl.ucar.edu. - - Redistribution and use of the Software in source and binary forms, - with or without modification, is permitted provided that the - following conditions are met: - - - Neither the names of NCAR's Computational and Information Systems - Laboratory, the University Corporation for Atmospheric Research, - nor the names of its sponsors or contributors may be used to - endorse or promote products derived from this Software without - specific prior written permission. - - - Redistributions of source code must retain the above copyright - notices, this list of conditions, and the disclaimer below. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the disclaimer below in the - documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT - HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE - SOFTWARE. -*/ - -/* - PFFFT : a Pretty Fast FFT. 
- - This is basically an adaptation of the single precision fftpack - (v4) as found on netlib taking advantage of SIMD instruction found - on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON). - - For architectures where no SIMD instruction is available, the code - falls back to a scalar version. - - Restrictions: - - - 1D transforms only, with 32-bit single precision. - - - supports only transforms for inputs of length N of the form - N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128, - 144, 160, etc are all acceptable lengths). Performance is best for - 128<=N<=8192. - - - all (float*) pointers in the functions below are expected to - have an "simd-compatible" alignment, that is 16 bytes on x86 and - powerpc CPUs. - - You can allocate such buffers with the functions - pffft_aligned_malloc / pffft_aligned_free (or with stuff like - posix_memalign..) - -*/ - -#ifndef PFFFT_H -#define PFFFT_H - -#include // for size_t - -#ifdef __cplusplus -extern "C" { -#endif - - /* opaque struct holding internal stuff (precomputed twiddle factors) - this struct can be shared by many threads as it contains only - read-only data. - */ - typedef struct PFFFT_Setup PFFFT_Setup; - - /* direction of the transform */ - typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t; - - /* type of transform */ - typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t; - - /* - prepare for performing transforms of size N -- the returned - PFFFT_Setup structure is read-only so it can safely be shared by - multiple concurrent threads. - */ - PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform); - void pffft_destroy_setup(PFFFT_Setup *); - /* - Perform a Fourier transform , The z-domain data is stored in the - most efficient order for transforming it back, or using it for - convolution. 
If you need to have its content sorted in the - "usual" way, that is as an array of interleaved complex numbers, - either use pffft_transform_ordered , or call pffft_zreorder after - the forward fft, and before the backward fft. - - Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x. - Typically you will want to scale the backward transform by 1/N. - - The 'work' pointer should point to an area of N (2*N for complex - fft) floats, properly aligned. If 'work' is NULL, then stack will - be used instead (this is probably the best strategy for small - FFTs, say for N < 16384). - - input and output may alias. - */ - void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction); - - /* - Similar to pffft_transform, but makes sure that the output is - ordered as expected (interleaved complex numbers). This is - similar to calling pffft_transform and then pffft_zreorder. - - input and output may alias. - */ - void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction); - - /* - call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(..., - PFFFT_FORWARD) if you want to have the frequency components in - the correct "canonical" order, as interleaved complex numbers. - - (for real transforms, both 0-frequency and half frequency - components, which are real, are assembled in the first entry as - F(0)+i*F(n/2+1). Note that the original fftpack did place - F(n/2+1) at the end of the arrays). - - input and output should not alias. - */ - void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction); - - /* - Perform a multiplication of the frequency components of dft_a and - dft_b and accumulate them into dft_ab. 
The arrays should have - been obtained with pffft_transform(.., PFFFT_FORWARD) and should - *not* have been reordered with pffft_zreorder (otherwise just - perform the operation yourself as the dft coefs are stored as - interleaved complex numbers). - - the operation performed is: dft_ab += (dft_a * fdt_b)*scaling - - The dft_a, dft_b and dft_ab pointers may alias. - */ - void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); - - /* - the float buffers must have the correct alignment (16-byte boundary - on intel and powerpc). This function may be used to obtain such - correctly aligned buffers. - */ - void *pffft_aligned_malloc(size_t nb_bytes); - void pffft_aligned_free(void *); - - /* return 4 or 1 wether support SSE/Altivec instructions was enable when building pffft.c */ - int pffft_simd_size(); - -#ifdef __cplusplus -} -#endif - -#endif // PFFFT_H diff --git a/oss-internship-2020/pffft/test_pffft.c b/oss-internship-2020/pffft/test_pffft.c deleted file mode 100644 index a5d20c2..0000000 --- a/oss-internship-2020/pffft/test_pffft.c +++ /dev/null @@ -1,419 +0,0 @@ -/* - Copyright (c) 2013 Julien Pommier. 
- - Small test & bench for PFFFT, comparing its performance with the scalar FFTPACK, FFTW, and Apple vDSP - - How to build: - - on linux, with fftw3: - gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm - - on macos, without fftw3: - clang -o test_pffft -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -framework Accelerate - - on macos, with fftw3: - clang -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework Accelerate - - on windows, with visual c++: - cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c - - build without SIMD instructions: - gcc -o test_pffft -DPFFFT_SIMD_DISABLE -O3 -Wall -W pffft.c test_pffft.c fftpack.c -lm - - */ - -#include "pffft.h" -#include "fftpack.h" - -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_SYS_TIMES -# include -# include -#endif - -#ifdef HAVE_VECLIB -# include -#endif - -#ifdef HAVE_FFTW -# include -#endif - -#define MAX(x,y) ((x)>(y)?(x):(y)) - -double frand() { - return rand()/(double)RAND_MAX; -} - -#if defined(HAVE_SYS_TIMES) - inline double uclock_sec(void) { - static double ttclk = 0.; - if (ttclk == 0.) ttclk = sysconf(_SC_CLK_TCK); - struct tms t; return ((double)times(&t)) / ttclk; - } -# else - double uclock_sec(void) -{ return (double)clock()/(double)CLOCKS_PER_SEC; } -#endif - - -/* compare results with the regular fftpack */ -void pffft_validate_N(int N, int cplx) { - int Nfloat = N*(cplx?2:1); - int Nbytes = Nfloat * sizeof(float); - float *ref, *in, *out, *tmp, *tmp2; - PFFFT_Setup *s = pffft_new_setup(N, cplx ? 
PFFFT_COMPLEX : PFFFT_REAL); - int pass; - - if (!s) { printf("Skipping N=%d, not supported\n", N); return; } - ref = pffft_aligned_malloc(Nbytes); - in = pffft_aligned_malloc(Nbytes); - out = pffft_aligned_malloc(Nbytes); - tmp = pffft_aligned_malloc(Nbytes); - tmp2 = pffft_aligned_malloc(Nbytes); - - for (pass=0; pass < 2; ++pass) { - float ref_max = 0; - int k; - //printf("N=%d pass=%d cplx=%d\n", N, pass, cplx); - // compute reference solution with FFTPACK - if (pass == 0) { - float *wrk = malloc(2*Nbytes+15*sizeof(float)); - for (k=0; k < Nfloat; ++k) { - ref[k] = in[k] = frand()*2-1; - out[k] = 1e30; - } - if (!cplx) { - rffti(N, wrk); - rfftf(N, ref, wrk); - // use our ordering for real ffts instead of the one of fftpack - { - float refN=ref[N-1]; - for (k=N-2; k >= 1; --k) ref[k+1] = ref[k]; - ref[1] = refN; - } - } else { - cffti(N, wrk); - cfftf(N, ref, wrk); - } - free(wrk); - } - - for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, fabs(ref[k])); - - - // pass 0 : non canonical ordering of transform coefficients - if (pass == 0) { - // test forward transform, with different input / output - pffft_transform(s, in, tmp, 0, PFFFT_FORWARD); - memcpy(tmp2, tmp, Nbytes); - memcpy(tmp, in, Nbytes); - pffft_transform(s, tmp, tmp, 0, PFFFT_FORWARD); - for (k = 0; k < Nfloat; ++k) { - assert(tmp2[k] == tmp[k]); - } - - // test reordering - pffft_zreorder(s, tmp, out, PFFFT_FORWARD); - pffft_zreorder(s, out, tmp, PFFFT_BACKWARD); - for (k = 0; k < Nfloat; ++k) { - assert(tmp2[k] == tmp[k]); - } - pffft_zreorder(s, tmp, out, PFFFT_FORWARD); - } else { - // pass 1 : canonical ordering of transform coeffs. 
- pffft_transform_ordered(s, in, tmp, 0, PFFFT_FORWARD); - memcpy(tmp2, tmp, Nbytes); - memcpy(tmp, in, Nbytes); - pffft_transform_ordered(s, tmp, tmp, 0, PFFFT_FORWARD); - for (k = 0; k < Nfloat; ++k) { - assert(tmp2[k] == tmp[k]); - } - memcpy(out, tmp, Nbytes); - } - - { - for (k=0; k < Nfloat; ++k) { - if (!(fabs(ref[k] - out[k]) < 1e-3*ref_max)) { - printf("%s forward PFFFT mismatch found for N=%d\n", (cplx?"CPLX":"REAL"), N); - exit(1); - } - } - - if (pass == 0) pffft_transform(s, tmp, out, 0, PFFFT_BACKWARD); - else pffft_transform_ordered(s, tmp, out, 0, PFFFT_BACKWARD); - memcpy(tmp2, out, Nbytes); - memcpy(out, tmp, Nbytes); - if (pass == 0) pffft_transform(s, out, out, 0, PFFFT_BACKWARD); - else pffft_transform_ordered(s, out, out, 0, PFFFT_BACKWARD); - for (k = 0; k < Nfloat; ++k) { - assert(tmp2[k] == out[k]); - out[k] *= 1.f/N; - } - for (k = 0; k < Nfloat; ++k) { - if (fabs(in[k] - out[k]) > 1e-3 * ref_max) { - printf("pass=%d, %s IFFFT does not match for N=%d\n", pass, (cplx?"CPLX":"REAL"), N); break; - exit(1); - } - } - } - - // quick test of the circular convolution in fft domain - { - float conv_err = 0, conv_max = 0; - - pffft_zreorder(s, ref, tmp, PFFFT_FORWARD); - memset(out, 0, Nbytes); - pffft_zconvolve_accumulate(s, ref, ref, out, 1.0); - pffft_zreorder(s, out, tmp2, PFFFT_FORWARD); - - for (k=0; k < Nfloat; k += 2) { - float ar = tmp[k], ai=tmp[k+1]; - if (cplx || k > 0) { - tmp[k] = ar*ar - ai*ai; - tmp[k+1] = 2*ar*ai; - } else { - tmp[0] = ar*ar; - tmp[1] = ai*ai; - } - } - - for (k=0; k < Nfloat; ++k) { - float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]); - if (d > conv_err) conv_err = d; - if (e > conv_max) conv_max = e; - } - if (conv_err > 1e-5*conv_max) { - printf("zconvolve error ? 
%g %g\n", conv_err, conv_max); exit(1); - } - } - - } - - printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); fflush(stdout); - - pffft_destroy_setup(s); - pffft_aligned_free(ref); - pffft_aligned_free(in); - pffft_aligned_free(out); - pffft_aligned_free(tmp); - pffft_aligned_free(tmp2); -} - -void pffft_validate(int cplx) { - static int Ntest[] = { 16, 32, 64, 96, 128, 160, 192, 256, 288, 384, 5*96, 512, 576, 5*128, 800, 864, 1024, 2048, 2592, 4000, 4096, 12000, 36864, 0}; - int k; - for (k = 0; Ntest[k]; ++k) { - int N = Ntest[k]; - if (N == 16 && !cplx) continue; - pffft_validate_N(N, cplx); - } -} - -int array_output_format = 0; - -void show_output(const char *name, int N, int cplx, float flops, float t0, float t1, int max_iter) { - float mflops = flops/1e6/(t1 - t0 + 1e-16); - if (array_output_format) { - if (flops != -1) { - printf("|%9.0f ", mflops); - } else printf("| n/a "); - } else { - if (flops != -1) { - printf("N=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), name, mflops, (t1-t0)/2/max_iter * 1e9, max_iter); - } - } - fflush(stdout); -} - -void benchmark_ffts(int N, int cplx) { - int Nfloat = (cplx ? N*2 : N); - int Nbytes = Nfloat * sizeof(float); - float *X = pffft_aligned_malloc(Nbytes), *Y = pffft_aligned_malloc(Nbytes), *Z = pffft_aligned_malloc(Nbytes); - - double t0, t1, flops; - - int k; - int max_iter = 5120000/N*4; -#ifdef __arm__ - max_iter /= 4; -#endif - int iter; - - for (k = 0; k < Nfloat; ++k) { - X[k] = 0; //sqrtf(k+1); - } - - // FFTPack benchmark - { - float *wrk = malloc(2*Nbytes + 15*sizeof(float)); - int max_iter_ = max_iter/pffft_simd_size(); if (max_iter_ == 0) max_iter_ = 1; - if (cplx) cffti(N, wrk); - else rffti(N, wrk); - t0 = uclock_sec(); - - for (iter = 0; iter < max_iter_; ++iter) { - if (cplx) { - cfftf(N, X, wrk); - cfftb(N, X, wrk); - } else { - rfftf(N, X, wrk); - rfftb(N, X, wrk); - } - } - t1 = uclock_sec(); - free(wrk); - - flops = (max_iter_*2) * ((cplx ? 
5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html - show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); - } - -#ifdef HAVE_VECLIB - int log2N = (int)(log(N)/log(2) + 0.5f); - if (N == (1< 1 && strcmp(argv[1], "--array-format") == 0) { - array_output_format = 1; - } - -#ifndef PFFFT_SIMD_DISABLE - validate_pffft_simd(); -#endif - pffft_validate(1); - pffft_validate(0); - if (!array_output_format) { - for (i=0; Nvalues[i] > 0; ++i) { - benchmark_ffts(Nvalues[i], 0 /* real fft */); - } - for (i=0; Nvalues[i] > 0; ++i) { - benchmark_ffts(Nvalues[i], 1 /* cplx fft */); - } - } else { - printf("| input len "); - printf("|real FFTPack"); -#ifdef HAVE_VECLIB - printf("| real vDSP "); -#endif -#ifdef HAVE_FFTW - printf("| real FFTW "); -#endif - printf("| real PFFFT | "); - - printf("|cplx FFTPack"); -#ifdef HAVE_VECLIB - printf("| cplx vDSP "); -#endif -#ifdef HAVE_FFTW - printf("| cplx FFTW "); -#endif - printf("| cplx PFFFT |\n"); - for (i=0; Nvalues[i] > 0; ++i) { - printf("|%9d ", Nvalues[i]); - benchmark_ffts(Nvalues[i], 0); - printf("| "); - benchmark_ffts(Nvalues[i], 1); - printf("|\n"); - } - printf(" (numbers are given in MFlops)\n"); - } - - - return 0; -} From 5ae99706e20cbd579416661a28726754c5e77bfd Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Wed, 26 Aug 2020 15:03:25 +0000 Subject: [PATCH 23/42] Added PFFFT_ROOT_DIR --- oss-internship-2020/pffft/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/oss-internship-2020/pffft/CMakeLists.txt b/oss-internship-2020/pffft/CMakeLists.txt index c2a7bcd..41d7520 100644 --- a/oss-internship-2020/pffft/CMakeLists.txt +++ b/oss-internship-2020/pffft/CMakeLists.txt @@ -19,6 +19,8 @@ project(pffft CXX C) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED True) +set(PFFFT_ROOT_DIR https://bitbucket.org/jpommier/pffft.git) + add_library(pffft STATIC master/pffft.c master/pffft.h From bb623d4c0b25baa8ba2fa0d91a0110f045fd0399 Mon Sep 17 00:00:00 2001 From: 
doinachiroiu Date: Thu, 27 Aug 2020 12:54:57 +0000 Subject: [PATCH 24/42] Required changes resolved --- oss-internship-2020/pffft/CMakeLists.txt | 2 +- oss-internship-2020/pffft/README.md | 36 +++--- .../pffft/main_pffft_sandboxed.cc | 108 ++++++++++-------- 3 files changed, 79 insertions(+), 67 deletions(-) diff --git a/oss-internship-2020/pffft/CMakeLists.txt b/oss-internship-2020/pffft/CMakeLists.txt index 41d7520..be687cd 100644 --- a/oss-internship-2020/pffft/CMakeLists.txt +++ b/oss-internship-2020/pffft/CMakeLists.txt @@ -88,7 +88,7 @@ add_sapi_library(pffft_sapi INPUTS master/pffft.h master/fftpack.h LIBRARY pffft - LIBRARY_NAME pffft + LIBRARY_NAME Pffft NAMESPACE "" ) diff --git a/oss-internship-2020/pffft/README.md b/oss-internship-2020/pffft/README.md index 2d2a9ca..eaadb20 100644 --- a/oss-internship-2020/pffft/README.md +++ b/oss-internship-2020/pffft/README.md @@ -4,7 +4,9 @@ Build System: CMake OS: Linux ### Check out the PFFFT library & CMake set up -`git submodule add https://bitbucket.org/jpommier/pffft.git` +`git clone https://github.com/doinachiroiu/sandboxed-api/tree/master/oss-internship-2020/pffft` + +`git submodule update --init --recursive` `mkdir -p build && cd build` @@ -22,7 +24,7 @@ display custom info with ## ***About the project*** *PFFFT library is concerned with 1D Fast-Fourier Transformations finding a compromise between accuracy and speed. It deals with real and complex -vectors, both cases being illustrated in the testing part (`main_pffft.c` +vectors, both cases being illustrated in the testing part (`test_pffft.c` for initially and original version, `main_pffft_sandboxed.cc` for our currently implemented sandboxed version). The original files can be found at: https://bitbucket.org/jpommier/pffft/src.* @@ -62,21 +64,23 @@ In the end, the performance of PFFFT library it is outlined by the output.* ### Bugs history - 1. 
[Solved] pffft benchmark bug: "Sandbox not active" - N = 64, status OK, pffft_transform generates error - N > 64, status not OK - Problem on initialising sapi::StatusOr s; the memory that stays - for s is not the same with the address passed in pffft_transform function. - (sapi::v::GenericPtr - to be changed) +1. [Solved] pffft benchmark bug: "Sandbox not active" + + N = 64, status OK, pffft_transform generates error + N > 64, status not OK + Problem on initialising sapi::StatusOr s; the memory that stays + for s is not the same with the address passed in pffft_transform function. + (sapi::v::GenericPtr - to be changed) - Temporary solution: change the generated files to accept - uintptr_t instead of PFFFT_Setup + Temporary solution: change the generated files to accept + uintptr_t instead of PFFFT_Setup - Solution: using "sapi::v::RemotePtr" instead of "sapi::v::GenericPtr" - to access the memory of object s + Solution: using "sapi::v::RemotePtr" instead of "sapi::v::GenericPtr" + to access the memory of object s - 2. [Unresolved] compiling bug: "No space left on device" - The building process creates some `embed` files that use lots of - memory, trying to write them on /tmp. +2. [Unresolved] compiling bug: "No space left on device" + + The building process creates some `embed` files that use lots of + memory, trying to write them on /tmp. - Temporary solution: clean /tmp directory by `sudo rm -rf /tmp/*`. \ No newline at end of file + Temporary solution: clean /tmp directory by `sudo rm -rf /tmp/*`. 
\ No newline at end of file diff --git a/oss-internship-2020/pffft/main_pffft_sandboxed.cc b/oss-internship-2020/pffft/main_pffft_sandboxed.cc index 10ea802..f6abf05 100644 --- a/oss-internship-2020/pffft/main_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/main_pffft_sandboxed.cc @@ -30,10 +30,10 @@ ABSL_DECLARE_FLAG(string, sandbox2_danger_danger_permit_all); ABSL_DECLARE_FLAG(string, sandbox2_danger_danger_permit_all_and_log); -class PffftSapiSandbox : public pffftSandbox { +class PffftSapiSandbox : public PffftSandbox { public: std::unique_ptr ModifyPolicy( - sandbox2::PolicyBuilder*) override { + sandbox2::PolicyBuilder*) { return sandbox2::PolicyBuilder() .AllowStaticStartup() .AllowOpen() @@ -50,11 +50,11 @@ class PffftSapiSandbox : public pffftSandbox { } }; -double UclockSec(void) { return (double)clock() / (double)CLOCKS_PER_SEC; } +double UclockSec() { return static_cast(clock()) / CLOCKS_PER_SEC; } int array_output_format = 0; -void ShowOutput(const char* name, int N, int cplx, float flops, float t0, +void ShowOutput(const char* name, int n, int cplx, float flops, float t0, float t1, int max_iter) { float mflops = flops / 1e6 / (t1 - t0 + 1e-16); if (array_output_format) { @@ -64,7 +64,7 @@ void ShowOutput(const char* name, int N, int cplx, float flops, float t0, printf("| n/a "); } else { if (flops != -1) { - printf("N=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, + printf("n=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", n, (cplx ? "CPLX" : "REAL"), name, mflops, (t1 - t0) / 2 / max_iter * 1e9, max_iter); } @@ -72,55 +72,64 @@ void ShowOutput(const char* name, int N, int cplx, float flops, float t0, fflush(stdout); } +absl::Status PffftMain() { + PffftSapiSandbox sandbox; + SAPI_RETURN_IF_ERROR(sandbox.Init()); + + return absl::OkStatus(); +} + int main(int argc, char* argv[]) { - // Initialize Google's logging library. 
google::InitGoogleLogging(argv[0]); gflags::ParseCommandLineFlags(&argc, &argv, true); - // Nvalues is a vector keeping the values by which iterates N, its value - // representing the input length. More concrete, N is the number of + // kTransformSizes is a vector keeping the values by which iterates n, its value + // representing the input length. More concrete, n is the number of // data points the caclulus is up to (determinating its accuracy). // To show the performance of Fast-Fourier Transformations the program is - // testing for various values of N. - int Nvalues[] = {64, 96, 128, 160, 192, 256, + // testing for various values of n. + constexpr int kTransformSizes[] = {64, 96, 128, 160, 192, 256, 384, 5 * 96, 512, 5 * 128, 3 * 256, 800, 1024, 2048, 2400, 4096, 8192, 9 * 1024, 16384, 32768}; - int i; LOG(INFO) << "Initializing sandbox...\n"; PffftSapiSandbox sandbox; absl::Status init_status = sandbox.Init(); - LOG(INFO) << "Initialization: " << init_status.ToString().c_str() << "\n"; + if (absl::Status status = PffftMain(); !status.ok()) { + LOG(ERROR) << "Initialization failed: " << status.ToString(); + return EXIT_FAILURE; + } - pffftApi api(&sandbox); + LOG(INFO) << "Initialization: " << init_status.ToString(); + + PffftApi api(&sandbox); int cplx = 0; do { - for (int N : Nvalues) { - const int Nfloat = N * (cplx ? 2 : 1); - int Nbytes = Nfloat * sizeof(float); + for (int n : kTransformSizes) { + const int n_float = n * (cplx ? 
2 : 1); + int n_bytes = n_float * sizeof(float); - float wrk[2 * Nfloat + 15 * sizeof(float)]; - sapi::v::Array wrk_(wrk, 2 * Nfloat + 15 * sizeof(float)); + std::vector work(2 * n_float + 15, 0.0); + sapi::v::Array work_array(&work[0], work.size()); - float X[Nbytes], Y[Nbytes], Z[Nbytes]; - sapi::v::Array X_(X, Nbytes), Y_(Y, Nbytes), Z_(Z, Nbytes); + float x[n_bytes], y[n_bytes], z[n_bytes]; + sapi::v::Array x_array(x, n_bytes), y_array(y, n_bytes), z_array(z, n_bytes); - double t0, t1, flops; + double t0; + double t1; + double flops; - int max_iter = 5120000 / N * 4; -#ifdef __arm__ - max_iter /= 4; -#endif - int iter, k; + int k; + int max_iter = 5120000 / n * 4; - for (k = 0; k < Nfloat; ++k) { - X[k] = 0; + for (k = 0; k < n_float; ++k) { + x[k] = 0; } // FFTPack benchmark @@ -130,50 +139,49 @@ int main(int argc, char* argv[]) { if (max_iter_ == 0) max_iter_ = 1; if (cplx) { - api.cffti(N, wrk_.PtrBoth()).IgnoreError(); + api.cffti(n, work_array.PtrBoth()).IgnoreError(); } else { - api.rffti(N, wrk_.PtrBoth()).IgnoreError(); + api.rffti(n, work_array.PtrBoth()).IgnoreError(); } t0 = UclockSec(); - for (iter = 0; iter < max_iter_; ++iter) { + for (int iter = 0; iter < max_iter_; ++iter) { if (cplx) { - api.cfftf(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); - api.cfftb(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + api.cfftf(n, x_array.PtrBoth(), work_array.PtrBoth()).IgnoreError(); + api.cfftb(n, x_array.PtrBoth(), work_array.PtrBoth()).IgnoreError(); } else { - api.rfftf(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); - api.rfftb(N, X_.PtrBoth(), wrk_.PtrBoth()).IgnoreError(); + api.rfftf(n, x_array.PtrBoth(), work_array.PtrBoth()).IgnoreError(); + api.rfftb(n, x_array.PtrBoth(), work_array.PtrBoth()).IgnoreError(); } } t1 = UclockSec(); flops = - (max_iter_ * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); - ShowOutput("FFTPack", N, cplx, flops, t0, t1, max_iter_); + (max_iter_ * 2) * ((cplx ? 
5 : 2.5) * n * log((double)n) / M_LN2); + ShowOutput("FFTPack", n, cplx, flops, t0, t1, max_iter_); } // PFFFT benchmark { sapi::StatusOr s = - api.pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); + api.pffft_new_setup(n, cplx ? PFFFT_COMPLEX : PFFFT_REAL); - LOG(INFO) << "Setup status is: " << s.status().ToString().c_str() - << "\n"; + LOG(INFO) << "Setup status is: " << s.status().ToString(); if (!s.ok()) { printf("Sandbox failed.\n"); - return 1; + return EXIT_FAILURE; } sapi::v::RemotePtr s_reg(s.value()); t0 = UclockSec(); - for (iter = 0; iter < max_iter; ++iter) { - api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), - Y_.PtrBoth(), PFFFT_FORWARD) + for (int iter = 0; iter < max_iter; ++iter) { + api.pffft_transform(&s_reg, x_array.PtrBoth(), z_array.PtrBoth(), + y_array.PtrBoth(), PFFFT_FORWARD) .IgnoreError(); - api.pffft_transform(&s_reg, X_.PtrBoth(), Z_.PtrBoth(), - Y_.PtrBoth(), PFFFT_FORWARD) + api.pffft_transform(&s_reg, x_array.PtrBoth(), z_array.PtrBoth(), + y_array.PtrBoth(), PFFFT_FORWARD) .IgnoreError(); } @@ -181,15 +189,15 @@ int main(int argc, char* argv[]) { api.pffft_destroy_setup(&s_reg).IgnoreError(); flops = - (max_iter * 2) * ((cplx ? 5 : 2.5) * N * log((double)N) / M_LN2); - ShowOutput("PFFFT", N, cplx, flops, t0, t1, max_iter); + (max_iter * 2) * ((cplx ? 
5 : 2.5) * n * log((double)n) / M_LN2); + ShowOutput("PFFFT", n, cplx, flops, t0, t1, max_iter); - LOG(INFO) << "N = " << N << " SUCCESSFULLY\n\n"; + LOG(INFO) << "n = " << n << " SUCCESSFULLY"; } } cplx = !cplx; } while (cplx); - return 0; + return EXIT_SUCCESS; } \ No newline at end of file From 4336c005d3b6505db883a34da40ceb03843d0a6e Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Thu, 27 Aug 2020 16:46:59 +0000 Subject: [PATCH 25/42] Added output format flag and made other required changes --- oss-internship-2020/pffft/README.md | 8 +- .../pffft/main_pffft_sandboxed.cc | 118 ++++++++++-------- 2 files changed, 72 insertions(+), 54 deletions(-) diff --git a/oss-internship-2020/pffft/README.md b/oss-internship-2020/pffft/README.md index eaadb20..cf04563 100644 --- a/oss-internship-2020/pffft/README.md +++ b/oss-internship-2020/pffft/README.md @@ -46,7 +46,11 @@ the series that calculate the result of transformation. It is also important to mention that the `cplx` variable stands for a boolean value that tells the type of transformation (0 for REAL and 1 for COMPLEX) and it is taken into account while testing. -In the end, the performance of PFFFT library it is outlined by the output.* +In the end, the performance of PFFFT library it is outlined by the output. +There are two output formats available, from which you can choose through +from which you can choose through `--output_format=` command-line flag. +Without using this type of argument when running, the output format is set +by default.* #### CMake observations resume: * linking pffft and fftpack (which contains necessary functions for pffft) @@ -60,6 +64,8 @@ In the end, the performance of PFFFT library it is outlined by the output.* meaning the number of points to which it is set the calculus (more details of mathematical purpose of N - https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm). 
* output shows speed depending on the input length +* use `--output_format=0` or `--output_format=1` arguments to choose between output formats. + `0` is for a detailed output, while `1` is only displaying each transformation process speed. diff --git a/oss-internship-2020/pffft/main_pffft_sandboxed.cc b/oss-internship-2020/pffft/main_pffft_sandboxed.cc index f6abf05..fc14319 100644 --- a/oss-internship-2020/pffft/main_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/main_pffft_sandboxed.cc @@ -12,16 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include -#include -#include #include #include -#include #include #include #include +#include +#include +#include #include "pffft_sapi.sapi.h" #include "sandboxed_api/util/flag.h" @@ -32,8 +33,7 @@ ABSL_DECLARE_FLAG(string, sandbox2_danger_danger_permit_all_and_log); class PffftSapiSandbox : public PffftSandbox { public: - std::unique_ptr ModifyPolicy( - sandbox2::PolicyBuilder*) { + std::unique_ptr ModifyPolicy(sandbox2::PolicyBuilder*) { return sandbox2::PolicyBuilder() .AllowStaticStartup() .AllowOpen() @@ -50,14 +50,27 @@ class PffftSapiSandbox : public PffftSandbox { } }; -double UclockSec() { return static_cast(clock()) / CLOCKS_PER_SEC; } +// output_format flag determines whether the output shows information in detail +// or not. By default, the flag is set as 0, meaning an elaborate display +// (see ShowOutput method). 
+static bool ValidateFlag(const char* flagname, int32_t value) { + if (value >= 0 && value < 32768) { + return true; + } -int array_output_format = 0; + LOG(ERROR) << "Invalid value for --" << flagname << "."; + return false; +} + +DEFINE_int32(output_format, 0, "Value to specific the output format."); +DEFINE_validator(output_format, &ValidateFlag); + +double UclockSec() { return static_cast(clock()) / CLOCKS_PER_SEC; } void ShowOutput(const char* name, int n, int cplx, float flops, float t0, float t1, int max_iter) { float mflops = flops / 1e6 / (t1 - t0 + 1e-16); - if (array_output_format) { + if (FLAGS_output_format) { if (flops != -1) { printf("|%9.0f ", mflops); } else @@ -76,40 +89,18 @@ absl::Status PffftMain() { PffftSapiSandbox sandbox; SAPI_RETURN_IF_ERROR(sandbox.Init()); - return absl::OkStatus(); -} - -int main(int argc, char* argv[]) { - // Initialize Google's logging library. - google::InitGoogleLogging(argv[0]); - - gflags::ParseCommandLineFlags(&argc, &argv, true); - - // kTransformSizes is a vector keeping the values by which iterates n, its value - // representing the input length. More concrete, n is the number of - // data points the caclulus is up to (determinating its accuracy). - // To show the performance of Fast-Fourier Transformations the program is - // testing for various values of n. - constexpr int kTransformSizes[] = {64, 96, 128, 160, 192, 256, - 384, 5 * 96, 512, 5 * 128, 3 * 256, 800, - 1024, 2048, 2400, 4096, 8192, 9 * 1024, - 16384, 32768}; - - LOG(INFO) << "Initializing sandbox...\n"; - - PffftSapiSandbox sandbox; - absl::Status init_status = sandbox.Init(); - - if (absl::Status status = PffftMain(); !status.ok()) { - LOG(ERROR) << "Initialization failed: " << status.ToString(); - return EXIT_FAILURE; - } - - LOG(INFO) << "Initialization: " << init_status.ToString(); - PffftApi api(&sandbox); int cplx = 0; + // kTransformSizes is a vector keeping the values by which iterates n, its + // value representing the input length. 
More concrete, n is the number of data + // points the caclulus is up to (determinating its accuracy). To show the + // performance of Fast-Fourier Transformations the program is testing for + // various values of n. + constexpr int kTransformSizes[] = { + 64, 96, 128, 160, 192, 256, 384, 5 * 96, 512, 5 * 128, + 3 * 256, 800, 1024, 2048, 2400, 4096, 8192, 9 * 1024, 16384, 32768}; + do { for (int n : kTransformSizes) { const int n_float = n * (cplx ? 2 : 1); @@ -118,26 +109,31 @@ int main(int argc, char* argv[]) { std::vector work(2 * n_float + 15, 0.0); sapi::v::Array work_array(&work[0], work.size()); - float x[n_bytes], y[n_bytes], z[n_bytes]; - sapi::v::Array x_array(x, n_bytes), y_array(y, n_bytes), z_array(z, n_bytes); + std::vector x(n_bytes, 0.0); + sapi::v::Array x_array(&x[0], x.size()); + + std::vector y(n_bytes, 0.0); + sapi::v::Array y_array(&y[0], y.size()); + + std::vector z(n_bytes, 0.0); + sapi::v::Array z_array(&z[0], z.size()); double t0; double t1; double flops; - int k; int max_iter = 5120000 / n * 4; - for (k = 0; k < n_float; ++k) { + for (int k = 0; k < n_float; ++k) { x[k] = 0; } // FFTPack benchmark { // SIMD_SZ == 4 (returning value of pffft_simd_size()) - int max_iter_ = max_iter / 4; + int simd_size_iter = max_iter / 4; - if (max_iter_ == 0) max_iter_ = 1; + if (simd_size_iter == 0) simd_size_iter = 1; if (cplx) { api.cffti(n, work_array.PtrBoth()).IgnoreError(); } else { @@ -145,7 +141,7 @@ int main(int argc, char* argv[]) { } t0 = UclockSec(); - for (int iter = 0; iter < max_iter_; ++iter) { + for (int iter = 0; iter < simd_size_iter; ++iter) { if (cplx) { api.cfftf(n, x_array.PtrBoth(), work_array.PtrBoth()).IgnoreError(); api.cfftb(n, x_array.PtrBoth(), work_array.PtrBoth()).IgnoreError(); @@ -156,11 +152,11 @@ int main(int argc, char* argv[]) { } t1 = UclockSec(); - flops = - (max_iter_ * 2) * ((cplx ? 
5 : 2.5) * n * log((double)n) / M_LN2); - ShowOutput("FFTPack", n, cplx, flops, t0, t1, max_iter_); + flops = (simd_size_iter * 2) * + ((cplx ? 5 : 2.5) * n * log((double)n) / M_LN2); + ShowOutput("FFTPack", n, cplx, flops, t0, t1, simd_size_iter); } - + // PFFFT benchmark { sapi::StatusOr s = @@ -170,7 +166,7 @@ int main(int argc, char* argv[]) { if (!s.ok()) { printf("Sandbox failed.\n"); - return EXIT_FAILURE; + return s.status(); } sapi::v::RemotePtr s_reg(s.value()); @@ -188,8 +184,8 @@ int main(int argc, char* argv[]) { t1 = UclockSec(); api.pffft_destroy_setup(&s_reg).IgnoreError(); - flops = - (max_iter * 2) * ((cplx ? 5 : 2.5) * n * log((double)n) / M_LN2); + flops = (max_iter * 2) * ((cplx ? 5 : 2.5) * static_cast(n) * + log((double)n) / M_LN2); ShowOutput("PFFFT", n, cplx, flops, t0, t1, max_iter); LOG(INFO) << "n = " << n << " SUCCESSFULLY"; @@ -199,5 +195,21 @@ int main(int argc, char* argv[]) { cplx = !cplx; } while (cplx); + return absl::OkStatus(); +} + +int main(int argc, char* argv[]) { + // Initialize Google's logging library. 
+ google::InitGoogleLogging(argv[0]); + + gflags::ParseCommandLineFlags(&argc, &argv, true); + + LOG(INFO) << "Initializing sandbox...\n"; + + if (absl::Status status = PffftMain(); !status.ok()) { + LOG(ERROR) << "Initialization failed: " << status.ToString(); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; } \ No newline at end of file From 60b3b5057ce5c32eebd9c47600c71f8a31245c8e Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Thu, 27 Aug 2020 16:49:09 +0000 Subject: [PATCH 26/42] Small README correction --- oss-internship-2020/pffft/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oss-internship-2020/pffft/README.md b/oss-internship-2020/pffft/README.md index cf04563..4f2338a 100644 --- a/oss-internship-2020/pffft/README.md +++ b/oss-internship-2020/pffft/README.md @@ -47,8 +47,8 @@ important to mention that the `cplx` variable stands for a boolean value that tells the type of transformation (0 for REAL and 1 for COMPLEX) and it is taken into account while testing. In the end, the performance of PFFFT library it is outlined by the output. -There are two output formats available, from which you can choose through -from which you can choose through `--output_format=` command-line flag. +There are two output formats available, from which you can choose through +`--output_format=` command-line flag. 
Without using this type of argument when running, the output format is set by default.* From b2351ec6395c7fc0ebe6a8b90d6becc0ed12bd8b Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Thu, 27 Aug 2020 16:55:55 +0000 Subject: [PATCH 27/42] Small coding style corrections --- oss-internship-2020/pffft/README.md | 2 +- .../pffft/main_pffft_sandboxed.cc | 27 +++++++++---------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/oss-internship-2020/pffft/README.md b/oss-internship-2020/pffft/README.md index 4f2338a..d94a083 100644 --- a/oss-internship-2020/pffft/README.md +++ b/oss-internship-2020/pffft/README.md @@ -43,7 +43,7 @@ transformations and print the speed for each value and type of transformation. More specifically, the input length is the target for accuracy (named as `N`) and it stands for the number of data points from the series that calculate the result of transformation. It is also -important to mention that the `cplx` variable stands for a boolean value +important to mention that the `complex` variable stands for a boolean value that tells the type of transformation (0 for REAL and 1 for COMPLEX) and it is taken into account while testing. In the end, the performance of PFFFT library it is outlined by the output. 
diff --git a/oss-internship-2020/pffft/main_pffft_sandboxed.cc b/oss-internship-2020/pffft/main_pffft_sandboxed.cc index fc14319..33116ed 100644 --- a/oss-internship-2020/pffft/main_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/main_pffft_sandboxed.cc @@ -67,7 +67,7 @@ DEFINE_validator(output_format, &ValidateFlag); double UclockSec() { return static_cast(clock()) / CLOCKS_PER_SEC; } -void ShowOutput(const char* name, int n, int cplx, float flops, float t0, +void ShowOutput(const char* name, int n, int complex, float flops, float t0, float t1, int max_iter) { float mflops = flops / 1e6 / (t1 - t0 + 1e-16); if (FLAGS_output_format) { @@ -78,7 +78,7 @@ void ShowOutput(const char* name, int n, int cplx, float flops, float t0, } else { if (flops != -1) { printf("n=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", n, - (cplx ? "CPLX" : "REAL"), name, mflops, + (complex ? "CPLX" : "REAL"), name, mflops, (t1 - t0) / 2 / max_iter * 1e9, max_iter); } } @@ -90,7 +90,6 @@ absl::Status PffftMain() { SAPI_RETURN_IF_ERROR(sandbox.Init()); PffftApi api(&sandbox); - int cplx = 0; // kTransformSizes is a vector keeping the values by which iterates n, its // value representing the input length. More concrete, n is the number of data @@ -101,9 +100,9 @@ absl::Status PffftMain() { 64, 96, 128, 160, 192, 256, 384, 5 * 96, 512, 5 * 128, 3 * 256, 800, 1024, 2048, 2400, 4096, 8192, 9 * 1024, 16384, 32768}; - do { + for (int complex : {0, 1}) { for (int n : kTransformSizes) { - const int n_float = n * (cplx ? 2 : 1); + const int n_float = n * (complex ? 
2 : 1); int n_bytes = n_float * sizeof(float); std::vector work(2 * n_float + 15, 0.0); @@ -134,7 +133,7 @@ absl::Status PffftMain() { int simd_size_iter = max_iter / 4; if (simd_size_iter == 0) simd_size_iter = 1; - if (cplx) { + if (complex) { api.cffti(n, work_array.PtrBoth()).IgnoreError(); } else { api.rffti(n, work_array.PtrBoth()).IgnoreError(); @@ -142,7 +141,7 @@ absl::Status PffftMain() { t0 = UclockSec(); for (int iter = 0; iter < simd_size_iter; ++iter) { - if (cplx) { + if (complex) { api.cfftf(n, x_array.PtrBoth(), work_array.PtrBoth()).IgnoreError(); api.cfftb(n, x_array.PtrBoth(), work_array.PtrBoth()).IgnoreError(); } else { @@ -153,14 +152,14 @@ absl::Status PffftMain() { t1 = UclockSec(); flops = (simd_size_iter * 2) * - ((cplx ? 5 : 2.5) * n * log((double)n) / M_LN2); - ShowOutput("FFTPack", n, cplx, flops, t0, t1, simd_size_iter); + ((complex ? 5 : 2.5) * n * log((double)n) / M_LN2); + ShowOutput("FFTPack", n, complex, flops, t0, t1, simd_size_iter); } // PFFFT benchmark { sapi::StatusOr s = - api.pffft_new_setup(n, cplx ? PFFFT_COMPLEX : PFFFT_REAL); + api.pffft_new_setup(n, complex ? PFFFT_COMPLEX : PFFFT_REAL); LOG(INFO) << "Setup status is: " << s.status().ToString(); @@ -184,16 +183,14 @@ absl::Status PffftMain() { t1 = UclockSec(); api.pffft_destroy_setup(&s_reg).IgnoreError(); - flops = (max_iter * 2) * ((cplx ? 5 : 2.5) * static_cast(n) * + flops = (max_iter * 2) * ((complex ? 
5 : 2.5) * static_cast(n) * log((double)n) / M_LN2); - ShowOutput("PFFFT", n, cplx, flops, t0, t1, max_iter); + ShowOutput("PFFFT", n, complex, flops, t0, t1, max_iter); LOG(INFO) << "n = " << n << " SUCCESSFULLY"; } } - - cplx = !cplx; - } while (cplx); + } return absl::OkStatus(); } From 905f86a0ab64a78ca05e038b7bd553ca5fd1b85e Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Thu, 27 Aug 2020 16:59:58 +0000 Subject: [PATCH 28/42] Small README update regarding variables names --- oss-internship-2020/pffft/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/oss-internship-2020/pffft/README.md b/oss-internship-2020/pffft/README.md index d94a083..a380ff4 100644 --- a/oss-internship-2020/pffft/README.md +++ b/oss-internship-2020/pffft/README.md @@ -41,7 +41,7 @@ Regarding the testing of the methods, one main is doing this job by iterating through a set of values, that represents the accuracy of transformations and print the speed for each value and type of transformation. More specifically, the input length is the target for -accuracy (named as `N`) and it stands for the number of data points from +accuracy (named as `n`) and it stands for the number of data points from the series that calculate the result of transformation. It is also important to mention that the `complex` variable stands for a boolean value that tells the type of transformation (0 for REAL and 1 for COMPLEX) and @@ -60,9 +60,9 @@ by default.* * containing two testing parts (fft / pffft benchmarks) * showing the performance of the transformations implies testing them through various FFT dimenstions. - Variable N, the input length, will take specific values + Variable n, the input length, will take specific values meaning the number of points to which it is set the calculus - (more details of mathematical purpose of N - https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm). 
+ (more details of mathematical purpose of n - https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm). * output shows speed depending on the input length * use `--output_format=0` or `--output_format=1` arguments to choose between output formats. `0` is for a detailed output, while `1` is only displaying each transformation process speed. @@ -72,8 +72,8 @@ by default.* ### Bugs history 1. [Solved] pffft benchmark bug: "Sandbox not active" - N = 64, status OK, pffft_transform generates error - N > 64, status not OK + n = 64, status OK, pffft_transform generates error + n > 64, status not OK Problem on initialising sapi::StatusOr s; the memory that stays for s is not the same with the address passed in pffft_transform function. (sapi::v::GenericPtr - to be changed) From c53f2a900f37ab15ccda9f193dcd79215a728dfc Mon Sep 17 00:00:00 2001 From: Wiktor Garbacz Date: Fri, 28 Aug 2020 04:49:15 -0700 Subject: [PATCH 29/42] Automated rollback of commit e7a195ce42a1c3dbcdd1f78c954a4583b85fc789. 
PiperOrigin-RevId: 328918626 Change-Id: Iabe93ec7062ea6e750e4185e2b0b672a37111ee7 --- sandboxed_api/proto_helper.h | 2 +- sandboxed_api/rpcchannel.h | 2 +- sandboxed_api/sandbox.cc | 2 +- sandboxed_api/sandbox.h | 5 +++-- sandboxed_api/sandbox2/comms.cc | 2 +- sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel | 1 + .../sandbox2/examples/network_proxy/networkproxy_bin.cc | 4 ++-- sandboxed_api/sandbox2/forkserver.cc | 2 +- sandboxed_api/sandbox2/mounts.cc | 2 +- sandboxed_api/sandbox2/network_proxy/BUILD.bazel | 2 ++ sandboxed_api/sandbox2/network_proxy/client.cc | 2 +- sandboxed_api/sandbox2/network_proxy/client.h | 2 +- sandboxed_api/sandbox2/network_proxy/filtering.cc | 3 ++- sandboxed_api/sandbox2/network_proxy/filtering.h | 2 +- sandboxed_api/sandbox2/policybuilder.h | 2 +- sandboxed_api/sandbox2/sandbox2.h | 2 +- sandboxed_api/sandbox2/util.cc | 4 ++-- sandboxed_api/sandbox2/util.h | 4 ++-- sandboxed_api/sandbox2/util/minielf.cc | 8 +++----- sandboxed_api/sandbox2/util/minielf.h | 2 +- sandboxed_api/tools/clang_generator/emitter.cc | 2 +- sandboxed_api/tools/clang_generator/emitter.h | 2 +- sandboxed_api/tools/clang_generator/generator.cc | 4 ++-- sandboxed_api/tools/clang_generator/generator.h | 4 ++-- sandboxed_api/tools/clang_generator/types.cc | 2 +- sandboxed_api/util/status.h | 4 ---- 26 files changed, 36 insertions(+), 37 deletions(-) diff --git a/sandboxed_api/proto_helper.h b/sandboxed_api/proto_helper.h index 53ac26d..b00a73e 100644 --- a/sandboxed_api/proto_helper.h +++ b/sandboxed_api/proto_helper.h @@ -22,8 +22,8 @@ #include #include "absl/status/status.h" -#include "sandboxed_api/proto_arg.pb.h" #include "sandboxed_api/util/statusor.h" +#include "sandboxed_api/proto_arg.pb.h" namespace sapi { diff --git a/sandboxed_api/rpcchannel.h b/sandboxed_api/rpcchannel.h index ed50326..c54ef8e 100644 --- a/sandboxed_api/rpcchannel.h +++ b/sandboxed_api/rpcchannel.h @@ -18,11 +18,11 @@ #include #include "absl/status/status.h" +#include 
"sandboxed_api/util/statusor.h" #include "absl/synchronization/mutex.h" #include "sandboxed_api/call.h" #include "sandboxed_api/sandbox2/comms.h" #include "sandboxed_api/var_type.h" -#include "sandboxed_api/util/statusor.h" namespace sapi { diff --git a/sandboxed_api/sandbox.cc b/sandboxed_api/sandbox.cc index b29646b..18239a4 100644 --- a/sandboxed_api/sandbox.cc +++ b/sandboxed_api/sandbox.cc @@ -392,7 +392,7 @@ absl::Status Sandbox::TransferFromSandboxee(v::Var* var) { } sapi::StatusOr Sandbox::GetCString(const v::RemotePtr& str, - uint64_t max_length) { + uint64_t max_length) { if (!is_active()) { return absl::UnavailableError("Sandbox not active"); } diff --git a/sandboxed_api/sandbox.h b/sandboxed_api/sandbox.h index d8f37ef..8b4ee6a 100644 --- a/sandboxed_api/sandbox.h +++ b/sandboxed_api/sandbox.h @@ -102,8 +102,9 @@ class Sandbox { absl::Status TransferToSandboxee(v::Var* var); absl::Status TransferFromSandboxee(v::Var* var); - sapi::StatusOr GetCString( - const v::RemotePtr& str, uint64_t max_length = 10ULL << 20 /* 10 MiB*/ + sapi::StatusOr GetCString(const v::RemotePtr& str, + uint64_t max_length = 10ULL + << 20 /* 10 MiB*/ ); // Waits until the sandbox terminated and returns the result. 
diff --git a/sandboxed_api/sandbox2/comms.cc b/sandboxed_api/sandbox2/comms.cc index 6bb891c..3e61013 100644 --- a/sandboxed_api/sandbox2/comms.cc +++ b/sandboxed_api/sandbox2/comms.cc @@ -36,6 +36,7 @@ #include "google/protobuf/message.h" #include "absl/memory/memory.h" #include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/synchronization/mutex.h" @@ -44,7 +45,6 @@ #include "sandboxed_api/util/raw_logging.h" #include "sandboxed_api/util/status.h" #include "sandboxed_api/util/status_macros.h" -#include "sandboxed_api/util/statusor.h" #ifdef MEMORY_SANITIZER #include "base/dynamic_annotations.h" diff --git a/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel b/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel index 133cb88..480c078 100644 --- a/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel +++ b/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel @@ -49,6 +49,7 @@ cc_binary( "//sandboxed_api/util:flags", "//sandboxed_api/util:status", "//sandboxed_api/util:statusor", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings:str_format", ], ) diff --git a/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc b/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc index c22947d..b355522 100644 --- a/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc +++ b/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc @@ -12,15 +12,15 @@ #include #include "sandboxed_api/util/flag.h" +#include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/str_format.h" #include "sandboxed_api/sandbox2/client.h" #include "sandboxed_api/sandbox2/comms.h" #include "sandboxed_api/sandbox2/network_proxy/client.h" #include "sandboxed_api/sandbox2/util/fileops.h" #include "sandboxed_api/sandbox2/util/strerror.h" -#include "sandboxed_api/util/status.h" #include 
"sandboxed_api/util/status_macros.h" -#include "sandboxed_api/util/statusor.h" ABSL_FLAG(bool, connect_with_handler, true, "Connect using automatic mode."); diff --git a/sandboxed_api/sandbox2/forkserver.cc b/sandboxed_api/sandbox2/forkserver.cc index 2125927..d0b637c 100644 --- a/sandboxed_api/sandbox2/forkserver.cc +++ b/sandboxed_api/sandbox2/forkserver.cc @@ -36,6 +36,7 @@ #include "absl/memory/memory.h" #include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" @@ -55,7 +56,6 @@ #include "sandboxed_api/sandbox2/util/fileops.h" #include "sandboxed_api/sandbox2/util/strerror.h" #include "sandboxed_api/util/raw_logging.h" -#include "sandboxed_api/util/statusor.h" namespace { // "Moves" the old FD to the new FD number. diff --git a/sandboxed_api/sandbox2/mounts.cc b/sandboxed_api/sandbox2/mounts.cc index 2f734e2..e1c0198 100644 --- a/sandboxed_api/sandbox2/mounts.cc +++ b/sandboxed_api/sandbox2/mounts.cc @@ -27,6 +27,7 @@ #include "google/protobuf/util/message_differencer.h" #include "absl/container/flat_hash_set.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/ascii.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" @@ -39,7 +40,6 @@ #include "sandboxed_api/sandbox2/util/strerror.h" #include "sandboxed_api/util/raw_logging.h" #include "sandboxed_api/util/status_macros.h" -#include "sandboxed_api/util/statusor.h" namespace sandbox2 { namespace { diff --git a/sandboxed_api/sandbox2/network_proxy/BUILD.bazel b/sandboxed_api/sandbox2/network_proxy/BUILD.bazel index 3a3a9d6..5935390 100644 --- a/sandboxed_api/sandbox2/network_proxy/BUILD.bazel +++ b/sandboxed_api/sandbox2/network_proxy/BUILD.bazel @@ -46,6 +46,7 @@ cc_library( "//sandboxed_api/sandbox2/util:strerror", "//sandboxed_api/util:status", "@com_google_absl//absl/memory", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", 
"@com_google_absl//absl/synchronization", "@com_google_glog//:glog", @@ -62,6 +63,7 @@ cc_library( "//sandboxed_api/sandbox2/util:strerror", "//sandboxed_api/util:status", "//sandboxed_api/util:statusor", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_glog//:glog", ], diff --git a/sandboxed_api/sandbox2/network_proxy/client.cc b/sandboxed_api/sandbox2/network_proxy/client.cc index 586d201..e528041 100644 --- a/sandboxed_api/sandbox2/network_proxy/client.cc +++ b/sandboxed_api/sandbox2/network_proxy/client.cc @@ -25,9 +25,9 @@ #include #include "absl/memory/memory.h" +#include "absl/status/status.h" #include "absl/strings/str_cat.h" #include "sandboxed_api/sandbox2/util/strerror.h" -#include "sandboxed_api/util/status.h" #include "sandboxed_api/util/status_macros.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/network_proxy/client.h b/sandboxed_api/sandbox2/network_proxy/client.h index e7af4dc..7318993 100644 --- a/sandboxed_api/sandbox2/network_proxy/client.h +++ b/sandboxed_api/sandbox2/network_proxy/client.h @@ -17,9 +17,9 @@ #include +#include "absl/status/status.h" #include "absl/synchronization/mutex.h" #include "sandboxed_api/sandbox2/comms.h" -#include "sandboxed_api/util/status.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/network_proxy/filtering.cc b/sandboxed_api/sandbox2/network_proxy/filtering.cc index d6389d8..fc93e6c 100644 --- a/sandboxed_api/sandbox2/network_proxy/filtering.cc +++ b/sandboxed_api/sandbox2/network_proxy/filtering.cc @@ -17,11 +17,12 @@ #include #include +#include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "sandboxed_api/sandbox2/util/strerror.h" -#include "sandboxed_api/util/status.h" #include "sandboxed_api/util/status_macros.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/network_proxy/filtering.h 
b/sandboxed_api/sandbox2/network_proxy/filtering.h index fb6714a..c0a235b 100644 --- a/sandboxed_api/sandbox2/network_proxy/filtering.h +++ b/sandboxed_api/sandbox2/network_proxy/filtering.h @@ -19,8 +19,8 @@ #include -#include "sandboxed_api/sandbox2/comms.h" #include "sandboxed_api/util/statusor.h" +#include "sandboxed_api/sandbox2/comms.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/policybuilder.h b/sandboxed_api/sandbox2/policybuilder.h index f05023c..07095ba 100644 --- a/sandboxed_api/sandbox2/policybuilder.h +++ b/sandboxed_api/sandbox2/policybuilder.h @@ -29,11 +29,11 @@ #include #include "absl/base/macros.h" #include "absl/memory/memory.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/string_view.h" #include "sandboxed_api/sandbox2/mounts.h" #include "sandboxed_api/sandbox2/network_proxy/filtering.h" #include "sandboxed_api/sandbox2/policy.h" -#include "sandboxed_api/util/statusor.h" struct bpf_labels; diff --git a/sandboxed_api/sandbox2/sandbox2.h b/sandboxed_api/sandbox2/sandbox2.h index 9527ff2..633297f 100644 --- a/sandboxed_api/sandbox2/sandbox2.h +++ b/sandboxed_api/sandbox2/sandbox2.h @@ -26,6 +26,7 @@ #include #include "absl/base/macros.h" #include "absl/memory/memory.h" +#include "sandboxed_api/util/statusor.h" #include "sandboxed_api/sandbox2/comms.h" #include "sandboxed_api/sandbox2/executor.h" #include "sandboxed_api/sandbox2/ipc.h" @@ -33,7 +34,6 @@ #include "sandboxed_api/sandbox2/notify.h" #include "sandboxed_api/sandbox2/policy.h" #include "sandboxed_api/sandbox2/result.h" -#include "sandboxed_api/util/statusor.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/util.cc b/sandboxed_api/sandbox2/util.cc index 24d4a36..b760d26 100644 --- a/sandboxed_api/sandbox2/util.cc +++ b/sandboxed_api/sandbox2/util.cc @@ -183,8 +183,8 @@ bool CreateMemFd(int* fd, const char* name) { } sapi::StatusOr Communicate(const std::vector& argv, - const std::vector& envv, - std::string* output) { + const std::vector& envv, + 
std::string* output) { int cout_pipe[2]; posix_spawn_file_actions_t action; diff --git a/sandboxed_api/sandbox2/util.h b/sandboxed_api/sandbox2/util.h index 87fe31c..e42bf72 100644 --- a/sandboxed_api/sandbox2/util.h +++ b/sandboxed_api/sandbox2/util.h @@ -63,8 +63,8 @@ bool CreateMemFd(int* fd, const char* name = "buffer_file"); // Executes a the program given by argv and the specified environment and // captures any output to stdout/stderr. sapi::StatusOr Communicate(const std::vector& argv, - const std::vector& envv, - std::string* output); + const std::vector& envv, + std::string* output); // Returns signal description. std::string GetSignalName(int signo); diff --git a/sandboxed_api/sandbox2/util/minielf.cc b/sandboxed_api/sandbox2/util/minielf.cc index 3eac979..dce033b 100644 --- a/sandboxed_api/sandbox2/util/minielf.cc +++ b/sandboxed_api/sandbox2/util/minielf.cc @@ -219,8 +219,7 @@ absl::Status ElfParser::ReadFileHeader() { return absl::OkStatus(); } -sapi::StatusOr ElfParser::ReadSectionHeader( - absl::string_view src) { +sapi::StatusOr ElfParser::ReadSectionHeader(absl::string_view src) { if (src.size() < sizeof(Elf64_Shdr)) { return absl::FailedPreconditionError( absl::StrCat("invalid section header data: got ", src.size(), @@ -293,8 +292,7 @@ sapi::StatusOr ElfParser::ReadSectionContents( return rv; } -sapi::StatusOr ElfParser::ReadProgramHeader( - absl::string_view src) { +sapi::StatusOr ElfParser::ReadProgramHeader(absl::string_view src) { if (src.size() < sizeof(Elf64_Phdr)) { return absl::FailedPreconditionError( absl::StrCat("invalid program header data: got ", src.size(), @@ -514,7 +512,7 @@ sapi::StatusOr ElfParser::Parse(FILE* elf, uint32_t features) { } sapi::StatusOr ElfFile::ParseFromFile(const std::string& filename, - uint32_t features) { + uint32_t features) { std::unique_ptr elf{fopen(filename.c_str(), "r"), [](FILE* f) { fclose(f); }}; if (!elf) { diff --git a/sandboxed_api/sandbox2/util/minielf.h b/sandboxed_api/sandbox2/util/minielf.h 
index 247d271..c33a604 100644 --- a/sandboxed_api/sandbox2/util/minielf.h +++ b/sandboxed_api/sandbox2/util/minielf.h @@ -34,7 +34,7 @@ class ElfFile { }; static sapi::StatusOr ParseFromFile(const std::string& filename, - uint32_t features); + uint32_t features); int64_t file_size() const { return file_size_; } const std::string& interpreter() const { return interpreter_; } diff --git a/sandboxed_api/tools/clang_generator/emitter.cc b/sandboxed_api/tools/clang_generator/emitter.cc index 18fdcd9..78b9a9a 100644 --- a/sandboxed_api/tools/clang_generator/emitter.cc +++ b/sandboxed_api/tools/clang_generator/emitter.cc @@ -46,10 +46,10 @@ constexpr absl::string_view kHeaderProlog = #include "absl/base/macros.h" #include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "sandboxed_api/sandbox.h" #include "sandboxed_api/vars.h" #include "sandboxed_api/util/status_macros.h" -#include "sandboxed_api/util/statusor.h" )"; constexpr absl::string_view kHeaderEpilog = diff --git a/sandboxed_api/tools/clang_generator/emitter.h b/sandboxed_api/tools/clang_generator/emitter.h index 42ba92d..ad9f5f8 100644 --- a/sandboxed_api/tools/clang_generator/emitter.h +++ b/sandboxed_api/tools/clang_generator/emitter.h @@ -18,12 +18,12 @@ #include #include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/string_view.h" #include "clang/AST/Decl.h" #include "clang/AST/Type.h" #include "sandboxed_api/tools/clang_generator/generator.h" #include "sandboxed_api/tools/clang_generator/types.h" -#include "sandboxed_api/util/statusor.h" namespace sapi { diff --git a/sandboxed_api/tools/clang_generator/generator.cc b/sandboxed_api/tools/clang_generator/generator.cc index 508877e..ffe2106 100644 --- a/sandboxed_api/tools/clang_generator/generator.cc +++ b/sandboxed_api/tools/clang_generator/generator.cc @@ -18,12 +18,12 @@ #include #include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "clang/Format/Format.h" 
#include "sandboxed_api/sandbox2/util/fileops.h" #include "sandboxed_api/tools/clang_generator/diagnostics.h" #include "sandboxed_api/tools/clang_generator/emitter.h" #include "sandboxed_api/util/status_macros.h" -#include "sandboxed_api/util/statusor.h" namespace sapi { namespace { @@ -68,7 +68,7 @@ bool GeneratorASTVisitor::VisitFunctionDecl(clang::FunctionDecl* decl) { namespace internal { sapi::StatusOr ReformatGoogleStyle(const std::string& filename, - const std::string& code) { + const std::string& code) { // Configure code style based on Google style, but enforce pointer alignment clang::format::FormatStyle style = clang::format::getGoogleStyle(clang::format::FormatStyle::LK_Cpp); diff --git a/sandboxed_api/tools/clang_generator/generator.h b/sandboxed_api/tools/clang_generator/generator.h index 24e2658..3b9680b 100644 --- a/sandboxed_api/tools/clang_generator/generator.h +++ b/sandboxed_api/tools/clang_generator/generator.h @@ -20,13 +20,13 @@ #include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendAction.h" #include "clang/Tooling/Tooling.h" #include "sandboxed_api/tools/clang_generator/types.h" -#include "sandboxed_api/util/statusor.h" namespace sapi { @@ -67,7 +67,7 @@ class GeneratorASTVisitor namespace internal { sapi::StatusOr ReformatGoogleStyle(const std::string& filename, - const std::string& code); + const std::string& code); } // namespace internal diff --git a/sandboxed_api/tools/clang_generator/types.cc b/sandboxed_api/tools/clang_generator/types.cc index 4d91499..321d1e2 100644 --- a/sandboxed_api/tools/clang_generator/types.cc +++ b/sandboxed_api/tools/clang_generator/types.cc @@ -208,7 +208,7 @@ std::string MapQualTypeReturn(const clang::ASTContext& context, return "absl::Status"; } // Remove const 
qualifier like in MapQualType(). - return absl::StrCat("::sapi::StatusOr<", + return absl::StrCat("sapi::StatusOr<", MaybeRemoveConst(context, qual).getAsString(), ">"); } diff --git a/sandboxed_api/util/status.h b/sandboxed_api/util/status.h index 6390bc0..d9962fd 100644 --- a/sandboxed_api/util/status.h +++ b/sandboxed_api/util/status.h @@ -12,10 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This file and it's implementation provide a custom fork of -// util/task/status.h. This will become obsolete and will be replaced once -// Abseil releases absl::Status. - #ifndef THIRD_PARTY_SAPI_UTIL_STATUS_H_ #define THIRD_PARTY_SAPI_UTIL_STATUS_H_ From cfac8eb2d9abfbec77d622542e6da0e1bd4ba5ab Mon Sep 17 00:00:00 2001 From: Sandboxed API Team Date: Mon, 31 Aug 2020 00:13:27 -0700 Subject: [PATCH 30/42] Internal cleanup migrating StatusOr. PiperOrigin-RevId: 329250595 Change-Id: I0447d8154a57b1132981b116f02b4d5bceedfd4c --- sandboxed_api/examples/zlib/BUILD.bazel | 5 +++-- sandboxed_api/examples/zlib/main_zlib.cc | 1 + sandboxed_api/rpcchannel.cc | 1 + sandboxed_api/tools/clang_generator/emitter.cc | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sandboxed_api/examples/zlib/BUILD.bazel b/sandboxed_api/examples/zlib/BUILD.bazel index d318e48..3dc1c75 100644 --- a/sandboxed_api/examples/zlib/BUILD.bazel +++ b/sandboxed_api/examples/zlib/BUILD.bazel @@ -14,11 +14,11 @@ # Description: Sandboxed API reimplementation of zlib's zpipe.c example. 
-licenses(["notice"]) - load("//sandboxed_api/bazel:build_defs.bzl", "sapi_platform_copts") load("//sandboxed_api/bazel:sapi.bzl", "sapi_library") +licenses(["notice"]) + sapi_library( name = "zlib-sapi", srcs = [], @@ -42,6 +42,7 @@ cc_binary( ":zlib-sapi_embed", "//sandboxed_api:vars", "//sandboxed_api/util:flags", + "//sandboxed_api/util:statusor", "@com_google_absl//absl/base:core_headers", ], ) diff --git a/sandboxed_api/examples/zlib/main_zlib.cc b/sandboxed_api/examples/zlib/main_zlib.cc index 969ae33..7d94c10 100644 --- a/sandboxed_api/examples/zlib/main_zlib.cc +++ b/sandboxed_api/examples/zlib/main_zlib.cc @@ -20,6 +20,7 @@ #include #include "absl/base/macros.h" #include "sandboxed_api/util/flag.h" +#include "sandboxed_api/util/statusor.h" #include "sandboxed_api/examples/zlib/zlib-sapi.sapi.h" #include "sandboxed_api/examples/zlib/zlib-sapi_embed.h" #include "sandboxed_api/vars.h" diff --git a/sandboxed_api/rpcchannel.cc b/sandboxed_api/rpcchannel.cc index 72a9250..1aad74d 100644 --- a/sandboxed_api/rpcchannel.cc +++ b/sandboxed_api/rpcchannel.cc @@ -15,6 +15,7 @@ #include "sandboxed_api/rpcchannel.h" #include +#include "sandboxed_api/util/statusor.h" #include "absl/strings/str_cat.h" #include "absl/synchronization/mutex.h" #include "sandboxed_api/call.h" diff --git a/sandboxed_api/tools/clang_generator/emitter.cc b/sandboxed_api/tools/clang_generator/emitter.cc index 78b9a9a..5f30953 100644 --- a/sandboxed_api/tools/clang_generator/emitter.cc +++ b/sandboxed_api/tools/clang_generator/emitter.cc @@ -15,6 +15,7 @@ #include "sandboxed_api/tools/clang_generator/emitter.h" #include "absl/random/random.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/ascii.h" #include "absl/strings/escaping.h" #include "absl/strings/match.h" From 531850afc219947ccc4fd3d5208b0049b55a42c6 Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Mon, 31 Aug 2020 11:17:15 +0000 Subject: [PATCH 31/42] Required changes resolved --- oss-internship-2020/pffft/README.md | 
2 +- oss-internship-2020/pffft/main_pffft_sandboxed.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/oss-internship-2020/pffft/README.md b/oss-internship-2020/pffft/README.md index a380ff4..194c6b6 100644 --- a/oss-internship-2020/pffft/README.md +++ b/oss-internship-2020/pffft/README.md @@ -4,7 +4,7 @@ Build System: CMake OS: Linux ### Check out the PFFFT library & CMake set up -`git clone https://github.com/doinachiroiu/sandboxed-api/tree/master/oss-internship-2020/pffft` +`git checkout -b master` `git submodule update --init --recursive` diff --git a/oss-internship-2020/pffft/main_pffft_sandboxed.cc b/oss-internship-2020/pffft/main_pffft_sandboxed.cc index 33116ed..ea42a26 100644 --- a/oss-internship-2020/pffft/main_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/main_pffft_sandboxed.cc @@ -86,6 +86,8 @@ void ShowOutput(const char* name, int n, int complex, float flops, float t0, } absl::Status PffftMain() { + LOG(INFO) << "Initializing sandbox...\n"; + PffftSapiSandbox sandbox; SAPI_RETURN_IF_ERROR(sandbox.Init()); @@ -201,8 +203,6 @@ int main(int argc, char* argv[]) { gflags::ParseCommandLineFlags(&argc, &argv, true); - LOG(INFO) << "Initializing sandbox...\n"; - if (absl::Status status = PffftMain(); !status.ok()) { LOG(ERROR) << "Initialization failed: " << status.ToString(); return EXIT_FAILURE; From 7e22952c424536dd950ccf32e42a9abc6ce0a6f7 Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Mon, 31 Aug 2020 11:48:19 +0000 Subject: [PATCH 32/42] Setup error handling modified --- .../pffft/main_pffft_sandboxed.cc | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/oss-internship-2020/pffft/main_pffft_sandboxed.cc b/oss-internship-2020/pffft/main_pffft_sandboxed.cc index ea42a26..51e271d 100644 --- a/oss-internship-2020/pffft/main_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/main_pffft_sandboxed.cc @@ -160,17 +160,11 @@ absl::Status PffftMain() { // PFFFT benchmark { - sapi::StatusOr s = - 
api.pffft_new_setup(n, complex ? PFFFT_COMPLEX : PFFFT_REAL); + SAPI_ASSIGN_OR_RETURN( + PFFFT_Setup *s, + api.pffft_new_setup(n, complex ? PFFFT_COMPLEX : PFFFT_REAL)); - LOG(INFO) << "Setup status is: " << s.status().ToString(); - - if (!s.ok()) { - printf("Sandbox failed.\n"); - return s.status(); - } - - sapi::v::RemotePtr s_reg(s.value()); + sapi::v::RemotePtr s_reg(s); t0 = UclockSec(); for (int iter = 0; iter < max_iter; ++iter) { @@ -192,7 +186,7 @@ absl::Status PffftMain() { LOG(INFO) << "n = " << n << " SUCCESSFULLY"; } } - } + } return absl::OkStatus(); } From 1c833d6f25ae4763eb1e4fb4401f25d2bdc6ef40 Mon Sep 17 00:00:00 2001 From: Sandboxed API Team Date: Mon, 31 Aug 2020 08:13:00 -0700 Subject: [PATCH 33/42] Internal cleanup migrating StatusOr. PiperOrigin-RevId: 329304527 Change-Id: Id6c141272df54c4e165829d690f9f5b2e9ee90cc --- sandboxed_api/sandbox2/BUILD.bazel | 1 + sandboxed_api/sandbox2/buffer.cc | 1 + sandboxed_api/sandbox2/network_proxy/BUILD.bazel | 1 + sandboxed_api/sandbox2/network_proxy/server.cc | 1 + sandboxed_api/sandbox2/policybuilder.cc | 2 ++ sandboxed_api/sandbox2/policybuilder_test.cc | 1 + sandboxed_api/sandbox2/sandbox2.cc | 1 + sandboxed_api/sandbox2/util/maps_parser.cc | 1 + sandboxed_api/sandbox2/util/temp_file.cc | 1 + 9 files changed, 10 insertions(+) diff --git a/sandboxed_api/sandbox2/BUILD.bazel b/sandboxed_api/sandbox2/BUILD.bazel index 09dbab4..ad415c2 100644 --- a/sandboxed_api/sandbox2/BUILD.bazel +++ b/sandboxed_api/sandbox2/BUILD.bazel @@ -803,6 +803,7 @@ cc_test( ":testing", "//sandboxed_api/sandbox2/util:bpf_helper", "//sandboxed_api/util:status_matchers", + "//sandboxed_api/util:statusor", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", diff --git a/sandboxed_api/sandbox2/buffer.cc b/sandboxed_api/sandbox2/buffer.cc index ea5f808..9c39339 100644 --- a/sandboxed_api/sandbox2/buffer.cc +++ b/sandboxed_api/sandbox2/buffer.cc @@ -21,6 +21,7 @@ #include #include 
"absl/memory/memory.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/str_cat.h" #include "sandboxed_api/sandbox2/util.h" #include "sandboxed_api/sandbox2/util/strerror.h" diff --git a/sandboxed_api/sandbox2/network_proxy/BUILD.bazel b/sandboxed_api/sandbox2/network_proxy/BUILD.bazel index 5935390..d489a53 100644 --- a/sandboxed_api/sandbox2/network_proxy/BUILD.bazel +++ b/sandboxed_api/sandbox2/network_proxy/BUILD.bazel @@ -29,6 +29,7 @@ cc_library( ":filtering", "//sandboxed_api/sandbox2:comms", "//sandboxed_api/sandbox2/util:fileops", + "//sandboxed_api/util:statusor", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_glog//:glog", diff --git a/sandboxed_api/sandbox2/network_proxy/server.cc b/sandboxed_api/sandbox2/network_proxy/server.cc index c183812..41f6f7e 100644 --- a/sandboxed_api/sandbox2/network_proxy/server.cc +++ b/sandboxed_api/sandbox2/network_proxy/server.cc @@ -26,6 +26,7 @@ #include #include "absl/memory/memory.h" +#include "sandboxed_api/util/statusor.h" #include "sandboxed_api/sandbox2/util/fileops.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/policybuilder.cc b/sandboxed_api/sandbox2/policybuilder.cc index 9eec379..d185631 100644 --- a/sandboxed_api/sandbox2/policybuilder.cc +++ b/sandboxed_api/sandbox2/policybuilder.cc @@ -15,6 +15,7 @@ #include "sandboxed_api/sandbox2/policybuilder.h" #include // For TCGETS + #if defined(__x86_64__) #include #endif @@ -34,6 +35,7 @@ #include #include "absl/memory/memory.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/escaping.h" #include "absl/strings/match.h" #include "sandboxed_api/sandbox2/namespace.h" diff --git a/sandboxed_api/sandbox2/policybuilder_test.cc b/sandboxed_api/sandbox2/policybuilder_test.cc index a86befd..ca13614 100644 --- a/sandboxed_api/sandbox2/policybuilder_test.cc +++ b/sandboxed_api/sandbox2/policybuilder_test.cc @@ -24,6 +24,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include 
"absl/memory/memory.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" diff --git a/sandboxed_api/sandbox2/sandbox2.cc b/sandboxed_api/sandbox2/sandbox2.cc index 559bceb..86c06dc 100644 --- a/sandboxed_api/sandbox2/sandbox2.cc +++ b/sandboxed_api/sandbox2/sandbox2.cc @@ -21,6 +21,7 @@ #include #include "absl/memory/memory.h" +#include "sandboxed_api/util/statusor.h" #include "absl/time/time.h" #include "sandboxed_api/sandbox2/monitor.h" #include "sandboxed_api/sandbox2/result.h" diff --git a/sandboxed_api/sandbox2/util/maps_parser.cc b/sandboxed_api/sandbox2/util/maps_parser.cc index d3040cc..cf0955f 100644 --- a/sandboxed_api/sandbox2/util/maps_parser.cc +++ b/sandboxed_api/sandbox2/util/maps_parser.cc @@ -15,6 +15,7 @@ #include "sandboxed_api/sandbox2/util/maps_parser.h" #include "absl/status/status.h" +#include "sandboxed_api/util/statusor.h" #include "absl/strings/str_split.h" namespace sandbox2 { diff --git a/sandboxed_api/sandbox2/util/temp_file.cc b/sandboxed_api/sandbox2/util/temp_file.cc index 9caac66..602a18c 100644 --- a/sandboxed_api/sandbox2/util/temp_file.cc +++ b/sandboxed_api/sandbox2/util/temp_file.cc @@ -22,6 +22,7 @@ #include #include +#include "sandboxed_api/util/statusor.h" #include "absl/strings/str_cat.h" #include "sandboxed_api/sandbox2/util/fileops.h" #include "sandboxed_api/sandbox2/util/strerror.h" From 23da55c19a903fca32252e8bda68a4d3d0fa1377 Mon Sep 17 00:00:00 2001 From: Sandboxed API Team Date: Wed, 2 Sep 2020 08:46:48 -0700 Subject: [PATCH 34/42] Internal BUILD refactoring PiperOrigin-RevId: 329720214 Change-Id: I25fbb94dea17db3bdca6438d17508fa304d9706f --- sandboxed_api/BUILD.bazel | 2 +- sandboxed_api/examples/zlib/BUILD.bazel | 2 +- sandboxed_api/examples/zlib/main_zlib.cc | 4 +- sandboxed_api/proto_helper.h | 6 +- sandboxed_api/rpcchannel.cc | 6 +- sandboxed_api/rpcchannel.h | 6 +- sandboxed_api/sandbox.cc | 2 +- 
sandboxed_api/sandbox.h | 2 +- sandboxed_api/sandbox2/BUILD.bazel | 16 +- sandboxed_api/sandbox2/buffer.cc | 6 +- sandboxed_api/sandbox2/buffer.h | 6 +- sandboxed_api/sandbox2/comms.cc | 2 +- .../examples/network_proxy/BUILD.bazel | 2 +- .../network_proxy/networkproxy_bin.cc | 8 +- sandboxed_api/sandbox2/forkserver.cc | 4 +- sandboxed_api/sandbox2/mounts.cc | 4 +- .../sandbox2/network_proxy/BUILD.bazel | 4 +- .../sandbox2/network_proxy/filtering.cc | 8 +- .../sandbox2/network_proxy/filtering.h | 4 +- .../sandbox2/network_proxy/server.cc | 4 +- sandboxed_api/sandbox2/policybuilder.cc | 8 +- sandboxed_api/sandbox2/policybuilder.h | 8 +- sandboxed_api/sandbox2/policybuilder_test.cc | 4 +- sandboxed_api/sandbox2/sandbox2.cc | 4 +- sandboxed_api/sandbox2/sandbox2.h | 4 +- sandboxed_api/sandbox2/util.cc | 4 +- sandboxed_api/sandbox2/util.h | 6 +- sandboxed_api/sandbox2/util/BUILD.bazel | 6 +- sandboxed_api/sandbox2/util/maps_parser.cc | 4 +- sandboxed_api/sandbox2/util/maps_parser.h | 4 +- sandboxed_api/sandbox2/util/minielf.cc | 22 +- sandboxed_api/sandbox2/util/minielf.h | 4 +- sandboxed_api/sandbox2/util/temp_file.cc | 8 +- sandboxed_api/sandbox2/util/temp_file.h | 8 +- .../tools/clang_generator/emitter.cc | 8 +- sandboxed_api/tools/clang_generator/emitter.h | 4 +- .../tools/clang_generator/generator.cc | 4 +- .../tools/clang_generator/generator.h | 4 +- sandboxed_api/tools/clang_generator/types.cc | 2 +- sandboxed_api/tools/clang_generator/types.h | 2 +- sandboxed_api/tools/generator2/code.py | 4 +- .../tools/generator2/code_test_util.py | 16 +- sandboxed_api/util/statusor_test.cc | 407 ------------------ sandboxed_api/var_proto.h | 4 +- 44 files changed, 120 insertions(+), 527 deletions(-) delete mode 100644 sandboxed_api/util/statusor_test.cc diff --git a/sandboxed_api/BUILD.bazel b/sandboxed_api/BUILD.bazel index fbe40b3..59ed83a 100644 --- a/sandboxed_api/BUILD.bazel +++ b/sandboxed_api/BUILD.bazel @@ -147,10 +147,10 @@ cc_library( ":var_type", 
"//sandboxed_api/sandbox2:comms", "//sandboxed_api/util:status", - "//sandboxed_api/util:statusor", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", diff --git a/sandboxed_api/examples/zlib/BUILD.bazel b/sandboxed_api/examples/zlib/BUILD.bazel index 3dc1c75..3716280 100644 --- a/sandboxed_api/examples/zlib/BUILD.bazel +++ b/sandboxed_api/examples/zlib/BUILD.bazel @@ -42,7 +42,7 @@ cc_binary( ":zlib-sapi_embed", "//sandboxed_api:vars", "//sandboxed_api/util:flags", - "//sandboxed_api/util:statusor", "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/status:statusor", ], ) diff --git a/sandboxed_api/examples/zlib/main_zlib.cc b/sandboxed_api/examples/zlib/main_zlib.cc index 7d94c10..e6e2b2a 100644 --- a/sandboxed_api/examples/zlib/main_zlib.cc +++ b/sandboxed_api/examples/zlib/main_zlib.cc @@ -20,7 +20,7 @@ #include #include "absl/base/macros.h" #include "sandboxed_api/util/flag.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "sandboxed_api/examples/zlib/zlib-sapi.sapi.h" #include "sandboxed_api/examples/zlib/zlib-sapi_embed.h" #include "sandboxed_api/vars.h" @@ -48,7 +48,7 @@ int main(int argc, char** argv) { << status.message(); } - sapi::StatusOr ret; + absl::StatusOr ret; int flush; unsigned have; sapi::v::Struct strm; diff --git a/sandboxed_api/proto_helper.h b/sandboxed_api/proto_helper.h index b00a73e..63c4294 100644 --- a/sandboxed_api/proto_helper.h +++ b/sandboxed_api/proto_helper.h @@ -22,13 +22,13 @@ #include #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "sandboxed_api/proto_arg.pb.h" namespace sapi { template -sapi::StatusOr> SerializeProto(const T& proto) { +absl::StatusOr> SerializeProto(const T& 
proto) { static_assert(std::is_base_of::value, "Template argument must be a proto message"); // Wrap protobuf in a envelope so that we know the name of the protobuf @@ -46,7 +46,7 @@ sapi::StatusOr> SerializeProto(const T& proto) { } template -sapi::StatusOr DeserializeProto(const char* data, size_t len) { +absl::StatusOr DeserializeProto(const char* data, size_t len) { static_assert(std::is_base_of::value, "Template argument must be a proto message"); ProtoArg envelope; diff --git a/sandboxed_api/rpcchannel.cc b/sandboxed_api/rpcchannel.cc index 1aad74d..66e1b2b 100644 --- a/sandboxed_api/rpcchannel.cc +++ b/sandboxed_api/rpcchannel.cc @@ -15,7 +15,7 @@ #include "sandboxed_api/rpcchannel.h" #include -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "absl/strings/str_cat.h" #include "absl/synchronization/mutex.h" #include "sandboxed_api/call.h" @@ -36,7 +36,7 @@ absl::Status RPCChannel::Call(const FuncCall& call, uint32_t tag, FuncRet* ret, return absl::OkStatus(); } -sapi::StatusOr RPCChannel::Return(v::Type exp_type) { +absl::StatusOr RPCChannel::Return(v::Type exp_type) { uint32_t tag; uint64_t len; FuncRet ret; @@ -203,7 +203,7 @@ absl::Status RPCChannel::Close(int remote_fd) { return absl::OkStatus(); } -sapi::StatusOr RPCChannel::Strlen(void* str) { +absl::StatusOr RPCChannel::Strlen(void* str) { absl::MutexLock lock(&mutex_); if (!comms_->SendTLV(comms::kMsgStrlen, sizeof(str), reinterpret_cast(&str))) { diff --git a/sandboxed_api/rpcchannel.h b/sandboxed_api/rpcchannel.h index c54ef8e..4fec23b 100644 --- a/sandboxed_api/rpcchannel.h +++ b/sandboxed_api/rpcchannel.h @@ -18,7 +18,7 @@ #include #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "absl/synchronization/mutex.h" #include "sandboxed_api/call.h" #include "sandboxed_api/sandbox2/comms.h" @@ -61,13 +61,13 @@ class RPCChannel { absl::Status Close(int remote_fd); // Returns length of a null-terminated 
c-style string (invokes strlen). - sapi::StatusOr Strlen(void* str); + absl::StatusOr Strlen(void* str); sandbox2::Comms* comms() const { return comms_; } private: // Receives the result after a call. - sapi::StatusOr Return(v::Type exp_type); + absl::StatusOr Return(v::Type exp_type); sandbox2::Comms* comms_; // Owned by sandbox2; absl::Mutex mutex_; diff --git a/sandboxed_api/sandbox.cc b/sandboxed_api/sandbox.cc index 18239a4..b18a1b0 100644 --- a/sandboxed_api/sandbox.cc +++ b/sandboxed_api/sandbox.cc @@ -391,7 +391,7 @@ absl::Status Sandbox::TransferFromSandboxee(v::Var* var) { return var->TransferFromSandboxee(GetRpcChannel(), pid()); } -sapi::StatusOr Sandbox::GetCString(const v::RemotePtr& str, +absl::StatusOr Sandbox::GetCString(const v::RemotePtr& str, uint64_t max_length) { if (!is_active()) { return absl::UnavailableError("Sandbox not active"); diff --git a/sandboxed_api/sandbox.h b/sandboxed_api/sandbox.h index 8b4ee6a..5d69eb0 100644 --- a/sandboxed_api/sandbox.h +++ b/sandboxed_api/sandbox.h @@ -102,7 +102,7 @@ class Sandbox { absl::Status TransferToSandboxee(v::Var* var); absl::Status TransferFromSandboxee(v::Var* var); - sapi::StatusOr GetCString(const v::RemotePtr& str, + absl::StatusOr GetCString(const v::RemotePtr& str, uint64_t max_length = 10ULL << 20 /* 10 MiB*/ ); diff --git a/sandboxed_api/sandbox2/BUILD.bazel b/sandboxed_api/sandbox2/BUILD.bazel index ad415c2..2bf7542 100644 --- a/sandboxed_api/sandbox2/BUILD.bazel +++ b/sandboxed_api/sandbox2/BUILD.bazel @@ -88,9 +88,9 @@ cc_library( ":regs", ":syscall", ":util", - "//sandboxed_api/util:statusor", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", ], ) @@ -300,12 +300,12 @@ cc_library( "//sandboxed_api/util:flags", "//sandboxed_api/util:raw_logging", "//sandboxed_api/util:status", - "//sandboxed_api/util:statusor", "@com_google_absl//absl/base:core_headers", 
"@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", @@ -374,9 +374,9 @@ cc_library( "//sandboxed_api/sandbox2/util:fileops", "//sandboxed_api/sandbox2/util:strerror", "//sandboxed_api/util:raw_logging", - "//sandboxed_api/util:statusor", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@org_kernel_libcap//:libcap", @@ -411,10 +411,10 @@ cc_library( "//sandboxed_api/sandbox2/util:strerror", "//sandboxed_api/util:raw_logging", "//sandboxed_api/util:status", - "//sandboxed_api/util:statusor", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_protobuf//:protobuf", ], @@ -509,8 +509,8 @@ cc_library( "//sandboxed_api/sandbox2/util:fileops", "//sandboxed_api/sandbox2/util:strerror", "//sandboxed_api/util:raw_logging", - "//sandboxed_api/util:statusor", "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", ], @@ -526,9 +526,9 @@ cc_library( ":util", "//sandboxed_api/sandbox2/util:strerror", "//sandboxed_api/util:status", - "//sandboxed_api/util:statusor", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/memory", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", ], ) @@ -573,10 +573,10 @@ cc_library( "//sandboxed_api/util:raw_logging", "//sandboxed_api/util:status", "//sandboxed_api/util:status_proto", - "//sandboxed_api/util:statusor", 
"@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", @@ -803,9 +803,9 @@ cc_test( ":testing", "//sandboxed_api/sandbox2/util:bpf_helper", "//sandboxed_api/util:status_matchers", - "//sandboxed_api/util:statusor", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_glog//:glog", "@com_google_googletest//:gtest_main", diff --git a/sandboxed_api/sandbox2/buffer.cc b/sandboxed_api/sandbox2/buffer.cc index 9c39339..074618d 100644 --- a/sandboxed_api/sandbox2/buffer.cc +++ b/sandboxed_api/sandbox2/buffer.cc @@ -21,7 +21,7 @@ #include #include "absl/memory/memory.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "absl/strings/str_cat.h" #include "sandboxed_api/sandbox2/util.h" #include "sandboxed_api/sandbox2/util/strerror.h" @@ -29,7 +29,7 @@ namespace sandbox2 { // Creates a new Buffer that is backed by the specified file descriptor. -sapi::StatusOr> Buffer::CreateFromFd(int fd) { +absl::StatusOr> Buffer::CreateFromFd(int fd) { auto buffer = absl::WrapUnique(new Buffer{}); struct stat stat_buf; @@ -54,7 +54,7 @@ sapi::StatusOr> Buffer::CreateFromFd(int fd) { // Creates a new Buffer of the specified size, backed by a temporary file that // will be immediately deleted. 
-sapi::StatusOr> Buffer::CreateWithSize(int64_t size) { +absl::StatusOr> Buffer::CreateWithSize(int64_t size) { int fd; if (!util::CreateMemFd(&fd)) { return absl::InternalError("Could not create buffer temp file"); diff --git a/sandboxed_api/sandbox2/buffer.h b/sandboxed_api/sandbox2/buffer.h index 72f042d..d613bcd 100644 --- a/sandboxed_api/sandbox2/buffer.h +++ b/sandboxed_api/sandbox2/buffer.h @@ -19,7 +19,7 @@ #include #include -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" namespace sandbox2 { @@ -37,11 +37,11 @@ class Buffer final { // Creates a new Buffer that is backed by the specified file descriptor. // The Buffer takes ownership of the descriptor and will close it when // destroyed. - static sapi::StatusOr> CreateFromFd(int fd); + static absl::StatusOr> CreateFromFd(int fd); // Creates a new Buffer of the specified size, backed by a temporary file that // will be immediately deleted. - static sapi::StatusOr> CreateWithSize(int64_t size); + static absl::StatusOr> CreateWithSize(int64_t size); // Returns a pointer to the buffer, which is read/write. 
uint8_t* data() const { return buf_; } diff --git a/sandboxed_api/sandbox2/comms.cc b/sandboxed_api/sandbox2/comms.cc index 3e61013..8a357b6 100644 --- a/sandboxed_api/sandbox2/comms.cc +++ b/sandboxed_api/sandbox2/comms.cc @@ -36,7 +36,7 @@ #include "google/protobuf/message.h" #include "absl/memory/memory.h" #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/synchronization/mutex.h" diff --git a/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel b/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel index 480c078..fb07dd5 100644 --- a/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel +++ b/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel @@ -48,8 +48,8 @@ cc_binary( "//sandboxed_api/sandbox2/util:strerror", "//sandboxed_api/util:flags", "//sandboxed_api/util:status", - "//sandboxed_api/util:statusor", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings:str_format", ], ) diff --git a/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc b/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc index b355522..84eb715 100644 --- a/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc +++ b/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc @@ -13,7 +13,7 @@ #include "sandboxed_api/util/flag.h" #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "absl/strings/str_format.h" #include "sandboxed_api/sandbox2/client.h" #include "sandboxed_api/sandbox2/comms.h" @@ -58,7 +58,7 @@ absl::Status CommunicationTest(int sock) { return absl::OkStatus(); } -sapi::StatusOr CreateAddres(int port) { +absl::StatusOr CreateAddres(int port) { static struct sockaddr_in6 saddr {}; saddr.sin6_family = AF_INET6; saddr.sin6_port = htons(port); @@ -86,7 
+86,7 @@ absl::Status ConnectWithHandler(int s, const struct sockaddr_in6& saddr) { return absl::OkStatus(); } -sapi::StatusOr ConnectToServer(int port) { +absl::StatusOr ConnectToServer(int port) { SAPI_ASSIGN_OR_RETURN(struct sockaddr_in6 saddr, CreateAddres(port)); sandbox2::file_util::fileops::FDCloser s(socket(AF_INET6, SOCK_STREAM, 0)); @@ -134,7 +134,7 @@ int main(int argc, char** argv) { return 2; } - sapi::StatusOr sock_s = ConnectToServer(port); + absl::StatusOr sock_s = ConnectToServer(port); if (!sock_s.ok()) { LOG(ERROR) << sock_s.status().message(); return 3; diff --git a/sandboxed_api/sandbox2/forkserver.cc b/sandboxed_api/sandbox2/forkserver.cc index d0b637c..dc6a283 100644 --- a/sandboxed_api/sandbox2/forkserver.cc +++ b/sandboxed_api/sandbox2/forkserver.cc @@ -36,7 +36,7 @@ #include "absl/memory/memory.h" #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" @@ -142,7 +142,7 @@ absl::Status SendPid(int signaling_fd) { return absl::OkStatus(); } -sapi::StatusOr ReceivePid(int signaling_fd) { +absl::StatusOr ReceivePid(int signaling_fd) { union { struct cmsghdr cmh; char ctrl[CMSG_SPACE(sizeof(struct ucred))]; diff --git a/sandboxed_api/sandbox2/mounts.cc b/sandboxed_api/sandbox2/mounts.cc index e1c0198..7e800ea 100644 --- a/sandboxed_api/sandbox2/mounts.cc +++ b/sandboxed_api/sandbox2/mounts.cc @@ -27,7 +27,7 @@ #include "google/protobuf/util/message_differencer.h" #include "absl/container/flat_hash_set.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "absl/strings/ascii.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" @@ -97,7 +97,7 @@ absl::string_view GetOutsidePath(const MountTree::Node& node) { } } -sapi::StatusOr ExistingPathInsideDir( +absl::StatusOr ExistingPathInsideDir( absl::string_view dir_path, absl::string_view 
relative_path) { auto path = file::CleanPath(file::JoinPath(dir_path, relative_path)); if (file_util::fileops::StripBasename(path) != dir_path) { diff --git a/sandboxed_api/sandbox2/network_proxy/BUILD.bazel b/sandboxed_api/sandbox2/network_proxy/BUILD.bazel index d489a53..f099939 100644 --- a/sandboxed_api/sandbox2/network_proxy/BUILD.bazel +++ b/sandboxed_api/sandbox2/network_proxy/BUILD.bazel @@ -29,8 +29,8 @@ cc_library( ":filtering", "//sandboxed_api/sandbox2:comms", "//sandboxed_api/sandbox2/util:fileops", - "//sandboxed_api/util:statusor", "@com_google_absl//absl/memory", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_glog//:glog", ], @@ -63,8 +63,8 @@ cc_library( "//sandboxed_api/sandbox2:comms", "//sandboxed_api/sandbox2/util:strerror", "//sandboxed_api/util:status", - "//sandboxed_api/util:statusor", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_glog//:glog", ], diff --git a/sandboxed_api/sandbox2/network_proxy/filtering.cc b/sandboxed_api/sandbox2/network_proxy/filtering.cc index fc93e6c..aac47df 100644 --- a/sandboxed_api/sandbox2/network_proxy/filtering.cc +++ b/sandboxed_api/sandbox2/network_proxy/filtering.cc @@ -18,7 +18,7 @@ #include #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" @@ -27,7 +27,7 @@ namespace sandbox2 { -static sapi::StatusOr Addr6ToString( +static absl::StatusOr Addr6ToString( const struct sockaddr_in6* saddr) { char addr[INET6_ADDRSTRLEN]; int port = htons(saddr->sin6_port); @@ -39,7 +39,7 @@ static sapi::StatusOr Addr6ToString( } // Converts sockaddr_in structure into a string IPv4 representation. 
-static sapi::StatusOr Addr4ToString( +static absl::StatusOr Addr4ToString( const struct sockaddr_in* saddr) { char addr[INET_ADDRSTRLEN]; int port = htons(saddr->sin_port); @@ -51,7 +51,7 @@ static sapi::StatusOr Addr4ToString( } // Converts sockaddr_in6 structure into a string IPv6 representation. -sapi::StatusOr AddrToString(const struct sockaddr* saddr) { +absl::StatusOr AddrToString(const struct sockaddr* saddr) { switch (saddr->sa_family) { case AF_INET: return Addr4ToString(reinterpret_cast(saddr)); diff --git a/sandboxed_api/sandbox2/network_proxy/filtering.h b/sandboxed_api/sandbox2/network_proxy/filtering.h index c0a235b..b830d8e 100644 --- a/sandboxed_api/sandbox2/network_proxy/filtering.h +++ b/sandboxed_api/sandbox2/network_proxy/filtering.h @@ -19,14 +19,14 @@ #include -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "sandboxed_api/sandbox2/comms.h" namespace sandbox2 { // Converts sockaddr_in or sockaddr_in6 structure into a string // representation. 
-sapi::StatusOr AddrToString(const struct sockaddr* saddr); +absl::StatusOr AddrToString(const struct sockaddr* saddr); struct IPv4 { in_addr_t ip; diff --git a/sandboxed_api/sandbox2/network_proxy/server.cc b/sandboxed_api/sandbox2/network_proxy/server.cc index 41f6f7e..6ba1ba7 100644 --- a/sandboxed_api/sandbox2/network_proxy/server.cc +++ b/sandboxed_api/sandbox2/network_proxy/server.cc @@ -26,7 +26,7 @@ #include #include "absl/memory/memory.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "sandboxed_api/sandbox2/util/fileops.h" namespace sandbox2 { @@ -105,7 +105,7 @@ void NetworkProxyServer::NotifySuccess() { } void NetworkProxyServer::NotifyViolation(const struct sockaddr* saddr) { - if (sapi::StatusOr result = AddrToString(saddr); result.ok()) { + if (absl::StatusOr result = AddrToString(saddr); result.ok()) { violation_msg_ = std::move(result).value(); } else { violation_msg_ = std::string(result.status().message()); diff --git a/sandboxed_api/sandbox2/policybuilder.cc b/sandboxed_api/sandbox2/policybuilder.cc index d185631..dbae2d9 100644 --- a/sandboxed_api/sandbox2/policybuilder.cc +++ b/sandboxed_api/sandbox2/policybuilder.cc @@ -35,7 +35,7 @@ #include #include "absl/memory/memory.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "absl/strings/escaping.h" #include "absl/strings/match.h" #include "sandboxed_api/sandbox2/namespace.h" @@ -658,7 +658,7 @@ PolicyBuilder& PolicyBuilder::DangerDefaultAllowAll() { return *this; } -sapi::StatusOr PolicyBuilder::ValidateAbsolutePath( +absl::StatusOr PolicyBuilder::ValidateAbsolutePath( absl::string_view path) { if (!file::IsAbsolutePath(path)) { return absl::InvalidArgumentError( @@ -667,7 +667,7 @@ sapi::StatusOr PolicyBuilder::ValidateAbsolutePath( return ValidatePath(path); } -sapi::StatusOr PolicyBuilder::ValidatePath( +absl::StatusOr PolicyBuilder::ValidatePath( absl::string_view path) { std::string fixed_path = 
file::CleanPath(path); if (fixed_path != path) { @@ -688,7 +688,7 @@ std::vector PolicyBuilder::ResolveBpfFunc(BpfFunc f) { return policy; } -sapi::StatusOr> PolicyBuilder::TryBuild() { +absl::StatusOr> PolicyBuilder::TryBuild() { auto output = absl::WrapUnique(new Policy()); if (!last_status_.ok()) { diff --git a/sandboxed_api/sandbox2/policybuilder.h b/sandboxed_api/sandbox2/policybuilder.h index 07095ba..17e95ff 100644 --- a/sandboxed_api/sandbox2/policybuilder.h +++ b/sandboxed_api/sandbox2/policybuilder.h @@ -29,7 +29,7 @@ #include #include "absl/base/macros.h" #include "absl/memory/memory.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "sandboxed_api/sandbox2/mounts.h" #include "sandboxed_api/sandbox2/network_proxy/filtering.h" @@ -390,7 +390,7 @@ class PolicyBuilder final { // Builds the policy returning a unique_ptr to it. This should only be called // once. - sapi::StatusOr> TryBuild(); + absl::StatusOr> TryBuild(); // Builds the policy returning a unique_ptr to it. This should only be called // once. 
@@ -532,9 +532,9 @@ class PolicyBuilder final { std::vector ResolveBpfFunc(BpfFunc f); - static sapi::StatusOr ValidateAbsolutePath( + static absl::StatusOr ValidateAbsolutePath( absl::string_view path); - static sapi::StatusOr ValidatePath(absl::string_view path); + static absl::StatusOr ValidatePath(absl::string_view path); void StoreDescription(PolicyBuilderDescription* pb_description); diff --git a/sandboxed_api/sandbox2/policybuilder_test.cc b/sandboxed_api/sandbox2/policybuilder_test.cc index ca13614..b5387d9 100644 --- a/sandboxed_api/sandbox2/policybuilder_test.cc +++ b/sandboxed_api/sandbox2/policybuilder_test.cc @@ -24,7 +24,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/memory/memory.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" @@ -58,7 +58,7 @@ class PolicyBuilderPeer { int policy_size() const { return builder_->user_policy_.size(); } - static sapi::StatusOr ValidateAbsolutePath( + static absl::StatusOr ValidateAbsolutePath( absl::string_view path) { return PolicyBuilder::ValidateAbsolutePath(path); } diff --git a/sandboxed_api/sandbox2/sandbox2.cc b/sandboxed_api/sandbox2/sandbox2.cc index 86c06dc..d0c1e22 100644 --- a/sandboxed_api/sandbox2/sandbox2.cc +++ b/sandboxed_api/sandbox2/sandbox2.cc @@ -21,7 +21,7 @@ #include #include "absl/memory/memory.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "absl/time/time.h" #include "sandboxed_api/sandbox2/monitor.h" #include "sandboxed_api/sandbox2/result.h" @@ -34,7 +34,7 @@ Sandbox2::~Sandbox2() { } } -sapi::StatusOr Sandbox2::AwaitResultWithTimeout( +absl::StatusOr Sandbox2::AwaitResultWithTimeout( absl::Duration timeout) { CHECK(monitor_ != nullptr) << "Sandbox was not launched yet"; CHECK(monitor_thread_ != nullptr) << "Sandbox was already waited on"; diff --git a/sandboxed_api/sandbox2/sandbox2.h 
b/sandboxed_api/sandbox2/sandbox2.h index 633297f..62dd4fe 100644 --- a/sandboxed_api/sandbox2/sandbox2.h +++ b/sandboxed_api/sandbox2/sandbox2.h @@ -26,7 +26,7 @@ #include #include "absl/base/macros.h" #include "absl/memory/memory.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "sandboxed_api/sandbox2/comms.h" #include "sandboxed_api/sandbox2/executor.h" #include "sandboxed_api/sandbox2/ipc.h" @@ -76,7 +76,7 @@ class Sandbox2 final { // Waits for sandbox execution to finish within the timeout. // Returns execution result or a DeadlineExceededError if the sandboxee does // not finish in time. - sapi::StatusOr AwaitResultWithTimeout(absl::Duration timeout); + absl::StatusOr AwaitResultWithTimeout(absl::Duration timeout); // Requests termination of the sandboxee. // Sandbox should still waited with AwaitResult(), as it may finish for other diff --git a/sandboxed_api/sandbox2/util.cc b/sandboxed_api/sandbox2/util.cc index b760d26..55f7aa6 100644 --- a/sandboxed_api/sandbox2/util.cc +++ b/sandboxed_api/sandbox2/util.cc @@ -182,7 +182,7 @@ bool CreateMemFd(int* fd, const char* name) { return true; } -sapi::StatusOr Communicate(const std::vector& argv, +absl::StatusOr Communicate(const std::vector& argv, const std::vector& envv, std::string* output) { int cout_pipe[2]; @@ -280,7 +280,7 @@ std::string GetRlimitName(int resource) { } } -sapi::StatusOr ReadCPathFromPid(pid_t pid, uintptr_t ptr) { +absl::StatusOr ReadCPathFromPid(pid_t pid, uintptr_t ptr) { std::string path(PATH_MAX, '\0'); iovec local_iov[] = {{&path[0], path.size()}}; diff --git a/sandboxed_api/sandbox2/util.h b/sandboxed_api/sandbox2/util.h index e42bf72..a925b02 100644 --- a/sandboxed_api/sandbox2/util.h +++ b/sandboxed_api/sandbox2/util.h @@ -25,7 +25,7 @@ #include #include "absl/base/macros.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" namespace sandbox2 { namespace util { @@ -62,7 +62,7 @@ bool CreateMemFd(int* fd, const char* 
name = "buffer_file"); // Executes a the program given by argv and the specified environment and // captures any output to stdout/stderr. -sapi::StatusOr Communicate(const std::vector& argv, +absl::StatusOr Communicate(const std::vector& argv, const std::vector& envv, std::string* output); @@ -74,7 +74,7 @@ std::string GetRlimitName(int resource); // Reads a path string (NUL-terminated, shorter than PATH_MAX) from another // process memory -sapi::StatusOr ReadCPathFromPid(pid_t pid, uintptr_t ptr); +absl::StatusOr ReadCPathFromPid(pid_t pid, uintptr_t ptr); } // namespace util } // namespace sandbox2 diff --git a/sandboxed_api/sandbox2/util/BUILD.bazel b/sandboxed_api/sandbox2/util/BUILD.bazel index d48d9a4..924e22e 100644 --- a/sandboxed_api/sandbox2/util/BUILD.bazel +++ b/sandboxed_api/sandbox2/util/BUILD.bazel @@ -139,9 +139,9 @@ cc_library( "//sandboxed_api/sandbox2:util", "//sandboxed_api/util:raw_logging", "//sandboxed_api/util:status", - "//sandboxed_api/util:statusor", "@com_google_absl//absl/base:endian", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", ], ) @@ -173,8 +173,8 @@ cc_library( deps = [ ":fileops", ":strerror", - "//sandboxed_api/util:statusor", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", ], ) @@ -199,8 +199,8 @@ cc_library( hdrs = ["maps_parser.h"], copts = sapi_platform_copts(), deps = [ - "//sandboxed_api/util:statusor", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", ], ) diff --git a/sandboxed_api/sandbox2/util/maps_parser.cc b/sandboxed_api/sandbox2/util/maps_parser.cc index cf0955f..6687989 100644 --- a/sandboxed_api/sandbox2/util/maps_parser.cc +++ b/sandboxed_api/sandbox2/util/maps_parser.cc @@ -15,12 +15,12 @@ #include "sandboxed_api/sandbox2/util/maps_parser.h" #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" +#include 
"absl/status/statusor.h" #include "absl/strings/str_split.h" namespace sandbox2 { -sapi::StatusOr> ParseProcMaps( +absl::StatusOr> ParseProcMaps( const std::string& contents) { // Note: The format string // https://github.com/torvalds/linux/blob/v4.14/fs/proc/task_mmu.c#L289 diff --git a/sandboxed_api/sandbox2/util/maps_parser.h b/sandboxed_api/sandbox2/util/maps_parser.h index 862d3e7..ea0a419 100644 --- a/sandboxed_api/sandbox2/util/maps_parser.h +++ b/sandboxed_api/sandbox2/util/maps_parser.h @@ -19,7 +19,7 @@ #include #include -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" namespace sandbox2 { @@ -37,7 +37,7 @@ struct MapsEntry { std::string path; }; -sapi::StatusOr> ParseProcMaps( +absl::StatusOr> ParseProcMaps( const std::string& contents); } // namespace sandbox2 diff --git a/sandboxed_api/sandbox2/util/minielf.cc b/sandboxed_api/sandbox2/util/minielf.cc index dce033b..737dc97 100644 --- a/sandboxed_api/sandbox2/util/minielf.cc +++ b/sandboxed_api/sandbox2/util/minielf.cc @@ -95,7 +95,7 @@ class ElfParser { static constexpr size_t kMaxInterpreterSize = 1000; ElfParser() = default; - sapi::StatusOr Parse(FILE* elf, uint32_t features); + absl::StatusOr Parse(FILE* elf, uint32_t features); private: // Endianess support functions @@ -133,16 +133,16 @@ class ElfParser { // Reads elf header. absl::Status ReadFileHeader(); // Reads a single elf program header. - sapi::StatusOr ReadProgramHeader(absl::string_view src); + absl::StatusOr ReadProgramHeader(absl::string_view src); // Reads all elf program headers. absl::Status ReadProgramHeaders(); // Reads a single elf section header. - sapi::StatusOr ReadSectionHeader(absl::string_view src); + absl::StatusOr ReadSectionHeader(absl::string_view src); // Reads all elf section headers. absl::Status ReadSectionHeaders(); // Reads contents of an elf section. 
- sapi::StatusOr ReadSectionContents(int idx); - sapi::StatusOr ReadSectionContents( + absl::StatusOr ReadSectionContents(int idx); + absl::StatusOr ReadSectionContents( const Elf64_Shdr& section_header); // Reads all symbols from symtab section. absl::Status ReadSymbolsFromSymtab(const Elf64_Shdr& symtab); @@ -219,7 +219,7 @@ absl::Status ElfParser::ReadFileHeader() { return absl::OkStatus(); } -sapi::StatusOr ElfParser::ReadSectionHeader(absl::string_view src) { +absl::StatusOr ElfParser::ReadSectionHeader(absl::string_view src) { if (src.size() < sizeof(Elf64_Shdr)) { return absl::FailedPreconditionError( absl::StrCat("invalid section header data: got ", src.size(), @@ -266,7 +266,7 @@ absl::Status ElfParser::ReadSectionHeaders() { return absl::OkStatus(); } -sapi::StatusOr ElfParser::ReadSectionContents(int idx) { +absl::StatusOr ElfParser::ReadSectionContents(int idx) { if (idx < 0 || idx >= section_headers_.size()) { return absl::FailedPreconditionError( absl::StrCat("invalid section header index: ", idx)); @@ -274,7 +274,7 @@ sapi::StatusOr ElfParser::ReadSectionContents(int idx) { return ReadSectionContents(section_headers_.at(idx)); } -sapi::StatusOr ElfParser::ReadSectionContents( +absl::StatusOr ElfParser::ReadSectionContents( const Elf64_Shdr& section_header) { auto offset = section_header.sh_offset; if (offset > file_size_) { @@ -292,7 +292,7 @@ sapi::StatusOr ElfParser::ReadSectionContents( return rv; } -sapi::StatusOr ElfParser::ReadProgramHeader(absl::string_view src) { +absl::StatusOr ElfParser::ReadProgramHeader(absl::string_view src) { if (src.size() < sizeof(Elf64_Phdr)) { return absl::FailedPreconditionError( absl::StrCat("invalid program header data: got ", src.size(), @@ -454,7 +454,7 @@ absl::Status ElfParser::ReadImportedLibrariesFromDynamic( return absl::OkStatus(); } -sapi::StatusOr ElfParser::Parse(FILE* elf, uint32_t features) { +absl::StatusOr ElfParser::Parse(FILE* elf, uint32_t features) { elf_ = elf; // Basic sanity check. 
if (features & ~(ElfFile::kAll)) { @@ -511,7 +511,7 @@ sapi::StatusOr ElfParser::Parse(FILE* elf, uint32_t features) { return std::move(result_); } -sapi::StatusOr ElfFile::ParseFromFile(const std::string& filename, +absl::StatusOr ElfFile::ParseFromFile(const std::string& filename, uint32_t features) { std::unique_ptr elf{fopen(filename.c_str(), "r"), [](FILE* f) { fclose(f); }}; diff --git a/sandboxed_api/sandbox2/util/minielf.h b/sandboxed_api/sandbox2/util/minielf.h index c33a604..027e21c 100644 --- a/sandboxed_api/sandbox2/util/minielf.h +++ b/sandboxed_api/sandbox2/util/minielf.h @@ -20,7 +20,7 @@ #include #include -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" namespace sandbox2 { @@ -33,7 +33,7 @@ class ElfFile { std::string name; }; - static sapi::StatusOr ParseFromFile(const std::string& filename, + static absl::StatusOr ParseFromFile(const std::string& filename, uint32_t features); int64_t file_size() const { return file_size_; } diff --git a/sandboxed_api/sandbox2/util/temp_file.cc b/sandboxed_api/sandbox2/util/temp_file.cc index 602a18c..09a5a1b 100644 --- a/sandboxed_api/sandbox2/util/temp_file.cc +++ b/sandboxed_api/sandbox2/util/temp_file.cc @@ -22,7 +22,7 @@ #include #include -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "absl/strings/str_cat.h" #include "sandboxed_api/sandbox2/util/fileops.h" #include "sandboxed_api/sandbox2/util/strerror.h" @@ -33,7 +33,7 @@ namespace { constexpr absl::string_view kMktempSuffix = "XXXXXX"; } // namespace -sapi::StatusOr> CreateNamedTempFile( +absl::StatusOr> CreateNamedTempFile( absl::string_view prefix) { std::string name_template = absl::StrCat(prefix, kMktempSuffix); int fd = mkstemp(&name_template[0]); @@ -43,7 +43,7 @@ sapi::StatusOr> CreateNamedTempFile( return std::pair{std::move(name_template), fd}; } -sapi::StatusOr CreateNamedTempFileAndClose( +absl::StatusOr CreateNamedTempFileAndClose( absl::string_view prefix) { auto result_or = 
CreateNamedTempFile(prefix); if (result_or.ok()) { @@ -56,7 +56,7 @@ sapi::StatusOr CreateNamedTempFileAndClose( return result_or.status(); } -sapi::StatusOr CreateTempDir(absl::string_view prefix) { +absl::StatusOr CreateTempDir(absl::string_view prefix) { std::string name_template = absl::StrCat(prefix, kMktempSuffix); if (mkdtemp(&name_template[0]) == nullptr) { return absl::UnknownError(absl::StrCat("mkdtemp():", StrError(errno))); diff --git a/sandboxed_api/sandbox2/util/temp_file.h b/sandboxed_api/sandbox2/util/temp_file.h index ae8f986..8324a61 100644 --- a/sandboxed_api/sandbox2/util/temp_file.h +++ b/sandboxed_api/sandbox2/util/temp_file.h @@ -17,24 +17,24 @@ #include -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" namespace sandbox2 { // Creates a temporary file under a path starting with prefix. File is not // unlinked and its path is returned together with an open fd. -sapi::StatusOr> CreateNamedTempFile( +absl::StatusOr> CreateNamedTempFile( absl::string_view prefix); // Creates a temporary file under a path starting with prefix. File is not // unlinked and its path is returned. FD of the created file is closed just // after creation. -sapi::StatusOr CreateNamedTempFileAndClose( +absl::StatusOr CreateNamedTempFileAndClose( absl::string_view prefix); // Creates a temporary directory under a path starting with prefix. // Returns the path of the created directory. 
-sapi::StatusOr CreateTempDir(absl::string_view prefix); +absl::StatusOr CreateTempDir(absl::string_view prefix); } // namespace sandbox2 diff --git a/sandboxed_api/tools/clang_generator/emitter.cc b/sandboxed_api/tools/clang_generator/emitter.cc index 5f30953..03bb5e3 100644 --- a/sandboxed_api/tools/clang_generator/emitter.cc +++ b/sandboxed_api/tools/clang_generator/emitter.cc @@ -15,7 +15,7 @@ #include "sandboxed_api/tools/clang_generator/emitter.h" #include "absl/random/random.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "absl/strings/ascii.h" #include "absl/strings/escaping.h" #include "absl/strings/match.h" @@ -47,7 +47,7 @@ constexpr absl::string_view kHeaderProlog = #include "absl/base/macros.h" #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "sandboxed_api/sandbox.h" #include "sandboxed_api/vars.h" #include "sandboxed_api/util/status_macros.h" @@ -197,7 +197,7 @@ std::string PrintFunctionPrototype(const clang::FunctionDecl* decl) { return out; } -sapi::StatusOr EmitFunction(const clang::FunctionDecl* decl) { +absl::StatusOr EmitFunction(const clang::FunctionDecl* decl) { std::string out; absl::StrAppend(&out, "\n// ", PrintFunctionPrototype(decl), "\n"); const std::string function_name = decl->getNameAsString(); @@ -249,7 +249,7 @@ sapi::StatusOr EmitFunction(const clang::FunctionDecl* decl) { return out; } -sapi::StatusOr EmitHeader( +absl::StatusOr EmitHeader( std::vector functions, const QualTypeSet& types, const GeneratorOptions& options) { std::string out; diff --git a/sandboxed_api/tools/clang_generator/emitter.h b/sandboxed_api/tools/clang_generator/emitter.h index ad9f5f8..78e1f12 100644 --- a/sandboxed_api/tools/clang_generator/emitter.h +++ b/sandboxed_api/tools/clang_generator/emitter.h @@ -18,7 +18,7 @@ #include #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include 
"absl/strings/string_view.h" #include "clang/AST/Decl.h" #include "clang/AST/Type.h" @@ -36,7 +36,7 @@ namespace sapi { std::string GetIncludeGuard(absl::string_view filename); // Outputs a formatted header for a list of functions and their related types. -sapi::StatusOr EmitHeader( +absl::StatusOr EmitHeader( std::vector functions, const QualTypeSet& types, const GeneratorOptions& options); diff --git a/sandboxed_api/tools/clang_generator/generator.cc b/sandboxed_api/tools/clang_generator/generator.cc index ffe2106..90504eb 100644 --- a/sandboxed_api/tools/clang_generator/generator.cc +++ b/sandboxed_api/tools/clang_generator/generator.cc @@ -18,7 +18,7 @@ #include #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "clang/Format/Format.h" #include "sandboxed_api/sandbox2/util/fileops.h" #include "sandboxed_api/tools/clang_generator/diagnostics.h" @@ -67,7 +67,7 @@ bool GeneratorASTVisitor::VisitFunctionDecl(clang::FunctionDecl* decl) { namespace internal { -sapi::StatusOr ReformatGoogleStyle(const std::string& filename, +absl::StatusOr ReformatGoogleStyle(const std::string& filename, const std::string& code) { // Configure code style based on Google style, but enforce pointer alignment clang::format::FormatStyle style = diff --git a/sandboxed_api/tools/clang_generator/generator.h b/sandboxed_api/tools/clang_generator/generator.h index 3b9680b..8732a26 100644 --- a/sandboxed_api/tools/clang_generator/generator.h +++ b/sandboxed_api/tools/clang_generator/generator.h @@ -20,7 +20,7 @@ #include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "absl/status/status.h" -#include "sandboxed_api/util/statusor.h" +#include "absl/status/statusor.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Frontend/CompilerInstance.h" @@ -66,7 +66,7 @@ class GeneratorASTVisitor namespace internal { -sapi::StatusOr ReformatGoogleStyle(const 
std::string& filename, +absl::StatusOr ReformatGoogleStyle(const std::string& filename, const std::string& code); } // namespace internal diff --git a/sandboxed_api/tools/clang_generator/types.cc b/sandboxed_api/tools/clang_generator/types.cc index 321d1e2..8efb4ef 100644 --- a/sandboxed_api/tools/clang_generator/types.cc +++ b/sandboxed_api/tools/clang_generator/types.cc @@ -208,7 +208,7 @@ std::string MapQualTypeReturn(const clang::ASTContext& context, return "absl::Status"; } // Remove const qualifier like in MapQualType(). - return absl::StrCat("sapi::StatusOr<", + return absl::StrCat("absl::StatusOr<", MaybeRemoveConst(context, qual).getAsString(), ">"); } diff --git a/sandboxed_api/tools/clang_generator/types.h b/sandboxed_api/tools/clang_generator/types.h index 82a4478..b4d5946 100644 --- a/sandboxed_api/tools/clang_generator/types.h +++ b/sandboxed_api/tools/clang_generator/types.h @@ -68,7 +68,7 @@ std::string MapQualTypeParameter(const clang::ASTContext& context, // Maps a qualified type used as a function return type to a type name // compatible with the generated Sandboxed API. Uses MapQualTypeParameter() and -// wraps the type in a sapi::StatusOr<> if qual is non-void. Otherwise returns +// wraps the type in a absl::StatusOr<> if qual is non-void. Otherwise returns // absl::Status. std::string MapQualTypeReturn(const clang::ASTContext& context, clang::QualType qual); diff --git a/sandboxed_api/tools/generator2/code.py b/sandboxed_api/tools/generator2/code.py index fb31101..84cff80 100644 --- a/sandboxed_api/tools/generator2/code.py +++ b/sandboxed_api/tools/generator2/code.py @@ -461,7 +461,7 @@ class ReturnType(ArgumentType): """Class representing function return type. 
Attributes: - return_type: sapi::StatusOr where T is original return type, or + return_type: absl::StatusOr where T is original return type, or absl::Status for functions returning void """ @@ -474,7 +474,7 @@ class ReturnType(ArgumentType): """Returns function return type prepared from the type.""" # TODO(szwl): const ptrs do not play well with SAPI C++ API... spelling = self._clang_type.spelling.replace('const', '') - return_type = 'sapi::StatusOr<{}>'.format(spelling) + return_type = 'absl::StatusOr<{}>'.format(spelling) return_type = 'absl::Status' if self.is_void() else return_type return return_type diff --git a/sandboxed_api/tools/generator2/code_test_util.py b/sandboxed_api/tools/generator2/code_test_util.py index 1fd5f9d..82ee3a6 100644 --- a/sandboxed_api/tools/generator2/code_test_util.py +++ b/sandboxed_api/tools/generator2/code_test_util.py @@ -32,7 +32,7 @@ class TestApi { ::sapi::Sandbox* sandbox() const { return sandbox_; } // int function_a(int, int) - sapi::StatusOr function_a(int x, int y) { + absl::StatusOr function_a(int x, int y) { ::sapi::v::Int ret; ::sapi::v::Int x_((x)); ::sapi::v::Int y_((y)); @@ -42,7 +42,7 @@ class TestApi { } // int types_1(bool, unsigned char, char, unsigned short, short) - sapi::StatusOr types_1(bool a0, unsigned char a1, char a2, unsigned short a3, short a4) { + absl::StatusOr types_1(bool a0, unsigned char a1, char a2, unsigned short a3, short a4) { ::sapi::v::Int ret; ::sapi::v::Bool a0_((a0)); ::sapi::v::UChar a1_((a1)); @@ -55,7 +55,7 @@ class TestApi { } // int types_2(int, unsigned int, long, unsigned long) - sapi::StatusOr types_2(int a0, unsigned int a1, long a2, unsigned long a3) { + absl::StatusOr types_2(int a0, unsigned int a1, long a2, unsigned long a3) { ::sapi::v::Int ret; ::sapi::v::Int a0_((a0)); ::sapi::v::UInt a1_((a1)); @@ -67,7 +67,7 @@ class TestApi { } // int types_3(long long, unsigned long long, float, double) - sapi::StatusOr types_3(long long a0, unsigned long long a1, float a2, double a3) 
{ + absl::StatusOr types_3(long long a0, unsigned long long a1, float a2, double a3) { ::sapi::v::Int ret; ::sapi::v::LLong a0_((a0)); ::sapi::v::ULLong a1_((a1)); @@ -79,7 +79,7 @@ class TestApi { } // int types_4(signed char, short, int, long) - sapi::StatusOr types_4(signed char a0, short a1, int a2, long a3) { + absl::StatusOr types_4(signed char a0, short a1, int a2, long a3) { ::sapi::v::Int ret; ::sapi::v::SChar a0_((a0)); ::sapi::v::Short a1_((a1)); @@ -91,7 +91,7 @@ class TestApi { } // int types_5(long long, long double) - sapi::StatusOr types_5(long long a0, long double a1) { + absl::StatusOr types_5(long long a0, long double a1) { ::sapi::v::Int ret; ::sapi::v::LLong a0_((a0)); ::sapi::v::Reg a1_((a1)); @@ -136,7 +136,7 @@ class TestApi { ::sapi::Sandbox* sandbox() const { return sandbox_; } // uint function(uintp) - sapi::StatusOr function(::sapi::v::Ptr* a) { + absl::StatusOr function(::sapi::v::Ptr* a) { ::sapi::v::UInt ret; SAPI_RETURN_IF_ERROR(sandbox_->Call("function", &ret, a)); @@ -173,7 +173,7 @@ class TestApi { ::sapi::Sandbox* sandbox() const { return sandbox_; } // ProcessStatus ProcessDatapoint(ProcessStatus) - sapi::StatusOr ProcessDatapoint(ProcessStatus status) { + absl::StatusOr ProcessDatapoint(ProcessStatus status) { ::sapi::v::IntBase ret; ::sapi::v::IntBase status_((status)); diff --git a/sandboxed_api/util/statusor_test.cc b/sandboxed_api/util/statusor_test.cc deleted file mode 100644 index ba80ce1..0000000 --- a/sandboxed_api/util/statusor_test.cc +++ /dev/null @@ -1,407 +0,0 @@ -// Copyright 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This file is a custom fork of the version in Asylo. This will become obsolete -// and will be replaced once Abseil releases absl::Status. - -#include "sandboxed_api/util/statusor.h" - -#include -#include -#include - -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "sandboxed_api/util/status_matchers.h" - -using ::testing::Eq; -using ::testing::IsFalse; -using ::testing::Not; -using ::testing::Pointee; - -namespace sapi { -namespace { - -constexpr auto kErrorCode = absl::StatusCode::kInvalidArgument; -constexpr char kErrorMessage[] = "Invalid argument"; - -const int kIntElement = 47; -constexpr char kStringElement[] = "47 is 42, corrected for inflation"; - -// A data type without a default constructor. -struct Foo { - int bar; - std::string baz; - - explicit Foo(int value) : bar(value), baz(kStringElement) {} -}; - -// A data type with dynamically-allocated data. -struct HeapAllocatedObject { - int* value; - - HeapAllocatedObject() { - value = new int; - *value = kIntElement; - } - - HeapAllocatedObject(const HeapAllocatedObject& other) { *this = other; } - - HeapAllocatedObject& operator=(const HeapAllocatedObject& other) { - value = new int; - *value = *other.value; - return *this; - } - - HeapAllocatedObject(HeapAllocatedObject&& other) { *this = std::move(other); } - - HeapAllocatedObject& operator=(HeapAllocatedObject&& other) { - value = other.value; - other.value = nullptr; - return *this; - } - - ~HeapAllocatedObject() { delete value; } -}; - -// Constructs a Foo. 
-struct FooCtor { - using value_type = Foo; - - Foo operator()() { return Foo(kIntElement); } -}; - -// Constructs a HeapAllocatedObject. -struct HeapAllocatedObjectCtor { - using value_type = HeapAllocatedObject; - - HeapAllocatedObject operator()() { return HeapAllocatedObject(); } -}; - -// Constructs an integer. -struct IntCtor { - using value_type = int; - - int operator()() { return kIntElement; } -}; - -// Constructs a string. -struct StringCtor { - using value_type = std::string; - - std::string operator()() { return std::string(kStringElement); } -}; - -// Constructs a vector of strings. -struct StringVectorCtor { - using value_type = std::vector; - - std::vector operator()() { - return {kStringElement, kErrorMessage}; - } -}; - -bool operator==(const Foo& lhs, const Foo& rhs) { - return (lhs.bar == rhs.bar) && (lhs.baz == rhs.baz); -} - -bool operator==(const HeapAllocatedObject& lhs, - const HeapAllocatedObject& rhs) { - return *lhs.value == *rhs.value; -} - -// Returns an rvalue reference to the StatusOr object pointed to by -// |statusor|. -template -StatusOr&& MoveStatusOr(StatusOr* statusor) { - return std::move(*statusor); -} - -// A test fixture is required for typed tests. -template -class StatusOrTest : public ::testing::Test {}; - -using TestTypes = ::testing::Types; -TYPED_TEST_SUITE(StatusOrTest, TestTypes); - -// Verify that the default constructor for StatusOr constructs an object with a -// non-ok status. -TYPED_TEST(StatusOrTest, ConstructorDefault) { - StatusOr statusor; - EXPECT_THAT(statusor.ok(), IsFalse()); - EXPECT_THAT(statusor.status().code(), Eq(absl::StatusCode::kUnknown)); -} - -// Verify that StatusOr can be constructed from a Status object. 
-TYPED_TEST(StatusOrTest, ConstructorStatus) { - StatusOr statusor( - absl::Status(kErrorCode, kErrorMessage)); - - EXPECT_THAT(statusor.ok(), IsFalse()); - EXPECT_THAT(statusor.status().ok(), IsFalse()); - EXPECT_THAT(statusor.status(), Eq(absl::Status(kErrorCode, kErrorMessage))); -} - -// Verify that StatusOr can be constructed from an object of its element type. -TYPED_TEST(StatusOrTest, ConstructorElementConstReference) { - auto value = TypeParam()(); - StatusOr statusor{value}; - - ASSERT_THAT(statusor, IsOk()); - ASSERT_THAT(statusor.status(), IsOk()); - EXPECT_THAT(statusor.ValueOrDie(), Eq(value)); -} - -// Verify that StatusOr can be constructed from an rvalue reference of an object -// of its element type. -TYPED_TEST(StatusOrTest, ConstructorElementRValue) { - auto value = TypeParam()(); - auto value_copy(value); - StatusOr statusor(std::move(value)); - - ASSERT_THAT(statusor, IsOk()); - ASSERT_THAT(statusor.status(), IsOk()); - - // Compare to a copy of the original value, since the original was moved. - EXPECT_THAT(statusor.ValueOrDie(), Eq(value_copy)); -} - -// Verify that StatusOr can be copy-constructed from a StatusOr with a non-ok -// status. -TYPED_TEST(StatusOrTest, CopyConstructorNonOkStatus) { - StatusOr statusor1 = - absl::Status(kErrorCode, kErrorMessage); - StatusOr statusor2(statusor1); - - EXPECT_THAT(statusor1.ok(), Eq(statusor2.ok())); - EXPECT_THAT(statusor1.status(), Eq(statusor2.status())); -} - -// Verify that StatusOr can be copy-constructed from a StatusOr with an ok -// status. -TYPED_TEST(StatusOrTest, CopyConstructorOkStatus) { - StatusOr statusor1{TypeParam()()}; - StatusOr statusor2{statusor1}; - - EXPECT_THAT(statusor1.ok(), Eq(statusor2.ok())); - ASSERT_THAT(statusor2, IsOk()); - EXPECT_THAT(statusor1.ValueOrDie(), Eq(statusor2.ValueOrDie())); -} - -// Verify that copy-assignment of a StatusOr with a non-ok is working as -// expected. 
-TYPED_TEST(StatusOrTest, CopyAssignmentNonOkStatus) { - StatusOr statusor1{ - absl::Status(kErrorCode, kErrorMessage)}; - StatusOr statusor2{TypeParam()()}; - - // Invoke the copy-assignment operator. - statusor2 = statusor1; - EXPECT_THAT(statusor1.ok(), Eq(statusor2.ok())); - EXPECT_THAT(statusor1.status(), Eq(statusor2.status())); -} - -// Verify that copy-assignment of a StatusOr with an ok status is working as -// expected. -TYPED_TEST(StatusOrTest, CopyAssignmentOkStatus) { - StatusOr statusor1{TypeParam()()}; - StatusOr statusor2{ - absl::Status(kErrorCode, kErrorMessage)}; - - // Invoke the copy-assignment operator. - statusor2 = statusor1; - EXPECT_THAT(statusor1.ok(), Eq(statusor2.ok())); - ASSERT_THAT(statusor2, IsOk()); - EXPECT_THAT(statusor1.ValueOrDie(), Eq(statusor2.ValueOrDie())); -} - -// Verify that StatusOr can be move-constructed from a StatusOr with a non-ok -// status. -TYPED_TEST(StatusOrTest, MoveConstructorNonOkStatus) { - absl::Status status(kErrorCode, kErrorMessage); - StatusOr statusor1(status); - StatusOr statusor2(std::move(statusor1)); - - // Verify that the status of the donor object was updated. - EXPECT_THAT(statusor1.ok(), IsFalse()); // NOLINT - EXPECT_THAT(statusor1.status(), StatusIs(absl::StatusCode::kInternal)); - - // Verify that the destination object contains the status previously held by - // the donor. - EXPECT_THAT(statusor2.ok(), IsFalse()); - EXPECT_THAT(statusor2.status(), Eq(status)); -} - -// Verify that StatusOr can be move-constructed from a StatusOr with an ok -// status. -TYPED_TEST(StatusOrTest, MoveConstructorOkStatus) { - auto value = TypeParam()(); - StatusOr statusor1(value); - StatusOr statusor2(std::move(statusor1)); - - // The destination object should possess the value previously held by the - // donor. - ASSERT_THAT(statusor2, IsOk()); - EXPECT_THAT(statusor2.ValueOrDie(), Eq(value)); -} - -// Verify that move-assignment from a StatusOr with a non-ok status is working -// as expected. 
-TYPED_TEST(StatusOrTest, MoveAssignmentOperatorNonOkStatus) { - absl::Status status(kErrorCode, kErrorMessage); - StatusOr statusor1(status); - StatusOr statusor2{TypeParam()()}; - - // Invoke the move-assignment operator. - statusor2 = std::move(statusor1); - - // Verify that the status of the donor object was updated. - EXPECT_THAT(statusor1.ok(), IsFalse()); // NOLINT - EXPECT_THAT(statusor1.status(), StatusIs(absl::StatusCode::kInternal)); - - // Verify that the destination object contains the status previously held by - // the donor. - EXPECT_THAT(statusor2.ok(), IsFalse()); - EXPECT_THAT(statusor2.status(), Eq(status)); -} - -// Verify that move-assignment from a StatusOr with an ok status is working as -// expected. -TYPED_TEST(StatusOrTest, MoveAssignmentOperatorOkStatus) { - auto value = TypeParam()(); - StatusOr statusor1(value); - StatusOr statusor2( - absl::Status(kErrorCode, kErrorMessage)); - - // Invoke the move-assignment operator. - statusor2 = std::move(statusor1); - - // The destination object should possess the value previously held by the - // donor. - ASSERT_THAT(statusor2, IsOk()); - EXPECT_THAT(statusor2.ValueOrDie(), Eq(value)); -} - -// Verify that the sapi::IsOk() gMock matcher works with StatusOr. -TYPED_TEST(StatusOrTest, IsOkMatcher) { - auto value = TypeParam()(); - StatusOr statusor(value); - - EXPECT_THAT(statusor, IsOk()); - - statusor = StatusOr( - absl::Status(kErrorCode, kErrorMessage)); - EXPECT_THAT(statusor, Not(IsOk())); -} - -// Tests for move-only types. These tests use std::unique_ptr<> as the -// test type, since it is valuable to support this type in the Asylo infra. -// These tests are not part of the typed test suite for the following reasons: -// * std::unique_ptr<> cannot be used as a type in tests that expect -// the test type to support copy operations. -// * std::unique_ptr<> provides an equality operator that checks equality of -// the underlying ptr. 
Consequently, it is difficult to generalize existing -// tests that verify ValueOrDie() functionality using equality comparisons. - -// Verify that a StatusOr object can be constructed from a move-only type. -TEST(StatusOrTest, InitializationMoveOnlyType) { - auto* str = new std::string(kStringElement); - std::unique_ptr value(str); - StatusOr> statusor(std::move(value)); - - ASSERT_THAT(statusor, IsOk()); - EXPECT_THAT(statusor.ValueOrDie().get(), Eq(str)); -} - -// Verify that a StatusOr object can be move-constructed from a move-only type. -TEST(StatusOrTest, MoveConstructorMoveOnlyType) { - auto* str = new std::string(kStringElement); - std::unique_ptr value(str); - StatusOr> statusor1(std::move(value)); - StatusOr> statusor2(std::move(statusor1)); - - // The destination object should possess the value previously held by the - // donor. - ASSERT_THAT(statusor2, IsOk()); - EXPECT_THAT(statusor2.ValueOrDie().get(), Eq(str)); -} - -// Verify that a StatusOr object can be move-assigned to from a StatusOr object -// containing a move-only type. -TEST(StatusOrTest, MoveAssignmentMoveOnlyType) { - auto* str = new std::string(kStringElement); - std::unique_ptr value(str); - StatusOr> statusor1(std::move(value)); - StatusOr> statusor2( - absl::Status(kErrorCode, kErrorMessage)); - - // Invoke the move-assignment operator. - statusor2 = std::move(statusor1); - - // The destination object should possess the value previously held by the - // donor. - ASSERT_THAT(statusor2, IsOk()); - EXPECT_THAT(statusor2.ValueOrDie().get(), Eq(str)); -} - -// Verify that a value can be moved out of a StatusOr object via ValueOrDie(). 
-TEST(StatusOrTest, ValueOrDieMovedValue) { - auto* str = new std::string(kStringElement); - std::unique_ptr value(str); - StatusOr> statusor(std::move(value)); - - std::unique_ptr moved_value = std::move(statusor).ValueOrDie(); - EXPECT_THAT(moved_value.get(), Eq(str)); - EXPECT_THAT(*moved_value, Eq(kStringElement)); -} - -TEST(StatusOrTest, MapToStatusOrUniquePtr) { - // A reduced version of a problematic type found in the wild. All of the - // operations below should compile. - using MapType = std::map>>; - - MapType a; - - // Move-construction - MapType b(std::move(a)); - - // Move-assignment - a = std::move(b); -} - -TEST(StatusOrTest, ValueOrOk) { - const StatusOr status_or = 0; - EXPECT_EQ(status_or.value_or(-1), 0); -} - -TEST(StatusOrTest, ValueOrDefault) { - const StatusOr status_or = absl::CancelledError(); - EXPECT_EQ(status_or.value_or(-1), -1); -} - -TEST(StatusOrTest, MoveOnlyValueOrOk) { - EXPECT_THAT(StatusOr>(absl::make_unique(0)) - .value_or(absl::make_unique(-1)), - Pointee(0)); -} - -TEST(StatusOr, MoveOnlyValueOrDefault) { - EXPECT_THAT(StatusOr>(absl::CancelledError()) - .value_or(absl::make_unique(-1)), - Pointee(-1)); -} - -} // namespace -} // namespace sapi diff --git a/sandboxed_api/var_proto.h b/sandboxed_api/var_proto.h index 4d1dac2..ffa8128 100644 --- a/sandboxed_api/var_proto.h +++ b/sandboxed_api/var_proto.h @@ -41,7 +41,7 @@ class Proto : public Pointable, public Var { explicit Proto(const T& proto) : wrapped_var_(SerializeProto(proto).value()) {} - static sapi::StatusOr> FromMessage(const T& proto) { + static absl::StatusOr> FromMessage(const T& proto) { SAPI_ASSIGN_OR_RETURN(std::vector len_val, SerializeProto(proto)); return Proto(len_val); } @@ -59,7 +59,7 @@ class Proto : public Pointable, public Var { void* GetLocal() const override { return wrapped_var_.GetLocal(); } // Returns a copy of the stored protobuf object. 
- sapi::StatusOr GetMessage() const { + absl::StatusOr GetMessage() const { return DeserializeProto( reinterpret_cast(wrapped_var_.GetData()), wrapped_var_.GetDataSize()); From fdf0483ca0fe65fcc03a96924decb79dbc70a1d8 Mon Sep 17 00:00:00 2001 From: Christian Blichmann Date: Thu, 3 Sep 2020 07:40:09 -0700 Subject: [PATCH 35/42] Migrate to open-source `absl::StatusOr<>` This removes our own fork of `absl::StatusOr<>`. Sandboxed API still includes a custom matcher for Googletest, as that is not open source yet. For compatibility, the `statusor.h` header is still retained and now aliases `sapi::StatusOr<>` to `absl::StatusOr<>`. PiperOrigin-RevId: 329916309 Change-Id: I0544b73a9e312dce499bc4128c28457e04ab9929 --- cmake/abseil/CMakeLists.txt.in | 2 +- sandboxed_api/CMakeLists.txt | 9 +- sandboxed_api/bazel/sapi_deps.bzl | 6 +- sandboxed_api/examples/zlib/CMakeLists.txt | 2 +- sandboxed_api/sandbox2/CMakeLists.txt | 17 +- .../examples/network_proxy/BUILD.bazel | 2 +- .../examples/network_proxy/CMakeLists.txt | 5 +- .../network_proxy/networkproxy_bin.cc | 1 + .../sandbox2/network_proxy/CMakeLists.txt | 14 +- sandboxed_api/sandbox2/testcases/BUILD.bazel | 7 +- sandboxed_api/sandbox2/util/CMakeLists.txt | 5 +- sandboxed_api/sandbox2/util_test.cc | 4 +- sandboxed_api/util/BUILD.bazel | 30 +-- sandboxed_api/util/CMakeLists.txt | 22 +- sandboxed_api/util/status_macros.h | 2 +- sandboxed_api/util/status_macros_test.cc | 27 +-- sandboxed_api/util/status_matchers.h | 2 +- sandboxed_api/util/statusor.h | 209 +----------------- 18 files changed, 81 insertions(+), 285 deletions(-) diff --git a/cmake/abseil/CMakeLists.txt.in b/cmake/abseil/CMakeLists.txt.in index 2484f6e..01aacba 100644 --- a/cmake/abseil/CMakeLists.txt.in +++ b/cmake/abseil/CMakeLists.txt.in @@ -18,7 +18,7 @@ project(absl-download NONE) include(ExternalProject) ExternalProject_Add(absl GIT_REPOSITORY https://github.com/abseil/abseil-cpp - GIT_TAG 6e18c7115df9b7ca0987cc346b1b1d4b3cc829b3 # 2020-04-28 + GIT_TAG 
0e9921b75a0fdd639a504ec8443fc1fe801becd7 # 2020-09-02 SOURCE_DIR "${CMAKE_BINARY_DIR}/absl-src" BINARY_DIR "${CMAKE_BINARY_DIR}/absl-build" CONFIGURE_COMMAND "" diff --git a/sandboxed_api/CMakeLists.txt b/sandboxed_api/CMakeLists.txt index 0f4ca85..c2b6d2f 100644 --- a/sandboxed_api/CMakeLists.txt +++ b/sandboxed_api/CMakeLists.txt @@ -43,6 +43,8 @@ add_library(sapi_embed_file STATIC add_library(sapi::embed_file ALIAS sapi_embed_file) target_link_libraries(sapi_embed_file PRIVATE absl::flat_hash_map + absl::status + absl::statusor absl::strings absl::synchronization glog::glog @@ -65,6 +67,8 @@ add_library(sapi::sapi ALIAS sapi_sapi) target_link_libraries(sapi_sapi PRIVATE absl::flat_hash_map absl::memory + absl::status + absl::statusor absl::str_format absl::strings absl::synchronization @@ -76,7 +80,6 @@ target_link_libraries(sapi_sapi sandbox2::strerror sandbox2::util sapi::embed_file - sapi::status sapi::vars PUBLIC absl::core_headers sandbox2::client @@ -137,6 +140,8 @@ add_library(sapi_vars STATIC add_library(sapi::vars ALIAS sapi_vars) target_link_libraries(sapi_vars PRIVATE absl::core_headers + absl::status + absl::statusor absl::str_format absl::strings absl::synchronization @@ -147,7 +152,6 @@ target_link_libraries(sapi_vars PRIVATE sapi::lenval_core sapi::proto_arg_proto sapi::status - sapi::statusor sapi::var_type ) @@ -178,6 +182,7 @@ if(SAPI_ENABLE_TESTS) ) target_link_libraries(sapi_test PRIVATE absl::memory + absl::status benchmark sapi::sapi sapi::status diff --git a/sandboxed_api/bazel/sapi_deps.bzl b/sandboxed_api/bazel/sapi_deps.bzl index ade3211..ceff2f4 100644 --- a/sandboxed_api/bazel/sapi_deps.bzl +++ b/sandboxed_api/bazel/sapi_deps.bzl @@ -34,9 +34,9 @@ def sapi_deps(): maybe( http_archive, name = "com_google_absl", - sha256 = "6668ada01192e2b95b42bb3668cfa5282c047de5176f5e567028e12f8bfb8aef", # 2020-04-28 - strip_prefix = "abseil-cpp-6e18c7115df9b7ca0987cc346b1b1d4b3cc829b3", - urls = 
["https://github.com/abseil/abseil-cpp/archive/6e18c7115df9b7ca0987cc346b1b1d4b3cc829b3.zip"], + sha256 = "8061df0ebbd3f599bcd3f5e57fb8003564d50a9b6a81a7f968fb0196b952365d", # 2020-09-02 + strip_prefix = "abseil-cpp-0e9921b75a0fdd639a504ec8443fc1fe801becd7", + urls = ["https://github.com/abseil/abseil-cpp/archive/0e9921b75a0fdd639a504ec8443fc1fe801becd7.zip"], ) maybe( http_archive, diff --git a/sandboxed_api/examples/zlib/CMakeLists.txt b/sandboxed_api/examples/zlib/CMakeLists.txt index 561b6d3..5750c7d 100644 --- a/sandboxed_api/examples/zlib/CMakeLists.txt +++ b/sandboxed_api/examples/zlib/CMakeLists.txt @@ -32,11 +32,11 @@ add_executable(main_zlib main_zlib.cc ) target_link_libraries(main_zlib PRIVATE + absl::status sapi::base glog::glog sapi::flags sapi::sapi sapi::status - sapi::statusor sapi::zlib_sapi ) diff --git a/sandboxed_api/sandbox2/CMakeLists.txt b/sandboxed_api/sandbox2/CMakeLists.txt index f5919ec..e70356f 100644 --- a/sandboxed_api/sandbox2/CMakeLists.txt +++ b/sandboxed_api/sandbox2/CMakeLists.txt @@ -271,6 +271,8 @@ target_link_libraries(sandbox2_sandbox2 absl::flat_hash_set absl::memory absl::optional + absl::status + absl::statusor absl::str_format absl::strings absl::synchronization @@ -300,7 +302,6 @@ target_link_libraries(sandbox2_sandbox2 sandbox2::util sandbox2::violation_proto sapi::base - sapi::statusor PUBLIC sapi::flags sapi::status sandbox2::logsink @@ -351,6 +352,8 @@ add_library(sandbox2_forkserver STATIC add_library(sandbox2::forkserver ALIAS sandbox2_forkserver) target_link_libraries(sandbox2_forkserver PRIVATE absl::memory + absl::status + absl::statusor absl::str_format absl::strings libcap::libcap @@ -369,7 +372,6 @@ target_link_libraries(sandbox2_forkserver PRIVATE sandbox2::util sapi::base sapi::raw_logging - sapi::statusor ) # sandboxed_api/sandbox2:fork_client @@ -397,6 +399,7 @@ target_link_libraries(sandbox2_mounts PRIVATE absl::core_headers absl::flat_hash_set absl::status + absl::statusor absl::str_format absl::strings 
protobuf::libprotobuf @@ -408,7 +411,6 @@ target_link_libraries(sandbox2_mounts PRIVATE sapi::base sapi::raw_logging sapi::status - sapi::statusor ) # sandboxed_api/sandbox2:namespace @@ -463,8 +465,8 @@ target_link_libraries(sandbox2_util sandbox2::strerror sapi::base sapi::raw_logging - sapi::statusor PUBLIC absl::status + absl::statusor ) target_compile_options(sandbox2_util PRIVATE # The default is 16384, however we need to do a clone with a @@ -482,12 +484,13 @@ add_library(sandbox2::buffer ALIAS sandbox2_buffer) target_link_libraries(sandbox2_buffer PRIVATE absl::core_headers absl::memory + absl::status + absl::statusor absl::strings sandbox2::strerror sandbox2::util sapi::base sapi::status - sapi::statusor ) # sandboxed_api/sandbox2:forkserver_proto @@ -527,6 +530,8 @@ add_library(sandbox2_comms STATIC add_library(sandbox2::comms ALIAS sandbox2_comms) target_link_libraries(sandbox2_comms PRIVATE absl::memory + absl::status + absl::statusor absl::str_format absl::strings sandbox2::strerror @@ -534,7 +539,6 @@ target_link_libraries(sandbox2_comms sapi::base sapi::raw_logging sapi::status_proto - sapi::statusor PUBLIC absl::core_headers absl::status absl::synchronization @@ -832,6 +836,7 @@ if(SAPI_ENABLE_TESTS) ) target_link_libraries(stack_trace_test PRIVATE absl::memory + absl::status absl::strings sandbox2::bpf_helper sandbox2::fileops diff --git a/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel b/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel index fb07dd5..ccbefc5 100644 --- a/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel +++ b/sandboxed_api/sandbox2/examples/network_proxy/BUILD.bazel @@ -27,7 +27,6 @@ cc_binary( deps = [ "//sandboxed_api/sandbox2", "//sandboxed_api/sandbox2:comms", - "//sandboxed_api/sandbox2/network_proxy:filtering", "//sandboxed_api/sandbox2/util:bpf_helper", "//sandboxed_api/sandbox2/util:fileops", "//sandboxed_api/sandbox2/util:runfiles", @@ -50,6 +49,7 @@ cc_binary( "//sandboxed_api/util:status", 
"@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", ], ) diff --git a/sandboxed_api/sandbox2/examples/network_proxy/CMakeLists.txt b/sandboxed_api/sandbox2/examples/network_proxy/CMakeLists.txt index 084c8ed..3df9f21 100644 --- a/sandboxed_api/sandbox2/examples/network_proxy/CMakeLists.txt +++ b/sandboxed_api/sandbox2/examples/network_proxy/CMakeLists.txt @@ -31,13 +31,14 @@ target_link_libraries(sandbox2_networkproxy_sandbox PRIVATE sapi::flags ) - # sandboxed_api/sandbox2/examples/networkproxy:networkproxy_bin add_executable(sandbox2_networkproxy_bin networkproxy_bin.cc ) add_executable(sandbox2::networkproxy_bin ALIAS sandbox2_networkproxy_bin) target_link_libraries(sandbox2_networkproxy_bin PRIVATE + absl::status + absl::statusor absl::str_format glog::glog gflags::gflags @@ -48,6 +49,4 @@ target_link_libraries(sandbox2_networkproxy_bin PRIVATE sapi::base sapi::flags sapi::status - sapi::statusor ) - diff --git a/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc b/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc index 84eb715..d5d01f0 100644 --- a/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc +++ b/sandboxed_api/sandbox2/examples/network_proxy/networkproxy_bin.cc @@ -14,6 +14,7 @@ #include "sandboxed_api/util/flag.h" #include "absl/status/status.h" #include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "sandboxed_api/sandbox2/client.h" #include "sandboxed_api/sandbox2/comms.h" diff --git a/sandboxed_api/sandbox2/network_proxy/CMakeLists.txt b/sandboxed_api/sandbox2/network_proxy/CMakeLists.txt index 8ae6c40..c23d917 100644 --- a/sandboxed_api/sandbox2/network_proxy/CMakeLists.txt +++ b/sandboxed_api/sandbox2/network_proxy/CMakeLists.txt @@ -33,14 +33,14 @@ add_library(sandbox2_network_proxy_filtering STATIC filtering.h ) 
add_library(sandbox2::network_proxy_filtering ALIAS sandbox2_network_proxy_filtering) -target_link_libraries(sandbox2_network_proxy_filtering PRIVATE - absl::memory - glog::glog - sandbox2::comms - sandbox2::fileops - sapi::base +target_link_libraries(sandbox2_network_proxy_filtering + PRIVATE absl::memory + absl::status + glog::glog + sandbox2::comms + sandbox2::fileops + sapi::base PUBLIC sapi::status - sapi::statusor ) # sandboxed_api/sandbox2/network_proxy:client diff --git a/sandboxed_api/sandbox2/testcases/BUILD.bazel b/sandboxed_api/sandbox2/testcases/BUILD.bazel index 80a1441..1983e4d 100644 --- a/sandboxed_api/sandbox2/testcases/BUILD.bazel +++ b/sandboxed_api/sandbox2/testcases/BUILD.bazel @@ -41,7 +41,12 @@ STATIC_LINKOPTS = [ ] # TODO(https://github.com/bazelbuild/bazel/issues/8672): Remove this workaround -EXTRA_FULLY_STATIC_LINKOPTS = ["-l:libstdc++.a"] +# Change is scheduled for Bazel 4.0. Specifying +# `--incompatible_linkopts_to_linklibs` also works +EXTRA_FULLY_STATIC_LINKOPTS = [ + "-l:libstdc++.a", + "-l:libm.a", +] cc_binary( name = "abort", diff --git a/sandboxed_api/sandbox2/util/CMakeLists.txt b/sandboxed_api/sandbox2/util/CMakeLists.txt index 1e13da4..46af750 100644 --- a/sandboxed_api/sandbox2/util/CMakeLists.txt +++ b/sandboxed_api/sandbox2/util/CMakeLists.txt @@ -80,7 +80,6 @@ target_link_libraries(sandbox2_util_minielf PRIVATE sandbox2::util sapi::base sapi::raw_logging - sapi::statusor ) # sandboxed_api/sandbox2/util:temp_file @@ -95,7 +94,7 @@ target_link_libraries(sandbox2_util_temp_file sandbox2::strerror sapi::base PUBLIC absl::status - sapi::statusor + absl::statusor ) # sandboxed_api/sandbox2/util:maps_parser @@ -106,9 +105,9 @@ add_library(sandbox2_util_maps_parser STATIC add_library(sandbox2::maps_parser ALIAS sandbox2_util_maps_parser) target_link_libraries(sandbox2_util_maps_parser PRIVATE absl::status + absl::statusor absl::strings sapi::base - sapi::statusor ) # sandboxed_api/sandbox2/util:runfiles diff --git 
a/sandboxed_api/sandbox2/util_test.cc b/sandboxed_api/sandbox2/util_test.cc index 55de298..3fa21ff 100644 --- a/sandboxed_api/sandbox2/util_test.cc +++ b/sandboxed_api/sandbox2/util_test.cc @@ -23,8 +23,8 @@ #include "sandboxed_api/sandbox2/testing.h" #include "sandboxed_api/sandbox2/util/path.h" -using testing::Gt; -using testing::IsTrue; +using ::testing::Gt; +using ::testing::IsTrue; namespace sandbox2 { namespace util { diff --git a/sandboxed_api/util/BUILD.bazel b/sandboxed_api/util/BUILD.bazel index 6f21685..d0f453c 100644 --- a/sandboxed_api/util/BUILD.bazel +++ b/sandboxed_api/util/BUILD.bazel @@ -45,20 +45,15 @@ cc_library( cc_library( name = "statusor", hdrs = ["statusor.h"], - copts = sapi_platform_copts(), - visibility = ["//visibility:public"], + deprecation = "Migrate to `absl::StatusOr`", deps = [ - ":raw_logging", - ":status", "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/base:log_severity", - "@com_google_absl//absl/status", - "@com_google_absl//absl/types:variant", + "@com_google_absl//absl/status:statusor", ], ) -# gMock matchers for absl::Status and sapi::StatusOr and a gUnit printer -# extension for sapi::StatusOr. +# gMock matchers for absl::Status and absl::StatusOr and a gUnit printer +# extension. Adapted from the version in Asylo. cc_library( name = "status_matchers", testonly = 1, @@ -67,8 +62,8 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":status", - ":statusor", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/types:optional", "@com_google_googletest//:gtest", ], @@ -87,19 +82,6 @@ cc_test( ], ) -# Tests for the StatusOr template class. -cc_test( - name = "statusor_test", - srcs = ["statusor_test.cc"], - copts = sapi_platform_copts(), - deps = [ - ":status", - ":status_matchers", - ":statusor", - "@com_google_googletest//:gtest_main", - ], -) - # Tests for the Status macros. 
cc_test( name = "status_macros_test", @@ -108,9 +90,9 @@ cc_test( deps = [ ":status", ":status_matchers", - ":statusor", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", ], diff --git a/sandboxed_api/util/CMakeLists.txt b/sandboxed_api/util/CMakeLists.txt index 4bd6d25..9564188 100644 --- a/sandboxed_api/util/CMakeLists.txt +++ b/sandboxed_api/util/CMakeLists.txt @@ -48,11 +48,10 @@ add_library(sapi_util_statusor STATIC ) add_library(sapi::statusor ALIAS sapi_util_statusor) target_link_libraries(sapi_util_statusor PRIVATE - absl::base absl::core_headers - absl::variant - sapi::raw_logging - sapi::status + absl::status + absl::statusor + sapi::base ) # sandboxed_api/util:flag @@ -90,7 +89,6 @@ if(SAPI_ENABLE_TESTS) sapi::base PUBLIC absl::status sapi::status - sapi::statusor ) # sandboxed_api/util:status_test @@ -98,20 +96,24 @@ if(SAPI_ENABLE_TESTS) status_test.cc ) target_link_libraries(status_test PRIVATE + sapi::status sapi::status_matchers sapi::test_main + absl::status absl::type_traits ) gtest_discover_tests(status_test) - # sandboxed_api/util:statusor_test - add_executable(statusor_test - statusor_test.cc + # sandboxed_api/util:status_macros_test + add_executable(status_macros_test + status_macros_test.cc ) - target_link_libraries(statusor_test PRIVATE + target_link_libraries(status_macros_test PRIVATE sapi::status_matchers sapi::test_main + absl::status + absl::statusor absl::type_traits ) - gtest_discover_tests(statusor_test) + gtest_discover_tests(status_macros_test) endif() diff --git a/sandboxed_api/util/status_macros.h b/sandboxed_api/util/status_macros.h index 8c7c011..d22215b 100644 --- a/sandboxed_api/util/status_macros.h +++ b/sandboxed_api/util/status_macros.h @@ -42,6 +42,6 @@ if (ABSL_PREDICT_FALSE(!statusor.ok())) { \ return statusor.status(); \ } \ - lhs = std::move(statusor).ValueOrDie(); + lhs = 
std::move(statusor).value(); #endif // THIRD_PARTY_SAPI_UTIL_STATUS_MACROS_H_ diff --git a/sandboxed_api/util/status_macros_test.cc b/sandboxed_api/util/status_macros_test.cc index f74c83c..647149b 100644 --- a/sandboxed_api/util/status_macros_test.cc +++ b/sandboxed_api/util/status_macros_test.cc @@ -20,10 +20,10 @@ #include "gtest/gtest.h" #include "absl/memory/memory.h" #include "absl/status/status.h" +#include "absl/status/statusor.h" #include "absl/strings/str_cat.h" #include "sandboxed_api/util/status.h" #include "sandboxed_api/util/status_matchers.h" -#include "sandboxed_api/util/statusor.h" namespace sapi { namespace { @@ -52,17 +52,17 @@ TEST(ReturnIfError, ReturnsOnErrorFromLambda) { TEST(AssignOrReturn, AssignsMultipleVariablesInSequence) { auto func = []() -> absl::Status { int value1; - SAPI_ASSIGN_OR_RETURN(value1, StatusOr(1)); + SAPI_ASSIGN_OR_RETURN(value1, absl::StatusOr(1)); EXPECT_EQ(1, value1); int value2; - SAPI_ASSIGN_OR_RETURN(value2, StatusOr(2)); + SAPI_ASSIGN_OR_RETURN(value2, absl::StatusOr(2)); EXPECT_EQ(2, value2); int value3; - SAPI_ASSIGN_OR_RETURN(value3, StatusOr(3)); + SAPI_ASSIGN_OR_RETURN(value3, absl::StatusOr(3)); EXPECT_EQ(3, value3); int value4; SAPI_ASSIGN_OR_RETURN(value4, - StatusOr(absl::UnknownError("EXPECTED"))); + absl::StatusOr(absl::UnknownError("EXPECTED"))); return absl::UnknownError(absl::StrCat("ERROR: assigned value ", value4)); }; @@ -72,11 +72,12 @@ TEST(AssignOrReturn, AssignsMultipleVariablesInSequence) { TEST(AssignOrReturn, AssignsRepeatedlyToSingleVariable) { auto func = []() -> absl::Status { int value = 1; - SAPI_ASSIGN_OR_RETURN(value, StatusOr(2)); + SAPI_ASSIGN_OR_RETURN(value, absl::StatusOr(2)); EXPECT_EQ(2, value); - SAPI_ASSIGN_OR_RETURN(value, StatusOr(3)); + SAPI_ASSIGN_OR_RETURN(value, absl::StatusOr(3)); EXPECT_EQ(3, value); - SAPI_ASSIGN_OR_RETURN(value, StatusOr(absl::UnknownError("EXPECTED"))); + SAPI_ASSIGN_OR_RETURN(value, + absl::StatusOr(absl::UnknownError("EXPECTED"))); return 
absl::UnknownError("ERROR"); }; @@ -87,7 +88,7 @@ TEST(AssignOrReturn, MovesUniquePtr) { auto func = []() -> absl::Status { std::unique_ptr ptr; SAPI_ASSIGN_OR_RETURN( - ptr, StatusOr>(absl::make_unique(1))); + ptr, absl::StatusOr>(absl::make_unique(1))); EXPECT_EQ(*ptr, 1); return absl::UnknownError("EXPECTED"); }; @@ -98,8 +99,8 @@ TEST(AssignOrReturn, MovesUniquePtr) { TEST(AssignOrReturn, DoesNotAssignUniquePtrOnErrorStatus) { auto func = []() -> absl::Status { std::unique_ptr ptr; - SAPI_ASSIGN_OR_RETURN( - ptr, StatusOr>(absl::UnknownError("EXPECTED"))); + SAPI_ASSIGN_OR_RETURN(ptr, absl::StatusOr>( + absl::UnknownError("EXPECTED"))); EXPECT_EQ(ptr, nullptr); return absl::OkStatus(); }; @@ -111,10 +112,10 @@ TEST(AssignOrReturn, MovesUniquePtrRepeatedlyToSingleVariable) { auto func = []() -> absl::Status { std::unique_ptr ptr; SAPI_ASSIGN_OR_RETURN( - ptr, StatusOr>(absl::make_unique(1))); + ptr, absl::StatusOr>(absl::make_unique(1))); EXPECT_EQ(*ptr, 1); SAPI_ASSIGN_OR_RETURN( - ptr, StatusOr>(absl::make_unique(2))); + ptr, absl::StatusOr>(absl::make_unique(2))); EXPECT_EQ(*ptr, 2); return absl::UnknownError("EXPECTED"); }; diff --git a/sandboxed_api/util/status_matchers.h b/sandboxed_api/util/status_matchers.h index c7ed2e5..f181fc6 100644 --- a/sandboxed_api/util/status_matchers.h +++ b/sandboxed_api/util/status_matchers.h @@ -19,9 +19,9 @@ #include "gmock/gmock.h" #include "absl/status/status.h" +#include "absl/status/statusor.h" #include "absl/types/optional.h" #include "sandboxed_api/util/status_macros.h" -#include "sandboxed_api/util/statusor.h" #define SAPI_ASSERT_OK_AND_ASSIGN(lhs, rexpr) \ SAPI_ASSERT_OK_AND_ASSIGN_IMPL( \ diff --git a/sandboxed_api/util/statusor.h b/sandboxed_api/util/statusor.h index f1f5be3..51bd488 100644 --- a/sandboxed_api/util/statusor.h +++ b/sandboxed_api/util/statusor.h @@ -12,220 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-// This file and it's implementation provide a custom fork of -// util/task/statusor.h. This will become obsolete and will be replaced once -// Abseil releases absl::Status. - #ifndef THIRD_PARTY_SAPI_UTIL_STATUSOR_H_ #define THIRD_PARTY_SAPI_UTIL_STATUSOR_H_ -#include -#include - -#include "absl/base/internal/raw_logging.h" #include "absl/base/attributes.h" -#include "absl/base/log_severity.h" -#include "absl/status/status.h" -#include "absl/types/variant.h" -#include "sandboxed_api/util/raw_logging.h" +#include "absl/status/statusor.h" namespace sapi { template -class ABSL_MUST_USE_RESULT StatusOr { - template - friend class StatusOr; - - public: - using element_type = T; - - explicit StatusOr() : variant_(absl::UnknownError("")) {} - - StatusOr(const StatusOr&) = default; - StatusOr& operator=(const StatusOr&) = default; - - StatusOr(StatusOr&&) = default; - StatusOr& operator=(StatusOr&&) = default; - - // Not implemented: - // template StatusOr(const StatusOr& other) - // template StatusOr(StatusOr&& other) - - template - StatusOr& operator=(const StatusOr& other) { - if (other.ok()) { - variant_ = other.value(); - } else { - variant_ = other.status(); - } - return *this; - } - - template - StatusOr& operator=(StatusOr&& other) { - if (other.ok()) { - variant_ = std::move(other).value(); - } else { - variant_ = std::move(other).status(); - } - return *this; - } - - StatusOr(const T& value) : variant_(value) {} - - StatusOr(const absl::Status& status) : variant_(status) { EnsureNotOk(); } - - // Not implemented: - // template StatusOr& operator=(U&& value) - - StatusOr(T&& value) : variant_(std::move(value)) {} - - StatusOr(absl::Status&& value) : variant_(std::move(value)) {} - - StatusOr& operator=(absl::Status&& status) { - variant_ = std::move(status); - EnsureNotOk(); - } - - template - explicit StatusOr(absl::in_place_t, Args&&... 
args) - : StatusOr(T(std::forward(args)...)) {} - - template - explicit StatusOr(absl::in_place_t, std::initializer_list ilist, - Args&&... args) - : StatusOr(ilist, U(std::forward(args)...)) {} - - explicit operator bool() const { return ok(); } - - ABSL_MUST_USE_RESULT bool ok() const { - return absl::holds_alternative(variant_); - } - - const absl::Status& status() const& { - static const auto* ok_status = new absl::Status(); - return ok() ? *ok_status : absl::get(variant_); - } - - absl::Status status() && { - return ok() ? absl::OkStatus() - : std::move(absl::get(variant_)); - } - - const T& value() const& { - EnsureOk(); - return absl::get(variant_); - } - - T& value() & { - EnsureOk(); - return absl::get(variant_); - } - - const T&& value() const&& { - EnsureOk(); - return absl::get(std::move(variant_)); - } - - T&& value() && { - EnsureOk(); - return absl::get(std::move(variant_)); - } - - const T& ValueOrDie() const& { - EnsureOk(); - return absl::get(variant_); - } - - T& ValueOrDie() & { - EnsureOk(); - return absl::get(variant_); - } - - T&& ValueOrDie() && { - EnsureOk(); - return absl::get(std::move(variant_)); - } - - const T& operator*() const& { - EnsureOk(); - return absl::get(variant_); - } - - T& operator*() & { - EnsureOk(); - return absl::get(variant_); - } - - const T&& operator*() const&& { - EnsureOk(); - return absl::get(std::move(variant_)); - } - - T&& operator*() && { - EnsureOk(); - return absl::get(std::move(variant_)); - } - - const T* operator->() const { - EnsureOk(); - return &absl::get(variant_); - } - - T* operator->() { - EnsureOk(); - return &absl::get(variant_); - } - - template - T value_or(U&& default_value) const& { - if (ok()) { - return absl::get(variant_); - } - return std::forward(default_value); - } - - template - T value_or(U&& default_value) && { - if (ok()) { - return absl::get(std::move(variant_)); - } - return std::forward(default_value); - } - - void IgnoreError() const { /* no-op */ - } - - template - T& 
emplace(Args&&... args) { - return variant_.template emplace(std::forward(args)...); - } - - template - T& emplace(std::initializer_list ilist, Args&&... args) { - return variant_.template emplace(ilist, std::forward(args)...); - } - - private: - void EnsureOk() const { - if (!ok()) { - // GoogleTest needs this exact error message for death tests to work. - SAPI_RAW_LOG(FATAL, - "Attempting to fetch value instead of handling error %s", - status().message()); - } - } - - void EnsureNotOk() const { - if (ok()) { - SAPI_RAW_LOG( - FATAL, - "An OK status is not a valid constructor argument to StatusOr"); - } - } - - absl::variant variant_; -}; +using StatusOr ABSL_DEPRECATED("Use absl::StatusOr instead") = + absl::StatusOr; } // namespace sapi From 1869fe515ff110f5b3b12f72ff0e5d9afd04a314 Mon Sep 17 00:00:00 2001 From: doinachiroiu Date: Thu, 3 Sep 2020 14:59:54 +0000 Subject: [PATCH 36/42] Required changes resolved --- oss-internship-2020/pffft/CMakeLists.txt | 3 -- oss-internship-2020/pffft/README.md | 15 ++++---- .../pffft/main_pffft_sandboxed.cc | 36 ++++++++++--------- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/oss-internship-2020/pffft/CMakeLists.txt b/oss-internship-2020/pffft/CMakeLists.txt index be687cd..b172009 100644 --- a/oss-internship-2020/pffft/CMakeLists.txt +++ b/oss-internship-2020/pffft/CMakeLists.txt @@ -19,8 +19,6 @@ project(pffft CXX C) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED True) -set(PFFFT_ROOT_DIR https://bitbucket.org/jpommier/pffft.git) - add_library(pffft STATIC master/pffft.c master/pffft.h @@ -45,7 +43,6 @@ endif() target_link_libraries(pffft PUBLIC ${MATH_LIBS}) - # Adding dependencies set(SAPI_ROOT "../.." 
CACHE PATH "Path to the Sandboxed API source tree") # Then configure: diff --git a/oss-internship-2020/pffft/README.md b/oss-internship-2020/pffft/README.md index 194c6b6..2e023c2 100644 --- a/oss-internship-2020/pffft/README.md +++ b/oss-internship-2020/pffft/README.md @@ -4,16 +4,13 @@ Build System: CMake OS: Linux ### Check out the PFFFT library & CMake set up -`git checkout -b master` - -`git submodule update --init --recursive` - -`mkdir -p build && cd build` - -`cmake .. -G Ninja -DPFFFT_ROOT_DIR=$PWD` - -`ninja` +``` +git submodule update --init --recursive +mkdir -p build && cd build +cmake .. -G Ninja -DPFFFT_ROOT_DIR=$PWD +ninjas +``` ### For testing: `cd build`, then `./pffft_sandboxed` diff --git a/oss-internship-2020/pffft/main_pffft_sandboxed.cc b/oss-internship-2020/pffft/main_pffft_sandboxed.cc index 51e271d..f922275 100644 --- a/oss-internship-2020/pffft/main_pffft_sandboxed.cc +++ b/oss-internship-2020/pffft/main_pffft_sandboxed.cc @@ -136,51 +136,55 @@ absl::Status PffftMain() { if (simd_size_iter == 0) simd_size_iter = 1; if (complex) { - api.cffti(n, work_array.PtrBoth()).IgnoreError(); + SAPI_RETURN_IF_ERROR(api.cffti(n, work_array.PtrBoth())) } else { - api.rffti(n, work_array.PtrBoth()).IgnoreError(); + SAPI_RETURN_IF_ERROR(api.rffti(n, work_array.PtrBoth())); } t0 = UclockSec(); for (int iter = 0; iter < simd_size_iter; ++iter) { if (complex) { - api.cfftf(n, x_array.PtrBoth(), work_array.PtrBoth()).IgnoreError(); - api.cfftb(n, x_array.PtrBoth(), work_array.PtrBoth()).IgnoreError(); + SAPI_RETURN_IF_ERROR( + api.cfftf(n, x_array.PtrBoth(), work_array.PtrBoth())); + SAPI_RETURN_IF_ERROR( + api.cfftb(n, x_array.PtrBoth(), work_array.PtrBoth())); } else { - api.rfftf(n, x_array.PtrBoth(), work_array.PtrBoth()).IgnoreError(); - api.rfftb(n, x_array.PtrBoth(), work_array.PtrBoth()).IgnoreError(); + SAPI_RETURN_IF_ERROR( + api.rfftf(n, x_array.PtrBoth(), work_array.PtrBoth())); + SAPI_RETURN_IF_ERROR( + api.rfftb(n, x_array.PtrBoth(), 
work_array.PtrBoth())); } } t1 = UclockSec(); flops = (simd_size_iter * 2) * - ((complex ? 5 : 2.5) * n * log((double)n) / M_LN2); + ((complex ? 5 : 2.5) * static_cast(n) * log(static_cast(n)) / M_LN2); ShowOutput("FFTPack", n, complex, flops, t0, t1, simd_size_iter); } // PFFFT benchmark { SAPI_ASSIGN_OR_RETURN( - PFFFT_Setup *s, + PFFFT_Setup * s, api.pffft_new_setup(n, complex ? PFFFT_COMPLEX : PFFFT_REAL)); sapi::v::RemotePtr s_reg(s); t0 = UclockSec(); for (int iter = 0; iter < max_iter; ++iter) { - api.pffft_transform(&s_reg, x_array.PtrBoth(), z_array.PtrBoth(), - y_array.PtrBoth(), PFFFT_FORWARD) - .IgnoreError(); - api.pffft_transform(&s_reg, x_array.PtrBoth(), z_array.PtrBoth(), - y_array.PtrBoth(), PFFFT_FORWARD) - .IgnoreError(); + SAPI_RETURN_IF_ERROR( + api.pffft_transform(&s_reg, x_array.PtrBoth(), z_array.PtrBoth(), + y_array.PtrBoth(), PFFFT_FORWARD)); + SAPI_RETURN_IF_ERROR( + api.pffft_transform(&s_reg, x_array.PtrBoth(), z_array.PtrBoth(), + y_array.PtrBoth(), PFFFT_FORWARD)); } t1 = UclockSec(); - api.pffft_destroy_setup(&s_reg).IgnoreError(); + SAPI_RETURN_IF_ERROR(api.pffft_destroy_setup(&s_reg)); flops = (max_iter * 2) * ((complex ? 5 : 2.5) * static_cast(n) * - log((double)n) / M_LN2); + log(static_cast(n)) / M_LN2); ShowOutput("PFFFT", n, complex, flops, t0, t1, max_iter); LOG(INFO) << "n = " << n << " SUCCESSFULLY"; From 776e34502a3e58dac0c61f7cf7f9a328bd63d658 Mon Sep 17 00:00:00 2001 From: Sandboxed API Team Date: Tue, 8 Sep 2020 12:23:59 -0700 Subject: [PATCH 37/42] Internal cleanup migrating StatusOr. 
PiperOrigin-RevId: 330561315 Change-Id: Ie8d8857e7fa5819be3358b26425790ede97c99f8 --- sandboxed_api/tools/clang_generator/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sandboxed_api/tools/clang_generator/types.h b/sandboxed_api/tools/clang_generator/types.h index b4d5946..7947df3 100644 --- a/sandboxed_api/tools/clang_generator/types.h +++ b/sandboxed_api/tools/clang_generator/types.h @@ -68,7 +68,7 @@ std::string MapQualTypeParameter(const clang::ASTContext& context, // Maps a qualified type used as a function return type to a type name // compatible with the generated Sandboxed API. Uses MapQualTypeParameter() and -// wraps the type in a absl::StatusOr<> if qual is non-void. Otherwise returns +// wraps the type in an absl::StatusOr<> if qual is non-void. Otherwise returns // absl::Status. std::string MapQualTypeReturn(const clang::ASTContext& context, clang::QualType qual); From 1f8e88586b46fea3252264369147564164b46cb4 Mon Sep 17 00:00:00 2001 From: Kevin Hamacher Date: Wed, 9 Sep 2020 02:12:05 -0700 Subject: [PATCH 38/42] Log details when executor fails to open the sandboxee binary PiperOrigin-RevId: 330680717 Change-Id: I4ec855861196177321783dc94f2e05a28e84d512 --- sandboxed_api/sandbox2/executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sandboxed_api/sandbox2/executor.cc b/sandboxed_api/sandbox2/executor.cc index 6170a9d..f2afb3a 100644 --- a/sandboxed_api/sandbox2/executor.cc +++ b/sandboxed_api/sandbox2/executor.cc @@ -86,7 +86,7 @@ pid_t Executor::StartSubProcess(int32_t clone_flags, const Namespace* ns, if (!path_.empty()) { exec_fd_ = open(path_.c_str(), O_PATH); if (exec_fd_ < 0) { - LOG(ERROR) << "Could not open file " << path_; + PLOG(ERROR) << "Could not open file " << path_; return -1; } } From 6a1e4b881c2c24f65e74f5c71b7a11b2632b8278 Mon Sep 17 00:00:00 2001 From: Christian Blichmann Date: Thu, 10 Sep 2020 05:47:32 -0700 Subject: [PATCH 39/42] Introduce config header to centralize CPU 
architecture checks This allows us to remove some uses of macros. Related changes: - Make it clear that we support hosting sandboxed binaries from 64-bit processes only. CPU architectures are x86-64 and POWER64 (little endian). - Introduced CPU architecture macros, abstracting away compiler specifics PiperOrigin-RevId: 330918134 Change-Id: Ife7ad5f14723eec9f68055127b0583b8aecd38dd --- sandboxed_api/sandbox2/BUILD.bazel | 19 +++++ sandboxed_api/sandbox2/CMakeLists.txt | 21 +++++ sandboxed_api/sandbox2/buffer_test.cc | 7 +- sandboxed_api/sandbox2/config.h | 83 +++++++++++++++++++ sandboxed_api/sandbox2/limits_test.cc | 1 + sandboxed_api/sandbox2/monitor.cc | 3 +- sandboxed_api/sandbox2/mounts.cc | 21 +++-- sandboxed_api/sandbox2/namespace_test.cc | 1 + .../sandbox2/network_proxy/BUILD.bazel | 1 + .../sandbox2/network_proxy/CMakeLists.txt | 1 + .../sandbox2/network_proxy/client.cc | 12 +-- sandboxed_api/sandbox2/policy.cc | 16 ++-- sandboxed_api/sandbox2/policy_test.cc | 9 +- sandboxed_api/sandbox2/policybuilder.cc | 22 ++--- sandboxed_api/sandbox2/policybuilder.h | 6 +- sandboxed_api/sandbox2/regs.cc | 69 ++++++++------- sandboxed_api/sandbox2/regs.h | 13 ++- sandboxed_api/sandbox2/result.h | 5 +- sandboxed_api/sandbox2/sandbox2_test.cc | 1 + sandboxed_api/sandbox2/syscall.cc | 47 +++++------ sandboxed_api/sandbox2/syscall.h | 32 +++---- sandboxed_api/sandbox2/syscall_defs.cc | 23 ++--- sandboxed_api/sandbox2/syscall_defs.h | 3 +- sandboxed_api/sandbox2/syscall_test.cc | 5 +- sandboxed_api/sandbox2/testcases/BUILD.bazel | 1 + .../sandbox2/testcases/CMakeLists.txt | 1 + sandboxed_api/sandbox2/testcases/policy.cc | 10 ++- sandboxed_api/sandbox2/util.cc | 14 ++-- 28 files changed, 282 insertions(+), 165 deletions(-) create mode 100644 sandboxed_api/sandbox2/config.h diff --git a/sandboxed_api/sandbox2/BUILD.bazel b/sandboxed_api/sandbox2/BUILD.bazel index 2bf7542..839a7e0 100644 --- a/sandboxed_api/sandbox2/BUILD.bazel +++ b/sandboxed_api/sandbox2/BUILD.bazel @@ -26,6 
+26,13 @@ licenses(["notice"]) # Apache 2.0 exports_files(["testdata/hostname"]) +cc_library( + name = "config", + hdrs = ["config.h"], + copts = sapi_platform_copts(), + deps = ["@com_google_absl//absl/base:config"], +) + cc_library( name = "bpfdisassembler", srcs = ["bpfdisassembler.cc"], @@ -40,6 +47,7 @@ cc_library( hdrs = ["regs.h"], copts = sapi_platform_copts(), deps = [ + ":config", ":syscall", ":violation_cc_proto", "//sandboxed_api/sandbox2/util:strerror", @@ -60,6 +68,7 @@ cc_library( copts = sapi_platform_copts(), visibility = ["//visibility:public"], deps = [ + ":config", ":util", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -73,6 +82,7 @@ cc_test( srcs = ["syscall_test.cc"], copts = sapi_platform_copts(), deps = [ + ":config", ":syscall", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", @@ -85,6 +95,7 @@ cc_library( hdrs = ["result.h"], copts = sapi_platform_copts(), deps = [ + ":config", ":regs", ":syscall", ":util", @@ -272,6 +283,7 @@ cc_library( deps = [ ":client", ":comms", + ":config", ":executor", ":fork_client", ":forkserver_cc_proto", @@ -404,6 +416,7 @@ cc_library( hdrs = ["mounts.h"], copts = sapi_platform_copts(), deps = [ + ":config", ":mounttree_cc_proto", "//sandboxed_api/sandbox2/util:file_base", "//sandboxed_api/sandbox2/util:fileops", @@ -468,6 +481,7 @@ cc_test( ], deps = [ ":comms", + ":config", ":namespace", ":sandbox2", ":testing", @@ -505,6 +519,7 @@ cc_library( copts = sapi_platform_copts(), visibility = ["//visibility:public"], deps = [ + ":config", "//sandboxed_api/sandbox2/util:file_base", "//sandboxed_api/sandbox2/util:fileops", "//sandboxed_api/sandbox2/util:strerror", @@ -541,6 +556,7 @@ cc_test( deps = [ ":buffer", ":comms", + ":config", ":sandbox2", ":testing", "//sandboxed_api/util:status_matchers", @@ -629,6 +645,7 @@ cc_test( copts = sapi_platform_copts(), data = ["//sandboxed_api/sandbox2/testcases:limits"], deps = [ + ":config", ":limits", ":sandbox2", 
":testing", @@ -671,6 +688,7 @@ cc_test( "//sandboxed_api/sandbox2/testcases:policy", ], deps = [ + ":config", ":limits", ":regs", ":sandbox2", @@ -695,6 +713,7 @@ cc_test( ], tags = ["local"], deps = [ + ":config", ":sandbox2", ":testing", "//sandboxed_api/sandbox2/util:bpf_helper", diff --git a/sandboxed_api/sandbox2/CMakeLists.txt b/sandboxed_api/sandbox2/CMakeLists.txt index e70356f..e6a746b 100644 --- a/sandboxed_api/sandbox2/CMakeLists.txt +++ b/sandboxed_api/sandbox2/CMakeLists.txt @@ -17,6 +17,16 @@ add_subdirectory(unwind) add_subdirectory(util) add_subdirectory(network_proxy) +# sandboxed_api/sandbox2:config +add_library(sandbox2_config STATIC + config.h +) +add_library(sandbox2::config ALIAS sandbox2_config) +target_link_libraries(sandbox2_config PRIVATE + absl::config + sapi::base +) + # sandboxed_api/sandbox2:bpfdisassembler add_library(sandbox2_bpfdisassembler STATIC bpfdisassembler.cc @@ -37,6 +47,7 @@ add_library(sandbox2::regs ALIAS sandbox2_regs) target_link_libraries(sandbox2_regs PRIVATE absl::core_headers absl::strings + sandbox2::config sandbox2::strerror sandbox2::syscall sandbox2::violation_proto @@ -72,6 +83,7 @@ target_link_libraries(sandbox2_result PRIVATE absl::base absl::memory absl::strings + sandbox2::config sandbox2::regs sandbox2::syscall sandbox2::util @@ -280,6 +292,7 @@ target_link_libraries(sandbox2_sandbox2 sandbox2::bpf_helper sandbox2::client sandbox2::comms + sandbox2::config sandbox2::executor sandbox2::file_base sandbox2::fileops @@ -403,6 +416,7 @@ target_link_libraries(sandbox2_mounts PRIVATE absl::str_format absl::strings protobuf::libprotobuf + sandbox2::config sandbox2::file_base sandbox2::fileops sandbox2::minielf @@ -460,6 +474,7 @@ target_link_libraries(sandbox2_util PRIVATE absl::core_headers absl::str_format absl::strings + sandbox2::config sandbox2::file_base sandbox2::fileops sandbox2::strerror @@ -570,6 +585,7 @@ if(SAPI_ENABLE_TESTS) ) target_link_libraries(syscall_test PRIVATE absl::strings + 
sandbox2::config sandbox2::syscall sapi::test_main ) @@ -608,6 +624,7 @@ if(SAPI_ENABLE_TESTS) absl::memory absl::strings sandbox2::comms + sandbox2::config sandbox2::fileops sandbox2::namespace sandbox2::sandbox2 @@ -632,6 +649,7 @@ if(SAPI_ENABLE_TESTS) absl::memory sandbox2::buffer sandbox2::comms + sandbox2::config sandbox2::sandbox2 sandbox2::testing sapi::status_matchers @@ -706,6 +724,7 @@ if(SAPI_ENABLE_TESTS) target_link_libraries(limits_test PRIVATE absl::memory sandbox2::bpf_helper + sandbox2::config sandbox2::limits sandbox2::sandbox2 sandbox2::testing @@ -755,6 +774,7 @@ if(SAPI_ENABLE_TESTS) absl::memory absl::strings sandbox2::bpf_helper + sandbox2::config sandbox2::limits sandbox2::regs sandbox2::sandbox2 @@ -780,6 +800,7 @@ if(SAPI_ENABLE_TESTS) absl::memory absl::strings sandbox2::bpf_helper + sandbox2::config sandbox2::sandbox2 sandbox2::testing sapi::status_matchers diff --git a/sandboxed_api/sandbox2/buffer_test.cc b/sandboxed_api/sandbox2/buffer_test.cc index 78d8cf9..876be31 100644 --- a/sandboxed_api/sandbox2/buffer_test.cc +++ b/sandboxed_api/sandbox2/buffer_test.cc @@ -29,6 +29,7 @@ #include "gtest/gtest.h" #include "absl/memory/memory.h" #include "sandboxed_api/sandbox2/comms.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/executor.h" #include "sandboxed_api/sandbox2/ipc.h" #include "sandboxed_api/sandbox2/policy.h" @@ -90,12 +91,6 @@ std::unique_ptr BufferTestcasePolicy() { .BlockSyscallWithErrno(__NR_access, ENOENT) .BuildOrDie(); -#if defined(__powerpc64__) - - s2p->AllowUnsafeMmapFiles(); - s2p->AllowUnsafeMmapShared(); -#endif /* defined(__powerpc64__) */ - return s2p; } diff --git a/sandboxed_api/sandbox2/config.h b/sandboxed_api/sandbox2/config.h new file mode 100644 index 0000000..ba0fec5 --- /dev/null +++ b/sandboxed_api/sandbox2/config.h @@ -0,0 +1,83 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SANDBOXED_API_SANDBOX2_CONFIG_H_ +#define SANDBOXED_API_SANDBOX2_CONFIG_H_ + +#include <cstdint> + +#include "absl/base/config.h" + +// GCC/Clang define __x86_64__, Visual Studio uses _M_X64 +#if defined(__x86_64__) || defined(_M_X64) +#define SAPI_X86_64 1 + +// Check various spellings for 64-bit POWER. Not checking for Visual Studio, as +// it does not support 64-bit POWER. +#elif (defined(__PPC64__) || defined(__powerpc64__) || defined(__ppc64__)) && \ + defined(ABSL_IS_LITTLE_ENDIAN) +#define SAPI_PPC64_LE 1 + +// Spellings for AArch64 +#elif defined(__aarch64__) || defined(_M_ARM64) +#define SAPI_ARM64 1 +#endif + +namespace sandbox2 { + +namespace cpu { + +// CPU architectures known to Sandbox2 +enum Architecture : uint16_t { + // Linux: Use a magic value, so it can be easily spotted in the seccomp-bpf + // bytecode decompilation stream. Must be < (1<<15), as that's the size of + // data which can be returned by BPF. + kUnknown = 0xCAF0, + kX8664, + kX86, + kPPC64LE, + kArm64, +}; + +} // namespace cpu + +namespace host_cpu { + +// Returns the current host CPU architecture if supported. If not supported, +// returns cpu::kUnknown.
+constexpr cpu::Architecture Architecture() { +#if defined(SAPI_X86_64) + return cpu::kX8664; +#elif defined(SAPI_PPC64_LE) + return cpu::kPPC64LE; +#else + return cpu::kUnknown; +#endif +} + +constexpr bool IsX8664() { return Architecture() == cpu::kX8664; } + +constexpr bool IsPPC64LE() { return Architecture() == cpu::kPPC64LE; } + +constexpr bool IsArm64() { return Architecture() == cpu::kArm64; } + +} // namespace host_cpu + +static_assert(host_cpu::Architecture() != cpu::kUnknown, + "Host CPU architecture is not supported: One of x86-64 or " + "POWER64 (little endian) is required."); + +} // namespace sandbox2 + +#endif // SANDBOXED_API_SANDBOX2_CONFIG_H_ diff --git a/sandboxed_api/sandbox2/limits_test.cc b/sandboxed_api/sandbox2/limits_test.cc index 1f3386a..35d9bba 100644 --- a/sandboxed_api/sandbox2/limits_test.cc +++ b/sandboxed_api/sandbox2/limits_test.cc @@ -23,6 +23,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/memory/memory.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/executor.h" #include "sandboxed_api/sandbox2/policy.h" #include "sandboxed_api/sandbox2/policybuilder.h" diff --git a/sandboxed_api/sandbox2/monitor.cc b/sandboxed_api/sandbox2/monitor.cc index 10e0710..d15c1af 100644 --- a/sandboxed_api/sandbox2/monitor.cc +++ b/sandboxed_api/sandbox2/monitor.cc @@ -49,6 +49,7 @@ #include "absl/time/time.h" #include "sandboxed_api/sandbox2/client.h" #include "sandboxed_api/sandbox2/comms.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/executor.h" #include "sandboxed_api/sandbox2/limits.h" #include "sandboxed_api/sandbox2/mounts.h" @@ -761,7 +762,7 @@ void Monitor::LogSyscallViolation(const Syscall& syscall) const { void Monitor::EventPtraceSeccomp(pid_t pid, int event_msg) { // If the seccomp-policy is using RET_TRACE, we request that it returns the // syscall architecture identifier in the SECCOMP_RET_DATA. 
- const auto syscall_arch = static_cast(event_msg); + const auto syscall_arch = static_cast(event_msg); Regs regs(pid); auto status = regs.Fetch(); if (!status.ok()) { diff --git a/sandboxed_api/sandbox2/mounts.cc b/sandboxed_api/sandbox2/mounts.cc index 7e800ea..d722e6f 100644 --- a/sandboxed_api/sandbox2/mounts.cc +++ b/sandboxed_api/sandbox2/mounts.cc @@ -34,6 +34,7 @@ #include "absl/strings/str_join.h" #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/util/fileops.h" #include "sandboxed_api/sandbox2/util/minielf.h" #include "sandboxed_api/sandbox2/util/path.h" @@ -132,15 +133,19 @@ std::string ResolveLibraryPath(absl::string_view lib_name, return ""; } +constexpr absl::string_view GetPlatformCPUName() { + switch (host_cpu::Architecture()) { + case cpu::kX8664: + return "x86_64"; + case cpu::kPPC64LE: + return "ppc64"; + default: + return "unknown"; + } +} + std::string GetPlatform(absl::string_view interpreter) { -#if defined(__x86_64__) - constexpr absl::string_view kCpuPlatform = "x86_64"; -#elif defined(__powerpc64__) - constexpr absl::string_view kCpuPlatform = "ppc64"; -#else - constexpr absl::string_view kCpuPlatform = "unknown"; -#endif - return absl::StrCat(kCpuPlatform, "-linux-gnu"); + return absl::StrCat(GetPlatformCPUName(), "-linux-gnu"); } } // namespace diff --git a/sandboxed_api/sandbox2/namespace_test.cc b/sandboxed_api/sandbox2/namespace_test.cc index 89da97b..2bedf16 100644 --- a/sandboxed_api/sandbox2/namespace_test.cc +++ b/sandboxed_api/sandbox2/namespace_test.cc @@ -28,6 +28,7 @@ #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" #include "sandboxed_api/sandbox2/comms.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/executor.h" #include "sandboxed_api/sandbox2/policy.h" #include "sandboxed_api/sandbox2/policybuilder.h" diff --git a/sandboxed_api/sandbox2/network_proxy/BUILD.bazel 
b/sandboxed_api/sandbox2/network_proxy/BUILD.bazel index f099939..ac58976 100644 --- a/sandboxed_api/sandbox2/network_proxy/BUILD.bazel +++ b/sandboxed_api/sandbox2/network_proxy/BUILD.bazel @@ -44,6 +44,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//sandboxed_api/sandbox2:comms", + "//sandboxed_api/sandbox2:config", "//sandboxed_api/sandbox2/util:strerror", "//sandboxed_api/util:status", "@com_google_absl//absl/memory", diff --git a/sandboxed_api/sandbox2/network_proxy/CMakeLists.txt b/sandboxed_api/sandbox2/network_proxy/CMakeLists.txt index c23d917..781cb9d 100644 --- a/sandboxed_api/sandbox2/network_proxy/CMakeLists.txt +++ b/sandboxed_api/sandbox2/network_proxy/CMakeLists.txt @@ -54,6 +54,7 @@ target_link_libraries(sandbox2_network_proxy_client PRIVATE absl::synchronization glog::glog sandbox2::comms + sandbox2::config sandbox2::strerror sapi::base sapi::status diff --git a/sandboxed_api/sandbox2/network_proxy/client.cc b/sandboxed_api/sandbox2/network_proxy/client.cc index e528041..51b1260 100644 --- a/sandboxed_api/sandbox2/network_proxy/client.cc +++ b/sandboxed_api/sandbox2/network_proxy/client.cc @@ -27,6 +27,7 @@ #include "absl/memory/memory.h" #include "absl/status/status.h" #include "absl/strings/str_cat.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/util/strerror.h" #include "sandboxed_api/util/status_macros.h" @@ -36,14 +37,13 @@ namespace sandbox2 { constexpr int SYS_SECCOMP = 1; #endif -#if defined(__x86_64__) +#if defined(SAPI_X86_64) constexpr int kRegResult = REG_RAX; constexpr int kRegSyscall = REG_RAX; constexpr int kRegArg0 = REG_RDI; constexpr int kRegArg1 = REG_RSI; constexpr int kRegArg2 = REG_RDX; -#endif -#if defined(__powerpc64__) +#elif defined(SAPI_PPC64_LE) constexpr int kRegResult = 3; constexpr int kRegSyscall = 0; constexpr int kRegArg0 = 3; @@ -161,9 +161,9 @@ void NetworkProxyHandler::ProcessSeccompTrap(int nr, siginfo_t* info, } if (!ctx) return; -#if defined(__x86_64__) +#if 
defined(SAPI_X86_64) auto* registers = ctx->uc_mcontext.gregs; -#elif defined(__powerpc64__) +#elif defined(SAPI_PPC64_LE) auto* registers = ctx->uc_mcontext.gp_regs; using ppc_gpreg_t = std::decay::type; #endif @@ -178,7 +178,7 @@ void NetworkProxyHandler::ProcessSeccompTrap(int nr, siginfo_t* info, sockfd = static_cast(registers[kRegArg0]); addr = reinterpret_cast(registers[kRegArg1]); addrlen = static_cast(registers[kRegArg2]); -#if defined(__powerpc64__) +#if defined(SAPI_PPC64_LE) } else if (syscall == __NR_socketcall && static_cast(registers[kRegArg0]) == SYS_CONNECT) { ppc_gpreg_t* args = reinterpret_cast(registers[kRegArg1]); diff --git a/sandboxed_api/sandbox2/policy.cc b/sandboxed_api/sandbox2/policy.cc index e631c0a..4e3fe03 100644 --- a/sandboxed_api/sandbox2/policy.cc +++ b/sandboxed_api/sandbox2/policy.cc @@ -85,10 +85,10 @@ std::vector Policy::GetDefaultPolicy() const { // If compiled arch is different than the runtime one, inform the Monitor. LOAD_ARCH, JEQ32(Syscall::GetHostAuditArch(), JUMP(&l, past_arch_check_l)), - JEQ32(AUDIT_ARCH_X86_64, TRACE(Syscall::kX86_64)), - JEQ32(AUDIT_ARCH_I386, TRACE(Syscall::kX86_32)), - JEQ32(AUDIT_ARCH_PPC64LE, TRACE(Syscall::kPPC_64)), - TRACE(Syscall::kUnknown), + JEQ32(AUDIT_ARCH_X86_64, TRACE(cpu::kX8664)), + JEQ32(AUDIT_ARCH_I386, TRACE(cpu::kX86)), + JEQ32(AUDIT_ARCH_PPC64LE, TRACE(cpu::kPPC64LE)), + TRACE(cpu::kUnknown), LABEL(&l, past_arch_check_l), // After the policy is uploaded, forkserver will execve the sandboxee. 
We @@ -130,10 +130,10 @@ std::vector Policy::GetDefaultPolicy() const { std::vector Policy::GetTrackingPolicy() const { return { LOAD_ARCH, - JEQ32(AUDIT_ARCH_X86_64, TRACE(Syscall::kX86_64)), - JEQ32(AUDIT_ARCH_I386, TRACE(Syscall::kX86_32)), - JEQ32(AUDIT_ARCH_PPC64LE, TRACE(Syscall::kPPC_64)), - TRACE(Syscall::kUnknown), + JEQ32(AUDIT_ARCH_X86_64, TRACE(cpu::kX8664)), + JEQ32(AUDIT_ARCH_I386, TRACE(cpu::kX86)), + JEQ32(AUDIT_ARCH_PPC64LE, TRACE(cpu::kPPC64LE)), + TRACE(cpu::kUnknown), }; } diff --git a/sandboxed_api/sandbox2/policy_test.cc b/sandboxed_api/sandbox2/policy_test.cc index 809ecd9..226b450 100644 --- a/sandboxed_api/sandbox2/policy_test.cc +++ b/sandboxed_api/sandbox2/policy_test.cc @@ -25,6 +25,7 @@ #include "gtest/gtest.h" #include "absl/memory/memory.h" #include "absl/strings/string_view.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/executor.h" #include "sandboxed_api/sandbox2/limits.h" #include "sandboxed_api/sandbox2/policybuilder.h" @@ -56,7 +57,7 @@ std::unique_ptr PolicyTestcasePolicy() { .BuildOrDie(); } -#if defined(__x86_64__) +#ifdef SAPI_X86_64 // Test that 32-bit syscalls from 64-bit are disallowed. TEST(PolicyTest, AMD64Syscall32PolicyAllowed) { SKIP_SANITIZERS_AND_COVERAGE; @@ -72,7 +73,7 @@ TEST(PolicyTest, AMD64Syscall32PolicyAllowed) { ASSERT_THAT(result.final_status(), Eq(Result::VIOLATION)); EXPECT_THAT(result.reason_code(), Eq(1)); // __NR_exit in 32-bit - EXPECT_THAT(result.GetSyscallArch(), Eq(Syscall::kX86_32)); + EXPECT_THAT(result.GetSyscallArch(), Eq(cpu::kX86)); } // Test that 32-bit syscalls from 64-bit for FS checks are disallowed. 
@@ -90,9 +91,9 @@ TEST(PolicyTest, AMD64Syscall32FsAllowed) { ASSERT_THAT(result.final_status(), Eq(Result::VIOLATION)); EXPECT_THAT(result.reason_code(), Eq(33)); // __NR_access in 32-bit - EXPECT_THAT(result.GetSyscallArch(), Eq(Syscall::kX86_32)); + EXPECT_THAT(result.GetSyscallArch(), Eq(cpu::kX86)); } -#endif // defined(__x86_64__) +#endif // Test that ptrace(2) is disallowed. TEST(PolicyTest, PtraceDisallowed) { diff --git a/sandboxed_api/sandbox2/policybuilder.cc b/sandboxed_api/sandbox2/policybuilder.cc index dbae2d9..a560190 100644 --- a/sandboxed_api/sandbox2/policybuilder.cc +++ b/sandboxed_api/sandbox2/policybuilder.cc @@ -15,14 +15,7 @@ #include "sandboxed_api/sandbox2/policybuilder.h" #include // For TCGETS - -#if defined(__x86_64__) -#include -#endif -#if defined(__powerpc64__) -#include // On PPC, TCGETS macro needs termios -#endif -#include // For the fcntl flags +#include // For the fcntl flags #include #include // For SYS_CONNECT #include // For GRND_NONBLOCK @@ -38,11 +31,18 @@ #include "absl/status/statusor.h" #include "absl/strings/escaping.h" #include "absl/strings/match.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/namespace.h" #include "sandboxed_api/sandbox2/util/bpf_helper.h" #include "sandboxed_api/sandbox2/util/path.h" #include "sandboxed_api/util/status_macros.h" +#if defined(SAPI_X86_64) +#include +#elif defined(SAPI_PPC64_LE) +#include // On PPC, TCGETS macro needs termios +#endif + namespace sandbox2 { namespace { @@ -512,7 +512,7 @@ PolicyBuilder& PolicyBuilder::AllowStaticStartup() { JEQ32(SIG_UNBLOCK, ALLOW), }); -#if defined(__x86_64__) +#ifdef SAPI_X86_64 // The second argument is a pointer. 
AddPolicyOnSyscall(__NR_arch_prctl, { ARG_32(0), @@ -901,7 +901,7 @@ PolicyBuilder& PolicyBuilder::AddNetworkProxyPolicy() { LABEL(&labels, getsockopt_end), }; }); -#if defined(__powerpc64__) +#ifdef SAPI_PPC64_LE AddPolicyOnSyscall(__NR_socketcall, { ARG_32(0), JEQ32(SYS_SOCKET, ALLOW), @@ -927,7 +927,7 @@ PolicyBuilder& PolicyBuilder::AddNetworkProxyHandlerPolicy() { }); AddPolicyOnSyscall(__NR_connect, {TRAP(0)}); -#if defined(__powerpc64__) +#ifdef SAPI_PPC64_LE AddPolicyOnSyscall(__NR_socketcall, { ARG_32(0), JEQ32(SYS_CONNECT, TRAP(0)), diff --git a/sandboxed_api/sandbox2/policybuilder.h b/sandboxed_api/sandbox2/policybuilder.h index 17e95ff..5a5fd97 100644 --- a/sandboxed_api/sandbox2/policybuilder.h +++ b/sandboxed_api/sandbox2/policybuilder.h @@ -39,8 +39,6 @@ struct bpf_labels; namespace sandbox2 { -constexpr char kDefaultHostname[] = "sandbox2"; - // PolicyBuilder is a helper class to simplify creation of policies. The builder // uses fluent interface for convenience and increased readability of policies. 
// @@ -91,6 +89,8 @@ constexpr char kDefaultHostname[] = "sandbox2"; // For a more complicated example, see examples/persistent/persistent_sandbox.cc class PolicyBuilder final { public: + static constexpr absl::string_view kDefaultHostname = "sandbox2"; + using BpfInitializer = std::initializer_list; using BpfFunc = const std::function(bpf_labels&)>&; using SyscallInitializer = std::initializer_list; @@ -542,7 +542,7 @@ class PolicyBuilder final { bool use_namespaces_ = true; bool requires_namespaces_ = false; bool allow_unrestricted_networking_ = false; - std::string hostname_ = kDefaultHostname; + std::string hostname_ = std::string(kDefaultHostname); bool collect_stacktrace_on_violation_ = true; bool collect_stacktrace_on_signal_ = true; diff --git a/sandboxed_api/sandbox2/regs.cc b/sandboxed_api/sandbox2/regs.cc index 6dfcd14..07f8e87 100644 --- a/sandboxed_api/sandbox2/regs.cc +++ b/sandboxed_api/sandbox2/regs.cc @@ -24,64 +24,69 @@ #include #include "absl/strings/str_cat.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/util/strerror.h" namespace sandbox2 { absl::Status Regs::Fetch() { -#if defined(__powerpc64__) - iovec pt_iov = {&user_regs_, sizeof(user_regs_)}; - - if (ptrace(PTRACE_GETREGSET, pid_, NT_PRSTATUS, &pt_iov) == -1L) { - return absl::InternalError(absl::StrCat( - "ptrace(PTRACE_GETREGSET, pid=", pid_, ") failed: ", StrError(errno))); - } - if (pt_iov.iov_len != sizeof(user_regs_)) { - return absl::InternalError(absl::StrCat( - "ptrace(PTRACE_GETREGSET, pid=", pid_, - ") size returned: ", pt_iov.iov_len, - " different than sizeof(user_regs_): ", sizeof(user_regs_))); - } -#else +#ifdef SAPI_X86_64 if (ptrace(PTRACE_GETREGS, pid_, 0, &user_regs_) == -1L) { return absl::InternalError(absl::StrCat("ptrace(PTRACE_GETREGS, pid=", pid_, ") failed: ", StrError(errno))); } #endif + if constexpr (host_cpu::IsPPC64LE()) { + iovec pt_iov = {&user_regs_, sizeof(user_regs_)}; + + if (ptrace(PTRACE_GETREGSET, pid_, NT_PRSTATUS, 
&pt_iov) == -1L) { + return absl::InternalError( + absl::StrCat("ptrace(PTRACE_GETREGSET, pid=", pid_, + ") failed: ", StrError(errno))); + } + if (pt_iov.iov_len != sizeof(user_regs_)) { + return absl::InternalError(absl::StrCat( + "ptrace(PTRACE_GETREGSET, pid=", pid_, + ") size returned: ", pt_iov.iov_len, + " different than sizeof(user_regs_): ", sizeof(user_regs_))); + } + } return absl::OkStatus(); } absl::Status Regs::Store() { -#if defined(__powerpc64__) - iovec pt_iov = {&user_regs_, sizeof(user_regs_)}; - - if (ptrace(PTRACE_SETREGSET, pid_, NT_PRSTATUS, &pt_iov) == -1L) { - return absl::InternalError(absl::StrCat( - "ptrace(PTRACE_SETREGSET, pid=", pid_, ") failed: ", StrError(errno))); - } -#else +#ifdef SAPI_X86_64 if (ptrace(PTRACE_SETREGS, pid_, 0, &user_regs_) == -1) { return absl::InternalError(absl::StrCat("ptrace(PTRACE_SETREGS, pid=", pid_, ") failed: ", StrError(errno))); } #endif + if constexpr (host_cpu::IsPPC64LE()) { + iovec pt_iov = {&user_regs_, sizeof(user_regs_)}; + + if (ptrace(PTRACE_SETREGSET, pid_, NT_PRSTATUS, &pt_iov) == -1L) { + return absl::InternalError( + absl::StrCat("ptrace(PTRACE_SETREGSET, pid=", pid_, + ") failed: ", StrError(errno))); + } + } return absl::OkStatus(); } absl::Status Regs::SkipSyscallReturnValue(uint64_t value) { -#if defined(__x86_64__) +#if defined(SAPI_X86_64) user_regs_.orig_rax = -1; user_regs_.rax = value; -#elif defined(__powerpc64__) +#elif defined(SAPI_PPC64_LE) user_regs_.gpr[0] = -1; user_regs_.gpr[3] = value; #endif return Store(); } -Syscall Regs::ToSyscall(Syscall::CpuArch syscall_arch) const { -#if defined(__x86_64__) - if (ABSL_PREDICT_TRUE(syscall_arch == Syscall::kX86_64)) { +Syscall Regs::ToSyscall(cpu::Architecture syscall_arch) const { +#if defined(SAPI_X86_64) + if (ABSL_PREDICT_TRUE(syscall_arch == cpu::kX8664)) { auto syscall = user_regs_.orig_rax; Syscall::Args args = {user_regs_.rdi, user_regs_.rsi, user_regs_.rdx, user_regs_.r10, user_regs_.r8, user_regs_.r9}; @@ -89,7 +94,7 @@ 
Syscall Regs::ToSyscall(Syscall::CpuArch syscall_arch) const { auto ip = user_regs_.rip; return Syscall(syscall_arch, syscall, args, pid_, sp, ip); } - if (syscall_arch == Syscall::kX86_32) { + if (syscall_arch == cpu::kX86) { auto syscall = user_regs_.orig_rax & 0xFFFFFFFF; Syscall::Args args = { user_regs_.rbx & 0xFFFFFFFF, user_regs_.rcx & 0xFFFFFFFF, @@ -99,8 +104,8 @@ Syscall Regs::ToSyscall(Syscall::CpuArch syscall_arch) const { auto ip = user_regs_.rip & 0xFFFFFFFF; return Syscall(syscall_arch, syscall, args, pid_, sp, ip); } -#elif defined(__powerpc64__) - if (ABSL_PREDICT_TRUE(syscall_arch == Syscall::kPPC_64)) { +#elif defined(SAPI_PPC64_LE) + if (ABSL_PREDICT_TRUE(syscall_arch == cpu::kPPC64LE)) { auto syscall = user_regs_.gpr[0]; Syscall::Args args = {user_regs_.orig_gpr3, user_regs_.gpr[4], user_regs_.gpr[5], user_regs_.gpr[6], @@ -114,7 +119,7 @@ Syscall Regs::ToSyscall(Syscall::CpuArch syscall_arch) const { } void Regs::StoreRegisterValuesInProtobuf(RegisterValues* values) const { -#if defined(__x86_64__) +#if defined(SAPI_X86_64) RegisterX8664* regs = values->mutable_register_x86_64(); regs->set_r15(user_regs_.r15); regs->set_r14(user_regs_.r14); @@ -143,7 +148,7 @@ void Regs::StoreRegisterValuesInProtobuf(RegisterValues* values) const { regs->set_es(user_regs_.es); regs->set_fs(user_regs_.fs); regs->set_gs(user_regs_.gs); -#elif defined(__powerpc64__) +#elif defined(SAPI_PPC64_LE) RegisterPowerpc64* regs = values->mutable_register_powerpc64(); for (int i = 0; i < ABSL_ARRAYSIZE(user_regs_.gpr); ++i) { regs->add_gpr(user_regs_.gpr[i]); diff --git a/sandboxed_api/sandbox2/regs.h b/sandboxed_api/sandbox2/regs.h index f1149a9..661c8b6 100644 --- a/sandboxed_api/sandbox2/regs.h +++ b/sandboxed_api/sandbox2/regs.h @@ -24,6 +24,7 @@ #include #include "absl/status/status.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/syscall.h" #include "sandboxed_api/sandbox2/violation.pb.h" @@ -33,10 +34,6 @@ namespace sandbox2 { // 
assumes the process is already attached. class Regs { public: -#if !defined(__x86_64__) && !defined(__powerpc64__) - static_assert(false, "No support for the current CPU architecture"); -#endif - explicit Regs(pid_t pid) : pid_(pid) {} // Copies register values from the process @@ -49,7 +46,7 @@ class Regs { absl::Status SkipSyscallReturnValue(uint64_t value); // Converts raw register values obtained on syscall entry to syscall info - Syscall ToSyscall(Syscall::CpuArch syscall_arch) const; + Syscall ToSyscall(cpu::Architecture syscall_arch) const; pid_t pid() const { return pid_; } @@ -60,7 +57,7 @@ class Regs { friend class StackTracePeer; struct PtraceRegisters { -#if defined(__x86_64__) +#if defined(SAPI_X86_64) uint64_t r15; uint64_t r14; uint64_t r13; @@ -88,7 +85,7 @@ class Regs { uint64_t es; uint64_t fs; uint64_t gs; -#elif defined(__powerpc64__) +#elif defined(SAPI_PPC64_LE) uint64_t gpr[32]; uint64_t nip; uint64_t msr; @@ -108,6 +105,8 @@ class Regs { uint64_t zero1; uint64_t zero2; uint64_t zero3; +#else + static_assert(false, "Host CPU architecture not supported, see config.h"); #endif }; diff --git a/sandboxed_api/sandbox2/result.h b/sandboxed_api/sandbox2/result.h index f381ccb..d2d9c5a 100644 --- a/sandboxed_api/sandbox2/result.h +++ b/sandboxed_api/sandbox2/result.h @@ -28,6 +28,7 @@ #include "absl/memory/memory.h" #include "absl/status/status.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/regs.h" #include "sandboxed_api/sandbox2/syscall.h" @@ -131,8 +132,8 @@ class Result { // Returns the current syscall architecture. // Client architecture when final_status_ == VIOLATION, might be different // from the host architecture (32-bit vs 64-bit syscalls). - Syscall::CpuArch GetSyscallArch() const { - return syscall_ ? syscall_->arch() : Syscall::kUnknown; + cpu::Architecture GetSyscallArch() const { + return syscall_ ? 
syscall_->arch() : cpu::kUnknown; } const std::vector stack_trace() { return stack_trace_; } diff --git a/sandboxed_api/sandbox2/sandbox2_test.cc b/sandboxed_api/sandbox2/sandbox2_test.cc index edf3d6c..df94c1f 100644 --- a/sandboxed_api/sandbox2/sandbox2_test.cc +++ b/sandboxed_api/sandbox2/sandbox2_test.cc @@ -26,6 +26,7 @@ #include "gtest/gtest.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/executor.h" #include "sandboxed_api/sandbox2/policy.h" #include "sandboxed_api/sandbox2/policybuilder.h" diff --git a/sandboxed_api/sandbox2/syscall.cc b/sandboxed_api/sandbox2/syscall.cc index 2b3ffad..7e06717 100644 --- a/sandboxed_api/sandbox2/syscall.cc +++ b/sandboxed_api/sandbox2/syscall.cc @@ -12,12 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Implementation of the sandbox2::Syscall class. - #include "sandboxed_api/sandbox2/syscall.h" #include #include + #include #include #include @@ -26,6 +25,7 @@ #include #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/syscall_defs.h" #ifndef AUDIT_ARCH_PPC64LE @@ -34,13 +34,13 @@ namespace sandbox2 { -std::string Syscall::GetArchDescription(CpuArch arch) { +std::string Syscall::GetArchDescription(cpu::Architecture arch) { switch (arch) { - case kX86_64: + case cpu::kX8664: return "[X86-64]"; - case kX86_32: + case cpu::kX86: return "[X86-32]"; - case kPPC_64: + case cpu::kPPC64LE: return "[PPC-64]"; default: LOG(ERROR) << "Unknown CPU architecture: " << arch; @@ -48,32 +48,25 @@ std::string Syscall::GetArchDescription(CpuArch arch) { } } -Syscall::CpuArch Syscall::GetHostArch() { -#if defined(__x86_64__) - return kX86_64; -#elif defined(__i386__) - return kX86_32; -#elif defined(__powerpc64__) - return kPPC_64; -#endif -} - uint32_t Syscall::GetHostAuditArch() { 
-#if defined(__x86_64__) - return AUDIT_ARCH_X86_64; -#elif defined(__i386__) - return AUDIT_ARCH_I386; -#elif defined(__powerpc64__) - return AUDIT_ARCH_PPC64LE; -#endif + switch (host_cpu::Architecture()) { + case cpu::kX8664: + return AUDIT_ARCH_X86_64; + case cpu::kPPC64LE: + return AUDIT_ARCH_PPC64LE; + default: + // The static_assert() in config.h should prevent us from ever getting + // here. + return 0; // Not reached + } } std::string Syscall::GetName() const { - absl::string_view name = SyscallTable::get(arch_).GetName(nr_); - if (name.empty()) { - return absl::StrFormat("UNKNOWN[%d/0x%x]", nr_, nr_); + if (absl::string_view name = SyscallTable::get(arch_).GetName(nr_); + !name.empty()) { + return std::string(name); } - return std::string(name); + return absl::StrFormat("UNKNOWN[%d/0x%x]", nr_, nr_); } std::vector Syscall::GetArgumentsDescription() const { diff --git a/sandboxed_api/sandbox2/syscall.h b/sandboxed_api/sandbox2/syscall.h index 50f8dbf..132a505 100644 --- a/sandboxed_api/sandbox2/syscall.h +++ b/sandboxed_api/sandbox2/syscall.h @@ -13,7 +13,7 @@ // limitations under the License. // The sandbox2::Syscalls class defines mostly static helper methods which -// are used to analyze status of the ptraced process +// are used to analyze the status of the sandboxed process. #ifndef SANDBOXED_API_SANDBOX2_SYSCALL_H__ #define SANDBOXED_API_SANDBOX2_SYSCALL_H__ @@ -26,40 +26,34 @@ #include #include +#include "sandboxed_api/sandbox2/config.h" + namespace sandbox2 { class Syscall { public: - // Supported CPU architectures. - // Linux: Use a magic value, so it can be easily spotted in the seccomp-bpf - // bytecode decompilation stream. Must be < (1<<15), as/ that's the size of - // data which can be returned by BPF. - enum CpuArch { - kUnknown = 0xCAF0, - kX86_64, - kX86_32, - kPPC_64, - }; // Maximum number of syscall arguments static constexpr size_t kMaxArgs = 6; using Args = std::array; // Returns the host architecture, according to CpuArch. 
- static CpuArch GetHostArch(); + static constexpr cpu::Architecture GetHostArch() { + return host_cpu::Architecture(); + } // Returns the host architecture, according to . static uint32_t GetHostAuditArch(); // Returns a description of the architecture. - static std::string GetArchDescription(CpuArch arch); + static std::string GetArchDescription(cpu::Architecture arch); Syscall() = default; - Syscall(CpuArch arch, uint64_t nr, Args args = {}) + Syscall(cpu::Architecture arch, uint64_t nr, Args args = {}) : arch_(arch), nr_(nr), args_(args) {} pid_t pid() const { return pid_; } uint64_t nr() const { return nr_; } - CpuArch arch() const { return arch_; } + cpu::Architecture arch() const { return arch_; } const Args& args() const { return args_; } uint64_t stack_pointer() const { return sp_; } uint64_t instruction_pointer() const { return ip_; } @@ -72,12 +66,12 @@ class Syscall { private: friend class Regs; - Syscall(pid_t pid) : pid_(pid) {} - Syscall(CpuArch arch, uint64_t nr, Args args, pid_t pid, uint64_t sp, - uint64_t ip) + explicit Syscall(pid_t pid) : pid_(pid) {} + Syscall(cpu::Architecture arch, uint64_t nr, Args args, pid_t pid, + uint64_t sp, uint64_t ip) : arch_(arch), nr_(nr), args_(args), pid_(pid), sp_(sp), ip_(ip) {} - CpuArch arch_ = kUnknown; + cpu::Architecture arch_ = cpu::kUnknown; uint64_t nr_ = -1; Args args_ = {}; pid_t pid_ = -1; diff --git a/sandboxed_api/sandbox2/syscall_defs.cc b/sandboxed_api/sandbox2/syscall_defs.cc index 1a8c94f..410d054 100644 --- a/sandboxed_api/sandbox2/syscall_defs.cc +++ b/sandboxed_api/sandbox2/syscall_defs.cc @@ -7,6 +7,7 @@ #include "absl/strings/escaping.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/util.h" namespace sandbox2 { @@ -128,7 +129,6 @@ std::vector SyscallTable::GetArgumentsDescription( #define SYSCALLS_UNUSED00_99(prefix) \ SYSCALLS_UNUSED00_49(prefix), SYSCALLS_UNUSED50_99(prefix) -#if 
defined(__x86_64__) // Syscall description table for Linux x86_64 constexpr SyscallTable::Entry kSyscallDataX8664[] = { MakeEntry("read", kInt, kHex, kInt), // 0 @@ -824,12 +824,10 @@ constexpr SyscallTable::Entry kSyscallDataX8632[] = { MakeEntry("bpf", kHex, kHex, kHex, kHex, kHex, kHex), // 357 }; -#elif defined(__powerpc64__) - // http://lxr.free-electrons.com/source/arch/powerpc/include/uapi/asm/unistd.h // Note: PPC64 syscalls can have up to 7 register arguments, but nobody is // using the 7th argument - probably for x64 compatibility reasons. -constexpr SyscallTable::Entry kSyscallDataPPC64[] = { +constexpr SyscallTable::Entry kSyscallDataPPC64LE[] = { MakeEntry("restart_syscall", kGen, kGen, kGen, kGen, kGen, kGen), // 0 MakeEntry("exit", kInt, kGen, kGen, kGen, kGen, kGen), // 1 MakeEntry("fork", kGen, kGen, kGen, kGen, kGen, kGen), // 2 @@ -1218,25 +1216,20 @@ constexpr SyscallTable::Entry kSyscallDataPPC64[] = { MakeEntry("pwritev2", kHex, kHex, kHex, kHex, kHex, kHex), // 381 }; -#endif - #undef SYSCALLS_UNUSED00_99 #undef SYSCALLS_UNUSED50_99 #undef SYSCALLS_UNUSED00_49 #undef SYSCALLS_UNUSED0_9 #undef SYSCALLS_UNUSED -SyscallTable SyscallTable::get(Syscall::CpuArch arch) { - switch (arch) { -#if defined(__x86_64__) - case Syscall::kX86_64: +SyscallTable SyscallTable::get(cpu::Architecture arch) { + switch (host_cpu::Architecture()) { + case cpu::kX8664: return SyscallTable(kSyscallDataX8664); - case Syscall::kX86_32: + case cpu::kX86: return SyscallTable(kSyscallDataX8632); -#elif defined(__powerpc64__) - case Syscall::kPPC_64: - return SyscallTable(kSyscallDataPPC64); -#endif + case cpu::kPPC64LE: + return SyscallTable(kSyscallDataPPC64LE); default: return SyscallTable(); } diff --git a/sandboxed_api/sandbox2/syscall_defs.h b/sandboxed_api/sandbox2/syscall_defs.h index bef3d19..168ed1d 100644 --- a/sandboxed_api/sandbox2/syscall_defs.h +++ b/sandboxed_api/sandbox2/syscall_defs.h @@ -9,6 +9,7 @@ #include "absl/strings/string_view.h" #include 
"absl/types/span.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/syscall.h" namespace sandbox2 { @@ -23,7 +24,7 @@ class SyscallTable { struct Entry; // Returns the syscall table for the architecture. - static SyscallTable get(Syscall::CpuArch arch); + static SyscallTable get(cpu::Architecture arch); int size() { return data_.size(); } diff --git a/sandboxed_api/sandbox2/syscall_test.cc b/sandboxed_api/sandbox2/syscall_test.cc index 1cb7228..2d13257 100644 --- a/sandboxed_api/sandbox2/syscall_test.cc +++ b/sandboxed_api/sandbox2/syscall_test.cc @@ -19,6 +19,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/strings/str_cat.h" +#include "sandboxed_api/sandbox2/config.h" using ::testing::Eq; using ::testing::StartsWith; @@ -45,7 +46,7 @@ TEST(SyscallTest, Basic) { EXPECT_THAT(arg_desc[2], Eq("0x5 [5]")); EXPECT_THAT( syscall.GetDescription(), - Eq(absl::StrCat(Syscall::GetArchDescription(Syscall::GetHostArch()), + Eq(absl::StrCat(Syscall::GetArchDescription(host_cpu::Architecture()), " read [", __NR_read, "](0x1 [1], 0xbadbeef, 0x5 [5]) IP: 0, STACK: 0"))); } @@ -53,7 +54,7 @@ TEST(SyscallTest, Basic) { TEST(SyscallTest, Empty) { Syscall syscall; - EXPECT_THAT(syscall.arch(), Eq(Syscall::kUnknown)); + EXPECT_THAT(syscall.arch(), Eq(cpu::kUnknown)); EXPECT_THAT(syscall.GetName(), StartsWith("UNKNOWN")); EXPECT_THAT(syscall.GetArgumentsDescription().size(), Eq(Syscall::kMaxArgs)); } diff --git a/sandboxed_api/sandbox2/testcases/BUILD.bazel b/sandboxed_api/sandbox2/testcases/BUILD.bazel index 1983e4d..307db47 100644 --- a/sandboxed_api/sandbox2/testcases/BUILD.bazel +++ b/sandboxed_api/sandbox2/testcases/BUILD.bazel @@ -178,6 +178,7 @@ cc_binary( "fully_static_link", # link libc statically ], linkstatic = 1, # prefer static libraries + deps = ["//sandboxed_api/sandbox2:config"], ) # security: disable=cc-static-no-pie diff --git a/sandboxed_api/sandbox2/testcases/CMakeLists.txt 
b/sandboxed_api/sandbox2/testcases/CMakeLists.txt index 4af43ba..c57ffc5 100644 --- a/sandboxed_api/sandbox2/testcases/CMakeLists.txt +++ b/sandboxed_api/sandbox2/testcases/CMakeLists.txt @@ -159,6 +159,7 @@ set_target_properties(policy PROPERTIES ) target_link_libraries(policy PRIVATE sapi::base + sandbox2::config ${_sandbox2_fully_static_linkopts} ) diff --git a/sandboxed_api/sandbox2/testcases/policy.cc b/sandboxed_api/sandbox2/testcases/policy.cc index e1f851e..ae598cf 100644 --- a/sandboxed_api/sandbox2/testcases/policy.cc +++ b/sandboxed_api/sandbox2/testcases/policy.cc @@ -23,7 +23,9 @@ #include #include -#if defined(__x86_64__) +#include "sandboxed_api/sandbox2/config.h" + +#ifdef SAPI_X86_64 void TestAMD64SyscallMismatch() { int64_t result; @@ -53,7 +55,7 @@ void TestAMD64SyscallMismatchFs() { : "rax", "rbx", "rcx"); exit(-result); } -#endif // defined(__x86_64__) +#endif void TestPtrace() { ptrace(PTRACE_SEIZE, getppid(), 0, 0); @@ -97,14 +99,14 @@ int main(int argc, char** argv) { int testno = atoi(argv[1]); // NOLINT switch (testno) { -#if defined(__x86_64__) +#ifdef SAPI_X86_64 case 1: TestAMD64SyscallMismatch(); break; case 2: TestAMD64SyscallMismatchFs(); break; -#endif // defined(__x86_64__) +#endif case 3: TestPtrace(); break; diff --git a/sandboxed_api/sandbox2/util.cc b/sandboxed_api/sandbox2/util.cc index 55f7aa6..3e97abc 100644 --- a/sandboxed_api/sandbox2/util.cc +++ b/sandboxed_api/sandbox2/util.cc @@ -39,13 +39,13 @@ #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" #include "absl/strings/strip.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/util/fileops.h" #include "sandboxed_api/sandbox2/util/path.h" #include "sandboxed_api/sandbox2/util/strerror.h" #include "sandboxed_api/util/raw_logging.h" -namespace sandbox2 { -namespace util { +namespace sandbox2::util { void CharPtrArrToVecString(char* const* arr, std::vector* vec) { for (int i = 0; arr[i]; ++i) { @@ -126,13 +126,10 @@ 
ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NOINLINE pid_t CloneAndJump(int flags, jmp_buf* env_ptr) { uint8_t stack_buf[PTHREAD_STACK_MIN] ABSL_CACHELINE_ALIGNED; -#if defined(__x86_64__) || defined(__x86__) || defined(__i386__) || \ - defined(__powerpc64__) + static_assert(host_cpu::IsX8664() || host_cpu::IsPPC64LE(), + "Host CPU architecture not supported, see config.h"); // Stack grows down. void* stack = stack_buf + sizeof(stack_buf); -#else -#error "Architecture is not supported" -#endif int r; { r = clone(&ChildFunc, stack, flags, env_ptr, nullptr, nullptr, nullptr); @@ -321,5 +318,4 @@ absl::StatusOr ReadCPathFromPid(pid_t pid, uintptr_t ptr) { return path; } -} // namespace util -} // namespace sandbox2 +} // namespace sandbox2::util From c19949eb7b66d4ec872014dd7baccac5c7942c84 Mon Sep 17 00:00:00 2001 From: Christian Blichmann Date: Fri, 11 Sep 2020 03:13:46 -0700 Subject: [PATCH 40/42] Use inclusive language PiperOrigin-RevId: 331116936 Change-Id: I7084b24440a1c78c0d70030da900330f0b8d954f --- sandboxed_api/sandbox2/comms.cc | 2 +- sandboxed_api/sandbox2/mounts.cc | 2 +- sandboxed_api/sandbox2/violation.proto | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sandboxed_api/sandbox2/comms.cc b/sandboxed_api/sandbox2/comms.cc index 8a357b6..a4845c8 100644 --- a/sandboxed_api/sandbox2/comms.cc +++ b/sandboxed_api/sandbox2/comms.cc @@ -347,7 +347,7 @@ bool Comms::RecvFD(int* fd) { const auto op = [&msg](int fd) -> ssize_t { PotentiallyBlockingRegion region; - // Use syscall, otherwise we would need to whitelist socketcall() on PPC. + // Use syscall, otherwise we would need to allow socketcall() on PPC. 
return TEMP_FAILURE_RETRY( util::Syscall(__NR_recvmsg, fd, reinterpret_cast(&msg), 0)); }; diff --git a/sandboxed_api/sandbox2/mounts.cc b/sandboxed_api/sandbox2/mounts.cc index d722e6f..24afdc6 100644 --- a/sandboxed_api/sandbox2/mounts.cc +++ b/sandboxed_api/sandbox2/mounts.cc @@ -501,7 +501,7 @@ std::string MountFlagsToString(uint64_t flags) { SAPI_MAP(MS_POSIXACL), SAPI_MAP(MS_UNBINDABLE), SAPI_MAP(MS_PRIVATE), - SAPI_MAP(MS_SLAVE), + SAPI_MAP(MS_SLAVE), // Inclusive language: system constant SAPI_MAP(MS_SHARED), SAPI_MAP(MS_RELATIME), SAPI_MAP(MS_KERNMOUNT), diff --git a/sandboxed_api/sandbox2/violation.proto b/sandboxed_api/sandbox2/violation.proto index acd8a6f..57a3b3d 100644 --- a/sandboxed_api/sandbox2/violation.proto +++ b/sandboxed_api/sandbox2/violation.proto @@ -105,10 +105,10 @@ message SyscallDescription { } message FsDescription { - repeated string file_whitelist = 1; - repeated string symlink_whitelist = 2; + repeated string file_allowlist = 1; + repeated string symlink_allowlist = 2; repeated string file_greylist = 3; - repeated string file_blacklist = 4; + repeated string file_denylist = 4; } message PolicyBuilderDescription { @@ -125,7 +125,7 @@ message NamespaceDescription { message PolicyDescription { bytes user_bpf_policy = 1; reserved 2 to 5; - // This requires additional fields. (e.g. whitelisted syscall #s) + // This requires additional fields. (e.g. allowed syscall numbers) PolicyBuilderDescription policy_builder_description = 6; // namespace From 21f7373e76ff909ea6e1a87a2f3aa0d7344095eb Mon Sep 17 00:00:00 2001 From: Christian Blichmann Date: Fri, 11 Sep 2020 06:33:57 -0700 Subject: [PATCH 41/42] Initial changes to support AArch64 This is a work in progress: - Syscall tables need work - Only tested on real hardware using one of our test hosts As a drive-by, this change also enables the open source version to function on POWER. 
Another side-effect of this change is that the default policies no longer check for different host architectures at runtime. On x86_64, we do not need to check for PPC or AArch64 specifics and vice versa. PiperOrigin-RevId: 331137472 Change-Id: Ic6d6be5cbe61d83dbe13d5a0be036871754b2eb8 --- sandboxed_api/sandbox.cc | 2 + sandboxed_api/sandbox2/buffer_test.cc | 7 + sandboxed_api/sandbox2/config.h | 6 +- .../examples/static/static_sandbox.cc | 2 + sandboxed_api/sandbox2/mounts.cc | 4 + .../sandbox2/network_proxy/client.cc | 27 +- sandboxed_api/sandbox2/notify_test.cc | 4 + sandboxed_api/sandbox2/policy.cc | 79 ++--- sandboxed_api/sandbox2/policy_test.cc | 17 ++ sandboxed_api/sandbox2/policybuilder.cc | 7 + sandboxed_api/sandbox2/policybuilder_test.cc | 2 +- sandboxed_api/sandbox2/regs.cc | 66 +++- sandboxed_api/sandbox2/regs.h | 8 + sandboxed_api/sandbox2/stack_trace.cc | 4 + sandboxed_api/sandbox2/syscall.cc | 4 + sandboxed_api/sandbox2/syscall_defs.cc | 287 ++++++++++++++++++ sandboxed_api/sandbox2/util.cc | 5 +- sandboxed_api/sandbox2/violation.proto | 4 +- 18 files changed, 477 insertions(+), 58 deletions(-) diff --git a/sandboxed_api/sandbox.cc b/sandboxed_api/sandbox.cc index b18a1b0..8c9da3d 100644 --- a/sandboxed_api/sandbox.cc +++ b/sandboxed_api/sandbox.cc @@ -84,7 +84,9 @@ void InitDefaultPolicyBuilder(sandbox2::PolicyBuilder* builder) { __NR_kill, __NR_tgkill, __NR_tkill, +#ifdef __NR_readlink __NR_readlink, +#endif #ifdef __NR_arch_prctl // x86-64 only __NR_arch_prctl, #endif diff --git a/sandboxed_api/sandbox2/buffer_test.cc b/sandboxed_api/sandbox2/buffer_test.cc index 876be31..2b72a8b 100644 --- a/sandboxed_api/sandbox2/buffer_test.cc +++ b/sandboxed_api/sandbox2/buffer_test.cc @@ -84,11 +84,18 @@ std::unique_ptr BufferTestcasePolicy() { .AllowSyscall(__NR_lseek) .AllowSyscall(__NR_close) .BlockSyscallWithErrno(__NR_prlimit64, EPERM) +#ifdef __NR_open .BlockSyscallWithErrno(__NR_open, ENOENT) +#endif .BlockSyscallWithErrno(__NR_openat, ENOENT) +#ifdef
__NR_access // On Debian, even static binaries check existence of // /etc/ld.so.nohwcap. .BlockSyscallWithErrno(__NR_access, ENOENT) +#endif +#ifdef __NR_faccessat + .BlockSyscallWithErrno(__NR_faccessat, ENOENT) +#endif .BuildOrDie(); return s2p; diff --git a/sandboxed_api/sandbox2/config.h b/sandboxed_api/sandbox2/config.h index ba0fec5..209cd82 100644 --- a/sandboxed_api/sandbox2/config.h +++ b/sandboxed_api/sandbox2/config.h @@ -61,6 +61,8 @@ constexpr cpu::Architecture Architecture() { return cpu::kX8664; #elif defined(SAPI_PPC64_LE) return cpu::kPPC64LE; +#elif defined(SAPI_ARM64) + return cpu::kArm64; #else return cpu::kUnknown; #endif @@ -75,8 +77,8 @@ constexpr bool IsArm64() { return Architecture() == cpu::kArm64; } } // namespace host_cpu static_assert(host_cpu::Architecture() != cpu::kUnknown, - "Host CPU architecture is not supported: One of x86-64 or " - "POWER64 (little endian) is required."); + "Host CPU architecture is not supported: One of x86-64, POWER64 " + "(little endian) or AArch64 is required."); } // namespace sandbox2 diff --git a/sandboxed_api/sandbox2/examples/static/static_sandbox.cc b/sandboxed_api/sandbox2/examples/static/static_sandbox.cc index 44f794c..7290b72 100644 --- a/sandboxed_api/sandbox2/examples/static/static_sandbox.cc +++ b/sandboxed_api/sandbox2/examples/static/static_sandbox.cc @@ -52,8 +52,10 @@ std::unique_ptr GetPolicy() { // Allow the getpid() syscall. .AllowSyscall(__NR_getpid) +#ifdef __NR_access // On Debian, even static binaries check existence of /etc/ld.so.nohwcap. 
.BlockSyscallWithErrno(__NR_access, ENOENT) +#endif // Examples for AddPolicyOnSyscall: .AddPolicyOnSyscall(__NR_write, diff --git a/sandboxed_api/sandbox2/mounts.cc b/sandboxed_api/sandbox2/mounts.cc index 24afdc6..4c71d52 100644 --- a/sandboxed_api/sandbox2/mounts.cc +++ b/sandboxed_api/sandbox2/mounts.cc @@ -113,6 +113,8 @@ absl::StatusOr ExistingPathInsideDir( absl::Status ValidateInterpreter(absl::string_view interpreter) { const absl::flat_hash_set allowed_interpreters = { "/lib64/ld-linux-x86-64.so.2", + "/lib64/ld64.so.2", // PPC64 + "/lib/ld-linux-aarch64.so.1", // AArch64 }; if (!allowed_interpreters.contains(interpreter)) { @@ -139,6 +141,8 @@ constexpr absl::string_view GetPlatformCPUName() { return "x86_64"; case cpu::kPPC64LE: return "ppc64"; + case cpu::kArm64: + return "aarch64"; default: return "unknown"; } diff --git a/sandboxed_api/sandbox2/network_proxy/client.cc b/sandboxed_api/sandbox2/network_proxy/client.cc index 51b1260..3dde8c1 100644 --- a/sandboxed_api/sandbox2/network_proxy/client.cc +++ b/sandboxed_api/sandbox2/network_proxy/client.cc @@ -49,10 +49,14 @@ constexpr int kRegSyscall = 0; constexpr int kRegArg0 = 3; constexpr int kRegArg1 = 4; constexpr int kRegArg2 = 5; +#elif defined(SAPI_ARM64) +constexpr int kRegResult = 0; +constexpr int kRegSyscall = 8; +constexpr int kRegArg0 = 0; +constexpr int kRegArg1 = 1; +constexpr int kRegArg2 = 2; #endif -constexpr char NetworkProxyClient::kFDName[]; - int NetworkProxyClient::ConnectHandler(int sockfd, const struct sockaddr* addr, socklen_t addrlen) { absl::Status status = Connect(sockfd, addr, addrlen); @@ -154,20 +158,22 @@ void NetworkProxyHandler::InvokeOldAct(int nr, siginfo_t* info, void NetworkProxyHandler::ProcessSeccompTrap(int nr, siginfo_t* info, void* void_context) { - ucontext_t* ctx = (ucontext_t*)(void_context); if (info->si_code != SYS_SECCOMP) { InvokeOldAct(nr, info, void_context); return; } - if (!ctx) return; + auto* ctx = static_cast(void_context); + if (!ctx) { + return; 
+ } #if defined(SAPI_X86_64) auto* registers = ctx->uc_mcontext.gregs; #elif defined(SAPI_PPC64_LE) auto* registers = ctx->uc_mcontext.gp_regs; - using ppc_gpreg_t = std::decay::type; +#elif defined(SAPI_ARM64) + auto* registers = ctx->uc_mcontext.regs; #endif - int syscall = registers[kRegSyscall]; int sockfd; @@ -181,11 +187,10 @@ void NetworkProxyHandler::ProcessSeccompTrap(int nr, siginfo_t* info, #if defined(SAPI_PPC64_LE) } else if (syscall == __NR_socketcall && static_cast(registers[kRegArg0]) == SYS_CONNECT) { - ppc_gpreg_t* args = reinterpret_cast(registers[kRegArg1]); - - sockfd = static_cast(args[0]); - addr = reinterpret_cast(args[1]); - addrlen = static_cast(args[2]); + auto* connect_args = reinterpret_cast(registers[kRegArg1]); + sockfd = static_cast(connect_args[0]); + addr = reinterpret_cast(connect_args[1]); + addrlen = static_cast(connect_args[2]); #endif } else { InvokeOldAct(nr, info, void_context); diff --git a/sandboxed_api/sandbox2/notify_test.cc b/sandboxed_api/sandbox2/notify_test.cc index 77fc67f..8e0bc38 100644 --- a/sandboxed_api/sandbox2/notify_test.cc +++ b/sandboxed_api/sandbox2/notify_test.cc @@ -49,8 +49,12 @@ std::unique_ptr NotifyTestcasePolicy() { .AllowWrite() .AllowSyscall(__NR_close) .AddPolicyOnSyscall(__NR_personality, {SANDBOX2_TRACE}) +#ifdef __NR_open .BlockSyscallWithErrno(__NR_open, ENOENT) +#endif +#ifdef __NR_access .BlockSyscallWithErrno(__NR_access, ENOENT) +#endif .BlockSyscallWithErrno(__NR_openat, ENOENT) .BlockSyscallWithErrno(__NR_prlimit64, EPERM) .BuildOrDie(); diff --git a/sandboxed_api/sandbox2/policy.cc b/sandboxed_api/sandbox2/policy.cc index 4e3fe03..398fbfe 100644 --- a/sandboxed_api/sandbox2/policy.cc +++ b/sandboxed_api/sandbox2/policy.cc @@ -82,41 +82,41 @@ std::vector Policy::GetDefaultPolicy() const { bpf_labels l = {0}; std::vector policy = { - // If compiled arch is different than the runtime one, inform the Monitor. 
- LOAD_ARCH, - JEQ32(Syscall::GetHostAuditArch(), JUMP(&l, past_arch_check_l)), - JEQ32(AUDIT_ARCH_X86_64, TRACE(cpu::kX8664)), - JEQ32(AUDIT_ARCH_I386, TRACE(cpu::kX86)), - JEQ32(AUDIT_ARCH_PPC64LE, TRACE(cpu::kPPC64LE)), - TRACE(cpu::kUnknown), - LABEL(&l, past_arch_check_l), + // If compiled arch is different than the runtime one, inform the Monitor. + LOAD_ARCH, + JEQ32(Syscall::GetHostAuditArch(), JUMP(&l, past_arch_check_l)), +#if defined(SAPI_X86_64) + JEQ32(AUDIT_ARCH_I386, TRACE(cpu::kX86)), // 32-bit sandboxee +#endif + TRACE(cpu::kUnknown), + LABEL(&l, past_arch_check_l), - // After the policy is uploaded, forkserver will execve the sandboxee. We - // need to allow this execve but not others. Since BPF does not have - // state, we need to inform the Monitor to decide, and for that we use a - // magic value in syscall args 5. Note that this value is not supposed to - // be secret, but just an optimization so that the monitor is not - // triggered on every call to execveat. - LOAD_SYSCALL_NR, - JNE32(__NR_execveat, JUMP(&l, past_execveat_l)), - ARG_32(4), - JNE32(AT_EMPTY_PATH, JUMP(&l, past_execveat_l)), - ARG_32(5), - JNE32(internal::kExecveMagic, JUMP(&l, past_execveat_l)), - SANDBOX2_TRACE, - LABEL(&l, past_execveat_l), + // After the policy is uploaded, forkserver will execve the sandboxee. We + // need to allow this execve but not others. Since BPF does not have + // state, we need to inform the Monitor to decide, and for that we use a + // magic value in syscall args 5. Note that this value is not supposed to + // be secret, but just an optimization so that the monitor is not + // triggered on every call to execveat. + LOAD_SYSCALL_NR, + JNE32(__NR_execveat, JUMP(&l, past_execveat_l)), + ARG_32(4), + JNE32(AT_EMPTY_PATH, JUMP(&l, past_execveat_l)), + ARG_32(5), + JNE32(internal::kExecveMagic, JUMP(&l, past_execveat_l)), + SANDBOX2_TRACE, + LABEL(&l, past_execveat_l), - // Forbid some syscalls because unsafe or too risky. 
- LOAD_SYSCALL_NR, - JEQ32(__NR_ptrace, DENY), - JEQ32(__NR_bpf, DENY), + // Forbid some syscalls because unsafe or too risky. + LOAD_SYSCALL_NR, + JEQ32(__NR_ptrace, DENY), + JEQ32(__NR_bpf, DENY), - // Disallow clone with CLONE_UNTRACED flag. - JNE32(__NR_clone, JUMP(&l, past_clone_untraced_l)), - // Regardless of arch, we only care about the lower 32-bits of the flags. - ARG_32(0), - JA32(CLONE_UNTRACED, DENY), - LABEL(&l, past_clone_untraced_l), + // Disallow clone with CLONE_UNTRACED flag. + JNE32(__NR_clone, JUMP(&l, past_clone_untraced_l)), + // Regardless of arch, we only care about the lower 32-bits of the flags. + ARG_32(0), + JA32(CLONE_UNTRACED, DENY), + LABEL(&l, past_clone_untraced_l), }; if (bpf_resolve_jumps(&l, policy.data(), policy.size()) != 0) { @@ -129,11 +129,16 @@ std::vector Policy::GetDefaultPolicy() const { std::vector Policy::GetTrackingPolicy() const { return { - LOAD_ARCH, - JEQ32(AUDIT_ARCH_X86_64, TRACE(cpu::kX8664)), - JEQ32(AUDIT_ARCH_I386, TRACE(cpu::kX86)), - JEQ32(AUDIT_ARCH_PPC64LE, TRACE(cpu::kPPC64LE)), - TRACE(cpu::kUnknown), + LOAD_ARCH, +#if defined(SAPI_X86_64) + JEQ32(AUDIT_ARCH_X86_64, TRACE(cpu::kX8664)), + JEQ32(AUDIT_ARCH_I386, TRACE(cpu::kX86)), +#elif defined(SAPI_PPC64_LE) + JEQ32(AUDIT_ARCH_PPC64LE, TRACE(cpu::kPPC64LE)), +#elif defined(SAPI_ARM64) + JEQ32(AUDIT_ARCH_AARCH64, TRACE(cpu::kArm64)), +#endif + TRACE(cpu::kUnknown), }; } diff --git a/sandboxed_api/sandbox2/policy_test.cc b/sandboxed_api/sandbox2/policy_test.cc index 226b450..a7cf852 100644 --- a/sandboxed_api/sandbox2/policy_test.cc +++ b/sandboxed_api/sandbox2/policy_test.cc @@ -50,9 +50,16 @@ std::unique_ptr PolicyTestcasePolicy() { .AllowSyscall(__NR_close) .AllowSyscall(__NR_getppid) .AllowTCGETS() +#ifdef __NR_open .BlockSyscallWithErrno(__NR_open, ENOENT) +#endif .BlockSyscallWithErrno(__NR_openat, ENOENT) +#ifdef __NR_access .BlockSyscallWithErrno(__NR_access, ENOENT) +#endif +#ifdef __NR_faccessat + .BlockSyscallWithErrno(__NR_faccessat, 
ENOENT) +#endif .BlockSyscallWithErrno(__NR_prlimit64, EPERM) .BuildOrDie(); } @@ -162,7 +169,9 @@ std::unique_ptr MinimalTestcasePolicy() { .AllowStaticStartup() .AllowExit() .BlockSyscallWithErrno(__NR_prlimit64, EPERM) +#ifdef __NR_access .BlockSyscallWithErrno(__NR_access, ENOENT) +#endif .BuildOrDie(); } @@ -197,8 +206,10 @@ TEST(MinimalTest, MinimalSharedBinaryWorks) { .AllowOpen() .AllowExit() .AllowMmap() +#ifdef __NR_access // New glibc accesses /etc/ld.so.preload .BlockSyscallWithErrno(__NR_access, ENOENT) +#endif .BlockSyscallWithErrno(__NR_prlimit64, EPERM) .AddLibrariesForBinary(path) .BuildOrDie(); @@ -223,7 +234,9 @@ TEST(MallocTest, SystemMallocWorks) { .AllowSystemMalloc() .AllowExit() .BlockSyscallWithErrno(__NR_prlimit64, EPERM) +#ifdef __NR_access .BlockSyscallWithErrno(__NR_access, ENOENT) +#endif .BuildOrDie(); Sandbox2 s2(std::move(executor), std::move(policy)); @@ -247,7 +260,9 @@ TEST(MultipleSyscalls, AddPolicyOnSyscallsWorks) { auto policy = PolicyBuilder() +#ifdef __NR_open .BlockSyscallWithErrno(__NR_open, ENOENT) +#endif .BlockSyscallWithErrno(__NR_openat, ENOENT) .AllowStaticStartup() .AllowTcMalloc() @@ -258,7 +273,9 @@ TEST(MultipleSyscalls, AddPolicyOnSyscallsWorks) { .AddPolicyOnSyscalls({__NR_read, __NR_write}, {ERRNO(43)}) .AddPolicyOnSyscall(__NR_umask, {DENY}) .BlockSyscallWithErrno(__NR_prlimit64, EPERM) +#ifdef __NR_access .BlockSyscallWithErrno(__NR_access, ENOENT) +#endif .BuildOrDie(); Sandbox2 s2(std::move(executor), std::move(policy)); diff --git a/sandboxed_api/sandbox2/policybuilder.cc b/sandboxed_api/sandbox2/policybuilder.cc index a560190..c0bfd41 100644 --- a/sandboxed_api/sandbox2/policybuilder.cc +++ b/sandboxed_api/sandbox2/policybuilder.cc @@ -520,7 +520,12 @@ PolicyBuilder& PolicyBuilder::AllowStaticStartup() { }); #endif + if constexpr (host_cpu::IsArm64()) { + BlockSyscallWithErrno(__NR_readlinkat, ENOENT); + } +#ifdef __NR_readlink BlockSyscallWithErrno(__NR_readlink, ENOENT); +#endif return *this; } @@ 
-881,7 +886,9 @@ PolicyBuilder& PolicyBuilder::AddNetworkProxyPolicy() { AllowFutexOp(FUTEX_WAIT); AllowFutexOp(FUTEX_WAIT_BITSET); AllowSyscalls({ +#ifdef __NR_dup2 __NR_dup2, +#endif __NR_recvmsg, __NR_close, __NR_gettid, diff --git a/sandboxed_api/sandbox2/policybuilder_test.cc b/sandboxed_api/sandbox2/policybuilder_test.cc index b5387d9..652a827 100644 --- a/sandboxed_api/sandbox2/policybuilder_test.cc +++ b/sandboxed_api/sandbox2/policybuilder_test.cc @@ -101,7 +101,7 @@ TEST_F(PolicyBuilderTest, Testpolicy_size) { builder.AllowSystemMalloc(); assert_increased(); builder.AllowSyscall(__NR_munmap); assert_same(); builder.BlockSyscallWithErrno(__NR_munmap, 1); assert_same(); - builder.BlockSyscallWithErrno(__NR_open, 1); + builder.BlockSyscallWithErrno(__NR_openat, 1); assert_increased(); builder.AllowTCGETS(); assert_increased(); diff --git a/sandboxed_api/sandbox2/regs.cc b/sandboxed_api/sandbox2/regs.cc index 07f8e87..7a4c214 100644 --- a/sandboxed_api/sandbox2/regs.cc +++ b/sandboxed_api/sandbox2/regs.cc @@ -23,12 +23,18 @@ #include +#include "absl/base/macros.h" +#include "absl/status/status.h" #include "absl/strings/str_cat.h" #include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/util/strerror.h" namespace sandbox2 { +#ifndef NT_ARM_SYSTEM_CALL +#define NT_ARM_SYSTEM_CALL 0x404 +#endif + absl::Status Regs::Fetch() { #ifdef SAPI_X86_64 if (ptrace(PTRACE_GETREGS, pid_, 0, &user_regs_) == -1L) { @@ -36,7 +42,7 @@ absl::Status Regs::Fetch() { ") failed: ", StrError(errno))); } #endif - if constexpr (host_cpu::IsPPC64LE()) { + if constexpr (host_cpu::IsPPC64LE() || host_cpu::IsArm64()) { iovec pt_iov = {&user_regs_, sizeof(user_regs_)}; if (ptrace(PTRACE_GETREGSET, pid_, NT_PRSTATUS, &pt_iov) == -1L) { @@ -50,6 +56,24 @@ absl::Status Regs::Fetch() { ") size returned: ", pt_iov.iov_len, " different than sizeof(user_regs_): ", sizeof(user_regs_))); } + + // On AArch64, we are not done yet. Read the syscall number. 
+ if constexpr (host_cpu::IsArm64()) { + iovec sys_iov = {&syscall_number_, sizeof(syscall_number_)}; + + if (ptrace(PTRACE_GETREGSET, pid_, NT_ARM_SYSTEM_CALL, &sys_iov) == -1L) { + return absl::InternalError( + absl::StrCat("ptrace(PTRACE_GETREGSET, pid=", pid_, + ", NT_ARM_SYSTEM_CALL) failed: ", StrError(errno))); + } + if (sys_iov.iov_len != sizeof(syscall_number_)) { + return absl::InternalError(absl::StrCat( + "ptrace(PTRACE_GETREGSET, pid=", pid_, + ", NT_ARM_SYSTEM_CALL) size returned: ", sys_iov.iov_len, + " different than sizeof(syscall_number_): ", + sizeof(syscall_number_))); + } + } } return absl::OkStatus(); } @@ -61,7 +85,7 @@ absl::Status Regs::Store() { ") failed: ", StrError(errno))); } #endif - if constexpr (host_cpu::IsPPC64LE()) { + if constexpr (host_cpu::IsPPC64LE() || host_cpu::IsArm64()) { iovec pt_iov = {&user_regs_, sizeof(user_regs_)}; if (ptrace(PTRACE_SETREGSET, pid_, NT_PRSTATUS, &pt_iov) == -1L) { @@ -69,6 +93,17 @@ absl::Status Regs::Store() { absl::StrCat("ptrace(PTRACE_SETREGSET, pid=", pid_, ") failed: ", StrError(errno))); } + + // Store syscall number on AArch64. 
+ if constexpr (host_cpu::IsArm64()) { + iovec sys_iov = {&syscall_number_, sizeof(syscall_number_)}; + + if (ptrace(PTRACE_SETREGSET, pid_, NT_ARM_SYSTEM_CALL, &sys_iov) == -1L) { + return absl::InternalError( + absl::StrCat("ptrace(PTRACE_SETREGSET, pid=", pid_, + ", NT_ARM_SYSTEM_CALL) failed: ", StrError(errno))); + } + } } return absl::OkStatus(); } @@ -80,6 +115,9 @@ absl::Status Regs::SkipSyscallReturnValue(uint64_t value) { #elif defined(SAPI_PPC64_LE) user_regs_.gpr[0] = -1; user_regs_.gpr[3] = value; +#elif defined(SAPI_ARM64) + user_regs_.regs[0] = -1; + syscall_number_ = value; #endif return Store(); } @@ -114,6 +152,22 @@ Syscall Regs::ToSyscall(cpu::Architecture syscall_arch) const { auto ip = user_regs_.nip; return Syscall(syscall_arch, syscall, args, pid_, sp, ip); } +#elif defined(SAPI_ARM64) + if (ABSL_PREDICT_TRUE(syscall_arch == cpu::kArm64)) { + Syscall::Args args = { + // First argument should be orig_x0, which is not available to ptrace on + // AArch64 (see + // https://undo.io/resources/arm64-vs-arm32-whats-different-linux-programmers/), + // as it will have been overwritten. For our use case, though, using + // regs[0] is fine, as we are always called on syscall entry and never + // on exit. 
+ user_regs_.regs[0], user_regs_.regs[1], user_regs_.regs[2], + user_regs_.regs[3], user_regs_.regs[4], user_regs_.regs[5], + }; + auto sp = user_regs_.sp; + auto ip = user_regs_.pc; + return Syscall(syscall_arch, syscall_number_, args, pid_, sp, ip); + } #endif return Syscall(pid_); } @@ -169,6 +223,14 @@ void Regs::StoreRegisterValuesInProtobuf(RegisterValues* values) const { regs->set_zero1(user_regs_.zero1); regs->set_zero2(user_regs_.zero2); regs->set_zero3(user_regs_.zero3); +#elif defined(SAPI_ARM64) + RegisterAarch64* regs = values->mutable_register_aarch64(); + for (int i = 0; i < ABSL_ARRAYSIZE(user_regs_.regs); ++i) { + regs->add_regs(user_regs_.regs[i]); + } + regs->set_sp(user_regs_.sp); + regs->set_pc(user_regs_.pc); + regs->set_pstate(user_regs_.pstate); #endif } diff --git a/sandboxed_api/sandbox2/regs.h b/sandboxed_api/sandbox2/regs.h index 661c8b6..cce7022 100644 --- a/sandboxed_api/sandbox2/regs.h +++ b/sandboxed_api/sandbox2/regs.h @@ -105,6 +105,11 @@ class Regs { uint64_t zero1; uint64_t zero2; uint64_t zero3; +#elif defined(SAPI_ARM64) + uint64_t regs[31]; + uint64_t sp; + uint64_t pc; + uint64_t pstate; #else static_assert(false, "Host CPU architecture not supported, see config.h"); #endif @@ -115,6 +120,9 @@ class Regs { // Registers fetched with ptrace(PR_GETREGS/GETREGSET, pid). 
PtraceRegisters user_regs_ = {}; + + // On AArch64, obtaining the syscall number needs a specific call to ptrace() + int syscall_number_ = 0; }; } // namespace sandbox2 diff --git a/sandboxed_api/sandbox2/stack_trace.cc b/sandboxed_api/sandbox2/stack_trace.cc index 7df7952..868b89e 100644 --- a/sandboxed_api/sandbox2/stack_trace.cc +++ b/sandboxed_api/sandbox2/stack_trace.cc @@ -31,6 +31,7 @@ #include "absl/strings/strip.h" #include "libcap/include/sys/capability.h" #include "sandboxed_api/sandbox2/comms.h" +#include "sandboxed_api/sandbox2/config.h" #include "sandboxed_api/sandbox2/executor.h" #include "sandboxed_api/sandbox2/ipc.h" #include "sandboxed_api/sandbox2/limits.h" @@ -270,6 +271,9 @@ bool StackTracePeer::LaunchLibunwindSandbox(const Regs* regs, } std::vector GetStackTrace(const Regs* regs, const Mounts& mounts) { + if constexpr (host_cpu::IsArm64()) { + return {"[Stack traces unavailable]"}; + } if (absl::GetFlag(FLAGS_sandbox_disable_all_stack_traces)) { return {"[Stacktraces disabled]"}; } diff --git a/sandboxed_api/sandbox2/syscall.cc b/sandboxed_api/sandbox2/syscall.cc index 7e06717..980320a 100644 --- a/sandboxed_api/sandbox2/syscall.cc +++ b/sandboxed_api/sandbox2/syscall.cc @@ -42,6 +42,8 @@ std::string Syscall::GetArchDescription(cpu::Architecture arch) { return "[X86-32]"; case cpu::kPPC64LE: return "[PPC-64]"; + case cpu::kArm64: + return "[Arm-64]"; default: LOG(ERROR) << "Unknown CPU architecture: " << arch; return absl::StrFormat("[UNKNOWN_ARCH:%d]", arch); @@ -54,6 +56,8 @@ uint32_t Syscall::GetHostAuditArch() { return AUDIT_ARCH_X86_64; case cpu::kPPC64LE: return AUDIT_ARCH_PPC64LE; + case cpu::kArm64: + return AUDIT_ARCH_AARCH64; default: // The static_assert() in config.h should prevent us from ever getting // here. 
diff --git a/sandboxed_api/sandbox2/syscall_defs.cc b/sandboxed_api/sandbox2/syscall_defs.cc index 410d054..b79f6c7 100644 --- a/sandboxed_api/sandbox2/syscall_defs.cc +++ b/sandboxed_api/sandbox2/syscall_defs.cc @@ -1216,6 +1216,291 @@ constexpr SyscallTable::Entry kSyscallDataPPC64LE[] = { MakeEntry("pwritev2", kHex, kHex, kHex, kHex, kHex, kHex), // 381 }; +// TODO(cblichmann): Confirm the entries in this list. +// https://github.com/torvalds/linux/blob/v5.8/include/uapi/asm-generic/unistd.h +constexpr SyscallTable::Entry kSyscallDataArm64[] = { + MakeEntry("io_setup", UnknownArguments()), // 0 + MakeEntry("io_destroy", UnknownArguments()), // 1 + MakeEntry("io_submit", UnknownArguments()), // 2 + MakeEntry("io_cancel", UnknownArguments()), // 3 + MakeEntry("io_getevents", UnknownArguments()), // 4 + MakeEntry("setxattr", kPath, kString, kGen, kInt, kHex, kGen), // 5 + MakeEntry("lsetxattr", kPath, kString, kGen, kInt, kHex, kGen), // 6 + MakeEntry("fsetxattr", UnknownArguments()), // 7 + MakeEntry("getxattr", kPath, kString, kGen, kInt, kGen, kGen), // 8 + MakeEntry("lgetxattr", kPath, kString, kGen, kInt, kGen, kGen), // 9 + MakeEntry("fgetxattr", UnknownArguments()), // 10 + MakeEntry("listxattr", kPath, kGen, kInt, kGen, kGen, kGen), // 11 + MakeEntry("llistxattr", kPath, kGen, kInt, kGen, kGen, kGen), // 12 + MakeEntry("flistxattr", UnknownArguments()), // 13 + MakeEntry("removexattr", kPath, kString, kGen, kGen, kGen, kGen), // 14 + MakeEntry("lremovexattr", UnknownArguments()), // 15 + MakeEntry("fremovexattr", UnknownArguments()), // 16 + MakeEntry("getcwd", UnknownArguments()), // 17 + MakeEntry("lookup_dcookie", UnknownArguments()), // 18 + MakeEntry("eventfd2", UnknownArguments()), // 19 + MakeEntry("epoll_create1", UnknownArguments()), // 20 + MakeEntry("epoll_ctl", UnknownArguments()), // 21 + MakeEntry("epoll_pwait", UnknownArguments()), // 22 + MakeEntry("dup", UnknownArguments()), // 23 + MakeEntry("dup3", UnknownArguments()), // 24 + 
MakeEntry("fcntl", UnknownArguments()), // 25 + MakeEntry("inotify_init1", UnknownArguments()), // 26 + MakeEntry("inotify_add_watch", UnknownArguments()), // 27 + MakeEntry("inotify_rm_watch", UnknownArguments()), // 28 + MakeEntry("ioctl", UnknownArguments()), // 29 + MakeEntry("ioprio_set", UnknownArguments()), // 30 + MakeEntry("ioprio_get", UnknownArguments()), // 31 + MakeEntry("flock", UnknownArguments()), // 32 + MakeEntry("mknodat", kGen, kPath, kGen, kGen, kGen, kGen), // 33 + MakeEntry("mkdirat", kGen, kPath, kGen, kGen, kGen, kGen), // 34 + MakeEntry("unlinkat", kGen, kPath, kGen, kGen, kGen, kGen), // 35 + MakeEntry("symlinkat", kPath, kGen, kPath, kGen, kGen, kGen), // 36 + MakeEntry("linkat", kGen, kPath, kGen, kPath, kGen, kGen), // 37 + MakeEntry("renameat", kGen, kPath, kGen, kPath, kGen, kGen), // 38 + MakeEntry("umount2", kPath, kHex, kGen, kGen, kGen, kGen), // 39 + MakeEntry("mount", kPath, kPath, kString, kHex, kGen, kGen), // 40 + MakeEntry("pivot_root", kPath, kPath, kGen, kGen, kGen, kGen), // 41 + MakeEntry("nfsservctl", UnknownArguments()), // 42 + MakeEntry("statfs", kPath, kGen, kGen, kGen, kGen, kGen), // 43 + MakeEntry("fstatfs", UnknownArguments()), // 44 + MakeEntry("truncate", kPath, kInt, kGen, kGen, kGen, kGen), // 45 + MakeEntry("ftruncate", UnknownArguments()), // 46 + MakeEntry("fallocate", UnknownArguments()), // 47 + MakeEntry("faccessat", kGen, kPath, kGen, kGen, kGen, kGen), // 48 + MakeEntry("chdir", kPath, kGen, kGen, kGen, kGen, kGen), // 49 + MakeEntry("fchdir", UnknownArguments()), // 50 + MakeEntry("chroot", kPath, kGen, kGen, kGen, kGen, kGen), // 51 + MakeEntry("fchmod", UnknownArguments()), // 52 + MakeEntry("fchmodat", kGen, kPath, kGen, kGen, kGen, kGen), // 53 + MakeEntry("fchownat", kGen, kPath, kGen, kGen, kGen, kGen), // 54 + MakeEntry("fchown", UnknownArguments()), // 55 + MakeEntry("openat", kGen, kPath, kOct, kHex, kGen, kGen), // 56 + MakeEntry("close", kInt, kGen, kGen, kGen, kGen, kGen), // 57 + 
MakeEntry("vhangup", UnknownArguments()), // 58 + MakeEntry("pipe2", UnknownArguments()), // 59 + MakeEntry("quotactl", kInt, kPath, kInt, kGen, kGen, kGen), // 60 + MakeEntry("getdents64", UnknownArguments()), // 61 + MakeEntry("lseek", UnknownArguments()), // 62 + MakeEntry("read", kInt, kHex, kInt, kGen, kGen, kGen), // 63 + MakeEntry("write", kInt, kHex, kInt, kGen, kGen, kGen), // 64 + MakeEntry("readv", UnknownArguments()), // 65 + MakeEntry("writev", UnknownArguments()), // 66 + MakeEntry("pread64", UnknownArguments()), // 67 + MakeEntry("pwrite64", UnknownArguments()), // 68 + MakeEntry("preadv", UnknownArguments()), // 69 + MakeEntry("pwritev", UnknownArguments()), // 70 + MakeEntry("sendfile", UnknownArguments()), // 71 + MakeEntry("pselect6", UnknownArguments()), // 72 + MakeEntry("ppoll", UnknownArguments()), // 73 + MakeEntry("signalfd4", UnknownArguments()), // 74 + MakeEntry("vmsplice", UnknownArguments()), // 75 + MakeEntry("splice", UnknownArguments()), // 76 + MakeEntry("tee", UnknownArguments()), // 77 + MakeEntry("readlinkat", kGen, kPath, kGen, kGen, kGen, kGen), // 78 + MakeEntry("newfstatat", kGen, kPath, kGen, kGen, kGen, kGen), // 79 + MakeEntry("fstat", kInt, kHex, kGen, kGen, kGen, kGen), // 80 + MakeEntry("sync", UnknownArguments()), // 81 + MakeEntry("fsync", UnknownArguments()), // 82 + MakeEntry("fdatasync", UnknownArguments()), // 83 + MakeEntry("sync_file_range", UnknownArguments()), // 84 + MakeEntry("timerfd_create", UnknownArguments()), // 85 + MakeEntry("timerfd_settime", UnknownArguments()), // 86 + MakeEntry("timerfd_gettime", UnknownArguments()), // 87 + MakeEntry("utimensat", UnknownArguments()), // 88 + MakeEntry("acct", kPath, kGen, kGen, kGen, kGen, kGen), // 89 + MakeEntry("capget", UnknownArguments()), // 90 + MakeEntry("capset", UnknownArguments()), // 91 + MakeEntry("personality", UnknownArguments()), // 92 + MakeEntry("exit", kInt, kGen, kGen, kGen, kGen, kGen), // 93 + MakeEntry("exit_group", kInt, kGen, kGen, kGen, 
kGen, kGen), // 94 + MakeEntry("waitid", UnknownArguments()), // 95 + MakeEntry("set_tid_address", kHex, kGen, kGen, kGen, kGen, kGen), // 96 + MakeEntry("unshare", UnknownArguments()), // 97 + MakeEntry("futex", UnknownArguments()), // 98 + MakeEntry("set_robust_list", UnknownArguments()), // 99 + MakeEntry("get_robust_list", UnknownArguments()), // 100 + MakeEntry("nanosleep", kHex, kHex, kGen, kGen, kGen, kGen), // 101 + MakeEntry("getitimer", UnknownArguments()), // 102 + MakeEntry("setitimer", UnknownArguments()), // 103 + MakeEntry("kexec_load", UnknownArguments()), // 104 + MakeEntry("init_module", UnknownArguments()), // 105 + MakeEntry("delete_module", UnknownArguments()), // 106 + MakeEntry("timer_create", UnknownArguments()), // 107 + MakeEntry("timer_gettime", UnknownArguments()), // 108 + MakeEntry("timer_getoverrun", UnknownArguments()), // 109 + MakeEntry("timer_settime", UnknownArguments()), // 110 + MakeEntry("timer_delete", UnknownArguments()), // 111 + MakeEntry("clock_settime", UnknownArguments()), // 112 + MakeEntry("clock_gettime", UnknownArguments()), // 113 + MakeEntry("clock_getres", UnknownArguments()), // 114 + MakeEntry("clock_nanosleep", UnknownArguments()), // 115 + MakeEntry("syslog", UnknownArguments()), // 116 + MakeEntry("ptrace", UnknownArguments()), // 117 + MakeEntry("sched_setparam", UnknownArguments()), // 118 + MakeEntry("sched_setscheduler", UnknownArguments()), // 119 + MakeEntry("sched_getscheduler", UnknownArguments()), // 120 + MakeEntry("sched_getparam", UnknownArguments()), // 121 + MakeEntry("sched_setaffinity", UnknownArguments()), // 122 + MakeEntry("sched_getaffinity", UnknownArguments()), // 123 + MakeEntry("sched_yield", UnknownArguments()), // 124 + MakeEntry("sched_get_priority_max", UnknownArguments()), // 125 + MakeEntry("sched_get_priority_min", UnknownArguments()), // 126 + MakeEntry("sched_rr_get_interval", UnknownArguments()), // 127 + MakeEntry("restart_syscall", UnknownArguments()), // 128 + 
MakeEntry("kill", kInt, kSignal, kGen, kGen, kGen, kGen), // 129 + MakeEntry("tkill", kInt, kSignal, kGen, kGen, kGen, kGen), // 130 + MakeEntry("tgkill", kInt, kInt, kSignal, kGen, kGen, kGen), // 131 + MakeEntry("sigaltstack", UnknownArguments()), // 132 + MakeEntry("rt_sigsuspend", UnknownArguments()), // 133 + MakeEntry("rt_sigaction", kSignal, kHex, kHex, kInt, kGen, kGen), // 134 + MakeEntry("rt_sigprocmask", UnknownArguments()), // 135 + MakeEntry("rt_sigpending", UnknownArguments()), // 136 + MakeEntry("rt_sigtimedwait", UnknownArguments()), // 137 + MakeEntry("rt_sigqueueinfo", UnknownArguments()), // 138 + MakeEntry("rt_sigreturn", UnknownArguments()), // 139 + MakeEntry("setpriority", UnknownArguments()), // 140 + MakeEntry("getpriority", UnknownArguments()), // 141 + MakeEntry("reboot", UnknownArguments()), // 142 + MakeEntry("setregid", UnknownArguments()), // 143 + MakeEntry("setgid", UnknownArguments()), // 144 + MakeEntry("setreuid", UnknownArguments()), // 145 + MakeEntry("setuid", UnknownArguments()), // 146 + MakeEntry("setresuid", UnknownArguments()), // 147 + MakeEntry("getresuid", UnknownArguments()), // 148 + MakeEntry("setresgid", UnknownArguments()), // 149 + MakeEntry("getresgid", UnknownArguments()), // 150 + MakeEntry("setfsuid", UnknownArguments()), // 151 + MakeEntry("setfsgid", UnknownArguments()), // 152 + MakeEntry("times", UnknownArguments()), // 153 + MakeEntry("setpgid", UnknownArguments()), // 154 + MakeEntry("getpgid", UnknownArguments()), // 155 + MakeEntry("getsid", UnknownArguments()), // 156 + MakeEntry("setsid", UnknownArguments()), // 157 + MakeEntry("getgroups", UnknownArguments()), // 158 + MakeEntry("setgroups", UnknownArguments()), // 159 + MakeEntry("uname", UnknownArguments()), // 160 + MakeEntry("sethostname", UnknownArguments()), // 161 + MakeEntry("setdomainname", UnknownArguments()), // 162 + MakeEntry("getrlimit", UnknownArguments()), // 163 + MakeEntry("setrlimit", UnknownArguments()), // 164 + 
MakeEntry("getrusage", UnknownArguments()), // 165 + MakeEntry("umask", kHex, kGen, kGen, kGen, kGen, kGen), // 166 + MakeEntry("prctl", kInt, kHex, kHex, kHex, kHex, kGen), // 167 + MakeEntry("getcpu", kHex, kHex, kHex, kGen, kGen, kGen), // 168 + MakeEntry("gettimeofday", kHex, kHex, kGen, kGen, kGen, kGen), // 169 + MakeEntry("settimeofday", kHex, kHex, kGen, kGen, kGen, kGen), // 170 + MakeEntry("adjtimex", UnknownArguments()), // 171 + MakeEntry("getpid", UnknownArguments()), // 172 + MakeEntry("getppid", UnknownArguments()), // 173 + MakeEntry("getuid", UnknownArguments()), // 174 + MakeEntry("geteuid", UnknownArguments()), // 175 + MakeEntry("getgid", UnknownArguments()), // 176 + MakeEntry("getegid", UnknownArguments()), // 177 + MakeEntry("gettid", UnknownArguments()), // 178 + MakeEntry("sysinfo", UnknownArguments()), // 179 + MakeEntry("mq_open", UnknownArguments()), // 180 + MakeEntry("mq_unlink", UnknownArguments()), // 181 + MakeEntry("mq_timedsend", UnknownArguments()), // 182 + MakeEntry("mq_timedreceive", UnknownArguments()), // 183 + MakeEntry("mq_notify", UnknownArguments()), // 184 + MakeEntry("mq_getsetattr", UnknownArguments()), // 185 + MakeEntry("msgget", UnknownArguments()), // 186 + MakeEntry("msgctl", UnknownArguments()), // 187 + MakeEntry("msgrcv", UnknownArguments()), // 188 + MakeEntry("msgsnd", UnknownArguments()), // 189 + MakeEntry("semget", UnknownArguments()), // 190 + MakeEntry("semctl", UnknownArguments()), // 191 + MakeEntry("semtimedop", UnknownArguments()), // 192 + MakeEntry("semop", UnknownArguments()), // 193 + MakeEntry("shmget", UnknownArguments()), // 194 + MakeEntry("shmctl", UnknownArguments()), // 195 + MakeEntry("shmat", UnknownArguments()), // 196 + MakeEntry("shmdt", UnknownArguments()), // 197 + MakeEntry("socket", kAddressFamily, kInt, kInt, kGen, kGen, kGen), // 198 + MakeEntry("socketpair", UnknownArguments()), // 199 + MakeEntry("bind", UnknownArguments()), // 200 + MakeEntry("listen", UnknownArguments()), 
// 201 + MakeEntry("accept", UnknownArguments()), // 202 + MakeEntry("connect", kInt, kSockaddr, kInt, kGen, kGen, kGen), // 203 + MakeEntry("getsockname", UnknownArguments()), // 204 + MakeEntry("getpeername", UnknownArguments()), // 205 + MakeEntry("sendto", kInt, kGen, kInt, kHex, kSockaddr, kInt), // 206 + MakeEntry("recvfrom", UnknownArguments()), // 207 + MakeEntry("setsockopt", UnknownArguments()), // 208 + MakeEntry("getsockopt", UnknownArguments()), // 209 + MakeEntry("shutdown", UnknownArguments()), // 210 + MakeEntry("sendmsg", kInt, kSockmsghdr, kHex, kGen, kGen, kGen), // 211 + MakeEntry("recvmsg", UnknownArguments()), // 212 + MakeEntry("readahead", UnknownArguments()), // 213 + MakeEntry("brk", kHex, kGen, kGen, kGen, kGen, kGen), // 214 + MakeEntry("munmap", kHex, kHex, kGen, kGen, kGen, kGen), // 215 + MakeEntry("mremap", UnknownArguments()), // 216 + MakeEntry("add_key", UnknownArguments()), // 217 + MakeEntry("request_key", UnknownArguments()), // 218 + MakeEntry("keyctl", UnknownArguments()), // 219 + MakeEntry("clone", kCloneFlag, kHex, kHex, kHex, kHex, kGen), // 220 + MakeEntry("execve", kPath, kHex, kHex, kGen, kGen, kGen), // 221 + MakeEntry("mmap", kHex, kInt, kHex, kHex, kInt, kInt), // 222 + MakeEntry("fadvise64", UnknownArguments()), // 223 + MakeEntry("swapon", kPath, kHex, kGen, kGen, kGen, kGen), // 224 + MakeEntry("swapoff", kPath, kGen, kGen, kGen, kGen, kGen), // 225 + MakeEntry("mprotect", kHex, kHex, kHex, kGen, kGen, kGen), // 226 + MakeEntry("msync", UnknownArguments()), // 227 + MakeEntry("mlock", UnknownArguments()), // 228 + MakeEntry("munlock", UnknownArguments()), // 229 + MakeEntry("mlockall", UnknownArguments()), // 230 + MakeEntry("munlockall", UnknownArguments()), // 231 + MakeEntry("mincore", UnknownArguments()), // 232 + MakeEntry("madvise", UnknownArguments()), // 233 + MakeEntry("remap_file_pages", UnknownArguments()), // 234 + MakeEntry("mbind", UnknownArguments()), // 235 + MakeEntry("get_mempolicy", 
UnknownArguments()), // 236 + MakeEntry("set_mempolicy", UnknownArguments()), // 237 + MakeEntry("migrate_pages", UnknownArguments()), // 238 + MakeEntry("move_pages", UnknownArguments()), // 239 + MakeEntry("rt_tgsigqueueinfo", UnknownArguments()), // 240 + MakeEntry("perf_event_open", UnknownArguments()), // 241 + MakeEntry("accept4", UnknownArguments()), // 242 + MakeEntry("recvmmsg", kInt, kHex, kHex, kHex, kGen, kGen), // 243 + SYSCALLS_UNUSED("UNUSED244"), // 244 + SYSCALLS_UNUSED("UNUSED245"), // 245 + SYSCALLS_UNUSED("UNUSED246"), // 246 + SYSCALLS_UNUSED("UNUSED247"), // 247 + SYSCALLS_UNUSED("UNUSED248"), // 248 + SYSCALLS_UNUSED("UNUSED249"), // 249 + SYSCALLS_UNUSED("UNUSED250"), // 250 + SYSCALLS_UNUSED("UNUSED251"), // 251 + SYSCALLS_UNUSED("UNUSED252"), // 252 + SYSCALLS_UNUSED("UNUSED253"), // 253 + SYSCALLS_UNUSED("UNUSED254"), // 254 + SYSCALLS_UNUSED("UNUSED255"), // 255 + SYSCALLS_UNUSED("UNUSED256"), // 256 + SYSCALLS_UNUSED("UNUSED257"), // 257 + SYSCALLS_UNUSED("UNUSED258"), // 258 + SYSCALLS_UNUSED("UNUSED259"), // 259 + MakeEntry("wait4", kInt, kHex, kHex, kHex, kGen, kGen), // 260 + MakeEntry("prlimit64", kInt, kInt, kHex, kHex, kGen, kGen), // 261 + MakeEntry("fanotify_init", kHex, kHex, kInt, kGen, kGen, kGen), // 262 + MakeEntry("fanotify_mark", kInt, kHex, kInt, kPath, kGen, kGen), // 263 + MakeEntry("name_to_handle_at", kInt, kGen, kHex, kHex, kHex, kGen), // 264 + MakeEntry("open_by_handle_at", kInt, kHex, kHex, kGen, kGen, kGen), // 265 + MakeEntry("clock_adjtime", kInt, kHex, kGen, kGen, kGen, kGen), // 266 + MakeEntry("syncfs", kInt, kGen, kGen, kGen, kGen, kGen), // 267 + MakeEntry("setns", kInt, kHex, kGen, kGen, kGen, kGen), // 268 + MakeEntry("sendmmsg", kInt, kHex, kInt, kHex, kGen, kGen), // 269 + MakeEntry("process_vm_readv", kInt, kHex, kInt, kHex, kInt, kInt), // 270 + MakeEntry("process_vm_writev", kInt, kHex, kInt, kHex, kInt, kInt), // 271 + MakeEntry("kcmp", kInt, kInt, kInt, kHex, kHex, kGen), // 272 + 
MakeEntry("finit_module", kInt, kPath, kHex, kGen, kGen, kGen), // 273 + MakeEntry("sched_setattr", UnknownArguments()), // 274 + MakeEntry("sched_getattr", UnknownArguments()), // 275 + MakeEntry("renameat2", kGen, kPath, kGen, kPath, kGen, kGen), // 276 + MakeEntry("seccomp", UnknownArguments()), // 277 + MakeEntry("getrandom", UnknownArguments()), // 278 + MakeEntry("memfd_create", UnknownArguments()), // 279 +}; + #undef SYSCALLS_UNUSED00_99 #undef SYSCALLS_UNUSED50_99 #undef SYSCALLS_UNUSED00_49 @@ -1230,6 +1515,8 @@ SyscallTable SyscallTable::get(cpu::Architecture arch) { return SyscallTable(kSyscallDataX8632); case cpu::kPPC64LE: return SyscallTable(kSyscallDataPPC64LE); + case cpu::kArm64: + return SyscallTable(kSyscallDataArm64); default: return SyscallTable(); } diff --git a/sandboxed_api/sandbox2/util.cc b/sandboxed_api/sandbox2/util.cc index 3e97abc..c3d90ba 100644 --- a/sandboxed_api/sandbox2/util.cc +++ b/sandboxed_api/sandbox2/util.cc @@ -126,8 +126,9 @@ ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NOINLINE pid_t CloneAndJump(int flags, jmp_buf* env_ptr) { uint8_t stack_buf[PTHREAD_STACK_MIN] ABSL_CACHELINE_ALIGNED; - static_assert(host_cpu::IsX8664() || host_cpu::IsPPC64LE(), - "Host CPU architecture not supported, see config.h"); + static_assert( + host_cpu::IsX8664() || host_cpu::IsPPC64LE() || host_cpu::IsArm64(), + "Host CPU architecture not supported, see config.h"); // Stack grows down. void* stack = stack_buf + sizeof(stack_buf); int r; diff --git a/sandboxed_api/sandbox2/violation.proto b/sandboxed_api/sandbox2/violation.proto index 57a3b3d..235cf6a 100644 --- a/sandboxed_api/sandbox2/violation.proto +++ b/sandboxed_api/sandbox2/violation.proto @@ -25,7 +25,6 @@ enum PBViolationType { SYSCALL_ARCHITECTURE_MISMATCH = 3; } -// X86_64 not allowed (naming convention...) message RegisterX8664 { uint64 r15 = 1; uint64 r14 = 2; @@ -77,7 +76,6 @@ message RegisterPowerpc64 { uint64 zero3 = 17; } -// Deprecated. 
message RegisterAarch64 { repeated uint64 regs = 1; uint64 sp = 2; @@ -90,7 +88,7 @@ message RegisterValues { oneof register_values { RegisterX8664 register_x86_64 = 2; RegisterPowerpc64 register_powerpc64 = 3; - RegisterAarch64 register_aarch64 = 4; // Deprecated. + RegisterAarch64 register_aarch64 = 4; } } From ed0086eb66f49f54fd3a8502fb781554131ff3a9 Mon Sep 17 00:00:00 2001 From: Christian Blichmann Date: Mon, 14 Sep 2020 01:18:09 -0700 Subject: [PATCH 42/42] Fix dynamic binary startup on PPC and newer glibc (> 2.19) This allows the `_llseek` syscall when it is defined. PiperOrigin-RevId: 331498182 Change-Id: I2760b264e3a82000b38d278a9c280501a3dbc724 --- sandboxed_api/sandbox2/policybuilder.cc | 6 +++++- sandboxed_api/sandbox2/stack_trace.cc | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sandboxed_api/sandbox2/policybuilder.cc b/sandboxed_api/sandbox2/policybuilder.cc index c0bfd41..678677d 100644 --- a/sandboxed_api/sandbox2/policybuilder.cc +++ b/sandboxed_api/sandbox2/policybuilder.cc @@ -533,7 +533,11 @@ PolicyBuilder& PolicyBuilder::AllowStaticStartup() { PolicyBuilder& PolicyBuilder::AllowDynamicStartup() { AllowRead(); AllowStat(); - AllowSyscalls({__NR_lseek, __NR_close, __NR_munmap}); + AllowSyscalls({__NR_lseek, +#ifdef __NR__llseek + __NR__llseek, // Newer glibc on PPC +#endif + __NR_close, __NR_munmap}); AddPolicyOnSyscall(__NR_mprotect, { ARG_32(2), JEQ32(PROT_READ, ALLOW), diff --git a/sandboxed_api/sandbox2/stack_trace.cc b/sandboxed_api/sandbox2/stack_trace.cc index 868b89e..ef494b1 100644 --- a/sandboxed_api/sandbox2/stack_trace.cc +++ b/sandboxed_api/sandbox2/stack_trace.cc @@ -86,6 +86,9 @@ std::unique_ptr StackTracePeer::GetPolicy(pid_t target_pid, // libunwind .AllowSyscall(__NR_fstat) .AllowSyscall(__NR_lseek) +#ifdef __NR__llseek + .AllowSyscall(__NR__llseek) // Newer glibc on PPC +#endif .AllowSyscall(__NR_mincore) .AllowSyscall(__NR_mprotect) .AllowSyscall(__NR_munmap)