diff --git a/.gitignore b/.gitignore index 80fa14348..0f4ce3a73 100644 --- a/.gitignore +++ b/.gitignore @@ -62,3 +62,4 @@ __pycache__ # Generated files *.pc +.vscode/settings.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 92c886dfc..67c58f083 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -140,6 +140,7 @@ set(XTENSOR_HEADERS ${XTENSOR_INCLUDE_DIR}/xtensor/xfixed.hpp ${XTENSOR_INCLUDE_DIR}/xtensor/xfunction.hpp ${XTENSOR_INCLUDE_DIR}/xtensor/xfunctor_view.hpp + ${XTENSOR_INCLUDE_DIR}/xtensor/xfft.hpp ${XTENSOR_INCLUDE_DIR}/xtensor/xgenerator.hpp ${XTENSOR_INCLUDE_DIR}/xtensor/xhistogram.hpp ${XTENSOR_INCLUDE_DIR}/xtensor/xindex_view.hpp @@ -199,6 +200,7 @@ target_link_libraries(xtensor INTERFACE xtl) OPTION(XTENSOR_ENABLE_ASSERT "xtensor bound check" OFF) OPTION(XTENSOR_CHECK_DIMENSION "xtensor dimension check" OFF) +OPTION(XTENSOR_FORCE_TEMPORARY_MEMORY_IN_ASSIGNMENTS "xtensor force the use of temporary memory when assigning instead of an automatic overlap check" ON) OPTION(BUILD_TESTS "xtensor test suite" OFF) OPTION(BUILD_BENCHMARK "xtensor benchmark" OFF) OPTION(DOWNLOAD_GTEST "build gtest from downloaded sources" OFF) @@ -219,6 +221,10 @@ if(XTENSOR_CHECK_DIMENSION) add_definitions(-DXTENSOR_ENABLE_CHECK_DIMENSION) endif() +if(XTENSOR_FORCE_TEMPORARY_MEMORY_IN_ASSIGNMENTS) + add_definitions(-DXTENSOR_FORCE_TEMPORARY_MEMORY_IN_ASSIGNMENTS) +endif() + if(DEFAULT_COLUMN_MAJOR) add_definitions(-DXTENSOR_DEFAULT_LAYOUT=layout_type::column_major) endif() diff --git a/README.md b/README.md index a52909a17..7be17f6ee 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # ![xtensor](docs/source/xtensor.svg) -![linux](https://github.com/xtensor-stack/xtensor/actions/workflows/linux.yml/badge.svg) -![osx](https://github.com/xtensor-stack/xtensor/actions/workflows/osx.yml/badge.svg) -![windows](https://github.com/xtensor-stack/xtensor/actions/workflows/windows.yml/badge.svg) +[![GHA Linux](https://github.com/xtensor-stack/xtensor/actions/workflows/linux.yml/badge.svg)](https://github.com/xtensor-stack/xtensor/actions/workflows/linux.yml) +[![GHA OSX](https://github.com/xtensor-stack/xtensor/actions/workflows/osx.yml/badge.svg)](https://github.com/xtensor-stack/xtensor/actions/workflows/osx.yml) +[![GHA Windows](https://github.com/xtensor-stack/xtensor/actions/workflows/windows.yml/badge.svg)](https://github.com/xtensor-stack/xtensor/actions/workflows/windows.yml) [![Documentation](http://readthedocs.org/projects/xtensor/badge/?version=latest)](https://xtensor.readthedocs.io/en/latest/?badge=latest) [![Doxygen -> gh-pages](https://github.com/xtensor-stack/xtensor/workflows/gh-pages/badge.svg)](https://xtensor-stack.github.io/xtensor) [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/xtensor-stack/xtensor/stable?filepath=notebooks%2Fxtensor.ipynb) diff --git a/docs/source/api/container_index.rst b/docs/source/api/container_index.rst index bb3e2d724..ef8c0ba84 100644 --- a/docs/source/api/container_index.rst +++ b/docs/source/api/container_index.rst @@ -33,3 +33,4 @@ xexpression API is actually implemented in ``xstrided_container`` and ``xcontain xindex_view xfunctor_view xrepeat + xfft diff --git a/docs/source/xfft.rst b/docs/source/xfft.rst new file mode 100644 index 000000000..78ba5ff7d --- /dev/null +++ b/docs/source/xfft.rst @@ -0,0 +1,17 @@ +.. Copyright (c) 2016, Johan Mabille, Sylvain Corlay and Wolf Vollprecht + Distributed under the terms of the BSD 3-Clause License. + The full license is in the file LICENSE, distributed with this software. +xfft +==== + +Defined in ``xtensor/xfft.hpp`` + +.. doxygenclass:: xt::fft_convolve + :project: xtensor + :members: + +.. doxygentypedef:: xt::fft + :project: xtensor + +.. doxygentypedef:: xt::ifft + :project: xtensor diff --git a/include/xtensor/xbroadcast.hpp b/include/xtensor/xbroadcast.hpp index 798b9cc9d..20b04edab 100644 --- a/include/xtensor/xbroadcast.hpp +++ b/include/xtensor/xbroadcast.hpp @@ -118,6 +118,29 @@ namespace xt return linear_end(c.expression()); } + /************************************* + * overlapping_memory_checker_traits * + *************************************/ + + template + struct overlapping_memory_checker_traits< + E, + std::enable_if_t::value && is_specialization_of::value>> + { + static bool check_overlap(const E& expr, const memory_range& dst_range) + { + if (expr.size() == 0) + { + return false; + } + else + { + using ChildE = std::decay_t; + return overlapping_memory_checker_traits::check_overlap(expr.expression(), dst_range); + } + } + }; + /** * @class xbroadcast * @brief Broadcasted xexpression to a specified shape. diff --git a/include/xtensor/xfft.hpp b/include/xtensor/xfft.hpp new file mode 100644 index 000000000..472302d07 --- /dev/null +++ b/include/xtensor/xfft.hpp @@ -0,0 +1,241 @@ +#ifdef XTENSOR_USE_TBB +#include +#endif +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace xt +{ + namespace fft + { + namespace detail + { + template < + class E, + typename std::enable_if::type::value_type>::value, bool>::type = true> + inline auto radix2(E&& e) + { + using namespace xt::placeholders; + using namespace std::complex_literals; + using value_type = typename std::decay_t::value_type; + using precision = typename value_type::value_type; + auto N = e.size(); + const bool powerOfTwo = !(N == 0) && !(N & (N - 1)); + // check for power of 2 + if (!powerOfTwo || N == 0) + { + // TODO: Replace implementation with dft + XTENSOR_THROW(std::runtime_error, "FFT Implementation requires power of 2"); + } + auto pi = xt::numeric_constants::PI; + xt::xtensor ev = e; + if (N <= 1) + { + return ev; + } + else + { +#ifdef XTENSOR_USE_TBB + xt::xtensor even; + xt::xtensor odd; + oneapi::tbb::parallel_invoke( + [&] + { + even = radix2(xt::view(ev, xt::range(0, _, 2))); + }, + [&] + { + odd = radix2(xt::view(ev, xt::range(1, _, 2))); + } + ); +#else + auto even = radix2(xt::view(ev, xt::range(0, _, 2))); + auto odd = radix2(xt::view(ev, xt::range(1, _, 2))); +#endif + + auto range = xt::arange(N / 2); + auto exp = xt::exp(static_cast(-2i) * pi * range / N); + auto t = exp * odd; + auto first_half = even + t; + auto second_half = even - t; + // TODO: should be a call to stack if performance was improved + auto spectrum = xt::xtensor::from_shape({N}); + xt::view(spectrum, xt::range(0, N / 2)) = first_half; + xt::view(spectrum, xt::range(N / 2, N)) = second_half; + return spectrum; + } + } + + template + auto transform_bluestein(E&& data) + { + using value_type = typename std::decay_t::value_type; + using precision = typename value_type::value_type; + + // Find a power-of-2 convolution length m such that m >= n * 2 + 1 + const std::size_t n = data.size(); + size_t m = std::ceil(std::log2(n * 2 + 1)); + m = std::pow(2, m); + + // Trignometric table + auto exp_table = xt::xtensor, 1>::from_shape({n}); + xt::xtensor i = xt::pow(xt::linspace(0, n - 1, n), 2); + i %= (n * 2); + + auto angles = xt::eval(precision{3.141592653589793238463} * i / n); + auto j = std::complex(0, 1); + exp_table = xt::exp(-angles * j); + + // Temporary vectors and preprocessing + auto av = xt::empty>({m}); + xt::view(av, xt::range(0, n)) = data * exp_table; + + + auto bv = xt::empty>({m}); + xt::view(bv, xt::range(0, n)) = ::xt::conj(exp_table); + xt::view(bv, xt::range(-n + 1, xt::placeholders::_)) = xt::view( + ::xt::conj(xt::flip(exp_table)), + xt::range(xt::placeholders::_, -1) + ); + + // Convolution + auto xv = radix2(av); + auto yv = radix2(bv); + auto spectrum_k = xv * yv; + auto complex_args = xt::conj(spectrum_k); + auto fft_res = radix2(complex_args); + auto cv = xt::conj(fft_res) / m; + + return xt::eval(xt::view(cv, xt::range(0, n)) * exp_table); + } + } // namespace detail + + /** + * @brief 1D FFT of an Nd array along a specified axis + * @param e an Nd expression to be transformed to the fourier domain + * @param axis the axis along which to perform the 1D FFT + * @return a transformed xarray of the specified precision + */ + template < + class E, + typename std::enable_if::type::value_type>::value, bool>::type = true> + inline auto fft(E&& e, std::ptrdiff_t axis = -1) + { + using value_type = typename std::decay_t::value_type; + using precision = typename value_type::value_type; + const auto saxis = xt::normalize_axis(e.dimension(), axis); + const size_t N = e.shape(saxis); + const bool powerOfTwo = !(N == 0) && !(N & (N - 1)); + xt::xarray> out = xt::eval(e); + auto begin = xt::axis_slice_begin(out, saxis); + auto end = xt::axis_slice_end(out, saxis); + for (auto iter = begin; iter != end; iter++) + { + if (powerOfTwo) + { + xt::noalias(*iter) = detail::radix2(*iter); + } + else + { + xt::noalias(*iter) = detail::transform_bluestein(*iter); + } + } + return out; + } + + /** + * @breif 1D FFT of an Nd array along a specified axis + * @param e an Nd expression to be transformed to the fourier domain + * @param axis the axis along which to perform the 1D FFT + * @return a transformed xarray of the specified precision + */ + template < + class E, + typename std::enable_if::type::value_type>::value, bool>::type = true> + inline auto fft(E&& e, std::ptrdiff_t axis = -1) + { + using value_type = typename std::decay::type::value_type; + return fft(xt::cast>(e), axis); + } + + template < + class E, + typename std::enable_if::type::value_type>::value, bool>::type = true> + auto ifft(E&& e, std::ptrdiff_t axis = -1) + { + // check the length of the data on that axis + const std::size_t n = e.shape(axis); + if (n == 0) + { + XTENSOR_THROW(std::runtime_error, "Cannot take the iFFT along an empty dimention"); + } + auto complex_args = xt::conj(e); + auto fft_res = xt::fft::fft(complex_args, axis); + fft_res = xt::conj(fft_res); + return fft_res; + } + + template < + class E, + typename std::enable_if::type::value_type>::value, bool>::type = true> + inline auto ifft(E&& e, std::ptrdiff_t axis = -1) + { + using value_type = typename std::decay::type::value_type; + return ifft(xt::cast>(e), axis); + } + + /* + * @brief performs a circular fft convolution xvec and yvec must + * be the same shape. + * @param xvec first array of the convolution + * @param yvec second array of the convolution + * @param axis axis along which to perform the convolution + */ + template + auto convolve(E1&& xvec, E2&& yvec, std::ptrdiff_t axis = -1) + { + // we could broadcast but that could get complicated??? + if (xvec.dimension() != yvec.dimension()) + { + XTENSOR_THROW(std::runtime_error, "Mismatched dimentions"); + } + + auto saxis = xt::normalize_axis(xvec.dimension(), axis); + if (xvec.shape(saxis) != yvec.shape(saxis)) + { + XTENSOR_THROW(std::runtime_error, "Mismatched lengths along slice axis"); + } + + const std::size_t n = xvec.shape(saxis); + + auto xv = fft(xvec, axis); + auto yv = fft(yvec, axis); + + auto begin_x = xt::axis_slice_begin(xv, saxis); + auto end_x = xt::axis_slice_end(xv, saxis); + auto iter_y = xt::axis_slice_begin(yv, saxis); + + for (auto iter = begin_x; iter != end_x; iter++) + { + (*iter) = (*iter_y++) * (*iter); + } + + auto outvec = ifft(xv, axis); + + // Scaling (because this FFT implementation omits it) + outvec = outvec / n; + + return outvec; + } + + } +} // namespace xt::fft diff --git a/include/xtensor/xfunction.hpp b/include/xtensor/xfunction.hpp index 08a3dc1c1..f11362cdb 100644 --- a/include/xtensor/xfunction.hpp +++ b/include/xtensor/xfunction.hpp @@ -162,6 +162,42 @@ namespace xt { }; + /************************************* + * overlapping_memory_checker_traits * + *************************************/ + + template + struct overlapping_memory_checker_traits< + E, + std::enable_if_t::value && is_specialization_of::value>> + { + template = 0> + static bool check_tuple(const std::tuple&, const memory_range&) + { + return false; + } + + template = 0> + static bool check_tuple(const std::tuple& t, const memory_range& dst_range) + { + using ChildE = std::decay_t(t))>; + return overlapping_memory_checker_traits::check_overlap(std::get(t), dst_range) + || check_tuple(t, dst_range); + } + + static bool check_overlap(const E& expr, const memory_range& dst_range) + { + if (expr.size() == 0) + { + return false; + } + else + { + return check_tuple(expr.arguments(), dst_range); + } + } + }; + /************* * xfunction * *************/ diff --git a/include/xtensor/xgenerator.hpp b/include/xtensor/xgenerator.hpp index 551bb7e24..03433adca 100644 --- a/include/xtensor/xgenerator.hpp +++ b/include/xtensor/xgenerator.hpp @@ -76,6 +76,21 @@ namespace xt using size_type = std::size_t; }; + /************************************* + * overlapping_memory_checker_traits * + *************************************/ + + template + struct overlapping_memory_checker_traits< + E, + std::enable_if_t::value && is_specialization_of::value>> + { + static bool check_overlap(const E&, const memory_range&) + { + return false; + } + }; + /** * @class xgenerator * @brief Multidimensional function operating on indices. diff --git a/include/xtensor/xmath.hpp b/include/xtensor/xmath.hpp index 6e32df15b..3035502a5 100644 --- a/include/xtensor/xmath.hpp +++ b/include/xtensor/xmath.hpp @@ -338,6 +338,7 @@ namespace xt XTENSOR_UNARY_MATH_FUNCTOR(isfinite); XTENSOR_UNARY_MATH_FUNCTOR(isinf); XTENSOR_UNARY_MATH_FUNCTOR(isnan); + XTENSOR_UNARY_MATH_FUNCTOR(conj); } #undef XTENSOR_UNARY_MATH_FUNCTOR diff --git a/include/xtensor/xsemantic.hpp b/include/xtensor/xsemantic.hpp index 41f14951c..8aa76cfc9 100644 --- a/include/xtensor/xsemantic.hpp +++ b/include/xtensor/xsemantic.hpp @@ -217,6 +217,29 @@ namespace xt template using disable_xcontainer_semantics = typename std::enable_if::value, R>::type; + + template + class xview_semantic; + + template + struct overlapping_memory_checker_traits< + E, + std::enable_if_t::value && is_crtp_base_of::value>> + { + static bool check_overlap(const E& expr, const memory_range& dst_range) + { + if (expr.size() == 0) + { + return false; + } + else + { + using ChildE = std::decay_t; + return overlapping_memory_checker_traits::check_overlap(expr.expression(), dst_range); + } + } + }; + /** * @class xview_semantic * @brief Implementation of the xsemantic_base interface for @@ -598,8 +621,22 @@ namespace xt template inline auto xsemantic_base::operator=(const xexpression& e) -> derived_type& { +#ifdef XTENSOR_FORCE_TEMPORARY_MEMORY_IN_ASSIGNMENTS temporary_type tmp(e); return this->derived_cast().assign_temporary(std::move(tmp)); +#else + auto&& this_derived = this->derived_cast(); + auto memory_checker = make_overlapping_memory_checker(this_derived); + if (memory_checker.check_overlap(e.derived_cast())) + { + temporary_type tmp(e); + return this_derived.assign_temporary(std::move(tmp)); + } + else + { + return this->assign(e); + } +#endif } /************************************** diff --git a/include/xtensor/xutils.hpp b/include/xtensor/xutils.hpp index 137d0e70e..21c452489 100644 --- a/include/xtensor/xutils.hpp +++ b/include/xtensor/xutils.hpp @@ -119,6 +119,20 @@ namespace xt using type = T; }; + /*************************************** + * is_specialization_of implementation * + ***************************************/ + + template