OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_arch.h
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_arch.h
34// Author: Aous Naman
35// Date: 28 August 2019
36//***************************************************************************/
37
38
39#ifndef OJPH_ARCH_H
40#define OJPH_ARCH_H
41
42#include <cstdio>
43#include <cstdint>
44#include <cmath>
45
46#include "ojph_defs.h"
47
48
// preprocessor directives for compiler
// At most one of OJPH_COMPILER_MSVC / OJPH_COMPILER_GNUC is defined; _MSC_VER
// is tested first, so a compiler that defines both _MSC_VER and __GNUC__ is
// classified as MSVC.  Neither macro is defined for any other compiler.
#if defined(_MSC_VER)
  #define OJPH_COMPILER_MSVC
#elif defined(__GNUC__)
  #define OJPH_COMPILER_GNUC
#endif

// OJPH_EMSCRIPTEN mirrors the toolchain's __EMSCRIPTEN__ macro
#if defined(__EMSCRIPTEN__)
  #define OJPH_EMSCRIPTEN
#endif
61
62#ifdef OJPH_COMPILER_MSVC
63#include <intrin.h>
64#endif
65
// preprocessor directives for architecture
// Exactly one OJPH_ARCH_* macro is defined, chosen from the target macros the
// compiler predefines.  The first matching family wins; OJPH_ARCH_UNKNOWN is
// the fallback when none of the tested macros is present.
#if defined(__arm__) || defined(__TARGET_ARCH_ARM) || \
    defined(__aarch64__) || defined(_M_ARM64)
  // 32-bit and 64-bit ARM targets share a single macro
  #define OJPH_ARCH_ARM
#elif defined(__i386) || defined(__i386__) || defined(_M_IX86)
  #define OJPH_ARCH_I386
#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || \
      defined(_M_X64)
  #define OJPH_ARCH_X86_64
#elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
  #define OJPH_ARCH_IA64
#elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__) || \
      defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC) || \
      defined(_M_MPPC) || defined(_M_PPC)
  // PowerPC: split into 64-bit and 32-bit flavours
  #if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__)
    #define OJPH_ARCH_PPC64
  #else
    #define OJPH_ARCH_PPC
  #endif
#else
  #define OJPH_ARCH_UNKNOWN
#endif
90
91namespace ojph {
// disable SIMD on every architecture other than x86 (32- or 64-bit) and ARM;
// a build that already defines OJPH_DISABLE_SIMD is left untouched
#ifndef OJPH_DISABLE_SIMD
  #if !(defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386) || \
        defined(OJPH_ARCH_ARM))
    #define OJPH_DISABLE_SIMD
  #endif
#endif // !OJPH_DISABLE_SIMD
99
// OS detection definitions
// At most one OJPH_OS_* macro is defined.  The tests are ordered: Windows,
// Apple, Android, Linux -- so Android is classified before the __linux test
// is reached.
#if defined(WIN32) || defined(_WIN32) || defined(_WIN64)
  #define OJPH_OS_WINDOWS
#elif defined(__APPLE__)
  #define OJPH_OS_APPLE
#elif defined(__ANDROID__)
  #define OJPH_OS_ANDROID
#elif defined(__linux)
  #define OJPH_OS_LINUX
#endif
112
// defines for dll
// OJPH_EXPORT expands to __declspec(dllexport) only when building the shared
// library on Windows; in every other configuration it expands to nothing.
#if !defined(OJPH_OS_WINDOWS) || !defined(OJPH_BUILD_SHARED_LIBRARY)
  #define OJPH_EXPORT
#else
  #define OJPH_EXPORT __declspec(dllexport)
#endif
121
123 // cpu features
126 int get_cpu_ext_level();
127
// x86 CPU extension levels, numbered consecutively from 0 in declaration
// order
enum : int {
  X86_CPU_EXT_LEVEL_GENERIC = 0,
  X86_CPU_EXT_LEVEL_MMX = 1,
  X86_CPU_EXT_LEVEL_SSE = 2,
  X86_CPU_EXT_LEVEL_SSE2 = 3,
  X86_CPU_EXT_LEVEL_SSE3 = 4,
  X86_CPU_EXT_LEVEL_SSSE3 = 5,
  X86_CPU_EXT_LEVEL_SSE41 = 6,
  X86_CPU_EXT_LEVEL_SSE42 = 7,
  X86_CPU_EXT_LEVEL_AVX = 8,
  X86_CPU_EXT_LEVEL_AVX2 = 9,
  X86_CPU_EXT_LEVEL_AVX2FMA = 10,
  X86_CPU_EXT_LEVEL_AVX512 = 11,
};
142
// ARM CPU extension levels, numbered consecutively from 0 in declaration
// order
enum : int {
  ARM_CPU_EXT_LEVEL_GENERIC = 0,
  ARM_CPU_EXT_LEVEL_NEON = 1,
  ARM_CPU_EXT_LEVEL_ASIMD = 2,
  ARM_CPU_EXT_LEVEL_SVE = 3,
  ARM_CPU_EXT_LEVEL_SVE2 = 4,
};
150
152 static inline ui32 population_count(ui32 val)
153 {
154 #if defined(OJPH_COMPILER_MSVC) \
155 && (defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
156 return (ui32)__popcnt(val);
157 #elif (defined OJPH_COMPILER_GNUC)
158 return (ui32)__builtin_popcount(val);
159 #else
160 val -= ((val >> 1) & 0x55555555);
161 val = (((val >> 2) & 0x33333333) + (val & 0x33333333));
162 val = (((val >> 4) + val) & 0x0f0f0f0f);
163 val += (val >> 8);
164 val += (val >> 16);
165 return (int)(val & 0x0000003f);
166 #endif
167 }
168
170 static inline ui32 population_count64(ui64 val)
171 {
172 #if defined(OJPH_COMPILER_MSVC) \
173 && (defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
174 return (ui32)__popcnt64(val);
175 #elif (defined OJPH_COMPILER_GNUC)
176 return (ui32)__builtin_popcountll(val);
177 #else
178 const ui64 k1 = 0x5555555555555555ull;
179 const ui64 k2 = 0x3333333333333333ull;
180 const ui64 k4 = 0x0F0F0F0F0F0F0F0Full;
181 const ui64 kf = 0x0101010101010101ull;
182
183 // put count of each 2 bits into those 2 bits
184 val = val - ((val >> 1) & k1);
185 // put count of each 4 bits into those 4 bits
186 val = (val & k2) + ((val >> 2) & k2);
187 // put count of each 8 bits into those 8 bits
188 val = (val + (val >> 4)) & k4 ;
189 // returns 8 most significant bits of x + (x<<8) + (x<<16) + (x<<24) + ...
190 val = (val * kf) >> 56;
191 return (ui32) val;
192 #endif
193 }
194
196#ifdef OJPH_COMPILER_MSVC
197 #pragma intrinsic(_BitScanReverse)
198#endif
199 static inline ui32 count_leading_zeros(ui32 val)
200 {
201 #ifdef OJPH_COMPILER_MSVC
202 unsigned long result = 0;
203 _BitScanReverse(&result, val);
204 return 31 ^ (ui32)result;
205 #elif (defined OJPH_COMPILER_GNUC)
206 return (ui32)__builtin_clz(val);
207 #else
208 val |= (val >> 1);
209 val |= (val >> 2);
210 val |= (val >> 4);
211 val |= (val >> 8);
212 val |= (val >> 16);
213 return 32 - population_count(val);
214 #endif
215 }
216
218#ifdef OJPH_COMPILER_MSVC
219 #pragma intrinsic(_BitScanReverse64)
220#endif
221 static inline ui32 count_leading_zeros(ui64 val)
222 {
223 #ifdef OJPH_COMPILER_MSVC
224 unsigned long result = 0;
225 _BitScanReverse64(&result, val);
226 return 63 ^ (ui32)result;
227 #elif (defined OJPH_COMPILER_GNUC)
228 return (ui32)__builtin_clzll(val);
229 #else
230 val |= (val >> 1);
231 val |= (val >> 2);
232 val |= (val >> 4);
233 val |= (val >> 8);
234 val |= (val >> 16);
235 val |= (val >> 32);
236 return 64 - population_count64(val);
237 #endif
238 }
239
241#ifdef OJPH_COMPILER_MSVC
242 #pragma intrinsic(_BitScanForward)
243#endif
244 static inline ui32 count_trailing_zeros(ui32 val)
245 {
246 #ifdef OJPH_COMPILER_MSVC
247 unsigned long result = 0;
248 _BitScanForward(&result, val);
249 return (ui32)result;
250 #elif (defined OJPH_COMPILER_GNUC)
251 return (ui32)__builtin_ctz(val);
252 #else
253 val |= (val << 1);
254 val |= (val << 2);
255 val |= (val << 4);
256 val |= (val << 8);
257 val |= (val << 16);
258 return 32 - population_count(val);
259 #endif
260 }
261
263 static inline si32 ojph_round(float val)
264 {
265 #ifdef OJPH_COMPILER_MSVC
266 return (si32)(val + (val >= 0.0f ? 0.5f : -0.5f));
267 #elif (defined OJPH_COMPILER_GNUC)
268 return (si32)(val + (val >= 0.0f ? 0.5f : -0.5f));
269 #else
270 return (si32)round(val);
271 #endif
272 }
273
275 static inline si32 ojph_trunc(float val)
276 {
277 #ifdef OJPH_COMPILER_MSVC
278 return (si32)(val);
279 #elif (defined OJPH_COMPILER_GNUC)
280 return (si32)(val);
281 #else
282 return (si32)trunc(val);
283 #endif
284 }
285
287 // constants
289 #ifndef OJPH_EMSCRIPTEN
290 const ui32 byte_alignment = 64; // 64 bytes == 512 bits
293 #else
294 const ui32 byte_alignment = 16; // 16 bytes == 128 bits
296 const ui32 object_alignment = 8;
297 #endif
298
300 // templates for alignment
302
304 // finds the size such that it is a multiple of byte_alignment
305 template <typename T, ui32 N>
306 size_t calc_aligned_size(size_t size) {
307 size = size * sizeof(T) + N - 1;
308 size &= ~((1ULL << (31 - count_leading_zeros(N))) - 1);
309 size >>= (63 - count_leading_zeros((ui64)sizeof(T)));
310 return size;
311 }
312
314 // moves the pointer to first address that is a multiple of byte_alignment
315 template <typename T, ui32 N>
316 inline T *align_ptr(T *ptr) {
317 intptr_t p = reinterpret_cast<intptr_t>(ptr);
318 p += N - 1;
319 p &= ~((1ULL << (31 - count_leading_zeros(N))) - 1);
320 return reinterpret_cast<T *>(p);
321 }
322
323}
324
325#endif // !OJPH_ARCH_H
@ X86_CPU_EXT_LEVEL_AVX2
Definition ojph_arch.h:138
@ X86_CPU_EXT_LEVEL_AVX
Definition ojph_arch.h:137
@ X86_CPU_EXT_LEVEL_AVX512
Definition ojph_arch.h:140
@ X86_CPU_EXT_LEVEL_GENERIC
Definition ojph_arch.h:129
@ X86_CPU_EXT_LEVEL_SSE2
Definition ojph_arch.h:132
@ X86_CPU_EXT_LEVEL_SSE41
Definition ojph_arch.h:135
@ X86_CPU_EXT_LEVEL_SSE
Definition ojph_arch.h:131
@ X86_CPU_EXT_LEVEL_MMX
Definition ojph_arch.h:130
@ X86_CPU_EXT_LEVEL_SSE42
Definition ojph_arch.h:136
@ X86_CPU_EXT_LEVEL_SSSE3
Definition ojph_arch.h:134
@ X86_CPU_EXT_LEVEL_SSE3
Definition ojph_arch.h:133
@ X86_CPU_EXT_LEVEL_AVX2FMA
Definition ojph_arch.h:139
const ui32 object_alignment
Definition ojph_arch.h:292
const ui32 byte_alignment
Definition ojph_arch.h:290
uint64_t ui64
Definition ojph_defs.h:56
static si32 ojph_round(float val)
Definition ojph_arch.h:263
size_t calc_aligned_size(size_t size)
Definition ojph_arch.h:306
static ui32 population_count64(ui64 val)
Definition ojph_arch.h:170
T * align_ptr(T *ptr)
Definition ojph_arch.h:316
static ui32 population_count(ui32 val)
Definition ojph_arch.h:152
OJPH_EXPORT int get_cpu_ext_level()
static si32 ojph_trunc(float val)
Definition ojph_arch.h:275
static ui32 count_trailing_zeros(ui32 val)
Definition ojph_arch.h:244
static ui32 count_leading_zeros(ui32 val)
Definition ojph_arch.h:199
int32_t si32
Definition ojph_defs.h:55
@ ARM_CPU_EXT_LEVEL_SVE
Definition ojph_arch.h:147
@ ARM_CPU_EXT_LEVEL_SVE2
Definition ojph_arch.h:148
@ ARM_CPU_EXT_LEVEL_NEON
Definition ojph_arch.h:145
@ ARM_CPU_EXT_LEVEL_GENERIC
Definition ojph_arch.h:144
@ ARM_CPU_EXT_LEVEL_ASIMD
Definition ojph_arch.h:146
const ui32 log_byte_alignment
Definition ojph_arch.h:291
uint32_t ui32
Definition ojph_defs.h:54
#define OJPH_EXPORT
Definition ojph_arch.h:119