Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 22 additions & 60 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
#
# Copyright (c) 2012 Louis Dionne
#
cmake_minimum_required(VERSION 3.0)
set (CMAKE_CXX_STANDARD 11) # for constexpr specifier and other goodies
cmake_minimum_required(VERSION 3.5)

set (CMAKE_CXX_STANDARD 11) # for constexpr specifier and other goodies
set(CMAKE_CXX_STANDARD_REQUIRED True)
set (CMAKE_C_STANDARD 99)
set(CMAKE_C_STANDARD_REQUIRED True)
if (NOT CMAKE_BUILD_TYPE)
message(STATUS "No build type selected, default to Release")
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
Expand All @@ -28,26 +31,6 @@ include("${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/environment.cmake")

message("Building for architecture: ${CMAKE_SYSTEM_PROCESSOR}")

# Runs the C++ compiler with "-dumpversion" and hands the reported version
# string back to the caller.
#
# Taken and modified from Boost.cmake.
#
# The output is returned as printed by the compiler (only trailing whitespace
# is stripped); it is NOT reduced to major/minor digits.
#
# Arguments:
#   _OUTPUT_VERSION - name of the variable to set in the caller's scope.
#
# Example:
#   CXX_COMPILER_DUMPVERSION(CXX_COMPILER_VERSION)
#
function(CXX_COMPILER_DUMPVERSION _OUTPUT_VERSION)
  # CMAKE_CXX_COMPILER_ARG1 may carry extra compiler arguments as one
  # space-separated string; turn it into a list so each one is passed as its
  # own argument.
  set(compiler_args "${CMAKE_CXX_COMPILER_ARG1}")
  separate_arguments(compiler_args)

  # execute_process() is the supported replacement for exec_program(), which
  # is deprecated and rejected by newer CMake releases (policy CMP0153).
  execute_process(
    COMMAND "${CMAKE_CXX_COMPILER}" ${compiler_args} -dumpversion
    OUTPUT_VARIABLE COMPILER_VERSION
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )

  # Quoted so an empty result still sets the variable (to an empty string)
  # rather than unsetting it in the parent scope.
  set(${_OUTPUT_VERSION} "${COMPILER_VERSION}" PARENT_SCOPE)
endfunction()

# The version query is skipped on Windows.
if(NOT WIN32)
  CXX_COMPILER_DUMPVERSION(CXX_COMPILER_VERSION)
endif()

MESSAGE( STATUS "CMAKE_SIZEOF_VOID_P (should be 8): " ${CMAKE_SIZEOF_VOID_P} )
if( CMAKE_SIZEOF_VOID_P EQUAL 8 )
Expand All @@ -57,7 +40,6 @@ else()
endif()
MESSAGE( STATUS "CMAKE_CXX_COMPILER_ID: " ${CMAKE_CXX_COMPILER_ID} )
MESSAGE( STATUS "CMAKE_C_COMPILER: " ${CMAKE_C_COMPILER} )
MESSAGE( STATUS "CXX_COMPILER_VERSION: " ${CXX_COMPILER_VERSION} )
if( SUPPORT_SSE42 )
MESSAGE( STATUS "SSE 4.2 support detected" )
else()
Expand All @@ -69,52 +51,28 @@ else()
endif ()
endif()

if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
# require at least gcc 4.7
if (CXX_COMPILER_VERSION VERSION_LESS 4.7)
message(STATUS "GCC version must be at least 4.7!")
endif()
# Uncomment the following lines to see how the code compiles without AVX,SSE4.2 and/or SSE2
#set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Ofast -lm -DNDEBUG -std=c++11 -DHAVE_CXX0X -march=x86-64")
#set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Ofast -lm -DNDEBUG -std=c++11 -DHAVE_CXX0X -march=core2")
#set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Ofast -lm -DNDEBUG -std=c++11 -DHAVE_CXX0X -msse4.2")
set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wcast-align -Ofast -lm -DNDEBUG -std=c++11 -DHAVE_CXX0X -march=native")
set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wcast-align -ggdb -lm -std=c++11 -DHAVE_CXX0X -march=native")
set (CMAKE_C_FLAGS_RELEASE "-Wall -Wcast-align -Ofast -lm -DNDEBUG -std=c99 -march=native")
set (CMAKE_C_FLAGS_DEBUG "-Wall -Wcast-align -ggdb -lm -std=c99 -march=native")
elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "Intel")
if (CXX_COMPILER_VERSION VERSION_LESS 14.0.1)
message(STATUS "Intel version must be at least 14.0.1!")
endif()
set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Ofast -DNDEBUG -std=c++11 -DHAVE_CXX0X -march=native")
set (CMAKE_CXX_FLAGS_DEBUG "-Wall -ggdb -std=c++11 -DHAVE_CXX0X -march=native")
set (CMAKE_C_FLAGS_RELEASE "-Wall -Ofast -DNDEBUG -std=c99 -march=native")
set (CMAKE_C_FLAGS_DEBUG "-Wall -ggdb -std=c99 -march=native")
elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_CXX_COMPILER_ID} STREQUAL "AppleClang")
if (CXX_COMPILER_VERSION VERSION_LESS 4.2.1)
message(STATUS "Clang version must be at least 4.2.1!" )
endif()
set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wcast-align -O3 -DNDEBUG -std=c++11 -DHAVE_CXX0X -march=native")
set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wcast-align -ggdb -std=c++11 -DHAVE_CXX0X -march=native")
set (CMAKE_C_FLAGS_RELEASE "-Wall -Wcast-align -O3 -DNDEBUG -std=c99 -march=native")
set (CMAKE_C_FLAGS_DEBUG "-Wall -Wcast-align -ggdb -std=c99 -march=native")
elseif(WIN32)
# TODO add support for later versions?
if(NOT MSVC12)
message(STATUS "On Windows, only MSVC version 12 is supported!")
endif()
else ()
message(FATAL_ERROR "Please, use GCC, Clang, or the Intel compiler!")


# Turn on the common warning set for GCC and Clang-family compilers.
# The compiler ID is quoted so the condition stays well-formed when the ID is
# empty (unrecognised compiler) and is never re-interpreted as a variable name.
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "^(GNU|Clang|AppleClang)$")
  add_compile_options(-Wall -Wextra)
endif()
include(CheckCXXCompilerFlag)

# -march=native targets the CPU of the build machine, so the resulting binaries
# may not run on other machines. It stays enabled by default (the previous
# behaviour) but can be switched off with -DFASTPFOR_USE_MARCH_NATIVE=OFF.
option(FASTPFOR_USE_MARCH_NATIVE
  "Compile with -march=native when the compiler supports it" ON)

# Drop any cached probe result so the check is re-run on every configure.
unset(FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE CACHE)
check_cxx_compiler_flag(-march=native FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE)

if(NOT FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE)
  message(STATUS "native target not supported")
elseif(FASTPFOR_USE_MARCH_NATIVE)
  add_compile_options(-march=native)
else()
  message(STATUS "-march=native disabled (FASTPFOR_USE_MARCH_NATIVE=OFF)")
endif()


# Print the per-configuration C and C++ flag variables, one status line each.
foreach(flags_var
    CMAKE_CXX_FLAGS_DEBUG
    CMAKE_CXX_FLAGS_RELEASE
    CMAKE_C_FLAGS_DEBUG
    CMAKE_C_FLAGS_RELEASE)
  # The value is expanded unquoted, exactly as before, so message()
  # concatenates it onto the label.
  message(STATUS "${flags_var}: " ${${flags_var}})
endforeach()




# library target
include_directories(headers)
add_library(FastPFOR STATIC
Expand All @@ -129,6 +87,10 @@ add_library(FastPFOR STATIC
src/streamvbyte.c)
set_target_properties(FastPFOR PROPERTIES POSITION_INDEPENDENT_CODE TRUE)

#if (NOT WIN32)
# target_link_libraries(FastPFOR m)
#endif()


# other executables
add_executable(gapstats src/gapstats.cpp)
Expand Down
6 changes: 3 additions & 3 deletions headers/fastpfor.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,11 +236,11 @@ class FastPForImpl {
inexcept += (sizeof(IntType) + sizeof(uint32_t) - 1) / sizeof(uint32_t);
for (uint32_t k = 2; k <= sizeof(IntType) * 8; ++k) {
if ((bitmap & (1ULL << (k - 1))) != 0) {
uint32_t nvalue = *inexcept;
datatobepacked[k].resize((nvalue + PACKSIZE - 1) / PACKSIZE * PACKSIZE);
uint32_t tnvalue = *inexcept;
datatobepacked[k].resize((tnvalue + PACKSIZE - 1) / PACKSIZE * PACKSIZE);
inexcept = packingvector<32>::unpackmetight(
inexcept, datatobepacked[k].data(), datatobepacked[k].size(), k);
datatobepacked[k].resize(nvalue);
datatobepacked[k].resize(tnvalue);
}
}
length = inexcept - initin;
Expand Down
28 changes: 14 additions & 14 deletions headers/simdgroupsimple.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ namespace FastPForLib {
__m128i comprBlock = _mm_load_si128(in++);
for (size_t k = 1; k < n; k++)
comprBlock = _mm_or_si128(comprBlock,
mm_slli_epi32_unrolled(_mm_load_si128(in++), k * b));
mm_slli_epi32_unrolled(_mm_load_si128(in++), (unsigned int)(k * b)));
_mm_store_si128(out++, comprBlock);
}

Expand Down Expand Up @@ -651,7 +651,7 @@ namespace FastPForLib {
const __m128i comprBlock = _mm_load_si128(in++);
for (size_t k = 0; k < n; k++)
_mm_store_si128(out++,
_mm_and_si128(mm_srli_epi32_unrolled(comprBlock, k * b), mask));
_mm_and_si128(mm_srli_epi32_unrolled(comprBlock, (unsigned int)(k * b)), mask));
}

inline static __m128i mm_srli_epi32_unrolled(__m128i comprBlock, unsigned int n) {
Expand Down Expand Up @@ -1170,7 +1170,7 @@ namespace FastPForLib {
// The number of bytes actually used for the selectors area.
const size_t countSelArea8Used = outSelArea8 - initOutSelArea8;
// The total number of selectors.
const int countSels = countSelArea8Used * 2 - (even ? 0 : 1);
const int countSels = int(countSelArea8Used * 2 - (even ? 0 : 1));

// The number of bytes that could be required for the selectors area in the
// worst case.
Expand Down Expand Up @@ -1202,15 +1202,15 @@ namespace FastPForLib {
for (int m = 0; m < countSels - 1; m++) {
const uint8_t i = extractSel(initOutSelArea8, m);
const size_t n = tableNum[i];
comprCompleteBlock(n, in128, outDataArea128);
comprCompleteBlock(uint8_t(n), in128, outDataArea128);
}
if (countQuadsLastBlock)
comprIncompleteBlock(countQuadsLastBlock, in128, outDataArea128);

// Write some meta data to the header.
outHeader32[0] = len;
outHeader32[1] = countSels;
outHeader32[2] = countSelArea8;
outHeader32[0] = uint32_t(len);
outHeader32[1] = uint32_t(countSels);
outHeader32[2] = uint32_t(countSelArea8);

// The position of the last byte written to the output relative to the
// start of the output. Note that the actual number of written bytes might
Expand All @@ -1220,7 +1220,7 @@ namespace FastPForLib {
countSelArea8 + sizeof(uint8_t) + countPadBytes +
(outDataArea128 - initOutDataArea128) * sizeof(__m128i);
// Rounding the number of bytes to full 32-bit integers.
nvalue = div_roundup(nbytes, sizeof(uint32_t));
nvalue = div_roundup(uint32_t(nbytes), sizeof(uint32_t));
}

/**
Expand Down Expand Up @@ -1319,7 +1319,7 @@ namespace FastPForLib {
// not seem to yield any benefit.
} else
// This can only happen for the last block/selector
comprIncompleteBlock(rbSize, in128, outDataArea128_wGap);
comprIncompleteBlock(uint8_t(rbSize), in128, outDataArea128_wGap);
}
if (!even)
// The last used byte in the selectors area was touched, but not finished.
Expand Down Expand Up @@ -1359,9 +1359,9 @@ namespace FastPForLib {
}

// Write some meta data to the header.
outHeader32[0] = len;
outHeader32[1] = countSels;
outHeader32[2] = countSelArea8;
outHeader32[0] = uint32_t(len);
outHeader32[1] = uint32_t(countSels);
outHeader32[2] = uint32_t(countSelArea8);

// The position of the last byte written to the output relative to the
// start of the output. Note that the actual number of written bytes might
Expand All @@ -1371,7 +1371,7 @@ namespace FastPForLib {
countSelArea8 + sizeof(uint8_t) + actualPaddingBytes +
countDataArea128 * sizeof(__m128i);
// Rounding the number of bytes to full 32-bit integers.
nvalue = div_roundup(nbytes, sizeof(uint32_t));
nvalue = div_roundup(uint32_t(nbytes), sizeof(uint32_t));
}

void encodeArray(const uint32_t *in, const size_t len, uint32_t *out,
Expand Down Expand Up @@ -1415,7 +1415,7 @@ namespace FastPForLib {
for (int m = 0; m < countSels - 1; m++) {
const uint8_t i = extractSel(inSelArea8, m);
const size_t n = tableNum[i];
decomprCompleteBlock(n, inDataArea128, out128);
decomprCompleteBlock(uint8_t(n), inDataArea128, out128);
}
const uint8_t countQuadsLastBlock = inSelArea8[countSelArea8Used];
if (countQuadsLastBlock)
Expand Down
4 changes: 2 additions & 2 deletions headers/simple16.h
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ void Simple16<MarkLength>::encodeArray(const uint32_t *in, const size_t length,
NumberOfValuesCoded += base;
} else if (tryme<1, 3, 4, 4, 3, 3>(in, ValuesRemaining)) {
out[0] = 6;
NumberOfValuesCoded = (ValuesRemaining < 1) ? ValuesRemaining : 1;
NumberOfValuesCoded = (ValuesRemaining < 1) ? uint32_t(ValuesRemaining) : 1;
for (uint32_t i = 0; i < NumberOfValuesCoded; i++)
bit_writer(out, *in++, 3);
uint32_t fill = 3 * NumberOfValuesCoded;
Expand Down Expand Up @@ -593,7 +593,7 @@ void Simple16<MarkLength>::encodeArray(const uint32_t *in, const size_t length,
assert(which(out) == 12);
} else if (tryme<1, 10, 2, 9>(in, ValuesRemaining)) {
out[0] = 13;
NumberOfValuesCoded = (ValuesRemaining < 1) ? ValuesRemaining : 1;
NumberOfValuesCoded = (ValuesRemaining < 1) ? uint32_t(ValuesRemaining) : 1;
for (uint32_t i = 0; i < NumberOfValuesCoded; i++)
bit_writer(out, *in++, 10);
const uint32_t base = NumberOfValuesCoded;
Expand Down
2 changes: 1 addition & 1 deletion headers/simple8b.h
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ void Simple8b<MarkLength>::encodeArray(const uint32_t *in, const size_t length,
assert(which(out64) == 14);
} else if (tryme<1, 60>(in, ValuesRemaining)) {
out64[0] = 15;
NumberOfValuesCoded = (ValuesRemaining < 1) ? ValuesRemaining : 1;
NumberOfValuesCoded = (ValuesRemaining < 1) ? uint32_t(ValuesRemaining) : 1;
for (uint32_t i = 0; i < NumberOfValuesCoded; i++)
bit_writer(out64, *in++, 60);
out64[0] <<= 64 - SIMPLE8B_LOGDESC - 60 * NumberOfValuesCoded;
Expand Down
4 changes: 2 additions & 2 deletions headers/simple8b_rle.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ template <bool MarkLength> class Simple8b_RLE : public IntegerCODEC {
// this may lead to unaligned access. Performance may be affected.
// not much of an effect in practice on recent Intel processors.
uint64_t *out64 = reinterpret_cast<uint64_t *>(out);
auto count = Simple8b_Codec::Compress(in, 0, length, out64, 0);
auto count = Simple8b_Codec::Compress(in, 0, uint32_t(length), out64, 0);
nvalue = count * 2;
}

Expand Down Expand Up @@ -296,7 +296,7 @@ template <bool MarkLength> class Simple8b_RLE : public IntegerCODEC {

uint32_t pos = 0;

pos = Simple8b_Codec::Decompress(in64, 0, out, 0, nvalue);
pos = Simple8b_Codec::Decompress(in64, 0, out, 0, uint32_t(nvalue));

assert(in64 + pos <= finalin64);
in = reinterpret_cast<const uint32_t *>(in64 + pos);
Expand Down
22 changes: 11 additions & 11 deletions headers/simple9.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class Simple9 : public IntegerCODEC {
}
template <uint32_t num1, uint32_t log1>
static bool tryme(const uint32_t *n, size_t len) {
const uint32_t min = (len < num1) ? len : num1;
const uint32_t min = (len < num1) ? uint32_t(len) : num1;
for (uint32_t i = 0; i < min; i++) {
if ((n[i]) >= (1U << log1))
return false;
Expand Down Expand Up @@ -91,7 +91,7 @@ void Simple9<MarkLength, hacked>::encodeArray(const uint32_t *in,
uint32_t NumberOfValuesCoded;
const uint32_t *const initout(out);
if (MarkLength)
*(out++) = length;
*(out++) = uint32_t(length);
size_t ValuesRemaining(length);
// precompute
const bool becareful = false;
Expand Down Expand Up @@ -187,12 +187,12 @@ void Simple9<MarkLength, hacked>::encodeArray(const uint32_t *in,
while (ValuesRemaining > 0) {
if (hacked && trymefull<28, 0>(in)) {
out[0] = 9 << (32 - SIMPLE9_LOGDESC);
NumberOfValuesCoded = (ValuesRemaining < 28) ? ValuesRemaining : 28;
NumberOfValuesCoded = (ValuesRemaining < 28) ? uint32_t(ValuesRemaining) : 28;
in += NumberOfValuesCoded;
} else if (tryme<28, 1>(in, ValuesRemaining)) {
out[0] = 0;

NumberOfValuesCoded = (ValuesRemaining < 28) ? ValuesRemaining : 28;
NumberOfValuesCoded = (ValuesRemaining < 28) ? uint32_t(ValuesRemaining) : 28;
for (uint32_t i = 0; i < NumberOfValuesCoded; i++)
bit_writer(out, *in++, 1);
*out <<= 28 - NumberOfValuesCoded;
Expand All @@ -201,15 +201,15 @@ void Simple9<MarkLength, hacked>::encodeArray(const uint32_t *in,
} else if (tryme<14, 2>(in, ValuesRemaining)) {
out[0] = 1;

NumberOfValuesCoded = (ValuesRemaining < 14) ? ValuesRemaining : 14;
NumberOfValuesCoded = (ValuesRemaining < 14) ? uint32_t(ValuesRemaining) : 14;
for (uint32_t i = 0; i < NumberOfValuesCoded; i++)
bit_writer(out, *in++, 2);
*out <<= 28 - 2 * NumberOfValuesCoded;
if (becareful)
assert(which(out) == 1);
} else if (tryme<9, 3>(in, ValuesRemaining)) {
out[0] = 2;
NumberOfValuesCoded = (ValuesRemaining < 9) ? ValuesRemaining : 9;
NumberOfValuesCoded = (ValuesRemaining < 9) ? uint32_t(ValuesRemaining) : 9;
for (uint32_t i = 0; i < NumberOfValuesCoded; i++)
bit_writer(out, *in++, 3);
*out <<= 28 - 3 * NumberOfValuesCoded;
Expand All @@ -218,7 +218,7 @@ void Simple9<MarkLength, hacked>::encodeArray(const uint32_t *in,
} else if (tryme<7, 4>(in, ValuesRemaining)) {
out[0] = 3;

NumberOfValuesCoded = (ValuesRemaining < 7) ? ValuesRemaining : 7;
NumberOfValuesCoded = (ValuesRemaining < 7) ? uint32_t(ValuesRemaining) : 7;
for (uint32_t i = 0; i < NumberOfValuesCoded; i++)
bit_writer(out, *in++, 4);
*out <<= 28 - 4 * NumberOfValuesCoded;
Expand All @@ -227,15 +227,15 @@ void Simple9<MarkLength, hacked>::encodeArray(const uint32_t *in,
} else if (tryme<5, 5>(in, ValuesRemaining)) {
out[0] = 4;

NumberOfValuesCoded = (ValuesRemaining < 5) ? ValuesRemaining : 5;
NumberOfValuesCoded = (ValuesRemaining < 5) ? uint32_t(ValuesRemaining) : 5;
for (uint32_t i = 0; i < NumberOfValuesCoded; i++)
bit_writer(out, *in++, 5);
*out <<= 28 - 5 * NumberOfValuesCoded;
if (becareful)
assert(which(out) == 4);
} else if (tryme<4, 7>(in, ValuesRemaining)) {
out[0] = 5;
NumberOfValuesCoded = (ValuesRemaining < 4) ? ValuesRemaining : 4;
NumberOfValuesCoded = (ValuesRemaining < 4) ? uint32_t(ValuesRemaining) : 4;
for (uint32_t i = 0; i < NumberOfValuesCoded; i++)
bit_writer(out, *in++, 7);

Expand All @@ -244,15 +244,15 @@ void Simple9<MarkLength, hacked>::encodeArray(const uint32_t *in,
assert(which(out) == 5);
} else if (tryme<3, 9>(in, ValuesRemaining)) {
out[0] = 6;
NumberOfValuesCoded = (ValuesRemaining < 3) ? ValuesRemaining : 3;
NumberOfValuesCoded = (ValuesRemaining < 3) ? uint32_t(ValuesRemaining) : 3;
for (uint32_t i = 0; i < NumberOfValuesCoded; i++)
bit_writer(out, *in++, 9);
*out <<= 28 - 9 * NumberOfValuesCoded;
if (becareful)
assert(which(out) == 6);
} else if (tryme<2, 14>(in, ValuesRemaining)) {
out[0] = 7;
NumberOfValuesCoded = (ValuesRemaining < 2) ? ValuesRemaining : 2;
NumberOfValuesCoded = (ValuesRemaining < 2) ? uint32_t(ValuesRemaining) : 2;
for (uint32_t i = 0; i < NumberOfValuesCoded; i++)
bit_writer(out, *in++, 14);
*out <<= 28 - 14 * NumberOfValuesCoded;
Expand Down
4 changes: 2 additions & 2 deletions headers/simple9_rle.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ template <bool MarkLength> class Simple9_RLE : public IntegerCODEC {
if (MarkLength) {
*out++ = static_cast<uint32_t>(length);
}
auto count = Simple9_Codec::Compress(input, 0, length, out, 0);
auto count = Simple9_Codec::Compress(input, 0, uint32_t(length), out, 0);
nvalue = count;
}

Expand All @@ -289,7 +289,7 @@ template <bool MarkLength> class Simple9_RLE : public IntegerCODEC {
fprintf(stderr, "possible overrun\n");
}
auto count = actualvalue;
Simple9_Codec::Decompress(input, 0, out, 0, count);
Simple9_Codec::Decompress(input, 0, out, 0, uint32_t(count));
nvalue = MarkLength ? actualvalue : count;
input += count;
return input;
Expand Down
Loading