Skip to content

Commit

Permalink
Merge branch 'master' into backup3
Browse files Browse the repository at this point in the history
  • Loading branch information
kimwalisch committed Mar 5, 2021
2 parents 1ebd2a4 + 4e02809 commit 259f841
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 30 deletions.
3 changes: 2 additions & 1 deletion ChangeLog
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Changes in primecount-6.3, 2021-03-01
Changes in primecount-6.3, 2021-03-02

The memory usage of the PiTable and SegmentedPiTable has been
reduced by about 2x, this speeds up the computation of the easy
Expand All @@ -11,6 +11,7 @@ Changes in primecount-6.3, 2021-03-01

* PiTable.cpp: Reduce memory usage by 2x.
* SegmentedPiTable.cpp: Reduce memory usage by 2x.
* Sieve.cpp: Reduce memory usage of counters array by 2x.
* phi.cpp: Fixed integer overflow #39.
* phi.cpp: Cache 40x more phi(x, a) results.
* phi.cpp: Use pi(x) if a > pi(sqrt(x)).
Expand Down
2 changes: 1 addition & 1 deletion include/Sieve.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class Sieve
uint64_t counters_dist_log2_ = 0;
uint64_t counters_stop_ = 0;
pod_vector<uint8_t> sieve_;
pod_vector<uint64_t> counters_;
pod_vector<uint32_t> counters_;
std::vector<Wheel> wheel_;
};

Expand Down
26 changes: 16 additions & 10 deletions src/Sieve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ struct WheelInit
/// Categorize sieving primes according to their modulo 30
/// congruence class { 1, 7, 11, 13, 17, 19, 23, 29 }.
///
const array<int, 30> wheel_offsets =
const array<uint8_t, 30> wheel_offsets =
{
0, 8 * 0, 0, 0, 0, 0,
0, 8 * 1, 0, 0, 0, 8 * 2,
Expand All @@ -73,15 +73,15 @@ const array<int, 30> wheel_offsets =
/// Used to calculate the first multiple > start of a
/// sieving prime that is coprime to 2, 3, 5.
///
const WheelInit wheel_init[30] =
{
const array<WheelInit, 30> wheel_init
{{
{1, 0}, {0, 0}, {5, 1}, {4, 1}, {3, 1},
{2, 1}, {1, 1}, {0, 1}, {3, 2}, {2, 2},
{1, 2}, {0, 2}, {1, 3}, {0, 3}, {3, 4},
{2, 4}, {1, 4}, {0, 4}, {1, 5}, {0, 5},
{3, 6}, {2, 6}, {1, 6}, {0, 6}, {5, 7},
{4, 7}, {3, 7}, {2, 7}, {1, 7}, {0, 7}
};
}};

} // namespace

Expand Down Expand Up @@ -120,7 +120,7 @@ Sieve::Sieve(uint64_t low,
///
void Sieve::allocate_counters(uint64_t low)
{
double average_leaf_dist = sqrt((double) low);
double average_leaf_dist = sqrt(low);
double counters_dist = sqrt(average_leaf_dist);

// Here we balance counting with the counters array and
Expand All @@ -137,11 +137,17 @@ void Sieve::allocate_counters(uint64_t low)
uint64_t byte_dist = counters_dist_ / 30;
byte_dist = max(byte_dist, 64);
byte_dist = next_power_of_2(byte_dist);
counters_dist_ = byte_dist * 30;
counters_dist_log2_ = ilog2(byte_dist);

// Make sure the counters (32-bit) don't overflow.
// This can never happen since each counters array element
// only counts the number of unsieved elements (1 bits) in
// an interval of size: sieve_limit^(1/4) * sqrt(240).
// Hence the max(counter value) = 2^18.
assert(byte_dist * 8 <= numeric_limits<uint32_t>::max());
uint64_t counters_size = ceil_div(sieve_.size(), byte_dist);
counters_.resize(counters_size);
counters_dist_ = byte_dist * 30;
counters_dist_log2_ = ilog2(byte_dist);
}

/// The segment size is sieve.size() * 30 as each
Expand Down Expand Up @@ -204,9 +210,9 @@ void Sieve::init_counters(uint64_t low, uint64_t high)
stop = min(stop, max_stop);
uint64_t cnt = count(start, stop);
uint64_t byte_index = start / 30;
uint64_t i = byte_index >> counters_dist_log2_;

counters_[byte_index >> counters_dist_log2_] = cnt;

counters_[i] = (uint32_t) cnt;
total_count_ += cnt;
start += counters_dist_;
}
Expand Down Expand Up @@ -541,7 +547,7 @@ void Sieve::cross_off_count(uint64_t prime, uint64_t i)
uint64_t total_count = total_count_;
uint64_t counters_dist_log2 = counters_dist_log2_;
uint64_t sieve_size = sieve_.size();
uint64_t* counters = counters_.data();
uint32_t* counters = counters_.data();
uint8_t* sieve = sieve_.data();

#define CHECK_FINISHED(wheel_index) \
Expand Down
11 changes: 7 additions & 4 deletions src/deleglise-rivat/S2_hard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
/// method, Revista do DETUA, vol. 4, no. 6, March 2006,
/// pp. 759-768.
///
/// Copyright (C) 2020 Kim Walisch, <[email protected]>
/// Copyright (C) 2021 Kim Walisch, <[email protected]>
///
/// This file is distributed under the BSD License. See the COPYING
/// file in the top level directory.
Expand Down Expand Up @@ -71,12 +71,13 @@ T S2_hard_thread(T x,
T sum = 0;

int64_t low = thread.low;
int64_t low1 = max(low, 1);
int64_t segments = thread.segments;
int64_t segment_size = thread.segment_size;
int64_t pi_sqrty = pi[isqrt(y)];
int64_t low1 = max(low, 1);
int64_t limit = min(low + segments * segment_size, z);
int64_t max_b = pi[min3(isqrt(x / low1), isqrt(z), y)];
int64_t pi_sqrty = pi[isqrt(y)];
int64_t max_b = (limit <= y) ? pi_sqrty
: pi[min3(isqrt(x / low1), isqrt(z), y)];
int64_t min_b = pi[min(z / limit, primes[max_b])];
min_b = max(c, min_b) + 1;

Expand All @@ -94,6 +95,8 @@ T S2_hard_thread(T x,
int64_t high = min(low + segment_size, limit);
low1 = max(low, 1);

// For b < min_b there are no special leaves:
// low <= x / (primes[b] * m) < high
sieve.pre_sieve(primes, min_b - 1, low, high);
int64_t b = min_b;

Expand Down
8 changes: 4 additions & 4 deletions src/gourdon/D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
/// compressed lookup table of moebius function values,
/// least prime factors and max prime factors.
///
/// Copyright (C) 2020 Kim Walisch, <[email protected]>
/// Copyright (C) 2021 Kim Walisch, <[email protected]>
///
/// This file is distributed under the BSD License. See the COPYING
/// file in the top level directory.
Expand Down Expand Up @@ -61,10 +61,10 @@ T D_thread(T x,
T sum = 0;

int64_t low = thread.low;
int64_t low1 = max(low, 1);
int64_t segments = thread.segments;
int64_t segment_size = thread.segment_size;
int64_t pi_sqrtz = pi[isqrt(z)];
int64_t low1 = max(low, 1);
int64_t limit = min(low + segments * segment_size, xz);
int64_t max_b = pi[min3(isqrt(x / low1), isqrt(limit), x_star)];
int64_t min_b = pi[min(xz / limit, x_star)];
Expand All @@ -84,8 +84,8 @@ T D_thread(T x,
int64_t high = min(low + segment_size, limit);
low1 = max(low, 1);

// For i < min_b there are no special leaves:
// low <= x / (primes[i] * m) < high
// For b < min_b there are no special leaves:
// low <= x / (primes[b] * m) < high
sieve.pre_sieve(primes, min_b - 1, low, high);
int64_t b = min_b;

Expand Down
16 changes: 10 additions & 6 deletions src/lmo/pi_lmo_parallel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
/// method, Revista do DETUA, vol. 4, no. 6, March 2006,
/// pp. 759-768.
///
/// Copyright (C) 2020 Kim Walisch, <[email protected]>
/// Copyright (C) 2021 Kim Walisch, <[email protected]>
///
/// This file is distributed under the BSD License. See the COPYING
/// file in the top level directory.
Expand Down Expand Up @@ -59,14 +59,16 @@ int64_t S2_thread(int64_t x,
{
int64_t sum = 0;
int64_t low = thread.low;
int64_t low1 = max(low, 1);
int64_t segments = thread.segments;
int64_t segment_size = thread.segment_size;
int64_t low1 = max(low, 1);
int64_t pi_sqrty = pi[isqrt(y)];
int64_t limit = min(low + segments * segment_size, z + 1);
int64_t max_b = pi[min(isqrt(x / low1), y - 1)];
int64_t pi_sqrty = pi[isqrt(y)];
int64_t min_b = pi[min(z / limit, primes[max_b])];
min_b = max(c, min_b) + 1;

if (c >= max_b)
if (min_b > max_b)
return 0;

Sieve sieve(low, segment_size, max_b);
Expand All @@ -80,8 +82,10 @@ int64_t S2_thread(int64_t x,
int64_t high = min(low + segment_size, limit);
low1 = max(low, 1);

sieve.pre_sieve(primes, c, low, high);
int64_t b = c + 1;
// For b < min_b there are no special leaves:
// low <= x / (primes[b] * m) < high
sieve.pre_sieve(primes, min_b - 1, low, high);
int64_t b = min_b;

// For c + 1 <= b <= pi_sqrty
// Find all special leaves in the current segment that are
Expand Down
8 changes: 4 additions & 4 deletions src/mpi/D_mpi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
/// algorithm) that has been distributed using MPI (Message
/// Passing Interface) and multi-threaded using OpenMP.
///
/// Copyright (C) 2020 Kim Walisch, <[email protected]>
/// Copyright (C) 2021 Kim Walisch, <[email protected]>
///
/// This file is distributed under the BSD License. See the COPYING
/// file in the top level directory.
Expand Down Expand Up @@ -52,10 +52,10 @@ T D_thread(T x,
T sum = 0;

int64_t low = thread.low;
int64_t low1 = max(low, 1);
int64_t segments = thread.segments;
int64_t segment_size = thread.segment_size;
int64_t pi_sqrtz = pi[isqrt(z)];
int64_t low1 = max(low, 1);
int64_t limit = min(low + segments * segment_size, xz);
int64_t max_b = pi[min3(isqrt(x / low1), isqrt(limit), x_star)];
int64_t min_b = pi[min(xz / limit, x_star)];
Expand All @@ -75,8 +75,8 @@ T D_thread(T x,
int64_t high = min(low + segment_size, limit);
low1 = max(low, 1);

// For i < min_b there are no special leaves:
// low <= x / (primes[i] * m) < high
// For b < min_b there are no special leaves:
// low <= x / (primes[b] * m) < high
sieve.pre_sieve(primes, min_b - 1, low, high);
int64_t b = min_b;

Expand Down

0 comments on commit 259f841

Please sign in to comment.