# Sieb des Eratosthenes mit Heap und Hilfsarray

• From: Bernhard Helmes <rom@xxxxxxxxxx>
• Date: Sat, 12 Sep 2009 11:39:04 -0700 (PDT)

Einen erfreulichen, guten Abend

anbei ein Quelltext mit Dokumentation für eine Implementation für das
Sieb des Eratosthenes.
Das Schöne daran ist, daß die Datenstruktur auf einem Heap und einem
zyklischen Hilfsarray basiert.

Ich würde mich freuen, wenn jemand mit einem anderen Prozesor > 128 kb
First level cache die Runtime überprüft.

Schöne Grüße von den Primzahlen
Bernhard

//
***************************************************************************
/*
Algorithm : Bernhard Helmes
Implementation : Jasper Neumann
Documentation : Andrea Engel

Version : V 0.1 from Thu 10.09.2009 18:00 p.m.

Sieve of Eratosthenes

------------------------

The algorithm finds all primes p up to n=10^12.

1. Heap-Structure:
------------------

Every prime up to the square of the maximum of n Element N is insert
in the heap.
The heap is sorted, the smallest number is the first element.
There is no need for the complete list of 3, 5, 7 - n_max
Using a heap, which is placed in conventional RAM, is unfortunately
slow.
So we are using a help array instead, which is placed and fits into
the first or second level CPU-cache,
to speed up our calculations.

2. Help Array in the second Level Cache:
----------------------------------------

The byte size of the help array must be a power of 2.
We are using a ring array where the start pointer
moves cyclic through the array.
All stored numbers remain at the same byte positions in this array to
further speed up the program execution.

3. Recommended procedure to calculate the primes p < 10^18 with MPI:
-------------------------------------------------------------------

a) calculation of the primes for the heap.
b) distribution of the heap for the nodes and calculation
for the startpoints of the heap
c) run the program on the nodes
d) there is no need for a fast inter-process-communication, the heap
content is
only distributed one time in the beginning of the program
execution.

Note: MPI is currently not implementated.

4. Libs
-------

Necessary libs : iostream (Standard Ansi-C++ I/O libary.)
math.h (Standard Ansi-C mathematical libary.)

5. Results / Run time performance
---------------------------------

a) Test results for Computer RedPelican:

Hardware : CPU AMD 64 bit 2*4000+, 8 gbyte RAM, 128 kbyte first
level CPU cache
Compiler : GNU g++ V 3.41
OS-system : PelicanHPC

Runtime
time : .------------------------------------------------------------------.
! Run number: ! max_a ! max_a1= max_a / 2^4 !
Runtime Real User !
.....................................................................
! 1 ! 2^19 !
2^15 ! 504m !
.....................................................................
! 2 ! 2^20 !
2^16 ! 370m !
.....................................................................
! 2 ! 2^21 !
2^17 ! 439m 273m !
.................................................................
! 3 ! 2^22 !
2^18 ! 388m 223m !
.....................................................................

-------------

Modul name : p9.cpp
Modul task : Defines the entry point for the console application.
*/
//
***************************************************************************

#include<iostream>

#include<math.h>

// Define size of Array !!!
//===========================

#define max_a (1<<19)

// max_a1=max_a>>4
// max_a1 is the help array

//===========================

#define sqr(x) ((x)*(x))

#define max_heap 1000000

#define max_limit 1000000

#define max_x sqr((unsigned long long)(max_limit))

using namespace std;

/*@/// struct to_bits */

// // bytes

// #define buf_size (max_a)

// #define max_a1 (max_a-1)

//

// struct to_bits {

// char buf[buf_size];

// void init() {memset(&buf,0, buf_size);}

// bool get(int i) {return buf[i & max_a1]!=0;}

// void set0(int i) {buf[i & max_a1]=0;}

// void set1(int i) {buf[i & max_a1]=1;}

// };

// // bits

// #define buf_size (max_a>>3)

// #define max_a1 (buf_size-1)

//

// struct to_bits {

// char buf[buf_size];

// void init() {

// memset(&buf,0, buf_size);

// }

// bool get(int i) {

// return ( buf[(i>>3) & max_a1] & (1<<(i&7)) ) != 0;

// }

// void set0(int i) {

// buf[(i>>3) & max_a1] &= ~(1<<(i&7));

// }

// void set1(int i) {

// buf[(i>>3) & max_a1] |= (1<<(i&7));

// }

// };

// bits for odd numbers

// array size is defined !!!
//==========================
#define buf_size (max_a>>4)

#define max_a1 (buf_size-1)

struct to_bits {

char buf[buf_size];

void init() {

memset(&buf,0, buf_size);

}

bool get(int i) {

return ( buf[(i>>4) & max_a1] & (1<<((i>>1)&7)) ) != 0;

}

void set0(int i) {

buf[(i>>4) & max_a1] &= ~(1<<((i>>1)&7));

}

void set1(int i) {

buf[(i>>4) & max_a1] |= (1<<((i>>1)&7));

}

};

/*@\\\000C002B25002B3400312C+FE81*/

/*@/// struct tle_node */

struct tle_node {

public:

unsigned long long x,t;

// bool operator<(tle_node& b) {

// return x<b.x;

// }

};

/*@\\\+352F*/

typedef tle_node T;

// template <class T>

/*@/// void exch(T& a, T& b) */

void exch(T& a, T& b) {

T tmp;

tmp=a;

a=b;

b=tmp;

}

/*@\\\+BBFE*/

// template <class T>

/*@/// void fixdown(T a[], int idx, int count) */

void fixdown(T a[], int idx, int count) {

while (true) {

int j=idx*2+1;

if (j>=count)

break;

// if (j<count-1 && a[j+1]<a[j])

if (j<count-1 && a[j+1].x<a[j].x)

j++;

// if (!(a[j]<a[idx]))

if (!(a[j].x<a[idx].x))

break;

exch(a[idx],a[j]);

idx=j;

}

}

/*@\\\+E9F3*/

// template <class T>

/*@/// void fixup(T a[], int idx) */
//
****************************************************************************

void fixup(T a[], int idx) {

int n;

while (idx>0) {

n=(idx-1) >> 1;

// if (a[n]<a[idx])

if (a[n].x<a[idx].x)

break;

exch(a[n],a[idx]);

idx=n;

}

}
//
****************************************************************************

/*@\\\+5828*/

// template <class T>

/*@/// class PQ */

class PQ {

public:

T* pq;

int count;

PQ(int max) {

pq=new T[max_heap];

count=0;

}

int empty() const {

return count==0;

}

void include(T& q) {

pq[count]=q;

count++;

fixup(pq,count-1);

}

T& first() {

return pq[0];

}

void delete_first() {

exch(pq[0],pq[count-1]);

count--;

fixdown(pq,0,count);

}

};

/*@\\\+206F*/

// Anzahl!

unsigned long long max_count;

PQ* l_node;

/*@/// void insert(unsigned long long _x, unsigned long long _t) { */

//
****************************************************************************
void insert(unsigned long long _x, unsigned long long _t) {

tle_node le_node;

le_node.x=_x;

le_node.t=_t;

l_node->include(le_node);

}
//
****************************************************************************

/*@\\\+909B*/

#define min(a,b) ((a<b)?a:b)

/*@/// int main(int argc, char* argv[]) */

//
****************************************************************************
//***** Function main ***** Vers.: 0.1 *** Date:
09.09.09 *****
//
****************************************************************************
int main(int argc, char* argv[])

{

struct to_bits bits;

unsigned long long p,x, anz_aus=10;
unsigned long long anz=1, anz_m4_1=0, anz_m4_3=0;
unsigned long long anz_m8_1=0, anz_m8_3=0, anz_m8_5=0, anz_m8_7=0;
unsigned long long next;

l_node=new PQ(max_heap);

// printf ("Size of heap = %n\n", sizeof(bits.buf)); ???

tle_node le_node_found;

bits.init();

insert(0xffffffffffffffffull,0); // dummy

max_count=0;

x=3;

le_node_found=l_node->first();

do {

// p=x;

/* if ((((int)(x))&0xfffff)==1)

{

cout << x << " \r";

}

*/ if (le_node_found.x==x) {

/*@/// // Treffer */

// Treffer

next=min(max_x,x+max_a);

do {

l_node->delete_first();

while (true) {

le_node_found.x+=le_node_found.t;

if (le_node_found.x>=next)

break;

bits.set1(le_node_found.x);

};

if (le_node_found.x<max_x)

l_node->include(le_node_found);

le_node_found=l_node->first();

} while (le_node_found.x==x);

/*@\\\0000000B01+AF0C*/

bits.set0(x);

} else if (!bits.get(x)) {

/*@/// // Kein Treffer */

// Kein Treffer

anz++;

/*

// This was a try to examine the primes mod 4 = 1, mod 4 = 3, mod 8 =
1, mod 8 = 3, etc
if ((x&0x00002)==2)
{
anz_m4_3++;
if ((x&0x00004)==4)
{
anz_m8_7++;
}
else
{
anz_m8_3++;
}
}
else
{
anz_m4_1++;
if ((x&0x00004)==4)
{
anz_m8_5++;
}
else
{
anz_m8_1++;
}
}
*/
if (x<max_limit)

{

insert(x*x,x*2);

}

/*@\\\0000000602+4A43*/

le_node_found=l_node->first();

} else {

bits.set0(x);

}

x+=2;

if (x>anz_aus)

{

anz_aus*=10;

cout << anz << " " << anz_m4_1 << " " << anz_m4_3 << " "
<< anz_m8_1 << " " << anz_m8_3<< " " << anz_m8_5 << " " <<
anz_m8_7<< " \n";

}

} while (x<=max_x);

}
//
***************************************************************************

/*@\\\+1D1D*/

/*@\\\0033000A09000A12003421000011003421*/

.