Welcome to the MacNN Forums.

fhoubi · Apr 4, 2001, 06:13 PM

Just today I found a c(++) compiler in OS-X. YES!!!

Just type c++ in a terminal window. Maybe you have to install the developer-cd, I'm not sure.

Time to test a game solution program I wrote about 7 years ago. It solves the (I think the name of the game is) peggie puzzle, where 32 pieces are placed on a '+' shaped board, and you have to end your last move right in the middle of the board, leaving only one piece.

It only makes recursive calls, up to 31 plies deep. It needs 7.6 million iterations to find the first solution, and the program ends as soon as that first solution is found.

So, entering:

c++ peggie.c
no errors! A program is born!
Next

./a.out

to run the program. O Yes, press 7 & <enter> and wait.

50 seconds on my iMac 233 Rev. A 192 MB with ONLY 1 terminal window running, nothing else.

Sounds great? Well..., on my PC pentium 1 look-a-like, an AMD K6-200 running Windows 95 still containing the source & the program does it in 40 seconds... I do not know anymore which compiler I used, but the exe-file contains "TURBO-C 1988", so I guess it was an old Borland C compiler.

Is the OS-X compiler not state of the art? Is c++ the compiler which Apple uses? Is it the "gnu compiler"? Is it the PPC architecture? Note, the program does not do any calculations, just "decisions"...

Please give some feedback how quick the program runs on faster G3 & G4's.

For those who are interested, the complete source looks like this...

(sorry to put the source in here, but i thought it was small enough)

/* PEGGIE.C */

#include <stdio.h>
#include <stdlib.h>
//#include <process.h>

/*
 * One enum serving two roles:
 *   EMPTY, OCCUPIED, OFF    - states stored in board[][] cells
 *   UP, DOWN, LEFT, RIGHT   - jump directions passed to Do_move() and
 *                             Do_move_back() and recorded in move[].jump
 *   START                   - passed to Do_move() by main() to begin the
 *                             search without making a jump first
 */
enum choices { EMPTY, OCCUPIED, OFF, UP, DOWN, LEFT, RIGHT, START };

/*
 * Playing field. The plus-shaped play area lives in rows/columns 3..9;
 * Init_board() marks every other cell in 1..11 as OFF, so the jump tests
 * in Do_move() can look two cells past the edge and still stay inside
 * the array.
 */
int board[12][12];
/* Number of jumps made on the current search path (++ in Do_move, -- in Do_move_back). */
int ply = 0;
/* Number of OCCUPIED cells; set by Count_pieces(), then adjusted per jump. */
int pieces;
/* Total number of jumps tried so far; printed with the solution. */
/* NOTE(review): presumably a double so the count cannot overflow a small int - confirm. */
double iter = 0.0;

/*
 * Jump history of the current search path. Do_move() increments ply before
 * storing, so entries are used from index 1; move[n] holds the source cell
 * (r, k) and the direction of the n-th jump.
 */
struct {
int r, k;
int jump;
} move[40];

/* FUNCTION PROTOTYPES */
/* Do_move() and Do_move_back() call each other's counterpart during the
   recursive search, so they are declared here ahead of their definitions. */

//void main(int, char **);
//void main(int argc, char *argv[])
void Do_move(int, int, int);      /* (row, column, direction): apply a jump and search onward */
void Do_move_back(int, int, int); /* (row, column, direction): undo that jump */
void Init_board(void);            /* mark the plus-shaped area EMPTY, the rest OFF */
void Print_board(void);           /* print the board with ply and piece count */
void Count_pieces(void);          /* recount OCCUPIED cells into pieces */
void Fill_board(void);            /* read a digit from stdin and place that starting position */

/*
 * Reset board cells [1..11][1..11]: the plus-shaped play area (the vertical
 * bar rows 3..9 x columns 5..7 and the horizontal bar rows 5..7 x columns
 * 3..9) becomes EMPTY, everything else becomes OFF.
 */
void Init_board(void)
{
    int row, col;

    for (row = 1; row <= 11; row++) {
        for (col = 1; col <= 11; col++) {
            int in_vertical_bar = (row >= 3 && row <= 9 && col >= 5 && col <= 7);
            int in_horizontal_bar = (row >= 5 && row <= 7 && col >= 3 && col <= 9);

            if (in_vertical_bar || in_horizontal_bar)
                board[row][col] = EMPTY;
            else
                board[row][col] = OFF;
        }
    }
}

/*
 * Recount the pegs: scan rows and columns 3..9 of the board and store the
 * number of OCCUPIED cells in the global `pieces`.
 */
void Count_pieces(void)
{
    int row, col;
    int found = 0;

    for (row = 3; row <= 9; row++) {
        for (col = 3; col <= 9; col++) {
            if (board[row][col] == OCCUPIED) {
                found += 1;
            }
        }
    }

    pieces = found;
}

/*
 * Print the play area (rows and columns 3..9) to stdout, preceded by a
 * header line showing the current ply and piece count.
 * Cell glyphs: '=' for OFF, '.' for EMPTY, 'O' for OCCUPIED; any other
 * cell value prints nothing.
 */
void Print_board(void)
{
    int row, col;

    printf(" 3456789 Ply:%2d pieces:%2d\n", ply, pieces);
    printf(" /-------\n");

    for (row = 3; row <= 9; row++) {
        printf("%d|", row);

        for (col = 3; col <= 9; col++) {
            int cell = board[row][col];

            if (cell == OFF)
                printf("=");
            else if (cell == EMPTY)
                printf(".");
            else if (cell == OCCUPIED)
                printf("O");
        }

        printf("\n");
    }

    printf("\n");
}

/* Mark columns first..last (inclusive) of one board row as OCCUPIED. */
static void Occupy_row(int row, int first, int last)
{
    int col;

    for (col = first; col <= last; col++)
        board[row][col] = OCCUPIED;
}

/*
 * Read one character from stdin and place the starting position it selects
 * ('1'..'7') on the board. Any other input leaves the board untouched.
 * Position 7 is the full board: every cell of the plus shape occupied
 * except the centre (6,6).
 */
void Fill_board(void)
{
    int row;
    int choice = getchar() - '0';

    switch (choice) {
    case 1:
        Occupy_row(4, 6, 6);
        Occupy_row(5, 5, 7);
        Occupy_row(6, 6, 6);
        Occupy_row(7, 6, 6);
        break;

    case 2:
        Occupy_row(4, 6, 6);
        Occupy_row(5, 6, 6);
        Occupy_row(6, 4, 8);
        Occupy_row(7, 6, 6);
        Occupy_row(8, 6, 6);
        break;

    case 3:
        for (row = 3; row <= 5; row++)
            Occupy_row(row, 5, 7);
        board[6][5] = OCCUPIED;
        board[6][7] = OCCUPIED;
        break;

    case 4:
        Occupy_row(3, 6, 6);
        Occupy_row(4, 5, 7);
        Occupy_row(5, 4, 8);
        Occupy_row(6, 6, 6);
        Occupy_row(7, 6, 6);
        Occupy_row(8, 5, 7);
        Occupy_row(9, 5, 7);
        break;

    case 5:
        Occupy_row(4, 6, 6);
        Occupy_row(5, 5, 7);
        Occupy_row(6, 4, 8);
        Occupy_row(7, 3, 9);
        break;

    case 6:
        Occupy_row(3, 6, 6);
        Occupy_row(4, 5, 7);
        Occupy_row(5, 4, 8);
        Occupy_row(6, 3, 9);
        Occupy_row(7, 4, 8);
        Occupy_row(8, 5, 7);
        Occupy_row(9, 6, 6);
        board[6][6] = EMPTY;    /* centre hole */
        break;

    case 7:
        /* vertical bar of the plus ... */
        for (row = 3; row <= 9; row++)
            Occupy_row(row, 5, 7);
        /* ... and horizontal bar */
        for (row = 5; row <= 7; row++)
            Occupy_row(row, 3, 9);
        board[6][6] = EMPTY;    /* centre hole */
        break;
    }
}

/*
 * Print the jumps recorded in move[1..ply] (source cell as row and column
 * digits, then the direction), followed by the total iteration count.
 */
static void Print_solution(void)
{
    int n;

    for (n = 1; n <= ply; n++) {
        printf("move:%3d) %d%d ", n, move[n].r, move[n].k);
        switch (move[n].jump) {
        case UP:
            printf("UP\n");
            break;
        case DOWN:
            printf("DOWN\n");
            break;
        case LEFT:
            printf("LEFT\n");
            break;
        case RIGHT:
            printf("RIGHT\n");
            break;
        default:
            break;
        }
    }
    printf("Iter: %10.0f\n", iter);
}

/*
 * Apply one jump and continue the depth-first search from the new position.
 *
 * zr, zk - row and column of the peg that jumps
 * jump   - UP, DOWN, LEFT or RIGHT; or START to search from the current
 *          position without making a jump first
 *
 * A jump empties the source cell and the cell jumped over and occupies the
 * cell two steps away; it is then recorded in move[++ply] and counted in
 * `pieces` and `iter`.
 *
 * If exactly one peg remains and it sits on the centre cell (6,6), the
 * solution is printed and the process exits; the function does not return
 * in that case. Otherwise every legal jump from the current position is
 * tried and undone again with Do_move_back(). The board itself is left as
 * it was after this call's own jump; the caller undoes that one.
 */
void Do_move(int zr, int zk, int jump)
{
    /* Directions in the order the search tries them. The order decides
       which solution is found first and the iteration count reported. */
    static const struct {
        int jump;
        int dr, dk;
    } dirs[4] = {
        { UP,    -1,  0 },
        { RIGHT,  0,  1 },
        { DOWN,   1,  0 },
        { LEFT,   0, -1 }
    };
    int r, k, d;

    if (jump != START) {
        for (d = 0; d < 4; d++) {
            if (dirs[d].jump == jump) {
                board[zr][zk] = EMPTY;
                board[zr + dirs[d].dr][zk + dirs[d].dk] = EMPTY;
                board[zr + 2 * dirs[d].dr][zk + 2 * dirs[d].dk] = OCCUPIED;
                break;
            }
        }
        ply++;
        pieces--;
        iter++;
        move[ply].r = zr;
        move[ply].k = zk;
        move[ply].jump = jump;
    }

    if (pieces == 1 && board[6][6] == OCCUPIED) {
        Print_solution();
        exit(EXIT_SUCCESS);     /* stop at the first solution found */
    }

    for (r = 3; r <= 9; r++) {
        for (k = 3; k <= 9; k++) {
            if (board[r][k] != OCCUPIED)
                continue;

            for (d = 0; d < 4; d++) {
                int dr = dirs[d].dr;
                int dk = dirs[d].dk;

                /* Legal jump: a peg next to us and a hole right behind it. */
                if (board[r + dr][k + dk] == OCCUPIED &&
                    board[r + 2 * dr][k + 2 * dk] == EMPTY) {
                    Do_move(r, k, dirs[d].jump);
                    Do_move_back(r, k, dirs[d].jump);
                }
            }
        }
    }
}

/*
 * Undo a jump: the source cell (zr, zk) and the cell that was jumped over
 * become OCCUPIED again, the landing cell two steps away becomes EMPTY.
 * ply is decremented and pieces incremented. A `jump` value that is not
 * one of UP/DOWN/LEFT/RIGHT leaves the board alone but still adjusts the
 * two counters.
 */
void Do_move_back(int zr, int zk, int jump)
{
    int dr = 0;
    int dk = 0;

    if (jump == UP)
        dr = -1;
    else if (jump == DOWN)
        dr = 1;
    else if (jump == LEFT)
        dk = -1;
    else if (jump == RIGHT)
        dk = 1;

    if (dr != 0 || dk != 0) {
        board[zr + 2 * dr][zk + 2 * dk] = EMPTY;
        board[zr + dr][zk + dk] = OCCUPIED;
        board[zr][zk] = OCCUPIED;
    }

    pieces++;
    ply--;
}

/*
 * Program entry: set up the board, read the starting position (a digit
 * 1..7 on stdin), show it, and run the search.
 *
 * Do_move() terminates the process itself when it finds a solution, so
 * control only comes back here when the search ran out of moves without
 * finding one.
 */
int main(int argc, char *argv[])
{
    (void)argc;     /* command-line arguments are not used */
    (void)argv;

    Init_board();
    Fill_board();
    Count_pieces();
    Print_board();
    Do_move(0, 0, START);

    return 0;
}

mudmonkey · Apr 4, 2001, 06:32 PM

First, your code is just 'c', no need for the C++ compiler.

I tried it on my G4/450 with A LOT of stuff open and running.

I ran it with time doing a measurement. It reported:

% time ./a.out
26.770u 0.220s 0:44.47 60.6% 0+0k 0+1io 0pf+0w

Meaning 26.77 seconds of user time (clock). This includes the pause while it waits for me to hit 7 and return.

I recompiled it with the -O2 option for better optimization:

% time ./a.out
11.910u 0.070s 0:28.86 41.5% 0+0k 0+0io 0pf+0w

So, on a G4, it is taking under 12 seconds when properly compiled.

tmornini · Apr 4, 2001, 06:42 PM

Originally posted by mudmonkey:
I recompiled it with the -O2 option for better optimization:

% time ./a.out
11.910u 0.070s 0:28.86 41.5% 0+0k 0+0io 0pf+0w

So, on a G4, it is taking under 12 seconds when properly compiled.[/B]

I'm glad you pointed this out and tried it. gcc supports -O3 as well.

Do you get any improvement? It didn't seem to operate properly for me.

--
-- Tom Mornini

mudmonkey · Apr 4, 2001, 06:47 PM

The O3 optimizations don't seem to help much for this code...

With O2: 11.91s
With O3: 11.58s

foobars · Apr 4, 2001, 06:52 PM

29.760u 0.060s 0:31.04 96.0% 0+0k 0+0io 0pf+0w

... on a G4/400 using vanilla c++.

BZ · Apr 4, 2001, 06:57 PM

Ok...

23.6 : TiG4 500 / 512 MB Ram - c++
33.37 : G4 AGP 350 / 512 MB Ram - c++

How do I compile it with different options?

BZ

3.1416 · Apr 4, 2001, 07:54 PM

How do I compile it with different options?

cc -O3 peggie.c
(letter 'O', can also do O1 or O2)

Results on a Wallstreet G3/300, no other processes actively using the CPU:
no optimization: 48.02
-O1: 27.76
-O2: 24.89
-O3: 25.00

So optimization is clearly good, although going from O2 to O3 doesn't help.

Ghoser777 · Apr 4, 2001, 08:08 PM

Specs: 333Mhz slot-loading iMac w/OSX v 10.0.1, 64MB RAM

1st Test-
Compiler and arguments: cc
Open Apps:iTunes playing music, OmniWeb in background, Terminal open
Results:34.380u 0.210s 0:43.51 79.4% 0+0k 0+0io 0pf+0w

2nd Test-
Compiler and arguments: cc
Open Apps:Terminal
Results:33.830u 0.060s 0:35.00 96.8% 0+0k 0+1io 0pf+0w

3rd Test-
Compiler and arguments: cc -O2
Open Apps:iTunes playing music, OmniWeb in background, Terminal open
Results:14.360u 0.250s 0:30.22 48.3% 0+0k 0+1io 0pf+0w

4th Test-
Compiler and arguments: cc -O2
Open Apps:Terminal
Results:15.960u 0.030s 0:16.54 96.6% 0+0k 0+1io 0pf+0w

F-bacher

Ghoser777 · Apr 4, 2001, 08:10 PM

Originally posted by BZ:

How do I compile it with different options?

BZ

It's not 02 (number 0 + number 2), but O2 (letter "O" + number 2).

HTH,
F-bacher

Apr 4, 2001, 08:39 PM

DP533, OS X 10.0, 512Mb

Other apps: IE, Mail, TextEdit, Terminal

cc peg.c
21.820u 0.020s 0:22.72 96.1% 0+0k 0+1io 0pf+0w

cc -o2 peg.c
9.690u 0.010s 0:10.35 93.7% 0+0k 0+1io 0pf+0w

cc -o3 peg.c
9.700u 0.000s 0:10.30 94.1% 0+0k 0+1io 0pf+0w

foobars · Apr 4, 2001, 09:09 PM

cc peggie.c: 29.610u 0.150s 0:36.30 81.9% 0+0k 0+2io 0pf+0w

cc -O2 peggie.c: 12.010u 0.270s 0:14.38 85.3% 0+0k 0+2io 0pf+0w

G4/400 AGP

tz3gm · Apr 4, 2001, 09:59 PM

okay, so what does all of this mean?

Ghoser777 · Apr 4, 2001, 10:49 PM

Originally posted by tz3gm:
okay, so what does all of this mean?

Here's what I take from all this:
1) It appears that OSX's multitasking ability is underrated, considering that having other apps running (processor-intensive ones too) doesn't slow down the run time much, if at all
2) More mhz = faster run times
3) Single processor is slower than 2 of the same processor (as it should be)
4) We can draw no information about whether the program runs faster on PCs or Macs, although according to the first post, a lower-MHz PC was faster than a higher-MHz Mac... but that hasn't been replicated.

F-bacher

SkullMacPN · Apr 4, 2001, 11:08 PM

Originally posted by tz3gm:
okay, so what does all of this mean?

Well, we also noticed that compiling using the wrong settings (let alone using the wrong compiler as fhoubi did) can have a drastic effect on the performance of even the "simplest" (well, compared to something like IE or the Finder) of apps.

grease · Apr 4, 2001, 11:13 PM

cc -O2 peggie.c results on G4 500 AGP with 320 MB; iTunes playing and OmniWeb open:

10.460u 0.050s 0:13.24 79.3% 0+0k 0+0io 0pf+0w

Apr 4, 2001, 11:33 PM

Compiling with the -O2 optimizations on an Athlon 700, the code takes 14 seconds to run. Take that info for what it's worth.

SkullMacPN · Apr 4, 2001, 11:39 PM

Originally posted by Dan Andersen:
Compiling with the -O2 optimizations on an Athlon 700, the code takes 14 seconds to run. Take that info for what it's worth.

Wow, let's make this into Steve's next keynote promo;P

Seriously though, if someone's 333 slotload iMac (does he mean 350?) can do it in 14-15 seconds and a 700 Athalon 700 takes 14... that's damn good.

Scott_H · Apr 5, 2001, 12:20 AM

The "time" command can tell you the "real" time running so any PMT issues are moot here.

I made the following change to the code and compiled it on my 266 iMac with the -O3 option.

Code:
keuze =7 /*getchar() - '0'*/;

>cc peggy.c -O3 -o peggy
>time peggy
21.500u 0.120s 0:37.58 57.5% 0+0k 0+0io 0pf+0w

so 37.58 seconds to run on a 266 iMac.

Ghoser777 · Apr 5, 2001, 12:26 AM

Originally posted by SkullMacPN:
Wow, let's make this into Steve's next keynote promo;P

Seriously though, if someone's 333 slotload iMac (does he mean 350?) can do it in 14-15 seconds and a 700 Athalon 700 takes 14... that's damn good.

No, I meant 333, but I didn't mean slot-loading. Mines got the tray (which amazingly has yet to break).

F-bacher

fats · Apr 5, 2001, 12:31 AM

Hmm. I did it in 27.25 seconds using O2 on my iMac. [ A second run later on gave much better results of 13.43 seconds ] That was with Omniweb running. I also have a Pentium 200 mhz. Testing this program on that gives 71 seconds.

I wonder how every one on these forums has such fast PCs compared to mine. The whole reason I bought a Mac is because my PC was a dog. I guess it still is at 71 seconds to do that operation. I just have a hard time imagining that the PCs owned by Macnn forum visitors are always so much faster than mine at the same clock speeds.

By the way, I compiled the pc version as a DOS executable using Borland C 5 command line compiler with flags set for pentium and speed optimization (rather than size or default) so my optimizations were fair.

There were no programs running on the pc but I was running Omniweb on my iMac. Please, fhoubi, or anybody else, tell me where I can get one of these magically fast pcs - I think I'll be switching back to maybe linux on Intel for my next computer if I could find one that would be as fast as all these people on MacNN have.

Oh and BTW I also compiled a quick little prime number finder. An int (32768 - but actually I just used 32000) takes 11 seconds on my P200 and it takes ~ 2 seconds on my G3-400. Maybe the primes test is just a scrolling numbers on the screen test though because my program printed every number that was prime. This would favor the pc since scrolling text in a big black screen is a lot easier than scrolling shadowed green text in a translucent black screen on top of my other OS X windows.

So there. Also, I figured that saying increases in mhz made a linear proportion to better times on these tests then my pc would have to run around 525 [1057 second run] mhz to beat my iMac at this peggie thing and it would have to run around 1.1 ghz to beat my iMac at prime numbers.

I'm guessing that neither of these tests play into the Macs hands since what I understand is that they have more of the advantage in floating point operations. So my conclusion is that I am suspicious of the first posters results and I am a nerd.

[This message has been edited by fats (edited 04-05-2001).]

fats · Apr 5, 2001, 12:48 AM

Oh, I think maybe I got a bum run on peggie the first time. This time I did it in
13.430u 0.090s 0:14.41 93.8% 0+0k 0+0io 0pf+0w
So that would make my pc have to be what, 1057 mhz. I guess that is more reasonable in comparison with my primes test. I'll try these programs on a real 1 ghz pc when I get a chance and see if it can beat my iMac. Heh.

Another run of primes on my iMac yielded .120 seconds.

[This message has been edited by fats (edited 04-05-2001).]

Apr 5, 2001, 01:31 AM

Originally posted by Scott_H:
The "time" command can tell you the "real" time running so any PMT issues are moot here.

I made the following change to the code and compiled it on my 266 iMac with the -O3 option.

Code:
keuze =7 /*getchar() - '0'*/;

>cc peggy.c -O3 -o peggy
>time peggy
21.500u 0.120s 0:37.58 57.5% 0+0k 0+0io 0pf+0w

so 37.58 seconds to run on a 266 iMac.

450MHz G4 Cube, 320MB RAM,
lots of things running, but not 'doing' anything...

[~/Projects] johnb% time ./peggy
3456789 Ply: 0 pieces:32

Iter: 7667770
11.860u 0.030s 0:11.96 99.4% 0+0k 0+0io 0pf+0w

Apr 5, 2001, 01:35 AM

Originally posted by Scott_H:
The "time" command can tell you the "real" time running so any PMT issues are moot here.

I made the following change to the code and compiled it on my 266 iMac with the -O3 option.

Code:
keuze =7 /*getchar() - '0'*/;

>cc peggy.c -O3 -o peggy
>time peggy
21.500u 0.120s 0:37.58 57.5% 0+0k 0+0io 0pf+0w

so 37.58 seconds to run on a 266 iMac.

450MHz G4 Cube, 320MB RAM,
lots of things running, but not 'doing' anything...

[~/Projects] johnb% time ./peggy
3456789 Ply: 0 pieces:32

Iter: 7667770
11.860u 0.030s 0:11.96 99.4% 0+0k 0+0io 0pf+0w

Zoro · Apr 5, 2001, 09:46 AM

Celeron 500, 128 MB, NetBSD 1.5, KDE2.1, Opera (linux emulation), etc. running

cc peggy.c
18.301u 0.009s 0:18.80 97.3% 0+0k 0+0io 0pf+0w

cc -O2 peggy.c
14.177u 0.000s 0:14.91 95.0% 0+0k 0+0io 1pf+0w

------------------
MacTV

scotty · Apr 5, 2001, 09:52 AM

Originally posted by Ghoser777:

3) Single processor is slower than 2 of the same processor (as it should be)

But two processors (or four or eight for that matter) won't help execution speed of this code in and by itself, will it? This code will always run on only one processor, since it's not written as a multithreaded program.

Anyway, in my opinion this is a too theoretical way to benchmark a computer. What does the numbers really tell us? How fast a Mac can loop?

[This message has been edited by scotty (edited 04-05-2001).]

foobars · Apr 5, 2001, 10:12 AM

Originally posted by scotty:

Anyway, in my opinion this is a too theoretical way to benchmark a computer. What does the numbers really tell us? How fast a Mac can loop?

[This message has been edited by scotty (edited 04-05-2001).]

It tells us how quickly a Mac can execute the same number of commands in comparison to other computers.

Gee4orce · Apr 5, 2001, 10:39 AM

Anybody care to write a program that does a lot of FFT calculations ?

Then vectorise it for AltiVec. I'd love to see the difference that makes.

I'm sure SETI@home could be speeded up by at least 4x on a G4

Zoro · Apr 5, 2001, 11:24 AM

Quadra 650, NetBSD 1.5 / 36 MB, apache, sshd, etc running :-)))

cc peggy.c
568.176u 114.907s 11:27.33 99.3% 0+0k 0+0io 3pf+0w

cc -O2 peggy.c
392.521u 113.918s 8:29.21 99.4% 0+0k 0+0io 3pf+0w

done from work via ssh !

------------------
MacTV

Ghoser777 · Apr 5, 2001, 11:45 AM

Originally posted by scotty:
But two processors (or four or eight for that matter) won't help execution speed of this code in and by itself, will it? This code will always run on only one processor, since it's not written as a multithreaded program.

Anyway, in my opinion this is a too theoretical way to benchmark a computer. What does the numbers really tell us? How fast a Mac can loop?

[This message has been edited by scotty (edited 04-05-2001).]

But the program itself doesn't have to be multithreaded for a dual processor machine to increase a program's speed. Even though the program isn't multithreaded, each other running process will be distributed pretty fairly between the processors. On a single processor machine, all processes go through the same processor, but on a dual they are distributed. Make sense?

F-bacher

Todd Madson · Apr 5, 2001, 11:54 AM

Re: Seti at home:
More than 4x, but the Seti group will never make an altivec client
and doesn't want you to make one either. Remember, due to their
already excessive network bandwidth usage (which costs money to the
University that hosts them) and the fact that they only have so many
work units (they're only searching part of the sky remember) you are
stuck with the very long - super science version 3.04. Just the way
it is. Find another distributed computing project if you want altivec
mega-speed (like distributed.net).

Apr 5, 2001, 12:26 PM

Comparing OS speeds using a program like that is kinda silly. Anything running for that long and using all of its time slice (looping without making any kernel call during the time it has been allowed to run) will automatically get 'reniced', meaning its priority will fall like a stone to let the other running processes (whatever they might be) get some time.

So, the results will vary widely, and cannot be used to test a multitasking system!

If you want an example, make that program loop upon itself, and watch its priority in 'top'

Scott_H · Apr 5, 2001, 12:36 PM

Could you all please read the man page for the time command. time gives several "times" back: the "real" time, the time in execution, and some others. IT DOES NOT MATTER WHAT ELSE IS RUNNING OR HOW MANY CPUS YOU HAVE BECAUSE time GIVES YOU THE TIME THE CPU SPENT ON THE COMMAND.

Scott_H · Apr 5, 2001, 12:40 PM

But the program itslef doesn't have to be multithreaded for a dual processor machine to increase a programs speed.

Yes it does. Fact is when we care about speed, like the case above, we have the machine doing just about nothing but peggy. Sure there's stuff running in the background but it only takes up a few % of the CPU. So in the above case, assuming one CPU will do peggy full time, it will only boost it 5% at best. I'd rather have a single 733 than a dual 533 for this reason.

Apr 5, 2001, 01:32 PM

Just for the sake of discussion...

G3 233 (Beige Desktop), 64Mb, LinuxPPC, Kernel 2.0.16, gcc 2.95.2

Other apps: All the usual suspects: Apache, sendmail, MySQL, etc.

cc peg.c
44.780u 0.020s 0:45.13 99.2% 0+0k 0+0io 120pf+0w

cc -o2 peg.c
19.540u 0.010s 0:20.63 94.7% 0+0k 0+0io 120pf+0w

cc -o3 peg.c
19.560u 0.010s 0:19.96 98.0% 0+0k 0+0io 120pf+0w

Ashman · Apr 5, 2001, 01:33 PM

Just for kicks, here are some results run on my machines at work:

Sun Ultra60, Sun OS 5.7, 2x450MHz, 2GB RAM

gcc peg.c
36.0u 0.0s 2:07 28%
gcc -O1 peg.c
11.0u 0.0s 0:41 26%
gcc -O2 peg.c
13.0u 0.0s 0:45 28%
gcc -O3 peg.c
13.0u 0.0s 0:31 40%

Sun custom box (made for my VLSI group), Sun OS 5.7, 8x400MHz, 11GB RAM

gcc peg.c
41.0u 0.0s 1:05 62%
gcc -O1 peg.c
12.0u 0.0s 0:17 67%
gcc -O2 peg.c
15.0u 0.0s 0:20 72%
gcc -O3 peg.c
15.0u 0.0s 0:20 72%

Sun Sparc5, SunOS 5.6, 120MHz, 112MB RAM

gcc peg.c
244.0u 0.0s 4:17 94%
gcc -O1 peg.c
91.0u 0.0s 1:38 92%
gcc -O2 peg.c
120.0u 0.0s 2:08 93%
gcc -O3 peg.c
119.0u 0.0s 2:46 71%

Enjoy!

3.1416 · Apr 5, 2001, 02:27 PM

Yes it does. Fact is when we care about speed, like the case above, we have the machine doing just about nothing but peggy. Sure there's stuff running in the background but it only takes up a few % of the CPU. So in the above case assume one CPU will do peggy full time it will only boot it 5% at best. I'd rather have a single 733 than a dual 533 for this reason.

Well, yes and no. If you only care about finishing one run of peggy as fast as possible, then a single 733 wins. But if you're running any other processes then the dual 533 has an advantage because it can dedicate one processor to peggy while letting the other one handle Omniweb and iTunes and whatever. Also with a dual 533 you can run two instances of peggy simultaneously at full speed, and thus finish two runs before the single 733 would.

fhoubi · Apr 5, 2001, 02:47 PM

HELLO C AGAIN!

May I tell you that I have a big smile on my face again? Thanks for your postings, everybody. It's time to be embarrassed myself.

To set things straight...

I haven't programmed in C for ages, and it showed...

I used c++ because I didn't know cc was a compiler too (By the way, it makes no difference in speed, only c++ gives me no warning *g*). I struggled with the vi editor to make little changes, because I had to exclude a header and translate some dutch words. At least now you know the dutch word for choice is keuze... *g*

I went to my local pub late yesterday evening, after my disappointing experience. Is a PPC really that slow? I even tried the -o2 option after getting home, no improvements.

It puzzled me all day long why the Seti 2.x client on my iMac was 1.85 times faster than my pc-version, and my little app not... (14.5 hours compared to 27).

@ tmornini, use the -O2 option (IN UPPERCASE !!!) It works!
22 seconds on my iMac 233.

---> Conclusion? 40 (seconds on my old PC) / 22 makes again 1.8 'o)
---> Roughly said, a PPC is about 60% faster than a AMD/Pentium (1) on same clock speeds ( 200/233 * 1.85). I knew it! *laugh*

It is a perfect benchmark? No, but a nice one. It takes only a little time, not too fast, not too slow. And as a bonus, the solution of this nice little puzzle. I hope you enjoyed it as much I did.

fats · Apr 5, 2001, 07:50 PM

Somebody asked about floating point operations. Here is a little program for calculating pi using the Maclaurin series for arctan. Pi is 4 times the sum of two infinite series: the Maclaurin series for arctan(1/2) plus the Maclaurin series for arctan(1/3), i.e. $\pi = 4\,(\arctan\tfrac{1}{2} + \arctan\tfrac{1}{3})$ with $\arctan x = x - \tfrac{x^3}{3} + \tfrac{x^5}{5} - \dots$. In my program the two are summed in two individual steps in the same loop. Also there is something up with it where you get "not a number" if you pick an iteration count above 20 (how lame is that? My calculator can do more than that!)

Anyhow, if anybody knows how to multithread this and make it so it can do more iterations, it would be a good program to run on multiprocessors, and it converges relatively quickly. Even 20 iterations gives more accuracy than my program displays (I think you would use something like "%.50lf" in the printf command to display more decimals but I'm not sure). So here it is:

#include <stdio.h>

/* program to calculate pi using user specified number of iterations
with the Maclaurin series for arctan */

/* Integer power: a multiplied by itself b times (1 when b <= 0). Defined below. */
long power(int a, int b);

/*
 * Approximate pi from  pi/4 = arctan(1/2) + arctan(1/3),  summing the
 * Maclaurin series  arctan(x) = x - x^3/3 + x^5/5 - ...  for both
 * arguments in the same loop.
 *
 * The number read from stdin is the highest power to include: the loop
 * visits the odd powers i = 1, 3, 5, ... while i <= iter + 1.
 *
 * The powers (1/3)^i and (1/2)^i are carried along in floating point and
 * advanced by two powers per step, so large i cannot overflow an integer
 * type; very small terms simply underflow towards zero.
 *
 * Returns 0 on success, 1 if no integer could be read.
 */
int main(void)
{
    double sum3 = 0.0, sum2 = 0.0, pi;
    double x3 = 1.0 / 3.0;      /* (1/3)^i for the current i */
    double x2 = 1.0 / 2.0;      /* (1/2)^i for the current i */
    double a3, a2;
    int iter, i;
    int subtract = 0;           /* series terms alternate +, -, +, ... */

    printf("How many iterations?\n"); /* actually to what final power is a better question */
    if (scanf("%d", &iter) != 1) {
        fprintf(stderr, "Please enter a whole number.\n");
        return 1;
    }

    /* i - 1 <= iter is i <= iter + 1 without overflowing for a huge iter */
    for (i = 1; i - 1 <= iter; i += 2) {
        a3 = x3 / i;
        a2 = x2 / i;
        printf("a3: %f a2: %f \n", a3, a2);

        if (subtract) {
            sum3 -= a3;
            sum2 -= a2;
        } else {
            sum3 += a3;
            sum2 += a2;
        }
        subtract = !subtract;

        x3 /= 9.0;              /* advance from power i to power i + 2 */
        x2 /= 4.0;
    }

    pi = 4 * (sum3 + sum2);
    printf("Here you have it\n\n %f \n", pi);
    return 0;
}

/*
 * Integer exponentiation by repeated multiplication.
 *
 * Returns base multiplied by itself `raise` times, as a long; the result
 * is 1 when raise is zero or negative. There is no overflow check, so the
 * caller must keep the result within the range of long.
 */
long power(int base, int raise)
{
    long result = 1;
    int remaining;

    for (remaining = raise; remaining > 0; remaining--) {
        result = result * (long)base;
    }

    return result;
}

[This message has been edited by fats (edited 04-05-2001).]

Gavin · Apr 6, 2001, 03:25 AM

350 B&W G3 128 MB

peggy

cc pg.c -o peggy
32.370u 0.150s 1:16.52 42.4% 0+0k 0+1io 0pf+0w

cc -O2 pg.c -o peggy
15.230u 0.150s 0:26.77 57.4% 0+0k 0+1io 0pf+0w

float

cc float.c -o float (20,000 iterations)
6.920u 0.240s 1:00.83 11.7% 0+0k 0+0io 0pf+0w

cc -O2 float.c -o float (20,000 iterations)
1.350u 0.130s 0:17.85 8.2% 0+0k 0+0io 0pf+0w

750 Duron 64 MB running Linux Mandrake 7.1

peggy

cc pg.c -o peggy
18.33user 0.01system 0:20.40elapsed 89%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (95major+11minor)pagefaults 0swaps

cc -O2 pg.c -o peggy
12.06user 0.01system 0:14.09elapsed 85%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (95major+12minor)pagefaults 0swaps

cc -O3 pg.c -o peggy
12.01user 0.00system 0:14.12elapsed 85%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (95major+12minor)pagefaults 0swaps

float

cc float.c -o float (20,000 iterations)
2.70user 0.03system 0:06.83elapsed 39%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (95major+11minor)pagefaults 0swaps

cc -O2 float.c -o float (20,000 iterations)
1.46user 0.08system 0:07.37elapsed 20%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (95major+11minor)pagefaults 0swaps