stalin can not optimize stream

Re: A fast scheme implementation?
http://newsgroups.derkeiler.com/Archive/Comp/comp.lang.scheme/2007-12/msg00061.html
ftp://ftp.ecn.purdue.edu/qobi/integ.tgz

でstalinが非常に高速なことを示す例としてあげられている積分プログラムをstreamで書きなおしてみた。
結論としてはstalinはstreamで書いたコードは最適化しないように見える。実行時間は、ほぼCと同じだった。
ただし別なCPUで実行した場合、手動で最適化されたC(integ2-c.c)と近い計算結果にもなる。gccの性能の問題なのだろうか。どちらにしても単純にdoで書くのが一番高速なようだ

(define the-empty-stream '())

(define stream-null? null?)

(define (stream-car stream) (car stream))

(define (stream-cdr stream) (force (cdr stream)))

(define (stream-ref s n)
  (if (= n 0)
      (stream-car s)
      (stream-ref (stream-cdr s) (- n 1))))

;; SICP prob 3.50 stream-map
(define (stream-map proc . argstreams)
  (if (stream-null? (car argstreams))
      the-empty-stream
      (
       ;cons-stream
       cons 
       (apply proc (map stream-car argstreams))
       (delay (apply stream-map
		     (cons proc (map stream-cdr argstreams))))
       )
))


;; sicp prob 3.55

(define (partial-sums s)
  (cons
   (stream-car s)
   (delay 
     (add-streams (stream-cdr s)
                (partial-sums s)))))


(define (add-streams s1 s2)
  (stream-map + s1 s2))

(define (sum-of-squares-stream s1 s2)
   (partial-sums (stream-map - s1 s2))
)

(define (integers-starting-from n)
  (cons n (delay (integers-starting-from (+ n 1)))))

(define integers (integers-starting-from 1))





;;;;;;;end util;;;;;;;;;;

(define (integrate-1D L U F)
 (let ((D (/ (- U L) 8.0)))
  (* (+ (* (F L) 0.5)
	(F (+ L D))
	(F (+ L (* 2.0 D)))
	(F (+ L (* 3.0 D)))
	(F (+ L (* 4.0 D)))
	(F (- U (* 3.0 D)))
	(F (- U (* 2.0 D)))
	(F (- U D))
	(* (F U) 0.5))
     D)))

(define (integrate-2D L1 U1 L2 U2 F)
 (integrate-1D L2 U2 (lambda (y) (integrate-1D L1 U1 (lambda (x) (F x y))) )))

(define (zark U V)
 (integrate-2D 0.0 U 0.0 V (lambda (X Y) (* X Y)) ))


(define (zark-i I)
  (zark (* I 1.0) (* I 2.0)))

(define r-stream 
  (stream-map zark-i integers))

(define (i-fun I)
  (let ((I2 (* (* I I) 1.0))) (* I2 I2)))

(define i-stream 
  (stream-map i-fun integers))

(define r-total-stream
   (partial-sums r-stream)
)
  
(define i-total-stream
  (partial-sums i-stream)
)

 (begin
   (display (stream-ref 
	  (sum-of-squares-stream   r-total-stream  i-total-stream) 1000))
	(newline))

以下が実行結果

$ more /proc/cpuinfo
model name	: Intel(R) Celeron(R) CPU        E3300  @ 2.50GHz

$ time ./integ-stream
0.0

real	0m1.905s
user	0m1.760s
sys	0m0.110s


$ time ./integ-c
0.000000

real	0m1.767s
user	0m1.700s
sys	0m0.000s


$ time ./integ
0.0

real	0m0.299s
user	0m0.230s
sys	0m0.000s







./run

Stalin Version
0.11
GCC Version
Using built-in specs.
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu 4.4.1-4ubuntu9' --with-bugurl=file:///usr/share/doc/gcc-4.4/README.Bugs --enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr --enable-shared --enable-multiarch --enable-linker-build-id --with-system-zlib --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.4 --program-suffix=-4.4 --enable-nls --enable-clocale=gnu --enable-libstdcxx-debug --enable-objc-gc --disable-werror --with-arch-32=i486 --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 4.4.1 (Ubuntu 4.4.1-4ubuntu9) 
Stalin integ
0.0
0.200u 0.000s 0:00.16 125.0%	0+0k 0+0io 0pf+0w
Stalin integ-stream
0.0
1.610u 0.080s 0:01.73 97.6%	0+0k 0+0io 0pf+0w
Stalin integ2
0.0
0.040u 0.000s 0:00.04 100.0%	0+0k 0+0io 0pf+0w
Stalin integ3
0.0
0.290u 0.000s 0:00.29 100.0%	0+0k 0+0io 0pf+0w
GCC integ-c
0.000000
3.020u 0.000s 0:03.06 98.6%	0+0k 0+0io 0pf+0w
GCC integ2-c
0.000000
0.230u 0.000s 0:00.21 109.5%	0+0k 0+0io 0pf+0w

一方違うパソコンで実行すると違う結果も

$ more /proc/cpuinfo
model name	: Intel(R) Pentium(R) 4 CPU 3.00GHz

$time ./integ
0.0

real	0m0.277s
user	0m0.276s

$ time ./integ-c 
0.000000

real	0m35.566s
user	0m35.274s
sys	0m0.084s

$ time ./integ-stream 
0.0

real	0m4.235s
user	0m3.764s
sys	0m0.220s

./run 

Stalin Version
0.11
GCC Version
Using built-in specs.
Target: i486-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu 4.4.3-4ubuntu5' --with-bugurl=file:///usr/share/doc/gcc-4.4/README.Bugs --enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr --enable-shared --enable-multiarch --enable-linker-build-id --with-system-zlib --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.4 --program-suffix=-4.4 --enable-nls --enable-clocale=gnu --enable-libstdcxx-debug --enable-plugin --enable-objc-gc --enable-targets=all --disable-werror --with-arch-32=i486 --with-tune=generic --enable-checking=release --build=i486-linux-gnu --host=i486-linux-gnu --target=i486-linux-gnu
Thread model: posix
gcc version 4.4.3 (Ubuntu 4.4.3-4ubuntu5) 
Stalin integ
0.0
0.284u 0.000s 0:00.28 100.0%	0+0k 0+0io 0pf+0w
Stalin integ-stream
0.0
3.540u 0.164s 0:03.77 98.1%	0+0k 0+0io 0pf+0w
Stalin integ2
0.0
0.080u 0.004s 0:00.08 100.0%	0+0k 0+0io 0pf+0w
Stalin integ3
0.0
0.588u 0.000s 0:00.58 100.0%	0+0k 0+0io 0pf+0w
GCC integ-c
0.000000
31.649u 0.036s 0:31.98 99.0%	0+0k 0+0io 0pf+0w
GCC integ2-c
0.000000
1.248u 0.000s 0:01.28 96.8%	0+0k 0+0io 0pf+0w


http://niitsuma.blogspot.com/2010/05/stalin-can-not-optimize-stream.html