Stalin cannot optimize streams
Re: A fast scheme implementation?
http://newsgroups.derkeiler.com/Archive/Comp/comp.lang.scheme/2007-12/msg00061.html
ftp://ftp.ecn.purdue.edu/qobi/integ.tgz
でstalinが非常に高速なことを示す例としてあげられている積分プログラムをstreamで書きなおしてみた。
結論としては、stalinはstreamで書いたコードを最適化しないように見える。実行時間は、Cで書いたもの(integ-c)とほぼ同じだった。
ただし別のCPUで実行した場合、手動で最適化されたC(integ2-c.c)に近い実行時間にもなる。gccの性能の問題なのだろうか。どちらにしても、単純にdoで書くのが一番高速なようだ。
;;; integ-stream: a stream-based version of a 2-D numerical integration
;;; benchmark.
;;;
;;; A stream here is either the empty list or a pair whose car is the first
;;; element and whose cdr is a promise (built with `delay`) for the rest of
;;; the stream.  `cons` + `delay` are written out explicitly wherever SICP
;;; would use the `cons-stream` special form.

;;;; ------------------------------------------------------------------
;;;; Stream utilities
;;;; ------------------------------------------------------------------

;; The empty stream, and the predicate that recognises it.
(define the-empty-stream '())
(define stream-null? null?)

;; First element of a non-empty stream.
(define (stream-car stream) (car stream))

;; Rest of a non-empty stream; forces the promise stored in the cdr.
(define (stream-cdr stream) (force (cdr stream)))

;; Element of stream S at zero-based index N.
(define (stream-ref s n)
  (if (= n 0)
      (stream-car s)
      (stream-ref (stream-cdr s) (- n 1))))

;; SICP exercise 3.50: n-ary stream-map.
;; Applies PROC element-wise across ARGSTREAMS.  Only the first stream is
;; tested for emptiness, so all argument streams are expected to be at
;; least as long as the first one.
(define (stream-map proc . argstreams)
  (if (stream-null? (car argstreams))
      the-empty-stream
      (cons (apply proc (map stream-car argstreams))
            (delay (apply stream-map
                          (cons proc (map stream-cdr argstreams)))))))

;; SICP exercise 3.55: stream whose k-th element is s0 + s1 + ... + sk.
;; Every forced tail calls (partial-sums s) again, constructing a new
;; stream rather than reusing the one currently being built.
(define (partial-sums s)
  (cons (stream-car s)
        (delay (add-streams (stream-cdr s) (partial-sums s)))))

;; Element-wise sum of two streams.
(define (add-streams s1 s2)
  (stream-map + s1 s2))

;; Running sum of the squared element-wise differences (s1 - s2)^2.
;; The previous version accumulated the raw differences without squaring
;; them, which contradicted the procedure's name.
(define (sum-of-squares-stream s1 s2)
  (partial-sums
   (stream-map (lambda (a b)
                 (let ((e (- a b)))
                   (* e e)))
               s1
               s2)))

;; Infinite stream n, n+1, n+2, ...
(define (integers-starting-from n)
  (cons n (delay (integers-starting-from (+ n 1)))))

;; The positive integers 1, 2, 3, ...
(define integers (integers-starting-from 1))

;;;; ------------------------------------------------------------------
;;;; Numerical integration
;;;; ------------------------------------------------------------------

;; Integrates F over [L, U] with the trapezoid rule on 8 equal
;; sub-intervals of width D: the two end points get weight 0.5, the seven
;; interior points weight 1.  The last three interior points are measured
;; back from U.
(define (integrate-1D L U F)
  (let ((D (/ (- U L) 8.0)))
    (* (+ (* (F L) 0.5)
          (F (+ L D))
          (F (+ L (* 2.0 D)))
          (F (+ L (* 3.0 D)))
          (F (+ L (* 4.0 D)))
          (F (- U (* 3.0 D)))
          (F (- U (* 2.0 D)))
          (F (- U D))
          (* (F U) 0.5))
       D)))

;; Integrates F(x, y) for x in [L1, U1] and y in [L2, U2] by nesting
;; integrate-1D: the outer integral runs over y, the inner one over x.
(define (integrate-2D L1 U1 L2 U2 F)
  (integrate-1D L2 U2
                (lambda (y)
                  (integrate-1D L1 U1 (lambda (x) (F x y))))))

;; Integral of x*y over [0, U] x [0, V].
(define (zark U V)
  (integrate-2D 0.0 U 0.0 V (lambda (X Y) (* X Y))))

;; zark evaluated on [0, I] x [0, 2I]; the multiplications by 1.0 and 2.0
;; turn the integer I into floating-point bounds.
(define (zark-i I)
  (zark (* I 1.0) (* I 2.0)))

;; Numerically integrated value for each I = 1, 2, 3, ...
(define r-stream (stream-map zark-i integers))

;; I^4 as a floating-point number, i.e. the closed form of (zark-i I).
(define (i-fun I)
  (let ((I2 (* (* I I) 1.0)))
    (* I2 I2)))

;; Closed-form value for each I = 1, 2, 3, ...
(define i-stream (stream-map i-fun integers))

;; Running totals of the numerical and the closed-form series.
(define r-total-stream (partial-sums r-stream))
(define i-total-stream (partial-sums i-stream))

;; Print the accumulated squared error between the two running totals.
;; NOTE(review): stream-ref is zero-based, so index 1000 selects the
;; 1001st element (I = 1 .. 1001).  Use 999 if exactly 1000 terms are
;; intended -- confirm against the benchmark being reproduced.
(begin
  (display (stream-ref (sum-of-squares-stream r-total-stream i-total-stream)
                       1000))
  (newline))
以下が実行結果
$ more /proc/cpuinfo model name : Intel(R) Celeron(R) CPU E3300 @ 2.50GHz $ time ./integ-stream 0.0 real 0m1.905s user 0m1.760s sys 0m0.110s $ time ./integ-c 0.000000 real 0m1.767s user 0m1.700s sys 0m0.000s $ time ./integ 0.0 real 0m0.299s user 0m0.230s sys 0m0.000s ./run Stalin Version 0.11 GCC Version Using built-in specs. Target: x86_64-linux-gnu Configured with: ../src/configure -v --with-pkgversion='Ubuntu 4.4.1-4ubuntu9' --with-bugurl=file:///usr/share/doc/gcc-4.4/README.Bugs --enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr --enable-shared --enable-multiarch --enable-linker-build-id --with-system-zlib --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.4 --program-suffix=-4.4 --enable-nls --enable-clocale=gnu --enable-libstdcxx-debug --enable-objc-gc --disable-werror --with-arch-32=i486 --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu Thread model: posix gcc version 4.4.1 (Ubuntu 4.4.1-4ubuntu9) Stalin integ 0.0 0.200u 0.000s 0:00.16 125.0% 0+0k 0+0io 0pf+0w Stalin integ-stream 0.0 1.610u 0.080s 0:01.73 97.6% 0+0k 0+0io 0pf+0w Stalin integ2 0.0 0.040u 0.000s 0:00.04 100.0% 0+0k 0+0io 0pf+0w Stalin integ3 0.0 0.290u 0.000s 0:00.29 100.0% 0+0k 0+0io 0pf+0w GCC integ-c 0.000000 3.020u 0.000s 0:03.06 98.6% 0+0k 0+0io 0pf+0w GCC integ2-c 0.000000 0.230u 0.000s 0:00.21 109.5% 0+0k 0+0io 0pf+0w
一方、別のパソコンで実行すると、異なる結果になった。
$ more /proc/cpuinfo model name : Intel(R) Pentium(R) 4 CPU 3.00GHz $time ./integ 0.0 real 0m0.277s user 0m0.276s $ time ./integ-c 0.000000 real 0m35.566s user 0m35.274s sys 0m0.084s $ time ./integ-stream 0.0 real 0m4.235s user 0m3.764s sys 0m0.220s ./run Stalin Version 0.11 GCC Version Using built-in specs. Target: i486-linux-gnu Configured with: ../src/configure -v --with-pkgversion='Ubuntu 4.4.3-4ubuntu5' --with-bugurl=file:///usr/share/doc/gcc-4.4/README.Bugs --enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr --enable-shared --enable-multiarch --enable-linker-build-id --with-system-zlib --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.4 --program-suffix=-4.4 --enable-nls --enable-clocale=gnu --enable-libstdcxx-debug --enable-plugin --enable-objc-gc --enable-targets=all --disable-werror --with-arch-32=i486 --with-tune=generic --enable-checking=release --build=i486-linux-gnu --host=i486-linux-gnu --target=i486-linux-gnu Thread model: posix gcc version 4.4.3 (Ubuntu 4.4.3-4ubuntu5) Stalin integ 0.0 0.284u 0.000s 0:00.28 100.0% 0+0k 0+0io 0pf+0w Stalin integ-stream 0.0 3.540u 0.164s 0:03.77 98.1% 0+0k 0+0io 0pf+0w Stalin integ2 0.0 0.080u 0.004s 0:00.08 100.0% 0+0k 0+0io 0pf+0w Stalin integ3 0.0 0.588u 0.000s 0:00.58 100.0% 0+0k 0+0io 0pf+0w GCC integ-c 0.000000 31.649u 0.036s 0:31.98 99.0% 0+0k 0+0io 0pf+0w GCC integ2-c 0.000000 1.248u 0.000s 0:01.28 96.8% 0+0k 0+0io 0pf+0w
http://niitsuma.blogspot.com/2010/05/stalin-can-not-optimize-stream.html