a = rand(10^7)
10000000-element Array{Float64,1}:
0.13997315894811813
0.5193989738727267
0.7584553290895242
0.03761052189109426
0.8107007385842229
0.6011153909050382
0.6710419114618749
0.9444411988853978
0.8305919587530377
0.9739140500532291
0.493902631161933
0.6317669219670345
0.30207131966498757
⋮
0.018940070436631595
0.8882776865473041
0.9293140862639186
0.3061161368305956
0.818294648991674
0.9358912520138245
0.23190548038074343
0.8909549363740683
0.7398116366044267
0.6914286023441532
0.5101539272991089
0.7746593570353935
# The result is displayed in an overly complicated form
@time sum(a)
0.005311 seconds (1 allocation: 16 bytes)
4.999371988291268e6
using Pkg
Pkg.add("BenchmarkTools")
# The BenchmarkTools package makes accurate benchmarking easy
using BenchmarkTools
┌ Info: Precompiling BenchmarkTools [6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf]
└ @ Base loading.jl:1278
Pkg.add("Libdl")
using Libdl
# C source, held in a Julia string, for a plain accumulation loop:
# `c_sum(n, X)` adds the first `n` doubles of `X` in index order and returns
# the total. The string is compiled further below by piping it to gcc.
C_code = """
#include <stddef.h>
double c_sum(size_t n, double *X) {
double s = 0.0;
for (size_t i = 0; i < n; ++i) {
s += X[i];
}
return s;
}
"""
# Unique temporary path (without extension) used as the base name of the
# shared library; `tempname()` only generates the path, gcc creates the file.
const Clib = tempname()

# Build the shared library by streaming C_code into gcc's standard input
# (`-xc -` tells gcc to read C source from stdin). Requires gcc to be installed.
open(`gcc -fPIC -O3 -msse3 -xc -shared -o $(Clib * "." * Libdl.dlext) -`, "w") do io
    print(io, C_code)
end

# Julia wrapper around the compiled `c_sum` symbol: passes the element count
# and the array's data pointer, and returns the C function's double result.
function c_sum(X::Array{Float64})
    return ccall(("c_sum", Clib), Float64, (Csize_t, Ptr{Float64}), length(X), X)
end
c_sum (generic function with 1 method)
c_sum(a)
4.999371988291785e6
c_sum(a) ≈ sum(a)
true
c_sum(a) - sum(a)
5.168840289115906e-7
c_bench = @benchmark c_sum($a)
BenchmarkTools.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
minimum time: 10.382 ms (0.00% GC)
median time: 10.453 ms (0.00% GC)
mean time: 10.458 ms (0.00% GC)
maximum time: 10.641 ms (0.00% GC)
--------------
samples: 479
evals/sample: 1
println("C: Fastest time was $(minimum(c_bench.times) / 1e6) msec")
C: Fastest time was 10.382286 msec
# Results table: benchmark label => fastest observed time in milliseconds.
# Typed concretely instead of a bare `Dict()` (which is Dict{Any,Any}), because
# every key stored in it is a String and every value is a Float64.
d = Dict{String,Float64}()
d["C"] = minimum(c_bench.times) / 1e6  # sample times are in ns; convert to ms
d
Dict{Any,Any} with 1 entry:
"C" => 10.3823
using Plots
gr()
Plots.GRBackend()
using Statistics
# Benchmark sample times converted from nanoseconds to milliseconds.
t = c_bench.times ./ 1e6
m = minimum(t)  # fastest sample
σ = std(t)      # spread of the samples
# Histogram of the timings; the x-axis is clipped to the range from just below
# the minimum up to one standard deviation above it.
histogram(
    t;
    bins=500,
    xlim=(m - 0.01, m + σ),
    xlabel="milliseconds",
    ylabel="count",
    label=" ",
)
# Unique temporary path (without extension) for the -ffast-math build of the
# library; `tempname()` only generates the path, gcc creates the file.
const Clib_fastmath = tempname()

# Same build as the plain C library, with the -ffast-math flag added.
# C_code is streamed into gcc's standard input (`-xc -`).
open(`gcc -fPIC -O3 -msse3 -xc -shared -ffast-math -o $(Clib_fastmath * "." * Libdl.dlext) -`, "w") do io
    print(io, C_code)
end

# Julia wrapper around the `c_sum` symbol of the -ffast-math library.
function c_sum_fastmath(X::Array{Float64})
    return ccall(("c_sum", Clib_fastmath), Float64, (Csize_t, Ptr{Float64}), length(X), X)
end
c_sum_fastmath (generic function with 1 method)
c_fastmath_bench = @benchmark $c_sum_fastmath($a)
BenchmarkTools.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
minimum time: 5.524 ms (0.00% GC)
median time: 5.573 ms (0.00% GC)
mean time: 5.585 ms (0.00% GC)
maximum time: 6.143 ms (0.00% GC)
--------------
samples: 896
evals/sample: 1
d["C -ffast-math"] = minimum(c_fastmath_bench.times) / 1e6
5.524106
d
Dict{Any,Any} with 2 entries:
"C" => 10.3823
"C -ffast-math" => 5.52411
Pkg.add("PyCall")
using PyCall
pysum = pybuiltin("sum")
PyObject <built-in function sum>
pysum(a)
4.999371988291785e6
pysum(a) ≈ sum(a)
true
py_list_bench = @benchmark $pysum($a)
BenchmarkTools.Trial:
memory estimate: 152.60 MiB
allocs estimate: 10000036
--------------
minimum time: 1.520 s (18.33% GC)
median time: 1.649 s (19.90% GC)
mean time: 1.912 s (20.83% GC)
maximum time: 2.831 s (23.24% GC)
--------------
samples: 4
evals/sample: 1
d["Python built-in"] = minimum(py_list_bench.times) / 1e6
d
Dict{Any,Any} with 3 entries:
"C" => 10.3823
"Python built-in" => 1519.75
"C -ffast-math" => 5.52411
Pkg.add("Conda")
using Conda
Conda.add("numpy")
# Look up numpy's `sum` through property access. Indexing a PyObject with a
# string (`pyimport("numpy")["sum"]`) is deprecated in current PyCall releases.
numpy_sum = pyimport("numpy").sum
py_numpy_bench = @benchmark $numpy_sum($a)
numpy_sum(a)
numpy_sum(a) ≈ sum(a)  # sanity check against Julia's built-in sum
d["Python numpy"] = minimum(py_numpy_bench.times) / 1e6  # fastest run, in ms
d
# Define a hand-written Python summation loop through PyCall's py"""...""" macro.
# The function body has to be indented: Python rejects an unindented body after
# `def py_sum(A):` with an IndentationError.
py"""
def py_sum(A):
    s = 0.0
    for a in A:
        s += a
    return s
"""
# Fetch the Python function object so it can be called from Julia.
sum_py = py"py_sum"
PyObject <function py_sum at 0x7fd8a8067e50>
py_hand = @benchmark $sum_py($a)
BenchmarkTools.Trial:
memory estimate: 152.60 MiB
allocs estimate: 10000036
--------------
minimum time: 2.034 s (13.82% GC)
median time: 2.047 s (13.91% GC)
mean time: 2.125 s (14.81% GC)
maximum time: 2.294 s (16.48% GC)
--------------
samples: 3
evals/sample: 1
sum_py(a)
4.999371988291785e6
sum_py(a) ≈ sum(a)
true
d["Python hand-written"] = minimum(py_hand.times) / 1e6
d
Dict{Any,Any} with 4 entries:
"C" => 10.3823
"Python hand-written" => 2034.44
"Python built-in" => 1519.75
"C -ffast-math" => 5.52411
@which sum(a)
sum(a::AbstractArray; dims) in Base at reducedim.jl:719
j_bench = @benchmark sum($a)
BenchmarkTools.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
minimum time: 4.835 ms (0.00% GC)
median time: 4.893 ms (0.00% GC)
mean time: 4.912 ms (0.00% GC)
maximum time: 5.559 ms (0.00% GC)
--------------
samples: 1018
evals/sample: 1
d["Julia built-in"] = minimum(j_bench.times) / 1e6
d
Dict{Any,Any} with 5 entries:
"C" => 10.3823
"Python hand-written" => 2034.44
"Python built-in" => 1519.75
"Julia built-in" => 4.83467
"C -ffast-math" => 5.52411
"""
    mysum(A)

Add up the elements of `A` one at a time, in iteration order, starting from
`0.0`, and return the total.
"""
function mysum(A)
    total = 0.0
    for x in A
        total += x
    end
    return total
end
mysum (generic function with 1 method)
j_bench_hand = @benchmark mysum($a)
BenchmarkTools.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
minimum time: 10.515 ms (0.00% GC)
median time: 10.558 ms (0.00% GC)
mean time: 10.567 ms (0.00% GC)
maximum time: 10.771 ms (0.00% GC)
--------------
samples: 474
evals/sample: 1
d["Julia hand-written"] = minimum(j_bench_hand.times) / 1e6
d
Dict{Any,Any} with 6 entries:
"C" => 10.3823
"Julia hand-written" => 10.5148
"Python hand-written" => 2034.44
"Python built-in" => 1519.75
"Julia built-in" => 4.83467
"C -ffast-math" => 5.52411
"""
    mysum_simd(A)

Add up the elements of `A`, starting from `0.0`, with the loop annotated by
`@simd`, and return the total.

`@simd` lets the compiler reorder the floating-point additions, so the result
can differ in the last digits from a strict left-to-right sum.
"""
function mysum_simd(A)
    total = 0.0
    @simd for x in A
        total += x
    end
    return total
end
mysum_simd (generic function with 1 method)
j_bench_hand_simd = @benchmark mysum_simd($a)
BenchmarkTools.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
minimum time: 4.766 ms (0.00% GC)
median time: 4.797 ms (0.00% GC)
mean time: 4.822 ms (0.00% GC)
maximum time: 5.167 ms (0.00% GC)
--------------
samples: 1037
evals/sample: 1
mysum_simd(a)
4.999371988291293e6
d["Julia hand-written simd"] = minimum(j_bench_hand_simd.times) / 1e6
d
Dict{Any,Any} with 7 entries:
"Julia hand-written simd" => 4.76597
"C" => 10.3823
"Julia hand-written" => 10.5148
"Python hand-written" => 2034.44
"Python built-in" => 1519.75
"Julia built-in" => 4.83467
"C -ffast-math" => 5.52411
# Print the results from fastest to slowest, one per line: the label is
# right-padded with dots to 25 columns, and the time (rounded to one decimal)
# is left-padded with dots to 6 columns.
for entry in sort(collect(d); by=last)
    label, ms = entry
    println(rpad(label, 25, "."), lpad(round(ms; digits=1), 6, "."))
end
Julia hand-written simd.....4.8
Julia built-in..............4.8
C -ffast-math...............5.5
C..........................10.4
Julia hand-written.........10.5
Python built-in..........1519.8
Python hand-written......2034.4