Benchmark Analysis
Master the art of interpreting benchmark results to make data-driven optimization decisions and avoid common pitfalls in performance analysis.
Understanding Benchmark Output
Basic Benchmark Results Format
BenchmarkExample-8 1000000 1234 ns/op 456 B/op 7 allocs/op
  BenchmarkExample — benchmark name
  -8               — GOMAXPROCS value (number of CPUs usable) when the benchmark ran
  1000000          — number of iterations the framework executed
  1234 ns/op       — nanoseconds per operation
  456 B/op         — bytes allocated per operation
  7 allocs/op      — heap allocations per operation
Memory Statistics
// BenchmarkStringOperations measures the cost of formatting a fresh string
// with fmt.Sprintf on every iteration; ReportAllocs adds B/op and allocs/op
// columns to the output.
func BenchmarkStringOperations(b *testing.B) {
	b.ReportAllocs()
	for n := 0; n < b.N; n++ {
		s := fmt.Sprintf("iteration_%d", n)
		_ = s
	}
}
Example output:
BenchmarkStringOperations-8 5000000 245 ns/op 16 B/op 1 allocs/op
Statistical Analysis of Results
Running Multiple Iterations
# Run benchmark multiple times for statistical significance
go test -bench=BenchmarkExample -count=10
# Use benchstat for analysis
go install golang.org/x/perf/cmd/benchstat@latest
go test -bench=BenchmarkExample -count=10 > before.txt
# Make changes
go test -bench=BenchmarkExample -count=10 > after.txt
benchstat before.txt after.txt
Sample benchstat Output
name old time/op new time/op delta
StringOperations-8 245ns ± 2% 198ns ± 3% -19.18% (p=0.000 n=10+10)
name old alloc/op new alloc/op delta
StringOperations-8 16.0B ± 0% 12.0B ± 0% -25.00% (p=0.000 n=10+10)
name old allocs/op new allocs/op delta
StringOperations-8 1.00 ± 0% 1.00 ± 0% ~ (all equal)
Common Analysis Patterns
Performance Regression Detection
// BenchmarkSuite runs the same workload at several input sizes as named
// sub-benchmarks so ns/op can be compared across sizes in one run.
func BenchmarkSuite(b *testing.B) {
	cases := []struct {
		name string
		size int
	}{
		{"Small", 10},
		{"Medium", 100},
		{"Large", 1000},
		{"XLarge", 10000},
	}
	for _, c := range cases {
		b.Run(c.name, func(b *testing.B) {
			input := generateTestData(c.size)
			b.ResetTimer() // keep test-data generation out of the measurement
			for n := 0; n < b.N; n++ {
				_ = processData(input)
			}
		})
	}
}
Scalability Analysis
// BenchmarkScalability sorts random slices of increasing size so the growth
// in ns/op can be compared against the expected O(n log n) curve.
func BenchmarkScalability(b *testing.B) {
	sizes := []int{10, 100, 1000, 10000, 100000}
	for _, size := range sizes {
		b.Run(fmt.Sprintf("Size_%d", size), func(b *testing.B) {
			data := make([]int, size)
			for i := range data {
				data[i] = rand.Intn(1000)
			}
			// sort.Ints mutates its argument: without a fresh copy, every
			// iteration after the first would sort already-sorted input and
			// measure the best case rather than the average case. The O(n)
			// copy is dominated by the O(n log n) sort being measured.
			scratch := make([]int, size)
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				copy(scratch, data)
				sort.Ints(scratch)
			}
		})
	}
}
Example results showing O(n log n) complexity:
BenchmarkScalability/Size_10-8 5000000 245 ns/op
BenchmarkScalability/Size_100-8 500000 2834 ns/op
BenchmarkScalability/Size_1000-8 50000 34521 ns/op
BenchmarkScalability/Size_10000-8 5000 456789 ns/op
Memory Analysis
Understanding Allocation Patterns
// BenchmarkMemoryPatterns contrasts a pre-allocated, reused slice with one
// that regrows from nil on every iteration, to show the allocation cost of
// append-driven growth.
func BenchmarkMemoryPatterns(b *testing.B) {
	b.Run("PreAllocated", func(b *testing.B) {
		b.ReportAllocs()
		buf := make([]int, 0, 1000) // capacity allocated once, up front
		for i := 0; i < b.N; i++ {
			buf = buf[:0] // reset length, reuse the backing array
			for v := 0; v < 1000; v++ {
				buf = append(buf, v)
			}
		}
	})
	b.Run("GrowthPattern", func(b *testing.B) {
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			var buf []int // nil slice: append must grow it repeatedly
			for v := 0; v < 1000; v++ {
				buf = append(buf, v)
			}
		}
	})
}
Results comparison:
BenchmarkMemoryPatterns/PreAllocated-8 500000 2456 ns/op 8192 B/op 1 allocs/op
BenchmarkMemoryPatterns/GrowthPattern-8 100000 12834 ns/op 24576 B/op 11 allocs/op
String vs StringBuilder
// BenchmarkStringBuilding compares three ways of assembling a string from a
// word list: naive += concatenation, strings.Builder, and bytes.Buffer.
func BenchmarkStringBuilding(b *testing.B) {
	words := []string{"hello", "world", "benchmark", "analysis"}
	b.Run("StringConcatenation", func(b *testing.B) {
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			joined := ""
			for _, w := range words {
				joined += w + " "
			}
			_ = joined
		}
	})
	b.Run("StringBuilder", func(b *testing.B) {
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			var sb strings.Builder
			sb.Grow(50) // reserve the expected final size up front
			for _, w := range words {
				sb.WriteString(w)
				sb.WriteString(" ")
			}
			_ = sb.String()
		}
	})
	b.Run("ByteBuffer", func(b *testing.B) {
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			var bb bytes.Buffer
			bb.Grow(50) // reserve the expected final size up front
			for _, w := range words {
				bb.WriteString(w)
				bb.WriteString(" ")
			}
			_ = bb.String()
		}
	})
}
CPU Profiling Integration
Profile-Guided Analysis
// BenchmarkWithProfile writes a CPU profile covering the benchmark loop when
// the cpuprofile flag is set; otherwise it runs the workload unprofiled.
func BenchmarkWithProfile(b *testing.B) {
	if path := *cpuprofile; path != "" {
		f, err := os.Create(path)
		if err != nil {
			b.Fatal(err)
		}
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			b.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}
	// The workload being profiled.
	for i := 0; i < b.N; i++ {
		_ = expensiveFunction()
	}
}
Command line usage:
go test -bench=BenchmarkWithProfile -cpuprofile=cpu.prof
go tool pprof cpu.prof
Comparative Analysis Techniques
Before/After Performance Analysis
// benchmark_test.go
// BenchmarkOptimizationComparison benchmarks the original and optimized
// implementations side by side on identical input so benchstat can compute
// the delta.
func BenchmarkOptimizationComparison(b *testing.B) {
	input := generateLargeTestData()
	b.Run("Original", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = originalAlgorithm(input)
		}
	})
	b.Run("Optimized", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = optimizedAlgorithm(input)
		}
	})
}
Cross-Platform Performance
// BenchmarkCrossPlatform tags its sub-benchmark with the current OS and
// architecture so results collected on different platforms stay
// distinguishable in combined output.
func BenchmarkCrossPlatform(b *testing.B) {
	label := fmt.Sprintf("GOOS_%s_GOARCH_%s", runtime.GOOS, runtime.GOARCH)
	b.Run(label, func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = platformSensitiveOperation()
		}
	})
}
Advanced Analysis Patterns
Performance Regression Testing
// Create a benchmark suite for regression testing
// BenchmarkRegressionSuite runs a set of critical operations and fails the
// benchmark when the average latency or average allocation per call exceeds
// a fixed budget, turning the benchmark into a regression gate.
func BenchmarkRegressionSuite(b *testing.B) {
	benchmarks := []struct {
		name     string
		function func() interface{}
		maxTime  time.Duration
		maxAlloc int64
	}{
		{"CriticalPath", criticalPathFunction, 100 * time.Microsecond, 1024},
		{"DataProcessing", dataProcessingFunction, 1 * time.Millisecond, 4096},
		{"NetworkIO", networkIOFunction, 10 * time.Millisecond, 8192},
	}
	for _, bm := range benchmarks {
		b.Run(bm.name, func(b *testing.B) {
			b.ReportAllocs()
			var totalTime time.Duration
			var totalAlloc int64
			for i := 0; i < b.N; i++ {
				before := getAllocatedBytes()
				// Time only the operation itself. The original measured wall
				// time across the whole loop, so the getAllocatedBytes probes
				// were billed to the function under test and inflated avgTime.
				start := time.Now()
				result := bm.function()
				totalTime += time.Since(start)
				after := getAllocatedBytes()
				totalAlloc += after - before
				_ = result
			}
			avgTime := totalTime / time.Duration(b.N)
			avgAlloc := totalAlloc / int64(b.N)
			if avgTime > bm.maxTime {
				b.Errorf("Performance regression: %v > %v", avgTime, bm.maxTime)
			}
			if avgAlloc > bm.maxAlloc {
				b.Errorf("Memory regression: %d > %d bytes", avgAlloc, bm.maxAlloc)
			}
		})
	}
}
Latency Distribution Analysis
// BenchmarkLatencyDistribution records a per-call latency for every iteration
// of the sub-benchmark, then reports the 50th/95th/99th percentiles of the
// final run.
func BenchmarkLatencyDistribution(b *testing.B) {
	var samples []time.Duration
	b.Run("LatencyMeasurement", func(b *testing.B) {
		samples = make([]time.Duration, b.N)
		for i := 0; i < b.N; i++ {
			begin := time.Now()
			out := operationWithVariableLatency()
			samples[i] = time.Since(begin)
			_ = out
		}
	})
	// Sort ascending so percentiles can be read off by index.
	sort.Slice(samples, func(i, j int) bool { return samples[i] < samples[j] })
	p50 := samples[len(samples)*50/100]
	p95 := samples[len(samples)*95/100]
	p99 := samples[len(samples)*99/100]
	b.Logf("Latency P50: %v, P95: %v, P99: %v", p50, p95, p99)
}
Benchmark Result Validation
Consistency Checks
// BenchmarkConsistency checks on every iteration that the function under
// test keeps producing the same value while its speed is measured, aborting
// on the first mismatch.
func BenchmarkConsistency(b *testing.B) {
	const expectedResult = 42
	b.Run("ConsistencyCheck", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			got := deterministicFunction()
			if got != expectedResult {
				b.Fatalf("Inconsistent result: got %d, want %d", got, expectedResult)
			}
		}
	})
}
Variance Analysis
// BenchmarkVarianceAnalysis takes 100 timing samples of a fixed workload and
// reports mean, standard deviation, and coefficient of variation, warning
// when run-to-run variance is high. Note it deliberately ignores b.N: the
// sample count is fixed so the statistics are comparable between runs.
func BenchmarkVarianceAnalysis(b *testing.B) {
	const (
		runs        = 100
		callsPerRun = 10000
	)
	samples := make([]float64, runs)
	for r := range samples {
		begin := time.Now()
		for i := 0; i < callsPerRun; i++ {
			_ = functionUnderTest()
		}
		samples[r] = float64(time.Since(begin).Nanoseconds()) / float64(callsPerRun)
	}
	mean := calculateMean(samples)
	stddev := calculateStdDev(samples, mean)
	cv := stddev / mean // coefficient of variation: relative spread
	b.Logf("Mean: %.2f ns, StdDev: %.2f ns, CV: %.2f%%", mean, stddev, cv*100)
	if cv > 0.1 { // more than 10% variation between runs
		b.Logf("Warning: High variance detected (CV: %.2f%%)", cv*100)
	}
}
Automated Analysis Tools
Custom Benchmark Analysis
// BenchmarkResult holds the parsed metrics of a single Go benchmark output
// line (e.g. "BenchmarkFoo-8  1000000  1234 ns/op  456 B/op  7 allocs/op").
type BenchmarkResult struct {
	Name        string  // benchmark name, including the -GOMAXPROCS suffix
	Iterations  int     // number of iterations the framework ran
	NsPerOp     float64 // nanoseconds per operation
	BytesPerOp  int64   // bytes allocated per operation
	AllocsPerOp int64   // heap allocations per operation
	MBPerSec    float64 // throughput, when the benchmark reports it
}

// AnalyzeBenchmarkResults prints a short report for each result, flagging
// slow operations (> 1000 ns/op) and per-op allocation counts.
func AnalyzeBenchmarkResults(results []BenchmarkResult) {
	for _, result := range results {
		fmt.Printf("Benchmark: %s\n", result.Name)
		fmt.Printf(" Performance: %.2f ns/op\n", result.NsPerOp)
		// Guard the division: for a zero-allocation benchmark the original
		// computed MBPerSec / 0 and printed "+Inf" as the efficiency.
		if result.AllocsPerOp > 0 {
			efficiency := result.MBPerSec / float64(result.AllocsPerOp)
			fmt.Printf(" Memory efficiency: %.2f MB/s per allocation\n", efficiency)
		}
		if result.NsPerOp > 1000 {
			fmt.Printf(" ⚠️ Slow operation detected\n")
		}
		if result.AllocsPerOp > 0 {
			fmt.Printf(" 💾 Memory allocations: %d allocs/op\n", result.AllocsPerOp)
		}
		fmt.Println()
	}
}
Common Analysis Pitfalls
1. Insufficient Sample Size
// BAD: Too few iterations for reliable results
// BenchmarkUnreliable is the anti-pattern flagged above, kept as a negative
// example: the testing framework chooses b.N itself and divides the measured
// time by it, so overwriting b.N by hand corrupts the reported ns/op and
// does not add statistical reliability. To get more samples, run with
// -count=N and aggregate with benchstat instead.
func BenchmarkUnreliable(b *testing.B) {
	if b.N < 1000 {
		b.N = 1000 // "forces" iterations but desynchronizes b.N from the timer — do not do this
	}
	for i := 0; i < b.N; i++ {
		result := randomVariableFunction()
		_ = result
	}
}
2. Ignoring Warmup Effects
// GOOD: Account for warmup effects — caches, branch predictors, and lazy initialization (Go is ahead-of-time compiled, so there is no JIT to warm up)
// BenchmarkWithWarmup primes caches and any lazy initialization with 1000
// untimed calls, then resets the timer so only steady-state iterations are
// measured.
func BenchmarkWithWarmup(b *testing.B) {
	// Untimed warmup phase.
	for w := 0; w < 1000; w++ {
		_ = functionUnderTest()
	}
	b.ResetTimer() // measure only the post-warmup iterations
	for i := 0; i < b.N; i++ {
		out := functionUnderTest()
		_ = out
	}
}
3. Environmental Factors
// BenchmarkEnvironmentalAware skips the benchmark on a heavily loaded
// machine and pins GC behavior so external factors do not distort the
// measurement.
func BenchmarkEnvironmentalAware(b *testing.B) {
	if getSystemLoad() > 0.8 {
		b.Skip("System under high load, skipping benchmark")
	}
	// Disable the GC for the measurement window; restore it on exit.
	prev := debug.SetGCPercent(-1)
	defer debug.SetGCPercent(prev)
	runtime.GC() // start from a freshly collected heap
	for i := 0; i < b.N; i++ {
		out := memoryIntensiveFunction()
		_ = out
	}
}
Proper benchmark analysis transforms raw performance data into actionable insights, enabling you to make informed optimization decisions and maintain performance standards over time.