Micro-benchmarks
Master the art of precise, focused benchmarking for specific code paths and optimizations in Go applications.
Understanding Micro-benchmarks
Micro-benchmarks measure the performance of small, isolated pieces of code to understand their precise behavior and identify optimization opportunities.
Basic Micro-benchmark Structure
// fibonacci returns the n-th Fibonacci number (and n itself for n <= 1).
// The naive double recursion is deliberate: its exponential cost makes it
// a good CPU-bound workload for the benchmarks below.
func fibonacci(n int) int {
	if n > 1 {
		return fibonacci(n-1) + fibonacci(n-2)
	}
	return n
}
// fibSink receives every benchmark result at package scope so the
// compiler cannot prove the fibonacci call dead and eliminate it,
// which would make the measurement meaningless.
var fibSink int

// BenchmarkFibonacci measures the naive recursive fibonacci at a
// fixed input of n=20.
func BenchmarkFibonacci(b *testing.B) {
	for i := 0; i < b.N; i++ {
		fibSink = fibonacci(20)
	}
}
// BenchmarkFibonacciSizes runs the fibonacci benchmark as one
// sub-benchmark per input size, so scaling behavior shows up in the
// per-size results.
func BenchmarkFibonacciSizes(b *testing.B) {
	for _, n := range []int{10, 15, 20, 25} {
		b.Run(fmt.Sprintf("n=%d", n), func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				fibonacci(n)
			}
		})
	}
}
Memory Allocation Benchmarking
// BenchmarkStringConcat compares four ways of joining a fixed set of
// string fragments: naive +=, strings.Builder with and without a
// pre-sized buffer, and strings.Join.
func BenchmarkStringConcat(b *testing.B) {
	parts := []string{"hello", "world", "from", "golang", "benchmark"}
	b.Run("Plus", func(b *testing.B) {
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			// += reallocates on every append: quadratic in total length.
			out := ""
			for _, p := range parts {
				out += p
			}
			_ = out
		}
	})
	b.Run("Builder", func(b *testing.B) {
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			var sb strings.Builder
			for _, p := range parts {
				sb.WriteString(p)
			}
			_ = sb.String()
		}
	})
	b.Run("BuilderPrealloc", func(b *testing.B) {
		b.ReportAllocs()
		// Compute the final length once, outside the timed loop.
		need := 0
		for _, p := range parts {
			need += len(p)
		}
		for i := 0; i < b.N; i++ {
			var sb strings.Builder
			sb.Grow(need) // single allocation up front
			for _, p := range parts {
				sb.WriteString(p)
			}
			_ = sb.String()
		}
	})
	b.Run("Join", func(b *testing.B) {
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			_ = strings.Join(parts, "")
		}
	})
}
CPU-Intensive Micro-benchmarks
// BenchmarkSortingAlgorithms times quicksort, heapsort, and the
// standard library sort on identical random inputs of several sizes.
// The timer is stopped while each iteration takes a fresh copy of the
// input, so only the sort itself is measured.
func BenchmarkSortingAlgorithms(b *testing.B) {
	sizes := []int{100, 1000, 10000}
	for _, size := range sizes {
		data := generateRandomData(size)
		// run registers one sub-benchmark that repeatedly copies the
		// shared input (untimed) and sorts the copy (timed).
		run := func(name string, sortFn func([]int)) {
			b.Run(fmt.Sprintf("%s_%d", name, size), func(b *testing.B) {
				for i := 0; i < b.N; i++ {
					b.StopTimer()
					work := make([]int, len(data))
					copy(work, data)
					b.StartTimer()
					sortFn(work)
				}
			})
		}
		run("QuickSort", func(s []int) { quickSort(s, 0, len(s)-1) })
		run("HeapSort", heapSort)
		run("StdSort", sort.Ints)
	}
}
// generateRandomData returns size pseudo-random ints in [0, size*10).
// A private generator with a fixed seed keeps every call (and therefore
// every benchmark run) reproducible; the previous rand.Seed call is
// deprecated since Go 1.20 and also reseeded the global source for the
// whole process.
func generateRandomData(size int) []int {
	rng := rand.New(rand.NewSource(42)) // fixed seed for reproducible benchmarks
	data := make([]int, size)
	for i := range data {
		data[i] = rng.Intn(size * 10)
	}
	return data
}
// quickSort sorts arr[low..high] in place by recursive Lomuto
// partitioning around arr[high].
func quickSort(arr []int, low, high int) {
	if low >= high {
		return // zero or one element: nothing to do
	}
	p := partition(arr, low, high)
	quickSort(arr, low, p-1)  // everything left of the pivot
	quickSort(arr, p+1, high) // everything right of the pivot
}
// partition applies the Lomuto scheme to arr[low..high] with arr[high]
// as pivot: elements <= pivot move to the front, the pivot lands at its
// final sorted index, which is returned.
func partition(arr []int, low, high int) int {
	pivot := arr[high]
	store := low // next slot for a value <= pivot
	for cur := low; cur < high; cur++ {
		if arr[cur] <= pivot {
			arr[store], arr[cur] = arr[cur], arr[store]
			store++
		}
	}
	// Put the pivot between the two regions.
	arr[store], arr[high] = arr[high], arr[store]
	return store
}
// heapSort sorts arr in place: build a max-heap, then repeatedly move
// the root (current maximum) to the end and re-heapify the shrinking
// prefix.
func heapSort(arr []int) {
	n := len(arr)
	// Sift down every internal node, starting from the last parent.
	for root := n/2 - 1; root >= 0; root-- {
		heapify(arr, n, root)
	}
	for end := n - 1; end > 0; end-- {
		arr[0], arr[end] = arr[end], arr[0] // max goes to its final slot
		heapify(arr, end, 0)
	}
}
// heapify sifts the element at index i down through the first n
// elements of arr until the max-heap property holds below it.
// Iterative form of the usual recursive sift-down.
func heapify(arr []int, n, i int) {
	for {
		top := i
		if l := 2*i + 1; l < n && arr[l] > arr[top] {
			top = l
		}
		if r := 2*i + 2; r < n && arr[r] > arr[top] {
			top = r
		}
		if top == i {
			return // both children (if any) are smaller: done
		}
		arr[i], arr[top] = arr[top], arr[i]
		i = top // continue sifting from the swapped child
	}
}
Data Structure Performance Micro-benchmarks
// BenchmarkDataStructures compares lookup cost across three layouts
// holding the same 10k string keys: a map, a slice scanned linearly,
// and a sorted slice searched with binary search.
func BenchmarkDataStructures(b *testing.B) {
	const size = 10000
	// Shared key set; lookups cycle through it with i % size.
	keys := make([]string, size)
	for i := range keys {
		keys[i] = fmt.Sprintf("key_%06d", i)
	}
	b.Run("Map", func(b *testing.B) {
		index := make(map[string]int, size)
		for i, k := range keys {
			index[k] = i
		}
		b.ResetTimer() // exclude map construction
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			_ = index[keys[i%size]]
		}
	})
	b.Run("SliceLinear", func(b *testing.B) {
		type pair struct {
			key   string
			value int
		}
		pairs := make([]pair, size)
		for i, k := range keys {
			pairs[i] = pair{key: k, value: i}
		}
		b.ResetTimer()
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			want := keys[i%size]
			for _, p := range pairs {
				if p.key == want {
					_ = p.value
					break
				}
			}
		}
	})
	b.Run("SliceBinary", func(b *testing.B) {
		sorted := make([]string, len(keys))
		copy(sorted, keys)
		sort.Strings(sorted)
		b.ResetTimer()
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			want := keys[i%size]
			if at := sort.SearchStrings(sorted, want); at < len(sorted) && sorted[at] == want {
				_ = at
			}
		}
	})
}
Memory Access Pattern Benchmarks
// BenchmarkMemoryAccess contrasts sequential, random, and strided reads
// over a 1M-element int64 slice (8 MiB) to expose CPU cache effects.
func BenchmarkMemoryAccess(b *testing.B) {
	const size = 1024 * 1024 // 1M elements
	// Sequential access: cache- and prefetcher-friendly baseline.
	b.Run("Sequential", func(b *testing.B) {
		data := make([]int64, size)
		for i := range data {
			data[i] = int64(i)
		}
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			sum := int64(0)
			for j := 0; j < size; j++ {
				sum += data[j]
			}
			_ = sum
		}
	})
	// Random access: pointer-chasing pattern that defeats prefetching.
	b.Run("Random", func(b *testing.B) {
		data := make([]int64, size)
		indices := make([]int, size)
		for i := range data {
			data[i] = int64(i)
			indices[i] = i
		}
		// Use a private fixed-seed generator: rand.Seed is deprecated
		// (Go 1.20+) and reseeding the global source would leak into
		// other benchmarks. The shuffle stays reproducible across runs.
		rng := rand.New(rand.NewSource(42))
		rng.Shuffle(len(indices), func(i, j int) {
			indices[i], indices[j] = indices[j], indices[i]
		})
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			sum := int64(0)
			for j := 0; j < size; j++ {
				sum += data[indices[j]]
			}
			_ = sum
		}
	})
	// Strided access: one element per 64 (512 bytes), skipping cache lines.
	b.Run("Strided", func(b *testing.B) {
		data := make([]int64, size)
		for i := range data {
			data[i] = int64(i)
		}
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			sum := int64(0)
			for j := 0; j < size; j += 64 { // 64 int64s = 512 B stride
				sum += data[j]
			}
			_ = sum
		}
	})
}
Function Call Overhead Benchmarks
// callSink receives every result at package scope so the compiler
// cannot dead-code-eliminate the bodies being measured; with the
// previous `_ = result` pattern the "Direct" case in particular could
// be optimized away entirely.
var callSink int

// BenchmarkFunctionCalls measures call-dispatch overhead: inline
// arithmetic vs a direct call vs method, interface, and function-value
// calls of the same trivial operation.
func BenchmarkFunctionCalls(b *testing.B) {
	value := 42
	// Inline arithmetic: no call at all.
	b.Run("Direct", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			callSink = value*2 + 1
		}
	})
	// Plain function call (likely inlined by the compiler).
	b.Run("FunctionCall", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			callSink = simpleOperation(value)
		}
	})
	// Value-receiver method call on a concrete type.
	b.Run("MethodCall", func(b *testing.B) {
		calculator := Calculator{multiplier: 2}
		for i := 0; i < b.N; i++ {
			callSink = calculator.Calculate(value)
		}
	})
	// Dynamic dispatch through an interface value.
	b.Run("InterfaceCall", func(b *testing.B) {
		var calculator Calculable = Calculator{multiplier: 2}
		for i := 0; i < b.N; i++ {
			callSink = calculator.Calculate(value)
		}
	})
	// Indirect call through a function value.
	b.Run("FunctionPointer", func(b *testing.B) {
		fn := simpleOperation
		for i := 0; i < b.N; i++ {
			callSink = fn(value)
		}
	})
}
// simpleOperation doubles x and adds one. It is kept trivially small so
// the call-overhead benchmarks measure dispatch cost, not work.
func simpleOperation(x int) int {
	return 1 + 2*x
}
// Calculator scales inputs by a stored factor; it exists to give the
// call-overhead benchmarks a method and an interface implementation.
type Calculator struct {
	multiplier int
}

// Calculate returns x scaled by the multiplier, plus one — the same
// work as simpleOperation when multiplier is 2.
func (c Calculator) Calculate(x int) int {
	return c.multiplier*x + 1
}

// Calculable abstracts Calculate so the benchmarks can compare direct
// method calls with dynamic dispatch through an interface value.
type Calculable interface {
	Calculate(int) int
}
Lock Performance Micro-benchmarks
// BenchmarkLocks compares synchronization primitives guarding a shared
// counter: an unsynchronized single-goroutine baseline, Mutex, RWMutex
// (read and write paths), atomic add, and a one-slot channel used as a
// combined lock-and-state cell. The parallel cases run under
// b.RunParallel to surface contention.
func BenchmarkLocks(b *testing.B) {
	var counter int64
	// Baseline: one goroutine, no synchronization cost at all.
	b.Run("NoLock", func(b *testing.B) {
		local := int64(0)
		for i := 0; i < b.N; i++ {
			local++
		}
		_ = local
	})
	b.Run("Mutex", func(b *testing.B) {
		var mtx sync.Mutex
		b.RunParallel(func(pb *testing.PB) {
			for pb.Next() {
				mtx.Lock()
				counter++
				mtx.Unlock()
			}
		})
	})
	// Shared read path: many readers may hold RLock at once.
	b.Run("RWMutexRead", func(b *testing.B) {
		var rw sync.RWMutex
		b.RunParallel(func(pb *testing.PB) {
			for pb.Next() {
				rw.RLock()
				_ = counter
				rw.RUnlock()
			}
		})
	})
	// Exclusive write path of the same primitive.
	b.Run("RWMutexWrite", func(b *testing.B) {
		var rw sync.RWMutex
		b.RunParallel(func(pb *testing.PB) {
			for pb.Next() {
				rw.Lock()
				counter++
				rw.Unlock()
			}
		})
	})
	b.Run("Atomic", func(b *testing.B) {
		b.RunParallel(func(pb *testing.PB) {
			for pb.Next() {
				atomic.AddInt64(&counter, 1)
			}
		})
	})
	// A buffered channel holding exactly one value behaves like a
	// mutex whose critical section carries the state with it.
	b.Run("Channel", func(b *testing.B) {
		ch := make(chan int64, 1)
		ch <- 0
		b.RunParallel(func(pb *testing.PB) {
			for pb.Next() {
				v := <-ch
				ch <- v + 1
			}
		})
	})
}
I/O Performance Micro-benchmarks
// BenchmarkIO compares moving the same 4 KiB buffer through four sinks:
// an in-memory copy, a raw file write, a bufio-buffered file write, and
// a TCP connection to a throwaway local server that discards its input.
func BenchmarkIO(b *testing.B) {
	data := make([]byte, 4096) // 4KB buffer
	for i := range data {
		data[i] = byte(i % 256)
	}
	// Memory copy: pure memmove, no syscalls — the upper bound.
	b.Run("MemoryCopy", func(b *testing.B) {
		dest := make([]byte, len(data))
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			copy(dest, data)
		}
	})
	// Unbuffered file write: a Seek+Write syscall pair per iteration.
	// Errors from Seek/Write are deliberately ignored here; failing the
	// benchmark on them would add branches to the timed loop.
	b.Run("FileWrite", func(b *testing.B) {
		tmpFile, err := os.CreateTemp("", "benchmark")
		if err != nil {
			b.Fatal(err)
		}
		defer os.Remove(tmpFile.Name())
		defer tmpFile.Close()
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			// Rewind each iteration so the file does not grow with b.N.
			tmpFile.Seek(0, 0)
			tmpFile.Write(data)
		}
	})
	// Buffered file write: bufio batches small writes into fewer syscalls.
	// NOTE(review): there is no Seek here, so the file grows with b.N,
	// and the deferred Flush runs after timing stops — the measured cost
	// is mostly in-memory buffer copies. Confirm this is intended.
	b.Run("BufferedFileWrite", func(b *testing.B) {
		tmpFile, err := os.CreateTemp("", "benchmark")
		if err != nil {
			b.Fatal(err)
		}
		defer os.Remove(tmpFile.Name())
		defer tmpFile.Close()
		writer := bufio.NewWriter(tmpFile)
		defer writer.Flush()
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			writer.Write(data)
		}
	})
	// Network write (to local server)
	b.Run("NetworkWrite", func(b *testing.B) {
		// Start a simple echo server
		// (despite the name it drains input via io.Discard rather than
		// echoing it back).
		listener, err := net.Listen("tcp", "localhost:0") // port 0: kernel picks a free port
		if err != nil {
			b.Fatal(err)
		}
		defer listener.Close()
		go func() {
			for {
				conn, err := listener.Accept()
				if err != nil {
					return // listener closed: shut down the accept loop
				}
				go func(c net.Conn) {
					defer c.Close()
					io.Copy(io.Discard, c) // drain everything the client sends
				}(conn)
			}
		}()
		conn, err := net.Dial("tcp", listener.Addr().String())
		if err != nil {
			b.Fatal(err)
		}
		defer conn.Close()
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			conn.Write(data)
		}
	})
}
Advanced Micro-benchmark Techniques
Custom Metrics and Reporting
// BenchmarkWithCustomMetrics shows how to attach a custom measurement
// to a benchmark via b.ReportMetric.
//
// Two fixes over the previous version: the per-iteration counter that
// was reported as "allocs/op" only ever counted loop iterations (so it
// always printed 1, regardless of real allocations) — it is removed,
// since b.ReportAllocs already reports true allocation counts. And the
// hand-timed duration is reported under a custom unit, because passing
// the unit "ns/op" to ReportMetric overwrites the framework's own,
// more accurate, timing.
func BenchmarkWithCustomMetrics(b *testing.B) {
	var workDuration time.Duration
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		start := time.Now()
		// The measured work: allocate and fill a 1 KiB buffer.
		data := make([]byte, 1024)
		for j := range data {
			data[j] = byte(j)
		}
		workDuration += time.Since(start)
	}
	// Custom metric under its own unit name; the standard ns/op and
	// allocs/op columns remain framework-computed.
	b.ReportMetric(float64(workDuration.Nanoseconds())/float64(b.N), "work-ns/op")
}
Benchmark Utilities
// setupBenchmarkData returns a slice of size pseudo-random ints in
// [0, 1000). It now uses a private generator with a fixed seed: the
// previous unseeded global rand made runs non-reproducible, which
// contradicts this guide's own "use fixed seeds for random data" rule
// and the seeded generateRandomData above.
func setupBenchmarkData(size int) []int {
	rng := rand.New(rand.NewSource(42))
	data := make([]int, size)
	for i := range data {
		data[i] = rng.Intn(1000)
	}
	return data
}
// runBenchmarkSizes registers benchFunc as one sub-benchmark per size,
// handing each a freshly generated input slice of that size.
func runBenchmarkSizes(b *testing.B, sizes []int, benchFunc func(*testing.B, []int)) {
	for _, n := range sizes {
		input := setupBenchmarkData(n)
		b.Run(fmt.Sprintf("size_%d", n), func(b *testing.B) {
			benchFunc(b, input)
		})
	}
}
// BenchmarkExampleWithSizes demonstrates runBenchmarkSizes by summing
// integer slices at several sizes to show linear scaling.
func BenchmarkExampleWithSizes(b *testing.B) {
	runBenchmarkSizes(b, []int{100, 1000, 10000, 100000}, func(b *testing.B, data []int) {
		for i := 0; i < b.N; i++ {
			total := 0
			for _, v := range data {
				total += v
			}
			_ = total
		}
	})
}
Best Practices for Micro-benchmarks
1. Isolate What You're Measuring
- Benchmark one thing at a time
- Use `b.StopTimer()` and `b.StartTimer()` to exclude setup
- Reset state between iterations when necessary
2. Use Realistic Data
- Use representative data sizes and patterns
- Include edge cases in separate benchmarks
- Consider cache effects and memory layout
3. Control for Variables
- Use fixed seeds for random data
- Run benchmarks multiple times
- Consider system load and other processes
4. Measure What Matters
- Include memory allocation metrics with `b.ReportAllocs()`
- Consider both CPU and memory performance
- Benchmark different input sizes to understand scaling
Micro-benchmarks are essential for understanding detailed performance characteristics and validating optimization efforts at the function and algorithm level.