Essential Tools and Frameworks
This comprehensive guide covers the essential tools and frameworks for Go performance engineering, from basic profiling to advanced distributed system analysis.
Core Go Performance Tools
pprof - The Foundation Tool
The most essential tool for Go performance analysis:
// Comprehensive pprof integration example
package main
import (
	"context"
	"errors"
	"fmt"
	"log"
	"net/http"
	_ "net/http/pprof" // Import for side effects
	"os"
	"runtime"
	"runtime/pprof"
	"runtime/trace"
	"strings"
	"time"
)
// Advanced pprof wrapper for production use
//
// ProfilerManager owns a dedicated pprof/debug HTTP server plus a catalogue
// of per-type profile configurations, and captures profiles to outputDir.
type ProfilerManager struct {
	enabled     bool                      // master switch; Start is a no-op when false
	profiles    map[string]*ProfileConfig // keyed by profile type ("cpu", "heap", ...)
	server      *http.Server              // debug HTTP server, created in Start
	outputDir   string                    // directory where captured profile files are written
	maxProfiles int                       // retention cap — enforcement is not shown in this excerpt
}

// ProfileConfig describes how a single profile type is captured.
type ProfileConfig struct {
	Name        string        `yaml:"name"`
	Type        string        `yaml:"type"` // "cpu", "heap", "goroutine", "allocs", "block", "mutex"
	Duration    time.Duration `yaml:"duration"`     // sampling window for duration-based captures (cpu, allocs, ...)
	Rate        int           `yaml:"rate"`         // runtime sampling rate/fraction for block and mutex profiles
	Enabled     bool          `yaml:"enabled"`
	AutoCapture bool          `yaml:"auto_capture"` // captured by the automatic trigger loop, not only on demand
	Triggers    []string      `yaml:"triggers"`     // condition names that may fire an automatic capture
}
// NewProfilerManager builds a manager pre-populated with the default
// profile catalogue (cpu, heap, goroutine, allocs, block, mutex). Callers
// may still adjust individual entries before Start.
func NewProfilerManager(config *ProfilerConfig) *ProfilerManager {
	defaults := []*ProfileConfig{
		{Name: "cpu", Type: "cpu", Duration: 30 * time.Second, Enabled: true, AutoCapture: false, Triggers: []string{"high_cpu", "manual"}},
		{Name: "heap", Type: "heap", Enabled: true, AutoCapture: true, Triggers: []string{"high_memory", "gc_pressure", "manual"}},
		{Name: "goroutine", Type: "goroutine", Enabled: true, AutoCapture: true, Triggers: []string{"goroutine_leak", "manual"}},
		{Name: "allocs", Type: "allocs", Duration: 60 * time.Second, Enabled: true, Triggers: []string{"memory_churn", "manual"}},
		{Name: "block", Type: "block", Duration: 30 * time.Second, Rate: 1, Enabled: true, Triggers: []string{"high_blocking", "manual"}},
		{Name: "mutex", Type: "mutex", Duration: 30 * time.Second, Rate: 1, Enabled: true, Triggers: []string{"lock_contention", "manual"}},
	}
	pm := &ProfilerManager{
		enabled:     config.Enabled,
		profiles:    make(map[string]*ProfileConfig, len(defaults)),
		outputDir:   config.OutputDir,
		maxProfiles: config.MaxProfiles,
	}
	for _, p := range defaults {
		pm.profiles[p.Name] = p
	}
	return pm
}
// Start enables block/mutex sampling, launches the pprof HTTP server on
// :6060 in a background goroutine, and kicks off automatic profiling.
// It is a no-op when the manager is disabled.
func (pm *ProfilerManager) Start() error {
	if !pm.enabled {
		return nil
	}
	// Apply sampling rates first so the runtime records block/mutex events
	// before any capture is requested.
	if config, exists := pm.profiles["block"]; exists && config.Enabled {
		runtime.SetBlockProfileRate(config.Rate)
	}
	if config, exists := pm.profiles["mutex"]; exists && config.Enabled {
		runtime.SetMutexProfileFraction(config.Rate)
	}
	mux := http.NewServeMux()
	// BUG FIX: the previous code called pprof.Index/Cmdline/Profile/Symbol/
	// Trace, which live in net/http/pprof — but "pprof" in this file is
	// runtime/pprof, so it did not compile. The blank import of
	// net/http/pprof registers those handlers on http.DefaultServeMux;
	// delegate the /debug/pprof/ subtree to it instead.
	mux.Handle("/debug/pprof/", http.DefaultServeMux)
	// Custom profiling endpoints (more specific patterns win over the
	// delegated subtree above).
	mux.HandleFunc("/debug/pprof/capture", pm.handleCaptureProfile)
	mux.HandleFunc("/debug/pprof/status", pm.handleProfileStatus)
	mux.HandleFunc("/debug/pprof/config", pm.handleProfileConfig)
	pm.server = &http.Server{
		Addr:    ":6060",
		Handler: mux,
	}
	go func() {
		// ErrServerClosed is the normal outcome of Shutdown/Close; anything
		// else is a real failure. errors.Is handles wrapped errors.
		if err := pm.server.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) {
			log.Printf("pprof server error: %v", err)
		}
	}()
	// Start automatic profiling if configured.
	pm.startAutomaticProfiling()
	log.Println("ProfilerManager started on :6060")
	return nil
}
// CaptureProfile writes one profile of the given type to a timestamped file
// in the output directory and returns its path. duration is only used for
// the "cpu" case (the sampling window); snapshot profiles ignore it.
//
// Improvements over the original: pprof.Lookup is nil-checked (a nil
// *Profile would have panicked on WriteTo), a partial file is removed on
// failure instead of being left on disk, and the file Close error is
// checked so a short write is reported.
func (pm *ProfilerManager) CaptureProfile(profileType string, duration time.Duration) (string, error) {
	config, exists := pm.profiles[profileType]
	if !exists || !config.Enabled {
		return "", fmt.Errorf("profile type %s not available", profileType)
	}
	timestamp := time.Now().Format("20060102-150405")
	filename := fmt.Sprintf("%s/%s-profile-%s.pprof", pm.outputDir, profileType, timestamp)
	file, err := os.Create(filename)
	if err != nil {
		return "", err
	}
	if err := pm.writeProfile(file, profileType, duration); err != nil {
		file.Close()
		os.Remove(filename) // don't leave a partial/empty profile behind
		return "", err
	}
	if err := file.Close(); err != nil {
		return "", err
	}
	log.Printf("Captured %s profile: %s", profileType, filename)
	return filename, nil
}

// writeProfile streams one profile of the given type into file.
// The four snapshot types share one code path via pprof.Lookup.
func (pm *ProfilerManager) writeProfile(file *os.File, profileType string, duration time.Duration) error {
	switch profileType {
	case "cpu":
		if err := pprof.StartCPUProfile(file); err != nil {
			return err
		}
		// Block while the runtime samples the CPU for the requested window.
		time.Sleep(duration)
		pprof.StopCPUProfile()
		return nil
	case "heap":
		runtime.GC() // Force GC for accurate heap profile
		return pprof.WriteHeapProfile(file)
	case "goroutine", "allocs", "block", "mutex":
		profile := pprof.Lookup(profileType)
		if profile == nil {
			return fmt.Errorf("profile %s not registered", profileType)
		}
		return profile.WriteTo(file, 0)
	default:
		return fmt.Errorf("unknown profile type: %s", profileType)
	}
}
// Enhanced trace integration
//
// CaptureTrace records a runtime execution trace for the given duration and
// writes it to a timestamped file under the output directory, returning the
// file's path.
func (pm *ProfilerManager) CaptureTrace(duration time.Duration) (string, error) {
	name := fmt.Sprintf("%s/trace-%s.out", pm.outputDir, time.Now().Format("20060102-150405"))
	f, err := os.Create(name)
	if err != nil {
		return "", err
	}
	defer f.Close()
	if err := trace.Start(f); err != nil {
		return "", err
	}
	// Block while the runtime streams trace events into the file.
	time.Sleep(duration)
	trace.Stop()
	log.Printf("Captured trace: %s", name)
	return name, nil
}
// Advanced profiling analysis
//
// AnalyzeProfile parses a captured profile file and extracts summary
// metrics: sample count, duration, top functions, and — based on the
// filename produced by CaptureProfile — allocation or goroutine patterns.
//
// NOTE(review): runtime/pprof (imported as "pprof" in this file) has no
// Parse function; profile parsing lives in github.com/google/pprof/profile
// (profile.Parse). As written this call does not compile — confirm which
// pprof package is intended here.
// NOTE(review): "strings" must be present in the import block for the
// strings.Contains calls below.
func (pm *ProfilerManager) AnalyzeProfile(filename string) (*ProfileAnalysis, error) {
	analysis := &ProfileAnalysis{
		Filename:  filename,
		Timestamp: time.Now(),
	}
	// Parse the profile file
	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer file.Close()
	profile, err := pprof.Parse(file)
	if err != nil {
		return nil, err
	}
	// Extract key metrics
	analysis.SampleCount = int64(len(profile.Sample))
	analysis.Duration = profile.DurationNanos
	analysis.PeriodType = profile.PeriodType.Type
	analysis.Period = profile.Period
	// Analyze top functions (helper defined elsewhere in this package)
	analysis.TopFunctions = pm.extractTopFunctions(profile, 10)
	// The profile kind is inferred from the filename; allocation patterns
	// only apply to heap/allocs profiles.
	if strings.Contains(filename, "heap") || strings.Contains(filename, "allocs") {
		analysis.AllocationPatterns = pm.analyzeAllocations(profile)
	}
	// Same filename-based detection for goroutine profiles.
	if strings.Contains(filename, "goroutine") {
		analysis.GoroutinePatterns = pm.analyzeGoroutines(profile)
	}
	return analysis, nil
}
// ProfileAnalysis is the summary extracted from one captured profile file.
type ProfileAnalysis struct {
	Filename     string           `json:"filename"`
	Timestamp    time.Time        `json:"timestamp"` // when the analysis ran, not when the profile was captured
	SampleCount  int64            `json:"sample_count"`
	Duration     int64            `json:"duration_nanos"`
	PeriodType   string           `json:"period_type"`
	Period       int64            `json:"period"`
	TopFunctions []FunctionSample `json:"top_functions"`
	// Populated only for heap/allocs and goroutine profiles respectively.
	AllocationPatterns *AllocationAnalysis `json:"allocation_patterns,omitempty"`
	GoroutinePatterns  *GoroutineAnalysis  `json:"goroutine_patterns,omitempty"`
}

// FunctionSample is one row of a "top functions" table: flat (self) and
// cumulative values plus each as a share of the profile total.
type FunctionSample struct {
	FunctionName string  `json:"function_name"`
	SelfValue    int64   `json:"self_value"`
	CumValue     int64   `json:"cum_value"`
	SelfPercent  float64 `json:"self_percent"`
	CumPercent   float64 `json:"cum_percent"`
}

// AllocationAnalysis summarizes allocation behavior from a heap/allocs
// profile. (AllocationSample is declared elsewhere in this package.)
type AllocationAnalysis struct {
	TotalAllocations   int64              `json:"total_allocations"`
	TotalBytes         int64              `json:"total_bytes"`
	LargeAllocations   []AllocationSample `json:"large_allocations"`
	AllocationHotSpots []FunctionSample   `json:"allocation_hot_spots"`
}

// GoroutineAnalysis summarizes a goroutine profile. (StackTraceFrequency
// is declared elsewhere in this package.)
type GoroutineAnalysis struct {
	TotalGoroutines   int                   `json:"total_goroutines"`
	BlockedGoroutines int                   `json:"blocked_goroutines"`
	GoroutineStates   map[string]int        `json:"goroutine_states"`
	CommonStackTraces []StackTraceFrequency `json:"common_stack_traces"`
}
Benchmarking Framework
// Advanced benchmarking framework
package benchmarks
import (
"context"
"fmt"
"math"
"runtime"
"sort"
"testing"
"time"
)
// Enhanced benchmark runner with statistical analysis
//
// BenchmarkSuite holds a set of registered benchmarks, the shared run
// configuration, accumulated results, and an optional baseline to compare
// against.
type BenchmarkSuite struct {
	name       string
	benchmarks []BenchmarkFunc
	config     *BenchmarkConfig
	results    []BenchmarkResult
	baseline   *BenchmarkBaseline // nil when no baseline has been loaded
}

// BenchmarkConfig controls how the suite runs its benchmarks.
type BenchmarkConfig struct {
	Iterations     int           `yaml:"iterations"`
	WarmupRounds   int           `yaml:"warmup_rounds"` // full testing.Benchmark runs discarded before measuring
	MinDuration    time.Duration `yaml:"min_duration"`
	MaxDuration    time.Duration `yaml:"max_duration"`
	CPUCores       []int         `yaml:"cpu_cores"`
	GCEnabled      bool          `yaml:"gc_enabled"`
	MemProfileRate int           `yaml:"mem_profile_rate"`
	Statistical    bool          `yaml:"statistical_analysis"` // when true, multiple rounds are run and Statistics populated
	OutputFormat   string        `yaml:"output_format"`        // "json", "csv", "html"
}

// BenchmarkFunc is a single registered benchmark plus its lifecycle hooks
// and classification metadata.
type BenchmarkFunc struct {
	Name     string
	Function func(*testing.B)
	Setup    func() error // run before; a non-nil error aborts the suite
	Teardown func() error // run after; failures are only warned about
	Parallel bool
	Category string
	Tags     []string
}

// BenchmarkResult is the aggregated outcome of one benchmark run.
type BenchmarkResult struct {
	Name        string               `json:"name"`
	Iterations  int                  `json:"iterations"`
	NsPerOp     int64                `json:"ns_per_op"`
	AllocsPerOp int64                `json:"allocs_per_op"`
	BytesPerOp  int64                `json:"bytes_per_op"`
	MemoryUsage MemoryStats          `json:"memory_usage"`
	Statistics  *BenchmarkStatistics `json:"statistics,omitempty"` // present only when statistical analysis ran
	Timestamp   time.Time            `json:"timestamp"`
	GoVersion   string               `json:"go_version"`
	CPUCount    int                  `json:"cpu_count"`
	Environment map[string]interface{} `json:"environment"`
}

// BenchmarkStatistics summarizes per-round ns/op samples.
type BenchmarkStatistics struct {
	Mean                   float64   `json:"mean_ns"`
	Median                 float64   `json:"median_ns"`
	StdDev                 float64   `json:"std_dev_ns"`
	Min                    float64   `json:"min_ns"`
	Max                    float64   `json:"max_ns"`
	P95                    float64   `json:"p95_ns"`
	P99                    float64   `json:"p99_ns"`
	CoefficientOfVariation float64   `json:"cv"` // StdDev / Mean; relative run-to-run noise
	Samples                []float64 `json:"samples"`
}
// NewBenchmarkSuite creates an empty suite with the given name and
// configuration; benchmarks are added afterwards via AddBenchmark.
func NewBenchmarkSuite(name string, config *BenchmarkConfig) *BenchmarkSuite {
	suite := &BenchmarkSuite{name: name, config: config}
	suite.results = []BenchmarkResult{}
	return suite
}
// AddBenchmark registers a named benchmark with the suite, applying any
// functional options (setup/teardown, category, tags, parallelism) on top
// of the defaults.
func (bs *BenchmarkSuite) AddBenchmark(name string, fn func(*testing.B), options ...BenchmarkOption) {
	entry := BenchmarkFunc{
		Name:     name,
		Function: fn,
		Category: "default",
		Tags:     []string{},
	}
	for _, apply := range options {
		apply(&entry)
	}
	bs.benchmarks = append(bs.benchmarks, entry)
}
// Benchmark options
//
// BenchmarkOption mutates a BenchmarkFunc before registration
// (functional-options pattern).
type BenchmarkOption func(*BenchmarkFunc)

// WithSetup attaches a function run once before the benchmark; a non-nil
// error aborts the suite run.
func WithSetup(setup func() error) BenchmarkOption {
	return func(bf *BenchmarkFunc) {
		bf.Setup = setup
	}
}

// WithTeardown attaches a cleanup function run after the benchmark;
// failures are reported as warnings only.
func WithTeardown(teardown func() error) BenchmarkOption {
	return func(bf *BenchmarkFunc) {
		bf.Teardown = teardown
	}
}

// WithCategory replaces the default "default" category label.
func WithCategory(category string) BenchmarkOption {
	return func(bf *BenchmarkFunc) {
		bf.Category = category
	}
}

// WithTags appends (does not replace) tags on the benchmark.
func WithTags(tags ...string) BenchmarkOption {
	return func(bf *BenchmarkFunc) {
		bf.Tags = append(bf.Tags, tags...)
	}
}

// Parallel marks the benchmark as parallel-capable.
// NOTE(review): the Parallel flag is not consulted by the runner code
// visible in this file — confirm it is honored elsewhere.
func Parallel() BenchmarkOption {
	return func(bf *BenchmarkFunc) {
		bf.Parallel = true
	}
}
// Run comprehensive benchmarks
//
// Run executes every registered benchmark in order: setup, measurement,
// teardown, then baseline comparison when a baseline is loaded. It stops
// on the first setup or measurement failure; teardown failures only
// produce a warning.
func (bs *BenchmarkSuite) Run() error {
	fmt.Printf("Running benchmark suite: %s\n", bs.name)
	fmt.Printf("Configuration: %+v\n", bs.config)
	// Environment metadata is shared by every result in this run.
	env := bs.collectEnvironmentInfo()
	for _, bench := range bs.benchmarks {
		fmt.Printf("Running benchmark: %s\n", bench.Name)
		if bench.Setup != nil {
			if err := bench.Setup(); err != nil {
				return fmt.Errorf("setup failed for %s: %v", bench.Name, err)
			}
		}
		res, err := bs.runSingleBenchmark(bench, env)
		if err != nil {
			return fmt.Errorf("benchmark %s failed: %v", bench.Name, err)
		}
		bs.results = append(bs.results, *res)
		if bench.Teardown != nil {
			if err := bench.Teardown(); err != nil {
				fmt.Printf("Warning: teardown failed for %s: %v\n", bench.Name, err)
			}
		}
		if bs.baseline != nil {
			bs.printComparison(bs.compareWithBaseline(*res))
		}
	}
	return nil
}
// runSingleBenchmark executes one benchmark via testing.Benchmark —
// several rounds when statistical analysis is enabled — and aggregates
// the rounds into a single BenchmarkResult.
//
// Aggregation: per-op figures are weighted by each round's iteration
// count (totals divided by total iterations), so longer rounds count
// proportionally more. Integer division truncates.
func (bs *BenchmarkSuite) runSingleBenchmark(benchmark BenchmarkFunc, env map[string]interface{}) (*BenchmarkResult, error) {
	// Accumulators across rounds.
	var samples []float64
	var totalIterations int
	var totalNs int64
	var totalAllocs int64
	var totalBytes int64
	memBefore := bs.getMemoryStats()
	// Run multiple rounds for statistical analysis; a single round yields
	// no variance information.
	rounds := 1
	if bs.config.Statistical {
		rounds = max(5, bs.config.Iterations/10) // At least 5 rounds (built-in max, Go 1.21+)
	}
	for round := 0; round < rounds; round++ {
		// Warmup: results are discarded.
		// NOTE(review): each warmup call runs a full testing.Benchmark,
		// which is expensive — confirm this cost is intended.
		if bs.config.WarmupRounds > 0 {
			for i := 0; i < bs.config.WarmupRounds; i++ {
				testing.Benchmark(benchmark.Function)
			}
		}
		// Actual measured run.
		result := testing.Benchmark(benchmark.Function)
		samples = append(samples, float64(result.NsPerOp()))
		totalIterations += result.N
		totalNs += int64(result.N) * result.NsPerOp()
		totalAllocs += int64(result.N) * result.AllocsPerOp()
		totalBytes += int64(result.N) * result.BytesPerOp()
	}
	memAfter := bs.getMemoryStats()
	// Weighted averages over all rounds (see note above).
	avgIterations := totalIterations / rounds
	avgNsPerOp := totalNs / int64(totalIterations)
	avgAllocsPerOp := totalAllocs / int64(totalIterations)
	avgBytesPerOp := totalBytes / int64(totalIterations)
	result := &BenchmarkResult{
		Name:        benchmark.Name,
		Iterations:  avgIterations,
		NsPerOp:     avgNsPerOp,
		AllocsPerOp: avgAllocsPerOp,
		BytesPerOp:  avgBytesPerOp,
		MemoryUsage: MemoryStats{
			Before: memBefore,
			After:  memAfter,
			// NOTE(review): Delta assumes the heap grew; if MemoryStats
			// fields are unsigned this underflows when the heap shrank
			// during the run — confirm the field types.
			Delta: memAfter.HeapInuse - memBefore.HeapInuse,
		},
		Timestamp:   time.Now(),
		GoVersion:   runtime.Version(),
		CPUCount:    runtime.NumCPU(),
		Environment: env,
	}
	// Statistics only make sense with more than one sample.
	if bs.config.Statistical && len(samples) > 1 {
		result.Statistics = bs.calculateStatistics(samples)
	}
	return result, nil
}
// calculateStatistics derives summary statistics from per-round ns/op
// samples, or nil for an empty input. Percentiles use nearest-rank with
// index truncation on the sorted copy (matching the original behavior);
// the median is the upper-middle element for even-length inputs. The
// population (not sample) variance is used. The input slice is stored in
// Samples without copying.
func (bs *BenchmarkSuite) calculateStatistics(samples []float64) *BenchmarkStatistics {
	if len(samples) == 0 {
		return nil
	}
	// Sort a copy for percentile calculations; leave the caller's slice order intact.
	sorted := make([]float64, len(samples))
	copy(sorted, samples)
	sort.Float64s(sorted)
	// Mean.
	var sum float64
	for _, sample := range samples {
		sum += sample
	}
	mean := sum / float64(len(samples))
	// Population variance and standard deviation.
	var variance float64
	for _, sample := range samples {
		diff := sample - mean
		variance += diff * diff
	}
	variance /= float64(len(samples))
	stdDev := math.Sqrt(variance)
	// BUG FIX: the coefficient of variation is undefined when the mean is
	// zero (e.g. all-zero samples) and previously produced NaN/Inf that
	// then leaked into JSON output; report 0 instead.
	cv := 0.0
	if mean != 0 {
		cv = stdDev / mean
	}
	return &BenchmarkStatistics{
		Mean:                   mean,
		Median:                 sorted[len(sorted)/2],
		StdDev:                 stdDev,
		Min:                    sorted[0],
		Max:                    sorted[len(sorted)-1],
		P95:                    sorted[int(0.95*float64(len(sorted)-1))],
		P99:                    sorted[int(0.99*float64(len(sorted)-1))],
		CoefficientOfVariation: cv,
		Samples:                samples,
	}
}
// Baseline comparison system
//
// BenchmarkBaseline is a stored snapshot of benchmark results (keyed by
// benchmark name) that later runs are compared against.
type BenchmarkBaseline struct {
	Version   string                     `json:"version"`
	Timestamp time.Time                  `json:"timestamp"`
	Results   map[string]BenchmarkResult `json:"results"`
	Metadata  map[string]interface{}     `json:"metadata"`
}

// BenchmarkComparison is the outcome of comparing one current result with
// its baseline counterpart. Either Improvements or Regressions (or both,
// across different metrics) may be populated.
type BenchmarkComparison struct {
	BenchmarkName string            `json:"benchmark_name"`
	Current       BenchmarkResult   `json:"current"`
	Baseline      BenchmarkResult   `json:"baseline"`
	Improvements  ComparisonMetrics `json:"improvements"`
	Regressions   ComparisonMetrics `json:"regressions"`
	Status        string            `json:"status"`       // "improved", "regressed", "unchanged"
	Significance  string            `json:"significance"` // "minor", "moderate", "major"
}

// ComparisonMetrics holds relative changes, in percent, for the three
// tracked dimensions.
type ComparisonMetrics struct {
	PerformanceChange float64 `json:"performance_change_percent"`
	MemoryChange      float64 `json:"memory_change_percent"`
	AllocationChange  float64 `json:"allocation_change_percent"`
}
// percentChange returns the relative change from baseline to current as a
// percentage. A zero baseline (common: AllocsPerOp == 0 for zero-allocation
// benchmarks) previously produced NaN/Inf; it is now reported as 0 when
// current is also zero, and as a 100% change otherwise.
func percentChange(current, baseline int64) float64 {
	if baseline == 0 {
		if current == 0 {
			return 0
		}
		return 100
	}
	return float64(current-baseline) / float64(baseline) * 100
}

// compareWithBaseline compares one result against the loaded baseline entry
// of the same name and classifies the change. Returns nil when the baseline
// has no entry for this benchmark.
func (bs *BenchmarkSuite) compareWithBaseline(current BenchmarkResult) *BenchmarkComparison {
	baseline, exists := bs.baseline.Results[current.Name]
	if !exists {
		return nil
	}
	comparison := &BenchmarkComparison{
		BenchmarkName: current.Name,
		Current:       current,
		Baseline:      baseline,
	}
	// Lower is better for all three metrics, so a negative change is an
	// improvement.
	perfChange := percentChange(current.NsPerOp, baseline.NsPerOp)
	memChange := percentChange(current.BytesPerOp, baseline.BytesPerOp)
	allocChange := percentChange(current.AllocsPerOp, baseline.AllocsPerOp)
	switch {
	case perfChange < 0: // Improvement
		comparison.Improvements.PerformanceChange = -perfChange
		comparison.Status = "improved"
	case perfChange > 0: // Regression
		comparison.Regressions.PerformanceChange = perfChange
		comparison.Status = "regressed"
	default:
		comparison.Status = "unchanged"
	}
	comparison.Improvements.MemoryChange = max(0, -memChange)
	comparison.Regressions.MemoryChange = max(0, memChange)
	comparison.Improvements.AllocationChange = max(0, -allocChange)
	comparison.Regressions.AllocationChange = max(0, allocChange)
	// Significance is driven by the largest absolute change across metrics.
	maxChange := math.Max(math.Abs(perfChange), math.Max(math.Abs(memChange), math.Abs(allocChange)))
	switch {
	case maxChange < 5:
		comparison.Significance = "minor"
	case maxChange < 25:
		comparison.Significance = "moderate"
	default:
		comparison.Significance = "major"
	}
	return comparison
}
External Monitoring Tools Integration
Prometheus Integration
// Comprehensive Prometheus metrics integration
package monitoring
import (
"context"
"net/http"
"time"
"github.com/prometheus/client_golang/api"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/common/model"
)
// Custom metrics registry for Go applications
//
// GoMetricsRegistry bundles a private Prometheus registry with runtime,
// application, and performance collectors so the whole set can be exposed
// through one handler.
type GoMetricsRegistry struct {
	registry *prometheus.Registry // private registry; all metrics below are registered here
	// Runtime metrics ("go" namespace)
	goRoutines prometheus.Gauge
	gcDuration prometheus.Histogram
	heapSize   prometheus.Gauge
	heapInuse  prometheus.Gauge
	stackSize  prometheus.Gauge
	// Application metrics ("app" namespace)
	httpRequests    *prometheus.CounterVec
	httpDuration    *prometheus.HistogramVec
	dbConnections   prometheus.Gauge
	dbQueryDuration *prometheus.HistogramVec
	cacheHitRate    prometheus.Gauge
	// Performance metrics (initialized in initializePerformanceMetrics,
	// which is defined elsewhere in this package)
	cpuUsage    prometheus.Gauge
	memoryUsage prometheus.Gauge
	allocRate   prometheus.Gauge
	gcPressure  prometheus.Gauge
	// Custom business metrics, keyed by name
	customMetrics map[string]prometheus.Collector
}
// NewGoMetricsRegistry creates a registry and populates all three metric
// families up front, so it is complete before any collection loop starts.
func NewGoMetricsRegistry() *GoMetricsRegistry {
	gmr := &GoMetricsRegistry{
		registry:      prometheus.NewRegistry(),
		customMetrics: map[string]prometheus.Collector{},
	}
	gmr.initializeRuntimeMetrics()
	gmr.initializeApplicationMetrics()
	gmr.initializePerformanceMetrics()
	return gmr
}
// initializeRuntimeMetrics creates and registers the runtime gauges and the
// GC-duration histogram, all under the "go" namespace.
func (gmr *GoMetricsRegistry) initializeRuntimeMetrics() {
	// All runtime gauges share the namespace; only subsystem/name/help vary.
	newGauge := func(subsystem, name, help string) prometheus.Gauge {
		return prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: "go",
			Subsystem: subsystem,
			Name:      name,
			Help:      help,
		})
	}
	gmr.goRoutines = newGauge("runtime", "goroutines_total", "Number of goroutines currently running")
	gmr.heapSize = newGauge("memory", "heap_bytes", "Total heap size in bytes")
	gmr.heapInuse = newGauge("memory", "heap_inuse_bytes", "Heap memory currently in use")
	gmr.stackSize = newGauge("memory", "stack_bytes", "Stack memory in use")
	// Sub-millisecond buckets matter for GC pauses, hence the fine low end.
	gmr.gcDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
		Namespace: "go",
		Subsystem: "runtime",
		Name:      "gc_duration_seconds",
		Help:      "Time spent in garbage collection",
		Buckets:   []float64{0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0},
	})
	gmr.registry.MustRegister(
		gmr.goRoutines,
		gmr.gcDuration,
		gmr.heapSize,
		gmr.heapInuse,
		gmr.stackSize,
	)
}
// initializeApplicationMetrics creates and registers the HTTP, database,
// and cache metrics under the "app" namespace. Each metric is registered
// immediately after creation rather than in one batch at the end.
func (gmr *GoMetricsRegistry) initializeApplicationMetrics() {
	// HTTP request counter, labeled for per-endpoint/status breakdowns.
	gmr.httpRequests = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "app",
			Subsystem: "http",
			Name:      "requests_total",
			Help:      "Total number of HTTP requests",
		},
		[]string{"method", "endpoint", "status"},
	)
	gmr.registry.MustRegister(gmr.httpRequests)
	// HTTP latency histogram with buckets spanning 1ms to 10s.
	gmr.httpDuration = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: "app",
			Subsystem: "http",
			Name:      "request_duration_seconds",
			Help:      "HTTP request duration",
			Buckets:   []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
		},
		[]string{"method", "endpoint"},
	)
	gmr.registry.MustRegister(gmr.httpDuration)
	// Database connection gauge and query-latency histogram.
	gmr.dbConnections = prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "app",
		Subsystem: "database",
		Name:      "connections_active",
		Help:      "Number of active database connections",
	})
	gmr.registry.MustRegister(gmr.dbConnections)
	gmr.dbQueryDuration = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: "app",
			Subsystem: "database",
			Name:      "query_duration_seconds",
			Help:      "Database query duration",
			Buckets:   []float64{0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5},
		},
		[]string{"query_type", "table"},
	)
	gmr.registry.MustRegister(gmr.dbQueryDuration)
	// Cache effectiveness gauge.
	gmr.cacheHitRate = prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "app",
		Subsystem: "cache",
		Name:      "hit_rate",
		Help:      "Cache hit rate percentage",
	})
	gmr.registry.MustRegister(gmr.cacheHitRate)
}
// StartMetricsCollection launches a background goroutine that refreshes
// the runtime and performance gauges once per interval.
//
// NOTE(review): the ticker is never stopped and there is no way to cancel
// the goroutine — both live for the life of the process. If collection
// ever needs to stop (tests, graceful shutdown), thread a context or stop
// channel through here.
func (gmr *GoMetricsRegistry) StartMetricsCollection(interval time.Duration) {
	ticker := time.NewTicker(interval)
	go func() {
		for range ticker.C {
			gmr.collectRuntimeMetrics()
			gmr.collectPerformanceMetrics()
		}
	}()
}
// collectRuntimeMetrics samples runtime.MemStats and the goroutine count
// into the registered gauges.
//
// NOTE(review): this sample's import block does not include "runtime",
// which is required below — add it for this function to compile.
func (gmr *GoMetricsRegistry) collectRuntimeMetrics() {
	var memStats runtime.MemStats
	runtime.ReadMemStats(&memStats)
	gmr.goRoutines.Set(float64(runtime.NumGoroutine()))
	gmr.heapSize.Set(float64(memStats.HeapSys))
	gmr.heapInuse.Set(float64(memStats.HeapInuse))
	gmr.stackSize.Set(float64(memStats.StackInuse))
	// GC metrics
	// NOTE(review): NumGC is the cumulative count of completed GC cycles,
	// not a pressure measure — the gauge name is misleading. Also,
	// gcPressure is initialized in initializePerformanceMetrics (defined
	// elsewhere); confirm it is non-nil before this runs.
	gmr.gcPressure.Set(float64(memStats.NumGC))
}
// Prometheus query client for analysis
//
// PrometheusAnalyzer runs range queries against a Prometheus server to
// extract performance trends.
type PrometheusAnalyzer struct {
	client v1.API // Prometheus HTTP API v1 client
	config *AnalyzerConfig
}

// AnalyzerConfig configures the analyzer's connection and query behavior.
type AnalyzerConfig struct {
	BaseURL       string        `yaml:"base_url"`
	Timeout       time.Duration `yaml:"timeout"`
	QueryInterval time.Duration `yaml:"query_interval"` // step size for range queries
	RetentionDays int           `yaml:"retention_days"`
}
// NewPrometheusAnalyzer builds an analyzer backed by the Prometheus HTTP
// API at config.BaseURL.
func NewPrometheusAnalyzer(config *AnalyzerConfig) (*PrometheusAnalyzer, error) {
	apiClient, err := api.NewClient(api.Config{Address: config.BaseURL})
	if err != nil {
		return nil, err
	}
	pa := &PrometheusAnalyzer{
		config: config,
		client: v1.NewAPI(apiClient),
	}
	return pa, nil
}
// AnalyzePerformanceTrends queries a fixed set of key performance metrics
// over the trailing window [now-duration, now], computes a per-metric
// trend, and derives an overall health score. Metrics whose query fails
// are skipped (and logged) so one missing series does not fail the whole
// analysis.
func (pa *PrometheusAnalyzer) AnalyzePerformanceTrends(ctx context.Context, duration time.Duration) (*PerformanceTrendAnalysis, error) {
	endTime := time.Now()
	startTime := endTime.Add(-duration)
	analysis := &PerformanceTrendAnalysis{
		StartTime: startTime,
		EndTime:   endTime,
		Metrics:   make(map[string]*MetricTrend),
	}
	// Define queries for key performance metrics
	queries := map[string]string{
		"response_time_p95": `histogram_quantile(0.95, rate(app_http_request_duration_seconds_bucket[5m]))`,
		"response_time_p99": `histogram_quantile(0.99, rate(app_http_request_duration_seconds_bucket[5m]))`,
		"throughput":        `rate(app_http_requests_total[5m])`,
		"error_rate":        `rate(app_http_requests_total{status=~"5.."}[5m]) / rate(app_http_requests_total[5m])`,
		"memory_usage":      `go_memory_heap_inuse_bytes`,
		"goroutines":        `go_runtime_goroutines_total`,
		"gc_duration":       `rate(go_runtime_gc_duration_seconds_sum[5m])`,
	}
	for metricName, query := range queries {
		trend, err := pa.analyzeMetricTrend(ctx, query, startTime, endTime)
		if err != nil {
			// BUG FIX: errors were silently dropped even though the old
			// comment claimed they were logged; surface them so missing
			// metrics are diagnosable, then continue with the rest.
			fmt.Printf("trend query %s failed: %v\n", metricName, err)
			continue
		}
		analysis.Metrics[metricName] = trend
	}
	// Calculate overall health score from whatever metrics succeeded.
	analysis.HealthScore = pa.calculateHealthScore(analysis.Metrics)
	return analysis, nil
}
// analyzeMetricTrend runs one range query and converts the resulting
// matrix into a MetricTrend with per-point values and summary statistics.
func (pa *PrometheusAnalyzer) analyzeMetricTrend(ctx context.Context, query string, start, end time.Time) (*MetricTrend, error) {
	window := v1.Range{
		Start: start,
		End:   end,
		Step:  pa.config.QueryInterval,
	}
	result, warnings, err := pa.client.QueryRange(ctx, query, window)
	if err != nil {
		return nil, err
	}
	if len(warnings) > 0 {
		fmt.Printf("Query warnings: %v\n", warnings)
	}
	trend := &MetricTrend{
		Query:      query,
		DataPoints: []DataPoint{},
	}
	// Only matrix results (range vectors) carry time series data.
	if matrix, ok := result.(model.Matrix); ok {
		for _, series := range matrix {
			for _, sample := range series.Values {
				trend.DataPoints = append(trend.DataPoints, DataPoint{
					// BUG FIX: model.Time is milliseconds since the epoch;
					// the old time.Unix(int64(ts), 0) treated it as seconds,
					// producing timestamps ~50,000 years in the future.
					// model.Time.Time() performs the correct conversion.
					Timestamp: sample.Timestamp.Time(),
					Value:     float64(sample.Value),
				})
			}
		}
	}
	// Calculate trend statistics
	trend.Statistics = pa.calculateTrendStatistics(trend.DataPoints)
	return trend, nil
}
// MetricTrend is the time series for one query plus derived statistics.
// (DataPoint is declared elsewhere in this package.)
type MetricTrend struct {
	Query      string           `json:"query"`
	DataPoints []DataPoint      `json:"data_points"`
	Statistics *TrendStatistics `json:"statistics"`
}

// TrendStatistics summarizes a metric's behavior over the analyzed window,
// including a linear-fit direction.
type TrendStatistics struct {
	Mean   float64 `json:"mean"`
	Min    float64 `json:"min"`
	Max    float64 `json:"max"`
	StdDev float64 `json:"std_dev"`
	Trend  string  `json:"trend"` // "increasing", "decreasing", "stable"
	Slope  float64 `json:"slope"`
	R2     float64 `json:"r_squared"` // goodness of fit for the slope
}
Grafana Dashboard Configuration
// Grafana dashboard provisioning
package grafana
import (
"encoding/json"
"fmt"
"time"
)
// Go Performance Dashboard definition
//
// GrafanaDashboard mirrors Grafana's dashboard JSON model so a dashboard
// can be generated in code and provisioned as a file. (Templating,
// TimeRange, GridPos, Target, FieldConfig, and AlertConfig are declared
// elsewhere in this package.)
type GrafanaDashboard struct {
	ID            int        `json:"id,omitempty"`
	UID           string     `json:"uid,omitempty"` // stable identifier used for provisioning updates
	Title         string     `json:"title"`
	Description   string     `json:"description"`
	Tags          []string   `json:"tags"`
	Timezone      string     `json:"timezone"`
	Panels        []Panel    `json:"panels"`
	Templating    Templating `json:"templating"`
	Time          TimeRange  `json:"time"`
	Refresh       string     `json:"refresh"`
	SchemaVersion int        `json:"schemaVersion"` // Grafana dashboard schema version
}

// Panel mirrors a single Grafana panel: layout, queries, display options,
// and an optional attached alert.
type Panel struct {
	ID          int                    `json:"id"`
	Title       string                 `json:"title"`
	Type        string                 `json:"type"` // e.g. "stat", "timeseries"
	GridPos     GridPos                `json:"gridPos"`
	Targets     []Target               `json:"targets"`
	Options     map[string]interface{} `json:"options,omitempty"`
	FieldConfig FieldConfig            `json:"fieldConfig"`
	Alert       *AlertConfig           `json:"alert,omitempty"`
}
// CreateGoPerformanceDashboard assembles the complete Go performance
// dashboard: metadata, the ten metric panels, and templating variables.
func CreateGoPerformanceDashboard() *GrafanaDashboard {
	panels := []Panel{
		createOverviewPanel(),
		createResponseTimePanel(),
		createThroughputPanel(),
		createErrorRatePanel(),
		createMemoryPanel(),
		createGoroutinesPanel(),
		createGCPanel(),
		createCPUPanel(),
		createDatabasePanel(),
		createCachePanel(),
	}
	return &GrafanaDashboard{
		UID:           "go-performance-monitoring",
		Title:         "Go Application Performance Monitoring",
		Description:   "Comprehensive performance monitoring for Go applications",
		Tags:          []string{"go", "performance", "monitoring"},
		Timezone:      "browser",
		SchemaVersion: 27,
		Time:          TimeRange{From: "now-1h", To: "now"},
		Refresh:       "5s",
		Panels:        panels,
		Templating:    createTemplatingConfig(),
	}
}
// createOverviewPanel builds the top "Application Overview" stat row:
// service status, request rate, P95 latency, and error-rate percentage.
func createOverviewPanel() Panel {
	targets := []Target{
		{Expr: "up{job=\"go-app\"}", RefID: "A", LegendFormat: "Service Status"},
		{Expr: "rate(app_http_requests_total[5m])", RefID: "B", LegendFormat: "RPS"},
		{Expr: "histogram_quantile(0.95, rate(app_http_request_duration_seconds_bucket[5m]))", RefID: "C", LegendFormat: "P95 Latency"},
		{Expr: "rate(app_http_requests_total{status=~\"5..\"}[5m]) / rate(app_http_requests_total[5m]) * 100", RefID: "D", LegendFormat: "Error Rate %"},
	}
	return Panel{
		ID:      1,
		Title:   "Application Overview",
		Type:    "stat",
		GridPos: GridPos{X: 0, Y: 0, W: 24, H: 4},
		Targets: targets,
		FieldConfig: FieldConfig{
			Defaults: FieldDefaults{
				Unit: "short",
				Thresholds: Thresholds{
					Steps: []ThresholdStep{
						{Color: "green", Value: 0},
						{Color: "yellow", Value: 80},
						{Color: "red", Value: 95},
					},
				},
			},
		},
	}
}
// createResponseTimePanel builds the P50/P95/P99 latency time-series panel
// and attaches an alert that fires when P95 stays above 500ms.
func createResponseTimePanel() Panel {
	quantiles := []Target{
		{Expr: "histogram_quantile(0.50, rate(app_http_request_duration_seconds_bucket[5m]))", RefID: "A", LegendFormat: "P50"},
		{Expr: "histogram_quantile(0.95, rate(app_http_request_duration_seconds_bucket[5m]))", RefID: "B", LegendFormat: "P95"},
		{Expr: "histogram_quantile(0.99, rate(app_http_request_duration_seconds_bucket[5m]))", RefID: "C", LegendFormat: "P99"},
	}
	alert := &AlertConfig{
		Name:      "High Response Time",
		Message:   "P95 response time is above threshold",
		Frequency: "30s",
		Conditions: []AlertCondition{
			{
				Query: AlertQuery{
					RefID: "B",
					Model: map[string]interface{}{
						"expr": "histogram_quantile(0.95, rate(app_http_request_duration_seconds_bucket[5m]))",
					},
				},
				Reducer: AlertReducer{Type: "last"},
				Evaluator: AlertEvaluator{
					Type:   "gt",
					Params: []float64{0.5}, // 500ms threshold
				},
			},
		},
	}
	return Panel{
		ID:          2,
		Title:       "Response Time Distribution",
		Type:        "timeseries",
		GridPos:     GridPos{X: 0, Y: 4, W: 12, H: 8},
		Targets:     quantiles,
		FieldConfig: FieldConfig{Defaults: FieldDefaults{Unit: "s"}},
		Alert:       alert,
	}
}
// createMemoryPanel builds the heap/stack usage and allocation-rate
// time-series panel.
func createMemoryPanel() Panel {
	series := []Target{
		{Expr: "go_memory_heap_inuse_bytes", RefID: "A", LegendFormat: "Heap In Use"},
		{Expr: "go_memory_heap_bytes", RefID: "B", LegendFormat: "Heap Size"},
		{Expr: "go_memory_stack_bytes", RefID: "C", LegendFormat: "Stack Size"},
		{Expr: "rate(go_memory_allocations_bytes_total[5m])", RefID: "D", LegendFormat: "Allocation Rate"},
	}
	return Panel{
		ID:          5,
		Title:       "Memory Usage",
		Type:        "timeseries",
		GridPos:     GridPos{X: 0, Y: 12, W: 12, H: 8},
		Targets:     series,
		FieldConfig: FieldConfig{Defaults: FieldDefaults{Unit: "bytes"}},
	}
}
// createGCPanel builds the garbage-collection time-series panel:
// GC duration, frequency, and pause times.
func createGCPanel() Panel {
	series := []Target{
		{Expr: "rate(go_runtime_gc_duration_seconds_sum[5m])", RefID: "A", LegendFormat: "GC Duration"},
		{Expr: "rate(go_runtime_gc_runs_total[5m])", RefID: "B", LegendFormat: "GC Frequency"},
		{Expr: "go_runtime_gc_pause_ns", RefID: "C", LegendFormat: "GC Pause"},
	}
	return Panel{
		ID:          7,
		Title:       "Garbage Collection",
		Type:        "timeseries",
		GridPos:     GridPos{X: 12, Y: 12, W: 12, H: 8},
		Targets:     series,
		FieldConfig: FieldConfig{Defaults: FieldDefaults{Unit: "s"}},
	}
}
// Dashboard export and provisioning

// ExportJSON serializes the dashboard to indented JSON suitable for
// Grafana's file-based provisioning.
func (gd *GrafanaDashboard) ExportJSON() ([]byte, error) {
	return json.MarshalIndent(gd, "", " ")
}

// ProvisionDashboard writes the generated Go performance dashboard to
// dashboardDir as go-performance-dashboard.json (mode 0644).
//
// NOTE(review): this sample's import block does not include "os", which
// os.WriteFile requires — add it for this to compile.
func ProvisionDashboard(dashboardDir string) error {
	dashboard := CreateGoPerformanceDashboard()
	jsonData, err := dashboard.ExportJSON()
	if err != nil {
		return err
	}
	filename := fmt.Sprintf("%s/go-performance-dashboard.json", dashboardDir)
	return os.WriteFile(filename, jsonData, 0644)
}
// Alert rule definitions
//
// CreateGoPerformanceAlerts returns the default Prometheus alerting rules
// for a Go service: latency, error rate, and leak detection. (AlertRule is
// declared elsewhere in this package; the Expr/For/Labels/Annotations
// fields mirror the Prometheus rule-file format, including its Go-template
// placeholders such as {{ $value }}.)
func CreateGoPerformanceAlerts() []AlertRule {
	return []AlertRule{
		// P95 latency above 500ms for 2 minutes.
		{
			Alert:  "HighResponseTime",
			Expr:   "histogram_quantile(0.95, rate(app_http_request_duration_seconds_bucket[5m])) > 0.5",
			For:    "2m",
			Labels: map[string]string{"severity": "warning"},
			Annotations: map[string]string{
				"summary":     "High response time detected",
				"description": "P95 response time is {{ $value }}s",
			},
		},
		// More than 5% of requests returning 5xx for 1 minute.
		{
			Alert:  "HighErrorRate",
			Expr:   "rate(app_http_requests_total{status=~\"5..\"}[5m]) / rate(app_http_requests_total[5m]) > 0.05",
			For:    "1m",
			Labels: map[string]string{"severity": "critical"},
			Annotations: map[string]string{
				"summary":     "High error rate detected",
				"description": "Error rate is {{ $value | humanizePercentage }}",
			},
		},
		// Sustained heap growth suggesting a leak.
		{
			Alert:  "MemoryLeakSuspected",
			Expr:   "increase(go_memory_heap_inuse_bytes[1h]) > 100*1024*1024", // 100MB increase per hour
			For:    "5m",
			Labels: map[string]string{"severity": "warning"},
			Annotations: map[string]string{
				"summary":     "Potential memory leak detected",
				"description": "Memory usage increased by {{ $value | humanizeBytes }} in the last hour",
			},
		},
		// Rapid goroutine growth suggesting a leak.
		{
			Alert:  "GoroutineLeak",
			Expr:   "increase(go_runtime_goroutines_total[10m]) > 1000",
			For:    "2m",
			Labels: map[string]string{"severity": "critical"},
			Annotations: map[string]string{
				"summary":     "Goroutine leak detected",
				"description": "Goroutine count increased by {{ $value }} in 10 minutes",
			},
		},
	}
}
This comprehensive tools and frameworks section provides practical implementations for essential Go performance monitoring, from basic profiling to advanced distributed monitoring systems with Prometheus and Grafana integration.