|
- // 首先确定监测指标:度量什么(Name),
- // 再确定监测点
- package metrics
- import (
- "fmt"
- "log"
- "net/http"
- "sync"
- "leafstalk/conf"
- "github.com/prometheus/client_golang/prometheus"
- "github.com/prometheus/client_golang/prometheus/promhttp"
- )
- // Package-level singleton state used by StartPrometheusReporter.
- var (
- // prometheusReporter is the reporter instance assigned inside the once.Do
- // call of StartPrometheusReporter and returned by every call to it.
- prometheusReporter *PrometheusReporter
- // once guards the one-time initialization done by StartPrometheusReporter.
- once sync.Once
- )
- // PrometheusReporter reports metrics to prometheus.
- // It keeps one map per Prometheus metric kind, each keyed by metric name.
- type PrometheusReporter struct {
- // serverType is used as the Namespace of every metric this reporter creates.
- serverType string
- // project is filled from the "projectName" config value by StartPrometheusReporter.
- project string
- // countReportersMap holds the counter vectors, looked up by ReportCount.
- countReportersMap map[string]*prometheus.CounterVec
- // summaryReportersMap holds the summary vectors, looked up by ReportSummary.
- summaryReportersMap map[string]*prometheus.SummaryVec
- // gaugeReportersMap holds the gauge vectors, looked up by ReportGauge.
- gaugeReportersMap map[string]*prometheus.GaugeVec
- // histogramReportersMap holds the histogram vectors, looked up by ReportHistogram.
- histogramReportersMap map[string]*prometheus.HistogramVec
- // additionalLabels maps a label name to the default value that ensureLabels
- // fills in when a caller does not supply that label.
- additionalLabels map[string]string
- }
- // registerCustomMetrics creates the summaries, gauges and counters described
- // by spec and stores them in the reporter's maps under their metric names.
- // It only fills the maps; the collectors are handed to Prometheus later by
- // registerMetrics.
- //
- // constLabels is attached to every metric as ConstLabels.
- // additionalLabelsKeys are label names placed in front of each metric's own
- // labels. A nil spec registers nothing.
- func (p *PrometheusReporter) registerCustomMetrics(
- 	constLabels map[string]string,
- 	additionalLabelsKeys []string,
- 	spec *CustomMetricsSpec,
- ) {
- 	if spec == nil {
- 		return
- 	}
- 	// labelsFor builds a fresh label-name slice for each metric. Appending
- 	// directly onto additionalLabelsKeys could write into spare capacity of the
- 	// caller's slice, so that several metrics would share, and overwrite, the
- 	// same backing array.
- 	labelsFor := func(own []string) []string {
- 		names := make([]string, 0, len(additionalLabelsKeys)+len(own))
- 		names = append(names, additionalLabelsKeys...)
- 		return append(names, own...)
- 	}
- 	// Summaries: distribution of observed values at the configured quantiles.
- 	for _, summary := range spec.Summaries {
- 		p.summaryReportersMap[summary.Name] = prometheus.NewSummaryVec(
- 			prometheus.SummaryOpts{
- 				Namespace: p.serverType,
- 				Subsystem: summary.Subsystem,
- 				Name: summary.Name,
- 				Help: summary.Help,
- 				Objectives: summary.Objectives,
- 				ConstLabels: constLabels,
- 			},
- 			labelsFor(summary.Labels),
- 		)
- 	}
- 	// Gauges: values that can go up and down.
- 	for _, gauge := range spec.Gauges {
- 		p.gaugeReportersMap[gauge.Name] = prometheus.NewGaugeVec(
- 			prometheus.GaugeOpts{
- 				Namespace: p.serverType,
- 				Subsystem: gauge.Subsystem,
- 				Name: gauge.Name,
- 				Help: gauge.Help,
- 				ConstLabels: constLabels,
- 			},
- 			labelsFor(gauge.Labels),
- 		)
- 	}
- 	// Counters: number of times something happened.
- 	for _, counter := range spec.Counters {
- 		p.countReportersMap[counter.Name] = prometheus.NewCounterVec(
- 			prometheus.CounterOpts{
- 				Namespace: p.serverType,
- 				Subsystem: counter.Subsystem,
- 				Name: counter.Name,
- 				Help: counter.Help,
- 				ConstLabels: constLabels,
- 			},
- 			labelsFor(counter.Labels),
- 		)
- 	}
- }
- func (p *PrometheusReporter) registerMetrics(
- constLabels, additionalLabels map[string]string,
- spec *CustomMetricsSpec,
- ) {
- // constLabels["game"] = p.game
- // constLabels["serverType"] = p.serverType
- p.additionalLabels = additionalLabels
- additionalLabelsKeys := make([]string, 0, len(additionalLabels))
- for key := range additionalLabels {
- additionalLabelsKeys = append(additionalLabelsKeys, key)
- }
- p.registerCustomMetrics(constLabels, additionalLabelsKeys, spec)
- // 定义多个指标,并选择响应的度量方式
- // HandlerResponseTimeMs summary
- // 响应耗时分布统计
- p.summaryReportersMap[ResponseTime] = prometheus.NewSummaryVec(
- prometheus.SummaryOpts{
- Namespace: p.serverType,
- Subsystem: "handler",
- Name: ResponseTime,
- Help: "处理一条消息耗时NS",
- Objectives: map[float64]float64{0.7: 0.02, 0.95: 0.005, 0.99: 0.001},
- ConstLabels: constLabels,
- },
- append([]string{"route", "status", "type", "code"}, additionalLabelsKeys...),
- )
- // ProcessDelay summary
- // 用户请求延时分布
- p.summaryReportersMap[ProcessDelay] = prometheus.NewSummaryVec(
- prometheus.SummaryOpts{
- Namespace: p.serverType,
- Subsystem: "handler",
- Name: ProcessDelay,
- Help: "处理消息延迟NS",
- Objectives: map[float64]float64{0.7: 0.02, 0.95: 0.005, 0.99: 0.001},
- ConstLabels: constLabels,
- },
- append([]string{"route", "type"}, additionalLabelsKeys...),
- )
- // ConnectedClients gauge
- // 当前的客户端连接数
- p.gaugeReportersMap[ConnectedClients] = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Namespace: p.serverType,
- Subsystem: "acceptor",
- Name: ConnectedClients,
- Help: "当前连接的客户端数目",
- ConstLabels: constLabels,
- },
- additionalLabelsKeys,
- )
- // 当前服务端连接数
- p.gaugeReportersMap[CountServers] = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Namespace: p.serverType,
- Subsystem: "service_cluster",
- Name: CountServers,
- Help: "当前连接的服务端数量",
- ConstLabels: constLabels,
- },
- append([]string{"type"}, additionalLabelsKeys...),
- )
- // 通道可用容量
- p.gaugeReportersMap[ChannelCapacity] = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Namespace: p.serverType,
- Subsystem: "channel",
- Name: ChannelCapacity,
- Help: "the available capacity of the channel",
- ConstLabels: constLabels,
- },
- append([]string{"channel"}, additionalLabelsKeys...),
- )
- // 丢弃RPC消息的数量
- p.gaugeReportersMap[DroppedMessages] = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Namespace: p.serverType,
- Subsystem: "rpc_server",
- Name: DroppedMessages,
- Help: "the number of rpc server dropped messages (messages that are not handled)",
- ConstLabels: constLabels,
- },
- additionalLabelsKeys,
- )
- // 当前协程数量
- p.gaugeReportersMap[Goroutines] = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Namespace: p.serverType,
- Subsystem: "sys",
- Name: Goroutines,
- Help: "当前协程数量",
- ConstLabels: constLabels,
- },
- additionalLabelsKeys,
- )
- // 当前堆大小
- p.gaugeReportersMap[HeapSize] = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Namespace: p.serverType,
- Subsystem: "sys",
- Name: HeapSize,
- Help: "当前堆的大小",
- ConstLabels: constLabels,
- },
- additionalLabelsKeys,
- )
- // 当前堆栈对象数目
- p.gaugeReportersMap[HeapObjects] = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Namespace: p.serverType,
- Subsystem: "sys",
- Name: HeapObjects,
- Help: "当前堆对象数目",
- ConstLabels: constLabels,
- },
- additionalLabelsKeys,
- )
- // 当前重试JOB数量
- p.gaugeReportersMap[WorkerJobsRetry] = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Namespace: p.serverType,
- Subsystem: "worker",
- Name: WorkerJobsRetry,
- Help: "当前重试JOB数量",
- ConstLabels: constLabels,
- },
- additionalLabelsKeys,
- )
- //当前队列大小
- p.gaugeReportersMap[WorkerQueueSize] = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Namespace: p.serverType,
- Subsystem: "worker",
- Name: WorkerQueueSize,
- Help: "当前队列大小",
- ConstLabels: constLabels,
- },
- append([]string{"queue"}, additionalLabelsKeys...),
- )
- //
- p.gaugeReportersMap[WorkerJobsTotal] = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Namespace: p.serverType,
- Subsystem: "worker",
- Name: WorkerJobsTotal,
- Help: "the total executed jobs",
- ConstLabels: constLabels,
- },
- append([]string{"status"}, additionalLabelsKeys...),
- )
- // 计数度量
- p.countReportersMap[ExceededRateLimiting] = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Namespace: p.serverType,
- Subsystem: "acceptor",
- Name: ExceededRateLimiting,
- Help: "the number of blocked requests by exceeded rate limiting",
- ConstLabels: constLabels,
- },
- additionalLabelsKeys,
- )
- // 玩家请求的消息计数
- p.countReportersMap[MessageHandler] = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Namespace: p.serverType,
- Subsystem: "messageHandler",
- Name: MessageHandler,
- Help: "玩家发送请求数量",
- ConstLabels: constLabels,
- },
- append([]string{"handler"}, additionalLabelsKeys...),
- )
- p.histogramReportersMap[MessageResponseTime] = prometheus.NewHistogramVec(
- prometheus.HistogramOpts{
- Namespace: p.serverType,
- Name: MessageResponseTime,
- Help: "各种消息响应耗时",
- },
- []string{"handler"},
- )
- // 注册到注册表
- toRegister := make([]prometheus.Collector, 0)
- for _, c := range p.countReportersMap {
- toRegister = append(toRegister, c)
- }
- for _, c := range p.gaugeReportersMap {
- toRegister = append(toRegister, c)
- }
- for _, c := range p.summaryReportersMap {
- toRegister = append(toRegister, c)
- }
- for _, c := range p.histogramReportersMap {
- toRegister = append(toRegister, c)
- }
- prometheus.MustRegister(toRegister...)
- }
- // prometheusReporterErr records the failure, if any, of the one-time
- // initialization in StartPrometheusReporter so that every call reports the
- // same outcome.
- var prometheusReporterErr error
- // StartPrometheusReporter returns the prometheus reporter singleton, creating
- // it on the first call. Only the first call's serverType and config are used.
- //
- // On the first call it reads "metrics.prometheus.port", "projectName",
- // "metrics.additionalTags" and "metrics.constTags" from config, loads the
- // custom metrics spec, registers all metrics, mounts promhttp.Handler() at
- // "/metrics" on http.DefaultServeMux and starts an HTTP server on the
- // configured port in a background goroutine. If that server stops, the
- // process exits through log.Fatal.
- //
- // If the custom metrics spec cannot be loaded, no reporter is created and the
- // error is returned, by this call and by every later one, instead of
- // terminating the process.
- func StartPrometheusReporter(
- 	serverType string,
- 	config *conf.Config,
- ) (*PrometheusReporter, error) {
- 	once.Do(func() {
- 		var (
- 			port = config.GetInt("metrics.prometheus.port")
- 			game = config.GetString("projectName")
- 			additionalLabels = config.GetStringMapString("metrics.additionalTags")
- 			constLabels = config.GetStringMapString("metrics.constTags")
- 		)
- 		spec, err := NewCustomMetricsSpec(config)
- 		if err != nil {
- 			prometheusReporterErr = fmt.Errorf("loading custom metrics spec: %w", err)
- 			return
- 		}
- 		reporter := &PrometheusReporter{
- 			serverType: serverType,
- 			project: game,
- 			countReportersMap: make(map[string]*prometheus.CounterVec),
- 			summaryReportersMap: make(map[string]*prometheus.SummaryVec),
- 			gaugeReportersMap: make(map[string]*prometheus.GaugeVec),
- 			histogramReportersMap: make(map[string]*prometheus.HistogramVec),
- 		}
- 		reporter.registerMetrics(constLabels, additionalLabels, spec)
- 		// Publish the singleton only after it is fully initialized.
- 		prometheusReporter = reporter
- 		http.Handle("/metrics", promhttp.Handler())
- 		go func() {
- 			log.Fatal(http.ListenAndServe(fmt.Sprintf(":%d", port), nil))
- 		}()
- 	})
- 	if prometheusReporterErr != nil {
- 		return nil, prometheusReporterErr
- 	}
- 	return prometheusReporter, nil
- }
- // ReportSummary records value as an observation on the summary registered
- // under metric. Additional labels missing from labels are filled in with
- // their defaults by ensureLabels before the lookup. It returns
- // ErrMetricNotKnown when no summary with that name exists.
- func (p *PrometheusReporter) ReportSummary(metric string, labels map[string]string, value float64) error {
- 	vec := p.summaryReportersMap[metric]
- 	if vec == nil {
- 		return ErrMetricNotKnown
- 	}
- 	vec.With(p.ensureLabels(labels)).Observe(value)
- 	return nil
- }
- // ReportCount adds count to the counter registered under metric. Additional
- // labels missing from labels are filled in with their defaults by
- // ensureLabels before the lookup. It returns ErrMetricNotKnown when no
- // counter with that name exists.
- func (p *PrometheusReporter) ReportCount(metric string, labels map[string]string, count float64) error {
- 	vec := p.countReportersMap[metric]
- 	if vec == nil {
- 		return ErrMetricNotKnown
- 	}
- 	vec.With(p.ensureLabels(labels)).Add(count)
- 	return nil
- }
- // ReportGauge sets the gauge registered under metric to value. Additional
- // labels missing from labels are filled in with their defaults by
- // ensureLabels before the lookup. It returns ErrMetricNotKnown when no gauge
- // with that name exists.
- func (p *PrometheusReporter) ReportGauge(metric string, labels map[string]string, value float64) error {
- 	vec := p.gaugeReportersMap[metric]
- 	if vec == nil {
- 		return ErrMetricNotKnown
- 	}
- 	vec.With(p.ensureLabels(labels)).Set(value)
- 	return nil
- }
- // ReportHistogram records value as an observation on the histogram registered
- // under metric, using label as its single label value. Unlike the other
- // Report methods it does not apply the additional labels. It returns
- // ErrMetricNotKnown when no histogram with that name exists.
- func (p *PrometheusReporter) ReportHistogram(metric string, label string, value float64) error {
- 	vec := p.histogramReportersMap[metric]
- 	if vec == nil {
- 		return ErrMetricNotKnown
- 	}
- 	vec.WithLabelValues(label).Observe(value)
- 	return nil
- }
- // ensureLabels makes sure labels contains every key of p.additionalLabels,
- // adding each missing key with its default value. Keys already present keep
- // the caller's value.
- //
- // A non-nil labels map is modified in place and returned. A nil labels map is
- // replaced by a new map when there are defaults to add, because writing to a
- // nil map would panic; with no additional labels nil is returned unchanged.
- func (p *PrometheusReporter) ensureLabels(labels map[string]string) map[string]string {
- 	if labels == nil && len(p.additionalLabels) > 0 {
- 		labels = make(map[string]string, len(p.additionalLabels))
- 	}
- 	for key, defaultVal := range p.additionalLabels {
- 		if _, ok := labels[key]; !ok {
- 			labels[key] = defaultVal
- 		}
- 	}
- 	return labels
- }
|