Wrong metrics from AS


#1

I set up monitoring in Zabbix for some AS metrics. We have a single node with minimal load. When the load is minimal, AS returns wrong data in its metrics. There is some trouble with AMC as well.

AS v3.3.19


#2

Is this still taking place? If so could you provide:

  • asmonitor -e "asinfo -v 'statistics'"
  • asmonitor -e "asinfo -v 'namespace/ssd'"

Also your screen capture cut off the version of AMC, could you let us know the version?


#3

AMC CE 3.4.7, but this is not an AMC problem: in Zabbix some metrics are affected too. I grab them with this script:

#!/bin/bash
#
# Aerospike 3 check script (used as a Zabbix UserParameter helper).
#
# Usage: aerospike_check.sh METRIC [NAMESPACE]
#
#   METRIC     ALL            print every "name=value" pair of `statistics`,
#                             one per line
#              VmRSS          resident set size of the asd process, in bytes
#              available_pct  "available_pct" of namespace NAMESPACE
#              <name>         value of the service statistic called <name>
#   NAMESPACE  namespace queried for available_pct (default: ssd)
#
# Environment:
#   AEROSPIKE_PIDFILE  pid file of the asd daemon
#                      (default: /run/aerospike/asd.pid)
#
# Prints "Metric not set" and exits 0 when METRIC is missing. An unknown
# metric prints nothing and exits 0.

AEROSPIKE_PIDFILE="${AEROSPIKE_PIDFILE:-/run/aerospike/asd.pid}"

# Print the first IPv4 address reported by ifconfig that is neither the
# loopback address nor inside 192.168.0.0/16. Prints nothing if none is found.
detect_ip() {
  ifconfig \
    | grep 'inet addr:' \
    | grep -vF '127.0.0.1' \
    | cut -d: -f2 \
    | awk '{ print $1 }' \
    | grep -v '^192\.168\.' \
    | head -n 1
}

# Run one asinfo command against a host and print the last line of its output
# split into one "name=value" pair per line.
#   $1 - host address, $2 - info command (e.g. "statistics")
fetch_info() {
  asinfo -h "$1" -v "$2" | tail -n 1 | tr ';' '\n'
}

# Read "name=value" lines on stdin and print the value of the first pair whose
# name is exactly $1. Exact matching matters: a substring match on
# "used-bytes-memory=" would also hit "data-used-bytes-memory=".
# Anything up to the last blank in front of a name is ignored, in case the
# first pair on the line is preceded by a textual label.
pick_value() {
  awk -F= -v key="$1" '
    {
      name = $1
      sub(/^.*[ \t]/, "", name)
    }
    name == key { print $2; exit }
  '
}

# Print the resident set size of the asd process in bytes.
# Returns 1 (with a message on stderr) when the pid file or the /proc entry
# cannot be read, instead of failing inside the arithmetic expansion.
print_vmrss() {
  local pid kbytes

  pid=$(cat "$AEROSPIKE_PIDFILE" 2>/dev/null)
  if [[ -z "$pid" ]]; then
    printf 'cannot read pid from %s\n' "$AEROSPIKE_PIDFILE" >&2
    return 1
  fi

  # The VmRSS line of /proc/<pid>/status is reported in kB.
  kbytes=$(grep VmRSS "/proc/${pid}/status" 2>/dev/null \
    | grep -oE '[[:digit:]]+' \
    | head -n 1)
  if [[ -z "$kbytes" ]]; then
    printf 'cannot read VmRSS of pid %s\n' "$pid" >&2
    return 1
  fi

  echo $(( kbytes * 1024 ))
}

main() {
  local metric=${1:-}
  local namespace=${2:-ssd}
  local ip stats

  # Validate the argument before touching the network.
  if [[ -z "$metric" ]]; then
    echo "Metric not set"
    return 0
  fi

  # VmRSS comes from /proc only; no need to query Aerospike for it.
  if [[ "$metric" == "VmRSS" ]]; then
    print_vmrss
    return
  fi

  ip=$(detect_ip)
  if [[ -z "$ip" ]]; then
    printf 'cannot determine host address from ifconfig\n' >&2
    return 1
  fi

  case "$metric" in
    ALL)
      stats=$(fetch_info "$ip" statistics)
      printf '%s\n' "$stats"
      ;;
    available_pct)
      fetch_info "$ip" "namespace/${namespace}" | pick_value available_pct
      ;;
    *)
      fetch_info "$ip" statistics | pick_value "$metric"
      ;;
  esac
}

main "$@"

conf

UserParameter=aerospike[*],/etc/zabbix/scripts/aerospike_check.sh $1

Some data from when it occurs. Aerospike data-used-bytes-memory:

2014-11-15 05:38:27 1635534 2014-11-15 05:33:27 18446744073709484204 2014-11-15 05:28:27 1868659 2014-11-15 05:23:27 18446744073709529795 2014-11-15 05:18:27 1755738 2014-11-15 05:13:27 59210 2014-11-15 05:08:27 895207 2014-11-15 05:03:27 43844 2014-11-15 04:58:27 710460 2014-11-15 04:53:27 18446744073708987934 2014-11-15 04:48:27 789209 2014-11-15 04:43:27 18446744073708783371 2014-11-15 04:38:27 930650 2014-11-15 04:33:27 18446744073708990156 2014-11-15 04:28:27 626714 2014-11-15 04:23:27 18446744073709254240 2014-11-15 04:18:27 617512 2014-11-15 04:13:27 18446744073708753942 2014-11-15 04:08:27 504002 2014-11-15 04:03:27 18446744073708777479 2014-11-15 03:58:27 898728 2014-11-15 03:53:27 18446744073708973754 2014-11-15 03:48:28 687565 2014-11-15 03:43:27 18446744073709365090 2014-11-15 03:38:27 929092 2014-11-15 03:33:27 18446744073709073223 2014-11-15 03:28:27 996942 2014-11-15 03:23:27 18446744073709022957 2014-11-15 03:18:27 1407114 2014-11-15 03:13:27 18446744073709505356 2014-11-15 03:08:27 1685096 2014-11-15 03:03:27 513940 2014-11-15 02:58:27 1931724

Aerospike used-bytes-memory

Timestamp Value

2014-11-15 05:23:39 6481394 2014-11-15 05:18:39 299909 2014-11-15 05:13:39 6545796 2014-11-15 05:08:39 804292 2014-11-15 05:03:39 6521911 2014-11-15 04:58:39 1876911 2014-11-15 04:53:39 5168400 2014-11-15 04:48:39 18446744073708914968 2014-11-15 04:43:39 4698026 2014-11-15 04:38:39 18446744073708623643 2014-11-15 04:33:39 5151560 2014-11-15 04:28:39 18446744073709321993 2014-11-15 04:23:39 5695475 2014-11-15 04:18:39 1061287 2014-11-15 04:13:39 4655975 2014-11-15 04:08:39 18446744073708381886 2014-11-15 04:03:39 4746783 2014-11-15 03:58:39 18446744073708494365 2014-11-15 03:53:39 5149699 2014-11-15 03:48:39 493035 2014-11-15 03:43:39 5948884 2014-11-15 03:38:39 1862227 2014-11-15 03:33:39 5363306 2014-11-15 03:28:39 18446744073708793650 2014-11-15 03:23:39 5291760 2014-11-15 03:18:39 18446744073708995798 2014-11-15 03:13:39 6314051


#4

Some graphics


#5

Thanks for the extra details. Following up with the right folks internally.


#6

Could you provide your aerospike configurations and details about the host?

  • /etc/aerospike/aerospike.conf
  • /etc/issue
  • /proc/meminfo
  • /proc/cpuinfo
  • uname -r

#7
# cat /etc/aerospike/aerospike.conf
service {
    user                              root
    group                             root
    run-as-daemon
    paxos-single-replica-limit        1
    paxos-recovery-policy             manual
    auto-dun                          false
    auto-undun                        false

    pidfile                           /var/run/aerospike/asd.pid

    service-threads                   24
    fabric-workers                    36

    transaction-queues                12
    transaction-threads-per-queue     3
    transaction-pending-limit         30


    nsup-startup-evict                true
    nsup-period                       120
    nsup-queue-escape                 10
    nsup-queue-hwm                    500
    nsup-queue-lwm                    1

    storage-benchmarks                false
    microbenchmarks                   false

    respond-client-on-master-completion    true

    proto-fd-max                      50000

    hist-track-thresholds              1,4,8,16,32,64,128,256,512,1024,2048,4096,8192

}

logging {
    file /var/log/aerospike/aerospike.log {
        context any info
    }
}

network {
    service {
        address                       any
        port                          3000
        access-address                x.x.x.x
        reuse-address
    }
    heartbeat {
        mode multicast
        address 239.1.99.222
        port 9918

        interval 1500
        timeout 10
    }
    fabric {
        port 3001
    }
    info {
        address                       any
        port                          3003
    }
}

namespace ssd {
    replication-factor                2
    default-ttl                       0
    memory-size                       118G
    high-water-memory-pct             99
    high-water-disk-pct               80
    stop-writes-pct                   80

    storage-engine device {
        scheduler-mode                noop
        device                        /dev/sda4
        device                        /dev/sdb4
        write-block-size              128k
        defrag-lwm-pct                50
        min-avail-pct                 1
        data-in-memory                true
        max-write-cache               134217728
        post-write-queue              2048
    }

}


# cat /etc/issue
Ubuntu 12.04.5 LTS \n \l

cat /proc/meminfo 
MemTotal:       32883644 kB
MemFree:        21196968 kB
Buffers:          254496 kB
Cached:          2718008 kB
SwapCached:            0 kB
Active:         10929176 kB
Inactive:         159664 kB
Active(anon):    8111408 kB
Inactive(anon):     9176 kB
Active(file):    2817768 kB
Inactive(file):   150488 kB
Unevictable:        4636 kB
Mlocked:            4636 kB
SwapTotal:       8384444 kB
SwapFree:        8384444 kB
Dirty:               128 kB
Writeback:             0 kB
AnonPages:       8120976 kB
Mapped:            26408 kB
Shmem:               856 kB
Slab:             343044 kB
SReclaimable:     300136 kB
SUnreclaim:        42908 kB
KernelStack:        3784 kB
PageTables:        24692 kB
NFS_Unstable:          0 kB
Bounce:                0 kB
WritebackTmp:          0 kB
CommitLimit:    24826264 kB
Committed_AS:   12701876 kB
VmallocTotal:   34359738367 kB
VmallocUsed:      126692 kB
VmallocChunk:   34359608948 kB
HardwareCorrupted:     0 kB
AnonHugePages:         0 kB
HugePages_Total:       0
HugePages_Free:        0
HugePages_Rsvd:        0
HugePages_Surp:        0
Hugepagesize:       2048 kB
DirectMap4k:       69460 kB
DirectMap2M:     3012608 kB
DirectMap1G:    32505856 kB

# cat /proc/cpuinfo 
processor	: 0
vendor_id	: GenuineIntel
cpu family	: 6
model		: 60
model name	: Intel(R) Xeon(R) CPU E3-1270 v3 @ 3.50GHz
stepping	: 3
microcode	: 0x9
cpu MHz		: 800.000
cache size	: 8192 KB
physical id	: 0
siblings	: 8
core id		: 0
cpu cores	: 4
apicid		: 0
initial apicid	: 0
fpu		: yes
fpu_exception	: yes
cpuid level	: 13
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm ida arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm
bogomips	: 6984.14
clflush size	: 64
cache_alignment	: 64
address sizes	: 39 bits physical, 48 bits virtual
power management:

processor	: 1
vendor_id	: GenuineIntel
cpu family	: 6
model		: 60
model name	: Intel(R) Xeon(R) CPU E3-1270 v3 @ 3.50GHz
stepping	: 3
microcode	: 0x9
cpu MHz		: 800.000
cache size	: 8192 KB
physical id	: 0
siblings	: 8
core id		: 1
cpu cores	: 4
apicid		: 2
initial apicid	: 2
fpu		: yes
fpu_exception	: yes
cpuid level	: 13
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm ida arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm
bogomips	: 6984.14
clflush size	: 64
cache_alignment	: 64
address sizes	: 39 bits physical, 48 bits virtual
power management:

processor	: 2
vendor_id	: GenuineIntel
cpu family	: 6
model		: 60
model name	: Intel(R) Xeon(R) CPU E3-1270 v3 @ 3.50GHz
stepping	: 3
microcode	: 0x9
cpu MHz		: 800.000
cache size	: 8192 KB
physical id	: 0
siblings	: 8
core id		: 2
cpu cores	: 4
apicid		: 4
initial apicid	: 4
fpu		: yes
fpu_exception	: yes
cpuid level	: 13
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm ida arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm
bogomips	: 6984.14
clflush size	: 64
cache_alignment	: 64
address sizes	: 39 bits physical, 48 bits virtual
power management:

processor	: 3
vendor_id	: GenuineIntel
cpu family	: 6
model		: 60
model name	: Intel(R) Xeon(R) CPU E3-1270 v3 @ 3.50GHz
stepping	: 3
microcode	: 0x9
cpu MHz		: 800.000
cache size	: 8192 KB
physical id	: 0
siblings	: 8
core id		: 3
cpu cores	: 4
apicid		: 6
initial apicid	: 6
fpu		: yes
fpu_exception	: yes
cpuid level	: 13
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm ida arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm
bogomips	: 6984.14
clflush size	: 64
cache_alignment	: 64
address sizes	: 39 bits physical, 48 bits virtual
power management:

processor	: 4
vendor_id	: GenuineIntel
cpu family	: 6
model		: 60
model name	: Intel(R) Xeon(R) CPU E3-1270 v3 @ 3.50GHz
stepping	: 3
microcode	: 0x9
cpu MHz		: 800.000
cache size	: 8192 KB
physical id	: 0
siblings	: 8
core id		: 0
cpu cores	: 4
apicid		: 1
initial apicid	: 1
fpu		: yes
fpu_exception	: yes
cpuid level	: 13
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm ida arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm
bogomips	: 6984.14
clflush size	: 64
cache_alignment	: 64
address sizes	: 39 bits physical, 48 bits virtual
power management:

processor	: 5
vendor_id	: GenuineIntel
cpu family	: 6
model		: 60
model name	: Intel(R) Xeon(R) CPU E3-1270 v3 @ 3.50GHz
stepping	: 3
microcode	: 0x9
cpu MHz		: 800.000
cache size	: 8192 KB
physical id	: 0
siblings	: 8
core id		: 1
cpu cores	: 4
apicid		: 3
initial apicid	: 3
fpu		: yes
fpu_exception	: yes
cpuid level	: 13
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm ida arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm
bogomips	: 6984.14
clflush size	: 64
cache_alignment	: 64
address sizes	: 39 bits physical, 48 bits virtual
power management:

processor	: 6
vendor_id	: GenuineIntel
cpu family	: 6
model		: 60
model name	: Intel(R) Xeon(R) CPU E3-1270 v3 @ 3.50GHz
stepping	: 3
microcode	: 0x9
cpu MHz		: 800.000
cache size	: 8192 KB
physical id	: 0
siblings	: 8
core id		: 2
cpu cores	: 4
apicid		: 5
initial apicid	: 5
fpu		: yes
fpu_exception	: yes
cpuid level	: 13
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm ida arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm
bogomips	: 6984.14
clflush size	: 64
cache_alignment	: 64
address sizes	: 39 bits physical, 48 bits virtual
power management:

processor	: 7
vendor_id	: GenuineIntel
cpu family	: 6
model		: 60
model name	: Intel(R) Xeon(R) CPU E3-1270 v3 @ 3.50GHz
stepping	: 3
microcode	: 0x9
cpu MHz		: 800.000
cache size	: 8192 KB
physical id	: 0
siblings	: 8
core id		: 3
cpu cores	: 4
apicid		: 7
initial apicid	: 7
fpu		: yes
fpu_exception	: yes
cpuid level	: 13
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm ida arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm
bogomips	: 6984.14
clflush size	: 64
cache_alignment	: 64
address sizes	: 39 bits physical, 48 bits virtual
power management:

# uname -r
3.11.0-19-generic

Aerospike Configuration Review
#8

Earlier I asked for these stats as well:

It seems a memory metric is misreporting; these stats would tell us which one.

Basically, there are two used-bytes-memory statistics, one in the service context and one in the namespace context, and I am trying to determine whether one or both of them are misreporting.

Admin> show statistics like used-bytes-memory
~~~~~~~~~~~~~~~~~~~~~~~Service Statistics~~~~~~~~~~~~~~~~~~~~~~~~
NODE                    :   u10           u12           u13           
data-used-bytes-memory  :   0             0             0             
index-used-bytes-memory :   3160655936    2859582336    3136948160    
sindex-used-bytes-memory:   14792779137   15848083997   9661880641    
used-bytes-memory       :   17953435073   18707666333   12798828801   

~~~~~~~~~~~~~~~~~phobos_sindex Namespace Statistics~~~~~~~~~~~~~~~~
NODE                    :   u10           u12           u13           
data-used-bytes-memory  :   0             0             0             
index-used-bytes-memory :   3160656064    2859582784    3136948480    
sindex-used-bytes-memory:   14792779168   15848083140   9661880978    
used-bytes-memory       :   17953435232   18707665924   12798829458   

#9
# asmonitor -e "asinfo -v 'statistics'"
1 hosts in cluster: x.x.x.x:3000
-v 'statistics'
x.x.x.x:3000 returned : cluster_size=1;cluster_key=A1466666A92B9F1C;cluster_integrity=true;objects=3961514;total-bytes-disk=427493949440;used-bytes-disk=1000024448;free-pct-disk=99;total-bytes-memory=126701535232;used-bytes-memory=456480690;data-used-bytes-memory=202943794;index-used-bytes-memory=253536896;sindex-used-bytes-memory=0;free-pct-memory=99;stat_read_reqs=0;stat_read_reqs_xdr=0;stat_read_success=0;stat_read_errs_notfound=0;stat_read_errs_other=0;stat_write_reqs=34228630833;stat_write_reqs_xdr=0;stat_write_success=34227218786;stat_write_errs=1412046;stat_xdr_pipe_writes=0;stat_xdr_pipe_miss=0;stat_delete_success=2050657519;stat_rw_timeout=0;udf_read_reqs=0;udf_read_success=0;udf_read_errs_other=0;udf_write_reqs=0;udf_write_success=0;udf_write_err_others=0;udf_delete_reqs=0;udf_delete_success=0;udf_delete_err_others=0;udf_lua_errs=0;udf_scan_rec_reqs=0;udf_query_rec_reqs=0;udf_replica_writes=0;stat_proxy_reqs=0;stat_proxy_reqs_xdr=0;stat_proxy_success=0;stat_proxy_errs=0;stat_cluster_key_trans_to_proxy_retry=0;stat_cluster_key_transaction_reenqueue=0;stat_slow_trans_queue_push=0;stat_slow_trans_queue_pop=0;stat_slow_trans_queue_batch_pop=0;stat_cluster_key_regular_processed=0;stat_cluster_key_prole_retry=0;stat_cluster_key_err_ack_dup_trans_reenqueue=0;stat_cluster_key_partition_transaction_queue_count=0;stat_cluster_key_err_ack_rw_trans_reenqueue=0;stat_expired_objects=2050657519;stat_evicted_objects=0;stat_deleted_set_objects=0;stat_evicted_set_objects=0;stat_evicted_objects_time=0;stat_zero_bin_records=0;stat_nsup_deletes_not_shipped=2050657419;err_tsvc_requests=1412046;err_out_of_space=0;err_duplicate_proxy_request=0;err_rw_request_not_found=48265;err_rw_pending_limit=0;err_rw_cant_put_unique=0;fabric_msgs_sent=0;fabric_msgs_rcvd=0;paxos_principal=BB9EC8199671E00;migrate_msgs_sent=0;migrate_msgs_recv=0;migrate_progress_send=0;migrate_progress_recv=0;migrate_num_incoming_accepted=0;migrate_num_incoming_refused=0;queue=0;transactions=32187871083;reaped_fd
s=1757;tscan_initiate=13669;tscan_pending=2;tscan_succeeded=13671;tscan_aborted=0;batch_initiate=0;batch_queue=0;batch_tree_count=0;batch_timeout=0;batch_errors=0;info_queue=0;proxy_initiate=0;proxy_action=0;proxy_retry=0;proxy_retry_q_full=0;proxy_unproxy=0;proxy_retry_same_dest=0;proxy_retry_new_dest=0;write_master=34228630836;write_prole=0;read_dup_prole=0;rw_err_dup_internal=0;rw_err_dup_cluster_key=0;rw_err_dup_send=0;rw_err_write_internal=0;rw_err_write_cluster_key=0;rw_err_write_send=0;rw_err_ack_internal=0;rw_err_ack_nomatch=0;rw_err_ack_badnode=0;client_connections=841;waiting_transactions=0;tree_count=0;record_refs=3961516;record_locks=2;migrate_tx_objs=0;migrate_rx_objs=0;ongoing_write_reqs=4;err_storage_queue_full=0;partition_actual=4096;partition_replica=0;partition_desync=0;partition_absent=0;partition_object_count=3961514;partition_ref_count=4096;system_free_mem_pct=74;sindex_ucgarbage_found=0;sindex_gc_locktimedout=0;sindex_gc_inactivity_dur=0;sindex_gc_activity_dur=0;sindex_gc_list_creation_time=0;sindex_gc_list_deletion_time=0;sindex_gc_objects_validated=0;sindex_gc_garbage_found=0;sindex_gc_garbage_cleaned=0;system_swapping=false;err_replica_null_node=0;err_replica_non_null_node=0;err_sync_copy_null_node=0;err_sync_copy_null_master=0;storage_defrag_corrupt_record=0;err_write_fail_prole_unknown=0;err_write_fail_prole_generation=0;err_write_fail_unknown=0;err_write_fail_key_exists=0;err_write_fail_generation=0;err_write_fail_generation_xdr=0;err_write_fail_bin_exists=0;err_write_fail_parameter=0;err_write_fail_incompatible_type=0;err_write_fail_noxdr=0;err_write_fail_prole_delete=0;err_write_fail_not_found=1412046;err_write_fail_key_mismatch=0;stat_duplicate_operation=0;uptime=1753538;stat_write_errs_notfound=1412046;stat_write_errs_other=0;heartbeat_received_self=1168258;heartbeat_received_foreign=0;query_reqs=0;query_success=0;query_fail=0;query_abort=0;query_avg_rec_count=0;query_short_queue_full=0;query_long_queue_full=0;query_short_running=0;qu
ery_long_running=0;query_tracked=0;query_agg=0;query_agg_success=0;query_agg_err=0;query_agg_abort=0;query_agg_avg_rec_count=0;query_lookups=0;query_lookup_success=0;query_lookup_err=0;query_lookup_abort=0;query_lookup_avg_rec_count=0

# asmonitor -e "asinfo -v 'namespace/ssd'"

Enter help for commands

1 hosts in cluster: x.x.x.x:3000
-v 'namespace/ssd'
x.x.x.x:3000 returned : type=device;objects=3855349;master-objects=3855349;prole-objects=0;expired-objects=2050846081;evicted-objects=0;set-deleted-objects=0;set-evicted-objects=0;used-bytes-memory=443613356;data-used-bytes-memory=196871020;index-used-bytes-memory=246742336;sindex-used-bytes-memory=0;free-pct-memory=99;max-void-time=154118690;non-expirable-objects=3814733;current-time=154118700;stop-writes=false;hwm-breached=false;available-bin-names=32767;ldt_reads=0;ldt_read_success=0;ldt_deletes=0;ldt_delete_success=0;ldt_writes=0;ldt_write_success=0;ldt_updates=0;ldt_errors=0;used-bytes-disk=972551680;free-pct-disk=99;available_pct=99;sets-enable-xdr=true;memory-size=126701535232;high-water-disk-pct=80;high-water-memory-pct=99;evict-tenths-pct=5;stop-writes-pct=80;cold-start-evict-ttl=4294967295;repl-factor=1;default-ttl=0;max-ttl=0;conflict-resolution-policy=generation;allow_versions=false;single-bin=false;enable-xdr=false;disallow-null-setname=false;total-bytes-memory=126701535232;total-bytes-disk=427493949440;defrag-lwm-pct=50;defrag-sleep=1000;defrag-startup-minimum=10;flush-max-ms=1000;fsync-max-sec=0;write-smoothing-period=0;max-write-cache=67108864;min-avail-pct=1;post-write-queue=0;data-in-memory=true;dev=/dev/sda4;dev=/dev/sdb4;filesize=17179869184;writethreads=1;writecache=67108864;obj-size-hist-max=100

But I can't catch the moment when the metric is misreporting.


#10

Yes, that would be difficult :slight_smile:

After a bit of code inspection, there is a log line that may print and may help whittle this down a bit.

Can you grep your logs during the time of these mis-reporting for:

grep "namespace memory bytes big" /var/log/aerospike/aerospike.log

#11

Link to log. https://drive.google.com/file/d/0B3Jnz5-g9xJ8dmlxM3JyNzgyYlk/view?usp=sharing

I may enable debug logging for the next day.

I can't find the misreporting metrics in the log.

For that day from zabbix GMT+3 is:

2014-11-19 05:43:27	18446744073709488271
2014-11-19 05:38:27	1376473
2014-11-19 05:33:27	18446744073709467903
2014-11-19 05:28:27	1590934
2014-11-19 05:23:27	18446744073709309473
2014-11-19 05:18:27	1183890
2014-11-19 05:13:27	18446744073709012453
2014-11-19 05:08:27	1052982
2014-11-19 05:03:27	18446744073708798841
2014-11-19 04:58:27	441559
2014-11-19 04:53:27	18446744073708651515
2014-11-19 04:48:27	494275
2014-11-19 04:43:27	18446744073708935440
2014-11-19 04:38:28	419870
2014-11-19 04:33:27	18446744073708275598
2014-11-19 04:28:27	474218
2014-11-19 04:23:27	18446744073708673517
2014-11-19 04:18:28	89342
2014-11-19 04:13:27	18446744073708421332
2014-11-19 04:08:27	369847
2014-11-19 04:03:27	18446744073708829325
2014-11-19 03:58:27	218309
2014-11-19 03:53:27	18446744073708510321
2014-11-19 03:48:27	393076
2014-11-19 03:43:27	18446744073708645875
2014-11-19 03:38:27	18446744073709524887
2014-11-19 03:33:27	18446744073708408421
2014-11-19 03:28:27	589450
2014-11-19 03:23:27	18446744073708954092
2014-11-19 03:18:27	517548
2014-11-19 03:13:27	18446744073708831621
2014-11-19 03:08:27	1040683
2014-11-19 03:03:27	18446744073709191370
2014-11-19 02:58:27	945152
2014-11-19 02:53:27	18446744073709097300
2014-11-19 02:48:27	1574924

#12

Thanks for the update. I have updated our bug tracker and will respond back here when the cause is discovered and fixed.


#13

There was a memory accounting fix in [3.3.21](http://www.aerospike.com/download/server/notes.html#3.3.21) that may account for this issue. Please upgrade to the latest build and let us know if the issue persists.


#14

After the upgrade, all received metrics returned to normal. Thanks.