Hi, I am new to Aerospike. At my current company we use Aerospike as our datastore. We were running a 2-node cluster and have changed to a 3-node cluster. A simple aql query on a secondary index, against a set of about 15 million records and returning only 15 results, is timing out. Can someone please help me figure out what is wrong with my configuration?
All 3 nodes have 8 cores, 60 GB of RAM, and a 100 GB SSD.
Here is my config.
1 : node
BB9FC2EF1A7B702
2 : statistics
cluster_size=3;cluster_key=CD88A92ECF512904;cluster_integrity=true;uptime=221663;system_free_mem_pct=81;system_swapping=false;objects=42806357;sub_objects=0;tombstones=0;tsvc_queue=0;info_queue=0;delete_queue=0;rw_in_progress=0;proxy_in_progress=0;tree_gc_queue=0;client_connections=670;heartbeat_connections=2;fabric_connections=41;heartbeat_received_self=0;heartbeat_received_foreign=1055205;reaped_fds=3421;info_complete=14577832;proxy_retry=0;demarshal_error=0;early_tsvc_client_error=7;early_tsvc_batch_sub_error=0;early_tsvc_udf_sub_error=0;batch_index_initiate=4083;batch_index_queue=0:0,0:0,0:0,0:0;batch_index_complete=3952;batch_index_error=0;batch_index_timeout=131;batch_index_unused_buffers=2;batch_index_huge_buffers=0;batch_index_created_buffers=2;batch_index_destroyed_buffers=0;batch_initiate=0;batch_queue=0;batch_error=0;batch_timeout=0;scans_active=1;query_short_running=2;query_long_running=0;sindex_ucgarbage_found=0;sindex_gc_locktimedout=1;sindex_gc_inactivity_dur=78952859;sindex_gc_activity_dur=230141;sindex_gc_list_creation_time=201871;sindex_gc_list_deletion_time=28062;sindex_gc_objects_validated=77348297;sindex_gc_garbage_found=13632968;sindex_gc_garbage_cleaned=13632258;paxos_principal=BB9FC2EF1A7B702;migrate_allowed=true;migrate_partitions_remaining=12712;fabric_msgs_sent=3669598;fabric_msgs_rcvd=3669585
3 : features
peers;cdt-list;cdt-map;pipelining;geo;float;batch-index;replicas-all;replicas-master;replicas-prole;udf
4 : cluster-generation
1
5 : partition-generation
2756
6 : build_time
Fri Oct 21 03:47:43 UTC 2016
7 : edition
Aerospike Community Edition
8 : version
Aerospike Community Edition build 3.10.0.3
9 : build
3.10.0.3
10 : services
10.10.XX.XX:3000;10.10.XX.XX:3000
11 : services-alumni
10.10.XX.XX:3000;10.10.XX.XX:3000
12 : build_os
el6
Latency Info:
[abhishek.gupta@ip-10-10-XX-XX ~]$ asadm
Aerospike Interactive Shell, version 0.1.5
Found 3 nodes
Online: 10.10.XX.XX:3000, 10.10.XX.XX:3000, 10.10.XX.XX:3000
Admin> show latency
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~query Latency~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Node Time Ops/Sec >1Ms >8Ms >64Ms
. Span . . . .
ip-10-10-XX-XXX.ap-south-1.compute.internal:3000 07:13:42->07:13:52 3.8 92.11 92.11 92.11
ip-10-10-XX-XXX.ap-south-1.compute.internal:3000 07:13:44->07:13:54 3.8 84.21 84.21 84.21
ip-10-10-XX-XXX.ap-south-1.compute.internal:3000 07:13:40->07:13:50 3.4 70.59 70.59 61.76
Number of rows: 3
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~read Latency~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Node Time Ops/Sec >1Ms >8Ms >64Ms
. Span . . . .
ip-10-10-XX-XXX.ap-south-1.compute.internal:3000 07:13:42->07:13:52 20.1 2.49 1.98 1.98
ip-10-10-XX-XXX.ap-south-1.compute.internal:3000 07:13:44->07:13:54 27.7 0.0 0.0 0.0
ip-10-10-XX-XXX.ap-south-1.compute.internal:3000 07:13:40->07:13:50 22.7 1.33 1.33 0.88
Number of rows: 3
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~write Latency~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Node Time Ops/Sec >1Ms >8Ms >64Ms
. Span . . . .
ip-10-10-XX-XXX.ap-south-1.compute.internal:3000 07:13:42->07:13:52 7.1 78.88 7.04 5.64
ip-10-10-XX-XXX.ap-south-1.compute.internal:3000 07:13:44->07:13:54 3.1 70.97 3.23 3.23
ip-10-10-XX-XXX.ap-south-1.compute.internal:3000 07:13:40->07:13:50 3.3 60.61 9.09 9.09
Number of rows: 3
Admin>
asinfo:
[abhishek.gupta@ip-10-10-XX-XX ~]$ asinfo -v 'get-config:' -l
paxos-single-replica-limit=1
pidfile=/var/run/aerospike/asd.pid
service-threads=32
transaction-queues=4
transaction-threads-per-queue=4
proto-fd-max=15000
advertise-ipv6=false
allow-inline-transactions=true
batch-threads=4
batch-max-buffers-per-queue=255
batch-max-requests=5000
batch-max-unused-buffers=256
batch-priority=200
batch-index-threads=4
clock-skew-max-ms=1000
cluster-name=null
enable-benchmarks-fabric=false
enable-benchmarks-svc=false
enable-hist-info=false
fabric-workers=16
hist-track-back=300
hist-track-slice=10
hist-track-thresholds=null
info-threads=16
ldt-benchmarks=false
log-local-time=false
migrate-max-num-incoming=256
migrate-rx-lifetime-ms=60000
migrate-threads=1
node-id-interface=null
nsup-delete-sleep=100
nsup-period=120
nsup-startup-evict=true
paxos-max-cluster-size=32
paxos-protocol=v3
paxos-recovery-policy=auto-reset-master
paxos-retransmit-period=5
proto-fd-idle-ms=60000
proto-slow-netio-sleep-ms=1
query-batch-size=100
query-buf-size=2097152
query-bufpool-size=256
query-in-transaction-thread=false
query-long-q-max-size=500
query-microbenchmark=false
query-pre-reserve-partitions=false
query-priority=10
query-priority-sleep-us=1
query-rec-count-bound=18446744073709551615
query-req-in-query-thread=false
query-req-max-inflight=100
query-short-q-max-size=500
query-threads=6
query-threshold=10
query-untracked-time-ms=1000
query-worker-threads=15
respond-client-on-master-completion=false
run-as-daemon=true
scan-max-active=100
scan-max-done=100
scan-max-udf-transactions=32
scan-threads=4
sindex-builder-threads=4
sindex-data-max-memory=ULONG_MAX
sindex-gc-enable-histogram=false
ticker-interval=10
transaction-max-ms=1000
transaction-pending-limit=20
transaction-repeatable-read=false
transaction-retry-ms=1000
use-queue-per-device=false
work-directory=/opt/aerospike
write-duplicate-resolution-disable=false
fabric-dump-msgs=false
max-msgs-per-type=-1
memory-accounting=false
prole-extra-ttl=0
non-master-sets-delete=false
service.port=3000
service.address=any
service.tls-port=0
service.tls-name=null
service.alternate-port=0
service.alternate-tls-port=0
heartbeat.mode=mesh
heartbeat.address=10.10.89.59
heartbeat.port=3002
heartbeat.mesh-seed-address-port=10.10.89.180:3002
heartbeat.mesh-seed-address-port=10.10.89.52:3002
heartbeat.interval=150
heartbeat.timeout=10
heartbeat.fabric-grace-factor=-1
heartbeat.mtu=9001
heartbeat.protocol=v2
fabric.port=3001
fabric.keepalive-enabled=true
fabric.keepalive-time=1
fabric.keepalive-intvl=1
fabric.keepalive-probes=10
fabric.latency-max-ms=0
info.port=3003
mode=none
self-group-id=0
self-node-id=0
enable-security=false
privilege-refresh-period=300
report-authentication-sinks=0
report-data-op-sinks=0
report-sys-admin-sinks=0
report-user-admin-sinks=0
report-violation-sinks=0
syslog-local=-1
My config file:
[abhishek.gupta@ip-10-10-XX-XX ~]$ cat /etc/aerospike/aerospike.conf
service {
user root
group root
paxos-single-replica-limit 1 # Number of nodes where the replica count is automatically reduced to 1.
pidfile /var/run/aerospike/asd.pid
service-threads 32
transaction-queues 4
transaction-threads-per-queue 4
proto-fd-max 15000
}
logging {
# Log file must be an absolute path.
file /var/log/aerospike/aerospike.log {
context any info
}
}
network {
service {
address any
port 3000
}
heartbeat {
mode mesh
address 10.10.89.59
port 3002
mesh-seed-address-port 10.10.89.52 3002
mesh-seed-address-port 10.10.89.180 3002
mesh-seed-address-port 10.10.89.59 3002
# To use unicast-mesh heartbeats, remove the 3 lines above, and see
# aerospike_mesh.conf for alternative.
interval 150
timeout 10
}
fabric {
port 3001
}
info {
port 3003
}
}
namespace lazypay {
replication-factor 2
memory-size 40G
default-ttl 0
high-water-memory-pct 90
high-water-disk-pct 90
stop-writes-pct 90
storage-engine device {
device /dev/xvdg
data-in-memory true
write-block-size 128K
scheduler-mode noop
}
set users {
set-disable-eviction true
}
set merchants {
set-disable-eviction true
}
set userEmailMbMapping {
set-disable-eviction true
}
set merchantAccessKeyMapping {
set-disable-eviction true
}
set merchantVanityMapping {
set-disable-eviction true
}
set userEmailOrMbMapping {
set-disable-eviction true
}
set merchantRedCategory {
set-disable-eviction true
}
set merchantRedSetting {
set-disable-eviction true
}
set lazypay_id_gen {
set-disable-eviction true
}
set primusTriggers {
set-disable-eviction true
}
}
namespace lazypayData {
replication-factor 2
default-ttl 0
high-water-disk-pct 90
stop-writes-pct 90
storage-engine device {
device /dev/xvdh
write-block-size 128K
scheduler-mode noop
}
set transactions {
set-disable-eviction true
}
set statements {
set-disable-eviction true
}
set disputes {
set-disable-eviction true
}
set ctxTxnIdMapping {
set-disable-eviction true
}
set mtxTxnIdMapping {
set-disable-eviction true
}
set mtxMidTxnIdMapping {
set-disable-eviction true
}
set disputeTxnIdMapping {
set-disable-eviction true
}
set products {
set-disable-eviction true
}
set address {
set-disable-eviction true
}
set subscriptions {
set-disable-eviction true
}
set verificationTxnRequestDetails {
set-disable-eviction true
}
set hashMbMapping {
set-disable-eviction true
}
set cardFingerPrintDetails {
set-disable-eviction true
}
set otpData {
set-disable-eviction true
}
set memo {
set-disable-eviction true
}
}
namespace lazypayDump {
replication-factor 2
default-ttl 0
storage-engine device {
file /mnt/db1/lazypayDump.dat
filesize 95G
}
}
namespace lazypayCache {
replication-factor 2
memory-size 10G
default-ttl 1h
storage-engine memory
}