aerospike_llist_update returns a timeout error


#1

Hi guys,

My client application has been working for several months, but today I got an error returned from the following API.

as_status aerospike_llist_update (aerospike *as, as_error *err, const as_policy_apply *policy, const as_key *key, const as_ldt *ldt, const as_val *val)

The error code is AEROSPIKE_ERR_TIMEOUT = 9, and err.message is: timeout=1000 iterations=1 failedNodes=0 failedConns=0.

The following is the system information I got from the server.

l> stat system ±-----------------------------------------------±-------------------+ | name | value | ±-----------------------------------------------±-------------------+ | “cluster_size” | 1 | | “cluster_key” | “11A4FA7B1DCDFA7B” | | “cluster_integrity” | “true” | | “objects” | 417 | | “sub-records” | 0 | | “total-bytes-disk” | 17179869184 | | “used-bytes-disk” | 161664 | | “free-pct-disk” | 99 | | “total-bytes-memory” | 4294967296 | | “used-bytes-memory” | 127650 | | “data-used-bytes-memory” | 100962 | | “index-used-bytes-memory” | 26688 | | “sindex-used-bytes-memory” | 0 | | “free-pct-memory” | 99 | | “stat_read_reqs” | 69601 | | “stat_read_reqs_xdr” | 0 | | “stat_read_success” | 64503 | | “stat_read_errs_notfound” | 5098 | | “stat_read_errs_other” | 0 | | “stat_write_reqs” | 2079 | | “stat_write_reqs_xdr” | 0 | | “stat_write_success” | 2079 | | “stat_write_errs” | 0 | | “stat_xdr_pipe_writes” | 0 | | “stat_xdr_pipe_miss” | 0 | | “stat_delete_success” | 3 | | “stat_rw_timeout” | 0 | | “udf_read_reqs” | 69584 | | “udf_read_success” | 64444 | | “udf_read_errs_other” | 5140 | | “udf_write_reqs” | 2061 | | “udf_write_success” | 2061 | | “udf_write_err_others” | 0 | | “udf_delete_reqs” | 0 | | “udf_delete_success” | 0 | | “udf_delete_err_others” | 0 | | “udf_lua_errs” | 0 | | “udf_scan_rec_reqs” | 0 | | “udf_query_rec_reqs” | 0 | | “udf_replica_writes” | 0 | | “stat_proxy_reqs” | 0 | | “stat_proxy_reqs_xdr” | 0 | | “stat_proxy_success” | 0 | | “stat_proxy_errs” | 0 | | “stat_ldt_proxy” | 0 | | “stat_cluster_key_err_ack_dup_trans_reenqueue” | 0 | | “stat_expired_objects” | 0 | | “stat_evicted_objects” | 0 | | “stat_deleted_set_objects” | 0 | | “stat_evicted_objects_time” | 0 | | “stat_zero_bin_records” | 0 | | “stat_nsup_deletes_not_shipped” | 0 | | “stat_compressed_pkts_received” | 0 | | “err_tsvc_requests” | 3 | | “err_tsvc_requests_timeout” | 1 | | “err_out_of_space” | 0 | | “err_duplicate_proxy_request” | 0 | | “err_rw_request_not_found” | 1 | | 
“err_rw_pending_limit” | 0 | | “err_rw_cant_put_unique” | 0 | | “geo_region_query_count” | 0 | | “geo_region_query_cells” | 0 | | “geo_region_query_points” | 0 | | “geo_region_query_falsepos” | 0 | | “fabric_msgs_sent” | 0 | | “fabric_msgs_rcvd” | 0 | | “paxos_principal” | “BB90221010A4202” | | “migrate_msgs_sent” | 0 | | “migrate_msgs_recv” | 0 | | “migrate_progress_send” | 0 | | “migrate_progress_recv” | 0 | | “migrate_num_incoming_accepted” | 0 | | “migrate_num_incoming_refused” | 0 | | “queue” | 0 | | “transactions” | 137085 | | “reaped_fds” | 18 | | “scans_active” | 0 | | “basic_scans_succeeded” | 6 | | “basic_scans_failed” | 1 | | “aggr_scans_succeeded” | 0 | | “aggr_scans_failed” | 0 | | “udf_bg_scans_succeeded” | 0 | | “udf_bg_scans_failed” | 0 | | “batch_index_initiate” | 0 | | “batch_index_queue” | “0:0,0:0,0:0,0:0” | | “batch_index_complete” | 0 | | “batch_index_timeout” | 0 | | “batch_index_errors” | 0 | | “batch_index_unused_buffers” | 0 | | “batch_index_huge_buffers” | 0 | | “batch_index_created_buffers” | 0 | | “batch_index_destroyed_buffers” | 0 | | “batch_initiate” | 0 | | “batch_queue” | 0 | | “batch_tree_count” | 0 | | “batch_timeout” | 0 | | “batch_errors” | 0 | | “info_queue” | 1 | | “delete_queue” | 0 | ±-----------------------------------------------±-------------------+ ±-----------------------------------±--------+ | name | value | ±-----------------------------------±--------+ | “proxy_in_progress” | 0 | | “proxy_initiate” | 0 | | “proxy_action” | 0 | | “proxy_retry” | 0 | | “proxy_retry_q_full” | 0 | | “proxy_unproxy” | 0 | | “proxy_retry_same_dest” | 0 | | “proxy_retry_new_dest” | 0 | | “write_master” | 2079 | | “write_prole” | 0 | | “read_dup_prole” | 0 | | “rw_err_dup_internal” | 0 | | “rw_err_dup_cluster_key” | 0 | | “rw_err_dup_send” | 0 | | “rw_err_write_internal” | 0 | | “rw_err_write_cluster_key” | 0 | | “rw_err_write_send” | 0 | | “rw_err_ack_internal” | 0 | | “rw_err_ack_nomatch” | 0 | | “rw_err_ack_badnode” | 0 | | 
“client_connections” | 25 | | “waiting_transactions” | 0 | | “tree_count” | 0 | | “record_refs” | 417 | | “record_locks” | 0 | | “migrate_tx_objs” | 0 | | “migrate_rx_objs” | 0 | | “ongoing_write_reqs” | 0 | | “err_storage_queue_full” | 0 | | “partition_actual” | 16384 | | “partition_replica” | 0 | | “partition_desync” | 0 | | “partition_absent” | 0 | | “partition_zombie” | 0 | | “partition_object_count” | 417 | | “partition_ref_count” | 16384 | | “system_free_mem_pct” | 57 | | “sindex_ucgarbage_found” | 0 | | “sindex_gc_locktimedout” | 0 | | “sindex_gc_inactivity_dur” | 0 | | “sindex_gc_activity_dur” | 0 | | “sindex_gc_list_creation_time” | 0 | | “sindex_gc_list_deletion_time” | 0 | | “sindex_gc_objects_validated” | 0 | | “sindex_gc_garbage_found” | 0 | | “sindex_gc_garbage_cleaned” | 0 | | “system_swapping” | “false” | | “err_replica_null_node” | 0 | | “err_replica_non_null_node” | 0 | | “err_sync_copy_null_master” | 0 | | “storage_defrag_corrupt_record” | 0 | | “err_write_fail_prole_unknown” | 0 | | “err_write_fail_prole_generation” | 0 | | “err_write_fail_unknown” | 0 | | “err_write_fail_key_exists” | 0 | | “err_write_fail_generation” | 0 | | “err_write_fail_generation_xdr” | 0 | | “err_write_fail_bin_exists” | 0 | | “err_write_fail_parameter” | 0 | | “err_write_fail_incompatible_type” | 0 | | “err_write_fail_noxdr” | 0 | | “err_write_fail_prole_delete” | 0 | | “err_write_fail_not_found” | 0 | | “err_write_fail_key_mismatch” | 0 | | “err_write_fail_record_too_big” | 0 | | “err_write_fail_bin_name” | 0 | | “err_write_fail_bin_not_found” | 0 | | “err_write_fail_forbidden” | 0 | | “stat_duplicate_operation” | 0 | | “uptime” | 3104 | | “stat_write_errs_notfound” | 0 | | “stat_write_errs_other” | 0 | | “heartbeat_received_self” | 19724 | | “heartbeat_received_foreign” | 0 | | “query_reqs” | 0 | | “query_success” | 0 | | “query_fail” | 0 | | “query_abort” | 0 | | “query_avg_rec_count” | 0 | | “query_short_running” | 0 | | “query_long_running” | 0 | | 
“query_short_queue_full” | 0 | | “query_long_queue_full” | 0 | | “query_short_reqs” | 0 | | “query_long_reqs” | 0 | | “query_agg” | 0 | | “query_agg_success” | 0 | | “query_agg_err” | 0 | | “query_agg_abort” | 0 | | “query_agg_avg_rec_count” | 0 | | “query_lookups” | 0 | | “query_lookup_success” | 0 | | “query_lookup_err” | 0 | | “query_lookup_abort” | 0 | | “query_lookup_avg_rec_count” | 0 | ±-----------------------------------±--------+

Also, the function that checks the health of all nodes in the cluster returns the refresh error "node BB90221010A4202 refresh failed: AEROSPIKE_ERR_TIMEOUT Network time". I suppose the network connection was interrupted for several seconds. How can I fix it? Thanks!

Best Regards,

Steve