I was performing a NoSQL performance benchmark for a client. I was wondering if my Aerospike Python code is optimal? I’m trying to record query time and load time. The data has 500,000 rows and 8 columns. My code is below.
def test_db():
config = {
'hosts': [ ('127.0.0.1', 3000) ]
}
client = aerospike.client(config).connect()
t0 = time.time()
global rec
rec = {}
with open('skunkworks.csv', 'r') as f:
reader = csv.reader(f)
rownum = 0
for row in reader:
# Save First Row with headers
if rownum == 0:
header = row
else:
colnum = 0
for col in row:
rec[header[colnum]] = col
colnum += 1
rownum += 1
if rec:
client.put(('test', 'demo', str(rownum)), rec)
rec = {}
t1 = time.time()
load_time = t1 - t0
t2 = time.time()
for i in range(2,500002):
(key, metadata, record) = client.get(('test', 'demo', str(i)))
# print(record)
t3 = time.time()
read_time = t3 - t2
return [load_time , read_time]