Using Python to extract data from Oracle table in CSV format and load it into Aerospike set

Hi,

I wrote this piece of code in Python to extract an Oracle table called DUMMY and load it into Aerospike. I installed the generic JDBC package for Python jaydebeapi and aerospike Python client.

I put all these under src directory (both on Linux and Eclipse IDE) as follows:

#! /usr/bin/env python
from __future__ import print_function
# jdbc stuff
import jaydebeapi
import variables as v
import configs as c
import sys
import pprint
import csv

# aerospike stuff
import aerospike
from aerospike import exception as ex

class main:

  rec = {}

  def read_oracle_table(self):
    # Check Oracle is accessible
    try:
      c.connection
    except jaydebeapi.Error as e:
      print("Error: {0} [{1}]".format(e.msg, e.code))
      sys.exit(1)
    else:
      # Check if table exists
      if (c.rs.next()):
        print("\nTable " + v._dbschema+"."+ v._dbtable + " exists\n")
        c.cursor.execute(c.sql)
        # get column descriptions
        columns = [i[0] for i in c.cursor.description]
        rows = c.cursor.fetchall()
        # write oracle data to the csv file
        csv_file = open(v.csv_file_name, mode='w')
        writer = csv.writer(csv_file, delimiter=',', lineterminator="\n", quoting=csv.QUOTE_NONNUMERIC)
        # write column headers to csv file
        writer.writerow(columns)
        for row in rows:
          writer.writerow(row)   ## write rows to csv file

        print("writing to csv file complete")
        c.cursor.close()
        c.connection.close()
        csv_file.close()
        sys.exit(0)
      else:
        print("Table " + v._dbschema+"."+ v._dbtable + " does not exist, quitting!")
        c.connection.close()
        sys.exit(1)

  def read_aerospike_set(self):
    # Check aerospike is accessible
    try:
      c.client
    except ex.ClientError as e:
      print("Error: {0} [{1}]".format(e.msg, e.code))
      sys.exit(1)
    else:
      print("Connection successful")
      keys = []
      for k in xrange(1,10):
         key = (v.namespace, v.dbSet, str(k))
         keys.append(key)

      records = c.client.get_many(keys)
      pprint.PrettyPrinter(depth=4).pprint (records)
      print("\nget everyting for one record with pk = '9'")
      (key, meta, bins)= c.client.get((v.namespace, v.dbSet, '9'))
      print (key)
      print (meta)
      print (bins)
      c.client.close()
      sys.exit(0)

  def write_aerospike_set(self):
    # Check aerospike is accessible
    try:
      c.client
    except ex.ClientError as e:
      print("Error: {0} [{1}]".format(e.msg, e.code))
      sys.exit(1)
    else:
      print("Connection to aerospike successful")
      rec = {}
      # read from csv file
      csv_file = open(v.csv_file_name, mode='r')
      reader = csv.reader(csv_file, delimiter=',')
      rownum = 0
      for row in reader:
        if rownum == 0:
          header = row
        else:
          column = 0
          for col in row:
            # print (rownum,header[colnum],col)
            rec[header[column]] = col
            column += 1
        rownum += 1
        #print(rownum, rec)
        if rec:
          c.client.put((v.namespace, v.dbSet, str(rownum)), rec)
        rec = {}
      print("writing to aerospike set complete")
      csv_file.close()
      c.client.close()
      sys.exit(0)

a = main()
option = sys.argv[1]
if option == "1":
  a.read_oracle_table()
elif option == "2":
  a.write_aerospike_set()
elif option == "3":
  a.read_aerospike_set()
else:
  print("incorrect option, valid options are: 1, 2 and 3")
  sys.exit(1)
sys.exit(0)

In addition I defined variables and configs on separate files as below:

 cat variables.py
#! /usr/bin/env python
'''
Created on Nov 18, 2019

@author: hduser
'''
'''
Created on Nov 18, 2019

@author: hduser
'''
# oracle variables
driverName = "oracle.jdbc.OracleDriver"
url= "jdbc:oracle:thin:@rhes564:1521:mydb12"
_username = "scratchpad"
_password = "xxx"
_dbschema = "SCRATCHPAD"
_dbtable = "DUMMY"
csv_file_name = "/home/hduser/dba/bin/python/DUMMY.csv"

# aerospike variables
dbHost = "rhes75"
dbPort = 3000
dbConnection = "mich"
namespace = "test"
dbPassword = "xxx"
dbSet = "oracletoaerospike2"
dbKey = "ID"
cat configs.py
#! /usr/bin/env python
'''
Created on Nov 18, 2019

@author: hduser
'''
import variables as v
import jaydebeapi
import aerospike
from aerospike import exception as ex
# aerospike
write_policy = {'key': aerospike.POLICY_KEY_SEND}
policies = {'write': write_policy, 'total_timeout': 1000}
config = {
  'hosts': [(v.dbHost, v.dbPort)],
  'policies': policies
}
client = aerospike.client(config).connect(v.dbConnection, v.dbPassword)
# oracle stuff
connection = jaydebeapi.connect(v.driverName, v.url, [v._username, v._password])
metadata = connection.jconn.getMetaData()
rs = metadata.getTables(None, v._dbschema, v._dbtable, None)
cursor = connection.cursor()
sql="""
    SELECT ID, CLUSTERED, SCATTERED, RANDOMISED, RANDOM_STRING, SMALL_VC, PADDING FROM scratchpad.dummy where ROWNUM <= 10
    """

It works fine and is a three stage process namely 1) extract oracle table to a csv file, 2) load csv file into aerospike set and 3) read records from aerospike set.

I am no Python expert so appreciate any advise.

Thanks,

Mich

1 Like
© 2015 Copyright Aerospike, Inc. | All rights reserved. Creators of the Aerospike Database.