#------------------------------------------------------------------------------
# Copyright (c) 2022, Oracle and/or its affiliates.
#
# This software is dual-licensed to you under the Universal Permissive License
# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose
# either license.
#
# If you elect to accept the software under the Apache License, Version 2.0,
# the following applies:
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#------------------------------------------------------------------------------
|
|
|
|
#------------------------------------------------------------------------------
# load_csv.py
#
# A sample showing how to load CSV data.
#------------------------------------------------------------------------------
|
|
|
|
import csv
|
|
import os
|
|
|
|
import oracledb
|
|
import sample_env
|
|
|
|
# Initialize the Oracle Client libraries unless the sample environment
# selects python-oracledb thin mode.
if not sample_env.get_is_thin():
    client_lib_dir = sample_env.get_oracle_client()
    oracledb.init_oracle_client(lib_dir=client_lib_dir)

# Location of the CSV file to load. The sample file mixes valid rows with
# rows whose data is too large to insert.
FILE_NAME = os.path.join("data", "load_csv.csv")

# Number of rows sent to the database by each executemany() call. Tune this
# to your memory and performance requirements: a large-ish value normally
# keeps the number of executemany() calls reasonable. A small value is used
# here so that the small demo CSV file still exercises the batching loop.
BATCH_SIZE = 19

# Connect as the main sample user, using the credentials and connect string
# supplied by the sample environment.
connection = oracledb.connect(
    user=sample_env.get_main_user(),
    password=sample_env.get_main_password(),
    dsn=sample_env.get_connect_string(),
)
|
|
|
|
def process_batch(batch_number, cursor, data):
    """Insert one batch of rows and print any rows that were rejected.

    batch_number is the zero-based index of the batch, cursor is the cursor
    on which executemany() is called, and data is the sequence of rows
    bound to the module-level "sql" statement.

    executemany() is called with batcherrors=True and each entry returned
    by getbatcherrors() is printed together with a one-based position
    computed from batch_number, BATCH_SIZE and the error's offset within
    the batch.
    """
    print("processing batch", batch_number + 1)
    cursor.executemany(sql, data, batcherrors=True)

    # number of rows that belong to the batches preceding this one
    rows_before_batch = batch_number * BATCH_SIZE
    for batch_error in cursor.getbatcherrors():
        print("Error", batch_error.message, "at line",
              rows_before_batch + batch_error.offset + 1)
|
|
|
|
with connection.cursor() as cursor:

    # Clean up the table for demonstration purposes
    cursor.execute('truncate table LoadCsvTab')

    # Predefine the memory areas to match the table definition.
    # This can improve performance by avoiding memory reallocations.
    # Here, one parameter is passed for each of the columns.
    # "None" is used for the ID column, since the size of NUMBER isn't
    # variable. The "25" matches the maximum expected data size for the
    # NAME column
    cursor.setinputsizes(None, 25)

    # Loop over the data and insert it in batches. The file is opened with
    # newline='' as the csv module requires, so that newlines embedded in
    # quoted fields are interpreted by the CSV reader and not translated by
    # the file object.
    with open(FILE_NAME, 'r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')

        # "sql" is deliberately a module-level name: process_batch() reads
        # it when calling executemany().
        sql = "insert into LoadCsvTab (id, name) values (:1, :2)"
        data = []
        batch_number = 0
        for line in csv_reader:
            # only the first two columns (id, name) are loaded
            data.append((line[0], line[1]))

            # "data" is emptied after every flush, so it is full exactly
            # when it holds BATCH_SIZE rows
            if len(data) == BATCH_SIZE:
                process_batch(batch_number, cursor, data)
                data = []
                batch_number += 1

        # flush the final, partially filled batch
        if data:
            process_batch(batch_number, cursor, data)

    # In a production system you might choose to fix any invalid rows,
    # re-insert them, and then commit. Or you could rollback everything.
    # In this sample we simply commit and ignore the invalid rows that
    # couldn't be inserted.
    connection.commit()
|