Add support for OSON containing field names > 255 bytes and relative
offsets in compressed OSON.
parent 3574d726a5 · commit 1cad058fb2
@@ -13,6 +13,10 @@ oracledb 2.0.0 (TBD)
 Thin Mode Changes
 +++++++++++++++++
 
+#) Added support for an Oracle Database 23c JSON feature allowing for field
+   names with more than 255 UTF-8 encoded bytes.
+#) Added support for an Oracle Database 23c JSON feature improving JSON
+   storage usage.
 #) Fixed bug in detecting the current time zone
    (`issue 257 <https://github.com/oracle/python-oracledb/issues/257>`__).
 #) Added connection establishment parameter :data:`ConnectParams.ssl_context`
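Taken together, the two new entries cover both halves of this commit: longer field names and the relative-offset encoding used by compressed JSON storage. A minimal usage sketch, assuming the TestJson table from the test suite at the end of this commit; the connection details are placeholders and Oracle Database 23c is required:

    import oracledb

    # hypothetical connection details
    conn = oracledb.connect(user="user", password="pw", dsn="localhost/freepdb1")
    cursor = conn.cursor()

    long_name = "A" * 300           # > 255 UTF-8 encoded bytes
    value = {long_name: 2023}

    # bind the dict directly as JSON and read it back
    cursor.setinputsizes(oracledb.DB_TYPE_JSON)
    cursor.execute("insert into TestJson values (1, :1)", [value])
    cursor.execute("select JsonCol from TestJson")
    (fetched,) = cursor.fetchone()
    assert fetched == value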
@@ -516,7 +516,7 @@ ERR_MESSAGE_FORMATS = {
     ),
     ERR_ORACLE_TYPE_NOT_SUPPORTED: "Oracle data type {num} is not supported",
     ERR_OSON_FIELD_NAME_LIMITATION: (
-        "OSON field names may not exceed 255 UTF-8 encoded bytes"
+        "OSON field names may not exceed {max_fname_size} UTF-8 encoded bytes"
     ),
     ERR_OSON_NODE_TYPE_NOT_SUPPORTED: (
         "OSON node type 0x{node_type:x} is not supported"
@@ -40,6 +40,7 @@ cdef class Capabilities:
         bytearray runtime_caps
         uint32_t max_string_size
         bint supports_oob
+        ssize_t oson_max_fname_size
 
     def __init__(self):
        self._init_compile_caps()
@@ -55,6 +56,9 @@ cdef class Capabilities:
         if server_caps[TNS_CCAP_FIELD_VERSION] < self.ttc_field_version:
             self.ttc_field_version = server_caps[TNS_CCAP_FIELD_VERSION]
             self.compile_caps[TNS_CCAP_FIELD_VERSION] = self.ttc_field_version
+        self.oson_max_fname_size = 65535 \
+                if self.ttc_field_version >= TNS_CCAP_FIELD_VERSION_23_1 \
+                else 255
 
     @cython.boundscheck(False)
     cdef void _adjust_for_server_runtime_caps(self, bytearray server_caps):
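The limit is therefore fixed at connect time from the negotiated TTC field version. A plain-Python sketch of the same rule; the constant's numeric value is not shown in this diff and is illustrative only:

    TNS_CCAP_FIELD_VERSION_23_1 = 23  # illustrative placeholder value

    def oson_max_fname_size(ttc_field_version: int) -> int:
        # 23.1-capable servers accept field names up to 65535 UTF-8
        # encoded bytes; older servers remain limited to 255 bytes
        if ttc_field_version >= TNS_CCAP_FIELD_VERSION_23_1:
            return 65535
        return 255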
@@ -564,9 +564,9 @@ cdef enum:
     TNS_JSON_MAGIC_BYTE_1 = 0xff
     TNS_JSON_MAGIC_BYTE_2 = 0x4a # 'J'
     TNS_JSON_MAGIC_BYTE_3 = 0x5a # 'Z'
-    TNS_JSON_VERSION = 1
+    TNS_JSON_VERSION_MAX_FNAME_255 = 1
+    TNS_JSON_VERSION_MAX_FNAME_65535 = 3
     TNS_JSON_FLAG_HASH_ID_UINT8 = 0x0100
     TNS_JSON_FLAG_HASH_ID_UINT16 = 0x0200
     TNS_JSON_FLAG_NUM_FNAMES_UINT16 = 0x0400
     TNS_JSON_FLAG_FNAMES_SEG_UINT32 = 0x0800
     TNS_JSON_FLAG_TINY_NODES_STAT = 0x2000
@@ -576,6 +576,7 @@ cdef enum:
     TNS_JSON_FLAG_LEN_IN_PCODE = 0x04
     TNS_JSON_FLAG_NUM_FNAMES_UINT32 = 0x08
     TNS_JSON_FLAG_IS_SCALAR = 0x10
+    TNS_JSON_FLAG_SEC_FNAMES_SEG_UINT16 = 0x0100
 
 # JSON data types
 cdef enum:
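These constants fix the OSON preamble: three magic bytes (0xff, 'J', 'Z'), a one-byte version (1 when all field names fit in 255 bytes, 3 when 65535-byte names are present) and a 16-bit big-endian flags word. A hedged stand-alone sketch of validating that preamble; the function name is illustrative, not the driver's API:

    import struct

    def parse_oson_preamble(data: bytes):
        # 3 magic bytes, 1 version byte, big-endian uint16 flags
        magic, version, flags = struct.unpack(">3sBH", data[:6])
        if magic != b"\xffJZ":
            raise ValueError("not an OSON image")
        if version not in (1, 3):  # MAX_FNAME_255 / MAX_FNAME_65535
            raise ValueError(f"unsupported OSON version {version}")
        return version, flags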
@@ -33,10 +33,12 @@
 cdef class OsonDecoder(Buffer):
 
     cdef:
+        uint16_t primary_flags, secondary_flags
         ssize_t field_id_length
         ssize_t tree_seg_pos
         list field_names
-        uint16_t flags
+        uint8_t version
+        bint relative_offsets
 
     cdef object _decode_container_node(self, uint8_t node_type):
         """
@@ -45,8 +47,8 @@ cdef class OsonDecoder(Buffer):
         cdef:
             bint is_shared, is_object = (node_type & 0x40) == 0
             ssize_t field_ids_pos = 0, offsets_pos = 0, pos
+            uint32_t container_offset, offset, temp32
             uint32_t i, num_children = 0
-            uint32_t offset, temp32
             uint16_t temp16
             uint8_t temp8
             object value
@@ -55,6 +57,7 @@ cdef class OsonDecoder(Buffer):
         # determine the number of children by examining the 4th and 5th most
         # significant bits of the node type; determine the offsets in the tree
         # segment to the field ids array and the value offsets array
+        container_offset = self._pos - self.tree_seg_pos - 1
         self._get_num_children(node_type, &num_children, &is_shared)
         if is_shared:
             value = {}
@@ -88,6 +91,8 @@ cdef class OsonDecoder(Buffer):
             field_ids_pos = self._pos
             self.skip_to(offsets_pos)
             self._get_offset(node_type, &offset)
+            if self.relative_offsets:
+                offset += container_offset
             offsets_pos = self._pos
             self.skip_to(self.tree_seg_pos + offset)
             if is_object:
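In relative-offset mode every offset stored under a container is measured from the container node's own position in the tree segment rather than from the segment start, which is why the decoder records container_offset on entering the node and adds it back before seeking. A one-function sketch of that adjustment:

    def absolute_offset(stored: int, container_offset: int,
                        relative_mode: bool) -> int:
        # relative mode: stored offsets are measured from the container
        # node itself, so shift them by the container's own offset
        return stored + container_offset if relative_mode else stored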
@@ -193,6 +198,43 @@ cdef class OsonDecoder(Buffer):
             errors._raise_err(errors.ERR_OSON_NODE_TYPE_NOT_SUPPORTED,
                               node_type=node_type)
 
+    cdef list _get_long_field_names(self, uint32_t num_fields,
+                                    ssize_t offsets_size,
+                                    uint32_t field_names_seg_size):
+        """
+        Read the long field names from the buffer.
+        """
+        cdef:
+            ssize_t offsets_pos, final_pos
+            const char_type* ptr
+            uint32_t offset, i
+            list field_names
+            uint16_t temp16
+            uint8_t temp8
+
+        # skip the hash id array (2 bytes for each field)
+        self.skip_raw_bytes(num_fields * 2)
+
+        # skip the field name offsets array for now
+        offsets_pos = self._pos
+        self.skip_raw_bytes(num_fields * offsets_size)
+        ptr = self._get_raw(field_names_seg_size)
+        final_pos = self._pos
+
+        # determine the names of the fields
+        self.skip_to(offsets_pos)
+        field_names = [None] * num_fields
+        for i in range(num_fields):
+            if offsets_size == 2:
+                self.read_uint16(&temp16)
+                offset = temp16
+            else:
+                self.read_uint32(&offset)
+            temp16 = unpack_uint16(&ptr[offset], BYTE_ORDER_MSB)
+            field_names[i] = ptr[offset + 2:offset + temp16 + 2].decode()
+        self.skip_to(final_pos)
+        return field_names
+
     cdef int _get_num_children(self, uint8_t node_type, uint32_t* num_children,
                                bint* is_shared) except -1:
         """
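The layout handled here can be mirrored in plain Python: a block of 2-byte hash ids, an array of 2- or 4-byte big-endian offsets, then a names segment in which each entry is a 2-byte big-endian length followed by the UTF-8 bytes. A stand-alone sketch; the name and signature are illustrative, not the driver's API:

    def read_long_field_names(buf: bytes, pos: int, num_fields: int,
                              offsets_size: int, seg_size: int) -> list:
        pos += num_fields * 2                       # skip 2-byte hash ids
        offsets_pos = pos
        pos += num_fields * offsets_size            # skip offsets array
        seg = buf[pos:pos + seg_size]               # field names segment
        names = []
        for i in range(num_fields):
            raw = buf[offsets_pos + i * offsets_size:
                      offsets_pos + (i + 1) * offsets_size]
            offset = int.from_bytes(raw, "big")
            name_len = int.from_bytes(seg[offset:offset + 2], "big")
            names.append(seg[offset + 2:offset + 2 + name_len].decode())
        return names

The short-name variant added below is identical except that hash ids occupy one byte and each name carries a one-byte length prefix.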
@@ -233,15 +275,55 @@ cdef class OsonDecoder(Buffer):
             self.read_uint16(&temp16)
             offset[0] = temp16
 
+    cdef list _get_short_field_names(self, uint32_t num_fields,
+                                     ssize_t offsets_size,
+                                     uint32_t field_names_seg_size):
+        """
+        Read the short field names from the buffer.
+        """
+        cdef:
+            ssize_t offsets_pos, final_pos
+            const char_type* ptr
+            uint32_t offset, i
+            list field_names
+            uint16_t temp16
+            uint8_t temp8
+
+        # skip the hash id array (1 byte for each field)
+        self.skip_raw_bytes(num_fields)
+
+        # skip the field name offsets array for now
+        offsets_pos = self._pos
+        self.skip_raw_bytes(num_fields * offsets_size)
+        ptr = self._get_raw(field_names_seg_size)
+        final_pos = self._pos
+
+        # determine the names of the fields
+        self.skip_to(offsets_pos)
+        field_names = [None] * num_fields
+        for i in range(num_fields):
+            if offsets_size == 2:
+                self.read_uint16(&temp16)
+                offset = temp16
+            else:
+                self.read_uint32(&offset)
+            temp8 = ptr[offset]
+            field_names[i] = ptr[offset + 1:offset + temp8 + 1].decode()
+        self.skip_to(final_pos)
+        return field_names
+
     cdef object decode(self, bytes data):
         """
         Returns a Python object corresponding to the encoded OSON bytes.
         """
         cdef:
-            uint32_t num_field_names, field_names_seg_size, tree_seg_size, i
-            ssize_t hash_id_size, field_name_offsets_size
+            uint32_t short_field_names_seg_size, long_field_names_seg_size = 0
+            uint32_t num_short_field_names, num_long_field_names = 0
+            ssize_t hash_id_size, short_field_name_offsets_size
+            ssize_t long_field_name_offsets_size = 0
             uint16_t num_tiny_nodes, temp16
-            ssize_t field_name_offsets_pos
+            uint32_t tree_seg_size, i
-            uint8_t version, temp8
+            uint8_t temp8
             const char_type* ptr
             uint32_t offset
@@ -255,44 +337,60 @@ cdef class OsonDecoder(Buffer):
                 ptr[1] != TNS_JSON_MAGIC_BYTE_2 or \
                 ptr[2] != TNS_JSON_MAGIC_BYTE_3:
             errors._raise_err(errors.ERR_UNEXPECTED_DATA, data=ptr[:3])
-        self.read_ub1(&version)
-        if version != TNS_JSON_VERSION:
+        self.read_ub1(&self.version)
+        if self.version not in (
+            TNS_JSON_VERSION_MAX_FNAME_255,
+            TNS_JSON_VERSION_MAX_FNAME_65535
+        ):
             errors._raise_err(errors.ERR_OSON_VERSION_NOT_SUPPORTED,
-                              version=version)
-        self.read_uint16(&self.flags)
+                              version=self.version)
+        self.read_uint16(&self.primary_flags)
+        self.relative_offsets = \
+                self.primary_flags & TNS_JSON_FLAG_REL_OFFSET_MODE
 
         # if value is a scalar value, the header is much smaller
-        if self.flags & TNS_JSON_FLAG_IS_SCALAR:
-            if self.flags & TNS_JSON_FLAG_TREE_SEG_UINT32:
+        if self.primary_flags & TNS_JSON_FLAG_IS_SCALAR:
+            if self.primary_flags & TNS_JSON_FLAG_TREE_SEG_UINT32:
                 self.skip_raw_bytes(4)
             else:
                 self.skip_raw_bytes(2)
             return self._decode_node()
 
         # determine the number of field names
-        if self.flags & TNS_JSON_FLAG_NUM_FNAMES_UINT32:
-            self.read_uint32(&num_field_names)
+        if self.primary_flags & TNS_JSON_FLAG_NUM_FNAMES_UINT32:
+            self.read_uint32(&num_short_field_names)
             self.field_id_length = 4
-        elif self.flags & TNS_JSON_FLAG_NUM_FNAMES_UINT16:
+        elif self.primary_flags & TNS_JSON_FLAG_NUM_FNAMES_UINT16:
             self.read_uint16(&temp16)
-            num_field_names = temp16
+            num_short_field_names = temp16
             self.field_id_length = 2
         else:
             self.read_ub1(&temp8)
-            num_field_names = temp8
+            num_short_field_names = temp8
             self.field_id_length = 1
 
         # determine the size of the field names segment
-        if self.flags & TNS_JSON_FLAG_FNAMES_SEG_UINT32:
-            field_name_offsets_size = 4
-            self.read_uint32(&field_names_seg_size)
+        if self.primary_flags & TNS_JSON_FLAG_FNAMES_SEG_UINT32:
+            short_field_name_offsets_size = 4
+            self.read_uint32(&short_field_names_seg_size)
         else:
-            field_name_offsets_size = 2
+            short_field_name_offsets_size = 2
             self.read_uint16(&temp16)
-            field_names_seg_size = temp16
+            short_field_names_seg_size = temp16
 
+        # if the version indicates that field names > 255 bytes exist, parse
+        # the information about that segment
+        if self.version == TNS_JSON_VERSION_MAX_FNAME_65535:
+            self.read_uint16(&self.secondary_flags)
+            if self.secondary_flags & TNS_JSON_FLAG_SEC_FNAMES_SEG_UINT16:
+                long_field_name_offsets_size = 2
+            else:
+                long_field_name_offsets_size = 4
+            self.read_uint32(&num_long_field_names)
+            self.read_uint32(&long_field_names_seg_size)
+
         # determine the size of the tree segment
-        if self.flags & TNS_JSON_FLAG_TREE_SEG_UINT32:
+        if self.primary_flags & TNS_JSON_FLAG_TREE_SEG_UINT32:
             self.read_uint32(&tree_seg_size)
         else:
             self.read_uint16(&temp16)
@@ -301,34 +399,28 @@ cdef class OsonDecoder(Buffer):
         # determine the number of "tiny" nodes
         self.read_uint16(&num_tiny_nodes)
 
-        # skip the hash id array
-        if self.flags & TNS_JSON_FLAG_HASH_ID_UINT8:
-            hash_id_size = 1
-        elif self.flags & TNS_JSON_FLAG_HASH_ID_UINT16:
-            hash_id_size = 2
-        else:
-            hash_id_size = 4
-        self.skip_raw_bytes(num_field_names * hash_id_size)
+        # if there are any short names, read them now
+        self.field_names = []
+        if num_short_field_names > 0:
+            self.field_names.extend(
+                self._get_short_field_names(
+                    num_short_field_names,
+                    short_field_name_offsets_size,
+                    short_field_names_seg_size
+                )
+            )
 
-        # skip the field name offsets array for now
-        field_name_offsets_pos = self._pos
-        self.skip_raw_bytes(num_field_names * field_name_offsets_size)
-        ptr = self._get_raw(field_names_seg_size)
-
-        # determine the names of the fields
-        self.skip_to(field_name_offsets_pos)
-        self.field_names = [None] * num_field_names
-        for i in range(num_field_names):
-            if self.flags & TNS_JSON_FLAG_FNAMES_SEG_UINT32:
-                self.read_uint32(&offset)
-            else:
-                self.read_uint16(&temp16)
-                offset = temp16
-            temp8 = ptr[offset]
-            self.field_names[i] = ptr[offset + 1:offset + temp8 + 1].decode()
+        # if there are any long names, read them now
+        if num_long_field_names > 0:
+            self.field_names.extend(
+                self._get_long_field_names(
+                    num_long_field_names,
+                    long_field_name_offsets_size,
+                    long_field_names_seg_size
+                )
+            )
 
         # get tree segment
-        self.skip_raw_bytes(field_names_seg_size)
         self.tree_seg_pos = self._pos
 
         # return root node
@@ -359,20 +451,18 @@ cdef class OsonFieldName:
             self.hash_id = (self.hash_id ^ ptr[i]) * 16777619
 
     @staticmethod
-    cdef OsonFieldName create(str name):
+    cdef OsonFieldName create(str name, ssize_t max_fname_size):
         """
         Creates and initializes the field name.
         """
-        cdef:
-            OsonFieldName field_name
-            ssize_t name_bytes_len
+        cdef OsonFieldName field_name
         field_name = OsonFieldName.__new__(OsonFieldName)
         field_name.name = name
         field_name.name_bytes = name.encode()
-        name_bytes_len = len(field_name.name_bytes)
-        if name_bytes_len > 255:
-            errors._raise_err(errors.ERR_OSON_FIELD_NAME_LIMITATION)
-        field_name.name_bytes_len = <uint8_t> name_bytes_len
+        field_name.name_bytes_len = len(field_name.name_bytes)
+        if field_name.name_bytes_len > max_fname_size:
+            errors._raise_err(errors.ERR_OSON_FIELD_NAME_LIMITATION,
+                              max_fname_size=max_fname_size)
         field_name._calc_hash_id()
         return field_name
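The hash id computed above is an FNV-style fold over the encoded name using the prime 16777619; the encoder later stores only its low byte for short names and its low two bytes for long names. A plain-Python sketch of the loop; the seed is not visible in this hunk, so the common FNV-1a offset basis is an assumption, and the mask models 32-bit unsigned C arithmetic:

    def oson_field_hash(name: str, seed: int = 0x811C9DC5) -> int:
        # seed value is assumed, not confirmed by this diff
        h = seed
        for b in name.encode():
            h = ((h ^ b) * 16777619) & 0xFFFFFFFF
        return h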
@@ -388,45 +478,22 @@ cdef class OsonFieldNamesSegment(GrowableBuffer):
 
     cdef:
         uint32_t num_field_names
-        dict field_names_dict
         list field_names
 
-    cdef int _examine_node(self, object value) except -1:
+    cdef int add_name(self, OsonFieldName field_name) except -1:
         """
-        Examines the value. If it is a dictionary, all keys are extracted and
-        unique names retained. Elements in lists and tuples and values in
-        dictionaries are then examined to determine if they contain
-        dictionaries as well.
+        Adds a name to the field names segment.
         """
-        cdef OsonFieldName field_name
-        if isinstance(value, (list, tuple)):
-            for child_value in value:
-                self._examine_node(child_value)
-        elif isinstance(value, dict):
-            for key, child_value in (<dict> value).items():
-                if key not in self.field_names_dict:
-                    field_name = OsonFieldName.create(key)
-                    self.field_names_dict[key] = field_name
-                    field_name.offset = self._pos
-                    self.write_uint8(field_name.name_bytes_len)
-                    self.write_bytes(field_name.name_bytes)
-                self._examine_node(child_value)
-
-    cdef int _process_field_names(self) except -1:
-        """
-        Processes the field names in preparation for encoding within OSON.
-        """
-        cdef:
-            OsonFieldName field_name
-            ssize_t i
-        self.field_names = sorted(self.field_names_dict.values(),
-                                  key=OsonFieldName.sort_key)
-        for i, field_name in enumerate(self.field_names):
-            field_name.field_id = i + 1
-        self.num_field_names = <uint32_t> len(self.field_names)
+        field_name.offset = self._pos
+        if field_name.name_bytes_len <= 255:
+            self.write_uint8(field_name.name_bytes_len)
+        else:
+            self.write_uint16(field_name.name_bytes_len)
+        self.write_bytes(field_name.name_bytes)
+        self.field_names.append(field_name)
 
     @staticmethod
-    cdef OsonFieldNamesSegment create(object value):
+    cdef OsonFieldNamesSegment create():
         """
         Creates and initializes the segment. The value (and all of its
         children) are examined for dictionaries and the keys retained as
@@ -435,11 +502,21 @@ cdef class OsonFieldNamesSegment(GrowableBuffer):
         cdef OsonFieldNamesSegment seg
         seg = OsonFieldNamesSegment.__new__(OsonFieldNamesSegment)
         seg._initialize(TNS_CHUNK_SIZE)
-        seg.field_names_dict = {}
-        seg._examine_node(value)
-        seg._process_field_names()
+        seg.field_names = []
         return seg
 
+    cdef int process_field_names(self, ssize_t field_id_offset) except -1:
+        """
+        Processes the field names in preparation for encoding within OSON.
+        """
+        cdef:
+            OsonFieldName field_name
+            ssize_t i
+        self.field_names.sort(key=OsonFieldName.sort_key)
+        for i, field_name in enumerate(self.field_names):
+            field_name.field_id = field_id_offset + i + 1
+        self.num_field_names = <uint32_t> len(self.field_names)
+
 
 @cython.final
 cdef class OsonTreeSegment(GrowableBuffer):
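The segment is now a passive container: the encoder decides which segment a name belongs to, add_name length-prefixes the bytes (one length byte for names of at most 255 bytes, two otherwise), and process_field_names sorts the names and assigns 1-based field ids, with field_id_offset letting long-name ids continue after the short-name ids. A plain-Python model of that bookkeeping; the real sort uses OsonFieldName.sort_key, so sorting by name here is an assumption:

    class FieldNamesSegmentSketch:
        def __init__(self):
            self.data = bytearray()      # length-prefixed name bytes
            self.field_names = []        # (offset, name) pairs

        def add_name(self, name: str) -> None:
            raw = name.encode()
            self.field_names.append((len(self.data), name))
            prefix_len = 1 if len(raw) <= 255 else 2
            self.data += len(raw).to_bytes(prefix_len, "big") + raw

        def process_field_names(self, field_id_offset: int) -> dict:
            # assign ids continuing after any previously numbered segment
            self.field_names.sort(key=lambda item: item[1])
            return {name: field_id_offset + i + 1
                    for i, (_, name) in enumerate(self.field_names)}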
@@ -462,8 +539,7 @@ cdef class OsonTreeSegment(GrowableBuffer):
         else:
             self.write_uint32(<uint32_t> num_children)
 
-    cdef int encode_array(self, object value,
-                          OsonFieldNamesSegment fnames_seg) except -1:
+    cdef int encode_array(self, object value, OsonEncoder encoder) except -1:
         """
         Encode an array in the OSON tree segment.
         """
@@ -478,48 +554,41 @@ cdef class OsonTreeSegment(GrowableBuffer):
         for element in value:
             pack_uint32(&self._data[offset], self._pos, BYTE_ORDER_MSB)
             offset += sizeof(uint32_t)
-            self.encode_node(element, fnames_seg)
+            self.encode_node(element, encoder)
 
-    cdef int encode_object(self, dict value,
-                           OsonFieldNamesSegment fnames_seg) except -1:
+    cdef int encode_object(self, dict value, OsonEncoder encoder) except -1:
         """
         Encode an object in the OSON tree segment.
         """
         cdef:
-            uint32_t field_id_offset, value_offset
-            uint8_t node_type, field_id_size
+            uint32_t field_id_offset, value_offset, final_offset
             OsonFieldName field_name
             ssize_t num_children
             object child_value
+            uint8_t node_type
             str key
         num_children = len(value)
         self._encode_container(TNS_JSON_TYPE_OBJECT, num_children)
-        if fnames_seg.num_field_names < 256:
-            field_id_size = 1
-        elif fnames_seg.num_field_names < 65536:
-            field_id_size = 2
-        else:
-            field_id_size = 4
         field_id_offset = self._pos
-        value_offset = self._pos + num_children * field_id_size
-        self._reserve_space(num_children * (field_id_size + sizeof(uint32_t)))
+        value_offset = self._pos + num_children * encoder.field_id_size
+        final_offset = value_offset + num_children * sizeof(uint32_t)
+        self._reserve_space(final_offset - self._pos)
         for key, child_value in value.items():
-            field_name = fnames_seg.field_names_dict[key]
-            if field_id_size == 1:
+            field_name = encoder.field_names_dict[key]
+            if encoder.field_id_size == 1:
                 self._data[field_id_offset] = <uint8_t> field_name.field_id
-            elif field_id_size == 2:
+            elif encoder.field_id_size == 2:
                 pack_uint16(&self._data[field_id_offset],
                             <uint16_t> field_name.field_id, BYTE_ORDER_MSB)
             else:
                 pack_uint32(&self._data[field_id_offset], field_name.field_id,
                             BYTE_ORDER_MSB)
             pack_uint32(&self._data[value_offset], self._pos, BYTE_ORDER_MSB)
-            field_id_offset += field_id_size
+            field_id_offset += encoder.field_id_size
             value_offset += sizeof(uint32_t)
-            self.encode_node(child_value, fnames_seg)
+            self.encode_node(child_value, encoder)
 
-    cdef int encode_node(self, object value,
-                         OsonFieldNamesSegment fnames_seg) except -1:
+    cdef int encode_node(self, object value, OsonEncoder encoder) except -1:
         """
         Encode a value (node) in the OSON tree segment.
         """
@@ -592,11 +661,11 @@ cdef class OsonTreeSegment(GrowableBuffer):
 
         # handle lists/tuples
         elif isinstance(value, (list, tuple)):
-            self.encode_array(value, fnames_seg)
+            self.encode_array(value, encoder)
 
         # handle dictionaries
         elif isinstance(value, dict):
-            self.encode_object(value, fnames_seg)
+            self.encode_object(value, encoder)
 
         # other types are not supported
         else:
@@ -607,35 +676,156 @@ cdef class OsonTreeSegment(GrowableBuffer):
 @cython.final
 cdef class OsonEncoder(GrowableBuffer):
 
-    cdef int encode(self, object value) except -1:
+    cdef:
+        OsonFieldNamesSegment short_fnames_seg
+        OsonFieldNamesSegment long_fnames_seg
+        uint32_t num_field_names
+        ssize_t max_fname_size
+        dict field_names_dict
+        uint8_t field_id_size
+
+    cdef int _add_field_name(self, str name) except -1:
+        """
+        Add a field with the given name.
+        """
+        cdef OsonFieldName field_name
+        field_name = OsonFieldName.create(name, self.max_fname_size)
+        self.field_names_dict[name] = field_name
+        if field_name.name_bytes_len <= 255:
+            self.short_fnames_seg.add_name(field_name)
+        else:
+            if self.long_fnames_seg is None:
+                self.long_fnames_seg = OsonFieldNamesSegment.create()
+            self.long_fnames_seg.add_name(field_name)
+
+    cdef int _determine_flags(self, object value, uint16_t *flags) except -1:
+        """
+        Determine the flags to use for the OSON image.
+        """
+
+        # if value is a simple scalar, nothing more needs to be done
+        flags[0] = TNS_JSON_FLAG_INLINE_LEAF
+        if not isinstance(value, (list, tuple, dict)):
+            flags[0] |= TNS_JSON_FLAG_IS_SCALAR
+            return 0
+
+        # examine all values recursively to determine the unique set of field
+        # names and whether they need to be added to the long field names
+        # segment (> 255 bytes) or short field names segment (<= 255 bytes)
+        self.field_names_dict = {}
+        self.short_fnames_seg = OsonFieldNamesSegment.create()
+        self._examine_node(value)
+
+        # perform processing of field names segments and determine the total
+        # number of unique field names in the value
+        if self.short_fnames_seg is not None:
+            self.short_fnames_seg.process_field_names(0)
+            self.num_field_names += self.short_fnames_seg.num_field_names
+        if self.long_fnames_seg is not None:
+            self.long_fnames_seg.process_field_names(self.num_field_names)
+            self.num_field_names += self.long_fnames_seg.num_field_names
+
+        # determine remaining flags and field id size
+        flags[0] |= TNS_JSON_FLAG_HASH_ID_UINT8 | \
+                TNS_JSON_FLAG_TINY_NODES_STAT
+        if self.num_field_names > 65535:
+            flags[0] |= TNS_JSON_FLAG_NUM_FNAMES_UINT32
+            self.field_id_size = 4
+        elif self.num_field_names > 255:
+            flags[0] |= TNS_JSON_FLAG_NUM_FNAMES_UINT16
+            self.field_id_size = 2
+        else:
+            self.field_id_size = 1
+        if self.short_fnames_seg._pos > 65535:
+            flags[0] |= TNS_JSON_FLAG_FNAMES_SEG_UINT32
+
+    cdef int _examine_node(self, object value) except -1:
+        """
+        Examines the value. If it is a dictionary, all keys are extracted and
+        unique names retained. Elements in lists and tuples and values in
+        dictionaries are then examined to determine if they contain
+        dictionaries as well.
+        """
+        cdef str key
+        if isinstance(value, (list, tuple)):
+            for child_value in value:
+                self._examine_node(child_value)
+        elif isinstance(value, dict):
+            for key, child_value in (<dict> value).items():
+                if key not in self.field_names_dict:
+                    self._add_field_name(key)
+                self._examine_node(child_value)
+
+    cdef int _write_extended_header(self) except -1:
+        """
+        Write the extended header containing information about the short and
+        long field name segments.
+        """
+        cdef uint16_t secondary_flags = 0
+
+        # write number of short field names
+        if self.field_id_size == 1:
+            self.write_uint8(<uint8_t> self.short_fnames_seg.num_field_names)
+        elif self.field_id_size == 2:
+            self.write_uint16(<uint16_t> self.short_fnames_seg.num_field_names)
+        else:
+            self.write_uint32(self.short_fnames_seg.num_field_names)
+
+        # write size of short field names segment
+        if self.short_fnames_seg._pos < 65536:
+            self.write_uint16(<uint16_t> self.short_fnames_seg._pos)
+        else:
+            self.write_uint32(self.short_fnames_seg._pos)
+
+        # write fields for long field names segment, if applicable
+        if self.long_fnames_seg is not None:
+            if self.long_fnames_seg._pos < 65536:
+                secondary_flags = TNS_JSON_FLAG_SEC_FNAMES_SEG_UINT16
+            self.write_uint16(secondary_flags)
+            self.write_uint32(self.long_fnames_seg.num_field_names)
+            self.write_uint32(self.long_fnames_seg._pos)
+
+    cdef int _write_fnames_seg(self, OsonFieldNamesSegment seg) except -1:
+        """
+        Write the contents of the field names segment to the buffer.
+        """
+        cdef OsonFieldName field_name
+
+        # write array of hash ids
+        for field_name in seg.field_names:
+            if field_name.name_bytes_len <= 255:
+                self.write_uint8(field_name.hash_id & 0xff)
+            else:
+                self.write_uint16(field_name.hash_id & 0xffff)
+
+        # write array of field name offsets for the short field names
+        for field_name in seg.field_names:
+            if seg._pos < 65536:
+                self.write_uint16(<uint16_t> field_name.offset)
+            else:
+                self.write_uint32(field_name.offset)
+
+        # write field names
+        if seg._pos > 0:
+            self.write_raw(seg._data, seg._pos)
+
+    cdef int encode(self, object value, ssize_t max_fname_size) except -1:
         """
         Encodes the given value to OSON.
         """
         cdef:
-            OsonFieldNamesSegment fnames_seg = None
             OsonFieldName field_name
             OsonTreeSegment tree_seg
             uint16_t flags
 
         # determine the flags to use
-        flags = TNS_JSON_FLAG_INLINE_LEAF
-        if isinstance(value, (list, tuple, dict)):
-            flags |= TNS_JSON_FLAG_HASH_ID_UINT8 | \
-                    TNS_JSON_FLAG_TINY_NODES_STAT
-            fnames_seg = OsonFieldNamesSegment.create(value);
-            if fnames_seg.num_field_names > 65535:
-                flags |= TNS_JSON_FLAG_NUM_FNAMES_UINT32
-            elif fnames_seg.num_field_names > 255:
-                flags |= TNS_JSON_FLAG_NUM_FNAMES_UINT16
-            if fnames_seg._pos > 65535:
-                flags |= TNS_JSON_FLAG_FNAMES_SEG_UINT32
-        else:
-            flags |= TNS_JSON_FLAG_IS_SCALAR
+        self.max_fname_size = max_fname_size
+        self._determine_flags(value, &flags)
 
         # encode values into tree segment
         tree_seg = OsonTreeSegment.__new__(OsonTreeSegment)
         tree_seg._initialize(TNS_CHUNK_SIZE)
-        tree_seg.encode_node(value, fnames_seg)
+        tree_seg.encode_node(value, self)
         if tree_seg._pos > 65535:
             flags |= TNS_JSON_FLAG_TREE_SEG_UINT32
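Pulled together, _determine_flags and _write_extended_header fix the header layout: magic bytes, a version byte chosen by the presence of long names, the primary flags, the short-segment counts, and, only for version 3 images, secondary flags plus the long-segment counts. A schematic writer under those assumptions; this is not the driver's API, it covers container values only, and the tree-segment fields that follow in the real encoder are omitted:

    def write_oson_header(flags: int, num_short: int, short_seg_len: int,
                          field_id_size: int, long_seg: tuple = None) -> bytes:
        out = bytearray(b"\xffJZ")                     # magic bytes
        out += bytes([3 if long_seg else 1])           # version byte
        out += flags.to_bytes(2, "big")                # primary flags
        out += num_short.to_bytes(field_id_size, "big")
        out += short_seg_len.to_bytes(2 if short_seg_len < 65536 else 4, "big")
        if long_seg:                                   # (num_long, seg_len)
            num_long, long_seg_len = long_seg
            secondary = 0x0100 if long_seg_len < 65536 else 0
            out += secondary.to_bytes(2, "big")        # secondary flags
            out += num_long.to_bytes(4, "big")         # always uint32
            out += long_seg_len.to_bytes(4, "big")     # always uint32
        return bytes(out)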
@@ -643,52 +833,32 @@ cdef class OsonEncoder(GrowableBuffer):
         self.write_uint8(TNS_JSON_MAGIC_BYTE_1)
         self.write_uint8(TNS_JSON_MAGIC_BYTE_2)
         self.write_uint8(TNS_JSON_MAGIC_BYTE_3)
-        self.write_uint8(TNS_JSON_VERSION)
+        if self.long_fnames_seg is not None:
+            self.write_uint8(TNS_JSON_VERSION_MAX_FNAME_65535)
+        else:
+            self.write_uint8(TNS_JSON_VERSION_MAX_FNAME_255)
         self.write_uint16(flags)
 
         # write extended header (when value is not scalar)
-        if fnames_seg is not None:
-
-            # write number of field names
-            if fnames_seg.num_field_names < 256:
-                self.write_uint8(<uint8_t> fnames_seg.num_field_names)
-            elif fnames_seg.num_field_names < 65536:
-                self.write_uint16(<uint16_t> fnames_seg.num_field_names)
-            else:
-                self.write_uint32(fnames_seg.num_field_names)
-
-            # write size of field names segment
-            if fnames_seg._pos < 65536:
-                self.write_uint16(<uint16_t> fnames_seg._pos)
-            else:
-                self.write_uint32(fnames_seg._pos)
+        if self.short_fnames_seg is not None:
+            self._write_extended_header()
 
         # write size of tree segment
-        if (tree_seg._pos < 65536):
+        if tree_seg._pos < 65536:
             self.write_uint16(<uint16_t> tree_seg._pos)
         else:
             self.write_uint32(tree_seg._pos)
 
         # write remainder of header and any data (when value is not scalar)
-        if fnames_seg is not None:
+        if self.short_fnames_seg is not None:
 
             # write number of "tiny" nodes (always zero)
             self.write_uint16(0)
 
-            # write array of hash ids
-            for field_name in fnames_seg.field_names:
-                self.write_uint8(field_name.hash_id & 0xff)
-
-            # write array of field name offsets
-            for field_name in fnames_seg.field_names:
-                if fnames_seg._pos < 65536:
-                    self.write_uint16(<uint16_t> field_name.offset)
-                else:
-                    self.write_uint32(field_name.offset)
-
-            # write field names
-            if fnames_seg._pos > 0:
-                self.write_raw(fnames_seg._data, fnames_seg._pos)
+            # write field name segments
+            self._write_fnames_seg(self.short_fnames_seg)
+            if self.long_fnames_seg is not None:
+                self._write_fnames_seg(self.long_fnames_seg)
 
         # write tree segment data
         self.write_raw(tree_seg._data, tree_seg._pos)
@@ -681,7 +681,7 @@ cdef class WriteBuffer(Buffer):
         it.
         """
         cdef OsonEncoder encoder = OsonEncoder.__new__(OsonEncoder)
-        encoder.encode(value)
+        encoder.encode(value, self._caps.oson_max_fname_size)
         self.write_qlocator(encoder._pos)
         self._write_raw_bytes_and_length(encoder._data, encoder._pos)
@@ -50,3 +50,11 @@ insert into &main_user..TableWithDomainAndAnnotations values (1, 25)
 
 commit
 /
+
+create table &main_user..TestCompressedJson (
+    IntCol number(9) not null,
+    JsonCol json not null
+)
+json (JsonCol)
+store as (compress high)
+/
@@ -0,0 +1,185 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2023, Oracle and/or its affiliates.
+#
+# This software is dual-licensed to you under the Universal Permissive License
+# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
+# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose
+# either license.
+#
+# If you elect to accept the software under the Apache License, Version 2.0,
+# the following applies:
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------------
+
+"""
+6700 - Module for testing the JSON data type extension in Oracle Database 23c.
+"""
+
+import json
+import unittest
+
+import oracledb
+import test_env
+
+
+@unittest.skipUnless(
+    test_env.get_client_version() >= (23, 0), "unsupported client"
+)
+@unittest.skipUnless(
+    test_env.get_server_version() >= (23, 0), "unsupported server"
+)
+class TestCase(test_env.BaseTestCase):
+    def __test_fetch_json(self, value, table_name="TestJson"):
+        """
+        Tests fetching JSON encoded by the database.
+        """
+        self.cursor.execute(f"delete from {table_name}")
+        self.cursor.execute(
+            f"insert into {table_name} values (1, :1)", [json.dumps(value)]
+        )
+        self.cursor.execute(f"select JsonCol from {table_name}")
+        (fetched_value,) = self.cursor.fetchone()
+        self.assertEqual(fetched_value, value)
+
+    def __test_round_trip_json(self, value):
+        """
+        Tests fetching JSON encoded by the driver.
+        """
+        self.cursor.execute("delete from TestJson")
+        self.cursor.setinputsizes(oracledb.DB_TYPE_JSON)
+        self.cursor.execute("insert into TestJson values (1, :1)", [value])
+        self.cursor.execute("select JsonCol from TestJson")
+        (fetched_value,) = self.cursor.fetchone()
+        self.assertEqual(fetched_value, value)
+
+    def test_6700_json_with_field_name_greater_than_255(self):
+        "6700 - fetch JSON with a field name greater than 255 bytes"
+        fname_long = "A" * 256
+        value = {}
+        value[fname_long] = 6700
+        self.__test_fetch_json(value)
+
+    def test_6701_json_with_field_name_greater_and_less_than_255(self):
+        "6701 - fetch JSON with field names greater and less than 255 bytes"
+        fname_short = "short_name"
+        fname_long = "A" * 256
+        value = {}
+        value[fname_short] = "Short name"
+        value[fname_long] = 6701
+        self.__test_fetch_json(value)
+
+    def test_6702_json_with_many_large_field_names(self):
+        "6702 - fetch JSON with many field names greater than 255 bytes"
+        value = {}
+        for i in range(26):
+            for j in range(26):
+                fname = chr(i + ord("A")) + chr(j + ord("A")) + "X" * 254
+                value[fname] = 12.25
+        self.__test_fetch_json(value)
+
+    def test_6703_json_with_many_field_names(self):
+        "6703 - fetch JSON with many field names (large and small)"
+        value = {}
+        for i in range(26):
+            for j in range(26):
+                short_name = chr(i + ord("A")) + chr(j + ord("A"))
+                value[short_name] = 6.75
+                long_name = short_name + "X" * 254
+                value[long_name] = 12.25
+        self.__test_fetch_json(value)
+
+    def test_6704_json_with_many_short_field_names_one_long(self):
+        "6704 - fetch JSON with many field names (one large and many small)"
+        value = {}
+        long_name = "B" * 256
+        value[long_name] = 6704
+        for i in range(26):
+            for j in range(26):
+                short_name = chr(i + ord("A")) + chr(j + ord("A"))
+                value[short_name] = 8.625
+        self.__test_fetch_json(value)
+
+    def test_6705_rt_json_with_field_name_greater_than_255(self):
+        "6705 - round trip JSON with a field name greater than 255 bytes"
+        fname_long = "A" * 256
+        value = {}
+        value[fname_long] = 6705
+        self.__test_round_trip_json(value)
+
+    def test_6706_rt_json_with_field_name_greater_and_less_than_255(self):
+        "6706 - round trip JSON with field names (small and large)"
+        fname_short = "short_name"
+        fname_long = "A" * 256
+        value = {}
+        value[fname_short] = "Short name"
+        value[fname_long] = 6706
+        self.__test_round_trip_json(value)
+
+    def test_6707_rt_json_with_many_large_field_names(self):
+        "6707 - round trip JSON with many field names greater than 255 bytes"
+        value = {}
+        for i in range(26):
+            for j in range(26):
+                fname = chr(i + ord("A")) + chr(j + ord("A")) + "X" * 254
+                value[fname] = 12.25
+        self.__test_round_trip_json(value)
+
+    def test_6708_rt_json_with_many_field_names(self):
+        "6708 - round trip JSON with many field names (large and small)"
+        value = {}
+        for i in range(26):
+            for j in range(26):
+                short_name = chr(i + ord("A")) + chr(j + ord("A"))
+                value[short_name] = 6.75
+                long_name = short_name + "X" * 254
+                value[long_name] = 12.25
+        self.__test_round_trip_json(value)
+
+    def test_6709_rt_json_with_many_short_field_names_one_long(self):
+        "6709 - round trip JSON with many field names (1 large and many small)"
+        value = {}
+        long_name = "B" * 256
+        value[long_name] = 6704
+        for i in range(26):
+            for j in range(26):
+                short_name = chr(i + ord("A")) + chr(j + ord("A"))
+                value[short_name] = 8.625
+        self.__test_round_trip_json(value)
+
+    def test_6710_fetch_json_with_rel_offsets(self):
+        "6710 - fetch JSON with relative offsets"
+        value = {}
+        fname_long = "C" * 256
+        value[fname_long] = 6710
+        value["num_list"] = [1.5, 2.25, 3.75, 5.5]
+        value["str_list"] = ["string 1", "string 2"]
+        self.__test_fetch_json(value, "TestCompressedJson")
+
+    def test_6711_fetch_json_with_rel_offsets_and_shared_fields(self):
+        "6711 - fetch JSON with relative offsets and shared fields and values"
+        value = []
+        for i in range(15):
+            value.append(dict(a=6711, b="String Value"))
+        self.__test_fetch_json(value, "TestCompressedJson")
+
+    def test_6712_fetch_json_with_rel_offsets_and_shared_fields(self):
+        "6712 - fetch JSON with relative offsets and shared fields, not values"
+        value = []
+        for i in range(15):
+            value.append(dict(a=6711 + i, b=f"String Value {i}"))
+        self.__test_fetch_json(value, "TestCompressedJson")
+
+
+if __name__ == "__main__":
+    test_env.run_test_cases()