From 7c56d95e6d850161772432eea682cf9381e9f0f8 Mon Sep 17 00:00:00 2001 From: zjp-shadow <46623500+zjp-shadow@users.noreply.github.com> Date: Wed, 27 Jul 2022 12:09:18 +0800 Subject: [PATCH] Revert "Change the architecture of the code and add setup.py" --- .gitignore | 2 - README.md | 8 - examples/example.py | 73 ---- python/JSparse/__init__.py | 1 - python/JSparse/indice_manager.py | 34 -- python/JSparse/nn/__init__.py | 1 - python/JSparse/nn/functional/__init__.py | 10 - python/JSparse/nn/functional/activation.py | 24 -- python/JSparse/nn/functional/conv.py | 351 ---------------- python/JSparse/nn/functional/count.py | 30 -- python/JSparse/nn/functional/devoxelize.py | 166 -------- python/JSparse/nn/functional/downsample.py | 51 --- python/JSparse/nn/functional/hash.py | 204 --------- python/JSparse/nn/functional/pooling.py | 26 -- python/JSparse/nn/functional/query.py | 454 --------------------- python/JSparse/nn/functional/voxelize.py | 114 ------ python/JSparse/nn/modules/__init__.py | 6 - python/JSparse/nn/modules/activation.py | 21 - python/JSparse/nn/modules/conv.py | 85 ---- python/JSparse/nn/modules/norm.py | 34 -- python/JSparse/nn/modules/pooling.py | 11 - python/JSparse/nn/utils/__init__.py | 2 - python/JSparse/nn/utils/apply.py | 15 - python/JSparse/nn/utils/kernel.py | 28 -- python/JSparse/sparse.py | 125 ------ python/JSparse/sparse_dense_function.py | 42 -- python/JSparse/utils/__init__.py | 2 - python/JSparse/utils/quantize.py | 58 --- python/JSparse/utils/utils.py | 43 -- python/setup.py | 15 - voxelize_test.py | 96 ----- 31 files changed, 2132 deletions(-) delete mode 100644 .gitignore delete mode 100644 examples/example.py delete mode 100644 python/JSparse/__init__.py delete mode 100644 python/JSparse/indice_manager.py delete mode 100644 python/JSparse/nn/__init__.py delete mode 100644 python/JSparse/nn/functional/__init__.py delete mode 100644 python/JSparse/nn/functional/activation.py delete mode 100644 python/JSparse/nn/functional/conv.py delete mode 100644 python/JSparse/nn/functional/count.py delete mode 100644 python/JSparse/nn/functional/devoxelize.py delete mode 100644 python/JSparse/nn/functional/downsample.py delete mode 100644 python/JSparse/nn/functional/hash.py delete mode 100644 python/JSparse/nn/functional/pooling.py delete mode 100644 python/JSparse/nn/functional/query.py delete mode 100644 python/JSparse/nn/functional/voxelize.py delete mode 100644 python/JSparse/nn/modules/__init__.py delete mode 100644 python/JSparse/nn/modules/activation.py delete mode 100644 python/JSparse/nn/modules/conv.py delete mode 100644 python/JSparse/nn/modules/norm.py delete mode 100644 python/JSparse/nn/modules/pooling.py delete mode 100644 python/JSparse/nn/utils/__init__.py delete mode 100644 python/JSparse/nn/utils/apply.py delete mode 100644 python/JSparse/nn/utils/kernel.py delete mode 100644 python/JSparse/sparse.py delete mode 100644 python/JSparse/sparse_dense_function.py delete mode 100644 python/JSparse/utils/__init__.py delete mode 100644 python/JSparse/utils/quantize.py delete mode 100644 python/JSparse/utils/utils.py delete mode 100644 python/setup.py delete mode 100644 voxelize_test.py diff --git a/.gitignore b/.gitignore deleted file mode 100644 index e576789..0000000 --- a/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.ipynb -__pycache__ \ No newline at end of file diff --git a/README.md b/README.md index ad02269..7e9240d 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,3 @@ # JSparse JSparse is a high-performance auto-differentiation library for sparse 
tensors.
-
-
-## How to Install
-
-```
-cd python
-python setup.py install
-```
\ No newline at end of file
diff --git a/examples/example.py b/examples/example.py
deleted file mode 100644
index 2f04894..0000000
--- a/examples/example.py
+++ /dev/null
@@ -1,73 +0,0 @@
-from pickletools import optimize
-from statistics import mode
-import jittor
-from jittor import nn
-
-import JSparse
-from JSparse import SparseTensor
-from JSparse import nn as spnn
-from JSparse.utils.quantize import sparse_quantize
-
-import numpy as np
-
-class RandomDataset(jittor.dataset.Dataset):
-
-    def __init__(self, input_size: int, voxel_size: float) -> None:
-        super().__init__()
-        self.set_attrs(total_len = input_size)
-        self.voxel_size = voxel_size
-
-    def __getitem__(self, _: int):
-        inputs = np.random.uniform(-100, 100, size=(self.total_len, 4))
-        labels = np.random.choice(10, size=self.total_len)
-
-        coords, feats = inputs[:, :], inputs
-        coords -= np.min(coords, axis=0, keepdims=True)
-#        coords, indices = sparse_quantize(coords, self.voxel_size, return_index=True)
-
-        coords = jittor.Var(coords)
-        feats = jittor.Var(feats)
-        labels = jittor.Var(labels)
-#        coords = jittor.Var(coords, dtype=jittor.int64)
-#        feats = jittor.Var(feats[indices], dtype=jittor.float64)
-#        labels = jittor.Var(labels[indices], dtype=jittor.int64)
-
-        print(type(coords))
-        inputs = SparseTensor(coords, feats, 1, 1)
-        labels = SparseTensor(coords, labels, 1, 1)
-        return inputs, labels
-
-if __name__ == '__main__':
-    np.random.seed(0)
-
-    dataset = RandomDataset(input_size=10000, voxel_size=0.2)
-
-    model = nn.Sequential(
-        spnn.Conv3d(4, 32, 3),
-        spnn.BatchNorm(32),
-        spnn.ReLU(True),
-        spnn.Conv3d(32, 64, 2, stride=2),
-        spnn.BatchNorm(64),
-        spnn.ReLU(True),
-        spnn.Conv3d(64, 64, 2, stride=2, transposed=True),
-        spnn.BatchNorm(64),
-        spnn.ReLU(True),
-        spnn.Conv3d(64, 32, 3),
-        spnn.BatchNorm(32),
-        spnn.ReLU(True),
-        spnn.Conv3d(32, 10, 1),
-    )
-
-    criterion = nn.CrossEntropyLoss()
-    optimizer = jittor.optim.Adam(model.parameters(), lr=1e-3)
-
-    model.train()
-    lens = len(dataset)
-    for batch_idx, (inputs, labels) in enumerate(dataset):
-        outputs = model(inputs)
-        loss = criterion(outputs, labels)
-        optimizer.step(loss)
-
-        if batch_idx % 10 == 0:
-            print('Training: [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
-                batch_idx, lens, 100.
* batch_idx / lens, loss.numpy()[0])) \ No newline at end of file diff --git a/python/JSparse/__init__.py b/python/JSparse/__init__.py deleted file mode 100644 index 4228645..0000000 --- a/python/JSparse/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .sparse import * \ No newline at end of file diff --git a/python/JSparse/indice_manager.py b/python/JSparse/indice_manager.py deleted file mode 100644 index 18e16a2..0000000 --- a/python/JSparse/indice_manager.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -import numpy as np -from typing import Union, List, Tuple - -import jittor as jt -from jittor import Function - -class IndiceManager: - def __init__( - self, - ndim, - # indice_map_type, - # sparse_alorithm, # set m_hashtable_occupancy for concurrent_unordered_map - ): - # if indice_map_type == 'GPU': - # assert(jt.has_cuda) - - self.ndim = ndim - # self.indice_map_type = indice_map_type - # self.sparse_algorithm = sparse_alorithm - self.stride_key_manager = {} - self.indice_map_manager = {} - self.kernel_map_manager = {} - - def insert(self, stride, indice_key, indice_hash): - self.stride_key_manager[stride] = indice_key - self.indice_map_manager[indice_key] = indice_hash - - -# class IndiceMapManager - - - - \ No newline at end of file diff --git a/python/JSparse/nn/__init__.py b/python/JSparse/nn/__init__.py deleted file mode 100644 index 270dceb..0000000 --- a/python/JSparse/nn/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .modules import * diff --git a/python/JSparse/nn/functional/__init__.py b/python/JSparse/nn/functional/__init__.py deleted file mode 100644 index d750418..0000000 --- a/python/JSparse/nn/functional/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .activation import * -from .conv import * -from .count import * -# from .crop import * -from .devoxelize import * -from .downsample import * -from .hash import * -from .pooling import * -from .query import * -from .voxelize import * diff --git a/python/JSparse/nn/functional/activation.py b/python/JSparse/nn/functional/activation.py deleted file mode 100644 index dd94e46..0000000 --- a/python/JSparse/nn/functional/activation.py +++ /dev/null @@ -1,24 +0,0 @@ -import jittor as jt -import jittor.nn as nn - -from JSparse import SparseTensor -from JSparse.nn.utils import fapply - -__all__ = ['relu', 'leaky_relu'] -# __all__ = ['relu', 'leaky_relu', 'ReLU', 'LeakyReLU'] - -def relu(input: SparseTensor) -> SparseTensor: - return fapply(input, nn.relu) - - -def leaky_relu(input: SparseTensor, - scale: float = 0.01) -> SparseTensor: - return fapply(input, - nn.leaky_relu, - scale=scale) - -# Relu = jt.make_module(relu) -# ReLU = Relu -# Leaky_relu = jt.make_module(leaky_relu, 2) -# LeakyReLU = Leaky_relu - diff --git a/python/JSparse/nn/functional/conv.py b/python/JSparse/nn/functional/conv.py deleted file mode 100644 index 8d76a30..0000000 --- a/python/JSparse/nn/functional/conv.py +++ /dev/null @@ -1,351 +0,0 @@ -from typing import List, Optional, Tuple, Union - -import jittor as jt -from jittor import Function -from jittor.misc import _pair, _triple - -from JSparse import SparseTensor -from JSparse.nn import functional as F -from JSparse.nn.utils import get_kernel_offsets -from JSparse import make_ntuple - -__all__ = ['conv3d', 'Convolution'] - -class Convolution(Function): - def execute( - self, - input: jt.Var, - weight: jt.Var, - nbmaps: jt.Var, - nbsizes: jt.Var, - sizes: Tuple[int, int], - transposed: bool = False, - ) -> jt.Var: - if not transposed: - output = jt.zeros((sizes[1], weight.size(-1))) - else: - output = 
jt.zeros((sizes[0], weight.size(-1))) - - assert input.size(1) == weight.size(1) - in_size = input.size(0) - in_channels = input.size(1) - out_size = output.size(0) - out_channels = output.size(1) - - kernel_volume = weight.size(0) - flag = False - mid = kernel_volume // 2 - in_buffer_size = jt.Var(1) - if kernel_volume % 2 and out_size == in_size: - flag = True - in_buffer_size = max(in_buffer_size, jt.max(nbsizes[:mid])) - in_buffer_size = max(in_buffer_size, jt.max(nbsizes[mid + 1:])) - output = jt.matmul(input, weight[mid, :, :]) - else: - in_buffer_size = jt.max(nbsizes) - - # in_buffer_activated : in_buffer_size * in_channels - # weight : in_channels * out_channels - # out_buffer_activated: in_buffer_size * out_channels - # out_buffer_activated = in_buffer_activated * weight - in_buffer_activated = jt.zeros(in_buffer_size.tolist() + [in_channels]) - cur_offset = jt.Var(0) - for i in range(kernel_volume): - n_active_feats = nbsizes[i] - if (flag and (i == mid)): - cur_offset += n_active_feats - continue - if n_active_feats == 0: - continue - - t = 1 if transposed else 0 - ################ - ## gather - ################ - # print(n_active_feats, in_channels, cur_offset, t) - gather(n_active_feats, in_channels, cur_offset, t, input, in_buffer_activated, nbmaps) - ################ - ## matmul - ################ - out_buffer_activated = jt.matmul(in_buffer_activated, weight[i, :, :]) - ################ - ## scatter - ################ - scatter(n_active_feats, out_channels, cur_offset, t, out_buffer_activated, output, nbmaps) - cur_offset += n_active_feats - self.save_vars = input, weight, nbmaps, nbsizes, transposed - return output - - def grad( - self, - grad_output: jt.Var - ) -> Tuple[Optional[jt.Var], ...]: - input, weight, nbmaps, nbsizes, transposed = self.save_vars - - grad_input = jt.zeros_like(input) - grad_weight = jt.zeros_like(weight) - - # n_in_feats = input.size(0) - # n_out_feats = grad_output.size(0) - n_in_channels = input.size(1) - n_out_channels = weight.size(-1) - - kernel_volume = weight.size(0) - flag = False - in_buffer_size = jt.max(nbsizes) - # out_grad_buffer_activated n_active_feats x n_out_channels - # in_grad_buffer_activated n_active_feats x n_in_channels - # in_buffer_activated n_active_feats x n_in_channels - out_grad_buffer_activated = jt.zeros(in_buffer_size.tolist() + [n_out_channels]) - in_grad_buffer_activated = jt.zeros(in_buffer_size.tolist() + [n_in_channels]) - in_buffer_activated = jt.zeros(in_buffer_size.tolist() + [n_in_channels]) - - cur_offset = jt.Var(0) - mid = kernel_volume // 2 - for i in range(kernel_volume): - # kernel_grad_buffer = grad_weight[i, :, :] - n_active_feats = nbsizes[i] - # if flag and (i == mid): - # cur_offset += n_active_feats - # continue - - if n_active_feats == 0: - continue - - t = 1 if transposed else 0 - ################ - ## gather - ################ - gather(n_active_feats, n_out_channels, cur_offset, 1 - t, grad_output, out_grad_buffer_activated, nbmaps) - gather(n_active_feats, n_in_channels , cur_offset, t , input , in_buffer_activated , nbmaps) - ################ - ## matmul - ################ - # grad for input - # in_grad_buffer_activated = out_grad_buffer_activated @ weight^T - # n_active_feats x n_in_channels n_active_feats x n_out_channels n_out_channels x n_in_channels - in_grad_buffer_activated = jt.nn.matmul_transpose(out_grad_buffer_activated, weight[i, :, :]) - # grad for weight - # kernel_grad_buffer = in_buffer_activated^T @ out_grad_buffer_activated - # n_in_channels x n_out_channels n_in_channels 
x n_active_feats n_active_feats x n_out_channels - grad_weight[i, :, :] = jt.nn.matmul(in_buffer_activated.t(), out_grad_buffer_activated) - ################ - ## scatter - ################ - scatter(n_active_feats, n_in_channels, cur_offset, 1 - t, in_grad_buffer_activated, grad_input, nbmaps) - cur_offset += n_active_feats - return grad_input, grad_weight, None, None, None, None - -def gather( - n_active_feats, - channels, - cur_offset, - transpose, - in_feat, - out_feat, - kmap, -): - shape = n_active_feats.tolist() + cur_offset.tolist() + [channels, transpose, 0] - gather_args = jt.zeros(shape, dtype='int32') - return jt.code((0, ), out_feat.dtype, [in_feat, out_feat, kmap, gather_args], - cuda_header=""" - @alias(in_feat, in0) - @alias(out_feat, in1) - @alias(kmap, in2) - @alias(args, in3) - """, - cuda_src=""" - __global__ void gather_kernel(@ARGS_DEF) { - @PRECALC - const int n_k = args_shape0; - const int st = args_shape1; - const int c = args_shape2; - const int transpose = args_shape3; - - int idx = blockDim.x * blockIdx.x + threadIdx.x; - int i = idx / c; - int j = idx % c; - if (i >= n_k) return; - int in_pos = @kmap(st + i, transpose); - // if (in_pos < 0) return; - @out_feat(i, j) = @in_feat(in_pos, j); - } - - gather_kernel<<< (out_feat_shape0 * out_feat_shape1 + 255) / 256, 256 >>>(@ARGS); - """, - cpu_header=""" - @alias(in_feat, in0) - @alias(out_feat, in1) - @alias(kmap, in2) - @alias(args, in3) - """, - cpu_src=""" - const int n_k = args_shape0; - const int st = args_shape1; - const int c = args_shape2; - const int transpose = args_shape3; - - for (int i = 0; i < n_k; ++ i ) { - int in_pos = @kmap(st + i, transpose); - // if (in_pos < 0) { - // continue; - // } - #pragma omp parallel for - for (int j = 0; j < c; ++ j ) { - @out_feat(i, j) = @in_feat(in_pos, j); - // @out(i, j) = @in_feat(in_pos, j); - } - } - """ - ).sync() - - -def scatter( - n_active_feats, - channels, - cur_offset, - transpose, - in_feat, - out_feat, - kmap, -): - shape = n_active_feats.tolist() + cur_offset.tolist() + [channels, transpose, 0] - scatter_args = jt.zeros(shape, dtype='int32') - return jt.code((0, ), out_feat.dtype, [in_feat, out_feat, kmap, scatter_args], - cuda_header=""" - @alias(in_feat, in0) - @alias(out_feat, in1) - @alias(kmap, in2) - @alias(args, in3) - """, - cuda_src=""" - __global__ void scatter_kernel(@ARGS_DEF) { - @PRECALC - const int n_k = args_shape0; - const int st = args_shape1; - const int c = args_shape2; - const int transpose = args_shape3; - - int idx = blockDim.x * blockIdx.x + threadIdx.x; - int i = idx / c; - int j = idx % c; - if (i >= n_k) return; - int out_pos = @kmap(st + i, 1 - transpose); - // if (out_pos < 0) return; - @out_feat(out_pos, j) += @in_feat(i, j); - } - - scatter_kernel<<< (out_feat_shape0 * out_feat_shape1 + 255) / 256, 256 >>>(@ARGS); - """, - cpu_header=""" - @alias(in_feat, in0) - @alias(out_feat, in1) - @alias(kmap, in2) - @alias(args, in3) - """, - cpu_src=""" - const int n_k = args_shape0; - const int st = args_shape1; - const int c = args_shape2; - const int transpose = args_shape3; - - for (int i = 0; i < n_k; ++ i ) { - int out_pos = @kmap(st + i, 1 - transpose); - // if (out_pos < 0) { - // continue; - // } - #pragma omp parallel for - for (int j = 0; j < c; ++ j ) { - @out_feat(out_pos, j) += @in_feat(i, j); - // @out(out_pos, j) += @in_feat(i, j); - } - } - """ - ).sync() - -def conv3d( - input: SparseTensor, - weight: jt.Var, - kernel_size: Union[int, Tuple[int, ...]], - bias: Optional[jt.Var] = None, - stride: Union[int, 
Tuple[int, ...]] = 1, - dilation: Union[int, Tuple[int, ...]] = 1, - group: int = 1, - transposed: bool = False, -) -> SparseTensor: - # kernel_size = make_ntuple(kernel_size, ndim=3) - # stride = make_ntuple(stride, ndim=3) - # dilation = make_ntuple(dilation, ndim=3) - kernel_size = _triple(kernel_size) - stride = _triple(stride) - dilation = _triple(dilation) - - if (kernel_size == _triple(1) and stride == _triple(1) and dilation == _triple(1)): - output_stride = input.stride - output_indices = input.indices - output_values = input.values.matmul(weight) - elif not transposed: - output_stride = tuple(input.stride[k] * stride[k] for k in range(3)) - - if output_stride in input.cmaps: - output_indices = input.cmaps[output_stride] - elif all(stride[k] == 1 for k in range(3)): - output_indices = input.indices - else: - output_indices = F.spdownsample( - input.indices, - stride, - kernel_size, - input.stride, - ) - - if (input.stride, kernel_size, stride, dilation) not in input.kmaps: - offsets = get_kernel_offsets( - kernel_size, - stride=input.stride, - dilation=dilation, - ) - references = F.sphash(input.indices) # (N,) - queries = F.sphash(output_indices, offsets) # (|K|, N) - results = F.spquery(queries, references) # (|K|, N) - - nbsizes = jt.sum(results != -1, dim=1) - nbmaps = jt.nonzero(results != -1) - - indices = nbmaps[:, 0] * results.size(1) + nbmaps[:, 1] - nbmaps[:, 0] = results.view(-1)[indices] - - input.kmaps[(input.stride, kernel_size, stride, dilation)] = [ - nbmaps, nbsizes, (input.indices.shape[0], output_indices.shape[0]) - ] - - output_values = Convolution.apply( - input.values, - weight, - *input.kmaps[(input.stride, kernel_size, stride, dilation)], - transposed, - ) - else: - output_stride = tuple(input.stride[k] // stride[k] for k in range(3)) - output_indices = input.cmaps[output_stride] - output_values = Convolution.apply( - input.values, - weight, - *input.kmaps[(output_stride, kernel_size, stride, dilation)], - transposed, - ) - - if bias is not None: - output_values += bias - - # size have to be set - output = SparseTensor( - indices=output_indices, - values=output_values, - stride=output_stride, - size=input.size - ) - output.cmaps = input.cmaps - output.cmaps.setdefault(output_stride, output_indices) - output.kmaps = input.kmaps - return output diff --git a/python/JSparse/nn/functional/count.py b/python/JSparse/nn/functional/count.py deleted file mode 100644 index dd765f9..0000000 --- a/python/JSparse/nn/functional/count.py +++ /dev/null @@ -1,30 +0,0 @@ -import jittor as jt - -def spcount(idx_query: jt.Var, num: int) -> jt.Var: - return jt.code((num,), jt.int32, [idx_query], - cuda_src=""" - __global__ void count_kernel(@ARGS_DEF) { - @PRECALC - @alias(idx_query, in0) - int i = blockDim.x * blockIdx.x + threadIdx.x; - int cur_idx = @idx_query(i); - if (i < idx_query_shape0 && cur_idx >= 0) { - atomicAdd(out_p + cur_idx, 1); - } - } - @alias(idx_query, in0) - count_kernel<<<(idx_query_shape0 + 511) / 512, 512>>>(@ARGS); - """, - cpu_src=""" - @alias(idx_query, in0) - #pragma omp parallel for - for (int i = 0; i < idx_query_shape0; ++ i ) { - int cur_idx = @idx_query(i); - if (cur_idx < 0) { - continue; - } - #pragma omp atomic - @out(cur_idx) ++; - } - """ - ) diff --git a/python/JSparse/nn/functional/devoxelize.py b/python/JSparse/nn/functional/devoxelize.py deleted file mode 100644 index 1099f75..0000000 --- a/python/JSparse/nn/functional/devoxelize.py +++ /dev/null @@ -1,166 +0,0 @@ -import jittor as jt -from jittor import Function - -from JSparse import 
SparseTensor
-
-__all__ = ['calc_ti_weights', 'spdevoxelize']
-
-def calc_ti_weights(
-    indices: jt.Var,
-    idx_query: jt.Var,
-    scale: float = 1
-) -> jt.Var:
-    with jt.no_grad():
-        p = indices
-        if scale != 1:
-            pf = jt.floor(indices / scale) * scale
-        else:
-            pf = jt.floor(indices)
-        pc = pf + scale
-
-        x = p[:, 1].view(-1, 1)
-        y = p[:, 2].view(-1, 1)
-        z = p[:, 3].view(-1, 1)
-
-        xf = pf[:, 1].view(-1, 1).float()
-        yf = pf[:, 2].view(-1, 1).float()
-        zf = pf[:, 3].view(-1, 1).float()
-
-        xc = pc[:, 1].view(-1, 1).float()
-        yc = pc[:, 2].view(-1, 1).float()
-        zc = pc[:, 3].view(-1, 1).float()
-
-        w0 = (xc - x) * (yc - y) * (zc - z)
-        w1 = (xc - x) * (yc - y) * (z - zf)
-        w2 = (xc - x) * (y - yf) * (zc - z)
-        w3 = (xc - x) * (y - yf) * (z - zf)
-        w4 = (x - xf) * (yc - y) * (zc - z)
-        w5 = (x - xf) * (yc - y) * (z - zf)
-        w6 = (x - xf) * (y - yf) * (zc - z)
-        w7 = (x - xf) * (y - yf) * (z - zf)
-
-        w = jt.concat([w0, w1, w2, w3, w4, w5, w6, w7], dim=1).t()
-        if scale != 1:
-            w /= scale ** 3
-        w[idx_query == -1] = 0
-        w /= jt.sum(w, dim=0) + 1e-8
-    return w
-
-
-class Devoxelize(Function):
-    def execute(
-        self,
-        values: jt.Var,
-        idx_query: jt.Var,
-        weights: jt.Var
-    ) -> jt.Var:
-        # c = values_shape1
-        # N = idx_query_shape0
-        output = jt.code((idx_query.shape[0], values.shape[1]), jt.float32, [values, idx_query, weights],
-            cuda_src="""
-                __global__ void devoxelize_forward_kernel(@ARGS_DEF) {
-                    @PRECALC
-                    @alias(values, in0)
-                    @alias(idx_query, in1)
-                    @alias(weights, in2)
-                    int index = blockIdx.x * blockDim.x + threadIdx.x;
-                    int i = index / values_shape1;
-                    int j = index % values_shape1;
-
-                    if (i < idx_query_shape0) {
-                        float cur_values = 0;
-                        for (int k = 0; k < 8; ++ k ) {
-                            int idx = @idx_query(i, k);
-                            cur_values = (idx >= 0) ? @values(idx, j) : 0;
-                            @out(i, j) += @weights(i, k) * cur_values;
-                        }
-                    }
-                }
-                devoxelize_forward_kernel<<< (out_shape0 * out_shape1 + 255) / 256, 256 >>>(@ARGS);
-            """,
-            cpu_src="""
-                @alias(values, in0)
-                @alias(idx_query, in1)
-                @alias(weights, in2)
-
-                #pragma omp parallel for
-                for (int i = 0; i < idx_query_shape0; ++ i ) {
-                    for (int j = 0; j < values_shape1; ++ j ) {
-                        float cur_values = 0;
-                        for (int k = 0; k < 8; ++ k ) {
-                            int idx = @idx_query(i, k);
-                            cur_values = (idx >= 0) ? @values(idx, j) : 0;
-                            #pragma omp atomic
-                            @out(i, j) += @weights(i, k) * cur_values;
-                        }
-                    }
-                }
-            """
-        )
-        self.save_vars = (idx_query, weights, values.shape[0])
-        return output
-
-    def grad(self, grad_output: jt.Var):
-        idx_query, weights, input_size = self.save_vars
-
-        grad_values = jt.code((input_size, grad_output.shape[1]), jt.float, [idx_query, weights, grad_output],
-            cuda_header="""
-                #include <cuda.h>
-                #include <cuda_runtime.h>
-                #include <cmath>
-            """,
-            cuda_src="""
-                __global__ void devoxelize_backward_kernel(@ARGS_DEF) {
-                    @PRECALC
-                    @alias(idx_query, in0)
-                    @alias(weights, in1)
-                    @alias(grad_output, in2)
-
-                    int index = blockIdx.x * blockDim.x + threadIdx.x;
-                    int c = grad_output_shape1;
-                    int i = index / c;
-                    int j = index % c;
-
-                    if (i < grad_output_shape0) {
-                        float cur_grad_output = @grad_output(i, j);
-
-                        #pragma unroll
-                        for (int k = 0; k < 8; ++ k ) {
-                            int idx = @idx_query(i, k);
-                            if (idx >= 0) {
-                                atomicAdd(&@out(idx, j), @weights(i, k) * cur_grad_output);
-                            }
-                        }
-                    }
-                }
-                @alias(grad_output, in2)
-                devoxelize_backward_kernel<<< (grad_output_shape0 * grad_output_shape1 + 255) / 256, 256 >>>(@ARGS);
-            """,
-            cpu_src="""
-                @alias(idx_query, in0)
-                @alias(weights, in1)
-                @alias(grad_output, in2)
-
-                for (int i = 0; i < grad_output_shape0; ++ i ) {
-                    #pragma omp parallel for
-                    for (int j = 0; j < grad_output_shape1; ++ j ) {
-                        for (int k = 0; k < 8; ++ k ) {
-                            int idx = @idx_query(i, k);
-                            if (idx < 0) continue;
-                            #pragma omp atomic
-                            @out(idx, j) += @weights(i, k) * @grad_output(i, j);
-                        }
-                    }
-                }
-            """
-        )
-        return grad_values, None, None
-
-def spdevoxelize(
-    values: jt.Var,
-    idx_query: jt.Var,
-    weights: jt.Var
-) -> jt.Var:
-    return Devoxelize.apply(values, idx_query, weights)
-
diff --git a/python/JSparse/nn/functional/downsample.py b/python/JSparse/nn/functional/downsample.py
deleted file mode 100644
index 5a4b651..0000000
--- a/python/JSparse/nn/functional/downsample.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from typing import Tuple, Union
-
-import jittor as jt
-from jittor.misc import _pair, _triple
-
-from JSparse.nn.utils import get_kernel_offsets
-from JSparse.utils import make_ntuple, trunc
-
-__all__ = ['spdownsample']
-
-def spdownsample(
-        indices: jt.Var,
-        stride: Union[int, Tuple[int, ...]] = 2,
-        kernel_size: Union[int, Tuple[int, ...]] = 2,
-        tensor_stride: Union[int, Tuple[int, ...]] = 1) -> jt.Var:
-    # stride = make_ntuple(stride, ndim=3)
-    # kernel_size = make_ntuple(kernel_size, ndim=3)
-    # tensor_stride = make_ntuple(tensor_stride, ndim=3)
-    kernel_size = _triple(kernel_size)
-    stride = _triple(stride)
-    tensor_stride = _triple(tensor_stride)
-
-    sample_stride = [stride[k] * tensor_stride[k] for k in range(3)]
-    sample_stride = jt.Var(sample_stride,
-                           dtype='int32').unsqueeze(dim=0)
-
-    if all(stride[k] in [1, kernel_size[k]] for k in range(3)):
-        indices = indices.clone()
-        indices[:, 1:] = trunc(jt.divide(indices[:, 1:], sample_stride)) * sample_stride
-    else:
-        offsets = get_kernel_offsets(kernel_size,
-                                     tensor_stride)
-        kernel_volume = offsets.size(0)
-
-        indices_min = indices[:, 1:].min(dim=0, keepdims=True)
-
-        b = indices[:, :1].repeat(1, kernel_volume)
-        x = indices[:, 1:].unsqueeze(dim=1).repeat(1, kernel_volume, 1) + offsets
-        indices = jt.cat([b.view(-1, 1), x.view(-1, 3)], dim=1)
-
-        # TODO: We need to also filter `indices` based on `indices_max`.
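-        # A hypothetical way to satisfy the TODO (the name `indices_max` and the
-        # extra mask line are assumptions, not part of the original): capture the
-        # upper bound from the original coordinates next to `indices_min`, before
-        # `indices` is overwritten with the offset-expanded candidates:
-        #     indices_max = indices[:, 1:].max(dim=0, keepdims=True)
-        # ...and then extend the mask chain below with:
-        #     mask &= (indices[:, 1:] <= indices_max)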
-        mask = (indices[:, 1:] % sample_stride == 0)
-        mask &= (indices[:, 1:] >= indices_min)
-        mask = jt.all(mask, dim=1)
-        indices = indices[mask]
-
-    # we may have to unique the indices when we define the SparseTensor
-    # indices = indices[:, [3, 0, 1, 2]]
-    # indices = jt.unique(indices, dim=0)
-    # indices = indices[:, [1, 2, 3, 0]]
-    return indices
\ No newline at end of file
diff --git a/python/JSparse/nn/functional/hash.py b/python/JSparse/nn/functional/hash.py
deleted file mode 100644
index 582fff5..0000000
--- a/python/JSparse/nn/functional/hash.py
+++ /dev/null
@@ -1,204 +0,0 @@
-from typing import Optional
-
-import jittor as jt
-
-__all__ = ['sphash']
-
-def sphash(indices: jt.Var,
-           offsets: Optional[jt.Var] = None) -> jt.Var:
-    assert indices.dtype == jt.int, indices.dtype
-    assert indices.ndim == 2 and indices.shape[1] == 4, indices.shape
-
-    if offsets is None:
-        return jt.code((indices.shape[0],), jt.int64, [indices],
-            cuda_header="""
-                #include <cstdint>
-                #include <cmath>
-                #include <cuda.h>
-                #include <cuda_runtime.h>
-                @alias(indices, in0)
-            """,
-            cuda_src="""
-                __global__ static void hash_kernel(@ARGS_DEF) {
-                    @PRECALC
-
-                    int i = blockIdx.x * blockDim.x + threadIdx.x;
-                    if (i < indices_shape0) {
-                        uint64_t hash = 14695981039346656037UL;
-                        for (int j = 0; j < 4; ++ j ) {
-                            hash ^= (unsigned int)@indices(i, j);
-                            hash *= 1099511628211UL;
-                        }
-                        hash = (hash >> 60) ^ (hash & 0xFFFFFFFFFFFFFFF);
-                        @out(i) = hash;
-                    }
-                }
-                hash_kernel<<< (indices_shape0 + 511) / 512, 512 >>>(@ARGS);
-            """,
-            cpu_header="""
-                #include <cstdint>
-                @alias(indices, in0)
-            """,
-            cpu_src="""
-                #pragma omp parallel for
-                for (int i = 0; i < indices_shape0; ++ i ) {
-                    uint64_t hash = 14695981039346656037UL;
-                    for (int j = 0; j < 4; ++ j ) {
-                        hash ^= (unsigned int)@indices(i, j);
-                        hash *= 1099511628211UL;
-                    }
-                    hash = (hash >> 60) ^ (hash & 0xFFFFFFFFFFFFFFF);
-                    @out(i) = hash;
-                }
-            """)
-    else:
-        assert offsets.dtype == jt.int32, offsets.dtype
-        assert offsets.ndim == 2 and offsets.shape[1] == 3, offsets.shape
-
-        return jt.code((offsets.shape[0], indices.shape[0]), jt.int64, [indices, offsets],
-            cuda_header="""
-                #include <cstdint>
-                #include <cmath>
-                #include <cstdio>
-                #include <cuda.h>
-                #include <cuda_runtime.h>
-
-                @alias(indices, in0)
-                @alias(offsets, in1)
-            """,
-            # cuda_src="""
-            #     __global__ void kernel_hash_kernel(@ARGS_DEF){
-            #         @PRECALC
-            #         extern __shared__ int offsets_shared[];
-
-            #         int ix = blockDim.x * blockIdx.x + threadIdx.x;
-            #         int iy = blockIdx.y;
-
-            #         // if (!threadIdx.x) {
-            #         //     for (int j = 0; j < 3; ++ j ) {
-            #         //         offsets_shared[iy * 3 + j] = @offsets(iy, j);
-            #         //     }
-            #         // }
-            #         // __syncthreads();
-
-            #         if (!threadIdx.x) {
-            #             for (int j = 0; j < 3; ++ j ) {
-            #                 offsets_shared[iy * 3 + j] = @offsets(iy, j);
-            #             }
-            #         }
-            #         __syncthreads();
-
-            #         if (ix < indices_shape0 && iy < offsets_shape0) {
-            #             int cur_indices[4];
-            #             for (int j = 1; j <= 3; ++ j ) {
-            #                 // cur_indices[j] = @indices(ix, j) + @offsets(iy, j - 1);
-            #                 cur_indices[j] = @indices(ix, j) + offsets_shared[iy * 3 + j - 1];
-            #             }
-            #             cur_indices[0] = @indices(ix, 0);
-            #             uint64_t hash = 14695981039346656037UL;
-            #             for (int j = 0; j < 4; ++ j ) {
-            #                 hash ^= (unsigned int)cur_indices[j];
-            #                 hash *= 1099511628211UL;
-            #             }
-            #             hash = (hash >> 60) ^ (hash & 0xFFFFFFFFFFFFFFF);
-            #             @out0(iy, ix) = hash;
-            #         }
-            #     }
-            #     dim3 block(512);
-            #     dim3 grid((indices_shape0 + block.x - 1) / block.x, offsets_shape0);
-            #     kernel_hash_kernel<<< grid, block, offsets_shape0 * 3 * sizeof(offsets_type) >>>(@ARGS);
-            # """,
-            # cuda_src="""
-            #     __global__ void kernel_hash_kernel(@ARGS_DEF){
-            #         @PRECALC
-            #         extern __shared__ int offsets_shared[];
-
-            #         int ix = blockDim.x * blockIdx.x + threadIdx.x;
-            #         int iy = blockDim.y * blockIdx.y + threadIdx.y;
-
-            #         // if (!threadIdx.x) {
-            #         //     for (int j = 0; j < 3; ++ j ) {
-            #         //         offsets_shared[iy * 3 + j] = @offsets(iy, j);
-            #         //     }
-            #         // }
-            #         // __syncthreads();
-
-            #         if (iy < indices_shape0 && ix < offsets_shape0) {
-            #             int cur_indices[4];
-            #             // for (int j = 1; j <= 3; ++ j ) {
-            #             //     cur_indices[j] = @indices(iy, j) + @offsets(ix, j - 1);
-            #             //     cur_indices[j] = @indices(iy, j) + offsets_shared[ix * 3 + j - 1];
-            #             // }
-            #             cur_indices[0] = @indices(iy, 0);
-            #             cur_indices[1] = @indices(iy, 1) + @offsets(ix, 0);
-            #             cur_indices[2] = @indices(iy, 2) + @offsets(ix, 1);
-            #             cur_indices[3] = @indices(iy, 3) + @offsets(ix, 2);
-            #             uint64_t hash = 14695981039346656037UL;
-            #             for (int j = 0; j < 4; ++ j ) {
-            #                 hash ^= (unsigned int)cur_indices[j];
-            #                 hash *= 1099511628211UL;
-            #             }
-            #             hash = (hash >> 60) ^ (hash & 0xFFFFFFFFFFFFFFF);
-            #             @out0(ix, iy) = hash;
-            #         }
-            #     }
-            #     dim3 block(16, 64);
-            #     dim3 grid((offsets_shape0 + block.x - 1) / block.x, (indices_shape0 + block.y - 1) / block.y);
-            #     kernel_hash_kernel<<< grid, block, offsets_shape0 * 3 * sizeof(offsets_type) >>>(@ARGS);
-            # """,
-            cuda_src="""
-                __global__ void kernel_hash_kernel(@ARGS_DEF){
-                    @PRECALC
-                    extern __shared__ int offsets_shared[];
-
-                    int idx = blockDim.x * blockIdx.x + threadIdx.x;
-                    int k = idx % offsets_shape0;
-                    int i = idx / offsets_shape0;
-
-                    if (i < indices_shape0) {
-                        int cur_indices[4];
-                        for (int j = 1; j <= 3; ++ j ) {
-                            cur_indices[j] = @indices(i, j) + @offsets(k, j - 1);
-                        }
-                        cur_indices[0] = @indices(i, 0);
-
-                        uint64_t hash = 14695981039346656037UL;
-                        for (int j = 0; j < 4; ++ j ) {
-                            hash ^= (unsigned int)cur_indices[j];
-                            hash *= 1099511628211UL;
-                        }
-                        hash = (hash >> 60) ^ (hash & 0xFFFFFFFFFFFFFFF);
-                        @out(k, i) = hash;
-                    }
-                }
-                int thread_nums = 512;
-                kernel_hash_kernel<<< (indices_shape0 * offsets_shape0 + thread_nums - 1) / thread_nums, thread_nums >>>(@ARGS);
-            """,
-            cpu_header="""
-                #include <cstdint>
-                @alias(indices, in0)
-                @alias(offsets, in1)
-            """,
-            cpu_src="""
-                auto K = offsets_shape0;
-                auto N = indices_shape0;
-
-                for (int k = 0; k < offsets_shape0; ++ k ) {
-                    #pragma omp parallel for
-                    for (int i = 0; i < indices_shape0; ++ i ) {
-                        int cur_indices[4];
-                        for (int j = 1; j <= 3; ++ j ) {
-                            cur_indices[j] = @indices(i, j) + @offsets(k, j - 1);
-                        }
-                        cur_indices[0] = @indices(i, 0);
-                        uint64_t hash = 14695981039346656037UL;
-                        for (int j = 0; j < 4; ++ j ) {
-                            hash ^= (unsigned int)cur_indices[j];
-                            hash *= 1099511628211UL;
-                        }
-                        hash = (hash >> 60) ^ (hash & 0xFFFFFFFFFFFFFFF);
-                        @out(k, i) = hash;
-                    }
-                }
-            """
-        )
diff --git a/python/JSparse/nn/functional/pooling.py b/python/JSparse/nn/functional/pooling.py
deleted file mode 100644
index f34682f..0000000
--- a/python/JSparse/nn/functional/pooling.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import jittor as jt
-
-from JSparse import SparseTensor
-
-__all__ = ['global_avg_pool', 'global_max_pool']
-
-def global_avg_pool(inputs: SparseTensor) -> jt.Var:
-    batch_size = jt.max(inputs.indices[:, 0]).item() + 1
-    outputs = []
-    for k in range(batch_size):
-        input = inputs.values[inputs.indices[:, 0] == k]
-        output = jt.mean(input, dim=0)
-        outputs.append(output)
-    outputs = jt.stack(outputs, dim=0)
-    return outputs
-
-
-def global_max_pool(inputs: SparseTensor) -> jt.Var:
-    batch_size = jt.max(inputs.indices[:, 0]).item() + 1
-    outputs = []
-    for k in range(batch_size):
-        input = inputs.values[inputs.indices[:, 0] == k]
-        output = jt.max(input, dim=0)
-        outputs.append(output)
-    outputs = jt.stack(outputs, dim=0)
-    return outputs
\ No newline at end of file
diff --git a/python/JSparse/nn/functional/query.py b/python/JSparse/nn/functional/query.py
deleted file mode 100644
index 45515a1..0000000
--- a/python/JSparse/nn/functional/query.py
+++ /dev/null
@@ -1,454 +0,0 @@
-import jittor as jt
-import math
-
-__all__ = ['spquery']
-
-def spquery(queries: jt.Var,
-            references: jt.Var) -> jt.Var:
-    q_size = queries.size()
-    queries = queries.view(-1)
-
-    indices = jt.arange(len(references), dtype=jt.int64)
-
-    if jt.flags.use_cuda > 0:
-        n = references.shape[0]
-        nextPow2 = pow(2, math.ceil(math.log2(n)))
-
-        table_size = (4 * nextPow2) if (n < 2048) else (2 * nextPow2)
-        if table_size < 512:
-            table_size = 512
-        num_funcs = 3
-        key_buf = jt.zeros((table_size,), dtype='int64')
-        val_buf = jt.zeros((table_size,), dtype='int64')
-        key = jt.zeros((num_funcs * table_size,), dtype='int64')
-        val = jt.zeros((num_funcs * table_size,), dtype='int64')
-
-        output = jt.code(queries.shape, jt.int64, [queries, references, indices, key_buf, val_buf, key, val],
-            cuda_header="""
-                #include <cmath>
-                #include <cstdint>
-                #include <cstdio>
-                #include <cstdlib>
-                #include <ctime>
-                #include <cuda.h>
-                #include <cuda_runtime.h>
-
-                /** Reserved value for indicating "empty". */
-                #define EMPTY_CELL (0)
-                /** Max rehashing depth, and error depth. */
-                #define MAX_DEPTH (100)
-                #define ERR_DEPTH (-1)
-                /** CUDA naive thread block size. */
-                #define BLOCK_SIZE (256)
-                /** CUDA multi-level thread block size = bucket size. */
-                #define BUCKET_SIZE (512)
-
-                __device__ static uint64_t atomicExch(uint64_t *addr, uint64_t val) {
-                    return (uint64_t)atomicExch((unsigned long long int *)addr,
-                                                (unsigned long long int)val);
-                }
-
-                /** Struct of a hash function config. */
-                typedef struct {
-                    int rv;  // Randomized XOR value.
-                    int ss;  // Randomized shift filter start position.
-                } FuncConfig;
-
-                /** Hard code hash functions and all inline helper functions for CUDA kernels'
-                 *  use. */
-                inline __device__ int do_1st_hash(const uint64_t val, const int num_buckets) {
-                    return val % num_buckets;
-                }
-
-                inline __device__ int do_2nd_hash(const uint64_t val,
-                                                  const FuncConfig *const hash_func_configs,
-                                                  const int func_idx, const int size) {
-                    FuncConfig fc = hash_func_configs[func_idx];
-                    return ((val ^ fc.rv) >> fc.ss) % size;  // XOR function as 2nd-level hashing.
-                }
-
-                // trying to ignore EMPTY_CELL by adding 1 at make_data.
-                inline __device__ uint64_t fetch_val(const uint64_t data, const int pos_width) {
-                    return data >> pos_width;
-                }
-
-                inline __device__ int fetch_func(const uint64_t data, const int pos_width) {
-                    return data & ((0x1 << pos_width) - 1);
-                }
-
-                inline __device__ uint64_t make_data(const uint64_t val, const int func,
-                                                     const int pos_width) {
-                    return (val << pos_width) ^ func;
-                }
-
-                class CuckooHashTableCuda_Multi {
-                 private:
-                    const int _size;
-                    const int _evict_bound;
-                    const int _num_funcs;
-                    const int _pos_width;
-                    const int _num_buckets;
-
-                    FuncConfig *_d_hash_func_configs;
-
-                    /** Cuckoo hash function set. */
-                    FuncConfig *_hash_func_configs;
-
-                    /** Private operations. */
-                    void gen_hash_funcs() {
-                        // Calculate bit width of value range and table size.
-                        int val_width = 8 * sizeof(uint64_t) - ceil(log2((double)_num_funcs));
-                        int bucket_width = ceil(log2((double)_num_buckets));
-                        int size_width = ceil(log2((double)BUCKET_SIZE));
-                        // Generate randomized configurations.
-                        for (int i = 0; i < _num_funcs; ++i) {  // At index 0 is a dummy function.
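-                            // Each FuncConfig pairs a random XOR value (rv) with a shift
-                            // start (ss); do_2nd_hash() then computes ((val ^ rv) >> ss) % size,
-                            // so regenerating the configs on a rehash yields fresh functions.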
- if (val_width - bucket_width <= size_width) - _hash_func_configs[i] = {rand(), 0}; - else { - _hash_func_configs[i] = { - rand(), rand() % (val_width - bucket_width - size_width + 1) + - bucket_width}; - } - } - }; - - inline uint64_t fetch_val(const uint64_t data) { return data >> _pos_width; } - inline int fetch_func(const uint64_t data) { - return data & ((0x1 << _pos_width) - 1); - } - - public: - CuckooHashTableCuda_Multi(const int size, const int evict_bound, - const int num_funcs) - : _size(size), - _evict_bound(evict_bound), - _num_funcs(num_funcs), - _pos_width(ceil(log2((double)_num_funcs))), - _num_buckets(ceil((double)_size / BUCKET_SIZE)) { - srand(time(NULL)); - _d_hash_func_configs = NULL; - _hash_func_configs = NULL; - _hash_func_configs = new FuncConfig[num_funcs]; - - gen_hash_funcs(); - - cudaMalloc((void **)&_d_hash_func_configs, _num_funcs * sizeof(FuncConfig)); - cudaMemcpy(_d_hash_func_configs, _hash_func_configs, - _num_funcs * sizeof(FuncConfig), cudaMemcpyHostToDevice); - }; - ~CuckooHashTableCuda_Multi() { - if (_hash_func_configs != NULL) delete[] _hash_func_configs; - - if (_d_hash_func_configs != NULL) cudaFree(_d_hash_func_configs); - }; - - int insert_vals(const uint64_t *const keys, const uint64_t *const vals, - uint64_t *d_key_buf, uint64_t *d_val_buf, uint64_t *d_key, - uint64_t *d_val, const int n); - - void lookup_vals(const uint64_t *const keys, uint64_t *const results, - uint64_t *d_key, uint64_t *d_val, const int n); - }; - - __global__ void cuckooBucketKernel_Multi( - uint64_t *const key_buf, uint64_t *const val_buf, const int size, - const uint64_t *const keys, const uint64_t *const vals, const int n, - int *const counters, const int num_buckets); - - __global__ void cuckooInsertKernel_Multi( - uint64_t *const key, uint64_t *const val, const uint64_t *const key_buf, - const uint64_t *const val_buf, const int size, - const FuncConfig *const hash_func_configs, const int num_funcs, - const int *const counters, const int num_buckets, const int evict_bound, - const int pos_width, int *const rehash_requests); - - __global__ void cuckooLookupKernel_Multi( - const uint64_t *const keys, uint64_t *const results, const int n, - const uint64_t *const all_keys, const uint64_t *const all_vals, - const int size, const FuncConfig *const hash_func_configs, - const int num_funcs, const int num_buckets, const int pos_width); - - __global__ void cuckooBucketKernel_Multi( - uint64_t *const key_buf, uint64_t *const val_buf, const int size, - const uint64_t *const keys, const uint64_t *const vals, const int n, - int *const counters, const int num_buckets) { - // Get thread index. - int idx = threadIdx.x + blockIdx.x * blockDim.x; - - // Only threads within range are active. - if (idx < n) { - // Do 1st-level hashing to get bucket id, then do atomic add to get index - // inside the bucket. - uint64_t key = keys[idx]; - uint64_t val = vals[idx]; - - int bucket_num = do_1st_hash(key, num_buckets); - int bucket_ofs = atomicAdd(&counters[bucket_num], 1); - - // Directly write the key into the table buffer. - if (bucket_ofs >= BUCKET_SIZE) { - printf("%d/%d ERROR: bucket overflow! 
n=%d, bucket_num=%d/%d, key=%d", bucket_ofs, BUCKET_SIZE, n, bucket_num, num_buckets, key); - } else { - key_buf[bucket_num * BUCKET_SIZE + bucket_ofs] = key; - val_buf[bucket_num * BUCKET_SIZE + bucket_ofs] = val; - } - } - } - - __global__ void cuckooInsertKernel_Multi( - uint64_t *const key, uint64_t *const val, const uint64_t *const key_buf, - const uint64_t *const val_buf, const int size, - const FuncConfig *const hash_func_configs, const int num_funcs, - const int *const counters, const int num_buckets, const int evict_bound, - const int pos_width, int *const rehash_requests) { - // Create local cuckoo table in shared memory. Size passed in as the third - // kernel parameter. - extern __shared__ uint64_t local_key[]; - for (int i = 0; i < num_funcs; ++i) { - local_key[i * BUCKET_SIZE + threadIdx.x] = EMPTY_CELL; - } - - // might be useful - __syncthreads(); - - // Get thread index. - int idx = threadIdx.x + blockIdx.x * blockDim.x; - uint64_t cur_idx = idx; - - // Only threads within local bucket range are active. - if (threadIdx.x < counters[blockIdx.x]) { - // Set initial conditions. - uint64_t cur_key = key_buf[cur_idx]; - int cur_func = 0; - int evict_count = 0; - - // Start the test-kick-and-reinsert loops. - do { - int pos = do_2nd_hash(cur_key, hash_func_configs, cur_func, BUCKET_SIZE); - - uint64_t new_data = make_data(cur_idx + 1, cur_func, pos_width); - - uint64_t old_idx = - atomicExch(&local_key[cur_func * BUCKET_SIZE + pos], new_data); - - if (old_idx != EMPTY_CELL) { - cur_idx = fetch_val(old_idx, pos_width) - 1; - // potential overflow here. It seems that cur_idx < 0 is possible! - cur_key = key_buf[cur_idx]; - cur_func = (fetch_func(old_idx, pos_width) + 1) % num_funcs; - evict_count++; - } else { - break; - } - - } while (evict_count < num_funcs * evict_bound); - - // If exceeds eviction bound, then needs rehashing. - if (evict_count >= num_funcs * evict_bound) { - atomicAdd(rehash_requests, 1); - } - } - - // Every thread write its responsible local slot into the global data table. - __syncthreads(); - for (int i = 0; i < num_funcs; ++i) { - uint64_t cur_idx = local_key[i * BUCKET_SIZE + threadIdx.x]; - if (cur_idx == EMPTY_CELL) { - continue; - } - int cur_func = fetch_func(cur_idx, pos_width); - cur_idx = fetch_val(cur_idx, pos_width) - 1; - key[i * size + idx] = key_buf[cur_idx]; - val[i * size + idx] = val_buf[cur_idx]; - } - } - - __global__ void cuckooLookupKernel_Multi( - const uint64_t *const keys, uint64_t *const results, const int n, - const uint64_t *const all_keys, const uint64_t *const all_vals, - const int size, const FuncConfig *const hash_func_configs, - const int num_funcs, const int num_buckets, const int pos_width) { - int idx = threadIdx.x + blockIdx.x * blockDim.x; - - // Only threads within range are active. - if (idx < n) { - uint64_t key = keys[idx]; - int bucket_num = do_1st_hash(key, num_buckets); - for (int i = 0; i < num_funcs; ++i) { - int pos = bucket_num * BUCKET_SIZE + - do_2nd_hash(key, hash_func_configs, i, BUCKET_SIZE); - if (all_keys[i * size + pos] == key) { - results[idx] = all_vals[i * size + pos] + 1; - return; - } - } - - // TODO(Haotian): should be a value that will not be encountered. - results[idx] = EMPTY_CELL; - } - } - - void CuckooHashTableCuda_Multi::lookup_vals(const uint64_t *const keys, - uint64_t *d_key, uint64_t *d_val, - uint64_t *const results, - const int n) { - // Launch the lookup kernel. 
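-        // One thread per query key: do_1st_hash() routes the key to its bucket and
-        // each of the num_funcs sub-tables is probed; hits store val + 1 into
-        // results, while EMPTY_CELL (0) is left to mean "not found" (the Python
-        // wrapper subtracts 1 from the output afterwards).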
-        cuckooLookupKernel_Multi<<< ceil((double)n / BUCKET_SIZE), BUCKET_SIZE >>>(
-            keys, results, n, d_key, d_val, _size, _d_hash_func_configs, _num_funcs,
-            _num_buckets, _pos_width);
-    }
-
-    int CuckooHashTableCuda_Multi::insert_vals(const uint64_t *const keys,
-                                               const uint64_t *const vals,
-                                               uint64_t *d_key_buf,
-                                               uint64_t *d_val_buf, uint64_t *d_key,
-                                               uint64_t *d_val, const int n) {
-        //
-        // Phase 1: Distribute keys into buckets.
-        //
-
-        // Allocate GPU memory.
-
-        int *d_counters = NULL;
-
-        cudaMalloc((void **)&d_counters, _num_buckets * sizeof(int));
-
-        cudaMemset(d_counters, 0, _num_buckets * sizeof(int));
-
-        // Invoke bucket kernel.
-        cuckooBucketKernel_Multi<<< ceil((double)n / BUCKET_SIZE), BUCKET_SIZE >>>(
-            d_key_buf, d_val_buf, _size, keys, vals, n, d_counters, _num_buckets);
-
-        //
-        // Phase 2: Local cuckoo hashing.
-        //
-
-        // Allocate GPU memory.
-
-        cudaDeviceSynchronize();
-        int *d_rehash_requests = NULL;
-
-        cudaMalloc((void **)&d_rehash_requests, sizeof(int));
-
-        // Copy values onto GPU memory.
-        cudaMemcpy(_d_hash_func_configs, _hash_func_configs,
-                   _num_funcs * sizeof(FuncConfig), cudaMemcpyHostToDevice);
-
-        // Invoke insert kernel. Passes shared memory table size by the third
-        // argument. Loops until no rehashing needed.
-
-        int rehash_count = 0;
-        do {
-            int rehash_requests = 0;
-            cudaMemset(d_rehash_requests, 0, sizeof(int));
-            cuckooInsertKernel_Multi<<< _num_buckets, BUCKET_SIZE,
-                                        _num_funcs * BUCKET_SIZE * sizeof(uint64_t) >>>(
-                d_key, d_val, d_key_buf, d_val_buf, _size, _d_hash_func_configs,
-                _num_funcs, d_counters, _num_buckets, _evict_bound, _pos_width,
-                d_rehash_requests);
-            cudaMemcpy(&rehash_requests, d_rehash_requests, sizeof(int),
-                       cudaMemcpyDeviceToHost);
-
-            if (rehash_requests == 0) {
-                break;
-            } else {
-                rehash_count++;
-                gen_hash_funcs();
-                cudaMemcpy(_d_hash_func_configs, _hash_func_configs,
-                           _num_funcs * sizeof(FuncConfig), cudaMemcpyHostToDevice);
-            }
-        } while (rehash_count < MAX_DEPTH);
-
-        cudaDeviceSynchronize();
-
-        // Free GPU resources.
-
-        if (d_counters != NULL) {
-            cudaFree(d_counters);
-        }
-        if (d_rehash_requests != NULL) {
-            cudaFree(d_rehash_requests);
-        }
-
-        return (rehash_count < MAX_DEPTH) ? rehash_count : ERR_DEPTH;
-    }
-
-    @alias(hash_query, in0)
-    @alias(hash_target, in1)
-    @alias(idx_target, in2)
-    @alias(key_buf, in3)
-    @alias(val_buf, in4)
-    @alias(key, in5)
-    @alias(val, in6)
-            """,
-            cuda_src="""
-                int n = hash_target_shape0;
-                int n1 = hash_query_shape0;
-                const int nextPow2 = pow(2, ceil(log2((double)n)));
-                // When n is large, the hash values tend to be more evenly distributed and
-                // choosing table_size to be 2 * nextPow2 typically suffices. For smaller n,
-                // the effect of uneven distribution of hash values is more pronounced and
-                // hence we choose table_size to be 4 * nextPow2 to reduce the chance of
-                // bucket overflow.
-                int table_size = (n < 2048) ? 4 * nextPow2 : 2 * nextPow2;
-                if (table_size < 512) {
-                    table_size = 512;
-                }
-                int num_funcs = 3;
-
-                CuckooHashTableCuda_Multi in_hash_table(table_size, 8 * ceil(log2((double)n)),
-                                                        num_funcs);
-                in_hash_table.insert_vals((uint64_t *)(hash_target_p),
-                                          (uint64_t *)(idx_target_p),
-                                          (uint64_t *)(key_buf_p),
-                                          (uint64_t *)(val_buf_p),
-                                          (uint64_t *)(key_p),
-                                          (uint64_t *)(val_p), n);
-                in_hash_table.lookup_vals((uint64_t *)(hash_query_p),
-                                          (uint64_t *)(key_p),
-                                          (uint64_t *)(val_p),
-                                          (uint64_t *)(out_p), n1);
-
-            """
-        )
-    else:
-        output = jt.code(queries.shape, jt.int64, [queries, references, indices],
-            cpu_header="""
-                #include <cmath>
-                #include <cstdint>
-                #include <cstdio>
-                #include <cstdlib>
-                #include <ctime>
-                #include <vector>
-                #include <google/dense_hash_map>
-
-                @alias(hash_query, in0)
-                @alias(hash_target, in1)
-                @alias(idx_target, in2)
-            """,
-            cpu_src="""
-                int n = hash_target_shape0;
-                int n1 = hash_query_shape0;
-
-                google::dense_hash_map<int64_t, int64_t> hashmap;  // Google Sparse Hash library
-                hashmap.set_empty_key(0);
-
-                for (int idx = 0; idx < n; ++ idx ) {
-                    int64_t key = @hash_target(idx);
-                    int64_t val = @idx_target(idx) + 1;
-                    hashmap.insert(std::make_pair(key, val));
-                }
-
-                #pragma omp parallel for
-                for (int idx = 0; idx < n1; ++ idx ) {
-                    int64_t key = @hash_query(idx);
-                    auto iter = hashmap.find(key);
-                    if (iter != hashmap.end()) {
-                        @out(idx) = iter->second;
-                    } else @out(idx) = 0;
-                }
-            """
-        )
-    output = (output - 1).view(*q_size)
-    return output
-    # return indices
diff --git a/python/JSparse/nn/functional/voxelize.py b/python/JSparse/nn/functional/voxelize.py
deleted file mode 100644
index 47e4d53..0000000
--- a/python/JSparse/nn/functional/voxelize.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import jittor as jt
-from jittor import Function
-
-from JSparse import SparseTensor
-
-__all__ = ['spvoxelize']
-
-class Voxelize(Function):
-    def execute(
-        self,
-        values: jt.Var,
-        idx_query: jt.Var,
-        counts: jt.Var
-    ) -> jt.Var:
-        # N = values_shape0
-        # c = values_shape1
-        # N1 = counts_shape0
-        # out: N1 x c
-        output = jt.code((counts.shape[0], values.shape[1]), "float32", [values, idx_query, counts],
-            cuda_header="""
-                #include <cuda.h>
-                #include <cuda_runtime.h>
-                #include <cmath>
-            """,
-            cuda_src="""
-                __global__ void voxelize_forward_kernel(@ARGS_DEF) {
-                    @PRECALC
-                    @alias(values, in0)
-                    @alias(idx_query, in1)
-                    @alias(counts, in2)
-
-                    int index = blockDim.x * blockIdx.x + threadIdx.x;
-                    int c = values_shape1;
-                    int i = index / c;
-                    int j = index % c;
-
-                    if (i < values_shape0) {
-                        int pos = @idx_query(i);
-                        if (pos < 0 || pos >= counts_shape0 || @counts(pos) == 0) return;
-                        atomicAdd(&@out(pos, j), @values(i, j) / (float)(@counts(pos)));
-                    }
-                }
-                @alias(values, in0)
-                voxelize_forward_kernel<<< values_shape0, values_shape1 >>>(@ARGS);
-            """,
-            cpu_src="""
-                @alias(values, in0)
-                @alias(idx_query, in1)
-                @alias(counts, in2)
-                for (int i = 0; i < values_shape0; ++ i ) {
-                    int pos = @idx_query(i);
-                    if (@counts(pos) == 0)
-                        continue;
-                    #pragma omp parallel for
-                    for (int j = 0; j < values_shape1; ++ j ) {
-                        #pragma omp atomic
-                        @out(pos, j) += @values(i, j) / (float)@counts(pos);
-                    }
-                }
-            """
-        )
-        self.save_vars = idx_query, counts, values.shape[0]
-        return output
-
-    def grad(self, grad_output: jt.Var):
-        idx_query, counts, input_size = self.save_vars
-
-        grad_values = jt.code((input_size, grad_output.shape[1]), jt.float32, [idx_query, counts, grad_output],
-            cuda_header="""
-                #include <cuda.h>
-                #include <cuda_runtime.h>
-                #include <cmath>
-            """,
-            cuda_src="""
-                __global__ void voxelize_backward_kernel(@ARGS_DEF) {
-                    @PRECALC
-                    @alias(idx_query, in0)
-                    @alias(counts, in1)
-                    @alias(grad_output, in2)
-                    int index = blockDim.x * blockIdx.x + threadIdx.x;
-                    int i = index / grad_output_shape1;
-                    int j = index % grad_output_shape1;
-                    if (i < out_shape0) {
-                        int pos = @idx_query(i);
-                        if (pos < 0 || pos >= counts_shape0 || @counts(pos) == 0) return;
-                        @out(i, j) = @grad_output(pos, j) / (float)@counts(pos);
-                    }
-                }
-
-                @alias(grad_output, in2)
-                voxelize_backward_kernel<<< out_shape0, grad_output_shape1 >>>(@ARGS);
-            """,
-            cpu_src="""
-                @alias(idx_query, in0)
-                @alias(counts, in1)
-                @alias(grad_output, in2)
-
-                for (int i = 0; i < out_shape0; ++ i ) {
-                    int pos = @idx_query(i);
-                    if (@counts(pos) == 0) continue;
-                    #pragma omp parallel for
-                    for (int j = 0; j < grad_output_shape1; ++ j ) {
-                        @out(i, j) = @grad_output(pos, j) / (float)@counts(pos);
-                    }
-                }
-            """
-        )
-        return grad_values, None, None
-
-def spvoxelize(
-    values: jt.Var,
-    idx_query: jt.Var,
-    counts: jt.Var
-) -> jt.Var:
-    return Voxelize.apply(values, idx_query, counts)
\ No newline at end of file
diff --git a/python/JSparse/nn/modules/__init__.py b/python/JSparse/nn/modules/__init__.py
deleted file mode 100644
index 82665d0..0000000
--- a/python/JSparse/nn/modules/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from .activation import *
-# from .bev import *
-from .conv import *
-# from .crop import *
-from .norm import *
-from .pooling import *
diff --git a/python/JSparse/nn/modules/activation.py b/python/JSparse/nn/modules/activation.py
deleted file mode 100644
index 015ae62..0000000
--- a/python/JSparse/nn/modules/activation.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import jittor as jt
-from jittor import nn
-
-from JSparse import SparseTensor
-from JSparse.nn.functional import relu, leaky_relu
-# from nn.utils import fapply
-
-__all__ = ['ReLU', 'LeakyReLU']
-
-# class ReLU(nn.ReLU):
-#     def execute(self, input: SparseTensor) -> SparseTensor:
-#         return fapply(input, super().execute)
-
-# class LeakyReLU(nn.LeakyReLU):
-#     def execute(self, input: SparseTensor) -> SparseTensor:
-#         return fapply(input, super().execute)
-
-Relu = jt.make_module(relu)
-ReLU = Relu
-Leaky_relu = jt.make_module(leaky_relu, 2)
-LeakyReLU = Leaky_relu
\ No newline at end of file
diff --git a/python/JSparse/nn/modules/conv.py b/python/JSparse/nn/modules/conv.py
deleted file mode 100644
index b776825..0000000
--- a/python/JSparse/nn/modules/conv.py
+++ /dev/null
@@ -1,85 +0,0 @@
-import math
-from typing import List, Tuple, Union
-
-import numpy as np
-import jittor as jt
-from jittor import nn
-from jittor import init
-from jittor.misc import _pair, _triple
-
-from JSparse import SparseTensor
-from JSparse.nn import functional as F
-# from utils import make_ntuple
-
-__all__ = ['Conv3d']
-
-class Conv3d(nn.Module):
-
-    def __init__(self,
-                 in_channels: int,
-                 out_channels: int,
-                 kernel_size: Union[int, Tuple[int, ...]] = 3,
-                 stride: Union[int, Tuple[int, ...]] = 1,
-                 dilation: int = 1,
-                 groups: int = 1,
-                 bias: bool = False,
-                 transposed: bool = False) -> None:
-        super().__init__()
-        self.in_channels = in_channels
-        self.out_channels = out_channels
-        # self.kernel_size = make_ntuple(kernel_size, ndim=3)
-        # self.stride = make_ntuple(stride, ndim=3)
-        # self.dilation = dilation
-        self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size, kernel_size)
-        self.stride = stride if isinstance(stride, tuple) else (stride, stride, stride)
-        self.dilation = dilation if isinstance(dilation, tuple) else (dilation, dilation, dilation)
-        self.groups = groups
-        assert in_channels % groups == 0, 'in_channels must be divisible by groups'
-        assert out_channels % groups == 0, 'out_channels must be divisible by groups'
-        self.transposed = transposed
-
-        self.kernel_volume = int(np.prod(self.kernel_size))
-        # if self.kernel_volume > 1:
-        #     self.kernel = nn.Parameter(
-        #         jt.zeros(self.kernel_volume, in_channels, out_channels))
-        # else:
-        #     self.kernel = nn.Parameter(jt.zeros(in_channels, out_channels))
-        # if bias:
-        #     self.bias = nn.Parameter(jt.zeros(out_channels))
-        # else:
-        #     self.register_parameter('bias', None)
-        # self.reset_parameters()
-        fan = (self.out_channels if self.transposed else self.in_channels) * self.kernel_volume
-        std = 1 / math.sqrt(fan)
-
-        if self.kernel_volume > 1:
-            self.weight = init.uniform([self.kernel_volume, in_channels, out_channels], 'float32', -std, std)
-        else:
-            self.weight = init.uniform([in_channels, out_channels], 'float32', -std, std)
-        if bias:
-            self.bias = init.uniform([out_channels], "float32", -std, std)
-        else:
-            self.bias = None
-        # self.reset_parameters()
-
-    def execute(self, input: SparseTensor) -> SparseTensor:
-        return F.conv3d(input,
-                        weight=self.weight,
-                        kernel_size=self.kernel_size,
-                        bias=self.bias,
-                        stride=self.stride,
-                        dilation=self.dilation,
-                        group=self.groups,
-                        transposed=self.transposed)
-
-    # def set_parameters(self) -> None:
-    #     std = 1 / math.sqrt(
-    #         (self.out_channels if self.transposed else self.in_channels)
-    #         * self.kernel_volume)
-    #     self.weight *= std
-    #     if self.bias is not None:
-    #         self.bias *= std
-
-
-
diff --git a/python/JSparse/nn/modules/norm.py b/python/JSparse/nn/modules/norm.py
deleted file mode 100644
index f0064de..0000000
--- a/python/JSparse/nn/modules/norm.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import jittor as jt
-from jittor import nn
-from numpy import kaiser
-
-from JSparse import SparseTensor
-from JSparse.nn.utils import fapply
-
-__all__ = ['BatchNorm', 'GroupNorm']
-
-class BatchNorm(nn.BatchNorm):
-    def execute(self, input: SparseTensor) -> SparseTensor:
-        return fapply(input, super().execute)
-
-class GroupNorm(nn.GroupNorm):
-    def execute(self, input: SparseTensor) -> SparseTensor:
-        indices, values, stride, size = input.indices, input.values, input.stride, input.size
-
-        batch_size = jt.max(indices[:, 0]).item() + 1
-        num_channels = values.shape[1]
-
-        n_values = jt.zeros_like(values)
-        for k in range(batch_size):
-            idx = indices[:, 0] == k
-            b_values = values[idx]
-            b_values = b_values.t().reshape(1, num_channels, -1)
-            b_values = super().execute(b_values)
-            b_values = b_values.reshape(num_channels, -1).t()
-            n_values[idx] = b_values
-
-        output = SparseTensor(indices=indices, values=n_values, stride=stride, size=size)
-        output.cmaps = input.cmaps
-        output.kmaps = input.kmaps
-        return output
-
diff --git a/python/JSparse/nn/modules/pooling.py b/python/JSparse/nn/modules/pooling.py
deleted file mode 100644
index b33c023..0000000
--- a/python/JSparse/nn/modules/pooling.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from ast import Global
-import jittor as jt
-from jittor import nn
-
-from JSparse import SparseTensor
-from JSparse.nn.functional import global_avg_pool, global_max_pool
-
-__all__ = ['GlobalAvgPool', 'GlobalMaxPool']
-
-GlobalAvgPool = jt.make_module(global_avg_pool)
-GlobalMaxPool = jt.make_module(global_max_pool)
\ No newline at end of file
diff --git a/python/JSparse/nn/utils/__init__.py b/python/JSparse/nn/utils/__init__.py
deleted file mode 100644
index 919f5b0..0000000
--- a/python/JSparse/nn/utils/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from .apply import *
-from .kernel import *
\ No newline at end of file
diff --git a/python/JSparse/nn/utils/apply.py
b/python/JSparse/nn/utils/apply.py deleted file mode 100644 index ca241bd..0000000 --- a/python/JSparse/nn/utils/apply.py +++ /dev/null @@ -1,15 +0,0 @@ -from typing import Callable - -import jittor as jt - -from JSparse import SparseTensor - -__all__ = ['fapply'] - -def fapply(input: SparseTensor, fn: Callable[..., jt.Var], *args, - **kwargs) -> SparseTensor: - values = fn(input.values, *args, **kwargs) - output = SparseTensor(indices=input.indices, values=values, stride=input.stride, size=input.size) - output.cmaps = input.cmaps - output.kmaps = input.kmaps - return output \ No newline at end of file diff --git a/python/JSparse/nn/utils/kernel.py b/python/JSparse/nn/utils/kernel.py deleted file mode 100644 index b55dfe5..0000000 --- a/python/JSparse/nn/utils/kernel.py +++ /dev/null @@ -1,28 +0,0 @@ -from typing import Tuple, Union - -import numpy as np -import jittor as jt - -from JSparse.utils import make_ntuple, trunc - -__all__ = ['get_kernel_offsets'] - -def get_kernel_offsets(kernel_size: Union[int, Tuple[int, ...]], - stride: Union[int, Tuple[int, ...]] = 1, - dilation: Union[int, Tuple[int, ...]] = 1) -> jt.Var: - kernel_size = make_ntuple(kernel_size, ndim=3) - stride = make_ntuple(stride, ndim=3) - dilation = make_ntuple(dilation, ndim=3) - - offsets = [(np.arange(-kernel_size[k] // 2 + 1, kernel_size[k] // 2 + 1) * stride[k] - * dilation[k]) for k in range(3)] - - if np.prod(kernel_size) % 2 == 1: - offsets = [[x, y, z] for z in offsets[2] for y in offsets[1] - for x in offsets[0]] - else: - offsets = [[x, y, z] for x in offsets[0] for y in offsets[1] - for z in offsets[2]] - - offsets = jt.int32(offsets) - return offsets \ No newline at end of file diff --git a/python/JSparse/sparse.py b/python/JSparse/sparse.py deleted file mode 100644 index 50ca251..0000000 --- a/python/JSparse/sparse.py +++ /dev/null @@ -1,125 +0,0 @@ -from itertools import count -import numpy as np - -import jittor as jt -from jittor.misc import _pair, _triple - -from typing import Any, Dict, Tuple, Union -from type_check import type_check - -from JSparse.utils import make_ntuple, sparse_quantize, set_hash -# from .utils.quantize import sparse_quantize -# from indice_manager import IndiceManager - -class SparseTensor: - - @type_check - def __init__( - self, - indices: jt.Var, - values: jt.Var, - stride: Union[int, Tuple[int, ...]], - size=None, - quantize=True, - voxel_size=1, - coalesce_mode:str='sum', - indice_manager=None, - device=None, - ): - assert (values.ndim == 2) - # self.indices = indices - # self.values = values - self.size = size - self.ndim = indices.shape[1] - 1 - self.stride =_triple(stride) - self.voxel_size = voxel_size - self.coalesce_mode = coalesce_mode - self.cmaps = {} - self.kmaps = {} - - ########################## - # Setup CoordsManager - ########################## - # if indice_manager is None: - # self.indice_manager = IndiceManager( - # ndim=self.ndim, - - # ) - - ########################## - # Initialize coords - ########################## - if quantize: - self.seed = 1 - for i in range(len(self.stride)): - self.seed += i - self.seed *= self.stride[i] - self.hash_multiplier = set_hash(self.ndim, self.seed) - - self.hash_num, self.indices, mapping, inverse_mapping, count = sparse_quantize(indices, self.hash_multiplier, self.voxel_size, return_index=True, return_inverse=True, return_count=True) - self.inverse_mapping = inverse_mapping - - if len(values.shape) == 1: - out_size = (self.indices.shape[0], ) - elif len(values.shape) == 2: - out_size = (self.indices.shape[0], 
diff --git a/python/JSparse/sparse.py b/python/JSparse/sparse.py
deleted file mode 100644
index 50ca251..0000000
--- a/python/JSparse/sparse.py
+++ /dev/null
@@ -1,122 +0,0 @@
-import numpy as np
-
-import jittor as jt
-from jittor.misc import _pair, _triple
-
-from typing import Any, Dict, Tuple, Union
-from type_check import type_check
-
-from JSparse.utils import make_ntuple, sparse_quantize, set_hash
-# from .utils.quantize import sparse_quantize
-# from indice_manager import IndiceManager
-
-class SparseTensor:
-
-    @type_check
-    def __init__(
-        self,
-        indices: jt.Var,
-        values: jt.Var,
-        stride: Union[int, Tuple[int, ...]],
-        size=None,
-        quantize=True,
-        voxel_size=1,
-        coalesce_mode: str = 'sum',
-        indice_manager=None,
-        device=None,
-    ):
-        assert (values.ndim == 2)
-        # self.indices = indices
-        # self.values = values
-        self.size = size
-        self.ndim = indices.shape[1] - 1
-        self.stride = _triple(stride)
-        self.voxel_size = voxel_size
-        self.coalesce_mode = coalesce_mode
-        self.cmaps = {}
-        self.kmaps = {}
-
-        ##########################
-        # Setup CoordsManager
-        ##########################
-        # if indice_manager is None:
-        #     self.indice_manager = IndiceManager(
-        #         ndim=self.ndim,
-
-        #     )
-
-        ##########################
-        # Initialize coords
-        ##########################
-        if quantize:
-            self.seed = 1
-            for i in range(len(self.stride)):
-                self.seed += i
-                self.seed *= self.stride[i]
-            self.hash_multiplier = set_hash(self.ndim, self.seed)
-
-            self.hash_num, self.indices, mapping, inverse_mapping, count = sparse_quantize(indices, self.hash_multiplier, self.voxel_size, return_index=True, return_inverse=True, return_count=True)
-            self.inverse_mapping = inverse_mapping
-
-            if len(values.shape) == 1:
-                out_size = (self.indices.shape[0], )
-            elif len(values.shape) == 2:
-                out_size = (self.indices.shape[0], values.shape[-1])
-
-            if self.coalesce_mode == 'sum':
-                self.values = jt.zeros(out_size, dtype=values.dtype).scatter_(0, inverse_mapping, values, reduce='add')
-            elif self.coalesce_mode == 'average':
-                self.values = jt.zeros(out_size, dtype=values.dtype).scatter_(0, inverse_mapping, values, reduce='add')
-                self.values /= count
-            elif self.coalesce_mode == 'sample':
-                self.values = values[mapping]  # one representative row per voxel
-        else:
-            self.indices = indices
-            self.values = values
-
-        # if indice_manager is None:
-        #     # TODO If set to share the indices man, use the global indices man
-
-        #     # init the indices
-        #     indice_manager = Indice
-
-
-    def _indices(self):
-        return self.indices
-
-    def _values(self):
-        return self.values
-
-    def _binary_operation(self, other, _binary_op):
-        assert isinstance(other, self.__class__)
-        return
-        # TODO set up the indices dict
-        # so that we do not need to merge the indice group
-        # which has already been merged
-
-        # if the indices of self and other should be merged
-
-
-class PointTensor:
-
-    def __init__(self, values, indices, idx_query=None, weights=None):
-        self.values = values
-        self.indices = indices
-        self.idx_query = idx_query if idx_query is not None else {}
-        self.weights = weights if weights is not None else {}
-        self.additional_values = {}
-        self.additional_values['idx_query'] = {}
-        self.additional_values['counts'] = {}
-
-    def detach(self):
-        self.values = self.values.detach()
-        self.indices = self.indices.detach()
-        return self
-
-    def __add__(self, other):
-        pt = PointTensor(self.values + other.values, self.indices, self.idx_query,
-                         self.weights)
-        pt.additional_values = self.additional_values
-        return pt
-
-
diff --git a/python/JSparse/sparse_dense_function.py b/python/JSparse/sparse_dense_function.py
deleted file mode 100644
index f8bb855..0000000
--- a/python/JSparse/sparse_dense_function.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import jittor as jt
-from jittor import Function
-
-
-def spmm(
-    rows: jt.Var,
-    cols: jt.Var,
-    vals: jt.Var,
-    size: jt.NanoVector,
-    mat: jt.Var,
-    spmm_mode='scatter',
-    is_sorted: bool = False,
-    cuda_spmm_alg: int = 1,
-) -> jt.Var:
-    assert len(rows) == len(cols), "Invalid length"
-    assert len(rows) == len(vals), "Invalid length"
-    assert vals.dtype == mat.dtype, "dtype mismatch"
-
-    if jt.flags.use_cuda > 1:
-        assert jt.has_cuda == 1, "No GPUs available"
-        rows = rows.int()
-        cols = cols.int()
-        '''
-        TODO: Using the coo_spmm of cuSPARSE on GPU
-        result = coo_spmm_int32(
-            rows, cols, vals, size[0], size[1], mat, cuda_spmm_alg, is_sorted
-        )
-        '''
-    else:
-        if (spmm_mode == 'scatter'):
-            # minimal scatter sketch (assumed semantics): out[rows] += vals * mat[cols]
-            prod = vals.unsqueeze(1) * mat[cols]
-            result = jt.zeros((size[0], mat.shape[1]), dtype=mat.dtype)
-            return result.scatter_(0, rows, prod, reduce='add')
-
-
-class SPMM(Function):
-    # TODO: autograd wrapper around spmm; execute/grad not implemented yet
-    pass
diff --git a/python/JSparse/utils/__init__.py b/python/JSparse/utils/__init__.py
deleted file mode 100644
index a57be25..0000000
--- a/python/JSparse/utils/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from .quantize import *
-from .utils import *
\ No newline at end of file
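Note: the scatter branch of spmm filled in above computes out[rows] += vals * mat[cols], i.e. a COO-sparse-matrix-times-dense-matrix product. A small worked example under that assumption (the entries are illustrative and assume the deleted module were importable):

    import jittor as jt

    # 2x3 sparse matrix {(0, 1): 2.0, (1, 2): 3.0} times a dense 3x2 matrix
    rows = jt.array([0, 1])
    cols = jt.array([1, 2])
    vals = jt.array([2.0, 3.0])
    mat = jt.array([[1.0, 0.0], [0.5, 1.0], [2.0, 2.0]])
    out = spmm(rows, cols, vals, (2, 3), mat)
    # expected: [[1.0, 2.0], [6.0, 6.0]]
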
diff --git a/python/JSparse/utils/quantize.py b/python/JSparse/utils/quantize.py
deleted file mode 100644
index 7d668f9..0000000
--- a/python/JSparse/utils/quantize.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from itertools import repeat
-from typing import List, Tuple, Union
-
-import jittor as jt
-import numpy as np
-
-from .utils import unique1d
-
-__all__ = ['sparse_quantize', 'set_hash']
-
-def set_hash(ndim, seed, low=100, high=1000):
-    jt.set_seed(seed)
-    return jt.randint(low, high, shape=(ndim + 1,), dtype='uint64')
-
-def hash(x: jt.Var, multiplier: jt.Var) -> jt.Var:
-    assert x.ndim == 2, x.shape
-
-    x = x - x.min(dim=0)
-    x = x.uint64()
-
-    h = jt.zeros(x.shape[0], dtype='uint64')
-    for k in range(x.shape[1] - 1):
-        h += x[:, k]
-        h *= multiplier[k]
-    h += x[:, -1]
-    return h
-
-def sparse_quantize(indices,
-                    hash_multiplier,
-                    voxel_size: Union[float, Tuple[float, ...]] = 1,
-                    *,
-                    return_index: bool = False,
-                    return_inverse: bool = False,
-                    return_count: bool = False) -> List[jt.Var]:
-    if indices.dtype.is_int() and voxel_size == 1:
-        pass
-    else:
-        if isinstance(voxel_size, (float, int)):
-            voxel_size = tuple(repeat(voxel_size, 3))
-        assert isinstance(voxel_size, tuple) and len(voxel_size) == 3
-
-        voxel_size = jt.Var(voxel_size)
-        indices[:, 1:] /= voxel_size
-        indices = jt.floor(indices).astype(jt.int32)
-
-    hash_num, mapping, inverse_mapping, count = unique1d(hash(indices, hash_multiplier))
-    indices = indices[mapping]
-
-    outputs = [hash_num, indices]
-    if return_index:
-        outputs += [mapping]
-    if return_inverse:
-        outputs += [inverse_mapping]
-    if return_count:
-        outputs += [count]
-    return outputs[0] if len(outputs) == 1 else outputs
-
-
diff --git a/python/JSparse/utils/utils.py b/python/JSparse/utils/utils.py
deleted file mode 100644
index 3abc773..0000000
--- a/python/JSparse/utils/utils.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from itertools import repeat
-from typing import List, Tuple, Union
-
-import jittor as jt
-
-__all__ = ['make_ntuple', 'trunc', 'unique1d']
-
-def make_ntuple(x: Union[int, List[int], Tuple[int, ...], jt.Var],
-                ndim: int) -> Tuple[int, ...]:
-    if isinstance(x, int):
-        x = tuple(repeat(x, ndim))
-    elif isinstance(x, list):
-        x = tuple(x)
-    elif isinstance(x, jt.Var):
-        x = tuple(x.view(-1).numpy().tolist())
-
-    assert isinstance(x, tuple) and len(x) == ndim, x
-    return x
-
-def trunc(x: jt.Var):
-    # elementwise round-toward-zero; a Python-level `if` cannot branch on a jt.Var
-    return jt.ternary(x >= 0, jt.floor(x), jt.ceil(x))
-
-
-def unique1d(var):
-    assert len(var.shape) == 1
-    perm, aux = jt.argsort(var)            # aux: sorted values, perm: source index
-    mask = jt.empty(aux.shape, dtype='bool')
-    mask[:1] = True
-    mask[1:] = aux[1:] != aux[:-1]         # True at the first occurrence of each value
-    ret = (aux[mask],)                     # unique values
-    ret += (perm[mask],)                   # index of first occurrence (mapping)
-    imask = jt.cumsum(mask.astype(perm.dtype)) - 1
-    inv_idx = jt.empty(mask.shape, dtype=perm.dtype)
-    inv_idx[perm] = imask
-    ret += (inv_idx,)                      # inverse mapping
-    idx = jt.concat([jt.nonzero(mask).view(-1), jt.Var(mask.shape[0])])
-    ret += (idx[1:] - idx[:-1],)           # counts per unique value
-    return ret
-
-
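Note: sparse_quantize above hashes each (batch, x, y, z) row with the multipliers from set_hash and keeps one representative per voxel via unique1d; mapping selects the kept rows, inverse_mapping sends every input row to its voxel. A rough usage sketch, with shapes and arguments assumed from the deleted code above:

    import jittor as jt
    from JSparse.utils import set_hash, sparse_quantize

    coords = jt.randint(0, 10, shape=(100, 4))   # (batch_idx, x, y, z) rows
    multiplier = set_hash(3, seed=1)
    hash_num, uniq, mapping, inverse, count = sparse_quantize(
        coords, multiplier, 1,
        return_index=True, return_inverse=True, return_count=True)
    assert uniq.shape[0] == mapping.shape[0]     # one row per unique voxel
    assert inverse.shape[0] == coords.shape[0]   # every input row is mapped
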
diff --git a/python/setup.py b/python/setup.py
deleted file mode 100644
index 6a826e0..0000000
--- a/python/setup.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# packaging script: install with `python setup.py install` from python/
-
-from setuptools import find_packages, setup
-
-with open("../README.md", "r") as file:
-    description = file.read()
-
-print(find_packages())
-setup(
-    name='JSparse',
-    version='0.1',
-    long_description=description,
-    packages=find_packages(),
-    install_requires=["jittor", "type_check"]
-)
\ No newline at end of file
diff --git a/voxelize_test.py b/voxelize_test.py
deleted file mode 100644
index c0f0c7a..0000000
--- a/voxelize_test.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import time
-import math
-import numpy as np
-import torch
-
-import jittor as jt
-import jittor.nn as nn
-from jittor import init
-from jittor.misc import _pair, _triple
-
-from itertools import repeat
-from typing import List, Tuple, Union
-
-from JSparse import SparseTensor
-from JSparse import PointTensor
-from JSparse.utils import make_ntuple
-from JSparse.nn import functional as F
-from JSparse.nn.utils import get_kernel_offsets
-from JSparse.nn.functional import Convolution
-
-import torchsparse
-from torchsparse import nn as spnn
-
-jt.flags.use_cuda = 1
-
-in_channels = 3
-out_channels = 64
-kernel_size = 3
-stride = 1
-dilation = 1
-groups = 1
-bias = False
-transposed = False
-
-kernel_size = _triple(kernel_size)
-stride = _triple(stride)
-dilation = _triple(dilation)
-kernel_volume = int(np.prod(kernel_size))
-
-N = 10
-coords = np.random.uniform(0, 10, size=(N, 4))
-feats = np.random.randn(N, 3)
-labels = np.random.choice(5, N)
-print(coords.shape)
-print(feats.shape)
-
-coo = jt.Var(coords)
-val = jt.Var(feats)
-size = (10, 10, 10)
-
-fan = (out_channels if transposed else in_channels) * kernel_volume
-std = 1 / math.sqrt(fan)
-
-if kernel_volume > 1:
-    weight = init.uniform([kernel_volume, in_channels, out_channels], 'float32', -std, std)
-else:
-    weight = init.uniform([in_channels, out_channels], 'float32', -std, std)
-if bias:
-    bias = init.uniform([out_channels], "float32", -std, std)
-else:
-    bias = None
-
-
-x = SparseTensor(coo, val, 1, size)
-z = PointTensor(x.values, x.indices.float())
-
-pc_hash = F.sphash(
-    jt.concat([
-        z.indices[:, 0].int().view(-1, 1),
-        jt.floor(z.indices[:, 1:] / x.stride[0]).int() * x.stride[0]
-    ], 1))
-sparse_hash = F.sphash(x.indices)
-idx_query = F.spquery(pc_hash, sparse_hash).int()
-counts = F.spcount(idx_query, x.indices.shape[0])
-z.additional_values['idx_query'][x.stride] = idx_query
-z.additional_values['counts'][x.stride] = counts
-inserted_values = F.spvoxelize(z.values, idx_query, counts)
-new_tensor = SparseTensor(inserted_values, x.indices, x.stride, x.size, False)
-new_tensor.cmaps = x.cmaps
-new_tensor.kmaps = x.kmaps
-print(inserted_values)
-
-offsets = get_kernel_offsets(kernel_size=2, stride=x.stride, dilation=1)
-old_hash = F.sphash(
-    jt.concat([
-        z.indices[:, 0].int().view(-1, 1),
-        jt.floor(z.indices[:, 1:] / x.stride[0]).int() * x.stride[0]
-    ], 1), offsets)
-pc_hash = F.sphash(x.indices)
-idx_query = F.spquery(old_hash, pc_hash).int()
-weights = F.calc_ti_weights(z.indices, idx_query,
-                            scale=x.stride[0]).t()
-idx_query = idx_query.t()
-new_values = F.spdevoxelize(x.values, idx_query, weights)
-
-print(jt.grad(new_values, x.values))
\ No newline at end of file
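Note: the final jt.grad call is the point of the deleted test: the output of spdevoxelize must be differentiable with respect to the voxel features it interpolates. A standalone analogue of that check in plain jittor, illustrative only and using no JSparse ops:

    import jittor as jt

    w = jt.rand(5, 3)          # fixed interpolation weights
    feats = jt.rand(5, 3)      # voxel features being differentiated
    out = (feats * w).sum()
    g = jt.grad(out, feats)    # d out / d feats == w
    assert g.shape == feats.shape
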