GPUCalculator

QuICT.ops.linalg.gpu_calculator ¶

MatrixPermutation ¶

MatrixPermutation(A, mapping, changeInput: bool = False, gpu_out: bool = False, sync: bool = True)

Permute the matrix A according to the qubit mapping; A itself is overwritten only when changeInput is True.

Parameters:

A(np.array<np.complex>) –

the matrix A.
mapping(np.array<int>) –

the qubit mapping.
changeInput(bool) –

whether changes in A.
gpu_out(bool) –

return result from GPU.
sync(bool) –

Whether to run in sync mode or async mode.

Source code in QuICT/ops/linalg/gpu_calculator.py

def MatrixPermutation(A, mapping, changeInput: bool = False, gpu_out: bool = False, sync: bool = True):
    """ Permute the matrix A according to a qubit mapping.

    Args:
        A(np.array<np.complex>): the matrix A; its first dimension must equal 2^len(mapping).
        mapping(np.array<int>): the qubit mapping.
        changeInput(bool): if True, the permuted matrix is written back into A and
            nothing is returned.
        gpu_out(bool): if True, the result is copied off the GPU (``.get()``) before it is
            returned; otherwise the GPU array itself is returned.
        sync(bool): whether to synchronize the device after the kernel launch.

    Returns:
        The permuted matrix, or None when changeInput is True.

    Raises:
        IndexError: if A.shape[0] is not 2^len(mapping).
    """
    qubit_count = mapping.shape[0]
    if A.shape[0] != 1 << qubit_count:
        raise IndexError("Indices do not match!")

    # An int64 mapping is narrowed to int32 before it is uploaded.
    if mapping.dtype == np.int64:
        mapping = mapping.astype(np.int32)

    host_input = type(A) is np.ndarray

    # data in GPU
    device_in = cp.array(A) if host_input else A
    device_map = cp.array(mapping)
    device_out = cp.empty_like(device_in)

    # Launch enough threads to cover every element of the result,
    # in blocks of at most 1024 threads.
    total_threads = device_out.size
    if A.dtype == np.complex64:
        permute_kernel = matrixp_single_kernel
    else:
        permute_kernel = matrixp_double_kernel
    grid = (math.ceil(total_threads / 1024),)
    block = (min(1024, total_threads),)
    permute_kernel(grid, block, (device_in, device_out, device_map, cp.int32(qubit_count)))

    if sync:
        cp.cuda.Device().synchronize()

    if changeInput:
        # A host array receives a host copy; a GPU array is filled directly.
        if host_input:
            A[:, :] = device_out.get()
        else:
            A[:, :] = device_out
        return None

    return device_out.get() if gpu_out else device_out

MatrixTensorI ¶

MatrixTensorI(A, n, m, gpu_out: bool = False, sync: bool = True)

Applying the Matrix Tensor operator to matrix A

\[ A' = I^n \otimes A \otimes I^m \]

Parameters:

A(np.array<np.complex>) –

the matrix A
n(int) –

the size of the identity matrix I^n (left factor)
m(int) –

the size of the identity matrix I^m (right factor)
gpu_out(bool) –

return result from GPU into CPU
sync(bool) –

Whether to run in sync mode or async mode.

Returns:

–

np.array: the tensor result I^n ⊗ A ⊗ I^m

Source code in QuICT/ops/linalg/gpu_calculator.py

def MatrixTensorI(A, n, m, gpu_out: bool = False, sync: bool = True):
    r""" Tensor the matrix A with identity matrices on both sides.

    $$ A' = I^n \otimes A \otimes I^m $$

    Args:
        A(np.array<np.complex>): the matrix A
        n(int): the size of the identity matrix I^n
        m(int): the size of the identity matrix I^m
        gpu_out(bool): if True, the result is copied off the GPU before it is returned
        sync(bool): whether to synchronize the device after the kernel launch

    Returns:
        np.array<np.complex>: the tensor result I^n ⊗ A ⊗ I^m. When n == m == 1 the
        input A is handed back as-is, whatever gpu_out says.
    """
    if n == m == 1:
        return A

    rows, cols = A.shape
    scale = n * m

    # Data in GPU.
    device_in = A if type(A) is not np.ndarray else cp.array(A)
    device_out = cp.zeros((rows * scale, cols * scale), dtype=A.dtype)

    # Thread count: every element of A times n * m, in blocks of at most 1024.
    total_threads = device_in.size * scale
    if A.dtype == np.complex64:
        tensor_kernel = matrixt_single_kernel
    else:
        tensor_kernel = matrixt_double_kernel
    kernel_args = (
        device_in,
        device_out,
        cp.int32(n),
        cp.int32(m),
        cp.int32(rows),
        cp.int32(cols),
        cp.longlong(device_out.size),
    )
    tensor_kernel(
        (math.ceil(total_threads / 1024),),
        (min(1024, total_threads),),
        kernel_args
    )

    if sync:
        cp.cuda.Device().synchronize()

    return device_out.get() if gpu_out else device_out

VectorPermutation ¶

VectorPermutation(A, mapping, changeInput: bool = False, gpu_out: bool = False, sync: bool = True)

Permute the vector A according to the qubit mapping; A itself is overwritten only when changeInput is True.

Parameters:

A(np.array<np.complex>) –

the vector A.
mapping(np.array<int>) –

the qubit mapping.
changeInput(bool) –

whether changes in A.
gpu_out(bool) –

return result from GPU.
sync(bool) –

Whether to run in sync mode or async mode.

Returns:

–

np.array: the result of Permutation

Source code in QuICT/ops/linalg/gpu_calculator.py

def VectorPermutation(A, mapping, changeInput: bool = False, gpu_out: bool = False, sync: bool = True):
    """ Permute the vector A according to a qubit mapping.

    Args:
        A(np.array<np.complex>): the vector A; its length must equal 2^len(mapping).
        mapping(np.array<int>): the qubit mapping.
        changeInput(bool): if True, the permuted vector is written back into A and
            nothing is returned.
        gpu_out(bool): if True, the result is copied off the GPU (``.get()``) before it is
            returned; otherwise the GPU array itself is returned.
        sync(bool): whether to synchronize the device after the kernel launch.

    Returns:
        np.array<np.complex>: the result of Permutation, or None when changeInput is True.

    Raises:
        IndexError: if A.shape[0] is not 2^len(mapping).
    """
    qubit_count = mapping.shape[0]
    if A.shape[0] != 1 << qubit_count:
        raise IndexError("Indices do not match!")

    # An int64 mapping is narrowed to int32 before it is uploaded.
    if mapping.dtype == np.int64:
        mapping = mapping.astype(np.int32)

    host_input = type(A) is np.ndarray

    # data in GPU
    device_in = cp.array(A) if host_input else A
    device_map = cp.array(mapping)
    device_out = cp.empty_like(device_in)

    # Launch enough threads to cover every element of the result,
    # in blocks of at most 1024 threads.
    total_threads = device_out.size
    if A.dtype == np.complex64:
        permute_kernel = vectorp_single_kernel
    else:
        permute_kernel = vectorp_double_kernel
    grid = (math.ceil(total_threads / 1024),)
    block = (min(1024, total_threads),)
    permute_kernel(grid, block, (device_in, device_out, device_map, cp.int32(qubit_count)))

    if sync:
        cp.cuda.Device().synchronize()

    if changeInput:
        # A host array receives a host copy; a GPU array is filled directly.
        if host_input:
            A[:] = device_out.get()
        else:
            A[:] = device_out
        return None

    return device_out.get() if gpu_out else device_out

dot ¶

dot(A, B, gpu_out: bool = False, sync: bool = True)

Applying the dot operator between A and B

Parameters:

A(np.array<np.complex>) –

the matrix A
B(np.array<np.complex>) –

the matrix B
gpu_out(bool) –

return result from GPU into CPU
sync(bool) –

Whether to run in sync mode or async mode.

Returns:

–

np.array: A * B

Source code in QuICT/ops/linalg/gpu_calculator.py

def dot(A, B, gpu_out: bool = False, sync: bool = True):
    """ Compute the dot product of A and B on the GPU.

    Args:
        A(np.array<np.complex>): the matrix A
        B(np.array<np.complex>): the matrix B; B.shape[0] must equal A.shape[1]
        gpu_out(bool): if True, the result is copied off the GPU before it is returned
        sync(bool): whether to synchronize the device after the product is issued

    Returns:
        np.array<np.complex>: A * B
    """
    inner_a, inner_b = A.shape[1], B.shape[0]
    assert inner_a == inner_b

    # Data in GPU: host arrays are uploaded, anything else is used as given.
    left = A if type(A) is not np.ndarray else cp.array(A)
    right = B if type(B) is not np.ndarray else cp.array(B)

    product = cp.dot(left, right)

    if sync:
        cp.cuda.Device().synchronize()

    return product.get() if gpu_out else product

matrix_dot_matrix ¶

matrix_dot_matrix(mat_u: Union[ndarray, ndarray], mat_g: Union[ndarray, ndarray], control_args: ndarray = None, target_args: ndarray = None, sync: bool = True)

Apply the quantum gate's matrix mat_g to the matrix mat_u, depending on the control and target qubits of the gate.

Parameters:

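mat_u (Union[np.ndarray, cp.ndarray]) –

The [2^n, 2^n] matrix that the gate is applied to.
mat_g (Union[np.ndarray, cp.ndarray]) –

The quantum gate's matrix.
control_args (np.ndarray, default: None ) –

The control qubit indexes of the gate.
target_args (np.ndarray, default: None ) –

The target qubit indexes of the gate.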
sync(bool) –

Whether to run in sync mode or async mode.

Returns:

–

np.ndarray: updated state vector

Source code in QuICT/ops/linalg/gpu_calculator.py

def matrix_dot_matrix(
    mat_u: Union[np.ndarray, cp.ndarray],
    mat_g: Union[np.ndarray, cp.ndarray],
    control_args: np.ndarray = None,
    target_args: np.ndarray = None,
    sync: bool = True
):
    """ Apply the quantum gate's matrix mat_g to the matrix mat_u, depending on the
    control and target qubits of the gate.

    A GPU mat_u is updated in place. A host (numpy) mat_u is first copied to the GPU, so
    its updated copy is only reachable through the return value.

    Args:
        mat_u (Union[np.ndarray, cp.ndarray]): The [2^n, 2^n] matrix the gate is applied to.
        mat_g (Union[np.ndarray, cp.ndarray]): The quantum gate's matrix.
        control_args (np.ndarray): The control qubit indexes of the gate, or None.
        target_args (np.ndarray): The target qubit indexes of the gate, or None.
        sync(bool): Whether to synchronize the device before returning.

    Returns:
        cp.ndarray: the updated matrix, located on the GPU.
    """
    # Matrix property
    # Step 1: Calculate the qubit count of the matrix and of the gate
    mat_length = mat_u.shape[0]
    mat_bit = int(np.log2(mat_length))
    len_c = 0 if control_args is None else len(control_args)
    len_t = 0 if target_args is None else len(target_args)
    gate_bit = len_c + len_t
    assert mat_bit >= gate_bit, "Vector length should larger than matrix."
    assert mat_length == 1 << mat_bit, "Matrix should be unitary and with [2^n, 2^n] shape."

    # Matrix preparation: host arrays are uploaded to the GPU
    if isinstance(mat_u, np.ndarray):
        mat_u = cp.array(mat_u, dtype=mat_u.dtype)

    if isinstance(mat_g, np.ndarray):
        mat_g = cp.array(mat_g, dtype=mat_g.dtype)

    # The gate covers every qubit and has no controls: a plain matrix product suffices.
    if mat_bit == gate_bit and len_c == 0:
        # NOTE(review): `out` aliases the second operand; this relies on cupy.dot
        # coping with overlapping input/output — confirm.
        cp.dot(mat_g, mat_u, out=mat_u)
        if sync:
            cp.cuda.Device().synchronize()

        return mat_u

    # Step 2: Get fixed index of vector by control indexes
    based_idx = 0
    if control_args is not None:
        for carg_idx in control_args:
            based_idx += 1 << carg_idx

    # Step 3: sorted target qubit indexes
    gate_args = cp.array(target_args, dtype=cp.int32)
    sorted_gate_args = np.append(control_args, target_args) if control_args is not None else target_args
    sorted_gate_args = cp.array(sorted_gate_args, dtype=cp.int32)
    sorted_gate_args.sort()

    # Step 4: start GPU Kernel function
    # task_number is a power of two, so the integer division below is exact.
    task_number = 1 << (2 * (mat_bit - gate_bit))
    thread_per_block = min(256, task_number)
    block_num = task_number // thread_per_block
    kernel_function = matrix_dot_matrix_single_kernel if mat_g.dtype == np.complex64 else matrix_dot_matrix_double_kernel
    kernel_function(
        (block_num,),
        (thread_per_block,),
        (
            mat_g, gate_bit, len_t, cp.int32(1 << len_t),
            mat_u, mat_bit, mat_length,
            gate_args, sorted_gate_args, cp.longlong(based_idx)
        )
    )

    if sync:
        cp.cuda.Device().synchronize()

    return mat_u

matrix_dot_vector ¶

matrix_dot_vector(vec: Union[ndarray, ndarray], vec_bit: int, mat: Union[ndarray, ndarray], mat_args: List[int], sync: bool = True)

Dot the quantum gate's matrix and the qubits' state vector, depending on the target qubits of the gate.

Parameters:

vec (ndarray) –

The state vector of qubits
vec_bit (int) –

The number of qubits
mat (ndarray) –

The 2D numpy array, represent the quantum gate's matrix
mat_args (List[int]) –

The qubits' indexes of matrix.
sync(bool) –

Whether to run in sync mode or async mode.

Returns:

–

np.ndarray: updated state vector

Source code in QuICT/ops/linalg/gpu_calculator.py

def matrix_dot_vector(
    vec: Union[np.ndarray, cp.ndarray],
    vec_bit: int,
    mat: Union[np.ndarray, cp.ndarray],
    mat_args: List[int],
    sync: bool = True
):
    """ Dot the quantum gate's matrix and the qubits' state vector, depending on the
    target qubits of the gate.

    When the gate acts on every qubit (vec_bit == len(mat_args)) the result of
    ``dot(mat, vec)`` is returned as a new array. Otherwise a GPU vec is updated in place;
    a host (numpy) vec is first copied to the GPU, so its updated copy is only reachable
    through the return value.

    Args:
        vec (Union[np.ndarray, cp.ndarray]): The state vector of qubits
        vec_bit (int): The number of qubits
        mat (Union[np.ndarray, cp.ndarray]): The 2D array, represent the quantum gate's matrix
        mat_args (List[int]): The qubits' indexes of matrix.
        sync(bool): Whether to synchronize the device before returning.

    Returns:
        cp.ndarray: updated state vector, located on the GPU
    """
    # Matrix property
    mat_bit = np.int32(len(mat_args))
    mat_length = np.int32(2 ** mat_bit)
    assert vec_bit >= mat_bit, "Vector length should larger than matrix."

    if vec_bit == mat_bit:
        return dot(mat, vec, sync=sync)

    # GPU preparation
    # task_number is a power of two, so the integer division below is exact.
    task_number = 1 << (vec_bit - mat_bit)
    thread_per_block = min(256, task_number)
    block_num = task_number // thread_per_block

    # The kernel receives the qubit indexes both in gate order and in ascending order.
    sorted_mat_args = mat_args.copy()
    sorted_mat_args.sort()
    mat_args = cp.array(mat_args, dtype=np.int32)
    sorted_mat_args = cp.array(sorted_mat_args, dtype=np.int32)

    # Vector, Matrix preparation: host arrays are uploaded to the GPU
    if isinstance(vec, np.ndarray):
        vec = cp.array(vec, dtype=vec.dtype)

    if isinstance(mat, np.ndarray):
        mat = cp.array(mat, dtype=mat.dtype)

    # Start GPU kernel function
    kernel_function = matrix_dot_vector_single_kernel if vec.dtype == np.complex64 else matrix_dot_vector_double_kernel
    kernel_function(
        (block_num,),
        (thread_per_block,),
        (mat, mat_bit, mat_length, vec, mat_args, sorted_mat_args)
    )

    if sync:
        cp.cuda.Device().synchronize()

    # Hand the GPU vector back so that both code paths return the updated state.
    return vec

partial_sv_sampling ¶

partial_sv_sampling(partial_prob, state_vector, shots: int, num_qubits: int, block_qubits: int, target_qubits: list = None, sync: bool = True, seed: int = -1)

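Sample measurement indexes from the state vector in two stages: a block is drawn from the block-level cdf partial_prob, then an index is drawn inside that block.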

Parameters:

partial_prob(cp.array<float32>) –

the cdf of state vector with block qubits.
state_vector(cp.array<complex128>) –

The State Vector.
shots(int) –

The number of sample.
num_qubits(int) –

The number of quantum qubits.
block_qubits(int) –

The number of block qubits.
target_qubits (list, default: None ) –

The List of target sample qubits.
sync(bool) –

Whether to run in sync mode or async mode.

Source code in QuICT/ops/linalg/gpu_calculator.py

def partial_sv_sampling(
    partial_prob, state_vector,
    shots: int, num_qubits: int, block_qubits: int,
    target_qubits: list = None, sync: bool = True, seed: int = -1
):
    """ Sample measurement indexes from a state vector in two stages.

    A block of 2^block_qubits consecutive amplitudes is drawn first from the block-level
    cdf partial_prob; an index inside that block is then drawn from the running sum of
    |amplitude|^2 over the block.

    Args:
        partial_prob(cp.array<float32>): the cdf of state vector with block qubits.
        state_vector(cp.array<complex128>): The State Vector; a numpy array is uploaded
            to the GPU first.
        shots(int): The number of sample.
        num_qubits(int): The number of quantum qubits.
        block_qubits(int): The number of block qubits.
        target_qubits (list): The List of target sample qubits; when given, the sampled
            indexes are additionally passed through large_partial_sampling_kernel.
        sync(bool): Whether to synchronize the device before returning.
        seed(int): seed for the GPU random generator; -1 leaves the generator untouched.

    Returns:
        cp.array<int64>: the sampled indexes, one per shot.
    """
    if seed != -1:
        cp.random.seed(seed)

    # data in GPU
    gpu_sv = cp.array(state_vector) if type(state_vector) is np.ndarray else state_vector
    shot_list = cp.empty(shots, dtype=np.int32)
    random_val_list = cp.random.rand(shots, dtype=cp.float32)
    # Rescale the uniform draws when the cdf does not end at 1.
    if not cp.isclose(partial_prob[-1], 1):
        random_val_list = random_val_list * partial_prob[-1]

    vector_sampling_kernel(
        (math.ceil(shots / 1024),),
        (min(1024, shots),),
        (partial_prob, shot_list, random_val_list, num_qubits - block_qubits, shots)
    )

    block_dim = 1 << block_qubits
    new_shot_list = cp.empty(shots, dtype=cp.int64)
    # Bring every sampled block index to the host in a single transfer
    # instead of one device-to-host copy per shot.
    block_choices = shot_list.get()
    for idx in range(shots):
        block_start = block_dim * int(block_choices[idx])
        block_prob = cp.cumsum(
            cp.square(cp.abs(gpu_sv[block_start: block_start + block_dim])),
            dtype=cp.float32
        )
        rval = cp.random.rand(1, dtype=cp.float32) * block_prob[-1]
        # block_prob is a running sum of non-negative values, hence treated as sorted:
        # side='left' yields the first position with block_prob >= rval, found on the
        # device rather than by a Python loop that syncs on every element.
        offset = cp.searchsorted(block_prob, rval, side='left')
        new_shot_list[idx:idx + 1] = block_start + cp.minimum(offset, block_dim - 1)

    if target_qubits is not None:
        tq_num = len(target_qubits)
        large_partial_sampling_kernel(
            (math.ceil(shots / 1024),),
            (min(1024, shots),),
            (new_shot_list, target_qubits, num_qubits, shots, tq_num)
        )

    if sync:
        cp.cuda.Device().synchronize()

    return new_shot_list

partial_sv_sampling_for_all_qubits ¶

partial_sv_sampling_for_all_qubits(partial_prob, state_vector, shots: int, num_qubits: int, block_qubits: int, sync: bool = True, seed: int = -1)

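Sample measurement indexes over all qubits from the state vector in two stages: a block is drawn from the block-level cdf partial_prob, then an index is drawn inside that block.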

Parameters:

partial_prob(cp.array<float32>) –

the cdf of state vector with block qubits.
state_vector(cp.array<complex128>) –

The State Vector.
shots(int) –

The number of sample.
num_qubits(int) –

The number of quantum qubits.
block_qubits(int) –

The number of block qubits.
sync(bool) –

Whether to run in sync mode or async mode.

Source code in QuICT/ops/linalg/gpu_calculator.py

def partial_sv_sampling_for_all_qubits(
    partial_prob,
    state_vector,
    shots: int,
    num_qubits: int,
    block_qubits: int,
    sync: bool = True,
    seed: int = -1,
):
    """ Sample measurement indexes over all qubits from a state vector in two stages.

    A block of 2^block_qubits consecutive amplitudes is drawn first from the block-level
    cdf partial_prob; an index inside that block is then drawn from the running sum of
    |amplitude|^2 over the block.

    Args:
        partial_prob(cp.array<float32>): the cdf of state vector with block qubits.
        state_vector(cp.array<complex128>): The State Vector; a numpy array is uploaded
            to the GPU first.
        shots(int): The number of sample.
        num_qubits(int): The number of quantum qubits.
        block_qubits(int): The number of block qubits.
        sync(bool): Whether to synchronize the device before returning.
        seed(int): seed for the GPU random generator; -1 leaves the generator untouched.

    Returns:
        cp.array<int64>: the sampled indexes, one per shot.
    """
    if seed != -1:
        cp.random.seed(seed)

    # data in GPU
    gpu_sv = cp.array(state_vector) if type(state_vector) is np.ndarray else state_vector
    shot_list = cp.empty(shots, dtype=np.int32)
    random_val_list = cp.random.rand(shots, dtype=cp.float32)
    # Rescale the uniform draws when the cdf does not end at 1.
    if not cp.isclose(partial_prob[-1], 1):
        random_val_list = random_val_list * partial_prob[-1]

    vector_sampling_kernel(
        (math.ceil(shots / 1024),),
        (min(1024, shots),),
        (partial_prob, shot_list, random_val_list, num_qubits - block_qubits, shots)
    )

    block_dim = 1 << block_qubits
    new_shot_list = cp.empty(shots, dtype=cp.int64)
    # Bring every sampled block index to the host in a single transfer
    # instead of one device-to-host copy per shot.
    block_choices = shot_list.get()
    for idx in range(shots):
        block_start = block_dim * int(block_choices[idx])
        block_prob = cp.cumsum(
            cp.square(cp.abs(gpu_sv[block_start: block_start + block_dim])),
            dtype=cp.float32
        )
        rval = cp.random.rand(1, dtype=cp.float32) * block_prob[-1]
        # block_prob is a running sum of non-negative values, hence treated as sorted:
        # side='right' yields the first position with block_prob > rval, found on the
        # device rather than by a Python loop that syncs on every element.
        offset = cp.searchsorted(block_prob, rval, side='right')
        # If no entry exceeds rval (e.g. rval rounded up to the block total), fall back
        # to the last entry of the block instead of leaving the slot uninitialised.
        new_shot_list[idx:idx + 1] = block_start + cp.minimum(offset, block_dim - 1)

    if sync:
        cp.cuda.Device().synchronize()

    return new_shot_list

sv_sampling ¶

sv_sampling(A, shots: int, num_qubits: int, target_qubits: list = None, sync: bool = True, seed: int = -1)

Sample `shots` indexes from the cdf vector A; when target_qubits is given, the samples are post-processed for those qubits.

Parameters:

A(cp.array<float32>) –

the cdf vector A.
shots(int) –

The number of sample.
num_qubits(int) –

The number of quantum qubits.
target_qubits (list, default: None ) –

The List of target sample qubits.
sync(bool) –

Whether to run in sync mode or async mode.

Source code in QuICT/ops/linalg/gpu_calculator.py

def sv_sampling(A, shots: int, num_qubits: int, target_qubits: list = None, sync: bool = True, seed: int = -1):
    """ Draw `shots` samples from the cdf vector A on the GPU.

    Args:
        A(cp.array<float32>): the cdf vector A; a numpy array is uploaded to the GPU first.
        shots(int): The number of sample.
        num_qubits(int): The number of quantum qubits.
        target_qubits (list): The List of target sample qubits; when given, the samples
            are additionally passed through partial_sampling_kernel.
        sync(bool): Whether to synchronize the device before returning.
        seed(int): seed for the GPU random generator; -1 leaves the generator untouched.

    Returns:
        cp.array<int32>: the sampled values, one per shot.
    """
    if seed != -1:
        cp.random.seed(seed)

    # data in GPU
    cdf = A if type(A) is not np.ndarray else cp.array(A)
    samples = cp.empty(shots, dtype=np.int32)
    uniforms = cp.random.rand(shots, dtype=cp.float32)
    # Rescale the uniform draws when the cdf does not end at 1.
    cdf_total = cdf[-1]
    if not cp.isclose(cdf_total, 1):
        uniforms = uniforms * cdf_total

    # Both kernels share one launch shape: blocks of at most 1024 threads covering all shots.
    grid = (math.ceil(shots / 1024),)
    block = (min(1024, shots),)
    vector_sampling_kernel(grid, block, (cdf, samples, uniforms, num_qubits, shots))

    if target_qubits is not None:
        target_count = len(target_qubits)
        partial_sampling_kernel(
            grid,
            block,
            (samples, target_qubits, num_qubits, shots, target_count)
        )

    if sync:
        cp.cuda.Device().synchronize()

    return samples

sv_sampling_for_all_qubits ¶

sv_sampling_for_all_qubits(A, shots: int, num_qubits: int, sync: bool = True, seed: int = -1)

Sample `shots` indexes over all qubits from the cdf vector A.

Parameters:

A(cp.array<float32>) –

the cdf vector A.
shots(int) –

The number of sample.
num_qubits(int) –

The number of quantum qubits.
sync(bool) –

Whether to run in sync mode or async mode.

Source code in QuICT/ops/linalg/gpu_calculator.py

def sv_sampling_for_all_qubits(A, shots: int, num_qubits: int, sync: bool = True, seed: int = -1):
    """ Draw `shots` samples over all qubits from the cdf vector A on the GPU.

    Args:
        A(cp.array<float32>): the cdf vector A; a numpy array is uploaded to the GPU first.
        shots(int): The number of sample.
        num_qubits(int): The number of quantum qubits.
        sync(bool): Whether to synchronize the device before returning.
        seed(int): seed for the GPU random generator; -1 leaves the generator untouched.

    Returns:
        cp.array<int32>: the sampled values, one per shot.
    """
    if seed != -1:
        cp.random.seed(seed)

    # data in GPU
    cdf = A if type(A) is not np.ndarray else cp.array(A)
    samples = cp.empty(shots, dtype=np.int32)
    uniforms = cp.random.rand(shots, dtype=cp.float32)
    # Rescale the uniform draws when the cdf does not end at 1.
    cdf_total = cdf[-1]
    if not cp.isclose(cdf_total, 1):
        uniforms = uniforms * cdf_total

    # Blocks of at most 1024 threads, enough of them to cover all shots.
    grid = (math.ceil(shots / 1024),)
    block = (min(1024, shots),)
    vector_sampling_kernel(grid, block, (cdf, samples, uniforms, num_qubits, shots))

    if sync:
        cp.cuda.Device().synchronize()

    return samples

tensor ¶

tensor(A, B, gpu_out: bool = False, sync: bool = True)

Applying the tensor operator between A and B.

Parameters:

A(np.array<np.complex>) –

the matrix A
B(np.array<np.complex>) –

the matrix B
gpu_out(bool) –

return result from GPU into CPU
sync(bool) –

Whether to run in sync mode or async mode.

Returns:

–

np.array: the tensor result A ⊗ B

Source code in QuICT/ops/linalg/gpu_calculator.py

def tensor(A, B, gpu_out: bool = False, sync: bool = True):
    """ Compute the tensor product of A and B on the GPU.

    Args:
        A(np.array<np.complex>): the matrix A; a 1-D input is treated as having one column
        B(np.array<np.complex>): the matrix B; a 1-D input is treated as having one column
        gpu_out(bool): if True, the result is copied off the GPU before it is returned
        sync(bool): whether to synchronize the device after the kernel launch

    Returns:
        np.array<np.complex>: the tensor result A ⊗ B
    """
    # Data in GPU.
    left = A if type(A) is not np.ndarray else cp.array(A)
    right = B if type(B) is not np.ndarray else cp.array(B)

    rows_a = A.shape[0]
    rows_b = B.shape[0]
    cols_a = A.shape[1] if A.ndim != 1 else 1
    cols_b = B.shape[1] if B.ndim != 1 else 1

    # The result takes A's dtype, and the kernel precision follows it as well.
    product = cp.empty((rows_a * rows_b, cols_a * cols_b), dtype=A.dtype)
    total_threads = product.size
    if A.dtype == np.complex64:
        tensor_kernel = tensor_single_kernel
    else:
        tensor_kernel = tensor_double_kernel
    kernel_args = (
        left,
        right,
        product,
        cp.int32(cols_a),
        cp.int32(rows_b),
        cp.int32(cols_b),
        cp.longlong(product.size),
    )
    tensor_kernel(
        (math.ceil(total_threads / 1024),),
        (min(1024, total_threads),),
        kernel_args
    )

    if sync:
        cp.cuda.Device().synchronize()

    return product.get() if gpu_out else product