gpu

QuICT.ops.gate_kernel.gpu ¶

apply_measuregate ¶

apply_measuregate(index, vec, vec_bit, prob, sync: bool = False)

Apply the Measure gate to the qubit at `index` of the state vector and return the measured outcome.

Source code in QuICT/ops/gate_kernel/gpu.py

def apply_measuregate(index, vec, vec_bit, prob, sync: bool = False):
    """ Apply the Measure gate to one qubit of the state vector.

    An outcome is drawn at random; the matching MeasureGate0/MeasureGate1
    kernel is then launched with the scaling factor `1 / sqrt(p)`, where `p`
    is the probability of the drawn outcome.

    Args:
        index: Qubit index forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernels (and a float32 scaling factor), any other dtype the
            double-precision kernels (and a float64 scaling factor).
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 1)`
            threads are launched.
        prob: Probability of outcome 0 (outcome 1 uses `1 - prob`).
        sync: Synchronize the current CUDA device before returning.

    Returns:
        Falsy when the MeasureGate0 kernel was applied, truthy when the
        MeasureGate1 kernel was applied.
    """
    # Kernel function preparation
    task_number = 1 << (vec_bit - 1)
    thread_per_block = min(DEFAULT_BLOCK_NUM, task_number)
    block_num = task_number // thread_per_block
    if vec.dtype == np.complex64:
        kernel_functions = (MeasureGate0_single_kernel, MeasureGate1_single_kernel)
        float_type = np.float32
    else:
        kernel_functions = (MeasureGate0_double_kernel, MeasureGate1_double_kernel)
        float_type = np.float64

    # random.random() is uniform on [0, 1). Using `>=` selects outcome 0 with
    # probability exactly `prob`, and never selects it when prob == 0, which
    # would otherwise evaluate 1 / sqrt(0) and corrupt the state vector.
    measured_one = random.random() >= prob
    if measured_one:
        alpha = float_type(1 / np.sqrt(1 - prob))
        kernel = kernel_functions[1]
    else:
        alpha = float_type(1 / np.sqrt(prob))
        kernel = kernel_functions[0]

    # Apply to state vector
    kernel(
        (block_num,),
        (thread_per_block,),
        (index, alpha, vec)
    )

    if sync:
        cp.cuda.Device().synchronize()

    return measured_one

apply_multi_control_targ_gate ¶

apply_multi_control_targ_gate(vec: array, qubits: int, mat: array, c_indexes: list, t_index: int, sync: bool = False)

Apply the dot operator between a multi-control gate's matrix and the state vector. Only works for gates with 1 target qubit.

Source code in QuICT/ops/gate_kernel/gpu.py

def apply_multi_control_targ_gate(
    vec: cp.array,
    qubits: int,
    mat: cp.array,
    c_indexes: list,
    t_index: int,
    sync: bool = False
):
    """ Launch the multi-control kernel for a gate with exactly 1 target qubit.

    Args:
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        qubits: Qubit count of the state vector.
        mat: Gate matrix forwarded to the kernel.
        c_indexes: Control qubit indexes.
        t_index: Target qubit index.
        sync: Synchronize the current CUDA device before returning.
    """
    # Index offset with every control bit set.
    based_idx = sum(1 << cidx for cidx in c_indexes)

    # All qubit indexes touched by the gate, in ascending order.
    involved = sorted([*c_indexes, t_index])
    mat_args = cp.array(involved, dtype=np.int32)
    mat_bit = len(mat_args)

    # Launch geometry: one thread per assignment of the remaining qubits.
    total_threads = 1 << (qubits - len(c_indexes) - 1)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    if vec.dtype == np.complex64:
        kernel = multi_control_targ_single_kernel
    else:
        kernel = multi_control_targ_double_kernel
    kernel(grid, block, (mat, vec, based_idx, t_index, mat_bit, mat_args))

    if sync:
        cp.cuda.Device().synchronize()

apply_multi_control_targs_gate ¶

apply_multi_control_targs_gate(vec: array, qubits: int, mat: array, c_indexes: list, t_indexes: list, sync: bool = False)

Apply the dot operator between a multi-control gate's matrix and the state vector. Only works for gates with 2 target qubits.

Source code in QuICT/ops/gate_kernel/gpu.py

def apply_multi_control_targs_gate(
    vec: cp.array,
    qubits: int,
    mat: cp.array,
    c_indexes: list,
    t_indexes: list,
    sync: bool = False
):
    """ Launch the multi-control kernel for a gate with 2 target qubits.

    Args:
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        qubits: Qubit count of the state vector.
        mat: Gate matrix forwarded to the kernel.
        c_indexes: Control qubit indexes.
        t_indexes: Target qubit indexes (passed to the kernel unsorted).
        sync: Synchronize the current CUDA device before returning.
    """
    # Index offset with every control bit set.
    based_idx = sum(1 << cidx for cidx in c_indexes)

    # All qubit indexes touched by the gate, in ascending order.
    involved = sorted(c_indexes + t_indexes)
    mat_args = cp.array(involved, dtype=np.int32)
    t_args = cp.array(t_indexes, dtype=np.int32)
    mat_bit = len(mat_args)

    # NOTE(review): only 1 is subtracted here although the gate has two
    # target qubits (same expression as the single-target variant) -- confirm
    # against the kernel that this thread count is intended.
    total_threads = 1 << (qubits - len(c_indexes) - 1)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    if vec.dtype == np.complex64:
        kernel = multi_control_targs_single_kernel
    else:
        kernel = multi_control_targs_double_kernel
    kernel(grid, block, (mat, vec, based_idx, t_args, mat_bit, mat_args))

    if sync:
        cp.cuda.Device().synchronize()

apply_rccxgate ¶

apply_rccxgate(c_index, t_indexes, vec, vec_bit, sync: bool = False)

Apply dot operator between reverse matrix (8x8) and state vector.

\[ \begin{bmatrix} 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\ 0 & 1 & 0 & 0 & 0 & 0 & 0 & 0 \\ 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 \\ 0 & 0 & 0 & 0 & 0 & -1 & 0 & 0 \\ 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 \\ 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0 \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def apply_rccxgate(c_index, t_indexes, vec, vec_bit, sync: bool = False):
    r""" Launch the RCCX kernel: reverse matrix (8x8) times state vector.

        $$ \begin{bmatrix}
        1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
        0 & 1 & 0 & 0 & 0 & 0 & 0 & 0 \\
        0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 \\
        0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\
        0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 \\
        0 & 0 & 0 & 0 & 0 & -1 & 0 & 0 \\
        0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 \\
        0 & 0 & 0 & 0 & 0 & 0 & 1 & 0 \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        c_index: Qubit index forwarded to the kernel as its first argument.
        t_indexes: Two qubit indexes; the kernel receives them both sorted
            (larger first) and in their given order.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 3)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 3)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    first, second = t_indexes[0], t_indexes[1]
    high, low = (first, second) if first > second else (second, first)

    kernel = Apply_RCCX_single_kernel if vec.dtype == np.complex64 else Apply_RCCX_double_kernel
    kernel(grid, block, (c_index, high, low, t_indexes[0], t_indexes[1], vec))

    if sync:
        cp.cuda.Device().synchronize()

apply_resetgate ¶

apply_resetgate(index, vec, vec_bit, prob, sync: bool = False)

Apply the Reset gate to the qubit at `index` of the state vector.

Source code in QuICT/ops/gate_kernel/gpu.py

def apply_resetgate(index, vec, vec_bit, prob, sync: bool = False):
    """ Launch the Reset gate kernel on qubit `index` of the state vector.

    `alpha = sqrt(prob)` is forwarded to the kernel. When `alpha` is below
    1e-6 the ResetGate1 kernel is used, otherwise the ResetGate0 kernel.

    Args:
        index: Qubit index forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernels, any other dtype the double-precision ones.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 1)`
            threads are launched.
        prob: Probability whose square root is passed to the kernel
            (presumably the probability of measuring 0 -- confirm with caller).
        sync: Synchronize the current CUDA device before returning.
    """
    # Launch geometry
    total_threads = 1 << (vec_bit - 1)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    if vec.dtype == np.complex64:
        kernel_zero, kernel_one = ResetGate0_single_kernel, ResetGate1_single_kernel
    else:
        kernel_zero, kernel_one = ResetGate0_double_kernel, ResetGate1_double_kernel

    # NOTE(review): alpha is always float64, even when the single-precision
    # kernel is selected (apply_measuregate casts to float32 in that case) --
    # confirm this matches the kernel's parameter type.
    alpha = np.float64(np.sqrt(prob))
    kernel = kernel_one if alpha < 1e-6 else kernel_zero
    kernel(grid, block, (index, alpha, vec))

    if sync:
        cp.cuda.Device().synchronize()

control_cctarg ¶

control_cctarg(c_indexes, t_index, value, vec, vec_bit, sync: bool = False)

Apply dot operator between control matrix (8x8) and state vector.

\[ \begin{bmatrix} 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\ 0 & 1 & 0 & 0 & 0 & 0 & 0 & 0 \\ 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 \\ 0 & 0 & 0 & 0 & 0 & 1 & 0 & 0 \\ 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0 \\ 0 & 0 & 0 & 0 & 0 & 0 & 0 & v_{77} \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def control_cctarg(c_indexes, t_index, value, vec, vec_bit, sync: bool = False):
    r""" Launch the doubly-controlled product kernel: control matrix (8x8)
    times state vector.

        $$ \begin{bmatrix}
        1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
        0 & 1 & 0 & 0 & 0 & 0 & 0 & 0 \\
        0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 \\
        0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\
        0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 \\
        0 & 0 & 0 & 0 & 0 & 1 & 0 & 0 \\
        0 & 0 & 0 & 0 & 0 & 0 & 1 & 0 \\
        0 & 0 & 0 & 0 & 0 & 0 & 0 & v_{77} \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        c_indexes: Two control qubit indexes; passed to the kernel with the
            larger one first.
        t_index: Target qubit index forwarded to the kernel.
        value: Scalar forwarded to the kernel ($v_{77}$ in the matrix above).
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 2)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    first, second = c_indexes[0], c_indexes[1]
    high, low = (first, second) if first > second else (second, first)

    # NOTE(review): the other 8x8 launchers in this module (e.g.
    # diagonal_more, reverse_more) use `vec_bit - 3` -- confirm that
    # `vec_bit - 2` is what this kernel expects.
    total_threads = 1 << (vec_bit - 2)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    kernel = (
        Controlled_Product_cctarg_single_kernel
        if vec.dtype == np.complex64
        else Controlled_Product_cctarg_double_kernel
    )
    kernel(grid, block, (value, vec, high, low, t_index))

    if sync:
        cp.cuda.Device().synchronize()

control_ctargs ¶

control_ctargs(c_index, t_index, value, vec, vec_bit, sync: bool = False)

Apply dot operator between control matrix (4x4) and state vector.

\[ \begin{bmatrix} 1 & 0 & 0 & 0 \\ 0 & 1 & 0 & 0 \\ 0 & 0 & 1 & 0 \\ 0 & 0 & 0 & v_{33} \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def control_ctargs(c_index, t_index, value, vec, vec_bit, sync: bool = False):
    r""" Launch the controlled product kernel: control matrix (4x4) times
    state vector.

        $$ \begin{bmatrix}
        1 & 0 & 0 & 0 \\
        0 & 1 & 0 & 0 \\
        0 & 0 & 1 & 0 \\
        0 & 0 & 0 & v_{33} \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        c_index: Control qubit index forwarded to the kernel.
        t_index: Target qubit index forwarded to the kernel.
        value: Scalar forwarded to the kernel ($v_{33}$ in the matrix above).
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 2)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 2)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    kernel = (
        Controlled_Product_ctargs_single_kernel
        if vec.dtype == np.complex64
        else Controlled_Product_ctargs_double_kernel
    )
    kernel(grid, block, (value, vec, c_index, t_index))

    if sync:
        cp.cuda.Device().synchronize()

control_targ ¶

control_targ(t_index, val, vec, vec_bit, sync: bool = False)

Apply dot operator between control matrix (2x2) and state vector.

\[ \begin{bmatrix} 1 & 0 \\ 0 & v_{11} \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def control_targ(t_index, val, vec, vec_bit, sync: bool = False):
    r""" Launch the single-qubit controlled multiply kernel: control matrix
    (2x2) times state vector.

        $$ \begin{bmatrix}
        1 & 0 \\
        0 & v_{11} \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        t_index: Target qubit index forwarded to the kernel.
        val: Scalar forwarded to the kernel ($v_{11}$ in the matrix above).
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 1)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 1)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    kernel = (
        Controlled_Multiply_targ_single_kernel
        if vec.dtype == np.complex64
        else Controlled_Multiply_targ_double_kernel
    )
    kernel(grid, block, (t_index, val, vec))

    if sync:
        cp.cuda.Device().synchronize()

ctrl_normal_targs ¶

ctrl_normal_targs(t_indexes, mat, vec, vec_bit, sync: bool = False)

Apply dot operator between ctrl_normal matrix (4x4) and state vector.

\[ \begin{bmatrix} 1 & 0 & 0 & 0 \\ 0 & v_{00} & v_{01} & 0 \\ 0 & v_{10} & v_{11} & 0 \\ 0 & 0 & 0 & v_{33} \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def ctrl_normal_targs(t_indexes, mat, vec, vec_bit, sync: bool = False):
    r""" Launch the ctrl_normal kernel: ctrl_normal matrix (4x4) times state
    vector.

        $$ \begin{bmatrix}
        1 & 0 & 0 & 0 \\
        0 & v_{00} & v_{01} & 0 \\
        0 & v_{10} & v_{11} & 0 \\
        0 & 0 & 0 & v_{33} \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        t_indexes: Two target qubit indexes; passed to the kernel with the
            larger one first.
        mat: Gate matrix forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 2)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 2)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    first, second = t_indexes[0], t_indexes[1]
    high, low = (first, second) if first > second else (second, first)

    kernel = (
        Completed_MxIP_targs_single_kernel
        if vec.dtype == np.complex64
        else Completed_MxIP_targs_double_kernel
    )
    kernel(grid, block, (high, low, mat, vec))

    if sync:
        cp.cuda.Device().synchronize()

diagonal_ctargs ¶

diagonal_ctargs(c_index, t_index, mat, vec, vec_bit, sync: bool = False)

Apply dot operator between control diagonal matrix (4x4) and state vector.

\[ \begin{bmatrix} 1 & 0 & 0 & 0 \\ 0 & 1 & 0 & 0 \\ 0 & 0 & v_{22} & 0 \\ 0 & 0 & 0 & v_{33} \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def diagonal_ctargs(c_index, t_index, mat, vec, vec_bit, sync: bool = False):
    r""" Launch the controlled diagonal kernel: control diagonal matrix (4x4)
    times state vector.

        $$ \begin{bmatrix}
        1 & 0 & 0 & 0 \\
        0 & 1 & 0 & 0 \\
        0 & 0 & v_{22} & 0 \\
        0 & 0 & 0 & v_{33} \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        c_index: Control qubit index forwarded to the kernel.
        t_index: Target qubit index forwarded to the kernel.
        mat: Gate matrix forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 2)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 2)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    kernel = (
        Controlled_Multiply_ctargs_single_kernel
        if vec.dtype == np.complex64
        else Controlled_Multiply_ctargs_double_kernel
    )
    kernel(grid, block, (mat, vec, c_index, t_index))

    if sync:
        cp.cuda.Device().synchronize()

diagonal_more ¶

diagonal_more(c_indexes, t_index, mat, vec, vec_bit, sync: bool = False)

Apply dot operator between control diagonal matrix (8x8) and state vector.

\[ \begin{bmatrix} 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\ 0 & 1 & 0 & 0 & 0 & 0 & 0 & 0 \\ 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 \\ 0 & 0 & 0 & 0 & 0 & 1 & 0 & 0 \\ 0 & 0 & 0 & 0 & 0 & 0 & v_{66} & 0 \\ 0 & 0 & 0 & 0 & 0 & 0 & 0 & v_{77} \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def diagonal_more(c_indexes, t_index, mat, vec, vec_bit, sync: bool = False):
    r""" Launch the doubly-controlled diagonal kernel: control diagonal
    matrix (8x8) times state vector.

        $$ \begin{bmatrix}
        1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
        0 & 1 & 0 & 0 & 0 & 0 & 0 & 0 \\
        0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 \\
        0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\
        0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 \\
        0 & 0 & 0 & 0 & 0 & 1 & 0 & 0 \\
        0 & 0 & 0 & 0 & 0 & 0 & v_{66} & 0 \\
        0 & 0 & 0 & 0 & 0 & 0 & 0 & v_{77} \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        c_indexes: Two control qubit indexes; passed to the kernel with the
            larger one first.
        t_index: Target qubit index forwarded to the kernel.
        mat: Gate matrix forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 3)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 3)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    first, second = c_indexes[0], c_indexes[1]
    high, low = (first, second) if first > second else (second, first)

    kernel = (
        Controlled_Multiply_more_single_kernel
        if vec.dtype == np.complex64
        else Controlled_Multiply_more_double_kernel
    )
    kernel(grid, block, (high, low, t_index, mat, vec))

    if sync:
        cp.cuda.Device().synchronize()

diagonal_normal_targs ¶

diagonal_normal_targs(t_indexes, mat, vec, vec_bit, sync: bool = False)

Apply dot operator between diag_normal matrix (4x4) and state vector.

\[ \begin{bmatrix} v_{00} & v_{01} & 0 & 0 \\ v_{10} & v_{11} & 0 & 0 \\ 0 & 0 & v_{22} & v_{23} \\ 0 & 0 & v_{32} & v_{33} \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def diagonal_normal_targs(t_indexes, mat, vec, vec_bit, sync: bool = False):
    r""" Launch the diag_normal kernel: diag_normal matrix (4x4) times state
    vector.

        $$ \begin{bmatrix}
        v_{00} & v_{01} & 0 & 0 \\
        v_{10} & v_{11} & 0 & 0 \\
        0 & 0 & v_{22} & v_{23} \\
        0 & 0 & v_{32} & v_{33} \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        t_indexes: Two target qubit indexes; passed to the kernel in the
            order given (no sorting).
        mat: Gate matrix forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 2)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 2)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    kernel = (
        Diagonal_Multiply_normal_single_kernel
        if vec.dtype == np.complex64
        else Diagonal_Multiply_normal_double_kernel
    )
    kernel(grid, block, (t_indexes[0], t_indexes[1], mat, vec))

    if sync:
        cp.cuda.Device().synchronize()

diagonal_targ ¶

diagonal_targ(t_index, mat, vec, vec_bit, sync: bool = False)

Apply dot operator between diagonal matrix (2x2) and state vector.

\[ \begin{bmatrix} v_{00} & 0 \\ 0 & v_{11} \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def diagonal_targ(t_index, mat, vec, vec_bit, sync: bool = False):
    r""" Launch the single-qubit diagonal kernel: diagonal matrix (2x2) times
    state vector.

        $$ \begin{bmatrix}
        v_{00} & 0 \\
        0 & v_{11} \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        t_index: Target qubit index forwarded to the kernel.
        mat: Gate matrix forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 1)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 1)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    kernel = (
        Diagonal_Multiply_targ_single_kernel
        if vec.dtype == np.complex64
        else Diagonal_Multiply_targ_double_kernel
    )
    kernel(grid, block, (t_index, mat, vec))

    if sync:
        cp.cuda.Device().synchronize()

diagonal_targs ¶

diagonal_targs(t_indexes, mat, vec, vec_bit, sync: bool = False)

Apply dot operator between diagonal matrix (4x4) and state vector.

\[ \begin{bmatrix} v_{00} & 0 & 0 & 0 \\ 0 & v_{11} & 0 & 0 \\ 0 & 0 & v_{22} & 0 \\ 0 & 0 & 0 & v_{33} \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def diagonal_targs(t_indexes, mat, vec, vec_bit, sync: bool = False):
    r""" Launch the two-qubit diagonal kernel: diagonal matrix (4x4) times
    state vector.

        $$ \begin{bmatrix}
        v_{00} & 0 & 0 & 0 \\
        0 & v_{11} & 0 & 0 \\
        0 & 0 & v_{22} & 0 \\
        0 & 0 & 0 & v_{33} \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        t_indexes: Two target qubit indexes; passed to the kernel with the
            larger one first.
        mat: Gate matrix forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 2)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 2)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    first, second = t_indexes[0], t_indexes[1]
    high, low = (first, second) if first > second else (second, first)

    kernel = (
        Diagonal_Multiply_targs_single_kernel
        if vec.dtype == np.complex64
        else Diagonal_Multiply_targs_double_kernel
    )
    kernel(grid, block, (high, low, mat, vec))

    if sync:
        cp.cuda.Device().synchronize()

measured_prob_calculate ¶

measured_prob_calculate(index, vec, vec_bit, all_measured: bool = False, sync: bool = False)

Calculate the probability of measuring 0.

Source code in QuICT/ops/gate_kernel/gpu.py

def measured_prob_calculate(index, vec, vec_bit, all_measured: bool = False, sync: bool = False):
    """ Compute the measurement probability for qubit `index`.

    NOTE(review): the original docstring says this is the probability of
    measuring 0, while its inline comment said "measured 1" -- confirm
    against the kernels which outcome the returned value describes.

    Args:
        index: Qubit index forwarded to the kernel.
        vec: State vector; a complex128 dtype selects the double-precision
            kernel, any other dtype the single-precision one.
        vec_bit: Qubit count of the state vector.
        all_measured: When True, skip the per-qubit kernel and return the
            real part of `mn_measureprob_calculator(vec)` instead (used by
            the multi-node simulator).
        sync: Synchronize the current CUDA device before returning (not
            applied on the `all_measured` path).

    Returns:
        The real part of the reduced probability value.
    """
    # Whole-state measurement, only happens for the multi-node simulator.
    if all_measured:
        return mn_measureprob_calculator(vec).real

    # For large states, hand MEASURED_PRE_ADDED to the kernel and shrink the
    # launch (and the intermediate buffer) by that many bits.
    pre_added_qubits = MEASURED_PRE_ADDED if vec_bit > MEASURED_PRE_ADDED + 20 else 0
    total_threads = 1 << (vec_bit - (pre_added_qubits + 1))
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    # One intermediate value per thread, reduced afterwards.
    partial = cp.empty(total_threads, dtype=vec.dtype)
    if vec.dtype == np.complex128:
        kernel = prop_add_double_kernel
    else:
        kernel = prop_add_single_kernel
    kernel(grid, block, (index, pre_added_qubits, vec, partial))

    prob = MeasureGate_prop(partial, axis=0).real

    if sync:
        cp.cuda.Device().synchronize()

    return prob

normal_ctargs ¶

normal_ctargs(c_index, t_index, mat, vec, vec_bit, sync: bool = False)

Apply dot operator between control normal matrix (4x4) and state vector.

\[ \begin{bmatrix} 1 & 0 & 0 & 0 \\ 0 & 1 & 0 & 0 \\ 0 & 0 & v_{22} & v_{23} \\ 0 & 0 & v_{32} & v_{33} \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def normal_ctargs(c_index, t_index, mat, vec, vec_bit, sync: bool = False):
    r""" Launch the controlled inner-product kernel: control normal matrix
    (4x4) times state vector.

        $$ \begin{bmatrix}
        1 & 0 & 0 & 0 \\
        0 & 1 & 0 & 0 \\
        0 & 0 & v_{22} & v_{23} \\
        0 & 0 & v_{32} & v_{33} \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        c_index: Control qubit index forwarded to the kernel.
        t_index: Target qubit index forwarded to the kernel.
        mat: Gate matrix forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 2)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 2)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    kernel = (
        Controlled_InnerProduct_ctargs_single_kernel
        if vec.dtype == np.complex64
        else Controlled_InnerProduct_ctargs_double_kernel
    )
    kernel(grid, block, (mat, vec, c_index, t_index))

    if sync:
        cp.cuda.Device().synchronize()

normal_normal_targs ¶

normal_normal_targs(t_indexes, mat, vec, vec_bit, sync: bool = False)

Apply dot operator between normal_normal matrix (4x4) and state vector.

\[ \begin{bmatrix} v_{00} & 0 & 0 & v_{03} \\ 0 & v_{11} & v_{12} & 0 \\ 0 & v_{21} & v_{22} & 0 \\ v_{30} & 0 & 0 & v_{33} \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def normal_normal_targs(t_indexes, mat, vec, vec_bit, sync: bool = False):
    r""" Launch the normal_normal kernel: normal_normal matrix (4x4) times
    state vector.

        $$ \begin{bmatrix}
        v_{00} & 0 & 0 & v_{03} \\
        0 & v_{11} & v_{12} & 0 \\
        0 & v_{21} & v_{22} & 0 \\
        v_{30} & 0 & 0 & v_{33} \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        t_indexes: Two target qubit indexes; passed to the kernel with the
            larger one first.
        mat: Gate matrix forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 2)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 2)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    first, second = t_indexes[0], t_indexes[1]
    high, low = (first, second) if first > second else (second, first)

    kernel = (
        Completed_IPxIP_targs_single_kernel
        if vec.dtype == np.complex64
        else Completed_IPxIP_targs_double_kernel
    )
    kernel(grid, block, (high, low, mat, vec))

    if sync:
        cp.cuda.Device().synchronize()

normal_targ ¶

normal_targ(t_index, mat, vec, vec_bit, sync: bool = False)

Apply dot operator between normal matrix (2x2) and state vector.

\[ \begin{bmatrix} v_{00} & v_{01} \\ v_{10} & v_{11} \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def normal_targ(t_index, mat, vec, vec_bit, sync: bool = False):
    r""" Launch the single-qubit inner-product kernel: normal matrix (2x2)
    times state vector.

        $$ \begin{bmatrix}
        v_{00} & v_{01} \\
        v_{10} & v_{11} \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        t_index: Target qubit index forwarded to the kernel.
        mat: Gate matrix forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 1)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 1)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    kernel = (
        Based_InnerProduct_targ_single_kernel
        if vec.dtype == np.complex64
        else Based_InnerProduct_targ_double_kernel
    )
    kernel(grid, block, (t_index, mat, vec))

    if sync:
        cp.cuda.Device().synchronize()

normal_targs ¶

normal_targs(t_indexes, mat, vec, vec_bit, sync: bool = False)

Apply dot operator between normal matrix (4x4) and state vector.

\[ \begin{bmatrix} v_{00} & v_{01} & v_{02} & v_{03} \\ v_{10} & v_{11} & v_{12} & v_{13} \\ v_{20} & v_{21} & v_{22} & v_{23} \\ v_{30} & v_{31} & v_{32} & v_{33} \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def normal_targs(t_indexes, mat, vec, vec_bit, sync: bool = False):
    r""" Launch the two-qubit inner-product kernel: normal matrix (4x4) times
    state vector.

        $$ \begin{bmatrix}
        v_{00} & v_{01} & v_{02} & v_{03} \\
        v_{10} & v_{11} & v_{12} & v_{13} \\
        v_{20} & v_{21} & v_{22} & v_{23} \\
        v_{30} & v_{31} & v_{32} & v_{33} \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        t_indexes: Two target qubit indexes; passed to the kernel in the
            order given (no sorting).
        mat: Gate matrix forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 2)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 2)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    kernel = (
        Based_InnerProduct_targs_single_kernel
        if vec.dtype == np.complex64
        else Based_InnerProduct_targs_double_kernel
    )
    kernel(grid, block, (t_indexes[0], t_indexes[1], mat, vec))

    if sync:
        cp.cuda.Device().synchronize()

reverse_ctargs ¶

reverse_ctargs(c_index, t_index, mat, vec, vec_bit, sync: bool = False)

Apply dot operator between control reverse matrix (4x4) and state vector.

\[ \begin{bmatrix} 1 & 0 & 0 & 0 \\ 0 & 1 & 0 & 0 \\ 0 & 0 & 0 & v_{23} \\ 0 & 0 & v_{32} & 0 \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def reverse_ctargs(c_index, t_index, mat, vec, vec_bit, sync: bool = False):
    r""" Launch the controlled multiply-swap kernel: control reverse matrix
    (4x4) times state vector.

        $$ \begin{bmatrix}
        1 & 0 & 0 & 0 \\
        0 & 1 & 0 & 0 \\
        0 & 0 & 0 & v_{23} \\
        0 & 0 & v_{32} & 0 \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        c_index: Control qubit index forwarded to the kernel.
        t_index: Target qubit index forwarded to the kernel.
        mat: Gate matrix forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 2)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 2)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    kernel = (
        Controlled_MultiplySwap_ctargs_single_kernel
        if vec.dtype == np.complex64
        else Controlled_MultiplySwap_ctargs_double_kernel
    )
    kernel(grid, block, (mat, vec, c_index, t_index))

    if sync:
        cp.cuda.Device().synchronize()

reverse_more ¶

reverse_more(c_indexes, t_index, vec, vec_bit, sync: bool = False)

Apply dot operator between reverse matrix (8x8) and state vector.

\[ \begin{bmatrix} 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\ 0 & 1 & 0 & 0 & 0 & 0 & 0 & 0 \\ 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 \\ 0 & 0 & 0 & 0 & 0 & 1 & 0 & 0 \\ 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 \\ 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0 \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def reverse_more(c_indexes, t_index, vec, vec_bit, sync: bool = False):
    r""" Launch the doubly-controlled swap kernel: reverse matrix (8x8) times
    state vector.

        $$ \begin{bmatrix}
        1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
        0 & 1 & 0 & 0 & 0 & 0 & 0 & 0 \\
        0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 \\
        0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\
        0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 \\
        0 & 0 & 0 & 0 & 0 & 1 & 0 & 0 \\
        0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 \\
        0 & 0 & 0 & 0 & 0 & 0 & 1 & 0 \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        c_indexes: Two control qubit indexes; passed to the kernel with the
            larger one first.
        t_index: Target qubit index forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 3)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 3)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    first, second = c_indexes[0], c_indexes[1]
    high, low = (first, second) if first > second else (second, first)

    kernel = (
        Controlled_Swap_more_single_kernel
        if vec.dtype == np.complex64
        else Controlled_Swap_more_double_kernel
    )
    kernel(grid, block, (high, low, t_index, vec))

    if sync:
        cp.cuda.Device().synchronize()

reverse_targ ¶

reverse_targ(t_index, mat, vec, vec_bit, sync: bool = False)

Apply dot operator between reverse matrix (2x2) and state vector.

\[ \begin{bmatrix} 0 & v_{01} \\ v_{10} & 0 \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def reverse_targ(t_index, mat, vec, vec_bit, sync: bool = False):
    r""" Launch the single-qubit multiply-swap kernel: reverse matrix (2x2)
    times state vector.

        $$ \begin{bmatrix}
        0 & v_{01} \\
        v_{10} & 0 \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        t_index: Target qubit index forwarded to the kernel.
        mat: Gate matrix forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 1)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 1)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    kernel = (
        RDiagonal_MultiplySwap_targ_single_kernel
        if vec.dtype == np.complex64
        else RDiagonal_MultiplySwap_targ_double_kernel
    )
    kernel(grid, block, (t_index, mat, vec))

    if sync:
        cp.cuda.Device().synchronize()

reverse_targs ¶

reverse_targs(t_indexes, mat, vec, vec_bit, sync: bool = False)

Apply dot operator between reverse matrix (4x4) and state vector.

\[ \begin{bmatrix} 0 & 0 & 0 & v_{03} \\ 0 & 0 & v_{12} & 0 \\ 0 & v_{21} & 0 & 0 \\ v_{30} & 0 & 0 & 0 \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def reverse_targs(t_indexes, mat, vec, vec_bit, sync: bool = False):
    r""" Launch the two-qubit anti-diagonal kernel: reverse matrix (4x4)
    times state vector.

        $$ \begin{bmatrix}
        0 & 0 & 0 & v_{03} \\
        0 & 0 & v_{12} & 0 \\
        0 & v_{21} & 0 & 0 \\
        v_{30} & 0 & 0 & 0 \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        t_indexes: Two target qubit indexes; passed to the kernel with the
            larger one first.
        mat: Gate matrix forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 2)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 2)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    first, second = t_indexes[0], t_indexes[1]
    high, low = (first, second) if first > second else (second, first)

    kernel = (
        RDiagonal_Multiply_targs_single_kernel
        if vec.dtype == np.complex64
        else RDiagonal_Multiply_targs_double_kernel
    )
    kernel(grid, block, (high, low, mat, vec))

    if sync:
        cp.cuda.Device().synchronize()

swap_targ ¶

swap_targ(t_index, vec, vec_bit, sync: bool = False)

Apply dot operator between Swap's matrix (2x2) and state vector.

\[ \begin{bmatrix} 0 & 1 \\ 1 & 0 \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def swap_targ(t_index, vec, vec_bit, sync: bool = False):
    r""" Launch the single-qubit swap kernel: Swap's matrix (2x2) times state
    vector.

        $$ \begin{bmatrix}
        0 & 1 \\
        1 & 0 \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        t_index: Target qubit index forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 1)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 1)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    kernel = (
        RDiagonal_Swap_targ_single_kernel
        if vec.dtype == np.complex64
        else RDiagonal_Swap_targ_double_kernel
    )
    kernel(grid, block, (t_index, vec))

    if sync:
        cp.cuda.Device().synchronize()

swap_targs ¶

swap_targs(t_indexes, mat, vec, vec_bit, sync: bool = False)

Apply dot operator between swap matrix (4x4) and state vector.

\[ \begin{bmatrix} 1 & 0 & 0 & 0 \\ 0 & 0 & v_{12} & 0 \\ 0 & v_{21} & 0 & 0 \\ 0 & 0 & 0 & 1 \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def swap_targs(t_indexes, mat, vec, vec_bit, sync: bool = False):
    r""" Launch the two-qubit swap kernel: swap matrix (4x4) times state
    vector.

        $$ \begin{bmatrix}
        1 & 0 & 0 & 0 \\
        0 & 0 & v_{12} & 0 \\
        0 & v_{21} & 0 & 0 \\
        0 & 0 & 0 & 1 \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        t_indexes: Two target qubit indexes; passed to the kernel with the
            larger one first.
        mat: Gate matrix forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 2)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 2)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    first, second = t_indexes[0], t_indexes[1]
    high, low = (first, second) if first > second else (second, first)

    kernel = (
        Controlled_Swap_targs_single_kernel
        if vec.dtype == np.complex64
        else Controlled_Swap_targs_double_kernel
    )
    kernel(grid, block, (high, low, mat, vec))

    if sync:
        cp.cuda.Device().synchronize()

swap_tmore ¶

swap_tmore(t_indexes, c_index, vec, vec_bit, sync: bool = False)

Apply dot operator between control swap matrix (8x8) and state vector.

\[ \begin{bmatrix} 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\ 0 & 1 & 0 & 0 & 0 & 0 & 0 & 0 \\ 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 \\ 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0 \\ 0 & 0 & 0 & 0 & 0 & 1 & 0 & 0 \\ 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 \\ \end{bmatrix} \cdot V \]

Source code in QuICT/ops/gate_kernel/gpu.py

def swap_tmore(t_indexes, c_index, vec, vec_bit, sync: bool = False):
    r""" Launch the controlled-swap kernel: control swap matrix (8x8) times
    state vector.

        $$ \begin{bmatrix}
        1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
        0 & 1 & 0 & 0 & 0 & 0 & 0 & 0 \\
        0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 \\
        0 & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\
        0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 \\
        0 & 0 & 0 & 0 & 0 & 0 & 1 & 0 \\
        0 & 0 & 0 & 0 & 0 & 1 & 0 & 0 \\
        0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 \\
        \end{bmatrix}
        \cdot V
        $$

    Args:
        t_indexes: Two target qubit indexes; passed to the kernel with the
            larger one first.
        c_index: Control qubit index forwarded to the kernel.
        vec: State vector; a complex64 dtype selects the single-precision
            kernel, any other dtype the double-precision one.
        vec_bit: Qubit count of the state vector; `1 << (vec_bit - 3)`
            threads are launched.
        sync: Synchronize the current CUDA device before returning.
    """
    total_threads = 1 << (vec_bit - 3)
    block_size = min(DEFAULT_BLOCK_NUM, total_threads)
    grid, block = (total_threads // block_size,), (block_size,)

    first, second = t_indexes[0], t_indexes[1]
    high, low = (first, second) if first > second else (second, first)

    kernel = (
        Controlled_Swap_tmore_single_kernel
        if vec.dtype == np.complex64
        else Controlled_Swap_tmore_double_kernel
    )
    kernel(grid, block, (high, low, c_index, vec))

    if sync:
        cp.cuda.Device().synchronize()