Skip to content

API Reference

Complete API documentation for PyVq.

Distance

class Distance:
    """Compute vector distances with SIMD acceleration."""

    @staticmethod
    def euclidean() -> Distance

    @staticmethod
    def squared_euclidean() -> Distance

    @staticmethod
    def manhattan() -> Distance

    @staticmethod
    def cosine() -> Distance

    def compute(self, a: np.ndarray, b: np.ndarray) -> float
        """Compute distance between two float32 arrays."""

Example:

dist = pyvq.Distance.euclidean()
result = dist.compute(np.array([1.0, 2.0], dtype=np.float32),
                      np.array([3.0, 4.0], dtype=np.float32))


BinaryQuantizer

class BinaryQuantizer:
    """Maps each value to `low` or `high` (0 or 1 by default) based on a threshold."""

    def __init__(self, threshold: float, low: int = 0, high: int = 1)

    def quantize(self, values: np.ndarray) -> np.ndarray
        """Input: float32, Output: uint8"""

    def dequantize(self, codes: np.ndarray) -> np.ndarray
        """Input: uint8, Output: float32"""

    # Properties
    threshold: float
    low: int
    high: int

Example:

bq = pyvq.BinaryQuantizer(threshold=0.0)
codes = bq.quantize(np.array([-0.5, 0.5], dtype=np.float32))
# Returns: [0, 1]


ScalarQuantizer

class ScalarQuantizer:
    """Uniformly quantizes values to discrete levels."""

    def __init__(self, min: float, max: float, levels: int = 256)

    def quantize(self, values: np.ndarray) -> np.ndarray
        """Input: float32, Output: uint8"""

    def dequantize(self, codes: np.ndarray) -> np.ndarray
        """Input: uint8, Output: float32"""

    # Properties
    min: float
    max: float
    levels: int
    step: float

Example:

sq = pyvq.ScalarQuantizer(min=-1.0, max=1.0, levels=256)
codes = sq.quantize(np.array([0.0, 0.5], dtype=np.float32))
reconstructed = sq.dequantize(codes)


ProductQuantizer

class ProductQuantizer:
    """Divides vectors into subspaces and quantizes each separately."""

    def __init__(
        self,
        training_data: np.ndarray,  # 2D float32 array
        num_subspaces: int,
        num_centroids: int,
        max_iters: int = 10,
        distance: Distance = None,
        seed: int = 42
    )

    def quantize(self, vector: np.ndarray) -> np.ndarray
        """Input: float32, Output: float16"""

    def dequantize(self, codes: np.ndarray) -> np.ndarray
        """Input: float16, Output: float32"""

    # Properties
    num_subspaces: int
    sub_dim: int
    dim: int

Example:

training = np.random.randn(100, 16).astype(np.float32)
pq = pyvq.ProductQuantizer(
    training_data=training,
    num_subspaces=4,
    num_centroids=8,
    distance=pyvq.Distance.euclidean()
)
codes = pq.quantize(training[0])


TSVQ

class TSVQ:
    """Tree-structured vector quantizer using hierarchical clustering."""

    def __init__(
        self,
        training_data: np.ndarray,  # 2D float32 array
        max_depth: int,
        distance: Distance = None
    )

    def quantize(self, vector: np.ndarray) -> np.ndarray
        """Input: float32, Output: float16"""

    def dequantize(self, codes: np.ndarray) -> np.ndarray
        """Input: float16, Output: float32"""

    # Properties
    dim: int

Example:

training = np.random.randn(100, 32).astype(np.float32)
tsvq = pyvq.TSVQ(
    training_data=training,
    max_depth=5,
    distance=pyvq.Distance.squared_euclidean()
)
codes = tsvq.quantize(training[0])


Utility Functions

def get_simd_backend() -> str:
    """Returns the active SIMD backend (e.g., 'AVX2 (Auto)')."""

Type Summary

| Quantizer | Input | `quantize()` Output | `dequantize()` Output |
|---|---|---|---|
| BinaryQuantizer | float32 | uint8 | float32 |
| ScalarQuantizer | float32 | uint8 | float32 |
| ProductQuantizer | float32 | float16 | float32 |
| TSVQ | float32 | float16 | float32 |