API Reference

Complete API documentation for PyVq.

Distance

class Distance:
    """Compute vector distances with SIMD acceleration."""

    @staticmethod
    def euclidean() -> Distance

    @staticmethod
    def squared_euclidean() -> Distance

    @staticmethod
    def manhattan() -> Distance

    @staticmethod
    def cosine() -> Distance

    def compute(self, a: np.ndarray, b: np.ndarray) -> float
        """Compute distance between two float32 arrays."""

Example (assumes `import numpy as np` and `import pyvq`):

dist = pyvq.Distance.euclidean()
result = dist.compute(np.array([1.0, 2.0], dtype=np.float32),
                      np.array([3.0, 4.0], dtype=np.float32))

BinaryQuantizer

class BinaryQuantizer:
    """Maps values to 0 or 1 based on a threshold."""

    def __init__(self, threshold: float, low: int = 0, high: int = 1)

    def quantize(self, values: np.ndarray) -> np.ndarray
        """Input: float32, Output: uint8"""

    def dequantize(self, codes: np.ndarray) -> np.ndarray
        """Input: uint8, Output: float32"""

    # Properties
    threshold: float
    low: int
    high: int

Example:

bq = pyvq.BinaryQuantizer(threshold=0.0)
codes = bq.quantize(np.array([-0.5, 0.5], dtype=np.float32))
# Returns: [0, 1]

ScalarQuantizer

class ScalarQuantizer:
    """Uniformly quantizes values to discrete levels."""

    def __init__(self, min: float, max: float, levels: int = 256)

    def quantize(self, values: np.ndarray) -> np.ndarray
        """Input: float32, Output: uint8"""

    def dequantize(self, codes: np.ndarray) -> np.ndarray
        """Input: uint8, Output: float32"""

    # Properties
    min: float
    max: float
    levels: int
    step: float

Example:

sq = pyvq.ScalarQuantizer(min=-1.0, max=1.0, levels=256)
codes = sq.quantize(np.array([0.0, 0.5], dtype=np.float32))
reconstructed = sq.dequantize(codes)

ProductQuantizer

class ProductQuantizer:
    """Divides vectors into subspaces and quantizes each separately."""

    def __init__(
        self,
        training_data: np.ndarray,  # 2D float32 array
        num_subspaces: int,
        num_centroids: int,
        max_iters: int = 10,
        distance: Distance = None,
        seed: int = 42
    )

    def quantize(self, vector: np.ndarray) -> np.ndarray
        """Input: float32, Output: float16"""

    def dequantize(self, codes: np.ndarray) -> np.ndarray
        """Input: float16, Output: float32"""

    # Properties
    num_subspaces: int
    sub_dim: int
    dim: int

Example:

training = np.random.randn(100, 16).astype(np.float32)
pq = pyvq.ProductQuantizer(
    training_data=training,
    num_subspaces=4,
    num_centroids=8,
    distance=pyvq.Distance.euclidean()
)
codes = pq.quantize(training[0])

TSVQ

class TSVQ:
    """Tree-structured vector quantizer using hierarchical clustering."""

    def __init__(
        self,
        training_data: np.ndarray,  # 2D float32 array
        max_depth: int,
        distance: Distance = None
    )

    def quantize(self, vector: np.ndarray) -> np.ndarray
        """Input: float32, Output: float16"""

    def dequantize(self, codes: np.ndarray) -> np.ndarray
        """Input: float16, Output: float32"""

    # Properties
    dim: int

Example:

training = np.random.randn(100, 32).astype(np.float32)
tsvq = pyvq.TSVQ(
    training_data=training,
    max_depth=5,
    distance=pyvq.Distance.squared_euclidean()
)
codes = tsvq.quantize(training[0])

Utility Functions

def get_simd_backend() -> str:
    """Returns the active SIMD backend (e.g., 'AVX2 (Auto)')."""

Type Summary

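| Quantizer        | Input   | quantize() Output | dequantize() Output |
|------------------|---------|-------------------|---------------------|
| BinaryQuantizer  | float32 | uint8             | float32             |
| ScalarQuantizer  | float32 | uint8             | float32             |
| ProductQuantizer | float32 | float16           | float32             |
| TSVQ             | float32 | float16           | float32             |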