API Reference
Complete API documentation for PyVq.
Distance
class Distance:
"""Compute vector distances with SIMD acceleration."""
@staticmethod
def euclidean() -> Distance
@staticmethod
def squared_euclidean() -> Distance
@staticmethod
def manhattan() -> Distance
@staticmethod
def cosine() -> Distance
def compute(self, a: np.ndarray, b: np.ndarray) -> float
"""Compute distance between two float32 arrays."""
Example:
dist = pyvq.Distance.euclidean()
result = dist.compute(np.array([1.0, 2.0], dtype=np.float32),
np.array([3.0, 4.0], dtype=np.float32))
BinaryQuantizer
class BinaryQuantizer:
"""Maps each value to `low` or `high` (0 and 1 by default) based on a threshold."""
def __init__(self, threshold: float, low: int = 0, high: int = 1)
def quantize(self, values: np.ndarray) -> np.ndarray
"""Input: float32, Output: uint8"""
def dequantize(self, codes: np.ndarray) -> np.ndarray
"""Input: uint8, Output: float32"""
# Properties
threshold: float
low: int
high: int
Example:
bq = pyvq.BinaryQuantizer(threshold=0.0)
codes = bq.quantize(np.array([-0.5, 0.5], dtype=np.float32))
# Returns: [0, 1]
ScalarQuantizer
class ScalarQuantizer:
"""Uniformly quantizes values to discrete levels."""
def __init__(self, min: float, max: float, levels: int = 256)
def quantize(self, values: np.ndarray) -> np.ndarray
"""Input: float32, Output: uint8"""
def dequantize(self, codes: np.ndarray) -> np.ndarray
"""Input: uint8, Output: float32"""
# Properties
min: float
max: float
levels: int
step: float
Example:
sq = pyvq.ScalarQuantizer(min=-1.0, max=1.0, levels=256)
codes = sq.quantize(np.array([0.0, 0.5], dtype=np.float32))
reconstructed = sq.dequantize(codes)
ProductQuantizer
class ProductQuantizer:
"""Divides vectors into subspaces and quantizes each separately."""
def __init__(
self,
training_data: np.ndarray, # 2D float32 array
num_subspaces: int,
num_centroids: int,
max_iters: int = 10,
distance: Distance = None,
seed: int = 42
)
def quantize(self, vector: np.ndarray) -> np.ndarray
"""Input: float32, Output: float16"""
def dequantize(self, codes: np.ndarray) -> np.ndarray
"""Input: float16, Output: float32"""
# Properties
num_subspaces: int
sub_dim: int
dim: int
Example:
training = np.random.randn(100, 16).astype(np.float32)
pq = pyvq.ProductQuantizer(
training_data=training,
num_subspaces=4,
num_centroids=8,
distance=pyvq.Distance.euclidean()
)
codes = pq.quantize(training[0])
TSVQ
class TSVQ:
"""Tree-structured vector quantizer using hierarchical clustering."""
def __init__(
self,
training_data: np.ndarray, # 2D float32 array
max_depth: int,
distance: Distance = None
)
def quantize(self, vector: np.ndarray) -> np.ndarray
"""Input: float32, Output: float16"""
def dequantize(self, codes: np.ndarray) -> np.ndarray
"""Input: float16, Output: float32"""
# Properties
dim: int
Example:
training = np.random.randn(100, 32).astype(np.float32)
tsvq = pyvq.TSVQ(
training_data=training,
max_depth=5,
distance=pyvq.Distance.squared_euclidean()
)
codes = tsvq.quantize(training[0])
Utility Functions
Type Summary
| Quantizer | Input | quantize() Output | dequantize() Output |
|---|---|---|---|
| BinaryQuantizer | float32 | uint8 | float32 |
| ScalarQuantizer | float32 | uint8 | float32 |
| ProductQuantizer | float32 | float16 | float32 |
| TSVQ | float32 | float16 | float32 |