{ "pruning_method": "global_magnitude_unstructured", "target_sparsity": 0.2, "actual_sparsity": 0.1514008178145814, "zero_params": 15572124, "total_params": 102853632, "pruned_layers": "attention + FFN (excluding embeddings, LayerNorm, classifier)", "usage_notes": { "loading": "Load with AutoModelForTokenClassification.from_pretrained() as usual", "inference": "No special requirements - works like any Hugging Face Transformers model", "sparse_format": "Weights are stored in dense format with pruned entries set to zero - consider torch.sparse for production", "quantization_compatible": "Can be further quantized to INT8 if needed" } }