LICENSE
README.md
pyproject.toml
setup.py
apex/__init__.py
apex/_autocast_utils.py
apex.egg-info/PKG-INFO
apex.egg-info/SOURCES.txt
apex.egg-info/dependency_links.txt
apex.egg-info/requires.txt
apex.egg-info/top_level.txt
apex/contrib/__init__.py
apex/contrib/bottleneck/__init__.py
apex/contrib/bottleneck/bottleneck.py
apex/contrib/bottleneck/halo_exchangers.py
apex/contrib/bottleneck/test.py
apex/contrib/clip_grad/__init__.py
apex/contrib/clip_grad/clip_grad.py
apex/contrib/conv_bias_relu/__init__.py
apex/contrib/conv_bias_relu/conv_bias_relu.py
apex/contrib/cudnn_gbn/__init__.py
apex/contrib/cudnn_gbn/batch_norm.py
apex/contrib/fmha/__init__.py
apex/contrib/fmha/fmha.py
apex/contrib/focal_loss/__init__.py
apex/contrib/focal_loss/focal_loss.py
apex/contrib/gpu_direct_storage/__init__.py
apex/contrib/group_norm/__init__.py
apex/contrib/group_norm/group_norm.py
apex/contrib/groupbn/__init__.py
apex/contrib/groupbn/batch_norm.py
apex/contrib/index_mul_2d/__init__.py
apex/contrib/index_mul_2d/index_mul_2d.py
apex/contrib/layer_norm/__init__.py
apex/contrib/layer_norm/layer_norm.py
apex/contrib/multihead_attn/__init__.py
apex/contrib/multihead_attn/encdec_multihead_attn.py
apex/contrib/multihead_attn/encdec_multihead_attn_func.py
apex/contrib/multihead_attn/fast_encdec_multihead_attn_func.py
apex/contrib/multihead_attn/fast_encdec_multihead_attn_norm_add_func.py
apex/contrib/multihead_attn/fast_self_multihead_attn_func.py
apex/contrib/multihead_attn/fast_self_multihead_attn_norm_add_func.py
apex/contrib/multihead_attn/mask_softmax_dropout_func.py
apex/contrib/multihead_attn/self_multihead_attn.py
apex/contrib/multihead_attn/self_multihead_attn_func.py
apex/contrib/nccl_allocator/__init__.py
apex/contrib/nccl_allocator/nccl_allocator.py
apex/contrib/openfold_triton/__init__.py
apex/contrib/openfold_triton/_layer_norm_backward_kernels.py
apex/contrib/openfold_triton/_layer_norm_config_ampere.py
apex/contrib/openfold_triton/_layer_norm_config_hopper.py
apex/contrib/openfold_triton/_layer_norm_forward_kernels.py
apex/contrib/openfold_triton/_mha_kernel.py
apex/contrib/openfold_triton/fused_adam_swa.py
apex/contrib/openfold_triton/layer_norm.py
apex/contrib/openfold_triton/mha.py
apex/contrib/optimizers/__init__.py
apex/contrib/optimizers/distributed_fused_adam.py
apex/contrib/optimizers/distributed_fused_lamb.py
apex/contrib/optimizers/fp16_optimizer.py
apex/contrib/optimizers/fused_adam.py
apex/contrib/optimizers/fused_lamb.py
apex/contrib/optimizers/fused_sgd.py
apex/contrib/peer_memory/__init__.py
apex/contrib/peer_memory/peer_halo_exchanger_1d.py
apex/contrib/peer_memory/peer_memory.py
apex/contrib/sparsity/__init__.py
apex/contrib/sparsity/asp.py
apex/contrib/sparsity/permutation_lib.py
apex/contrib/sparsity/sparse_masklib.py
apex/contrib/sparsity/permutation_search_kernels/__init__.py
apex/contrib/sparsity/permutation_search_kernels/call_permutation_search_kernels.py
apex/contrib/sparsity/permutation_search_kernels/channel_swap.py
apex/contrib/sparsity/permutation_search_kernels/exhaustive_search.py
apex/contrib/sparsity/permutation_search_kernels/permutation_utilities.py
apex/contrib/test/__init__.py
apex/contrib/test/bottleneck/__init__.py
apex/contrib/test/bottleneck/test_bottleneck_module.py
apex/contrib/test/clip_grad/__init__.py
apex/contrib/test/clip_grad/test_clip_grad.py
apex/contrib/test/conv_bias_relu/__init__.py
apex/contrib/test/conv_bias_relu/test_conv_bias_relu.py
apex/contrib/test/cudnn_gbn/__init__.py
apex/contrib/test/cudnn_gbn/test_cudnn_gbn_with_two_gpus.py
apex/contrib/test/fmha/__init__.py
apex/contrib/test/fmha/test_fmha.py
apex/contrib/test/focal_loss/__init__.py
apex/contrib/test/focal_loss/test_focal_loss.py
apex/contrib/test/group_norm/__init__.py
apex/contrib/test/group_norm/test_group_norm.py
apex/contrib/test/index_mul_2d/__init__.py
apex/contrib/test/index_mul_2d/test_index_mul_2d.py
apex/contrib/test/layer_norm/__init__.py
apex/contrib/test/layer_norm/test_fast_layer_norm.py
apex/contrib/test/multihead_attn/__init__.py
apex/contrib/test/multihead_attn/test_encdec_multihead_attn.py
apex/contrib/test/multihead_attn/test_encdec_multihead_attn_norm_add.py
apex/contrib/test/multihead_attn/test_fast_self_multihead_attn_bias.py
apex/contrib/test/multihead_attn/test_mha_fused_softmax.py
apex/contrib/test/multihead_attn/test_self_multihead_attn.py
apex/contrib/test/multihead_attn/test_self_multihead_attn_norm_add.py
apex/contrib/test/optimizers/__init__.py
apex/contrib/test/optimizers/test_dist_adam.py
apex/contrib/test/optimizers/test_distributed_fused_lamb.py
apex/contrib/test/peer_memory/__init__.py
apex/contrib/test/peer_memory/test_peer_halo_exchange_module.py
apex/contrib/test/transducer/__init__.py
apex/contrib/test/transducer/test_transducer_joint.py
apex/contrib/test/transducer/test_transducer_loss.py
apex/contrib/test/xentropy/__init__.py
apex/contrib/test/xentropy/test_label_smoothing.py
apex/contrib/torchsched/__init__.py
apex/contrib/torchsched/backend.py
apex/contrib/torchsched/config.py
apex/contrib/torchsched/inductor/__init__.py
apex/contrib/torchsched/inductor/_utils.py
apex/contrib/torchsched/inductor/event.py
apex/contrib/torchsched/inductor/graph.py
apex/contrib/torchsched/inductor/scheduler.py
apex/contrib/torchsched/inductor/wrapper.py
apex/contrib/torchsched/ops/__init__.py
apex/contrib/torchsched/ops/layer_norm.py
apex/contrib/torchsched/passes/__init__.py
apex/contrib/torchsched/passes/pre_grad_passes.py
apex/contrib/transducer/__init__.py
apex/contrib/transducer/_transducer_ref.py
apex/contrib/transducer/transducer.py
apex/contrib/xentropy/__init__.py
apex/contrib/xentropy/softmax_xentropy.py
apex/fused_dense/__init__.py
apex/fused_dense/fused_dense.py
apex/mlp/__init__.py
apex/mlp/mlp.py
apex/multi_tensor_apply/__init__.py
apex/multi_tensor_apply/multi_tensor_apply.py
apex/normalization/__init__.py
apex/normalization/fused_layer_norm.py
apex/optimizers/__init__.py
apex/optimizers/fused_adagrad.py
apex/optimizers/fused_adam.py
apex/optimizers/fused_lamb.py
apex/optimizers/fused_mixed_precision_lamb.py
apex/optimizers/fused_novograd.py
apex/optimizers/fused_sgd.py
apex/transformer/__init__.py
apex/transformer/_ucc_util.py
apex/transformer/enums.py
apex/transformer/log_util.py
apex/transformer/microbatches.py
apex/transformer/parallel_state.py
apex/transformer/utils.py
apex/transformer/_data/__init__.py
apex/transformer/_data/_batchsampler.py
apex/transformer/amp/__init__.py
apex/transformer/amp/grad_scaler.py
apex/transformer/functional/__init__.py
apex/transformer/functional/fused_rope.py
apex/transformer/functional/fused_softmax.py
apex/transformer/layers/__init__.py
apex/transformer/layers/layer_norm.py
apex/transformer/pipeline_parallel/__init__.py
apex/transformer/pipeline_parallel/_timers.py
apex/transformer/pipeline_parallel/p2p_communication.py
apex/transformer/pipeline_parallel/utils.py
apex/transformer/pipeline_parallel/schedules/__init__.py
apex/transformer/pipeline_parallel/schedules/common.py
apex/transformer/pipeline_parallel/schedules/fwd_bwd_no_pipelining.py
apex/transformer/pipeline_parallel/schedules/fwd_bwd_pipelining_with_interleaving.py
apex/transformer/pipeline_parallel/schedules/fwd_bwd_pipelining_without_interleaving.py
apex/transformer/tensor_parallel/__init__.py
apex/transformer/tensor_parallel/cross_entropy.py
apex/transformer/tensor_parallel/data.py
apex/transformer/tensor_parallel/layers.py
apex/transformer/tensor_parallel/mappings.py
apex/transformer/tensor_parallel/memory.py
apex/transformer/tensor_parallel/random.py
apex/transformer/tensor_parallel/utils.py
apex/transformer/testing/__init__.py
apex/transformer/testing/arguments.py
apex/transformer/testing/commons.py
apex/transformer/testing/distributed_test_base.py
apex/transformer/testing/global_vars.py
apex/transformer/testing/standalone_bert.py
apex/transformer/testing/standalone_gpt.py
apex/transformer/testing/standalone_transformer_lm.py
csrc/amp_C_frontend.cpp
csrc/flatten_unflatten.cpp
csrc/fused_dense.cpp
csrc/fused_dense_cuda.cu
csrc/layer_norm_cuda.cpp
csrc/layer_norm_cuda_kernel.cu
csrc/mlp.cpp
csrc/mlp_cuda.cu
csrc/multi_tensor_adagrad.cu
csrc/multi_tensor_adam.cu
csrc/multi_tensor_axpby_kernel.cu
csrc/multi_tensor_l2norm_kernel.cu
csrc/multi_tensor_l2norm_kernel_mp.cu
csrc/multi_tensor_l2norm_scale_kernel.cu
csrc/multi_tensor_lamb.cu
csrc/multi_tensor_lamb_mp.cu
csrc/multi_tensor_lamb_stage_1.cu
csrc/multi_tensor_lamb_stage_2.cu
csrc/multi_tensor_novograd.cu
csrc/multi_tensor_scale_kernel.cu
csrc/multi_tensor_sgd_kernel.cu
csrc/syncbn.cpp
csrc/update_scale_hysteresis.cu
csrc/welford.cu
csrc/megatron/fused_rotary_positional_embedding.cpp
csrc/megatron/fused_rotary_positional_embedding_cuda.cu
csrc/megatron/fused_weight_gradient_dense.cpp
csrc/megatron/fused_weight_gradient_dense_16bit_prec_cuda.cu
csrc/megatron/fused_weight_gradient_dense_cuda.cu
csrc/megatron/generic_scaled_masked_softmax.cpp
csrc/megatron/generic_scaled_masked_softmax_cuda.cu
csrc/megatron/scaled_masked_softmax.cpp
csrc/megatron/scaled_masked_softmax_cuda.cu
csrc/megatron/scaled_softmax.cpp
csrc/megatron/scaled_softmax_cuda.cu
csrc/megatron/scaled_upper_triang_masked_softmax.cpp
csrc/megatron/scaled_upper_triang_masked_softmax_cuda.cu