Enables toggling fused_dense, flash_rotary and attn_pdrop in the configuration.

This commit is contained in:
Gustavo de Rosa 2023-11-01 23:33:57 +00:00 committed by huggingface-web
parent 0254d42a95
commit 45f4b21525

@ -30,6 +30,9 @@ class MixFormerSequentialConfig(PretrainedConfig):
n_head_kv: Optional[int] = None, n_head_kv: Optional[int] = None,
rotary_dim: Optional[int] = 32, rotary_dim: Optional[int] = 32,
activation_function: Optional[str] = "gelu_new", activation_function: Optional[str] = "gelu_new",
flash_rotary: bool = False,
fused_dense: bool = False,
attn_pdrop: Optional[float] = 0.0,
embd_pdrop: Optional[float] = 0.0, embd_pdrop: Optional[float] = 0.0,
resid_pdrop: Optional[float] = 0.0, resid_pdrop: Optional[float] = 0.0,
layer_norm_epsilon: Optional[float] = 1e-5, layer_norm_epsilon: Optional[float] = 1e-5,
@ -47,6 +50,9 @@ class MixFormerSequentialConfig(PretrainedConfig):
self.n_head_kv = n_head_kv self.n_head_kv = n_head_kv
self.rotary_dim = min(rotary_dim, n_embd // n_head) self.rotary_dim = min(rotary_dim, n_embd // n_head)
self.activation_function = activation_function self.activation_function = activation_function
self.flash_rotary = flash_rotary
self.fused_dense = fused_dense
self.attn_pdrop = attn_pdrop
self.embd_pdrop = embd_pdrop self.embd_pdrop = embd_pdrop
self.resid_pdrop = resid_pdrop self.resid_pdrop = resid_pdrop
self.layer_norm_epsilon = layer_norm_epsilon self.layer_norm_epsilon = layer_norm_epsilon