Enables toggling of fused_dense, flash_rotary, and attn_pdrop in the configuration.
This commit is contained in:
parent
0254d42a95
commit
45f4b21525
@ -30,6 +30,9 @@ class MixFormerSequentialConfig(PretrainedConfig):
|
|||||||
n_head_kv: Optional[int] = None,
|
n_head_kv: Optional[int] = None,
|
||||||
rotary_dim: Optional[int] = 32,
|
rotary_dim: Optional[int] = 32,
|
||||||
activation_function: Optional[str] = "gelu_new",
|
activation_function: Optional[str] = "gelu_new",
|
||||||
|
flash_rotary: bool = False,
|
||||||
|
fused_dense: bool = False,
|
||||||
|
attn_pdrop: Optional[float] = 0.0,
|
||||||
embd_pdrop: Optional[float] = 0.0,
|
embd_pdrop: Optional[float] = 0.0,
|
||||||
resid_pdrop: Optional[float] = 0.0,
|
resid_pdrop: Optional[float] = 0.0,
|
||||||
layer_norm_epsilon: Optional[float] = 1e-5,
|
layer_norm_epsilon: Optional[float] = 1e-5,
|
||||||
@ -47,6 +50,9 @@ class MixFormerSequentialConfig(PretrainedConfig):
|
|||||||
self.n_head_kv = n_head_kv
|
self.n_head_kv = n_head_kv
|
||||||
self.rotary_dim = min(rotary_dim, n_embd // n_head)
|
self.rotary_dim = min(rotary_dim, n_embd // n_head)
|
||||||
self.activation_function = activation_function
|
self.activation_function = activation_function
|
||||||
|
self.flash_rotary = flash_rotary
|
||||||
|
self.fused_dense = fused_dense
|
||||||
|
self.attn_pdrop = attn_pdrop
|
||||||
self.embd_pdrop = embd_pdrop
|
self.embd_pdrop = embd_pdrop
|
||||||
self.resid_pdrop = resid_pdrop
|
self.resid_pdrop = resid_pdrop
|
||||||
self.layer_norm_epsilon = layer_norm_epsilon
|
self.layer_norm_epsilon = layer_norm_epsilon
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user