Update README.md
Browse files
README.md
CHANGED
|
@@ -117,4 +117,12 @@ Title: *Transformers Don’t Need LayerNorm at Inference Time: Scaling LayerNorm
|
|
| 117 |
|
| 118 |
**BibTeX:**
|
| 119 |
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
**BibTeX:**
|
| 119 |
|
| 120 |
+
@misc{gpt2layernorm2025,
|
| 121 |
+
author = {Baroni, Luca and Khara, Galvin and Schaeffer, Joachim and Subkhankulov, Marat and Heimersheim, Stefan},
|
| 122 |
+
title = {Transformers Don't Need {LayerNorm} at Inference Time: Scaling {LayerNorm} Removal to {GPT-2 XL} and the Implications for Mechanistic Interpretability},
|
| 123 |
+
year = {2025},
|
| 124 |
+
eprint = {2507.02559},
|
| 125 |
+
archivePrefix = {arXiv},
|
| 126 |
+
primaryClass = {cs.LG},
|
| 127 |
+
url = {https://arxiv.org/abs/2507.02559v1}
|
| 128 |
+
}
|