@inproceedings{Zhang2021,
  title     = {Can {{Domain Pre-training Help Interdisciplinary Researchers}} from {{Data Annotation Poverty}}? {{A Case Study}} of {{Legal Argument Mining}} with {{BERT-based Transformers}}},
  booktitle = {Proceedings of the {{Workshop}} on {{Natural Language Processing}} for {{Digital Humanities}} ({{NLP4DH}})},
  author    = {Zhang, Gechuan and Lillis, David and Nulty, Paul},
  year      = {2021},
  pages     = {121--130},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.nlp4dh-1.14},
  abstract  = {Interdisciplinary Natural Language Processing (NLP) research traditionally suffers from the requirement for costly data annotation. However, transformer frameworks with pre-training have shown their ability on many downstream tasks, including digital humanities tasks with small datasets. Considering the fact that many digital humanities fields (e.g. law) feature an abundance of non-annotated textual resources, and the recent achievements led by transformer models, we pay special attention to whether domain pre-training will enhance transformers' performance on interdisciplinary tasks and how. In this work, we use legal argument mining as our case study. This aims to automatically identify text segments with particular linguistic structures (i.e., arguments) from legal documents and to predict the reasoning relations between marked arguments. Our work includes a broad survey of a wide range of BERT variants with different pre-training strategies. Our case study focuses on: the comparison of general pre-training and domain pre-training; the generalisability of different domain pre-trained transformers; and the potential of merging general pre-training with domain pre-training. We also achieve better results than the current transformer baseline in legal argument mining.},
}