Skip to content

Commit

Permalink
Remove constraint_type parameter from CRF Tagger (allenai#2208)
Browse files Browse the repository at this point in the history
  • Loading branch information
schmmd authored Dec 19, 2018
1 parent eff25a3 commit 4cc4b6b
Show file tree
Hide file tree
Showing 9 changed files with 4 additions and 59 deletions.
25 changes: 0 additions & 25 deletions allennlp/models/crf_tagger.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from typing import Dict, Optional, List, Any
import warnings

from overrides import overrides
import torch
Expand Down Expand Up @@ -39,17 +38,6 @@ class CrfTagger(Model):
Label encoding to use when calculating span f1 and constraining
the CRF at decoding time. Valid options are "BIO", "BIOUL", "IOB1", "BMES".
Required if ``calculate_span_f1`` or ``constrain_crf_decoding`` is true.
constraint_type : ``str``, optional (default=``None``)
If provided, the CRF will be constrained at decoding time
to produce valid labels based on the specified type
(e.g. "BIO", or "BIOUL").
.. deprecated:: 0.6.1
``constraint_type`` was deprecated and replaced with
``label_encoding``, ``constrain_crf_decoding``, and
``calculate_span_f1`` in version 0.6.1. It will be removed
in version 0.8.
include_start_end_transitions : ``bool``, optional (default=``True``)
Whether to include start and end transition parameters in the CRF.
constrain_crf_decoding : ``bool``, optional (default=``None``)
Expand Down Expand Up @@ -81,7 +69,6 @@ def __init__(self, vocab: Vocabulary,
label_namespace: str = "labels",
feedforward: Optional[FeedForward] = None,
label_encoding: Optional[str] = None,
constraint_type: Optional[str] = None,
include_start_end_transitions: bool = True,
constrain_crf_decoding: bool = None,
calculate_span_f1: bool = None,
Expand Down Expand Up @@ -109,13 +96,6 @@ def __init__(self, vocab: Vocabulary,
self.tag_projection_layer = TimeDistributed(Linear(output_dim,
self.num_tags))

if constraint_type is not None:
warnings.warn("'constraint_type' was removed and replaced with"
"'label_encoding', 'constrain_crf_decoding', and "
"'calculate_span_f1' in version 0.6.1. It will be "
"removed in version 0.8.", DeprecationWarning)
label_encoding = constraint_type

# if constrain_crf_decoding and calculate_span_f1 are not
# provided, (i.e., they're None), set them to True
# if label_encoding is provided and False if it isn't.
Expand Down Expand Up @@ -152,11 +132,6 @@ def __init__(self, vocab: Vocabulary,
self._f1_metric = SpanBasedF1Measure(vocab,
tag_namespace=label_namespace,
label_encoding=label_encoding)
elif constraint_type is not None:
# Maintain deprecated behavior if constraint_type is provided
self._f1_metric = SpanBasedF1Measure(vocab,
tag_namespace=label_namespace,
label_encoding=constraint_type)

check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
"text field embedding dim", "encoder input dim")
Expand Down
2 changes: 1 addition & 1 deletion allennlp/pretrained.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def neural_coreference_resolution_lee_2017() -> predictors.CorefPredictor:
return model.predictor() # type: ignore

def named_entity_recognition_with_elmo_peters_2018() -> predictors.SentenceTaggerPredictor:
model = PretrainedModel('https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2018.04.30.tar.gz',
model = PretrainedModel('https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2019.12.18.tar.gz',
'sentence-tagger')
return model.predictor() # type: ignore

Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"validation_data_path": "allennlp/tests/fixtures/data/quarel.jsonl",
"model": {
"type": "crf_tagger",
"constraint_type": "BIO",
"label_encoding": "BIO",
"dropout": 0.2,
"include_start_end_transitions": false,
"text_field_embedder": {
Expand Down
Binary file not shown.
28 changes: 0 additions & 28 deletions allennlp/tests/models/crf_tagger_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,34 +25,6 @@ def test_simple_tagger_can_train_save_and_conll2000(self):
self.ensure_model_can_train_save_and_load(
self.FIXTURES_ROOT / 'crf_tagger' / 'experiment_conll2000.json')

def test_simple_tagger_constraint_type_deprecated(self):
params = Params({"model": {
"type": "crf_tagger",
"constraint_type": "IOB1",
"text_field_embedder": {
"token_embedders": {
"tokens": {
"type": "embedding",
"embedding_dim": 50
},
}
},
"encoder": {
"type": "gru",
"input_size": 50,
"hidden_size": 10,
"num_layers": 2,
"dropout": 0.5,
"bidirectional": True
}}})
with pytest.warns(DeprecationWarning):
model = Model.from_params(vocab=self.vocab,
params=params.pop("model"))
assert model._f1_metric is not None
assert model._f1_metric._label_encoding == "IOB1"
assert model.label_encoding == "IOB1"
assert model.crf._constraint_mask.sum().item() != (model.num_tags + 2)**2

@flaky
def test_batch_predictions_are_consistent(self):
self.ensure_batch_predictions_are_consistent()
Expand Down
2 changes: 1 addition & 1 deletion training_config/ner_elmo.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"validation_data_path": std.extVar("NER_TEST_A_PATH"),
"model": {
"type": "crf_tagger",
"constraint_type": "BIOUL",
"label_encoding": "BIOUL",
"dropout": 0.5,
"include_start_end_transitions": false,
"text_field_embedder": {
Expand Down
2 changes: 1 addition & 1 deletion training_config/quarel_tagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"validation_data_path": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/quarel/quarel-v1-dev.jsonl",
"model": {
"type": "crf_tagger",
"constraint_type": "BIO",
"label_encoding": "BIO",
"dropout": 0.2,
"include_start_end_transitions": false,
"text_field_embedder": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,6 @@ We don't *need* to, but we also make a few other changes
treated as out-of-vocabulary at evaluation time. The second flag just evaluates
the model on the test set when training stops. Use this flag cautiously,
when you're doing real science you don't want to evaluate on your test set too often.
* if you want to specify constraints for the CRF, you can add a `"model.constraint_type"`
to your config that indicates what sort of constraints the CRF tagger should use.


## Putting It All Together
Expand Down

0 comments on commit 4cc4b6b

Please sign in to comment.