This error occurs when you load, in SageMaker, a checkpoint that was created with Hugging Face Transformers on another OS (Windows, Ubuntu).
The SageMaker file system may be backed by S3, so it is not a normal file system.
As a result, the checkpoint file cannot be used as-is.
This problem is easy to solve.
When you create the model file, just save the weights with torch.save.
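A minimal sketch of that approach (the Hub model id "monologg/koelectra-base-v3-discriminator" is an assumption; the traceback below only shows local paths):

import torch
from transformers import AutoModelForSequenceClassification

# Build the model (the Hub id is an assumption; adjust to your own base model).
model = AutoModelForSequenceClassification.from_pretrained(
    "monologg/koelectra-base-v3-discriminator", num_labels=3
)

# ... fine-tune the model ...

# Save only the weights with torch.save, using the file name that
# from_pretrained() expects inside the 'trained' directory.
torch.save(model.state_dict(), "trained/pytorch_model.bin")

# Later (e.g., on SageMaker), rebuild the model and load the saved weights on CPU.
model = AutoModelForSequenceClassification.from_pretrained(
    "monologg/koelectra-base-v3-discriminator", num_labels=3
)
state_dict = torch.load("trained/pytorch_model.bin", map_location="cpu")
model.load_state_dict(state_dict)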
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/transformers/modeling_utils.py in load_state_dict(checkpoint_file)
348 try:
--> 349 return torch.load(checkpoint_file, map_location="cpu")
350 except Exception as e:
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/serialization.py in load(f, map_location, pickle_module, **pickle_load_args)
586 orig_position = opened_file.tell()
--> 587 with _open_zipfile_reader(opened_file) as opened_zipfile:
588 if _is_torchscript_zip(opened_zipfile):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/serialization.py in __init__(self, name_or_buffer)
241 def __init__(self, name_or_buffer) -> None:
--> 242 super(_open_zipfile_reader, self).__init__(torch._C.PyTorchFileReader(name_or_buffer))
243
RuntimeError: [enforce fail at inline_container.cc:145] . PytorchStreamReader failed reading zip archive: failed finding central directory
During handling of the above exception, another exception occurred:
UnicodeDecodeError Traceback (most recent call last)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/transformers/modeling_utils.py in load_state_dict(checkpoint_file)
352 with open(checkpoint_file) as f:
--> 353 if f.read().startswith("version"):
354 raise OSError(
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/codecs.py in decode(self, input, final)
320 data = self.buffer + input
--> 321 (result, consumed) = self._buffer_decode(data, self.errors, final)
322 # keep undecoded input until the next call
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 64: invalid start byte
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
<ipython-input-7-33d79adeebc4> in <module>
1 # Load the model and tokenizer to use
----> 2 model = AutoModelForSequenceClassification.from_pretrained('trained', num_labels=3)
3 tokenizer = AutoTokenizer.from_pretrained('koelectra-base-v3-discriminator')
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/transformers/models/auto/auto_factory.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
444 elif type(config) in cls._model_mapping.keys():
445 model_class = _get_model_class(config, cls._model_mapping)
--> 446 return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
447 raise ValueError(
448 f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/transformers/modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
1795 if not is_sharded:
1796 # Time to load the checkpoint
-> 1797 state_dict = load_state_dict(resolved_archive_file)
1798 # set dtype to instantiate the model under:
1799 # 1. If torch_dtype is not None, we use that dtype
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/transformers/modeling_utils.py in load_state_dict(checkpoint_file)
364 except (UnicodeDecodeError, ValueError):
365 raise OSError(
--> 366 f"Unable to load weights from pytorch checkpoint file for '{checkpoint_file}' "
367 f"at '{checkpoint_file}'. "
368 "If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True."
OSError: Unable to load weights from pytorch checkpoint file for 'trained/pytorch_model.bin' at 'trained/pytorch_model.bin'. If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True.