Spaces:

MBZUAI
/

artst-demo-asr

Runtime error

App Files Files Community

artst-demo-asr / SpeechT5 /SpeechLM /speechlm /models /speechlm_ctcasr.py

amupd

SpeechT5 upload

62e9ca6 about 1 year ago

raw

history blame

2.12 kB

	# ----------------------------------------------------------------------------
	# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329)
	# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM
	# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4
	#
	# Copyright (c) 2022 Microsoft
	# Licensed under The MIT License [see LICENSE for details]
	# ----------------------------------------------------------------------------

	from dataclasses import dataclass
	from fairseq.models import BaseFairseqModel, register_model
	from fairseq.tasks import FairseqTask

	from fairseq.models.hubert import HubertAsrConfig, HubertCtc, HubertEncoder

	@dataclass
	class SpeechLMCtcConfig(HubertAsrConfig):
	    """Configuration dataclass for the ``speechlm_ctc`` model.

	    Declares no fields of its own; every option is inherited unchanged
	    from ``HubertAsrConfig``.
	    """


	@register_model("speechlm_ctc", dataclass=SpeechLMCtcConfig)
	class SpeechLMCtc(HubertCtc):
	    """``HubertCtc`` subclass registered with fairseq as ``speechlm_ctc``.

	    The only customisation visible in this class is that ``build_model``
	    wraps the model around a ``SpeechLMEncoder``.
	    """

	    def __init__(self, cfg: SpeechLMCtcConfig, w2v_encoder: BaseFairseqModel):
	        """Pass ``cfg`` and the already-built encoder straight to ``HubertCtc``."""
	        super().__init__(cfg, w2v_encoder)

	    @classmethod
	    def build_model(cls, cfg: SpeechLMCtcConfig, task: FairseqTask):
	        """Build a new model instance.

	        Args:
	            cfg: model configuration, also handed to the encoder.
	            task: task object forwarded to the ``SpeechLMEncoder`` constructor.

	        Returns:
	            An instance of ``cls`` wrapping a freshly built ``SpeechLMEncoder``.
	        """
	        return cls(cfg, SpeechLMEncoder(cfg, task))


	class SpeechLMEncoder(HubertEncoder):
	    """``HubertEncoder`` that can reuse the wrapped model's own CTC head.

	    When the task provides a target dictionary and ``self.w2v_model`` has a
	    ``unit_encoder_ctc_head`` attribute, that head replaces ``self.proj`` and
	    ``forward`` re-derives the padding mask through
	    ``w2v_model.downsample_ctc_padding_mask``. Otherwise the encoder behaves
	    exactly like its parent.
	    """

	    def __init__(self, cfg: HubertAsrConfig, task):
	        """Build the parent encoder, then decide whether to adopt the CTC head.

	        Args:
	            cfg: encoder configuration, passed unchanged to ``HubertEncoder``.
	            task: task object; only its ``target_dictionary`` is read here.
	        """
	        super().__init__(cfg, task)

	        # ``and`` short-circuits, so the model is only probed for the head
	        # when the task actually has a target dictionary.
	        use_model_head = task.target_dictionary is not None and hasattr(
	            self.w2v_model, "unit_encoder_ctc_head"
	        )
	        if use_model_head:
	            self.proj = self.w2v_model.unit_encoder_ctc_head
	        # Remembered so ``forward`` knows whether the mask must be downsampled.
	        self.conv_ctc_proj = use_model_head

	    def forward(self, source, padding_mask, tbc=True, **kwargs):
	        """Run the parent forward pass and fix up the padding masks if needed.

	        Args:
	            source: input forwarded unchanged to ``HubertEncoder.forward``.
	            padding_mask: padding mask forwarded unchanged to the parent.
	            tbc: forwarded positionally to the parent.
	            **kwargs: extra keyword arguments forwarded to the parent.

	        Returns:
	            The mapping produced by the parent. When ``conv_ctc_proj`` is set,
	            its ``"encoder_padding_mask"`` and ``"padding_mask"`` entries are
	            both replaced by the downsampled mask.
	        """
	        outputs = super().forward(source, padding_mask, tbc, **kwargs)
	        if not self.conv_ctc_proj:
	            return outputs

	        # NOTE(review): presumably the CTC head shortens the sequence, so the
	        # mask must be shortened to match -- confirm against the head's code.
	        ctc_mask = self.w2v_model.downsample_ctc_padding_mask(outputs["padding_mask"])
	        outputs["encoder_padding_mask"] = ctc_mask
	        outputs["padding_mask"] = ctc_mask
	        return outputs