Spaces:
Running
Running
File size: 3,471 Bytes
63ab978 6f0e822 63ab978 6f0e822 63ab978 6f0e822 63ab978 6f0e822 63ab978 6f0e822 40f2b57 63ab978 6f0e822 63ab978 6f0e822 63ab978 89f36c9 6f0e822 63ab978 6f0e822 63ab978 89f36c9 6f0e822 7e8138f 6f0e822 63ab978 6f0e822 63ab978 6f0e822 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import re
def timeformat_srt(time):
    """Format a time offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        time: Offset in seconds (int or float). Negative values are
            clamped to zero.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for ``3661.5``.
        Hours are zero-padded to two digits and grow beyond that if needed.
    """
    # Convert to whole milliseconds first. Truncating the float fraction on
    # its own turns 2.3 s into 299 ms, and rounding it on its own could give
    # 1000 ms without carrying into the seconds field.
    total_ms = max(0, int(round(time * 1000)))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
def timeformat_vtt(time):
    """Format a time offset in seconds as a WebVTT timestamp (``HH:MM:SS.mmm``).

    Args:
        time: Offset in seconds (int or float). Negative values are
            clamped to zero.

    Returns:
        The timestamp string, e.g. ``"01:01:01.500"`` for ``3661.5``.
        Hours are zero-padded to two digits and grow beyond that if needed.
    """
    # Convert to whole milliseconds first. Truncating the float fraction on
    # its own turns 2.3 s into 299 ms, and rounding it on its own could give
    # 1000 ms without carrying into the seconds field.
    total_ms = max(0, int(round(time * 1000)))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
def write_file(subtitle, output_file):
    """Write the ``subtitle`` string to ``output_file`` as UTF-8 text.

    The file is opened in ``'w'`` mode, so existing content is replaced.
    """
    target = open(output_file, mode='w', encoding='utf-8')
    with target:
        target.write(subtitle)
def get_srt(segments):
    """Render segments as the text of an SRT subtitle file.

    Args:
        segments: Iterable of mappings with ``'start'`` and ``'end'``
            (each passed to ``timeformat_srt``) and ``'text'`` (a string).
            The mappings are not modified.

    Returns:
        One string with a numbered cue per segment (numbering starts at 1),
        each followed by a blank line. Empty string for no segments.
    """
    cues = []
    for number, segment in enumerate(segments, start=1):
        # Drop a single leading space on a local copy: writing it back into
        # the segment would make later calls strip yet another character.
        text = segment['text']
        if text.startswith(' '):
            text = text[1:]
        start = timeformat_srt(segment['start'])
        end = timeformat_srt(segment['end'])
        cues.append(f"{number}\n{start} --> {end}\n{text}\n\n")
    # Join once instead of growing a string with += inside the loop.
    return "".join(cues)
def get_vtt(segments):
    """Render segments as the text of a WebVTT subtitle file.

    Args:
        segments: Iterable of mappings with ``'start'`` and ``'end'``
            (each passed to ``timeformat_vtt``) and ``'text'`` (a string).
            The mappings are not modified.

    Returns:
        One string: the header line, a blank line, then a numbered cue per
        segment (numbering starts at 1), each followed by a blank line.
    """
    # NOTE(review): the WebVTT specification spells the file signature
    # "WEBVTT". The literal is kept unchanged here because get_serialized_vtt
    # emits the same spelling and parse_vtt matches the header when skipping
    # it -- confirm and change them together.
    parts = ["WebVTT\n\n"]
    for number, segment in enumerate(segments, start=1):
        # Drop a single leading space on a local copy: writing it back into
        # the segment would make later calls strip yet another character.
        text = segment['text']
        if text.startswith(' '):
            text = text[1:]
        start = timeformat_vtt(segment['start'])
        end = timeformat_vtt(segment['end'])
        parts.append(f"{number}\n{start} --> {end}\n{text}\n\n")
    # Join once instead of growing a string with += inside the loop.
    return "".join(parts)
def get_txt(segments):
    """Render segments as plain text, one segment per line.

    Args:
        segments: Iterable of mappings with a ``'text'`` string.
            The mappings are not modified.

    Returns:
        The segment texts, each with at most one leading space removed and
        terminated by a newline. Empty string for no segments.
    """
    lines = []
    for segment in segments:
        # Strip on a local copy so repeated calls give the same output and
        # the caller's data stays untouched.
        text = segment['text']
        if text.startswith(' '):
            text = text[1:]
        lines.append(f"{text}\n")
    return "".join(lines)
def parse_srt(file_path):
    """Read an SRT file and return its cues as a list of dicts.

    Args:
        file_path: Path of the SRT file, encoded as UTF-8 with or without a
            byte-order mark.

    Returns:
        A list with one dict per cue, in file order. Each dict holds the
        strings ``"index"`` (first line of the block), ``"timestamp"``
        (second line) and ``"sentence"`` (remaining lines joined by single
        spaces).
    """
    # 'utf-8-sig' drops a leading BOM so it cannot end up inside the first
    # cue's index; files without a BOM are read exactly as plain UTF-8.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        srt_data = file.read()

    data = []
    # A separator line may contain stray spaces or tabs; a plain '\n\n'
    # split would merge the two neighbouring cues in that case.
    for block in re.split(r'\n[ \t]*\n', srt_data):
        lines = block.strip().split('\n')
        if len(lines) < 2:
            # Empty chunk, or a fragment with no timestamp line: it cannot
            # form a cue, so skip it instead of failing with IndexError.
            continue
        data.append({
            "index": lines[0],
            "timestamp": lines[1],
            "sentence": ' '.join(lines[2:])
        })
    return data
def parse_vtt(file_path):
    """Read a WebVTT file and return its cues as a list of dicts.

    Args:
        file_path: Path of the WebVTT file, encoded as UTF-8 with or without
            a byte-order mark.

    Returns:
        A list with one dict per cue, in file order. Each dict holds the
        strings ``"index"`` (first line of the block), ``"timestamp"``
        (second line) and ``"sentence"`` (remaining lines joined by single
        spaces). The header block is not included.
    """
    # 'utf-8-sig' drops a leading BOM so the header check below still sees
    # the signature at the very start of the first block.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        webvtt_data = file.read()

    data = []
    # A separator line may contain stray spaces or tabs; a plain '\n\n'
    # split would merge the two neighbouring blocks in that case.
    for block in re.split(r'\n[ \t]*\n', webvtt_data):
        stripped = block.strip()
        # Match the header in any letter case: standard files spell it
        # "WEBVTT", while the writers in this module emit "WebVTT".
        if stripped.upper().startswith("WEBVTT"):
            continue
        lines = stripped.split('\n')
        if len(lines) < 2:
            # Empty chunk, or a fragment with no timestamp line: it cannot
            # form a cue, so skip it instead of failing with IndexError.
            continue
        data.append({
            "index": lines[0],
            "timestamp": lines[1],
            "sentence": ' '.join(lines[2:])
        })
    return data
def get_serialized_srt(dicts):
    """Turn cue dicts back into SRT text.

    Each dict contributes its ``"index"``, ``"timestamp"`` and ``"sentence"``
    values on three consecutive lines followed by a blank line.
    """
    return "".join(
        f'{cue["index"]}\n{cue["timestamp"]}\n{cue["sentence"]}\n\n'
        for cue in dicts
    )
def get_serialized_vtt(dicts):
    """Turn cue dicts back into WebVTT text.

    The result starts with the header line and a blank line; each dict then
    contributes its ``"index"``, ``"timestamp"`` and ``"sentence"`` values on
    three consecutive lines followed by a blank line.
    """
    # NOTE(review): the WebVTT specification spells the signature "WEBVTT";
    # the literal below is reproduced unchanged -- confirm before altering.
    header = "WebVTT\n\n"
    body = "".join(
        f'{cue["index"]}\n{cue["timestamp"]}\n{cue["sentence"]}\n\n'
        for cue in dicts
    )
    return header + body
def safe_filename(name):
    r"""Return ``name`` with certain characters replaced by underscores.

    Every occurrence of ``< > : " / \ | ? *`` and every control character
    in the range U+0000 to U+001F is replaced by a single ``_``; all other
    characters are kept as they are.
    """
    invalid_chars = re.compile(r'[<>:"/\\|?*\x00-\x1f]')
    return invalid_chars.sub('_', name)
|