冒险解谜游戏中文网 ChinaAVG
标题:
【汉化工具系列 #2】指定wave格式转换为srt格式字幕(CPU版本)
[打印本页]
作者:
shane007
时间:
2023-9-4 11:03
标题:
【汉化工具系列 #2】指定wave格式转换为srt格式字幕(CPU版本)
本帖最后由 shane007 于 2023-9-4 12:57 编辑

本程序是在 Whisper.net.Demo 基础上修改的,可以批量识别 wav 文件。
使用时需要输入语言、源文件路径和目标文件路径,
最后输出 srt 文件。
代码如下。
以下这句使用多线程可以提速,否则很慢。
以下这份代码目前还只能使用 CPU,速度依旧比较慢;使用 GPU 的方法另行研究。
// Configure the processor builder with the chosen language, the WithSpeedUp2x
// option and 16 threads (the multi-threading line the post refers to).
var builder = factory.CreateBuilder()
    .WithLanguage(languageOption).WithSpeedUp2x().WithThreads(16);
复制代码
// Licensed under the MIT license: https://opensource.org/licenses/MIT

using System;
using System.Diagnostics;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using CommandLine;
using Whisper.net;
using Whisper.net.Ggml;
using Whisper.net.Wave;

// Program entry point: parse the command-line arguments into an Options
// instance and hand the parsed options to Demo.
await Parser.Default.ParseArguments<Options>(args)
    .WithParsedAsync(Demo);

// Makes sure the model file named by opt.ModelName exists (downloading the
// model of type opt.ModelType when it does not), then dispatches on
// opt.Command:
//   "lang-detect"              -> LanguageIdentification
//   "transcribe" / "translate" -> FullDetection
//   anything else              -> prints "Unknown command"
async Task Demo(Options opt)
{
    if (!File.Exists(opt.ModelName))
    {
        Console.WriteLine($"Downloading Model {opt.ModelName}");

        // Download into a temporary sibling file and move it into place only
        // after the copy has finished. Writing straight to opt.ModelName would
        // leave a truncated file behind if the download is interrupted, and the
        // File.Exists check above would then accept that file on the next run.
        var partialPath = opt.ModelName + ".download";
        using (var modelStream = await WhisperGgmlDownloader.GetGgmlModelAsync(opt.ModelType))
        using (var fileWriter = File.Create(partialPath))
        {
            await modelStream.CopyToAsync(fileWriter);
        }

        File.Move(partialPath, opt.ModelName, overwrite: true);
    }

    switch (opt.Command)
    {
        case "lang-detect":
            LanguageIdentification(opt);
            break;
        case "transcribe":
        case "translate":
            await FullDetection(opt);
            break;
        default:
            Console.WriteLine("Unknown command");
            break;
    }
}

// Loads the model file opt.ModelName into memory, reads the audio samples of
// opt.FileName through WaveParser, asks the processor to detect the language
// and prints the result to the console.
void LanguageIdentification(Options opt)
{
    var bufferedModel = File.ReadAllBytes(opt.ModelName);

    // Same factory can be used by multiple task to create processors.
    using var factory = WhisperFactory.FromBuffer(bufferedModel);

    // The builder language is fixed to "english" here; opt.Language is not
    // consulted by this command.
    var builder = factory.CreateBuilder()
        .WithLanguage("english");
    using var processor = builder.Build();

    using var fileStream = File.OpenRead(opt.FileName);
    var wave = new WaveParser(fileStream);
    var samples = wave.GetAvgSamples();

    var language = processor.DetectLanguage(samples, speedUp: true);
    Console.WriteLine("Language is " + language);
}

// Interactive batch transcription / translation.
// Prompts on the console for the language, the source directory containing
// .wav files and the target directory, then converts every .wav file found
// under the source directory into an .srt file via ProcessFilesAsync.
// When opt.Command is "translate" the builder is switched to translation.
// Both directories must already exist; otherwise a message is printed and
// nothing is processed.
async Task FullDetection(Options opt)
{
    // Same factory can be used by multiple task to create processors.
    using var factory = WhisperFactory.FromPath(opt.ModelName);

    Console.WriteLine("请输入语言选项(例如:english,chinese, japanese等):");
    // ReadLine returns null when stdin is closed, and an empty line is not a
    // usable language name; fall back to the --lang option in both cases.
    var languageOption = Console.ReadLine()?.Trim();
    if (string.IsNullOrEmpty(languageOption))
    {
        languageOption = opt.Language;
    }

    var builder = factory.CreateBuilder()
        .WithLanguage(languageOption).WithSpeedUp2x().WithThreads(16);

    if (opt.Command == "translate")
    {
        builder.WithTranslate();
    }

    // Dispose the processor when this function exits, matching how
    // LanguageIdentification handles its processor.
    using var processor = builder.Build();

    Console.WriteLine("请输入wave源文件目录:");
    var sourceDirectory = Console.ReadLine()?.Trim();

    Console.WriteLine("请输入目标文件目录:");
    var targetDirectory = Console.ReadLine()?.Trim();

    // Directory.Exists returns false for null/empty input, so a closed stdin
    // or a blank answer ends up here as well.
    if (!Directory.Exists(sourceDirectory) || !Directory.Exists(targetDirectory))
    {
        Console.WriteLine("目录不存在,请检查输入的目录路径。");
        return;
    }

    await ProcessFilesAsync(processor, sourceDirectory, targetDirectory);

    Console.WriteLine("处理完成!");
}

// Converts every .wav file under sourceDirectory (searched recursively) into
// an .srt subtitle file at the same relative path under targetDirectory.
//   processor       - processor used to transcribe each file.
//   sourceDirectory - root directory scanned for "*.wav" files.
//   targetDirectory - root directory that receives the ".srt" files;
//                     sub-directories are created as needed.
// Files whose .srt already exists are skipped, so an interrupted batch can be
// resumed by running the tool again.
// NOTE(review): an .srt left incomplete by an interrupted run also counts as
// "already exists" and is skipped; delete it by hand to regenerate it.
static async Task ProcessFilesAsync(WhisperProcessor processor,
    string sourceDirectory, string targetDirectory)
{
    var files = Directory.GetFiles(sourceDirectory, "*.wav", SearchOption.AllDirectories);

    foreach (var sourceFilePath in files)
    {
        // Mirror the source tree: same relative path, ".srt" extension.
        string relativePath = Path.GetRelativePath(sourceDirectory, sourceFilePath);
        string destinationFilePath = Path.ChangeExtension(
            Path.Combine(targetDirectory, relativePath), ".srt");

        Directory.CreateDirectory(Path.GetDirectoryName(destinationFilePath));

        if (File.Exists(destinationFilePath))
        {
            Console.WriteLine($"srt文件已经存在:{destinationFilePath}");
            continue;
        }

        Console.WriteLine($"正在处理文件:{sourceFilePath}");

        using var fileStream = File.OpenRead(sourceFilePath);
        using var writer = new StreamWriter(destinationFilePath);
        var segmentIndex = 1;

        // Stopwatch measures elapsed time independently of wall-clock changes.
        var stopwatch = Stopwatch.StartNew();

        await foreach (var segment in processor.ProcessAsync(fileStream, CancellationToken.None))
        {
            // SRT cue: index line, "start --> end" time line, text, blank line.
            var indexLine = $"{segmentIndex}";
            var timeLine = $"{segment.Start:hh\\:mm\\:ss\\,fff} --> {segment.End:hh\\:mm\\:ss\\,fff}";

            // Echo the cue to the console.
            Console.WriteLine(indexLine);
            Console.WriteLine(timeLine);
            Console.WriteLine(segment.Text);
            Console.WriteLine();

            // Append the cue to the .srt file.
            await writer.WriteLineAsync(indexLine);
            await writer.WriteLineAsync(timeLine);
            await writer.WriteLineAsync(segment.Text);
            await writer.WriteLineAsync();

            // Flush after every cue so the text produced so far is on disk
            // even if a long-running transcription is interrupted.
            await writer.FlushAsync();

            segmentIndex++;
        }

        var elapsedMinutes = stopwatch.Elapsed.TotalMinutes;
        Console.WriteLine($"已生成srt文件:{destinationFilePath}");
        Console.WriteLine($"生成耗费时间:{elapsedMinutes} 分钟");
    }
}

/// <summary>
/// Command-line options of the tool, bound by the CommandLine parser.
/// </summary>
public class Options
{
    /// <summary>Command to run: "lang-detect", "transcribe" or "translate".</summary>
    [Option('t', "command", Required = false, HelpText = "Command to run (lang-detect, transcribe or translate)", Default = "transcribe")]
    public string Command { get; set; }

    /// <summary>Audio file read by the "lang-detect" command.</summary>
    [Option('f', "file", Required = false, HelpText = "File to process", Default = "test.mp3")]
    public string FileName { get; set; }

    /// <summary>Language given on the command line; defaults to "auto".</summary>
    [Option('l', "lang", Required = false, HelpText = "Language", Default = "auto")]
    public string Language { get; set; }

    /// <summary>Path of the ggml model file to load (downloaded first when missing).</summary>
    // NOTE(review): the default file name says "large" while the default
    // download type below is GgmlType.Base, so a first run would store a Base
    // model under this name - confirm which of the two defaults is intended.
    [Option('m', "modelFile", Required = false, HelpText = "Model to use (filename)", Default = "ggml-large.bin")]
    public string ModelName { get; set; }

    /// <summary>Model type to download when the model file does not exist.</summary>
    [Option('g', "ggml", Required = false, HelpText = "Ggml Model type to download (if not exists)", Default = GgmlType.Base)]
    public GgmlType ModelType { get; set; }
}
复制代码

欢迎光临 冒险解谜游戏中文网 ChinaAVG (https://chinaavg.com/)
Powered by Discuz! X3.2