本帖最后由 shane007 于 2023-9-4 12:57 编辑
本程序是在Whisper.net.Demo基础上修改的,可以批量识别wav文件,
使用时需要输入语言,源文件路径和目标文件路径
最后是输出srt文件
代码如下
以下这句用多线程可以增速,否则很慢
以下这份代码,还只能使用到CPU,速度依旧比较慢,使用GPU的方法另行研究。
var builder = factory.CreateBuilder()
    .WithLanguage(languageOption).WithSpeedUp2x().WithThreads(16);
复制代码
& `# N( x4 ^+ C* s/ ?8 b p; @
- // Licensed under the MIT license: https://opensource.org/licenses/MIT
2 x% E, x# m# e1 h' C- G
% i. g4 f2 U2 h% i5 J# f- using System;' Z) V& p( \- M% W+ f; y
- using System.Diagnostics;. r7 r9 @' U8 t1 K
- using System.IO;
, Z" i! F4 t$ ? Y5 ]1 @ y - using System.Threading;
2 v' j) j& h0 [( A7 _ - using System.Threading.Tasks;+ V, b9 p) u, A: h! j
- using CommandLine;
9 U! o3 `, \- F# w3 L* q6 i2 M1 ? - using Whisper.net;! C+ i6 G# k& a
- using Whisper.net.Ggml;9 [( j: w N; Q, V* U P
- using Whisper.net.Wave;
/ @1 N7 c: C( ?9 F6 o - ! P" ?; J# Y5 h+ k' l6 `5 ?
// Entry point (top-level statements): parse the command line into Options and,
// when parsing succeeds, hand the parsed values to Demo.
await Parser.Default.ParseArguments<Options>(args)
    .WithParsedAsync(Demo);
/ c; N1 l2 \- H: D/ }+ f
// Makes sure the model file named by opt.ModelName exists (downloading the
// opt.ModelType model when it does not) and then dispatches on opt.Command:
//   "lang-detect"              -> LanguageIdentification
//   "transcribe" / "translate" -> FullDetection
// Any other command prints "Unknown command".
async Task Demo(Options opt)
{
    if (!File.Exists(opt.ModelName))
    {
        Console.WriteLine($"Downloading Model {opt.ModelName}");

        // Download next to the final path and move into place only after the
        // copy completed. Writing straight to opt.ModelName would leave a
        // truncated file behind on failure, and the File.Exists check above
        // would then accept that broken model on the next run.
        var partialModelPath = opt.ModelName + ".download";
        try
        {
            using (var modelStream = await WhisperGgmlDownloader.GetGgmlModelAsync(opt.ModelType))
            using (var fileWriter = File.Create(partialModelPath))
            {
                await modelStream.CopyToAsync(fileWriter);
            }

            File.Move(partialModelPath, opt.ModelName);
        }
        catch
        {
            // Best-effort cleanup; File.Delete is a no-op when the file is absent.
            File.Delete(partialModelPath);
            throw;
        }
    }

    switch (opt.Command)
    {
        case "lang-detect":
            LanguageIdentification(opt);
            break;
        case "transcribe":
        case "translate":
            await FullDetection(opt);
            break;
        default:
            Console.WriteLine("Unknown command");
            break;
    }
}
* D* m$ u! E( K - 9 r# g- A* k' Y* n
// Detects the spoken language of the wave file opt.FileName using the model
// file opt.ModelName and prints "Language is <result>" to the console.
void LanguageIdentification(Options opt)
{
    // The whole model is read into memory and the factory is built from that buffer.
    var bufferedModel = File.ReadAllBytes(opt.ModelName);

    // Same factory can be used by multiple tasks to create processors.
    using var factory = WhisperFactory.FromBuffer(bufferedModel);

    // Honour the --lang option instead of a hard-coded value; "english" (the
    // previously hard-coded language) is kept as the fallback when none is set.
    var language = string.IsNullOrWhiteSpace(opt.Language) ? "english" : opt.Language;
    var builder = factory.CreateBuilder()
        .WithLanguage(language);
    using var processor = builder.Build();

    using var fileStream = File.OpenRead(opt.FileName);

    // WaveParser reads the stream as wave data; GetAvgSamples yields the
    // samples handed to the detector.
    var wave = new WaveParser(fileStream);
    var samples = wave.GetAvgSamples();

    var detectedLanguage = processor.DetectLanguage(samples, speedUp: true);
    Console.WriteLine("Language is " + detectedLanguage);
}
. ~( W9 m n% R* ^, C0 \' G - $ E) T6 o" d6 `; Y
// Interactive batch transcription/translation.
// Prompts on the console for the language, the source directory containing
// .wav files and the target directory for the .srt output, then runs
// ProcessFilesAsync over the source tree. When opt.Command is "translate" the
// processor is configured with WithTranslate(). Returns early (after printing
// a message) when either directory does not exist.
async Task FullDetection(Options opt)
{
    // Thread count passed to the processor builder; the post notes that
    // multithreading is what makes processing acceptably fast.
    const int workerThreads = 16;

    // Same factory can be used by multiple tasks to create processors.
    using var factory = WhisperFactory.FromPath(opt.ModelName);

    Console.WriteLine("请输入语言选项(例如:english,chinese, japanese等):");
    string languageOption = Console.ReadLine();
    if (string.IsNullOrWhiteSpace(languageOption))
    {
        // Console.ReadLine returns null at end of input and "" on a bare
        // Enter; fall back to the --lang option rather than passing that on.
        languageOption = opt.Language;
    }
    else
    {
        languageOption = languageOption.Trim();
    }

    Console.WriteLine("请输入wave源文件目录:");
    string sourceDirectory = Console.ReadLine();

    Console.WriteLine("请输入目标文件目录:");
    string targetDirectory = Console.ReadLine();

    // Directory.Exists returns false for null/empty paths, so this also
    // covers a closed or empty input.
    if (!Directory.Exists(sourceDirectory) || !Directory.Exists(targetDirectory))
    {
        Console.WriteLine("目录不存在,请检查输入的目录路径。");
        return;
    }

    var builder = factory.CreateBuilder()
        .WithLanguage(languageOption).WithSpeedUp2x().WithThreads(workerThreads);

    if (opt.Command == "translate")
    {
        builder.WithTranslate();
    }

    // Built only after the inputs are validated, and disposed when the method
    // exits (the processor was previously never disposed).
    using WhisperProcessor processor = builder.Build();

    await ProcessFilesAsync(processor, sourceDirectory, targetDirectory);

    Console.WriteLine("处理完成!");
}
// Transcribes every *.wav file under sourceDirectory (recursively) with the
// given processor and writes one .srt file per input into targetDirectory,
// mirroring the relative sub-directory layout. Inputs whose .srt already
// exists are skipped. Each segment is echoed to the console as it is written.
static async Task ProcessFilesAsync(WhisperProcessor processor,
    string sourceDirectory, string targetDirectory)
{
    var files = Directory.GetFiles(sourceDirectory, "*.wav", SearchOption.AllDirectories);

    foreach (var sourceFilePath in files)
    {
        string relativePath = Path.GetRelativePath(sourceDirectory, sourceFilePath);
        string destinationFilePath = Path.ChangeExtension(
            Path.Combine(targetDirectory, relativePath), ".srt");

        if (File.Exists(destinationFilePath))
        {
            Console.WriteLine($"srt文件已经存在:{destinationFilePath}");
            continue;
        }

        Directory.CreateDirectory(Path.GetDirectoryName(destinationFilePath));

        Console.WriteLine($"正在处理文件:{sourceFilePath}");

        // Write to a temporary file and rename it once the whole input has
        // been processed. Writing directly to the final path would leave a
        // truncated .srt behind after a crash or interruption, and the
        // File.Exists check above would then skip that input forever.
        string partialFilePath = destinationFilePath + ".partial";
        var stopwatch = Stopwatch.StartNew();

        try
        {
            using (var fileStream = File.OpenRead(sourceFilePath))
            using (var writer = new StreamWriter(partialFilePath))
            {
                var segmentIndex = 1;

                await foreach (var segment in processor.ProcessAsync(fileStream, CancellationToken.None))
                {
                    // SRT cue: index line, "start --> end" line, text, blank line.
                    string indexLine = $"{segmentIndex}";
                    string timingLine = $"{segment.Start:hh\\:mm\\:ss\\,fff} --> {segment.End:hh\\:mm\\:ss\\,fff}";

                    Console.WriteLine(indexLine);
                    Console.WriteLine(timingLine);
                    Console.WriteLine(segment.Text);
                    Console.WriteLine();

                    await writer.WriteLineAsync(indexLine);
                    await writer.WriteLineAsync(timingLine);
                    await writer.WriteLineAsync(segment.Text);
                    await writer.WriteLineAsync();

                    // Flush per segment so progress is on disk immediately.
                    await writer.FlushAsync();

                    segmentIndex++;
                }
            }

            File.Move(partialFilePath, destinationFilePath);
        }
        catch
        {
            // Best-effort cleanup; File.Delete is a no-op when the file is absent.
            File.Delete(partialFilePath);
            throw;
        }

        stopwatch.Stop();
        var elapsedMinutes = stopwatch.Elapsed.TotalMinutes;
        Console.WriteLine($"已生成srt文件:{destinationFilePath}");
        Console.WriteLine($"生成耗费时间:{elapsedMinutes} 分钟");
    }
}
/// <summary>
/// Command-line options for the demo, bound by CommandLineParser via the
/// <c>[Option]</c> attributes below. Every option is optional and has a default.
/// </summary>
public class Options
{
    /// <summary>Command to run: <c>lang-detect</c>, <c>transcribe</c> or <c>translate</c>.</summary>
    [Option('t', "command", Required = false, HelpText = "Command to run (lang-detect, transcribe or translate)", Default = "transcribe")]
    public string Command { get; set; }

    /// <summary>Input file; within this program it is only read by the <c>lang-detect</c> command.</summary>
    // NOTE(review): the default has an .mp3 extension, but lang-detect feeds the
    // file to WaveParser — presumably a .wav file is required; confirm.
    [Option('f', "file", Required = false, HelpText = "File to process", Default = "test.mp3")]
    public string FileName { get; set; }

    /// <summary>Language passed to the processor builder.</summary>
    [Option('l', "lang", Required = false, HelpText = "Language", Default = "auto")]
    public string Language { get; set; }

    /// <summary>Path of the model file to load; downloaded to this path when it does not exist.</summary>
    // NOTE(review): the default file name says "large" while the default
    // ModelType below is Base, so an automatic download would store a Base
    // model under this name — confirm which one is intended.
    [Option('m', "modelFile", Required = false, HelpText = "Model to use (filename)", Default = "ggml-large.bin")]
    public string ModelName { get; set; }

    /// <summary>Ggml model type to download when the model file does not exist.</summary>
    [Option('g', "ggml", Required = false, HelpText = "Ggml Model type to download (if not exists)", Default = GgmlType.Base)]
    public GgmlType ModelType { get; set; }
}
复制代码
0 z* X0 d9 x2 e& n3 l: H
( ^' i! R4 G" @6 y8 c' I+ @6 g4 V* K: \5 s& o( K' S
N4 i' ^; d# u" @" C |