Flutter: Android не преобразует аудио (речь) в текст во время записи видео

Форум для тех, кто программирует под Android
Ответить
Anonymous
 Flutter Android не преобразует аудио речь в текст во время записи

Сообщение Anonymous »

Я создаю приложение, в котором мне нужно преобразовывать речь в текст во время записи видео — как в приложении-телесуфлёре. В iOS всё работает так, как я хотел, но в Android преобразование речи в текст не работает. Я выяснил, что в Android во время записи видео система не позволяет параллельно использовать микрофон для распознавания речи, поэтому преобразование никогда не происходит — операционная система Android это запрещает. Тем не менее мне очень нужно, чтобы это работало и на Android.

Пожалуйста, если кто-нибудь может мне помочь в этом

class _CameraState extends State with WidgetsBindingObserver {
//MARK:Variables

// NOTE(review): generic type arguments (e.g. State<Camera>, List<ClipItem>,
// Map<ScrollMode, GlobalKey>) appear to have been stripped by the forum's
// formatting throughout this class — restore them from the original project.

// Active camera session; null while switching cameras or re-configuring.
CameraController? cameraController;

late Future cameraValue = Future.value();
bool isFlashOn = false;
bool isRearCamera = true;
bool isRecording = false;

// Clips recorded in this session; merged into one file by _mergeVideos().
List videosList = [];

// Teleprompter scroll behaviour currently in effect.
ScrollMode selectedMode = ScrollMode.autoScroll;
double sliderValue = 50;
bool isExpanded = false;
// Current teleprompter overlay size in logical pixels.
double containerWidth = 300;
double containerHeight = 180;

// Script rendering settings (restored from SharedPreferences in _loadSavedData).
double fontSize = 30;
String fontFamily = "Lexend-Regular";
double leftMargin = 16;
double rightMargin = 16;
String content = "";
MirrorMode mirrorMode = MirrorMode.none;

// Snapshot of overlay settings — presumably used to revert unsaved edits;
// verify against the settings UI (not visible in this chunk).
double savedContainerWidth = 300;
double savedContainerHeight = 180;
double savedSliderValue = 50;
ScrollMode savedScrollMode = ScrollMode.autoScroll;

bool isDragging = false;

// Overlay resize limits; max values are derived from the screen size in
// didChangeDependencies.
double minWidth = 200;
double minHeight = 120;

double maxWidth = 0;
double maxHeight = 0;

// Indexes into the settings pickers (resolution / tab / frame rate).
int selectedResolution = 1;
int selectedTab = 0;
int selectedFrameRate = 1;

// Script wrapped into lines of words (built by _prepareWrappedLines);
// currentLine/currentWordIndex track the reading position driven by speech.
late List wrappedLines = [];
String spokenWord = "";
int currentLine = 0;
int currentWordIndex = 0;
bool isListening = false;
final ScrollController textScrollController = ScrollController();

// speech_to_text plugin instance, created in initState.
late stt.SpeechToText speech;

// Measured text metrics consumed by _shouldScrollForLine.
double _actualLineHeight = 0;
int _visibleLinesCount = 0;
final GlobalKey _textContainerKey = GlobalKey();
bool _isMetricsCalculated = false;

// One GlobalKey per scroll mode — presumably for anchoring popups/positions;
// confirm against the build method (not visible in this chunk).
final Map _keyMap = {
ScrollMode.autoScroll: GlobalKey(),
ScrollMode.fixedScroll: GlobalKey(),
ScrollMode.timedScroll: GlobalKey(),
ScrollMode.wpm: GlobalKey(),
};

// Selected microphone source ("phone" by default; see enableBluetoothMic).
String selectedMic = "phone";
final GlobalKey micKey = GlobalKey();
Timer? fixedScrollTimer;

// Timed-scroll configuration.
int timedMinutes = 0;
int timedSeconds = 0;
Timer? timedScrollTimer;
double timedPixelsPerSecond = 0;

// Words-per-minute scroll configuration.
int wpmValue = 120;
Timer? wpmScrollTimer;

// Recording duration counter, driven by a 1-second periodic timer started
// in startRecording. NOTE(review): `late` — reading/cancelling it before
// the first recording would throw.
late Timer recordTimer;
int recordSeconds = 0;
String formatTime(int seconds) {
  // Renders a second count as "mm:ss" (e.g. 125 -> "02:05").
  String two(int value) => value.toString().padLeft(2, '0');
  return "${two(seconds ~/ 60)}:${two(seconds % 60)}";
}

// Live microphone level (0..1) shown by the mic indicator.
double micLevel = 0.0;
NoiseMeter? noiseMeter;
StreamSubscription? noiseSubscription;

// Re-entrancy / lifecycle guards for the speech session and camera.
bool _isRestartingSpeech = false;
bool _isFirstRecording = true;
bool _userIsScrolling = false;
bool _isSwitchingCamera = false;

// Remaining seconds for timed scrolling (minutes*60 + seconds).
int _remainingSeconds = 0;
Timer? _countdownTimer;

// Indexes into widget.cameras, resolved by _resolveCameraIndexes().
late int _frontCameraIndex;
late int _rearCameraIndex;
bool _isStoppingRecording = false;

bool _isCameraTransitioning = false;

// Set when the current segment was already persisted elsewhere — presumably
// during a camera flip; verify against the switching code (not in this chunk).
bool _segmentAlreadySaved = false;

// Speech-matching state: last transcript snapshot and last matched word index
// into _globalWords (see onSpeechResult).
String _lastSpokenSnapshot = "";
int _lastGlobalMatchIndex = 0;

// Watchdog that restarts the recogniser when no speech arrives (see
// _startSpeechWatchdog).
Timer? _speechWatchdog;
DateTime _lastSpeechTime = DateTime.now();

//MARK:Custom Methods

void _resolveCameraIndexes() {
  // Records the positions of the front and rear cameras in widget.cameras
  // so startCameraFullConfig can switch between them by index.
  //
  // FIX: the original read `widget.cameras.lensDirection`, which cannot
  // compile — the `[i]` element access was evidently stripped by the forum
  // formatting. Restored below.
  for (int i = 0; i < widget.cameras.length; i++) {
    final lens = widget.cameras[i].lensDirection;
    if (lens == CameraLensDirection.front) {
      _frontCameraIndex = i;
    } else if (lens == CameraLensDirection.back) {
      _rearCameraIndex = i;
    }
  }
  // NOTE(review): on a device with no front (or no back) camera the
  // corresponding `late` field stays unset and will throw on first read.
}

Future _ensureMicPermission() async {
  // Requests microphone access up front: the CameraController is created
  // with enableAudio:true and speech recognition also needs the mic.
  final granted = (await Permission.microphone.request()).isGranted;
  if (granted) {
    debugPrint("✅ Microphone permission granted");
  } else {
    debugPrint("❌ Microphone permission denied");
  }
}

Future _generateVideoThumbnail(File videoFile) async {
// Extracts a single frame at t=0.1s as a JPEG thumbnail for the clip list.
final dir = await getTemporaryDirectory();
final thumbPath =
"${dir.path}/thumb_${DateTime.now().millisecondsSinceEpoch}.jpg";

// -y: overwrite, -ss: seek, -vframes 1: grab exactly one frame.
// NOTE(review): the FFmpeg session result is not checked — the returned
// File may not exist on disk if extraction failed.
await FFmpegKit.execute(
'-y -i "${videoFile.path}" -ss 00:00:00.100 -vframes 1 "$thumbPath"',
);

return File(thumbPath);
}

Future startCameraFullConfig(
int cameraIndex,
int resolutionIndex,
int frameRateIndex,
) async {
// Tears down the current camera session (if any) and starts a fresh one
// with the requested camera, resolution and frame rate.
//
// The statement order matters: the controller field is nulled and the UI
// rebuilt BEFORE the old controller is disposed, so no preview widget is
// still rendering from a dying controller.
try {
final resolution = getResolutionFromIndex(resolutionIndex);
final fps = getFPSFromIndex(frameRateIndex);

final oldController = cameraController;
cameraController = null;

// Rebuild without a controller so the preview detaches first.
if (mounted) setState(() {});

await oldController?.dispose();

// enableAudio:true so the recorder captures sound; yuv420 is the common
// cross-platform preview/stream format.
final controller = CameraController(
widget.cameras[cameraIndex],
resolution,
enableAudio: true,
imageFormatGroup: ImageFormatGroup.yuv420,
fps: fps,
);

cameraController = controller;

await controller.initialize();

// Widget may have been disposed while initialising.
if (!mounted) return;
setState(() {});
} catch (e) {
debugPrint("Camera switch error: $e");
}
}

void startRecording() async {
// Starts video recording, resets the teleprompter for a new session, and
// kicks off whichever scroll mode is selected.
// NOTE(review): `async void` — callers cannot await this or catch errors;
// consider returning Future<void> in a follow-up.
if (!cameraController!.value.isInitialized) return;

try {
// Platform-channel hook — presumably prepares the native audio session so
// mic capture can coexist with video recording; confirm the native side.
await platform.invokeMethod("prepareForRecording");
} catch (e) {
debugPrint("prepareForRecording failed: $e");
}

if (isFlashOn) {
await cameraController!.setFlashMode(FlashMode.torch);
}

await cameraController!.startVideoRecording();

setState(() {
isRecording = true;

// Reset reading position only for the first segment of a session;
// subsequent segments (e.g. after a camera flip) resume where they were.
if (_isFirstRecording) {
currentLine = 0;
currentWordIndex = 0;
_lastSpokenSnapshot = "";
_lastGlobalMatchIndex = 0;
textScrollController.jumpTo(0);
_isFirstRecording = false;
}
});

// Exactly one of the scroll drivers below starts, per the selected mode.
if (selectedMode == ScrollMode.autoScroll) {
await _startAutoScrollSpeechSafely();
}

if (selectedMode == ScrollMode.fixedScroll) {
startFixedScroll();
}

if (selectedMode == ScrollMode.timedScroll) {
startTimedScrolling();
}

if (selectedMode == ScrollMode.wpm) {
startWpmScroll();
}

// 1 Hz duration counter shown in the UI.
recordSeconds = 0;
recordTimer = Timer.periodic(Duration(seconds: 1), (_) {
setState(() => recordSeconds++);
});
}

Future stopRecording() async {
// Stops the active recording: cancels all scroll/watchdog timers, stops
// speech recognition, finalises the video file and appends it (with a
// generated thumbnail) to videosList.
// Re-entrancy guard: stop can be triggered from several paths at once.
if (_isStoppingRecording) return;
_isStoppingRecording = true;

try {
final controller = cameraController;

// Kill every periodic task before touching the camera.
fixedScrollTimer?.cancel();
timedScrollTimer?.cancel();
wpmScrollTimer?.cancel();
_countdownTimer?.cancel();
recordTimer.cancel();
_speechWatchdog?.cancel();
_speechWatchdog = null;

if (selectedMode == ScrollMode.autoScroll) {
try {
// Best effort — the session may already be dead.
await speech.stop();
} catch (_) {}
isListening = false;
}

// Skip stopVideoRecording if the segment was already persisted (camera
// flip path) or nothing is actually recording.
XFile? file;
if (!_segmentAlreadySaved &&
controller != null &&
controller.value.isRecordingVideo) {
file = await controller.stopVideoRecording();
} else {
debugPrint(
"⚠️ stopRecording: segment already saved or no active recording",
);
}

if (controller?.value.flashMode == FlashMode.torch) {
await controller!.setFlashMode(FlashMode.off);
}

if (file != null) {
// Copy to our own temp dir, then thumbnail it for the clip strip.
final videoFile = await _storeTempClip(file);
final thumbFile = await _generateVideoThumbnail(videoFile);
videosList.add(ClipItem(video: videoFile, thumbnail: thumbFile));
}

// Reset per-segment flags for the next take.
_isRestartingSpeech = false;
_segmentAlreadySaved = false;

if (mounted) {
setState(() {
isRecording = false;
});
}
} catch (e) {
debugPrint("❌ stopRecording error: $e");
} finally {
_isStoppingRecording = false;
}
}

Future enableBluetoothMic() async {
// Routes audio input to a Bluetooth microphone via the platform channel.
// Disallowed mid-recording — switching audio routes would break the take.
if (isRecording) return;
await platform.invokeMethod("enableBluetooth");
}

void startMicLevelListener() {
  // Subscribes to the noise meter and maps decibel readings onto the 0..1
  // `micLevel` used by the mic indicator in the UI.
  try {
    noiseMeter ??= NoiseMeter();

    noiseSubscription = noiseMeter!.noise.listen(
      (NoiseReading reading) {
        final double level = reading.meanDecibel;

        // Map roughly 30 dB (quiet) .. 90 dB (loud) onto 0..1.
        final double normalized = ((level - 30) / 60).clamp(0.0, 1.0);

        // FIX: the stream can fire after the widget is disposed; guard the
        // setState call (the original did not).
        if (!mounted) return;
        setState(() {
          micLevel = normalized;
        });
      },
      onError: (err) {
        // FIX: switched from print to debugPrint for consistency with the
        // rest of this file's logging.
        debugPrint("Mic error: $err");
      },
      cancelOnError: true,
    );
  } catch (e) {
    debugPrint("NoiseMeter start failed: $e");
  }
}

Future _startAutoScrollSpeechSafely() async {
// Starts speech recognition for auto-scroll, after giving the platform a
// moment to set up the audio session alongside the video recorder.
if (!isRecording) return;

try {
// Presumably configures the native audio session so the recogniser can
// share the mic with the recorder — TODO confirm the native implementation.
await platform.invokeMethod("prepareSpeechSession");
} catch (e) {
debugPrint("prepareSpeechSession failed: $e");
}

// Grace period so the session settles before listening starts.
await Future.delayed(const Duration(milliseconds: 250));

// Recording may have been stopped while we waited.
if (!isRecording) return;

startSpeechListening();
}

void stopMicLevelListener() {
  // Stops feeding mic-level updates to the UI and resets the indicator.
  noiseSubscription?.cancel();
  noiseSubscription = null;

  // FIX: guard against calling setState after dispose — this can be invoked
  // from teardown paths after the widget is gone.
  if (!mounted) return;
  setState(() => micLevel = 0.0);
}

Future _storeTempClip(XFile video) async {
  // Copies the just-recorded clip into our own temp_clips directory so it
  // survives the camera plugin reclaiming its cache file.
  final dir = await getApplicationDocumentsDirectory();
  final recordingsDir = Directory("${dir.path}/temp_clips");

  if (!await recordingsDir.exists()) {
    await recordingsDir.create(recursive: true);
  }

  final path =
      "${recordingsDir.path}/${DateTime.now().millisecondsSinceEpoch}.mp4";

  // FIX: the original did readAsBytes()+writeAsBytes(), buffering the whole
  // video in memory — a long recording can OOM the app. XFile.saveTo copies
  // to disk without holding the file in RAM.
  await video.saveTo(path);

  return File(path);
}

Future _mergeVideos() async {
  // Concatenates all recorded clips into one mp4 using ffmpeg's concat
  // demuxer: video streams are copied (no re-encode), audio is loudness-
  // normalised and re-encoded to AAC.
  final supportDir = await getApplicationSupportDirectory();
  final outputDir = Directory("${supportDir.path}/merged_temp");

  if (!outputDir.existsSync()) {
    outputDir.createSync(recursive: true);
  }

  final output = File(
    "${outputDir.path}/merged_${DateTime.now().millisecondsSinceEpoch}.mp4",
  );

  // concat demuxer manifest: one "file '<path>'" line per clip, with single
  // quotes in paths escaped.
  final listFile = File("${outputDir.path}/input_list.txt");

  // FIX: renamed from `content`, which shadowed the instance field of the
  // same name (the teleprompter script text) — an easy source of bugs.
  final concatManifest = videosList
      .map((f) => "file '${f.video.path.replaceAll("'", "\\'")}'")
      .join("\n");

  await listFile.writeAsString(concatManifest);

  final command =
      '-y -safe 0 -f concat -i "${listFile.path}" '
      '-map 0:v:0 -map 0:a:0 '
      '-af "loudnorm=I=-14:LRA=11:TP=-1.0" '
      '-c:v copy -c:a aac -b:a 192k '
      '"${output.path}"';

  print("⚙️ Running FFmpeg command:");
  print(command);

  final session = await FFmpegKit.execute(command);

  final rc = await session.getReturnCode();
  print("FFmpeg return code: $rc");

  final logs = await session.getAllLogs();
  for (var log in logs) {
    print("FFmpegLog: ${log.getMessage()}");
  }

  final failStack = await session.getFailStackTrace();
  if (failStack != null) {
    print("FFmpegFailStack: $failStack");
  }

  print("Merged output exists? ${output.existsSync()} → ${output.path}");

  return output;
}

Future saveVideo(XFile video) async {
// Persists the final video: Android -> public Download directory;
// iOS/other -> app documents plus the photo gallery (Gal).
//
// NOTE(review): reads the whole file into memory before writing — large
// recordings may OOM; consider XFile.saveTo.
// NOTE(review): writing directly into the public Download folder is subject
// to scoped storage on Android 10+ — verify this works on target API levels.
final filename = "${DateTime.now().millisecondsSinceEpoch}.mp4";
final bytes = await video.readAsBytes();

if (Platform.isAndroid) {
final downloadPath = await ExternalPath.getExternalStoragePublicDirectory(
ExternalPath.DIRECTORY_DOWNLOAD,
);

final file = File("$downloadPath/$filename");
await file.writeAsBytes(bytes);
return file;
} else {
final dir = await getApplicationDocumentsDirectory();
final file = File("${dir.path}/$filename");
await file.writeAsBytes(bytes);

// Also register the video in the system gallery.
await Gal.putVideo(file.path);

return file;
}
}

Future _loadSavedData() async {
  // Restores persisted teleprompter settings, then rebuilds the wrapped-line
  // and word-index caches used by highlighting and scrolling.
  final prefs = await SharedPreferences.getInstance();

  // Persisted name -> mode; anything unknown falls back to autoScroll.
  const modeByName = <String, ScrollMode>{
    "fixedScroll": ScrollMode.fixedScroll,
    "timedScroll": ScrollMode.timedScroll,
    "wpm": ScrollMode.wpm,
    "autoScroll": ScrollMode.autoScroll,
  };

  setState(() {
    fontSize = prefs.getDouble('selectedFontSize') ?? 26;
    fontFamily = prefs.getString('selected_font') ?? "Lexend-Regular";
    content = widget.scriptText;

    leftMargin = prefs.getDouble('left_margin') ?? 16;
    rightMargin = prefs.getDouble('right_margin') ?? 16;

    sliderValue = prefs.getDouble('slider_value') ?? 50;

    final storedMode = prefs.getString('scroll_mode') ?? "autoScroll";
    selectedMode = modeByName[storedMode] ?? ScrollMode.autoScroll;

    timedMinutes = prefs.getInt('reader_timed_minutes') ?? 0;
    timedSeconds = prefs.getInt('reader_timed_seconds') ?? 0;

    wpmValue = prefs.getInt('reader_wpm') ?? 120;

    // Mirror flags are stored per script; horizontal wins over vertical.
    final mh =
        prefs.getBool("mirror_horizontal_${widget.scriptName}") ?? false;
    final mv = prefs.getBool("mirror_vertical_${widget.scriptName}") ?? false;

    if (mh) {
      mirrorMode = MirrorMode.horizontal;
    } else if (mv) {
      mirrorMode = MirrorMode.vertical;
    } else {
      mirrorMode = MirrorMode.none;
    }

    _remainingSeconds = (timedMinutes * 60) + timedSeconds;

    containerWidth = prefs.getDouble('overlay_width') ?? 300;
    containerHeight = prefs.getDouble('overlay_height') ?? 180;
  });

  _prepareWrappedLines();
  _buildGlobalWordIndex();

  print("✅ Wrapped lines: ${wrappedLines.length}");
  print("✅ Global words: ${_globalWords.length}");
}

Future _saveTeleprompterPrefs() async {
  // Persists the teleprompter settings for the current script.
  //
  // FIX: the SharedPreferences setters return Futures that the original
  // dropped (unawaited) — callers awaiting this method had no guarantee the
  // values were committed, and write errors vanished silently.
  final prefs = await SharedPreferences.getInstance();

  await prefs.setString(
    "scroll_mode",
    selectedMode.toString().split('.').last,
  );
  await prefs.setDouble("slider_value", sliderValue);
  await prefs.setInt("reader_wpm", wpmValue);
  await prefs.setInt("reader_timed_minutes", timedMinutes);
  await prefs.setInt("reader_timed_seconds", timedSeconds);

  // Mirror flags are stored per script name.
  await prefs.setBool(
    "mirror_horizontal_${widget.scriptName}",
    mirrorMode == MirrorMode.horizontal,
  );
  await prefs.setBool(
    "mirror_vertical_${widget.scriptName}",
    mirrorMode == MirrorMode.vertical,
  );
}

void startSpeechListening() async {
// Initialises the recogniser and starts a dictation session whose partial
// results drive onSpeechResult. Guarded against re-entry while a delayed
// restart is pending or when recording has ended.
if (_isRestartingSpeech || !isRecording) return;

bool available = await speech.initialize(
onStatus: _onSpeechStatus,
onError: (error) {
debugPrint("Speech error: $error");
// Errors terminate the platform session; schedule a restart so the
// teleprompter keeps following the speaker.
_restartSpeechWithDelay();
},
);

if (!available) {
debugPrint("Speech not available");
return;
}

isListening = true;

await speech.listen(
onResult: (result) {
// Results can arrive after stop; drop them.
if (!isRecording || !isListening) return;
onSpeechResult(result.recognizedWords);
},
// Dictation mode + partial results: continuous transcript updates.
listenMode: stt.ListenMode.dictation,
partialResults: true,
cancelOnError: true,
// Long ceiling; the watchdog and status callbacks handle early ends.
listenFor: const Duration(minutes: 10),
pauseFor: const Duration(seconds: 5),
);

// Detects a silently stalled session and restarts it.
_startSpeechWatchdog();
}

void _startSpeechWatchdog() {
  // (Re)arms the watchdog that detects a stalled recogniser session.
  _speechWatchdog?.cancel();

  _speechWatchdog = Timer.periodic(const Duration(seconds: 2), (_) {
    if (!isRecording || !isListening) return;

    final stalledFor = DateTime.now().difference(_lastSpeechTime);

    // No recognised speech for 4 seconds -> assume the platform session
    // silently died and restart it.
    if (stalledFor.inSeconds >= 4) {
      debugPrint("🛑 Speech stalled, restarting...");
      _restartSpeechWithDelay();
    }
  });
}

void _onSpeechStatus(String status) {
  // Keeps recognition alive for the whole recording: the platform ends a
  // session with "done"/"notListening", at which point we schedule a restart.
  debugPrint("🎤 Speech status: $status");

  if (!isRecording) return;

  final sessionEnded = status == "done" || status == "notListening";
  if (!sessionEnded) return;

  isListening = false;
  _restartSpeechWithDelay();
}

void _restartSpeechWithDelay() {
// Schedules a single delayed restart of the speech session. The delay gives
// the platform time to release the previous session; the flag prevents
// multiple concurrent restarts (status + error callbacks can both fire).
if (_isCameraTransitioning) {
debugPrint("🎤 Speech restart blocked: camera transitioning");
return;
}

if (!isRecording || _isRestartingSpeech) return;

_isRestartingSpeech = true;

Future.delayed(const Duration(milliseconds: 800), () async {
// Conditions may have changed while we waited.
if (_isCameraTransitioning || !isRecording) {
_isRestartingSpeech = false;
return;
}

// Stop, breathe, then re-listen — the extra 300ms avoids the platform
// rejecting an immediate re-open of the audio session.
await speech.stop();
await Future.delayed(const Duration(milliseconds: 300));

_isRestartingSpeech = false;
startSpeechListening();
});
}

void scrollToNextLine() {
  // Only auto-scroll mode follows the spoken word.
  if (selectedMode != ScrollMode.autoScroll) return;

  // Defer until after layout so the scroll position/extents are valid.
  WidgetsBinding.instance.addPostFrameCallback((_) {
    final canScroll =
        textScrollController.hasClients && currentLine < wrappedLines.length;
    if (canScroll) {
      _scrollToLogicalLine(currentLine);
    }
  });
}

Future _scrollToLogicalLine(int lineIndex) async {
// Animates the teleprompter so that logical line [lineIndex] sits about a
// quarter of the way down the visible area. The line's pixel position is
// found by laying the full script out with a TextPainter that mirrors the
// on-screen text style, then querying the box of the line's first character.
final fullText = _fullScriptText();
if (fullText.isEmpty) return;

// Must match the style the script is rendered with, or positions drift.
final textSpan = TextSpan(
text: fullText,
style: TextStyle(fontSize: fontSize, fontFamily: fontFamily, height: 1.8),
);

final tp = TextPainter(
text: textSpan,
textAlign: TextAlign.center,
textDirection: TextDirection.ltr,
maxLines: null,
);

// Same wrap width as the on-screen container (minus margins).
final double maxWidthForText = (containerWidth - leftMargin - rightMargin)
.clamp(20.0, double.infinity);

tp.layout(maxWidth: maxWidthForText);

// One-character selection at the start of the target line.
final int charIndex = _charIndexForLine(lineIndex);
final selStart = charIndex.clamp(0, fullText.length - 1);
final selEnd = (selStart + 1).clamp(0, fullText.length);

final boxes = tp.getBoxesForSelection(
TextSelection(baseOffset: selStart, extentOffset: selEnd),
);

double targetY;

if (boxes.isNotEmpty) {
targetY = boxes.first.top;
} else {
// Fallback: estimate from line height (fontSize * 1.8 line-height).
targetY = lineIndex * (fontSize * 1.8);
}

// Keep the target line ~25% from the top of the visible area.
final visibleHeight = containerHeight - 40;
final desired = (targetY - visibleHeight * 0.25).clamp(
0.0,
double.infinity,
);

final max = textScrollController.position.maxScrollExtent;
final finalOffset = desired.clamp(0.0, max);

textScrollController.animateTo(
finalOffset,
duration: Duration(milliseconds: 400),
curve: Curves.easeInOut,
);
}

void onSpeechResult(String spokenSoFar) {
  // Matches the recogniser's running transcript against the script and
  // advances the teleprompter caret/scroll position.
  //
  // FIX: the original compared whole lists
  // (`_wordsMatch(spokenWords, _globalWords)`) — the `[s]`/`[i]` element
  // accesses were evidently stripped by the forum formatting; restored.

  // Any result counts as recogniser activity for the stall watchdog.
  _lastSpeechTime = DateTime.now();
  if (!isRecording || selectedMode != ScrollMode.autoScroll) return;

  final cleaned = spokenSoFar.toLowerCase().trim();
  if (cleaned.isEmpty) return;

  // Partial results repeat the same transcript; skip identical snapshots.
  if (cleaned == _lastSpokenSnapshot) return;
  _lastSpokenSnapshot = cleaned;

  final spokenWords = cleaned.split(RegExp(r'\s+'));
  if (spokenWords.isEmpty) return;

  int startIndex = _lastGlobalMatchIndex;
  int bestMatchIndex = -1;

  // Only look a few script words past the last match so one stray
  // recognition cannot teleport the prompter down the page.
  const int searchAhead = 6;

  final end = (startIndex + searchAhead).clamp(0, _globalWords.length);

  // Scan script words forward; for each, test spoken words newest-first.
  for (int i = startIndex; i < end; i++) {
    for (int s = spokenWords.length - 1; s >= 0; s--) {
      if (_wordsMatch(spokenWords[s], _globalWords[i])) {
        bestMatchIndex = i;
        break;
      }
    }
    if (bestMatchIndex != -1) break;
  }

  if (bestMatchIndex == -1) return;

  // A jump of more than 3 words is likely a misrecognition — ignore it.
  if (bestMatchIndex - _lastGlobalMatchIndex > 3) {
    return;
  }

  _lastGlobalMatchIndex = bestMatchIndex + 1;

  final pos = _globalWordPositions[bestMatchIndex];

  setState(() {
    currentLine = pos.line;
    currentWordIndex = pos.word;

    // Advance the caret; wrap to the next line past the last word.
    if (currentWordIndex >= wrappedLines[currentLine].length - 1) {
      currentLine++;
      currentWordIndex = 0;
    } else {
      currentWordIndex++;
    }
  });

  if (_shouldScrollForLine(currentLine)) {
    _scrollToLogicalLine(currentLine);
  }
}

bool _wordsMatch(String spoken, String expected) {
  // Fuzzy single-word comparison between a recognised word and a script
  // word: exact match, common contraction pairs, punctuation-insensitive
  // match, or a trailing plural "s" on either side.

  if (spoken == expected) return true;

  // FIX: typed const map (was an untyped `Map` literal rebuilt on every
  // call) — contractions the recogniser often returns without apostrophes.
  const variations = <String, String>{
    "im": "i'm",
    "dont": "don't",
    "cant": "can't",
    "wont": "won't",
    "its": "it's",
    "thats": "that's",
    "youre": "you're",
    "theyre": "they're",
  };

  if (variations[spoken] == expected || variations[expected] == spoken) {
    return true;
  }

  // Ignore punctuation differences (e.g. "hello," vs "hello").
  final cleanSpoken = spoken.replaceAll(RegExp(r'[^\w\s]'), '');
  final cleanExpected = expected.replaceAll(RegExp(r'[^\w\s]'), '');
  if (cleanSpoken == cleanExpected) return true;

  // Tolerate a trailing plural "s" in either direction.
  if (spoken == expected + 's' || spoken + 's' == expected) return true;

  return false;
}

List buildHighlightedText() {
  // Builds one TextSpan per word of the wrapped script: words already spoken
  // render grey, the rest white; each logical line ends with a newline span.
  //
  // FIX: the original's inner loop iterated `wrappedLines.length` and
  // indexed `wrappedLines[j]` — the `[i]` element accesses were evidently
  // stripped by the forum formatting; restored below.
  List spans = [];

  for (int i = 0; i < wrappedLines.length; i++) {
    for (int j = 0; j < wrappedLines[i].length; j++) {
      // Spoken = any earlier line, or earlier word on the current line.
      final bool isSpoken =
          i < currentLine || (i == currentLine && j < currentWordIndex);

      spans.add(
        TextSpan(
          text: "${wrappedLines[i][j]} ",
          style: TextStyle(
            fontSize: fontSize,
            fontFamily: fontFamily,
            color: isSpoken ? Colors.grey : Colors.white,
            fontWeight: FontWeight.normal,
          ),
        ),
      );
    }
    spans.add(const TextSpan(text: "\n"));
  }

  return spans;
}

String _fullScriptText() {
  // Reassembles the wrapped script: words joined by spaces, lines by '\n'.
  // This must stay in sync with _charIndexForLine's offset arithmetic.
  //
  // FIX: restored the `[i]` element access lost to forum formatting — the
  // original wrote the entire script out once per line.
  final buffer = StringBuffer();
  for (int i = 0; i < wrappedLines.length; i++) {
    buffer.writeAll(wrappedLines[i], ' ');
    if (i != wrappedLines.length - 1) buffer.write("\n");
  }
  return buffer.toString();
}

int _charIndexForLine(int lineIndex) {
  // Character offset of the first character of line [lineIndex] within
  // _fullScriptText() (each preceding line contributes its joined length
  // plus one newline).
  //
  // FIX: restored the `[i]` element access lost to forum formatting — each
  // line contributes its OWN joined length, not the whole script's.
  int index = 0;
  for (int i = 0; i < lineIndex && i < wrappedLines.length; i++) {
    index += wrappedLines[i].join(" ").length + 1; // +1 = newline
  }
  return index.clamp(0, _fullScriptText().length);
}

void _prepareWrappedLines() {
// Greedy word-wrap of the script into wrappedLines, using a single-line
// TextPainter as the measuring stick so wrapping matches the on-screen
// rendering (same font, size and 1.8 line-height).

// Strip invisible characters that would confuse both measuring and the
// word splitter: NBSP, zero-width space, zero-width joiner.
final fullText = content
.replaceAll('\u00A0', ' ')
.replaceAll('\u200B', '')
.replaceAll('\u200D', '');

final words = fullText.split(RegExp(r'\s+'));
List currentLineWords = [];
wrappedLines = [];

// maxLines:1 so didExceedMaxLines flips as soon as a candidate line wraps.
final testPainter = TextPainter(
textDirection: TextDirection.ltr,
textAlign: TextAlign.center,
maxLines: 1,
);

// Must match the width used when rendering/scrolling the script.
double maxWidth = containerWidth - leftMargin - rightMargin;

for (final word in words) {
// Try the current line with this word appended.
final String attempt = (currentLineWords + [word]).join(' ');

testPainter.text = TextSpan(
text: attempt,
style: TextStyle(
fontSize: fontSize,
fontFamily: fontFamily,
height: 1.8,
),
);

testPainter.layout(maxWidth: maxWidth);

if (testPainter.didExceedMaxLines) {
// Word doesn't fit: close the current line, start a new one with it.
wrappedLines.add(List.from(currentLineWords));
currentLineWords = [word];
} else {
currentLineWords.add(word);
}
}

// Flush the trailing partial line.
if (currentLineWords.isNotEmpty) {
wrappedLines.add(currentLineWords);
}
}

void _buildGlobalWordIndex() {
  // Flattens wrappedLines into a single lowercase word list plus parallel
  // (line, word) positions so speech matching can scan linearly.
  //
  // FIX: the original's inner loop iterated `wrappedLines.length` and read
  // `wrappedLines[j]` — the `[i]` element accesses were evidently stripped
  // by the forum formatting; restored below.
  _globalWordPositions = [];
  _globalWords = [];

  for (int i = 0; i < wrappedLines.length; i++) {
    for (int j = 0; j < wrappedLines[i].length; j++) {
      _globalWords.add(wrappedLines[i][j].toLowerCase());
      _globalWordPositions.add(WordPos(i, j));
    }
  }
}

bool _shouldScrollForLine(int targetLine) {
  // Decides whether reaching [targetLine] requires a scroll. Without
  // measured metrics (or an attached scroll view) we err on the side of
  // scrolling.
  if (!_isMetricsCalculated || !textScrollController.hasClients) return true;

  final int topVisibleLine =
      (textScrollController.offset / _actualLineHeight).floor();

  // Keep a two-line margin above the bottom edge of the visible area.
  final int bottomVisibleLine = topVisibleLine + _visibleLinesCount - 2;

  return targetLine >= bottomVisibleLine;
}

//MARK:View Methods
@override
void initState() {
  super.initState();
  // Observe lifecycle changes (see didChangeAppLifecycleState).
  WidgetsBinding.instance.addObserver(this);
  speech = stt.SpeechToText();
  // Fire-and-forget: camera setup reports its own errors via debugPrint.
  _initCameraSafely();
  // FIX: removed leftover debug statement `print(_isMetricsCalculated);`.
}

Future _initCameraSafely() async {
  // One-time startup: permission -> persisted camera settings -> camera
  // indexes -> start front camera -> restore teleprompter settings.

  // Mic permission first: the controller is created with enableAudio:true.
  await _ensureMicPermission();
  await _loadCameraResolution();
  await _loadCameraFPS();

  _resolveCameraIndexes();

  setState(() {
    isRearCamera = false;
  });

  // Start on the front camera with the persisted resolution / frame rate.
  await startCameraFullConfig(
    _frontCameraIndex,
    selectedResolution,
    selectedFrameRate,
  );

  // FIX: await the settings load (the original dropped the future, so any
  // error inside it vanished and callers could observe a half-initialised
  // teleprompter state).
  await _loadSavedData();
}

@override
void didChangeAppLifecycleState(AppLifecycleState state) {
// Re-arm the native recording/audio session whenever the app returns to
// the foreground; errors are deliberately swallowed (best effort).
if (state == AppLifecycleState.resumed) {
platform.invokeMethod("prepareForRecording").catchError((_) {});
}
}

@override
void didChangeDependencies() {
  super.didChangeDependencies();

  // Derive the teleprompter overlay's resize limits from the screen size.
  final screen = MediaQuery.of(context).size;

  maxWidth = screen.width;
  // Cap the overlay at 55% of the screen height, bounded to [400, 70%].
  maxHeight = (screen.height * 0.55).clamp(400.0, screen.height * 0.7);
}

@override
void dispose() {
  WidgetsBinding.instance.removeObserver(this);

  // FIX: cancel every periodic task and stream subscription started by this
  // state — the original leaked them if the screen was closed mid-session,
  // leaving timers firing setState on a dead widget.
  fixedScrollTimer?.cancel();
  timedScrollTimer?.cancel();
  wpmScrollTimer?.cancel();
  _countdownTimer?.cancel();
  _speechWatchdog?.cancel();
  _speechWatchdog = null;
  noiseSubscription?.cancel();
  noiseSubscription = null;
  // NOTE(review): `recordTimer` is `late` and cannot be safely cancelled
  // here without knowing it was started; it is cancelled in stopRecording.

  textScrollController.dispose();
  _scrollController.dispose();
  cameraController?.dispose();

  super.dispose();
}



Подробнее здесь: https://stackoverflow.com/questions/798 ... -recording
Ответить

Быстрый ответ

Изменение регистра текста: 
Смайлики
:) :( :oops: :roll: :wink: :muza: :clever: :sorry: :angel: :read: *x)
Ещё смайлики…
   
К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми.

Максимально разрешённый размер вложения: 15 МБ.

Вернуться в «Android»