Various changes

Add requirements.txt. Update secrets file with new options. Add option to transcribe voice to text.
2025-11-10 08:40:22 -05:00 · 2024-02-12 12:01:13 -05:00 · 2024-02-12 12:01:13 -05:00 · b4d9997fc6
commit b4d9997fc6
parent 2aa3724637
4 changed files with 36 additions and 4 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 # Ignore ffmpeg executable
 ffmpeg
+ffprobe

 # Ignore secrets file
 secrets_file.py
--- a/pagebot.py
+++ b/pagebot.py
@@ -6,6 +6,7 @@ import time
 import secrets_file
 from watchdog.observers import Observer
 from watchdog.events import FileSystemEventHandler
+import speech_recognition as sr 

 ## file handler

@@ -15,25 +16,46 @@ class MyHandler(FileSystemEventHandler):
            return
        filepath = event.src_path
        filename, file_extension = os.path.splitext(filepath)
+        if file_extension.lower() == '.amr':
+            time.sleep(10)
+            os.remove(filepath)
+            print("Removing AMR.")
        if file_extension.lower() == '.mp3':
            print("New MP3!")
+            text = ""
+            if secrets_file.speech_to_text:
+                text = convert_to_text(filepath)
            mp4_file = convert_to_mp4(filepath)
-            client.loop.create_task(upload_to_discord(mp4_file))
+            client.loop.create_task(upload_to_discord(mp4_file,text))
+        os.remove(filepath)

 ##convert mp3 to mp4
 def convert_to_mp4(mp3_file):
    try:
+        time.sleep(10)
        mp4_file = os.path.splitext(mp3_file)[0] + '.mp4'
        command = f'./ffmpeg -loop 1 -i img/blacksmall.jpg -i "{mp3_file}" -c:a aac -b:a 192k -c:v libx264 -pix_fmt yuv420p -shortest "{mp4_file}"'
        subprocess.run(command, shell=True)
-        os.remove(mp3_file)
        return mp4_file
    except Exception as e:
        print(f"Error during conversion: {e}")
        return None
  
+def convert_to_text(mp3_file):
+    command = f'./ffmpeg -i "{mp3_file}" output_audio.wav'
+    subprocess.run(command, shell=True)
+    r = sr.Recognizer() 
+    # Load the audio file 
+    with sr.AudioFile("output_audio.wav") as source: 
+        data = r.record(source) 
+    # Convert speech to text 
+    text = r.recognize_google(data) 
+    os.remove("output_audio.wav")
+    return (text)
+
+
 ## upload to discord
-async def upload_to_discord(mp4_file):
+async def upload_to_discord(mp4_file,text):
    ## Check to make sure conversion worked.
    if mp4_file is None:
        print("Conversion failed. Skipping upload.")
@@ -45,7 +67,11 @@ async def upload_to_discord(mp4_file):
        ## Send Video with name
        with open(mp4_file, 'rb') as f:
            await channel.send(filename,file=discord.File(f))
-
+        if secrets_file.delete_after_upload:
+            os.remove(mp4_file)
+        ## Send transcribed voice if present.
+        if (text != ""):
+            await channel.send(f"The following text was transcoded from the recording: \n{text}")
        ## Ping users with the appropriate number
        role_name = filename.split('-', 1)[0].strip()
        role = discord.utils.get(channel.guild.roles, name=role_name)
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+discord.py==2.3.2
+SpeechRecognition==3.10.1
+watchdog==4.0.0
--- a/sample-secrets_file.py
+++ b/sample-secrets_file.py
@@ -1,3 +1,5 @@
 key = 'Bot Token'
 watch_folder = '/path/to/folder'
 channel_id = channel_id_number
+delete_after_upload=True
+speech_to_text = True