Skip to content

Additional scripts

Notes

Here are some additional functions that I wanted to share.

The results of find_offsets are cached in cached_offset_searches.pickle

Code

find_offsets(within_file, find_file, multiplier, window=2, number=None, max_tries_number=80, to_print_plots=False)

Finds the time codes at which the audio of find_file appears in within_file, using scipy.

Parameters:

Name Type Description Default
within_file str

path to the audio file to search within

required
find_file str

path to the audio file to search for

required
multiplier float

a multiplier used to calculate the prominence of peaks in the correlation signal. The larger the value, the fewer time codes the function returns.

required
window int

the length of find_file audio to use for the correlation (in seconds)

2
number int

the goal number of offsets to return

None
max_tries_number int

maximum number of attempts at reaching the goal number of offsets

80
to_print_plots bool

whether to print plot before proceeding

False

Returns:

Name Type Description
list list

list of time codes

Source code in ffmpeg_python_utils\other.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
@cache_results
def find_offsets(within_file: str, find_file: str, multiplier: float, window: int = 2, number: int = None,
                 max_tries_number: int = 80, to_print_plots=False) -> list:
    """
    Finds the time codes at which the audio of find_file appears in within_file, using scipy.

    The first ``window`` seconds of find_file are cross-correlated with within_file and the
    positions of the correlation peaks are converted from sample indices to seconds.

    Args:
        within_file (str): path to the audio file to search within
        find_file (str): path to the audio file to search for
        multiplier (float): a multiplier used to calculate the prominence of peaks in the correlation signal. The larger the value, the fewer time codes the function returns.
        window (int): the length of find_file audio to use for the correlation (in seconds)
        number (int): the goal number of offsets to return; None accepts whatever the first search yields
        max_tries_number (int): maximum number of attempts at reaching the goal number of offsets
        to_print_plots (bool): whether to call plot_offsets on the correlation signal before proceeding

    Returns:
        list: list of time codes in seconds, rounded to 2 decimal places. Empty when number < 1.
            When the retry limit is reached, the most recent result is returned even if its
            length differs from number.
    """
    # Reject a non-positive goal before any audio is decoded or correlated.
    if number is not None and number < 1:
        print_info(f'Number of peaks you are looking for is {number}. Returning empty list.', 'red',
                   C_TO_PRINT_PACKAGE_INFO)
        return []

    y_within, sr_within = librosa.load(within_file, sr=None)
    # Load the needle at the haystack's sample rate so sample indices are comparable.
    y_find, _ = librosa.load(find_file, sr=sr_within)
    c = signal.correlate(y_within, y_find[:sr_within * window], mode='valid', method='fft')

    if number == 1:
        # A single offset is simply the global maximum of the correlation.
        # Keep the index in its own variable so the plot still receives the full signal.
        best_index = np.argmax(c)
        if to_print_plots:
            plot_offsets(c, find_file)
        peak = round(best_index / sr_within, 2)
        return [peak]

    # NOTE(review): int() truncates, so a value below 1 becomes prominence 0, which the
    # multiplicative adjustments below can never change. Kept as-is to preserve results.
    prominence = int(c[np.argmax(c)] * multiplier)
    counter = 0
    last_points = []  # most recent successfully computed result
    while True:
        try:
            peaks, _ = signal.find_peaks(c, prominence=prominence)
            points_of_time = [round(peak / sr_within, 2) for peak in peaks]
            points_of_time = delete_neighbors(points_of_time)
            last_points = points_of_time
            if to_print_plots:
                plot_offsets(c, find_file)
            if counter > max_tries_number:
                if C_TO_PRINT_PACKAGE_INFO: print(
                    f'Max try number reached. Returning the last time codes. {points_of_time}')
                return points_of_time
            if number and number != len(points_of_time):
                print_info(
                    f'Try number № {counter}. Looking for peaks. The goal number is {number}, the number we got is {len(points_of_time)}. Prominence: {prominence}',
                    'white', C_TO_PRINT_PACKAGE_INFO)
                if C_TO_PRINT_PACKAGE_INFO: print_info(f'Time codes we got: {points_of_time}', 'white')
                if number > len(points_of_time):
                    # Too few peaks: lower the bar, faster when far from the goal.
                    diff = number - len(points_of_time)
                    prominence *= 0.80 if diff > 5 else 0.95
                else:
                    # Too many peaks: raise the bar slightly.
                    prominence *= 1.01
                counter += 1
                continue
            print_info(f'Found specified number={number}. Offsets: {points_of_time}', 'white',
                       C_TO_PRINT_PACKAGE_INFO)
            return points_of_time
        except IndexError:
            counter += 1
            # The retry limit must also apply here, otherwise a recurring IndexError
            # would keep this loop running forever.
            if counter > max_tries_number:
                if C_TO_PRINT_PACKAGE_INFO: print(
                    f'Max try number reached. Returning the last time codes. {last_points}')
                return last_points
            prominence *= 0.8

remove_silence_from_audio_file(input_path, output_path, audio_format='wav', min_silence_len=100, silence_thresh=-45, keep_silence=50)

Removes silence from an audio file. Results are cached, taking into account the hash of the input files and the kwargs passed.

Parameters:

Name Type Description Default
input_path str

The path to the audio file.

required
output_path str

The path the edited audio is exported to.

required
audio_format str

The format of the audio output.

'wav'
min_silence_len int

The minimum length of silence to be removed, in milliseconds.

100
silence_thresh int

The threshold for silence, in decibels.

-45
keep_silence int

The length of silence to keep at the beginning and end of the audio file, in milliseconds.

50

Returns:

Name Type Description
str str

output_path

Source code in ffmpeg_python_utils\other.py
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def remove_silence_from_audio_file(input_path: str, output_path: str, audio_format: str = 'wav',
                                   min_silence_len: int = 100, silence_thresh: int = -45,
                                   keep_silence: int = 50) -> str:
    """
    Strip silent stretches from an audio file and export what remains.

    The input is split on silence, the resulting chunks are joined back together in order,
    and the joined audio is exported to output_path.

    Args:
        input_path (str): The path to the audio file to read.
        output_path (str): The path the joined audio is exported to.
        audio_format (str, optional): The format passed both when reading input_path and when exporting.
        min_silence_len (int, optional): The minimum length of silence to be removed, in milliseconds.
        silence_thresh (int, optional): The threshold for silence, in decibels.
        keep_silence (int, optional): The length of silence to keep at the beginning and end of each chunk, in milliseconds.

    Returns:
        str: output_path, unchanged
    """
    split_options = {
        'min_silence_len': min_silence_len,
        'silence_thresh': silence_thresh,
        'keep_silence': keep_silence,
    }
    source_audio = AudioSegment.from_file(input_path, format=audio_format)

    # Start from an empty segment so that an input with no chunks still exports cleanly.
    joined = AudioSegment.empty()
    for voiced_part in split_on_silence(source_audio, **split_options):
        joined += voiced_part

    joined.export(output_path, format=audio_format)
    return output_path