diff --git a/backend-python/global_var.py b/backend-python/global_var.py index 4553356..dc21947 100644 --- a/backend-python/global_var.py +++ b/backend-python/global_var.py @@ -5,6 +5,7 @@ Model = "model" Model_Status = "model_status" Model_Config = "model_config" Deploy_Mode = "deploy_mode" +Midi_Vocab_Config_Type = "midi_vocab_config_type" class ModelStatus(Enum): @@ -13,11 +14,17 @@ class ModelStatus(Enum): Working = 3 +class MidiVocabConfig(Enum): + Default = auto() + Piano = auto() + + def init(): global GLOBALS GLOBALS = {} set(Model_Status, ModelStatus.Offline) set(Deploy_Mode, False) + set(Midi_Vocab_Config_Type, MidiVocabConfig.Default) def set(key, value): diff --git a/backend-python/routes/midi.py b/backend-python/routes/midi.py index 554751b..b470c90 100644 --- a/backend-python/routes/midi.py +++ b/backend-python/routes/midi.py @@ -23,7 +23,11 @@ class TextToMidiBody(BaseModel): @router.post("/text-to-midi", tags=["MIDI"]) def text_to_midi(body: TextToMidiBody): - vocab_config = "backend-python/utils/midi_vocab_config.json" + vocab_config_type = global_var.get(global_var.Midi_Vocab_Config_Type) + if vocab_config_type == global_var.MidiVocabConfig.Piano: + vocab_config = "backend-python/utils/vocab_config_piano.json" + else: + vocab_config = "backend-python/utils/midi_vocab_config.json" cfg = VocabConfig.from_json(vocab_config) mid = convert_str_to_midi(cfg, body.text.strip()) mid_data = io.BytesIO() @@ -35,7 +39,11 @@ def text_to_midi(body: TextToMidiBody): @router.post("/midi-to-text", tags=["MIDI"]) async def midi_to_text(file_data: UploadFile): - vocab_config = "backend-python/utils/midi_vocab_config.json" + vocab_config_type = global_var.get(global_var.Midi_Vocab_Config_Type) + if vocab_config_type == global_var.MidiVocabConfig.Piano: + vocab_config = "backend-python/utils/vocab_config_piano.json" + else: + vocab_config = "backend-python/utils/midi_vocab_config.json" cfg = VocabConfig.from_json(vocab_config) filter_config = "backend-python/utils/midi_filter_config.json" filter_cfg = FilterConfig.from_json(filter_config) @@ -69,7 +77,11 @@ def txt_to_midi(body: TxtToMidiBody): if not body.midi_path.startswith("midi/"): raise HTTPException(status.HTTP_400_BAD_REQUEST, "bad output path") - vocab_config = "backend-python/utils/midi_vocab_config.json" + vocab_config_type = global_var.get(global_var.Midi_Vocab_Config_Type) + if vocab_config_type == global_var.MidiVocabConfig.Piano: + vocab_config = "backend-python/utils/vocab_config_piano.json" + else: + vocab_config = "backend-python/utils/midi_vocab_config.json" cfg = VocabConfig.from_json(vocab_config) with open(body.txt_path, "r") as f: text = f.read() diff --git a/backend-python/utils/rwkv.py b/backend-python/utils/rwkv.py index 37d2bf4..2f61522 100644 --- a/backend-python/utils/rwkv.py +++ b/backend-python/utils/rwkv.py @@ -546,8 +546,10 @@ class MusicAbcRWKV(AbstractRWKV): def get_tokenizer(tokenizer_len: int): tokenizer_dir = f"{pathlib.Path(__file__).parent.parent.resolve()}/rwkv_pip/" - if tokenizer_len < 20096: + if tokenizer_len < 2176: return "abc_tokenizer" + if tokenizer_len < 20096: + return tokenizer_dir + "tokenizer-midipiano.json" if tokenizer_len < 50277: return tokenizer_dir + "tokenizer-midi.json" elif tokenizer_len < 65536: @@ -630,14 +632,27 @@ def RWKV(model: str, strategy: str, tokenizer: Union[str, None]) -> AbstractRWKV "20B_tokenizer": TextRWKV, "rwkv_vocab_v20230424": TextRWKV, "tokenizer-midi": MusicMidiRWKV, + "tokenizer-midipiano": MusicMidiRWKV, "abc_tokenizer": MusicAbcRWKV, } tokenizer_name = os.path.splitext(os.path.basename(tokenizer))[0] + global_var.set( + global_var.Midi_Vocab_Config_Type, + global_var.MidiVocabConfig.Piano + if tokenizer_name == "tokenizer-midipiano" + else global_var.MidiVocabConfig.Default, + ) rwkv: AbstractRWKV if tokenizer_name in rwkv_map: rwkv = rwkv_map[tokenizer_name](model, pipeline) else: - rwkv = TextRWKV(model, pipeline) + tokenizer_name = tokenizer_name.lower() + if "music" in tokenizer_name or "midi" in tokenizer_name: + rwkv = MusicMidiRWKV(model, pipeline) + elif "abc" in tokenizer_name: + rwkv = MusicAbcRWKV(model, pipeline) + else: + rwkv = TextRWKV(model, pipeline) rwkv.name = filename return rwkv diff --git a/backend-python/utils/vocab_config_piano.json b/backend-python/utils/vocab_config_piano.json new file mode 100644 index 0000000..63bb6c7 --- /dev/null +++ b/backend-python/utils/vocab_config_piano.json @@ -0,0 +1,279 @@ +{ + "note_events": 128, + "wait_events": 125, + "max_wait_time": 1000, + "velocity_events": 128, + "velocity_bins": 16, + "velocity_exp": 0.33, + "do_token_sorting": true, + "unrolled_tokens": false, + "decode_end_held_note_delay": 5.0, + "decode_fix_repeated_notes": true, + "bin_instrument_names": [ + "piano" + ], + "ch10_instrument_bin_name": "", + "program_name_to_bin_name": { + "Acoustic Grand Piano": "piano", + "Bright Acoustic Piano": "piano", + "Electric Grand Piano": "piano", + "Honky-tonk Piano": "piano", + "Electric Piano 1 (Rhodes Piano)": "piano", + "Electric Piano 2 (Chorused Piano)": "piano", + "Harpsichord": "piano", + "Clavinet": "piano", + "Celesta": "", + "Glockenspiel": "", + "Music Box": "", + "Vibraphone": "", + "Marimba": "", + "Xylophone": "", + "Tubular Bells": "", + "Dulcimer (Santur)": "", + "Drawbar Organ (Hammond)": "", + "Percussive Organ": "piano", + "Rock Organ": "piano", + "Church Organ": "piano", + "Reed Organ": "piano", + "Accordion (French)": "piano", + "Harmonica": "piano", + "Tango Accordion (Band neon)": "piano", + "Acoustic Guitar (nylon)": "", + "Acoustic Guitar (steel)": "", + "Electric Guitar (jazz)": "", + "Electric Guitar (clean)": "", + "Electric Guitar (muted)": "", + "Overdriven Guitar": "", + "Distortion Guitar": "", + "Guitar harmonics": "", + "Acoustic Bass": "", + "Electric Bass (fingered)": "", + "Electric Bass (picked)": "", + "Fretless Bass": "", + "Slap Bass 1": "", + "Slap Bass 2": "", + "Synth Bass 1": "", + "Synth Bass 2": "", + "Violin": "", + "Viola": "", + "Cello": "", + "Contrabass": "", + "Tremolo Strings": "", + "Pizzicato Strings": "", + "Orchestral Harp": "", + "Timpani": "", + "String Ensemble 1 (strings)": "", + "String Ensemble 2 (slow strings)": "", + "SynthStrings 1": "", + "SynthStrings 2": "", + "Choir Aahs": "", + "Voice Oohs": "", + "Synth Voice": "", + "Orchestra Hit": "", + "Trumpet": "", + "Trombone": "", + "Tuba": "", + "Muted Trumpet": "", + "French Horn": "", + "Brass Section": "", + "SynthBrass 1": "", + "SynthBrass 2": "", + "Soprano Sax": "", + "Alto Sax": "", + "Tenor Sax": "", + "Baritone Sax": "", + "Oboe": "", + "English Horn": "", + "Bassoon": "", + "Clarinet": "", + "Piccolo": "", + "Flute": "", + "Recorder": "", + "Pan Flute": "", + "Blown Bottle": "", + "Shakuhachi": "", + "Whistle": "", + "Ocarina": "", + "Lead 1 (square wave)": "", + "Lead 2 (sawtooth wave)": "", + "Lead 3 (calliope)": "", + "Lead 4 (chiffer)": "", + "Lead 5 (charang)": "", + "Lead 6 (voice solo)": "", + "Lead 7 (fifths)": "", + "Lead 8 (bass + lead)": "", + "Pad 1 (new age Fantasia)": "", + "Pad 2 (warm)": "", + "Pad 3 (polysynth)": "", + "Pad 4 (choir space voice)": "", + "Pad 5 (bowed glass)": "", + "Pad 6 (metallic pro)": "", + "Pad 7 (halo)": "", + "Pad 8 (sweep)": "", + "FX 1 (rain)": "", + "FX 2 (soundtrack)": "", + "FX 3 (crystal)": "", + "FX 4 (atmosphere)": "", + "FX 5 (brightness)": "", + "FX 6 (goblins)": "", + "FX 7 (echoes, drops)": "", + "FX 8 (sci-fi, star theme)": "", + "Sitar": "", + "Banjo": "", + "Shamisen": "", + "Koto": "", + "Kalimba": "", + "Bag pipe": "", + "Fiddle": "", + "Shanai": "", + "Tinkle Bell": "", + "Agogo": "", + "Steel Drums": "", + "Woodblock": "", + "Taiko Drum": "", + "Melodic Tom": "", + "Synth Drum": "", + "Reverse Cymbal": "", + "Guitar Fret Noise": "", + "Breath Noise": "", + "Seashore": "", + "Bird Tweet": "", + "Telephone Ring": "", + "Helicopter": "", + "Applause": "", + "Gunshot": "" + }, + "bin_name_to_program_name": { + "piano": "Acoustic Grand Piano" + }, + "instrument_names": { + "0": "Acoustic Grand Piano", + "1": "Bright Acoustic Piano", + "2": "Electric Grand Piano", + "3": "Honky-tonk Piano", + "4": "Electric Piano 1 (Rhodes Piano)", + "5": "Electric Piano 2 (Chorused Piano)", + "6": "Harpsichord", + "7": "Clavinet", + "8": "Celesta", + "9": "Glockenspiel", + "10": "Music Box", + "11": "Vibraphone", + "12": "Marimba", + "13": "Xylophone", + "14": "Tubular Bells", + "15": "Dulcimer (Santur)", + "16": "Drawbar Organ (Hammond)", + "17": "Percussive Organ", + "18": "Rock Organ", + "19": "Church Organ", + "20": "Reed Organ", + "21": "Accordion (French)", + "22": "Harmonica", + "23": "Tango Accordion (Band neon)", + "24": "Acoustic Guitar (nylon)", + "25": "Acoustic Guitar (steel)", + "26": "Electric Guitar (jazz)", + "27": "Electric Guitar (clean)", + "28": "Electric Guitar (muted)", + "29": "Overdriven Guitar", + "30": "Distortion Guitar", + "31": "Guitar harmonics", + "32": "Acoustic Bass", + "33": "Electric Bass (fingered)", + "34": "Electric Bass (picked)", + "35": "Fretless Bass", + "36": "Slap Bass 1", + "37": "Slap Bass 2", + "38": "Synth Bass 1", + "39": "Synth Bass 2", + "40": "Violin", + "41": "Viola", + "42": "Cello", + "43": "Contrabass", + "44": "Tremolo Strings", + "45": "Pizzicato Strings", + "46": "Orchestral Harp", + "47": "Timpani", + "48": "String Ensemble 1 (strings)", + "49": "String Ensemble 2 (slow strings)", + "50": "SynthStrings 1", + "51": "SynthStrings 2", + "52": "Choir Aahs", + "53": "Voice Oohs", + "54": "Synth Voice", + "55": "Orchestra Hit", + "56": "Trumpet", + "57": "Trombone", + "58": "Tuba", + "59": "Muted Trumpet", + "60": "French Horn", + "61": "Brass Section", + "62": "SynthBrass 1", + "63": "SynthBrass 2", + "64": "Soprano Sax", + "65": "Alto Sax", + "66": "Tenor Sax", + "67": "Baritone Sax", + "68": "Oboe", + "69": "English Horn", + "70": "Bassoon", + "71": "Clarinet", + "72": "Piccolo", + "73": "Flute", + "74": "Recorder", + "75": "Pan Flute", + "76": "Blown Bottle", + "77": "Shakuhachi", + "78": "Whistle", + "79": "Ocarina", + "80": "Lead 1 (square wave)", + "81": "Lead 2 (sawtooth wave)", + "82": "Lead 3 (calliope)", + "83": "Lead 4 (chiffer)", + "84": "Lead 5 (charang)", + "85": "Lead 6 (voice solo)", + "86": "Lead 7 (fifths)", + "87": "Lead 8 (bass + lead)", + "88": "Pad 1 (new age Fantasia)", + "89": "Pad 2 (warm)", + "90": "Pad 3 (polysynth)", + "91": "Pad 4 (choir space voice)", + "92": "Pad 5 (bowed glass)", + "93": "Pad 6 (metallic pro)", + "94": "Pad 7 (halo)", + "95": "Pad 8 (sweep)", + "96": "FX 1 (rain)", + "97": "FX 2 (soundtrack)", + "98": "FX 3 (crystal)", + "99": "FX 4 (atmosphere)", + "100": "FX 5 (brightness)", + "101": "FX 6 (goblins)", + "102": "FX 7 (echoes, drops)", + "103": "FX 8 (sci-fi, star theme)", + "104": "Sitar", + "105": "Banjo", + "106": "Shamisen", + "107": "Koto", + "108": "Kalimba", + "109": "Bag pipe", + "110": "Fiddle", + "111": "Shanai", + "112": "Tinkle Bell", + "113": "Agogo", + "114": "Steel Drums", + "115": "Woodblock", + "116": "Taiko Drum", + "117": "Melodic Tom", + "118": "Synth Drum", + "119": "Reverse Cymbal", + "120": "Guitar Fret Noise", + "121": "Breath Noise", + "122": "Seashore", + "123": "Bird Tweet", + "124": "Telephone Ring", + "125": "Helicopter", + "126": "Applause", + "127": "Gunshot" + } +} \ No newline at end of file