Upload Score_2_Performance_Transformer_Eval_Colab.ipynb

Browse files

Files changed (1) hide show

code/Score_2_Performance_Transformer_Eval_Colab.ipynb +519 -0

code/Score_2_Performance_Transformer_Eval_Colab.ipynb ADDED Viewed

	@@ -0,0 +1,519 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "VGrGd6__l5ch"
+   },
+   "source": [
+    "# Score 2 Performance Transformer Eval Colab (ver. 1.0)\n",
+    "\n",
+    "***\n",
+    "\n",
+    "Powered by tegridy-tools: https://github.com/asigalov61/tegridy-tools\n",
+    "\n",
+    "***\n",
+    "\n",
+    "WARNING: This complete implementation is a functioning model of the Artificial Intelligence. Please exercise great humility, care, and respect. https://www.nscai.gov/\n",
+    "\n",
+    "***\n",
+    "\n",
+    "#### Project Los Angeles\n",
+    "\n",
+    "#### Tegridy Code 2024\n",
+    "\n",
+    "***"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "shLrgoXdl5cj"
+   },
+   "source": [
+    "# GPU check"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "X3rABEpKCO02"
+   },
+   "outputs": [],
+   "source": [
+    "# Show the GPU(s) visible to this runtime; the model-loading cell below moves the model to CUDA\n",
+    "!nvidia-smi"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "0RcVC4btl5ck"
+   },
+   "source": [
+    "# Setup environment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "viHgEaNACPTs"
+   },
+   "outputs": [],
+   "source": [
+    "# Shallow-clone tegridy-tools, which holds the TMIDIX and X-Transformer modules imported below\n",
+    "# NOTE(review): later cells use /home/ubuntu/tegridy-tools/..., so this presumably has to run from /home/ubuntu - confirm\n",
+    "!git clone --depth 1 https://github.com/asigalov61/tegridy-tools"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "vK40g6V_BTNj"
+   },
+   "outputs": [],
+   "source": [
+    "# Install dependencies with the %pip magic so they land in the environment of the running kernel\n",
+    "# (a shelled-out sudo pip may target a different Python than the one executing this notebook)\n",
+    "%pip install torch\n",
+    "%pip install einops\n",
+    "%pip install torch-summary\n",
+    "%pip install tqdm\n",
+    "%pip install huggingface_hub\n",
+    "%pip install hf-transfer\n",
+    "%pip install ipywidgets"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Import modules"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "DzCOZU_gBiQV"
+   },
+   "outputs": [],
+   "source": [
+    "# Load modules and make data dir\n",
+    "\n",
+    "print('Loading modules...')\n",
+    "\n",
+    "import os\n",
+    "import pickle\n",
+    "import random\n",
+    "import secrets\n",
+    "import tqdm\n",
+    "import math\n",
+    "\n",
+    "# Environment flags are set in-process, before torch and huggingface_hub are imported.\n",
+    "# (A shell-level set issued through ! runs in a throwaway subshell and never reaches this kernel.)\n",
+    "os.environ['USE_FLASH_ATTENTION'] = '1'\n",
+    "os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '1'\n",
+    "\n",
+    "import torch\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "from torchsummary import summary\n",
+    "\n",
+    "# TMIDIX is imported from the cloned repo by changing into its directory first\n",
+    "%cd /home/ubuntu/tegridy-tools/tegridy-tools/\n",
+    "\n",
+    "import TMIDIX\n",
+    "\n",
+    "# Same approach for the bundled X-Transformer implementation\n",
+    "%cd /home/ubuntu/tegridy-tools/tegridy-tools/X-Transformer\n",
+    "\n",
+    "# Wildcard import: later cells use TransformerWrapper, Decoder, AutoregressiveWrapper and top_k from here\n",
+    "from x_transformer_1_23_2 import *\n",
+    "\n",
+    "torch.set_float32_matmul_precision('high')\n",
+    "torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul\n",
+    "torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn\n",
+    "torch.backends.cuda.enable_flash_sdp(True)\n",
+    "torch.backends.cuda.enable_cudnn_sdp(False)\n",
+    "\n",
+    "%cd /home/ubuntu/\n",
+    "\n",
+    "# exist_ok makes the cell safe to re-run\n",
+    "os.makedirs('/home/ubuntu/INTS', exist_ok=True)\n",
+    "\n",
+    "from huggingface_hub import hf_hub_download\n",
+    "\n",
+    "print('Done')\n",
+    "\n",
+    "print('Torch version:', torch.__version__)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Download model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "SA8qQSzbWslM"
+   },
+   "outputs": [],
+   "source": [
+    "# Fetch the trained checkpoint from the Hugging Face Hub into /home/ubuntu/Model/\n",
+    "# (the call is left as the last expression so its return value is shown as the cell output)\n",
+    "hf_hub_download(\n",
+    "    repo_id='asigalov61/Score-2-Performance-Transformer',\n",
+    "    filename='Score_2_Performance_Transformer_Small_Trained_Model_5280_steps_1.5374_loss_0.5525_acc.pth',\n",
+    "    local_dir='/home/ubuntu/Model/',\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "gSvqSRLaWslM"
+   },
+   "outputs": [],
+   "source": [
+    "# Model hyper-parameters; the vocabulary size is PAD_IDX + 1, with PAD_IDX itself used as the padding token\n",
+    "SEQ_LEN = 1802\n",
+    "PAD_IDX = 771\n",
+    "\n",
+    "model = TransformerWrapper(\n",
+    "    num_tokens = PAD_IDX+1,\n",
+    "    max_seq_len = SEQ_LEN,\n",
+    "    attn_layers = Decoder(dim = 1024,\n",
+    "                          depth = 8,\n",
+    "                          heads = 8,\n",
+    "                          rotary_pos_emb = True,\n",
+    "                          attn_flash = True\n",
+    "                         )\n",
+    "    )\n",
+    "\n",
+    "model = AutoregressiveWrapper(model, ignore_index = PAD_IDX, pad_value=PAD_IDX)\n",
+    "\n",
+    "print('=' * 70)\n",
+    "print('Loading model checkpoint...')\n",
+    "\n",
+    "model_path = '/home/ubuntu/Model/Score_2_Performance_Transformer_Small_Trained_Model_5280_steps_1.5374_loss_0.5525_acc.pth'\n",
+    "\n",
+    "# map_location='cpu' restores the tensors on the CPU regardless of the device they were saved from,\n",
+    "# so loading does not depend on a particular CUDA device index; the model is moved to the GPU below.\n",
+    "model.load_state_dict(torch.load(model_path, map_location='cpu', weights_only=True))\n",
+    "\n",
+    "print('=' * 70)\n",
+    "\n",
+    "model = torch.compile(model, mode='max-autotune')\n",
+    "\n",
+    "model.cuda()\n",
+    "model.eval()\n",
+    "\n",
+    "print('Done!')\n",
+    "\n",
+    "summary(model)\n",
+    "\n",
+    "# bfloat16 autocast context; the generation cell wraps every model.generate call in it\n",
+    "dtype = torch.bfloat16\n",
+    "\n",
+    "ctx = torch.amp.autocast(device_type='cuda', dtype=dtype)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "feXay_Ed7mG5"
+   },
+   "source": [
+    "# Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load source MIDI composition"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "enHpaHxaWslM"
+   },
+   "outputs": [],
+   "source": [
+    "#=================================================================\n",
+    "\n",
+    "# This can be a score or performance\n",
+    "# MIDI will be converted to solo Piano without drums\n",
+    "\n",
+    "# PLEASE NOTE THAT the MIDI composition MUST HAVE at least 300 notes for this demo to work properly!\n",
+    "\n",
+    "#=================================================================\n",
+    "\n",
+    "midi_file = '/home/ubuntu/tegridy-tools/tegridy-tools/seed2.mid'\n",
+    "# midi_file = 'midi_score.mid'\n",
+    "\n",
+    "#=================================================================\n",
+    "\n",
+    "raw_score = TMIDIX.midi2single_track_ms_score(midi_file)\n",
+    "\n",
+    "escore_notes = TMIDIX.advanced_score_processor(raw_score, return_enhanced_score_notes=True)\n",
+    "\n",
+    "# Defined before the branch so the summary and chunking below still run (with zero notes)\n",
+    "# when the processor yields no notes, instead of failing with a NameError.\n",
+    "melody_chords = []\n",
+    "\n",
+    "if escore_notes[0]:\n",
+    "\n",
+    "    escore_notes = TMIDIX.augment_enhanced_score_notes(escore_notes[0], timings_divider=16)\n",
+    "\n",
+    "    # Previous kept note, used to compute delta start-times\n",
+    "    pe = escore_notes[0]\n",
+    "\n",
+    "    # Pitches already emitted at the current start-time (reset whenever time advances)\n",
+    "    seen = []\n",
+    "\n",
+    "    for e in escore_notes:\n",
+    "\n",
+    "        # Skip channel 9 (drums); skipped notes do not update pe\n",
+    "        if e[3] != 9:\n",
+    "\n",
+    "            #=======================================================\n",
+    "\n",
+    "            # Delta start-time relative to the previous kept note, clamped to 0..255\n",
+    "            dtime = max(0, min(255, e[1]-pe[1]))\n",
+    "\n",
+    "            if dtime != 0:\n",
+    "                seen = []\n",
+    "\n",
+    "            # Durations\n",
+    "            dur = max(1, min(255, e[2]))\n",
+    "\n",
+    "            # Pitches\n",
+    "            ptc = max(1, min(127, e[4]))\n",
+    "\n",
+    "            # Velocities\n",
+    "            vel = max(1, min(127, e[5]))\n",
+    "\n",
+    "            # Keep only the first occurrence of a pitch at a given start-time\n",
+    "            if ptc not in seen:\n",
+    "\n",
+    "                melody_chords.append([dtime, dur, ptc, vel])\n",
+    "\n",
+    "                seen.append(ptc)\n",
+    "\n",
+    "            pe = e\n",
+    "\n",
+    "print('=' * 70)\n",
+    "print('Number of notes in a composition:', len(melody_chords))\n",
+    "print('=' * 70)\n",
+    "\n",
+    "# src_melody_chords_f: per chunk, a list of [dtime, pitch+256, dur+384, vel+640] token quadruples\n",
+    "# melody_chords_f: per chunk, the flat sequence [768] + (dtime, pitch) pairs + [769] + quadruples + [770]\n",
+    "src_melody_chords_f = []\n",
+    "melody_chords_f = []\n",
+    "\n",
+    "for i in range(0, len(melody_chords), 300):\n",
+    "\n",
+    "    chunk = melody_chords[i:i+300]\n",
+    "\n",
+    "    src = []\n",
+    "    src1 = []\n",
+    "    trg = []\n",
+    "\n",
+    "    # Only full 300-note chunks are kept; a shorter trailing chunk is dropped\n",
+    "    if len(chunk) == 300:\n",
+    "\n",
+    "        for mm in chunk:\n",
+    "            src.extend([mm[0], mm[2]+256])\n",
+    "            src1.append([mm[0], mm[2]+256, mm[1]+384, mm[3]+640])\n",
+    "            trg.extend([mm[0], mm[2]+256, mm[1]+384, mm[3]+640])\n",
+    "\n",
+    "        src_melody_chords_f.append(src1)\n",
+    "        melody_chords_f.append([768] + src + [769] + trg + [770])\n",
+    "\n",
+    "print('Done!')\n",
+    "print('=' * 70)\n",
+    "print('Number of composition chunks:', len(melody_chords_f))\n",
+    "print('=' * 70)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Generate new durations and velocities"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.eval()\n",
+    "\n",
+    "#================================================================\n",
+    "\n",
+    "composition_chunk_idx = 0 # Composition chunk idx to generate durations and velocities for. Each chunk is 300 notes\n",
+    "\n",
+    "num_prime_notes = 4 # Priming improves the results but it is not necessary and you can set it to zero\n",
+    "dur_top_k = 2 # Use k == 1 if src composition is score and k > 1 if src composition is performance\n",
+    "\n",
+    "dur_temperature = 1.3 # For best results, durations temperature should be more than 1.0 but less than velocities temperature\n",
+    "vel_temperature = 1.5 # For best results, velocities temperature must be larger than 1.3 and larger than durations temperature\n",
+    "\n",
+    "max_sampling_attempts = 64 # Upper bound on re-sampling when the model emits a token outside the requested range\n",
+    "\n",
+    "#================================================================\n",
+    "\n",
+    "def sample_token_in_range(tokens, low, high, temperature, fallback, **generate_kwargs):\n",
+    "\n",
+    "    '''Sample one next token for tokens and return it once low < token < high holds.\n",
+    "\n",
+    "    tokens          -- list of ints: the sequence generated so far (moved to CUDA here)\n",
+    "    low, high       -- exclusive bounds of the accepted token range\n",
+    "    temperature     -- sampling temperature passed to model.generate\n",
+    "    fallback        -- token returned when no in-range token was drawn\n",
+    "    generate_kwargs -- extra keyword arguments forwarded to model.generate\n",
+    "\n",
+    "    Sampling is retried at most max_sampling_attempts times. Without this bound the\n",
+    "    rejection loop could spin forever, e.g. when a very restrictive top-k keeps\n",
+    "    producing the same out-of-range token.\n",
+    "    '''\n",
+    "\n",
+    "    x = torch.LongTensor(tokens).cuda()\n",
+    "\n",
+    "    for _ in range(max_sampling_attempts):\n",
+    "\n",
+    "        with ctx:\n",
+    "            out = model.generate(x,\n",
+    "                                 1,\n",
+    "                                 temperature=temperature,\n",
+    "                                 return_prime=False,\n",
+    "                                 verbose=False,\n",
+    "                                 **generate_kwargs)\n",
+    "\n",
+    "        y = out.tolist()[0][0]\n",
+    "\n",
+    "        if low < y < high:\n",
+    "            return y\n",
+    "\n",
+    "    # Report the substitution instead of replacing the token silently\n",
+    "    tqdm.tqdm.write('No token in range after ' + str(max_sampling_attempts) + ' attempts; keeping the source token')\n",
+    "\n",
+    "    return fallback\n",
+    "\n",
+    "#================================================================\n",
+    "\n",
+    "song_chunk = src_melody_chords_f[composition_chunk_idx]\n",
+    "\n",
+    "# Prompt: 768, then the (dtime, pitch) token pair of every note in the chunk, then 769\n",
+    "song = [768]\n",
+    "\n",
+    "for m in song_chunk:\n",
+    "    song.extend(m[:2])\n",
+    "\n",
+    "song.append(769)\n",
+    "\n",
+    "# Length of the prompt; everything after it is generated output (602 tokens for a 300-note chunk)\n",
+    "prompt_len = len(song)\n",
+    "\n",
+    "for i in tqdm.tqdm(range(len(song_chunk))):\n",
+    "\n",
+    "    note = song_chunk[i]\n",
+    "\n",
+    "    song.extend(note[:2])\n",
+    "\n",
+    "    # Durations (tokens strictly between 384 and 640); the first num_prime_notes are copied from the source\n",
+    "\n",
+    "    if i < num_prime_notes:\n",
+    "        song.append(note[2])\n",
+    "\n",
+    "    else:\n",
+    "        song.append(sample_token_in_range(song, 384, 640, dur_temperature, note[2],\n",
+    "                                          filter_logits_fn=top_k,\n",
+    "                                          filter_kwargs={'k': dur_top_k}))\n",
+    "\n",
+    "    # Velocities (tokens strictly between 640 and 768); sampled without a logits filter\n",
+    "\n",
+    "    if i < num_prime_notes:\n",
+    "        song.append(note[3])\n",
+    "\n",
+    "    else:\n",
+    "        song.append(sample_token_in_range(song, 640, 768, vel_temperature, note[3]))\n",
+    "\n",
+    "print('---------------')\n",
+    "\n",
+    "#===========================================================================\n",
+    "# Convert model output to MIDI\n",
+    "#===========================================================================\n",
+    "\n",
+    "# Drop the prompt and keep only the generated (dtime, pitch, dur, vel) stream\n",
+    "song1 = song[prompt_len:]\n",
+    "\n",
+    "print('Sample INTs', song1[:15])\n",
+    "\n",
+    "song_f = []\n",
+    "\n",
+    "time = 0\n",
+    "dur = 0\n",
+    "vel = 90\n",
+    "pitch = 60\n",
+    "channel = 0\n",
+    "patch = 0\n",
+    "\n",
+    "patches = [0] * 16\n",
+    "\n",
+    "for ss in song1:\n",
+    "\n",
+    "    # The four token ranges are disjoint, so exactly one branch applies per token\n",
+    "\n",
+    "    if 0 <= ss < 256:\n",
+    "\n",
+    "        # Delta start-time; multiplied by 16 to undo timings_divider=16\n",
+    "        time += ss * 16\n",
+    "\n",
+    "    elif 256 <= ss < 384:\n",
+    "\n",
+    "        pitch = ss-256\n",
+    "\n",
+    "    elif 384 <= ss < 640:\n",
+    "\n",
+    "        dur = (ss-384) * 16\n",
+    "\n",
+    "    elif 640 <= ss < 768:\n",
+    "\n",
+    "        vel = (ss-640)\n",
+    "\n",
+    "        # The velocity token is the last one of a note, so the note is emitted here\n",
+    "        song_f.append(['note', time, dur, channel, pitch, vel, patch])\n",
+    "\n",
+    "detailed_stats = TMIDIX.Tegridy_ms_SONG_to_MIDI_Converter(song_f,\n",
+    "                                                        output_signature = 'Score 2 Performance Transformer',\n",
+    "                                                        output_file_name = '/home/ubuntu/Score-2-Performance-Transformer-Music-Composition',\n",
+    "                                                        track_name='Project Los Angeles',\n",
+    "                                                        list_of_MIDI_patches=patches\n",
+    "                                                        )\n",
+    "\n",
+    "print('Done!')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "z87TlDTVl5cp"
+   },
+   "source": [
+    "# Congrats! You did it! :)"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuClass": "premium",
+   "gpuType": "T4",
+   "private_outputs": true,
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}