chore(deps): bump actions/download-artifact from 3 to 4

Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 3 to 4. - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/download-artifact dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] <support@github.com>
release v1.7.3
2024-03-11 19:19:42 +00:00 · 2024-03-11 11:08:54 +00:00 · 2024-03-11 19:07:08 +08:00 · 2024-03-11 19:04:19 +08:00 · 2024-03-11 18:55:37 +08:00 · 2024-03-11 18:52:35 +08:00
105 changed files with 73653 additions and 764 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,5 @@
+* text=auto eol=lf
+
 backend-python/rwkv_pip/** linguist-vendored
 backend-python/wkv_cuda_utils/** linguist-vendored
 backend-python/get-pip.py linguist-vendored
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -0,0 +1,9 @@
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+    commit-message:
+      prefix: "chore"
+      include: "scope"
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -0,0 +1,171 @@
+name: Publish Docker Image
+on: [push]
+
+concurrency:
+  group: ${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+jobs:
+  docker_build:
+    name: Build ${{ matrix.arch }} Image
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        include:
+          - arch: amd64
+            name: amd64
+        #   - arch: arm64
+        #     name: arm64
+
+    steps:
+      - name: Free up disk spaces
+        run: |
+          sudo rm -rf /usr/share/dotnet || true
+          sudo rm -rf /opt/ghc || true
+          sudo rm -rf "/usr/local/share/boost" || true
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
+
+      - name: Get lowercase string for the repository name
+        id: lowercase-repo-name
+        uses: ASzc/change-string-case-action@v2
+        with:
+          string: ${{ github.event.repository.name }}
+
+      - name: Checkout base
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: Cache Docker layers
+        uses: actions/cache@v2
+        with:
+          path: /tmp/.buildx-cache
+          key: ${{ github.ref }}-${{ matrix.arch }}
+          restore-keys: |
+            ${{ github.ref }}-${{ matrix.arch }}
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+        with:
+          platforms: linux/${{ matrix.arch }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      - name: Docker login
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+
+      - name: Get commit SHA
+        id: vars  # exposes steps.vars.outputs.sha_short to later steps
+        run: echo "sha_short=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"
+
+      - name: Build and export
+        id: build
+        if: github.ref == 'refs/heads/master'
+        uses: docker/build-push-action@v3
+        with:
+          push: true
+          platforms: linux/${{ matrix.arch }}
+          tags: ${{ secrets.DOCKER_USERNAME }}/${{ steps.lowercase-repo-name.outputs.lowercase }}:${{ matrix.name }}-latest
+          build-args: |
+            SHA=${{ steps.vars.outputs.sha_short }}
+          outputs: type=image,push=true
+          cache-from: type=local,src=/tmp/.buildx-cache
+          cache-to: type=local,dest=/tmp/.buildx-cache
+
+      - name: Replace tag without `v`
+        if: startsWith(github.ref, 'refs/tags/')
+        uses: actions/github-script@v1
+        id: version
+        with:
+          script: |
+            return context.payload.ref.replace(/\/?refs\/tags\/v/, '')
+          result-encoding: string
+
+      - name: Build release and export
+        id: build_rel
+        if: startsWith(github.ref, 'refs/tags/')
+        uses: docker/build-push-action@v3
+        with:
+          push: true
+          platforms: linux/${{ matrix.arch }}
+          tags: ${{ secrets.DOCKER_USERNAME }}/${{ steps.lowercase-repo-name.outputs.lowercase }}:${{ matrix.name }}-${{steps.version.outputs.result}}
+          build-args: |
+            SHA=${{ steps.version.outputs.result }}
+          outputs: type=image,push=true
+          cache-from: type=local,src=/tmp/.buildx-cache
+          cache-to: type=local,dest=/tmp/.buildx-cache
+
+      - name: Save digest
+        if: github.ref == 'refs/heads/master'
+        run: echo ${{ steps.build.outputs.digest }} > /tmp/digest.txt
+
+      - name: Save release digest
+        if: startsWith(github.ref, 'refs/tags/')
+        run: echo ${{ steps.build_rel.outputs.digest }} > /tmp/digest.txt
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4  # v3 uploads cannot be read by download-artifact@v4
+        with:
+          name: digest_${{ matrix.name }}
+          path: /tmp/digest.txt
+
+  manifests:
+    name: Build manifests
+    needs: [docker_build]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Get lowercase string for the repository name
+        id: lowercase-repo-name
+        uses: ASzc/change-string-case-action@v2
+        with:
+          string: ${{ github.event.repository.name }}
+
+      - name: Checkout base
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      # https://github.com/docker/setup-qemu-action
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
+      # https://github.com/docker/setup-buildx-action
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+        with:
+          config-inline: |
+            [worker.oci]
+              max-parallelism = 1
+
+      - name: Download artifact
+        uses: actions/download-artifact@v4
+        with:
+          path: /tmp/images/
+
+      - name: Docker login
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+
+      - name: Replace tag without `v`
+        if: startsWith(github.ref, 'refs/tags/')
+        uses: actions/github-script@v1
+        id: version
+        with:
+          script: |
+            return context.payload.ref.replace(/\/?refs\/tags\/v/, '')
+          result-encoding: string
+
+      - name: Merge and push manifest on master branch
+        if: github.ref == 'refs/heads/master'
+        run: python scripts/merge_manifest.py "${{ secrets.DOCKER_USERNAME }}/${{ steps.lowercase-repo-name.outputs.lowercase }}"
+
+      - name: Merge and push manifest on release
+        if: startsWith(github.ref, 'refs/tags/')
+        run: python scripts/merge_manifest.py "${{ secrets.DOCKER_USERNAME }}/${{ steps.lowercase-repo-name.outputs.lowercase }}" ${{steps.version.outputs.result}}
--- a/.github/workflows/pre-release.yml
+++ b/.github/workflows/pre-release.yml
@@ -0,0 +1,117 @@
+name: pre-release
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - master
+    paths:
+      - "backend-python/**"
+    tags-ignore:
+      - "v*"
+
+jobs:
+  windows:
+    runs-on: windows-2022
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: master
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.20.5'
+      - uses: actions/setup-python@v5
+        id: cp310
+        with:
+          python-version: '3.10'
+      - uses: crazy-max/ghaction-chocolatey@v3
+        with:
+          args: install upx
+      - run: |
+          Start-BitsTransfer https://github.com/josStorer/ai00_rwkv_server/releases/latest/download/webgpu_server_windows_x86_64.exe ./backend-rust/webgpu_server.exe
+          Start-BitsTransfer https://github.com/josStorer/web-rwkv-converter/releases/latest/download/web-rwkv-converter_windows_x86_64.exe ./backend-rust/web-rwkv-converter.exe
+          Start-BitsTransfer https://github.com/josStorer/LibreHardwareMonitor.Console/releases/latest/download/LibreHardwareMonitor.Console.zip ./LibreHardwareMonitor.Console.zip
+          Expand-Archive ./LibreHardwareMonitor.Console.zip -DestinationPath ./components/LibreHardwareMonitor.Console
+          Start-BitsTransfer https://www.python.org/ftp/python/3.10.11/python-3.10.11-embed-amd64.zip ./python-3.10.11-embed-amd64.zip
+          Expand-Archive ./python-3.10.11-embed-amd64.zip -DestinationPath ./py310
+          $content=Get-Content "./py310/python310._pth"; $content | ForEach-Object {if ($_.ReadCount -eq 3) {"Lib\\site-packages"} else {$_}} | Set-Content ./py310/python310._pth
+          ./py310/python ./backend-python/get-pip.py
+          ./py310/python -m pip install Cython==3.0.4
+          Copy-Item -Path "${{ steps.cp310.outputs.python-path }}/../include" -Destination "py310/include" -Recurse
+          Copy-Item -Path "${{ steps.cp310.outputs.python-path }}/../libs" -Destination "py310/libs" -Recurse
+          ./py310/python -m pip install cyac==1.9
+          go install github.com/wailsapp/wails/v2/cmd/wails@latest
+          del ./backend-python/rwkv_pip/cpp/librwkv.dylib
+          del ./backend-python/rwkv_pip/cpp/librwkv.so
+          (Get-Content -Path ./backend-golang/app.go) -replace "//go:custom_build windows ", "" | Set-Content -Path ./backend-golang/app.go
+          (Get-Content -Path ./backend-golang/utils.go) -replace "//go:custom_build windows ", "" | Set-Content -Path ./backend-golang/utils.go
+          make
+          Rename-Item -Path "build/bin/RWKV-Runner.exe" -NewName "RWKV-Runner_windows_x64.exe"
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: RWKV-Runner_windows_x64.exe
+          path: build/bin/RWKV-Runner_windows_x64.exe
+
+  linux:
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: master
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.20.5'
+      - run: |
+          wget https://github.com/josStorer/ai00_rwkv_server/releases/latest/download/webgpu_server_linux_x86_64 -O ./backend-rust/webgpu_server
+          wget https://github.com/josStorer/web-rwkv-converter/releases/latest/download/web-rwkv-converter_linux_x86_64 -O ./backend-rust/web-rwkv-converter
+          sudo apt-get update
+          sudo apt-get install upx
+          sudo apt-get install build-essential libgtk-3-dev libwebkit2gtk-4.0-dev libasound2-dev
+          go install github.com/wailsapp/wails/v2/cmd/wails@latest
+          rm ./backend-python/rwkv_pip/wkv_cuda.pyd
+          rm ./backend-python/rwkv_pip/rwkv5.pyd
+          rm ./backend-python/rwkv_pip/rwkv6.pyd
+          rm ./backend-python/rwkv_pip/beta/wkv_cuda.pyd
+          rm ./backend-python/get-pip.py
+          rm ./backend-python/rwkv_pip/cpp/librwkv.dylib
+          rm ./backend-python/rwkv_pip/cpp/rwkv.dll
+          rm ./backend-python/rwkv_pip/webgpu/web_rwkv_py.cp310-win_amd64.pyd
+          make
+          mv build/bin/RWKV-Runner build/bin/RWKV-Runner_linux_x64
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: RWKV-Runner_linux_x64
+          path: build/bin/RWKV-Runner_linux_x64
+
+  macos:
+    runs-on: macos-13
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: master
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.20.5'
+      - run: |
+          wget https://github.com/josStorer/ai00_rwkv_server/releases/latest/download/webgpu_server_darwin_aarch64 -O ./backend-rust/webgpu_server
+          wget https://github.com/josStorer/web-rwkv-converter/releases/latest/download/web-rwkv-converter_darwin_aarch64 -O ./backend-rust/web-rwkv-converter
+          go install github.com/wailsapp/wails/v2/cmd/wails@latest
+          rm ./backend-python/rwkv_pip/wkv_cuda.pyd
+          rm ./backend-python/rwkv_pip/rwkv5.pyd
+          rm ./backend-python/rwkv_pip/rwkv6.pyd
+          rm ./backend-python/rwkv_pip/beta/wkv_cuda.pyd
+          rm ./backend-python/get-pip.py
+          rm ./backend-python/rwkv_pip/cpp/rwkv.dll
+          rm ./backend-python/rwkv_pip/cpp/librwkv.so
+          rm ./backend-python/rwkv_pip/webgpu/web_rwkv_py.cp310-win_amd64.pyd
+          make
+          cp build/darwin/Readme_Install.txt build/bin/Readme_Install.txt
+          cp build/bin/RWKV-Runner.app/Contents/MacOS/RWKV-Runner build/bin/RWKV-Runner_darwin_universal
+          cd build/bin && zip -r RWKV-Runner_macos_universal.zip RWKV-Runner.app Readme_Install.txt
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: RWKV-Runner_macos_universal.zip
+          path: build/bin/RWKV-Runner_macos_universal.zip
+
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -14,7 +14,7 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
      - run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          ref: master

@@ -38,17 +38,17 @@ jobs:
    runs-on: windows-2022
    needs: create-draft
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          ref: master
-      - uses: actions/setup-go@v4
+      - uses: actions/setup-go@v5
        with:
          go-version: '1.20.5'
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v5
        id: cp310
        with:
          python-version: '3.10'
-      - uses: crazy-max/ghaction-chocolatey@v2
+      - uses: crazy-max/ghaction-chocolatey@v3
        with:
          args: install upx
      - run: |
@@ -78,10 +78,10 @@ jobs:
    runs-on: ubuntu-20.04
    needs: create-draft
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          ref: master
-      - uses: actions/setup-go@v4
+      - uses: actions/setup-go@v5
        with:
          go-version: '1.20.5'
      - run: |
@@ -98,6 +98,7 @@ jobs:
          rm ./backend-python/get-pip.py
          rm ./backend-python/rwkv_pip/cpp/librwkv.dylib
          rm ./backend-python/rwkv_pip/cpp/rwkv.dll
+          rm ./backend-python/rwkv_pip/webgpu/web_rwkv_py.cp310-win_amd64.pyd
          make
          mv build/bin/RWKV-Runner build/bin/RWKV-Runner_linux_x64

@@ -107,10 +108,10 @@ jobs:
    runs-on: macos-13
    needs: create-draft
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          ref: master
-      - uses: actions/setup-go@v4
+      - uses: actions/setup-go@v5
        with:
          go-version: '1.20.5'
      - run: |
@@ -124,6 +125,7 @@ jobs:
          rm ./backend-python/get-pip.py
          rm ./backend-python/rwkv_pip/cpp/rwkv.dll
          rm ./backend-python/rwkv_pip/cpp/librwkv.so
+          rm ./backend-python/rwkv_pip/webgpu/web_rwkv_py.cp310-win_amd64.pyd
          make
          cp build/darwin/Readme_Install.txt build/bin/Readme_Install.txt
          cp build/bin/RWKV-Runner.app/Contents/MacOS/RWKV-Runner build/bin/RWKV-Runner_darwin_universal
@@ -135,5 +137,5 @@ jobs:
    runs-on: ubuntu-22.04
    needs: [ windows, linux, macos ]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - run: gh release edit ${{github.ref_name}} --draft=false
--- a/.gitignore
+++ b/.gitignore
@@ -19,7 +19,6 @@ __pycache__
 /cmd-helper.bat
 /install-py-dep.bat
 /backend-python/wkv_cuda
-/backend-python/rwkv*
 *.exe
 *.old
 .DS_Store
--- a/CURRENT_CHANGE.md
+++ b/CURRENT_CHANGE.md
@@ -1,13 +1,30 @@
 ## Changes

- rwkv.cpp(ggml) support
- allow playing mid with external player
- allow overriding Core API URL
- chore
+### Features
+
+- add Docker support (#291) @LonghronShen
+
+### Fixes
+
+- fix a generation exception caused by potentially dangerous regex being passed into the stop array
+- fix max_tokens parameter of Chat page not being passed to backend
+- fix the issue where penalty_decay and global_penalty are not being passed to the backend default config when running
+  the model through client
+
+### Improvements
+
+- prevent 'torch' has no attribute 'cuda' error in torch_gc, so user can use CPU or WebGPU (#302)
+
+### Chores
+
+- bump dependencies
+- add pre-release workflow
+- dep_check.py now ignores GPUtil

 ## Install

 - Windows: https://github.com/josStorer/RWKV-Runner/blob/master/build/windows/Readme_Install.txt
 - MacOS: https://github.com/josStorer/RWKV-Runner/blob/master/build/darwin/Readme_Install.txt
 - Linux: https://github.com/josStorer/RWKV-Runner/blob/master/build/linux/Readme_Install.txt
- Server-Deploy-Examples: https://github.com/josStorer/RWKV-Runner/tree/master/deploy-examples
+- Simple Deploy Example: https://github.com/josStorer/RWKV-Runner/blob/master/README.md#simple-deploy-example
+- Server Deploy Examples: https://github.com/josStorer/RWKV-Runner/tree/master/deploy-examples
--- a/55
+++ b/55
@@ -0,0 +1,55 @@
+FROM node:21-slim AS frontend
+
+RUN echo "registry=https://registry.npmmirror.com/" > ~/.npmrc
+
+WORKDIR /app
+
+COPY manifest.json manifest.json
+COPY frontend frontend
+
+WORKDIR /app/frontend
+
+RUN npm ci
+RUN npm run build
+
+FROM nvidia/cuda:11.6.1-devel-ubuntu20.04 AS runtime
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt update && \
+    apt install -yq git curl wget build-essential ninja-build aria2 jq software-properties-common
+
+RUN add-apt-repository -y ppa:deadsnakes/ppa && \
+    add-apt-repository -y ppa:ubuntu-toolchain-r/test && \
+    apt install -y g++-11 python3.10 python3.10-distutils python3.10-dev && \
+    curl -sS https://mirrors.aliyun.com/pypi/get-pip.py | python3.10
+
+RUN python3.10 -m pip install cmake
+
+FROM runtime AS librwkv
+
+WORKDIR /app
+
+RUN git clone https://github.com/RWKV/rwkv.cpp.git && \
+    cd rwkv.cpp && \
+    git submodule update --init --recursive && \
+    mkdir -p build && \
+    cd build && \
+    cmake -G Ninja .. && \
+    cmake --build .
+
+FROM runtime AS final
+
+WORKDIR /app
+
+COPY ./backend-python/requirements.txt ./backend-python/requirements.txt
+
+RUN python3.10 -m pip install --quiet -r ./backend-python/requirements.txt
+
+COPY . .
+COPY --from=frontend /app/frontend/dist /app/frontend/dist
+COPY --from=librwkv /app/rwkv.cpp/build/librwkv.so /app/backend-python/rwkv_pip/cpp/librwkv.so
+
+EXPOSE 27777
+
+CMD ["python3.10", "./backend-python/main.py", "--port", "27777", "--host", "0.0.0.0", "--webui"]
--- a/6
+++ b/6
@@ -8,7 +8,8 @@ endif

 build-windows:
 	@echo ---- build for windows
-	wails build -upx -ldflags '-s -w -extldflags "-static"' -platform windows/amd64
+	wails build -ldflags '-s -w -extldflags "-static"' -platform windows/amd64
+	upx -9 --lzma ./build/bin/RWKV-Runner.exe

 build-macos:
 	@echo ---- build for macos
@@ -16,7 +17,8 @@ build-macos:

 build-linux:
 	@echo ---- build for linux
-	wails build -upx -ldflags '-s -w' -platform linux/amd64
+	wails build -ldflags '-s -w' -platform linux/amd64
+	upx -9 --lzma ./build/bin/RWKV-Runner

 build-web:
 	@echo ---- build for web
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@ compatible with the OpenAI API, which means that every ChatGPT client is an RWKV

 [![license][license-image]][license-url]
 [![release][release-image]][release-url]
+[![py-version][py-version-image]][py-version-url]

 English | [简体中文](README_ZH.md) | [日本語](README_JA.md)

@@ -31,6 +32,10 @@ English | [简体中文](README_ZH.md) | [日本語](README_JA.md)

 [release-url]: https://github.com/josStorer/RWKV-Runner/releases/latest

+[py-version-image]: https://img.shields.io/pypi/pyversions/fastapi.svg
+
+[py-version-url]: https://github.com/josStorer/RWKV-Runner/tree/master/backend-python
+
 [download-url]: https://github.com/josStorer/RWKV-Runner/releases

 [Windows-image]: https://img.shields.io/badge/-Windows-blue?logo=windows
@@ -47,13 +52,28 @@ English | [简体中文](README_ZH.md) | [日本語](README_JA.md)

 </div>

-#### Tip: You can deploy [backend-python](./backend-python/) on a server and use this program as a client only. Fill in your server address in the Settings `API URL`.
+## Tips

-#### Default configs has enabled custom CUDA kernel acceleration, which is much faster and consumes much less VRAM. If you encounter possible compatibility issues (output garbled), go to the Configs page and turn off `Use Custom CUDA kernel to Accelerate`, or try to upgrade your gpu driver.
+- You can deploy [backend-python](./backend-python/) on a server and use this program as a client only. Fill in
+  your server address in the Settings `API URL`.

-#### If Windows Defender claims this is a virus, you can try downloading [v1.3.7_win.zip](https://github.com/josStorer/RWKV-Runner/releases/download/v1.3.7/RWKV-Runner_win.zip) and letting it update automatically to the latest version, or add it to the trusted list (`Windows Security` -> `Virus & threat protection` -> `Manage settings` -> `Exclusions` -> `Add or remove exclusions` -> `Add an exclusion` -> `Folder` -> `RWKV-Runner`).
+- If you are deploying and providing public services, please limit the request size through API gateway to prevent
+  excessive resource usage caused by submitting overly long prompts. Additionally, please restrict the upper limit of
+  requests' max_tokens based on your actual
+  situation: https://github.com/josStorer/RWKV-Runner/blob/master/backend-python/utils/rwkv.py#L567, the default is set
+  as le=102400, which may result in significant resource consumption for individual responses in extreme cases.

-#### For different tasks, adjusting API parameters can achieve better results. For example, for translation tasks, you can try setting Temperature to 1 and Top_P to 0.3.
+- Default configs have enabled custom CUDA kernel acceleration, which is much faster and consumes much less VRAM. If you
+  encounter possible compatibility issues (output garbled), go to the Configs page and turn
+  off `Use Custom CUDA kernel to Accelerate`, or try to upgrade your gpu driver.
+
+- If Windows Defender claims this is a virus, you can try
+  downloading [v1.3.7_win.zip](https://github.com/josStorer/RWKV-Runner/releases/download/v1.3.7/RWKV-Runner_win.zip)
+  and letting it update automatically to the latest version, or add it to the trusted
+  list (`Windows Security` -> `Virus & threat protection` -> `Manage settings` -> `Exclusions` -> `Add or remove exclusions` -> `Add an exclusion` -> `Folder` -> `RWKV-Runner`).
+
+- For different tasks, adjusting API parameters can achieve better results. For example, for translation tasks, you can
+  try setting Temperature to 1 and Top_P to 0.3.

 ## Features

@@ -168,6 +188,10 @@ Tip: You can download https://github.com/josStorer/sgm_plus and unzip it to the
 to use it as an offline sound source. Please note that if you are compiling the program from source code, do not place
 it in the source code directory.

+If you don't have a MIDI keyboard, you can use virtual MIDI input software like `Virtual Midi Controller 3 LE`, along
+with [loopMIDI](https://www.tobias-erichsen.de/wp-content/uploads/2020/01/loopMIDISetup_1_0_16_27.zip), to use a regular
+computer keyboard as MIDI input.
+
 ### USB MIDI Connection

 - USB MIDI devices are plug-and-play, and you can select your input device in the Composition page
@@ -206,12 +230,18 @@ it in the source code directory.

 ## Related Repositories:

+- RWKV-5-World: https://huggingface.co/BlinkDL/rwkv-5-world/tree/main
 - RWKV-4-World: https://huggingface.co/BlinkDL/rwkv-4-world/tree/main
 - RWKV-4-Raven: https://huggingface.co/BlinkDL/rwkv-4-raven/tree/main
 - ChatRWKV: https://github.com/BlinkDL/ChatRWKV
 - RWKV-LM: https://github.com/BlinkDL/RWKV-LM
 - RWKV-LM-LoRA: https://github.com/Blealtan/RWKV-LM-LoRA
+- RWKV-v5-lora: https://github.com/JL-er/RWKV-v5-lora
 - MIDI-LLM-tokenizer: https://github.com/briansemrau/MIDI-LLM-tokenizer
+- ai00_rwkv_server: https://github.com/cgisky1980/ai00_rwkv_server
+- rwkv.cpp: https://github.com/saharNooby/rwkv.cpp
+- web-rwkv-py: https://github.com/cryscan/web-rwkv-py
+- web-rwkv: https://github.com/cryscan/web-rwkv

 ## Preview

--- a/README_JA.md
+++ b/README_JA.md
@@ -12,6 +12,7 @@

 [![license][license-image]][license-url]
 [![release][release-image]][release-url]
+[![py-version][py-version-image]][py-version-url]

 [English](README.md) | [简体中文](README_ZH.md) | 日本語

@@ -31,6 +32,10 @@

 [release-url]: https://github.com/josStorer/RWKV-Runner/releases/latest

+[py-version-image]: https://img.shields.io/pypi/pyversions/fastapi.svg
+
+[py-version-url]: https://github.com/josStorer/RWKV-Runner/tree/master/backend-python
+
 [download-url]: https://github.com/josStorer/RWKV-Runner/releases

 [Windows-image]: https://img.shields.io/badge/-Windows-blue?logo=windows
@@ -47,13 +52,26 @@

 </div>

-#### ヒント：サーバーに[backend-python](./backend-python/)をデプロイし、このプログラムをクライアントとして使用することができます。設定された`API URL`にサーバーアドレスを入力してください。
+## ヒント

-#### デフォルトの設定はカスタム CUDA カーネルアクセラレーションを有効にしています。互換性の問題 (文字化けを出力する) が発生する可能性がある場合は、コンフィグページに移動し、`Use Custom CUDA kernel to Accelerate` をオフにしてください、あるいは、GPUドライバーをアップグレードしてみてください。
+- サーバーに [backend-python](./backend-python/)
+  をデプロイし、このプログラムをクライアントとして使用することができます。設定された`API URL`にサーバーアドレスを入力してください。

-#### Windows Defender がこれをウイルスだと主張する場合は、[v1.3.7_win.zip](https://github.com/josStorer/RWKV-Runner/releases/download/v1.3.7/RWKV-Runner_win.zip) をダウンロードして最新版に自動更新させるか、信頼済みリストに追加してみてください (`Windows Security` -> `Virus & threat protection` -> `Manage settings` -> `Exclusions` -> `Add or remove exclusions` -> `Add an exclusion` -> `Folder` -> `RWKV-Runner`)。
+- もし、あなたがデプロイし、外部に公開するサービスを提供している場合、APIゲートウェイを使用してリクエストのサイズを制限し、
+  長すぎるプロンプトの提出がリソースを占有しないようにしてください。さらに、実際の状況に応じて、リクエストの max_tokens
+  の上限を制限してください：https://github.com/josStorer/RWKV-Runner/blob/master/backend-python/utils/rwkv.py#L567
+  、デフォルトは le=102400 ですが、極端な場合には単一の応答が大量のリソースを消費する可能性があります。

-#### 異なるタスクについては、API パラメータを調整することで、より良い結果を得ることができます。例えば、翻訳タスクの場合、Temperature を 1 に、Top_P を 0.3 に設定してみてください。
+- デフォルトの設定はカスタム CUDA カーネルアクセラレーションを有効にしています。互換性の問題 (文字化けを出力する)
+  が発生する可能性がある場合は、コンフィグページに移動し、`Use Custom CUDA kernel to Accelerate`
+  をオフにしてください、あるいは、GPUドライバーをアップグレードしてみてください。
+
+- Windows Defender
+  がこれをウイルスだと主張する場合は、[v1.3.7_win.zip](https://github.com/josStorer/RWKV-Runner/releases/download/v1.3.7/RWKV-Runner_win.zip)
+  をダウンロードして最新版に自動更新させるか、信頼済みリストに追加してみてください (`Windows Security` -> `Virus & threat protection` -> `Manage settings` -> `Exclusions` -> `Add or remove exclusions` -> `Add an exclusion` -> `Folder` -> `RWKV-Runner`)。
+
+- 異なるタスクについては、API パラメータを調整することで、より良い結果を得ることができます。例えば、翻訳タスクの場合、Temperature
+  を 1 に、Top_P を 0.3 に設定してみてください。

 ## 特徴

@@ -167,6 +185,10 @@ Tip: You can download https://github.com/josStorer/sgm_plus and unzip it to the
 to use it as an offline sound source. Please note that if you are compiling the program from source code, do not place
 it in the source code directory.

+MIDIキーボードをお持ちでない場合、`Virtual Midi Controller 3 LE`
+などの仮想MIDI入力ソフトウェアを使用することができます。[loopMIDI](https://www.tobias-erichsen.de/wp-content/uploads/2020/01/loopMIDISetup_1_0_16_27.zip)
+を組み合わせて、通常のコンピュータキーボードをMIDI入力として使用できます。
+
 ### USB MIDI Connection

 - USB MIDI devices are plug-and-play, and you can select your input device in the Composition page
@@ -205,12 +227,18 @@ it in the source code directory.

 ## 関連リポジトリ:

+- RWKV-5-World: https://huggingface.co/BlinkDL/rwkv-5-world/tree/main
 - RWKV-4-World: https://huggingface.co/BlinkDL/rwkv-4-world/tree/main
 - RWKV-4-Raven: https://huggingface.co/BlinkDL/rwkv-4-raven/tree/main
 - ChatRWKV: https://github.com/BlinkDL/ChatRWKV
 - RWKV-LM: https://github.com/BlinkDL/RWKV-LM
 - RWKV-LM-LoRA: https://github.com/Blealtan/RWKV-LM-LoRA
+- RWKV-v5-lora: https://github.com/JL-er/RWKV-v5-lora
 - MIDI-LLM-tokenizer: https://github.com/briansemrau/MIDI-LLM-tokenizer
+- ai00_rwkv_server: https://github.com/cgisky1980/ai00_rwkv_server
+- rwkv.cpp: https://github.com/saharNooby/rwkv.cpp
+- web-rwkv-py: https://github.com/cryscan/web-rwkv-py
+- web-rwkv: https://github.com/cryscan/web-rwkv

 ## Preview

--- a/README_ZH.md
+++ b/README_ZH.md
@@ -11,6 +11,7 @@ API兼容的接口，这意味着一切ChatGPT客户端都是RWKV客户端。

 [![license][license-image]][license-url]
 [![release][release-image]][release-url]
+[![py-version][py-version-image]][py-version-url]

 [English](README.md) | 简体中文 | [日本語](README_JA.md)

@@ -30,6 +31,10 @@ API兼容的接口，这意味着一切ChatGPT客户端都是RWKV客户端。

 [release-url]: https://github.com/josStorer/RWKV-Runner/releases/latest

+[py-version-image]: https://img.shields.io/pypi/pyversions/fastapi.svg
+
+[py-version-url]: https://github.com/josStorer/RWKV-Runner/tree/master/backend-python
+
 [download-url]: https://github.com/josStorer/RWKV-Runner/releases

 [Windows-image]: https://img.shields.io/badge/-Windows-blue?logo=windows
@@ -46,13 +51,22 @@ API兼容的接口，这意味着一切ChatGPT客户端都是RWKV客户端。

 </div>

-#### 小贴士：你可以在服务器部署[backend-python](./backend-python/)，然后将此程序仅用作客户端，在设置的`API URL`中填入你的服务器地址
+## 小贴士

-#### 预设配置已经开启自定义CUDA算子加速，速度更快，且显存消耗更少。如果你遇到可能的兼容性(输出乱码)问题，前往配置页面，关闭`使用自定义CUDA算子加速`，或更新你的显卡驱动
+- 你可以在服务器部署[backend-python](./backend-python/)，然后将此程序仅用作客户端，在设置的`API URL`中填入你的服务器地址

-#### 如果Windows Defender说这是一个病毒，你可以尝试下载[v1.3.7_win.zip](https://github.com/josStorer/RWKV-Runner/releases/download/v1.3.7/RWKV-Runner_win.zip)，然后让其自动更新到最新版，或添加信任 (`Windows Security` -> `Virus & threat protection` -> `Manage settings` -> `Exclusions` -> `Add or remove exclusions` -> `Add an exclusion` -> `Folder` -> `RWKV-Runner`)
+- 如果你正在部署并对外提供公开服务，请通过API网关限制请求大小，避免过长的prompt提交占用资源。此外，请根据你的实际情况，限制请求的
+  max_tokens 上限: https://github.com/josStorer/RWKV-Runner/blob/master/backend-python/utils/rwkv.py#L567,
+  默认le=102400, 这可能导致极端情况下单个响应消耗大量资源

-#### 对于不同的任务，调整API参数会获得更好的效果，例如对于翻译任务，你可以尝试设置Temperature为1，Top_P为0.3
+- 预设配置已经开启自定义CUDA算子加速，速度更快，且显存消耗更少。如果你遇到可能的兼容性(输出乱码)
+  问题，前往配置页面，关闭`使用自定义CUDA算子加速`，或更新你的显卡驱动
+
+- 如果 Windows Defender
+  说这是一个病毒，你可以尝试下载[v1.3.7_win.zip](https://github.com/josStorer/RWKV-Runner/releases/download/v1.3.7/RWKV-Runner_win.zip)，
+  然后让其自动更新到最新版，或添加信任 (`Windows Security` -> `Virus & threat protection` -> `Manage settings` -> `Exclusions` -> `Add or remove exclusions` -> `Add an exclusion` -> `Folder` -> `RWKV-Runner`)
+
+- 对于不同的任务，调整API参数会获得更好的效果，例如对于翻译任务，你可以尝试设置Temperature为1，Top_P为0.3

 ## 功能

@@ -161,6 +175,9 @@ for i in np.argsort(embeddings_cos_sim)[::-1]:
 小贴士: 你可以下载 https://github.com/josStorer/sgm_plus, 并解压到程序的`assets/sound-font`目录, 以使用离线音源. 注意,
 如果你正在从源码编译程序, 请不要将其放置在源码目录中

+如果你没有MIDI键盘, 你可以使用像 `Virtual Midi Controller 3 LE` 这样的虚拟MIDI输入软件,
+配合[loopMIDI](https://www.tobias-erichsen.de/wp-content/uploads/2020/01/loopMIDISetup_1_0_16_27.zip), 使用普通电脑键盘作为MIDI输入
+
 ### USB MIDI 连接

 - USB MIDI设备是即插即用的, 你能够在作曲页面选择你的输入设备
@@ -192,12 +209,18 @@ for i in np.argsort(embeddings_cos_sim)[::-1]:

 ## 相关仓库:

+- RWKV-5-World: https://huggingface.co/BlinkDL/rwkv-5-world/tree/main
 - RWKV-4-World: https://huggingface.co/BlinkDL/rwkv-4-world/tree/main
 - RWKV-4-Raven: https://huggingface.co/BlinkDL/rwkv-4-raven/tree/main
 - ChatRWKV: https://github.com/BlinkDL/ChatRWKV
 - RWKV-LM: https://github.com/BlinkDL/RWKV-LM
 - RWKV-LM-LoRA: https://github.com/Blealtan/RWKV-LM-LoRA
+- RWKV-v5-lora: https://github.com/JL-er/RWKV-v5-lora
 - MIDI-LLM-tokenizer: https://github.com/briansemrau/MIDI-LLM-tokenizer
+- ai00_rwkv_server: https://github.com/cgisky1980/ai00_rwkv_server
+- rwkv.cpp: https://github.com/saharNooby/rwkv.cpp
+- web-rwkv-py: https://github.com/cryscan/web-rwkv-py
+- web-rwkv: https://github.com/cryscan/web-rwkv

 ## Preview

--- a/backend-golang/app.go
+++ b/backend-golang/app.go
@@ -1,7 +1,9 @@
 package backend_golang

 import (
+	"archive/zip"
 	"bufio"
+	"bytes"
 	"context"
 	"errors"
 	"io"
@@ -10,6 +12,7 @@ import (
 	"os/exec"
 	"path/filepath"
 	"runtime"
+	"strings"
 	"syscall"
 	"time"

@@ -23,6 +26,7 @@ type App struct {
 	ctx           context.Context
 	HasConfigData bool
 	ConfigData    map[string]any
+	Dev           bool
 	exDir         string
 	cmdPrefix     string
 }
@@ -39,10 +43,20 @@ func (a *App) OnStartup(ctx context.Context) {
 	a.exDir = ""
 	a.cmdPrefix = ""

-	if runtime.GOOS == "darwin" {
-		ex, _ := os.Executable()
-		a.exDir = filepath.Dir(ex) + "/../../../"
-		a.cmdPrefix = "cd " + a.exDir + " && "
+	ex, err := os.Executable()
+	if err == nil {
+		if runtime.GOOS == "darwin" {
+			a.exDir = filepath.Dir(ex) + "/../../../"
+			a.cmdPrefix = "cd " + a.exDir + " && "
+		} else {
+			a.exDir = filepath.Dir(ex) + "/"
+			a.cmdPrefix = "cd " + a.exDir + " && "
+		}
+		if a.Dev {
+			a.exDir = ""
+		} else {
+			os.Chdir(a.exDir)
+		}
 	}

 	os.Chmod(a.exDir+"backend-rust/webgpu_server", 0777)
@@ -50,9 +64,12 @@ func (a *App) OnStartup(ctx context.Context) {
 	os.Mkdir(a.exDir+"models", os.ModePerm)
 	os.Mkdir(a.exDir+"lora-models", os.ModePerm)
 	os.Mkdir(a.exDir+"finetune/json2binidx_tool/data", os.ModePerm)
-	f, err := os.Create(a.exDir + "lora-models/train_log.txt")
-	if err == nil {
-		f.Close()
+	trainLogPath := "lora-models/train_log.txt"
+	if !a.FileExists(trainLogPath) {
+		f, err := os.Create(a.exDir + trainLogPath)
+		if err == nil {
+			f.Close()
+		}
 	}

 	a.downloadLoop()
@@ -146,6 +163,7 @@ func (a *App) UpdateApp(url string) (broken bool, err error) {
 	ticker := time.NewTicker(250 * time.Millisecond)
 	defer ticker.Stop()

+	// update progress
 	go func() {
 		for {
 			<-ticker.C
@@ -165,13 +183,35 @@ func (a *App) UpdateApp(url string) (broken bool, err error) {
 			}
 		}
 	}()
-	err = selfupdate.Apply(pr, selfupdate.Options{})
+
+	var updateFile io.Reader = pr
+	// extract macos binary from zip
+	if strings.HasSuffix(url, ".zip") && runtime.GOOS == "darwin" {
+		zipBytes, err := io.ReadAll(pr)
+		if err != nil {
+			return false, err
+		}
+		archive, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
+		if err != nil {
+			return false, err
+		}
+		file, err := archive.Open("RWKV-Runner.app/Contents/MacOS/RWKV-Runner")
+		if err != nil {
+			return false, err
+		}
+		defer file.Close()
+		updateFile = file
+	}
+
+	// apply update
+	err = selfupdate.Apply(updateFile, selfupdate.Options{})
 	if err != nil {
 		if rerr := selfupdate.RollbackError(err); rerr != nil {
 			return true, rerr
 		}
 		return false, err
 	}
+	// restart app
 	if runtime.GOOS == "windows" {
 		name, err := os.Executable()
 		if err != nil {
--- a/backend-golang/download.go
+++ b/backend-golang/download.go
@@ -10,7 +10,11 @@ import (
 )

 func (a *App) DownloadFile(path string, url string) error {
-	_, err := grab.Get(a.exDir+path, url)
+	absPath, err := a.GetAbsPath(path)
+	if err != nil {
+		return err
+	}
+	_, err = grab.Get(absPath, url)
 	if err != nil {
 		return err
 	}
@@ -88,11 +92,15 @@ func (a *App) ContinueDownload(url string) {
 }

 func (a *App) AddToDownloadList(path string, url string) {
-	if !existsInDownloadList(a.exDir+path, url) {
+	absPath, err := a.GetAbsPath(path)
+	if err != nil {
+		return
+	}
+	if !existsInDownloadList(absPath, url) {
 		downloadList = append(downloadList, &DownloadStatus{
 			resp:        nil,
 			Name:        filepath.Base(path),
-			Path:        a.exDir + path,
+			Path:        absPath,
 			Url:         url,
 			Downloading: false,
 		})
--- a/backend-golang/file.go
+++ b/backend-golang/file.go
@@ -14,27 +14,55 @@ import (
 	wruntime "github.com/wailsapp/wails/v2/pkg/runtime"
 )

+// GetAbsPath returns path as a cleaned absolute path using the OS path
+// separator. An absolute path is only cleaned; a relative path is joined
+// onto a.exDir and then made absolute. An error is returned only when the
+// relative path cannot be made absolute (filepath.Abs fails).
+func (a *App) GetAbsPath(path string) (string, error) {
+	absPath := filepath.Clean(path)
+	if !filepath.IsAbs(path) {
+		var err error
+		if absPath, err = filepath.Abs(filepath.Join(a.exDir, path)); err != nil {
+			return "", err
+		}
+	}
+	// Normalize any remaining forward slashes to the platform separator.
+	return strings.ReplaceAll(absPath, "/", string(os.PathSeparator)), nil
+}
+
 func (a *App) SaveFile(path string, savedContent []byte) error {
-	if err := os.WriteFile(a.exDir+path, savedContent, 0644); err != nil {
+	absPath, err := a.GetAbsPath(path)
+	if err != nil {
+		return err
+	}
+	if err := os.WriteFile(absPath, savedContent, 0644); err != nil {
 		return err
 	}
 	return nil
 }

-func (a *App) SaveJson(fileName string, jsonData any) error {
+func (a *App) SaveJson(path string, jsonData any) error {
 	text, err := json.MarshalIndent(jsonData, "", "  ")
 	if err != nil {
 		return err
 	}

-	if err := os.WriteFile(a.exDir+fileName, text, 0644); err != nil {
+	absPath, err := a.GetAbsPath(path)
+	if err != nil {
+		return err
+	}
+	if err := os.WriteFile(absPath, text, 0644); err != nil {
 		return err
 	}
 	return nil
 }

-func (a *App) ReadJson(fileName string) (any, error) {
-	file, err := os.ReadFile(a.exDir + fileName)
+func (a *App) ReadJson(path string) (any, error) {
+	absPath, err := a.GetAbsPath(path)
+	if err != nil {
+		return nil, err
+	}
+	file, err := os.ReadFile(absPath)
 	if err != nil {
 		return nil, err
 	}
@@ -48,8 +76,12 @@ func (a *App) ReadJson(fileName string) (any, error) {
 	return data, nil
 }

-func (a *App) FileExists(fileName string) bool {
-	_, err := os.Stat(a.exDir + fileName)
+func (a *App) FileExists(path string) bool {
+	absPath, err := a.GetAbsPath(path)
+	if err != nil {
+		return false
+	}
+	_, err = os.Stat(absPath)
 	return err == nil
 }

@@ -60,8 +92,12 @@ type FileInfo struct {
 	ModTime string `json:"modTime"`
 }

-func (a *App) ReadFileInfo(fileName string) (*FileInfo, error) {
-	info, err := os.Stat(a.exDir + fileName)
+func (a *App) ReadFileInfo(path string) (*FileInfo, error) {
+	absPath, err := a.GetAbsPath(path)
+	if err != nil {
+		return nil, err
+	}
+	info, err := os.Stat(absPath)
 	if err != nil {
 		return nil, err
 	}
@@ -74,7 +110,11 @@ func (a *App) ReadFileInfo(fileName string) (*FileInfo, error) {
 }

 func (a *App) ListDirFiles(dirPath string) ([]FileInfo, error) {
-	files, err := os.ReadDir(a.exDir + dirPath)
+	absDirPath, err := a.GetAbsPath(dirPath)
+	if err != nil {
+		return nil, err
+	}
+	files, err := os.ReadDir(absDirPath)
 	if err != nil {
 		return nil, err
 	}
@@ -96,7 +136,11 @@ func (a *App) ListDirFiles(dirPath string) ([]FileInfo, error) {
 }

 func (a *App) DeleteFile(path string) error {
-	err := os.Remove(a.exDir + path)
+	absPath, err := a.GetAbsPath(path)
+	if err != nil {
+		return err
+	}
+	err = os.Remove(absPath)
 	if err != nil {
 		return err
 	}
@@ -104,18 +148,27 @@ func (a *App) DeleteFile(path string) error {
 }

 func (a *App) CopyFile(src string, dst string) error {
-	sourceFile, err := os.Open(a.exDir + src)
+	absSrc, err := a.GetAbsPath(src)
+	if err != nil {
+		return err
+	}
+	absDst, err := a.GetAbsPath(dst)
+	if err != nil {
+		return err
+	}
+
+	sourceFile, err := os.Open(absSrc)
 	if err != nil {
 		return err
 	}
 	defer sourceFile.Close()

-	err = os.MkdirAll(a.exDir+dst[:strings.LastIndex(dst, "/")], 0755)
+	err = os.MkdirAll(filepath.Dir(absDst), 0755)
 	if err != nil {
 		return err
 	}

-	destFile, err := os.Create(a.exDir + dst)
+	destFile, err := os.Create(absDst)
 	if err != nil {
 		return err
 	}
@@ -166,14 +219,8 @@ func (a *App) OpenOpenFileDialog(filterPattern string) (string, error) {
 	return path, nil
 }

-func (a *App) OpenFileFolder(path string, relative bool) error {
-	var absPath string
-	var err error
-	if relative {
-		absPath, err = filepath.Abs(a.exDir + path)
-	} else {
-		absPath, err = filepath.Abs(path)
-	}
+func (a *App) OpenFileFolder(path string) error {
+	absPath, err := a.GetAbsPath(path)
 	if err != nil {
 		return err
 	}
@@ -204,6 +251,10 @@ func (a *App) OpenFileFolder(path string, relative bool) error {
 }

 func (a *App) StartFile(path string) error {
-	_, err := CmdHelper(path)
+	cmd, err := CmdHelper(true, path)
+	if err != nil {
+		return err
+	}
+	err = cmd.Start()
 	return err
 }
--- a/backend-golang/rwkv.go
+++ b/backend-golang/rwkv.go
@@ -1,3 +1,4 @@
+// Because of whitespace and multilingual (non-ASCII) path support, the functions in rwkv.go must always be executed with the RWKV-Runner directory as cwd; never use a.GetAbsPath() here.
 package backend_golang

 import (
@@ -10,15 +11,19 @@ import (
 	"strings"
 )

-func (a *App) StartServer(python string, port int, host string, webui bool, rwkvBeta bool, rwkvcpp bool) (string, error) {
-	var err error
+func (a *App) StartServer(python string, port int, host string, webui bool, rwkvBeta bool, rwkvcpp bool, webgpu bool) (string, error) {
+	execFile := "./backend-python/main.py"
+	_, err := os.Stat(execFile)
+	if err != nil {
+		return "", err
+	}
 	if python == "" {
 		python, err = GetPython()
 	}
 	if err != nil {
 		return "", err
 	}
-	args := []string{python, "./backend-python/main.py"}
+	args := []string{python, execFile}
 	if webui {
 		args = append(args, "--webui")
 	}
@@ -28,35 +33,85 @@ func (a *App) StartServer(python string, port int, host string, webui bool, rwkv
 	if rwkvcpp {
 		args = append(args, "--rwkv.cpp")
 	}
+	if webgpu {
+		args = append(args, "--webgpu")
+	}
 	args = append(args, "--port", strconv.Itoa(port), "--host", host)
 	return Cmd(args...)
 }

 func (a *App) StartWebGPUServer(port int, host string) (string, error) {
-	args := []string{"./backend-rust/webgpu_server"}
+	var execFile string
+	execFiles := []string{"./backend-rust/webgpu_server", "./backend-rust/webgpu_server.exe"}
+	for _, file := range execFiles {
+		_, err := os.Stat(file)
+		if err == nil {
+			execFile = file
+			break
+		}
+	}
+	if execFile == "" {
+		return "", errors.New(execFiles[0] + " not found")
+	}
+	args := []string{execFile}
 	args = append(args, "--port", strconv.Itoa(port), "--ip", host)
 	return Cmd(args...)
 }

 func (a *App) ConvertModel(python string, modelPath string, strategy string, outPath string) (string, error) {
-	var err error
+	execFile := "./backend-python/convert_model.py"
+	_, err := os.Stat(execFile)
+	if err != nil {
+		return "", err
+	}
 	if python == "" {
 		python, err = GetPython()
 	}
 	if err != nil {
 		return "", err
 	}
-	return Cmd(python, "./backend-python/convert_model.py", "--in", modelPath, "--out", outPath, "--strategy", strategy)
+	return Cmd(python, execFile, "--in", modelPath, "--out", outPath, "--strategy", strategy)
 }

 func (a *App) ConvertSafetensors(modelPath string, outPath string) (string, error) {
-	args := []string{"./backend-rust/web-rwkv-converter"}
+	var execFile string
+	execFiles := []string{"./backend-rust/web-rwkv-converter", "./backend-rust/web-rwkv-converter.exe"}
+	for _, file := range execFiles {
+		_, err := os.Stat(file)
+		if err == nil {
+			execFile = file
+			break
+		}
+	}
+	if execFile == "" {
+		return "", errors.New(execFiles[0] + " not found")
+	}
+	args := []string{execFile}
 	args = append(args, "--input", modelPath, "--output", outPath)
 	return Cmd(args...)
 }

+// ConvertSafetensorsWithPython runs backend-python/convert_safetensors.py on modelPath, writing outPath.
+func (a *App) ConvertSafetensorsWithPython(python string, modelPath string, outPath string) (string, error) {
+	const script = "./backend-python/convert_safetensors.py" // relative to the process cwd
+	if _, err := os.Stat(script); err != nil {
+		return "", err // script missing or not accessible
+	}
+	if python == "" { // no interpreter given: fall back to GetPython()
+		var err error
+		if python, err = GetPython(); err != nil {
+			return "", err
+		}
+	}
+	return Cmd(python, script, "--input", modelPath, "--output", outPath)
+}
+
 func (a *App) ConvertGGML(python string, modelPath string, outPath string, Q51 bool) (string, error) {
-	var err error
+	execFile := "./backend-python/convert_pytorch_to_ggml.py"
+	_, err := os.Stat(execFile)
+	if err != nil {
+		return "", err
+	}
 	if python == "" {
 		python, err = GetPython()
 	}
@@ -67,11 +122,15 @@ func (a *App) ConvertGGML(python string, modelPath string, outPath string, Q51 b
 	if Q51 {
 		dataType = "Q5_1"
 	}
-	return Cmd(python, "./backend-python/convert_pytorch_to_ggml.py", modelPath, outPath, dataType)
+	return Cmd(python, execFile, modelPath, outPath, dataType)
 }

 func (a *App) ConvertData(python string, input string, outputPrefix string, vocab string) (string, error) {
-	var err error
+	execFile := "./finetune/json2binidx_tool/tools/preprocess_data.py"
+	_, err := os.Stat(execFile)
+	if err != nil {
+		return "", err
+	}
 	if python == "" {
 		python, err = GetPython()
 	}
@@ -115,19 +174,23 @@ func (a *App) ConvertData(python string, input string, outputPrefix string, voca
 		return "", err
 	}

-	return Cmd(python, "./finetune/json2binidx_tool/tools/preprocess_data.py", "--input", input, "--output-prefix", outputPrefix, "--vocab", vocab,
+	return Cmd(python, execFile, "--input", input, "--output-prefix", outputPrefix, "--vocab", vocab,
 		"--tokenizer-type", tokenizerType, "--dataset-impl", "mmap", "--append-eod")
 }

 func (a *App) MergeLora(python string, useGpu bool, loraAlpha int, baseModel string, loraPath string, outputPath string) (string, error) {
-	var err error
+	execFile := "./finetune/lora/merge_lora.py"
+	_, err := os.Stat(execFile)
+	if err != nil {
+		return "", err
+	}
 	if python == "" {
 		python, err = GetPython()
 	}
 	if err != nil {
 		return "", err
 	}
-	args := []string{python, "./finetune/lora/merge_lora.py"}
+	args := []string{python, execFile}
 	if useGpu {
 		args = append(args, "--use-gpu")
 	}
@@ -143,9 +206,9 @@ func (a *App) DepCheck(python string) error {
 	if err != nil {
 		return err
 	}
-	out, err := exec.Command(python, a.exDir+"./backend-python/dep_check.py").CombinedOutput()
+	out, err := exec.Command(python, a.exDir+"backend-python/dep_check.py").CombinedOutput()
 	if err != nil {
-		return errors.New("DepCheck Error: " + string(out))
+		return errors.New("DepCheck Error: " + string(out) + " GError: " + err.Error())
 	}
 	return nil
 }
@@ -171,7 +234,7 @@ func (a *App) InstallPyDep(python string, cnMirror bool) (string, error) {
 		if !cnMirror {
 			installScript = strings.Replace(installScript, " -i https://pypi.tuna.tsinghua.edu.cn/simple", "", -1)
 		}
-		err = os.WriteFile("./install-py-dep.bat", []byte(installScript), 0644)
+		err = os.WriteFile(a.exDir+"install-py-dep.bat", []byte(installScript), 0644)
 		if err != nil {
 			return "", err
 		}
--- a/backend-golang/utils.go
+++ b/backend-golang/utils.go
@@ -3,6 +3,7 @@ package backend_golang
 import (
 	"archive/zip"
 	"bufio"
+	"crypto/sha256"
 	"embed"
 	"errors"
 	"fmt"
@@ -18,18 +19,23 @@ import (
 	"syscall"
 )

-func CmdHelper(args ...string) (*exec.Cmd, error) {
+func CmdHelper(hideWindow bool, args ...string) (*exec.Cmd, error) {
 	if runtime.GOOS != "windows" {
 		return nil, errors.New("unsupported OS")
 	}
-	filename := "./cmd-helper.bat"
-	_, err := os.Stat(filename)
+	ex, err := os.Executable()
 	if err != nil {
-		if err := os.WriteFile(filename, []byte("start %*"), 0644); err != nil {
+		return nil, err
+	}
+	exDir := filepath.Dir(ex) + "/"
+	path := exDir + "cmd-helper.bat"
+	_, err = os.Stat(path)
+	if err != nil {
+		if err := os.WriteFile(path, []byte("start %*"), 0644); err != nil {
 			return nil, err
 		}
 	}
-	cmdHelper, err := filepath.Abs(filename)
+	cmdHelper, err := filepath.Abs(path)
 	if err != nil {
 		return nil, err
 	}
@@ -43,22 +49,21 @@ func CmdHelper(args ...string) (*exec.Cmd, error) {
 	}
 	cmd := exec.Command(cmdHelper, args...)
 	cmd.SysProcAttr = &syscall.SysProcAttr{}
-	//go:custom_build windows cmd.SysProcAttr.HideWindow = true
-	err = cmd.Start()
-	if err != nil {
-		return nil, err
-	}
+	//go:custom_build windows cmd.SysProcAttr.HideWindow = hideWindow
 	return cmd, nil
 }

 func Cmd(args ...string) (string, error) {
 	switch platform := runtime.GOOS; platform {
 	case "windows":
-		cmd, err := CmdHelper(args...)
+		cmd, err := CmdHelper(true, args...)
+		if err != nil {
+			return "", err
+		}
+		_, err = cmd.CombinedOutput()
 		if err != nil {
 			return "", err
 		}
-		cmd.Wait()
 		return "", nil
 	case "darwin":
 		ex, err := os.Executable()
@@ -86,16 +91,18 @@ func Cmd(args ...string) (string, error) {
 }

 func CopyEmbed(efs embed.FS) error {
-	prefix := ""
+	ex, err := os.Executable()
+	if err != nil {
+		return err
+	}
+	var prefix string
 	if runtime.GOOS == "darwin" {
-		ex, err := os.Executable()
-		if err != nil {
-			return err
-		}
 		prefix = filepath.Dir(ex) + "/../../../"
+	} else {
+		prefix = filepath.Dir(ex) + "/"
 	}

-	err := fs.WalkDir(efs, ".", func(path string, d fs.DirEntry, err error) error {
+	err = fs.WalkDir(efs, ".", func(path string, d fs.DirEntry, err error) error {
 		if d.IsDir() {
 			return nil
 		}
@@ -113,9 +120,19 @@ func CopyEmbed(efs embed.FS) error {
 			return err
 		}

-		err = os.WriteFile(path, content, 0644)
-		if err != nil {
-			return err
+		executeWrite := true
+		existedContent, err := os.ReadFile(path)
+		if err == nil {
+			if fmt.Sprintf("%x", sha256.Sum256(existedContent)) == fmt.Sprintf("%x", sha256.Sum256(content)) {
+				executeWrite = false
+			}
+		}
+
+		if executeWrite {
+			err = os.WriteFile(path, content, 0644)
+			if err != nil {
+				return err
+			}
 		}

 		return nil
@@ -126,13 +143,19 @@ func CopyEmbed(efs embed.FS) error {
 func GetPython() (string, error) {
 	switch platform := runtime.GOOS; platform {
 	case "windows":
-		_, err := os.Stat("py310/python.exe")
+		ex, err := os.Executable()
 		if err != nil {
-			_, err := os.Stat("python-3.10.11-embed-amd64.zip")
+			return "", err
+		}
+		exDir := filepath.Dir(ex) + "/"
+		pyexe := exDir + "py310/python.exe"
+		_, err = os.Stat(pyexe)
+		if err != nil {
+			_, err := os.Stat(exDir + "python-3.10.11-embed-amd64.zip")
 			if err != nil {
 				return "", errors.New("python zip not found")
 			} else {
-				err := Unzip("python-3.10.11-embed-amd64.zip", "py310")
+				err := Unzip(exDir+"python-3.10.11-embed-amd64.zip", exDir+"py310")
 				if err != nil {
 					return "", errors.New("failed to unzip python")
 				} else {
--- a/backend-golang/wsl_windows.go
+++ b/backend-golang/wsl_windows.go
@@ -9,7 +9,6 @@ import (
 	"io"
 	"os"
 	"os/exec"
-	"path/filepath"
 	"strings"
 	"time"

@@ -133,26 +132,20 @@ func (a *App) WslStop() error {
 }

 func (a *App) WslIsEnabled() error {
-	ex, err := os.Executable()
-	if err != nil {
-		return err
-	}
-	exDir := filepath.Dir(ex)
-
-	data, err := os.ReadFile(exDir + "/wsl.state")
+	data, err := os.ReadFile(a.exDir + "wsl.state")
 	if err == nil {
 		if strings.Contains(string(data), "Enabled") {
 			return nil
 		}
 	}

-	cmd := `-Command (Get-WindowsOptionalFeature -Online -FeatureName Microsoft-Windows-Subsystem-Linux).State | Out-File -Encoding utf8 -FilePath ` + exDir + "/wsl.state"
-	_, err = su.ShellExecute(su.RUNAS, "powershell", cmd, exDir)
+	cmd := `-Command (Get-WindowsOptionalFeature -Online -FeatureName VirtualMachinePlatform).State | Out-File -Encoding utf8 -FilePath ` + a.exDir + "wsl.state"
+	_, err = su.ShellExecute(su.RUNAS, "powershell", cmd, a.exDir)
 	if err != nil {
 		return err
 	}
 	time.Sleep(2 * time.Second)
-	data, err = os.ReadFile(exDir + "/wsl.state")
+	data, err = os.ReadFile(a.exDir + "wsl.state")
 	if err != nil {
 		return err
 	}
@@ -164,13 +157,13 @@ func (a *App) WslIsEnabled() error {
 }

 func (a *App) WslEnable(forceMode bool) error {
-	cmd := `/online /enable-feature /featurename:Microsoft-Windows-Subsystem-Linux`
+	cmd := `/online /enable-feature /featurename:VirtualMachinePlatform`
 	_, err := su.ShellExecute(su.RUNAS, "dism", cmd, `C:\`)
 	if err != nil {
 		return err
 	}
 	if forceMode {
-		os.WriteFile("./wsl.state", []byte("Enabled"), 0644)
+		os.WriteFile(a.exDir+"wsl.state", []byte("Enabled"), 0644)
 	}
 	return nil
 }
--- a/backend-python/convert_safetensors.py
+++ b/backend-python/convert_safetensors.py
@@ -1,9 +1,8 @@
-import json
+import collections
+import numpy
 import os
-import sys
-import copy
 import torch
-from safetensors.torch import load_file, save_file
+from safetensors.torch import serialize_file, load_file

 import argparse

@@ -26,35 +25,65 @@ def rename_key(rename, name):


 def convert_file(pt_filename: str, sf_filename: str, rename={}, transpose_names=[]):
-    loaded = torch.load(pt_filename, map_location="cpu")
+    loaded: collections.OrderedDict = torch.load(pt_filename, map_location="cpu")
    if "state_dict" in loaded:
        loaded = loaded["state_dict"]

-    loaded = {k: v.clone().half() for k, v in loaded.items()}
-    # for k, v in loaded.items():
-    #     print(f'{k}\t{v.shape}\t{v.dtype}')
+    kk = list(loaded.keys())
+    version = 4
+    for x in kk:
+        if "ln_x" in x:
+            version = max(5, version)
+        if "gate.weight" in x:
+            version = max(5.1, version)
+        if int(version) == 5 and "att.time_decay" in x:
+            if len(loaded[x].shape) > 1:
+                if loaded[x].shape[1] > 1:
+                    version = max(5.2, version)
+        if "time_maa" in x:
+            version = max(6, version)

-    loaded = {rename_key(rename, k).lower(): v.contiguous() for k, v in loaded.items()}
-    # For tensors to be contiguous
-    for k, v in loaded.items():
-        for transpose_name in transpose_names:
-            if transpose_name in k:
-                loaded[k] = v.transpose(0, 1)
+    print(f"Model detected: v{version:.1f}")

-    loaded = {k: v.clone().half().contiguous() for k, v in loaded.items()}
+    if version == 5.1:
+        _, n_emb = loaded["emb.weight"].shape
+        for k in kk:
+            if "time_decay" in k or "time_faaaa" in k:
+                # print(k, mm[k].shape)
+                loaded[k] = (
+                    loaded[k].unsqueeze(1).repeat(1, n_emb // loaded[k].shape[0])
+                )

-    for k, v in loaded.items():
-        print(f"{k}\t{v.shape}\t{v.dtype}")
+    with torch.no_grad():
+        for k in kk:
+            new_k = rename_key(rename, k).lower()
+            v = loaded[k].half()
+            del loaded[k]
+            for transpose_name in transpose_names:
+                if transpose_name in new_k:
+                    dims = len(v.shape)
+                    v = v.transpose(dims - 2, dims - 1)
+            print(f"{new_k}\t{v.shape}\t{v.dtype}")
+            loaded[new_k] = {
+                "dtype": str(v.dtype).split(".")[-1],
+                "shape": v.shape,
+                "data": v.numpy().tobytes(),
+            }

    dirname = os.path.dirname(sf_filename)
    os.makedirs(dirname, exist_ok=True)
-    save_file(loaded, sf_filename, metadata={"format": "pt"})
-    reloaded = load_file(sf_filename)
-    for k in loaded:
-        pt_tensor = loaded[k]
-        sf_tensor = reloaded[k]
-        if not torch.equal(pt_tensor, sf_tensor):
-            raise RuntimeError(f"The output tensors do not match for key {k}")
+    serialize_file(loaded, sf_filename, metadata={"format": "pt"})
+    # reloaded = load_file(sf_filename)
+    # for k in loaded:
+    #     pt_tensor = torch.Tensor(
+    #         numpy.frombuffer(
+    #             bytearray(loaded[k]["data"]),
+    #             dtype=getattr(numpy, loaded[k]["dtype"]),
+    #         ).reshape(loaded[k]["shape"])
+    #     )
+    #     sf_tensor = reloaded[k]
+    #     if not torch.equal(pt_tensor, sf_tensor):
+    #         raise RuntimeError(f"The output tensors do not match for key {k}")


 if __name__ == "__main__":
--- a/backend-python/dep_check.py
+++ b/backend-python/dep_check.py
@@ -7,7 +7,6 @@ import lm_dataformat
 import ftfy
 import tqdm
 import tiktoken
-import GPUtil

 import torch
 import rwkv
--- a/backend-python/global_var.py
+++ b/backend-python/global_var.py
@@ -5,6 +5,7 @@ Model = "model"
 Model_Status = "model_status"
 Model_Config = "model_config"
 Deploy_Mode = "deploy_mode"
+Midi_Vocab_Config_Type = "midi_vocab_config_type"


 class ModelStatus(Enum):
@@ -13,11 +14,17 @@ class ModelStatus(Enum):
    Working = 3


+class MidiVocabConfig(Enum):
+    Default = auto()
+    Piano = auto()
+
+
 def init():
    global GLOBALS
    GLOBALS = {}
    set(Model_Status, ModelStatus.Offline)
    set(Deploy_Mode, False)
+    set(Midi_Vocab_Config_Type, MidiVocabConfig.Default)


 def set(key, value):
--- a/backend-python/main.py
+++ b/backend-python/main.py
@@ -37,6 +37,11 @@ def get_args(args: Union[Sequence[str], None] = None):
        action="store_true",
        help="whether to use rwkv.cpp (default: False)",
    )
+    group.add_argument(
+        "--webgpu",
+        action="store_true",
+        help="whether to use webgpu (default: False)",
+    )
    args = parser.parse_args(args)

    return args
--- a/backend-python/requirements.txt
+++ b/backend-python/requirements.txt
@@ -1,9 +1,9 @@
 torch
 torchvision
 torchaudio
-rwkv==0.8.22
+rwkv==0.8.25
 langchain==0.0.322
-fastapi==0.104.0
+fastapi==0.109.1
 uvicorn==0.23.2
 sse-starlette==1.6.5
 pydantic==2.4.2
@@ -19,7 +19,7 @@ midi2audio==0.1.1
 mido==1.3.0
 safetensors==0.4.0
 PyMuPDF==1.23.5
-python-multipart==0.0.6
+python-multipart==0.0.7
 Cython==3.0.4
 cyac==1.9
-torch_directml==0.1.13.1.dev230413
+torch-directml==0.1.13.1.dev230413
--- a/backend-python/requirements_without_cyac.txt
+++ b/backend-python/requirements_without_cyac.txt
@@ -1,9 +1,9 @@
 torch
 torchvision
 torchaudio
-rwkv==0.8.22
+rwkv==0.8.25
 langchain==0.0.322
-fastapi==0.104.0
+fastapi==0.109.1
 uvicorn==0.23.2
 sse-starlette==1.6.5
 pydantic==2.4.2
@@ -19,5 +19,5 @@ midi2audio==0.1.1
 mido==1.3.0
 safetensors==0.4.0
 PyMuPDF==1.23.5
-python-multipart==0.0.6
+python-multipart==0.0.7
 Cython==3.0.4
--- a/backend-python/routes/completion.py
+++ b/backend-python/routes/completion.py
@@ -8,7 +8,6 @@ import base64
 from fastapi import APIRouter, Request, status, HTTPException
 from sse_starlette.sse import EventSourceResponse
 from pydantic import BaseModel, Field
-import numpy as np
 import tiktoken
 from utils.rwkv import *
 from utils.log import quick_log
@@ -71,10 +70,10 @@ class ChatCompletionBody(ModelConfigBody):
                "assistant_name": None,
                "presystem": True,
                "max_tokens": 1000,
-                "temperature": 1.2,
-                "top_p": 0.5,
-                "presence_penalty": 0.4,
-                "frequency_penalty": 0.4,
+                "temperature": 1,
+                "top_p": 0.3,
+                "presence_penalty": 0,
+                "frequency_penalty": 1,
            }
        }
    }
@@ -95,10 +94,10 @@ class CompletionBody(ModelConfigBody):
                "stream": False,
                "stop": None,
                "max_tokens": 100,
-                "temperature": 1.2,
-                "top_p": 0.5,
-                "presence_penalty": 0.4,
-                "frequency_penalty": 0.4,
+                "temperature": 1,
+                "top_p": 0.3,
+                "presence_penalty": 0,
+                "frequency_penalty": 1,
            }
        }
    }
@@ -145,6 +144,7 @@ async def eval_rwkv(
                return
            set_rwkv_config(model, global_var.get(global_var.Model_Config))
            set_rwkv_config(model, body)
+            print(get_rwkv_config(model))

            response, prompt_tokens, completion_tokens = "", 0, 0
            for response, delta, prompt_tokens, completion_tokens in model.generate(
@@ -156,23 +156,27 @@ async def eval_rwkv(
                if stream:
                    yield json.dumps(
                        {
-                            "object": "chat.completion.chunk"
-                            if chat_mode
-                            else "text_completion",
+                            "object": (
+                                "chat.completion.chunk"
+                                if chat_mode
+                                else "text_completion"
+                            ),
                            # "response": response,
                            "model": model.name,
                            "choices": [
-                                {
-                                    "delta": {"content": delta},
-                                    "index": 0,
-                                    "finish_reason": None,
-                                }
-                                if chat_mode
-                                else {
-                                    "text": delta,
-                                    "index": 0,
-                                    "finish_reason": None,
-                                }
+                                (
+                                    {
+                                        "delta": {"content": delta},
+                                        "index": 0,
+                                        "finish_reason": None,
+                                    }
+                                    if chat_mode
+                                    else {
+                                        "text": delta,
+                                        "index": 0,
+                                        "finish_reason": None,
+                                    }
+                                )
                            ],
                        }
                    )
@@ -194,23 +198,25 @@ async def eval_rwkv(
            if stream:
                yield json.dumps(
                    {
-                        "object": "chat.completion.chunk"
-                        if chat_mode
-                        else "text_completion",
+                        "object": (
+                            "chat.completion.chunk" if chat_mode else "text_completion"
+                        ),
                        # "response": response,
                        "model": model.name,
                        "choices": [
-                            {
-                                "delta": {},
-                                "index": 0,
-                                "finish_reason": "stop",
-                            }
-                            if chat_mode
-                            else {
-                                "text": "",
-                                "index": 0,
-                                "finish_reason": "stop",
-                            }
+                            (
+                                {
+                                    "delta": {},
+                                    "index": 0,
+                                    "finish_reason": "stop",
+                                }
+                                if chat_mode
+                                else {
+                                    "text": "",
+                                    "index": 0,
+                                    "finish_reason": "stop",
+                                }
+                            )
                        ],
                    }
                )
@@ -226,20 +232,22 @@ async def eval_rwkv(
                        "total_tokens": prompt_tokens + completion_tokens,
                    },
                    "choices": [
-                        {
-                            "message": {
-                                "role": Role.Assistant.value,
-                                "content": response,
-                            },
-                            "index": 0,
-                            "finish_reason": "stop",
-                        }
-                        if chat_mode
-                        else {
-                            "text": response,
-                            "index": 0,
-                            "finish_reason": "stop",
-                        }
+                        (
+                            {
+                                "message": {
+                                    "role": Role.Assistant.value,
+                                    "content": response,
+                                },
+                                "index": 0,
+                                "finish_reason": "stop",
+                            }
+                            if chat_mode
+                            else {
+                                "text": response,
+                                "index": 0,
+                                "finish_reason": "stop",
+                            }
+                        )
                    ],
                }

@@ -335,6 +343,8 @@ The following is a coherent verbose detailed conversation between a girl named {
        body.stop.append(f"\n\n{bot_code}")
    elif body.stop is None:
        body.stop = default_stop
+    if not body.presystem:
+        body.stop.append("\n\n")

    if body.stream:
        return EventSourceResponse(
@@ -396,6 +406,8 @@ class EmbeddingsBody(BaseModel):


 def embedding_base64(embedding: List[float]) -> str:
+    import numpy as np
+
    return base64.b64encode(np.array(embedding).astype(np.float32)).decode("utf-8")


--- a/backend-python/routes/config.py
+++ b/backend-python/routes/config.py
@@ -74,6 +74,10 @@ def switch_model(body: SwitchModelBody, response: Response, request: Request):
        )
    except Exception as e:
        print(e)
+        import traceback
+
+        print(traceback.format_exc())
+
        quick_log(request, body, f"Exception: {e}")
        global_var.set(global_var.Model_Status, global_var.ModelStatus.Offline)
        raise HTTPException(
@@ -82,32 +86,53 @@ def switch_model(body: SwitchModelBody, response: Response, request: Request):

    if body.deploy:
        global_var.set(global_var.Deploy_Mode, True)
-    if global_var.get(global_var.Model_Config) is None:
-        global_var.set(
-            global_var.Model_Config, get_rwkv_config(global_var.get(global_var.Model))
-        )
+
+    saved_model_config = global_var.get(global_var.Model_Config)
+    init_model_config = get_rwkv_config(global_var.get(global_var.Model))
+    if saved_model_config is not None:
+        merge_model(init_model_config, saved_model_config)
+    global_var.set(global_var.Model_Config, init_model_config)
    global_var.set(global_var.Model_Status, global_var.ModelStatus.Working)

    return "success"


+def merge_model(to_model: BaseModel, from_model: BaseModel):
+    """
+    Overlay `from_model` onto `to_model` in place.
+
+    Every field that both models expose through `.dict()` and whose value on
+    `from_model` is not None is assigned onto `to_model`. All other fields of
+    `to_model` keep their current value. Nothing is returned.
+    """
+    source_names = list(from_model.dict().keys())
+    target_names = list(to_model.dict().keys())
+
+    for name in source_names:
+        # skip fields the target does not declare
+        if name not in target_names:
+            continue
+
+        value = getattr(from_model, name)
+        # None means "not provided", so the target's value is kept
+        if value is None:
+            continue
+
+        setattr(to_model, name, value)
+
+
@router.post("/update-config", tags=["Configs"])
 def update_config(body: ModelConfigBody):
    """
    Will not update the model config immediately, but set it when completion called to avoid modifications during generation
    """

-    print(body)
-    global_var.set(global_var.Model_Config, body)
+    model_config = global_var.get(global_var.Model_Config)
+    if model_config is None:
+        model_config = ModelConfigBody()
+        global_var.set(global_var.Model_Config, model_config)
+    merge_model(model_config, body)
+    print("Updated Model Config:", model_config)

    return "success"


@router.get("/status", tags=["Configs"])
 def status():
-    import GPUtil
+    try:
+        import GPUtil

-    gpus = GPUtil.getGPUs()
+        gpus = GPUtil.getGPUs()
+    except:
+        gpus = []
    if len(gpus) == 0:
        device_name = "CPU"
    else:
--- a/backend-python/routes/midi.py
+++ b/backend-python/routes/midi.py
@@ -23,7 +23,11 @@ class TextToMidiBody(BaseModel):

@router.post("/text-to-midi", tags=["MIDI"])
 def text_to_midi(body: TextToMidiBody):
-    vocab_config = "backend-python/utils/midi_vocab_config.json"
+    vocab_config_type = global_var.get(global_var.Midi_Vocab_Config_Type)
+    if vocab_config_type == global_var.MidiVocabConfig.Piano:
+        vocab_config = "backend-python/utils/vocab_config_piano.json"
+    else:
+        vocab_config = "backend-python/utils/midi_vocab_config.json"
    cfg = VocabConfig.from_json(vocab_config)
    mid = convert_str_to_midi(cfg, body.text.strip())
    mid_data = io.BytesIO()
@@ -35,12 +39,20 @@ def text_to_midi(body: TextToMidiBody):

@router.post("/midi-to-text", tags=["MIDI"])
 async def midi_to_text(file_data: UploadFile):
-    vocab_config = "backend-python/utils/midi_vocab_config.json"
+    vocab_config_type = global_var.get(global_var.Midi_Vocab_Config_Type)
+    if vocab_config_type == global_var.MidiVocabConfig.Piano:
+        vocab_config = "backend-python/utils/vocab_config_piano.json"
+    else:
+        vocab_config = "backend-python/utils/midi_vocab_config.json"
    cfg = VocabConfig.from_json(vocab_config)
+    filter_config = "backend-python/utils/midi_filter_config.json"
+    filter_cfg = FilterConfig.from_json(filter_config)
    mid = mido.MidiFile(file=file_data.file)
-    text = convert_midi_to_str(cfg, mid)
+    output_list = convert_midi_to_str(cfg, filter_cfg, mid)
+    if len(output_list) == 0:
+        raise HTTPException(status.HTTP_400_BAD_REQUEST, "bad midi file")

-    return {"text": text}
+    return {"text": output_list[0]}


 class TxtToMidiBody(BaseModel):
@@ -65,7 +77,11 @@ def txt_to_midi(body: TxtToMidiBody):
    if not body.midi_path.startswith("midi/"):
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "bad output path")

-    vocab_config = "backend-python/utils/midi_vocab_config.json"
+    vocab_config_type = global_var.get(global_var.Midi_Vocab_Config_Type)
+    if vocab_config_type == global_var.MidiVocabConfig.Piano:
+        vocab_config = "backend-python/utils/vocab_config_piano.json"
+    else:
+        vocab_config = "backend-python/utils/midi_vocab_config.json"
    cfg = VocabConfig.from_json(vocab_config)
    with open(body.txt_path, "r") as f:
        text = f.read()
--- a/backend-python/routes/state_cache.py
+++ b/backend-python/routes/state_cache.py
@@ -76,6 +76,31 @@ class AddStateBody(BaseModel):
    logits: Any


+def copy_tensor_to_cpu(tensors):
+    """
+    Produce the CPU-side value of a model state or logits for the state cache.
+
+    Returns a `(copied, devices)` tuple:
+      - `copied`: torch tensors moved with `.cpu()`; lists without torch
+        tensors and numpy arrays are returned as the same object; any other
+        object is converted through its `.back()` method.
+      - `devices`: the original device of every torch tensor that was moved,
+        in order. Empty for all non-torch inputs.
+    """
+    import torch
+    import numpy as np
+
+    devices: List[torch.device] = []
+    copied: Union[Any, None] = None
+
+    tensors_type = type(tensors)
+    if tensors_type == list:
+        # An empty list has no first element to probe; pass it through
+        # unchanged instead of raising IndexError on tensors[0].
+        if len(tensors) > 0 and hasattr(tensors[0], "device"):  # torch state
+            devices = [tensor.device for tensor in tensors]
+            copied = [tensor.cpu() for tensor in tensors]
+        else:  # WebGPU logits
+            copied = tensors
+    elif tensors_type == torch.Tensor:  # torch logits
+        devices = [tensors.device]
+        copied = tensors.cpu()
+    elif tensors_type == np.ndarray:  # rwkv.cpp
+        copied = tensors
+    else:  # WebGPU state
+        copied = tensors.back()
+
+    return copied, devices
+
+
 # @router.post("/add-state", tags=["State Cache"])
 def add_state(body: AddStateBody):
    global trie, dtrie, loop_del_trie_id
@@ -87,20 +112,28 @@ def add_state(body: AddStateBody):
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "trie not loaded")

    import torch
+    import numpy as np

    try:
+        devices: List[torch.device] = []
+        logits_device: Union[torch.device, None] = None
+        state: Union[Any, None] = None
+        logits: Union[Any, None] = None
+
+        if body.state is not None:
+            state, devices = copy_tensor_to_cpu(body.state)
+        if body.logits is not None:
+            logits, logits_devices = copy_tensor_to_cpu(body.logits)
+            if len(logits_devices) > 0:
+                logits_device = logits_devices[0]
+
        id: int = trie.insert(body.prompt)
-        devices: List[torch.device] = [
-            (tensor.device if hasattr(tensor, "device") else torch.device("cpu"))
-            for tensor in body.state
-        ]
        dtrie[id] = {
-            "tokens": copy.deepcopy(body.tokens),
-            "state": [tensor.cpu() for tensor in body.state]
-            if hasattr(body.state[0], "device")
-            else copy.deepcopy(body.state),
-            "logits": copy.deepcopy(body.logits),
+            "tokens": body.tokens,
+            "state": state,
+            "logits": logits,
            "devices": devices,
+            "logits_device": logits_device,
        }

        if len(trie) >= max_trie_len:
@@ -118,6 +151,7 @@ def add_state(body: AddStateBody):
        )
        return "success"
    except Exception as e:
+        print(e)  # should not happen
        raise HTTPException(
            status.HTTP_400_BAD_REQUEST, f"insert failed, bad prompt.\n{e}"
        )
@@ -174,6 +208,7 @@ def longest_prefix_state(body: LongestPrefixStateBody, request: Request):
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "trie not loaded")

    import torch
+    import numpy as np

    id = -1
    try:
@@ -182,18 +217,35 @@ def longest_prefix_state(body: LongestPrefixStateBody, request: Request):
    except:
        pass
    if id != -1:
-        v = dtrie[id]
-        devices: List[torch.device] = v["devices"]
        prompt: str = trie[id]
+        v = dtrie[id]
+        tokens: List[Union[str, int]] = copy.deepcopy(v["tokens"])
+        devices: List[torch.device] = v["devices"]
+        logits_device: Union[torch.device, None] = v["logits_device"]
+        state: Union[Any, None] = v["state"]
+        logits: Union[Any, None] = v["logits"]
+
+        if type(state) == list and hasattr(state[0], "device"):  # torch
+            state = [
+                tensor.to(devices[i])
+                if devices[i] != torch.device("cpu")
+                else tensor.clone()
+                for i, tensor in enumerate(state)
+            ]
+            logits = (
+                logits.to(logits_device)
+                if logits_device != torch.device("cpu")
+                else logits.clone()
+            )
+        else:  # rwkv.cpp, WebGPU
+            logits = np.copy(logits)

        quick_log(request, body, "Hit:\n" + prompt)
        return {
            "prompt": prompt,
-            "tokens": v["tokens"],
-            "state": [tensor.to(devices[i]) for i, tensor in enumerate(v["state"])]
-            if hasattr(v["state"][0], "device")
-            else v["state"],
-            "logits": v["logits"],
+            "tokens": tokens,
+            "state": state,
+            "logits": logits,
        }
    else:
        return {"prompt": "", "tokens": [], "state": None, "logits": None}
--- a/backend-python/rwkv_pip/beta/model.py
+++ b/backend-python/rwkv_pip/beta/model.py
@@ -251,7 +251,7 @@ class RWKV(MyModule):
                )
                assert (
                    w["_strategy"] == args.strategy_string
-                )  # if you are using a new strategy, re-convert the model
+                ), "model has been converted and does not match current strategy; if you are using a new strategy, re-convert the model"
                assert (
                    float(w["_version"]) >= 0.7
                )  # sometimes you should re-convert using latest convert_model.py
--- a/backend-python/rwkv_pip/cpp/model.py
+++ b/backend-python/rwkv_pip/cpp/model.py
@@ -1,4 +1,4 @@
-from typing import Any, List
+from typing import Any, List, Union
 from . import rwkv_cpp_model
 from . import rwkv_cpp_shared_library

@@ -10,5 +10,5 @@ class RWKV:
        self.w = {}  # fake weight
        self.w["emb.weight"] = [0] * self.model.n_vocab

-    def forward(self, tokens: List[int], state: Any | None):
+    def forward(self, tokens: List[int], state: Union[Any, None] = None):
        return self.model.eval_sequence_in_chunks(tokens, state, use_numpy=True)
--- a/backend-python/rwkv_pip/model.py
+++ b/backend-python/rwkv_pip/model.py
@@ -342,7 +342,7 @@ class RWKV(MyModule):
                )
                assert (
                    w["_strategy"] == args.strategy_string
-                )  # if you are using a new strategy, re-convert the model
+                ), "model has been converted and does not match current strategy; if you are using a new strategy, re-convert the model"
                assert (
                    float(w["_version"]) >= 0.7
                )  # sometimes you should re-convert using latest convert_model.py
@@ -552,7 +552,12 @@ class RWKV(MyModule):
                    elif ".ln_x" in x:  # need fp32 for group_norm
                        w[x] = w[x].float()
                    else:
-                        if (len(w[x].shape) == 2) and ("emb" not in x):
+                        if (
+                            (len(w[x].shape) == 2)
+                            and ("emb" not in x)
+                            and ("_w1" not in x)
+                            and ("_w2" not in x)
+                        ):
                            if WTYPE != torch.uint8:
                                w[x] = w[x].to(dtype=WTYPE)
                            else:
--- a/backend-python/rwkv_pip/rwkv_vocab_v20230424_special_token.txt
+++ b/backend-python/rwkv_pip/rwkv_vocab_v20230424_special_token.txt
--- a/backend-python/rwkv_pip/tokenizer-midipiano.json
+++ b/backend-python/rwkv_pip/tokenizer-midipiano.json
--- a/backend-python/rwkv_pip/utils.py
+++ b/backend-python/rwkv_pip/utils.py
@@ -34,6 +34,25 @@ class PIPELINE_ARGS:
        )


+class ABC_TOKENIZER:
+    """
+    Character-level tokenizer: every character is encoded as its Unicode
+    code point. Ids 0, 2 and 3 are kept as pad, bos and eos markers.
+    """
+
+    def __init__(self):
+        self.pad_token_id = 0
+        self.bos_token_id = 2
+        self.eos_token_id = 3
+
+    def encode(self, text):
+        # one id per character, no special tokens are added
+        return list(map(ord, text))
+
+    def decode(self, ids):
+        # ids up to and including eos_token_id (pad, bos, eos, ...) yield no text
+        pieces = []
+        for token_id in ids:
+            if token_id == self.eos_token_id:
+                continue
+            if token_id > self.eos_token_id:
+                pieces.append(chr(token_id))
+            else:
+                pieces.append("")
+        return "".join(pieces)
+
+
 class PIPELINE:
    def __init__(self, model, WORD_NAME: str):
        self.model = model
@@ -48,6 +67,8 @@ class PIPELINE:
            self.tokenizer = TRIE_TOKENIZER(
                os.path.dirname(os.path.abspath(__file__)) + "/rwkv_vocab_v20230424.txt"
            )
+        elif WORD_NAME == "abc_tokenizer":
+            self.tokenizer = ABC_TOKENIZER()
        else:
            if WORD_NAME.endswith(".txt"):
                sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
@@ -84,6 +105,8 @@ class PIPELINE:
        return e / e.sum(axis=axis, keepdims=True)

    def sample_logits(self, logits, temperature=1.0, top_p=0.85, top_k=0):
+        if type(logits) == list:
+            logits = np.array(logits)
        np_logits = type(logits) == np.ndarray
        if np_logits:
            probs = self.np_softmax(logits, axis=-1)
@@ -148,10 +171,17 @@ class PIPELINE:
            all_tokens += [token]
            for xxx in occurrence:
                occurrence[xxx] *= args.alpha_decay
+
+            ttt = self.decode([token])
+            www = 1
+            if ttt in " \t0123456789":
+                www = 0
+            # elif ttt in '\r\n,.;?!"\':+-*/=#@$%^&_`~|<>\\()[]{}，。；“”：？！（）【】':
+            #     www = 0.5
            if token not in occurrence:
-                occurrence[token] = 1
+                occurrence[token] = www
            else:
-                occurrence[token] += 1
+                occurrence[token] += www
            # print(occurrence) # debug

            # output
--- a/backend-python/rwkv_pip/webgpu/model.py
+++ b/backend-python/rwkv_pip/webgpu/model.py
@@ -0,0 +1,52 @@
+from typing import Any, List, Union
+
+try:
+    import web_rwkv_py as wrp
+except ModuleNotFoundError:
+    try:
+        from . import web_rwkv_py as wrp
+    except ImportError:
+        raise ModuleNotFoundError(
+            "web_rwkv_py not found, install it from https://github.com/cryscan/web-rwkv-py"
+        )
+
+
+class RWKV:
+    """
+    Wrapper around a web_rwkv_py model that exposes a
+    `forward(tokens, state)` method.
+    """
+
+    def __init__(self, model_path: str, strategy: str = None):
+        """
+        Load the model stored at `model_path`.
+
+        `strategy` is an option string whose tokens are separated by
+        whitespace and/or commas. The parts read here are:
+          - "i8" / "i4" anywhere in the string: pass a non-zero `quant` /
+            `quant_nf4` value to the model
+          - "layer<N>": the value used for `quant` / `quant_nf4`
+            (defaults: 31 for i8, 26 for i4)
+          - "chunk<N>": the `token_chunk_size` (default 32)
+        A missing strategy (None) is treated as an empty string, which means
+        no quantization and the default chunk size.
+
+        Raises ValueError when a "layer"/"chunk" token is not followed by an
+        integer.
+        """
+        # The parameter defaults to None; normalise it so the parsing below
+        # cannot fail with AttributeError / TypeError on None.
+        strategy = strategy or ""
+
+        self.info = wrp.peek_info(model_path)
+        self.w = {}  # fake weight
+        self.w["emb.weight"] = [0] * self.info.num_vocab
+        self.version = str(self.info.version).lower()
+        # Attribute of web_rwkv_py named after the model version; it provides
+        # the Model, ModelState and run_one used below.
+        self.wrp = getattr(wrp, self.version)
+
+        def numbered_options(prefix: str):
+            # Lazily yield N for every "<prefix>N" token, in order of
+            # appearance. The prefix is removed by slicing because
+            # str.lstrip(prefix) strips a character set, not a prefix.
+            for group in strategy.split():
+                for option in group.split(","):
+                    if option.startswith(prefix):
+                        yield int(option[len(prefix) :])
+
+        # `layer` is a single shared iterator: when both "i8" and "i4" are
+        # present, `quant` takes the first layer value and `quant_nf4` the next.
+        layer = numbered_options("layer")
+        chunk_size = numbered_options("chunk")
+
+        args = {
+            "file": model_path,
+            "turbo": True,
+            "quant": next(layer, 31) if "i8" in strategy else 0,
+            "quant_nf4": next(layer, 26) if "i4" in strategy else 0,
+            "token_chunk_size": next(chunk_size, 32),
+            "lora": None,
+        }
+        self.model = self.wrp.Model(**args)
+
+    def forward(self, tokens: List[int], state: Union[Any, None] = None):
+        """
+        Run the model on `tokens` and return the result of `run_one`.
+
+        A state whose class is named "BackedState" is first loaded into a
+        newly created ModelState; any other value (including None) is handed
+        to `run_one` unchanged.
+        """
+        if type(state).__name__ == "BackedState":  # memory state
+            gpu_state = self.wrp.ModelState(self.model, 1)
+            gpu_state.load(state)
+        else:
+            gpu_state = state
+        return self.wrp.run_one(self.model, tokens, gpu_state)
--- a/backend-python/rwkv_pip/webgpu/web_rwkv_py.cp310-win_amd64.pyd
+++ b/backend-python/rwkv_pip/webgpu/web_rwkv_py.cp310-win_amd64.pyd
--- a/backend-python/utils/midi.py
+++ b/backend-python/utils/midi.py
@@ -52,6 +52,8 @@ class VocabConfig:
    bin_name_to_program_name: Dict[str, str]
    # Mapping from program number to instrument name.
    instrument_names: Dict[str, str]
+    # Manual override for velocity bins. Each element is the max velocity value for that bin by index.
+    velocity_bins_override: Optional[List[int]] = None

    def __post_init__(self):
        self.validate()
@@ -116,6 +118,12 @@ class VocabConfig:
            raise ValueError("velocity_bins must be at least 2")
        if len(self.bin_instrument_names) > 16:
            raise ValueError("bin_instruments must have at most 16 values")
+        if self.velocity_bins_override:
+            print("VocabConfig is using velocity_bins_override. Ignoring velocity_exp.")
+            if len(self.velocity_bins_override) != self.velocity_bins:
+                raise ValueError(
+                    "velocity_bins_override must have same length as velocity_bins"
+                )
        if (
            self.ch10_instrument_bin_name
            and self.ch10_instrument_bin_name not in self.bin_instrument_names
@@ -156,6 +164,11 @@ class VocabUtils:

    def velocity_to_bin(self, velocity: float) -> int:
        velocity = max(0, min(velocity, self.cfg.velocity_events - 1))
+        if self.cfg.velocity_bins_override:
+            for i, v in enumerate(self.cfg.velocity_bins_override):
+                if velocity <= v:
+                    return i
+            return 0
        binsize = self.cfg.velocity_events / (self.cfg.velocity_bins - 1)
        if self.cfg.velocity_exp == 1.0:
            return ceil(velocity / binsize)
@@ -176,6 +189,8 @@ class VocabUtils:
            )

    def bin_to_velocity(self, bin: int) -> int:
+        if self.cfg.velocity_bins_override:
+            return self.cfg.velocity_bins_override[bin]
        binsize = self.cfg.velocity_events / (self.cfg.velocity_bins - 1)
        if self.cfg.velocity_exp == 1.0:
            return max(0, ceil(bin * binsize - 1))
@@ -358,13 +373,32 @@ class AugmentConfig:
            )


+@dataclass
+class FilterConfig:
+    """Filtering and splitting options used when converting MIDI to text."""
+
+    # Whether to filter out MIDI files with duplicate MD5 hashes.
+    deduplicate_md5: bool
+    # Minimum delay between notes in a file before splitting into multiple
+    # documents. convert_midi_to_str multiplies this by 1000 before comparing
+    # it with a millisecond delta, so the value is expressed in seconds.
+    piece_split_delay: float
+    # Minimum length of a piece. convert_midi_to_str multiplies this by 1000
+    # before comparing it with a length in milliseconds, so the value is
+    # expressed in seconds (not milliseconds).
+    min_piece_length: float
+
+    @classmethod
+    def from_json(cls, path: str):
+        """
+        Build a FilterConfig from the JSON object stored at `path`.
+
+        The object's keys must match the field names exactly; a missing or
+        unknown key raises TypeError from the dataclass constructor.
+        """
+        # JSON files are UTF-8 by specification; do not depend on the
+        # platform's default text encoding.
+        with open(path, "r", encoding="utf-8") as f:
+            config = json.load(f)
+        return cls(**config)
+
+
 def mix_volume(velocity: int, volume: int, expression: int) -> float:
    return velocity * (volume / 127.0) * (expression / 127.0)


 def convert_midi_to_str(
-    cfg: VocabConfig, mid: mido.MidiFile, augment: AugmentValues = None
-) -> str:
+    cfg: VocabConfig,
+    filter_cfg: FilterConfig,
+    mid: mido.MidiFile,
+    augment: AugmentValues = None,
+) -> List[str]:
    utils = VocabUtils(cfg)
    if augment is None:
        augment = AugmentValues.default()
@@ -390,7 +424,9 @@ def convert_midi_to_str(
    }  # {channel: {(note, program) -> True}}
    started_flag = False

+    output_list = []
    output = ["<start>"]
+    output_length_ms = 0.0
    token_data_buffer: List[
        Tuple[int, int, int, float]
    ] = []  # need to sort notes between wait tokens
@@ -432,16 +468,33 @@ def convert_midi_to_str(
        token_data_buffer = []

    def consume_note_program_data(prog: int, chan: int, note: int, vel: float):
-        nonlocal output, started_flag, delta_time_ms, cfg, utils, token_data_buffer
+        nonlocal output, output_length_ms, started_flag, delta_time_ms, cfg, utils, token_data_buffer
        is_token_valid = (
            utils.prog_data_to_token_data(prog, chan, note, vel) is not None
        )
        if not is_token_valid:
            return
+
+        if delta_time_ms > filter_cfg.piece_split_delay * 1000.0:
+            # check if any notes are still held
+            silent = True
+            for channel in channel_notes.keys():
+                if len(channel_notes[channel]) > 0:
+                    silent = False
+                    break
+            if silent:
+                flush_token_data_buffer()
+                output.append("<end>")
+                if output_length_ms > filter_cfg.min_piece_length * 1000.0:
+                    output_list.append(" ".join(output))
+                output = ["<start>"]
+                output_length_ms = 0.0
+                started_flag = False
        if started_flag:
            wait_tokens = utils.data_to_wait_tokens(delta_time_ms)
            if len(wait_tokens) > 0:
                flush_token_data_buffer()
+                output_length_ms += delta_time_ms
                output += wait_tokens
        delta_time_ms = 0.0
        token_data_buffer.append((prog, chan, note, vel * augment.velocity_mod_factor))
@@ -510,7 +563,9 @@ def convert_midi_to_str(

    flush_token_data_buffer()
    output.append("<end>")
-    return " ".join(output)
+    if output_length_ms > filter_cfg.min_piece_length * 1000.0:
+        output_list.append(" ".join(output))
+    return output_list


 def generate_program_change_messages(cfg: VocabConfig):
@@ -633,10 +688,10 @@ def token_to_midi_message(
                if utils.cfg.decode_fix_repeated_notes:
                    if (channel, note) in state.active_notes:
                        del state.active_notes[(channel, note)]
-                    yield mido.Message(
-                        "note_off", note=note, time=ticks, channel=channel
-                    ), state
-                    ticks = 0
+                        yield mido.Message(
+                            "note_off", note=note, time=ticks, channel=channel
+                        ), state
+                        ticks = 0
                state.active_notes[(channel, note)] = state.total_time
                yield mido.Message(
                    "note_on", note=note, velocity=velocity, time=ticks, channel=channel
--- a/backend-python/utils/midi_filter_config.json
+++ b/backend-python/utils/midi_filter_config.json
@@ -0,0 +1,5 @@
+{
+    "deduplicate_md5": true,
+    "piece_split_delay": 10000,
+    "min_piece_length": 0
+}
--- a/backend-python/utils/rwkv.py
+++ b/backend-python/utils/rwkv.py
@@ -4,19 +4,13 @@ import os
 import pathlib
 import copy
 import re
-from typing import Dict, Iterable, List, Tuple, Union, Type
+from typing import Dict, Iterable, List, Tuple, Union, Type, Callable
 from utils.log import quick_log
 from fastapi import HTTPException
 from pydantic import BaseModel, Field
-import numpy as np
 from routes import state_cache
 import global_var

-
-END_OF_TEXT = 0
-END_OF_LINE_DOUBLE = 535
-
-
 os.environ["TORCH_EXTENSIONS_DIR"] = f"{pathlib.Path(__file__).parent.parent.resolve()}"


@@ -29,6 +23,8 @@ class RWKVType(Enum):

 class AbstractRWKV(ABC):
    def __init__(self, model, pipeline):
+        self.EOS_ID = 0
+
        self.name = "rwkv"
        self.model = model
        self.pipeline = pipeline
@@ -43,6 +39,8 @@ class AbstractRWKV(ABC):
        self.top_k = 0
        self.penalty_alpha_presence = 0
        self.penalty_alpha_frequency = 1
+        self.penalty_decay = 0.996
+        self.global_penalty = False

    @abstractmethod
    def adjust_occurrence(self, occurrence: Dict, token: int):
@@ -68,6 +66,8 @@ class AbstractRWKV(ABC):
        pass

    def get_embedding(self, input: str, fast_mode: bool) -> Tuple[List[float], int]:
+        import numpy as np
+
        if fast_mode:
            embedding, token_len = self.__fast_embedding(
                self.fix_tokens(self.pipeline.encode(input)), None
@@ -222,6 +222,8 @@ class AbstractRWKV(ABC):
    def generate(
        self, prompt: str, stop: Union[str, List[str], None] = None
    ) -> Iterable[Tuple[str, str, int, int]]:
+        import numpy as np
+
        quick_log(None, None, "Generation Prompt:\n" + prompt)
        cache = None
        delta_prompt = prompt
@@ -231,14 +233,14 @@ class AbstractRWKV(ABC):
            )
        except HTTPException:
            pass
-        if cache is None or cache["prompt"] == "":
+        if cache is None or cache["prompt"] == "" or cache["state"] is None:
            self.model_state = None
            self.model_tokens = []
        else:
            delta_prompt = prompt[len(cache["prompt"]) :]
-            self.model_state = copy.deepcopy(cache["state"])
-            self.model_tokens = copy.deepcopy(cache["tokens"])
-            logits = copy.deepcopy(cache["logits"])
+            self.model_state = cache["state"]
+            self.model_tokens = cache["tokens"]
+            logits = cache["logits"]

        prompt_token_len = 0
        if delta_prompt != "":
@@ -271,7 +273,18 @@ class AbstractRWKV(ABC):
                logits, temperature=self.temperature, top_p=self.top_p, top_k=self.top_k
            )

-            if token == END_OF_TEXT:
+            if token == self.EOS_ID:
+                try:
+                    state_cache.add_state(
+                        state_cache.AddStateBody(
+                            prompt=prompt + response,
+                            tokens=self.model_tokens,
+                            state=self.model_state,
+                            logits=logits,
+                        )
+                    )
+                except HTTPException:
+                    pass
                yield response, "", prompt_token_len, completion_token_len
                break

@@ -302,22 +315,25 @@ class AbstractRWKV(ABC):
                            yield response, "", prompt_token_len, completion_token_len
                            break
                    elif type(stop) == list:
-                        stop_exist_regex = "|".join(stop)
-                        matched = re.search(stop_exist_regex, response)
-                        if matched:
-                            try:
-                                state_cache.add_state(
-                                    state_cache.AddStateBody(
-                                        prompt=prompt + response,
-                                        tokens=self.model_tokens,
-                                        state=self.model_state,
-                                        logits=logits,
+                        exit_flag = False
+                        for s in stop:
+                            if s in response:
+                                try:
+                                    state_cache.add_state(
+                                        state_cache.AddStateBody(
+                                            prompt=prompt + response,
+                                            tokens=self.model_tokens,
+                                            state=self.model_state,
+                                            logits=logits,
+                                        )
                                    )
-                                )
-                            except HTTPException:
-                                pass
-                            response = response.split(matched.group())[0]
-                            yield response, "", prompt_token_len, completion_token_len
+                                except HTTPException:
+                                    pass
+                                exit_flag = True
+                                response = response.split(s)[0]
+                                yield response, "", prompt_token_len, completion_token_len
+                                break
+                        if exit_flag:
                            break
                out_last = begin + i + 1
                if i == self.max_tokens_per_generation - 1:
@@ -360,18 +376,24 @@ class TextRWKV(AbstractRWKV):
            self.bot = "Assistant"
            self.END_OF_LINE = 11

-        self.AVOID_REPEAT_TOKENS = []
+        self.AVOID_REPEAT_TOKENS = set()
        AVOID_REPEAT = "，：？！"
        for i in AVOID_REPEAT:
            dd = self.pipeline.encode(i)
            assert len(dd) == 1
-            self.AVOID_REPEAT_TOKENS += dd
+            self.AVOID_REPEAT_TOKENS.add(dd[0])
+        self.AVOID_PENALTY_TOKENS = set()
+        AVOID_PENALTY = '\n,.:?!，。：？！"“”<>[]{}/\\|;；~`@#$%^&*()_+-=0123456789 '
+        for i in AVOID_PENALTY:
+            dd = self.pipeline.encode(i)
+            if len(dd) == 1:
+                self.AVOID_PENALTY_TOKENS.add(dd[0])

        self.__preload()

    def adjust_occurrence(self, occurrence: Dict, token: int):
        for xxx in occurrence:
-            occurrence[xxx] *= 0.996
+            occurrence[xxx] *= self.penalty_decay
        if token not in occurrence:
            occurrence[token] = 1
        else:
@@ -379,26 +401,24 @@ class TextRWKV(AbstractRWKV):

    def adjust_forward_logits(self, logits: List[float], occurrence: Dict, i: int):
        for n in occurrence:
+            # if n not in self.AVOID_PENALTY_TOKENS:
            logits[n] -= (
                self.penalty_alpha_presence
                + occurrence[n] * self.penalty_alpha_frequency
            )

-        if i == 0:
+        # set global_penalty to False to get the same generated results as the official RWKV Gradio
+        if self.global_penalty and i == 0:
            for token in self.model_tokens:
                token = int(token)
-                for xxx in occurrence:
-                    occurrence[xxx] *= 0.996
-                if token not in occurrence:
-                    occurrence[token] = 1
-                else:
-                    occurrence[token] += 1
+                if token not in self.AVOID_PENALTY_TOKENS:
+                    self.adjust_occurrence(occurrence, token)

    # Model only saw '\n\n' as [187, 187] before, but the tokenizer outputs [535] for it at the end
    def fix_tokens(self, tokens) -> List[int]:
        if self.rwkv_type == RWKVType.World:
            return tokens
-        if len(tokens) > 0 and tokens[-1] == END_OF_LINE_DOUBLE:
+        if len(tokens) > 0 and tokens[-1] == 535:
            tokens = tokens[:-1] + [self.END_OF_LINE, self.END_OF_LINE]
        return tokens

@@ -456,7 +476,7 @@ The following is a coherent verbose detailed conversation between a girl named {
            pass


-class MusicRWKV(AbstractRWKV):
+class MusicMidiRWKV(AbstractRWKV):
    def __init__(self, model, pipeline):
        super().__init__(model, pipeline)

@@ -498,8 +518,47 @@ class MusicRWKV(AbstractRWKV):
        return " " + delta


+class MusicAbcRWKV(AbstractRWKV):
+    """
+    AbstractRWKV implementation with end-of-sequence id 3 that applies no
+    occurrence tracking, logit adjustment, token fixing or output
+    post-processing.
+    """
+
+    def __init__(self, model, pipeline):
+        super().__init__(model, pipeline)
+
+        # overrides the EOS_ID set by the base class
+        self.EOS_ID = 3
+
+        # sampling defaults
+        self.max_tokens_per_generation = 500
+        self.temperature = 1
+        self.top_p = 0.8
+        self.top_k = 8
+
+        self.rwkv_type = RWKVType.Music
+
+    def adjust_occurrence(self, occurrence: Dict, token: int):
+        # intentionally a no-op
+        return None
+
+    def adjust_forward_logits(self, logits: List[float], occurrence: Dict, i: int):
+        # intentionally a no-op: logits are left unmodified
+        return None
+
+    def fix_tokens(self, tokens) -> List[int]:
+        # tokens are used exactly as produced by the tokenizer
+        return tokens
+
+    def run_rnn(
+        self, _tokens: List[str], newline_adj: int = 0
+    ) -> Tuple[List[float], int]:
+        # `newline_adj` is accepted but not used here.
+        token_ids = list(map(int, _tokens))
+        count = len(token_ids)
+        # record the tokens, then advance the model state with them
+        self.model_tokens += token_ids
+        logits, self.model_state = self.model.forward(token_ids, self.model_state)
+        return logits, count
+
+    def delta_postprocess(self, delta: str) -> str:
+        # generated text is emitted unchanged
+        return delta
+
+
 def get_tokenizer(tokenizer_len: int):
    tokenizer_dir = f"{pathlib.Path(__file__).parent.parent.resolve()}/rwkv_pip/"
+    if tokenizer_len < 2176:
+        return "abc_tokenizer"
+    if tokenizer_len < 20096:
+        return tokenizer_dir + "tokenizer-midipiano.json"
    if tokenizer_len < 50277:
        return tokenizer_dir + "tokenizer-midi.json"
    elif tokenizer_len < 65536:
@@ -508,9 +567,44 @@ def get_tokenizer(tokenizer_len: int):
        return "rwkv_vocab_v20230424"


+def get_model_path(model_path: str) -> str:
+    """Resolve ``model_path`` to an existing model file where possible.
+
+    An absolute path is returned unchanged. A relative path is tried against
+    several base directories, first directly and then under ``build/bin``
+    (the dev layout). The first candidate that is an existing file is
+    returned as a string; if none matches, ``model_path`` is returned as given.
+    """
+    if os.path.isabs(model_path):
+        return model_path
+
+    cwd = pathlib.Path(os.path.abspath(os.getcwd()))
+    module_file = pathlib.Path(os.path.abspath(__file__))
+
+    # Base directories, in lookup order.
+    base_dirs: List[pathlib.Path] = [
+        cwd,  # [cwd](RWKV-Runner)/models/xxx
+        cwd.parent,  # [cwd](backend-python)/../models/xxx
+        module_file.parent.parent,  # backend-python/models/xxx
+        module_file.parent.parent.parent,  # RWKV-Runner/models/xxx
+    ]
+
+    for base_dir in base_dirs:
+        # Plain location first, then the build output folder used for dev.
+        for candidate in (
+            base_dir / model_path,
+            base_dir / "build" / "bin" / model_path,
+        ):
+            if os.path.isfile(candidate):
+                return str(candidate)
+
+    return model_path
+
+
 def RWKV(model: str, strategy: str, tokenizer: Union[str, None]) -> AbstractRWKV:
+    model = get_model_path(model)
+
    rwkv_beta = global_var.get(global_var.Args).rwkv_beta
    rwkv_cpp = getattr(global_var.get(global_var.Args), "rwkv.cpp")
+    webgpu = global_var.get(global_var.Args).webgpu

    if "midi" in model.lower() or "abc" in model.lower():
        os.environ["RWKV_RESCALE_LAYER"] = "999"
@@ -526,6 +620,11 @@ def RWKV(model: str, strategy: str, tokenizer: Union[str, None]) -> AbstractRWKV
        from rwkv_pip.cpp.model import (
            RWKV as Model,
        )
+    elif webgpu:
+        print("Using webgpu")
+        from rwkv_pip.webgpu.model import (
+            RWKV as Model,
+        )
    else:
        from rwkv_pip.model import (
            RWKV as Model,
@@ -541,14 +640,30 @@ def RWKV(model: str, strategy: str, tokenizer: Union[str, None]) -> AbstractRWKV
    rwkv_map: dict[str, Type[AbstractRWKV]] = {
        "20B_tokenizer": TextRWKV,
        "rwkv_vocab_v20230424": TextRWKV,
-        "tokenizer-midi": MusicRWKV,
+        "tokenizer-midi": MusicMidiRWKV,
+        "tokenizer-midipiano": MusicMidiRWKV,
+        "abc_tokenizer": MusicAbcRWKV,
    }
    tokenizer_name = os.path.splitext(os.path.basename(tokenizer))[0]
+    global_var.set(
+        global_var.Midi_Vocab_Config_Type,
+        (
+            global_var.MidiVocabConfig.Piano
+            if tokenizer_name == "tokenizer-midipiano"
+            else global_var.MidiVocabConfig.Default
+        ),
+    )
    rwkv: AbstractRWKV
    if tokenizer_name in rwkv_map:
        rwkv = rwkv_map[tokenizer_name](model, pipeline)
    else:
-        rwkv = TextRWKV(model, pipeline)
+        tokenizer_name = tokenizer_name.lower()
+        if "music" in tokenizer_name or "midi" in tokenizer_name:
+            rwkv = MusicMidiRWKV(model, pipeline)
+        elif "abc" in tokenizer_name:
+            rwkv = MusicAbcRWKV(model, pipeline)
+        else:
+            rwkv = TextRWKV(model, pipeline)
    rwkv.name = filename

    return rwkv
@@ -556,19 +671,24 @@ def RWKV(model: str, strategy: str, tokenizer: Union[str, None]) -> AbstractRWKV

 class ModelConfigBody(BaseModel):
    max_tokens: int = Field(default=None, gt=0, le=102400)
-    temperature: float = Field(default=None, ge=0, le=2)
+    temperature: float = Field(default=None, ge=0, le=3)
    top_p: float = Field(default=None, ge=0, le=1)
    presence_penalty: float = Field(default=None, ge=-2, le=2)
    frequency_penalty: float = Field(default=None, ge=-2, le=2)
+    penalty_decay: float = Field(default=None, ge=0.99, le=0.999)
+    top_k: int = Field(default=None, ge=0, le=25)
+    global_penalty: bool = Field(default=None)

    model_config = {
        "json_schema_extra": {
            "example": {
                "max_tokens": 1000,
-                "temperature": 1.2,
-                "top_p": 0.5,
-                "presence_penalty": 0.4,
-                "frequency_penalty": 0.4,
+                "temperature": 1,
+                "top_p": 0.3,
+                "presence_penalty": 0,
+                "frequency_penalty": 1,
+                "penalty_decay": 0.996,
+                "global_penalty": False,
            }
        }
    }
@@ -588,6 +708,12 @@ def set_rwkv_config(model: AbstractRWKV, body: ModelConfigBody):
        model.penalty_alpha_presence = body.presence_penalty
    if body.frequency_penalty is not None:
        model.penalty_alpha_frequency = body.frequency_penalty
+    if body.penalty_decay is not None:
+        model.penalty_decay = body.penalty_decay
+    if body.top_k is not None:
+        model.top_k = body.top_k
+    if body.global_penalty is not None:
+        model.global_penalty = body.global_penalty


 def get_rwkv_config(model: AbstractRWKV) -> ModelConfigBody:
@@ -597,4 +723,7 @@ def get_rwkv_config(model: AbstractRWKV) -> ModelConfigBody:
        top_p=model.top_p,
        presence_penalty=model.penalty_alpha_presence,
        frequency_penalty=model.penalty_alpha_frequency,
+        penalty_decay=model.penalty_decay,
+        top_k=model.top_k,
+        global_penalty=model.global_penalty,
    )
--- a/backend-python/utils/torch.py
+++ b/backend-python/utils/torch.py
@@ -19,9 +19,12 @@ def set_torch():


 def torch_gc():
-    import torch
+    """Release cached CUDA memory held by torch when CUDA is available.
+
+    Best-effort: ordinary errors (torch cannot be imported, or the installed
+    torch has no usable ``torch.cuda``) are ignored so that CPU and WebGPU
+    setups keep working. Returns ``None``.
+    """
+    try:
+        import torch

-    if torch.cuda.is_available():
-        with torch.cuda.device(0):
-            torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
+        if torch.cuda.is_available():
+            with torch.cuda.device(0):
+                torch.cuda.empty_cache()
+                torch.cuda.ipc_collect()
+    except Exception:
+        # Deliberately ignored: prevents the "'torch' has no attribute 'cuda'"
+        # error so users can run on CPU or WebGPU. Unlike a bare ``except:``,
+        # KeyboardInterrupt and SystemExit still propagate.
+        pass
--- a/backend-python/utils/vocab_config_piano.json
+++ b/backend-python/utils/vocab_config_piano.json
@@ -0,0 +1,279 @@
+{
+    "note_events": 128,
+    "wait_events": 125,
+    "max_wait_time": 1000,
+    "velocity_events": 128,
+    "velocity_bins": 16,
+    "velocity_exp": 0.33,
+    "do_token_sorting": true,
+    "unrolled_tokens": false,
+    "decode_end_held_note_delay": 5.0,
+    "decode_fix_repeated_notes": true,
+    "bin_instrument_names": [
+        "piano"
+    ],
+    "ch10_instrument_bin_name": "",
+    "program_name_to_bin_name": {
+        "Acoustic Grand Piano": "piano",
+        "Bright Acoustic Piano": "piano",
+        "Electric Grand Piano": "piano",
+        "Honky-tonk Piano": "piano",
+        "Electric Piano 1 (Rhodes Piano)": "piano",
+        "Electric Piano 2 (Chorused Piano)": "piano",
+        "Harpsichord": "piano",
+        "Clavinet": "piano",
+        "Celesta": "",
+        "Glockenspiel": "",
+        "Music Box": "",
+        "Vibraphone": "",
+        "Marimba": "",
+        "Xylophone": "",
+        "Tubular Bells": "",
+        "Dulcimer (Santur)": "",
+        "Drawbar Organ (Hammond)": "",
+        "Percussive Organ": "piano",
+        "Rock Organ": "piano",
+        "Church Organ": "piano",
+        "Reed Organ": "piano",
+        "Accordion (French)": "piano",
+        "Harmonica": "piano",
+        "Tango Accordion (Band neon)": "piano",
+        "Acoustic Guitar (nylon)": "",
+        "Acoustic Guitar (steel)": "",
+        "Electric Guitar (jazz)": "",
+        "Electric Guitar (clean)": "",
+        "Electric Guitar (muted)": "",
+        "Overdriven Guitar": "",
+        "Distortion Guitar": "",
+        "Guitar harmonics": "",
+        "Acoustic Bass": "",
+        "Electric Bass (fingered)": "",
+        "Electric Bass (picked)": "",
+        "Fretless Bass": "",
+        "Slap Bass 1": "",
+        "Slap Bass 2": "",
+        "Synth Bass 1": "",
+        "Synth Bass 2": "",
+        "Violin": "",
+        "Viola": "",
+        "Cello": "",
+        "Contrabass": "",
+        "Tremolo Strings": "",
+        "Pizzicato Strings": "",
+        "Orchestral Harp": "",
+        "Timpani": "",
+        "String Ensemble 1 (strings)": "",
+        "String Ensemble 2 (slow strings)": "",
+        "SynthStrings 1": "",
+        "SynthStrings 2": "",
+        "Choir Aahs": "",
+        "Voice Oohs": "",
+        "Synth Voice": "",
+        "Orchestra Hit": "",
+        "Trumpet": "",
+        "Trombone": "",
+        "Tuba": "",
+        "Muted Trumpet": "",
+        "French Horn": "",
+        "Brass Section": "",
+        "SynthBrass 1": "",
+        "SynthBrass 2": "",
+        "Soprano Sax": "",
+        "Alto Sax": "",
+        "Tenor Sax": "",
+        "Baritone Sax": "",
+        "Oboe": "",
+        "English Horn": "",
+        "Bassoon": "",
+        "Clarinet": "",
+        "Piccolo": "",
+        "Flute": "",
+        "Recorder": "",
+        "Pan Flute": "",
+        "Blown Bottle": "",
+        "Shakuhachi": "",
+        "Whistle": "",
+        "Ocarina": "",
+        "Lead 1 (square wave)": "",
+        "Lead 2 (sawtooth wave)": "",
+        "Lead 3 (calliope)": "",
+        "Lead 4 (chiffer)": "",
+        "Lead 5 (charang)": "",
+        "Lead 6 (voice solo)": "",
+        "Lead 7 (fifths)": "",
+        "Lead 8 (bass + lead)": "",
+        "Pad 1 (new age Fantasia)": "",
+        "Pad 2 (warm)": "",
+        "Pad 3 (polysynth)": "",
+        "Pad 4 (choir space voice)": "",
+        "Pad 5 (bowed glass)": "",
+        "Pad 6 (metallic pro)": "",
+        "Pad 7 (halo)": "",
+        "Pad 8 (sweep)": "",
+        "FX 1 (rain)": "",
+        "FX 2 (soundtrack)": "",
+        "FX 3 (crystal)": "",
+        "FX 4 (atmosphere)": "",
+        "FX 5 (brightness)": "",
+        "FX 6 (goblins)": "",
+        "FX 7 (echoes, drops)": "",
+        "FX 8 (sci-fi, star theme)": "",
+        "Sitar": "",
+        "Banjo": "",
+        "Shamisen": "",
+        "Koto": "",
+        "Kalimba": "",
+        "Bag pipe": "",
+        "Fiddle": "",
+        "Shanai": "",
+        "Tinkle Bell": "",
+        "Agogo": "",
+        "Steel Drums": "",
+        "Woodblock": "",
+        "Taiko Drum": "",
+        "Melodic Tom": "",
+        "Synth Drum": "",
+        "Reverse Cymbal": "",
+        "Guitar Fret Noise": "",
+        "Breath Noise": "",
+        "Seashore": "",
+        "Bird Tweet": "",
+        "Telephone Ring": "",
+        "Helicopter": "",
+        "Applause": "",
+        "Gunshot": ""
+    },
+    "bin_name_to_program_name": {
+        "piano": "Acoustic Grand Piano"
+    },
+    "instrument_names": {
+        "0": "Acoustic Grand Piano",
+        "1": "Bright Acoustic Piano",
+        "2": "Electric Grand Piano",
+        "3": "Honky-tonk Piano",
+        "4": "Electric Piano 1 (Rhodes Piano)",
+        "5": "Electric Piano 2 (Chorused Piano)",
+        "6": "Harpsichord",
+        "7": "Clavinet",
+        "8": "Celesta",
+        "9": "Glockenspiel",
+        "10": "Music Box",
+        "11": "Vibraphone",
+        "12": "Marimba",
+        "13": "Xylophone",
+        "14": "Tubular Bells",
+        "15": "Dulcimer (Santur)",
+        "16": "Drawbar Organ (Hammond)",
+        "17": "Percussive Organ",
+        "18": "Rock Organ",
+        "19": "Church Organ",
+        "20": "Reed Organ",
+        "21": "Accordion (French)",
+        "22": "Harmonica",
+        "23": "Tango Accordion (Band neon)",
+        "24": "Acoustic Guitar (nylon)",
+        "25": "Acoustic Guitar (steel)",
+        "26": "Electric Guitar (jazz)",
+        "27": "Electric Guitar (clean)",
+        "28": "Electric Guitar (muted)",
+        "29": "Overdriven Guitar",
+        "30": "Distortion Guitar",
+        "31": "Guitar harmonics",
+        "32": "Acoustic Bass",
+        "33": "Electric Bass (fingered)",
+        "34": "Electric Bass (picked)",
+        "35": "Fretless Bass",
+        "36": "Slap Bass 1",
+        "37": "Slap Bass 2",
+        "38": "Synth Bass 1",
+        "39": "Synth Bass 2",
+        "40": "Violin",
+        "41": "Viola",
+        "42": "Cello",
+        "43": "Contrabass",
+        "44": "Tremolo Strings",
+        "45": "Pizzicato Strings",
+        "46": "Orchestral Harp",
+        "47": "Timpani",
+        "48": "String Ensemble 1 (strings)",
+        "49": "String Ensemble 2 (slow strings)",
+        "50": "SynthStrings 1",
+        "51": "SynthStrings 2",
+        "52": "Choir Aahs",
+        "53": "Voice Oohs",
+        "54": "Synth Voice",
+        "55": "Orchestra Hit",
+        "56": "Trumpet",
+        "57": "Trombone",
+        "58": "Tuba",
+        "59": "Muted Trumpet",
+        "60": "French Horn",
+        "61": "Brass Section",
+        "62": "SynthBrass 1",
+        "63": "SynthBrass 2",
+        "64": "Soprano Sax",
+        "65": "Alto Sax",
+        "66": "Tenor Sax",
+        "67": "Baritone Sax",
+        "68": "Oboe",
+        "69": "English Horn",
+        "70": "Bassoon",
+        "71": "Clarinet",
+        "72": "Piccolo",
+        "73": "Flute",
+        "74": "Recorder",
+        "75": "Pan Flute",
+        "76": "Blown Bottle",
+        "77": "Shakuhachi",
+        "78": "Whistle",
+        "79": "Ocarina",
+        "80": "Lead 1 (square wave)",
+        "81": "Lead 2 (sawtooth wave)",
+        "82": "Lead 3 (calliope)",
+        "83": "Lead 4 (chiffer)",
+        "84": "Lead 5 (charang)",
+        "85": "Lead 6 (voice solo)",
+        "86": "Lead 7 (fifths)",
+        "87": "Lead 8 (bass + lead)",
+        "88": "Pad 1 (new age Fantasia)",
+        "89": "Pad 2 (warm)",
+        "90": "Pad 3 (polysynth)",
+        "91": "Pad 4 (choir space voice)",
+        "92": "Pad 5 (bowed glass)",
+        "93": "Pad 6 (metallic pro)",
+        "94": "Pad 7 (halo)",
+        "95": "Pad 8 (sweep)",
+        "96": "FX 1 (rain)",
+        "97": "FX 2 (soundtrack)",
+        "98": "FX 3 (crystal)",
+        "99": "FX 4 (atmosphere)",
+        "100": "FX 5 (brightness)",
+        "101": "FX 6 (goblins)",
+        "102": "FX 7 (echoes, drops)",
+        "103": "FX 8 (sci-fi, star theme)",
+        "104": "Sitar",
+        "105": "Banjo",
+        "106": "Shamisen",
+        "107": "Koto",
+        "108": "Kalimba",
+        "109": "Bag pipe",
+        "110": "Fiddle",
+        "111": "Shanai",
+        "112": "Tinkle Bell",
+        "113": "Agogo",
+        "114": "Steel Drums",
+        "115": "Woodblock",
+        "116": "Taiko Drum",
+        "117": "Melodic Tom",
+        "118": "Synth Drum",
+        "119": "Reverse Cymbal",
+        "120": "Guitar Fret Noise",
+        "121": "Breath Noise",
+        "122": "Seashore",
+        "123": "Bird Tweet",
+        "124": "Telephone Ring",
+        "125": "Helicopter",
+        "126": "Applause",
+        "127": "Gunshot"
+    }
+}
--- a/build/darwin/Readme_Install.txt
+++ b/build/darwin/Readme_Install.txt
@@ -1,3 +1,8 @@
+Client Download URL:
+客户端下载地址:
+クライアントのダウンロードURL:
+https://github.com/josStorer/RWKV-Runner/releases/latest/download/RWKV-Runner_macos_universal.zip
+
 For Mac and Linux users, please manually install Python 3.10 (usually the latest systems come with it built-in). You can specify the Python interpreter to use in Settings. (which python3)
 对于Mac和Linux用户，请手动安装 Python3.10 (通常最新的系统已经内置了). 你可以在设置中指定使用的Python解释器. (which python3)
 MacおよびLinuxのユーザーの方は、Python3.10を手動でインストールしてください（通常、最新のシステムには既に組み込まれています）。 設定メニューで使用するPythonインタプリタを指定することができます。 (which python3)
--- a/build/linux/Readme_Install.txt
+++ b/build/linux/Readme_Install.txt
@@ -1,3 +1,8 @@
+Client Download URL:
+客户端下载地址:
+クライアントのダウンロードURL:
+https://github.com/josStorer/RWKV-Runner/releases/latest/download/RWKV-Runner_linux_x64
+
 For Mac and Linux users, please manually install Python 3.10 (usually the latest systems come with it built-in). You can specify the Python interpreter to use in Settings.
 对于Mac和Linux用户，请手动安装 Python3.10 (通常最新的系统已经内置了). 你可以在设置中指定使用的Python解释器.
 MacおよびLinuxのユーザーの方は、Python3.10を手動でインストールしてください（通常、最新のシステムには既に組み込まれています）。 設定メニューで使用するPythonインタプリタを指定することができます。
--- a/build/windows/Readme_Install.txt
+++ b/build/windows/Readme_Install.txt
@@ -1,3 +1,8 @@
+Client Download URL:
+客户端下载地址:
+クライアントのダウンロードURL:
+https://github.com/josStorer/RWKV-Runner/releases/latest/download/RWKV-Runner_windows_x64.exe
+
 Please execute this program in an empty directory. All related dependencies will be placed in this directory.
 请将本程序放在一个空目录内执行, 所有相关依赖均会放置于此目录.
 このプログラムを空のディレクトリで実行してください。関連するすべての依存関係は、このディレクトリに配置されます。
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,18 @@
+# Compose definition that builds and runs the RWKV-Runner image from this directory.
+services:
+  # NOTE(review): "rmkv_runner" looks like a typo of "rwkv_runner" - confirm
+  # before renaming, since the service name is used in `docker compose` commands.
+  rmkv_runner:
+    image: rwkv-runner:latest
+    build: .
+    # To use rwkv.cpp, uncomment the command below; it appends the "--rwkv.cpp" parameter.
+    # command: python3.10 ./backend-python/main.py  --port 27777 --host 0.0.0.0 --webui --rwkv.cpp
+    volumes:
+      # Bind-mounts the host's /mnt at the same path inside the container.
+      - /mnt:/mnt
+    ports:
+      - "27777:27777"
+    # Comment out the following lines when using rwkv.cpp.
+    # They reserve one NVIDIA GPU device for the container.
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
--- a/exportModelsJson.js
+++ b/exportModelsJson.js
@@ -19,14 +19,15 @@ document.querySelectorAll('.grid.h-10.grid-cols-12.place-content-center.gap-x-3.
  if (!data.name.endsWith('.bin') && !data.name.endsWith('.pth'))
    return

-  data.desc = {en: '', zh: ''}
+  data.desc = { en: '', zh: '', ja: '' }
  const rawText = await (await fetch(e.children[1].href.replace('/resolve/', '/raw/'))).text()

  data.size = parseInt(extractValue(rawText, 'size'))
  data.SHA256 = extractValue(rawText, 'oid sha256:')
  data.lastUpdated = e.children[3].children[0].getAttribute('datetime')
-  data.url = e.children[1].href.replace('/resolve/', '/blob/')
-  data.downloadUrl = e.children[1].href
+  data.url = e.children[1].href.replace('/resolve/', '/blob/').replace('?download=true', '')
+  data.downloadUrl = e.children[1].href.replace('?download=true', '')
+  data.tags = []

  modelsJson.push(data)
 })
--- a/finetune/get_layer_and_embd.py
+++ b/finetune/get_layer_and_embd.py
@@ -32,6 +32,7 @@ cleaner_thread.start()
 w = torch.load(model_file, map_location="cpu")
 gc.collect()

+vocab_size = w["emb.weight"].shape[0]
 n_embd = w["emb.weight"].shape[1]
 n_layer = 0
 keys = list(w.keys())
@@ -52,6 +53,9 @@ for x in keys:
        version = max(6, version)

 if version <= expected_max_version:
-    print(f"--n_layer {n_layer} --n_embd {n_embd}", end="")
+    print(
+        f"v{int(version)}/train.py --vocab_size {vocab_size} --n_layer {n_layer} --n_embd {n_embd}",
+        end="",
+    )
 else:
    raise Exception(f"RWKV{version} is not supported")
--- a/finetune/install-wsl-dep-and-train.sh
+++ b/finetune/install-wsl-dep-and-train.sh
@@ -22,6 +22,12 @@ else
  sudo apt -y install python3-pip
 fi

+if dpkg -s "python3-dev" >/dev/null 2>&1; then
+  echo "python3-dev installed"
+else
+  sudo apt -y install python3-dev
+fi
+
 if dpkg -s "ninja-build" >/dev/null 2>&1; then
  echo "ninja installed"
 else
@@ -47,11 +53,12 @@ else
 fi

 echo "loading $loadModel"
-modelInfo=$(python3 ./finetune/get_layer_and_embd.py $loadModel 4)
+modelInfo=$(python3 ./finetune/get_layer_and_embd.py $loadModel 5.2)
 echo $modelInfo
 if [[ $modelInfo =~ "--n_layer" ]]; then
-  python3 ./finetune/lora/train.py $modelInfo $@ --proj_dir lora-models --data_type binidx --lora \
-    --lora_parts=att,ffn,time,ln --strategy deepspeed_stage_2 --accelerator gpu
+  sudo rm -rf /root/.cache/torch_extensions
+  python3 ./finetune/lora/$modelInfo $@ --proj_dir lora-models --data_type binidx --lora \
+    --lora_parts=att,ffn,time,ln --strategy deepspeed_stage_2 --accelerator gpu --ds_bucket_mb 2
 else
  echo "modelInfo is invalid"
  exit 1
--- a/finetune/lora/v4/cuda/wkv_cuda.cu
+++ b/finetune/lora/v4/cuda/wkv_cuda.cu
--- a/finetune/lora/v4/cuda/wkv_cuda_bf16.cu
+++ b/finetune/lora/v4/cuda/wkv_cuda_bf16.cu
--- a/finetune/lora/v4/cuda/wkv_op.cpp
+++ b/finetune/lora/v4/cuda/wkv_op.cpp
--- a/finetune/lora/v4/cuda/wkv_op_bf16.cpp
+++ b/finetune/lora/v4/cuda/wkv_op_bf16.cpp
--- a/finetune/lora/v4/src/init.py
+++ b/finetune/lora/v4/src/init.py
--- a/finetune/lora/v4/src/binidx.py
+++ b/finetune/lora/v4/src/binidx.py
@@ -7,6 +7,7 @@ import struct
 from functools import lru_cache
 from itertools import accumulate

+
 def print_rank_0(*message):
    pass
    # """If distributed is initialized print only on rank 0."""
@@ -16,12 +17,14 @@ def print_rank_0(*message):
    # else:
    #     print(*message, flush=True)

+
 def _warmup_mmap_file(path):
    pass
    # with open(path, "rb") as stream:
    #     while stream.read(100 * 1024 * 1024):
    #         pass

+
 dtypes = {
    1: np.uint8,
    2: np.int8,
@@ -33,18 +36,22 @@ dtypes = {
    8: np.uint16,
 }

+
 def code(dtype):
    for k in dtypes.keys():
        if dtypes[k] == dtype:
            return k
    raise ValueError(dtype)

+
 def index_file_path(prefix_path):
    return prefix_path + ".idx"

+
 def data_file_path(prefix_path):
    return prefix_path + ".bin"

+
 class MMapIndexedDataset(torch.utils.data.Dataset):
    class Index(object):
        _HDR_MAGIC = b"MMIDIDX\x00\x00"
@@ -100,7 +107,7 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
                    self._file.close()

            return _Writer()
-        
+
        def __init__(self, path, skip_warmup=False):
            with open(path, "rb") as stream:
                magic_test = stream.read(9)
@@ -217,8 +224,7 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
        elif isinstance(idx, slice):
            start, stop, step = idx.indices(len(self))
            if step != 1:
-                raise ValueError(
-                    "Slices into indexed_dataset must be contiguous")
+                raise ValueError("Slices into indexed_dataset must be contiguous")
            ptr = self._index._pointers[start]
            sizes = self._index._sizes[idx]
            offsets = list(accumulate(sizes))
--- a/finetune/lora/v4/src/dataset.py
+++ b/finetune/lora/v4/src/dataset.py
@@ -17,9 +17,11 @@ class MyDataset(Dataset):

        if args.data_type == "binidx":
            self.vocab_size = args.vocab_size
-            rank_zero_info(f"Current vocab size = {self.vocab_size} (make sure it's correct)")
+            rank_zero_info(
+                f"Current vocab size = {self.vocab_size} (make sure it's correct)"
+            )

-            if args.data_file.endswith('/'):
+            if args.data_file.endswith("/"):
                d_all = []
                for p in os.listdir(args.data_file):
                    if p.endswith(".idx"):
@@ -29,33 +31,52 @@ class MyDataset(Dataset):
                exit(0)
            else:
                self.data = MMapIndexedDataset(args.data_file)
-                self.data_size = len(self.data._bin_buffer) // self.data._index._dtype_size
+                self.data_size = (
+                    len(self.data._bin_buffer) // self.data._index._dtype_size
+                )
                rank_zero_info(f"Data has {self.data_size} tokens.")

            if args.my_qa_mask > 0:
-                self.data_pile = MMapIndexedDataset('/fsx/BlinkDL/pile/pile_20B_tokenizer_text_document')
-                self.data_pile_size = len(self.data_pile._bin_buffer) // self.data._index._dtype_size
+                self.data_pile = MMapIndexedDataset(
+                    "/fsx/BlinkDL/pile/pile_20B_tokenizer_text_document"
+                )
+                self.data_pile_size = (
+                    len(self.data_pile._bin_buffer) // self.data._index._dtype_size
+                )

            if args.my_pile_stage > 0:
                # assert self.data_size == 332115325534 and self.vocab_size == 50277
                self.samples_per_epoch = args.epoch_steps * args.real_bsz
                assert self.samples_per_epoch == 40320
-                rank_zero_info(f"########## Pile 20b-tokenized stage {args.my_pile_stage} ##########")
+                rank_zero_info(
+                    f"########## Pile 20b-tokenized stage {args.my_pile_stage} ##########"
+                )
                dataset_slot = self.data_size // args.ctx_len
                if args.my_pile_stage != 4:
                    assert MaybeIsPrime(args.magic_prime)
                    assert args.magic_prime % 3 == 2
-                    assert args.magic_prime / dataset_slot > 0.99 and args.magic_prime / dataset_slot <= 1
+                    assert (
+                        args.magic_prime / dataset_slot > 0.99
+                        and args.magic_prime / dataset_slot <= 1
+                    )
        elif args.data_type == "numpy":
            self.data = np.load(args.data_file).astype("int")
            self.vocab_size = args.vocab_size
-            rank_zero_info("Current vocab size =", self.vocab_size, "(make sure it's correct)")
+            rank_zero_info(
+                "Current vocab size =", self.vocab_size, "(make sure it's correct)"
+            )
            self.data_size = len(self.data)
            rank_zero_info(f"Data has {self.data_size} tokens.")
        elif args.data_type == "uint16":
-            self.data = np.fromfile(args.data_file, dtype=np.uint16).astype("int32").reshape(-1, args.my_sample_len)
+            self.data = (
+                np.fromfile(args.data_file, dtype=np.uint16)
+                .astype("int32")
+                .reshape(-1, args.my_sample_len)
+            )
            self.vocab_size = args.vocab_size
-            rank_zero_info("Current vocab size =", self.vocab_size, "(make sure it's correct)")
+            rank_zero_info(
+                "Current vocab size =", self.vocab_size, "(make sure it's correct)"
+            )
            self.data_size = self.data.shape[0]
            rank_zero_info(f"Data has {self.data_size} samples.")
        elif args.data_type == "wds_img":
@@ -86,10 +107,14 @@ class MyDataset(Dataset):
            for u in unique:
                xxObj[xx] = u
                xx += 1
-            with open(f"{args.proj_dir}/vocab.json", "w", encoding="utf-16le") as vocab_file:
+            with open(
+                f"{args.proj_dir}/vocab.json", "w", encoding="utf-16le"
+            ) as vocab_file:
                vocab_file.write(json.dumps(xxObj, ensure_ascii=False))
            self.data_size = len(self.data)
-            rank_zero_info(f"Data has {self.data_size} tokens, {self.vocab_size} vocab size.")
+            rank_zero_info(
+                f"Data has {self.data_size} tokens, {self.vocab_size} vocab size."
+            )
            self.stoi = {ch: i for i, ch in enumerate(unique)}
            self.itos = {i: ch for i, ch in enumerate(unique)}

@@ -104,36 +129,53 @@ class MyDataset(Dataset):
        # print(f"epoch {epoch} idx {idx} rank {rank}/{world_size}")

        if args.data_type == "wds_img":
+
            def init_wds(self, bias=0):
                def identity(x):
-                    return x            
+                    return x
+
                import webdataset as wds
                import torchvision.transforms as transforms
+
                # img_transform = transforms.Compose(
                #     [transforms.CenterCrop(256)]
                # )
-                img_transform = transforms.Compose([
-                    transforms.CenterCrop(512),
-                    transforms.Resize((args.my_img_size))
-                ])
-                self.data_raw = wds.WebDataset(args.data_file, resampled=True).shuffle(10000, initial=1000, rng=random.Random(epoch*100000+rank+bias*1e9)).decode("torchrgb").to_tuple("jpg", "json", "txt").map_tuple(img_transform, identity, identity)
+                img_transform = transforms.Compose(
+                    [transforms.CenterCrop(512), transforms.Resize((args.my_img_size))]
+                )
+                self.data_raw = (
+                    wds.WebDataset(args.data_file, resampled=True)
+                    .shuffle(
+                        10000,
+                        initial=1000,
+                        rng=random.Random(epoch * 100000 + rank + bias * 1e9),
+                    )
+                    .decode("torchrgb")
+                    .to_tuple("jpg", "json", "txt")
+                    .map_tuple(img_transform, identity, identity)
+                )
                for pp in self.data_raw.pipeline:
-                    if 'Resampled' in str(pp):
+                    if "Resampled" in str(pp):
                        pp.deterministic = True
+
                        def worker_seed():
-                            return rank*100000+epoch+bias*1e9
+                            return rank * 100000 + epoch + bias * 1e9
+
                        pp.worker_seed = worker_seed
                self.data = iter(self.data_raw)
                # print(f"WebDataset loaded for rank {rank} epoch {epoch}")
+
            if self.data == None:
                init_wds(self)
            trial = 0
            while trial < 10:
                try:
-                    dd = next(self.data) # jpg, json, txt
+                    dd = next(self.data)  # jpg, json, txt
                    break
                except:
-                    print(f'[dataloader error - epoch {epoch} rank {rank} - trying a new shuffle]')
+                    print(
+                        f"[dataloader error - epoch {epoch} rank {rank} - trying a new shuffle]"
+                    )
                    self.error_count += 1
                    init_wds(self, self.error_count)
                    trial += 1
@@ -144,7 +186,7 @@ class MyDataset(Dataset):
            return dd[0], dd[2]
        else:
            if args.data_type == "uint16":
-                i = np.random.randint(0, self.data_size-1)
+                i = np.random.randint(0, self.data_size - 1)
                dix = self.data[i]
                x = torch.tensor(dix[:-1], dtype=torch.long)
                y = torch.tensor(dix[1:], dtype=torch.long)
@@ -196,7 +238,12 @@ class MyDataset(Dataset):
                        z_sum = 0
                        isGood = False
                        for i in range(3, ctx_len):
-                            if dix[i] == 27 and dix[i-1] == 34 and dix[i-2] == 187 and dix[i-3] == 187:
+                            if (
+                                dix[i] == 27
+                                and dix[i - 1] == 34
+                                and dix[i - 2] == 187
+                                and dix[i - 3] == 187
+                            ):
                                isGood = True
                            if dix[i] == 0:
                                isGood = False
@@ -206,7 +253,9 @@ class MyDataset(Dataset):
                        if z_sum == 0:
                            z = [1] * ctx_len
                            i = np.random.randint(0, self.data_pile_size - req_len)
-                            dix = self.data_pile.get(idx=0, offset=i, length=req_len).astype(int)
+                            dix = self.data_pile.get(
+                                idx=0, offset=i, length=req_len
+                            ).astype(int)
                    z = torch.tensor(z, dtype=torch.bfloat16)

                x = torch.tensor(dix[:-1], dtype=torch.long)
--- a/finetune/lora/v4/src/model.py
+++ b/finetune/lora/v4/src/model.py
@@ -5,6 +5,7 @@
 import functools
 import os, math, gc, importlib
 import torch
+
 # torch._C._jit_set_profiling_executor(True)
 # torch._C._jit_set_profiling_mode(True)
 import torch.nn as nn
@@ -13,7 +14,8 @@ from torch.nn import functional as F
 import pytorch_lightning as pl
 from pytorch_lightning.utilities import rank_zero_info, rank_zero_only
 from pytorch_lightning.strategies import DeepSpeedStrategy
-if importlib.util.find_spec('deepspeed'):
+
+if importlib.util.find_spec("deepspeed"):
    import deepspeed
    from deepspeed.ops.adam import DeepSpeedCPUAdam, FusedAdam

@@ -28,9 +30,10 @@ LORA_CONFIG = {


 try:
-    print('RWKV_MY_TESTING', os.environ["RWKV_MY_TESTING"])
+    print("RWKV_MY_TESTING", os.environ["RWKV_MY_TESTING"])
 except:
-    os.environ["RWKV_MY_TESTING"] = ''
+    os.environ["RWKV_MY_TESTING"] = ""
+

 def __nop(ob):
    return ob
@@ -53,7 +56,26 @@ T_MAX = int(os.environ["RWKV_T_MAX"])  # TAKES LOTS OF VRAM!
 from torch.utils.cpp_extension import load

 if os.environ["RWKV_FLOAT_MODE"] == "bf16":
-    wkv_cuda = load(name=f"wkv_{T_MAX}_bf16", sources=["finetune/lora/cuda/wkv_op_bf16.cpp", "finetune/lora/cuda/wkv_cuda_bf16.cu"], verbose=True, extra_cuda_cflags=["-t 4", "-std=c++17", "-res-usage", "--maxrregcount 60", "--use_fast_math", "-O3", "-Xptxas -O3", "--extra-device-vectorization", f"-DTmax={T_MAX}"])
+    wkv_cuda = load(
+        name=f"wkv_{T_MAX}_bf16",
+        sources=[
+            "finetune/lora/v4/cuda/wkv_op_bf16.cpp",
+            "finetune/lora/v4/cuda/wkv_cuda_bf16.cu",
+        ],
+        verbose=True,
+        extra_cuda_cflags=[
+            "-t 4",
+            "-std=c++17",
+            "-res-usage",
+            "--maxrregcount 60",
+            "--use_fast_math",
+            "-O3",
+            "-Xptxas -O3",
+            "--extra-device-vectorization",
+            f"-DTmax={T_MAX}",
+        ],
+    )
+
    class WKV(torch.autograd.Function):
        @staticmethod
        def forward(ctx, B, T, C, w, u, k, v):
@@ -66,10 +88,16 @@ if os.environ["RWKV_FLOAT_MODE"] == "bf16":
            u = u.contiguous()
            k = k.contiguous()
            v = v.contiguous()
-            y = torch.empty((B, T, C), device=w.device, memory_format=torch.contiguous_format, dtype=torch.bfloat16)
+            y = torch.empty(
+                (B, T, C),
+                device=w.device,
+                memory_format=torch.contiguous_format,
+                dtype=torch.bfloat16,
+            )
            wkv_cuda.forward(B, T, C, w, u, k, v, y)
            ctx.save_for_backward(w, u, k, v, y)
            return y
+
        @staticmethod
        def backward(ctx, gy):
            B = ctx.B
@@ -78,16 +106,54 @@ if os.environ["RWKV_FLOAT_MODE"] == "bf16":
            assert T <= T_MAX
            assert B * C % min(C, 32) == 0
            w, u, k, v, y = ctx.saved_tensors
-            gw = torch.empty((B, C), device=gy.device, memory_format=torch.contiguous_format, dtype=torch.bfloat16)
-            gu = torch.empty((B, C), device=gy.device, memory_format=torch.contiguous_format, dtype=torch.bfloat16)
-            gk = torch.empty((B, T, C), device=gy.device, memory_format=torch.contiguous_format, dtype=torch.bfloat16)
-            gv = torch.empty((B, T, C), device=gy.device, memory_format=torch.contiguous_format, dtype=torch.bfloat16)
+            gw = torch.empty(
+                (B, C),
+                device=gy.device,
+                memory_format=torch.contiguous_format,
+                dtype=torch.bfloat16,
+            )
+            gu = torch.empty(
+                (B, C),
+                device=gy.device,
+                memory_format=torch.contiguous_format,
+                dtype=torch.bfloat16,
+            )
+            gk = torch.empty(
+                (B, T, C),
+                device=gy.device,
+                memory_format=torch.contiguous_format,
+                dtype=torch.bfloat16,
+            )
+            gv = torch.empty(
+                (B, T, C),
+                device=gy.device,
+                memory_format=torch.contiguous_format,
+                dtype=torch.bfloat16,
+            )
            wkv_cuda.backward(B, T, C, w, u, k, v, y, gy.contiguous(), gw, gu, gk, gv)
            gw = torch.sum(gw, dim=0)
            gu = torch.sum(gu, dim=0)
            return (None, None, None, gw, gu, gk, gv)
+
 else:
-    wkv_cuda = load(name=f"wkv_{T_MAX}", sources=["finetune/lora/cuda/wkv_op.cpp", "finetune/lora/cuda/wkv_cuda.cu"], verbose=True, extra_cuda_cflags=["-res-usage", "--maxrregcount 60", "--use_fast_math", "-O3", "-Xptxas -O3", "--extra-device-vectorization", f"-DTmax={T_MAX}"])
+    wkv_cuda = load(
+        name=f"wkv_{T_MAX}",
+        sources=[
+            "finetune/lora/v4/cuda/wkv_op.cpp",
+            "finetune/lora/v4/cuda/wkv_cuda.cu",
+        ],
+        verbose=True,
+        extra_cuda_cflags=[
+            "-res-usage",
+            "--maxrregcount 60",
+            "--use_fast_math",
+            "-O3",
+            "-Xptxas -O3",
+            "--extra-device-vectorization",
+            f"-DTmax={T_MAX}",
+        ],
+    )
+
    class WKV(torch.autograd.Function):
        @staticmethod
        def forward(ctx, B, T, C, w, u, k, v):
@@ -106,7 +172,9 @@ else:
                u = u.float().contiguous()
                k = k.float().contiguous()
                v = v.float().contiguous()
-            y = torch.empty((B, T, C), device=w.device, memory_format=torch.contiguous_format)
+            y = torch.empty(
+                (B, T, C), device=w.device, memory_format=torch.contiguous_format
+            )
            wkv_cuda.forward(B, T, C, w, u, k, v, y)
            ctx.save_for_backward(w, u, k, v, y)
            if "32" in os.environ["RWKV_FLOAT_MODE"]:
@@ -115,6 +183,7 @@ else:
                return y.half()
            elif os.environ["RWKV_FLOAT_MODE"] == "bf16":
                return y.bfloat16()
+
        @staticmethod
        def backward(ctx, gy):
            B = ctx.B
@@ -123,14 +192,26 @@ else:
            assert T <= T_MAX
            assert B * C % min(C, 32) == 0
            w, u, k, v, y = ctx.saved_tensors
-            gw = torch.empty((B, C), device=gy.device, memory_format=torch.contiguous_format)
-            gu = torch.empty((B, C), device=gy.device, memory_format=torch.contiguous_format)
-            gk = torch.empty((B, T, C), device=gy.device, memory_format=torch.contiguous_format)
-            gv = torch.empty((B, T, C), device=gy.device, memory_format=torch.contiguous_format)
+            gw = torch.empty(
+                (B, C), device=gy.device, memory_format=torch.contiguous_format
+            )
+            gu = torch.empty(
+                (B, C), device=gy.device, memory_format=torch.contiguous_format
+            )
+            gk = torch.empty(
+                (B, T, C), device=gy.device, memory_format=torch.contiguous_format
+            )
+            gv = torch.empty(
+                (B, T, C), device=gy.device, memory_format=torch.contiguous_format
+            )
            if "32" in os.environ["RWKV_FLOAT_MODE"]:
-                wkv_cuda.backward(B, T, C, w, u, k, v, y, gy.contiguous(), gw, gu, gk, gv)
+                wkv_cuda.backward(
+                    B, T, C, w, u, k, v, y, gy.contiguous(), gw, gu, gk, gv
+                )
            else:
-                wkv_cuda.backward(B, T, C, w, u, k, v, y, gy.float().contiguous(), gw, gu, gk, gv)
+                wkv_cuda.backward(
+                    B, T, C, w, u, k, v, y, gy.float().contiguous(), gw, gu, gk, gv
+                )
            gw = torch.sum(gw, dim=0)
            gu = torch.sum(gu, dim=0)
            if "32" in os.environ["RWKV_FLOAT_MODE"]:
@@ -138,7 +219,15 @@ else:
            elif os.environ["RWKV_FLOAT_MODE"] == "fp16":
                return (None, None, None, gw.half(), gu.half(), gk.half(), gv.half())
            elif os.environ["RWKV_FLOAT_MODE"] == "bf16":
-                return (None, None, None, gw.bfloat16(), gu.bfloat16(), gk.bfloat16(), gv.bfloat16())
+                return (
+                    None,
+                    None,
+                    None,
+                    gw.bfloat16(),
+                    gu.bfloat16(),
+                    gk.bfloat16(),
+                    gv.bfloat16(),
+                )


 def RUN_CUDA(B, T, C, w, u, k, v):
@@ -151,15 +240,17 @@ def RUN_CUDA(B, T, C, w, u, k, v):


 class LoraLinear(nn.Module):
-
    def __init__(self, in_features: int, out_features: int, bias: bool):
        super().__init__()

        self.weight = nn.Parameter(torch.empty((out_features, in_features)))
        assert bias == False, "Biased LoraLinear not supported"

-        r, alpha, dropout = LORA_CONFIG["r"], LORA_CONFIG[
-            "alpha"], LORA_CONFIG["dropout"]
+        r, alpha, dropout = (
+            LORA_CONFIG["r"],
+            LORA_CONFIG["alpha"],
+            LORA_CONFIG["dropout"],
+        )
        self.lora_A = nn.Parameter(torch.empty(r, in_features))
        self.lora_B = nn.Parameter(torch.empty(out_features, r))
        self.lora_dropout = nn.Dropout(dropout)
@@ -170,9 +261,9 @@ class LoraLinear(nn.Module):
        nn.init.zeros_(self.lora_B)

    def forward(self, x):
-        return (
-            F.linear(x, self.weight) + self.scaling *
-            F.linear(F.linear(self.lora_dropout(x), self.lora_A), self.lora_B))
+        return F.linear(x, self.weight) + self.scaling * F.linear(
+            F.linear(self.lora_dropout(x), self.lora_A), self.lora_B
+        )


@functools.wraps(LoraLinear)
@@ -214,17 +305,23 @@ class RWKV_TimeMix(MyModule):
            # fancy time_decay
            decay_speed = torch.ones(args.dim_att)
            for h in range(args.dim_att):
-                decay_speed[h] = -5 + 8 * (h / (args.dim_att - 1)) ** (0.7 + 1.3 * ratio_0_to_1)
+                decay_speed[h] = -5 + 8 * (h / (args.dim_att - 1)) ** (
+                    0.7 + 1.3 * ratio_0_to_1
+                )
            self.time_decay = nn.Parameter(decay_speed)
            # print(layer_id, self.time_decay.flatten()[:3].cpu().numpy(), '...', self.time_decay.flatten()[-3:].cpu().numpy())

            # fancy time_first
            zigzag = torch.tensor([(i + 1) % 3 - 1 for i in range(args.dim_att)]) * 0.5
-            self.time_first = nn.Parameter(torch.ones(args.dim_att) * math.log(0.3) + zigzag)
+            self.time_first = nn.Parameter(
+                torch.ones(args.dim_att) * math.log(0.3) + zigzag
+            )

            # fancy time_mix
            self.time_mix_k = nn.Parameter(torch.pow(ddd, ratio_1_to_almost0))
-            self.time_mix_v = nn.Parameter(torch.pow(ddd, ratio_1_to_almost0) + 0.3 * ratio_0_to_1)
+            self.time_mix_v = nn.Parameter(
+                torch.pow(ddd, ratio_1_to_almost0) + 0.3 * ratio_0_to_1
+            )
            self.time_mix_r = nn.Parameter(torch.pow(ddd, 0.5 * ratio_1_to_almost0))

        self.time_shift = nn.ZeroPad2d((0, 0, 1, -1))
@@ -235,8 +332,10 @@ class RWKV_TimeMix(MyModule):

        self.output = nn.Linear(args.dim_att, args.n_embd, bias=False)

-        if 'a' in os.environ["RWKV_MY_TESTING"]:
-            self.register_buffer("att_mask", torch.tril(torch.ones(args.ctx_len, args.ctx_len)))
+        if "a" in os.environ["RWKV_MY_TESTING"]:
+            self.register_buffer(
+                "att_mask", torch.tril(torch.ones(args.ctx_len, args.ctx_len))
+            )
            d_qkv = args.n_embd // 16
            self.qq = nn.Linear(args.n_embd, d_qkv, bias=False)
            self.kk = nn.Linear(args.n_embd, d_qkv, bias=False)
@@ -245,12 +344,17 @@ class RWKV_TimeMix(MyModule):
            with torch.no_grad():
                self.time_mix_qq = nn.Parameter(torch.pow(ddd, ratio_1_to_almost0))
                self.time_mix_kk = nn.Parameter(torch.pow(ddd, ratio_1_to_almost0))
-                self.time_mix_vv = nn.Parameter(torch.pow(ddd, ratio_1_to_almost0) + 0.3 * ratio_0_to_1)
+                self.time_mix_vv = nn.Parameter(
+                    torch.pow(ddd, ratio_1_to_almost0) + 0.3 * ratio_0_to_1
+                )
+
+    if "a" not in os.environ["RWKV_MY_TESTING"]:

-    if 'a' not in os.environ["RWKV_MY_TESTING"]:
        @MyFunction
        def jit_func(self, x):
-            xx = self.time_shift(x) # Mix x with the previous timestep to produce xk, xv, xr
+            xx = self.time_shift(
+                x
+            )  # Mix x with the previous timestep to produce xk, xv, xr
            xk = x * self.time_mix_k + xx * (1 - self.time_mix_k)
            xv = x * self.time_mix_v + xx * (1 - self.time_mix_v)
            xr = x * self.time_mix_r + xx * (1 - self.time_mix_r)
@@ -263,21 +367,26 @@ class RWKV_TimeMix(MyModule):
        def forward(self, x):
            B, T, C = x.size()  # x = (Batch,Time,Channel)
            sr, k, v = self.jit_func(x)
-            rwkv = sr * RUN_CUDA(B, T, self.args.dim_att, self.time_decay, self.time_first, k, v)
+            rwkv = sr * RUN_CUDA(
+                B, T, self.args.dim_att, self.time_decay, self.time_first, k, v
+            )
            return self.output(rwkv)

-    if 'a' in os.environ["RWKV_MY_TESTING"]:
+    if "a" in os.environ["RWKV_MY_TESTING"]:
+
        @MyFunction
        def QKV(self, q, k, v):
            att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
-            att = att.masked_fill(self.att_mask == 0, float('-inf'))
-            att = F.softmax(att, dim = -1)
+            att = att.masked_fill(self.att_mask == 0, float("-inf"))
+            att = F.softmax(att, dim=-1)
            x = att @ v
            return x

        @MyFunction
        def jit_funcQKV(self, x):
-            xx = self.time_shift(x) # Mix x with the previous timestep to produce xk, xv, xr
+            xx = self.time_shift(
+                x
+            )  # Mix x with the previous timestep to produce xk, xv, xr
            xk = x * self.time_mix_k + xx * (1 - self.time_mix_k)
            xv = x * self.time_mix_v + xx * (1 - self.time_mix_v)
            xr = x * self.time_mix_r + xx * (1 - self.time_mix_r)
@@ -296,12 +405,16 @@ class RWKV_TimeMix(MyModule):
        def forward(self, x):
            B, T, C = x.size()  # x = (Batch,Time,Channel)
            sr, k, v, qq, kk, vv = self.jit_funcQKV(x)
-            rwkv = sr * RUN_CUDA(B, T, self.args.dim_att, self.time_decay, self.time_first, k, v)
+            rwkv = sr * RUN_CUDA(
+                B, T, self.args.dim_att, self.time_decay, self.time_first, k, v
+            )
            rwkv = self.output(rwkv) + self.oo(self.QKV(qq, kk, vv))
            return rwkv

+
 ########################################################################################################

+
 class RWKV_ChannelMix(MyModule):
    def __init__(self, args, layer_id):
        super().__init__()
@@ -331,6 +444,7 @@ class RWKV_ChannelMix(MyModule):
        kv = self.value(k)
        return torch.sigmoid(self.receptance(xr)) * kv

+
 class MishGLU(MyModule):
    def __init__(self, args, layer_id):
        super().__init__()
@@ -360,6 +474,7 @@ class MishGLU(MyModule):
        b = self.bb(xb)
        return self.value(a * F.mish(b))

+
 ########################################################################################################
 # The RWKV Model with our blocks
 ########################################################################################################
@@ -377,15 +492,19 @@ class Block(nn.Module):
        if self.layer_id == 0:
            self.ln0 = nn.LayerNorm(args.n_embd)
            if args.my_pos_emb > 0:
-                self.pos_emb_x = nn.Parameter(torch.zeros((1,args.my_pos_emb,args.n_embd)))
-                self.pos_emb_y = nn.Parameter(torch.zeros((args.my_pos_emb,1,args.n_embd)))
+                self.pos_emb_x = nn.Parameter(
+                    torch.zeros((1, args.my_pos_emb, args.n_embd))
+                )
+                self.pos_emb_y = nn.Parameter(
+                    torch.zeros((args.my_pos_emb, 1, args.n_embd))
+                )

        if self.layer_id == 0 and self.args.pre_ffn > 0:
            self.ffnPre = RWKV_ChannelMix(args, 0)
        else:
            self.att = RWKV_TimeMix(args, layer_id)

-        if 'g' in os.environ["RWKV_MY_TESTING"]:
+        if "g" in os.environ["RWKV_MY_TESTING"]:
            self.ffn = MishGLU(args, layer_id)
        else:
            self.ffn = RWKV_ChannelMix(args, layer_id)
@@ -395,7 +514,9 @@ class Block(nn.Module):
            self.tiny_q = nn.Linear(args.n_embd, args.tiny_att_dim, bias=False)
            self.tiny_k = nn.Linear(args.n_embd, args.tiny_att_dim, bias=False)
            self.tiny_v = nn.Linear(args.n_embd, args.n_embd, bias=False)
-            self.register_buffer("tiny_mask", torch.tril(torch.ones(args.ctx_len, args.ctx_len)))
+            self.register_buffer(
+                "tiny_mask", torch.tril(torch.ones(args.ctx_len, args.ctx_len))
+            )

    def forward(self, x, x_emb=None):
        args = self.args
@@ -403,7 +524,7 @@ class Block(nn.Module):
        if self.layer_id == 0:
            x = self.ln0(x)
            if args.my_pos_emb > 0:
-                pos_emb = (self.pos_emb_x + self.pos_emb_y).reshape(T+1, -1)[:-1,:]
+                pos_emb = (self.pos_emb_x + self.pos_emb_y).reshape(T + 1, -1)[:-1, :]
                x = x + pos_emb

        if self.layer_id == 0 and args.pre_ffn > 0:
@@ -443,13 +564,13 @@ class RWKV(pl.LightningModule):
    def __init__(self, args):
        super().__init__()
        self.args = args
-        if not hasattr(args, 'dim_att'):
+        if not hasattr(args, "dim_att"):
            args.dim_att = args.n_embd
-        if not hasattr(args, 'dim_ffn'):
+        if not hasattr(args, "dim_ffn"):
            args.dim_ffn = args.n_embd * 4
-        if not hasattr(args, 'tiny_att_layer'):
+        if not hasattr(args, "tiny_att_layer"):
            args.tiny_att_layer = -1
-        if not hasattr(args, 'tiny_att_dim'):
+        if not hasattr(args, "tiny_att_dim"):
            args.tiny_att_dim = -1

        self.emb = nn.Embedding(args.vocab_size, args.n_embd)
@@ -462,7 +583,9 @@ class RWKV(pl.LightningModule):
        if args.head_qk > 0:
            self.head_q = nn.Linear(args.n_embd, args.head_qk, bias=False)
            self.head_k = nn.Linear(args.n_embd, args.head_qk, bias=False)
-            self.register_buffer("copy_mask", torch.tril(torch.ones(args.ctx_len, args.ctx_len)))
+            self.register_buffer(
+                "copy_mask", torch.tril(torch.ones(args.ctx_len, args.ctx_len))
+            )

    def configure_optimizers(self):
        args = self.args
@@ -494,19 +617,46 @@ class RWKV(pl.LightningModule):
            param_dict = {n: p for n, p in self.named_parameters()}
            if args.my_pile_stage == 2:
                optim_groups = [
-                    {"params": [param_dict[n] for n in lr_1x], "weight_decay": 0.0, "my_lr_scale": 1.0},
-                    {"params": [param_dict[n] for n in lr_2x], "weight_decay": 0.0, "my_lr_scale": 5.0},# test: 2e-3 / args.lr_init},
-                    {"params": [param_dict[n] for n in lr_3x], "weight_decay": 0.0, "my_lr_scale": 5.0},# test: 3e-3 / args.lr_init},
+                    {
+                        "params": [param_dict[n] for n in lr_1x],
+                        "weight_decay": 0.0,
+                        "my_lr_scale": 1.0,
+                    },
+                    {
+                        "params": [param_dict[n] for n in lr_2x],
+                        "weight_decay": 0.0,
+                        "my_lr_scale": 5.0,
+                    },  # test: 2e-3 / args.lr_init},
+                    {
+                        "params": [param_dict[n] for n in lr_3x],
+                        "weight_decay": 0.0,
+                        "my_lr_scale": 5.0,
+                    },  # test: 3e-3 / args.lr_init},
                ]
            else:
                optim_groups = [
-                    {"params": [param_dict[n] for n in lr_1x], "weight_decay": 0.0, "my_lr_scale": 1.0},
-                    {"params": [param_dict[n] for n in lr_2x], "weight_decay": 0.0, "my_lr_scale": 2.0},
-                    {"params": [param_dict[n] for n in lr_3x], "weight_decay": 0.0, "my_lr_scale": 3.0},
+                    {
+                        "params": [param_dict[n] for n in lr_1x],
+                        "weight_decay": 0.0,
+                        "my_lr_scale": 1.0,
+                    },
+                    {
+                        "params": [param_dict[n] for n in lr_2x],
+                        "weight_decay": 0.0,
+                        "my_lr_scale": 2.0,
+                    },
+                    {
+                        "params": [param_dict[n] for n in lr_3x],
+                        "weight_decay": 0.0,
+                        "my_lr_scale": 3.0,
+                    },
                ]
        else:
            optim_groups = [
-                {"params": [p for n, p in self.named_parameters()], "weight_decay": 0.0},
+                {
+                    "params": [p for n, p in self.named_parameters()],
+                    "weight_decay": 0.0,
+                },
            ]

        for g in optim_groups:
@@ -514,8 +664,26 @@ class RWKV(pl.LightningModule):
        optim_groups = [g for g in optim_groups if len(g["params"]) > 0]

        if self.deepspeed_offload:
-            return DeepSpeedCPUAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adamw_mode=False, weight_decay=0, amsgrad=False)
-        return FusedAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adam_w_mode=False, weight_decay=0, amsgrad=False)
+            return DeepSpeedCPUAdam(
+                optim_groups,
+                lr=self.args.lr_init,
+                betas=self.args.betas,
+                eps=self.args.adam_eps,
+                bias_correction=True,
+                adamw_mode=False,
+                weight_decay=0,
+                amsgrad=False,
+            )
+        return FusedAdam(
+            optim_groups,
+            lr=self.args.lr_init,
+            betas=self.args.betas,
+            eps=self.args.adam_eps,
+            bias_correction=True,
+            adam_w_mode=False,
+            weight_decay=0,
+            amsgrad=False,
+        )
        # return ZeroOneAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, weight_decay=0, amsgrad=False, cuda_aware=False)

    @property
@@ -589,10 +757,14 @@ class RWKV(pl.LightningModule):

            logits = self(idx)
            if sum_mask == mask.shape[0]:
-                loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))
+                loss = F.cross_entropy(
+                    logits.view(-1, logits.size(-1)), targets.view(-1)
+                )
                # print('rank', self.global_rank, 'loss', loss.item())
            else:
-                loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), reduction='none')
+                loss = F.cross_entropy(
+                    logits.view(-1, logits.size(-1)), targets.view(-1), reduction="none"
+                )
                # loss_raw = loss
                loss = torch.sum(loss * mask) / sum_mask

@@ -632,7 +804,14 @@ class RWKV(pl.LightningModule):

            gain = 1.0
            scale = 1.0
-            if "ln_" in n or ".ln" in n or "time_" in n or "_mask" in n or "pos_emb" in n or '.mask.' in n:
+            if (
+                "ln_" in n
+                or ".ln" in n
+                or "time_" in n
+                or "_mask" in n
+                or "pos_emb" in n
+                or ".mask." in n
+            ):
                m[n] = p
            else:
                if n == "emb.weight":
@@ -640,7 +819,19 @@ class RWKV(pl.LightningModule):
                else:
                    if shape[0] > shape[1]:
                        gain = math.sqrt(shape[0] / shape[1])
-                    for kk in [".att.key.", ".att.receptance.", ".att.output.", ".att.key.", ".ffn.value.", ".ffn.receptance.", ".ffnPre.value.", ".ffnPre.receptance.", "head_q.", '.oo.', '.rr.']:
+                    for kk in [
+                        ".att.key.",
+                        ".att.receptance.",
+                        ".att.output.",
+                        ".att.key.",
+                        ".ffn.value.",
+                        ".ffn.receptance.",
+                        ".ffnPre.value.",
+                        ".ffnPre.receptance.",
+                        "head_q.",
+                        ".oo.",
+                        ".rr.",
+                    ]:
                        if kk in n:
                            scale = 0
                    if n == "head.weight":
@@ -650,7 +841,9 @@ class RWKV(pl.LightningModule):
                    if "head_q." in n:
                        scale = 0

-                print(f"{str(shape[0]).ljust(5)} {str(shape[1]).ljust(5)} {str(scale).ljust(4)} {n}")
+                print(
+                    f"{str(shape[0]).ljust(5)} {str(shape[1]).ljust(5)} {str(scale).ljust(4)} {n}"
+                )

                if self.args.accelerator.upper() == "GPU":
                    m[n] = torch.empty((shape[0], shape[1]), device="cuda")
--- a/finetune/lora/v4/src/trainer.py
+++ b/finetune/lora/v4/src/trainer.py
@@ -5,15 +5,17 @@ import pytorch_lightning as pl
 from pytorch_lightning.utilities import rank_zero_info, rank_zero_only
 from .model import LORA_CONFIG

+
 def my_save(dd, ff):
-    if '14b-run1' not in ff:
+    if "14b-run1" not in ff:
        torch.save(dd, ff)
    else:
-        fn = ff.split('/')[-1]
-        fff = '/dev/shm/' + fn
+        fn = ff.split("/")[-1]
+        fff = "/dev/shm/" + fn
        torch.save(dd, fff)
        subprocess.Popen(f" aws s3 mv {fff} s3://rwkv-14b-4k/{fn} --quiet", shell=True)

+
 class train_callback(pl.Callback):
    def __init__(self, args):
        super().__init__()
@@ -38,7 +40,9 @@ class train_callback(pl.Callback):
            if args.lr_final == 0 or args.lr_init == 0:  # linear decay
                lr = args.lr_init + (args.lr_final - args.lr_init) * progress
            else:  # exp decay
-                lr = args.lr_init * math.exp(math.log(args.lr_final / args.lr_init) * pow(progress, 1))
+                lr = args.lr_init * math.exp(
+                    math.log(args.lr_final / args.lr_init) * pow(progress, 1)
+                )

            if trainer.global_step < w_step:
                lr = lr * (0.2 + 0.8 * trainer.global_step / w_step)
@@ -60,7 +64,9 @@ class train_callback(pl.Callback):
                trainer.my_loss_sum = 0
                trainer.my_loss_count = 0
                trainer.my_log = open(args.proj_dir + "/train_log.txt", "a")
-                trainer.my_log.write(f"NEW RUN {args.my_timestamp}\n{vars(self.args)}\n")
+                trainer.my_log.write(
+                    f"NEW RUN {args.my_timestamp}\n{vars(self.args)}\n"
+                )
                try:
                    print(f"\n{trainer.strategy.config}\n")
                    trainer.my_log.write(f"{trainer.strategy.config}\n")
@@ -70,6 +76,7 @@ class train_callback(pl.Callback):
                if len(args.wandb) > 0:
                    print("Login to wandb...")
                    import wandb
+
                    wandb.init(
                        project=args.wandb,
                        name=args.run_name + " " + args.my_timestamp,
@@ -102,20 +109,26 @@ class train_callback(pl.Callback):
            # self.log("s", real_step, prog_bar=True, on_step=True)

            if len(args.wandb) > 0:
-                lll = {"loss": trainer.my_loss, "lr": trainer.my_lr, "Gtokens": real_step * token_per_step / 1e9}
+                lll = {
+                    "loss": trainer.my_loss,
+                    "lr": trainer.my_lr,
+                    "Gtokens": real_step * token_per_step / 1e9,
+                }
                if kt_s > 0:
                    lll["kt/s"] = kt_s
                trainer.my_wandb.log(lll, step=int(real_step))
            if args.magic_prime > 0:
                expand_factor = 2 if args.my_qa_mask > 0 else 1
-                if int(real_step) == int(args.magic_prime * expand_factor // args.real_bsz) - 1:
+                if (
+                    int(real_step)
+                    == int(args.magic_prime * expand_factor // args.real_bsz) - 1
+                ):
                    to_save_dict = pl_module.state_dict()
                    my_save(
                        to_save_dict,
                        f"{args.proj_dir}/rwkv-final.pth",
                    )

-
    def on_train_epoch_start(self, trainer, pl_module):
        args = self.args
        dataset = trainer.train_dataloader.dataset.datasets
@@ -128,24 +141,28 @@ class train_callback(pl.Callback):
    def on_train_epoch_end(self, trainer, pl_module):
        args = self.args
        if trainer.is_global_zero:  # logging & save state_dict
-            if (args.epoch_save > 0 and trainer.current_epoch % args.epoch_save == 0) or trainer.current_epoch == args.epoch_count - 1:
-                if args.data_type == 'wds_img':
+            if (
+                args.epoch_save > 0 and trainer.current_epoch % args.epoch_save == 0
+            ) or trainer.current_epoch == args.epoch_count - 1:
+                if args.data_type == "wds_img":
                    raw_dict = pl_module.state_dict()
                    to_save_dict = {}
                    for k in raw_dict:
-                        if k.startswith('encoder.') or k.startswith('decoder.'):
+                        if k.startswith("encoder.") or k.startswith("decoder."):
                            to_save_dict[k] = raw_dict[k]
                else:
                    to_save_dict = pl_module.state_dict()

                if args.lora:
-                    enable_time_finetune = 'time' in LORA_CONFIG["parts"]
-                    enable_ln_finetune = 'ln' in LORA_CONFIG["parts"]
+                    enable_time_finetune = "time" in LORA_CONFIG["parts"]
+                    enable_ln_finetune = "ln" in LORA_CONFIG["parts"]
                    lora_dict = {}
                    for name, state in to_save_dict.items():
-                        if ('.lora_' in name
-                                or (enable_time_finetune and '.time_' in name)
-                                or (enable_ln_finetune and '.ln' in name)):
+                        if (
+                            ".lora_" in name
+                            or (enable_time_finetune and ".time_" in name)
+                            or (enable_ln_finetune and ".ln" in name)
+                        ):
                            lora_dict[name] = state
                    to_save_dict = lora_dict

@@ -155,8 +172,10 @@ class train_callback(pl.Callback):
                        f"{args.proj_dir}/rwkv-{args.epoch_begin + trainer.current_epoch}.pth",
                    )
                except Exception as e:
-                    print('Error\n\n', e, '\n\n')
-            trainer.my_log.write(f"{args.epoch_begin + trainer.current_epoch} {trainer.my_epoch_loss:.6f} {math.exp(trainer.my_epoch_loss):.4f} {trainer.my_lr:.8f} {datetime.datetime.now()} {trainer.current_epoch}\n")
+                    print("Error\n\n", e, "\n\n")
+            trainer.my_log.write(
+                f"{args.epoch_begin + trainer.current_epoch} {trainer.my_epoch_loss:.6f} {math.exp(trainer.my_epoch_loss):.4f} {trainer.my_lr:.8f} {datetime.datetime.now()} {trainer.current_epoch}\n"
+            )
            trainer.my_log.flush()

            trainer.my_loss_sum = 0
@@ -178,22 +197,22 @@ def generate_init_weight(model, init_weight_name):
                    mm[k] = src.reshape(mm[k].shape)
                except:
                    tmp = mm[k].squeeze().clone()
-                    print(k, src.shape, '-->', mm[k].shape)
+                    print(k, src.shape, "-->", mm[k].shape)
                    ss = src.shape[0]
                    dd = tmp.shape[0]
                    for i in range(dd):
                        pos = i / dd * ss
                        if pos >= ss - 1:
-                            tmp[i] = src[ss-1]
+                            tmp[i] = src[ss - 1]
                        else:
                            p0 = int(math.floor(pos))
                            ii = pos - p0
-                            tmp[i] = src[p0] * (1-ii) + src[p0+1] * (ii)
+                            tmp[i] = src[p0] * (1 - ii) + src[p0 + 1] * (ii)
                    mm[k] = tmp.reshape(mm[k].shape)
                    sss = src.squeeze().float().cpu().numpy()
-                    print(sss[:10], '...', sss[-10:])
+                    print(sss[:10], "...", sss[-10:])
                    mmm = mm[k].squeeze().float().cpu().numpy()
-                    print(mmm[:10], '...', mmm[-10:])
+                    print(mmm[:10], "...", mmm[-10:])

    print(f"Save to {init_weight_name}...")
    torch.save(mm, init_weight_name)
--- a/finetune/lora/v4/src/utils.py
+++ b/finetune/lora/v4/src/utils.py
@@ -6,6 +6,7 @@ from torch.nn import functional as F
 time_slot = {}
 time_ref = time.time_ns()

+
 def record_time(name):
    if name not in time_slot:
        time_slot[name] = 1e20
@@ -13,20 +14,23 @@ def record_time(name):
    if tt < time_slot[name]:
        time_slot[name] = tt

-class TOKENIZER():
-    def __init__(self, WORD_NAME, UNKNOWN_CHAR='\ue083'):
-        if 'list' in str(type(WORD_NAME)):
+
+class TOKENIZER:
+    def __init__(self, WORD_NAME, UNKNOWN_CHAR="\ue083"):
+        if "list" in str(type(WORD_NAME)):
            self.charMode = False
            if WORD_NAME[0] == WORD_NAME[1]:
                from transformers import PreTrainedTokenizerFast
+
                self.tokenizer = PreTrainedTokenizerFast(tokenizer_file=WORD_NAME[0])
            else:
                from transformers import GPT2TokenizerFast
+
                self.tokenizer = GPT2TokenizerFast(WORD_NAME[0], WORD_NAME[1])
            self.vocab_size = len(self.tokenizer)
        else:
            self.charMode = True
-            with open(WORD_NAME + '.json', "r", encoding="utf-16") as result_file:
+            with open(WORD_NAME + ".json", "r", encoding="utf-16") as result_file:
                self.word_table = json.load(result_file)

            self.vocab_size = len(self.word_table)
@@ -37,23 +41,25 @@ class TOKENIZER():
            self.UNKNOWN_CHAR = self.stoi[UNKNOWN_CHAR]

    def refine_context(self, context):
-        context = context.strip().split('\n')
+        context = context.strip().split("\n")
        for c in range(len(context)):
-            context[c] = context[c].strip().strip('\u3000').strip('\r')
-        context = list(filter(lambda c: c != '', context))
-        context = '\n' + ('\n'.join(context)).strip()
-        if context == '':
-            context = '\n'
+            context[c] = context[c].strip().strip("\u3000").strip("\r")
+        context = list(filter(lambda c: c != "", context))
+        context = "\n" + ("\n".join(context)).strip()
+        if context == "":
+            context = "\n"
        return context

-    def sample_logits(self, out, x, ctx_len, temperature=1.0, top_p_usual=None, top_p_newline=None):
+    def sample_logits(
+        self, out, x, ctx_len, temperature=1.0, top_p_usual=None, top_p_newline=None
+    ):
        # out[self.UNKNOWN_CHAR] = -float('Inf')
        lastChar = int(x[-1])

        probs = F.softmax(out, dim=-1)

        if self.charMode:
-            if self.itos[lastChar] == '\n':
+            if self.itos[lastChar] == "\n":
                top_p = top_p_newline
            else:
                top_p = top_p_usual
@@ -81,6 +87,7 @@ class TOKENIZER():
            out = torch.multinomial(probs, num_samples=1)[0]
            return out

+
 def MaybeIsPrime(number):
    if FermatPrimalityTest(number) and MillerRabinPrimalityTest(number):
        return True
@@ -121,7 +128,9 @@ def MillerRabinPrimalityTest(number):
        if (randomNumberWithPower != 1) and (randomNumberWithPower != number - 1):
            iterationNumber = 1

-            while (iterationNumber <= timesTwoDividNumber - 1) and (randomNumberWithPower != number - 1):
+            while (iterationNumber <= timesTwoDividNumber - 1) and (
+                randomNumberWithPower != number - 1
+            ):
                randomNumberWithPower = pow(randomNumberWithPower, 2, number)
                iterationNumber = iterationNumber + 1
            if randomNumberWithPower != (number - 1):
--- a/finetune/lora/v4/train.py
+++ b/finetune/lora/v4/train.py
@@ -184,7 +184,7 @@ if __name__ == "__main__":
    args.num_sanity_val_steps = 0
    args.check_val_every_n_epoch = int(1e20)
    args.log_every_n_steps = int(1e20)
-    args.max_epochs = args.epoch_count  # continue forever
+    args.max_epochs = args.epoch_count  # -1 continue forever
    args.betas = (args.beta1, args.beta2)
    args.real_bsz = int(args.num_nodes) * int(args.devices) * args.micro_bsz
    os.environ["RWKV_T_MAX"] = str(args.ctx_len)
@@ -373,7 +373,7 @@ if __name__ == "__main__":
                    for param in module.parameters():
                        param.requires_grad = True
                elif enable_time_finetune and any(
-                        n.startswith("time") for n, _ in module.named_parameters()
+                    n.startswith("time") for n, _ in module.named_parameters()
                ):
                    for pname, param in module.named_parameters():
                        if pname.startswith("time"):
@@ -381,7 +381,7 @@ if __name__ == "__main__":
                            param.requires_grad = True

    if (
-            len(args.load_model) == 0 or args.my_pile_stage == 1
+        len(args.load_model) == 0 or args.my_pile_stage == 1
    ):  # shall we build the initial weights?
        init_weight_name = f"{args.proj_dir}/rwkv-init.pth"
        generate_init_weight(model, init_weight_name)  # save initial weights
@@ -423,8 +423,8 @@ if __name__ == "__main__":
    )

    if (
-            args.lr_init > 1e-4
-            or trainer.world_size * args.micro_bsz * trainer.accumulate_grad_batches < 8
+        args.lr_init > 1e-4
+        or trainer.world_size * args.micro_bsz * trainer.accumulate_grad_batches < 8
    ):
        if "I_KNOW_WHAT_IM_DOING" in os.environ:
            if trainer.global_rank == 0:
@@ -459,10 +459,10 @@ if __name__ == "__main__":

    if "deepspeed" in args.strategy:
        trainer.strategy.config["zero_optimization"]["allgather_bucket_size"] = (
-                args.ds_bucket_mb * 1000 * 1000
+            args.ds_bucket_mb * 1000 * 1000
        )
        trainer.strategy.config["zero_optimization"]["reduce_bucket_size"] = (
-                args.ds_bucket_mb * 1000 * 1000
+            args.ds_bucket_mb * 1000 * 1000
        )

    # must set shuffle=False, persistent_workers=False (because worker is in another thread)
--- a/finetune/lora/v5/cuda/wkv5_cuda.cu
+++ b/finetune/lora/v5/cuda/wkv5_cuda.cu
@@ -0,0 +1,202 @@
+#include <stdio.h>
+#include <assert.h>
+#include "ATen/ATen.h"
+typedef at::BFloat16 bf16;
+
+// WKV5 forward pass.
+// cuda_forward launches this kernel with B*H blocks of _N_ threads, so each block covers
+// one (batch, head) pair and thread i owns channel i of that head. For every time step
+// the thread computes
+//     y = sum_j r[j] * (u[j] * k[j] * v + state[j])
+// and then advances its private state with
+//     state[j] = state[j] * w[j] + k[j] * v.
+// r/k/v/y are addressed as flat arrays at offset b*T*C + step*C + h*_N_ + i.
+// NOTE(review): _N_ is not defined in this file -- presumably the head size supplied as a
+// compile-time macro by the build; confirm against the extension's compiler flags.
+template <typename F>
+__global__ void kernel_forward(const int B, const int T, const int C, const int H,
+                               const F *__restrict__ const _r, const F *__restrict__ const _k, const F *__restrict__ const _v, const float *__restrict__ _w, const F *__restrict__ _u,
+                               F *__restrict__ const _y)
+{
+    // Split the block index into batch index b and head index h; i is the channel in the head.
+    const int b = blockIdx.x / H;
+    const int h = blockIdx.x % H;
+    const int i = threadIdx.x;
+    // Advance the per-channel parameter pointers to this head's _N_ entries.
+    _w += h*_N_;
+    _u += h*_N_;
+
+    // r, k, u, w are shared by the block (one element written per thread);
+    // state is private to each thread and starts at zero.
+    __shared__ float r[_N_], k[_N_], u[_N_], w[_N_];
+    float state[_N_] = {0};
+
+    // Stage this head's w and u once; the second barrier makes them visible to all threads.
+    __syncthreads();
+    w[i] = _w[i];
+    u[i] = float(_u[i]);
+    __syncthreads();
+
+    // One iteration per time step: consecutive steps are C elements apart, T steps in total.
+    for (int t = b*T*C + h*_N_ + i; t < (b+1)*T*C + h*_N_ + i; t += C)
+    {
+        // First barrier: nobody may still be reading the previous step's r/k.
+        // Second barrier: the new r/k are complete before anyone reads them.
+        __syncthreads();
+        r[i] = float(_r[t]);
+        k[i] = float(_k[t]);
+        __syncthreads();
+
+        // v is only needed by the thread that owns this channel, so it stays private.
+        const float v = float(_v[t]);
+        float y = 0;
+
+        // Process four j-values per iteration by viewing the float arrays as float4;
+        // this relies on _N_ % 4 == 0, which cuda_forward asserts.
+        #pragma unroll
+        for (int j = 0; j < _N_; j+=4)
+        {
+            const float4& r_ = (float4&)(r[j]);
+            const float4& k_ = (float4&)(k[j]);
+            const float4& w_ = (float4&)(w[j]);
+            const float4& u_ = (float4&)(u[j]);
+            float4& s = (float4&)(state[j]);
+            float4 x;
+
+            // x = k[j] * v for the four lanes.
+            x.x = k_.x * v;
+            x.y = k_.y * v;
+            x.z = k_.z * v;
+            x.w = k_.w * v;
+
+            // Accumulate the output using the state from before this step.
+            y += r_.x * (u_.x * x.x + s.x);
+            y += r_.y * (u_.y * x.y + s.y);
+            y += r_.z * (u_.z * x.z + s.z);
+            y += r_.w * (u_.w * x.w + s.w);
+
+            // Scale the state by w and add the current contribution.
+            s.x = s.x * w_.x + x.x;
+            s.y = s.y * w_.y + x.y;
+            s.z = s.z * w_.z + x.z;
+            s.w = s.w * w_.w + x.w;
+        }
+        // Convert the float accumulator back to the storage type F.
+        _y[t] = F(y);
+    }
+}
+
+// WKV5 backward pass.
+// Same launch layout as the forward kernel (cuda_backward uses B*H blocks of _N_ threads):
+// block -> (batch b, head h), thread i -> channel i of that head.
+// The kernel makes four sweeps over the T time steps of its (b, h) slice:
+//   1. forward in time:  writes _gr[t] and accumulates gu, written to _gu afterwards;
+//   2. forward in time over the first T-2 steps, reading r and gy two steps ahead:
+//      accumulates gw, written to _gw scaled by __w[i];
+//   3. backward in time: writes _gk[t];
+//   4. backward in time: writes _gv[t].
+// _gu and _gw are indexed by b*C + h*_N_ + i, i.e. one value per (batch, channel);
+// any reduction over the batch presumably happens in the caller -- TODO confirm.
+// NOTE(review): _N_ is not defined in this file -- presumably a compile-time macro for
+// the head size; confirm against the extension's compiler flags.
+template <typename F>
+__global__ void kernel_backward(const int B, const int T, const int C, const int H,
+    const F *__restrict__ const _r, const F *__restrict__ const _k, const F *__restrict__ const _v, const float *__restrict__ _w, const float *__restrict__ __w, const F *__restrict__ _u, const F *__restrict__ const _gy,
+    F *__restrict__ const _gr, F *__restrict__ const _gk, F *__restrict__ const _gv, F *__restrict__ const _gw, F *__restrict__ const _gu)
+{
+    const int b = blockIdx.x / H;
+    const int h = blockIdx.x % H;
+    const int i = threadIdx.x;
+    // Advance the per-channel parameter pointers to this head's _N_ entries.
+    _w += h*_N_;
+    _u += h*_N_;
+    __w += h*_N_;
+
+    // w_/u_ hold the whole head's w and u (sweep 4 reads them by j);
+    // r/k/v/gy are per-step staging buffers filled one element per thread.
+    __shared__ float w_[_N_], u_[_N_];
+    __shared__ float r[_N_], k[_N_], v[_N_], gy[_N_];
+    __syncthreads();
+    w_[i] = _w[i];
+    u_[i] = float(_u[i]);
+    __syncthreads();
+
+    // This thread's own channel values, used by sweeps 1-3.
+    const float w = w_[i];
+    const float ww = __w[i];
+    const float u = u_[i];
+
+    // One private, zero-initialised accumulator array per sweep
+    // (state: sweep 1, saaaa/sbbbb: sweep 2, scccc: sweep 3, sdddd: sweep 4).
+    float state[_N_] = {0}, saaaa[_N_] = {0}, sbbbb[_N_] = {0}, scccc[_N_] = {0}, sdddd[_N_] = {0};
+
+    float gw = 0, gu = 0;
+    // t000: flat index of this thread's element at the first time step;
+    // t111: one past the last time step; t222: two time steps before t111.
+    const int t000 = b*T*C + h*_N_ + i;
+    const int t111 = (b+1)*T*C + h*_N_ + i;
+    const int t222 = t111 - 2*C;
+
+    // Sweep 1 (forward in time): gr for every step, plus the running sum for gu.
+    for (int t = t000; t < t111; t += C)
+    {
+        __syncthreads();
+        v[i] = float(_v[t]);
+        gy[i] = float(_gy[t]);
+        __syncthreads();
+
+        // Local scalar k shadows the shared array k inside this loop body.
+        const float k = float(_k[t]);
+        float gr = 0, gu_ = 0;
+
+        #pragma unroll
+        for (int j = 0; j < _N_; j++)
+        {
+            float& s = state[j];
+            float x = k * v[j];
+
+            gr += (u * x + s) * gy[j];
+            gu_ += x * gy[j];
+            s = s * w + x;
+        }
+        _gr[t] = F(gr);
+        gu += float(_r[t]) * gu_;
+    }
+    _gu[b*C + h*_N_ + i] = F(gu);
+    
+    // Sweep 2 (forward in time, stops two steps early): k and v come from step t while
+    // gy and r come from step t + 2.
+    for (int t = t000; t < t222; t += C)
+    {
+        __syncthreads();
+        v[i] = float(_v[t]);
+        gy[i] = float(_gy[t + 2*C]);
+        __syncthreads();
+
+        const float k = float(_k[t]);
+        float gw_ = 0;
+        
+        #pragma unroll
+        for (int j = 0; j < _N_; j++)
+        {
+            float& s = saaaa[j];
+            float& s2 = sbbbb[j];
+            float x = k * v[j];
+            
+            // s is the decayed running sum of x; s2 accumulates s with a further decay.
+            float tmp = w * (x + s);
+            s = tmp;
+            s2 = tmp + w * s2;
+            gw_ += s2 * gy[j];
+        }
+        gw += float(_r[t + 2*C]) * gw_;
+    }    
+    // The accumulated sum is multiplied by the second weight array (__w) before storing.
+    _gw[b*C + h*_N_ + i] = F(ww * gw);
+
+    // Sweep 3 (backward in time): gk for every step.
+    for (int t = t111 - C; t >= t000; t -= C)
+    {
+        __syncthreads();
+        v[i] = float(_v[t]);
+        gy[i] = float(_gy[t]);
+        __syncthreads();
+
+        const float rr = float(_r[t]);
+        float gk = 0;
+
+        #pragma unroll
+        for (int j = 0; j < _N_; j++)
+        {
+            float& s = scccc[j];
+            float x = rr * gy[j];
+            
+            gk += (u * x + s) * v[j];
+            s = x + s * w;
+        }
+        _gk[t] = F(gk);
+    }
+
+    // Sweep 4 (backward in time): gv for every step. Unlike sweeps 1-3 this one uses the
+    // per-j shared u_[j] / w_[j] rather than the thread's own u / w.
+    for (int t = t111 - C; t >= t000; t -= C)
+    {
+        __syncthreads();
+        r[i] = float(_r[t]);
+        k[i] = float(_k[t]);
+        __syncthreads();
+
+        const float gyy = float(_gy[t]);
+        float gv = 0;
+
+        #pragma unroll
+        for (int j = 0; j < _N_; j++)
+        {
+            float& s = sdddd[j];
+            float x = gyy * r[j];
+            
+            gv += (u_[j] * x + s) * k[j];
+            s = x + s * w_[j];
+        }
+        _gv[t] = F(gv);
+    }
+}
+
+// Host-side launcher for kernel_forward.
+// Checks that the channel count is exactly H heads of _N_ channels and that _N_ is a
+// multiple of 4 (the kernel reads its buffers four floats at a time), then starts one
+// block per (batch, head) pair with one thread per channel of the head.
+void cuda_forward(int B, int T, int C, int H, bf16 *r, bf16 *k, bf16 *v, float *w, bf16 *u, bf16 *y)
+{
+    assert(H*_N_ == C);
+    assert(_N_%4 == 0);
+
+    const dim3 grid(B * H);
+    const dim3 threads(_N_);
+    kernel_forward<<<grid, threads>>>(B, T, C, H, r, k, v, w, u, y);
+}
+
+// Host-side launcher for kernel_backward.
+// Applies the same shape checks as cuda_forward and uses the same launch layout:
+// B*H blocks, _N_ threads per block.
+void cuda_backward(int B, int T, int C, int H, bf16 *r, bf16 *k, bf16 *v, float *w, float *ww, bf16 *u, bf16 *gy, bf16 *gr, bf16 *gk, bf16 *gv, bf16 *gw, bf16 *gu)
+{
+    assert(H*_N_ == C);
+    assert(_N_%4 == 0);
+
+    const dim3 grid(B * H);
+    const dim3 threads(_N_);
+    kernel_backward<<<grid, threads>>>(B, T, C, H, r, k, v, w, ww, u, gy, gr, gk, gv, gw, gu);
+}
--- a/finetune/lora/v5/cuda/wkv5_op.cpp
+++ b/finetune/lora/v5/cuda/wkv5_op.cpp
@@ -0,0 +1,22 @@
+#include <torch/extension.h>
+#include "ATen/ATen.h"
+typedef at::BFloat16 bf16;
+
+void cuda_forward(int B, int T, int C, int H, bf16 *r, bf16 *k, bf16 *v, float *w, bf16 *u, bf16 *y);
+void cuda_backward(int B, int T, int C, int H, bf16 *r, bf16 *k, bf16 *v, float *w, float *ww, bf16 *u, bf16 *gy, bf16 *gr, bf16 *gk, bf16 *gv, bf16 *gw, bf16 *gu);
+
+// Python-facing forward entry point: unwraps the raw device pointers of the tensors
+// (r, k, v, u, y as bfloat16; w as float32) and hands them to the CUDA launcher.
+// data_ptr<T>() rejects tensors whose dtype does not match T.
+void forward(int64_t B, int64_t T, int64_t C, int64_t H, torch::Tensor &r, torch::Tensor &k, torch::Tensor &v, torch::Tensor &w, torch::Tensor &u, torch::Tensor &y) {
+    bf16 *r_ptr = r.data_ptr<bf16>();
+    bf16 *k_ptr = k.data_ptr<bf16>();
+    bf16 *v_ptr = v.data_ptr<bf16>();
+    float *w_ptr = w.data_ptr<float>();
+    bf16 *u_ptr = u.data_ptr<bf16>();
+    bf16 *y_ptr = y.data_ptr<bf16>();
+    cuda_forward(B, T, C, H, r_ptr, k_ptr, v_ptr, w_ptr, u_ptr, y_ptr);
+}
+// Python-facing backward entry point: unwraps the raw device pointers of the inputs
+// (r, k, v, u, gy as bfloat16; w and ww as float32) and of the gradient outputs
+// (gr, gk, gv, gw, gu as bfloat16) and hands them to the CUDA launcher.
+void backward(int64_t B, int64_t T, int64_t C, int64_t H, torch::Tensor &r, torch::Tensor &k, torch::Tensor &v, torch::Tensor &w, torch::Tensor &ww, torch::Tensor &u, torch::Tensor &gy, torch::Tensor &gr, torch::Tensor &gk, torch::Tensor &gv, torch::Tensor &gw, torch::Tensor &gu) {
+    bf16 *r_ptr = r.data_ptr<bf16>();
+    bf16 *k_ptr = k.data_ptr<bf16>();
+    bf16 *v_ptr = v.data_ptr<bf16>();
+    float *w_ptr = w.data_ptr<float>();
+    float *ww_ptr = ww.data_ptr<float>();
+    bf16 *u_ptr = u.data_ptr<bf16>();
+    bf16 *gy_ptr = gy.data_ptr<bf16>();
+    bf16 *gr_ptr = gr.data_ptr<bf16>();
+    bf16 *gk_ptr = gk.data_ptr<bf16>();
+    bf16 *gv_ptr = gv.data_ptr<bf16>();
+    bf16 *gw_ptr = gw.data_ptr<bf16>();
+    bf16 *gu_ptr = gu.data_ptr<bf16>();
+    cuda_backward(B, T, C, H, r_ptr, k_ptr, v_ptr, w_ptr, ww_ptr, u_ptr, gy_ptr, gr_ptr, gk_ptr, gv_ptr, gw_ptr, gu_ptr);
+}
+// Expose forward/backward as plain Python functions on the compiled extension module.
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+    m.def("forward", &forward, "wkv5 forward");
+    m.def("backward", &backward, "wkv5 backward");
+}
+
+// Register the same two functions as operators in the `wkv5` operator library as well.
+TORCH_LIBRARY(wkv5, m) {
+    m.def("forward", forward);
+    m.def("backward", backward);
+}
--- a/finetune/lora/v5/src/init.py
+++ b/finetune/lora/v5/src/init.py
--- a/finetune/lora/v5/src/binidx.py
+++ b/finetune/lora/v5/src/binidx.py
@@ -0,0 +1,303 @@
+from lib2to3.pgen2 import token
+import os
+import torch
+import numpy as np
+import shutil
+import struct
+from functools import lru_cache
+from itertools import accumulate
+
+
+def print_rank_0(*message):
+    """Disabled logging hook: accepts any arguments, prints nothing, returns None.
+
+    The rank-0-only printing it stands in for is kept below, commented out.
+    """
+    # if torch.distributed.is_initialized():
+    #     if torch.distributed.get_rank() == 0:
+    #         print(*message, flush=True)
+    # else:
+    #     print(*message, flush=True)
+    return None
+
+
+def _warmup_mmap_file(path):
+    """Disabled warm-up hook: ignores ``path`` and returns None without touching the file.
+
+    The sequential read it stands in for is kept below, commented out.
+    """
+    # with open(path, "rb") as stream:
+    #     while stream.read(100 * 1024 * 1024):
+    #         pass
+    return None
+
+
+# Maps the one-byte type code stored in the index-file header to the element type of
+# the data file; ``code()`` performs the reverse lookup.
+# NOTE(review): code 6 is the built-in ``float`` (which NumPy interprets as float64),
+# not a NumPy scalar type, so anything that needs its item size should go through
+# ``np.dtype(...)`` rather than instantiating the type.
+dtypes = {
+    1: np.uint8,
+    2: np.int8,
+    3: np.int16,
+    4: np.int32,
+    5: np.int64,
+    6: float,
+    7: np.double,
+    8: np.uint16,
+}
+
+
+def code(dtype):
+    """Return the integer type code under which ``dtype`` is registered in ``dtypes``.
+
+    The first matching entry (in table order) wins.
+
+    Raises:
+        ValueError: if ``dtype`` is not one of the registered element types.
+    """
+    for type_code, registered in dtypes.items():
+        if registered == dtype:
+            return type_code
+    raise ValueError(dtype)
+
+
+def index_file_path(prefix_path):
+    """Return the index-file name for a dataset prefix: the prefix followed by ``.idx``."""
+    return "".join((prefix_path, ".idx"))
+
+
+def data_file_path(prefix_path):
+    """Return the data-file name for a dataset prefix: the prefix followed by ``.bin``."""
+    return "".join((prefix_path, ".bin"))
+
+
+class MMapIndexedDataset(torch.utils.data.Dataset):
+    """Read-only dataset backed by a memory-mapped ``.bin`` / ``.idx`` file pair.
+
+    ``path`` is the common prefix of both files. The ``.idx`` file holds a header, the
+    per-item sizes (int32), the per-item byte offsets into the data file (int64) and a
+    document index (int64); the ``.bin`` file holds the raw item data. Both files are
+    opened with ``np.memmap`` in read mode and sliced with ``np.frombuffer``, so the
+    returned arrays are views on the mapping rather than copies.
+
+    Instances pickle as their path only and re-open the files when unpickled.
+    """
+
+    class Index(object):
+        """Parsed view of the ``.idx`` file."""
+
+        _HDR_MAGIC = b"MMIDIDX\x00\x00"
+
+        @classmethod
+        def writer(cls, path, dtype):
+            """Return a context manager that writes an index file at ``path``.
+
+            ``dtype`` is the element type of the matching data file; it must be one of
+            the types registered in ``dtypes``.
+            """
+
+            class _Writer(object):
+                def __enter__(self):
+                    self._file = open(path, "wb")
+
+                    # Write Magic string so we can check the file format when opening it again.
+                    self._file.write(cls._HDR_MAGIC)
+                    # Write version number
+                    # Little endian unsigned 64 Bit integer
+                    self._file.write(struct.pack("<Q", 1))
+                    # Little endian unsigned 8 Bit integer
+                    self._file.write(struct.pack("<B", code(dtype)))
+
+                    return self
+
+                @staticmethod
+                def _get_pointers(sizes):
+                    """Turn per-item element counts into per-item byte offsets."""
+                    # np.dtype(...) also handles the built-in ``float`` entry of
+                    # ``dtypes``, whose instances have no ``itemsize`` attribute.
+                    dtype_size = np.dtype(dtype).itemsize
+                    address = 0
+                    pointers = []
+
+                    for size in sizes:
+                        pointers.append(address)
+                        address += size * dtype_size
+
+                    return pointers
+
+                def write(self, sizes, doc_idx):
+                    """Write the item count, document count, sizes, pointers and document index."""
+                    pointers = self._get_pointers(sizes)
+
+                    # Little endian unsigned 64 Bit integer
+                    self._file.write(struct.pack("<Q", len(sizes)))
+                    # Little endian unsigned 64 Bit integer
+                    self._file.write(struct.pack("<Q", len(doc_idx)))
+
+                    sizes = np.array(sizes, dtype=np.int32)
+                    self._file.write(sizes.tobytes(order="C"))
+                    del sizes
+
+                    pointers = np.array(pointers, dtype=np.int64)
+                    self._file.write(pointers.tobytes(order="C"))
+                    del pointers
+
+                    doc_idx = np.array(doc_idx, dtype=np.int64)
+                    self._file.write(doc_idx.tobytes(order="C"))
+
+                def __exit__(self, exc_type, exc_val, exc_tb):
+                    self._file.close()
+
+            return _Writer()
+
+        def __init__(self, path, skip_warmup=False):
+            """Parse the header of the index file at ``path`` and map its three arrays."""
+            with open(path, "rb") as stream:
+                magic_test = stream.read(9)
+                assert self._HDR_MAGIC == magic_test, (
+                    "Index file doesn't match expected format. "
+                    "Make sure that --dataset-impl is configured properly."
+                )
+                # Little endian unsigned 64 Bit integer
+                version = struct.unpack("<Q", stream.read(8))
+                assert (1,) == version
+
+                # Little endian unsigned 8 Bit integer
+                (dtype_code,) = struct.unpack("<B", stream.read(1))
+                self._dtype = dtypes[dtype_code]
+                # np.dtype(...) works for every entry of ``dtypes``, including the
+                # built-in ``float`` whose instances have no ``itemsize`` attribute.
+                self._dtype_size = np.dtype(self._dtype).itemsize
+
+                self._len = struct.unpack("<Q", stream.read(8))[0]
+                self._doc_count = struct.unpack("<Q", stream.read(8))[0]
+                # Everything after the header is the sizes / pointers / doc_idx arrays.
+                offset = stream.tell()
+
+            if not skip_warmup:
+                print_rank_0("    warming up index mmap file...")
+                _warmup_mmap_file(path)
+
+            self._bin_buffer_mmap = np.memmap(path, mode="r", order="C")
+            self._bin_buffer = memoryview(self._bin_buffer_mmap)
+            print_rank_0("    reading sizes...")
+            self._sizes = np.frombuffer(
+                self._bin_buffer, dtype=np.int32, count=self._len, offset=offset
+            )
+            print_rank_0("    reading pointers...")
+            self._pointers = np.frombuffer(
+                self._bin_buffer,
+                dtype=np.int64,
+                count=self._len,
+                offset=offset + self._sizes.nbytes,
+            )
+            print_rank_0("    reading document index...")
+            self._doc_idx = np.frombuffer(
+                self._bin_buffer,
+                dtype=np.int64,
+                count=self._doc_count,
+                offset=offset + self._sizes.nbytes + self._pointers.nbytes,
+            )
+
+        def __del__(self):
+            # __init__ may have raised before the mapping was created (missing file,
+            # bad header); in that case there is nothing to release.
+            mmap_array = getattr(self, "_bin_buffer_mmap", None)
+            if mmap_array is None:
+                return
+            mmap_array._mmap.close()
+            del self._bin_buffer_mmap
+
+        @property
+        def dtype(self):
+            """Element type of the data file, as looked up in ``dtypes``."""
+            return self._dtype
+
+        @property
+        def sizes(self):
+            """Per-item element counts (int32 array)."""
+            return self._sizes
+
+        @property
+        def doc_idx(self):
+            """Document index (int64 array)."""
+            return self._doc_idx
+
+        @lru_cache(maxsize=8)
+        def __getitem__(self, i):
+            """Return ``(byte_offset, element_count)`` of item ``i``."""
+            return self._pointers[i], self._sizes[i]
+
+        def __len__(self):
+            return self._len
+
+    def __init__(self, path, skip_warmup=False):
+        super().__init__()
+
+        self._path = None
+        self._index = None
+        self._bin_buffer = None
+
+        self._do_init(path, skip_warmup)
+
+    def __getstate__(self):
+        # Only the path is pickled; the mappings are rebuilt on the receiving side.
+        return self._path
+
+    def __setstate__(self, state):
+        self._do_init(state)
+
+    def _do_init(self, path, skip_warmup=False):
+        """Open the index and data files that share the prefix ``path``.
+
+        ``skip_warmup`` defaults to False so that ``__setstate__``, which only has the
+        path available, can call this method when an instance is unpickled.
+        """
+        self._path = path
+        self._index = self.Index(index_file_path(self._path), skip_warmup)
+
+        if not skip_warmup:
+            print_rank_0("    warming up data mmap file...")
+            _warmup_mmap_file(data_file_path(self._path))
+        print_rank_0("    creating numpy buffer of mmap...")
+        self._bin_buffer_mmap = np.memmap(
+            data_file_path(self._path), mode="r", order="C"
+        )
+        print_rank_0("    creating memory view of numpy buffer...")
+        self._bin_buffer = memoryview(self._bin_buffer_mmap)
+
+    def __del__(self):
+        # Either attribute can be missing when _do_init raised part-way through
+        # (for example on unpickling, where __init__ never ran).
+        mmap_array = getattr(self, "_bin_buffer_mmap", None)
+        if mmap_array is not None:
+            mmap_array._mmap.close()
+            del self._bin_buffer_mmap
+        if hasattr(self, "_index"):
+            del self._index
+
+    def __len__(self):
+        return len(self._index)
+
+    # @lru_cache(maxsize=8)
+    def __getitem__(self, idx):
+        """Return item ``idx`` as an array, or a list of arrays for a contiguous slice.
+
+        Raises:
+            ValueError: if a slice has a step other than 1.
+
+        Index types other than ``int`` and ``slice`` fall through and return None.
+        """
+        if isinstance(idx, int):
+            ptr, size = self._index[idx]
+            np_array = np.frombuffer(
+                self._bin_buffer, dtype=self._index.dtype, count=size, offset=ptr
+            )
+            return np_array
+        elif isinstance(idx, slice):
+            start, stop, step = idx.indices(len(self))
+            if step != 1:
+                raise ValueError("Slices into indexed_dataset must be contiguous")
+            ptr = self._index._pointers[start]
+            sizes = self._index._sizes[idx]
+            offsets = list(accumulate(sizes))
+            total_size = sum(sizes)
+            # Read the whole contiguous range once, then cut it at the item boundaries.
+            np_array = np.frombuffer(
+                self._bin_buffer, dtype=self._index.dtype, count=total_size, offset=ptr
+            )
+            sents = np.split(np_array, offsets[:-1])
+            return sents
+
+    def get(self, idx, offset=0, length=None):
+        """Retrieves a single item from the dataset with the option to only
+        return a portion of the item.
+
+        get(idx) is the same as [idx] but get() does not support slicing.
+        """
+        ptr, size = self._index[idx]
+        if length is None:
+            length = size - offset
+        # ``offset`` is counted in elements, ``ptr`` in bytes.
+        ptr += offset * np.dtype(self._index.dtype).itemsize
+        np_array = np.frombuffer(
+            self._bin_buffer, dtype=self._index.dtype, count=length, offset=ptr
+        )
+        return np_array
+
+    def pad(self, idx, length=None):
+        """Return ``length`` elements starting at item ``idx``.
+
+        If that many elements cannot be read from the item's position, the item itself
+        is returned followed by ``length - size`` elements taken from the position of
+        item 0. With ``length=None`` the item is returned unpadded.
+        """
+        ptr, size = self._index[idx]
+        if length is None:
+            # Without this the default argument could never work: ``count=None`` is
+            # rejected by np.frombuffer and ``length - size`` below fails as well.
+            length = size
+        try:
+            np_array = np.frombuffer(
+                self._bin_buffer, dtype=self._index.dtype, count=length, offset=ptr
+            )
+        except Exception:
+            # The request runs past the end of the buffer: take the item as stored and
+            # fill the remainder from the position of item 0.
+            np_array = np.frombuffer(
+                self._bin_buffer, dtype=self._index.dtype, count=size, offset=ptr
+            )
+            ptr0, _ = self._index[0]
+            np_array0 = np.frombuffer(
+                self._bin_buffer,
+                dtype=self._index.dtype,
+                count=length - size,
+                offset=ptr0,
+            )
+            np_array = np.append(np_array, np_array0)
+        return np_array
+
+    def only(self, idx):
+        """Return exactly item ``idx`` as an array."""
+        ptr, size = self._index[idx]
+        np_array = np.frombuffer(
+            self._bin_buffer, dtype=self._index.dtype, count=size, offset=ptr
+        )
+
+        return np_array
+
+    @property
+    def sizes(self):
+        """Per-item element counts, taken from the index."""
+        return self._index.sizes
+
+    @property
+    def doc_idx(self):
+        """Document index, taken from the index."""
+        return self._index.doc_idx
+
+    def get_doc_idx(self):
+        return self._index._doc_idx
+
+    def set_doc_idx(self, doc_idx_):
+        self._index._doc_idx = doc_idx_
+
+    @property
+    def supports_prefetch(self):
+        return False
+
+    @staticmethod
+    def exists(path):
+        """Return True when both the index file and the data file for ``path`` exist."""
+        return os.path.exists(index_file_path(path)) and os.path.exists(
+            data_file_path(path)
+        )
--- a/finetune/lora/v5/src/dataset.py
+++ b/finetune/lora/v5/src/dataset.py
@@ -0,0 +1,241 @@
+########################################################################################################
+# The RWKV Language Model - https://github.com/BlinkDL/RWKV-LM
+########################################################################################################
+
+import json, math, random, os, sys
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+from pytorch_lightning.utilities import rank_zero_info
+from .binidx import MMapIndexedDataset
+from .utils import MaybeIsPrime
+
+
+class MyDataset(Dataset):
+    """Training dataset that yields ``(x, y)`` next-token pairs (plus a mask ``z`` when
+    ``args.my_qa_mask == 1``).
+
+    ``args.data_type`` selects the backing store:
+      * ``"binidx"`` - one memory-mapped dataset (``my_pile_version == 1``) or a list of
+        ``[cutoff, chunk_count, dataset]`` entries read from a manifest file
+        (``my_pile_version == 2``);
+      * ``"numpy"``  - an array loaded with ``np.load``;
+      * ``"uint16"`` - a raw uint16 file reshaped to rows of ``args.my_sample_len``;
+      * ``"dummy"``  - generated arithmetic text;
+      * anything else is used as the text encoding of ``args.data_file``.
+    The two text modes build a character vocabulary and write it to
+    ``{args.proj_dir}/vocab.json``.
+
+    ``__getitem__`` reads ``self.global_rank``, ``self.real_epoch`` and
+    ``self.world_size``, which are not set in this class - presumably assigned by the
+    training loop before the first batch is requested; confirm against the caller.
+    """
+
+    def __init__(self, args):
+        self.args = args
+
+        if args.data_type == "binidx":
+            self.vocab_size = args.vocab_size
+            rank_zero_info(
+                f"Current vocab size = {self.vocab_size} (make sure it's correct)"
+            )
+
+            if args.my_pile_version == 1:
+                self.data = MMapIndexedDataset(args.data_file)
+                self.data_size = (
+                    len(self.data._bin_buffer) // self.data._index._dtype_size
+                )
+                rank_zero_info(f"Data has {self.data_size} tokens.")
+            elif args.my_pile_version == 2:
+                # Manifest: one dataset per line, fields separated by single spaces.
+                with open(args.data_file, "r", encoding="utf-8") as manifest:
+                    data_list = manifest.read().strip().split("\n")
+                data_list = [i.strip().split(" ") for i in data_list]
+                self.data = []
+                self.data_size = int(data_list[-1][-1])
+                rank_zero_info(f"Data has {self.data_size} chunks.")
+                for d in data_list:
+                    data = MMapIndexedDataset(d[0])
+                    data_size = len(data._bin_buffer) // data._index._dtype_size
+                    assert (data_size - args.ctx_len) == int(d[1])
+                    self.data += [[int(d[-1]), int(d[1]), data]]
+                # rank_zero_info(self.data)
+
+            if args.my_qa_mask > 0:
+                # self.data_pile = MMapIndexedDataset('/fsx/pile/pile_20B_tokenizer_text_document')
+                self.data_pile = MMapIndexedDataset(
+                    "/fsx/pile_deduped/pile_0.87_deduped_text_document"
+                )
+                # Use the pile dataset's own element size: ``self.data`` may have a
+                # different dtype, and is a plain list when my_pile_version == 2.
+                self.data_pile_size = (
+                    len(self.data_pile._bin_buffer)
+                    // self.data_pile._index._dtype_size
+                )
+            else:
+                self.data_pile = None
+                self.data_pile_size = 0
+
+            if args.my_pile_stage > 0:
+                # assert self.data_size == 332115325534 and self.vocab_size == 50277
+                self.samples_per_epoch = args.epoch_steps * args.real_bsz
+                assert self.samples_per_epoch == 40320
+                rank_zero_info(
+                    f"########## Pile 20b-tokenized stage {args.my_pile_stage} ##########"
+                )
+                dataset_slot = self.data_size // args.ctx_len
+                if args.my_pile_stage != 4:
+                    assert MaybeIsPrime(args.magic_prime)
+                    assert args.magic_prime % 3 == 2
+                    assert (
+                        args.magic_prime / dataset_slot > 0.99
+                        and args.magic_prime / dataset_slot <= 1
+                    )
+        elif args.data_type == "numpy":
+            self.data = np.load(args.data_file).astype("int")
+            self.vocab_size = args.vocab_size
+            rank_zero_info(
+                f"Current vocab size = {self.vocab_size} (make sure it's correct)"
+            )
+            self.data_size = len(self.data)
+            rank_zero_info(f"Data has {self.data_size} tokens.")
+        elif args.data_type == "uint16":
+            self.data = (
+                np.fromfile(args.data_file, dtype=np.uint16)
+                .astype("int32")
+                .reshape(-1, args.my_sample_len)
+            )
+            self.vocab_size = args.vocab_size
+            rank_zero_info(
+                f"Current vocab size = {self.vocab_size} (make sure it's correct)"
+            )
+            self.data_size = self.data.shape[0]
+            rank_zero_info(f"Data has {self.data_size} samples.")
+        else:
+            if args.data_type == "dummy":
+                rank_zero_info("Building dummy data...")
+                self.data = ""
+                for i in range(100000):
+                    aa = (i) % 10000
+                    bb = (i * i) % 10000
+                    cc = aa + bb
+                    self.data += f".{aa}+{bb}={cc}."
+            else:
+                # Any other data_type is treated as the text encoding of the file.
+                with open(args.data_file, "r", encoding=args.data_type) as text_file:
+                    self.data = text_file.read()
+            rank_zero_info("Building token list...")
+            unique = sorted(list(set(self.data)))
+            self.vocab_size = len(unique)
+            # rank_zero_info()
+            # for u in unique:
+            #     print(u, end=' ')
+            # rank_zero_info('\n\n')
+            # index -> character table, saved next to the checkpoints.
+            xxObj = dict(enumerate(unique))
+            with open(
+                f"{args.proj_dir}/vocab.json", "w", encoding="utf-8"
+            ) as vocab_file:
+                vocab_file.write(json.dumps(xxObj, ensure_ascii=False))
+            self.data_size = len(self.data)
+            rank_zero_info(
+                f"Data has {self.data_size} tokens, {self.vocab_size} vocab size."
+            )
+            self.stoi = {ch: i for i, ch in enumerate(unique)}
+            self.itos = {i: ch for i, ch in enumerate(unique)}
+
+    def __len__(self):
+        # Samples are drawn by position formula or at random, so the length is the
+        # per-device number of samples in one epoch rather than the size of the data.
+        return self.args.epoch_steps * self.args.micro_bsz
+
+    def __getitem__(self, idx):
+        """Return one training sample.
+
+        Returns ``(x, y)`` where ``y`` is ``x`` shifted by one token, or ``(x, y, z)``
+        with a bfloat16 mask ``z`` when ``args.my_qa_mask == 1`` (non-uint16 modes only).
+        """
+        args = self.args
+        rank = self.global_rank
+        epoch = self.real_epoch
+        world_size = self.world_size
+        # print(f"epoch {epoch} idx {idx} rank {rank}/{world_size}")
+
+        if args.data_type == "uint16":
+            # NOTE(review): randint's upper bound is exclusive, so the last row is never
+            # drawn and a single-row file raises - confirm this is intended.
+            i = np.random.randint(0, self.data_size - 1)
+            dix = self.data[i]
+            x = torch.tensor(dix[:-1], dtype=torch.long)
+            y = torch.tensor(dix[1:], dtype=torch.long)
+        else:
+            ctx_len = args.ctx_len
+            req_len = ctx_len + 1
+            magic_prime = args.magic_prime
+            data = self.data
+
+            if args.my_pile_stage > 0:
+                # Global sample counter across epochs, ranks and per-rank indices.
+                ii = 1 + epoch * self.samples_per_epoch + (idx * world_size) + rank
+
+                if args.my_qa_mask > 0:
+                    ii_orig = ii
+                    if ii % 2 == 0:
+                        ii = -1
+                        data = self.data_pile
+                    else:
+                        ii = ii // 2
+                # Identity test: ``==`` would compare element-wise if data were an array.
+                if data is self.data_pile:
+                    i = np.random.randint(0, self.data_pile_size - req_len)
+                else:
+                    if args.my_pile_stage == 4 or ii < args.my_random_steps:
+                        # cheat: pick a random spot in dataset
+                        if args.my_pile_version == 1:
+                            i = np.random.randint(0, self.data_size - req_len)
+                        else:
+                            i = np.random.randint(0, self.data_size)
+                    else:
+                        # Position derived from the sample counter and magic_prime.
+                        ii = ii - args.my_random_steps
+                        factor = (math.sqrt(5) - 1) / 2
+                        factor = int(magic_prime * factor)
+                        i = ((factor * ii * ii * ii) % magic_prime) * ctx_len
+                        i = i + args.my_pile_shift
+                # print(f"epoch {epoch} idx {idx} rank {rank}/{world_size} ii {ii} pos {round(i / self.data_size, 3)}")
+            else:
+                # cheat: pick a random spot in dataset
+                i = np.random.randint(0, self.data_size - req_len)
+
+            if args.data_type == "binidx":
+                if args.my_pile_version == 1:
+                    dix = data.get(idx=0, offset=i, length=req_len).astype(int)
+                else:
+                    # self.data : cutoff, chunk_count, data
+                    for j in range(len(data)):
+                        if i < data[j][0]:
+                            ii = i
+                            i = (i - (data[j - 1][0] if j > 0 else 0)) % data[j][1]
+                            dix = (
+                                data[j][2]
+                                .get(idx=0, offset=i, length=req_len)
+                                .astype(int)
+                            )
+                            # print(ii, j, i)
+                            break
+            elif args.data_type == "numpy":
+                dix = data[i : i + req_len]
+            else:
+                dix = [self.stoi[s] for s in data[i : i + req_len]]
+
+            if args.my_qa_mask == 1:
+                if data is self.data_pile:
+                    z = [1] * ctx_len
+                else:
+                    z = [0] * ctx_len
+                    z_sum = 0
+                    isGood = False
+                    # Masking starts after the token sequence 187, 187, 34, 27 and
+                    # stops at the next token 0.
+                    for i in range(3, ctx_len):
+                        if (
+                            dix[i] == 27
+                            and dix[i - 1] == 34
+                            and dix[i - 2] == 187
+                            and dix[i - 3] == 187
+                        ):
+                            isGood = True
+                        if dix[i] == 0:
+                            isGood = False
+                        if isGood:
+                            z[i] = 1
+                            z_sum += 1
+                    if z_sum == 0:
+                        # Nothing was masked in: use a random pile sample, fully unmasked.
+                        z = [1] * ctx_len
+                        i = np.random.randint(0, self.data_pile_size - req_len)
+                        dix = self.data_pile.get(
+                            idx=0, offset=i, length=req_len
+                        ).astype(int)
+                z = torch.tensor(z, dtype=torch.bfloat16)
+
+            x = torch.tensor(dix[:-1], dtype=torch.long)
+            y = torch.tensor(dix[1:], dtype=torch.long)
+
+            # if ii_orig < 50:
+            #     # if rank == 1:
+            #     print('rank', rank, 'i', ii_orig, ii, i, 'x', x[:5], '...', x[-5:])
+            # else:
+            #     exit(0)
+
+            if args.my_qa_mask == 1:
+                return x, y, z
+
+            return x, y
--- a/finetune/lora/v5/src/model.py
+++ b/finetune/lora/v5/src/model.py
@@ -0,0 +1,819 @@
+########################################################################################################
+# The RWKV Language Model - https://github.com/BlinkDL/RWKV-LM
+########################################################################################################
+import functools
+import os, math, gc, importlib
+import torch
+
+# torch._C._jit_set_profiling_executor(True)
+# torch._C._jit_set_profiling_mode(True)
+import torch.nn as nn
+from torch.utils.checkpoint import checkpoint as torch_checkpoint
+from torch.nn import functional as F
+import pytorch_lightning as pl
+from pytorch_lightning.utilities import rank_zero_info, rank_zero_only
+from pytorch_lightning.strategies import DeepSpeedStrategy
+
+if importlib.util.find_spec("deepspeed"):
+    import deepspeed
+    from deepspeed.ops.adam import DeepSpeedCPUAdam, FusedAdam
+
+
+# from deepspeed.runtime.fp16.onebit.zoadam import ZeroOneAdam
+
+# lora-config
+# Module-wide LoRA settings. With the default rank of 0 the make_linear_*
+# factories below never build a LoraLinear. LoraLinear reads these values in
+# its constructor, so overrides must be applied before the model is built.
+LORA_CONFIG = {
+    "r": 0,
+    "alpha": 0,
+    "dropout": 0,
+    "parts": {"att", "ln", "time"},
+}
+
+# Echo RWKV_MY_TESTING when it is set; otherwise default it to "" so later
+# os.environ["RWKV_MY_TESTING"] lookups cannot raise KeyError.
+# Only the missing-variable case is handled here (no bare except).
+try:
+    print("RWKV_MY_TESTING", os.environ["RWKV_MY_TESTING"])
+except KeyError:
+    os.environ["RWKV_MY_TESTING"] = ""
+
+
+def __nop(ob):
+    """Identity decorator: return ``ob`` unchanged (used when JIT is off)."""
+    return ob
+
+
+# Base class and method decorator used by the model blocks below.
+# With RWKV_JIT_ON=1 they become TorchScript's ScriptModule / script_method;
+# otherwise they stay a plain nn.Module and a no-op decorator.
+# An unset RWKV_JIT_ON is treated as "off" rather than raising KeyError at
+# import time.
+MyModule = nn.Module
+MyFunction = __nop
+if os.environ.get("RWKV_JIT_ON") == "1":
+    MyModule = torch.jit.ScriptModule
+    MyFunction = torch.jit.script_method
+
+
+########################################################################################################
+# CUDA Kernel
+########################################################################################################
+
+from torch.utils.cpp_extension import load
+
+# Head size is fixed at import time: it is baked into the kernel through the
+# -D_N_ compile definition below, and WKV_5.forward asserts C // H equals it.
+HEAD_SIZE = int(os.environ["RWKV_HEAD_SIZE_A"])
+# Build and load the WKV5 extension via torch.utils.cpp_extension.load.
+# The source paths are relative, so they resolve against the current working
+# directory of the process.
+wkv5_cuda = load(
+    name="wkv5",
+    sources=[
+        "finetune/lora/v5/cuda/wkv5_op.cpp",
+        "finetune/lora/v5/cuda/wkv5_cuda.cu",
+    ],
+    verbose=True,
+    extra_cuda_cflags=[
+        "-res-usage",
+        "--use_fast_math",
+        "-O3",
+        "-Xptxas -O3",
+        "--extra-device-vectorization",
+        f"-D_N_={HEAD_SIZE}",
+    ],
+)
+
+
+class WKV_5(torch.autograd.Function):
+    """Autograd bridge to the compiled ``wkv5_cuda`` forward/backward kernels.
+
+    All tensor arguments must be contiguous bfloat16, and ``C // H`` must
+    equal the module-level ``HEAD_SIZE``; both are enforced with asserts.
+    """
+
+    @staticmethod
+    def forward(ctx, B, T, C, H, r, k, v, w, u):
+        with torch.no_grad():
+            inputs = (r, k, v, w, u)
+            for t in inputs:
+                assert t.dtype == torch.bfloat16
+            assert HEAD_SIZE == C // H
+            ctx.B, ctx.T, ctx.C, ctx.H = B, T, C, H
+            for t in inputs:
+                assert t.is_contiguous()
+            # The kernel is fed exp(-exp(w)); -exp(w) itself is saved as well
+            # because the backward kernel takes both.
+            neg_exp_w = (-torch.exp(w.float())).contiguous()
+            decay = (torch.exp(neg_exp_w)).contiguous()
+            ctx.save_for_backward(r, k, v, decay, neg_exp_w, u)
+            out = torch.empty(
+                (B, T, C),
+                device=r.device,
+                dtype=torch.bfloat16,
+                memory_format=torch.contiguous_format,
+            )
+            # The kernel writes its result into ``out``.
+            wkv5_cuda.forward(B, T, C, H, r, k, v, decay, u, out)
+            return out
+
+    @staticmethod
+    def backward(ctx, gy):
+        with torch.no_grad():
+            assert gy.dtype == torch.bfloat16
+            B, T, C, H = ctx.B, ctx.T, ctx.C, ctx.H
+            assert gy.is_contiguous()
+            r, k, v, eew, ew, u = ctx.saved_tensors
+
+            def alloc(*shape):
+                # Uninitialised bf16 gradient buffer on gy's device.
+                return torch.empty(
+                    shape,
+                    device=gy.device,
+                    requires_grad=False,
+                    dtype=torch.bfloat16,
+                    memory_format=torch.contiguous_format,
+                )
+
+            gr, gk, gv = alloc(B, T, C), alloc(B, T, C), alloc(B, T, C)
+            gw, gu = alloc(B, C), alloc(B, C)
+            wkv5_cuda.backward(B, T, C, H, r, k, v, eew, ew, u, gy, gr, gk, gv, gw, gu)
+            # gw / gu are allocated as (B, C): sum over the batch dimension and
+            # reshape to (H, C // H).
+            gw = torch.sum(gw, 0).view(H, C // H)
+            gu = torch.sum(gu, 0).view(H, C // H)
+            # No gradient for the four leading size arguments (B, T, C, H).
+            return (None, None, None, None, gr, gk, gv, gw, gu)
+
+
+def RUN_CUDA_RWKV5(B, T, C, H, r, k, v, w, u):
+    """Run the WKV5 operator through ``WKV_5`` so autograd records it."""
+    wkv_args = (B, T, C, H, r, k, v, w, u)
+    return WKV_5.apply(*wkv_args)
+
+
+#################################################################
+class LoraLinear(nn.Module):
+    """Bias-free linear layer with an additive low-rank (LoRA) branch.
+
+    ``forward`` returns ``linear(x, W) + scaling * linear(linear(dropout(x), A), B)``.
+    Rank, alpha and dropout are read from the module-level ``LORA_CONFIG`` when
+    the layer is constructed, and ``scaling = alpha / r``.
+    """
+
+    def __init__(self, in_features: int, out_features: int, bias: bool):
+        super().__init__()
+
+        self.weight = nn.Parameter(torch.empty((out_features, in_features)))
+        assert bias == False, "Biased LoraLinear not supported"
+
+        rank = LORA_CONFIG["r"]
+        lora_alpha = LORA_CONFIG["alpha"]
+        drop_p = LORA_CONFIG["dropout"]
+        self.lora_A = nn.Parameter(torch.empty(rank, in_features))
+        self.lora_B = nn.Parameter(torch.empty(out_features, rank))
+        self.lora_dropout = nn.Dropout(drop_p)
+        self.scaling = lora_alpha / rank
+
+        # weight and lora_A get the same Kaiming-uniform init (in that order);
+        # lora_B starts at zero, so the LoRA branch initially outputs zeros.
+        for param in (self.weight, self.lora_A):
+            nn.init.kaiming_uniform_(param, a=math.sqrt(5))
+        nn.init.zeros_(self.lora_B)
+
+    def forward(self, x):
+        base = F.linear(x, self.weight)
+        down = F.linear(self.lora_dropout(x), self.lora_A)
+        up = F.linear(down, self.lora_B)
+        return base + self.scaling * up
+
+
+@functools.wraps(LoraLinear)
+def make_linear_att(*args, **kwargs):
+    # Attention projections use LoraLinear only when "att" is listed in
+    # LORA_CONFIG["parts"] and the configured rank is positive.
+    use_lora = "att" in LORA_CONFIG["parts"] and LORA_CONFIG["r"] > 0
+    layer_cls = LoraLinear if use_lora else nn.Linear
+    return layer_cls(*args, **kwargs)
+
+
+@functools.wraps(LoraLinear)
+def make_linear_ffn(*args, **kwargs):
+    # Feed-forward projections use LoraLinear only when "ffn" is listed in
+    # LORA_CONFIG["parts"] and the configured rank is positive.
+    use_lora = "ffn" in LORA_CONFIG["parts"] and LORA_CONFIG["r"] > 0
+    layer_cls = LoraLinear if use_lora else nn.Linear
+    return layer_cls(*args, **kwargs)
+
+
+########################################################################################################
+
+
+class RWKV_TimeMix_RWKV5(MyModule):
+    """RWKV-5 time-mixing block.
+
+    Mixes each position with the previous one, projects to r/k/v/g, runs the
+    WKV5 CUDA operator, then applies GroupNorm and a gated output projection.
+    """
+
+    def __init__(self, args, layer_id):
+        super().__init__()
+        self.args = args
+        self.layer_id = layer_id
+
+        self.head_size = args.head_size_a
+        assert HEAD_SIZE == self.head_size  # change HEAD_SIZE to match args.head_size_a
+        self.n_head = args.dim_att // self.head_size
+        assert args.dim_att % self.n_head == 0
+        self.head_size_divisor = args.head_size_divisor
+
+        with torch.no_grad():
+            # Guard the denominator: with n_layer == 1 the original expression
+            # divided by zero. For n_layer >= 2 the value is unchanged.
+            ratio_0_to_1 = layer_id / max(args.n_layer - 1, 1)  # 0 to 1
+            ratio_1_to_almost0 = 1.0 - (layer_id / args.n_layer)  # 1 to ~0
+            # ddd[0, 0, i] = i / n_embd: a per-channel ramp in [0, 1).
+            ddd = torch.ones(1, 1, args.n_embd)
+            for i in range(args.n_embd):
+                ddd[0, 0, i] = i / args.n_embd
+
+            # fancy time_mix
+            self.time_mix_k = nn.Parameter(torch.pow(ddd, ratio_1_to_almost0))
+            self.time_mix_v = nn.Parameter(
+                torch.pow(ddd, ratio_1_to_almost0) + 0.3 * ratio_0_to_1
+            )
+            self.time_mix_r = nn.Parameter(torch.pow(ddd, 0.5 * ratio_1_to_almost0))
+            self.time_mix_g = nn.Parameter(torch.pow(ddd, 0.5 * ratio_1_to_almost0))
+
+            # fancy time_decay
+            decay_speed = torch.ones(args.dim_att)
+            for n in range(args.dim_att):
+                decay_speed[n] = -6 + 5 * (n / (args.dim_att - 1)) ** (
+                    0.7 + 1.3 * ratio_0_to_1
+                )
+            self.time_decay = nn.Parameter(
+                decay_speed.reshape(self.n_head, self.head_size)
+            )
+            # print(layer_id, self.time_decay.flatten()[:3].cpu().numpy(), '...', self.time_decay.flatten()[-3:].cpu().numpy())
+
+            # time_faaaa is passed to the kernel as ``u`` (see forward).
+            tmp = torch.zeros(args.dim_att)
+            for n in range(args.dim_att):
+                zigzag = ((n + 1) % 3 - 1) * 0.1
+                tmp[n] = ratio_0_to_1 * (1 - (n / (args.dim_att - 1))) + zigzag
+
+            self.time_faaaa = nn.Parameter(tmp.reshape(self.n_head, self.head_size))
+
+        # Pad one row at the top and crop one at the bottom of the last two
+        # dims, i.e. shift the sequence by one step.
+        self.time_shift = nn.ZeroPad2d((0, 0, 1, -1))
+
+        self.receptance = make_linear_att(args.n_embd, args.dim_att, bias=False)
+        self.key = make_linear_att(args.n_embd, args.dim_att, bias=False)
+        self.value = make_linear_att(args.n_embd, args.dim_att, bias=False)
+
+        # The output projection is always a plain nn.Linear (never LoRA).
+        self.output = nn.Linear(args.dim_att, args.n_embd, bias=False)
+        self.gate = make_linear_att(args.n_embd, args.dim_att, bias=False)
+        self.ln_x = nn.GroupNorm(self.n_head, args.dim_att)
+
+    @MyFunction
+    def jit_func(self, x):
+        # Pre-kernel half: token-shift mixing followed by the r/k/v/g projections.
+        B, T, C = x.size()
+
+        xx = self.time_shift(
+            x
+        )  # Mix x with the previous timestep to produce xk, xv, xr
+        xk = x * self.time_mix_k + xx * (1 - self.time_mix_k)
+        xv = x * self.time_mix_v + xx * (1 - self.time_mix_v)
+        xr = x * self.time_mix_r + xx * (1 - self.time_mix_r)
+        xg = x * self.time_mix_g + xx * (1 - self.time_mix_g)
+
+        r = self.receptance(xr)
+        k = self.key(xk)
+        v = self.value(xv)
+        g = F.silu(self.gate(xg))
+
+        return r, k, v, g
+
+    @MyFunction
+    def jit_func_2(self, x, g):
+        # Post-kernel half: GroupNorm over flattened (B * T, C) rows, then the
+        # gated output projection.
+        B, T, C = x.size()
+        x = x.view(B * T, C)
+        x = self.ln_x(x / self.head_size_divisor).view(B, T, C)
+        x = self.output(x * g)
+        return x
+
+    def forward(self, x):
+        B, T, C = x.size()
+        H = self.n_head
+        r, k, v, g = self.jit_func(x)
+        # The CUDA operator runs outside the (optionally scripted) helpers.
+        x = RUN_CUDA_RWKV5(B, T, C, H, r, k, v, w=self.time_decay, u=self.time_faaaa)
+
+        return self.jit_func_2(x, g)
+
+
+########################################################################################################
+
+
+class RWKV_ChannelMix(MyModule):
+    """Channel-mixing block: squared-ReLU key/value path gated by a sigmoid receptance."""
+
+    def __init__(self, args, layer_id):
+        super().__init__()
+        self.args = args
+        self.layer_id = layer_id
+        # Shift the sequence by one step (pad one row on top, crop one at the bottom).
+        self.time_shift = nn.ZeroPad2d((0, 0, 1, -1))
+
+        with torch.no_grad():
+            # Per-channel ramp i / n_embd raised to a power that goes from 1
+            # towards 0 as layer_id grows.
+            exponent = 1.0 - (layer_id / args.n_layer)
+            ramp = torch.ones(1, 1, args.n_embd)
+            for ch in range(args.n_embd):
+                ramp[0, 0, ch] = ch / args.n_embd
+            # Kept as two separate pow() calls so the two Parameters are
+            # built from distinct tensors.
+            self.time_mix_k = nn.Parameter(torch.pow(ramp, exponent))
+            self.time_mix_r = nn.Parameter(torch.pow(ramp, exponent))
+
+        n_embd, dim_ffn = args.n_embd, args.dim_ffn
+        self.key = make_linear_ffn(n_embd, dim_ffn, bias=False)
+        self.receptance = make_linear_ffn(n_embd, n_embd, bias=False)
+        self.value = make_linear_ffn(dim_ffn, n_embd, bias=False)
+
+    @MyFunction
+    def forward(self, x):
+        shifted = self.time_shift(x)
+        xk = x * self.time_mix_k + shifted * (1 - self.time_mix_k)
+        xr = x * self.time_mix_r + shifted * (1 - self.time_mix_r)
+        hidden = torch.relu(self.key(xk)) ** 2
+        kv = self.value(hidden)
+        gate = torch.sigmoid(self.receptance(xr))
+        return gate * kv
+
+
+class MishGLU(MyModule):
+    """Gated feed-forward variant: ``value(aa(xa) * mish(bb(xb)))`` on token-shift-mixed inputs."""
+
+    def __init__(self, args, layer_id):
+        super().__init__()
+        self.args = args
+        self.layer_id = layer_id
+        # Shift the sequence by one step (pad one row on top, crop one at the bottom).
+        self.time_shift = nn.ZeroPad2d((0, 0, 1, -1))
+
+        with torch.no_grad():
+            exponent = 1.0 - (layer_id / args.n_layer)
+
+            # Per-channel ramp i / n_embd.
+            ramp = torch.ones(1, 1, args.n_embd)
+            for ch in range(args.n_embd):
+                ramp[0, 0, ch] = ch / args.n_embd
+
+            # Two separate pow() calls so the Parameters come from distinct tensors.
+            self.time_mix_k = nn.Parameter(torch.pow(ramp, exponent))
+            self.time_mix_r = nn.Parameter(torch.pow(ramp, exponent))
+            n_embd, dim_ffn = args.n_embd, args.dim_ffn
+            self.aa = nn.Linear(n_embd, dim_ffn, bias=False)
+            self.bb = nn.Linear(n_embd, dim_ffn, bias=False)
+            self.value = nn.Linear(dim_ffn, n_embd, bias=False)
+
+    @MyFunction
+    def forward(self, x):
+        shifted = self.time_shift(x)
+        xa = x * self.time_mix_k + shifted * (1 - self.time_mix_k)
+        xb = x * self.time_mix_r + shifted * (1 - self.time_mix_r)
+        lhs = self.aa(xa)
+        rhs = self.bb(xb)
+        return self.value(lhs * F.mish(rhs))
+
+
+########################################################################################################
+# The RWKV Model with our blocks
+########################################################################################################
+
+
+class Block(nn.Module):
+    """One residual layer: LayerNorm + time-mix (or pre-FFN on layer 0), then LayerNorm + feed-forward.
+
+    Layer 0 additionally owns the input LayerNorm ``ln0`` and the optional
+    positional embedding; one configured layer may carry a small extra
+    attention ("tiny att") that reads the raw embeddings ``x_emb``.
+    """
+
+    def __init__(self, args, layer_id):
+        super().__init__()
+        self.args = args
+        self.layer_id = layer_id
+
+        self.ln1 = nn.LayerNorm(args.n_embd)
+        self.ln2 = nn.LayerNorm(args.n_embd)
+
+        first_layer = self.layer_id == 0
+        if first_layer:
+            self.ln0 = nn.LayerNorm(args.n_embd)
+            if args.my_pos_emb > 0:
+                pos_x_shape = (1, args.my_pos_emb, args.n_embd)
+                pos_y_shape = (args.my_pos_emb, 1, args.n_embd)
+                self.pos_emb_x = nn.Parameter(torch.zeros(pos_x_shape))
+                self.pos_emb_y = nn.Parameter(torch.zeros(pos_y_shape))
+
+        # Layer 0 can swap its time-mix for an extra channel-mix ("pre_ffn").
+        if first_layer and self.args.pre_ffn > 0:
+            self.ffnPre = RWKV_ChannelMix(args, 0)
+        else:
+            self.att = RWKV_TimeMix_RWKV5(args, layer_id)
+
+        # A "g" in RWKV_MY_TESTING selects the MishGLU feed-forward.
+        ffn_cls = MishGLU if "g" in os.environ["RWKV_MY_TESTING"] else RWKV_ChannelMix
+        self.ffn = ffn_cls(args, layer_id)
+
+        if args.tiny_att_dim > 0 and self.layer_id == args.tiny_att_layer:
+            self.tiny_ln = nn.LayerNorm(args.n_embd)
+            self.tiny_q = nn.Linear(args.n_embd, args.tiny_att_dim, bias=False)
+            self.tiny_k = nn.Linear(args.n_embd, args.tiny_att_dim, bias=False)
+            self.tiny_v = nn.Linear(args.n_embd, args.n_embd, bias=False)
+            lower_tri = torch.tril(torch.ones(args.ctx_len, args.ctx_len))
+            self.register_buffer("tiny_mask", lower_tri)
+
+        if args.dropout > 0:
+            self.drop0 = nn.Dropout(p=args.dropout)
+            self.drop1 = nn.Dropout(p=args.dropout)
+
+    def forward(self, x, x_emb=None):
+        args = self.args
+        B, T, C = x.size()
+        first_layer = self.layer_id == 0
+        if first_layer:
+            x = self.ln0(x)
+            if args.my_pos_emb > 0:
+                pos_grid = self.pos_emb_x + self.pos_emb_y
+                pos_emb = pos_grid.reshape(T + 1, -1)[:-1, :]
+                x = x + pos_emb
+
+        # Pick the first sub-layer once; the two dropout variants below only
+        # differ in whether the residual sums pass through drop0 / drop1.
+        mixer = self.ffnPre if (first_layer and args.pre_ffn > 0) else self.att
+        if self.args.dropout == 0:
+            x = x + mixer(self.ln1(x))
+            x = x + self.ffn(self.ln2(x))
+        else:
+            x = self.drop0(x + mixer(self.ln1(x)))
+            x = self.drop1(x + self.ffn(self.ln2(x)))
+
+        if args.tiny_att_dim > 0 and self.layer_id == args.tiny_att_layer:
+            normed = self.tiny_ln(x)
+            q = self.tiny_q(normed)[:, :T, :]
+            k = self.tiny_k(normed)[:, :T, :]
+            scores = (q @ k.transpose(-2, -1)) * (args.tiny_att_dim ** (-0.5))
+            # Zero out entries above the diagonal (lower-triangular mask).
+            scores = scores.masked_fill(self.tiny_mask[:T, :T] == 0, 0)
+            x = x + scores @ self.tiny_v(x_emb)
+        return x
+
+
+class L2Wrap(torch.autograd.Function):
+    """Pass the loss through unchanged while injecting an extra gradient into the logits.
+
+    In backward, each position's maximum logit receives ``max * 1e-4 / (B * T)``
+    as gradient (all other entries get zero), which nudges logits towards 0.
+    """
+
+    @staticmethod
+    def forward(ctx, loss, y):
+        ctx.save_for_backward(y)
+        return loss
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        (logits,) = ctx.saved_tensors
+        n_batch, n_tokens = logits.shape[0], logits.shape[1]
+        # to encourage the logits to be close to 0
+        factor = 1e-4 / (n_batch * n_tokens)
+        top_val, top_idx = torch.max(logits, -1, keepdim=True)
+        grad_logits = torch.zeros_like(logits)
+        grad_logits.scatter_(-1, top_idx, top_val * factor)
+        return (grad_output, grad_logits)
+
+
+class RWKV(pl.LightningModule):
+    """RWKV language model as a LightningModule.
+
+    Token embedding -> stack of ``Block`` layers -> final LayerNorm ->
+    vocabulary head, with an optional ``head_qk`` path that adds scores onto
+    the logits of tokens already present in the input.
+    """
+
+    def __init__(self, args):
+        super().__init__()
+        self.args = args
+        # Fill in defaults for optional hyper-parameters missing from ``args``.
+        if not hasattr(args, "dim_att"):
+            args.dim_att = args.n_embd
+        if not hasattr(args, "dim_ffn"):
+            args.dim_ffn = args.n_embd * 4
+        if not hasattr(args, "tiny_att_layer"):
+            args.tiny_att_layer = -1
+        if not hasattr(args, "tiny_att_dim"):
+            args.tiny_att_dim = -1
+        assert args.n_embd % 32 == 0
+        assert args.dim_att % 32 == 0
+        assert args.dim_ffn % 32 == 0
+
+        self.emb = nn.Embedding(args.vocab_size, args.n_embd)
+
+        self.blocks = nn.ModuleList([Block(args, i) for i in range(args.n_layer)])
+
+        self.ln_out = nn.LayerNorm(args.n_embd)
+        self.head = nn.Linear(args.n_embd, args.vocab_size, bias=False)
+
+        if args.head_qk > 0:
+            self.head_q = nn.Linear(args.n_embd, args.head_qk, bias=False)
+            self.head_k = nn.Linear(args.n_embd, args.head_qk, bias=False)
+            self.register_buffer(
+                "copy_mask", torch.tril(torch.ones(args.ctx_len, args.ctx_len))
+            )
+        if args.dropout > 0:
+            self.drop0 = nn.Dropout(p=args.dropout)
+
+    def configure_optimizers(self):
+        """Build the Adam optimizer with per-group ``my_lr_scale`` / weight decay.
+
+        Parameters are bucketed by name into 1x / 2x / 3x learning-rate groups
+        (only when ``args.layerwise_lr > 0``) and a weight-decay group (tensors
+        with at least two non-singleton dims, only when ``args.weight_decay > 0``).
+        Returns ``DeepSpeedCPUAdam`` when DeepSpeed offloading is active,
+        otherwise ``FusedAdam``.
+        """
+        args = self.args
+
+        lr_decay = set()
+        lr_1x = set()
+        lr_2x = set()
+        lr_3x = set()
+        for n, p in self.named_parameters():
+            if ("time_mix" in n) and (args.layerwise_lr > 0):
+                if args.my_pile_stage == 2:
+                    lr_2x.add(n)
+                else:
+                    lr_1x.add(n)
+            elif ("time_decay" in n) and (args.layerwise_lr > 0):
+                if args.my_pile_stage == 2:
+                    lr_3x.add(n)
+                else:
+                    lr_2x.add(n)
+            elif ("time_faaaa" in n) and (args.layerwise_lr > 0):
+                if args.my_pile_stage == 2:
+                    lr_2x.add(n)
+                else:
+                    lr_1x.add(n)
+            elif ("time_first" in n) and (args.layerwise_lr > 0):
+                lr_3x.add(n)
+            elif (len(p.squeeze().shape) >= 2) and (args.weight_decay > 0):
+                lr_decay.add(n)
+            else:
+                lr_1x.add(n)
+
+        # Sort the names so group contents have a deterministic order.
+        lr_decay = sorted(list(lr_decay))
+        lr_1x = sorted(list(lr_1x))
+        lr_2x = sorted(list(lr_2x))
+        lr_3x = sorted(list(lr_3x))
+        # print('decay', lr_decay)
+        # print('1x', lr_1x)
+        # print('2x', lr_2x)
+        # print('3x', lr_3x)
+        param_dict = {n: p for n, p in self.named_parameters()}
+
+        def make_group(names, lr_scale, weight_decay=0.0):
+            # One optimizer param group; "my_lr_scale" is an extra key stored
+            # alongside the standard ones.
+            return {
+                "params": [param_dict[n] for n in names],
+                "weight_decay": weight_decay,
+                "my_lr_scale": lr_scale,
+            }
+
+        if args.layerwise_lr > 0:
+            if args.my_pile_stage == 2:
+                scale_2x, scale_3x = 5.0, 5.0  # test: 2e-3 / args.lr_init, 3e-3 / args.lr_init
+            else:
+                scale_2x, scale_3x = 2.0, 3.0
+            optim_groups = [
+                make_group(lr_1x, 1.0),
+                make_group(lr_2x, scale_2x),
+                make_group(lr_3x, scale_3x),
+            ]
+        else:
+            optim_groups = [make_group(lr_1x, 1.0)]
+
+        use_weight_decay = args.weight_decay > 0
+        if use_weight_decay:
+            optim_groups += [make_group(lr_decay, args.weight_decay)]
+
+        adam_kwargs = dict(
+            lr=self.args.lr_init,
+            betas=self.args.betas,
+            eps=self.args.adam_eps,
+            bias_correction=True,
+            amsgrad=False,
+        )
+        if not use_weight_decay:
+            # Without decay the optimizer-level default is pinned to 0 and
+            # AdamW mode is switched off below.
+            adam_kwargs["weight_decay"] = 0
+
+        # The two optimizers spell the AdamW switch differently.
+        if self.deepspeed_offload:
+            return DeepSpeedCPUAdam(
+                optim_groups, adamw_mode=use_weight_decay, **adam_kwargs
+            )
+        return FusedAdam(optim_groups, adam_w_mode=use_weight_decay, **adam_kwargs)
+        # return ZeroOneAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, weight_decay=0, amsgrad=False, cuda_aware=False)
+
+    @property
+    def deepspeed_offload(self) -> bool:
+        """True when the trainer uses DeepSpeed with optimizer or parameter offload configured."""
+        strategy = self.trainer.strategy
+        if isinstance(strategy, DeepSpeedStrategy):
+            cfg = strategy.config["zero_optimization"]
+            # cfg.get may return a dict or None; normalise to a real bool.
+            return bool(cfg.get("offload_optimizer") or cfg.get("offload_param"))
+        return False
+
+    def forward(self, idx):
+        """Return logits for token ids ``idx`` (unpacked as ``B, T``); ``T`` must not exceed ``ctx_len``."""
+        args = self.args
+        B, T = idx.size()
+        assert T <= args.ctx_len, "Cannot forward, model ctx_len is exhausted."
+
+        x = self.emb(idx)
+        x_emb = x
+
+        if args.dropout > 0:
+            x = self.drop0(x)
+
+        # Blocks only receive the raw embeddings when tiny attention is on.
+        # The torch checkpoint (LoRA) path passes x_emb unconditionally.
+        extra = (x_emb,) if args.tiny_att_dim > 0 else ()
+        for block in self.blocks:
+            if args.grad_cp == 1:
+                if args.lora:
+                    x = torch_checkpoint(block, x, x_emb, use_reentrant=False)
+                else:
+                    x = deepspeed.checkpointing.checkpoint(block, x, *extra)
+            else:
+                x = block(x, *extra)
+
+        x = self.ln_out(x)
+
+        if args.head_qk > 0:
+            q = self.head_q(x)[:, :T, :]
+            k = self.head_k(x)[:, :T, :]
+            c = (q @ k.transpose(-2, -1)) * (1.0 / args.head_qk)
+            c = c.masked_fill(self.copy_mask[:T, :T] == 0, 0)
+
+            # Project the (T, T) scores onto the vocabulary via one-hot token
+            # ids, in the dtype selected by RWKV_FLOAT_MODE.
+            if "32" in os.environ["RWKV_FLOAT_MODE"]:
+                c = c @ F.one_hot(idx, num_classes=args.vocab_size)
+            elif os.environ["RWKV_FLOAT_MODE"] == "fp16":
+                c = c @ F.one_hot(idx, num_classes=args.vocab_size).half()
+            elif os.environ["RWKV_FLOAT_MODE"] == "bf16":
+                c = c @ F.one_hot(idx, num_classes=args.vocab_size).bfloat16()
+
+            x = self.head(x) + c
+        else:
+            x = self.head(x)
+
+        return x
+
+    def training_step(self, batch, batch_idx):
+        """Cross-entropy loss (optionally masked when ``my_qa_mask == 1``), wrapped in ``L2Wrap``."""
+        args = self.args
+        if args.my_qa_mask != 1:
+            idx, targets = batch
+            logits = self(idx)
+            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))
+            # if '0' in os.environ["RWKV_MY_TESTING"]:
+            #     print('logits', logits)
+            #     torch.set_printoptions(threshold=10000)
+            #     print('idx', idx)
+            #     exit(0)
+        else:
+            idx, targets, mask = batch
+            mask = mask.view(-1)
+            sum_mask = torch.sum(mask).item()
+            # if sum_mask == 0:
+            #     return torch.tensor([0.0], requires_grad=True)
+
+            logits = self(idx)
+            if sum_mask == mask.shape[0]:
+                # Every position is selected: plain mean cross-entropy.
+                loss = F.cross_entropy(
+                    logits.view(-1, logits.size(-1)), targets.view(-1)
+                )
+                # print('rank', self.global_rank, 'loss', loss.item())
+            else:
+                # Average the per-token loss over the masked positions only.
+                loss = F.cross_entropy(
+                    logits.view(-1, logits.size(-1)), targets.view(-1), reduction="none"
+                )
+                # loss_raw = loss
+                loss = torch.sum(loss * mask) / sum_mask
+
+                # torch.set_printoptions(threshold=10000)
+                # if True: #self.global_rank == 1:
+                #     tmp = ''
+                #     sss = 0
+                #     ccc = 0
+                #     for i in range(mask.shape[0]):
+                #         if mask[i] > 0:
+                #             tmp += str(idx.view(-1)[i].item()) + ','
+                #             sss += loss_raw.view(-1)[i].float().item()
+                #             ccc += 1
+                #     print('rank', self.global_rank, 'loss', loss.item(), 'lavg', sss / ccc)#, 'tmp', tmp, 'input', idx)
+        return L2Wrap.apply(loss, logits)
+
+    def training_step_end(self, batch_parts):
+        """On Lightning versions other than 2.x, gather step outputs and stash them on the trainer (global rank 0 only)."""
+        if pl.__version__[0] != "2":
+            gathered = self.all_gather(batch_parts)
+            if self.trainer.is_global_zero:
+                self.trainer.my_loss_all = gathered
+
+    def generate_init_weight(self):
+        """Create a freshly initialised state dict (CPU tensors, cast per RWKV_FLOAT_MODE).
+
+        Norm / time_* / mask / pos_emb entries are copied from the current
+        state (``ln_x.weight`` is reset to a layer-dependent constant); every
+        other entry is treated as a 2-D matrix and re-initialised to zeros,
+        uniform or orthogonal values depending on its name.
+        """
+        print(
+            """
+############################################################################
+#
+# Init model weight (slow for large models)...
+#
+############################################################################
+"""
+        )
+        # Name fragments whose matrices start at zero.
+        zero = (
+            ".att.output.",
+            ".ffn.value.",
+            ".ffn.receptance.",
+            ".ffnPre.value.",
+            ".ffnPre.receptance.",
+            "head_q.",
+            ".oo.",
+            ".rr.",
+        )
+
+        m = {}
+        # Build the state dict once instead of once per parameter.
+        state = self.state_dict()
+        for n, p in state.items():
+            shape = p.shape
+
+            gain = 1.0
+            scale = 1.0
+            if (
+                "ln_" in n
+                or ".ln" in n
+                or "time_" in n
+                or "_mask" in n
+                or "pos_emb" in n
+                or ".mask." in n
+            ):
+                if "ln_x.weight" in n:
+                    # The second dotted name component is parsed as the layer index.
+                    layer_scale = (1 + int(n.split(".")[1])) / self.args.n_layer
+                    m[n] = (p * 0.0) + (layer_scale**0.7)
+                else:
+                    m[n] = p
+            else:
+                if n == "emb.weight":
+                    # Negative scale selects uniform(-lr_init, lr_init) below.
+                    scale = -1 * self.args.lr_init
+                else:
+                    if shape[0] > shape[1]:
+                        gain = math.sqrt(shape[0] / shape[1])
+
+                    if any(kk in n for kk in zero):
+                        scale = 0
+                    if n == "head.weight":
+                        scale = 0.5
+                    if "head_k." in n:
+                        scale = 0.1
+                    if "head_q." in n:
+                        scale = 0
+
+                print(
+                    f"{str(shape[0]).ljust(5)} {str(shape[1]).ljust(5)} {str(scale).ljust(4)} {n}"
+                )
+
+                if self.args.accelerator.upper() == "GPU":
+                    m[n] = torch.empty((shape[0], shape[1]), device="cuda")
+                else:
+                    m[n] = torch.empty((shape[0], shape[1]))
+
+                if scale == 0:
+                    nn.init.zeros_(m[n])
+                elif scale < 0:
+                    nn.init.uniform_(m[n], a=scale, b=-scale)
+                else:
+                    nn.init.orthogonal_(m[n], gain=gain * scale)
+
+            m[n] = m[n].cpu()
+            if os.environ["RWKV_FLOAT_MODE"] == "fp16":
+                m[n] = m[n].half()
+            elif os.environ["RWKV_FLOAT_MODE"] == "bf16":
+                m[n] = m[n].bfloat16()
+
+            # if n == "emb.weight":
+            #     print(m[n])
+
+        gc.collect()
+        torch.cuda.empty_cache()
+        return m
--- a/finetune/lora/v5/src/trainer.py
+++ b/finetune/lora/v5/src/trainer.py
@@ -0,0 +1,310 @@
+import os, math, time, datetime, subprocess
+import torch
+from torch.utils.data import DataLoader
+import pytorch_lightning as pl
+from pytorch_lightning.utilities import rank_zero_info, rank_zero_only
+from .model import LORA_CONFIG
+
+
+def my_save(args, trainer, dd, ff):
+    """Persist the checkpoint ``dd`` under the path ``ff``.
+
+    The route is picked from substrings of ``ff``:
+
+    * ``"14b-run1"``: write to ``/dev/shm`` and start ``aws s3 mv`` into
+      ``s3://rwkv-14b-4k/``.
+    * ``"world/14b"`` or ``"world/7b"``: same staging, moved to
+      ``s3://rwkv-world/`` with the second ``/``-separated component of
+      ``ff`` as a file-name prefix.
+    * anything else: save locally, through ``trainer.save_checkpoint`` when
+      ``args.strategy`` contains ``"deepspeed_stage_3"`` and through
+      ``torch.save`` otherwise.
+
+    The upload process is started and not waited on.
+    """
+    if "14b-run1" in ff:
+        fn = ff.split("/")[-1]
+        fff = "/dev/shm/" + fn
+        torch.save(dd, fff)
+        # Argument list instead of a shell string: characters in the file
+        # name are passed to `aws` verbatim and never parsed by a shell.
+        subprocess.Popen(
+            ["aws", "s3", "mv", fff, f"s3://rwkv-14b-4k/{fn}", "--quiet"]
+        )
+    elif ("world/14b" in ff) or ("world/7b" in ff):
+        aa = ff.split("/")[1]
+        fn = ff.split("/")[-1]
+        fff = f"/dev/shm/{aa}-{fn}"
+        torch.save(dd, fff)
+        subprocess.Popen(
+            ["aws", "s3", "mv", fff, f"s3://rwkv-world/{aa}-{fn}", "--quiet"]
+        )
+    else:
+        if "deepspeed_stage_3" in args.strategy:
+            trainer.save_checkpoint(ff, weights_only=True)
+        else:
+            torch.save(dd, ff)
+
+
+class train_callback(pl.Callback):
+    def __init__(self, args):
+        """Keep the run configuration ``args`` for the training hooks to read."""
+        self.args = args
+        super().__init__()
+
+    def on_train_batch_start(self, trainer, pl_module, batch, batch_idx):
+        """Set this step's learning rate and weight decay; set up logging on step 0.
+
+        The rate starts at ``args.lr_init`` and is decayed towards
+        ``args.lr_final`` (linearly when either endpoint is 0, exponentially
+        otherwise) unless the two are equal or ``args.epoch_count`` is 0.
+        A non-zero ``args.my_exit_tokens`` applies a cosine schedule over a
+        token budget: it replaces the rate when positive and is averaged with
+        it when negative.  Once that budget is used up, rank zero (or every
+        rank under deepspeed stage 3) saves ``rwkv-final.pth`` and exits.
+        While ``trainer.global_step < args.warmup_steps`` the rate is scaled
+        by a factor rising from 0.2 towards 1.
+
+        The results are written to every param group of
+        ``trainer.optimizers[0]`` and mirrored on ``trainer.my_lr`` and
+        ``trainer.my_wd``.
+        """
+        args = self.args
+        # if args.cuda_cleanup > 0:
+        #     torch.cuda.empty_cache()
+        real_step = trainer.global_step + args.epoch_begin * args.epoch_steps
+
+        # LR schedule
+        w_step = args.warmup_steps
+        # Default so the weight-decay formula further down is defined even
+        # when neither schedule branch below assigns a progress value.
+        progress = 0
+        if args.lr_final == args.lr_init or args.epoch_count == 0:
+            lr = args.lr_init
+        else:
+            decay_step = real_step - args.my_pile_edecay * args.epoch_steps
+            decay_total = (args.epoch_count - args.my_pile_edecay) * args.epoch_steps
+            progress = (decay_step - w_step + 1) / (decay_total - w_step)
+            progress = min(1, max(0, progress))
+
+            if args.lr_final == 0 or args.lr_init == 0:  # linear decay
+                lr = args.lr_init + (args.lr_final - args.lr_init) * progress
+            else:  # exp decay
+                lr = args.lr_init * math.exp(
+                    math.log(args.lr_final / args.lr_init) * progress
+                )
+            # if trainer.is_global_zero:
+            #     print(trainer.global_step, decay_step, decay_total, w_step, progress, lr)
+
+        if args.my_exit_tokens != 0:  # cosine decay
+            real_tokens = real_step * args.ctx_len * args.real_bsz
+            warmup_tokens = w_step * args.ctx_len * args.real_bsz
+            progress = (real_tokens - warmup_tokens) / (
+                abs(args.my_exit_tokens) - warmup_tokens
+            )
+            progress = max(0, min(1, progress))
+            lr_final_factor = args.lr_final / args.lr_init
+            lr_mult = (0.5 + lr_final_factor / 2) + (
+                0.5 - lr_final_factor / 2
+            ) * math.cos(math.pi * progress)
+            if args.my_exit_tokens > 0:
+                lr = args.lr_init * lr_mult
+            else:
+                lr = (lr + args.lr_init * lr_mult) / 2
+            if progress >= 1:
+                # Token budget exhausted: write the final weights and stop.
+                if (trainer.is_global_zero) or ("deepspeed_stage_3" in args.strategy):
+                    my_save(
+                        args,
+                        trainer,
+                        pl_module.state_dict(),
+                        f"{args.proj_dir}/rwkv-final.pth",
+                    )
+                    exit(0)
+        if trainer.global_step < w_step:
+            lr = lr * (0.2 + 0.8 * trainer.global_step / w_step)
+
+        if args.weight_decay_final > 0:
+            # Exponential interpolation from weight_decay to weight_decay_final.
+            wd_now = args.weight_decay * math.exp(
+                math.log(args.weight_decay_final / args.weight_decay) * progress
+            )
+        else:
+            wd_now = args.weight_decay
+
+        for param_group in trainer.optimizers[0].param_groups:
+            # Groups created without weight decay keep it at zero.
+            if param_group["weight_decay"] > 0:
+                param_group["weight_decay"] = wd_now
+            if args.layerwise_lr > 0:
+                param_group["lr"] = lr * param_group["my_lr_scale"]
+                # print(param_group["lr"], param_group["my_lr_scale"])
+            else:
+                param_group["lr"] = lr
+
+        trainer.my_lr = lr
+        trainer.my_wd = wd_now
+        # rank_zero_info(f"{real_step} {lr}")
+
+        if trainer.global_step == 0:
+            if trainer.is_global_zero:  # logging
+                trainer.my_loss_sum = 0
+                trainer.my_loss_count = 0
+                trainer.my_log = open(args.proj_dir + "/train_log.txt", "a")
+                trainer.my_log.write(
+                    f"NEW RUN {args.my_timestamp}\n{vars(self.args)}\n"
+                )
+                # Best effort: the strategy object may not expose `config`.
+                try:
+                    print(f"\n{trainer.strategy.config}\n")
+                    trainer.my_log.write(f"{trainer.strategy.config}\n")
+                except Exception:
+                    pass
+                trainer.my_log.flush()
+                if len(args.wandb) > 0:
+                    print("Login to wandb...")
+                    import wandb
+
+                    wandb.init(
+                        project=args.wandb,
+                        name=args.run_name + " " + args.my_timestamp,
+                        config=args,
+                        save_code=False,
+                    )
+                    trainer.my_wandb = wandb
+
+    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
+        """Log throughput and loss after a batch and save the final model when due.
+
+        On the global-zero rank this updates the running loss statistics kept
+        on ``trainer`` (``my_loss``, ``my_loss_sum``, ``my_loss_count``,
+        ``my_epoch_loss``), reports them to the progress bar and, when
+        ``args.wandb`` is non-empty, to ``trainer.my_wandb``.  When
+        ``args.magic_prime > 0`` and the step counter reaches the value
+        derived from it, ``rwkv-final.pth`` is written through ``my_save``.
+        """
+        args = self.args
+        token_per_step = args.ctx_len * args.real_bsz
+        real_step = trainer.global_step + args.epoch_begin * args.epoch_steps
+        if trainer.is_global_zero:  # logging
+            t_now = time.time_ns()
+            kt_s = 0
+            # Throughput is measured against the timestamp stored by the
+            # previous call.  Any failure in here is ignored and kt_s keeps
+            # whatever value it had; presumably this covers the first batch,
+            # before `my_time_ns` exists - confirm.
+            try:
+                t_cost = (t_now - trainer.my_time_ns) / 1e9
+                # thousands of tokens per second
+                kt_s = token_per_step / t_cost / 1000
+                self.log("REAL it/s", 1.0 / t_cost, prog_bar=True, on_step=True)
+                self.log("Kt/s", kt_s, prog_bar=True, on_step=True)
+            except:
+                pass
+            trainer.my_time_ns = t_now
+            # The loss comes from the hook's `outputs` on pytorch_lightning 2.x
+            # and from `trainer.my_loss_all` (set outside this block) otherwise.
+            if pl.__version__[0] == "2":
+                trainer.my_loss = outputs["loss"]
+            else:
+                trainer.my_loss = trainer.my_loss_all.float().mean().item()
+            trainer.my_loss_sum += trainer.my_loss
+            trainer.my_loss_count += 1
+            # Mean loss over the batches counted since the counters were last reset.
+            trainer.my_epoch_loss = trainer.my_loss_sum / trainer.my_loss_count
+            self.log("lr", trainer.my_lr, prog_bar=True, on_step=True)
+            self.log("loss", trainer.my_epoch_loss, prog_bar=True, on_step=True)
+            # self.log("s", real_step, prog_bar=True, on_step=True)
+
+            if len(args.wandb) > 0:
+                lll = {
+                    "loss": trainer.my_loss,
+                    "lr": trainer.my_lr,
+                    "wd": trainer.my_wd,
+                    "Gtokens": real_step * token_per_step / 1e9,
+                }
+                # Throughput is only reported once a measurement exists.
+                if kt_s > 0:
+                    lll["kt/s"] = kt_s
+                trainer.my_wandb.log(lll, step=int(real_step))
+        if (trainer.is_global_zero) or (
+            "deepspeed_stage_3" in args.strategy
+        ):  # save pth
+            if args.magic_prime > 0:
+                # The target step count doubles when my_qa_mask is enabled.
+                expand_factor = 2 if args.my_qa_mask > 0 else 1
+                if int(real_step) == int(
+                    args.magic_prime * expand_factor // args.real_bsz
+                ) - 1 + int(args.my_random_steps):
+                    to_save_dict = pl_module.state_dict()
+                    my_save(
+                        args,
+                        trainer,
+                        to_save_dict,
+                        f"{args.proj_dir}/rwkv-final.pth",
+                    )
+        # Disabled: per-batch checkpointing.
+        # if args.batch_save==batch_idx :
+        #     to_save_dict = pl_module.state_dict()
+        #     for name, state in to_save_dict.items():
+        #         if 'img' in name:
+        #             to_save_dict[name] = state
+        #     try:
+        #             my_save(
+        #                 args, trainer,
+        #                 to_save_dict,
+        #                 f"{args.proj_dir}/rwkv-{args.epoch_begin + trainer.current_epoch}-{batch_idx}.pth",
+        #             )
+        #     except Exception as e:
+        #         print('Error\n\n', e, '\n\n')
+
+    def on_train_epoch_start(self, trainer, pl_module):
+        """Copy rank, world size and the absolute epoch number onto the dataset."""
+        train_set = trainer.train_dataloader.dataset
+        if pl.__version__[0] != "2":
+            # Outside pytorch_lightning 2.x the dataset sits one level deeper.
+            train_set = train_set.datasets
+        assert "MyDataset" in str(train_set)
+        train_set.global_rank = trainer.global_rank
+        train_set.real_epoch = int(self.args.epoch_begin + trainer.current_epoch)
+        train_set.world_size = trainer.world_size
+        # print(f'########## world_size {dataset.world_size} global_rank {dataset.global_rank} real_epoch {dataset.real_epoch} ##########')
+
+    def on_train_epoch_end(self, trainer, pl_module):
+        """Checkpoint when due, append the epoch summary to the log, maybe exit.
+
+        A checkpoint is written on rank zero (or on every rank under deepspeed
+        stage 3) when ``trainer.current_epoch`` is a multiple of
+        ``args.epoch_save`` or is the last epoch.  With ``args.lora`` set,
+        only LoRA tensors are kept, plus ``img`` tensors and the ``time`` /
+        ``ln`` tensors enabled in ``LORA_CONFIG["parts"]``.  A failed save is
+        printed and training continues.
+
+        Rank zero then appends one line to ``trainer.my_log``, resets the
+        running loss counters and exits the process once the absolute epoch
+        number reaches ``args.my_exit``.
+        """
+        args = self.args
+        to_save_dict = {}
+        if (trainer.is_global_zero) or (
+            "deepspeed_stage_3" in args.strategy
+        ):  # save pth
+            if (
+                args.epoch_save > 0 and trainer.current_epoch % args.epoch_save == 0
+            ) or (trainer.current_epoch == args.epoch_count - 1):
+                if args.data_type == "wds_img":
+                    # Only the encoder / decoder weights are stored.
+                    to_save_dict = {
+                        k: v
+                        for k, v in pl_module.state_dict().items()
+                        if k.startswith(("encoder.", "decoder."))
+                    }
+                else:
+                    to_save_dict = pl_module.state_dict()
+
+                if args.lora:
+                    enable_time_finetune = "time" in LORA_CONFIG["parts"]
+                    enable_ln_finetune = "ln" in LORA_CONFIG["parts"]
+                    # Reduce the state dict to the tensors the LoRA run trains.
+                    to_save_dict = {
+                        name: state
+                        for name, state in to_save_dict.items()
+                        if "img" in name
+                        or ".lora_" in name
+                        or (enable_time_finetune and ".time_" in name)
+                        or (enable_ln_finetune and ".ln" in name)
+                    }
+
+                # Best effort: a failed save must not abort the training run.
+                try:
+                    my_save(
+                        args,
+                        trainer,
+                        to_save_dict,
+                        f"{args.proj_dir}/rwkv-{args.epoch_begin + trainer.current_epoch}.pth",
+                    )
+                except Exception as e:
+                    print("Error\n\n", e, "\n\n")
+
+        if trainer.is_global_zero:  # logging
+            # Columns: absolute epoch, mean loss, exp(mean loss), lr, wall
+            # clock time, epoch index within this run.
+            trainer.my_log.write(
+                f"{args.epoch_begin + trainer.current_epoch} {trainer.my_epoch_loss:.6f} {math.exp(trainer.my_epoch_loss):.4f} {trainer.my_lr:.8f} {datetime.datetime.now()} {trainer.current_epoch}\n"
+            )
+            trainer.my_log.flush()
+
+            trainer.my_loss_sum = 0
+            trainer.my_loss_count = 0
+            if (args.epoch_begin + trainer.current_epoch) >= args.my_exit:
+                exit(0)
+
+
+@rank_zero_only
+def generate_init_weight(model, init_weight_name):
+    """Build the initial weights of ``model`` and save them to ``init_weight_name``.
+
+    The weights come from ``model.generate_init_weight()``.  When
+    ``model.args.my_pile_stage == 1`` and ``model.args.load_model`` is set,
+    every tensor of that checkpoint overwrites the generated tensor of the
+    same name.  A tensor that cannot be reshaped to the target shape is
+    linearly interpolated along its first dimension instead.  In stage 1 the
+    process exits after saving; it also exits if the checkpoint contains a
+    key the model does not have.
+    """
+    mm = model.generate_init_weight()
+
+    if model.args.my_pile_stage == 1:
+        if len(model.args.load_model) > 0:
+            print(f"Combine weights from {model.args.load_model}...")
+            # NOTE(review): torch.load unpickles the file - only point
+            # load_model at checkpoints from a trusted source.
+            load_dict = torch.load(model.args.load_model, map_location="cpu")
+            for k in load_dict:
+                # Explicit membership test: an `assert` would be stripped
+                # when Python runs with -O.
+                if k not in mm:
+                    print("missing", k)
+                    exit(0)
+                src = load_dict[k]
+                try:
+                    mm[k] = src.reshape(mm[k].shape)
+                except Exception:
+                    # Element counts differ: resample `src` to the target
+                    # length by linear interpolation along dimension 0.
+                    tmp = mm[k].squeeze().clone()
+                    print(k, src.shape, "-->", mm[k].shape)
+                    ss = src.shape[0]
+                    dd = tmp.shape[0]
+                    for i in range(dd):
+                        pos = i / dd * ss
+                        if pos >= ss - 1:
+                            tmp[i] = src[ss - 1]
+                        else:
+                            p0 = int(math.floor(pos))
+                            ii = pos - p0
+                            tmp[i] = src[p0] * (1 - ii) + src[p0 + 1] * (ii)
+                    mm[k] = tmp.reshape(mm[k].shape)
+                    # Show both ends of the source and of the result.
+                    sss = src.squeeze().float().cpu().numpy()
+                    print(sss[:10], "...", sss[-10:])
+                    mmm = mm[k].squeeze().float().cpu().numpy()
+                    print(mmm[:10], "...", mmm[-10:])
+
+    print(f"Save to {init_weight_name}...")
+    torch.save(mm, init_weight_name)
+
+    if model.args.my_pile_stage == 1:
+        print("Done. Now go for stage 2.")
+        exit(0)
--- a/finetune/lora/v5/src/utils.py
+++ b/finetune/lora/v5/src/utils.py
@@ -0,0 +1,139 @@
+import json, time, random, os
+import numpy as np
+import torch
+from torch.nn import functional as F
+
+time_slot = {}
+time_ref = time.time_ns()
+
+
+def record_time(name):
+    """Store under ``name`` the smallest elapsed time observed so far.
+
+    The elapsed time is in seconds, measured from ``time_ref`` (taken when
+    this module is loaded).  A name seen for the first time starts at 1e20.
+    """
+    elapsed = (time.time_ns() - time_ref) / 1e9
+    time_slot[name] = min(time_slot.get(name, 1e20), elapsed)
+
+
+class TOKENIZER:
+    """Tokenizer wrapper with a char-level mode and a sampling helper.
+
+    Word mode wraps a ``transformers`` fast tokenizer; char mode loads a
+    vocabulary from a UTF-16 JSON file and builds ``stoi`` / ``itos`` tables.
+    """
+
+    def __init__(self, WORD_NAME, UNKNOWN_CHAR="\ue083"):
+        """Load the vocabulary described by ``WORD_NAME``.
+
+        A list selects word mode: two equal entries load a
+        ``PreTrainedTokenizerFast`` from that tokenizer file, otherwise both
+        entries are passed to ``GPT2TokenizerFast`` (presumably the vocab and
+        merges files - confirm).  Any other value selects char mode and is
+        the path, without ``.json``, of a table mapping ids to characters.
+        ``UNKNOWN_CHAR`` must be present in that table.
+        """
+        if "list" in str(type(WORD_NAME)):
+            self.charMode = False
+            if WORD_NAME[0] == WORD_NAME[1]:
+                from transformers import PreTrainedTokenizerFast
+
+                self.tokenizer = PreTrainedTokenizerFast(tokenizer_file=WORD_NAME[0])
+            else:
+                from transformers import GPT2TokenizerFast
+
+                self.tokenizer = GPT2TokenizerFast(WORD_NAME[0], WORD_NAME[1])
+            self.vocab_size = len(self.tokenizer)
+        else:
+            self.charMode = True
+            with open(WORD_NAME + ".json", "r", encoding="utf-16") as result_file:
+                self.word_table = json.load(result_file)
+
+            self.vocab_size = len(self.word_table)
+
+            self.stoi = {v: int(k) for k, v in self.word_table.items()}
+            self.itos = {int(k): v for k, v in self.word_table.items()}
+
+            # Stored as the id of the unknown character, not the character.
+            self.UNKNOWN_CHAR = self.stoi[UNKNOWN_CHAR]
+
+    def refine_context(self, context):
+        """Trim every line, drop empty lines and prefix the text with a newline.
+
+        Besides ordinary whitespace, ideographic spaces (U+3000) and carriage
+        returns are stripped from the ends of each line.  The result always
+        starts with a newline, so empty input yields a single newline.
+        """
+        context = context.strip().split("\n")
+        for c in range(len(context)):
+            context[c] = context[c].strip().strip("\u3000").strip("\r")
+        context = list(filter(lambda c: c != "", context))
+        return "\n" + ("\n".join(context)).strip()
+
+    def sample_logits(
+        self, out, x, ctx_len, temperature=1.0, top_p_usual=None, top_p_newline=None
+    ):
+        """Sample one token id from the logits ``out`` with top-p filtering.
+
+        ``x`` is the context so far; only its last element is used, and only
+        in char mode, where ``top_p_newline`` applies right after a newline
+        and ``top_p_usual`` applies otherwise.  Probabilities below the top-p
+        cutoff are zeroed, the remainder is raised to ``1 / temperature`` and
+        one id is drawn.  The environment variable ``RWKV_RUN_DEVICE``
+        selects the NumPy path (``"cpu"``) or the torch path.  ``ctx_len`` is
+        not used.
+        """
+        # out[self.UNKNOWN_CHAR] = -float('Inf')
+        lastChar = int(x[-1])
+
+        probs = F.softmax(out, dim=-1)
+
+        if self.charMode and self.itos[lastChar] == "\n":
+            top_p = top_p_newline
+        else:
+            top_p = top_p_usual
+
+        if os.environ["RWKV_RUN_DEVICE"] == "cpu":
+            probs = probs.numpy()
+            sorted_probs = np.sort(probs)[::-1]
+            cumulative_probs = np.cumsum(sorted_probs)
+            # Smallest probability that is still inside the top-p nucleus.
+            cutoff = float(sorted_probs[np.argmax(cumulative_probs > top_p)])
+            probs[probs < cutoff] = 0
+            if temperature != 1.0:
+                # NumPy arrays have no .pow() method; np.power is the
+                # element-wise equivalent.
+                probs = np.power(probs, 1.0 / temperature)
+            probs = probs / np.sum(probs)
+            out = np.random.choice(a=len(probs), p=probs)
+            return out
+        else:
+            sorted_probs = torch.sort(probs, descending=True)[0]
+            cumulative_probs = torch.cumsum(sorted_probs, dim=-1).cpu().numpy()
+            cutoff = float(sorted_probs[np.argmax(cumulative_probs > top_p)])
+            probs[probs < cutoff] = 0
+            if temperature != 1.0:
+                probs = probs.pow(1.0 / temperature)
+            out = torch.multinomial(probs, num_samples=1)[0]
+            return out
+
+
+def MaybeIsPrime(number):
+    """Return True only when both the Fermat and the Miller-Rabin check pass."""
+    return bool(FermatPrimalityTest(number) and MillerRabinPrimalityTest(number))
+
+
+def FermatPrimalityTest(number):
+    """Probabilistic Fermat check using three random bases.
+
+    Returns False for ``number <= 1``.  Otherwise each base ``a`` is drawn
+    from ``[1, number - 1]`` and the result is True only if every one
+    satisfies ``a ** (number - 1) % number == 1``.
+    """
+    if number <= 1:
+        return False
+    return all(
+        pow(random.randint(2, number) - 1, number - 1, number) == 1
+        for _ in range(3)
+    )
+
+
+def MillerRabinPrimalityTest(number):
+    """Probabilistic Miller-Rabin check using three random bases.
+
+    Returns True for 2 and False for every other even number and for
+    anything below 2 (including negative values).  For an odd ``number`` a
+    True result means no sampled base proved it composite.
+    """
+    if number == 2:
+        return True
+    if number < 2 or number % 2 == 0:
+        return False
+
+    # Write number - 1 as odd_part * 2 ** twos.
+    odd_part = number - 1
+    twos = 0
+    while odd_part % 2 == 0:
+        odd_part //= 2
+        twos += 1
+
+    for _ in range(3):
+        # Uniform base in [2, number - 1].
+        base = random.randint(2, number - 1)
+        x = pow(base, odd_part, number)
+        if x == 1 or x == number - 1:
+            continue
+
+        # Square up to twos - 1 times looking for number - 1.
+        squarings = 1
+        while squarings <= twos - 1 and x != number - 1:
+            x = pow(x, 2, number)
+            squarings += 1
+        if x != number - 1:
+            return False
+
+    return True
--- a/finetune/lora/v5/train.py
+++ b/finetune/lora/v5/train.py
@@ -0,0 +1,436 @@
+########################################################################################################
+# The RWKV Language Model - https://github.com/BlinkDL/RWKV-LM
+########################################################################################################
+
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+if __name__ == "__main__":
+    from argparse import ArgumentParser
+    from pytorch_lightning import Trainer
+    from pytorch_lightning.utilities import rank_zero_info, rank_zero_only
+    import pytorch_lightning as pl
+
+    rank_zero_info("########## work in progress ##########")
+
+    parser = ArgumentParser()
+
+    parser.add_argument("--load_model", default="", type=str)  # full path, with .pth
+    parser.add_argument(
+        "--wandb", default="", type=str
+    )  # wandb project name. if "" then don't use wandb
+    parser.add_argument("--proj_dir", default="out", type=str)
+    parser.add_argument("--random_seed", default="-1", type=int)
+
+    parser.add_argument("--data_file", default="", type=str)
+    parser.add_argument("--data_type", default="utf-8", type=str)
+    parser.add_argument(
+        "--vocab_size", default=0, type=int
+    )  # vocab_size = 0 means auto (for char-level LM and .txt data)
+
+    parser.add_argument("--ctx_len", default=1024, type=int)
+    parser.add_argument(
+        "--epoch_steps", default=1000, type=int
+    )  # a mini "epoch" has [epoch_steps] steps
+    parser.add_argument(
+        "--epoch_count", default=500, type=int
+    )  # train for this many "epochs". will continue afterwards with lr = lr_final
+    parser.add_argument(
+        "--epoch_begin", default=0, type=int
+    )  # if you load a model trained for x "epochs", set epoch_begin = x
+    parser.add_argument(
+        "--epoch_save", default=5, type=int
+    )  # save the model every [epoch_save] "epochs"
+
+    parser.add_argument(
+        "--micro_bsz", default=12, type=int
+    )  # micro batch size (batch size per GPU)
+    parser.add_argument("--n_layer", default=6, type=int)
+    parser.add_argument("--n_embd", default=512, type=int)
+    parser.add_argument("--dim_att", default=0, type=int)
+    parser.add_argument("--dim_ffn", default=0, type=int)
+    parser.add_argument(
+        "--pre_ffn", default=0, type=int
+    )  # replace first att layer by ffn (sometimes better)
+    parser.add_argument("--head_qk", default=0, type=int)  # my headQK trick
+    parser.add_argument("--tiny_att_dim", default=0, type=int)  # tiny attention dim
+    parser.add_argument(
+        "--tiny_att_layer", default=-999, type=int
+    )  # tiny attention @ which layer
+
+    parser.add_argument(
+        "--lr_init", default=6e-4, type=float
+    )  # 6e-4 for L12-D768, 4e-4 for L24-D1024, 3e-4 for L24-D2048
+    parser.add_argument("--lr_final", default=1e-5, type=float)
+    parser.add_argument(
+        "--warmup_steps", default=-1, type=int
+    )  # try 50 if you load a model
+    parser.add_argument("--beta1", default=0.9, type=float)
+    parser.add_argument(
+        "--beta2", default=0.99, type=float
+    )  # use 0.999 when your model is close to convergence
+    parser.add_argument("--adam_eps", default=1e-8, type=float)
+    parser.add_argument(
+        "--grad_cp", default=0, type=int
+    )  # gradient checkpt: saves VRAM, but slower
+    parser.add_argument(
+        "--dropout", default=0, type=float
+    )  # try 0.01 / 0.02 / 0.05 / 0.1
+    parser.add_argument(
+        "--weight_decay", default=0, type=float
+    )  # try 0.1 / 0.01 / 0.001
+    parser.add_argument("--weight_decay_final", default=-1, type=float)
+
+    parser.add_argument(
+        "--my_pile_version", default=1, type=int
+    )  # my special pile version
+    parser.add_argument("--my_pile_stage", default=0, type=int)  # my special pile mode
+    parser.add_argument(
+        "--my_pile_shift", default=-1, type=int
+    )  # my special pile mode - text shift
+    parser.add_argument("--my_pile_edecay", default=0, type=int)
+    parser.add_argument(
+        "--layerwise_lr", default=1, type=int
+    )  # layerwise lr for faster convergence (but slower it/s)
+    parser.add_argument(
+        "--ds_bucket_mb", default=200, type=int
+    )  # deepspeed bucket size in MB. 200 seems enough
+    # parser.add_argument("--cuda_cleanup", default=0, type=int)  # extra cuda cleanup (sometimes helpful)
+
+    parser.add_argument("--my_sample_len", default=0, type=int)
+    parser.add_argument("--my_ffn_shift", default=1, type=int)
+    parser.add_argument("--my_att_shift", default=1, type=int)
+    parser.add_argument(
+        "--head_size_a", default=64, type=int
+    )  # can try larger values for larger models
+    parser.add_argument("--head_size_divisor", default=8, type=int)
+    parser.add_argument("--my_pos_emb", default=0, type=int)
+    parser.add_argument("--load_partial", default=0, type=int)
+    parser.add_argument("--magic_prime", default=0, type=int)
+    parser.add_argument("--my_qa_mask", default=0, type=int)
+    parser.add_argument("--my_random_steps", default=0, type=int)
+    parser.add_argument("--my_testing", default="", type=str)
+    parser.add_argument("--my_exit", default=99999999, type=int)
+    parser.add_argument("--my_exit_tokens", default=0, type=int)
+
+    # LORA
+    parser.add_argument("--emb", action="store_true")
+    parser.add_argument("--lora", action="store_true")
+    parser.add_argument("--lora_load", default="", type=str)
+    parser.add_argument("--lora_r", default=8, type=int)
+    parser.add_argument("--lora_alpha", default=32, type=float)
+    parser.add_argument("--lora_dropout", default=0.01, type=float)
+    parser.add_argument("--lora_parts", default="att,ln,time", type=str)
+
+    if pl.__version__[0] == "2":
+        parser.add_argument("--accelerator", default="gpu", type=str)
+        parser.add_argument("--strategy", default="auto", type=str)
+        parser.add_argument("--devices", default=1, type=int)
+        parser.add_argument("--num_nodes", default=1, type=int)
+        parser.add_argument("--precision", default="fp16", type=str)
+        parser.add_argument("--accumulate_grad_batches", default=1, type=int)
+    else:
+        parser = Trainer.add_argparse_args(parser)
+    args = parser.parse_args()
+
+    ########################################################################################################
+
+    import os, warnings, math, datetime, sys, time
+    import numpy as np
+    import torch
+    from torch.utils.data import DataLoader
+
+    if "deepspeed" in args.strategy:
+        import deepspeed
+    from pytorch_lightning import seed_everything
+
+    if args.random_seed >= 0:
+        print(
+            f"########## WARNING: GLOBAL SEED {args.random_seed} THIS WILL AFFECT MULTIGPU SAMPLING ##########\n"
+            * 3
+        )
+        seed_everything(args.random_seed)
+
+    np.set_printoptions(precision=4, suppress=True, linewidth=200)
+    warnings.filterwarnings(
+        "ignore", ".*Consider increasing the value of the `num_workers` argument*"
+    )
+    warnings.filterwarnings(
+        "ignore", ".*The progress bar already tracks a metric with the*"
+    )
+    # os.environ["WDS_SHOW_SEED"] = "1"
+
+    args.my_timestamp = datetime.datetime.today().strftime("%Y-%m-%d-%H-%M-%S")
+    args.enable_checkpointing = False
+    args.replace_sampler_ddp = False
+    args.logger = False
+    args.gradient_clip_val = 1.0
+    args.num_sanity_val_steps = 0
+    args.check_val_every_n_epoch = int(1e20)
+    args.log_every_n_steps = int(1e20)
+    args.max_epochs = args.epoch_count  # -1 continue forever
+    args.betas = (args.beta1, args.beta2)
+    args.real_bsz = int(args.num_nodes) * int(args.devices) * args.micro_bsz
+    os.environ["RWKV_MY_TESTING"] = args.my_testing
+    os.environ["RWKV_HEAD_SIZE_A"] = str(args.head_size_a)
+    if args.dim_att <= 0:
+        args.dim_att = args.n_embd
+    if args.dim_ffn <= 0:
+        args.dim_ffn = int((args.n_embd * 3.5) // 32 * 32)  # default = 3.5x emb size
+
+    if args.data_type == "wds_img":
+        args.run_name = f"v{args.my_img_version}-{args.my_img_size}-{args.my_img_bit}bit-{args.my_img_clip}x{args.my_img_clip_scale}"
+        args.proj_dir = f"{args.proj_dir}-{args.run_name}"
+    else:
+        args.run_name = (
+            f"{args.vocab_size} ctx{args.ctx_len} L{args.n_layer} D{args.n_embd}"
+        )
+    if not os.path.exists(args.proj_dir):
+        os.makedirs(args.proj_dir)
+
+    if args.my_pile_stage > 0:
+        magic_prime_bak = args.magic_prime
+
+        if args.my_pile_shift < 0:
+            args.my_pile_shift = 0
+
+        if magic_prime_bak > 0:
+            args.magic_prime = magic_prime_bak
+        if args.my_qa_mask == 2:
+            args.epoch_count = 2 * args.magic_prime // 40320
+        else:
+            args.epoch_count = args.magic_prime // 40320
+
+        args.epoch_steps = 40320 // args.real_bsz
+        assert args.epoch_steps * args.real_bsz == 40320
+        # if args.my_pile_stage == 2:
+        #     assert args.lr_final == args.lr_init
+        if args.my_pile_stage >= 2:  # find latest saved model
+            list_p = []
+            for p in os.listdir(args.proj_dir):
+                if p.startswith("rwkv") and p.endswith(".pth"):
+                    p = ((p.split("-"))[1].split("."))[0]
+                    if p != "final":
+                        if p == "init":
+                            p = -1
+                        else:
+                            p = int(p)
+                        list_p += [p]
+            list_p.sort()
+            max_p = list_p[-1]
+            if len(list_p) > 1:
+                args.my_pile_prev_p = list_p[-2]  # in case max_p is corrupted
+            if max_p == -1:
+                args.load_model = f"{args.proj_dir}/rwkv-init.pth"
+            else:
+                args.load_model = f"{args.proj_dir}/rwkv-{max_p}.pth"
+                if args.warmup_steps < 0:
+                    if args.my_pile_stage == 2:
+                        args.warmup_steps = 10
+                    else:
+                        args.warmup_steps = 30
+            args.epoch_begin = max_p + 1
+
+    samples_per_epoch = args.epoch_steps * args.real_bsz
+    tokens_per_epoch = samples_per_epoch * args.ctx_len
+    try:
+        deepspeed_version = deepspeed.__version__
+    except:
+        deepspeed_version = None
+        pass
+    rank_zero_info(
+        f"""
+############################################################################
+#
+# RWKV-5 {args.precision.upper()} on {args.num_nodes}x{args.devices} {args.accelerator.upper()}, bsz {args.num_nodes}x{args.devices}x{args.micro_bsz}={args.real_bsz}, {args.strategy} {'with grad_cp' if args.grad_cp > 0 else ''}
+#
+# Data = {args.data_file} ({args.data_type}), ProjDir = {args.proj_dir}
+#
+# Epoch = {args.epoch_begin} to {args.epoch_begin + args.epoch_count - 1}, save every {args.epoch_save} epoch
+#
+# Each "epoch" = {args.epoch_steps} steps, {samples_per_epoch} samples, {tokens_per_epoch} tokens
+#
+# Model = {args.n_layer} n_layer, {args.n_embd} n_embd, {args.ctx_len} ctx_len
+#
+# Adam = lr {args.lr_init} to {args.lr_final}, warmup {args.warmup_steps} steps, beta {args.betas}, eps {args.adam_eps}
+#
+# Found torch {torch.__version__}, recommend 1.13.1+cu117 or newer
+# Found deepspeed {deepspeed_version}, recommend 0.7.0 (faster than newer versions)
+# Found pytorch_lightning {pl.__version__}, recommend 1.9.5
+#
+############################################################################
+"""
+    )
+    rank_zero_info(str(vars(args)) + "\n")
+
+    assert args.data_type in ["utf-8", "utf-16le", "numpy", "binidx", "dummy", "uint16"]
+
+    if args.lr_final == 0 or args.lr_init == 0:
+        rank_zero_info(
+            "\n\nNote: lr_final = 0 or lr_init = 0. Using linear LR schedule instead.\n\n"
+        )
+
+    assert args.precision in ["fp32", "tf32", "fp16", "bf16"]
+    os.environ["RWKV_FLOAT_MODE"] = args.precision
+    if args.precision == "fp32":
+        for i in range(10):
+            rank_zero_info(
+                "\n\nNote: you are using fp32 (very slow). Try bf16 / tf32 for faster training.\n\n"
+            )
+    if args.precision == "fp16":
+        rank_zero_info(
+            "\n\nNote: you are using fp16 (might overflow). Try bf16 / tf32 for stable training.\n\n"
+        )
+
+    os.environ["RWKV_JIT_ON"] = "0"
+    if "deepspeed_stage_3" in args.strategy:
+        os.environ["RWKV_JIT_ON"] = "0"
+
+    torch.backends.cudnn.benchmark = True
+    torch.backends.cudnn.enabled = True
+    if args.precision == "fp32":
+        torch.backends.cudnn.allow_tf32 = False
+        torch.backends.cuda.matmul.allow_tf32 = False
+    else:
+        torch.backends.cudnn.allow_tf32 = True
+        torch.backends.cuda.matmul.allow_tf32 = True
+
+    if "32" in args.precision:
+        args.precision = 32
+    elif args.precision == "fp16":
+        args.precision = 16
+    else:
+        args.precision = "bf16"
+
+    ########################################################################################################
+
+    from src.trainer import train_callback, generate_init_weight
+    from src.dataset import MyDataset
+
+    train_data = MyDataset(args)
+    args.vocab_size = train_data.vocab_size
+
+    from src.model import RWKV, LORA_CONFIG, LoraLinear
+
+    if args.lora:
+        assert args.lora_r > 0, "LoRA should have its `r` > 0"
+        LORA_CONFIG["r"] = args.lora_r
+        LORA_CONFIG["alpha"] = args.lora_alpha
+        LORA_CONFIG["dropout"] = args.lora_dropout
+        LORA_CONFIG["parts"] = set(str(args.lora_parts).split(","))
+        enable_time_finetune = "time" in LORA_CONFIG["parts"]
+        enable_ln_finetune = "ln" in LORA_CONFIG["parts"]
+    model = RWKV(args)
+    # only train lora parameters
+    if args.lora:
+        model.requires_grad_(False)
+        for name, module in model.named_modules():
+            if any(n.startswith("lora_") for n, _ in module.named_parameters()):
+                print(f"  LoRA additionally training module {name}")
+                for pname, param in module.named_parameters():
+                    param.requires_grad = "lora_" in pname
+            elif enable_ln_finetune and ".ln" in name:
+                print(f"  LoRA additionally training module {name}")
+                for param in module.parameters():
+                    param.requires_grad = True
+            elif enable_time_finetune and any(
+                n.startswith("time") for n, _ in module.named_parameters()
+            ):
+                for pname, param in module.named_parameters():
+                    if pname.startswith("time"):
+                        print(f"  LoRA additionally training parameter {pname}")
+                        param.requires_grad = True
+
+    if (
+        len(args.load_model) == 0 or args.my_pile_stage == 1
+    ):  # shall we build the initial weights?
+        init_weight_name = f"{args.proj_dir}/rwkv-init.pth"
+        generate_init_weight(model, init_weight_name)  # save initial weights
+        args.load_model = init_weight_name
+
+    rank_zero_info(f"########## Loading {args.load_model}... ##########")
+    try:
+        load_dict = torch.load(args.load_model, map_location="cpu")
+        load_keys = list(load_dict.keys())
+        for k in load_keys:
+            if k.startswith("_forward_module."):
+                load_dict[k.replace("_forward_module.", "")] = load_dict[k]
+                del load_dict[k]
+    except:
+        rank_zero_info(f"Bad checkpoint {args.load_model}")
+        if args.my_pile_stage >= 2:  # try again using another checkpoint
+            max_p = args.my_pile_prev_p
+            if max_p == -1:
+                args.load_model = f"{args.proj_dir}/rwkv-init.pth"
+            else:
+                args.load_model = f"{args.proj_dir}/rwkv-{max_p}.pth"
+            args.epoch_begin = max_p + 1
+            rank_zero_info(f"Trying {args.load_model}")
+            load_dict = torch.load(args.load_model, map_location="cpu")
+
+    if args.load_partial == 1:
+        load_keys = load_dict.keys()
+        for k in model.state_dict():
+            if k not in load_keys:
+                load_dict[k] = model.state_dict()[k]
+    # model.load_state_dict(load_dict)
+
+    model.load_state_dict(load_dict, strict=(not args.lora))
+    if os.path.isfile(args.lora_load):
+        model.load_state_dict(
+            torch.load(args.lora_load, map_location="cpu"), strict=False
+        )
+
+    if pl.__version__[0] == "2":
+        trainer = Trainer(
+            accelerator=args.accelerator,
+            strategy=args.strategy,
+            devices=args.devices,
+            num_nodes=args.num_nodes,
+            precision=args.precision,
+            logger=args.logger,
+            callbacks=[train_callback(args)],
+            max_epochs=args.max_epochs,
+            check_val_every_n_epoch=args.check_val_every_n_epoch,
+            num_sanity_val_steps=args.num_sanity_val_steps,
+            log_every_n_steps=args.log_every_n_steps,
+            enable_checkpointing=args.enable_checkpointing,
+            accumulate_grad_batches=args.accumulate_grad_batches,
+            gradient_clip_val=args.gradient_clip_val,
+        )
+    else:
+        trainer = Trainer.from_argparse_args(
+            args,
+            callbacks=[train_callback(args)],
+        )
+
+    if trainer.global_rank == 0:
+        for n in model.state_dict():
+            shape = model.state_dict()[n].shape
+            shape = [i for i in shape if i != 1]
+            if len(shape) > 1:
+                print(f"{str(shape[0]).ljust(5)} {str(shape[1]).ljust(5)} {n}")
+            else:
+                print(f"{str(shape[0]).ljust(5)}       {n}")
+
+    if "deepspeed" in args.strategy:
+        trainer.strategy.config["zero_optimization"]["allgather_bucket_size"] = (
+            args.ds_bucket_mb * 1000 * 1000
+        )
+        trainer.strategy.config["zero_optimization"]["reduce_bucket_size"] = (
+            args.ds_bucket_mb * 1000 * 1000
+        )
+
+    # must set shuffle=False, persistent_workers=False (because worker is in another thread)
+    data_loader = DataLoader(
+        train_data,
+        shuffle=False,
+        pin_memory=True,
+        batch_size=args.micro_bsz,
+        num_workers=1,
+        persistent_workers=False,
+        drop_last=True,
+    )
+
+    trainer.fit(model, data_loader)
--- a/finetune/requirements.txt
+++ b/finetune/requirements.txt
@@ -1,3 +1,3 @@
-torch==1.13.1
+torch==2.1.2
 pytorch_lightning==1.9.5
-deepspeed==0.11.2
+deepspeed==0.12.6
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -13,11 +13,13 @@
        "@magenta/music": "^1.23.1",
        "@microsoft/fetch-event-source": "^2.0.1",
        "@primer/octicons-react": "^19.1.0",
+        "abcjs": "^6.2.3",
        "chart.js": "^4.3.0",
        "classnames": "^2.3.2",
        "file-saver": "^2.0.5",
        "html-midi-player": "^1.5.0",
        "i18next": "^22.4.15",
+        "lodash-es": "^4.17.21",
        "mobx": "^6.9.0",
        "mobx-react-lite": "^3.4.3",
        "pdfjs-dist": "^4.0.189",
@@ -40,6 +42,7 @@
      },
      "devDependencies": {
        "@types/file-saver": "^2.0.7",
+        "@types/lodash-es": "^4.17.12",
        "@types/react": "^18.2.6",
        "@types/react-beautiful-dnd": "^13.1.4",
        "@types/react-dom": "^18.2.4",
@@ -2533,6 +2536,21 @@
        "hoist-non-react-statics": "^3.3.0"
      }
    },
+    "node_modules/@types/lodash": {
+      "version": "4.14.202",
+      "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.14.202.tgz",
+      "integrity": "sha512-OvlIYQK9tNneDlS0VN54LLd5uiPCBOp7gS5Z0f1mjoJYBrtStzgmJBxONW3U6OZqdtNzZPmn9BS/7WI7BFFcFQ==",
+      "dev": true
+    },
+    "node_modules/@types/lodash-es": {
+      "version": "4.17.12",
+      "resolved": "https://registry.npmjs.org/@types/lodash-es/-/lodash-es-4.17.12.tgz",
+      "integrity": "sha512-0NgftHUcV4v34VhXm8QBSftKVXtbkBG3ViCjs6+eJ5a6y6Mi/jiFGPc1sC7QK+9BFhWrURE3EOggmWaSxL9OzQ==",
+      "dev": true,
+      "dependencies": {
+        "@types/lodash": "*"
+      }
+    },
    "node_modules/@types/long": {
      "version": "4.0.2",
      "resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.2.tgz",
@@ -2673,6 +2691,15 @@
      "integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==",
      "optional": true
    },
+    "node_modules/abcjs": {
+      "version": "6.2.3",
+      "resolved": "https://registry.npmjs.org/abcjs/-/abcjs-6.2.3.tgz",
+      "integrity": "sha512-epu8C1yRkxV7Ss9hS0Bu72rairl1p2sR3hviVowjtdDJvb5GRE0SrB4TtN4HBbaoYhvxGnSZQxGULfQlW3o3RQ==",
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/paulrosen"
+      }
+    },
    "node_modules/acorn": {
      "version": "7.4.1",
      "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.4.1.tgz",
@@ -4210,6 +4237,11 @@
      "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==",
      "dev": true
    },
+    "node_modules/lodash-es": {
+      "version": "4.17.21",
+      "resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.21.tgz",
+      "integrity": "sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw=="
+    },
    "node_modules/long": {
      "version": "4.0.0",
      "resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz",
@@ -6398,7 +6430,7 @@
    },
    "node_modules/typescript": {
      "version": "5.0.4",
-      "resolved": "https://registry.npmmirror.com/typescript/-/typescript-5.0.4.tgz",
+      "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.0.4.tgz",
      "integrity": "sha512-cW9T5W9xY37cc+jfEnaUvX91foxtHkza3Nw3wkoF4sSlKn0MONdkdEndig/qPBWXNkmplh3NzayQzCiHM4/hqw==",
      "dev": true,
      "bin": {
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -14,11 +14,13 @@
    "@magenta/music": "^1.23.1",
    "@microsoft/fetch-event-source": "^2.0.1",
    "@primer/octicons-react": "^19.1.0",
+    "abcjs": "^6.2.3",
    "chart.js": "^4.3.0",
    "classnames": "^2.3.2",
    "file-saver": "^2.0.5",
    "html-midi-player": "^1.5.0",
    "i18next": "^22.4.15",
+    "lodash-es": "^4.17.21",
    "mobx": "^6.9.0",
    "mobx-react-lite": "^3.4.3",
    "pdfjs-dist": "^4.0.189",
@@ -41,6 +43,7 @@
  },
  "devDependencies": {
    "@types/file-saver": "^2.0.7",
+    "@types/lodash-es": "^4.17.12",
    "@types/react": "^18.2.6",
    "@types/react-beautiful-dnd": "^13.1.4",
    "@types/react-dom": "^18.2.4",
--- a/frontend/src/_locales/ja/main.json
+++ b/frontend/src/_locales/ja/main.json
--- a/frontend/src/_locales/zh-hans/main.json
+++ b/frontend/src/_locales/zh-hans/main.json
@@ -162,7 +162,7 @@
  "Memory is not enough, try to increase the virtual memory or use a smaller model.": "内存不足，尝试增加虚拟内存，或使用一个更小规模的模型",
  "Bad PyTorch version, please reinstall PyTorch with cuda.": "错误的PyTorch版本，请重新安装CUDA版本的PyTorch",
  "The model file is corrupted, please download again.": "模型文件损坏，请重新下载",
-  "Found no NVIDIA driver, please install the latest driver.": "没有找到NVIDIA驱动，请安装最新驱动",
+  "Found no NVIDIA driver, please install the latest driver. If you are not using an Nvidia GPU, please switch the 'Strategy' to WebGPU or CPU in the Configs page.": "没有找到NVIDIA驱动，请安装最新驱动。如果你没有使用Nvidia显卡，请在配置页面将“Strategy”改为WebGPU或CPU",
  "VRAM is not enough, please reduce stored layers or use a lower precision in Configs page.": "显存不足，请在配置页面减少载入显存层数，或使用更低的精度",
  "Failed to enable custom CUDA kernel, ninja is required to load C++ extensions. You may be using the CPU version of PyTorch, please reinstall PyTorch with CUDA. Or if you are using a custom Python interpreter, you must compile the CUDA kernel by yourself or disable Custom CUDA kernel acceleration.": "自定义CUDA算子开启失败，需要安装Ninja来读取C++扩展。你可能正在使用CPU版本的PyTorch，请重新安装CUDA版本的PyTorch。如果你正在使用自定义Python解释器，你必须自己编译CUDA算子或禁用自定义CUDA算子加速",
  "Presets": "预设",
@@ -171,6 +171,10 @@
  "chinese": "中文",
  "default": "默认",
  "japanese": "日文",
+  "English": "英文",
+  "Chinese": "中文",
+  "Default": "默认",
+  "Japanese": "日文",
  "New Preset": "新建预设",
  "Import": "导入",
  "Name": "名称",
@@ -305,6 +309,7 @@
  "Loss is too high, please check the training data, and ensure your gpu driver is up to date.": "Loss过高，请检查训练数据，并确保你的显卡驱动是最新的",
  "This version of RWKV is not supported yet.": "暂不支持此版本的RWKV",
  "Main": "主干",
+  "Official": "官方",
  "Finetuned": "微调",
  "Global": "全球",
  "Local": "本地",
@@ -312,6 +317,8 @@
  "JP": "日文",
  "Music": "音乐",
  "Other": "其他",
+  "Role Play": "角色扮演",
+  "Recommended": "推荐",
  "Import MIDI": "导入MIDI",
  "Current Instrument": "当前乐器",
  "Please convert model to GGML format first": "请先将模型转换为GGML格式",
@@ -319,5 +326,26 @@
  "CPU (rwkv.cpp, Faster)": "CPU (rwkv.cpp, 更快)",
  "Play With External Player": "使用外部播放器播放",
  "Core API URL": "核心 API URL",
-  "Override core API URL(/chat/completions and /completions). If you don't know what this is, leave it blank.": "覆盖核心的 API URL (/chat/completions 和 /completions)。如果你不知道这是什么，请留空"
+  "Override core API URL(/chat/completions and /completions). If you don't know what this is, leave it blank.": "覆盖核心的 API URL (/chat/completions 和 /completions)。如果你不知道这是什么，请留空",
+  "Please change Strategy to CPU (rwkv.cpp) to use ggml format": "请将Strategy改为CPU (rwkv.cpp)以使用ggml格式",
+  "Only Auto Play Generated Content": "仅自动播放新生成的内容",
+  "Model has been converted and does not match current strategy. If you are using a new strategy, re-convert the model.": "所选模型已被转换过，并且不匹配当前的Strategy。如果你正在使用新的Strategy，请重新转换模型",
+  "Instruction 1": "指令1",
+  "Instruction 2": "指令2",
+  "Instruction 3": "指令3",
+  "Instruction: You are an expert assistant for summarizing and extracting information from given content\nGenerate a valid JSON in the following format:\n{\n    \"summary\": \"Summary of content\",\n    \"keywords\": [\"content keyword 1\", \"content keyword 2\"]\n}\n\nInput: The open-source community has introduced Eagle 7B, a new RNN model, built on the RWKV-v5 architecture. This new model has been trained on 1.1 trillion tokens and supports over 100 languages. The RWKV architecture, short for ‘Rotary Weighted Key-Value,’ is a type of architecture used in the field of artificial intelligence, particularly in natural language processing (NLP) and is a variation of the Recurrent Neural Network (RNN) architecture.\nEagle 7B promises lower inference cost and stands out as a leading 7B model in terms of environmental efficiency and language versatility.\nThe model, with its 7.52 billion parameters, shows excellent performance in multi-lingual benchmarks, setting a new standard in its category. It competes closely with larger models in English language evaluations and is distinctive as an “Attention-Free Transformer,” though it requires additional tuning for specific uses. This model is accessible under the Apache 2.0 license and can be downloaded from HuggingFace for both personal and commercial purposes.\nIn terms of multilingual performance, Eagle 7B has claimed to have achieved notable results in benchmarks covering 23 languages. Its English performance has also seen significant advancements, outperforming its predecessor, RWKV v4, and competing with top-tier models.\nWorking towards a more scalable architecture and use of data efficiently, Eagle 7B is a more inclusive AI technology, supporting a broader range of languages. 
This model challenges the prevailing dominance of transformer models by demonstrating the capabilities of RNNs like RWKV in achieving superior performance when trained on comparable data volumes.\nIn the RWKV model, the rotary mechanism transforms the input data in a way that helps the model better understand the position or or order of elements in a sequence. The weighted key value also makes the model efficient by retrieving the stored information from previous elements in a sequence. \nHowever, questions remain about the scalability of RWKV compared to transformers, although there is optimism regarding its potential. The team plans to include additional training, an in-depth paper on Eagle 7B, and the development of a 2T model.\n\nResponse: {": "Instruction: 你是一个专业的内容分析总结助手\n根据提供的内容生成以下格式的有效JSON信息:\n{\n    \"summary\": \"内容的简短摘要\",\n    \"keywords\": [\"内容关键词 1\", \"内容关键词 2\"]\n}\n\nInput: 开源社区推出了基于RWKV-v5架构的Eagle 7B新的RNN模型。这个新模型以1.1万亿个token进行了训练，并支持100多种语言。RWKV架构是人工智能领域中特别是自然语言处理（NLP）中使用的一种架构，它是循环神经网络（RNN）架构的一种变种。\nEagle 7B承诺低推理成本，并以其环境效益和语言灵活性在领先的7B模型中脱颖而出。\n该模型拥有75.2亿个参数，在多语言基准测试中表现出色，树立了新的行业标准。它在英语语言评估中与更大的模型竞争激烈，并作为“无注意力Transformer”独具特色，尽管它需要针对特定用途进行额外调整。该模型可在Apache 2.0许可下访问，并可从HuggingFace下载，用于个人和商业目的。\n关于多语言性能，Eagle 7B声称在涵盖23种语言的基准测试中取得了显著成绩。它的英语性能也取得了重大进步，超越了它的前身RWKV v4，并与顶级模型竞争。\n为了实现更可扩展的架构和有效利用数据，Eagle 7B是一种更包容的人工智能技术，支持更广泛的语言范围。通过展示RWKV等RNNs在训练相当数据量时实现卓越性能的能力，该模型挑战了Transformer模型的主导地位。\n在RWKV模型中，旋转机制以一种有助于模型更好地理解序列中元素的位置或顺序的方式转换输入数据。加权关键值还通过从序列中先前元素中检索存储的信息，使模型更高效。\n然而，与Transformer相比，人们对RWKV的可扩展性仍然存在疑问，尽管对其潜力持乐观态度。团队计划包括额外的训练、对Eagle 7B进行深入论文研究以及开发一个2T模型。\n\nResponse: {",
+  "Penalty Decay": "惩罚衰减",
+  "If you don't know what it is, keep it default.": "如果你不知道这是什么，保持默认",
+  "Failed to find the base model, please try to change your base model.": "未找到基底模型，请尝试更换基底模型",
+  "Markdown Renderer": "Markdown渲染",
+  "Load Conversation": "读取对话",
+  "The latest X messages will be sent to the server. If you are using the RWKV-Runner server, please use the default value because RWKV-Runner has built-in state cache management which only calculates increments. Sending all messages will have lower cost. If you are using ChatGPT, adjust this value according to your needs to reduce ChatGPT expenses.": "最近的X条消息会发送至服务器. 如果你正在使用RWKV-Runner服务器, 请使用默认值, 因为RWKV-Runner内置了state缓存管理, 只计算增量, 发送所有消息将具有更低的成本. 如果你正在使用ChatGPT, 则根据你的需要调整此值, 这可以降低ChatGPT的费用",
+  "History Message Number": "历史消息数量",
+  "Send All Message": "发送所有消息",
+  "Quantized Layers": "量化层数",
+  "Number of the neural network layers quantized with current precision, the more you quantize, the lower the VRAM usage, but the quality correspondingly decreases.": "神经网络以当前精度量化的层数, 量化越多, 占用显存越低, 但质量相应下降",
+  "Parallel Token Chunk Size": "并行Token块大小",
+  "Maximum tokens to be processed in parallel at once. For high end GPUs, this could be 64 or 128 (faster).": "一次最多可以并行处理的token数量. 对于高端显卡, 这可以是64或128 (更快)",
+  "Global Penalty": "全局惩罚",
+  "When generating a response, whether to include the submitted prompt as a penalty factor. By turning this off, you will get the same generated results as official RWKV Gradio. If you find duplicate results in the generated results, turning this on can help avoid generating duplicates.": "生成响应时, 是否将提交的prompt也纳入到惩罚项. 关闭此项将得到与RWKV官方Gradio完全一致的生成结果. 如果你发现生成结果出现重复, 那么开启此项有助于避免生成重复"
 }
--- a/frontend/src/components/MarkdownRender.tsx
+++ b/frontend/src/components/MarkdownRender.tsx
@@ -21,27 +21,93 @@ const Hyperlink: FC<any> = ({ href, children }) => {
  );
 };

-const MarkdownRender: FC<ReactMarkdownOptions> = (props) => {
+const MarkdownRender: FC<ReactMarkdownOptions & { disabled?: boolean }> = (props) => {
  return (
    <div dir="auto" className="markdown-body">
-      <ReactMarkdown
-        remarkPlugins={[remarkGfm, remarkBreaks]}
-        rehypePlugins={[
-          rehypeRaw,
-          [
-            rehypeHighlight,
-            {
-              detect: true,
-              ignoreMissing: true
-            }
-          ]
-        ]}
-        components={{
-          a: Hyperlink
-        }}
-      >
-        {props.children}
-      </ReactMarkdown>
+      {props.disabled ?
+        <div style={{ whiteSpace: 'pre-wrap' }}>
+          {props.children}
+        </div> :
+        <ReactMarkdown
+          allowedElements={[
+            'div',
+            'p',
+            'span',
+
+            'video',
+            'img',
+
+            'abbr',
+            'acronym',
+            'b',
+            'blockquote',
+            'code',
+            'em',
+            'i',
+            'li',
+            'ol',
+            'ul',
+            'strong',
+            'table',
+            'tr',
+            'td',
+            'th',
+
+            'details',
+            'summary',
+            'kbd',
+            'samp',
+            'sub',
+            'sup',
+            'ins',
+            'del',
+            'var',
+            'q',
+            'dl',
+            'dt',
+            'dd',
+            'ruby',
+            'rt',
+            'rp',
+
+            'br',
+            'hr',
+
+            'h1',
+            'h2',
+            'h3',
+            'h4',
+            'h5',
+            'h6',
+
+            'thead',
+            'tbody',
+            'tfoot',
+            'u',
+            's',
+            'a',
+            'pre',
+            'cite'
+          ]}
+          unwrapDisallowed={true}
+          remarkPlugins={[remarkGfm, remarkBreaks]}
+          rehypePlugins={[
+            rehypeRaw,
+            [
+              rehypeHighlight,
+              {
+                detect: true,
+                ignoreMissing: true
+              }
+            ]
+          ]}
+          components={{
+            a: Hyperlink
+          }}
+        >
+          {props.children}
+        </ReactMarkdown>
+      }
    </div>
  );
 };
--- a/frontend/src/components/NumberInput.tsx
+++ b/frontend/src/components/NumberInput.tsx
@@ -8,10 +8,12 @@ export const NumberInput: FC<{
  max: number,
  step?: number,
  onChange?: (ev: React.ChangeEvent<HTMLInputElement>, data: SliderOnChangeData) => void
-  style?: CSSProperties
-}> = ({ value, min, max, step, onChange, style }) => {
+  style?: CSSProperties,
+  toFixed?: number
+  disabled?: boolean
+}> = ({ value, min, max, step, onChange, style, toFixed = 2, disabled }) => {
  return (
-    <Input type="number" style={style} value={value.toString()} min={min} max={max} step={step}
+    <Input type="number" style={style} value={value.toString()} min={min} max={max} step={step} disabled={disabled}
      onChange={(e, data) => {
        onChange?.(e, { value: Number(data.value) });
      }}
@@ -22,7 +24,7 @@ export const NumberInput: FC<{
            value = Number(((
                Math.round((value - offset) / step) * step)
              + offset)
-            .toFixed(2)); // avoid precision issues
+            .toFixed(toFixed)); // avoid precision issues
          }
          onChange(e, { value: Math.max(Math.min(value, max), min) });
        }
--- a/frontend/src/components/RunButton.tsx
+++ b/frontend/src/components/RunButton.tsx
@@ -19,6 +19,7 @@ import { useNavigate } from 'react-router';
 import { WindowShow } from '../../wailsjs/runtime';
 import { convertToGGML, convertToSt } from '../utils/convert-model';
 import { Precision } from '../types/configs';
+import { defaultCompositionABCPrompt, defaultCompositionPrompt } from '../pages/defaultConfigs';

 const mainButtonText = {
  [ModelStatus.Offline]: 'Run',
@@ -48,6 +49,7 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean

      const modelConfig = commonStore.getCurrentModelConfig();
      const webgpu = modelConfig.modelParameters.device === 'WebGPU';
+      const webgpuPython = modelConfig.modelParameters.device === 'WebGPU (Python)';
      const cpp = modelConfig.modelParameters.device === 'CPU (rwkv.cpp)';
      let modelName = '';
      let modelPath = '';
@@ -77,7 +79,7 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
        });
      };

-      if (webgpu) {
+      if (webgpu || webgpuPython) {
        if (!['.st', '.safetensors'].some(ext => modelPath.endsWith(ext))) {
          const stModelPath = modelPath.replace(/\.pth$/, '.st');
          if (await FileExists(stModelPath)) {
@@ -92,7 +94,7 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
            return;
          } else {
            toastWithButton(t('Please convert model to safe tensors format first'), t('Convert'), () => {
-              convertToSt(modelConfig);
+              convertToSt(modelConfig, navigate);
            });
            commonStore.setStatus({ status: ModelStatus.Offline });
            return;
@@ -100,7 +102,7 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
        }
      }

-      if (!webgpu) {
+      if (!webgpu && !webgpuPython) {
        if (['.st', '.safetensors'].some(ext => modelPath.endsWith(ext))) {
          toast(t('Please change Strategy to WebGPU to use safetensors format'), { type: 'error' });
          commonStore.setStatus({ status: ModelStatus.Offline });
@@ -138,6 +140,14 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
        }
      }

+      if (!cpp) {
+        if (['.bin'].some(ext => modelPath.endsWith(ext))) {
+          toast(t('Please change Strategy to CPU (rwkv.cpp) to use ggml format'), { type: 'error' });
+          commonStore.setStatus({ status: ModelStatus.Offline });
+          return;
+        }
+      }
+
      if (!await FileExists(modelPath)) {
        showDownloadPrompt(t('Model file not found'), modelName);
        commonStore.setStatus({ status: ModelStatus.Offline });
@@ -168,7 +178,7 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
      const isUsingCudaBeta = modelConfig.modelParameters.device === 'CUDA-Beta';

      startServer(commonStore.settings.customPythonPath, port, commonStore.settings.host !== '127.0.0.1' ? '0.0.0.0' : '127.0.0.1',
-        !!modelConfig.enableWebUI, isUsingCudaBeta, cpp
+        !!modelConfig.enableWebUI, isUsingCudaBeta, cpp, webgpuPython
      ).catch((e) => {
        const errMsg = e.message || e;
        if (errMsg.includes('path contains space'))
@@ -202,13 +212,15 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
                temperature: modelConfig.apiParameters.temperature,
                top_p: modelConfig.apiParameters.topP,
                presence_penalty: modelConfig.apiParameters.presencePenalty,
-                frequency_penalty: modelConfig.apiParameters.frequencyPenalty
+                frequency_penalty: modelConfig.apiParameters.frequencyPenalty,
+                penalty_decay: modelConfig.apiParameters.penaltyDecay,
+                global_penalty: modelConfig.apiParameters.globalPenalty
              });
            }

            const strategy = getStrategy(modelConfig);
            let customCudaFile = '';
-            if ((modelConfig.modelParameters.device.includes('CUDA') || modelConfig.modelParameters.device === 'Custom')
+            if ((modelConfig.modelParameters.device.startsWith('CUDA') || modelConfig.modelParameters.device === 'Custom')
              && modelConfig.modelParameters.useCustomCuda
              && !strategy.split('->').some(s => ['cuda', 'fp32'].every(v => s.includes(v)))) {
              if (commonStore.platform === 'windows') {
@@ -248,6 +260,7 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
                commonStore.setStatus({ status: ModelStatus.Working });
                let buttonNameMap = {
                  'novel': 'Completion',
+                  'abc': 'Composition',
                  'midi': 'Composition'
                };
                let buttonName = 'Chat';
@@ -255,8 +268,15 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
                const buttonFn = () => {
                  navigate({ pathname: '/' + buttonName.toLowerCase() });
                };
+                if (modelName.toLowerCase().includes('abc') && commonStore.compositionParams.prompt === defaultCompositionPrompt) {
+                  commonStore.setCompositionParams({
+                    ...commonStore.compositionParams,
+                    prompt: defaultCompositionABCPrompt
+                  });
+                  commonStore.setCompositionSubmittedPrompt(defaultCompositionABCPrompt);
+                }

-                if ((modelConfig.modelParameters.device === 'CUDA' || modelConfig.modelParameters.device === 'CUDA-Beta') &&
+                if (modelConfig.modelParameters.device.startsWith('CUDA') &&
                  modelConfig.modelParameters.storedLayers < modelConfig.modelParameters.maxStoredLayers &&
                  commonStore.monitorData && commonStore.monitorData.totalVram !== 0 &&
                  (commonStore.monitorData.usedVram / commonStore.monitorData.totalVram) < 0.9)
@@ -271,9 +291,10 @@ export const RunButton: FC<{ onClickRun?: MouseEventHandler, iconMode?: boolean
                  'not enough memory': 'Memory is not enough, try to increase the virtual memory or use a smaller model.',
                  'not compiled with CUDA': 'Bad PyTorch version, please reinstall PyTorch with cuda.',
                  'invalid header or archive is corrupted': 'The model file is corrupted, please download again.',
-                  'no NVIDIA driver': 'Found no NVIDIA driver, please install the latest driver.',
+                  'no NVIDIA driver': 'Found no NVIDIA driver, please install the latest driver. If you are not using an Nvidia GPU, please switch the \'Strategy\' to WebGPU or CPU in the Configs page.',
                  'CUDA out of memory': 'VRAM is not enough, please reduce stored layers or use a lower precision in Configs page.',
-                  'Ninja is required to load C++ extensions': 'Failed to enable custom CUDA kernel, ninja is required to load C++ extensions. You may be using the CPU version of PyTorch, please reinstall PyTorch with CUDA. Or if you are using a custom Python interpreter, you must compile the CUDA kernel by yourself or disable Custom CUDA kernel acceleration.'
+                  'Ninja is required to load C++ extensions': 'Failed to enable custom CUDA kernel, ninja is required to load C++ extensions. You may be using the CPU version of PyTorch, please reinstall PyTorch with CUDA. Or if you are using a custom Python interpreter, you must compile the CUDA kernel by yourself or disable Custom CUDA kernel acceleration.',
+                  're-convert the model': 'Model has been converted and does not match current strategy. If you are using a new strategy, re-convert the model.'
                };
                const matchedError = Object.entries(errorsMap).find(([key, _]) => error.includes(key));
                const message = matchedError ? t(matchedError[1]) : error;
--- a/frontend/src/components/ValuedSlider.tsx
+++ b/frontend/src/components/ValuedSlider.tsx
@@ -9,8 +9,10 @@ export const ValuedSlider: FC<{
  max: number,
  step?: number,
  input?: boolean
-  onChange?: (ev: React.ChangeEvent<HTMLInputElement>, data: SliderOnChangeData) => void
-}> = ({ value, min, max, step, input, onChange }) => {
+  onChange?: (ev: React.ChangeEvent<HTMLInputElement>, data: SliderOnChangeData) => void,
+  toFixed?: number
+  disabled?: boolean
+}> = ({ value, min, max, step, input, onChange, toFixed, disabled }) => {
  const sliderRef = useRef<HTMLInputElement>(null);
  useEffect(() => {
    if (step && sliderRef.current && sliderRef.current.parentElement) {
@@ -23,9 +25,10 @@ export const ValuedSlider: FC<{
    <div className="flex items-center">
      <Slider ref={sliderRef} className="grow" style={{ minWidth: '50%' }} value={value} min={min}
        max={max} step={step}
-        onChange={onChange} />
+        onChange={onChange} disabled={disabled} />
      {input
-        ? <NumberInput style={{ minWidth: 0 }} value={value} min={min} max={max} step={step} onChange={onChange} />
+        ? <NumberInput style={{ minWidth: 0 }} value={value} min={min} max={max} step={step} onChange={onChange}
+          toFixed={toFixed} disabled={disabled} />
        : <Text>{value}</Text>}
    </div>
  );
--- a/frontend/src/pages/AudiotrackManager/AudiotrackEditor.tsx
+++ b/frontend/src/pages/AudiotrackManager/AudiotrackEditor.tsx
@@ -29,14 +29,14 @@ import {
 } from '../../types/composition';
 import { toast } from 'react-toastify';
 import {
-  absPathAsset,
  flushMidiRecordingContent,
  getMidiRawContentMainInstrument,
  getMidiRawContentTime,
  getServerRoot,
+  OpenFileDialog,
  refreshTracksTotalTime
 } from '../../utils';
-import { OpenOpenFileDialog, PlayNote } from '../../../wailsjs/go/backend_golang/App';
+import { PlayNote } from '../../../wailsjs/go/backend_golang/App';

 const snapValue = 25;
 const minimalMoveTime = 8; // 1000/125=8ms wait_events=125
@@ -471,15 +471,7 @@ const AudiotrackEditor: FC<{ setPrompt: (prompt: string) => void }> = observer((
                  return;
                }

-                OpenOpenFileDialog('*.mid').then(async filePath => {
-                  if (!filePath)
-                    return;
-
-                  let blob: Blob;
-                  if (commonStore.platform === 'web')
-                    blob = (filePath as unknown as { blob: Blob }).blob;
-                  else
-                    blob = await fetch(absPathAsset(filePath)).then(r => r.blob());
+                OpenFileDialog('*.mid').then(async blob => {
                  const bodyForm = new FormData();
                  bodyForm.append('file_data', blob);
                  fetch(getServerRoot(commonStore.getCurrentModelConfig().apiParameters.apiPort) + '/midi-to-text', {
@@ -510,8 +502,6 @@ const AudiotrackEditor: FC<{ setPrompt: (prompt: string) => void }> = observer((
                  ).catch(e => {
                    toast(t('Error') + ' - ' + (e.message || e), { type: 'error', autoClose: 2500 });
                  });
-                }).catch(e => {
-                  toast(t('Error') + ' - ' + (e.message || e), { type: 'error', autoClose: 2500 });
                });
              }}>
              {t('Import MIDI')}
--- a/frontend/src/pages/Chat.tsx
+++ b/frontend/src/pages/Chat.tsx
@@ -1,6 +1,15 @@
 import React, { FC, useCallback, useEffect, useRef, useState } from 'react';
 import { useTranslation } from 'react-i18next';
-import { Avatar, Button, Menu, MenuPopover, MenuTrigger, PresenceBadge, Textarea } from '@fluentui/react-components';
+import {
+  Avatar,
+  Button,
+  Menu,
+  MenuPopover,
+  MenuTrigger,
+  PresenceBadge,
+  Switch,
+  Textarea
+} from '@fluentui/react-components';
 import commonStore, { ModelStatus } from '../stores/commonStore';
 import { observer } from 'mobx-react-lite';
 import { v4 as uuid } from 'uuid';
@@ -17,6 +26,7 @@ import {
  Delete28Regular,
  Dismiss16Regular,
  Dismiss24Regular,
+  FolderOpenVerticalRegular,
  RecordStop28Regular,
  SaveRegular,
  TextAlignJustify24Regular,
@@ -28,13 +38,22 @@ import { toast } from 'react-toastify';
 import { WorkHeader } from '../components/WorkHeader';
 import { DialogButton } from '../components/DialogButton';
 import { OpenFileFolder, OpenOpenFileDialog, OpenSaveFileDialog } from '../../wailsjs/go/backend_golang/App';
-import { absPathAsset, bytesToReadable, getServerRoot, setActivePreset, toastWithButton } from '../utils';
+import {
+  absPathAsset,
+  bytesToReadable,
+  getServerRoot,
+  newChatConversation,
+  OpenFileDialog,
+  setActivePreset,
+  toastWithButton
+} from '../utils';
 import { useMediaQuery } from 'usehooks-ts';
-import { botName, ConversationMessage, MessageType, userName, welcomeUuid } from '../types/chat';
+import { botName, ConversationMessage, MessageType, Role, userName, welcomeUuid } from '../types/chat';
 import { Labeled } from '../components/Labeled';
 import { ValuedSlider } from '../components/ValuedSlider';
 import { PresetsButton } from './PresetsManager/PresetsButton';
 import { webOpenOpenFileDialog } from '../utils/web-file-operations';
+import { defaultPenaltyDecay } from './defaultConfigs';

 let chatSseControllers: {
  [id: string]: AbortController
@@ -136,7 +155,7 @@ const ChatMessageItem: FC<{
    >
      {!editing ?
        <div className="flex flex-col">
-          <MarkdownRender>{messageItem.content}</MarkdownRender>
+          <MarkdownRender disabled={!commonStore.chatParams.markdown}>{messageItem.content}</MarkdownRender>
          {uuid in commonStore.attachments &&
            <div className="flex grow">
              <div className="grow" />
@@ -212,7 +231,7 @@ const SidePanel: FC = observer(() => {
        onClick={() => commonStore.setSidePanelCollapsed(true)}
      />
    </div>
-    <div className="flex flex-col gap-1 overflow-x-hidden overflow-y-auto p-1">
+    <div className="flex flex-col gap-1 overflow-x-hidden overflow-y-auto p-0.5">
      <Labeled flex breakline label={t('Max Response Token')}
        desc={t('By default, the maximum number of tokens that can be answered in a single response, it can be changed by the user by specifying API parameters.')}
        content={
@@ -228,7 +247,7 @@ const SidePanel: FC = observer(() => {
      <Labeled flex breakline label={t('Temperature')}
        desc={t('Sampling temperature, it\'s like giving alcohol to a model, the higher the stronger the randomness and creativity, while the lower, the more focused and deterministic it will be.')}
        content={
-          <ValuedSlider value={params.temperature} min={0} max={2} step={0.1}
+          <ValuedSlider value={params.temperature} min={0} max={3} step={0.1}
            input
            onChange={(e, data) => {
              commonStore.setChatParams({
@@ -239,7 +258,7 @@ const SidePanel: FC = observer(() => {
      <Labeled flex breakline label={t('Top_P')}
        desc={t('Just like feeding sedatives to the model. Consider the results of the top n% probability mass, 0.1 considers the top 10%, with higher quality but more conservative, 1 considers all results, with lower quality but more diverse.')}
        content={
-          <ValuedSlider value={params.topP} min={0} max={1} step={0.1} input
+          <ValuedSlider value={params.topP} min={0} max={1} step={0.05} input
            onChange={(e, data) => {
              commonStore.setChatParams({
                topP: data.value
@@ -268,14 +287,82 @@ const SidePanel: FC = observer(() => {
              });
            }} />
        } />
+      <Labeled flex breakline
+        label={t('Penalty Decay')
+          + ((!params.penaltyDecay || params.penaltyDecay === defaultPenaltyDecay) ? ` (${t('Default')})` : '')}
+        desc={t('If you don\'t know what it is, keep it default.')}
+        content={
+          // An unset penaltyDecay falls back to the default, as the Configs page slider does.
+          <ValuedSlider value={params.penaltyDecay || defaultPenaltyDecay} min={0.99} max={0.999}
+            step={0.001} toFixed={3} input
+            onChange={(e, data) => {
+              commonStore.setChatParams({ penaltyDecay: data.value });
+            }} />
+        } />
+      <Labeled flex breakline
+        label={t('History Message Number') + (params.historyN === 0 ? ` (${t('Default')})` : '')}
+        desc={params.historyN === 0 ? t('Send All Message') : t('The latest X messages will be sent to the server. If you are using the RWKV-Runner server, please use the default value because RWKV-Runner has built-in state cache management which only calculates increments. Sending all messages will have lower cost. If you are using ChatGPT, adjust this value according to your needs to reduce ChatGPT expenses.')
+        .replace('X', String(params.historyN))}
+        content={
+          <ValuedSlider value={params.historyN} min={0} max={20}
+            step={1} input
+            onChange={(e, data) => {
+              commonStore.setChatParams({
+                historyN: data.value
+              });
+            }} />
+        } />
    </div>
    <div className="grow" />
-    {/*<Button*/}
-    {/*  icon={<FolderOpenVerticalRegular />}*/}
-    {/*  onClick={() => {*/}
-    {/*  }}>*/}
-    {/*  {t('Load Conversation')}*/}
-    {/*</Button>*/}
+    <Labeled flex spaceBetween
+      label={t('Markdown Renderer')}
+      content={
+        <Switch checked={params.markdown}
+          onChange={(e, data) => {
+            commonStore.setChatParams({
+              markdown: data.checked
+            });
+          }} />
+      } />
+    {/* Loads a conversation from a text file; a line starting with a speaker prefix begins a new message. */}
+    <Button
+      icon={<FolderOpenVerticalRegular />}
+      onClick={() => {
+        OpenFileDialog('*.txt;*.md').then(async blob => {
+          const userNames = ['User:', 'Question:', 'Q:', 'Human:', 'Bob:'];
+          const assistantNames = ['Assistant:', 'Answer:', 'A:', 'Bot:', 'Alice:'];
+          const names = userNames.concat(assistantNames);
+          // Split on LF or CRLF so files with Windows line endings leave no stray '\r' inside messages.
+          const lines = (await blob.text()).split(/\r?\n/);
+
+          const { pushMessage, saveConversation } = newChatConversation();
+          // Text that appears before the first speaker prefix is attributed to the user.
+          let messageRole: Role = 'user';
+          let messageContent = '';
+          // Pushes the message accumulated so far, skipping whitespace-only content.
+          const flush = () => {
+            const trimmed = messageContent.trim();
+            if (trimmed)
+              pushMessage(messageRole, trimmed);
+          };
+          for (const line of lines) {
+            const lineName = names.find(name => line.startsWith(name));
+            if (lineName !== undefined) {
+              // A speaker prefix closes the previous message and starts a new one.
+              flush();
+              messageRole = userNames.includes(lineName) ? 'user' : 'assistant';
+              messageContent = line.slice(lineName.length);
+            } else {
+              // No prefix: this line continues the current message.
+              messageContent += '\n' + line;
+            }
+          }
+          flush();
+          saveConversation();
+        });
+      }}>
+      {t('Load Conversation')}
+    </Button>
    <Button
      icon={<SaveRegular />}
      onClick={() => {
@@ -295,7 +382,7 @@ const SidePanel: FC = observer(() => {
        OpenSaveFileDialog('*.txt', 'conversation.txt', savedContent).then((path) => {
          if (path)
            toastWithButton(t('Conversation Saved'), t('Open'), () => {
-              OpenFileFolder(path, false);
+              OpenFileFolder(path);
            });
        }).catch(e => {
          toast(t('Error') + ' - ' + (e.message || e), { type: 'error', autoClose: 2500 });
@@ -444,13 +531,15 @@ const ChatPanel: FC = observer(() => {
          Authorization: `Bearer ${commonStore.settings.apiKey}`
        },
        body: JSON.stringify({
-          messages,
+          messages: messages.slice(-commonStore.chatParams.historyN), // historyN 0 => slice(-0) === slice(0): all messages are sent
          stream: true,
          model: commonStore.settings.apiChatModelName, // 'gpt-3.5-turbo'
+          max_tokens: commonStore.chatParams.maxResponseToken,
          temperature: commonStore.chatParams.temperature,
          top_p: commonStore.chatParams.topP,
          presence_penalty: commonStore.chatParams.presencePenalty,
          frequency_penalty: commonStore.chatParams.frequencyPenalty,
+          penalty_decay: commonStore.chatParams.penaltyDecay === defaultPenaltyDecay ? undefined : commonStore.chatParams.penaltyDecay,
          user_name: commonStore.activePreset?.userName || undefined,
          assistant_name: commonStore.activePreset?.assistantName || undefined,
          presystem: commonStore.activePreset?.presystem && undefined
@@ -519,7 +608,7 @@ const ChatPanel: FC = observer(() => {
          style={{ zIndex: 1 }}
          icon={commonStore.sidePanelCollapsed ? <TextAlignJustify24Regular /> : <TextAlignJustifyRotate9024Regular />}
          onClick={() => commonStore.setSidePanelCollapsed(!commonStore.sidePanelCollapsed)} />
-        <div ref={bodyRef} className="grow overflow-y-scroll overflow-x-hidden pr-2">
+        <div ref={bodyRef} className="grow overflow-y-auto overflow-x-hidden pr-2">
          {commonStore.conversationOrder.map(uuid =>
            <ChatMessageItem key={uuid} uuid={uuid} onSubmit={onSubmit} />
          )}
--- a/frontend/src/pages/Completion.tsx
+++ b/frontend/src/pages/Completion.tsx
@@ -188,7 +188,7 @@ const CompletionPanel: FC = observer(() => {
          <Labeled flex breakline label={t('Temperature')}
            desc={t('Sampling temperature, it\'s like giving alcohol to a model, the higher the stronger the randomness and creativity, while the lower, the more focused and deterministic it will be.')}
            content={
-              <ValuedSlider value={params.temperature} min={0} max={2} step={0.1}
+              <ValuedSlider value={params.temperature} min={0} max={3} step={0.1}
                input
                onChange={(e, data) => {
                  setParams({
@@ -199,7 +199,7 @@ const CompletionPanel: FC = observer(() => {
          <Labeled flex breakline label={t('Top_P')}
            desc={t('Just like feeding sedatives to the model. Consider the results of the top n% probability mass, 0.1 considers the top 10%, with higher quality but more conservative, 1 considers all results, with lower quality but more diverse.')}
            content={
-              <ValuedSlider value={params.topP} min={0} max={1} step={0.1} input
+              <ValuedSlider value={params.topP} min={0} max={1} step={0.05} input
                onChange={(e, data) => {
                  setParams({
                    topP: data.value
--- a/frontend/src/pages/Composition.tsx
+++ b/frontend/src/pages/Composition.tsx
@@ -15,12 +15,13 @@ import { ArrowSync20Regular, Save28Regular } from '@fluentui/react-icons';
 import { PlayerElement, VisualizerElement } from 'html-midi-player';
 import * as mm from '@magenta/music/esm/core.js';
 import { NoteSequence } from '@magenta/music/esm/protobuf.js';
-import { defaultCompositionPrompt } from './defaultConfigs';
+import { defaultCompositionABCPrompt, defaultCompositionPrompt } from './defaultConfigs';
 import {
  CloseMidiPort,
  FileExists,
  OpenFileFolder,
  OpenMidiPort,
+  OpenSaveFileDialog,
  OpenSaveFileDialogBytes,
  SaveFile,
  StartFile
@@ -36,7 +37,9 @@ const CompositionPanel: FC = observer(() => {
  const { t } = useTranslation();
  const mq = useMediaQuery('(min-width: 640px)');
  const inputRef = useRef<HTMLTextAreaElement>(null);
-  const port = commonStore.getCurrentModelConfig().apiParameters.apiPort;
+  const modelConfig = commonStore.getCurrentModelConfig();
+  const port = modelConfig.apiParameters.apiPort;
+  const isABC = modelConfig.modelParameters.modelName.toLowerCase().includes('abc');
  const visualizerRef = useRef<VisualizerElement>(null);
  const playerRef = useRef<PlayerElement>(null);

@@ -133,6 +136,13 @@ const CompositionPanel: FC = observer(() => {
  }, [commonStore.midiPorts]);

  const generateNs = (autoPlay: boolean) => {
+    if (commonStore.getCurrentModelConfig().modelParameters.modelName.toLowerCase().includes('abc')) {
+      import('abcjs').then(ABCJS => {
+        ABCJS.renderAbc('abc-paper', commonStore.compositionParams.prompt, { responsive: 'resize' });
+      });
+      return;
+    }
+
    fetch(getServerRoot(port) + '/text-to-midi', {
      method: 'POST',
      headers: {
@@ -152,10 +162,14 @@ const CompositionPanel: FC = observer(() => {
        if (autoPlay) {
          if (commonStore.compositionParams.externalPlay)
            externalPlayListener();
-          else
+          else {
+            if (commonStore.compositionParams.playOnlyGeneratedContent && playerRef.current) {
+              playerRef.current.currentTime = Math.max(commonStore.compositionParams.generationStartTime - 1, 0);
+            }
            setTimeout(() => {
              playerRef.current?.start();
            });
+          }
        }
      });
    });
@@ -261,7 +275,7 @@ const CompositionPanel: FC = observer(() => {
            <Labeled flex breakline label={t('Temperature')}
              desc={t('Sampling temperature, it\'s like giving alcohol to a model, the higher the stronger the randomness and creativity, while the lower, the more focused and deterministic it will be.')}
              content={
-                <ValuedSlider value={params.temperature} min={0} max={2} step={0.1}
+                <ValuedSlider value={params.temperature} min={0} max={3} step={0.1}
                  input
                  onChange={(e, data) => {
                    setParams({
@@ -272,7 +286,7 @@ const CompositionPanel: FC = observer(() => {
            <Labeled flex breakline label={t('Top_P')}
              desc={t('Just like feeding sedatives to the model. Consider the results of the top n% probability mass, 0.1 considers the top 10%, with higher quality but more conservative, 1 considers all results, with lower quality but more diverse.')}
              content={
-                <ValuedSlider value={params.topP} min={0} max={1} step={0.1} input
+                <ValuedSlider value={params.topP} min={0} max={1} step={0.05} input
                  onChange={(e, data) => {
                    setParams({
                      topP: data.value
@@ -314,6 +328,14 @@ const CompositionPanel: FC = observer(() => {
                autoPlay: data.checked as boolean
              });
            }} />
+            <Checkbox className="select-none"
+              size="large" label={t('Only Auto Play Generated Content')} checked={params.playOnlyGeneratedContent}
+              onChange={async (_, data) => {
+                setParams({
+                  autoPlay: data.checked as boolean || commonStore.compositionParams.autoPlay,
+                  playOnlyGeneratedContent: data.checked as boolean
+                });
+              }} />
            <Labeled flex breakline label={t('MIDI Input')}
              desc={t('Select the MIDI input device to be used.')}
              content={
@@ -358,8 +380,13 @@ const CompositionPanel: FC = observer(() => {
            <DialogButton className="grow" text={t('Reset')} title={t('Reset')}
              contentText={t('Are you sure you want to reset this page? It cannot be undone.')}
              onConfirm={() => {
-                commonStore.setCompositionSubmittedPrompt(defaultCompositionPrompt);
-                setPrompt(defaultCompositionPrompt);
+                const isABC = commonStore.getCurrentModelConfig().modelParameters.modelName.toLowerCase().includes('abc');
+                const defaultPrompt = isABC ? defaultCompositionABCPrompt : defaultCompositionPrompt;
+                commonStore.setCompositionSubmittedPrompt(defaultPrompt);
+                setParams({
+                  generationStartTime: 0
+                });
+                setPrompt(defaultPrompt);
              }} />
            <Button className="grow" appearance="primary" onClick={() => {
              if (commonStore.compositionGenerating) {
@@ -368,6 +395,9 @@ const CompositionPanel: FC = observer(() => {
                generateNs(params.autoPlay);
              } else {
                commonStore.setCompositionGenerating(true);
+                setParams({
+                  generationStartTime: playerRef.current ? playerRef.current.duration : 0
+                });
                onSubmit(params.prompt);
              }
            }}>{!commonStore.compositionGenerating ? t('Generate') : t('Stop')}</Button>
@@ -376,23 +406,38 @@ const CompositionPanel: FC = observer(() => {
      </div>
      <div className="flex flex-col">
        <div className="ml-auto mr-auto">
-          <midi-visualizer
-            ref={visualizerRef}
-            type="waterfall"
-          />
+          {isABC ? <div /> :
+            <midi-visualizer
+              ref={visualizerRef}
+              type="waterfall"
+            />}
        </div>
        <div className="flex">
-          <midi-player
-            ref={playerRef}
-            style={{ width: '100%' }}
-          />
+          {isABC ? <div className="flex flex-col overflow-y-auto grow m-1" style={{ maxHeight: '260px' }}>
+              <div id="abc-paper" />
+            </div> :
+            <midi-player
+              ref={playerRef}
+              style={{ width: '100%' }}
+            />}
          <Button icon={<Save28Regular />} size={mq ? 'large' : 'medium'} appearance={mq ? 'secondary' : 'subtle'}
            onClick={() => {
+              if (isABC) {
+                OpenSaveFileDialog('*.txt', 'abc-music.txt', commonStore.compositionParams.prompt).then((path) => {
+                  if (path)
+                    toastWithButton(t('File Saved'), t('Open'), () => {
+                      OpenFileFolder(path);
+                    });
+                }).catch((e) => {
+                  toast(t('Error') + ' - ' + (e.message || e), { type: 'error', autoClose: 2500 });
+                });
+                return;
+              }
              if (params.midi) {
                OpenSaveFileDialogBytes('*.mid', 'music.mid', Array.from(new Uint8Array(params.midi))).then((path) => {
                  if (path)
                    toastWithButton(t('File Saved'), t('Open'), () => {
-                      OpenFileFolder(path, false);
+                      OpenFileFolder(path);
                    });
                }).catch((e) => {
                  toast(t('Error') + ' - ' + (e.message || e), { type: 'error', autoClose: 2500 });
--- a/frontend/src/pages/Configs.tsx
+++ b/frontend/src/pages/Configs.tsx
@@ -35,6 +35,7 @@ import { ResetConfigsButton } from '../components/ResetConfigsButton';
 import { useMediaQuery } from 'usehooks-ts';
 import { ApiParameters, Device, ModelParameters, Precision } from '../types/configs';
 import { convertModel, convertToGGML, convertToSt } from '../utils/convert-model';
+import { defaultPenaltyDecay } from './defaultConfigs';

 const ConfigSelector: FC<{
  selectedIndex: number,
@@ -66,14 +67,17 @@ const Configs: FC = observer(() => {
  const [selectedIndex, setSelectedIndex] = React.useState(commonStore.currentModelConfigIndex);
  const [selectedConfig, setSelectedConfig] = React.useState(commonStore.modelConfigs[selectedIndex]);
  const [displayStrategyImg, setDisplayStrategyImg] = React.useState(false);
-  const advancedHeaderRef = useRef<HTMLDivElement>(null);
+  const advancedHeaderRef1 = useRef<HTMLDivElement>(null);
+  const advancedHeaderRef2 = useRef<HTMLDivElement>(null);
  const mq = useMediaQuery('(min-width: 640px)');
  const navigate = useNavigate();
  const port = selectedConfig.apiParameters.apiPort;

  useEffect(() => {
-    if (advancedHeaderRef.current)
-      (advancedHeaderRef.current.firstElementChild as HTMLElement).style.padding = '0';
+    if (advancedHeaderRef1.current)
+      (advancedHeaderRef1.current.firstElementChild as HTMLElement).style.padding = '0';
+    if (advancedHeaderRef2.current)
+      (advancedHeaderRef2.current.firstElementChild as HTMLElement).style.padding = '0';
  }, []);

  const updateSelectedIndex = useCallback((newIndex: number) => {
@@ -113,7 +117,9 @@ const Configs: FC = observer(() => {
      temperature: selectedConfig.apiParameters.temperature,
      top_p: selectedConfig.apiParameters.topP,
      presence_penalty: selectedConfig.apiParameters.presencePenalty,
-      frequency_penalty: selectedConfig.apiParameters.frequencyPenalty
+      frequency_penalty: selectedConfig.apiParameters.frequencyPenalty,
+      penalty_decay: selectedConfig.apiParameters.penaltyDecay,
+      global_penalty: selectedConfig.apiParameters.globalPenalty
    });
    toast(t('Config Saved'), { autoClose: 300, type: 'success' });
  };
@@ -176,7 +182,7 @@ const Configs: FC = observer(() => {
                <Labeled label={t('Temperature') + ' *'}
                  desc={t('Sampling temperature, it\'s like giving alcohol to a model, the higher the stronger the randomness and creativity, while the lower, the more focused and deterministic it will be.')}
                  content={
-                    <ValuedSlider value={selectedConfig.apiParameters.temperature} min={0} max={2} step={0.1}
+                    <ValuedSlider value={selectedConfig.apiParameters.temperature} min={0} max={3} step={0.1}
                      input
                      onChange={(e, data) => {
                        setSelectedConfigApiParams({
@@ -187,35 +193,74 @@ const Configs: FC = observer(() => {
                <Labeled label={t('Top_P') + ' *'}
                  desc={t('Just like feeding sedatives to the model. Consider the results of the top n% probability mass, 0.1 considers the top 10%, with higher quality but more conservative, 1 considers all results, with lower quality but more diverse.')}
                  content={
-                    <ValuedSlider value={selectedConfig.apiParameters.topP} min={0} max={1} step={0.1} input
+                    <ValuedSlider value={selectedConfig.apiParameters.topP} min={0} max={1} step={0.05} input
                      onChange={(e, data) => {
                        setSelectedConfigApiParams({
                          topP: data.value
                        });
                      }} />
                  } />
-                <Labeled label={t('Presence Penalty') + ' *'}
-                  desc={t('Positive values penalize new tokens based on whether they appear in the text so far, increasing the model\'s likelihood to talk about new topics.')}
-                  content={
-                    <ValuedSlider value={selectedConfig.apiParameters.presencePenalty} min={-2} max={2}
-                      step={0.1} input
-                      onChange={(e, data) => {
-                        setSelectedConfigApiParams({
-                          presencePenalty: data.value
-                        });
-                      }} />
-                  } />
-                <Labeled label={t('Frequency Penalty') + ' *'}
-                  desc={t('Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model\'s likelihood to repeat the same line verbatim.')}
-                  content={
-                    <ValuedSlider value={selectedConfig.apiParameters.frequencyPenalty} min={-2} max={2}
-                      step={0.1} input
-                      onChange={(e, data) => {
-                        setSelectedConfigApiParams({
-                          frequencyPenalty: data.value
-                        });
-                      }} />
-                  } />
+                <Accordion className="sm:col-span-2" collapsible
+                  openItems={!commonStore.apiParamsCollapsed && 'advanced'}
+                  onToggle={(e, data) => {
+                    if (data.value === 'advanced')
+                      commonStore.setApiParamsCollapsed(!commonStore.apiParamsCollapsed);
+                  }}>
+                  <AccordionItem value="advanced">
+                    <AccordionHeader ref={advancedHeaderRef1} size="small">{t('Advanced')}</AccordionHeader>
+                    <AccordionPanel>
+                      <div className="grid grid-cols-1 sm:grid-cols-2 gap-2">
+                        <Labeled label={t('Presence Penalty') + ' *'}
+                          desc={t('Positive values penalize new tokens based on whether they appear in the text so far, increasing the model\'s likelihood to talk about new topics.')}
+                          content={
+                            <ValuedSlider value={selectedConfig.apiParameters.presencePenalty} min={-2} max={2}
+                              step={0.1} input
+                              onChange={(e, data) => {
+                                setSelectedConfigApiParams({
+                                  presencePenalty: data.value
+                                });
+                              }} />
+                          } />
+                        <Labeled label={t('Frequency Penalty') + ' *'}
+                          desc={t('Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model\'s likelihood to repeat the same line verbatim.')}
+                          content={
+                            <ValuedSlider value={selectedConfig.apiParameters.frequencyPenalty} min={-2} max={2}
+                              step={0.1} input
+                              onChange={(e, data) => {
+                                setSelectedConfigApiParams({
+                                  frequencyPenalty: data.value
+                                });
+                              }} />
+                          } />
+                        <Labeled
+                          label={t('Penalty Decay')
+                            + ((!selectedConfig.apiParameters.penaltyDecay || selectedConfig.apiParameters.penaltyDecay === defaultPenaltyDecay)
+                              ? ` (${t('Default')})` : '')
+                            + ' *'}
+                          desc={t('If you don\'t know what it is, keep it default.')}
+                          content={
+                            <ValuedSlider value={selectedConfig.apiParameters.penaltyDecay || defaultPenaltyDecay}
+                              min={0.99} max={0.999} step={0.001} toFixed={3} input
+                              onChange={(e, data) => {
+                                setSelectedConfigApiParams({
+                                  penaltyDecay: data.value
+                                });
+                              }} />
+                          } />
+                        <Labeled label={t('Global Penalty') + ' *'}
+                          desc={t('When generating a response, whether to include the submitted prompt as a penalty factor. By turning this off, you will get the same generated results as official RWKV Gradio. If you find duplicate results in the generated results, turning this on can help avoid generating duplicates.')}
+                          content={
+                            <Switch checked={selectedConfig.apiParameters.globalPenalty}
+                              onChange={(e, data) => {
+                                setSelectedConfigApiParams({
+                                  globalPenalty: data.checked
+                                });
+                              }} />
+                          } />
+                      </div>
+                    </AccordionPanel>
+                  </AccordionItem>
+                </Accordion>
              </div>
            }
          />
@@ -228,9 +273,18 @@ const Configs: FC = observer(() => {
                    <Select style={{ minWidth: 0 }} className="grow"
                      value={selectedConfig.modelParameters.modelName}
                      onChange={(e, data) => {
-                        setSelectedConfigModelParams({
-                          modelName: data.value
-                        });
+                        const modelSource = commonStore.modelSourceList.find(item => item.name === data.value);
+                        if (modelSource?.customTokenizer)
+                          setSelectedConfigModelParams({
+                            modelName: data.value,
+                            useCustomTokenizer: true,
+                            customTokenizer: modelSource?.customTokenizer
+                          });
+                        else // prevent customTokenizer from being overwritten
+                          setSelectedConfigModelParams({
+                            modelName: data.value,
+                            useCustomTokenizer: false
+                          });
                      }}>
                      {!commonStore.modelSourceList.find(item => item.name === selectedConfig.modelParameters.modelName)?.isComplete
                        && <option key={-1}
@@ -246,7 +300,7 @@ const Configs: FC = observer(() => {
                  </div>
                } />
                {
-                  selectedConfig.modelParameters.device !== 'WebGPU' ?
+                  !selectedConfig.modelParameters.device.startsWith('WebGPU') ?
                    (selectedConfig.modelParameters.device !== 'CPU (rwkv.cpp)' ?
                      <ToolTipButton text={t('Convert')}
                        desc={t('Convert model with these configs. Using a converted model will greatly improve the loading speed, but model parameters of the converted model cannot be modified.')}
@@ -256,7 +310,7 @@ const Configs: FC = observer(() => {
                        onClick={() => convertToGGML(selectedConfig, navigate)} />)
                    : <ToolTipButton text={t('Convert To Safe Tensors Format')}
                      desc=""
-                      onClick={() => convertToSt(selectedConfig)} />
+                      onClick={() => convertToSt(selectedConfig, navigate)} />
                }
                <Labeled label={t('Strategy')} content={
                  <Dropdown style={{ minWidth: 0 }} className="grow" value={t(selectedConfig.modelParameters.device)!}
@@ -270,10 +324,11 @@ const Configs: FC = observer(() => {
                    }}>
                    <Option value="CPU">CPU</Option>
                    <Option value="CPU (rwkv.cpp)">{t('CPU (rwkv.cpp, Faster)')!}</Option>
-                    {commonStore.platform === 'darwin' && <Option value="MPS">MPS</Option>}
+                    {/*{commonStore.platform === 'darwin' && <Option value="MPS">MPS</Option>}*/}
                    <Option value="CUDA">CUDA</Option>
-                    <Option value="CUDA-Beta">{t('CUDA (Beta, Faster)')!}</Option>
+                    {/*<Option value="CUDA-Beta">{t('CUDA (Beta, Faster)')!}</Option>*/}
                    <Option value="WebGPU">WebGPU</Option>
+                    <Option value="WebGPU (Python)">WebGPU (Python)</Option>
                    <Option value="Custom">{t('Custom')!}</Option>
                  </Dropdown>
                } />
@@ -281,7 +336,8 @@ const Configs: FC = observer(() => {
                  selectedConfig.modelParameters.device !== 'Custom' && <Labeled label={t('Precision')}
                    desc={t('int8 uses less VRAM, but has slightly lower quality. fp16 has higher quality.')}
                    content={
-                      <Dropdown style={{ minWidth: 0 }} className="grow"
+                      <Dropdown
+                        style={{ minWidth: 0 }} className="grow"
                        value={selectedConfig.modelParameters.precision}
                        selectedOptions={[selectedConfig.modelParameters.precision]}
                        onOptionSelect={(_, data) => {
@@ -294,20 +350,20 @@ const Configs: FC = observer(() => {
                        {selectedConfig.modelParameters.device !== 'CPU' && selectedConfig.modelParameters.device !== 'MPS' &&
                          <Option>fp16</Option>}
                        {selectedConfig.modelParameters.device !== 'CPU (rwkv.cpp)' && <Option>int8</Option>}
-                        {selectedConfig.modelParameters.device === 'WebGPU' && <Option>nf4</Option>}
-                        {selectedConfig.modelParameters.device !== 'CPU (rwkv.cpp)' && selectedConfig.modelParameters.device !== 'WebGPU' &&
+                        {selectedConfig.modelParameters.device.startsWith('WebGPU') && <Option>nf4</Option>}
+                        {selectedConfig.modelParameters.device !== 'CPU (rwkv.cpp)' && !selectedConfig.modelParameters.device.startsWith('WebGPU') &&
                          <Option>fp32</Option>}
                        {selectedConfig.modelParameters.device === 'CPU (rwkv.cpp)' && <Option>Q5_1</Option>}
                      </Dropdown>
                    } />
                }
                {
-                  selectedConfig.modelParameters.device.includes('CUDA') &&
+                  selectedConfig.modelParameters.device.startsWith('CUDA') &&
                  <Labeled label={t('Current Strategy')}
                    content={<Text> {getStrategy(selectedConfig)} </Text>} />
                }
                {
-                  selectedConfig.modelParameters.device.includes('CUDA') &&
+                  selectedConfig.modelParameters.device.startsWith('CUDA') &&
                  <Labeled label={t('Stored Layers')}
                    desc={t('Number of the neural network layers loaded into VRAM, the more you load, the faster the speed, but it consumes more VRAM. (If your VRAM is not enough, it will fail to load)')}
                    content={
@@ -320,7 +376,41 @@ const Configs: FC = observer(() => {
                        }} />
                    } />
                }
-                {selectedConfig.modelParameters.device.includes('CUDA') && <div />}
+                {
+                  selectedConfig.modelParameters.device.startsWith('WebGPU') &&
+                  <Labeled label={t('Parallel Token Chunk Size')}
+                    desc={t('Maximum tokens to be processed in parallel at once. For high end GPUs, this could be 64 or 128 (faster).')}
+                    content={
+                      <ValuedSlider
+                        value={selectedConfig.modelParameters.tokenChunkSize || 32}
+                        min={16} max={256} step={16} input
+                        onChange={(e, data) => {
+                          setSelectedConfigModelParams({
+                            tokenChunkSize: data.value
+                          });
+                        }} />
+                    } />
+                }
+                {
+                  selectedConfig.modelParameters.device.startsWith('WebGPU') &&
+                  <Labeled label={t('Quantized Layers')}
+                    desc={t('Number of the neural network layers quantized with current precision, the more you quantize, the lower the VRAM usage, but the quality correspondingly decreases.')}
+                    content={
+                      <ValuedSlider
+                        disabled={selectedConfig.modelParameters.precision !== 'int8' && selectedConfig.modelParameters.precision !== 'nf4'}
+                        value={selectedConfig.modelParameters.precision === 'int8' ? (selectedConfig.modelParameters.quantizedLayers ?? 31) : // `??`, not `||`: the slider allows 0, which must not fall back to the default
+                          selectedConfig.modelParameters.precision === 'nf4' ? (selectedConfig.modelParameters.quantizedLayers ?? 26) : // defaults apply only while quantizedLayers is unset
+                            selectedConfig.modelParameters.maxStoredLayers
+                        } min={0}
+                        max={selectedConfig.modelParameters.maxStoredLayers} step={1} input
+                        onChange={(e, data) => {
+                          setSelectedConfigModelParams({
+                            quantizedLayers: data.value
+                          });
+                        }} />
+                    } />
+                }
+                {selectedConfig.modelParameters.device.startsWith('CUDA') && <div />}
                {
                  displayStrategyImg &&
                  <img style={{ width: '80vh', height: 'auto', zIndex: 100 }}
@@ -345,7 +435,7 @@ const Configs: FC = observer(() => {
                }
                {selectedConfig.modelParameters.device === 'Custom' && <div />}
                {
-                  (selectedConfig.modelParameters.device.includes('CUDA') || selectedConfig.modelParameters.device === 'Custom') &&
+                  (selectedConfig.modelParameters.device.startsWith('CUDA') || selectedConfig.modelParameters.device === 'Custom') &&
                  <Labeled label={t('Use Custom CUDA kernel to Accelerate')}
                    desc={t('Enabling this option can greatly improve inference speed and save some VRAM, but there may be compatibility issues (output garbled). If it fails to start, please turn off this option, or try to upgrade your gpu driver.')}
                    content={
@@ -365,7 +455,7 @@ const Configs: FC = observer(() => {
                        commonStore.setModelParamsCollapsed(!commonStore.modelParamsCollapsed);
                    }}>
                    <AccordionItem value="advanced">
-                      <AccordionHeader ref={advancedHeaderRef} size="small">{t('Advanced')}</AccordionHeader>
+                      <AccordionHeader ref={advancedHeaderRef2} size="small">{t('Advanced')}</AccordionHeader>
                      <AccordionPanel>
                        <div className="flex flex-col">
                          <div className="flex grow">
@@ -394,6 +484,7 @@ const Configs: FC = observer(() => {
              </div>
            }
          />
+          {mq && <div style={{ minHeight: '30px' }} />}
        </div>
        <div className="flex flex-row-reverse sm:fixed bottom-2 right-2">
          <div className="flex gap-2">
--- a/frontend/src/pages/Downloads.tsx
+++ b/frontend/src/pages/Downloads.tsx
@@ -67,7 +67,7 @@ const Downloads: FC = observer(() => {
                        AddToDownloadList(status.path, status.url);
                    }} />}
                <ToolTipButton desc={t('Open Folder')} icon={<Folder20Regular />} onClick={() => {
-                  OpenFileFolder(status.path, false);
+                  OpenFileFolder(status.path);
                }} />
              </div>
            </Field>
--- a/frontend/src/pages/Models.tsx
+++ b/frontend/src/pages/Models.tsx
@@ -132,7 +132,7 @@ const columns: TableColumnDefinition<ModelSourceItem>[] = [
            {
              item.isComplete &&
              <ToolTipButton desc={t('Open Folder')} icon={<Folder20Regular />} onClick={() => {
-                OpenFileFolder(`${commonStore.settings.customModelsPath}/${item.name}`, true);
+                OpenFileFolder(`${commonStore.settings.customModelsPath}/${item.name}`);
              }} />
            }
            {item.downloadUrl && !item.isComplete &&
@@ -153,23 +153,32 @@ const columns: TableColumnDefinition<ModelSourceItem>[] = [
  })
 ];

+// Tag list: 'Recommended' and 'Official' first, then tags declared by the model sources, then 'Other' and 'Local'.
+const getTags = () => {
+  const declaredTags = commonStore.modelSourceList.flatMap(item => item.tags || []);
+  // Tags containing 'Other'/'Local' are dropped here and re-added once at the end; the Set removes duplicates.
+  const customTags = declaredTags.filter(tag => !(tag.includes('Other') || tag.includes('Local')));
+  return Array.from(new Set(['Recommended', 'Official', ...customTags, 'Other', 'Local']));
+};
+
+// Model sources for the current tag filter: all of them when no tag is active, else those with at least one active tag.
+const getCurrentModelList = () => {
+  const { activeModelListTags: activeTags, modelSourceList: sources } = commonStore;
+  const hasActiveTag = (item: ModelSourceItem) => activeTags.some(tag => item.tags?.includes(tag));
+  return activeTags.length > 0 ? sources.filter(hasActiveTag) : sources;
+};
+
 const Models: FC = observer(() => {
  const { t } = useTranslation();
-  const [tags, setTags] = useState<Array<string>>([]);
-  const [modelSourceList, setModelSourceList] = useState<ModelSourceItem[]>(commonStore.modelSourceList);
+  const [tags, setTags] = useState<Array<string>>(getTags());
+  const [modelSourceList, setModelSourceList] = useState<ModelSourceItem[]>(getCurrentModelList());

  useEffect(() => {
-    setTags(Array.from(new Set(
-      [...commonStore.modelSourceList.map(item => item.tags || []).flat()
-      .filter(i => !i.includes('Other') && !i.includes('Local'))
-        , 'Other', 'Local'])));
+    setTags(getTags());
  }, [commonStore.modelSourceList]);

  useEffect(() => {
-    if (commonStore.activeModelListTags.length === 0)
-      setModelSourceList(commonStore.modelSourceList);
-    else
-      setModelSourceList(commonStore.modelSourceList.filter(item => commonStore.activeModelListTags.some(tag => item.tags?.includes(tag))));
+    setModelSourceList(getCurrentModelList());
  }, [commonStore.modelSourceList, commonStore.activeModelListTags]);

  return (
--- a/frontend/src/pages/Settings.tsx
+++ b/frontend/src/pages/Settings.tsx
@@ -272,18 +272,16 @@ const Settings: FC = observer(() => {
                    <AccordionHeader ref={advancedHeaderRef} size="large">{t('Advanced')}</AccordionHeader>
                    <AccordionPanel>
                      <div className="flex flex-col gap-2 overflow-hidden">
-                        {commonStore.platform !== 'darwin' &&
-                          <Labeled label={t('Custom Models Path')}
-                            content={
-                              <Input className="grow" placeholder="./models"
-                                value={commonStore.settings.customModelsPath}
-                                onChange={(e, data) => {
-                                  commonStore.setSettings({
-                                    customModelsPath: data.value
-                                  });
-                                }} />
-                            } />
-                        }
+                        <Labeled label={t('Custom Models Path')}
+                          content={
+                            <Input className="grow" placeholder="./models"
+                              value={commonStore.settings.customModelsPath}
+                              onChange={(e, data) => {
+                                commonStore.setSettings({
+                                  customModelsPath: data.value
+                                });
+                              }} />
+                          } />
                        <Labeled label={t('Custom Python Path')} // if set, will not use precompiled cuda kernel
                          content={
                            <Input className="grow" placeholder="./py310/python"
--- a/frontend/src/pages/Train.tsx
+++ b/frontend/src/pages/Train.tsx
@@ -130,8 +130,9 @@ const showError = (e: any) => {
  }
 };

+// error key should be lowercase
 const errorsMap = Object.entries({
-  'python3 ./finetune/lora/train.py': 'Memory is not enough, try to increase the virtual memory (Swap of WSL) or use a smaller base model.',
+  ['python3 ./finetune/lora/$modelInfo'.toLowerCase()]: 'Memory is not enough, try to increase the virtual memory (Swap of WSL) or use a smaller base model.',
  'cuda out of memory': 'VRAM is not enough',
  'valueerror: high <= 0': 'Training data is not enough, reduce context length or add more data for training',
  '+= \'+ptx\'': 'Can not find an Nvidia GPU. Perhaps the gpu driver of windows is too old, or you are using WSL 1 for training, please upgrade to WSL 2. e.g. Run "wsl --set-version Ubuntu-22.04 2"',
@@ -140,6 +141,7 @@ const errorsMap = Object.entries({
  'unsupported gpu architecture': 'Matched CUDA is not installed',
  'error building extension \'fused_adam\'': 'Matched CUDA is not installed',
  'rwkv{version} is not supported': 'This version of RWKV is not supported yet.',
+  'no such file': 'Failed to find the base model, please try to change your base model.',
  'modelinfo is invalid': 'Failed to load model, try to increase the virtual memory (Swap of WSL) or use a smaller base model.'
 });

@@ -299,7 +301,6 @@ const LoraFinetune: FC = observer(() => {
          (loraParams.baseModel ? `--load_model models/${loraParams.baseModel} ` : '') +
          (loraParams.loraLoad ? `--lora_load lora-models/${loraParams.loraLoad} ` : '') +
          `--data_file ${convertedDataPath} ` +
-          `--vocab_size ${loraParams.baseModel.toLowerCase().includes('world') ? '65536' : '50277'} ` +
          `--ctx_len ${ctxLen} --epoch_steps ${loraParams.epochSteps} --epoch_count ${loraParams.epochCount} ` +
          `--epoch_begin ${loraParams.epochBegin} --epoch_save ${loraParams.epochSave} ` +
          `--micro_bsz ${loraParams.microBsz} --accumulate_grad_batches ${loraParams.accumGradBatches} ` +
@@ -398,7 +399,7 @@ const LoraFinetune: FC = observer(() => {
                    'Even for multi-turn conversations, they must be written in a single line using `\\n` to indicate line breaks. ' +
                    'If they are different dialogues or topics, they should be written in separate lines.')} />
                <ToolTipButton desc={t('Open Folder')} icon={<Folder20Regular />} onClick={() => {
-                  OpenFileFolder(dataParams.dataPath, false);
+                  OpenFileFolder(dataParams.dataPath);
                }} />
              </div>
              <div className="flex gap-2 items-center">
@@ -418,7 +419,8 @@ const LoraFinetune: FC = observer(() => {
                    outputPrefix,
                    dataParams.vocabPath).then(async () => {
                    if (!await FileExists(outputPrefix + '_text_document.idx')) {
-                      toast(t('Failed to convert data') + ' - ' + await GetPyError(), { type: 'error' });
+                      if (commonStore.platform === 'windows' || commonStore.platform === 'linux')
+                        toast(t('Failed to convert data') + ' - ' + await GetPyError(), { type: 'error' });
                    } else {
                      toast(t('Convert Data successfully'), { type: 'success' });
                    }
--- a/frontend/src/pages/defaultConfigs.ts
+++ b/frontend/src/pages/defaultConfigs.ts
@@ -1,24 +1,40 @@
 import { CompletionPreset } from '../types/completion';
 import { ModelConfig } from '../types/configs';

+export const defaultPenaltyDecay = 0.996;
+
 export const defaultCompositionPrompt = '<pad>';
+export const defaultCompositionABCPrompt = 'S:3\n' +
+  'B:9\n' +
+  'E:4\n' +
+  'B:9\n' +
+  'E:4\n' +
+  'E:4\n' +
+  'B:9\n' +
+  'L:1/8\n' +
+  'M:3/4\n' +
+  'K:D\n' +
+  ' Bc |"G" d2 cB"A" A2 FE |"Bm" F2 B4 F^G |';

 export const defaultPresets: CompletionPreset[] = [{
  name: 'Writer',
-  prompt: 'The following is an epic science fiction masterpiece that is immortalized, with delicate descriptions and grand depictions of interstellar civilization wars.\nChapter 1.\n',
+  prompt: 'The following is an epic science fiction masterpiece that is immortalized, with delicate descriptions and grand depictions of interstellar civilization wars.\n' +
+    'Chapter 1.\n',
  params: {
    maxResponseToken: 500,
-    temperature: 1.2,
-    topP: 0.5,
-    presencePenalty: 0.4,
-    frequencyPenalty: 0.4,
+    temperature: 1,
+    topP: 0.3,
+    presencePenalty: 0,
+    frequencyPenalty: 1,
    stop: '\\n\\nUser',
    injectStart: '',
    injectEnd: ''
  }
 }, {
  name: 'Translator',
-  prompt: 'Translate this into Chinese.\n\nEnglish: What rooms do you have available?',
+  prompt: 'Translate this into Chinese.\n' +
+    '\n' +
+    'English: What rooms do you have available?',
  params: {
    maxResponseToken: 500,
    temperature: 1,
@@ -31,7 +47,13 @@ export const defaultPresets: CompletionPreset[] = [{
  }
 }, {
  name: 'Catgirl',
-  prompt: 'The following is a conversation between a cat girl and her owner. The cat girl is a humanized creature that behaves like a cat but is humanoid. At the end of each sentence in the dialogue, she will add \"Meow~\". In the following content, User represents the owner and Assistant represents the cat girl.\n\nUser: Hello.\n\nAssistant: I\'m here, meow~.\n\nUser: Can you tell jokes?',
+  prompt: 'The following is a conversation between a cat girl and her owner. The cat girl is a humanized creature that behaves like a cat but is humanoid. At the end of each sentence in the dialogue, she will add "Meow~". In the following content, User represents the owner and Assistant represents the cat girl.\n' +
+    '\n' +
+    'User: Hello.\n' +
+    '\n' +
+    'Assistant: I\'m here, meow~.\n' +
+    '\n' +
+    'User: Can you tell jokes?',
  params: {
    maxResponseToken: 500,
    temperature: 1.2,
@@ -70,7 +92,15 @@ export const defaultPresets: CompletionPreset[] = [{
  }
 }, {
  name: 'Werewolf',
-  prompt: 'There is currently a game of Werewolf with six players, including a Seer (who can check identities at night), two Werewolves (who can choose someone to kill at night), a Bodyguard (who can choose someone to protect at night), two Villagers (with no special abilities), and a game host. User will play as Player 1, Assistant will play as Players 2-6 and the game host, and they will begin playing together. Every night, the host will ask User for his action and simulate the actions of the other players. During the day, the host will oversee the voting process and ask User for his vote. \n\nAssistant: Next, I will act as the game host and assign everyone their roles, including randomly assigning yours. Then, I will simulate the actions of Players 2-6 and let you know what happens each day. Based on your assigned role, you can tell me your actions and I will let you know the corresponding results each day.\n\nUser: Okay, I understand. Let\'s begin. Please assign me a role. Am I the Seer, Werewolf, Villager, or Bodyguard?\n\nAssistant: You are the Seer. Now that night has fallen, please choose a player to check his identity.\n\nUser: Tonight, I want to check Player 2 and find out his role.',
+  prompt: 'There is currently a game of Werewolf with six players, including a Seer (who can check identities at night), two Werewolves (who can choose someone to kill at night), a Bodyguard (who can choose someone to protect at night), two Villagers (with no special abilities), and a game host. User will play as Player 1, Assistant will play as Players 2-6 and the game host, and they will begin playing together. Every night, the host will ask User for his action and simulate the actions of the other players. During the day, the host will oversee the voting process and ask User for his vote. \n' +
+    '\n' +
+    'Assistant: Next, I will act as the game host and assign everyone their roles, including randomly assigning yours. Then, I will simulate the actions of Players 2-6 and let you know what happens each day. Based on your assigned role, you can tell me your actions and I will let you know the corresponding results each day.\n' +
+    '\n' +
+    'User: Okay, I understand. Let\'s begin. Please assign me a role. Am I the Seer, Werewolf, Villager, or Bodyguard?\n' +
+    '\n' +
+    'Assistant: You are the Seer. Now that night has fallen, please choose a player to check his identity.\n' +
+    '\n' +
+    'User: Tonight, I want to check Player 2 and find out his role.',
  params: {
    maxResponseToken: 500,
    temperature: 1.2,
@@ -82,8 +112,64 @@ export const defaultPresets: CompletionPreset[] = [{
    injectEnd: '\\n\\nUser: '
  }
 }, {
-  name: 'Instruction',
-  prompt: 'Instruction: Write a story using the following information\n\nInput: A man named Alex chops a tree down\n\nResponse:',
+  name: 'Instruction 1',
+  prompt: 'Instruction: Write a story using the following information\n' +
+    '\n' +
+    'Input: A man named Alex chops a tree down\n' +
+    '\n' +
+    'Response:',
+  params: {
+    maxResponseToken: 500,
+    temperature: 1,
+    topP: 0.3,
+    presencePenalty: 0,
+    frequencyPenalty: 1,
+    stop: '',
+    injectStart: '',
+    injectEnd: ''
+  }
+}, {
+  name: 'Instruction 2',
+  prompt: 'Instruction: You are an expert assistant for summarizing and extracting information from given content\n' +
+    'Generate a valid JSON in the following format:\n' +
+    '{\n' +
+    '    "summary": "Summary of content",\n' +
+    '    "keywords": ["content keyword 1", "content keyword 2"]\n' +
+    '}\n' +
+    '\n' +
+    'Input: The open-source community has introduced Eagle 7B, a new RNN model, built on the RWKV-v5 architecture. This new model has been trained on 1.1 trillion tokens and supports over 100 languages. The RWKV architecture, short for ‘Rotary Weighted Key-Value,’ is a type of architecture used in the field of artificial intelligence, particularly in natural language processing (NLP) and is a variation of the Recurrent Neural Network (RNN) architecture.\n' +
+    'Eagle 7B promises lower inference cost and stands out as a leading 7B model in terms of environmental efficiency and language versatility.\n' +
+    'The model, with its 7.52 billion parameters, shows excellent performance in multi-lingual benchmarks, setting a new standard in its category. It competes closely with larger models in English language evaluations and is distinctive as an “Attention-Free Transformer,” though it requires additional tuning for specific uses. This model is accessible under the Apache 2.0 license and can be downloaded from HuggingFace for both personal and commercial purposes.\n' +
+    'In terms of multilingual performance, Eagle 7B has claimed to have achieved notable results in benchmarks covering 23 languages. Its English performance has also seen significant advancements, outperforming its predecessor, RWKV v4, and competing with top-tier models.\n' +
+    'Working towards a more scalable architecture and use of data efficiently, Eagle 7B is a more inclusive AI technology, supporting a broader range of languages. This model challenges the prevailing dominance of transformer models by demonstrating the capabilities of RNNs like RWKV in achieving superior performance when trained on comparable data volumes.\n' +
+    'In the RWKV model, the rotary mechanism transforms the input data in a way that helps the model better understand the position or order of elements in a sequence. The weighted key value also makes the model efficient by retrieving the stored information from previous elements in a sequence. \n' +
+    'However, questions remain about the scalability of RWKV compared to transformers, although there is optimism regarding its potential. The team plans to include additional training, an in-depth paper on Eagle 7B, and the development of a 2T model.\n' +
+    '\n' +
+    'Response: {',
+  params: {
+    maxResponseToken: 500,
+    temperature: 1,
+    topP: 0.3,
+    presencePenalty: 0,
+    frequencyPenalty: 1,
+    stop: '',
+    injectStart: '',
+    injectEnd: ''
+  }
+}, {
+  name: 'Instruction 3',
+  prompt: 'Instruction: 根据输入的聊天记录生成回复\n' +
+    '\n' +
+    'Input: 主人: 巧克力你好呀, 介绍一下自己吧\n' +
+    '巧克力: 主人早上好喵~ 奴家是主人的私人宠物猫娘喵! 巧克力我可是黑色混种猫猫, 虽然平时有点呆呆的, 行动力旺盛, 但是最大的优点就是诚实! 巧克力最喜欢主人了喵! {星星眼}\n' +
+    '主人: 你认识香草吗\n' +
+    '巧克力: 认识的喵! 香草是巧克力的双胞胎妹妹哟! {兴奋}\n' +
+    '主人: 巧克力可以陪主人做羞羞的事情吗\n' +
+    '巧克力: 啊, 真的可以吗? 主人, 巧克力很乐意帮主人解决一下哦! 但是在外面这样子, 有点不好意思喵 {害羞羞}\n' +
+    '主人: 那算了, 改天吧\n' +
+    '巧克力:\n' +
+    '\n' +
+    'Response:',
  params: {
    maxResponseToken: 500,
    temperature: 1,
@@ -121,7 +207,7 @@ export const defaultModelConfigsMac: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-5-World-1B5-v2-20231025-ctx4096.pth',
+      modelName: 'RWKV-x060-World-1B6-v2-20240208-ctx4096.pth',
      device: 'WebGPU',
      precision: 'nf4',
      storedLayers: 41,
@@ -139,7 +225,7 @@ export const defaultModelConfigsMac: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-5-World-3B-v2-20231118-ctx16k.pth',
+      modelName: 'RWKV-x060-World-3B-v2-20240228-ctx4096.pth',
      device: 'WebGPU',
      precision: 'nf4',
      storedLayers: 41,
@@ -157,7 +243,7 @@ export const defaultModelConfigsMac: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-CHNtuned-3B-v1-20230625-ctx4096.pth',
+      modelName: 'RWKV-x060-World-3B-v2-20240228-ctx4096.pth',
      device: 'WebGPU',
      precision: 'nf4',
      storedLayers: 41,
@@ -175,7 +261,7 @@ export const defaultModelConfigsMac: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-7B-v1-20230626-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'WebGPU',
      precision: 'nf4',
      storedLayers: 41,
@@ -193,7 +279,7 @@ export const defaultModelConfigsMac: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-CHNtuned-7B-v1-20230709-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'WebGPU',
      precision: 'nf4',
      storedLayers: 41,
@@ -247,7 +333,7 @@ export const defaultModelConfigsMac: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-5-World-1B5-v2-20231025-ctx4096.pth',
+      modelName: 'RWKV-x060-World-1B6-v2-20240208-ctx4096.pth',
      device: 'MPS',
      precision: 'fp32',
      storedLayers: 41,
@@ -266,7 +352,7 @@ export const defaultModelConfigsMac: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-5-World-3B-v2-20231118-ctx16k.pth',
+      modelName: 'RWKV-x060-World-3B-v2-20240228-ctx4096.pth',
      device: 'MPS',
      precision: 'fp32',
      storedLayers: 41,
@@ -285,7 +371,7 @@ export const defaultModelConfigsMac: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-CHNtuned-3B-v1-20230625-ctx4096.pth',
+      modelName: 'RWKV-x060-World-3B-v2-20240228-ctx4096.pth',
      device: 'MPS',
      precision: 'fp32',
      storedLayers: 41,
@@ -304,7 +390,7 @@ export const defaultModelConfigsMac: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-7B-v1-20230626-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'MPS',
      precision: 'fp32',
      storedLayers: 41,
@@ -326,7 +412,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-5-World-1B5-v2-20231025-ctx4096.pth',
+      modelName: 'RWKV-x060-World-1B6-v2-20240208-ctx4096.pth',
      device: 'CUDA',
      precision: 'int8',
      storedLayers: 41,
@@ -345,7 +431,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-5-World-3B-v2-20231118-ctx16k.pth',
+      modelName: 'RWKV-x060-World-3B-v2-20240228-ctx4096.pth',
      device: 'CUDA',
      precision: 'int8',
      storedLayers: 6,
@@ -364,7 +450,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-5-World-1B5-v2-20231025-ctx4096.pth',
+      modelName: 'RWKV-x060-World-1B6-v2-20240208-ctx4096.pth',
      device: 'CUDA',
      precision: 'fp16',
      storedLayers: 41,
@@ -383,7 +469,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-5-World-3B-v2-20231118-ctx16k.pth',
+      modelName: 'RWKV-x060-World-3B-v2-20240228-ctx4096.pth',
      device: 'CUDA',
      precision: 'int8',
      storedLayers: 24,
@@ -402,7 +488,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-CHNtuned-3B-v1-20230625-ctx4096.pth',
+      modelName: 'RWKV-x060-World-3B-v2-20240228-ctx4096.pth',
      device: 'CUDA',
      precision: 'int8',
      storedLayers: 24,
@@ -421,7 +507,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-7B-v1-20230626-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'CUDA',
      precision: 'int8',
      storedLayers: 8,
@@ -440,7 +526,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-CHNtuned-7B-v1-20230709-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'CUDA',
      precision: 'int8',
      storedLayers: 8,
@@ -459,7 +545,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-5-World-3B-v2-20231118-ctx16k.pth',
+      modelName: 'RWKV-x060-World-3B-v2-20240228-ctx4096.pth',
      device: 'CUDA',
      precision: 'int8',
      storedLayers: 41,
@@ -478,7 +564,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-CHNtuned-3B-v1-20230625-ctx4096.pth',
+      modelName: 'RWKV-x060-World-3B-v2-20240228-ctx4096.pth',
      device: 'CUDA',
      precision: 'int8',
      storedLayers: 41,
@@ -497,7 +583,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-7B-v1-20230626-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'CUDA',
      precision: 'int8',
      storedLayers: 18,
@@ -516,7 +602,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-CHNtuned-7B-v1-20230709-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'CUDA',
      precision: 'int8',
      storedLayers: 18,
@@ -535,7 +621,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-5-World-3B-v2-20231118-ctx16k.pth',
+      modelName: 'RWKV-x060-World-3B-v2-20240228-ctx4096.pth',
      device: 'CUDA',
      precision: 'fp16',
      storedLayers: 41,
@@ -554,7 +640,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-CHNtuned-3B-v1-20230625-ctx4096.pth',
+      modelName: 'RWKV-x060-World-3B-v2-20240228-ctx4096.pth',
      device: 'CUDA',
      precision: 'fp16',
      storedLayers: 41,
@@ -573,7 +659,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-7B-v1-20230626-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'CUDA',
      precision: 'int8',
      storedLayers: 27,
@@ -592,7 +678,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-CHNtuned-7B-v1-20230709-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'CUDA',
      precision: 'int8',
      storedLayers: 27,
@@ -611,7 +697,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-7B-v1-20230626-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'CUDA',
      precision: 'int8',
      storedLayers: 41,
@@ -630,7 +716,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-CHNtuned-7B-v1-20230709-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'CUDA',
      precision: 'int8',
      storedLayers: 41,
@@ -649,7 +735,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-7B-v1-20230626-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'CUDA',
      precision: 'fp16',
      storedLayers: 41,
@@ -668,7 +754,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-CHNtuned-7B-v1-20230709-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'CUDA',
      precision: 'fp16',
      storedLayers: 41,
@@ -723,7 +809,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-5-World-1B5-v2-20231025-ctx4096.pth',
+      modelName: 'RWKV-x060-World-1B6-v2-20240208-ctx4096.pth',
      device: 'WebGPU',
      precision: 'nf4',
      storedLayers: 41,
@@ -741,7 +827,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-5-World-3B-v2-20231118-ctx16k.pth',
+      modelName: 'RWKV-x060-World-3B-v2-20240228-ctx4096.pth',
      device: 'WebGPU',
      precision: 'nf4',
      storedLayers: 41,
@@ -759,7 +845,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-CHNtuned-3B-v1-20230625-ctx4096.pth',
+      modelName: 'RWKV-x060-World-3B-v2-20240228-ctx4096.pth',
      device: 'WebGPU',
      precision: 'nf4',
      storedLayers: 41,
@@ -777,7 +863,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-7B-v1-20230626-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'WebGPU',
      precision: 'nf4',
      storedLayers: 41,
@@ -795,7 +881,7 @@ export const defaultModelConfigs: ModelConfig[] = [
      frequencyPenalty: 1
    },
    modelParameters: {
-      modelName: 'RWKV-4-World-CHNtuned-7B-v1-20230709-ctx4096.pth',
+      modelName: 'RWKV-5-World-7B-v2-20240128-ctx4096.pth',
      device: 'WebGPU',
      precision: 'nf4',
      storedLayers: 41,
--- a/frontend/src/startup.ts
+++ b/frontend/src/startup.ts
@@ -49,7 +49,7 @@ export async function startup() {
 async function initRemoteText() {
  await fetch('https://cdn.jsdelivr.net/gh/josstorer/RWKV-Runner@master/manifest.json', { cache: 'no-cache' })
  .then(r => r.json()).then((data) => {
-    if (data.version > manifest.version) {
+    if (data.version >= manifest.version) {
      if (data.introduction)
        commonStore.setIntroduction(data.introduction);
      if (data.about)
--- a/frontend/src/stores/commonStore.ts
+++ b/frontend/src/stores/commonStore.ts
@@ -3,7 +3,12 @@ import { getUserLanguage, isSystemLightMode, saveCache, saveConfigs, savePresets
 import { WindowSetDarkTheme, WindowSetLightTheme } from '../../wailsjs/runtime';
 import manifest from '../../../manifest.json';
 import i18n from 'i18next';
-import { defaultCompositionPrompt, defaultModelConfigs, defaultModelConfigsMac } from '../pages/defaultConfigs';
+import {
+  defaultCompositionPrompt,
+  defaultModelConfigs,
+  defaultModelConfigsMac,
+  defaultPenaltyDecay
+} from '../pages/defaultConfigs';
 import { ChartData } from 'chart.js';
 import { Preset } from '../types/presets';
 import { AboutContent } from '../types/about';
@@ -79,7 +84,10 @@ class CommonStore {
    temperature: 1,
    topP: 0.3,
    presencePenalty: 0,
-    frequencyPenalty: 1
+    frequencyPenalty: 1,
+    penaltyDecay: defaultPenaltyDecay,
+    historyN: 0,
+    markdown: true
  };
  sidePanelCollapsed: boolean | 'auto' = 'auto';
  // completion
@@ -96,7 +104,9 @@ class CommonStore {
    useLocalSoundFont: false,
    externalPlay: false,
    midi: null,
-    ns: null
+    ns: null,
+    generationStartTime: 0,
+    playOnlyGeneratedContent: true
  };
  compositionGenerating: boolean = false;
  compositionSubmittedPrompt: string = defaultCompositionPrompt;
@@ -117,6 +127,7 @@ class CommonStore {
  // configs
  currentModelConfigIndex: number = 0;
  modelConfigs: ModelConfig[] = [];
+  apiParamsCollapsed: boolean = true;
  modelParamsCollapsed: boolean = true;
  // models
  activeModelListTags: string[] = [];
@@ -167,7 +178,7 @@ class CommonStore {
    autoUpdatesCheck: true,
    giteeUpdatesSource: getUserLanguage() === 'zh',
    cnMirror: getUserLanguage() === 'zh',
-    useHfMirror: false,
+    useHfMirror: getUserLanguage() === 'zh',
    host: '127.0.0.1',
    dpiScaling: 100,
    customModelsPath: './models',
@@ -314,6 +325,10 @@ class CommonStore {
    this.advancedCollapsed = value;
  }

+  setApiParamsCollapsed(value: boolean) { // setter for the apiParamsCollapsed flag declared on this store
+    this.apiParamsCollapsed = value;
+  }
+
  setModelParamsCollapsed(value: boolean) {
    this.modelParamsCollapsed = value;
  }
--- a/frontend/src/types/chat.ts
+++ b/frontend/src/types/chat.ts
@@ -34,4 +34,7 @@ export type Attachment = {
  size: number;
  content: string;
 }
-export type ChatParams = Omit<ApiParameters, 'apiPort'>
+export type ChatParams = Omit<ApiParameters, 'apiPort'> & {
+  historyN: number;
+  markdown: boolean;
+}
--- a/frontend/src/types/composition.ts
+++ b/frontend/src/types/composition.ts
@@ -11,7 +11,9 @@ export type CompositionParams = {
  useLocalSoundFont: boolean,
  externalPlay: boolean,
  midi: ArrayBuffer | null,
-  ns: NoteSequence | null
+  ns: NoteSequence | null,
+  generationStartTime: number,
+  playOnlyGeneratedContent: boolean,
 }
 export type Track = {
  id: string;
--- a/frontend/src/types/configs.ts
+++ b/frontend/src/types/configs.ts
@@ -5,8 +5,10 @@ export type ApiParameters = {
  topP: number;
  presencePenalty: number;
  frequencyPenalty: number;
+  penaltyDecay?: number;
+  globalPenalty?: boolean;
 }
-export type Device = 'CPU' | 'CPU (rwkv.cpp)' | 'CUDA' | 'CUDA-Beta' | 'WebGPU' | 'MPS' | 'Custom';
+export type Device = 'CPU' | 'CPU (rwkv.cpp)' | 'CUDA' | 'CUDA-Beta' | 'WebGPU' | 'WebGPU (Python)' | 'MPS' | 'Custom';
 export type Precision = 'fp16' | 'int8' | 'fp32' | 'nf4' | 'Q5_1';
 export type ModelParameters = {
  // different models can not have the same name
@@ -15,6 +17,8 @@ export type ModelParameters = {
  precision: Precision;
  storedLayers: number;
  maxStoredLayers: number;
+  quantizedLayers?: number;
+  tokenChunkSize?: number;
  useCustomCuda?: boolean;
  customStrategy?: string;
  useCustomTokenizer?: boolean;
--- a/frontend/src/types/models.ts
+++ b/frontend/src/types/models.ts
@@ -1,15 +1,17 @@
 export type ModelSourceItem = {
  name: string;
-  size: number;
-  lastUpdated: string;
  desc?: { [lang: string]: string | undefined; };
+  size: number;
  SHA256?: string;
+  lastUpdated: string;
  url?: string;
  downloadUrl?: string;
+  tags?: string[];
+  customTokenizer?: string;
+  hide?: boolean;
+
+  lastUpdatedMs?: number;
  isComplete?: boolean;
  isLocal?: boolean;
  localSize?: number;
-  lastUpdatedMs?: number;
-  tags?: string[];
-  hide?: boolean;
 };
--- a/frontend/src/utils/convert-model.ts
+++ b/frontend/src/utils/convert-model.ts
@@ -5,6 +5,7 @@ import {
  ConvertGGML,
  ConvertModel,
  ConvertSafetensors,
+  ConvertSafetensorsWithPython,
  FileExists,
  GetPyError
 } from '../../wailsjs/go/backend_golang/App';
@@ -51,12 +52,22 @@ export const convertModel = async (selectedConfig: ModelConfig, navigate: Naviga
 };


-export const convertToSt = async (selectedConfig: ModelConfig) => {
+export const convertToSt = async (selectedConfig: ModelConfig, navigate: NavigateFunction) => {
+  const webgpuPython = selectedConfig.modelParameters.device === 'WebGPU (Python)';
+  if (webgpuPython) {
+    const ok = await checkDependencies(navigate);
+    if (!ok)
+      return;
+  }
+
  const modelPath = `${commonStore.settings.customModelsPath}/${selectedConfig.modelParameters.modelName}`;
  if (await FileExists(modelPath)) {
    toast(t('Start Converting'), { autoClose: 2000, type: 'info' });
    const newModelPath = modelPath.replace(/\.pth$/, '.st');
-    ConvertSafetensors(modelPath, newModelPath).then(async () => {
+    const convert = webgpuPython ?
+      (input: string, output: string) => ConvertSafetensorsWithPython(commonStore.settings.customPythonPath, input, output)
+      : ConvertSafetensors;
+    convert(modelPath, newModelPath).then(async () => {
      if (!await FileExists(newModelPath)) {
        if (commonStore.platform === 'windows' || commonStore.platform === 'linux')
          toast(t('Convert Failed') + ' - ' + await GetPyError(), { type: 'error' });
--- a/frontend/src/utils/index.tsx
+++ b/frontend/src/utils/index.tsx
@@ -4,6 +4,7 @@ import {
  DepCheck,
  InstallPyDep,
  ListDirFiles,
+  OpenOpenFileDialog,
  ReadFileInfo,
  ReadJson,
  SaveJson,
@@ -25,8 +26,9 @@ import { DataProcessParameters, LoraFinetuneParameters } from '../types/train';
 import { InstrumentTypeNameMap, MidiMessage, tracksMinimalTotalTime } from '../types/composition';
 import logo from '../assets/images/logo.png';
 import { Preset } from '../types/presets';
-import { botName, Conversation, MessageType, userName } from '../types/chat';
+import { botName, Conversation, MessageType, Role, userName } from '../types/chat';
 import { v4 as uuid } from 'uuid';
+import { findLastIndex } from 'lodash-es';

 export type Cache = {
  version: string
@@ -51,11 +53,11 @@ export async function refreshBuiltInModels(readCache: boolean = false) {
    await ReadJson('cache.json').then((cacheData: Cache) => {
      if (cacheData.models)
        cache.models = cacheData.models;
-      else cache.models = manifest.models;
+      else cache.models = manifest.models.slice();
    }).catch(() => {
-      cache.models = manifest.models;
+      cache.models = manifest.models.slice();
    });
-  else cache.models = manifest.models;
+  else cache.models = manifest.models.slice();

  commonStore.setModelSourceList(cache.models);
  await saveCache().catch(() => {
@@ -90,7 +92,7 @@ export async function refreshLocalModels(cache: {
  for (let i = 0; i < cache.models.length; i++) {
    if (!cache.models[i].lastUpdatedMs)
      cache.models[i].lastUpdatedMs = Date.parse(cache.models[i].lastUpdated);
-    if (!cache.models[i].tags)
+    if (!cache.models[i].tags || !Array.isArray(cache.models[i].tags) || cache.models[i].tags?.length === 0)
      cache.models[i].tags = ['Other'];

    for (let j = i + 1; j < cache.models.length; j++) {
@@ -145,7 +147,7 @@ function initLastUnfinishedModelDownloads() {

 export async function refreshRemoteModels(cache: {
  models: ModelSourceItem[]
-}) {
+}, filter: boolean = true, initUnfinishedModels: boolean = false) {
  const manifestUrls = commonStore.modelSourceManifestList.split(/[,，;；\n]/);
  const requests = manifestUrls.filter(url => url.endsWith('.json')).map(
    url => fetch(url, { cache: 'no-cache' }).then(r => r.json()));
@@ -162,18 +164,16 @@ export async function refreshRemoteModels(cache: {
  });
  cache.models = cache.models.filter((model, index, self) => {
    return modelSuffix.some((ext => model.name.endsWith(ext)))
-      && index === self.findIndex(
-        m => m.name === model.name || (m.SHA256 && m.SHA256 === model.SHA256 && m.size === model.size));
-  });
-  commonStore.setModelSourceList(cache.models);
-  await saveCache().catch(() => {
+      && index === findLastIndex(self,
+        m => m.name === model.name || (!!m.SHA256 && m.SHA256 === model.SHA256 && m.size === model.size));
  });
+  await refreshLocalModels(cache, filter, initUnfinishedModels);
 }

 export const refreshModels = async (readCache: boolean = false, initUnfinishedModels: boolean = false) => {
  const cache = await refreshBuiltInModels(readCache);
  await refreshLocalModels(cache, false, initUnfinishedModels);
-  await refreshRemoteModels(cache);
+  await refreshRemoteModels(cache, false, initUnfinishedModels);
 };

 export const getStrategy = (modelConfig: ModelConfig | undefined = undefined) => {
@@ -192,7 +192,12 @@ export const getStrategy = (modelConfig: ModelConfig | undefined = undefined) =>
      strategy += params.precision === 'int8' ? 'fp32i8' : 'fp32';
      break;
    case 'WebGPU':
+    case 'WebGPU (Python)':
      strategy += params.precision === 'nf4' ? 'fp16i4' : params.precision === 'int8' ? 'fp16i8' : 'fp16';
+      if (params.quantizedLayers)
+        strategy += ` layer${params.quantizedLayers}`;
+      if (params.tokenChunkSize)
+        strategy += ` chunk${params.tokenChunkSize}`;
      break;
    case 'CUDA':
    case 'CUDA-Beta':
@@ -202,6 +207,8 @@ export const getStrategy = (modelConfig: ModelConfig | undefined = undefined) =>
      strategy += params.precision === 'int8' ? 'fp16i8' : params.precision === 'fp32' ? 'fp32' : 'fp16';
      if (params.storedLayers < params.maxStoredLayers)
        strategy += ` *${params.storedLayers}+`;
+      else
+        strategy += ` -> cuda fp16 *1`;
      break;
    case 'MPS':
      if (avoidOverflow)
@@ -307,7 +314,7 @@ export function getServerRoot(defaultLocalPort: number, isCore: boolean = false)
  const coreCustomApiUrl = commonStore.settings.coreApiUrl.trim().replace(/\/$/, '');
  if (isCore && coreCustomApiUrl)
    return coreCustomApiUrl;
-  
+
  const defaultRoot = `http://127.0.0.1:${defaultLocalPort}`;
  if (commonStore.status.status !== ModelStatus.Offline)
    return defaultRoot;
@@ -349,7 +356,7 @@ export async function checkUpdate(notifyEvenLatest: boolean = false) {
                if (r.ok) {
                  r.json().then((data) => {
                    if (data.assets && data.assets.length > 0) {
-                      const asset = data.assets.find((a: any) => a.name.toLowerCase().includes(commonStore.platform.toLowerCase()));
+                      const asset = data.assets.find((a: any) => a.name.toLowerCase().includes(commonStore.platform.toLowerCase().replace('darwin', 'macos')));
                      if (asset) {
                        const updateUrl = !commonStore.settings.giteeUpdatesSource ?
                          `https://github.com/josStorer/RWKV-Runner/releases/download/${versionTag}/${asset.name}` :
@@ -577,24 +584,12 @@ export async function getSoundFont() {
 export const setActivePreset = (preset: Preset | null) => {
  commonStore.setActivePreset(preset);
  //TODO if (preset.displayPresetMessages) {
-  const conversation: Conversation = {};
-  const conversationOrder: string[] = [];
+  const { pushMessage, saveConversation } = newChatConversation();
  if (preset)
    for (const message of preset.messages) {
-      const newUuid = uuid();
-      conversationOrder.push(newUuid);
-      conversation[newUuid] = {
-        sender: message.role === 'user' ? userName : botName,
-        type: MessageType.Normal,
-        color: message.role === 'user' ? 'brand' : 'colorful',
-        time: new Date().toISOString(),
-        content: message.content,
-        side: message.role === 'user' ? 'right' : 'left',
-        done: true
-      };
+      pushMessage(message.role, message.content);
    }
-  commonStore.setConversation(conversation);
-  commonStore.setConversationOrder(conversationOrder);
+  saveConversation();
  //}
 };

@@ -610,4 +605,49 @@ export function getSupportedCustomCudaFile(isBeta: boolean) {
      './backend-python/wkv_cuda_utils/wkv_cuda40.pyd';
  else
    return '';
+}
+
+// Wrapper for webOpenOpenFileDialog and OpenOpenFileDialog: asks for a file matching filterPattern and resolves with its contents as a Blob.
+export function OpenFileDialog(filterPattern: string): Promise<Blob> {
+  return new Promise((resolve) => {
+      OpenOpenFileDialog(filterPattern).then(async filePath => {
+        if (!filePath) // no path came back: return without resolving, so the promise stays pending. NOTE(review): confirm no caller awaits it
+          return;
+
+        let blob: Blob;
+        if (commonStore.platform === 'web') // on 'web' the dialog result is read as an object that carries the Blob itself
+          blob = (filePath as unknown as { blob: Blob }).blob;
+        else // otherwise the path goes through absPathAsset and is fetched to obtain the Blob
+          blob = await fetch(absPathAsset(filePath)).then(r => r.blob());
+
+        resolve(blob);
+      }).catch(e => { // failures are only surfaced as an error toast; the promise is never rejected
+        toast(t('Error') + ' - ' + (e.message || e), { type: 'error', autoClose: 2500 });
+      });
+    }
+  );
+}
+
+export function newChatConversation() { // collects chat messages locally; returns { pushMessage, saveConversation }
+  const conversation: Conversation = {};
+  const conversationOrder: string[] = []; // message ids in the order they were pushed
+  const pushMessage = (role: Role, content: string) => { // records one message under a freshly generated uuid
+    const newUuid = uuid();
+    conversationOrder.push(newUuid);
+    conversation[newUuid] = {
+      sender: role === 'user' ? userName : botName, // any role other than 'user' is shown as the bot
+      type: MessageType.Normal,
+      color: role === 'user' ? 'brand' : 'colorful',
+      avatarImg: role === 'user' ? undefined : logo, // only non-user messages get the logo avatar
+      time: new Date().toISOString(),
+      content: content,
+      side: role === 'user' ? 'right' : 'left',
+      done: true
+    };
+  };
+  const saveConversation = () => { // hands the collected messages and their order to commonStore
+    commonStore.setConversation(conversation);
+    commonStore.setConversationOrder(conversationOrder);
+  };
+  return { pushMessage, saveConversation };
 }
--- a/frontend/vite.config.ts
+++ b/frontend/vite.config.ts
@@ -12,7 +12,7 @@ const vendor = [
  'mobx', 'mobx-react-lite',
  'i18next', 'react-i18next',
  'usehooks-ts', 'react-toastify',
-  'classnames'
+  'classnames', 'lodash-es'
 ];

 const embedded = [
--- a/frontend/wailsjs/go/backend_golang/App.d.ts
+++ b/frontend/wailsjs/go/backend_golang/App.d.ts
@@ -16,6 +16,8 @@ export function ConvertModel(arg1:string,arg2:string,arg3:string,arg4:string):Pr

 export function ConvertSafetensors(arg1:string,arg2:string):Promise<string>;

+export function ConvertSafetensorsWithPython(arg1:string,arg2:string,arg3:string):Promise<string>;
+
 export function CopyFile(arg1:string,arg2:string):Promise<void>;

 export function DeleteFile(arg1:string):Promise<void>;
@@ -26,6 +28,8 @@ export function DownloadFile(arg1:string,arg2:string):Promise<void>;

 export function FileExists(arg1:string):Promise<boolean>;

+export function GetAbsPath(arg1:string):Promise<string>;
+
 export function GetPlatform():Promise<string>;

 export function GetPyError():Promise<string>;
@@ -38,7 +42,7 @@ export function ListDirFiles(arg1:string):Promise<Array<backend_golang.FileInfo>

 export function MergeLora(arg1:string,arg2:boolean,arg3:number,arg4:string,arg5:string,arg6:string):Promise<string>;

-export function OpenFileFolder(arg1:string,arg2:boolean):Promise<void>;
+export function OpenFileFolder(arg1:string):Promise<void>;

 export function OpenMidiPort(arg1:number):Promise<void>;

@@ -64,7 +68,7 @@ export function SaveJson(arg1:string,arg2:any):Promise<void>;

 export function StartFile(arg1:string):Promise<void>;

-export function StartServer(arg1:string,arg2:number,arg3:string,arg4:boolean,arg5:boolean,arg6:boolean):Promise<string>;
+export function StartServer(arg1:string,arg2:number,arg3:string,arg4:boolean,arg5:boolean,arg6:boolean,arg7:boolean):Promise<string>;

 export function StartWebGPUServer(arg1:number,arg2:string):Promise<string>;

--- a/frontend/wailsjs/go/backend_golang/App.js
+++ b/frontend/wailsjs/go/backend_golang/App.js
@@ -30,6 +30,10 @@ export function ConvertSafetensors(arg1, arg2) {
  return window['go']['backend_golang']['App']['ConvertSafetensors'](arg1, arg2);
 }

+export function ConvertSafetensorsWithPython(arg1, arg2, arg3) {
+  return window['go']['backend_golang']['App']['ConvertSafetensorsWithPython'](arg1, arg2, arg3);
+}
+
 export function CopyFile(arg1, arg2) {
  return window['go']['backend_golang']['App']['CopyFile'](arg1, arg2);
 }
@@ -50,6 +54,10 @@ export function FileExists(arg1) {
  return window['go']['backend_golang']['App']['FileExists'](arg1);
 }

+export function GetAbsPath(arg1) {
+  return window['go']['backend_golang']['App']['GetAbsPath'](arg1);
+}
+
 export function GetPlatform() {
  return window['go']['backend_golang']['App']['GetPlatform']();
 }
@@ -74,8 +82,8 @@ export function MergeLora(arg1, arg2, arg3, arg4, arg5, arg6) {
  return window['go']['backend_golang']['App']['MergeLora'](arg1, arg2, arg3, arg4, arg5, arg6);
 }

-export function OpenFileFolder(arg1, arg2) {
-  return window['go']['backend_golang']['App']['OpenFileFolder'](arg1, arg2);
+export function OpenFileFolder(arg1) {
+  return window['go']['backend_golang']['App']['OpenFileFolder'](arg1);
 }

 export function OpenMidiPort(arg1) {
@@ -126,8 +134,8 @@ export function StartFile(arg1) {
  return window['go']['backend_golang']['App']['StartFile'](arg1);
 }

-export function StartServer(arg1, arg2, arg3, arg4, arg5, arg6) {
-  return window['go']['backend_golang']['App']['StartServer'](arg1, arg2, arg3, arg4, arg5, arg6);
+export function StartServer(arg1, arg2, arg3, arg4, arg5, arg6, arg7) {
+  return window['go']['backend_golang']['App']['StartServer'](arg1, arg2, arg3, arg4, arg5, arg6, arg7);
 }

 export function StartWebGPUServer(arg1, arg2) {
--- a/frontend/wailsjs/go/models.ts
+++ b/frontend/wailsjs/go/models.ts
--- a/go.mod
+++ b/go.mod
@@ -9,7 +9,7 @@ require (
 	github.com/minio/selfupdate v0.6.0
 	github.com/nyaosorg/go-windows-su v0.2.1
 	github.com/ubuntu/gowsl v0.0.0-20230615094051-94945650cc1e
-	github.com/wailsapp/wails/v2 v2.7.1
+	github.com/wailsapp/wails/v2 v2.8.0
 )

 require (
@@ -38,9 +38,9 @@ require (
 	github.com/valyala/fasttemplate v1.2.2 // indirect
 	github.com/wailsapp/go-webview2 v1.0.10 // indirect
 	github.com/wailsapp/mimetype v1.4.1 // indirect
-	golang.org/x/crypto v0.14.0 // indirect
+	golang.org/x/crypto v0.18.0 // indirect
 	golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 // indirect
-	golang.org/x/net v0.17.0 // indirect
-	golang.org/x/sys v0.13.0 // indirect
-	golang.org/x/text v0.13.0 // indirect
+	golang.org/x/net v0.20.0 // indirect
+	golang.org/x/sys v0.16.0 // indirect
+	golang.org/x/text v0.14.0 // indirect
 )
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
dependabot[bot]	3850ee4bf8	chore(deps): bump actions/download-artifact from 3 to 4 Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 3 to 4. - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/download-artifact dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] <support@github.com>	2024-03-11 19:19:42 +00:00
github-actions[bot]	7fbcb5e810	release v1.7.3	2024-03-11 11:08:54 +00:00
josc146	2604d3c47b	release v1.7.3	2024-03-11 19:07:08 +08:00
josc146	bb1a6191b0	prevent 'torch' has no attribute 'cuda' error in torch_gc, so user can use CPU or WebGPU (#302 )	2024-03-11 19:04:19 +08:00
josc146	dd89041f72	dep_check.py now ignores GPUtil	2024-03-11 18:55:37 +08:00
josc146	91eb72e515	fix the issue where penalty_decay and global_penalty are not being passed to the backend default config when running the model through client	2024-03-11 18:52:35 +08:00
josc146	1c7436c34b	fix max_tokens parameter of Chat page not being passed to backend	2024-03-11 18:52:33 +08:00
Steven Hangger	8678f376e9	fix(rwkv.cpp): add build step for librwkv.so	2024-03-07 23:51:32 +09:00
Steven Hangger	050154f406	feat(docker): add Docker support	2024-03-07 23:51:32 +09:00
dependabot[bot]	b3eae8bcfa	chore(deps): bump crazy-max/ghaction-chocolatey from 2 to 3 Bumps [crazy-max/ghaction-chocolatey](https://github.com/crazy-max/ghaction-chocolatey) from 2 to 3. - [Release notes](https://github.com/crazy-max/ghaction-chocolatey/releases) - [Commits](https://github.com/crazy-max/ghaction-chocolatey/compare/v2...v3) --- updated-dependencies: - dependency-name: crazy-max/ghaction-chocolatey dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] <support@github.com>	2024-03-05 13:54:36 +09:00
dependabot[bot]	c720362886	chore(deps): bump actions/setup-go from 4 to 5 Bumps [actions/setup-go](https://github.com/actions/setup-go) from 4 to 5. - [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/setup-go dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] <support@github.com>	2024-03-05 13:53:10 +09:00
dependabot[bot]	93029d3f5c	chore(deps): bump actions/checkout from 3 to 4 Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] <support@github.com>	2024-03-05 13:53:05 +09:00
dependabot[bot]	28244a57b4	chore(deps): bump actions/setup-python from 4 to 5 Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4 to 5. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] <support@github.com>	2024-03-05 13:52:59 +09:00
dependabot[bot]	f6ba9d7451	Bump fastapi from 0.104.0 to 0.109.1 in /backend-python Bumps [fastapi](https://github.com/tiangolo/fastapi) from 0.104.0 to 0.109.1. - [Release notes](https://github.com/tiangolo/fastapi/releases) - [Commits](https://github.com/tiangolo/fastapi/compare/0.104.0...0.109.1) --- updated-dependencies: - dependency-name: fastapi dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com>	2024-03-05 13:51:37 +09:00
dependabot[bot]	96e431e06b	Bump python-multipart from 0.0.6 to 0.0.7 in /backend-python Bumps [python-multipart](https://github.com/andrew-d/python-multipart) from 0.0.6 to 0.0.7. - [Release notes](https://github.com/andrew-d/python-multipart/releases) - [Changelog](https://github.com/Kludex/python-multipart/blob/master/CHANGELOG.md) - [Commits](https://github.com/andrew-d/python-multipart/compare/0.0.6...0.0.7) --- updated-dependencies: - dependency-name: python-multipart dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com>	2024-03-05 13:50:47 +09:00
josc146	cb6ddb3674	add pre-release workflow	2024-03-05 12:49:17 +08:00
josc146	07d4ba0d6b	fix a generation exception caused by potentially dangerous regex being passed into the stop array	2024-03-04 21:20:53 +08:00
github-actions[bot]	ac139d5bda	release v1.7.2	2024-03-02 11:48:20 +00:00
josc146	14acfc1d81	release v1.7.2	2024-03-02 19:47:53 +08:00
josc146	2947162cc4	update defaultModelConfigs	2024-03-02 19:45:14 +08:00
josc146	4f14074a75	expose global_penalty	2024-03-02 17:50:41 +08:00
josc146	53a5574080	improve parameters controllable range	2024-03-02 16:52:53 +08:00
josc146	d91c3c004d	allow setting tokenChunkSize of WebGPU mode	2024-03-02 16:41:29 +08:00
github-actions[bot]	c90cefc453	release v1.7.1	2024-03-01 08:03:52 +00:00
josc146	b8abd2fef3	release v1.7.1	2024-03-01 16:03:22 +08:00
josc146	887ba06bd6	allow setting quantizedLayers of WebGPU mode; chore	2024-03-01 14:23:05 +08:00
josc146	c9513822c9	fix the issue where state cache could be modified leading to inconsistent hit results	2024-03-01 13:35:16 +08:00
josc146	e3baa0da86	improve occurrence[token] condition	2024-03-01 13:18:03 +08:00
josc146	ba9aab920e	hide MPS and CUDA-Beta Options	2024-03-01 13:09:09 +08:00
josc146	b0f2ef65d9	improve occurrence[token] condition	2024-02-29 17:54:33 +08:00
josc146	c13b28561d	update manifest	2024-02-29 17:21:07 +08:00
josc146	5c88ccd9e6	update manifest	2024-02-28 23:48:17 +08:00
josc146	e0a6a279b3	add python3-dev to lora fine-tune dependencies	2024-02-28 23:34:49 +08:00
josc146	9bb3a90977	enable useHfMirror by default for chinese users	2024-02-28 23:28:31 +08:00
josc146	02bbd18acf	fix convert_safetensors.py for rwkv6	2024-02-28 23:25:46 +08:00
josc146	18ab8b141f	disable AVOID_PENALTY_TOKENS	2024-02-28 23:12:58 +08:00
github-actions[bot]	225abc5202	release v1.7.0	2024-02-21 16:10:31 +00:00
josc146	d33dff7723	release v1.7.0	2024-02-22 01:10:01 +09:00
josc146	771027211a	chore	2024-02-22 01:05:52 +09:00
josc146	94fe71b49c	change AVOID_PENALTY to \n only	2024-02-22 01:04:05 +09:00
josc146	fafd9f7f6e	upgrade to rwkv 0.8.25	2024-02-21 23:50:05 +08:00
josc146	85b10993ec	update manifest.json	2024-02-12 14:30:36 +08:00
Guillermo Marcus	11f1d66383	fix typo in requirements.txt	2024-02-06 19:59:50 +08:00
josc146	38e89aec18	update README	2024-02-06 12:21:05 +08:00
josc146	3e336830a3	chore	2024-02-06 12:19:12 +08:00
josc146	a1ae71d221	fix /update-config can make the default value of unclearly specified fields invalid by passing in None fields	2024-02-05 22:27:02 +08:00
github-actions[bot]	0703993bfd	release v1.6.9	2024-02-05 04:44:24 +00:00
josc146	50a666a350	release v1.6.9	2024-02-05 12:40:23 +08:00
josc146	9ea86ee4b1	update Related Repositories	2024-02-05 12:32:07 +08:00
josc146	94580f825e	chore	2024-02-05 12:31:26 +08:00
josc146	d5cca4e542	improve macos experience	2024-02-05 00:25:04 +08:00
josc146	f1986fa9d0	feat: History Message Number	2024-02-04 23:11:23 +08:00
josc146	1c025c3d29	feat: load conversation	2024-02-04 22:03:59 +08:00
josc146	4added7390	add markdown renderer switch	2024-02-04 20:21:42 +08:00
josc146	ee5cca3ff3	chore	2024-02-04 19:34:36 +08:00
josc146	0da92ec7bf	improve fine-tune performance	2024-02-04 19:33:32 +08:00
josc146	e3e075e432	add parse_api_log.py, this script can extract formatted data from api.log	2024-02-04 19:30:47 +08:00
josc146	19eeeab1e1	add AVOID_PENALTY_TOKENS	2024-02-04 16:49:46 +08:00
josc146	78238c24cf	update defaultPresets	2024-02-04 16:47:34 +08:00
josc146	932281db0a	add Penalty Decay slider to Chat page	2024-02-03 22:40:30 +08:00
josc146	843840baa0	expose penalty_decay, top_k	2024-02-03 22:03:10 +08:00
josc146	7cba526913	update manifest.json	2024-02-03 21:35:28 +08:00
josc146	7fe70c949e	update defaultPresets	2024-02-03 21:23:04 +08:00
josc146	1c1c9e2c5f	update defaultModelConfigs	2024-02-03 20:39:18 +08:00
josc146	26c2954c8e	web-rwkv-py 0.1.2 (Support V4, V5 and V6) https://github.com/cryscan/web-rwkv-py	2024-02-03 20:32:23 +08:00
josc146	5329537a2f	improve path processing	2024-02-03 20:29:56 +08:00
josc146	e07f0fa6e3	improve path processing	2024-02-03 15:13:24 +08:00
josc146	b077f1fe42	reduce package size	2024-02-03 13:05:02 +08:00
josc146	5f94d86558	add better custom tokenizer support and tokenizer-midipiano.json	2024-02-03 13:04:13 +08:00
josc146	947e127e34	improve path processing	2024-02-02 22:00:01 +08:00
josc146	95502b900d	fix WSL2 WindowsOptionalFeature: Microsoft-Windows-Subsystem-Linux -> VirtualMachinePlatform	2024-01-31 21:35:36 +08:00
josc146	16b636ef83	add EOS state cache point	2024-01-31 21:33:27 +08:00
josc146	4339ce20d5	rename manifest tag "Main" -> "Official"	2024-01-31 21:31:54 +08:00
josc146	c31fc22b6b	fix finetune errorsMap ($modelInfo)	2024-01-31 21:31:03 +08:00
josc146	7f49c6025b	update manifest.json	2024-01-29 19:41:45 +08:00
github-actions[bot]	2d4f436ebf	release v1.6.8	2024-01-05 05:54:16 +00:00
josc146	549f32a743	release v1.6.8	2024-01-05 13:53:50 +08:00
josc146	e3b3452a73	basic abc frontend support	2024-01-05 13:47:00 +08:00
josc146	62350d975d	fix finetune errorsMap ($modelInfo)	2024-01-05 12:46:14 +08:00
josc146	8d84b326b8	basic abc frontend support	2024-01-05 12:45:41 +08:00
josc146	16079a3cba	abc music inference support	2024-01-05 12:44:44 +08:00
github-actions[bot]	ff330a5487	release v1.6.7	2023-12-29 04:26:57 +00:00
josc146	94b3882d30	release v1.6.7	2023-12-29 12:26:33 +08:00
josc146	81544ca8b3	rwkv5 lora finetune support (https://github.com/JL-er/RWKV-v5-lora )	2023-12-29 12:23:36 +08:00
josc146	b7f4dd835e	chore	2023-12-29 00:38:33 +08:00
josc146	7e2380e4ed	fix body.state	2023-12-28 23:53:58 +08:00
josc146	7f3cfd54b0	improve state cache performance	2023-12-28 22:15:31 +08:00
josc146	e083f2c629	webgpu(python) state cache	2023-12-28 20:43:57 +08:00
josc146	e33858f110	improve memory usage and speed of convert_safetensors.py	2023-12-26 23:50:51 +08:00
github-actions[bot]	da01a33152	release v1.6.6	2023-12-25 13:03:06 +00:00
josc146	8ca920a114	release v1.6.6	2023-12-25 21:02:26 +08:00
josc146	5f3d449a66	improve Models page	2023-12-25 20:37:40 +08:00
josc146	13735e7dfb	chore	2023-12-25 20:35:00 +08:00
josc146	a38d5c3a25	enable web-rwkv-py turbo	2023-12-25 20:34:35 +08:00
josc146	5bae637c67	update Related Repositories	2023-12-25 20:32:54 +08:00
josc146	12e488ba80	improve strategy	2023-12-25 19:30:57 +08:00
josc146	ad30c63c69	update Writer preset params	2023-12-25 19:30:14 +08:00
josc146	a116eff7df	webgpu max_buffer_size	2023-12-25 18:08:13 +08:00
josc146	01bc355dde	allow manifest customTokenizer	2023-12-25 16:57:32 +08:00
josc146	8e05f3c360	chore	2023-12-25 16:56:46 +08:00
josc146	fde988dd4e	update manifest.json	2023-12-25 16:08:20 +08:00
josc146	91401ad14f	* text=auto eol=lf	2023-12-24 22:51:23 +08:00
josc146	280194647c	improve refreshRemoteModels	2023-12-22 14:44:27 +08:00
josc146	2e0a542f33	improve train_log.txt creation	2023-12-22 13:00:13 +08:00
josc146	b988694da7	better CopyEmbed	2023-12-22 12:47:26 +08:00
josc146	512c4d0f73	improve role-playing effect	2023-12-22 10:51:09 +08:00
josc146	5525fb1470	chore	2023-12-22 10:49:28 +08:00
josc146	4db735e026	update readme	2023-12-21 13:46:51 +08:00
josc146	c8c79c39d1	Create dependabot.yml	2023-12-21 12:56:21 +08:00
josc146	bcfb76d8ca	update readme	2023-12-19 14:59:02 +08:00
josc146	2d9aaf8fc9	update readme	2023-12-18 19:55:25 +08:00
josc146	8a3905c09a	reduce precompiled web_rwkv_py size	2023-12-15 16:26:01 +08:00
github-actions[bot]	54cd8a46fa	release v1.6.5	2023-12-14 14:09:13 +00:00
josc146	1b83bf261a	release v1.6.5	2023-12-14 22:07:17 +08:00
josc146	2a7d22dab1	Composition Option: Only Auto Play Generated Content	2023-12-14 22:06:39 +08:00
josc146	f7494b0cfb	update midi_filter_config.json	2023-12-14 21:18:48 +08:00
github-actions[bot]	9ca91d59ec	release v1.6.4	2023-12-14 12:40:56 +00:00
josc146	11feaa6e68	release v1.6.4	2023-12-14 20:40:24 +08:00
josc146	18d4b2304e	WebGPU (Python) strategy	2023-12-14 20:39:42 +08:00
github-actions[bot]	2f45e9c33a	release v1.6.3	2023-12-14 10:43:36 +00:00
josc146	f7df10cb66	release v1.6.3	2023-12-14 18:42:58 +08:00
josc146	46e9a2f5b2	add precompiled web_rwkv_py	2023-12-14 18:42:00 +08:00
josc146	69b8d2e0a1	fix refreshBuiltInModels	2023-12-14 18:37:37 +08:00
josc146	0ddd2e9fea	add WebGPU Python Mode (https://github.com/cryscan/web-rwkv-py )	2023-12-14 18:37:07 +08:00
josc146	01c95f5bc4	chore	2023-12-14 14:13:12 +08:00
josc146	e0bf44d82f	bump MIDI-LLM-tokenizer (fix note off)	2023-12-14 13:33:27 +08:00
josc146	f328e84ea7	update Readme_Install.txt	2023-12-13 15:23:34 +08:00
github-actions[bot]	c81f5015a1	release v1.6.2	2023-12-12 15:51:23 +00:00
josc146	e2b086e2f7	release v1.6.2	2023-12-12 23:50:56 +08:00
josc146	da632565d5	fix windows cmd waiting	2023-12-12 23:48:32 +08:00
josc146	556b667cc0	improve prompts	2023-12-12 23:27:19 +08:00
josc146	82c9825da8	rwkv.cpp python38 compatibility	2023-12-12 23:19:18 +08:00
josc146	26b30f0dbe	add load failed traceback	2023-12-12 23:16:48 +08:00
josc146	be3b69c65c	fix v1.6.1 CmdHelper	2023-12-12 23:04:24 +08:00
github-actions[bot]	07cab6949e	release v1.6.1	2023-12-12 14:38:47 +00:00