feat(gpt-runner-core): optimize token count and md json parse

This commit is contained in:
JinmingYang
2023-07-12 21:32:53 +08:00
parent cf91fee6d9
commit d839ab0fce
13 changed files with 542 additions and 396 deletions

View File

@@ -35,10 +35,10 @@
"@types/prettier": "^2.7.3",
"@types/react": "^18.2.14",
"@vitejs/plugin-legacy": "^4.1.0",
"@vitest/ui": "^0.32.4",
"@vitest/ui": "^0.33.0",
"bumpp": "^9.1.1",
"eslint": "8.44.0",
"esno": "^0.16.3",
"esno": "^0.17.0",
"execa": "^7.1.1",
"fast-glob": "^3.3.0",
"fs-extra": "^11.1.1",
@@ -46,22 +46,22 @@
"jsdom": "^22.1.0",
"lint-staged": "^13.2.3",
"msw": "1.2.2",
"pnpm": "8.6.6",
"prettier": "^2.8.8",
"pnpm": "8.6.7",
"prettier": "^3.0.0",
"react": "^18.2.0",
"rollup": "^3.26.2",
"semver": "^7.5.4",
"simple-git-hooks": "^2.8.1",
"taze": "^0.11.2",
"terser": "^5.18.2",
"terser": "^5.19.0",
"tsup": "^7.1.0",
"typescript": "^5.1.6",
"unbuild": "^0.8.11",
"unplugin-auto-import": "^0.16.6",
"vite": "^4.4.2",
"vite": "^4.4.3",
"vite-plugin-inspect": "^0.7.32",
"vite-plugin-pages": "^0.31.0",
"vitest": "^0.32.4"
"vitest": "^0.33.0"
},
"pnpm": {
"overrides": {
@@ -77,4 +77,4 @@
"eslint --cache --fix"
]
}
}
}

View File

@@ -48,7 +48,7 @@
"dependencies": {
"@nicepkg/gpt-runner-shared": "workspace:*",
"ignore": "^5.2.4",
"langchain": "^0.0.102",
"langchain": "^0.0.107",
"unconfig": "^0.3.9"
}
}
}

View File

@@ -1,25 +1,20 @@
import fs from 'node:fs'
// import { Tiktoken } from 'tiktoken/lite'
// import cl100kBase from 'tiktoken/encoders/cl100k_base.json'
import { PathUtils } from '@nicepkg/gpt-runner-shared/node'
import { isChineseCharacter } from '@nicepkg/gpt-runner-shared'
// slow but accurate
// export function countTokens(text: string) {
// const encoding = new Tiktoken(
// cl100kBase.bpe_ranks,
// cl100kBase.special_tokens,
// cl100kBase.pat_str,
// )
// const tokens = encoding.encode(text)
// encoding.free()
// return tokens.length
// }
export function countTokenQuick(text: string): number {
let chineseCount = 0
let otherCount = 0
// fast but inaccurate
export function countTokenQuick(text: string) {
// int
return Math.floor(text.length / 3.5)
for (const char of text) {
if (isChineseCharacter(char))
chineseCount += 1
else
otherCount += 1
}
return chineseCount * 2 + (otherCount / 3.5)
}
export function countFileTokens(filePath: string, quick = true) {

View File

@@ -15,7 +15,7 @@ export async function gptMdFileParser(params: GptMdFileParserParams): Promise<Si
// match ```json
const configJsonString = content.match(/^\s*?```json([\s\S]*?)```/i)?.[1]?.trim()
const singleFileConfig = singleFileConfigWithDefault(configJsonString ? tryParseJson(configJsonString) : {})
const singleFileConfig = singleFileConfigWithDefault(configJsonString ? tryParseJson(configJsonString, true) : {})
type ResolveConfigKey = 'userPrompt' | 'systemPrompt'
const resolveTitleConfig: {

View File

@@ -1,9 +1,9 @@
import { ChatPromptTemplate } from 'langchain/prompts'
import type { BaseChatMessage, InputValues } from 'langchain/schema'
import type { BaseMessage, InputValues } from 'langchain/schema'
ChatPromptTemplate.prototype.formatMessages = async function (values: InputValues): Promise<BaseChatMessage[]> {
ChatPromptTemplate.prototype.formatMessages = async function (values: InputValues): Promise<BaseMessage[]> {
const allValues = await this.mergePartialAndUserVariables(values)
let resultMessages: BaseChatMessage[] = []
let resultMessages: BaseMessage[] = []
for (const promptMessage of this.promptMessages) {
const inputValues = promptMessage.inputVariables.reduce((acc, inputVariable) => {
if (!(inputVariable in allValues)) {

View File

@@ -2,20 +2,20 @@ import type { SingleChatMessage } from '@nicepkg/gpt-runner-shared/common'
import { ChatRole } from '@nicepkg/gpt-runner-shared/common'
import { AIMessagePromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate } from 'langchain/prompts'
import type { BaseMessageStringPromptTemplate } from 'langchain/dist/prompts/chat'
import type { BaseChatMessage } from 'langchain/schema'
import { AIChatMessage, HumanChatMessage, SystemChatMessage } from 'langchain/schema'
import type { BaseMessage } from 'langchain/schema'
import { AIMessage, HumanMessage, SystemMessage } from 'langchain/schema'
export function mapStoredMessagesToChatMessages(
messages: SingleChatMessage[],
): BaseChatMessage[] {
): BaseMessage[] {
return messages.map((message) => {
switch (message.name) {
case ChatRole.User:
return new HumanChatMessage(message.text)
return new HumanMessage(message.text)
case ChatRole.Assistant:
return new AIChatMessage(message.text)
return new AIMessage(message.text)
case ChatRole.System:
return new SystemChatMessage(message.text)
return new SystemMessage(message.text)
default:
throw new Error('Role must be defined for generic messages')
}

View File

@@ -84,6 +84,7 @@
"http-proxy-agent": "*",
"https-proxy-agent": "*",
"ip": "*",
"jsonc-parser": "*",
"launch-editor": "*",
"minimatch": "*",
"socket.io": "*",
@@ -99,19 +100,20 @@
"debug": "^4.3.4",
"find-free-ports": "^3.1.1",
"http-proxy-agent": "^7.0.0",
"https-proxy-agent": "^7.0.0",
"https-proxy-agent": "^7.0.1",
"ip": "^1.1.8",
"jsonc-parser": "^3.2.0",
"launch-editor": "^2.6.0",
"minimatch": "^9.0.3",
"open": "^8.4.2",
"socket.io": "^4.7.1",
"socket.io-client": "^4.7.1",
"zod": "^3.21.4",
"zod-to-json-schema": "^3.21.3"
"zod-to-json-schema": "^3.21.4"
},
"devDependencies": {
"@types/express": "^4.17.17",
"@types/ip": "^1.1.0",
"express": "^4.18.2"
}
}
}

View File

@@ -1,4 +1,5 @@
import { AxiosError } from 'axios'
import * as jsonc from 'jsonc-parser'
import type { TreeItem } from '../types'
export function sleep(ms: number) {
@@ -82,9 +83,10 @@ export function travelTreeDeepFirst<T extends TreeItem<Record<string, any>>, R e
return travel(tree) as R[]
}
export function tryParseJson(str: string) {
export function tryParseJson(str: string, supportJsonc = false) {
try {
return JSON.parse(str?.trim() ?? '')
const target = str?.trim() ?? ''
return supportJsonc ? jsonc.parse(target) : JSON.parse(target)
}
catch (e) {
console.error('tryParseJson error: ', str, e)
@@ -246,3 +248,15 @@ export function waitForCondition(conditionFn: (...args: any[]) => boolean, timeo
}, 100)
})
}
/**
 * Tells whether the first character of `char` is a CJK ideograph.
 *
 * Only the first Unicode code point of the string is inspected. The ranges
 * checked are the ones listed in the return expression below (CJK Unified
 * Ideographs, Extensions A-F, Compatibility Ideographs and the Compatibility
 * Ideographs Supplement).
 *
 * @param char - String whose first code point is tested.
 * @returns `true` when the first code point falls in one of the ranges,
 * `false` otherwise (including for an empty string).
 */
export function isChineseCharacter(char: string): boolean {
  // codePointAt decodes a surrogate pair into the full code point.
  // charCodeAt would only return a single 16-bit code unit (<= 0xFFFF),
  // which makes every range above 0xFFFF impossible to match.
  const codePoint = char.codePointAt(0)

  // Empty string: there is no first code point to classify.
  if (codePoint === undefined)
    return false

  return (codePoint >= 0x4E00 && codePoint <= 0x9FFF)
    || (codePoint >= 0x3400 && codePoint <= 0x4DBF)
    || (codePoint >= 0x20000 && codePoint <= 0x2A6DF)
    || (codePoint >= 0x2A700 && codePoint <= 0x2B73F)
    || (codePoint >= 0x2B740 && codePoint <= 0x2B81F)
    || (codePoint >= 0x2B820 && codePoint <= 0x2CEAF)
    || (codePoint >= 0xF900 && codePoint <= 0xFAFF)
    || (codePoint >= 0x2F800 && codePoint <= 0x2FA1F)
}

View File

@@ -134,7 +134,7 @@
"@nicepkg/gpt-runner-web": "workspace:*",
"@types/vscode": "^1.71.0",
"@vscode/vsce": "^2.19.0",
"esno": "^0.16.3",
"esno": "^0.17.0",
"eventemitter3": "^5.0.1",
"execa": "^7.1.1",
"fs-extra": "^11.1.1",
@@ -142,4 +142,4 @@
"uuid": "^9.0.0",
"wait-port": "^1.0.4"
}
}
}

View File

@@ -1,5 +1,5 @@
import type { ParsedUrlQuery } from 'node:querystring'
import { formatSourceValue } from '@nicepkg/gpt-runner-shared/common'
import { formatSourceValue, isChineseCharacter } from '@nicepkg/gpt-runner-shared/common'
import type { MutableRefObject, Ref } from 'react'
export function createEl<T extends keyof HTMLElementTagNameMap>(tag: T,
@@ -44,9 +44,19 @@ export function formatNumWithK(num: number) {
return `${(num / 1000).toFixed(1)}k`
}
export function countTokenQuick(text: string) {
// int
return Math.floor(text.length / 3.5)
/**
 * Cheap heuristic token count for a piece of text.
 *
 * The string is walked by code point. Every character that
 * `isChineseCharacter` accepts is weighted as 2, and every other character
 * as 1 / 3.5. The result is not rounded, so it may be fractional.
 *
 * @param text - Text to measure.
 * @returns The weighted sum described above.
 */
export function countTokenQuick(text: string): number {
  // Array.from splits by code point, the same way a for...of loop would.
  const characters = Array.from(text)
  const chineseTotal = characters.filter(ch => isChineseCharacter(ch)).length
  const remainingTotal = characters.length - chineseTotal

  return chineseTotal * 2 + (remainingTotal / 3.5)
}
export function isDomHidden(el: HTMLElement) {

View File

@@ -91,7 +91,7 @@
"@types/react-syntax-highlighter": "^15.5.7",
"@types/uuid": "^9.0.2",
"@use-gesture/react": "^10.2.27",
"@vitejs/plugin-react": "^4.0.2",
"@vitejs/plugin-react": "^4.0.3",
"@vscode/webview-ui-toolkit": "^1.2.2",
"clsx": "^1.2.1",
"commander": "^10.0.1",
@@ -103,18 +103,18 @@
"framer-motion": "^10.12.18",
"fs-extra": "^11.1.1",
"global-agent": "^3.0.0",
"i18next": "^23.2.8",
"i18next": "^23.2.10",
"i18next-browser-languagedetector": "^7.1.0",
"i18next-http-backend": "^2.2.1",
"keyboardjs": "^2.7.0",
"lodash-es": "^4.17.21",
"monaco-editor": "^0.39.0",
"monaco-editor": "^0.40.0",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-error-boundary": "^4.0.10",
"react-hook-form": "^7.45.1",
"react-hot-toast": "^2.4.1",
"react-i18next": "^13.0.1",
"react-i18next": "^13.0.2",
"react-markdown": "^8.0.7",
"react-router-dom": "^6.14.1",
"react-syntax-highlighter": "^15.5.0",
@@ -123,11 +123,11 @@
"remark-gfm": "^3.0.1",
"styled-components": "^6.0.3",
"undici": "^5.22.1",
"unist-util-visit": "^4.1.2",
"unist-util-visit": "^5.0.0",
"uuid": "^9.0.0",
"vite": "^4.4.2",
"vite": "^4.4.3",
"vite-plugin-monaco-editor": "^1.1.0",
"vite-plugin-svgr": "^3.2.0",
"zustand": "^4.3.9"
}
}
}

803
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -44,7 +44,7 @@
"@docusaurus/utils": "^2.4.1",
"@docusaurus/utils-common": "^2.4.1",
"@popperjs/core": "^2.11.8",
"@swc/core": "1.3.62",
"@swc/core": "1.3.68",
"clsx": "^1.2.1",
"color": "^4.2.3",
"esno": "^0.16.3",