feat(gpt-runner-core): optimize token count and md json parse
This commit is contained in:
16
package.json
16
package.json
@@ -35,10 +35,10 @@
|
||||
"@types/prettier": "^2.7.3",
|
||||
"@types/react": "^18.2.14",
|
||||
"@vitejs/plugin-legacy": "^4.1.0",
|
||||
"@vitest/ui": "^0.32.4",
|
||||
"@vitest/ui": "^0.33.0",
|
||||
"bumpp": "^9.1.1",
|
||||
"eslint": "8.44.0",
|
||||
"esno": "^0.16.3",
|
||||
"esno": "^0.17.0",
|
||||
"execa": "^7.1.1",
|
||||
"fast-glob": "^3.3.0",
|
||||
"fs-extra": "^11.1.1",
|
||||
@@ -46,22 +46,22 @@
|
||||
"jsdom": "^22.1.0",
|
||||
"lint-staged": "^13.2.3",
|
||||
"msw": "1.2.2",
|
||||
"pnpm": "8.6.6",
|
||||
"prettier": "^2.8.8",
|
||||
"pnpm": "8.6.7",
|
||||
"prettier": "^3.0.0",
|
||||
"react": "^18.2.0",
|
||||
"rollup": "^3.26.2",
|
||||
"semver": "^7.5.4",
|
||||
"simple-git-hooks": "^2.8.1",
|
||||
"taze": "^0.11.2",
|
||||
"terser": "^5.18.2",
|
||||
"terser": "^5.19.0",
|
||||
"tsup": "^7.1.0",
|
||||
"typescript": "^5.1.6",
|
||||
"unbuild": "^0.8.11",
|
||||
"unplugin-auto-import": "^0.16.6",
|
||||
"vite": "^4.4.2",
|
||||
"vite": "^4.4.3",
|
||||
"vite-plugin-inspect": "^0.7.32",
|
||||
"vite-plugin-pages": "^0.31.0",
|
||||
"vitest": "^0.32.4"
|
||||
"vitest": "^0.33.0"
|
||||
},
|
||||
"pnpm": {
|
||||
"overrides": {
|
||||
@@ -77,4 +77,4 @@
|
||||
"eslint --cache --fix"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -48,7 +48,7 @@
|
||||
"dependencies": {
|
||||
"@nicepkg/gpt-runner-shared": "workspace:*",
|
||||
"ignore": "^5.2.4",
|
||||
"langchain": "^0.0.102",
|
||||
"langchain": "^0.0.107",
|
||||
"unconfig": "^0.3.9"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,25 +1,20 @@
|
||||
import fs from 'node:fs'
|
||||
|
||||
// import { Tiktoken } from 'tiktoken/lite'
|
||||
// import cl100kBase from 'tiktoken/encoders/cl100k_base.json'
|
||||
import { PathUtils } from '@nicepkg/gpt-runner-shared/node'
|
||||
import { isChineseCharacter } from '@nicepkg/gpt-runner-shared'
|
||||
|
||||
// slow but accurate
|
||||
// export function countTokens(text: string) {
|
||||
// const encoding = new Tiktoken(
|
||||
// cl100kBase.bpe_ranks,
|
||||
// cl100kBase.special_tokens,
|
||||
// cl100kBase.pat_str,
|
||||
// )
|
||||
// const tokens = encoding.encode(text)
|
||||
// encoding.free()
|
||||
// return tokens.length
|
||||
// }
|
||||
export function countTokenQuick(text: string): number {
|
||||
let chineseCount = 0
|
||||
let otherCount = 0
|
||||
|
||||
// fast but inaccurate
|
||||
export function countTokenQuick(text: string) {
|
||||
// int
|
||||
return Math.floor(text.length / 3.5)
|
||||
for (const char of text) {
|
||||
if (isChineseCharacter(char))
|
||||
chineseCount += 1
|
||||
|
||||
else
|
||||
otherCount += 1
|
||||
}
|
||||
|
||||
return chineseCount * 2 + (otherCount / 3.5)
|
||||
}
|
||||
|
||||
export function countFileTokens(filePath: string, quick = true) {
|
||||
|
||||
@@ -15,7 +15,7 @@ export async function gptMdFileParser(params: GptMdFileParserParams): Promise<Si
|
||||
// match ```json
|
||||
const configJsonString = content.match(/^\s*?```json([\s\S]*?)```/i)?.[1]?.trim()
|
||||
|
||||
const singleFileConfig = singleFileConfigWithDefault(configJsonString ? tryParseJson(configJsonString) : {})
|
||||
const singleFileConfig = singleFileConfigWithDefault(configJsonString ? tryParseJson(configJsonString, true) : {})
|
||||
|
||||
type ResolveConfigKey = 'userPrompt' | 'systemPrompt'
|
||||
const resolveTitleConfig: {
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import { ChatPromptTemplate } from 'langchain/prompts'
|
||||
import type { BaseChatMessage, InputValues } from 'langchain/schema'
|
||||
import type { BaseMessage, InputValues } from 'langchain/schema'
|
||||
|
||||
ChatPromptTemplate.prototype.formatMessages = async function (values: InputValues): Promise<BaseChatMessage[]> {
|
||||
ChatPromptTemplate.prototype.formatMessages = async function (values: InputValues): Promise<BaseMessage[]> {
|
||||
const allValues = await this.mergePartialAndUserVariables(values)
|
||||
let resultMessages: BaseChatMessage[] = []
|
||||
let resultMessages: BaseMessage[] = []
|
||||
for (const promptMessage of this.promptMessages) {
|
||||
const inputValues = promptMessage.inputVariables.reduce((acc, inputVariable) => {
|
||||
if (!(inputVariable in allValues)) {
|
||||
|
||||
@@ -2,20 +2,20 @@ import type { SingleChatMessage } from '@nicepkg/gpt-runner-shared/common'
|
||||
import { ChatRole } from '@nicepkg/gpt-runner-shared/common'
|
||||
import { AIMessagePromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate } from 'langchain/prompts'
|
||||
import type { BaseMessageStringPromptTemplate } from 'langchain/dist/prompts/chat'
|
||||
import type { BaseChatMessage } from 'langchain/schema'
|
||||
import { AIChatMessage, HumanChatMessage, SystemChatMessage } from 'langchain/schema'
|
||||
import type { BaseMessage } from 'langchain/schema'
|
||||
import { AIMessage, HumanMessage, SystemMessage } from 'langchain/schema'
|
||||
|
||||
export function mapStoredMessagesToChatMessages(
|
||||
messages: SingleChatMessage[],
|
||||
): BaseChatMessage[] {
|
||||
): BaseMessage[] {
|
||||
return messages.map((message) => {
|
||||
switch (message.name) {
|
||||
case ChatRole.User:
|
||||
return new HumanChatMessage(message.text)
|
||||
return new HumanMessage(message.text)
|
||||
case ChatRole.Assistant:
|
||||
return new AIChatMessage(message.text)
|
||||
return new AIMessage(message.text)
|
||||
case ChatRole.System:
|
||||
return new SystemChatMessage(message.text)
|
||||
return new SystemMessage(message.text)
|
||||
default:
|
||||
throw new Error('Role must be defined for generic messages')
|
||||
}
|
||||
|
||||
@@ -84,6 +84,7 @@
|
||||
"http-proxy-agent": "*",
|
||||
"https-proxy-agent": "*",
|
||||
"ip": "*",
|
||||
"jsonc-parser": "*",
|
||||
"launch-editor": "*",
|
||||
"minimatch": "*",
|
||||
"socket.io": "*",
|
||||
@@ -99,19 +100,20 @@
|
||||
"debug": "^4.3.4",
|
||||
"find-free-ports": "^3.1.1",
|
||||
"http-proxy-agent": "^7.0.0",
|
||||
"https-proxy-agent": "^7.0.0",
|
||||
"https-proxy-agent": "^7.0.1",
|
||||
"ip": "^1.1.8",
|
||||
"jsonc-parser": "^3.2.0",
|
||||
"launch-editor": "^2.6.0",
|
||||
"minimatch": "^9.0.3",
|
||||
"open": "^8.4.2",
|
||||
"socket.io": "^4.7.1",
|
||||
"socket.io-client": "^4.7.1",
|
||||
"zod": "^3.21.4",
|
||||
"zod-to-json-schema": "^3.21.3"
|
||||
"zod-to-json-schema": "^3.21.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/express": "^4.17.17",
|
||||
"@types/ip": "^1.1.0",
|
||||
"express": "^4.18.2"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
import { AxiosError } from 'axios'
|
||||
import * as jsonc from 'jsonc-parser'
|
||||
import type { TreeItem } from '../types'
|
||||
|
||||
export function sleep(ms: number) {
|
||||
@@ -82,9 +83,10 @@ export function travelTreeDeepFirst<T extends TreeItem<Record<string, any>>, R e
|
||||
return travel(tree) as R[]
|
||||
}
|
||||
|
||||
export function tryParseJson(str: string) {
|
||||
export function tryParseJson(str: string, supportJsonc = false) {
|
||||
try {
|
||||
return JSON.parse(str?.trim() ?? '')
|
||||
const target = str?.trim() ?? ''
|
||||
return supportJsonc ? jsonc.parse(target) : JSON.parse(target)
|
||||
}
|
||||
catch (e) {
|
||||
console.error('tryParseJson error: ', str, e)
|
||||
@@ -246,3 +248,15 @@ export function waitForCondition(conditionFn: (...args: any[]) => boolean, timeo
|
||||
}, 100)
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Reports whether the first character of `char` is a CJK ideograph.
 *
 * Only the first Unicode code point of the string is inspected; anything
 * after it is ignored. An empty string yields `false`.
 *
 * @param char - String whose first code point is tested.
 * @returns `true` when that code point lies in one of the CJK ideograph ranges below.
 */
export function isChineseCharacter(char: string): boolean {
  // codePointAt, not charCodeAt: charCodeAt only returns a single UTF-16 code
  // unit (<= 0xFFFF), so for supplementary-plane characters it would yield a
  // surrogate value and the ranges at 0x20000 and above could never match.
  const codePoint = char.codePointAt(0)

  if (codePoint === undefined)
    return false

  return (codePoint >= 0x4E00 && codePoint <= 0x9FFF) // CJK Unified Ideographs
    || (codePoint >= 0x3400 && codePoint <= 0x4DBF) // Extension A
    || (codePoint >= 0x20000 && codePoint <= 0x2A6DF) // Extension B
    || (codePoint >= 0x2A700 && codePoint <= 0x2B73F) // Extension C
    || (codePoint >= 0x2B740 && codePoint <= 0x2B81F) // Extension D
    || (codePoint >= 0x2B820 && codePoint <= 0x2CEAF) // Extension E
    || (codePoint >= 0xF900 && codePoint <= 0xFAFF) // CJK Compatibility Ideographs
    || (codePoint >= 0x2F800 && codePoint <= 0x2FA1F) // Compatibility Ideographs Supplement
}
|
||||
|
||||
@@ -134,7 +134,7 @@
|
||||
"@nicepkg/gpt-runner-web": "workspace:*",
|
||||
"@types/vscode": "^1.71.0",
|
||||
"@vscode/vsce": "^2.19.0",
|
||||
"esno": "^0.16.3",
|
||||
"esno": "^0.17.0",
|
||||
"eventemitter3": "^5.0.1",
|
||||
"execa": "^7.1.1",
|
||||
"fs-extra": "^11.1.1",
|
||||
@@ -142,4 +142,4 @@
|
||||
"uuid": "^9.0.0",
|
||||
"wait-port": "^1.0.4"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { ParsedUrlQuery } from 'node:querystring'
|
||||
import { formatSourceValue } from '@nicepkg/gpt-runner-shared/common'
|
||||
import { formatSourceValue, isChineseCharacter } from '@nicepkg/gpt-runner-shared/common'
|
||||
import type { MutableRefObject, Ref } from 'react'
|
||||
|
||||
export function createEl<T extends keyof HTMLElementTagNameMap>(tag: T,
|
||||
@@ -44,9 +44,19 @@ export function formatNumWithK(num: number) {
|
||||
return `${(num / 1000).toFixed(1)}k`
|
||||
}
|
||||
|
||||
export function countTokenQuick(text: string) {
|
||||
// int
|
||||
return Math.floor(text.length / 3.5)
|
||||
/**
 * Produces a rough token estimate for `text` without running a tokenizer.
 *
 * Every character that `isChineseCharacter` accepts is weighted as 2, and
 * every other character as 1 / 3.5. The result is the sum of both parts and
 * is therefore not necessarily an integer.
 *
 * @param text - Text to estimate.
 * @returns The weighted estimate (may be fractional).
 */
export function countTokenQuick(text: string): number {
  // Array.from walks the string with the same iterator a for...of loop uses.
  const symbols = Array.from(text)
  const chineseTotal = symbols.filter(symbol => isChineseCharacter(symbol)).length
  const otherTotal = symbols.length - chineseTotal

  return chineseTotal * 2 + (otherTotal / 3.5)
}
|
||||
|
||||
export function isDomHidden(el: HTMLElement) {
|
||||
|
||||
@@ -91,7 +91,7 @@
|
||||
"@types/react-syntax-highlighter": "^15.5.7",
|
||||
"@types/uuid": "^9.0.2",
|
||||
"@use-gesture/react": "^10.2.27",
|
||||
"@vitejs/plugin-react": "^4.0.2",
|
||||
"@vitejs/plugin-react": "^4.0.3",
|
||||
"@vscode/webview-ui-toolkit": "^1.2.2",
|
||||
"clsx": "^1.2.1",
|
||||
"commander": "^10.0.1",
|
||||
@@ -103,18 +103,18 @@
|
||||
"framer-motion": "^10.12.18",
|
||||
"fs-extra": "^11.1.1",
|
||||
"global-agent": "^3.0.0",
|
||||
"i18next": "^23.2.8",
|
||||
"i18next": "^23.2.10",
|
||||
"i18next-browser-languagedetector": "^7.1.0",
|
||||
"i18next-http-backend": "^2.2.1",
|
||||
"keyboardjs": "^2.7.0",
|
||||
"lodash-es": "^4.17.21",
|
||||
"monaco-editor": "^0.39.0",
|
||||
"monaco-editor": "^0.40.0",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"react-error-boundary": "^4.0.10",
|
||||
"react-hook-form": "^7.45.1",
|
||||
"react-hot-toast": "^2.4.1",
|
||||
"react-i18next": "^13.0.1",
|
||||
"react-i18next": "^13.0.2",
|
||||
"react-markdown": "^8.0.7",
|
||||
"react-router-dom": "^6.14.1",
|
||||
"react-syntax-highlighter": "^15.5.0",
|
||||
@@ -123,11 +123,11 @@
|
||||
"remark-gfm": "^3.0.1",
|
||||
"styled-components": "^6.0.3",
|
||||
"undici": "^5.22.1",
|
||||
"unist-util-visit": "^4.1.2",
|
||||
"unist-util-visit": "^5.0.0",
|
||||
"uuid": "^9.0.0",
|
||||
"vite": "^4.4.2",
|
||||
"vite": "^4.4.3",
|
||||
"vite-plugin-monaco-editor": "^1.1.0",
|
||||
"vite-plugin-svgr": "^3.2.0",
|
||||
"zustand": "^4.3.9"
|
||||
}
|
||||
}
|
||||
}
|
||||
803
pnpm-lock.yaml
generated
803
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -44,7 +44,7 @@
|
||||
"@docusaurus/utils": "^2.4.1",
|
||||
"@docusaurus/utils-common": "^2.4.1",
|
||||
"@popperjs/core": "^2.11.8",
|
||||
"@swc/core": "1.3.62",
|
||||
"@swc/core": "1.3.68",
|
||||
"clsx": "^1.2.1",
|
||||
"color": "^4.2.3",
|
||||
"esno": "^0.16.3",
|
||||
|
||||
Reference in New Issue
Block a user