11import * as vscode from "vscode" ;
22import path from "node:path" ;
3+ import Logger from "../logger" ;
34
45const tokenize = async ( text : string , url : string ) => {
56 try {
@@ -53,6 +54,9 @@ const spliteDocumentByPosition = (
5354 ) ;
5455 return [ textBefore , textAfter ] ;
5556} ;
/**
 * Computes the reciprocal of the square root of `x`, i.e. `1 / sqrt(x)`.
 *
 * Follows plain IEEE-754 arithmetic with no guarding: `x === 0` yields
 * `Infinity` and a negative `x` yields `NaN`.
 *
 * @param x - The value whose inverse square root is wanted.
 * @returns `1 / Math.sqrt(x)`.
 */
const inverseSquareRoot = (x: number): number => {
  const root = Math.sqrt(x);
  return 1 / root;
};
/**
 * Returns a pseudo-random integer drawn from the closed interval
 * `[min, max]` (both ends inclusive), using `Math.random()`.
 *
 * The bounds are normalised before drawing:
 * - if they are passed in reverse order they are swapped;
 * - fractional bounds are tightened inwards (lower bound rounded up, upper
 *   bound rounded down) so the result is always an integer that really lies
 *   inside the requested interval.
 *
 * @param min - One end of the interval.
 * @param max - The other end of the interval.
 * @returns An integer `n` with `min <= n <= max` (after normalisation).
 * @throws RangeError If either bound is not finite, or if the interval
 *   contains no integer (for example `1.2 .. 1.8`).
 */
const randomFromInterval = (min: number, max: number): number => {
  const lowest = Math.ceil(Math.min(min, max));
  const highest = Math.floor(Math.max(min, max));

  if (
    !Number.isFinite(lowest) ||
    !Number.isFinite(highest) ||
    lowest > highest
  ) {
    throw new RangeError(
      `randomFromInterval: no integer lies between ${min} and ${max}`
    );
  }

  // Floor the scaled random value BEFORE adding the offset: adding first can
  // round up to `highest + 1` when Math.random() is very close to 1 and the
  // offset is large relative to the span.
  const span = highest - lowest + 1;
  return lowest + Math.floor(Math.random() * span);
};
5660
5761const processingDocumentWithPosition = async ( {
5862 document,
@@ -66,34 +70,56 @@ const processingDocumentWithPosition = async ({
6670 maxToken : number ;
6771} ) => {
6872 const [ textBefore , textAfter ] = spliteDocumentByPosition ( document , position ) ;
69- let beforeTokens = 50 ;
70- let afterTokens = 50 ;
73+
74+ let beforeTokens = maxToken / 2 ;
75+ let afterTokens = maxToken / 2 ;
7176
7277 let textBeforeSlice : string ;
7378 let textAfterSlice : string ;
7479
75- let resToken = 0 ;
80+ let tokens = 0 ;
7681
7782 while ( true ) {
78- textBeforeSlice = textBefore . slice ( beforeTokens * - 1 ) ;
79- textAfterSlice = textAfter . slice ( 0 , afterTokens ) ;
83+ textBeforeSlice = textBefore . slice ( beforeTokens * 3 * - 1 ) ;
84+ textAfterSlice = textAfter . slice ( 0 , afterTokens * 3 ) ;
8085
81- resToken = await tokenize ( textBeforeSlice + textAfterSlice , url ) ;
86+ tokens = await tokenize ( textBeforeSlice + textAfterSlice , url ) ;
87+ const tokenDifference = Math . abs ( maxToken - tokens ) ;
88+ const maxDifference = Math . max ( maxToken * 0.1 , 10 ) ;
8289
90+ const documentName = document . fileName ;
91+ Logger . debug ( `${ documentName } document tokens: ${ tokens } ` ) ;
8392 if (
84- resToken >= maxToken ||
85- ( textBeforeSlice . length >= textBefore . length &&
86- textAfterSlice . length >= textAfter . length )
93+ ( tokens <= maxToken &&
94+ textBeforeSlice . length >= textBefore . length &&
95+ textAfterSlice . length >= textAfter . length ) ||
96+ tokenDifference <= maxDifference
8797 ) {
8898 return {
8999 documentText : `${ textBeforeSlice } <|fim▁hole|>${ textAfterSlice } ` ,
90- documentTokens : resToken ,
100+ documentTokens : tokens ,
91101 } ;
92102 }
93103
94- beforeTokens =
95- Number ( ( beforeTokens * ( maxToken / resToken ) ) . toFixed ( 0 ) ) + 5 ;
96- afterTokens = Number ( ( afterTokens * ( maxToken / resToken ) ) . toFixed ( 0 ) ) + 5 ;
104+ if ( tokens <= maxToken ) {
105+ beforeTokens +=
106+ inverseSquareRoot ( beforeTokens / maxToken ) *
107+ randomFromInterval ( 30 , 60 ) *
108+ 4 ;
109+ afterTokens +=
110+ inverseSquareRoot ( afterTokens / maxToken ) *
111+ randomFromInterval ( 30 , 60 ) *
112+ 4 ;
113+ } else {
114+ beforeTokens -=
115+ inverseSquareRoot ( beforeTokens / maxToken ) *
116+ randomFromInterval ( 30 , 60 ) *
117+ 4 ;
118+ afterTokens -=
119+ inverseSquareRoot ( afterTokens / maxToken ) *
120+ randomFromInterval ( 30 , 60 ) *
121+ 4 ;
122+ }
97123 }
98124} ;
99125
@@ -107,25 +133,58 @@ const processingDocument = async ({
107133 maxToken : number ;
108134} ) => {
109135 const text = getTextNormalized ( document . getText ( ) ) ;
110- let tokens = 50 ;
111136
112- let textSlice : string ;
137+ let tokens = maxToken ;
113138
114- let resToken = 0 ;
139+ let textSlice : string ;
115140
116141 while ( true ) {
117- textSlice = text . slice ( 0 , tokens ) ;
142+ Logger . debug ( "New iteration of the while loop" ) ;
143+
144+ textSlice = text . slice ( 0 , Number ( tokens . toFixed ( 0 ) ) * 3 ) ;
145+
146+ tokens = await tokenize ( textSlice , url ) ;
118147
119- resToken = await tokenize ( textSlice , url ) ;
148+ const tokenDifference = Math . abs ( maxToken - tokens ) ;
149+ const maxDifference = Math . max ( maxToken * 0.05 , 10 ) ;
150+
151+ const logMessage = `Text slice length: ${ textSlice . length } , Tokens after tokenization: ${ tokens } , Max token: ${ maxToken } , Token difference: ${ tokenDifference } ` ;
152+
153+ Logger . debug ( logMessage ) ;
154+
155+ const documentName = document . fileName ;
156+ Logger . debug ( `${ documentName } document tokens: ${ tokens } ` ) ;
157+ if (
158+ ( tokens <= maxToken && textSlice . length >= text . length ) ||
159+ tokenDifference <= maxDifference
160+ ) {
161+ Logger . debug ( `${ documentName } document tokens resualt: ${ tokens } ` ) ;
120162
121- if ( resToken >= maxToken || textSlice . length >= text . length ) {
122163 return {
123164 documentText : textSlice ,
124- documentTokens : resToken ,
165+ documentTokens : tokens ,
125166 } ;
126167 }
127168
128- tokens = Number ( ( tokens * ( maxToken / resToken ) ) . toFixed ( 0 ) ) + 5 ;
169+ if ( tokens <= maxToken ) {
170+ const ratio = tokens / maxToken ;
171+ Logger . debug ( `Calculating increment for ratio: ${ ratio } ` ) ;
172+
173+ const increment = inverseSquareRoot ( ratio ) * randomFromInterval ( 10 , 20 ) ;
174+ Logger . debug ( `Increment calculated: ${ increment } ` ) ;
175+
176+ tokens += increment ;
177+ Logger . debug ( `Tokens incremented by: ${ increment } ` ) ;
178+ } else {
179+ const ratio = tokens / maxToken ;
180+ Logger . debug ( `Calculating decrement for ratio: ${ ratio } ` ) ;
181+
182+ const decrement = inverseSquareRoot ( ratio ) * randomFromInterval ( 250 , 500 ) ;
183+ Logger . debug ( `Decrement calculated: ${ decrement } ` ) ;
184+
185+ tokens -= decrement ;
186+ Logger . debug ( `Tokens decremented by: ${ decrement } ` ) ;
187+ }
129188 }
130189} ;
131190
@@ -148,7 +207,7 @@ export const getPromptCompletion = async ({
148207 maxTokenExpect : number ;
149208 url : string ;
150209} ) => {
151- const maxTokenHardLimit = 4000 ;
210+ const maxTokenHardLimit = 10000 ;
152211 const maxToken =
153212 maxTokenExpect > maxTokenHardLimit ? maxTokenHardLimit : maxTokenExpect ;
154213
@@ -170,18 +229,23 @@ export const getPromptCompletion = async ({
170229 ) {
171230 let restTokens = maxToken - activeDocumentTokens ;
172231 for ( const document of additionalDocuments ) {
232+ if ( restTokens <= 50 ) {
233+ break ;
234+ }
173235 const { documentText, documentTokens } = await processingDocument ( {
174236 document,
175237 maxToken : restTokens ,
176238 url,
177239 } ) ;
240+ const documentName = document . fileName ;
241+
242+ Logger . debug (
243+ `${ documentName } document tokens resualt: ${ documentTokens } `
244+ ) ;
178245
179246 additionalDocumentsText +=
180247 "\n" + getRelativePath ( document . uri ) + "\n" + documentText ;
181248 restTokens -= documentTokens ;
182- if ( restTokens <= 0 ) {
183- break ;
184- }
185249 }
186250 }
187251
0 commit comments