added ability to prefix documents for embeddings (depends on model config), misc tweaks
This commit is contained in:
parent
8695c6ae0f
commit
98dda95bab
4 changed files with 51 additions and 136 deletions
|
@ -1,7 +1,6 @@
|
|||
import { MilvusClient } from '@zilliz/milvus2-sdk-node'
|
||||
|
||||
const async_get_client = async () => {
|
||||
const address = process.env['MILVUS_HOST']
|
||||
const async_get_client = async (address) => {
|
||||
console.info('connecting...')
|
||||
const client = new MilvusClient({
|
||||
address,
|
||||
|
|
121
nodeapp/package-lock.json
generated
121
nodeapp/package-lock.json
generated
|
@ -1,12 +1,11 @@
|
|||
{
|
||||
"name": "mount",
|
||||
"name": "nodeapp",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"dependencies": {
|
||||
"@zilliz/milvus2-sdk-node": "^2.5.8",
|
||||
"ioredis": "^5.6.1"
|
||||
"@zilliz/milvus2-sdk-node": "^2.5.8"
|
||||
}
|
||||
},
|
||||
"node_modules/@colors/colors": {
|
||||
|
@ -60,12 +59,6 @@
|
|||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/@ioredis/commands": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.2.0.tgz",
|
||||
"integrity": "sha512-Sx1pU8EM64o2BrqNpEO1CNLtKQwyhuXuqyfH7oGKCk+1a33d2r5saW8zNwm3j6BTExtjrv2BxTgzzkMwts6vGg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@js-sdsl/ordered-map": {
|
||||
"version": "4.4.2",
|
||||
"resolved": "https://registry.npmjs.org/@js-sdsl/ordered-map/-/ordered-map-4.4.2.tgz",
|
||||
|
@ -156,9 +149,9 @@
|
|||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "22.15.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.15.3.tgz",
|
||||
"integrity": "sha512-lX7HFZeHf4QG/J7tBZqrCAXwz9J5RD56Y6MpP0eJkka8p+K0RY/yBTW7CYFJ4VGCclxqOLKmiGP5juQc6MKgcw==",
|
||||
"version": "22.15.14",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.15.14.tgz",
|
||||
"integrity": "sha512-BL1eyu/XWsFGTtDWOYULQEs4KR0qdtYfCxYAUYRoB7JP7h9ETYLgQTww6kH8Sj2C0pFGgrpM0XKv6/kbIzYJ1g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~6.21.0"
|
||||
|
@ -248,15 +241,6 @@
|
|||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/cluster-key-slot": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/cluster-key-slot/-/cluster-key-slot-1.1.2.tgz",
|
||||
"integrity": "sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/color": {
|
||||
"version": "3.2.1",
|
||||
"resolved": "https://registry.npmjs.org/color/-/color-3.2.1.tgz",
|
||||
|
@ -308,32 +292,6 @@
|
|||
"integrity": "sha512-oaMBel6gjolK862uaPQOVTA7q3TZhuSvuMQAAglQDOWYO9A91IrAOUJEyKVlqJlHE0vq5p5UXxzdPfMH/x6xNg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/debug": {
|
||||
"version": "4.4.0",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz",
|
||||
"integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ms": "^2.1.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"supports-color": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/denque": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/denque/-/denque-2.1.0.tgz",
|
||||
"integrity": "sha512-HVQE3AAb/pxF8fQAoiqpvg9i3evqug3hoiwakOyZAwJm+6vZehbkYXZ0l4JxS+I3QxM97v5aaRNhj8v5oBhekw==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=0.10"
|
||||
}
|
||||
},
|
||||
"node_modules/emoji-regex": {
|
||||
"version": "8.0.0",
|
||||
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
|
||||
|
@ -391,30 +349,6 @@
|
|||
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/ioredis": {
|
||||
"version": "5.6.1",
|
||||
"resolved": "https://registry.npmjs.org/ioredis/-/ioredis-5.6.1.tgz",
|
||||
"integrity": "sha512-UxC0Yv1Y4WRJiGQxQkP0hfdL0/5/6YvdfOOClRgJ0qppSarkhneSa6UvkMkms0AkdGimSH3Ikqm+6mkMmX7vGA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@ioredis/commands": "^1.1.1",
|
||||
"cluster-key-slot": "^1.1.0",
|
||||
"debug": "^4.3.4",
|
||||
"denque": "^2.1.0",
|
||||
"lodash.defaults": "^4.2.0",
|
||||
"lodash.isarguments": "^3.1.0",
|
||||
"redis-errors": "^1.2.0",
|
||||
"redis-parser": "^3.0.0",
|
||||
"standard-as-callback": "^2.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.22.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/ioredis"
|
||||
}
|
||||
},
|
||||
"node_modules/is-arrayish": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz",
|
||||
|
@ -454,18 +388,6 @@
|
|||
"integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash.defaults": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
||||
"integrity": "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash.isarguments": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.isarguments/-/lodash.isarguments-3.1.0.tgz",
|
||||
"integrity": "sha512-chi4NHZlZqZD18a0imDHnZPrDeBbTtVN7GXMwuGdRH9qotxAjYs3aVLKc7zNOG9eddR5Ksd8rvFEBc9SsggPpg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/logform": {
|
||||
"version": "2.7.0",
|
||||
"resolved": "https://registry.npmjs.org/logform/-/logform-2.7.0.tgz",
|
||||
|
@ -514,9 +436,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/protobufjs": {
|
||||
"version": "7.5.0",
|
||||
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.0.tgz",
|
||||
"integrity": "sha512-Z2E/kOY1QjoMlCytmexzYfDm/w5fKAiRwpSzGtdnXW1zC88Z2yXazHHrOtwCzn+7wSxyE8PYM4rvVcMphF9sOA==",
|
||||
"version": "7.4.0",
|
||||
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.4.0.tgz",
|
||||
"integrity": "sha512-mRUWCc3KUU4w1jU8sGxICXH/gNS94DvI1gxqDvBzhj1JpcsimQkYiOJfwsPUykUI5ZaspFbSgmBLER8IrQ3tqw==",
|
||||
"hasInstallScript": true,
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
|
@ -551,27 +473,6 @@
|
|||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/redis-errors": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/redis-errors/-/redis-errors-1.2.0.tgz",
|
||||
"integrity": "sha512-1qny3OExCf0UvUV/5wpYKf2YwPcOqXzkwKKSmKHiE6ZMQs5heeE/c8eXK+PNllPvmjgAbfnsbpkGZWy8cBpn9w==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/redis-parser": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/redis-parser/-/redis-parser-3.0.0.tgz",
|
||||
"integrity": "sha512-DJnGAeenTdpMEH6uAJRK/uiyEIH9WVsUmoLwzudwGJUwZPp80PDBWPHXSAGNPwNvIXAbe7MSUB1zQFugFml66A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"redis-errors": "^1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/require-directory": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
|
||||
|
@ -628,12 +529,6 @@
|
|||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/standard-as-callback": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/standard-as-callback/-/standard-as-callback-2.1.0.tgz",
|
||||
"integrity": "sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/string_decoder": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
{
|
||||
"dependencies": {
|
||||
"@zilliz/milvus2-sdk-node": "^2.5.8",
|
||||
"ioredis": "^5.6.1"
|
||||
"@zilliz/milvus2-sdk-node": "^2.5.8"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,21 +10,21 @@ const execFile = util.promisify(child_process.execFile)
|
|||
import { DataType } from '@zilliz/milvus2-sdk-node'
|
||||
|
||||
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
const CONFIG = JSON.parse(fs.readFileSync(path.resolve(import.meta.dirname, './config.json'), 'utf8'))
|
||||
const { MODELKEY_SERVER_MAP, MILVUS_HOST } = CONFIG
|
||||
|
||||
|
||||
// init a single Milvus client for this process lifecycle
|
||||
import { async_get_client } from './milvus_utils.js'
|
||||
const client = await async_get_client()
|
||||
const client = await async_get_client(MILVUS_HOST)
|
||||
process.on('exit', (code) => {
|
||||
client.closeConnection()
|
||||
return
|
||||
})
|
||||
|
||||
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
const CONFIG = JSON.parse(fs.readFileSync(path.resolve(import.meta.dirname, './config.json'), 'utf8'))
|
||||
const { MODELKEY_SERVER_MAP } = CONFIG
|
||||
|
||||
|
||||
const PORT = 8801
|
||||
const HOST = '0.0.0.0'
|
||||
|
||||
|
@ -59,17 +59,28 @@ const async_get_embeddings_for_model = async (model_key, documents) => {
|
|||
console.warn(`no matching model_key: ${model_key}`)
|
||||
return []
|
||||
}
|
||||
const { endpoint } = MODELKEY_SERVER_MAP[model_key]
|
||||
const { endpoint, prefix } = MODELKEY_SERVER_MAP[model_key]
|
||||
|
||||
// direct pass, no pre-processing
|
||||
const endpoint_content = [ ...documents ]
|
||||
let endpoint_content
|
||||
if (prefix === undefined) {
|
||||
endpoint_content = [ ...documents ]
|
||||
}
|
||||
else {
|
||||
endpoint_content = documents.map(x=>(prefix + x))
|
||||
}
|
||||
|
||||
const resp = await fetch(endpoint, {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ content: endpoint_content }),
|
||||
})
|
||||
|
||||
const resp_json = await resp.json()
|
||||
let resp_json = []
|
||||
try {
|
||||
resp_json = await resp.json()
|
||||
}
|
||||
catch (err) {
|
||||
console.error(err)
|
||||
}
|
||||
|
||||
return [ ...resp_json ]
|
||||
|
||||
|
@ -166,6 +177,8 @@ const async_get_documents_embeddings_then_insert_into_collection = async (model_
|
|||
const vector = embedding[0]
|
||||
const og_document = documents[index]
|
||||
// create stable key (used as primary key) for doc-specific Embedding
|
||||
// NOTE: hash the Original Document, so that Client can always have (locally-computable) id to it
|
||||
// - (i.e. intentionally not storing hash of "prefix+document")
|
||||
const md5_doc = crypto.createHash('md5').update(og_document).digest('hex')
|
||||
return { id: md5_doc, vector }
|
||||
})
|
||||
|
@ -204,14 +217,23 @@ const get_searchresults_from_params = async (j_params) => {
|
|||
const embed_result = await async_get_embeddings_for_model(model_key, [ query ])
|
||||
const vector = embed_result[0].embedding[0]
|
||||
const internal_collectionname = get_modelkey_from_original(collection_name, model_key)
|
||||
let search_result = []
|
||||
try {
|
||||
await ensure_collection_loaded(internal_collectionname)
|
||||
const search_result = await client.search({
|
||||
const full_result = await client.search({
|
||||
collection_name: internal_collectionname,
|
||||
data: vector,
|
||||
limit,
|
||||
consistency_level: 'Eventually',
|
||||
output_fields: ['id'],
|
||||
})
|
||||
search_result = full_result.results
|
||||
}
|
||||
catch (err) {
|
||||
console.error(err)
|
||||
// could not load collection, maybe does not exist
|
||||
// (or, .search failed)
|
||||
}
|
||||
return { model_key, search_result }
|
||||
}
|
||||
const lst_promises = []
|
||||
|
|
Loading…
Add table
Reference in a new issue