const _ = require("lodash") const {Index, Document, Worker} = require("flexsearch") const settings = require("../settings") const fs = require("node:fs") ///////////////////////////////////////////////////////////////////////// // Message index const options = { // tokenize: "full", tokenize: "forward", split: true, } const idxUser = new Index(options) // Tag index const optionsTag = { tokenize: "forward", // nur vorwärts indexieren bei den tags split: true, // ein Tag ist immer nur ein Wort // split: true, // doc encode: function (it) { // return it return it.split(" ") // return [it] }, // encode: it => function (it) { // return it.split(" ") // }, // encode: "default", stemmer: false, matcher: false, context: false, } const idxTags = new Index(optionsTag) // const idxTags = new Document({ // document: { // id: "_id", // index: [ // { // field: "tags", // tokenize: "forward", // // encode: it => it, // encode sorgt dafür, dass die Suche nach "+" funktioniert, aber auch dass komische Ergebnisse erscheinen // } // ], // }, // }) ///////////////////////////////////////////////////////////////////////// module.exports = { // idxMessage: idxUser, // idxTags, doIndex, search, // searchUsers, // searchTags, // addMessage: addUser, // updateMessage: updateUser, // deleteMessage: removeUser, // addTags, // removeTags, } // run() // .then(console.log) // .catch(console.error) async function run () { await doIndex() console.log(search("latu")) } ///////////////////////////////////////////////////////////////////////// let indexed = false async function doIndex () { const start = Date.now() console.log("++ START indexing Users...") const {readFromFile, file} = settings.search clearIndex(idxUser) let users if (readFromFile) { users = fs.readFileSync(file, "utf8") users = JSON.parse(users) } else { console.log("~~~ reading users from DB ... ~~~") const db = require("./db") const {data} = await db.getUsers(0, 100000) console.log(`loaded ${data.length} users from DB...`) users = data } for (const user of users) { addUser(user) // addTags(user) } indexed = true console.log(`++ END indexing Users in ${Date.now() - start}ms`) } async function search (query) { // when called from a cli program the search index is not initalized if(!indexed) await doIndex() const searchLimit = 10000 return idxUser.search(query, searchLimit) } function searchUsers (query, user) { // query = query.split(" ").join(" OR ") // ohne das "OR" scheint immer nur "AND" zu sein | die search option {bool:"or"} wird ignoriert // console.log(`searching messages for "${query}"`) return idxUser.search(`${user} ${query}`, {suggest: true}) } function searchTags (query, user) { const limit = 100000 // todo das mit dem Limit anders lösen // count? nein siehe https://github.com/nextapps-de/flexsearch?tab=readme-ov-file#limit--offset const results = idxTags.search(`${user} ${query}`, limit) return results // format is now [{field,result:[_id]}] because using document index // return results.length ? results[0].result : [] } /////// idxMessage FNS //////////////////////////////////////////////////////////////// function getUserString (user) { const {usr_id, firstname, lastname, login, institution, department} = user // if(firstname.trim()==="Adolfo") console.log(user) return `${login} ${firstname} ${lastname} ${institution} ${department}`.trim() // return `${usr_id} ${login} ${firstname} ${lastname} ${institution} ${department}`.trim() // KEINE usr_id // return `${usr_id} ${firstname} ${lastname}`.trim() } function addUser (user) { add(idxUser, user.usr_id, getUserString(user)) } function updateUser ({_id, title, tags, user}) { update(idxUser, user.usr_id, getUserString(user)) } function removeUser (usr_id) { remove(idxUser, usr_id) } function clearIndex(index) { index.clear() } /////// idxTags FNS //////////////////////////////////////////////////////////////// /** * add Tags from a message * @param _id * @param tags */ function addTags (msg) { let {_id, tags, user} = msg if (!tags) throw new Error("tags must be an array") if (_.isString(tags)) tags = [tags] add(idxTags, _id.toString(), `${user} ${tags.join(" ")}`) // muss erst gejoined werden, dann später in encode wird noch mal gesplittet - anders rum geht es nicht! } function removeTags (_id) { remove(idxTags, _id.toString()) } /////// FNS //////////////////////////////////////////////////////////////// function add (index, key, value) { index.add(key, value) } function update (index, key, value) { index.update(key, value) } function remove (index, key) { index.remove(key) }