mirror of
https://github.com/syuilo/ai.git
synced 2024-11-21 20:58:00 +00:00
MeCabに外部ライブラリを使わないように (#64)
This commit is contained in:
parent
a2e4bd87fb
commit
725305a2df
|
@ -20,7 +20,7 @@
|
|||
"canvas": "2.6.1",
|
||||
"chalk": "4.1.0",
|
||||
"lokijs": "1.5.11",
|
||||
"mecab-async": "0.1.2",
|
||||
"memory-streams": "0.1.3",
|
||||
"misskey-reversi": "0.0.5",
|
||||
"promise-retry": "2.0.1",
|
||||
"random-seed": "0.3.0",
|
||||
|
|
|
@ -3,7 +3,7 @@ import * as loki from 'lokijs';
|
|||
import Module from '../../module';
|
||||
import config from '../../config';
|
||||
import serifs from '../../serifs';
|
||||
const MeCab = require('mecab-async');
|
||||
import { mecab } from './mecab';
|
||||
|
||||
function kanaToHira(str: string) {
|
||||
return str.replace(/[\u30a1-\u30f6]/g, match => {
|
||||
|
@ -15,7 +15,6 @@ function kanaToHira(str: string) {
|
|||
export default class extends Module {
|
||||
public readonly name = 'keyword';
|
||||
|
||||
private tokenizer: any;
|
||||
private learnedKeywords: loki.Collection<{
|
||||
keyword: string;
|
||||
learnedAt: number;
|
||||
|
@ -29,9 +28,6 @@ export default class extends Module {
|
|||
indices: ['userId']
|
||||
});
|
||||
|
||||
this.tokenizer = new MeCab();
|
||||
this.tokenizer.command = config.mecab;
|
||||
|
||||
setInterval(this.learn, 1000 * 60 * 60);
|
||||
|
||||
return {};
|
||||
|
@ -50,13 +46,13 @@ export default class extends Module {
|
|||
|
||||
let keywords: string[][] = [];
|
||||
|
||||
await Promise.all(interestedNotes.map(note => new Promise((res, rej) => {
|
||||
this.tokenizer.parse(note.text, (err, tokens) => {
|
||||
for (const note of interestedNotes) {
|
||||
const tokens = await mecab(note.text, config.mecab);
|
||||
const keywordsInThisNote = tokens.filter(token => token[2] == '固有名詞' && token[8] != null);
|
||||
keywords = keywords.concat(keywordsInThisNote);
|
||||
res();
|
||||
});
|
||||
})));
|
||||
}
|
||||
|
||||
if (keywords.length === 0) return;
|
||||
|
||||
const rnd = Math.floor((1 - Math.sqrt(Math.random())) * keywords.length);
|
||||
const keyword = keywords.sort((a, b) => a[0].length < b[0].length ? 1 : -1)[rnd];
|
||||
|
|
45
src/modules/keyword/mecab.ts
Normal file
45
src/modules/keyword/mecab.ts
Normal file
|
@ -0,0 +1,45 @@
|
|||
import { spawn } from 'child_process';
|
||||
import * as util from 'util';
|
||||
import * as stream from 'stream';
|
||||
import * as memoryStreams from 'memory-streams';
|
||||
import { EOL } from 'os';
|
||||
|
||||
const pipeline = util.promisify(stream.pipeline);
|
||||
|
||||
/**
 * Run MeCab on a piece of text and return the parsed output lines.
 *
 * Every whitespace character in `text` (line breaks included) is replaced by a plain
 * space, so the whole input is sent to the command as a single line followed by `\n`.
 * Output lines are read until the first `EOS` line.
 *
 * @param text Text to analyze
 * @param mecab Name or path of the MeCab executable
 * @param dic MeCab dictionary path; when given it is passed to the command as `-d <dic>`
 * @returns One array per output line: the part before the tab, followed by the
 *          comma-separated fields that come after the tab
 */
export async function mecab(text: string, mecab = 'mecab', dic?: string): Promise<string[][]> {
	const args: string[] = [];
	if (dic) args.push('-d', dic);

	// `\s` already matches `\n` and `\t`, so a single class is enough to flatten the input
	const lines = await cmd(mecab, args, `${text.replace(/\s/g, ' ')}\n`);

	const results: string[][] = [];

	for (const line of lines) {
		if (line === 'EOS') break;

		// A blank line carries no word and no fields. It appears when the command printed
		// nothing (splitting an empty output yields one empty string); skipping it avoids
		// returning a bogus `['', '']` entry.
		if (line === '') continue;

		const [word, value = ''] = line.split('\t');
		results.push([word, ...value.split(',')]);
	}

	return results;
}
|
||||
|
||||
/**
 * Spawn `command` with `args`, write `stdin` to its standard input and collect
 * everything it prints on standard output.
 *
 * @param command Executable name or path to spawn
 * @param args Command-line arguments passed to the executable
 * @param stdin Text written to the child's standard input, which is then closed
 * @returns The child's standard output, decoded as UTF-8 and split on the platform
 *          line ending (`os.EOL`)
 * @throws Rejects when the process cannot be spawned or when its stdin/stdout stream
 *         emits an error
 */
export async function cmd(command: string, args: string[], stdin: string): Promise<string[]> {
	const child = spawn(command, args);

	const output = await new Promise<string>((resolve, reject) => {
		const chunks: Buffer[] = [];

		// Without these listeners a spawn failure (e.g. missing binary) or a broken pipe
		// is emitted as an unhandled 'error' event and takes the whole process down.
		child.on('error', reject);
		child.stdin.on('error', reject);
		child.stdout.on('error', reject);

		// stderr is not used, but it has to be drained: a child that fills the pipe
		// buffer would otherwise block and never finish.
		child.stderr.resume();

		child.stdout.on('data', (chunk: Buffer) => {
			chunks.push(chunk);
		});
		// Decode only once everything has arrived so multi-byte characters that were
		// split across chunks are reassembled correctly.
		child.stdout.on('end', () => {
			resolve(Buffer.concat(chunks).toString());
		});

		child.stdin.end(stdin);
	});

	return output.split(EOL);
}
|
Loading…
Reference in a new issue