mirror of
https://github.com/syuilo/ai.git
synced 2024-11-22 05:08:00 +00:00
MeCabに外部ライブラリを使わないように (#64)
This commit is contained in:
parent
a2e4bd87fb
commit
725305a2df
|
@ -20,7 +20,7 @@
|
||||||
"canvas": "2.6.1",
|
"canvas": "2.6.1",
|
||||||
"chalk": "4.1.0",
|
"chalk": "4.1.0",
|
||||||
"lokijs": "1.5.11",
|
"lokijs": "1.5.11",
|
||||||
"mecab-async": "0.1.2",
|
"memory-streams": "0.1.3",
|
||||||
"misskey-reversi": "0.0.5",
|
"misskey-reversi": "0.0.5",
|
||||||
"promise-retry": "2.0.1",
|
"promise-retry": "2.0.1",
|
||||||
"random-seed": "0.3.0",
|
"random-seed": "0.3.0",
|
||||||
|
|
|
@ -3,7 +3,7 @@ import * as loki from 'lokijs';
|
||||||
import Module from '../../module';
|
import Module from '../../module';
|
||||||
import config from '../../config';
|
import config from '../../config';
|
||||||
import serifs from '../../serifs';
|
import serifs from '../../serifs';
|
||||||
const MeCab = require('mecab-async');
|
import { mecab } from './mecab';
|
||||||
|
|
||||||
function kanaToHira(str: string) {
|
function kanaToHira(str: string) {
|
||||||
return str.replace(/[\u30a1-\u30f6]/g, match => {
|
return str.replace(/[\u30a1-\u30f6]/g, match => {
|
||||||
|
@ -15,7 +15,6 @@ function kanaToHira(str: string) {
|
||||||
export default class extends Module {
|
export default class extends Module {
|
||||||
public readonly name = 'keyword';
|
public readonly name = 'keyword';
|
||||||
|
|
||||||
private tokenizer: any;
|
|
||||||
private learnedKeywords: loki.Collection<{
|
private learnedKeywords: loki.Collection<{
|
||||||
keyword: string;
|
keyword: string;
|
||||||
learnedAt: number;
|
learnedAt: number;
|
||||||
|
@ -29,9 +28,6 @@ export default class extends Module {
|
||||||
indices: ['userId']
|
indices: ['userId']
|
||||||
});
|
});
|
||||||
|
|
||||||
this.tokenizer = new MeCab();
|
|
||||||
this.tokenizer.command = config.mecab;
|
|
||||||
|
|
||||||
setInterval(this.learn, 1000 * 60 * 60);
|
setInterval(this.learn, 1000 * 60 * 60);
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
|
@ -50,13 +46,13 @@ export default class extends Module {
|
||||||
|
|
||||||
let keywords: string[][] = [];
|
let keywords: string[][] = [];
|
||||||
|
|
||||||
await Promise.all(interestedNotes.map(note => new Promise((res, rej) => {
|
for (const note of interestedNotes) {
|
||||||
this.tokenizer.parse(note.text, (err, tokens) => {
|
const tokens = await mecab(note.text, config.mecab);
|
||||||
const keywordsInThisNote = tokens.filter(token => token[2] == '固有名詞' && token[8] != null);
|
const keywordsInThisNote = tokens.filter(token => token[2] == '固有名詞' && token[8] != null);
|
||||||
keywords = keywords.concat(keywordsInThisNote);
|
keywords = keywords.concat(keywordsInThisNote);
|
||||||
res();
|
}
|
||||||
});
|
|
||||||
})));
|
if (keywords.length === 0) return;
|
||||||
|
|
||||||
const rnd = Math.floor((1 - Math.sqrt(Math.random())) * keywords.length);
|
const rnd = Math.floor((1 - Math.sqrt(Math.random())) * keywords.length);
|
||||||
const keyword = keywords.sort((a, b) => a[0].length < b[0].length ? 1 : -1)[rnd];
|
const keyword = keywords.sort((a, b) => a[0].length < b[0].length ? 1 : -1)[rnd];
|
||||||
|
|
45
src/modules/keyword/mecab.ts
Normal file
45
src/modules/keyword/mecab.ts
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
import { spawn } from 'child_process';
|
||||||
|
import * as util from 'util';
|
||||||
|
import * as stream from 'stream';
|
||||||
|
import * as memoryStreams from 'memory-streams';
|
||||||
|
import { EOL } from 'os';
|
||||||
|
|
||||||
|
// Promise-returning version of the callback-style `stream.pipeline`, so that it can be awaited.
const pipeline = util.promisify(stream.pipeline);
|
||||||
|
|
||||||
|
/**
 * Run MeCab on a piece of text and return its parsed output.
 *
 * Every whitespace character in `text` (line breaks included) is replaced
 * with a plain space and a single trailing newline is appended, so the whole
 * input is handed to the command as one line.
 *
 * @param text Text to analyze
 * @param mecab MeCab executable to run (defaults to `mecab`)
 * @param dic MeCab dictionary path; passed to the command as `-d <dic>` when given
 * @returns One array per non-empty output line that precedes the first `EOS`
 *   line: the part before the tab, followed by the comma-separated fields
 *   after it
 */
export async function mecab(text: string, mecab = 'mecab', dic?: string): Promise<string[][]> {
	const args: string[] = [];
	if (dic) args.push('-d', dic);

	// `\s` covers line breaks and tabs as well, so no separate `\n` / `\t` is needed.
	const lines = await cmd(mecab, args, `${text.replace(/\s/g, ' ')}\n`);

	const results: string[][] = [];

	for (const line of lines) {
		// `EOS` terminates the analysis result; anything after it is ignored.
		if (line === 'EOS') break;

		// Skip blank lines (e.g. when the command printed nothing) instead of
		// turning them into an empty pseudo-token.
		if (line === '') continue;

		const [word, value = ''] = line.split('\t');
		results.push([word, ...value.split(',')]);
	}

	return results;
}
|
||||||
|
|
||||||
|
/**
 * Spawn a command, feed it `stdin`, and collect everything it writes to
 * stdout.
 *
 * @param command Executable to spawn
 * @param args Arguments passed to the executable
 * @param stdin Text written to the child's standard input before it is closed
 * @returns The child's stdout, split on the platform line ending (`os.EOL`)
 * @throws Rejects when the process cannot be spawned, when writing to its
 *   stdin fails, or when piping its stdout fails
 */
export async function cmd(command: string, args: string[], stdin: string): Promise<string[]> {
	const mecab = spawn(command, args);

	// A failed spawn (e.g. the binary does not exist) is reported through the
	// child's 'error' event, and a child that goes away early makes the stdin
	// write fail. Without listeners either one would be thrown as an uncaught
	// exception, so both are turned into a rejection of this call instead.
	const failed = new Promise<never>((_, reject) => {
		mecab.once('error', reject);
		mecab.stdin.on('error', reject);
	});

	const writable = new memoryStreams.WritableStream();

	mecab.stdin.write(stdin);
	mecab.stdin.end();

	// Whichever settles first wins; `failed` never resolves, so a successful
	// run is decided by the pipeline finishing.
	await Promise.race([pipeline(mecab.stdout, writable), failed]);

	return writable.toString().split(EOL);
}
|
Loading…
Reference in a new issue