blob: 8ac60cb2823c705e8e381f21b3996ddd1d7e70bc (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
//
// FMTokenizers.h
// fmdb
//
// Created by Andrew on 4/9/14.
// Copyright (c) 2014 Andrew Goodale. All rights reserved.
//
#import <Foundation/Foundation.h>
#import "FMDatabase+FTS3.h"
NS_ASSUME_NONNULL_BEGIN
/**
This is the base tokenizer implementation, using a CFStringTokenizer to find words.

The class adopts @c FMTokenizerDelegate , so an instance can be passed wherever a tokenizer delegate is expected
(for example as the base tokenizer of another tokenizer).
*/
@interface FMSimpleTokenizer : NSObject <FMTokenizerDelegate>
/**
Create the tokenizer with a given locale. The locale will be used to initialize the string tokenizer and to lowercase the parsed word.
@param locale The locale used by the simple tokenizer. The locale can be @c NULL , in which case the current locale will be used.
@return The initialized tokenizer.
*/
- (instancetype)initWithLocale:(CFLocaleRef _Nullable)locale;
@end
#pragma mark
/**
This tokenizer adds support for a stop word list on top of another ("base") tokenizer.

Note that the class derives from @c NSObject , not from @c FMSimpleTokenizer ; the base tokenizer is supplied by the
caller when the instance is created.
*/
@interface FMStopWordTokenizer : NSObject <FMTokenizerDelegate>
/**
The set of stop words. The property is atomic and stores a copy of the set assigned to it.
NOTE(review): the elements are presumably @c NSString instances (the factory method loads words from a text file),
but no element type is declared here - confirm against the implementation.
*/
@property (atomic, copy) NSSet *words;
/**
Load a stop-word tokenizer using a file containing words delimited by newlines. The file should be encoded in UTF-8.
@param wordFileURL The file URL for the list of words.
@param tokenizer The base @c FMTokenizerDelegate .
@param error On failure, receives the @c NSError describing why the file could not be read.
@return A stop-word tokenizer for the words in the file.
NOTE(review): the return type is nonnull in this header, yet read failures are reported through @c error ;
check what the implementation returns in that case before relying on a non-nil result.
*/
+ (instancetype)tokenizerWithFileURL:(NSURL *)wordFileURL baseTokenizer:(id<FMTokenizerDelegate>)tokenizer error:(NSError * _Nullable *)error;
/**
Initialize an instance of the tokenizer using the set of words. The words should be lowercase if you're using the
`FMSimpleTokenizer` as the base.
@param words The @c NSSet of stop words.
@param tokenizer The base @c FMTokenizerDelegate .
@return The initialized tokenizer.
*/
- (instancetype)initWithWords:(NSSet *)words baseTokenizer:(id<FMTokenizerDelegate>)tokenizer;
@end
NS_ASSUME_NONNULL_END
|