Welcome to mirror list, hosted at ThFree Co, Russian Federation.

FMTokenizers.h « fts3 « extra « src - github.com/ccgus/fmdb.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 4ba370b658a8c6dc3c4950b8e7584740bb8e757f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
//
//  FMTokenizers.h
//  fmdb
//
//  Created by Andrew on 4/9/14.
//  Copyright (c) 2014 Andrew Goodale. All rights reserved.
//

#import <Foundation/Foundation.h>
#import "FMDatabase+FTS3.h"

NS_ASSUME_NONNULL_BEGIN

/**
 This is the base tokenizer implementation, using a CFStringTokenizer to find words.
 It adopts the `FMTokenizerDelegate` protocol.
 */
@interface FMSimpleTokenizer : NSObject <FMTokenizerDelegate>

/**
 Create the tokenizer with a given locale. The locale will be used to initialize the string tokenizer and to lowercase the parsed word.
 The locale can be `NULL`, in which case the current locale will be used.

 @param locale The locale used for tokenizing and lowercasing, or `NULL` to use the current locale.
 @return The initialized tokenizer.
 */
- (instancetype)initWithLocale:(CFLocaleRef _Nullable)locale;

@end

#pragma mark

/**
 This tokenizer wraps a base tokenizer and adds support for a stop word list.
 The base can be any object adopting `FMTokenizerDelegate`, such as `FMSimpleTokenizer`.
 */
@interface FMStopWordTokenizer : NSObject <FMTokenizerDelegate>

/**
 The set of stop words. The set is copied on assignment.
 */
@property (atomic, copy) NSSet *words;

/**
 Load a stop-word tokenizer using a file containing words delimited by newlines. The file should be encoded in UTF-8.

 @param wordFileURL The URL of the stop-word file.
 @param tokenizer The base tokenizer that this tokenizer builds upon.
 @param error On failure, receives an error describing the problem. May be `NULL` if the error details are not needed.
 @return A new tokenizer, or `nil` if the word file could not be loaded. Check the return value, not `error`, to detect failure.
 */
+ (nullable instancetype)tokenizerWithFileURL:(NSURL *)wordFileURL baseTokenizer:(id<FMTokenizerDelegate>)tokenizer error:(NSError * _Nullable * _Nullable)error;

/**
 Initialize an instance of the tokenizer using the set of words. The words should be lowercase if you're using the
 `FMSimpleTokenizer` as the base.

 @param words The set of stop words.
 @param tokenizer The base tokenizer that this tokenizer builds upon.
 @return The initialized tokenizer.
 */
- (instancetype)initWithWords:(NSSet *)words baseTokenizer:(id<FMTokenizerDelegate>)tokenizer;

@end

NS_ASSUME_NONNULL_END