Skip to content

Commit 5211bd4

Browse files
authored
Implement TagFilter functionality (#635)
* "Implement TagFilter functionality with support for all operations except LikePattern" * add license * fix some issues * demo for TsFile-CPP TagFilter
1 parent 448046f commit 5211bd4

10 files changed

Lines changed: 952 additions & 8 deletions

File tree

cpp/examples/cpp_examples/cpp_examples.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "file/write_file.h"
2727
#include "reader/expression.h"
2828
#include "reader/filter/filter.h"
29+
#include "reader/filter/tag_filter.h"
2930
#include "reader/qds_with_timegenerator.h"
3031
#include "reader/qds_without_timegenerator.h"
3132
#include "reader/tsfile_reader.h"

cpp/examples/cpp_examples/demo_read.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,12 @@ int demo_read() {
4040
columns.emplace_back("id2");
4141
columns.emplace_back("s1");
4242

43+
auto table_schema = reader.get_table_schema(table_name);
44+
storage::Filter* tag_filter1 = storage::TagFilterBuilder(table_schema.get()).eq("id1", "id1_filed_1");
45+
storage::Filter* tag_filter2 = storage::TagFilterBuilder(table_schema.get()).eq("id2", "id1_filed_2");
46+
storage::Filter* tag_filter = storage::TagFilterBuilder(table_schema.get()).and_filter(tag_filter1, tag_filter2);
4347
// Column vector contains the columns you want to select.
44-
HANDLE_ERROR(reader.query(table_name, columns, 0, 100, temp_ret));
48+
HANDLE_ERROR(reader.query(table_name, columns, 0, 100, temp_ret, tag_filter));
4549

4650
// Get query handler.
4751
auto ret = dynamic_cast<storage::TableResultSet*>(temp_ret);
@@ -98,5 +102,6 @@ int demo_read() {
98102

99103
// Close reader.
100104
reader.close();
105+
delete tag_filter;
101106
return 0;
102107
}

cpp/src/reader/device_meta_iterator.cc

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
#include "device_meta_iterator.h"
2121

22+
#include "filter/tag_filter.h"
23+
2224
namespace storage {
2325
bool DeviceMetaIterator::has_next() {
2426
if (!result_cache_.empty()) {
@@ -74,9 +76,11 @@ int DeviceMetaIterator::load_leaf_device(MetaIndexNode* meta_index_node) {
7476
const auto& leaf_children = meta_index_node->children_;
7577
for (size_t i = 0; i < leaf_children.size(); i++) {
7678
std::shared_ptr<IMetaIndexEntry> child = leaf_children[i];
77-
// const auto& device_id = child->name_;
78-
if (id_filter_ != nullptr /*TODO: !id_filter_->satisfy(device_id)*/) {
79-
continue;
79+
if (id_filter_ != nullptr) {
80+
if (!id_filter_->satisfyRow(
81+
0, child->get_device_id()->get_segments())) {
82+
continue;
83+
}
8084
}
8185
int32_t start_offset = child->get_offset();
8286
int32_t end_offset = i + 1 < leaf_children.size()

cpp/src/reader/filter/filter.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ class Filter {
5454
ASSERT(false);
5555
return false;
5656
}
57+
virtual bool satisfyRow(int time,
58+
std::vector<std::string*> segments) const {
59+
ASSERT(false);
60+
return false;
61+
}
5762
virtual std::vector<TimeRange*>* get_time_ranges() {
5863
ASSERT(false);
5964
return nullptr;
Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* License); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "tag_filter.h"
21+
22+
#include <algorithm>
23+
#include <utility>
24+
25+
namespace storage {
26+
27+
// TagFilter base class implementation
28+
TagFilter::TagFilter(int col_idx, std::string tag_value)
29+
: col_idx_(col_idx), value_(std::move(tag_value)), value2_("") {}
30+
31+
TagFilter::~TagFilter() = default;
32+
33+
bool TagFilter::satisfyRow(int time, std::vector<std::string*> segments) const {
34+
return satisfyRow(segments);
35+
}
36+
37+
bool TagFilter::satisfyRow(std::vector<std::string*> segments) const {
38+
ASSERT(false);
39+
return false;
40+
}
41+
42+
// TagEq implementation
43+
TagEq::TagEq(int col_idx, std::string tag_value)
44+
: TagFilter(col_idx, std::move(tag_value)) {}
45+
46+
bool TagEq::satisfyRow(std::vector<std::string*> segments) const {
47+
if (col_idx_ >= segments.size()) return false;
48+
return *segments[col_idx_] == value_;
49+
}
50+
51+
// TagNeq implementation
52+
TagNeq::TagNeq(int col_idx, std::string tag_value)
53+
: TagFilter(col_idx, std::move(tag_value)) {}
54+
55+
bool TagNeq::satisfyRow(std::vector<std::string*> segments) const {
56+
if (col_idx_ >= segments.size()) return false;
57+
return *segments[col_idx_] != value_;
58+
}
59+
60+
// TagLt implementation
61+
TagLt::TagLt(int col_idx, std::string tag_value)
62+
: TagFilter(col_idx, std::move(tag_value)) {}
63+
64+
bool TagLt::satisfyRow(std::vector<std::string*> segments) const {
65+
if (col_idx_ >= segments.size()) return false;
66+
return *segments[col_idx_] < value_;
67+
}
68+
69+
// TagLteq implementation
70+
TagLteq::TagLteq(int col_idx, std::string tag_value)
71+
: TagFilter(col_idx, std::move(tag_value)) {}
72+
73+
bool TagLteq::satisfyRow(std::vector<std::string*> segments) const {
74+
if (col_idx_ >= segments.size()) return false;
75+
return *segments[col_idx_] <= value_;
76+
}
77+
78+
// TagGt implementation
79+
TagGt::TagGt(int col_idx, std::string tag_value)
80+
: TagFilter(col_idx, std::move(tag_value)) {}
81+
82+
bool TagGt::satisfyRow(std::vector<std::string*> segments) const {
83+
if (col_idx_ >= segments.size()) return false;
84+
return *segments[col_idx_] > value_;
85+
}
86+
87+
// TagGteq implementation
88+
TagGteq::TagGteq(int col_idx, std::string tag_value)
89+
: TagFilter(col_idx, std::move(tag_value)) {}
90+
91+
bool TagGteq::satisfyRow(std::vector<std::string*> segments) const {
92+
if (col_idx_ >= segments.size()) return false;
93+
return *segments[col_idx_] >= value_;
94+
}
95+
96+
// TagRegExp implementation
97+
TagRegExp::TagRegExp(int col_idx, std::string tag_value)
98+
: TagFilter(col_idx, std::move(tag_value)) {
99+
try {
100+
pattern_ = std::regex(value_);
101+
is_valid_pattern_ = true;
102+
} catch (const std::regex_error& e) {
103+
is_valid_pattern_ = false;
104+
}
105+
}
106+
107+
bool TagRegExp::satisfyRow(std::vector<std::string*> segments) const {
108+
if (col_idx_ >= segments.size() || !is_valid_pattern_) return false;
109+
try {
110+
return std::regex_search(*segments[col_idx_], pattern_);
111+
} catch (const std::regex_error&) {
112+
return false;
113+
}
114+
}
115+
116+
// TagNotRegExp implementation
117+
TagNotRegExp::TagNotRegExp(int col_idx, std::string tag_value)
118+
: TagFilter(col_idx, std::move(tag_value)) {
119+
try {
120+
pattern_ = std::regex(value_);
121+
is_valid_pattern_ = true;
122+
} catch (const std::regex_error& e) {
123+
is_valid_pattern_ = false;
124+
}
125+
}
126+
127+
bool TagNotRegExp::satisfyRow(std::vector<std::string*> segments) const {
128+
if (col_idx_ >= segments.size() || !is_valid_pattern_) return false;
129+
try {
130+
return !std::regex_search(*segments[col_idx_], pattern_);
131+
} catch (const std::regex_error&) {
132+
return true;
133+
}
134+
}
135+
136+
// TagBetween implementation
137+
TagBetween::TagBetween(int col_idx, std::string lower_value,
138+
std::string upper_value)
139+
: TagFilter(col_idx, std::move(lower_value)) {
140+
value2_ = std::move(upper_value);
141+
}
142+
143+
bool TagBetween::satisfyRow(std::vector<std::string*> segments) const {
144+
if (col_idx_ >= segments.size()) return false;
145+
const std::string& segment_value = *segments[col_idx_];
146+
return segment_value >= value_ && segment_value <= value2_;
147+
}
148+
149+
// TagNotBetween implementation
150+
TagNotBetween::TagNotBetween(int col_idx, std::string lower_value,
151+
std::string upper_value)
152+
: TagFilter(col_idx, std::move(lower_value)) {
153+
value2_ = std::move(upper_value);
154+
}
155+
156+
bool TagNotBetween::satisfyRow(std::vector<std::string*> segments) const {
157+
if (col_idx_ >= segments.size()) return false;
158+
const std::string& segment_value = *segments[col_idx_];
159+
return segment_value < value_ || segment_value > value2_;
160+
}
161+
162+
// TagAnd implementation
163+
TagAnd::TagAnd(TagFilter* left, TagFilter* right)
164+
: TagFilter(-1, ""), left_(left), right_(right) {}
165+
166+
TagAnd::~TagAnd() {
167+
delete left_;
168+
delete right_;
169+
}
170+
171+
bool TagAnd::satisfyRow(std::vector<std::string*> segments) const {
172+
return left_->satisfyRow(segments) && right_->satisfyRow(segments);
173+
}
174+
175+
// TagOr implementation
176+
TagOr::TagOr(TagFilter* left, TagFilter* right)
177+
: TagFilter(-1, ""), left_(left), right_(right) {}
178+
179+
TagOr::~TagOr() {
180+
delete left_;
181+
delete right_;
182+
}
183+
184+
bool TagOr::satisfyRow(std::vector<std::string*> segments) const {
185+
return left_->satisfyRow(segments) || right_->satisfyRow(segments);
186+
}
187+
188+
// TagNot implementation
189+
TagNot::TagNot(TagFilter* filter) : TagFilter(-1, ""), filter_(filter) {}
190+
191+
TagNot::~TagNot() { delete filter_; }
192+
193+
bool TagNot::satisfyRow(std::vector<std::string*> segments) const {
194+
return !filter_->satisfyRow(segments);
195+
}
196+
197+
// TagFilterBuilder implementation
198+
TagFilterBuilder::TagFilterBuilder(TableSchema* schema)
199+
: table_schema_(schema) {}
200+
201+
// Builds a heap-allocated TagEq for `columnName` == `value`.
// Returns nullptr when the column cannot be resolved to a segment index.
Filter* TagFilterBuilder::eq(const std::string& columnName,
                             const std::string& value) {
    const int segment_pos = get_id_column_index(columnName);
    if (segment_pos >= 0) {
        return new TagEq(segment_pos, value);
    }
    return nullptr;
}
207+
208+
// Builds a heap-allocated TagNeq for `columnName` != `value`.
// Returns nullptr when the column cannot be resolved to a segment index.
Filter* TagFilterBuilder::neq(const std::string& columnName,
                              const std::string& value) {
    const int segment_pos = get_id_column_index(columnName);
    if (segment_pos >= 0) {
        return new TagNeq(segment_pos, value);
    }
    return nullptr;
}
214+
215+
// Builds a heap-allocated TagLt for `columnName` < `value`.
// Returns nullptr when the column cannot be resolved to a segment index.
Filter* TagFilterBuilder::lt(const std::string& columnName,
                             const std::string& value) {
    const int segment_pos = get_id_column_index(columnName);
    if (segment_pos >= 0) {
        return new TagLt(segment_pos, value);
    }
    return nullptr;
}
221+
222+
// Builds a heap-allocated TagLteq for `columnName` <= `value`.
// Returns nullptr when the column cannot be resolved to a segment index.
Filter* TagFilterBuilder::lteq(const std::string& columnName,
                               const std::string& value) {
    const int segment_pos = get_id_column_index(columnName);
    if (segment_pos >= 0) {
        return new TagLteq(segment_pos, value);
    }
    return nullptr;
}
228+
229+
// Builds a heap-allocated TagGt for `columnName` > `value`.
// Returns nullptr when the column cannot be resolved to a segment index.
Filter* TagFilterBuilder::gt(const std::string& columnName,
                             const std::string& value) {
    const int segment_pos = get_id_column_index(columnName);
    if (segment_pos >= 0) {
        return new TagGt(segment_pos, value);
    }
    return nullptr;
}
235+
236+
// Builds a heap-allocated TagGteq for `columnName` >= `value`.
// Returns nullptr when the column cannot be resolved to a segment index.
Filter* TagFilterBuilder::gteq(const std::string& columnName,
                               const std::string& value) {
    const int segment_pos = get_id_column_index(columnName);
    if (segment_pos >= 0) {
        return new TagGteq(segment_pos, value);
    }
    return nullptr;
}
242+
243+
// Builds a heap-allocated TagRegExp that matches `columnName` against the
// regular expression given in `value`.
// Returns nullptr when the column cannot be resolved to a segment index.
Filter* TagFilterBuilder::reg_exp(const std::string& columnName,
                                  const std::string& value) {
    const int segment_pos = get_id_column_index(columnName);
    if (segment_pos >= 0) {
        return new TagRegExp(segment_pos, value);
    }
    return nullptr;
}
249+
250+
// Builds a heap-allocated TagNotRegExp that accepts `columnName` values the
// regular expression in `value` does not match.
// Returns nullptr when the column cannot be resolved to a segment index.
Filter* TagFilterBuilder::not_reg_exp(const std::string& columnName,
                                      const std::string& value) {
    const int segment_pos = get_id_column_index(columnName);
    if (segment_pos >= 0) {
        return new TagNotRegExp(segment_pos, value);
    }
    return nullptr;
}
256+
257+
// Builds a heap-allocated TagBetween accepting `columnName` values within
// the inclusive range [`lower`, `upper`].
// Returns nullptr when the column cannot be resolved to a segment index.
Filter* TagFilterBuilder::between_and(const std::string& columnName,
                                      const std::string& lower,
                                      const std::string& upper) {
    const int segment_pos = get_id_column_index(columnName);
    if (segment_pos >= 0) {
        return new TagBetween(segment_pos, lower, upper);
    }
    return nullptr;
}
264+
265+
// Builds a heap-allocated TagNotBetween accepting `columnName` values
// strictly outside the inclusive range [`lower`, `upper`].
// Returns nullptr when the column cannot be resolved to a segment index.
Filter* TagFilterBuilder::not_between_and(const std::string& columnName,
                                          const std::string& lower,
                                          const std::string& upper) {
    const int segment_pos = get_id_column_index(columnName);
    if (segment_pos >= 0) {
        return new TagNotBetween(segment_pos, lower, upper);
    }
    return nullptr;
}
272+
273+
// Combines two tag filters into a heap-allocated TagAnd.
//
// On success the returned node owns both operands (its destructor deletes
// them). Returns nullptr when either operand is null or is not a TagFilter;
// in that case the operands are left untouched and remain the caller's
// responsibility.
Filter* TagFilterBuilder::and_filter(Filter* left, Filter* right) {
    TagFilter* lhs = dynamic_cast<TagFilter*>(left);
    TagFilter* rhs = dynamic_cast<TagFilter*>(right);
    // TagAnd::satisfyRow dereferences both children unconditionally, so an
    // operand that failed to build (the leaf builders return nullptr for an
    // unknown column) must be rejected here rather than crash later.
    if (lhs == nullptr || rhs == nullptr) {
        return nullptr;
    }
    return new TagAnd(lhs, rhs);
}
277+
278+
// Combines two tag filters into a heap-allocated TagOr.
//
// On success the returned node owns both operands (its destructor deletes
// them). Returns nullptr when either operand is null or is not a TagFilter;
// in that case the operands are left untouched and remain the caller's
// responsibility.
Filter* TagFilterBuilder::or_filter(Filter* left, Filter* right) {
    TagFilter* lhs = dynamic_cast<TagFilter*>(left);
    TagFilter* rhs = dynamic_cast<TagFilter*>(right);
    // TagOr::satisfyRow dereferences its children without a null check, so
    // an operand that failed to build (the leaf builders return nullptr for
    // an unknown column) must be rejected here rather than crash later.
    if (lhs == nullptr || rhs == nullptr) {
        return nullptr;
    }
    return new TagOr(lhs, rhs);
}
282+
283+
// Wraps a tag filter in a heap-allocated TagNot.
//
// On success the returned node owns the operand (its destructor deletes it).
// Returns nullptr when the operand is null or is not a TagFilter; in that
// case the operand is left untouched and remains the caller's
// responsibility.
Filter* TagFilterBuilder::not_filter(Filter* filter) {
    TagFilter* inner = dynamic_cast<TagFilter*>(filter);
    // TagNot::satisfyRow dereferences its child unconditionally, so a failed
    // cast must be rejected here rather than crash later.
    if (inner == nullptr) {
        return nullptr;
    }
    return new TagNot(inner);
}
286+
287+
int TagFilterBuilder::get_id_column_index(const std::string& columnName) {
288+
int idColumnOrder = table_schema_->find_id_column_order(columnName);
289+
if (idColumnOrder == -1) {
290+
return -1;
291+
}
292+
return idColumnOrder + 1;
293+
}
294+
295+
} // namespace storage

0 commit comments

Comments
 (0)