Loading cmds/statsd/benchmark/filter_value_benchmark.cpp +1 −17 Original line number Diff line number Diff line Loading @@ -53,29 +53,13 @@ static void BM_FilterValue(benchmark::State& state) { std::vector<Matcher> matchers; translateFieldMatcher(field_matcher, &matchers); while (state.KeepRunning()) { vector<HashableDimensionKey> output; filterValues(matchers, event.getValues(), &output); } } BENCHMARK(BM_FilterValue); static void BM_FilterValue2(benchmark::State& state) { LogEvent event(1, 100000); FieldMatcher field_matcher; createLogEventAndMatcher(&event, &field_matcher); std::vector<Matcher> matchers; translateFieldMatcher(field_matcher, &matchers); while (state.KeepRunning()) { HashableDimensionKey output; filterValues(matchers, event.getValues(), &output); } } BENCHMARK(BM_FilterValue2); BENCHMARK(BM_FilterValue); } // namespace statsd } // namespace os Loading cmds/statsd/src/FieldValue.cpp +9 −0 Original line number Diff line number Diff line Loading @@ -48,6 +48,11 @@ bool Field::matches(const Matcher& matcher) const { return true; } if (matcher.hasAllPositionMatcher() && (mField & (matcher.mMask & kClearAllPositionMatcherMask)) == matcher.mMatcher.getField()) { return true; } return false; } Loading @@ -67,6 +72,10 @@ void translateFieldMatcher(int tag, const FieldMatcher& matcher, int depth, int* return; } switch (matcher.position()) { case Position::ALL: pos[depth] = 0x00; mask[depth] = 0x7f; break; case Position::ANY: pos[depth] = 0; mask[depth] = 0; Loading cmds/statsd/src/FieldValue.h +16 −2 Original line number Diff line number Diff line Loading @@ -30,6 +30,7 @@ const int32_t kAttributionField = 1; const int32_t kMaxLogDepth = 2; const int32_t kLastBitMask = 0x80; const int32_t kClearLastBitDeco = 0x7f; const int32_t kClearAllPositionMatcherMask = 0xffff00ff; enum Type { UNKNOWN, INT, LONG, FLOAT, STRING }; Loading Loading @@ -205,6 +206,7 @@ public: * First: [Matcher Field] 0x02010101 [Mask]0xff7f7f7f * Last: [Matcher Field] 0x02018001 [Mask]0xff7f807f * Any: [Matcher Field] 0x02010001 [Mask]0xff7f007f * All: [Matcher Field] 0x02010001 [Mask]0xff7f7f7f * * [To match a log Field with a Matcher] we apply the bit mask to the log Field and check if * the result is equal to the Matcher Field. That's a bit wise AND operation + check if 2 ints are Loading @@ -226,9 +228,21 @@ struct Matcher { return mMask; } inline int32_t getRawMaskAtDepth(int32_t depth) const { int32_t field = (mMask & 0x00ffffff); int32_t shift = 8 * (kMaxLogDepth - depth); int32_t mask = 0xff << shift; return (field & mask) >> shift; } bool hasAllPositionMatcher() const { return mMatcher.getDepth() == 2 && getRawMaskAtDepth(1) == 0x7f; } bool hasAnyPositionMatcher(int* prefix) const { if (mMatcher.getDepth() == 2 && mMatcher.getRawPosAtDepth(2) == 0) { (*prefix) = mMatcher.getPrefix(2); if (mMatcher.getDepth() == 2 && mMatcher.getRawPosAtDepth(1) == 0) { (*prefix) = mMatcher.getPrefix(1); return true; } return false; Loading cmds/statsd/src/HashableDimensionKey.cpp +8 −111 Original line number Diff line number Diff line Loading @@ -61,125 +61,22 @@ android::hash_t hashDimension(const HashableDimensionKey& value) { bool filterValues(const vector<Matcher>& matcherFields, const vector<FieldValue>& values, HashableDimensionKey* output) { size_t num_matches = 0; for (const auto& value : values) { for (size_t i = 0; i < matcherFields.size(); ++i) { const auto& matcher = matcherFields[i]; bool found = false; for (const auto& value : values) { // TODO: potential optimization here to break early because all fields are naturally // sorted. if (value.mField.matches(matcher)) { output->addValue(value); output->mutableValue(i)->mField.setTag(value.mField.getTag()); output->mutableValue(i)->mField.setField(value.mField.getField() & matcher.mMask); found = true; break; } } if (!found) { VLOG("We can't find a dimension value for matcher (%d)%#x.", matcher.mMatcher.getTag(), matcher.mMatcher.getField()); return false; } } return true; } // Filter fields using the matchers and output the results as a HashableDimensionKey. // Note: HashableDimensionKey is just a wrapper for vector<FieldValue> bool filterValues(const vector<Matcher>& matcherFields, const vector<FieldValue>& values, vector<HashableDimensionKey>* output) { output->push_back(HashableDimensionKey()); // Top level is only tag id. Now take the real child matchers int prevAnyMatcherPrefix = 0; size_t prevPrevFanout = 0; size_t prevFanout = 0; // For each matcher get matched results. vector<FieldValue> matchedResults(2); for (const auto& matcher : matcherFields) { size_t num_matches = 0; for (const auto& value : values) { // TODO: potential optimization here to break early because all fields are naturally // sorted. if (value.mField.matches(matcher)) { if (num_matches >= matchedResults.size()) { matchedResults.resize(num_matches * 2); } matchedResults[num_matches].mField.setTag(value.mField.getTag()); matchedResults[num_matches].mField.setField(value.mField.getField() & matcher.mMask); matchedResults[num_matches].mValue = value.mValue; output->mutableValue(num_matches)->mField.setTag(value.mField.getTag()); output->mutableValue(num_matches)->mField.setField( value.mField.getField() & matcher.mMask); num_matches++; } } if (num_matches == 0) { VLOG("We can't find a dimension value for matcher (%d)%#x.", matcher.mMatcher.getTag(), matcher.mMatcher.getField()); continue; } if (num_matches == 1) { for (auto& dimension : *output) { dimension.addValue(matchedResults[0]); } prevAnyMatcherPrefix = 0; prevFanout = 0; continue; } // All the complexity below is because we support ANY in dimension. bool createFanout = true; // createFanout is true when the matcher doesn't need to follow the prev matcher's // order. // e.g., get (uid, tag) from any position in attribution. because we have translated // it as 2 matchers, they need to follow the same ordering, we can't create a cross // product of all uid and tags. // However, if the 2 matchers have different prefix, they will create a cross product // e.g., [any uid] [any some other repeated field], we will create a cross product for them if (prevAnyMatcherPrefix != 0) { int anyMatcherPrefix = 0; bool isAnyMatcher = matcher.hasAnyPositionMatcher(&anyMatcherPrefix); if (isAnyMatcher && anyMatcherPrefix == prevAnyMatcherPrefix) { createFanout = false; } else { prevAnyMatcherPrefix = anyMatcherPrefix; } } // Each matcher should match exact one field, unless position is ANY // When x number of fields matches a matcher, the returned dimension // size is multiplied by x. int oldSize; if (createFanout) { // First create fanout (fanout size is matchedResults.Size which could be one, // which means we do nothing here) oldSize = output->size(); for (size_t i = 1; i < num_matches; i++) { output->insert(output->end(), output->begin(), output->begin() + oldSize); } prevPrevFanout = oldSize; prevFanout = num_matches; } else { // If we should not create fanout, e.g., uid tag from same position should be remain // together. oldSize = prevPrevFanout; if (prevFanout != num_matches) { // sanity check. ALOGE("2 Any matcher result in different output"); return false; } } // now add the matched field value to output for (size_t i = 0; i < num_matches; i++) { for (int j = 0; j < oldSize; j++) { (*output)[i * oldSize + j].addValue(matchedResults[i]); } } } return output->size() > 0 && (*output)[0].getValues().size() > 0; return num_matches > 0; } void filterGaugeValues(const std::vector<Matcher>& matcherFields, Loading cmds/statsd/src/HashableDimensionKey.h +2 −5 Original line number Diff line number Diff line Loading @@ -122,16 +122,13 @@ android::hash_t hashDimension(const HashableDimensionKey& key); /** * Creating HashableDimensionKeys from FieldValues using matcher. * * This function may make modifications to the Field if the matcher has Position=LAST or ANY in * it. This is because: for example, when we create dimension from last uid in attribution chain, * This function may make modifications to the Field if the matcher has Position=FIRST,LAST or ALL * in it. This is because: for example, when we create dimension from last uid in attribution chain, * In one event, uid 1000 is at position 5 and it's the last * In another event, uid 1000 is at position 6, and it's the last * these 2 events should be mapped to the same dimension. So we will remove the original position * from the dimension key for the uid field (by applying 0x80 bit mask). */ bool filterValues(const std::vector<Matcher>& matcherFields, const std::vector<FieldValue>& values, std::vector<HashableDimensionKey>* output); // This function is used when there is at most one output dimension key. (no ANY matcher) bool filterValues(const std::vector<Matcher>& matcherFields, const std::vector<FieldValue>& values, HashableDimensionKey* output); Loading Loading
cmds/statsd/benchmark/filter_value_benchmark.cpp +1 −17 Original line number Diff line number Diff line Loading @@ -53,29 +53,13 @@ static void BM_FilterValue(benchmark::State& state) { std::vector<Matcher> matchers; translateFieldMatcher(field_matcher, &matchers); while (state.KeepRunning()) { vector<HashableDimensionKey> output; filterValues(matchers, event.getValues(), &output); } } BENCHMARK(BM_FilterValue); static void BM_FilterValue2(benchmark::State& state) { LogEvent event(1, 100000); FieldMatcher field_matcher; createLogEventAndMatcher(&event, &field_matcher); std::vector<Matcher> matchers; translateFieldMatcher(field_matcher, &matchers); while (state.KeepRunning()) { HashableDimensionKey output; filterValues(matchers, event.getValues(), &output); } } BENCHMARK(BM_FilterValue2); BENCHMARK(BM_FilterValue); } // namespace statsd } // namespace os Loading
cmds/statsd/src/FieldValue.cpp +9 −0 Original line number Diff line number Diff line Loading @@ -48,6 +48,11 @@ bool Field::matches(const Matcher& matcher) const { return true; } if (matcher.hasAllPositionMatcher() && (mField & (matcher.mMask & kClearAllPositionMatcherMask)) == matcher.mMatcher.getField()) { return true; } return false; } Loading @@ -67,6 +72,10 @@ void translateFieldMatcher(int tag, const FieldMatcher& matcher, int depth, int* return; } switch (matcher.position()) { case Position::ALL: pos[depth] = 0x00; mask[depth] = 0x7f; break; case Position::ANY: pos[depth] = 0; mask[depth] = 0; Loading
cmds/statsd/src/FieldValue.h +16 −2 Original line number Diff line number Diff line Loading @@ -30,6 +30,7 @@ const int32_t kAttributionField = 1; const int32_t kMaxLogDepth = 2; const int32_t kLastBitMask = 0x80; const int32_t kClearLastBitDeco = 0x7f; const int32_t kClearAllPositionMatcherMask = 0xffff00ff; enum Type { UNKNOWN, INT, LONG, FLOAT, STRING }; Loading Loading @@ -205,6 +206,7 @@ public: * First: [Matcher Field] 0x02010101 [Mask]0xff7f7f7f * Last: [Matcher Field] 0x02018001 [Mask]0xff7f807f * Any: [Matcher Field] 0x02010001 [Mask]0xff7f007f * All: [Matcher Field] 0x02010001 [Mask]0xff7f7f7f * * [To match a log Field with a Matcher] we apply the bit mask to the log Field and check if * the result is equal to the Matcher Field. That's a bit wise AND operation + check if 2 ints are Loading @@ -226,9 +228,21 @@ struct Matcher { return mMask; } inline int32_t getRawMaskAtDepth(int32_t depth) const { int32_t field = (mMask & 0x00ffffff); int32_t shift = 8 * (kMaxLogDepth - depth); int32_t mask = 0xff << shift; return (field & mask) >> shift; } bool hasAllPositionMatcher() const { return mMatcher.getDepth() == 2 && getRawMaskAtDepth(1) == 0x7f; } bool hasAnyPositionMatcher(int* prefix) const { if (mMatcher.getDepth() == 2 && mMatcher.getRawPosAtDepth(2) == 0) { (*prefix) = mMatcher.getPrefix(2); if (mMatcher.getDepth() == 2 && mMatcher.getRawPosAtDepth(1) == 0) { (*prefix) = mMatcher.getPrefix(1); return true; } return false; Loading
cmds/statsd/src/HashableDimensionKey.cpp +8 −111 Original line number Diff line number Diff line Loading @@ -61,125 +61,22 @@ android::hash_t hashDimension(const HashableDimensionKey& value) { bool filterValues(const vector<Matcher>& matcherFields, const vector<FieldValue>& values, HashableDimensionKey* output) { size_t num_matches = 0; for (const auto& value : values) { for (size_t i = 0; i < matcherFields.size(); ++i) { const auto& matcher = matcherFields[i]; bool found = false; for (const auto& value : values) { // TODO: potential optimization here to break early because all fields are naturally // sorted. if (value.mField.matches(matcher)) { output->addValue(value); output->mutableValue(i)->mField.setTag(value.mField.getTag()); output->mutableValue(i)->mField.setField(value.mField.getField() & matcher.mMask); found = true; break; } } if (!found) { VLOG("We can't find a dimension value for matcher (%d)%#x.", matcher.mMatcher.getTag(), matcher.mMatcher.getField()); return false; } } return true; } // Filter fields using the matchers and output the results as a HashableDimensionKey. // Note: HashableDimensionKey is just a wrapper for vector<FieldValue> bool filterValues(const vector<Matcher>& matcherFields, const vector<FieldValue>& values, vector<HashableDimensionKey>* output) { output->push_back(HashableDimensionKey()); // Top level is only tag id. Now take the real child matchers int prevAnyMatcherPrefix = 0; size_t prevPrevFanout = 0; size_t prevFanout = 0; // For each matcher get matched results. vector<FieldValue> matchedResults(2); for (const auto& matcher : matcherFields) { size_t num_matches = 0; for (const auto& value : values) { // TODO: potential optimization here to break early because all fields are naturally // sorted. if (value.mField.matches(matcher)) { if (num_matches >= matchedResults.size()) { matchedResults.resize(num_matches * 2); } matchedResults[num_matches].mField.setTag(value.mField.getTag()); matchedResults[num_matches].mField.setField(value.mField.getField() & matcher.mMask); matchedResults[num_matches].mValue = value.mValue; output->mutableValue(num_matches)->mField.setTag(value.mField.getTag()); output->mutableValue(num_matches)->mField.setField( value.mField.getField() & matcher.mMask); num_matches++; } } if (num_matches == 0) { VLOG("We can't find a dimension value for matcher (%d)%#x.", matcher.mMatcher.getTag(), matcher.mMatcher.getField()); continue; } if (num_matches == 1) { for (auto& dimension : *output) { dimension.addValue(matchedResults[0]); } prevAnyMatcherPrefix = 0; prevFanout = 0; continue; } // All the complexity below is because we support ANY in dimension. bool createFanout = true; // createFanout is true when the matcher doesn't need to follow the prev matcher's // order. // e.g., get (uid, tag) from any position in attribution. because we have translated // it as 2 matchers, they need to follow the same ordering, we can't create a cross // product of all uid and tags. // However, if the 2 matchers have different prefix, they will create a cross product // e.g., [any uid] [any some other repeated field], we will create a cross product for them if (prevAnyMatcherPrefix != 0) { int anyMatcherPrefix = 0; bool isAnyMatcher = matcher.hasAnyPositionMatcher(&anyMatcherPrefix); if (isAnyMatcher && anyMatcherPrefix == prevAnyMatcherPrefix) { createFanout = false; } else { prevAnyMatcherPrefix = anyMatcherPrefix; } } // Each matcher should match exact one field, unless position is ANY // When x number of fields matches a matcher, the returned dimension // size is multiplied by x. int oldSize; if (createFanout) { // First create fanout (fanout size is matchedResults.Size which could be one, // which means we do nothing here) oldSize = output->size(); for (size_t i = 1; i < num_matches; i++) { output->insert(output->end(), output->begin(), output->begin() + oldSize); } prevPrevFanout = oldSize; prevFanout = num_matches; } else { // If we should not create fanout, e.g., uid tag from same position should be remain // together. oldSize = prevPrevFanout; if (prevFanout != num_matches) { // sanity check. ALOGE("2 Any matcher result in different output"); return false; } } // now add the matched field value to output for (size_t i = 0; i < num_matches; i++) { for (int j = 0; j < oldSize; j++) { (*output)[i * oldSize + j].addValue(matchedResults[i]); } } } return output->size() > 0 && (*output)[0].getValues().size() > 0; return num_matches > 0; } void filterGaugeValues(const std::vector<Matcher>& matcherFields, Loading
cmds/statsd/src/HashableDimensionKey.h +2 −5 Original line number Diff line number Diff line Loading @@ -122,16 +122,13 @@ android::hash_t hashDimension(const HashableDimensionKey& key); /** * Creating HashableDimensionKeys from FieldValues using matcher. * * This function may make modifications to the Field if the matcher has Position=LAST or ANY in * it. This is because: for example, when we create dimension from last uid in attribution chain, * This function may make modifications to the Field if the matcher has Position=FIRST,LAST or ALL * in it. This is because: for example, when we create dimension from last uid in attribution chain, * In one event, uid 1000 is at position 5 and it's the last * In another event, uid 1000 is at position 6, and it's the last * these 2 events should be mapped to the same dimension. So we will remove the original position * from the dimension key for the uid field (by applying 0x80 bit mask). */ bool filterValues(const std::vector<Matcher>& matcherFields, const std::vector<FieldValue>& values, std::vector<HashableDimensionKey>* output); // This function is used when there is at most one output dimension key. (no ANY matcher) bool filterValues(const std::vector<Matcher>& matcherFields, const std::vector<FieldValue>& values, HashableDimensionKey* output); Loading