... | @@ -171,11 +171,56 @@ In order to do this, we first need to number the marked positions according to t |
... | @@ -171,11 +171,56 @@ In order to do this, we first need to number the marked positions according to t |
|
We create a BixNum stream set for the positions at which insertions are to be made.
|
|
We create a BixNum stream set for the positions at which insertions are to be made.
|
|
|
|
|
|
For the positions marking the starts of each field, we assign consecutive numbers starting with 0.
|
|
For the positions marking the starts of each field, we assign consecutive numbers starting with 0.
|
|
This can be achieved using the pablo `EveryNth` operation.
|
|
This can be achieved using the pablo `EveryNth` operation, where N is the number of fields in the CSV records.
|
|
|
|
The BixNum values for other template strings are calculated using bitwise logic over the `FilteredMarks`.
|
|
|
|
|
|
Given these numbered values, we can use the Character Class Compiler to compute the bits of another BixNum
|
|
Given these BixNum values for the template strings, we next want to compute another BixNum representing
|
|
representing the number of 0 bits to insert.
|
|
the number of 0 bits to insert at each position. This can be achieved by the Parabix utility
|
|
|
|
`StringInsertBixNum`.
|
|
|
|
|
|
`SpreadByMask` then does the expansion.
|
|
Given the number of zeroes to insert at selected positions, `InsertionSpreadMask` computes a mask that
|
|
|
|
actually has 0 bits inserted at all the desired positions and 1 bits everywhere else. This mask can
|
|
|
|
be used by the `SpreadByMask` operation to compute the `ExpandedBasisBits`.
|
|
|
|
|
|
|
|
### Filling in Template Values
|
|
|
|
|
|
|
|
The final step is to use the `StringReplaceKernel` to fill in template values.
|
|
|
|
|
|
|
|
As a guide to this entire process, it may be useful to refer to the icgrep colorization code, which
|
|
|
|
does the insertion of color escape sequences for matched strings.
|
|
|
|
|
|
|
|
```
|
|
|
|
std::string ESC = "\x1B";
|
|
|
|
std::vector<std::string> colorEscapes = {ESC + "[01;31m" + ESC + "[K", ESC + "[m"};
|
|
|
|
unsigned insertLengthBits = 4;
|
|
|
|
|
|
|
|
StreamSet * const InsertBixNum = E->CreateStreamSet(insertLengthBits, 1);
|
|
|
|
E->CreateKernelCall<StringInsertBixNum>(colorEscapes, InsertMarks, InsertBixNum);
|
|
|
|
//E->CreateKernelCall<DebugDisplayKernel>("InsertBixNum", InsertBixNum);
|
|
|
|
StreamSet * const SpreadMask = InsertionSpreadMask(E, InsertBixNum, InsertPosition::Before);
|
|
|
|
//E->CreateKernelCall<DebugDisplayKernel>("SpreadMask", SpreadMask);
|
|
|
|
|
|
|
|
// For each run of 0s marking insert positions, create a parallel
|
|
|
|
// bixnum sequentially numbering the string insert positions.
|
|
|
|
StreamSet * const InsertIndex = E->CreateStreamSet(insertLengthBits);
|
|
|
|
E->CreateKernelCall<RunIndex>(SpreadMask, InsertIndex, nullptr, /*invert = */ true);
|
|
|
|
//E->CreateKernelCall<DebugDisplayKernel>("InsertIndex", InsertIndex);
|
|
|
|
|
|
|
|
StreamSet * FilteredBasis = E->CreateStreamSet(8, 1);
|
|
|
|
E->CreateKernelCall<S2PKernel>(Filtered, FilteredBasis);
|
|
|
|
|
|
|
|
// Basis bit streams expanded with 0 bits for each string to be inserted.
|
|
|
|
StreamSet * ExpandedBasis = E->CreateStreamSet(8);
|
|
|
|
SpreadByMask(E, SpreadMask, FilteredBasis, ExpandedBasis);
|
|
|
|
//E->CreateKernelCall<DebugDisplayKernel>("ExpandedBasis", ExpandedBasis);
|
|
|
|
|
|
|
|
// Map the match start/end marks to their positions in the expanded basis.
|
|
|
|
StreamSet * ExpandedMarks = E->CreateStreamSet(2);
|
|
|
|
SpreadByMask(E, SpreadMask, InsertMarks, ExpandedMarks);
|
|
|
|
|
|
|
|
StreamSet * ColorizedBasis = E->CreateStreamSet(8);
|
|
|
|
E->CreateKernelCall<StringReplaceKernel>(colorEscapes, ExpandedBasis, SpreadMask, ExpandedMarks, InsertIndex, ColorizedBasis);
|
|
|
|
|
|
|
|
StreamSet * ColorizedBytes = E->CreateStreamSet(1, 8);
|
|
|
|
E->CreateKernelCall<P2SKernel>(ColorizedBasis, ColorizedBytes);
|
|
|
|
``` |