Commit 27055d4b authored by Rob Cameron
Browse files

Merge branch 'master' into beta-csv

parents b8569c8c fe481746
......@@ -10,7 +10,7 @@
#include <toolchain/toolchain.h>
namespace IDISA {
const unsigned AVX_width = 256;
const unsigned AVX512_width = 512;
......@@ -29,8 +29,8 @@ public:
virtual std::string getBuilderUniqueName() override;
llvm::Value * hsimd_signmask(unsigned fw, llvm::Value * a) override;
llvm::Value * CreateZeroHiBitsFrom(llvm::Value * bits, llvm::Value * pos, const llvm::Twine &Name = "") override;
llvm::Value * CreateZeroHiBitsFrom(llvm::Value * bits, llvm::Value * pos, const llvm::Twine Name = "") override;
~IDISA_AVX_Builder() {}
protected:
bool hasBMI1;
......
......@@ -140,7 +140,7 @@ public:
// 1 0 c
// 1 1 d
llvm::Value * simd_binary(unsigned char mask, llvm::Value * bit_1, llvm::Value * bit_0);
//
// Return a logic expression in terms of bitwise And, Or and Not for an
// arbitrary three-operand boolean function corresponding to an 8-bit truth table mask.
......@@ -244,8 +244,7 @@ public:
return getStreamSetTy(getContext(), NumElements, FieldWidth);
}
void CallPrintRegisterCond(llvm::StringRef regName, llvm::Value * const value, llvm::Value * const cond, const STD_FD fd = STD_FD::STD_ERR);
void CallPrintRegister(llvm::StringRef regName, llvm::Value * const value, const STD_FD fd = STD_FD::STD_ERR);
llvm::CallInst * CallPrintRegister(llvm::StringRef regName, llvm::Value * const value, const STD_FD fd = STD_FD::STD_ERR);
protected:
LLVM_ATTRIBUTE_NORETURN void UnsupportedFieldWidthError(const unsigned FieldWidth, std::string op_name);
......
......@@ -33,7 +33,7 @@ public:
unsigned getGroupThreads() const;
void CreateBaseFunctions() override;
llvm::Value * bitblock_any(llvm::Value * a) override;
std::pair<llvm::Value *, llvm::Value *> bitblock_add_with_carry(llvm::Value * a, llvm::Value * b, llvm::Value * carryin) override;
virtual std::pair<llvm::Value *, llvm::Value *> bitblock_advance(llvm::Value * a, llvm::Value * shiftin, unsigned shift) override;
......@@ -55,11 +55,11 @@ public:
#ifdef HAS_ADDRESS_SANITIZER
llvm::LoadInst * CreateLoad(llvm::Value *Ptr, const char *Name) override;
llvm::LoadInst * CreateLoad(llvm::Value *Ptr, const llvm::Twine &Name = "") override;
llvm::LoadInst * CreateLoad(llvm::Value *Ptr, const llvm::Twine Name = "") override;
llvm::LoadInst * CreateLoad(llvm::Type *Ty, llvm::Value *Ptr, const llvm::Twine &Name = "") override;
llvm::LoadInst * CreateLoad(llvm::Type *Ty, llvm::Value *Ptr, const llvm::Twine Name = "") override;
llvm::LoadInst * CreateLoad(llvm::Value *Ptr, bool isVolatile, const llvm::Twine &Name = "") override;
llvm::LoadInst * CreateLoad(llvm::Value *Ptr, bool isVolatile, const llvm::Twine Name = "") override;
llvm::StoreInst * CreateStore(llvm::Value *Val, llvm::Value *Ptr, bool isVolatile = false) override;
#endif
......@@ -86,13 +86,13 @@ private:
class IDISA_NVPTX35_Builder : public IDISA_NVPTX20_Builder {
IDISA_NVPTX35_Builder(Module * m, int groupSize) : IDISA_NVPTX30_Builder(m, groupSize) {}
std::pair<llvm::Value *, llvm::Value *> bitblock_advance(llvm::Value * a, llvm::Value * shiftin, unsigned shift) override;
~IDISA_NVPTX35_Builder() {};
virtual std::string getBuilderUniqueName() override;
};
#endif
#endif
}
#endif // IDISA_NVPTX_BUILDER_H
......@@ -131,6 +131,16 @@ struct Attribute {
// that a particular output stream needs both the consumed item count and a pointer
// to each of its consumers' logical segment numbers for its internal logic.
SharedManagedBuffer,
// A shared buffer is a managed buffer that is not owned strictly by one kernel.
// For example, an output of an OptimizationBranch kernel may be owned by the
// OptimizationBranch; however, each branch contains some kernel that may write
// to the output and could have to expand it to fit the produced data. While the
// synchronization will prevent two kernels from simultaneously writing/expanding
// the buffers, both must be coordinated to ensure that every possible writer
// sees the same view.
Delayed,
// Similar to Deferred, a consumer of a stream of N items with a Delayed attribute
......@@ -363,6 +373,10 @@ inline Attribute Add1() {
return Attribute(Attribute::KindId::Add, 1);
}
// Convenience factory: builds an Attribute whose kind is KindId::Truncate and
// whose second constructor argument is k. When the caller omits k it is 1.
inline Attribute Truncate(const unsigned k = 1) {
return Attribute(Attribute::KindId::Truncate, k);
}
// Convenience factory: builds an Attribute whose kind is KindId::RoundUpTo and
// whose second constructor argument is k. k has no default and must be given.
inline Attribute RoundUpTo(const unsigned k) {
return Attribute(Attribute::KindId::RoundUpTo, k);
}
......@@ -371,6 +385,10 @@ inline Attribute ManagedBuffer() {
return Attribute(Attribute::KindId::ManagedBuffer, 0);
}
// Convenience factory: builds an Attribute of kind KindId::SharedManagedBuffer.
// The kind takes no parameter, so 0 is passed as the second constructor argument.
inline Attribute SharedManagedBuffer() {
return Attribute(Attribute::KindId::SharedManagedBuffer, 0);
}
// Convenience factory: builds an Attribute of kind KindId::Principal.
// The kind takes no parameter, so 0 is passed as the second constructor argument.
inline Attribute Principal() {
return Attribute(Attribute::KindId::Principal, 0);
}
......
......@@ -34,6 +34,8 @@ struct Binding : public AttributeSet {
Binding(llvm::Type * const scalarType, std::string name, Relationship * const value, ProcessingRate r, Attribute && attribute);
Binding(llvm::Type * const scalarType, std::string name, Relationship * const value, ProcessingRate r, std::initializer_list<Attribute> attributes);
Binding(const Binding & original, ProcessingRate r);
// Returns mName by const reference, so no string copy is made.
const std::string & getName() const LLVM_READNONE {
return mName;
}
......@@ -72,10 +74,6 @@ struct Binding : public AttributeSet {
LLVM_READNONE unsigned getFieldWidth() const;
protected:
Binding(const Binding & original, ProcessingRate r);
void print(const Kernel * const kernel, llvm::raw_ostream & out) const noexcept;
private:
......@@ -87,14 +85,10 @@ private:
using Bindings = std::vector<Binding>;
/** ------------------------------------------------------------------------------------------------------------- *
* @brief isCountable
** ------------------------------------------------------------------------------------------------------------- */
LLVM_READNONE inline bool isCountable(const Binding & binding) {
if (LLVM_UNLIKELY(binding.isDeferred())) {
return false;
}
const ProcessingRate & rate = binding.getRate();
switch (rate.getKind()) {
case ProcessingRate::KindId::Fixed:
......@@ -112,9 +106,6 @@ LLVM_READNONE inline bool isCountable(const Binding & binding) {
* @brief isNonFixedCountable
** ------------------------------------------------------------------------------------------------------------- */
LLVM_READNONE inline bool isNonFixedCountable(const Binding & binding) {
if (LLVM_UNLIKELY(binding.isDeferred())) {
return false;
}
const ProcessingRate & rate = binding.getRate();
switch (rate.getKind()) {
case ProcessingRate::KindId::PopCount:
......
......@@ -52,6 +52,7 @@ public:
, BlockOriented
, Pipeline
, OptimizationBranch
, PopCountKernel
};
using InitArgs = llvm::SmallVector<llvm::Value *, 32>;
......@@ -128,8 +129,6 @@ public:
using InternalScalars = std::vector<InternalScalar>;
using ScalarValueMap = llvm::StringMap<llvm::Value *>;
enum class PortType { Input, Output };
struct StreamSetPort {
......@@ -137,8 +136,13 @@ public:
unsigned Number;
StreamSetPort() : Type(PortType::Input), Number(0) { }
explicit StreamSetPort(PortType Type, unsigned Number) : Type(Type), Number(Number) { }
StreamSetPort(const PortType Type, const unsigned Number) : Type(Type), Number(Number) { }
StreamSetPort(const StreamSetPort & other) = default;
// Copy assignment. Defaulted rather than hand-written: the member-wise copy the
// compiler generates assigns Type and Number exactly as the explicit version
// did, and it stays consistent with the defaulted copy constructor.
StreamSetPort & operator = (const StreamSetPort & other) = default;
bool operator < (const StreamSetPort other) const {
if (Type == other.Type) {
return Number < other.Number;
......@@ -166,9 +170,6 @@ public:
// mechanisms that are short, inexpensive to compute and guarantee uniqueness
// based on the semantics of the kernel.
//
// If no other mechanism is available, the default makeSignature() method uses the
// full LLVM IR (before optimization) of the kernel instance.
//
// A kernel Module ID is a short string that is used as a name for a particular kernel
// instance. Kernel Module IDs are used to look up and retrieve cached kernel
// instances and so should be highly likely to uniquely identify a kernel instance.
......@@ -177,16 +178,6 @@ public:
// guaranteeing uniqueness. In this case, hasSignature() should return false.
//
//
// Kernel builder subtypes define their logic of kernel construction
// in terms of 3 virtual methods for
// (a) preparing the Kernel state data structure
// (c) defining the logic of the finalBlock function.
//
// Note: the kernel state data structure must only be finalized after
// all scalar fields have been added. If there are no fields to
// be added, the default method for preparing kernel state may be used.
// Returns the kernel's name as an llvm::StringRef built from mKernelName.
LLVM_READNONE const llvm::StringRef getName() const {
return mKernelName;
}
......@@ -223,6 +214,8 @@ public:
return mThreadLocalStateType != nullptr;
}
virtual bool requiresExplicitPartialFinalStride() const;
// Returns the current value of mStride.
unsigned getStride() const { return mStride; }
// Overwrites mStride with the given value; no validation is performed here.
void setStride(const unsigned stride) { mStride = stride; }
......@@ -367,9 +360,13 @@ public:
template <typename ExternalFunctionType>
void link(llvm::StringRef name, ExternalFunctionType & functionPtr);
static bool isLocalBuffer(const Binding & output, const bool includeShared = true);
LLVM_READNONE bool canSetTerminateSignal() const;
virtual void addKernelDeclarations(BuilderRef b);
virtual std::unique_ptr<KernelCompiler> instantiateKernelCompiler(BuilderRef b) const noexcept;
virtual std::unique_ptr<KernelCompiler> instantiateKernelCompiler(BuilderRef b) const;
virtual ~Kernel() = 0;
......@@ -379,10 +376,20 @@ protected:
llvm::Function * addInitializeDeclaration(BuilderRef b) const;
LLVM_READNONE virtual bool allocatesInternalStreamSets() const;
llvm::Function * getAllocateSharedInternalStreamSetsFunction(BuilderRef b, const bool alwayReturnDeclaration = true) const;
llvm::Function * addAllocateSharedInternalStreamSetsDeclaration(BuilderRef b) const;
llvm::Function * getInitializeThreadLocalFunction(BuilderRef b, const bool alwayReturnDeclaration = true) const;
llvm::Function * addInitializeThreadLocalDeclaration(BuilderRef b) const;
llvm::Function * getAllocateThreadLocalInternalStreamSetsFunction(BuilderRef b, const bool alwayReturnDeclaration = true) const;
llvm::Function * addAllocateThreadLocalInternalStreamSetsDeclaration(BuilderRef b) const;
llvm::Function * addDoSegmentDeclaration(BuilderRef b) const;
std::vector<llvm::Type *> getDoSegmentFields(BuilderRef b) const;
......@@ -397,6 +404,8 @@ protected:
llvm::Function * addFinalizeDeclaration(BuilderRef b) const;
virtual void runOptimizationPasses(BuilderRef b) const;
public:
llvm::Function * addOrDeclareMainFunction(BuilderRef b, const MainMethodGenerationType method) const;
......@@ -427,15 +436,9 @@ protected:
LLVM_READNONE std::string getDefaultFamilyName() const;
LLVM_READNONE bool canSetTerminateSignal() const;
static bool isLocalBuffer(const Binding & output);
static bool requiresExplicitPartialFinalStride(const Kernel * const kernel);
LLVM_READNONE bool hasFixedRate() const;
LLVM_READNONE bool hasFixedRateInput() const;
LLVM_READNONE Rational getFixedRateLCM() const;
LLVM_READNONE bool isGreedy() const;
virtual void addInternalProperties(BuilderRef) { }
......@@ -449,16 +452,16 @@ protected:
virtual void generateInitializeThreadLocalMethod(BuilderRef) { }
virtual void generateAllocateSharedInternalStreamSetsMethod(BuilderRef b, llvm::Value * expectedNumOfStrides);
virtual void generateAllocateThreadLocalInternalStreamSetsMethod(BuilderRef b, llvm::Value * expectedNumOfStrides);
virtual void generateKernelMethod(BuilderRef) = 0;
virtual void generateFinalizeThreadLocalMethod(BuilderRef) { }
virtual void generateFinalizeMethod(BuilderRef) { }
private:
LLVM_READNONE bool supportsInternalSynchronization() const;
protected:
// Constructor
......@@ -570,7 +573,7 @@ public:
static bool classof(const void *) { return false; }
std::unique_ptr<KernelCompiler> instantiateKernelCompiler(BuilderRef b) const noexcept;
std::unique_ptr<KernelCompiler> instantiateKernelCompiler(BuilderRef b) const;
protected:
......@@ -589,8 +592,6 @@ protected:
virtual void generateFinalBlockMethod(BuilderRef b, llvm::Value * remainingItems);
BlockOrientedKernel(BuilderRef b,
std::string && kernelName,
Bindings && stream_inputs,
......
......@@ -23,6 +23,8 @@ public:
using ScalarValueMap = llvm::StringMap<llvm::Value *>;
using ScalarAliasMap = std::vector<std::pair<std::string, std::string>>;
using ScalarType = Kernel::ScalarType;
using InternalScalar = Kernel::InternalScalar;
......@@ -46,7 +48,7 @@ public:
using OwnedStreamSetBuffers = Vec<std::unique_ptr<StreamSetBuffer>>;
enum class InitializeScalarMapOptions {
enum class InitializeOptions {
SkipThreadLocal
, IncludeThreadLocal
};
......@@ -333,10 +335,16 @@ protected:
private:
void initializeScalarMap(BuilderRef b, const InitializeScalarMapOptions options);
void initializeScalarMap(BuilderRef b, const InitializeOptions options);
void initializeIOBindingMap();
void initializeOwnedBufferHandles(BuilderRef b, const InitializeOptions options);
protected:
void addAlias(llvm::StringRef alias, llvm::StringRef scalarName);
protected:
// In threaded mode, the PipelineCompiler generates a DoSegment block that instantiates
......@@ -357,6 +365,10 @@ public:
void callGenerateInitializeThreadLocalMethod(BuilderRef b);
void callGenerateAllocateSharedInternalStreamSets(BuilderRef b);
void callGenerateAllocateThreadLocalInternalStreamSets(BuilderRef b);
std::vector<llvm::Value *> getDoSegmentProperties(BuilderRef b) const;
void setDoSegmentProperties(BuilderRef b, const llvm::ArrayRef<llvm::Value *> args);
......@@ -373,10 +385,14 @@ protected:
private:
void loadHandlesOfLocalOutputStreamSets(BuilderRef b) const;
void clearInternalStateAfterCodeGen();
static Rational getLCMOfFixedRateInputs(const Kernel * const target);
static Rational getLCMOfFixedRateOutputs(const Kernel * const target);
void runInternalOptimizationPasses(llvm::Module * const m);
protected:
Kernel * const mTarget;
......@@ -390,6 +406,8 @@ protected:
llvm::Function * mCurrentMethod = nullptr;
llvm::BasicBlock * mEntryPoint = nullptr;
llvm::Value * mSharedHandle = nullptr;
llvm::Value * mThreadLocalHandle = nullptr;
......@@ -416,6 +434,7 @@ protected:
Vec<llvm::Value *> mConsumedOutputItems;
ScalarValueMap mScalarFieldMap;
ScalarAliasMap mScalarAliasMap;
BindingMap mBindingMap;
OwnedStreamSetBuffers mStreamSetInputBuffers;
......
......@@ -6,6 +6,7 @@
template <typename T>
struct PtrWrapper {
PtrWrapper(const std::unique_ptr<T> & p) noexcept : mReference(p.get()) { }
PtrWrapper(const PtrWrapper<T> & p) noexcept : mReference(p.get()) { }
PtrWrapper(T * const ref) noexcept : mReference(ref) { }
......@@ -21,6 +22,10 @@ private:
T * const mReference;
};
// Orders two PtrWrapper<T> values by comparing what their get() calls return
// (presumably the wrapped raw pointers, given how the constructors use get()).
// NOTE(review): the built-in `<` on pointers to unrelated objects yields an
// unspecified result; if these wrappers are used as keys of ordered containers,
// std::less<T *> would guarantee a strict total order -- confirm against callers.
template <typename T>
constexpr inline bool operator< (const PtrWrapper<T> & a, const PtrWrapper<T> & b) {
return a.get() < b.get();
}
#endif // PTRWRAPPER_HPP
......@@ -9,19 +9,31 @@ namespace kernel {
template <typename T>
struct RefWrapper {
RefWrapper() noexcept : mReference(nullptr) {}
RefWrapper(const T & ref) noexcept : mReference(&ref) {}
RefWrapper(const T * const ref) noexcept : mReference(ref) {}
RefWrapper(const T & ref) noexcept : mReference(const_cast<T*>(&ref)) {}
RefWrapper(const T * const ref) noexcept : mReference(const_cast<T*>(ref)) {}
operator const T & () const noexcept {
return get();
}
operator T & () noexcept {
return get();
}
const T & get() const noexcept {
assert (mReference && "was not set!");
return *mReference;
}
T & get() noexcept {
assert (mReference && "was not set!");
return *mReference;
}
private:
const T * mReference;
T * mReference;
};
// Orders two RefWrapper<T> values by comparing the referenced objects with T's
// own operator< (get() yields a T reference, not the stored pointer), so T must
// provide a suitable operator<.
// NOTE(review): RefWrapper::get() is not declared constexpr, so the constexpr
// on this operator presumably never produces a constant expression -- confirm.
template <typename T>
constexpr inline bool operator< (const RefWrapper<T> & a, const RefWrapper<T> & b) {
return a.get() < b.get();
}
}
#endif // REFWRAPPER_H
......@@ -28,7 +28,7 @@ class Relationship {
friend class PipelineKernel;
public:
using Allocator = ProxyAllocator<Relationship *>;
using Allocator = ProxyAllocator<Relationship>;
static inline bool classof(const Relationship *) {
return true;
......
......@@ -75,6 +75,8 @@ public:
size_t getOverflowCapacity(BuilderPtr b) const;
bool isEmptySet() const;
virtual ~StreamSetBuffer() = 0;
llvm::Value * getHandle() const {
......@@ -85,7 +87,7 @@ public:
mHandle = handle;
}
virtual void allocateBuffer(BuilderPtr b) = 0;
virtual void allocateBuffer(BuilderPtr b, llvm::Value * const capacityMultiplier) = 0;
virtual void releaseBuffer(BuilderPtr b) const = 0;
......@@ -108,6 +110,8 @@ public:
virtual llvm::Value * getBaseAddress(BuilderPtr b) const = 0;
virtual llvm::Value * getMallocAddress(BuilderPtr b) const = 0;
virtual void setBaseAddress(BuilderPtr b, llvm::Value * addr) const = 0;
virtual llvm::Value * getOverflowAddress(BuilderPtr b) const = 0;
......@@ -116,15 +120,17 @@ public:
virtual llvm::Value * getCapacity(BuilderPtr b) const = 0;
virtual llvm::Value * getInternalCapacity(BuilderPtr b) const = 0;
virtual llvm::Value * modByCapacity(BuilderPtr b, llvm::Value * const offset) const = 0;
virtual llvm::Value * getRawItemPointer(BuilderPtr b, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
virtual llvm::Value * getStreamLogicalBasePtr(BuilderPtr b, llvm::Value * baseAddress, llvm::Value * const streamIndex, llvm::Value * blockIndex) const = 0;
virtual llvm::Value * reserveCapacity(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed, llvm::Value * required, llvm::Constant * const overflowItems) const = 0;
virtual void prepareLinearBuffer(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed, const unsigned lookBehind) const = 0;
virtual void linearizeBuffer(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed) const = 0;
virtual void reserveCapacity(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed, llvm::Value * required) const = 0;
static llvm::Type * resolveType(BuilderPtr b, llvm::Type * const streamSetType);
......@@ -149,7 +155,7 @@ protected:
const unsigned mUnderflow;
const unsigned mAddressSpace;
llvm::Type * const mBaseType;
const bool mLinear;
const bool mLinear;
};
class ExternalBuffer final : public StreamSetBuffer {
......@@ -158,11 +164,11 @@ public:
return b->getBufferKind() == BufferKind::ExternalBuffer;
}
enum Field {BaseAddress, Capacity};
enum Field { BaseAddress, EffectiveCapacity };
ExternalBuffer(BuilderPtr b, llvm::Type * const type, const bool linear, const unsigned AddressSpace);
void allocateBuffer(BuilderPtr b) override;
void allocateBuffer(BuilderPtr b, llvm::Value * const capacityMultiplier) override;
void releaseBuffer(BuilderPtr b) const override;
......@@ -176,20 +182,24 @@ public:
llvm::Value * getBaseAddress(BuilderPtr b) const override;
llvm::Value * getMallocAddress(BuilderPtr b) const override;
void setCapacity(BuilderPtr b, llvm::Value * capacity) const override;
llvm::Value * getCapacity(BuilderPtr b) const override;
llvm::Value * getInternalCapacity(BuilderPtr b) const override;
llvm::Value * modByCapacity(BuilderPtr b, llvm::Value * const offset) const override;
llvm::Value * reserveCapacity(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed, llvm::Value * required, llvm::Constant * const overflowItems) const override;
void prepareLinearBuffer(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed, const unsigned lookBehind) const override;
void reserveCapacity(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed, llvm::Value * required) const override;
void setBaseAddress(BuilderPtr b, llvm::Value * addr) const override;
llvm::Value * getOverflowAddress(BuilderPtr b) const override;
void linearizeBuffer(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed) const override;
private:
void assertValidBlockIndex(BuilderPtr b, llvm::Value * blockIndex) const;
......@@ -234,9 +244,9 @@ public:
const size_t capacity, const size_t overflowBlocks, const size_t underflowSize,
const bool linear, const unsigned AddressSpace);
enum Field { BaseAddress, InitialAddress };
enum Field { BaseAddress, EffectiveCapacity, MallocedAddress, InternalCapacity, PriorAddress };
void allocateBuffer(BuilderPtr b) override;
void allocateBuffer(BuilderPtr b, llvm::Value * const capacityMultiplier) override;
void releaseBuffer(BuilderPtr b) const override;
......@@ -244,6 +254,8 @@ public:
llvm::Value * getBaseAddress(BuilderPtr b) const override;
llvm::Value * getMallocAddress(BuilderPtr b) const override;
void setBaseAddress(BuilderPtr b, llvm::Value * addr) const override;
llvm::Value * getOverflowAddress(BuilderPtr b) const override;
......@@ -252,11 +264,13 @@ public:
llvm::Value * getCapacity(BuilderPtr b) const override;
llvm::Value * getInternalCapacity(BuilderPtr b) const override;
llvm::Value * modByCapacity(BuilderPtr b, llvm::Value * const offset) const final;
llvm::Value * reserveCapacity(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed, llvm::Value * required, llvm::Constant * const overflowItems) const override;
void prepareLinearBuffer(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed, const unsigned lookBehind) const override;
void linearizeBuffer(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed) const override;
void reserveCapacity(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed, llvm::Value * required) const override;
size_t getCapacity() const {
return mCapacity;
......@@ -270,7 +284,7 @@ private:
class DynamicBuffer final : public InternalBuffer {
enum Field {BaseAddress, Capacity, PriorBaseAddress};
enum Field { BaseAddress, EffectiveCapacity, MallocedAddress, InternalCapacity, PriorAddress };
public:
......@@ -282,19 +296,23 @@ public:
const size_t overflowSize, const size_t underflowSize,
const bool linear, const unsigned AddressSpace);
void allocateBuffer(BuilderPtr b) override;
void allocateBuffer(BuilderPtr b, llvm::Value * const capacityMultiplier) override;
void releaseBuffer(BuilderPtr b) const override;
llvm::Value * getMallocAddress(BuilderPtr b) const override;
llvm::Value * getCapacity(BuilderPtr b) const override;
llvm::Value * getInternalCapacity(BuilderPtr b) const override;
void setCapacity(BuilderPtr b, llvm::Value * capacity) const override;
llvm::Value * modByCapacity(BuilderPtr b, llvm::Value * const offset) const final;
llvm::Value * reserveCapacity(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed, llvm::Value * required, llvm::Constant * const overflowItems) const override;
void prepareLinearBuffer(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed, const unsigned lookBehind) const override;
void linearizeBuffer(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed) const override;
void reserveCapacity(BuilderPtr b, llvm::Value * produced, llvm::Value * consumed, llvm::Value * required) const override;
size_t getInitialCapacity() const {
return mInitialCapacity;
......
......@@ -35,7 +35,7 @@ public:
return mCondition;
}
std::unique_ptr<KernelCompiler> instantiateKernelCompiler(BuilderRef b) const noexcept final;
std::unique_ptr<KernelCompiler> instantiateKernelCompiler(BuilderRef b) const final;
protected:
......@@ -53,6 +53,12 @@ protected:
void addInternalProperties(BuilderRef b) override;
LLVM_READNONE bool allocatesInternalStreamSets() const final;
void generateAllocateSharedInternalStreamSetsMethod(BuilderRef b, llvm::Value * expectedNumOfStrides) override;
void generateAllocateThreadLocalInternalStreamSetsMethod(BuilderRef b, llvm::Value * expectedNumOfStrides) override;
void generateInitializeMethod(BuilderRef b) override;
void generateInitializeThreadLocalMethod(BuilderRef b) override;
......
......@@ -18,6 +18,8 @@ public:
using CallBinding = PipelineKernel::CallBinding;
using CallBindings = PipelineKernel::CallBindings;
using NestedBuilders = std::vector<std::shared_ptr<PipelineBuilder>>;
using LengthAssertion = PipelineKernel::LengthAssertion;
using LengthAssertions = PipelineKernel::LengthAssertions;
BaseDriver & getDriver() { return mDriver;}
......@@ -68,16 +70,23 @@ public:
void setOutputScalar(const llvm::StringRef name, Scalar * value);
void AssertEqualLength(const StreamSet * A, const StreamSet * B) {
mLengthAssertions.emplace_back(LengthAssertion{{A, B}});
}
PipelineBuilder(BaseDriver & driver,
Bindings && stream_inputs, Bindings && stream_outputs,
Bindings && scalar_inputs, Bindings && scalar_outputs,
const unsigned numOfThreads,
const bool requiresPipeline);
const unsigned numOfThreads);
virtual ~PipelineBuilder() {}
virtual Kernel * makeKernel();
// Sets the mExternallySynchronized flag; calling with no argument sets it to true.
void setExternallySynchronized(const bool value = true) {
mExternallySynchronized = value;
}
protected:
......@@ -95,8 +104,7 @@ protected:
BaseDriver & mDriver;
// eventual pipeline configuration
unsigned mNumOfThreads;
unsigned mNumOfBufferSegments;
const bool mRequiresPipeline;
bool mExternallySynchronized = false;
Bindings mInputStreamSets;
Bindings mOutputStreamSets;
Bindings mInputScalars;
......@@ -105,6 +113,7 @@ protected:
Kernels mKernels;
CallBindings mCallBindings;
NestedBuilders mNestedBuilders;
LengthAssertions mLengthAssertions;
};
/** ------------------------------------------------------------------------------------------------------------- *
......@@ -120,10 +129,6 @@ public:
mNumOfThreads = threads;
}
void setNumOfBufferSegments(const unsigned bufferSegments) {
mNumOfBufferSegments = bufferSegments;
}
ProgramBuilder(BaseDriver & driver,
Bindings && stream_inputs, Bindings && stream_outputs,
Bindings && scalar_inputs, Bindings && scalar_outputs);
......
......@@ -12,7 +12,9 @@ namespace llvm { class Value; }
namespace kernel {
const static std::string INITIALIZE_FUNCTION_POINTER_SUFFIX = "_IFP";
const static std::string ALLOCATE_SHARED_INTERNAL_STREAMSETS_FUNCTION_POINTER_SUFFIX = "_AFP";
const static std::string INITIALIZE_THREAD_LOCAL_FUNCTION_POINTER_SUFFIX = "_ITFP";
const static std::string ALLOCATE_THREAD_LOCAL_INTERNAL_STREAMSETS_FUNCTION_POINTER_SUFFIX = "_ATFP";
const static std::string DO_SEGMENT_FUNCTION_POINTER_SUFFIX = "_SFP";
const static std::string FINALIZE_THREAD_LOCAL_FUNCTION_POINTER_SUFFIX = "_FTIP";
const static std::string FINALIZE_FUNCTION_POINTER_SUFFIX = "_FIP";
......@@ -42,7 +44,7 @@ public:
void * const FunctionPointer;
const Scalars Args;
llvm::Constant * Callee;
mutable llvm::Constant * Callee;
// Stores the binding's name, function type, raw function pointer and argument
// list; Callee starts out as nullptr. Name is taken by const reference so that
// the string is copied once (into the member) rather than once into the
// parameter and again into the member.
CallBinding(const std::string & Name, llvm::FunctionType * Type, void * FunctionPointer, std::initializer_list<Scalar *> && Args)
: Name(Name), Type(Type), FunctionPointer(FunctionPointer), Args(Args), Callee(nullptr) { }
......@@ -50,6 +52,10 @@ public:
using CallBindings = std::vector<CallBinding>;
using LengthAssertion = std::array<const StreamSet *, 2>;
using LengthAssertions = std::vector<LengthAssertion>;
bool hasSignature() const final { return true; }
bool externallyInitialized() const;
......@@ -70,10 +76,6 @@ public:
return mNumOfThreads;
}
const unsigned getNumOfSegments() const {
return mNumOfSegments;
}
// Returns mKernels by const reference (read-only access, no copy).
const Kernels & getKernels() const {
return mKernels;
}
......@@ -82,9 +84,13 @@ public:
return mCallBindings;
}
// Returns mLengthAssertions by const reference (read-only access, no copy).
const LengthAssertions & getLengthAssertions() const {
return mLengthAssertions;
}
void addKernelDeclarations(BuilderRef b) final;
std::unique_ptr<KernelCompiler> instantiateKernelCompiler(BuilderRef b) const noexcept final;
std::unique_ptr<KernelCompiler> instantiateKernelCompiler(BuilderRef b) const final;
virtual ~PipelineKernel();
......@@ -92,10 +98,11 @@ protected:
PipelineKernel(BaseDriver & driver,
std::string && signature,
const unsigned numOfThreads, const unsigned numOfSegments,
const unsigned numOfThreads,
Kernels && kernels, CallBindings && callBindings,
Bindings && stream_inputs, Bindings && stream_outputs,
Bindings && scalar_inputs, Bindings && scalar_outputs);
Bindings && scalar_inputs, Bindings && scalar_outputs,
LengthAssertions && lengthAssertions);
void addFamilyInitializationArgTypes(BuilderRef b, InitArgTypes & argTypes) const final;
......@@ -103,6 +110,12 @@ protected:
void linkExternalMethods(BuilderRef b) final;
LLVM_READNONE bool allocatesInternalStreamSets() const final;
void generateAllocateSharedInternalStreamSetsMethod(BuilderRef b, llvm::Value * expectedNumOfStrides) final;
void generateAllocateThreadLocalInternalStreamSetsMethod(BuilderRef b, llvm::Value * expectedNumOfStrides) final;
void addAdditionalFunctions(BuilderRef b) final;
void addInternalProperties(BuilderRef b) final;
......@@ -117,13 +130,16 @@ protected:
void generateFinalizeMethod(BuilderRef b) final;
void runOptimizationPasses(BuilderRef b) const final;
protected:
const unsigned mNumOfThreads;
const unsigned mNumOfSegments;
const Kernels mKernels;
CallBindings mCallBindings;
const std::string mSignature;
const Kernels mKernels;
const CallBindings mCallBindings;
const LengthAssertions mLengthAssertions;
};
}
......
......@@ -36,6 +36,15 @@ private:
void generateMultiBlockLogic(BuilderRef iBuilder, llvm::Value * const numOfStrides) override;
};
class BatchCoordinatesKernel : public MultiBlockKernel {
public:
BatchCoordinatesKernel(BuilderRef b,
StreamSet * const Matches, StreamSet * const LineBreakStream,
StreamSet * const Coordinates, Scalar * const callbackObject, unsigned strideBlocks = 1);
private:
void generateMultiBlockLogic(BuilderRef iBuilder, llvm::Value * const numOfStrides) override;
};
class MatchReporter : public SegmentOrientedKernel {
public:
MatchReporter(BuilderRef b,
......
......@@ -19,31 +19,37 @@ namespace kernel {
// The result may then be used for calculation of a SpreadMask by InsertionSpreadMask.
//
class StringInsertBixNum : public pablo::PabloKernel {
class StringInsertBixNum final : public pablo::PabloKernel {
public:
StringInsertBixNum(BuilderRef b, std::vector<std::string> & insertStrs,
StringInsertBixNum(BuilderRef b, const std::vector<std::string> &insertStrs,
StreamSet * insertMarks, StreamSet * insertBixNum);
void generatePabloMethod() override;
bool hasSignature() const override { return true; }
// Returns a StringRef view of the mSignature member; the view refers to the
// member's storage rather than owning a copy.
llvm::StringRef getSignature() const override {
return mSignature;
}
private:
std::vector<std::string> mInsertStrings;
bool mMultiplexing;
unsigned mBixNumBits;
const std::vector<std::string> mInsertStrings;
const bool mMultiplexing;
const unsigned mBixNumBits;
const std::string mSignature;
};
class StringReplaceKernel : public pablo::PabloKernel {
class StringReplaceKernel final : public pablo::PabloKernel {
public:
StringReplaceKernel(BuilderRef b, std::vector<std::string> & insertStrs,
StringReplaceKernel(BuilderRef b, const std::vector<std::string> & insertStrs,
StreamSet * basis, StreamSet * spreadMask,
StreamSet * insertMarks, StreamSet * runIndex,
StreamSet * output);
llvm::StringRef getSignature() const override;
bool hasSignature() const override { return true; }
void generatePabloMethod() override;
bool hasSignature() const override { return true; }
// Returns a StringRef view of the mSignature member; the view refers to the
// member's storage rather than owning a copy.
llvm::StringRef getSignature() const override {
return mSignature;
}
private:
std::string makeSignature(std::vector<std::string> & insertStrs, StreamSet * insertMarks);
std::vector<std::string> mInsertStrings;
bool mMultiplexing;
std::string mSignature;
const std::vector<std::string> mInsertStrings;
const bool mMultiplexing;
const std::string mSignature;
};
}
......
......@@ -22,6 +22,12 @@ enum class Intrinsic {
class IntrinsicCall final : public Statement {
friend class PabloBlock;
static constexpr auto __inSpanUpTo = "spanupto";
static constexpr auto __inInclusiveSpan = "inclusivespan";
static constexpr auto __inExclusiveSpan = "exclusivespan";
static constexpr auto __inPrintRegister = "printregister";
public:
static inline bool classof(PabloAST const * e) {
return e->getClassTypeId() == ClassTypeId::IntrinsicCall;
......@@ -40,7 +46,8 @@ public:
}
inline llvm::StringRef getIntrinsicName() const noexcept {
#define PABLO_INTRINSIC_CASE(INTRINSIC) case Intrinsic::INTRINSIC: return #INTRINSIC
#define PABLO_INTRINSIC_CASE(INTRINSIC) \
case Intrinsic::INTRINSIC: return llvm::StringRef(__in##INTRINSIC)
switch (mIntrinsic) {
PABLO_INTRINSIC_CASE(SpanUpTo);
PABLO_INTRINSIC_CASE(InclusiveSpan);
......@@ -70,8 +77,9 @@ public:
protected:
IntrinsicCall(Intrinsic intrinsic, llvm::Type * type, llvm::ArrayRef<PabloAST *> argv, const String * name, Allocator & allocator)
: Statement(ClassTypeId::IntrinsicCall, type, argv, name, allocator)
, mIntrinsic(intrinsic)
{}
, mIntrinsic(intrinsic) {
setSideEffecting(intrinsic == pablo::Intrinsic::PrintRegister);
}
const Intrinsic mIntrinsic;
......
......@@ -138,6 +138,8 @@ public:
return *mContext;
}
bool requiresExplicitPartialFinalStride() const override;
protected:
PabloKernel(BuilderRef builder,
......@@ -172,7 +174,7 @@ protected:
// finalizing the KernelStateType.
void addInternalProperties(BuilderRef b) final;
std::unique_ptr<kernel::KernelCompiler> instantiateKernelCompiler(BuilderRef b) const noexcept;
std::unique_ptr<kernel::KernelCompiler> instantiateKernelCompiler(BuilderRef b) const override;
private:
......
......@@ -15,6 +15,7 @@
#include <type_traits>
#include <tuple>
#include <vector>
#include <util/aligned_allocator.h>
#include <llvm/IR/Type.h>
#include <llvm/Support/ErrorHandling.h>
#include <kernel/core/idisa_target.h>
......@@ -150,7 +151,7 @@ public:
using set_literal_t = std::vector<literal_t>;
/// The internal buffer type of the stream.
using buffer_t = std::vector<buffer_item_type>;
using buffer_t = std::vector<buffer_item_type, AlignedAllocator<buffer_item_type, 64>>;
/// The number of stream items per buffer item;
static const uint32_t stream_items_per_buffer_item_v = si_per_bi<I>::value;
......@@ -178,9 +179,7 @@ struct copy_decoder {
static const size_t num_elements_v = 1;
static result_t decode(typename traits::literal_t const & str) {
static_assert(std::is_same<typename traits::literal_t, typename traits::buffer_t>::value,
"copy_decoder cannot be used when literal_t != buffer_t");
return std::make_tuple(str, str.size(), 1);
return std::make_tuple(typename traits::buffer_t{str.begin(), str.end()}, str.size(), 1);
}
};
......@@ -265,7 +264,7 @@ struct bin_decoder {
static const size_t num_elements_v = 1;
static result_t decode(typename traits::literal_t const & str) {
std::vector<uint8_t> buffer{};
std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> buffer{};
int counter = 0;
size_t len = 0;
uint8_t builder = 0;
......
#ifndef MAXSAT_HPP
#define MAXSAT_HPP
#include <llvm/Support/ErrorHandling.h>
#include <algorithm>
#include <vector>
#include <z3.h>
inline Z3_ast mk_binary_or(Z3_context ctx, Z3_ast in_1, Z3_ast in_2) {
Z3_ast args[2] = { in_1, in_2 };
return Z3_mk_or(ctx, 2, args);
......@@ -95,36 +98,35 @@ inline void assert_le_one(Z3_context ctx, Z3_solver s, unsigned n, Z3_ast * val)
/** ------------------------------------------------------------------------------------------------------------- *
* Fu & Malik procedure for MaxSAT. This procedure is based on unsat core extraction and the at-most-one constraint.
** ------------------------------------------------------------------------------------------------------------- */
static int maxsat(Z3_context ctx, Z3_solver solver, std::vector<Z3_ast> & soft) {
if (LLVM_UNLIKELY(Z3_solver_check(ctx, solver) == Z3_L_FALSE)) {
return -1;
}
static int Z3_maxsat(Z3_context ctx, Z3_solver solver, std::vector<Z3_ast> soft) {
assert("initial formula is unsatisfiable!" && (Z3_solver_check(ctx, solver) != Z3_L_FALSE));
if (LLVM_UNLIKELY(soft.empty())) {
return 0;
}
const auto n = soft.size();
const auto ty = Z3_mk_bool_sort(ctx);
Z3_ast aux_vars[n];
Z3_ast assumptions[n];
std::vector<Z3_ast> aux_vars(n);
std::vector<Z3_ast> assumptions(n);
for (unsigned i = 0; i < n; ++i) {
aux_vars[i] = Z3_mk_fresh_const(ctx, nullptr, ty);
Z3_solver_assert(ctx, solver, mk_binary_or(ctx, soft[i], aux_vars[i]));
}
for (unsigned c = n; c; --c) {
for (auto c = n; c; --c) {
// create assumptions
for (unsigned i = 0; i < n; i++) {
// Recall that we asserted (soft_cnstrs[i] \/ aux_vars[i])
// So using (NOT aux_vars[i]) as an assumption we are actually forcing the soft_cnstrs[i] to be considered.
// So by using (NOT aux_vars[i]) as an assumption, we force the soft constraints to be considered.
assumptions[i] = Z3_mk_not(ctx, aux_vars[i]);
}
if (Z3_solver_check_assumptions(ctx, solver, n, assumptions) != Z3_L_FALSE) {
if (Z3_solver_check_assumptions(ctx, solver, n, assumptions.data()) != Z3_L_FALSE) {
return c; // done
} else {
Z3_ast_vector core = Z3_solver_get_unsat_core(ctx, solver);
unsigned m = Z3_ast_vector_size(ctx, core);
Z3_ast block_vars[m];
std::vector<Z3_ast> block_vars(m);
unsigned k = 0;
// update soft-constraints and aux_vars
for (unsigned i = 0; i < n; i++) {
......@@ -147,11 +149,11 @@ static int maxsat(Z3_context ctx, Z3_solver solver, std::vector<Z3_ast> & soft)
}
if (k > 1) {
Z3_ast aux_array_1[k + 1];
Z3_ast aux_array_2[k + 1];
Z3_ast * aux_1 = aux_array_1;
Z3_ast * aux_2 = aux_array_2;
std::memcpy(aux_1, block_vars, sizeof(Z3_ast) * k);
std::vector<Z3_ast> aux_array_1(k + 1);
std::vector<Z3_ast> aux_array_2(k + 1);
Z3_ast * aux_1 = aux_array_1.data();
Z3_ast * aux_2 = aux_array_2.data();
std::copy_n(block_vars.data(), k, aux_array_1.data());
unsigned i = 1;
for (; k > 1; ++i) {
assert (aux_1 != aux_2);
......@@ -162,8 +164,7 @@ static int maxsat(Z3_context ctx, Z3_solver solver, std::vector<Z3_ast> & soft)
}
}
}
llvm_unreachable("unreachable");
return -1;
return 0;
}
#endif // MAXSAT_HPP
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment