Commit 958e3889 authored by Rob Cameron's avatar Rob Cameron
Browse files

icgrep-devel created

git-svn-id: http://parabix.costar.sfu.ca/svn/icGREP/icgrep-devel@3911 a82c6ad6-7e1c-4be0-8e68-26c050b43eaf
parents
==============================================================================
LLVM Release License
==============================================================================
University of Illinois/NCSA
Open Source License
Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.
All rights reserved.
Developed by:
LLVM Team
University of Illinois at Urbana-Champaign
http://llvm.org
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal with
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimers.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimers in the
documentation and/or other materials provided with the distribution.
* Neither the names of the LLVM Team, University of Illinois at
Urbana-Champaign, nor the names of its contributors may be used to
endorse or promote products derived from this Software without specific
prior written permission.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
SOFTWARE.
==============================================================================
Copyrights and Licenses for Third Party Software Distributed with LLVM:
==============================================================================
The LLVM software contains code written by third parties. Such software will
have its own individual LICENSE.TXT file in the directory in which it appears.
This file will describe the copyrights, license, and restrictions which apply
to that code.
The disclaimer of warranty in the University of Illinois Open Source License
applies to all code in the LLVM Distribution, and nothing in any of the
other licenses gives permission to use the names of the LLVM Team or the
University of Illinois to endorse or promote products derived from this
Software.
The following pieces of software have additional or alternate copyrights,
licenses, and/or restrictions:
Program Directory
------- ---------
Autoconf llvm/autoconf
llvm/projects/ModuleMaker/autoconf
llvm/projects/sample/autoconf
Google Test llvm/utils/unittest/googletest
OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex}
pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT}
ARM contributions llvm/lib/Target/ARM/LICENSE.TXT
md5 contributions llvm/lib/Support/MD5.cpp llvm/include/llvm/Support/MD5.h
Open Software License ("OSL") v. 3.0
This Open Software License (the "License") applies to any original work of
authorship (the "Original Work") whose owner (the "Licensor") has placed the
following licensing notice adjacent to the copyright notice for the Original
Work:
Licensed under the Open Software License version 3.0
1) Grant of Copyright License. Licensor grants You a worldwide, royalty-free,
non-exclusive, sublicensable license, for the duration of the copyright, to do
the following:
a) to reproduce the Original Work in copies, either alone or as part of a
collective work;
b) to translate, adapt, alter, transform, modify, or arrange the Original Work,
thereby creating derivative works ("Derivative Works") based upon the Original
Work;
c) to distribute or communicate copies of the Original Work and Derivative Works
to the public, with the proviso that copies of Original Work or Derivative Works
that You distribute or communicate shall be licensed under this Open Software
License;
d) to perform the Original Work publicly; and
e) to display the Original Work publicly.
2) Grant of Patent License. Licensor grants You a worldwide, royalty-free,
non-exclusive, sublicensable license, under patent claims owned or controlled by
the Licensor that are embodied in the Original Work as furnished by the
Licensor, for the duration of the patents, to make, use, sell, offer for sale,
have made, and import the Original Work and Derivative Works.
3) Grant of Source Code License. The term "Source Code" means the preferred form
of the Original Work for making modifications to it and all available
documentation describing how to modify the Original Work. Licensor agrees to
provide a machine-readable copy of the Source Code of the Original Work along
with each copy of the Original Work that Licensor distributes. Licensor reserves
the right to satisfy this obligation by placing a machine-readable copy of the
Source Code in an information repository reasonably calculated to permit
inexpensive and convenient access by You for as long as Licensor continues to
distribute the Original Work.
4) Exclusions From License Grant. Neither the names of Licensor, nor the names
of any contributors to the Original Work, nor any of their trademarks or service
marks, may be used to endorse or promote products derived from this Original
Work without express prior permission of the Licensor. Except as expressly
stated herein, nothing in this License grants any license to Licensor's
trademarks, copyrights, patents, trade secrets or any other intellectual
property. No patent license is granted to make, use, sell, offer for sale, have
made, or import embodiments of any patent claims other than the licensed claims
defined in Section 2. No license is granted to the trademarks of Licensor even
if such marks are included in the Original Work. Nothing in this License shall
be interpreted to prohibit Licensor from licensing under terms different from
this License any Original Work that Licensor otherwise would have a right to
license.
5) External Deployment. The term "External Deployment" means the use,
distribution, or communication of the Original Work or Derivative Works in any
way such that the Original Work or Derivative Works may be used by anyone other
than You, whether those works are distributed or communicated to those persons
or made available as an application intended for use over a network. As an
express condition for the grants of license hereunder, You must treat any
External Deployment by You of the Original Work or a Derivative Work as a
distribution under section 1(c).
6) Attribution Rights. You must retain, in the Source Code of any Derivative
Works that You create, all copyright, patent, or trademark notices from the
Source Code of the Original Work, as well as any notices of licensing and any
descriptive text identified therein as an "Attribution Notice." You must cause
the Source Code for any Derivative Works that You create to carry a prominent
Attribution Notice reasonably calculated to inform recipients that You have
modified the Original Work.
7) Warranty of Provenance and Disclaimer of Warranty. Licensor warrants that the
copyright in and to the Original Work and the patent rights granted herein by
Licensor are owned by the Licensor or are sublicensed to You under the terms of
this License with the permission of the contributor(s) of those copyrights and
patent rights. Except as expressly stated in the immediately preceding sentence,
the Original Work is provided under this License on an "AS IS" BASIS and WITHOUT
WARRANTY, either express or implied, including, without limitation, the
warranties of non-infringement, merchantability or fitness for a particular
purpose. THE ENTIRE RISK AS TO THE QUALITY OF THE ORIGINAL WORK IS WITH YOU.
This DISCLAIMER OF WARRANTY constitutes an essential part of this License. No
license to the Original Work is granted by this License except under this
disclaimer.
8) Limitation of Liability. Under no circumstances and under no legal theory,
whether in tort (including negligence), contract, or otherwise, shall the
Licensor be liable to anyone for any indirect, special, incidental, or
consequential damages of any character arising as a result of this License or
the use of the Original Work including, without limitation, damages for loss of
goodwill, work stoppage, computer failure or malfunction, or any and all other
commercial damages or losses. This limitation of liability shall not apply to
the extent applicable law prohibits such limitation.
9) Acceptance and Termination. If, at any time, You expressly assented to this
License, that assent indicates your clear and irrevocable acceptance of this
License and all of its terms and conditions. If You distribute or communicate
copies of the Original Work or a Derivative Work, You must make a reasonable
effort under the circumstances to obtain the express assent of recipients to the
terms of this License. This License conditions your rights to undertake the
activities listed in Section 1, including your right to create Derivative Works
based upon the Original Work, and doing so without honoring these terms and
conditions is prohibited by copyright law and international treaty. Nothing in
this License is intended to affect copyright exceptions and limitations
(including "fair use" or "fair dealing"). This License shall terminate
immediately and You may no longer exercise any of the rights granted to You by
this License upon your failure to honor the conditions in Section 1(c).
10) Termination for Patent Action. This License shall terminate automatically
and You may no longer exercise any of the rights granted to You by this License
as of the date You commence an action, including a cross-claim or counterclaim,
against Licensor or any licensee alleging that the Original Work infringes a
patent. This termination provision shall not apply for an action alleging patent
infringement by combinations of the Original Work with other software or
hardware.
11) Jurisdiction, Venue and Governing Law. Any action or suit relating to this
License may be brought only in the courts of a jurisdiction wherein the Licensor
resides or in which Licensor conducts its primary business, and under the laws
of that jurisdiction excluding its conflict-of-law provisions. The application
of the United Nations Convention on Contracts for the International Sale of
Goods is expressly excluded. Any use of the Original Work outside the scope of
this License or after its termination shall be subject to the requirements and
penalties of copyright or patent law in the appropriate jurisdiction. This
section shall survive the termination of this License.
12) Attorneys' Fees. In any action to enforce the terms of this License or
seeking damages relating thereto, the prevailing party shall be entitled to
recover its costs and expenses, including, without limitation, reasonable
attorneys' fees and costs incurred in connection with such action, including any
appeal of such action. This section shall survive the termination of this
License.
13) Miscellaneous. If any provision of this License is held to be unenforceable,
such provision shall be reformed only to the extent necessary to make it
enforceable.
14) Definition of "You" in This License. "You" throughout this License, whether
in upper or lower case, means an individual or a legal entity exercising rights
under, and complying with all of the terms of, this License. For legal entities,
"You" includes any entity that controls, is controlled by, or is under common
control with you. For purposes of this definition, "control" means (i) the
power, direct or indirect, to cause the direction or management of such entity,
whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or
more of the outstanding shares, or (iii) beneficial ownership of such entity.
15) Right to Use. You may use the Original Work in all ways not otherwise
restricted or conditioned by this License or by law, and Licensor promises not
to interfere with or be responsible for such uses by You.
16) Modification of This License. This License is Copyright (c) 2005 Lawrence
Rosen. Permission is granted to copy, distribute, or communicate this License
without modification. Nothing in this License permits You to modify this License
as applied to the Original Work or to Derivative Works. However, You may modify
the text of this License and copy, distribute or communicate your modified
version (the "Modified License") and apply it to other original works of
authorship subject to the following conditions: (i) You may not indicate in any
way that your Modified License is the "Open Software License" or "OSL" and you
may not use those names in the name of your Modified License; (ii) You must
replace the notice specified in the first paragraph above with the notice
"Licensed under <insert your license name here>" or with a notice of your own
that is not confusingly similar to the notice in this License; and (iii) You may
not claim that your original works are open source software unless your Modified
License has been approved by Open Source Initiative (OSI) and You comply with
its license review and certification process.
\ No newline at end of file
README-icgrep-0.8.txt
This is the open-source version of icgrep 0.8. This file includes
an executable for 64-bit Linux systems (compiled specifically for
Ubuntu 12.04) as well as instructions for building in other contexts.
The executable is in icgrep-0.8/icgrep-build/icgrep
To build icgrep, you need an installed LLVM system providing the
core libraries. One is included with this distribution in the
libllvm directory.
Using the installed LLVM, building icgrep uses the CMake build
system generator.
(IC1) open a terminal window and cd to the icgrep-build directory
(IC2) enter the following command to build the makefiles
cmake -DCMAKE_CXX_FLAGS="-O3 -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS" -DCMAKE_CXX_COMPILER:FILEPATH=/usr/bin/clang++ -DCMAKE_C_COMPILER:FILEPATH=/usr/bin/clang ../icgrep
(IC3) Enter the command "make"
To rebuild LLVM,
(L1) download a source distribution from llvm.org
and place inside the icgrep-0.8 directory, e.g., llvm-3.4.1.src
(L2) open a terminal window and cd to the llvm-build directory
(L3) enter the following command to build the makefiles
cmake -DCMAKE_INSTALL_PREFIX=../libllvm -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_BUILD_TOOLS=OFF -DLLVM_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER:FILEPATH=/usr/bin/clang++ -DCMAKE_C_COMPILER:FILEPATH=/usr/bin/clang ../llvm-3.4.1.src
(L4) Still in the llvm-build directory, enter the commands "make" and then "make install"
Now complete icgrep installation using steps IC1 to IC3 above.
LLVM files are governed by the LLVM Release License in LLVM-LICENSE.txt.
icgrep is governed by Open Software License 3.0 in OSL-3.0.txt.
.
File added
# Build script for icgrep: builds the PabloADT and RegExpADT libraries and
# links them with the LLVM libraries into the icgrep executable.
cmake_minimum_required (VERSION 2.8)
project (icgrep)
# The icgrep version number (major.minor).
set (icgrep_VERSION_MAJOR 0)
set (icgrep_VERSION_MINOR 8)
# (Disabled) configure a header file to pass some of the CMake settings
# to the source code:
#configure_file (
# "${PROJECT_SOURCE_DIR}/icgrepconfig.h.in"
# "${PROJECT_BINARY_DIR}/icgrepconfig.h"
# )
# LLVM set up
#
# Cache variable naming the root of the LLVM install; override with -DLLVM_ROOT=<path>.
# NOTE(review): the default is a relative path -- confirm which directory it is resolved against.
set(LLVM_ROOT "../libllvm" CACHE PATH "Root of LLVM install.")
# Sanity check: stop configuring when the LLVM headers are not found under LLVM_ROOT.
if( NOT EXISTS ${LLVM_ROOT}/include/llvm )
message(FATAL_ERROR "LLVM_ROOT (${LLVM_ROOT}) is not a valid LLVM install")
endif()
# We incorporate the CMake features provided by LLVM:
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${LLVM_ROOT}/share/llvm/cmake")
include(LLVMConfig)
# Now set the header and library paths, and the compile definitions LLVM asks for:
include_directories( ${LLVM_INCLUDE_DIRS} )
link_directories( ${LLVM_LIBRARY_DIRS} )
add_definitions( ${LLVM_DEFINITIONS} )
# Resolve the LLVM libraries for the "mcjit" and "native" components into
# REQ_LLVM_LIBRARIES (linked into the executable below):
llvm_map_components_to_libraries(REQ_LLVM_LIBRARIES mcjit native)
# Library holding the Pablo expression (pe_*) and statement (ps_*) classes plus their printer.
add_library(PabloADT pe_advance.cpp pe_all.cpp pe_and.cpp pe_charclass.cpp pe_matchstar.cpp pe_not.cpp pe_or.cpp pe_pabloe.cpp pe_sel.cpp pe_var.cpp pe_xor.cpp ps_assign.cpp ps_if.cpp ps_pablos.cpp ps_while.cpp printer_pablos.cpp)
# Library holding the regular-expression classes, the parser and its result types, and their printer.
add_library(RegExpADT re_alt.cpp re_cc.cpp re_end.cpp re_parser.cpp re_re.cpp re_rep.cpp re_seq.cpp re_start.cpp rl_replimit.cpp rl_unbounded.cpp rl_upperbound.cpp parsefailure.cpp parseresult.cpp parsesuccess.cpp printer_re.cpp)
# Project header search paths (the binary dir entry is disabled together with configure_file above):
#include_directories("${PROJECT_BINARY_DIR}")
include_directories("${PROJECT_SOURCE_DIR}")
include_directories("${PROJECT_SOURCE_DIR}/include")
include_directories("${PROJECT_SOURCE_DIR}/include/simd-lib")
include_directories("${PROJECT_SOURCE_DIR}/include/simd-lib/idisa_cpp")
# add the executable
add_executable(icgrep icgrep.cpp llvm_gen.cpp llvm_gen_helper.cpp utf_encoding.cpp cc_codegenobject.cpp cc_compiler.cpp cc_compiler_helper.cpp re_compiler.cpp pbix_compiler.cpp symbol_generator.cpp utf8_encoder.cpp)
# Link the two project libraries and the LLVM libraries resolved above.
target_link_libraries (icgrep PabloADT RegExpADT ${REQ_LLVM_LIBRARIES})
/*
* Copyright (c) 2014 International Characters.
* This software is licensed to the public under the Open Software License 3.0.
* icgrep is a trademark of International Characters.
*/
/*
* Copyright © 2014 International Characters.
* This software is licensed to the public under the Open Software License 3.0.
* icgrep is a trademark of International Characters.
*/
#include "cc_codegenobject.h"
/*
    Construct a code generator object.
    gensym_pattern is the prefix used for generated variable names; the
    counter appended to it starts at zero.
*/
CC_CodeGenObject::CC_CodeGenObject(std::string gensym_pattern)
    : mGenSym_Template(gensym_pattern)
    , mGenSymCounter(0)
{
}
void CC_CodeGenObject::add_predefined(std::string key_value, Expression* mapped_value)
{
mCommon_Expression_Map.insert(make_pair(key_value, mapped_value));
}
/*
    Emit the statement "varname = expr" and record varname as the variable
    that holds expr.

    varname  name of the variable being assigned.
    expr     expression to assign; its expr_string is the key used in the
             common expression map.

    Returns the map entry for expr->expr_string. When the expression was
    already mapped, the existing entry is returned (the assignment statement
    is still emitted) and no new Expression is allocated, so nothing is
    leaked on a duplicate key.
*/
Expression* CC_CodeGenObject::add_assignment(std::string varname, Expression* expr)
{
    //Add the new assignment to the list of pablo statements:
    mStmtsl.push_back(new Assign(varname, expr->pablo_expr));

    //Reuse the existing mapping if this expression has been seen before:
    std::map<std::string, Expression*>::iterator existing = mCommon_Expression_Map.find(expr->expr_string);
    if (existing != mCommon_Expression_Map.end())
    {
        return existing->second;
    }

    //Otherwise add the new mapping to the common expression map:
    Expression* mapped_value = new Expression();
    mapped_value->expr_string = varname;
    mapped_value->pablo_expr = new Var(varname);
    mCommon_Expression_Map.insert(std::make_pair(expr->expr_string, mapped_value));
    return mapped_value;
}
/*
    Return the variable that holds expr.
    If expr->expr_string is already present in the common expression map, the
    mapped entry is returned. Otherwise a fresh name is generated (template
    followed by the incremented counter) and an assignment is emitted for it.
*/
Expression* CC_CodeGenObject::expr_to_variable(Expression* expr)
{
    std::map<std::string, Expression*>::iterator known = mCommon_Expression_Map.find(expr->expr_string);
    if (known != mCommon_Expression_Map.end())
    {
        return known->second;
    }

    ++mGenSymCounter;
    const std::string fresh_name = mGenSym_Template + INT2STRING(mGenSymCounter);
    return add_assignment(fresh_name, expr);
}
/*
    Return a copy of the list of statements emitted so far.
*/
std::list<PabloS*> CC_CodeGenObject::get_stmtsl()
{
    const std::list<PabloS*>& emitted = mStmtsl;
    return emitted;
}
/*
* Copyright (c) 2014 International Characters.
* This software is licensed to the public under the Open Software License 3.0.
* icgrep is a trademark of International Characters.
*/
#ifndef CC_CODEGENOBJECT_H
#define CC_CODEGENOBJECT_H
//Pablo Expressions
#include "pe_pabloe.h"
#include "pe_sel.h"
#include "pe_advance.h"
#include "pe_all.h"
#include "pe_and.h"
#include "pe_charclass.h"
#include "pe_matchstar.h"
#include "pe_not.h"
#include "pe_or.h"
#include "pe_var.h"
#include "pe_xor.h"
//Pablo Statements
#include "ps_pablos.h"
#include "ps_assign.h"
#include "ps_if.h"
#include "ps_while.h"
#include <iostream>
#include <sstream>
#include <string>
#include <list>
#include <map>
// Converts i to a std::string by streaming it into a temporary std::ostringstream.
// The argument is pasted unparenthesised after "<<", so pass a simple value or variable.
#define INT2STRING(i) static_cast<std::ostringstream*>(&(std::ostringstream() << i))->str()
// A Pablo expression together with the string used to identify it.
struct Expression{
std::string expr_string; // textual form; CC_CodeGenObject uses it as the key of its common expression map
PabloE* pablo_expr; // the Pablo expression itself
};
// Collects Pablo assignment statements and keeps a map from expression
// strings to the Expression (variable) registered for each of them.
class CC_CodeGenObject
{
public:
// gensym_pattern: prefix for generated variable names.
CC_CodeGenObject(std::string gensym_pattern);
// Registers mapped_value under key_value in the expression map; emits no statement.
void add_predefined(std::string key_value, Expression *mapped_value);
// Appends an assignment of expr to the named variable and returns the map entry for expr.
Expression* add_assignment(std::string value, Expression* expr);
// Returns the mapped entry for the expression, creating a generated variable when there is none.
Expression* expr_to_variable(Expression* cgo);
// Returns a copy of the statements emitted so far.
std::list<PabloS*> get_stmtsl();
private:
std::string mGenSym_Template; // prefix for generated variable names
int mGenSymCounter; // incremented before each generated name is formed
std::list<PabloS*> mStmtsl; // emitted statements, in order
std::map<std::string, Expression*> mCommon_Expression_Map; // expression string -> registered Expression
};
#endif // CC_CODEGENOBJECT_H
/*
* Copyright (c) 2014 International Characters.
* This software is licensed to the public under the Open Software License 3.0.
* icgrep is a trademark of International Characters.
*/
#include "cc_compiler.h"
/*
    Construct a character class compiler that uses the given encoding.
*/
CC_Compiler::CC_Compiler(UTF_Encoding encoding)
{
    this->mEncoding = encoding;
}
std::list<PabloS*> CC_Compiler::compile(std::string basis_pattern, std::string gensym_pattern, RE* re, std::list<CC*> predefined)
{
mEncoding.setBasisPattern(basis_pattern);
CC_CodeGenObject cgo(gensym_pattern);
for (int i = 0; i < mEncoding.getBits(); i++)
{
std::string b_pattern = bit_var((mEncoding.getBits() -1) - i);
Expression* expr = new Expression();
expr->expr_string = b_pattern;
expr->pablo_expr = make_bitv(i);
cgo.add_predefined(b_pattern, expr);
}
process_re(cgo, re);
process_predefined(cgo, predefined);
return cgo.get_stmtsl();
}
/*
    Walk the regular expression and generate code for every character class
    in it. Alt and Seq nodes are traversed child by child, Rep nodes through
    their repeated expression; any other node type is ignored.
*/
void CC_Compiler::process_re(CC_CodeGenObject &cgo, RE *re)
{
    if (Alt* alternation = dynamic_cast<Alt*>(re))
    {
        for (std::list<RE*>::iterator child = alternation->GetREList()->begin();
             child != alternation->GetREList()->end();
             ++child)
        {
            process_re(cgo, *child);
        }
        return;
    }

    if (CC* char_class = dynamic_cast<CC*>(re))
    {
        cgo = cc2pablos(cgo, char_class);
        return;
    }

    if (Rep* repetition = dynamic_cast<Rep*>(re))
    {
        process_re(cgo, repetition->getRE());
        return;
    }

    if (Seq* sequence = dynamic_cast<Seq*>(re))
    {
        for (std::list<RE*>::iterator child = sequence->GetREList()->begin();
             child != sequence->GetREList()->end();
             ++child)
        {
            process_re(cgo, *child);
        }
    }
}
/*
    Generate code for each character class in predefined, in list order.
*/
void CC_Compiler::process_predefined(CC_CodeGenObject &cgo, std::list<CC*> predefined)
{
    std::list<CC*>::iterator current = predefined.begin();
    while (current != predefined.end())
    {
        cgo = cc2pablos(cgo, *current);
        ++current;
    }
}
/*
    Build an expression that is true where the selected basis bits match
    the corresponding bits of pattern.

    pattern        bit values to match.
    selected_bits  mask of the bit positions that take part in the test.

    Returns a true literal when no bits are selected. Otherwise one term is
    built per bit position up to the highest selected bit (a basis bit, its
    negation, or a true literal for an unselected position) and the terms
    are and-ed together pairwise.

    The per-bit test mask is an unsigned int. The previous char mask
    overflowed at bit 7 and became zero from bit 8 on, which could prevent
    the loop from terminating for masks wider than 8 bits.
*/
PabloE* CC_Compiler::bit_pattern_expr(int pattern, int selected_bits)
{
    if (selected_bits == 0) return new All(1);

    std::vector<PabloE*> bit_terms;
    const unsigned int wanted = static_cast<unsigned int>(pattern);
    unsigned int remaining = static_cast<unsigned int>(selected_bits);

    for (int bit_no = 0; remaining != 0; ++bit_no)
    {
        const unsigned int test_bit = 1u << bit_no;
        if (remaining & test_bit)
        {
            if ((wanted & test_bit) == 0)
            {
                bit_terms.push_back(CC_Compiler_Helper::make_not(make_bitv(bit_no)));
            }
            else
            {
                bit_terms.push_back(make_bitv(bit_no));
            }
        }
        else
        {
            //Unselected position: contributes nothing to the conjunction.
            bit_terms.push_back(new All(1));
        }
        remaining &= ~test_bit;
    }

    //Reduce the list so that all of the expressions are contained within a single expression.
    while (bit_terms.size() > 1)
    {
        std::vector<PabloE*> new_terms;
        for (std::vector<PabloE*>::size_type i = 0; i < (bit_terms.size() / 2); i++)
        {
            new_terms.push_back(CC_Compiler_Helper::make_and(bit_terms[(2 * i) + 1], bit_terms[2 * i]));
        }
        if (bit_terms.size() % 2 == 1)
        {
            //An odd term out is carried over unchanged to the next round.
            new_terms.push_back(bit_terms[bit_terms.size() - 1]);
        }
        bit_terms.swap(new_terms);
    }

    return bit_terms[0];
}
/*
    Build the expression that matches exactly the code unit ch, testing
    every bit in the encoding's mask.
*/
PabloE* CC_Compiler::char_test_expr(int ch)
{
    PabloE* exact_match = bit_pattern_expr(ch, mEncoding.getMask());
    return exact_match;
}
/*
    Build an expression that is true for values in the range n1..n2
    (inclusive).

    The high bits shared by n1 and n2 are matched exactly; the remaining low
    bits are tested with GE_Range/LE_Range, selected on the highest
    differing bit.

    Returns 0 (after printing a diagnostic) when the range is invalid:
    n1 is negative, n2 < n1, or the values differ in more bits than the
    encoding provides.

    The differing bits are counted on the full integer (previously the XOR
    was truncated to 8 bits) and the masks are computed with integer shifts
    rather than floating-point pow().
*/
PabloE* CC_Compiler::make_range(int n1, int n2)
{
    //Number of low-order bits up to and including the highest bit in which n1 and n2 differ.
    unsigned int diff_bits = static_cast<unsigned int>(n1 ^ n2);
    int diff_count = 0;
    while (diff_bits > 0)
    {
        diff_count++;
        diff_bits >>= 1;
    }

    if ((n1 < 0) || (n2 < n1) || (diff_count > mEncoding.getBits()))
    {
        std::cout << "n1: " << n1 << std::endl;
        std::cout << "n2: " << n2 << std::endl;
        std::cout << "Exception: Bad Range!" << std::endl;
        return 0;
    }

    //With 0 <= n1 <= n2 the count is at most 31, so the unsigned shifts below are well defined.
    int mask = static_cast<int>((1u << diff_count) - 1u);
    PabloE* common = bit_pattern_expr(n1 & ~mask, mEncoding.getMask() ^ mask);
    if (diff_count == 0) return common;

    mask = static_cast<int>((1u << (diff_count - 1)) - 1u);
    PabloE* lo_test = GE_Range(diff_count - 1, n1 & mask);
    PabloE* hi_test = LE_Range(diff_count - 1, n2 & mask);

    return CC_Compiler_Helper::make_and(common, CC_Compiler_Helper::make_sel(make_bitv(diff_count - 1), hi_test, lo_test));
}
/*
    Build an expression that is true when the low N basis bits, read as an
    N-bit number, are greater than or equal to n.
    Returns a true literal for N == 0 and 0 when N is negative.
*/
PabloE* CC_Compiler::GE_Range(int N, int n)
{
    if (N == 0)
    {
        return new All(1); //Return a true literal.
    }

    if ((N % 2) == 0)
    {
        //For an even bit count, try to handle the top two bits of n in one step.
        const int top_two = n >> (N - 2);
        if (top_two == 0)
        {
            PabloE* either_top = CC_Compiler_Helper::make_or(make_bitv(N-1), make_bitv(N-2));
            return CC_Compiler_Helper::make_or(either_top, GE_Range(N-2, n));
        }
        if (top_two == 3)
        {
            PabloE* both_top = CC_Compiler_Helper::make_and(make_bitv(N-1), make_bitv(N-2));
            return CC_Compiler_Helper::make_and(both_top, GE_Range(N-2, n-(3<<(N-2))));
        }
    }

    if (N < 1)
    {
        return 0;
    }

    const int hi_bit = n & (1 << (N-1));
    PabloE* lo_range = GE_Range(N-1, n - hi_bit);

    if (hi_bit == 0)
    {
        /*
          If the hi_bit of n is not set, then whenever the corresponding bit
          is set in the target, the target will certainly be >=. Otherwise,
          the value of GE_Range(N-1, lo_bits) is required.
        */
        return CC_Compiler_Helper::make_or(make_bitv(N-1), lo_range);
    }

    /*
      If the hi_bit of n is set, then the corresponding bit must be set
      in the target for >= and GE_Range(N-1, lo_bits) must also be true.
    */
    return CC_Compiler_Helper::make_and(make_bitv(N-1), lo_range);
}
/*
    Build an expression that is true when the low N basis bits, read as an
    N-bit number, are less than or equal to n.

    If an N-bit pattern is all ones, then any N-bit value is LE this pattern.
    Handling this as a special case avoids an overflow issue with n+1
    requiring more than N bits.

    The all-ones test uses an integer shift instead of comparing against the
    floating-point result of pow(); the range guard keeps the shift well
    defined.
*/
PabloE* CC_Compiler::LE_Range(int N, int n)
{
    const int value_bits = static_cast<int>(sizeof(int) * 8) - 1;
    if ((N >= 0) && (N < value_bits) && ((n + 1) == (1 << N)))
    {
        return new All(1); //True.
    }
    return CC_Compiler_Helper::make_not(GE_Range(N, n + 1));
}
/*
    Build the expression for one character set item: a single-character
    test when lo and hi coincide, a range test when lo < hi. An item with
    lo > hi prints a diagnostic and yields 0.
*/
PabloE* CC_Compiler::char_or_range_expr(CharSetItem charset_item)
{
    if (charset_item.lo_codepoint == charset_item.hi_codepoint)
    {
        return char_test_expr(charset_item.lo_codepoint);
    }

    if (charset_item.lo_codepoint < charset_item.hi_codepoint)
    {
        return make_range(charset_item.lo_codepoint, charset_item.hi_codepoint);
    }

    std::cout << "Exception: Bad Character Set Item!" << std::endl;
    return 0;
}
/*
    Build the expression for a whole character class: the or of the
    expressions of its items, or a false literal when it has no items.
*/
PabloE* CC_Compiler::charset_expr(CC* cc)
{
    if (cc->getItems().size() == 0)
    {
        return new All(0);
    }

    PabloE* combined = char_or_range_expr(cc->getItems().at(0));

    unsigned long index = 1;
    while (index < cc->getItems().size())
    {
        combined = CC_Compiler_Helper::make_or(combined, char_or_range_expr(cc->getItems().at(index)));
        ++index;
    }

    return combined;
}
/*
    Translate a Pablo Expression into three-address code using
    the code generator object CC_CodeGenObject.

    Every operand of a compound expression is first reduced to a variable
    through cgo.expr_to_variable, so the returned Expression refers only to
    variables and literals. Its expr_string is the textual form that the
    code generator uses as the key for common expressions.

    An And with a negated operand is emitted as a single and-not of the two
    underlying operands. An expression of an unhandled type (or an All whose
    value is neither 0 nor 1) yields an Expression whose fields were never
    assigned.

    The string built for Sel previously lacked its final closing
    parenthesis; it is now balanced.
*/
Expression* CC_Compiler::expr2pabloe(CC_CodeGenObject &cgo, PabloE* expr)
{
    Expression* retExpr = new Expression();

    if (All* all = dynamic_cast<All*>(expr))
    {
        if (all->getNum() == 1)
        {
            retExpr->expr_string = "All(1)";
            retExpr->pablo_expr = new All(1);
        }
        else if (all->getNum() == 0)
        {
            retExpr->expr_string = "All(0)";
            retExpr->pablo_expr = new All(0);
        }
    }
    else if (Var* var = dynamic_cast<Var*>(expr))
    {
        retExpr->expr_string = var->getVar();
        retExpr->pablo_expr = new Var(var->getVar());
    }
    else if (Not* pe_not = dynamic_cast<Not*>(expr))
    {
        Expression* ret = cgo.expr_to_variable(expr2pabloe(cgo, pe_not->getExpr()));
        retExpr->expr_string = "~" + ret->expr_string;
        retExpr->pablo_expr = new Not(ret->pablo_expr);
    }
    else if (Or* pe_or = dynamic_cast<Or*>(expr))
    {
        Expression* ret1 = cgo.expr_to_variable(expr2pabloe(cgo, pe_or->getExpr1()));
        Expression* ret2 = cgo.expr_to_variable(expr2pabloe(cgo, pe_or->getExpr2()));
        retExpr->expr_string = "(" + ret1->expr_string + "|" + ret2->expr_string + ")";
        retExpr->pablo_expr = new Or(ret1->pablo_expr, ret2->pablo_expr);
    }
    else if (Xor* pe_xor = dynamic_cast<Xor*>(expr))
    {
        Expression* ret1 = cgo.expr_to_variable(expr2pabloe(cgo, pe_xor->getExpr1()));
        Expression* ret2 = cgo.expr_to_variable(expr2pabloe(cgo, pe_xor->getExpr2()));
        retExpr->expr_string = "(" + ret1->expr_string + "^" + ret2->expr_string + ")";
        retExpr->pablo_expr = new Xor(ret1->pablo_expr, ret2->pablo_expr);
    }
    else if (And* pe_and = dynamic_cast<And*>(expr))
    {
        if (Not* negated_lhs = dynamic_cast<Not*>(pe_and->getExpr1()))
        {
            //(~a & b) is emitted as (b &~ a).
            Expression* ret1 = cgo.expr_to_variable(expr2pabloe(cgo, negated_lhs->getExpr()));
            Expression* ret2 = cgo.expr_to_variable(expr2pabloe(cgo, pe_and->getExpr2()));
            retExpr->expr_string = "(" + ret2->expr_string + "&~" + ret1->expr_string + ")";
            retExpr->pablo_expr = new And(ret2->pablo_expr, new Not(ret1->pablo_expr));
        }
        else if (Not* negated_rhs = dynamic_cast<Not*>(pe_and->getExpr2()))
        {
            //(a & ~b) is emitted as (a &~ b).
            Expression* ret1 = cgo.expr_to_variable(expr2pabloe(cgo, pe_and->getExpr1()));
            Expression* ret2 = cgo.expr_to_variable(expr2pabloe(cgo, negated_rhs->getExpr()));
            retExpr->expr_string = "(" + ret1->expr_string + "&~" + ret2->expr_string + ")";
            retExpr->pablo_expr = new And(ret1->pablo_expr, new Not(ret2->pablo_expr));
        }
        else
        {
            Expression* ret1 = cgo.expr_to_variable(expr2pabloe(cgo, pe_and->getExpr1()));
            Expression* ret2 = cgo.expr_to_variable(expr2pabloe(cgo, pe_and->getExpr2()));
            retExpr->expr_string = "(" + ret1->expr_string + "&" + ret2->expr_string + ")";
            retExpr->pablo_expr = new And(ret1->pablo_expr, ret2->pablo_expr);
        }
    }
    else if (Sel* pe_sel = dynamic_cast<Sel*>(expr))
    {
        Expression* ret_sel = cgo.expr_to_variable(expr2pabloe(cgo, pe_sel->getIf_expr()));
        Expression* ret_true = cgo.expr_to_variable(expr2pabloe(cgo, pe_sel->getT_expr()));
        Expression* ret_false = cgo.expr_to_variable(expr2pabloe(cgo, pe_sel->getF_expr()));
        retExpr->expr_string = "((" + ret_sel->expr_string + "&" + ret_true->expr_string + ")|(~("
            + ret_sel->expr_string + ")&" + ret_false->expr_string + "))";
        retExpr->pablo_expr = new Sel(ret_sel->pablo_expr, ret_true->pablo_expr, ret_false->pablo_expr);
    }

    return retExpr;
}
/*
    Generate the statements for one character class and assign the result
    to a variable named after the class. The code generator is taken and
    returned by value; callers assign the result back.
*/
CC_CodeGenObject CC_Compiler::cc2pablos(CC_CodeGenObject cgo, CC* cc)
{
    PabloE* class_expr = charset_expr(cc);
    Expression* three_address = expr2pabloe(cgo, class_expr);
    cgo.add_assignment(cc->getName(), three_address);
    return cgo;
}
/*
    Return the name of basis variable n: the encoding's basis pattern 0
    followed by the decimal value of n.
*/
std::string CC_Compiler::bit_var(int n)
{
    std::string name = mEncoding.getBasisPattern(0);
    name += INT2STRING(n);
    return name;
}
/*
    Return a new variable expression for bit n. Bit numbers are mapped to
    basis variable indices in reverse order: bit n uses index
    (getBits() - 1) - n.
*/
PabloE* CC_Compiler::make_bitv(int n)
{
    const int basis_index = (mEncoding.getBits() - 1) - n;
    return new Var(bit_var(basis_index));
}
/*
* Copyright (c) 2014 International Characters.
* This software is licensed to the public under the Open Software License 3.0.
* icgrep is a trademark of International Characters.
*/
#ifndef CC_COMPILER_H
#define CC_COMPILER_H
#include "ps_pablos.h"
#include "cc_codegenobject.h"
#include "utf_encoding.h"
#include "cc_compiler_helper.h"
#include <math.h>
#include <utility>
#include <iostream>
#include <sstream>
#include <string>
#include <list>
#include <cassert>
#include <stdlib.h>
//***********************************
//TODO: Just for development
//#include "printer_pablos.h"
//***********************************
// Converts i to a std::string by streaming it into a temporary std::ostringstream.
// The argument is pasted unparenthesised after "<<", so pass a simple value or variable.
#define INT2STRING(i) static_cast<std::ostringstream*>(&(std::ostringstream() << i))->str()
// Compiles the character classes of a regular expression into Pablo
// statements over the basis bit variables of a UTF_Encoding.
class CC_Compiler
{
public:
CC_Compiler(UTF_Encoding encoding);
// Returns the statements generated for every character class in re and in predefined.
std::list<PabloS*> compile(std::string basis_pattern, std::string gensym_pattern, RE* re, std::list<CC*> predefined);
private:
// Recursively visits re and generates code for each character class found.
void process_re(CC_CodeGenObject& cgo, RE* re);
// Generates code for each character class in the list.
void process_predefined(CC_CodeGenObject& cgo, std::list<CC*> predefined);
// Name of basis variable n.
std::string bit_var(int n);
// Variable expression for bit n (bit numbers map to basis variable indices in reverse order).
PabloE* make_bitv(int n);
// Expression matching the bits of pattern at the positions set in selected_bits.
PabloE* bit_pattern_expr(int pattern, int selected_bits);
// Expression matching exactly the value ch.
PabloE* char_test_expr(int ch);
// Expression matching the inclusive range n1..n2; 0 on a bad range.
PabloE* make_range(int n1, int n2);
// Expression for "low N bits >= n".
PabloE* GE_Range(int N, int n);
// Expression for "low N bits <= n".
PabloE* LE_Range(int N, int n);
// Expression for a single character set item (one character or a range).
PabloE* char_or_range_expr(CharSetItem charset_item);
// Expression for a whole character class (or of its items).
PabloE* charset_expr(CC* cc);
// Rewrites expr into three-address form, emitting temporaries through cgo.
Expression* expr2pabloe(CC_CodeGenObject& cgo, PabloE* expr);
// Generates the assignment for one character class; cgo is passed and returned by value.
CC_CodeGenObject cc2pablos(CC_CodeGenObject cgo, CC* cc);
UTF_Encoding mEncoding; // encoding supplying the bit count, mask and basis pattern
};
#endif // CC_COMPILER_H
/*
* Copyright (c) 2014 International Characters.
* This software is licensed to the public under the Open Software License 3.0.
* icgrep is a trademark of International Characters.
*/
#include "cc_compiler_helper.h"
// Empty constructor. It is declared private in the class, whose other members
// are all static, so no instances are expected to be created.
CC_Compiler_Helper::CC_Compiler_Helper(){}
/*
Optimizing Constructors for Boolean Expressions
-Maintaining Assembler Instruction Form:
-All boolean algebraic rules involving true/false applied.
-Negations restricted:
-no negations within or (DeMorgan's to nand)
-at most one negation within and.
*/
/*
    Builds the negation of expr, simplifying where possible:
    - a true (1) / false (0) literal yields a new literal of the opposite value;
    - Not(e) yields e itself (double negation);
    - anything else is wrapped in a new Not node.
    Every path returns a value: a literal that is neither 0 nor 1 used to fall
    off the end of the function (undefined behaviour) and is now wrapped in Not.
*/
PabloE* CC_Compiler_Helper::make_not(PabloE* expr)
{
    if (All* all = dynamic_cast<All*>(expr))
    {
        if (all->getNum() == 1) //If true literal.
        {
            return new All(0); //Set to false literal.
        }
        if (all->getNum() == 0) //If false literal.
        {
            return new All(1); //Set to true literal.
        }
        //Unrecognised literal value: no simplification, fall through.
    }
    else if (Not* pe_not = dynamic_cast<Not*>(expr))
    {
        return pe_not->getExpr();
    }
    return new Not(expr);
}
/*
    Builds the conjunction of expr1 and expr2, simplifying where possible:
    - true & e == e, false & e == false (a new false literal is returned);
    - e & e == e (as far as equal_exprs can prove equality);
    - !a & !b == !(a | b), so at most one negation stays inside an And;
    - !e & e == false and e & !e == false.
    Anything else becomes a new And node. Every path returns a value: a literal
    operand that is neither 0 nor 1 used to fall off the end of the function
    (undefined behaviour) and now yields a plain And node.
*/
PabloE* CC_Compiler_Helper::make_and(PabloE *expr1, PabloE *expr2)
{
    if (All* lit1 = dynamic_cast<All*>(expr1))
    {
        if (lit1->getNum() == 1)
        {
            return expr2;
        }
        if (lit1->getNum() == 0)
        {
            return new All(0);
        }
    }
    else if (All* lit2 = dynamic_cast<All*>(expr2))
    {
        if (lit2->getNum() == 1)
        {
            return expr1;
        }
        if (lit2->getNum() == 0)
        {
            return new All(0);
        }
    }
    else if (equal_exprs(expr1, expr2))
    {
        return expr1;
    }
    else if (Not* pe_not_e1 = dynamic_cast<Not*>(expr1))
    {
        if (Not* pe_not_e2 = dynamic_cast<Not*>(expr2))
        {
            return make_not(make_or(pe_not_e1->getExpr(), pe_not_e2->getExpr()));
        }
        if (equal_exprs(pe_not_e1->getExpr(), expr2))
        {
            return new All(0); //Return false literal.
        }
    }
    else if (Not* pe_not_e2 = dynamic_cast<Not*>(expr2))
    {
        if (equal_exprs(expr1, pe_not_e2->getExpr()))
        {
            return new All(0);
        }
    }
    //No simplification applied.
    return new And(expr1, expr2);
}
/*
    Builds the disjunction of expr1 and expr2, simplifying where possible:
    - true | e == true, false | e == e;
    - a negated operand is rewritten with De Morgan so that no negation stays
      directly inside an Or: !a | b == !(a & !b), a | !b == !(!a & b);
    - e | e == e;
    - two And operands sharing a component are factored:
      (a & b) | (a & c) == a & (b | c).
    Anything else becomes a new Or node.
*/
PabloE* CC_Compiler_Helper::make_or(PabloE *expr1, PabloE *expr2)
{
    //Find a literal operand (expr1 has priority) and remember the other operand.
    All* literal = dynamic_cast<All*>(expr1);
    PabloE* other = expr2;
    if (!literal)
    {
        literal = dynamic_cast<All*>(expr2);
        other = expr1;
    }

    if (literal)
    {
        if (literal->getNum() == 1)
        {
            return new All(1); //Return a true literal.
        }
        if (literal->getNum() == 0)
        {
            return other;
        }
        //Any other literal value: continue with the factoring step below.
    }
    else
    {
        if (Not* neg1 = dynamic_cast<Not*>(expr1))
        {
            return make_not(make_and(neg1->getExpr(), make_not(expr2)));
        }
        if (Not* neg2 = dynamic_cast<Not*>(expr2))
        {
            return make_not(make_and(make_not(expr1), neg2->getExpr()));
        }
        if (equal_exprs(expr1, expr2))
        {
            return expr1;
        }
    }

    //Factor out a common component, as can occur when sets are formed by union
    //(e.g., the union of [a-z] and [A-Z]).
    And* conj1 = dynamic_cast<And*>(expr1);
    And* conj2 = conj1 ? dynamic_cast<And*>(expr2) : 0;
    if (conj2)
    {
        if (equal_exprs(conj1->getExpr1(), conj2->getExpr1()))
        {
            return make_and(conj1->getExpr1(), make_or(conj1->getExpr2(), conj2->getExpr2()));
        }
        if (equal_exprs(conj1->getExpr2(), conj2->getExpr2()))
        {
            return make_and(conj1->getExpr2(), make_or(conj1->getExpr1(), conj2->getExpr1()));
        }
        if (equal_exprs(conj1->getExpr1(), conj2->getExpr2()))
        {
            return make_and(conj1->getExpr1(), make_or(conj1->getExpr2(), conj2->getExpr1()));
        }
        if (equal_exprs(conj1->getExpr2(), conj2->getExpr1()))
        {
            return make_and(conj1->getExpr2(), make_or(conj1->getExpr1(), conj2->getExpr2()));
        }
    }
    return new Or(expr1, expr2);
}
/*
    Builds sel(if_expr, t_expr, f_expr) -- t_expr where if_expr holds, f_expr
    elsewhere -- simplifying where possible:
    - constant condition: the selected branch is returned directly;
    - sel(c, true, f) == c | f,  sel(c, false, f) == !c & f;
    - sel(c, t, true) == !c | t, sel(c, t, false) == c & t;
    - identical branches: that branch is returned.
    Anything else becomes a new Sel node. Every path returns a value: a literal
    that is neither 0 nor 1 used to fall off the end of the function
    (undefined behaviour) and now yields a plain Sel node.
*/
PabloE* CC_Compiler_Helper::make_sel(PabloE *if_expr, PabloE *t_expr, PabloE *f_expr)
{
    if (All* all_if_expr = dynamic_cast<All*>(if_expr))
    {
        if (all_if_expr->getNum() == 1)
        {
            return t_expr;
        }
        if (all_if_expr->getNum() == 0)
        {
            return f_expr;
        }
    }
    else if (All* all_t_expr = dynamic_cast<All*>(t_expr))
    {
        if (all_t_expr->getNum() == 1)
        {
            return make_or(if_expr, f_expr);
        }
        if (all_t_expr->getNum() == 0)
        {
            return make_and(make_not(if_expr), f_expr);
        }
    }
    else if (All* all_f_expr = dynamic_cast<All*>(f_expr))
    {
        if (all_f_expr->getNum() == 1)
        {
            return make_or(make_not(if_expr), t_expr);
        }
        if (all_f_expr->getNum() == 0)
        {
            return make_and(if_expr, t_expr);
        }
    }
    else if (equal_exprs(t_expr, f_expr))
    {
        return t_expr;
    }
    //No simplification applied.
    return new Sel(if_expr, t_expr, f_expr);
}
/*
    Builds the exclusive-or of expr1 and expr2, simplifying where possible:
    - true ^ e == !e, false ^ e == e (either operand may be the literal);
    - !a ^ !b == a ^ b.
    Anything else becomes a new Xor node.
*/
PabloE* CC_Compiler_Helper::make_xor(PabloE *expr1, PabloE *expr2)
{
    //Find a literal operand (expr1 has priority) and remember the other operand.
    All* literal = dynamic_cast<All*>(expr1);
    PabloE* other = expr2;
    if (!literal)
    {
        literal = dynamic_cast<All*>(expr2);
        other = expr1;
    }
    if (literal)
    {
        if (literal->getNum() == 1)
        {
            return make_not(other);
        }
        if (literal->getNum() == 0)
        {
            return other;
        }
    }

    //Two negations cancel each other under xor.
    Not* neg1 = dynamic_cast<Not*>(expr1);
    Not* neg2 = neg1 ? dynamic_cast<Not*>(expr2) : 0;
    if (neg2)
    {
        return make_xor(neg1->getExpr(), neg2->getExpr());
    }
    return new Xor(expr1, expr2);
}
/*
Return true if expr1 and expr2 can be proven equivalent according to some rules,
false otherwise. Note that false may be returned in some cases when the exprs are
equivalent.
*/
/*
    Structural equality test for two expression trees. Literals, variables and
    Not/And/Or/Xor/Sel nodes are compared recursively; the operands of
    And/Or/Xor may match in either order. The test is conservative: a false
    result does not prove that the expressions differ.
*/
bool CC_Compiler_Helper::equal_exprs(PabloE *expr1, PabloE *expr2)
{
    //Literals: a 0/1 literal only equals a literal of the same truth value.
    if (All* lit1 = dynamic_cast<All*>(expr1))
    {
        const bool lhs_true = (lit1->getNum() == 1);
        if (lhs_true || lit1->getNum() == 0)
        {
            All* lit2 = dynamic_cast<All*>(expr2);
            if (!lit2)
            {
                return false;
            }
            const bool rhs_true = (lit2->getNum() == 1);
            return lhs_true == rhs_true;
        }
    }

    //Variables: equal when their names are equal.
    if (Var* var1 = dynamic_cast<Var*>(expr1))
    {
        Var* var2 = dynamic_cast<Var*>(expr2);
        if (var2)
        {
            return (var1->getVar() == var2->getVar());
        }
    }

    //Negations: compare the negated operands.
    if (Not* neg1 = dynamic_cast<Not*>(expr1))
    {
        Not* neg2 = dynamic_cast<Not*>(expr2);
        if (neg2)
        {
            return equal_exprs(neg1->getExpr(), neg2->getExpr());
        }
    }

    //Commutative binary nodes: try the operands in order, then swapped.
    if (And* and1 = dynamic_cast<And*>(expr1))
    {
        if (And* and2 = dynamic_cast<And*>(expr2))
        {
            return equal_exprs(and1->getExpr1(), and2->getExpr1())
                ? equal_exprs(and1->getExpr2(), and2->getExpr2())
                : (equal_exprs(and1->getExpr1(), and2->getExpr2())
                   && equal_exprs(and1->getExpr2(), and2->getExpr1()));
        }
    }
    if (Or* or1 = dynamic_cast<Or*>(expr1))
    {
        if (Or* or2 = dynamic_cast<Or*>(expr2))
        {
            return equal_exprs(or1->getExpr1(), or2->getExpr1())
                ? equal_exprs(or1->getExpr2(), or2->getExpr2())
                : (equal_exprs(or1->getExpr1(), or2->getExpr2())
                   && equal_exprs(or1->getExpr2(), or2->getExpr1()));
        }
    }
    if (Xor* xor1 = dynamic_cast<Xor*>(expr1))
    {
        if (Xor* xor2 = dynamic_cast<Xor*>(expr2))
        {
            return equal_exprs(xor1->getExpr1(), xor2->getExpr1())
                ? equal_exprs(xor1->getExpr2(), xor2->getExpr2())
                : (equal_exprs(xor1->getExpr1(), xor2->getExpr2())
                   && equal_exprs(xor1->getExpr2(), xor2->getExpr1()));
        }
    }

    //Selections: condition, true branch and false branch must all match.
    if (Sel* sel1 = dynamic_cast<Sel*>(expr1))
    {
        if (Sel* sel2 = dynamic_cast<Sel*>(expr2))
        {
            return equal_exprs(sel1->getIf_expr(), sel2->getIf_expr())
                && equal_exprs(sel1->getT_expr(), sel2->getT_expr())
                && equal_exprs(sel1->getF_expr(), sel2->getF_expr());
        }
    }
    return false;
}
/*
* Copyright (c) 2014 International Characters.
* This software is licensed to the public under the Open Software License 3.0.
* icgrep is a trademark of International Characters.
*/
// The guard is specific to this header. It used to be COMPILER_HELPER_H, the
// same macro that compiler_helper.h uses, so including both headers in one
// translation unit silently dropped whichever came second.
#ifndef CC_COMPILER_HELPER_H
#define CC_COMPILER_HELPER_H
#include "pbix_compiler.h"
#include "cc_compiler.h"
// Optimizing constructors for Pablo boolean expressions. All members are
// static and the constructor is private, so the class acts as a namespace.
class CC_Compiler_Helper
{
public:
    // Negation of expr, with literal and double-negation simplification.
    static PabloE* make_not(PabloE* expr);
    // Conjunction of expr1 and expr2, simplified where possible.
    static PabloE* make_and(PabloE* expr1, PabloE* expr2);
    // Disjunction of expr1 and expr2, simplified where possible.
    static PabloE* make_or(PabloE* expr1, PabloE* expr2);
    // Selection: t_expr where if_expr holds, f_expr elsewhere; simplified where possible.
    static PabloE* make_sel(PabloE* if_expr, PabloE* t_expr, PabloE* f_expr);
    // Exclusive-or of expr1 and expr2, simplified where possible.
    static PabloE* make_xor(PabloE* expr1, PabloE* expr2);
    // Conservative structural equality: true only when the two trees can be
    // shown equivalent; false does not prove they differ.
    static bool equal_exprs(PabloE* expr1, PabloE* expr2);
private:
    CC_Compiler_Helper();
};
#endif // CC_COMPILER_HELPER_H
/*
* Copyright (c) 2014 International Characters.
* This software is licensed to the public under the Open Software License 3.0.
* icgrep is a trademark of International Characters.
*/
#include "compiler.h"
// Default constructor; currently does nothing.
Compiler::Compiler()
{
}
/*
TODO: This will become the main driver for the application. The parser, the cc compiler, the pbix compiler
and the ir generator will all be called from here.
*/
/*
* Copyright (c) 2014 International Characters.
* This software is licensed to the public under the Open Software License 3.0.
* icgrep is a trademark of International Characters.
*/
#ifndef COMPILER_H
#define COMPILER_H
#include "compiler.h"
#include "re_parser.h"
#include "pbix_compiler.h"
#include "llvm_gen.h"
// Placeholder for the application driver: at present the class only declares
// a default constructor.
class Compiler
{
public:
Compiler();
};
#endif // COMPILER_H
/*
TODO: This will become the main driver for the application. The parser, the cc compiler, the pbix compiler
and the ir generator will all be called from here.
*/
/*
* Copyright (c) 2014 International Characters.
* This software is licensed to the public under the Open Software License 3.0.
* icgrep is a trademark of International Characters.
*/
#include "compiler_helper.h"
// Empty constructor. It is declared private in the class, whose other members
// are all static, so no instances are expected to be created.
Compiler_Helper::Compiler_Helper(){}
/*
Optimizing Constructors for Boolean Expressions
-Maintaining Assembler Instruction Form:
-All boolean algebraic rules involving true/false applied.
-Negations restricted:
-no negations within or (DeMorgan's to nand)
-at most one negation within and.
*/
/*
    Builds the negation of expr, simplifying where possible:
    - a true (1) / false (0) literal yields a NEW literal of the opposite value;
    - Not(e) yields e itself (double negation);
    - anything else is wrapped in a new Not node.
    The argument is never modified. The previous version flipped the literal in
    place with setNum(), which changed the value seen through every other
    pointer to that node (make_and/make_or hand their operands back unchanged,
    so nodes are shared). It also fell off the end of the function for a
    literal that is neither 0 nor 1; such a literal is now wrapped in Not.
*/
PabloE* Compiler_Helper::make_not(PabloE* expr)
{
    if (All* all = dynamic_cast<All*>(expr))
    {
        if (all->getNum() == 1) //If true literal.
        {
            return new All(0); //Return a false literal.
        }
        if (all->getNum() == 0) //If false literal.
        {
            return new All(1); //Return a true literal.
        }
        //Unrecognised literal value: no simplification, fall through.
    }
    else if (Not* pe_not = dynamic_cast<Not*>(expr))
    {
        return pe_not->getExpr();
    }
    return new Not(expr);
}
/*
    Builds the conjunction of expr1 and expr2, simplifying where possible:
    - true & e == e, false & e == false (the false operand itself is returned);
    - e & e == e (as far as equal_exprs can prove equality);
    - !a & !b == !(a | b), so at most one negation stays inside an And;
    - !e & e == false and e & !e == false.
    Anything else becomes a new And node. Every path returns a value: a literal
    operand that is neither 0 nor 1 used to fall off the end of the function
    (undefined behaviour) and now yields a plain And node.
*/
PabloE* Compiler_Helper::make_and(PabloE *expr1, PabloE *expr2)
{
    if (All* lit1 = dynamic_cast<All*>(expr1))
    {
        if (lit1->getNum() == 1)
        {
            return expr2;
        }
        if (lit1->getNum() == 0)
        {
            return expr1;
        }
    }
    else if (All* lit2 = dynamic_cast<All*>(expr2))
    {
        if (lit2->getNum() == 1)
        {
            return expr1;
        }
        if (lit2->getNum() == 0)
        {
            return expr2;
        }
    }
    else if (equal_exprs(expr1, expr2))
    {
        return expr1;
    }
    else if (Not* pe_not_e1 = dynamic_cast<Not*>(expr1))
    {
        if (Not* pe_not_e2 = dynamic_cast<Not*>(expr2))
        {
            return make_not(make_or(pe_not_e1->getExpr(), pe_not_e2->getExpr()));
        }
        if (equal_exprs(pe_not_e1->getExpr(), expr2))
        {
            return new All(0); //Return false literal.
        }
    }
    else if (Not* pe_not_e2 = dynamic_cast<Not*>(expr2))
    {
        if (equal_exprs(expr1, pe_not_e2->getExpr()))
        {
            return new All(0);
        }
    }
    //No simplification applied.
    return new And(expr1, expr2);
}
/*
    Builds the disjunction of expr1 and expr2, simplifying where possible:
    - true | e == true (the true operand itself is returned), false | e == e;
    - a negated operand is rewritten with De Morgan so that no negation stays
      directly inside an Or: !a | b == !(a & !b), a | !b == !(!a & b);
    - e | e == e;
    - two And operands sharing a component are factored:
      (a & b) | (a & c) == a & (b | c).
    Anything else becomes a new Or node.
*/
PabloE* Compiler_Helper::make_or(PabloE *expr1, PabloE *expr2)
{
    //Find a literal operand (expr1 has priority); `literal_operand` is that
    //operand as a PabloE*, `other` is the remaining one.
    All* literal = dynamic_cast<All*>(expr1);
    PabloE* literal_operand = expr1;
    PabloE* other = expr2;
    if (!literal)
    {
        literal = dynamic_cast<All*>(expr2);
        literal_operand = expr2;
        other = expr1;
    }

    if (literal)
    {
        if (literal->getNum() == 1)
        {
            return literal_operand; //Return the true literal.
        }
        if (literal->getNum() == 0)
        {
            return other;
        }
        //Any other literal value: continue with the factoring step below.
    }
    else
    {
        if (Not* neg1 = dynamic_cast<Not*>(expr1))
        {
            return make_not(make_and(neg1->getExpr(), make_not(expr2)));
        }
        if (Not* neg2 = dynamic_cast<Not*>(expr2))
        {
            return make_not(make_and(make_not(expr1), neg2->getExpr()));
        }
        if (equal_exprs(expr1, expr2))
        {
            return expr1;
        }
    }

    //Factor out a common component, as can occur when sets are formed by union
    //(e.g., the union of [a-z] and [A-Z]).
    And* conj1 = dynamic_cast<And*>(expr1);
    And* conj2 = conj1 ? dynamic_cast<And*>(expr2) : 0;
    if (conj2)
    {
        if (equal_exprs(conj1->getExpr1(), conj2->getExpr1()))
        {
            return make_and(conj1->getExpr1(), make_or(conj1->getExpr2(), conj2->getExpr2()));
        }
        if (equal_exprs(conj1->getExpr2(), conj2->getExpr2()))
        {
            return make_and(conj1->getExpr2(), make_or(conj1->getExpr1(), conj2->getExpr1()));
        }
        if (equal_exprs(conj1->getExpr1(), conj2->getExpr2()))
        {
            return make_and(conj1->getExpr1(), make_or(conj1->getExpr2(), conj2->getExpr1()));
        }
        if (equal_exprs(conj1->getExpr2(), conj2->getExpr1()))
        {
            return make_and(conj1->getExpr2(), make_or(conj1->getExpr1(), conj2->getExpr2()));
        }
    }
    return new Or(expr1, expr2);
}
/*
    Builds sel(if_expr, t_expr, f_expr) -- t_expr where if_expr holds, f_expr
    elsewhere -- simplifying where possible:
    - constant condition: the selected branch is returned directly;
    - sel(c, true, f) == c | f,  sel(c, false, f) == !c & f;
    - sel(c, t, true) == !c | t, sel(c, t, false) == c & t;
    - identical branches: that branch is returned.
    Anything else becomes a new Sel node. Every path returns a value: a literal
    that is neither 0 nor 1 used to fall off the end of the function
    (undefined behaviour) and now yields a plain Sel node.
*/
PabloE* Compiler_Helper::make_sel(PabloE *if_expr, PabloE *t_expr, PabloE *f_expr)
{
    if (All* all_if_expr = dynamic_cast<All*>(if_expr))
    {
        if (all_if_expr->getNum() == 1)
        {
            return t_expr;
        }
        if (all_if_expr->getNum() == 0)
        {
            return f_expr;
        }
    }
    else if (All* all_t_expr = dynamic_cast<All*>(t_expr))
    {
        if (all_t_expr->getNum() == 1)
        {
            return make_or(if_expr, f_expr);
        }
        if (all_t_expr->getNum() == 0)
        {
            return make_and(make_not(if_expr), f_expr);
        }
    }
    else if (All* all_f_expr = dynamic_cast<All*>(f_expr))
    {
        if (all_f_expr->getNum() == 1)
        {
            return make_or(make_not(if_expr), t_expr);
        }
        if (all_f_expr->getNum() == 0)
        {
            return make_and(if_expr, t_expr);
        }
    }
    else if (equal_exprs(t_expr, f_expr))
    {
        return t_expr;
    }
    //No simplification applied.
    return new Sel(if_expr, t_expr, f_expr);
}
/*
    Builds the exclusive-or of expr1 and expr2, simplifying where possible:
    - true ^ e == !e, false ^ e == e (either operand may be the literal);
    - !a ^ !b == a ^ b.
    Anything else becomes a new Xor node.
*/
PabloE* Compiler_Helper::make_xor(PabloE *expr1, PabloE *expr2)
{
    //Find a literal operand (expr1 has priority) and remember the other operand.
    All* literal = dynamic_cast<All*>(expr1);
    PabloE* other = expr2;
    if (!literal)
    {
        literal = dynamic_cast<All*>(expr2);
        other = expr1;
    }
    if (literal)
    {
        if (literal->getNum() == 1)
        {
            return make_not(other);
        }
        if (literal->getNum() == 0)
        {
            return other;
        }
    }

    //Two negations cancel each other under xor.
    Not* neg1 = dynamic_cast<Not*>(expr1);
    Not* neg2 = neg1 ? dynamic_cast<Not*>(expr2) : 0;
    if (neg2)
    {
        return make_xor(neg1->getExpr(), neg2->getExpr());
    }
    return new Xor(expr1, expr2);
}
/*
Return true if expr1 and expr2 can be proven equivalent according to some rules,
false otherwise. Note that false may be returned in some cases when the exprs are
equivalent.
*/
/*
    Structural equality test for two expression trees. Literals, variables and
    Not/And/Or/Xor/Sel nodes are compared recursively; the operands of
    And/Or/Xor may match in either order. The test is conservative: a false
    result does not prove that the expressions differ.
*/
bool Compiler_Helper::equal_exprs(PabloE *expr1, PabloE *expr2)
{
    //Literals: a 0/1 literal only equals a literal of the same truth value.
    if (All* lit1 = dynamic_cast<All*>(expr1))
    {
        const bool lhs_true = (lit1->getNum() == 1);
        if (lhs_true || lit1->getNum() == 0)
        {
            All* lit2 = dynamic_cast<All*>(expr2);
            if (!lit2)
            {
                return false;
            }
            const bool rhs_true = (lit2->getNum() == 1);
            return lhs_true == rhs_true;
        }
    }

    //Variables: equal when their names are equal.
    if (Var* var1 = dynamic_cast<Var*>(expr1))
    {
        Var* var2 = dynamic_cast<Var*>(expr2);
        if (var2)
        {
            return (var1->getVar() == var2->getVar());
        }
    }

    //Negations: compare the negated operands.
    if (Not* neg1 = dynamic_cast<Not*>(expr1))
    {
        Not* neg2 = dynamic_cast<Not*>(expr2);
        if (neg2)
        {
            return equal_exprs(neg1->getExpr(), neg2->getExpr());
        }
    }

    //Commutative binary nodes: try the operands in order, then swapped.
    if (And* and1 = dynamic_cast<And*>(expr1))
    {
        if (And* and2 = dynamic_cast<And*>(expr2))
        {
            return equal_exprs(and1->getExpr1(), and2->getExpr1())
                ? equal_exprs(and1->getExpr2(), and2->getExpr2())
                : (equal_exprs(and1->getExpr1(), and2->getExpr2())
                   && equal_exprs(and1->getExpr2(), and2->getExpr1()));
        }
    }
    if (Or* or1 = dynamic_cast<Or*>(expr1))
    {
        if (Or* or2 = dynamic_cast<Or*>(expr2))
        {
            return equal_exprs(or1->getExpr1(), or2->getExpr1())
                ? equal_exprs(or1->getExpr2(), or2->getExpr2())
                : (equal_exprs(or1->getExpr1(), or2->getExpr2())
                   && equal_exprs(or1->getExpr2(), or2->getExpr1()));
        }
    }
    if (Xor* xor1 = dynamic_cast<Xor*>(expr1))
    {
        if (Xor* xor2 = dynamic_cast<Xor*>(expr2))
        {
            return equal_exprs(xor1->getExpr1(), xor2->getExpr1())
                ? equal_exprs(xor1->getExpr2(), xor2->getExpr2())
                : (equal_exprs(xor1->getExpr1(), xor2->getExpr2())
                   && equal_exprs(xor1->getExpr2(), xor2->getExpr1()));
        }
    }

    //Selections: condition, true branch and false branch must all match.
    if (Sel* sel1 = dynamic_cast<Sel*>(expr1))
    {
        if (Sel* sel2 = dynamic_cast<Sel*>(expr2))
        {
            return equal_exprs(sel1->getIf_expr(), sel2->getIf_expr())
                && equal_exprs(sel1->getT_expr(), sel2->getT_expr())
                && equal_exprs(sel1->getF_expr(), sel2->getF_expr());
        }
    }
    return false;
}
/*
* Copyright (c) 2014 International Characters.
* This software is licensed to the public under the Open Software License 3.0.
* icgrep is a trademark of International Characters.
*/
#ifndef COMPILER_HELPER_H
#define COMPILER_HELPER_H
#include "compiler.h"
#include "cc_compiler.h"
// Optimizing constructors for Pablo boolean expressions. All usable members
// are static and the constructor is private, so the class acts as a namespace.
class Compiler_Helper //: public CC_Compiler
{
public:
//protected:
//Compiler_Helper();
// Negation of expr, with literal and double-negation simplification.
static PabloE* make_not(PabloE* expr);
// Conjunction of expr1 and expr2, simplified where possible.
static PabloE* make_and(PabloE* expr1, PabloE* expr2);
// Disjunction of expr1 and expr2, simplified where possible.
static PabloE* make_or(PabloE* expr1, PabloE* expr2);
// Selection: t_expr where if_expr holds, f_expr elsewhere; simplified where possible.
static PabloE* make_sel(PabloE* if_expr, PabloE* t_expr, PabloE* f_expr);
// Exclusive-or of expr1 and expr2, simplified where possible.
static PabloE* make_xor(PabloE* expr1, PabloE* expr2);
// Conservative structural equality: true only when the two trees can be shown
// equivalent; false does not prove they differ.
static bool equal_exprs(PabloE* expr1, PabloE* expr2);
private:
Compiler_Helper();
};
#endif // COMPILER_HELPER_H
#ifndef __HRTIME_H__
#define __HRTIME_H__
//Downloaded from code.Google.com
#include <stdio.h>
#include <string.h>
#include <assert.h>
// Reports the CPU clock rate in MHz (CPU cycles per microsecond) as listed on
// the first parsable "cpu MHz" line of the Linux /proc/cpuinfo file.
// Returns a negative value (-1) when the file cannot be opened or no such
// line can be parsed.
inline double getMHZ(void) {
    FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
    if (cpuinfo == NULL)
        return -1;

    double mhz = -1;
    char line[1024];
    while (fgets(line, 1024, cpuinfo) != NULL) {
        // skip every line that does not carry the MHz information
        if (strstr(line, "cpu MHz") == NULL)
            continue;
        // the number follows the first ':' on the line
        const char* colon = strchr(line, ':');
        if (colon != NULL && sscanf(colon + 1, "%lf", &mhz) == 1)
            break;
    }
    fclose(cpuinfo);
    return mhz;
}
// Reads the CPU time-stamp counter with the x86 rdtsc instruction and returns
// it as a 64-bit cycle count (EDX holds the high 32 bits, EAX the low 32 bits).
// The asm statement is volatile: it has outputs but no inputs, so without
// volatile the compiler may treat two reads as the same value and merge or
// hoist them, which would make elapsed-time differences come out as zero.
inline unsigned long long get_hrcycles() {
    unsigned int lo, hi;
    asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));
    return (static_cast<unsigned long long>(hi) << 32) | lo;
}
// Returns the current cycle count converted to seconds: get_hrcycles() divided
// by the clock rate in Hz (getMHZ() * 1,000,000). The clock rate is looked up
// on the first call and cached in a function-local static.
// NOTE: when getMHZ() fails (returns -1) the cached rate is negative and so is
// the result.
inline double getElapsedTime() {
    static double cycles_per_second = 0;
    if (cycles_per_second == 0) {
        cycles_per_second = getMHZ() * 1000000;
    }
    const unsigned long long cycles = get_hrcycles();
    return (cycles / cycles_per_second);
}
#endif // __HRTIME_H__
/*
* Copyright (c) 2014 International Characters.
* This software is licensed to the public under the Open Software License 3.0.
* icgrep is a trademark of International Characters.
*/
#include "utf_encoding.h"
#include "re_compiler.h"
#include <fstream>
#include <sstream>
#include <iostream>
#include <string>
#include <stdint.h>
#define assert_0_error(errkind, errstrm)
// XMLWF application headers and definitions
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <simd-lib/bitblock.hpp>
#include <simd-lib/carryQ.hpp>
#include <simd-lib/pabloSupport.hpp>
#include <simd-lib/s2p.hpp>
#include <simd-lib/buffer.hpp>
#include <simd-lib/bitblock_iterator.hpp>
//#include <simd-lib/perflib/perfsec.h>
// Number of bit blocks that make up one segment of input.
#define SEGMENT_BLOCKS 15
// Segment size: SEGMENT_BLOCKS blocks of BLOCK_SIZE positions each. It is used
// as the byte size of the source buffer in do_process.
#define SEGMENT_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
// Number of segments in a buffer, and the resulting buffer size.
#define BUFFER_SEGMENTS 15
#define BUFFER_SIZE (BUFFER_SEGMENTS * SEGMENT_SIZE)
// Declares a BitBlock variable called `name`.
#define BitBlock_declare(name) BitBlock name
// Declares an array `name` of n ubitblock values and sets the _128 member of
// each element to simd<1>::constant<0>().
#define ubitblock_declare(name, n) \
ubitblock name[n];\
do {int i;\
for (i = 0; i < n; i++) name[i]._128 = simd<1>::constant<0>();\
}\
while (0)
// Global mask handed to s2p_do_final_block; do_process overwrites it before
// processing the final partial block.
BitBlock EOF_mask = simd<1>::constant<1>();
// Result of one process_block call, passed to it by reference.
// NOTE(review): the field layout presumably has to match what the generated
// process_block code expects -- confirm before reordering.
struct Output {
// Bit stream loaded into the match scanner / counted in count-only mode.
BitBlock matches;
// Bit stream loaded into the line-feed scanner.
BitBlock LF;
};
// The eight basis bit streams for one block, filled in by s2p_do_block /
// s2p_do_final_block and read by process_block.
// NOTE(review): the member names follow the "basis_bits.bit_" pattern passed to
// the compiler in main; the layout presumably has to match the generated code.
struct Basis_bits {
BitBlock bit_0;
BitBlock bit_1;
BitBlock bit_2;
BitBlock bit_3;
BitBlock bit_4;
BitBlock bit_5;
BitBlock bit_6;
BitBlock bit_7;
};
#include <simd-lib/transpose.hpp>
using namespace std;
typedef void (*process_block_fcn)(const Basis_bits &basis_bits, BitBlock carry_q[], Output &output);
void do_process(FILE *infile, FILE *outfile, int count_only_option, int carry_count, process_block_fcn process_block);
int main(int argc, char *argv[])
{
double timer;
char * inregex, * fileregex, * infilename, * outfilename;
FILE *infile, *outfile, *regexfile;
int opt_code;
int count_only_option = 0;
int print_version_option = 0;
int regex_from_file_option = 0;
int compile_time_option = 0;
size_t result;
long lSize;
while ((opt_code = getopt(argc, argv, "cvft")) != -1)
{
switch (opt_code)
{
case 'c':
count_only_option = 1;
break;
case 'v':
print_version_option = 1;
break;
case 'f':
regex_from_file_option = 1;
break;
case 't':
compile_time_option = 1;
break;
case '?':
break;
default:
printf ("Invalid option: %c\n", opt_code);
printf("Usage: %s [-c] [-v] [-f] <regex|regexfile> <inputfile> [<outputfile>]\n", argv[0]);
exit(-1);
}
}
if (optind >= argc)
{
printf ("Too few arguments\n");
printf("Usage: %s [-c] [-v] [-f] <regex|regexfile> <inputfile> [<outputfile>]\n", argv[0]);
exit(-1);
}
inregex = argv[optind++];
if (inregex == 0)
{
fprintf(stderr, "Error: cannot read the regular expression.\n");
exit(-1);
}
if (regex_from_file_option)
{
regexfile = fopen(inregex, "rb");
if (!regexfile){
fprintf(stderr, "Error: cannot open %s for processing.\n", inregex);
exit(-1);
}
fseek (regexfile , 0 , SEEK_END);
lSize = ftell (regexfile);
rewind (regexfile);
fileregex = (char*) malloc (sizeof(char)*lSize);
if (fileregex == NULL) {fputs ("Memory error",stderr); exit (2);}
result = fread (fileregex, 1, lSize, regexfile);
if (result != lSize) {fputs ("Reading error",stderr); exit (3);}
fclose(regexfile);
if (fileregex[lSize - 1] == '\n') fileregex[lSize - 1] = '\0';
}
infilename = argv[optind++];
infile = fopen(infilename, "rb");
if (!infile) {
fprintf(stderr, "Error: cannot open %s for processing.\n", infilename);
exit(-1);
}
if (optind >= argc) outfile = stdout;
else
{
outfilename = argv[optind++];
if (optind != argc)
{
printf ("Too many arguments\n");
printf("Usage: %s [-c] [-v] [-f] <regex|regexfile> <inputfile> [<outputfile>]\n", argv[0]);
exit(-1);
}
outfile = fopen(outfilename, "wb");
if (!outfile)
{
fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
exit(-1);
}
}
if (print_version_option)
{
fprintf(outfile, "Parabix icgrep implementation: April 2014\n");
}
UTF_Encoding encoding;
encoding.setName("UTF-8");
encoding.setBits(8);
encoding.setMask(0xFF);
RE_Compiler* re_compiler = new RE_Compiler();
if (compile_time_option) timer = getElapsedTime();
LLVM_Gen_RetVal llvm_codegen = re_compiler->compile(compile_time_option, "basis_bits.bit_", "temp", encoding ,(regex_from_file_option ? fileregex : inregex));
if (compile_time_option)
{
timer = getElapsedTime() - timer;
std::cout << "Total Compile Time: " << timer << " seconds" << std::endl;
}
if (llvm_codegen.process_block_fptr != 0)
{
void (*FP)(const Basis_bits &basis_bits, BitBlock carry_q[], Output &output) = (void (*)(const Basis_bits &basis_bits, BitBlock carry_q[], Output &output))(void*)llvm_codegen.process_block_fptr;
do_process(infile, outfile, count_only_option, llvm_codegen.carry_q_size, FP);
}
delete re_compiler;
fclose(infile);
fclose(outfile);
if (regex_from_file_option) free(fileregex);
return 0;
}
// Runs process_block over the whole of infile, one segment (SEGMENT_SIZE bytes)
// at a time, and reports the result on outfile.
//   infile            - input stream to scan
//   outfile           - receives each line containing a match or, when
//                       count_only_option is set, a single "Matching Lines:<n>" line
//   count_only_option - non-zero: count instead of printing the matching lines
//   carry_count       - number of BitBlock entries in the carry queue passed to process_block
//   process_block     - function that fills Output from the Basis_bits of one block
// NOTE(review): carry_q is a variable-length array, which is a compiler
// extension in C++.
void do_process(FILE *infile, FILE *outfile, int count_only_option, int carry_count, process_block_fcn process_block) {
struct Basis_bits basis_bits;
struct Output output;
// Carry queue handed to every process_block call; starts out zeroed.
BitBlock carry_q[carry_count];
memset (carry_q, 0, sizeof(BitBlock) * carry_count);
// Match bits accumulated across blocks in count-only mode.
BitBlock match_vector = simd<1>::constant<0>();
int match_count=0;
int blk = 0;
int block_base = 0;
int block_pos = 0;
int buffer_pos = 0;
int chars_avail = 0;
int chars_read = 0;
int line_start = 0;
int line_end = 0;
int match_pos = 0;
int line_no = 0;
int counter = 0;
// Scanners over the LF and match bit streams of one segment.
BitStreamScanner<BitBlock, uint64_t, uint64_t, SEGMENT_BLOCKS> LF_scanner;
BitStreamScanner<BitBlock, uint64_t, uint64_t, SEGMENT_BLOCKS> match_scanner;
ATTRIBUTE_SIMD_ALIGN char src_buffer[SEGMENT_SIZE];
// Prime the buffer with the first segment.
chars_read = fread((void *)&src_buffer[0], 1, SEGMENT_SIZE, infile);
chars_avail = chars_read;
if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
//////////////////////////////////////////////////////////////////////////////////////////
// Full Segments
//////////////////////////////////////////////////////////////////////////////////////////
while (chars_avail >= SEGMENT_SIZE) {
LF_scanner.init();
match_scanner.init();
counter++;
// Process the segment block by block and feed both scanners.
for (blk = 0; blk < SEGMENT_BLOCKS; blk++) {
block_base = blk*BLOCK_SIZE;
s2p_do_block((BytePack *) &src_buffer[block_base], basis_bits);
process_block(basis_bits, carry_q, output);
LF_scanner.load_block(output.LF, blk);
match_scanner.load_block(output.matches, blk);
if (count_only_option){
if (bitblock::any(output.matches))
{
// When the new matches share a position with the accumulated ones, add the
// accumulated population to the count and restart from the new matches;
// otherwise merge the new matches into the accumulator.
if (bitblock::any(simd_and(match_vector, output.matches))){
match_count += bitblock::popcount(match_vector);
match_vector = output.matches;
}
else
{
match_vector = simd_or(match_vector, output.matches);
}
}
}
}
// Decide how much of the segment is carried over to the next round: everything
// after the last line feed, or nothing when the segment holds no line feed.
// The carry queue is reset only when a line feed was found.
int copy_back_pos = 0;
if (LF_scanner.count() > 0) {
copy_back_pos = LF_scanner.get_final_pos() + 1;
memset (carry_q, 0, sizeof(BitBlock) * carry_count);
}
else {
copy_back_pos = SEGMENT_SIZE;
}
int copy_back_size = SEGMENT_SIZE - copy_back_pos;
if (!count_only_option) {
line_start = 0;
// For each match, advance the LF scanner to the first line feed at or after the
// match position and write out the line that ends there.
// NOTE(review): this assumes a line feed follows every match within the
// segment -- confirm what scan_to_next() returns once the LF stream is exhausted.
while (match_scanner.has_next()) {
match_pos = match_scanner.scan_to_next();
line_end = LF_scanner.scan_to_next();
while (line_end < match_pos) {
line_start = line_end+1;
line_no++;
line_end = LF_scanner.scan_to_next();
}
fwrite(&src_buffer[line_start], 1, line_end - line_start + 1, outfile);
line_start = line_end+1;
line_no++;
}
// Consume the remaining line feeds so that line_no keeps counting lines.
while (LF_scanner.has_next()) {
line_end = LF_scanner.scan_to_next();
line_no++;
}
}
// Move the unfinished tail to the front of the buffer.
memmove(&src_buffer[0], &src_buffer[copy_back_pos], copy_back_size);
//Do another read to fill the rest of the buffer.
chars_read = fread(&src_buffer[copy_back_size], 1, copy_back_pos, infile);
chars_avail = chars_read + copy_back_size;
if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
buffer_pos += chars_avail;
}
//////////////////////////////////////////////////////////////////////////////////////////
// For the Final Partial Segment.
//////////////////////////////////////////////////////////////////////////////////////////
block_pos = 0;
int remaining = chars_avail;
LF_scanner.init();
match_scanner.init();
/* Full Blocks */
blk = 0;
while (remaining >= BLOCK_SIZE) {
block_base = block_pos;
s2p_do_block((BytePack *) &src_buffer[block_pos], basis_bits);
process_block(basis_bits, carry_q, output);
LF_scanner.load_block(output.LF, blk);
match_scanner.load_block(output.matches, blk);
if (count_only_option)
{
if (bitblock::any(output.matches))
{
// Same accumulation rule as in the full-segment loop.
if (bitblock::any(simd_and(match_vector, output.matches)))
{
match_count += bitblock::popcount(match_vector);
match_vector = output.matches;
}
else
{
match_vector = simd_or(match_vector, output.matches);
}
}
}
block_pos += BLOCK_SIZE;
remaining -= BLOCK_SIZE;
blk++;
}
block_base = block_pos;
//For the last partial block, or for any carry.
// EOF_mask is rebuilt from the number of bytes left in the block (`remaining`)
// and passed to s2p_do_final_block.
// NOTE(review): when remaining is 0 the shift amount equals BLOCK_SIZE --
// confirm that bitblock::srl handles a full-width shift.
EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
s2p_do_final_block((BytePack *) &src_buffer[block_pos], basis_bits, EOF_mask);
process_block(basis_bits, carry_q, output);
if (count_only_option)
{
// Add what is still held in the accumulator plus the matches of the final block.
match_count += bitblock::popcount(match_vector);
if (bitblock::any(output.matches))
{
match_count += bitblock::popcount(output.matches);
}
fprintf(outfile, "Matching Lines:%d\n", match_count);
}
else
{
LF_scanner.load_block(output.LF, blk);
match_scanner.load_block(output.matches, blk);
blk++;
// Clear the scanner blocks that were not loaded for this partial segment.
for (int i = blk; i < SEGMENT_BLOCKS; i++)
{
LF_scanner.load_block(simd<1>::constant<0>(), i);
match_scanner.load_block(simd<1>::constant<0>(), i);
}
line_start = 0;
// Write out the matching lines, as in the full-segment loop.
while (match_scanner.has_next())
{
match_pos = match_scanner.scan_to_next();
line_end = LF_scanner.scan_to_next();
while(line_end < match_pos)
{
line_start = line_end + 1;
line_no++;
line_end = LF_scanner.scan_to_next();
}
fwrite(&src_buffer[line_start], 1, line_end - line_start + 1, outfile);
line_start = line_end + 1;
line_no++;
}
while(LF_scanner.has_next())
{
line_end = LF_scanner.scan_to_next();
line_no++;
}
}
buffer_pos += chars_avail;
}
add_subdirectory(simd-lib)
add_subdirectory(idisa_cpp)
#ifndef BITBLOCK_HPP
#define BITBLOCK_HPP
/*=============================================================================
Copyright (C) 2011, Robert D. Cameron, Kenneth S. Herdy
Licensed to the public under the Open Software License 3.0.
Licensed to International Characters Inc.
under the Academic Free License version 3.0.
=============================================================================*/
// #define NDEBUG // if NDEBUG then disable assertions
#define __STDC_LIMIT_MACROS
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
#include "config.hpp"
#include "builtins.hpp"
#include "idisa.hpp"
#define BytePack BitBlock
#ifndef BLOCK_SIZE
#define BLOCK_SIZE 128
#endif
#ifndef ATTRIBUTE_SIMD_ALIGN
#if defined _MSC_VER
//note: MSVC++ cannot accept sizeof or division within __declspec(align(...))
#define ATTRIBUTE_SIMD_ALIGN __declspec(align(16))
#elif defined __GNUC__
#define ATTRIBUTE_SIMD_ALIGN __attribute__((aligned(sizeof(BitBlock))))
#else
#define ATTRIBUTE_SIMD_ALIGN
#endif
#endif
template<class T> void print_register(const char * var_name, T v);
static IDISA_ALWAYS_INLINE uint32_t count_forward_zeroes(BitBlock v);
static IDISA_ALWAYS_INLINE uint32_t count_reverse_zeroes(BitBlock v);
static IDISA_ALWAYS_INLINE BitBlock mask_forward_zeroes(uint32_t count);
static IDISA_ALWAYS_INLINE BitBlock mask_reverse_zeroes(uint32_t count);
static IDISA_ALWAYS_INLINE uint32_t bitstream_scan(BitBlock * v, uint32_t pos);
/* BitBlock union type */
union ubitblock;
/* Default BLOCK_SIZE is 128, compatible with SSE, Altivec, SPU */
#if (BLOCK_SIZE == 128)
#include "bitblock128.hpp"
#endif
/* BLOCK_SIZE 256 for AVX */
#if (BLOCK_SIZE == 256)
#include "bitblock256.hpp"
#endif
template <class T> void print_register(const char * var_name, T v);
// Debug helper: prints "<var_name> = " (name right-aligned in 40 columns)
// followed by the bytes of v as two-digit upper-case hex values, starting with
// the byte at the highest address, then a newline.
template <class T>
void print_register(const char * var_name, T v) {
    printf("%40s = ", var_name);
    const unsigned char * bytes = (const unsigned char *)&v;
    for (size_t remaining = sizeof(T); remaining > 0; --remaining) {
        printf("%02X ", bytes[remaining - 1]);
    }
    printf("\n");
}
// Counts the zero bits in front of the first set bit of v, scanning the
// ScanWord elements from index 0 upwards. Returns the total number of bits in
// the block when v is all zero.
IDISA_ALWAYS_INLINE uint32_t count_forward_zeroes(BitBlock v) {
    union {BitBlock bitblock; ScanWord elems[sizeof(BitBlock)/sizeof(ScanWord)];} view;
    view.bitblock = v;
    uint32_t skipped_bits = 0;
    unsigned int idx = 0;
    while (idx < sizeof(BitBlock)/sizeof(ScanWord)) {
        // skipped_bits is a multiple of the word width, so | acts as + here
        if (view.elems[idx] != 0) return skipped_bits | scan_forward_zeroes(view.elems[idx]);
        skipped_bits += 8 * sizeof(ScanWord);
        idx++;
    }
    return skipped_bits;
}
// Returns simd<1>::constant<1>() shifted left (bitblock::sll) by `count`, or
// simd<1>::constant<0>() when count is at least the bit width of a BitBlock.
IDISA_ALWAYS_INLINE BitBlock mask_forward_zeroes(uint32_t count) {
    const bool within_block = !(count >= sizeof(BitBlock) * 8);
    if (within_block) {
        return bitblock::sll(simd<1>::constant<1>(), convert(count));
    }
    return simd<1>::constant<0>();
}
// Counts the zero bits behind the last set bit of v, scanning the ScanWord
// elements from the highest index downwards. Returns the total number of bits
// in the block when v is all zero.
IDISA_ALWAYS_INLINE uint32_t count_reverse_zeroes(BitBlock v) {
    union {BitBlock bitblock; ScanWord elems[sizeof(BitBlock)/sizeof(ScanWord)];} view;
    view.bitblock = v;
    uint32_t skipped_bits = 0;
    unsigned int idx = sizeof(BitBlock)/sizeof(ScanWord);
    while (idx != 0) {
        --idx;
        // skipped_bits is a multiple of the word width, so | acts as + here
        if (view.elems[idx] != 0) return skipped_bits | scan_backward_zeroes(view.elems[idx]);
        skipped_bits += 8 * sizeof(ScanWord);
    }
    return skipped_bits;
}
// Returns simd<1>::constant<1>() shifted right (bitblock::srl) by `count`, or
// simd<1>::constant<0>() when count is at least the bit width of a BitBlock.
IDISA_ALWAYS_INLINE BitBlock mask_reverse_zeroes(uint32_t count) {
    const bool within_block = !(count >= sizeof(BitBlock) * 8);
    if (within_block) {
        return bitblock::srl(simd<1>::constant<1>(), convert(count));
    }
    return simd<1>::constant<0>();
}
// Returns the position of the first set bit at or after bit position `pos` in
// the bit stream starting at v. The search reads ScanWord-sized words starting
// at the byte that contains `pos` and keeps advancing until a non-zero word is
// found; there is no upper bound, so the caller must guarantee that a set bit
// exists.
IDISA_ALWAYS_INLINE uint32_t bitstream_scan(BitBlock * v, uint32_t pos) {
    const intptr_t stream_base = (intptr_t) v;
    ScanWord * cursor = (ScanWord *) (stream_base + pos/8);
    // drop the bits below `pos` inside the first byte
    ScanWord window = ((*cursor) >> (pos % 8));
    if (window != 0) {
        return pos + scan_forward_zeroes(window);
    }
    do {
        cursor++;
        window = *cursor;
    } while (window == 0);
    // bit position at which the non-zero word starts
    uint32_t word_start = 8*((intptr_t) cursor - stream_base);
    return word_start + scan_forward_zeroes(window);
}
// Asserts that addr is a multiple of sizeof(BitBlock), i.e. that its low
// address bits are all clear. Compiles to nothing when assertions are disabled.
static IDISA_ALWAYS_INLINE void assert_bitblock_align(void * addr) {
    assert(((intptr_t)(addr) & (sizeof(BitBlock)-1)) == 0);
}
// Asserts that the BitBlock object passed in is stored at an address that is a
// multiple of sizeof(BitBlock). The block is taken by const reference so that
// the caller's object is checked; when it was taken by value the assertion
// looked at the address of a local copy and could not detect a misaligned
// caller variable. Existing call sites compile unchanged.
static IDISA_ALWAYS_INLINE void assert_bitblock_align(const BitBlock & v) {
    assert(0 == ((intptr_t)(&v) & (sizeof(BitBlock)-1)));
}
#define ASSERT_BITBLOCK_ALIGN(v) assert_bitblock_align(v)
#endif // BITBLOCK_HPP
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment