Shellmiao 5 місяців тому
батько
коміт
94fa71b671

+ 0 - 0
scripts/CombineScript.sh → scripts/c/CombineScript.sh


+ 0 - 0
scripts/CompileModuleFusionScript.sh → scripts/c/CompileModuleFusionScript.sh


+ 0 - 0
scripts/CompileProprocessScript.sh → scripts/c/CompileProprocessScript.sh


+ 0 - 0
scripts/ModuleFusionScript.sh → scripts/c/ModuleFusionScript.sh


+ 0 - 0
scripts/ProprocessScript.sh → scripts/c/ProprocessScript.sh


+ 19 - 0
scripts/cpp/CombineCpp.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+# 获取脚本所在目录
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+# 切换到项目根目录
+cd "$SCRIPT_DIR/../.."
+
+# 确保 combine_output/cpp 目录存在
+mkdir -p combine_output/cpp
+
+# 为C++项目调整命令
+clang++ -S -emit-llvm -std=c++11 data/cpp/ProjectA.cpp -o combine_output/cpp/projectA_cpp.ll -DPROJECTA_MAIN -Dmain=projectA_main
+clang++ -S -emit-llvm -std=c++11 data/cpp/ProjectB.cpp -o combine_output/cpp/projectB_cpp.ll -DPROJECTB_MAIN -Dmain=projectB_main
+llvm-as combine_output/cpp/projectA_cpp.ll -o combine_output/cpp/projectA_cpp.bc
+llvm-as combine_output/cpp/projectB_cpp.ll -o combine_output/cpp/projectB_cpp.bc
+llvm-link combine_output/cpp/projectA_cpp.bc combine_output/cpp/projectB_cpp.bc -o combine_output/cpp/combined_cpp.bc
+llvm-dis combine_output/cpp/combined_cpp.bc -o combine_output/cpp/combined_cpp.ll
+
+echo "C++ projects combined successfully."
+echo "Combined LLVM IR is at: combine_output/cpp/combined_cpp.ll" 

+ 21 - 0
scripts/cpp/CompileCppModuleFusionScript.sh

@@ -0,0 +1,21 @@
+#!/bin/bash
+# 获取脚本所在目录
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+# 切换到项目根目录
+cd "$SCRIPT_DIR/../.."
+
+# 确保build目录存在
+mkdir -p build
+
+clang++ -fPIC -shared \
+    src/CppModuleFusion.cpp \
+    src/Util/LogSystem.cpp \
+    src/Util/CallGraphManager.cpp \
+    src/Util/Utils.cpp \
+    src/Fusion/SliceAnalyzer.cpp \
+    src/Fusion/Fusion.cpp \
+    src/Fusion/CppFusion.cpp \
+    -I./src \
+    `llvm-config --cxxflags --ldflags --libs core analysis passes support` \
+    -Wl,--no-undefined \
+    -o build/CppModuleFusion.so 

+ 10 - 0
scripts/cpp/CompileCppProprocessScript.sh

@@ -0,0 +1,10 @@
+#!/bin/bash
+# Builds the C++ preprocessing pass plugin as build/CppProprocessFunctionPass.so.
+
+# Resolve the directory this script lives in, then move to the project root.
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+cd "$SCRIPT_DIR/../.."
+
+# The output directory must exist before clang++ writes into it.
+mkdir -p build
+
+# The llvm-config output is intentionally left unquoted so it splits into
+# separate compiler/linker arguments.
+clang++ -fPIC -shared \
+    src/CppProprocessFunctionPass.cpp \
+    $(llvm-config --cxxflags --ldflags) \
+    -o build/CppProprocessFunctionPass.so

+ 19 - 0
scripts/cpp/CppModuleFusionScript.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+# 获取脚本所在目录
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+# 切换到项目根目录
+cd "$SCRIPT_DIR/../.."
+
+# 确保输出目录存在
+mkdir -p output/cpp
+
+# 使用 -S 参数让 opt 直接输出 .ll 文件,并重定向错误输出到日志
+opt -load ./build/CppModuleFusion.so -cpp-codefusion proprocess_output/cpp/combined_cpp_tagged.ll -S -o output/cpp/cpp_module_fusion_text.ll 2> output/cpp/log_cpp_module_fusion.txt
+
+# 由于直接生成了 .ll 文件,不再需要 llvm-dis 命令
+llvm-cbe output/cpp/cpp_module_fusion_text.ll
+
+echo "C++ module fusion completed successfully."
+echo "Output files:"
+echo "  - LLVM IR: output/cpp/cpp_module_fusion_text.ll"
+echo "  - Log file: output/cpp/log_cpp_module_fusion.txt" 

+ 16 - 0
scripts/cpp/CppProprocessScript.sh

@@ -0,0 +1,16 @@
+#!/bin/bash
+# 获取脚本所在目录
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+# 切换到项目根目录
+cd "$SCRIPT_DIR/../.."
+
+# 确保输出目录存在
+mkdir -p proprocess_output/cpp
+
+opt -load build/CppProprocessFunctionPass.so -add-cpp-project-metadata -cpp-project-name=CppBunker < combine_output/cpp/projectA_cpp.bc > proprocess_output/cpp/projectA_cpp_tagged.bc
+opt -load build/CppProprocessFunctionPass.so -add-cpp-project-metadata -cpp-project-name=CppTarget < combine_output/cpp/projectB_cpp.bc > proprocess_output/cpp/projectB_cpp_tagged.bc
+llvm-link proprocess_output/cpp/projectA_cpp_tagged.bc proprocess_output/cpp/projectB_cpp_tagged.bc -o proprocess_output/cpp/combined_cpp_tagged.bc
+llvm-dis proprocess_output/cpp/combined_cpp_tagged.bc -o proprocess_output/cpp/combined_cpp_tagged.ll
+
+echo "C++ projects preprocessed successfully."
+echo "Tagged C++ LLVM IR is at: proprocess_output/cpp/combined_cpp_tagged.ll" 

+ 154 - 0
src/CppModuleFusion.cpp

@@ -0,0 +1,154 @@
+#include "llvm/Pass.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#include "Util/LogSystem.h"
+#include "Util/CallGraphManager.h"
+#include "Util/Utils.h"
+#include "Fusion/SliceAnalyzer.h"
+#include "Fusion/Fusion.h"
+#include "Fusion/CppFusion.h"
+
+#include <vector>
+#include <map>
+#include <set>
+#include <queue>
+#include <random>
+#include <algorithm>
+#include <string>
+
+using namespace llvm;
+
+extern logging::LogSystem logger;
+
+namespace {
+    // Plain aggregate holding two lists of basic blocks.
+    // NOTE(review): nothing in this file uses this type; the exact meaning of
+    // the two lists is not visible here - confirm against Fusion.h, which
+    // appears to declare a struct with the same name.
+    struct SliceBlocks {
+        std::vector<BasicBlock*> blocks;      // presumably the blocks forming a slice - TODO confirm
+        std::vector<BasicBlock*> backBlocks;  // presumably blocks involved in back edges - TODO confirm
+    };
+
+    // Returns true when F carries "cpp_project_source" metadata whose first
+    // operand is an MDString equal to expectedProject.
+    // Returns false when the metadata is missing, has no operands, or its
+    // first operand is null or not a string, so malformed metadata never
+    // causes an out-of-range operand access.
+    bool hasCppProjectSource(const Function &F, StringRef expectedProject) {
+        const MDNode *MD = F.getMetadata("cpp_project_source");
+        if (!MD || MD->getNumOperands() == 0) return false;
+
+        // dyn_cast_or_null: a metadata operand is allowed to be null.
+        const auto *MDS = dyn_cast_or_null<MDString>(MD->getOperand(0));
+        return MDS && MDS->getString() == expectedProject;
+    }
+
+    // Target-code detection for C++ projects: functions tagged with the
+    // project name "CppTarget" are target code.
+    bool isCppTargetCode(const Function &F) {
+        return hasCppProjectSource(F, "CppTarget");
+    }
+
+    // Bunker (cover) code detection for C++ projects: functions tagged with
+    // the project name "CppBunker" are bunker code.
+    bool isCppBunkerCode(const Function &F) {
+        return hasCppProjectSource(F, "CppBunker");
+    }
+
+    // Module pass that classifies functions as C++ target or bunker code by
+    // their "cpp_project_source" metadata, analyses them, and fuses matched
+    // target/bunker pairs through slicefusion::CppFusion.
+    struct CppCodeFusionPass : public ModulePass {
+    public:
+        static char ID;
+        CppCodeFusionPass() : ModulePass(ID), callGraph() {}
+
+        // Runs the whole analysis + fusion pipeline on M.
+        // Returns true when code fusion was performed (the module was
+        // changed), false otherwise.
+        bool runOnModule(Module &M) override {
+            auto& logger = logging::LogSystem::getInstance();
+            logger.setGlobalLevel(logging::LogLevel::TRACE);
+            // Enable log output for this function.
+            logger.enableFunction("runOnModule");
+
+            LOG_INFO("runOnModule", "Starting C++ analysis for module: {0}", M.getName().str());
+
+            // Classify every defined function by its project metadata.
+            for (Function &F : M) {
+                if(!F.isDeclaration()){
+                    if (isCppTargetCode(F)) {
+                        LOG_INFO("runOnModule", "Found C++ target function: {0}", F.getName().str());
+                        targetFunctions.insert(F.getName().str());
+                    } else if (isCppBunkerCode(F)) {
+                        LOG_INFO("runOnModule", "Found C++ bunker function: {0}", F.getName().str());
+                        bunkerFunctions.insert(F.getName().str());
+                    } else {
+                        LOG_INFO("runOnModule", "Skipping function without metadata: {0}", F.getName().str());
+                    }
+                }
+            }
+
+            callGraph.buildCallGraph(M, targetFunctions);
+            callGraph.buildCallGraph(M, bunkerFunctions);
+
+            // Run the SliceAnalyzer critical-point analysis on every defined
+            // function and store the result in the call graph.
+            for (Function &F : M) {
+                if (!F.isDeclaration()) {
+                    std::string fname = F.getName().str();
+                    auto criticalPoints = slicefusion::SliceAnalyzer::analyzeFunctionCriticalPoints(F);
+                    callGraph[fname].criticalPoints = criticalPoints;
+                }
+            }
+
+            // Generate the project call graph.
+            callGraph.generateProjectCallGraph();
+            // Dump the control-flow graph of every defined function.
+            for (Function &F : M) {
+                if(!F.isDeclaration()){
+                    callGraph.dumpControlFlowGraph(F);
+                }
+            }
+
+            LOG_INFO("runOnModule", "Creating slices for C++ target functions");
+            for (const auto& targetFuncName : targetFunctions) {
+                LOG_INFO("runOnModule", "C++ targetFuncName: {0}", targetFuncName);
+                if (Function* F = M.getFunction(targetFuncName)) {
+                    slicefusion::SliceAnalyzer::createFunctionSlices(*F, callGraph[targetFuncName]);
+                } else {
+                    LOG_ERROR("runOnModule", "Could not find function {0} in module", targetFuncName);
+                }
+            }
+
+            LOG_INFO("runOnModule", "Creating fusion points for C++ bunker functions");
+            for (const auto& bunkerFuncName : bunkerFunctions) {
+                LOG_INFO("runOnModule", "C++ bunkerFuncName: {0}", bunkerFuncName);
+                if (Function* F = M.getFunction(bunkerFuncName)) {
+                    slicefusion::SliceAnalyzer::countFusionPoints(*F, callGraph[bunkerFuncName]);
+                } else {
+                    LOG_ERROR("runOnModule", "Could not find function {0} in module", bunkerFuncName);
+                }
+            }
+
+            // A pass must report whether it changed the IR so the pass
+            // manager can invalidate cached analyses.
+            // NOTE(review): this assumes the analysis steps above do not
+            // mutate the IR - confirm against SliceAnalyzer.
+            bool modified = false;
+
+            // Create the C++-specific fusion object; it stays alive until the
+            // end of this function.
+            slicefusion::CppFusion fusion(callGraph);
+            if(fusion.matchFunctionsForFusion(targetFunctions, bunkerFunctions)) {
+                callGraph.generateFusionMatchGraph(targetFunctions, bunkerFunctions, fusion.getFusionPairs());
+                fusion.performCodeFusion(M);
+                modified = true;
+            }
+
+            return modified;
+        }
+
+    private:
+        CallGraphManager callGraph;
+        std::set<std::string> targetFunctions;  // names of functions tagged "CppTarget"
+        std::set<std::string> bunkerFunctions;  // names of functions tagged "CppBunker"
+    };
+}
+
+// Definition of the pass's static ID member.
+char CppCodeFusionPass::ID = 0;
+
+// Registers the pass under the command-line name "cpp-codefusion".
+static RegisterPass<CppCodeFusionPass> CppCodeFusionRegistration(
+    "cpp-codefusion", "C++ Code Fusion Pass");

+ 61 - 0
src/CppProprocessFunctionPass.cpp

@@ -0,0 +1,61 @@
+#include "llvm/Pass.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+// Command-line option "-cpp-project-name=<name>": the project name that the
+// pass attaches to functions as metadata. The option is mandatory.
+static cl::opt<std::string> CppProjectName(
+    "cpp-project-name",
+    cl::Required,
+    cl::value_desc("name"),
+    cl::desc("Specify the C++ project name to be added as metadata"));
+
+namespace {
+// Module pass that tags every defined function with "cpp_project_source"
+// metadata holding the project name given by -cpp-project-name.
+struct CppProjectMetadataPass : public ModulePass {
+    static char ID;
+    CppProjectMetadataPass() : ModulePass(ID) {}
+
+    // Attaches the metadata to each function definition that does not have
+    // it yet; returns true when at least one function was tagged.
+    bool runOnModule(Module &M) override {
+        LLVMContext &Context = M.getContext();
+
+        // Metadata string carrying the project name.
+        MDString *NameString = MDString::get(Context, CppProjectName.getValue());
+
+        bool Changed = false;
+        for (Function &Fn : M) {
+            // Only function definitions are tagged.
+            if (Fn.isDeclaration()) {
+                continue;
+            }
+
+            // Leave functions that already carry project metadata untouched.
+            const bool AlreadyTagged = Fn.getMetadata("cpp_project_source") != nullptr;
+            if (AlreadyTagged) {
+                errs() << "Warning: Function " << Fn.getName()
+                       << " already has C++ project metadata.\n";
+                continue;
+            }
+
+            // Build the metadata node and attach it to the function.
+            Fn.setMetadata("cpp_project_source", MDNode::get(Context, NameString));
+
+            errs() << "Added C++ project metadata '" << CppProjectName
+                   << "' to function: " << Fn.getName() << "\n";
+
+            Changed = true;
+        }
+
+        return Changed;
+    }
+};
+}
+
+// Definition of the pass's static ID member.
+char CppProjectMetadataPass::ID = 0;
+
+// Registers the pass under the command-line name "add-cpp-project-metadata".
+static RegisterPass<CppProjectMetadataPass> CppProjectMetadataRegistration(
+    "add-cpp-project-metadata",
+    "Add C++ project source metadata to functions");

+ 104 - 0
src/Fusion/CppFusion.cpp

@@ -0,0 +1,104 @@
+#include "CppFusion.h"
+#include "../Util/LogSystem.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Verifier.h"
+#include <random>
+#include <algorithm>
+
+using namespace llvm;
+
+namespace slicefusion {
+
+/**
+ * @brief Pairs one fixed target function with one fixed bunker function.
+ *
+ * Both mangled names are hard-coded. The pair is recorded in fusionPairs only
+ * when each name is present in its set and the bunker function has at least
+ * as many fusion points as the target function has slices.
+ *
+ * @param targetFunctions names of the candidate target functions
+ * @param bunkerFunctions names of the candidate bunker functions
+ * @return true when the pair was recorded, false otherwise
+ */
+bool CppFusion::matchFunctionsForFusion(const std::set<std::string>& targetFunctions,
+                                        const std::set<std::string>& bunkerFunctions) {
+    auto& logger = logging::LogSystem::getInstance();
+    logger.enableFunction("matchFunctionsForFusion");
+    LOG_INFO("matchFunctionsForFusion", "Starting C++ function matching process");
+
+    // The function pair to fuse is specified directly.
+    const std::string targetName = "_ZN8ProjectB10testPointsEi";
+    const std::string bunkerName = "_ZN8ProjectA9expandKeyEPhS0_NS_7keySizeEm";
+
+    LOG_INFO("matchFunctionsForFusion", "Using specified function pair:");
+    LOG_INFO("matchFunctionsForFusion", "  Target: {0}", targetName);
+    LOG_INFO("matchFunctionsForFusion", "  Bunker: {0}", bunkerName);
+
+    // Both names must be present in their respective sets.
+    if (targetFunctions.count(targetName) == 0) {
+        LOG_ERROR("matchFunctionsForFusion", "Specified target function {0} not found in target functions", targetName);
+        return false;
+    }
+    if (bunkerFunctions.count(bunkerName) == 0) {
+        LOG_ERROR("matchFunctionsForFusion", "Specified bunker function {0} not found in bunker functions", bunkerName);
+        return false;
+    }
+
+    // Compare the number of slices against the available fusion points.
+    const auto& targetInfo = callGraph[targetName];
+    const auto& bunkerInfo = callGraph[bunkerName];
+    const auto& sliceCount = targetInfo.slices_num;
+    const auto& pointCount = bunkerInfo.points_num;
+
+    LOG_INFO("matchFunctionsForFusion", "Target function {0} has {1} slices", targetName, sliceCount);
+    LOG_INFO("matchFunctionsForFusion", "Bunker function {0} has {1} fusion points", bunkerName, pointCount);
+
+    if (pointCount < sliceCount) {
+        LOG_ERROR("matchFunctionsForFusion",
+            "Insufficient fusion points in bunker function {0} ({1}) for target function {2} ({3})",
+            bunkerName, pointCount,
+            targetName, sliceCount);
+        return false;
+    }
+
+    // Record the match.
+    fusionPairs[targetName] = bunkerName;
+
+    LOG_INFO("matchFunctionsForFusion",
+        "Successfully matched target {0} ({1} slices) with bunker {2} ({3} fusion points)",
+        targetName, sliceCount,
+        bunkerName, pointCount);
+
+    return true;
+}
+
+/**
+ * @brief Runs the base-class fusion, then fixes up the two renamed mains.
+ *
+ * After Fusion::performCodeFusion(M), the target project's main
+ * (_Z13projectB_mainv) is renamed to "main" and the bunker project's main
+ * (_Z13projectA_mainv) is removed from the module.
+ *
+ * @param M module to transform in place
+ */
+void CppFusion::performCodeFusion(Module &M) {
+    auto& logger = logging::LogSystem::getInstance();
+    logger.enableFunction("performCodeFusion");
+    LOG_INFO("performCodeFusion", "Starting C++ code fusion");
+
+    // Let the base class fuse everything except the main functions.
+    Fusion::performCodeFusion(M);
+
+    // Main handling: rename projectB_main to main and delete projectA_main.
+    const std::string CPP_TARGET_MAIN = "_Z13projectB_mainv";
+    const std::string CPP_BUNKER_MAIN = "_Z13projectA_mainv";
+
+    if (Function* targetMain = M.getFunction(CPP_TARGET_MAIN)) {
+        if (M.getFunction("main")) {
+            // setName() would silently pick a uniquified name instead of
+            // "main", so report the clash rather than produce a wrong symbol.
+            LOG_ERROR("performCodeFusion", "Module already contains a main function; not renaming {0}", CPP_TARGET_MAIN);
+        } else {
+            LOG_INFO("performCodeFusion", "Renaming {0} to main", CPP_TARGET_MAIN);
+            targetMain->setName("main");
+        }
+    } else {
+        LOG_ERROR("performCodeFusion", "Could not find target main function {0} to rename", CPP_TARGET_MAIN);
+    }
+
+    if (Function* bunkerMain = M.getFunction(CPP_BUNKER_MAIN)) {
+        if (!bunkerMain->use_empty()) {
+            // Erasing a function that is still referenced would leave
+            // dangling uses in the module, so keep it and warn instead.
+            LOG_WARNING("performCodeFusion", "Bunker main function {0} still has uses; not removing it", CPP_BUNKER_MAIN);
+        } else {
+            LOG_INFO("performCodeFusion", "Removing bunker main function {0}", CPP_BUNKER_MAIN);
+            bunkerMain->eraseFromParent();
+        }
+    } else {
+        LOG_WARNING("performCodeFusion", "Could not find bunker main function {0} to remove", CPP_BUNKER_MAIN);
+    }
+
+    // // Final module verification (disabled)
+    // LOG_INFO("performCodeFusion", "Performing final C++ module verification");
+    // std::string errorStr;
+    // llvm::raw_string_ostream errorStream(errorStr);
+    // if (llvm::verifyModule(M, &errorStream)) {
+    //     LOG_ERROR("performCodeFusion", "C++ module verification failed: {0}", errorStr);
+    //     // Also print to the standard error stream
+    //     llvm::errs() << "C++ module verification failed:\n" << errorStr << "\n";
+    // } else {
+    //     LOG_INFO("performCodeFusion", "C++ module verification passed successfully");
+    // }
+}
+
+} // namespace slicefusion 

+ 31 - 0
src/Fusion/CppFusion.h

@@ -0,0 +1,31 @@
+#ifndef SLICE_FUSION_CPP_FUSION_H
+#define SLICE_FUSION_CPP_FUSION_H
+
+#include "Fusion.h"
+#include <string>
+#include <set>
+
+namespace slicefusion {
+
+/**
+ * @brief C++ flavour of Fusion that works with C++ mangled function names.
+ */
+class CppFusion : public Fusion {
+public:
+    /**
+     * @brief Builds a CppFusion on top of an existing call graph manager.
+     *
+     * explicit: a CallGraphManager must never convert to a CppFusion
+     * implicitly.
+     *
+     * @param callGraph call graph manager forwarded to the Fusion base class
+     */
+    explicit CppFusion(llvm::CallGraphManager& callGraph) : Fusion(callGraph) {}
+
+    /**
+     * @brief C++ version of function matching; uses mangled C++ names.
+     * @param targetFunctions names of the candidate target functions
+     * @param bunkerFunctions names of the candidate bunker functions
+     * @return true when a target/bunker pair was matched
+     */
+    bool matchFunctionsForFusion(const std::set<std::string>& targetFunctions,
+                                 const std::set<std::string>& bunkerFunctions) override;
+
+    /**
+     * @brief C++ version of code fusion; also handles the C++-specific
+     *        renaming of functions.
+     * @param M module to transform
+     */
+    void performCodeFusion(llvm::Module &M) override;
+};
+
+} // namespace slicefusion
+
+#endif // SLICE_FUSION_CPP_FUSION_H 

+ 13 - 0
src/Fusion/Fusion.cpp

@@ -4,6 +4,7 @@
 #include "../Util/Utils.h"
 
 #include "Fusion.h"
+#include "llvm/IR/Verifier.h"
 
 
 using namespace llvm;
@@ -867,6 +868,15 @@ void Fusion::performCodeFusion(Module &M) {
         "Code fusion completed, processed {0} function pairs", 
         fusionPairs.size());
 
+    // // 验证模块完整性
+    // std::string errorStr;
+    // raw_string_ostream errorStream(errorStr);
+    // if (verifyModule(M, &errorStream)) {
+    //     LOG_ERROR("performCodeFusion", "Module verification failed: {0}", errorStr);
+    // } else {
+    //     LOG_INFO("performCodeFusion", "Module verification passed");
+    // }
+
     // 查找并重命名融合后的main函数
     if (Function* fusedMain = M.getFunction("fused_projectB_main_projectA_main")) {
         LOG_INFO("performCodeFusion", "Renaming fused_projectB_main_projectA_main to main");
@@ -874,6 +884,9 @@ void Fusion::performCodeFusion(Module &M) {
     } else {
         LOG_ERROR("performCodeFusion", "Could not find fused_projectB_main_projectA_main function for renaming");
     }
+    LOG_INFO("performCodeFusion", 
+        "Code fusion completed2, processed {0} function pairs", 
+        fusionPairs.size());
 }
 
 /**

+ 6 - 3
src/Fusion/Fusion.h

@@ -21,24 +21,27 @@ struct SliceBlocks {
 class Fusion {
 public:
     Fusion(llvm::CallGraphManager& callGraph) : callGraph(callGraph) {}
+    virtual ~Fusion() = default;
 
-    bool matchFunctionsForFusion(
+    virtual bool matchFunctionsForFusion(
         const std::set<std::string>& targetFunctions,
         const std::set<std::string>& bunkerFunctions);
 
-    void performCodeFusion(llvm::Module &M);
+    virtual void performCodeFusion(llvm::Module &M);
 
     const std::map<std::string, std::string>& getFusionPairs() const { 
         return fusionPairs; 
     }
 
-private:
+protected:
     llvm::CallGraphManager& callGraph;
     std::map<std::string, std::string> fusionPairs;
     
     // 获取随机索引
     size_t getRandomIndex(size_t max);
 
+private:
+
     // 创建融合函数类型
     llvm::FunctionType* createFusedFunctionType(
         llvm::Function* bunkerFunc,