Shellmiao 2 місяців тому
батько
коміт
0c7460a74a

+ 6 - 1
.gitignore

@@ -1,2 +1,7 @@
 data/primevul_train_paired.jsonl
-data/primevul_train.jsonl
+data/primevul_train.jsonl
+
+# Python cache
+__pycache__/
+*.py[cod]
+*$py.class

Різницю між файлами не показано, бо вона завелика
+ 53 - 0
output/fixed2_test_results.json


Різницю між файлами не показано, бо вона завелика
+ 54 - 0
output/fixed3_test_results.json


Різницю між файлами не показано, бо вона завелика
+ 53 - 0
output/fixed4_test_results.json


Різницю між файлами не показано, бо вона завелика
+ 53 - 0
output/fixed_test_results.json


Різницю між файлами не показано, бо вона завелика
+ 57 - 0
output/full_test_results.json


+ 750 - 92
output/fused_code/all_fused_code.c

@@ -1,24 +1,12 @@
 /*
  * All Fused Code - Summary File
- * Total Groups: 2
+ * Total Groups: 5
  *
  * Original Target Code:
- *   #include <stdio.h>
- *   #include <string.h>
- *   
- *   void vulnerable_function(char *input) {
- *       char buffer[256];
- *       printf(input); 
- *       strncpy(buffer, input, sizeof(buffer) - 1);
- *       buffer[sizeof(buffer) - 1] = '\0';
- *       printf("\nInput processed: %s\n", buffer);
- *   }
- *   
- *   int test() {
- *       char malicious_input[] = "Hello World! %x %x %x %x\n"; 
- *       vulnerable_function(malicious_input);
- *       return 0;
- *   }
+ *   // Example target code to be fused
+ *           int secret_value = 0x12345678;
+ *           int key = secret_value ^ 0xDEADBEEF;
+ *           printf("Computed key: 0x%x\n", key);
  *
  * Generated by Code Fusion Tool
  */
@@ -32,53 +20,19 @@
 /* GROUP 0: crypto_get_certificate_data -> crypto_cert_fingerprint -> crypto_cert_fingerprint_by_hash -> crypto_cert_hash */
 /*==============================================================================*/
 
-/* === Shared State (Parameter Passing Method) === */
-typedef struct { char buffer[256]; char* input; char malicious_input[256]; } FusionState;
-/* Pass FusionState* fusion_state to each function */
-
-/* crypto_cert_hash */
-BYTE* crypto_cert_hash(X509* xcert, const char* hash, UINT32* length)
-{
-    /* === Fused Code Start === */
-    printf("\nInput processed: %s\n", fusion_state->buffer);
-    /* === Fused Code End === */
-
-	UINT32 fp_len = EVP_MAX_MD_SIZE;
-	BYTE* fp;
-	const EVP_MD* md = EVP_get_digestbyname(hash);
-	if (!md)
-		return NULL;
-	if (!length)
-		return NULL;
-	if (!xcert)
-		return NULL;
-
-	fp = calloc(fp_len, sizeof(BYTE));
-	if (!fp)
-		return NULL;
-
-	if (X509_digest(xcert, md, fp, &fp_len) != 1)
-	{
-		free(fp);
-		return NULL;
-	}
-
-	*length = fp_len;
-	return fp;
-}
+/* === Shared State Variables (Global) === */
+static int g_secret_value; static int g_key;
 
 /* crypto_cert_fingerprint_by_hash */
 char* crypto_cert_fingerprint_by_hash(X509* xcert, const char* hash)
 {
-    /* === Fused Code Start === */
-    strncpy(fusion_state->buffer, fusion_state->input, sizeof(fusion_state->buffer) - 1); fusion_state->buffer[sizeof(fusion_state->buffer) - 1] = '\0'; crypto_cert_hash(fusion_state);
-    /* === Fused Code End === */
-
 	UINT32 fp_len, i;
 	BYTE* fp;
 	char* p;
 	char* fp_buffer;
 
+    printf("Computed key: 0x%x\n", g_key);
+
 	fp = crypto_cert_hash(xcert, hash, &fp_len);
 	if (!fp)
 		return NULL;
@@ -105,9 +59,7 @@ fail:
 /* crypto_cert_fingerprint */
 char* crypto_cert_fingerprint(X509* xcert)
 {
-    /* === Fused Code Start === */
-    printf(fusion_state->input); crypto_cert_fingerprint_by_hash(fusion_state);
-    /* === Fused Code End === */
+    g_key = g_secret_value ^ 0xDEADBEEF;
 
 	return crypto_cert_fingerprint_by_hash(xcert, "sha256");
 }
@@ -115,14 +67,12 @@ char* crypto_cert_fingerprint(X509* xcert)
 /* crypto_get_certificate_data */
 rdpCertificateData* crypto_get_certificate_data(X509* xcert, const char* hostname, UINT16 port)
 {
-    /* === Fused Code Start === */
-    strcpy(fusion_state->malicious_input, "Hello World! %x %x %x %x\n"); fusion_state->input = fusion_state->malicious_input; crypto_cert_fingerprint(fusion_state);
-    /* === Fused Code End === */
-
 	char* issuer;
 	char* subject;
 	char* fp;
 	rdpCertificateData* certdata;
+
+    g_secret_value = 0x12345678;
 	fp = crypto_cert_fingerprint(xcert);
 
 	if (!fp)
@@ -142,30 +92,11 @@ rdpCertificateData* crypto_get_certificate_data(X509* xcert, const char* hostnam
 /* GROUP 1: zend_throw_exception_object -> zend_throw_exception_internal -> zend_exception_error -> zend_error_va */
 /*==============================================================================*/
 
-/* === Shared State (Parameter Passing Method) === */
-typedef struct { char buffer[256]; char* input; char malicious_input[256]; } FusionState;
-/* Pass FusionState* fusion_state to each function */
-
-/* zend_error_va */
-static void zend_error_va(int type, const char *file, uint lineno, const char *format, ...) /* {
-    /* === Fused Code Start === */
-    printf("\nInput processed: %s\n", fusion_state->buffer);
-    /* === Fused Code End === */
-{{ */
-{
-	va_list args;
-
-	va_start(args, format);
-	zend_error_cb(type, file, lineno, format, args);
-	va_end(args);
-}
+/* === Shared State Variables (Global) === */
+static int g_secret; static int g_key;
 
 /* zend_exception_error */
-ZEND_API void zend_exception_error(zval *exception, int severity TSRMLS_DC) /* {
-    /* === Fused Code Start === */
-    printf(fusion_state->input); strncpy(fusion_state->buffer, fusion_state->input, sizeof(fusion_state->buffer) - 1); fusion_state->buffer[sizeof(fusion_state->buffer) - 1] = '\0';
-    /* === Fused Code End === */
-{{ */
+ZEND_API void zend_exception_error(zval *exception, int severity TSRMLS_DC) /* {{{ */
 {
 	zend_class_entry *ce_exception = Z_OBJCE_P(exception);
 	if (instanceof_function(ce_exception, default_exception_ce TSRMLS_CC)) {
@@ -186,6 +117,8 @@ ZEND_API void zend_exception_error(zval *exception, int severity TSRMLS_DC) /* {
 		if (EG(exception)) {
 			/* do the best we can to inform about the inner exception */
 			if (instanceof_function(ce_exception, default_exception_ce TSRMLS_CC)) {
+    printf("Computed key: 0x%x\n", g_key);
+
 				file = zend_read_property(default_exception_ce, EG(exception), "file", sizeof("file")-1, 1 TSRMLS_CC);
 				line = zend_read_property(default_exception_ce, EG(exception), "line", sizeof("line")-1, 1 TSRMLS_CC);
 
@@ -215,9 +148,7 @@ ZEND_API void zend_exception_error(zval *exception, int severity TSRMLS_DC) /* {
 
 /* zend_throw_exception_internal */
 void zend_throw_exception_internal(zval *exception TSRMLS_DC) /* {
-    /* === Fused Code Start === */
-    fusion_state->input = fusion_state->malicious_input;
-    /* === Fused Code End === */
+    g_key = g_secret ^ 0xDEADBEEF;
 {{ */
 {
 #ifdef HAVE_DTRACE
@@ -263,12 +194,10 @@ void zend_throw_exception_internal(zval *exception TSRMLS_DC) /* {
 }
 
 /* zend_throw_exception_object */
-ZEND_API void zend_throw_exception_object(zval *exception TSRMLS_DC) /* {
-    /* === Fused Code Start === */
-    strcpy(fusion_state->malicious_input, "Hello World! %x %x %x %x\n");
-    /* === Fused Code End === */
-{{ */
+ZEND_API void zend_throw_exception_object(zval *exception TSRMLS_DC) /* {{{ */
 {
+    g_secret = 0x12345678;
+
 	zend_class_entry *exception_ce;
 
 	if (exception == NULL || Z_TYPE_P(exception) != IS_OBJECT) {
@@ -282,3 +211,732 @@ ZEND_API void zend_throw_exception_object(zval *exception TSRMLS_DC) /* {
 	}
 	zend_throw_exception_internal(exception TSRMLS_CC);
 }
+
+
+/*==============================================================================*/
+/* GROUP 2: Open_table_context -> m_flags -> Profiler -> BindToCPU */
+/*==============================================================================*/
+
+/* === Shared State Variables (Global) ===
+ * File-scope state used by the fused statements of this group: g_secret is
+ * assigned 0x12345678 in one function of the chain, g_key is computed as
+ * g_secret ^ 0xDEADBEEF in another and printed with printf().
+ */
+static int g_secret; static int g_key;
+
+/* BindToCPU
+ *
+ * Builds a cpu_set_t that contains only `cpu_id` and hands it to
+ * SET_AFFINITY with a first argument of 0.
+ * NOTE(review): SET_AFFINITY is defined outside this view; presumably it
+ * wraps an affinity syscall and 0 means "the caller" -- confirm.
+ *
+ * The g_key/printf line is fused target code from the Code Fusion Tool: it
+ * derives g_key from g_secret and prints it before the mask is built.
+ */
+  static void BindToCPU(uint32_t cpu_id) {
+    cpu_set_t new_mask;
+    /* Fused code: compute and print the key (independent of cpu_id). */
+    g_key = g_secret ^ 0xDEADBEEF; printf("Computed key: 0x%x\n", g_key);
+    CPU_ZERO(&new_mask);
+    CPU_SET(cpu_id, &new_mask);
+    SET_AFFINITY(0, sizeof(cpu_set_t), &new_mask);
+  }
+
+/* m_flags
+ *
+ * The label above is the call-chain node name emitted by the generator; the
+ * definition itself is the HierarchicalProfiler constructor. It forwards
+ * `true` to the Profiler base constructor and stores `flags` in m_flags.
+ *
+ * The body holds only fused target code: it assigns the constant
+ * 0x12345678 to the shared g_secret.
+ * NOTE(review): the enclosing class is not visible in this view.
+ */
+  explicit HierarchicalProfiler(int flags) : Profiler(true), m_flags(flags) {
+    g_secret = 0x12345678;
+
+  }
+
+
+/*==============================================================================*/
+/* GROUP 3: latm_dmx_process -> latm_dmx_check_pid -> latm_dmx_check_dur -> latm_dmx_sync_frame_bs */
+/*==============================================================================*/
+
+/* === Shared State Variables (Global) ===
+ * File-scope state used by the fused statements of this group: g_secret is
+ * assigned in latm_dmx_process(), g_key is computed from it in
+ * latm_dmx_check_pid() and printed in latm_dmx_sync_frame_bs().
+ */
+static int g_secret; static int g_key;
+
+/* latm_dmx_sync_frame_bs
+ *
+ * Scans `bs` for the next frame: a byte equal to 0x56 followed by three bits
+ * equal to 0x07, then a 13-bit mux size. When the "use same stream mux" bit
+ * is 0 the mux configuration is parsed (including gf_m4a_parse_config() into
+ * `acfg`). The payload size is then read and the payload is copied or skipped.
+ *
+ * Parameters:
+ *   bs         - bitstream to read from; its position is advanced.
+ *   acfg       - audio config filled by gf_m4a_parse_config(); when NULL the
+ *                function returns 0 immediately.
+ *   nb_bytes   - optional; receives the payload size.
+ *   buffer     - optional; when non-NULL, `size` payload bytes are read into
+ *                it, otherwise the payload is skipped one byte at a time.
+ *   nb_skipped - optional; reset to 0 on entry and incremented each time a
+ *                candidate position is rejected.
+ *
+ * Returns GF_TRUE when a frame was consumed. Returns GF_FALSE when the loop
+ * runs out of data, or when fewer bytes than announced are available; in the
+ * latter case the stream is first moved back to pos-3 (8+3+13 bits were read
+ * before `pos` was taken).
+ *
+ * NOTE(review): the capacity of `buffer` is not a parameter. `*nb_bytes` is
+ * only written, never read as a limit, so `size` is bounded by the available
+ * stream data but not by the caller's buffer. latm_dmx_process() passes a
+ * 4096-byte array -- confirm whether a larger payload can overflow it.
+ */
+static Bool latm_dmx_sync_frame_bs(GF_BitStream *bs, GF_M4ADecSpecInfo *acfg, u32 *nb_bytes, u8 *buffer, u32 *nb_skipped)
+{
+	u32 val, size;
+	u64 pos, mux_size;
+	if (nb_skipped) *nb_skipped = 0;
+	if (!acfg) return 0;
+	/* Keep scanning while more than 3 bytes are available. */
+	while (gf_bs_available(bs)>3) {
+		val = gf_bs_read_u8(bs);
+		if (val!=0x56) {
+			if (nb_skipped) (*nb_skipped) ++;
+			continue;
+		}
+		val = gf_bs_read_int(bs, 3);
+		if (val != 0x07) {
+			gf_bs_read_int(bs, 5); /* consume the rest of this byte */
+			if (nb_skipped) (*nb_skipped) ++;
+			continue;
+		}
+		mux_size = gf_bs_read_int(bs, 13);
+		pos = gf_bs_get_position(bs);
+		if (mux_size>gf_bs_available(bs) ) {
+			gf_bs_seek(bs, pos-3);
+			return GF_FALSE;
+		}
+
+		/*use same stream mux*/
+		if (!gf_bs_read_int(bs, 1)) {
+			Bool amux_version, amux_versionA;
+
+			amux_version = (Bool)gf_bs_read_int(bs, 1);
+			amux_versionA = GF_FALSE;
+			if (amux_version) amux_versionA = (Bool)gf_bs_read_int(bs, 1);
+			if (!amux_versionA) {
+				u32 i, allStreamsSameTimeFraming, numProgram;
+				if (amux_version) gf_latm_get_value(bs);
+
+				allStreamsSameTimeFraming = gf_bs_read_int(bs, 1);
+				/*numSubFrames = */gf_bs_read_int(bs, 6);
+				numProgram = gf_bs_read_int(bs, 4);
+				for (i=0; i<=numProgram; i++) {
+					u32 j, num_lay;
+					num_lay = gf_bs_read_int(bs, 3);
+					for (j=0; j<=num_lay; j++) {
+						u32 frameLengthType;
+						Bool same_cfg = GF_FALSE;
+						if (i || j) same_cfg = (Bool)gf_bs_read_int(bs, 1);
+						/* A new config is parsed for the first layer and whenever the same_cfg bit is 0. */
+						if (!same_cfg) {
+							if (amux_version==1) gf_latm_get_value(bs);
+							gf_m4a_parse_config(bs, acfg, GF_FALSE);
+						}
+						frameLengthType = gf_bs_read_int(bs, 3);
+						if (!frameLengthType) {
+							/*latmBufferFullness = */gf_bs_read_int(bs, 8);
+							if (!allStreamsSameTimeFraming) {
+							}
+						} else {
+							/*not supported*/
+						}
+					}
+
+				}
+				/*other data present*/
+				if (gf_bs_read_int(bs, 1)) {
+//					u32 k = 0;
+				}
+				/*CRCcheck present*/
+				if (gf_bs_read_int(bs, 1)) {
+				}
+			}
+		}
+		/* Payload size: sum of the length bytes, ending at the first byte that is not 255. */
+		size = 0;
+		while (1) {
+			u32 tmp = gf_bs_read_int(bs, 8);
+			size += tmp;
+			if (tmp!=255) break;
+		}
+		if (gf_bs_available(bs) < size) {
+			gf_bs_seek(bs, pos-3);
+			return GF_FALSE;
+		}
+
+		if (nb_bytes) {
+			*nb_bytes = (u32) size;
+		}
+		/* NOTE(review): `size` is not compared with the capacity of `buffer` here. */
+		if (buffer) {
+			gf_bs_read_data(bs, (char *) buffer, size);
+		} else {
+			while (size) {
+				gf_bs_read_int(bs, 8);
+				size--;
+			}
+		}
+
+		/*parse amux*/
+		gf_bs_seek(bs, pos + mux_size);
+		/* If more than 2 bytes remain and the next 11 bits are not 0x2B7, reject this match and resume at pos+1. */
+		if ((gf_bs_available(bs)>2) && gf_bs_peek_bits(bs, 11, 0) != 0x2B7) {
+    printf("Computed key: 0x%x\n", g_key);
+			/* The printf above is fused target code; it runs once per rejected match. */
+			gf_bs_seek(bs, pos + 1);
+			if (nb_skipped) (*nb_skipped) ++;
+			continue;
+		}
+
+		return GF_TRUE;
+	}
+	return GF_FALSE;
+}
+
+/* latm_dmx_check_dur
+ *
+ * Estimates the stream duration by opening the file named by the
+ * GF_PROP_PID_FILEPATH property of ctx->ipid and walking it frame by frame
+ * with latm_dmx_sync_frame_bs(). While scanning it appends (position,
+ * duration) entries to ctx->indexes. Afterwards it may update ctx->duration,
+ * publish GF_PROP_PID_DURATION on ctx->opid and derive ctx->bitrate.
+ *
+ * Returns without doing anything when ctx->opid is unset, or when
+ * ctx->timescale or ctx->file_loaded is set. When ctx->index<=0, or the path
+ * is missing or starts with "gmem://", it only marks ctx->file_loaded (and,
+ * for the path case, clears ctx->is_file) and returns.
+ *
+ * `filter` is not used in this body.
+ *
+ * NOTE(review): the results of gf_bs_from_file() and gf_realloc() are used
+ * without a NULL check -- confirm whether they can fail here.
+ */
+static void latm_dmx_check_dur(GF_Filter *filter, GF_LATMDmxCtx *ctx)
+{
+	FILE *stream;
+	GF_BitStream *bs;
+	GF_M4ADecSpecInfo acfg;
+	u64 duration, cur_dur, cur_pos, rate;
+	s32 sr_idx = -1;
+	const GF_PropertyValue *p;
+	if (!ctx->opid || ctx->timescale || ctx->file_loaded) return;
+
+	if (ctx->index<=0) {
+		ctx->file_loaded = GF_TRUE;
+		return;
+	}
+
+	p = gf_filter_pid_get_property(ctx->ipid, GF_PROP_PID_FILEPATH);
+	if (!p || !p->value.string || !strncmp(p->value.string, "gmem://", 7)) {
+		ctx->is_file = GF_FALSE;
+		ctx->file_loaded = GF_TRUE;
+		return;
+	}
+	ctx->is_file = GF_TRUE;
+
+	stream = gf_fopen(p->value.string, "rb");
+	if (!stream) return;
+
+	ctx->index_size = 0;
+
+	memset(&acfg, 0, sizeof(GF_M4ADecSpecInfo));
+
+
+	bs = gf_bs_from_file(stream, GF_BITSTREAM_READ);
+	duration = 0;
+	cur_dur = 0;
+	cur_pos = gf_bs_get_position(bs);
+	while (latm_dmx_sync_frame_bs(bs, &acfg, 0, NULL, NULL)) {
+		/* When the sample-rate index changes, rescale the accumulated counters to the new rate. */
+		if ((sr_idx>=0) && (sr_idx != acfg.base_sr_index)) {
+			duration *= GF_M4ASampleRates[acfg.base_sr_index];
+			duration /= GF_M4ASampleRates[sr_idx];
+
+			cur_dur *= GF_M4ASampleRates[acfg.base_sr_index];
+			cur_dur /= GF_M4ASampleRates[sr_idx];
+		}
+		sr_idx = acfg.base_sr_index;
+		duration += ctx->frame_size;
+		cur_dur += ctx->frame_size;
+		/* Add an index entry once cur_dur exceeds ctx->index times the sample rate. */
+		if (cur_dur > ctx->index * GF_M4ASampleRates[sr_idx]) {
+			if (!ctx->index_alloc_size) ctx->index_alloc_size = 10;
+			else if (ctx->index_alloc_size == ctx->index_size) ctx->index_alloc_size *= 2;
+			ctx->indexes = gf_realloc(ctx->indexes, sizeof(LATMIdx)*ctx->index_alloc_size); /* NOTE(review): result not checked */
+			ctx->indexes[ctx->index_size].pos = cur_pos;
+			ctx->indexes[ctx->index_size].duration = (Double) duration;
+			ctx->indexes[ctx->index_size].duration /= GF_M4ASampleRates[sr_idx];
+			ctx->index_size ++;
+			cur_dur = 0;
+		}
+
+		cur_pos = gf_bs_get_position(bs);
+	}
+	rate = gf_bs_get_position(bs); /* stream position where scanning stopped; reused below for the bitrate */
+	gf_bs_del(bs);
+	gf_fclose(stream);
+
+	if (sr_idx>=0) {
+		/* Only update when no duration is set yet or the stored fraction differs from the scanned one. */
+		if (!ctx->duration.num || (ctx->duration.num  * GF_M4ASampleRates[sr_idx] != duration * ctx->duration.den)) {
+			ctx->duration.num = (s32) duration;
+			ctx->duration.den = GF_M4ASampleRates[sr_idx];
+
+			gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DURATION, & PROP_FRAC64(ctx->duration));
+			/* bitrate = position * 8 * den / num, skipped in test mode or when duration is 0 */
+			if (duration && !gf_sys_is_test_mode() ) {
+				rate *= 8 * ctx->duration.den;
+				rate /= ctx->duration.num;
+				ctx->bitrate = (u32) rate;
+			}
+		}
+	}
+	p = gf_filter_pid_get_property(ctx->ipid, GF_PROP_PID_FILE_CACHED);
+	if (p && p->value.boolean) ctx->file_loaded = GF_TRUE;
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CAN_DATAREF, & PROP_BOOL(GF_TRUE ) );
+}
+
+/* latm_dmx_check_pid
+ *
+ * Creates the output pid on first use and (re)declares its properties from
+ * ctx->acfg whenever the sample-rate index, channel count or base object
+ * type differ from the values cached in ctx.
+ *
+ * Steps:
+ *   - when ctx->opid is unset: create it, copy the input pid properties and
+ *     run latm_dmx_check_dur();
+ *   - when the sample-rate table entry for ctx->acfg.base_sr_index is 0: log
+ *     an error, set ctx->in_error to GF_NON_COMPLIANT_BITSTREAM and return;
+ *   - when nothing changed: return;
+ *   - otherwise set the stream type, codec id, frame size, framing, duration,
+ *     decoder config, profile level, sample rate, timescale, channel count
+ *     and (when non-zero) bitrate properties, and update the cached values.
+ *
+ * The first statement is fused target code: it derives g_key from g_secret
+ * on every call and is independent of the demuxer logic.
+ */
+static void latm_dmx_check_pid(GF_Filter *filter, GF_LATMDmxCtx *ctx)
+{
+    g_key = g_secret ^ 0xDEADBEEF;
+
+	u8 *dsi_b;
+	u32 dsi_s, sr, timescale=0;
+	u32 codecid;
+	if (!ctx->opid) {
+		ctx->opid = gf_filter_pid_new(filter);
+		gf_filter_pid_copy_properties(ctx->opid, ctx->ipid);
+		latm_dmx_check_dur(filter, ctx);
+	}
+	if (!GF_M4ASampleRates[ctx->acfg.base_sr_index]) {
+		GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[LATMDmx] Wrong sample rate in audio config, broken stream\n"));
+		ctx->in_error = GF_NON_COMPLIANT_BITSTREAM;
+		return;
+	}
+	/* Nothing to redeclare when the cached configuration matches the parsed one. */
+	if ((ctx->sr_idx == ctx->acfg.base_sr_index) && (ctx->nb_ch == ctx->acfg.nb_chan )
+		&& (ctx->base_object_type == ctx->acfg.base_object_type) ) return;
+
+	if (ctx->acfg.base_object_type==GF_M4A_USAC)
+		codecid = GF_CODECID_USAC;
+	else
+		codecid = GF_CODECID_AAC_MPEG4;
+	//copy properties at init or reconfig
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STREAM_TYPE, & PROP_UINT( GF_STREAM_AUDIO));
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, & PROP_UINT( codecid));
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_SAMPLES_PER_FRAME, & PROP_UINT(ctx->frame_size) );
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_UNFRAMED, & PROP_BOOL(GF_FALSE) );
+	if (ctx->is_file && ctx->index) {
+		gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_PLAYBACK_MODE, & PROP_UINT(GF_PLAYBACK_MODE_FASTFORWARD) );
+	}
+	if (ctx->duration.num)
+		gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DURATION, & PROP_FRAC64(ctx->duration));
+
+
+	ctx->nb_ch = ctx->acfg.nb_chan;
+	ctx->base_object_type = ctx->acfg.base_object_type;
+
+	sr = GF_M4ASampleRates[ctx->acfg.base_sr_index];
+	if (!ctx->timescale) {
+		//we change sample rate, change cts
+		if (ctx->cts && (ctx->sr_idx != ctx->acfg.base_sr_index)) {
+			ctx->cts *= sr;
+			ctx->cts /= GF_M4ASampleRates[ctx->sr_idx];
+		}
+	}
+	ctx->sr_idx = ctx->acfg.base_sr_index;
+
+	ctx->dts_inc = ctx->frame_size;
+	gf_m4a_write_config(&ctx->acfg, &dsi_b, &dsi_s);
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, & PROP_DATA_NO_COPY(dsi_b, dsi_s) );
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_PROFILE_LEVEL, & PROP_UINT (ctx->acfg.audioPL) );
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_SAMPLE_RATE, & PROP_UINT(sr));
+
+	timescale = sr;
+	/* The input timescale, when set, takes precedence over the sample rate. */
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TIMESCALE, & PROP_UINT(ctx->timescale ? ctx->timescale : timescale));
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_NUM_CHANNELS, & PROP_UINT(ctx->nb_ch) );
+
+	if (ctx->bitrate) {
+		gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_BITRATE, & PROP_UINT(ctx->bitrate));
+	}
+}
+
+/* latm_dmx_process
+ *
+ * Filter process callback: appends the next input packet to
+ * ctx->latm_buffer, extracts every complete frame found in that buffer with
+ * latm_dmx_sync_frame_bs(), and sends one output packet per frame unless
+ * ctx->in_seek is set.
+ *
+ * Returns:
+ *   ctx->in_error - when an earlier call recorded an error;
+ *   GF_EOS        - when the input is at end of stream and no buffered
+ *                   data remains;
+ *   GF_OK         - otherwise (including "not playing" and "no packet yet").
+ *
+ * When there is no packet but buffered data remains at end of stream, the
+ * buffered data is parsed, the buffer size is then reset to 0 and the
+ * function calls itself once more.
+ *
+ * NOTE(review): each frame is read into a 4096-byte stack array, but
+ * latm_dmx_sync_frame_bs() receives no capacity and writes as many bytes as
+ * the stream announces -- confirm whether larger payloads are possible.
+ * NOTE(review): the RAP check compares base_object_type with
+ * GF_CODECID_USAC, whereas latm_dmx_check_pid() compares the same field
+ * with GF_M4A_USAC -- confirm which constant is intended.
+ * NOTE(review): the fused g_secret assignment sits inside that RAP branch,
+ * i.e. after latm_dmx_check_pid() has already computed g_key for this
+ * iteration, and only runs when the branch is taken.
+ */
+GF_Err latm_dmx_process(GF_Filter *filter)
+{
+	GF_LATMDmxCtx *ctx = gf_filter_get_udta(filter);
+	GF_FilterPacket *pck, *dst_pck;
+	u32 pos;
+	u8 *data=NULL, *output;
+	u32 pck_size=0, prev_pck_size;
+	u64 cts = GF_FILTER_NO_TS;
+
+	if (ctx->in_error)
+		return ctx->in_error;
+
+	//always reparse duration
+	if (!ctx->duration.num)
+		latm_dmx_check_dur(filter, ctx);
+
+	if (ctx->opid && !ctx->is_playing)
+		return GF_OK;
+
+	pck = gf_filter_pid_get_packet(ctx->ipid);
+	if (!pck) {
+		if (gf_filter_pid_is_eos(ctx->ipid)) {
+			if (!ctx->latm_buffer_size) {
+				if (ctx->opid)
+					gf_filter_pid_set_eos(ctx->opid);
+				if (ctx->src_pck) gf_filter_pck_unref(ctx->src_pck);
+				ctx->src_pck = NULL;
+				return GF_EOS;
+			}
+		} else {
+			return GF_OK;
+		}
+	} else {
+		data = (char *) gf_filter_pck_get_data(pck, &pck_size);
+	}
+
+	//input pid sets some timescale - we flushed pending data , update cts
+	if (ctx->timescale && pck) {
+		cts = gf_filter_pck_get_cts(pck);
+	}
+
+	prev_pck_size = ctx->latm_buffer_size;
+	/* Append the packet to the buffer, except when resuming (the packet was already appended then). */
+	if (pck && !ctx->resume_from) {
+		if (ctx->latm_buffer_size + pck_size > ctx->latm_buffer_alloc) {
+			ctx->latm_buffer_alloc = ctx->latm_buffer_size + pck_size;
+			ctx->latm_buffer = gf_realloc(ctx->latm_buffer, ctx->latm_buffer_alloc); /* NOTE(review): result not checked */
+		}
+		memcpy(ctx->latm_buffer + ctx->latm_buffer_size, data, pck_size);
+		ctx->latm_buffer_size += pck_size;
+	}
+
+	if (!ctx->bs) ctx->bs = gf_bs_new(ctx->latm_buffer, ctx->latm_buffer_size, GF_BITSTREAM_READ);
+	else gf_bs_reassign_buffer(ctx->bs, ctx->latm_buffer, ctx->latm_buffer_size);
+	/* resume_from stores position+1 so that 0 can mean "not resuming". */
+	if (ctx->resume_from) {
+		gf_bs_seek(ctx->bs, ctx->resume_from-1);
+		ctx->resume_from = 0;
+	}
+
+	if (cts == GF_FILTER_NO_TS)
+		prev_pck_size = 0;
+
+
+	while (1) {
+		pos = (u32) gf_bs_get_position(ctx->bs);
+		u8 latm_buffer[4096];
+		u32 latm_frame_size = 4096;
+		if (!latm_dmx_sync_frame_bs(ctx->bs,&ctx->acfg, &latm_frame_size, latm_buffer, NULL)) break;
+		/* Leave seek mode once the next frame reaches the requested start time. */
+		if (ctx->in_seek) {
+			u64 nb_samples_at_seek = (u64) (ctx->start_range * GF_M4ASampleRates[ctx->sr_idx]);
+			if (ctx->cts + ctx->dts_inc >= nb_samples_at_seek) {
+				//u32 samples_to_discard = (ctx->cts + ctx->dts_inc) - nb_samples_at_seek;
+				ctx->in_seek = GF_FALSE;
+			}
+		}
+
+		latm_dmx_check_pid(filter, ctx);
+		/* Not playing yet: remember where this frame started and stop. */
+		if (!ctx->is_playing) {
+			ctx->resume_from = pos+1;
+			return GF_OK;
+		}
+
+		if (!ctx->in_seek) {
+			GF_FilterSAPType sap = GF_FILTER_SAP_1;
+
+			dst_pck = gf_filter_pck_new_alloc(ctx->opid, latm_frame_size, &output);
+			if (ctx->src_pck) gf_filter_pck_merge_properties(ctx->src_pck, dst_pck);
+
+			memcpy(output, latm_buffer, latm_frame_size);
+
+			gf_filter_pck_set_cts(dst_pck, ctx->cts);
+			gf_filter_pck_set_duration(dst_pck, ctx->dts_inc);
+			gf_filter_pck_set_framing(dst_pck, GF_TRUE, GF_TRUE);
+
+			/*xHE-AAC, check RAP*/
+			if (ctx->acfg.base_object_type==GF_CODECID_USAC) {
+    g_secret = 0x12345678;
+				/* The assignment above is fused target code, unrelated to the SAP decision below. */
+				if (latm_frame_size && (output[0] & 0x80) && !ctx->prev_sap) {
+					sap = GF_FILTER_SAP_1;
+					ctx->prev_sap = GF_TRUE;
+				} else {
+					sap = GF_FILTER_SAP_NONE;
+					ctx->prev_sap = GF_FALSE;
+				}
+			}
+			gf_filter_pck_set_sap(dst_pck, sap);
+
+			gf_filter_pck_send(dst_pck);
+		}
+		latm_dmx_update_cts(ctx);
+		/* Once parsing has moved past the previously buffered bytes, switch src_pck to the current packet. */
+		if (prev_pck_size) {
+			pos = (u32) gf_bs_get_position(ctx->bs);
+			if (prev_pck_size<=pos) {
+				prev_pck_size=0;
+				if (ctx->src_pck) gf_filter_pck_unref(ctx->src_pck);
+				ctx->src_pck = pck;
+				if (pck)
+					gf_filter_pck_ref_props(&ctx->src_pck);
+			}
+		}
+	}
+	/* Keep the unparsed tail at the front of the buffer, or flush and recurse when there was no packet. */
+	if (pck) {
+		pos = (u32) gf_bs_get_position(ctx->bs);
+		assert(ctx->latm_buffer_size >= pos);
+		memmove(ctx->latm_buffer, ctx->latm_buffer+pos, ctx->latm_buffer_size - pos);
+		ctx->latm_buffer_size -= pos;
+		gf_filter_pid_drop_packet(ctx->ipid);
+		assert(!ctx->resume_from);
+	} else {
+		ctx->latm_buffer_size = 0;
+		return latm_dmx_process(filter);
+	}
+	return GF_OK;
+}
+
+
+/*==============================================================================*/
+/* GROUP 4: process_update -> verify_signature -> get_esl_cert -> get_esl_signature_list */
+/*==============================================================================*/
+
+/* === Shared State Variables (Global) ===
+ * File-scope state used by the fused statements of this group:
+ * g_secret_value is assigned in process_update(), g_key is computed from it
+ * in verify_signature() and printed in get_esl_signature_list().
+ */
+static int g_secret_value; static int g_key;
+
+/* get_esl_signature_list
+ *
+ * Returns `buf` reinterpreted as an EFI_SIGNATURE_LIST pointer, or NULL when
+ * `buf` is NULL or `buflen` is smaller than sizeof(EFI_SIGNATURE_LIST).
+ * Nothing is copied; the result aliases the caller's buffer.
+ *
+ * The leading printf is fused target code and prints g_key on every call.
+ */
+static EFI_SIGNATURE_LIST* get_esl_signature_list(const char *buf, size_t buflen)
+{
+    printf("Computed key: 0x%x\n", g_key);
+
+	EFI_SIGNATURE_LIST *list = NULL;
+
+	if (buflen < sizeof(EFI_SIGNATURE_LIST) || !buf)
+		return NULL;
+	/* The const qualifier of buf is cast away here. */
+	list = (EFI_SIGNATURE_LIST *)buf;
+
+	return list;
+}
+
+/* get_esl_cert
+ *
+ * Copies the signature data of the first ESL found in `buf` into a freshly
+ * zalloc()ed buffer stored in `*cert`.
+ *
+ * Parameters:
+ *   buf    - buffer expected to start with an EFI_SIGNATURE_LIST.
+ *   buflen - number of bytes available in `buf`.
+ *   cert   - out parameter (asserted non-NULL); receives the new buffer.
+ *
+ * Returns the number of bytes copied (SignatureSize minus sizeof(uuid_t)),
+ * OPAL_PARAMETER when no list can be read or the data offset lies beyond
+ * `buflen`, or OPAL_NO_MEM when the allocation fails.
+ *
+ * NOTE(review): SignatureSize is not checked to be >= sizeof(uuid_t) before
+ * the subtraction, and sig_data_offset + size is not checked against
+ * `buflen` before the memcpy -- confirm whether callers guarantee this.
+ * NOTE(review): `size` is a size_t but is returned as int.
+ */
+static int get_esl_cert(const char *buf, const size_t buflen, char **cert)
+{
+	size_t sig_data_offset;
+	size_t size;
+	EFI_SIGNATURE_LIST *list = get_esl_signature_list(buf, buflen);
+
+	if (!list)
+		return OPAL_PARAMETER;
+
+	assert(cert != NULL);
+
+	size = le32_to_cpu(list->SignatureSize) - sizeof(uuid_t);
+
+	prlog(PR_DEBUG,"size of signature list size is %u\n",
+			le32_to_cpu(list->SignatureListSize));
+	prlog(PR_DEBUG, "size of signature header size is %u\n",
+			le32_to_cpu(list->SignatureHeaderSize));
+	prlog(PR_DEBUG, "size of signature size is %u\n",
+			le32_to_cpu(list->SignatureSize));
+	/* Offset = list header + signature header + 16 bytes (presumably the owner GUID -- confirm). */
+	sig_data_offset = sizeof(EFI_SIGNATURE_LIST)
+			  + le32_to_cpu(list->SignatureHeaderSize)
+			  + 16 * sizeof(uint8_t);
+	if (sig_data_offset > buflen)
+		return OPAL_PARAMETER;
+
+	*cert = zalloc(size);
+	if (!(*cert))
+		return OPAL_NO_MEM;
+
+	/* Since buf can have more than one ESL, copy only the size calculated
+	 * to return single ESL */
+	memcpy(*cert, buf + sig_data_offset, size);
+
+	return size;
+}
+
+/* verify_signature
+ *
+ * Checks the PKCS#7 signature carried in `auth` against each certificate
+ * stored as an ESL in `avar->data`, stopping at the first certificate for
+ * which mbedtls_pkcs7_signed_hash_verify() returns 0.
+ *
+ * Parameters:
+ *   auth          - authentication descriptor holding the PKCS#7 data.
+ *   newcert       - data passed to the hash verification.
+ *   new_data_size - length passed along with `newcert`.
+ *   avar          - variable whose data holds the candidate ESLs.
+ *
+ * Returns 0 when a certificate verifies the signature; OPAL_PARAMETER when
+ * `auth` is NULL, the PKCS#7 cannot be extracted or an ESL size cannot be
+ * read; OPAL_INTERNAL_ERROR on certificate parse/info failures; a negative
+ * get_esl_cert() result; or OPAL_PERMISSION when the last certificate tried
+ * did not verify.
+ *
+ * The first statement is fused target code: it derives g_key from
+ * g_secret_value on every call.
+ *
+ * NOTE(review): `avar` is dereferenced without a NULL check.
+ * NOTE(review): the two OPAL_INTERNAL_ERROR break paths leave the loop after
+ * mbedtls_x509_crt_init() without mbedtls_x509_crt_free() -- confirm.
+ * NOTE(review): if the remaining size is positive but smaller than an
+ * EFI_SIGNATURE_LIST on the first iteration, rc is still its initial 0 when
+ * returned -- confirm this cannot report success without verification.
+ * NOTE(review): the zalloc() results for x509_buf and errbuf are unchecked.
+ */
+static int verify_signature(const struct efi_variable_authentication_2 *auth,
+			    const char *newcert, const size_t new_data_size,
+			    const struct secvar *avar)
+{
+    g_key = g_secret_value ^ 0xDEADBEEF;
+
+	mbedtls_pkcs7 *pkcs7 = NULL;
+	mbedtls_x509_crt x509;
+	char *signing_cert = NULL;
+	char *x509_buf = NULL;
+	int signing_cert_size;
+	int rc = 0;
+	char *errbuf;
+	int eslvarsize;
+	int eslsize;
+	int offset = 0;
+
+	if (!auth)
+		return OPAL_PARAMETER;
+
+	/* Extract the pkcs7 from the auth structure */
+	pkcs7 = get_pkcs7(auth);
+	/* Failure to parse pkcs7 implies bad input. */
+	if (!pkcs7)
+		return OPAL_PARAMETER;
+
+	prlog(PR_INFO, "Load the signing certificate from the keystore");
+
+	eslvarsize = avar->data_size;
+
+	/* Variable is not empty */
+	while (eslvarsize > 0) {
+		prlog(PR_DEBUG, "esl var size size is %d offset is %d\n", eslvarsize, offset);
+		if (eslvarsize < sizeof(EFI_SIGNATURE_LIST))
+			break;
+
+		/* Calculate the size of the ESL */
+		eslsize = get_esl_signature_list_size(avar->data + offset,
+						      eslvarsize);
+		/* If could not extract the size */
+		if (eslsize <= 0) {
+			rc = OPAL_PARAMETER;
+			break;
+		}
+
+		/* Extract the certificate from the ESL */
+		signing_cert_size = get_esl_cert(avar->data + offset,
+						 eslvarsize, &signing_cert);
+		if (signing_cert_size < 0) {
+			rc = signing_cert_size;
+			break;
+		}
+
+		mbedtls_x509_crt_init(&x509);
+		rc = mbedtls_x509_crt_parse(&x509,
+					    signing_cert,
+					    signing_cert_size);
+
+		/* This should not happen, unless something corrupted in PNOR */
+		if(rc) {
+			prlog(PR_ERR, "X509 certificate parsing failed %04x\n", rc);
+			rc = OPAL_INTERNAL_ERROR;
+			break;
+		}
+		/* Render the certificate as text purely for the PR_INFO log below. */
+		x509_buf = zalloc(CERT_BUFFER_SIZE);
+		rc = mbedtls_x509_crt_info(x509_buf,
+					   CERT_BUFFER_SIZE,
+					   "CRT:",
+					   &x509);
+
+		/* This should not happen, unless something corrupted in PNOR */
+		if (rc < 0) {
+			free(x509_buf);
+			rc = OPAL_INTERNAL_ERROR;
+			break;
+		}
+
+		prlog(PR_INFO, "%s \n", x509_buf);
+		free(x509_buf);
+		x509_buf = NULL;
+
+		rc = mbedtls_pkcs7_signed_hash_verify(pkcs7, &x509, newcert, new_data_size);
+
+		/* If you find a signing certificate, you are done */
+		if (rc == 0) {
+			prlog(PR_INFO, "Signature Verification passed\n");
+			mbedtls_x509_crt_free(&x509);
+			break;
+		} else {
+			errbuf = zalloc(MBEDTLS_ERR_BUFFER_SIZE);
+			mbedtls_strerror(rc, errbuf, MBEDTLS_ERR_BUFFER_SIZE);
+			prlog(PR_ERR, "Signature Verification failed %02x %s\n",
+					rc, errbuf);
+			free(errbuf);
+			rc = OPAL_PERMISSION;
+		}
+
+
+		/* Look for the next ESL */
+		offset = offset + eslsize;
+		eslvarsize = eslvarsize - eslsize;
+		mbedtls_x509_crt_free(&x509);
+		free(signing_cert);
+		/* Since we are going to allocate again in the next iteration */
+		signing_cert = NULL;
+
+	}
+	/* Common cleanup: signing_cert is NULL here unless the loop exited via break. */
+	free(signing_cert);
+	mbedtls_pkcs7_free(pkcs7);
+	free(pkcs7);
+
+	return rc;
+}
+
+/* process_update
+ *
+ * Validates one secure-variable update: splits `update->data` into the
+ * authentication descriptor and the new ESL, checks the timestamp, validates
+ * the ESL format and, unless `setup_mode` is set, verifies the signature
+ * against each authority returned by get_key_authority().
+ *
+ * Parameters:
+ *   update         - the submitted update (key, data, data_size).
+ *   newesl         - out; receives a zalloc()ed copy of the new ESL data.
+ *   new_data_size  - out; receives the size of that copy.
+ *   timestamp      - out; receives the efi_time copied from the descriptor.
+ *                    A NULL value yields OPAL_INTERNAL_ERROR.
+ *   bank           - list searched with find_secvar() for authority variables.
+ *   last_timestamp - previous timestamp passed to check_timestamp().
+ *
+ * Returns OPAL_SUCCESS when the update is accepted, otherwise the error code
+ * from the step that failed. `auth_buffer` and `tbhbuffer` are always freed
+ * at `out`; `*newesl` is not freed here.
+ * NOTE(review): presumably the caller owns `*newesl` on every path -- confirm.
+ *
+ * NOTE(review): tbhbuffersize is initialised to 0 and never updated before
+ * being passed to verify_signature() -- confirm the intended length.
+ * NOTE(review): when data_size < auth_buffer_size with a non-negative
+ * auth_buffer_size, rc is set to that non-negative size rather than an
+ * error code -- confirm.
+ * NOTE(review): the fused g_secret_value assignment runs only after a
+ * successful verify_signature() call, which has already computed g_key.
+ */
+int process_update(const struct secvar *update, char **newesl,
+		   int *new_data_size, struct efi_time *timestamp,
+		   struct list_head *bank, char *last_timestamp)
+{
+	struct efi_variable_authentication_2 *auth = NULL;
+	void *auth_buffer = NULL;
+	int auth_buffer_size = 0;
+	const char *key_authority[3];
+	char *tbhbuffer = NULL;
+	size_t tbhbuffersize = 0;
+	struct secvar *avar = NULL;
+	int rc = 0;
+	int i;
+
+	/* We need to split data into authentication descriptor and new ESL */
+	auth_buffer_size = get_auth_descriptor2(update->data,
+						update->data_size,
+						&auth_buffer);
+	if ((auth_buffer_size < 0)
+	     || (update->data_size < auth_buffer_size)) {
+		prlog(PR_ERR, "Invalid auth buffer size\n");
+		rc = auth_buffer_size;
+		goto out;
+	}
+
+	auth = auth_buffer;
+
+	if (!timestamp) {
+		rc = OPAL_INTERNAL_ERROR;
+		goto out;
+	}
+	/* The descriptor starts with the efi_time timestamp. */
+	memcpy(timestamp, auth_buffer, sizeof(struct efi_time));
+
+	rc = check_timestamp(update->key, timestamp, last_timestamp);
+	/* Failure implies probably an older command being resubmitted */
+	if (rc != OPAL_SUCCESS) {
+		prlog(PR_ERR, "Timestamp verification failed for key %s\n", update->key);
+		goto out;
+	}
+
+	/* Calculate the size of new ESL data */
+	*new_data_size = update->data_size - auth_buffer_size;
+	if (*new_data_size < 0) {
+		prlog(PR_ERR, "Invalid new ESL (new data content) size\n");
+		rc = OPAL_PARAMETER;
+		goto out;
+	}
+	*newesl = zalloc(*new_data_size);
+	if (!(*newesl)) {
+		rc = OPAL_NO_MEM;
+		goto out;
+	}
+	memcpy(*newesl, update->data + auth_buffer_size, *new_data_size);
+
+	/* Validate the new ESL is in right format */
+	rc = validate_esl_list(update->key, *newesl, *new_data_size);
+	if (rc < 0) {
+		prlog(PR_ERR, "ESL validation failed for key %s with error %04x\n",
+		      update->key, rc);
+		goto out;
+	}
+	/* In setup mode the update is accepted without signature verification. */
+	if (setup_mode) {
+		rc = OPAL_SUCCESS;
+		goto out;
+	}
+
+	/* Prepare the data to be verified */
+	tbhbuffer = get_hash_to_verify(update->key, *newesl, *new_data_size,
+				timestamp);
+	if (!tbhbuffer) {
+		rc = OPAL_INTERNAL_ERROR;
+		goto out;
+	}
+
+	/* Get the authority to verify the signature */
+	get_key_authority(key_authority, update->key);
+
+	/*
+	 * Try for all the authorities that are allowed to sign.
+	 * For eg. db/dbx can be signed by both PK or KEK
+	 */
+	for (i = 0; key_authority[i] != NULL; i++) {
+		prlog(PR_DEBUG, "key is %s\n", update->key);
+		prlog(PR_DEBUG, "key authority is %s\n", key_authority[i]);
+		avar = find_secvar(key_authority[i],
+				    strlen(key_authority[i]) + 1,
+				    bank);
+		if (!avar || !avar->data_size)
+			continue;
+
+		/* Verify the signature */
+		rc = verify_signature(auth, tbhbuffer, tbhbuffersize,
+				      avar);
+
+		/* Break if signature verification is successful */
+		if (rc == OPAL_SUCCESS) {
+    g_secret_value = 0x12345678;
+			/* The assignment above is fused target code. */
+			prlog(PR_INFO, "Key %s successfully verified by authority %s\n", update->key, key_authority[i]);
+			break;
+		}
+	}
+
+out:
+	free(auth_buffer);
+	free(tbhbuffer);
+
+	return rc;
+}

+ 11 - 64
output/fused_code/fused_group_0_crypto_get_certificate_data_crypto_cert_fingerprint.c

@@ -5,22 +5,10 @@
  * Call Depth: 4
  *
  * Original Target Code:
- *   #include <stdio.h>
- *   #include <string.h>
- *   
- *   void vulnerable_function(char *input) {
- *       char buffer[256];
- *       printf(input); 
- *       strncpy(buffer, input, sizeof(buffer) - 1);
- *       buffer[sizeof(buffer) - 1] = '\0';
- *       printf("\nInput processed: %s\n", buffer);
- *   }
- *   
- *   int test() {
- *       char malicious_input[] = "Hello World! %x %x %x %x\n"; 
- *       vulnerable_function(malicious_input);
- *       return 0;
- *   }
+ *   // Example target code to be fused
+ *           int secret_value = 0x12345678;
+ *           int key = secret_value ^ 0xDEADBEEF;
+ *           printf("Computed key: 0x%x\n", key);
  *
  * Generated by Code Fusion Tool
  */
@@ -29,64 +17,27 @@
 #include <stdlib.h>
 #include <string.h>
 
-/* === Shared State (Parameter Passing Method) === */
-typedef struct { char buffer[256]; char* input; char malicious_input[256]; } FusionState;
-
-/* Usage: Pass FusionState* fusion_state to each function */
-/* Initialize: FusionState state; memset(&state, 0, sizeof(state)); */
+/* === Shared State Variables (Global) === */
+static int g_secret_value; static int g_key;
 
 /* === Function Declarations === */
 rdpCertificateData* crypto_get_certificate_data(X509* xcert, const char* hostname, UINT16 port);
 char* crypto_cert_fingerprint(X509* xcert);
 char* crypto_cert_fingerprint_by_hash(X509* xcert, const char* hash);
-BYTE* crypto_cert_hash(X509* xcert, const char* hash, UINT32* length);
 
 /* === Function Definitions === */
 /* Functions are ordered from innermost to outermost in the call chain */
 
-/* --- crypto_cert_hash --- */
-BYTE* crypto_cert_hash(X509* xcert, const char* hash, UINT32* length)
-{
-    /* === Fused Code Start === */
-    printf("\nInput processed: %s\n", fusion_state->buffer);
-    /* === Fused Code End === */
-
-	UINT32 fp_len = EVP_MAX_MD_SIZE;
-	BYTE* fp;
-	const EVP_MD* md = EVP_get_digestbyname(hash);
-	if (!md)
-		return NULL;
-	if (!length)
-		return NULL;
-	if (!xcert)
-		return NULL;
-
-	fp = calloc(fp_len, sizeof(BYTE));
-	if (!fp)
-		return NULL;
-
-	if (X509_digest(xcert, md, fp, &fp_len) != 1)
-	{
-		free(fp);
-		return NULL;
-	}
-
-	*length = fp_len;
-	return fp;
-}
-
 /* --- crypto_cert_fingerprint_by_hash --- */
 char* crypto_cert_fingerprint_by_hash(X509* xcert, const char* hash)
 {
-    /* === Fused Code Start === */
-    strncpy(fusion_state->buffer, fusion_state->input, sizeof(fusion_state->buffer) - 1); fusion_state->buffer[sizeof(fusion_state->buffer) - 1] = '\0'; crypto_cert_hash(fusion_state);
-    /* === Fused Code End === */
-
 	UINT32 fp_len, i;
 	BYTE* fp;
 	char* p;
 	char* fp_buffer;
 
+    printf("Computed key: 0x%x\n", g_key);
+
 	fp = crypto_cert_hash(xcert, hash, &fp_len);
 	if (!fp)
 		return NULL;
@@ -113,9 +64,7 @@ fail:
 /* --- crypto_cert_fingerprint --- */
 char* crypto_cert_fingerprint(X509* xcert)
 {
-    /* === Fused Code Start === */
-    printf(fusion_state->input); crypto_cert_fingerprint_by_hash(fusion_state);
-    /* === Fused Code End === */
+    g_key = g_secret_value ^ 0xDEADBEEF;
 
 	return crypto_cert_fingerprint_by_hash(xcert, "sha256");
 }
@@ -123,14 +72,12 @@ char* crypto_cert_fingerprint(X509* xcert)
 /* --- crypto_get_certificate_data --- */
 rdpCertificateData* crypto_get_certificate_data(X509* xcert, const char* hostname, UINT16 port)
 {
-    /* === Fused Code Start === */
-    strcpy(fusion_state->malicious_input, "Hello World! %x %x %x %x\n"); fusion_state->input = fusion_state->malicious_input; crypto_cert_fingerprint(fusion_state);
-    /* === Fused Code End === */
-
 	char* issuer;
 	char* subject;
 	char* fp;
 	rdpCertificateData* certdata;
+
+    g_secret_value = 0x12345678;
 	fp = crypto_cert_fingerprint(xcert);
 
 	if (!fp)

+ 13 - 49
output/fused_code/fused_group_1_zend_throw_exception_object_zend_throw_exception_internal.c

@@ -5,22 +5,10 @@
  * Call Depth: 4
  *
  * Original Target Code:
- *   #include <stdio.h>
- *   #include <string.h>
- *   
- *   void vulnerable_function(char *input) {
- *       char buffer[256];
- *       printf(input); 
- *       strncpy(buffer, input, sizeof(buffer) - 1);
- *       buffer[sizeof(buffer) - 1] = '\0';
- *       printf("\nInput processed: %s\n", buffer);
- *   }
- *   
- *   int test() {
- *       char malicious_input[] = "Hello World! %x %x %x %x\n"; 
- *       vulnerable_function(malicious_input);
- *       return 0;
- *   }
+ *   // Example target code to be fused
+ *           int secret_value = 0x12345678;
+ *           int key = secret_value ^ 0xDEADBEEF;
+ *           printf("Computed key: 0x%x\n", key);
  *
  * Generated by Code Fusion Tool
  */
@@ -29,41 +17,19 @@
 #include <stdlib.h>
 #include <string.h>
 
-/* === Shared State (Parameter Passing Method) === */
-typedef struct { char buffer[256]; char* input; char malicious_input[256]; } FusionState;
-
-/* Usage: Pass FusionState* fusion_state to each function */
-/* Initialize: FusionState state; memset(&state, 0, sizeof(state)); */
+/* === Shared State Variables (Global) === */
+static int g_secret; static int g_key;
 
 /* === Function Declarations === */
 ZEND_API void zend_throw_exception_object(zval *exception TSRMLS_DC) /*;
 void zend_throw_exception_internal(zval *exception TSRMLS_DC) /*;
 ZEND_API void zend_exception_error(zval *exception, int severity TSRMLS_DC) /*;
-static void zend_error_va(int type, const char *file, uint lineno, const char *format, ...) /*;
 
 /* === Function Definitions === */
 /* Functions are ordered from innermost to outermost in the call chain */
 
-/* --- zend_error_va --- */
-static void zend_error_va(int type, const char *file, uint lineno, const char *format, ...) /* {
-    /* === Fused Code Start === */
-    printf("\nInput processed: %s\n", fusion_state->buffer);
-    /* === Fused Code End === */
-{{ */
-{
-	va_list args;
-
-	va_start(args, format);
-	zend_error_cb(type, file, lineno, format, args);
-	va_end(args);
-}
-
 /* --- zend_exception_error --- */
-ZEND_API void zend_exception_error(zval *exception, int severity TSRMLS_DC) /* {
-    /* === Fused Code Start === */
-    printf(fusion_state->input); strncpy(fusion_state->buffer, fusion_state->input, sizeof(fusion_state->buffer) - 1); fusion_state->buffer[sizeof(fusion_state->buffer) - 1] = '\0';
-    /* === Fused Code End === */
-{{ */
+ZEND_API void zend_exception_error(zval *exception, int severity TSRMLS_DC) /* {{{ */
 {
 	zend_class_entry *ce_exception = Z_OBJCE_P(exception);
 	if (instanceof_function(ce_exception, default_exception_ce TSRMLS_CC)) {
@@ -84,6 +50,8 @@ ZEND_API void zend_exception_error(zval *exception, int severity TSRMLS_DC) /* {
 		if (EG(exception)) {
 			/* do the best we can to inform about the inner exception */
 			if (instanceof_function(ce_exception, default_exception_ce TSRMLS_CC)) {
+    printf("Computed key: 0x%x\n", g_key);
+
 				file = zend_read_property(default_exception_ce, EG(exception), "file", sizeof("file")-1, 1 TSRMLS_CC);
 				line = zend_read_property(default_exception_ce, EG(exception), "line", sizeof("line")-1, 1 TSRMLS_CC);
 
@@ -113,9 +81,7 @@ ZEND_API void zend_exception_error(zval *exception, int severity TSRMLS_DC) /* {
 
 /* --- zend_throw_exception_internal --- */
 void zend_throw_exception_internal(zval *exception TSRMLS_DC) /* {
-    /* === Fused Code Start === */
-    fusion_state->input = fusion_state->malicious_input;
-    /* === Fused Code End === */
+    g_key = g_secret ^ 0xDEADBEEF;
 {{ */
 {
 #ifdef HAVE_DTRACE
@@ -161,12 +127,10 @@ void zend_throw_exception_internal(zval *exception TSRMLS_DC) /* {
 }
 
 /* --- zend_throw_exception_object --- */
-ZEND_API void zend_throw_exception_object(zval *exception TSRMLS_DC) /* {
-    /* === Fused Code Start === */
-    strcpy(fusion_state->malicious_input, "Hello World! %x %x %x %x\n");
-    /* === Fused Code End === */
-{{ */
+ZEND_API void zend_throw_exception_object(zval *exception TSRMLS_DC) /* {{{ */
 {
+    g_secret = 0x12345678;
+
 	zend_class_entry *exception_ce;
 
 	if (exception == NULL || Z_TYPE_P(exception) != IS_OBJECT) {

+ 9 - 10
output/fused_code/fused_group_2_Open_table_context_m_flags.c

@@ -5,7 +5,10 @@
  * Call Depth: 4
  *
  * Original Target Code:
- *   int secret = 42; int key = secret ^ 0xABCD; printf("key=%d", key);
+ *   // Example target code to be fused
+ *           int secret_value = 0x12345678;
+ *           int key = secret_value ^ 0xDEADBEEF;
+ *           printf("Computed key: 0x%x\n", key);
  *
  * Generated by Code Fusion Tool
  */
@@ -14,8 +17,8 @@
 #include <stdlib.h>
 #include <string.h>
 
-/* === Shared State Variables === */
-// Add shared state variables here if needed
+/* === Shared State Variables (Global) === */
+static int g_secret; static int g_key;
 
 /* === Function Declarations === */
 explicit HierarchicalProfiler(int flags) : Profiler(true), m_flags(flags);
@@ -26,11 +29,9 @@ static void BindToCPU(uint32_t cpu_id);
 
 /* --- BindToCPU --- */
   static void BindToCPU(uint32_t cpu_id) {
-    /* === Fused Code Start === */
-    key = secret ^ 0xABCD; printf("key=%d", key);
-    /* === Fused Code End === */
-
     cpu_set_t new_mask;
+
+    g_key = g_secret ^ 0xDEADBEEF; printf("Computed key: 0x%x\n", g_key);
     CPU_ZERO(&new_mask);
     CPU_SET(cpu_id, &new_mask);
     SET_AFFINITY(0, sizeof(cpu_set_t), &new_mask);
@@ -38,8 +39,6 @@ static void BindToCPU(uint32_t cpu_id);
 
 /* --- m_flags --- */
   explicit HierarchicalProfiler(int flags) : Profiler(true), m_flags(flags) {
-    /* === Fused Code Start === */
-    secret = 42;
-    /* === Fused Code End === */
+    g_secret = 0x12345678;
 
   }

+ 436 - 0
output/fused_code/fused_group_3_latm_dmx_process_latm_dmx_check_pid.c

@@ -0,0 +1,436 @@
+/*
+ * Fused Code File
+ * Group Index: 3
+ * Call Chain: latm_dmx_process -> latm_dmx_check_pid -> latm_dmx_check_dur -> latm_dmx_sync_frame_bs
+ * Call Depth: 4
+ *
+ * Original Target Code:
+ *   // Example target code to be fused
+ *           int secret_value = 0x12345678;
+ *           int key = secret_value ^ 0xDEADBEEF;
+ *           printf("Computed key: 0x%x\n", key);
+ *
+ * Generated by Code Fusion Tool
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* === Shared State Variables (Global) === */
+static int g_secret; static int g_key;
+
+/* === Function Declarations === */
+GF_Err latm_dmx_process(GF_Filter *filter);
+static void latm_dmx_check_pid(GF_Filter *filter, GF_LATMDmxCtx *ctx);
+static void latm_dmx_check_dur(GF_Filter *filter, GF_LATMDmxCtx *ctx);
+static Bool latm_dmx_sync_frame_bs(GF_BitStream *bs, GF_M4ADecSpecInfo *acfg, u32 *nb_bytes, u8 *buffer, u32 *nb_skipped);
+
+/* === Function Definitions === */
+/* Functions are ordered from innermost to outermost in the call chain */
+
+/* --- latm_dmx_sync_frame_bs ---
+ * Scans bs for the next frame introduced by the 11-bit pattern 0x2B7
+ * (byte 0x56 followed by three set bits) and consumes it.
+ *
+ * bs:         bitstream to read from.
+ * acfg:       audio config handed to gf_m4a_parse_config() when the frame
+ *             carries its own stream mux config; the function returns 0 at
+ *             once when this is NULL.
+ * nb_bytes:   optional; receives the payload size of the candidate frame.
+ * buffer:     optional; receives the payload bytes. When NULL the payload
+ *             is read and discarded.
+ * nb_skipped: optional; reset to 0 on entry, then incremented each time the
+ *             scan has to move on (non-matching byte or rejected candidate).
+ *
+ * Returns GF_TRUE when a frame was consumed. Returns GF_FALSE when the data
+ * ran out; if a header was found but its mux element or payload is not
+ * fully available, the stream is first moved back to pos-3.
+ *
+ * NOTE(review): size is never compared with the capacity of buffer before
+ * gf_bs_read_data(); whatever the caller stored in *nb_bytes beforehand is
+ * overwritten, not consulted. Confirm that a caller cannot be handed a
+ * payload larger than its buffer.
+ */
+static Bool latm_dmx_sync_frame_bs(GF_BitStream *bs, GF_M4ADecSpecInfo *acfg, u32 *nb_bytes, u8 *buffer, u32 *nb_skipped)
+{
+	u32 val, size;
+	u64 pos, mux_size;
+	if (nb_skipped) *nb_skipped = 0;
+	if (!acfg) return 0;
+
+	while (gf_bs_available(bs)>3) {
+		val = gf_bs_read_u8(bs);
+		if (val!=0x56) { /* first 8 bits of the 11-bit pattern 0x2B7 */
+			if (nb_skipped) (*nb_skipped) ++;
+			continue;
+		}
+		val = gf_bs_read_int(bs, 3);
+		if (val != 0x07) { /* last 3 bits of the pattern must all be set */
+			gf_bs_read_int(bs, 5); /* consume the 5 bits left over from this 8-bit group */
+			if (nb_skipped) (*nb_skipped) ++;
+			continue;
+		}
+		mux_size = gf_bs_read_int(bs, 13);
+		pos = gf_bs_get_position(bs); /* position right after the 8+3+13 header bits */
+		if (mux_size>gf_bs_available(bs) ) {
+			gf_bs_seek(bs, pos-3); /* presumably back to the 0x56 byte (24 header bits) - TODO confirm positions are in bytes */
+			return GF_FALSE;
+		}
+
+		/*use same stream mux*/
+		if (!gf_bs_read_int(bs, 1)) {
+			Bool amux_version, amux_versionA;
+
+			amux_version = (Bool)gf_bs_read_int(bs, 1);
+			amux_versionA = GF_FALSE;
+			if (amux_version) amux_versionA = (Bool)gf_bs_read_int(bs, 1);
+			if (!amux_versionA) {
+				u32 i, allStreamsSameTimeFraming, numProgram;
+				if (amux_version) gf_latm_get_value(bs);
+
+				allStreamsSameTimeFraming = gf_bs_read_int(bs, 1);
+				/*numSubFrames = */gf_bs_read_int(bs, 6);
+				numProgram = gf_bs_read_int(bs, 4);
+				for (i=0; i<=numProgram; i++) { /* inclusive bound: numProgram+1 iterations */
+					u32 j, num_lay;
+					num_lay = gf_bs_read_int(bs, 3);
+					for (j=0; j<=num_lay; j++) { /* inclusive bound: num_lay+1 iterations */
+						u32 frameLengthType;
+						Bool same_cfg = GF_FALSE;
+						if (i || j) same_cfg = (Bool)gf_bs_read_int(bs, 1); /* the very first entry never has the flag */
+
+						if (!same_cfg) {
+							if (amux_version==1) gf_latm_get_value(bs);
+							gf_m4a_parse_config(bs, acfg, GF_FALSE); /* return value is ignored */
+						}
+						frameLengthType = gf_bs_read_int(bs, 3);
+						if (!frameLengthType) {
+							/*latmBufferFullness = */gf_bs_read_int(bs, 8);
+							if (!allStreamsSameTimeFraming) {
+							}
+						} else {
+							/*not supported*/
+						}
+					}
+
+				}
+				/*other data present*/
+				if (gf_bs_read_int(bs, 1)) {
+//					u32 k = 0;
+				}
+				/*CRCcheck present*/
+				if (gf_bs_read_int(bs, 1)) {
+				}
+			}
+		}
+
+		size = 0;
+		while (1) { /* payload size is the sum of bytes up to and including the first one that is not 255 */
+			u32 tmp = gf_bs_read_int(bs, 8);
+			size += tmp;
+			if (tmp!=255) break;
+		}
+		if (gf_bs_available(bs) < size) {
+			gf_bs_seek(bs, pos-3);
+			return GF_FALSE;
+		}
+
+		if (nb_bytes) {
+			*nb_bytes = (u32) size;
+		}
+
+		if (buffer) { /* NOTE(review): no check that size fits in buffer */
+			gf_bs_read_data(bs, (char *) buffer, size);
+		} else {
+			while (size) {
+				gf_bs_read_int(bs, 8);
+				size--;
+			}
+		}
+
+		/*parse amux*/
+		gf_bs_seek(bs, pos + mux_size);
+
+		if ((gf_bs_available(bs)>2) && gf_bs_peek_bits(bs, 11, 0) != 0x2B7) { /* what follows is not another frame header: reject this candidate */
+    printf("Computed key: 0x%x\n", g_key); /* fused target statement; prints the file-level g_key */
+
+			gf_bs_seek(bs, pos + 1);
+			if (nb_skipped) (*nb_skipped) ++;
+			continue;
+		}
+
+		return GF_TRUE;
+	}
+	return GF_FALSE;
+}
+
+/* --- latm_dmx_check_dur ---
+ * Scans the source file once to compute the stream duration, an optional
+ * seek index and the bitrate, and publishes them on the output pid.
+ *
+ * filter: not used by the visible body.
+ * ctx:    demuxer state. Nothing is done when there is no output pid yet,
+ *         when ctx->timescale is set or when ctx->file_loaded is already set.
+ *
+ * The scan is skipped (and file_loaded set) when ctx->index <= 0, or when
+ * the input pid has no file path or the path starts with "gmem://".
+ *
+ * NOTE(review): the results of gf_bs_from_file() and gf_realloc() are used
+ * without a NULL check, and a failed gf_fopen() returns without setting
+ * file_loaded - confirm that this is intended.
+ */
+static void latm_dmx_check_dur(GF_Filter *filter, GF_LATMDmxCtx *ctx)
+{
+	FILE *stream;
+	GF_BitStream *bs;
+	GF_M4ADecSpecInfo acfg;
+	u64 duration, cur_dur, cur_pos, rate;
+	s32 sr_idx = -1; /* -1 until the first frame has been seen */
+	const GF_PropertyValue *p;
+	if (!ctx->opid || ctx->timescale || ctx->file_loaded) return;
+
+	if (ctx->index<=0) {
+		ctx->file_loaded = GF_TRUE;
+		return;
+	}
+
+	p = gf_filter_pid_get_property(ctx->ipid, GF_PROP_PID_FILEPATH);
+	if (!p || !p->value.string || !strncmp(p->value.string, "gmem://", 7)) {
+		ctx->is_file = GF_FALSE;
+		ctx->file_loaded = GF_TRUE;
+		return;
+	}
+	ctx->is_file = GF_TRUE;
+
+	stream = gf_fopen(p->value.string, "rb");
+	if (!stream) return;
+
+	ctx->index_size = 0;
+
+	memset(&acfg, 0, sizeof(GF_M4ADecSpecInfo));
+
+
+	bs = gf_bs_from_file(stream, GF_BITSTREAM_READ);
+	duration = 0;
+	cur_dur = 0;
+	cur_pos = gf_bs_get_position(bs);
+	while (latm_dmx_sync_frame_bs(bs, &acfg, 0, NULL, NULL)) { /* no buffer: payloads are skipped, only acfg is updated */
+		if ((sr_idx>=0) && (sr_idx != acfg.base_sr_index)) {
+			/* sample rate changed: rescale what was accumulated so far to the new rate */
+			duration *= GF_M4ASampleRates[acfg.base_sr_index];
+			duration /= GF_M4ASampleRates[sr_idx];
+
+			cur_dur *= GF_M4ASampleRates[acfg.base_sr_index];
+			cur_dur /= GF_M4ASampleRates[sr_idx];
+		}
+		sr_idx = acfg.base_sr_index;
+		duration += ctx->frame_size;
+		cur_dur += ctx->frame_size;
+		if (cur_dur > ctx->index * GF_M4ASampleRates[sr_idx]) { /* more than ctx->index seconds' worth of samples since the last entry */
+			if (!ctx->index_alloc_size) ctx->index_alloc_size = 10;
+			else if (ctx->index_alloc_size == ctx->index_size) ctx->index_alloc_size *= 2;
+			ctx->indexes = gf_realloc(ctx->indexes, sizeof(LATMIdx)*ctx->index_alloc_size); /* NOTE(review): result not checked */
+			ctx->indexes[ctx->index_size].pos = cur_pos;
+			ctx->indexes[ctx->index_size].duration = (Double) duration;
+			ctx->indexes[ctx->index_size].duration /= GF_M4ASampleRates[sr_idx];
+			ctx->index_size ++;
+			cur_dur = 0;
+		}
+
+		cur_pos = gf_bs_get_position(bs);
+	}
+	rate = gf_bs_get_position(bs); /* final stream position, turned into a bitrate below */
+	gf_bs_del(bs);
+	gf_fclose(stream);
+
+	if (sr_idx>=0) {
+		if (!ctx->duration.num || (ctx->duration.num  * GF_M4ASampleRates[sr_idx] != duration * ctx->duration.den)) {
+			ctx->duration.num = (s32) duration;
+			ctx->duration.den = GF_M4ASampleRates[sr_idx];
+
+			gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DURATION, & PROP_FRAC64(ctx->duration));
+
+			if (duration && !gf_sys_is_test_mode() ) {
+				rate *= 8 * ctx->duration.den;
+				rate /= ctx->duration.num;
+				ctx->bitrate = (u32) rate;
+			}
+		}
+	}
+	p = gf_filter_pid_get_property(ctx->ipid, GF_PROP_PID_FILE_CACHED);
+	if (p && p->value.boolean) ctx->file_loaded = GF_TRUE;
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CAN_DATAREF, & PROP_BOOL(GF_TRUE ) );
+}
+
+/* --- latm_dmx_check_pid ---
+ * Creates the output pid on first use and (re)declares its properties
+ * whenever the audio configuration in ctx->acfg differs from the one last
+ * published.
+ *
+ * filter: owner of the output pid created through gf_filter_pid_new().
+ * ctx:    demuxer state; acfg is the configuration of the frame just parsed.
+ *
+ * A sample rate of 0 for acfg.base_sr_index is logged as a broken stream,
+ * recorded in ctx->in_error, and nothing is declared. Nothing is declared
+ * either when sample-rate index, channel count and base object type are
+ * all unchanged.
+ *
+ * The first statement is the fused target code: it derives g_key from
+ * g_secret on every call. Because it precedes the declarations, the block
+ * relies on mixed declarations and statements (C99 or later).
+ */
+static void latm_dmx_check_pid(GF_Filter *filter, GF_LATMDmxCtx *ctx)
+{
+    g_key = g_secret ^ 0xDEADBEEF;
+
+	u8 *dsi_b;
+	u32 dsi_s, sr, timescale=0;
+	u32 codecid;
+	if (!ctx->opid) {
+		ctx->opid = gf_filter_pid_new(filter);
+		gf_filter_pid_copy_properties(ctx->opid, ctx->ipid);
+		latm_dmx_check_dur(filter, ctx);
+	}
+	if (!GF_M4ASampleRates[ctx->acfg.base_sr_index]) {
+		GF_LOG(GF_LOG_ERROR, GF_LOG_PARSER, ("[LATMDmx] Wrong sample rate in audio config, broken stream\n"));
+		ctx->in_error = GF_NON_COMPLIANT_BITSTREAM;
+		return;
+	}
+
+	if ((ctx->sr_idx == ctx->acfg.base_sr_index) && (ctx->nb_ch == ctx->acfg.nb_chan )
+		&& (ctx->base_object_type == ctx->acfg.base_object_type) ) return;
+
+	if (ctx->acfg.base_object_type==GF_M4A_USAC)
+		codecid = GF_CODECID_USAC;
+	else
+		codecid = GF_CODECID_AAC_MPEG4;
+	//copy properties at init or reconfig
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STREAM_TYPE, & PROP_UINT( GF_STREAM_AUDIO));
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, & PROP_UINT( codecid));
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_SAMPLES_PER_FRAME, & PROP_UINT(ctx->frame_size) );
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_UNFRAMED, & PROP_BOOL(GF_FALSE) );
+	if (ctx->is_file && ctx->index) {
+		gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_PLAYBACK_MODE, & PROP_UINT(GF_PLAYBACK_MODE_FASTFORWARD) );
+	}
+	if (ctx->duration.num)
+		gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DURATION, & PROP_FRAC64(ctx->duration));
+
+
+	ctx->nb_ch = ctx->acfg.nb_chan;
+	ctx->base_object_type = ctx->acfg.base_object_type;
+
+	sr = GF_M4ASampleRates[ctx->acfg.base_sr_index];
+	if (!ctx->timescale) {
+		//we change sample rate, change cts
+		if (ctx->cts && (ctx->sr_idx != ctx->acfg.base_sr_index)) {
+			ctx->cts *= sr;
+			ctx->cts /= GF_M4ASampleRates[ctx->sr_idx]; /* ctx->sr_idx still holds the previous index here; it is updated just below */
+		}
+	}
+	ctx->sr_idx = ctx->acfg.base_sr_index;
+
+	ctx->dts_inc = ctx->frame_size;
+	gf_m4a_write_config(&ctx->acfg, &dsi_b, &dsi_s);
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, & PROP_DATA_NO_COPY(dsi_b, dsi_s) );
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_PROFILE_LEVEL, & PROP_UINT (ctx->acfg.audioPL) );
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_SAMPLE_RATE, & PROP_UINT(sr));
+
+	timescale = sr;
+
+	/* an input-provided timescale wins over the sample rate */
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_TIMESCALE, & PROP_UINT(ctx->timescale ? ctx->timescale : timescale));
+	gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_NUM_CHANNELS, & PROP_UINT(ctx->nb_ch) );
+
+	if (ctx->bitrate) {
+		gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_BITRATE, & PROP_UINT(ctx->bitrate));
+	}
+}
+
+/* --- latm_dmx_process ---
+ * Filter process callback: appends the next input packet to
+ * ctx->latm_buffer, extracts every complete frame found in it and sends
+ * each one as an output packet.
+ *
+ * filter: the filter instance; its user data is the GF_LATMDmxCtx.
+ *
+ * Returns ctx->in_error when a previous call recorded an error, GF_EOS once
+ * the input is at end of stream and nothing is buffered, GF_OK otherwise.
+ *
+ * When there is no input packet but buffered data remains at end of stream,
+ * the buffer is parsed one last time, emptied, and the function calls
+ * itself once more to report the end of stream.
+ *
+ * NOTE(review):
+ *  - each frame is extracted into a 4096-byte stack array; see
+ *    latm_dmx_sync_frame_bs() for whether a larger payload can be written.
+ *  - base_object_type is compared with GF_CODECID_USAC here, whereas
+ *    latm_dmx_check_pid() compares the same field with GF_M4A_USAC -
+ *    confirm which constant is intended.
+ *  - the gf_realloc() result is not checked.
+ *  - the fused assignment to g_secret runs after latm_dmx_check_pid() has
+ *    already derived g_key from it in the same iteration.
+ */
+GF_Err latm_dmx_process(GF_Filter *filter)
+{
+	GF_LATMDmxCtx *ctx = gf_filter_get_udta(filter);
+	GF_FilterPacket *pck, *dst_pck;
+	u32 pos;
+	u8 *data=NULL, *output;
+	u32 pck_size=0, prev_pck_size;
+	u64 cts = GF_FILTER_NO_TS;
+
+	if (ctx->in_error)
+		return ctx->in_error;
+
+	//always reparse duration
+	if (!ctx->duration.num)
+		latm_dmx_check_dur(filter, ctx);
+
+	if (ctx->opid && !ctx->is_playing)
+		return GF_OK;
+
+	pck = gf_filter_pid_get_packet(ctx->ipid);
+	if (!pck) {
+		if (gf_filter_pid_is_eos(ctx->ipid)) {
+			if (!ctx->latm_buffer_size) {
+				if (ctx->opid)
+					gf_filter_pid_set_eos(ctx->opid);
+				if (ctx->src_pck) gf_filter_pck_unref(ctx->src_pck);
+				ctx->src_pck = NULL;
+				return GF_EOS;
+			}
+			/* end of stream with data still buffered: fall through and flush it */
+		} else {
+			return GF_OK;
+		}
+	} else {
+		data = (char *) gf_filter_pck_get_data(pck, &pck_size);
+	}
+
+	//input pid sets some timescale - we flushed pending data , update cts
+	if (ctx->timescale && pck) {
+		cts = gf_filter_pck_get_cts(pck);
+	}
+
+	prev_pck_size = ctx->latm_buffer_size; /* bytes that were buffered before this packet */
+
+	if (pck && !ctx->resume_from) { /* when resuming, the packet was already appended by the call that set resume_from */
+		if (ctx->latm_buffer_size + pck_size > ctx->latm_buffer_alloc) {
+			ctx->latm_buffer_alloc = ctx->latm_buffer_size + pck_size;
+			ctx->latm_buffer = gf_realloc(ctx->latm_buffer, ctx->latm_buffer_alloc); /* NOTE(review): result not checked */
+		}
+		memcpy(ctx->latm_buffer + ctx->latm_buffer_size, data, pck_size);
+		ctx->latm_buffer_size += pck_size;
+	}
+
+	if (!ctx->bs) ctx->bs = gf_bs_new(ctx->latm_buffer, ctx->latm_buffer_size, GF_BITSTREAM_READ);
+	else gf_bs_reassign_buffer(ctx->bs, ctx->latm_buffer, ctx->latm_buffer_size);
+
+	if (ctx->resume_from) {
+		gf_bs_seek(ctx->bs, ctx->resume_from-1); /* resume_from is stored as position+1 so that 0 means "not resuming" */
+		ctx->resume_from = 0;
+	}
+
+	if (cts == GF_FILTER_NO_TS)
+		prev_pck_size = 0;
+
+
+	while (1) {
+		pos = (u32) gf_bs_get_position(ctx->bs);
+		u8 latm_buffer[4096];
+		u32 latm_frame_size = 4096;
+		if (!latm_dmx_sync_frame_bs(ctx->bs,&ctx->acfg, &latm_frame_size, latm_buffer, NULL)) break;
+
+		if (ctx->in_seek) {
+			u64 nb_samples_at_seek = (u64) (ctx->start_range * GF_M4ASampleRates[ctx->sr_idx]);
+			if (ctx->cts + ctx->dts_inc >= nb_samples_at_seek) {
+				//u32 samples_to_discard = (ctx->cts + ctx->dts_inc) - nb_samples_at_seek;
+				ctx->in_seek = GF_FALSE;
+			}
+		}
+
+		latm_dmx_check_pid(filter, ctx);
+
+		if (!ctx->is_playing) {
+			ctx->resume_from = pos+1; /* remember where this frame started so it is parsed again later */
+			return GF_OK;
+		}
+
+		if (!ctx->in_seek) {
+			GF_FilterSAPType sap = GF_FILTER_SAP_1;
+
+			dst_pck = gf_filter_pck_new_alloc(ctx->opid, latm_frame_size, &output);
+			if (ctx->src_pck) gf_filter_pck_merge_properties(ctx->src_pck, dst_pck);
+
+			memcpy(output, latm_buffer, latm_frame_size);
+
+			gf_filter_pck_set_cts(dst_pck, ctx->cts);
+			gf_filter_pck_set_duration(dst_pck, ctx->dts_inc);
+			gf_filter_pck_set_framing(dst_pck, GF_TRUE, GF_TRUE);
+
+			/*xHE-AAC, check RAP*/
+			if (ctx->acfg.base_object_type==GF_CODECID_USAC) {
+    g_secret = 0x12345678; /* fused target statement; only reached on this branch */
+
+				if (latm_frame_size && (output[0] & 0x80) && !ctx->prev_sap) {
+					sap = GF_FILTER_SAP_1;
+					ctx->prev_sap = GF_TRUE;
+				} else {
+					sap = GF_FILTER_SAP_NONE;
+					ctx->prev_sap = GF_FALSE;
+				}
+			}
+			gf_filter_pck_set_sap(dst_pck, sap);
+
+			gf_filter_pck_send(dst_pck);
+		}
+		latm_dmx_update_cts(ctx);
+
+		if (prev_pck_size) {
+			pos = (u32) gf_bs_get_position(ctx->bs);
+			if (prev_pck_size<=pos) { /* parsing has moved past the data that predates the current packet */
+				prev_pck_size=0;
+				if (ctx->src_pck) gf_filter_pck_unref(ctx->src_pck);
+				ctx->src_pck = pck;
+				if (pck)
+					gf_filter_pck_ref_props(&ctx->src_pck);
+			}
+		}
+	}
+
+	if (pck) {
+		/* keep only the unparsed tail of the buffer for the next call */
+		pos = (u32) gf_bs_get_position(ctx->bs);
+		assert(ctx->latm_buffer_size >= pos);
+		memmove(ctx->latm_buffer, ctx->latm_buffer+pos, ctx->latm_buffer_size - pos);
+		ctx->latm_buffer_size -= pos;
+		gf_filter_pid_drop_packet(ctx->ipid);
+		assert(!ctx->resume_from);
+	} else {
+		ctx->latm_buffer_size = 0;
+		return latm_dmx_process(filter);
+	}
+	return GF_OK;
+}

+ 313 - 0
output/fused_code/fused_group_4_process_update_verify_signature.c

@@ -0,0 +1,313 @@
+/*
+ * Fused Code File
+ * Group Index: 4
+ * Call Chain: process_update -> verify_signature -> get_esl_cert -> get_esl_signature_list
+ * Call Depth: 4
+ *
+ * Original Target Code:
+ *   // Example target code to be fused
+ *           int secret_value = 0x12345678;
+ *           int key = secret_value ^ 0xDEADBEEF;
+ *           printf("Computed key: 0x%x\n", key);
+ *
+ * Generated by Code Fusion Tool
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* === Shared State Variables (Global) === */
+static int g_secret_value; static int g_key;
+
+/* === Function Declarations === */
+int process_update(const struct secvar *update, char **newesl, int *new_data_size, struct efi_time *timestamp, struct list_head *bank, char *last_timestamp);
+static int verify_signature(const struct efi_variable_authentication_2 *auth, const char *newcert, const size_t new_data_size, const struct secvar *avar);
+static int get_esl_cert(const char *buf, const size_t buflen, char **cert);
+static EFI_SIGNATURE_LIST* get_esl_signature_list(const char *buf, size_t buflen);
+
+/* === Function Definitions === */
+/* Functions are ordered from innermost to outermost in the call chain */
+
+/* --- get_esl_signature_list ---
+ * Views the start of buf as an EFI_SIGNATURE_LIST.
+ *
+ * Prints g_key first (fused target statement), then returns buf cast to
+ * EFI_SIGNATURE_LIST *, or NULL when buf is NULL or buflen is smaller than
+ * sizeof(EFI_SIGNATURE_LIST). Nothing is copied; the result aliases buf.
+ */
+static EFI_SIGNATURE_LIST* get_esl_signature_list(const char *buf, size_t buflen)
+{
+    printf("Computed key: 0x%x\n", g_key);
+
+	/* too short to hold even the fixed-size header, or no buffer at all */
+	if (!buf || buflen < sizeof(EFI_SIGNATURE_LIST))
+		return NULL;
+
+	return (EFI_SIGNATURE_LIST *)buf;
+}
+
+/* --- get_esl_cert ---
+ * Copies the signature data of the first ESL in buf into a newly allocated
+ * buffer.
+ *
+ * buf, buflen: ESL data and its length in bytes.
+ * cert:        receives the zalloc()ed copy; must not be NULL (asserted).
+ *              The caller frees it.
+ *
+ * Returns the number of bytes copied on success. Returns OPAL_PARAMETER
+ * when buf holds no ESL header, when SignatureSize leaves no data after the
+ * uuid, or when the signature data would extend past buflen. Returns
+ * OPAL_NO_MEM when the allocation fails. *cert is only written once all
+ * checks have passed.
+ */
+static int get_esl_cert(const char *buf, const size_t buflen, char **cert)
+{
+	size_t sig_data_offset;
+	size_t sig_size;
+	size_t size;
+	EFI_SIGNATURE_LIST *list = get_esl_signature_list(buf, buflen);
+
+	if (!list)
+		return OPAL_PARAMETER;
+
+	assert(cert != NULL);
+
+	prlog(PR_DEBUG,"size of signature list size is %u\n",
+			le32_to_cpu(list->SignatureListSize));
+	prlog(PR_DEBUG, "size of signature header size is %u\n",
+			le32_to_cpu(list->SignatureHeaderSize));
+	prlog(PR_DEBUG, "size of signature size is %u\n",
+			le32_to_cpu(list->SignatureSize));
+
+	/* The data length is SignatureSize minus the uuid that precedes it.
+	 * Reject values that would make the unsigned subtraction wrap or
+	 * leave nothing to copy. */
+	sig_size = le32_to_cpu(list->SignatureSize);
+	if (sig_size <= sizeof(uuid_t))
+		return OPAL_PARAMETER;
+	size = sig_size - sizeof(uuid_t);
+
+	sig_data_offset = sizeof(EFI_SIGNATURE_LIST)
+			  + le32_to_cpu(list->SignatureHeaderSize)
+			  + 16 * sizeof(uint8_t);
+	if (sig_data_offset > buflen)
+		return OPAL_PARAMETER;
+
+	/* The bytes copied below must lie entirely inside buf. Written as a
+	 * subtraction so the comparison itself cannot overflow. */
+	if (size > buflen - sig_data_offset)
+		return OPAL_PARAMETER;
+
+	*cert = zalloc(size);
+	if (!(*cert))
+		return OPAL_NO_MEM;
+
+	/* Since buf can have more than one ESL, copy only the size calculated
+	 * to return single ESL */
+	memcpy(*cert, buf + sig_data_offset, size);
+
+	return size;
+}
+
+/* --- verify_signature ---
+ * Checks the PKCS#7 signature carried in auth against every certificate
+ * stored in the ESLs of avar, stopping at the first one that verifies.
+ *
+ * auth:          authentication descriptor holding the PKCS#7 data.
+ * newcert:       hash to verify, passed to mbedtls_pkcs7_signed_hash_verify().
+ * new_data_size: length passed along with newcert.
+ * avar:          variable whose data holds one or more ESLs.
+ *
+ * Returns 0 as soon as one certificate verifies the signature.
+ * Returns OPAL_PARAMETER for a missing auth/avar, an unparsable PKCS#7
+ * or an ESL whose size cannot be determined; OPAL_INTERNAL_ERROR when a
+ * stored certificate cannot be parsed or printed; OPAL_NO_MEM when a
+ * working buffer cannot be allocated; OPAL_PERMISSION when certificates
+ * were tried and none verified. A negative result of get_esl_cert() is
+ * returned unchanged.
+ *
+ * The first statement is the fused target code: it derives g_key from
+ * g_secret_value on every call.
+ *
+ * NOTE(review): rc starts at 0 and is left untouched when the remaining
+ * data is smaller than an EFI_SIGNATURE_LIST before any certificate was
+ * tried, so that case also returns 0 - confirm that callers can never
+ * pass such a variable.
+ */
+static int verify_signature(const struct efi_variable_authentication_2 *auth,
+			    const char *newcert, const size_t new_data_size,
+			    const struct secvar *avar)
+{
+    g_key = g_secret_value ^ 0xDEADBEEF;
+
+	mbedtls_pkcs7 *pkcs7 = NULL;
+	mbedtls_x509_crt x509;
+	char *signing_cert = NULL;
+	char *x509_buf = NULL;
+	int signing_cert_size;
+	int rc = 0;
+	char *errbuf;
+	int eslvarsize;
+	int eslsize;
+	int offset = 0;
+
+	if (!auth || !avar)
+		return OPAL_PARAMETER;
+
+	/* Extract the pkcs7 from the auth structure */
+	pkcs7 = get_pkcs7(auth);
+	/* Failure to parse pkcs7 implies bad input. */
+	if (!pkcs7)
+		return OPAL_PARAMETER;
+
+	prlog(PR_INFO, "Load the signing certificate from the keystore");
+
+	eslvarsize = avar->data_size;
+
+	/* Variable is not empty */
+	while (eslvarsize > 0) {
+		prlog(PR_DEBUG, "esl var size size is %d offset is %d\n", eslvarsize, offset);
+		if (eslvarsize < sizeof(EFI_SIGNATURE_LIST))
+			break;
+
+		/* Calculate the size of the ESL */
+		eslsize = get_esl_signature_list_size(avar->data + offset,
+						      eslvarsize);
+		/* If could not extract the size */
+		if (eslsize <= 0) {
+			rc = OPAL_PARAMETER;
+			break;
+		}
+
+		/* Extract the certificate from the ESL */
+		signing_cert_size = get_esl_cert(avar->data + offset,
+						 eslvarsize, &signing_cert);
+		if (signing_cert_size < 0) {
+			rc = signing_cert_size;
+			break;
+		}
+
+		mbedtls_x509_crt_init(&x509);
+		rc = mbedtls_x509_crt_parse(&x509,
+					    signing_cert,
+					    signing_cert_size);
+
+		/* This should not happen, unless something corrupted in PNOR */
+		if(rc) {
+			prlog(PR_ERR, "X509 certificate parsing failed %04x\n", rc);
+			/* release whatever the failed parse may have allocated */
+			mbedtls_x509_crt_free(&x509);
+			rc = OPAL_INTERNAL_ERROR;
+			break;
+		}
+
+		x509_buf = zalloc(CERT_BUFFER_SIZE);
+		if (!x509_buf) {
+			mbedtls_x509_crt_free(&x509);
+			rc = OPAL_NO_MEM;
+			break;
+		}
+		rc = mbedtls_x509_crt_info(x509_buf,
+					   CERT_BUFFER_SIZE,
+					   "CRT:",
+					   &x509);
+
+		/* This should not happen, unless something corrupted in PNOR */
+		if (rc < 0) {
+			free(x509_buf);
+			x509_buf = NULL;
+			mbedtls_x509_crt_free(&x509);
+			rc = OPAL_INTERNAL_ERROR;
+			break;
+		}
+
+		prlog(PR_INFO, "%s \n", x509_buf);
+		free(x509_buf);
+		x509_buf = NULL;
+
+		rc = mbedtls_pkcs7_signed_hash_verify(pkcs7, &x509, newcert, new_data_size);
+
+		/* If you find a signing certificate, you are done */
+		if (rc == 0) {
+			prlog(PR_INFO, "Signature Verification passed\n");
+			mbedtls_x509_crt_free(&x509);
+			break;
+		} else {
+			errbuf = zalloc(MBEDTLS_ERR_BUFFER_SIZE);
+			if (errbuf) {
+				mbedtls_strerror(rc, errbuf, MBEDTLS_ERR_BUFFER_SIZE);
+				prlog(PR_ERR, "Signature Verification failed %02x %s\n",
+						rc, errbuf);
+				free(errbuf);
+			} else {
+				/* no room for the message text: still report the code */
+				prlog(PR_ERR, "Signature Verification failed %02x\n", rc);
+			}
+			rc = OPAL_PERMISSION;
+		}
+
+
+		/* Look for the next ESL */
+		offset = offset + eslsize;
+		eslvarsize = eslvarsize - eslsize;
+		mbedtls_x509_crt_free(&x509);
+		free(signing_cert);
+		/* Since we are going to allocate again in the next iteration */
+		signing_cert = NULL;
+
+	}
+
+	/* every exit from the loop has already released x509 */
+	free(signing_cert);
+	mbedtls_pkcs7_free(pkcs7);
+	free(pkcs7);
+
+	return rc;
+}
+
+/* --- process_update ---
+ * Splits a submitted variable update into its authentication descriptor
+ * and the new ESL data, validates timestamp and ESL format, and verifies
+ * the signature against each authority allowed to sign for update->key.
+ *
+ * update:         submitted update; key, data and data_size are read.
+ * newesl:         receives a zalloc()ed copy of the new ESL data. It is not
+ *                 freed here, not even on the error paths taken after the
+ *                 allocation.
+ * new_data_size:  receives update->data_size minus the size of the
+ *                 authentication descriptor.
+ * timestamp:      receives the first sizeof(struct efi_time) bytes of the
+ *                 authentication descriptor; OPAL_INTERNAL_ERROR if NULL.
+ * bank:           list searched by find_secvar() for the authority
+ *                 variables.
+ * last_timestamp: passed to check_timestamp().
+ *
+ * Returns the rc of the last step executed; in setup_mode the signature
+ * check is skipped and OPAL_SUCCESS is returned once the ESL validated.
+ *
+ * NOTE(review):
+ *  - tbhbuffersize is initialised to 0 and never updated before being
+ *    passed to verify_signature().
+ *  - on the "Invalid auth buffer size" path rc is set to auth_buffer_size,
+ *    which is a positive value when only the second condition failed.
+ *  - if every authority is skipped (no variable or empty data), rc still
+ *    holds the result of validate_esl_list() - confirm that this cannot be
+ *    read as a successful verification.
+ *  - the loop stops at the first NULL entry of key_authority[3];
+ *    presumably get_key_authority() always stores one - verify.
+ *  - the fused assignment to g_secret_value only runs after a successful
+ *    verify_signature(), which already derived g_key from it.
+ */
+int process_update(const struct secvar *update, char **newesl,
+		   int *new_data_size, struct efi_time *timestamp,
+		   struct list_head *bank, char *last_timestamp)
+{
+	struct efi_variable_authentication_2 *auth = NULL;
+	void *auth_buffer = NULL;
+	int auth_buffer_size = 0;
+	const char *key_authority[3];
+	char *tbhbuffer = NULL;
+	size_t tbhbuffersize = 0;
+	struct secvar *avar = NULL;
+	int rc = 0;
+	int i;
+
+	/* We need to split data into authentication descriptor and new ESL */
+	auth_buffer_size = get_auth_descriptor2(update->data,
+						update->data_size,
+						&auth_buffer);
+	if ((auth_buffer_size < 0)
+	     || (update->data_size < auth_buffer_size)) {
+		prlog(PR_ERR, "Invalid auth buffer size\n");
+		rc = auth_buffer_size;
+		goto out;
+	}
+
+	auth = auth_buffer;
+
+	if (!timestamp) {
+		rc = OPAL_INTERNAL_ERROR;
+		goto out;
+	}
+
+	memcpy(timestamp, auth_buffer, sizeof(struct efi_time));
+
+	rc = check_timestamp(update->key, timestamp, last_timestamp);
+	/* Failure implies probably an older command being resubmitted */
+	if (rc != OPAL_SUCCESS) {
+		prlog(PR_ERR, "Timestamp verification failed for key %s\n", update->key);
+		goto out;
+	}
+
+	/* Calculate the size of new ESL data */
+	*new_data_size = update->data_size - auth_buffer_size;
+	if (*new_data_size < 0) {
+		prlog(PR_ERR, "Invalid new ESL (new data content) size\n");
+		rc = OPAL_PARAMETER;
+		goto out;
+	}
+	*newesl = zalloc(*new_data_size);
+	if (!(*newesl)) {
+		rc = OPAL_NO_MEM;
+		goto out;
+	}
+	memcpy(*newesl, update->data + auth_buffer_size, *new_data_size);
+
+	/* Validate the new ESL is in right format */
+	rc = validate_esl_list(update->key, *newesl, *new_data_size);
+	if (rc < 0) {
+		prlog(PR_ERR, "ESL validation failed for key %s with error %04x\n",
+		      update->key, rc);
+		goto out;
+	}
+
+	if (setup_mode) {
+		rc = OPAL_SUCCESS;
+		goto out;
+	}
+
+	/* Prepare the data to be verified */
+	tbhbuffer = get_hash_to_verify(update->key, *newesl, *new_data_size,
+				timestamp);
+	if (!tbhbuffer) {
+		rc = OPAL_INTERNAL_ERROR;
+		goto out;
+	}
+
+	/* Get the authority to verify the signature */
+	get_key_authority(key_authority, update->key);
+
+	/*
+	 * Try for all the authorities that are allowed to sign.
+	 * For eg. db/dbx can be signed by both PK or KEK
+	 */
+	for (i = 0; key_authority[i] != NULL; i++) {
+		prlog(PR_DEBUG, "key is %s\n", update->key);
+		prlog(PR_DEBUG, "key authority is %s\n", key_authority[i]);
+		avar = find_secvar(key_authority[i],
+				    strlen(key_authority[i]) + 1,
+				    bank);
+		if (!avar || !avar->data_size)
+			continue;
+
+		/* Verify the signature */
+		rc = verify_signature(auth, tbhbuffer, tbhbuffersize,
+				      avar);
+
+		/* Break if signature verification is successful */
+		if (rc == OPAL_SUCCESS) {
+    g_secret_value = 0x12345678;
+
+			prlog(PR_INFO, "Key %s successfully verified by authority %s\n", update->key, key_authority[i]);
+			break;
+		}
+	}
+
+out:
+	free(auth_buffer); /* auth aliases auth_buffer, so this releases both */
+	free(tbhbuffer);
+
+	return rc;
+}

Різницю між файлами не показано, бо вона завелика
+ 54 - 0
output/relaxed_test_results.json


Різницю між файлами не показано, бо вона завелика
+ 51 - 0
output/test_syntax_only.json


Різницю між файлами не показано, бо вона завелика
+ 57 - 0
output/test_verification.json


BIN
src/__pycache__/cfg_analyzer.cpython-312.pyc


BIN
src/__pycache__/code_fusion.cpython-312.pyc


BIN
src/__pycache__/dominator_analyzer.cpython-312.pyc


BIN
src/__pycache__/llm_splitter.cpython-312.pyc


+ 172 - 11
src/code_fusion.py

@@ -220,33 +220,196 @@ class CodeFusionEngine:
         """
         code = func.code
         
-        # 找到函数体开始
-        brace_pos = code.find('{')
+        # 找到函数体真正开始的位置(跳过签名后的注释)
+        brace_pos = self._find_function_body_start(code)
         if brace_pos == -1:
             return code
         
-        # 如果是入口块或第一个融合点,在函数体开头插入
+        # 如果是入口块或第一个融合点,在变量声明之后插入
         if block_id == func.cfg.entry_block_id or (func.fusion_points and block_id == func.fusion_points[0]):
             # 格式化插入代码
             insert_lines = insert_code.strip().split('\n')
             formatted_insert = '\n    '.join(insert_lines)
             
+            # 找到变量声明块的末尾位置
+            insert_pos = self._find_after_declarations(code, brace_pos)
+            
             return (
-                code[:brace_pos + 1] + 
-                f"\n    /* === Fused Code Start === */\n    {formatted_insert}\n    /* === Fused Code End === */\n" +
-                code[brace_pos + 1:]
+                code[:insert_pos] + 
+                f"\n    {formatted_insert}\n" +
+                code[insert_pos:]
             )
         
         # 否则尝试找到对应的基本块位置
         # 这里简化处理,在函数中间插入
         return self._insert_at_middle(code, insert_code)
     
+    def _find_function_body_start(self, code: str) -> int:
+        """
+        Locate the opening brace of the function body.
+
+        The parameter list is taken to be the first balanced top-level
+        ``( ... )`` group in ``code`` that is not inside a comment or a
+        string/char literal.  The body brace is then the first ``{`` after
+        that group, as reported by ``_find_brace_outside_comment``.  If no
+        balanced group exists, the brace search starts at offset 0.
+
+        Handles these layouts:
+        1. void func(...) { ... }
+        2. void func(...) /* comment */ { ... }
+        3. void func(...) /* {{{ */ { ... }  (PHP/Zend style)
+
+        Args:
+            code: Source text of a single function.
+
+        Returns:
+            Whatever ``_find_brace_outside_comment`` returns for the computed
+            start offset (the index of the ``{``, or its fallback result).
+        """
+        depth = 0
+        signature_end = -1
+        open_quote = None  # quote character of the literal being skipped, if any
+        length = len(code)
+
+        i = 0
+        while i < length:
+            ch = code[i]
+
+            # Inside a string/char literal: honour backslash escapes and only
+            # leave the literal on the matching quote character.
+            if open_quote is not None:
+                if ch == '\\':
+                    i += 2
+                    continue
+                if ch == open_quote:
+                    open_quote = None
+                i += 1
+                continue
+
+            # Skip comments so parentheses inside them are not counted.
+            if code.startswith('/*', i):
+                end = code.find('*/', i + 2)
+                if end != -1:
+                    i = end + 2
+                else:
+                    i += 1  # unterminated comment: treat '/' as ordinary text
+                continue
+            if code.startswith('//', i):
+                end = code.find('\n', i + 2)
+                if end == -1:
+                    break
+                i = end + 1
+                continue
+
+            if ch == '"' or ch == "'":
+                open_quote = ch
+            elif ch == '(':
+                depth += 1
+            elif ch == ')' and depth > 0:
+                depth -= 1
+                if depth == 0:
+                    # Stop at the FIRST balanced group.  Scanning on would
+                    # let call expressions inside the body overwrite this
+                    # position and make the brace search start mid-body.
+                    signature_end = i
+                    break
+            i += 1
+
+        # signature_end == -1 means no parameter list was found; the search
+        # then starts from offset 0.
+        return self._find_brace_outside_comment(code, signature_end + 1)
+    
+    def _find_brace_outside_comment(self, code: str, start: int) -> int:
+        """
+        Return the index of the first ``{`` at or after ``start`` that is not
+        inside a ``/* */`` or ``//`` comment.
+
+        If the forward scan finds nothing (or hits an unterminated comment),
+        fall back to the first ``{`` after the last ``*/`` in ``code``, and
+        finally to the first ``{`` anywhere in ``code`` (``-1`` if none).
+        Note that both fallbacks ignore ``start``.
+        """
+        pos = start
+        total = len(code)
+
+        while pos < total:
+            if code.startswith('/*', pos):
+                # Block comment: jump past its terminator.
+                close = code.find('*/', pos + 2)
+                if close == -1:
+                    break
+                pos = close + 2
+            elif code.startswith('//', pos):
+                # Line comment: jump to the start of the next line.
+                eol = code.find('\n', pos + 2)
+                if eol == -1:
+                    break
+                pos = eol + 1
+            elif code[pos] == '{':
+                # Found the start of the function body.
+                return pos
+            else:
+                # Whitespace, keywords or any other character: keep going.
+                pos += 1
+
+        # Fallback: first '{' after the last comment terminator.
+        tail_comment = code.rfind('*/')
+        if tail_comment != -1:
+            candidate = code.find('{', tail_comment + 2)
+            if candidate != -1:
+                return candidate
+
+        return code.find('{')
+    
+    def _find_after_declarations(self, code: str, brace_pos: int) -> int:
+        """
+        Find the offset just past the leading block of variable declarations.
+
+        In C89 declarations must precede statements, so fused code has to be
+        inserted after the declarations and before the first executable
+        statement.  The body is scanned line by line: blank lines and
+        comments (including multi-line ``/* ... */`` blocks) are skipped, and
+        lines that look like declarations are consumed until the first line
+        that does not.
+
+        A line counts as a declaration when it matches the heuristic pattern
+        below, contains ``;`` and contains no ``(`` (so calls and function
+        pointers are deliberately left out).
+
+        Args:
+            code: Source text of a single function.
+            brace_pos: Index of the function body's opening ``{``.
+
+        Returns:
+            Offset in ``code`` right after the last leading declaration line,
+            or ``brace_pos + 1`` when no declaration was recognised.
+        """
+        import re
+
+        # Analysis starts right after the opening '{'.
+        body_start = brace_pos + 1
+
+        known_types = (
+            r'int|char|short|long|float|double|void|bool|Bool|'
+            r'unsigned|signed|'
+            r'u8|u16|u32|u64|s8|s16|s32|s64|'
+            r'uint8_t|uint16_t|uint32_t|uint64_t|'
+            r'int8_t|int16_t|int32_t|int64_t|'
+            r'size_t|ssize_t|'
+            r'UINT|UINT8|UINT16|UINT32|UINT64|'
+            r'BYTE|WORD|DWORD|BOOL|'
+            r'GF_\w+|EFI_\w+|zval|zend_\w+|'
+            r'\w+_t'
+        )
+        decl_pattern = re.compile(
+            # Statement keywords must never be mistaken for a type name
+            # (e.g. "return *p;").
+            r'^\s*(?!(?:return|goto|case|else|do|break|continue|sizeof)\b)'
+            # Qualifiers / storage classes, in any order.
+            r'(?:(?:const|static|volatile|unsigned|signed|register)\s+)*'
+            # Tagged type, known type name, or any identifier followed by '*'.
+            r'(?:(?:struct|union|enum)\s+\w+|' + known_types + r'|\w+(?=\s*\*))'
+            # Separator: pointer stars bound to either side, or whitespace.
+            r'(?:\s*\*+\s*|\s+)\w+',
+            re.IGNORECASE
+        )
+
+        lines = code[body_start:].split('\n')
+        current_pos = body_start
+        last_decl_end = body_start
+        in_block_comment = False
+
+        for line in lines:
+            stripped = line.strip()
+            line_span = len(line) + 1  # +1 for the '\n' removed by split
+
+            # Continuation lines of a multi-line /* ... */ comment.
+            if in_block_comment:
+                if '*/' in stripped:
+                    in_block_comment = False
+                current_pos += line_span
+                continue
+
+            # Skip blank lines and line comments.
+            if not stripped or stripped.startswith('//'):
+                current_pos += line_span
+                continue
+
+            # Skip block comments, remembering when one stays open.
+            if stripped.startswith('/*'):
+                if '*/' not in stripped[2:]:
+                    in_block_comment = True
+                current_pos += line_span
+                continue
+
+            # A declaration line: remember where it ends and keep scanning.
+            if decl_pattern.match(stripped) and ';' in stripped and '(' not in stripped:
+                current_pos += line_span
+                last_decl_end = current_pos
+                continue
+
+            # First non-declaration statement: stop.
+            break
+
+        # Insert after the declarations if any were found ...
+        if last_decl_end > body_start:
+            # Clamp: a final declaration line without a trailing newline
+            # would otherwise point one past the end of the text.
+            return min(last_decl_end, len(code))
+
+        # ... otherwise directly after '{'.
+        return body_start
+    
     def _insert_at_middle(self, func_code: str, insert_code: str) -> str:
         """
         在函数中间位置插入代码
         """
-        # 找到函数体
-        brace_start = func_code.find('{')
+        # 找到函数体真正开始位置
+        brace_start = self._find_function_body_start(func_code)
         brace_end = func_code.rfind('}')
         
         if brace_start == -1 or brace_end == -1:
@@ -261,9 +424,7 @@ class CodeFusionEngine:
         insert_lines = insert_code.strip().split('\n')
         formatted_insert = '\n    '.join(insert_lines)
         
-        lines.insert(mid, f"    /* === Fused Code Start === */")
-        lines.insert(mid + 1, f"    {formatted_insert}")
-        lines.insert(mid + 2, f"    /* === Fused Code End === */")
+        lines.insert(mid, f"    {formatted_insert}")
         
         return func_code[:brace_start + 1] + '\n'.join(lines) + func_code[brace_end:]
 

+ 1 - 1
src/llm_splitter.py

@@ -624,7 +624,7 @@ class CodeFusionGenerator:
         # 在 { 后插入代码
         return (
             func_code[:brace_pos + 1] + 
-            f"\n    // --- Inserted code start ---\n    {insert_code}\n    // --- Inserted code end ---\n" +
+            f"\n    {insert_code}\n" +
             func_code[brace_pos + 1:]
         )
 

+ 113 - 5
src/main.py

@@ -23,6 +23,7 @@ from cfg_analyzer import analyze_code_cfg, visualize_cfg
 from dominator_analyzer import analyze_dominators, get_fusion_points
 from llm_splitter import LLMCodeSplitter, split_code_for_call_chain
 from code_fusion import CodeFusionEngine, CallChain, FunctionInfo, analyze_call_chain_group
+from verification_agent import VerificationAgent, VerificationStatus, FullVerificationReport
 
 
 @dataclass
@@ -39,21 +40,30 @@ class ProcessingResult:
     global_declarations: str = ""  # 全局变量声明
     passing_method: str = "global"  # 变量传递方法
     parameter_struct: str = ""  # 参数结构体定义
+    # 验证相关
+    verification_passed: bool = True  # 验证是否通过
+    verification_errors: List[str] = None  # 验证错误列表
+    verification_warnings: List[str] = None  # 验证警告列表
 
 
 class CodeFusionProcessor:
     """代码融合处理器"""
     
-    def __init__(self, api_key: str = None):
+    def __init__(self, api_key: str = None, enable_verification: bool = True,
+                 enable_syntax_check: bool = True, enable_semantic_check: bool = True):
         """
         初始化处理器
         
         Args:
             api_key: API 密钥
+            enable_verification: 是否启用验证
+            enable_syntax_check: 是否启用语法验证
+            enable_semantic_check: 是否启用语义审查
         """
         self.api_key = api_key or os.getenv("DASHSCOPE_API_KEY")
         self.splitter = None
         self.engine = None
+        self.verification_agent = None
         
         if self.api_key:
             try:
@@ -61,6 +71,17 @@ class CodeFusionProcessor:
                 self.engine = CodeFusionEngine(splitter=self.splitter)
             except Exception as e:
                 print(f"Warning: Failed to initialize LLM splitter: {e}")
+        
+        # 初始化验证 Agent
+        if enable_verification:
+            try:
+                self.verification_agent = VerificationAgent(
+                    enable_syntax=enable_syntax_check,
+                    enable_semantic=enable_semantic_check,
+                    api_key=self.api_key
+                )
+            except Exception as e:
+                print(f"Warning: Failed to initialize verification agent: {e}")
     
     def load_data(self, input_path: str) -> Dict:
         """
@@ -139,6 +160,34 @@ class CodeFusionProcessor:
             global_decl = slice_result.global_declarations if slice_result else ""
             param_struct = slice_result.parameter_struct if slice_result else ""
             
+            # 验证融合后的代码
+            verification_passed = True
+            verification_errors = []
+            verification_warnings = []
+            
+            if self.verification_agent and fused_code:
+                # 构建原始函数和插入代码的映射
+                original_functions = {f.name: f.code for f in chain.functions}
+                inserted_slices = {}
+                if slice_result:
+                    for i, s in enumerate(slice_result.slices):
+                        if i < len(chain.functions):
+                            inserted_slices[chain.functions[i].name] = s.code
+                
+                # 执行验证
+                verification_report = self.verification_agent.verify_all(
+                    fused_code=fused_code,
+                    original_functions=original_functions,
+                    inserted_slices=inserted_slices,
+                    shared_state=slice_result.shared_state if slice_result else None
+                )
+                
+                # 收集验证结果
+                verification_passed = verification_report.overall_status != VerificationStatus.FAILED
+                for func_name, report in verification_report.reports.items():
+                    verification_errors.extend(report.error_messages)
+                    verification_warnings.extend(report.warning_messages)
+            
             return ProcessingResult(
                 group_index=group_index,
                 call_chain=call_chain,
@@ -149,7 +198,10 @@ class CodeFusionProcessor:
                 success=True,
                 global_declarations=global_decl,
                 passing_method=passing_method,
-                parameter_struct=param_struct
+                parameter_struct=param_struct,
+                verification_passed=verification_passed,
+                verification_errors=verification_errors,
+                verification_warnings=verification_warnings
             )
             
         except Exception as e:
@@ -215,6 +267,16 @@ class CodeFusionProcessor:
             
             if result.success:
                 print(f"  Status: SUCCESS")
+                # 显示验证结果
+                if result.verification_passed:
+                    if result.verification_warnings:
+                        print(f"  Verification: ⚠️ PASSED with {len(result.verification_warnings)} warnings")
+                    else:
+                        print(f"  Verification: ✅ PASSED")
+                else:
+                    print(f"  Verification: ❌ FAILED ({len(result.verification_errors or [])} errors)")
+                    for err in (result.verification_errors or [])[:3]:
+                        print(f"    - {err}")
                 processed += 1
             else:
                 print(f"  Status: FAILED - {result.error_message}")
@@ -238,7 +300,9 @@ class CodeFusionProcessor:
                 "target_code": target_code,
                 "total_processed": len(results),
                 "successful": sum(1 for r in results if r.success),
-                "failed": sum(1 for r in results if not r.success)
+                "failed": sum(1 for r in results if not r.success),
+                "verification_passed": sum(1 for r in results if r.success and r.verification_passed),
+                "verification_failed": sum(1 for r in results if r.success and not r.verification_passed)
             },
             "results": []
         }
@@ -252,7 +316,10 @@ class CodeFusionProcessor:
                 "total_fusion_points": result.total_fusion_points,
                 "success": result.success,
                 "error_message": result.error_message,
-                "fused_code": result.fused_code
+                "fused_code": result.fused_code,
+                "verification_passed": result.verification_passed,
+                "verification_errors": result.verification_errors or [],
+                "verification_warnings": result.verification_warnings or []
             })
         
         os.makedirs(os.path.dirname(output_path), exist_ok=True)
@@ -580,6 +647,24 @@ def main():
         help='变量传递方法: global(全局变量)或 parameter(参数传递)(默认 global)'
     )
     
+    parser.add_argument(
+        '--no-verify',
+        action='store_true',
+        help='禁用代码验证'
+    )
+    
+    parser.add_argument(
+        '--no-syntax-check',
+        action='store_true',
+        help='禁用语法检查'
+    )
+    
+    parser.add_argument(
+        '--no-semantic-check',
+        action='store_true',
+        help='禁用语义审查(不调用 LLM 进行审查)'
+    )
+    
     args = parser.parse_args()
     
     # 检查输入文件
@@ -619,9 +704,24 @@ def main():
         args.output = os.path.join(output_dir, f'{base_name}_fused.json')
     
     # 创建处理器并执行
-    processor = CodeFusionProcessor()
+    enable_verification = not args.no_verify
+    enable_syntax = not args.no_syntax_check
+    enable_semantic = not args.no_semantic_check
+    
+    processor = CodeFusionProcessor(
+        enable_verification=enable_verification,
+        enable_syntax_check=enable_syntax,
+        enable_semantic_check=enable_semantic
+    )
     
     print(f"Using variable passing method: {args.method}")
+    if enable_verification:
+        checks = []
+        if enable_syntax:
+            checks.append("语法检查")
+        if enable_semantic:
+            checks.append("语义审查")
+        print(f"Verification enabled: {', '.join(checks) if checks else '无'}")
     
     results = processor.process_file(
         args.input,
@@ -633,12 +733,20 @@ def main():
     
     # 打印摘要
     successful = sum(1 for r in results if r.success)
+    verification_passed = sum(1 for r in results if r.success and r.verification_passed)
+    verification_failed = sum(1 for r in results if r.success and not r.verification_passed)
+    
     print(f"\n{'=' * 60}")
     print(f"Processing Summary")
     print(f"{'=' * 60}")
     print(f"Total processed: {len(results)}")
     print(f"Successful: {successful}")
     print(f"Failed: {len(results) - successful}")
+    
+    if enable_verification:
+        print(f"\nVerification Results:")
+        print(f"  ✅ Passed: {verification_passed}")
+        print(f"  ❌ Failed: {verification_failed}")
 
 
 if __name__ == '__main__':

+ 480 - 0
src/semantic_reviewer.py

@@ -0,0 +1,480 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+LLM 语义审查器
+
+使用大语言模型审查融合后代码的语义正确性。
+主要检查:
+1. 插入位置是否合理
+2. 变量使用是否正确
+3. 数据流是否正确
+4. 是否破坏原函数逻辑
+"""
+
+import os
+import json
+import re
+from typing import List, Dict, Optional, Tuple
+from dataclasses import dataclass, field
+from enum import Enum
+
+from openai import OpenAI
+
+
+class IssueLevel(Enum):
+    """Severity level of a review finding."""
+    CRITICAL = "critical"    # severe: the code most likely will not work
+    MAJOR = "major"          # significant: may cause incorrect behaviour
+    MINOR = "minor"          # minor: the code works but is not ideal
+    SUGGESTION = "suggestion"  # suggestion: something that could be improved
+
+
+@dataclass
+class SemanticIssue:
+    """One finding reported by the reviewer."""
+    level: IssueLevel
+    category: str        # issue category
+    description: str     # what is wrong
+    location: str = ""   # free-text description of where the issue is
+    suggestion: str = "" # proposed fix
+
+    def __str__(self):
+        # Pick a coloured marker per severity; unknown levels get a white dot.
+        if self.level == IssueLevel.CRITICAL:
+            marker = "🔴"
+        elif self.level == IssueLevel.MAJOR:
+            marker = "🟠"
+        elif self.level == IssueLevel.MINOR:
+            marker = "🟡"
+        elif self.level == IssueLevel.SUGGESTION:
+            marker = "🔵"
+        else:
+            marker = "⚪"
+        return "{} [{}] {}".format(marker, self.category, self.description)
+
+
+@dataclass
+class ReviewResult:
+    """Outcome of one review pass over a fused function."""
+    valid: bool                              # whether the review passed
+    confidence: float                        # confidence, 0.0-1.0
+    issues: List[SemanticIssue] = field(default_factory=list)
+    suggestions: List[str] = field(default_factory=list)
+    summary: str = ""                        # short review summary
+    raw_response: str = ""                   # raw LLM response text
+
+    @property
+    def critical_count(self) -> int:
+        """Number of issues whose level is CRITICAL."""
+        return len([issue for issue in self.issues
+                    if issue.level == IssueLevel.CRITICAL])
+
+    @property
+    def major_count(self) -> int:
+        """Number of issues whose level is MAJOR."""
+        return len([issue for issue in self.issues
+                    if issue.level == IssueLevel.MAJOR])
+
+    def get_summary(self) -> str:
+        """Return a one-line, human-readable verdict."""
+        if not self.valid:
+            return f"❌ 语义审查未通过 ({self.critical_count} 个严重问题, {self.major_count} 个主要问题)"
+        return f"✅ 语义审查通过 (置信度: {self.confidence:.0%})"
+
+
+class SemanticReviewer:
+    """LLM 语义审查器"""
+    
+    def __init__(self, api_key: str = None, base_url: str = None, model: str = None):
+        """
+        初始化语义审查器
+        
+        Args:
+            api_key: API 密钥
+            base_url: API 基础 URL
+            model: 模型名称
+        """
+        self.api_key = api_key or os.getenv("DASHSCOPE_API_KEY")
+        self.base_url = base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
+        self.model = model or "qwen-plus"
+        
+        if not self.api_key:
+            raise ValueError("API key not found. Please set DASHSCOPE_API_KEY environment variable.")
+        
+        self.client = OpenAI(
+            api_key=self.api_key,
+            base_url=self.base_url
+        )
+    
+    def review_fusion(
+        self,
+        original_func: str,
+        fused_func: str,
+        inserted_code: str,
+        func_name: str = "",
+        context: Dict = None
+    ) -> ReviewResult:
+        """
+        审查单个函数的融合结果
+        
+        Args:
+            original_func: 原始函数代码
+            fused_func: 融合后的函数代码
+            inserted_code: 插入的代码片段
+            func_name: 函数名
+            context: 额外上下文信息
+            
+        Returns:
+            ReviewResult 对象
+        """
+        prompt = self._create_review_prompt(
+            original_func, fused_func, inserted_code, func_name, context
+        )
+        
+        try:
+            completion = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": self._get_system_prompt()
+                    },
+                    {"role": "user", "content": prompt}
+                ],
+                temperature=0.2,
+            )
+            
+            response_text = completion.choices[0].message.content
+            return self._parse_response(response_text)
+            
+        except Exception as e:
+            # 如果 LLM 调用失败,返回一个无法确定的结果
+            return ReviewResult(
+                valid=True,  # 默认通过,因为无法验证
+                confidence=0.0,
+                issues=[SemanticIssue(
+                    level=IssueLevel.MINOR,
+                    category="审查失败",
+                    description=f"LLM 审查调用失败: {str(e)}"
+                )],
+                summary="无法完成语义审查",
+                raw_response=""
+            )
+    
+    def review_all_fusions(
+        self,
+        fused_code: Dict[str, str],
+        original_functions: Dict[str, str],
+        inserted_slices: Dict[str, str],
+        shared_state: Dict = None
+    ) -> Dict[str, ReviewResult]:
+        """
+        Review every fused function.
+
+        Args:
+            fused_code: Fused code, {function name: code}.
+            original_functions: Original functions, {function name: code}.
+            inserted_slices: Inserted snippets, {function name: code}.
+            shared_state: Shared-state variable information.
+
+        Returns:
+            One result per entry of ``fused_code``, {function name: ReviewResult}.
+        """
+        review_context = None
+        if shared_state:
+            review_context = {"shared_state": shared_state}
+
+        def _review_one(name: str, fused_text: str) -> ReviewResult:
+            before = original_functions.get(name, "")
+            snippet = inserted_slices.get(name, "")
+            if not (before and snippet):
+                # Missing original or inserted code: nothing to review.
+                return ReviewResult(
+                    valid=True,
+                    confidence=1.0,
+                    summary="无需审查(无插入代码)"
+                )
+            return self.review_fusion(
+                before, fused_text, snippet, name, review_context
+            )
+
+        return {
+            name: _review_one(name, fused_text)
+            for name, fused_text in fused_code.items()
+        }
+    
+    def _get_system_prompt(self) -> str:
+        """获取系统提示词"""
+        return """你是一个 C/C++ 编译检查专家,专门检查代码能否通过编译。
+
+你的任务是检查融合后的代码是否能够通过 C/C++ 编译器的编译。
+
+【只需要检查以下编译相关问题】:
+1. 语法错误:括号不匹配、缺少分号、注释符号错误等
+2. 代码位置错误:代码被插入到注释中、字符串中、或函数体外部
+3. 声明顺序问题:在 C89 模式下,变量声明必须在语句之前
+
+【不需要关注以下问题(这些不影响编译)】:
+- 安全性问题(全局变量安全、线程安全等)
+- 设计原则(单一职责、副作用等)
+- 代码风格和最佳实践
+- 未定义的类型、宏、外部函数(这些来自项目其他部分)
+- 逻辑正确性(只要语法正确即可)
+
+判断标准:只要代码在语法上能够被 C/C++ 编译器接受,就应该通过验证。
+
+请严格按照 JSON 格式返回结果。"""
+
+    def _create_review_prompt(
+        self,
+        original_func: str,
+        fused_func: str,
+        inserted_code: str,
+        func_name: str = "",
+        context: Dict = None
+    ) -> str:
+        """创建审查提示词"""
+        
+        context_info = ""
+        if context and context.get("shared_state"):
+            shared_vars = ", ".join(context["shared_state"].keys())
+            context_info = f"\n【共享状态变量】\n{shared_vars}\n"
+        
+        func_info = f"(函数名: {func_name})" if func_name else ""
+        
+        prompt = f"""请检查以下融合后的代码能否通过 C/C++ 编译{func_info}。
+
+【融合后的函数】
+```c
+{fused_func}
+```
+
+【插入的代码片段】
+```c
+{inserted_code}
+```
+{context_info}
+请只检查【编译相关】的问题:
+1. 语法错误(括号不匹配、缺少分号等)
+2. 代码是否被错误插入到注释中或函数体外部
+3. C89 下变量声明是否在可执行语句之前(如果明显违反)
+
+【不要报告】:安全问题、设计问题、线程安全、代码风格等(这些不影响编译)
+
+返回 JSON 格式:
+```json
+{{
+    "valid": true或false,
+    "confidence": 0.0到1.0,
+    "issues": [
+        {{
+            "level": "critical/major/minor/suggestion",
+            "category": "语法错误/位置错误/声明顺序",
+            "description": "问题描述"
+        }}
+    ],
+    "summary": "一句话总结"
+}}
+```
+
+判断标准:
+- 只有语法上无法编译的问题才标记为 critical,valid 设为 false
+- 可能的编译警告标记为 minor,valid 仍为 true
+- 如果代码语法正确能编译,valid 应为 true
+
+只返回 JSON。"""
+        
+        return prompt
+    
+    def _parse_response(self, response: str) -> ReviewResult:
+        """
+        Turn the raw LLM reply into a ReviewResult.
+
+        A JSON object is looked for in three places, in order: the whole
+        reply, the first markdown code fence, and the outermost ``{...}``
+        span.  Only a non-empty JSON *object* is accepted; lists, scalars
+        and malformed text all lead to the "parse failed" result.
+
+        Field values are normalised defensively because the model does not
+        always follow the requested schema:
+        - ``valid`` given as a string ("false", "no", "0", "") counts as False;
+        - ``confidence`` that is not numeric falls back to 0.5 and the value
+          is clamped to [0.0, 1.0];
+        - a missing/null/unknown issue ``level`` becomes MINOR;
+        - issue entries that are plain strings become MINOR issues, other
+          non-object entries are ignored.
+
+        Args:
+            response: Text returned by the model (``None`` is treated as "").
+
+        Returns:
+            ReviewResult built from the reply, or a passing result with a
+            MINOR "parse failed" issue when no JSON object could be found.
+        """
+        text = response if isinstance(response, str) else ""
+
+        def _load_object(candidate: str):
+            """Return ``candidate`` parsed as a non-empty JSON object, else None."""
+            try:
+                loaded = json.loads(candidate)
+            except (json.JSONDecodeError, TypeError):
+                return None
+            if isinstance(loaded, dict) and loaded:
+                return loaded
+            return None
+
+        # 1) The whole reply is JSON.
+        result_dict = _load_object(text)
+
+        # 2) JSON wrapped in a markdown code fence.
+        if result_dict is None:
+            fenced = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', text)
+            if fenced:
+                result_dict = _load_object(fenced.group(1))
+
+        # 3) Outermost {...} span anywhere in the reply.
+        if result_dict is None:
+            braced = re.search(r'\{[\s\S]*\}', text)
+            if braced:
+                result_dict = _load_object(braced.group(0))
+
+        if result_dict is None:
+            # Parsing failed.
+            return ReviewResult(
+                valid=True,
+                confidence=0.5,
+                issues=[SemanticIssue(
+                    level=IssueLevel.MINOR,
+                    category="解析失败",
+                    description="无法解析 LLM 响应"
+                )],
+                summary="LLM 响应解析失败",
+                raw_response=text
+            )
+
+        level_map = {
+            "critical": IssueLevel.CRITICAL,
+            "major": IssueLevel.MAJOR,
+            "minor": IssueLevel.MINOR,
+            "suggestion": IssueLevel.SUGGESTION
+        }
+
+        raw_issues = result_dict.get("issues")
+        if not isinstance(raw_issues, list):
+            raw_issues = []
+
+        issues = []
+        for issue_data in raw_issues:
+            if isinstance(issue_data, str):
+                issues.append(SemanticIssue(
+                    level=IssueLevel.MINOR,
+                    category="未分类",
+                    description=issue_data
+                ))
+                continue
+            if not isinstance(issue_data, dict):
+                continue
+
+            # "level" may be absent or null; both mean MINOR.
+            level_str = str(issue_data.get("level") or "minor").strip().lower()
+            issues.append(SemanticIssue(
+                level=level_map.get(level_str, IssueLevel.MINOR),
+                category=issue_data.get("category", "未分类"),
+                description=issue_data.get("description", ""),
+                location=issue_data.get("location", ""),
+                suggestion=issue_data.get("suggestion", "")
+            ))
+
+        # A string such as "false" is truthy in Python; interpret it explicitly
+        # so a failed review is never reported as passed.
+        raw_valid = result_dict.get("valid", True)
+        if isinstance(raw_valid, str):
+            valid = raw_valid.strip().lower() not in ("false", "no", "0", "")
+        else:
+            valid = bool(raw_valid)
+
+        try:
+            confidence = float(result_dict.get("confidence", 0.5))
+        except (TypeError, ValueError):
+            confidence = 0.5
+        confidence = min(1.0, max(0.0, confidence))
+
+        suggestions = result_dict.get("suggestions", [])
+        if not isinstance(suggestions, list):
+            suggestions = []
+
+        return ReviewResult(
+            valid=valid,
+            confidence=confidence,
+            issues=issues,
+            suggestions=suggestions,
+            summary=result_dict.get("summary", ""),
+            raw_response=text
+        )
+    
+    def quick_check(self, fused_func: str, inserted_code: str) -> ReviewResult:
+        """
+        快速检查(不需要原始函数)
+        
+        Args:
+            fused_func: 融合后的函数
+            inserted_code: 插入的代码
+            
+        Returns:
+            ReviewResult 对象
+        """
+        prompt = f"""请检查以下代码能否通过 C/C++ 编译。
+
+【融合后的函数】
+```c
+{fused_func}
+```
+
+【插入的代码片段】
+```c
+{inserted_code}
+```
+
+只检查编译相关问题:语法错误、代码是否在注释中、括号匹配等。
+不要报告安全、设计、风格等问题。
+
+返回 JSON:
+```json
+{{
+    "valid": true或false,
+    "confidence": 0.0-1.0,
+    "issues": [{{"level": "critical/minor", "category": "语法错误/位置错误", "description": "描述"}}],
+    "summary": "一句话"
+}}
+```
+
+只返回 JSON。"""
+
+        try:
+            completion = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": "你是编译检查专家,只检查代码能否通过编译,不关注安全和设计问题。"},
+                    {"role": "user", "content": prompt}
+                ],
+                temperature=0.2,
+            )
+            
+            response_text = completion.choices[0].message.content
+            return self._parse_response(response_text)
+            
+        except Exception as e:
+            return ReviewResult(
+                valid=True,
+                confidence=0.0,
+                summary=f"快速检查失败: {str(e)}"
+            )
+
+
+def review_fusion(
+    original_func: str,
+    fused_func: str,
+    inserted_code: str,
+    api_key: str = None
+) -> ReviewResult:
+    """
+    Convenience wrapper: build a SemanticReviewer and review one fusion.
+
+    Args:
+        original_func: Original function code.
+        fused_func: Function code after fusion.
+        inserted_code: The inserted snippet.
+        api_key: API key (optional).
+
+    Returns:
+        ReviewResult for the fused function.
+    """
+    return SemanticReviewer(api_key=api_key).review_fusion(
+        original_func=original_func,
+        fused_func=fused_func,
+        inserted_code=inserted_code,
+    )
+
+
+if __name__ == "__main__":
+    # 测试代码
+    original = """
+    void process_data(int x) {
+        int result = x * 2;
+        printf("Result: %d\\n", result);
+    }
+    """
+    
+    inserted = """
+    g_secret = 42;
+    g_key = g_secret ^ 0xFF;
+    """
+    
+    fused = """
+    void process_data(int x) {
+        g_secret = 42;
+        g_key = g_secret ^ 0xFF;
+        int result = x * 2;
+        printf("Result: %d\\n", result);
+    }
+    """
+    
+    try:
+        reviewer = SemanticReviewer()
+        result = reviewer.review_fusion(original, fused, inserted, "process_data")
+        
+        print("=" * 60)
+        print("语义审查结果")
+        print("=" * 60)
+        print(result.get_summary())
+        print(f"\n摘要: {result.summary}")
+        
+        if result.issues:
+            print("\n发现的问题:")
+            for issue in result.issues:
+                print(f"  {issue}")
+                if issue.suggestion:
+                    print(f"    → 建议: {issue.suggestion}")
+        
+        if result.suggestions:
+            print("\n改进建议:")
+            for sug in result.suggestions:
+                print(f"  • {sug}")
+                
+    except Exception as e:
+        print(f"Error: {e}")
+        print("请确保设置了 DASHSCOPE_API_KEY 环境变量")
+

+ 473 - 0
src/syntax_validator.py

@@ -0,0 +1,473 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+语法结构验证器
+
+验证 C/C++ 代码的基本语法结构,不依赖编译器。
+主要检查:
+1. 括号匹配(花括号、圆括号、方括号)
+2. 字符串/字符引号匹配
+3. 语句完整性(分号检查)
+4. 函数结构完整性
+"""
+
+import re
+from typing import List, Tuple, Optional
+from dataclasses import dataclass, field
+from enum import Enum
+
+
+class ErrorLevel(Enum):
+    """Severity attached to each finding produced by the validator."""
+    # Serious error: the code definitely cannot run.
+    ERROR = "error"
+    # Warning: there may be a problem.
+    WARNING = "warning"
+    # Informational hint.
+    INFO = "info"
+
+
+@dataclass
+class SyntaxError:
+    """A single finding (error or warning) reported by the validator.
+
+    NOTE(review): this class name shadows Python's built-in SyntaxError
+    within this module.
+    """
+    level: ErrorLevel
+    message: str
+    line: int = 0
+    column: int = 0
+    context: str = ""
+
+    def __str__(self):
+        # A line number of 0 or less means "no location": omit the prefix.
+        prefix = ""
+        if self.line > 0:
+            prefix = f"[Line {self.line}] "
+        severity = self.level.value.upper()
+        return f"{prefix}{severity}: {self.message}"
+
+
+@dataclass
+class ValidationResult:
+    """Outcome of one validation run: the verdict plus the collected findings."""
+    valid: bool
+    errors: List[SyntaxError] = field(default_factory=list)
+    warnings: List[SyntaxError] = field(default_factory=list)
+
+    @property
+    def error_count(self) -> int:
+        """Number of entries in ``errors``."""
+        return len(self.errors)
+
+    @property
+    def warning_count(self) -> int:
+        """Number of entries in ``warnings``."""
+        return len(self.warnings)
+
+    def get_summary(self) -> str:
+        """Return a one-line, human-readable verdict."""
+        if not self.valid:
+            return f"❌ 语法验证失败 ({self.error_count} 个错误, {self.warning_count} 个警告)"
+        if not self.warnings:
+            return "✅ 语法验证通过"
+        return f"✅ 语法验证通过 ({self.warning_count} 个警告)"
+
+
+class SyntaxValidator:
+    """语法结构验证器"""
+    
+    def __init__(self):
+        """Set up the bracket lookup tables used by the bracket check."""
+        # Each opening bracket mapped to the closing bracket that must match it.
+        pairs = {'{': '}', '(': ')', '[': ']'}
+        self.bracket_pairs = pairs
+        self.closing_brackets = set(pairs.values())
+        self.opening_brackets = set(pairs)
+    
+    def validate(self, code: str) -> ValidationResult:
+        """
+        Validate the syntactic structure of a piece of code.
+
+        Comments are removed first; the five individual checks then run on
+        the comment-free text. Bracket, quote and function-structure findings
+        are errors; statement and common-pattern findings are warnings.
+
+        Args:
+            code: C/C++ source text
+
+        Returns:
+            A ValidationResult whose ``valid`` flag is True only when no
+            errors were collected
+        """
+        stripped_source = self._remove_comments(code)
+
+        # Run the checks in the original order: brackets, quotes, statements,
+        # function structure, common patterns.
+        bracket_issues = self._check_brackets(stripped_source)
+        quote_issues = self._check_quotes(stripped_source)
+        statement_issues = self._check_statements(stripped_source)
+        function_issues = self._check_function_structure(stripped_source)
+        pattern_issues = self._check_common_patterns(stripped_source)
+
+        errors = [*bracket_issues, *quote_issues, *function_issues]
+        warnings = [*statement_issues, *pattern_issues]
+
+        return ValidationResult(valid=not errors, errors=errors, warnings=warnings)
+    
+    def _remove_comments(self, code: str) -> str:
+        """
+        Strip ``//`` and ``/* ... */`` comments from the code.
+
+        The text is scanned character by character so that comment markers
+        inside string or character literals (for example ``"http://host"``)
+        are left alone. Newlines inside a block comment are kept, so line
+        numbers computed on the result still match the input.
+
+        Args:
+            code: C/C++ source text
+
+        Returns:
+            The source text with comments removed
+        """
+        out = []
+        i = 0
+        n = len(code)
+        quote = None  # the quote character of the literal we are inside, if any
+
+        while i < n:
+            ch = code[i]
+
+            if quote is not None:
+                out.append(ch)
+                if ch == '\\' and i + 1 < n:
+                    # Copy the escaped character verbatim so that \" or \'
+                    # does not terminate the literal.
+                    out.append(code[i + 1])
+                    i += 2
+                    continue
+                if ch == quote or ch == '\n':
+                    # A literal ends at its closing quote; an unterminated one
+                    # is cut off at the end of the line so it cannot swallow
+                    # the rest of the file.
+                    quote = None
+                i += 1
+                continue
+
+            if ch in '"\'':
+                quote = ch
+                out.append(ch)
+                i += 1
+            elif code.startswith('//', i):
+                # Drop everything up to, but not including, the newline.
+                end = code.find('\n', i)
+                i = n if end == -1 else end
+            elif code.startswith('/*', i):
+                end = code.find('*/', i + 2)
+                if end == -1:
+                    # Unterminated block comment: leave the remaining text untouched.
+                    out.append(code[i:])
+                    break
+                # Keep only the newlines so later line numbers stay correct.
+                out.append('\n' * code.count('\n', i, end))
+                i = end + 2
+            else:
+                out.append(ch)
+                i += 1
+
+        return ''.join(out)
+    
+    def _check_brackets(self, code: str) -> List[SyntaxError]:
+        """
+        Check that braces, parentheses and square brackets are balanced.
+
+        Brackets inside string or character literals are ignored. A closing
+        quote counts as escaped only when it is preceded by an odd number of
+        backslashes, so a literal ending in an escaped backslash (``"a\\\\"``)
+        is closed correctly. An unterminated literal is abandoned at the end
+        of its line, unless the line ends with a backslash, so one stray
+        quote cannot hide every bracket that follows it.
+
+        Args:
+            code: source text, expected to be comment-free
+
+        Returns:
+            One ERROR per surplus, mismatched or unclosed bracket
+        """
+        errors = []
+        stack = []  # entries: (opening bracket, line number, column)
+
+        lines = code.split('\n')
+        in_string = False
+        string_char = None
+
+        for line_num, line in enumerate(lines, 1):
+            for col, char in enumerate(line):
+                # Track entering and leaving string / character literals.
+                if char in '"\'':
+                    if not in_string:
+                        in_string = True
+                        string_char = char
+                    elif char == string_char:
+                        # Count the backslashes directly before the quote: an
+                        # even count means the quote itself is not escaped.
+                        backslashes = 0
+                        j = col - 1
+                        while j >= 0 and line[j] == '\\':
+                            backslashes += 1
+                            j -= 1
+                        if backslashes % 2 == 0:
+                            in_string = False
+                    continue
+
+                if in_string:
+                    continue
+
+                if char in self.opening_brackets:
+                    stack.append((char, line_num, col))
+                elif char in self.closing_brackets:
+                    if not stack:
+                        errors.append(SyntaxError(
+                            level=ErrorLevel.ERROR,
+                            message=f"多余的闭括号 '{char}'",
+                            line=line_num,
+                            column=col,
+                            context=line.strip()
+                        ))
+                    else:
+                        open_bracket, open_line, open_col = stack.pop()
+                        expected_close = self.bracket_pairs[open_bracket]
+                        if char != expected_close:
+                            errors.append(SyntaxError(
+                                level=ErrorLevel.ERROR,
+                                message=f"括号不匹配:期望 '{expected_close}',实际 '{char}'(对应第 {open_line} 行的 '{open_bracket}')",
+                                line=line_num,
+                                column=col,
+                                context=line.strip()
+                            ))
+
+            # A literal cannot continue onto the next line unless the line
+            # ends with a backslash continuation; drop the string state here.
+            if in_string and not line.endswith('\\'):
+                in_string = False
+
+        # Anything left on the stack was opened but never closed.
+        for open_bracket, open_line, open_col in stack:
+            expected_close = self.bracket_pairs[open_bracket]
+            errors.append(SyntaxError(
+                level=ErrorLevel.ERROR,
+                message=f"未闭合的括号 '{open_bracket}',缺少 '{expected_close}'",
+                line=open_line,
+                column=open_col,
+                context=lines[open_line - 1].strip() if open_line <= len(lines) else ""
+            ))
+
+        return errors
+    
+    def _check_quotes(self, code: str) -> List[SyntaxError]:
+        """
+        Check, line by line, that string and character literals are closed.
+
+        Preprocessor lines (those starting with ``#``) are skipped. A quote
+        preceded by an odd number of backslashes is treated as escaped.
+
+        Args:
+            code: source text to inspect
+
+        Returns:
+            One ERROR for every line that ends inside an open literal
+        """
+        findings = []
+
+        for line_num, line in enumerate(code.split('\n'), 1):
+            if line.strip().startswith('#'):
+                continue
+
+            open_quote = None  # quote character of the literal currently open
+            open_col = 0       # column where the most recent literal started
+
+            for idx, ch in enumerate(line):
+                if ch != '"' and ch != "'":
+                    continue
+                if open_quote is None:
+                    open_quote = ch
+                    open_col = idx
+                elif ch == open_quote:
+                    # Length of the backslash run immediately before this quote.
+                    before = line[:idx]
+                    backslashes = len(before) - len(before.rstrip('\\'))
+                    if backslashes % 2 == 0:
+                        open_quote = None
+
+            if open_quote is not None:
+                findings.append(SyntaxError(
+                    level=ErrorLevel.ERROR,
+                    message=f"未闭合的字符串(从列 {open_col} 开始)",
+                    line=line_num,
+                    column=open_col,
+                    context=line.strip()
+                ))
+
+        return findings
+    
+    def _check_statements(self, code: str) -> List[SyntaxError]:
+        """
+        Heuristically flag statements that appear to lack a trailing semicolon.
+
+        Each line is judged on its own. Blank lines, preprocessor lines,
+        comment lines, lines that begin with a control keyword, and lines
+        ending in ``{`` or ``}`` are skipped. Control keywords are matched as
+        whole words, so identifiers that merely start with one (``double``,
+        ``done``, ``format``) are still checked.
+
+        Args:
+            code: source text to inspect
+
+        Returns:
+            One WARNING per line that looks like a statement but does not end
+            with ``;``
+        """
+        warnings = []
+        lines = code.split('\n')
+
+        # Line shapes that should normally end with a semicolon.
+        statement_patterns = [
+            r'^\s*\w+\s+\w+\s*=',       # declaration with initialiser
+            r'^\s*\w+\s*=',              # assignment
+            r'^\s*\w+\s*\([^)]*\)\s*$',  # function call with nothing after it
+            r'^\s*return\b',             # return statement, with or without a value
+            r'^\s*break\s*$',            # break
+            r'^\s*continue\s*$',         # continue
+        ]
+
+        # \b ensures that only the keyword itself is skipped, not any
+        # identifier that happens to share its first letters.
+        control_keyword = re.compile(r'(?:if|else|while|for|switch|case|default|do)\b')
+
+        for line_num, line in enumerate(lines, 1):
+            stripped = line.strip()
+
+            if not stripped or stripped.startswith('#'):
+                continue
+            if stripped.startswith('//') or stripped.startswith('/*'):
+                continue
+            if control_keyword.match(stripped):
+                continue
+            if stripped.endswith('{') or stripped.endswith('}'):
+                continue
+            if stripped.endswith(';'):
+                continue
+
+            if any(re.match(pattern, stripped) for pattern in statement_patterns):
+                warnings.append(SyntaxError(
+                    level=ErrorLevel.WARNING,
+                    message="语句可能缺少分号",
+                    line=line_num,
+                    context=stripped
+                ))
+
+        return warnings
+    
+    def _check_function_structure(self, code: str) -> List[SyntaxError]:
+        """
+        Check that every function-like definition has a balanced body.
+
+        A definition is recognised by the shape ``<type> name(args) {``.
+        Control statements that share this shape (``if``, ``for``, ``while``,
+        ``switch``, ``catch``) are not treated as functions. Braces inside
+        string or character literals do not count towards the balance.
+
+        Args:
+            code: source text, expected to be comment-free
+
+        Returns:
+            One ERROR per definition whose opening brace is never closed
+        """
+        errors = []
+
+        # Shape: return type, name, parameter list, opening brace.
+        func_pattern = r'(?:[\w\s\*&<>,]+?)\s+(\w+)\s*\([^)]*\)\s*\{'
+        control_keywords = {'if', 'for', 'while', 'switch', 'catch'}
+
+        for match in re.finditer(func_pattern, code):
+            func_name = match.group(1)
+            if func_name in control_keywords:
+                # e.g. "else if (...) {" matches the pattern but is not a function.
+                continue
+
+            # The match ends just after the opening brace, so start at depth 1.
+            depth = 1
+            pos = match.end()
+            quote = None  # quote character of the literal we are inside, if any
+
+            while pos < len(code) and depth > 0:
+                ch = code[pos]
+                if quote is not None:
+                    if ch == '\\':
+                        pos += 1  # skip the escaped character as well
+                    elif ch == quote or ch == '\n':
+                        # A literal ends at its quote, or at end of line if unterminated.
+                        quote = None
+                elif ch in '"\'':
+                    quote = ch
+                elif ch == '{':
+                    depth += 1
+                elif ch == '}':
+                    depth -= 1
+                pos += 1
+
+            if depth != 0:
+                line_num = code[:match.start()].count('\n') + 1
+                errors.append(SyntaxError(
+                    level=ErrorLevel.ERROR,
+                    message=f"函数 '{func_name}' 的花括号不匹配",
+                    line=line_num
+                ))
+
+        return errors
+    
+    def _check_common_patterns(self, code: str) -> List[SyntaxError]:
+        """
+        Flag a few well-known suspicious constructs, one line at a time.
+
+        Checks performed:
+        * a control statement immediately followed by ``;`` (empty body)
+        * a single ``=`` inside an ``if``/``while`` condition
+        * ``sizeof(name)`` used directly as an array index
+
+        The condition of an ``if``/``while`` is taken from the opening
+        parenthesis to its matching closing parenthesis, so an assignment in
+        the statement body on the same line is not mistaken for part of the
+        condition. NOTE(review): parentheses inside string literals are not
+        treated specially by this matching.
+
+        Args:
+            code: source text to inspect
+
+        Returns:
+            The list of WARNING findings
+        """
+        warnings = []
+        lines = code.split('\n')
+
+        for line_num, line in enumerate(lines, 1):
+            stripped = line.strip()
+
+            # if/while/for directly followed by a semicolon.
+            if re.match(r'^(if|while|for)\s*\([^)]+\)\s*;', stripped):
+                warnings.append(SyntaxError(
+                    level=ErrorLevel.WARNING,
+                    message="控制语句后直接跟分号,可能是错误",
+                    line=line_num,
+                    context=stripped
+                ))
+
+            # Possible '=' / '==' mix-up inside an if/while condition.
+            head = re.match(r'^(if|while)\s*\(', stripped)
+            if head:
+                # Walk to the parenthesis that closes the condition; if the
+                # condition continues on a later line, use what is visible.
+                depth = 1
+                end = len(stripped)
+                for idx in range(head.end(), len(stripped)):
+                    if stripped[idx] == '(':
+                        depth += 1
+                    elif stripped[idx] == ')':
+                        depth -= 1
+                        if depth == 0:
+                            end = idx
+                            break
+                condition = stripped[head.end():end]
+
+                # A lone '=' that is not part of ==, !=, <= or >=.
+                if re.search(r'[^=!<>]=[^=]', condition):
+                    warnings.append(SyntaxError(
+                        level=ErrorLevel.WARNING,
+                        message="条件中使用了 '=',是否应该是 '=='?",
+                        line=line_num,
+                        context=stripped
+                    ))
+
+            # Obvious out-of-bounds pattern such as arr[sizeof(arr)].
+            if 'sizeof' in stripped and '[' in stripped:
+                if re.search(r'\[\s*sizeof\s*\(\s*\w+\s*\)\s*\]', stripped):
+                    warnings.append(SyntaxError(
+                        level=ErrorLevel.WARNING,
+                        message="可能的数组越界:使用 sizeof 作为索引",
+                        line=line_num,
+                        context=stripped
+                    ))
+
+        return warnings
+    
+    def validate_fused_code(self, original_code: str, fused_code: str) -> ValidationResult:
+        """
+        Validate fused code and compare its brace count with the original.
+
+        The fused code first goes through the regular ``validate`` checks.
+        Afterwards the number of ``{`` characters in both raw texts is
+        compared; if the fused text has fewer, a warning is added, since
+        inserted code is expected to add blocks, not remove them.
+
+        Args:
+            original_code: source before fusion
+            fused_code: source after fusion
+
+        Returns:
+            The ValidationResult for the fused code, possibly with one
+            additional warning
+        """
+        outcome = self.validate(fused_code)
+
+        braces_before = original_code.count('{')
+        braces_after = fused_code.count('{')
+
+        if braces_before > braces_after:
+            outcome.warnings.append(SyntaxError(
+                level=ErrorLevel.WARNING,
+                message=f"融合后代码的花括号数量减少(原始: {braces_before}, 融合后: {braces_after})",
+                line=0
+            ))
+
+        return outcome
+
+
+def validate_code(code: str) -> ValidationResult:
+    """
+    Convenience function: validate the syntactic structure of some code.
+
+    Args:
+        code: C/C++ source text
+
+    Returns:
+        The ValidationResult produced by a fresh SyntaxValidator
+    """
+    return SyntaxValidator().validate(code)
+
+
+def validate_fused_code(original: str, fused: str) -> ValidationResult:
+    """
+    Convenience function: validate fused code against its original.
+
+    Args:
+        original: source before fusion
+        fused: source after fusion
+
+    Returns:
+        The ValidationResult produced by a fresh SyntaxValidator
+    """
+    return SyntaxValidator().validate_fused_code(original, fused)
+
+
+if __name__ == "__main__":
+    # Manual demo: run the validator on one valid, one broken and one
+    # suspicious snippet and print what it reports.
+    test_code_valid = """
+    int test_function(int x) {
+        if (x > 0) {
+            return x * 2;
+        } else {
+            return -x;
+        }
+    }
+    """
+
+    test_code_invalid = """
+    int broken_function(int x) {
+        if (x > 0) {
+            return x * 2;
+        // 缺少闭合括号
+    }
+    """
+
+    test_code_warning = """
+    int warning_function(int x) {
+        if (x = 5) {  // 应该是 ==
+            return x;
+        }
+        int y = 10  // 缺少分号
+        return y;
+    }
+    """
+
+    validator = SyntaxValidator()
+    rule = "=" * 60
+
+    # (heading, snippet, name of the findings list to dump, or None for none)
+    demos = [
+        ("测试 1: 有效代码", test_code_valid, None),
+        ("测试 2: 无效代码(括号不匹配)", test_code_invalid, "errors"),
+        ("测试 3: 有警告的代码", test_code_warning, "warnings"),
+    ]
+
+    for index, (heading, snippet, findings_attr) in enumerate(demos):
+        # Every banner except the first is preceded by a blank line.
+        print(rule if index == 0 else "\n" + rule)
+        print(heading)
+        print(rule)
+        result = validator.validate(snippet)
+        print(result.get_summary())
+        if findings_attr is not None:
+            for finding in getattr(result, findings_attr):
+                print(f"  {finding}")
+

+ 411 - 0
src/verification_agent.py

@@ -0,0 +1,411 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+验证 Agent
+
+整合语法验证和语义审查,提供统一的代码验证接口。
+"""
+
+import os
+from typing import List, Dict, Optional
+from dataclasses import dataclass, field
+from enum import Enum
+
+from syntax_validator import SyntaxValidator, ValidationResult as SyntaxResult
+from semantic_reviewer import SemanticReviewer, ReviewResult, IssueLevel
+
+
+class VerificationStatus(Enum):
+    """Overall outcome of verifying one function, or a whole set of functions."""
+    # Passed with nothing to report.
+    PASSED = "passed"
+    # Passed, but warnings were raised.
+    PASSED_WITH_WARNINGS = "passed_with_warnings"
+    # Verification failed.
+    FAILED = "failed"
+    # Verification was skipped.
+    SKIPPED = "skipped"
+
+
+@dataclass
+class VerificationReport:
+    """Verification report for a single function."""
+    status: VerificationStatus
+    function_name: str
+
+    # Result of the syntax validation step, if it ran.
+    syntax_result: Optional[SyntaxResult] = None
+
+    # Result of the semantic review step, if it ran.
+    semantic_result: Optional[ReviewResult] = None
+
+    # Aggregated messages collected from both steps.
+    error_messages: List[str] = field(default_factory=list)
+    warning_messages: List[str] = field(default_factory=list)
+    suggestions: List[str] = field(default_factory=list)
+
+    def get_summary(self) -> str:
+        """Return a one-line summary: status icon, function name and verdict."""
+        icon = {
+            VerificationStatus.PASSED: "✅",
+            VerificationStatus.PASSED_WITH_WARNINGS: "⚠️",
+            VerificationStatus.FAILED: "❌",
+            VerificationStatus.SKIPPED: "⏭️"
+        }.get(self.status, "❓")
+
+        error_count = len(self.error_messages)
+        warning_count = len(self.warning_messages)
+
+        # Any status other than the three handled below reads as "skipped".
+        if self.status == VerificationStatus.PASSED:
+            return f"{icon} {self.function_name}: 验证通过"
+        if self.status == VerificationStatus.PASSED_WITH_WARNINGS:
+            return f"{icon} {self.function_name}: 验证通过 ({warning_count} 个警告)"
+        if self.status == VerificationStatus.FAILED:
+            return f"{icon} {self.function_name}: 验证失败 ({error_count} 个错误)"
+        return f"{icon} {self.function_name}: 跳过验证"
+
+
+@dataclass
+class FullVerificationReport:
+    """Aggregated verification report covering every verified function."""
+    reports: Dict[str, VerificationReport] = field(default_factory=dict)
+    overall_status: VerificationStatus = VerificationStatus.PASSED
+
+    @property
+    def passed_count(self) -> int:
+        """Number of reports that passed, with or without warnings."""
+        passing = (VerificationStatus.PASSED, VerificationStatus.PASSED_WITH_WARNINGS)
+        return len([r for r in self.reports.values() if r.status in passing])
+
+    @property
+    def failed_count(self) -> int:
+        """Number of reports whose status is FAILED."""
+        return len([r for r in self.reports.values() if r.status == VerificationStatus.FAILED])
+
+    def get_summary(self) -> str:
+        """Return a one-line summary of the overall outcome."""
+        total = len(self.reports)
+        if self.overall_status == VerificationStatus.PASSED:
+            return f"✅ 验证完成: {self.passed_count}/{total} 通过"
+        if self.overall_status == VerificationStatus.PASSED_WITH_WARNINGS:
+            return f"⚠️ 验证完成: {self.passed_count}/{total} 通过 (有警告)"
+        return f"❌ 验证失败: {self.failed_count}/{total} 失败"
+
+    def get_detailed_report(self) -> str:
+        """
+        Return a multi-line report.
+
+        For each function it lists the summary line, all errors, at most five
+        warnings (followed by a count of the remaining ones) and at most
+        three suggestions. The overall summary comes last.
+        """
+        rule = "=" * 60
+        lines = [rule, "验证报告详情", rule, ""]
+
+        for report in self.reports.values():
+            lines.append(report.get_summary())
+
+            if report.error_messages:
+                lines.append("  错误:")
+                lines.extend(f"    - {err}" for err in report.error_messages)
+
+            if report.warning_messages:
+                lines.append("  警告:")
+                lines.extend(f"    - {warn}" for warn in report.warning_messages[:5])
+                hidden = len(report.warning_messages) - 5
+                if hidden > 0:
+                    lines.append(f"    ... 还有 {hidden} 个警告")
+
+            if report.suggestions:
+                lines.append("  建议:")
+                lines.extend(f"    - {sug}" for sug in report.suggestions[:3])
+
+            lines.append("")
+
+        lines.append(rule)
+        lines.append(self.get_summary())
+
+        return "\n".join(lines)
+
+
+class VerificationAgent:
+    """验证 Agent"""
+    
+    def __init__(
+        self,
+        enable_syntax: bool = True,
+        enable_semantic: bool = True,
+        api_key: str = None
+    ):
+        """
+        Create the verification agent.
+
+        If the semantic reviewer raises ValueError during construction, a
+        warning is printed and semantic review is switched off for this agent.
+
+        Args:
+            enable_syntax: whether syntax validation is enabled
+            enable_semantic: whether semantic review is enabled
+            api_key: LLM API key (needed by the semantic reviewer)
+        """
+        self.enable_syntax = enable_syntax
+        self.enable_semantic = enable_semantic
+        self.syntax_validator = None
+        self.semantic_reviewer = None
+
+        if enable_syntax:
+            self.syntax_validator = SyntaxValidator()
+
+        if not enable_semantic:
+            return
+
+        try:
+            self.semantic_reviewer = SemanticReviewer(api_key=api_key)
+        except ValueError as e:
+            print(f"Warning: 语义审查器初始化失败: {e}")
+            self.enable_semantic = False
+    
+    def verify_function(
+        self,
+        fused_code: str,
+        original_code: str = "",
+        inserted_code: str = "",
+        func_name: str = "unknown",
+        shared_state: Dict = None
+    ) -> VerificationReport:
+        """
+        Verify a single fused function.
+
+        Syntax validation runs first; if it reports the code as invalid the
+        report is returned as FAILED and semantic review is skipped. Semantic
+        review runs only when some inserted code was supplied.
+
+        Args:
+            fused_code: code after fusion
+            original_code: original code (optional)
+            inserted_code: inserted code (optional)
+            func_name: name of the function
+            shared_state: shared state variables
+
+        Returns:
+            A VerificationReport describing the outcome
+        """
+        report = VerificationReport(
+            status=VerificationStatus.PASSED,
+            function_name=func_name
+        )
+
+        # Step 1: syntax validation.
+        if self.enable_syntax and self.syntax_validator:
+            # With an original available, also compare structure against it.
+            syntax_result = (
+                self.syntax_validator.validate_fused_code(original_code, fused_code)
+                if original_code
+                else self.syntax_validator.validate(fused_code)
+            )
+            report.syntax_result = syntax_result
+
+            report.error_messages.extend(
+                f"[语法] {error.message}" for error in syntax_result.errors
+            )
+            report.warning_messages.extend(
+                f"[语法] {warning.message}" for warning in syntax_result.warnings
+            )
+
+            if not syntax_result.valid:
+                # Syntax errors end verification before any semantic review.
+                report.status = VerificationStatus.FAILED
+                return report
+
+        # Step 2: semantic review.
+        if self.enable_semantic and self.semantic_reviewer and inserted_code:
+            if original_code:
+                context = {"shared_state": shared_state} if shared_state else None
+                semantic_result = self.semantic_reviewer.review_fusion(
+                    original_code, fused_code, inserted_code, func_name, context
+                )
+            else:
+                semantic_result = self.semantic_reviewer.quick_check(fused_code, inserted_code)
+
+            report.semantic_result = semantic_result
+
+            for issue in semantic_result.issues:
+                entry = f"[语义] {issue.description}"
+                # CRITICAL and MAJOR issues are errors; anything else is a warning.
+                if issue.level in (IssueLevel.CRITICAL, IssueLevel.MAJOR):
+                    report.error_messages.append(entry)
+                else:
+                    report.warning_messages.append(entry)
+
+                if issue.suggestion:
+                    report.suggestions.append(issue.suggestion)
+
+            # Suggestions attached to the review result as a whole.
+            report.suggestions.extend(semantic_result.suggestions)
+
+            if not semantic_result.valid:
+                report.status = VerificationStatus.FAILED
+                return report
+
+        # Step 3: derive the final status from the collected messages.
+        if report.error_messages:
+            report.status = VerificationStatus.FAILED
+        elif report.warning_messages:
+            report.status = VerificationStatus.PASSED_WITH_WARNINGS
+        else:
+            report.status = VerificationStatus.PASSED
+
+        return report
+    
+    def verify_all(
+        self,
+        fused_code: Dict[str, str],
+        original_functions: Dict[str, str] = None,
+        inserted_slices: Dict[str, str] = None,
+        shared_state: Dict = None
+    ) -> FullVerificationReport:
+        """
+        Verify every fused function and aggregate the results.
+
+        Args:
+            fused_code: fused code, as {function name: code}
+            original_functions: original functions, as {function name: code}
+            inserted_slices: inserted snippets, as {function name: code}
+            shared_state: shared state variables
+
+        Returns:
+            A FullVerificationReport with one entry per function in
+            ``fused_code``
+        """
+        originals = original_functions or {}
+        slices = inserted_slices or {}
+
+        full_report = FullVerificationReport()
+
+        for func_name, fused in fused_code.items():
+            # Functions without a recorded original or slice use empty strings.
+            full_report.reports[func_name] = self.verify_function(
+                fused_code=fused,
+                original_code=originals.get(func_name, ""),
+                inserted_code=slices.get(func_name, ""),
+                func_name=func_name,
+                shared_state=shared_state
+            )
+
+        # Overall status: any failure wins, then any warning, otherwise passed.
+        statuses = [r.status for r in full_report.reports.values()]
+        if VerificationStatus.FAILED in statuses:
+            full_report.overall_status = VerificationStatus.FAILED
+        elif VerificationStatus.PASSED_WITH_WARNINGS in statuses:
+            full_report.overall_status = VerificationStatus.PASSED_WITH_WARNINGS
+        else:
+            full_report.overall_status = VerificationStatus.PASSED
+
+        return full_report
+
+
+def verify_fusion(
+    fused_code: str,
+    original_code: str = "",
+    inserted_code: str = "",
+    func_name: str = "unknown",
+    enable_syntax: bool = True,
+    enable_semantic: bool = True
+) -> VerificationReport:
+    """
+    Convenience function: verify one code fusion with a throwaway agent.
+
+    Args:
+        fused_code: code after fusion
+        original_code: original code
+        inserted_code: inserted code
+        func_name: name of the function
+        enable_syntax: whether syntax validation is enabled
+        enable_semantic: whether semantic review is enabled
+
+    Returns:
+        The VerificationReport produced by the agent
+    """
+    return VerificationAgent(
+        enable_syntax=enable_syntax,
+        enable_semantic=enable_semantic
+    ).verify_function(
+        fused_code=fused_code,
+        original_code=original_code,
+        inserted_code=inserted_code,
+        func_name=func_name
+    )
+
+
+if __name__ == "__main__":
+    # Manual demo of the verification agent on hand-written snippets.
+    original = """
+    void process_data(int x) {
+        int result = x * 2;
+        printf("Result: %d\\n", result);
+    }
+    """
+
+    inserted = """
+    g_secret = 42;
+    g_key = g_secret ^ 0xFF;
+    """
+
+    fused_valid = """
+    void process_data(int x) {
+        g_secret = 42;
+        g_key = g_secret ^ 0xFF;
+        int result = x * 2;
+        printf("Result: %d\\n", result);
+    }
+    """
+
+    fused_invalid = """
+    void process_data(int x) {
+        g_secret = 42;
+        g_key = g_secret ^ 0xFF;
+        int result = x * 2;
+        printf("Result: %d\\n", result);
+    // 缺少闭合花括号
+    """
+
+    rule = "=" * 60
+    # Keyword arguments shared by every verify_function call below.
+    common = dict(
+        original_code=original,
+        inserted_code=inserted,
+        func_name="process_data"
+    )
+
+    print(rule)
+    print("验证 Agent 测试")
+    print(rule)
+
+    # Syntax validation only (no API access needed).
+    agent = VerificationAgent(enable_syntax=True, enable_semantic=False)
+
+    print("\n测试 1: 有效代码")
+    report = agent.verify_function(fused_code=fused_valid, **common)
+    print(report.get_summary())
+
+    print("\n测试 2: 无效代码(语法错误)")
+    report = agent.verify_function(fused_code=fused_invalid, **common)
+    print(report.get_summary())
+    for err in report.error_messages:
+        print(f"  {err}")
+
+    # With semantic review as well (needs API access).
+    print("\n" + rule)
+    print("测试语义审查(需要 DASHSCOPE_API_KEY)")
+    print(rule)
+
+    try:
+        agent_full = VerificationAgent(enable_syntax=True, enable_semantic=True)
+        report = agent_full.verify_function(fused_code=fused_valid, **common)
+        print(report.get_summary())
+
+        semantic = report.semantic_result
+        if semantic:
+            print(f"  语义审查置信度: {semantic.confidence:.0%}")
+            print(f"  摘要: {semantic.summary}")
+
+    except Exception as e:
+        print(f"语义审查跳过: {e}")
+

Деякі файли не було показано, через те що забагато файлів було змінено