
v2 inference

Yuwei Guo 2 years ago
Parent
Commit
1904a01117
4 changed files with 53 additions and 3 deletions
  1. configs/inference/inference-v1.yaml (+0 -0)
  2. configs/inference/inference-v2.yaml (+27 -0)
  3. configs/prompts/v2/5-RealisticVision.yaml (+23 -0)
  4. scripts/animate.py (+3 -3)

+ 0 - 0
configs/inference/inference.yaml → configs/inference/inference-v1.yaml


+ 27 - 0
configs/inference/inference-v2.yaml

@@ -0,0 +1,27 @@
+unet_additional_kwargs:
+  use_inflated_groupnorm: true
+  unet_use_cross_frame_attention: false
+  unet_use_temporal_attention: false
+  use_motion_module: true
+  motion_module_resolutions:
+  - 1
+  - 2
+  - 4
+  - 8
+  motion_module_mid_block: true
+  motion_module_decoder_only: false
+  motion_module_type: Vanilla
+  motion_module_kwargs:
+    num_attention_heads: 8
+    num_transformer_block: 1
+    attention_block_types:
+    - Temporal_Self
+    - Temporal_Self
+    temporal_position_encoding: true
+    temporal_position_encoding_max_len: 32
+    temporal_attention_dim_div: 1
+
+noise_scheduler_kwargs:
+  beta_start: 0.00085
+  beta_end: 0.012
+  beta_schedule: "linear"
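Below is a minimal sketch (not part of this commit) of how the new v2 inference config can be loaded and inspected with OmegaConf, mirroring how scripts/animate.py consumes it; the exact UNet constructor call is omitted here because it is not shown in this diff.

from omegaconf import OmegaConf

# Load the v2 inference config added in this commit.
inference_config = OmegaConf.load("configs/inference/inference-v2.yaml")

# Convert the UNet overrides to a plain dict. These are the settings that
# distinguish the v2 motion module: inflated GroupNorm, a mid-block motion
# module, and temporal positional encoding capped at 32 frames.
unet_kwargs = OmegaConf.to_container(inference_config.unet_additional_kwargs, resolve=True)
print(unet_kwargs["use_inflated_groupnorm"])   # True
print(unet_kwargs["motion_module_mid_block"])  # True
print(unet_kwargs["motion_module_kwargs"]["temporal_position_encoding_max_len"])  # 32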

+ 23 - 0
configs/prompts/v2/5-RealisticVision.yaml

@@ -0,0 +1,23 @@
+RealisticVision:
+  base: ""
+  path: "models/DreamBooth_LoRA/realisticVisionV20_v20.safetensors"
+
+  inference_config: "configs/inference/inference-v2.yaml"
+  motion_module:
+    - "models/Motion_Module/mm_sd_v15_v2.ckpt"
+
+  seed:           [13100322578370451493, 14752961627088720670, 9329399085567825781, 16987697414827649302]
+  steps:          25
+  guidance_scale: 7.5
+
+  prompt:
+    - "b&w photo of 42 y.o man in black clothes, bald, face, half body, body, high detailed skin, skin pores, coastline, overcast weather, wind, waves, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"
+    - "close up photo of a rabbit, forest, haze, halation, bloom, dramatic atmosphere, centred, rule of thirds, 200mm 1.4f macro shot"
+    - "photo of coastline, rocks, storm weather, wind, waves, lightning, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"
+    - "night, b&w photo of old house, post apocalypse, forest, storm weather, wind, rocks, 8k uhd, dslr, soft lighting, high quality, film grain"
+
+  n_prompt:
+    - "semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
+    - "semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
+    - "blur, haze, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, mutated hands and fingers, deformed, distorted, disfigured, poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation"
+    - "blur, haze, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, art, mutated hands and fingers, deformed, distorted, disfigured, poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation"

+ 3 - 3
scripts/animate.py

@@ -34,7 +34,6 @@ def main(args):
     time_str = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
     savedir = f"samples/{Path(args.config).stem}-{time_str}"
     os.makedirs(savedir)
-    inference_config = OmegaConf.load(args.inference_config)
 
     config  = OmegaConf.load(args.config)
     samples = []
@@ -45,7 +44,8 @@ def main(args):
         motion_modules = model_config.motion_module
         motion_modules = [motion_modules] if isinstance(motion_modules, str) else list(motion_modules)
         for motion_module in motion_modules:
-        
+            inference_config = OmegaConf.load(model_config.get("inference_config", args.inference_config))
+
             ### >>> create validation pipeline >>> ###
             tokenizer    = CLIPTokenizer.from_pretrained(args.pretrained_model_path, subfolder="tokenizer")
             text_encoder = CLIPTextModel.from_pretrained(args.pretrained_model_path, subfolder="text_encoder")
@@ -148,7 +148,7 @@ def main(args):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--pretrained_model_path", type=str, default="models/StableDiffusion/stable-diffusion-v1-5",)
-    parser.add_argument("--inference_config",      type=str, default="configs/inference/inference.yaml")    
+    parser.add_argument("--inference_config",      type=str, default="configs/inference/inference-v1.yaml")    
     parser.add_argument("--config",                type=str, required=True)
     
     parser.add_argument("--L", type=int, default=16 )