Yuwei Guo 2 years ago
parent
commit
0cdcc95fb5

+ 1 - 423
animatediff/utils/convert_from_ckpt.py

@@ -48,16 +48,7 @@ from diffusers.schedulers import (
     PNDMScheduler,
     UnCLIPScheduler,
 )
-# from diffusers.utils import is_omegaconf_available, is_safetensors_available, logging
 from diffusers.utils.import_utils import BACKENDS_MAPPING
-# from diffusers.latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
-# from diffusers.pipelines.paint_by_example import PaintByExampleImageEncoder
-# from diffusers.pipelines.pipeline_utils import DiffusionPipeline
-# from .safety_checker import StableDiffusionSafetyChecker
-# from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
-
-
-# logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
 def shave_segments(path, n_shave_prefix_segments=1):
@@ -724,8 +715,7 @@ def convert_ldm_bert_checkpoint(checkpoint, config):
 
 
 def convert_ldm_clip_checkpoint(checkpoint):
-    # text_model = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
-    text_model = CLIPTextModel.from_pretrained("/mnt/petrelfs/guoyuwei/projects/huggingface/clip-vit-large-patch14")
+    text_model = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
     keys = list(checkpoint.keys())
 
     text_model_dict = {}
@@ -968,415 +958,3 @@ def convert_controlnet_checkpoint(
     controlnet_model.load_state_dict(converted_ctrl_checkpoint)
 
     return controlnet_model
-
-
-# def download_from_original_stable_diffusion_ckpt(
-#     checkpoint_path: str,
-#     original_config_file: str = None,
-#     image_size: int = 512,
-#     prediction_type: str = None,
-#     model_type: str = None,
-#     extract_ema: bool = False,
-#     scheduler_type: str = "pndm",
-#     num_in_channels: Optional[int] = None,
-#     upcast_attention: Optional[bool] = None,
-#     device: str = None,
-#     from_safetensors: bool = False,
-#     stable_unclip: Optional[str] = None,
-#     stable_unclip_prior: Optional[str] = None,
-#     clip_stats_path: Optional[str] = None,
-#     controlnet: Optional[bool] = None,
-#     load_safety_checker: bool = True,
-#     pipeline_class: DiffusionPipeline = None,
-# ) -> DiffusionPipeline:
-#     """
-#     Load a Stable Diffusion pipeline object from a CompVis-style `.ckpt`/`.safetensors` file and (ideally) a `.yaml`
-#     config file.
-
-#     Although many of the arguments can be automatically inferred, some of these rely on brittle checks against the
-#     global step count, which will likely fail for models that have undergone further fine-tuning. Therefore, it is
-#     recommended that you override the default values and/or supply an `original_config_file` wherever possible.
-
-#     Args:
-#         checkpoint_path (`str`): Path to `.ckpt` file.
-#         original_config_file (`str`):
-#             Path to `.yaml` config file corresponding to the original architecture. If `None`, will be automatically
-#             inferred by looking for a key that only exists in SD2.0 models.
-#         image_size (`int`, *optional*, defaults to 512):
-#             The image size that the model was trained on. Use 512 for Stable Diffusion v1.X and Stable Diffusion v2
-#             Base. Use 768 for Stable Diffusion v2.
-#         prediction_type (`str`, *optional*):
-#             The prediction type that the model was trained on. Use `'epsilon'` for Stable Diffusion v1.X and Stable
-#             Diffusion v2 Base. Use `'v_prediction'` for Stable Diffusion v2.
-#         num_in_channels (`int`, *optional*, defaults to None):
-#             The number of input channels. If `None`, it will be automatically inferred.
-#         scheduler_type (`str`, *optional*, defaults to 'pndm'):
-#             Type of scheduler to use. Should be one of `["pndm", "lms", "heun", "euler", "euler-ancestral", "dpm",
-#             "ddim"]`.
-#         model_type (`str`, *optional*, defaults to `None`):
-#             The pipeline type. `None` to automatically infer, or one of `["FrozenOpenCLIPEmbedder",
-#             "FrozenCLIPEmbedder", "PaintByExample"]`.
-#         is_img2img (`bool`, *optional*, defaults to `False`):
-#             Whether the model should be loaded as an img2img pipeline.
-#         extract_ema (`bool`, *optional*, defaults to `False`): Only relevant for
-#             checkpoints that have both EMA and non-EMA weights. Whether to extract the EMA weights or not. Defaults to
-#             `False`. Pass `True` to extract the EMA weights. EMA weights usually yield higher quality images for
-#             inference. Non-EMA weights are usually better to continue fine-tuning.
-#         upcast_attention (`bool`, *optional*, defaults to `None`):
-#             Whether the attention computation should always be upcasted. This is necessary when running stable
-#             diffusion 2.1.
-#         device (`str`, *optional*, defaults to `None`):
-#             The device to use. Pass `None` to determine automatically.
-#         from_safetensors (`str`, *optional*, defaults to `False`):
-#             If `checkpoint_path` is in `safetensors` format, load checkpoint with safetensors instead of PyTorch.
-#         load_safety_checker (`bool`, *optional*, defaults to `True`):
-#             Whether to load the safety checker or not. Defaults to `True`.
-#         pipeline_class (`str`, *optional*, defaults to `None`):
-#             The pipeline class to use. Pass `None` to determine automatically.
-#         return: A StableDiffusionPipeline object representing the passed-in `.ckpt`/`.safetensors` file.
-#     """
-
-#     # import pipelines here to avoid circular import error when using from_ckpt method
-#     from diffusers import (
-#         LDMTextToImagePipeline,
-#         PaintByExamplePipeline,
-#         StableDiffusionControlNetPipeline,
-#         StableDiffusionPipeline,
-#         StableUnCLIPImg2ImgPipeline,
-#         StableUnCLIPPipeline,
-#     )
-
-#     if pipeline_class is None:
-#         pipeline_class = StableDiffusionPipeline
-
-#     if prediction_type == "v-prediction":
-#         prediction_type = "v_prediction"
-
-#     if not is_omegaconf_available():
-#         raise ValueError(BACKENDS_MAPPING["omegaconf"][1])
-
-#     from omegaconf import OmegaConf
-
-#     if from_safetensors:
-#         if not is_safetensors_available():
-#             raise ValueError(BACKENDS_MAPPING["safetensors"][1])
-
-#         from safetensors import safe_open
-
-#         checkpoint = {}
-#         with safe_open(checkpoint_path, framework="pt", device="cpu") as f:
-#             for key in f.keys():
-#                 checkpoint[key] = f.get_tensor(key)
-#     else:
-#         if device is None:
-#             device = "cuda" if torch.cuda.is_available() else "cpu"
-#             checkpoint = torch.load(checkpoint_path, map_location=device)
-#         else:
-#             checkpoint = torch.load(checkpoint_path, map_location=device)
-
-#     # Sometimes models don't have the global_step item
-#     if "global_step" in checkpoint:
-#         global_step = checkpoint["global_step"]
-#     else:
-#         print("global_step key not found in model")
-#         global_step = None
-
-#     # NOTE: this while loop isn't great but this controlnet checkpoint has one additional
-#     # "state_dict" key https://huggingface.co/thibaud/controlnet-canny-sd21
-#     while "state_dict" in checkpoint:
-#         checkpoint = checkpoint["state_dict"]
-
-#     if original_config_file is None:
-#         key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
-
-#         # model_type = "v1"
-#         config_url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml"
-
-#         if key_name in checkpoint and checkpoint[key_name].shape[-1] == 1024:
-#             # model_type = "v2"
-#             config_url = "https://raw.githubusercontent.com/Stability-AI/stablediffusion/main/configs/stable-diffusion/v2-inference-v.yaml"
-
-#             if global_step == 110000:
-#                 # v2.1 needs to upcast attention
-#                 upcast_attention = True
-
-#         original_config_file = BytesIO(requests.get(config_url).content)
-
-#     original_config = OmegaConf.load(original_config_file)
-
-#     if num_in_channels is not None:
-#         original_config["model"]["params"]["unet_config"]["params"]["in_channels"] = num_in_channels
-
-#     if (
-#         "parameterization" in original_config["model"]["params"]
-#         and original_config["model"]["params"]["parameterization"] == "v"
-#     ):
-#         if prediction_type is None:
-#             # NOTE: For stable diffusion 2 base it is recommended to pass `prediction_type=="epsilon"`
-#             # as it relies on a brittle global step parameter here
-#             prediction_type = "epsilon" if global_step == 875000 else "v_prediction"
-#         if image_size is None:
-#             # NOTE: For stable diffusion 2 base one has to pass `image_size==512`
-#             # as it relies on a brittle global step parameter here
-#             image_size = 512 if global_step == 875000 else 768
-#     else:
-#         if prediction_type is None:
-#             prediction_type = "epsilon"
-#         if image_size is None:
-#             image_size = 512
-
-#     if controlnet is None:
-#         controlnet = "control_stage_config" in original_config.model.params
-
-#     if controlnet:
-#         controlnet_model = convert_controlnet_checkpoint(
-#             checkpoint, original_config, checkpoint_path, image_size, upcast_attention, extract_ema
-#         )
-
-#     num_train_timesteps = original_config.model.params.timesteps
-#     beta_start = original_config.model.params.linear_start
-#     beta_end = original_config.model.params.linear_end
-
-#     scheduler = DDIMScheduler(
-#         beta_end=beta_end,
-#         beta_schedule="scaled_linear",
-#         beta_start=beta_start,
-#         num_train_timesteps=num_train_timesteps,
-#         steps_offset=1,
-#         clip_sample=False,
-#         set_alpha_to_one=False,
-#         prediction_type=prediction_type,
-#     )
-#     # make sure scheduler works correctly with DDIM
-#     scheduler.register_to_config(clip_sample=False)
-
-#     if scheduler_type == "pndm":
-#         config = dict(scheduler.config)
-#         config["skip_prk_steps"] = True
-#         scheduler = PNDMScheduler.from_config(config)
-#     elif scheduler_type == "lms":
-#         scheduler = LMSDiscreteScheduler.from_config(scheduler.config)
-#     elif scheduler_type == "heun":
-#         scheduler = HeunDiscreteScheduler.from_config(scheduler.config)
-#     elif scheduler_type == "euler":
-#         scheduler = EulerDiscreteScheduler.from_config(scheduler.config)
-#     elif scheduler_type == "euler-ancestral":
-#         scheduler = EulerAncestralDiscreteScheduler.from_config(scheduler.config)
-#     elif scheduler_type == "dpm":
-#         scheduler = DPMSolverMultistepScheduler.from_config(scheduler.config)
-#     elif scheduler_type == "ddim":
-#         scheduler = scheduler
-#     else:
-#         raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!")
-
-#     # Convert the UNet2DConditionModel model.
-#     unet_config = create_unet_diffusers_config(original_config, image_size=image_size)
-#     unet_config["upcast_attention"] = upcast_attention
-#     unet = UNet2DConditionModel(**unet_config)
-
-#     converted_unet_checkpoint = convert_ldm_unet_checkpoint(
-#         checkpoint, unet_config, path=checkpoint_path, extract_ema=extract_ema
-#     )
-
-#     unet.load_state_dict(converted_unet_checkpoint)
-
-#     # Convert the VAE model.
-#     vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
-#     converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)
-
-#     vae = AutoencoderKL(**vae_config)
-#     vae.load_state_dict(converted_vae_checkpoint)
-
-#     # Convert the text model.
-#     if model_type is None:
-#         model_type = original_config.model.params.cond_stage_config.target.split(".")[-1]
-#         logger.debug(f"no `model_type` given, `model_type` inferred as: {model_type}")
-
-#     if model_type == "FrozenOpenCLIPEmbedder":
-#         text_model = convert_open_clip_checkpoint(checkpoint)
-#         tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer")
-
-#         if stable_unclip is None:
-#             if controlnet:
-#                 pipe = StableDiffusionControlNetPipeline(
-#                     vae=vae,
-#                     text_encoder=text_model,
-#                     tokenizer=tokenizer,
-#                     unet=unet,
-#                     scheduler=scheduler,
-#                     controlnet=controlnet_model,
-#                     safety_checker=None,
-#                     feature_extractor=None,
-#                     requires_safety_checker=False,
-#                 )
-#             else:
-#                 pipe = pipeline_class(
-#                     vae=vae,
-#                     text_encoder=text_model,
-#                     tokenizer=tokenizer,
-#                     unet=unet,
-#                     scheduler=scheduler,
-#                     safety_checker=None,
-#                     feature_extractor=None,
-#                     requires_safety_checker=False,
-#                 )
-#         else:
-#             image_normalizer, image_noising_scheduler = stable_unclip_image_noising_components(
-#                 original_config, clip_stats_path=clip_stats_path, device=device
-#             )
-
-#             if stable_unclip == "img2img":
-#                 feature_extractor, image_encoder = stable_unclip_image_encoder(original_config)
-
-#                 pipe = StableUnCLIPImg2ImgPipeline(
-#                     # image encoding components
-#                     feature_extractor=feature_extractor,
-#                     image_encoder=image_encoder,
-#                     # image noising components
-#                     image_normalizer=image_normalizer,
-#                     image_noising_scheduler=image_noising_scheduler,
-#                     # regular denoising components
-#                     tokenizer=tokenizer,
-#                     text_encoder=text_model,
-#                     unet=unet,
-#                     scheduler=scheduler,
-#                     # vae
-#                     vae=vae,
-#                 )
-#             elif stable_unclip == "txt2img":
-#                 if stable_unclip_prior is None or stable_unclip_prior == "karlo":
-#                     karlo_model = "kakaobrain/karlo-v1-alpha"
-#                     prior = PriorTransformer.from_pretrained(karlo_model, subfolder="prior")
-
-#                     prior_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-#                     prior_text_model = CLIPTextModelWithProjection.from_pretrained("openai/clip-vit-large-patch14")
-
-#                     prior_scheduler = UnCLIPScheduler.from_pretrained(karlo_model, subfolder="prior_scheduler")
-#                     prior_scheduler = DDPMScheduler.from_config(prior_scheduler.config)
-#                 else:
-#                     raise NotImplementedError(f"unknown prior for stable unclip model: {stable_unclip_prior}")
-
-#                 pipe = StableUnCLIPPipeline(
-#                     # prior components
-#                     prior_tokenizer=prior_tokenizer,
-#                     prior_text_encoder=prior_text_model,
-#                     prior=prior,
-#                     prior_scheduler=prior_scheduler,
-#                     # image noising components
-#                     image_normalizer=image_normalizer,
-#                     image_noising_scheduler=image_noising_scheduler,
-#                     # regular denoising components
-#                     tokenizer=tokenizer,
-#                     text_encoder=text_model,
-#                     unet=unet,
-#                     scheduler=scheduler,
-#                     # vae
-#                     vae=vae,
-#                 )
-#             else:
-#                 raise NotImplementedError(f"unknown `stable_unclip` type: {stable_unclip}")
-#     elif model_type == "PaintByExample":
-#         vision_model = convert_paint_by_example_checkpoint(checkpoint)
-#         tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-#         feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker")
-#         pipe = PaintByExamplePipeline(
-#             vae=vae,
-#             image_encoder=vision_model,
-#             unet=unet,
-#             scheduler=scheduler,
-#             safety_checker=None,
-#             feature_extractor=feature_extractor,
-#         )
-#     elif model_type == "FrozenCLIPEmbedder":
-#         text_model = convert_ldm_clip_checkpoint(checkpoint)
-#         # tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-#         tokenizer = CLIPTokenizer.from_pretrained("/mnt/petrelfs/guoyuwei/projects/huggingface/clip-vit-large-patch14")
-
-#         # if load_safety_checker:
-#         if False:
-#             safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker")
-#             feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker")
-#         else:
-#             safety_checker = None
-#             feature_extractor = None
-
-#         if controlnet:
-#             pipe = StableDiffusionControlNetPipeline(
-#                 vae=vae,
-#                 text_encoder=text_model,
-#                 tokenizer=tokenizer,
-#                 unet=unet,
-#                 controlnet=controlnet_model,
-#                 scheduler=scheduler,
-#                 safety_checker=safety_checker,
-#                 feature_extractor=feature_extractor,
-#             )
-#         else:
-#             pipe = pipeline_class(
-#                 vae=vae,
-#                 text_encoder=text_model,
-#                 tokenizer=tokenizer,
-#                 unet=unet,
-#                 scheduler=scheduler,
-#                 safety_checker=safety_checker,
-#                 feature_extractor=feature_extractor,
-#             )
-#     else:
-#         text_config = create_ldm_bert_config(original_config)
-#         text_model = convert_ldm_bert_checkpoint(checkpoint, text_config)
-#         tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
-#         pipe = LDMTextToImagePipeline(vqvae=vae, bert=text_model, tokenizer=tokenizer, unet=unet, scheduler=scheduler)
-
-#     return pipe
-
-
-# def download_controlnet_from_original_ckpt(
-#     checkpoint_path: str,
-#     original_config_file: str,
-#     image_size: int = 512,
-#     extract_ema: bool = False,
-#     num_in_channels: Optional[int] = None,
-#     upcast_attention: Optional[bool] = None,
-#     device: str = None,
-#     from_safetensors: bool = False,
-# ) -> DiffusionPipeline:
-#     if not is_omegaconf_available():
-#         raise ValueError(BACKENDS_MAPPING["omegaconf"][1])
-
-#     from omegaconf import OmegaConf
-
-#     if from_safetensors:
-#         if not is_safetensors_available():
-#             raise ValueError(BACKENDS_MAPPING["safetensors"][1])
-
-#         from safetensors import safe_open
-
-#         checkpoint = {}
-#         with safe_open(checkpoint_path, framework="pt", device="cpu") as f:
-#             for key in f.keys():
-#                 checkpoint[key] = f.get_tensor(key)
-#     else:
-#         if device is None:
-#             device = "cuda" if torch.cuda.is_available() else "cpu"
-#             checkpoint = torch.load(checkpoint_path, map_location=device)
-#         else:
-#             checkpoint = torch.load(checkpoint_path, map_location=device)
-
-#     # NOTE: this while loop isn't great but this controlnet checkpoint has one additional
-#     # "state_dict" key https://huggingface.co/thibaud/controlnet-canny-sd21
-#     while "state_dict" in checkpoint:
-#         checkpoint = checkpoint["state_dict"]
-
-#     original_config = OmegaConf.load(original_config_file)
-
-#     if num_in_channels is not None:
-#         original_config["model"]["params"]["unet_config"]["params"]["in_channels"] = num_in_channels
-
-#     if "control_stage_config" not in original_config.model.params:
-#         raise ValueError("`control_stage_config` not present in original config")
-
-#     controlnet_model = convert_controlnet_checkpoint(
-#         checkpoint, original_config, checkpoint_path, image_size, upcast_attention, extract_ema
-#     )
-
-#     return controlnet_model
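The only functional change in this file is in `convert_ldm_clip_checkpoint`: the CLIP text encoder is now loaded from the public Hub ID `openai/clip-vit-large-patch14` instead of a hard-coded cluster path, so the converter works outside the author's environment. The rest of the diff just deletes the long commented-out `download_from_original_stable_diffusion_ckpt` / `download_controlnet_from_original_ckpt` helpers and their unused imports. A minimal usage sketch of the retained converter, assuming a locally downloaded SD v1.x checkpoint (the file name below is a placeholder):

```python
# Hypothetical usage sketch; "v1-5.ckpt" is a placeholder path, not part of the repo.
import torch
from animatediff.utils.convert_from_ckpt import convert_ldm_clip_checkpoint

# Load a CompVis-style Stable Diffusion checkpoint on CPU.
checkpoint = torch.load("v1-5.ckpt", map_location="cpu")

# Some checkpoints nest the weights under a "state_dict" key
# (the removed helpers above unwrapped it the same way).
while "state_dict" in checkpoint:
    checkpoint = checkpoint["state_dict"]

# After this commit, the converter instantiates CLIPTextModel from
# "openai/clip-vit-large-patch14" on the Hub and then overwrites its
# weights with the text-encoder tensors found in the checkpoint.
text_encoder = convert_ldm_clip_checkpoint(checkpoint)
```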

+ 0 - 4
animatediff/utils/convert_lora_safetensor_to_diffusers.py

@@ -76,14 +76,10 @@ def convert_lora(pipeline, state_dict, LORA_PREFIX_UNET="lora_unet", LORA_PREFIX
             weight_up = state_dict[pair_keys[0]].squeeze(3).squeeze(2).to(torch.float32)
             weight_down = state_dict[pair_keys[1]].squeeze(3).squeeze(2).to(torch.float32)
             curr_layer.weight.data += alpha * torch.mm(weight_up, weight_down).unsqueeze(2).unsqueeze(3).to(curr_layer.weight.data.device)
-            # lora_dim = weight_up.shape[1]
-            # curr_layer.weight.data += (1/lora_dim) * alpha * torch.mm(weight_up, weight_down).unsqueeze(2).unsqueeze(3).to(curr_layer.weight.data.device)
         else:
             weight_up = state_dict[pair_keys[0]].to(torch.float32)
             weight_down = state_dict[pair_keys[1]].to(torch.float32)
             curr_layer.weight.data += alpha * torch.mm(weight_up, weight_down).to(curr_layer.weight.data.device)
-            # lora_dim = weight_up.shape[1]
-            # curr_layer.weight.data += (1/lora_dim) * alpha * torch.mm(weight_up, weight_down).to(curr_layer.weight.data.device)
 
         # update visited list
         for item in pair_keys:
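In the LoRA converter, the commented-out alternative that rescaled the update by `1/lora_dim` is dropped; the merge that remains simply adds `alpha * (weight_up @ weight_down)` onto the base weight, with a squeeze/unsqueeze round-trip for 1x1 conv LoRA tensors. A self-contained sketch of that merge rule (hypothetical helper and shapes, not the repo's API):

```python
# Standalone sketch of the merge step kept by this diff; shapes are illustrative.
import torch

def merge_lora_pair(weight: torch.Tensor, weight_up: torch.Tensor,
                    weight_down: torch.Tensor, alpha: float) -> torch.Tensor:
    """Add alpha * (up @ down) to a base weight, as convert_lora does per key pair."""
    if weight.dim() == 4:  # 1x1 conv layer: drop the spatial dims before the matmul
        up = weight_up.squeeze(3).squeeze(2).to(torch.float32)
        down = weight_down.squeeze(3).squeeze(2).to(torch.float32)
        delta = torch.mm(up, down).unsqueeze(2).unsqueeze(3)
    else:  # linear layer
        delta = torch.mm(weight_up.to(torch.float32), weight_down.to(torch.float32))
    return weight + alpha * delta.to(weight.device, dtype=weight.dtype)

# Example with made-up sizes: a rank-4 LoRA on a 320x768 attention projection.
w = torch.randn(320, 768)
up, down = torch.randn(320, 4), torch.randn(4, 768)
merged = merge_lora_pair(w, up, down, alpha=0.8)
```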