Spaces:
Running
Running
| """ | |
| 在首次启动自动生成config.yaml文件后,对配置进行修改时,应该直接在config.yaml文件中进行,而不是在config.py文件中修改。 | |
| 初回の起動後にconfig.yamlが自動生成された場合、設定の変更はconfig.pyではなくconfig.yamlで行うべきです。 | |
| After the initial launch that automatically generates the config.yaml file, any modifications to the configuration should be made directly in the config.yaml file, not in the config.py file. | |
| """ | |
| import copy | |
| import logging | |
| import os | |
| import secrets | |
| import shutil | |
| import string | |
| import sys | |
| import traceback | |
| from dataclasses import dataclass, field, asdict, fields, is_dataclass | |
| from typing import List, Union, Optional, Dict | |
| import torch | |
| import yaml | |
# NOTE(review): the two names below match Flask configuration keys; presumably
# the web application reads them from this module — confirm at the caller.
JSON_AS_ASCII = False
# 5242880 == 5 * 1024 * 1024 (5 MiB).
MAX_CONTENT_LENGTH = 5242880
# Absolute path of vits-simple-api
# (the directory containing this file, with symlinks resolved by realpath).
ABS_PATH = os.path.dirname(os.path.realpath(__file__))
# WTForms CSRF
# A new random 32-hex-character key is generated each time this module is imported.
SECRET_KEY = secrets.token_hex(16)
| """ | |
| 模型存放在data/models文件夹下,每个文件夹包含一个模型文件和一个配置文件。请按照以下格式填写路径信息: | |
| {"model_path": "文件夹名/模型文件.pth", "config_path": "文件夹名/config.json"}, | |
| 注意:只有当auto_load(自动加载模型)为False时才有效。当auto_load为True(默认值)时, | |
| 此处填写的模型路径具有最高优先级,将在每次启动时加载。如非必要,请尽量在config.yaml填写模型路径。 | |
| Models are stored in the data/models folder, with each folder containing a model file and a configuration file. | |
| Please fill in the paths following the format: {"model_path": "folder_name/model_file.pth", "config_path": "folder_name/config.json"}, | |
| Note: This is effective only when auto_load (automatic model loading) is set to False. When auto_load is True (default), | |
| the model paths specified here have the highest priority and will be loaded each time the program starts. If not necessary, | |
| it's recommended to specify model paths in config.yaml. | |
| """ | |
# Manually listed models. Each entry is a dict with "model_path" and
# "config_path" keys, as in the commented example; the list is empty by default.
model_list = [
    # {"model_path": "model_name/G_9000.pth", "config_path": "model_name/config.json"},
]
class AsDictMixin:
    """Mixin that gives config dataclasses dict export and dict-driven updates.

    ``update_config`` uses ``dataclasses.fields`` and therefore only works on
    instances of classes decorated with ``@dataclass``.
    """

    @staticmethod
    def _export(value):
        """Return ``value.asdict()`` for mixin instances, otherwise ``value`` itself."""
        return value.asdict() if isinstance(value, AsDictMixin) else value

    def asdict(self):
        """Return the instance attributes as a plain dictionary.

        Mixin instances are exported through their own ``asdict()``, whether
        they are stored directly or inside a list or dict attribute.  Items
        that are not mixin instances (for example plain strings in a list) are
        passed through unchanged rather than being asked for ``asdict()``.
        """
        data = {}
        for attr, value in vars(self).items():
            if isinstance(value, list):
                data[attr] = [self._export(item) for item in value]
            elif isinstance(value, dict):
                data[attr] = {k: self._export(v) for k, v in value.items()}
            else:
                data[attr] = self._export(value)
        return data

    def __iter__(self):
        """Yield ``(key, value)`` pairs of ``asdict()`` so ``dict(obj)`` works."""
        yield from self.asdict().items()

    def update_config(self, new_config_dict):
        """Overwrite dataclass fields with the values found in ``new_config_dict``.

        Keys that are not dataclass fields are ignored, and fields missing from
        the mapping keep their current value.  Values are coerced to the
        declared field type for ``bool``, ``int``, ``float``, ``str`` and
        ``torch.device``; values of any other declared type are stored as-is.

        Args:
            new_config_dict: Mapping of field name to new value (typically the
                parsed content of config.yaml or a sub-section of it).
        """
        for spec in fields(self):
            name = spec.name
            if name not in new_config_dict:
                continue
            declared = spec.type
            new_value = new_config_dict[name]

            if is_dataclass(declared):
                if isinstance(new_value, list):
                    # A list for a dataclass-typed field: build one object per item.
                    setattr(self, name, [declared(**item) for item in new_value])
                else:
                    # Otherwise update the existing nested dataclass recursively.
                    nested = getattr(self, name)
                    nested.update_config(new_value)
                    setattr(self, name, nested)
                continue

            if declared == bool:
                new_value = str(new_value).lower() == "true"
            elif declared == int:
                new_value = int(new_value)
            elif declared == float:
                new_value = float(new_value)
            elif declared == str:
                # Keep None as None: str(None) would store the literal text
                # "None" for fields whose default is None.
                if new_value is not None:
                    new_value = str(new_value)
            elif declared == torch.device:
                new_value = torch.device(new_value)
            setattr(self, name, new_value)
# @dataclass is required: AsDictMixin.update_config calls dataclasses.fields()
# and asdict() reads the instance attributes that the generated __init__ sets.
@dataclass
class VitsConfig(AsDictMixin):
    """Default parameter values for the VITS model type."""

    # For VITS: Load models during inference, dynamically release models after inference.
    dynamic_loading: bool = False
    id: int = 0
    format: str = "wav"
    lang: str = "auto"
    length: float = 1
    noise: float = 0.33
    noisew: float = 0.4
    # Batch processing threshold. Text will not be processed in batches if segment_size<=0
    segment_size: int = 50
    use_streaming: bool = False
# @dataclass is required: AsDictMixin.update_config calls dataclasses.fields()
# and asdict() reads the instance attributes that the generated __init__ sets.
@dataclass
class W2V2VitsConfig(AsDictMixin):
    """Default parameter values for the W2V2-VITS model type."""

    id: int = 0
    format: str = "wav"
    lang: str = "auto"
    length: float = 1
    noise: float = 0.33
    noisew: float = 0.4
    # Batch processing threshold. Text will not be processed in batches if segment_size<=0
    segment_size: int = 50
    emotion: int = 0
# @dataclass is required: AsDictMixin.update_config calls dataclasses.fields()
# and asdict() reads the instance attributes that the generated __init__ sets.
@dataclass
class HuBertVitsConfig(AsDictMixin):
    """Default parameter values for the HuBert-VITS model type."""

    id: int = 0
    format: str = "wav"
    length: float = 1
    noise: float = 0.33
    noisew: float = 0.4
# @dataclass is required: AsDictMixin.update_config calls dataclasses.fields()
# and asdict() reads the instance attributes that the generated __init__ sets.
@dataclass
class BertVits2Config(AsDictMixin):
    """Default parameter values for the Bert-VITS2 model type."""

    id: int = 0
    format: str = "wav"
    lang: str = "auto"
    length: float = 1
    noise: float = 0.33
    noisew: float = 0.4
    # Batch processing threshold. Text will not be processed in batches if segment_size<=0
    segment_size: int = 50
    sdp_ratio: float = 0.2
    emotion: int = 0
    text_prompt: str = "Happy"
    # Declared Optional so that update_config does not run str() on a null
    # value loaded from YAML (which would store the literal text "None").
    style_text: Optional[str] = None
    style_weight: float = 0.7
    use_streaming: bool = False
    # Can be set to "float16"/"fp16".
    torch_data_type: str = ""
# @dataclass provides the positional __init__(refer_wav_path, prompt_text,
# prompt_lang) that GPTSoVitsConfig.update_config relies on.
@dataclass
class GPTSoVitsPreset(AsDictMixin):
    """One named GPT-SoVITS preset: reference audio path, prompt text and prompt language."""

    refer_wav_path: Optional[str] = None
    prompt_text: Optional[str] = None
    prompt_lang: str = "auto"
# @dataclass is required: the class uses field(default_factory=...) and
# update_config calls dataclasses.fields().
@dataclass
class GPTSoVitsConfig(AsDictMixin):
    """Default parameter values and named presets for the GPT-SoVITS model type."""

    hz: int = 50
    is_half: bool = False
    id: int = 0
    lang: str = "auto"
    format: str = "wav"
    segment_size: int = 30
    top_k: int = 5
    top_p: float = 1.0
    temperature: float = 1.0
    use_streaming: bool = False
    batch_size: int = 5
    speed: float = 1.0
    presets: Dict[str, GPTSoVitsPreset] = field(default_factory=lambda: {"default": GPTSoVitsPreset(),
                                                                         "default2": GPTSoVitsPreset()})

    def update_config(self, new_config_dict):
        """Update this config from a mapping, rebuilding presets as objects.

        Fields declared as ``Dict[str, GPTSoVitsPreset]`` are converted from
        ``{name: {"refer_wav_path": ..., "prompt_text": ..., "prompt_lang": ...}}``
        into ``GPTSoVitsPreset`` instances.  All remaining keys are handled by
        ``AsDictMixin.update_config``, which applies the same scalar coercion
        that used to be duplicated here.

        Args:
            new_config_dict: Mapping of field name to new value.
        """
        # Work on a copy so the caller's mapping is not modified by pop().
        remaining = dict(new_config_dict)
        for spec in fields(self):
            if spec.type != Dict[str, GPTSoVitsPreset] or spec.name not in remaining:
                continue
            raw_presets = remaining.pop(spec.name)
            if raw_presets is None:
                # A key without a value loads from YAML as None; keep the
                # presets already present instead of failing on ``.items()``.
                continue
            presets = {}
            for preset_name, raw in raw_presets.items():
                raw = raw or {}  # tolerate a preset entry with no sub-keys
                presets[preset_name] = GPTSoVitsPreset(raw.get("refer_wav_path"),
                                                       raw.get("prompt_text"),
                                                       raw.get("prompt_lang"))
            setattr(self, spec.name, presets)
        super().update_config(remaining)
# @dataclass is required: AsDictMixin.update_config calls dataclasses.fields()
# and asdict() reads the instance attributes that the generated __init__ sets.
@dataclass
class Reader(AsDictMixin):
    """Selection of a model type, speaker id and preset name for one reading role."""

    model_type: str = "VITS"
    id: int = 0
    preset: str = "default"
@dataclass
class ReadingConfig(AsDictMixin):
    """Reader settings for the two reading roles, interlocutor and narrator."""

    # default_factory gives every ReadingConfig its own Reader objects. A plain
    # ``Reader()`` default would be one instance shared by all configs, and
    # Python 3.11+ dataclasses reject such unhashable defaults with ValueError.
    interlocutor: Reader = field(default_factory=Reader)
    narrator: Reader = field(default_factory=Reader)
# @dataclass is required: AsDictMixin.update_config calls dataclasses.fields()
# and asdict() reads the instance attributes that the generated __init__ sets.
@dataclass
class ModelConfig(AsDictMixin):
    """Relative locations of the auxiliary models (BERT, emotion, HuBERT, G2PW)."""

    chinese_roberta_wwm_ext_large: str = "bert/chinese-roberta-wwm-ext-large"
    bert_base_japanese_v3: str = "bert/bert-base-japanese-v3"
    bert_large_japanese_v2: str = "bert/bert-large-japanese-v2"
    deberta_v2_large_japanese: str = "bert/deberta-v2-large-japanese"
    deberta_v3_large: str = "bert/deberta-v3-large"
    deberta_v2_large_japanese_char_wwm: str = "bert/deberta-v2-large-japanese-char-wwm"
    wav2vec2_large_robust_12_ft_emotion_msp_dim: str = "emotional/wav2vec2-large-robust-12-ft-emotion-msp-dim"
    clap_htsat_fused: str = "emotional/clap-htsat-fused"
    erlangshen_MegatronBert_1_3B_Chinese: str = "bert/Erlangshen-MegatronBert-1.3B-Chinese"
    vits_chinese_bert: str = "bert/vits_chinese_bert"
    # hubert-vits
    hubert_soft_0d54a1f4: str = "hubert/hubert_soft/hubert-soft-0d54a1f4.pt"
    # w2v2-vits: a .npy file or a folder are both accepted
    dimensional_emotion_npy: Union[str, List[str]] = "emotional/dimensional_emotion_npy"
    # w2v2-vits: Need to have both `models.onnx` and `models.yaml` files in the same path.
    dimensional_emotion_model: str = "emotional/dimensional_emotion_model/models.yaml"
    g2pw_model: str = "G2PWModel"
    chinese_hubert_base: str = "hubert/chinese_hubert_base"
# @dataclass provides the positional __init__(model_path, config_path,
# sovits_path, gpt_path) that TTSConfig.update_config relies on.
@dataclass
class TTSModelConfig(AsDictMixin):
    """Paths describing one TTS model entry; unused paths stay None."""

    model_path: Optional[str] = None
    config_path: Optional[str] = None
    sovits_path: Optional[str] = None
    gpt_path: Optional[str] = None

    def asdict(self):
        """Return only the attributes that are set, omitting None values."""
        return {attr: value for attr, value in vars(self).items() if value is not None}
# @dataclass is required: the class uses field(default_factory=...) and
# update_config calls dataclasses.fields().
@dataclass
class TTSConfig(AsDictMixin):
    """Where TTS models live and which of them to load."""

    # Directory name for models under the data folder
    models_path: str = "models"
    # If set to True (default), models under the specified models_path will be automatically loaded.
    # When set to False, you can manually specify the models to load.
    auto_load: bool = True
    # List to store configurations of Text-to-Speech models
    models: List[TTSModelConfig] = field(default_factory=list)

    def asdict(self):
        """Return the attributes as a dict, exporting each list item via its ``asdict()``."""
        data = {}
        for attr, value in vars(self).items():
            if isinstance(value, list):
                data[attr] = [item.asdict() for item in value]
            else:
                data[attr] = value
        return data

    def update_config(self, new_config_dict):
        """Update this config from a mapping, rebuilding model entries as objects.

        Fields declared as ``List[TTSModelConfig]`` are converted from a list of
        dicts (keys ``model_path``, ``config_path``, ``sovits_path``,
        ``gpt_path``) into ``TTSModelConfig`` instances.  All remaining keys are
        handled by ``AsDictMixin.update_config``, which applies the same scalar
        coercion that used to be duplicated here.

        Args:
            new_config_dict: Mapping of field name to new value.
        """
        # Work on a copy so the caller's mapping is not modified by pop().
        remaining = dict(new_config_dict)
        for spec in fields(self):
            if spec.type != List[TTSModelConfig] or spec.name not in remaining:
                continue
            # A key without a value loads from YAML as None; treat it as "no models".
            entries = remaining.pop(spec.name) or []
            setattr(self, spec.name, [TTSModelConfig(entry.get("model_path"),
                                                     entry.get("config_path"),
                                                     entry.get("sovits_path"),
                                                     entry.get("gpt_path"))
                                      for entry in entries])
        super().update_config(remaining)
# @dataclass is required: AsDictMixin.update_config calls dataclasses.fields()
# and asdict() reads the instance attributes that the generated __init__ sets.
@dataclass
class HttpService(AsDictMixin):
    """Host, port and debug flag defaults for the HTTP service."""

    host: str = "0.0.0.0"
    port: int = 23456
    debug: bool = False
# @dataclass is required: AsDictMixin.update_config calls dataclasses.fields()
# and asdict() reads the instance attributes that the generated __init__ sets.
@dataclass
class LogConfig(AsDictMixin):
    """Logging defaults: log directory, number of backups and level."""

    # Logs path
    logs_path: str = "logs"
    # Set the number of backup log files to keep.
    logs_backupcount: int = 30
    # logging_level:DEBUG/INFO/WARNING/ERROR/CRITICAL
    logging_level: str = "DEBUG"
# @dataclass is required: AsDictMixin.update_config calls dataclasses.fields()
# and asdict() reads the instance attributes that the generated __init__ sets.
@dataclass
class System(AsDictMixin):
    """System-level defaults: compute device, paths, cache cleaning, API key and admin backend."""

    # Chosen once, when the class is defined: CUDA if available, else MPS if available, else CPU.
    device: torch.device = torch.device(
        "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
    # Upload path
    upload_folder: str = "upload"
    # Cache path
    cache_path: str = "cache"
    # If CLEAN_INTERVAL_SECONDS <= 0, the cleaning task will not be executed.
    clean_interval_seconds: int = 3600
    # save audio to CACHE_PATH
    cache_audio: bool = False
    # Set to True to enable API Key authentication
    api_key_enabled: bool = False
    # API_KEY is required for authentication
    # (default: 24 random letters/digits, generated once when the class is defined)
    api_key: str = ''.join(secrets.choice(string.ascii_letters + string.digits) for _ in range(24))
    # Control whether to enable the admin backend functionality. Set to False to disable the admin backend.
    is_admin_enabled: bool = True
    # Define the route for the admin backend. You can change this to your desired route
    admin_route: str = '/admin'
    # Path to the 'data' folder, where various models are stored
    data_path: str = "data"

    def asdict(self):
        """Return the attributes as a dict, with ``device`` converted to its string form."""
        data = dict(vars(self))
        if "device" in data:
            # torch.device is stored as text (e.g. "cpu") so it can be dumped to YAML.
            data["device"] = str(data["device"])
        return data
# @dataclass is required: the class uses field(default_factory=...) and
# AsDictMixin.update_config calls dataclasses.fields().
@dataclass
class LanguageIdentification(AsDictMixin):
    """Defaults for language identification and the eSpeak library location."""

    # Language identification library. Optional fastlid, langid
    language_identification_library: str = "langid"
    # To use the english_cleaner, you need to install espeak and provide the path of libespeak-ng.dll as input here.
    # If ESPEAK_LIBRARY is set to empty, it will be read from the environment variable.
    # For windows : "C:/Program Files/eSpeak NG/libespeak-ng.dll"
    # startswith("win") rather than a substring test: "win" also occurs in
    # "darwin", which would give macOS the Windows DLL path.
    espeak_library: str = r"C:/Program Files/eSpeak NG/libespeak-ng.dll" if sys.platform.startswith("win") else ""
    # zh ja ko en... If it is empty, it will be read based on the text_cleaners specified in the config.json.
    language_automatic_detect: list = field(default_factory=list)
# @dataclass is required: AsDictMixin.update_config calls dataclasses.fields()
# and asdict() reads the instance attributes that the generated __init__ sets.
@dataclass
class User(AsDictMixin):
    """Admin account with randomly generated default credentials.

    The default username (8 characters) and password (16 characters) are drawn
    from letters and digits once, when the class is defined.
    NOTE(review): the four methods below look like the Flask-Login user
    interface — confirm against the login code.
    """

    id: int = 0
    username: str = ''.join(secrets.choice(string.ascii_letters + string.digits) for _ in range(8))
    password: str = ''.join(secrets.choice(string.ascii_letters + string.digits) for _ in range(16))

    def is_authenticated(self):
        """Always report the user as authenticated."""
        return True

    def is_active(self):
        """Always report the user as active."""
        return True

    def is_anonymous(self):
        """Always report the user as not anonymous."""
        return False

    def get_id(self):
        """Return the user id as a string."""
        return str(self.id)
@dataclass
class Config(AsDictMixin):
    """Root configuration object, persisted as ``config.yaml`` under ``abs_path``.

    Every section uses ``default_factory`` so each ``Config`` owns its own
    section objects. Plain instance defaults would be shared by all ``Config``
    objects, and Python 3.11+ dataclasses reject such unhashable defaults.
    """

    abs_path: str = ABS_PATH
    http_service: HttpService = field(default_factory=HttpService)
    model_config: ModelConfig = field(default_factory=ModelConfig)
    tts_config: TTSConfig = field(default_factory=TTSConfig)
    admin: User = field(default_factory=User)
    system: System = field(default_factory=System)
    log_config: LogConfig = field(default_factory=LogConfig)
    language_identification: LanguageIdentification = field(default_factory=LanguageIdentification)
    reading_config: ReadingConfig = field(default_factory=ReadingConfig)
    vits_config: VitsConfig = field(default_factory=VitsConfig)
    w2v2_vits_config: W2V2VitsConfig = field(default_factory=W2V2VitsConfig)
    hubert_vits_config: HuBertVitsConfig = field(default_factory=HuBertVitsConfig)
    bert_vits2_config: BertVits2Config = field(default_factory=BertVits2Config)
    gpt_sovits_config: GPTSoVitsConfig = field(default_factory=GPTSoVitsConfig)

    def asdict(self):
        """Return all sections as nested dicts; non-mixin attributes are copied as-is."""
        return {
            attr: value.asdict() if isinstance(value, AsDictMixin) else value
            for attr, value in vars(self).items()
        }

    @staticmethod
    def load_config():
        """Load ``config.yaml``, creating it from the defaults when it is missing.

        Sets the root logger level to INFO as a side effect.  When the file does
        not exist, a default ``Config`` is created, the generated admin
        credentials are logged, and the file is written.  When it exists, its
        values are applied on top of the defaults and the merged result is
        written back, which fills in any parameters missing from the file.

        Returns:
            The loaded (or newly created) ``Config``.

        Raises:
            ValueError: If reading, parsing or applying config.yaml fails; the
                original exception is attached as the cause.
        """
        logging.getLogger().setLevel(logging.INFO)
        config_path = os.path.join(Config.abs_path, "config.yaml")

        if not os.path.isfile(config_path):
            logging.info("config.yaml not found. Generating a new config.yaml based on config.py.")
            config = Config()
            # Announce the freshly generated admin username and password.
            logging.info(
                f"New admin user created:\n"
                f"{'-' * 40}\n"
                f"| Username: {config.admin.username:<26} |\n"
                f"| Password: {config.admin.password:<26} |\n"
                f"{'-' * 40}\n"
                f"Please do not share this information.")
            Config.save_config(config)
            return config

        try:
            logging.info("Loading config...")
            with open(config_path, 'r', encoding='utf-8') as f:
                loaded_config = yaml.safe_load(f)
            config = Config()
            if loaded_config is not None:
                config.update_config(loaded_config)
                logging.info("Loading config success!")
            else:
                logging.info("config.yaml is empty, initializing config.yaml...")
            # Load default models from config.py.
            # config.update_config(model_list)
            # If parameters are incomplete, they will be automatically filled in upon saving.
            Config.save_config(config)
            return config
        except Exception as e:
            # format_exc() returns the traceback text; print_exc() returns None,
            # so logging its result recorded only "None".
            logging.error(traceback.format_exc())
            # Raise instead of discarding the exception object, so callers do
            # not receive None in place of a Config.
            raise ValueError(e) from e

    @staticmethod
    def save_config(config):
        """Write ``config`` to ``config.yaml`` under ``Config.abs_path``.

        The YAML is first dumped to ``config.yaml.tmp`` and then moved over
        ``config.yaml``, so the target is replaced only after the dump finished.

        Args:
            config: Object providing ``asdict()``; its result is dumped as YAML.
        """
        temp_filename = os.path.join(Config.abs_path, "config.yaml.tmp")
        with open(temp_filename, 'w', encoding='utf-8') as f:
            yaml.dump(config.asdict(), f, allow_unicode=True, default_style='', sort_keys=False)
        shutil.move(temp_filename, os.path.join(Config.abs_path, "config.yaml"))
        logging.info(f"Config is saved.")