"""Repair mojibake in index.html and make sure it declares UTF-8.

"Mojibake" here means UTF-8 bytes that were mis-decoded as Windows-1252 /
Latin-1 at some point (for example ``FranÃ§ais`` instead of ``Français``).
The script rewrites the file in place as clean UTF-8 with LF line endings.
"""
import os
import re

filepath = r'd:\aicoding\kaiyuan\v2\index.html'

# cp1252 characters for bytes 0x80-0x9F, mapped back to their byte value.
# Bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D are undefined in cp1252; text that
# went through Latin-1 shows them as C1 control characters, which are handled
# by the ``ord(ch)`` fallback in ``_decode_run``.
_CP1252_HIGH = {
    bytes([b]).decode('cp1252'): b
    for b in range(0x80, 0xA0)
    if b not in (0x81, 0x8D, 0x8F, 0x90, 0x9D)
}

# A maximal run of characters that could each stand for one mis-decoded byte.
_MOJIBAKE_RUN = re.compile(
    '[\u0080-\u00ff' + ''.join(map(re.escape, _CP1252_HIGH)) + ']+'
)

# Sequences that lost a byte (shown as '?', a space, or nothing) and therefore
# cannot be recovered by re-decoding.  Order matters: the tag-specific keys
# must run before the shorter keys that are a prefix of them, otherwise the
# broken ``</option>`` tag would never be repaired.
_LOSSY_FIXES = (
    ('日本èª?/option>', '日本語</option>'),
    ('한êµì–?/option>', '한국어</option>'),
    ('日本èª?', '日本語'),
    ('한êµì–?', '한국어'),
    ('ç¹ é«”ä¸æ–‡', '繁體中文'),
    ('ðŸ‡ðŸ‡°', '🇭🇰'),
    ('â–?', '▾'),
    ('â€?', '—'),
)

_META_CHARSET = re.compile(r'<meta\s+charset\s*=\s*["\']?utf-8', re.IGNORECASE)
_HEAD_OPEN = re.compile(r'<head\b[^>]*>', re.IGNORECASE)


def _decode_run(match):
    """Re-decode one run of suspect characters as UTF-8 where possible.

    Every character in the run corresponds to exactly one byte, so byte
    offsets reported by ``UnicodeDecodeError`` are also character offsets.
    Characters that do not form valid UTF-8 are kept unchanged, which leaves
    legitimate Latin-1 text such as a lone ``é`` or ``©`` alone.
    """
    run = match.group(0)
    raw = bytes(_CP1252_HIGH.get(ch, ord(ch)) for ch in run)
    pieces = []
    pos = 0
    while pos < len(raw):
        try:
            pieces.append(raw[pos:].decode('utf-8'))
            break
        except UnicodeDecodeError as err:
            bad = pos + err.start
            pieces.append(raw[pos:bad].decode('utf-8'))
            pieces.append(run[bad])  # keep the undecodable character as-is
            pos = bad + 1
    return ''.join(pieces)


def _apply_lossy_fixes(text):
    """Replace the known, irrecoverably damaged sequences with their intent."""
    for broken, fixed in _LOSSY_FIXES:
        text = text.replace(broken, fixed)
    return text


def fix_mojibake(text: str) -> str:
    """Return *text* with cp1252/Latin-1 mojibake turned back into Unicode.

    The lossy literal fixes are applied both before and after the generic
    pass: some keys describe fully garbled text, others describe text whose
    intact part has already been re-decoded.  The generic pass is repeated
    until nothing changes so that doubly-encoded text is repaired as well;
    each change shortens the text, so the loop always terminates.
    """
    text = _apply_lossy_fixes(text)
    while True:
        repaired = _MOJIBAKE_RUN.sub(_decode_run, text)
        if repaired == text:
            break
        text = repaired
    return _apply_lossy_fixes(text)


def ensure_meta_charset(html: str) -> str:
    """Return *html* with a UTF-8 ``<meta charset>`` declaration present.

    If the document already has a ``<head>`` the tag is inserted right after
    it, so no duplicate ``<head>`` is created.  Otherwise a ``<head>`` is
    opened after ``<html lang="en">``.
    """
    if _META_CHARSET.search(html):
        return html
    head = _HEAD_OPEN.search(html)
    if head:
        cut = head.end()
        return html[:cut] + '\n    <meta charset="UTF-8">' + html[cut:]
    return html.replace(
        '<html lang="en">',
        '<html lang="en">\n<head>\n <meta charset="UTF-8">',
    )


def _read_text(path):
    """Read *path* as UTF-8, falling back to Latin-1 for invalid input.

    Decoding valid UTF-8 as Latin-1 would double-encode every non-ASCII
    character on write, so Latin-1 is only used when UTF-8 decoding fails.
    Text mode keeps the newline normalisation to ``\\n``.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except UnicodeDecodeError:
        with open(path, 'r', encoding='latin-1') as f:
            return f.read()


def main(path=filepath):
    """Repair *path* in place and report success on stdout."""
    content = ensure_meta_charset(fix_mojibake(_read_text(path)))
    with open(path, 'w', encoding='utf-8', newline='\n') as f:
        f.write(content)
    print('index.html encoding and characters fixed successfully')


if __name__ == '__main__':
    main()