aegis-graph / scratch /fix_html_encoding.py
ACLASCollege's picture
FINAL PROFESSIONALIZATION: Synchronized with GitHub sovereign standards.
05a5750 verified
import os
# Repairs mis-encoded ("mojibake") text in a single HTML file, in place, and
# makes sure the document declares UTF-8.

# Location of the HTML document that is repaired in place.
filepath = r'd:\aicoding\kaiyuan\v2\index.html'

# Declaration that must be present in the repaired document.
META_CHARSET = '<meta charset="UTF-8">'

# Corrupted sequence -> intended text, applied in insertion order.
# NOTE(review): several entries have an identical key and value and are
# therefore no-ops as written; presumably the corrupted key text was lost when
# this script itself was re-saved -- TODO restore the real keys or drop them.
mapping = {
    '🇺🇸': '🇺🇸',
    '🇭🇰': '🇭🇰',
    '🇪🇸': '🇪🇸',
    '🇫🇷': '🇫🇷',
    '🇩🇪': '🇩🇪',
    '🇯🇵': '🇯🇵',
    '🇰🇷': '🇰🇷',
    '🇸🇦': '🇸🇦',
    '🇵🇹': '🇵🇹',
    '🌎': '🌍',
    '📢': '📢',
    '🌠': '🌍',
    '📠': '📄',
    'â–?': '▾',
    'Français': 'Français',
    'Español': 'Español',
    'Português': 'Português',
    '日本èª?': '日本語',
    '한국ì–?': '한국어',
    'ç¹ é«”ä¸­æ–‡': '繁體中文',
    'العربية': 'العربية',
    'â€?': '—',
    '•': '•',
    '©': '©',
}

# Sequences where the corruption also swallowed the "<" of the closing tag.
# Every key here starts with a key of ``mapping``, so these have to be applied
# BEFORE ``mapping``: once the shorter replacement has rewritten the prefix,
# the longer pattern can no longer match and the tag would stay broken.
tag_repairs = {
    '日本èª?/option>': '日本語</option>',
    '한국ì–?/option>': '한국어</option>',
}


def read_html(path):
    """Return the text of *path*, decoded as UTF-8 when possible.

    A file that is already valid UTF-8 is read as such, so its correct
    non-ASCII characters are not double-encoded when the result is written
    back as UTF-8 (which also makes re-running the script harmless). Only when
    the bytes are not valid UTF-8 is the file decoded as Latin-1, which accepts
    any byte sequence.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except UnicodeDecodeError:
        with open(path, 'r', encoding='latin-1') as f:
            return f.read()


def ensure_meta_charset(content):
    """Return *content* with a UTF-8 ``<meta charset>`` declaration present.

    Nothing is changed when the declaration already exists (compared
    case-insensitively). Otherwise it is inserted right after the first
    existing ``<head>`` tag; only when there is no ``<head>`` is a new one
    opened after ``<html lang="en">``, so an existing head is never duplicated.
    """
    if META_CHARSET.lower() in content.lower():
        return content
    if '<head>' in content:
        return content.replace('<head>', '<head>\n ' + META_CHARSET, 1)
    return content.replace(
        '<html lang="en">',
        '<html lang="en">\n<head>\n ' + META_CHARSET,
    )


def repair_text(content):
    """Return *content* with the known corrupted sequences replaced.

    The tag-specific repairs run first because their patterns extend keys of
    the general mapping (see ``tag_repairs``).
    """
    for old, new in tag_repairs.items():
        content = content.replace(old, new)
    for old, new in mapping.items():
        content = content.replace(old, new)
    return content


def fix_file(path=filepath):
    """Repair the HTML file at *path* in place and save it as UTF-8.

    Line endings are written as ``\\n``.

    Raises:
        OSError: if the file cannot be read or written.
    """
    content = read_html(path)
    content = ensure_meta_charset(content)
    content = repair_text(content)
    with open(path, 'w', encoding='utf-8', newline='\n') as f:
        f.write(content)


def main():
    """Repair the configured file and report success."""
    fix_file(filepath)
    print('index.html encoding and characters fixed successfully')


if __name__ == '__main__':
    main()