libretime/utils/serbianLatinToCyrillicConve...

93 lines
3.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
#-------------------------------------------------------------------------------
# Copyright (c) 2010 Sourcefabric O.P.S.
#
# This file is part of the Airtime project.
# http://airtime.sourcefabric.org/
# To report bugs, send an e-mail to contact@sourcefabric.org
#
# Airtime is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Airtime is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Airtime; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# This script converts an ICU localization file from Serbian Latin
# to Serbian Cyrillic.
#-------------------------------------------------------------------------------
import sys, re, codecs
#-------------------------------------------------------------------------------
#  Command line handling and input loading.
#-------------------------------------------------------------------------------
usageString = ('Usage: serbianLatinToCyrillicConverter.py'
               ' inputfile outputfile')

# Both the input and the output path are required; any extra arguments
# after the first two are ignored.
if len(sys.argv) < 3:
    print(usageString)
    sys.exit(1)

fileNameIn = sys.argv[1]
fileNameOut = sys.argv[2]

# Read the whole input as a list of Unicode lines (line endings kept).
# The with-block guarantees the handle is closed even if decoding fails.
with codecs.open(fileNameIn, 'r', 'utf-8') as inputFile:
    oldLines = inputFile.readlines()

# Filled by the conversion loop further down and written to fileNameOut.
newLines = []
#-------------------------------------------------------------------------------
#  Transliteration tables.  They are built once at import time and kept as
#  ordered tuples so that the replacement order is deterministic.
#-------------------------------------------------------------------------------

# Latin digraphs that correspond to a single Cyrillic letter.  They have to be
# replaced before the single-letter table is applied, otherwise "lj" would end
# up as two letters.  Keys must never be empty: replacing u'' inserts the
# replacement between every character of the string.
_COMPOUND = (
    (u'lj', u'љ'), (u'Lj', u'Љ'),
    (u'nj', u'њ'), (u'Nj', u'Њ'),
    (u'd\u017e', u'џ'), (u'D\u017e', u'Џ'),     # d + z-with-caron
)

# One-to-one letter mapping, keyed by code point for use with translate().
# Note that 'w' maps to the same letter as 'v'; 'q', 'x' and 'y' have no
# mapping and are left as they are.
_SIMPLE = dict(
    (ord(latin), cyrillic)
    for latin, cyrillic in zip(
        u'abvgdđežzijklmnoprstćufhcčšw' u'ABVGDĐEŽZIJKLMNOPRSTĆUFHCČŠW',
        u'абвгдђежзијклмнопрстћуфхцчшв' u'АБВГДЂЕЖЗИЈКЛМНОПРСТЋУФХЦЧШВ'))

# Fix-ups applied to the already transliterated text: the "\n" escape
# sequence (whose 'n' was just turned into a Cyrillic letter), English words
# that are spelled phonetically instead, and acronyms that stay in Latin.
_EXCEPTIONS = (
    (u'\\н', u'\\n'),
    (u'Фаде ин', u'Фејд ин'),
    (u'Фаде оут', u'Фејд аут'),
    (u'фаде ин', u'фејд ин'),
    (u'фаде оут', u'фејд аут'),
    (u'есцапе', u'ескејп'),
    (u'Плаy', u'Плеј'),
    (u'Паусе', u'Поуз'),
    (u'трацк', u'трак'),
    (u'УРИ', u'URI'),
    (u'РДС', u'RDS'),
    (u'БПМ', u'BPM'),
    (u'ИСРЦ', u'ISRC'),
)


def cyrillize(word):
    """Return the Serbian Cyrillic transliteration of a Serbian Latin string.

    word -- Unicode text to convert.

    A string that starts and ends with '#' (matched by r'#.*#\\Z') is
    returned unchanged.  Any other string has its digraphs replaced first,
    then its single letters, and finally the fix-ups from _EXCEPTIONS are
    applied to the result.
    """
    if re.match(r'#.*#\Z', word):
        return word

    for latin, cyrillic in _COMPOUND:
        word = word.replace(latin, cyrillic)

    # Single pass over the string; equivalent to replacing each letter in
    # turn because no Cyrillic output letter is also a Latin input letter.
    word = word.translate(_SIMPLE)

    for bad, good in _EXCEPTIONS:
        word = word.replace(bad, good)

    return word
#-------------------------------------------------------------------------------
#  Convert the input line by line and write the result.
#-------------------------------------------------------------------------------

# Splits a line into: everything before the last double-quoted string, the
# quoted text itself, whatever follows the closing quote, and the newline.
# The newline is optional so that a final line without one is still converted.
quotedLine = re.compile(r'(.*)"(.*)"(.*)(\n?)\Z')

for line in oldLines:
    m = quotedLine.match(line)
    if m:
        # Only the text between the quotes is transliterated; the
        # surrounding syntax is copied through unchanged.
        prefix, text, suffix, newline = m.groups()
        line = prefix + '"' + cyrillize(text) + '"' + suffix + newline
    elif line.rstrip('\r\n') == 'sr_CS:table':
        # Rename the table header, keeping the original line ending.
        line = 'sr_CS_CYRILLIC:table' + line[len('sr_CS:table'):]
    newLines.append(line)

# The with-block makes sure the output is flushed and closed.
with codecs.open(fileNameOut, 'w', 'utf-8') as outputFile:
    outputFile.writelines(newLines)