2010-09-30 21:40:11 +02:00
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
#-------------------------------------------------------------------------------
|
|
|
|
|
# Copyright (c) 2010 Sourcefabric O.P.S.
|
|
|
|
|
#
|
2011-01-05 20:18:03 +01:00
|
|
|
|
# This file is part of the Airtime project.
|
2011-01-07 23:17:23 +01:00
|
|
|
|
# http://airtime.sourcefabric.org/
|
|
|
|
|
# To report bugs, send an e-mail to contact@sourcefabric.org
|
2010-09-30 21:40:11 +02:00
|
|
|
|
#
|
2011-01-05 20:18:03 +01:00
|
|
|
|
# Airtime is free software; you can redistribute it and/or modify
|
2010-09-30 21:40:11 +02:00
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
# (at your option) any later version.
|
|
|
|
|
#
|
2011-01-05 20:18:03 +01:00
|
|
|
|
# Airtime is distributed in the hope that it will be useful,
|
2010-09-30 21:40:11 +02:00
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
|
#
|
|
|
|
|
# You should have received a copy of the GNU General Public License
|
2011-01-05 20:18:03 +01:00
|
|
|
|
# along with Airtime; if not, write to the Free Software
|
2010-09-30 21:40:11 +02:00
|
|
|
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
|
#
|
|
|
|
|
#
|
|
|
|
|
#-------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
#-------------------------------------------------------------------------------
|
|
|
|
|
# This script converts an ICU localization file from Serbian Latin
|
|
|
|
|
# to Serbian Cyrillic.
|
|
|
|
|
#-------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
import sys, re, codecs
|
|
|
|
|
|
|
|
|
|
# Command-line handling: the script needs an input and an output file name.
usageString = ('Usage: serbianLatinToCyrillicConverter.py'
               ' inputfile outputfile')

# Guard clause: bail out early when either file name is missing.  Any
# arguments after the first two are ignored.
if len(sys.argv) < 3:
    # The usage text accompanies a failure exit status, so it belongs on
    # stderr.  sys.stderr.write() works on both Python 2 and Python 3, unlike
    # the Python-2-only print statement.
    sys.stderr.write(usageString + '\n')
    sys.exit(1)

fileNameIn = sys.argv[1]
fileNameOut = sys.argv[2]
|
|
|
|
|
|
|
|
|
|
# Read the whole input file as UTF-8 text, one list entry per line (line
# endings are kept).  The with-statement closes the file handle as soon as the
# lines have been read instead of leaving that to the garbage collector.
with codecs.open(fileNameIn, 'r', 'utf-8') as inputFile:
    oldLines = inputFile.readlines()

# Collects the converted lines in input order.
newLines = []
|
|
|
|
|
|
|
|
|
|
# Latin digraphs that correspond to a single Cyrillic letter.  They have to be
# replaced before the single-letter pass, otherwise 'lj' would end up as 'лј'.
# The all-caps forms are needed for text written entirely in capitals.
_COMPOUND = (
    (u'lj', u'љ'), (u'Lj', u'Љ'), (u'LJ', u'Љ'),
    (u'nj', u'њ'), (u'Nj', u'Њ'), (u'NJ', u'Њ'),
    (u'dž', u'џ'), (u'Dž', u'Џ'), (u'DŽ', u'Џ'),
)

# One-to-one letter mapping, lower case followed by upper case.  'w' is
# rendered as 'в'; 'q', 'x' and 'y' are not in the table and stay Latin.
_SIMPLE = tuple(zip(u'abvgdđežzijklmnoprstćufhcčšw'
                    u'ABVGDĐEŽZIJKLMNOPRSTĆUFHCČŠW',
                    u'абвгдђежзијклмнопрстћуфхцчшв'
                    u'АБВГДЂЕЖЗИЈКЛМНОПРСТЋУФХЦЧШВ'))

# Fix-ups applied to the already transliterated text: the '\n' escape sequence
# is restored, a few English words get a phonetic spelling, and some
# abbreviations are turned back into Latin script.
_EXCEPTIONS = (
    (u'\\н', u'\\n'),
    (u'Фаде ин', u'Фејд ин'),
    (u'Фаде оут', u'Фејд аут'),
    (u'фаде ин', u'фејд ин'),
    (u'фаде оут', u'фејд аут'),
    (u'есцапе', u'ескејп'),
    (u'Плаy', u'Плеј'),
    (u'Паусе', u'Поуз'),
    (u'трацк', u'трак'),
    (u'УРИ', u'URI'),
    (u'РДС', u'RDS'),
    (u'БПМ', u'BPM'),
    (u'ИСРЦ', u'ISRC'),
)


def cyrillize(word):
    """Return `word` transliterated from Serbian Latin to Serbian Cyrillic.

    A string that starts and ends with '#' (for example '#name#') is returned
    unchanged.  Otherwise digraphs are converted first, then single letters,
    and finally the fix-ups listed in _EXCEPTIONS are applied.

    Despite the parameter name, `word` may be any piece of text; it is
    expected to be a unicode string.
    """
    if re.match(r'#.*#\Z', word):
        return word

    for latin, cyrillic in _COMPOUND:
        word = word.replace(latin, cyrillic)
    for latin, cyrillic in _SIMPLE:
        word = word.replace(latin, cyrillic)
    for bad, good in _EXCEPTIONS:
        word = word.replace(bad, good)

    return word
|
|
|
|
|
|
|
|
|
|
# Convert the file line by line.
#
# On a line containing at least two double quotes, only the text between the
# last two quotes is transliterated (the leading group is greedy); everything
# else on the line is copied verbatim.  The pattern deliberately does not
# require a trailing newline, so a final line without one is converted too.
quotedText = re.compile(r'(.*)"(.*)"(.*)')
oldHeader = u'sr_CS:table'
newHeader = u'sr_CS_CYRILLIC:table'

for line in oldLines:
    m = quotedText.match(line)
    if m:
        prefix, text, suffix = m.groups()
        # '.' never matches a newline, so whatever follows the match is the
        # original line ending (possibly empty); it is kept as it was.
        line = (prefix + '"' + cyrillize(text) + '"' + suffix
                + line[m.end():])
    elif line.rstrip(u'\r\n') == oldHeader:
        # Rename the locale in the table header, keeping the original line
        # ending so that CRLF files and a missing final newline are handled.
        line = newHeader + line[len(oldHeader):]

    newLines.append(line)

# Write the result as UTF-8; the with-statement guarantees that the data is
# flushed and the file is closed.
with codecs.open(fileNameOut, 'w', 'utf-8') as outputFile:
    outputFile.writelines(newLines)
|