97 lines
3.6 KiB
Python
Executable File
97 lines
3.6 KiB
Python
Executable File
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
#-------------------------------------------------------------------------------
|
||
# Copyright (c) 2010 Sourcefabric O.P.S.
|
||
#
|
||
# This file is part of the Airtime project.
|
||
# http://campcaster.sourcefabric.org/
|
||
# To report bugs, send an e-mail to bugs@campware.org
|
||
#
|
||
# Airtime is free software; you can redistribute it and/or modify
|
||
# it under the terms of the GNU General Public License as published by
|
||
# the Free Software Foundation; either version 2 of the License, or
|
||
# (at your option) any later version.
|
||
#
|
||
# Airtime is distributed in the hope that it will be useful,
|
||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
# GNU General Public License for more details.
|
||
#
|
||
# You should have received a copy of the GNU General Public License
|
||
# along with Airtime; if not, write to the Free Software
|
||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||
#
|
||
#
|
||
# Author : $Author$
|
||
# Version : $Revision$
|
||
# Location : $URL$
|
||
#-------------------------------------------------------------------------------
|
||
|
||
#-------------------------------------------------------------------------------
|
||
# This script converts an ICU localization file from Serbian Latin
|
||
# to Serbian Cyrillic.
|
||
#-------------------------------------------------------------------------------
|
||
|
||
import sys, re, codecs
|
||
|
||
# Message shown when the script is called with too few arguments.
usageString = ('Usage: serbianLatinToCyrillicConverter.py'
               ' inputfile outputfile')

# Both the input path and the output path are required; any further
# command-line arguments are ignored.
if len(sys.argv) >= 3:
    fileNameIn = sys.argv[1]
    fileNameOut = sys.argv[2]
else:
    # The parenthesised form prints the same text under Python 2 and 3.
    print(usageString)
    sys.exit(1)

# Read the whole input as UTF-8 text.  The context manager closes the file
# handle as soon as the lines have been read instead of leaving that to the
# garbage collector.
with codecs.open(fileNameIn, 'r', 'utf-8') as inputFile:
    oldLines = inputFile.readlines()

# Converted lines are collected here before being written out.
newLines = []
|
||
|
||
# Latin digraphs that correspond to a single Cyrillic letter.  They are
# replaced before the single-letter pass, which would otherwise split them
# into two letters.  The all-caps forms cover fully capitalised words.
_CYRILLIZE_COMPOUND = (
    (u'lj', u'љ'), (u'Lj', u'Љ'), (u'LJ', u'Љ'),
    (u'nj', u'њ'), (u'Nj', u'Њ'), (u'NJ', u'Њ'),
    (u'dž', u'џ'), (u'Dž', u'Џ'), (u'DŽ', u'Џ'),
)

# One-to-one letter mapping, lower case followed by upper case.
_CYRILLIZE_SIMPLE = (
    tuple(zip(u'abvgdđežzijklmnoprstćufhcčšw',
              u'абвгдђежзијклмнопрстћуфхцчшв'))
    + tuple(zip(u'ABVGDĐEŽZIJKLMNOPRSTĆUFHCČŠW',
                u'АБВГДЂЕЖЗИЈКЛМНОПРСТЋУФХЦЧШВ'))
)

# Corrections applied to the already transliterated text: the first entry
# restores the backslash-n escape that the letter pass turned into
# backslash + Cyrillic 'н'; the others replace letter-by-letter renderings
# of English words and restore Latin-script acronyms.
_CYRILLIZE_EXCEPTIONS = (
    (u'\\н', u'\\n'),
    (u'Фаде ин', u'Фејд ин'),
    (u'Фаде оут', u'Фејд аут'),
    (u'фаде ин', u'фејд ин'),
    (u'фаде оут', u'фејд аут'),
    (u'есцапе', u'ескејп'),
    (u'Плаy', u'Плеј'),
    (u'Паусе', u'Поуз'),
    (u'трацк', u'трак'),
    (u'УРИ', u'URI'),
    (u'РДС', u'RDS'),
    (u'БПМ', u'BPM'),
    (u'ИСРЦ', u'ISRC'),
)


def cyrillize(word):
    """Return *word* transliterated from Serbian Latin to Serbian Cyrillic.

    Text that starts and ends with '#' is returned unchanged.  Otherwise
    the digraphs are replaced first, then the single letters, and finally
    the fixed list of corrections is applied to the result.  Characters
    without a mapping (digits, punctuation, 'q', 'x', 'y', ...) are kept
    as they are.

    The code uses only constructs that are valid on both Python 2 and
    Python 3.
    """
    if re.match(r'#.*#\Z', word):
        return word

    # The order of the three passes matters; see the table comments above.
    for table in (_CYRILLIZE_COMPOUND,
                  _CYRILLIZE_SIMPLE,
                  _CYRILLIZE_EXCEPTIONS):
        for old, new in table:
            word = word.replace(old, new)

    return word
|
||
|
||
# Splits a line into the text before the last pair of double quotes, the
# text between them, and the text after them.  '.' never matches '\n', so a
# trailing newline stays outside the match and is copied over unchanged;
# this also lets a final line without a newline be converted.
quotedPattern = re.compile(r'(.*)"(.*)"(.*)')

for line in oldLines:
    m = quotedPattern.match(line)
    if m:
        # Only the quoted text is transliterated; everything around it,
        # including the line terminator, is kept as it was.
        prefix, quoted, suffix = m.groups()
        line = (prefix + '"' + cyrillize(quoted) + '"' + suffix
                + line[m.end():])
    else:
        # Rename the locale header line, whatever its line ending is.
        body = line.rstrip('\r\n')
        if body == 'sr_CS:table':
            line = 'sr_CS_CYRILLIC:table' + line[len(body):]

    newLines.append(line)

# Write the result as UTF-8; the context manager flushes and closes the
# output file even on interpreters without reference counting.
with codecs.open(fileNameOut, 'w', 'utf-8') as outputFile:
    outputFile.writelines(newLines)
|