97 lines
3.6 KiB
Python
97 lines
3.6 KiB
Python
|
#!/usr/bin/env python
|
|||
|
# -*- coding: utf-8 -*-
|
|||
|
#-------------------------------------------------------------------------------
|
|||
|
# Copyright (c) 2010 Sourcefabric O.P.S.
|
|||
|
#
|
|||
|
# This file is part of the Campcaster project.
|
|||
|
# http://campcaster.sourcefabric.org/
|
|||
|
# To report bugs, send an e-mail to bugs@campware.org
|
|||
|
#
|
|||
|
# Campcaster is free software; you can redistribute it and/or modify
|
|||
|
# it under the terms of the GNU General Public License as published by
|
|||
|
# the Free Software Foundation; either version 2 of the License, or
|
|||
|
# (at your option) any later version.
|
|||
|
#
|
|||
|
# Campcaster is distributed in the hope that it will be useful,
|
|||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|||
|
# GNU General Public License for more details.
|
|||
|
#
|
|||
|
# You should have received a copy of the GNU General Public License
|
|||
|
# along with Campcaster; if not, write to the Free Software
|
|||
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
|
#
|
|||
|
#
|
|||
|
# Author : $Author$
|
|||
|
# Version : $Revision$
|
|||
|
# Location : $URL$
|
|||
|
#-------------------------------------------------------------------------------
|
|||
|
|
|||
|
#-------------------------------------------------------------------------------
|
|||
|
# This script converts an ICU localization file from Serbian Latin
|
|||
|
# to Serbian Cyrillic.
|
|||
|
#-------------------------------------------------------------------------------
|
|||
|
|
|||
|
import sys, re, codecs
|
|||
|
|
|||
|
usageString = 'Usage: serbianLatinToCyrillicConverter.py' \
|
|||
|
' inputfile outputfile'
|
|||
|
|
|||
|
if len(sys.argv) >= 3:
|
|||
|
fileNameIn = sys.argv[1]
|
|||
|
fileNameOut = sys.argv[2]
|
|||
|
else:
|
|||
|
print usageString
|
|||
|
sys.exit(1)
|
|||
|
|
|||
|
oldLines = codecs.open(fileNameIn, 'r', 'utf-8').readlines()
|
|||
|
newLines = [ ]
|
|||
|
|
|||
|
def cyrillize(word):
|
|||
|
if re.match(r'#.*#\Z', word):
|
|||
|
return word
|
|||
|
|
|||
|
compound = { u'lj' : u'љ', u'Lj' : u'Љ',
|
|||
|
u'nj' : u'њ', u'Nj' : u'Њ',
|
|||
|
u'dž' : u'џ', u'Dž' : u'Џ' }
|
|||
|
simple = dict(zip(u'abvgdđežzijklmnoprstćufhcčšw',
|
|||
|
u'абвгдђежзијклмнопрстћуфхцчшв'))
|
|||
|
simple.update(dict(zip(u'ABVGDĐEŽZIJKLMNOPRSTĆUFHCČŠW',
|
|||
|
u'АБВГДЂЕЖЗИЈКЛМНОПРСТЋУФХЦЧШВ')))
|
|||
|
exceptions = { ur'\н' : ur'\n',
|
|||
|
u'Фаде ин' : u'Фејд ин',
|
|||
|
u'Фаде оут' : u'Фејд аут',
|
|||
|
u'фаде ин' : u'фејд ин',
|
|||
|
u'фаде оут' : u'фејд аут',
|
|||
|
u'есцапе' : u'ескејп',
|
|||
|
u'Плаy' : u'Плеј',
|
|||
|
u'Паусе' : u'Поуз',
|
|||
|
u'трацк' : u'трак',
|
|||
|
u'УРИ' : u'URI',
|
|||
|
u'РДС' : u'RDS',
|
|||
|
u'БПМ' : u'BPM',
|
|||
|
u'ИСРЦ' : u'ISRC' }
|
|||
|
|
|||
|
for latin, cyrillic in compound.iteritems():
|
|||
|
word = word.replace(latin, cyrillic)
|
|||
|
for latin, cyrillic in simple.iteritems():
|
|||
|
word = word.replace(latin, cyrillic)
|
|||
|
for bad, good in exceptions.iteritems():
|
|||
|
word = word.replace(bad, good)
|
|||
|
|
|||
|
return word
|
|||
|
|
|||
|
for line in oldLines:
|
|||
|
m = re.match(r'(.*)"(.*)"(.*)\n', line)
|
|||
|
if m:
|
|||
|
line = m.groups()[0] + '"' \
|
|||
|
+ cyrillize(m.groups()[1]) + '"' \
|
|||
|
+ m.groups()[2] + '\n'
|
|||
|
|
|||
|
elif line == 'sr_CS:table\n':
|
|||
|
line = 'sr_CS_CYRILLIC:table\n'
|
|||
|
|
|||
|
newLines += [line]
|
|||
|
|
|||
|
codecs.open(fileNameOut, 'w', 'utf-8').writelines(newLines)
|