iso-8859-1.py 4.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. # ##### BEGIN LICENSE BLOCK #####
  4. # Version: MPL 1.1/GPL 2.0/LGPL 2.1
  5. #
  6. # The contents of this file are subject to the Mozilla Public License Version
  7. # 1.1 (the "License"); you may not use this file except in compliance with
  8. # the License. You may obtain a copy of the License at
  9. # http://www.mozilla.org/MPL/
  10. #
  11. # Software distributed under the License is distributed on an "AS IS" basis,
  12. # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  13. # for the specific language governing rights and limitations under the
  14. # License.
  15. #
  16. # The Original Code is Mozilla Universal charset detector code.
  17. #
  18. # The Initial Developer of the Original Code is
  19. # Netscape Communications Corporation.
  20. # Portions created by the Initial Developer are Copyright (C) 2001
  21. # the Initial Developer. All Rights Reserved.
  22. #
  23. # Contributor(s):
  24. # Jehan <jehan@girinstud.io>
  25. #
  26. # Alternatively, the contents of this file may be used under the terms of
  27. # either the GNU General Public License Version 2 or later (the "GPL"), or
  28. # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  29. # in which case the provisions of the GPL or the LGPL are applicable instead
  30. # of those above. If you wish to allow use of your version of this file only
  31. # under the terms of either the GPL or the LGPL, and not to allow others to
  32. # use your version of this file under the terms of the MPL, indicate your
  33. # decision by deleting the provisions above and replace them with the notice
  34. # and other provisions required by the GPL or the LGPL. If you do not delete
  35. # the provisions above, a recipient may use your version of this file under
  36. # the terms of any one of the MPL, the GPL or the LGPL.
  37. #
  38. # ##### END LICENSE BLOCK #####
  39. from codepoints import *
  40. # ISO-8859-1 is the full 8-bit range, IANA-defined, superset of ISO/CEI 8859-1.
  41. # It is basically the same as ISO/CEI 8859-1, but with control characters.
  42. # As far as I can see, `iconv` has no support for the ISO/CEI 8859-1 subset,
  43. # so there is no need for us to support it anyway.
  44. name = 'ISO-8859-1'
  45. aliases = ['ISO_8859-1:1987', 'ISO_8859-1', 'iso-ir-100',
  46. 'csISOLatin1', 'latin1', 'l1', 'IBM819', 'CP819']
  47. language = \
  48. {
  49. # Languages with complete coverage.
  50. # Some languages actually have several alphabets and only one of them is
  51. # compatible with ISO-8859-1 (ex: Kurdish).
  52. # Some don't have a ISO language code (like Leonese, for which I used
  53. # a Glottolog code).
  54. 'complete': [ 'af', 'sq', 'eu', 'br', 'co', 'da', 'en', 'fo', 'gl', 'de',
  55. 'is', 'id', 'it', 'ku', 'leon1250', 'lb', 'ms', 'gv', 'no',
  56. 'oc', 'pt', 'rm', 'gd', 'es', 'sw', 'sv', 'wa' ],
  57. 'incomplete': [ 'ca', 'cs', 'nl', 'et', 'fi', 'fr', 'gn', 'hu', 'ga',
  58. 'la', 'mi', 'ro', 'tr', 'cy' ]
  59. }
  60. # X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
  61. charmap = \
  62. [
  63. CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
  64. CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
  65. SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
  66. NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
  67. SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
  68. LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
  69. SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
  70. LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
  71. CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 8X
  72. CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 9X
  73. SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # AX
  74. SYM,SYM,SYM,SYM,SYM,LET,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # BX
  75. LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX
  76. LET,LET,LET,LET,LET,LET,LET,SYM,LET,LET,LET,LET,LET,LET,LET,LET, # DX
  77. LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX
  78. LET,LET,LET,LET,LET,LET,LET,SYM,LET,LET,LET,LET,LET,LET,LET,LET, # FX
  79. ]