ALT Linux Bugzilla
– Attachment 1402 Details for
Bug 4871
Неверная работа с кодировками названий файлов
New bug
|
Search
|
[?]
|
Help
Register
|
Log In
[x]
|
Forgot Password
Login:
[x]
|
EN
|
RU
[patch]
Патч для перекодировки с поддержкой UTF-8
unzip-5.50-alt-iconv-v1.2-utf8.patch (text/plain), 14.87 KB, created by
m0sia
on 2006-02-23 21:42:36 MSK
(
hide
)
Description:
Патч для перекодировки с поддержкой UTF-8
Filename:
MIME Type:
Creator:
m0sia
Created:
2006-02-23 21:42:36 MSK
Size:
14.87 KB
patch
obsolete
>*** unzip-5.50-real/unix/unix.c 2002-01-22 03:54:42.000000000 +0500 >--- unzip-5.50/unix/unix.c 2006-02-20 14:42:21.607294176 +0500 >*************** >*** 29,34 **** >--- 29,37 ---- > #define UNZIP_INTERNAL > #include "unzip.h" > >+ #include <iconv.h> >+ #include <langinfo.h> >+ > #ifdef SCO_XENIX > # define SYSNDIR > #else /* SCO Unix, AIX, DNIX, TI SysV, Coherent 4.x, ... */ >*************** >*** 1524,1526 **** >--- 1527,1616 ---- > } > } > #endif /* QLZIP */ >+ >+ >+ typedef struct { >+ char *local_charset; >+ char *archive_charset; >+ } CHARSET_MAP; >+ >+ /* A mapping of local <-> archive charsets used by default to convert filenames >+ * of DOS/Windows Zip archives. Currently very basic. */ >+ static CHARSET_MAP dos_charset_map[] = { >+ { "ANSI_X3.4-1968", "CP850" }, >+ { "ISO-8859-1", "CP850" }, >+ { "CP1252", "CP850" }, >+ { "UTF-8", "CP866" }, >+ { "KOI8-R", "CP866" }, >+ { "KOI8-U", "CP866" }, >+ { "ISO-8859-5", "CP866" } >+ }; >+ >+ char OEM_CP[MAX_CP_NAME] = ""; >+ char ISO_CP[MAX_CP_NAME] = ""; >+ >+ /* Try to guess the default value of OEM_CP based on the current locale. >+ * ISO_CP is left alone for now. */ >+ void init_conversion_charsets() >+ { >+ const char *local_charset; >+ int i; >+ >+ /* Make a guess only if OEM_CP not already set. */ >+ if(*OEM_CP == '\0') { >+ local_charset = nl_langinfo(CODESET); >+ for(i = 0; i < sizeof(dos_charset_map)/sizeof(CHARSET_MAP); i++) >+ if(!strcasecmp(local_charset, dos_charset_map[i].local_charset)) { >+ strncpy(OEM_CP, dos_charset_map[i].archive_charset, >+ sizeof(OEM_CP)); >+ break; >+ } >+ } >+ } >+ >+ /* Convert a string from one encoding to the current locale using iconv(). >+ * Be as non-intrusive as possible. If an error is encountered during conversion, >+ * just leave the string intact. 
*/ >+ static void charset_to_intern(char *string, char *from_charset) >+ { >+ iconv_t cd; >+ char *s,*d, *buf; >+ size_t slen, dlen, buflen; >+ const char *local_charset; >+ >+ if(*from_charset == '\0') >+ return; >+ >+ buf = NULL; >+ local_charset = nl_langinfo(CODESET); >+ >+ if((cd = iconv_open(local_charset, from_charset)) == (iconv_t)-1) >+ return; >+ >+ slen = strlen(string); >+ s = string; >+ dlen = buflen = 2*slen; >+ d = buf = malloc(buflen + 1); >+ if(!d) >+ goto cleanup; >+ bzero(buf,buflen); >+ if(iconv(cd, &s, &slen, &d, &dlen) == (size_t)-1) >+ goto cleanup; >+ strncpy(string, buf, buflen); >+ >+ cleanup: >+ free(buf); >+ iconv_close(cd); >+ } >+ >+ /* Convert a string from OEM_CP to the current locale charset. */ >+ inline void oem_intern(char *string) >+ { >+ charset_to_intern(string, OEM_CP); >+ } >+ >+ /* Convert a string from ISO_CP to the current locale charset. */ >+ inline void iso_intern(char *string) >+ { >+ charset_to_intern(string, ISO_CP); >+ } >*** unzip-5.50-real/unix/unxcfg.h 2001-06-04 05:27:14.000000000 +0600 >--- unzip-5.50/unix/unxcfg.h 2006-02-20 14:16:27.194600824 +0500 >*************** >*** 123,126 **** >--- 123,152 ---- > /* wild_dir, dirname, wildname, matchname[], dirnamelen, have_dirname, */ > /* and notfirstcall are used by do_wild(). 
*/ > >+ >+ #define MAX_CP_NAME 25 >+ >+ #ifdef SETLOCALE >+ # undef SETLOCALE >+ #endif >+ #define SETLOCALE(category, locale) setlocale(category, locale) >+ #include <locale.h> >+ >+ #ifdef _ISO_INTERN >+ # undef _ISO_INTERN >+ #endif >+ #define _ISO_INTERN(str1) iso_intern(str1) >+ >+ #ifdef _OEM_INTERN >+ # undef _OEM_INTERN >+ #endif >+ #ifndef IZ_OEM2ISO_ARRAY >+ # define IZ_OEM2ISO_ARRAY >+ #endif >+ #define _OEM_INTERN(str1) oem_intern(str1) >+ >+ void iso_intern(char *); >+ void oem_intern(char *); >+ void init_conversion_charsets(void); >+ > #endif /* !__unxcfg_h */ >*** unzip-5.50-real/unzip.c 2002-01-28 00:26:16.000000000 +0500 >--- unzip-5.50/unzip.c 2006-02-20 14:16:27.196600520 +0500 >*************** >*** 304,314 **** >--- 304,324 ---- > -2 just filenames but allow -h/-t/-z -l long Unix \"ls -l\" format\n\ > -v verbose, multi-page format\n"; > >+ #ifndef UNIX > static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\ > -h print header line -t print totals for listed files or for all\n\ > -z print zipfile comment %c-T%c print file times in sortable decimal format\ > \n %c-C%c be case-insensitive %s\ > -x exclude filenames that follow from listing\n"; >+ #else /* UNIX */ >+ static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\ >+ -h print header line -t print totals for listed files or for all\n\ >+ -z print zipfile comment %c-T%c print file times in sortable decimal format\ >+ \n %c-C%c be case-insensitive %s\ >+ -x exclude filenames that follow from listing\n\ >+ -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\ >+ -I CHARSET specify a character encoding for UNIX and other archives\n"; >+ #endif /* !UNIX */ > #ifdef MORE > #ifdef VMS > static ZCONST char Far ZipInfoUsageLine4[] = >*************** >*** 589,594 **** >--- 599,605 ---- > #endif /* ?VM_CMS */ > #endif /* ?MACOS */ > >+ #ifndef UNIX > static ZCONST char Far UnzipUsageLine4[] = "\ > modifiers: -q quiet mode (-qq => quieter)\n\ > -n 
never overwrite existing files -a auto-convert any text files\n\ >*************** >*** 596,601 **** >--- 607,623 ---- > -j junk paths (do not make directories) -v be verbose/print version info\n\ > %c-C%c match filenames case-insensitively %c-L%c make (some) names \ > lowercase\n %-42s %c-V%c retain VMS version numbers\n%s"; >+ #else /* UNIX */ >+ static ZCONST char Far UnzipUsageLine4[] = "\ >+ modifiers: -q quiet mode (-qq => quieter)\n\ >+ -n never overwrite existing files -a auto-convert any text files\n\ >+ -o overwrite files WITHOUT prompting -aa treat ALL files as text\n \ >+ -j junk paths (do not make directories) -v be verbose/print version info\n\ >+ %c-C%c match filenames case-insensitively %c-L%c make (some) names \ >+ lowercase\n %-42s %c-V%c retain VMS version numbers\n%s\n\ >+ -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\ >+ -I CHARSET specify a character encoding for UNIX and other archives\n\n"; >+ #endif /* !UNIX */ > > static ZCONST char Far UnzipUsageLine5[] = "\ > Examples (see unzip.txt for more info):\n\ >*************** >*** 656,661 **** >--- 678,687 ---- > > SETLOCALE(LC_CTYPE,""); > >+ #ifdef UNIX >+ init_conversion_charsets(); >+ #endif >+ > #if (defined(__IBMC__) && defined(__DEBUG_ALLOC__)) > extern void DebugMalloc(void); > >*************** >*** 1070,1075 **** >--- 1096,1106 ---- > argc = *pargc; > argv = *pargv; > >+ #ifdef UNIX >+ extern char OEM_CP[MAX_CP_NAME]; >+ extern char ISO_CP[MAX_CP_NAME]; >+ #endif >+ > while (++argv, (--argc > 0 && *argv != NULL && **argv == '-')) { > s = *argv + 1; > while ((c = *s++) != 0) { /* "!= 0": prevent Turbo C warning */ >*************** >*** 1233,1238 **** >--- 1264,1298 ---- > } > break; > #endif /* MACOS */ >+ #ifdef UNIX >+ case ('I'): >+ if (negative) { >+ Info(slide, 0x401, ((char *)slide, >+ "error: encodings can't be negated")); >+ return(PK_PARAM); >+ } else { >+ if(*s) { /* Handle the -Icharset case */ >+ /* Assume that charsets can't start with a dash to 
spot arguments misuse */ >+ if(*s == '-') { >+ Info(slide, 0x401, ((char *)slide, >+ "error: a valid character encoding should follow the -I argument")); >+ return(PK_PARAM); >+ } >+ strncpy(ISO_CP, s, sizeof(ISO_CP)); >+ } else { /* -I charset */ >+ ++argv; >+ if(!(--argc > 0 && *argv != NULL && **argv != '-')) { >+ Info(slide, 0x401, ((char *)slide, >+ "error: a valid character encoding should follow the -I argument")); >+ return(PK_PARAM); >+ } >+ s = *argv; >+ strncpy(ISO_CP, s, sizeof(ISO_CP)); >+ } >+ while(*(++s)); /* No params straight after charset name */ >+ } >+ break; >+ #endif /* ?UNIX */ > case ('j'): /* junk pathnames/directory structure */ > if (negative) > uO.jflag = FALSE, negative = 0; >*************** >*** 1299,1304 **** >--- 1359,1393 ---- > } else > ++uO.overwrite_all; > break; >+ #ifdef UNIX >+ case ('O'): >+ if (negative) { >+ Info(slide, 0x401, ((char *)slide, >+ "error: encodings can't be negated")); >+ return(PK_PARAM); >+ } else { >+ if(*s) { /* Handle the -Ocharset case */ >+ /* Assume that charsets can't start with a dash to spot arguments misuse */ >+ if(*s == '-') { >+ Info(slide, 0x401, ((char *)slide, >+ "error: a valid character encoding should follow the -I argument")); >+ return(PK_PARAM); >+ } >+ strncpy(OEM_CP, s, sizeof(OEM_CP)); >+ } else { /* -O charset */ >+ ++argv; >+ if(!(--argc > 0 && *argv != NULL && **argv != '-')) { >+ Info(slide, 0x401, ((char *)slide, >+ "error: a valid character encoding should follow the -O argument")); >+ return(PK_PARAM); >+ } >+ s = *argv; >+ strncpy(OEM_CP, s, sizeof(OEM_CP)); >+ } >+ while(*(++s)); /* No params straight after charset name */ >+ } >+ break; >+ #endif /* ?UNIX */ > case ('p'): /* pipes: extract to stdout, no messages */ > if (negative) { > uO.cflag = FALSE; >*** unzip-5.50-real/unzpriv.h 2002-02-17 22:01:48.000000000 +0500 >--- unzip-5.50/unzpriv.h 2006-02-20 14:16:27.198600216 +0500 >*************** >*** 1103,1109 **** > # define lastchar(ptr, len) (ptr[(len)-1]) > # define 
MBSCHR(str, c) strchr(str, c) > # define MBSRCHR(str, c) strrchr(str, c) >! # define SETLOCALE(category, locale) > #endif /* ?_MBCS */ > #define INCSTR(ptr) PREINCSTR(ptr) > >--- 1103,1111 ---- > # define lastchar(ptr, len) (ptr[(len)-1]) > # define MBSCHR(str, c) strchr(str, c) > # define MBSRCHR(str, c) strrchr(str, c) >! # ifndef SETLOCALE >! # define SETLOCALE(category, locale) >! # endif > #endif /* ?_MBCS */ > #define INCSTR(ptr) PREINCSTR(ptr) > >*************** >*** 2424,2430 **** > !(((islochdr) || (isuxatt)) && \ > ((hostver) == 25 || (hostver) == 26 || (hostver) == 40))) || \ > (hostnum) == FS_HPFS_ || \ >! ((hostnum) == FS_NTFS_ && (hostver) == 50)) { \ > _OEM_INTERN((string)); \ > } else { \ > _ISO_INTERN((string)); \ >--- 2426,2432 ---- > !(((islochdr) || (isuxatt)) && \ > ((hostver) == 25 || (hostver) == 26 || (hostver) == 40))) || \ > (hostnum) == FS_HPFS_ || \ >! ((hostnum) == FS_NTFS_/* && (hostver) == 50*/)) { \ > _OEM_INTERN((string)); \ > } else { \ > _ISO_INTERN((string)); \ >*** unzip-5.50-real/zipinfo.c 2001-12-26 03:56:40.000000000 +0500 >--- unzip-5.50/zipinfo.c 2006-02-20 14:16:27.200599912 +0500 >*************** >*** 440,445 **** >--- 440,449 ---- > int tflag_slm=TRUE, tflag_2v=FALSE; > int explicit_h=FALSE, explicit_t=FALSE; > >+ #ifdef UNIX >+ extern char OEM_CP[MAX_CP_NAME]; >+ extern char ISO_CP[MAX_CP_NAME]; >+ #endif > > #ifdef MACOS > uO.lflag = LFLAG; /* reset default on each call */ >*************** >*** 484,489 **** >--- 488,522 ---- > uO.lflag = 0; > } > break; >+ #ifdef UNIX >+ case ('I'): >+ if (negative) { >+ Info(slide, 0x401, ((char *)slide, >+ "error: encodings can't be negated")); >+ return(PK_PARAM); >+ } else { >+ if(*s) { /* Handle the -Icharset case */ >+ /* Assume that charsets can't start with a dash to spot arguments misuse */ >+ if(*s == '-') { >+ Info(slide, 0x401, ((char *)slide, >+ "error: a valid character encoding should follow the -I argument")); >+ return(PK_PARAM); >+ } >+ strncpy(ISO_CP, s, 
sizeof(ISO_CP)); >+ } else { /* -I charset */ >+ ++argv; >+ if(!(--argc > 0 && *argv != NULL && **argv != '-')) { >+ Info(slide, 0x401, ((char *)slide, >+ "error: a valid character encoding should follow the -I argument")); >+ return(PK_PARAM); >+ } >+ s = *argv; >+ strncpy(ISO_CP, s, sizeof(ISO_CP)); >+ } >+ while(*(++s)); /* No params straight after charset name */ >+ } >+ break; >+ #endif /* ?UNIX */ > case 'l': /* longer form of "ls -l" type listing */ > if (negative) > uO.lflag = -2, negative = 0; >*************** >*** 504,509 **** >--- 537,571 ---- > G.M_flag = TRUE; > break; > #endif >+ #ifdef UNIX >+ case ('O'): >+ if (negative) { >+ Info(slide, 0x401, ((char *)slide, >+ "error: encodings can't be negated")); >+ return(PK_PARAM); >+ } else { >+ if(*s) { /* Handle the -Ocharset case */ >+ /* Assume that charsets can't start with a dash to spot arguments misuse */ >+ if(*s == '-') { >+ Info(slide, 0x401, ((char *)slide, >+ "error: a valid character encoding should follow the -I argument")); >+ return(PK_PARAM); >+ } >+ strncpy(OEM_CP, s, sizeof(OEM_CP)); >+ } else { /* -O charset */ >+ ++argv; >+ if(!(--argc > 0 && *argv != NULL && **argv != '-')) { >+ Info(slide, 0x401, ((char *)slide, >+ "error: a valid character encoding should follow the -O argument")); >+ return(PK_PARAM); >+ } >+ s = *argv; >+ strncpy(OEM_CP, s, sizeof(OEM_CP)); >+ } >+ while(*(++s)); /* No params straight after charset name */ >+ } >+ break; >+ #endif /* ?UNIX */ > case 's': /* default: shorter "ls -l" type listing */ > if (negative) > uO.lflag = -2, negative = 0;
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 4871
:
502
|
531
|
532
| 1402