From f1ef4efb60e341a2a8ec72560071656b9d8b927d Mon Sep 17 00:00:00 2001
From: Tomas Bzatek
Date: Sat, 15 Nov 2008 22:14:21 +0100
Subject: Encoding revision

---
 common/strutils.c       | 89 +++++++++++++++++++++++++++++++++++++++++++++++++
 common/strutils.h       |  1 +
 common/treepathutils.c  | 13 +++++---
 common/treepathutils.h  |  7 ++--
 libarchive/libarchive.c | 52 +++++++++++++++++++++++++----
 unrar/unrar.c           | 48 +++++++++++++++++++++-----
 zip/zip.cpp             |  7 +++-
 7 files changed, 194 insertions(+), 23 deletions(-)

diff --git a/common/strutils.c b/common/strutils.c
index 47494de..ffd95c3 100644
--- a/common/strutils.c
+++ b/common/strutils.c
@@ -193,3 +193,92 @@ char* resolve_relative(const char *source, const char *point_to)

 	return canon;
 }
+
+
+
+// Originally stolen from wine-0.9.19, utf8.c
+// Copyright 2000 Alexandre Julliard
+//
+// Convert the NUL-terminated wide string src to UTF-8.
+// Returns a newly allocated string which the caller has to g_free(); a NULL
+// src gives an empty string. The result is limited to CONV_BUFF_MAX - 1
+// bytes: once a character no longer fits, the conversion is logged as an
+// overflow and everything converted up to that point is returned.
+char*
+wide_to_utf8 (const wchar_t *src)
+{
+#define CONV_BUFF_MAX 32768
+	int len;
+	char *buf, *dst, *ret;
+
+	/* zero-filled, and aborts on OOM just like g_strdup() below */
+	buf = (char *) g_malloc0 (CONV_BUFF_MAX);
+	dst = buf;
+
+	if (src)
+		/* the last byte is never written, it stays the terminating NUL */
+		for (len = CONV_BUFF_MAX - 1; *src; src++)
+		{
+			/* unsigned and wide enough for a 32-bit wchar_t */
+			unsigned long ch = (unsigned long) *src;
+
+			if (ch > 0x10ffff)  /* not a Unicode code point */
+				ch = 0xfffd;    /* U+FFFD REPLACEMENT CHARACTER */
+
+			if (ch < 0x80)  /* 0x00-0x7f: 1 byte */
+			{
+				if ((len -= 1) < 0) {
+					log ("wide_to_utf8: error converting input string, overflow.\n");
+					break;  /* overflow */
+				}
+				*dst++ = (char) ch;
+				continue;
+			}
+
+			if (ch < 0x800)  /* 0x80-0x7ff: 2 bytes */
+			{
+				if ((len -= 2) < 0) {
+					log ("wide_to_utf8: error converting input string, overflow.\n");
+					break;  /* overflow */
+				}
+				dst[1] = (char) (0x80 | (ch & 0x3f));
+				ch >>= 6;
+				dst[0] = (char) (0xc0 | ch);
+				dst += 2;
+				continue;
+			}
+
+			if (ch < 0x10000)  /* 0x800-0xffff: 3 bytes */
+			{
+				if ((len -= 3) < 0) {
+					log ("wide_to_utf8: error converting input string, overflow.\n");
+					break;  /* overflow */
+				}
+				dst[2] = (char) (0x80 | (ch & 0x3f));
+				ch >>= 6;
+				dst[1] = (char) (0x80 | (ch & 0x3f));
+				ch >>= 6;
+				dst[0] = (char) (0xe0 | ch);
+				dst += 3;
+				continue;
+			}
+
+			/* 0x10000-0x10ffff: 4 bytes */
+			if ((len -= 4) < 0) {
+				log ("wide_to_utf8: error converting input string, overflow.\n");
+				break;  /* overflow */
+			}
+			dst[3] = (char) (0x80 | (ch & 0x3f));
+			ch >>= 6;
+			dst[2] = (char) (0x80 | (ch & 0x3f));
+			ch >>= 6;
+			dst[1] = (char) (0x80 | (ch & 0x3f));
+			ch >>= 6;
+			dst[0] = (char) (0xf0 | ch);
+			dst += 4;
+		}
+
+	ret = g_strdup (buf);
+	g_free (buf);
+	return ret;
+}
diff --git a/common/strutils.h b/common/strutils.h
index 872365c..84f6013 100644
--- a/common/strutils.h
+++ b/common/strutils.h
@@ -46,5 +46,6 @@ char* extract_file_path(const char *APath);

 char* resolve_relative(const char *source, const char *point_to);
 char* canonicalize_filename(const char *filename);
+char* wide_to_utf8(const wchar_t *src);

 #endif /* __STRUTILS_H__ */
diff --git a/common/treepathutils.c b/common/treepathutils.c
index d8a5f10..d3ace5a 100644
--- a/common/treepathutils.c
+++ b/common/treepathutils.c
@@ -42,6 +42,7 @@ struct PathTree* filelist_tree_new()
 	tree->data = NULL;
 	tree->index = 0;
 	tree->node = strdup("/");
+	tree->original_pathstr = NULL;

 	// create placeholder data
 	tree->data = (struct TVFSItem*)malloc(sizeof(struct TVFSItem));
@@ -83,6 +84,8 @@ void filelist_tree_free(struct PathTree *tree)
 		}
 		if (tree->node)
 			free(tree->node);
+		if (tree->original_pathstr)
+			free(tree->original_pathstr);
 		free(tree);
 	}
 }
@@ -182,7 +185,7 @@ unsigned long int filelist_find_index_by_path(struct PathTree *tree, const char
 	else return 0;
 }

-void filelist_tree_add_item_recurr(struct PathTree *tree, const char *path, struct TVFSItem *item, unsigned long index)
+void filelist_tree_add_item_recurr(struct PathTree *tree, const char *path, const char *original_pathstr, struct TVFSItem *item, unsigned long index)
 {
 	char *pos = strstr(path, "/");
 	char *first_part;
@@ -208,6 +211,7 @@ void filelist_tree_add_item_recurr(struct PathTree *tree, const char *path, stru
 		t->data = item;
 		t->index = index;
 		t->node = strdup(path);
+		if (original_pathstr) t->original_pathstr = strdup(original_pathstr);
 		if (t->data) t->data->FName = strdup(path);
 		if (t->data) t->data->FDisplayName = strdup(path);
 		// create new list of subitems and add new item
@@ -240,6 +244,7 @@ void filelist_tree_add_item_recurr(struct PathTree *tree, const char *path, stru
 			node->items = g_ptr_array_new();
 			node->index = 0;
 			node->node = strdup(first_part);
+			node->original_pathstr = NULL;

 			// create placeholder data
 			node->data = (struct TVFSItem*)malloc(sizeof(struct TVFSItem));
@@ -258,7 +263,7 @@ void filelist_tree_add_item_recurr(struct PathTree *tree, const char *path, stru
 		}

 		// and recurse one level deeper
-		filelist_tree_add_item_recurr(node, last_part, item, index);
+		filelist_tree_add_item_recurr(node, last_part, original_pathstr, item, index);
 	}

 	free(first_part);
@@ -266,7 +271,7 @@ void filelist_tree_add_item_recurr(struct PathTree *tree, const char *path, stru
 }


-gboolean filelist_tree_add_item(struct PathTree *tree, const char *path, struct TVFSItem *item, unsigned long index)
+gboolean filelist_tree_add_item(struct PathTree *tree, const char *path, const char *original_pathstr, struct TVFSItem *item, unsigned long index)
 {
 	if (! tree) {
 		fprintf(stderr, "filelist_tree_add_item: tree == NULL !\n");
@@ -306,7 +311,7 @@ gboolean filelist_tree_add_item(struct PathTree *tree, const char *path, struct
 		if (found->data) found->data->FDisplayName = strdup(found->node);
 	} else
 		// create new item recursively
-		filelist_tree_add_item_recurr(tree, pp, item, index);
+		filelist_tree_add_item_recurr(tree, pp, original_pathstr, item, index);

 	free(p);
 	free(pp);
diff --git a/common/treepathutils.h b/common/treepathutils.h
index 13f3057..a902a33 100644
--- a/common/treepathutils.h
+++ b/common/treepathutils.h
@@ -33,6 +33,7 @@ struct PathTree {
 	struct TVFSItem *data;
 	unsigned long index;
 	char *node;
+	char *original_pathstr;
 };


@@ -40,11 +41,11 @@ struct PathTree* filelist_tree_new();
 void filelist_tree_free(struct PathTree *tree);
 void filelist_tree_print(struct PathTree *tree);

-/* Symlink resolving: strongly discouraged to use at the present state of art.
- * We would have to implement full symlink system, do loop checking etc. */
+/* Symlink resolving: strongly discouraged to use at the present state of art.
+ * We would have to implement full symlink system, do loop checking etc. */
 void filelist_tree_resolve_symlinks(struct PathTree *tree);

-gboolean filelist_tree_add_item(struct PathTree *tree, const char *path, struct TVFSItem *item, unsigned long index);
+gboolean filelist_tree_add_item(struct PathTree *tree, const char *path, const char *original_pathstr, struct TVFSItem *item, unsigned long index);
 struct PathTree* filelist_tree_find_node_by_path(struct PathTree *tree, const char *path);
 unsigned long int filelist_find_index_by_path(struct PathTree *tree, const char *path);
 struct PathTree* filelist_tree_get_item_by_index(struct PathTree *tree, unsigned long index);
diff --git a/libarchive/libarchive.c b/libarchive/libarchive.c
index efdf379..00f49a1 100644
--- a/libarchive/libarchive.c
+++ b/libarchive/libarchive.c
@@ -260,8 +260,34 @@ TVFSResult VFSOpen(struct TVFSGlobs *globs, char *sName)
 			item->c_time = archive_entry_ctime(entry);
 			item->a_time = archive_entry_atime(entry);

+#if 0
+			char *s;
+			if (! archive_entry_pathname_w(entry)) {
+				if (g_utf8_validate (archive_entry_pathname(entry), -1, NULL))
+					s = g_strdup (archive_entry_pathname(entry));
+				else
+					s = g_filename_display_name (archive_entry_pathname(entry));
+			}
+			else
+				s = wide_to_utf8 (archive_entry_pathname_w(entry));
+#endif
+
+			char *s;
+			if (g_utf8_validate (archive_entry_pathname(entry), -1, NULL))
+				s = g_strdup (archive_entry_pathname(entry));
+			else {
+				if (archive_entry_pathname_w(entry))
+					s = wide_to_utf8 (archive_entry_pathname_w(entry));
+				else
+					s = g_filename_display_name (archive_entry_pathname(entry));
+			}
+
+
+//			g_print("file = '%s', wide = '%ls'\n", archive_entry_pathname(entry), archive_entry_pathname_w(entry));
+
 			// Add item to the global list and continue with next file
-			filelist_tree_add_item(globs->files, archive_entry_pathname(entry), item, 0);
+			filelist_tree_add_item(globs->files, s, archive_entry_pathname(entry), item, 0);
+			g_free (s);
 		}
 		archive_read_close(a);
 	}
@@ -575,9 +601,17 @@ TVFSResult VFSCopyOut(struct TVFSGlobs *globs, const char *sSrcName, const char

 	printf("(II) VFSCopyOut: copying file '%s' out to '%s'\n", sSrcName, sDstName);

-	char *src;
-	if (! IS_DIR_SEP(*sSrcName)) src = g_build_path("/", globs->curr_dir, sSrcName, NULL);
-	else src = g_strdup(sSrcName);
+	struct PathTree *node = filelist_tree_find_node_by_path(globs->files, sSrcName);
+	if (! node) {
+		fprintf(stderr, "(EE) VFSCopyOut: cannot find file '%s'\n", sSrcName);
+		return cVFS_ReadErr;
+	}
+
+	const char *src = node->original_pathstr;
+	if (! src) {
+		fprintf(stderr, "(WW) VFSCopyOut: cannot determine original filename\n");
+		src = sSrcName;
+	}

 	printf("(II) VFSCopyOut: new src path: '%s'\n", src);

@@ -585,6 +619,7 @@ TVFSResult VFSCopyOut(struct TVFSGlobs *globs, const char *sSrcName, const char
 	struct archive *a;
 	struct archive_entry *entry;
 	int r;
+	gboolean found = FALSE;

 	Result = libarchive_open(&a, globs->archive_path, globs->block_size);
 	if (Result == cVFS_OK)
@@ -605,7 +640,7 @@ TVFSResult VFSCopyOut(struct TVFSGlobs *globs, const char *sSrcName, const char
 			}

 //			printf ("found file: %s, mode = %x\n", archive_entry_pathname(entry), archive_entry_mode(entry));
-			char *ssrc = src;
+			const char *ssrc = src;
 			const char *asrc = archive_entry_pathname(entry);
 			if (IS_DIR_SEP(*ssrc)) ssrc++;
 			if (IS_DIR_SEP(*asrc)) asrc++;
@@ -614,6 +649,7 @@ TVFSResult VFSCopyOut(struct TVFSGlobs *globs, const char *sSrcName, const char
 			if (strcmp(ssrc, asrc) == 0) {
 //				printf("--> found file, extracting\n");
 				fprintf(stderr, "(II) VFSCopyOut: extract_file_path(sDstName) = '%s', extract_file_name(sDstName) = '%s' \n", extract_file_path(sDstName), extract_file_name(sDstName));
+				found = TRUE;

 				Result = my_archive_read_data_into_fd(globs, a, entry, sDstName, globs->block_size, Append);
 				break;
@@ -622,7 +658,11 @@ TVFSResult VFSCopyOut(struct TVFSGlobs *globs, const char *sSrcName, const char
 		archive_read_close(a);
 	}
 	archive_read_finish(a);
-	g_free(src);
+
+	if ((! found) && Result == cVFS_OK) {
+		fprintf(stderr, "(EE) VFSCopyOut: file not found in archive.\n");
+		Result = cVFS_ReadErr;
+	}

 	fprintf(stderr, "(II) VFSCopyOut: finished. \n");
 	return Result;
diff --git a/unrar/unrar.c b/unrar/unrar.c
index b35f37e..1fc2882 100644
--- a/unrar/unrar.c
+++ b/unrar/unrar.c
@@ -364,7 +364,7 @@ TVFSResult VFSOpen(struct TVFSGlobs *globs, char *sName)

 		int PASCAL res = 0;
 		while ((res = RARReadHeaderEx(handle, header)) == 0) {
-			printf(" header->FileName = '%s', Flags = 0x%x\n", header->FileName, header->Flags);
+//			printf(" header->FileName = '%s', Flags = 0x%x\n", header->FileName, header->Flags);

 			// Create a TVFSItem entry and fill all info
 			struct TVFSItem *item = (struct TVFSItem*)malloc(sizeof(struct TVFSItem));
@@ -397,9 +397,22 @@ TVFSResult VFSOpen(struct TVFSGlobs *globs, char *sName)
 			item->c_time = item->m_time;
 			item->a_time = item->m_time;

+//			g_print (" valid = %d\n", g_utf8_validate (header->FileName, -1, NULL));
+
+			char *s;
+			if (g_utf8_validate (header->FileName, -1, NULL))
+				s = g_strdup (header->FileName);
+			else if (header->FileNameW[0])
+				s = wide_to_utf8 (header->FileNameW);
+			else  /* no wide name available: avoid registering an empty path */
+				s = g_filename_display_name (header->FileName);
+//			g_print (" ansi = '%s'\n wide = '%ls'\n utf8 = '%s'\n", header->FileName, header->FileNameW, s);
+
 			// Add item to the global list and continue with next file
-			filelist_tree_add_item(globs->files, header->FileName, item, 0);
-		    int PASCAL res2 = RARProcessFile(handle, RAR_SKIP, NULL, NULL);
+			filelist_tree_add_item(globs->files, s, header->FileName, item, 0);
+			g_free (s);
+
+			int PASCAL res2 = RARProcessFile(handle, RAR_SKIP, NULL, NULL);
 			if (res2) printf("RARProcessFile result = %d\n", res2);
 		}
 //		printf("\nRARReadHeader result = %d\n", res);
@@ -657,11 +670,20 @@ TVFSResult VFSCopyOut(struct TVFSGlobs *globs, const char *sSrcName, const char

 	printf("(II) VFSCopyOut: copying file '%s' out to '%s'\n", sSrcName, sDstName);

+	struct PathTree *node = filelist_tree_find_node_by_path(globs->files, sSrcName);
+	if (! node) {
+		fprintf(stderr, "(EE) VFSCopyOut: cannot find file '%s'\n", sSrcName);
+		return cVFS_ReadErr;
+	}
+
 	TVFSResult Result = cVFS_OK;
+	gboolean found = FALSE;

-	char *src;
-	if (! IS_DIR_SEP(*sSrcName)) src = g_build_path("/", globs->curr_dir, sSrcName, NULL);
-	else src = g_strdup(sSrcName);
+	char *src = node->original_pathstr;
+	if (! src) {
+		fprintf(stderr, "(WW) VFSCopyOut: cannot determine original filename\n");
+		src = (char *)sSrcName;
+	}

 	printf("(II) VFSCopyOut: new src path: '%s'\n", src);

@@ -713,6 +735,7 @@ TVFSResult VFSCopyOut(struct TVFSGlobs *globs, const char *sSrcName, const char
 				globs->extract_done = 0;
 				globs->extract_file_size = (int64_t)((int64_t)(header->UnpSizeHigh * 0x100000000) + (int64_t)header->UnpSize);
 				globs->extract_cancelled = FALSE;
+				found = TRUE;

 				int res2 = RARProcessFile(handle, RAR_EXTRACT, NULL, (char *)sDstName);

@@ -761,7 +784,10 @@ TVFSResult VFSCopyOut(struct TVFSGlobs *globs, const char *sSrcName, const char
 						break;
 					case ERAR_MISSING_PASSWORD:
 						Result = cVFS_BadPassword;
-						if (globs->password) g_free (globs->password);
+						if (globs->password) {
+							g_free (globs->password);
+							globs->password = NULL;
+						}
 						break;
 					case ERAR_UNKNOWN:
 					default:
@@ -784,7 +810,11 @@ TVFSResult VFSCopyOut(struct TVFSGlobs *globs, const char *sSrcName, const char
 	}

 	free(archive_data);
-	g_free(src);
+
+	if ((! found) && Result == cVFS_OK) {
+		fprintf(stderr, "(EE) VFSCopyOut: file not found in archive.\n");
+		Result = cVFS_ReadErr;
+	}

 	fprintf(stderr, "(II) VFSCopyOut: finished. \n");
 	return Result;
@@ -804,7 +834,7 @@ TVFSResult VFSCopyIn(struct TVFSGlobs *globs, const char *sSrcName, const char *
  *
  * - UTF-8, FName/FDisplayName and absolute/relative paths revision needed!
  * - find a reliable way to catch bad password errors and free the cached invalid password
- * - no error reporting when archive is corrupted
+ * - no error reporting when archive is corrupted -- hopefully fixed by ask_question callback
  * - archive testing (needs new VFS API)
  *
  ***/
diff --git a/zip/zip.cpp b/zip/zip.cpp
index 14698ea..094fc37 100644
--- a/zip/zip.cpp
+++ b/zip/zip.cpp
@@ -219,8 +219,13 @@ void build_global_filelist(struct TVFSGlobs *globs)

 			if (fh->IsEncrypted()) globs->need_password = TRUE;

+			char *s;
+
+			s = g_filename_display_name ((LPCTSTR)fh->GetFileName());
+
 			// Add item to the global list and continue with next file
-			filelist_tree_add_item(globs->files, (LPCTSTR)fh->GetFileName(), item, i + 1);
+			filelist_tree_add_item(globs->files, s, (LPCTSTR)fh->GetFileName(), item, i + 1);
+			g_free (s);
 			printf("\n");
 		}
 	}
--
cgit v1.2.3