mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
c49341c82f
housekeeping, fixes #72, fixes #70
1191 lines
32 KiB
C++
1191 lines
32 KiB
C++
/* POLE - Portable C++ library to access OLE Storage
|
|
Copyright (C) 2002-2007 Ariya Hidayat (ariya@kde.org).
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions
|
|
are met:
|
|
|
|
1. Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
2. Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <cstring>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <list>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <cassert>
|
|
|
|
#include "pole.h"
|
|
|
|
// enable to activate debugging output
|
|
// #define POLE_DEBUG
|
|
|
|
namespace POLE
|
|
{
|
|
|
|
|
|
} // namespace POLE
|
|
|
|
using namespace POLE;
|
|
|
|
// Decode a little-endian 16-bit value from the two bytes at ptr.
static inline std::uint16_t readU16( const std::uint8_t* ptr )
{
  const int low = ptr[0];
  const int high = ptr[1] << 8;
  return low + high;
}
|
|
|
|
// Decode a little-endian 32-bit value from the four bytes at ptr.
// Each byte is widened to uint32_t before shifting: shifting the promoted
// int left by 24 would overflow into the sign bit when ptr[3] >= 0x80.
static inline std::uint32_t readU32( const std::uint8_t* ptr )
{
  return  static_cast<std::uint32_t>( ptr[0] )
       | ( static_cast<std::uint32_t>( ptr[1] ) << 8 )
       | ( static_cast<std::uint32_t>( ptr[2] ) << 16 )
       | ( static_cast<std::uint32_t>( ptr[3] ) << 24 );
}
|
|
|
|
// Store data at ptr as two bytes, least significant byte first.
static inline void writeU16( std::uint8_t* ptr, std::uint16_t data )
{
  const std::uint8_t low = static_cast<std::uint8_t>( data & 0xff );
  const std::uint8_t high = static_cast<std::uint8_t>( ( data >> 8 ) & 0xff );
  ptr[0] = low;
  ptr[1] = high;
}
|
|
|
|
// Store data at ptr as four bytes, least significant byte first.
static inline void writeU32( std::uint8_t* ptr, std::uint32_t data )
{
  for( int i = 0; i < 4; ++i )
  {
    ptr[i] = static_cast<std::uint8_t>( data & 0xff );
    data >>= 8;
  }
}
|
|
|
|
// Eight-byte signature found at the start of every OLE compound file.
static const std::uint8_t pole_magic[8] = {
  0xd0, 0xcf, 0x11, 0xe0,
  0xa1, 0xb1, 0x1a, 0xe1
};
|
|
|
|
// =========== Header ==========
|
|
|
|
// Build a header with default geometry (512-byte big blocks, 64-byte small
// blocks, 4096-byte threshold), the OLE signature and an empty BAT list.
Header::Header():
  b_shift( 9 ),
  s_shift( 6 ),
  num_bat( 0 ),
  dirent_start( 0 ),
  threshold( 4096 ),
  sbat_start( 0 ),
  num_sbat( 0 ),
  mbat_start( 0 ),
  num_mbat( 0 )
{
  // copy the signature
  for( std::size_t i = 0; i != sizeof( pole_magic ); ++i )
    id[i] = pole_magic[i];

  // mark all 109 in-header BAT slots as unused
  std::size_t slot = 0;
  while( slot < 109 )
    bb_blocks[slot++] = AllocTable::Avail;
}
|
|
|
|
bool Header::valid()
|
|
{
|
|
if( threshold != 4096 ) return false;
|
|
if( num_bat == 0 ) return false;
|
|
if( (num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false;
|
|
if( (num_bat < 109) && (num_mbat != 0) ) return false;
|
|
if( s_shift > b_shift ) return false;
|
|
if( b_shift <= 6 ) return false;
|
|
if( b_shift >=31 ) return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
void Header::load( const std::uint8_t* buffer )
|
|
{
|
|
b_shift = readU16( buffer + 0x1e );
|
|
s_shift = readU16( buffer + 0x20 );
|
|
num_bat = readU32( buffer + 0x2c );
|
|
dirent_start = readU32( buffer + 0x30 );
|
|
threshold = readU32( buffer + 0x38 );
|
|
sbat_start = readU32( buffer + 0x3c );
|
|
num_sbat = readU32( buffer + 0x40 );
|
|
mbat_start = readU32( buffer + 0x44 );
|
|
num_mbat = readU32( buffer + 0x48 );
|
|
|
|
for( std::size_t i = 0; i < 8; i++ )
|
|
id[i] = buffer[i];
|
|
for( std::size_t i=0; i<109; i++ )
|
|
bb_blocks[i] = readU32( buffer + 0x4C+i*4 );
|
|
}
|
|
|
|
void Header::save( std::uint8_t* buffer )
|
|
{
|
|
memset( buffer, 0, 0x4c );
|
|
memcpy( buffer, pole_magic, 8 ); // ole signature
|
|
writeU32( buffer + 8, 0 ); // unknown
|
|
writeU32( buffer + 12, 0 ); // unknown
|
|
writeU32( buffer + 16, 0 ); // unknown
|
|
writeU16( buffer + 24, 0x003e ); // revision ?
|
|
writeU16( buffer + 26, 3 ); // version ?
|
|
writeU16( buffer + 28, 0xfffe ); // unknown
|
|
writeU16( buffer + 0x1e, b_shift );
|
|
writeU16( buffer + 0x20, s_shift );
|
|
writeU32( buffer + 0x2c, num_bat );
|
|
writeU32( buffer + 0x30, dirent_start );
|
|
writeU32( buffer + 0x38, threshold );
|
|
writeU32( buffer + 0x3c, sbat_start );
|
|
writeU32( buffer + 0x40, num_sbat );
|
|
writeU32( buffer + 0x44, mbat_start );
|
|
writeU32( buffer + 0x48, num_mbat );
|
|
|
|
for( std::size_t i=0; i<109; i++ )
|
|
writeU32( buffer + 0x4C+i*4, bb_blocks[i] );
|
|
}
|
|
|
|
void Header::debug()
|
|
{
|
|
std::cout << std::endl;
|
|
std::cout << "b_shift " << b_shift << std::endl;
|
|
std::cout << "s_shift " << s_shift << std::endl;
|
|
std::cout << "num_bat " << num_bat << std::endl;
|
|
std::cout << "dirent_start " << dirent_start << std::endl;
|
|
std::cout << "threshold " << threshold << std::endl;
|
|
std::cout << "sbat_start " << sbat_start << std::endl;
|
|
std::cout << "num_sbat " << num_sbat << std::endl;
|
|
std::cout << "mbat_start " << mbat_start << std::endl;
|
|
std::cout << "num_mbat " << num_mbat << std::endl;
|
|
|
|
std::size_t s = (num_bat<=109) ? num_bat : 109;
|
|
std::cout << "bat blocks: ";
|
|
for( std::size_t i = 0; i < s; i++ )
|
|
std::cout << bb_blocks[i] << " ";
|
|
std::cout << std::endl;
|
|
}
|
|
|
|
// =========== AllocTable ==========
|
|
|
|
// Reserved allocation-table values.
const std::uint32_t AllocTable::Avail   = 0xffffffff;  // slot is free
const std::uint32_t AllocTable::Eof     = 0xfffffffe;  // last block of a chain
const std::uint32_t AllocTable::Bat     = 0xfffffffd;  // printed as [bat] by debug()
const std::uint32_t AllocTable::MetaBat = 0xfffffffc;  // printed as [metabat] by debug()
|
|
|
|
// Create a table with a 4096-byte block size and 128 free slots.
AllocTable::AllocTable():
  blockSize( 4096 ),
  data()
{
  // initial size; resize() marks the new slots as available
  this->resize( 128 );
}
|
|
|
|
std::size_t AllocTable::count()
|
|
{
|
|
return data.size();
|
|
}
|
|
|
|
void AllocTable::resize( std::size_t newsize )
|
|
{
|
|
std::size_t oldsize = data.size();
|
|
data.resize( newsize );
|
|
if( newsize > oldsize )
|
|
for( std::size_t i = oldsize; i<newsize; i++ )
|
|
data[i] = Avail;
|
|
}
|
|
|
|
// make sure there're still free blocks
|
|
void AllocTable::preserve( std::size_t n )
|
|
{
|
|
std::vector<std::size_t> pre;
|
|
for( std::size_t i=0; i < n; i++ )
|
|
pre.push_back( unused() );
|
|
}
|
|
|
|
std::size_t AllocTable::operator[]( std::size_t index )
|
|
{
|
|
std::size_t result;
|
|
result = data[index];
|
|
return result;
|
|
}
|
|
|
|
void AllocTable::set( std::size_t index, std::uint32_t value )
|
|
{
|
|
if( index >= count() ) resize( index + 1);
|
|
data[ index ] = value;
|
|
}
|
|
|
|
void AllocTable::setChain( std::vector<std::uint32_t> chain )
|
|
{
|
|
if( chain.size() )
|
|
{
|
|
for( std::size_t i=0; i<chain.size()-1; i++ )
|
|
set( chain[i], chain[i+1] );
|
|
set( chain[ chain.size()-1 ], AllocTable::Eof );
|
|
}
|
|
}
|
|
|
|
// TODO: optimize this with better search
|
|
// Linear search: true when item occurs anywhere in chain.
static bool already_exist(const std::vector<std::size_t>& chain,
                          std::size_t item)
{
  for( const std::size_t candidate : chain )
  {
    if( candidate == item )
      return true;
  }
  return false;
}
|
|
|
|
// follow
|
|
std::vector<std::size_t> AllocTable::follow( std::size_t start )
|
|
{
|
|
std::vector<std::size_t> chain;
|
|
|
|
if( start >= count() ) return chain;
|
|
|
|
std::size_t p = start;
|
|
while( p < count() )
|
|
{
|
|
if( p == (std::size_t)Eof ) break;
|
|
if( p == (std::size_t)Bat ) break;
|
|
if( p == (std::size_t)MetaBat ) break;
|
|
if( already_exist(chain, p) ) break;
|
|
chain.push_back(p);
|
|
if( data[p] >= count() ) break;
|
|
p = data[ p ];
|
|
}
|
|
|
|
return chain;
|
|
}
|
|
|
|
std::size_t AllocTable::unused()
|
|
{
|
|
// find first available block
|
|
for( std::size_t i = 0; i < data.size(); i++ )
|
|
if( data[i] == Avail )
|
|
return i;
|
|
|
|
// completely full, so enlarge the table
|
|
std::size_t block = data.size();
|
|
resize( data.size()+10 );
|
|
return block;
|
|
}
|
|
|
|
void AllocTable::load( const std::uint8_t* buffer, std::size_t len )
|
|
{
|
|
resize( len / 4 );
|
|
for( std::size_t i = 0; i < count(); i++ )
|
|
set( i, readU32( buffer + i*4 ) );
|
|
}
|
|
|
|
// return space required to save this dirtree
|
|
std::size_t AllocTable::size()
|
|
{
|
|
return count() * 4;
|
|
}
|
|
|
|
void AllocTable::save( std::uint8_t* buffer )
|
|
{
|
|
for( std::size_t i = 0; i < count(); i++ )
|
|
writeU32( buffer + i*4, data[i] );
|
|
}
|
|
|
|
void AllocTable::debug()
|
|
{
|
|
std::cout << "block size " << data.size() << std::endl;
|
|
for( std::size_t i=0; i< data.size(); i++ )
|
|
{
|
|
if( data[i] == Avail ) continue;
|
|
std::cout << i << ": ";
|
|
if( data[i] == Eof ) std::cout << "[eof]";
|
|
else if( data[i] == Bat ) std::cout << "[bat]";
|
|
else if( data[i] == MetaBat ) std::cout << "[metabat]";
|
|
else std::cout << data[i];
|
|
std::cout << std::endl;
|
|
}
|
|
}
|
|
|
|
// =========== DirTree ==========
|
|
|
|
// Link value meaning "no entry" for the prev/next/child fields.
const std::uint32_t DirTree::End = 0xffffffff;
|
|
|
|
// Create a tree that contains only the root entry.
DirTree::DirTree():
  entries()
{
  this->clear();
}
|
|
|
|
void DirTree::clear()
|
|
{
|
|
// leave only root entry
|
|
entries.resize( 1 );
|
|
entries[0].valid = true;
|
|
entries[0].name = "Root Entry";
|
|
entries[0].dir = true;
|
|
entries[0].size = 0;
|
|
entries[0].start = End;
|
|
entries[0].prev = End;
|
|
entries[0].next = End;
|
|
entries[0].child = End;
|
|
}
|
|
|
|
std::size_t DirTree::entryCount()
|
|
{
|
|
return entries.size();
|
|
}
|
|
|
|
// Pointer to the entry at index, or a null pointer when index is out of range.
DirEntry* DirTree::entry( std::size_t index )
{
  return ( index < entryCount() ) ? &entries[ index ] : (DirEntry*) 0;
}
|
|
|
|
// Index of the entry that e points to, or -1 when e is not an element of
// this tree. The comparison is by address.
std::ptrdiff_t DirTree::indexOf( DirEntry* e )
{
  const std::size_t total = entryCount();
  std::size_t i = 0;
  while( i < total )
  {
    if( &entries[i] == e ) return i;
    ++i;
  }
  return -1;
}
|
|
|
|
std::ptrdiff_t DirTree::parent( std::size_t index )
|
|
{
|
|
// brute-force, basically we iterate for each entries, find its children
|
|
// and check if one of the children is 'index'
|
|
for( std::size_t j=0; j<entryCount(); j++ )
|
|
{
|
|
std::vector<std::size_t> chi = children( j );
|
|
for( std::size_t i=0; i<chi.size();i++ )
|
|
if( chi[i] == index )
|
|
return j;
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
std::string DirTree::fullName( std::size_t index )
|
|
{
|
|
// don't use root name ("Root Entry"), just give "/"
|
|
if( index == 0 ) return "/";
|
|
|
|
std::string result = entry( index )->name;
|
|
result.insert( 0, "/" );
|
|
auto p = parent( index );
|
|
DirEntry * _entry = 0;
|
|
while( p > 0 )
|
|
{
|
|
_entry = entry( p );
|
|
if (_entry->dir && _entry->valid)
|
|
{
|
|
result.insert( 0, _entry->name);
|
|
result.insert( 0, "/" );
|
|
}
|
|
--p;
|
|
index = p;
|
|
if( index <= 0 ) break;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
// given a fullname (e.g "/ObjectPool/_1020961869"), find the entry
|
|
// if not found and create is false, return 0
|
|
// if create is true, a new entry is returned
|
|
// Resolve a slash-separated path to its entry, descending from the root one
// component at a time. A component matches a child that is valid, has a name
// longer than one character and an equal name; when several children match,
// the last one wins. A missing component returns a null pointer unless
// create is true, in which case a new file entry is appended and linked in
// as the parent's first child.
DirEntry* DirTree::entry( const std::string& name, bool create )
{
  if( name.empty() ) return (DirEntry*)0;

  // quick check for "/" (that's root)
  if( name == "/" ) return entry( 0 );

  // split into components, e.g. "/ObjectPool/_1020961869" becomes
  // "ObjectPool" and "_1020961869"
  std::list<std::string> parts;
  std::string::size_type from = ( name[0] == '/' ) ? 1 : 0;
  while( from < name.length() )
  {
    std::string::size_type to = name.find_first_of( '/', from );
    if( to == std::string::npos ) to = name.length();
    parts.push_back( name.substr( from, to - from ) );
    from = to + 1;
  }

  // start from root and trace one component at a time
  std::size_t current = 0;
  for( std::list<std::string>::iterator part = parts.begin(); part != parts.end(); ++part )
  {
    // look for the component among the children of the current entry
    std::ptrdiff_t match = 0;
    const std::vector<std::size_t> kids = children( current );
    for( std::size_t k = 0; k < kids.size(); ++k )
    {
      DirEntry* candidate = entry( kids[k] );
      if( !candidate ) continue;
      if( !candidate->valid || !( candidate->name.length() > 1 ) ) continue;
      if( candidate->name == *part )
        match = kids[k];
    }

    // traverse to the child
    if( match > 0 )
    {
      current = match;
      continue;
    }

    // not found among children
    if( !create ) return (DirEntry*)0;

    // create a new entry and make it the first child of the current one
    const std::size_t owner = current;
    entries.push_back( DirEntry() );
    current = entryCount() - 1;

    DirEntry* fresh = entry( current );
    fresh->valid = true;
    fresh->name  = *part;
    fresh->dir   = false;
    fresh->size  = 0;
    fresh->start = 0;
    fresh->child = End;
    fresh->prev  = End;
    fresh->next  = entry( owner )->child;
    entry( owner )->child = static_cast<std::uint32_t>( current );
  }

  return entry( current );
}
|
|
|
|
// helper function: recursively find siblings of index
|
|
// Append index and, recursively, everything reachable through prev/next
// links to result. Entries that are missing, invalid or already listed are
// skipped, so cyclic links cannot cause endless recursion.
void dirtree_find_siblings( DirTree* dirtree, std::vector<std::size_t>& result,
  std::size_t index )
{
  DirEntry* e = dirtree->entry( index );
  if( !e || !e->valid ) return;

  // prevent infinite loop
  for( std::size_t i = 0; i < result.size(); ++i )
    if( result[i] == index ) return;

  // add myself
  result.push_back( index );

  // visit the previous sibling first, then the next one
  const std::size_t links[2] = { e->prev, e->next };
  for( int side = 0; side < 2; ++side )
  {
    const std::size_t link = links[side];

    // index 0 and out-of-range values are not followed
    if( !( link > 0 ) || !( link < dirtree->entryCount() ) ) continue;

    // don't revisit something already collected
    bool seen = false;
    for( std::size_t i = 0; i < result.size(); ++i )
      if( result[i] == link ) seen = true;

    if( !seen )
      dirtree_find_siblings( dirtree, result, link );
  }
}
|
|
|
|
std::vector<std::size_t> DirTree::children( std::size_t index )
|
|
{
|
|
std::vector<std::size_t> result;
|
|
|
|
DirEntry* e = entry( index );
|
|
if( e ) if( e->valid && e->child < entryCount() )
|
|
dirtree_find_siblings( this, result, e->child );
|
|
|
|
return result;
|
|
}
|
|
|
|
void DirTree::load( std::uint8_t* buffer, std::size_t size )
|
|
{
|
|
entries.clear();
|
|
|
|
for( std::size_t i = 0; i < size/128; i++ )
|
|
{
|
|
std::size_t p = i * 128;
|
|
|
|
// would be < 32 if first char in the name isn't printable
|
|
std::size_t prefix = 32;
|
|
|
|
// parse name of this entry, which stored as Unicode 16-bit
|
|
std::string name;
|
|
int name_len = readU16( buffer + 0x40+p );
|
|
if( name_len > 64 ) name_len = 64;
|
|
for( int j=0; ( buffer[j+p]) && (j<name_len); j+= 2 )
|
|
name.append( 1, buffer[j+p] );
|
|
|
|
// first char isn't printable ? remove it...
|
|
if( buffer[p] < 32 )
|
|
{
|
|
prefix = buffer[0];
|
|
name.erase( 0,1 );
|
|
}
|
|
|
|
// 2 = file (aka stream), 1 = directory (aka storage), 5 = root
|
|
std::size_t type = buffer[ 0x42 + p];
|
|
|
|
DirEntry e;
|
|
e.valid = true;
|
|
e.name = name;
|
|
e.start = readU32( buffer + 0x74+p );
|
|
e.size = readU32( buffer + 0x78+p );
|
|
e.prev = readU32( buffer + 0x44+p );
|
|
e.next = readU32( buffer + 0x48+p );
|
|
e.child = readU32( buffer + 0x4C+p );
|
|
e.dir = ( type!=2 );
|
|
|
|
// sanity checks
|
|
if( (type != 2) && (type != 1 ) && (type != 5 ) ) e.valid = false;
|
|
if( name_len < 1 ) e.valid = false;
|
|
|
|
entries.push_back( e );
|
|
}
|
|
}
|
|
|
|
// return space required to save this dirtree
|
|
std::size_t DirTree::size()
|
|
{
|
|
return entryCount() * 128;
|
|
}
|
|
|
|
void DirTree::save( std::uint8_t* buffer )
|
|
{
|
|
memset( buffer, 0, size() );
|
|
|
|
// root is fixed as "Root Entry"
|
|
DirEntry* root = entry( 0 );
|
|
std::string name = "Root Entry";
|
|
for( std::size_t j = 0; j < name.length(); j++ )
|
|
buffer[ j*2 ] = name[j];
|
|
writeU16( buffer + 0x40, static_cast<std::uint16_t>(name.length()*2 + 2) );
|
|
writeU32( buffer + 0x74, 0xffffffff );
|
|
writeU32( buffer + 0x78, 0 );
|
|
writeU32( buffer + 0x44, 0xffffffff );
|
|
writeU32( buffer + 0x48, 0xffffffff );
|
|
writeU32( buffer + 0x4c, root->child );
|
|
buffer[ 0x42 ] = 5;
|
|
buffer[ 0x43 ] = 1;
|
|
|
|
for( std::size_t i = 1; i < entryCount(); i++ )
|
|
{
|
|
DirEntry* e = entry( i );
|
|
if( !e ) continue;
|
|
if( e->dir )
|
|
{
|
|
e->start = 0xffffffff;
|
|
e->size = 0;
|
|
}
|
|
|
|
// max length for name is 32 chars
|
|
std::string name = e->name;
|
|
if( name.length() > 32 )
|
|
name.erase( 32, name.length() );
|
|
|
|
// write name as Unicode 16-bit
|
|
for( std::size_t j = 0; j < name.length(); j++ )
|
|
buffer[ i*128 + j*2 ] = name[j];
|
|
|
|
writeU16( buffer + i*128 + 0x40, static_cast<std::uint16_t>(name.length()*2 + 2) );
|
|
writeU32( buffer + i*128 + 0x74, e->start );
|
|
writeU32( buffer + i*128 + 0x78, e->size );
|
|
writeU32( buffer + i*128 + 0x44, e->prev );
|
|
writeU32( buffer + i*128 + 0x48, e->next );
|
|
writeU32( buffer + i*128 + 0x4c, e->child );
|
|
buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2;
|
|
buffer[ i*128 + 0x43 ] = 1; // always black
|
|
}
|
|
}
|
|
|
|
void DirTree::debug()
|
|
{
|
|
for( std::size_t i = 0; i < entryCount(); i++ )
|
|
{
|
|
DirEntry* e = entry( i );
|
|
if( !e ) continue;
|
|
std::cout << i << ": ";
|
|
if( !e->valid ) std::cout << "INVALID ";
|
|
std::cout << e->name << " ";
|
|
if( e->dir ) std::cout << "(Dir) ";
|
|
else std::cout << "(File) ";
|
|
std::cout << e->size << " ";
|
|
std::cout << "s:" << e->start << " ";
|
|
std::cout << "(";
|
|
if( e->child == End ) std::cout << "-"; else std::cout << e->child;
|
|
std::cout << " ";
|
|
if( e->prev == End ) std::cout << "-"; else std::cout << e->prev;
|
|
std::cout << ":";
|
|
if( e->next == End ) std::cout << "-"; else std::cout << e->next;
|
|
std::cout << ")";
|
|
std::cout << std::endl;
|
|
}
|
|
}
|
|
|
|
// =========== StorageIO ==========
|
|
|
|
// Wrap an in-memory compound file of length bytes. The data pointer is
// stored, not copied; nothing is parsed until open()/load() is called.
StorageIO::StorageIO( Storage* st, char* bytes, std::size_t length ):
  storage( st ),
  filedata( (std::uint8_t *)bytes ),
  dataLength( length ),
  result( Storage::Ok ),
  opened( false ),
  filesize( 0 ),
  header( new Header() ),
  dirtree( new DirTree() ),
  bbat( new AllocTable() ),
  sbat( new AllocTable() ),
  sb_blocks(),
  streams()
{
  // derive the block sizes from the default header shifts
  const std::size_t one = 1;
  bbat->blockSize = one << header->b_shift;
  sbat->blockSize = one << header->s_shift;
}
|
|
|
|
// Close the storage if it is still open, then release the owned tables,
// directory tree and header.
StorageIO::~StorageIO()
{
  if( opened )
    close();

  delete sbat;
  delete bbat;
  delete dirtree;
  delete header;
}
|
|
|
|
bool StorageIO::open()
|
|
{
|
|
// already opened ? close first
|
|
if( opened ) close();
|
|
|
|
load();
|
|
|
|
return result == Storage::Ok;
|
|
}
|
|
|
|
void StorageIO::load()
|
|
{
|
|
std::uint8_t* buffer = 0;
|
|
std::size_t buflen = 0;
|
|
std::vector<std::size_t> blocks;
|
|
|
|
// open the file, check for error
|
|
result = Storage::OpenFailed;
|
|
//FSTREAM file.open( filename.c_str(), std::ios::binary | std::ios::in );
|
|
//FSTREAM if( !file.good() ) return;
|
|
|
|
// find size of input file
|
|
//FSTREAM file.seekg( 0, std::ios::end );
|
|
//FSTREAM filesize = file.tellg();
|
|
filesize = dataLength;
|
|
|
|
// load header
|
|
buffer = new std::uint8_t[512];
|
|
//FSTREAM file.seekg( 0 );
|
|
//FSTREAM file.read( (char*)buffer, 512 );
|
|
memcpy(buffer, filedata, 512);
|
|
header->load( buffer );
|
|
delete[] buffer;
|
|
|
|
// check OLE magic id
|
|
result = Storage::NotOLE;
|
|
for( std::size_t i=0; i<8; i++ )
|
|
if( header->id[i] != pole_magic[i] )
|
|
return;
|
|
|
|
// sanity checks
|
|
result = Storage::BadOLE;
|
|
if( !header->valid() ) return;
|
|
if( header->threshold != 4096 ) return;
|
|
|
|
// important block size
|
|
bbat->blockSize = static_cast<std::size_t>(1) << header->b_shift;
|
|
sbat->blockSize = static_cast<std::size_t>(1) << header->s_shift;
|
|
|
|
// find blocks allocated to store big bat
|
|
// the first 109 blocks are in header, the rest in meta bat
|
|
blocks.clear();
|
|
blocks.resize( header->num_bat );
|
|
for( std::size_t i = 0; i < 109; i++ )
|
|
if( i >= header->num_bat ) break;
|
|
else blocks[i] = header->bb_blocks[i];
|
|
if( (header->num_bat > 109) && (header->num_mbat > 0) )
|
|
{
|
|
std::uint8_t* buffer2 = new std::uint8_t[ bbat->blockSize ];
|
|
memset(buffer2, 0, bbat->blockSize);
|
|
std::size_t k = 109;
|
|
std::size_t mblock = header->mbat_start;
|
|
for( std::size_t r = 0; r < header->num_mbat; r++ )
|
|
{
|
|
loadBigBlock( mblock, buffer2, bbat->blockSize );
|
|
for( std::size_t s=0; s < bbat->blockSize-4; s+=4 )
|
|
{
|
|
if( k >= header->num_bat ) break;
|
|
else blocks[k++] = readU32( buffer2 + s );
|
|
}
|
|
mblock = readU32( buffer2 + bbat->blockSize-4 );
|
|
}
|
|
delete[] buffer2;
|
|
}
|
|
|
|
// load big bat
|
|
buflen = blocks.size()*bbat->blockSize;
|
|
if( buflen > 0 )
|
|
{
|
|
buffer = new std::uint8_t[ buflen ];
|
|
memset(buffer, 0, buflen);
|
|
loadBigBlocks( blocks, buffer, buflen );
|
|
bbat->load( buffer, buflen );
|
|
delete[] buffer;
|
|
}
|
|
|
|
// load small bat
|
|
blocks.clear();
|
|
blocks = bbat->follow( header->sbat_start );
|
|
buflen = blocks.size()*bbat->blockSize;
|
|
if( buflen > 0 )
|
|
{
|
|
buffer = new std::uint8_t[ buflen ];
|
|
memset(buffer, 0, buflen);
|
|
loadBigBlocks( blocks, buffer, buflen );
|
|
sbat->load( buffer, buflen );
|
|
delete[] buffer;
|
|
}
|
|
|
|
// load directory tree
|
|
blocks.clear();
|
|
blocks = bbat->follow( header->dirent_start );
|
|
buflen = blocks.size()*bbat->blockSize;
|
|
buffer = new std::uint8_t[ buflen ];
|
|
memset(buffer, 0, buflen);
|
|
loadBigBlocks( blocks, buffer, buflen );
|
|
dirtree->load( buffer, buflen );
|
|
std::size_t sb_start = readU32( buffer + 0x74 );
|
|
delete[] buffer;
|
|
|
|
// fetch block chain as data for small-files
|
|
sb_blocks = bbat->follow( sb_start ); // small files
|
|
|
|
// for troubleshooting, just enable this block
|
|
#if 0
|
|
header->debug();
|
|
sbat->debug();
|
|
bbat->debug();
|
|
dirtree->debug();
|
|
#endif
|
|
|
|
// so far so good
|
|
result = Storage::Ok;
|
|
opened = true;
|
|
}
|
|
|
|
void StorageIO::create()
|
|
{
|
|
// std::cout << "Creating " << filename << std::endl;
|
|
|
|
/*FSTREAM file.open( filename.c_str(), std::ios::out|std::ios::binary );
|
|
if( !file.good() )
|
|
{
|
|
std::cerr << "Can't create " << filename << std::endl;
|
|
result = Storage::OpenFailed;
|
|
return;
|
|
}*/
|
|
|
|
// so far so good
|
|
opened = true;
|
|
result = Storage::Ok;
|
|
}
|
|
|
|
// Intentionally empty: nothing is written back.
void StorageIO::flush()
{
  /* Note on Microsoft implementation:
     - directory entries are stored in the last block(s)
     - BATs are as second to the last
     - Meta BATs are third to the last
  */
}
|
|
|
|
void StorageIO::close()
|
|
{
|
|
if( !opened ) return;
|
|
|
|
//FSTREAM file.close();
|
|
opened = false;
|
|
|
|
std::list<Stream*>::iterator it;
|
|
for( it = streams.begin(); it != streams.end(); ++it )
|
|
delete *it;
|
|
}
|
|
|
|
// Create a StreamIO for the stream called name. Returns a null pointer when
// the name is empty, no such entry exists or the entry is a directory.
// The caller owns the returned object.
StreamIO* StorageIO::streamIO( const std::string& name )
{
  // sanity check
  if( name.empty() ) return (StreamIO*)0;

  // search in the entries; only non-directory entries qualify
  DirEntry* found = dirtree->entry( name );
  if( !found || found->dir ) return (StreamIO*)0;

  StreamIO* stream = new StreamIO( this, found );
  stream->fullName = name;
  return stream;
}
|
|
|
|
std::size_t StorageIO::loadBigBlocks( std::vector<std::size_t> blocks,
|
|
std::uint8_t* data, std::size_t maxlen )
|
|
{
|
|
// sentinel
|
|
if( !data ) return 0;
|
|
if( blocks.size() < 1 ) return 0;
|
|
if( maxlen == 0 ) return 0;
|
|
|
|
// read block one by one, seems fast enough
|
|
std::size_t bytes = 0;
|
|
for( std::size_t i=0; (i < blocks.size() ) && ( bytes<maxlen ); i++ )
|
|
{
|
|
std::size_t block = blocks[i];
|
|
std::size_t pos = bbat->blockSize * ( block+1 );
|
|
std::size_t p = (bbat->blockSize < maxlen-bytes) ? bbat->blockSize : maxlen-bytes;
|
|
if( pos + p > filesize ) p = filesize - pos;
|
|
//FSTREAM file.seekg( pos );
|
|
//FSTREAM file.read( (char*)data + bytes, p );
|
|
memcpy((char*)data + bytes, filedata + pos, p);
|
|
bytes += p;
|
|
}
|
|
|
|
return bytes;
|
|
}
|
|
|
|
std::size_t StorageIO::loadBigBlock( std::size_t block,
|
|
std::uint8_t* data, std::size_t maxlen )
|
|
{
|
|
// sentinel
|
|
if( !data ) return 0;
|
|
|
|
// wraps call for loadBigBlocks
|
|
std::vector<std::size_t> blocks;
|
|
blocks.resize( 1 );
|
|
blocks[ 0 ] = block;
|
|
|
|
return loadBigBlocks( blocks, data, maxlen );
|
|
}
|
|
|
|
// return number of bytes which has been read
|
|
std::size_t StorageIO::loadSmallBlocks( std::vector<std::size_t> blocks,
|
|
std::uint8_t* data, std::size_t maxlen )
|
|
{
|
|
// sentinel
|
|
if( !data ) return 0;
|
|
if( blocks.size() < 1 ) return 0;
|
|
if( maxlen == 0 ) return 0;
|
|
|
|
// our own local buffer
|
|
std::uint8_t* buf = new std::uint8_t[ bbat->blockSize ];
|
|
|
|
// read small block one by one
|
|
std::size_t bytes = 0;
|
|
for( std::size_t i=0; ( i<blocks.size() ) && ( bytes<maxlen ); i++ )
|
|
{
|
|
std::size_t block = blocks[i];
|
|
|
|
// find where the small-block exactly is
|
|
std::size_t pos = block * sbat->blockSize;
|
|
std::size_t bbindex = pos / bbat->blockSize;
|
|
if( bbindex >= sb_blocks.size() ) break;
|
|
|
|
loadBigBlock( sb_blocks[ bbindex ], buf, bbat->blockSize );
|
|
|
|
// copy the data
|
|
std::size_t offset = pos % bbat->blockSize;
|
|
std::size_t p = (maxlen-bytes < bbat->blockSize-offset ) ? maxlen-bytes : bbat->blockSize-offset;
|
|
p = (sbat->blockSize<p ) ? sbat->blockSize : p;
|
|
memcpy( data + bytes, buf + offset, p );
|
|
bytes += p;
|
|
}
|
|
|
|
delete[] buf;
|
|
|
|
return bytes;
|
|
}
|
|
|
|
std::size_t StorageIO::loadSmallBlock( std::size_t block,
|
|
std::uint8_t* data, std::size_t maxlen )
|
|
{
|
|
// sentinel
|
|
if( !data ) return 0;
|
|
|
|
// wraps call for loadSmallBlocks
|
|
std::vector<std::size_t> blocks;
|
|
blocks.resize( 1 );
|
|
blocks.assign( 1, block );
|
|
|
|
return loadSmallBlocks( blocks, data, maxlen );
|
|
}
|
|
|
|
// =========== StreamIO ==========
|
|
|
|
// Bind a stream reader to directory entry e of storage s. The block chain
// is resolved through the big table for streams at or above the header
// threshold and through the small table otherwise; a 4096-byte read cache
// is allocated and filled from position 0.
StreamIO::StreamIO( StorageIO* s, DirEntry* e ):
  io( s ),
  entry( e ),
  fullName(),
  eof( false ),
  fail( false ),
  blocks(),
  m_pos( 0 ),
  cache_data( 0 ),
  cache_size( 4096 ), // optimal ?
  cache_pos( 0 )
{
  const bool isBig = ( entry->size >= io->header->threshold );
  blocks = isBig ? io->bbat->follow( entry->start )
                 : io->sbat->follow( entry->start );

  // prepare cache
  cache_data = new std::uint8_t[cache_size];
  updateCache();
}
|
|
|
|
// FIXME tell parent we're gone
|
|
// Release the read cache.
StreamIO::~StreamIO()
{
  delete[] cache_data;
}
|
|
|
|
void StreamIO::seek( std::size_t pos )
|
|
{
|
|
m_pos = pos;
|
|
}
|
|
|
|
std::size_t StreamIO::tell()
|
|
{
|
|
return m_pos;
|
|
}
|
|
|
|
int StreamIO::getch()
|
|
{
|
|
// past end-of-file ?
|
|
if( m_pos > entry->size ) return -1;
|
|
|
|
// need to update cache ?
|
|
if( !cache_size || ( m_pos < cache_pos ) ||
|
|
( m_pos >= cache_pos + cache_size ) )
|
|
updateCache();
|
|
|
|
// something bad if we don't get good cache
|
|
if( !cache_size ) return -1;
|
|
|
|
int data = cache_data[m_pos - cache_pos];
|
|
m_pos++;
|
|
|
|
return data;
|
|
}
|
|
|
|
std::size_t StreamIO::read( std::size_t pos, std::uint8_t* data, std::size_t maxlen )
|
|
{
|
|
// sanity checks
|
|
if( !data ) return 0;
|
|
if( maxlen == 0 ) return 0;
|
|
|
|
std::size_t totalbytes = 0;
|
|
|
|
if ( entry->size < io->header->threshold )
|
|
{
|
|
// small file
|
|
std::size_t index = pos / io->sbat->blockSize;
|
|
|
|
if( index >= blocks.size() ) return 0;
|
|
|
|
std::uint8_t* buf = new std::uint8_t[ io->sbat->blockSize ];
|
|
std::size_t offset = pos % io->sbat->blockSize;
|
|
while( totalbytes < maxlen )
|
|
{
|
|
if( index >= blocks.size() ) break;
|
|
io->loadSmallBlock( blocks[index], buf, io->bbat->blockSize );
|
|
std::size_t count = io->sbat->blockSize - offset;
|
|
if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
|
|
memcpy( data+totalbytes, buf + offset, count );
|
|
totalbytes += count;
|
|
offset = 0;
|
|
index++;
|
|
}
|
|
delete[] buf;
|
|
|
|
}
|
|
else
|
|
{
|
|
// big file
|
|
std::size_t index = pos / io->bbat->blockSize;
|
|
|
|
if( index >= blocks.size() ) return 0;
|
|
|
|
std::uint8_t* buf = new std::uint8_t[ io->bbat->blockSize ];
|
|
std::size_t offset = pos % io->bbat->blockSize;
|
|
while( totalbytes < maxlen )
|
|
{
|
|
if( index >= blocks.size() ) break;
|
|
io->loadBigBlock( blocks[index], buf, io->bbat->blockSize );
|
|
std::size_t count = io->bbat->blockSize - offset;
|
|
if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
|
|
memcpy( data+totalbytes, buf + offset, count );
|
|
totalbytes += count;
|
|
index++;
|
|
offset = 0;
|
|
}
|
|
delete [] buf;
|
|
|
|
}
|
|
|
|
return totalbytes;
|
|
}
|
|
|
|
std::size_t StreamIO::read( std::uint8_t* data, std::size_t maxlen )
|
|
{
|
|
std::size_t bytes = read( tell(), data, maxlen );
|
|
m_pos += bytes;
|
|
return bytes;
|
|
}
|
|
|
|
void StreamIO::updateCache()
|
|
{
|
|
// sanity check
|
|
if( !cache_data ) return;
|
|
|
|
cache_pos = m_pos - ( m_pos % cache_size );
|
|
std::size_t bytes = cache_size;
|
|
if( cache_pos + bytes > entry->size ) bytes = entry->size - cache_pos;
|
|
cache_size = read( cache_pos, cache_data, bytes );
|
|
}
|
|
|
|
|
|
// =========== Storage ==========
|
|
|
|
// Create a storage over length bytes of in-memory data. The data is not
// parsed until open() is called.
Storage::Storage( char* bytes, std::size_t length ):
  io( new StorageIO( this, bytes, length ) )
{
  // nothing else to set up
}
|
|
|
|
// Destroy the owned StorageIO.
Storage::~Storage()
{
  delete this->io;
}
|
|
|
|
int Storage::result()
|
|
{
|
|
return io->result;
|
|
}
|
|
|
|
bool Storage::open()
|
|
{
|
|
return io->open();
|
|
}
|
|
|
|
void Storage::close()
|
|
{
|
|
io->close();
|
|
}
|
|
|
|
std::list<std::string> Storage::entries( const std::string& path )
|
|
{
|
|
std::list<std::string> result;
|
|
DirTree* dt = io->dirtree;
|
|
DirEntry* e = dt->entry( path, false );
|
|
if( e && e->dir )
|
|
{
|
|
std::size_t parent = dt->indexOf( e );
|
|
std::vector<std::size_t> children = dt->children( parent );
|
|
for( std::size_t i = 0; i < children.size(); i++ )
|
|
result.push_back( dt->entry( children[i] )->name );
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
bool Storage::isDirectory( const std::string& name )
|
|
{
|
|
DirEntry* e = io->dirtree->entry( name, false );
|
|
return e ? e->dir : false;
|
|
}
|
|
|
|
// Directory tree of the underlying StorageIO (not owned by the caller).
DirTree* Storage::dirTree()
{
  DirTree* tree = io->dirtree;
  return tree;
}
|
|
|
|
|
|
// Underlying StorageIO object (not owned by the caller).
StorageIO* Storage::storageIO()
{
  return this->io;
}
|
|
|
|
std::list<DirEntry*> Storage::dirEntries( const std::string& path )
|
|
{
|
|
std::list<DirEntry*> result;
|
|
DirTree* dt = io->dirtree;
|
|
DirEntry* e = dt->entry( path, false );
|
|
if( e && e->dir )
|
|
{
|
|
std::size_t parent = dt->indexOf( e );
|
|
std::vector<std::size_t> children = dt->children( parent );
|
|
for( std::size_t i = 0; i < children.size(); i++ )
|
|
result.push_back( dt->entry( children[i] ) );
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
// =========== Stream ==========
|
|
|
|
// Open the stream called name inside storage. io is a null pointer when
// the stream cannot be opened; the other members check for that.
Stream::Stream( Storage* storage, const std::string& name ):
  io( storage->io->streamIO( name ) )
{
  // nothing else to set up
}
|
|
|
|
// FIXME tell parent we're gone
|
|
// Destroy the owned StreamIO (deleting a null pointer is a no-op).
Stream::~Stream()
{
  delete this->io;
}
|
|
|
|
std::string Stream::fullName()
|
|
{
|
|
return io ? io->fullName : std::string();
|
|
}
|
|
|
|
std::size_t Stream::tell()
|
|
{
|
|
return io ? io->tell() : 0;
|
|
}
|
|
|
|
void Stream::seek( std::size_t newpos )
|
|
{
|
|
if( io ) io->seek( newpos );
|
|
}
|
|
|
|
std::size_t Stream::size()
|
|
{
|
|
return io ? io->entry->size : 0;
|
|
}
|
|
|
|
int Stream::getch()
|
|
{
|
|
return io ? io->getch() : 0;
|
|
}
|
|
|
|
std::size_t Stream::read( std::uint8_t* data, std::size_t maxlen )
|
|
{
|
|
return io ? io->read( data, maxlen ) : 0;
|
|
}
|
|
|
|
bool Stream::eof()
|
|
{
|
|
return io ? io->eof : false;
|
|
}
|
|
|
|
bool Stream::fail()
|
|
{
|
|
return io ? io->fail : true;
|
|
}
|