summary refs log tree commit diff
path: root/fs_mgr/libsnapshot/snapuserd.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'fs_mgr/libsnapshot/snapuserd.cpp')
-rw-r--r--fs_mgr/libsnapshot/snapuserd.cpp786
1 files changed, 722 insertions, 64 deletions
diff --git a/fs_mgr/libsnapshot/snapuserd.cpp b/fs_mgr/libsnapshot/snapuserd.cpp
index a6ff4fd04..d3f4f70de 100644
--- a/fs_mgr/libsnapshot/snapuserd.cpp
+++ b/fs_mgr/libsnapshot/snapuserd.cpp
@@ -15,102 +15,735 @@
*/
#include <linux/types.h>
+#include <stdlib.h>
+
+#include <csignal>
+#include <cstring>
+#include <iostream>
+#include <limits>
+#include <string>
+#include <thread>
+#include <vector>
#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/stringprintf.h>
#include <android-base/unique_fd.h>
#include <libdm/dm.h>
+#include <libsnapshot/cow_reader.h>
+#include <libsnapshot/cow_writer.h>
+#include <libsnapshot/snapuserd.h>
+
+namespace android {
+namespace snapshot {
+using namespace android;
+using namespace android::dm;
using android::base::unique_fd;
#define DM_USER_MAP_READ 0
#define DM_USER_MAP_WRITE 1
-struct dm_user_message {
- __u64 seq;
- __u64 type;
- __u64 flags;
- __u64 sector;
- __u64 len;
- __u8 buf[];
+static constexpr size_t PAYLOAD_SIZE = (1UL << 16);
+
+static_assert(PAYLOAD_SIZE >= BLOCK_SIZE);
+
+class Target {
+ public:
+ // Represents an already-created Target, which is referenced by UUID.
+ Target(std::string uuid) : uuid_(uuid) {}
+
+ const auto& uuid() { return uuid_; }
+ std::string control_path() { return std::string("/dev/dm-user-") + uuid(); }
+
+ private:
+ const std::string uuid_;
};
-using namespace android::dm;
+class Daemon {
+    // The Daemon class is a singleton to avoid
+    // being instantiated more than once.
+ public:
+ static Daemon& Instance() {
+ static Daemon instance;
+ return instance;
+ }
+
+ bool IsRunning();
+
+ private:
+ bool is_running_;
+
+ Daemon();
+ Daemon(Daemon const&) = delete;
+ void operator=(Daemon const&) = delete;
+
+ static void SignalHandler(int signal);
+};
+
+Daemon::Daemon() {
+ is_running_ = true;
+ signal(SIGINT, Daemon::SignalHandler);
+ signal(SIGTERM, Daemon::SignalHandler);
+}
+
+bool Daemon::IsRunning() {
+ return is_running_;
+}
+
+void Daemon::SignalHandler(int signal) {
+ LOG(DEBUG) << "Snapuserd received signal: " << signal;
+ switch (signal) {
+ case SIGINT:
+ case SIGTERM: {
+ Daemon::Instance().is_running_ = false;
+ break;
+ }
+ }
+}
+
+class BufferSink : public IByteSink {
+ public:
+ void Initialize(size_t size) {
+ buffer_size_ = size;
+ buffer_offset_ = 0;
+ buffer_ = std::make_unique<uint8_t[]>(size);
+ }
+
+ void* GetBufPtr() { return buffer_.get(); }
+
+ void Clear() { memset(GetBufPtr(), 0, buffer_size_); }
+
+ void* GetPayloadBuffer(size_t size) {
+ if ((buffer_size_ - buffer_offset_) < size) return nullptr;
+
+ char* buffer = reinterpret_cast<char*>(GetBufPtr());
+ struct dm_user_message* msg = (struct dm_user_message*)(&(buffer[0]));
+ return (char*)msg->payload.buf + buffer_offset_;
+ }
+
+ void* GetBuffer(size_t requested, size_t* actual) override {
+ void* buf = GetPayloadBuffer(requested);
+ if (!buf) {
+ *actual = 0;
+ return nullptr;
+ }
+ *actual = requested;
+ return buf;
+ }
+
+ void UpdateBufferOffset(size_t size) { buffer_offset_ += size; }
+
+ struct dm_user_header* GetHeaderPtr() {
+ CHECK(sizeof(struct dm_user_header) <= buffer_size_);
+ char* buf = reinterpret_cast<char*>(GetBufPtr());
+ struct dm_user_header* header = (struct dm_user_header*)(&(buf[0]));
+ return header;
+ }
+
+ bool ReturnData(void*, size_t) override { return true; }
+ void ResetBufferOffset() { buffer_offset_ = 0; }
+
+ private:
+ std::unique_ptr<uint8_t[]> buffer_;
+ loff_t buffer_offset_;
+ size_t buffer_size_;
+};
+
+class Snapuserd final {
+ public:
+ Snapuserd(const std::string& in_cow_device, const std::string& in_backing_store_device)
+ : in_cow_device_(in_cow_device),
+ in_backing_store_device_(in_backing_store_device),
+ metadata_read_done_(false) {}
+
+ int Run();
+ int ReadDmUserHeader();
+ int WriteDmUserPayload(size_t size);
+ int ConstructKernelCowHeader();
+ int ReadMetadata();
+ int ZerofillDiskExceptions(size_t read_size);
+ int ReadDiskExceptions(chunk_t chunk, size_t size);
+ int ReadData(chunk_t chunk, size_t size);
+
+ private:
+ int ProcessReplaceOp(const CowOperation* cow_op);
+ int ProcessCopyOp(const CowOperation* cow_op);
+ int ProcessZeroOp();
+
+ std::string in_cow_device_;
+ std::string in_backing_store_device_;
+
+ unique_fd cow_fd_;
+ unique_fd backing_store_fd_;
+ unique_fd ctrl_fd_;
+
+ uint32_t exceptions_per_area_;
+
+ std::unique_ptr<ICowOpIter> cowop_iter_;
+ std::unique_ptr<CowReader> reader_;
+
+ // Vector of disk exception which is a
+ // mapping of old-chunk to new-chunk
+ std::vector<std::unique_ptr<uint8_t[]>> vec_;
+
+ // Index - Chunk ID
+ // Value - cow operation
+ std::vector<const CowOperation*> chunk_vec_;
+
+ bool metadata_read_done_;
+ BufferSink bufsink_;
+};
+
+// Construct kernel COW header in memory
+// This header will be in sector 0. The IO
+// request will always be 4k. After constructing
+// the header, zero out the remaining block.
+int Snapuserd::ConstructKernelCowHeader() {
+ void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SIZE);
+ CHECK(buffer != nullptr);
+
+ memset(buffer, 0, BLOCK_SIZE);
+
+ struct disk_header* dh = reinterpret_cast<struct disk_header*>(buffer);
+
+ dh->magic = SNAP_MAGIC;
+ dh->valid = SNAPSHOT_VALID;
+ dh->version = SNAPSHOT_DISK_VERSION;
+ dh->chunk_size = CHUNK_SIZE;
+
+ return BLOCK_SIZE;
+}
+
+// Start the replace operation. This will read the
+// internal COW format and if the block is compressed,
+// it will be de-compressed.
+int Snapuserd::ProcessReplaceOp(const CowOperation* cow_op) {
+ if (!reader_->ReadData(*cow_op, &bufsink_)) {
+ LOG(ERROR) << "ReadData failed for chunk: " << cow_op->new_block;
+ return -EIO;
+ }
+
+ return BLOCK_SIZE;
+}
+
+// Start the copy operation. This will read the backing
+// block device which is represented by cow_op->source.
+int Snapuserd::ProcessCopyOp(const CowOperation* cow_op) {
+ void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SIZE);
+ CHECK(buffer != nullptr);
+
+ // Issue a single 4K IO. However, this can be optimized
+ // if the successive blocks are contiguous.
+ if (!android::base::ReadFullyAtOffset(backing_store_fd_, buffer, BLOCK_SIZE,
+ cow_op->source * BLOCK_SIZE)) {
+ LOG(ERROR) << "Copy-op failed. Read from backing store at: " << cow_op->source;
+ return -1;
+ }
+
+ return BLOCK_SIZE;
+}
+
+int Snapuserd::ProcessZeroOp() {
+ // Zero out the entire block
+ void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SIZE);
+ CHECK(buffer != nullptr);
+
+ memset(buffer, 0, BLOCK_SIZE);
+ return BLOCK_SIZE;
+}
+
+/*
+ * Read the data of size bytes from a given chunk.
+ *
+ * Kernel can potentially merge the blocks if the
+ * successive chunks are contiguous. For chunk size of 8,
+ * there can be 256 disk exceptions; and if
+ * all 256 disk exceptions are contiguous, kernel can merge
+ * them into a single IO.
+ *
+ * Since each chunk in the disk exception
+ * mapping represents a 4k block, kernel can potentially
+ * issue 256*4k = 1M IO in one shot.
+ *
+ * Even though kernel assumes that the blocks are
+ * contiguous, we need to split the 1M IO into 4k chunks
+ * as each operation represents 4k and it can either be:
+ *
+ * 1: Replace operation
+ * 2: Copy operation
+ * 3: Zero operation
+ *
+ */
+int Snapuserd::ReadData(chunk_t chunk, size_t size) {
+ int ret = 0;
+
+ size_t read_size = size;
+
+ chunk_t chunk_key = chunk;
+ uint32_t stride;
+ lldiv_t divresult;
+
+ // Size should always be aligned
+ CHECK((read_size & (BLOCK_SIZE - 1)) == 0);
+
+ while (read_size > 0) {
+ const CowOperation* cow_op = chunk_vec_[chunk_key];
+ CHECK(cow_op != nullptr);
+ int result;
+
+ switch (cow_op->type) {
+ case kCowReplaceOp: {
+ result = ProcessReplaceOp(cow_op);
+ break;
+ }
+
+ case kCowZeroOp: {
+ result = ProcessZeroOp();
+ break;
+ }
+
+ case kCowCopyOp: {
+ result = ProcessCopyOp(cow_op);
+ break;
+ }
+
+ default: {
+ LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
+ ret = -EIO;
+ goto done;
+ }
+ }
+
+ if (result < 0) {
+ ret = result;
+ goto done;
+ }
+
+ // Update the buffer offset
+ bufsink_.UpdateBufferOffset(BLOCK_SIZE);
+
+ read_size -= BLOCK_SIZE;
+ ret += BLOCK_SIZE;
+
+ // Start iterating the chunk incrementally; Since while
+ // constructing the metadata, we know that the chunk IDs
+ // are contiguous
+ chunk_key += 1;
+
+        // This is similar to the way chunk IDs were assigned
+        // in ReadMetadata().
+ //
+ // Skip if the chunk id represents a metadata chunk.
+ stride = exceptions_per_area_ + 1;
+ divresult = lldiv(chunk_key, stride);
+ if (divresult.rem == NUM_SNAPSHOT_HDR_CHUNKS) {
+ // Crossing exception boundary. Kernel will never
+ // issue IO which is spanning between a data chunk
+ // and a metadata chunk. This should be perfectly aligned.
+ //
+ // Since the input read_size is 4k aligned, we will
+ // always end up reading all 256 data chunks in one area.
+ // Thus, every multiple of 4K IO represents 256 data chunks
+ CHECK(read_size == 0);
+ break;
+ }
+ }
+
+done:
+
+ // Reset the buffer offset
+ bufsink_.ResetBufferOffset();
+ return ret;
+}
+
+/*
+ * dm-snap does prefetch reads while reading disk-exceptions.
+ * By default, the prefetch value is set to 12; this means that
+ * dm-snap will issue 12 areas wherein each area is a 4k page
+ * of disk-exceptions.
+ *
+ * If, during prefetch, the chunk-id seen is beyond the
+ * actual number of metadata pages, fill the buffer with zeroes.
+ * When dm-snap starts parsing the buffer, it will stop
+ * reading metadata page once the buffer content is zero.
+ */
+int Snapuserd::ZerofillDiskExceptions(size_t read_size) {
+ size_t size = exceptions_per_area_ * sizeof(struct disk_exception);
+
+ if (read_size > size) return -EINVAL;
+
+ void* buffer = bufsink_.GetPayloadBuffer(size);
+ CHECK(buffer != nullptr);
+
+ memset(buffer, 0, size);
+ return size;
+}
+
+/*
+ * A disk exception is a simple mapping of old_chunk to new_chunk.
+ * When dm-snapshot device is created, kernel requests these mapping.
+ *
+ * Each disk exception is of size 16 bytes. Thus a single 4k page can
+ * have:
+ *
+ * exceptions_per_area_ = 4096/16 = 256. This entire 4k page
+ * is considered a metadata page and it is represented by chunk ID.
+ *
+ * Convert the chunk ID to index into the vector which gives us
+ * the metadata page.
+ */
+int Snapuserd::ReadDiskExceptions(chunk_t chunk, size_t read_size) {
+ uint32_t stride = exceptions_per_area_ + 1;
+ size_t size;
+
+ // ChunkID to vector index
+ lldiv_t divresult = lldiv(chunk, stride);
+
+ if (divresult.quot < vec_.size()) {
+ size = exceptions_per_area_ * sizeof(struct disk_exception);
+
+ if (read_size > size) return -EINVAL;
+
+ void* buffer = bufsink_.GetPayloadBuffer(size);
+ CHECK(buffer != nullptr);
+
+ memcpy(buffer, vec_[divresult.quot].get(), size);
+ } else {
+ size = ZerofillDiskExceptions(read_size);
+ }
+
+ return size;
+}
+
+/*
+ * Read the metadata from COW device and
+ * construct the metadata as required by the kernel.
+ *
+ * Please see design on kernel COW format
+ *
+ * 1: Read the metadata from internal COW device
+ * 2: There are 3 COW operations:
+ * a: Replace op
+ * b: Copy op
+ * c: Zero op
+ * 3: For each of the 3 operations, op->new_block
+ * represents the block number in the base device
+ * for which one of the 3 operations have to be applied.
+ * This represents the old_chunk in the kernel COW format
+ * 4: We need to assign new_chunk for a corresponding old_chunk
+ * 5: The algorithm is similar to how kernel assigns chunk number
+ * while creating exceptions.
+ * 6: Use a monotonically increasing chunk number to assign the
+ * new_chunk
+ * 7: Each chunk-id represents either a: Metadata page or b: Data page
+ * 8: Chunk-id representing a data page is stored in a vector. Index is the
+ * chunk-id and value is the pointer to the CowOperation
+ * 9: Chunk-id representing a metadata page is converted into a vector
+ * index. We store this in vector as kernel requests metadata during
+ * two stage:
+ * a: When initial dm-snapshot device is created, kernel requests
+ * all the metadata and stores it in its internal data-structures.
+ *     b: During merge, kernel requests the same metadata
+ *        once again.
+ * In both these cases, a quick lookup based on chunk-id is done.
+ * 10: When the chunk number is incremented, we need to check whether
+ *     the chunk represents a metadata page and, if so, skip it.
+ * 11: Each 4k page will contain 256 disk exceptions. We call this
+ * exceptions_per_area_
+ * 12: Kernel will stop issuing metadata IO requests when new-chunk ID is 0.
+ */
+int Snapuserd::ReadMetadata() {
+ reader_ = std::make_unique<CowReader>();
+ CowHeader header;
-static int daemon_main(const std::string& device) {
- unique_fd block_fd(open(device.c_str(), O_RDWR));
- if (block_fd < 0) {
- PLOG(ERROR) << "Unable to open " << device;
+ if (!reader_->Parse(cow_fd_)) {
+ LOG(ERROR) << "Failed to parse";
return 1;
}
- unique_fd ctrl_fd(open("/dev/dm-user", O_RDWR));
- if (ctrl_fd < 0) {
- PLOG(ERROR) << "Unable to open /dev/dm-user";
+ if (!reader_->GetHeader(&header)) {
+ LOG(ERROR) << "Failed to get header";
return 1;
}
- size_t buf_size = 1UL << 16;
- auto buf = std::make_unique<char>(buf_size);
+ CHECK(header.block_size == BLOCK_SIZE);
- /* Just keeps pumping messages between userspace and the kernel. We won't
- * actually be doing anything, but the sequence numbers line up so it'll at
- * least make forward progress. */
- while (true) {
- struct dm_user_message* msg = (struct dm_user_message*)buf.get();
+ LOG(DEBUG) << "Num-ops: " << std::hex << header.num_ops;
+ LOG(DEBUG) << "ops-offset: " << std::hex << header.ops_offset;
+ LOG(DEBUG) << "ops-size: " << std::hex << header.ops_size;
+
+ cowop_iter_ = reader_->GetOpIter();
+
+ if (cowop_iter_ == nullptr) {
+ LOG(ERROR) << "Failed to get cowop_iter";
+ return 1;
+ }
+
+ exceptions_per_area_ = (CHUNK_SIZE << SECTOR_SHIFT) / sizeof(struct disk_exception);
+
+ // Start from chunk number 2. Chunk 0 represents header and chunk 1
+ // represents first metadata page.
+ chunk_t next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1;
+ chunk_vec_.push_back(nullptr);
+ chunk_vec_.push_back(nullptr);
+
+ loff_t offset = 0;
+ std::unique_ptr<uint8_t[]> de_ptr =
+ std::make_unique<uint8_t[]>(exceptions_per_area_ * sizeof(struct disk_exception));
+
+    // This memset is important. Kernel will stop issuing IO when new-chunk ID
+    // is 0. When an area is not filled completely with all 256 exceptions,
+    // this memset ensures that the metadata read is completed.
+ memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));
+ size_t num_ops = 0;
+
+ while (!cowop_iter_->Done()) {
+ const CowOperation* cow_op = &cowop_iter_->Get();
+ struct disk_exception* de =
+ reinterpret_cast<struct disk_exception*>((char*)de_ptr.get() + offset);
+
+ if (!(cow_op->type == kCowReplaceOp || cow_op->type == kCowZeroOp ||
+ cow_op->type == kCowCopyOp)) {
+ LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
+ return 1;
+ }
+
+ // Construct the disk-exception
+ de->old_chunk = cow_op->new_block;
+ de->new_chunk = next_free;
- memset(buf.get(), 0, buf_size);
+ LOG(DEBUG) << "Old-chunk: " << de->old_chunk << "New-chunk: " << de->new_chunk;
- ssize_t readed = read(ctrl_fd.get(), buf.get(), buf_size);
- if (readed < 0) {
- PLOG(ERROR) << "Control read failed, trying with more space";
- buf_size *= 2;
- buf = std::make_unique<char>(buf_size);
- continue;
+ // Store operation pointer. Note, new-chunk ID is the index
+ chunk_vec_.push_back(cow_op);
+ CHECK(next_free == (chunk_vec_.size() - 1));
+
+ offset += sizeof(struct disk_exception);
+
+ cowop_iter_->Next();
+
+ // Find the next free chunk-id to be assigned. Check if the next free
+ // chunk-id represents a metadata page. If so, skip it.
+ next_free += 1;
+ uint32_t stride = exceptions_per_area_ + 1;
+ lldiv_t divresult = lldiv(next_free, stride);
+ num_ops += 1;
+
+ if (divresult.rem == NUM_SNAPSHOT_HDR_CHUNKS) {
+ CHECK(num_ops == exceptions_per_area_);
+ // Store it in vector at the right index. This maps the chunk-id to
+ // vector index.
+ vec_.push_back(std::move(de_ptr));
+ offset = 0;
+ num_ops = 0;
+
+ chunk_t metadata_chunk = (next_free - exceptions_per_area_ - NUM_SNAPSHOT_HDR_CHUNKS);
+
+ LOG(DEBUG) << "Area: " << vec_.size() - 1;
+ LOG(DEBUG) << "Metadata-chunk: " << metadata_chunk;
+ LOG(DEBUG) << "Sector number of Metadata-chunk: " << (metadata_chunk << CHUNK_SHIFT);
+
+ // Create buffer for next area
+ de_ptr = std::make_unique<uint8_t[]>(exceptions_per_area_ *
+ sizeof(struct disk_exception));
+ memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));
+
+ // Since this is a metadata, store at this index
+ chunk_vec_.push_back(nullptr);
+
+ // Find the next free chunk-id
+ next_free += 1;
+ if (cowop_iter_->Done()) {
+ vec_.push_back(std::move(de_ptr));
+ }
}
+ }
- LOG(DEBUG) << android::base::StringPrintf("read() from dm-user returned %d bytes:",
- (int)readed);
- LOG(DEBUG) << android::base::StringPrintf(" msg->seq: 0x%016llx", msg->seq);
- LOG(DEBUG) << android::base::StringPrintf(" msg->type: 0x%016llx", msg->type);
- LOG(DEBUG) << android::base::StringPrintf(" msg->flags: 0x%016llx", msg->flags);
- LOG(DEBUG) << android::base::StringPrintf(" msg->sector: 0x%016llx", msg->sector);
- LOG(DEBUG) << android::base::StringPrintf(" msg->len: 0x%016llx", msg->len);
+ // Partially filled area
+ if (num_ops) {
+ LOG(DEBUG) << "Partially filled area num_ops: " << num_ops;
+ vec_.push_back(std::move(de_ptr));
+ }
- switch (msg->type) {
+ return 0;
+}
+
+void MyLogger(android::base::LogId, android::base::LogSeverity severity, const char*, const char*,
+ unsigned int, const char* message) {
+ if (severity == android::base::ERROR) {
+ fprintf(stderr, "%s\n", message);
+ } else {
+ fprintf(stdout, "%s\n", message);
+ }
+}
+
+// Read Header from dm-user misc device. This gives
+// us the sector number for which IO is issued by dm-snapshot device
+int Snapuserd::ReadDmUserHeader() {
+ if (!android::base::ReadFully(ctrl_fd_, bufsink_.GetBufPtr(), sizeof(struct dm_user_header))) {
+ PLOG(ERROR) << "Control read failed";
+ return -1;
+ }
+
+ return sizeof(struct dm_user_header);
+}
+
+// Send the payload/data back to dm-user misc device.
+int Snapuserd::WriteDmUserPayload(size_t size) {
+ if (!android::base::WriteFully(ctrl_fd_, bufsink_.GetBufPtr(),
+ sizeof(struct dm_user_header) + size)) {
+ PLOG(ERROR) << "Write to dm-user failed";
+ return -1;
+ }
+
+ return sizeof(struct dm_user_header) + size;
+}
+
+// Start the daemon.
+// TODO: Handle signals
+int Snapuserd::Run() {
+ backing_store_fd_.reset(open(in_backing_store_device_.c_str(), O_RDONLY));
+ if (backing_store_fd_ < 0) {
+ LOG(ERROR) << "Open Failed: " << in_backing_store_device_;
+ return 1;
+ }
+
+ cow_fd_.reset(open(in_cow_device_.c_str(), O_RDWR));
+ if (cow_fd_ < 0) {
+ LOG(ERROR) << "Open Failed: " << in_cow_device_;
+ return 1;
+ }
+
+ std::string str(in_cow_device_);
+ std::size_t found = str.find_last_of("/\\");
+ CHECK(found != std::string::npos);
+ std::string device_name = str.substr(found + 1);
+
+ LOG(DEBUG) << "Fetching UUID for: " << device_name;
+
+ auto& dm = dm::DeviceMapper::Instance();
+ std::string uuid;
+ if (!dm.GetDmDeviceUuidByName(device_name, &uuid)) {
+ LOG(ERROR) << "Unable to find UUID for " << in_cow_device_;
+ return 1;
+ }
+
+ LOG(DEBUG) << "UUID: " << uuid;
+ Target t(uuid);
+
+ ctrl_fd_.reset(open(t.control_path().c_str(), O_RDWR));
+ if (ctrl_fd_ < 0) {
+ LOG(ERROR) << "Unable to open " << t.control_path();
+ return 1;
+ }
+
+ int ret = 0;
+
+ // Allocate the buffer which is used to communicate between
+ // daemon and dm-user. The buffer comprises of header and a fixed payload.
+ // If the dm-user requests a big IO, the IO will be broken into chunks
+ // of PAYLOAD_SIZE.
+ size_t buf_size = sizeof(struct dm_user_header) + PAYLOAD_SIZE;
+ bufsink_.Initialize(buf_size);
+
+ while (true) {
+ struct dm_user_header* header = bufsink_.GetHeaderPtr();
+
+ bufsink_.Clear();
+
+ ret = ReadDmUserHeader();
+ if (ret < 0) return ret;
+
+ LOG(DEBUG) << "dm-user returned " << ret << " bytes";
+
+ LOG(DEBUG) << "msg->seq: " << std::hex << header->seq;
+ LOG(DEBUG) << "msg->type: " << std::hex << header->type;
+ LOG(DEBUG) << "msg->flags: " << std::hex << header->flags;
+ LOG(DEBUG) << "msg->sector: " << std::hex << header->sector;
+ LOG(DEBUG) << "msg->len: " << std::hex << header->len;
+
+ switch (header->type) {
case DM_USER_MAP_READ: {
- LOG(DEBUG) << android::base::StringPrintf(
- "Responding to read of sector %lld with %lld bytes data", msg->sector,
- msg->len);
-
- if ((sizeof(*msg) + msg->len) > buf_size) {
- auto old_buf = std::move(buf);
- buf_size = sizeof(*msg) + msg->len;
- buf = std::make_unique<char>(buf_size);
- memcpy(buf.get(), old_buf.get(), sizeof(*msg));
- msg = (struct dm_user_message*)buf.get();
- }
-
- if (lseek(block_fd.get(), msg->sector * 512, SEEK_SET) < 0) {
- PLOG(ERROR) << "lseek failed: " << device;
- return 7;
- }
- if (!android::base::ReadFully(block_fd.get(), msg->buf, msg->len)) {
- PLOG(ERROR) << "read failed: " << device;
- return 7;
- }
-
- if (!android::base::WriteFully(ctrl_fd.get(), buf.get(), sizeof(*msg) + msg->len)) {
- PLOG(ERROR) << "write control failed";
- return 3;
- }
+ size_t remaining_size = header->len;
+ loff_t offset = 0;
+ header->io_in_progress = 0;
+ ret = 0;
+ do {
+ size_t read_size = std::min(PAYLOAD_SIZE, remaining_size);
+
+ // Request to sector 0 is always for kernel
+ // representation of COW header. This IO should be only
+ // once during dm-snapshot device creation. We should
+ // never see multiple IO requests. Additionally this IO
+ // will always be a single 4k.
+ if (header->sector == 0) {
+ // Read the metadata from internal COW device
+ // and build the in-memory data structures
+ // for all the operations in the internal COW.
+ if (!metadata_read_done_ && ReadMetadata()) {
+ LOG(ERROR) << "Metadata read failed";
+ return 1;
+ }
+ metadata_read_done_ = true;
+
+ CHECK(read_size == BLOCK_SIZE);
+ ret = ConstructKernelCowHeader();
+ if (ret < 0) return ret;
+ } else {
+ // Convert the sector number to a chunk ID.
+ //
+ // Check if the chunk ID represents a metadata
+ // page. If the chunk ID is not found in the
+ // vector, then it points to a metadata page.
+ chunk_t chunk = (header->sector >> CHUNK_SHIFT);
+
+ if (chunk >= chunk_vec_.size()) {
+ ret = ZerofillDiskExceptions(read_size);
+ if (ret < 0) {
+ LOG(ERROR) << "ZerofillDiskExceptions failed";
+ return ret;
+ }
+ } else if (chunk_vec_[chunk] == nullptr) {
+ ret = ReadDiskExceptions(chunk, read_size);
+ if (ret < 0) {
+ LOG(ERROR) << "ReadDiskExceptions failed";
+ return ret;
+ }
+ } else {
+ chunk_t num_chunks_read = (offset >> BLOCK_SHIFT);
+ ret = ReadData(chunk + num_chunks_read, read_size);
+ if (ret < 0) {
+ LOG(ERROR) << "ReadData failed";
+ return ret;
+ }
+ }
+ }
+
+ ssize_t written = WriteDmUserPayload(ret);
+ if (written < 0) return written;
+
+ remaining_size -= ret;
+ offset += ret;
+ if (remaining_size) {
+ LOG(DEBUG) << "Write done ret: " << ret
+ << " remaining size: " << remaining_size;
+ bufsink_.GetHeaderPtr()->io_in_progress = 1;
+ }
+ } while (remaining_size);
+
break;
}
- case DM_USER_MAP_WRITE:
+ case DM_USER_MAP_WRITE: {
+ // TODO: After merge operation is completed, kernel issues write
+ // to flush all the exception mappings where the merge is
+ // completed. If dm-user routes the WRITE IO, we need to clear
+ // in-memory data structures representing those exception
+ // mappings.
abort();
break;
+ }
}
LOG(DEBUG) << "read() finished, next message";
@@ -119,8 +752,33 @@ static int daemon_main(const std::string& device) {
return 0;
}
+} // namespace snapshot
+} // namespace android
+
+void run_thread(std::string cow_device, std::string backing_device) {
+ android::snapshot::Snapuserd snapd(cow_device, backing_device);
+ snapd.Run();
+}
+
int main([[maybe_unused]] int argc, char** argv) {
android::base::InitLogging(argv, &android::base::KernelLogger);
- daemon_main(argv[1]);
+
+ android::snapshot::Daemon& daemon = android::snapshot::Daemon::Instance();
+
+ while (daemon.IsRunning()) {
+ // TODO: This is hardcoded wherein:
+ // argv[1] = system_cow, argv[2] = /dev/block/mapper/system_a
+ // argv[3] = product_cow, argv[4] = /dev/block/mapper/product_a
+ //
+ // This should be fixed based on some kind of IPC or setup a
+ // command socket and spin up the thread based when a new
+ // partition is visible.
+ std::thread system_a(run_thread, argv[1], argv[2]);
+ std::thread product_a(run_thread, argv[3], argv[4]);
+
+ system_a.join();
+ product_a.join();
+ }
+
return 0;
}